lundi 16 juillet 2018

How to web scrape and access

I'm trying to scrape the data from "https://www.deadstock.ca/products/adidas-futurepacer-grey-one".

I want to be able to read the variant data which looks like this:

<script>
window.ShopifyAnalytics = window.ShopifyAnalytics || {};
window.ShopifyAnalytics.meta = window.ShopifyAnalytics.meta || {};
  window.ShopifyAnalytics.meta.currency = 'CAD';
  var meta = {"product":{"id":223724142613,"vendor":"Adidas","type":"Footwear 
- QS","variants":[{"id":3063231774741,"price":26000,"name":"adidas 
Futurepacer \/ Grey One - 8","public_title":"8","sku":"AQ0907-Grey One-8"}, 
{"id":3063231807509,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
8.5","public_title":"8.5","sku":"AQ0907-Grey One-8.5"}, 
{"id":3063231840277,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
9","public_title":"9","sku":"AQ0907-Grey One-9"}, 
{"id":3063231873045,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
9.5","public_title":"9.5","sku":"AQ0907-Grey One-9.5"}, 
{"id":3063231905813,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
10","public_title":"10","sku":"AQ0907-Grey One-10"}, 
{"id":3063231938581,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
10.5","public_title":"10.5","sku":"AQ0907-Grey One-10.5"}, 
{"id":3063231971349,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
11","public_title":"11","sku":"AQ0907-Grey One-11"}, 
{"id":3063232004117,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
12","public_title":"12","sku":"AQ0907-Grey One-12"}, 
{"id":3063232036885,"price":26000,"name":"adidas Futurepacer \/ Grey One - 
 13","public_title":"13","sku":"AQ0907-Grey One-13"}]},"page": 
{"pageType":"product","resourceType":"product","resourceId":223724142613}};
  for (var attr in meta) {
    window.ShopifyAnalytics.meta[attr] = meta[attr];
  }
</script>

I don't think I'm targeting it properly. I want the code to be able to print all the "id" numbers of the variants. Here is my code so far; I'm still new to bs4, so any help would be appreciated. Thanks.

import json
import re
import urllib.request

import bs4 as bs
import lxml

PRODUCT_URL = 'https://www.deadstock.ca/products/adidas-futurepacer-grey-one'

# Matches the JavaScript assignment that precedes the product metadata object,
# consuming trailing whitespace so the match ends right at the opening brace.
_META_ASSIGNMENT = re.compile(r"var\s+meta\s*=\s*")


def extract_variant_ids(script_text):
    """Return the variant ids found in a ``var meta = {...}`` script body.

    Args:
        script_text: The text content of one ``<script>`` element.

    Returns:
        A list of the ``"id"`` values under ``product.variants``, in page
        order. The list is empty when the script has no ``var meta``
        assignment, the assigned value is not a JSON object, or the object
        carries no variants.
    """
    match = _META_ASSIGNMENT.search(script_text)
    if match is None:
        return []
    try:
        # raw_decode parses exactly one JSON value starting at the given
        # index and ignores the JavaScript that follows it, so there is no
        # need to locate the end of the object by hand.
        meta, _ = json.JSONDecoder().raw_decode(script_text, match.end())
    except json.JSONDecodeError:
        return []
    if not isinstance(meta, dict):
        return []
    product = meta.get("product")
    if not isinstance(product, dict):
        return []
    variants = product.get("variants") or []
    return [
        variant["id"]
        for variant in variants
        if isinstance(variant, dict) and "id" in variant
    ]


def main():
    """Fetch the product page and print every variant id, one per line."""
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(PRODUCT_URL) as response:
        html = response.read()

    soup = bs.BeautifulSoup(html, 'lxml')

    for script in soup.find_all('script'):
        # .string is None for script tags without inline text (e.g. src=...).
        for variant_id in extract_variant_ids(script.string or ""):
            print(variant_id)


if __name__ == "__main__":
    main()




Aucun commentaire:

Enregistrer un commentaire