My code is below. It takes around 10 seconds to scrape a single page. I'm basically searching JB Hi-Fi from a to z, over pages 1 to 200, and saving the data into lists holding the title of each item (e.g. a TV) and its respective price.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
import time
# Scraped product titles and their prices, accumulated across every page.
# The two lists are filled in page order; an entry in `price` is intended to
# line up with the entry at the same position in `name`.
name = []
price = []
# Search terms fed to the site's `q=` query parameter.
alpha = ['a', 'c', 'e', 'g', 'i', 'k', 'm', 'o', 'q', 's', 'u', 'w', 'y']

# Launch ONE headless Firefox session and reuse it for every page. Starting a
# fresh browser per URL dominates the run time, and a session that is only
# close()d (or never reached because of an exception) can leave the
# geckodriver process behind.
options = Options()
options.add_argument('--headless')
driver = webdriver.Firefox(options=options)
try:
    for alphabet in alpha:
        # NOTE(review): range(1, 200) visits pages 1..199 -- confirm whether
        # page 200 was meant to be included.
        for i in range(1, 200):
            url = 'https://www.jbhifi.com.au/?q=' + alphabet + '&hPP=36&idx=shopify_products&p=' + str(i)
            print(url)
            driver.get(url)
            soup = BeautifulSoup(driver.page_source, 'lxml')

            # Product titles found on this page.
            titles = soup.find_all("h4", {'class': 'ais-hit--title product-tile__title'})
            name.extend(item.get_text(strip=True) for item in titles)

            # Collect price spans, capped at the number of titles found so
            # the two lists grow by (at most) the same amount per page.
            prices = soup.find_all(
                ["span"],
                {'class': ['ais-hit--price price', 'sale']},
                limit=len(titles),
            )
            price.extend(item.get_text(strip=True) for item in prices)
finally:
    # quit() ends the whole WebDriver session (browser + driver process),
    # even when a page load or parse above raised.
    driver.quit()
Aucun commentaire:
Enregistrer un commentaire