I made sure this program works with other URLs, but for some reason, when I try running it with this URL (https://carmax.com/cars/all/), the page never loads, no matter how long I let it sit. Is the request being blocked? If so, what can I do? Thanks.
import csv
import urllib
import urllib.request
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
# Fetch the CarMax listing page, pull the year/make, model/trim and price text
# out of every 'car-title' element, and write one CSV row per car to data.csv.
url = 'https://carmax.com/cars/all/'
req = Request(url)
# Send a browser-like User-Agent header with the request.
req.add_header('User-Agent', 'Mozilla/5.0')
# Without a timeout, urlopen() can block indefinitely when the server never
# answers; give up after 30 seconds instead. The `with` block closes the
# connection as soon as the document has been parsed.
with urlopen(req, timeout=30) as resp:
    soup = BeautifulSoup(resp, 'html.parser')

container = soup.find('div', {'class': 'car-container listing-container'})
if container is None:
    # find() returns None when nothing matches; stop with a clear message
    # rather than an AttributeError on the chained find_all() call.
    # NOTE(review): presumably the listings are rendered client-side or the
    # request was answered with a different page - inspect the returned HTML.
    raise SystemExit('Listing container not found in the page returned by ' + url)
rows = container.find_all('div', attrs={'class': 'car-title'})

# The context manager guarantees the CSV is flushed and closed even if a row
# turns out to be malformed halfway through the loop.
with open('data.csv', 'w', encoding='utf-8', newline='') as file:
    writer = csv.writer(file)
    # header row
    writer.writerow(['yearmake', 'modeltrim', 'price'])
    n = 0
    for row in rows:
        yearmake = row.find('span', attrs={'class': 'year-make'}).text.strip()
        modeltrim = row.find('span', attrs={'class': 'model-trim'}).text.strip()
        price = row.find('span', attrs={'class': 'price'}).text.strip()
        writer.writerow([yearmake, modeltrim, price])
        n += 1
print(f'COMPLETED {n} ROWS OF DATA')
Aucun commentaire:
Enregistrer un commentaire