I am trying to parse multiple page on IDMb. The parser is stuck on gathering information from one page. I have tried many forums to solve this to no avail. I suspect it has something to do with not setting up my embedded loop correctly or my initial request. Please help. Thanks.
Problems with this script: Loops on one page.
#Basic libraries
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
from random import randint
#More advanced libraries
from time import sleep
from time import time
from IPython.core.display import clear_output
from warnings import warn
base_url = 'http://ift.tt/2hZ3Usi'
response = get(base_url)
soup = BeautifulSoup(response.text, 'lxml')
#data lists to append
names = []
years = []
imdb_ratings = []
metascores = []
votes = []
#preparing the monitoring loop
pages = str(range(1,5))
start_time = time()
requests = 0
#for every page in the interval 1-4
for url in urls:
#make a get request
response = get(base_url + page)
#pause the loop
sleep(randint(8,15))
#Monitor the requests
requests += 1
elapsed_time = time() - start_time
if requests > 4:
warn:('Number of requests was greater than expected.')
break
elif response.status_code != 200:
warn('Request: {}; Frequency: {} requests/s'.format(requests, response.status_code))
else:
print('Request: {}; Frequency: {} requests/s'.format(requests, requests/elapsed_time))
clear_output(wait = True)
page_html = BeautifulSoup(response.text, 'lxml')
#root
movie_containers = soup.find_all('div', class_= 'lister-item mode-advanced')
#looping through containers
for container in movie_containers:
if container.find('div', class_ = 'ratings-metascore') is not None:
#The name
name = container.h3.a.text
#print(name)
names.append(name)
#The Year
year = container.find('span', class_ = 'lister-item-year').text
#print(year)
years.append(year)
#IDMb rating
imdb = container.strong.text
#print(imdb)
imdb_ratings.append(imdb)
#Metascore
metascore = container.find('span', class_= 'metascore').text
#print(metascore)
metascores.append(int(metascore))
#Number of Votes
vote = container.find('span', attrs = {'name':'nv'})['data-value']
#print(vote)
votes.append(int(vote))
#keeping track of data
test_df= pd.DataFrame({'Movie': names,
'Year': years,
'IMDb': imdb_ratings,
'Metascore': metascores,
'Votes': votes})
print(test_df.info())
test_df
`
Aucun commentaire:
Enregistrer un commentaire