lundi 9 février 2015

I need my scraping script to get the next pagination number (Python)

I use a Python script to scrape data from the web, and I need it to move on to the next page automatically. Can anybody help me? This is my script:



from bs4 import BeautifulSoup
import urllib2
import re
# Scrape one listing page and print one comma-separated line per entry:
# the text of each <h2>, the matching <span class="address">, and the
# matching <a class="number"> elements.

# Collapses every run of whitespace into a single space; compiled once
# because it is applied to every text node.
whitespace_run = re.compile(r'\s+')

# NOTE(review): the loop index is never used and the URL is fixed, so this
# fetches the same page on every pass. To walk the pagination, the page
# number has to be built into the URL from `i` -- the URL scheme of the
# target site is not visible here, so that is left to be filled in.
for i in xrange(1):
    try:
        page = urllib2.urlopen("http://ift.tt/1DUctaT")
    except urllib2.HTTPError:
        # Skip pages that fail to load. The parsing below must NOT live in
        # a `finally:` clause: `finally` also runs on this `continue`, when
        # `page` is unbound (first pass) or stale (later passes).
        continue

    try:
        soup = BeautifulSoup(page)
    finally:
        # Release the HTTP connection whether or not parsing succeeded.
        page.close()

    td1 = soup.findAll('h2')
    td2 = soup.findAll('span', {'class': 'address'})
    td345 = soup.findAll('a', {'class': 'number'})
    # For td3, td4, and td5, use the slicing method: s[i:j:k] is the slice
    # of s from i to j with step k.
    # NOTE(review): all three slices below are [0::1], i.e. three copies of
    # the full list, so every number is printed three times. If each entry
    # carries three numbers, [0::3], [1::3], [2::3] was presumably intended
    # -- confirm against the page before changing.
    td3 = td345[0::1]
    td4 = td345[0::1]
    td5 = td345[0::1]

    # zip() stops at the shortest list, so entries missing any of the
    # fields shorten the output rather than raising.
    for cells in zip(td1, td2, td3, td4, td5):
        data = []
        for cell in cells:
            for text in cell.find_all(text=True):
                if not text.strip():
                    continue  # ignore whitespace-only text nodes
                cleaned = whitespace_run.sub(' ', text).strip()
                # Commas are removed so they cannot be confused with the
                # ', ' field separator used in the printed line.
                data.append(cleaned.encode('utf8').replace(",", ""))
        print(', '.join(data))




Aucun commentaire:

Enregistrer un commentaire