I have made a python script thats basically web crawler . My aim is to get direct download links of files from some blogspot and then find-out direct download link.
def trade_spider(max_pages):
page=1
i=1
while page < max_pages:
url='http://ift.tt/1JBhCIO'
source_code=requests.get(url)
plain_text=source_code.text
soup=BeautifulSoup(plain_text)
for link in soup.findAll('a' , href=re.compile('http://ift.tt/1Kv9L2l')):
href=link.get('href')
print('link no ' + str(i) +' title ' + link.string)
i+=1
print(href)
get_download_link(href)
page+=1
def get_download_link(url):
source_code = requests.get(url)
plain_text = source_code.text
soup = BeautifulSoup(plain_text)
for link in soup.findAll('div', {"class": "download_link"}):
href = link.get('href')
print('Download link ')
print(href)
trade_spider(2)
But the output:
link no 1 title Prem Ritu
http://ift.tt/1JBhCZ2
Download link
None
Download link
None
link no 2 title Kobi Prem
http://ift.tt/1JBhCZ4
Download link
None
Download link
None
Aucun commentaire:
Enregistrer un commentaire