I have written a python code for web scrapping but there is a error named "Nontype object has no attribute text", help me to get rid from this error. I have tried so much but i cannot find the solution of this error. Here is my code..
import requests
from bs4 import BeautifulSoup
import pandas as pd
all_books=[]
url='https://books.toscrape.com/catalogue/page-1.html'
headers=('https://developers.whatismybrowser.com/useragents/parse/22526098chrome-windows-blink')
def get_page(url):
page=requests.get(url,headers)
status=page.status_code
soup=BeautifulSoup(page.text,'html.parser')
return [soup,status]
#get all books links
def get_links(soup):
links=[]
listings=soup.find_all(class_='product_pod')
for listing in listings:
bk_link=listing.find("h3").a.get("href")
base_url='https://books.toscrape.com/catalogue/page-1.html'
cmplt_link=base_url+bk_link
links.append(cmplt_link)
return links
#extraxt info from each link
def extract_info(links):
for link in links:
r=requests.get(link).text
book_soup=BeautifulSoup(r,'html.parser')
name=book_soup.find(class_='col-sm-6 product_main').text.strip()
price=book_soup.find(class_='col-sm-6 product_main').text.strip()
desc=book_soup.find(class_='sub-header').text.strip()
cat=book_soup.find('"../category/books/poetry_23/index.html">Poetry').text.strip()
book={'name':name,'price':price,'desc':desc,'cat':cat}
all_books.append(book)
pg=48
while True:
url=f'https://books.toscrape.com/catalogue/page-{pg}.html'
soup_status=get_page(url)
if soup_status[1]==200:
print(f"scrapping page{pg}")
extract_info(get_links(soup_status[0]))
pg+=1
else:
print("The End")
break
df=pd.DataFrame(all_books)
print(df)
Aucun commentaire:
Enregistrer un commentaire