lundi 20 décembre 2021

attribute error in web scrapping in python

I have written a python code for web scrapping but there is a error named "Nontype object has no attribute text", help me to get rid from this error. I have tried so much but i cannot find the solution of this error. Here is my code..

import requests
from bs4 import BeautifulSoup
import pandas as pd


all_books=[]

url='https://books.toscrape.com/catalogue/page-1.html'
headers=('https://developers.whatismybrowser.com/useragents/parse/22526098chrome-windows-blink')
def get_page(url):
    page=requests.get(url,headers)
    status=page.status_code
    soup=BeautifulSoup(page.text,'html.parser')
    return [soup,status]

#get all books links
def get_links(soup):
    links=[]
    listings=soup.find_all(class_='product_pod')
    for listing in listings:
        bk_link=listing.find("h3").a.get("href")
        base_url='https://books.toscrape.com/catalogue/page-1.html'
        cmplt_link=base_url+bk_link
        links.append(cmplt_link)
    return links
    
#extraxt info from each link
def extract_info(links):
    for link in links:
        r=requests.get(link).text
        book_soup=BeautifulSoup(r,'html.parser')

        name=book_soup.find(class_='col-sm-6 product_main').text.strip()
        price=book_soup.find(class_='col-sm-6 product_main').text.strip()
        desc=book_soup.find(class_='sub-header').text.strip()
        cat=book_soup.find('"../category/books/poetry_23/index.html">Poetry').text.strip()
        book={'name':name,'price':price,'desc':desc,'cat':cat}
        all_books.append(book)

pg=48
while True:
    url=f'https://books.toscrape.com/catalogue/page-{pg}.html'
    soup_status=get_page(url)
    if soup_status[1]==200:
        print(f"scrapping page{pg}")
        extract_info(get_links(soup_status[0]))
        pg+=1
    else:
        print("The End")
        break

df=pd.DataFrame(all_books)
print(df)
    
    



Aucun commentaire:

Enregistrer un commentaire