from bs4 import BeautifulSoup
from urllib.request import urlopen as uReq
import requests
url = 'https://en.wikisource.org/wiki/Main_Page'
r = requests.get(url)
Soup = BeautifulSoup(r.text, "html5lib")
List = Soup.find("div",class_="enws-mainpage-widget-content", id="enws-mainpage-newtexts-content").find_all('a')
ebooks=[]
i=0
for ebook in List:
x=ebook.get('title')
for ch in x:
if(ch==":"):
x=""
if x!="":
ebooks.append(x)
i=i+1
inputnumber=0
while inputnumber<len(ebooks):
print(inputnumber+1, " - ", ebooks[inputnumber])
inputnumber=inputnumber+1
input=int(input("Please select a book: "))
selectedbook = Soup.find("a", title=ebooks[input-1])
print(selectedbook['title'])
url1 = "https://en.wikisource.org/"+selectedbook['href']
r1 = requests.get(url1)
Soup1 = BeautifulSoup(r1.text, "html5lib")
List1 = Soup1.find_all("div", class_="prp-pages-output").find_all('p')
words=str(List1)
ebook1= open('ebook1.txt', 'w', encoding="utf-8")
ebook1.write(words)
ebook1.close()
I'm trying to download an e-book selected by user from this website:'https://ift.tt/1gRwOSU'
Everything is fine until when I try to get the paragraghs from the selected book. I get this error on List1 line:
Traceback (most recent call last):
File "homework.py", line 32, in <module>
List1 = Soup1.find_all("div", class_="prp-pages-output").find_all('p')
File "C:\Users\Özdal\AppData\Local\Programs\Python\Python38-32\lib\site-packages\bs4\element.py", line 2173, in __getattr__
raise AttributeError(
AttributeError: ResultSet object has no attribute 'find_all'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?
If I change it to this:
List1 = Soup1.find("div", class_="prp-pages-output").find_all('p')
Code only gives me first div but I need all of the divs. What should I do?
Aucun commentaire:
Enregistrer un commentaire