I am working on a project where I want to do webscraping on a news website from a Python script, I have been trying to figure out how I can run this code:
from spiders.NewsSpider import NewsSpider
# scrapy api
from scrapy import signals
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy.settings import Settings
## logging settings
import logging
# Create and configure logger
LOG_FORMAT = "%(levelname)s %(asctime)s - %(message)s"
logging.basicConfig(filename ="C:/Users/r/PycharmProjects/Wordpress_Auto_Post_Project/terminal_news/terminal_news/terminal_news.log",
level=logging.DEBUG,
format = LOG_FORMAT)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# list of crawlers
TO_CRAWL = [NewsSpider]
# list of crawlers that are running
RUNNING_CRAWLERS = []
def spider_closing(spider):
"""Activates on spider closed signal"""
logger.info("Spider closed: %s" % spider, level=logging.INFO)
RUNNING_CRAWLERS.remove(spider)
if not RUNNING_CRAWLERS:
reactor.stop()
for spider in TO_CRAWL:
settings = Settings()
# crawl responsibly
settings.set("USER_AGENT", "terminal_news http://www.example.com")
# Add to items pipelines
settings.set("ITEM_PIPELINES", {'pipelines.AddTablePipeline': 100})
crawler = Crawler(Settings)
crawler_obj = spider()
RUNNING_CRAWLERS.append(crawler_obj)
# stop reactor when spider closes
crawler.signals.connect(spider_closing, signal=signals.spider_closed)
crawler.configure()
crawler.crawl(crawler_obj)
crawler.start()
# blocks process so always keep as the last statement
reactor.run()
But after many tries I have not been successful in passing this ERROR
Traceback (most recent call last):
File "C:/Users/rafal/PycharmProjects/Wordpress_Auto_Post_Project/terminal_news/terminal_news/core.py", line 50, in <module>
crawler = Crawler(settings)
File "C:\Users\rafal\PycharmProjects\Wordpress_Auto_Post_Project\venv\lib\site-packages\scrapy\crawler.py", line 50, in __init__
self.spidercls.update_settings(self.settings)
AttributeError: 'Settings' object has no attribute 'update_settings'
Process finished with exit code 1
Is there anyone that can help me out with my code? I dont see what I done wrong
Aucun commentaire:
Enregistrer un commentaire