I wrote this code, but I can't get the results I expect. This is my first attempt and I don't know what I'm doing wrong, so I would appreciate any help. When I run it, I only get information for the teams at the top of the website, not for the other ones.
Thanks.
import scrapy
from bs4 import BeautifulSoup
from scrapy.item import Field, Item
from scrapy.spiders import CrawlSpider
from scrapy.spiders import Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.loader import ItemLoader
from scrapy.loader.processors import Join
class FichaClub(scrapy.Item):
    """Scraped record for a single club page.

    Each attribute is a plain Scrapy ``Field`` with no input or output
    processors, so values are stored exactly as the item loader collects
    them.
    """

    # Club name ("nombre").
    nombre = scrapy.Field()
    # Contact e-mail address.
    email = scrapy.Field()
    # Zone ("zona") -- presumably the club's region/association; confirm
    # the intended meaning before populating it.
    zona = scrapy.Field()
class SacaClubes(CrawlSpider):
    """Crawl ecuafutbol.org and emit one ``FichaClub`` per club page.

    The crawl starts at the associations index, follows every link to an
    association detail page, and hands every club page found along the way
    to :meth:`parse_items`.
    """

    name = "Spider100"
    start_urls = ["http://www.ecuafutbol.org/web/asociaciones.php"]
    allowed_domains = ['ecuafutbol.org']

    # ``allow`` takes regular expressions, so the dot is escaped and raw
    # strings are used; a glob-style trailing ``*`` is not needed because
    # the pattern is searched anywhere in the URL.
    rules = (
        # Association pages carry no data of their own: they are only
        # traversed to discover club links (follow is already the default
        # when no callback is given; it is spelled out for clarity).
        Rule(LinkExtractor(allow=r'asociacion_detalle\.php'), follow=True),
        # Club pages are scraped. With a callback set, links found on
        # these pages are not followed any further.
        Rule(LinkExtractor(allow=r'club\.php'), callback='parse_items'),
    )

    def parse_items(self, response):
        """Build a ``FichaClub`` item from a club page.

        Args:
            response: The downloaded club page.

        Yields:
            The loaded ``FichaClub`` item with ``email`` and ``nombre``
            collected from the page. ``zona`` is not populated here.
        """
        # Pass ``response`` by keyword: the second positional parameter of
        # ItemLoader is ``selector``, not ``response``.
        loader = ItemLoader(item=FichaClub(), response=response)
        loader.add_xpath('email', '//a[starts-with(@href, "mail")]/text()')
        # NOTE(review): this XPath is anchored to the element id
        # "gallery-post-1511". If that id is specific to one page, ``nombre``
        # will come back empty on the others -- verify against several club
        # pages and loosen the selector if needed.
        loader.add_xpath(
            'nombre',
            '//*[@id="gallery-post-1511"]/article/div/div/div/p/strong[1]/text()',
        )
        yield loader.load_item()
Aucun commentaire:
Enregistrer un commentaire