Currently I'm working on a crawler, and I have to save its output in a CSV file.
Here is my code:
import scrapy
class ArticleSpider(scrapy.Spider):
    """Spider that scrapes the details of a topart-online.com product page.

    Each scraped page produces one item with the product title, quantity,
    delivery status and the product's custom attributes.
    """

    name = "article"

    def start_requests(self):
        """Yield one request per product URL, each handled by ``parse``."""
        urls = [
            'https://www.topart-online.com/de/Ahorn-japan.%2C-70cm%2C--36-Blaetter----Herbst/c-KAT282/a-150001HE'
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def save_page(self, response):
        """Write the raw response body to ``article-<last URL segment>.html``.

        This used to be a first ``parse`` definition that was silently
        overridden by the later ``parse`` and therefore never ran. It is kept
        under its own name as an optional debugging callback; nothing in this
        class registers it, so the spider's effective behaviour is unchanged.
        """
        page = response.url.split("/")[-1]
        filename = 'article-%s.html' % page
        with open(filename, 'wb') as f:
            f.write(response.body)
        self.log('Saved file %s' % filename)

    def parse(self, response):
        """Extract one item describing the product on ``response``.

        The yielded dict keeps the original keys (``title``, ``quantity``,
        ``delivery_status``, ``itemattr``, ``itemvalues``) and adds
        ``attributes``: a list of ``"<label>: <value>"`` strings in page
        order, so every attribute stays next to its own value when exported.
        """
        attributes = []
        # Pair label and value per row instead of zipping two document-wide
        # lists, which drift apart as soon as one query matches extra nodes.
        # NOTE(review): assumes the value is the next sibling <div> of each
        # label <div> -- confirm against the live page markup.
        for label_div in response.xpath(
                '//div[@class="productcustomattrdesc word-break col-6"]'):
            label = label_div.xpath('normalize-space(.)').get(default='')
            value = label_div.xpath(
                'normalize-space(following-sibling::div[1])').get(default='')
            if label:
                attributes.append('%s: %s' % (label, value))

        yield {
            'title': response.xpath('//h1[@class="text-center text-md-left mt-0"]/text()').get(),
            # get(default='') avoids IndexError/AttributeError when the node
            # is missing; the item is still emitted with an empty field.
            'quantity': response.xpath('//div[@class="col-6"]/text()').get(default='').strip(),
            'delivery_status': response.xpath('//div[@class="availabilitydeliverytime"]/text()').get(default='').replace('/', '').strip(),
            'itemattr': response.xpath('//div[@class="productcustomattrdesc word-break col-6"]/text()').getall(),
            'itemvalues': response.xpath('//div[@class="col-6"]/text()').getall(),
            'attributes': attributes,
        }
Now my question: how can I output "itemattr" and "itemvalues" in the correct order, so that each attribute appears next to its value? For example: Umkarton (itemattr) 20/20/20 (the dimensions of an Umkarton).
Aucun commentaire:
Enregistrer un commentaire