I'm trying to export scraped data from site to excel. But my code overwrites previous data in excel file with the last scraped. This is my first try with scraping and Pandas. Please help me to understand the logic of correct export. This is my code:
import requests
import lxml.html
import time
import sys
import pandas as pd
sys.stdin.reconfigure(encoding='utf-8')
sys.stdout.reconfigure(encoding='utf-8')
def parse_data(url):
titles = []
prices = []
try:
response = requests.get(url)
except:
return
tree = lxml.html.document_fromstring(response.text)
for item in tree.xpath('//*[contains(@class, "listing-item")]'):
title = item.xpath(".//h2/a/text()")[0]
price = item.xpath('.//*[contains(@class, "price")]/text()')
price = price[0] if price else "N/A"
titles.append(title)
prices.append(price)
return titles, prices
def output(titles, prices):
output = pd.DataFrame({"Make": titles,
"Price": prices,
})
writer = pd.ExcelWriter('avbuyer.com.xlsx', engine='xlsxwriter')
output.to_excel(writer, sheet_name='Sheet1')
output(titles, prices)
def main():
for i in range(1, 3):
url = 'https://www.avbuyer.com/aircraft/private-jets/page-' + str(i)
print(url)
parse_data(url)
i += 1
time.sleep(2)
if __name__ == "__main__":
main()
Aucun commentaire:
Enregistrer un commentaire