mardi 29 juin 2021

How to export all pages scraped from site to Excel

I'm trying to export scraped data from site to excel. But my code overwrites previous data in excel file with the last scraped. This is my first try with scraping and Pandas. Please help me to understand the logic of correct export. This is my code:

import requests
import lxml.html
import time
import sys
import pandas as pd

sys.stdin.reconfigure(encoding='utf-8')
sys.stdout.reconfigure(encoding='utf-8')


def parse_data(url):
    titles = []
    prices = []
    try:
        response = requests.get(url)
    except:
        return
    tree = lxml.html.document_fromstring(response.text)
    for item in tree.xpath('//*[contains(@class, "listing-item")]'):
            title = item.xpath(".//h2/a/text()")[0]
            price = item.xpath('.//*[contains(@class, "price")]/text()')
            price = price[0] if price else "N/A"
            titles.append(title)
            prices.append(price)
            
    return titles, prices


def output(titles, prices):
    output = pd.DataFrame({"Make": titles,
                           "Price": prices,
                           })
    writer = pd.ExcelWriter('avbuyer.com.xlsx', engine='xlsxwriter')
    output.to_excel(writer, sheet_name='Sheet1')

    output(titles, prices)


def main():
    for i in range(1, 3):
        url = 'https://www.avbuyer.com/aircraft/private-jets/page-' + str(i)
        print(url)
        parse_data(url)
        i += 1
        time.sleep(2)


if __name__ == "__main__":
    main()



Aucun commentaire:

Enregistrer un commentaire