lundi 30 mars 2020

Pyppeteer. Chromium browser stops loading pages after set period of time

I'm trying to write a program using Pyppeteer and asyncio that would take screenshots of different websites. But every time I ran the program, after about 20 seconds an error occurred ("Runtime Error: Session closed. Most likely the page has been closed"). Later I found out that it is a known bug of pyppeteer and found a patch for it on GitHub:

def patch_pyppeteer():
    """Disable websocket keep-alive pings in pyppeteer.

    Wraps the ``websockets`` client ``connect`` function that pyppeteer
    uses for its Chromium DevTools connection so that every connection
    is opened with ``ping_interval`` and ``ping_timeout`` forced to
    ``None``.  This works around the known pyppeteer bug where the
    session is dropped after a short period of time.
    """
    import pyppeteer.connection
    wrapped_connect = pyppeteer.connection.websockets.client.connect

    def connect_without_pings(*args, **kwargs):
        # Force-disable the keep-alive ping machinery on every call.
        kwargs['ping_interval'] = None
        kwargs['ping_timeout'] = None
        return wrapped_connect(*args, **kwargs)

    pyppeteer.connection.websockets.client.connect = connect_without_pings
patch_pyppeteer()

However, even though the error stopped occurring, the browser kept losing its connection to the internet after the same period of time and the page.goto(url) call never returned control. Is there still any way to fix this problem?

Here's a part of my own code:

async def take_screenshot(page, domain, ip_addr, counter, browser):
    """Navigate *page* to ``https://<domain>`` and save a screenshot.

    The screenshot is written under the ``Screenshots`` directory as
    ``<counter>_<domain>_<ip_addr>.png`` (Windows-style path separator,
    consistent with the rest of the script).  On any failure the error
    is printed and ``exit_program(browser)`` is invoked.
    """
    url = 'https://' + domain
    fname = 'Screenshots\\' + str(counter) + '_' + domain + '_' + ip_addr + '.png'
    try:
        # Reuse the already-built URL (the original rebuilt it inline).
        await page.goto(url)
        await page.screenshot({'path': fname})
    except Exception as e:
        # pyppeteer raises a variety of exception types (timeouts,
        # navigation errors), so a broad catch is deliberate here.
        print('Error while taking a screenshot')
        print(str(e))
        exit_program(browser)

async def main():
    """Drive the screenshot loop.

    Reads domains from the input files named on the command line,
    resolves each domain's IP addresses, and — pinning one IP at a time
    via the hosts file — takes one screenshot per (domain, IP) pair.
    Exits the process with status 0 once the domain list is exhausted.
    """
    counter = 0
    domains, black_list, output_file = await open_source_files(sys.argv)
    print('Launching browser')
    browser = await launch(headless=False)
    page = await browser.newPage()
    while True:
        domain = await read_domain(domains, browser)
        # read_domain signals end-of-input with the sentinel -1.
        if domain == -1:
            break
        print('Getting the amount of pages')
        # Return value was never used; the call is kept for its side effects.
        await get_pages(page, browser, domain)
        print('Reading IP addresses')
        ip_list = await get_ipaddr(domain, page, browser)
        await delete_domain_from_hosts(domain, browser)
        for ip in ip_list:
            print('Flushing DNS cache')
            flush_dns()
            print('Writing in hosts')
            lin = await write_in_hosts(ip, domain, browser)
            print('Taking screenshot')
            await take_screenshot(page, domain, ip, counter, browser)
            print('Deleting from hosts')
            await delete_from_hosts(lin, browser)
            counter += 1
    await browser.close()
    domains.close()
    black_list.close()
    output_file.close()
    # NOTE: raises SystemExit(0); kept for parity with the original flow.
    exit(0)

# Guard the entry point so importing this module does not launch the
# browser.  The patch must be applied before any DevTools connection is
# opened; calling it again here after the module-level call is harmless
# (it only re-wraps the connect function with the same kwargs).
if __name__ == '__main__':
    patch_pyppeteer()
    asyncio.get_event_loop().run_until_complete(main())



Aucun commentaire:

Enregistrer un commentaire