Friday, February 28, 2020

Problem with puppeteer web scrape

I need to scrape a list of URLs, collect the Title, ImageURL and Content from each page, and write them into a JSON file for import.

But I can't get it to work inside a for loop: it worked for a single URL, but it stopped working once I added the loop.

Could I get some help on this?

const puppeteer = require('puppeteer');

async function scrapPamphlets()
{
  const browser = await puppeteer.launch();
  const page = await browser.newPage();

  const urls = ['https://www.bibleed.com/the-divine-origin-of-the-bible.html','https://www.bibleed.com/god-and-creation.html'];

  for (let i = 0; i < urls.length; i++) {
    const pamphletURL = urls[i];

    await page.goto(`${pamphletURL}`, {waitUntil: 'networkidle2'});

    let pamphletData = await page.evaluate(() => {
      let data = [];
      // get the page elements
      let pamphletElms = document.querySelectorAll('div[class="wsite-section-elements"]');
      // get the pamphlet data
      pamphletElms.forEach((pamphletelement) => {
        let pamphletJson = {};
        try {
          pamphletJson.title = pamphletelement.querySelector('h2').innerText;
          pamphletJson.imgURL = 'https://www.bibleed.com' + pamphletelement.querySelector('div div.wsite-image a img').getAttribute('src');
          pamphletJson.txtContent = pamphletelement.querySelector('.paragraph').innerText;
        }
        catch (exception) {
        }
        data.push(pamphletJson);
      });
      return data;
    });
  }

  // save data to json file
  const fs = require('fs');
  fs.writeFile('pamphletData.json', JSON.stringify(pamphletData), err => err ? console.log(err) : null);

  await browser.close();
}

scrapPamphlets();
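
One likely cause is that pamphletData is declared with let inside the for block, so it is block-scoped to the loop and no longer exists when fs.writeFile runs after it (and each iteration would overwrite it anyway). A minimal sketch of one way to restructure this, keeping the same URLs and selectors but accumulating every page's results into an array before writing the file once; allPamphlets is a name introduced here purely for illustration:

const puppeteer = require('puppeteer');
const fs = require('fs');

async function scrapPamphlets() {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();

  const urls = [
    'https://www.bibleed.com/the-divine-origin-of-the-bible.html',
    'https://www.bibleed.com/god-and-creation.html'
  ];

  // accumulate results from every URL here, outside the loop scope
  const allPamphlets = [];

  for (const pamphletURL of urls) {
    await page.goto(pamphletURL, {waitUntil: 'networkidle2'});

    // same extraction logic as above, run in the page context
    const pamphletData = await page.evaluate(() => {
      const data = [];
      document.querySelectorAll('div[class="wsite-section-elements"]').forEach((pamphletelement) => {
        const pamphletJson = {};
        try {
          pamphletJson.title = pamphletelement.querySelector('h2').innerText;
          pamphletJson.imgURL = 'https://www.bibleed.com' + pamphletelement.querySelector('div div.wsite-image a img').getAttribute('src');
          pamphletJson.txtContent = pamphletelement.querySelector('.paragraph').innerText;
        } catch (exception) {
          // a missing element on this section just leaves its fields undefined
        }
        data.push(pamphletJson);
      });
      return data;
    });

    // append this page's results to the combined list
    allPamphlets.push(...pamphletData);
  }

  // write the combined data once, after all pages have been scraped
  fs.writeFileSync('pamphletData.json', JSON.stringify(allPamphlets, null, 2));

  await browser.close();
}

scrapPamphlets();

Writing synchronously after the loop keeps the output to a single combined JSON array; if the write should stay asynchronous, fs.promises.writeFile with await would work as well.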


