I'm trying to scrape an Amazon URL for its product name and price and can't seem to get it to work. I'm quite new to scripting and am trying to use a regex to match to the part of the page source that contains those details, but everything keeps coming up null. The URL I'm trying to scrape is this one, and my code is:
function fetchPrices(){ var macroName = new RegExp("<title>.*:Amazon.co.uk"); var microName = new RegExp("[A-z0-9 ]+"); var macroPattern = new RegExp("\"a-size-medium a-color-price\">£[0-9]{1,4}\.[0-9]{2}</span>"); var microPattern = new RegExp("£[0-9]{1,4}\.[0-9]{2}"); var rawSheet = SpreadsheetApp.getActive().getSheetByName("Amazon"); var numRows = rawSheet.getLastRow(); for(i = 2; i < numRows + 1; i++){ var url = rawSheet.getRange(i, 1).getValue(); var urlRaw = UrlFetchApp.fetch(url, {muteHttpExceptions: true}); var urlContent = urlRaw.getContentText(); var responseCode = urlRaw.getResponseCode(); var price = 0; if(responseCode == 200){ var longString = macroPattern.exec(urlContent); var shortString = microPattern.exec(longString); var productPrice = shortString; } else { price = "Error: " + responseCode; } Utilities.sleep(5); rawSheet.getRange(i, 2).setValue(price); }; for(i = 2; i < numRows + 1; i++){ var url = rawSheet.getRange(i, 1).getValue(); var urlRaw = UrlFetchApp.fetch(url, {muteHttpExceptions: true}); var urlContent = urlRaw.getContentText(); var responseCode = urlRaw.getResponseCode(); var title = 0; if(responseCode == 200){ var longString = macroName.exec(urlContent); var shortString = microName.exec(longString); var productTitle = longString; title = productTitle; } else { title = "Error: " + responseCode; } Utilities.sleep(5); rawSheet.getRange(i, 3).setValue(title); }; };
Aucun commentaire:
Enregistrer un commentaire