jeudi 3 mars 2016

Node.JS - How to limit multi-promised requests to prevent overflow (Web scraping)

I'm receiving FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory when I execute the script which has a high amount of requests that needs to be done.

I'm assuming that I need to lower the amount of requests made? Eg: Send 5 requests (and maybe add delay) until complete, buffer to browser, then send the next batch of 5 requests... etc.

Any help appreciated!

Here's the source:

var http = require('http');
var request = require('request');
var cheerio = require('cheerio');
var rp = require('request-promise');
var _ = require("underscore");
var dommy = require('dommy');

http.createServer(function(request, response) {
//  ** Something like this causes overflow **
//  var appIds = _.range(451131,450131);
    var appIds = [253250, 445170, 327510, 346110, 421900, 385070]

    var document = dommy();
    var html = document.createElement('html');
    var body = document.createElement('body');
    var ul = document.createElement('ul');
    var li = document.createElement('li');

    function steamappRequestConfig(appId) {
        var options = {
            uri: 'http://ift.tt/1q7lxWQ' + appId + '/Avatar',
            transform: function(body) {
                return cheerio.load(body);
            }
        };
        return rp(options).then(function($) {
            return {
                appId: appId,
                appDom: $,
            };
        });
    }
    var appInfoRequests = appIds.map(steamappRequestConfig);
    var listPromise = Promise.all(appInfoRequests);
    listPromise.then(function(appResults) {
            appResults.sort(function(x) {
                return x.appId;
            });

            var results = appResults.map(function(rpResult) {
                var $ = rpResult.appDom;
                var appId = rpResult.appId;

                var statusCheck = $('h2').text();
                // Check if page contains Avatars
                if (statusCheck != 'Avatars') {
                    // We are sorry nothing important here
                } else {
                    return document.createTextNode('<li><a href="http://ift.tt/1q7lxWQ' + appId + '/Avatar">' + appId + '</a></li>');
                }
            });
            // Output
            html.appendChild(body);
            body.appendChild(ul);
            ul.appendChild(results.join('\n'));
            document.appendChild(html);
            response.write(document.outerHTML);
            // console.log(document.outerHTML);
        })
        .catch(function(err) {
            // Crawling failed or Cheerio choked...
        });
}).listen(80);

Aucun commentaire:

Enregistrer un commentaire