异步并行HTTP请求

Chr*_*ich 13 asynchronous node.js

我在应用程序加载大量URL时遇到控制流问题.我正在使用Caolan Async和NPM请求模块.

我的问题是,只要将函数添加到队列,HTTP响应就会启动.理想情况下,我想构建我的队列,并且只在队列启动时才开始发出HTTP请求.否则回调会在队列启动之前开始触发 - 导致队列过早完成.

var request = require('request') // https://www.npmjs.com/package/request
    , async = require('async'); // https://www.npmjs.com/package/async

var myUrls = ['http://...', 'http://...', 'http://...']; // 1000+ urls here

// Build one task per URL for async.parallel. Creating a task does not send
// anything: request() is only called when the task function itself is invoked.
//
// map() gives every task its own `url` binding. A `for` loop with `var i`
// shares a single `i` between all closures, so by the time the tasks run,
// `myUrls[i]` is `myUrls[myUrls.length]` (undefined) for every task.
var myLoaderQueue = myUrls.map(function(url){
    return function(callback){

        // Async http request
        request(url, function(error, response, html) {

            // Some processing is happening here before the callback is invoked
            callback(error, html);
        });
    };
});

// The loader queue has been made, now start to process the queue.
// Pass the array that was actually built (myLoaderQueue).
async.parallel(myLoaderQueue, function(err, results){
    // Done: `err` is set if a task reported an error; otherwise `results`
    // holds the html values the tasks passed to their callbacks.
});
Run Code Online (Sandbox Code Playgroud)

有没有更好的方法来解决这个问题?

rob*_*lep 26

使用for循环结合异步调用是有问题的(使用ES5)并且可能产生意外结果(在您的情况下,正在检索错误的URL).

相反,考虑使用async.map():

// Request every URL in myUrls; each URL is handed to the iterator as its own
// `url` argument, so there is no shared loop variable to go stale.
async.map(myUrls, function(url, callback) {
  request(url, function(error, response, html) {
    // Some processing is happening here before the callback is invoked
    callback(error, html);
  });
}, function(err, results) {
  if (err) {
    // A request reported an error; `results` should not be relied on here.
    console.error(err);
    return;
  }
  // `results` holds the html values passed to `callback`, in myUrls order.
});
Run Code Online (Sandbox Code Playgroud)

鉴于你有1000多个网址要检索,async.mapLimit()也可能值得考虑.


krl*_*krl 7

如果你愿意使用 Bluebird 和 Babel,借助 Promise 以及 ES7 的 async/await,可以这样做:

let Promise = require('bluebird');
let request = Promise.promisify(require('request'));

let myUrls = ['http://...', 'http://...', 'http://...']; // 1000+ urls here

/**
 * Requests every URL in myUrls, waits for all of the request promises to
 * resolve, and logs the array of results. If any promise rejects, the
 * error is logged instead.
 */
async function load() {
  try {
    // Map myUrls into an array of request promises and wait for all of them.
    // The wrapper forwards ONLY the url: Array.prototype.map also passes
    // (index, array), and handing those to the promisified request would
    // shift the callback that promisify appends out of the position
    // request expects.
    let results = await Promise.all(myUrls.map((url) => request(url)));
    // NOTE(review): early async/await drafts had an `await*` shorthand for
    // this; Promise.all is used because it does not depend on that syntax.
    // Print array of results or use forEach
    // to process / collect them in any other way
    console.log(results);
  } catch (e) {
    console.log(e);
  }
}
Run Code Online (Sandbox Code Playgroud)

  • 您也可以执行`await*myUrls.map(request)`,而不是`Promise.all`. (4认同)
  • ……不过,为你给出 Node.js ES7 的写法点赞! (2认同)