我正在使用 Express 和 Request 创建一个刮板。URL 数组被传递给 request,然后通过 for 循环进行迭代。解析完所有数据并解析所有数据后,回调将调用 res.send。
我正在尝试将其转换为承诺,但我相信我使用的 for 循环不允许我这样做。如果循环导致了问题,是否有另一种方法可以对此进行编码并获得相同的结果?
回调方法
function scrape(callback){
for(var i = 0; i < urls.length; i++){
request(urls[i], function(error, response, html){
if(!error && response.statusCode == 200){
// LOAD Cherio (jQuery) on the webpage
var $ = cheerio.load(html);
try{
var name = $(".name").text();
var mpn = $(".specs.block").contents().get(6).nodeValue.trim();
var jsontemp = {"MPN": "", "Name": "", "PriceList": {}};
jsontemp.MPN = mpn;
jsontemp.Name = name;
// Traverse the DOM to get tr tags and extract info
$(".wide-table tbody tr").each(function (i, row) {
var $row = $(row),
merchant = $row. attr("class").trim(),
total = $row.children(".total").text();
jsontemp.PriceList[merchant] = merchant;
jsontemp.PriceList[merchant] = total;
});
}
catch(err){
console.log('Error occured during data scraping:');
}
list.push(jsontemp);
}
else{
console.log(error);
}
count++;
callback();
});
}
}
});
scrape(() => {
console.log(count);
if(count == urls.length){res.send(list)}
});
Run Code Online (Sandbox Code Playgroud)
Promise 实现尝试
var urls = [
"http://test.com/",
"http://test.com/2"
];
var list = [];
var count = 0;
scrape().then((data) => {
list.push(data)
if(count == urls.length){res.send(list)}
})
.catch(error => console.log(error))
function scrape(){
for(var i = 0; i < urls.length; i++){
return new Promise(function (resolve, reject) {
request(urls[i], function(error, response, html){
if(!error && response.statusCode == 200){
var $ = cheerio.load(html);
try{
var name = $(".name").text();
var mpn = $(".specs.block").contents().get(6).nodeValue.trim();
var jsontemp = {"MPN": "", "Name": "", "PriceList": {}};
jsontemp.MPN = mpn;
jsontemp.Name = name;
// TRAVERSING DOM FOR DATA //
$(".wide-table tbody tr").each(function (i, row) {
var $row = $(row),
merchant = $row. attr("class").trim(),
total = $row.children(".total").text();
jsontemp.PriceList[merchant] = merchant;
jsontemp.PriceList[merchant] = total;
});
}
catch(err){
console.log('Error occured during data scraping:');
}
resolve(jsontemp);
}
else{
console.log(error);
return reject(error);
}
count++;
});
}
}
Run Code Online (Sandbox Code Playgroud)
您需要将这些承诺存储在一个列表中,然后调用Promise.all以获取所有内容的单一承诺:
function scrape() {
var promises = []; // array of promises
for(var i = 0; i < urls.length; i++) {
var url = urls[i];
var promise = new Promise(function(resolve, reject) {
// ...
};
// add to array
promises.push(promise);
}
// return a single promise with an array of the results
// by using Promise.all
return Promise.all(promises);
}
Run Code Online (Sandbox Code Playgroud)
此外,不使用循环变量(如i函数内部循环内部)使用时var。相反,您应该url在 promise 回调函数之外声明一个变量,或者替换var为更新的let.