Pet*_*lev 24 javascript automation google-chrome node.js puppeteer
我正在尝试使用Puppeteer在所有图像加载完成后对网站进行截图,但无法使其工作.
这是我到目前为止的代码,我使用https://www.digg.com作为示例网站:
const puppeteer = require('puppeteer');

// Opens https://www.digg.com/ in headless Chrome, scrolls to the bottom of the
// page, waits for every <img> currently in the DOM to settle, and writes a
// full-page screenshot to digg.png.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Set the viewport before navigating so the page is laid out at the
    // target size from the start instead of being resized after rendering.
    await page.setViewport({width: 1640, height: 800});
    await page.goto('https://www.digg.com/');

    // Jump to the bottom of the document.
    await page.evaluate(() => {
      window.scrollTo(0, document.body.scrollHeight);
    });

    // Fixed one-second pause after scrolling. A plain timer is used instead of
    // page.waitFor(), which is deprecated in newer Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, 1000));

    // Inside the page: wait until every <img> has either loaded or failed.
    await page.evaluate(async () => {
      const images = Array.from(document.querySelectorAll('img'));

      const waitForImage = (img) =>
        new Promise((resolve) => {
          if (img.complete) {
            // Already settled; no need to install listeners.
            resolve();
            return;
          }
          // addEventListener (rather than assigning onload/onerror) keeps any
          // handlers the page itself installed on the image intact.
          img.addEventListener('load', () => resolve(), {once: true});
          // A failed image must not block the screenshot, so resolve as well.
          img.addEventListener('error', () => resolve(), {once: true});
        });

      await Promise.all(images.map(waitForImage));
    });

    await page.screenshot({path: 'digg.png', fullPage: true});
  } finally {
    // Always shut the browser down, even if a step above threw; otherwise the
    // Chromium child process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
Run Code Online (Sandbox Code Playgroud)
Vav*_*off 56
有一个内置选项:
// Navigate and wait until the page reports the 'networkidle0' condition.
await page.goto('https://www.digg.com/', { waitUntil: 'networkidle0' });
Run Code Online (Sandbox Code Playgroud)
networkidle0 - 当至少500毫秒内网络连接数不超过0个时,视为导航完成.
networkidle2 - 当至少500毫秒内网络连接数不超过2个时,视为导航完成.
PS:当然,如果您面对的是像Twitter这样的无限滚动单页应用程序,它将不起作用.
Dan*_*rom 11
另一种选择是使用 evaluate,在所有图像加载完成时获得回调.
此方法同样适用于 setContent,后者不支持 networkidle0 等待选项.
// Inside the page: wait until every <img> currently in the DOM has settled
// (either finished loading or failed to load).
await page.evaluate(async () => {
  const images = Array.from(document.querySelectorAll("img"));
  await Promise.all(images.map((img) => {
    // `complete` is already true for images that have finished loading and
    // for images that are broken, so those need no waiting.
    if (img.complete) {
      return undefined;
    }
    return new Promise((resolve) => {
      img.addEventListener('load', resolve, { once: true });
      // Resolve on error too: images that failed earlier are already accepted
      // via `complete`, so an in-flight failure should not reject Promise.all
      // and abort the whole evaluate() call.
      img.addEventListener('error', resolve, { once: true });
    });
  }));
});
Run Code Online (Sandbox Code Playgroud)
您可能需要考虑先使用诸如 Element.scrollIntoView() 之类的方法向下滚动,以便触发延迟加载的图像:
await page.goto('https://www.digg.com/', {
  waitUntil: 'networkidle0', // Wait for all non-lazy loaded images to load
});

await page.evaluate(async () => {
  // Scroll down to bottom of page to activate lazy loading images
  document.body.scrollIntoView(false);

  // Wait for all remaining lazy loading images to settle (load or fail)
  const images = Array.from(document.getElementsByTagName('img'));
  await Promise.all(images.map((image) => {
    // `complete` is already true for images that have finished loading and
    // for images that are broken, so those need no waiting.
    if (image.complete) {
      return undefined;
    }
    return new Promise((resolve) => {
      image.addEventListener('load', resolve, { once: true });
      // Resolve on error too: images that failed earlier are already accepted
      // via `complete`, so an in-flight failure should not reject Promise.all
      // and abort the whole evaluate() call.
      image.addEventListener('error', resolve, { once: true });
    });
  }));
});
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
25727 次 |
| 最近记录: |