如您在下面的示例代码中所看到的,我将Puppeteer与Node中的一组工作人员一起使用,以通过给定的URL运行多个网站截图请求:
const cluster = require('cluster');
const express = require('express');
const bodyParser = require('body-parser');
const puppeteer = require('puppeteer');
async function getScreenshot(domain) {
let screenshot;
const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'] });
const page = await browser.newPage();
try {
await page.goto('http://' + domain + '/', { timeout: 60000, waitUntil: 'networkidle2' });
} catch (error) {
try {
await page.goto('http://' + domain + '/', { timeout: 120000, waitUntil: 'networkidle2' });
screenshot = await page.screenshot({ type: 'png', encoding: 'base64' });
} …Run Code Online (Sandbox Code Playgroud) node.js web-scraping node-cluster google-chrome-headless puppeteer