pes*_*mer 5 xpath jquery-selectors node.js puppeteer
我正在尝试这样做:
打开谷歌
搜索“伦敦的酒店”
点击“查看3810家酒店”
---新页面打开---
---新页面打开---
---新页面打开---
我已成功完成步骤 1-4,但我陷入了步骤 5,因为 puppeteer 不会自动点击价格。事实上,第 5 步没有任何反应。我了解到这是因为“Prices”选择器不断变化,这就是我应该使用 page.$() 的原因。然而,这也没有帮助。
这是我的代码:
const puppeteer = require("puppeteer");
(async () => {
try {
const browser = await puppeteer.launch({
headless: false,
});
const page = await browser.newPage();
//google.com
await page.goto("https://google.com");
await page.type("input.gLFyf.gsfi", "hotels in london");
await page.keyboard.press("Enter");
//search results
await page.waitForSelector(
"#rso > div:nth-child(2) > div > div > div > g-more-link > a > div"
);
await page.click(
"#rso > div:nth-child(2) > div > div > div > g-more-link > a > div"
);
//list of hotels
await page.waitForSelector(
"#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div.lteUWc > div > c-wiz > div > div.gpcwnc > div.cGQUT > main > div > div.Hkwcrd.Sy8xcb.XBQ4u > c-wiz > div.J6e2Vc > div > div > span > span"
);
//click on view prices
await page.click(
"#yDmH0d > c-wiz.zQTmif.SSPGKf > div > div.lteUWc > div > c-wiz > div > div.gpcwnc > div.cGQUT > main > div > div.Hkwcrd.Sy8xcb.XBQ4u > c-wiz > div.l5cSPd > c-wiz:nth-child(5) > div > div > div > div.kCsInf.ZJqrAd.qiy8jf > div > div.TPQEac.RCpQOe > a > button > span"
);
//get list of open tabs (does not include new tab)
const pages = await browser.pages();
//prints 2 although there are 3 tabs
console.log(pages.length);
// get the new page
const page2 = pages[pages.length - 1];
const url = await page2.evaluate(() => window.location.href);
//prints url of page with list of hotels
console.log(url);
//takes screen shot of penultimate tab instead of new tab
await page2.screenshot({ path: "sample.png" });
let providers = 0;
await browser.close();
return providers;
} catch (err) {
console.error(err);
}
})()
.then((resolvedValue) => {
console.log(resolvedValue);
})
.catch((rejectedValue) => {
console.log(rejectedValue);
});
Run Code Online (Sandbox Code Playgroud)
通过导航,我可以切换到新选项卡,但出现超时错误:
TimeoutError:
Navigation timeout of 30000 ms exceeded
at C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\LifecycleWatcher.js:100:111
at async FrameManager.waitForFrameNavigation (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\FrameManager.js:107:23)
at async Frame.waitForNavigation (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\FrameManager.js:298:16)
at async Page.waitForNavigation (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\Page.js:560:16)
at async C:\Users\Ayesha\Desktop\web_scraping_practice\loop.js:28:5
-- ASYNC --
at Frame.<anonymous> (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\helper.js:116:19)
at Page.waitForNavigation (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\Page.js:560:53)
at Page.<anonymous> (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\helper.js:117:27)
at C:\Users\Ayesha\Desktop\web_scraping_practice\loop.js:28:16
at processTicksAndRejections (internal/process/task_queues.js:97:5) {
name: 'TimeoutError'
}
undefined
-- ASYNC --
at Frame.<anonymous> (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\helper.js:116:19)
at Page.waitForNavigation (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\Page.js:560:53)
at Page.<anonymous> (C:\Users\Ayesha\Desktop\web_scraping_practice\node_modules\puppeteer\lib\helper.js:117:27)
at C:\Users\Ayesha\Desktop\web_scraping_practice\loop.js:28:16
at processTicksAndRejections (internal/process/task_queues.js:97:5) {
name: 'TimeoutError'
}
Run Code Online (Sandbox Code Playgroud)
更新: 修改 page.waitForNavigation() 后超时错误消失,如下所示:
//click on view prices
const btn2 = await page.$x('//span[contains(text(),"Learn more")]');
await Promise.all([
page.waitForNavigation({ waitUntil: "load" }),
btn2[0].click(),
]);
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
6823 次 |
| 最近记录: |