在 puppeteer 站点内获取数据

Gre*_*ggy 5 web-crawler node.js fetch-api puppeteer

我在从外部服务获取数据时遇到奇怪的问题。我有一个简单的脚本来从 API 打印一些数据:

// Request the vehicle feed (credentials included) and print the raw body.
fetch("http://sip.ztm.kielce.pl/Home/CNR_GetVehicles?r=0W&d=&nb=", {
    "credentials": "include",
    "headers": {
        "accept": "application/xml, text/xml, */*; q=0.01",
        "accept-language": "pl;q=1.3592",
        "x-requested-with": "XMLHttpRequest"
    },
    "referrer": "http://sip.ztm.kielce.pl/",
    "referrerPolicy": "no-referrer-when-downgrade",
    "body": null,
    "method": "GET",
    "mode": "cors"
})
    .then((reply) => reply.text())
    .then((payload) => {
        console.log(payload); // text() yields the body as a string
    });
Run Code Online (Sandbox Code Playgroud)

该脚本有效,但仅当我位于http://sip.ztm.kielce.pl时。所以我计划使用 puppeteer 从这个站点上下文中评估这个脚本。

所以我写了 Node.JS 代码:

// Question script: open the site in Puppeteer and run the same fetch from
// inside the page context, forwarding the page's console output to Node.
const puppeteer = require('puppeteer');

(async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    // Forward each page console message to the matching Node console method.
    // NOTE(review): `_type` / `_text` are underscore-prefixed fields; the public
    // accessors are presumably `log.type()` / `log.text()` — confirm against the
    // installed Puppeteer version.
    page.on('console', (log) => console[log._type](log._text));

    await page.goto('http://sip.ztm.kielce.pl');
    // Fixed pause after navigation (presumably 5000 ms — confirm the unit).
    await page.waitFor(5000);

    await page.evaluate(() => {
        console.log('test');
        // NOTE(review): this promise chain is not returned from the callback, so
        // `page.evaluate` has no promise to wait on and `browser.close()` below can
        // run before the response is logged — a likely cause of the missing output.
        fetch("http://sip.ztm.kielce.pl/Home/CNR_GetVehicles?r=0W&d=&nb=", {"credentials":"include","headers":{"accept":"application/xml, text/xml, */*; q=0.01","accept-language":"pl;q=1.3592","x-requested-with":"XMLHttpRequest"},"referrer":"http://sip.ztm.kielce.pl/","referrerPolicy":"no-referrer-when-downgrade","body":null,"method":"GET","mode":"cors"})
        .then(response => {
                return response.text();
            }).then(function(data) {
            console.log(data); // this will be a string
        });
    });
    // Closes the browser right after `evaluate` resolves; nothing above waits
    // for the fetch to finish.
    await browser.close();
})();
Run Code Online (Sandbox Code Playgroud)

但我得到的响应与“正常”方式下得到的不一样。你有什么想法吗?

  • 由于 API 安全性,此代码可能无效,在第一个版本中也是如此。我正在从 Chrome 开发者工具中的网络选项卡中获取“有效”代码。

小智 4

使用相对网址,如下所示:

/Home/CNR_GetVehicles?r=0W&d=&nb= 
Run Code Online (Sandbox Code Playgroud)

而不是完整路径

http://sip.ztm.kielce.pl/Home/CNR_GetVehicles?r=0W&d=&nb=
Run Code Online (Sandbox Code Playgroud)

或者您可以直接访问该网址

const puppeteer = require('puppeteer');

/**
 * Opens http://sip.ztm.kielce.pl in a Puppeteer-controlled browser, fetches the
 * vehicle feed from inside the page (a relative URL, so the request resolves
 * against the page's own origin), and prints the response body from Node.
 *
 * The body is returned from `page.evaluate` and awaited, so the browser is not
 * closed until the response has been read.
 */
(async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();

        // Mirror the page's console output in Node using the public
        // ConsoleMessage accessors. Message types that are not plain logging
        // methods fall back to console.log.
        page.on('console', (log) => {
            const type = log.type();
            const method = type === 'warning' ? 'warn' : type;
            const isLoggingMethod = ['log', 'info', 'warn', 'error', 'debug'].includes(method);
            const write = isLoggingMethod ? console[method] : console.log;
            write.call(console, log.text());
        });

        await page.goto('http://sip.ztm.kielce.pl', {
            waitUntil: "networkidle2",
        });

        // The async callback returns a promise, which `page.evaluate` awaits;
        // its resolved value (the response text) is handed back to Node.
        const data = await page.evaluate(async () => {
            console.log('test');
            const response = await fetch("/Home/CNR_GetVehicles?r=0W&d=&nb=", {
                "credentials": "include",
                "headers": {
                    "accept": "application/xml, text/xml, */*; q=0.01",
                    "accept-language": "pl;q=1.3592",
                    "x-requested-with": "XMLHttpRequest"
                },
                "referrer": "http://sip.ztm.kielce.pl/",
                "referrerPolicy": "no-referrer-when-downgrade",
                "body": null,
                "method": "GET",
                "mode": "cors"
            });
            return response.text();
        });
        console.log(data); // this will be a string
    } finally {
        // Always release the browser process, even if navigation or fetch fails.
        await browser.close();
    }
})().catch((error) => {
    // Surface failures instead of leaving an unhandled rejection.
    console.error(error);
    process.exitCode = 1;
});
Run Code Online (Sandbox Code Playgroud)