nil*_*lse 136 javascript events phantomjs
我正在使用PhantomJS v1.4.1来加载一些网页.我没有访问他们的服务器端,我只是获得指向他们的链接.我正在使用Phantom的过时版本,因为我需要在该网页上支持Adobe Flash.
问题是许多网站正在加载他们的次要内容异步,这就是为什么Phantom的onLoadFinished回调(HTML中的onLoad的模拟)在没有任何东西仍然加载时过早发生的原因.任何人都可以建议我如何等待网页的完整加载,例如,包含广告等所有动态内容的屏幕截图?
rhu*_*cks 74
另一种方法是在执行渲染之前让PhantomJS在页面加载之后稍等一下,按照常规的rasterize.js示例,但是使用更长的超时来允许JavaScript完成加载其他资源:
page.open(address, function (status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
window.setTimeout(function () {
page.render(output);
phantom.exit();
}, 1000); // Change timeout as required to allow sufficient time
}
});
Run Code Online (Sandbox Code Playgroud)
Mat*_*iuk 51
我宁愿定期检查document.readyState状态(https://developer.mozilla.org/en-US/docs/Web/API/document.readyState).虽然这种方法有点笨拙,但您可以确定内部onPageReady函数是使用完全加载的文档.
var page = require("webpage").create(),
url = "http://example.com/index.html";
function onPageReady() {
var htmlContent = page.evaluate(function () {
return document.documentElement.outerHTML;
});
console.log(htmlContent);
phantom.exit();
}
page.open(url, function (status) {
function checkReadyState() {
setTimeout(function () {
var readyState = page.evaluate(function () {
return document.readyState;
});
if ("complete" === readyState) {
onPageReady();
} else {
checkReadyState();
}
});
}
checkReadyState();
});
Run Code Online (Sandbox Code Playgroud)
补充说明:
当执行因某些随机原因延长时,使用嵌套setTimeout而不是setInterval防止checkReadyState"重叠"和竞争条件.setTimeout默认延迟为4毫秒(/sf/answers/250605981/),因此主动轮询不会对程序性能产生严重影响.
document.readyState === "complete"表示文档已完全加载所有资源(https://html.spec.whatwg.org/multipage/dom.html#current-document-readiness).
rhu*_*cks 20
您可以尝试waitfor和rasterize示例的组合:
/**
* See https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
*
* Wait until the test condition is true or a timeout occurs. Useful for waiting
* on a server response or for a ui change (fadeIn, etc.) to occur.
*
* @param testFx javascript condition that evaluates to a boolean,
* it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
* as a callback function.
* @param onReady what to do when testFx condition is fulfilled,
* it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
* as a callback function.
* @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
*/
function waitFor(testFx, onReady, timeOutMillis) {
var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timout is 3s
start = new Date().getTime(),
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()), //< defensive code
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
// If not time-out yet and condition not yet fulfilled
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
} else {
if(!condition) {
// If condition still not fulfilled (timeout but condition is 'false')
console.log("'waitFor()' timeout");
phantom.exit(1);
} else {
// Condition fulfilled (timeout and/or condition is 'true')
console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled
clearInterval(interval); //< Stop this interval
}
}
}, 250); //< repeat check every 250ms
};
var page = require('webpage').create(), system = require('system'), address, output, size;
if (system.args.length < 3 || system.args.length > 5) {
console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
phantom.exit(1);
} else {
address = system.args[1];
output = system.args[2];
if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
size = system.args[3].split('*');
page.paperSize = size.length === 2 ? {
width : size[0],
height : size[1],
margin : '0px'
} : {
format : system.args[3],
orientation : 'portrait',
margin : {
left : "5mm",
top : "8mm",
right : "5mm",
bottom : "9mm"
}
};
}
if (system.args.length > 4) {
page.zoomFactor = system.args[4];
}
var resources = [];
page.onResourceRequested = function(request) {
resources[request.id] = request.stage;
};
page.onResourceReceived = function(response) {
resources[response.id] = response.stage;
};
page.open(address, function(status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
waitFor(function() {
// Check in the page if a specific element is now visible
for ( var i = 1; i < resources.length; ++i) {
if (resources[i] != 'end') {
return false;
}
}
return true;
}, function() {
page.render(output);
phantom.exit();
}, 10000);
}
});
}
Run Code Online (Sandbox Code Playgroud)
Dav*_*ave 14
这是一个等待所有资源请求完成的解决方案.完成后,它会将页面内容记录到控制台并生成呈现页面的屏幕截图.
虽然这个解决方案可以作为一个很好的起点,但我发现它失败了,所以它绝对不是一个完整的解决方案!
我运气不好document.readyState.
我被影响waitfor.js信中例如phantomjs例子页面.
var system = require('system');
var webPage = require('webpage');
var page = webPage.create();
var url = system.args[1];
page.viewportSize = {
width: 1280,
height: 720
};
var requestsArray = [];
page.onResourceRequested = function(requestData, networkRequest) {
requestsArray.push(requestData.id);
};
page.onResourceReceived = function(response) {
var index = requestsArray.indexOf(response.id);
requestsArray.splice(index, 1);
};
page.open(url, function(status) {
var interval = setInterval(function () {
if (requestsArray.length === 0) {
clearInterval(interval);
var content = page.content;
console.log(content);
page.render('yourLoadedPage.png');
phantom.exit();
}
}, 500);
});
Run Code Online (Sandbox Code Playgroud)
Sup*_*upr 13
也许你可以使用onResourceRequested和onResourceReceived回调来检测异步加载.以下是从文档中使用这些回调的示例:
var page = require('webpage').create();
page.onResourceRequested = function (request) {
console.log('Request ' + JSON.stringify(request, undefined, 4));
};
page.onResourceReceived = function (response) {
console.log('Receive ' + JSON.stringify(response, undefined, 4));
};
page.open(url);
Run Code Online (Sandbox Code Playgroud)
此外,您可以查看examples/netsniff.js一个工作示例.
小智 13
在我的程序中,我使用一些逻辑来判断它是否是onload:看着它的网络请求,如果在过去的200ms没有新的请求,我会把它处理为onload.
在onLoadFinish()之后使用它.
function onLoadComplete(page, callback){
var waiting = []; // request id
var interval = 200; //ms time waiting new request
var timer = setTimeout( timeout, interval);
var max_retry = 3; //
var counter_retry = 0;
function timeout(){
if(waiting.length && counter_retry < max_retry){
timer = setTimeout( timeout, interval);
counter_retry++;
return;
}else{
try{
callback(null, page);
}catch(e){}
}
}
//for debug, log time cost
var tlogger = {};
bindEvent(page, 'request', function(req){
waiting.push(req.id);
});
bindEvent(page, 'receive', function (res) {
var cT = res.contentType;
if(!cT){
console.log('[contentType] ', cT, ' [url] ', res.url);
}
if(!cT) return remove(res.id);
if(cT.indexOf('application') * cT.indexOf('text') != 0) return remove(res.id);
if (res.stage === 'start') {
console.log('!!received start: ', res.id);
//console.log( JSON.stringify(res) );
tlogger[res.id] = new Date();
}else if (res.stage === 'end') {
console.log('!!received end: ', res.id, (new Date() - tlogger[res.id]) );
//console.log( JSON.stringify(res) );
remove(res.id);
clearTimeout(timer);
timer = setTimeout(timeout, interval);
}
});
bindEvent(page, 'error', function(err){
remove(err.id);
if(waiting.length === 0){
counter_retry = 0;
}
});
function remove(id){
var i = waiting.indexOf( id );
if(i < 0){
return;
}else{
waiting.splice(i,1);
}
}
function bindEvent(page, evt, cb){
switch(evt){
case 'request':
page.onResourceRequested = cb;
break;
case 'receive':
page.onResourceReceived = cb;
break;
case 'error':
page.onResourceError = cb;
break;
case 'timeout':
page.onResourceTimeout = cb;
break;
}
}
}
Run Code Online (Sandbox Code Playgroud)
Bra*_*odd 11
我发现这种方法在某些情况下很有用:
page.onConsoleMessage(function(msg) {
// do something e.g. page.render
});
Run Code Online (Sandbox Code Playgroud)
如果您拥有该页面,则将一些脚本放入其中:
<script>
window.onload = function(){
console.log('page loaded');
}
</script>
Run Code Online (Sandbox Code Playgroud)
我发现这个解决方案在 NodeJS 应用程序中很有用。我只是在绝望的情况下使用它,因为它会启动超时以等待整个页面加载。
第二个参数是回调函数,一旦响应准备好将被调用。
phantom = require('phantom');
var fullLoad = function(anUrl, callbackDone) {
phantom.create(function (ph) {
ph.createPage(function (page) {
page.open(anUrl, function (status) {
if (status !== 'success') {
console.error("pahtom: error opening " + anUrl, status);
ph.exit();
} else {
// timeOut
global.setTimeout(function () {
page.evaluate(function () {
return document.documentElement.innerHTML;
}, function (result) {
ph.exit(); // EXTREMLY IMPORTANT
callbackDone(result); // callback
});
}, 5000);
}
});
});
});
}
var callback = function(htmlBody) {
// do smth with the htmlBody
}
fullLoad('your/url/', callback);
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
139640 次 |
| 最近记录: |