使用casperjs下载资源图像文件

HP.*_*HP. 5 javascript node.js phantomjs casperjs slimerjs

我阅读了文档(http://docs.casperjs.org/en/latest/events-filters.html),看起来需要使用 slimerjs 才能在 page.resource.received 事件中获得 responseData.body。

我的用例是在页面加载时下载图像,这样就不必再做一次往返:获取资源 JSON,下载并保存所有图像文件,重新加载文件以检查图像尺寸,如果太小(例如图标)则丢弃。

我想知道是否有更好的方法来做到这一点。我其实可以用 evaluate 选取 img 元素,但有些网站使用 CSS 的 background-url,这就比较棘手。

EMX*_*EMX 0

使用 evaluate 可能比较棘手,但这是一种可行的方法(仅限 PhantomJS):


通过这个例子,可以:

  1. 使用 page.render(),按照这些元素的 clipRects 将符合特定条件的图像保存为屏幕截图文件。

  2. 存储匹配资源的 URL,以便之后发起下载请求。

  3. 捕获 'src' 属性或 'background-image' CSS 属性中的 URL,并尝试获取宽度和高度,用于条件匹配和捕获。

/**
 * PhantomJS script: opens a page, scans every element for an image reference
 * (a CSS 'background-image' url() or an HTML 'src' attribute) and records the
 * ones whose dimensions fall inside the configured min/max bounds, together
 * with a clipRect ({top, left, width, height}) for each captured element.
 *
 * NOTE(review): the in-page code calls `$`, so it assumes the target page
 * already loads jQuery — confirm, or inject it (e.g. page.includeJs) first.
 * Syntax is deliberately kept to ES5 (`var`, function expressions) because the
 * script runs under PhantomJS rather than Node.
 */
var page = require('webpage').create();

// Forward console output produced inside the page context to the PhantomJS console.
page.onConsoleMessage = function (msg) {
  console.log(msg);
};

console.log('[#] I M A G E · N I N J A');

page.open('http://cartawifi.com', function (status) {
  // Bail out early instead of evaluating against a page that failed to load.
  if (status !== 'success') {
    console.log('[x] Unable to load page, status : ' + status);
    phantom.exit(1);
    return;
  }

  // The function below is serialized and executed inside the page, so it must
  // be self-contained: it cannot close over variables from this outer script.
  // It returns the list of clipRects (plain objects) for the captured elements.
  var clipRectList = page.evaluate(function () {
    // .: Captured Images : In-Memory Reference Storage :.
    var IMAGES = {
      'src': { 'indxs': [], 'ref': [] },
      'background-image': { 'indxs': [], 'ref': [] },
      'selectors': []
    };
    // One entry per captured element, usable for per-element screenshots.
    var clipRects = [];
    var capturedImages = 0;
    var totalElements = 0;

    // .: Define Image Capture : Min/Max Width/Height :.
    var minWidth = 1;
    var minHeight = 1;
    var maxWidth = 9999;
    var maxHeight = 9999;

    // Extracts the URL from a CSS url(...) token, with or without quotes.
    // For multiple backgrounds only the first url() is taken.
    var urlPattern = /url\(\s*(['"]?)(.*?)\1\s*\)/;

    $('*').each(function (index, el) {
      var $el = $(el);
      var group;
      var src;
      var width;
      var height;

      totalElements++;

      var background = $el.css('background-image');
      var urlMatch = background ? urlPattern.exec(background) : null;
      var srcAttr = $el.attr('src');

      if (urlMatch) {
        // Element with a real 'background-image' url (not 'none' / gradient).
        group = 'background-image';
        src = urlMatch[2];
        // Computed sizes come back like '120px'; parseFloat drops the unit.
        width = parseFloat($el.css('width'));
        height = parseFloat($el.css('height'));
      } else if (srcAttr != null) {
        // Element with a 'src' html attribute. naturalWidth/naturalHeight are
        // undefined on non-image elements, which fails the size check below.
        group = 'src';
        src = srcAttr;
        width = el.naturalWidth;
        height = el.naturalHeight;
      } else {
        return; // no image reference on this element; continue with the next
      }

      // NaN / undefined dimensions fail every comparison and are skipped.
      var isWithinBounds =
        width >= minWidth && width <= maxWidth &&
        height >= minHeight && height <= maxHeight;
      if (!isWithinBounds) {
        return;
      }

      IMAGES[group].indxs.push(index);
      IMAGES[group].ref.push(src);
      IMAGES.selectors.push(el);
      capturedImages++;
      console.log("  [captured] :", group, width, height, src);

      // Store clipRect for this element. offsetWidth/offsetHeight live on the
      // DOM node, not on the jQuery wrapper.
      var clipR = $.extend({}, $el.offset(), {
        width: el.offsetWidth,
        height: el.offsetHeight
      });
      console.log("    (clipRect)", JSON.stringify(clipR));
      clipRects.push(clipR);
    });

    // report information :
    console.log('[i] Total Elements Parsed : ', totalElements);
    console.log('[*] Total Images Captured : ', capturedImages);
    console.log('     >              [src] : ', IMAGES['src'].indxs.length);
    console.log('     > [background-image] : ', IMAGES['background-image'].indxs.length);

    // Only plain, serializable data can cross back out of the page context.
    return clipRects;
  });

  // clipRectList now holds the captured rectangles; each can be assigned to
  // page.clipRect before calling page.render() to save that element.
  console.log('[!] TO-DO : STORE CAPTURED IMAGES AS FILES');
  phantom.exit();
});
Run Code Online (Sandbox Code Playgroud)\n