尝试屏幕抓取网站而不必在python脚本中启动实际的浏览器实例(使用Selenium).我可以用Chrome或Firefox做到这一点 - 我已经尝试了它并且它有效 - 但我想使用PhantomJS所以它是无头的.
代码如下所示:
import sys
import traceback
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53 "
"(KHTML, like Gecko) Chrome/15.0.87"
)
try:
# Choose our browser
browser = webdriver.PhantomJS(desired_capabilities=dcap)
#browser = webdriver.PhantomJS()
#browser = webdriver.Firefox()
#browser = webdriver.Chrome(executable_path="/usr/local/bin/chromedriver")
# Go to the login page
browser.get("https://www.whatever.com")
# For debug, see what we got back
html_source = browser.page_source
with open('out.html', 'w') as …Run Code Online (Sandbox Code Playgroud)