Tom*_*Tom 7 javascript python twitter selenium beautifulsoup
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# Location of the chromedriver binary used by both checks.
CHROMEDRIVER_PATH = "C:\\Program Files (x86)\\chromedriver.exe"

# Seconds to pause after navigation before the DOM is captured.
RENDER_WAIT_SECONDS = 2


def _fetch_soup(url):
    """Load *url* in headless Chrome and return the page parsed as soup.

    Args:
        url: Address to open in the browser.

    Returns:
        A ``BeautifulSoup`` tree built from ``driver.page_source`` using the
        ``html.parser`` backend.
    """
    options = Options()
    options.add_argument("--headless")
    # NOTE(review): newer Selenium 4 releases no longer accept the driver
    # path positionally and expect a Service object instead -- confirm
    # against the installed Selenium version before upgrading.
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
    try:
        driver.get(url)
        # Pause BEFORE reading page_source; sleeping afterwards (as the
        # previous version did) cannot influence the HTML already captured.
        time.sleep(RENDER_WAIT_SECONDS)
        return BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if navigation raised.
        driver.quit()


def checkLinkedIn(command):
    """Print whether a LinkedIn profile page exists for *command*.

    Args:
        command: Profile slug appended to ``https://www.linkedin.com/in/``.

    The profile counts as found when the page contains an ``<h1>`` whose
    class is ``top-card-layout__title``.
    """
    url = f"https://www.linkedin.com/in/{command}"
    soup = _fetch_soup(url)
    name = soup.find("h1", attrs={"class": "top-card-layout__title"})
    if name:
        print("LinkedIn profile found")
        print(url)
    else:
        print("No LinkedIn profile found")


def checkTwitter(command):
    """Print whether a Twitter profile page exists for *command*.

    Args:
        command: Handle appended to ``https://www.twitter.com/``.

    The full page text is printed first, then the profile counts as found
    when the page contains any ``<div dir="ltr">``.
    """
    url = f"https://www.twitter.com/{command}"
    soup = _fetch_soup(url)
    at_tag = soup.find("div", attrs={"dir": "ltr"})
    print(soup.text)
    if at_tag:
        print("Twitter profile found")
        print(url)
    else:
        print("No Twitter profile found")


if __name__ == "__main__":
    usrname = input("--> ")

    checkTwitter(usrname)

# Question text: the LinkedIn function works. However, the Twitter check
# prints the following:
JavaScript 不可用。
我们检测到此浏览器中已禁用 JavaScript。请启用 JavaScript 或切换到受支持的浏览器,以继续使用 twitter.com。您可以在我们的帮助中心查看受支持的浏览器列表。
如何在无头 Chrome 中启用 JavaScript?提前致谢。
\nRoy*_*rot 21
这可能是因为该网站检测到它是无头浏览器并禁用了某些功能。
为了解决这个问题,您可以(尽可能地)伪装无头浏览器的身份,让网站无法识别出它是无头浏览器。
尝试以下选项:
# Build a headless Chrome session that hides the usual automation
# fingerprints and reports a randomly chosen user agent.
from fake_useragent import UserAgent
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument("--incognito")
# NOTE(review): "--nogpu" does not appear to be a recognised Chromium
# switch ("--disable-gpu" below is); presumably ignored -- confirm.
options.add_argument("--nogpu")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1280,1280")
options.add_argument("--no-sandbox")
# NOTE(review): "--enable-javascript" also looks unrecognised by Chromium;
# kept from the original option list -- confirm whether it has any effect.
options.add_argument("--enable-javascript")
# Drop the switch and extension that mark the session as automated.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
ua = UserAgent()
userAgent = ua.random
driver = webdriver.Chrome(options=options)

hide_webdriver_js = "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
# execute_script only patches the document that is currently loaded, so the
# override would be lost on the next driver.get(). Registering the same
# script through CDP makes it run at the start of every new document.
driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {"source": hide_webdriver_js})
# Still patch the current document, as the original snippet did.
driver.execute_script(hide_webdriver_js)
# Replace the default user agent with the randomly chosen one.
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": userAgent})
Run Code Online (Sandbox Code Playgroud)
这些选项帮我解决了一个特别顽固的网站。它们是我从许多答案中收集来的,尤其是这个:https://stackoverflow.com/a/53040904/5339857
| 归档时间: |
|
| 查看次数: |
10139 次 |
| 最近记录: |