rob*_*txt 3 python selenium web-scraping python-3.x selenium-webdriver
我用 python 结合 selenium 编写了一个脚本来登录一个站点,然后将 cookie 从 driver 传给 requests,以便我可以继续使用 requests 来进行进一步的活动。
我用item = soup.select_one("div[class^='gravatar-wrapper-']").get("title")这一行来检查脚本是否可以在一切完成后获取我的用户名。
这是我到目前为止的尝试:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Question's attempt: log in with Selenium, then replay the browser's cookies
# through requests to fetch the page the browser ended up on.
url = "https://stackoverflow.com/users/login"
driver = webdriver.Chrome()
driver.get(url)
# Fill in and submit the login form.
driver.find_element_by_css_selector("#email").send_keys("your_username")
driver.find_element_by_css_selector("#password").send_keys("your_password")
driver.find_element_by_css_selector("#submit-button").click()
# Cookies are read immediately after the click, with no wait in between.
driver_cookies = driver.get_cookies()
# Flatten Selenium's list of cookie dicts into the name -> value mapping
# that is passed to requests.
c = {c['name']:c['value'] for c in driver_cookies}
res = requests.get(driver.current_url,cookies=c)
soup = BeautifulSoup(res.text,"lxml")
# The "title" attribute of the gravatar wrapper is used as the check for
# whether the fetched page shows the logged-in user name.
item = soup.select_one("div[class^='gravatar-wrapper-']").get("title")
print(item)
driver.quit()
Run Code Online (Sandbox Code Playgroud)
当我运行脚本时,它找不到用户名并给出 None 作为输出。
如何在使用 selenium 登录后,在 selenium 和 requests 之间传递 cookie,以便使用 requests 进行抓取?
你已经走在正确的轨道上了。您现在需要做的就是让脚本稍等片刻以加载 cookie。您可以通过以下方式获得响应:
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Log in through a real browser, then hand the session cookies to requests
# and read the user name from the page fetched with those cookies.
url = "https://stackoverflow.com/users/login"

# Using the driver as a context manager closes the browser when the block
# ends, including when one of the steps below raises.
with webdriver.Chrome() as driver:
    driver.get(url)

    # Fill in and submit the login form.
    driver.find_element_by_css_selector("#email").send_keys("your_username")
    driver.find_element_by_css_selector("#password").send_keys("your_password")
    driver.find_element_by_css_selector("#submit-button").click()

    # This is the fix: pause so the post-login cookies are in place before
    # they are read from the browser.
    time.sleep(5)

    # Flatten Selenium's list of cookie dicts into the name -> value mapping
    # that requests accepts.
    driver_cookies = driver.get_cookies()
    cookies = {cookie['name']: cookie['value'] for cookie in driver_cookies}

    # Fetch the page the browser landed on, this time through requests.
    res = requests.get(driver.current_url, cookies=cookies)
    soup = BeautifulSoup(res.text, "lxml")

    # select_one returns None when nothing matches; print None in that case
    # instead of failing with AttributeError on .get().
    wrapper = soup.select_one("div[class^='gravatar-wrapper-']")
    item = wrapper.get("title") if wrapper is not None else None
    print(item)
Run Code Online (Sandbox Code Playgroud)