我正在做一个网页抓取项目,遇到了以下错误。
requests.exceptions.MissingSchema:无效的 URL 'h':未提供架构。也许你的意思是http://h?
下面是我的代码。我从 html 表中检索所有链接,并按预期打印出来。但是当我尝试使用 request.get 遍历它们(链接)时,我得到了上面的错误。
from bs4 import BeautifulSoup
import requests
import unicodedata
from pandas import DataFrame
page = requests.get("http://properties.kimcorealty.com/property/output/find/search4/view:list/")
soup = BeautifulSoup(page.content, 'html.parser')
table = soup.find('table')
for ref in table.find_all('a', href=True):
links = (ref['href'])
print (links)
for link in links:
page = requests.get(link)
soup = BeautifulSoup(page.content, 'html.parser')
table = []
# Find all the divs we need in one go.
divs = soup.find_all('div', {'id':['units_box_1', 'units_box_2', 'units_box_3']})
for div in divs:
# find all …Run Code Online (Sandbox Code Playgroud)