我想抓取下面这个网站的数据:
但是,它需要我向下滚动页面才能加载并收集其余的数据。我不知道如何用 Beautiful Soup 或 Python 实现向下滚动。有人知道该怎么做吗?
代码有点乱,但目前是这样的:
import scrapy
from scrapy.selector import Selector
from testtest.items import TesttestItem
import datetime
from selenium import webdriver
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
import re
import time
class MLStripper(HTMLParser):
class MySpider(scrapy.Spider):
name = "A1Locker"
def strip_tags(html):
    """Feed *html* to a fresh ``MLStripper`` and return its ``get_data()``.

    NOTE(review): ``MLStripper``'s body is not visible here; presumably it
    collects the text content with markup removed -- confirm against its
    definition.
    """
    stripper = MLStripper()
    stripper.feed(html)
    return stripper.get_data()
allowed_domains = ['https://www.a1lockerrental.com']
start_urls = ['http://www.a1lockerrental.com/self-storage/mo/st-
louis/4427-meramec-bottom-rd-facility/unit-sizes-prices#/units?
category=all']
def parse(self, response):
url='http://www.a1lockerrental.com/self-storage/mo/st-
louis/4427-meramec-bottom-rd-facility/unit-sizes-prices#/units?
category=Small'
driver = webdriver.Firefox()
driver.get(url)
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
url2='http://www.a1lockerrental.com/self-storage/mo/st-louis/4427-
meramec-bottom-rd-facility/unit-sizes-prices#/units?category=Medium'
driver2 = webdriver.Firefox()
driver2.get(url2)
html2 = driver.page_source …