我正在尝试抓取 Booking.com。爬虫(spider)启动后随即关闭,并没有打开或抓取任何 URL。[输出][1] [1]: https://i.stack.imgur.com/9hDt6.png 我是 Python 和 Scrapy 的新手。这是我到目前为止编写的代码。请指出我做错了什么。
import scrapy
import urllib
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.item import Item
from scrapy.loader import ItemLoader
from CinemaScraper.items import CinemascraperItem
class trip(CrawlSpider):
name="tripadvisor"
def start_requests(self):
    """Yield one initial request per seed page.

    Each request is routed to ``self.parse`` as its callback.
    """
    seed_pages = (
        'http://quotes.toscrape.com/page/1/',
        'http://quotes.toscrape.com/page/2/',
    )
    for page in seed_pages:
        yield scrapy.Request(url=page, callback=self.parse)
def parse(self, response):
    """Follow the "show all reviews" link found on a downloaded page.

    Looks up the ``href`` of the anchor whose class is
    ``show_all_reviews_btn``. When it is present, ``self.pageNumber`` is
    reset to 1 and a request for the (absolute) reviews URL is returned,
    with ``self.parse_reviews`` as its callback.

    When the page contains no such anchor, a warning is logged and
    ``None`` is returned instead of raising ``IndexError`` on the empty
    selector result.

    NOTE(review): the enclosing class derives from ``CrawlSpider``; the
    Scrapy documentation advises against overriding ``parse`` on a
    ``CrawlSpider`` -- confirm whether a plain ``Spider`` was intended.
    """
    matches = response.xpath('//a[@class="show_all_reviews_btn"]/@href')
    if not matches:
        # Nothing to follow on this page; indexing [0] here would raise
        # IndexError and abort the callback.
        self.logger.warning(
            "No 'show_all_reviews_btn' link found on %s", response.url
        )
        return None

    reviews_url = response.urljoin(matches[0].extract())
    self.pageNumber = 1
    return scrapy.Request(reviews_url, callback=self.parse_reviews)
def parse_reviews(self, response):
for rev in response.xpath('//li[starts-with(@class,"review_item")]'):
item =CinemascraperItem()
#sometimes …Run Code Online (Sandbox Code Playgroud)