Sma*_*hed 3 python httpresponse scrapy web-scraping
我正在处理scrapy中的响应并继续收到此消息.
我只给出了发生错误的片段.我正在尝试浏览不同的网页,并且需要获取该特定网页中的#个页面.所以我创建了一个响应对象,我得到了下一个按钮的href,但继续得到AttributeError: 'Response' object has no attribute 'body_as_unicode'
代码使用.
from scrapy.spiders import Spider
from scrapy.selector import Selector
from scrapy.http import Request
from scrapingtest.items import ScrapingTestingItem
from collections import OrderedDict
import json
from scrapy.selector.lxmlsel import HtmlXPathSelector
import csv
import scrapy
from scrapy.http import Response
class scrapingtestspider(Spider):
name = "scrapytesting"
allowed_domains = ["tripadvisor.in"]
# base_uri = ["tripadvisor.in"]
def start_requests(self):
site_array=["http://www.tripadvisor.in/Hotel_Review-g3581633-d2290190-Reviews-Corbett_Treetop_Riverview-Marchula_Jim_Corbett_National_Park_Uttarakhand.html"
"http://www.tripadvisor.in/Hotel_Review-g297600-d8029162-Reviews-Daman_Casa_Tesoro-Daman_Daman_and_Diu.html",
"http://www.tripadvisor.in/Hotel_Review-g304557-d2519662-Reviews-Darjeeling_Khushalaya_Sterling_Holidays_Resort-Darjeeling_West_Bengal.html",
"http://www.tripadvisor.in/Hotel_Review-g319724-d3795261-Reviews-Dharamshala_The_Sanctuary_A_Sterling_Holidays_Resort-Dharamsala_Himachal_Pradesh.html",
"http://www.tripadvisor.in/Hotel_Review-g1544623-d8029274-Reviews-Dindi_By_The_Godavari-Nalgonda_Andhra_Pradesh.html"]
for i in range(len(site_array)):
response = Response(url=site_array[i])
sites = Selector(response).xpath('//a[contains(text(), "Next")]/@href').extract()
# sites = response.selector.xpath('//a[contains(text(), "Next")]/@href').extract()
for site in sites:
yield Request(site_array[i],self.parse)
Run Code Online (Sandbox Code Playgroud)
`