
我试过直接复制粘贴网站元素的 XPath,但没有返回任何结果。
Scrapy 可以抓取 iframe 内的数据吗?如果可以,该怎么做?如果不可以,还有什么其他办法?谢谢!
# Single crawl rule: links are extracted with SgmlLinkExtractor (filtered by
# ``path_deny_base`` and restricted to the XPath '*'), each fetched page is
# handed to the callback named "parse", and the crawl keeps following links.
# The original wrote restrict_xpaths=('*'); the parentheses did not create a
# tuple, so the plain string below is the same value.
# NOTE(review): Scrapy's CrawlSpider documentation advises against using
# ``parse`` as a rule callback because CrawlSpider implements its own logic in
# ``parse`` - if this spider subclasses CrawlSpider, confirm this is intended.
rules = (
    Rule(
        SgmlLinkExtractor(deny=path_deny_base, restrict_xpaths='*'),
        callback="parse",
        follow=True,
    ),
)
def parse(self, response):
    """Schedule one follow-up request whose response goes to ``parse_iframe``.

    NOTE(review): ``url`` is not assigned anywhere in this snippet; presumably
    it is meant to be the iframe's ``src`` taken from ``response`` - confirm
    against the full spider.
    """
    iframe_request = Request(url, callback=self.parse_iframe)
    yield iframe_request
def parse_iframe(self, response):
# Callback for the request issued with callback=self.parse_iframe: selects
# title, link and location nodes from `response` by XPath and starts building
# one CraigslistSampleItem per title node.
# NOTE(review): the listing is cut off after the "title" assignment, so the
# rest of the loop body and the return value are not visible here.
# NOTE(review): the body has lost its indentation in this copy and must be
# re-indented under the `def` before it can run.
#your code to scrape the content from iframe
#def parse_items(self, response):
hxs = HtmlXPathSelector(response)
# NOTE(review): the XPaths below are purely positional (div[2], div[10], ...);
# presumably copied from browser dev tools - verify they match the HTML that
# is actually downloaded for this response.
titles = hxs.select('//div[2]/h1')
#//div[2]/h1
linker = hxs.select('//div[2]/div[10]/a[1]')
#//div[2]/div[10]/a[1]
loc_Con = hxs.select('//div[2]/div[1]/div[2]/span/span/span[1]') #//div[2]/div[1]/div[2]/span/span/span[1]
loc_Reg = hxs.select('//div[2]/div[1]/div[2]/span/span/span[2]') #/div[2]/div[1]/div[2]/span/span/span[2]
loc_Loc = hxs.select('//div[2]/div[1]/div[2]/span/span/span[3]') #/div[2]/div[1]/div[2]/span/span/span[3]
# `linker` and the three `loc_*` selections are not used in the visible lines.
items = []
# The loop variable reuses the name `titles`, so inside the loop the name
# refers to a single selected node, not to the original selection.
for titles in titles:
item = CraigslistSampleItem()
#item ["job_id"] = id.select('text()').extract()[0].strip()
# Strips surrounding whitespace from every extracted text node. `unicode`
# exists only in Python 2, where map() returns a list.
item ["title"] = map(unicode.strip, titles.select('text()').extract()) #ok
item ["link"] …
Run Code Online (Sandbox Code Playgroud)
我怎样才能去掉 [u'\n\n\n result here \n\n\n'] 中多余的换行符,
只得到 [u'result here']?我正在使用 Scrapy。
def parse_items(self, response):
# Appears intended to build one CraigslistSampleItem per title node and
# return the collected list.
# NOTE(review): as pasted, this snippet cannot run:
#   - the body has lost its indentation;
#   - `titles` is read by the loop before anything in this function assigns
#     it, and `items` is never initialised here;
#   - `id` is not defined here, so it resolves to the builtin `id` function,
#     which has no `.select` - presumably a selector elided from the snippet,
#     confirm;
#   - the parenthesis opened on the "job_id" line is never closed;
#   - the trailing `end` is not Python syntax.
# `str` shadows the builtin and is never read in the visible lines.
str = ""
# `hxs` is created but not used in the visible lines.
hxs = HtmlXPathSelector(response)
for titles in titles:
item = CraigslistSampleItem()
# extract() yields the raw text nodes, including surrounding newlines.
# NOTE(review): stripping each one, e.g.
# [s.strip() for s in sel.select('text()').extract()], would presumably give
# the whitespace-free result the author is after - confirm.
item ["job_id"] = (id.select('text()').extract() #ok
items.append(item)
return(items)
end
Run Code Online (Sandbox Code Playgroud)
谁能帮我?