免责声明:我刚开始接触 Scrapy,还是个新手。
具体来说,我的问题是:如何跟随页面上的某个链接,从链接指向的页面中提取 Item 的某个属性,并将结果填回到同一个 Item 中?
以下面这个示例爬虫(Spider)为例:
class SiteSpider(Spider):
site_loader = SiteLoader
...
def parse(self, response):
item = Place()
sel = Selector(response)
bl = self.site_loader(item=item, selector=sel)
bl.add_value('domain', self.parent_domain)
bl.add_value('origin', response.url)
for place_property in item.fields:
parse_xpath = self.template.get(place_property)
# parse_xpath will look like either:
# '//path/to/property/text()'
# or
# {'url': '//a[@id="Location"]/@href',
# 'xpath': '//div[@class="directions"]/span[@class="address"]/text()'}
if isinstance(parse_xpath, dict): # place_property is at a URL
url = sel.xpath(parse_xpath['url_elem']).extract()
yield Request(url, callback=self.get_url_property,
meta={'loader': bl, 'parse_xpath': parse_xpath,
'place_property': place_property})
else: # parse_xpath is just an xpath; process normally
bl.add_xpath(place_property, …Run Code Online (Sandbox Code Playgroud)