免责声明:我是Scrapy新手。
具体来说,我的问题是:如何跟随页面上的某个链接,从链接指向的页面中提取Item的某个属性,并把结果填回到同一个Item中?
给定以下示例爬虫(Spider):
class SiteSpider(Spider):
site_loader = SiteLoader
...
def parse(self, response):
item = Place()
sel = Selector(response)
bl = self.site_loader(item=item, selector=sel)
bl.add_value('domain', self.parent_domain)
bl.add_value('origin', response.url)
for place_property in item.fields:
parse_xpath = self.template.get(place_property)
# parse_xpath will look like either:
# '//path/to/property/text()'
# or
# {'url': '//a[@id="Location"]/@href',
# 'xpath': '//div[@class="directions"]/span[@class="address"]/text()'}
if isinstance(parse_xpath, dict): # place_property is at a URL
url = sel.xpath(parse_xpath['url_elem']).extract()
yield Request(url, callback=self.get_url_property,
meta={'loader': bl, 'parse_xpath': parse_xpath,
'place_property': place_property})
else: # parse_xpath is just an xpath; process normally
bl.add_xpath(place_property, …Run Code Online (Sandbox Code Playgroud) 考虑以下Elasticsearch(v5.4)对象("奖励"doc类型):
{
"name": "Gold 1000",
"date": "2017-06-01T16:43:00.000+00:00",
"recipient": {
"name": "James Conroy",
"date_of_birth": "1991-05-30"
}
}
Run Code Online (Sandbox Code Playgroud)
award.date 和 award.recipient.date_of_birth 的映射类型都是"date"(日期)。
我想进行范围聚合(range aggregation),以获得获奖者在获奖时所处的年龄段列表("18岁以下","18-24岁","24-30岁","30岁以上")。我尝试了以下聚合查询:
{
"size": 0,
"query": {"match_all": {}},
"aggs": {
"recipients": {
"nested": {
"path": "recipient"
},
"aggs": {
"age_ranges": {
"range": {
"script": {
"inline": "doc['date'].date - doc['recipient.date_of_birth'].date"
},
"keyed": true,
"ranges": [{
"key": "Under 18",
"from": 0,
"to": 18
}, {
"key": "18-24",
"from": 18,
"to": 24
}, {
"key": "24-30",
"from": 24,
"to": …Run Code Online (Sandbox Code Playgroud) datetime date elasticsearch elasticsearch-5 elasticsearch-painless