MongoDB: 使用 $redact 过滤掉部分数组元素

Ele*_*ena 8 mongodb mongodb-query aggregation-framework

我正在尝试对样本bios集合http://docs.mongodb.org/manual/reference/bios-example-collection/进行查询:

检索所有人,以及他们在获得图灵奖之前所获得的奖项.

我写出了这个查询:

// Pairs each person's Turing Award with every one of their awards and keeps
// only the pairs in which the other award has an earlier year.
db.bios.aggregate([
    // Keep only documents whose awards array contains a "Turing Award" entry.
    {$match: {"awards.award" : "Turing Award"}},
    // Copy the awards array into two fields and flatten the name sub-document.
    {$project: {"award1": "$awards", "award2": "$awards", "first_name": "$name.first", "last_name": "$name.last"}},
    // Emit one document per award1 element ...
    {$unwind: "$award1"},
    // ... and keep only the one that is the Turing Award itself.
    {$match: {"award1.award" : "Turing Award"}},
    // Emit one document per (award1, award2) pair.
    {$unwind: "$award2"},
    // Keep the pair when award1.year is greater than award2.year, drop it otherwise.
    {$redact: {
        $cond: {
           // The inner $gt already yields a boolean; the outer $eq against
           // true passes that result through unchanged.
           if: { $eq: [ { $gt: [ "$award1.year", "$award2.year"] }, true]},
           then: "$$KEEP",
           else: "$$PRUNE"
           }
        }
    }
])
Run Code Online (Sandbox Code Playgroud)

这就是答案:

/* 0 */
{
    "result" : [ 
    {
        "_id" : 1,
        "award1" : {
            "award" : "Turing Award",
            "year" : 1977,
            "by" : "ACM"
        },
        "award2" : {
            "award" : "W.W. McDowell Award",
            "year" : 1967,
            "by" : "IEEE Computer Society"
        },
        "first_name" : "John",
        "last_name" : "Backus"
    }, 
    {
        "_id" : 1,
        "award1" : {
            "award" : "Turing Award",
            "year" : 1977,
            "by" : "ACM"
        },
        "award2" : {
            "award" : "National Medal of Science",
            "year" : 1975,
            "by" : "National Science Foundation"
        },
        "first_name" : "John",
        "last_name" : "Backus"
    }, 
    {
        "_id" : 4,
        "award1" : {
            "award" : "Turing Award",
            "year" : 2001,
            "by" : "ACM"
        },
        "award2" : {
            "award" : "Rosing Prize",
            "year" : 1999,
            "by" : "Norwegian Data Association"
        },
        "first_name" : "Kristen",
        "last_name" : "Nygaard"
    }, 
    {
        "_id" : 5,
        "award1" : {
            "award" : "Turing Award",
            "year" : 2001,
            "by" : "ACM"
        },
        "award2" : {
            "award" : "Rosing Prize",
            "year" : 1999,
            "by" : "Norwegian Data Association"
        },
        "first_name" : "Ole-Johan",
        "last_name" : "Dahl"
    }
],
"ok" : 1
}
Run Code Online (Sandbox Code Playgroud)

我不喜欢这个解决方案的地方在于我对 $award2 做了展开($unwind).我更希望把 award2 保留为数组,只去掉那些在 award1 之后获得的奖项.因此,例如,John Backus 的结果应该是:

{
    "_id" : 1,
    "first_name" : "John",
    "last_name" : "Backus",
    "award1" : {
        "award" : "Turing Award",
        "year" : 1977,
        "by" : "ACM"
    },
    "award2" : [ 
        {
            "award" : "W.W. McDowell Award",
            "year" : 1967,
            "by" : "IEEE Computer Society"
        }, 
        {
            "award" : "National Medal of Science",
            "year" : 1975,
            "by" : "National Science Foundation"
        }
    ]
}
Run Code Online (Sandbox Code Playgroud)

是否可以只用 $redact、而不做 $unwind: "$award2" 来实现这一点?

Bla*_*ven 5

如果您在问题中附上文档的原始状态作为示例,可能会更有帮助:除了给出期望的输出之外,这样还能清楚地展示"您从哪里出发"以及"您想要到达哪里".

这只是一个建议,不过看起来你是从这样的文档开始的:

{
    "_id" : 1,
    "name": { 
        "first" : "John",
        "last" : "Backus"
    },
    "awards" : [
        {
            "award" : "W.W. McDowell Award",
            "year" : 1967,
            "by" : "IEEE Computer Society"
        }, 
        {
            "award" : "National Medal of Science",
            "year" : 1975,
            "by" : "National Science Foundation"
        },
        { 
            "award" : "Turing Award",
            "year" : 1977,
            "by" : "ACM"
        },
        {
            "award" : "Some other award",
            "year" : 1979,
            "by" : "Someone Else"
        }
    ]
}
Run Code Online (Sandbox Code Playgroud)

所以这里真正的要点是,虽然你可能已经想到了 $redact(它确实比先用 $project 计算逻辑条件、再用 $match 按该条件过滤要好),但它可能并不是完成你想要的这种比较的最佳工具.

在继续之前,我只想指出在这里使用 $redact 的主要问题.无论你怎么做,(不做展开时的)逻辑本质上都是借助 $$DESCEND 进行"直接"比较,以便在任意层级上根据 "year" 的值处理数组元素.

这种递归也会使针对 "award1" 的条件失效,因为它具有相同的字段名.即使重命名该字段也会破坏逻辑,因为在缺少该投影值的层级上,比较不会得出"大于被测试值"的结果.

简而言之,$redact 被排除在外,因为你无法在它所应用的逻辑中表达"只从这里取值".

替代方法是使用 $map 和 $setDifference 来过滤数组中的内容,如下所示:

// Splits each person's awards into the Turing Award (award1) and the awards
// from an earlier year (award2) while keeping award2 as an array.
// NOTE(review): $setDifference works on sets, so duplicate entries would
// collapse and element order is not guaranteed - confirm this is acceptable.
db.bios.aggregate([
    // Keep only documents whose awards array contains a "Turing Award" entry.
    { "$match": { "awards.award": "Turing Award" } },
    { "$project": {
        "first_name": "$name.first",
        "last_name": "$name.last",
        // award1: map every non-Turing entry to false, then strip the false
        // placeholders, leaving an array of the Turing Award entries.
        "award1": { "$setDifference": [
            { "$map": {
                "input": "$awards",
                "as": "a",
                "in": { "$cond": [
                    { "$eq": [ "$$a.award", "Turing Award" ] },
                    "$$a",
                    false
                ]}
            }},
            [false]
        ]},
        // award2: the complement - every entry that is not the Turing Award.
        "award2": { "$setDifference": [
            { "$map": {
                "input": "$awards",
                "as": "a",
                "in": { "$cond": [
                    { "$ne": [ "$$a.award", "Turing Award" ] },
                    "$$a",
                    false
                ]}
            }},
            [false]
        ]}
    }},
    // award1 is still an array here; unwind it into a single embedded
    // document so that "$award1.year" can be compared below.
    { "$unwind": "$award1" },
    { "$project": {
        "first_name": 1,
        "last_name": 1,
        "award1": 1,
        // Keep only the award2 entries whose year is less than award1.year,
        // using the same map-to-false / strip-false technique.
        "award2": { "$setDifference": [
            { "$map": {
                "input": "$award2",
                "as": "a",
                "in": { "$cond": [
                     { "$gt": [ "$award1.year", "$$a.year" ] },
                     "$$a",
                     false
                 ]}
            }},
            [false]            
        ]}
    }}
])
Run Code Online (Sandbox Code Playgroud)

而且实际上没有"漂亮"的方法可以绕开在中间阶段使用 $unwind,甚至也绕不开第二个 $project,因为 $map(以及 $setDifference 过滤)返回的"仍然是一个数组".因此必须用 $unwind 把这个"数组"变成单个条目(假设您的条件仅匹配 1 个元素),以便用于比较.

试图把所有逻辑"挤"进单个 $project,只会在第二个输出中产生"数组的数组",因此仍然需要某种"展开";但至少按这种方式,展开(希望是)唯一的 1 个匹配项开销并不大,并且能保持输出整洁.


但另一件要注意的是,你根本就没有"聚合"任何东西.这只是文档操作,因此您可能会考虑直接在客户端代码中进行操作.正如这个shell示例所示:

// Client-side variant: fetch the name and awards of every Turing Award winner
// and reshape each document in the shell before printing it.
db.bios.find(
    { "awards.award": "Turing Award" },
    { "name": 1, "awards": 1 }
).forEach(function(bio) {
    var person = bio.name;
    var allAwards = bio.awards;

    // Remove the raw fields first; the flattened fields are appended below,
    // so the printed key order is unchanged.
    delete bio.name;
    delete bio.awards;

    bio.first_name = person.first;
    bio.last_name = person.last;

    // The query only returns documents containing a Turing Award entry;
    // take the first such entry.
    var turing = allAwards.filter(function(entry) {
        return entry.award == "Turing Award";
    })[0];
    bio.award1 = turing;

    // Keep only the awards whose year is less than the Turing Award's year.
    bio.award2 = allAwards.filter(function(entry) {
        return turing.year > entry.year;
    });

    printjson(bio);
});
Run Code Online (Sandbox Code Playgroud)

无论如何,两种方法都会输出相同的:

{
    "_id" : 1,
    "first_name" : "John",
    "last_name" : "Backus",
    "award1" : {
            "award" : "Turing Award",
            "year" : 1977,
            "by" : "ACM"
    },
    "award2" : [
            {
                    "award" : "W.W. McDowell Award",
                    "year" : 1967,
                    "by" : "IEEE Computer Society"
            },
            {
                    "award" : "National Medal of Science",
                    "year" : 1975,
                    "by" : "National Science Foundation"
            }
    ]
}
Run Code Online (Sandbox Code Playgroud)

这里唯一真正的区别是,使用 .aggregate() 时,"award2" 的内容在从服务器返回时就已经过滤好了;这与在客户端处理的方法可能没有太大区别,除非每个文档中要删除的项目构成一个相当大的列表.


顺便说明一下,对现有聚合管道真正需要做的唯一更改,是在末尾添加一个 $group,把数组条目"重新组合"到一个文档中:

// Unwind-based pipeline: pair the Turing Award with every award, keep the
// pairs with an earlier year, then regroup them into one document per person.
db.bios.aggregate([
    // Keep only documents whose awards array contains a "Turing Award" entry.
    { "$match": { "awards.award": "Turing Award" } },
    // Flatten the name and copy the awards array into two fields.
    { "$project": {
        "first_name": "$name.first", 
        "last_name": "$name.last",
        "award1": "$awards",
        "award2": "$awards"
    }},
    // Reduce award1 to the single Turing Award entry.
    { "$unwind": "$award1" },
    { "$match": {"award1.award" : "Turing Award" }},
    // Emit one document per (award1, award2) pair.
    { "$unwind": "$award2" },
    // Keep the pair only when award1.year is greater than award2.year.
    { "$redact": {
        "$cond": {
             "if": { "$gt": [ "$award1.year", "$award2.year"] },
             "then": "$$KEEP",
             "else": "$$PRUNE"
        }
    }},
    // Collapse the surviving pairs back into one document per original _id:
    // scalar fields take the first value seen, award2 entries are collected
    // into an array.
    { "$group": {
        "_id": "$_id",
        "first_name": { "$first": "$first_name" },
        "last_name": { "$first": "$last_name" },
        "award1": { "$first": "$award1" },
        "award2": { "$push": "$award2" }
    }}
])
Run Code Online (Sandbox Code Playgroud)

但话说回来,这里的所有操作都伴随着"数组复制"和"展开的开销".因此,前两种方法中的任何一种才是您真正想要的,以避免这种情况.