MongoDB 聚合 - SUM WHERE

Cor*_*bin 1 mongodb mongodb-query aggregation-framework

我在 MongoDB 中有一个类似于下面的大数据集：

我想在 MongoDB 中运行一个聚合，它相当于这个 SQL：

-- Total number of cores across physical machines in cluster 1 of the AMS data center.
SELECT SUM(cores)
FROM machines
WHERE idc = 'AMS'
  AND cluster = '1'
  AND type = 'Physical';
Run Code Online (Sandbox Code Playgroud)

我如何在 MongoDB 中进行此操作?

[
    {
        "_id" : "55d5dc40281077b6d8af1bfa",
        "hostname" : "x",
        "domain" : "domain",
        "description" : "VMWare ESXi 5",
        "cluster" : "1",
        "type" : "Physical",
        "os" : "EXSi",
        "idc" : "AMS",
        "environment" : "DR",
        "deviceclass" : "host",
        "cores" : "64",
        "memory" : "256",
        "mounts" : [ ],
        "roles" : [
                "ESX-HOST"
        ],
        "ipset" : {
                "backnet" : "1"
        },
        "frontnet" : [ ],
        "created" : "2015-09-08T07:35:03.343Z"
    },
    {
        "_id" : "55d5dc40281077b6d8af1bfb",
        "hostname" : "x",
        "domain" : "domain",
        "description" : "VMWare ESXi 5",
        "cluster" : "1",
        "type" : "Physical",
        "os" : "EXSi",
        "idc" : "AMS",
        "environment" : "DR",
        "deviceclass" : "host",
        "cores" : "64",
        "memory" : "256",
        "mounts" : [ ],
        "roles" : [
                "ESX-HOST"
        ],
        "ipset" : {
                "backnet" : "1"
        },
        "frontnet" : [ ],
        "created" : "2015-09-08T07:35:03.346Z"
    }
]
Run Code Online (Sandbox Code Playgroud)

sty*_*ane 5

首先,您需要更新您的文档,因为cores值是字符串而不是数字。为此,我们使用“批量”操作。

// Convert every string-typed `cores` value in `machines` to a number so that
// $sum can add it up. Updates are queued in a bulk operation and sent to the
// server in batches instead of one round trip per document.
var BATCH_SIZE = 200;
var bulk = db.machines.initializeOrderedBulkOp();
var count = 0;

// BSON type 2 is "string": only documents whose `cores` is still a string match,
// so re-running the script skips documents that were already converted.
db.machines.find({ "cores": { "$type": 2 } }).forEach(function (doc) {
    // Always pass the radix so the string is parsed as decimal.
    var cores = parseInt(doc.cores, 10);

    // Leave non-numeric strings untouched rather than overwriting them with NaN.
    if (isNaN(cores)) {
        return;
    }

    bulk.find({ "_id": doc._id }).update({ "$set": { "cores": cores } });
    count++;

    if (count % BATCH_SIZE === 0) {
        // Flush the full batch and start a fresh bulk operation.
        bulk.execute();
        bulk = db.machines.initializeOrderedBulkOp();
    }
});

// Flush the remaining queued updates. Skipped when the last batch was already
// sent (or nothing was queued), because there is then nothing left to execute.
if (count % BATCH_SIZE !== 0) {
    bulk.execute();
}
Run Code Online (Sandbox Code Playgroud)

然后使用聚合框架，我们可以得到 cores 的总和。首先，我们需要使用 $match 运算符过滤文档；然后在 $group 阶段，使用 $sum 运算符计算 cores 值的总和。

// Stage 1: keep only the documents matching all three equality conditions.
var matchStage = {
    "$match": { "idc": "AMS", "cluster": "1", "type": "Physical" }
};

// Stage 2: collapse everything into a single group (_id: null) and add up `cores`.
var groupStage = {
    "$group": { "_id": null, "sum_cores": { "$sum": "$cores" } }
};

db.machines.aggregate([matchStage, groupStage])
Run Code Online (Sandbox Code Playgroud)

返回:

{ "_id" : null, "sum_cores" : 128 }
Run Code Online (Sandbox Code Playgroud)