Cor*_*bin 1 mongodb mongodb-query aggregation-framework
我在 MongoDB 中有一个类似于下面的大数据集:
我想在 MongoDB 中运行一个聚合,它相当于这个 SQL:
-- Total number of cores across the physical machines of cluster '1' in the AMS IDC.
SELECT SUM(cores)
FROM machines
WHERE idc = 'AMS'
  AND cluster = '1'
  AND type = 'Physical';
Run Code Online (Sandbox Code Playgroud)
我如何在 MongoDB 中进行此操作?
[
{
"_id" : "55d5dc40281077b6d8af1bfa",
"hostname" : "x",
"domain" : "domain",
"description" : "VMWare ESXi 5",
"cluster" : "1",
"type" : "Physical",
"os" : "EXSi",
"idc" : "AMS",
"environment" : "DR",
"deviceclass" : "host",
"cores" : "64",
"memory" : "256",
"mounts" : [ ],
"roles" : [
"ESX-HOST"
],
"ipset" : {
"backnet" : "1"
},
"frontnet" : [ ],
"created" : "2015-09-08T07:35:03.343Z"
},
{
"_id" : "55d5dc40281077b6d8af1bfb",
"hostname" : "x",
"domain" : "domain",
"description" : "VMWare ESXi 5",
"cluster" : "1",
"type" : "Physical",
"os" : "EXSi",
"idc" : "AMS",
"environment" : "DR",
"deviceclass" : "host",
"cores" : "64",
"memory" : "256",
"mounts" : [ ],
"roles" : [
"ESX-HOST"
],
"ipset" : {
"backnet" : "1"
},
"frontnet" : [ ],
"created" : "2015-09-08T07:35:03.346Z"
}
]
Run Code Online (Sandbox Code Playgroud)
首先,您需要更新您的文档,因为 cores 的值是字符串而不是数字,而 $sum 会忽略非数字值。为此,我们使用“批量”操作。
// Convert every string-typed "cores" value in db.machines to an integer.
// Updates are queued on an ordered bulk operation and sent to the server
// in batches of 200 so that a large collection does not build one huge request.
var bulk = db.machines.initializeOrderedBulkOp(),
    count = 0; // number of update operations queued so far

// "$type": 2 selects only the documents whose "cores" field is a BSON string.
db.machines.find({ "cores": { "$type": 2 } }).forEach(function (doc) {
    // Always pass the radix so the string is parsed as a decimal number.
    var cores = parseInt(doc.cores, 10);

    // Skip values that are not numeric: writing NaN would corrupt the field.
    // Such documents keep their string value and are still matched by the
    // query above, so they can be inspected and fixed separately.
    if (isNaN(cores)) {
        return;
    }

    bulk.find({ "_id": doc._id }).update({ "$set": { "cores": cores } });
    count++;

    if (count % 200 === 0) {
        // Execute per 200 operations and re-init, because a bulk object
        // cannot be reused after execute().
        bulk.execute();
        bulk = db.machines.initializeOrderedBulkOp();
    }
});

// Flush the operations left in the queue. The guard also avoids calling
// execute() on an empty bulk object when nothing (more) was queued.
if (count % 200 !== 0) {
    bulk.execute();
}
Run Code Online (Sandbox Code Playgroud)
然后使用聚合框架,我们可以得到 cores 的总和。首先,我们需要使用 $match 运算符过滤我们的文档;然后在 $group 阶段,我们使用 $sum 运算符来获取 cores 值的总和。
// Sum the "cores" field over the machines matching the three equality filters.
db.machines.aggregate([
    // Stage 1: keep only documents with idc "AMS", cluster "1" and type "Physical".
    {
        $match: {
            idc: "AMS",
            cluster: "1",
            type: "Physical"
        }
    },
    // Stage 2: a null _id puts every remaining document into a single group,
    // whose "cores" values are added up into sum_cores.
    {
        $group: {
            _id: null,
            sum_cores: { $sum: "$cores" }
        }
    }
])
Run Code Online (Sandbox Code Playgroud)
返回:
{ "_id" : null, "sum_cores" : 128 }
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
2295 次 |
| 最近记录: |