如何在Mongo文档中更改嵌套字段的数据类型?

Vis*_*was 7 mongodb mongodb-query

我的Mongo结构如下,

"topProcesses" : [
        {
            "cpuUtilizationPercent" : "0.0",
            "processId" : "1",
            "memoryUtilizationPercent" : "0.1",
            "command" : "init",
            "user" : "root"
        },
        {
            "cpuUtilizationPercent" : "0.0",
            "processId" : "2",
            "memoryUtilizationPercent" : "0.0",
            "command" : "kthreadd",
            "user" : "root"
        },
        {
            "cpuUtilizationPercent" : "0.0",
            "processId" : "3",
            "memoryUtilizationPercent" : "0.0",
            "command" : "ksoftirqd/0",
            "user" : "root"
        },
        {
            "cpuUtilizationPercent" : "0.0",
            "processId" : "5",
            "memoryUtilizationPercent" : "0.0",
            "command" : "kworker/0:+",
            "user" : "root"
        },
        {
            "cpuUtilizationPercent" : "0.0",
            "processId" : "6",
            "memoryUtilizationPercent" : "0.0",
            "command" : "kworker/u3+",
            "user" : "root"
        },
        {
            "cpuUtilizationPercent" : "0.0",
            "processId" : "8",
            "memoryUtilizationPercent" : "0.0",
            "command" : "rcu_sched",
            "user" : "root"
        } 
    ]
Run Code Online (Sandbox Code Playgroud)

在上面的文档中,topProcesses.cpuUtilizationPercent是字符串,我想将topProcesses.cpuUtilizationPercent的数据类型更改为Float.为此,我尝试了下面的代码,但它没有奏效

// Asker's original attempt, reproduced as posted (reported as not working).
// For every document that has a "topProcesses" field, it loops over the array
// and issues one update per element, trying to store cpuUtilizationPercent as
// a float instead of a string.
db.collectionName.find({
   "topProcesses":{"$exists":true}}).forEach(function(data){
    for(var ii=0;ii<data.topProcesses.length;ii++){
   // The query part matches on _id only; nothing in it refers to an element of
   // the topProcesses array, while the $set path relies on the positional `$`.
   // The trailing `false,true` are positional arguments — presumably upsert and
   // multi in the shell's update(query, update, upsert, multi) form.
   db.collectionName.update({_id: data._id},{$set:{"topProcesses.$.cpuUtilizationPercent":parseFloat(data.topProcesses[ii].cpuUtilizationPercent)}},false,true);
  }
})
Run Code Online (Sandbox Code Playgroud)

有人能帮忙说明如何在嵌套的Mongo文档中将字符串更改为float吗?

Nei*_*unn 7

您的做法基本正确,但您没有在.update()的查询部分中包含要匹配的数组元素:

// Convert topProcesses[].cpuUtilizationPercent from string to float, one
// update per array element. Each update's query names the element's processId
// so that the positional `$` in the $set path resolves to that element.
db.collectionName.find({ "topProcesses": { "$exists": true } }).forEach(function (doc) {
    var procs = doc.topProcesses;

    for (var idx = 0; idx < procs.length; idx++) {
        var proc = procs[idx];

        // Selector: this document, plus the array element to be matched
        // (this array condition is the corrected part).
        var selector = {
            "_id": doc._id,
            "topProcesses.processId": proc.processId
        };

        // Modifier: overwrite the matched element's value with its parsed form.
        var modifier = {
            "$set": {
                "topProcesses.$.cpuUtilizationPercent": parseFloat(proc.cpuUtilizationPercent)
            }
        };

        db.collectionName.update(selector, modifier);
    }
})
Run Code Online (Sandbox Code Playgroud)

因此,您需要匹配数组中的某些内容,以便位置$运算符产生任何效果.

您也可以在点表示法中直接使用"索引"值,因为您本来就在循环中生成它:

// Convert topProcesses[].cpuUtilizationPercent from string to float using the
// array index in dot notation ("topProcesses.<index>.cpuUtilizationPercent"),
// so no unique identifier per array element is required.
//
// All converted paths for one document are collected into a single $set and
// sent as ONE update per document rather than one update per array element.
db.collectionName.find({
   "topProcesses":{"$exists":true}}).forEach(function(data){

    var processes = data.topProcesses;

    // {"$exists": true} also matches documents where the field is null or not
    // an array; there is nothing to convert in those.
    if (!Array.isArray(processes)) {
        return;
    }

    var updoc = {
        "$set": {}
    };
    var pending = 0;

    for(var ii=0;ii<processes.length;ii++) {
        var raw = processes[ii] ? processes[ii].cpuUtilizationPercent : undefined;

        // Only string values need converting; values that are already numeric
        // (for example from an earlier run) or missing are left untouched.
        if (typeof raw !== "string") {
            continue;
        }

        // Never overwrite stored data with NaN when the string is not numeric.
        var parsed = parseFloat(raw);
        if (isNaN(parsed)) {
            continue;
        }

        var myKey = "topProcesses." + ii + ".cpuUtilizationPercent";
        updoc["$set"][myKey] = parsed;
        pending++;
    }

    // Skip the round trip when nothing changed; some server versions also
    // reject an update whose $set is empty.
    if (pending > 0) {
        db.collectionName.update(
           {
               "_id": data._id
           },
           updoc
        );
    }
})
Run Code Online (Sandbox Code Playgroud)

它只使用匹配的索引,并且在没有数组元素的唯一标识符的情况下很方便.

另请注意,由于处理现有文档的性质,"upsert"或"multi"选项都不适用于此处.


作为"后记"补充说明:在2.6及更高版本中,MongoDB的Bulk Operations API也值得考虑.使用这些API方法可以显着减少客户端应用程序与数据库之间的网络流量.这里最明显的改进是整体速度:

// Same index-based conversion, but the updates are queued on an ordered bulk
// operation and sent to the server once per 1000 queued statements.
var counter = 0;
var bulk = db.collectionName.initializeOrderedBulkOp();

db.collectionName.find({ "topProcesses": { "$exists": true } }).forEach(function (doc) {
    var procs = doc.topProcesses;

    for (var idx = 0; idx < procs.length; idx++) {
        // Dot-notation path addressing this element by its array index.
        var fieldPath = "topProcesses." + idx + ".cpuUtilizationPercent";

        var modifier = { "$set": {} };
        modifier["$set"][fieldPath] = parseFloat(procs[idx].cpuUtilizationPercent);

        // Queue the update; nothing is sent until execute() is called.
        bulk.find({ "_id": doc._id }).update(modifier);
        counter += 1;

        // Every 1000 queued statements: send the batch and start a new one.
        if (counter % 1000 === 0) {
            bulk.execute();
            bulk = db.collectionName.initializeOrderedBulkOp();
        }
    }
});

// Send whatever is still queued; skipped when the last batch was just flushed
// (or nothing was ever queued), so execute() is not called on an empty batch.
if (counter % 1000 !== 0) {
    bulk.execute();
}
Run Code Online (Sandbox Code Playgroud)

这基本上减少了发送到服务器的操作语句数量,每1000个排队操作只发送一次.您可以调整这个数字以及操作的分组方式,但它会以相对安全的方式显着提高速度.