将$ graphLookup的ObjectId与String匹配

Ces*_*sar 2 mongodb mongodb-query aggregation-framework

我正在尝试运行$graphLookup类似以下所示的示例:

$ graphLookup示例

目的是在给定特定记录($match在此处有注释)的情况下,通过immediateAncestors属性检索其完整的“路径” 。如您所见,这没有发生。

$convert在这里介绍了_idfrom的处理方式,因为string相信可以与_idfrom immediateAncestors记录列表(是string)“匹配” 。

因此,我确实使用不同的数据运行了另一个测试(不ObjectId涉及):

db.nodos.insert({"id":5,"name":"cinco","children":[{"id":4}]})
db.nodos.insert({"id":4,"name":"quatro","ancestors":[{"id":5}],"children":[{"id":3}]})
db.nodos.insert({"id":6,"name":"seis","children":[{"id":3}]})
db.nodos.insert({"id":1,"name":"um","children":[{"id":2}]})
db.nodos.insert({"id":2,"name":"dois","ancestors":[{"id":1}],"children":[{"id":3}]})
db.nodos.insert({"id":3,"name":"três","ancestors":[{"id":2},{"id":4},{"id":6}]})
db.nodos.insert({"id":7,"name":"sete","children":[{"id":5}]})
Run Code Online (Sandbox Code Playgroud)

和查询:

db.nodos.aggregate( [
  { $match: { "id": 3 } },
  { $graphLookup: {
      from: "nodos",
      startWith: "$ancestors.id",
      connectFromField: "ancestors.id",
      connectToField: "id",
      as: "ANCESTORS_FROM_BEGINNING"
    }
  },
  { $project: {
      "name": 1,
      "id": 1,
      "ANCESTORS_FROM_BEGINNING": "$ANCESTORS_FROM_BEGINNING.id"
    }
  }
] )
Run Code Online (Sandbox Code Playgroud)

...输出了我期望的结果(这5条记录直接或间接与id3 条记录相关):

{
    "_id" : ObjectId("5afe270fb4719112b613f1b4"),
    "id" : 3.0,
    "name" : "três",
    "ANCESTORS_FROM_BEGINNING" : [ 
        1.0, 
        4.0, 
        6.0, 
        5.0, 
        2.0
    ]
}
Run Code Online (Sandbox Code Playgroud)

问题是:有没有一种方法可以实现我一开始提到的对象?

我正在运行Mongo 3.7.9(来自官方Docker)

提前致谢!

Nei*_*unn 6

您当前正在使用MongoDB的开发版本,该版本已启用某些功能,这些功能预计将与MongoDB 4.0一起作为正式发行版发布。请注意,某些功能可能会在最终发行版之前进行更改,因此,在提交产品代码之前,生产代码应意识到这一点。

为什么$ convert在这里失败

可能最好的解释方式是查看更改后的样本,但用数组下面的ObjectId_id和“字符串” 替换:

{
  "_id" : ObjectId("5afe5763419503c46544e272"),
   "name" : "cinco",
   "children" : [ { "_id" : "5afe5763419503c46544e273" } ]
},
{
  "_id" : ObjectId("5afe5763419503c46544e273"),
  "name" : "quatro",
  "ancestors" : [ { "_id" : "5afe5763419503c46544e272" } ],
  "children" : [ { "_id" : "5afe5763419503c46544e277" } ]
},
{ 
  "_id" : ObjectId("5afe5763419503c46544e274"),
  "name" : "seis",
  "children" : [ { "_id" : "5afe5763419503c46544e277" } ]
},
{ 
  "_id" : ObjectId("5afe5763419503c46544e275"),
  "name" : "um",
  "children" : [ { "_id" : "5afe5763419503c46544e276" } ]
}
{
  "_id" : ObjectId("5afe5763419503c46544e276"),
  "name" : "dois",
  "ancestors" : [ { "_id" : "5afe5763419503c46544e275" } ],
  "children" : [ { "_id" : "5afe5763419503c46544e277" } ]
},
{ 
  "_id" : ObjectId("5afe5763419503c46544e277"),
  "name" : "três",
  "ancestors" : [
    { "_id" : "5afe5763419503c46544e273" },
    { "_id" : "5afe5763419503c46544e274" },
    { "_id" : "5afe5763419503c46544e276" }
  ]
},
{ 
  "_id" : ObjectId("5afe5764419503c46544e278"),
  "name" : "sete",
  "children" : [ { "_id" : "5afe5763419503c46544e272" } ]
}
Run Code Online (Sandbox Code Playgroud)

这应该可以大致模拟您要使用的内容。

您试图在进入阶段之前_id通过将值转换为“字符串” 。失败的原因是当您在此管道中执行了初始“内部”操作时,问题在于该选项中的源仍然是未更改的集合,因此您在后续的“查找”迭代中未获得正确的详细信息。$project$graphLookup$project$graphLookup"from"

db.strcoll.aggregate([
  { "$match": { "name": "três" } },
  { "$addFields": {
    "_id": { "$toString": "$_id" }
  }},
  { "$graphLookup": {
    "from": "strcoll",
    "startWith": "$ancestors._id",
    "connectFromField": "ancestors._id",
    "connectToField": "_id",
    "as": "ANCESTORS_FROM_BEGINNING"
  }},
  { "$project": {
    "name": 1,
    "ANCESTORS_FROM_BEGINNING": "$ANCESTORS_FROM_BEGINNING._id"
  }}
])
Run Code Online (Sandbox Code Playgroud)

因此,与“查找”不匹配:

{
        "_id" : "5afe5763419503c46544e277",
        "name" : "três",
        "ANCESTORS_FROM_BEGINNING" : [ ]
}
Run Code Online (Sandbox Code Playgroud)

“修补”问题

但是,这是核心问题,而不是失败的原因,$convert或者是别名本身。为了使它真正起作用,我们可以改而创建一个“视图”,出于输入目的将其自身表示为集合。

我将以另一种方式进行此操作,并将“字符串”转换为ObjectIdvia $toObjectId

db.createView("idview","strcoll",[
  { "$addFields": {
    "ancestors": {
      "$ifNull": [ 
        { "$map": {
          "input": "$ancestors",
          "in": { "_id": { "$toObjectId": "$$this._id" } }
        }},
        "$$REMOVE"
      ]
    },
    "children": {
      "$ifNull": [
        { "$map": {
          "input": "$children",
          "in": { "_id": { "$toObjectId": "$$this._id" } }
        }},
        "$$REMOVE"
      ]
    }
  }}
])
Run Code Online (Sandbox Code Playgroud)

但是,使用“视图”意味着已转换的值始终可以看到数据。因此下面使用视图进行聚合:

db.idview.aggregate([
  { "$match": { "name": "três" } },
  { "$graphLookup": {
    "from": "idview",
    "startWith": "$ancestors._id",
    "connectFromField": "ancestors._id",
    "connectToField": "_id",
    "as": "ANCESTORS_FROM_BEGINNING"
  }},
  { "$project": {
    "name": 1,
    "ANCESTORS_FROM_BEGINNING": "$ANCESTORS_FROM_BEGINNING._id"
  }}
])
Run Code Online (Sandbox Code Playgroud)

返回预期的输出:

{
    "_id" : ObjectId("5afe5763419503c46544e277"),
    "name" : "três",
    "ANCESTORS_FROM_BEGINNING" : [
        ObjectId("5afe5763419503c46544e275"),
        ObjectId("5afe5763419503c46544e273"),
        ObjectId("5afe5763419503c46544e274"),
        ObjectId("5afe5763419503c46544e276"),
        ObjectId("5afe5763419503c46544e272")
    ]
}
Run Code Online (Sandbox Code Playgroud)

解决问题

综上所述,真正的问题是您拥有一些“看起来像” ObjectId值的数据,实际上是有效的ObjectId,但是已将其记录为“字符串”。一切正常工作的基本问题是两个“类型”不相同,并且在尝试“联接”时导致相等不匹配。

因此,实际的修复方法仍然与以前一样,而是遍历数据并对其进行修复,以使“字符串”实际上也是ObjectId值。然后_id,它们将与它们要引用的键匹配,并且您节省了大量的存储空间,因为与ObjectId十六进制字符的字符串表示相比,此方法占用的存储空间要少得多。

使用MongoDB 4.0方法,您实际上可以“使用” "$toObjectId"来编写新的集合,几乎与我们之前创建“视图”的情况相同:

db.strcoll.aggregate([
  { "$addFields": {
    "ancestors": {
      "$ifNull": [ 
        { "$map": {
          "input": "$ancestors",
          "in": { "_id": { "$toObjectId": "$$this._id" } }
        }},
        "$$REMOVE"
      ]
    },
    "children": {
      "$ifNull": [
        { "$map": {
          "input": "$children",
          "in": { "_id": { "$toObjectId": "$$this._id" } }
        }},
        "$$REMOVE"
      ]
    }
  }}
  { "$out": "fixedcol" }
])
Run Code Online (Sandbox Code Playgroud)

或者,当然,如果您“需要”保留相同的集合,那么传统的“循环和更新”仍然与始终需要的相同:

var updates = [];

db.strcoll.find().forEach(doc => {
  var update = { '$set': {} };

  if ( doc.hasOwnProperty('children') )
    update.$set.children = doc.children.map(e => ({ _id: new ObjectId(e._id) }));
  if ( doc.hasOwnProperty('ancestors') )
    update.$set.ancestors = doc.ancestors.map(e => ({ _id: new ObjectId(e._id) }));

  updates.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      update
    }
  });

  if ( updates.length > 1000 ) {
    db.strcoll.bulkWrite(updates);
    updates = [];
  }

})

if ( updates.length > 0 ) {
  db.strcoll.bulkWrite(updates);
  updates = [];
}
Run Code Online (Sandbox Code Playgroud)

由于实际上一次覆盖了整个阵列,因此实际上有点“大锤”。对于生产环境而言,这不是一个好主意,但对于本练习的目的而言,足以作为一个演示。

结论

因此,尽管MongoDB 4.0将添加这些确实非常有用的“广播”功能,但它们的实际意图并非真正适用于此类情况。实际上,如使用聚合管道“转换”为新集合所展示的,它们比大多数其他可能的用途要有用得多。

虽然我们“可以”创造“视图”的转换数据类型,使之类的东西$lookup$graphLookup工作在实际收集的数据不同,这真的只是一个“创可贴”的现实问题,因为数据类型实在不应该不同,实际上应该永久转换。

使用“视图”实际上意味着每次访问“集合”(实际上是“视图”)时,用于构建的聚合管道都需要有效地运行,这会产生实际的开销。

避免开销通常是设计目标,因此,纠正此类数据存储错误对于使您的应用程序获得真正的性能至关重要,而不仅仅是使用“蛮力”工作,这只会使事情变慢。


一个更加安全的“转换”脚本,该脚本将“匹配的”更新应用于每个数组元素。此处的代码需要NodeJS v10.x和最新版本的MongoDB节点驱动程序3.1.x:

const { MongoClient, ObjectID: ObjectId } = require('mongodb');
const EJSON = require('mongodb-extended-json');

const uri = 'mongodb://localhost/';

const log = data => console.log(EJSON.stringify(data, undefined, 2));

(async function() {

  try {

    const client = await MongoClient.connect(uri);
    let db = client.db('test');
    let coll = db.collection('strcoll');

    let fields = ["ancestors", "children"];

    let cursor = coll.find({
      $or: fields.map(f => ({ [`${f}._id`]: { "$type": "string" } }))
    }).project(fields.reduce((o,f) => ({ ...o, [f]: 1 }),{}));

    let batch = [];

    for await ( let { _id, ...doc } of cursor ) {

      let $set = {};
      let arrayFilters = [];

      for ( const f of fields ) {
        if ( doc.hasOwnProperty(f) ) {
          $set = { ...$set,
            ...doc[f].reduce((o,{ _id },i) =>
              ({ ...o, [`${f}.$[${f.substr(0,1)}${i}]._id`]: ObjectId(_id) }),
              {})
          };

          arrayFilters = [ ...arrayFilters,
            ...doc[f].map(({ _id },i) =>
              ({ [`${f.substr(0,1)}${i}._id`]: _id }))
          ];
        }
      }

      if (arrayFilters.length > 0)
        batch = [ ...batch,
          { updateOne: { filter: { _id }, update: { $set }, arrayFilters } }
        ];

      if ( batch.length > 1000 ) {
        let result = await coll.bulkWrite(batch);
        batch = [];
      }

    }

    if ( batch.length > 0 ) {
      log({ batch });
      let result = await coll.bulkWrite(batch);
      log({ result });
    }

    await client.close();

  } catch(e) {
    console.error(e)
  } finally {
    process.exit()
  }

})()
Run Code Online (Sandbox Code Playgroud)

为七个文档产生并执行如下批量操作:

{
  "updateOne": {
    "filter": {
      "_id": {
        "$oid": "5afe5763419503c46544e272"
      }
    },
    "update": {
      "$set": {
        "children.$[c0]._id": {
          "$oid": "5afe5763419503c46544e273"
        }
      }
    },
    "arrayFilters": [
      {
        "c0._id": "5afe5763419503c46544e273"
      }
    ]
  }
},
{
  "updateOne": {
    "filter": {
      "_id": {
        "$oid": "5afe5763419503c46544e273"
      }
    },
    "update": {
      "$set": {
        "ancestors.$[a0]._id": {
          "$oid": "5afe5763419503c46544e272"
        },
        "children.$[c0]._id": {
          "$oid": "5afe5763419503c46544e277"
        }
      }
    },
    "arrayFilters": [
      {
        "a0._id": "5afe5763419503c46544e272"
      },
      {
        "c0._id": "5afe5763419503c46544e277"
      }
    ]
  }
},
{
  "updateOne": {
    "filter": {
      "_id": {
        "$oid": "5afe5763419503c46544e274"
      }
    },
    "update": {
      "$set": {
        "children.$[c0]._id": {
          "$oid": "5afe5763419503c46544e277"
        }
      }
    },
    "arrayFilters": [
      {
        "c0._id": "5afe5763419503c46544e277"
      }
    ]
  }
},
{
  "updateOne": {
    "filter": {
      "_id": {
        "$oid": "5afe5763419503c46544e275"
      }
    },
    "update": {
      "$set": {
        "children.$[c0]._id": {
          "$oid": "5afe5763419503c46544e276"
        }
      }
    },
    "arrayFilters": [
      {
        "c0._id": "5afe5763419503c46544e276"
      }
    ]
  }
},
{
  "updateOne": {
    "filter": {
      "_id": {
        "$oid": "5afe5763419503c46544e276"
      }
    },
    "update": {
      "$set": {
        "ancestors.$[a0]._id": {
          "$oid": "5afe5763419503c46544e275"
        },
        "children.$[c0]._id": {
          "$oid": "5afe5763419503c46544e277"
        }
      }
    },
    "arrayFilters": [
      {
        "a0._id": "5afe5763419503c46544e275"
      },
      {
        "c0._id": "5afe5763419503c46544e277"
      }
    ]
  }
},
{
  "updateOne": {
    "filter": {
      "_id": {
        "$oid": "5afe5763419503c46544e277"
      }
    },
    "update": {
      "$set": {
        "ancestors.$[a0]._id": {
          "$oid": "5afe5763419503c46544e273"
        },
        "ancestors.$[a1]._id": {
          "$oid": "5afe5763419503c46544e274"
        },
        "ancestors.$[a2]._id": {
          "$oid": "5afe5763419503c46544e276"
        }
      }
    },
    "arrayFilters": [
      {
        "a0._id": "5afe5763419503c46544e273"
      },
      {
        "a1._id": "5afe5763419503c46544e274"
      },
      {
        "a2._id": "5afe5763419503c46544e276"
      }
    ]
  }
},
{
  "updateOne": {
    "filter": {
      "_id": {
        "$oid": "5afe5764419503c46544e278"
      }
    },
    "update": {
      "$set": {
        "children.$[c0]._id": {
          "$oid": "5afe5763419503c46544e272"
        }
      }
    },
    "arrayFilters": [
      {
        "c0._id": "5afe5763419503c46544e272"
      }
    ]
  }
}
Run Code Online (Sandbox Code Playgroud)