I. *_*lov 4 mongodb mongodb-query
我有一些带有 number 字段的文档。有一个进程负责添加 number 值尚不在集合中的文档,但在添加之前,它会先检查是否已存在具有该 number 值的文档。
假设集合中文档的 number 取值为 0 到 234、653 到 667 以及 10543 到 22000。number 在 235 到 652 以及 668 到 10542 之间存在缺口,这些缺失的文档需要导入。
是否可以构建一个查询来返回集合中存在的连续值的范围?(即 0 到 234 和 653 到 667 和 10543 到 22000)
有了这些信息,我就能立即知道需要补齐 235 到 652 和 668 到 10542 之间缺失的文档,并从 22001 继续......
如果您可以接受取回所有丢失的单个 ID 而不是范围,那么这就是您的查询:
// Lists every individual value of the expected range for which no document
// with that "number" exists in the collection.
// The stages are passed as ONE pipeline array: that is the documented
// signature, whereas separate stage arguments are rejected (or read as
// options) by drivers that require an array.
collection.aggregate([
  {
    $group: {
      "_id": null, // group all documents into the same bucket
      "numbers": {
        $push: "$number" // create an array of all "number" fields
      }
    }
  },
  {
    $project: {
      "_id": 0, // get rid of the "_id" field - not really needed
      "numbers": {
        $setDifference: [ // compute the difference between...
          {
            // ...all expected numbers; $range excludes its end value, so
            // [ 0, 10 ] yields 0..9 - adjust this to your needs...
            $range: [ 0, 10 ]
          },
          "$numbers" // ...and the available values for "number"
        ]
      }
    }
  }
])
Run Code Online (Sandbox Code Playgroud)
有多种方法可以根据这些信息计算出范围,但我觉得在您的情况下甚至可能不需要。
更新(根据您的评论):这是一个更长的版本,它增加了一些额外的阶段,把离散的数字转换为范围——代码不是很漂亮,可能也不是特别快,但至少应该可以工作......
// Turns the missing "number" values of the expected range into range
// boundaries. Each output document has three fields, of which the ones that
// do not apply are null:
//   singleNumber - a missing number with no missing neighbour on either side
//   startOfRange - first number of a run of consecutive missing numbers
//   endOfRange   - last number of a run of consecutive missing numbers
//
// Neighbours are detected with membership tests ($in) rather than by array
// position, because the element order of a $setDifference result is
// unspecified and must not be relied on. The stages are passed as one
// pipeline array, which is the documented signature.
collection.aggregate([
  {
    $group: {
      "_id": null, // group all documents into the same bucket
      "numbers": {
        $push: "$number" // create an array of all "number" fields
      }
    }
  },
  {
    $project: {
      "_id": 0, // get rid of the "_id" field - not really needed
      "numbers": {
        $setDifference: [ // compute the difference between...
          {
            // ...all expected numbers; $range excludes its end value, so
            // [ 0, 10 ] yields 0..9 - adjust this to your needs...
            $range: [ 0, 10 ]
          },
          "$numbers" // ...and the available values for "number"
        ]
      }
    }
  },
  {
    $project: {
      "numbers": "$numbers", // ...we create two identical arrays
      "numbers2": "$numbers" // ...by duplicating our missing numbers array
    }
  },
  {
    $unwind: "$numbers" // one document per missing number; "numbers2" still holds the full array
  },
  {
    $project: {
      "number": "$numbers",
      // a missing number lies inside a range on a given side exactly when
      // its direct neighbour on that side is missing as well
      "precedingInRange": { $in: [ { $add: [ "$numbers", -1 ] }, "$numbers2" ] },
      "followingInRange": { $in: [ { $add: [ "$numbers", 1 ] }, "$numbers2" ] }
    }
  },
  {
    $match: {
      $or: [ // filter out all items that are inside a range (or rather: include only the outer items of each range)
        { "precedingInRange": false },
        { "followingInRange": false }
      ]
    }
  },
  {
    $sort: {
      "number": 1 // emit the boundaries in ascending order so each start is followed by its end
    }
  },
  {
    $project: { // some beautification of the output to help deal with the data in your application
      "singleNumber": { $cond: [ { $not: [ { $or: [ "$precedingInRange", "$followingInRange" ] } ] }, "$number", null ] },
      "startOfRange": { $cond: [ "$followingInRange", "$number", null ] },
      "endOfRange": { $cond: [ "$precedingInRange", "$number", null ] }
    }
  }
])
Run Code Online (Sandbox Code Playgroud)
更新 2:
我有一种感觉,我找到了一种更好的方法来很好地获得范围,而无需涉及太多魔法:
// Returns the ranges of consecutive "number" values that exist in the
// collection as "ranges": [ [ start, end ], ... ].
// Assumes "number" values are unique; duplicates would yield unequal numbers
// of range starts and ends.
// The stages are passed as one pipeline array, which is the documented
// signature.
collection.aggregate([
  {
    $sort: {
      "number": 1 // ascending order so that the collected starts and ends line up pairwise
    }
  },
  {
    $group: {
      "_id": null, // group all documents into the same bucket
      "numbers": {
        $push: "$number" // create an array of all "number" fields
      }
    }
  },
  {
    $project: {
      "numbers": {
        $reduce: {
          input: "$numbers",
          // same shape as the value produced by "in", so "$$value.start" and
          // "$$value.end" are well defined from the first iteration on
          initialValue: { "start": [], "end": [] },
          in: {
            "start": {
              $concatArrays: [
                "$$value.start",
                {
                  $cond: { // a number starts a range if "current element - 1" does not exist
                    if: { $in: [ { $add: [ "$$this", -1 ] }, "$numbers" ] },
                    then: [],
                    else: [ "$$this" ]
                  }
                }
              ]
            },
            "end": {
              $concatArrays: [
                "$$value.end",
                {
                  $cond: { // a number ends a range if "current element + 1" does not exist
                    if: { $in: [ { $add: [ "$$this", 1 ] }, "$numbers" ] },
                    then: [],
                    else: [ "$$this" ]
                  }
                }
              ]
            }
          }
        }
      }
    }
  },
  {
    $project: {
      "ranges": {
        $zip: { // pair the n-th start with the n-th end
          inputs: [ "$numbers.start", "$numbers.end" ]
        }
      }
    }
  }
])
Run Code Online (Sandbox Code Playgroud)