查询连续值的范围 mongo

I. *_*lov 4 mongodb mongodb-query

我有一些带有 number 字段的文档。有一个进程会添加那些 number 值尚不在集合中的文档,但在添加之前,它会先检查是否已存在 number 为该值的文档。

假设集合中有 number 从 0 到 234、从 653 到 667 以及从 10543 到 22000 的文档。number 在 235 到 652 和 668 到 10542 这两段上存在缺口,这些缺失的文档需要导入。

是否可以构建一个查询来返回集合中存在的连续值的范围?(即 0 到 234 和 653 到 667 和 10543 到 22000)

有了这些信息,我就能立即知道需要补上 235 到 652 和 668 到 10542 之间缺失的文档,并从 22001 继续......

dni*_*ess 5

如果您可以接受取回所有丢失的单个 ID 而不是范围,那么这就是您的查询:

// Lists every individual "number" value inside the checked interval that has
// no document in the collection (single values, not ranges).
// The stages are passed as ONE array: that is the documented pipeline form and
// it leaves the second argument free for aggregation options.
collection.aggregate([{
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // create an array of all "number" fields
        }
    }
}, {
    $project: {
        "_id": 0, // get rid of the "_id" field - not really needed
        "numbers": {
            $setDifference: [ { // compute the difference between...
                $range: [ 0, 10 ] // ...the integers 0 through 9 ($range excludes its end value) - adjust this to your needs...
            }, "$numbers" ] // ...and the available values for "number"
        }
    }
}])
Run Code Online (Sandbox Code Playgroud)

有多种方法可以根据这些信息计算出范围,但我觉得在您的情况下可能根本不需要。

更新(根据您的评论):这是一个更长的版本,它增加了一些额外的阶段,以从离散数字到范围 - 代码不是很漂亮,可能不是超快,但它至少应该工作......

// Turns the missing "number" values of the checked interval into ranges.
// Each output document describes either a missing single value
// ("singleNumber"), the first value of a missing range ("startOfRange") or the
// last value of a missing range ("endOfRange"); the two unused fields are null.
//
// No leading $sort is needed: the order of the stored values never reaches the
// output, because $setDifference only uses them for membership tests.
// NOTE(review): the range detection assumes $setDifference returns the missing
// values in ascending order (the order of its $range input). MongoDB documents
// that output order as unspecified - confirm on your server version.
collection.aggregate([{
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // create an array of all "number" fields
        }
    }
}, {
    $project: {
        "_id": 0, // get rid of the "_id" field - not really needed
        "numbers": {
            $setDifference: [ { // compute the difference between...
                $range: [ 0, 10 ] // ...the integers 0 through 9 ($range excludes its end value) - adjust this to your needs...
            }, "$numbers" ] // ...and the available values for "number"
        }
    }
}, {
    $project: {
        "numbers": "$numbers", // ...we create two identical arrays
        "numbers2": "$numbers" // ...by duplicating our missing numbers array
    }
}, {
    $unwind: {
        path: "$numbers", // this will flatten one of the two created number arrays...
        includeArrayIndex: "index" // ...and record each element's position, so no per-document $indexOfArray search is needed
    }
}, {
    $project: {
        "number": "$numbers",
        "precedingNumber": {
            $arrayElemAt: [
                "$numbers2", // use the second (remaining) numbers array to find the previous number...
                { $max: [ 0, { $subtract: [ "$index", 1 ] } ] } // ...at position - 1; clamped to 0 so the first element is compared with itself and never counts as "in range"
            ]
        },
        "followingNumber": {
            $arrayElemAt: [
                "$numbers2", // use the second (remaining) numbers array to find the next number...
                { $add: [ "$index", 1 ] } // ...at position + 1; past the end $arrayElemAt yields no value, so the comparison below is false
            ]
        }
    }
}, {
    $project: {
        "number": 1, // include number
        // $eq already evaluates to a boolean, so no $cond wrapper is required
        "precedingInRange": { $eq: [ { $subtract: [ "$number", 1 ] }, "$precedingNumber" ] },
        "followingInRange": { $eq: [ { $add: [ "$number", 1 ] }, "$followingNumber" ] }
    }
}, {
    $match: {
        $or: [ // filter out all items that are inside a range (or rather: include only the outer items of each range)
            { "precedingInRange": false },
            { "followingInRange": false }
        ]
    }
}, {
    $project: { // some beautification of the output to help deal with the data in your application
        "singleNumber": { $cond: [ { $not: { $or: [ "$precedingInRange", "$followingInRange" ] } }, "$number", null ] },
        "startOfRange": { $cond: [ "$followingInRange", "$number", null ] },
        "endOfRange": { $cond: [ "$precedingInRange", "$number", null ] }
    }
}])
Run Code Online (Sandbox Code Playgroud)

更新 2:

我有一种感觉,我找到了一种更好的方法来很好地获得范围,而无需涉及太多魔法:

// Returns the ranges of consecutive "number" values that EXIST in the
// collection as "ranges": [ [ start, end ], ... ].
// Assumes every "number" value occurs only once: $indexOfArray returns the
// first match, so duplicates would be compared against the wrong neighbours.
// The stages are passed as ONE array: that is the documented pipeline form and
// it leaves the second argument free for aggregation options.
collection.aggregate([{
    $sort: {
        "number": 1 // we need to sort by numbers in order to be able to do the range magic later
    }
}, {
    $group: {
        "_id": null, // group all documents into the same bucket
        "numbers": {
            $push: "$number" // create an array of all "number" fields
        }
    }
}, {
    $project: {
        "numbers": {
            $reduce: {
                input: "$numbers",
                // Same shape as the value produced by "in" below. A bare [] only
                // worked because a field path over an empty array yields [].
                initialValue: { "start": [], "end": [] },
                in: {
                    "start": {
                        $concatArrays: [
                            "$$value.start",
                            {
                                $cond: { // if preceding element in array of numbers is not "current element - 1" then add it, otherwise skip
                                    // For the first element the index is -1, which $arrayElemAt resolves to the
                                    // LAST element; in an ascending array of distinct values that can never be
                                    // "first - 1", so the first element always opens a range.
                                    if: { $ne: [ { $add: [ "$$this", -1 ] }, { $arrayElemAt: [ "$numbers", { $add: [ { $indexOfArray: [ "$numbers", "$$this" ] }, -1 ] } ] } ] },
                                    then: [ "$$this" ],
                                    else: []
                                }
                            }
                        ]
                    },
                    "end": {
                        $concatArrays: [
                            "$$value.end",
                            {
                                $cond: { // if following element in array of numbers is not "current element + 1" then add it, otherwise skip
                                    // For the last element the index is out of bounds, $arrayElemAt yields no
                                    // value and $ne is true, so the last element always closes a range.
                                    if: { $ne: [ { $add: [ "$$this", 1 ] }, { $arrayElemAt: [ "$numbers", { $add: [ { $indexOfArray: [ "$numbers", "$$this" ] }, 1 ] } ] } ] },
                                    then: [ "$$this" ],
                                    else: []
                                }
                            }
                        ]
                    }
                }
            }
        }
    }
}, {
    $project: {
        "ranges": {
            $zip: { // pair the n-th range start with the n-th range end
                inputs: [ "$numbers.start", "$numbers.end" ]
            }
        }
    }
}])
Run Code Online (Sandbox Code Playgroud)