如何对通过 $lookup“连接”的集合执行 $text 搜索?

Cor*_*son 5 mongoose mongodb node.js mongodb-query aggregation-framework

我是 Mongo 新手,使用的是 v3.2。我有两个集合:Parent 和 Child。我想使用 Parent.aggregate 并通过 $lookup 来“连接” Child,然后对 Child 中的字段执行 $text $search,同时对 Parent 执行日期范围搜索。这可以实现吗……?

Nei*_*unn 6

正如已有的评论所说,确实无法对 $lookup 的结果执行 $text 搜索,因为除了第一个管道阶段之外,任何其他阶段都没有可用的索引。而且,考虑到您实际上希望根据“子”集合的结果进行“连接”,那么直接在“子”集合上进行搜索确实更好。

由此可以得出一个明显的结论:要做到这一点,您需要在“子”集合上运行聚合,以 $text 查询作为初始阶段,然后再用 $lookup 关联“父”集合,而不是反过来。

作为一个工作示例,仅使用核心驱动程序进行演示:

// Working example with the core driver: run the $text search on the "children"
// collection first, then use $lookup to attach the parent of every match.
// NOTE(review): `MongoClient` (mongodb driver) and `async` are not declared in
// this snippet; they are assumed to be required by the surrounding script.
MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
  if (err) throw err;

  var Parent = db.collection('parents');
  var Child = db.collection('children');

  async.series(
    [
      // Cleanup: empty both collections so every run starts from the same data
      function(callback) {
        async.each([Parent,Child],function(coll,callback) {
          coll.deleteMany({},callback);
        },callback);
      },
      // Create the text index on the child "text" field used by $text below
      function(callback) {
        Child.createIndex({ "text": "text" },callback);
      },
      // Create Documents: parents and children are independent, insert in parallel
      function(callback) {
        async.parallel(
          [
            function(callback) {
              Parent.insertMany(
                [
                  { "_id": 1, "name": "Parent 1" },
                  { "_id": 2, "name": "Parent 2" },
                  { "_id": 3, "name": "Parent 3" }
                ],
                callback
              );
            },
            function(callback) {
              // Each child records the _id of its parent in "parent"
              Child.insertMany(
                [
                  {
                    "_id": 1,
                    "parent": 1,
                    "text": "The little dog laughed to see such fun"
                  },
                  {
                    "_id": 2,
                    "parent": 1,
                    "text": "The quick brown fox jumped over the lazy dog"
                  },
                  {
                    "_id": 3,
                    "parent": 1,
                    "text": "The dish ran away with the spoon"
                  },
                  {
                    "_id": 4,
                    "parent": 2,
                    "text": "Miss muffet on here tuffet"
                  },
                  {
                    "_id": 5,
                    "parent": 3,
                    "text": "Lady is a fox"
                  },
                  {
                    "_id": 6,
                    "parent": 3,
                    "text": "Every dog has it's day"
                  }
                ],
                callback
              )
            }
          ],
          callback
        );
      },
      // Aggregate with $text and $lookup
      function(callback) {
        Child.aggregate(
          [
            // The $text match has to be the first stage: only there is the
            // text index available to the pipeline.
            { "$match": {
              "$text": { "$search": "fox dog" }
            }},
            // Keep the join key and text, and expose the relevance score
            { "$project": {
              "parent": 1,
              "text": 1,
              "score": { "$meta": "textScore" }
            }},
            { "$sort": { "score": { "$meta": "textScore" } } },
            // Replace the numeric "parent" key with the matching parent document(s)
            { "$lookup": {
              "from": "parents",
              "localField": "parent",
              "foreignField": "_id",
              "as": "parent"
            }},
            // $lookup yields an array; flatten it to one parent per child
            { "$unwind": "$parent" },
            // Regroup per parent, collecting its matching children and
            // summing their scores into a parent-level score
            { "$group": {
              "_id": "$parent._id",
              "name": { "$first": "$parent.name" },
              "children": {
                "$push": {
                  "_id": "$_id",
                  "text": "$text",
                  "score": "$score"
                }
              },
              "score": { "$sum": "$score" }
            }},
            // Highest combined score first
            { "$sort": { "score": -1 } }
          ],
          function(err,result) {
            // Report the failure instead of printing an undefined result
            if (err) return callback(err);
            console.log(JSON.stringify(result,undefined,2));
            callback(null);
          }
        )
      }
    ],
    function(err) {
      // Release the connection on both success and failure before
      // surfacing any error from the series.
      db.close();
      if (err) throw err;
    }
  );

});
Run Code Online (Sandbox Code Playgroud)

结果是:Child 上 $text 查询的匹配项被填充到各自的 Parent 中,并且按 "score" 排序:

[
  {
    "_id": 1,
    "name": "Parent 1",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]
Run Code Online (Sandbox Code Playgroud)

这最终是合理的,并且比下面这种做法高效得多:从“父级”查询,在 $lookup 中找出所有“子级”,再用 $match 进行“后过滤”以删除不符合条件的“子级”,最后丢弃没有任何匹配项的“父级”。

同样的情况也适用于 mongoose 风格的“引用”,即在“父级”中包含“子级”的“数组”,而不是在子级上记录父级。因此,只要子级上的 "localField"(此处为 _id)与父级数组中定义的、作为 "foreignField" 的类型相同(如果它能配合 .populate() 使用,类型就必然相同),那么对于 $lookup 结果中的每个“子级”,您仍然会得到匹配的“父级”。

这一切都归结为扭转你的思维并认识到$text结果是最重要的,因此“那个”就是需要启动操作的集合。

这是可能的,但只要反过来做就可以了。


使用 mongoose 风格,并在父级中保存所引用的子级列表

仅显示父级引用的相反情况以及日期过滤:

var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// Open the default mongoose connection to the local "rlookup" database.
mongoose.connect('mongodb://localhost/rlookup');

// Parents hold the references: "children" is an array of numeric Child ids.
var parentSchema = new Schema({
  _id: Number,
  name: String,
  date: Date,
  children: [{ type: Number, ref: "Child" }]
});

// Children only carry the searchable text. The text index is declared on the
// field, and autoIndex is switched off for this schema.
var childSchema = new Schema(
  {
    _id: Number,
    text: { type: String, index: "text" }
  },
  { autoIndex: false }
);

var Parent = mongoose.model("Parent",parentSchema);
var Child = mongoose.model("Child",childSchema);

// Mongoose variant: the references live on the parent ("children" array), the
// $text search still starts from the Child model, and parents are additionally
// restricted to a date range after the $lookup.
async.series(
  [
    // Cleanup: remove all documents from both models
    function(callback) {
      async.each([Parent,Child],function(model,callback) {
        model.remove({},callback);
      },callback);
    },
    // Build the declared text index before any document is searched
    function(callback) {
      Child.ensureIndexes({ "background": false },callback);
    },
    // Create the sample parents and children in parallel
    function(callback) {
      async.parallel(
        [
          function(callback) {
            Parent.create([
              {
                "_id": 1,
                "name": "Parent 1",
                "date": new Date("2016-02-01"),
                "children": [1,2]
              },
              {
                "_id": 2,
                "name": "Parent 2",
                "date": new Date("2016-02-02"),
                "children": [3,4]
              },
              {
                "_id": 3,
                "name": "Parent 3",
                "date": new Date("2016-02-03"),
                "children": [5,6]
              },
              {
                // Dated outside the February range filtered on below
                "_id": 4,
                "name": "Parent 4",
                "date": new Date("2016-01-15"),
                "children": [1,2,6]
              }
            ],callback)
          },
          function(callback) {
            Child.create([
              {
                "_id": 1,
                "text": "The little dog laughed to see such fun"
              },
              {
                "_id": 2,
                "text": "The quick brown fox jumped over the lazy dog"
              },
              {
                "_id": 3,
                "text": "The dish ran awy with the spoon"
              },
              {
                "_id": 4,
                "text": "Miss muffet on her tuffet"
              },
              {
                "_id": 5,
                "text": "Lady is a fox"
              },
              {
                "_id": 6,
                "text": "Every dog has it's day"
              }
            ],callback);
          }
        ],
        callback
      );
    },
    // Aggregate: $text on children, $lookup parents, filter parents by date
    function(callback) {
      Child.aggregate(
        [
          // The $text match has to be the first stage: only there is the
          // text index available to the pipeline.
          { "$match": {
            "$text": { "$search": "fox dog" }
          }},
          { "$project": {
            "text": 1,
            "score": { "$meta": "textScore" }
          }},
          { "$sort": { "score": { "$meta": "textScore" } } },
          // Join on the parent's "children" array: every parent whose array
          // contains this child's _id ends up in "parent"
          { "$lookup": {
            "from": "parents",
            "localField": "_id",
            "foreignField": "children",
            "as": "parent"
          }},
          // Keep only the joined parents dated in [2016-02-01, 2016-03-01)
          { "$project": {
            "text": 1,
            "score": 1,
            "parent": {
              "$filter": {
                "input": "$parent",
                "as": "parent",
                "cond": {
                  "$and": [
                    { "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
                    { "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
                  ]
                }
              }
            }
          }},
          // One document per (child, remaining parent) pair
          { "$unwind": "$parent" },
          // Regroup per parent, collecting its matching children and
          // summing their scores into a parent-level score
          { "$group": {
            "_id": "$parent._id",
            "name": { "$first": "$parent.name" },
            "date": { "$first": "$parent.date" },
            "children": {
              "$push": {
                "_id": "$_id",
                "text": "$text",
                "score": "$score"
              }
            },
            "score": { "$sum": "$score" }
          }},
          // Highest combined score first
          { "$sort": { "score": -1 } }
        ],
        function(err,result) {
          // Report the failure instead of printing an undefined result
          if (err) return callback(err);
          console.log(JSON.stringify(result,undefined,2));
          callback(null);
        }
      )
    }
  ],
  function(err) {
    // Disconnect on both success and failure before surfacing any error
    // from the series.
    mongoose.disconnect();
    if (err) throw err;
  }
);
Run Code Online (Sandbox Code Playgroud)

随着输出:

[
  {
    "_id": 1,
    "name": "Parent 1",
    "date": "2016-02-01T00:00:00.000Z",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "date": "2016-02-03T00:00:00.000Z",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]
Run Code Online (Sandbox Code Playgroud)

请注意,"Parent 4" 本来会拥有最高的排名,但它被移除了,因为其日期不在 $filter 所应用的查询范围内。