在MongoDB中对"父链接"树使用MapReduce

Tom*_*son 2 tree mapreduce hierarchical-data mongodb

我有一组实体,代表一棵树.每个实体都有一个包含属性数组的属性.

例如:

{
    "_id" : 1,
    "parent_id" : null,
    "attributes" : [ "A", "B", "C" ]
}
Run Code Online (Sandbox Code Playgroud)

我想使用MapReduce生成另一个与原始集合类似的集合,但其中每个条目不仅包含与该实体直接关联的属性,还包含其所有祖先的属性,一直追溯到层次结构的根节点.

所以给出以下实体:

{
    "_id" : 1,
    "parent_id" : null,
    "attributes" : [ "A", "B", "C" ]
}

{
    "_id" : 2,
    "parent_id" : 1,
    "attributes" : [ "D", "E", "F" ]
}

{
    "_id" : 3,
    "parent_id" : 2,
    "attributes" : [ "G", "H", "I" ]
}
Run Code Online (Sandbox Code Playgroud)

MapReduce作业的结果如下:

{
    "_id" : 1,
    "attributes" : [ "A", "B", "C" ]
}

{
    "_id" : 2,
    "attributes" : [ "A", "B", "C", "D", "E", "F" ]
}

{
    "_id" : 3,
    "attributes" : [ "A", "B", "C", "D", "E", "F", "G", "H", "I" ]
}
Run Code Online (Sandbox Code Playgroud)

我已经设法生成了MapReduce作业,这些作业执行简单的操作,例如计算每个实体的属性,但无法理解我如何处理层次结构.我愿意接受存储数据的替代方法,但不希望将整个层次结构存储在单个文档中.

在MongoDB中使用MapReduce是否可以这么简单?或者我只是以错误的方式思考问题?

Eve*_*man 5

好吧,所以我不认为这将是非常高效/可扩展的,因为你必须从子节点递归地找到父ID.但是,它确实提供了您想要的输出.

/**
 * Map function for db.test.mapReduce. For the document it is invoked on, it
 * emits that document's attributes and then the attributes of each ancestor
 * reached by following parent_id, always keyed by the _id of the document the
 * walk started from.
 *
 * mapReduce calls it with no arguments and `this` bound to the current
 * document; the two parameters are only supplied by the recursive calls below.
 *
 * NOTE(review): MongoDB's mapReduce documentation says the map function should
 * not access the database; confirm the target server permits this lookup.
 *
 * @param {Object} [doc] - document whose parent is visited next (recursive calls only)
 * @param {*} [id] - _id of the starting document, used as the emit key (recursive calls only)
 */
var mapFunc = function(doc, id) {
  if (doc == null) {
    // invoked by mapReduce itself: start the walk at the current document
    doc = this;
    id = this._id;
  } else if (doc.parent_id != null) {
    // recursive call: step up to the parent
    doc = db.test.findOne({_id: doc.parent_id});
    if (doc == null) {
      // parent_id points at a document that does not exist; end the walk here
      // instead of throwing on doc.attributes and failing the whole job
      return;
    }
  }
  // the key is always the id of the starting (child) node
  emit(id, {attributes: doc.attributes});
  if (doc.parent_id != null) {
    // keep climbing until a node without a parent is reached
    mapFunc(doc, id);
  }
};
// mapFunc calls itself by name while running inside mapReduce, so it is stored
// in system.js under the same name
db.system.js.save({
  _id: "mapFunc",
  value: mapFunc
});

/**
 * Reduce function: merges every emitted attribute list for one key into a
 * single {attributes: [...]} object.
 *
 * Each incoming list is placed in front of what has been accumulated so far,
 * so the last value in `values` ends up first in the result.
 * NOTE(review): the original author suspected the order of `values` is not
 * guaranteed by mapReduce — confirm before relying on root-to-leaf ordering.
 *
 * @param {*} key - the emit key (unused)
 * @param {Array<{attributes: Array}>} values - objects emitted for this key
 * @returns {{attributes: Array}} the combined attribute list
 */
var reduceFunc = function(key, values) {
  var merged = values.reduce(function(soFar, entry) {
    // prepend this entry's attributes to everything gathered so far
    return entry.attributes.concat(soFar);
  }, []);
  return {attributes: merged};
};

/**
 * Finalize function: unwraps the reduced object so the output value is the
 * attribute array itself rather than {attributes: [...]}.
 *
 * @param {*} key - the emit key (unused)
 * @param {{attributes: Array}} value - reduced (or single emitted) value
 * @returns {Array} the attribute array
 */
var finalize = function(key, value) {
  var attributes = value.attributes;
  return attributes;
};

// quick test: run the job against db.test and return the results inline
db.test.mapReduce(
  mapFunc,
  reduceFunc,
  {
    out: {inline: 1},
    finalize: finalize
  }
);
Run Code Online (Sandbox Code Playgroud)

输出结果:

"results" : [
    {
        "_id" : 1,
        "value" : [
            "A",
            "B",
            "C"
        ]
    },
    {
        "_id" : 2,
        "value" : [
            "A",
            "B",
            "C",
            "D",
            "E",
            "F"
        ]
    },
    {
        "_id" : 3,
        "value" : [
            "A",
            "B",
            "C",
            "D",
            "E",
            "F",
            "G",
            "H",
            "I"
        ]
    }
],
"timeMillis" : 2,
"counts" : {
    "input" : 3,
    "emit" : 6,
    "reduce" : 2,
    "output" : 3
},
"ok" : 1,
}
Run Code Online (Sandbox Code Playgroud)