Tom*_*son 2 tree mapreduce hierarchical-data mongodb
我有一组实体,它们共同构成一棵树.每个实体都有一个 attributes 字段,其值是一个属性数组.
例如:
{
"_id" : 1,
"parent_id" : null,
"attributes" : [ "A", "B", "C" ]
}
Run Code Online (Sandbox Code Playgroud)
我想使用MapReduce生成另一个类似于原始集合的集合,但是对于集合中的每个项目,它不仅包含与实体直接关联的属性,还包含其所有祖先的属性,一直到层次结构的根节点.
所以给出以下实体:
{
"_id" : 1,
"parent_id" : null,
"attributes" : [ "A", "B", "C" ]
}
{
"_id" : 2,
"parent_id" : 1,
"attributes" : [ "D", "E", "F" ]
}
{
"_id" : 3,
"parent_id" : 2,
"attributes" : [ "G", "H", "I" ]
}
Run Code Online (Sandbox Code Playgroud)
MapReduce作业的结果如下:
{
"_id" : 1,
"attributes" : [ "A", "B", "C" ]
}
{
"_id" : 2,
"attributes" : [ "A", "B", "C", "D", "E", "F" ]
}
{
"_id" : 3,
"attributes" : [ "A", "B", "C", "D", "E", "F", "G", "H", "I" ]
}
Run Code Online (Sandbox Code Playgroud)
我已经设法生成了MapReduce作业,这些作业执行简单的操作,例如计算每个实体的属性,但无法理解我如何处理层次结构.我愿意接受存储数据的替代方法,但不希望将整个层次结构存储在单个文档中.
在MongoDB中使用MapReduce是否可以这么简单?或者我只是以错误的方式思考问题?
好吧,所以我不认为这将是非常高效/可扩展的,因为你必须从子节点递归地找到父ID.但是,它确实提供了您想要的输出.
/**
 * Map step: emits the attributes of a node and of each of its ancestors,
 * every emit keyed by the _id of the node the walk started from.
 *
 * Called in two ways:
 *   - by mapReduce, with no arguments: the current document is `this`;
 *   - recursively by itself, with the document just processed and the
 *     starting node's id, in which case it first loads that document's parent.
 *
 * Emit order for one starting node is child first, then parent, then
 * grandparent, and so on up to the root.
 *
 * NOTE(review): this relies on `db` being reachable from inside a map
 * function. MongoDB's documentation says map functions should not access the
 * database, and newer server versions may not expose `db` here at all -
 * confirm against the target server version.
 * NOTE(review): a cycle in the parent_id links is not detected and would
 * recurse without end.
 *
 * @param {Object} [doc] document processed by the previous call (omit when
 *                       invoked by mapReduce)
 * @param {*} [id] _id of the starting node, used as the emit key
 */
var mapFunc = function(doc, id) {
    // if this is being invoked by mapReduce, it won't pass any parameters
    if (doc == null) {
        doc = this;
        id = this._id;
    } else if (doc.parent_id != null) {
        // if this is a recursive call, find the parent
        doc = db.test.findOne({_id: doc.parent_id});
        if (doc == null) {
            // dangling parent_id: the parent document does not exist, so
            // there is nothing more to emit for this chain
            return;
        }
    }
    // emit the id, which is always the id of the child node (starting point),
    // and the attributes; a document without attributes contributes an empty
    // list so that the reducer can always concatenate
    emit(id, {attributes: doc.attributes || []});
    // if parent_id is not null, continue the walk one level up
    if (doc.parent_id != null) {
        // recursive mapFunc call
        mapFunc(doc, id);
    }
};
// mapFunc calls itself by name while running inside mapReduce, so it is
// stored in system.js under that same name to make the recursive call resolve
db.system.js.save({
    "_id": "mapFunc",
    "value": mapFunc
});
/**
 * Reduce step: merges the attribute lists emitted for one key into a single
 * list.
 *
 * Each value's attributes are placed in front of what has been collected so
 * far, so values arriving in child -> parent -> grandparent order produce a
 * root-first list. The output has the same shape as the input values
 * ({attributes: [...]}), so an earlier result can be fed back in as a value.
 *
 * NOTE(review): the result depends on the order of `values`; whether
 * mapReduce guarantees that order should be confirmed.
 *
 * @param {*} key the emit key (unused)
 * @param {Array<Object>} values emitted values, each {attributes: Array}
 * @returns {{attributes: Array}} the merged attribute list
 */
var reduceFunc = function(key, values) {
    var result = {attributes: []};
    values.forEach(function(value) {
        // a value without an attributes array contributes nothing instead of
        // throwing on concat
        var attrs = (value && value.attributes) || [];
        // prepend this value's attributes to the ones collected so far
        result.attributes = attrs.concat(result.attributes);
    });
    return result;
};
/**
 * Finalize step: unwraps the reduced value so that the output document's
 * value is the attribute list itself rather than {attributes: [...]}.
 *
 * @param {*} key the emit key (unused)
 * @param {Object} value the mapped or reduced value for the key
 * @returns {*} the value's `attributes` property
 */
var finalize = function(key, value) {
    var unwrapped = value.attributes;
    return unwrapped;
};
// quick test: run the job over db.test, return the results inline and apply
// finalize to each output value
db.test.mapReduce(
    mapFunc,
    reduceFunc,
    {
        out: {inline: 1},
        finalize: finalize
    }
);
Run Code Online (Sandbox Code Playgroud)
输出结果如下:
"results" : [
{
"_id" : 1,
"value" : [
"A",
"B",
"C"
]
},
{
"_id" : 2,
"value" : [
"A",
"B",
"C",
"D",
"E",
"F"
]
},
{
"_id" : 3,
"value" : [
"A",
"B",
"C",
"D",
"E",
"F",
"G",
"H",
"I"
]
}
],
"timeMillis" : 2,
"counts" : {
"input" : 3,
"emit" : 6,
"reduce" : 2,
"output" : 3
},
"ok" : 1,
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
1228 次 |
| 最近记录: |