CouchDB 中奇怪的 map/reduce 行为:是 rereduce 的问题吗?

Ton*_*ony 4 couchdb mapreduce

我在 CouchDB 中遇到了一个 map/reduce 问题(两个函数如下所示):当我用 group_level=2(精确分组)运行视图时,得到的输出是准确的:

{"rows":[
 {"key":["2011-01-11","staff-1"],"value":{"total":895.72,"count":2,"services":6,"services_ignored":6,"services_liked":0,"services_disliked":0,"services_disliked_avg":0,"Revise":{"total":275.72,"count":1},"Review":{"total":620,"count":1}}},
 {"key":["2011-01-11","staff-2"],"value":{"total":8461.689999999999,"count":2,"services":41,"services_ignored":37,"services_liked":4,"services_disliked":0,"services_disliked_avg":0,"Revise":{"total":4432.4,"count":1},"Review":{"total":4029.29,"count":1}}},
 {"key":["2011-01-11","staff-3"],"value":{"total":2100.72,"count":1,"services":10,"services_ignored":4,"services_liked":3,"services_disliked":3,"services_disliked_avg":2.3333333333333335,"Revise":{"total":2100.72,"count":1}}},
Run Code Online (Sandbox Code Playgroud)

但是,改为 group_level=1 之后(此时同一日期下不同员工键的值应当按日期合并),输出就不再准确了(请注意 total 是正确的,但其他所有值都是错误的):

{"rows":[
  {"key":["2011-01-11"],"value":{"total":11458.130000000001,"count":2,"services":0,"services_ignored":0,"services_liked":0,"services_disliked":0,"services_disliked_avg":0,"None":{"total":11458.130000000001,"count":2}}},
Run Code Online (Sandbox Code Playgroud)

我唯一的理论是这与rereduce有关,我还没有学到.我应该探索那个选项,还是我错过了其他的东西?

这是Map函数:

function(doc) {
if(doc.doc_type == 'Feedback') {
    emit([doc.date.split('T')[0], doc.staff_id], doc);
}
}
Run Code Online (Sandbox Code Playgroud)

这就是Reduce:

function(keys, vals) {
// Reduce step: folds every entry of `vals` into a single summary object with
// the summed `total`, the number of entries (`count`), service counters
// (all / ignored / liked / disliked plus the average dislike score) and one
// {total, count} bucket per status name. `keys` is not used.
// NOTE(review): only (keys, vals) are declared, so the third `rereduce`
// argument CouchDB passes is never looked at; each vals[i] is read as a raw
// document (.total, .status, .groups, .hidden_services).
var ret = {
    'total':0,
    'count':0, 
    'services': 0,
    'services_ignored': 0,
    'services_liked': 0,
    'services_disliked': 0,
    'services_disliked_avg': 0,
};

// running sum of the user_likes scores of all disliked services; used to
// derive ret.services_disliked_avg
var total_disliked_score = 0;

// Adds doc.total and one count to the ret bucket for the doc's status.
// A missing or empty status is filed under 'None', 'Declined' under 'Rejected';
// any other status is used as the bucket name unchanged.
function handle_status(doc) {
    // NOTE(review): `status` is assigned without `var`, so it is not a local
    // variable of handle_status.
    if(!doc.status || doc.status == '' || doc.status == undefined) {
        status = 'None';
    } else if (doc.status == 'Declined') {
        status = 'Rejected';
    } else {
        status = doc.status;
    }
    // create the bucket the first time this status is seen
    if(!ret[status]) ret[status] = {'total':0, 'count':0};
    ret[status]['total'] += doc.total;  
    ret[status]['count'] += 1;
};

// Classifies every entry of `services` by its user_likes value:
// == 10 counts as liked, any other value >= 1 counts as disliked (and its
// score feeds the average), everything else counts as ignored.
function handle_services(services) {
    ret.services += services.length;
    for(var a in services) {
        if (services[a].user_likes == 10) {
            ret.services_liked += 1;
        } else if (services[a].user_likes >= 1) {
            ret.services_disliked += 1;
            total_disliked_score += services[a].user_likes;
            // recompute the running average after each dislike
            // NOTE(review): presumably always true for numeric scores, since
            // every disliked score is at least 1 — confirm
            if (total_disliked_score >= ret.services_disliked) {
                ret.services_disliked_avg = total_disliked_score / ret.services_disliked;
            }
        } else {
            ret.services_ignored += 1;
        }
    }
}

// walk over every value handed to this reduce call
for(var i in vals) {
    // add this value's total to the grand total and count one entry per value
    ret.total += vals[i].total;
    ret.count += 1;

    // update the per-status total and count for this value
    handle_status(vals[i]);

    // like / dislike statistics for the services of every group
    if(vals[i].groups) {
        for(var ii in vals[i].groups) {
            if(vals[i].groups[ii].services) {
                handle_services(vals[i].groups[ii].services); 
            }
        }
    }

    // services listed under hidden_services are counted as well
    if(vals[i].hidden_services) {
        // the inner check repeats the outer condition
        if (vals[i].hidden_services) {
            handle_services(vals[i].hidden_services);
        }
    }
}

return ret;
}
Run Code Online (Sandbox Code Playgroud)

Vic*_*let 8

这是一个经典的错误。请记住,CouchDB 的 reduce 分多个步骤进行,其中一些步骤会把其他 reduce 步骤的结果作为输入。但是,您的代码似乎假设 vals[i] 始终是形如 { "groups": _ , "hidden_services": _ , _ } 的对象,即单个文档。一旦发生 rereduce,这段代码就会出错,因为此时 vals[i] 是形如 { "count" : _ , "services" : _ , _ } 的对象,即前一个 reduce 步骤的结果。

因此,例如,执行 ret.count += 1 时,您统计的是中间 reduce 结果的数量,而不是文档的数量。

一种解决方案是编写两个版本的 reduce 代码:一个处理初始的 reduce,另一个处理 rereduce 步骤。您可以通过查看第三个参数来判断当前调用是初始调用还是 rereduce 调用(初始调用时为 false,rereduce 时为 true)。

另一种解决方案是让 map 函数发出经过预处理的值,其形式与 reduce 函数返回的 { "count" : _ , "services" : _ , _ } 相同,这样 reduce 函数只需把这些值的各个成员相加即可。