Bru*_*oLM 6 javascript performance grouping
我试图GroupBy用这些参数实现一个方法
function GroupBy(keySelector, elementSelector, comparer)
{
// keySelector = function(e) { return e.ID }
// elementSelector = function(e) { return e.Name }
// comparer = { Equals: function(a,b) { return a==b }, GetHashCode:... }
}
Run Code Online (Sandbox Code Playgroud)
但是我不知道实现它的有效方法.
我用linq.js和我创建的方法创建了一个jsPerf测试,该方法不使用比较器,只适用于平面类型.(这里输出测试)
其他库(如下划线和Lo-Dash)不带comparer参数.所以他们的实现是无关紧要的.
我的密钥可能是一个类,所以我需要一些东西来确定TKey在不同的实例中是否相同.
所以基本上我要做的是复制C#Linq GroupBy行为,这里记录.
样本输入:
var arrComplex =
[
{ N: { Value: 10 }, Name: "Foo" },
{ N: { Value: 10 }, Name: "Bar" },
{ N: { Value: 20 }, Name: "Foo" },
{ N: { Value: 20 }, Name: "Bar" }
];
Run Code Online (Sandbox Code Playgroud)
样本输出(或类似的东西):
[
{
"Key": {"Value":10},
"Elements":["Foo","Bar"]
},
{
"Key": {"Value":20},
"Elements":["Foo","Bar"]
}
]
Run Code Online (Sandbox Code Playgroud)
有关如何实施它的任何想法?
为了赏金,我想你考虑一下:
好吧,我期待一个完整的答案.
我使用你的jsperf作为参考,用于一些更好的脚本点.我真的非常喜欢你的'哈希'代码,所以我完全偷了它.根据'browserscope'图表,Mine使用不同的方法生成用于制作哈希的字符串,这似乎更快一点,从而提高了性能.我在我的测试中包含了一个"过多的递归"概念证明,以证明它具有递归保护,如JSON.stringify和.toSource().
我的jsfiddle显示代码返回您需要的格式.我的jsperf似乎表明它优于发布的解决方案.我还包括linq.js解决方案,但它对我来说在FireFox中表现非常糟糕.它可以在Safari,Chrome和IE中运行,但不会比我快,除了在IE中.我甚至在手机上试过它,但我仍然有相同的性能差异.我已经在所有浏览器的最新版本中亲自测试了它与发布的解决方案并排,并且每个浏览器的执行时间约为40%.每个人的想法是什么?
这是我的代码:
var arr = [
{ N: 10, Name: "Foo" },
{ N: 10, Name: "Bar" },
{ N: 20, Name: "Foo" },
{ N: 20, Name: "Bar" }
];
var poc = { name:'blah', obj:{} };
poc.obj = poc;
var arrComplex = [
{ N: { Value: 10, TooMuchRecursionProofPOC:poc }, Name: "Foo" },
{ N: { Value: 10, TooMuchRecursionProofPOC:poc }, Name: "Bar" },
{ N: { Value: 20, TooMuchRecursionProofPOC:poc }, Name: "Foo" },
{ N: { Value: 20, TooMuchRecursionProofPOC:poc }, Name: "Bar" }
];
var eArr = Enumerable.From(arr);
var eArrComplex = Enumerable.From(arrComplex);
function setup_hashers() {
// recursion protection idea
var rp = '_rp'+(Math.random()*10000000);
function tstr() {
var out = '', i = '';
if (this[rp]) { this[rp] = undefined; return out; }
for (i in this)
if (i != rp && this.hasOwnProperty(i))
out += this[i] instanceof Object
? ((this[rp] = true) && this[i] != this && !this[i][rp] ? tstr.call(this[i]) : '')
: (this[i].toString || tstr).call(this[i]);
return out;
};
Number.prototype.GetHashCode = function() {
return this.valueOf();
};
Object.prototype.GetHashCode = function() {
var s = (this instanceof Object ? tstr : this.toString || tstr).call(this),
h = 0;
if (s.length)
for (var i = 0; i < s.length; i++)
h = ((h << 5) - h) + s.charCodeAt(i);
return h;
};
}
function group_by(a, keyFunc, valFunc, comp, as_array) {
if (!a.length) return as_array ? [] : {};
var keyFunc = keyFunc || function (e) { return e; },
valFunc = valFunc || function (e) { return e; };
var comp = comp || {
Equals: function (a, b) { return a == b; },
Hash: function (e) { return e.GetHashCode(); }
};
var hashs = {}, key = '', hash = '';
for (var i = 0; i < a.length; i++) {
key = keyFunc(a[i]);
hash = comp.Hash(key);
if (typeof hashs[hash] != 'undefined')
hash = comp.Equals(key, hashs[hash].Key)
? hash
: hash + '-' + i;
hashs[hash] = hashs[hash] || { Key: key, Elements: [] };
hashs[hash].Elements.push(valFunc(a[i]));
}
if (as_array) {
var out = [], j = '', keys = Object.keys(hashs);
for (var j = 0; j < keys.length; j++)
out.push(hashs[keys[j]]);
return out;
}
return hashs;
};
function group_by_control(a, keyFunc, valFunc) {
if (!a.length) return as_array ? [] : {};
var keyFunc = keyFunc || function (e) { return e; },
valFunc = valFunc || function (e) { return e; };
var hashs = {}, key = '', hash = '';
for (var i = 0; i < a.length; i++) {
key = keyFunc(a[i]);
hashs[key] = hashs[key] || { Key: key, Elements: [] };
hashs[key].Elements.push(valFunc(a[i]));
}
var out = [], j = '', keys = Object.keys(hashs);
for (var j = 0; j < keys.length; j++)
out.push(hashs[keys[j]]);
return out;
};
setup_hashers();
console.log(group_by_control(
arr,
function(e) { return e.N },
function(e) { return e.Name }
));
console.log(group_by(
arrComplex, function(e) { return e.N; },
function(e) { return e.Name; },
{
Equals: function(a, b) { return a.Value == b.Value },
Hash: function(e) { return e.GetHashCode(); }
}
));
console.log(group_by(
arrComplex, function(e) { return e.N; },
function(e) { return e.Name; },
{
Equals: function(a, b) { return a.Value == b.Value },
Hash: function(e) { return e.GetHashCode(); }
},
true
));
Run Code Online (Sandbox Code Playgroud)
我设法以这种方式实现:
我需要从对象中获取哈希码。
Object.prototype.GetHashCode = function () {
var s = this instanceof Object ? stringify(this) : this.toString();
var hash = 0;
if (s.length === 0) return hash;
for (var i = 0; i < s.length; ++i) {
hash = ((hash << 5) - hash) + s.charCodeAt(i);
}
return hash;
};
Number.prototype.GetHashCode = function () { return this.valueOf(); };
Run Code Online (Sandbox Code Playgroud)
由于JSON.stringify循环引用会失败,我创建了另一种方法来对其进行字符串化,这样我可以将对象的大部分作为字符串来获取,并计算其哈希码,如下所示:
function isPlainObject(obj)
{
if ((typeof (obj) !== "object" || obj.nodeType || (obj instanceof Window))
|| (obj.constructor && !({}).hasOwnProperty.call(obj.constructor.prototype, "isPrototypeOf"))
)
{
return false;
}
return true;
}
function stringify(obj, s)
{
s = s || "";
for (var i in obj)
{
var o = obj[i];
if (o && (o instanceof Array || isPlainObject(o)))
{
s += i + ":" + JSON.stringify(o);
}
else if (o && typeof o === "object")
{
s += i + ":" + "$ref#" + o;
}
else
{
s += i + ":" + o;
}
}
return s;
}
Run Code Online (Sandbox Code Playgroud)
对性能没有太大影响。对于大物体,它是相同的,对于小物体,它会丢失,但仍然相当快速和安全。性能测试就到这里。
Name op/s
---------------------------------
JSON.stringify large 62
stringify large 62
JSON.stringify small 1,690,183
stringify small 1,062,452
Run Code Online (Sandbox Code Playgroud)
我的 GroupBy 方法
function GroupBy(a, keySelector, elementSelector, comparer)
{
// set default values for opitinal parameters
elementSelector = elementSelector || function(e) { return e; };
comparer = comparer ||
{
Equals: function(a,b) { return a==b },
GetHashCode: function(e) { return e.GetHashCode(); }
};
var key, hashKey, reHashKey;
// keep groups separated by hash
var hashs = {};
for (var i = 0, n = a.length; i < n; ++i)
{
// in case of same hash, but Equals returns false
reHashKey = undefined;
// grabs the key
key = keySelector(a[i]);
// grabs the hashcode
hashKey = comparer.GetHashCode(key);
// if a hash exists in the list
// compare values with Equals
// in case it return false, generate a unique hash
if (typeof hashs[hashKey] !== "undefined")
reHashKey = comparer.Equals(key, hashs[hashKey].Key) ? hashKey : hashKey + " " + i;
// if a new hash has been generated, update
if (typeof reHashKey !== "undefined" && reHashKey !== hashKey)
hashKey = reHashKey;
// get/create a new group and add the current element to the list
hashs[hashKey] = hashs[hashKey] || { Key: key, Elements: [] };
hashs[hashKey].Elements.push(a[i]);
}
return hashs;
}
Run Code Online (Sandbox Code Playgroud)
去测试
var arrComplex =
[
{ N: { Value: 10 }, Name: "Foo" },
{ N: { Value: 10 }, Name: "Bar" },
{ N: { Value: 20 }, Name: "Foo" },
{ N: { Value: 20 }, Name: "Bar" }
];
//
var x = GroupBy(arrComplex
, function(e) { return e.N; }
, function(e) { return e.Name; }
, {
Equals: function(a,b) { return a.Value == b.Value },
GetHashCode: function(e) { return e.GetHashCode(); }
}
);
//
console.log(x);
Run Code Online (Sandbox Code Playgroud)
jsFiddle 上的示例,现在使用 Jedi。
但是,根据我的测试,我的实现GroupBy比 linq.js 慢GroupBy。当我转换时它只会更快ToArray()。也许 linq.js 仅在我转换为数组时才真正执行,这就是差异的原因,我不确定这部分。
检测结果
Name op/s
---------------------------------
GroupBy 163,261
GroupByToArray 152,382
linq.js groupBy 243,547
linq.js groupBy toArray 26,309
Run Code Online (Sandbox Code Playgroud)