Lan*_*ard 5 javascript arrays algorithm tree data-structures
最近刚学了B+树,在问如何构建一个树数组,其中/出哪些项可以拼接,只允许1、2、4、8、16或32项的数组?,看到一个精彩的答案根据约束实现B+树。总而言之,这个问题基本上是在寻找一种我们可以将其视为数组的树状结构(按索引查找、在索引处删除、在索引处插入等)。此外,实现树节点的数组只能包含 2 个元素的幂,最多 32 个项目(1、2、4、8、16 或 32)。此外,如答案所示,它们应该相对紧凑,基本上确保在创建新的子数组之前每个节点都填充了 32 个项目(即,这样你就不会得到一堆 16 个项目的数组)。
答案有点复杂,我仍在挑选它。但是当我掌握了它的内部细节时,我想看看“真正的 B+ 搜索树”相比之下会是什么样子。也就是说,一个实际上有键的 B+树,你把它当作一个哈希表(按键查找,按键删除,用键插入等)。而另一个是通过index,这将是 key 。理想情况下是任意键(可能带有getKey
函数),或者只是字符串或整数键。
所以我想知道在原始答案中需要修改什么,复制到这里:
class Node {
constructor(capacity) {
// Mimic fixed-size array (avoid accidentally growing it)
this.children = Object.seal(Array(capacity).fill(null));
this.childCount = 0; // Number of used slots in children array
this.treeSize = 0; // Total number of values in this subtree
// Maintain back-link to parent.
this.parent = null;
// Per level in the tree, maintain a doubly linked list
this.prev = this.next = null;
}
setCapacity(capacity) {
if (capacity < 1) return;
// Here we make a new array, and copy the data into it
let children = Object.seal(Array(capacity).fill(null));
for (let i = 0; i < this.childCount; i++) children[i] = this.children[i];
this.children = children;
}
isLeaf() {
return !(this.children[0] instanceof Node);
}
index() {
return this.parent.children.indexOf(this);
}
updateTreeSize(start, end, sign=1) {
let sum = 0;
if (this.isLeaf()) {
sum = end - start;
} else {
for (let i = start; i < end; i++) sum += this.children[i].treeSize;
}
if (!sum) return;
sum *= sign;
// Apply the sum change to this node and all its ancestors
for (let node = this; node; node = node.parent) {
node.treeSize += sum;
}
}
wipe(start, end) {
this.updateTreeSize(start, end, -1);
this.children.copyWithin(start, end, this.childCount);
for (let i = this.childCount - end + start; i < this.childCount; i++) {
this.children[i] = null;
}
this.childCount -= end - start;
// Reduce allocated size if possible
if (this.childCount * 2 <= this.children.length) this.setCapacity(this.children.length / 2);
}
moveFrom(neighbor, target, start, count=1) {
// Note: `start` can have two meanings:
// if neighbor is null, it is the value/Node to move to the target
// if neighbor is a Node, it is the index from where value(s) have to be moved to the target
// Make room in target node
if (this.childCount + count > this.children.length) this.setCapacity(this.children.length * 2);
this.children.copyWithin(target + count, target, Math.max(target + count, this.childCount));
this.childCount += count;
if (neighbor !== null) {
// Copy the children
for (let i = 0; i < count; i++) {
this.children[target + i] = neighbor.children[start + i];
}
// Remove the original references
neighbor.wipe(start, start + count);
} else {
this.children[target] = start; // start is value to insert
}
this.updateTreeSize(target, target + count, 1);
// Set parent link(s)
if (!this.isLeaf()) {
for (let i = 0; i < count; i++) {
this.children[target + i].parent = this;
}
}
}
moveToNext(count) {
this.next.moveFrom(this, 0, this.childCount - count, count);
}
moveFromNext(count) {
this.moveFrom(this.next, this.childCount, 0, count);
}
basicRemove(index) {
if (!this.isLeaf()) {
// Take node out of the level's linked list
let prev = this.children[index].prev;
let next = this.children[index].next;
if (prev) prev.next = next;
if (next) next.prev = prev;
}
this.wipe(index, index + 1);
}
basicInsert(index, value) {
this.moveFrom(null, index, value);
if (value instanceof Node) {
// Insert node in the level's linked list
if (index > 0) {
value.prev = this.children[index-1];
value.next = value.prev.next;
} else if (this.childCount > 1) {
value.next = this.children[1];
value.prev = value.next.prev;
}
if (value.prev) value.prev.next = value;
if (value.next) value.next.prev = value;
}
}
pairWithSmallest() {
return this.prev && (!this.next || this.next.childCount > this.prev.childCount)
? [this.prev, this] : [this, this.next];
}
toString() {
return "[" + this.children.map(v => v??"-").join() + "]";
}
}
class Tree {
constructor(nodeCapacity=32) {
this.nodeCapacity = nodeCapacity;
this.root = new Node(1);
this.first = this.root; // Head of doubly linked list at bottom level
}
locate(offset) {
let node = this.root;
// Normalise argument
offset = offset < 0 ? Math.max(0, node.treeSize + offset) : Math.min(offset, node.treeSize);
while (!node.isLeaf()) {
let index = 0;
let child = node.children[index];
while (offset > child.treeSize || offset === child.treeSize && child.next) {
offset -= child.treeSize;
child = node.children[++index];
}
node = child;
}
return [node, offset];
}
getItemAt(offset) {
let [node, index] = this.locate(offset);
if (index < node.childCount) return node.children[index];
}
setItemAt(offset, value) {
let [node, index] = this.locate(offset);
if (index < node.childCount) node.children[index] = value;
}
removeItemAt(offset) {
let [node, index] = this.locate(offset);
if (index >= node.childCount) return;
while (true) {
console.assert(node.isLeaf() || node.children[index].treeSize === 0);
node.basicRemove(index);
// Exit when node's fill ratio is fine
if (!node.parent || node.childCount * 2 > this.nodeCapacity) return;
// Node has potentially too few children, we should either merge or redistribute
let [left, right] = node.pairWithSmallest();
if (!left || !right) { // A node with no siblings? Must become the root!
this.root = node;
node.parent = null;
return;
}
let sumCount = left.childCount + right.childCount;
let childCount = sumCount >> 1;
// Check whether to merge or to redistribute
if (sumCount > this.nodeCapacity) { // redistribute
// Move some data from the bigger to the smaller node
let shift = childCount - node.childCount;
if (!shift) { // Boundary case: when a redistribution would bring no improvement
console.assert(node.childCount * 2 === this.nodeCapacity && sumCount === this.nodeCapacity + 1);
return;
}
if (node === left) { // move some children from right to left
left.moveFromNext(shift);
} else { // move some children from left to right
left.moveToNext(shift);
}
return;
}
// Merge:
// Move all data from the right to the left
left.moveFromNext(right.childCount);
// Prepare to delete right node
node = right.parent;
index = right.index();
}
}
insertItemAt(offset, value) {
let [node, index] = this.locate(offset);
while (node.childCount === this.nodeCapacity) { // No room here
if (index === 0 && node.prev && node.prev.childCount < this.nodeCapacity) {
return node.prev.basicInsert(node.prev.childCount, value);
}
// Check whether we can redistribute (to avoid a split)
if (node !== this.root) {
let [left, right] = node.pairWithSmallest();
let joinedIndex = left === node ? index : left.childCount + index;
let sumCount = left.childCount + right.childCount + 1;
if (sumCount <= 2 * this.nodeCapacity) { // redistribute
let childCount = sumCount >> 1;
if (node === right) { // redistribute to the left
let insertInLeft = joinedIndex < childCount;
left.moveFromNext(childCount - left.childCount - +insertInLeft);
} else { // redistribute to the right
let insertInRight = index >= sumCount - childCount;
left.moveToNext(childCount - right.childCount - +insertInRight);
}
if (joinedIndex > left.childCount ||
joinedIndex === left.childCount && left.childCount > right.childCount) {
right.basicInsert(joinedIndex - left.childCount, value);
} else {
left.basicInsert(joinedIndex, value);
}
return;
}
}
// Cannot redistribute: split node
let childCount = node.childCount >> 1;
// Create a new node that will later become the right sibling of this node
let sibling = new Node(childCount);
// Move half of node node's data to it
sibling.moveFrom(node, 0, childCount, childCount);
// Insert the value in either the current node or the new one
if (index > node.childCount) {
sibling.basicInsert(index - node.childCount, value);
} else {
node.basicInsert(index, value);
}
// Is this the root?
if (!node.parent) {
// ...then first create a parent, which is the new root
this.root = new Node(2);
this.root.basicInsert(0, node);
}
// Prepare for inserting the sibling node into the tree
index = node.index() + 1;
node = node.parent;
value = sibling;
}
node.basicInsert(index, value);
}
/* Below this point: these methods are optional */
* [Symbol.iterator]() { // Make tree iterable
let i = 0;
for (let node = this.first; node; node = node.next) {
for (let i = 0; i < node.childCount; i++) yield node.children[i];
}
}
print() {
console.log(this.root && this.root.toString());
}
verify() {
// Raise an error when the tree violates one of the required properties
if (!this.root) return; // An empty tree is fine.
if (this.root.parent) throw "root should not have a parent";
// Perform a breadth first traversal
let q = [this.root];
while (q.length) {
if (q[0].isLeaf() && this.first !== q[0]) throw "this.first is not pointing to first leaf";
let level = [];
let last = null;
for (let parent of q) {
if (!(parent instanceof Node)) throw "parent is not instance of Node";
if (parent.children.length > this.nodeCapacity) throw "node's children array is too large";
if (parent.childCount > 0 && parent.childCount * 2 <= parent.children.length) throw "node's fill ratio is too low";
for (let i = parent.childCount; i < parent.children.length; i++) {
if (parent.children[i] !== null) throw "child beyond childCount should be null but is not";
}
let treeSize = parent.treeSize;
if (parent.isLeaf()) {
for (let value of parent.children.slice(0, parent.childCount)) {
if (value === null) throw "leaf has a null as value";
if (value instanceof Node) throw "leaf has a Node as value";
}
if (parent.treeSize !== parent.childCount) throw "leaf has mismatch in treeSize and childCount";
} else {
for (let node of parent.children.slice(0, parent.childCount)) {
if (node === null) throw "internal node has a null as value";
if (!(node instanceof Node)) throw "internal node has a non-Node as value";
if (node.parent !== parent) throw "wrong parent";
if (node.prev !== last) throw "prev link incorrect";
if (last && last.next !== node) throw "next link incorrect";
if (last && last.children.length + node.children.length <= this.nodeCapacity) {
throw "two consecutive siblings have a total number of children that is too small";
}
if (node.childCount * 2 < this.nodeCapacity) {
throw "internal node is too small: " + node;
}
level.push(node);
last = node;
treeSize -= node.treeSize;
}
if (treeSize) throw "internal node treeSize sum mismatches";
}
}
if (last && last.next) throw "last node in level has a next reference";
q = level;
}
}
test(count=100, option=3) {
// option:
// 0 = always insert & delete at left side (offset 0)
// 1 = always insert & delete at right side
// 2 = always insert & delete at middle
// 3 = insert & delete at random offsets
// Create array to perform the same operations on it as on the tree
let arr = [];
// Perform a series of insertions
for (let i = 0; i < count; i++) {
// Choose random insertion index
let index = Array.isArray(option) ? option[i] : [0, i, i >> 1, Math.floor(Math.random() * (i+1))][option];
// Perform same insertion in array and tree
arr.splice(index, 0, i);
this.insertItemAt(index, i);
// Verify tree consistency and properties
this.verify();
// Verify the order of values in the array is the same as in the tree
if (arr+"" !== [...this]+"") throw i + ": tree not same as array";
}
// Perform a series of updates
for (let i = 0; false && i < count; i++) {
// Choose random update index
let index = Math.floor(Math.random() * (i+1));
// Perform same insertion in array and tree
arr[index] += count;
this.setItemAt(index, this.getItemAt(index) + count);
// Verify tree consistency and properties
this.verify();
// Verify the order of values in the array is the same as in the tree
if (arr+"" !== [...this]+"") throw "tree not same as array";
}
// Perform a series of deletions
for (let i = arr.length - 1; i >= 0; i--) {
// Choose random deletion index
let index = [0, i, i >> 1, Math.floor(Math.random() * (i+1))][option];
// Perform same deletion in array and tree
arr.splice(index, 1);
this.removeItemAt(index);
// Verify tree consistency and properties
this.verify();
// Verify the order of values in the array is the same as in the tree
if (arr+"" !== [...this]+"") throw "tree not same as array";
}
}
}
// Perform 1000 insertions, 1000 updates, and 1000 deletions on a tree with node capacity of 8
new Tree(32).test(1000);
console.log("all tests completed");
Run Code Online (Sandbox Code Playgroud)
我基本上已经开始尝试将此代码重构为独立函数而不是面向对象的方法,因此我可以使其更接近程序集可能需要的内容。但我用 JavaScript 提问,因为它对我来说是最熟悉的,也很容易理解。在这样做的过程中,我开始四处寻找需要将索引的支持更改为对键的支持的地方,但我不完全确定这会有多大的变化。
我想看看实现是什么样的,这样我就可以更好地了解具有这些类型约束的实际 B+ 搜索树是什么样的。我在 GitHub 上看到了 JavaScript 中 B+trees 的每一个实现,但它们都缺少复杂的remove
方法,它们的实现方式千差万别,并且没有我正在从事的项目所需的额外约束,所以与整数索引相比,很难说键到底应该做什么。这是我一直在研究的一个 B+tree 实现,这是我发现的唯一一个remove
也在 JavaScript 中使用的方法。
除此之外,我想知道在上面的这个实现中到底需要改变什么来添加对键的支持。以及键应该如何在内部和叶级别工作(即,您是否在每个父节点上存储叶的开始和结束键?或者键如何准确工作以使其高效?)。
一方面,似乎只需要更改几个地方的代码(如locate
方法),但另一方面,似乎仅添加对键的支持就需要完全重写整个事情。我不确定,希望对此进行澄清。
const locateNode = (tree, key) => {
let node = tree.root
while (!checkIfLeaf(node)) {
let index = 0
let child = node.children[index]
while (key != child.key) {
child = node.children[++index]
}
node = child
}
return node
}
Run Code Online (Sandbox Code Playgroud)
你是对的,该locate
方法是变化最大的方法。您的实现进行线性搜索,这很好,只是while
条件应该进行<
比较而不是!=
。为了供您比较,我在下面提供了一个执行二分搜索而不是线性搜索的实现。
当您想要键/值对时,我将首先为这样的对创建一个类:
class KeyValue {
constructor(key, value) {
this.key = key;
this.value = value;
}
}
Run Code Online (Sandbox Code Playgroud)
然后将这些值而不是普通值插入到树的底层。在树中设置键/值对的方法将首先确定是否需要更新现有键,或者需要插入新对。我们可以将此方法命名set
为insert
,并且它将同时采用键和值。它将如下开始:
set(key, value) {
let [node, index] = this.locate(key);
if (index < node.childCount && node.children[index].key === key) {
// already present: update the value
node.children[index].value = value;
return;
}
let item = new KeyValue(key, value); // item can be a KeyValue or a Node
Run Code Online (Sandbox Code Playgroud)
...然后它会继续就像您已经拥有它一样,但是插入item
.
您还需要一种方法来key
在树中向上更新属性,以便它始终代表其下方子树中的最小键。删除或插入当然可能会更改最小键是什么,因此在这种情况下需要调用此方法:
updateKey() {
for (let node = this; node; node = node.parent) {
node.key = node.children[0].key;
}
}
Run Code Online (Sandbox Code Playgroud)
请注意,在第一次迭代中,node.children[0]
将引用一个KeyValue
对象,而在其他迭代中它将引用一个Node
实例。
一旦子级不是其父级的第一个子级,我们就可以打破这个循环,但由于index()
(确定这一点)方法进行了扫描,这实际上可能不会提高这里的整体效率。在你的实施中需要考虑的事情......
在 和wipe
方法中moveFrom
,您需要调用该新updateKey
方法,作为函数的最后一条指令。您可以选择为该调用添加条件前缀:
if (start === 0 && this.childCount > 0) this.updateKey();
Run Code Online (Sandbox Code Playgroud)
更换locate
方法。如上所述,我进行了二分搜索。但我认为在 JavaScript 中,与线性搜索相比,二分搜索不会提高效率,因为数组仅限于 32 个值,但在低级语言中,这可能值得编写代码:
locate(key) {
let node = this.root;
let low;
while (true) {
// Binary search among keys
low = 1;
let high = node.childCount;
while (low < high) {
let index = (low + high) >> 1;
if (key >= node.children[index].key) {
low = index + 1;
} else {
high = index;
}
}
low--;
if (node.isLeaf()) break;
node = node.children[low];
}
if (low < node.childCount && key > node.children[low].key) return [node, low+1];
return [node, low];
}
Run Code Online (Sandbox Code Playgroud)
定义一个get
方法,它将检索给定键的值,就像 JavaScript 原生Map#get
方法一样:
get(key) {
let [node, index] = this.locate(key);
if (index < node.childCount) {
let keyValue = node.children[index];
if (keyValue.key === key) return keyValue.value;
}
}
Run Code Online (Sandbox Code Playgroud)
该removeItemAt
方法将变为remove
, 并开始如下:
remove(key) {
let [node, index] = this.locate(key);
if (index >= node.childCount || node.children[index].key !== key) return; // not found
Run Code Online (Sandbox Code Playgroud)
...然后它会继续下去,就像你已经拥有它一样。
迭代器必须返回键/值对。与 JavaScript 如何使用 执行此操作一致Map#entries
,这会将它们生成为对(2 长度数组)。
就是这样。原始代码还包括验证树的一致性和测试方法的方法。当然,这些也需要相应改变。
class KeyValue {
constructor(key, value) {
this.key = key;
this.value = value;
}
}
Run Code Online (Sandbox Code Playgroud)
键的数据类型可以是比较运算符适用的任何数据类型。所以对于字符串或数字它会起作用。在 JavaScript 中,它也适用于实现了valueOf
ortoString
方法