如何将类数组B+树转换为类哈希B+搜索树?

Lan*_*ard 5 javascript arrays algorithm tree data-structures

最近刚学了B+树,在问如何构建一个树数组,其中/出哪些项可以拼接,只允许1、2、4、8、16或32项的数组?,看到一个精彩的答案根据约束实现B+树。总而言之,这个问题基本上是在寻找一种我们可以将其视为数组的树状结构(按索引查找、在索引处删除、在索引处插入等)。此外,实现树节点的数组只能包含 2 个元素的幂,最多 32 个项目(1、2、4、8、16 或 32)。此外,如答案所示,它们应该相对紧凑,基本上确保在创建新的子数组之前每个节点都填充了 32 个项目(即,这样你就不会得到一堆 16 个项目的数组)。

答案有点复杂,我仍在挑选它。但是当我掌握了它的内部细节时,我想看看“真正的 B+ 搜索树”相比之下会是什么样子。也就是说,一个实际上有的 B+树,你把它当作一个哈希表(按键查找,按键删除,用键插入等)。而另一个是通过index,这将是 key 。理想情况下是任意键(可能带有getKey函数),或者只是字符串或整数键。

所以我想知道在原始答案中需要修改什么,复制到这里:

class Node {
    constructor(capacity) {
        // Mimic fixed-size array (avoid accidentally growing it)
        this.children = Object.seal(Array(capacity).fill(null));
        this.childCount = 0; // Number of used slots in children array
        this.treeSize = 0; // Total number of values in this subtree
        // Maintain back-link to parent.
        this.parent = null;
        // Per level in the tree, maintain a doubly linked list
        this.prev = this.next = null;
    }
    setCapacity(capacity) {
        if (capacity < 1) return;
        // Here we make a new array, and copy the data into it
        let children = Object.seal(Array(capacity).fill(null));
        for (let i = 0; i < this.childCount; i++) children[i] = this.children[i];
        this.children = children;
    }
    isLeaf() {
        return !(this.children[0] instanceof Node);
    }
    index() {
        return this.parent.children.indexOf(this);
    }
    updateTreeSize(start, end, sign=1) {        
        let sum = 0;
        if (this.isLeaf()) {
            sum = end - start;
        } else {
            for (let i = start; i < end; i++) sum += this.children[i].treeSize;
        }
        if (!sum) return;
        sum *= sign;
        // Apply the sum change to this node and all its ancestors
        for (let node = this; node; node = node.parent) {
            node.treeSize += sum;
        }
    }
    wipe(start, end) {
        this.updateTreeSize(start, end, -1);
        this.children.copyWithin(start, end, this.childCount);
        for (let i = this.childCount - end + start; i < this.childCount; i++) {
            this.children[i] = null;
        }
        this.childCount -= end - start;
        // Reduce allocated size if possible
        if (this.childCount * 2 <= this.children.length) this.setCapacity(this.children.length / 2);
    }
    moveFrom(neighbor, target, start, count=1) {
        // Note: `start` can have two meanings:
        //   if neighbor is null, it is the value/Node to move to the target
        //   if neighbor is a Node, it is the index from where value(s) have to be moved to the target
        // Make room in target node
        if (this.childCount + count > this.children.length) this.setCapacity(this.children.length * 2);
        this.children.copyWithin(target + count, target, Math.max(target + count, this.childCount));
        this.childCount += count;
        if (neighbor !== null) {
            // Copy the children
            for (let i = 0; i < count; i++) {
                this.children[target + i] = neighbor.children[start + i];
            }
            // Remove the original references
            neighbor.wipe(start, start + count);
        } else {
            this.children[target] = start; // start is value to insert
        }
        this.updateTreeSize(target, target + count, 1);
        // Set parent link(s)
        if (!this.isLeaf()) {
            for (let i = 0; i < count; i++) {
                this.children[target + i].parent = this;
            }
        }
    }
    moveToNext(count) {
        this.next.moveFrom(this, 0, this.childCount - count, count);
    }
    moveFromNext(count) {
        this.moveFrom(this.next, this.childCount, 0, count);
    }
    basicRemove(index) {
        if (!this.isLeaf()) {
            // Take node out of the level's linked list
            let prev = this.children[index].prev;
            let next = this.children[index].next;
            if (prev) prev.next = next;
            if (next) next.prev = prev;
        }
        this.wipe(index, index + 1);
    }
    basicInsert(index, value) {
        this.moveFrom(null, index, value);
        if (value instanceof Node) {
            // Insert node in the level's linked list
            if (index > 0) {
                value.prev = this.children[index-1];
                value.next = value.prev.next;
            } else if (this.childCount > 1) {
                value.next = this.children[1];
                value.prev = value.next.prev;
            }
            if (value.prev) value.prev.next = value;
            if (value.next) value.next.prev = value;
        }
    }
    pairWithSmallest() {            
        return this.prev && (!this.next || this.next.childCount > this.prev.childCount)
            ? [this.prev, this] : [this, this.next];
    }
    toString() {
        return "[" + this.children.map(v => v??"-").join() + "]";
    }
}

class Tree {
    constructor(nodeCapacity=32) {
        this.nodeCapacity = nodeCapacity;
        this.root = new Node(1);
        this.first = this.root; // Head of doubly linked list at bottom level
    }
    locate(offset) {
        let node = this.root;
        // Normalise argument
        offset = offset < 0 ? Math.max(0, node.treeSize + offset) : Math.min(offset, node.treeSize);

        while (!node.isLeaf()) {
            let index = 0;
            let child = node.children[index];
            while (offset > child.treeSize || offset === child.treeSize && child.next) {
                offset -= child.treeSize;
                child = node.children[++index];
            }
            node = child;
        }
        return [node, offset];
    }
    getItemAt(offset) {
        let [node, index] = this.locate(offset);
        if (index < node.childCount) return node.children[index];
    }
    setItemAt(offset, value) {
        let [node, index] = this.locate(offset);
        if (index < node.childCount) node.children[index] = value;
    }
    removeItemAt(offset) {
        let [node, index] = this.locate(offset);
        if (index >= node.childCount) return;

        while (true) {
            console.assert(node.isLeaf() || node.children[index].treeSize === 0);
            node.basicRemove(index);

            // Exit when node's fill ratio is fine
            if (!node.parent || node.childCount * 2 > this.nodeCapacity) return;
            // Node has potentially too few children, we should either merge or redistribute
            
            let [left, right] = node.pairWithSmallest();
            
            if (!left || !right) { // A node with no siblings? Must become the root!
                this.root = node;
                node.parent = null;
                return;
            }
            let sumCount = left.childCount + right.childCount;
            let childCount = sumCount >> 1;
            
            // Check whether to merge or to redistribute
            if (sumCount > this.nodeCapacity) { // redistribute
                // Move some data from the bigger to the smaller node
                let shift = childCount - node.childCount;
                if (!shift) { // Boundary case: when a redistribution would bring no improvement
                    console.assert(node.childCount * 2 === this.nodeCapacity && sumCount === this.nodeCapacity + 1);
                    return;
                }
                if (node === left) { // move some children from right to left
                    left.moveFromNext(shift);
                } else { // move some children from left to right
                    left.moveToNext(shift);
                }
                return;
            }
            
            // Merge:
            // Move all data from the right to the left
            left.moveFromNext(right.childCount);
            // Prepare to delete right node
            node = right.parent;
            index = right.index();
        }
    }
    insertItemAt(offset, value) {
        let [node, index] = this.locate(offset);
        while (node.childCount === this.nodeCapacity) { // No room here
            if (index === 0 && node.prev && node.prev.childCount < this.nodeCapacity) {
                return node.prev.basicInsert(node.prev.childCount, value);
            }
            // Check whether we can redistribute (to avoid a split)
            if (node !== this.root) {
                let [left, right] = node.pairWithSmallest();
                let joinedIndex = left === node ? index : left.childCount + index;
                let sumCount = left.childCount + right.childCount + 1;
                if (sumCount <= 2 * this.nodeCapacity) { // redistribute
                    let childCount = sumCount >> 1;
                    if (node === right) { // redistribute to the left
                        let insertInLeft = joinedIndex < childCount;
                        left.moveFromNext(childCount - left.childCount - +insertInLeft);
                    } else { // redistribute to the right
                        let insertInRight = index >= sumCount - childCount;
                        left.moveToNext(childCount - right.childCount - +insertInRight);
                    }
                    if (joinedIndex > left.childCount || 
                            joinedIndex === left.childCount && left.childCount > right.childCount) {
                        right.basicInsert(joinedIndex - left.childCount, value);
                    } else {
                        left.basicInsert(joinedIndex, value);
                    }
                    return;
                }
            }
            // Cannot redistribute: split node
            let childCount = node.childCount >> 1;
            // Create a new node that will later become the right sibling of this node
            let sibling = new Node(childCount);
            // Move half of node node's data to it
            sibling.moveFrom(node, 0, childCount, childCount);
            // Insert the value in either the current node or the new one
            if (index > node.childCount) {
                sibling.basicInsert(index - node.childCount, value);
            } else {
                node.basicInsert(index, value);
            }
            // Is this the root? 
            if (!node.parent) {
                // ...then first create a parent, which is the new root
                this.root = new Node(2);
                this.root.basicInsert(0, node);
            }
            // Prepare for inserting the sibling node into the tree
            index = node.index() + 1;
            node = node.parent;
            value = sibling;
        }
        node.basicInsert(index, value);
    }
    /* Below this point: these methods are optional */
    * [Symbol.iterator]() { // Make tree iterable
        let i = 0;
        for (let node = this.first; node; node = node.next) {
            for (let i = 0; i < node.childCount; i++) yield node.children[i];
        }
    }
    print() {
        console.log(this.root && this.root.toString());
    }
    verify() {
        // Raise an error when the tree violates one of the required properties
        if (!this.root) return; // An empty tree is fine.
        if (this.root.parent) throw "root should not have a parent";
        // Perform a breadth first traversal
        let q = [this.root];
        while (q.length) {
            if (q[0].isLeaf() && this.first !== q[0]) throw "this.first is not pointing to first leaf";
            let level = [];
            let last = null;
            for (let parent of q) {
                if (!(parent instanceof Node)) throw "parent is not instance of Node";
                if (parent.children.length > this.nodeCapacity) throw "node's children array is too large";
                if (parent.childCount > 0 && parent.childCount * 2 <= parent.children.length) throw "node's fill ratio is too low";
                for (let i = parent.childCount; i < parent.children.length; i++) {
                    if (parent.children[i] !== null) throw "child beyond childCount should be null but is not";
                }
                let treeSize = parent.treeSize;
                if (parent.isLeaf()) {
                    for (let value of parent.children.slice(0, parent.childCount)) {
                        if (value === null) throw "leaf has a null as value";
                        if (value instanceof Node) throw "leaf has a Node as value";
                    }
                    if (parent.treeSize !== parent.childCount) throw "leaf has mismatch in treeSize and childCount";
                } else {
                    for (let node of parent.children.slice(0, parent.childCount)) {
                        if (node === null) throw "internal node has a null as value";
                        if (!(node instanceof Node)) throw "internal node has a non-Node as value";
                        if (node.parent !== parent) throw "wrong parent";
                        if (node.prev !== last) throw "prev link incorrect";
                        if (last && last.next !== node) throw "next link incorrect";
                        if (last && last.children.length + node.children.length <= this.nodeCapacity) {
                            throw "two consecutive siblings have a total number of children that is too small";
                        }
                        if (node.childCount * 2 < this.nodeCapacity) {
                            throw "internal node is too small: " + node;
                        }
                        level.push(node);
                        last = node;
                        treeSize -= node.treeSize;
                    }
                    if (treeSize) throw "internal node treeSize sum mismatches";
                }
            }
            if (last && last.next) throw "last node in level has a next reference";
            q = level;
        }
    }
    test(count=100, option=3) {
        // option:
        //     0 = always insert & delete at left side (offset 0)
        //     1 = always insert & delete at right side
        //     2 = always insert & delete at middle
        //     3 = insert & delete at random offsets
        // Create array to perform the same operations on it as on the tree
        let arr = [];
        // Perform a series of insertions
        for (let i = 0; i < count; i++) {
            // Choose random insertion index
            let index = Array.isArray(option) ? option[i] : [0, i, i >> 1, Math.floor(Math.random() * (i+1))][option];
            // Perform same insertion in array and tree
            arr.splice(index, 0, i);
            this.insertItemAt(index, i);
            // Verify tree consistency and properties
            this.verify();
            // Verify the order of values in the array is the same as in the tree
            if (arr+"" !== [...this]+"") throw i + ": tree not same as array";
        }
        // Perform a series of updates
        for (let i = 0; false && i < count; i++) {
            // Choose random update index
            let index = Math.floor(Math.random() * (i+1));
            // Perform same insertion in array and tree
            arr[index] += count;
            this.setItemAt(index, this.getItemAt(index) + count);
            // Verify tree consistency and properties
            this.verify();
            // Verify the order of values in the array is the same as in the tree
            if (arr+"" !== [...this]+"") throw "tree not same as array";
        }
        // Perform a series of deletions
        for (let i = arr.length - 1; i >= 0; i--) {
            // Choose random deletion index
            let index = [0, i, i >> 1, Math.floor(Math.random() * (i+1))][option];
            // Perform same deletion in array and tree
            arr.splice(index, 1);
            this.removeItemAt(index);
            // Verify tree consistency and properties
            this.verify();
            // Verify the order of values in the array is the same as in the tree
            if (arr+"" !== [...this]+"") throw "tree not same as array";
        }
    }
}

// Perform 1000 insertions, 1000 updates, and 1000 deletions on a tree with node capacity of 8
new Tree(32).test(1000);
console.log("all tests completed");
Run Code Online (Sandbox Code Playgroud)

我基本上已经开始尝试将此代码重构为独立函数而不是面向对象的方法,因此我可以使其更接近程序集可能需要的内容。但我用 JavaScript 提问,因为它对我来说是最熟悉的,也很容易理解。在这样做的过程中,我开始四处寻找需要将索引的支持更改为对键的支持的地方,但我不完全确定这会有多大的变化。

我想看看实现是什么样的,这样我就可以更好地了解具有这些类型约束的实际 B+ 搜索树是什么样的。我在 GitHub 上看到了 JavaScript 中 B+trees 的每一个实现,但它们都缺少复杂的remove方法,它们的实现方式千差万别,并且没有我正在从事的项目所需的额外约束,所以与整数索引相比,很难说键到底应该做什么。是我一直在研究的一个 B+tree 实现,这是我发现的唯一一个remove也在 JavaScript 中使用的方法。

除此之外,我想知道在上面的这个实现中到底需要改变什么来添加对键的支持。以及键应该如何在内部和叶级别工作(即,您是否在每个父节点上存储叶的开始和结束键?或者键如何准确工作以使其高效?)。

一方面,似乎只需要更改几个地方的代码(如locate方法),但另一方面,似乎仅添加对键的支持就需要完全重写整个事情。我不确定,希望对此进行澄清。

const locateNode = (tree, key) => {
  let node = tree.root

  while (!checkIfLeaf(node)) {
    let index = 0
    let child = node.children[index]
    while (key != child.key) {
      child = node.children[++index]
    }
    node = child
  }
  
  return node
}
Run Code Online (Sandbox Code Playgroud)

tri*_*cot 2

你是对的,该locate方法是变化最大的方法。您的实现进行线性搜索,这很好,只是while条件应该进行<比较而不是!=。为了供您比较,我在下面提供了一个执行二分搜索而不是线性搜索的实现。

改变什么

  • 当您想要键/值对时,我将首先为这样的对创建一个类:

    class KeyValue {
        constructor(key, value) {
            this.key = key; 
            this.value = value;
        }
    }
    
    Run Code Online (Sandbox Code Playgroud)
  • 然后将这些值而不是普通值插入到树的底层。在树中设置键/值对的方法将首先确定是否需要更新现有键,或者需要插入新对。我们可以将此方法命名setinsert,并且它将同时采用键和值。它将如下开始:

    set(key, value) {
        let [node, index] = this.locate(key);
        if (index < node.childCount && node.children[index].key === key) {
            // already present: update the value
            node.children[index].value = value;
            return;
        }
        let item = new KeyValue(key, value); // item can be a KeyValue or a Node
    
    Run Code Online (Sandbox Code Playgroud)

    ...然后它会继续就像您已经拥有它一样,但是插入item.

  • 您还需要一种方法来key在树中向上更新属性,以便它始终代表其下方子树中的最小键。删除或插入当然可能会更改最小键是什么,因此在这种情况下需要调用此方法:

    updateKey() {
        for (let node = this; node; node = node.parent) {
            node.key = node.children[0].key;
        }
    }
    
    Run Code Online (Sandbox Code Playgroud)

    请注意,在第一次迭代中,node.children[0]将引用一个KeyValue对象,而在其他迭代中它将引用一个Node实例。

    一旦子级不是其父级的第一个子级,我们就可以打破这个循环,但由于index()(确定这一点)方法进行了扫描,这实际上可能不会提高这里的整体效率。在你的实施中需要考虑的事情......

  • 在 和wipe方法中moveFrom,您需要调用该新updateKey方法,作为函数的最后一条指令。您可以选择为该调用添加条件前缀:

      if (start === 0 && this.childCount > 0) this.updateKey();
    
    Run Code Online (Sandbox Code Playgroud)
  • 更换locate方法。如上所述,我进行了二分搜索。但我认为在 JavaScript 中,与线性搜索相比,二分搜索不会提高效率,因为数组仅限于 32 个值,但在低级语言中,这可能值得编写代码:

    locate(key) {
        let node = this.root;
        let low;
        while (true) {
            // Binary search among keys
            low = 1;
            let high = node.childCount;
            while (low < high) {
                let index = (low + high) >> 1;
                if (key >= node.children[index].key) {
                    low = index + 1;
                } else {
                    high = index;
                }
            }
            low--;
            if (node.isLeaf()) break;
            node = node.children[low];
        }
        if (low < node.childCount && key > node.children[low].key) return [node, low+1];
        return [node, low];
    }
    
    Run Code Online (Sandbox Code Playgroud)
  • 定义一个get方法,它将检索给定键的值,就像 JavaScript 原生Map#get方法一样:

    get(key) {
        let [node, index] = this.locate(key);
        if (index < node.childCount) {
            let keyValue = node.children[index];
            if (keyValue.key === key) return keyValue.value;
        }
    }
    
    Run Code Online (Sandbox Code Playgroud)
  • removeItemAt方法将变为remove, 并开始如下:

    remove(key) {
        let [node, index] = this.locate(key);
        if (index >= node.childCount || node.children[index].key !== key) return; // not found
    
    Run Code Online (Sandbox Code Playgroud)

    ...然后它会继续下去,就像你已经拥有它一样。

  • 迭代器必须返回键/值对。与 JavaScript 如何使用 执行此操作一致Map#entries,这会将它们生成为对(2 长度数组)。

就是这样。原始代码还包括验证树的一致性和测试方法的方法。当然,这些也需要相应改变。

执行

class KeyValue {
    constructor(key, value) {
        this.key = key; 
        this.value = value;
    }
}
Run Code Online (Sandbox Code Playgroud)

支持的数据类型

的数据类型可以是比较运算符适用的任何数据类型。所以对于字符串或数字它会起作用。在 JavaScript 中,它也适用于实现了valueOfortoString方法