在四叉树中存储十亿个多边形

Jan*_*ora 1 c++ spatial

我需要在四叉树中存储10亿个空间多边形(使用它们的最小边界矩形指定).为此,我编写了以下代码.然而,事实证明,对于10亿个点,代码运行速度非常慢.有没有什么方法可以改进代码,以便它可以运行得更快.如果是,那么有人可以请帮助

//---------------------------------------------------------------------------
struct MBR
{
    double xRight, xLeft, yBottom, yTop;
    MBR *zero,*first,*second,*third;
    unsigned level=0;
    vector<unsigned> result; //contains the resulting intersecting spatial ids
};
bool intersects(MBR& spatialId,MBR& mbr) 
{
    if (mbr.yBottom > spatialId.yTop || mbr.yTop < spatialId.yBottom) return false;
    if (mbr.xLeft > spatialId.xRight || mbr.xRight < spatialId.xLeft) return false;        
    return true;    
}
//---------------------------------------------------------------------------
bool contains(MBR& spatialId,MBR& mbr) 
{
    if (mbr.yBottom > spatialId.yBottom || mbr.yTop < spatialId.yTop) return false;
    if (mbr.xLeft > spatialId.xLeft || mbr.xRight < spatialId.xRight) return false;
    return true;    
}
//---------------------------------------------------------------------------
bool touches(MBR& spatialId,MBR& mbr) 
{
    if (    (mbr.yBottom >= spatialId.yBottom + std::numeric_limits<double>::epsilon() && 
            mbr.yBottom <= spatialId.yBottom - std::numeric_limits<double>::epsilon()) ||
            (mbr.yTop >= spatialId.yTop + std::numeric_limits<double>::epsilon() &&
            mbr.yTop <= spatialId.yTop - std::numeric_limits<double>::epsilon()))
            return true;
    if (    (mbr.xLeft >= spatialId.xLeft + std::numeric_limits<double>::epsilon() &&
            mbr.xLeft <= spatialId.xLeft - std::numeric_limits<double>::epsilon()) ||
            (mbr.xRight >= spatialId.xRight + std::numeric_limits<double>::epsilon() &&
            mbr.xRight <= spatialId.xRight - std::numeric_limits<double>::epsilon()))
            return true;    
    return false;    
}
//---------------------------------------------------------------------------
MBR MBR1,MBR2,MBR3,MBR4;
vector<unsigned> spatialIds; //contain 1 billion spatial identifiers which are intersected with MBR1, MBR2, MBR3, MBR4
//MBR1, MBR2, MBR3, MBR4 are again specified using their Minimum Bounding Rectangles    
stack<MBR**> stackQuadTree;
MBR *root=new MBR();
(*root).yBottom=-90; (*root).yTop=90;
(*root).xLeft=-180; (*root).xRight=180;
(*root).level=0;
stackQuadTree.push(&root);

while(!stackQuadTree.empty())
{
    MBR** node=&(*stackQuadTree.front());
    if((*node)->level==50)
        break;

    (*node)->zero=new MBR(); (*node)->first=new MBR(); (*node)->second=new MBR(); (*node)->third=new MBR();
    (*node)->zero->yBottom=(*node)->yBottom; (*node)->zero->yTop=((*node)->yBottom+(*node)->yTop)/2;
    (*node)->zero->xLeft=(*node)->xLeft; (*node)->zero->xRight=((*node)->xLeft+(*node)->xRight)/2;                        
    (*node)->zero->level=(*node)->level+1;

    (*node)->first->yBottom=((*node)->yBottom+(*node)->yTop)/2; (*node)->first->yTop=(*node)->yTop; 
    (*node)->first->xLeft=(*node)->xLeft; (*node)->first->xRight=((*node)->xLeft+(*node)->xRight)/2;          
    (*node)->first->level=(*node)->level+1;

    (*node)->second->yBottom=(*node)->yBottom; (*node)->second->yTop=((*node)->yBottom+(*node)->yTop)/2;
    (*node)->second->xLeft=((*node)->xLeft+(*node)->xRight)/2; (*node)->second->xRight=(*node)->xRight;        
    (*node)->second->level=(*node)->level+1;

    (*node)->third->yBottom=((*node)->yBottom+(*node)->yTop)/2; (*node)->third->yTop=(*node)->yTop;
    (*node)->third->xLeft=((*node)->xLeft+(*node)->xRight)/2; (*node)->third->xRight=(*node)->xRight;                        
    (*node)->third->level=(*node)->level+1;

    MBR* node=*stackQuadTree.top();
    stackQuadTree.pop();        
    for(vector<MBR>::iterator itSpatialId=spatialIds.begin(),lSpatialId=spatialIds.end();itSpatialId!=lSpatialId;++itSpatialId)
    {
        if(intersects((*itSpatialId),&(*node)->zero)||contains((*itSpatialId),&(*node)->zero)||touches((*itSpatialId),&(*node)->zero))
        {
            (*node)->zero->result.push_back((*itSpatialId));
            stackQuadTree.push(*(*node)->zero);
        }

        if(intersects((*itSpatialId),&(*node)->first)||contains((*itSpatialId),&(*node)->first)||touches((*itSpatialId),&(*node)->first))
        {
            (*node)->first->result.push_back((*itSpatialId));
            stackQuadTree.push(*(*node)->first);
        }                    

        if(intersects((*itSpatialId),&(*node)->second)||contains((*itSpatialId),&(*node)->second)||touches((*itSpatialId),&(*node)->second))
        {
            (*node)->second->result.push_back((*itSpatialId));
            stackQuadTree.push(*(*node)->second);
        }   

        if(intersects((*itSpatialId),&(*node)->third)||contains((*itSpatialId),&(*node)->third)||touches((*itSpatialId),&(*node)->third))
        {
            (*node)->third->result.push_back((*itSpatialId));
            stackQuadTree.push(*(*node)->third);
        }   
    }
}
Run Code Online (Sandbox Code Playgroud)

Dan*_*šek 8

Since the requirement is to handle 109 records, and even with the most compact representation each record is 16 Bytes (4 * sizeof(float)), we would need 16 GiB just to keep those in memory. As we want to use the bounding boxes of polygons for indexing, it is possible for each bounding box to belong to multiple quadrants. If there is a lot of relatively large polygons or many overlaps, the memory requirements of the whole quadtree could significantly increase. That suggests an iterative approach to building the quadtree, where we use files to store the lists of bounding boxes matching each node, as well as the node information for the entire tree.


Quadtree Library

Rectangle Representation

We use rectangles to represent two things:

  • The bounding box of the polygon (i.e. our input data).
  • 象限的边界框(四叉树中的节点).

我们还需要对矩形执行一些操作,例如将它们细分为象限,或者找出一对矩形是否相交.让我们编写一个简单的类来封装它.

rectangle.hpp

#pragma once

#include <cstdint>

template<typename T = float>
struct rectangle_t
{
    typedef typename T value_type;

    rectangle_t() : left(0.0f), top(0.0f), right(0.0f), bottom(0.0f) {}
    rectangle_t(value_type x1, value_type y1, value_type x2, value_type y2)
        : left(x1), top(y1), right(x2), bottom(y2)
    {
    }

    bool intersects(rectangle_t<T> const& other) const;
    bool contains(rectangle_t<T> const& other) const;
    bool touches(rectangle_t<T> const& other) const;
    rectangle_t<T> quadrant(uint32_t n) const;

    value_type left, top, right, bottom;
};

template<typename T>
inline bool rectangle_t<T>::intersects(rectangle_t<T> const& other) const
{
    return !((left > other.right)
        || (right < other.left)
        || (top > other.bottom)
        || (bottom < other.top));
}

template<typename T>
inline bool rectangle_t<T>::contains(rectangle_t<T> const& other) const
{
    return !((left >= other.left)
        || (right <= other.right)
        || (top >= other.top)
        || (bottom <= other.bottom));
}

template<typename T>
inline bool rectangle_t<T>::touches(rectangle_t<T> const& other) const
{
    return ((left == other.right)
        || (right == other.left)
        || (top == other.bottom)
        || (bottom == other.top));
}

template<typename T>
inline rectangle_t<T> rectangle_t<T>::quadrant(uint32_t n) const
{
    value_type const center_x((left + right) / 2);
    value_type const center_y((top + bottom) / 2);
    switch (n & 0x03) {
    case 0: return rectangle_t<T>(left, top, center_x, center_y);
    case 1: return rectangle_t<T>(center_x, top, right, center_y);
    case 2: return rectangle_t<T>(left, center_y, center_x, bottom);
    case 3: return rectangle_t<T>(center_x, center_y, right, bottom);
    }
    return *this; // Can't happen since we mask n
}

typedef rectangle_t<float> rectangle;
Run Code Online (Sandbox Code Playgroud)

注意:虽然我们还提供方法contains()touches(),我们并不真正需要他们-如果矩形A包含矩形B,然后两人还相交.类似地,如果两个矩形具有一对重叠的边,则它们也被认为是相交的.


矩形序列化

下一步是开发一种将矩形存储在文件中的简单方法.我们可以使用简单的二进制文件,其布局与对象在内存中的表示方式相对应.

矩形布局

该文件包含单个对象序列,没有标题或其他任何内容.

矩形文件布局

Since we wish to process the data in blocks, we implement simple reader and writer classes providing this functionality.

rectangle_file.hpp

#pragma once

#include "rectangle.hpp"
#include "config.hpp"

#include <fstream>
#include <string>
#include <vector>

typedef std::vector<rectangle> rectangle_vec;

class rectangle_writer
{
public:
    rectangle_writer(std::string const& file_name);
    rectangle_writer(rectangle_writer const&) = delete;

    void write_block(rectangle_vec const& block);

    uint64_t count() const { return count_; }

private:
    std::ofstream f_;
    uint64_t count_;
};

class rectangle_reader
{
public:
    rectangle_reader(std::string const& file_name);
    rectangle_reader(rectangle_reader const&) = delete;

    void read_block(rectangle_vec& block, uint64_t n = BLOCK_SIZE);

    uint64_t count() const { return count_; }
    uint64_t total_count() const { return total_count_; }
    uint64_t remaining_count() const { return total_count_ - count_; }

private:
    std::ifstream f_;
    uint64_t count_;
    uint64_t total_count_;
};
Run Code Online (Sandbox Code Playgroud)

rectangle_file.cpp

#include "rectangle_file.hpp"

#include <algorithm>

rectangle_writer::rectangle_writer(std::string const& file_name)
    : f_(file_name, std::ios_base::binary)
    , count_(0)
{
    if (!f_) {
        throw std::runtime_error("Unable to open index file.");
    }
}

void rectangle_writer::write_block(rectangle_vec const& block)
{
    f_.write(reinterpret_cast<char const*>(&block[0])
        , block.size() * sizeof(rectangle));
    count_ += static_cast<uint32_t>(block.size());
    if (!f_) {
        throw std::runtime_error("Read error.");
    }
}

rectangle_reader::rectangle_reader(std::string const& file_name)
    : f_(file_name, std::ios_base::binary)
    , count_(0)
{
    f_.seekg(0, std::ios::end);
    total_count_ = static_cast<uint32_t>(f_.tellg() / sizeof(rectangle));
    f_.seekg(0, std::ios::beg);
}

void rectangle_reader::read_block(rectangle_vec& block, uint64_t n)
{
    uint64_t to_read(std::min(remaining_count(), n));
    block.resize(to_read);
    if (to_read) {
        f_.read(reinterpret_cast<char*>(&block[0])
            , to_read * sizeof(rectangle));
        count_ += to_read;
        if (!f_) {
            throw std::runtime_error("Write error.");
        }
    }
}
Run Code Online (Sandbox Code Playgroud)

Quadtree Representation

Due to the large number of nodes we are likely to work with, we keep all the nodes in an array and instead of pointers use node indices to represent the hierarchy. Index of a node is determined by its position in the array, so we don't need to store that explicitly.

We use a compact structure (node_info) to represent the relevant node information:

  • Rectangle defining the bounding box of the quadrant.
  • Array of 4 indices of the child nodes. (We use 32bit indices, limiting us to ~4 billion nodes)

The list of matching rectangles for each node is stored in a data file, named based on the node index (node_XXXXXXXX.dat).

Node Flyweight

In order to encapsulate the representation of the quadtree, and expose a familiar interface for manipulating it, we create a flyweight class node, holding a reference to the quadtree, the index of the node, and current level in the tree (16 Bytes). This class provides all the functionality necessary to build and use the quadtree:

  • Reading and writing the bounding box.
  • Reading and writing the lists of matching rectangles.
  • Access to child nodes, node index, current level, ...
  • Adding child nodes

quadtree.hpp

#pragma once

#include "rectangle.hpp"
#include "rectangle_file.hpp"

#include <string>
#include <vector>

class quadtree
{
public:
    class node
    {
    public:
        rectangle& bounds();
        rectangle const& bounds() const;

        node child(uint32_t i) const;

        rectangle_reader reader() const;
        rectangle_writer writer() const;

        std::string rectangle_file_name() const;

        void add_child_nodes() const;

        bool is_leaf_node() const;

        uint32_t index() const;
        uint32_t level() const;

    private:
        friend class quadtree;
        node(quadtree& tree, uint32_t index, uint32_t level);

        quadtree& tree_;
        uint32_t index_;
        uint32_t level_;
    };

public:
    enum {
        ROOT_NODE_INDEX = 0
        , QUADRANT_COUNT = 4
    };

    quadtree(quadtree&& other);
    ~quadtree();

    static quadtree create(std::string const& dir_name
        , rectangle const& bounds);
    static quadtree load(std::string const& dir_name);

    void save();

    node get_node(uint32_t i = ROOT_NODE_INDEX);
    uint32_t node_count() const;

private:
    quadtree(std::string const& dir_name);
    std::string index_file_name() const;

    uint32_t add_node(rectangle const& bounds);
    void add_child_nodes(uint32_t n);

private:
    struct node_info
    {
        node_info();
        explicit node_info(rectangle const& bounds);

        rectangle bounds;
        uint32_t child[4];
    };
    typedef std::vector<node_info> node_list;

    std::string dir_name_;
    node_list nodes_;
};

inline quadtree::node::node(quadtree& tree, uint32_t index, uint32_t level)
    : tree_(tree)
    , index_(index)
    , level_(level)
{
}

inline rectangle& quadtree::node::bounds()
{
    return tree_.nodes_[index_].bounds;
}

inline rectangle const& quadtree::node::bounds() const
{
    return tree_.nodes_[index_].bounds;
}

inline quadtree::node quadtree::node::child(uint32_t i) const
{
    return node(tree_, tree_.nodes_[index_].child[i], level_ + 1);
}

inline bool quadtree::node::is_leaf_node() const
{
    return !((tree_.nodes_[index_].child[0])
        || (tree_.nodes_[index_].child[1])
        || (tree_.nodes_[index_].child[2])
        || (tree_.nodes_[index_].child[3]));
}

inline uint32_t quadtree::node::index() const
{
    return index_;
}

inline uint32_t quadtree::node::level() const
{
    return level_;
}
Run Code Online (Sandbox Code Playgroud)

Quadtree Serialization

The layout on disk again corresponds to how the objects are represented in memory.

节点布局

The node file is a simple binary file containing single sequence of objects:

节点文件布局


For illustration, this is how a 3 level quadtree (root + 2 levels of children) with 1000 rectangles appears on disk:

Quadtree with 3 levels and 1000 rectangles


Configuration

config.hpp

#pragma once

#define BLOCK_SIZE (1024 * 8)

#define QUADTREE_LOGGING 0
#define WORKQUEUE_LOGGING 0
#define PROGRESS_LOGGING 1
Run Code Online (Sandbox Code Playgroud)

Processing

We base our processing on the following observations:

  • A tree with only the root node is still a valid tree.
  • We can subdivide any valid tree to get a deeper tree with more detailed categorization.

Quadtree Initialization

The initial step is to create the simplest valid quadtree possible: one with only the root node, containing all the bounding boxes.

Since I didn't have any data I could use, I wrote a simple generator that creates a configurable number of random rectangles and uses them to initialize the tree.

gen_rnd_tree.cpp

#include "rectangle.hpp"
#include "rectangle_file.hpp"
#include "quadtree.hpp"
#include "config.hpp"

#include <algorithm>
#include <cstdint>
#include <chrono>
#include <iostream>
#include <random>
#include <vector>

#if defined(_WIN32)
#include <direct.h>
#endif

void generate_random(uint32_t n, quadtree& qt)
{
    rectangle_writer writer(qt.get_node().writer());

    std::vector<rectangle> buffer;
    buffer.reserve(BLOCK_SIZE);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<rectangle::value_type> dis_x(-179.5f, 179.5f);
    std::uniform_real_distribution<rectangle::value_type> dis_y(-89.5f, 89.5f);
    std::exponential_distribution<rectangle::value_type> dis_wh(1.0f);

    for (uint32_t i(0); i < n; ++i) {
        rectangle::value_type x(dis_x(gen));
        rectangle::value_type y(dis_y(gen));
        rectangle::value_type half_w(std::min(dis_wh(gen), 10.0f) * 0.05f);
        rectangle::value_type half_h(std::min(dis_wh(gen), 10.0f) * 0.05f);
        buffer.emplace_back(x - half_w, y - half_h, x + half_w, y + half_h);

        if (buffer.size() >= BLOCK_SIZE) {
            writer.write_block(buffer);
            buffer.clear();
#if(PROGRESS_LOGGING)
            std::cout << ".";
#endif
        }
    }

    if (!buffer.empty()) {
        writer.write_block(buffer);
    }
#if(PROGRESS_LOGGING)
    std::cout << ".\n";
#endif
}

int main(int argc, char* argv[])
{
    using std::chrono::high_resolution_clock;
    using std::chrono::duration_cast;
    using std::chrono::microseconds;

    if (argc != 2) return -1;

    std::string const TREE_DIR("tree");

#if defined(_WIN32)
    _mkdir(TREE_DIR.c_str());
#else 
    mkdir(TREE_DIR.c_str(), 0777);
#endif

    uint32_t const N_POLYGONS(atoi(argv[1]));

    std::cout << "Polygon count = " << N_POLYGONS << "\n";
    std::cout << "Polygon size = " << sizeof(rectangle) << " B\n";
    std::cout << "Total size = " << N_POLYGONS * sizeof(rectangle) << " B\n";

    std::cout << "\nGenerating...\n";

    high_resolution_clock::time_point t1 = high_resolution_clock::now();

    quadtree qt(quadtree::create("tree", rectangle(-180,-90,180,90)));

    generate_random(N_POLYGONS, qt);

    high_resolution_clock::time_point t2 = high_resolution_clock::now();

    std::cout << "\n";

    double dt1_us(static_cast<double>(duration_cast<microseconds>(t2 - t1).count()));
    std::cout << "Generate: " << (dt1_us / 1000.0) << " ms\n";
    std::cout << "\nDone.\n\n";
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

Quadtree Subdivision

Function subdivide(quadtree& qt,...) performs a depth-first traversal of the tree using a stack. We could do a breadth-first traversal with a queue, but the queue would grow very large once we get deep enough in the tree.

We repeatedly consume nodes from the top of the stack, attempt to subdivide them, and push their child nodes on the stack.

Function subdivide(quadtree::node const& qtn, ...) makes the decisions whether to subdivide given node, and prepares the parameters for copy_elements(...). Once the node is subdivided, and all the matching rectangles have been moved to child nodes, we clear the rectangle file for the given node.

Function copy_elements(...) reads the list of matching rectangles from the parent node, and copies them to all the child nodes whose bounding boxes they intersect.

#include "rectangle.hpp"
#include "rectangle_file.hpp"
#include "quadtree.hpp"

#include <algorithm>
#include <cstdint>
#include <chrono>
#include <iostream>
#include <stack>

using std::chrono::high_resolution_clock;
using std::chrono::duration_cast;
using std::chrono::microseconds;

void copy_elements(rectangle_reader& reader, quadtree::node child_node[4])
{
    rectangle target[quadtree::QUADRANT_COUNT] {
         child_node[0].bounds()
        , child_node[1].bounds()
        , child_node[2].bounds()
        , child_node[3].bounds()
    };

    rectangle_writer writer[quadtree::QUADRANT_COUNT] {
        child_node[0].writer()
        , child_node[1].writer()
        , child_node[2].writer()
        , child_node[3].writer()
    };

    rectangle_vec buffer_in;
    buffer_in.reserve(BLOCK_SIZE);
    rectangle_vec buffer_out[quadtree::QUADRANT_COUNT];
    for (auto& buffer : buffer_out) {
        buffer.reserve(BLOCK_SIZE);
    }

#if(PROGRESS_LOGGING)
    for (; reader.remaining_count(); std::cout << ".") {
#else
    for (; reader.remaining_count();) {
#endif
        reader.read_block(buffer_in, BLOCK_SIZE);

        for (auto const& rect : buffer_in) {
            for (uint32_t i(0); i < quadtree::QUADRANT_COUNT; ++i) {
                if (target[i].intersects(rect)) {
                    buffer_out[i].push_back(rect);
                }
            }
        }

        for (uint32_t i(0); i < quadtree::QUADRANT_COUNT; ++i) {
            if (!buffer_out[i].empty()) {
                writer[i].write_block(buffer_out[i]);
                buffer_out[i].clear();
            }
        }
    }
}

// Return true if we should subdivide further, false otherwise
bool subdivide(quadtree::node const& qtn, uint32_t max_depth, uint32_t max_leaf_size)
{
    bool at_max_depth(qtn.level() >= max_depth);
    std::cout << (at_max_depth ? "Skipping" : "Subdividing")
        << " node #" << qtn.index() << " @ level " << qtn.level() << ".\n";
    if (at_max_depth) {
        return false;
    } else {
        // Make sure reader goes out of scope before writer is created.
        rectangle_reader reader(qtn.reader());
        if (qtn.is_leaf_node()) {
            if (reader.total_count() <= max_leaf_size) {
                std::cout << "Leaf node (" << reader.total_count() << ").\n";
                return false;
            }
            qtn.add_child_nodes();
        } else if (!reader.total_count()) {
            std::cout << "Branch node.\n";
            return (qtn.level() < max_depth);
        }

        quadtree::node child_nodes[4] {
            qtn.child(0)
            , qtn.child(1)
            , qtn.child(2)
            , qtn.child(3)
        };

        high_resolution_clock::time_point t1(high_resolution_clock::now());

        copy_elements(reader, child_nodes);

        high_resolution_clock::time_point t2(high_resolution_clock::now());
        double dt_us(static_cast<double>(duration_cast<microseconds>(t2 - t1).count()));

        std::cout << ". (" << reader.count() << ") " << (dt_us / 1000.0) << " ms\n";
    }

    rectangle_writer writer(qtn.writer()); // Clean the file

    std::cout << "Branch node.\n";
    return (qtn.level() < max_depth);
}

void subdivide(quadtree& qt, uint32_t max_depth, uint32_t max_leaf_size)
{
    std::stack<quadtree::node> work;
    work.emplace(qt.get_node());
    std::size_t max_queue_size(work.size());

    for (; !work.empty();) {
        quadtree::node node(work.top());
        work.pop();

        if (subdivide(node, max_depth, max_leaf_size)) {
            for (uint32_t i(0); i < quadtree::QUADRANT_COUNT; ++i) {
                work.emplace(node.child(i));
            }
        }
        max_queue_size = std::max(max_queue_size, work.size());
#if(WORKQUEUE_LOGGING)
        std::cout << "* Work queue " << work.size() << " (" << max_queue_size << ").\n";
#endif
    }
}

int main(int argc, char* argv[])
{
    uint32_t const MAX_DEPTH(atoi((argc >= 2) ? argv[1] : "2"));
    uint32_t const MAX_LEAF_SIZE(atoi((argc >= 3) ? argv[2] : "100"));

    std::cout << "Loading...\n";

    high_resolution_clock::time_point t1(high_resolution_clock::now());

    quadtree qt(quadtree::load("tree"));

    if (qt.node_count() < 1) {
        std::cerr << "Invalid tree.\n";
        return -1;
    }

    std::cout << "Building tree (max_depth=" << MAX_DEPTH
        << ", max_leaf_size=" << MAX_LEAF_SIZE << ")...\n";

    high_resolution_clock::time_point t2(high_resolution_clock::now());

    subdivide(qt, MAX_DEPTH, MAX_LEAF_SIZE);

    high_resolution_clock::time_point t3(high_resolution_clock::now());

    std::cout << "\n";

    std::cout << "Tree completed (" << qt.node_count() << " nodes).\n\n";

    double dt1_us(static_cast<double>(duration_cast<microseconds>(t2 - t1).count()));
    double dt2_us(static_cast<double>(duration_cast<microseconds>(t3 - t2).count()));
    std::cout << "Load: " << (dt1_us / 1000.0) << " ms\n";
    std::cout << "Process: " << (dt2_us / 1000.0) << " ms\n";
    std::cout << "\nDone.\n\n";
    return 0;
}
Run Code Online (Sandbox Code Playgroud)

Usage

  • gen_rnd_tree <count> -- Generate tree with count rectangles in root node.
  • build_tree <max_depth> <max_node_size> -- Subdivide nodes with more than max_node_size rectangles, up to depth max_depth (root is at depth 0).

Sample Output

Polygon count = 1000
Polygon size = 16 B
Total size = 16000 B

Generating...
.

Generate: 0 ms

Done.

Loading...
Building tree (max_depth=5, max_leaf_size=100)...
Subdividing node #0 @ level 0.
.. (1000) 15.624 ms
Branch node.
Subdividing node #4 @ level 1.
.. (288) 15.625 ms
Branch node.
Subdividing node #8 @ level 2.
Leaf node (71).
Subdividing node #7 @ level 2.
Leaf node (59).
Subdividing node #6 @ level 2.
Leaf node (81).
Subdividing node #5 @ level 2.
Leaf node (77).
Subdividing node #3 @ level 1.
.. (212) 0 ms
Branch node.
Subdividing node #12 @ level 2.
Leaf node (46).
Subdividing node #11 @ level 2.
Leaf node (55).
Subdividing node #10 @ level 2.
Leaf node (55).
Subdividing node #9 @ level 2.
Leaf node (58).
Subdividing node #2 @ level 1.
.. (260) 15.626 ms
Branch node.
Subdividing node #16 @ level 2.
Leaf node (68).
Subdividing node #15 @ level 2.
Leaf node (69).
Subdividing node #14 @ level 2.
Leaf node (71).
Subdividing node #13 @ level 2.
Leaf node (53).
Subdividing node #1 @ level 1.
.. (240) 0 ms
Branch node.
Subdividing node #20 @ level 2.
Leaf node (68).
Subdividing node #19 @ level 2.
Leaf node (60).
Subdividing node #18 @ level 2.
Leaf node (62).
Subdividing node #17 @ level 2.
Leaf node (50).

Tree completed (21 nodes).

Load: 0 ms
Process: 109.377 ms

Done.
Run Code Online (Sandbox Code Playgroud)

When tried with 108 rectangles, BLOCK_SIZE of 64k records, it took about 45 seconds to build a tree with max_depth of 6. That already gives nodes with approx. 105 items that could be easily handled with in-memory algorithm.

The program only used few MiB of RAM for the whole process.


Further Improvements

  • Error checking, unit tests (I wouldn't be able to fit the code here).
  • Add metadata to the rectangle file, so you can find what polygons they match. I would keep the size of the record a multiple of 16 Bytes (alignment).
  • 并行
    • Mutex同步quadtree,mutex同步stack,线程池运行for (; !work.empty();) { ...循环.
    • 由于我们仅在子节点之后推送子节点,因此每个线程将始终在树的孤立部分上工作.
  • 混合方法
    • 磁盘I/O成为这种处理的瓶颈.一旦我们得到足够小的节点(比如10 5 - 10 6个条目),我们就可以在内存中方便地处理数据.
    • 使用此算法中的节点列表作为查找来加载"小"内存中四叉树.实现缓存机制,以保持最近查找的"小"四叉树可用.
  • 矢量化(可能会加速匹配大量的矩形).

  • 甚至没有一个回复。。。老兄,写下您所有的东西真是太好了! (2认同)
  • 这是一个非常丰富的答案。已开始悬赏,明天将奖励给您。 (2认同)