如何提高多线程效率?

Gio*_*San 2 c++ performance multithreading

简要回顾:我有一个 C++ 多线程数独求解器,我想提高效率,我需要你的帮助。

我正在 C++ 中的多线程中实现一个蛮力数独求解器。主要结构是一棵树,所有逻辑是这样的:我开始读取输入中的初始矩阵,这将是我的根节点,然后我找到第一个空单元格,我找到所有可能适合该位置的数字,然后对于每种可能性,我都会创建父节点的一个子节点,以便每个节点对于每个可能的数字都有一个子节点。树以这种方式继续增长,直到遇到一个解决方案,即一个节点没有更多的空闲单元(所以它是满的),并且节点网格满足数独的所有规则。

我尝试在多线程中像这样实现这个算法:我按照上面的逻辑顺序执行完全相同的逻辑,但每次都执行一个步骤,存储我遇到的所有孩子直到那一刻(每个孩子都是一条路径,所以是一棵树)在一个向量中。如果孩子少于用户选择的线程,那么我会按顺序解决它们,然后再做一步(孩子会长大)。当我的孩子多于线程时,我会为每个线程拆分孩子,然后用他的部分(即一棵树)开始每个线程。

现在,考虑到“蛮力”方法和“仅标准库”要求是强制性的,所以我不能以不同的方式做,但我当然可以改变如何实现这一点的逻辑。

问题是:我怎样才能提高这个程序的效率?欢迎所有仅使用 std lib 的建议。

#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)

using namespace std;

atomic<bool> solutionFound{false};

//Each node has a sudokuMatrix and some sub-trees
struct Node {
    vector<vector<int>> grid;
    vector<Node *> child;
};


Node *newNode(const vector<vector<int>> &newGrid) {
    Node *temp = new Node;
    temp->grid = newGrid;
    return temp;
}

//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
               const vector<vector<int>> &grid) {
    // Check column
    for (int row = 0; row < N; row++) {
        if (grid[row][col_] == val) return false;
    }
    // Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_][col] == val) return false;
    }
    // check box
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row / 3 == row_ / 3 &&
                col / 3 == col_ / 3) {  // they are in the same square 3x3
                if ((grid[row][col] == val)) return false;
            }
        }
    }
    return true;
}

//Check if the sudoku is solved
bool isSafe(const vector<vector<int>> &grid) 
{
    // Hashmap for row column and boxes
    unordered_map<int, int> row_[9], column_[9], box[3][3];
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            // mark the element in row column and box
            row_[row][grid[row][col]] += 1;
            column_[col][grid[row][col]] += 1;
            box[row / 3][col / 3][grid[row][col]] += 1;

            // if an element is already
            // present in the hashmap
            if (box[row / 3][col / 3][grid[row][col]] > 1 ||
                column_[col][grid[row][col]] > 1 ||
                row_[row][grid[row][col]] > 1)
                return false;
        }
    }
    return true;
}
//Find the first empty cell
pair<int, int> findCell(const vector<vector<int>> &grid) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (grid[i][j] == UNASSIGNED) {
                return make_pair(i, j);
            }
        }
    }
    return ERROR_PAIR;
}

//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<vector<vector<int>>> getChoices(const int &row, const int &col,
                                     const vector<vector<int>> &grid) {
    list<vector<vector<int>>> choices;
    for (int i = 1; i < 10; i++) {
        if (canInsert(i, row, col, grid)) {
            // cout << "posso inserire =" << i << endl;
            vector<vector<int>> tmpGrid = grid;
            tmpGrid[row][col] = i;
            choices.push_back(tmpGrid);
        }
    }
    return choices;
}

//Update the childreen of a node.
void addChoices(list<vector<vector<int>>> &choices, Node &node) {
    while (!choices.empty()) {
        node.child.push_back(newNode(choices.front()));
        choices.pop_front();
    }
    return;
}

//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            for (Node *&n : n->child) {
                tasks.push_back(n);
            }
            continue;
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}

//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user. 
vector<Node *> splitChunks(Node *root, const int &nw) {
    vector<Node *> tasks;
    vector<Node *> nodes;
    nodes.push_back(root);

    while ((int)tasks.size() < nw && !solutionFound) {
        tasks.clear();
        solveOneStep(nodes, nw, tasks);
        nodes = tasks;
    }
    return tasks;
}

//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw,) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            solveSubTree(n->child, nw);
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                std::cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}


int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage is: nw " << endl;
        return (-1);
    }
//A test matrix.
    vector<vector<int>> grid = 
                            { { 0, 1, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 8, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 } };
    
    Node *root = newNode(grid);
    vector<thread> tids;
    const int nw = atoi(argv[1]); //Number of worker
    vector<vector<Node *>> works(nw, vector<Node *>()); 
    vector<Node *> tasks = splitChunks(root, nw);

//Split the tasks for each thread, the main thread takes the last part of the work.
    for (int i = 0; i < nw; i++) {
        int limit = 0;
        i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
        for (int j = 0; j < limit; j++) {
            works[i].push_back(tasks.back());
            tasks.pop_back();
        }
    }

//Start each thread, and then the main thread start his computation.
    for (int i = 0; i < nw - 1; i++) {
        tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
    }
    solveSubTree(works[nw - 1], nw, t1);  // Main thread do last part of the work

    for (thread &t : tids) {
        t.join();
    }

    std::cout << "end" << endl;
    return (0);
}

Run Code Online (Sandbox Code Playgroud)

Jér*_*ard 6

这里有几点可以提高参考实现的效率:

  • 使用vector<vector<int>>用于2D阵列远未高效:它不是在存储器中连续而引起许多慢分配。应该首选大的展平阵列。
  • unordered_map<int, int>因为集合中的整数在一个小的(连续的)范围内,所以不需要类似集合的操作。使用简单的数组要快得多。
  • 有些副本是不需要的,可以删除std::move
  • 由于网格中的整数很小,因此char可以重复使用int(以减少内存占用,将数据保存在 CPU 缓存中并可能使分配更快)。
  • 我在代码中看到一个new但没有delete...
  • 在许多情况下,线程之间的工作似乎明显不平衡,导致并行执行速度变慢,应该执行负载平衡以更好地扩展。一种方法是使用任务调度
  • 可以使用启发式方法来大大加快探索速度。首先,我建议您查看约束满足问题(CSP),因为众所周知的CSP 求解器非常擅长解决它。更多的一般性和理论性信息可以在《人工智能:现代方法》一书中找到。

这是应用第一个注释的代码,使我的机器上的执行速度提高了 5 倍(请注意,网格已在 中修改main):

#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)

using namespace std;

void printGrid(const array<char, N*N>& grid)
{
    for (int row = 0; row < N; row++)
    {
        for (int col = 0; col < N; col++)
        {
            cout << (int)grid[row*N+col] << " ";
        }
        cout << endl;
    }
}

atomic<bool> solutionFound{false};

//Each node has a sudokuMatrix and some sub-trees
struct Node {
    array<char, N*N> grid;
    vector<Node *> child;
};


Node *newNode(const array<char, N*N> &newGrid) {
    Node *temp = new Node;
    temp->grid = newGrid;
    return temp;
}

//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
               const array<char, N*N> &grid) {
    // Check column
    for (int row = 0; row < N; row++) {
        if (grid[row*N+col_] == val) return false;
    }
    // Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_*N+col] == val) return false;
    }
    // check box
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row / 3 == row_ / 3 &&
                col / 3 == col_ / 3) {  // they are in the same square 3x3
                if ((grid[row*N+col] == val)) return false;
            }
        }
    }
    return true;
}

//Check if the sudoku is solved
bool isSafe(const array<char, N*N> &grid) 
{
    // No need for a hashmap for row column and boxes, 
    // just an array since associated values are small integer
    char row_[9][N+1] = {0};
    char column_[9][N+1] = {0};
    char box[3][3][N+1] = {0};

    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            // mark the element in row column and box
            row_[row][grid[row*N+col]] += 1;
            column_[col][grid[row*N+col]] += 1;
            box[row / 3][col / 3][grid[row*N+col]] += 1;

            // if an element is already
            // present in the hashmap
            if (box[row / 3][col / 3][grid[row*N+col]] > 1 ||
                column_[col][grid[row*N+col]] > 1 ||
                row_[row][grid[row*N+col]] > 1)
                return false;
        }
    }
    return true;
}
//Find the first empty cell
pair<int, int> findCell(const array<char, N*N> &grid) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (grid[i*N+j] == UNASSIGNED) {
                return make_pair(i, j);
            }
        }
    }
    return ERROR_PAIR;
}

//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<array<char, N*N>> getChoices(const int &row, const int &col,
                                     const array<char, N*N> &grid) {
    list<array<char, N*N>> choices;
    for (int i = 1; i < 10; i++) {
        if (canInsert(i, row, col, grid)) {
            // cout << "posso inserire =" << i << endl;
            array<char, N*N> tmpGrid = grid;
            tmpGrid[row*N+col] = i;
            choices.push_back(std::move(tmpGrid));
        }
    }
    return choices;
}

//Update the childreen of a node.
void addChoices(list<array<char, N*N>> &choices, Node &node) {
    while (!choices.empty()) {
        node.child.push_back(newNode(choices.front()));
        choices.pop_front();
    }
    return;
}

//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<array<char, N*N>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            for (Node *&n : n->child) {
                tasks.push_back(n);
            }
            continue;
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}

//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user. 
vector<Node *> splitChunks(Node *root, const int &nw) {
    vector<Node *> tasks;
    vector<Node *> nodes;
    nodes.push_back(root);

    while ((int)tasks.size() < nw && !solutionFound) {
        tasks.clear();
        solveOneStep(nodes, nw, tasks);
        nodes = tasks;
    }
    return tasks;
}

//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<array<char, N*N>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            solveSubTree(n->child, nw);
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                std::cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}


int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage is: nw " << endl;
        return (-1);
    }
//A test matrix.
    array<char, N*N> grid = 
                            { 0, 0, 0, 0, 0, 0, 2, 0, 0,
                              0, 8, 0, 0, 0, 7, 0, 9, 0,
                              6, 0, 2, 0, 0, 0, 5, 0, 0,
                              0, 7, 0, 0, 6, 0, 0, 0, 0,
                              0, 0, 0, 9, 0, 1, 0, 0, 0,
                              0, 0, 0, 0, 2, 0, 0, 4, 0,
                              0, 0, 5, 0, 0, 0, 6, 0, 3,
                              0, 9, 0, 4, 0, 0, 0, 7, 0,
                              0, 0, 6, 0, 0, 0, 0, 0, 0 };
    
    Node *root = newNode(grid);
    vector<thread> tids;
    const int nw = atoi(argv[1]); //Number of worker
    vector<vector<Node *>> works(nw, vector<Node *>()); 
    vector<Node *> tasks = splitChunks(root, nw);

//Split the tasks for each thread, the main thread takes the last part of the work.
    for (int i = 0; i < nw; i++) {
        int limit = 0;
        i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
        for (int j = 0; j < limit; j++) {
            works[i].push_back(tasks.back());
            tasks.pop_back();
        }
    }

//Start each thread, and then the main thread start his computation.
    for (int i = 0; i < nw - 1; i++) {
        tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
    }
    solveSubTree(works[nw - 1], nw);  // Main thread do last part of the work

    for (thread &t : tids) {
        t.join();
    }

    std::cout << "end" << endl;
    return (0);
}
Run Code Online (Sandbox Code Playgroud)