Gio*_*San 2 c++ performance multithreading
简要回顾:我有一个 C++ 多线程数独求解器,我想提高效率,我需要你的帮助。
我正在 C++ 中的多线程中实现一个蛮力数独求解器。主要结构是一棵树,所有逻辑是这样的:我开始读取输入中的初始矩阵,这将是我的根节点,然后我找到第一个空单元格,我找到所有可能适合该位置的数字,然后对于每种可能性,我都会创建父节点的一个子节点,以便每个节点对于每个可能的数字都有一个子节点。树以这种方式继续增长,直到遇到一个解决方案,即一个节点没有更多的空闲单元(所以它是满的),并且节点网格满足数独的所有规则。
我尝试在多线程中像这样实现这个算法:我按照上面的逻辑顺序执行完全相同的逻辑,但每次都执行一个步骤,存储我遇到的所有孩子直到那一刻(每个孩子都是一条路径,所以是一棵树)在一个向量中。如果孩子少于用户选择的线程,那么我会按顺序解决它们,然后再做一步(孩子会长大)。当我的孩子多于线程时,我会为每个线程拆分孩子,然后用他的部分(即一棵树)开始每个线程。
现在,考虑到“蛮力”方法和“仅标准库”要求是强制性的,所以我不能以不同的方式做,但我当然可以改变如何实现这一点的逻辑。
问题是:我怎样才能提高这个程序的效率?欢迎所有仅使用 std lib 的建议。
#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)
using namespace std;
atomic<bool> solutionFound{false};
//Each node has a sudokuMatrix and some sub-trees
struct Node {
vector<vector<int>> grid;
vector<Node *> child;
};
Node *newNode(const vector<vector<int>> &newGrid) {
Node *temp = new Node;
temp->grid = newGrid;
return temp;
}
//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
const vector<vector<int>> &grid) {
// Check column
for (int row = 0; row < N; row++) {
if (grid[row][col_] == val) return false;
}
// Check row
for (int col = 0; col < N; col++) {
if (grid[row_][col] == val) return false;
}
// check box
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
if (row / 3 == row_ / 3 &&
col / 3 == col_ / 3) { // they are in the same square 3x3
if ((grid[row][col] == val)) return false;
}
}
}
return true;
}
//Check if the sudoku is solved
bool isSafe(const vector<vector<int>> &grid)
{
// Hashmap for row column and boxes
unordered_map<int, int> row_[9], column_[9], box[3][3];
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
// mark the element in row column and box
row_[row][grid[row][col]] += 1;
column_[col][grid[row][col]] += 1;
box[row / 3][col / 3][grid[row][col]] += 1;
// if an element is already
// present in the hashmap
if (box[row / 3][col / 3][grid[row][col]] > 1 ||
column_[col][grid[row][col]] > 1 ||
row_[row][grid[row][col]] > 1)
return false;
}
}
return true;
}
//Find the first empty cell
pair<int, int> findCell(const vector<vector<int>> &grid) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
if (grid[i][j] == UNASSIGNED) {
return make_pair(i, j);
}
}
}
return ERROR_PAIR;
}
//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<vector<vector<int>>> getChoices(const int &row, const int &col,
const vector<vector<int>> &grid) {
list<vector<vector<int>>> choices;
for (int i = 1; i < 10; i++) {
if (canInsert(i, row, col, grid)) {
// cout << "posso inserire =" << i << endl;
vector<vector<int>> tmpGrid = grid;
tmpGrid[row][col] = i;
choices.push_back(tmpGrid);
}
}
return choices;
}
//Update the childreen of a node.
void addChoices(list<vector<vector<int>>> &choices, Node &node) {
while (!choices.empty()) {
node.child.push_back(newNode(choices.front()));
choices.pop_front();
}
return;
}
//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<vector<vector<int>>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
for (Node *&n : n->child) {
tasks.push_back(n);
}
continue;
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
cout << "That's the first solution found !" << endl;
}
return;
}
}
}
//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user.
vector<Node *> splitChunks(Node *root, const int &nw) {
vector<Node *> tasks;
vector<Node *> nodes;
nodes.push_back(root);
while ((int)tasks.size() < nw && !solutionFound) {
tasks.clear();
solveOneStep(nodes, nw, tasks);
nodes = tasks;
}
return tasks;
}
//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw,) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<vector<vector<int>>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
solveSubTree(n->child, nw);
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
std::cout << "That's the first solution found !" << endl;
}
return;
}
}
}
int main(int argc, char *argv[]) {
if (argc != 2) {
cout << "Usage is: nw " << endl;
return (-1);
}
//A test matrix.
vector<vector<int>> grid =
{ { 0, 1, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 8, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 } };
Node *root = newNode(grid);
vector<thread> tids;
const int nw = atoi(argv[1]); //Number of worker
vector<vector<Node *>> works(nw, vector<Node *>());
vector<Node *> tasks = splitChunks(root, nw);
//Split the tasks for each thread, the main thread takes the last part of the work.
for (int i = 0; i < nw; i++) {
int limit = 0;
i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
for (int j = 0; j < limit; j++) {
works[i].push_back(tasks.back());
tasks.pop_back();
}
}
//Start each thread, and then the main thread start his computation.
for (int i = 0; i < nw - 1; i++) {
tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
}
solveSubTree(works[nw - 1], nw, t1); // Main thread do last part of the work
for (thread &t : tids) {
t.join();
}
std::cout << "end" << endl;
return (0);
}
Run Code Online (Sandbox Code Playgroud)
这里有几点可以提高参考实现的效率:
vector<vector<int>>用于2D阵列远未高效:它不是在存储器中连续而引起许多慢分配。应该首选大的展平阵列。unordered_map<int, int>因为集合中的整数在一个小的(连续的)范围内,所以不需要类似集合的操作。使用简单的数组要快得多。std::move。char可以重复使用int(以减少内存占用,将数据保存在 CPU 缓存中并可能使分配更快)。new但没有delete...这是应用第一个注释的代码,使我的机器上的执行速度提高了 5 倍(请注意,网格已在 中修改main):
#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)
using namespace std;
void printGrid(const array<char, N*N>& grid)
{
for (int row = 0; row < N; row++)
{
for (int col = 0; col < N; col++)
{
cout << (int)grid[row*N+col] << " ";
}
cout << endl;
}
}
atomic<bool> solutionFound{false};
//Each node has a sudokuMatrix and some sub-trees
struct Node {
array<char, N*N> grid;
vector<Node *> child;
};
Node *newNode(const array<char, N*N> &newGrid) {
Node *temp = new Node;
temp->grid = newGrid;
return temp;
}
//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
const array<char, N*N> &grid) {
// Check column
for (int row = 0; row < N; row++) {
if (grid[row*N+col_] == val) return false;
}
// Check row
for (int col = 0; col < N; col++) {
if (grid[row_*N+col] == val) return false;
}
// check box
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
if (row / 3 == row_ / 3 &&
col / 3 == col_ / 3) { // they are in the same square 3x3
if ((grid[row*N+col] == val)) return false;
}
}
}
return true;
}
//Check if the sudoku is solved
bool isSafe(const array<char, N*N> &grid)
{
// No need for a hashmap for row column and boxes,
// just an array since associated values are small integer
char row_[9][N+1] = {0};
char column_[9][N+1] = {0};
char box[3][3][N+1] = {0};
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
// mark the element in row column and box
row_[row][grid[row*N+col]] += 1;
column_[col][grid[row*N+col]] += 1;
box[row / 3][col / 3][grid[row*N+col]] += 1;
// if an element is already
// present in the hashmap
if (box[row / 3][col / 3][grid[row*N+col]] > 1 ||
column_[col][grid[row*N+col]] > 1 ||
row_[row][grid[row*N+col]] > 1)
return false;
}
}
return true;
}
//Find the first empty cell
pair<int, int> findCell(const array<char, N*N> &grid) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
if (grid[i*N+j] == UNASSIGNED) {
return make_pair(i, j);
}
}
}
return ERROR_PAIR;
}
//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<array<char, N*N>> getChoices(const int &row, const int &col,
const array<char, N*N> &grid) {
list<array<char, N*N>> choices;
for (int i = 1; i < 10; i++) {
if (canInsert(i, row, col, grid)) {
// cout << "posso inserire =" << i << endl;
array<char, N*N> tmpGrid = grid;
tmpGrid[row*N+col] = i;
choices.push_back(std::move(tmpGrid));
}
}
return choices;
}
//Update the childreen of a node.
void addChoices(list<array<char, N*N>> &choices, Node &node) {
while (!choices.empty()) {
node.child.push_back(newNode(choices.front()));
choices.pop_front();
}
return;
}
//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<array<char, N*N>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
for (Node *&n : n->child) {
tasks.push_back(n);
}
continue;
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
cout << "That's the first solution found !" << endl;
}
return;
}
}
}
//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user.
vector<Node *> splitChunks(Node *root, const int &nw) {
vector<Node *> tasks;
vector<Node *> nodes;
nodes.push_back(root);
while ((int)tasks.size() < nw && !solutionFound) {
tasks.clear();
solveOneStep(nodes, nw, tasks);
nodes = tasks;
}
return tasks;
}
//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<array<char, N*N>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
solveSubTree(n->child, nw);
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
std::cout << "That's the first solution found !" << endl;
}
return;
}
}
}
int main(int argc, char *argv[]) {
if (argc != 2) {
cout << "Usage is: nw " << endl;
return (-1);
}
//A test matrix.
array<char, N*N> grid =
{ 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 8, 0, 0, 0, 7, 0, 9, 0,
6, 0, 2, 0, 0, 0, 5, 0, 0,
0, 7, 0, 0, 6, 0, 0, 0, 0,
0, 0, 0, 9, 0, 1, 0, 0, 0,
0, 0, 0, 0, 2, 0, 0, 4, 0,
0, 0, 5, 0, 0, 0, 6, 0, 3,
0, 9, 0, 4, 0, 0, 0, 7, 0,
0, 0, 6, 0, 0, 0, 0, 0, 0 };
Node *root = newNode(grid);
vector<thread> tids;
const int nw = atoi(argv[1]); //Number of worker
vector<vector<Node *>> works(nw, vector<Node *>());
vector<Node *> tasks = splitChunks(root, nw);
//Split the tasks for each thread, the main thread takes the last part of the work.
for (int i = 0; i < nw; i++) {
int limit = 0;
i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
for (int j = 0; j < limit; j++) {
works[i].push_back(tasks.back());
tasks.pop_back();
}
}
//Start each thread, and then the main thread start his computation.
for (int i = 0; i < nw - 1; i++) {
tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
}
solveSubTree(works[nw - 1], nw); // Main thread do last part of the work
for (thread &t : tids) {
t.join();
}
std::cout << "end" << endl;
return (0);
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
106 次 |
| 最近记录: |