pyC*_*hon 15 c++ fortran blas scalapack
我收到以下错误,我不知道为什么.
{ 1, 1}: On entry to PDPOTRF parameter number 2 had an illegal value
{ 1, 0}: On entry to PDPOTRF parameter number 2 had an illegal value
{ 0, 1}: On entry to PDPOTRF parameter number 2 had an illegal value
{ 0, 0}: On entry to PDPOTRF parameter number 2 had an illegal value
info < 0: If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i.
Run Code Online (Sandbox Code Playgroud)
我知道错误消息的含义,但我尽可能地遵循网上提供的日期文档,并尝试将网络上的工作示例代码中的并行cholesky分解组合在一起.我不确定我哪里出错了.
有人可以解释我在下面的代码中出错了吗?这里是代码所做的概述,我正在使用4个处理器进行测试,并将8x8矩阵划分为2 x 2处理器块网格加载文件中的矩阵,下面是一个示例8 x 8矩阵文件,
182 147 140 125 132 76 126 157
147 213 185 150 209 114 166 188
140 185 232 129 194 142 199 205
125 150 129 143 148 81 104 150
132 209 194 148 214 122 172 189
76 114 142 81 122 102 129 117
126 166 199 104 172 129 187 181
157 188 205 150 189 117 181 259
Run Code Online (Sandbox Code Playgroud)
我按照示例将Matrix分发到4个节点中的每个节点上的4个独立的4x4本地阵列.然后我运行descinit_并调用相关的pdpotrf_例程,产生上述错误.我不知道我哪里出错了,并试图尽可能地遵循文档.fortran中并行cholesky分解的一个工作示例也会有很大帮助
函数调用的引用
运行参数
code name - Meaning = Value
N - Global Rows = 8
M - Global Cols = 8
Nb - Local Block Rows = 2
Mb - Local Block Cols = 2
nrows - Local Rows = 4
ncols Local Cols= 4
lda - Leading dimension of local array = 4 (i've tried 2,4,8)
ord - Order of Matrix = 4 (i've also tried many different things here as well)
Run Code Online (Sandbox Code Playgroud)
我在每个节点上打印上述参数,它们是相同的
#include <mpi.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <sstream>
using namespace std;
/*
To compile:
mpic++ test.cpp -o test -L/home/admin/libs -lscalapack -lrefblas -ltmg -lreflapack -lgfortran -Wall -O2
To run:
mpirun -np 4 ./test matrixfile 8 8 2 2
*/
extern "C" {
/* Cblacs declarations */
void Cblacs_pinfo(int*, int*);
void Cblacs_get(int, int, int*);
void Cblacs_gridinit(int*, const char*, int, int);
void Cblacs_gridinfo(int, int*, int*, int*,int*);
void Cblacs_pcoord(int, int, int*, int*);
void Cblacs_gridexit(int);
void Cblacs_barrier(int, const char*);
void Cdgerv2d(int, int, int, double*, int, int, int);
void Cdgesd2d(int, int, int, double*, int, int, int);
int numroc_(int*, int*, int*, int*, int*);
void pdpotrf_(char*, int*, double*,
int*, int*, int*, int*);
void descinit_( int *, int *, int *, int *, int *, int *, int *,
int *, int *, int *);
}
int main(int argc, char **argv){
/* MPI */
int mpirank,nprocs;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
double MPIelapsed;
double MPIt2;
double MPIt1;
/* Helping vars */
int iZERO = 0;
int verbose = 1;
bool mpiroot = (mpirank == 0);
if (argc < 6) {
if (mpiroot)
cerr << "Usage: matrixTest matrixfile N M Nb Mb"
<< endl
<< " N = Rows , M = Cols , Nb = Row Blocks , Mb = Col Blocks "
<< endl;
MPI_Finalize();
return 1;
}
/* Scalapack / Blacs Vars */
int N, M, Nb, Mb;
int descA[9];
int info = 0;
// int mla = 4;
int ord = 8;
double *A_glob = NULL, *A_glob2 = NULL, *A_loc = NULL;
/* Parse command line arguments */
if (mpiroot) {
/* Read command line arguments */
stringstream stream;
stream << argv[2] << " " << argv[3] << " " << argv[4] << " " << argv[5];
stream >> N >> M >> Nb >> Mb;
/* Reserve space and read matrix (with transposition!) */
A_glob = new double[N*M];
A_glob2 = new double[N*M];
string fname(argv[1]);
ifstream file(fname.c_str());
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
file >> *(A_glob + N*c + r);
}
}
/* Print matrix */
if(verbose == 1) {
cout << "Matrix A:\n";
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
cout << setw(3) << *(A_glob + N*c + r) << " ";
}
cout << "\n";
}
cout << endl;
}
}
/* Begin Cblas context */
int ctxt, myid, myrow, mycol, numproc;
//<TODO> make dynamic
int procrows = 2, proccols = 2;
Cblacs_pinfo(&myid, &numproc);
Cblacs_get(0, 0, &ctxt);
Cblacs_gridinit(&ctxt, "Row-major", procrows, proccols);
Cblacs_gridinfo( ctxt, &procrows, &proccols, &myrow, &mycol );
/* process coordinates for the process grid */
// Cblacs_pcoord(ctxt, myid, &myrow, &mycol);
/* Broadcast of the matrix dimensions */
int dimensions[4];
if (mpiroot) {
dimensions[0] = N;//Global Rows
dimensions[1] = M;//Global Cols
dimensions[2] = Nb;//Local Rows
dimensions[3] = Mb;//Local Cols
}
MPI_Bcast(dimensions, 4, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&ord, 1, MPI_INT, 0, MPI_COMM_WORLD);
N = dimensions[0];
M = dimensions[1];
Nb = dimensions[2];
Mb = dimensions[3];
int nrows = numroc_(&N, &Nb, &myrow, &iZERO, &procrows);
int ncols = numroc_(&M, &Mb, &mycol, &iZERO, &proccols);
int lda = max(1,nrows);
MPI_Bcast(&lda, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* Print grid pattern */
if (myid == 0)
cout << "Processes grid pattern:" << endl;
for (int r = 0; r < procrows; ++r) {
for (int c = 0; c < proccols; ++c) {
Cblacs_barrier(ctxt, "All");
if (myrow == r && mycol == c) {
cout << myid << " " << flush;
}
}
Cblacs_barrier(ctxt, "All");
if (myid == 0)
cout << endl;
}
if(myid == 0){
cout <<"Run Parameters"<<endl;
cout <<"Global Rows = " << M <<endl;
cout <<"Global Cols = " << N <<endl;
cout <<"Local Block Rows = " << Mb <<endl;
cout <<"Local Block Cols = " << Nb <<endl;
cout << "nrows = "<<nrows<<endl;
cout << "ncols = "<<ncols<<endl;
cout << "lda = "<<lda<<endl;
cout <<"Order = "<<ord<<endl;
}
for (int id = 0; id < numproc; ++id) {
Cblacs_barrier(ctxt, "All");
}
A_loc = new double[nrows*ncols];
for (int i = 0; i < nrows*ncols; ++i) *(A_loc+i)=0.;
/* Scatter matrix */
int sendr = 0, sendc = 0, recvr = 0, recvc = 0;
for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
sendc = 0;
int nr = Nb;
if (N-r < Nb)
nr = N-r;
for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
int nc = Mb;
if (M-c < Mb)
nc = M-c;
if (mpiroot) {
Cdgesd2d(ctxt, nr, nc, A_glob+N*c+r, N, sendr, sendc);
}
if (myrow == sendr && mycol == sendc) {
Cdgerv2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
recvc = (recvc+nc)%ncols;
}
}
if (myrow == sendr)
recvr = (recvr+nr)%nrows;
}
/* Print local matrices */
if(verbose == 1) {
for (int id = 0; id < numproc; ++id) {
if (id == myid) {
cout << "A_loc on node " << myid << endl;
for (int r = 0; r < nrows; ++r) {
for (int c = 0; c < ncols; ++c)
cout << setw(3) << *(A_loc+nrows*c+r) << " ";
cout << endl;
}
cout << endl;
}
Cblacs_barrier(ctxt, "All");
}
}
for (int id = 0; id < numproc; ++id) {
Cblacs_barrier(ctxt, "All");
}
/* DescInit */
info=0;
descinit_(descA, &N, &M, &Nb, &Mb,&iZERO,&iZERO,&ctxt, &lda, &info);
if(mpiroot){
if(verbose == 1){
if (info == 0){
cout<<"Description init sucesss!"<<endl;
}
if(info < 0){
cout <<"Error Info < 0: if INFO = -i, the i-th argument had an illegal value"<< endl
<<"Info = " << info<<endl;
}
}
// Cblacs_barrier(ctxt, "All");
}
//psgesv_(n, 1, al, 1,1,idescal, ipiv, b, 1,1,idescb, info) */
// psgesv_(&n, &one, al, &one,&one,idescal, ipiv, b, &one,&one,idescb, &info);
//pXelset http://www.netlib.org/scalapack/tools/pdelset.f
/* CHOLESKY HERE */
info = 0;
MPIt1=MPI_Wtime();
pdpotrf_("L",&ord,A_loc,&Nb,&Mb,descA,&info);
for (int id = 0; id < numproc; ++id) {
Cblacs_barrier(ctxt, "All");
}
MPIt2 = MPI_Wtime();
MPIelapsed=MPIt2-MPIt1;
if(mpiroot){
std::cout<<"Cholesky MPI Run Time" << MPIelapsed<<std::endl;
if(info == 0){
std::cout<<"SUCCESS"<<std::endl;
}
if(info < 0){
cout << "info < 0: If the i-th argument is an array and the j-entry had an illegal value, then INFO = -(i*100+j), if the i-th argument is a scalar and had an illegal value, then INFO = -i. " << endl;
cout<<"info = " << info << endl;
}
if(info > 0){
std::cout<<"matrix is not positve definte"<<std::endl;
}
}
//sanity check set global matrix to zero before it's recieved by nodes
if(mpiroot){
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
A_glob2[c *N + r] = 0;
}
}
}
/* Gather matrix */
sendr = 0;
for (int r = 0; r < N; r += Nb, sendr=(sendr+1)%procrows) {
sendc = 0;
// Number of rows to be sent
// Is this the last row block?
int nr = Nb;
if (N-r < Nb)
nr = N-r;
for (int c = 0; c < M; c += Mb, sendc=(sendc+1)%proccols) {
// Number of cols to be sent
// Is this the last col block?
int nc = Mb;
if (M-c < Mb)
nc = M-c;
if (myrow == sendr && mycol == sendc) {
// Send a nr-by-nc submatrix to process (sendr, sendc)
Cdgesd2d(ctxt, nr, nc, A_loc+nrows*recvc+recvr, nrows, 0, 0);
recvc = (recvc+nc)%ncols;
}
if (mpiroot) {
// Receive the same data
// The leading dimension of the local matrix is nrows!
Cdgerv2d(ctxt, nr, nc, A_glob2+N*c+r, N, sendr, sendc);
}
}
if (myrow == sendr)
recvr = (recvr+nr)%nrows;
}
/* Print test matrix */
if (mpiroot) {
if(verbose == 1){
cout << "Matrix A test:\n";
for (int r = 0; r < N; ++r) {
for (int c = 0; c < M; ++c) {
cout << setw(3) << *(A_glob2+N*c+r) << " ";
}
cout << endl;
}
}
}
/* Release resources */
delete[] A_glob;
delete[] A_glob2;
delete[] A_loc;
Cblacs_gridexit(ctxt);
MPI_Finalize();
}
Run Code Online (Sandbox Code Playgroud)
对 - 我解决了这个问题.这就是你要做的事情(我检查了修改后的MPI程序的结果与Octave中你的矩阵的Cholesky分解 - 它有效.).
我发现IBM的以下LAPACK参考比链接中的更有用:http : //publib.boulder.ibm.com/infocenter/clresctr/vxrx/index.jsp?topic=%2Fcom.ibm.cluster. pessl.v4r2.pssl100.doc%2Fam6gr_lpotrf.htm
PDPOTRF( UPLO, N, A, IA, JA, DESCA, INFO )
你逝去的Mb和Nb为IA和JA.但是,这些参数用于在更大的矩阵内提供全局矩阵的起始行和列.只有当你有一个大矩阵并且只想要一个子矩阵的Cholesky分解时,它们才是相关的.在你的情况,IA并且JA都必须1!
所以你需要做的就是:
int IA = 1;
int JA = 1;
pdpotrf_("L",&ord,A_loc,&IA,&JA,descA,&info);
Run Code Online (Sandbox Code Playgroud)
您可能还想更改硬编码,int ord = 8;以便它取决于从命令行读取的值,否则您将在以后遇到问题.
如上所述修改程序的输出:
Cholesky MPI Run Time0.000659943
SUCCESS
Matrix A test:
13.4907 147 140 125 132 76 126 157
10.8964 9.70923 185 150 209 114 166 188
10.3775 7.4077 8.33269 129 194 142 199 205
9.26562 5.0507 -0.548194 5.59806 148 81 104 150
9.78449 10.5451 1.72175 0.897537 1.81524 122 172 189
5.63349 5.41911 5.20784 0.765767 0.0442447 3.63139 129 117
9.33974 6.61543 6.36911 -2.22569 1.03941 2.48498 1.79738 181
11.6376 6.30249 4.50561 2.28799 -0.627688 -2.17633 7.27182 0.547228
Run Code Online (Sandbox Code Playgroud)
Octave输出用于比较:
octave:1> A=dlmread("matrixfile4")
A =
182 147 140 125 132 76 126 157
147 213 185 150 209 114 166 188
140 185 232 129 194 142 199 205
125 150 129 143 148 81 104 150
132 209 194 148 214 122 172 189
76 114 142 81 122 102 129 117
126 166 199 104 172 129 187 181
157 188 205 150 189 117 181 259
octave:2> C=chol(A)
C =
13.49074 10.89636 10.37749 9.26562 9.78449 5.63349 9.33974 11.63761
0.00000 9.70923 7.40770 5.05070 10.54508 5.41911 6.61543 6.30249
0.00000 0.00000 8.33269 -0.54819 1.72175 5.20784 6.36911 4.50561
0.00000 0.00000 0.00000 5.59806 0.89754 0.76577 -2.22569 2.28799
0.00000 0.00000 0.00000 0.00000 1.81524 0.04424 1.03941 -0.62769
0.00000 0.00000 0.00000 0.00000 0.00000 3.63139 2.48498 -2.17633
0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 1.79738 7.27182
0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0.54723
Run Code Online (Sandbox Code Playgroud)
(我之前的评论 - 现在已删除 - 关于错误地复制到节点的矩阵不适用,你的程序的其余部分对我来说似乎没问题.)
| 归档时间: |
|
| 查看次数: |
1918 次 |
| 最近记录: |