diff --git a/common/csr.h b/common/csr.h index 62086331f2f0dc458f14f87777dea3683978f8a1..6c919f8a7622fff6c19fa74bf93d2938393097b3 100644 --- a/common/csr.h +++ b/common/csr.h @@ -7,14 +7,14 @@ csv zero = 0.0; #define CS_MAX(a,b) (((a) > (b)) ? (a) : (b)) typedef struct csr_t { - csi m; - csi n; - csi nzmax; - csi nr; - csi* r; - csi* p; - csi* j; - csv* x; + csi m; + csi n; + csi nzmax; + csi nr; + csi* r; + csi* p; + csi* j; + csv* x; } csr; /* method declarations {{{*/ csr *csr_spfree(csr *A); @@ -24,14 +24,14 @@ csr *csr_spfree(csr *A); /* free workspace and return a sparse matrix result */ csr *csr_done(csr *C, void *w, void *x, csi ok) { - return(ok ? C : csr_spfree(C)); /* return result if OK, else free it */ + return (ok ? C : csr_spfree(C)); /* return result if OK, else free it */ } /* wrapper for free */ void *cs_free(void *p) { if (p) free(p); /* free p if it is not already NULL */ - return(NULL); /* return NULL to simplify the use of cs_free */ + return (NULL); /* return NULL to simplify the use of cs_free */ } /* wrapper for realloc */ @@ -40,9 +40,9 @@ void *csr_realloc(void *p, csi n, size_t size, csi *ok) { pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ if (pnew == NULL) { - printf("%d:reallocation failed, pnew is NULL\n", __LINE__); + printf("%d:reallocation failed, pnew is NULL\n", __LINE__); } - return((*ok) ? pnew : p); /* return original p if failure */ + return ((*ok) ? pnew : p); /* return original p if failure */ } /* wrapper for realloc */ @@ -51,31 +51,31 @@ void *cs_realloc(void *p, csi n, size_t size, csi *ok) { pnew = realloc(p, CS_MAX(n, 1) * size); /* realloc the block */ *ok = (pnew != NULL); /* realloc fails if pnew is NULL */ if (pnew == NULL) { - printf("reallocation failed\n"); + printf("reallocation failed\n"); } - return((*ok) ? pnew : p); /* return original p if failure */ + return ((*ok) ? pnew : p); /* return original p if failure */ } /* change the max # of entries sparse matrix */ csi csr_sprealloc(csr *A, csi nzmax) { csi ok, oki = 0, okj = 1, okx = 1; if (!A) - return(0); + return (0); if (nzmax <= 0) - nzmax = A->p[A->m]; - A->j = (int*)csr_realloc(A->j, nzmax, sizeof(csi), &oki); + nzmax = A->p[A->m]; + A->j = (int*) csr_realloc(A->j, nzmax, sizeof (csi), &oki); if (A->x) - A->x = (csv*)csr_realloc(A->x, nzmax, sizeof(csv), &okx); + A->x = (csv*) csr_realloc(A->x, nzmax, sizeof (csv), &okx); ok = (oki && okj && okx); if (ok) - A->nzmax = nzmax; - return(ok); + A->nzmax = nzmax; + return (ok); } /* free a sparse matrix */ csr *csr_spfree(csr *A) { if (!A) - return(NULL); /* do nothing if A already NULL */ + return (NULL); /* do nothing if A already NULL */ cs_free(A->p); A->p = NULL; cs_free(A->j); @@ -90,30 +90,30 @@ csr *csr_spfree(csr *A) { /* allocate a sparse matrix (triplet form or compressed-ROW form) */ csr *csr_spalloc(csi m, csi n, csi nzmax, int values, int triplet, csv f) { - csr* A = (csr*)calloc(1, sizeof(csr)); /* allocate the cs struct */ + csr* A = (csr*) calloc(1, sizeof (csr)); /* allocate the cs struct */ if (!A) { - perror("sparse allocation failed"); - return(NULL); /* out of memory */ + perror("sparse allocation failed"); + return (NULL); /* out of memory */ } A->m = m; /* define dimensions and nzmax */ A->n = n; A->nzmax = nzmax = CS_MAX(nzmax, 0); - A->nr = 0; // number of nonzero rows - A->p = (csi*)calloc(m + 2, sizeof(csi)); - A->j = (csi*)calloc(CS_MAX(nzmax,1), sizeof(csi)); - A->x = (csv*)calloc(CS_MAX(nzmax,1), sizeof(csv)); - return((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); + A->nr = 0; // number of nonzero rows + A->p = (csi*) calloc(m + 2, sizeof (csi)); + A->j = (csi*) calloc(CS_MAX(nzmax, 1), sizeof (csi)); + A->x = (csv*) calloc(CS_MAX(nzmax, 1), sizeof (csv)); + return ((!A->p || !A->j || !A->x) ? csr_spfree(A) : A); }/*}}}*/ /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, const csv* Ax, csi Bm, csi Bn, csi Bnzmax, const csi* Bp, const csi* Bj, const csv* Bx, long* nummult, csi* xb, csv* x) { /*{{{*/ csv tf = 0; csi p, jp, j, kp, k, i, nz = 0, anz, *Cp, *Cj, m, n, - bnz, values = 1; + bnz, values = 1; csv *Cx; csr *C; if (An != Bm) - return(NULL); + return (NULL); if (Anzmax == 0 || Bnzmax == 0) { C = csr_spalloc(Am, Bn, 0, values, 0, tf); return C; @@ -122,8 +122,8 @@ csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, cons anz = Ap[Am]; n = Bn; bnz = Bp[Bm]; - for(i = 0; i < n; i++) xb[i] = 0; - for(i = 0; i < n; i++) + for (i = 0; i < n; i++) xb[i] = 0; + for (i = 0; i < n; i++) xb[i] = 0; values = (Ax != NULL) && (Bx != NULL); csi tnz = (anz + bnz) * 2; @@ -132,8 +132,8 @@ csr *csr_multiply(csi Am, csi An, csi Anzmax, const csi* Ap, const csi* Aj, cons return (csr_done(C, xb, x, 0)); Cp = C->p; for (i = 0; i < m; i++) { - if ( ( (nz + n) > C->nzmax ) ) { - if(!csr_sprealloc(C, (2 * (C->nzmax) + n) ) ) { + if (((nz + n) > C->nzmax)) { + if (!csr_sprealloc(C, (2 * (C->nzmax) + n))) { return (csr_done(C, xb, x, 0)); // out of memory } else { } diff --git a/common/mmio.h b/common/mmio.h index d306433fa92f8cce79d55bd625092e4c20b4b11b..196240e2559c8b74c15939ed26366defcb797c83 100644 --- a/common/mmio.h +++ b/common/mmio.h @@ -1,10 +1,10 @@ -/* -* Matrix Market I/O library for ANSI C -* -* See http://math.nist.gov/MatrixMarket for details. -* -* -*/ +/* + * Matrix Market I/O library for ANSI C + * + * See http://math.nist.gov/MatrixMarket for details. + * + * + */ #ifndef MM_IO_H #define MM_IO_H @@ -45,7 +45,7 @@ int mm_write_mtx_array_size(FILE *f, int M, int N); #define mm_is_skew(typecode) ((typecode)[3]=='K') #define mm_is_hermitian(typecode)((typecode)[3]=='H') -int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ +int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ /********************* MM_typecode modify fucntions ***************************/ @@ -89,22 +89,22 @@ int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ MM_matrix_typecode: 4-character sequence - ojbect sparse/ data storage - dense type scheme + ojbect sparse/ data storage + dense type scheme string position: [0] [1] [2] [3] Matrix typecode: M(atrix) C(oord) R(eal) G(eneral) - A(array) C(omplex) H(ermitian) - P(attern) S(ymmetric) - I(nteger) K(kew) + A(array) C(omplex) H(ermitian) + P(attern) S(ymmetric) + I(nteger) K(kew) ***********************************************************************/ #define MM_MTX_STR "matrix" #define MM_ARRAY_STR "array" #define MM_DENSE_STR "array" -#define MM_COORDINATE_STR "coordinate" +#define MM_COORDINATE_STR "coordinate" #define MM_SPARSE_STR "coordinate" #define MM_COMPLEX_STR "complex" #define MM_REAL_STR "real" @@ -119,14 +119,14 @@ int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ /* high level routines */ int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[], - double val[], MM_typecode matcode); + double val[], MM_typecode matcode); int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[], - double val[], MM_typecode matcode); + double val[], MM_typecode matcode); int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img, - MM_typecode matcode); + MM_typecode matcode); int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, - double **val_, int **I_, int **J_); + double **val_, int **I_, int **J_); #include #include @@ -134,108 +134,101 @@ int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, #include int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_, - double **val_, int **I_, int **J_) -{ + double **val_, int **I_, int **J_) { FILE *f; MM_typecode matcode; int M, N, nz; int i; double *val; int *I, *J; - + if ((f = fopen(fname, "r")) == NULL) - return -1; - - - if (mm_read_banner(f, &matcode) != 0) - { + return -1; + + + if (mm_read_banner(f, &matcode) != 0) { printf("mm_read_unsymetric: Could not process Matrix Market banner "); printf(" in file [%s]\n", fname); return -1; } - - - - if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && - mm_is_sparse(matcode))) - { + + + + if (!(mm_is_real(matcode) && mm_is_matrix(matcode) && + mm_is_sparse(matcode))) { fprintf(stderr, "Sorry, this application does not support "); fprintf(stderr, "Market Market type: [%s]\n", mm_typecode_to_str(matcode)); return -1; } - + /* find out size of sparse matrix: M, N, nz .... */ - - if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) - { + + if (mm_read_mtx_crd_size(f, &M, &N, &nz) != 0) { fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); return -1; } - + *M_ = M; *N_ = N; *nz_ = nz; - + /* reseve memory for matrices */ - - I = (int *) malloc(nz * sizeof(int)); - J = (int *) malloc(nz * sizeof(int)); - val = (double *) malloc(nz * sizeof(double)); - + + I = (int *) malloc(nz * sizeof (int)); + J = (int *) malloc(nz * sizeof (int)); + val = (double *) malloc(nz * sizeof (double)); + *val_ = val; *I_ = I; *J_ = J; - + /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ - - for (i=0; i0) {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} - if (i0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);} - if (j -pc_type -ksp_monitor -ksp_rtol - These options will override those specified above as long as - KSPSetFromOptions() is called _after_ any other customization - routines. - */ - ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); - - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve several linear systems of the form A x_i = b_i - I.e., we retain the same matrix (A) for all systems, but - change the right-hand-side vector (b_i) at each step. - - In this case, we simply call KSPSolve() multiple times. The - preconditioner setup operations (e.g., factorization for ILU) - be done during the first call to KSPSolve() only; such operations - will NOT be repeated for successive solves. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ - - ntimes = 2; - ierr = PetscOptionsGetInt(NULL,"-ntimes",&ntimes,NULL);CHKERRQ(ierr); - for (k=1; k 0) { + J = Ii - n; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); + CHKERRQ(ierr); + } + if (i < m - 1) { + J = Ii + n; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); + CHKERRQ(ierr); + } + if (j > 0) { + J = Ii - 1; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); + CHKERRQ(ierr); + } + if (j < n - 1) { + J = Ii + 1; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES); + CHKERRQ(ierr); + } + v = 4.0; + ierr = MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES); + CHKERRQ(ierr); + } + + /* + Assemble matrix, using the 2-step process: + MatAssemblyBegin(), MatAssemblyEnd() + Computations can be done while messages are in transition + by placing code between these two statements. + */ + ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + + /* + Create parallel vectors. + - When using VecCreate(), VecSetSizes() and VecSetFromOptions(), + we specify only the vector's global + dimension; the parallel partitioning is determined at runtime. + - When solving a linear system, the vectors and matrices MUST + be partitioned accordingly. PETSc automatically generates + appropriately partitioned matrices and vectors when MatCreate() + and VecCreate() are used with the same communicator. + - Note: We form 1 vector from scratch and then duplicate as needed. + */ + ierr = VecCreate(PETSC_COMM_WORLD, &u); + CHKERRQ(ierr); + ierr = VecSetSizes(u, PETSC_DECIDE, m * n); + CHKERRQ(ierr); + ierr = VecSetFromOptions(u); + CHKERRQ(ierr); + ierr = VecDuplicate(u, &b); + CHKERRQ(ierr); + ierr = VecDuplicate(b, &x); + CHKERRQ(ierr); + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Create the linear solver and set various options + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + /* + Create linear solver context + */ + ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); + CHKERRQ(ierr); /* - Set exact solution; then compute right-hand-side vector. We use - an exact solution of a vector with all elements equal to 1.0*k. - */ - rhs = one * (PetscReal)k; - ierr = VecSet(u,rhs);CHKERRQ(ierr); - ierr = MatMult(A,u,b);CHKERRQ(ierr); + Set operators. Here the matrix that defines the linear system + also serves as the preconditioning matrix. + */ + ierr = KSPSetOperators(ksp, A, A); + CHKERRQ(ierr); /* - View the exact solution vector if desired - */ - ierr = PetscOptionsGetBool(NULL,"-view_exact_sol",&flg,NULL);CHKERRQ(ierr); - if (flg) {ierr = VecView(u,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} + Set runtime options, e.g., + -ksp_type -pc_type -ksp_monitor -ksp_rtol + These options will override those specified above as long as + KSPSetFromOptions() is called _after_ any other customization + routines. + */ + ierr = KSPSetFromOptions(ksp); + CHKERRQ(ierr); + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Solve several linear systems of the form A x_i = b_i + I.e., we retain the same matrix (A) for all systems, but + change the right-hand-side vector (b_i) at each step. + + In this case, we simply call KSPSolve() multiple times. The + preconditioner setup operations (e.g., factorization for ILU) + be done during the first call to KSPSolve() only; such operations + will NOT be repeated for successive solves. + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ - ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); + ntimes = 2; + ierr = PetscOptionsGetInt(NULL, "-ntimes", &ntimes, NULL); + CHKERRQ(ierr); + for (k = 1; k < ntimes + 1; k++) { + /* + Set exact solution; then compute right-hand-side vector. We use + an exact solution of a vector with all elements equal to 1.0*k. + */ + rhs = one * (PetscReal) k; + ierr = VecSet(u, rhs); + CHKERRQ(ierr); + ierr = MatMult(A, u, b); + CHKERRQ(ierr); + + /* + View the exact solution vector if desired + */ + ierr = PetscOptionsGetBool(NULL, "-view_exact_sol", &flg, NULL); + CHKERRQ(ierr); + if (flg) { + ierr = VecView(u, PETSC_VIEWER_STDOUT_WORLD); + CHKERRQ(ierr); + } + + ierr = KSPSolve(ksp, b, x); + CHKERRQ(ierr); + + /* + Check the error + */ + ierr = VecAXPY(x, neg_one, u); + CHKERRQ(ierr); + ierr = VecNorm(x, NORM_2, &norm); + CHKERRQ(ierr); + ierr = KSPGetIterationNumber(ksp, &its); + CHKERRQ(ierr); + /* + Print convergence information. PetscPrintf() produces a single + print statement from all processes that share a communicator. + */ + ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g System %D: iterations %D\n", (double) norm, k, its); + CHKERRQ(ierr); + } + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Clean up + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* - Check the error - */ - ierr = VecAXPY(x,neg_one,u);CHKERRQ(ierr); - ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); - ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); + Free work space. All PETSc objects should be destroyed when they + are no longer needed. + */ + ierr = KSPDestroy(&ksp); + CHKERRQ(ierr); + ierr = VecDestroy(&u); + CHKERRQ(ierr); + ierr = VecDestroy(&x); + CHKERRQ(ierr); + ierr = VecDestroy(&b); + CHKERRQ(ierr); + ierr = MatDestroy(&A); + CHKERRQ(ierr); + /* - Print convergence information. PetscPrintf() produces a single - print statement from all processes that share a communicator. - */ - ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g System %D: iterations %D\n",(double)norm,k,its);CHKERRQ(ierr); - } - - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Clean up - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ - /* - Free work space. All PETSc objects should be destroyed when they - are no longer needed. - */ - ierr = KSPDestroy(&ksp);CHKERRQ(ierr); - ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); - ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); - - /* - Always call PetscFinalize() before exiting a program. This routine - - finalizes the PETSc libraries as well as MPI - - provides summary and diagnostic information if certain runtime - options are chosen (e.g., -log_summary). - */ - ierr = PetscFinalize(); - return 0; + Always call PetscFinalize() before exiting a program. This routine + - finalizes the PETSc libraries as well as MPI + - provides summary and diagnostic information if certain runtime + options are chosen (e.g., -log_summary). + */ + ierr = PetscFinalize(); + return 0; } diff --git a/krylov/ksp_solver_simple.c b/krylov/ksp_solver_simple.c index e8815f16a2d5679296bdea1b9d0c28757506cdcb..d1595c2d7a431549e27d32ccc47b9e5cb8a4e230 100644 --- a/krylov/ksp_solver_simple.c +++ b/krylov/ksp_solver_simple.c @@ -7,9 +7,9 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright notice, this + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, this + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. @@ -23,7 +23,7 @@ are permitted provided that the following conditions are met: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ static char help[] = "Solves a linear system in parallel with KSP.\n\ Input parameters include:\n\ @@ -46,214 +46,280 @@ T*/ petscmat.h - matrices petscis.h - index sets petscksp.h - Krylov subspace methods petscviewer.h - viewers petscpc.h - preconditioners -*/ + */ #include #undef __FUNCT__ #define __FUNCT__ "main" -int main(int argc,char **args) -{ - Vec x,b,u; /* approx solution, RHS, exact solution */ - Mat A; /* linear system matrix */ - KSP ksp; /* linear solver context */ - PetscRandom rctx; /* random number generator context */ - PetscReal norm; /* norm of solution error */ - PetscInt i,j,Ii,J,Istart,Iend,m = 8,n = 7,its; - PetscErrorCode ierr; - PetscBool flg = PETSC_FALSE; - PetscScalar v; + +int main(int argc, char **args) { + Vec x, b, u; /* approx solution, RHS, exact solution */ + Mat A; /* linear system matrix */ + KSP ksp; /* linear solver context */ + PetscRandom rctx; /* random number generator context */ + PetscReal norm; /* norm of solution error */ + PetscInt i, j, Ii, J, Istart, Iend, m = 8, n = 7, its; + PetscErrorCode ierr; + PetscBool flg = PETSC_FALSE; + PetscScalar v; #if defined(PETSC_USE_LOG) - PetscLogStage stage; + PetscLogStage stage; #endif - PetscInitialize(&argc,&args,(char*)0,help); - ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr); - ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Compute the matrix and right-hand-side vector that define - the linear system, Ax = b. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ - /* - Create parallel matrix, specifying only its global dimensions. - When using MatCreate(), the matrix format can be specified at - runtime. Also, the parallel partitioning of the matrix is - determined by PETSc at runtime. - - Performance tuning note: For problems of substantial size, - preallocation of matrix memory is crucial for attaining good - performance. See the matrix chapter of the users manual for details. - */ - ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); - ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); - ierr = MatSetFromOptions(A);CHKERRQ(ierr); - ierr = MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);CHKERRQ(ierr); - ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr); - - /* - Currently, all PETSc parallel matrix formats are partitioned by - contiguous chunks of rows across the processors. Determine which - rows of the matrix are locally owned. - */ - ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr); - - /* - Set matrix elements for the 2-D, five-point stencil in parallel. - - Each processor needs to insert only elements that it owns - locally (but any non-local elements will be sent to the - appropriate processor during matrix assembly). - - Always specify global rows and columns of matrix entries. - - Note: this uses the less common natural ordering that orders first - all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n - instead of J = I +- m as you might expect. The more standard ordering - would first do all variables for y = h, then y = 2h etc. - - */ - ierr = PetscLogStageRegister("Assembly", &stage);CHKERRQ(ierr); - ierr = PetscLogStagePush(stage);CHKERRQ(ierr); - for (Ii=Istart; Ii0) {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} - if (i0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} - if (j -pc_type -ksp_monitor -ksp_rtol - These options will override those specified above as long as - KSPSetFromOptions() is called _after_ any other customization - routines. - */ - ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); - - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve the linear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ - - ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); - - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Check solution and clean up - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ - - /* - Check the error - */ - ierr = VecAXPY(x,-1.0,u);CHKERRQ(ierr); - ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); - ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); - - /* - Print convergence information. PetscPrintf() produces a single - print statement from all processes that share a communicator. - An alternative is PetscFPrintf(), which prints to a file. - */ - ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr); - - /* - Free work space. All PETSc objects should be destroyed when they - are no longer needed. - */ - ierr = KSPDestroy(&ksp);CHKERRQ(ierr); - ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); - ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); - - /* - Always call PetscFinalize() before exiting a program. This routine - - finalizes the PETSc libraries as well as MPI - - provides summary and diagnostic information if certain runtime - options are chosen (e.g., -log_summary). - */ - ierr = PetscFinalize(); - return 0; + PetscInitialize(&argc, &args, (char*) 0, help); + ierr = PetscOptionsGetInt(NULL, "-m", &m, NULL); + CHKERRQ(ierr); + ierr = PetscOptionsGetInt(NULL, "-n", &n, NULL); + CHKERRQ(ierr); + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Compute the matrix and right-hand-side vector that define + the linear system, Ax = b. + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + /* + Create parallel matrix, specifying only its global dimensions. + When using MatCreate(), the matrix format can be specified at + runtime. Also, the parallel partitioning of the matrix is + determined by PETSc at runtime. + + Performance tuning note: For problems of substantial size, + preallocation of matrix memory is crucial for attaining good + performance. See the matrix chapter of the users manual for details. + */ + ierr = MatCreate(PETSC_COMM_WORLD, &A); + CHKERRQ(ierr); + ierr = MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m*n, m * n); + CHKERRQ(ierr); + ierr = MatSetFromOptions(A); + CHKERRQ(ierr); + ierr = MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL); + CHKERRQ(ierr); + ierr = MatSeqAIJSetPreallocation(A, 5, NULL); + CHKERRQ(ierr); + + /* + Currently, all PETSc parallel matrix formats are partitioned by + contiguous chunks of rows across the processors. Determine which + rows of the matrix are locally owned. + */ + ierr = MatGetOwnershipRange(A, &Istart, &Iend); + CHKERRQ(ierr); + + /* + Set matrix elements for the 2-D, five-point stencil in parallel. + - Each processor needs to insert only elements that it owns + locally (but any non-local elements will be sent to the + appropriate processor during matrix assembly). + - Always specify global rows and columns of matrix entries. + + Note: this uses the less common natural ordering that orders first + all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n + instead of J = I +- m as you might expect. The more standard ordering + would first do all variables for y = h, then y = 2h etc. + + */ + ierr = PetscLogStageRegister("Assembly", &stage); + CHKERRQ(ierr); + ierr = PetscLogStagePush(stage); + CHKERRQ(ierr); + for (Ii = Istart; Ii < Iend; Ii++) { + v = -1.0; + i = Ii / n; + j = Ii - i*n; + if (i > 0) { + J = Ii - n; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (i < m - 1) { + J = Ii + n; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (j > 0) { + J = Ii - 1; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (j < n - 1) { + J = Ii + 1; + ierr = MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + v = 4.0; + ierr = MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES); + CHKERRQ(ierr); + } + + /* + Assemble matrix, using the 2-step process: + MatAssemblyBegin(), MatAssemblyEnd() + Computations can be done while messages are in transition + by placing code between these two statements. + */ + ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + ierr = PetscLogStagePop(); + CHKERRQ(ierr); + + /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */ + ierr = MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE); + CHKERRQ(ierr); + + /* + Create parallel vectors. + - We form 1 vector from scratch and then duplicate as needed. + - When using VecCreate(), VecSetSizes and VecSetFromOptions() + in this example, we specify only the + vector's global dimension; the parallel partitioning is determined + at runtime. + - When solving a linear system, the vectors and matrices MUST + be partitioned accordingly. PETSc automatically generates + appropriately partitioned matrices and vectors when MatCreate() + and VecCreate() are used with the same communicator. + - The user can alternatively specify the local vector and matrix + dimensions when more sophisticated partitioning is needed + (replacing the PETSC_DECIDE argument in the VecSetSizes() statement + below). + */ + ierr = VecCreate(PETSC_COMM_WORLD, &u); + CHKERRQ(ierr); + ierr = VecSetSizes(u, PETSC_DECIDE, m * n); + CHKERRQ(ierr); + ierr = VecSetFromOptions(u); + CHKERRQ(ierr); + ierr = VecDuplicate(u, &b); + CHKERRQ(ierr); + ierr = VecDuplicate(b, &x); + CHKERRQ(ierr); + + /* + Set exact solution; then compute right-hand-side vector. + By default we use an exact solution of a vector with all + elements of 1.0; Alternatively, using the runtime option + -random_sol forms a solution vector with random components. + */ + ierr = PetscOptionsGetBool(NULL, "-random_exact_sol", &flg, NULL); + CHKERRQ(ierr); + if (flg) { + ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rctx); + CHKERRQ(ierr); + ierr = PetscRandomSetFromOptions(rctx); + CHKERRQ(ierr); + ierr = VecSetRandom(u, rctx); + CHKERRQ(ierr); + ierr = PetscRandomDestroy(&rctx); + CHKERRQ(ierr); + } else { + ierr = VecSet(u, 1.0); + CHKERRQ(ierr); + } + ierr = MatMult(A, u, b); + CHKERRQ(ierr); + + /* + View the exact solution vector if desired + */ + flg = PETSC_FALSE; + ierr = PetscOptionsGetBool(NULL, "-view_exact_sol", &flg, NULL); + CHKERRQ(ierr); + if (flg) { + ierr = VecView(u, PETSC_VIEWER_STDOUT_WORLD); + CHKERRQ(ierr); + } + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Create the linear solver and set various options + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + /* + Create linear solver context + */ + ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); + CHKERRQ(ierr); + + /* + Set operators. Here the matrix that defines the linear system + also serves as the preconditioning matrix. + */ + ierr = KSPSetOperators(ksp, A, A); + CHKERRQ(ierr); + + /* + Set linear solver defaults for this problem (optional). + - By extracting the KSP and PC contexts from the KSP context, + we can then directly call any KSP and PC routines to set + various options. + - The following two statements are optional; all of these + parameters could alternatively be specified at runtime via + KSPSetFromOptions(). All of these defaults can be + overridden at runtime, as indicated below. + */ + ierr = KSPSetTolerances(ksp, 1.e-2 / ((m + 1)*(n + 1)), 1.e-50, PETSC_DEFAULT, + PETSC_DEFAULT); + CHKERRQ(ierr); + + /* + Set runtime options, e.g., + -ksp_type -pc_type -ksp_monitor -ksp_rtol + These options will override those specified above as long as + KSPSetFromOptions() is called _after_ any other customization + routines. + */ + ierr = KSPSetFromOptions(ksp); + CHKERRQ(ierr); + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Solve the linear system + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + ierr = KSPSolve(ksp, b, x); + CHKERRQ(ierr); + + /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Check solution and clean up + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + + /* + Check the error + */ + ierr = VecAXPY(x, -1.0, u); + CHKERRQ(ierr); + ierr = VecNorm(x, NORM_2, &norm); + CHKERRQ(ierr); + ierr = KSPGetIterationNumber(ksp, &its); + CHKERRQ(ierr); + + /* + Print convergence information. PetscPrintf() produces a single + print statement from all processes that share a communicator. + An alternative is PetscFPrintf(), which prints to a file. + */ + ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %D\n", (double) norm, its); + CHKERRQ(ierr); + + /* + Free work space. All PETSc objects should be destroyed when they + are no longer needed. + */ + ierr = KSPDestroy(&ksp); + CHKERRQ(ierr); + ierr = VecDestroy(&u); + CHKERRQ(ierr); + ierr = VecDestroy(&x); + CHKERRQ(ierr); + ierr = VecDestroy(&b); + CHKERRQ(ierr); + ierr = MatDestroy(&A); + CHKERRQ(ierr); + + /* + Always call PetscFinalize() before exiting a program. This routine + - finalizes the PETSc libraries as well as MPI + - provides summary and diagnostic information if certain runtime + options are chosen (e.g., -log_summary). + */ + ierr = PetscFinalize(); + return 0; } diff --git a/krylov/ksp_solver_two_sys.c b/krylov/ksp_solver_two_sys.c index de5edd688d7b45c68fd03b52c2d954544aab7b11..1feb7bec31fbcbfe2b9d1011bd868c0ef5ecc913 100644 --- a/krylov/ksp_solver_two_sys.c +++ b/krylov/ksp_solver_two_sys.c @@ -7,9 +7,9 @@ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright notice, this + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, this + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. @@ -23,7 +23,7 @@ are permitted provided that the following conditions are met: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + */ static char help[] = "Solves two linear systems in parallel with KSP. The code\n\ illustrates repeated solution of linear systems with the same preconditioner\n\ @@ -45,291 +45,399 @@ T*/ petscmat.h - matrices petscis.h - index sets petscksp.h - Krylov subspace methods petscviewer.h - viewers petscpc.h - preconditioners -*/ + */ #include #undef __FUNCT__ #define __FUNCT__ "main" -int main(int argc,char **args) -{ - KSP ksp; /* linear solver context */ - Mat C; /* matrix */ - Vec x,u,b; /* approx solution, RHS, exact solution */ - PetscReal norm; /* norm of solution error */ - PetscScalar v,none = -1.0; - PetscInt Ii,J,ldim,low,high,iglobal,Istart,Iend; - PetscErrorCode ierr; - PetscInt i,j,m = 3,n = 2,its; - PetscMPIInt size,rank; - PetscBool mat_nonsymmetric = PETSC_FALSE; - PetscBool testnewC = PETSC_FALSE; + +int main(int argc, char **args) { + KSP ksp; /* linear solver context */ + Mat C; /* matrix */ + Vec x, u, b; /* approx solution, RHS, exact solution */ + PetscReal norm; /* norm of solution error */ + PetscScalar v, none = -1.0; + PetscInt Ii, J, ldim, low, high, iglobal, Istart, Iend; + PetscErrorCode ierr; + PetscInt i, j, m = 3, n = 2, its; + PetscMPIInt size, rank; + PetscBool mat_nonsymmetric = PETSC_FALSE; + PetscBool testnewC = PETSC_FALSE; #if defined(PETSC_USE_LOG) - PetscLogStage stages[2]; + PetscLogStage stages[2]; #endif - PetscInitialize(&argc,&args,(char*)0,help); - ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr); - ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); - ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); - n = 2*size; - - /* - Set flag if we are doing a nonsymmetric problem; the default is symmetric. - */ - ierr = PetscOptionsGetBool(NULL,"-mat_nonsym",&mat_nonsymmetric,NULL);CHKERRQ(ierr); - - /* - Register two stages for separate profiling of the two linear solves. - Use the runtime option -log_summary for a printout of performance - statistics at the program's conlusion. - */ - ierr = PetscLogStageRegister("Original Solve",&stages[0]);CHKERRQ(ierr); - ierr = PetscLogStageRegister("Second Solve",&stages[1]);CHKERRQ(ierr); - - /* -------------- Stage 0: Solve Original System ---------------------- */ - /* - Indicate to PETSc profiling that we're beginning the first stage - */ - ierr = PetscLogStagePush(stages[0]);CHKERRQ(ierr); - - /* - Create parallel matrix, specifying only its global dimensions. - When using MatCreate(), the matrix format can be specified at - runtime. Also, the parallel partitioning of the matrix is - determined by PETSc at runtime. - */ - ierr = MatCreate(PETSC_COMM_WORLD,&C);CHKERRQ(ierr); - ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr); - ierr = MatSetFromOptions(C);CHKERRQ(ierr); - ierr = MatSetUp(C);CHKERRQ(ierr); - - /* - Currently, all PETSc parallel matrix formats are partitioned by - contiguous chunks of rows across the processors. Determine which - rows of the matrix are locally owned. - */ - ierr = MatGetOwnershipRange(C,&Istart,&Iend);CHKERRQ(ierr); - - /* - Set matrix entries matrix in parallel. - - Each processor needs to insert only elements that it owns - locally (but any non-local elements will be sent to the - appropriate processor during matrix assembly). - - Always specify global row and columns of matrix entries. - */ - for (Ii=Istart; Ii0) {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} - if (i0) {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} - if (j1) {J = Ii-n-1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} + PetscInitialize(&argc, &args, (char*) 0, help); + ierr = PetscOptionsGetInt(NULL, "-m", &m, NULL); + CHKERRQ(ierr); + ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank); + CHKERRQ(ierr); + ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size); + CHKERRQ(ierr); + n = 2 * size; + + /* + Set flag if we are doing a nonsymmetric problem; the default is symmetric. + */ + ierr = PetscOptionsGetBool(NULL, "-mat_nonsym", &mat_nonsymmetric, NULL); + CHKERRQ(ierr); + + /* + Register two stages for separate profiling of the two linear solves. + Use the runtime option -log_summary for a printout of performance + statistics at the program's conlusion. + */ + ierr = PetscLogStageRegister("Original Solve", &stages[0]); + CHKERRQ(ierr); + ierr = PetscLogStageRegister("Second Solve", &stages[1]); + CHKERRQ(ierr); + + /* -------------- Stage 0: Solve Original System ---------------------- */ + /* + Indicate to PETSc profiling that we're beginning the first stage + */ + ierr = PetscLogStagePush(stages[0]); + CHKERRQ(ierr); + + /* + Create parallel matrix, specifying only its global dimensions. + When using MatCreate(), the matrix format can be specified at + runtime. Also, the parallel partitioning of the matrix is + determined by PETSc at runtime. + */ + ierr = MatCreate(PETSC_COMM_WORLD, &C); + CHKERRQ(ierr); + ierr = MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m*n, m * n); + CHKERRQ(ierr); + ierr = MatSetFromOptions(C); + CHKERRQ(ierr); + ierr = MatSetUp(C); + CHKERRQ(ierr); + + /* + Currently, all PETSc parallel matrix formats are partitioned by + contiguous chunks of rows across the processors. Determine which + rows of the matrix are locally owned. + */ + ierr = MatGetOwnershipRange(C, &Istart, &Iend); + CHKERRQ(ierr); + + /* + Set matrix entries matrix in parallel. + - Each processor needs to insert only elements that it owns + locally (but any non-local elements will be sent to the + appropriate processor during matrix assembly). + - Always specify global row and columns of matrix entries. + */ + for (Ii = Istart; Ii < Iend; Ii++) { + v = -1.0; + i = Ii / n; + j = Ii - i*n; + if (i > 0) { + J = Ii - n; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (i < m - 1) { + J = Ii + n; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (j > 0) { + J = Ii - 1; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (j < n - 1) { + J = Ii + 1; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + v = 4.0; + ierr = MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES); } - } else { - ierr = MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); - ierr = MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);CHKERRQ(ierr); - } - - /* - Assemble matrix, using the 2-step process: - MatAssemblyBegin(), MatAssemblyEnd() - Computations can be done while messages are in transition - by placing code between these two statements. - */ - ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); - ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); - - /* - Create parallel vectors. - - When using VecSetSizes(), we specify only the vector's global - dimension; the parallel partitioning is determined at runtime. - - Note: We form 1 vector from scratch and then duplicate as needed. - */ - ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr); - ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr); - ierr = VecSetFromOptions(u);CHKERRQ(ierr); - ierr = VecDuplicate(u,&b);CHKERRQ(ierr); - ierr = VecDuplicate(b,&x);CHKERRQ(ierr); - - /* - Currently, all parallel PETSc vectors are partitioned by - contiguous chunks across the processors. Determine which - range of entries are locally owned. - */ - ierr = VecGetOwnershipRange(x,&low,&high);CHKERRQ(ierr); - - /* - Set elements within the exact solution vector in parallel. - - Each processor needs to insert only elements that it owns - locally (but any non-local entries will be sent to the - appropriate processor during vector assembly). - - Always specify global locations of vector entries. - */ - ierr = VecGetLocalSize(x,&ldim);CHKERRQ(ierr); - for (i=0; i -pc_type ) - */ - ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); - - /* - Solve linear system. Here we explicitly call KSPSetUp() for more - detailed performance monitoring of certain preconditioners, such - as ICC and ILU. This call is optional, as KSPSetUp() will - automatically be called within KSPSolve() if it hasn't been - called already. - */ - ierr = KSPSetUp(ksp);CHKERRQ(ierr); - ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); - - /* - Check the error - */ - ierr = VecAXPY(x,none,u);CHKERRQ(ierr); - ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); - ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); - if (norm > 1.e-13) { - ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);CHKERRQ(ierr); - } - - /* -------------- Stage 1: Solve Second System ---------------------- */ - /* - Solve another linear system with the same method. We reuse the KSP - context, matrix and vector data structures, and hence save the - overhead of creating new ones. - - Indicate to PETSc profiling that we're concluding the first - stage with PetscLogStagePop(), and beginning the second stage with - PetscLogStagePush(). - */ - ierr = PetscLogStagePop();CHKERRQ(ierr); - ierr = PetscLogStagePush(stages[1]);CHKERRQ(ierr); - - /* - Initialize all matrix entries to zero. MatZeroEntries() retains the - nonzero structure of the matrix for sparse formats. - */ - ierr = MatZeroEntries(C);CHKERRQ(ierr); - - /* - Assemble matrix again. Note that we retain the same matrix data - structure and the same nonzero pattern; we just change the values - of the matrix entries. - */ - for (i=0; i0) {J = Ii - n; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} - if (i0) {J = Ii - 1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} - if (j 1) { + J = Ii - n - 1; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + } + } else { + ierr = MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE); + CHKERRQ(ierr); + ierr = MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE); + CHKERRQ(ierr); } - } - if (mat_nonsymmetric) { - for (Ii=Istart; Ii1) {J = Ii-n-1; ierr = MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} + + /* + Assemble matrix, using the 2-step process: + MatAssemblyBegin(), MatAssemblyEnd() + Computations can be done while messages are in transition + by placing code between these two statements. + */ + ierr = MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + ierr = MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + + /* + Create parallel vectors. + - When using VecSetSizes(), we specify only the vector's global + dimension; the parallel partitioning is determined at runtime. + - Note: We form 1 vector from scratch and then duplicate as needed. + */ + ierr = VecCreate(PETSC_COMM_WORLD, &u); + CHKERRQ(ierr); + ierr = VecSetSizes(u, PETSC_DECIDE, m * n); + CHKERRQ(ierr); + ierr = VecSetFromOptions(u); + CHKERRQ(ierr); + ierr = VecDuplicate(u, &b); + CHKERRQ(ierr); + ierr = VecDuplicate(b, &x); + CHKERRQ(ierr); + + /* + Currently, all parallel PETSc vectors are partitioned by + contiguous chunks across the processors. Determine which + range of entries are locally owned. + */ + ierr = VecGetOwnershipRange(x, &low, &high); + CHKERRQ(ierr); + + /* + Set elements within the exact solution vector in parallel. + - Each processor needs to insert only elements that it owns + locally (but any non-local entries will be sent to the + appropriate processor during vector assembly). + - Always specify global locations of vector entries. + */ + ierr = VecGetLocalSize(x, &ldim); + CHKERRQ(ierr); + for (i = 0; i < ldim; i++) { + iglobal = i + low; + v = (PetscScalar) (i + 100 * rank); + ierr = VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES); + CHKERRQ(ierr); } - } - ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); - ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); - ierr = PetscOptionsGetBool(NULL,"-test_newMat",&testnewC,NULL);CHKERRQ(ierr); - if (testnewC) { /* - User may use a new matrix C with same nonzero pattern, e.g. - ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_package mumps -test_newMat - */ - Mat Ctmp; - ierr = MatDuplicate(C,MAT_COPY_VALUES,&Ctmp);CHKERRQ(ierr); - ierr = MatDestroy(&C);CHKERRQ(ierr); - ierr = MatDuplicate(Ctmp,MAT_COPY_VALUES,&C);CHKERRQ(ierr); - ierr = MatDestroy(&Ctmp);CHKERRQ(ierr); - } - /* - Compute another right-hand-side vector - */ - ierr = MatMult(C,u,b);CHKERRQ(ierr); - - /* - Set operators. Here the matrix that defines the linear system - also serves as the preconditioning matrix. - */ - ierr = KSPSetOperators(ksp,C,C);CHKERRQ(ierr); - - /* - Solve linear system - */ - ierr = KSPSetUp(ksp);CHKERRQ(ierr); - ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); - - /* - Check the error - */ - ierr = VecAXPY(x,none,u);CHKERRQ(ierr); - ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); - ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); - if (norm > 1.e-4) { - ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);CHKERRQ(ierr); - } - - /* - Free work space. All PETSc objects should be destroyed when they - are no longer needed. - */ - ierr = KSPDestroy(&ksp);CHKERRQ(ierr); - ierr = VecDestroy(&u);CHKERRQ(ierr); - ierr = VecDestroy(&x);CHKERRQ(ierr); - ierr = VecDestroy(&b);CHKERRQ(ierr); - ierr = MatDestroy(&C);CHKERRQ(ierr); - - /* - Indicate to PETSc profiling that we're concluding the second stage - */ - ierr = PetscLogStagePop();CHKERRQ(ierr); - - ierr = PetscFinalize(); - return 0; + Assemble vector, using the 2-step process: + VecAssemblyBegin(), VecAssemblyEnd() + Computations can be done while messages are in transition, + by placing code between these two statements. + */ + ierr = VecAssemblyBegin(u); + CHKERRQ(ierr); + ierr = VecAssemblyEnd(u); + CHKERRQ(ierr); + + /* + Compute right-hand-side vector + */ + ierr = MatMult(C, u, b); + CHKERRQ(ierr); + + /* + Create linear solver context + */ + ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); + CHKERRQ(ierr); + + /* + Set operators. Here the matrix that defines the linear system + also serves as the preconditioning matrix. + */ + ierr = KSPSetOperators(ksp, C, C); + CHKERRQ(ierr); + + /* + Set runtime options (e.g., -ksp_type -pc_type ) + */ + ierr = KSPSetFromOptions(ksp); + CHKERRQ(ierr); + + /* + Solve linear system. Here we explicitly call KSPSetUp() for more + detailed performance monitoring of certain preconditioners, such + as ICC and ILU. This call is optional, as KSPSetUp() will + automatically be called within KSPSolve() if it hasn't been + called already. + */ + ierr = KSPSetUp(ksp); + CHKERRQ(ierr); + ierr = KSPSolve(ksp, b, x); + CHKERRQ(ierr); + + /* + Check the error + */ + ierr = VecAXPY(x, none, u); + CHKERRQ(ierr); + ierr = VecNorm(x, NORM_2, &norm); + CHKERRQ(ierr); + ierr = KSPGetIterationNumber(ksp, &its); + CHKERRQ(ierr); + if (norm > 1.e-13) { + ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, Iterations %D\n", (double) norm, its); + CHKERRQ(ierr); + } + + /* -------------- Stage 1: Solve Second System ---------------------- */ + /* + Solve another linear system with the same method. We reuse the KSP + context, matrix and vector data structures, and hence save the + overhead of creating new ones. + + Indicate to PETSc profiling that we're concluding the first + stage with PetscLogStagePop(), and beginning the second stage with + PetscLogStagePush(). + */ + ierr = PetscLogStagePop(); + CHKERRQ(ierr); + ierr = PetscLogStagePush(stages[1]); + CHKERRQ(ierr); + + /* + Initialize all matrix entries to zero. MatZeroEntries() retains the + nonzero structure of the matrix for sparse formats. + */ + ierr = MatZeroEntries(C); + CHKERRQ(ierr); + + /* + Assemble matrix again. Note that we retain the same matrix data + structure and the same nonzero pattern; we just change the values + of the matrix entries. + */ + for (i = 0; i < m; i++) { + for (j = 2 * rank; j < 2 * rank + 2; j++) { + v = -1.0; + Ii = j + n*i; + if (i > 0) { + J = Ii - n; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (i < m - 1) { + J = Ii + n; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (j > 0) { + J = Ii - 1; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + if (j < n - 1) { + J = Ii + 1; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + v = 6.0; + ierr = MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES); + CHKERRQ(ierr); + } + } + if (mat_nonsymmetric) { + for (Ii = Istart; Ii < Iend; Ii++) { + v = -1.5; + i = Ii / n; + if (i > 1) { + J = Ii - n - 1; + ierr = MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES); + CHKERRQ(ierr); + } + } + } + ierr = MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + ierr = MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY); + CHKERRQ(ierr); + + ierr = PetscOptionsGetBool(NULL, "-test_newMat", &testnewC, NULL); + CHKERRQ(ierr); + if (testnewC) { + /* + User may use a new matrix C with same nonzero pattern, e.g. + ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_package mumps -test_newMat + */ + Mat Ctmp; + ierr = MatDuplicate(C, MAT_COPY_VALUES, &Ctmp); + CHKERRQ(ierr); + ierr = MatDestroy(&C); + CHKERRQ(ierr); + ierr = MatDuplicate(Ctmp, MAT_COPY_VALUES, &C); + CHKERRQ(ierr); + ierr = MatDestroy(&Ctmp); + CHKERRQ(ierr); + } + /* + Compute another right-hand-side vector + */ + ierr = MatMult(C, u, b); + CHKERRQ(ierr); + + /* + Set operators. Here the matrix that defines the linear system + also serves as the preconditioning matrix. + */ + ierr = KSPSetOperators(ksp, C, C); + CHKERRQ(ierr); + + /* + Solve linear system + */ + ierr = KSPSetUp(ksp); + CHKERRQ(ierr); + ierr = KSPSolve(ksp, b, x); + CHKERRQ(ierr); + + /* + Check the error + */ + ierr = VecAXPY(x, none, u); + CHKERRQ(ierr); + ierr = VecNorm(x, NORM_2, &norm); + CHKERRQ(ierr); + ierr = KSPGetIterationNumber(ksp, &its); + CHKERRQ(ierr); + if (norm > 1.e-4) { + ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, Iterations %D\n", (double) norm, its); + CHKERRQ(ierr); + } + + /* + Free work space. All PETSc objects should be destroyed when they + are no longer needed. + */ + ierr = KSPDestroy(&ksp); + CHKERRQ(ierr); + ierr = VecDestroy(&u); + CHKERRQ(ierr); + ierr = VecDestroy(&x); + CHKERRQ(ierr); + ierr = VecDestroy(&b); + CHKERRQ(ierr); + ierr = MatDestroy(&C); + CHKERRQ(ierr); + + /* + Indicate to PETSc profiling that we're concluding the second stage + */ + ierr = PetscLogStagePop(); + CHKERRQ(ierr); + + ierr = PetscFinalize(); + return 0; } diff --git a/spgemm/mkl_shmem/mklspgemm.c b/spgemm/mkl_shmem/mklspgemm.c index 990d4d322de9d6e478042ef10d9afafa3184e0a0..08e34e04b72c5c34af05876c05907217b1cb1208 100644 --- a/spgemm/mkl_shmem/mklspgemm.c +++ b/spgemm/mkl_shmem/mklspgemm.c @@ -3,7 +3,7 @@ #include #include #include -#define MIC_TARGET +#define MIC_TARGET #include "../../common/mmio.h" #include "../../common/util.h" #include "../../common/csr.h" @@ -15,7 +15,7 @@ char transa = 'n'; /*}}}*/ /** Multiply two sparse matrices which are stored in CSR format. MKL is used */ -void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) { /*{{{*/ +void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* AJ, MKL_INT* AI, MKL_INT Bn, MKL_INT Bnnz, double* Bval, MKL_INT* BJ, MKL_INT* BI, double** pCval, MKL_INT** pCJ, MKL_INT** pCI, double* time) { /*{{{*/ MKL_INT* CJ = NULL; double* Cval = NULL; MKL_INT sort = 3; // sort everything @@ -24,7 +24,7 @@ void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* MKL_INT ierr; MKL_INT request = 1; // symbolic multiplication mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - + request = 2; //numeric multiplication int Cnnz = CI[Am]-1; int Cval_size = Cnnz + 1; @@ -38,7 +38,7 @@ void mkl_cpu_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* double time_end = dsecnd(); *time = (time_end - time_st)/nrepeat; *pCval = Cval; *pCJ = CJ; *pCI = CI; -} /*}}}*/ +} /*}}}*/ int main(int argc, char* argv[]) { /*{{{*/ /** usage */ @@ -68,51 +68,51 @@ int main(int argc, char* argv[]) { /*{{{*/ } option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; assert(nrequired_args == iarg); - + /** read matrix market file for A matrix */ MKL_INT Am, An, Annz; - MKL_INT *Ax, *Ay; + MKL_INT *Ax, *Ay; double *Anz; read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); - + /** construct csr storage for A matrix */ MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); - coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); - + coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); + /** read matrix market file for B matrix */ - MKL_INT Bm, Bn, Bnnz; - MKL_INT *Bx, *By; + MKL_INT Bm, Bn, Bnnz; + MKL_INT *Bx, *By; double *Bnz; read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); - + /** construct csr storage for B matrix */ MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); - coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); - + coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); + /** multiply two matrices */ - double* Cval, time; + double* Cval, time; MKL_INT* CJ; MKL_INT* CI; mkl_cpu_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); - + int i; for(i=0;i<=Am;i++)AI[i]--; for(i=0;i #include #include -#define MIC_TARGET __declspec(target(mic)) +#define MIC_TARGET __declspec(target(mic)) #include "../../common/mmio.h" #include "../../common/util.h" #include "../../common/csr.h" @@ -26,7 +26,7 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* } else { nthreads = mkl_get_max_threads(); } - } + } #pragma offload target(mic:micdev) \ in(transa) \ in(Am) \ @@ -37,7 +37,7 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* in(AJ:length(Annz) free_if(0)) \ in(Bval:length(Bnnz) free_if(0)) \ in(BI:length(An+1) free_if(0)) \ - in(BJ:length(Bnnz) free_if(0)) + in(BJ:length(Bnnz) free_if(0)) {} MKL_INT Cnnz_host = -1; @@ -68,11 +68,11 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* for(i = 0; i < 10; i++) { mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - } + } double s_initial = dsecnd(); for(i = 0; i < nrepeat; i++) { mkl_dcsrmultcsr(&transa, &request, &sort, &Am, &An, &Bn, Aval, AJ, AI, Bval, BJ, BI, Cval, CJ, CI, &nnzmax, &ierr); - } + } time_mic_symbolic_mm = (dsecnd() - s_initial) / nrepeat; // seconds request = 2; //numeric multiplication @@ -109,13 +109,13 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* } *time = time_mic_numeric_mm; if (1) { // Copy result matrix from MIC to Host - int nelm_CI = Am + 2; - int nelm_CJ = Cnnz_host + 1; - int nelm_Cval = Cnnz_host + 1; + int nelm_CI = Am + 2; + int nelm_CJ = Cnnz_host + 1; + int nelm_Cval = Cnnz_host + 1; __declspec(target(mic)) MKL_INT* CI_host = (MKL_INT*)mkl_malloc( (nelm_CI) * sizeof( MKL_INT ), 64 ); __declspec(target(mic)) MKL_INT* CJ_host = (MKL_INT*)mkl_malloc( (nelm_CJ) * sizeof( MKL_INT ), 64 ); __declspec(target(mic)) double* Cval_host = (double*)mkl_malloc( (nelm_Cval) * sizeof( double ), 64 ); - #pragma offload target(mic:micdev) in(nelm_CI)\ + #pragma offload target(mic:micdev) in(nelm_CI)\ inout(CI_host:length(nelm_CI) alloc_if(1) free_if(0)) \ inout(CJ_host:length(nelm_CJ) alloc_if(1) free_if(0)) \ inout(Cval_host:length(nelm_Cval) alloc_if(1) free_if(0)) \ @@ -127,7 +127,7 @@ void mkl_mic_spgemm(MKL_INT Am, MKL_INT An, MKL_INT Annz, double* Aval, MKL_INT* for(i = 0; i < nelm_CI; i++) CI_host[i] = CI[i]; for(i = 0; i < nelm_CJ; i++) {CJ_host[i] = CJ[i]; Cval_host[i] = Cval[i];} } - *pCval = Cval_host; *pCJ = CJ_host; *pCI = CI_host; + *pCval = Cval_host; *pCJ = CJ_host; *pCI = CI_host; } } /* ENDOF mkl_mic_spmm }}}*/ @@ -162,51 +162,51 @@ int main(int argc, char* argv[]) { /*{{{*/ micdev = atoi(argv[iarg]); iarg++; option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; assert(nrequired_args == iarg); - + /** read matrix market file for A matrix */ MKL_INT Am, An, Annz; - MKL_INT *Ax, *Ay; + MKL_INT *Ax, *Ay; double *Anz; read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); - + /** construct csr storage for A matrix */ MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); - coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); - + coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); + /** read matrix market file for B matrix */ - MKL_INT Bm, Bn, Bnnz; - MKL_INT *Bx, *By; + MKL_INT Bm, Bn, Bnnz; + MKL_INT *Bx, *By; double *Bnz; read_mm(strpathB, &Bm, &Bn, &Bnnz, &Bx, &By, &Bnz); - + /** construct csr storage for B matrix */ MKL_INT* BJ = (MKL_INT*) mkl_malloc( Bnnz * sizeof( MKL_INT ), 64 ); MKL_INT* BI = (MKL_INT*) mkl_malloc( (Bm+1) * sizeof( MKL_INT ), 64 ); double* Bval = (double*) mkl_malloc( Bnnz * sizeof( double ), 64 ); - coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); - + coo_to_csr(Bm, Bnnz, Bx, By, Bnz, BI, BJ, Bval); + /** multiply two matrices */ - double* Cval, time; + double* Cval, time; MKL_INT* CJ; MKL_INT* CI; mkl_mic_spgemm(Am, An, Annz, Aval, AJ, AI, Bn, Bnnz, Bval, BJ, BI, &Cval, &CJ, &CI, &time); - + int i; for(i=0;i<=Am;i++)AI[i]--; for(i=0;i #include #include -#define MIC_TARGET +#define MIC_TARGET #include "../../common/mmio.h" #include "../../common/util.h" /*}}}*/ @@ -20,7 +20,7 @@ void mkl_cpu_spmv(const MKL_INT Am, const MKL_INT An, double* Aval, MKL_INT* AJ double alpha = 1.0; double beta = 0.0; char* matdescra = "G NF"; - + double time_st = dsecnd(); int i; for(i = 0; i < nrepeat; i++) { @@ -29,7 +29,7 @@ void mkl_cpu_spmv(const MKL_INT Am, const MKL_INT An, double* Aval, MKL_INT* AJ } double time_end = dsecnd(); *time = (time_end - time_st)/nrepeat; -} /*}}}*/ +} /*}}}*/ int main(int argc, char* argv[]) { /*{{{*/ /** usage */ @@ -57,40 +57,40 @@ int main(int argc, char* argv[]) { /*{{{*/ } option_print_matrices = strcmp(argv[iarg], "PRINT_YES")==0?OPTION_PRINT_MATRICES:OPTION_NOPRINT_MATRICES; iarg++; assert(nrequired_args == iarg); - + /** read matrix market file for A matrix */ MKL_INT Am, An, Annz; - MKL_INT *Ax, *Ay; + MKL_INT *Ax, *Ay; double *Anz; read_mm(strpathA, &Am, &An, &Annz, &Ax, &Ay, &Anz); - + /** construct csr storage for A matrix */ MKL_INT* AJ = (MKL_INT*) mkl_malloc( Annz * sizeof( MKL_INT ), 64 ); MKL_INT* AI = (MKL_INT*) mkl_malloc( (Am+1) * sizeof( MKL_INT ), 64 ); double* Aval = (double*) mkl_malloc( Annz * sizeof( double ), 64 ); - coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); - - double* xvec = (double*) mkl_malloc( An * sizeof( double ), 64 ); + coo_to_csr(Am, Annz, Ax, Ay, Anz, AI, AJ, Aval); + + double* xvec = (double*) mkl_malloc( An * sizeof( double ), 64 ); double* yvec = (double*) mkl_malloc( Am * sizeof( double ), 64 ); int i; for(i=0;i