Actual source code: superlu_dist.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:         Provides an interface to the SuperLU_DIST_2.0 sparse solver
  5: */

  7: #include "src/mat/impls/aij/seq/aij.h"
  8: #include "src/mat/impls/aij/mpi/mpiaij.h"
  9: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
 10: #include "stdlib.h"
 11: #endif

 14: #if defined(PETSC_USE_COMPLEX)
 15: #include "superlu_zdefs.h"
 16: #else
 17: #include "superlu_ddefs.h"
 18: #endif

 21: typedef enum { GLOBAL,DISTRIBUTED
 22: } SuperLU_MatInputMode;

 24: typedef struct {
 25:   int_t                   nprow,npcol,*row,*col;
 26:   gridinfo_t              grid;
 27:   superlu_options_t       options;
 28:   SuperMatrix             A_sup;
 29:   ScalePermstruct_t       ScalePermstruct;
 30:   LUstruct_t              LUstruct;
 31:   int                     StatPrint;
 32:   int                     MatInputMode;
 33:   SOLVEstruct_t           SOLVEstruct;
 34:   fact_t                  FactPattern;
 35:   MPI_Comm                comm_superlu;
 36: #if defined(PETSC_USE_COMPLEX)
 37:   doublecomplex           *val;
 38: #else
 39:   double                  *val;
 40: #endif

 42:   /* A few function pointers for inheritance */
 43:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 44:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 45:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 46:   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
 47:   PetscErrorCode (*MatDestroy)(Mat);

 49:   /* Flag to clean up (non-global) SuperLU objects during Destroy */
 50:   PetscTruth CleanUpSuperLU_Dist;
 51: } Mat_SuperLU_DIST;

 53: EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);

 66: PetscErrorCode  MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 67: {
 68:   PetscErrorCode   ierr;
 69:   Mat              B=*newmat;
 70:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

 73:   if (reuse == MAT_INITIAL_MATRIX) {
 74:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 75:   }
 76:   /* Reset the original function pointers */
 77:   B->ops->duplicate        = lu->MatDuplicate;
 78:   B->ops->view             = lu->MatView;
 79:   B->ops->assemblyend      = lu->MatAssemblyEnd;
 80:   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
 81:   B->ops->destroy          = lu->MatDestroy;

 83:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);
 84:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);
 85:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);
 86:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);

 88:   PetscObjectChangeTypeName((PetscObject)B,type);
 89:   *newmat = B;
 90:   return(0);
 91: }

 96: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
 97: {
 98:   PetscErrorCode   ierr;
 99:   PetscMPIInt      size;
100:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
101: 
103:   if (lu->CleanUpSuperLU_Dist) {
104:     /* Deallocate SuperLU_DIST storage */
105:     if (lu->MatInputMode == GLOBAL) {
106:       Destroy_CompCol_Matrix_dist(&lu->A_sup);
107:     } else {
108:       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
109:       if ( lu->options.SolveInitialized ) {
110: #if defined(PETSC_USE_COMPLEX)
111:         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
112: #else
113:         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
114: #endif
115:       }
116:     }
117:     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
118:     ScalePermstructFree(&lu->ScalePermstruct);
119:     LUstructFree(&lu->LUstruct);
120: 
121:     /* Release the SuperLU_DIST process grid. */
122:     superlu_gridexit(&lu->grid);
123: 
124:     MPI_Comm_free(&(lu->comm_superlu));
125:   }

127:   MPI_Comm_size(A->comm,&size);
128:   if (size == 1) {
129:     MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);
130:   } else {
131:     MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);
132:   }
133:   (*A->ops->destroy)(A);
134:   return(0);
135: }

139: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
140: {
141:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
142:   PetscErrorCode   ierr;
143:   PetscMPIInt      size;
144:   PetscInt         m=A->rmap.N, N=A->cmap.N;
145:   SuperLUStat_t    stat;
146:   double           berr[1];
147:   PetscScalar      *bptr;
148:   PetscInt         info, nrhs=1;
149:   Vec              x_seq;
150:   IS               iden;
151:   VecScatter       scat;
152: 
154:   MPI_Comm_size(A->comm,&size);
155:   if (size > 1) {
156:     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
157:       VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
158:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
159:       VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
160:       ISDestroy(iden);

162:       VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
163:       VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
164:       VecGetArray(x_seq,&bptr);
165:     } else { /* distributed mat input */
166:       VecCopy(b_mpi,x);
167:       VecGetArray(x,&bptr);
168:     }
169:   } else { /* size == 1 */
170:     VecCopy(b_mpi,x);
171:     VecGetArray(x,&bptr);
172:   }
173: 
174:   if (lu->options.Fact != FACTORED)
175:     SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");

177:   PStatInit(&stat);        /* Initialize the statistics variables. */
178:   if (lu->MatInputMode == GLOBAL) {
179: #if defined(PETSC_USE_COMPLEX)
180:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182: #else
183:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185: #endif 
186:   } else { /* distributed mat input */
187: #if defined(PETSC_USE_COMPLEX)
188:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190:     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191: #else
192:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194:     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195: #endif
196:   }
197:   if (lu->options.PrintStat) {
198:      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199:   }
200:   PStatFree(&stat);
201: 
202:   if (size > 1) {
203:     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204:       VecRestoreArray(x_seq,&bptr);
205:       VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
206:       VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
207:       VecScatterDestroy(scat);
208:       VecDestroy(x_seq);
209:     } else {
210:       VecRestoreArray(x,&bptr);
211:     }
212:   } else {
213:     VecRestoreArray(x,&bptr);
214:   }
215:   return(0);
216: }

220: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221: {
222:   Mat              *tseq,A_seq = PETSC_NULL;
223:   Mat_SeqAIJ       *aa,*bb;
224:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225:   PetscErrorCode   ierr;
226:   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227:                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228:   PetscMPIInt      size,rank;
229:   SuperLUStat_t    stat;
230:   double           *berr=0;
231:   IS               isrow;
232:   PetscLogDouble   time0,time,time_min,time_max;
233:   Mat              F_diag=PETSC_NULL;
234: #if defined(PETSC_USE_COMPLEX)
235:   doublecomplex    *av, *bv;
236: #else
237:   double           *av, *bv;
238: #endif

241:   MPI_Comm_size(A->comm,&size);
242:   MPI_Comm_rank(A->comm,&rank);
243: 
244:   if (lu->options.PrintStat) { /* collect time for mat conversion */
245:     MPI_Barrier(A->comm);
246:     PetscGetTime(&time0);
247:   }

249:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250:     if (size > 1) { /* convert mpi A to seq mat A */
251:       ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
252:       MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
253:       ISDestroy(isrow);
254: 
255:       A_seq = *tseq;
256:       PetscFree(tseq);
257:       aa =  (Mat_SeqAIJ*)A_seq->data;
258:     } else {
259:       aa =  (Mat_SeqAIJ*)A->data;
260:     }

262:     /* Convert Petsc NR matrix to SuperLU_DIST NC. 
263:        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264:     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265:       if (lu->FactPattern == SamePattern_SameRowPerm){
266:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269:       } else {
270:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271:         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272:         lu->options.Fact = SamePattern;
273:       }
274:     }
275: #if defined(PETSC_USE_COMPLEX)
276:     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277: #else
278:     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279: #endif

281:     /* Create compressed column matrix A_sup. */
282: #if defined(PETSC_USE_COMPLEX)
283:     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284: #else
285:     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286: #endif
287:   } else { /* distributed mat input */
288:     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289:     aa=(Mat_SeqAIJ*)(mat->A)->data;
290:     bb=(Mat_SeqAIJ*)(mat->B)->data;
291:     ai=aa->i; aj=aa->j;
292:     bi=bb->i; bj=bb->j;
293: #if defined(PETSC_USE_COMPLEX)
294:     av=(doublecomplex*)aa->a;
295:     bv=(doublecomplex*)bb->a;
296: #else
297:     av=aa->a;
298:     bv=bb->a;
299: #endif
300:     rstart = A->rmap.rstart;
301:     nz     = aa->nz + bb->nz;
302:     garray = mat->garray;
303: 
304:     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305: #if defined(PETSC_USE_COMPLEX)
306:       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307: #else
308:       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309: #endif
310:     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311:       if (lu->FactPattern == SamePattern_SameRowPerm){
312:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314:       } else {
315:         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316:         lu->options.Fact = SamePattern;
317:       }
318:     }
319:     nz = 0; irow = rstart;
320:     for ( i=0; i<m; i++ ) {
321:       lu->row[i] = nz;
322:       countA = ai[i+1] - ai[i];
323:       countB = bi[i+1] - bi[i];
324:       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325:       bjj = bj + bi[i];

327:       /* B part, smaller col index */
328:       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329:       jB = 0;
330:       for (j=0; j<countB; j++){
331:         jcol = garray[bjj[j]];
332:         if (jcol > colA_start) {
333:           jB = j;
334:           break;
335:         }
336:         lu->col[nz] = jcol;
337:         lu->val[nz++] = *bv++;
338:         if (j==countB-1) jB = countB;
339:       }

341:       /* A part */
342:       for (j=0; j<countA; j++){
343:         lu->col[nz] = rstart + ajj[j];
344:         lu->val[nz++] = *av++;
345:       }

347:       /* B part, larger col index */
348:       for (j=jB; j<countB; j++){
349:         lu->col[nz] = garray[bjj[j]];
350:         lu->val[nz++] = *bv++;
351:       }
352:     }
353:     lu->row[m] = nz;
354: #if defined(PETSC_USE_COMPLEX)
355:     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357: #else
358:     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360: #endif
361:   }
362:   if (lu->options.PrintStat) {
363:     PetscGetTime(&time);
364:     time0 = time - time0;
365:   }

367:   /* Factor the matrix. */
368:   PStatInit(&stat);   /* Initialize the statistics variables. */

370:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371: #if defined(PETSC_USE_COMPLEX)
372:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374: #else
375:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377: #endif 
378:   } else { /* distributed mat input */
379: #if defined(PETSC_USE_COMPLEX)
380:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383: #else
384:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387: #endif
388:   }

390:   if (lu->MatInputMode == GLOBAL && size > 1){
391:     MatDestroy(A_seq);
392:   }

394:   if (lu->options.PrintStat) {
395:     if (size > 1){
396:       MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
397:       MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
398:       MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
399:       time = time/size; /* average time */
400:       if (!rank)
401:         PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
402:                               %g / %g / %g\n",time_max,time_min,time);
403:     } else {
404:       PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
405:                               %g\n",time0);
406:     }
407: 
408:     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
409:   }
410:   PStatFree(&stat);
411:   if (size > 1){
412:     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
413:     F_diag->assembled = PETSC_TRUE;
414:   }
415:   (*F)->assembled  = PETSC_TRUE;
416:   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
417:   return(0);
418: }

420: /* Note the Petsc r and c permutations are ignored */
423: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
424: {
425:   Mat               B;
426:   Mat_SuperLU_DIST  *lu;
427:   PetscErrorCode    ierr;
428:   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
429:   PetscMPIInt       size;
430:   superlu_options_t options;
431:   PetscTruth        flg;
432:   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
433:   const char        *prtype[] = {"LargeDiag","NATURAL"};
434:   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

437:   /* Create the factorization matrix */
438:   MatCreate(A->comm,&B);
439:   MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);
440:   MatSetType(B,A->type_name);
441:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
442:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

444:   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
445:   B->ops->solve            = MatSolve_SuperLU_DIST;
446:   B->factor                = FACTOR_LU;

448:   lu = (Mat_SuperLU_DIST*)(B->spptr);

450:   /*   Set the default input options:
451:         options.Fact = DOFACT;
452:         options.Equil = YES;
453:         options.ColPerm = MMD_AT_PLUS_A;
454:         options.RowPerm = LargeDiag;
455:         options.ReplaceTinyPivot = YES;
456:         options.Trans = NOTRANS;
457:         options.IterRefine = DOUBLE;
458:         options.SolveInitialized = NO;
459:         options.RefineInitialized = NO;
460:         options.PrintStat = YES;
461:   */
462:   set_default_options_dist(&options);

464:   MPI_Comm_dup(A->comm,&(lu->comm_superlu));
465:   MPI_Comm_size(A->comm,&size);
466: 
467:   PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");
468:     lu->npcol = (PetscMPIInt)(sqrt((PetscReal)size)); /* Default num of process columns */
469:     if (!lu->npcol) lu->npcol = 1;
470:     lu->nprow = (PetscMPIInt)(size/lu->npcol);        /* Default num of process rows */
471:     PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
472:     PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
473:     if (size != lu->nprow * lu->npcol)
474:       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
475: 
476:     lu->MatInputMode = DISTRIBUTED;
477:     PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);
478:     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

480:     PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
481:     if (!flg) {
482:       options.Equil = NO;
483:     }

485:     PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
486:     if (flg) {
487:       switch (indx) {
488:       case 0:
489:         options.RowPerm = LargeDiag;
490:         break;
491:       case 1:
492:         options.RowPerm = NOROWPERM;
493:         break;
494:       }
495:     }

497:     PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);
498:     if (flg) {
499:       switch (indx) {
500:       case 0:
501:         options.ColPerm = MMD_AT_PLUS_A;
502:         break;
503:       case 1:
504:         options.ColPerm = NATURAL;
505:         break;
506:       case 2:
507:         options.ColPerm = MMD_ATA;
508:         break;
509:       }
510:     }

512:     PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
513:     if (!flg) {
514:       options.ReplaceTinyPivot = NO;
515:     }

517:     lu->FactPattern = SamePattern;
518:     PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
519:     if (flg) {
520:       switch (indx) {
521:       case 0:
522:         lu->FactPattern = SamePattern;
523:         break;
524:       case 1:
525:         lu->FactPattern = SamePattern_SameRowPerm;
526:         break;
527:       }
528:     }
529: 
530:     options.IterRefine = NOREFINE;
531:     PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
532:     if (flg) {
533:       options.IterRefine = DOUBLE;
534:     }

536:     if (PetscLogPrintInfo) {
537:       options.PrintStat = YES;
538:     } else {
539:       options.PrintStat = NO;
540:     }
541:     PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
542:                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
543:   PetscOptionsEnd();

545:   /* Initialize the SuperLU process grid. */
546:   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

548:   /* Initialize ScalePermstruct and LUstruct. */
549:   ScalePermstructInit(M, N, &lu->ScalePermstruct);
550:   LUstructInit(M, N, &lu->LUstruct);

552:   lu->options             = options;
553:   lu->options.Fact        = DOFACT;
554:   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
555:   *F = B;
556:   return(0);
557: }

561: PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
562:   PetscErrorCode   ierr;
563:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);

566:   (*lu->MatAssemblyEnd)(A,mode);
567:   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
568:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
569:   return(0);
570: }

574: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
575: {
576:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
577:   superlu_options_t options;
578:   PetscErrorCode    ierr;

581:   /* check if matrix is superlu_dist type */
582:   if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

584:   options = lu->options;
585:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
586:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
587:   PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
588:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
589:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
590:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
591:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
592:   if (options.ColPerm == NATURAL) {
593:     PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
594:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
595:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
596:   } else if (options.ColPerm == MMD_ATA) {
597:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
598:   } else {
599:     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
600:   }
601: 
602:   if (lu->FactPattern == SamePattern){
603:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
604:   } else {
605:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
606:   }
607:   return(0);
608: }

612: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
613: {
614:   PetscErrorCode    ierr;
615:   PetscTruth        iascii;
616:   PetscViewerFormat format;
617:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);

620:   (*lu->MatView)(A,viewer);

622:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
623:   if (iascii) {
624:     PetscViewerGetFormat(viewer,&format);
625:     if (format == PETSC_VIEWER_ASCII_INFO) {
626:       MatFactorInfo_SuperLU_DIST(A,viewer);
627:     }
628:   }
629:   return(0);
630: }


636: PetscErrorCode  MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
637: {
638:   /* This routine is only called to convert to MATSUPERLU_DIST */
639:   /* from MATSEQAIJ if A has a single process communicator */
640:   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
641:   PetscErrorCode   ierr;
642:   PetscMPIInt      size;
643:   MPI_Comm         comm;
644:   Mat              B=*newmat;
645:   Mat_SuperLU_DIST *lu;

648:   if (reuse == MAT_INITIAL_MATRIX) {
649:     MatDuplicate(A,MAT_COPY_VALUES,&B);
650:   }

652:   PetscObjectGetComm((PetscObject)A,&comm);
653:   PetscNew(Mat_SuperLU_DIST,&lu);

655:   lu->MatDuplicate         = A->ops->duplicate;
656:   lu->MatView              = A->ops->view;
657:   lu->MatAssemblyEnd       = A->ops->assemblyend;
658:   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
659:   lu->MatDestroy           = A->ops->destroy;
660:   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;

662:   B->spptr                 = (void*)lu;
663:   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
664:   B->ops->view             = MatView_SuperLU_DIST;
665:   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
666:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
667:   B->ops->destroy          = MatDestroy_SuperLU_DIST;
668:   MPI_Comm_size(comm,&size);
669:   if (size == 1) {
670:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
671:     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
672:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
673:     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
674:   } else {
675:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
676:                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
677:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
678:                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
679:   }
680:   PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");
681:   PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);
682:   *newmat = B;
683:   return(0);
684: }

689: PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
690:   PetscErrorCode   ierr;
691:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

694:   (*lu->MatDuplicate)(A,op,M);
695:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));
696:   return(0);
697: }

699: /*MC
700:   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 
701:   via the external package SuperLU_DIST.

703:   If SuperLU_DIST is installed (see the manual for
704:   instructions on how to declare the existence of external packages),
705:   a matrix type can be constructed which invokes SuperLU_DIST solvers.
706:   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).

708:   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
709:   and from MATMPIAIJ otherwise.  As a result, for single process communicators, 
710:   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 
711:   for communicators controlling multiple processes.  It is recommended that you call both of
712:   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
713:   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
714:   without data copy.

716:   Options Database Keys:
717: + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
718: . -mat_superlu_dist_r <n> - number of rows in processor partition
719: . -mat_superlu_dist_c <n> - number of columns in processor partition
720: . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
721: . -mat_superlu_dist_equil - equilibrate the matrix
722: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
723: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
724: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
725: . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
726: . -mat_superlu_dist_iterrefine - use iterative refinement
727: - -mat_superlu_dist_statprint - print factorization information

729:    Level: beginner

731: .seealso: PCLU
732: M*/

737: PetscErrorCode  MatCreate_SuperLU_DIST(Mat A)
738: {
740:   PetscMPIInt    size;

743:   MPI_Comm_size(A->comm,&size);
744:   if (size == 1) {
745:     MatSetType(A,MATSEQAIJ);
746:   } else {
747:     MatSetType(A,MATMPIAIJ);
748:     /*  A_diag = 0x0 ???  -- do we need it?
749:     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
750:     MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);
751:     */
752:   }
753:   MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);
754:   return(0);
755: }