Actual source code: cholbs.c
#define PETSCMAT_DLL

#include "petsc.h"

/* We must define MLOG for BlockSolve logging */
#if defined(PETSC_USE_LOG)
#define MLOG
#endif

#include "src/mat/impls/rowbs/mpi/mpirowbs.h"
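
/*
   Note: MLOG is defined before including mpirowbs.h (which pulls in the BlockSolve
   headers) so that, when PETSc logging is enabled, BlockSolve's flop counters such
   as BSlocal_flops() are available to the PetscLogFlops() calls in the routines below.
*/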
PetscErrorCode MatCholeskyFactorNumeric_MPIRowbs(Mat mat,MatFactorInfo *info,Mat *factp)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_CHOLESKY) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete Cholesky factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscInfo3(mat,"BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",mbs->failures,mbs->ierr,mbs->alpha);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  mbs->factor = FACTOR_CHOLESKY;
  return(0);
}
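
/*
   Note on the retry loop above: each time BSfactor() reports that the incomplete
   Cholesky factorization broke down, the factor is rebuilt from the original
   nonzeros and the diagonal shift alpha is increased by 0.1 (applied with
   BSset_diag()) before refactoring, so progressively stronger diagonal
   perturbations are tried until the factorization succeeds.
*/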
PetscErrorCode MatLUFactorNumeric_MPIRowbs(Mat mat,MatFactorInfo *info,Mat *factp)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_LU) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete LU factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscInfo3(mat,"BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",mbs->failures,mbs->ierr,mbs->alpha);
  }
  mbs->factor = FACTOR_LU;
  (*factp)->assembled = PETSC_TRUE;
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
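
/*
   Note: the LU path above mirrors the Cholesky path; BlockSolve's BSfactor()
   performs the incomplete factorization in both cases, and the only extra step
   here is marking the factor matrix assembled via (*factp)->assembled = PETSC_TRUE.
*/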
/* ------------------------------------------------------------------- */
PetscErrorCode MatSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat)mat->data;
  Mat_MPIRowbs *mbs   = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute and apply diagonal scaling to vector, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(mbs->xwork,y,mbs->diag);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
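
/*
   Note on MatSolve above: the triangular solves operate on BlockSolve's permuted,
   symmetrically scaled form of the matrix, so unless the vectors are already in
   that form (mbs->vecs_permscale) the right-hand side is first permuted and
   multiplied by D^{-1/2} (stored in mbs->diag), the forward and backward solves
   are run in place, and the scaling and permutation are then undone to obtain y.
*/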
/* ------------------------------------------------------------------- */
PetscErrorCode MatForwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat)mat->data;
  Mat_MPIRowbs *mbs   = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute and apply diagonal scaling to vector, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  return(0);
}
/* ------------------------------------------------------------------- */
PetscErrorCode MatBackwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat)mat->data;
  Mat_MPIRowbs *mbs   = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  VecCopy(x,y);

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use BlockSolve routine for no cliques/inodes */
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply diagonal scaling and unpermute, where D^{-1/2} is stored */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(mbs->xwork,y,mbs->diag);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
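
/*
   Note: MatForwardSolve and MatBackwardSolve above split MatSolve into its two
   triangular solves.  The forward solve leaves its result in the permuted and
   scaled form (there is no unpermute step), while the backward solve only copies
   its input before solving and then unscales and unpermutes, so the two routines
   are intended to be used as a matching pair.
*/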
/*
   The logging variables required by BlockSolve.

   This is an ugly hack that allows PETSc to run properly with BlockSolve regardless
   of whether PETSc or BlockSolve is compiled with logging turned on.

   It is bad because it relies on BlockSolve's logging internals not changing, but we
   have no choice; besides, it is unlikely that BlockSolve will see further development.
*/
double MLOG_flops;
double MLOG_event_flops;
double MLOG_time_stamp;
int    MLOG_sequence_num;
#if defined(MLOG_MAX_EVNTS)
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#else
typedef struct __MLOG_log_type {
  double time_stamp;
  double total_time;
  double flops;
  int    event_num;
} MLOG_log_type;
#define MLOG_MAX_EVNTS 1300
#define MLOG_MAX_ACCUM 75
MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#endif
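
/*
   Note on the definitions above: when the BlockSolve headers already define
   MLOG_MAX_EVNTS (a logging-enabled BlockSolve), its MLOG_log_type and array sizes
   are used directly; otherwise a compatible struct and array sizes are declared
   here so the logging globals that BlockSolve references still exist, which is the
   hack described in the comment above.
*/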