Actual source code: bcgsl.c

  1: #define PETSCKSP_DLL
  2: /*
  3:  * Implementation of BiCGstab(L) following the paper by D.R. Fokkema,
  4:  * "Enhanced implementation of BiCGStab(L) for solving linear systems
  5:  * of equations". This uses tricky delayed updating ideas to prevent
  6:  * round-off buildup.
  7:  */
 8:  #include petscblaslapack.h
 9:  #include src/ksp/ksp/kspimpl.h
 10:  #include bcgsl.h


 15: static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
 16: {
 17:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *) ksp->data;
 18:   PetscScalar    alpha, beta, nu, omega, sigma;
 19:   PetscScalar    rho0, rho1;
 20:   PetscReal      kappa0, kappaA, kappa1;
 21:   PetscReal      ghat, epsilon, abstol;
 22:   PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
 23:   PetscTruth     bUpdateX;
 24:   PetscTruth     bBombed = PETSC_FALSE;

 26:   PetscInt       maxit;
 27:   PetscInt       h, i, j, k, vi, ell;
 28:   PetscBLASInt   ldMZ,bierr;


 33:   /* set up temporary vectors */
 34:   vi = 0;
 35:   ell = bcgsl->ell;
 36:   bcgsl->vB    = ksp->work[vi]; vi++;
 37:   bcgsl->vRt   = ksp->work[vi]; vi++;
 38:   bcgsl->vTm   = ksp->work[vi]; vi++;
 39:   bcgsl->vvR   = ksp->work+vi; vi += ell+1;
 40:   bcgsl->vvU   = ksp->work+vi; vi += ell+1;
 41:   bcgsl->vXr   = ksp->work[vi]; vi++;
 42:   ldMZ = ell+1;
 43:   {
 44:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
 45:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
 46:     PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
 47:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
 48:     PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
 49:   }

 51:   /* Prime the iterative solver */
 52:   KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
 53:   VecNorm(VVR[0], NORM_2, &zeta0);
 54:   rnmax_computed = zeta0;
 55:   rnmax_true = zeta0;

 57:   (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
 58:   if (ksp->reason) {
 59:     PetscFree(AY0c);
 60:     PetscFree(AYlc);
 61:     PetscFree(AYtc);
 62:     PetscFree(MZa);
 63:     PetscFree(MZb);

 65:     return(0);
 66:   }

 68:   VecSet(VVU[0],0.0);
 69:   alpha = 0;
 70:   rho0 = omega = 1;

 72:   if (bcgsl->delta>0.0) {
 73:     VecCopy(VX, VXR);
 74:     VecSet(VX,0.0);
 75:     VecCopy(VVR[0], VB);
 76:   } else {
 77:     VecCopy(ksp->vec_rhs, VB);
 78:   }

 80:   /* Life goes on */
 81:   VecCopy(VVR[0], VRT);
 82:   zeta = zeta0;

 84:   KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);

 86:   for (k=0; k<maxit; k += bcgsl->ell) {
 87:     PetscObjectTakeAccess(ksp);
 88:     ksp->its   = k;
 89:     ksp->rnorm = zeta;
 90:     PetscObjectGrantAccess(ksp);

 92:     KSPLogResidualHistory(ksp, zeta);
 93:     KSPMonitor(ksp, ksp->its, zeta);

 95:     (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
 96:     if (ksp->reason) break;

 98:     /* BiCG part */
 99:     rho0 = -omega*rho0;
100:     nrm0 = zeta;
101:     for (j=0; j<bcgsl->ell; j++) {
102:       /* rho1 <- r_j' * r_tilde */
103:       VecDot(VVR[j], VRT, &rho1);
104:       if (rho1 == 0.0) {
105:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
106:         bBombed = PETSC_TRUE;
107:         break;
108:       }
109:       beta = alpha*(rho1/rho0);
110:       rho0 = rho1;
111:       nu = -beta;
112:       for (i=0; i<=j; i++) {
113:         /* u_i <- r_i - beta*u_i */
114:         VecAYPX(VVU[i], nu, VVR[i]);
115:       }
116:       /* u_{j+1} <- inv(K)*A*u_j */
117:       KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);

119:       VecDot(VVU[j+1], VRT, &sigma);
120:       if (sigma == 0.0) {
121:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
122:         bBombed = PETSC_TRUE;
123:         break;
124:       }
125:       alpha = rho1/sigma;

127:       /* x <- x + alpha*u_0 */
128:       VecAXPY(VX, alpha, VVU[0]);

130:       nu = -alpha;
131:       for (i=0; i<=j; i++) {
132:         /* r_i <- r_i - alpha*u_{i+1} */
133:         VecAXPY(VVR[i], nu, VVU[i+1]);
134:       }

136:       /* r_{j+1} <- inv(K)*A*r_j */
137:       KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);

139:       VecNorm(VVR[0], NORM_2, &nrm0);
140:       if (bcgsl->delta>0.0) {
141:         if (rnmax_computed<nrm0) rnmax_computed = nrm0;
142:         if (rnmax_true<nrm0) rnmax_true = nrm0;
143:       }

145:       /* NEW: check for early exit */
146:       (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
147:       if (ksp->reason) {
148:         PetscObjectTakeAccess(ksp);
149:         ksp->its   = k+j;
150:         ksp->rnorm = nrm0;
151:         PetscObjectGrantAccess(ksp);
152:         break;
153:       }
154:     }

156:     if (bBombed==PETSC_TRUE) break;

158:     /* Polynomial part */

160:     for (i=0; i<=bcgsl->ell; i++) {
161:       for (j=0; j<i; j++) {
162:         VecDot(VVR[j], VVR[i], &nu);
163:         MZa[i+ldMZ*j] = nu;
164:         MZa[j+ldMZ*i] = nu;
165:         MZb[i+ldMZ*j] = nu;
166:         MZb[j+ldMZ*i] = nu;
167:       }

169:       VecDot(VVR[i], VVR[i], &nu);
170:       MZa[i+ldMZ*i] = nu;
171:       MZb[i+ldMZ*i] = nu;
172:     }

174:     if (!bcgsl->bConvex || bcgsl->ell==1) {
175:       PetscBLASInt ione = 1,bell = bcgsl->ell;

177:       AY0c[0] = -1;
178:       LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
179:       if (ierr!=0) {
180:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
181:         bBombed = PETSC_TRUE;
182:         break;
183:       }
184:       BLAScopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
185:       LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
186:     } else {
187:       PetscBLASInt neqs = bcgsl->ell-1;
188:       PetscBLASInt ione = 1;
189:       PetscScalar aone = 1.0, azero = 0.0;

191:       LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
192:       if (ierr!=0) {
193:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
194:         bBombed = PETSC_TRUE;
195:         break;
196:       }
197:       BLAScopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
198:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
199:       AY0c[0] = -1;
200:       AY0c[bcgsl->ell] = 0;

202:       BLAScopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
203:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

205:       AYlc[0] = 0;
206:       AYlc[bcgsl->ell] = -1;

208:       BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

210:       kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

212:       /* round-off can cause negative kappa's */
213:       if (kappa0<0) kappa0 = -kappa0;
214:       kappa0 = sqrt(kappa0);

216:       kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

218:       BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

220:       kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

222:       if (kappa1<0) kappa1 = -kappa1;
223:       kappa1 = sqrt(kappa1);

225:       if (kappa0!=0.0 && kappa1!=0.0) {
226:         if (kappaA<0.7*kappa0*kappa1) {
227:           ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
228:         } else {
229:           ghat = kappaA/(kappa1*kappa1);
230:         }
231:         for (i=0; i<=bcgsl->ell; i++) {
232:           AY0c[i] = AY0c[i] - ghat* AYlc[i];
233:         }
234:       }
235:     }

237:     omega = AY0c[bcgsl->ell];
238:     for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
239:       omega = AY0c[h];
240:     }
241:     if (omega==0.0) {
242:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
243:       break;
244:     }

246:     for (i=1; i<=bcgsl->ell; i++) {
247:       nu = -AY0c[i];
248:       VecAXPY(VVU[0], nu, VVU[i]);
249:       nu = AY0c[i];
250:       VecAXPY(VX, nu, VVR[i-1]);
251:       nu = -AY0c[i];
252:       VecAXPY(VVR[0], nu, VVR[i]);
253:     }

255:     VecNorm(VVR[0], NORM_2, &zeta);

257:     /* Accurate Update */
258:     if (bcgsl->delta>0.0) {
259:       if (rnmax_computed<zeta) rnmax_computed = zeta;
260:       if (rnmax_true<zeta) rnmax_true = zeta;

262:       bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
263:       if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
264:         /* r0 <- b-inv(K)*A*X */
265:         KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
266:         nu = -1;
267:         VecAYPX(VVR[0], nu, VB);
268:         rnmax_true = zeta;

270:         if (bUpdateX) {
271:           nu = 1;
272:           VecAXPY(VXR,nu,VX);
273:           VecSet(VX,0.0);
274:           VecCopy(VVR[0], VB);
275:           rnmax_computed = zeta;
276:         }
277:       }
278:     }
279:   }

281:   KSPMonitor(ksp, ksp->its, zeta);

283:   if (bcgsl->delta>0.0) {
284:     nu   = 1;
285:     VecAXPY(VX,nu,VXR);
286:   }

288:   (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
289:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;

291:   PetscFree(AY0c);
292:   PetscFree(AYlc);
293:   PetscFree(AYtc);
294:   PetscFree(MZa);
295:   PetscFree(MZb);
296:   return(0);
297: }

301: /*@C
302:    KSPBCGSLSetXRes - Sets the parameter governing when
303:    exact residuals will be used instead of computed residuals.

305:    Collective on KSP

307:    Input Parameters:
308: +  ksp - iterative context obtained from KSPCreate
309: -  delta - computed residuals are used alone when delta is not positive

311:    Options Database Keys:

313: .  -ksp_bcgsl_xres delta

315:    Level: intermediate

317: .keywords: KSP, BiCGStab(L), set, exact residuals

319: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
320: @*/
321: PetscErrorCode  KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
322: {
323:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

327:   if (ksp->setupcalled) {
328:     if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
329:       KSPDefaultFreeWork(ksp);
330:       ksp->setupcalled = 0;
331:     }
332:   }
333:   bcgsl->delta = delta;
334:   return(0);
335: }

339: /*@C
340:    KSPBCGSLSetPol - Sets the type of polynomial part will
341:    be used in the BiCGSTab(L) solver.

343:    Collective on KSP

345:    Input Parameters:
346: +  ksp - iterative context obtained from KSPCreate
347: -  uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.

349:    Options Database Keys:

351: +  -ksp_bcgsl_cxpoly - use enhanced polynomial
352: .  -ksp_bcgsl_mrpoly - use standard polynomial

354:    Level: intermediate

356: .keywords: KSP, BiCGStab(L), set, polynomial

358: .seealso: @()
359: @*/
360: PetscErrorCode  KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
361: {
362:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

366:   if (!ksp->setupcalled) {
367:     bcgsl->bConvex = uMROR;
368:   } else if (bcgsl->bConvex != uMROR) {
369:     /* free the data structures,
370:        then create them again
371:      */
372:    KSPDefaultFreeWork(ksp);
373:     bcgsl->bConvex = uMROR;
374:     ksp->setupcalled = 0;
375:   }
376:   return(0);
377: }

381: /*@C
382:    KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).

384:    Collective on KSP

386:    Input Parameters:
387: +  ksp - iterative context obtained from KSPCreate
388: -  ell - number of search directions

390:    Options Database Keys:

392: .  -ksp_bcgsl_ell ell

394:    Level: intermediate

396: .keywords: KSP, BiCGStab(L), set, exact residuals,

398: .seealso: @()
399: @*/
400: PetscErrorCode  KSPBCGSLSetEll(KSP ksp, int ell)
401: {
402:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;

406:   if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");

408:   if (!ksp->setupcalled) {
409:     bcgsl->ell = ell;
410:   } else if (bcgsl->ell != ell) {
411:     /* free the data structures, then create them again */
412:     KSPDefaultFreeWork(ksp);

414:     bcgsl->ell = ell;
415:     ksp->setupcalled = 0;
416:   }
417:   return(0);
418: }

422: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
423: {
424:   KSP_BiCGStabL       *bcgsl = (KSP_BiCGStabL *)ksp->data;
425:   PetscErrorCode      ierr;
426:   PetscTruth          isascii, isstring;

429:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
430:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);

432:   if (isascii) {
433:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Ell = %D\n", bcgsl->ell);
434:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Delta = %lg\n", bcgsl->delta);
435:   } else {
436:     SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
437:   }
438:   return(0);
439: }

443: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
444: {
445:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
447:   PetscInt       this_ell;
448:   PetscReal      delta;
449:   PetscTruth     flga, flg;

452:   /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
453:      don't need to be called here.
454:   */
455:   PetscOptionsHead("KSP BiCGStab(L) Options");

457:   /* Set number of search directions */
458:   PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
459:   if (flg) {
460:     KSPBCGSLSetEll(ksp, this_ell);
461:   }

463:   /* Set polynomial type */
464:   PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
465:   if (flga) {
466:     KSPBCGSLSetPol(ksp, PETSC_TRUE);
467:   } else {
468:     PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
469:     KSPBCGSLSetPol(ksp, PETSC_FALSE);
470:   }

472:   /* Will computed residual be refreshed? */
473:   PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
474:   if (flg) {
475:     KSPBCGSLSetXRes(ksp, delta);
476:   }
477:   PetscOptionsTail();
478:   return(0);
479: }

483: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
484: {
485:   KSP_BiCGStabL  *bcgsl = (KSP_BiCGStabL *)ksp->data;
486:   PetscInt        ell = bcgsl->ell;

490:   /* Support left preconditioners only */
491:   if (ksp->pc_side == PC_SYMMETRIC) {
492:     SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
493:   } else if (ksp->pc_side == PC_RIGHT) {
494:     SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
495:   }
496:   KSPDefaultGetWork(ksp, 6+2*ell);
497:   return(0);
498: }

500: /*MC
501:      KSPBCGSL - Implements a slight variant of the Enhanced
502:                 BiCGStab(L) algorithm in (3) and (2).  The variation
503:                 concerns cases when either kappa0**2 or kappa1**2 is
504:                 negative due to round-off. Kappa0 has also been pulled
505:                 out of the denominator in the formula for ghat.

507:     References:
508:       1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
509:          approaches for the stable computation of hybrid BiCG
510:          methods", Applied Numerical Mathematics: Transactions
511:          of IMACS, 19(3), pp 235-54, 1996.
512:       2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
513:          "BiCGStab(L) and other hybrid Bi-CG methods",
514:           Numerical Algorithms, 7, pp 75-109, 1994.
515:       3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
516:          for solving linear systems of equations", preprint
517:          from www.citeseer.com.

519:    Contributed by: Joel M. Malard, email jm.malard@pnl.gov

521:    Options Database Keys:
522: +  -ksp_bcgsl_ell <ell> Number of Krylov search directions
523: .  -ksp_bcgsl_cxpoly Use a convex function of the MR and OR polynomials after the BiCG step
524: -  -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals

526:    Level: beginner

528: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS

530: M*/
534: PetscErrorCode  KSPCreate_BCGSL(KSP ksp)
535: {
537:   KSP_BiCGStabL  *bcgsl;

540:   /* allocate BiCGStab(L) context */
541:   PetscNew(KSP_BiCGStabL, &bcgsl);
542:   ksp->data = (void*)bcgsl;

544:   ksp->pc_side              = PC_LEFT;
545:   ksp->ops->setup           = KSPSetUp_BCGSL;
546:   ksp->ops->solve           = KSPSolve_BCGSL;
547:   ksp->ops->destroy         = KSPDefaultDestroy;
548:   ksp->ops->buildsolution   = KSPDefaultBuildSolution;
549:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
550:   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGSL;
551:   ksp->ops->view            = KSPView_BCGSL;

553:   /* Let the user redefine the number of directions vectors */
554:   bcgsl->ell = 2;

556:   /*Choose between a single MR step or an averaged MR/OR */
557:   bcgsl->bConvex = PETSC_FALSE;

559:   /* Set the threshold for when exact residuals will be used */
560:   bcgsl->delta = 0.0;
561:   return(0);
562: }