Actual source code: bcgsl.c
1: #define PETSCKSP_DLL
/*
 * Implementation of BiCGStab(L), following the paper by D.R. Fokkema,
 * "Enhanced implementation of BiCGStab(L) for solving linear systems
 * of equations". This uses tricky delayed updating ideas to prevent
 * round-off buildup.
 */
8: #include petscblaslapack.h
9: #include src/ksp/ksp/kspimpl.h
10: #include bcgsl.h
15: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
16: {
17: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *) ksp->data;
18: PetscScalar alpha, beta, nu, omega, sigma;
19: PetscScalar rho0, rho1;
20: PetscReal kappa0, kappaA, kappa1;
21: PetscReal ghat, epsilon, abstol;
22: PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
23: PetscTruth bUpdateX;
24: PetscTruth bBombed = PETSC_FALSE;
26: PetscInt maxit;
27: PetscInt h, i, j, k, vi, ell;
28: PetscBLASInt ldMZ,bierr;
33: /* set up temporary vectors */
34: vi = 0;
35: ell = bcgsl->ell;
36: bcgsl->vB = ksp->work[vi]; vi++;
37: bcgsl->vRt = ksp->work[vi]; vi++;
38: bcgsl->vTm = ksp->work[vi]; vi++;
39: bcgsl->vvR = ksp->work+vi; vi += ell+1;
40: bcgsl->vvU = ksp->work+vi; vi += ell+1;
41: bcgsl->vXr = ksp->work[vi]; vi++;
42: ldMZ = ell+1;
43: {
44: PetscMalloc(ldMZ*sizeof(PetscScalar), &AY0c);
45: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYlc);
46: PetscMalloc(ldMZ*sizeof(PetscScalar), &AYtc);
47: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZa);
48: PetscMalloc(ldMZ*ldMZ*sizeof(PetscScalar), &MZb);
49: }
51: /* Prime the iterative solver */
52: KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
53: VecNorm(VVR[0], NORM_2, &zeta0);
54: rnmax_computed = zeta0;
55: rnmax_true = zeta0;
57: (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
58: if (ksp->reason) {
59: PetscFree(AY0c);
60: PetscFree(AYlc);
61: PetscFree(AYtc);
62: PetscFree(MZa);
63: PetscFree(MZb);
65: return(0);
66: }
68: VecSet(VVU[0],0.0);
69: alpha = 0;
70: rho0 = omega = 1;
72: if (bcgsl->delta>0.0) {
73: VecCopy(VX, VXR);
74: VecSet(VX,0.0);
75: VecCopy(VVR[0], VB);
76: } else {
77: VecCopy(ksp->vec_rhs, VB);
78: }
80: /* Life goes on */
81: VecCopy(VVR[0], VRT);
82: zeta = zeta0;
84: KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
86: for (k=0; k<maxit; k += bcgsl->ell) {
87: PetscObjectTakeAccess(ksp);
88: ksp->its = k;
89: ksp->rnorm = zeta;
90: PetscObjectGrantAccess(ksp);
92: KSPLogResidualHistory(ksp, zeta);
93: KSPMonitor(ksp, ksp->its, zeta);
95: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
96: if (ksp->reason) break;
98: /* BiCG part */
99: rho0 = -omega*rho0;
100: nrm0 = zeta;
101: for (j=0; j<bcgsl->ell; j++) {
102: /* rho1 <- r_j' * r_tilde */
103: VecDot(VVR[j], VRT, &rho1);
104: if (rho1 == 0.0) {
105: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
106: bBombed = PETSC_TRUE;
107: break;
108: }
109: beta = alpha*(rho1/rho0);
110: rho0 = rho1;
111: nu = -beta;
112: for (i=0; i<=j; i++) {
113: /* u_i <- r_i - beta*u_i */
114: VecAYPX(VVU[i], nu, VVR[i]);
115: }
116: /* u_{j+1} <- inv(K)*A*u_j */
117: KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
119: VecDot(VVU[j+1], VRT, &sigma);
120: if (sigma == 0.0) {
121: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
122: bBombed = PETSC_TRUE;
123: break;
124: }
125: alpha = rho1/sigma;
127: /* x <- x + alpha*u_0 */
128: VecAXPY(VX, alpha, VVU[0]);
130: nu = -alpha;
131: for (i=0; i<=j; i++) {
132: /* r_i <- r_i - alpha*u_{i+1} */
133: VecAXPY(VVR[i], nu, VVU[i+1]);
134: }
136: /* r_{j+1} <- inv(K)*A*r_j */
137: KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
139: VecNorm(VVR[0], NORM_2, &nrm0);
140: if (bcgsl->delta>0.0) {
141: if (rnmax_computed<nrm0) rnmax_computed = nrm0;
142: if (rnmax_true<nrm0) rnmax_true = nrm0;
143: }
145: /* NEW: check for early exit */
146: (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
147: if (ksp->reason) {
148: PetscObjectTakeAccess(ksp);
149: ksp->its = k+j;
150: ksp->rnorm = nrm0;
151: PetscObjectGrantAccess(ksp);
152: break;
153: }
154: }
156: if (bBombed==PETSC_TRUE) break;
158: /* Polynomial part */
160: for (i=0; i<=bcgsl->ell; i++) {
161: for (j=0; j<i; j++) {
162: VecDot(VVR[j], VVR[i], &nu);
163: MZa[i+ldMZ*j] = nu;
164: MZa[j+ldMZ*i] = nu;
165: MZb[i+ldMZ*j] = nu;
166: MZb[j+ldMZ*i] = nu;
167: }
169: VecDot(VVR[i], VVR[i], &nu);
170: MZa[i+ldMZ*i] = nu;
171: MZb[i+ldMZ*i] = nu;
172: }
174: if (!bcgsl->bConvex || bcgsl->ell==1) {
175: PetscBLASInt ione = 1,bell = bcgsl->ell;
177: AY0c[0] = -1;
178: LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
179: if (ierr!=0) {
180: ksp->reason = KSP_DIVERGED_BREAKDOWN;
181: bBombed = PETSC_TRUE;
182: break;
183: }
184: BLAScopy_(&bell, &MZb[1], &ione, &AY0c[1], &ione);
185: LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
186: } else {
187: PetscBLASInt neqs = bcgsl->ell-1;
188: PetscBLASInt ione = 1;
189: PetscScalar aone = 1.0, azero = 0.0;
191: LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
192: if (ierr!=0) {
193: ksp->reason = KSP_DIVERGED_BREAKDOWN;
194: bBombed = PETSC_TRUE;
195: break;
196: }
197: BLAScopy_(&neqs, &MZb[1], &ione, &AY0c[1], &ione);
198: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
199: AY0c[0] = -1;
200: AY0c[bcgsl->ell] = 0;
202: BLAScopy_(&neqs, &MZb[1+ldMZ*(bcgsl->ell)], &ione, &AYlc[1], &ione);
203: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);
205: AYlc[0] = 0;
206: AYlc[bcgsl->ell] = -1;
208: BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);
210: kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);
212: /* round-off can cause negative kappa's */
213: if (kappa0<0) kappa0 = -kappa0;
214: kappa0 = sqrt(kappa0);
216: kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
218: BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);
220: kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
222: if (kappa1<0) kappa1 = -kappa1;
223: kappa1 = sqrt(kappa1);
225: if (kappa0!=0.0 && kappa1!=0.0) {
226: if (kappaA<0.7*kappa0*kappa1) {
227: ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
228: } else {
229: ghat = kappaA/(kappa1*kappa1);
230: }
231: for (i=0; i<=bcgsl->ell; i++) {
232: AY0c[i] = AY0c[i] - ghat* AYlc[i];
233: }
234: }
235: }
237: omega = AY0c[bcgsl->ell];
238: for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
239: omega = AY0c[h];
240: }
241: if (omega==0.0) {
242: ksp->reason = KSP_DIVERGED_BREAKDOWN;
243: break;
244: }
246: for (i=1; i<=bcgsl->ell; i++) {
247: nu = -AY0c[i];
248: VecAXPY(VVU[0], nu, VVU[i]);
249: nu = AY0c[i];
250: VecAXPY(VX, nu, VVR[i-1]);
251: nu = -AY0c[i];
252: VecAXPY(VVR[0], nu, VVR[i]);
253: }
255: VecNorm(VVR[0], NORM_2, &zeta);
257: /* Accurate Update */
258: if (bcgsl->delta>0.0) {
259: if (rnmax_computed<zeta) rnmax_computed = zeta;
260: if (rnmax_true<zeta) rnmax_true = zeta;
262: bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
263: if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
264: /* r0 <- b-inv(K)*A*X */
265: KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
266: nu = -1;
267: VecAYPX(VVR[0], nu, VB);
268: rnmax_true = zeta;
270: if (bUpdateX) {
271: nu = 1;
272: VecAXPY(VXR,nu,VX);
273: VecSet(VX,0.0);
274: VecCopy(VVR[0], VB);
275: rnmax_computed = zeta;
276: }
277: }
278: }
279: }
281: KSPMonitor(ksp, ksp->its, zeta);
283: if (bcgsl->delta>0.0) {
284: nu = 1;
285: VecAXPY(VX,nu,VXR);
286: }
288: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
289: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
291: PetscFree(AY0c);
292: PetscFree(AYlc);
293: PetscFree(AYtc);
294: PetscFree(MZa);
295: PetscFree(MZb);
296: return(0);
297: }
301: /*@C
302: KSPBCGSLSetXRes - Sets the parameter governing when
303: exact residuals will be used instead of computed residuals.
305: Collective on KSP
307: Input Parameters:
308: + ksp - iterative context obtained from KSPCreate
309: - delta - computed residuals are used alone when delta is not positive
311: Options Database Keys:
313: . -ksp_bcgsl_xres delta
315: Level: intermediate
317: .keywords: KSP, BiCGStab(L), set, exact residuals
319: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
320: @*/
321: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
322: {
323: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
327: if (ksp->setupcalled) {
328: if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
329: KSPDefaultFreeWork(ksp);
330: ksp->setupcalled = 0;
331: }
332: }
333: bcgsl->delta = delta;
334: return(0);
335: }
339: /*@C
340: KSPBCGSLSetPol - Sets the type of polynomial part will
341: be used in the BiCGSTab(L) solver.
343: Collective on KSP
345: Input Parameters:
346: + ksp - iterative context obtained from KSPCreate
347: - uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
349: Options Database Keys:
351: + -ksp_bcgsl_cxpoly - use enhanced polynomial
352: . -ksp_bcgsl_mrpoly - use standard polynomial
354: Level: intermediate
356: .keywords: KSP, BiCGStab(L), set, polynomial
358: .seealso: @()
359: @*/
360: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
361: {
362: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
366: if (!ksp->setupcalled) {
367: bcgsl->bConvex = uMROR;
368: } else if (bcgsl->bConvex != uMROR) {
369: /* free the data structures,
370: then create them again
371: */
372: KSPDefaultFreeWork(ksp);
373: bcgsl->bConvex = uMROR;
374: ksp->setupcalled = 0;
375: }
376: return(0);
377: }
381: /*@C
382: KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
384: Collective on KSP
386: Input Parameters:
387: + ksp - iterative context obtained from KSPCreate
388: - ell - number of search directions
390: Options Database Keys:
392: . -ksp_bcgsl_ell ell
394: Level: intermediate
396: .keywords: KSP, BiCGStab(L), set, exact residuals,
398: .seealso: @()
399: @*/
400: PetscErrorCode KSPBCGSLSetEll(KSP ksp, int ell)
401: {
402: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
406: if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
408: if (!ksp->setupcalled) {
409: bcgsl->ell = ell;
410: } else if (bcgsl->ell != ell) {
411: /* free the data structures, then create them again */
412: KSPDefaultFreeWork(ksp);
414: bcgsl->ell = ell;
415: ksp->setupcalled = 0;
416: }
417: return(0);
418: }
422: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
423: {
424: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
425: PetscErrorCode ierr;
426: PetscTruth isascii, isstring;
429: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
430: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);
432: if (isascii) {
433: PetscViewerASCIIPrintf(viewer, " BCGSL: Ell = %D\n", bcgsl->ell);
434: PetscViewerASCIIPrintf(viewer, " BCGSL: Delta = %lg\n", bcgsl->delta);
435: } else {
436: SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
437: }
438: return(0);
439: }
443: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
444: {
445: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
447: PetscInt this_ell;
448: PetscReal delta;
449: PetscTruth flga, flg;
452: /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
453: don't need to be called here.
454: */
455: PetscOptionsHead("KSP BiCGStab(L) Options");
457: /* Set number of search directions */
458: PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
459: if (flg) {
460: KSPBCGSLSetEll(ksp, this_ell);
461: }
463: /* Set polynomial type */
464: PetscOptionsName("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", &flga);
465: if (flga) {
466: KSPBCGSLSetPol(ksp, PETSC_TRUE);
467: } else {
468: PetscOptionsName("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", &flg);
469: KSPBCGSLSetPol(ksp, PETSC_FALSE);
470: }
472: /* Will computed residual be refreshed? */
473: PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
474: if (flg) {
475: KSPBCGSLSetXRes(ksp, delta);
476: }
477: PetscOptionsTail();
478: return(0);
479: }
483: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
484: {
485: KSP_BiCGStabL *bcgsl = (KSP_BiCGStabL *)ksp->data;
486: PetscInt ell = bcgsl->ell;
490: /* Support left preconditioners only */
491: if (ksp->pc_side == PC_SYMMETRIC) {
492: SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
493: } else if (ksp->pc_side == PC_RIGHT) {
494: SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
495: }
496: KSPDefaultGetWork(ksp, 6+2*ell);
497: return(0);
498: }
/*MC
     KSPBCGSL - Implements a slight variant of the Enhanced
                BiCGStab(L) algorithm in (3) and (2). The variation
                concerns cases when either kappa0**2 or kappa1**2 is
                negative due to round-off. Kappa0 has also been pulled
                out of the denominator in the formula for ghat.

    References:
      1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
         approaches for the stable computation of hybrid BiCG
         methods", Applied Numerical Mathematics: Transactions
         of IMACS, 19(3), pp 235-54, 1996.
      2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
         "BiCGStab(L) and other hybrid Bi-CG methods",
         Numerical Algorithms, 7, pp 75-109, 1994.
      3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
         for solving linear systems of equations", preprint
         from www.citeseer.com.

   Contributed by: Joel M. Malard, email jm.malard@pnl.gov

   Options Database Keys:
+  -ksp_bcgsl_ell <ell> - Number of Krylov search directions
.  -ksp_bcgsl_cxpoly - Use a convex function of the MR and OR polynomials after the BiCG step
.  -ksp_bcgsl_mrpoly - Use the MR polynomial alone after the BiCG step
-  -ksp_bcgsl_xres <res> - Threshold used to decide when to refresh computed residuals

   Level: beginner

.seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS

M*/
534: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
535: {
537: KSP_BiCGStabL *bcgsl;
540: /* allocate BiCGStab(L) context */
541: PetscNew(KSP_BiCGStabL, &bcgsl);
542: ksp->data = (void*)bcgsl;
544: ksp->pc_side = PC_LEFT;
545: ksp->ops->setup = KSPSetUp_BCGSL;
546: ksp->ops->solve = KSPSolve_BCGSL;
547: ksp->ops->destroy = KSPDefaultDestroy;
548: ksp->ops->buildsolution = KSPDefaultBuildSolution;
549: ksp->ops->buildresidual = KSPDefaultBuildResidual;
550: ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
551: ksp->ops->view = KSPView_BCGSL;
553: /* Let the user redefine the number of directions vectors */
554: bcgsl->ell = 2;
556: /*Choose between a single MR step or an averaged MR/OR */
557: bcgsl->bConvex = PETSC_FALSE;
559: /* Set the threshold for when exact residuals will be used */
560: bcgsl->delta = 0.0;
561: return(0);
562: }