Actual source code: bdfact.c

  1: /*$Id: bdfact.c,v 1.65 2001/08/07 03:02:53 balay Exp $*/

  3: /* Block diagonal matrix format - factorization and triangular solves */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/vec/vecimpl.h
 7:  #include src/inline/ilu.h

  9: int MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *B)
 10: {
 11:   PetscTruth   idn;
 12:   int          ierr;

 15:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 16:   if (isrow) {
 17:     ISIdentity(isrow,&idn);
 18:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 19:   }
 20:   if (iscol) {
 21:     ISIdentity(iscol,&idn);
 22:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 23:   }
 24:   if (info && info->levels != 0) {
 25:     SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 26:   }
 27:   MatConvert(A,MATSAME,B);

 29:   /* Must set to zero for repeated calls with different nonzero structure */
 30:   (*B)->factor = 0;
 31:   return(0);
 32: }

 34: int MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info)
 35: {
 36:   PetscTruth   idn;
 37:   int          ierr;

 40:   /* For now, no fill is allocated in symbolic factorization phase, so we
 41:      directly use the input matrix for numeric factorization. */
 42:   if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 43:   if (isrow) {
 44:     ISIdentity(isrow,&idn);
 45:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 46:   }
 47:   if (iscol) {
 48:     ISIdentity(iscol,&idn);
 49:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 50:   }
 51:   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 52:   MatLUFactorNumeric(A,&A);
 53:   return(0);
 54: }

 56: /* --------------------------------------------------------------------------*/
 57: int MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
 58: {
 59:   Mat          C = *B;
 60:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
 61:   int          k,d,d2,dgk,elim_row,elim_col,bs = a->bs,knb,knb2,bs2 = bs*bs;
 62:   int          dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock,ierr;
 63:   int          *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
 64:   PetscScalar  **dv = a->diagv,*dd = dv[mainbd],*v_work;
 65:   PetscScalar  *multiplier;

 68:   /* Copy input matrix to factored matrix if we've already factored the
 69:      matrix before AND the nonzero structure remains the same.  This is done
 70:      in symbolic factorization the first time through, but there's no symbolic
 71:      factorization for successive calls with same matrix sparsity structure. */
 72:   if (C->factor == FACTOR_LU) {
 73:     for (i=0; i<a->nd; i++) {
 74:       len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
 75:       d   = diag[i];
 76:       if (d > 0) {
 77:         PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
 78:       } else {
 79:         PetscMemcpy(dv[i],a1->diagv[i],len);
 80:       }
 81:     }
 82:   }

 84:   if (!a->pivot) {
 85:     PetscMalloc((m+1)*sizeof(int),&a->pivot);
 86:     PetscLogObjectMemory(C,m*sizeof(int));
 87:   }
 88:   ierr       = PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
 89:   multiplier = v_work + bs;
 90:   ierr       = PetscMalloc((mblock+nblock+1)*sizeof(int),&dgptr);
 91:   ierr       = PetscMemzero(dgptr,(mblock+nblock)*sizeof(int));
 92:   for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
 93:   for (k=0; k<mblock; k++) { /* k = block pivot_row */
 94:     knb = k*bs; knb2 = knb*bs;
 95:     /* invert the diagonal block */
 96:     Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
 97:     for (d=mainbd-1; d>=0; d--) {
 98:       elim_row = k + diag[d];
 99:       if (elim_row < mblock) { /* sweep down */
100:         /* dv[d][knb2]: test if entire block is zero? */
101:         Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
102:         for (d2=d+1; d2<nd; d2++) {
103:           elim_col = elim_row - diag[d2];
104:           if (elim_col >=0 && elim_col < nblock) {
105:             dgk = k - elim_col;
106:             if ((dnum = dgptr[dgk+mblock])) {
107:               Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
108:                              &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
109:             }
110:           }
111:         }
112:       }
113:     }
114:   }
115:   PetscFree(dgptr);
116:   PetscFree(v_work);
117:   C->factor = FACTOR_LU;
118:   return(0);
119: }

121: int MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
122: {
123:   Mat          C = *B;
124:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
125:   int          k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len,ierr;
126:   int          *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
127:   PetscScalar  **dv = a->diagv,*dd = dv[mainbd],mult;

130:   /* Copy input matrix to factored matrix if we've already factored the
131:      matrix before AND the nonzero structure remains the same.  This is done
132:      in symbolic factorization the first time through, but there's no symbolic
133:      factorization for successive calls with same matrix sparsity structure. */
134:   if (C->factor == FACTOR_LU) {
135:     for (i=0; i<nd; i++) {
136:       len = a->bdlen[i] * sizeof(PetscScalar);
137:       d   = diag[i];
138:       if (d > 0) {
139:         PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
140:       } else {
141:         PetscMemcpy(dv[i],a1->diagv[i],len);
142:       }
143:     }
144:   }

146:   PetscMalloc((m+n+1)*sizeof(int),&dgptr);
147:   ierr  = PetscMemzero(dgptr,(m+n)*sizeof(int));
148:   for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
149:   for (k=0; k<m; k++) { /* k = pivot_row */
150:     dd[k] = 1.0/dd[k];
151:     for (d=mainbd-1; d>=0; d--) {
152:       elim_row = k + diag[d];
153:       if (elim_row < m) { /* sweep down */
154:         if (dv[d][elim_row] != 0.0) {
155:           dv[d][elim_row] *= dd[k];
156:           mult = dv[d][elim_row];
157:           for (d2=d+1; d2<nd; d2++) {
158:             elim_col = elim_row - diag[d2];
159:             dgk = k - elim_col;
160:             if (elim_col >=0 && elim_col < n) {
161:               if ((dnum = dgptr[dgk+m])) {
162:                 dv[d2][elim_row] -= mult * dv[dnum-1][k];
163:               }
164:             }
165:           }
166:         }
167:       }
168:     }
169:   }
170:   PetscFree(dgptr);
171:   C->factor = FACTOR_LU;
172:   return(0);
173: }

175: /* -----------------------------------------------------------------*/

177: int MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
178: {
179:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
180:   int          ierr,i,d,loc,mainbd = a->mainbd;
181:   int          n = A->n,m = A->m,*diag = a->diag,col;
182:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;

185:   VecGetArray(xx,&x);
186:   VecGetArray(yy,&y);
187:   /* forward solve the lower triangular part */
188:   for (i=0; i<m; i++) {
189:     sum = x[i];
190:     for (d=0; d<mainbd; d++) {
191:       loc = i - diag[d];
192:       if (loc >= 0) sum -= dv[d][i] * y[loc];
193:     }
194:     y[i] = sum;
195:   }
196:   /* backward solve the upper triangular part */
197:   for (i=m-1; i>=0; i--) {
198:     sum = y[i];
199:     for (d=mainbd+1; d<a->nd; d++) {
200:       col = i - diag[d];
201:       if (col < n) sum -= dv[d][i] * y[col];
202:     }
203:     y[i] = sum*dd[i];
204:   }
205:   VecRestoreArray(xx,&x);
206:   VecRestoreArray(yy,&y);
207:   PetscLogFlops(2*a->nz - A->n);
208:   return(0);
209: }

211: int MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
212: {
213:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
214:   int          i,d,loc,mainbd = a->mainbd;
215:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
216:   int          ierr,m = A->m,*diag = a->diag,col;
217:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
218:   PetscScalar  w0,w1,sum0,sum1;

221:   VecGetArray(xx,&x);
222:   VecGetArray(yy,&y);
223:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

225:   /* forward solve the lower triangular part */
226:   if (mainbd != 0) {
227:     inb = 0;
228:     for (i=0; i<mblock; i++) {
229:       sum0 = sum1 = 0.0;
230:       for (d=0; d<mainbd; d++) {
231:         loc = 2*(i - diag[d]);
232:         if (loc >= 0) {
233:           dvt = &dv[d][4*i];
234:           w0 = y[loc]; w1 = y[loc+1];
235:           sum0 += dvt[0]*w0 + dvt[2]*w1;
236:           sum1 += dvt[1]*w0 + dvt[3]*w1;
237:         }
238:       }
239:       y[inb] -= sum0; y[inb+1] -= sum1;

241:       inb += 2;
242:     }
243:   }
244:   /* backward solve the upper triangular part */
245:   inb = 2*(mblock-1); inb2 = 2*inb;
246:   for (i=mblock-1; i>=0; i--) {
247:     sum0 = y[inb]; sum1 = y[inb+1];
248:     for (d=mainbd+1; d<a->nd; d++) {
249:       col = 2*(i - diag[d]);
250:       if (col < 2*nblock) {
251:         dvt = &dv[d][4*i];
252:         w0 = y[col]; w1 = y[col+1];
253:         sum0 -= dvt[0]*w0 + dvt[2]*w1;
254:         sum1 -= dvt[1]*w0 + dvt[3]*w1;
255:       }
256:     }
257:     dvt = dd+inb2;
258:     y[inb]   = dvt[0]*sum0 + dvt[2]*sum1;
259:     y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
260:     inb -= 2; inb2 -= 4;
261:   }
262:   VecRestoreArray(xx,&x);
263:   VecRestoreArray(yy,&y);
264:   PetscLogFlops(2*a->nz - A->n);
265:   return(0);
266: }

268: int MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
269: {
270:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
271:   int          i,d,loc,mainbd = a->mainbd;
272:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
273:   int          ierr,m = A->m,*diag = a->diag,col;
274:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
275:   PetscScalar  w0,w1,w2,sum0,sum1,sum2;

278:   VecGetArray(xx,&x);
279:   VecGetArray(yy,&y);
280:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

282:   /* forward solve the lower triangular part */
283:   if (mainbd != 0) {
284:     inb = 0;
285:     for (i=0; i<mblock; i++) {
286:       sum0 = sum1 = sum2 = 0.0;
287:       for (d=0; d<mainbd; d++) {
288:         loc = 3*(i - diag[d]);
289:         if (loc >= 0) {
290:           dvt = &dv[d][9*i];
291:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
292:           sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
293:           sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
294:           sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
295:         }
296:       }
297:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
298:       inb += 3;
299:     }
300:   }
301:   /* backward solve the upper triangular part */
302:   inb = 3*(mblock-1); inb2 = 3*inb;
303:   for (i=mblock-1; i>=0; i--) {
304:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2];
305:     for (d=mainbd+1; d<a->nd; d++) {
306:       col = 3*(i - diag[d]);
307:       if (col < 3*nblock) {
308:         dvt = &dv[d][9*i];
309:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
310:         sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
311:         sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
312:         sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
313:       }
314:     }
315:     dvt = dd+inb2;
316:     y[inb]   = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
317:     y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
318:     y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
319:     inb -= 3; inb2 -= 9;
320:   }
321:   VecRestoreArray(xx,&x);
322:   VecRestoreArray(yy,&y);
323:   PetscLogFlops(2*a->nz - A->n);
324:   return(0);
325: }

327: int MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
328: {
329:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
330:   int          i,d,loc,mainbd = a->mainbd;
331:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
332:   int          ierr,m = A->m,*diag = a->diag,col;
333:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
334:   PetscScalar  w0,w1,w2,w3,sum0,sum1,sum2,sum3;

337:   VecGetArray(xx,&x);
338:   VecGetArray(yy,&y);
339:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

341:   /* forward solve the lower triangular part */
342:   if (mainbd != 0) {
343:     inb = 0;
344:     for (i=0; i<mblock; i++) {
345:       sum0 = sum1 = sum2 = sum3 = 0.0;
346:       for (d=0; d<mainbd; d++) {
347:         loc = 4*(i - diag[d]);
348:         if (loc >= 0) {
349:           dvt = &dv[d][16*i];
350:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
351:           sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
352:           sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
353:           sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
354:           sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
355:         }
356:       }
357:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
358:       inb += 4;
359:     }
360:   }
361:   /* backward solve the upper triangular part */
362:   inb = 4*(mblock-1); inb2 = 4*inb;
363:   for (i=mblock-1; i>=0; i--) {
364:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2]; sum3 =  y[inb+3];
365:     for (d=mainbd+1; d<a->nd; d++) {
366:       col = 4*(i - diag[d]);
367:       if (col < 4*nblock) {
368:         dvt = &dv[d][16*i];
369:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
370:         sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
371:         sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
372:         sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
373:         sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
374:       }
375:     }
376:     dvt = dd+inb2;
377:     y[inb]   = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
378:     y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
379:     y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
380:     y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
381:     inb -= 4; inb2 -= 16;
382:   }
383:   VecRestoreArray(xx,&x);
384:   VecRestoreArray(yy,&y);
385:   PetscLogFlops(2*a->nz - A->n);
386:   return(0);
387: }

389: int MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
390: {
391:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
392:   int          i,d,loc,mainbd = a->mainbd;
393:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
394:   int          ierr,m = A->m,*diag = a->diag,col;
395:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
396:   PetscScalar  w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;

399:   VecGetArray(xx,&x);
400:   VecGetArray(yy,&y);
401:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

403:   /* forward solve the lower triangular part */
404:   if (mainbd != 0) {
405:     inb = 0;
406:     for (i=0; i<mblock; i++) {
407:       sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
408:       for (d=0; d<mainbd; d++) {
409:         loc = 5*(i - diag[d]);
410:         if (loc >= 0) {
411:           dvt = &dv[d][25*i];
412:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
413:           sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
414:           sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
415:           sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
416:           sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
417:           sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
418:         }
419:       }
420:       y[inb]   -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
421:       y[inb+4] -= sum4;
422:       inb += 5;
423:     }
424:   }
425:   /* backward solve the upper triangular part */
426:   inb = 5*(mblock-1); inb2 = 5*inb;
427:   for (i=mblock-1; i>=0; i--) {
428:     sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
429:     for (d=mainbd+1; d<a->nd; d++) {
430:       col = 5*(i - diag[d]);
431:       if (col < 5*nblock) {
432:         dvt = &dv[d][25*i];
433:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
434:         sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
435:         sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
436:         sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
437:         sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
438:         sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
439:       }
440:     }
441:     dvt = dd+inb2;
442:     y[inb]   = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
443:                + dvt[20]*sum4;
444:     y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
445:                + dvt[21]*sum4;
446:     y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
447:                + dvt[22]*sum4;
448:     y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
449:                + dvt[23]*sum4;
450:     y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
451:                + dvt[24]*sum4;
452:     inb -= 5; inb2 -= 25;
453:   }
454:   VecRestoreArray(xx,&x);
455:   VecRestoreArray(yy,&y);
456:   PetscLogFlops(2*a->nz - A->n);
457:   return(0);
458: }

460: int MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
461: {
462:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
463:   int          i,d,loc,mainbd = a->mainbd;
464:   int          mblock = a->mblock,nblock = a->nblock,inb,inb2;
465:   int          ierr,bs = a->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
466:   PetscScalar  *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
467:   PetscScalar  work[25];

470:   VecGetArray(xx,&x);
471:   VecGetArray(yy,&y);
472:   if (bs > 25) SETERRQ(PETSC_ERR_SUP,"Blocks must be smaller then 25");
473:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

475:   /* forward solve the lower triangular part */
476:   if (mainbd != 0) {
477:     inb = 0;
478:     for (i=0; i<mblock; i++) {
479:       for (d=0; d<mainbd; d++) {
480:         loc = i - diag[d];
481:         if (loc >= 0) {
482:           Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
483:         }
484:       }
485:       inb += bs;
486:     }
487:   }
488:   /* backward solve the upper triangular part */
489:   inb = bs*(mblock-1); inb2 = inb*bs;
490:   for (i=mblock-1; i>=0; i--) {
491:     for (d=mainbd+1; d<a->nd; d++) {
492:       col = i - diag[d];
493:       if (col < nblock) {
494:         Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
495:       }
496:     }
497:     Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
498:     PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
499:     inb -= bs; inb2 -= bs2;
500:   }
501:   VecRestoreArray(xx,&x);
502:   VecRestoreArray(yy,&y);
503:   PetscLogFlops(2*a->nz - A->n);
504:   return(0);
505: }