Actual source code: bdiag2.c

  1: #define PETSCMAT_DLL

  3: /* Block diagonal matrix format */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/inline/ilu.h

 10: PetscErrorCode MatSetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
 11: {
 12:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
 13:   PetscInt       kk,ldiag,row,newnz,*bdlen_new;
 15:   PetscInt       j,k, *diag_new;
 16:   PetscTruth     roworiented = a->roworiented,dfound;
 17:   PetscScalar    value,**diagv_new;

 20:   for (kk=0; kk<m; kk++) { /* loop over added rows */
 21:     row = im[kk];
 22:     if (row < 0) continue;
 23:     if (row >= A->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->M-1);
 24:     for (j=0; j<n; j++) {
 25:       if (in[j] < 0) continue;
 26:       if (in[j] >= A->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],A->N-1);
 27:       ldiag  = row - in[j]; /* diagonal number */
 28:       dfound = PETSC_FALSE;
 29:       if (roworiented) {
 30:         value = v[j + kk*n];
 31:       } else {
 32:         value = v[kk + j*m];
 33:       }
 34:       /* search diagonals for required one */
 35:       for (k=0; k<a->nd; k++) {
 36:         if (a->diag[k] == ldiag) {
 37:           dfound = PETSC_TRUE;
 38:           if (is == ADD_VALUES) a->diagv[k][row] += value;
 39:           else                  a->diagv[k][row]  = value;
 40:           break;
 41:         }
 42:       }
 43:       if (!dfound) {
 44:         if (a->nonew || a->nonew_diag) {
 45: #if !defined(PETSC_USE_COMPLEX)
 46:           if (a->user_alloc && value) {
 47: #else
 48:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
 49: #endif
 50:             PetscLogInfo((A,"MatSetValues_SeqBDiag:Nonzero in diagonal %D that user did not allocate\n",ldiag));
 51:           }
 52:         } else {
 53:           PetscLogInfo((A,"MatSetValues_SeqBDiag: Allocating new diagonal: %D\n",ldiag));
 54:           a->reallocs++;
 55:           /* free old bdiag storage info and reallocate */
 56:           PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
 57:           bdlen_new = diag_new + a->nd + 1;
 58:           PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
 59:           for (k=0; k<a->nd; k++) {
 60:             diag_new[k]  = a->diag[k];
 61:             diagv_new[k] = a->diagv[k];
 62:             bdlen_new[k] = a->bdlen[k];
 63:           }
 64:           diag_new[a->nd]  = ldiag;
 65:           if (ldiag > 0) { /* lower triangular */
 66:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
 67:           } else {         /* upper triangular */
 68:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
 69:           }
 70:           newnz = bdlen_new[a->nd];
 71:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
 72:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
 73:           /* adjust pointers so that dv[diag][row] works for all diagonals*/
 74:           if (diag_new[a->nd] > 0) {
 75:             diagv_new[a->nd] -= diag_new[a->nd];
 76:           }
 77:           a->maxnz += newnz;
 78:           a->nz    += newnz;
 79:           PetscFree(a->diagv);
 80:           PetscFree(a->diag);
 81:           a->diag  = diag_new;
 82:           a->bdlen = bdlen_new;
 83:           a->diagv = diagv_new;

 85:           /* Insert value */
 86:           if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
 87:           else                  a->diagv[a->nd][row] = value;
 88:           a->nd++;
 89:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
 90:         }
 91:       }
 92:     }
 93:   }
 94:   return(0);
 95: }


100: PetscErrorCode MatSetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
101: {
102:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
104:   PetscInt       kk,ldiag,shift,row,newnz,*bdlen_new;
105:   PetscInt       j,k,bs = A->bs,*diag_new,idx=0;
106:   PetscTruth     roworiented = a->roworiented,dfound;
107:   PetscScalar    value,**diagv_new;

110:   for (kk=0; kk<m; kk++) { /* loop over added rows */
111:     row = im[kk];
112:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
113:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
114:     shift = (row/bs)*bs*bs + row%bs;
115:     for (j=0; j<n; j++) {
116:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
117:       dfound = PETSC_FALSE;
118:       if (roworiented) {
119:         value = v[idx++];
120:       } else {
121:         value = v[kk + j*m];
122:       }
123:       /* seach for appropriate diagonal */
124:       for (k=0; k<a->nd; k++) {
125:         if (a->diag[k] == ldiag) {
126:           dfound = PETSC_TRUE;
127:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
128:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
129:           break;
130:         }
131:       }
132:       if (!dfound) {
133:         if (a->nonew || a->nonew_diag) {
134: #if !defined(PETSC_USE_COMPLEX)
135:           if (a->user_alloc && value) {
136: #else
137:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
138: #endif
139:             PetscLogInfo((A,"MatSetValues_SeqBDiag:Nonzero in diagonal %D that user did not allocate\n",ldiag));
140:           }
141:         } else {
142:           PetscLogInfo((A,"MatSetValues_SeqBDiag: Allocating new diagonal: %D\n",ldiag));
143:           a->reallocs++;
144:           /* free old bdiag storage info and reallocate */
145:           PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
146:           bdlen_new = diag_new + a->nd + 1;
147:           PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
148:           for (k=0; k<a->nd; k++) {
149:             diag_new[k]  = a->diag[k];
150:             diagv_new[k] = a->diagv[k];
151:             bdlen_new[k] = a->bdlen[k];
152:           }
153:           diag_new[a->nd]  = ldiag;
154:           if (ldiag > 0) {/* lower triangular */
155:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
156:           } else {         /* upper triangular */
157:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
158:           }
159:           newnz = bs*bs*bdlen_new[a->nd];
160:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
161:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
162:           /* adjust pointer so that dv[diag][row] works for all diagonals */
163:           if (diag_new[a->nd] > 0) {
164:             diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
165:           }
166:           a->maxnz += newnz; a->nz += newnz;
167:           PetscFree(a->diagv);
168:           PetscFree(a->diag);
169:           a->diag  = diag_new;
170:           a->bdlen = bdlen_new;
171:           a->diagv = diagv_new;

173:           /* Insert value */
174:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
175:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
176:           a->nd++;
177:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
178:         }
179:       }
180:     }
181:   }
182:   return(0);
183: }

187: PetscErrorCode MatGetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
188: {
189:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
190:   PetscInt     kk,ldiag,row,j,k;
191:   PetscScalar  zero = 0.0;
192:   PetscTruth   dfound;

195:   for (kk=0; kk<m; kk++) { /* loop over rows */
196:     row = im[kk];
197:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
198:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
199:     for (j=0; j<n; j++) {
200:       if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
201:       if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
202:       ldiag = row - in[j]; /* diagonal number */
203:       dfound = PETSC_FALSE;
204:       for (k=0; k<a->nd; k++) {
205:         if (a->diag[k] == ldiag) {
206:           dfound = PETSC_TRUE;
207:           *v++ = a->diagv[k][row];
208:           break;
209:         }
210:       }
211:       if (!dfound) *v++ = zero;
212:     }
213:   }
214:   return(0);
215: }

219: PetscErrorCode MatGetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
220: {
221:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
222:   PetscInt     kk,ldiag,shift,row,j,k,bs = A->bs;
223:   PetscScalar  zero = 0.0;
224:   PetscTruth   dfound;

227:   for (kk=0; kk<m; kk++) { /* loop over rows */
228:     row = im[kk];
229:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
230:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
231:     shift = (row/bs)*bs*bs + row%bs;
232:     for (j=0; j<n; j++) {
233:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
234:       dfound = PETSC_FALSE;
235:       for (k=0; k<a->nd; k++) {
236:         if (a->diag[k] == ldiag) {
237:           dfound = PETSC_TRUE;
238:           *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
239:           break;
240:         }
241:       }
242:       if (!dfound) *v++ = zero;
243:     }
244:   }
245:   return(0);
246: }

248: /*
249:     MatMults for blocksize 1 to 5 and N -------------------------------
250:  */
253: PetscErrorCode MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
254: {
255:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
256:   PetscInt       nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
258:   PetscInt       d,j,len;
259:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
260:   PetscScalar    *pvin,*pvout,*dv;

263:   VecGetArray(xx,&vin);
264:   VecGetArray(yy,&vout);
265:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
266:   for (d=0; d<nd; d++) {
267:     dv   = a_diagv[d];
268:     diag = a_diag[d];
269:     len  = a_bdlen[d];
270:     if (diag > 0) {             /* lower triangle */
271:       pvin  = vin;
272:       pvout = vout + diag;
273:       dv    = dv   + diag;
274:     } else {                     /* upper triangle,including main diagonal */
275:       pvin  = vin - diag;
276:       pvout = vout;
277:     }
278:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
279:     PetscLogFlops(2*len);
280:   }
281:   VecRestoreArray(xx,&vin);
282:   VecRestoreArray(yy,&vout);
283:   return(0);
284: }

288: PetscErrorCode MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
289: {
290:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
291:   PetscInt       nd = a->nd,nb_diag;
293:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
294:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
295:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1;

298:   VecGetArray(xx,&vin);
299:   VecGetArray(yy,&vout);
300:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
301:   for (d=0; d<nd; d++) {
302:     dv      = a_diagv[d];
303:     nb_diag = 2*a_diag[d];
304:     len     = a_bdlen[d];
305:     if (nb_diag > 0) {                /* lower triangle */
306:       pvin  = vin;
307:       pvout = vout + nb_diag;
308:       dv    = dv   + 2*nb_diag;
309:     } else {                       /* upper triangle, including main diagonal */
310:       pvin  = vin - nb_diag;
311:       pvout = vout;
312:     }
313:     for (k=0; k<len; k++) {
314:       pvin0     = pvin[0]; pvin1 = pvin[1];

316:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
317:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

319:       pvout += 2; pvin += 2; dv += 4;
320:     }
321:     PetscLogFlops(8*len);
322:   }
323:   VecRestoreArray(xx,&vin);
324:   VecRestoreArray(yy,&vout);
325:   return(0);
326: }

330: PetscErrorCode MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
331: {
332:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
333:   PetscInt       nd = a->nd,nb_diag;
335:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
336:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
337:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

340:   VecGetArray(xx,&vin);
341:   VecGetArray(yy,&vout);
342:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
343:   for (d=0; d<nd; d++) {
344:     dv      = a_diagv[d];
345:     nb_diag = 3*a_diag[d];
346:     len     = a_bdlen[d];
347:     if (nb_diag > 0) {                /* lower triangle */
348:       pvin  = vin;
349:       pvout = vout + nb_diag;
350:       dv    = dv   + 3*nb_diag;
351:     } else {                       /* upper triangle,including main diagonal */
352:       pvin  = vin - nb_diag;
353:       pvout = vout;
354:     }
355:     for (k=0; k<len; k++) {
356:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

358:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
359:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
360:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

362:       pvout += 3; pvin += 3; dv += 9;
363:     }
364:     PetscLogFlops(18*len);
365:   }
366:   VecRestoreArray(xx,&vin);
367:   VecRestoreArray(yy,&vout);
368:   return(0);
369: }

373: PetscErrorCode MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
374: {
375:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
376:   PetscInt       nd = a->nd,nb_diag;
378:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
379:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
380:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

383:   VecGetArray(xx,&vin);
384:   VecGetArray(yy,&vout);
385:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
386:   for (d=0; d<nd; d++) {
387:     dv      = a_diagv[d];
388:     nb_diag = 4*a_diag[d];
389:     len     = a_bdlen[d];
390:     if (nb_diag > 0) {                /* lower triangle */
391:       pvin  = vin;
392:       pvout = vout + nb_diag;
393:       dv    = dv   + 4*nb_diag;
394:     } else {                       /* upper triangle,including main diagonal */
395:       pvin  = vin - nb_diag;
396:       pvout = vout;
397:     }
398:     for (k=0; k<len; k++) {
399:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

401:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
402:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
403:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
404:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

406:       pvout += 4; pvin += 4; dv += 16;
407:     }
408:     PetscLogFlops(32*len);
409:   }
410:   VecRestoreArray(xx,&vin);
411:   VecRestoreArray(yy,&vout);
412:   return(0);
413: }

417: PetscErrorCode MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
418: {
419:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
420:   PetscInt       nd = a->nd,nb_diag;
422:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
423:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
424:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

427:   VecGetArray(xx,&vin);
428:   VecGetArray(yy,&vout);
429:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
430:   for (d=0; d<nd; d++) {
431:     dv      = a_diagv[d];
432:     nb_diag = 5*a_diag[d];
433:     len     = a_bdlen[d];
434:     if (nb_diag > 0) {                /* lower triangle */
435:       pvin  = vin;
436:       pvout = vout + nb_diag;
437:       dv    = dv   + 5*nb_diag;
438:     } else {                       /* upper triangle,including main diagonal */
439:       pvin  = vin - nb_diag;
440:       pvout = vout;
441:     }
442:     for (k=0; k<len; k++) {
443:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

445:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
446:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
447:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
448:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
449:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

451:       pvout += 5; pvin += 5; dv += 25;
452:     }
453:     PetscLogFlops(50*len);
454:   }
455:   VecRestoreArray(xx,&vin);
456:   VecRestoreArray(yy,&vout);
457:   return(0);
458: }

462: PetscErrorCode MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
463: {
464:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
465:   PetscInt       nd = a->nd,bs = A->bs,nb_diag,bs2 = bs*bs;
467:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
468:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
469:   PetscScalar    *pvin,*pvout,*dv;

472:   VecGetArray(xx,&vin);
473:   VecGetArray(yy,&vout);
474:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
475:   for (d=0; d<nd; d++) {
476:     dv      = a_diagv[d];
477:     nb_diag = bs*a_diag[d];
478:     len     = a_bdlen[d];
479:     if (nb_diag > 0) {                /* lower triangle */
480:       pvin  = vin;
481:       pvout = vout + nb_diag;
482:       dv    = dv   + bs*nb_diag;
483:     } else {                       /* upper triangle, including main diagonal */
484:       pvin  = vin - nb_diag;
485:       pvout = vout;
486:     }
487:     for (k=0; k<len; k++) {
488:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
489:       pvout += bs; pvin += bs; dv += bs2;
490:     }
491:     PetscLogFlops(2*bs2*len);
492:   }
493:   VecRestoreArray(xx,&vin);
494:   VecRestoreArray(yy,&vout);
495:   return(0);
496: }

498: /*
499:     MatMultAdds for blocksize 1 to 5 and N -------------------------------
500:  */
503: PetscErrorCode MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
504: {
505:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
507:   PetscInt       nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
508:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
509:   PetscScalar    *pvin,*pvout,*dv;

512:   if (zz != yy) {VecCopy(zz,yy);}
513:   VecGetArray(xx,&vin);
514:   VecGetArray(yy,&vout);
515:   for (d=0; d<nd; d++) {
516:     dv   = a_diagv[d];
517:     diag = a_diag[d];
518:     len  = a_bdlen[d];
519:     if (diag > 0) {             /* lower triangle */
520:       pvin  = vin;
521:       pvout = vout + diag;
522:       dv    = dv   + diag;
523:     } else {                     /* upper triangle, including main diagonal */
524:       pvin  = vin - diag;
525:       pvout = vout;
526:     }
527:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
528:     PetscLogFlops(2*len);
529:   }
530:   VecRestoreArray(xx,&vin);
531:   VecRestoreArray(yy,&vout);
532:   return(0);
533: }

537: PetscErrorCode MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
538: {
539:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
541:   PetscInt       nd = a->nd,nb_diag;
542:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
543:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
544:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1;

547:   if (zz != yy) {VecCopy(zz,yy);}
548:   VecGetArray(xx,&vin);
549:   VecGetArray(yy,&vout);
550:   for (d=0; d<nd; d++) {
551:     dv      = a_diagv[d];
552:     nb_diag = 2*a_diag[d];
553:     len     = a_bdlen[d];
554:     if (nb_diag > 0) {                /* lower triangle */
555:       pvin  = vin;
556:       pvout = vout + nb_diag;
557:       dv    = dv   + 2*nb_diag;
558:     } else {                       /* upper triangle, including main diagonal */
559:       pvin  = vin - nb_diag;
560:       pvout = vout;
561:     }
562:     for (k=0; k<len; k++) {
563:       pvin0 = pvin[0]; pvin1 = pvin[1];

565:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
566:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

568:       pvout += 2; pvin += 2; dv += 4;
569:     }
570:     PetscLogFlops(8*len);
571:   }
572:   VecRestoreArray(xx,&vin);
573:   VecRestoreArray(yy,&vout);
574:   return(0);
575: }

579: PetscErrorCode MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
580: {
581:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
583:   PetscInt       nd = a->nd,nb_diag;
584:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
585:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
586:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

589:   if (zz != yy) {VecCopy(zz,yy);}
590:   VecGetArray(xx,&vin);
591:   VecGetArray(yy,&vout);
592:   for (d=0; d<nd; d++) {
593:     dv      = a_diagv[d];
594:     nb_diag = 3*a_diag[d];
595:     len     = a_bdlen[d];
596:     if (nb_diag > 0) {                /* lower triangle */
597:       pvin  = vin;
598:       pvout = vout + nb_diag;
599:       dv    = dv   + 3*nb_diag;
600:     } else {                       /* upper triangle, including main diagonal */
601:       pvin  = vin - nb_diag;
602:       pvout = vout;
603:     }
604:     for (k=0; k<len; k++) {
605:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

607:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
608:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
609:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

611:       pvout += 3; pvin += 3; dv += 9;
612:     }
613:     PetscLogFlops(18*len);
614:   }
615:   VecRestoreArray(xx,&vin);
616:   VecRestoreArray(yy,&vout);
617:   return(0);
618: }

622: PetscErrorCode MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
623: {
624:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
626:   PetscInt       nd = a->nd,nb_diag;
627:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
628:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
629:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

632:   if (zz != yy) {VecCopy(zz,yy);}
633:   VecGetArray(xx,&vin);
634:   VecGetArray(yy,&vout);
635:   for (d=0; d<nd; d++) {
636:     dv      = a_diagv[d];
637:     nb_diag = 4*a_diag[d];
638:     len     = a_bdlen[d];
639:     if (nb_diag > 0) {                /* lower triangle */
640:       pvin  = vin;
641:       pvout = vout + nb_diag;
642:       dv    = dv   + 4*nb_diag;
643:     } else {                       /* upper triangle, including main diagonal */
644:       pvin  = vin - nb_diag;
645:       pvout = vout;
646:     }
647:     for (k=0; k<len; k++) {
648:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

650:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
651:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
652:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
653:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

655:       pvout += 4; pvin += 4; dv += 16;
656:     }
657:     PetscLogFlops(32*len);
658:   }
659:   VecRestoreArray(xx,&vin);
660:   VecRestoreArray(yy,&vout);
661:   return(0);
662: }

666: PetscErrorCode MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
667: {
668:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
670:   PetscInt       nd = a->nd,nb_diag;
671:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
672:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
673:   PetscScalar    *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

676:   if (zz != yy) {VecCopy(zz,yy);}
677:   VecGetArray(xx,&vin);
678:   VecGetArray(yy,&vout);
679:   for (d=0; d<nd; d++) {
680:     dv      = a_diagv[d];
681:     nb_diag = 5*a_diag[d];
682:     len     = a_bdlen[d];
683:     if (nb_diag > 0) {                /* lower triangle */
684:       pvin  = vin;
685:       pvout = vout + nb_diag;
686:       dv    = dv   + 5*nb_diag;
687:     } else {                       /* upper triangle, including main diagonal */
688:       pvin  = vin - nb_diag;
689:       pvout = vout;
690:     }
691:     for (k=0; k<len; k++) {
692:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

694:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
695:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
696:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
697:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
698:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

700:       pvout += 5; pvin += 5; dv += 25;
701:     }
702:     PetscLogFlops(50*len);
703:   }
704:   VecRestoreArray(xx,&vin);
705:   VecRestoreArray(yy,&vout);
706:   return(0);
707: }

711: PetscErrorCode MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
712: {
713:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
715:   PetscInt       nd = a->nd,bs = A->bs,nb_diag,bs2 = bs*bs;
716:   PetscInt       *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
717:   PetscScalar    *vin,*vout,**a_diagv = a->diagv;
718:   PetscScalar    *pvin,*pvout,*dv;

721:   if (zz != yy) {VecCopy(zz,yy);}
722:   VecGetArray(xx,&vin);
723:   VecGetArray(yy,&vout);
724:   for (d=0; d<nd; d++) {
725:     dv      = a_diagv[d];
726:     nb_diag = bs*a_diag[d];
727:     len     = a_bdlen[d];
728:     if (nb_diag > 0) {                /* lower triangle */
729:       pvin  = vin;
730:       pvout = vout + nb_diag;
731:       dv    = dv   + bs*nb_diag;
732:     } else {                       /* upper triangle, including main diagonal */
733:       pvin  = vin - nb_diag;
734:       pvout = vout;
735:     }
736:     for (k=0; k<len; k++) {
737:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
738:       pvout += bs; pvin += bs; dv += bs2;
739:     }
740:     PetscLogFlops(2*bs2*len);
741:   }
742:   VecRestoreArray(xx,&vin);
743:   VecRestoreArray(yy,&vout);
744:   return(0);
745: }

747: /*
748:      MatMultTranspose ----------------------------------------------
749:  */
752: PetscErrorCode MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
753: {
754:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
756:   PetscInt       nd = a->nd,diag,d,j,len;
757:   PetscScalar    *pvin,*pvout,*dv;
758:   PetscScalar    *vin,*vout;
759: 
761:   VecGetArray(xx,&vin);
762:   VecGetArray(yy,&vout);
763:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
764:   for (d=0; d<nd; d++) {
765:     dv   = a->diagv[d];
766:     diag = a->diag[d];
767:     len  = a->bdlen[d];
768:       /* diag of original matrix is (row/bs - col/bs) */
769:       /* diag of transpose matrix is (col/bs - row/bs) */
770:     if (diag < 0) {        /* transpose is lower triangle */
771:       pvin  = vin;
772:       pvout = vout - diag;
773:     } else {        /* transpose is upper triangle, including main diagonal */
774:       pvin  = vin + diag;
775:       pvout = vout;
776:       dv    = dv + diag;
777:     }
778:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
779:   }
780:   VecRestoreArray(xx,&vin);
781:   VecRestoreArray(yy,&vout);
782:   return(0);
783: }

787: PetscErrorCode MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
788: {
789:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
791:   PetscInt       nd = a->nd,bs = A->bs,diag,kshift,kloc,d,i,j,k,len;
792:   PetscScalar    *pvin,*pvout,*dv;
793:   PetscScalar    *vin,*vout;
794: 
796:   VecGetArray(xx,&vin);
797:   VecGetArray(yy,&vout);
798:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
799:   for (d=0; d<nd; d++) {
800:     dv   = a->diagv[d];
801:     diag = a->diag[d];
802:     len  = a->bdlen[d];
803:       /* diag of original matrix is (row/bs - col/bs) */
804:       /* diag of transpose matrix is (col/bs - row/bs) */
805:     if (diag < 0) {        /* transpose is lower triangle */
806:       pvin  = vin;
807:       pvout = vout - bs*diag;
808:     } else {        /* transpose is upper triangle, including main diagonal */
809:       pvin  = vin + bs*diag;
810:       pvout = vout;
811:       dv    = dv + diag;
812:     }
813:     for (k=0; k<len; k++) {
814:       kloc = k*bs; kshift = kloc*bs;
815:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
816:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
817:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
818:         }
819:       }
820:     }
821:   }
822:   VecRestoreArray(xx,&vin);
823:   VecRestoreArray(yy,&vout);
824:   return(0);
825: }

827: /*
828:      MatMultTransposeAdd ----------------------------------------------
829:  */
832: PetscErrorCode MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
833: {
834:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
836:   PetscInt       nd = a->nd,diag,d,j,len;
837:   PetscScalar    *pvin,*pvout,*dv;
838:   PetscScalar    *vin,*vout;
839: 
841:   if (zz != yy) {VecCopy(zz,yy);}
842:   VecGetArray(xx,&vin);
843:   VecGetArray(yy,&vout);
844:   for (d=0; d<nd; d++) {
845:     dv   = a->diagv[d];
846:     diag = a->diag[d];
847:     len  = a->bdlen[d];
848:       /* diag of original matrix is (row/bs - col/bs) */
849:       /* diag of transpose matrix is (col/bs - row/bs) */
850:     if (diag < 0) {        /* transpose is lower triangle */
851:       pvin  = vin;
852:       pvout = vout - diag;
853:     } else {        /* transpose is upper triangle, including main diagonal */
854:       pvin  = vin + diag;
855:       pvout = vout;
856:       dv    = dv + diag;
857:     }
858:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
859:   }
860:   VecRestoreArray(xx,&vin);
861:   VecRestoreArray(yy,&vout);
862:   return(0);
863: }

867: PetscErrorCode MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
868: {
869:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
871:   PetscInt       nd = a->nd,bs = A->bs,diag,kshift,kloc,d,i,j,k,len;
872:   PetscScalar    *pvin,*pvout,*dv;
873:   PetscScalar    *vin,*vout;
874: 
876:   if (zz != yy) {VecCopy(zz,yy);}
877:   VecGetArray(xx,&vin);
878:   VecGetArray(yy,&vout);
879:   for (d=0; d<nd; d++) {
880:     dv   = a->diagv[d];
881:     diag = a->diag[d];
882:     len  = a->bdlen[d];
883:       /* diag of original matrix is (row/bs - col/bs) */
884:       /* diag of transpose matrix is (col/bs - row/bs) */
885:     if (diag < 0) {        /* transpose is lower triangle */
886:       pvin  = vin;
887:       pvout = vout - bs*diag;
888:     } else {        /* transpose is upper triangle, including main diagonal */
889:       pvin  = vin + bs*diag;
890:       pvout = vout;
891:       dv    = dv + diag;
892:     }
893:     for (k=0; k<len; k++) {
894:       kloc = k*bs; kshift = kloc*bs;
895:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
896:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
897:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
898:         }
899:       }
900:     }
901:   }
902:   VecRestoreArray(xx,&vin);
903:   VecRestoreArray(yy,&vout);
904:   return(0);
905: }
908: PetscErrorCode MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
909: {
910:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
911:   PetscScalar    *x,*b,*xb,*dd,*dv,dval,sum;
913:   PetscInt       i,j,k,d,kbase,bs = A->bs,kloc;
914:   PetscInt       mainbd = a->mainbd,diag,mblock = a->mblock,bloc;

917:   its = its*lits;
918:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

920:   /* Currently this code doesn't use wavefront orderings, although
921:      we should eventually incorporate that option, whatever wavefront
922:      ordering maybe :-) */

924:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");

926:   VecGetArray(xx,&x);
927:   if (xx != bb) {
928:     VecGetArray(bb,&b);
929:   } else {
930:     b = x;
931:   }
932:   dd = a->diagv[mainbd];
933:   if (flag == SOR_APPLY_UPPER) {
934:     /* apply (U + D/omega) to the vector */
935:     for (k=0; k<mblock; k++) {
936:       kloc = k*bs; kbase = kloc*bs;
937:       for (i=0; i<bs; i++) {
938:         sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
939:         for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
940:         for (d=mainbd+1; d<a->nd; d++) {
941:           diag = a->diag[d];
942:           dv   = a->diagv[d];
943:           if (k-diag < mblock) {
944:             for (j=0; j<bs; j++) {
945:               sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
946:             }
947:           }
948:         }
949:         x[kloc+i] = sum;
950:       }
951:     }
952:     VecRestoreArray(xx,&x);
953:     if (xx != bb) {VecRestoreArray(bb,&b);}
954:     return(0);
955:   }
956:   if (flag & SOR_ZERO_INITIAL_GUESS) {
957:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
958:       for (k=0; k<mblock; k++) {
959:         kloc = k*bs; kbase = kloc*bs;
960:         for (i=0; i<bs; i++) {
961:           sum  = b[i+kloc];
962:           dval = shift + dd[i*(bs+1)+kbase];
963:           for (d=0; d<mainbd; d++) {
964:             diag = a->diag[d];
965:             dv   = a->diagv[d];
966:             if (k >= diag) {
967:               for (j=0; j<bs; j++)
968:                 sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
969:             }
970:           }
971:           for (j=0; j<i; j++){
972:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
973:           }
974:           x[kloc+i] = omega*sum/dval;
975:         }
976:       }
977:       xb = x;
978:     } else xb = b;
979:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
980:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
981:       for (k=0; k<mblock; k++) {
982:         kloc = k*bs; kbase = kloc*bs;
983:         for (i=0; i<bs; i++)
984:           x[kloc+i] *= dd[i*(bs+1)+kbase];
985:       }
986:     }
987:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
988:       for (k=mblock-1; k>=0; k--) {
989:         kloc = k*bs; kbase = kloc*bs;
990:         for (i=bs-1; i>=0; i--) {
991:           sum  = xb[i+kloc];
992:           dval = shift + dd[i*(bs+1)+kbase];
993:           for (j=i+1; j<bs; j++)
994:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
995:           for (d=mainbd+1; d<a->nd; d++) {
996:             diag = a->diag[d];
997:             dv   = a->diagv[d];
998:             bloc = k - diag;
999:             if (bloc < mblock) {
1000:               for (j=0; j<bs; j++)
1001:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1002:             }
1003:           }
1004:           x[kloc+i] = omega*sum/dval;
1005:         }
1006:       }
1007:     }
1008:     its--;
1009:   }
1010:   while (its--) {
1011:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1012:       for (k=0; k<mblock; k++) {
1013:         kloc = k*bs; kbase = kloc*bs;
1014:         for (i=0; i<bs; i++) {
1015:           sum  = b[i+kloc];
1016:           dval = shift + dd[i*(bs+1)+kbase];
1017:           for (d=0; d<mainbd; d++) {
1018:             diag = a->diag[d];
1019:             dv   = a->diagv[d];
1020:             bloc = k - diag;
1021:             if (bloc >= 0) {
1022:               for (j=0; j<bs; j++) {
1023:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1024:               }
1025:             }
1026:           }
1027:           for (d=mainbd; d<a->nd; d++) {
1028:             diag = a->diag[d];
1029:             dv   = a->diagv[d];
1030:             bloc = k - diag;
1031:             if (bloc < mblock) {
1032:               for (j=0; j<bs; j++) {
1033:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1034:               }
1035:             }
1036:           }
1037:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1038:         }
1039:       }
1040:     }
1041:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1042:       for (k=mblock-1; k>=0; k--) {
1043:         kloc = k*bs; kbase = kloc*bs;
1044:         for (i=bs-1; i>=0; i--) {
1045:           sum  = b[i+kloc];
1046:           dval = shift + dd[i*(bs+1)+kbase];
1047:           for (d=0; d<mainbd; d++) {
1048:             diag = a->diag[d];
1049:             dv   = a->diagv[d];
1050:             bloc = k - diag;
1051:             if (bloc >= 0) {
1052:               for (j=0; j<bs; j++) {
1053:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1054:               }
1055:             }
1056:           }
1057:           for (d=mainbd; d<a->nd; d++) {
1058:             diag = a->diag[d];
1059:             dv   = a->diagv[d];
1060:             bloc = k - diag;
1061:             if (bloc < mblock) {
1062:               for (j=0; j<bs; j++) {
1063:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1064:               }
1065:             }
1066:           }
1067:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1068:         }
1069:       }
1070:     }
1071:   }
1072:   VecRestoreArray(xx,&x);
1073:   if (xx != bb) VecRestoreArray(bb,&b);
1074:   return(0);
1075: }

1079: PetscErrorCode MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
1080: {
1081:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
1082:   PetscScalar    *x,*b,*xb,*dd,dval,sum;
1084:   PetscInt       m = A->m,i,d,loc;
1085:   PetscInt       mainbd = a->mainbd,diag;

1088:   its = its*lits;
1089:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1090:   /* Currently this code doesn't use wavefront orderings,although
1091:      we should eventually incorporate that option, whatever wavefront
1092:      ordering maybe :-) */

1094:   VecGetArray(xx,&x);
1095:   VecGetArray(bb,&b);
1096:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1097:   dd = a->diagv[mainbd];
1098:   if (flag == SOR_APPLY_UPPER) {
1099:     /* apply (U + D/omega) to the vector */
1100:     for (i=0; i<m; i++) {
1101:       sum = b[i] * (shift + dd[i]) / omega;
1102:       for (d=mainbd+1; d<a->nd; d++) {
1103:         diag = a->diag[d];
1104:         if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1105:       }
1106:       x[i] = sum;
1107:     }
1108:     VecRestoreArray(xx,&x);
1109:     VecRestoreArray(bb,&b);
1110:     return(0);
1111:   }
1112:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1113:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1114:       for (i=0; i<m; i++) {
1115:         sum  = b[i];
1116:         for (d=0; d<mainbd; d++) {
1117:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1118:         }
1119:         x[i] = omega*(sum/(shift + dd[i]));
1120:       }
1121:       xb = x;
1122:     } else xb = b;
1123:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1124:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1125:       for (i=0; i<m; i++) x[i] *= dd[i];
1126:     }
1127:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1128:       for (i=m-1; i>=0; i--) {
1129:         sum = xb[i];
1130:         for (d=mainbd+1; d<a->nd; d++) {
1131:           diag = a->diag[d];
1132:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1133:         }
1134:         x[i] = omega*(sum/(shift + dd[i]));
1135:       }
1136:     }
1137:     its--;
1138:   }
1139:   while (its--) {
1140:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1141:       for (i=0; i<m; i++) {
1142:         sum  = b[i];
1143:         dval = shift + dd[i];
1144:         for (d=0; d<mainbd; d++) {
1145:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1146:         }
1147:         for (d=mainbd; d<a->nd; d++) {
1148:           diag = a->diag[d];
1149:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1150:         }
1151:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1152:       }
1153:     }
1154:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1155:       for (i=m-1; i>=0; i--) {
1156:         sum = b[i];
1157:         for (d=0; d<mainbd; d++) {
1158:           loc = i - a->diag[d];
1159:           if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1160:         }
1161:         for (d=mainbd; d<a->nd; d++) {
1162:           diag = a->diag[d];
1163:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1164:         }
1165:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1166:       }
1167:     }
1168:   }
1169:   VecRestoreArray(xx,&x);
1170:   VecRestoreArray(bb,&b);
1171:   return(0);
1172: }