Actual source code: bdiag2.c

  1: /*$Id: bdiag2.c,v 1.21 2001/08/07 03:02:53 balay Exp $*/

  3: /* Block diagonal matrix format */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/vec/vecimpl.h
 7:  #include src/inline/ilu.h

  9: int MatSetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
 10: {
 11:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
 12:   int          kk,ldiag,row,newnz,*bdlen_new;
 13:   int          j,k, *diag_new,ierr;
 14:   PetscTruth   roworiented = a->roworiented,dfound;
 15:   PetscScalar  value,**diagv_new;

 18:   for (kk=0; kk<m; kk++) { /* loop over added rows */
 19:     row = im[kk];
 20:     if (row < 0) continue;
 21:     if (row >= A->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 22:     for (j=0; j<n; j++) {
 23:       if (in[j] < 0) continue;
 24:       if (in[j] >= A->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 25:       ldiag  = row - in[j]; /* diagonal number */
 26:       dfound = PETSC_FALSE;
 27:       if (roworiented) {
 28:         value = v[j + kk*n];
 29:       } else {
 30:         value = v[kk + j*m];
 31:       }
 32:       /* search diagonals for required one */
 33:       for (k=0; k<a->nd; k++) {
 34:         if (a->diag[k] == ldiag) {
 35:           dfound = PETSC_TRUE;
 36:           if (is == ADD_VALUES) a->diagv[k][row] += value;
 37:           else                  a->diagv[k][row]  = value;
 38:           break;
 39:         }
 40:       }
 41:       if (!dfound) {
 42:         if (a->nonew || a->nonew_diag) {
 43: #if !defined(PETSC_USE_COMPLEX)
 44:           if (a->user_alloc && value) {
 45: #else
 46:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
 47: #endif
 48:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
 49:           }
 50:         } else {
 51:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
 52:           a->reallocs++;
 53:           /* free old bdiag storage info and reallocate */
 54:           ierr      = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
 55:           bdlen_new = diag_new + a->nd + 1;
 56:           ierr      = PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
 57:           for (k=0; k<a->nd; k++) {
 58:             diag_new[k]  = a->diag[k];
 59:             diagv_new[k] = a->diagv[k];
 60:             bdlen_new[k] = a->bdlen[k];
 61:           }
 62:           diag_new[a->nd]  = ldiag;
 63:           if (ldiag > 0) { /* lower triangular */
 64:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
 65:           } else {         /* upper triangular */
 66:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
 67:           }
 68:           newnz = bdlen_new[a->nd];
 69:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
 70:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
 71:           /* adjust pointers so that dv[diag][row] works for all diagonals*/
 72:           if (diag_new[a->nd] > 0) {
 73:             diagv_new[a->nd] -= diag_new[a->nd];
 74:           }
 75:           a->maxnz += newnz;
 76:           a->nz    += newnz;
 77:           PetscFree(a->diagv);
 78:           PetscFree(a->diag);
 79:           a->diag  = diag_new;
 80:           a->bdlen = bdlen_new;
 81:           a->diagv = diagv_new;

 83:           /* Insert value */
 84:           if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
 85:           else                  a->diagv[a->nd][row] = value;
 86:           a->nd++;
 87:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
 88:         }
 89:       }
 90:     }
 91:   }
 92:   return(0);
 93: }


 96: int MatSetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
 97: {
 98:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
 99:   int          kk,ldiag,shift,row,newnz,*bdlen_new,ierr;
100:   int          j,k,bs = a->bs,*diag_new;
101:   PetscTruth   roworiented = a->roworiented,dfound;
102:   PetscScalar  value,**diagv_new;

105:   for (kk=0; kk<m; kk++) { /* loop over added rows */
106:     row = im[kk];
107:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
108:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
109:     shift = (row/bs)*bs*bs + row%bs;
110:     for (j=0; j<n; j++) {
111:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
112:       dfound = PETSC_FALSE;
113:       if (roworiented) {
114:         value = *v++;
115:       } else {
116:         value = v[kk + j*m];
117:       }
118:       /* seach for appropriate diagonal */
119:       for (k=0; k<a->nd; k++) {
120:         if (a->diag[k] == ldiag) {
121:           dfound = PETSC_TRUE;
122:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
123:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
124:           break;
125:         }
126:       }
127:       if (!dfound) {
128:         if (a->nonew || a->nonew_diag) {
129: #if !defined(PETSC_USE_COMPLEX)
130:           if (a->user_alloc && value) {
131: #else
132:           if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
133: #endif
134:             PetscLogInfo(A,"MatSetValues_SeqBDiag:Nonzero in diagonal %d that user did not allocaten",ldiag);
135:           }
136:         } else {
137:           PetscLogInfo(A,"MatSetValues_SeqBDiag: Allocating new diagonal: %dn",ldiag);
138:           a->reallocs++;
139:           /* free old bdiag storage info and reallocate */
140:           ierr      = PetscMalloc(2*(a->nd+1)*sizeof(int),&diag_new);
141:           bdlen_new = diag_new + a->nd + 1;
142:           ierr      = PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
143:           for (k=0; k<a->nd; k++) {
144:             diag_new[k]  = a->diag[k];
145:             diagv_new[k] = a->diagv[k];
146:             bdlen_new[k] = a->bdlen[k];
147:           }
148:           diag_new[a->nd]  = ldiag;
149:           if (ldiag > 0) {/* lower triangular */
150:             bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
151:           } else {         /* upper triangular */
152:             bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
153:           }
154:           newnz = bs*bs*bdlen_new[a->nd];
155:           PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
156:           PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
157:           /* adjust pointer so that dv[diag][row] works for all diagonals */
158:           if (diag_new[a->nd] > 0) {
159:             diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
160:           }
161:           a->maxnz += newnz; a->nz += newnz;
162:           PetscFree(a->diagv);
163:           PetscFree(a->diag);
164:           a->diag  = diag_new;
165:           a->bdlen = bdlen_new;
166:           a->diagv = diagv_new;

168:           /* Insert value */
169:           if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
170:           else                  a->diagv[k][shift + (in[j]%bs)*bs] = value;
171:           a->nd++;
172:           PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(int)+sizeof(PetscScalar*));
173:         }
174:       }
175:     }
176:   }
177:   return(0);
178: }

180: int MatGetValues_SeqBDiag_1(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
181: {
182:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
183:   int          kk,ldiag,row,j,k;
184:   PetscScalar  zero = 0.0;
185:   PetscTruth   dfound;

188:   for (kk=0; kk<m; kk++) { /* loop over rows */
189:     row = im[kk];
190:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
191:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
192:     for (j=0; j<n; j++) {
193:       if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
194:       if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
195:       ldiag = row - in[j]; /* diagonal number */
196:       dfound = PETSC_FALSE;
197:       for (k=0; k<a->nd; k++) {
198:         if (a->diag[k] == ldiag) {
199:           dfound = PETSC_TRUE;
200:           *v++ = a->diagv[k][row];
201:           break;
202:         }
203:       }
204:       if (!dfound) *v++ = zero;
205:     }
206:   }
207:   return(0);
208: }

210: int MatGetValues_SeqBDiag_N(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
211: {
212:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
213:   int          kk,ldiag,shift,row,j,k,bs = a->bs;
214:   PetscScalar  zero = 0.0;
215:   PetscTruth   dfound;

218:   for (kk=0; kk<m; kk++) { /* loop over rows */
219:     row = im[kk];
220:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
221:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222:     shift = (row/bs)*bs*bs + row%bs;
223:     for (j=0; j<n; j++) {
224:       ldiag  = row/bs - in[j]/bs; /* block diagonal */
225:       dfound = PETSC_FALSE;
226:       for (k=0; k<a->nd; k++) {
227:         if (a->diag[k] == ldiag) {
228:           dfound = PETSC_TRUE;
229:           *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
230:           break;
231:         }
232:       }
233:       if (!dfound) *v++ = zero;
234:     }
235:   }
236:   return(0);
237: }

239: /*
240:     MatMults for blocksize 1 to 5 and N -------------------------------
241:  */
242: int MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
243: {
244:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
245:   int             nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
246:   int             ierr,d,j,len;
247:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
248:   PetscScalar     *pvin,*pvout,*dv;

251:   VecGetArray(xx,&vin);
252:   VecGetArray(yy,&vout);
253:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
254:   for (d=0; d<nd; d++) {
255:     dv   = a_diagv[d];
256:     diag = a_diag[d];
257:     len  = a_bdlen[d];
258:     if (diag > 0) {             /* lower triangle */
259:       pvin  = vin;
260:       pvout = vout + diag;
261:       dv    = dv   + diag;
262:     } else {                     /* upper triangle,including main diagonal */
263:       pvin  = vin - diag;
264:       pvout = vout;
265:     }
266:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
267:     PetscLogFlops(2*len);
268:   }
269:   VecRestoreArray(xx,&vin);
270:   VecRestoreArray(yy,&vout);
271:   return(0);
272: }

274: int MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
275: {
276:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
277:   int             nd = a->nd,nb_diag;
278:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
279:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
280:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1;

283:   VecGetArray(xx,&vin);
284:   VecGetArray(yy,&vout);
285:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
286:   for (d=0; d<nd; d++) {
287:     dv      = a_diagv[d];
288:     nb_diag = 2*a_diag[d];
289:     len     = a_bdlen[d];
290:     if (nb_diag > 0) {                /* lower triangle */
291:       pvin  = vin;
292:       pvout = vout + nb_diag;
293:       dv    = dv   + 2*nb_diag;
294:     } else {                       /* upper triangle, including main diagonal */
295:       pvin  = vin - nb_diag;
296:       pvout = vout;
297:     }
298:     for (k=0; k<len; k++) {
299:       pvin0     = pvin[0]; pvin1 = pvin[1];

301:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
302:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

304:       pvout += 2; pvin += 2; dv += 4;
305:     }
306:     PetscLogFlops(8*len);
307:   }
308:   VecRestoreArray(xx,&vin);
309:   VecRestoreArray(yy,&vout);
310:   return(0);
311: }

313: int MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
314: {
315:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
316:   int             nd = a->nd,nb_diag;
317:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
318:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
319:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

322:   VecGetArray(xx,&vin);
323:   VecGetArray(yy,&vout);
324:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
325:   for (d=0; d<nd; d++) {
326:     dv      = a_diagv[d];
327:     nb_diag = 3*a_diag[d];
328:     len     = a_bdlen[d];
329:     if (nb_diag > 0) {                /* lower triangle */
330:       pvin  = vin;
331:       pvout = vout + nb_diag;
332:       dv    = dv   + 3*nb_diag;
333:     } else {                       /* upper triangle,including main diagonal */
334:       pvin  = vin - nb_diag;
335:       pvout = vout;
336:     }
337:     for (k=0; k<len; k++) {
338:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

340:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
341:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
342:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

344:       pvout += 3; pvin += 3; dv += 9;
345:     }
346:     PetscLogFlops(18*len);
347:   }
348:   VecRestoreArray(xx,&vin);
349:   VecRestoreArray(yy,&vout);
350:   return(0);
351: }

353: int MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
354: {
355:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
356:   int             nd = a->nd,nb_diag;
357:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
358:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
359:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

362:   VecGetArray(xx,&vin);
363:   VecGetArray(yy,&vout);
364:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
365:   for (d=0; d<nd; d++) {
366:     dv      = a_diagv[d];
367:     nb_diag = 4*a_diag[d];
368:     len     = a_bdlen[d];
369:     if (nb_diag > 0) {                /* lower triangle */
370:       pvin  = vin;
371:       pvout = vout + nb_diag;
372:       dv    = dv   + 4*nb_diag;
373:     } else {                       /* upper triangle,including main diagonal */
374:       pvin  = vin - nb_diag;
375:       pvout = vout;
376:     }
377:     for (k=0; k<len; k++) {
378:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

380:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
381:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
382:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
383:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

385:       pvout += 4; pvin += 4; dv += 16;
386:     }
387:     PetscLogFlops(32*len);
388:   }
389:   VecRestoreArray(xx,&vin);
390:   VecRestoreArray(yy,&vout);
391:   return(0);
392: }

394: int MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
395: {
396:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
397:   int             nd = a->nd,nb_diag;
398:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
399:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
400:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

403:   VecGetArray(xx,&vin);
404:   VecGetArray(yy,&vout);
405:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
406:   for (d=0; d<nd; d++) {
407:     dv      = a_diagv[d];
408:     nb_diag = 5*a_diag[d];
409:     len     = a_bdlen[d];
410:     if (nb_diag > 0) {                /* lower triangle */
411:       pvin  = vin;
412:       pvout = vout + nb_diag;
413:       dv    = dv   + 5*nb_diag;
414:     } else {                       /* upper triangle,including main diagonal */
415:       pvin  = vin - nb_diag;
416:       pvout = vout;
417:     }
418:     for (k=0; k<len; k++) {
419:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

421:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
422:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
423:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
424:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
425:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

427:       pvout += 5; pvin += 5; dv += 25;
428:     }
429:     PetscLogFlops(50*len);
430:   }
431:   VecRestoreArray(xx,&vin);
432:   VecRestoreArray(yy,&vout);
433:   return(0);
434: }

436: int MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
437: {
438:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
439:   int             nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
440:   int             ierr,*a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
441:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
442:   PetscScalar     *pvin,*pvout,*dv;

445:   VecGetArray(xx,&vin);
446:   VecGetArray(yy,&vout);
447:   PetscMemzero(vout,A->m*sizeof(PetscScalar));
448:   for (d=0; d<nd; d++) {
449:     dv      = a_diagv[d];
450:     nb_diag = bs*a_diag[d];
451:     len     = a_bdlen[d];
452:     if (nb_diag > 0) {                /* lower triangle */
453:       pvin  = vin;
454:       pvout = vout + nb_diag;
455:       dv    = dv   + bs*nb_diag;
456:     } else {                       /* upper triangle, including main diagonal */
457:       pvin  = vin - nb_diag;
458:       pvout = vout;
459:     }
460:     for (k=0; k<len; k++) {
461:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
462:       pvout += bs; pvin += bs; dv += bs2;
463:     }
464:     PetscLogFlops(2*bs2*len);
465:   }
466:   VecRestoreArray(xx,&vin);
467:   VecRestoreArray(yy,&vout);
468:   return(0);
469: }

471: /*
472:     MatMultAdds for blocksize 1 to 5 and N -------------------------------
473:  */
474: int MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
475: {
476:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
477:   int             ierr,nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
478:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
479:   PetscScalar     *pvin,*pvout,*dv;

482:   if (zz != yy) {VecCopy(zz,yy);}
483:   VecGetArray(xx,&vin);
484:   VecGetArray(yy,&vout);
485:   for (d=0; d<nd; d++) {
486:     dv   = a_diagv[d];
487:     diag = a_diag[d];
488:     len  = a_bdlen[d];
489:     if (diag > 0) {             /* lower triangle */
490:       pvin  = vin;
491:       pvout = vout + diag;
492:       dv    = dv   + diag;
493:     } else {                     /* upper triangle, including main diagonal */
494:       pvin  = vin - diag;
495:       pvout = vout;
496:     }
497:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
498:     PetscLogFlops(2*len);
499:   }
500:   VecRestoreArray(xx,&vin);
501:   VecRestoreArray(yy,&vout);
502:   return(0);
503: }

505: int MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
506: {
507:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
508:   int             ierr,nd = a->nd,nb_diag;
509:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
510:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
511:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1;

514:   if (zz != yy) {VecCopy(zz,yy);}
515:   VecGetArray(xx,&vin);
516:   VecGetArray(yy,&vout);
517:   for (d=0; d<nd; d++) {
518:     dv      = a_diagv[d];
519:     nb_diag = 2*a_diag[d];
520:     len     = a_bdlen[d];
521:     if (nb_diag > 0) {                /* lower triangle */
522:       pvin  = vin;
523:       pvout = vout + nb_diag;
524:       dv    = dv   + 2*nb_diag;
525:     } else {                       /* upper triangle, including main diagonal */
526:       pvin  = vin - nb_diag;
527:       pvout = vout;
528:     }
529:     for (k=0; k<len; k++) {
530:       pvin0 = pvin[0]; pvin1 = pvin[1];

532:       pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
533:       pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;

535:       pvout += 2; pvin += 2; dv += 4;
536:     }
537:     PetscLogFlops(8*len);
538:   }
539:   VecRestoreArray(xx,&vin);
540:   VecRestoreArray(yy,&vout);
541:   return(0);
542: }

544: int MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
545: {
546:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
547:   int             ierr,nd = a->nd,nb_diag;
548:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
549:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
550:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2;

553:   if (zz != yy) {VecCopy(zz,yy);}
554:   VecGetArray(xx,&vin);
555:   VecGetArray(yy,&vout);
556:   for (d=0; d<nd; d++) {
557:     dv      = a_diagv[d];
558:     nb_diag = 3*a_diag[d];
559:     len     = a_bdlen[d];
560:     if (nb_diag > 0) {                /* lower triangle */
561:       pvin  = vin;
562:       pvout = vout + nb_diag;
563:       dv    = dv   + 3*nb_diag;
564:     } else {                       /* upper triangle, including main diagonal */
565:       pvin  = vin - nb_diag;
566:       pvout = vout;
567:     }
568:     for (k=0; k<len; k++) {
569:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];

571:       pvout[0] += dv[0]*pvin0 + dv[3]*pvin1  + dv[6]*pvin2;
572:       pvout[1] += dv[1]*pvin0 + dv[4]*pvin1  + dv[7]*pvin2;
573:       pvout[2] += dv[2]*pvin0 + dv[5]*pvin1  + dv[8]*pvin2;

575:       pvout += 3; pvin += 3; dv += 9;
576:     }
577:     PetscLogFlops(18*len);
578:   }
579:   VecRestoreArray(xx,&vin);
580:   VecRestoreArray(yy,&vout);
581:   return(0);
582: }

584: int MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
585: {
586:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
587:   int             ierr,nd = a->nd,nb_diag;
588:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
589:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
590:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;

593:   if (zz != yy) {VecCopy(zz,yy);}
594:   VecGetArray(xx,&vin);
595:   VecGetArray(yy,&vout);
596:   for (d=0; d<nd; d++) {
597:     dv      = a_diagv[d];
598:     nb_diag = 4*a_diag[d];
599:     len     = a_bdlen[d];
600:     if (nb_diag > 0) {                /* lower triangle */
601:       pvin  = vin;
602:       pvout = vout + nb_diag;
603:       dv    = dv   + 4*nb_diag;
604:     } else {                       /* upper triangle, including main diagonal */
605:       pvin  = vin - nb_diag;
606:       pvout = vout;
607:     }
608:     for (k=0; k<len; k++) {
609:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];

611:       pvout[0] += dv[0]*pvin0 + dv[4]*pvin1  + dv[8]*pvin2 + dv[12]*pvin3;
612:       pvout[1] += dv[1]*pvin0 + dv[5]*pvin1  + dv[9]*pvin2 + dv[13]*pvin3;
613:       pvout[2] += dv[2]*pvin0 + dv[6]*pvin1  + dv[10]*pvin2 + dv[14]*pvin3;
614:       pvout[3] += dv[3]*pvin0 + dv[7]*pvin1  + dv[11]*pvin2 + dv[15]*pvin3;

616:       pvout += 4; pvin += 4; dv += 16;
617:     }
618:     PetscLogFlops(32*len);
619:   }
620:   VecRestoreArray(xx,&vin);
621:   VecRestoreArray(yy,&vout);
622:   return(0);
623: }

625: int MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
626: {
627:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
628:   int             ierr,nd = a->nd,nb_diag;
629:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
630:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
631:   PetscScalar     *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;

634:   if (zz != yy) {VecCopy(zz,yy);}
635:   VecGetArray(xx,&vin);
636:   VecGetArray(yy,&vout);
637:   for (d=0; d<nd; d++) {
638:     dv      = a_diagv[d];
639:     nb_diag = 5*a_diag[d];
640:     len     = a_bdlen[d];
641:     if (nb_diag > 0) {                /* lower triangle */
642:       pvin  = vin;
643:       pvout = vout + nb_diag;
644:       dv    = dv   + 5*nb_diag;
645:     } else {                       /* upper triangle, including main diagonal */
646:       pvin  = vin - nb_diag;
647:       pvout = vout;
648:     }
649:     for (k=0; k<len; k++) {
650:       pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];

652:       pvout[0] += dv[0]*pvin0 + dv[5]*pvin1  + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
653:       pvout[1] += dv[1]*pvin0 + dv[6]*pvin1  + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
654:       pvout[2] += dv[2]*pvin0 + dv[7]*pvin1  + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
655:       pvout[3] += dv[3]*pvin0 + dv[8]*pvin1  + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
656:       pvout[4] += dv[4]*pvin0 + dv[9]*pvin1  + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;

658:       pvout += 5; pvin += 5; dv += 25;
659:     }
660:     PetscLogFlops(50*len);
661:   }
662:   VecRestoreArray(xx,&vin);
663:   VecRestoreArray(yy,&vout);
664:   return(0);
665: }

667: int MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
668: {
669:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
670:   int             ierr,nd = a->nd,bs = a->bs,nb_diag,bs2 = bs*bs;
671:   int             *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
672:   PetscScalar     *vin,*vout,**a_diagv = a->diagv;
673:   PetscScalar     *pvin,*pvout,*dv;

676:   if (zz != yy) {VecCopy(zz,yy);}
677:   VecGetArray(xx,&vin);
678:   VecGetArray(yy,&vout);
679:   for (d=0; d<nd; d++) {
680:     dv      = a_diagv[d];
681:     nb_diag = bs*a_diag[d];
682:     len     = a_bdlen[d];
683:     if (nb_diag > 0) {                /* lower triangle */
684:       pvin  = vin;
685:       pvout = vout + nb_diag;
686:       dv    = dv   + bs*nb_diag;
687:     } else {                       /* upper triangle, including main diagonal */
688:       pvin  = vin - nb_diag;
689:       pvout = vout;
690:     }
691:     for (k=0; k<len; k++) {
692:       Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
693:       pvout += bs; pvin += bs; dv += bs2;
694:     }
695:     PetscLogFlops(2*bs2*len);
696:   }
697:   VecRestoreArray(xx,&vin);
698:   VecRestoreArray(yy,&vout);
699:   return(0);
700: }

702: /*
703:      MatMultTranspose ----------------------------------------------
704:  */
705: int MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
706: {
707:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
708:   int             ierr,nd = a->nd,diag,d,j,len;
709:   PetscScalar     *pvin,*pvout,*dv;
710:   PetscScalar     *vin,*vout;
711: 
713:   VecGetArray(xx,&vin);
714:   VecGetArray(yy,&vout);
715:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
716:   for (d=0; d<nd; d++) {
717:     dv   = a->diagv[d];
718:     diag = a->diag[d];
719:     len  = a->bdlen[d];
720:       /* diag of original matrix is (row/bs - col/bs) */
721:       /* diag of transpose matrix is (col/bs - row/bs) */
722:     if (diag < 0) {        /* transpose is lower triangle */
723:       pvin  = vin;
724:       pvout = vout - diag;
725:     } else {        /* transpose is upper triangle, including main diagonal */
726:       pvin  = vin + diag;
727:       pvout = vout;
728:       dv    = dv + diag;
729:     }
730:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
731:   }
732:   VecRestoreArray(xx,&vin);
733:   VecRestoreArray(yy,&vout);
734:   return(0);
735: }

737: int MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
738: {
739:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
740:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
741:   PetscScalar     *pvin,*pvout,*dv;
742:   PetscScalar     *vin,*vout;
743: 
745:   VecGetArray(xx,&vin);
746:   VecGetArray(yy,&vout);
747:   PetscMemzero(vout,A->n*sizeof(PetscScalar));
748:   for (d=0; d<nd; d++) {
749:     dv   = a->diagv[d];
750:     diag = a->diag[d];
751:     len  = a->bdlen[d];
752:       /* diag of original matrix is (row/bs - col/bs) */
753:       /* diag of transpose matrix is (col/bs - row/bs) */
754:     if (diag < 0) {        /* transpose is lower triangle */
755:       pvin  = vin;
756:       pvout = vout - bs*diag;
757:     } else {        /* transpose is upper triangle, including main diagonal */
758:       pvin  = vin + bs*diag;
759:       pvout = vout;
760:       dv    = dv + diag;
761:     }
762:     for (k=0; k<len; k++) {
763:       kloc = k*bs; kshift = kloc*bs;
764:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
765:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
766:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
767:         }
768:       }
769:     }
770:   }
771:   VecRestoreArray(xx,&vin);
772:   VecRestoreArray(yy,&vout);
773:   return(0);
774: }

776: /*
777:      MatMultTransposeAdd ----------------------------------------------
778:  */
779: int MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
780: {
781:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
782:   int             ierr,nd = a->nd,diag,d,j,len;
783:   PetscScalar     *pvin,*pvout,*dv;
784:   PetscScalar     *vin,*vout;
785: 
787:   if (zz != yy) {VecCopy(zz,yy);}
788:   VecGetArray(xx,&vin);
789:   VecGetArray(yy,&vout);
790:   for (d=0; d<nd; d++) {
791:     dv   = a->diagv[d];
792:     diag = a->diag[d];
793:     len  = a->bdlen[d];
794:       /* diag of original matrix is (row/bs - col/bs) */
795:       /* diag of transpose matrix is (col/bs - row/bs) */
796:     if (diag < 0) {        /* transpose is lower triangle */
797:       pvin  = vin;
798:       pvout = vout - diag;
799:     } else {        /* transpose is upper triangle, including main diagonal */
800:       pvin  = vin + diag;
801:       pvout = vout;
802:       dv    = dv + diag;
803:     }
804:     for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
805:   }
806:   VecRestoreArray(xx,&vin);
807:   VecRestoreArray(yy,&vout);
808:   return(0);
809: }

811: int MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
812: {
813:   Mat_SeqBDiag    *a = (Mat_SeqBDiag*)A->data;
814:   int             ierr,nd = a->nd,bs = a->bs,diag,kshift,kloc,d,i,j,k,len;
815:   PetscScalar     *pvin,*pvout,*dv;
816:   PetscScalar     *vin,*vout;
817: 
819:   if (zz != yy) {VecCopy(zz,yy);}
820:   VecGetArray(xx,&vin);
821:   VecGetArray(yy,&vout);
822:   for (d=0; d<nd; d++) {
823:     dv   = a->diagv[d];
824:     diag = a->diag[d];
825:     len  = a->bdlen[d];
826:       /* diag of original matrix is (row/bs - col/bs) */
827:       /* diag of transpose matrix is (col/bs - row/bs) */
828:     if (diag < 0) {        /* transpose is lower triangle */
829:       pvin  = vin;
830:       pvout = vout - bs*diag;
831:     } else {        /* transpose is upper triangle, including main diagonal */
832:       pvin  = vin + bs*diag;
833:       pvout = vout;
834:       dv    = dv + diag;
835:     }
836:     for (k=0; k<len; k++) {
837:       kloc = k*bs; kshift = kloc*bs;
838:       for (i=0; i<bs; i++) {         /* i = local column of transpose */
839:         for (j=0; j<bs; j++) {   /* j = local row of transpose */
840:           pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
841:         }
842:       }
843:     }
844:   }
845:   VecRestoreArray(xx,&vin);
846:   VecRestoreArray(yy,&vout);
847:   return(0);
848: }
849: int MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
850: {
851:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
852:   PetscScalar  *x,*b,*xb,*dd,*dv,dval,sum;
853:   int          ierr,i,j,k,d,kbase,bs = a->bs,kloc;
854:   int          mainbd = a->mainbd,diag,mblock = a->mblock,bloc;

857:   its = its*lits;
858:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

860:   /* Currently this code doesn't use wavefront orderings, although
861:      we should eventually incorporate that option, whatever wavefront
862:      ordering maybe :-) */

864:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");

866:   VecGetArray(xx,&x);
867:   if (xx != bb) {
868:     VecGetArray(bb,&b);
869:   } else {
870:     b = x;
871:   }
872:   dd = a->diagv[mainbd];
873:   if (flag == SOR_APPLY_UPPER) {
874:     /* apply (U + D/omega) to the vector */
875:     for (k=0; k<mblock; k++) {
876:       kloc = k*bs; kbase = kloc*bs;
877:       for (i=0; i<bs; i++) {
878:         sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
879:         for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
880:         for (d=mainbd+1; d<a->nd; d++) {
881:           diag = a->diag[d];
882:           dv   = a->diagv[d];
883:           if (k-diag < mblock) {
884:             for (j=0; j<bs; j++) {
885:               sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
886:             }
887:           }
888:         }
889:         x[kloc+i] = sum;
890:       }
891:     }
892:     VecRestoreArray(xx,&x);
893:     if (xx != bb) {VecRestoreArray(bb,&b);}
894:     return(0);
895:   }
896:   if (flag & SOR_ZERO_INITIAL_GUESS) {
897:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
898:       for (k=0; k<mblock; k++) {
899:         kloc = k*bs; kbase = kloc*bs;
900:         for (i=0; i<bs; i++) {
901:           sum  = b[i+kloc];
902:           dval = shift + dd[i*(bs+1)+kbase];
903:           for (d=0; d<mainbd; d++) {
904:             diag = a->diag[d];
905:             dv   = a->diagv[d];
906:             if (k >= diag) {
907:               for (j=0; j<bs; j++)
908:                 sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
909:             }
910:           }
911:           for (j=0; j<i; j++){
912:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
913:           }
914:           x[kloc+i] = omega*sum/dval;
915:         }
916:       }
917:       xb = x;
918:     } else xb = b;
919:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
920:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
921:       for (k=0; k<mblock; k++) {
922:         kloc = k*bs; kbase = kloc*bs;
923:         for (i=0; i<bs; i++)
924:           x[kloc+i] *= dd[i*(bs+1)+kbase];
925:       }
926:     }
927:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
928:       for (k=mblock-1; k>=0; k--) {
929:         kloc = k*bs; kbase = kloc*bs;
930:         for (i=bs-1; i>=0; i--) {
931:           sum  = xb[i+kloc];
932:           dval = shift + dd[i*(bs+1)+kbase];
933:           for (j=i+1; j<bs; j++)
934:             sum -= dd[kbase + j*bs + i] * x[kloc + j];
935:           for (d=mainbd+1; d<a->nd; d++) {
936:             diag = a->diag[d];
937:             dv   = a->diagv[d];
938:             bloc = k - diag;
939:             if (bloc < mblock) {
940:               for (j=0; j<bs; j++)
941:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
942:             }
943:           }
944:           x[kloc+i] = omega*sum/dval;
945:         }
946:       }
947:     }
948:     its--;
949:   }
950:   while (its--) {
951:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
952:       for (k=0; k<mblock; k++) {
953:         kloc = k*bs; kbase = kloc*bs;
954:         for (i=0; i<bs; i++) {
955:           sum  = b[i+kloc];
956:           dval = shift + dd[i*(bs+1)+kbase];
957:           for (d=0; d<mainbd; d++) {
958:             diag = a->diag[d];
959:             dv   = a->diagv[d];
960:             bloc = k - diag;
961:             if (bloc >= 0) {
962:               for (j=0; j<bs; j++) {
963:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
964:               }
965:             }
966:           }
967:           for (d=mainbd; d<a->nd; d++) {
968:             diag = a->diag[d];
969:             dv   = a->diagv[d];
970:             bloc = k - diag;
971:             if (bloc < mblock) {
972:               for (j=0; j<bs; j++) {
973:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
974:               }
975:             }
976:           }
977:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
978:         }
979:       }
980:     }
981:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
982:       for (k=mblock-1; k>=0; k--) {
983:         kloc = k*bs; kbase = kloc*bs;
984:         for (i=bs-1; i>=0; i--) {
985:           sum  = b[i+kloc];
986:           dval = shift + dd[i*(bs+1)+kbase];
987:           for (d=0; d<mainbd; d++) {
988:             diag = a->diag[d];
989:             dv   = a->diagv[d];
990:             bloc = k - diag;
991:             if (bloc >= 0) {
992:               for (j=0; j<bs; j++) {
993:                 sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
994:               }
995:             }
996:           }
997:           for (d=mainbd; d<a->nd; d++) {
998:             diag = a->diag[d];
999:             dv   = a->diagv[d];
1000:             bloc = k - diag;
1001:             if (bloc < mblock) {
1002:               for (j=0; j<bs; j++) {
1003:                 sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1004:               }
1005:             }
1006:           }
1007:           x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1008:         }
1009:       }
1010:     }
1011:   }
1012:   VecRestoreArray(xx,&x);
1013:   if (xx != bb) VecRestoreArray(bb,&b);
1014:   return(0);
1015: }

1017: int MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,int its,int lits,Vec xx)
1018: {
1019:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1020:   PetscScalar  *x,*b,*xb,*dd,dval,sum;
1021:   int          ierr,m = A->m,i,d,loc;
1022:   int          mainbd = a->mainbd,diag;

1025:   its = its*lits;
1026:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1027:   /* Currently this code doesn't use wavefront orderings,although
1028:      we should eventually incorporate that option, whatever wavefront
1029:      ordering maybe :-) */

1031:   VecGetArray(xx,&x);
1032:   VecGetArray(bb,&b);
1033:   if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1034:   dd = a->diagv[mainbd];
1035:   if (flag == SOR_APPLY_UPPER) {
1036:     /* apply (U + D/omega) to the vector */
1037:     for (i=0; i<m; i++) {
1038:       sum = b[i] * (shift + dd[i]) / omega;
1039:       for (d=mainbd+1; d<a->nd; d++) {
1040:         diag = a->diag[d];
1041:         if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1042:       }
1043:       x[i] = sum;
1044:     }
1045:     VecRestoreArray(xx,&x);
1046:     VecRestoreArray(bb,&b);
1047:     return(0);
1048:   }
1049:   if (flag & SOR_ZERO_INITIAL_GUESS) {
1050:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1051:       for (i=0; i<m; i++) {
1052:         sum  = b[i];
1053:         for (d=0; d<mainbd; d++) {
1054:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1055:         }
1056:         x[i] = omega*(sum/(shift + dd[i]));
1057:       }
1058:       xb = x;
1059:     } else xb = b;
1060:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1061:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1062:       for (i=0; i<m; i++) x[i] *= dd[i];
1063:     }
1064:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1065:       for (i=m-1; i>=0; i--) {
1066:         sum = xb[i];
1067:         for (d=mainbd+1; d<a->nd; d++) {
1068:           diag = a->diag[d];
1069:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1070:         }
1071:         x[i] = omega*(sum/(shift + dd[i]));
1072:       }
1073:     }
1074:     its--;
1075:   }
1076:   while (its--) {
1077:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1078:       for (i=0; i<m; i++) {
1079:         sum  = b[i];
1080:         dval = shift + dd[i];
1081:         for (d=0; d<mainbd; d++) {
1082:           if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1083:         }
1084:         for (d=mainbd; d<a->nd; d++) {
1085:           diag = a->diag[d];
1086:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1087:         }
1088:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1089:       }
1090:     }
1091:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1092:       for (i=m-1; i>=0; i--) {
1093:         sum = b[i];
1094:         for (d=0; d<mainbd; d++) {
1095:           loc = i - a->diag[d];
1096:           if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1097:         }
1098:         for (d=mainbd; d<a->nd; d++) {
1099:           diag = a->diag[d];
1100:           if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1101:         }
1102:         x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1103:       }
1104:     }
1105:   }
1106:   VecRestoreArray(xx,&x);
1107:   VecRestoreArray(bb,&b);
1108:   return(0);
1109: }