Actual source code: bdiag2.c
1: #define PETSCMAT_DLL
3: /* Block diagonal matrix format */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/inline/ilu.h
10: PetscErrorCode MatSetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
11: {
12: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
13: PetscInt kk,ldiag,row,newnz,*bdlen_new;
15: PetscInt j,k, *diag_new;
16: PetscTruth roworiented = a->roworiented,dfound;
17: PetscScalar value,**diagv_new;
20: for (kk=0; kk<m; kk++) { /* loop over added rows */
21: row = im[kk];
22: if (row < 0) continue;
23: if (row >= A->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->M-1);
24: for (j=0; j<n; j++) {
25: if (in[j] < 0) continue;
26: if (in[j] >= A->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],A->N-1);
27: ldiag = row - in[j]; /* diagonal number */
28: dfound = PETSC_FALSE;
29: if (roworiented) {
30: value = v[j + kk*n];
31: } else {
32: value = v[kk + j*m];
33: }
34: /* search diagonals for required one */
35: for (k=0; k<a->nd; k++) {
36: if (a->diag[k] == ldiag) {
37: dfound = PETSC_TRUE;
38: if (is == ADD_VALUES) a->diagv[k][row] += value;
39: else a->diagv[k][row] = value;
40: break;
41: }
42: }
43: if (!dfound) {
44: if (a->nonew || a->nonew_diag) {
45: #if !defined(PETSC_USE_COMPLEX)
46: if (a->user_alloc && value) {
47: #else
48: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
49: #endif
50: PetscLogInfo((A,"MatSetValues_SeqBDiag:Nonzero in diagonal %D that user did not allocate\n",ldiag));
51: }
52: } else {
53: PetscLogInfo((A,"MatSetValues_SeqBDiag: Allocating new diagonal: %D\n",ldiag));
54: a->reallocs++;
55: /* free old bdiag storage info and reallocate */
56: PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
57: bdlen_new = diag_new + a->nd + 1;
58: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
59: for (k=0; k<a->nd; k++) {
60: diag_new[k] = a->diag[k];
61: diagv_new[k] = a->diagv[k];
62: bdlen_new[k] = a->bdlen[k];
63: }
64: diag_new[a->nd] = ldiag;
65: if (ldiag > 0) { /* lower triangular */
66: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
67: } else { /* upper triangular */
68: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
69: }
70: newnz = bdlen_new[a->nd];
71: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
72: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
73: /* adjust pointers so that dv[diag][row] works for all diagonals*/
74: if (diag_new[a->nd] > 0) {
75: diagv_new[a->nd] -= diag_new[a->nd];
76: }
77: a->maxnz += newnz;
78: a->nz += newnz;
79: PetscFree(a->diagv);
80: PetscFree(a->diag);
81: a->diag = diag_new;
82: a->bdlen = bdlen_new;
83: a->diagv = diagv_new;
85: /* Insert value */
86: if (is == ADD_VALUES) a->diagv[a->nd][row] += value;
87: else a->diagv[a->nd][row] = value;
88: a->nd++;
89: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
90: }
91: }
92: }
93: }
94: return(0);
95: }
100: PetscErrorCode MatSetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
101: {
102: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
104: PetscInt kk,ldiag,shift,row,newnz,*bdlen_new;
105: PetscInt j,k,bs = A->bs,*diag_new,idx=0;
106: PetscTruth roworiented = a->roworiented,dfound;
107: PetscScalar value,**diagv_new;
110: for (kk=0; kk<m; kk++) { /* loop over added rows */
111: row = im[kk];
112: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row);
113: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
114: shift = (row/bs)*bs*bs + row%bs;
115: for (j=0; j<n; j++) {
116: ldiag = row/bs - in[j]/bs; /* block diagonal */
117: dfound = PETSC_FALSE;
118: if (roworiented) {
119: value = v[idx++];
120: } else {
121: value = v[kk + j*m];
122: }
123: /* seach for appropriate diagonal */
124: for (k=0; k<a->nd; k++) {
125: if (a->diag[k] == ldiag) {
126: dfound = PETSC_TRUE;
127: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
128: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
129: break;
130: }
131: }
132: if (!dfound) {
133: if (a->nonew || a->nonew_diag) {
134: #if !defined(PETSC_USE_COMPLEX)
135: if (a->user_alloc && value) {
136: #else
137: if (a->user_alloc && PetscRealPart(value) || PetscImaginaryPart(value)) {
138: #endif
139: PetscLogInfo((A,"MatSetValues_SeqBDiag:Nonzero in diagonal %D that user did not allocate\n",ldiag));
140: }
141: } else {
142: PetscLogInfo((A,"MatSetValues_SeqBDiag: Allocating new diagonal: %D\n",ldiag));
143: a->reallocs++;
144: /* free old bdiag storage info and reallocate */
145: PetscMalloc(2*(a->nd+1)*sizeof(PetscInt),&diag_new);
146: bdlen_new = diag_new + a->nd + 1;
147: PetscMalloc((a->nd+1)*sizeof(PetscScalar*),&diagv_new);
148: for (k=0; k<a->nd; k++) {
149: diag_new[k] = a->diag[k];
150: diagv_new[k] = a->diagv[k];
151: bdlen_new[k] = a->bdlen[k];
152: }
153: diag_new[a->nd] = ldiag;
154: if (ldiag > 0) {/* lower triangular */
155: bdlen_new[a->nd] = PetscMin(a->nblock,a->mblock - ldiag);
156: } else { /* upper triangular */
157: bdlen_new[a->nd] = PetscMin(a->mblock,a->nblock + ldiag);
158: }
159: newnz = bs*bs*bdlen_new[a->nd];
160: PetscMalloc(newnz*sizeof(PetscScalar),&diagv_new[a->nd]);
161: PetscMemzero(diagv_new[a->nd],newnz*sizeof(PetscScalar));
162: /* adjust pointer so that dv[diag][row] works for all diagonals */
163: if (diag_new[a->nd] > 0) {
164: diagv_new[a->nd] -= bs*bs*diag_new[a->nd];
165: }
166: a->maxnz += newnz; a->nz += newnz;
167: PetscFree(a->diagv);
168: PetscFree(a->diag);
169: a->diag = diag_new;
170: a->bdlen = bdlen_new;
171: a->diagv = diagv_new;
173: /* Insert value */
174: if (is == ADD_VALUES) a->diagv[k][shift + (in[j]%bs)*bs] += value;
175: else a->diagv[k][shift + (in[j]%bs)*bs] = value;
176: a->nd++;
177: PetscLogObjectMemory(A,newnz*sizeof(PetscScalar)+2*sizeof(PetscInt)+sizeof(PetscScalar*));
178: }
179: }
180: }
181: }
182: return(0);
183: }
187: PetscErrorCode MatGetValues_SeqBDiag_1(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
188: {
189: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
190: PetscInt kk,ldiag,row,j,k;
191: PetscScalar zero = 0.0;
192: PetscTruth dfound;
195: for (kk=0; kk<m; kk++) { /* loop over rows */
196: row = im[kk];
197: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
198: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
199: for (j=0; j<n; j++) {
200: if (in[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
201: if (in[j] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
202: ldiag = row - in[j]; /* diagonal number */
203: dfound = PETSC_FALSE;
204: for (k=0; k<a->nd; k++) {
205: if (a->diag[k] == ldiag) {
206: dfound = PETSC_TRUE;
207: *v++ = a->diagv[k][row];
208: break;
209: }
210: }
211: if (!dfound) *v++ = zero;
212: }
213: }
214: return(0);
215: }
219: PetscErrorCode MatGetValues_SeqBDiag_N(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
220: {
221: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
222: PetscInt kk,ldiag,shift,row,j,k,bs = A->bs;
223: PetscScalar zero = 0.0;
224: PetscTruth dfound;
227: for (kk=0; kk<m; kk++) { /* loop over rows */
228: row = im[kk];
229: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
230: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
231: shift = (row/bs)*bs*bs + row%bs;
232: for (j=0; j<n; j++) {
233: ldiag = row/bs - in[j]/bs; /* block diagonal */
234: dfound = PETSC_FALSE;
235: for (k=0; k<a->nd; k++) {
236: if (a->diag[k] == ldiag) {
237: dfound = PETSC_TRUE;
238: *v++ = a->diagv[k][shift + (in[j]%bs)*bs ];
239: break;
240: }
241: }
242: if (!dfound) *v++ = zero;
243: }
244: }
245: return(0);
246: }
248: /*
249: MatMults for blocksize 1 to 5 and N -------------------------------
250: */
253: PetscErrorCode MatMult_SeqBDiag_1(Mat A,Vec xx,Vec yy)
254: {
255: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
256: PetscInt nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen;
258: PetscInt d,j,len;
259: PetscScalar *vin,*vout,**a_diagv = a->diagv;
260: PetscScalar *pvin,*pvout,*dv;
263: VecGetArray(xx,&vin);
264: VecGetArray(yy,&vout);
265: PetscMemzero(vout,A->m*sizeof(PetscScalar));
266: for (d=0; d<nd; d++) {
267: dv = a_diagv[d];
268: diag = a_diag[d];
269: len = a_bdlen[d];
270: if (diag > 0) { /* lower triangle */
271: pvin = vin;
272: pvout = vout + diag;
273: dv = dv + diag;
274: } else { /* upper triangle,including main diagonal */
275: pvin = vin - diag;
276: pvout = vout;
277: }
278: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
279: PetscLogFlops(2*len);
280: }
281: VecRestoreArray(xx,&vin);
282: VecRestoreArray(yy,&vout);
283: return(0);
284: }
288: PetscErrorCode MatMult_SeqBDiag_2(Mat A,Vec xx,Vec yy)
289: {
290: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
291: PetscInt nd = a->nd,nb_diag;
293: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
294: PetscScalar *vin,*vout,**a_diagv = a->diagv;
295: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
298: VecGetArray(xx,&vin);
299: VecGetArray(yy,&vout);
300: PetscMemzero(vout,A->m*sizeof(PetscScalar));
301: for (d=0; d<nd; d++) {
302: dv = a_diagv[d];
303: nb_diag = 2*a_diag[d];
304: len = a_bdlen[d];
305: if (nb_diag > 0) { /* lower triangle */
306: pvin = vin;
307: pvout = vout + nb_diag;
308: dv = dv + 2*nb_diag;
309: } else { /* upper triangle, including main diagonal */
310: pvin = vin - nb_diag;
311: pvout = vout;
312: }
313: for (k=0; k<len; k++) {
314: pvin0 = pvin[0]; pvin1 = pvin[1];
316: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
317: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
319: pvout += 2; pvin += 2; dv += 4;
320: }
321: PetscLogFlops(8*len);
322: }
323: VecRestoreArray(xx,&vin);
324: VecRestoreArray(yy,&vout);
325: return(0);
326: }
330: PetscErrorCode MatMult_SeqBDiag_3(Mat A,Vec xx,Vec yy)
331: {
332: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
333: PetscInt nd = a->nd,nb_diag;
335: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
336: PetscScalar *vin,*vout,**a_diagv = a->diagv;
337: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
340: VecGetArray(xx,&vin);
341: VecGetArray(yy,&vout);
342: PetscMemzero(vout,A->m*sizeof(PetscScalar));
343: for (d=0; d<nd; d++) {
344: dv = a_diagv[d];
345: nb_diag = 3*a_diag[d];
346: len = a_bdlen[d];
347: if (nb_diag > 0) { /* lower triangle */
348: pvin = vin;
349: pvout = vout + nb_diag;
350: dv = dv + 3*nb_diag;
351: } else { /* upper triangle,including main diagonal */
352: pvin = vin - nb_diag;
353: pvout = vout;
354: }
355: for (k=0; k<len; k++) {
356: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
358: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
359: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
360: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
362: pvout += 3; pvin += 3; dv += 9;
363: }
364: PetscLogFlops(18*len);
365: }
366: VecRestoreArray(xx,&vin);
367: VecRestoreArray(yy,&vout);
368: return(0);
369: }
373: PetscErrorCode MatMult_SeqBDiag_4(Mat A,Vec xx,Vec yy)
374: {
375: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
376: PetscInt nd = a->nd,nb_diag;
378: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
379: PetscScalar *vin,*vout,**a_diagv = a->diagv;
380: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
383: VecGetArray(xx,&vin);
384: VecGetArray(yy,&vout);
385: PetscMemzero(vout,A->m*sizeof(PetscScalar));
386: for (d=0; d<nd; d++) {
387: dv = a_diagv[d];
388: nb_diag = 4*a_diag[d];
389: len = a_bdlen[d];
390: if (nb_diag > 0) { /* lower triangle */
391: pvin = vin;
392: pvout = vout + nb_diag;
393: dv = dv + 4*nb_diag;
394: } else { /* upper triangle,including main diagonal */
395: pvin = vin - nb_diag;
396: pvout = vout;
397: }
398: for (k=0; k<len; k++) {
399: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
401: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
402: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
403: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
404: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
406: pvout += 4; pvin += 4; dv += 16;
407: }
408: PetscLogFlops(32*len);
409: }
410: VecRestoreArray(xx,&vin);
411: VecRestoreArray(yy,&vout);
412: return(0);
413: }
417: PetscErrorCode MatMult_SeqBDiag_5(Mat A,Vec xx,Vec yy)
418: {
419: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
420: PetscInt nd = a->nd,nb_diag;
422: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
423: PetscScalar *vin,*vout,**a_diagv = a->diagv;
424: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
427: VecGetArray(xx,&vin);
428: VecGetArray(yy,&vout);
429: PetscMemzero(vout,A->m*sizeof(PetscScalar));
430: for (d=0; d<nd; d++) {
431: dv = a_diagv[d];
432: nb_diag = 5*a_diag[d];
433: len = a_bdlen[d];
434: if (nb_diag > 0) { /* lower triangle */
435: pvin = vin;
436: pvout = vout + nb_diag;
437: dv = dv + 5*nb_diag;
438: } else { /* upper triangle,including main diagonal */
439: pvin = vin - nb_diag;
440: pvout = vout;
441: }
442: for (k=0; k<len; k++) {
443: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
445: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
446: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
447: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
448: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
449: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
451: pvout += 5; pvin += 5; dv += 25;
452: }
453: PetscLogFlops(50*len);
454: }
455: VecRestoreArray(xx,&vin);
456: VecRestoreArray(yy,&vout);
457: return(0);
458: }
462: PetscErrorCode MatMult_SeqBDiag_N(Mat A,Vec xx,Vec yy)
463: {
464: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
465: PetscInt nd = a->nd,bs = A->bs,nb_diag,bs2 = bs*bs;
467: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
468: PetscScalar *vin,*vout,**a_diagv = a->diagv;
469: PetscScalar *pvin,*pvout,*dv;
472: VecGetArray(xx,&vin);
473: VecGetArray(yy,&vout);
474: PetscMemzero(vout,A->m*sizeof(PetscScalar));
475: for (d=0; d<nd; d++) {
476: dv = a_diagv[d];
477: nb_diag = bs*a_diag[d];
478: len = a_bdlen[d];
479: if (nb_diag > 0) { /* lower triangle */
480: pvin = vin;
481: pvout = vout + nb_diag;
482: dv = dv + bs*nb_diag;
483: } else { /* upper triangle, including main diagonal */
484: pvin = vin - nb_diag;
485: pvout = vout;
486: }
487: for (k=0; k<len; k++) {
488: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
489: pvout += bs; pvin += bs; dv += bs2;
490: }
491: PetscLogFlops(2*bs2*len);
492: }
493: VecRestoreArray(xx,&vin);
494: VecRestoreArray(yy,&vout);
495: return(0);
496: }
498: /*
499: MatMultAdds for blocksize 1 to 5 and N -------------------------------
500: */
503: PetscErrorCode MatMultAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
504: {
505: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
507: PetscInt nd = a->nd,diag,*a_diag = a->diag,*a_bdlen = a->bdlen,d,j,len;
508: PetscScalar *vin,*vout,**a_diagv = a->diagv;
509: PetscScalar *pvin,*pvout,*dv;
512: if (zz != yy) {VecCopy(zz,yy);}
513: VecGetArray(xx,&vin);
514: VecGetArray(yy,&vout);
515: for (d=0; d<nd; d++) {
516: dv = a_diagv[d];
517: diag = a_diag[d];
518: len = a_bdlen[d];
519: if (diag > 0) { /* lower triangle */
520: pvin = vin;
521: pvout = vout + diag;
522: dv = dv + diag;
523: } else { /* upper triangle, including main diagonal */
524: pvin = vin - diag;
525: pvout = vout;
526: }
527: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
528: PetscLogFlops(2*len);
529: }
530: VecRestoreArray(xx,&vin);
531: VecRestoreArray(yy,&vout);
532: return(0);
533: }
537: PetscErrorCode MatMultAdd_SeqBDiag_2(Mat A,Vec xx,Vec zz,Vec yy)
538: {
539: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
541: PetscInt nd = a->nd,nb_diag;
542: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
543: PetscScalar *vin,*vout,**a_diagv = a->diagv;
544: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1;
547: if (zz != yy) {VecCopy(zz,yy);}
548: VecGetArray(xx,&vin);
549: VecGetArray(yy,&vout);
550: for (d=0; d<nd; d++) {
551: dv = a_diagv[d];
552: nb_diag = 2*a_diag[d];
553: len = a_bdlen[d];
554: if (nb_diag > 0) { /* lower triangle */
555: pvin = vin;
556: pvout = vout + nb_diag;
557: dv = dv + 2*nb_diag;
558: } else { /* upper triangle, including main diagonal */
559: pvin = vin - nb_diag;
560: pvout = vout;
561: }
562: for (k=0; k<len; k++) {
563: pvin0 = pvin[0]; pvin1 = pvin[1];
565: pvout[0] += dv[0]*pvin0 + dv[2]*pvin1;
566: pvout[1] += dv[1]*pvin0 + dv[3]*pvin1;
568: pvout += 2; pvin += 2; dv += 4;
569: }
570: PetscLogFlops(8*len);
571: }
572: VecRestoreArray(xx,&vin);
573: VecRestoreArray(yy,&vout);
574: return(0);
575: }
579: PetscErrorCode MatMultAdd_SeqBDiag_3(Mat A,Vec xx,Vec zz,Vec yy)
580: {
581: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
583: PetscInt nd = a->nd,nb_diag;
584: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
585: PetscScalar *vin,*vout,**a_diagv = a->diagv;
586: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2;
589: if (zz != yy) {VecCopy(zz,yy);}
590: VecGetArray(xx,&vin);
591: VecGetArray(yy,&vout);
592: for (d=0; d<nd; d++) {
593: dv = a_diagv[d];
594: nb_diag = 3*a_diag[d];
595: len = a_bdlen[d];
596: if (nb_diag > 0) { /* lower triangle */
597: pvin = vin;
598: pvout = vout + nb_diag;
599: dv = dv + 3*nb_diag;
600: } else { /* upper triangle, including main diagonal */
601: pvin = vin - nb_diag;
602: pvout = vout;
603: }
604: for (k=0; k<len; k++) {
605: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2];
607: pvout[0] += dv[0]*pvin0 + dv[3]*pvin1 + dv[6]*pvin2;
608: pvout[1] += dv[1]*pvin0 + dv[4]*pvin1 + dv[7]*pvin2;
609: pvout[2] += dv[2]*pvin0 + dv[5]*pvin1 + dv[8]*pvin2;
611: pvout += 3; pvin += 3; dv += 9;
612: }
613: PetscLogFlops(18*len);
614: }
615: VecRestoreArray(xx,&vin);
616: VecRestoreArray(yy,&vout);
617: return(0);
618: }
622: PetscErrorCode MatMultAdd_SeqBDiag_4(Mat A,Vec xx,Vec zz,Vec yy)
623: {
624: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
626: PetscInt nd = a->nd,nb_diag;
627: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
628: PetscScalar *vin,*vout,**a_diagv = a->diagv;
629: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3;
632: if (zz != yy) {VecCopy(zz,yy);}
633: VecGetArray(xx,&vin);
634: VecGetArray(yy,&vout);
635: for (d=0; d<nd; d++) {
636: dv = a_diagv[d];
637: nb_diag = 4*a_diag[d];
638: len = a_bdlen[d];
639: if (nb_diag > 0) { /* lower triangle */
640: pvin = vin;
641: pvout = vout + nb_diag;
642: dv = dv + 4*nb_diag;
643: } else { /* upper triangle, including main diagonal */
644: pvin = vin - nb_diag;
645: pvout = vout;
646: }
647: for (k=0; k<len; k++) {
648: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3];
650: pvout[0] += dv[0]*pvin0 + dv[4]*pvin1 + dv[8]*pvin2 + dv[12]*pvin3;
651: pvout[1] += dv[1]*pvin0 + dv[5]*pvin1 + dv[9]*pvin2 + dv[13]*pvin3;
652: pvout[2] += dv[2]*pvin0 + dv[6]*pvin1 + dv[10]*pvin2 + dv[14]*pvin3;
653: pvout[3] += dv[3]*pvin0 + dv[7]*pvin1 + dv[11]*pvin2 + dv[15]*pvin3;
655: pvout += 4; pvin += 4; dv += 16;
656: }
657: PetscLogFlops(32*len);
658: }
659: VecRestoreArray(xx,&vin);
660: VecRestoreArray(yy,&vout);
661: return(0);
662: }
666: PetscErrorCode MatMultAdd_SeqBDiag_5(Mat A,Vec xx,Vec zz,Vec yy)
667: {
668: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
670: PetscInt nd = a->nd,nb_diag;
671: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
672: PetscScalar *vin,*vout,**a_diagv = a->diagv;
673: PetscScalar *pvin,*pvout,*dv,pvin0,pvin1,pvin2,pvin3,pvin4;
676: if (zz != yy) {VecCopy(zz,yy);}
677: VecGetArray(xx,&vin);
678: VecGetArray(yy,&vout);
679: for (d=0; d<nd; d++) {
680: dv = a_diagv[d];
681: nb_diag = 5*a_diag[d];
682: len = a_bdlen[d];
683: if (nb_diag > 0) { /* lower triangle */
684: pvin = vin;
685: pvout = vout + nb_diag;
686: dv = dv + 5*nb_diag;
687: } else { /* upper triangle, including main diagonal */
688: pvin = vin - nb_diag;
689: pvout = vout;
690: }
691: for (k=0; k<len; k++) {
692: pvin0 = pvin[0]; pvin1 = pvin[1]; pvin2 = pvin[2]; pvin3 = pvin[3]; pvin4 = pvin[4];
694: pvout[0] += dv[0]*pvin0 + dv[5]*pvin1 + dv[10]*pvin2 + dv[15]*pvin3 + dv[20]*pvin4;
695: pvout[1] += dv[1]*pvin0 + dv[6]*pvin1 + dv[11]*pvin2 + dv[16]*pvin3 + dv[21]*pvin4;
696: pvout[2] += dv[2]*pvin0 + dv[7]*pvin1 + dv[12]*pvin2 + dv[17]*pvin3 + dv[22]*pvin4;
697: pvout[3] += dv[3]*pvin0 + dv[8]*pvin1 + dv[13]*pvin2 + dv[18]*pvin3 + dv[23]*pvin4;
698: pvout[4] += dv[4]*pvin0 + dv[9]*pvin1 + dv[14]*pvin2 + dv[19]*pvin3 + dv[24]*pvin4;
700: pvout += 5; pvin += 5; dv += 25;
701: }
702: PetscLogFlops(50*len);
703: }
704: VecRestoreArray(xx,&vin);
705: VecRestoreArray(yy,&vout);
706: return(0);
707: }
711: PetscErrorCode MatMultAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
712: {
713: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
715: PetscInt nd = a->nd,bs = A->bs,nb_diag,bs2 = bs*bs;
716: PetscInt *a_diag = a->diag,*a_bdlen = a->bdlen,d,k,len;
717: PetscScalar *vin,*vout,**a_diagv = a->diagv;
718: PetscScalar *pvin,*pvout,*dv;
721: if (zz != yy) {VecCopy(zz,yy);}
722: VecGetArray(xx,&vin);
723: VecGetArray(yy,&vout);
724: for (d=0; d<nd; d++) {
725: dv = a_diagv[d];
726: nb_diag = bs*a_diag[d];
727: len = a_bdlen[d];
728: if (nb_diag > 0) { /* lower triangle */
729: pvin = vin;
730: pvout = vout + nb_diag;
731: dv = dv + bs*nb_diag;
732: } else { /* upper triangle, including main diagonal */
733: pvin = vin - nb_diag;
734: pvout = vout;
735: }
736: for (k=0; k<len; k++) {
737: Kernel_v_gets_v_plus_A_times_w(bs,pvout,dv,pvin);
738: pvout += bs; pvin += bs; dv += bs2;
739: }
740: PetscLogFlops(2*bs2*len);
741: }
742: VecRestoreArray(xx,&vin);
743: VecRestoreArray(yy,&vout);
744: return(0);
745: }
747: /*
748: MatMultTranspose ----------------------------------------------
749: */
752: PetscErrorCode MatMultTranspose_SeqBDiag_1(Mat A,Vec xx,Vec yy)
753: {
754: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
756: PetscInt nd = a->nd,diag,d,j,len;
757: PetscScalar *pvin,*pvout,*dv;
758: PetscScalar *vin,*vout;
759:
761: VecGetArray(xx,&vin);
762: VecGetArray(yy,&vout);
763: PetscMemzero(vout,A->n*sizeof(PetscScalar));
764: for (d=0; d<nd; d++) {
765: dv = a->diagv[d];
766: diag = a->diag[d];
767: len = a->bdlen[d];
768: /* diag of original matrix is (row/bs - col/bs) */
769: /* diag of transpose matrix is (col/bs - row/bs) */
770: if (diag < 0) { /* transpose is lower triangle */
771: pvin = vin;
772: pvout = vout - diag;
773: } else { /* transpose is upper triangle, including main diagonal */
774: pvin = vin + diag;
775: pvout = vout;
776: dv = dv + diag;
777: }
778: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
779: }
780: VecRestoreArray(xx,&vin);
781: VecRestoreArray(yy,&vout);
782: return(0);
783: }
787: PetscErrorCode MatMultTranspose_SeqBDiag_N(Mat A,Vec xx,Vec yy)
788: {
789: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
791: PetscInt nd = a->nd,bs = A->bs,diag,kshift,kloc,d,i,j,k,len;
792: PetscScalar *pvin,*pvout,*dv;
793: PetscScalar *vin,*vout;
794:
796: VecGetArray(xx,&vin);
797: VecGetArray(yy,&vout);
798: PetscMemzero(vout,A->n*sizeof(PetscScalar));
799: for (d=0; d<nd; d++) {
800: dv = a->diagv[d];
801: diag = a->diag[d];
802: len = a->bdlen[d];
803: /* diag of original matrix is (row/bs - col/bs) */
804: /* diag of transpose matrix is (col/bs - row/bs) */
805: if (diag < 0) { /* transpose is lower triangle */
806: pvin = vin;
807: pvout = vout - bs*diag;
808: } else { /* transpose is upper triangle, including main diagonal */
809: pvin = vin + bs*diag;
810: pvout = vout;
811: dv = dv + diag;
812: }
813: for (k=0; k<len; k++) {
814: kloc = k*bs; kshift = kloc*bs;
815: for (i=0; i<bs; i++) { /* i = local column of transpose */
816: for (j=0; j<bs; j++) { /* j = local row of transpose */
817: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
818: }
819: }
820: }
821: }
822: VecRestoreArray(xx,&vin);
823: VecRestoreArray(yy,&vout);
824: return(0);
825: }
827: /*
828: MatMultTransposeAdd ----------------------------------------------
829: */
832: PetscErrorCode MatMultTransposeAdd_SeqBDiag_1(Mat A,Vec xx,Vec zz,Vec yy)
833: {
834: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
836: PetscInt nd = a->nd,diag,d,j,len;
837: PetscScalar *pvin,*pvout,*dv;
838: PetscScalar *vin,*vout;
839:
841: if (zz != yy) {VecCopy(zz,yy);}
842: VecGetArray(xx,&vin);
843: VecGetArray(yy,&vout);
844: for (d=0; d<nd; d++) {
845: dv = a->diagv[d];
846: diag = a->diag[d];
847: len = a->bdlen[d];
848: /* diag of original matrix is (row/bs - col/bs) */
849: /* diag of transpose matrix is (col/bs - row/bs) */
850: if (diag < 0) { /* transpose is lower triangle */
851: pvin = vin;
852: pvout = vout - diag;
853: } else { /* transpose is upper triangle, including main diagonal */
854: pvin = vin + diag;
855: pvout = vout;
856: dv = dv + diag;
857: }
858: for (j=0; j<len; j++) pvout[j] += dv[j] * pvin[j];
859: }
860: VecRestoreArray(xx,&vin);
861: VecRestoreArray(yy,&vout);
862: return(0);
863: }
867: PetscErrorCode MatMultTransposeAdd_SeqBDiag_N(Mat A,Vec xx,Vec zz,Vec yy)
868: {
869: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
871: PetscInt nd = a->nd,bs = A->bs,diag,kshift,kloc,d,i,j,k,len;
872: PetscScalar *pvin,*pvout,*dv;
873: PetscScalar *vin,*vout;
874:
876: if (zz != yy) {VecCopy(zz,yy);}
877: VecGetArray(xx,&vin);
878: VecGetArray(yy,&vout);
879: for (d=0; d<nd; d++) {
880: dv = a->diagv[d];
881: diag = a->diag[d];
882: len = a->bdlen[d];
883: /* diag of original matrix is (row/bs - col/bs) */
884: /* diag of transpose matrix is (col/bs - row/bs) */
885: if (diag < 0) { /* transpose is lower triangle */
886: pvin = vin;
887: pvout = vout - bs*diag;
888: } else { /* transpose is upper triangle, including main diagonal */
889: pvin = vin + bs*diag;
890: pvout = vout;
891: dv = dv + diag;
892: }
893: for (k=0; k<len; k++) {
894: kloc = k*bs; kshift = kloc*bs;
895: for (i=0; i<bs; i++) { /* i = local column of transpose */
896: for (j=0; j<bs; j++) { /* j = local row of transpose */
897: pvout[kloc + j] += dv[kshift + j*bs + i] * pvin[kloc + i];
898: }
899: }
900: }
901: }
902: VecRestoreArray(xx,&vin);
903: VecRestoreArray(yy,&vout);
904: return(0);
905: }
908: PetscErrorCode MatRelax_SeqBDiag_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
909: {
910: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
911: PetscScalar *x,*b,*xb,*dd,*dv,dval,sum;
913: PetscInt i,j,k,d,kbase,bs = A->bs,kloc;
914: PetscInt mainbd = a->mainbd,diag,mblock = a->mblock,bloc;
917: its = its*lits;
918: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
920: /* Currently this code doesn't use wavefront orderings, although
921: we should eventually incorporate that option, whatever wavefront
922: ordering maybe :-) */
924: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
926: VecGetArray(xx,&x);
927: if (xx != bb) {
928: VecGetArray(bb,&b);
929: } else {
930: b = x;
931: }
932: dd = a->diagv[mainbd];
933: if (flag == SOR_APPLY_UPPER) {
934: /* apply (U + D/omega) to the vector */
935: for (k=0; k<mblock; k++) {
936: kloc = k*bs; kbase = kloc*bs;
937: for (i=0; i<bs; i++) {
938: sum = b[i+kloc] * (shift + dd[i*(bs+1)+kbase]) / omega;
939: for (j=i+1; j<bs; j++) sum += dd[kbase + j*bs + i] * b[kloc + j];
940: for (d=mainbd+1; d<a->nd; d++) {
941: diag = a->diag[d];
942: dv = a->diagv[d];
943: if (k-diag < mblock) {
944: for (j=0; j<bs; j++) {
945: sum += dv[kbase + j*bs + i] * b[(k-diag)*bs + j];
946: }
947: }
948: }
949: x[kloc+i] = sum;
950: }
951: }
952: VecRestoreArray(xx,&x);
953: if (xx != bb) {VecRestoreArray(bb,&b);}
954: return(0);
955: }
956: if (flag & SOR_ZERO_INITIAL_GUESS) {
957: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
958: for (k=0; k<mblock; k++) {
959: kloc = k*bs; kbase = kloc*bs;
960: for (i=0; i<bs; i++) {
961: sum = b[i+kloc];
962: dval = shift + dd[i*(bs+1)+kbase];
963: for (d=0; d<mainbd; d++) {
964: diag = a->diag[d];
965: dv = a->diagv[d];
966: if (k >= diag) {
967: for (j=0; j<bs; j++)
968: sum -= dv[k*bs*bs + j*bs + i] * x[(k-diag)*bs + j];
969: }
970: }
971: for (j=0; j<i; j++){
972: sum -= dd[kbase + j*bs + i] * x[kloc + j];
973: }
974: x[kloc+i] = omega*sum/dval;
975: }
976: }
977: xb = x;
978: } else xb = b;
979: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
980: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
981: for (k=0; k<mblock; k++) {
982: kloc = k*bs; kbase = kloc*bs;
983: for (i=0; i<bs; i++)
984: x[kloc+i] *= dd[i*(bs+1)+kbase];
985: }
986: }
987: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
988: for (k=mblock-1; k>=0; k--) {
989: kloc = k*bs; kbase = kloc*bs;
990: for (i=bs-1; i>=0; i--) {
991: sum = xb[i+kloc];
992: dval = shift + dd[i*(bs+1)+kbase];
993: for (j=i+1; j<bs; j++)
994: sum -= dd[kbase + j*bs + i] * x[kloc + j];
995: for (d=mainbd+1; d<a->nd; d++) {
996: diag = a->diag[d];
997: dv = a->diagv[d];
998: bloc = k - diag;
999: if (bloc < mblock) {
1000: for (j=0; j<bs; j++)
1001: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1002: }
1003: }
1004: x[kloc+i] = omega*sum/dval;
1005: }
1006: }
1007: }
1008: its--;
1009: }
1010: while (its--) {
1011: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1012: for (k=0; k<mblock; k++) {
1013: kloc = k*bs; kbase = kloc*bs;
1014: for (i=0; i<bs; i++) {
1015: sum = b[i+kloc];
1016: dval = shift + dd[i*(bs+1)+kbase];
1017: for (d=0; d<mainbd; d++) {
1018: diag = a->diag[d];
1019: dv = a->diagv[d];
1020: bloc = k - diag;
1021: if (bloc >= 0) {
1022: for (j=0; j<bs; j++) {
1023: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1024: }
1025: }
1026: }
1027: for (d=mainbd; d<a->nd; d++) {
1028: diag = a->diag[d];
1029: dv = a->diagv[d];
1030: bloc = k - diag;
1031: if (bloc < mblock) {
1032: for (j=0; j<bs; j++) {
1033: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1034: }
1035: }
1036: }
1037: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1038: }
1039: }
1040: }
1041: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1042: for (k=mblock-1; k>=0; k--) {
1043: kloc = k*bs; kbase = kloc*bs;
1044: for (i=bs-1; i>=0; i--) {
1045: sum = b[i+kloc];
1046: dval = shift + dd[i*(bs+1)+kbase];
1047: for (d=0; d<mainbd; d++) {
1048: diag = a->diag[d];
1049: dv = a->diagv[d];
1050: bloc = k - diag;
1051: if (bloc >= 0) {
1052: for (j=0; j<bs; j++) {
1053: sum -= dv[k*bs*bs + j*bs + i] * x[bloc*bs + j];
1054: }
1055: }
1056: }
1057: for (d=mainbd; d<a->nd; d++) {
1058: diag = a->diag[d];
1059: dv = a->diagv[d];
1060: bloc = k - diag;
1061: if (bloc < mblock) {
1062: for (j=0; j<bs; j++) {
1063: sum -= dv[kbase + j*bs + i] * x[(k-diag)*bs + j];
1064: }
1065: }
1066: }
1067: x[kloc+i] = (1.-omega)*x[kloc+i]+omega*(sum+dd[i*(bs+1)+kbase]*x[kloc+i])/dval;
1068: }
1069: }
1070: }
1071: }
1072: VecRestoreArray(xx,&x);
1073: if (xx != bb) VecRestoreArray(bb,&b);
1074: return(0);
1075: }
1079: PetscErrorCode MatRelax_SeqBDiag_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal shift,PetscInt its,PetscInt lits,Vec xx)
1080: {
1081: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
1082: PetscScalar *x,*b,*xb,*dd,dval,sum;
1084: PetscInt m = A->m,i,d,loc;
1085: PetscInt mainbd = a->mainbd,diag;
1088: its = its*lits;
1089: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1090: /* Currently this code doesn't use wavefront orderings,although
1091: we should eventually incorporate that option, whatever wavefront
1092: ordering maybe :-) */
1094: VecGetArray(xx,&x);
1095: VecGetArray(bb,&b);
1096: if (mainbd == -1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Main diagonal not set");
1097: dd = a->diagv[mainbd];
1098: if (flag == SOR_APPLY_UPPER) {
1099: /* apply (U + D/omega) to the vector */
1100: for (i=0; i<m; i++) {
1101: sum = b[i] * (shift + dd[i]) / omega;
1102: for (d=mainbd+1; d<a->nd; d++) {
1103: diag = a->diag[d];
1104: if (i-diag < m) sum += a->diagv[d][i] * x[i-diag];
1105: }
1106: x[i] = sum;
1107: }
1108: VecRestoreArray(xx,&x);
1109: VecRestoreArray(bb,&b);
1110: return(0);
1111: }
1112: if (flag & SOR_ZERO_INITIAL_GUESS) {
1113: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1114: for (i=0; i<m; i++) {
1115: sum = b[i];
1116: for (d=0; d<mainbd; d++) {
1117: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1118: }
1119: x[i] = omega*(sum/(shift + dd[i]));
1120: }
1121: xb = x;
1122: } else xb = b;
1123: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1124: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1125: for (i=0; i<m; i++) x[i] *= dd[i];
1126: }
1127: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1128: for (i=m-1; i>=0; i--) {
1129: sum = xb[i];
1130: for (d=mainbd+1; d<a->nd; d++) {
1131: diag = a->diag[d];
1132: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1133: }
1134: x[i] = omega*(sum/(shift + dd[i]));
1135: }
1136: }
1137: its--;
1138: }
1139: while (its--) {
1140: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
1141: for (i=0; i<m; i++) {
1142: sum = b[i];
1143: dval = shift + dd[i];
1144: for (d=0; d<mainbd; d++) {
1145: if (i >= a->diag[d]) sum -= a->diagv[d][i] * x[i-a->diag[d]];
1146: }
1147: for (d=mainbd; d<a->nd; d++) {
1148: diag = a->diag[d];
1149: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1150: }
1151: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/dval;
1152: }
1153: }
1154: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1155: for (i=m-1; i>=0; i--) {
1156: sum = b[i];
1157: for (d=0; d<mainbd; d++) {
1158: loc = i - a->diag[d];
1159: if (loc >= 0) sum -= a->diagv[d][i] * x[loc];
1160: }
1161: for (d=mainbd; d<a->nd; d++) {
1162: diag = a->diag[d];
1163: if (i-diag < m) sum -= a->diagv[d][i] * x[i-diag];
1164: }
1165: x[i] = (1. - omega)*x[i] + omega*(sum + dd[i]*x[i])/(shift + dd[i]);
1166: }
1167: }
1168: }
1169: VecRestoreArray(xx,&x);
1170: VecRestoreArray(bb,&b);
1171: return(0);
1172: }