Actual source code: sbaij2.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/inline/spops.h
5: #include src/inline/ilu.h
6: #include petscbt.h
7: #include src/mat/impls/sbaij/seq/sbaij.h
11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
12: {
13: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
15: PetscInt brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
16: PetscBT table,table0;
19: if (ov < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
20: mbs = a->mbs;
21: ai = a->i;
22: aj = a->j;
23: bs = A->bs;
24: PetscBTCreate(mbs,table);
25: PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
26: PetscMalloc((A->m+1)*sizeof(PetscInt),&nidx2);
27: PetscBTCreate(mbs,table0);
29: for (i=0; i<is_max; i++) { /* for each is */
30: isz = 0;
31: PetscBTMemzero(mbs,table);
32:
33: /* Extract the indices, assume there can be duplicate entries */
34: ISGetIndices(is[i],&idx);
35: ISGetLocalSize(is[i],&n);
37: /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
38: bcol_max = 0;
39: for (j=0; j<n ; ++j){
40: brow = idx[j]/bs; /* convert the indices into block indices */
41: if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42: if(!PetscBTLookupSet(table,brow)) {
43: nidx[isz++] = brow;
44: if (bcol_max < brow) bcol_max = brow;
45: }
46: }
47: ISRestoreIndices(is[i],&idx);
48: ISDestroy(is[i]);
49:
50: k = 0;
51: for (j=0; j<ov; j++){ /* for each overlap */
52: /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
53: PetscBTMemzero(mbs,table0);
54: for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }
56: n = isz; /* length of the updated is[i] */
57: for (brow=0; brow<mbs; brow++){
58: start = ai[brow]; end = ai[brow+1];
59: if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
60: for (l = start; l<end ; l++){
61: bcol = aj[l];
62: if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
63: }
64: k++;
65: if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
66: } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
67: for (l = start; l<end ; l++){
68: bcol = aj[l];
69: if (bcol > bcol_max) break;
70: if (PetscBTLookup(table0,bcol)){
71: if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
72: break; /* for l = start; l<end ; l++) */
73: }
74: }
75: }
76: }
77: } /* for each overlap */
79: /* expand the Index Set */
80: for (j=0; j<isz; j++) {
81: for (k=0; k<bs; k++)
82: nidx2[j*bs+k] = nidx[j]*bs+k;
83: }
84: ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
85: }
86: PetscBTDestroy(table);
87: PetscFree(nidx);
88: PetscFree(nidx2);
89: PetscBTDestroy(table0);
90: return(0);
91: }
95: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
96: {
97: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*c;
99: PetscInt *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
100: PetscInt row,mat_i,*mat_j,tcol,*mat_ilen;
101: PetscInt *irow,nrows,*ssmap,bs=A->bs,bs2=a->bs2;
102: PetscInt *aj = a->j,*ai = a->i;
103: MatScalar *mat_a;
104: Mat C;
105: PetscTruth flag;
108: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
109: ISSorted(iscol,(PetscTruth*)&i);
110: if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
112: ISGetIndices(isrow,&irow);
113: ISGetSize(isrow,&nrows);
114:
115: PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
116: ssmap = smap;
117: PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
118: PetscMemzero(smap,oldcols*sizeof(PetscInt));
119: for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
120: /* determine lens of each row */
121: for (i=0; i<nrows; i++) {
122: kstart = ai[irow[i]];
123: kend = kstart + a->ilen[irow[i]];
124: lens[i] = 0;
125: for (k=kstart; k<kend; k++) {
126: if (ssmap[aj[k]]) {
127: lens[i]++;
128: }
129: }
130: }
131: /* Create and fill new matrix */
132: if (scall == MAT_REUSE_MATRIX) {
133: c = (Mat_SeqSBAIJ *)((*B)->data);
135: if (c->mbs!=nrows || (*B)->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
136: PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
137: if (!flag) {
138: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
139: }
140: PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
141: C = *B;
142: } else {
143: MatCreate(A->comm,&C);
144: MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
145: MatSetType(C,A->type_name);
146: MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
147: }
148: c = (Mat_SeqSBAIJ *)(C->data);
149: for (i=0; i<nrows; i++) {
150: row = irow[i];
151: kstart = ai[row];
152: kend = kstart + a->ilen[row];
153: mat_i = c->i[i];
154: mat_j = c->j + mat_i;
155: mat_a = c->a + mat_i*bs2;
156: mat_ilen = c->ilen + i;
157: for (k=kstart; k<kend; k++) {
158: if ((tcol=ssmap[a->j[k]])) {
159: *mat_j++ = tcol - 1;
160: PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
161: mat_a += bs2;
162: (*mat_ilen)++;
163: }
164: }
165: }
166:
167: /* Free work space */
168: PetscFree(smap);
169: PetscFree(lens);
170: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
171: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
172:
173: ISRestoreIndices(isrow,&irow);
174: *B = C;
175: return(0);
176: }
180: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
181: {
182: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
183: IS is1;
185: PetscInt *vary,*iary,*irow,nrows,i,bs=A->bs,count;
188: if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
189:
190: ISGetIndices(isrow,&irow);
191: ISGetSize(isrow,&nrows);
192:
193: /* Verify if the indices corespond to each element in a block
194: and form the IS with compressed IS */
195: PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
196: iary = vary + a->mbs;
197: PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
198: for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
199:
200: count = 0;
201: for (i=0; i<a->mbs; i++) {
202: if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
203: if (vary[i]==bs) iary[count++] = i;
204: }
205: ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
206:
207: ISRestoreIndices(isrow,&irow);
208: PetscFree(vary);
210: MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
211: ISDestroy(is1);
212: return(0);
213: }
217: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
218: {
220: PetscInt i;
223: if (scall == MAT_INITIAL_MATRIX) {
224: PetscMalloc((n+1)*sizeof(Mat),B);
225: }
227: for (i=0; i<n; i++) {
228: MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
229: }
230: return(0);
231: }
233: /* -------------------------------------------------------*/
234: /* Should check that shapes of vectors and matrices match */
235: /* -------------------------------------------------------*/
236: #include petscblaslapack.h
240: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
241: {
242: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
243: PetscScalar *x,*z,*xb,x1,zero=0.0;
244: MatScalar *v;
246: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
249: VecSet(zz,zero);
250: VecGetArray(xx,&x);
251: VecGetArray(zz,&z);
253: v = a->a;
254: xb = x;
255:
256: for (i=0; i<mbs; i++) {
257: n = ai[1] - ai[0]; /* length of i_th row of A */
258: x1 = xb[0];
259: ib = aj + *ai;
260: jmin = 0;
261: if (*ib == i) { /* (diag of A)*x */
262: z[i] += *v++ * x[*ib++];
263: jmin++;
264: }
265: for (j=jmin; j<n; j++) {
266: cval = *ib;
267: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
268: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
269: }
270: xb++; ai++;
271: }
273: VecRestoreArray(xx,&x);
274: VecRestoreArray(zz,&z);
275: PetscLogFlops(2*(a->nz*2 - A->m) - A->m); /* nz = (nz+m)/2 */
276: return(0);
277: }
281: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
282: {
283: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
284: PetscScalar *x,*z,*xb,x1,x2,zero=0.0;
285: MatScalar *v;
287: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
290: VecSet(zz,zero);
291: VecGetArray(xx,&x);
292: VecGetArray(zz,&z);
293:
294: v = a->a;
295: xb = x;
297: for (i=0; i<mbs; i++) {
298: n = ai[1] - ai[0]; /* length of i_th block row of A */
299: x1 = xb[0]; x2 = xb[1];
300: ib = aj + *ai;
301: jmin = 0;
302: if (*ib == i){ /* (diag of A)*x */
303: z[2*i] += v[0]*x1 + v[2]*x2;
304: z[2*i+1] += v[2]*x1 + v[3]*x2;
305: v += 4; jmin++;
306: }
307: for (j=jmin; j<n; j++) {
308: /* (strict lower triangular part of A)*x */
309: cval = ib[j]*2;
310: z[cval] += v[0]*x1 + v[1]*x2;
311: z[cval+1] += v[2]*x1 + v[3]*x2;
312: /* (strict upper triangular part of A)*x */
313: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
314: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
315: v += 4;
316: }
317: xb +=2; ai++;
318: }
320: VecRestoreArray(xx,&x);
321: VecRestoreArray(zz,&z);
322: PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
323: return(0);
324: }
328: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
329: {
330: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
331: PetscScalar *x,*z,*xb,x1,x2,x3,zero=0.0;
332: MatScalar *v;
334: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
337: VecSet(zz,zero);
338: VecGetArray(xx,&x);
339: VecGetArray(zz,&z);
340:
341: v = a->a;
342: xb = x;
344: for (i=0; i<mbs; i++) {
345: n = ai[1] - ai[0]; /* length of i_th block row of A */
346: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
347: ib = aj + *ai;
348: jmin = 0;
349: if (*ib == i){ /* (diag of A)*x */
350: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
351: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
352: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
353: v += 9; jmin++;
354: }
355: for (j=jmin; j<n; j++) {
356: /* (strict lower triangular part of A)*x */
357: cval = ib[j]*3;
358: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
359: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
360: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
361: /* (strict upper triangular part of A)*x */
362: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
363: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
364: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
365: v += 9;
366: }
367: xb +=3; ai++;
368: }
370: VecRestoreArray(xx,&x);
371: VecRestoreArray(zz,&z);
372: PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
373: return(0);
374: }
378: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
379: {
380: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
381: PetscScalar *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
382: MatScalar *v;
384: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
387: VecSet(zz,zero);
388: VecGetArray(xx,&x);
389: VecGetArray(zz,&z);
390:
391: v = a->a;
392: xb = x;
394: for (i=0; i<mbs; i++) {
395: n = ai[1] - ai[0]; /* length of i_th block row of A */
396: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
397: ib = aj + *ai;
398: jmin = 0;
399: if (*ib == i){ /* (diag of A)*x */
400: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
401: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
402: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
403: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
404: v += 16; jmin++;
405: }
406: for (j=jmin; j<n; j++) {
407: /* (strict lower triangular part of A)*x */
408: cval = ib[j]*4;
409: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
410: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
411: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
412: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
413: /* (strict upper triangular part of A)*x */
414: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
415: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
416: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
417: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
418: v += 16;
419: }
420: xb +=4; ai++;
421: }
423: VecRestoreArray(xx,&x);
424: VecRestoreArray(zz,&z);
425: PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
426: return(0);
427: }
431: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
432: {
433: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
434: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
435: MatScalar *v;
437: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
440: VecSet(zz,zero);
441: VecGetArray(xx,&x);
442: VecGetArray(zz,&z);
443:
444: v = a->a;
445: xb = x;
447: for (i=0; i<mbs; i++) {
448: n = ai[1] - ai[0]; /* length of i_th block row of A */
449: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
450: ib = aj + *ai;
451: jmin = 0;
452: if (*ib == i){ /* (diag of A)*x */
453: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
454: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
455: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
456: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
457: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
458: v += 25; jmin++;
459: }
460: for (j=jmin; j<n; j++) {
461: /* (strict lower triangular part of A)*x */
462: cval = ib[j]*5;
463: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
464: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
465: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
466: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
467: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
468: /* (strict upper triangular part of A)*x */
469: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
470: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
471: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
472: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
473: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
474: v += 25;
475: }
476: xb +=5; ai++;
477: }
479: VecRestoreArray(xx,&x);
480: VecRestoreArray(zz,&z);
481: PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
482: return(0);
483: }
488: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
489: {
490: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
491: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
492: MatScalar *v;
494: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
497: VecSet(zz,zero);
498: VecGetArray(xx,&x);
499: VecGetArray(zz,&z);
500:
501: v = a->a;
502: xb = x;
504: for (i=0; i<mbs; i++) {
505: n = ai[1] - ai[0]; /* length of i_th block row of A */
506: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
507: ib = aj + *ai;
508: jmin = 0;
509: if (*ib == i){ /* (diag of A)*x */
510: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
511: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
512: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
513: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
514: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
515: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
516: v += 36; jmin++;
517: }
518: for (j=jmin; j<n; j++) {
519: /* (strict lower triangular part of A)*x */
520: cval = ib[j]*6;
521: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
522: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
523: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
524: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
525: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
526: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
527: /* (strict upper triangular part of A)*x */
528: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
529: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
530: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
531: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
532: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
533: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
534: v += 36;
535: }
536: xb +=6; ai++;
537: }
539: VecRestoreArray(xx,&x);
540: VecRestoreArray(zz,&z);
541: PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
542: return(0);
543: }
546: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
547: {
548: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
549: PetscScalar *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
550: MatScalar *v;
552: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
555: VecSet(zz,zero);
556: VecGetArray(xx,&x);
557: VecGetArray(zz,&z);
558:
559: v = a->a;
560: xb = x;
562: for (i=0; i<mbs; i++) {
563: n = ai[1] - ai[0]; /* length of i_th block row of A */
564: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
565: ib = aj + *ai;
566: jmin = 0;
567: if (*ib == i){ /* (diag of A)*x */
568: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
569: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
570: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
571: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
572: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
573: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
574: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
575: v += 49; jmin++;
576: }
577: for (j=jmin; j<n; j++) {
578: /* (strict lower triangular part of A)*x */
579: cval = ib[j]*7;
580: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
581: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
582: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
583: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
584: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
585: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
586: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
587: /* (strict upper triangular part of A)*x */
588: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
589: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
590: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
591: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
592: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
593: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
594: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
595: v += 49;
596: }
597: xb +=7; ai++;
598: }
599: VecRestoreArray(xx,&x);
600: VecRestoreArray(zz,&z);
601: PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
602: return(0);
603: }
605: /*
606: This will not work with MatScalar == float because it calls the BLAS
607: */
610: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
611: {
612: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
613: PetscScalar *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
614: MatScalar *v;
616: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;
619: VecSet(zz,zero);
620: VecGetArray(xx,&x); x_ptr=x;
621: VecGetArray(zz,&z); z_ptr=z;
623: aj = a->j;
624: v = a->a;
625: ii = a->i;
627: if (!a->mult_work) {
628: PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
629: }
630: work = a->mult_work;
631:
632: for (i=0; i<mbs; i++) {
633: n = ii[1] - ii[0]; ncols = n*bs;
634: workt = work; idx=aj+ii[0];
636: /* upper triangular part */
637: for (j=0; j<n; j++) {
638: xb = x_ptr + bs*(*idx++);
639: for (k=0; k<bs; k++) workt[k] = xb[k];
640: workt += bs;
641: }
642: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
643: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
644:
645: /* strict lower triangular part */
646: idx = aj+ii[0];
647: if (*idx == i){
648: ncols -= bs; v += bs2; idx++; n--;
649: }
650:
651: if (ncols > 0){
652: workt = work;
653: PetscMemzero(workt,ncols*sizeof(PetscScalar));
654: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
655: for (j=0; j<n; j++) {
656: zb = z_ptr + bs*(*idx++);
657: for (k=0; k<bs; k++) zb[k] += workt[k] ;
658: workt += bs;
659: }
660: }
661: x += bs; v += n*bs2; z += bs; ii++;
662: }
663:
664: VecRestoreArray(xx,&x);
665: VecRestoreArray(zz,&z);
666: PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
667: return(0);
668: }
672: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
673: {
674: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
675: PetscScalar *x,*y,*z,*xb,x1;
676: MatScalar *v;
678: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
681: VecGetArray(xx,&x);
682: if (yy != xx) {
683: VecGetArray(yy,&y);
684: } else {
685: y = x;
686: }
687: if (zz != yy) {
688: /* VecCopy(yy,zz); */
689: VecGetArray(zz,&z);
690: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
691: } else {
692: z = y;
693: }
695: v = a->a;
696: xb = x;
698: for (i=0; i<mbs; i++) {
699: n = ai[1] - ai[0]; /* length of i_th row of A */
700: x1 = xb[0];
701: ib = aj + *ai;
702: jmin = 0;
703: if (*ib == i) { /* (diag of A)*x */
704: z[i] += *v++ * x[*ib++]; jmin++;
705: }
706: for (j=jmin; j<n; j++) {
707: cval = *ib;
708: z[cval] += *v * x1; /* (strict lower triangular part of A)*x */
709: z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x */
710: }
711: xb++; ai++;
712: }
714: VecRestoreArray(xx,&x);
715: if (yy != xx) VecRestoreArray(yy,&y);
716: if (zz != yy) VecRestoreArray(zz,&z);
717:
718: PetscLogFlops(2*(a->nz*2 - A->m));
719: return(0);
720: }
724: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
725: {
726: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
727: PetscScalar *x,*y,*z,*xb,x1,x2;
728: MatScalar *v;
730: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
733: VecGetArray(xx,&x);
734: if (yy != xx) {
735: VecGetArray(yy,&y);
736: } else {
737: y = x;
738: }
739: if (zz != yy) {
740: /* VecCopy(yy,zz); */
741: VecGetArray(zz,&z);
742: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
743: } else {
744: z = y;
745: }
747: v = a->a;
748: xb = x;
750: for (i=0; i<mbs; i++) {
751: n = ai[1] - ai[0]; /* length of i_th block row of A */
752: x1 = xb[0]; x2 = xb[1];
753: ib = aj + *ai;
754: jmin = 0;
755: if (*ib == i){ /* (diag of A)*x */
756: z[2*i] += v[0]*x1 + v[2]*x2;
757: z[2*i+1] += v[2]*x1 + v[3]*x2;
758: v += 4; jmin++;
759: }
760: for (j=jmin; j<n; j++) {
761: /* (strict lower triangular part of A)*x */
762: cval = ib[j]*2;
763: z[cval] += v[0]*x1 + v[1]*x2;
764: z[cval+1] += v[2]*x1 + v[3]*x2;
765: /* (strict upper triangular part of A)*x */
766: z[2*i] += v[0]*x[cval] + v[2]*x[cval+1];
767: z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
768: v += 4;
769: }
770: xb +=2; ai++;
771: }
773: VecRestoreArray(xx,&x);
774: if (yy != xx) VecRestoreArray(yy,&y);
775: if (zz != yy) VecRestoreArray(zz,&z);
777: PetscLogFlops(4*(a->nz*2 - A->m));
778: return(0);
779: }
783: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
784: {
785: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
786: PetscScalar *x,*y,*z,*xb,x1,x2,x3;
787: MatScalar *v;
789: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
792: VecGetArray(xx,&x);
793: if (yy != xx) {
794: VecGetArray(yy,&y);
795: } else {
796: y = x;
797: }
798: if (zz != yy) {
799: /* VecCopy(yy,zz); */
800: VecGetArray(zz,&z);
801: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
802: } else {
803: z = y;
804: }
806: v = a->a;
807: xb = x;
809: for (i=0; i<mbs; i++) {
810: n = ai[1] - ai[0]; /* length of i_th block row of A */
811: x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
812: ib = aj + *ai;
813: jmin = 0;
814: if (*ib == i){ /* (diag of A)*x */
815: z[3*i] += v[0]*x1 + v[3]*x2 + v[6]*x3;
816: z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
817: z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
818: v += 9; jmin++;
819: }
820: for (j=jmin; j<n; j++) {
821: /* (strict lower triangular part of A)*x */
822: cval = ib[j]*3;
823: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3;
824: z[cval+1] += v[3]*x1 + v[4]*x2 + v[5]*x3;
825: z[cval+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
826: /* (strict upper triangular part of A)*x */
827: z[3*i] += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
828: z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
829: z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
830: v += 9;
831: }
832: xb +=3; ai++;
833: }
835: VecRestoreArray(xx,&x);
836: if (yy != xx) VecRestoreArray(yy,&y);
837: if (zz != yy) VecRestoreArray(zz,&z);
839: PetscLogFlops(18*(a->nz*2 - A->m));
840: return(0);
841: }
845: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
846: {
847: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
848: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4;
849: MatScalar *v;
851: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
854: VecGetArray(xx,&x);
855: if (yy != xx) {
856: VecGetArray(yy,&y);
857: } else {
858: y = x;
859: }
860: if (zz != yy) {
861: /* VecCopy(yy,zz); */
862: VecGetArray(zz,&z);
863: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
864: } else {
865: z = y;
866: }
868: v = a->a;
869: xb = x;
871: for (i=0; i<mbs; i++) {
872: n = ai[1] - ai[0]; /* length of i_th block row of A */
873: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
874: ib = aj + *ai;
875: jmin = 0;
876: if (*ib == i){ /* (diag of A)*x */
877: z[4*i] += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
878: z[4*i+1] += v[4]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
879: z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
880: z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
881: v += 16; jmin++;
882: }
883: for (j=jmin; j<n; j++) {
884: /* (strict lower triangular part of A)*x */
885: cval = ib[j]*4;
886: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
887: z[cval+1] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
888: z[cval+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
889: z[cval+3] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
890: /* (strict upper triangular part of A)*x */
891: z[4*i] += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
892: z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
893: z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
894: z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
895: v += 16;
896: }
897: xb +=4; ai++;
898: }
900: VecRestoreArray(xx,&x);
901: if (yy != xx) VecRestoreArray(yy,&y);
902: if (zz != yy) VecRestoreArray(zz,&z);
904: PetscLogFlops(32*(a->nz*2 - A->m));
905: return(0);
906: }
910: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
911: {
912: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
913: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5;
914: MatScalar *v;
916: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
919: VecGetArray(xx,&x);
920: if (yy != xx) {
921: VecGetArray(yy,&y);
922: } else {
923: y = x;
924: }
925: if (zz != yy) {
926: /* VecCopy(yy,zz); */
927: VecGetArray(zz,&z);
928: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
929: } else {
930: z = y;
931: }
933: v = a->a;
934: xb = x;
936: for (i=0; i<mbs; i++) {
937: n = ai[1] - ai[0]; /* length of i_th block row of A */
938: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
939: ib = aj + *ai;
940: jmin = 0;
941: if (*ib == i){ /* (diag of A)*x */
942: z[5*i] += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
943: z[5*i+1] += v[5]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
944: z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
945: z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
946: z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
947: v += 25; jmin++;
948: }
949: for (j=jmin; j<n; j++) {
950: /* (strict lower triangular part of A)*x */
951: cval = ib[j]*5;
952: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
953: z[cval+1] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
954: z[cval+2] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
955: z[cval+3] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
956: z[cval+4] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
957: /* (strict upper triangular part of A)*x */
958: z[5*i] +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
959: z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
960: z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
961: z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
962: z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
963: v += 25;
964: }
965: xb +=5; ai++;
966: }
968: VecRestoreArray(xx,&x);
969: if (yy != xx) VecRestoreArray(yy,&y);
970: if (zz != yy) VecRestoreArray(zz,&z);
972: PetscLogFlops(50*(a->nz*2 - A->m));
973: return(0);
974: }
977: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
978: {
979: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
980: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
981: MatScalar *v;
983: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
986: VecGetArray(xx,&x);
987: if (yy != xx) {
988: VecGetArray(yy,&y);
989: } else {
990: y = x;
991: }
992: if (zz != yy) {
993: VecGetArray(zz,&z);
994: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
995: } else {
996: z = y;
997: }
999: v = a->a;
1000: xb = x;
1002: for (i=0; i<mbs; i++) {
1003: n = ai[1] - ai[0]; /* length of i_th block row of A */
1004: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
1005: ib = aj + *ai;
1006: jmin = 0;
1007: if (*ib == i){ /* (diag of A)*x */
1008: z[6*i] += v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
1009: z[6*i+1] += v[6]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
1010: z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
1011: z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
1012: z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
1013: z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1014: v += 36; jmin++;
1015: }
1016: for (j=jmin; j<n; j++) {
1017: /* (strict lower triangular part of A)*x */
1018: cval = ib[j]*6;
1019: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
1020: z[cval+1] += v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
1021: z[cval+2] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
1022: z[cval+3] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
1023: z[cval+4] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
1024: z[cval+5] += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1025: /* (strict upper triangular part of A)*x */
1026: z[6*i] +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
1027: z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
1028: z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
1029: z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
1030: z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
1031: z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
1032: v += 36;
1033: }
1034: xb +=6; ai++;
1035: }
1037: VecRestoreArray(xx,&x);
1038: if (yy != xx) VecRestoreArray(yy,&y);
1039: if (zz != yy) VecRestoreArray(zz,&z);
1041: PetscLogFlops(72*(a->nz*2 - A->m));
1042: return(0);
1043: }
1047: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1048: {
1049: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1050: PetscScalar *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1051: MatScalar *v;
1053: PetscInt mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
1056: VecGetArray(xx,&x);
1057: if (yy != xx) {
1058: VecGetArray(yy,&y);
1059: } else {
1060: y = x;
1061: }
1062: if (zz != yy) {
1063: VecGetArray(zz,&z);
1064: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1065: } else {
1066: z = y;
1067: }
1069: v = a->a;
1070: xb = x;
1072: for (i=0; i<mbs; i++) {
1073: n = ai[1] - ai[0]; /* length of i_th block row of A */
1074: x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1075: ib = aj + *ai;
1076: jmin = 0;
1077: if (*ib == i){ /* (diag of A)*x */
1078: z[7*i] += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1079: z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1080: z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1081: z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1082: z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1083: z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1084: z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1085: v += 49; jmin++;
1086: }
1087: for (j=jmin; j<n; j++) {
1088: /* (strict lower triangular part of A)*x */
1089: cval = ib[j]*7;
1090: z[cval] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1091: z[cval+1] += v[7]*x1 + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1092: z[cval+2] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1093: z[cval+3] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1094: z[cval+4] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1095: z[cval+5] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1096: z[cval+6] += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1097: /* (strict upper triangular part of A)*x */
1098: z[7*i] +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1099: z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1100: z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1101: z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1102: z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1103: z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1104: z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1105: v += 49;
1106: }
1107: xb +=7; ai++;
1108: }
1110: VecRestoreArray(xx,&x);
1111: if (yy != xx) VecRestoreArray(yy,&y);
1112: if (zz != yy) VecRestoreArray(zz,&z);
1114: PetscLogFlops(98*(a->nz*2 - A->m));
1115: return(0);
1116: }
1120: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1121: {
1122: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1123: PetscScalar *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1124: MatScalar *v;
1126: PetscInt mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;
1129: VecGetArray(xx,&x); x_ptr=x;
1130: if (yy != xx) {
1131: VecGetArray(yy,&y);
1132: } else {
1133: y = x;
1134: }
1135: if (zz != yy) {
1136: VecGetArray(zz,&z); z_ptr=z;
1137: PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1138: } else {
1139: z = y;
1140: }
1142: aj = a->j;
1143: v = a->a;
1144: ii = a->i;
1146: if (!a->mult_work) {
1147: PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1148: }
1149: work = a->mult_work;
1150:
1151:
1152: for (i=0; i<mbs; i++) {
1153: n = ii[1] - ii[0]; ncols = n*bs;
1154: workt = work; idx=aj+ii[0];
1156: /* upper triangular part */
1157: for (j=0; j<n; j++) {
1158: xb = x_ptr + bs*(*idx++);
1159: for (k=0; k<bs; k++) workt[k] = xb[k];
1160: workt += bs;
1161: }
1162: /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1163: Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1165: /* strict lower triangular part */
1166: idx = aj+ii[0];
1167: if (*idx == i){
1168: ncols -= bs; v += bs2; idx++; n--;
1169: }
1170: if (ncols > 0){
1171: workt = work;
1172: PetscMemzero(workt,ncols*sizeof(PetscScalar));
1173: Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1174: for (j=0; j<n; j++) {
1175: zb = z_ptr + bs*(*idx++);
1176: for (k=0; k<bs; k++) zb[k] += workt[k] ;
1177: workt += bs;
1178: }
1179: }
1181: x += bs; v += n*bs2; z += bs; ii++;
1182: }
1184: VecRestoreArray(xx,&x);
1185: if (yy != xx) VecRestoreArray(yy,&y);
1186: if (zz != yy) VecRestoreArray(zz,&z);
1188: PetscLogFlops(2*(a->nz*2 - A->m));
1189: return(0);
1190: }
1194: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1195: {
1196: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1197: PetscScalar oalpha = alpha;
1198: PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;
1202: BLASscal_(&totalnz,&oalpha,a->a,&one);
1203: PetscLogFlops(totalnz);
1204: return(0);
1205: }
1209: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1210: {
1211: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1212: MatScalar *v = a->a;
1213: PetscReal sum_diag = 0.0, sum_off = 0.0, *sum;
1214: PetscInt i,j,k,bs = A->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1216: PetscInt *jl,*il,jmin,jmax,nexti,ik,*col;
1217:
1219: if (type == NORM_FROBENIUS) {
1220: for (k=0; k<mbs; k++){
1221: jmin = a->i[k]; jmax = a->i[k+1];
1222: col = aj + jmin;
1223: if (*col == k){ /* diagonal block */
1224: for (i=0; i<bs2; i++){
1225: #if defined(PETSC_USE_COMPLEX)
1226: sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1227: #else
1228: sum_diag += (*v)*(*v); v++;
1229: #endif
1230: }
1231: jmin++;
1232: }
1233: for (j=jmin; j<jmax; j++){ /* off-diagonal blocks */
1234: for (i=0; i<bs2; i++){
1235: #if defined(PETSC_USE_COMPLEX)
1236: sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1237: #else
1238: sum_off += (*v)*(*v); v++;
1239: #endif
1240: }
1241: }
1242: }
1243: *norm = sqrt(sum_diag + 2*sum_off);
1244: } else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1245: PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);
1246: jl = il + mbs;
1247: sum = (PetscReal*)(jl + mbs);
1248: for (i=0; i<mbs; i++) jl[i] = mbs;
1249: il[0] = 0;
1251: *norm = 0.0;
1252: for (k=0; k<mbs; k++) { /* k_th block row */
1253: for (j=0; j<bs; j++) sum[j]=0.0;
1254: /*-- col sum --*/
1255: i = jl[k]; /* first |A(i,k)| to be added */
1256: /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1257: at step k */
1258: while (i<mbs){
1259: nexti = jl[i]; /* next block row to be added */
1260: ik = il[i]; /* block index of A(i,k) in the array a */
1261: for (j=0; j<bs; j++){
1262: v = a->a + ik*bs2 + j*bs;
1263: for (k1=0; k1<bs; k1++) {
1264: sum[j] += PetscAbsScalar(*v); v++;
1265: }
1266: }
1267: /* update il, jl */
1268: jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1269: jmax = a->i[i+1];
1270: if (jmin < jmax){
1271: il[i] = jmin;
1272: j = a->j[jmin];
1273: jl[i] = jl[j]; jl[j]=i;
1274: }
1275: i = nexti;
1276: }
1277: /*-- row sum --*/
1278: jmin = a->i[k]; jmax = a->i[k+1];
1279: for (i=jmin; i<jmax; i++) {
1280: for (j=0; j<bs; j++){
1281: v = a->a + i*bs2 + j;
1282: for (k1=0; k1<bs; k1++){
1283: sum[j] += PetscAbsScalar(*v); v += bs;
1284: }
1285: }
1286: }
1287: /* add k_th block row to il, jl */
1288: col = aj+jmin;
1289: if (*col == k) jmin++;
1290: if (jmin < jmax){
1291: il[k] = jmin;
1292: j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1293: }
1294: for (j=0; j<bs; j++){
1295: if (sum[j] > *norm) *norm = sum[j];
1296: }
1297: }
1298: PetscFree(il);
1299: } else {
1300: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1301: }
1302: return(0);
1303: }
1307: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1308: {
1309: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1314: /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1315: if ((A->m != B->m) || (A->n != B->n) || (A->bs != B->bs)|| (a->nz != b->nz)) {
1316: *flg = PETSC_FALSE;
1317: return(0);
1318: }
1319:
1320: /* if the a->i are the same */
1321: PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1322: if (!*flg) {
1323: return(0);
1324: }
1325:
1326: /* if a->j are the same */
1327: PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1328: if (!*flg) {
1329: return(0);
1330: }
1331: /* if a->a are the same */
1332: PetscMemcmp(a->a,b->a,(a->nz)*(A->bs)*(A->bs)*sizeof(PetscScalar),flg);
1333: return(0);
1334: }
1338: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1339: {
1340: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1342: PetscInt i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1343: PetscScalar *x,zero = 0.0;
1344: MatScalar *aa,*aa_j;
1347: bs = A->bs;
1348: if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1349:
1350: aa = a->a;
1351: ai = a->i;
1352: aj = a->j;
1353: ambs = a->mbs;
1354: bs2 = a->bs2;
1356: VecSet(v,zero);
1357: VecGetArray(v,&x);
1358: VecGetLocalSize(v,&n);
1359: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1360: for (i=0; i<ambs; i++) {
1361: j=ai[i];
1362: if (aj[j] == i) { /* if this is a diagonal element */
1363: row = i*bs;
1364: aa_j = aa + j*bs2;
1365: if (A->factor && bs==1){
1366: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1367: } else {
1368: for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1369: }
1370: }
1371: }
1372:
1373: VecRestoreArray(v,&x);
1374: return(0);
1375: }
1379: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1380: {
1381: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1382: PetscScalar *l,x,*li,*ri;
1383: MatScalar *aa,*v;
1385: PetscInt i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1386: PetscTruth flg;
1389: if (ll != rr){
1390: VecEqual(ll,rr,&flg);
1391: if (!flg)
1392: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1393: }
1394: if (!ll) return(0);
1395: ai = a->i;
1396: aj = a->j;
1397: aa = a->a;
1398: m = A->m;
1399: bs = A->bs;
1400: mbs = a->mbs;
1401: bs2 = a->bs2;
1403: VecGetArray(ll,&l);
1404: VecGetLocalSize(ll,&lm);
1405: if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1406: for (i=0; i<mbs; i++) { /* for each block row */
1407: M = ai[i+1] - ai[i];
1408: li = l + i*bs;
1409: v = aa + bs2*ai[i];
1410: for (j=0; j<M; j++) { /* for each block */
1411: ri = l + bs*aj[ai[i]+j];
1412: for (k=0; k<bs; k++) {
1413: x = ri[k];
1414: for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1415: }
1416: }
1417: }
1418: VecRestoreArray(ll,&l);
1419: PetscLogFlops(2*a->nz);
1420: return(0);
1421: }
1425: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1426: {
1427: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1430: info->rows_global = (double)A->m;
1431: info->columns_global = (double)A->m;
1432: info->rows_local = (double)A->m;
1433: info->columns_local = (double)A->m;
1434: info->block_size = a->bs2;
1435: info->nz_allocated = a->maxnz; /*num. of nonzeros in upper triangular part */
1436: info->nz_used = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1437: info->nz_unneeded = (double)(info->nz_allocated - info->nz_used);
1438: info->assemblies = A->num_ass;
1439: info->mallocs = a->reallocs;
1440: info->memory = A->mem;
1441: if (A->factor) {
1442: info->fill_ratio_given = A->info.fill_ratio_given;
1443: info->fill_ratio_needed = A->info.fill_ratio_needed;
1444: info->factor_mallocs = A->info.factor_mallocs;
1445: } else {
1446: info->fill_ratio_given = 0;
1447: info->fill_ratio_needed = 0;
1448: info->factor_mallocs = 0;
1449: }
1450: return(0);
1451: }
1456: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1457: {
1458: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1462: PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1463: return(0);
1464: }
1468: PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1469: {
1470: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1472: PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1473: PetscReal atmp;
1474: MatScalar *aa;
1475: PetscScalar zero = 0.0,*x;
1476: PetscInt ncols,brow,bcol,krow,kcol;
1479: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1480: bs = A->bs;
1481: aa = a->a;
1482: ai = a->i;
1483: aj = a->j;
1484: mbs = a->mbs;
1486: VecSet(v,zero);
1487: VecGetArray(v,&x);
1488: VecGetLocalSize(v,&n);
1489: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1490: for (i=0; i<mbs; i++) {
1491: ncols = ai[1] - ai[0]; ai++;
1492: brow = bs*i;
1493: for (j=0; j<ncols; j++){
1494: bcol = bs*(*aj);
1495: for (kcol=0; kcol<bs; kcol++){
1496: col = bcol + kcol; /* col index */
1497: for (krow=0; krow<bs; krow++){
1498: atmp = PetscAbsScalar(*aa); aa++;
1499: row = brow + krow; /* row index */
1500: if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1501: if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1502: }
1503: }
1504: aj++;
1505: }
1506: }
1507: VecRestoreArray(v,&x);
1508: return(0);
1509: }