Actual source code: sbaij2.c

  1: /*$Id: sbaij2.c,v 1.32 2001/08/07 03:03:01 balay Exp $*/

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/vec/vecimpl.h
 5:  #include src/inline/spops.h
 6:  #include src/inline/ilu.h
 7:  #include petscbt.h
 8:  #include src/mat/impls/sbaij/seq/sbaij.h

 10: int MatIncreaseOverlap_SeqSBAIJ(Mat A,int is_max,IS *is,int ov)
 11: {
 13:   SETERRQ(1,"Function not yet written for SBAIJ format");
 14:   /* return(0); */
 15: }

 17: int MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
 18: {
 19:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
 20:   int          *smap,i,k,kstart,kend,ierr,oldcols = a->mbs,*lens;
 21:   int          row,mat_i,*mat_j,tcol,*mat_ilen;
 22:   int          *irow,nrows,*ssmap,bs=a->bs,bs2=a->bs2;
 23:   int          *aj = a->j,*ai = a->i;
 24:   MatScalar    *mat_a;
 25:   Mat          C;
 26:   PetscTruth   flag;

 29: 
 30:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
 31:   ISSorted(iscol,(PetscTruth*)&i);
 32:   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

 34:   ISGetIndices(isrow,&irow);
 35:   ISGetSize(isrow,&nrows);
 36: 
 37:   ierr  = PetscMalloc((1+oldcols)*sizeof(int),&smap);
 38:   ssmap = smap;
 39:   ierr  = PetscMalloc((1+nrows)*sizeof(int),&lens);
 40:   ierr  = PetscMemzero(smap,oldcols*sizeof(int));
 41:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
 42:   /* determine lens of each row */
 43:   for (i=0; i<nrows; i++) {
 44:     kstart  = ai[irow[i]];
 45:     kend    = kstart + a->ilen[irow[i]];
 46:     lens[i] = 0;
 47:       for (k=kstart; k<kend; k++) {
 48:         if (ssmap[aj[k]]) {
 49:           lens[i]++;
 50:         }
 51:       }
 52:     }
 53:   /* Create and fill new matrix */
 54:   if (scall == MAT_REUSE_MATRIX) {
 55:     c = (Mat_SeqSBAIJ *)((*B)->data);

 57:     if (c->mbs!=nrows || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
 58:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);
 59:     if (flag == PETSC_FALSE) {
 60:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
 61:     }
 62:     PetscMemzero(c->ilen,c->mbs*sizeof(int));
 63:     C = *B;
 64:   } else {
 65:     MatCreateSeqSBAIJ(A->comm,bs,nrows*bs,nrows*bs,0,lens,&C);
 66:   }
 67:   c = (Mat_SeqSBAIJ *)(C->data);
 68:   for (i=0; i<nrows; i++) {
 69:     row    = irow[i];
 70:     kstart = ai[row];
 71:     kend   = kstart + a->ilen[row];
 72:     mat_i  = c->i[i];
 73:     mat_j  = c->j + mat_i;
 74:     mat_a  = c->a + mat_i*bs2;
 75:     mat_ilen = c->ilen + i;
 76:     for (k=kstart; k<kend; k++) {
 77:       if ((tcol=ssmap[a->j[k]])) {
 78:         *mat_j++ = tcol - 1;
 79:         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
 80:         mat_a   += bs2;
 81:         (*mat_ilen)++;
 82:       }
 83:     }
 84:   }
 85: 
 86:   /* Free work space */
 87:   PetscFree(smap);
 88:   PetscFree(lens);
 89:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
 90:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
 91: 
 92:   ISRestoreIndices(isrow,&irow);
 93:   *B = C;
 94:   return(0);
 95: }

 97: int MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
 98: {
 99:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
100:   IS          is1;
101:   int         *vary,*iary,*irow,nrows,i,ierr,bs=a->bs,count;

104:   if (isrow != iscol) SETERRQ(1,"MatGetSubmatrices_SeqSBAIJ: For symm. format, iscol must equal isro");
105: 
106:   ISGetIndices(isrow,&irow);
107:   ISGetSize(isrow,&nrows);
108: 
109:   /* Verify if the indices corespond to each element in a block 
110:    and form the IS with compressed IS */
111:   PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);
112:   iary = vary + a->mbs;
113:   PetscMemzero(vary,(a->mbs)*sizeof(int));
114:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
115: 
116:   count = 0;
117:   for (i=0; i<a->mbs; i++) {
118:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
119:     if (vary[i]==bs) iary[count++] = i;
120:   }
121:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
122: 
123:   ISRestoreIndices(isrow,&irow);
124:   PetscFree(vary);

126:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
127:   ISDestroy(is1);
128:   return(0);
129: }

131: int MatGetSubMatrices_SeqSBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B)
132: {
133:   int ierr,i;

136:   if (scall == MAT_INITIAL_MATRIX) {
137:     PetscMalloc((n+1)*sizeof(Mat),B);
138:   }

140:   for (i=0; i<n; i++) {
141:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
142:   }
143:   return(0);
144: }

146: /* -------------------------------------------------------*/
147: /* Should check that shapes of vectors and matrices match */
148: /* -------------------------------------------------------*/
149:  #include petscblaslapack.h

151: int MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
152: {
153:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
154:   PetscScalar     *x,*z,*xb,x1,zero=0.0;
155:   MatScalar       *v;
156:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

159:   VecSet(&zero,zz);
160:   VecGetArray(xx,&x);
161:   VecGetArray(zz,&z);

163:   v  = a->a;
164:   xb = x;
165: 
166:   for (i=0; i<mbs; i++) {
167:     n  = ai[1] - ai[0];  /* length of i_th row of A */
168:     x1 = xb[0];
169:     ib = aj + *ai;
170:     jmin = 0;
171:     if (*ib == i) {      /* (diag of A)*x */
172:       z[i] += *v++ * x[*ib++];
173:       jmin++;
174:     }
175:     for (j=jmin; j<n; j++) {
176:       cval    = *ib;
177:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
178:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
179:     }
180:     xb++; ai++;
181:   }

183:   VecRestoreArray(xx,&x);
184:   VecRestoreArray(zz,&z);
185:   PetscLogFlops(2*(a->s_nz*2 - A->m) - A->m);  /* s_nz = (nz+m)/2 */
186:   return(0);
187: }

189: int MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
190: {
191:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
192:   PetscScalar      *x,*z,*xb,x1,x2,zero=0.0;
193:   MatScalar        *v;
194:   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


198:   VecSet(&zero,zz);
199:   VecGetArray(xx,&x);
200:   VecGetArray(zz,&z);
201: 
202:   v     = a->a;
203:   xb = x;

205:   for (i=0; i<mbs; i++) {
206:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
207:     x1 = xb[0]; x2 = xb[1];
208:     ib = aj + *ai;
209:     jmin = 0;
210:     if (*ib == i){     /* (diag of A)*x */
211:       z[2*i]   += v[0]*x1 + v[2]*x2;
212:       z[2*i+1] += v[2]*x1 + v[3]*x2;
213:       v += 4; jmin++;
214:     }
215:     for (j=jmin; j<n; j++) {
216:       /* (strict lower triangular part of A)*x  */
217:       cval       = ib[j]*2;
218:       z[cval]     += v[0]*x1 + v[1]*x2;
219:       z[cval+1]   += v[2]*x1 + v[3]*x2;
220:       /* (strict upper triangular part of A)*x  */
221:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
222:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
223:       v  += 4;
224:     }
225:     xb +=2; ai++;
226:   }

228:   VecRestoreArray(xx,&x);
229:   VecRestoreArray(zz,&z);
230:   PetscLogFlops(8*(a->s_nz*2 - A->m) - A->m);
231:   return(0);
232: }

234: int MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
235: {
236:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data;
237:   PetscScalar   *x,*z,*xb,x1,x2,x3,zero=0.0;
238:   MatScalar     *v;
239:   int           mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;


243:   VecSet(&zero,zz);
244:   VecGetArray(xx,&x);
245:   VecGetArray(zz,&z);
246: 
247:   v     = a->a;
248:   xb = x;

250:   for (i=0; i<mbs; i++) {
251:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
252:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
253:     ib = aj + *ai;
254:     jmin = 0;
255:     if (*ib == i){     /* (diag of A)*x */
256:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
257:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
258:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
259:       v += 9; jmin++;
260:     }
261:     for (j=jmin; j<n; j++) {
262:       /* (strict lower triangular part of A)*x  */
263:       cval       = ib[j]*3;
264:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
265:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
266:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
267:       /* (strict upper triangular part of A)*x  */
268:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
269:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
270:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
271:       v  += 9;
272:     }
273:     xb +=3; ai++;
274:   }

276:   VecRestoreArray(xx,&x);
277:   VecRestoreArray(zz,&z);
278:   PetscLogFlops(18*(a->s_nz*2 - A->m) - A->m);
279:   return(0);
280: }

282: int MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
283: {
284:   Mat_SeqSBAIJ     *a = (Mat_SeqSBAIJ*)A->data;
285:   PetscScalar      *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
286:   MatScalar        *v;
287:   int              mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

290:   VecSet(&zero,zz);
291:   VecGetArray(xx,&x);
292:   VecGetArray(zz,&z);
293: 
294:   v     = a->a;
295:   xb = x;

297:   for (i=0; i<mbs; i++) {
298:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
299:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
300:     ib = aj + *ai;
301:     jmin = 0;
302:     if (*ib == i){     /* (diag of A)*x */
303:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
304:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
305:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
306:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
307:       v += 16; jmin++;
308:     }
309:     for (j=jmin; j<n; j++) {
310:       /* (strict lower triangular part of A)*x  */
311:       cval       = ib[j]*4;
312:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
313:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
314:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
315:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
316:       /* (strict upper triangular part of A)*x  */
317:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
318:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
319:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
320:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
321:       v  += 16;
322:     }
323:     xb +=4; ai++;
324:   }

326:   VecRestoreArray(xx,&x);
327:   VecRestoreArray(zz,&z);
328:   PetscLogFlops(32*(a->s_nz*2 - A->m) - A->m);
329:   return(0);
330: }

332: int MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
333: {
334:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
335:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
336:   MatScalar       *v;
337:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

340:   VecSet(&zero,zz);
341:   VecGetArray(xx,&x);
342:   VecGetArray(zz,&z);
343: 
344:   v     = a->a;
345:   xb = x;

347:   for (i=0; i<mbs; i++) {
348:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
349:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
350:     ib = aj + *ai;
351:     jmin = 0;
352:     if (*ib == i){      /* (diag of A)*x */
353:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
354:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
355:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
356:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
357:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
358:       v += 25; jmin++;
359:     }
360:     for (j=jmin; j<n; j++) {
361:       /* (strict lower triangular part of A)*x  */
362:       cval       = ib[j]*5;
363:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
364:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
365:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
366:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
367:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
368:       /* (strict upper triangular part of A)*x  */
369:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
370:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
371:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
372:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
373:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
374:       v  += 25;
375:     }
376:     xb +=5; ai++;
377:   }

379:   VecRestoreArray(xx,&x);
380:   VecRestoreArray(zz,&z);
381:   PetscLogFlops(50*(a->s_nz*2 - A->m) - A->m);
382:   return(0);
383: }


386: int MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
387: {
388:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
389:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
390:   MatScalar       *v;
391:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

394:   VecSet(&zero,zz);
395:   VecGetArray(xx,&x);
396:   VecGetArray(zz,&z);
397: 
398:   v     = a->a;
399:   xb = x;

401:   for (i=0; i<mbs; i++) {
402:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
403:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
404:     ib = aj + *ai;
405:     jmin = 0;
406:     if (*ib == i){      /* (diag of A)*x */
407:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
408:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
409:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
410:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
411:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
412:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
413:       v += 36; jmin++;
414:     }
415:     for (j=jmin; j<n; j++) {
416:       /* (strict lower triangular part of A)*x  */
417:       cval       = ib[j]*6;
418:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
419:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
420:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
421:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
422:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
423:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
424:       /* (strict upper triangular part of A)*x  */
425:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
426:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
427:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
428:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
429:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
430:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
431:       v  += 36;
432:     }
433:     xb +=6; ai++;
434:   }

436:   VecRestoreArray(xx,&x);
437:   VecRestoreArray(zz,&z);
438:   PetscLogFlops(72*(a->s_nz*2 - A->m) - A->m);
439:   return(0);
440: }
441: int MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
442: {
443:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
444:   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
445:   MatScalar       *v;
446:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

449:   VecSet(&zero,zz);
450:   VecGetArray(xx,&x);
451:   VecGetArray(zz,&z);
452: 
453:   v     = a->a;
454:   xb = x;

456:   for (i=0; i<mbs; i++) {
457:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
458:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
459:     ib = aj + *ai;
460:     jmin = 0;
461:     if (*ib == i){      /* (diag of A)*x */
462:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
463:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
464:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
465:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
466:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
467:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
468:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
469:       v += 49; jmin++;
470:     }
471:     for (j=jmin; j<n; j++) {
472:       /* (strict lower triangular part of A)*x  */
473:       cval       = ib[j]*7;
474:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
475:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
476:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
477:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
478:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
479:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
480:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
481:       /* (strict upper triangular part of A)*x  */
482:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
483:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
484:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
485:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
486:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
487:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
488:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
489:       v  += 49;
490:     }
491:     xb +=7; ai++;
492:   }
493:   VecRestoreArray(xx,&x);
494:   VecRestoreArray(zz,&z);
495:   PetscLogFlops(98*(a->s_nz*2 - A->m) - A->m);
496:   return(0);
497: }

499: /*
500:     This will not work with MatScalar == float because it calls the BLAS
501: */
502: int MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
503: {
504:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
505:   PetscScalar     *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
506:   MatScalar       *v;
507:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
508:   int             ncols,k;

511:   VecSet(&zero,zz);
512:   VecGetArray(xx,&x); x_ptr=x;
513:   VecGetArray(zz,&z); z_ptr=z;

515:   aj   = a->j;
516:   v    = a->a;
517:   ii   = a->i;

519:   if (!a->mult_work) {
520:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
521:   }
522:   work = a->mult_work;
523: 
524:   for (i=0; i<mbs; i++) {
525:     n     = ii[1] - ii[0]; ncols = n*bs;
526:     workt = work; idx=aj+ii[0];

528:     /* upper triangular part */
529:     for (j=0; j<n; j++) {
530:       xb = x_ptr + bs*(*idx++);
531:       for (k=0; k<bs; k++) workt[k] = xb[k];
532:       workt += bs;
533:     }
534:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
535:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
536: 
537:     /* strict lower triangular part */
538:     idx = aj+ii[0];
539:     if (*idx == i){
540:       ncols -= bs; v += bs2; idx++; n--;
541:     }
542: 
543:     if (ncols > 0){
544:       workt = work;
545:       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));
546:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
547:       for (j=0; j<n; j++) {
548:         zb = z_ptr + bs*(*idx++);
549:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
550:         workt += bs;
551:       }
552:     }
553:     x += bs; v += n*bs2; z += bs; ii++;
554:   }
555: 
556:   VecRestoreArray(xx,&x);
557:   VecRestoreArray(zz,&z);
558:   PetscLogFlops(2*(a->s_nz*2 - A->m)*bs2 - A->m);
559:   return(0);
560: }

562: int MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
563: {
564:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
565:   PetscScalar     *x,*y,*z,*xb,x1;
566:   MatScalar       *v;
567:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

570:   VecGetArray(xx,&x);
571:   if (yy != xx) {
572:     VecGetArray(yy,&y);
573:   } else {
574:     y = x;
575:   }
576:   if (zz != yy) {
577:     /* VecCopy(yy,zz); */
578:     VecGetArray(zz,&z);
579:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
580:   } else {
581:     z = y;
582:   }

584:   v  = a->a;
585:   xb = x;

587:   for (i=0; i<mbs; i++) {
588:     n  = ai[1] - ai[0];  /* length of i_th row of A */
589:     x1 = xb[0];
590:     ib = aj + *ai;
591:     jmin = 0;
592:     if (*ib == i) {            /* (diag of A)*x */
593:       z[i] += *v++ * x[*ib++]; jmin++;
594:     }
595:     for (j=jmin; j<n; j++) {
596:       cval    = *ib;
597:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
598:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
599:     }
600:     xb++; ai++;
601:   }

603:   VecRestoreArray(xx,&x);
604:   if (yy != xx) VecRestoreArray(yy,&y);
605:   if (zz != yy) VecRestoreArray(zz,&z);
606: 
607:   PetscLogFlops(2*(a->s_nz*2 - A->m));
608:   return(0);
609: }

611: int MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
612: {
613:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
614:   PetscScalar     *x,*y,*z,*xb,x1,x2;
615:   MatScalar       *v;
616:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

619:   VecGetArray(xx,&x);
620:   if (yy != xx) {
621:     VecGetArray(yy,&y);
622:   } else {
623:     y = x;
624:   }
625:   if (zz != yy) {
626:     /* VecCopy(yy,zz); */
627:     VecGetArray(zz,&z);
628:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
629:   } else {
630:     z = y;
631:   }

633:   v     = a->a;
634:   xb = x;

636:   for (i=0; i<mbs; i++) {
637:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
638:     x1 = xb[0]; x2 = xb[1];
639:     ib = aj + *ai;
640:     jmin = 0;
641:     if (*ib == i){      /* (diag of A)*x */
642:       z[2*i]   += v[0]*x1 + v[2]*x2;
643:       z[2*i+1] += v[2]*x1 + v[3]*x2;
644:       v += 4; jmin++;
645:     }
646:     for (j=jmin; j<n; j++) {
647:       /* (strict lower triangular part of A)*x  */
648:       cval       = ib[j]*2;
649:       z[cval]     += v[0]*x1 + v[1]*x2;
650:       z[cval+1]   += v[2]*x1 + v[3]*x2;
651:       /* (strict upper triangular part of A)*x  */
652:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
653:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
654:       v  += 4;
655:     }
656:     xb +=2; ai++;
657:   }

659:   VecRestoreArray(xx,&x);
660:   if (yy != xx) VecRestoreArray(yy,&y);
661:   if (zz != yy) VecRestoreArray(zz,&z);

663:   PetscLogFlops(4*(a->s_nz*2 - A->m));
664:   return(0);
665: }

667: int MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
668: {
669:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
670:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3;
671:   MatScalar       *v;
672:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

675:   VecGetArray(xx,&x);
676:   if (yy != xx) {
677:     VecGetArray(yy,&y);
678:   } else {
679:     y = x;
680:   }
681:   if (zz != yy) {
682:     /* VecCopy(yy,zz); */
683:     VecGetArray(zz,&z);
684:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
685:   } else {
686:     z = y;
687:   }

689:   v     = a->a;
690:   xb = x;

692:   for (i=0; i<mbs; i++) {
693:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
694:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
695:     ib = aj + *ai;
696:     jmin = 0;
697:     if (*ib == i){     /* (diag of A)*x */
698:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
699:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
700:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
701:      v += 9; jmin++;
702:     }
703:     for (j=jmin; j<n; j++) {
704:       /* (strict lower triangular part of A)*x  */
705:       cval       = ib[j]*3;
706:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
707:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
708:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
709:       /* (strict upper triangular part of A)*x  */
710:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
711:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
712:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
713:       v  += 9;
714:     }
715:     xb +=3; ai++;
716:   }

718:   VecRestoreArray(xx,&x);
719:   if (yy != xx) VecRestoreArray(yy,&y);
720:   if (zz != yy) VecRestoreArray(zz,&z);

722:   PetscLogFlops(18*(a->s_nz*2 - A->m));
723:   return(0);
724: }

726: int MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
727: {
728:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
729:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4;
730:   MatScalar       *v;
731:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

734:   VecGetArray(xx,&x);
735:   if (yy != xx) {
736:     VecGetArray(yy,&y);
737:   } else {
738:     y = x;
739:   }
740:   if (zz != yy) {
741:     /* VecCopy(yy,zz); */
742:     VecGetArray(zz,&z);
743:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
744:   } else {
745:     z = y;
746:   }

748:   v     = a->a;
749:   xb = x;

751:   for (i=0; i<mbs; i++) {
752:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
753:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
754:     ib = aj + *ai;
755:     jmin = 0;
756:     if (*ib == i){      /* (diag of A)*x */
757:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
758:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
759:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
760:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
761:       v += 16; jmin++;
762:     }
763:     for (j=jmin; j<n; j++) {
764:       /* (strict lower triangular part of A)*x  */
765:       cval       = ib[j]*4;
766:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
767:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
768:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
769:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
770:       /* (strict upper triangular part of A)*x  */
771:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
772:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
773:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
774:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
775:       v  += 16;
776:     }
777:     xb +=4; ai++;
778:   }

780:   VecRestoreArray(xx,&x);
781:   if (yy != xx) VecRestoreArray(yy,&y);
782:   if (zz != yy) VecRestoreArray(zz,&z);

784:   PetscLogFlops(32*(a->s_nz*2 - A->m));
785:   return(0);
786: }

788: int MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
789: {
790:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
791:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5;
792:   MatScalar       *v;
793:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

796:   VecGetArray(xx,&x);
797:   if (yy != xx) {
798:     VecGetArray(yy,&y);
799:   } else {
800:     y = x;
801:   }
802:   if (zz != yy) {
803:     /* VecCopy(yy,zz); */
804:     VecGetArray(zz,&z);
805:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
806:   } else {
807:     z = y;
808:   }

810:   v     = a->a;
811:   xb = x;

813:   for (i=0; i<mbs; i++) {
814:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
815:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
816:     ib = aj + *ai;
817:     jmin = 0;
818:     if (*ib == i){      /* (diag of A)*x */
819:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
820:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
821:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
822:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
823:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
824:       v += 25; jmin++;
825:     }
826:     for (j=jmin; j<n; j++) {
827:       /* (strict lower triangular part of A)*x  */
828:       cval       = ib[j]*5;
829:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
830:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
831:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
832:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
833:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
834:       /* (strict upper triangular part of A)*x  */
835:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
836:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
837:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
838:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
839:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
840:       v  += 25;
841:     }
842:     xb +=5; ai++;
843:   }

845:   VecRestoreArray(xx,&x);
846:   if (yy != xx) VecRestoreArray(yy,&y);
847:   if (zz != yy) VecRestoreArray(zz,&z);

849:   PetscLogFlops(50*(a->s_nz*2 - A->m));
850:   return(0);
851: }
852: int MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
853: {
854:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
855:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
856:   MatScalar       *v;
857:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

860:   VecGetArray(xx,&x);
861:   if (yy != xx) {
862:     VecGetArray(yy,&y);
863:   } else {
864:     y = x;
865:   }
866:   if (zz != yy) {
867:     /* VecCopy(yy,zz); */
868:     VecGetArray(zz,&z);
869:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
870:   } else {
871:     z = y;
872:   }

874:   v     = a->a;
875:   xb = x;

877:   for (i=0; i<mbs; i++) {
878:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
879:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
880:     ib = aj + *ai;
881:     jmin = 0;
882:     if (*ib == i){     /* (diag of A)*x */
883:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
884:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
885:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
886:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
887:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
888:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
889:       v += 36; jmin++;
890:     }
891:     for (j=jmin; j<n; j++) {
892:       /* (strict lower triangular part of A)*x  */
893:       cval       = ib[j]*6;
894:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
895:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
896:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
897:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
898:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
899:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
900:       /* (strict upper triangular part of A)*x  */
901:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
902:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
903:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
904:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
905:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
906:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
907:       v  += 36;
908:     }
909:     xb +=6; ai++;
910:   }

912:   VecRestoreArray(xx,&x);
913:   if (yy != xx) VecRestoreArray(yy,&y);
914:   if (zz != yy) VecRestoreArray(zz,&z);

916:   PetscLogFlops(72*(a->s_nz*2 - A->m));
917:   return(0);
918: }

920: int MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
921: {
922:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
923:   PetscScalar     *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
924:   MatScalar       *v;
925:   int             mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,ierr,*ib,cval,j,jmin;

928:   VecGetArray(xx,&x);
929:   if (yy != xx) {
930:     VecGetArray(yy,&y);
931:   } else {
932:     y = x;
933:   }
934:   if (zz != yy) {
935:     /* VecCopy(yy,zz); */
936:     VecGetArray(zz,&z);
937:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
938:   } else {
939:     z = y;
940:   }

942:   v     = a->a;
943:   xb = x;

945:   for (i=0; i<mbs; i++) {
946:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
947:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
948:     ib = aj + *ai;
949:     jmin = 0;
950:     if (*ib == i){     /* (diag of A)*x */
951:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
952:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
953:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
954:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
955:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
956:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
957:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
958:       v += 49; jmin++;
959:     }
960:     for (j=jmin; j<n; j++) {
961:       /* (strict lower triangular part of A)*x  */
962:       cval       = ib[j]*7;
963:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
964:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
965:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
966:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
967:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
968:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
969:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
970:       /* (strict upper triangular part of A)*x  */
971:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
972:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
973:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
974:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
975:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
976:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
977:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
978:       v  += 49;
979:     }
980:     xb +=7; ai++;
981:   }

983:   VecRestoreArray(xx,&x);
984:   if (yy != xx) VecRestoreArray(yy,&y);
985:   if (zz != yy) VecRestoreArray(zz,&z);

987:   PetscLogFlops(98*(a->s_nz*2 - A->m));
988:   return(0);
989: }

991: int MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
992: {
993:   Mat_SeqSBAIJ    *a = (Mat_SeqSBAIJ*)A->data;
994:   PetscScalar     *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
995:   MatScalar       *v;
996:   int             ierr,mbs=a->mbs,i,*idx,*aj,*ii,bs=a->bs,j,n,bs2=a->bs2;
997:   int             ncols,k;

1000:   VecGetArray(xx,&x); x_ptr=x;
1001:   if (yy != xx) {
1002:     VecGetArray(yy,&y);
1003:   } else {
1004:     y = x;
1005:   }
1006:   if (zz != yy) {
1007:     /* VecCopy(yy,zz); */
1008:     VecGetArray(zz,&z); z_ptr=z;
1009:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1010:   } else {
1011:     z = y;
1012:   }

1014:   aj   = a->j;
1015:   v    = a->a;
1016:   ii   = a->i;

1018:   if (!a->mult_work) {
1019:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1020:   }
1021:   work = a->mult_work;
1022: 
1023: 
1024:   for (i=0; i<mbs; i++) {
1025:     n     = ii[1] - ii[0]; ncols = n*bs;
1026:     workt = work; idx=aj+ii[0];

1028:     /* upper triangular part */
1029:     for (j=0; j<n; j++) {
1030:       xb = x_ptr + bs*(*idx++);
1031:       for (k=0; k<bs; k++) workt[k] = xb[k];
1032:       workt += bs;
1033:     }
1034:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1035:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1037:     /* strict lower triangular part */
1038:     idx = aj+ii[0];
1039:     if (*idx == i){
1040:       ncols -= bs; v += bs2; idx++; n--;
1041:     }
1042:     if (ncols > 0){
1043:       workt = work;
1044:       ierr  = PetscMemzero(workt,ncols*sizeof(PetscScalar));
1045:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1046:       for (j=0; j<n; j++) {
1047:         zb = z_ptr + bs*(*idx++);
1048:         /* idx++; */
1049:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1050:         workt += bs;
1051:       }
1052:     }

1054:     x += bs; v += n*bs2; z += bs; ii++;
1055:   }

1057:   VecRestoreArray(xx,&x);
1058:   if (yy != xx) VecRestoreArray(yy,&y);
1059:   if (zz != yy) VecRestoreArray(zz,&z);

1061:   PetscLogFlops(2*(a->s_nz*2 - A->m));
1062:   return(0);
1063: }

1065: int MatMultTranspose_SeqSBAIJ(Mat A,Vec xx,Vec zz)
1066: {
1068:   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1069:   /* return(0); */
1070: }

1072: int MatMultTransposeAdd_SeqSBAIJ(Mat A,Vec xx,Vec yy,Vec zz)

1074: {
1076:   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1077:   /* return(0); */
1078: }

1080: int MatScale_SeqSBAIJ(PetscScalar *alpha,Mat inA)
1081: {
1082:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1083:   int         one = 1,totalnz = a->bs2*a->s_nz;

1086:   BLscal_(&totalnz,alpha,a->a,&one);
1087:   PetscLogFlops(totalnz);
1088:   return(0);
1089: }

1091: int MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1092: {
1093:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1094:   MatScalar   *v = a->a;
1095:   PetscReal   sum_diag = 0.0, sum_off = 0.0, *sum;
1096:   int         i,j,k,bs = a->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1097:   int         *jl,*il,jmin,jmax,ierr,nexti,ik,*col;
1098: 
1100:   if (type == NORM_FROBENIUS) {
1101:     for (k=0; k<mbs; k++){
1102:       jmin = a->i[k]; jmax = a->i[k+1];
1103:       col  = aj + jmin;
1104:       if (*col == k){         /* diagonal block */
1105:         for (i=0; i<bs2; i++){
1106: #if defined(PETSC_USE_COMPLEX)
1107:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1108: #else
1109:           sum_diag += (*v)*(*v); v++;
1110: #endif
1111:         }
1112:         jmin++;
1113:       }
1114:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1115:         for (i=0; i<bs2; i++){
1116: #if defined(PETSC_USE_COMPLEX)
1117:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1118: #else
1119:           sum_off += (*v)*(*v); v++;
1120: #endif  
1121:         }
1122:       }
1123:     }
1124:     *norm = sqrt(sum_diag + 2*sum_off);

1126:   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1127:     PetscMalloc(mbs*sizeof(int),&il);
1128:     PetscMalloc(mbs*sizeof(int),&jl);
1129:     PetscMalloc(bs*sizeof(PetscReal),&sum);
1130:     for (i=0; i<mbs; i++) {
1131:       jl[i] = mbs; il[0] = 0;
1132:     }

1134:     *norm = 0.0;
1135:     for (k=0; k<mbs; k++) { /* k_th block row */
1136:       for (j=0; j<bs; j++) sum[j]=0.0;

1138:       /*-- col sum --*/
1139:       i = jl[k]; /* first |A(i,k)| to be added */
1140:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1141:                   at step k */
1142:       while (i<mbs){
1143:         nexti = jl[i];  /* next block row to be added */
1144:         ik    = il[i];  /* block index of A(i,k) in the array a */
1145:         for (j=0; j<bs; j++){
1146:           v = a->a + ik*bs2 + j*bs;
1147:           for (k1=0; k1<bs; k1++) {
1148:             sum[j] += PetscAbsScalar(*v); v++;
1149:           }
1150:         }
1151:         /* update il, jl */
1152:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1153:         jmax = a->i[i+1];
1154:         if (jmin < jmax){
1155:           il[i] = jmin;
1156:           j   = a->j[jmin];
1157:           jl[i] = jl[j]; jl[j]=i;
1158:         }
1159:         i = nexti;
1160:       }
1161: 
1162:       /*-- row sum --*/
1163:       jmin = a->i[k]; jmax = a->i[k+1];
1164:       for (i=jmin; i<jmax; i++) {
1165:         for (j=0; j<bs; j++){
1166:           v = a->a + i*bs2 + j;
1167:           for (k1=0; k1<bs; k1++){
1168:             sum[j] += PetscAbsScalar(*v);
1169:             v   += bs;
1170:           }
1171:         }
1172:       }
1173:       /* add k_th block row to il, jl */
1174:       col = aj+jmin;
1175:       if (*col == k) jmin++;
1176:       if (jmin < jmax){
1177:         il[k] = jmin;
1178:         j   = a->j[jmin];
1179:         jl[k] = jl[j]; jl[j] = k;
1180:       }
1181:       for (j=0; j<bs; j++){
1182:         if (sum[j] > *norm) *norm = sum[j];
1183:       }
1184:     }
1185:     PetscFree(il);
1186:     PetscFree(jl);
1187:     PetscFree(sum);
1188:   } else {
1189:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1190:   }
1191:   return(0);
1192: }

1194: int MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1195: {
1196:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;
1197:   int          ierr;
1198:   PetscTruth   flag;

1201:   PetscTypeCompare((PetscObject)A,MATSEQBAIJ,&flag);
1202:   if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");

1204:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1205:   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->s_nz != b->s_nz)) {
1206:     *flg = PETSC_FALSE;
1207:     return(0);
1208:   }
1209: 
1210:   /* if the a->i are the same */
1211:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);
1212:   if (*flg == PETSC_FALSE) {
1213:     return(0);
1214:   }
1215: 
1216:   /* if a->j are the same */
1217:   PetscMemcmp(a->j,b->j,(a->s_nz)*sizeof(int),flg);
1218:   if (*flg == PETSC_FALSE) {
1219:     return(0);
1220:   }
1221:   /* if a->a are the same */
1222:   PetscMemcmp(a->a,b->a,(a->s_nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);
1223:   return(0);
1224: 
1225: }

1227: int MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1228: {
1229:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1230:   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1231:   PetscScalar  *x,zero = 0.0;
1232:   MatScalar    *aa,*aa_j;

1235:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1236:   bs   = a->bs;
1237:   aa   = a->a;
1238:   ai   = a->i;
1239:   aj   = a->j;
1240:   ambs = a->mbs;
1241:   bs2  = a->bs2;

1243:   VecSet(&zero,v);
1244:   VecGetArray(v,&x);
1245:   VecGetLocalSize(v,&n);
1246:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1247:   for (i=0; i<ambs; i++) {
1248:     j=ai[i];
1249:     if (aj[j] == i) {             /* if this is a diagonal element */
1250:       row  = i*bs;
1251:       aa_j = aa + j*bs2;
1252:       for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1253:     }
1254:   }
1255:   VecRestoreArray(v,&x);
1256:   return(0);
1257: }

1259: int MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1260: {
1261:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1262:   PetscScalar  *l,*r,x,*li,*ri;
1263:   MatScalar    *aa,*v;
1264:   int          ierr,i,j,k,lm,rn,M,m,*ai,*aj,mbs,tmp,bs,bs2;

1267:   ai  = a->i;
1268:   aj  = a->j;
1269:   aa  = a->a;
1270:   m   = A->m;
1271:   bs  = a->bs;
1272:   mbs = a->mbs;
1273:   bs2 = a->bs2;

1275:   if (ll != rr) {
1276:     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be samen");
1277:   }
1278:   if (ll) {
1279:     VecGetArray(ll,&l);
1280:     VecGetLocalSize(ll,&lm);
1281:     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1282:     for (i=0; i<mbs; i++) { /* for each block row */
1283:       M  = ai[i+1] - ai[i];
1284:       li = l + i*bs;
1285:       v  = aa + bs2*ai[i];
1286:       for (j=0; j<M; j++) { /* for each block */
1287:         for (k=0; k<bs2; k++) {
1288:           (*v++) *= li[k%bs];
1289:         }
1290: #ifdef CONT
1291:         /* will be used to replace the above loop */
1292:         ri = l + bs*aj[ai[i]+j];
1293:         for (k=0; k<bs; k++) { /* column value */
1294:           x = ri[k];
1295:           for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1296:         }
1297: #endif

1299:       }
1300:     }
1301:     VecRestoreArray(ll,&l);
1302:     PetscLogFlops(2*a->s_nz);
1303:   }
1304:   /* will be deleted */
1305:   if (rr) {
1306:     VecGetArray(rr,&r);
1307:     VecGetLocalSize(rr,&rn);
1308:     if (rn != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1309:     for (i=0; i<mbs; i++) { /* for each block row */
1310:       M  = ai[i+1] - ai[i];
1311:       v  = aa + bs2*ai[i];
1312:       for (j=0; j<M; j++) { /* for each block */
1313:         ri = r + bs*aj[ai[i]+j];
1314:         for (k=0; k<bs; k++) {
1315:           x = ri[k];
1316:           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1317:         }
1318:       }
1319:     }
1320:     VecRestoreArray(rr,&r);
1321:     PetscLogFlops(a->s_nz);
1322:   }
1323:   return(0);
1324: }

1326: int MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1327: {
1328:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1331:   info->rows_global    = (double)A->m;
1332:   info->columns_global = (double)A->m;
1333:   info->rows_local     = (double)A->m;
1334:   info->columns_local  = (double)A->m;
1335:   info->block_size     = a->bs2;
1336:   info->nz_allocated   = a->s_maxnz; /*num. of nonzeros in upper triangular part */
1337:   info->nz_used        = a->bs2*a->s_nz; /*num. of nonzeros in upper triangular part */
1338:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1339:   info->assemblies   = A->num_ass;
1340:   info->mallocs      = a->reallocs;
1341:   info->memory       = A->mem;
1342:   if (A->factor) {
1343:     info->fill_ratio_given  = A->info.fill_ratio_given;
1344:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1345:     info->factor_mallocs    = A->info.factor_mallocs;
1346:   } else {
1347:     info->fill_ratio_given  = 0;
1348:     info->fill_ratio_needed = 0;
1349:     info->factor_mallocs    = 0;
1350:   }
1351:   return(0);
1352: }


1355: int MatZeroEntries_SeqSBAIJ(Mat A)
1356: {
1357:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1358:   int         ierr;

1361:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1362:   return(0);
1363: }

1365: int MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1366: {
1367:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1368:   int          ierr,i,j,n,row,col,bs,*ai,*aj,mbs;
1369:   PetscReal    atmp;
1370:   MatScalar    *aa;
1371:   PetscScalar  zero = 0.0,*x;
1372:   int          ncols,brow,bcol,krow,kcol;

1375:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1376:   bs   = a->bs;
1377:   aa   = a->a;
1378:   ai   = a->i;
1379:   aj   = a->j;
1380:   mbs = a->mbs;

1382:   VecSet(&zero,v);
1383:   VecGetArray(v,&x);
1384:   VecGetLocalSize(v,&n);
1385:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1386:   for (i=0; i<mbs; i++) {
1387:     ncols = ai[1] - ai[0]; ai++;
1388:     brow  = bs*i;
1389:     for (j=0; j<ncols; j++){
1390:       bcol = bs*(*aj);
1391:       for (kcol=0; kcol<bs; kcol++){
1392:         col = bcol + kcol;      /* col index */
1393:         for (krow=0; krow<bs; krow++){
1394:           atmp = PetscAbsScalar(*aa); aa++;
1395:           row = brow + krow;    /* row index */
1396:           /* printf("val[%d,%d]: %gn",row,col,atmp); */
1397:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1398:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1399:         }
1400:       }
1401:       aj++;
1402:     }
1403:   }
1404:   VecRestoreArray(v,&x);
1405:   return(0);
1406: }