Actual source code: sbaij2.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/inline/spops.h
 5:  #include src/inline/ilu.h
 6:  #include petscbt.h
 7:  #include src/mat/impls/sbaij/seq/sbaij.h

 11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
 12: {
 13:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
 15:   PetscInt       brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
 16:   PetscBT        table,table0;

 19:   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 20:   mbs = a->mbs;
 21:   ai  = a->i;
 22:   aj  = a->j;
 23:   bs  = A->bs;
 24:   PetscBTCreate(mbs,table);
 25:   PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
 26:   PetscMalloc((A->m+1)*sizeof(PetscInt),&nidx2);
 27:   PetscBTCreate(mbs,table0);

 29:   for (i=0; i<is_max; i++) { /* for each is */
 30:     isz  = 0;
 31:     PetscBTMemzero(mbs,table);
 32: 
 33:     /* Extract the indices, assume there can be duplicate entries */
 34:     ISGetIndices(is[i],&idx);
 35:     ISGetLocalSize(is[i],&n);

 37:     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
 38:     bcol_max = 0;
 39:     for (j=0; j<n ; ++j){
 40:       brow = idx[j]/bs; /* convert the indices into block indices */
 41:       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
 42:       if(!PetscBTLookupSet(table,brow)) {
 43:         nidx[isz++] = brow;
 44:         if (bcol_max < brow) bcol_max = brow;
 45:       }
 46:     }
 47:     ISRestoreIndices(is[i],&idx);
 48:     ISDestroy(is[i]);
 49: 
 50:     k = 0;
 51:     for (j=0; j<ov; j++){ /* for each overlap */
 52:       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 53:       PetscBTMemzero(mbs,table0);
 54:       for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }

 56:       n = isz;  /* length of the updated is[i] */
 57:       for (brow=0; brow<mbs; brow++){
 58:         start = ai[brow]; end   = ai[brow+1];
 59:         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
 60:           for (l = start; l<end ; l++){
 61:             bcol = aj[l];
 62:             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
 63:           }
 64:           k++;
 65:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 66:         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
 67:           for (l = start; l<end ; l++){
 68:             bcol = aj[l];
 69:             if (bcol > bcol_max) break;
 70:             if (PetscBTLookup(table0,bcol)){
 71:               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
 72:               break; /* for l = start; l<end ; l++) */
 73:             }
 74:           }
 75:         }
 76:       }
 77:     } /* for each overlap */

 79:     /* expand the Index Set */
 80:     for (j=0; j<isz; j++) {
 81:       for (k=0; k<bs; k++)
 82:         nidx2[j*bs+k] = nidx[j]*bs+k;
 83:     }
 84:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
 85:   }
 86:   PetscBTDestroy(table);
 87:   PetscFree(nidx);
 88:   PetscFree(nidx2);
 89:   PetscBTDestroy(table0);
 90:   return(0);
 91: }

 95: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
 96: {
 97:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
 99:   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
100:   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
101:   PetscInt       *irow,nrows,*ssmap,bs=A->bs,bs2=a->bs2;
102:   PetscInt       *aj = a->j,*ai = a->i;
103:   MatScalar      *mat_a;
104:   Mat            C;
105:   PetscTruth     flag;

108:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
109:   ISSorted(iscol,(PetscTruth*)&i);
110:   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

112:   ISGetIndices(isrow,&irow);
113:   ISGetSize(isrow,&nrows);
114: 
115:   PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
116:   ssmap = smap;
117:   PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
118:   PetscMemzero(smap,oldcols*sizeof(PetscInt));
119:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
120:   /* determine lens of each row */
121:   for (i=0; i<nrows; i++) {
122:     kstart  = ai[irow[i]];
123:     kend    = kstart + a->ilen[irow[i]];
124:     lens[i] = 0;
125:       for (k=kstart; k<kend; k++) {
126:         if (ssmap[aj[k]]) {
127:           lens[i]++;
128:         }
129:       }
130:     }
131:   /* Create and fill new matrix */
132:   if (scall == MAT_REUSE_MATRIX) {
133:     c = (Mat_SeqSBAIJ *)((*B)->data);

135:     if (c->mbs!=nrows || (*B)->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
136:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
137:     if (!flag) {
138:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
139:     }
140:     PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
141:     C = *B;
142:   } else {
143:     MatCreate(A->comm,&C);
144:     MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
145:     MatSetType(C,A->type_name);
146:     MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
147:   }
148:   c = (Mat_SeqSBAIJ *)(C->data);
149:   for (i=0; i<nrows; i++) {
150:     row    = irow[i];
151:     kstart = ai[row];
152:     kend   = kstart + a->ilen[row];
153:     mat_i  = c->i[i];
154:     mat_j  = c->j + mat_i;
155:     mat_a  = c->a + mat_i*bs2;
156:     mat_ilen = c->ilen + i;
157:     for (k=kstart; k<kend; k++) {
158:       if ((tcol=ssmap[a->j[k]])) {
159:         *mat_j++ = tcol - 1;
160:         PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
161:         mat_a   += bs2;
162:         (*mat_ilen)++;
163:       }
164:     }
165:   }
166: 
167:   /* Free work space */
168:   PetscFree(smap);
169:   PetscFree(lens);
170:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
171:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
172: 
173:   ISRestoreIndices(isrow,&irow);
174:   *B = C;
175:   return(0);
176: }

180: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
181: {
182:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
183:   IS             is1;
185:   PetscInt       *vary,*iary,*irow,nrows,i,bs=A->bs,count;

188:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
189: 
190:   ISGetIndices(isrow,&irow);
191:   ISGetSize(isrow,&nrows);
192: 
193:   /* Verify if the indices corespond to each element in a block 
194:    and form the IS with compressed IS */
195:   PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
196:   iary = vary + a->mbs;
197:   PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
198:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
199: 
200:   count = 0;
201:   for (i=0; i<a->mbs; i++) {
202:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
203:     if (vary[i]==bs) iary[count++] = i;
204:   }
205:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
206: 
207:   ISRestoreIndices(isrow,&irow);
208:   PetscFree(vary);

210:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
211:   ISDestroy(is1);
212:   return(0);
213: }

217: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
218: {
220:   PetscInt       i;

223:   if (scall == MAT_INITIAL_MATRIX) {
224:     PetscMalloc((n+1)*sizeof(Mat),B);
225:   }

227:   for (i=0; i<n; i++) {
228:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
229:   }
230:   return(0);
231: }

233: /* -------------------------------------------------------*/
234: /* Should check that shapes of vectors and matrices match */
235: /* -------------------------------------------------------*/
236:  #include petscblaslapack.h

240: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
241: {
242:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
243:   PetscScalar    *x,*z,*xb,x1,zero=0.0;
244:   MatScalar      *v;
246:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

249:   VecSet(zz,zero);
250:   VecGetArray(xx,&x);
251:   VecGetArray(zz,&z);

253:   v  = a->a;
254:   xb = x;
255: 
256:   for (i=0; i<mbs; i++) {
257:     n  = ai[1] - ai[0];  /* length of i_th row of A */
258:     x1 = xb[0];
259:     ib = aj + *ai;
260:     jmin = 0;
261:     if (*ib == i) {      /* (diag of A)*x */
262:       z[i] += *v++ * x[*ib++];
263:       jmin++;
264:     }
265:     for (j=jmin; j<n; j++) {
266:       cval    = *ib;
267:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
268:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
269:     }
270:     xb++; ai++;
271:   }

273:   VecRestoreArray(xx,&x);
274:   VecRestoreArray(zz,&z);
275:   PetscLogFlops(2*(a->nz*2 - A->m) - A->m);  /* nz = (nz+m)/2 */
276:   return(0);
277: }

281: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
282: {
283:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
284:   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
285:   MatScalar      *v;
287:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

290:   VecSet(zz,zero);
291:   VecGetArray(xx,&x);
292:   VecGetArray(zz,&z);
293: 
294:   v     = a->a;
295:   xb = x;

297:   for (i=0; i<mbs; i++) {
298:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
299:     x1 = xb[0]; x2 = xb[1];
300:     ib = aj + *ai;
301:     jmin = 0;
302:     if (*ib == i){     /* (diag of A)*x */
303:       z[2*i]   += v[0]*x1 + v[2]*x2;
304:       z[2*i+1] += v[2]*x1 + v[3]*x2;
305:       v += 4; jmin++;
306:     }
307:     for (j=jmin; j<n; j++) {
308:       /* (strict lower triangular part of A)*x  */
309:       cval       = ib[j]*2;
310:       z[cval]     += v[0]*x1 + v[1]*x2;
311:       z[cval+1]   += v[2]*x1 + v[3]*x2;
312:       /* (strict upper triangular part of A)*x  */
313:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
314:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
315:       v  += 4;
316:     }
317:     xb +=2; ai++;
318:   }

320:   VecRestoreArray(xx,&x);
321:   VecRestoreArray(zz,&z);
322:   PetscLogFlops(8*(a->nz*2 - A->m) - A->m);
323:   return(0);
324: }

328: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
329: {
330:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
331:   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
332:   MatScalar      *v;
334:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

337:   VecSet(zz,zero);
338:   VecGetArray(xx,&x);
339:   VecGetArray(zz,&z);
340: 
341:   v    = a->a;
342:   xb   = x;

344:   for (i=0; i<mbs; i++) {
345:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
346:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
347:     ib = aj + *ai;
348:     jmin = 0;
349:     if (*ib == i){     /* (diag of A)*x */
350:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
351:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
352:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
353:       v += 9; jmin++;
354:     }
355:     for (j=jmin; j<n; j++) {
356:       /* (strict lower triangular part of A)*x  */
357:       cval       = ib[j]*3;
358:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
359:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
360:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
361:       /* (strict upper triangular part of A)*x  */
362:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
363:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
364:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
365:       v  += 9;
366:     }
367:     xb +=3; ai++;
368:   }

370:   VecRestoreArray(xx,&x);
371:   VecRestoreArray(zz,&z);
372:   PetscLogFlops(18*(a->nz*2 - A->m) - A->m);
373:   return(0);
374: }

378: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
379: {
380:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
381:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
382:   MatScalar      *v;
384:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

387:   VecSet(zz,zero);
388:   VecGetArray(xx,&x);
389:   VecGetArray(zz,&z);
390: 
391:   v     = a->a;
392:   xb = x;

394:   for (i=0; i<mbs; i++) {
395:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
396:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
397:     ib = aj + *ai;
398:     jmin = 0;
399:     if (*ib == i){     /* (diag of A)*x */
400:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
401:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
402:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
403:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
404:       v += 16; jmin++;
405:     }
406:     for (j=jmin; j<n; j++) {
407:       /* (strict lower triangular part of A)*x  */
408:       cval       = ib[j]*4;
409:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
410:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
411:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
412:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
413:       /* (strict upper triangular part of A)*x  */
414:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
415:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
416:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
417:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
418:       v  += 16;
419:     }
420:     xb +=4; ai++;
421:   }

423:   VecRestoreArray(xx,&x);
424:   VecRestoreArray(zz,&z);
425:   PetscLogFlops(32*(a->nz*2 - A->m) - A->m);
426:   return(0);
427: }

431: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
432: {
433:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
434:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
435:   MatScalar      *v;
437:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

440:   VecSet(zz,zero);
441:   VecGetArray(xx,&x);
442:   VecGetArray(zz,&z);
443: 
444:   v     = a->a;
445:   xb = x;

447:   for (i=0; i<mbs; i++) {
448:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
449:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
450:     ib = aj + *ai;
451:     jmin = 0;
452:     if (*ib == i){      /* (diag of A)*x */
453:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
454:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
455:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
456:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
457:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
458:       v += 25; jmin++;
459:     }
460:     for (j=jmin; j<n; j++) {
461:       /* (strict lower triangular part of A)*x  */
462:       cval       = ib[j]*5;
463:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
464:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
465:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
466:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
467:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
468:       /* (strict upper triangular part of A)*x  */
469:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
470:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
471:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
472:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
473:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
474:       v  += 25;
475:     }
476:     xb +=5; ai++;
477:   }

479:   VecRestoreArray(xx,&x);
480:   VecRestoreArray(zz,&z);
481:   PetscLogFlops(50*(a->nz*2 - A->m) - A->m);
482:   return(0);
483: }


488: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
489: {
490:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
491:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
492:   MatScalar      *v;
494:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

497:   VecSet(zz,zero);
498:   VecGetArray(xx,&x);
499:   VecGetArray(zz,&z);
500: 
501:   v     = a->a;
502:   xb = x;

504:   for (i=0; i<mbs; i++) {
505:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
506:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
507:     ib = aj + *ai;
508:     jmin = 0;
509:     if (*ib == i){      /* (diag of A)*x */
510:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
511:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
512:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
513:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
514:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
515:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
516:       v += 36; jmin++;
517:     }
518:     for (j=jmin; j<n; j++) {
519:       /* (strict lower triangular part of A)*x  */
520:       cval       = ib[j]*6;
521:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
522:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
523:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
524:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
525:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
526:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
527:       /* (strict upper triangular part of A)*x  */
528:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
529:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
530:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
531:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
532:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
533:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
534:       v  += 36;
535:     }
536:     xb +=6; ai++;
537:   }

539:   VecRestoreArray(xx,&x);
540:   VecRestoreArray(zz,&z);
541:   PetscLogFlops(72*(a->nz*2 - A->m) - A->m);
542:   return(0);
543: }
546: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
547: {
548:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
549:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
550:   MatScalar      *v;
552:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

555:   VecSet(zz,zero);
556:   VecGetArray(xx,&x);
557:   VecGetArray(zz,&z);
558: 
559:   v     = a->a;
560:   xb = x;

562:   for (i=0; i<mbs; i++) {
563:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
564:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
565:     ib = aj + *ai;
566:     jmin = 0;
567:     if (*ib == i){      /* (diag of A)*x */
568:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
569:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
570:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
571:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
572:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
573:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
574:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
575:       v += 49; jmin++;
576:     }
577:     for (j=jmin; j<n; j++) {
578:       /* (strict lower triangular part of A)*x  */
579:       cval       = ib[j]*7;
580:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
581:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
582:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
583:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
584:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
585:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
586:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
587:       /* (strict upper triangular part of A)*x  */
588:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
589:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
590:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
591:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
592:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
593:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
594:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
595:       v  += 49;
596:     }
597:     xb +=7; ai++;
598:   }
599:   VecRestoreArray(xx,&x);
600:   VecRestoreArray(zz,&z);
601:   PetscLogFlops(98*(a->nz*2 - A->m) - A->m);
602:   return(0);
603: }

605: /*
606:     This will not work with MatScalar == float because it calls the BLAS
607: */
610: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
611: {
612:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
613:   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
614:   MatScalar      *v;
616:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;

619:   VecSet(zz,zero);
620:   VecGetArray(xx,&x); x_ptr=x;
621:   VecGetArray(zz,&z); z_ptr=z;

623:   aj   = a->j;
624:   v    = a->a;
625:   ii   = a->i;

627:   if (!a->mult_work) {
628:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
629:   }
630:   work = a->mult_work;
631: 
632:   for (i=0; i<mbs; i++) {
633:     n     = ii[1] - ii[0]; ncols = n*bs;
634:     workt = work; idx=aj+ii[0];

636:     /* upper triangular part */
637:     for (j=0; j<n; j++) {
638:       xb = x_ptr + bs*(*idx++);
639:       for (k=0; k<bs; k++) workt[k] = xb[k];
640:       workt += bs;
641:     }
642:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
643:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
644: 
645:     /* strict lower triangular part */
646:     idx = aj+ii[0];
647:     if (*idx == i){
648:       ncols -= bs; v += bs2; idx++; n--;
649:     }
650: 
651:     if (ncols > 0){
652:       workt = work;
653:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
654:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
655:       for (j=0; j<n; j++) {
656:         zb = z_ptr + bs*(*idx++);
657:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
658:         workt += bs;
659:       }
660:     }
661:     x += bs; v += n*bs2; z += bs; ii++;
662:   }
663: 
664:   VecRestoreArray(xx,&x);
665:   VecRestoreArray(zz,&z);
666:   PetscLogFlops(2*(a->nz*2 - A->m)*bs2 - A->m);
667:   return(0);
668: }

672: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
673: {
674:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
675:   PetscScalar    *x,*y,*z,*xb,x1;
676:   MatScalar      *v;
678:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

681:   VecGetArray(xx,&x);
682:   if (yy != xx) {
683:     VecGetArray(yy,&y);
684:   } else {
685:     y = x;
686:   }
687:   if (zz != yy) {
688:     /* VecCopy(yy,zz); */
689:     VecGetArray(zz,&z);
690:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
691:   } else {
692:     z = y;
693:   }

695:   v  = a->a;
696:   xb = x;

698:   for (i=0; i<mbs; i++) {
699:     n  = ai[1] - ai[0];  /* length of i_th row of A */
700:     x1 = xb[0];
701:     ib = aj + *ai;
702:     jmin = 0;
703:     if (*ib == i) {            /* (diag of A)*x */
704:       z[i] += *v++ * x[*ib++]; jmin++;
705:     }
706:     for (j=jmin; j<n; j++) {
707:       cval    = *ib;
708:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
709:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
710:     }
711:     xb++; ai++;
712:   }

714:   VecRestoreArray(xx,&x);
715:   if (yy != xx) VecRestoreArray(yy,&y);
716:   if (zz != yy) VecRestoreArray(zz,&z);
717: 
718:   PetscLogFlops(2*(a->nz*2 - A->m));
719:   return(0);
720: }

724: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
725: {
726:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
727:   PetscScalar    *x,*y,*z,*xb,x1,x2;
728:   MatScalar      *v;
730:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

733:   VecGetArray(xx,&x);
734:   if (yy != xx) {
735:     VecGetArray(yy,&y);
736:   } else {
737:     y = x;
738:   }
739:   if (zz != yy) {
740:     /* VecCopy(yy,zz); */
741:     VecGetArray(zz,&z);
742:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
743:   } else {
744:     z = y;
745:   }

747:   v     = a->a;
748:   xb = x;

750:   for (i=0; i<mbs; i++) {
751:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
752:     x1 = xb[0]; x2 = xb[1];
753:     ib = aj + *ai;
754:     jmin = 0;
755:     if (*ib == i){      /* (diag of A)*x */
756:       z[2*i]   += v[0]*x1 + v[2]*x2;
757:       z[2*i+1] += v[2]*x1 + v[3]*x2;
758:       v += 4; jmin++;
759:     }
760:     for (j=jmin; j<n; j++) {
761:       /* (strict lower triangular part of A)*x  */
762:       cval       = ib[j]*2;
763:       z[cval]     += v[0]*x1 + v[1]*x2;
764:       z[cval+1]   += v[2]*x1 + v[3]*x2;
765:       /* (strict upper triangular part of A)*x  */
766:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
767:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
768:       v  += 4;
769:     }
770:     xb +=2; ai++;
771:   }

773:   VecRestoreArray(xx,&x);
774:   if (yy != xx) VecRestoreArray(yy,&y);
775:   if (zz != yy) VecRestoreArray(zz,&z);

777:   PetscLogFlops(4*(a->nz*2 - A->m));
778:   return(0);
779: }

783: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
784: {
785:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
786:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3;
787:   MatScalar      *v;
789:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

792:   VecGetArray(xx,&x);
793:   if (yy != xx) {
794:     VecGetArray(yy,&y);
795:   } else {
796:     y = x;
797:   }
798:   if (zz != yy) {
799:     /* VecCopy(yy,zz); */
800:     VecGetArray(zz,&z);
801:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
802:   } else {
803:     z = y;
804:   }

806:   v     = a->a;
807:   xb = x;

809:   for (i=0; i<mbs; i++) {
810:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
811:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
812:     ib = aj + *ai;
813:     jmin = 0;
814:     if (*ib == i){     /* (diag of A)*x */
815:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
816:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
817:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
818:      v += 9; jmin++;
819:     }
820:     for (j=jmin; j<n; j++) {
821:       /* (strict lower triangular part of A)*x  */
822:       cval       = ib[j]*3;
823:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
824:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
825:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
826:       /* (strict upper triangular part of A)*x  */
827:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
828:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
829:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
830:       v  += 9;
831:     }
832:     xb +=3; ai++;
833:   }

835:   VecRestoreArray(xx,&x);
836:   if (yy != xx) VecRestoreArray(yy,&y);
837:   if (zz != yy) VecRestoreArray(zz,&z);

839:   PetscLogFlops(18*(a->nz*2 - A->m));
840:   return(0);
841: }

845: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
846: {
847:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
848:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4;
849:   MatScalar      *v;
851:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

854:   VecGetArray(xx,&x);
855:   if (yy != xx) {
856:     VecGetArray(yy,&y);
857:   } else {
858:     y = x;
859:   }
860:   if (zz != yy) {
861:     /* VecCopy(yy,zz); */
862:     VecGetArray(zz,&z);
863:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
864:   } else {
865:     z = y;
866:   }

868:   v     = a->a;
869:   xb = x;

871:   for (i=0; i<mbs; i++) {
872:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
873:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
874:     ib = aj + *ai;
875:     jmin = 0;
876:     if (*ib == i){      /* (diag of A)*x */
877:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
878:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
879:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
880:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
881:       v += 16; jmin++;
882:     }
883:     for (j=jmin; j<n; j++) {
884:       /* (strict lower triangular part of A)*x  */
885:       cval       = ib[j]*4;
886:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
887:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
888:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
889:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
890:       /* (strict upper triangular part of A)*x  */
891:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
892:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
893:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
894:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
895:       v  += 16;
896:     }
897:     xb +=4; ai++;
898:   }

900:   VecRestoreArray(xx,&x);
901:   if (yy != xx) VecRestoreArray(yy,&y);
902:   if (zz != yy) VecRestoreArray(zz,&z);

904:   PetscLogFlops(32*(a->nz*2 - A->m));
905:   return(0);
906: }

910: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
911: {
912:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
913:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4,x5;
914:   MatScalar      *v;
916:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

919:   VecGetArray(xx,&x);
920:   if (yy != xx) {
921:     VecGetArray(yy,&y);
922:   } else {
923:     y = x;
924:   }
925:   if (zz != yy) {
926:     /* VecCopy(yy,zz); */
927:     VecGetArray(zz,&z);
928:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
929:   } else {
930:     z = y;
931:   }

933:   v     = a->a;
934:   xb = x;

936:   for (i=0; i<mbs; i++) {
937:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
938:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
939:     ib = aj + *ai;
940:     jmin = 0;
941:     if (*ib == i){      /* (diag of A)*x */
942:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
943:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
944:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
945:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
946:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
947:       v += 25; jmin++;
948:     }
949:     for (j=jmin; j<n; j++) {
950:       /* (strict lower triangular part of A)*x  */
951:       cval       = ib[j]*5;
952:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
953:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
954:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
955:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
956:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
957:       /* (strict upper triangular part of A)*x  */
958:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
959:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
960:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
961:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
962:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
963:       v  += 25;
964:     }
965:     xb +=5; ai++;
966:   }

968:   VecRestoreArray(xx,&x);
969:   if (yy != xx) VecRestoreArray(yy,&y);
970:   if (zz != yy) VecRestoreArray(zz,&z);

972:   PetscLogFlops(50*(a->nz*2 - A->m));
973:   return(0);
974: }
977: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
978: {
979:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
980:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6;
981:   MatScalar      *v;
983:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

986:   VecGetArray(xx,&x);
987:   if (yy != xx) {
988:     VecGetArray(yy,&y);
989:   } else {
990:     y = x;
991:   }
992:   if (zz != yy) {
993:     VecGetArray(zz,&z);
994:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
995:   } else {
996:     z = y;
997:   }

999:   v     = a->a;
1000:   xb = x;

1002:   for (i=0; i<mbs; i++) {
1003:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1004:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
1005:     ib = aj + *ai;
1006:     jmin = 0;
1007:     if (*ib == i){     /* (diag of A)*x */
1008:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
1009:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
1010:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
1011:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
1012:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
1013:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1014:       v += 36; jmin++;
1015:     }
1016:     for (j=jmin; j<n; j++) {
1017:       /* (strict lower triangular part of A)*x  */
1018:       cval       = ib[j]*6;
1019:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
1020:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
1021:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
1022:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
1023:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
1024:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
1025:       /* (strict upper triangular part of A)*x  */
1026:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
1027:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
1028:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
1029:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
1030:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
1031:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
1032:       v  += 36;
1033:     }
1034:     xb +=6; ai++;
1035:   }

1037:   VecRestoreArray(xx,&x);
1038:   if (yy != xx) VecRestoreArray(yy,&y);
1039:   if (zz != yy) VecRestoreArray(zz,&z);

1041:   PetscLogFlops(72*(a->nz*2 - A->m));
1042:   return(0);
1043: }

1047: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1048: {
1049:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1050:   PetscScalar    *x,*y,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
1051:   MatScalar      *v;
1053:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

1056:   VecGetArray(xx,&x);
1057:   if (yy != xx) {
1058:     VecGetArray(yy,&y);
1059:   } else {
1060:     y = x;
1061:   }
1062:   if (zz != yy) {
1063:     VecGetArray(zz,&z);
1064:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1065:   } else {
1066:     z = y;
1067:   }

1069:   v     = a->a;
1070:   xb = x;

1072:   for (i=0; i<mbs; i++) {
1073:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1074:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1075:     ib = aj + *ai;
1076:     jmin = 0;
1077:     if (*ib == i){     /* (diag of A)*x */
1078:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1079:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1080:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1081:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1082:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1083:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1084:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1085:       v += 49; jmin++;
1086:     }
1087:     for (j=jmin; j<n; j++) {
1088:       /* (strict lower triangular part of A)*x  */
1089:       cval       = ib[j]*7;
1090:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1091:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1092:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1093:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1094:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1095:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1096:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1097:       /* (strict upper triangular part of A)*x  */
1098:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1099:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1100:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1101:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1102:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1103:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1104:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1105:       v  += 49;
1106:     }
1107:     xb +=7; ai++;
1108:   }

1110:   VecRestoreArray(xx,&x);
1111:   if (yy != xx) VecRestoreArray(yy,&y);
1112:   if (zz != yy) VecRestoreArray(zz,&z);

1114:   PetscLogFlops(98*(a->nz*2 - A->m));
1115:   return(0);
1116: }

1120: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1121: {
1122:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1123:   PetscScalar    *x,*x_ptr,*y,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1124:   MatScalar      *v;
1126:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->bs,j,n,bs2=a->bs2,ncols,k;

1129:   VecGetArray(xx,&x); x_ptr=x;
1130:   if (yy != xx) {
1131:     VecGetArray(yy,&y);
1132:   } else {
1133:     y = x;
1134:   }
1135:   if (zz != yy) {
1136:     VecGetArray(zz,&z); z_ptr=z;
1137:     PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));
1138:   } else {
1139:     z = y;
1140:   }

1142:   aj   = a->j;
1143:   v    = a->a;
1144:   ii   = a->i;

1146:   if (!a->mult_work) {
1147:     PetscMalloc((A->m+1)*sizeof(PetscScalar),&a->mult_work);
1148:   }
1149:   work = a->mult_work;
1150: 
1151: 
1152:   for (i=0; i<mbs; i++) {
1153:     n     = ii[1] - ii[0]; ncols = n*bs;
1154:     workt = work; idx=aj+ii[0];

1156:     /* upper triangular part */
1157:     for (j=0; j<n; j++) {
1158:       xb = x_ptr + bs*(*idx++);
1159:       for (k=0; k<bs; k++) workt[k] = xb[k];
1160:       workt += bs;
1161:     }
1162:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1163:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1165:     /* strict lower triangular part */
1166:     idx = aj+ii[0];
1167:     if (*idx == i){
1168:       ncols -= bs; v += bs2; idx++; n--;
1169:     }
1170:     if (ncols > 0){
1171:       workt = work;
1172:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
1173:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1174:       for (j=0; j<n; j++) {
1175:         zb = z_ptr + bs*(*idx++);
1176:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1177:         workt += bs;
1178:       }
1179:     }

1181:     x += bs; v += n*bs2; z += bs; ii++;
1182:   }

1184:   VecRestoreArray(xx,&x);
1185:   if (yy != xx) VecRestoreArray(yy,&y);
1186:   if (zz != yy) VecRestoreArray(zz,&z);

1188:   PetscLogFlops(2*(a->nz*2 - A->m));
1189:   return(0);
1190: }

1194: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1195: {
1196:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1197:   PetscScalar oalpha = alpha;
1198:   PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;

1202:   BLASscal_(&totalnz,&oalpha,a->a,&one);
1203:   PetscLogFlops(totalnz);
1204:   return(0);
1205: }

/*
   MatNorm_SeqSBAIJ - Stores the requested norm of A in *norm.

   NORM_FROBENIUS: square root of the sum of squared magnitudes of the stored values,
   where values of off-diagonal blocks are counted twice and values of diagonal blocks once.

   NORM_INFINITY and NORM_1 share one branch: for every block row k a bs-long accumulator
   sum[] collects absolute values from the stored blocks of row k and from the blocks
   A(i,k) stored in earlier block rows i; *norm is the largest accumulator value seen.

   Any other norm type raises PETSC_ERR_SUP.
*/
1209: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1210: {
1211:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1212:   MatScalar      *v = a->a;
1213:   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1214:   PetscInt       i,j,k,bs = A->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1216:   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
1217: 
1219:   if (type == NORM_FROBENIUS) {
      /* v walks a->a sequentially, bs2 values per stored block, in storage order */
1220:     for (k=0; k<mbs; k++){
1221:       jmin = a->i[k]; jmax = a->i[k+1];
1222:       col  = aj + jmin;
1223:       if (*col == k){         /* diagonal block */
1224:         for (i=0; i<bs2; i++){
1225: #if defined(PETSC_USE_COMPLEX)
1226:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1227: #else
1228:           sum_diag += (*v)*(*v); v++;
1229: #endif
1230:         }
1231:         jmin++;
1232:       }
1233:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1234:         for (i=0; i<bs2; i++){
1235: #if defined(PETSC_USE_COMPLEX)
1236:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1237: #else
1238:           sum_off += (*v)*(*v); v++;
1239: #endif  
1240:         }
1241:       }
1242:     }
      /* off-diagonal contributions are doubled, diagonal ones are not */
1243:     *norm = sqrt(sum_diag + 2*sum_off);
1244:   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
      /* one allocation holds il[0..mbs-1], jl[0..mbs-1] and the bs-long accumulator sum[] */
1245:     PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);
1246:     jl   = il + mbs;
1247:     sum  = (PetscReal*)(jl + mbs);
      /* jl[] holds linked lists of block rows; the value mbs terminates a list (see the while loop below) */
1248:     for (i=0; i<mbs; i++) jl[i] = mbs;
1249:     il[0] = 0;

1251:     *norm = 0.0;
1252:     for (k=0; k<mbs; k++) { /* k_th block row */
1253:       for (j=0; j<bs; j++) sum[j]=0.0;
1254:       /*-- col sum --*/
1255:       i = jl[k]; /* first |A(i,k)| to be added */
1256:       /* jl[k]=i: first nonzero element in row i for submatrix A(1:k,k:n) (active window)
1257:                   at step k */
1258:       while (i<mbs){
1259:         nexti = jl[i];  /* next block row to be added */
1260:         ik    = il[i];  /* block index of A(i,k) in the array a */
            /* sum[j] accumulates the bs consecutive values starting at offset j*bs of block ik */
1261:         for (j=0; j<bs; j++){
1262:           v = a->a + ik*bs2 + j*bs;
1263:           for (k1=0; k1<bs; k1++) {
1264:             sum[j] += PetscAbsScalar(*v); v++;
1265:           }
1266:         }
1267:         /* update il, jl */
1268:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1269:         jmax = a->i[i+1];
1270:         if (jmin < jmax){
              /* row i has another block: move row i onto the list of that block's column */
1271:           il[i] = jmin;
1272:           j   = a->j[jmin];
1273:           jl[i] = jl[j]; jl[j]=i;
1274:         }
1275:         i = nexti;
1276:       }
1277:       /*-- row sum --*/
1278:       jmin = a->i[k]; jmax = a->i[k+1];
1279:       for (i=jmin; i<jmax; i++) {
            /* sum[j] accumulates the bs values of block i at offsets j, j+bs, j+2*bs, ... */
1280:         for (j=0; j<bs; j++){
1281:           v = a->a + i*bs2 + j;
1282:           for (k1=0; k1<bs; k1++){
1283:             sum[j] += PetscAbsScalar(*v); v += bs;
1284:           }
1285:         }
1286:       }
1287:       /* add k_th block row to il, jl */
1288:       col = aj+jmin;
1289:       if (*col == k) jmin++;
1290:       if (jmin < jmax){
1291:         il[k] = jmin;
1292:         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1293:       }
          /* keep the largest accumulated sum seen so far */
1294:       for (j=0; j<bs; j++){
1295:         if (sum[j] > *norm) *norm = sum[j];
1296:       }
1297:     }
      /* il is the base of the shared allocation, so this also releases jl and sum */
1298:     PetscFree(il);
1299:   } else {
1300:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1301:   }
1302:   return(0);
1303: }

1307: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1308: {
1309:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;


1314:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1315:   if ((A->m != B->m) || (A->n != B->n) || (A->bs != B->bs)|| (a->nz != b->nz)) {
1316:     *flg = PETSC_FALSE;
1317:     return(0);
1318:   }
1319: 
1320:   /* if the a->i are the same */
1321:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1322:   if (!*flg) {
1323:     return(0);
1324:   }
1325: 
1326:   /* if a->j are the same */
1327:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1328:   if (!*flg) {
1329:     return(0);
1330:   }
1331:   /* if a->a are the same */
1332:   PetscMemcmp(a->a,b->a,(a->nz)*(A->bs)*(A->bs)*sizeof(PetscScalar),flg);
1333:   return(0);
1334: }

1338: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1339: {
1340:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1342:   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1343:   PetscScalar    *x,zero = 0.0;
1344:   MatScalar      *aa,*aa_j;

1347:   bs   = A->bs;
1348:   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1349: 
1350:   aa   = a->a;
1351:   ai   = a->i;
1352:   aj   = a->j;
1353:   ambs = a->mbs;
1354:   bs2  = a->bs2;

1356:   VecSet(v,zero);
1357:   VecGetArray(v,&x);
1358:   VecGetLocalSize(v,&n);
1359:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1360:   for (i=0; i<ambs; i++) {
1361:     j=ai[i];
1362:     if (aj[j] == i) {             /* if this is a diagonal element */
1363:       row  = i*bs;
1364:       aa_j = aa + j*bs2;
1365:       if (A->factor && bs==1){
1366:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1367:       } else {
1368:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1369:       }
1370:     }
1371:   }
1372: 
1373:   VecRestoreArray(v,&x);
1374:   return(0);
1375: }

1379: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1380: {
1381:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1382:   PetscScalar    *l,x,*li,*ri;
1383:   MatScalar      *aa,*v;
1385:   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1386:   PetscTruth     flg;

1389:   if (ll != rr){
1390:     VecEqual(ll,rr,&flg);
1391:     if (!flg)
1392:       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1393:   }
1394:   if (!ll) return(0);
1395:   ai  = a->i;
1396:   aj  = a->j;
1397:   aa  = a->a;
1398:   m   = A->m;
1399:   bs  = A->bs;
1400:   mbs = a->mbs;
1401:   bs2 = a->bs2;

1403:   VecGetArray(ll,&l);
1404:   VecGetLocalSize(ll,&lm);
1405:   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1406:   for (i=0; i<mbs; i++) { /* for each block row */
1407:     M  = ai[i+1] - ai[i];
1408:     li = l + i*bs;
1409:     v  = aa + bs2*ai[i];
1410:     for (j=0; j<M; j++) { /* for each block */
1411:       ri = l + bs*aj[ai[i]+j];
1412:       for (k=0; k<bs; k++) {
1413:         x = ri[k];
1414:         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1415:       }
1416:     }
1417:   }
1418:   VecRestoreArray(ll,&l);
1419:   PetscLogFlops(2*a->nz);
1420:   return(0);
1421: }

1425: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1426: {
1427:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1430:   info->rows_global    = (double)A->m;
1431:   info->columns_global = (double)A->m;
1432:   info->rows_local     = (double)A->m;
1433:   info->columns_local  = (double)A->m;
1434:   info->block_size     = a->bs2;
1435:   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
1436:   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1437:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1438:   info->assemblies   = A->num_ass;
1439:   info->mallocs      = a->reallocs;
1440:   info->memory       = A->mem;
1441:   if (A->factor) {
1442:     info->fill_ratio_given  = A->info.fill_ratio_given;
1443:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1444:     info->factor_mallocs    = A->info.factor_mallocs;
1445:   } else {
1446:     info->fill_ratio_given  = 0;
1447:     info->fill_ratio_needed = 0;
1448:     info->factor_mallocs    = 0;
1449:   }
1450:   return(0);
1451: }


1456: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1457: {
1458:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1462:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1463:   return(0);
1464: }

1468: PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat A,Vec v)
1469: {
1470:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1472:   PetscInt i,j,n,row,col,bs,*ai,*aj,mbs;
1473:   PetscReal    atmp;
1474:   MatScalar    *aa;
1475:   PetscScalar  zero = 0.0,*x;
1476:   PetscInt          ncols,brow,bcol,krow,kcol;

1479:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1480:   bs   = A->bs;
1481:   aa   = a->a;
1482:   ai   = a->i;
1483:   aj   = a->j;
1484:   mbs = a->mbs;

1486:   VecSet(v,zero);
1487:   VecGetArray(v,&x);
1488:   VecGetLocalSize(v,&n);
1489:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1490:   for (i=0; i<mbs; i++) {
1491:     ncols = ai[1] - ai[0]; ai++;
1492:     brow  = bs*i;
1493:     for (j=0; j<ncols; j++){
1494:       bcol = bs*(*aj);
1495:       for (kcol=0; kcol<bs; kcol++){
1496:         col = bcol + kcol;      /* col index */
1497:         for (krow=0; krow<bs; krow++){
1498:           atmp = PetscAbsScalar(*aa); aa++;
1499:           row = brow + krow;    /* row index */
1500:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1501:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1502:         }
1503:       }
1504:       aj++;
1505:     }
1506:   }
1507:   VecRestoreArray(v,&x);
1508:   return(0);
1509: }