Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/inline/spops.h
 9:  #include petscsys.h

 11:  #include src/inline/ilu.h

 15: /*@C
 16:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 18:   Collective on Mat

 20:   Input Parameters:
 21: . mat - the matrix

 23:   Level: advanced
 24: @*/
 25: PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
 26: {
 27:   PetscErrorCode ierr,(*f)(Mat);

 31:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 32:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 34:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 35:   if (f) {
 36:     (*f)(mat);
 37:   } else {
 38:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 39:   }
 40:   return(0);
 41: }

 46: PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 47: {
 48:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 50:   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
 51:   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;

 54:   if (a->idiagvalid) return(0);
 55:   MatMarkDiagonal_SeqBAIJ(A);
 56:   diag_offset = a->diag;
 57:   if (!a->idiag) {
 58:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 59:   }
 60:   diag  = a->idiag;
 61:   mdiag = a->idiag+bs*bs*mbs;
 62:   /* factor and invert each block */
 63:   switch (bs){
 64:     case 2:
 65:       for (i=0; i<mbs; i++) {
 66:         odiag   = v + 4*diag_offset[i];
 67:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 68:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 69:         Kernel_A_gets_inverse_A_2(diag);
 70:         diag    += 4;
 71:         mdiag   += 4;
 72:       }
 73:       break;
 74:     case 3:
 75:       for (i=0; i<mbs; i++) {
 76:         odiag    = v + 9*diag_offset[i];
 77:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 78:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 79:         diag[8]  = odiag[8];
 80:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 81:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 82:         mdiag[8] = odiag[8];
 83:         Kernel_A_gets_inverse_A_3(diag);
 84:         diag    += 9;
 85:         mdiag   += 9;
 86:       }
 87:       break;
 88:     case 4:
 89:       for (i=0; i<mbs; i++) {
 90:         odiag  = v + 16*diag_offset[i];
 91:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
 92:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
 93:         Kernel_A_gets_inverse_A_4(diag);
 94:         diag  += 16;
 95:         mdiag += 16;
 96:       }
 97:       break;
 98:     case 5:
 99:       for (i=0; i<mbs; i++) {
100:         odiag = v + 25*diag_offset[i];
101:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103:         Kernel_A_gets_inverse_A_5(diag);
104:         diag  += 25;
105:         mdiag += 25;
106:       }
107:       break;
108:     default:
109:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110:   }
111:   a->idiagvalid = PETSC_TRUE;
112:   return(0);
113: }

118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121:   PetscScalar        *x,x1,x2,s1,s2;
122:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123:   PetscErrorCode     ierr;
124:   PetscInt           m = a->mbs,i,i2,nz,idx;
125:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

128:   its = its*lits;
129:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

135:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

137:   diag  = a->diag;
138:   idiag = a->idiag;
139:   VecGetArray(xx,&x);
140:   VecGetArray(bb,(PetscScalar**)&b);

142:   if (flag & SOR_ZERO_INITIAL_GUESS) {
143:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146:       i2     = 2;
147:       idiag += 4;
148:       for (i=1; i<m; i++) {
149:         v     = aa + 4*ai[i];
150:         vi    = aj + ai[i];
151:         nz    = diag[i] - ai[i];
152:         s1    = b[i2]; s2 = b[i2+1];
153:         while (nz--) {
154:           idx  = 2*(*vi++);
155:           x1   = x[idx]; x2 = x[1+idx];
156:           s1  -= v[0]*x1 + v[2]*x2;
157:           s2  -= v[1]*x1 + v[3]*x2;
158:           v   += 4;
159:         }
160:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162:         idiag   += 4;
163:         i2      += 2;
164:       }
165:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166:       PetscLogFlops(4*(a->nz));
167:     }
168:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170:       i2    = 0;
171:       mdiag = a->idiag+4*a->mbs;
172:       for (i=0; i<m; i++) {
173:         x1      = x[i2]; x2 = x[i2+1];
174:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176:         mdiag  += 4;
177:         i2     += 2;
178:       }
179:       PetscLogFlops(6*m);
180:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
182:     }
183:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184:       idiag   = a->idiag+4*a->mbs - 4;
185:       i2      = 2*m - 2;
186:       x1      = x[i2]; x2 = x[i2+1];
187:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189:       idiag -= 4;
190:       i2    -= 2;
191:       for (i=m-2; i>=0; i--) {
192:         v     = aa + 4*(diag[i]+1);
193:         vi    = aj + diag[i] + 1;
194:         nz    = ai[i+1] - diag[i] - 1;
195:         s1    = x[i2]; s2 = x[i2+1];
196:         while (nz--) {
197:           idx  = 2*(*vi++);
198:           x1   = x[idx]; x2 = x[1+idx];
199:           s1  -= v[0]*x1 + v[2]*x2;
200:           s2  -= v[1]*x1 + v[3]*x2;
201:           v   += 4;
202:         }
203:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205:         idiag   -= 4;
206:         i2      -= 2;
207:       }
208:       PetscLogFlops(4*(a->nz));
209:     }
210:   } else {
211:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212:   }
213:   VecRestoreArray(xx,&x);
214:   VecRestoreArray(bb,(PetscScalar**)&b);
215:   return(0);
216: }

220: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221: {
222:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225:   PetscErrorCode     ierr;
226:   PetscInt           m = a->mbs,i,i2,nz,idx;
227:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

230:   its = its*lits;
231:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

237:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

239:   diag  = a->diag;
240:   idiag = a->idiag;
241:   VecGetArray(xx,&x);
242:   VecGetArray(bb,(PetscScalar**)&b);

244:   if (flag & SOR_ZERO_INITIAL_GUESS) {
245:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249:       i2     = 3;
250:       idiag += 9;
251:       for (i=1; i<m; i++) {
252:         v     = aa + 9*ai[i];
253:         vi    = aj + ai[i];
254:         nz    = diag[i] - ai[i];
255:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256:         while (nz--) {
257:           idx  = 3*(*vi++);
258:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262:           v   += 9;
263:         }
264:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267:         idiag   += 9;
268:         i2      += 3;
269:       }
270:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271:       PetscLogFlops(9*(a->nz));
272:     }
273:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275:       i2    = 0;
276:       mdiag = a->idiag+9*a->mbs;
277:       for (i=0; i<m; i++) {
278:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282:         mdiag  += 9;
283:         i2     += 3;
284:       }
285:       PetscLogFlops(15*m);
286:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
288:     }
289:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290:       idiag   = a->idiag+9*a->mbs - 9;
291:       i2      = 3*m - 3;
292:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296:       idiag -= 9;
297:       i2    -= 3;
298:       for (i=m-2; i>=0; i--) {
299:         v     = aa + 9*(diag[i]+1);
300:         vi    = aj + diag[i] + 1;
301:         nz    = ai[i+1] - diag[i] - 1;
302:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303:         while (nz--) {
304:           idx  = 3*(*vi++);
305:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309:           v   += 9;
310:         }
311:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314:         idiag   -= 9;
315:         i2      -= 3;
316:       }
317:       PetscLogFlops(9*(a->nz));
318:     }
319:   } else {
320:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321:   }
322:   VecRestoreArray(xx,&x);
323:   VecRestoreArray(bb,(PetscScalar**)&b);
324:   return(0);
325: }

329: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330: {
331:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334:   PetscErrorCode     ierr;
335:   PetscInt           m = a->mbs,i,i2,nz,idx;
336:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

339:   its = its*lits;
340:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

346:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

348:   diag  = a->diag;
349:   idiag = a->idiag;
350:   VecGetArray(xx,&x);
351:   VecGetArray(bb,(PetscScalar**)&b);

353:   if (flag & SOR_ZERO_INITIAL_GUESS) {
354:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359:       i2     = 4;
360:       idiag += 16;
361:       for (i=1; i<m; i++) {
362:         v     = aa + 16*ai[i];
363:         vi    = aj + ai[i];
364:         nz    = diag[i] - ai[i];
365:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366:         while (nz--) {
367:           idx  = 4*(*vi++);
368:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373:           v   += 16;
374:         }
375:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379:         idiag   += 16;
380:         i2      += 4;
381:       }
382:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383:       PetscLogFlops(16*(a->nz));
384:     }
385:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387:       i2    = 0;
388:       mdiag = a->idiag+16*a->mbs;
389:       for (i=0; i<m; i++) {
390:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395:         mdiag  += 16;
396:         i2     += 4;
397:       }
398:       PetscLogFlops(28*m);
399:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
401:     }
402:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403:       idiag   = a->idiag+16*a->mbs - 16;
404:       i2      = 4*m - 4;
405:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410:       idiag -= 16;
411:       i2    -= 4;
412:       for (i=m-2; i>=0; i--) {
413:         v     = aa + 16*(diag[i]+1);
414:         vi    = aj + diag[i] + 1;
415:         nz    = ai[i+1] - diag[i] - 1;
416:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417:         while (nz--) {
418:           idx  = 4*(*vi++);
419:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424:           v   += 16;
425:         }
426:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430:         idiag   -= 16;
431:         i2      -= 4;
432:       }
433:       PetscLogFlops(16*(a->nz));
434:     }
435:   } else {
436:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437:   }
438:   VecRestoreArray(xx,&x);
439:   VecRestoreArray(bb,(PetscScalar**)&b);
440:   return(0);
441: }

445: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446: {
447:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449:   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450:   PetscErrorCode     ierr;
451:   PetscInt           m = a->mbs,i,i2,nz,idx;
452:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

455:   its = its*lits;
456:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459:   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

462:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

464:   diag  = a->diag;
465:   idiag = a->idiag;
466:   VecGetArray(xx,&x);
467:   VecGetArray(bb,(PetscScalar**)&b);

469:   if (flag & SOR_ZERO_INITIAL_GUESS) {
470:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476:       i2     = 5;
477:       idiag += 25;
478:       for (i=1; i<m; i++) {
479:         v     = aa + 25*ai[i];
480:         vi    = aj + ai[i];
481:         nz    = diag[i] - ai[i];
482:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483:         while (nz--) {
484:           idx  = 5*(*vi++);
485:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491:           v   += 25;
492:         }
493:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498:         idiag   += 25;
499:         i2      += 5;
500:       }
501:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502:       PetscLogFlops(25*(a->nz));
503:     }
504:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506:       i2    = 0;
507:       mdiag = a->idiag+25*a->mbs;
508:       for (i=0; i<m; i++) {
509:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515:         mdiag  += 25;
516:         i2     += 5;
517:       }
518:       PetscLogFlops(45*m);
519:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520:       PetscMemcpy(x,b,A->m*sizeof(PetscScalar));
521:     }
522:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523:       idiag   = a->idiag+25*a->mbs - 25;
524:       i2      = 5*m - 5;
525:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531:       idiag -= 25;
532:       i2    -= 5;
533:       for (i=m-2; i>=0; i--) {
534:         v     = aa + 25*(diag[i]+1);
535:         vi    = aj + diag[i] + 1;
536:         nz    = ai[i+1] - diag[i] - 1;
537:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538:         while (nz--) {
539:           idx  = 5*(*vi++);
540:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546:           v   += 25;
547:         }
548:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553:         idiag   -= 25;
554:         i2      -= 5;
555:       }
556:       PetscLogFlops(25*(a->nz));
557:     }
558:   } else {
559:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560:   }
561:   VecRestoreArray(xx,&x);
562:   VecRestoreArray(bb,(PetscScalar**)&b);
563:   return(0);
564: }

566: /*
567:     Special version for Fun3d sequential benchmark
568: */
569: #if defined(PETSC_HAVE_FORTRAN_CAPS)
570: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572: #define matsetvaluesblocked4_ matsetvaluesblocked4
573: #endif

578: void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579: {
580:   Mat               A = *AA;
581:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583:   PetscInt          *ai=a->i,*ailen=a->ilen;
584:   PetscInt          *aj=a->j,stepval;
585:   const PetscScalar *value = v;
586:   MatScalar         *ap,*aa = a->a,*bap;

589:   stepval = (n-1)*4;
590:   for (k=0; k<m; k++) { /* loop over added rows */
591:     row  = im[k];
592:     rp   = aj + ai[row];
593:     ap   = aa + 16*ai[row];
594:     nrow = ailen[row];
595:     low  = 0;
596:     for (l=0; l<n; l++) { /* loop over added columns */
597:       col = in[l];
598:       value = v + k*(stepval+4)*4 + l*4;
599:       low = 0; high = nrow;
600:       while (high-low > 7) {
601:         t = (low+high)/2;
602:         if (rp[t] > col) high = t;
603:         else             low  = t;
604:       }
605:       for (i=low; i<high; i++) {
606:         if (rp[i] > col) break;
607:         if (rp[i] == col) {
608:           bap  = ap +  16*i;
609:           for (ii=0; ii<4; ii++,value+=stepval) {
610:             for (jj=ii; jj<16; jj+=4) {
611:               bap[jj] += *value++;
612:             }
613:           }
614:           goto noinsert2;
615:         }
616:       }
617:       N = nrow++ - 1;
618:       /* shift up all the later entries in this row */
619:       for (ii=N; ii>=i; ii--) {
620:         rp[ii+1] = rp[ii];
621:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
622:       }
623:       if (N >= i) {
624:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
625:       }
626:       rp[i] = col;
627:       bap   = ap +  16*i;
628:       for (ii=0; ii<4; ii++,value+=stepval) {
629:         for (jj=ii; jj<16; jj+=4) {
630:           bap[jj] = *value++;
631:         }
632:       }
633:       noinsert2:;
634:       low = i;
635:     }
636:     ailen[row] = nrow;
637:   }
638:   PetscFunctionReturnVoid();
639: }

642: #if defined(PETSC_HAVE_FORTRAN_CAPS)
643: #define matsetvalues4_ MATSETVALUES4
644: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
645: #define matsetvalues4_ matsetvalues4
646: #endif

651: void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
652: {
653:   Mat         A = *AA;
654:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
655:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
656:   PetscInt    *ai=a->i,*ailen=a->ilen;
657:   PetscInt    *aj=a->j,brow,bcol;
658:   PetscInt    ridx,cidx;
659:   MatScalar   *ap,value,*aa=a->a,*bap;
660: 
662:   for (k=0; k<m; k++) { /* loop over added rows */
663:     row  = im[k]; brow = row/4;
664:     rp   = aj + ai[brow];
665:     ap   = aa + 16*ai[brow];
666:     nrow = ailen[brow];
667:     low  = 0;
668:     for (l=0; l<n; l++) { /* loop over added columns */
669:       col = in[l]; bcol = col/4;
670:       ridx = row % 4; cidx = col % 4;
671:       value = v[l + k*n];
672:       low = 0; high = nrow;
673:       while (high-low > 7) {
674:         t = (low+high)/2;
675:         if (rp[t] > bcol) high = t;
676:         else              low  = t;
677:       }
678:       for (i=low; i<high; i++) {
679:         if (rp[i] > bcol) break;
680:         if (rp[i] == bcol) {
681:           bap  = ap +  16*i + 4*cidx + ridx;
682:           *bap += value;
683:           goto noinsert1;
684:         }
685:       }
686:       N = nrow++ - 1;
687:       /* shift up all the later entries in this row */
688:       for (ii=N; ii>=i; ii--) {
689:         rp[ii+1] = rp[ii];
690:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
691:       }
692:       if (N>=i) {
693:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
694:       }
695:       rp[i]                    = bcol;
696:       ap[16*i + 4*cidx + ridx] = value;
697:       noinsert1:;
698:       low = i;
699:     }
700:     ailen[brow] = nrow;
701:   }
702:   PetscFunctionReturnVoid();
703: }

706: /*  UGLY, ugly, ugly
707:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
708:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
709:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
710:    converts the entries into single precision and then calls ..._MatScalar() to put them
711:    into the single precision data structures.
712: */
713: #if defined(PETSC_USE_MAT_SINGLE)
714: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
715: #else
716: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
717: #endif

719: #define CHUNKSIZE  10

721: /*
722:      Checks for missing diagonals
723: */
726: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
727: {
728:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
730:   PetscInt       *diag,*jj = a->j,i;

733:   MatMarkDiagonal_SeqBAIJ(A);
734:   diag = a->diag;
735:   for (i=0; i<a->mbs; i++) {
736:     if (jj[diag[i]] != i) {
737:       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
738:     }
739:   }
740:   return(0);
741: }

745: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
746: {
747:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
749:   PetscInt       i,j,*diag,m = a->mbs;

752:   if (a->diag) return(0);

754:   PetscMalloc((m+1)*sizeof(PetscInt),&diag);
755:   PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));
756:   for (i=0; i<m; i++) {
757:     diag[i] = a->i[i+1];
758:     for (j=a->i[i]; j<a->i[i+1]; j++) {
759:       if (a->j[j] == i) {
760:         diag[i] = j;
761:         break;
762:       }
763:     }
764:   }
765:   a->diag = diag;
766:   return(0);
767: }


770: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

774: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
775: {
776:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
778:   PetscInt       n = a->mbs,i;

781:   *nn = n;
782:   if (!ia) return(0);
783:   if (symmetric) {
784:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
785:   } else if (oshift == 1) {
786:     /* temporarily add 1 to i and j indices */
787:     PetscInt nz = a->i[n];
788:     for (i=0; i<nz; i++) a->j[i]++;
789:     for (i=0; i<n+1; i++) a->i[i]++;
790:     *ia = a->i; *ja = a->j;
791:   } else {
792:     *ia = a->i; *ja = a->j;
793:   }

795:   return(0);
796: }

800: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
801: {
802:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
804:   PetscInt       i,n = a->mbs;

807:   if (!ia) return(0);
808:   if (symmetric) {
809:     PetscFree(*ia);
810:     PetscFree(*ja);
811:   } else if (oshift == 1) {
812:     PetscInt nz = a->i[n]-1;
813:     for (i=0; i<nz; i++) a->j[i]--;
814:     for (i=0; i<n+1; i++) a->i[i]--;
815:   }
816:   return(0);
817: }

821: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
822: {
823:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

827: #if defined(PETSC_USE_LOG)
828:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
829: #endif
830:   MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);
831:   if (a->row) {
832:     ISDestroy(a->row);
833:   }
834:   if (a->col) {
835:     ISDestroy(a->col);
836:   }
837:   if (a->diag) {PetscFree(a->diag);}
838:   if (a->idiag) {PetscFree(a->idiag);}
839:   if (a->imax) {PetscFree2(a->imax,a->ilen);}
840:   if (a->solve_work) {PetscFree(a->solve_work);}
841:   if (a->mult_work) {PetscFree(a->mult_work);}
842:   if (a->icol) {ISDestroy(a->icol);}
843:   if (a->saved_values) {PetscFree(a->saved_values);}
844: #if defined(PETSC_USE_MAT_SINGLE)
845:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
846: #endif
847:   if (a->xtoy) {PetscFree(a->xtoy);}
848:   if (a->compressedrow.use){PetscFree(a->compressedrow.i);}

850:   PetscFree(a);

852:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
853:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
854:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
855:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
856:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
857:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
858:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
859:   return(0);
860: }

864: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
865: {
866:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

870:   switch (op) {
871:   case MAT_ROW_ORIENTED:
872:     a->roworiented    = PETSC_TRUE;
873:     break;
874:   case MAT_COLUMN_ORIENTED:
875:     a->roworiented    = PETSC_FALSE;
876:     break;
877:   case MAT_COLUMNS_SORTED:
878:     a->sorted         = PETSC_TRUE;
879:     break;
880:   case MAT_COLUMNS_UNSORTED:
881:     a->sorted         = PETSC_FALSE;
882:     break;
883:   case MAT_KEEP_ZEROED_ROWS:
884:     a->keepzeroedrows = PETSC_TRUE;
885:     break;
886:   case MAT_NO_NEW_NONZERO_LOCATIONS:
887:     a->nonew          = 1;
888:     break;
889:   case MAT_NEW_NONZERO_LOCATION_ERR:
890:     a->nonew          = -1;
891:     break;
892:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
893:     a->nonew          = -2;
894:     break;
895:   case MAT_YES_NEW_NONZERO_LOCATIONS:
896:     a->nonew          = 0;
897:     break;
898:   case MAT_ROWS_SORTED:
899:   case MAT_ROWS_UNSORTED:
900:   case MAT_YES_NEW_DIAGONALS:
901:   case MAT_IGNORE_OFF_PROC_ENTRIES:
902:   case MAT_USE_HASH_TABLE:
903:     PetscLogInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));
904:     break;
905:   case MAT_NO_NEW_DIAGONALS:
906:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
907:   case MAT_SYMMETRIC:
908:   case MAT_STRUCTURALLY_SYMMETRIC:
909:   case MAT_NOT_SYMMETRIC:
910:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
911:   case MAT_HERMITIAN:
912:   case MAT_NOT_HERMITIAN:
913:   case MAT_SYMMETRY_ETERNAL:
914:   case MAT_NOT_SYMMETRY_ETERNAL:
915:     break;
916:   default:
917:     SETERRQ(PETSC_ERR_SUP,"unknown option");
918:   }
919:   return(0);
920: }

924: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
925: {
926:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
928:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
929:   MatScalar      *aa,*aa_i;
930:   PetscScalar    *v_i;

933:   bs  = A->bs;
934:   ai  = a->i;
935:   aj  = a->j;
936:   aa  = a->a;
937:   bs2 = a->bs2;
938: 
939:   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
940: 
941:   bn  = row/bs;   /* Block number */
942:   bp  = row % bs; /* Block Position */
943:   M   = ai[bn+1] - ai[bn];
944:   *nz = bs*M;
945: 
946:   if (v) {
947:     *v = 0;
948:     if (*nz) {
949:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
950:       for (i=0; i<M; i++) { /* for each block in the block row */
951:         v_i  = *v + i*bs;
952:         aa_i = aa + bs2*(ai[bn] + i);
953:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
954:       }
955:     }
956:   }

958:   if (idx) {
959:     *idx = 0;
960:     if (*nz) {
961:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
962:       for (i=0; i<M; i++) { /* for each block in the block row */
963:         idx_i = *idx + i*bs;
964:         itmp  = bs*aj[ai[bn] + i];
965:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
966:       }
967:     }
968:   }
969:   return(0);
970: }

974: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
975: {

979:   if (idx) {if (*idx) {PetscFree(*idx);}}
980:   if (v)   {if (*v)   {PetscFree(*v);}}
981:   return(0);
982: }

986: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
987: {
988:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
989:   Mat            C;
991:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
992:   PetscInt       *rows,*cols,bs2=a->bs2;
993:   PetscScalar    *array;

996:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
997:   PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
998:   PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1000: #if defined(PETSC_USE_MAT_SINGLE)
1001:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1002:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1003: #else
1004:   array = a->a;
1005: #endif

1007:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1008:   MatCreate(A->comm,&C);
1009:   MatSetSizes(C,A->n,A->m,A->n,A->m);
1010:   MatSetType(C,A->type_name);
1011:   MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1012:   PetscFree(col);
1013:   PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1014:   cols = rows + bs;
1015:   for (i=0; i<mbs; i++) {
1016:     cols[0] = i*bs;
1017:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1018:     len = ai[i+1] - ai[i];
1019:     for (j=0; j<len; j++) {
1020:       rows[0] = (*aj++)*bs;
1021:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1022:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1023:       array += bs2;
1024:     }
1025:   }
1026:   PetscFree(rows);
1027: #if defined(PETSC_USE_MAT_SINGLE)
1028:   PetscFree(array);
1029: #endif
1030: 
1031:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1032:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1033: 
1034:   if (B) {
1035:     *B = C;
1036:   } else {
1037:     MatHeaderCopy(A,C);
1038:   }
1039:   return(0);
1040: }

1044: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1045: {
1046:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1048:   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1049:   int            fd;
1050:   PetscScalar    *aa;
1051:   FILE           *file;

1054:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1055:   PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);
1056:   col_lens[0] = MAT_FILE_COOKIE;

1058:   col_lens[1] = A->m;
1059:   col_lens[2] = A->n;
1060:   col_lens[3] = a->nz*bs2;

1062:   /* store lengths of each row and write (including header) to file */
1063:   count = 0;
1064:   for (i=0; i<a->mbs; i++) {
1065:     for (j=0; j<bs; j++) {
1066:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1067:     }
1068:   }
1069:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);
1070:   PetscFree(col_lens);

1072:   /* store column indices (zero start index) */
1073:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1074:   count = 0;
1075:   for (i=0; i<a->mbs; i++) {
1076:     for (j=0; j<bs; j++) {
1077:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1078:         for (l=0; l<bs; l++) {
1079:           jj[count++] = bs*a->j[k] + l;
1080:         }
1081:       }
1082:     }
1083:   }
1084:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1085:   PetscFree(jj);

1087:   /* store nonzero values */
1088:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1089:   count = 0;
1090:   for (i=0; i<a->mbs; i++) {
1091:     for (j=0; j<bs; j++) {
1092:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1093:         for (l=0; l<bs; l++) {
1094:           aa[count++] = a->a[bs2*k + l*bs + j];
1095:         }
1096:       }
1097:     }
1098:   }
1099:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1100:   PetscFree(aa);

1102:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1103:   if (file) {
1104:     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1105:   }
1106:   return(0);
1107: }

1111: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1112: {
1113:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1114:   PetscErrorCode    ierr;
1115:   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1116:   PetscViewerFormat format;

1119:   PetscViewerGetFormat(viewer,&format);
1120:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1121:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1122:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1123:     Mat aij;
1124:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1125:     MatView(aij,viewer);
1126:     MatDestroy(aij);
1127:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1128:      return(0);
1129:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1130:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1131:     for (i=0; i<a->mbs; i++) {
1132:       for (j=0; j<bs; j++) {
1133:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1134:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1135:           for (l=0; l<bs; l++) {
1136: #if defined(PETSC_USE_COMPLEX)
1137:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1138:               PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1139:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1140:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1141:               PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1142:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1143:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1145:             }
1146: #else
1147:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1148:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1149:             }
1150: #endif
1151:           }
1152:         }
1153:         PetscViewerASCIIPrintf(viewer,"\n");
1154:       }
1155:     }
1156:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1157:   } else {
1158:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1159:     for (i=0; i<a->mbs; i++) {
1160:       for (j=0; j<bs; j++) {
1161:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1162:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1163:           for (l=0; l<bs; l++) {
1164: #if defined(PETSC_USE_COMPLEX)
1165:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1166:               PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1167:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1168:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1169:               PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1170:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1171:             } else {
1172:               PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1173:             }
1174: #else
1175:             PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1176: #endif
1177:           }
1178:         }
1179:         PetscViewerASCIIPrintf(viewer,"\n");
1180:       }
1181:     }
1182:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1183:   }
1184:   PetscViewerFlush(viewer);
1185:   return(0);
1186: }

1190: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1191: {
1192:   Mat            A = (Mat) Aa;
1193:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1195:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1196:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1197:   MatScalar      *aa;
1198:   PetscViewer    viewer;


1202:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1203:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1205:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1207:   /* loop over matrix elements drawing boxes */
1208:   color = PETSC_DRAW_BLUE;
1209:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1210:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1211:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1212:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1213:       aa = a->a + j*bs2;
1214:       for (k=0; k<bs; k++) {
1215:         for (l=0; l<bs; l++) {
1216:           if (PetscRealPart(*aa++) >=  0.) continue;
1217:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1218:         }
1219:       }
1220:     }
1221:   }
1222:   color = PETSC_DRAW_CYAN;
1223:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1224:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1226:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1227:       aa = a->a + j*bs2;
1228:       for (k=0; k<bs; k++) {
1229:         for (l=0; l<bs; l++) {
1230:           if (PetscRealPart(*aa++) != 0.) continue;
1231:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1232:         }
1233:       }
1234:     }
1235:   }

1237:   color = PETSC_DRAW_RED;
1238:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1239:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1240:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1241:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1242:       aa = a->a + j*bs2;
1243:       for (k=0; k<bs; k++) {
1244:         for (l=0; l<bs; l++) {
1245:           if (PetscRealPart(*aa++) <= 0.) continue;
1246:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1247:         }
1248:       }
1249:     }
1250:   }
1251:   return(0);
1252: }

1256: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1257: {
1259:   PetscReal      xl,yl,xr,yr,w,h;
1260:   PetscDraw      draw;
1261:   PetscTruth     isnull;


1265:   PetscViewerDrawGetDraw(viewer,0,&draw);
1266:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1268:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1269:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1270:   xr += w;    yr += h;  xl = -w;     yl = -h;
1271:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1272:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1273:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1274:   return(0);
1275: }

1279: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1280: {
1282:   PetscTruth     iascii,isbinary,isdraw;

1285:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1286:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1287:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1288:   if (iascii){
1289:     MatView_SeqBAIJ_ASCII(A,viewer);
1290:   } else if (isbinary) {
1291:     MatView_SeqBAIJ_Binary(A,viewer);
1292:   } else if (isdraw) {
1293:     MatView_SeqBAIJ_Draw(A,viewer);
1294:   } else {
1295:     Mat B;
1296:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1297:     MatView(B,viewer);
1298:     MatDestroy(B);
1299:   }
1300:   return(0);
1301: }


1306: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1307: {
1308:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1309:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1310:   PetscInt    *ai = a->i,*ailen = a->ilen;
1311:   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1312:   MatScalar   *ap,*aa = a->a,zero = 0.0;

1315:   for (k=0; k<m; k++) { /* loop over rows */
1316:     row  = im[k]; brow = row/bs;
1317:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1318:     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1319:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1320:     nrow = ailen[brow];
1321:     for (l=0; l<n; l++) { /* loop over columns */
1322:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1323:       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1324:       col  = in[l] ;
1325:       bcol = col/bs;
1326:       cidx = col%bs;
1327:       ridx = row%bs;
1328:       high = nrow;
1329:       low  = 0; /* assume unsorted */
1330:       while (high-low > 5) {
1331:         t = (low+high)/2;
1332:         if (rp[t] > bcol) high = t;
1333:         else             low  = t;
1334:       }
1335:       for (i=low; i<high; i++) {
1336:         if (rp[i] > bcol) break;
1337:         if (rp[i] == bcol) {
1338:           *v++ = ap[bs2*i+bs*cidx+ridx];
1339:           goto finished;
1340:         }
1341:       }
1342:       *v++ = zero;
1343:       finished:;
1344:     }
1345:   }
1346:   return(0);
1347: }

1349: #if defined(PETSC_USE_MAT_SINGLE)
1352: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1353: {
1354:   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1356:   PetscInt       i,N = m*n*b->bs2;
1357:   MatScalar      *vsingle;

1360:   if (N > b->setvalueslen) {
1361:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
1362:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1363:     b->setvalueslen  = N;
1364:   }
1365:   vsingle = b->setvaluescopy;
1366:   for (i=0; i<N; i++) {
1367:     vsingle[i] = v[i];
1368:   }
1369:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1370:   return(0);
1371: }
1372: #endif


1377: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1378: {
1379:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1380:   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1381:   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1382:   PetscErrorCode  ierr;
1383:   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1384:   PetscTruth      roworiented=a->roworiented;
1385:   const MatScalar *value = v;
1386:   MatScalar       *ap,*aa = a->a,*bap;

1389:   if (roworiented) {
1390:     stepval = (n-1)*bs;
1391:   } else {
1392:     stepval = (m-1)*bs;
1393:   }
1394:   for (k=0; k<m; k++) { /* loop over added rows */
1395:     row  = im[k];
1396:     if (row < 0) continue;
1397: #if defined(PETSC_USE_DEBUG)  
1398:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1399: #endif
1400:     rp   = aj + ai[row];
1401:     ap   = aa + bs2*ai[row];
1402:     rmax = imax[row];
1403:     nrow = ailen[row];
1404:     low  = 0;
1405:     high = nrow;
1406:     for (l=0; l<n; l++) { /* loop over added columns */
1407:       if (in[l] < 0) continue;
1408: #if defined(PETSC_USE_DEBUG)  
1409:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1410: #endif
1411:       col = in[l];
1412:       if (roworiented) {
1413:         value = v + k*(stepval+bs)*bs + l*bs;
1414:       } else {
1415:         value = v + l*(stepval+bs)*bs + k*bs;
1416:       }
1417:       if (col <= lastcol) low = 0; else high = nrow;
1418:       lastcol = col;
1419:       while (high-low > 7) {
1420:         t = (low+high)/2;
1421:         if (rp[t] > col) high = t;
1422:         else             low  = t;
1423:       }
1424:       for (i=low; i<high; i++) {
1425:         if (rp[i] > col) break;
1426:         if (rp[i] == col) {
1427:           bap  = ap +  bs2*i;
1428:           if (roworiented) {
1429:             if (is == ADD_VALUES) {
1430:               for (ii=0; ii<bs; ii++,value+=stepval) {
1431:                 for (jj=ii; jj<bs2; jj+=bs) {
1432:                   bap[jj] += *value++;
1433:                 }
1434:               }
1435:             } else {
1436:               for (ii=0; ii<bs; ii++,value+=stepval) {
1437:                 for (jj=ii; jj<bs2; jj+=bs) {
1438:                   bap[jj] = *value++;
1439:                 }
1440:               }
1441:             }
1442:           } else {
1443:             if (is == ADD_VALUES) {
1444:               for (ii=0; ii<bs; ii++,value+=stepval) {
1445:                 for (jj=0; jj<bs; jj++) {
1446:                   *bap++ += *value++;
1447:                 }
1448:               }
1449:             } else {
1450:               for (ii=0; ii<bs; ii++,value+=stepval) {
1451:                 for (jj=0; jj<bs; jj++) {
1452:                   *bap++  = *value++;
1453:                 }
1454:               }
1455:             }
1456:           }
1457:           goto noinsert2;
1458:         }
1459:       }
1460:       if (nonew == 1) goto noinsert2;
1461:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1462:       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1463:       N = nrow++ - 1;
1464:       /* shift up all the later entries in this row */
1465:       for (ii=N; ii>=i; ii--) {
1466:         rp[ii+1] = rp[ii];
1467:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1468:       }
1469:       if (N >= i) {
1470:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1471:       }
1472:       rp[i] = col;
1473:       bap   = ap +  bs2*i;
1474:       if (roworiented) {
1475:         for (ii=0; ii<bs; ii++,value+=stepval) {
1476:           for (jj=ii; jj<bs2; jj+=bs) {
1477:             bap[jj] = *value++;
1478:           }
1479:         }
1480:       } else {
1481:         for (ii=0; ii<bs; ii++,value+=stepval) {
1482:           for (jj=0; jj<bs; jj++) {
1483:             *bap++  = *value++;
1484:           }
1485:         }
1486:       }
1487:       noinsert2:;
1488:       low = i;
1489:     }
1490:     ailen[row] = nrow;
1491:   }
1492:   return(0);
1493: }

1497: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1498: {
1499:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1500:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1501:   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1503:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1504:   MatScalar      *aa = a->a,*ap;
1505:   PetscReal      ratio=0.6;

1508:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1510:   if (m) rmax = ailen[0];
1511:   for (i=1; i<mbs; i++) {
1512:     /* move each row back by the amount of empty slots (fshift) before it*/
1513:     fshift += imax[i-1] - ailen[i-1];
1514:     rmax   = PetscMax(rmax,ailen[i]);
1515:     if (fshift) {
1516:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1517:       N = ailen[i];
1518:       for (j=0; j<N; j++) {
1519:         ip[j-fshift] = ip[j];
1520:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1521:       }
1522:     }
1523:     ai[i] = ai[i-1] + ailen[i-1];
1524:   }
1525:   if (mbs) {
1526:     fshift += imax[mbs-1] - ailen[mbs-1];
1527:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1528:   }
1529:   /* reset ilen and imax for each row */
1530:   for (i=0; i<mbs; i++) {
1531:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1532:   }
1533:   a->nz = ai[mbs];

1535:   /* diagonals may have moved, so kill the diagonal pointers */
1536:   a->idiagvalid = PETSC_FALSE;
1537:   if (fshift && a->diag) {
1538:     PetscFree(a->diag);
1539:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1540:     a->diag = 0;
1541:   }
1542:   PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));
1543:   PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));
1544:   PetscLogInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));
1545:   a->reallocs          = 0;
1546:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1548:   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1549:   if (a->compressedrow.use){
1550:     Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1551:   }

1553:   A->same_nonzero = PETSC_TRUE;
1554:   return(0);
1555: }

1557: /* 
1558:    This function returns an array of flags which indicate the locations of contiguous
1559:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1560:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1561:    Assume: sizes should be long enough to hold all the values.
1562: */
1565: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1566: {
1567:   PetscInt   i,j,k,row;
1568:   PetscTruth flg;

1571:   for (i=0,j=0; i<n; j++) {
1572:     row = idx[i];
1573:     if (row%bs!=0) { /* Not the begining of a block */
1574:       sizes[j] = 1;
1575:       i++;
1576:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1577:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1578:       i++;
1579:     } else { /* Begining of the block, so check if the complete block exists */
1580:       flg = PETSC_TRUE;
1581:       for (k=1; k<bs; k++) {
1582:         if (row+k != idx[i+k]) { /* break in the block */
1583:           flg = PETSC_FALSE;
1584:           break;
1585:         }
1586:       }
1587:       if (flg) { /* No break in the bs */
1588:         sizes[j] = bs;
1589:         i+= bs;
1590:       } else {
1591:         sizes[j] = 1;
1592:         i++;
1593:       }
1594:     }
1595:   }
1596:   *bs_max = j;
1597:   return(0);
1598: }
1599: 
1602: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1603: {
1604:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1606:   PetscInt       i,j,k,count,*rows;
1607:   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1608:   PetscScalar    zero = 0.0;
1609:   MatScalar      *aa;

1612:   /* Make a copy of the IS and  sort it */
1613:   /* allocate memory for rows,sizes */
1614:   PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1615:   sizes = rows + is_n;

1617:   /* copy IS values to rows, and sort them */
1618:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1619:   PetscSortInt(is_n,rows);
1620:   if (baij->keepzeroedrows) {
1621:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1622:     bs_max = is_n;
1623:     A->same_nonzero = PETSC_TRUE;
1624:   } else {
1625:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1626:     A->same_nonzero = PETSC_FALSE;
1627:   }

1629:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1630:     row   = rows[j];
1631:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1632:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1633:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1634:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1635:       if (diag != 0.0) {
1636:         if (baij->ilen[row/bs] > 0) {
1637:           baij->ilen[row/bs]       = 1;
1638:           baij->j[baij->i[row/bs]] = row/bs;
1639:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1640:         }
1641:         /* Now insert all the diagonal values for this bs */
1642:         for (k=0; k<bs; k++) {
1643:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
1644:         }
1645:       } else { /* (diag == 0.0) */
1646:         baij->ilen[row/bs] = 0;
1647:       } /* end (diag == 0.0) */
1648:     } else { /* (sizes[i] != bs) */
1649: #if defined (PETSC_USE_DEBUG)
1650:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1651: #endif
1652:       for (k=0; k<count; k++) {
1653:         aa[0] =  zero;
1654:         aa    += bs;
1655:       }
1656:       if (diag != 0.0) {
1657:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
1658:       }
1659:     }
1660:   }

1662:   PetscFree(rows);
1663:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1664:   return(0);
1665: }

1669: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1670: {
1671:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1672:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1673:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1674:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1676:   PetscInt       ridx,cidx,bs2=a->bs2;
1677:   PetscTruth     roworiented=a->roworiented;
1678:   MatScalar      *ap,value,*aa=a->a,*bap;

1681:   for (k=0; k<m; k++) { /* loop over added rows */
1682:     row  = im[k];
1683:     brow = row/bs;
1684:     if (row < 0) continue;
1685: #if defined(PETSC_USE_DEBUG)  
1686:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1687: #endif
1688:     rp   = aj + ai[brow];
1689:     ap   = aa + bs2*ai[brow];
1690:     rmax = imax[brow];
1691:     nrow = ailen[brow];
1692:     low  = 0;
1693:     high = nrow;
1694:     for (l=0; l<n; l++) { /* loop over added columns */
1695:       if (in[l] < 0) continue;
1696: #if defined(PETSC_USE_DEBUG)  
1697:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1698: #endif
1699:       col = in[l]; bcol = col/bs;
1700:       ridx = row % bs; cidx = col % bs;
1701:       if (roworiented) {
1702:         value = v[l + k*n];
1703:       } else {
1704:         value = v[k + l*m];
1705:       }
1706:       if (col <= lastcol) low = 0; else high = nrow;
1707:       lastcol = col;
1708:       while (high-low > 7) {
1709:         t = (low+high)/2;
1710:         if (rp[t] > bcol) high = t;
1711:         else              low  = t;
1712:       }
1713:       for (i=low; i<high; i++) {
1714:         if (rp[i] > bcol) break;
1715:         if (rp[i] == bcol) {
1716:           bap  = ap +  bs2*i + bs*cidx + ridx;
1717:           if (is == ADD_VALUES) *bap += value;
1718:           else                  *bap  = value;
1719:           goto noinsert1;
1720:         }
1721:       }
1722:       if (nonew == 1) goto noinsert1;
1723:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1724:       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1725:       N = nrow++ - 1;
1726:       /* shift up all the later entries in this row */
1727:       for (ii=N; ii>=i; ii--) {
1728:         rp[ii+1] = rp[ii];
1729:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1730:       }
1731:       if (N>=i) {
1732:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1733:       }
1734:       rp[i]                      = bcol;
1735:       ap[bs2*i + bs*cidx + ridx] = value;
1736:       a->nz++;
1737:       noinsert1:;
1738:       low = i;
1739:     }
1740:     ailen[brow] = nrow;
1741:   }
1742:   A->same_nonzero = PETSC_FALSE;
1743:   return(0);
1744: }


1749: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1750: {
1751:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1752:   Mat            outA;
1754:   PetscTruth     row_identity,col_identity;

1757:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1758:   ISIdentity(row,&row_identity);
1759:   ISIdentity(col,&col_identity);
1760:   if (!row_identity || !col_identity) {
1761:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1762:   }

1764:   outA          = inA;
1765:   inA->factor   = FACTOR_LU;

1767:   if (!a->diag) {
1768:     MatMarkDiagonal_SeqBAIJ(inA);
1769:   }

1771:   a->row        = row;
1772:   a->col        = col;
1773:   PetscObjectReference((PetscObject)row);
1774:   PetscObjectReference((PetscObject)col);
1775: 
1776:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1777:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1778:   PetscLogObjectParent(inA,a->icol);
1779: 
1780:   /*
1781:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1782:       for ILU(0) factorization with natural ordering
1783:   */
1784:   if (inA->bs < 8) {
1785:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1786:   } else {
1787:     if (!a->solve_work) {
1788:       PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);
1789:       PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));
1790:     }
1791:   }

1793:   MatLUFactorNumeric(inA,info,&outA);

1795:   return(0);
1796: }
1799: PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1800: {
1801:   static PetscTruth called = PETSC_FALSE;
1802:   MPI_Comm          comm = A->comm;
1803:   PetscErrorCode    ierr;

1806:   if (called) {return(0);} else called = PETSC_TRUE;
1807:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1808:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1809:   return(0);
1810: }

1815: PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1816: {
1817:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1818:   PetscInt    i,nz,nbs;

1821:   nz  = baij->maxnz/baij->bs2;
1822:   nbs = baij->nbs;
1823:   for (i=0; i<nz; i++) {
1824:     baij->j[i] = indices[i];
1825:   }
1826:   baij->nz = nz;
1827:   for (i=0; i<nbs; i++) {
1828:     baij->ilen[i] = baij->imax[i];
1829:   }

1831:   return(0);
1832: }

1837: /*@
1838:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1839:        in the matrix.

1841:   Input Parameters:
1842: +  mat - the SeqBAIJ matrix
1843: -  indices - the column indices

1845:   Level: advanced

1847:   Notes:
1848:     This can be called if you have precomputed the nonzero structure of the 
1849:   matrix and want to provide it to the matrix object to improve the performance
1850:   of the MatSetValues() operation.

1852:     You MUST have set the correct numbers of nonzeros per row in the call to 
1853:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

1855:     MUST be called before any calls to MatSetValues();

1857: @*/
1858: PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1859: {
1860:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

1865:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1866:   if (f) {
1867:     (*f)(mat,indices);
1868:   } else {
1869:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1870:   }
1871:   return(0);
1872: }

1876: PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1877: {
1878:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1880:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1881:   PetscReal      atmp;
1882:   PetscScalar    *x,zero = 0.0;
1883:   MatScalar      *aa;
1884:   PetscInt       ncols,brow,krow,kcol;

1887:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1888:   bs   = A->bs;
1889:   aa   = a->a;
1890:   ai   = a->i;
1891:   aj   = a->j;
1892:   mbs = a->mbs;

1894:   VecSet(v,zero);
1895:   VecGetArray(v,&x);
1896:   VecGetLocalSize(v,&n);
1897:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1898:   for (i=0; i<mbs; i++) {
1899:     ncols = ai[1] - ai[0]; ai++;
1900:     brow  = bs*i;
1901:     for (j=0; j<ncols; j++){
1902:       /* bcol = bs*(*aj); */
1903:       for (kcol=0; kcol<bs; kcol++){
1904:         for (krow=0; krow<bs; krow++){
1905:           atmp = PetscAbsScalar(*aa); aa++;
1906:           row = brow + krow;    /* row index */
1907:           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1908:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1909:         }
1910:       }
1911:       aj++;
1912:     }
1913:   }
1914:   VecRestoreArray(v,&x);
1915:   return(0);
1916: }

1920: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1921: {

1925:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);
1926:   return(0);
1927: }

1931: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1932: {
1933:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1935:   *array = a->a;
1936:   return(0);
1937: }

1941: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1942: {
1944:   return(0);
1945: }

1947:  #include petscblaslapack.h
1950: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1951: {
1952:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1954:   PetscInt       i,bs=Y->bs,j,bs2;
1955:   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;

1958:   if (str == SAME_NONZERO_PATTERN) {
1959:     PetscScalar alpha = a;
1960:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1961:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1962:     if (y->xtoy && y->XtoY != X) {
1963:       PetscFree(y->xtoy);
1964:       MatDestroy(y->XtoY);
1965:     }
1966:     if (!y->xtoy) { /* get xtoy */
1967:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1968:       y->XtoY = X;
1969:     }
1970:     bs2 = bs*bs;
1971:     for (i=0; i<x->nz; i++) {
1972:       j = 0;
1973:       while (j < bs2){
1974:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1975:         j++;
1976:       }
1977:     }
1978:     PetscLogInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));
1979:   } else {
1980:     MatAXPY_Basic(Y,a,X,str);
1981:   }
1982:   return(0);
1983: }

1985: /* -------------------------------------------------------------------*/
1986: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1987:        MatGetRow_SeqBAIJ,
1988:        MatRestoreRow_SeqBAIJ,
1989:        MatMult_SeqBAIJ_N,
1990: /* 4*/ MatMultAdd_SeqBAIJ_N,
1991:        MatMultTranspose_SeqBAIJ,
1992:        MatMultTransposeAdd_SeqBAIJ,
1993:        MatSolve_SeqBAIJ_N,
1994:        0,
1995:        0,
1996: /*10*/ 0,
1997:        MatLUFactor_SeqBAIJ,
1998:        0,
1999:        0,
2000:        MatTranspose_SeqBAIJ,
2001: /*15*/ MatGetInfo_SeqBAIJ,
2002:        MatEqual_SeqBAIJ,
2003:        MatGetDiagonal_SeqBAIJ,
2004:        MatDiagonalScale_SeqBAIJ,
2005:        MatNorm_SeqBAIJ,
2006: /*20*/ 0,
2007:        MatAssemblyEnd_SeqBAIJ,
2008:        0,
2009:        MatSetOption_SeqBAIJ,
2010:        MatZeroEntries_SeqBAIJ,
2011: /*25*/ MatZeroRows_SeqBAIJ,
2012:        MatLUFactorSymbolic_SeqBAIJ,
2013:        MatLUFactorNumeric_SeqBAIJ_N,
2014:        MatCholeskyFactorSymbolic_SeqBAIJ,
2015:        MatCholeskyFactorNumeric_SeqBAIJ_N,
2016: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2017:        MatILUFactorSymbolic_SeqBAIJ,
2018:        MatICCFactorSymbolic_SeqBAIJ,
2019:        MatGetArray_SeqBAIJ,
2020:        MatRestoreArray_SeqBAIJ,
2021: /*35*/ MatDuplicate_SeqBAIJ,
2022:        0,
2023:        0,
2024:        MatILUFactor_SeqBAIJ,
2025:        0,
2026: /*40*/ MatAXPY_SeqBAIJ,
2027:        MatGetSubMatrices_SeqBAIJ,
2028:        MatIncreaseOverlap_SeqBAIJ,
2029:        MatGetValues_SeqBAIJ,
2030:        0,
2031: /*45*/ MatPrintHelp_SeqBAIJ,
2032:        MatScale_SeqBAIJ,
2033:        0,
2034:        0,
2035:        0,
2036: /*50*/ 0,
2037:        MatGetRowIJ_SeqBAIJ,
2038:        MatRestoreRowIJ_SeqBAIJ,
2039:        0,
2040:        0,
2041: /*55*/ 0,
2042:        0,
2043:        0,
2044:        0,
2045:        MatSetValuesBlocked_SeqBAIJ,
2046: /*60*/ MatGetSubMatrix_SeqBAIJ,
2047:        MatDestroy_SeqBAIJ,
2048:        MatView_SeqBAIJ,
2049:        MatGetPetscMaps_Petsc,
2050:        0,
2051: /*65*/ 0,
2052:        0,
2053:        0,
2054:        0,
2055:        0,
2056: /*70*/ MatGetRowMax_SeqBAIJ,
2057:        MatConvert_Basic,
2058:        0,
2059:        0,
2060:        0,
2061: /*75*/ 0,
2062:        0,
2063:        0,
2064:        0,
2065:        0,
2066: /*80*/ 0,
2067:        0,
2068:        0,
2069:        0,
2070:        MatLoad_SeqBAIJ,
2071: /*85*/ 0,
2072:        0,
2073:        0,
2074:        0,
2075:        0,
2076: /*90*/ 0,
2077:        0,
2078:        0,
2079:        0,
2080:        0,
2081: /*95*/ 0,
2082:        0,
2083:        0,
2084:        0};

2089: PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2090: {
2091:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2092:   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;

2096:   if (aij->nonew != 1) {
2097:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2098:   }

2100:   /* allocate space for values if not already there */
2101:   if (!aij->saved_values) {
2102:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2103:   }

2105:   /* copy values over */
2106:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2107:   return(0);
2108: }

2114: PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2115: {
2116:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2118:   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;

2121:   if (aij->nonew != 1) {
2122:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2123:   }
2124:   if (!aij->saved_values) {
2125:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2126:   }

2128:   /* copy values over */
2129:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2130:   return(0);
2131: }


2142: PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2143: {
2144:   Mat_SeqBAIJ    *b;
2146:   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2147:   PetscTruth     flg,skipallocation = PETSC_FALSE;


2151:   if (nz == MAT_SKIP_ALLOCATION) {
2152:     skipallocation = PETSC_TRUE;
2153:     nz             = 0;
2154:   }

2156:   B->preallocated = PETSC_TRUE;
2157:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
2158:   if (nnz && newbs != bs) {
2159:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2160:   }
2161:   bs   = newbs;

2163:   mbs  = B->m/bs;
2164:   nbs  = B->n/bs;
2165:   bs2  = bs*bs;

2167:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2168:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2169:   }

2171:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2172:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2173:   if (nnz) {
2174:     for (i=0; i<mbs; i++) {
2175:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2176:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2177:     }
2178:   }

2180:   b       = (Mat_SeqBAIJ*)B->data;
2181:   PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
2182:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2183:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2184:   if (!flg) {
2185:     switch (bs) {
2186:     case 1:
2187:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2188:       B->ops->mult            = MatMult_SeqBAIJ_1;
2189:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2190:       break;
2191:     case 2:
2192:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2193:       B->ops->mult            = MatMult_SeqBAIJ_2;
2194:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2195:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2196:       break;
2197:     case 3:
2198:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2199:       B->ops->mult            = MatMult_SeqBAIJ_3;
2200:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2201:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2202:       break;
2203:     case 4:
2204:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2205:       B->ops->mult            = MatMult_SeqBAIJ_4;
2206:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2207:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2208:       break;
2209:     case 5:
2210:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2211:       B->ops->mult            = MatMult_SeqBAIJ_5;
2212:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2213:       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2214:       break;
2215:     case 6:
2216:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2217:       B->ops->mult            = MatMult_SeqBAIJ_6;
2218:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2219:       break;
2220:     case 7:
2221:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2222:       B->ops->mult            = MatMult_SeqBAIJ_7;
2223:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2224:       break;
2225:     default:
2226:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2227:       B->ops->mult            = MatMult_SeqBAIJ_N;
2228:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2229:       break;
2230:     }
2231:   }
2232:   B->bs      = bs;
2233:   b->mbs     = mbs;
2234:   b->nbs     = nbs;
2235:   if (!skipallocation) {
2236:     PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2237:     /* b->ilen will count nonzeros in each block row so far. */
2238:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2239:     if (!nnz) {
2240:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2241:       else if (nz <= 0)        nz = 1;
2242:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2243:       nz = nz*mbs;
2244:     } else {
2245:       nz = 0;
2246:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2247:     }

2249:     /* allocate the matrix space */
2250:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);
2251:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2252:     PetscMemzero(b->j,nz*sizeof(PetscInt));
2253:     b->singlemalloc = PETSC_TRUE;

2255:     b->i[0] = 0;
2256:     for (i=1; i<mbs+1; i++) {
2257:       b->i[i] = b->i[i-1] + b->imax[i-1];
2258:     }
2259:   }

2261:   B->bs               = bs;
2262:   b->bs2              = bs2;
2263:   b->mbs              = mbs;
2264:   b->nz               = 0;
2265:   b->maxnz            = nz*bs2;
2266:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2267:   return(0);
2268: }

2271: /*MC
2272:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
2273:    block sparse compressed row format.

2275:    Options Database Keys:
2276: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

2278:   Level: beginner

2280: .seealso: MatCreateSeqBAIJ
2281: M*/

2286: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2287: {
2289:   PetscMPIInt    size;
2290:   Mat_SeqBAIJ    *b;

2293:   MPI_Comm_size(B->comm,&size);
2294:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

2296:   B->m = B->M = PetscMax(B->m,B->M);
2297:   B->n = B->N = PetscMax(B->n,B->N);
2298:   PetscNew(Mat_SeqBAIJ,&b);
2299:   B->data = (void*)b;
2300:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2301:   B->factor           = 0;
2302:   B->mapping          = 0;
2303:   b->row              = 0;
2304:   b->col              = 0;
2305:   b->icol             = 0;
2306:   b->reallocs         = 0;
2307:   b->saved_values     = 0;
2308: #if defined(PETSC_USE_MAT_SINGLE)
2309:   b->setvalueslen     = 0;
2310:   b->setvaluescopy    = PETSC_NULL;
2311: #endif

2313:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
2314:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

2316:   b->sorted           = PETSC_FALSE;
2317:   b->roworiented      = PETSC_TRUE;
2318:   b->nonew            = 0;
2319:   b->diag             = 0;
2320:   b->solve_work       = 0;
2321:   b->mult_work        = 0;
2322:   B->spptr            = 0;
2323:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2324:   b->keepzeroedrows   = PETSC_FALSE;
2325:   b->xtoy              = 0;
2326:   b->XtoY              = 0;
2327:   b->compressedrow.use     = PETSC_FALSE;
2328:   b->compressedrow.nrows   = 0;
2329:   b->compressedrow.i       = PETSC_NULL;
2330:   b->compressedrow.rindex  = PETSC_NULL;
2331:   b->compressedrow.checked = PETSC_FALSE;
2332:   B->same_nonzero          = PETSC_FALSE;

2334:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2335:                                      "MatInvertBlockDiagonal_SeqBAIJ",
2336:                                       MatInvertBlockDiagonal_SeqBAIJ);
2337:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2338:                                      "MatStoreValues_SeqBAIJ",
2339:                                       MatStoreValues_SeqBAIJ);
2340:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2341:                                      "MatRetrieveValues_SeqBAIJ",
2342:                                       MatRetrieveValues_SeqBAIJ);
2343:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2344:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2345:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
2346:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2347:                                      "MatConvert_SeqBAIJ_SeqAIJ",
2348:                                       MatConvert_SeqBAIJ_SeqAIJ);
2349:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2350:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2351:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
2352:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2353:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2354:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
2355:   return(0);
2356: }

2361: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2362: {
2363:   Mat            C;
2364:   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2366:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

2369:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

2371:   *B = 0;
2372:   MatCreate(A->comm,&C);
2373:   MatSetSizes(C,A->m,A->n,A->m,A->n);
2374:   MatSetType(C,A->type_name);
2375:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2376:   c    = (Mat_SeqBAIJ*)C->data;

2378:   C->M   = A->M;
2379:   C->N   = A->N;
2380:   C->bs  = A->bs;
2381:   c->bs2 = a->bs2;
2382:   c->mbs = a->mbs;
2383:   c->nbs = a->nbs;

2385:   PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2386:   for (i=0; i<mbs; i++) {
2387:     c->imax[i] = a->imax[i];
2388:     c->ilen[i] = a->ilen[i];
2389:   }

2391:   /* allocate the matrix space */
2392:   PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2393:   c->singlemalloc = PETSC_TRUE;
2394:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2395:   if (mbs > 0) {
2396:     PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2397:     if (cpvalues == MAT_COPY_VALUES) {
2398:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2399:     } else {
2400:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2401:     }
2402:   }
2403:   c->sorted      = a->sorted;
2404:   c->roworiented = a->roworiented;
2405:   c->nonew       = a->nonew;

2407:   if (a->diag) {
2408:     PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2409:     PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2410:     for (i=0; i<mbs; i++) {
2411:       c->diag[i] = a->diag[i];
2412:     }
2413:   } else c->diag        = 0;
2414:   c->nz                 = a->nz;
2415:   c->maxnz              = a->maxnz;
2416:   c->solve_work         = 0;
2417:   c->mult_work          = 0;
2418:   C->preallocated       = PETSC_TRUE;
2419:   C->assembled          = PETSC_TRUE;

2421:   c->compressedrow.use     = a->compressedrow.use;
2422:   c->compressedrow.nrows   = a->compressedrow.nrows;
2423:   c->compressedrow.checked = a->compressedrow.checked;
2424:   if ( a->compressedrow.checked && a->compressedrow.use){
2425:     i = a->compressedrow.nrows;
2426:     PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2427:     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2428:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2429:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2430:   } else {
2431:     c->compressedrow.use    = PETSC_FALSE;
2432:     c->compressedrow.i      = PETSC_NULL;
2433:     c->compressedrow.rindex = PETSC_NULL;
2434:   }
2435:   C->same_nonzero = A->same_nonzero;
2436:   *B = C;
2437:   PetscFListDuplicate(A->qlist,&C->qlist);
2438:   return(0);
2439: }

2443: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2444: {
2445:   Mat_SeqBAIJ    *a;
2446:   Mat            B;
2448:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2449:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2450:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2451:   PetscInt       *masked,nmask,tmp,bs2,ishift;
2452:   PetscMPIInt    size;
2453:   int            fd;
2454:   PetscScalar    *aa;
2455:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

2458:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
2459:   bs2  = bs*bs;

2461:   MPI_Comm_size(comm,&size);
2462:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2463:   PetscViewerBinaryGetDescriptor(viewer,&fd);
2464:   PetscBinaryRead(fd,header,4,PETSC_INT);
2465:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2466:   M = header[1]; N = header[2]; nz = header[3];

2468:   if (header[3] < 0) {
2469:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2470:   }

2472:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

2474:   /* 
2475:      This code adds extra rows to make sure the number of rows is 
2476:     divisible by the blocksize
2477:   */
2478:   mbs        = M/bs;
2479:   extra_rows = bs - M + bs*(mbs);
2480:   if (extra_rows == bs) extra_rows = 0;
2481:   else                  mbs++;
2482:   if (extra_rows) {
2483:     PetscLogInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));
2484:   }

2486:   /* read in row lengths */
2487:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2488:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2489:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

2491:   /* read in column indices */
2492:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2493:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
2494:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

2496:   /* loop over row lengths determining block row lengths */
2497:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2498:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2499:   PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2500:   PetscMemzero(mask,mbs*sizeof(PetscInt));
2501:   masked   = mask + mbs;
2502:   rowcount = 0; nzcount = 0;
2503:   for (i=0; i<mbs; i++) {
2504:     nmask = 0;
2505:     for (j=0; j<bs; j++) {
2506:       kmax = rowlengths[rowcount];
2507:       for (k=0; k<kmax; k++) {
2508:         tmp = jj[nzcount++]/bs;
2509:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2510:       }
2511:       rowcount++;
2512:     }
2513:     browlengths[i] += nmask;
2514:     /* zero out the mask elements we set */
2515:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2516:   }

2518:   /* create our matrix */
2519:   MatCreate(comm,&B);
2520:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2521:   MatSetType(B,type);
2522:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
2523:   a = (Mat_SeqBAIJ*)B->data;

2525:   /* set matrix "i" values */
2526:   a->i[0] = 0;
2527:   for (i=1; i<= mbs; i++) {
2528:     a->i[i]      = a->i[i-1] + browlengths[i-1];
2529:     a->ilen[i-1] = browlengths[i-1];
2530:   }
2531:   a->nz         = 0;
2532:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

2534:   /* read in nonzero values */
2535:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2536:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2537:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

2539:   /* set "a" and "j" values into matrix */
2540:   nzcount = 0; jcount = 0;
2541:   for (i=0; i<mbs; i++) {
2542:     nzcountb = nzcount;
2543:     nmask    = 0;
2544:     for (j=0; j<bs; j++) {
2545:       kmax = rowlengths[i*bs+j];
2546:       for (k=0; k<kmax; k++) {
2547:         tmp = jj[nzcount++]/bs;
2548:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2549:       }
2550:     }
2551:     /* sort the masked values */
2552:     PetscSortInt(nmask,masked);

2554:     /* set "j" values into matrix */
2555:     maskcount = 1;
2556:     for (j=0; j<nmask; j++) {
2557:       a->j[jcount++]  = masked[j];
2558:       mask[masked[j]] = maskcount++;
2559:     }
2560:     /* set "a" values into matrix */
2561:     ishift = bs2*a->i[i];
2562:     for (j=0; j<bs; j++) {
2563:       kmax = rowlengths[i*bs+j];
2564:       for (k=0; k<kmax; k++) {
2565:         tmp       = jj[nzcountb]/bs ;
2566:         block     = mask[tmp] - 1;
2567:         point     = jj[nzcountb] - bs*tmp;
2568:         idx       = ishift + bs2*block + j + bs*point;
2569:         a->a[idx] = (MatScalar)aa[nzcountb++];
2570:       }
2571:     }
2572:     /* zero out the mask elements we set */
2573:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2574:   }
2575:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

2577:   PetscFree(rowlengths);
2578:   PetscFree(browlengths);
2579:   PetscFree(aa);
2580:   PetscFree(jj);
2581:   PetscFree(mask);

2583:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2584:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2585:   MatView_Private(B);

2587:   *A = B;
2588:   return(0);
2589: }

2593: /*@C
2594:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2595:    compressed row) format.  For good matrix assembly performance the
2596:    user should preallocate the matrix storage by setting the parameter nz
2597:    (or the array nnz).  By setting these parameters accurately, performance
2598:    during matrix assembly can be increased by more than a factor of 50.

2600:    Collective on MPI_Comm

2602:    Input Parameters:
2603: +  comm - MPI communicator, set to PETSC_COMM_SELF
2604: .  bs - size of block
2605: .  m - number of rows
2606: .  n - number of columns
2607: .  nz - number of nonzero blocks  per block row (same for all rows)
2608: -  nnz - array containing the number of nonzero blocks in the various block rows 
2609:          (possibly different for each block row) or PETSC_NULL

2611:    Output Parameter:
2612: .  A - the matrix 

2614:    Options Database Keys:
2615: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2616:                      block calculations (much slower)
2617: .    -mat_block_size - size of the blocks to use

2619:    Level: intermediate

2621:    Notes:
2622:    The number of rows and columns must be divisible by blocksize.

2624:    If the nnz parameter is given then the nz parameter is ignored

2626:    A nonzero block is any block that as 1 or more nonzeros in it

2628:    The block AIJ format is fully compatible with standard Fortran 77
2629:    storage.  That is, the stored row and column indices can begin at
2630:    either one (as in Fortran) or zero.  See the users' manual for details.

2632:    Specify the preallocated storage with either nz or nnz (not both).
2633:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2634:    allocation.  For additional details, see the users manual chapter on
2635:    matrices.

2637: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2638: @*/
2639: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2640: {
2642: 
2644:   MatCreate(comm,A);
2645:   MatSetSizes(*A,m,n,m,n);
2646:   MatSetType(*A,MATSEQBAIJ);
2647:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
2648:   return(0);
2649: }

2653: /*@C
2654:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2655:    per row in the matrix. For good matrix assembly performance the
2656:    user should preallocate the matrix storage by setting the parameter nz
2657:    (or the array nnz).  By setting these parameters accurately, performance
2658:    during matrix assembly can be increased by more than a factor of 50.

2660:    Collective on MPI_Comm

2662:    Input Parameters:
2663: +  A - the matrix
2664: .  bs - size of block
2665: .  nz - number of block nonzeros per block row (same for all rows)
2666: -  nnz - array containing the number of block nonzeros in the various block rows 
2667:          (possibly different for each block row) or PETSC_NULL

2669:    Options Database Keys:
2670: .   -mat_no_unroll - uses code that does not unroll the loops in the 
2671:                      block calculations (much slower)
2672: .    -mat_block_size - size of the blocks to use

2674:    Level: intermediate

2676:    Notes:
2677:    If the nnz parameter is given then the nz parameter is ignored

2679:    The block AIJ format is fully compatible with standard Fortran 77
2680:    storage.  That is, the stored row and column indices can begin at
2681:    either one (as in Fortran) or zero.  See the users' manual for details.

2683:    Specify the preallocated storage with either nz or nnz (not both).
2684:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2685:    allocation.  For additional details, see the users manual chapter on
2686:    matrices.

2688: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2689: @*/
2690: PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2691: {
2692:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

2695:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2696:   if (f) {
2697:     (*f)(B,bs,nz,nnz);
2698:   }
2699:   return(0);
2700: }