Actual source code: baij.c

  1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/vec/vecimpl.h
 9:  #include src/inline/spops.h
 10:  #include petscsys.h

 12: /*
 13:     Special version for Fun3d sequential benchmark
 14: */
 15: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 16: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
 17: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 18: #define matsetvaluesblocked4_ matsetvaluesblocked4
 19: #endif

 21: void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
 22: {
 23:   Mat         A = *AA;
 24:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 25:   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
 26:   int         *ai=a->i,*ailen=a->ilen;
 27:   int         *aj=a->j,stepval;
 28:   PetscScalar *value = v;
 29:   MatScalar   *ap,*aa = a->a,*bap;

 32:   stepval = (n-1)*4;
 33:   for (k=0; k<m; k++) { /* loop over added rows */
 34:     row  = im[k];
 35:     rp   = aj + ai[row];
 36:     ap   = aa + 16*ai[row];
 37:     nrow = ailen[row];
 38:     low  = 0;
 39:     for (l=0; l<n; l++) { /* loop over added columns */
 40:       col = in[l];
 41:       value = v + k*(stepval+4)*4 + l*4;
 42:       low = 0; high = nrow;
 43:       while (high-low > 7) {
 44:         t = (low+high)/2;
 45:         if (rp[t] > col) high = t;
 46:         else             low  = t;
 47:       }
 48:       for (i=low; i<high; i++) {
 49:         if (rp[i] > col) break;
 50:         if (rp[i] == col) {
 51:           bap  = ap +  16*i;
 52:           for (ii=0; ii<4; ii++,value+=stepval) {
 53:             for (jj=ii; jj<16; jj+=4) {
 54:               bap[jj] += *value++;
 55:             }
 56:           }
 57:           goto noinsert2;
 58:         }
 59:       }
 60:       N = nrow++ - 1;
 61:       /* shift up all the later entries in this row */
 62:       for (ii=N; ii>=i; ii--) {
 63:         rp[ii+1] = rp[ii];
 64:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
 65:       }
 66:       if (N >= i) {
 67:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
 68:       }
 69:       rp[i] = col;
 70:       bap   = ap +  16*i;
 71:       for (ii=0; ii<4; ii++,value+=stepval) {
 72:         for (jj=ii; jj<16; jj+=4) {
 73:           bap[jj] = *value++;
 74:         }
 75:       }
 76:       noinsert2:;
 77:       low = i;
 78:     }
 79:     ailen[row] = nrow;
 80:   }
 81: }

 83: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 84: #define matsetvalues4_ MATSETVALUES4
 85: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 86: #define matsetvalues4_ matsetvalues4
 87: #endif

 89: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
 90: {
 91:   Mat         A = *AA;
 92:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 93:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
 94:   int         *ai=a->i,*ailen=a->ilen;
 95:   int         *aj=a->j,brow,bcol;
 96:   int         ridx,cidx;
 97:   MatScalar   *ap,value,*aa=a->a,*bap;
 98: 
100:   for (k=0; k<m; k++) { /* loop over added rows */
101:     row  = im[k]; brow = row/4;
102:     rp   = aj + ai[brow];
103:     ap   = aa + 16*ai[brow];
104:     nrow = ailen[brow];
105:     low  = 0;
106:     for (l=0; l<n; l++) { /* loop over added columns */
107:       col = in[l]; bcol = col/4;
108:       ridx = row % 4; cidx = col % 4;
109:       value = v[l + k*n];
110:       low = 0; high = nrow;
111:       while (high-low > 7) {
112:         t = (low+high)/2;
113:         if (rp[t] > bcol) high = t;
114:         else              low  = t;
115:       }
116:       for (i=low; i<high; i++) {
117:         if (rp[i] > bcol) break;
118:         if (rp[i] == bcol) {
119:           bap  = ap +  16*i + 4*cidx + ridx;
120:           *bap += value;
121:           goto noinsert1;
122:         }
123:       }
124:       N = nrow++ - 1;
125:       /* shift up all the later entries in this row */
126:       for (ii=N; ii>=i; ii--) {
127:         rp[ii+1] = rp[ii];
128:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
129:       }
130:       if (N>=i) {
131:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
132:       }
133:       rp[i]                    = bcol;
134:       ap[16*i + 4*cidx + ridx] = value;
135:       noinsert1:;
136:       low = i;
137:     }
138:     ailen[brow] = nrow;
139:   }
140: }

142: /*  UGLY, ugly, ugly
143:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
144:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
145:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
146:    converts the entries into single precision and then calls ..._MatScalar() to put them
147:    into the single precision data structures.
148: */
149: #if defined(PETSC_USE_MAT_SINGLE)
150: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
151: #else
152: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
153: #endif
154: #if defined(PETSC_HAVE_DSCPACK)
155: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
156: #endif

158: #define CHUNKSIZE  10

160: /*
161:      Checks for missing diagonals
162: */
163: int MatMissingDiagonal_SeqBAIJ(Mat A)
164: {
165:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
166:   int         *diag,*jj = a->j,i,ierr;

169:   MatMarkDiagonal_SeqBAIJ(A);
170:   diag = a->diag;
171:   for (i=0; i<a->mbs; i++) {
172:     if (jj[diag[i]] != i) {
173:       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
174:     }
175:   }
176:   return(0);
177: }

179: int MatMarkDiagonal_SeqBAIJ(Mat A)
180: {
181:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
182:   int         i,j,*diag,m = a->mbs,ierr;

185:   if (a->diag) return(0);

187:   PetscMalloc((m+1)*sizeof(int),&diag);
188:   PetscLogObjectMemory(A,(m+1)*sizeof(int));
189:   for (i=0; i<m; i++) {
190:     diag[i] = a->i[i+1];
191:     for (j=a->i[i]; j<a->i[i+1]; j++) {
192:       if (a->j[j] == i) {
193:         diag[i] = j;
194:         break;
195:       }
196:     }
197:   }
198:   a->diag = diag;
199:   return(0);
200: }


203: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);

205: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
206: {
207:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
208:   int         ierr,n = a->mbs,i;

211:   *nn = n;
212:   if (!ia) return(0);
213:   if (symmetric) {
214:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
215:   } else if (oshift == 1) {
216:     /* temporarily add 1 to i and j indices */
217:     int nz = a->i[n];
218:     for (i=0; i<nz; i++) a->j[i]++;
219:     for (i=0; i<n+1; i++) a->i[i]++;
220:     *ia = a->i; *ja = a->j;
221:   } else {
222:     *ia = a->i; *ja = a->j;
223:   }

225:   return(0);
226: }

228: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
229: {
230:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
231:   int         i,n = a->mbs,ierr;

234:   if (!ia) return(0);
235:   if (symmetric) {
236:     PetscFree(*ia);
237:     PetscFree(*ja);
238:   } else if (oshift == 1) {
239:     int nz = a->i[n]-1;
240:     for (i=0; i<nz; i++) a->j[i]--;
241:     for (i=0; i<n+1; i++) a->i[i]--;
242:   }
243:   return(0);
244: }

246: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
247: {
248:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;

251:   *bs = baij->bs;
252:   return(0);
253: }


256: int MatDestroy_SeqBAIJ(Mat A)
257: {
258:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
259:   int         ierr;

262: #if defined(PETSC_USE_LOG)
263:   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
264: #endif
265:   PetscFree(a->a);
266:   if (!a->singlemalloc) {
267:     PetscFree(a->i);
268:     PetscFree(a->j);
269:   }
270:   if (a->row) {
271:     ISDestroy(a->row);
272:   }
273:   if (a->col) {
274:     ISDestroy(a->col);
275:   }
276:   if (a->diag) {PetscFree(a->diag);}
277:   if (a->ilen) {PetscFree(a->ilen);}
278:   if (a->imax) {PetscFree(a->imax);}
279:   if (a->solve_work) {PetscFree(a->solve_work);}
280:   if (a->mult_work) {PetscFree(a->mult_work);}
281:   if (a->icol) {ISDestroy(a->icol);}
282:   if (a->saved_values) {PetscFree(a->saved_values);}
283: #if defined(PETSC_USE_MAT_SINGLE)
284:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
285: #endif
286:   PetscFree(a);
287:   return(0);
288: }

290: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
291: {
292:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

295:   switch (op) {
296:   case MAT_ROW_ORIENTED:
297:     a->roworiented    = PETSC_TRUE;
298:     break;
299:   case MAT_COLUMN_ORIENTED:
300:     a->roworiented    = PETSC_FALSE;
301:     break;
302:   case MAT_COLUMNS_SORTED:
303:     a->sorted         = PETSC_TRUE;
304:     break;
305:   case MAT_COLUMNS_UNSORTED:
306:     a->sorted         = PETSC_FALSE;
307:     break;
308:   case MAT_KEEP_ZEROED_ROWS:
309:     a->keepzeroedrows = PETSC_TRUE;
310:     break;
311:   case MAT_NO_NEW_NONZERO_LOCATIONS:
312:     a->nonew          = 1;
313:     break;
314:   case MAT_NEW_NONZERO_LOCATION_ERR:
315:     a->nonew          = -1;
316:     break;
317:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
318:     a->nonew          = -2;
319:     break;
320:   case MAT_YES_NEW_NONZERO_LOCATIONS:
321:     a->nonew          = 0;
322:     break;
323:   case MAT_ROWS_SORTED:
324:   case MAT_ROWS_UNSORTED:
325:   case MAT_YES_NEW_DIAGONALS:
326:   case MAT_IGNORE_OFF_PROC_ENTRIES:
327:   case MAT_USE_HASH_TABLE:
328:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
329:     break;
330:   case MAT_NO_NEW_DIAGONALS:
331:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
332:   case MAT_USE_SINGLE_PRECISION_SOLVES:
333:     if (a->bs==4) {
334:       PetscTruth sse_enabled_local,sse_enabled_global;
335:       int        ierr;

337:       sse_enabled_local  = PETSC_FALSE;
338:       sse_enabled_global = PETSC_FALSE;

340:       PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);
341: #if defined(PETSC_HAVE_SSE)
342:       if (sse_enabled_local) {
343:           a->single_precision_solves = PETSC_TRUE;
344:           A->ops->solve              = MatSolve_SeqBAIJ_Update;
345:           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
346:           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES setn");
347:           break;
348:       } else {
349:         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
350:       }
351: #else
352:       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
353: #endif
354:     }
355:     break;
356:   default:
357:     SETERRQ(PETSC_ERR_SUP,"unknown option");
358:   }
359:   return(0);
360: }

362: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
363: {
364:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
365:   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
366:   MatScalar    *aa,*aa_i;
367:   PetscScalar  *v_i;

370:   bs  = a->bs;
371:   ai  = a->i;
372:   aj  = a->j;
373:   aa  = a->a;
374:   bs2 = a->bs2;
375: 
376:   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
377: 
378:   bn  = row/bs;   /* Block number */
379:   bp  = row % bs; /* Block Position */
380:   M   = ai[bn+1] - ai[bn];
381:   *nz = bs*M;
382: 
383:   if (v) {
384:     *v = 0;
385:     if (*nz) {
386:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
387:       for (i=0; i<M; i++) { /* for each block in the block row */
388:         v_i  = *v + i*bs;
389:         aa_i = aa + bs2*(ai[bn] + i);
390:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
391:       }
392:     }
393:   }

395:   if (idx) {
396:     *idx = 0;
397:     if (*nz) {
398:       PetscMalloc((*nz)*sizeof(int),idx);
399:       for (i=0; i<M; i++) { /* for each block in the block row */
400:         idx_i = *idx + i*bs;
401:         itmp  = bs*aj[ai[bn] + i];
402:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
403:       }
404:     }
405:   }
406:   return(0);
407: }

409: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
410: {

414:   if (idx) {if (*idx) {PetscFree(*idx);}}
415:   if (v)   {if (*v)   {PetscFree(*v);}}
416:   return(0);
417: }

419: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
420: {
421:   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
422:   Mat         C;
423:   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
424:   int         *rows,*cols,bs2=a->bs2;
425:   PetscScalar *array;

428:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
429:   PetscMalloc((1+nbs)*sizeof(int),&col);
430:   PetscMemzero(col,(1+nbs)*sizeof(int));

432: #if defined(PETSC_USE_MAT_SINGLE)
433:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
434:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
435: #else
436:   array = a->a;
437: #endif

439:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
440:   MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
441:   PetscFree(col);
442:   PetscMalloc(2*bs*sizeof(int),&rows);
443:   cols = rows + bs;
444:   for (i=0; i<mbs; i++) {
445:     cols[0] = i*bs;
446:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
447:     len = ai[i+1] - ai[i];
448:     for (j=0; j<len; j++) {
449:       rows[0] = (*aj++)*bs;
450:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
451:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
452:       array += bs2;
453:     }
454:   }
455:   PetscFree(rows);
456: #if defined(PETSC_USE_MAT_SINGLE)
457:   PetscFree(array);
458: #endif
459: 
460:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
461:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
462: 
463:   if (B) {
464:     *B = C;
465:   } else {
466:     MatHeaderCopy(A,C);
467:   }
468:   return(0);
469: }

471: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
472: {
473:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
474:   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
475:   PetscScalar *aa;
476:   FILE        *file;

479:   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);
480:   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
481:   col_lens[0] = MAT_FILE_COOKIE;

483:   col_lens[1] = A->m;
484:   col_lens[2] = A->n;
485:   col_lens[3] = a->nz*bs2;

487:   /* store lengths of each row and write (including header) to file */
488:   count = 0;
489:   for (i=0; i<a->mbs; i++) {
490:     for (j=0; j<bs; j++) {
491:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
492:     }
493:   }
494:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
495:   PetscFree(col_lens);

497:   /* store column indices (zero start index) */
498:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
499:   count = 0;
500:   for (i=0; i<a->mbs; i++) {
501:     for (j=0; j<bs; j++) {
502:       for (k=a->i[i]; k<a->i[i+1]; k++) {
503:         for (l=0; l<bs; l++) {
504:           jj[count++] = bs*a->j[k] + l;
505:         }
506:       }
507:     }
508:   }
509:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
510:   PetscFree(jj);

512:   /* store nonzero values */
513:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
514:   count = 0;
515:   for (i=0; i<a->mbs; i++) {
516:     for (j=0; j<bs; j++) {
517:       for (k=a->i[i]; k<a->i[i+1]; k++) {
518:         for (l=0; l<bs; l++) {
519:           aa[count++] = a->a[bs2*k + l*bs + j];
520:         }
521:       }
522:     }
523:   }
524:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
525:   PetscFree(aa);

527:   PetscViewerBinaryGetInfoPointer(viewer,&file);
528:   if (file) {
529:     fprintf(file,"-matload_block_size %dn",a->bs);
530:   }
531:   return(0);
532: }

534: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);

536: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
537: {
538:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
539:   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
540:   PetscViewerFormat format;

543:   PetscViewerGetFormat(viewer,&format);
544:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
545:     PetscViewerASCIIPrintf(viewer,"  block size is %dn",bs);
546:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
547:     Mat aij;
548:     MatConvert(A,MATSEQAIJ,&aij);
549:     MatView(aij,viewer);
550:     MatDestroy(aij);
551:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
552: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
553:      MatMPIBAIJFactorInfo_DSCPACK(A,viewer);
554: #endif
555:      return(0);
556:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
557:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
558:     for (i=0; i<a->mbs; i++) {
559:       for (j=0; j<bs; j++) {
560:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
561:         for (k=a->i[i]; k<a->i[i+1]; k++) {
562:           for (l=0; l<bs; l++) {
563: #if defined(PETSC_USE_COMPLEX)
564:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
565:               PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
566:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
567:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
568:               PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
569:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
570:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
571:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
572:             }
573: #else
574:             if (a->a[bs2*k + l*bs + j] != 0.0) {
575:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
576:             }
577: #endif
578:           }
579:         }
580:         PetscViewerASCIIPrintf(viewer,"n");
581:       }
582:     }
583:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
584:   } else {
585:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
586:     for (i=0; i<a->mbs; i++) {
587:       for (j=0; j<bs; j++) {
588:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
589:         for (k=a->i[i]; k<a->i[i+1]; k++) {
590:           for (l=0; l<bs; l++) {
591: #if defined(PETSC_USE_COMPLEX)
592:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
593:               PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
594:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
595:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
596:               PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
597:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
598:             } else {
599:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
600:             }
601: #else
602:             PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
603: #endif
604:           }
605:         }
606:         PetscViewerASCIIPrintf(viewer,"n");
607:       }
608:     }
609:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
610:   }
611:   PetscViewerFlush(viewer);
612:   return(0);
613: }

615: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
616: {
617:   Mat          A = (Mat) Aa;
618:   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
619:   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
620:   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
621:   MatScalar    *aa;
622:   PetscViewer  viewer;


626:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
627:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

629:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

631:   /* loop over matrix elements drawing boxes */
632:   color = PETSC_DRAW_BLUE;
633:   for (i=0,row=0; i<mbs; i++,row+=bs) {
634:     for (j=a->i[i]; j<a->i[i+1]; j++) {
635:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
636:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
637:       aa = a->a + j*bs2;
638:       for (k=0; k<bs; k++) {
639:         for (l=0; l<bs; l++) {
640:           if (PetscRealPart(*aa++) >=  0.) continue;
641:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
642:         }
643:       }
644:     }
645:   }
646:   color = PETSC_DRAW_CYAN;
647:   for (i=0,row=0; i<mbs; i++,row+=bs) {
648:     for (j=a->i[i]; j<a->i[i+1]; j++) {
649:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
650:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
651:       aa = a->a + j*bs2;
652:       for (k=0; k<bs; k++) {
653:         for (l=0; l<bs; l++) {
654:           if (PetscRealPart(*aa++) != 0.) continue;
655:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
656:         }
657:       }
658:     }
659:   }

661:   color = PETSC_DRAW_RED;
662:   for (i=0,row=0; i<mbs; i++,row+=bs) {
663:     for (j=a->i[i]; j<a->i[i+1]; j++) {
664:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
665:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
666:       aa = a->a + j*bs2;
667:       for (k=0; k<bs; k++) {
668:         for (l=0; l<bs; l++) {
669:           if (PetscRealPart(*aa++) <= 0.) continue;
670:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
671:         }
672:       }
673:     }
674:   }
675:   return(0);
676: }

678: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
679: {
680:   int          ierr;
681:   PetscReal    xl,yl,xr,yr,w,h;
682:   PetscDraw    draw;
683:   PetscTruth   isnull;


687:   PetscViewerDrawGetDraw(viewer,0,&draw);
688:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

690:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
691:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
692:   xr += w;    yr += h;  xl = -w;     yl = -h;
693:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
694:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
695:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
696:   return(0);
697: }

699: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
700: {
701:   int        ierr;
702:   PetscTruth issocket,isascii,isbinary,isdraw;

705:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
706:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
707:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
708:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
709:   if (issocket) {
710:     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
711:   } else if (isascii){
712:     MatView_SeqBAIJ_ASCII(A,viewer);
713:   } else if (isbinary) {
714:     MatView_SeqBAIJ_Binary(A,viewer);
715:   } else if (isdraw) {
716:     MatView_SeqBAIJ_Draw(A,viewer);
717:   } else {
718:     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
719:   }
720:   return(0);
721: }


724: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
725: {
726:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
727:   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
728:   int        *ai = a->i,*ailen = a->ilen;
729:   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
730:   MatScalar  *ap,*aa = a->a,zero = 0.0;

733:   for (k=0; k<m; k++) { /* loop over rows */
734:     row  = im[k]; brow = row/bs;
735:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
736:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
737:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
738:     nrow = ailen[brow];
739:     for (l=0; l<n; l++) { /* loop over columns */
740:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
741:       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
742:       col  = in[l] ;
743:       bcol = col/bs;
744:       cidx = col%bs;
745:       ridx = row%bs;
746:       high = nrow;
747:       low  = 0; /* assume unsorted */
748:       while (high-low > 5) {
749:         t = (low+high)/2;
750:         if (rp[t] > bcol) high = t;
751:         else             low  = t;
752:       }
753:       for (i=low; i<high; i++) {
754:         if (rp[i] > bcol) break;
755:         if (rp[i] == bcol) {
756:           *v++ = ap[bs2*i+bs*cidx+ridx];
757:           goto finished;
758:         }
759:       }
760:       *v++ = zero;
761:       finished:;
762:     }
763:   }
764:   return(0);
765: }

767: #if defined(PETSC_USE_MAT_SINGLE)
768: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
769: {
770:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
771:   int         ierr,i,N = m*n*b->bs2;
772:   MatScalar   *vsingle;

775:   if (N > b->setvalueslen) {
776:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
777:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
778:     b->setvalueslen  = N;
779:   }
780:   vsingle = b->setvaluescopy;
781:   for (i=0; i<N; i++) {
782:     vsingle[i] = v[i];
783:   }
784:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
785:   return(0);
786: }
787: #endif


790: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
791: {
792:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
793:   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
794:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
795:   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
796:   PetscTruth  roworiented=a->roworiented;
797:   MatScalar   *value = v,*ap,*aa = a->a,*bap;

800:   if (roworiented) {
801:     stepval = (n-1)*bs;
802:   } else {
803:     stepval = (m-1)*bs;
804:   }
805:   for (k=0; k<m; k++) { /* loop over added rows */
806:     row  = im[k];
807:     if (row < 0) continue;
808: #if defined(PETSC_USE_BOPT_g)  
809:     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
810: #endif
811:     rp   = aj + ai[row];
812:     ap   = aa + bs2*ai[row];
813:     rmax = imax[row];
814:     nrow = ailen[row];
815:     low  = 0;
816:     for (l=0; l<n; l++) { /* loop over added columns */
817:       if (in[l] < 0) continue;
818: #if defined(PETSC_USE_BOPT_g)  
819:       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
820: #endif
821:       col = in[l];
822:       if (roworiented) {
823:         value = v + k*(stepval+bs)*bs + l*bs;
824:       } else {
825:         value = v + l*(stepval+bs)*bs + k*bs;
826:       }
827:       if (!sorted) low = 0; high = nrow;
828:       while (high-low > 7) {
829:         t = (low+high)/2;
830:         if (rp[t] > col) high = t;
831:         else             low  = t;
832:       }
833:       for (i=low; i<high; i++) {
834:         if (rp[i] > col) break;
835:         if (rp[i] == col) {
836:           bap  = ap +  bs2*i;
837:           if (roworiented) {
838:             if (is == ADD_VALUES) {
839:               for (ii=0; ii<bs; ii++,value+=stepval) {
840:                 for (jj=ii; jj<bs2; jj+=bs) {
841:                   bap[jj] += *value++;
842:                 }
843:               }
844:             } else {
845:               for (ii=0; ii<bs; ii++,value+=stepval) {
846:                 for (jj=ii; jj<bs2; jj+=bs) {
847:                   bap[jj] = *value++;
848:                 }
849:               }
850:             }
851:           } else {
852:             if (is == ADD_VALUES) {
853:               for (ii=0; ii<bs; ii++,value+=stepval) {
854:                 for (jj=0; jj<bs; jj++) {
855:                   *bap++ += *value++;
856:                 }
857:               }
858:             } else {
859:               for (ii=0; ii<bs; ii++,value+=stepval) {
860:                 for (jj=0; jj<bs; jj++) {
861:                   *bap++  = *value++;
862:                 }
863:               }
864:             }
865:           }
866:           goto noinsert2;
867:         }
868:       }
869:       if (nonew == 1) goto noinsert2;
870:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
871:       if (nrow >= rmax) {
872:         /* there is no extra room in row, therefore enlarge */
873:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
874:         MatScalar *new_a;

876:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

878:         /* malloc new storage space */
879:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
880:         ierr    = PetscMalloc(len,&new_a);
881:         new_j   = (int*)(new_a + bs2*new_nz);
882:         new_i   = new_j + new_nz;

884:         /* copy over old data into new slots */
885:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
886:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
887:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
888:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
889:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
890:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
891:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
892:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
893:         /* free up old matrix storage */
894:         PetscFree(a->a);
895:         if (!a->singlemalloc) {
896:           PetscFree(a->i);
897:           PetscFree(a->j);
898:         }
899:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
900:         a->singlemalloc = PETSC_TRUE;

902:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
903:         rmax = imax[row] = imax[row] + CHUNKSIZE;
904:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
905:         a->maxnz += bs2*CHUNKSIZE;
906:         a->reallocs++;
907:         a->nz++;
908:       }
909:       N = nrow++ - 1;
910:       /* shift up all the later entries in this row */
911:       for (ii=N; ii>=i; ii--) {
912:         rp[ii+1] = rp[ii];
913:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
914:       }
915:       if (N >= i) {
916:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
917:       }
918:       rp[i] = col;
919:       bap   = ap +  bs2*i;
920:       if (roworiented) {
921:         for (ii=0; ii<bs; ii++,value+=stepval) {
922:           for (jj=ii; jj<bs2; jj+=bs) {
923:             bap[jj] = *value++;
924:           }
925:         }
926:       } else {
927:         for (ii=0; ii<bs; ii++,value+=stepval) {
928:           for (jj=0; jj<bs; jj++) {
929:             *bap++  = *value++;
930:           }
931:         }
932:       }
933:       noinsert2:;
934:       low = i;
935:     }
936:     ailen[row] = nrow;
937:   }
938:   return(0);
939: }

941: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
942: {
943:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
944:   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
945:   int        m = A->m,*ip,N,*ailen = a->ilen;
946:   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
947:   MatScalar  *aa = a->a,*ap;
948: #if defined(PETSC_HAVE_DSCPACK)
949:   PetscTruth   flag;
950: #endif

953:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

955:   if (m) rmax = ailen[0];
956:   for (i=1; i<mbs; i++) {
957:     /* move each row back by the amount of empty slots (fshift) before it*/
958:     fshift += imax[i-1] - ailen[i-1];
959:     rmax   = PetscMax(rmax,ailen[i]);
960:     if (fshift) {
961:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
962:       N = ailen[i];
963:       for (j=0; j<N; j++) {
964:         ip[j-fshift] = ip[j];
965:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
966:       }
967:     }
968:     ai[i] = ai[i-1] + ailen[i-1];
969:   }
970:   if (mbs) {
971:     fshift += imax[mbs-1] - ailen[mbs-1];
972:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
973:   }
974:   /* reset ilen and imax for each row */
975:   for (i=0; i<mbs; i++) {
976:     ailen[i] = imax[i] = ai[i+1] - ai[i];
977:   }
978:   a->nz = ai[mbs];

980:   /* diagonals may have moved, so kill the diagonal pointers */
981:   if (fshift && a->diag) {
982:     PetscFree(a->diag);
983:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
984:     a->diag = 0;
985:   }
986:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
987:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
988:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
989:   a->reallocs          = 0;
990:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

992: #if defined(PETSC_HAVE_DSCPACK)
993:   PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);
994:   if (flag) { MatUseDSCPACK_MPIBAIJ(A); }
995: #endif

997:   return(0);
998: }



1002: /* 
1003:    This function returns an array of flags which indicate the locations of contiguous
1004:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1005:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1006:    Assume: sizes should be long enough to hold all the values.
1007: */
1008: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1009: {
1010:   int        i,j,k,row;
1011:   PetscTruth flg;

1014:   for (i=0,j=0; i<n; j++) {
1015:     row = idx[i];
1016:     if (row%bs!=0) { /* Not the begining of a block */
1017:       sizes[j] = 1;
1018:       i++;
1019:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1020:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1021:       i++;
1022:     } else { /* Begining of the block, so check if the complete block exists */
1023:       flg = PETSC_TRUE;
1024:       for (k=1; k<bs; k++) {
1025:         if (row+k != idx[i+k]) { /* break in the block */
1026:           flg = PETSC_FALSE;
1027:           break;
1028:         }
1029:       }
1030:       if (flg == PETSC_TRUE) { /* No break in the bs */
1031:         sizes[j] = bs;
1032:         i+= bs;
1033:       } else {
1034:         sizes[j] = 1;
1035:         i++;
1036:       }
1037:     }
1038:   }
1039:   *bs_max = j;
1040:   return(0);
1041: }
1042: 
1043: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
1044: {
1045:   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1046:   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1047:   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1048:   PetscScalar zero = 0.0;
1049:   MatScalar   *aa;

1052:   /* Make a copy of the IS and  sort it */
1053:   ISGetLocalSize(is,&is_n);
1054:   ISGetIndices(is,&is_idx);

1056:   /* allocate memory for rows,sizes */
1057:   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1058:   sizes = rows + is_n;

1060:   /* copy IS values to rows, and sort them */
1061:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1062:   PetscSortInt(is_n,rows);
1063:   if (baij->keepzeroedrows) {
1064:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1065:     bs_max = is_n;
1066:   } else {
1067:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1068:   }
1069:   ISRestoreIndices(is,&is_idx);

1071:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1072:     row   = rows[j];
1073:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1074:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1075:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1076:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1077:       if (diag) {
1078:         if (baij->ilen[row/bs] > 0) {
1079:           baij->ilen[row/bs]       = 1;
1080:           baij->j[baij->i[row/bs]] = row/bs;
1081:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1082:         }
1083:         /* Now insert all the diagonal values for this bs */
1084:         for (k=0; k<bs; k++) {
1085:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1086:         }
1087:       } else { /* (!diag) */
1088:         baij->ilen[row/bs] = 0;
1089:       } /* end (!diag) */
1090:     } else { /* (sizes[i] != bs) */
1091: #if defined (PETSC_USE_DEBUG)
1092:       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1093: #endif
1094:       for (k=0; k<count; k++) {
1095:         aa[0] =  zero;
1096:         aa    += bs;
1097:       }
1098:       if (diag) {
1099:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1100:       }
1101:     }
1102:   }

1104:   PetscFree(rows);
1105:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1106:   return(0);
1107: }

1109: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1110: {
1111:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1112:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1113:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1114:   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1115:   int         ridx,cidx,bs2=a->bs2,ierr;
1116:   PetscTruth  roworiented=a->roworiented;
1117:   MatScalar   *ap,value,*aa=a->a,*bap;

1120:   for (k=0; k<m; k++) { /* loop over added rows */
1121:     row  = im[k]; brow = row/bs;
1122:     if (row < 0) continue;
1123: #if defined(PETSC_USE_BOPT_g)  
1124:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1125: #endif
1126:     rp   = aj + ai[brow];
1127:     ap   = aa + bs2*ai[brow];
1128:     rmax = imax[brow];
1129:     nrow = ailen[brow];
1130:     low  = 0;
1131:     for (l=0; l<n; l++) { /* loop over added columns */
1132:       if (in[l] < 0) continue;
1133: #if defined(PETSC_USE_BOPT_g)  
1134:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1135: #endif
1136:       col = in[l]; bcol = col/bs;
1137:       ridx = row % bs; cidx = col % bs;
1138:       if (roworiented) {
1139:         value = v[l + k*n];
1140:       } else {
1141:         value = v[k + l*m];
1142:       }
1143:       if (!sorted) low = 0; high = nrow;
1144:       while (high-low > 7) {
1145:         t = (low+high)/2;
1146:         if (rp[t] > bcol) high = t;
1147:         else              low  = t;
1148:       }
1149:       for (i=low; i<high; i++) {
1150:         if (rp[i] > bcol) break;
1151:         if (rp[i] == bcol) {
1152:           bap  = ap +  bs2*i + bs*cidx + ridx;
1153:           if (is == ADD_VALUES) *bap += value;
1154:           else                  *bap  = value;
1155:           goto noinsert1;
1156:         }
1157:       }
1158:       if (nonew == 1) goto noinsert1;
1159:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1160:       if (nrow >= rmax) {
1161:         /* there is no extra room in row, therefore enlarge */
1162:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1163:         MatScalar *new_a;

1165:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

1167:         /* Malloc new storage space */
1168:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1169:         ierr    = PetscMalloc(len,&new_a);
1170:         new_j   = (int*)(new_a + bs2*new_nz);
1171:         new_i   = new_j + new_nz;

1173:         /* copy over old data into new slots */
1174:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1175:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1176:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1177:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1178:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1179:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1180:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1181:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1182:         /* free up old matrix storage */
1183:         PetscFree(a->a);
1184:         if (!a->singlemalloc) {
1185:           PetscFree(a->i);
1186:           PetscFree(a->j);
1187:         }
1188:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1189:         a->singlemalloc = PETSC_TRUE;

1191:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1192:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1193:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1194:         a->maxnz += bs2*CHUNKSIZE;
1195:         a->reallocs++;
1196:         a->nz++;
1197:       }
1198:       N = nrow++ - 1;
1199:       /* shift up all the later entries in this row */
1200:       for (ii=N; ii>=i; ii--) {
1201:         rp[ii+1] = rp[ii];
1202:         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1203:       }
1204:       if (N>=i) {
1205:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1206:       }
1207:       rp[i]                      = bcol;
1208:       ap[bs2*i + bs*cidx + ridx] = value;
1209:       noinsert1:;
1210:       low = i;
1211:     }
1212:     ailen[brow] = nrow;
1213:   }
1214:   return(0);
1215: }


1218: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1219: {
1220:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1221:   Mat         outA;
1222:   int         ierr;
1223:   PetscTruth  row_identity,col_identity;

1226:   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1227:   ISIdentity(row,&row_identity);
1228:   ISIdentity(col,&col_identity);
1229:   if (!row_identity || !col_identity) {
1230:     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1231:   }

1233:   outA          = inA;
1234:   inA->factor   = FACTOR_LU;

1236:   if (!a->diag) {
1237:     MatMarkDiagonal_SeqBAIJ(inA);
1238:   }

1240:   a->row        = row;
1241:   a->col        = col;
1242:   ierr          = PetscObjectReference((PetscObject)row);
1243:   ierr          = PetscObjectReference((PetscObject)col);
1244: 
1245:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1246:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1247:   PetscLogObjectParent(inA,a->icol);
1248: 
1249:   /*
1250:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1251:       for ILU(0) factorization with natural ordering
1252:   */
1253:   if (a->bs < 8) {
1254:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1255:   } else {
1256:     if (!a->solve_work) {
1257:       PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1258:       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1259:     }
1260:   }

1262:   MatLUFactorNumeric(inA,&outA);

1264:   return(0);
1265: }
1266: int MatPrintHelp_SeqBAIJ(Mat A)
1267: {
1268:   static PetscTruth called = PETSC_FALSE;
1269:   MPI_Comm          comm = A->comm;
1270:   int               ierr;

1273:   if (called) {return(0);} else called = PETSC_TRUE;
1274:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1275:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>n");
1276:   return(0);
1277: }

1279: EXTERN_C_BEGIN
1280: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1281: {
1282:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1283:   int         i,nz,nbs;

1286:   nz  = baij->maxnz/baij->bs2;
1287:   nbs = baij->nbs;
1288:   for (i=0; i<nz; i++) {
1289:     baij->j[i] = indices[i];
1290:   }
1291:   baij->nz = nz;
1292:   for (i=0; i<nbs; i++) {
1293:     baij->ilen[i] = baij->imax[i];
1294:   }

1296:   return(0);
1297: }
1298: EXTERN_C_END

1300: /*@
1301:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1302:        in the matrix.

1304:   Input Parameters:
1305: +  mat - the SeqBAIJ matrix
1306: -  indices - the column indices

1308:   Level: advanced

1310:   Notes:
1311:     This can be called if you have precomputed the nonzero structure of the 
1312:   matrix and want to provide it to the matrix object to improve the performance
1313:   of the MatSetValues() operation.

1315:     You MUST have set the correct numbers of nonzeros per row in the call to 
1316:   MatCreateSeqBAIJ().

1318:     MUST be called before any calls to MatSetValues();

1320: @*/
1321: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1322: {
1323:   int ierr,(*f)(Mat,int *);

1327:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1328:   if (f) {
1329:     (*f)(mat,indices);
1330:   } else {
1331:     SETERRQ(1,"Wrong type of matrix to set column indices");
1332:   }
1333:   return(0);
1334: }

1336: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1337: {
1338:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1339:   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1340:   PetscReal    atmp;
1341:   PetscScalar  *x,zero = 0.0;
1342:   MatScalar    *aa;
1343:   int          ncols,brow,krow,kcol;

1346:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1347:   bs   = a->bs;
1348:   aa   = a->a;
1349:   ai   = a->i;
1350:   aj   = a->j;
1351:   mbs = a->mbs;

1353:   VecSet(&zero,v);
1354:   VecGetArray(v,&x);
1355:   VecGetLocalSize(v,&n);
1356:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1357:   for (i=0; i<mbs; i++) {
1358:     ncols = ai[1] - ai[0]; ai++;
1359:     brow  = bs*i;
1360:     for (j=0; j<ncols; j++){
1361:       /* bcol = bs*(*aj); */
1362:       for (kcol=0; kcol<bs; kcol++){
1363:         for (krow=0; krow<bs; krow++){
1364:           atmp = PetscAbsScalar(*aa); aa++;
1365:           row = brow + krow;    /* row index */
1366:           /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1367:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1368:         }
1369:       }
1370:       aj++;
1371:     }
1372:   }
1373:   VecRestoreArray(v,&x);
1374:   return(0);
1375: }

1377: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1378: {
1379:   int        ierr;

1382:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1383:   return(0);
1384: }

1386: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1387: {
1388:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1390:   *array = a->a;
1391:   return(0);
1392: }

1394: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1395: {
1397:   return(0);
1398: }

1400: /* -------------------------------------------------------------------*/
1401: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1402:        MatGetRow_SeqBAIJ,
1403:        MatRestoreRow_SeqBAIJ,
1404:        MatMult_SeqBAIJ_N,
1405:        MatMultAdd_SeqBAIJ_N,
1406:        MatMultTranspose_SeqBAIJ,
1407:        MatMultTransposeAdd_SeqBAIJ,
1408:        MatSolve_SeqBAIJ_N,
1409:        0,
1410:        0,
1411:        0,
1412:        MatLUFactor_SeqBAIJ,
1413:        0,
1414:        0,
1415:        MatTranspose_SeqBAIJ,
1416:        MatGetInfo_SeqBAIJ,
1417:        MatEqual_SeqBAIJ,
1418:        MatGetDiagonal_SeqBAIJ,
1419:        MatDiagonalScale_SeqBAIJ,
1420:        MatNorm_SeqBAIJ,
1421:        0,
1422:        MatAssemblyEnd_SeqBAIJ,
1423:        0,
1424:        MatSetOption_SeqBAIJ,
1425:        MatZeroEntries_SeqBAIJ,
1426:        MatZeroRows_SeqBAIJ,
1427:        MatLUFactorSymbolic_SeqBAIJ,
1428:        MatLUFactorNumeric_SeqBAIJ_N,
1429:        0,
1430:        0,
1431:        MatSetUpPreallocation_SeqBAIJ,
1432:        MatILUFactorSymbolic_SeqBAIJ,
1433:        0,
1434:        MatGetArray_SeqBAIJ,
1435:        MatRestoreArray_SeqBAIJ,
1436:        MatDuplicate_SeqBAIJ,
1437:        0,
1438:        0,
1439:        MatILUFactor_SeqBAIJ,
1440:        0,
1441:        0,
1442:        MatGetSubMatrices_SeqBAIJ,
1443:        MatIncreaseOverlap_SeqBAIJ,
1444:        MatGetValues_SeqBAIJ,
1445:        0,
1446:        MatPrintHelp_SeqBAIJ,
1447:        MatScale_SeqBAIJ,
1448:        0,
1449:        0,
1450:        0,
1451:        MatGetBlockSize_SeqBAIJ,
1452:        MatGetRowIJ_SeqBAIJ,
1453:        MatRestoreRowIJ_SeqBAIJ,
1454:        0,
1455:        0,
1456:        0,
1457:        0,
1458:        0,
1459:        0,
1460:        MatSetValuesBlocked_SeqBAIJ,
1461:        MatGetSubMatrix_SeqBAIJ,
1462:        MatDestroy_SeqBAIJ,
1463:        MatView_SeqBAIJ,
1464:        MatGetPetscMaps_Petsc,
1465:        0,
1466:        0,
1467:        0,
1468:        0,
1469:        0,
1470:        0,
1471:        MatGetRowMax_SeqBAIJ,
1472:        MatConvert_Basic};

1474: EXTERN_C_BEGIN
1475: int MatStoreValues_SeqBAIJ(Mat mat)
1476: {
1477:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1478:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1479:   int         ierr;

1482:   if (aij->nonew != 1) {
1483:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1484:   }

1486:   /* allocate space for values if not already there */
1487:   if (!aij->saved_values) {
1488:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1489:   }

1491:   /* copy values over */
1492:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1493:   return(0);
1494: }
1495: EXTERN_C_END

1497: EXTERN_C_BEGIN
1498: int MatRetrieveValues_SeqBAIJ(Mat mat)
1499: {
1500:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1501:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;

1504:   if (aij->nonew != 1) {
1505:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1506:   }
1507:   if (!aij->saved_values) {
1508:     SETERRQ(1,"Must call MatStoreValues(A);first");
1509:   }

1511:   /* copy values over */
1512:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1513:   return(0);
1514: }
1515: EXTERN_C_END

1517: EXTERN_C_BEGIN
1518: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1519: EXTERN_C_END

1521: EXTERN_C_BEGIN
1522: int MatCreate_SeqBAIJ(Mat B)
1523: {
1524:   int         ierr,size;
1525:   Mat_SeqBAIJ *b;

1528:   MPI_Comm_size(B->comm,&size);
1529:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

1531:   B->m = B->M = PetscMax(B->m,B->M);
1532:   B->n = B->N = PetscMax(B->n,B->N);
1533:   ierr    = PetscNew(Mat_SeqBAIJ,&b);
1534:   B->data = (void*)b;
1535:   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1536:   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1537:   B->factor           = 0;
1538:   B->lupivotthreshold = 1.0;
1539:   B->mapping          = 0;
1540:   b->row              = 0;
1541:   b->col              = 0;
1542:   b->icol             = 0;
1543:   b->reallocs         = 0;
1544:   b->saved_values     = 0;
1545: #if defined(PETSC_USE_MAT_SINGLE)
1546:   b->setvalueslen     = 0;
1547:   b->setvaluescopy    = PETSC_NULL;
1548: #endif
1549:   b->single_precision_solves = PETSC_FALSE;

1551:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1552:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

1554:   b->sorted           = PETSC_FALSE;
1555:   b->roworiented      = PETSC_TRUE;
1556:   b->nonew            = 0;
1557:   b->diag             = 0;
1558:   b->solve_work       = 0;
1559:   b->mult_work        = 0;
1560:   B->spptr            = 0;
1561:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1562:   b->keepzeroedrows   = PETSC_FALSE;

1564:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1565:                                      "MatStoreValues_SeqBAIJ",
1566:                                       MatStoreValues_SeqBAIJ);
1567:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1568:                                      "MatRetrieveValues_SeqBAIJ",
1569:                                       MatRetrieveValues_SeqBAIJ);
1570:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1571:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1572:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
1573:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1574:                                      "MatConvert_SeqBAIJ_SeqAIJ",
1575:                                       MatConvert_SeqBAIJ_SeqAIJ);
1576:   return(0);
1577: }
1578: EXTERN_C_END

1580: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1581: {
1582:   Mat         C;
1583:   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1584:   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;

1587:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

1589:   *B = 0;
1590:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1591:   MatSetType(C,MATSEQBAIJ);
1592:   c    = (Mat_SeqBAIJ*)C->data;

1594:   c->bs         = a->bs;
1595:   c->bs2        = a->bs2;
1596:   c->mbs        = a->mbs;
1597:   c->nbs        = a->nbs;
1598:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));

1600:   PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1601:   PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1602:   for (i=0; i<mbs; i++) {
1603:     c->imax[i] = a->imax[i];
1604:     c->ilen[i] = a->ilen[i];
1605:   }

1607:   /* allocate the matrix space */
1608:   c->singlemalloc = PETSC_TRUE;
1609:   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1610:   PetscMalloc(len,&c->a);
1611:   c->j = (int*)(c->a + nz*bs2);
1612:   c->i = c->j + nz;
1613:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1614:   if (mbs > 0) {
1615:     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1616:     if (cpvalues == MAT_COPY_VALUES) {
1617:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1618:     } else {
1619:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1620:     }
1621:   }

1623:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1624:   c->sorted      = a->sorted;
1625:   c->roworiented = a->roworiented;
1626:   c->nonew       = a->nonew;

1628:   if (a->diag) {
1629:     PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1630:     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1631:     for (i=0; i<mbs; i++) {
1632:       c->diag[i] = a->diag[i];
1633:     }
1634:   } else c->diag        = 0;
1635:   c->nz                 = a->nz;
1636:   c->maxnz              = a->maxnz;
1637:   c->solve_work         = 0;
1638:   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1639:   c->mult_work          = 0;
1640:   C->preallocated       = PETSC_TRUE;
1641:   C->assembled          = PETSC_TRUE;
1642:   *B = C;
1643:   PetscFListDuplicate(A->qlist,&C->qlist);
1644:   return(0);
1645: }

1647: EXTERN_C_BEGIN
1648: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1649: {
1650:   Mat_SeqBAIJ  *a;
1651:   Mat          B;
1652:   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1653:   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1654:   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1655:   int          *masked,nmask,tmp,bs2,ishift;
1656:   PetscScalar  *aa;
1657:   MPI_Comm     comm = ((PetscObject)viewer)->comm;

1660:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1661:   bs2  = bs*bs;

1663:   MPI_Comm_size(comm,&size);
1664:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1665:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1666:   PetscBinaryRead(fd,header,4,PETSC_INT);
1667:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1668:   M = header[1]; N = header[2]; nz = header[3];

1670:   if (header[3] < 0) {
1671:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1672:   }

1674:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

1676:   /* 
1677:      This code adds extra rows to make sure the number of rows is 
1678:     divisible by the blocksize
1679:   */
1680:   mbs        = M/bs;
1681:   extra_rows = bs - M + bs*(mbs);
1682:   if (extra_rows == bs) extra_rows = 0;
1683:   else                  mbs++;
1684:   if (extra_rows) {
1685:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1686:   }

1688:   /* read in row lengths */
1689:   PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1690:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1691:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

1693:   /* read in column indices */
1694:   PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1695:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
1696:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

1698:   /* loop over row lengths determining block row lengths */
1699:   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);
1700:   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));
1701:   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);
1702:   ierr     = PetscMemzero(mask,mbs*sizeof(int));
1703:   masked   = mask + mbs;
1704:   rowcount = 0; nzcount = 0;
1705:   for (i=0; i<mbs; i++) {
1706:     nmask = 0;
1707:     for (j=0; j<bs; j++) {
1708:       kmax = rowlengths[rowcount];
1709:       for (k=0; k<kmax; k++) {
1710:         tmp = jj[nzcount++]/bs;
1711:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1712:       }
1713:       rowcount++;
1714:     }
1715:     browlengths[i] += nmask;
1716:     /* zero out the mask elements we set */
1717:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1718:   }

1720:   /* create our matrix */
1721:   MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1722:   B = *A;
1723:   a = (Mat_SeqBAIJ*)B->data;

1725:   /* set matrix "i" values */
1726:   a->i[0] = 0;
1727:   for (i=1; i<= mbs; i++) {
1728:     a->i[i]      = a->i[i-1] + browlengths[i-1];
1729:     a->ilen[i-1] = browlengths[i-1];
1730:   }
1731:   a->nz         = 0;
1732:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

1734:   /* read in nonzero values */
1735:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1736:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1737:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

1739:   /* set "a" and "j" values into matrix */
1740:   nzcount = 0; jcount = 0;
1741:   for (i=0; i<mbs; i++) {
1742:     nzcountb = nzcount;
1743:     nmask    = 0;
1744:     for (j=0; j<bs; j++) {
1745:       kmax = rowlengths[i*bs+j];
1746:       for (k=0; k<kmax; k++) {
1747:         tmp = jj[nzcount++]/bs;
1748:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1749:       }
1750:     }
1751:     /* sort the masked values */
1752:     PetscSortInt(nmask,masked);

1754:     /* set "j" values into matrix */
1755:     maskcount = 1;
1756:     for (j=0; j<nmask; j++) {
1757:       a->j[jcount++]  = masked[j];
1758:       mask[masked[j]] = maskcount++;
1759:     }
1760:     /* set "a" values into matrix */
1761:     ishift = bs2*a->i[i];
1762:     for (j=0; j<bs; j++) {
1763:       kmax = rowlengths[i*bs+j];
1764:       for (k=0; k<kmax; k++) {
1765:         tmp       = jj[nzcountb]/bs ;
1766:         block     = mask[tmp] - 1;
1767:         point     = jj[nzcountb] - bs*tmp;
1768:         idx       = ishift + bs2*block + j + bs*point;
1769:         a->a[idx] = (MatScalar)aa[nzcountb++];
1770:       }
1771:     }
1772:     /* zero out the mask elements we set */
1773:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1774:   }
1775:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

1777:   PetscFree(rowlengths);
1778:   PetscFree(browlengths);
1779:   PetscFree(aa);
1780:   PetscFree(jj);
1781:   PetscFree(mask);

1783:   B->assembled = PETSC_TRUE;
1784: 
1785:   MatView_Private(B);
1786:   return(0);
1787: }
1788: EXTERN_C_END

1790: /*@C
1791:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1792:    compressed row) format.  For good matrix assembly performance the
1793:    user should preallocate the matrix storage by setting the parameter nz
1794:    (or the array nnz).  By setting these parameters accurately, performance
1795:    during matrix assembly can be increased by more than a factor of 50.

1797:    Collective on MPI_Comm

1799:    Input Parameters:
1800: +  comm - MPI communicator, set to PETSC_COMM_SELF
1801: .  bs - size of block
1802: .  m - number of rows
1803: .  n - number of columns
1804: .  nz - number of nonzero blocks  per block row (same for all rows)
1805: -  nnz - array containing the number of nonzero blocks in the various block rows 
1806:          (possibly different for each block row) or PETSC_NULL

1808:    Output Parameter:
1809: .  A - the matrix 

1811:    Options Database Keys:
1812: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1813:                      block calculations (much slower)
1814: .    -mat_block_size - size of the blocks to use

1816:    Level: intermediate

1818:    Notes:
1819:    A nonzero block is any block that as 1 or more nonzeros in it

1821:    The block AIJ format is fully compatible with standard Fortran 77
1822:    storage.  That is, the stored row and column indices can begin at
1823:    either one (as in Fortran) or zero.  See the users' manual for details.

1825:    Specify the preallocated storage with either nz or nnz (not both).
1826:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1827:    allocation.  For additional details, see the users manual chapter on
1828:    matrices.

1830: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1831: @*/
1832: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1833: {
1835: 
1837:   MatCreate(comm,m,n,m,n,A);
1838:   MatSetType(*A,MATSEQBAIJ);
1839:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1840:   return(0);
1841: }

1843: /*@C
1844:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1845:    per row in the matrix. For good matrix assembly performance the
1846:    user should preallocate the matrix storage by setting the parameter nz
1847:    (or the array nnz).  By setting these parameters accurately, performance
1848:    during matrix assembly can be increased by more than a factor of 50.

1850:    Collective on MPI_Comm

1852:    Input Parameters:
1853: +  A - the matrix
1854: .  bs - size of block
1855: .  nz - number of block nonzeros per block row (same for all rows)
1856: -  nnz - array containing the number of block nonzeros in the various block rows 
1857:          (possibly different for each block row) or PETSC_NULL

1859:    Options Database Keys:
1860: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1861:                      block calculations (much slower)
1862: .    -mat_block_size - size of the blocks to use

1864:    Level: intermediate

1866:    Notes:
1867:    The block AIJ format is fully compatible with standard Fortran 77
1868:    storage.  That is, the stored row and column indices can begin at
1869:    either one (as in Fortran) or zero.  See the users' manual for details.

1871:    Specify the preallocated storage with either nz or nnz (not both).
1872:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1873:    allocation.  For additional details, see the users manual chapter on
1874:    matrices.

1876: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1877: @*/
1878: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1879: {
1880:   Mat_SeqBAIJ *b;
1881:   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1882:   PetscTruth  flg;

1885:   PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1886:   if (!flg) return(0);

1888:   B->preallocated = PETSC_TRUE;
1889:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1890:   if (nnz && newbs != bs) {
1891:     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1892:   }
1893:   bs   = newbs;

1895:   mbs  = B->m/bs;
1896:   nbs  = B->n/bs;
1897:   bs2  = bs*bs;

1899:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1900:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1901:   }

1903:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1904:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1905:   if (nnz) {
1906:     for (i=0; i<mbs; i++) {
1907:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1908:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1909:     }
1910:   }

1912:   b       = (Mat_SeqBAIJ*)B->data;
1913:   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1914:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1915:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1916:   if (!flg) {
1917:     switch (bs) {
1918:     case 1:
1919:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1920:       B->ops->mult            = MatMult_SeqBAIJ_1;
1921:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1922:       break;
1923:     case 2:
1924:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1925:       B->ops->mult            = MatMult_SeqBAIJ_2;
1926:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1927:       break;
1928:     case 3:
1929:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1930:       B->ops->mult            = MatMult_SeqBAIJ_3;
1931:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1932:       break;
1933:     case 4:
1934:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1935:       B->ops->mult            = MatMult_SeqBAIJ_4;
1936:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1937:       break;
1938:     case 5:
1939:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1940:       B->ops->mult            = MatMult_SeqBAIJ_5;
1941:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1942:       break;
1943:     case 6:
1944:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1945:       B->ops->mult            = MatMult_SeqBAIJ_6;
1946:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1947:       break;
1948:     case 7:
1949:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1950:       B->ops->mult            = MatMult_SeqBAIJ_7;
1951:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1952:       break;
1953:     default:
1954:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1955:       B->ops->mult            = MatMult_SeqBAIJ_N;
1956:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1957:       break;
1958:     }
1959:   }
1960:   b->bs      = bs;
1961:   b->mbs     = mbs;
1962:   b->nbs     = nbs;
1963:   PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1964:   if (!nnz) {
1965:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1966:     else if (nz <= 0)        nz = 1;
1967:     for (i=0; i<mbs; i++) b->imax[i] = nz;
1968:     nz = nz*mbs;
1969:   } else {
1970:     nz = 0;
1971:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1972:   }

1974:   /* allocate the matrix space */
1975:   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1976:   ierr  = PetscMalloc(len,&b->a);
1977:   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1978:   b->j  = (int*)(b->a + nz*bs2);
1979:   PetscMemzero(b->j,nz*sizeof(int));
1980:   b->i  = b->j + nz;
1981:   b->singlemalloc = PETSC_TRUE;

1983:   b->i[0] = 0;
1984:   for (i=1; i<mbs+1; i++) {
1985:     b->i[i] = b->i[i-1] + b->imax[i-1];
1986:   }

1988:   /* b->ilen will count nonzeros in each block row so far. */
1989:   PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1990:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1991:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

1993:   b->bs               = bs;
1994:   b->bs2              = bs2;
1995:   b->mbs              = mbs;
1996:   b->nz               = 0;
1997:   b->maxnz            = nz*bs2;
1998:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1999:   return(0);
2000: }