Actual source code: baij.c
1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/spops.h
10: #include petscsys.h
12: /*
13: Special version for Fun3d sequential benchmark
14: */
15: #if defined(PETSC_HAVE_FORTRAN_CAPS)
16: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18: #define matsetvaluesblocked4_ matsetvaluesblocked4
19: #endif
21: void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
22: {
23: Mat A = *AA;
24: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
25: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
26: int *ai=a->i,*ailen=a->ilen;
27: int *aj=a->j,stepval;
28: PetscScalar *value = v;
29: MatScalar *ap,*aa = a->a,*bap;
32: stepval = (n-1)*4;
33: for (k=0; k<m; k++) { /* loop over added rows */
34: row = im[k];
35: rp = aj + ai[row];
36: ap = aa + 16*ai[row];
37: nrow = ailen[row];
38: low = 0;
39: for (l=0; l<n; l++) { /* loop over added columns */
40: col = in[l];
41: value = v + k*(stepval+4)*4 + l*4;
42: low = 0; high = nrow;
43: while (high-low > 7) {
44: t = (low+high)/2;
45: if (rp[t] > col) high = t;
46: else low = t;
47: }
48: for (i=low; i<high; i++) {
49: if (rp[i] > col) break;
50: if (rp[i] == col) {
51: bap = ap + 16*i;
52: for (ii=0; ii<4; ii++,value+=stepval) {
53: for (jj=ii; jj<16; jj+=4) {
54: bap[jj] += *value++;
55: }
56: }
57: goto noinsert2;
58: }
59: }
60: N = nrow++ - 1;
61: /* shift up all the later entries in this row */
62: for (ii=N; ii>=i; ii--) {
63: rp[ii+1] = rp[ii];
64: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
65: }
66: if (N >= i) {
67: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
68: }
69: rp[i] = col;
70: bap = ap + 16*i;
71: for (ii=0; ii<4; ii++,value+=stepval) {
72: for (jj=ii; jj<16; jj+=4) {
73: bap[jj] = *value++;
74: }
75: }
76: noinsert2:;
77: low = i;
78: }
79: ailen[row] = nrow;
80: }
81: }
83: #if defined(PETSC_HAVE_FORTRAN_CAPS)
84: #define matsetvalues4_ MATSETVALUES4
85: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
86: #define matsetvalues4_ matsetvalues4
87: #endif
89: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
90: {
91: Mat A = *AA;
92: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
93: int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
94: int *ai=a->i,*ailen=a->ilen;
95: int *aj=a->j,brow,bcol;
96: int ridx,cidx;
97: MatScalar *ap,value,*aa=a->a,*bap;
98:
100: for (k=0; k<m; k++) { /* loop over added rows */
101: row = im[k]; brow = row/4;
102: rp = aj + ai[brow];
103: ap = aa + 16*ai[brow];
104: nrow = ailen[brow];
105: low = 0;
106: for (l=0; l<n; l++) { /* loop over added columns */
107: col = in[l]; bcol = col/4;
108: ridx = row % 4; cidx = col % 4;
109: value = v[l + k*n];
110: low = 0; high = nrow;
111: while (high-low > 7) {
112: t = (low+high)/2;
113: if (rp[t] > bcol) high = t;
114: else low = t;
115: }
116: for (i=low; i<high; i++) {
117: if (rp[i] > bcol) break;
118: if (rp[i] == bcol) {
119: bap = ap + 16*i + 4*cidx + ridx;
120: *bap += value;
121: goto noinsert1;
122: }
123: }
124: N = nrow++ - 1;
125: /* shift up all the later entries in this row */
126: for (ii=N; ii>=i; ii--) {
127: rp[ii+1] = rp[ii];
128: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
129: }
130: if (N>=i) {
131: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
132: }
133: rp[i] = bcol;
134: ap[16*i + 4*cidx + ridx] = value;
135: noinsert1:;
136: low = i;
137: }
138: ailen[brow] = nrow;
139: }
140: }
142: /* UGLY, ugly, ugly
143: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
144: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
145: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
146: converts the entries into single precision and then calls ..._MatScalar() to put them
147: into the single precision data structures.
148: */
149: #if defined(PETSC_USE_MAT_SINGLE)
150: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
151: #else
152: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
153: #endif
154: #if defined(PETSC_HAVE_DSCPACK)
155: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
156: #endif
158: #define CHUNKSIZE 10
160: /*
161: Checks for missing diagonals
162: */
163: int MatMissingDiagonal_SeqBAIJ(Mat A)
164: {
165: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
166: int *diag,*jj = a->j,i,ierr;
169: MatMarkDiagonal_SeqBAIJ(A);
170: diag = a->diag;
171: for (i=0; i<a->mbs; i++) {
172: if (jj[diag[i]] != i) {
173: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
174: }
175: }
176: return(0);
177: }
179: int MatMarkDiagonal_SeqBAIJ(Mat A)
180: {
181: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
182: int i,j,*diag,m = a->mbs,ierr;
185: if (a->diag) return(0);
187: PetscMalloc((m+1)*sizeof(int),&diag);
188: PetscLogObjectMemory(A,(m+1)*sizeof(int));
189: for (i=0; i<m; i++) {
190: diag[i] = a->i[i+1];
191: for (j=a->i[i]; j<a->i[i+1]; j++) {
192: if (a->j[j] == i) {
193: diag[i] = j;
194: break;
195: }
196: }
197: }
198: a->diag = diag;
199: return(0);
200: }
203: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
205: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
206: {
207: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
208: int ierr,n = a->mbs,i;
211: *nn = n;
212: if (!ia) return(0);
213: if (symmetric) {
214: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
215: } else if (oshift == 1) {
216: /* temporarily add 1 to i and j indices */
217: int nz = a->i[n];
218: for (i=0; i<nz; i++) a->j[i]++;
219: for (i=0; i<n+1; i++) a->i[i]++;
220: *ia = a->i; *ja = a->j;
221: } else {
222: *ia = a->i; *ja = a->j;
223: }
225: return(0);
226: }
228: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
229: {
230: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
231: int i,n = a->mbs,ierr;
234: if (!ia) return(0);
235: if (symmetric) {
236: PetscFree(*ia);
237: PetscFree(*ja);
238: } else if (oshift == 1) {
239: int nz = a->i[n]-1;
240: for (i=0; i<nz; i++) a->j[i]--;
241: for (i=0; i<n+1; i++) a->i[i]--;
242: }
243: return(0);
244: }
246: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
247: {
248: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
251: *bs = baij->bs;
252: return(0);
253: }
256: int MatDestroy_SeqBAIJ(Mat A)
257: {
258: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
259: int ierr;
262: #if defined(PETSC_USE_LOG)
263: PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
264: #endif
265: PetscFree(a->a);
266: if (!a->singlemalloc) {
267: PetscFree(a->i);
268: PetscFree(a->j);
269: }
270: if (a->row) {
271: ISDestroy(a->row);
272: }
273: if (a->col) {
274: ISDestroy(a->col);
275: }
276: if (a->diag) {PetscFree(a->diag);}
277: if (a->ilen) {PetscFree(a->ilen);}
278: if (a->imax) {PetscFree(a->imax);}
279: if (a->solve_work) {PetscFree(a->solve_work);}
280: if (a->mult_work) {PetscFree(a->mult_work);}
281: if (a->icol) {ISDestroy(a->icol);}
282: if (a->saved_values) {PetscFree(a->saved_values);}
283: #if defined(PETSC_USE_MAT_SINGLE)
284: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
285: #endif
286: PetscFree(a);
287: return(0);
288: }
290: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
291: {
292: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
295: switch (op) {
296: case MAT_ROW_ORIENTED:
297: a->roworiented = PETSC_TRUE;
298: break;
299: case MAT_COLUMN_ORIENTED:
300: a->roworiented = PETSC_FALSE;
301: break;
302: case MAT_COLUMNS_SORTED:
303: a->sorted = PETSC_TRUE;
304: break;
305: case MAT_COLUMNS_UNSORTED:
306: a->sorted = PETSC_FALSE;
307: break;
308: case MAT_KEEP_ZEROED_ROWS:
309: a->keepzeroedrows = PETSC_TRUE;
310: break;
311: case MAT_NO_NEW_NONZERO_LOCATIONS:
312: a->nonew = 1;
313: break;
314: case MAT_NEW_NONZERO_LOCATION_ERR:
315: a->nonew = -1;
316: break;
317: case MAT_NEW_NONZERO_ALLOCATION_ERR:
318: a->nonew = -2;
319: break;
320: case MAT_YES_NEW_NONZERO_LOCATIONS:
321: a->nonew = 0;
322: break;
323: case MAT_ROWS_SORTED:
324: case MAT_ROWS_UNSORTED:
325: case MAT_YES_NEW_DIAGONALS:
326: case MAT_IGNORE_OFF_PROC_ENTRIES:
327: case MAT_USE_HASH_TABLE:
328: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
329: break;
330: case MAT_NO_NEW_DIAGONALS:
331: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
332: case MAT_USE_SINGLE_PRECISION_SOLVES:
333: if (a->bs==4) {
334: PetscTruth sse_enabled_local,sse_enabled_global;
335: int ierr;
337: sse_enabled_local = PETSC_FALSE;
338: sse_enabled_global = PETSC_FALSE;
340: PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);
341: #if defined(PETSC_HAVE_SSE)
342: if (sse_enabled_local) {
343: a->single_precision_solves = PETSC_TRUE;
344: A->ops->solve = MatSolve_SeqBAIJ_Update;
345: A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
346: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES setn");
347: break;
348: } else {
349: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
350: }
351: #else
352: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignoredn");
353: #endif
354: }
355: break;
356: default:
357: SETERRQ(PETSC_ERR_SUP,"unknown option");
358: }
359: return(0);
360: }
362: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
363: {
364: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
365: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
366: MatScalar *aa,*aa_i;
367: PetscScalar *v_i;
370: bs = a->bs;
371: ai = a->i;
372: aj = a->j;
373: aa = a->a;
374: bs2 = a->bs2;
375:
376: if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
377:
378: bn = row/bs; /* Block number */
379: bp = row % bs; /* Block Position */
380: M = ai[bn+1] - ai[bn];
381: *nz = bs*M;
382:
383: if (v) {
384: *v = 0;
385: if (*nz) {
386: PetscMalloc((*nz)*sizeof(PetscScalar),v);
387: for (i=0; i<M; i++) { /* for each block in the block row */
388: v_i = *v + i*bs;
389: aa_i = aa + bs2*(ai[bn] + i);
390: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
391: }
392: }
393: }
395: if (idx) {
396: *idx = 0;
397: if (*nz) {
398: PetscMalloc((*nz)*sizeof(int),idx);
399: for (i=0; i<M; i++) { /* for each block in the block row */
400: idx_i = *idx + i*bs;
401: itmp = bs*aj[ai[bn] + i];
402: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
403: }
404: }
405: }
406: return(0);
407: }
409: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
410: {
414: if (idx) {if (*idx) {PetscFree(*idx);}}
415: if (v) {if (*v) {PetscFree(*v);}}
416: return(0);
417: }
419: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
420: {
421: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
422: Mat C;
423: int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
424: int *rows,*cols,bs2=a->bs2;
425: PetscScalar *array;
428: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
429: PetscMalloc((1+nbs)*sizeof(int),&col);
430: PetscMemzero(col,(1+nbs)*sizeof(int));
432: #if defined(PETSC_USE_MAT_SINGLE)
433: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
434: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
435: #else
436: array = a->a;
437: #endif
439: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
440: MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
441: PetscFree(col);
442: PetscMalloc(2*bs*sizeof(int),&rows);
443: cols = rows + bs;
444: for (i=0; i<mbs; i++) {
445: cols[0] = i*bs;
446: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
447: len = ai[i+1] - ai[i];
448: for (j=0; j<len; j++) {
449: rows[0] = (*aj++)*bs;
450: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
451: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
452: array += bs2;
453: }
454: }
455: PetscFree(rows);
456: #if defined(PETSC_USE_MAT_SINGLE)
457: PetscFree(array);
458: #endif
459:
460: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
461: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
462:
463: if (B) {
464: *B = C;
465: } else {
466: MatHeaderCopy(A,C);
467: }
468: return(0);
469: }
471: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
472: {
473: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
474: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
475: PetscScalar *aa;
476: FILE *file;
479: ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);
480: ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
481: col_lens[0] = MAT_FILE_COOKIE;
483: col_lens[1] = A->m;
484: col_lens[2] = A->n;
485: col_lens[3] = a->nz*bs2;
487: /* store lengths of each row and write (including header) to file */
488: count = 0;
489: for (i=0; i<a->mbs; i++) {
490: for (j=0; j<bs; j++) {
491: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
492: }
493: }
494: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
495: PetscFree(col_lens);
497: /* store column indices (zero start index) */
498: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
499: count = 0;
500: for (i=0; i<a->mbs; i++) {
501: for (j=0; j<bs; j++) {
502: for (k=a->i[i]; k<a->i[i+1]; k++) {
503: for (l=0; l<bs; l++) {
504: jj[count++] = bs*a->j[k] + l;
505: }
506: }
507: }
508: }
509: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
510: PetscFree(jj);
512: /* store nonzero values */
513: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
514: count = 0;
515: for (i=0; i<a->mbs; i++) {
516: for (j=0; j<bs; j++) {
517: for (k=a->i[i]; k<a->i[i+1]; k++) {
518: for (l=0; l<bs; l++) {
519: aa[count++] = a->a[bs2*k + l*bs + j];
520: }
521: }
522: }
523: }
524: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
525: PetscFree(aa);
527: PetscViewerBinaryGetInfoPointer(viewer,&file);
528: if (file) {
529: fprintf(file,"-matload_block_size %dn",a->bs);
530: }
531: return(0);
532: }
534: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
536: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
537: {
538: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
539: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
540: PetscViewerFormat format;
543: PetscViewerGetFormat(viewer,&format);
544: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
545: PetscViewerASCIIPrintf(viewer," block size is %dn",bs);
546: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
547: Mat aij;
548: MatConvert(A,MATSEQAIJ,&aij);
549: MatView(aij,viewer);
550: MatDestroy(aij);
551: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
552: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
553: MatMPIBAIJFactorInfo_DSCPACK(A,viewer);
554: #endif
555: return(0);
556: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
557: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
558: for (i=0; i<a->mbs; i++) {
559: for (j=0; j<bs; j++) {
560: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
561: for (k=a->i[i]; k<a->i[i+1]; k++) {
562: for (l=0; l<bs; l++) {
563: #if defined(PETSC_USE_COMPLEX)
564: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
565: PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
566: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
567: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
568: PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
569: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
570: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
571: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
572: }
573: #else
574: if (a->a[bs2*k + l*bs + j] != 0.0) {
575: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
576: }
577: #endif
578: }
579: }
580: PetscViewerASCIIPrintf(viewer,"n");
581: }
582: }
583: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
584: } else {
585: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
586: for (i=0; i<a->mbs; i++) {
587: for (j=0; j<bs; j++) {
588: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
589: for (k=a->i[i]; k<a->i[i+1]; k++) {
590: for (l=0; l<bs; l++) {
591: #if defined(PETSC_USE_COMPLEX)
592: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
593: PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
594: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
595: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
596: PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
597: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
598: } else {
599: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
600: }
601: #else
602: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
603: #endif
604: }
605: }
606: PetscViewerASCIIPrintf(viewer,"n");
607: }
608: }
609: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
610: }
611: PetscViewerFlush(viewer);
612: return(0);
613: }
615: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
616: {
617: Mat A = (Mat) Aa;
618: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
619: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
620: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
621: MatScalar *aa;
622: PetscViewer viewer;
626: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
627: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
629: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
631: /* loop over matrix elements drawing boxes */
632: color = PETSC_DRAW_BLUE;
633: for (i=0,row=0; i<mbs; i++,row+=bs) {
634: for (j=a->i[i]; j<a->i[i+1]; j++) {
635: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
636: x_l = a->j[j]*bs; x_r = x_l + 1.0;
637: aa = a->a + j*bs2;
638: for (k=0; k<bs; k++) {
639: for (l=0; l<bs; l++) {
640: if (PetscRealPart(*aa++) >= 0.) continue;
641: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
642: }
643: }
644: }
645: }
646: color = PETSC_DRAW_CYAN;
647: for (i=0,row=0; i<mbs; i++,row+=bs) {
648: for (j=a->i[i]; j<a->i[i+1]; j++) {
649: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
650: x_l = a->j[j]*bs; x_r = x_l + 1.0;
651: aa = a->a + j*bs2;
652: for (k=0; k<bs; k++) {
653: for (l=0; l<bs; l++) {
654: if (PetscRealPart(*aa++) != 0.) continue;
655: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
656: }
657: }
658: }
659: }
661: color = PETSC_DRAW_RED;
662: for (i=0,row=0; i<mbs; i++,row+=bs) {
663: for (j=a->i[i]; j<a->i[i+1]; j++) {
664: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
665: x_l = a->j[j]*bs; x_r = x_l + 1.0;
666: aa = a->a + j*bs2;
667: for (k=0; k<bs; k++) {
668: for (l=0; l<bs; l++) {
669: if (PetscRealPart(*aa++) <= 0.) continue;
670: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
671: }
672: }
673: }
674: }
675: return(0);
676: }
678: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
679: {
680: int ierr;
681: PetscReal xl,yl,xr,yr,w,h;
682: PetscDraw draw;
683: PetscTruth isnull;
687: PetscViewerDrawGetDraw(viewer,0,&draw);
688: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
690: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
691: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
692: xr += w; yr += h; xl = -w; yl = -h;
693: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
694: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
695: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
696: return(0);
697: }
699: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
700: {
701: int ierr;
702: PetscTruth issocket,isascii,isbinary,isdraw;
705: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
706: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
707: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
708: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
709: if (issocket) {
710: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
711: } else if (isascii){
712: MatView_SeqBAIJ_ASCII(A,viewer);
713: } else if (isbinary) {
714: MatView_SeqBAIJ_Binary(A,viewer);
715: } else if (isdraw) {
716: MatView_SeqBAIJ_Draw(A,viewer);
717: } else {
718: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
719: }
720: return(0);
721: }
724: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
725: {
726: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
727: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
728: int *ai = a->i,*ailen = a->ilen;
729: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
730: MatScalar *ap,*aa = a->a,zero = 0.0;
733: for (k=0; k<m; k++) { /* loop over rows */
734: row = im[k]; brow = row/bs;
735: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
736: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
737: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
738: nrow = ailen[brow];
739: for (l=0; l<n; l++) { /* loop over columns */
740: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
741: if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
742: col = in[l] ;
743: bcol = col/bs;
744: cidx = col%bs;
745: ridx = row%bs;
746: high = nrow;
747: low = 0; /* assume unsorted */
748: while (high-low > 5) {
749: t = (low+high)/2;
750: if (rp[t] > bcol) high = t;
751: else low = t;
752: }
753: for (i=low; i<high; i++) {
754: if (rp[i] > bcol) break;
755: if (rp[i] == bcol) {
756: *v++ = ap[bs2*i+bs*cidx+ridx];
757: goto finished;
758: }
759: }
760: *v++ = zero;
761: finished:;
762: }
763: }
764: return(0);
765: }
767: #if defined(PETSC_USE_MAT_SINGLE)
768: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
769: {
770: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
771: int ierr,i,N = m*n*b->bs2;
772: MatScalar *vsingle;
775: if (N > b->setvalueslen) {
776: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
777: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
778: b->setvalueslen = N;
779: }
780: vsingle = b->setvaluescopy;
781: for (i=0; i<N; i++) {
782: vsingle[i] = v[i];
783: }
784: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
785: return(0);
786: }
787: #endif
790: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
791: {
792: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
793: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
794: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
795: int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
796: PetscTruth roworiented=a->roworiented;
797: MatScalar *value = v,*ap,*aa = a->a,*bap;
800: if (roworiented) {
801: stepval = (n-1)*bs;
802: } else {
803: stepval = (m-1)*bs;
804: }
805: for (k=0; k<m; k++) { /* loop over added rows */
806: row = im[k];
807: if (row < 0) continue;
808: #if defined(PETSC_USE_BOPT_g)
809: if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
810: #endif
811: rp = aj + ai[row];
812: ap = aa + bs2*ai[row];
813: rmax = imax[row];
814: nrow = ailen[row];
815: low = 0;
816: for (l=0; l<n; l++) { /* loop over added columns */
817: if (in[l] < 0) continue;
818: #if defined(PETSC_USE_BOPT_g)
819: if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
820: #endif
821: col = in[l];
822: if (roworiented) {
823: value = v + k*(stepval+bs)*bs + l*bs;
824: } else {
825: value = v + l*(stepval+bs)*bs + k*bs;
826: }
827: if (!sorted) low = 0; high = nrow;
828: while (high-low > 7) {
829: t = (low+high)/2;
830: if (rp[t] > col) high = t;
831: else low = t;
832: }
833: for (i=low; i<high; i++) {
834: if (rp[i] > col) break;
835: if (rp[i] == col) {
836: bap = ap + bs2*i;
837: if (roworiented) {
838: if (is == ADD_VALUES) {
839: for (ii=0; ii<bs; ii++,value+=stepval) {
840: for (jj=ii; jj<bs2; jj+=bs) {
841: bap[jj] += *value++;
842: }
843: }
844: } else {
845: for (ii=0; ii<bs; ii++,value+=stepval) {
846: for (jj=ii; jj<bs2; jj+=bs) {
847: bap[jj] = *value++;
848: }
849: }
850: }
851: } else {
852: if (is == ADD_VALUES) {
853: for (ii=0; ii<bs; ii++,value+=stepval) {
854: for (jj=0; jj<bs; jj++) {
855: *bap++ += *value++;
856: }
857: }
858: } else {
859: for (ii=0; ii<bs; ii++,value+=stepval) {
860: for (jj=0; jj<bs; jj++) {
861: *bap++ = *value++;
862: }
863: }
864: }
865: }
866: goto noinsert2;
867: }
868: }
869: if (nonew == 1) goto noinsert2;
870: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
871: if (nrow >= rmax) {
872: /* there is no extra room in row, therefore enlarge */
873: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
874: MatScalar *new_a;
876: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
878: /* malloc new storage space */
879: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
880: ierr = PetscMalloc(len,&new_a);
881: new_j = (int*)(new_a + bs2*new_nz);
882: new_i = new_j + new_nz;
884: /* copy over old data into new slots */
885: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
886: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
887: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
888: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
889: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
890: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
891: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
892: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
893: /* free up old matrix storage */
894: PetscFree(a->a);
895: if (!a->singlemalloc) {
896: PetscFree(a->i);
897: PetscFree(a->j);
898: }
899: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
900: a->singlemalloc = PETSC_TRUE;
902: rp = aj + ai[row]; ap = aa + bs2*ai[row];
903: rmax = imax[row] = imax[row] + CHUNKSIZE;
904: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
905: a->maxnz += bs2*CHUNKSIZE;
906: a->reallocs++;
907: a->nz++;
908: }
909: N = nrow++ - 1;
910: /* shift up all the later entries in this row */
911: for (ii=N; ii>=i; ii--) {
912: rp[ii+1] = rp[ii];
913: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
914: }
915: if (N >= i) {
916: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
917: }
918: rp[i] = col;
919: bap = ap + bs2*i;
920: if (roworiented) {
921: for (ii=0; ii<bs; ii++,value+=stepval) {
922: for (jj=ii; jj<bs2; jj+=bs) {
923: bap[jj] = *value++;
924: }
925: }
926: } else {
927: for (ii=0; ii<bs; ii++,value+=stepval) {
928: for (jj=0; jj<bs; jj++) {
929: *bap++ = *value++;
930: }
931: }
932: }
933: noinsert2:;
934: low = i;
935: }
936: ailen[row] = nrow;
937: }
938: return(0);
939: }
941: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
942: {
943: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
944: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
945: int m = A->m,*ip,N,*ailen = a->ilen;
946: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
947: MatScalar *aa = a->a,*ap;
948: #if defined(PETSC_HAVE_DSCPACK)
949: PetscTruth flag;
950: #endif
953: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
955: if (m) rmax = ailen[0];
956: for (i=1; i<mbs; i++) {
957: /* move each row back by the amount of empty slots (fshift) before it*/
958: fshift += imax[i-1] - ailen[i-1];
959: rmax = PetscMax(rmax,ailen[i]);
960: if (fshift) {
961: ip = aj + ai[i]; ap = aa + bs2*ai[i];
962: N = ailen[i];
963: for (j=0; j<N; j++) {
964: ip[j-fshift] = ip[j];
965: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
966: }
967: }
968: ai[i] = ai[i-1] + ailen[i-1];
969: }
970: if (mbs) {
971: fshift += imax[mbs-1] - ailen[mbs-1];
972: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
973: }
974: /* reset ilen and imax for each row */
975: for (i=0; i<mbs; i++) {
976: ailen[i] = imax[i] = ai[i+1] - ai[i];
977: }
978: a->nz = ai[mbs];
980: /* diagonals may have moved, so kill the diagonal pointers */
981: if (fshift && a->diag) {
982: PetscFree(a->diag);
983: PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
984: a->diag = 0;
985: }
986: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
987: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
988: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
989: a->reallocs = 0;
990: A->info.nz_unneeded = (PetscReal)fshift*bs2;
992: #if defined(PETSC_HAVE_DSCPACK)
993: PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);
994: if (flag) { MatUseDSCPACK_MPIBAIJ(A); }
995: #endif
997: return(0);
998: }
1002: /*
1003: This function returns an array of flags which indicate the locations of contiguous
1004: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1005: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1006: Assume: sizes should be long enough to hold all the values.
1007: */
1008: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1009: {
1010: int i,j,k,row;
1011: PetscTruth flg;
1014: for (i=0,j=0; i<n; j++) {
1015: row = idx[i];
1016: if (row%bs!=0) { /* Not the begining of a block */
1017: sizes[j] = 1;
1018: i++;
1019: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1020: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1021: i++;
1022: } else { /* Begining of the block, so check if the complete block exists */
1023: flg = PETSC_TRUE;
1024: for (k=1; k<bs; k++) {
1025: if (row+k != idx[i+k]) { /* break in the block */
1026: flg = PETSC_FALSE;
1027: break;
1028: }
1029: }
1030: if (flg == PETSC_TRUE) { /* No break in the bs */
1031: sizes[j] = bs;
1032: i+= bs;
1033: } else {
1034: sizes[j] = 1;
1035: i++;
1036: }
1037: }
1038: }
1039: *bs_max = j;
1040: return(0);
1041: }
1042:
1043: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
1044: {
1045: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1046: int ierr,i,j,k,count,is_n,*is_idx,*rows;
1047: int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1048: PetscScalar zero = 0.0;
1049: MatScalar *aa;
1052: /* Make a copy of the IS and sort it */
1053: ISGetLocalSize(is,&is_n);
1054: ISGetIndices(is,&is_idx);
1056: /* allocate memory for rows,sizes */
1057: ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1058: sizes = rows + is_n;
1060: /* copy IS values to rows, and sort them */
1061: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1062: PetscSortInt(is_n,rows);
1063: if (baij->keepzeroedrows) {
1064: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1065: bs_max = is_n;
1066: } else {
1067: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1068: }
1069: ISRestoreIndices(is,&is_idx);
1071: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1072: row = rows[j];
1073: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1074: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1075: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1076: if (sizes[i] == bs && !baij->keepzeroedrows) {
1077: if (diag) {
1078: if (baij->ilen[row/bs] > 0) {
1079: baij->ilen[row/bs] = 1;
1080: baij->j[baij->i[row/bs]] = row/bs;
1081: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1082: }
1083: /* Now insert all the diagonal values for this bs */
1084: for (k=0; k<bs; k++) {
1085: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1086: }
1087: } else { /* (!diag) */
1088: baij->ilen[row/bs] = 0;
1089: } /* end (!diag) */
1090: } else { /* (sizes[i] != bs) */
1091: #if defined (PETSC_USE_DEBUG)
1092: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1093: #endif
1094: for (k=0; k<count; k++) {
1095: aa[0] = zero;
1096: aa += bs;
1097: }
1098: if (diag) {
1099: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1100: }
1101: }
1102: }
1104: PetscFree(rows);
1105: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1106: return(0);
1107: }
1109: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1110: {
1111: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1112: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1113: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1114: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1115: int ridx,cidx,bs2=a->bs2,ierr;
1116: PetscTruth roworiented=a->roworiented;
1117: MatScalar *ap,value,*aa=a->a,*bap;
1120: for (k=0; k<m; k++) { /* loop over added rows */
1121: row = im[k]; brow = row/bs;
1122: if (row < 0) continue;
1123: #if defined(PETSC_USE_BOPT_g)
1124: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1125: #endif
1126: rp = aj + ai[brow];
1127: ap = aa + bs2*ai[brow];
1128: rmax = imax[brow];
1129: nrow = ailen[brow];
1130: low = 0;
1131: for (l=0; l<n; l++) { /* loop over added columns */
1132: if (in[l] < 0) continue;
1133: #if defined(PETSC_USE_BOPT_g)
1134: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1135: #endif
1136: col = in[l]; bcol = col/bs;
1137: ridx = row % bs; cidx = col % bs;
1138: if (roworiented) {
1139: value = v[l + k*n];
1140: } else {
1141: value = v[k + l*m];
1142: }
1143: if (!sorted) low = 0; high = nrow;
1144: while (high-low > 7) {
1145: t = (low+high)/2;
1146: if (rp[t] > bcol) high = t;
1147: else low = t;
1148: }
1149: for (i=low; i<high; i++) {
1150: if (rp[i] > bcol) break;
1151: if (rp[i] == bcol) {
1152: bap = ap + bs2*i + bs*cidx + ridx;
1153: if (is == ADD_VALUES) *bap += value;
1154: else *bap = value;
1155: goto noinsert1;
1156: }
1157: }
1158: if (nonew == 1) goto noinsert1;
1159: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1160: if (nrow >= rmax) {
1161: /* there is no extra room in row, therefore enlarge */
1162: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1163: MatScalar *new_a;
1165: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1167: /* Malloc new storage space */
1168: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1169: ierr = PetscMalloc(len,&new_a);
1170: new_j = (int*)(new_a + bs2*new_nz);
1171: new_i = new_j + new_nz;
1173: /* copy over old data into new slots */
1174: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1175: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1176: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1177: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1178: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1179: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1180: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1181: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1182: /* free up old matrix storage */
1183: PetscFree(a->a);
1184: if (!a->singlemalloc) {
1185: PetscFree(a->i);
1186: PetscFree(a->j);
1187: }
1188: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1189: a->singlemalloc = PETSC_TRUE;
1191: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1192: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1193: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1194: a->maxnz += bs2*CHUNKSIZE;
1195: a->reallocs++;
1196: a->nz++;
1197: }
1198: N = nrow++ - 1;
1199: /* shift up all the later entries in this row */
1200: for (ii=N; ii>=i; ii--) {
1201: rp[ii+1] = rp[ii];
1202: ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1203: }
1204: if (N>=i) {
1205: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1206: }
1207: rp[i] = bcol;
1208: ap[bs2*i + bs*cidx + ridx] = value;
1209: noinsert1:;
1210: low = i;
1211: }
1212: ailen[brow] = nrow;
1213: }
1214: return(0);
1215: }
1218: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1219: {
1220: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1221: Mat outA;
1222: int ierr;
1223: PetscTruth row_identity,col_identity;
1226: if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1227: ISIdentity(row,&row_identity);
1228: ISIdentity(col,&col_identity);
1229: if (!row_identity || !col_identity) {
1230: SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1231: }
1233: outA = inA;
1234: inA->factor = FACTOR_LU;
1236: if (!a->diag) {
1237: MatMarkDiagonal_SeqBAIJ(inA);
1238: }
1240: a->row = row;
1241: a->col = col;
1242: ierr = PetscObjectReference((PetscObject)row);
1243: ierr = PetscObjectReference((PetscObject)col);
1244:
1245: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1246: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1247: PetscLogObjectParent(inA,a->icol);
1248:
1249: /*
1250: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1251: for ILU(0) factorization with natural ordering
1252: */
1253: if (a->bs < 8) {
1254: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1255: } else {
1256: if (!a->solve_work) {
1257: PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1258: PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1259: }
1260: }
1262: MatLUFactorNumeric(inA,&outA);
1264: return(0);
1265: }
1266: int MatPrintHelp_SeqBAIJ(Mat A)
1267: {
1268: static PetscTruth called = PETSC_FALSE;
1269: MPI_Comm comm = A->comm;
1270: int ierr;
1273: if (called) {return(0);} else called = PETSC_TRUE;
1274: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1275: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>n");
1276: return(0);
1277: }
1279: EXTERN_C_BEGIN
1280: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1281: {
1282: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1283: int i,nz,nbs;
1286: nz = baij->maxnz/baij->bs2;
1287: nbs = baij->nbs;
1288: for (i=0; i<nz; i++) {
1289: baij->j[i] = indices[i];
1290: }
1291: baij->nz = nz;
1292: for (i=0; i<nbs; i++) {
1293: baij->ilen[i] = baij->imax[i];
1294: }
1296: return(0);
1297: }
1298: EXTERN_C_END
1300: /*@
1301: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1302: in the matrix.
1304: Input Parameters:
1305: + mat - the SeqBAIJ matrix
1306: - indices - the column indices
1308: Level: advanced
1310: Notes:
1311: This can be called if you have precomputed the nonzero structure of the
1312: matrix and want to provide it to the matrix object to improve the performance
1313: of the MatSetValues() operation.
1315: You MUST have set the correct numbers of nonzeros per row in the call to
1316: MatCreateSeqBAIJ().
1318: MUST be called before any calls to MatSetValues();
1320: @*/
1321: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1322: {
1323: int ierr,(*f)(Mat,int *);
1327: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1328: if (f) {
1329: (*f)(mat,indices);
1330: } else {
1331: SETERRQ(1,"Wrong type of matrix to set column indices");
1332: }
1333: return(0);
1334: }
1336: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1337: {
1338: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1339: int ierr,i,j,n,row,bs,*ai,*aj,mbs;
1340: PetscReal atmp;
1341: PetscScalar *x,zero = 0.0;
1342: MatScalar *aa;
1343: int ncols,brow,krow,kcol;
1346: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1347: bs = a->bs;
1348: aa = a->a;
1349: ai = a->i;
1350: aj = a->j;
1351: mbs = a->mbs;
1353: VecSet(&zero,v);
1354: VecGetArray(v,&x);
1355: VecGetLocalSize(v,&n);
1356: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1357: for (i=0; i<mbs; i++) {
1358: ncols = ai[1] - ai[0]; ai++;
1359: brow = bs*i;
1360: for (j=0; j<ncols; j++){
1361: /* bcol = bs*(*aj); */
1362: for (kcol=0; kcol<bs; kcol++){
1363: for (krow=0; krow<bs; krow++){
1364: atmp = PetscAbsScalar(*aa); aa++;
1365: row = brow + krow; /* row index */
1366: /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1367: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1368: }
1369: }
1370: aj++;
1371: }
1372: }
1373: VecRestoreArray(v,&x);
1374: return(0);
1375: }
1377: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1378: {
1379: int ierr;
1382: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1383: return(0);
1384: }
1386: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1387: {
1388: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1390: *array = a->a;
1391: return(0);
1392: }
1394: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1395: {
1397: return(0);
1398: }
1400: /* -------------------------------------------------------------------*/
1401: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1402: MatGetRow_SeqBAIJ,
1403: MatRestoreRow_SeqBAIJ,
1404: MatMult_SeqBAIJ_N,
1405: MatMultAdd_SeqBAIJ_N,
1406: MatMultTranspose_SeqBAIJ,
1407: MatMultTransposeAdd_SeqBAIJ,
1408: MatSolve_SeqBAIJ_N,
1409: 0,
1410: 0,
1411: 0,
1412: MatLUFactor_SeqBAIJ,
1413: 0,
1414: 0,
1415: MatTranspose_SeqBAIJ,
1416: MatGetInfo_SeqBAIJ,
1417: MatEqual_SeqBAIJ,
1418: MatGetDiagonal_SeqBAIJ,
1419: MatDiagonalScale_SeqBAIJ,
1420: MatNorm_SeqBAIJ,
1421: 0,
1422: MatAssemblyEnd_SeqBAIJ,
1423: 0,
1424: MatSetOption_SeqBAIJ,
1425: MatZeroEntries_SeqBAIJ,
1426: MatZeroRows_SeqBAIJ,
1427: MatLUFactorSymbolic_SeqBAIJ,
1428: MatLUFactorNumeric_SeqBAIJ_N,
1429: 0,
1430: 0,
1431: MatSetUpPreallocation_SeqBAIJ,
1432: MatILUFactorSymbolic_SeqBAIJ,
1433: 0,
1434: MatGetArray_SeqBAIJ,
1435: MatRestoreArray_SeqBAIJ,
1436: MatDuplicate_SeqBAIJ,
1437: 0,
1438: 0,
1439: MatILUFactor_SeqBAIJ,
1440: 0,
1441: 0,
1442: MatGetSubMatrices_SeqBAIJ,
1443: MatIncreaseOverlap_SeqBAIJ,
1444: MatGetValues_SeqBAIJ,
1445: 0,
1446: MatPrintHelp_SeqBAIJ,
1447: MatScale_SeqBAIJ,
1448: 0,
1449: 0,
1450: 0,
1451: MatGetBlockSize_SeqBAIJ,
1452: MatGetRowIJ_SeqBAIJ,
1453: MatRestoreRowIJ_SeqBAIJ,
1454: 0,
1455: 0,
1456: 0,
1457: 0,
1458: 0,
1459: 0,
1460: MatSetValuesBlocked_SeqBAIJ,
1461: MatGetSubMatrix_SeqBAIJ,
1462: MatDestroy_SeqBAIJ,
1463: MatView_SeqBAIJ,
1464: MatGetPetscMaps_Petsc,
1465: 0,
1466: 0,
1467: 0,
1468: 0,
1469: 0,
1470: 0,
1471: MatGetRowMax_SeqBAIJ,
1472: MatConvert_Basic};
1474: EXTERN_C_BEGIN
1475: int MatStoreValues_SeqBAIJ(Mat mat)
1476: {
1477: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1478: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1479: int ierr;
1482: if (aij->nonew != 1) {
1483: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1484: }
1486: /* allocate space for values if not already there */
1487: if (!aij->saved_values) {
1488: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1489: }
1491: /* copy values over */
1492: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1493: return(0);
1494: }
1495: EXTERN_C_END
1497: EXTERN_C_BEGIN
1498: int MatRetrieveValues_SeqBAIJ(Mat mat)
1499: {
1500: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1501: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1504: if (aij->nonew != 1) {
1505: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1506: }
1507: if (!aij->saved_values) {
1508: SETERRQ(1,"Must call MatStoreValues(A);first");
1509: }
1511: /* copy values over */
1512: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1513: return(0);
1514: }
1515: EXTERN_C_END
1517: EXTERN_C_BEGIN
1518: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1519: EXTERN_C_END
1521: EXTERN_C_BEGIN
1522: int MatCreate_SeqBAIJ(Mat B)
1523: {
1524: int ierr,size;
1525: Mat_SeqBAIJ *b;
1528: MPI_Comm_size(B->comm,&size);
1529: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1531: B->m = B->M = PetscMax(B->m,B->M);
1532: B->n = B->N = PetscMax(B->n,B->N);
1533: ierr = PetscNew(Mat_SeqBAIJ,&b);
1534: B->data = (void*)b;
1535: ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1536: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1537: B->factor = 0;
1538: B->lupivotthreshold = 1.0;
1539: B->mapping = 0;
1540: b->row = 0;
1541: b->col = 0;
1542: b->icol = 0;
1543: b->reallocs = 0;
1544: b->saved_values = 0;
1545: #if defined(PETSC_USE_MAT_SINGLE)
1546: b->setvalueslen = 0;
1547: b->setvaluescopy = PETSC_NULL;
1548: #endif
1549: b->single_precision_solves = PETSC_FALSE;
1551: PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1552: PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);
1554: b->sorted = PETSC_FALSE;
1555: b->roworiented = PETSC_TRUE;
1556: b->nonew = 0;
1557: b->diag = 0;
1558: b->solve_work = 0;
1559: b->mult_work = 0;
1560: B->spptr = 0;
1561: B->info.nz_unneeded = (PetscReal)b->maxnz;
1562: b->keepzeroedrows = PETSC_FALSE;
1564: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1565: "MatStoreValues_SeqBAIJ",
1566: MatStoreValues_SeqBAIJ);
1567: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1568: "MatRetrieveValues_SeqBAIJ",
1569: MatRetrieveValues_SeqBAIJ);
1570: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1571: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1572: MatSeqBAIJSetColumnIndices_SeqBAIJ);
1573: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1574: "MatConvert_SeqBAIJ_SeqAIJ",
1575: MatConvert_SeqBAIJ_SeqAIJ);
1576: return(0);
1577: }
1578: EXTERN_C_END
1580: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1581: {
1582: Mat C;
1583: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1584: int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1587: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1589: *B = 0;
1590: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1591: MatSetType(C,MATSEQBAIJ);
1592: c = (Mat_SeqBAIJ*)C->data;
1594: c->bs = a->bs;
1595: c->bs2 = a->bs2;
1596: c->mbs = a->mbs;
1597: c->nbs = a->nbs;
1598: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1600: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1601: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1602: for (i=0; i<mbs; i++) {
1603: c->imax[i] = a->imax[i];
1604: c->ilen[i] = a->ilen[i];
1605: }
1607: /* allocate the matrix space */
1608: c->singlemalloc = PETSC_TRUE;
1609: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1610: PetscMalloc(len,&c->a);
1611: c->j = (int*)(c->a + nz*bs2);
1612: c->i = c->j + nz;
1613: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1614: if (mbs > 0) {
1615: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1616: if (cpvalues == MAT_COPY_VALUES) {
1617: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1618: } else {
1619: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1620: }
1621: }
1623: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1624: c->sorted = a->sorted;
1625: c->roworiented = a->roworiented;
1626: c->nonew = a->nonew;
1628: if (a->diag) {
1629: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1630: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1631: for (i=0; i<mbs; i++) {
1632: c->diag[i] = a->diag[i];
1633: }
1634: } else c->diag = 0;
1635: c->nz = a->nz;
1636: c->maxnz = a->maxnz;
1637: c->solve_work = 0;
1638: C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1639: c->mult_work = 0;
1640: C->preallocated = PETSC_TRUE;
1641: C->assembled = PETSC_TRUE;
1642: *B = C;
1643: PetscFListDuplicate(A->qlist,&C->qlist);
1644: return(0);
1645: }
1647: EXTERN_C_BEGIN
1648: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1649: {
1650: Mat_SeqBAIJ *a;
1651: Mat B;
1652: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1653: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1654: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1655: int *masked,nmask,tmp,bs2,ishift;
1656: PetscScalar *aa;
1657: MPI_Comm comm = ((PetscObject)viewer)->comm;
1660: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1661: bs2 = bs*bs;
1663: MPI_Comm_size(comm,&size);
1664: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1665: PetscViewerBinaryGetDescriptor(viewer,&fd);
1666: PetscBinaryRead(fd,header,4,PETSC_INT);
1667: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1668: M = header[1]; N = header[2]; nz = header[3];
1670: if (header[3] < 0) {
1671: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1672: }
1674: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1676: /*
1677: This code adds extra rows to make sure the number of rows is
1678: divisible by the blocksize
1679: */
1680: mbs = M/bs;
1681: extra_rows = bs - M + bs*(mbs);
1682: if (extra_rows == bs) extra_rows = 0;
1683: else mbs++;
1684: if (extra_rows) {
1685: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1686: }
1688: /* read in row lengths */
1689: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1690: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1691: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1693: /* read in column indices */
1694: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1695: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1696: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1698: /* loop over row lengths determining block row lengths */
1699: ierr = PetscMalloc(mbs*sizeof(int),&browlengths);
1700: ierr = PetscMemzero(browlengths,mbs*sizeof(int));
1701: ierr = PetscMalloc(2*mbs*sizeof(int),&mask);
1702: ierr = PetscMemzero(mask,mbs*sizeof(int));
1703: masked = mask + mbs;
1704: rowcount = 0; nzcount = 0;
1705: for (i=0; i<mbs; i++) {
1706: nmask = 0;
1707: for (j=0; j<bs; j++) {
1708: kmax = rowlengths[rowcount];
1709: for (k=0; k<kmax; k++) {
1710: tmp = jj[nzcount++]/bs;
1711: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1712: }
1713: rowcount++;
1714: }
1715: browlengths[i] += nmask;
1716: /* zero out the mask elements we set */
1717: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1718: }
1720: /* create our matrix */
1721: MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1722: B = *A;
1723: a = (Mat_SeqBAIJ*)B->data;
1725: /* set matrix "i" values */
1726: a->i[0] = 0;
1727: for (i=1; i<= mbs; i++) {
1728: a->i[i] = a->i[i-1] + browlengths[i-1];
1729: a->ilen[i-1] = browlengths[i-1];
1730: }
1731: a->nz = 0;
1732: for (i=0; i<mbs; i++) a->nz += browlengths[i];
1734: /* read in nonzero values */
1735: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1736: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1737: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1739: /* set "a" and "j" values into matrix */
1740: nzcount = 0; jcount = 0;
1741: for (i=0; i<mbs; i++) {
1742: nzcountb = nzcount;
1743: nmask = 0;
1744: for (j=0; j<bs; j++) {
1745: kmax = rowlengths[i*bs+j];
1746: for (k=0; k<kmax; k++) {
1747: tmp = jj[nzcount++]/bs;
1748: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1749: }
1750: }
1751: /* sort the masked values */
1752: PetscSortInt(nmask,masked);
1754: /* set "j" values into matrix */
1755: maskcount = 1;
1756: for (j=0; j<nmask; j++) {
1757: a->j[jcount++] = masked[j];
1758: mask[masked[j]] = maskcount++;
1759: }
1760: /* set "a" values into matrix */
1761: ishift = bs2*a->i[i];
1762: for (j=0; j<bs; j++) {
1763: kmax = rowlengths[i*bs+j];
1764: for (k=0; k<kmax; k++) {
1765: tmp = jj[nzcountb]/bs ;
1766: block = mask[tmp] - 1;
1767: point = jj[nzcountb] - bs*tmp;
1768: idx = ishift + bs2*block + j + bs*point;
1769: a->a[idx] = (MatScalar)aa[nzcountb++];
1770: }
1771: }
1772: /* zero out the mask elements we set */
1773: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1774: }
1775: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1777: PetscFree(rowlengths);
1778: PetscFree(browlengths);
1779: PetscFree(aa);
1780: PetscFree(jj);
1781: PetscFree(mask);
1783: B->assembled = PETSC_TRUE;
1784:
1785: MatView_Private(B);
1786: return(0);
1787: }
1788: EXTERN_C_END
1790: /*@C
1791: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1792: compressed row) format. For good matrix assembly performance the
1793: user should preallocate the matrix storage by setting the parameter nz
1794: (or the array nnz). By setting these parameters accurately, performance
1795: during matrix assembly can be increased by more than a factor of 50.
1797: Collective on MPI_Comm
1799: Input Parameters:
1800: + comm - MPI communicator, set to PETSC_COMM_SELF
1801: . bs - size of block
1802: . m - number of rows
1803: . n - number of columns
1804: . nz - number of nonzero blocks per block row (same for all rows)
1805: - nnz - array containing the number of nonzero blocks in the various block rows
1806: (possibly different for each block row) or PETSC_NULL
1808: Output Parameter:
1809: . A - the matrix
1811: Options Database Keys:
1812: . -mat_no_unroll - uses code that does not unroll the loops in the
1813: block calculations (much slower)
1814: . -mat_block_size - size of the blocks to use
1816: Level: intermediate
1818: Notes:
1819: A nonzero block is any block that as 1 or more nonzeros in it
1821: The block AIJ format is fully compatible with standard Fortran 77
1822: storage. That is, the stored row and column indices can begin at
1823: either one (as in Fortran) or zero. See the users' manual for details.
1825: Specify the preallocated storage with either nz or nnz (not both).
1826: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1827: allocation. For additional details, see the users manual chapter on
1828: matrices.
1830: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1831: @*/
1832: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1833: {
1835:
1837: MatCreate(comm,m,n,m,n,A);
1838: MatSetType(*A,MATSEQBAIJ);
1839: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1840: return(0);
1841: }
1843: /*@C
1844: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1845: per row in the matrix. For good matrix assembly performance the
1846: user should preallocate the matrix storage by setting the parameter nz
1847: (or the array nnz). By setting these parameters accurately, performance
1848: during matrix assembly can be increased by more than a factor of 50.
1850: Collective on MPI_Comm
1852: Input Parameters:
1853: + A - the matrix
1854: . bs - size of block
1855: . nz - number of block nonzeros per block row (same for all rows)
1856: - nnz - array containing the number of block nonzeros in the various block rows
1857: (possibly different for each block row) or PETSC_NULL
1859: Options Database Keys:
1860: . -mat_no_unroll - uses code that does not unroll the loops in the
1861: block calculations (much slower)
1862: . -mat_block_size - size of the blocks to use
1864: Level: intermediate
1866: Notes:
1867: The block AIJ format is fully compatible with standard Fortran 77
1868: storage. That is, the stored row and column indices can begin at
1869: either one (as in Fortran) or zero. See the users' manual for details.
1871: Specify the preallocated storage with either nz or nnz (not both).
1872: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1873: allocation. For additional details, see the users manual chapter on
1874: matrices.
1876: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1877: @*/
1878: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1879: {
1880: Mat_SeqBAIJ *b;
1881: int i,len,ierr,mbs,nbs,bs2,newbs = bs;
1882: PetscTruth flg;
1885: PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1886: if (!flg) return(0);
1888: B->preallocated = PETSC_TRUE;
1889: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1890: if (nnz && newbs != bs) {
1891: SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1892: }
1893: bs = newbs;
1895: mbs = B->m/bs;
1896: nbs = B->n/bs;
1897: bs2 = bs*bs;
1899: if (mbs*bs!=B->m || nbs*bs!=B->n) {
1900: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1901: }
1903: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1904: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1905: if (nnz) {
1906: for (i=0; i<mbs; i++) {
1907: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1908: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1909: }
1910: }
1912: b = (Mat_SeqBAIJ*)B->data;
1913: ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1914: B->ops->solve = MatSolve_SeqBAIJ_Update;
1915: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
1916: if (!flg) {
1917: switch (bs) {
1918: case 1:
1919: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1920: B->ops->mult = MatMult_SeqBAIJ_1;
1921: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
1922: break;
1923: case 2:
1924: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1925: B->ops->mult = MatMult_SeqBAIJ_2;
1926: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
1927: break;
1928: case 3:
1929: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1930: B->ops->mult = MatMult_SeqBAIJ_3;
1931: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
1932: break;
1933: case 4:
1934: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1935: B->ops->mult = MatMult_SeqBAIJ_4;
1936: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
1937: break;
1938: case 5:
1939: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1940: B->ops->mult = MatMult_SeqBAIJ_5;
1941: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
1942: break;
1943: case 6:
1944: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1945: B->ops->mult = MatMult_SeqBAIJ_6;
1946: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
1947: break;
1948: case 7:
1949: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1950: B->ops->mult = MatMult_SeqBAIJ_7;
1951: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
1952: break;
1953: default:
1954: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1955: B->ops->mult = MatMult_SeqBAIJ_N;
1956: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1957: break;
1958: }
1959: }
1960: b->bs = bs;
1961: b->mbs = mbs;
1962: b->nbs = nbs;
1963: PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1964: if (!nnz) {
1965: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1966: else if (nz <= 0) nz = 1;
1967: for (i=0; i<mbs; i++) b->imax[i] = nz;
1968: nz = nz*mbs;
1969: } else {
1970: nz = 0;
1971: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1972: }
1974: /* allocate the matrix space */
1975: len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1976: ierr = PetscMalloc(len,&b->a);
1977: ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1978: b->j = (int*)(b->a + nz*bs2);
1979: PetscMemzero(b->j,nz*sizeof(int));
1980: b->i = b->j + nz;
1981: b->singlemalloc = PETSC_TRUE;
1983: b->i[0] = 0;
1984: for (i=1; i<mbs+1; i++) {
1985: b->i[i] = b->i[i-1] + b->imax[i-1];
1986: }
1988: /* b->ilen will count nonzeros in each block row so far. */
1989: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1990: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1991: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1993: b->bs = bs;
1994: b->bs2 = bs2;
1995: b->mbs = mbs;
1996: b->nz = 0;
1997: b->maxnz = nz*bs2;
1998: B->info.nz_unneeded = (PetscReal)b->maxnz;
1999: return(0);
2000: }