Actual source code: sbaij.c
1: /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/spops.h
10: #include src/mat/impls/sbaij/seq/sbaij.h
11: #if defined(PETSC_HAVE_SPOOLES)
12: EXTERN int MatUseSpooles_SeqSBAIJ(Mat);
13: #endif
15: #define CHUNKSIZE 10
17: /*
18: Checks for missing diagonals
19: */
20: int MatMissingDiagonal_SeqSBAIJ(Mat A)
21: {
22: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
23: int *diag,*jj = a->j,i,ierr;
26: MatMarkDiagonal_SeqSBAIJ(A);
27: diag = a->diag;
28: for (i=0; i<a->mbs; i++) {
29: if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i);
30: }
31: return(0);
32: }
34: int MatMarkDiagonal_SeqSBAIJ(Mat A)
35: {
36: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
37: int i,mbs = a->mbs,ierr;
40: if (a->diag) return(0);
42: PetscMalloc((mbs+1)*sizeof(int),&a->diag);
43: PetscLogObjectMemory(A,(mbs+1)*sizeof(int));
44: for (i=0; i<mbs; i++) a->diag[i] = a->i[i];
45: return(0);
46: }
48: extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
50: static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
51: {
52: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
55: if (ia) {
56: SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
57: }
58: *nn = a->mbs;
59: return(0);
60: }
62: static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
63: {
65: if (!ia) return(0);
66: SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering");
67: /* return(0); */
68: }
70: int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs)
71: {
72: Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data;
75: *bs = sbaij->bs;
76: return(0);
77: }
79: int MatDestroy_SeqSBAIJ(Mat A)
80: {
81: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
82: int ierr;
85: #if defined(PETSC_USE_LOG)
86: PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz);
87: #endif
88: PetscFree(a->a);
89: if (!a->singlemalloc) {
90: PetscFree(a->i);
91: PetscFree(a->j);
92: }
93: if (a->row) {
94: ISDestroy(a->row);
95: }
96: if (a->diag) {PetscFree(a->diag);}
97: if (a->ilen) {PetscFree(a->ilen);}
98: if (a->imax) {PetscFree(a->imax);}
99: if (a->solve_work) {PetscFree(a->solve_work);}
100: if (a->mult_work) {PetscFree(a->mult_work);}
101: if (a->icol) {ISDestroy(a->icol);}
102: if (a->saved_values) {PetscFree(a->saved_values);}
104: if (a->inew){
105: PetscFree(a->inew);
106: a->inew = 0;
107: }
108: PetscFree(a);
109: return(0);
110: }
112: int MatSetOption_SeqSBAIJ(Mat A,MatOption op)
113: {
114: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
117: switch (op) {
118: case MAT_ROW_ORIENTED:
119: a->roworiented = PETSC_TRUE;
120: break;
121: case MAT_COLUMN_ORIENTED:
122: a->roworiented = PETSC_FALSE;
123: break;
124: case MAT_COLUMNS_SORTED:
125: a->sorted = PETSC_TRUE;
126: break;
127: case MAT_COLUMNS_UNSORTED:
128: a->sorted = PETSC_FALSE;
129: break;
130: case MAT_KEEP_ZEROED_ROWS:
131: a->keepzeroedrows = PETSC_TRUE;
132: break;
133: case MAT_NO_NEW_NONZERO_LOCATIONS:
134: a->nonew = 1;
135: break;
136: case MAT_NEW_NONZERO_LOCATION_ERR:
137: a->nonew = -1;
138: break;
139: case MAT_NEW_NONZERO_ALLOCATION_ERR:
140: a->nonew = -2;
141: break;
142: case MAT_YES_NEW_NONZERO_LOCATIONS:
143: a->nonew = 0;
144: break;
145: case MAT_ROWS_SORTED:
146: case MAT_ROWS_UNSORTED:
147: case MAT_YES_NEW_DIAGONALS:
148: case MAT_IGNORE_OFF_PROC_ENTRIES:
149: case MAT_USE_HASH_TABLE:
150: case MAT_USE_SINGLE_PRECISION_SOLVES:
151: PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignoredn");
152: break;
153: case MAT_NO_NEW_DIAGONALS:
154: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
155: default:
156: SETERRQ(PETSC_ERR_SUP,"unknown option");
157: }
158: return(0);
159: }
161: int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v)
162: {
163: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
164: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr;
165: MatScalar *aa,*aa_i;
166: PetscScalar *v_i;
169: bs = a->bs;
170: ai = a->i;
171: aj = a->j;
172: aa = a->a;
173: bs2 = a->bs2;
174:
175: if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
176:
177: bn = row/bs; /* Block number */
178: bp = row % bs; /* Block position */
179: M = ai[bn+1] - ai[bn];
180: *ncols = bs*M;
181:
182: if (v) {
183: *v = 0;
184: if (*ncols) {
185: PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);
186: for (i=0; i<M; i++) { /* for each block in the block row */
187: v_i = *v + i*bs;
188: aa_i = aa + bs2*(ai[bn] + i);
189: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
190: }
191: }
192: }
194: if (cols) {
195: *cols = 0;
196: if (*ncols) {
197: PetscMalloc((*ncols+row)*sizeof(int),cols);
198: for (i=0; i<M; i++) { /* for each block in the block row */
199: cols_i = *cols + i*bs;
200: itmp = bs*aj[ai[bn] + i];
201: for (j=0; j<bs; j++) {cols_i[j] = itmp++;}
202: }
203: }
204: }
206: /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */
207: /* this segment is currently removed, so only entries in the upper triangle are obtained */
208: #ifdef column_search
209: v_i = *v + M*bs;
210: cols_i = *cols + M*bs;
211: for (i=0; i<bn; i++){ /* for each block row */
212: M = ai[i+1] - ai[i];
213: for (j=0; j<M; j++){
214: itmp = aj[ai[i] + j]; /* block column value */
215: if (itmp == bn){
216: aa_i = aa + bs2*(ai[i] + j) + bs*bp;
217: for (k=0; k<bs; k++) {
218: *cols_i++ = i*bs+k;
219: *v_i++ = aa_i[k];
220: }
221: *ncols += bs;
222: break;
223: }
224: }
225: }
226: #endif
228: return(0);
229: }
231: int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
232: {
236: if (idx) {if (*idx) {PetscFree(*idx);}}
237: if (v) {if (*v) {PetscFree(*v);}}
238: return(0);
239: }
241: int MatTranspose_SeqSBAIJ(Mat A,Mat *B)
242: {
244: SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
245: /* return(0); */
246: }
248: static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer)
249: {
250: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
251: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
252: PetscScalar *aa;
253: FILE *file;
256: ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);
257: ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
258: col_lens[0] = MAT_FILE_COOKIE;
260: col_lens[1] = A->m;
261: col_lens[2] = A->m;
262: col_lens[3] = a->s_nz*bs2;
264: /* store lengths of each row and write (including header) to file */
265: count = 0;
266: for (i=0; i<a->mbs; i++) {
267: for (j=0; j<bs; j++) {
268: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
269: }
270: }
271: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
272: PetscFree(col_lens);
274: /* store column indices (zero start index) */
275: ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);
276: count = 0;
277: for (i=0; i<a->mbs; i++) {
278: for (j=0; j<bs; j++) {
279: for (k=a->i[i]; k<a->i[i+1]; k++) {
280: for (l=0; l<bs; l++) {
281: jj[count++] = bs*a->j[k] + l;
282: }
283: }
284: }
285: }
286: PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);
287: PetscFree(jj);
289: /* store nonzero values */
290: ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);
291: count = 0;
292: for (i=0; i<a->mbs; i++) {
293: for (j=0; j<bs; j++) {
294: for (k=a->i[i]; k<a->i[i+1]; k++) {
295: for (l=0; l<bs; l++) {
296: aa[count++] = a->a[bs2*k + l*bs + j];
297: }
298: }
299: }
300: }
301: PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);
302: PetscFree(aa);
304: PetscViewerBinaryGetInfoPointer(viewer,&file);
305: if (file) {
306: fprintf(file,"-matload_block_size %dn",a->bs);
307: }
308: return(0);
309: }
311: static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer)
312: {
313: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
314: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
315: char *name;
316: PetscViewerFormat format;
319: PetscObjectGetName((PetscObject)A,&name);
320: PetscViewerGetFormat(viewer,&format);
321: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
322: PetscViewerASCIIPrintf(viewer," block size is %dn",bs);
323: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
324: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
325: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
326: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
327: for (i=0; i<a->mbs; i++) {
328: for (j=0; j<bs; j++) {
329: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
330: for (k=a->i[i]; k<a->i[i+1]; k++) {
331: for (l=0; l<bs; l++) {
332: #if defined(PETSC_USE_COMPLEX)
333: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
334: PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
335: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
336: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
337: PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
338: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
339: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
340: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
341: }
342: #else
343: if (a->a[bs2*k + l*bs + j] != 0.0) {
344: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
345: }
346: #endif
347: }
348: }
349: PetscViewerASCIIPrintf(viewer,"n");
350: }
351: }
352: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
353: } else {
354: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
355: for (i=0; i<a->mbs; i++) {
356: for (j=0; j<bs; j++) {
357: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
358: for (k=a->i[i]; k<a->i[i+1]; k++) {
359: for (l=0; l<bs; l++) {
360: #if defined(PETSC_USE_COMPLEX)
361: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
362: PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
363: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
364: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
365: PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
366: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
367: } else {
368: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
369: }
370: #else
371: PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
372: #endif
373: }
374: }
375: PetscViewerASCIIPrintf(viewer,"n");
376: }
377: }
378: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
379: }
380: PetscViewerFlush(viewer);
381: return(0);
382: }
384: static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
385: {
386: Mat A = (Mat) Aa;
387: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
388: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
389: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
390: MatScalar *aa;
391: MPI_Comm comm;
392: PetscViewer viewer;
395: /*
396: This is nasty. If this is called from an originally parallel matrix
397: then all processes call this,but only the first has the matrix so the
398: rest should return immediately.
399: */
400: PetscObjectGetComm((PetscObject)draw,&comm);
401: MPI_Comm_rank(comm,&rank);
402: if (rank) return(0);
404: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
406: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
407: PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric");
409: /* loop over matrix elements drawing boxes */
410: color = PETSC_DRAW_BLUE;
411: for (i=0,row=0; i<mbs; i++,row+=bs) {
412: for (j=a->i[i]; j<a->i[i+1]; j++) {
413: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
414: x_l = a->j[j]*bs; x_r = x_l + 1.0;
415: aa = a->a + j*bs2;
416: for (k=0; k<bs; k++) {
417: for (l=0; l<bs; l++) {
418: if (PetscRealPart(*aa++) >= 0.) continue;
419: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
420: }
421: }
422: }
423: }
424: color = PETSC_DRAW_CYAN;
425: for (i=0,row=0; i<mbs; i++,row+=bs) {
426: for (j=a->i[i]; j<a->i[i+1]; j++) {
427: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
428: x_l = a->j[j]*bs; x_r = x_l + 1.0;
429: aa = a->a + j*bs2;
430: for (k=0; k<bs; k++) {
431: for (l=0; l<bs; l++) {
432: if (PetscRealPart(*aa++) != 0.) continue;
433: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
434: }
435: }
436: }
437: }
439: color = PETSC_DRAW_RED;
440: for (i=0,row=0; i<mbs; i++,row+=bs) {
441: for (j=a->i[i]; j<a->i[i+1]; j++) {
442: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
443: x_l = a->j[j]*bs; x_r = x_l + 1.0;
444: aa = a->a + j*bs2;
445: for (k=0; k<bs; k++) {
446: for (l=0; l<bs; l++) {
447: if (PetscRealPart(*aa++) <= 0.) continue;
448: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
449: }
450: }
451: }
452: }
453: return(0);
454: }
456: static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer)
457: {
458: int ierr;
459: PetscReal xl,yl,xr,yr,w,h;
460: PetscDraw draw;
461: PetscTruth isnull;
465: PetscViewerDrawGetDraw(viewer,0,&draw);
466: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
468: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
469: xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0;
470: xr += w; yr += h; xl = -w; yl = -h;
471: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
472: PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);
473: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
474: return(0);
475: }
477: int MatView_SeqSBAIJ(Mat A,PetscViewer viewer)
478: {
479: int ierr;
480: PetscTruth issocket,isascii,isbinary,isdraw;
483: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
484: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
485: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
486: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
487: if (issocket) {
488: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
489: } else if (isascii){
490: MatView_SeqSBAIJ_ASCII(A,viewer);
491: } else if (isbinary) {
492: MatView_SeqSBAIJ_Binary(A,viewer);
493: } else if (isdraw) {
494: MatView_SeqSBAIJ_Draw(A,viewer);
495: } else {
496: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
497: }
498: return(0);
499: }
502: int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
503: {
504: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
505: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
506: int *ai = a->i,*ailen = a->ilen;
507: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
508: MatScalar *ap,*aa = a->a,zero = 0.0;
511: for (k=0; k<m; k++) { /* loop over rows */
512: row = im[k]; brow = row/bs;
513: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
514: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
515: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
516: nrow = ailen[brow];
517: for (l=0; l<n; l++) { /* loop over columns */
518: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
519: if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
520: col = in[l] ;
521: bcol = col/bs;
522: cidx = col%bs;
523: ridx = row%bs;
524: high = nrow;
525: low = 0; /* assume unsorted */
526: while (high-low > 5) {
527: t = (low+high)/2;
528: if (rp[t] > bcol) high = t;
529: else low = t;
530: }
531: for (i=low; i<high; i++) {
532: if (rp[i] > bcol) break;
533: if (rp[i] == bcol) {
534: *v++ = ap[bs2*i+bs*cidx+ridx];
535: goto finished;
536: }
537: }
538: *v++ = zero;
539: finished:;
540: }
541: }
542: return(0);
543: }
546: int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
547: {
549: SETERRQ(1,"Function not yet written for SBAIJ format");
550: }
552: int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode)
553: {
554: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
555: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
556: int m = A->m,*ip,N,*ailen = a->ilen;
557: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
558: MatScalar *aa = a->a,*ap;
559: #if defined(PETSC_HAVE_SPOOLES)
560: PetscTruth flag;
561: #endif
564: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
566: if (m) rmax = ailen[0];
567: for (i=1; i<mbs; i++) {
568: /* move each row back by the amount of empty slots (fshift) before it*/
569: fshift += imax[i-1] - ailen[i-1];
570: rmax = PetscMax(rmax,ailen[i]);
571: if (fshift) {
572: ip = aj + ai[i]; ap = aa + bs2*ai[i];
573: N = ailen[i];
574: for (j=0; j<N; j++) {
575: ip[j-fshift] = ip[j];
576: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
577: }
578: }
579: ai[i] = ai[i-1] + ailen[i-1];
580: }
581: if (mbs) {
582: fshift += imax[mbs-1] - ailen[mbs-1];
583: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
584: }
585: /* reset ilen and imax for each row */
586: for (i=0; i<mbs; i++) {
587: ailen[i] = imax[i] = ai[i+1] - ai[i];
588: }
589: a->s_nz = ai[mbs];
591: /* diagonals may have moved, reset it */
592: if (a->diag) {
593: PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));
594: }
595: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",
596: m,A->m,a->bs,fshift*bs2,a->s_nz*bs2);
597: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %dn",
598: a->reallocs);
599: PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %dn",rmax);
600: a->reallocs = 0;
601: A->info.nz_unneeded = (PetscReal)fshift*bs2;
602:
603: #if defined(PETSC_HAVE_SPOOLES)
604: PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);
605: if (flag) { MatUseSpooles_SeqSBAIJ(A); }
606: #endif
608: return(0);
609: }
611: /*
612: This function returns an array of flags which indicate the locations of contiguous
613: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
614: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
615: Assume: sizes should be long enough to hold all the values.
616: */
617: static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
618: {
619: int i,j,k,row;
620: PetscTruth flg;
623: for (i=0,j=0; i<n; j++) {
624: row = idx[i];
625: if (row%bs!=0) { /* Not the begining of a block */
626: sizes[j] = 1;
627: i++;
628: } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */
629: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
630: i++;
631: } else { /* Begining of the block, so check if the complete block exists */
632: flg = PETSC_TRUE;
633: for (k=1; k<bs; k++) {
634: if (row+k != idx[i+k]) { /* break in the block */
635: flg = PETSC_FALSE;
636: break;
637: }
638: }
639: if (flg == PETSC_TRUE) { /* No break in the bs */
640: sizes[j] = bs;
641: i+= bs;
642: } else {
643: sizes[j] = 1;
644: i++;
645: }
646: }
647: }
648: *bs_max = j;
649: return(0);
650: }
651:
652: int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag)
653: {
654: Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data;
655: int ierr,i,j,k,count,is_n,*is_idx,*rows;
656: int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max;
657: PetscScalar zero = 0.0;
658: MatScalar *aa;
661: /* Make a copy of the IS and sort it */
662: ISGetSize(is,&is_n);
663: ISGetIndices(is,&is_idx);
665: /* allocate memory for rows,sizes */
666: PetscMalloc((3*is_n+1)*sizeof(int),&rows);
667: sizes = rows + is_n;
669: /* initialize copy IS values to rows, and sort them */
670: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
671: PetscSortInt(is_n,rows);
672: if (sbaij->keepzeroedrows) { /* do not change nonzero structure */
673: for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */
674: bs_max = is_n; /* bs_max: num. of contiguous block row in the row */
675: } else {
676: MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
677: }
678: ISRestoreIndices(is,&is_idx);
680: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
681: row = rows[j]; /* row to be zeroed */
682: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
683: count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */
684: aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs);
685: if (sizes[i] == bs && !sbaij->keepzeroedrows) {
686: if (diag) {
687: if (sbaij->ilen[row/bs] > 0) {
688: sbaij->ilen[row/bs] = 1;
689: sbaij->j[sbaij->i[row/bs]] = row/bs;
690: PetscMemzero(aa,count*bs*sizeof(MatScalar));
691: }
692: /* Now insert all the diagoanl values for this bs */
693: for (k=0; k<bs; k++) {
694: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
695: }
696: } else { /* (!diag) */
697: sbaij->ilen[row/bs] = 0;
698: } /* end (!diag) */
699: } else { /* (sizes[i] != bs), broken block */
700: #if defined (PETSC_USE_DEBUG)
701: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
702: #endif
703: for (k=0; k<count; k++) {
704: aa[0] = zero;
705: aa+=bs;
706: }
707: if (diag) {
708: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
709: }
710: }
711: }
713: PetscFree(rows);
714: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
715: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
716: return(0);
717: }
719: /* Only add/insert a(i,j) with i<=j (blocks).
720: Any a(i,j) with i>j input by user is ingored.
721: */
723: int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
724: {
725: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
726: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
727: int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
728: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
729: int ridx,cidx,bs2=a->bs2,ierr;
730: MatScalar *ap,value,*aa=a->a,*bap;
734: for (k=0; k<m; k++) { /* loop over added rows */
735: row = im[k]; /* row number */
736: brow = row/bs; /* block row number */
737: if (row < 0) continue;
738: #if defined(PETSC_USE_BOPT_g)
739: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
740: #endif
741: rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/
742: ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/
743: rmax = imax[brow]; /* maximum space allocated for this row */
744: nrow = ailen[brow]; /* actual length of this row */
745: low = 0;
747: for (l=0; l<n; l++) { /* loop over added columns */
748: if (in[l] < 0) continue;
749: #if defined(PETSC_USE_BOPT_g)
750: if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m);
751: #endif
752: col = in[l];
753: bcol = col/bs; /* block col number */
755: if (brow <= bcol){
756: ridx = row % bs; cidx = col % bs; /*row and col index inside the block */
757: if ((brow==bcol && ridx<=cidx) || (brow<bcol)){
758: /* element value a(k,l) */
759: if (roworiented) {
760: value = v[l + k*n];
761: } else {
762: value = v[k + l*m];
763: }
765: /* move pointer bap to a(k,l) quickly and add/insert value */
766: if (!sorted) low = 0; high = nrow;
767: while (high-low > 7) {
768: t = (low+high)/2;
769: if (rp[t] > bcol) high = t;
770: else low = t;
771: }
772: for (i=low; i<high; i++) {
773: /* printf("The loop of i=low.., rp[%d]=%dn",i,rp[i]); */
774: if (rp[i] > bcol) break;
775: if (rp[i] == bcol) {
776: bap = ap + bs2*i + bs*cidx + ridx;
777: if (is == ADD_VALUES) *bap += value;
778: else *bap = value;
779: /* for diag block, add/insert its symmetric element a(cidx,ridx) */
780: if (brow == bcol && ridx < cidx){
781: bap = ap + bs2*i + bs*ridx + cidx;
782: if (is == ADD_VALUES) *bap += value;
783: else *bap = value;
784: }
785: goto noinsert1;
786: }
787: }
788:
789: if (nonew == 1) goto noinsert1;
790: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
791: if (nrow >= rmax) {
792: /* there is no extra room in row, therefore enlarge */
793: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
794: MatScalar *new_a;
796: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
798: /* Malloc new storage space */
799: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
800: ierr = PetscMalloc(len,&new_a);
801: new_j = (int*)(new_a + bs2*new_nz);
802: new_i = new_j + new_nz;
804: /* copy over old data into new slots */
805: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
806: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
807: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
808: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
809: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
810: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
811: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
812: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
813: /* free up old matrix storage */
814: PetscFree(a->a);
815: if (!a->singlemalloc) {
816: PetscFree(a->i);
817: PetscFree(a->j);
818: }
819: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
820: a->singlemalloc = PETSC_TRUE;
822: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
823: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
824: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
825: a->s_maxnz += bs2*CHUNKSIZE;
826: a->reallocs++;
827: a->s_nz++;
828: }
830: N = nrow++ - 1;
831: /* shift up all the later entries in this row */
832: for (ii=N; ii>=i; ii--) {
833: rp[ii+1] = rp[ii];
834: ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
835: }
836: if (N>=i) {
837: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
838: }
839: rp[i] = bcol;
840: ap[bs2*i + bs*cidx + ridx] = value;
841: noinsert1:;
842: low = i;
843: /* } */
844: }
845: } /* end of if .. if.. */
846: } /* end of loop over added columns */
847: ailen[brow] = nrow;
848: } /* end of loop over added rows */
850: return(0);
851: }
853: extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*);
854: extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal);
855: extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int);
856: extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
857: extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
858: extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec);
859: extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec);
860: extern int MatScale_SeqSBAIJ(PetscScalar*,Mat);
861: extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *);
862: extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*);
863: extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec);
864: extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec);
865: extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *);
866: extern int MatZeroEntries_SeqSBAIJ(Mat);
867: extern int MatGetRowMax_SeqSBAIJ(Mat,Vec);
869: extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec);
870: extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec);
871: extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec);
872: extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec);
873: extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec);
874: extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec);
875: extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec);
876: extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec);
877: extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec);
878: extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec);
879: extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec);
880: extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec);
881: extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec);
882: extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec);
883: extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec);
885: extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
886: extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
887: extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
888: extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
889: extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
890: extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
891: extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
892: extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec);
894: extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*);
895: extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*);
896: extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*);
897: extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*);
898: extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*);
899: extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*);
900: extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*);
901: extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*);
903: extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec);
904: extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec);
905: extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec);
906: extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec);
907: extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec);
908: extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec);
909: extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec);
910: extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec);
912: extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec);
913: extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec);
914: extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec);
915: extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec);
916: extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec);
917: extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec);
918: extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec);
919: extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec);
921: #ifdef HAVE_MatICCFactor
922: /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */
923: int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level)
924: {
925: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
926: Mat outA;
927: int ierr;
928: PetscTruth row_identity,col_identity;
931: /*
932: if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
933: ISIdentity(row,&row_identity);
934: ISIdentity(col,&col_identity);
935: if (!row_identity || !col_identity) {
936: SETERRQ(1,"Row and column permutations must be identity for in-place ICC");
937: }
938: */
940: outA = inA;
941: inA->factor = FACTOR_CHOLESKY;
943: if (!a->diag) {
944: MatMarkDiagonal_SeqSBAIJ(inA);
945: }
946: /*
947: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
948: for ILU(0) factorization with natural ordering
949: */
950: switch (a->bs) {
951: case 1:
952: inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
953: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
954: PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1n");
955: case 2:
956: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
957: inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
958: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering;
959: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2n");
960: break;
961: case 3:
962: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
963: inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
964: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering;
965: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3n");
966: break;
967: case 4:
968: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
969: inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
970: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering;
971: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4n");
972: break;
973: case 5:
974: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
975: inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
976: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering;
977: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5n");
978: break;
979: case 6:
980: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
981: inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
982: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering;
983: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6n");
984: break;
985: case 7:
986: inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
987: inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering;
988: inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
989: PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7n");
990: break;
991: default:
992: a->row = row;
993: a->icol = col;
994: ierr = PetscObjectReference((PetscObject)row);
995: ierr = PetscObjectReference((PetscObject)col);
997: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
998: ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));
999: PetscLogObjectParent(inA,a->icol);
1001: if (!a->solve_work) {
1002: PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1003: PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));
1004: }
1005: }
1007: MatCholeskyFactorNumeric(inA,&outA);
1009: return(0);
1010: }
1011: #endif
1013: int MatPrintHelp_SeqSBAIJ(Mat A)
1014: {
1015: static PetscTruth called = PETSC_FALSE;
1016: MPI_Comm comm = A->comm;
1017: int ierr;
1020: if (called) {return(0);} else called = PETSC_TRUE;
1021: (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):n");
1022: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>n");
1023: return(0);
1024: }
1026: EXTERN_C_BEGIN
1027: int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices)
1028: {
1029: Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data;
1030: int i,nz,n;
1033: nz = baij->s_maxnz;
1034: n = mat->n;
1035: for (i=0; i<nz; i++) {
1036: baij->j[i] = indices[i];
1037: }
1038: baij->s_nz = nz;
1039: for (i=0; i<n; i++) {
1040: baij->ilen[i] = baij->imax[i];
1041: }
1043: return(0);
1044: }
1045: EXTERN_C_END
1047: /*@
1048: MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows
1049: in the matrix.
1051: Input Parameters:
1052: + mat - the SeqSBAIJ matrix
1053: - indices - the column indices
1055: Level: advanced
1057: Notes:
1058: This can be called if you have precomputed the nonzero structure of the
1059: matrix and want to provide it to the matrix object to improve the performance
1060: of the MatSetValues() operation.
1062: You MUST have set the correct numbers of nonzeros per row in the call to
1063: MatCreateSeqSBAIJ().
1065: MUST be called before any calls to MatSetValues()
1067: .seealso: MatCreateSeqSBAIJ
1068: @*/
1069: int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices)
1070: {
1071: int ierr,(*f)(Mat,int *);
1075: PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);
1076: if (f) {
1077: (*f)(mat,indices);
1078: } else {
1079: SETERRQ(1,"Wrong type of matrix to set column indices");
1080: }
1081: return(0);
1082: }
1084: int MatSetUpPreallocation_SeqSBAIJ(Mat A)
1085: {
1086: int ierr;
1089: MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1090: return(0);
1091: }
1093: /* -------------------------------------------------------------------*/
1094: static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ,
1095: MatGetRow_SeqSBAIJ,
1096: MatRestoreRow_SeqSBAIJ,
1097: MatMult_SeqSBAIJ_N,
1098: MatMultAdd_SeqSBAIJ_N,
1099: MatMultTranspose_SeqSBAIJ,
1100: MatMultTransposeAdd_SeqSBAIJ,
1101: MatSolve_SeqSBAIJ_N,
1102: 0,
1103: 0,
1104: 0,
1105: 0,
1106: MatCholeskyFactor_SeqSBAIJ,
1107: MatRelax_SeqSBAIJ,
1108: MatTranspose_SeqSBAIJ,
1109: MatGetInfo_SeqSBAIJ,
1110: MatEqual_SeqSBAIJ,
1111: MatGetDiagonal_SeqSBAIJ,
1112: MatDiagonalScale_SeqSBAIJ,
1113: MatNorm_SeqSBAIJ,
1114: 0,
1115: MatAssemblyEnd_SeqSBAIJ,
1116: 0,
1117: MatSetOption_SeqSBAIJ,
1118: MatZeroEntries_SeqSBAIJ,
1119: MatZeroRows_SeqSBAIJ,
1120: 0,
1121: 0,
1122: MatCholeskyFactorSymbolic_SeqSBAIJ,
1123: MatCholeskyFactorNumeric_SeqSBAIJ_N,
1124: MatSetUpPreallocation_SeqSBAIJ,
1125: 0,
1126: MatICCFactorSymbolic_SeqSBAIJ,
1127: 0,
1128: 0,
1129: MatDuplicate_SeqSBAIJ,
1130: 0,
1131: 0,
1132: 0,
1133: 0,
1134: 0,
1135: MatGetSubMatrices_SeqSBAIJ,
1136: MatIncreaseOverlap_SeqSBAIJ,
1137: MatGetValues_SeqSBAIJ,
1138: 0,
1139: MatPrintHelp_SeqSBAIJ,
1140: MatScale_SeqSBAIJ,
1141: 0,
1142: 0,
1143: 0,
1144: MatGetBlockSize_SeqSBAIJ,
1145: MatGetRowIJ_SeqSBAIJ,
1146: MatRestoreRowIJ_SeqSBAIJ,
1147: 0,
1148: 0,
1149: 0,
1150: 0,
1151: 0,
1152: 0,
1153: MatSetValuesBlocked_SeqSBAIJ,
1154: MatGetSubMatrix_SeqSBAIJ,
1155: 0,
1156: 0,
1157: MatGetPetscMaps_Petsc,
1158: 0,
1159: 0,
1160: 0,
1161: 0,
1162: 0,
1163: 0,
1164: MatGetRowMax_SeqSBAIJ};
1166: EXTERN_C_BEGIN
1167: int MatStoreValues_SeqSBAIJ(Mat mat)
1168: {
1169: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1170: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1171: int ierr;
1174: if (aij->nonew != 1) {
1175: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1176: }
1178: /* allocate space for values if not already there */
1179: if (!aij->saved_values) {
1180: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1181: }
1183: /* copy values over */
1184: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1185: return(0);
1186: }
1187: EXTERN_C_END
1189: EXTERN_C_BEGIN
1190: int MatRetrieveValues_SeqSBAIJ(Mat mat)
1191: {
1192: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data;
1193: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1196: if (aij->nonew != 1) {
1197: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1198: }
1199: if (!aij->saved_values) {
1200: SETERRQ(1,"Must call MatStoreValues(A);first");
1201: }
1203: /* copy values over */
1204: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1205: return(0);
1206: }
1207: EXTERN_C_END
1209: EXTERN_C_BEGIN
1210: int MatCreate_SeqSBAIJ(Mat B)
1211: {
1212: Mat_SeqSBAIJ *b;
1213: int ierr,size;
1216: MPI_Comm_size(B->comm,&size);
1217: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1218: B->m = B->M = PetscMax(B->m,B->M);
1219: B->n = B->N = PetscMax(B->n,B->N);
1221: ierr = PetscNew(Mat_SeqSBAIJ,&b);
1222: B->data = (void*)b;
1223: ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));
1224: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1225: B->ops->destroy = MatDestroy_SeqSBAIJ;
1226: B->ops->view = MatView_SeqSBAIJ;
1227: B->factor = 0;
1228: B->lupivotthreshold = 1.0;
1229: B->mapping = 0;
1230: b->row = 0;
1231: b->icol = 0;
1232: b->reallocs = 0;
1233: b->saved_values = 0;
1234:
1235: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1236: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1238: b->sorted = PETSC_FALSE;
1239: b->roworiented = PETSC_TRUE;
1240: b->nonew = 0;
1241: b->diag = 0;
1242: b->solve_work = 0;
1243: b->mult_work = 0;
1244: B->spptr = 0;
1245: b->keepzeroedrows = PETSC_FALSE;
1246:
1247: b->inew = 0;
1248: b->jnew = 0;
1249: b->anew = 0;
1250: b->a2anew = 0;
1251: b->permute = PETSC_FALSE;
1253: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1254: "MatStoreValues_SeqSBAIJ",
1255: (void*)MatStoreValues_SeqSBAIJ);
1256: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1257: "MatRetrieveValues_SeqSBAIJ",
1258: (void*)MatRetrieveValues_SeqSBAIJ);
1259: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C",
1260: "MatSeqSBAIJSetColumnIndices_SeqSBAIJ",
1261: (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);
1262: return(0);
1263: }
1264: EXTERN_C_END
1266: /*@C
1267: MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block
1268: compressed row) format. For good matrix assembly performance the
1269: user should preallocate the matrix storage by setting the parameter nz
1270: (or the array nnz). By setting these parameters accurately, performance
1271: during matrix assembly can be increased by more than a factor of 50.
1273: Collective on Mat
1275: Input Parameters:
1276: + A - the symmetric matrix
1277: . bs - size of block
1278: . nz - number of block nonzeros per block row (same for all rows)
1279: - nnz - array containing the number of block nonzeros in the various block rows
1280: (possibly different for each block row) or PETSC_NULL
1282: Options Database Keys:
1283: . -mat_no_unroll - uses code that does not unroll the loops in the
1284: block calculations (much slower)
1285: . -mat_block_size - size of the blocks to use
1287: Level: intermediate
1289: Notes:
1290: The block AIJ format is compatible with standard Fortran 77
1291: storage. That is, the stored row and column indices can begin at
1292: either one (as in Fortran) or zero. See the users' manual for details.
1294: Specify the preallocated storage with either nz or nnz (not both).
1295: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1296: allocation. For additional details, see the users manual chapter on
1297: matrices.
1299: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1300: @*/
1301: int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1302: {
1303: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data;
1304: int i,len,ierr,mbs,bs2;
1305: PetscTruth flg;
1306: int s_nz;
1309: B->preallocated = PETSC_TRUE;
1310: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
1311: mbs = B->m/bs;
1312: bs2 = bs*bs;
1314: if (mbs*bs != B->m) {
1315: SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize");
1316: }
1318: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3;
1319: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1320: if (nnz) {
1321: for (i=0; i<mbs; i++) {
1322: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1323: if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs);
1324: }
1325: }
1327: ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1328: if (!flg) {
1329: switch (bs) {
1330: case 1:
1331: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1;
1332: B->ops->solve = MatSolve_SeqSBAIJ_1;
1333: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1;
1334: B->ops->mult = MatMult_SeqSBAIJ_1;
1335: B->ops->multadd = MatMultAdd_SeqSBAIJ_1;
1336: break;
1337: case 2:
1338: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2;
1339: B->ops->solve = MatSolve_SeqSBAIJ_2;
1340: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2;
1341: B->ops->mult = MatMult_SeqSBAIJ_2;
1342: B->ops->multadd = MatMultAdd_SeqSBAIJ_2;
1343: break;
1344: case 3:
1345: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3;
1346: B->ops->solve = MatSolve_SeqSBAIJ_3;
1347: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3;
1348: B->ops->mult = MatMult_SeqSBAIJ_3;
1349: B->ops->multadd = MatMultAdd_SeqSBAIJ_3;
1350: break;
1351: case 4:
1352: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4;
1353: B->ops->solve = MatSolve_SeqSBAIJ_4;
1354: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4;
1355: B->ops->mult = MatMult_SeqSBAIJ_4;
1356: B->ops->multadd = MatMultAdd_SeqSBAIJ_4;
1357: break;
1358: case 5:
1359: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5;
1360: B->ops->solve = MatSolve_SeqSBAIJ_5;
1361: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5;
1362: B->ops->mult = MatMult_SeqSBAIJ_5;
1363: B->ops->multadd = MatMultAdd_SeqSBAIJ_5;
1364: break;
1365: case 6:
1366: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6;
1367: B->ops->solve = MatSolve_SeqSBAIJ_6;
1368: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6;
1369: B->ops->mult = MatMult_SeqSBAIJ_6;
1370: B->ops->multadd = MatMultAdd_SeqSBAIJ_6;
1371: break;
1372: case 7:
1373: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7;
1374: B->ops->solve = MatSolve_SeqSBAIJ_7;
1375: B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7;
1376: B->ops->mult = MatMult_SeqSBAIJ_7;
1377: B->ops->multadd = MatMultAdd_SeqSBAIJ_7;
1378: break;
1379: }
1380: }
1382: b->mbs = mbs;
1383: b->nbs = mbs;
1384: ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);
1385: if (!nnz) {
1386: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1387: else if (nz <= 0) nz = 1;
1388: for (i=0; i<mbs; i++) {
1389: b->imax[i] = nz;
1390: }
1391: nz = nz*mbs; /* total nz */
1392: } else {
1393: nz = 0;
1394: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1395: }
1396: /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */
1397: s_nz = nz;
1399: /* allocate the matrix space */
1400: len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1401: PetscMalloc(len,&b->a);
1402: PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));
1403: b->j = (int*)(b->a + s_nz*bs2);
1404: PetscMemzero(b->j,s_nz*sizeof(int));
1405: b->i = b->j + s_nz;
1406: b->singlemalloc = PETSC_TRUE;
1408: /* pointer to beginning of each row */
1409: b->i[0] = 0;
1410: for (i=1; i<mbs+1; i++) {
1411: b->i[i] = b->i[i-1] + b->imax[i-1];
1412: }
1414: /* b->ilen will count nonzeros in each block row so far. */
1415: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
1416: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1417: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1418:
1419: b->bs = bs;
1420: b->bs2 = bs2;
1421: b->s_nz = 0;
1422: b->s_maxnz = s_nz*bs2;
1423:
1424: b->inew = 0;
1425: b->jnew = 0;
1426: b->anew = 0;
1427: b->a2anew = 0;
1428: b->permute = PETSC_FALSE;
1429: return(0);
1430: }
1433: /*@C
1434: MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block
1435: compressed row) format. For good matrix assembly performance the
1436: user should preallocate the matrix storage by setting the parameter nz
1437: (or the array nnz). By setting these parameters accurately, performance
1438: during matrix assembly can be increased by more than a factor of 50.
1440: Collective on MPI_Comm
1442: Input Parameters:
1443: + comm - MPI communicator, set to PETSC_COMM_SELF
1444: . bs - size of block
1445: . m - number of rows, or number of columns
1446: . nz - number of block nonzeros per block row (same for all rows)
1447: - nnz - array containing the number of block nonzeros in the various block rows
1448: (possibly different for each block row) or PETSC_NULL
1450: Output Parameter:
1451: . A - the symmetric matrix
1453: Options Database Keys:
1454: . -mat_no_unroll - uses code that does not unroll the loops in the
1455: block calculations (much slower)
1456: . -mat_block_size - size of the blocks to use
1458: Level: intermediate
1460: Notes:
1461: The block AIJ format is compatible with standard Fortran 77
1462: storage. That is, the stored row and column indices can begin at
1463: either one (as in Fortran) or zero. See the users' manual for details.
1465: Specify the preallocated storage with either nz or nnz (not both).
1466: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1467: allocation. For additional details, see the users manual chapter on
1468: matrices.
1470: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ()
1471: @*/
1472: int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1473: {
1475:
1477: MatCreate(comm,m,n,m,n,A);
1478: MatSetType(*A,MATSEQSBAIJ);
1479: MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);
1480: return(0);
1481: }
1483: int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1484: {
1485: Mat C;
1486: Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data;
1487: int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr;
1490: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1492: *B = 0;
1493: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1494: MatSetType(C,MATSEQSBAIJ);
1495: c = (Mat_SeqSBAIJ*)C->data;
1497: ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1498: C->preallocated = PETSC_TRUE;
1499: C->factor = A->factor;
1500: c->row = 0;
1501: c->icol = 0;
1502: c->saved_values = 0;
1503: c->keepzeroedrows = a->keepzeroedrows;
1504: C->assembled = PETSC_TRUE;
1506: c->bs = a->bs;
1507: c->bs2 = a->bs2;
1508: c->mbs = a->mbs;
1509: c->nbs = a->nbs;
1511: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1512: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1513: for (i=0; i<mbs; i++) {
1514: c->imax[i] = a->imax[i];
1515: c->ilen[i] = a->ilen[i];
1516: }
1518: /* allocate the matrix space */
1519: c->singlemalloc = PETSC_TRUE;
1520: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1521: PetscMalloc(len,&c->a);
1522: c->j = (int*)(c->a + nz*bs2);
1523: c->i = c->j + nz;
1524: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1525: if (mbs > 0) {
1526: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1527: if (cpvalues == MAT_COPY_VALUES) {
1528: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1529: } else {
1530: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1531: }
1532: }
1534: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));
1535: c->sorted = a->sorted;
1536: c->roworiented = a->roworiented;
1537: c->nonew = a->nonew;
1539: if (a->diag) {
1540: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1541: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1542: for (i=0; i<mbs; i++) {
1543: c->diag[i] = a->diag[i];
1544: }
1545: } else c->diag = 0;
1546: c->s_nz = a->s_nz;
1547: c->s_maxnz = a->s_maxnz;
1548: c->solve_work = 0;
1549: C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1550: c->mult_work = 0;
1551: *B = C;
1552: PetscFListDuplicate(A->qlist,&C->qlist);
1553: return(0);
1554: }
1556: EXTERN_C_BEGIN
1557: int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A)
1558: {
1559: Mat_SeqSBAIJ *a;
1560: Mat B;
1561: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1562: int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount;
1563: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1564: int *masked,nmask,tmp,bs2,ishift;
1565: PetscScalar *aa;
1566: MPI_Comm comm = ((PetscObject)viewer)->comm;
1569: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1570: bs2 = bs*bs;
1572: MPI_Comm_size(comm,&size);
1573: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1574: PetscViewerBinaryGetDescriptor(viewer,&fd);
1575: PetscBinaryRead(fd,header,4,PETSC_INT);
1576: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1577: M = header[1]; N = header[2]; nz = header[3];
1579: if (header[3] < 0) {
1580: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ");
1581: }
1583: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1585: /*
1586: This code adds extra rows to make sure the number of rows is
1587: divisible by the blocksize
1588: */
1589: mbs = M/bs;
1590: extra_rows = bs - M + bs*(mbs);
1591: if (extra_rows == bs) extra_rows = 0;
1592: else mbs++;
1593: if (extra_rows) {
1594: PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksizen");
1595: }
1597: /* read in row lengths */
1598: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1599: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1600: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1602: /* read in column indices */
1603: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1604: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1605: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1607: /* loop over row lengths determining block row lengths */
1608: ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);
1609: ierr = PetscMalloc(2*mbs*sizeof(int),&mask);
1610: ierr = PetscMemzero(mask,mbs*sizeof(int));
1611: masked = mask + mbs;
1612: rowcount = 0; nzcount = 0;
1613: for (i=0; i<mbs; i++) {
1614: nmask = 0;
1615: for (j=0; j<bs; j++) {
1616: kmax = rowlengths[rowcount];
1617: for (k=0; k<kmax; k++) {
1618: tmp = jj[nzcount++]/bs; /* block col. index */
1619: if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;}
1620: }
1621: rowcount++;
1622: }
1623: s_browlengths[i] += nmask;
1624:
1625: /* zero out the mask elements we set */
1626: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1627: }
1629: /* create our matrix */
1630: MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);
1631: B = *A;
1632: a = (Mat_SeqSBAIJ*)B->data;
1634: /* set matrix "i" values */
1635: a->i[0] = 0;
1636: for (i=1; i<= mbs; i++) {
1637: a->i[i] = a->i[i-1] + s_browlengths[i-1];
1638: a->ilen[i-1] = s_browlengths[i-1];
1639: }
1640: a->s_nz = a->i[mbs];
1642: /* read in nonzero values */
1643: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1644: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1645: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1647: /* set "a" and "j" values into matrix */
1648: nzcount = 0; jcount = 0;
1649: for (i=0; i<mbs; i++) {
1650: nzcountb = nzcount;
1651: nmask = 0;
1652: for (j=0; j<bs; j++) {
1653: kmax = rowlengths[i*bs+j];
1654: for (k=0; k<kmax; k++) {
1655: tmp = jj[nzcount++]/bs; /* block col. index */
1656: if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;}
1657: }
1658: }
1659: /* sort the masked values */
1660: PetscSortInt(nmask,masked);
1662: /* set "j" values into matrix */
1663: maskcount = 1;
1664: for (j=0; j<nmask; j++) {
1665: a->j[jcount++] = masked[j];
1666: mask[masked[j]] = maskcount++;
1667: }
1669: /* set "a" values into matrix */
1670: ishift = bs2*a->i[i];
1671: for (j=0; j<bs; j++) {
1672: kmax = rowlengths[i*bs+j];
1673: for (k=0; k<kmax; k++) {
1674: tmp = jj[nzcountb]/bs ; /* block col. index */
1675: if (tmp >= i){
1676: block = mask[tmp] - 1;
1677: point = jj[nzcountb] - bs*tmp;
1678: idx = ishift + bs2*block + j + bs*point;
1679: a->a[idx] = aa[nzcountb];
1680: }
1681: nzcountb++;
1682: }
1683: }
1684: /* zero out the mask elements we set */
1685: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1686: }
1687: if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1689: PetscFree(rowlengths);
1690: PetscFree(s_browlengths);
1691: PetscFree(aa);
1692: PetscFree(jj);
1693: PetscFree(mask);
1695: B->assembled = PETSC_TRUE;
1696: MatView_Private(B);
1697: return(0);
1698: }
1699: EXTERN_C_END
1701: int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1702: {
1703: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
1704: MatScalar *aa=a->a,*v,*v1;
1705: PetscScalar *x,*b,*t,sum,d;
1706: int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr;
1707: int nz,nz1,*vj,*vj1,i;
1710: its = its*lits;
1711: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1713: if (bs > 1)
1714: SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
1716: VecGetArray(xx,&x);
1717: if (xx != bb) {
1718: VecGetArray(bb,&b);
1719: } else {
1720: b = x;
1721: }
1723: PetscMalloc(m*sizeof(PetscScalar),&t);
1724:
1725: if (flag & SOR_ZERO_INITIAL_GUESS) {
1726: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1727: for (i=0; i<m; i++)
1728: t[i] = b[i];
1730: for (i=0; i<m; i++){
1731: d = *(aa + ai[i]); /* diag[i] */
1732: v = aa + ai[i] + 1;
1733: vj = aj + ai[i] + 1;
1734: nz = ai[i+1] - ai[i] - 1;
1735: x[i] = omega*t[i]/d;
1736: while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */
1737: PetscLogFlops(2*nz-1);
1738: }
1739: }
1741: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1742: for (i=0; i<m; i++)
1743: t[i] = b[i];
1744:
1745: for (i=0; i<m-1; i++){ /* update rhs */
1746: v = aa + ai[i] + 1;
1747: vj = aj + ai[i] + 1;
1748: nz = ai[i+1] - ai[i] - 1;
1749: while (nz--) t[*vj++] -= x[i]*(*v++);
1750: PetscLogFlops(2*nz-1);
1751: }
1752: for (i=m-1; i>=0; i--){
1753: d = *(aa + ai[i]);
1754: v = aa + ai[i] + 1;
1755: vj = aj + ai[i] + 1;
1756: nz = ai[i+1] - ai[i] - 1;
1757: sum = t[i];
1758: while (nz--) sum -= x[*vj++]*(*v++);
1759: PetscLogFlops(2*nz-1);
1760: x[i] = (1-omega)*x[i] + omega*sum/d;
1761: }
1762: }
1763: its--;
1764: }
1766: while (its--) {
1767: /*
1768: forward sweep:
1769: for i=0,...,m-1:
1770: sum[i] = (b[i] - U(i,:)x )/d[i];
1771: x[i] = (1-omega)x[i] + omega*sum[i];
1772: b = b - x[i]*U^T(i,:);
1773:
1774: */
1775: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1776: for (i=0; i<m; i++)
1777: t[i] = b[i];
1779: for (i=0; i<m; i++){
1780: d = *(aa + ai[i]); /* diag[i] */
1781: v = aa + ai[i] + 1; v1=v;
1782: vj = aj + ai[i] + 1; vj1=vj;
1783: nz = ai[i+1] - ai[i] - 1; nz1=nz;
1784: sum = t[i];
1785: while (nz1--) sum -= (*v1++)*x[*vj1++];
1786: x[i] = (1-omega)*x[i] + omega*sum/d;
1787: while (nz--) t[*vj++] -= x[i]*(*v++);
1788: PetscLogFlops(4*nz-2);
1789: }
1790: }
1791:
1792: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1793: /*
1794: backward sweep:
1795: b = b - x[i]*U^T(i,:), i=0,...,n-2
1796: for i=m-1,...,0:
1797: sum[i] = (b[i] - U(i,:)x )/d[i];
1798: x[i] = (1-omega)x[i] + omega*sum[i];
1799: */
1800: for (i=0; i<m; i++)
1801: t[i] = b[i];
1802:
1803: for (i=0; i<m-1; i++){ /* update rhs */
1804: v = aa + ai[i] + 1;
1805: vj = aj + ai[i] + 1;
1806: nz = ai[i+1] - ai[i] - 1;
1807: while (nz--) t[*vj++] -= x[i]*(*v++);
1808: PetscLogFlops(2*nz-1);
1809: }
1810: for (i=m-1; i>=0; i--){
1811: d = *(aa + ai[i]);
1812: v = aa + ai[i] + 1;
1813: vj = aj + ai[i] + 1;
1814: nz = ai[i+1] - ai[i] - 1;
1815: sum = t[i];
1816: while (nz--) sum -= x[*vj++]*(*v++);
1817: PetscLogFlops(2*nz-1);
1818: x[i] = (1-omega)*x[i] + omega*sum/d;
1819: }
1820: }
1821: }
1823: PetscFree(t);
1824: VecRestoreArray(xx,&x);
1825: if (bb != xx) {
1826: VecRestoreArray(bb,&b);
1827: }
1829: return(0);
1830: }