Actual source code: mpibaij.c
1: /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
3: #include "src/mat/impls/baij/mpi/mpibaij.h"
4: #include "src/vec/vecimpl.h"
6: EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7: EXTERN int DisAssemble_MPIBAIJ(Mat);
8: EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9: EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10: EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
11: EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
12: EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
13: EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
14: EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
15: EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16: EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
18: /* UGLY, ugly, ugly
19: When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20: not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21: inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22: converts the entries into single precision and then calls ..._MatScalar() to put them
23: into the single precision data structures.
24: */
25: #if defined(PETSC_USE_MAT_SINGLE)
26: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27: EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28: EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29: EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30: EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31: #else
32: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
33: #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
34: #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
35: #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT
36: #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
37: #endif
39: int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40: {
41: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
42: int ierr,i;
43: PetscScalar *va,*vb;
44: Vec vtmp;
47:
48: MatGetRowMax(a->A,v);
49: VecGetArray(v,&va);
51: VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);
52: MatGetRowMax(a->B,vtmp);
53: VecGetArray(vtmp,&vb);
55: for (i=0; i<A->m; i++){
56: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
57: }
59: VecRestoreArray(v,&va);
60: VecRestoreArray(vtmp,&vb);
61: VecDestroy(vtmp);
62:
63: return(0);
64: }
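/*
   Usage sketch (assuming mat is an assembled MATMPIBAIJ matrix and comm its communicator;
   the result vector must share the matrix row layout):

     Vec rowmax;
     int m,n;
     MatGetLocalSize(mat,&m,&n);
     VecCreateMPI(comm,m,PETSC_DETERMINE,&rowmax);
     MatGetRowMax(mat,rowmax);    each local entry receives the row entry of largest absolute value
     VecDestroy(rowmax);
*/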
66: EXTERN_C_BEGIN
67: int MatStoreValues_MPIBAIJ(Mat mat)
68: {
69: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
70: int ierr;
73: MatStoreValues(aij->A);
74: MatStoreValues(aij->B);
75: return(0);
76: }
77: EXTERN_C_END
79: EXTERN_C_BEGIN
80: int MatRetrieveValues_MPIBAIJ(Mat mat)
81: {
82: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
83: int ierr;
86: MatRetrieveValues(aij->A);
87: MatRetrieveValues(aij->B);
88: return(0);
89: }
90: EXTERN_C_END
92: /*
93: Local utility routine that creates a mapping from the global column
94: number to the local number in the off-diagonal part of the local
95: storage of the matrix. This is done in a non-scalable way since the
96: length of colmap equals the global matrix length.
97: */
98: static int CreateColmap_MPIBAIJ_Private(Mat mat)
99: {
100: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
101: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
102: int nbs = B->nbs,i,bs=B->bs,ierr;
105: #if defined (PETSC_USE_CTABLE)
106: PetscTableCreate(baij->nbs,&baij->colmap);
107: for (i=0; i<nbs; i++){
108: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);
109: }
110: #else
111: PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);
112: PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
113: PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
114: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
115: #endif
116: return(0);
117: }
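/*
   A colmap lookup (sketch, mirroring MatSetValues_MPIBAIJ_MatScalar() below): the stored
   values are shifted by one so that a result of zero means "global block column not
   present in B".  Here gbcol stands for a global block column index:

   #if defined (PETSC_USE_CTABLE)
     PetscTableFind(baij->colmap,gbcol+1,&col);
     col--;                                  now bs*(local block column), or negative if absent
   #else
     col = baij->colmap[gbcol] - 1;
   #endif
*/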
119: #define CHUNKSIZE 10
121: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv)
122: {
123:
124: brow = row/bs;
125: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
126: rmax = aimax[brow]; nrow = ailen[brow];
127: bcol = col/bs;
128: ridx = row % bs; cidx = col % bs;
129: low = 0; high = nrow;
130: while (high-low > 3) {
131: t = (low+high)/2;
132: if (rp[t] > bcol) high = t;
133: else low = t;
134: }
135: for (_i=low; _i<high; _i++) {
136: if (rp[_i] > bcol) break;
137: if (rp[_i] == bcol) {
138: bap = ap + bs2*_i + bs*cidx + ridx;
139: if (addv == ADD_VALUES) *bap += value;
140: else *bap = value;
141: goto a_noinsert;
142: }
143: }
144: if (a->nonew == 1) goto a_noinsert;
145: else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
146: if (nrow >= rmax) {
147: /* there is no extra room in row, therefore enlarge */
148: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
149: MatScalar *new_a;
150:
151: if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
152:
153: /* malloc new storage space */
154: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
155: PetscMalloc(len,&new_a);
156: new_j = (int*)(new_a + bs2*new_nz);
157: new_i = new_j + new_nz;
158:
159: /* copy over old data into new slots */
160: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
161: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
162: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
163: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
164: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
165: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
166: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
167: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
168: aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
169: /* free up old matrix storage */
170: PetscFree(a->a);
171: if (!a->singlemalloc) {
172: PetscFree(a->i);
173: PetscFree(a->j);
174: }
175: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
176: a->singlemalloc = PETSC_TRUE;
177:
178: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
179: rmax = aimax[brow] = aimax[brow] + CHUNKSIZE;
180: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
181: a->maxnz += bs2*CHUNKSIZE;
182: a->reallocs++;
183: a->nz++;
184: }
185: N = nrow++ - 1;
186: /* shift up all the later entries in this row */
187: for (ii=N; ii>=_i; ii--) {
188: rp[ii+1] = rp[ii];
189: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
190: }
191: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); }
192: rp[_i] = bcol;
193: ap[bs2*_i + bs*cidx + ridx] = value;
194: a_noinsert:;
195: ailen[brow] = nrow;
196: }
198: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv)
199: {
200: brow = row/bs;
201: rp = bj + bi[brow]; ap = ba + bs2*bi[brow];
202: rmax = bimax[brow]; nrow = bilen[brow];
203: bcol = col/bs;
204: ridx = row % bs; cidx = col % bs;
205: low = 0; high = nrow;
206: while (high-low > 3) {
207: t = (low+high)/2;
208: if (rp[t] > bcol) high = t;
209: else low = t;
210: }
211: for (_i=low; _i<high; _i++) {
212: if (rp[_i] > bcol) break;
213: if (rp[_i] == bcol) {
214: bap = ap + bs2*_i + bs*cidx + ridx;
215: if (addv == ADD_VALUES) *bap += value;
216: else *bap = value;
217: goto b_noinsert;
218: }
219: }
220: if (b->nonew == 1) goto b_noinsert;
221: else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
222: if (nrow >= rmax) {
223: /* there is no extra room in row, therefore enlarge */
224: int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j;
225: MatScalar *new_a;
226:
227: if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
228:
229: /* malloc new storage space */
230: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int);
231: ierr = PetscMalloc(len,&new_a);
232: new_j = (int*)(new_a + bs2*new_nz);
233: new_i = new_j + new_nz;
234:
235: /* copy over old data into new slots */
236: for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];}
237: for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;}
238: PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));
239: len = (new_nz - CHUNKSIZE - bi[brow] - nrow);
240: PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));
241: PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));
242: PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
243: PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE),
244: ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));
245: /* free up old matrix storage */
246: PetscFree(b->a);
247: if (!b->singlemalloc) {
248: PetscFree(b->i);
249: PetscFree(b->j);
250: }
251: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;
252: b->singlemalloc = PETSC_TRUE;
253:
254: rp = bj + bi[brow]; ap = ba + bs2*bi[brow];
255: rmax = bimax[brow] = bimax[brow] + CHUNKSIZE;
256: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
257: b->maxnz += bs2*CHUNKSIZE;
258: b->reallocs++;
259: b->nz++;
260: }
261: N = nrow++ - 1;
262: /* shift up all the later entries in this row */
263: for (ii=N; ii>=_i; ii--) {
264: rp[ii+1] = rp[ii];
265: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
266: }
267: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));}
268: rp[_i] = bcol;
269: ap[bs2*_i + bs*cidx + ridx] = value;
270: b_noinsert:;
271: bilen[brow] = nrow;
272: }
274: #if defined(PETSC_USE_MAT_SINGLE)
275: int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
276: {
277: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
278: int ierr,i,N = m*n;
279: MatScalar *vsingle;
282: if (N > b->setvalueslen) {
283: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
284: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
285: b->setvalueslen = N;
286: }
287: vsingle = b->setvaluescopy;
289: for (i=0; i<N; i++) {
290: vsingle[i] = v[i];
291: }
292: MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
293: return(0);
294: }
296: int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
297: {
298: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
299: int ierr,i,N = m*n*b->bs2;
300: MatScalar *vsingle;
303: if (N > b->setvalueslen) {
304: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
305: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
306: b->setvalueslen = N;
307: }
308: vsingle = b->setvaluescopy;
309: for (i=0; i<N; i++) {
310: vsingle[i] = v[i];
311: }
312: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
313: return(0);
314: }
316: int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
317: {
318: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
319: int ierr,i,N = m*n;
320: MatScalar *vsingle;
323: if (N > b->setvalueslen) {
324: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
325: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
326: b->setvalueslen = N;
327: }
328: vsingle = b->setvaluescopy;
329: for (i=0; i<N; i++) {
330: vsingle[i] = v[i];
331: }
332: MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
333: return(0);
334: }
336: int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
337: {
338: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
339: int ierr,i,N = m*n*b->bs2;
340: MatScalar *vsingle;
343: if (N > b->setvalueslen) {
344: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
345: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
346: b->setvalueslen = N;
347: }
348: vsingle = b->setvaluescopy;
349: for (i=0; i<N; i++) {
350: vsingle[i] = v[i];
351: }
352: MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
353: return(0);
354: }
355: #endif
357: int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
358: {
359: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
360: MatScalar value;
361: PetscTruth roworiented = baij->roworiented;
362: int ierr,i,j,row,col;
363: int rstart_orig=baij->rstart_bs;
364: int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
365: int cend_orig=baij->cend_bs,bs=baij->bs;
367: /* Some Variables required in the macro */
368: Mat A = baij->A;
369: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
370: int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
371: MatScalar *aa=a->a;
373: Mat B = baij->B;
374: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
375: int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
376: MatScalar *ba=b->a;
378: int *rp,ii,nrow,_i,rmax,N,brow,bcol;
379: int low,high,t,ridx,cidx,bs2=a->bs2;
380: MatScalar *ap,*bap;
383: for (i=0; i<m; i++) {
384: if (im[i] < 0) continue;
385: #if defined(PETSC_USE_BOPT_g)
386: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
387: #endif
388: if (im[i] >= rstart_orig && im[i] < rend_orig) {
389: row = im[i] - rstart_orig;
390: for (j=0; j<n; j++) {
391: if (in[j] >= cstart_orig && in[j] < cend_orig){
392: col = in[j] - cstart_orig;
393: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
394: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
395: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
396: } else if (in[j] < 0) continue;
397: #if defined(PETSC_USE_BOPT_g)
398: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
399: #endif
400: else {
401: if (mat->was_assembled) {
402: if (!baij->colmap) {
403: CreateColmap_MPIBAIJ_Private(mat);
404: }
405: #if defined (PETSC_USE_CTABLE)
406: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
407: col = col - 1;
408: #else
409: col = baij->colmap[in[j]/bs] - 1;
410: #endif
411: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
412: DisAssemble_MPIBAIJ(mat);
413: col = in[j];
414: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
415: B = baij->B;
416: b = (Mat_SeqBAIJ*)(B)->data;
417: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
418: ba=b->a;
419: } else col += in[j]%bs;
420: } else col = in[j];
421: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
422: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
423: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
424: }
425: }
426: } else {
427: if (!baij->donotstash) {
428: if (roworiented) {
429: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
430: } else {
431: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
432: }
433: }
434: }
435: }
436: return(0);
437: }
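/*
   Point-wise insertion sketch (A is a MATMPIBAIJ matrix; row and col are global point
   indices, not block indices):

     PetscScalar v = 1.0;
     MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   Values destined for rows owned by other processes are stashed above and exchanged
   during assembly.
*/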
439: int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
440: {
441: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
442: MatScalar *value,*barray=baij->barray;
443: PetscTruth roworiented = baij->roworiented;
444: int ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
445: int rend=baij->rend,cstart=baij->cstart,stepval;
446: int cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
447:
449: if(!barray) {
450: ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);
451: baij->barray = barray;
452: }
454: if (roworiented) {
455: stepval = (n-1)*bs;
456: } else {
457: stepval = (m-1)*bs;
458: }
459: for (i=0; i<m; i++) {
460: if (im[i] < 0) continue;
461: #if defined(PETSC_USE_BOPT_g)
462: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
463: #endif
464: if (im[i] >= rstart && im[i] < rend) {
465: row = im[i] - rstart;
466: for (j=0; j<n; j++) {
467: /* If NumCol = 1 then a copy is not required */
468: if ((roworiented) && (n == 1)) {
469: barray = v + i*bs2;
470: } else if((!roworiented) && (m == 1)) {
471: barray = v + j*bs2;
472: } else { /* Here a copy is required */
473: if (roworiented) {
474: value = v + i*(stepval+bs)*bs + j*bs;
475: } else {
476: value = v + j*(stepval+bs)*bs + i*bs;
477: }
478: for (ii=0; ii<bs; ii++,value+=stepval) {
479: for (jj=0; jj<bs; jj++) {
480: *barray++ = *value++;
481: }
482: }
483: barray -=bs2;
484: }
485:
486: if (in[j] >= cstart && in[j] < cend){
487: col = in[j] - cstart;
488: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);
489: }
490: else if (in[j] < 0) continue;
491: #if defined(PETSC_USE_BOPT_g)
492: else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
493: #endif
494: else {
495: if (mat->was_assembled) {
496: if (!baij->colmap) {
497: CreateColmap_MPIBAIJ_Private(mat);
498: }
500: #if defined(PETSC_USE_BOPT_g)
501: #if defined (PETSC_USE_CTABLE)
502: { int data;
503: PetscTableFind(baij->colmap,in[j]+1,&data);
504: if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
505: }
506: #else
507: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
508: #endif
509: #endif
510: #if defined (PETSC_USE_CTABLE)
511: PetscTableFind(baij->colmap,in[j]+1,&col);
512: col = (col - 1)/bs;
513: #else
514: col = (baij->colmap[in[j]] - 1)/bs;
515: #endif
516: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
517: DisAssemble_MPIBAIJ(mat);
518: col = in[j];
519: }
520: }
521: else col = in[j];
522: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);
523: }
524: }
525: } else {
526: if (!baij->donotstash) {
527: if (roworiented) {
528: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
529: } else {
530: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
531: }
532: }
533: }
534: }
535: return(0);
536: }
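/*
   Blocked insertion sketch (idxm/idxn are global BLOCK indices; v supplies one bs x bs
   block per (row,column) pair, row oriented by default), e.g. for bs == 2:

     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
     int         brow = 0,bcol = 1;
     MatSetValuesBlocked(A,1,&brow,1,&bcol,vals,ADD_VALUES);
*/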
538: #define HASH_KEY 0.6180339887
539: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
540: /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
541: /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
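/*
   This is the classical multiplicative hash: the key is scaled by the golden-ratio
   constant and the fractional part selects a slot.  For example, with size = 1000 and
   key = 12345: 12345*0.6180339887 = 7629.63..., fractional part 0.63, giving slot 629.
*/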
542: int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
543: {
544: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
545: PetscTruth roworiented = baij->roworiented;
546: int ierr,i,j,row,col;
547: int rstart_orig=baij->rstart_bs;
548: int rend_orig=baij->rend_bs,Nbs=baij->Nbs;
549: int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
550: PetscReal tmp;
551: MatScalar **HD = baij->hd,value;
552: #if defined(PETSC_USE_BOPT_g)
553: int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
554: #endif
558: for (i=0; i<m; i++) {
559: #if defined(PETSC_USE_BOPT_g)
560: if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
561: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
562: #endif
563: row = im[i];
564: if (row >= rstart_orig && row < rend_orig) {
565: for (j=0; j<n; j++) {
566: col = in[j];
567: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
568: /* Look up into the Hash Table */
569: key = (row/bs)*Nbs+(col/bs)+1;
570: h1 = HASH(size,key,tmp);
572:
573: idx = h1;
574: #if defined(PETSC_USE_BOPT_g)
575: insert_ct++;
576: total_ct++;
577: if (HT[idx] != key) {
578: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
579: if (idx == size) {
580: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
581: if (idx == h1) {
582: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
583: }
584: }
585: }
586: #else
587: if (HT[idx] != key) {
588: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
589: if (idx == size) {
590: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
591: if (idx == h1) {
592: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
593: }
594: }
595: }
596: #endif
597: /* A HASH table entry is found, so insert the values at the correct address */
598: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
599: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
600: }
601: } else {
602: if (!baij->donotstash) {
603: if (roworiented) {
604: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
605: } else {
606: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
607: }
608: }
609: }
610: }
611: #if defined(PETSC_USE_BOPT_g)
612: baij->ht_total_ct = total_ct;
613: baij->ht_insert_ct = insert_ct;
614: #endif
615: return(0);
616: }
618: int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
619: {
620: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
621: PetscTruth roworiented = baij->roworiented;
622: int ierr,i,j,ii,jj,row,col;
623: int rstart=baij->rstart ;
624: int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
625: int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
626: PetscReal tmp;
627: MatScalar **HD = baij->hd,*baij_a;
628: MatScalar *v_t,*value;
629: #if defined(PETSC_USE_BOPT_g)
630: int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
631: #endif
632:
635: if (roworiented) {
636: stepval = (n-1)*bs;
637: } else {
638: stepval = (m-1)*bs;
639: }
640: for (i=0; i<m; i++) {
641: #if defined(PETSC_USE_BOPT_g)
642: if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
643: if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
644: #endif
645: row = im[i];
646: v_t = v + i*bs2;
647: if (row >= rstart && row < rend) {
648: for (j=0; j<n; j++) {
649: col = in[j];
651: /* Look up into the Hash Table */
652: key = row*Nbs+col+1;
653: h1 = HASH(size,key,tmp);
654:
655: idx = h1;
656: #if defined(PETSC_USE_BOPT_g)
657: total_ct++;
658: insert_ct++;
659: if (HT[idx] != key) {
660: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
661: if (idx == size) {
662: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
663: if (idx == h1) {
664: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
665: }
666: }
667: }
668: #else
669: if (HT[idx] != key) {
670: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
671: if (idx == size) {
672: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
673: if (idx == h1) {
674: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
675: }
676: }
677: }
678: #endif
679: baij_a = HD[idx];
680: if (roworiented) {
681: /*value = v + i*(stepval+bs)*bs + j*bs;*/
682: /* value = v + (i*(stepval+bs)+j)*bs; */
683: value = v_t;
684: v_t += bs;
685: if (addv == ADD_VALUES) {
686: for (ii=0; ii<bs; ii++,value+=stepval) {
687: for (jj=ii; jj<bs2; jj+=bs) {
688: baij_a[jj] += *value++;
689: }
690: }
691: } else {
692: for (ii=0; ii<bs; ii++,value+=stepval) {
693: for (jj=ii; jj<bs2; jj+=bs) {
694: baij_a[jj] = *value++;
695: }
696: }
697: }
698: } else {
699: value = v + j*(stepval+bs)*bs + i*bs;
700: if (addv == ADD_VALUES) {
701: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
702: for (jj=0; jj<bs; jj++) {
703: baij_a[jj] += *value++;
704: }
705: }
706: } else {
707: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
708: for (jj=0; jj<bs; jj++) {
709: baij_a[jj] = *value++;
710: }
711: }
712: }
713: }
714: }
715: } else {
716: if (!baij->donotstash) {
717: if (roworiented) {
718: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
719: } else {
720: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
721: }
722: }
723: }
724: }
725: #if defined(PETSC_USE_BOPT_g)
726: baij->ht_total_ct = total_ct;
727: baij->ht_insert_ct = insert_ct;
728: #endif
729: return(0);
730: }
732: int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
733: {
734: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
735: int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
736: int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
739: for (i=0; i<m; i++) {
740: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
741: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
742: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
743: row = idxm[i] - bsrstart;
744: for (j=0; j<n; j++) {
745: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
746: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
747: if (idxn[j] >= bscstart && idxn[j] < bscend){
748: col = idxn[j] - bscstart;
749: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
750: } else {
751: if (!baij->colmap) {
752: CreateColmap_MPIBAIJ_Private(mat);
753: }
754: #if defined (PETSC_USE_CTABLE)
755: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
756: data --;
757: #else
758: data = baij->colmap[idxn[j]/bs]-1;
759: #endif
760: if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
761: else {
762: col = data + idxn[j]%bs;
763: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
764: }
765: }
766: }
767: } else {
768: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
769: }
770: }
771: return(0);
772: }
774: int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
775: {
776: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
777: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
778: int ierr,i,bs2=baij->bs2;
779: PetscReal sum = 0.0;
780: MatScalar *v;
783: if (baij->size == 1) {
784: MatNorm(baij->A,type,nrm);
785: } else {
786: if (type == NORM_FROBENIUS) {
787: v = amat->a;
788: for (i=0; i<amat->nz*bs2; i++) {
789: #if defined(PETSC_USE_COMPLEX)
790: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
791: #else
792: sum += (*v)*(*v); v++;
793: #endif
794: }
795: v = bmat->a;
796: for (i=0; i<bmat->nz*bs2; i++) {
797: #if defined(PETSC_USE_COMPLEX)
798: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
799: #else
800: sum += (*v)*(*v); v++;
801: #endif
802: }
803: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);
804: *nrm = sqrt(*nrm);
805: } else {
806: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
807: }
808: }
809: return(0);
810: }
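/*
   For NORM_FROBENIUS the loops above accumulate the local sum of |a_ij|^2 over both the
   diagonal (A) and off-diagonal (B) blocks; the sums are then combined with MPI_SUM and
   the square root taken, i.e. ||A||_F = sqrt(sum_ij |a_ij|^2).
*/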
813: /*
814: Creates the hash table, and sets the table
815: This table is created only once.
816: If new entries need to be added to the matrix
817: then the hash table has to be destroyed and
818: recreated.
819: */
820: int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
821: {
822: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
823: Mat A = baij->A,B=baij->B;
824: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
825: int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
826: int size,bs2=baij->bs2,rstart=baij->rstart,ierr;
827: int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
828: int *HT,key;
829: MatScalar **HD;
830: PetscReal tmp;
831: #if defined(PETSC_USE_BOPT_g)
832: int ct=0,max=0;
833: #endif
836: baij->ht_size=(int)(factor*nz);
837: size = baij->ht_size;
839: if (baij->ht) {
840: return(0);
841: }
842:
843: /* Allocate Memory for Hash Table */
844: ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);
845: baij->ht = (int*)(baij->hd + size);
846: HD = baij->hd;
847: HT = baij->ht;
850: PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));
851:
853: /* Loop Over A */
854: for (i=0; i<a->mbs; i++) {
855: for (j=ai[i]; j<ai[i+1]; j++) {
856: row = i+rstart;
857: col = aj[j]+cstart;
858:
859: key = row*Nbs + col + 1;
860: h1 = HASH(size,key,tmp);
861: for (k=0; k<size; k++){
862: if (HT[(h1+k)%size] == 0.0) {
863: HT[(h1+k)%size] = key;
864: HD[(h1+k)%size] = a->a + j*bs2;
865: break;
866: #if defined(PETSC_USE_BOPT_g)
867: } else {
868: ct++;
869: #endif
870: }
871: }
872: #if defined(PETSC_USE_BOPT_g)
873: if (k> max) max = k;
874: #endif
875: }
876: }
877: /* Loop Over B */
878: for (i=0; i<b->mbs; i++) {
879: for (j=bi[i]; j<bi[i+1]; j++) {
880: row = i+rstart;
881: col = garray[bj[j]];
882: key = row*Nbs + col + 1;
883: h1 = HASH(size,key,tmp);
884: for (k=0; k<size; k++){
885: if (HT[(h1+k)%size] == 0.0) {
886: HT[(h1+k)%size] = key;
887: HD[(h1+k)%size] = b->a + j*bs2;
888: break;
889: #if defined(PETSC_USE_BOPT_g)
890: } else {
891: ct++;
892: #endif
893: }
894: }
895: #if defined(PETSC_USE_BOPT_g)
896: if (k> max) max = k;
897: #endif
898: }
899: }
900:
901: /* Print Summary */
902: #if defined(PETSC_USE_BOPT_g)
903: for (i=0,j=0; i<size; i++) {
904: if (HT[i]) {j++;}
905: }
906: PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
907: #endif
908: return(0);
909: }
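/*
   Sketch of how the hash-table insertion path is typically enabled from application code
   (the table itself is built at the end of the first final assembly, see
   MatAssemblyEnd_MPIBAIJ() below; ht_fact can be adjusted with -mat_use_hash_table <factor>):

     MatSetOption(A,MAT_USE_HASH_TABLE);
     ... MatSetValues()/MatSetValuesBlocked() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/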
911: int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
912: {
913: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
914: int ierr,nstash,reallocs;
915: InsertMode addv;
918: if (baij->donotstash) {
919: return(0);
920: }
922: /* make sure all processors are either in INSERTMODE or ADDMODE */
923: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
924: if (addv == (ADD_VALUES|INSERT_VALUES)) {
925: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
926: }
927: mat->insertmode = addv; /* in case this processor had no cache */
929: MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);
930: MatStashScatterBegin_Private(&mat->bstash,baij->rowners);
931: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
932: PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
933: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
934: PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
935: return(0);
936: }
938: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
939: int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
940: {
941: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
942: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
943: int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
944: int *row,*col,other_disassembled;
945: PetscTruth r1,r2,r3;
946: MatScalar *val;
947: InsertMode addv = mat->insertmode;
948: #if defined(PETSC_HAVE_DSCPACK)
949: PetscTruth flag;
950: #endif
953: if (!baij->donotstash) {
954: while (1) {
955: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
956: if (!flg) break;
958: for (i=0; i<n;) {
959: /* Now identify the consecutive vals belonging to the same row */
960: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
961: if (j < n) ncols = j-i;
962: else ncols = n-i;
963: /* Now assemble all these values with a single function call */
964: MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);
965: i = j;
966: }
967: }
968: MatStashScatterEnd_Private(&mat->stash);
969: /* Now process the block-stash. Since the values are stashed column-oriented,
970: set the roworiented flag to column oriented, and after MatSetValues()
971: restore the original flags */
972: r1 = baij->roworiented;
973: r2 = a->roworiented;
974: r3 = b->roworiented;
975: baij->roworiented = PETSC_FALSE;
976: a->roworiented = PETSC_FALSE;
977: b->roworiented = PETSC_FALSE;
978: while (1) {
979: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
980: if (!flg) break;
981:
982: for (i=0; i<n;) {
983: /* Now identify the consecutive vals belonging to the same row */
984: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
985: if (j < n) ncols = j-i;
986: else ncols = n-i;
987: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
988: i = j;
989: }
990: }
991: MatStashScatterEnd_Private(&mat->bstash);
992: baij->roworiented = r1;
993: a->roworiented = r2;
994: b->roworiented = r3;
995: }
997: MatAssemblyBegin(baij->A,mode);
998: MatAssemblyEnd(baij->A,mode);
1000: /* determine if any processor has disassembled, if so we must
1001: also disassemble ourselves, in order that we may reassemble. */
1002: /*
1003: if nonzero structure of submatrix B cannot change then we know that
1004: no processor disassembled thus we can skip this stuff
1005: */
1006: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
1007: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
1008: if (mat->was_assembled && !other_disassembled) {
1009: DisAssemble_MPIBAIJ(mat);
1010: }
1011: }
1013: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1014: MatSetUpMultiply_MPIBAIJ(mat);
1015: }
1016: MatAssemblyBegin(baij->B,mode);
1017: MatAssemblyEnd(baij->B,mode);
1018:
1019: #if defined(PETSC_USE_BOPT_g)
1020: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1021: PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1022: baij->ht_total_ct = 0;
1023: baij->ht_insert_ct = 0;
1024: }
1025: #endif
1026: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1027: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
1028: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
1029: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1030: }
1032: if (baij->rowvalues) {
1033: PetscFree(baij->rowvalues);
1034: baij->rowvalues = 0;
1035: }
1036: #if defined(PETSC_HAVE_DSCPACK)
1037: PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);
1038: if (flag) { MatUseDSCPACK_MPIBAIJ(mat); }
1039: #endif
1040: return(0);
1041: }
1043: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
1045: static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1046: {
1047: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1048: int ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
1049: PetscTruth isascii,isdraw;
1050: PetscViewer sviewer;
1051: PetscViewerFormat format;
1054: /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1055: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
1056: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1057: if (isascii) {
1058: PetscViewerGetFormat(viewer,&format);
1059: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
1060: MatInfo info;
1061: MPI_Comm_rank(mat->comm,&rank);
1062: MatGetInfo(mat,MAT_LOCAL,&info);
1063: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1064: rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1065: baij->bs,(int)info.memory);
1066: MatGetInfo(baij->A,MAT_LOCAL,&info);
1067: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1068: MatGetInfo(baij->B,MAT_LOCAL,&info);
1069: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1070: PetscViewerFlush(viewer);
1071: VecScatterView(baij->Mvctx,viewer);
1072: return(0);
1073: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1074: PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);
1075: return(0);
1076: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1077: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
1078: MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);
1079: #endif
1080: return(0);
1081: }
1082: }
1084: if (isdraw) {
1085: PetscDraw draw;
1086: PetscTruth isnull;
1087: PetscViewerDrawGetDraw(viewer,0,&draw);
1088: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1089: }
1091: if (size == 1) {
1092: PetscObjectSetName((PetscObject)baij->A,mat->name);
1093: MatView(baij->A,viewer);
1094: } else {
1095: /* assemble the entire matrix onto first processor. */
1096: Mat A;
1097: Mat_SeqBAIJ *Aloc;
1098: int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1099: MatScalar *a;
1101: if (!rank) {
1102: MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
1103: } else {
1104: MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
1105: }
1106: PetscLogObjectParent(mat,A);
1108: /* copy over the A part */
1109: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1110: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1111: PetscMalloc(bs*sizeof(int),&rvals);
1113: for (i=0; i<mbs; i++) {
1114: rvals[0] = bs*(baij->rstart + i);
1115: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1116: for (j=ai[i]; j<ai[i+1]; j++) {
1117: col = (baij->cstart+aj[j])*bs;
1118: for (k=0; k<bs; k++) {
1119: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1120: col++; a += bs;
1121: }
1122: }
1123: }
1124: /* copy over the B part */
1125: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1126: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1127: for (i=0; i<mbs; i++) {
1128: rvals[0] = bs*(baij->rstart + i);
1129: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1130: for (j=ai[i]; j<ai[i+1]; j++) {
1131: col = baij->garray[aj[j]]*bs;
1132: for (k=0; k<bs; k++) {
1133: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1134: col++; a += bs;
1135: }
1136: }
1137: }
1138: PetscFree(rvals);
1139: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1140: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1141: /*
1142: Everyone has to call to draw the matrix since the graphics waits are
1143: synchronized across all processors that share the PetscDraw object
1144: */
1145: PetscViewerGetSingleton(viewer,&sviewer);
1146: if (!rank) {
1147: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);
1148: MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1149: }
1150: PetscViewerRestoreSingleton(viewer,&sviewer);
1151: MatDestroy(A);
1152: }
1153: return(0);
1154: }
1156: int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1157: {
1158: int ierr;
1159: PetscTruth isascii,isdraw,issocket,isbinary;
1162: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
1163: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1164: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1165: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1166: if (isascii || isdraw || issocket || isbinary) {
1167: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1168: } else {
1169: SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1170: }
1171: return(0);
1172: }
1174: int MatDestroy_MPIBAIJ(Mat mat)
1175: {
1176: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1177: int ierr;
1180: #if defined(PETSC_USE_LOG)
1181: PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
1182: #endif
1183: MatStashDestroy_Private(&mat->stash);
1184: MatStashDestroy_Private(&mat->bstash);
1185: PetscFree(baij->rowners);
1186: MatDestroy(baij->A);
1187: MatDestroy(baij->B);
1188: #if defined (PETSC_USE_CTABLE)
1189: if (baij->colmap) {PetscTableDelete(baij->colmap);}
1190: #else
1191: if (baij->colmap) {PetscFree(baij->colmap);}
1192: #endif
1193: if (baij->garray) {PetscFree(baij->garray);}
1194: if (baij->lvec) {VecDestroy(baij->lvec);}
1195: if (baij->Mvctx) {VecScatterDestroy(baij->Mvctx);}
1196: if (baij->rowvalues) {PetscFree(baij->rowvalues);}
1197: if (baij->barray) {PetscFree(baij->barray);}
1198: if (baij->hd) {PetscFree(baij->hd);}
1199: #if defined(PETSC_USE_MAT_SINGLE)
1200: if (baij->setvaluescopy) {PetscFree(baij->setvaluescopy);}
1201: #endif
1202: PetscFree(baij);
1203: return(0);
1204: }
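/*
   y = A*x for the parallel BAIJ format: the scatter of the needed off-process (ghost)
   values of x into a->lvec is started first, the local diagonal block is multiplied
   while the messages are in flight, and the off-diagonal block is then applied to the
   gathered ghost values, overlapping communication with computation.
*/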
1206: int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1207: {
1208: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1209: int ierr,nt;
1212: VecGetLocalSize(xx,&nt);
1213: if (nt != A->n) {
1214: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1215: }
1216: VecGetLocalSize(yy,&nt);
1217: if (nt != A->m) {
1218: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1219: }
1220: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1221: (*a->A->ops->mult)(a->A,xx,yy);
1222: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1223: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1224: VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1225: return(0);
1226: }
1228: int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1229: {
1230: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1231: int ierr;
1234: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1235: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1236: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1237: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1238: return(0);
1239: }
1241: int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1242: {
1243: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1244: int ierr;
1247: /* do nondiagonal part */
1248: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1249: /* send it on its way */
1250: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1251: /* do local part */
1252: (*a->A->ops->multtranspose)(a->A,xx,yy);
1253: /* receive remote parts: note this assumes the values are not actually */
1254: /* inserted in yy until the next line, which is true for this implementation */
1255: /* but may not always be true. */
1256: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1257: return(0);
1258: }
1260: int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261: {
1262: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1263: int ierr;
1266: /* do nondiagonal part */
1267: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1268: /* send it on its way */
1269: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1270: /* do local part */
1271: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1272: /* receive remote parts: note this assumes the values are not actually */
1273: /* inserted in zz until the next line, which is true for this implementation */
1274: /* but may not always be true. */
1275: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1276: return(0);
1277: }
1279: /*
1280: This only works correctly for square matrices where the subblock A->A is the
1281: diagonal block
1282: */
1283: int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1284: {
1285: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1286: int ierr;
1289: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1290: MatGetDiagonal(a->A,v);
1291: return(0);
1292: }
1294: int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
1295: {
1296: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1297: int ierr;
1300: MatScale(aa,a->A);
1301: MatScale(aa,a->B);
1302: return(0);
1303: }
1305: int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1306: {
1307: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1308: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1309: int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1310: int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1311: int *cmap,*idx_p,cstart = mat->cstart;
1314: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1315: mat->getrowactive = PETSC_TRUE;
1317: if (!mat->rowvalues && (idx || v)) {
1318: /*
1319: allocate enough space to hold information from the longest row.
1320: */
1321: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1322: int max = 1,mbs = mat->mbs,tmp;
1323: for (i=0; i<mbs; i++) {
1324: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1325: if (max < tmp) { max = tmp; }
1326: }
1327: PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1328: mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1329: }
1330:
1331: if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1332: lrow = row - brstart;
1334: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1335: if (!v) {pvA = 0; pvB = 0;}
1336: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1337: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1338: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1339: nztot = nzA + nzB;
1341: cmap = mat->garray;
1342: if (v || idx) {
1343: if (nztot) {
1344: /* Sort by increasing column numbers, assuming A and B already sorted */
1345: int imark = -1;
1346: if (v) {
1347: *v = v_p = mat->rowvalues;
1348: for (i=0; i<nzB; i++) {
1349: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1350: else break;
1351: }
1352: imark = i;
1353: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1354: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1355: }
1356: if (idx) {
1357: *idx = idx_p = mat->rowindices;
1358: if (imark > -1) {
1359: for (i=0; i<imark; i++) {
1360: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1361: }
1362: } else {
1363: for (i=0; i<nzB; i++) {
1364: if (cmap[cworkB[i]/bs] < cstart)
1365: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1366: else break;
1367: }
1368: imark = i;
1369: }
1370: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1371: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1372: }
1373: } else {
1374: if (idx) *idx = 0;
1375: if (v) *v = 0;
1376: }
1377: }
1378: *nz = nztot;
1379: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1380: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1381: return(0);
1382: }
1384: int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1385: {
1386: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1389: if (baij->getrowactive == PETSC_FALSE) {
1390: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1391: }
1392: baij->getrowactive = PETSC_FALSE;
1393: return(0);
1394: }
1396: int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1397: {
1398: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1401: *bs = baij->bs;
1402: return(0);
1403: }
1405: int MatZeroEntries_MPIBAIJ(Mat A)
1406: {
1407: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1408: int ierr;
1411: MatZeroEntries(l->A);
1412: MatZeroEntries(l->B);
1413: return(0);
1414: }
1416: int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1417: {
1418: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1419: Mat A = a->A,B = a->B;
1420: int ierr;
1421: PetscReal isend[5],irecv[5];
1424: info->block_size = (PetscReal)a->bs;
1425: MatGetInfo(A,MAT_LOCAL,info);
1426: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1427: isend[3] = info->memory; isend[4] = info->mallocs;
1428: MatGetInfo(B,MAT_LOCAL,info);
1429: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1430: isend[3] += info->memory; isend[4] += info->mallocs;
1431: if (flag == MAT_LOCAL) {
1432: info->nz_used = isend[0];
1433: info->nz_allocated = isend[1];
1434: info->nz_unneeded = isend[2];
1435: info->memory = isend[3];
1436: info->mallocs = isend[4];
1437: } else if (flag == MAT_GLOBAL_MAX) {
1438: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1439: info->nz_used = irecv[0];
1440: info->nz_allocated = irecv[1];
1441: info->nz_unneeded = irecv[2];
1442: info->memory = irecv[3];
1443: info->mallocs = irecv[4];
1444: } else if (flag == MAT_GLOBAL_SUM) {
1445: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1446: info->nz_used = irecv[0];
1447: info->nz_allocated = irecv[1];
1448: info->nz_unneeded = irecv[2];
1449: info->memory = irecv[3];
1450: info->mallocs = irecv[4];
1451: } else {
1452: SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1453: }
1454: info->rows_global = (PetscReal)A->M;
1455: info->columns_global = (PetscReal)A->N;
1456: info->rows_local = (PetscReal)A->m;
1457: info->columns_local = (PetscReal)A->N;
1458: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1459: info->fill_ratio_needed = 0;
1460: info->factor_mallocs = 0;
1461: return(0);
1462: }
1464: int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1465: {
1466: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1467: int ierr;
1470: switch (op) {
1471: case MAT_NO_NEW_NONZERO_LOCATIONS:
1472: case MAT_YES_NEW_NONZERO_LOCATIONS:
1473: case MAT_COLUMNS_UNSORTED:
1474: case MAT_COLUMNS_SORTED:
1475: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1476: case MAT_KEEP_ZEROED_ROWS:
1477: case MAT_NEW_NONZERO_LOCATION_ERR:
1478: MatSetOption(a->A,op);
1479: MatSetOption(a->B,op);
1480: break;
1481: case MAT_ROW_ORIENTED:
1482: a->roworiented = PETSC_TRUE;
1483: MatSetOption(a->A,op);
1484: MatSetOption(a->B,op);
1485: break;
1486: case MAT_ROWS_SORTED:
1487: case MAT_ROWS_UNSORTED:
1488: case MAT_YES_NEW_DIAGONALS:
1489: case MAT_USE_SINGLE_PRECISION_SOLVES:
1490: PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1491: break;
1492: case MAT_COLUMN_ORIENTED:
1493: a->roworiented = PETSC_FALSE;
1494: MatSetOption(a->A,op);
1495: MatSetOption(a->B,op);
1496: break;
1497: case MAT_IGNORE_OFF_PROC_ENTRIES:
1498: a->donotstash = PETSC_TRUE;
1499: break;
1500: case MAT_NO_NEW_DIAGONALS:
1501: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1502: case MAT_USE_HASH_TABLE:
1503: a->ht_flag = PETSC_TRUE;
1504: break;
1505: default:
1506: SETERRQ(PETSC_ERR_SUP,"unknown option");
1507: }
1508: return(0);
1509: }
1511: int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1512: {
1513: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1514: Mat_SeqBAIJ *Aloc;
1515: Mat B;
1516: int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1517: int bs=baij->bs,mbs=baij->mbs;
1518: MatScalar *a;
1519:
1521: if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1522: MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1523:
1524: /* copy over the A part */
1525: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1526: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1527: PetscMalloc(bs*sizeof(int),&rvals);
1528:
1529: for (i=0; i<mbs; i++) {
1530: rvals[0] = bs*(baij->rstart + i);
1531: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1532: for (j=ai[i]; j<ai[i+1]; j++) {
1533: col = (baij->cstart+aj[j])*bs;
1534: for (k=0; k<bs; k++) {
1535: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1536: col++; a += bs;
1537: }
1538: }
1539: }
1540: /* copy over the B part */
1541: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1542: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543: for (i=0; i<mbs; i++) {
1544: rvals[0] = bs*(baij->rstart + i);
1545: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1546: for (j=ai[i]; j<ai[i+1]; j++) {
1547: col = baij->garray[aj[j]]*bs;
1548: for (k=0; k<bs; k++) {
1549: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1550: col++; a += bs;
1551: }
1552: }
1553: }
1554: PetscFree(rvals);
1555: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1556: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1557:
1558: if (matout) {
1559: *matout = B;
1560: } else {
1561: MatHeaderCopy(A,B);
1562: }
1563: return(0);
1564: }
1566: int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1567: {
1568: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1569: Mat a = baij->A,b = baij->B;
1570: int ierr,s1,s2,s3;
1573: MatGetLocalSize(mat,&s2,&s3);
1574: if (rr) {
1575: VecGetLocalSize(rr,&s1);
1576: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1577: /* Overlap communication with computation. */
1578: VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1579: }
1580: if (ll) {
1581: VecGetLocalSize(ll,&s1);
1582: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1583: (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1584: }
1585: /* scale the diagonal block */
1586: (*a->ops->diagonalscale)(a,ll,rr);
1588: if (rr) {
1589: /* Do a scatter end and then right scale the off-diagonal block */
1590: VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1591: (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1592: }
1593:
1594: return(0);
1595: }
1597: int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag)
1598: {
1599: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1600: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1601: int *procs,*nprocs,j,idx,nsends,*work,row;
1602: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1603: int *rvalues,tag = A->tag,count,base,slen,n,*source;
1604: int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1605: MPI_Comm comm = A->comm;
1606: MPI_Request *send_waits,*recv_waits;
1607: MPI_Status recv_status,*send_status;
1608: IS istmp;
1609: PetscTruth found;
1610:
1612: ISGetLocalSize(is,&N);
1613: ISGetIndices(is,&rows);
1614:
1615: /* first count number of contributors to each processor */
1616: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
1617: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
1618: procs = nprocs + size;
1619: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
1620: for (i=0; i<N; i++) {
1621: idx = rows[i];
1622: found = PETSC_FALSE;
1623: for (j=0; j<size; j++) {
1624: if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1625: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1626: }
1627: }
1628: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1629: }
1630: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
1631:
1632: /* inform other processors of number of messages and max length*/
1633: ierr = PetscMalloc(2*size*sizeof(int),&work);
1634: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
1635: nmax = work[rank];
1636: nrecvs = work[size+rank];
1637: ierr = PetscFree(work);
1638:
1639: /* post receives: */
1640: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1641: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1642: for (i=0; i<nrecvs; i++) {
1643: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1644: }
1645:
1646: /* do sends:
1647: 1) starts[i] gives the starting index in svalues for stuff going to
1648: the ith processor
1649: */
1650: PetscMalloc((N+1)*sizeof(int),&svalues);
1651: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1652: PetscMalloc((size+1)*sizeof(int),&starts);
1653: starts[0] = 0;
1654: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1655: for (i=0; i<N; i++) {
1656: svalues[starts[owner[i]]++] = rows[i];
1657: }
1658: ISRestoreIndices(is,&rows);
1659:
1660: starts[0] = 0;
1661: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1662: count = 0;
1663: for (i=0; i<size; i++) {
1664: if (procs[i]) {
1665: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
1666: }
1667: }
1668: PetscFree(starts);
1670: base = owners[rank]*bs;
1671:
1672: /* wait on receives */
1673: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1674: source = lens + nrecvs;
1675: count = nrecvs; slen = 0;
1676: while (count) {
1677: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1678: /* unpack receives into our local space */
1679: MPI_Get_count(&recv_status,MPI_INT,&n);
1680: source[imdex] = recv_status.MPI_SOURCE;
1681: lens[imdex] = n;
1682: slen += n;
1683: count--;
1684: }
1685: PetscFree(recv_waits);
1686:
1687: /* move the data into the send scatter */
1688: PetscMalloc((slen+1)*sizeof(int),&lrows);
1689: count = 0;
1690: for (i=0; i<nrecvs; i++) {
1691: values = rvalues + i*nmax;
1692: for (j=0; j<lens[i]; j++) {
1693: lrows[count++] = values[j] - base;
1694: }
1695: }
1696: PetscFree(rvalues);
1697: PetscFree(lens);
1698: PetscFree(owner);
1699: PetscFree(nprocs);
1700:
1701: /* actually zap the local rows */
1702: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1703: PetscLogObjectParent(A,istmp);
1705: /*
1706: Zero the required rows. If the "diagonal block" of the matrix
1707: is square and the user wishes to set the diagonal we use separate
1708: code so that MatSetValues() is not called for each diagonal entry, allocating
1709: new memory, thus calling lots of mallocs and slowing things down.
1711: Contributed by: Mathew Knepley
1712: */
1713: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1714: MatZeroRows_SeqBAIJ(l->B,istmp,0);
1715: if (diag && (l->A->M == l->A->N)) {
1716: MatZeroRows_SeqBAIJ(l->A,istmp,diag);
1717: } else if (diag) {
1718: MatZeroRows_SeqBAIJ(l->A,istmp,0);
1719: if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1720: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1721: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1722: }
1723: for (i=0; i<slen; i++) {
1724: row = lrows[i] + rstart_bs;
1725: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
1726: }
1727: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1728: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1729: } else {
1730: MatZeroRows_SeqBAIJ(l->A,istmp,0);
1731: }
1733: ISDestroy(istmp);
1734: PetscFree(lrows);
1736: /* wait on sends */
1737: if (nsends) {
1738: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1739: MPI_Waitall(nsends,send_waits,send_status);
1740: PetscFree(send_status);
1741: }
1742: PetscFree(send_waits);
1743: PetscFree(svalues);
1745: return(0);
1746: }
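/*
   Usage sketch (rows holds global point row numbers; if diag is non-null it is placed on
   the diagonal of every zeroed row of a square matrix):

     IS          is;
     int         rows[2] = {0,5};
     PetscScalar one = 1.0;
     ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);
     MatZeroRows(A,is,&one);
     ISDestroy(is);
*/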
1748: int MatPrintHelp_MPIBAIJ(Mat A)
1749: {
1750: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1751: MPI_Comm comm = A->comm;
1752: static int called = 0;
1753: int ierr;
1756: if (!a->rank) {
1757: MatPrintHelp_SeqBAIJ(a->A);
1758: }
1759: if (called) {return(0);} else called = 1;
1760: (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1761: (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1762: return(0);
1763: }
1765: int MatSetUnfactored_MPIBAIJ(Mat A)
1766: {
1767: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1768: int ierr;
1771: MatSetUnfactored(a->A);
1772: return(0);
1773: }
1775: static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1777: int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1778: {
1779: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1780: Mat a,b,c,d;
1781: PetscTruth flg;
1782: int ierr;
1785: PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);
1786: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1787: a = matA->A; b = matA->B;
1788: c = matB->A; d = matB->B;
1790: MatEqual(a,c,&flg);
1791: if (flg == PETSC_TRUE) {
1792: MatEqual(b,d,&flg);
1793: }
1794: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1795: return(0);
1796: }
1799: int MatSetUpPreallocation_MPIBAIJ(Mat A)
1800: {
1801: int ierr;
1804: MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1805: return(0);
1806: }
1808: /* -------------------------------------------------------------------*/
1809: static struct _MatOps MatOps_Values = {
1810: MatSetValues_MPIBAIJ,
1811: MatGetRow_MPIBAIJ,
1812: MatRestoreRow_MPIBAIJ,
1813: MatMult_MPIBAIJ,
1814: MatMultAdd_MPIBAIJ,
1815: MatMultTranspose_MPIBAIJ,
1816: MatMultTransposeAdd_MPIBAIJ,
1817: 0,
1818: 0,
1819: 0,
1820: 0,
1821: 0,
1822: 0,
1823: 0,
1824: MatTranspose_MPIBAIJ,
1825: MatGetInfo_MPIBAIJ,
1826: MatEqual_MPIBAIJ,
1827: MatGetDiagonal_MPIBAIJ,
1828: MatDiagonalScale_MPIBAIJ,
1829: MatNorm_MPIBAIJ,
1830: MatAssemblyBegin_MPIBAIJ,
1831: MatAssemblyEnd_MPIBAIJ,
1832: 0,
1833: MatSetOption_MPIBAIJ,
1834: MatZeroEntries_MPIBAIJ,
1835: MatZeroRows_MPIBAIJ,
1836: 0,
1837: 0,
1838: 0,
1839: 0,
1840: MatSetUpPreallocation_MPIBAIJ,
1841: 0,
1842: 0,
1843: 0,
1844: 0,
1845: MatDuplicate_MPIBAIJ,
1846: 0,
1847: 0,
1848: 0,
1849: 0,
1850: 0,
1851: MatGetSubMatrices_MPIBAIJ,
1852: MatIncreaseOverlap_MPIBAIJ,
1853: MatGetValues_MPIBAIJ,
1854: 0,
1855: MatPrintHelp_MPIBAIJ,
1856: MatScale_MPIBAIJ,
1857: 0,
1858: 0,
1859: 0,
1860: MatGetBlockSize_MPIBAIJ,
1861: 0,
1862: 0,
1863: 0,
1864: 0,
1865: 0,
1866: 0,
1867: MatSetUnfactored_MPIBAIJ,
1868: 0,
1869: MatSetValuesBlocked_MPIBAIJ,
1870: 0,
1871: MatDestroy_MPIBAIJ,
1872: MatView_MPIBAIJ,
1873: MatGetPetscMaps_Petsc,
1874: 0,
1875: 0,
1876: 0,
1877: 0,
1878: 0,
1879: 0,
1880: MatGetRowMax_MPIBAIJ};
1883: EXTERN_C_BEGIN
1884: int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1885: {
1887: *a = ((Mat_MPIBAIJ *)A->data)->A;
1888: *iscopy = PETSC_FALSE;
1889: return(0);
1890: }
1891: EXTERN_C_END
1893: EXTERN_C_BEGIN
1894: int MatCreate_MPIBAIJ(Mat B)
1895: {
1896: Mat_MPIBAIJ *b;
1897: int ierr;
1898: PetscTruth flg;
1902: PetscNew(Mat_MPIBAIJ,&b);
1903: B->data = (void*)b;
1905: ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1906: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1907: B->mapping = 0;
1908: B->factor = 0;
1909: B->assembled = PETSC_FALSE;
1911: B->insertmode = NOT_SET_VALUES;
1912: MPI_Comm_rank(B->comm,&b->rank);
1913: MPI_Comm_size(B->comm,&b->size);
1915: /* build local table of row and column ownerships */
1916: ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);
1917: PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1918: b->cowners = b->rowners + b->size + 2;
1919: b->rowners_bs = b->cowners + b->size + 2;
1921:   /* build cache (stash) for off-processor entries generated during assembly */
1922: MatStashCreate_Private(B->comm,1,&B->stash);
1923: b->donotstash = PETSC_FALSE;
1924: b->colmap = PETSC_NULL;
1925: b->garray = PETSC_NULL;
1926: b->roworiented = PETSC_TRUE;
1928: #if defined(PETSC_USE_MAT_SINGLE)
1929: /* stuff for MatSetValues_XXX in single precision */
1930: b->setvalueslen = 0;
1931: b->setvaluescopy = PETSC_NULL;
1932: #endif
1934: /* stuff used in block assembly */
1935: b->barray = 0;
1937: /* stuff used for matrix vector multiply */
1938: b->lvec = 0;
1939: b->Mvctx = 0;
1941: /* stuff for MatGetRow() */
1942: b->rowindices = 0;
1943: b->rowvalues = 0;
1944: b->getrowactive = PETSC_FALSE;
1946: /* hash table stuff */
1947: b->ht = 0;
1948: b->hd = 0;
1949: b->ht_size = 0;
1950: b->ht_flag = PETSC_FALSE;
1951: b->ht_fact = 0;
1952: b->ht_total_ct = 0;
1953: b->ht_insert_ct = 0;
1955: PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);
1956: if (flg) {
1957: PetscReal fact = 1.39;
1958: MatSetOption(B,MAT_USE_HASH_TABLE);
1959: PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);
1960: if (fact <= 1.0) fact = 1.39;
1961: MatMPIBAIJSetHashTableFactor(B,fact);
1962:     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
1963: }
1964: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1965: "MatStoreValues_MPIBAIJ",
1966: MatStoreValues_MPIBAIJ);
1967: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1968: "MatRetrieveValues_MPIBAIJ",
1969: MatRetrieveValues_MPIBAIJ);
1970: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1971: "MatGetDiagonalBlock_MPIBAIJ",
1972: MatGetDiagonalBlock_MPIBAIJ);
1973: return(0);
1974: }
1975: EXTERN_C_END
1977: /*@C
1978:    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
1979: (block compressed row). For good matrix assembly performance
1980: the user should preallocate the matrix storage by setting the parameters
1981: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1982: performance can be increased by more than a factor of 50.
1984: Collective on Mat
1986: Input Parameters:
1987: + A - the matrix
1988: . bs   - size of block
1989: . d_nz - number of block nonzeros per block row in diagonal portion of local
1990: submatrix (same for all local rows)
1991: . d_nnz - array containing the number of block nonzeros in the various block rows
1992: in the diagonal portion of the local submatrix (possibly different for each block
1993: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
1994: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
1995: submatrix (same for all local rows).
1996: - o_nnz - array containing the number of block nonzeros in the various block rows of the
1997: off-diagonal portion of the local submatrix (possibly different for
1998: each block row) or PETSC_NULL.
2003: Options Database Keys:
2004: . -mat_no_unroll - uses code that does not unroll the loops in the
2005: block calculations (much slower)
2006: . -mat_block_size - size of the blocks to use
2008: Notes:
2009: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2010: then it must be used on all processors that share the object for that argument.
2012: Storage Information:
2013: For a square global matrix we define each processor's diagonal portion
2014: to be its local rows and the corresponding columns (a square submatrix);
2015: each processor's off-diagonal portion encompasses the remainder of the
2016: local matrix (a rectangular submatrix).
2018: The user can specify preallocated storage for the diagonal part of
2019: the local submatrix with either d_nz or d_nnz (not both). Set
2020: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2021: memory allocation. Likewise, specify preallocated storage for the
2022: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2024: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2025: the figure below we depict these three local rows and all columns (0-11).
2027: .vb
2028: 0 1 2 3 4 5 6 7 8 9 10 11
2029: -------------------
2030: row 3 | o o o d d d o o o o o o
2031: row 4 | o o o d d d o o o o o o
2032: row 5 | o o o d d d o o o o o o
2033: -------------------
2034: .ve
2035:
2036: Thus, any entries in the d locations are stored in the d (diagonal)
2037: submatrix, and any entries in the o locations are stored in the
2038: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2039: stored simply in the MATSEQBAIJ format for compressed row storage.
2041: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2042: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2043: In general, for PDE problems in which most nonzeros are near the diagonal,
2044: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2045: or you will get TERRIBLE performance; see the users' manual chapter on
2046: matrices.
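   Example usage (an illustrative sketch, not part of the original manual page; the sizes,
   block size, and preallocation counts below are assumptions):
.vb
      Mat A;
      MatCreate(comm,m,n,M,N,&A);                                 /* local size m x n, global size M x N */
      MatSetType(A,MATMPIBAIJ);
      MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);  /* bs=2; 5 diagonal and 2 off-diagonal
                                                                     block nonzeros per block row */
.ve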
2048: Level: intermediate
2050: .keywords: matrix, block, aij, compressed row, sparse, parallel
2052: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2053: @*/
2054: int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2055: {
2056: Mat_MPIBAIJ *b;
2057: int ierr,i;
2058: PetscTruth flg2;
2061: PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);
2062: if (!flg2) return(0);
2064: B->preallocated = PETSC_TRUE;
2065: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
2067: if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2068: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2069: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2070: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2071: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2072: if (d_nnz) {
2073: for (i=0; i<B->m/bs; i++) {
2074:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2075: }
2076: }
2077: if (o_nnz) {
2078: for (i=0; i<B->m/bs; i++) {
2079:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2080: }
2081: }
2082:
2083: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
2084: PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);
2085: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
2086: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
2088: b = (Mat_MPIBAIJ*)B->data;
2089: b->bs = bs;
2090: b->bs2 = bs*bs;
2091: b->mbs = B->m/bs;
2092: b->nbs = B->n/bs;
2093: b->Mbs = B->M/bs;
2094: b->Nbs = B->N/bs;
2096: MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
2097: b->rowners[0] = 0;
2098: for (i=2; i<=b->size; i++) {
2099: b->rowners[i] += b->rowners[i-1];
2100: }
2101: b->rstart = b->rowners[b->rank];
2102: b->rend = b->rowners[b->rank+1];
2104: MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
2105: b->cowners[0] = 0;
2106: for (i=2; i<=b->size; i++) {
2107: b->cowners[i] += b->cowners[i-1];
2108: }
2109: b->cstart = b->cowners[b->rank];
2110: b->cend = b->cowners[b->rank+1];
2112: for (i=0; i<=b->size; i++) {
2113: b->rowners_bs[i] = b->rowners[i]*bs;
2114: }
2115: b->rstart_bs = b->rstart*bs;
2116: b->rend_bs = b->rend*bs;
2117: b->cstart_bs = b->cstart*bs;
2118: b->cend_bs = b->cend*bs;
2120: MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);
2121: PetscLogObjectParent(B,b->A);
2122: MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);
2123: PetscLogObjectParent(B,b->B);
2124: MatStashCreate_Private(B->comm,bs,&B->bstash);
2126: return(0);
2127: }
2129: /*@C
2130: MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2131: (block compressed row). For good matrix assembly performance
2132: the user should preallocate the matrix storage by setting the parameters
2133: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2134: performance can be increased by more than a factor of 50.
2136: Collective on MPI_Comm
2138: Input Parameters:
2139: + comm - MPI communicator
2140: . bs   - size of block
2141: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2142: This value should be the same as the local size used in creating the
2143: y vector for the matrix-vector product y = Ax.
2144: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2145: This value should be the same as the local size used in creating the
2146: x vector for the matrix-vector product y = Ax.
2147: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2148: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2149: . d_nz - number of nonzero blocks per block row in diagonal portion of local
2150: submatrix (same for all local rows)
2151: . d_nnz - array containing the number of nonzero blocks in the various block rows
2152: in the diagonal portion of the local submatrix (possibly different for each block
2153: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2154: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2155: submatrix (same for all local rows).
2156: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
2157: off-diagonal portion of the local submatrix (possibly different for
2158: each block row) or PETSC_NULL.
2160: Output Parameter:
2161: . A - the matrix
2163: Options Database Keys:
2164: . -mat_no_unroll - uses code that does not unroll the loops in the
2165: block calculations (much slower)
2166: . -mat_block_size - size of the blocks to use
2168: Notes:
2169:    A nonzero block is any block that has 1 or more nonzeros in it
2171: The user MUST specify either the local or global matrix dimensions
2172: (possibly both).
2174: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2175: then it must be used on all processors that share the object for that argument.
2177: Storage Information:
2178: For a square global matrix we define each processor's diagonal portion
2179: to be its local rows and the corresponding columns (a square submatrix);
2180: each processor's off-diagonal portion encompasses the remainder of the
2181: local matrix (a rectangular submatrix).
2183: The user can specify preallocated storage for the diagonal part of
2184: the local submatrix with either d_nz or d_nnz (not both). Set
2185: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2186: memory allocation. Likewise, specify preallocated storage for the
2187: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2189: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2190: the figure below we depict these three local rows and all columns (0-11).
2192: .vb
2193: 0 1 2 3 4 5 6 7 8 9 10 11
2194: -------------------
2195: row 3 | o o o d d d o o o o o o
2196: row 4 | o o o d d d o o o o o o
2197: row 5 | o o o d d d o o o o o o
2198: -------------------
2199: .ve
2200:
2201: Thus, any entries in the d locations are stored in the d (diagonal)
2202: submatrix, and any entries in the o locations are stored in the
2203: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2204: stored simply in the MATSEQBAIJ format for compressed row storage.
2206: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2207: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2208: In general, for PDE problems in which most nonzeros are near the diagonal,
2209: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2210: or you will get TERRIBLE performance; see the users' manual chapter on
2211: matrices.
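   Example usage (an illustrative sketch, not part of the original manual page; the
   communicator, block size, and preallocation counts are assumptions):
.vb
      Mat A;
      MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve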
2213: Level: intermediate
2215: .keywords: matrix, block, aij, compressed row, sparse, parallel
2217: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2218: @*/
2219: int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2220: {
2221: int ierr,size;
2224: MatCreate(comm,m,n,M,N,A);
2225: MPI_Comm_size(comm,&size);
2226: if (size > 1) {
2227: MatSetType(*A,MATMPIBAIJ);
2228: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2229: } else {
2230: MatSetType(*A,MATSEQBAIJ);
2231: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2232: }
2233: return(0);
2234: }
2236: static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2237: {
2238: Mat mat;
2239: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2240: int ierr,len=0;
2243: *newmat = 0;
2244: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
2245: MatSetType(mat,MATMPIBAIJ);
2246: mat->preallocated = PETSC_TRUE;
2247: mat->assembled = PETSC_TRUE;
2248: a = (Mat_MPIBAIJ*)mat->data;
2249: a->bs = oldmat->bs;
2250: a->bs2 = oldmat->bs2;
2251: a->mbs = oldmat->mbs;
2252: a->nbs = oldmat->nbs;
2253: a->Mbs = oldmat->Mbs;
2254: a->Nbs = oldmat->Nbs;
2255:
2256: a->rstart = oldmat->rstart;
2257: a->rend = oldmat->rend;
2258: a->cstart = oldmat->cstart;
2259: a->cend = oldmat->cend;
2260: a->size = oldmat->size;
2261: a->rank = oldmat->rank;
2262: a->donotstash = oldmat->donotstash;
2263: a->roworiented = oldmat->roworiented;
2264: a->rowindices = 0;
2265: a->rowvalues = 0;
2266: a->getrowactive = PETSC_FALSE;
2267: a->barray = 0;
2268: a->rstart_bs = oldmat->rstart_bs;
2269: a->rend_bs = oldmat->rend_bs;
2270: a->cstart_bs = oldmat->cstart_bs;
2271: a->cend_bs = oldmat->cend_bs;
2273: /* hash table stuff */
2274: a->ht = 0;
2275: a->hd = 0;
2276: a->ht_size = 0;
2277: a->ht_flag = oldmat->ht_flag;
2278: a->ht_fact = oldmat->ht_fact;
2279: a->ht_total_ct = 0;
2280: a->ht_insert_ct = 0;
2282: PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
2283: MatStashCreate_Private(matin->comm,1,&mat->stash);
2284: MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);
2285: if (oldmat->colmap) {
2286: #if defined (PETSC_USE_CTABLE)
2287: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2288: #else
2289: PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);
2290: PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2291: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2292: #endif
2293: } else a->colmap = 0;
2294: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2295: PetscMalloc(len*sizeof(int),&a->garray);
2296: PetscLogObjectMemory(mat,len*sizeof(int));
2297: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2298: } else a->garray = 0;
2299:
2300: VecDuplicate(oldmat->lvec,&a->lvec);
2301: PetscLogObjectParent(mat,a->lvec);
2302: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2304: PetscLogObjectParent(mat,a->Mvctx);
2305: MatDuplicate(oldmat->A,cpvalues,&a->A);
2306: PetscLogObjectParent(mat,a->A);
2307: MatDuplicate(oldmat->B,cpvalues,&a->B);
2308: PetscLogObjectParent(mat,a->B);
2309: PetscFListDuplicate(matin->qlist,&mat->qlist);
2310: *newmat = mat;
2311: return(0);
2312: }
2314: #include petscsys.h
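/*
   MatLoad_MPIBAIJ loads an MPIBAIJ matrix from a PETSc binary viewer.  A hedged usage
   sketch from user code (the viewer is assumed to have been opened on a PETSc binary
   file beforehand, e.g. with PetscViewerBinaryOpen()):

       Mat A;
       MatLoad(viewer,MATMPIBAIJ,&A);

   The block size used for the load can be selected with -matload_block_size <bs>; extra
   rows are added when needed so the global size is divisible by the block size.
*/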
2316: EXTERN_C_BEGIN
2317: int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2318: {
2319: Mat A;
2320: int i,nz,ierr,j,rstart,rend,fd;
2321: PetscScalar *vals,*buf;
2322: MPI_Comm comm = ((PetscObject)viewer)->comm;
2323: MPI_Status status;
2324: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2325: int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2326: int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2327: int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2328: int dcount,kmax,k,nzcount,tmp;
2329:
2331: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
2333: MPI_Comm_size(comm,&size);
2334: MPI_Comm_rank(comm,&rank);
2335: if (!rank) {
2336: PetscViewerBinaryGetDescriptor(viewer,&fd);
2337: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2338: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2339: if (header[3] < 0) {
2340: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2341: }
2342: }
2344: MPI_Bcast(header+1,3,MPI_INT,0,comm);
2345: M = header[1]; N = header[2];
2347: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2349: /*
2350: This code adds extra rows to make sure the number of rows is
2351: divisible by the blocksize
2352: */
2353: Mbs = M/bs;
2354: extra_rows = bs - M + bs*(Mbs);
2355: if (extra_rows == bs) extra_rows = 0;
2356: else Mbs++;
2357: if (extra_rows &&!rank) {
2358:     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2359: }
2361: /* determine ownership of all rows */
2362: mbs = Mbs/size + ((Mbs % size) > rank);
2363: m = mbs*bs;
2364: ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);
2365: browners = rowners + size + 1;
2366: ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
2367: rowners[0] = 0;
2368: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2369: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2370: rstart = rowners[rank];
2371: rend = rowners[rank+1];
2373: /* distribute row lengths to all processors */
2374: PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);
2375: if (!rank) {
2376: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
2377: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2378: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2379: PetscMalloc(size*sizeof(int),&sndcounts);
2380: for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2381: MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
2382: PetscFree(sndcounts);
2383: } else {
2384: MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
2385: }
2387: if (!rank) {
2388: /* calculate the number of nonzeros on each processor */
2389: PetscMalloc(size*sizeof(int),&procsnz);
2390: PetscMemzero(procsnz,size*sizeof(int));
2391: for (i=0; i<size; i++) {
2392: for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2393: procsnz[i] += rowlengths[j];
2394: }
2395: }
2396: PetscFree(rowlengths);
2397:
2398: /* determine max buffer needed and allocate it */
2399: maxnz = 0;
2400: for (i=0; i<size; i++) {
2401: maxnz = PetscMax(maxnz,procsnz[i]);
2402: }
2403: PetscMalloc(maxnz*sizeof(int),&cols);
2405: /* read in my part of the matrix column indices */
2406: nz = procsnz[0];
2407: ierr = PetscMalloc(nz*sizeof(int),&ibuf);
2408: mycols = ibuf;
2409: if (size == 1) nz -= extra_rows;
2410: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2411: if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2413:     /* read in the column indices for every other processor (except the last) and ship them off */
2414: for (i=1; i<size-1; i++) {
2415: nz = procsnz[i];
2416: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2417: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
2418: }
2419: /* read in the stuff for the last proc */
2420: if (size != 1) {
2421: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2422: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2423: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2424: MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
2425: }
2426: PetscFree(cols);
2427: } else {
2428: /* determine buffer space needed for message */
2429: nz = 0;
2430: for (i=0; i<m; i++) {
2431: nz += locrowlens[i];
2432: }
2433: ierr = PetscMalloc(nz*sizeof(int),&ibuf);
2434: mycols = ibuf;
2435: /* receive message of column indices*/
2436: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2437: MPI_Get_count(&status,MPI_INT,&maxnz);
2438: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2439: }
2440:
2441: /* loop over local rows, determining number of off diagonal entries */
2442: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);
2443: odlens = dlens + (rend-rstart);
2444: ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);
2445: ierr = PetscMemzero(mask,3*Mbs*sizeof(int));
2446: masked1 = mask + Mbs;
2447: masked2 = masked1 + Mbs;
2448: rowcount = 0; nzcount = 0;
2449: for (i=0; i<mbs; i++) {
2450: dcount = 0;
2451: odcount = 0;
2452: for (j=0; j<bs; j++) {
2453: kmax = locrowlens[rowcount];
2454: for (k=0; k<kmax; k++) {
2455: tmp = mycols[nzcount++]/bs;
2456: if (!mask[tmp]) {
2457: mask[tmp] = 1;
2458: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2459: else masked1[dcount++] = tmp;
2460: }
2461: }
2462: rowcount++;
2463: }
2464:
2465: dlens[i] = dcount;
2466: odlens[i] = odcount;
2468: /* zero out the mask elements we set */
2469: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2470: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2471: }
2473: /* create our matrix */
2474: MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2475: A = *newmat;
2476: MatSetOption(A,MAT_COLUMNS_SORTED);
2477:
2478: if (!rank) {
2479: PetscMalloc(maxnz*sizeof(PetscScalar),&buf);
2480: /* read in my part of the matrix numerical values */
2481: nz = procsnz[0];
2482: vals = buf;
2483: mycols = ibuf;
2484: if (size == 1) nz -= extra_rows;
2485: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2486: if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2488: /* insert into matrix */
2489: jj = rstart*bs;
2490: for (i=0; i<m; i++) {
2491: MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2492: mycols += locrowlens[i];
2493: vals += locrowlens[i];
2494: jj++;
2495: }
2496: /* read in other processors (except the last one) and ship out */
2497: for (i=1; i<size-1; i++) {
2498: nz = procsnz[i];
2499: vals = buf;
2500: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2501: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2502: }
2503: /* the last proc */
2504: if (size != 1){
2505: nz = procsnz[i] - extra_rows;
2506: vals = buf;
2507: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2508: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2509: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
2510: }
2511: PetscFree(procsnz);
2512: } else {
2513: /* receive numeric values */
2514: PetscMalloc(nz*sizeof(PetscScalar),&buf);
2516: /* receive message of values*/
2517: vals = buf;
2518: mycols = ibuf;
2519: ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2520: ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2521: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2523: /* insert into matrix */
2524: jj = rstart*bs;
2525: for (i=0; i<m; i++) {
2526: ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2527: mycols += locrowlens[i];
2528: vals += locrowlens[i];
2529: jj++;
2530: }
2531: }
2532: PetscFree(locrowlens);
2533: PetscFree(buf);
2534: PetscFree(ibuf);
2535: PetscFree(rowners);
2536: PetscFree(dlens);
2537: PetscFree(mask);
2538: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2539: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2540: return(0);
2541: }
2542: EXTERN_C_END
2544: /*@
2545:    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2547: Input Parameters:
2548: +  mat  - the matrix
2549: -  fact - factor used to compute the size of the hash table (should be greater than 1.0)
2551: Collective on Mat
2553: Level: advanced
2555: Notes:
2556:      This can also be set by the command line option: -mat_use_hash_table <fact>
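     For example (an illustrative sketch; the factor value 1.6 is an assumption):
.vb
      MatSetOption(A,MAT_USE_HASH_TABLE);
      MatMPIBAIJSetHashTableFactor(A,1.6);
.ve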
2558: .keywords: matrix, hashtable, factor, HT
2560: .seealso: MatSetOption()
2561: @*/
2562: int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2563: {
2564: Mat_MPIBAIJ *baij;
2565: int ierr;
2566: PetscTruth flg;
2570: PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);
2571: if (!flg) {
2572: SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2573: }
2574: baij = (Mat_MPIBAIJ*)mat->data;
2575: baij->ht_fact = fact;
2576: return(0);
2577: }
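/*
   MatMPIBAIJGetSeqBAIJ - gives direct access (no copies) to the two sequential BAIJ matrices
   that hold the local part of an MPIBAIJ matrix: Ad is the diagonal block, Ao the off-diagonal
   block, and colmap maps the block columns of Ao to global block column numbers.
*/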
2579: int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2580: {
2581: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2583: *Ad = a->A;
2584: *Ao = a->B;
2585: *colmap = a->garray;
2586: return(0);
2587: }