Actual source code: mpiaij.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/inline/spops.h"
6: /*
7: Local utility routine that creates a mapping from the global column
8: number to the local number in the off-diagonal part of the local
9: storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable, at
10: a slightly higher hash-table lookup cost; without it, it is not scalable (each
11: processor holds an order-N integer array) but access is fast.
12: */
15: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16: {
17: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19: PetscInt n = aij->B->n,i;
22: #if defined (PETSC_USE_CTABLE)
23: PetscTableCreate(n,&aij->colmap);
24: for (i=0; i<n; i++){
25: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
26: }
27: #else
28: PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);
29: PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
30: PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));
31: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32: #endif
33: return(0);
34: }
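/*
   Illustrative sketch (not from the library source) of how the colmap built above is
   consulted elsewhere in this file. Entries are stored as "local index + 1", so a value
   of 0 (array case) or a missing key (PETSC_USE_CTABLE case) means the global column has
   no off-diagonal entry on this process; gcol below is a placeholder global column number.

     PetscInt gcol,lcol;
   #if defined (PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;   // lcol == -1 if gcol is absent
   #else
     lcol = aij->colmap[gcol] - 1;                       // lcol == -1 if gcol is absent
   #endif
*/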
37: #define CHUNKSIZE 15
38: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39: { \
40: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41: lastcol1 = col;\
42: while (high1-low1 > 5) { \
43: t = (low1+high1)/2; \
44: if (rp1[t] > col) high1 = t; \
45: else low1 = t; \
46: } \
47: for (_i=low1; _i<high1; _i++) { \
48: if (rp1[_i] > col) break; \
49: if (rp1[_i] == col) { \
50: if (addv == ADD_VALUES) ap1[_i] += value; \
51: else ap1[_i] = value; \
52: goto a_noinsert; \
53: } \
54: } \
55: if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56: if (nonew == 1) goto a_noinsert; \
57: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58: MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59: N = nrow1++ - 1; a->nz++; \
60: /* shift up all the later entries in this row */ \
61: for (ii=N; ii>=_i; ii--) { \
62: rp1[ii+1] = rp1[ii]; \
63: ap1[ii+1] = ap1[ii]; \
64: } \
65: rp1[_i] = col; \
66: ap1[_i] = value; \
67: a_noinsert: ; \
68: ailen[row] = nrow1; \
69: }
72: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73: { \
74: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75: lastcol2 = col;\
76: while (high2-low2 > 5) { \
77: t = (low2+high2)/2; \
78: if (rp2[t] > col) high2 = t; \
79: else low2 = t; \
80: } \
81: for (_i=low2; _i<high2; _i++) { \
82: if (rp2[_i] > col) break; \
83: if (rp2[_i] == col) { \
84: if (addv == ADD_VALUES) ap2[_i] += value; \
85: else ap2[_i] = value; \
86: goto b_noinsert; \
87: } \
88: } \
89: if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90: if (nonew == 1) goto b_noinsert; \
91: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92: MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93: N = nrow2++ - 1; b->nz++; \
94: /* shift up all the later entries in this row */ \
95: for (ii=N; ii>=_i; ii--) { \
96: rp2[ii+1] = rp2[ii]; \
97: ap2[ii+1] = ap2[ii]; \
98: } \
99: rp2[_i] = col; \
100: ap2[_i] = value; \
101: b_noinsert: ; \
102: bilen[row] = nrow2; \
103: }
107: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108: {
109: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
110: PetscScalar value;
112: PetscInt i,j,rstart = aij->rstart,rend = aij->rend;
113: PetscInt cstart = aij->cstart,cend = aij->cend,row,col;
114: PetscTruth roworiented = aij->roworiented;
116: /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
117: Mat A = aij->A;
118: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
119: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120: PetscScalar *aa = a->a;
121: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122: Mat B = aij->B;
123: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
124: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125: PetscScalar *ba = b->a;
127: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128: PetscInt nonew = a->nonew;
129: PetscScalar *ap1,*ap2;
132: for (i=0; i<m; i++) {
133: if (im[i] < 0) continue;
134: #if defined(PETSC_USE_DEBUG)
135: if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136: #endif
137: if (im[i] >= rstart && im[i] < rend) {
138: row = im[i] - rstart;
139: lastcol1 = -1;
140: rp1 = aj + ai[row];
141: ap1 = aa + ai[row];
142: rmax1 = aimax[row];
143: nrow1 = ailen[row];
144: low1 = 0;
145: high1 = nrow1;
146: lastcol2 = -1;
147: rp2 = bj + bi[row];
148: ap2 = ba + bi[row];
149: rmax2 = bimax[row];
150: nrow2 = bilen[row];
151: low2 = 0;
152: high2 = nrow2;
154: for (j=0; j<n; j++) {
155: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157: if (in[j] >= cstart && in[j] < cend){
158: col = in[j] - cstart;
159: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160: } else if (in[j] < 0) continue;
161: #if defined(PETSC_USE_DEBUG)
162: else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163: #endif
164: else {
165: if (mat->was_assembled) {
166: if (!aij->colmap) {
167: CreateColmap_MPIAIJ_Private(mat);
168: }
169: #if defined (PETSC_USE_CTABLE)
170: PetscTableFind(aij->colmap,in[j]+1,&col);
171: col--;
172: #else
173: col = aij->colmap[in[j]] - 1;
174: #endif
175: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176: DisAssemble_MPIAIJ(mat);
177: col = in[j];
178: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179: B = aij->B;
180: b = (Mat_SeqAIJ*)B->data;
181: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182: rp2 = bj + bi[row];
183: ap2 = ba + bi[row];
184: rmax2 = bimax[row];
185: nrow2 = bilen[row];
186: low2 = 0;
187: high2 = nrow2;
188: bm = aij->B->m;
189: ba = b->a;
190: }
191: } else col = in[j];
192: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193: }
194: }
195: } else {
196: if (!aij->donotstash) {
197: if (roworiented) {
198: if (ignorezeroentries && v[i*n] == 0.0) continue;
199: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
200: } else {
201: if (ignorezeroentries && v[i] == 0.0) continue;
202: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
203: }
204: }
205: }
206: }
207: return(0);
208: }
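/*
   Illustrative caller-side sketch of the routine above (placeholders: comm, M, row, cols,
   vals; error checking omitted to match this listing). Values destined for rows owned by
   other processes are stashed here and only communicated during MatAssemblyBegin/End().

     Mat         A;
     PetscInt    row,cols[3];
     PetscScalar vals[3];

     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);
     MatSetValues(A,1,&row,3,cols,vals,INSERT_VALUES);   // row need not be locally owned
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/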
213: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214: {
215: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
217: PetscInt i,j,rstart = aij->rstart,rend = aij->rend;
218: PetscInt cstart = aij->cstart,cend = aij->cend,row,col;
221: for (i=0; i<m; i++) {
222: if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223: if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224: if (idxm[i] >= rstart && idxm[i] < rend) {
225: row = idxm[i] - rstart;
226: for (j=0; j<n; j++) {
227: if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228: if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229: if (idxn[j] >= cstart && idxn[j] < cend){
230: col = idxn[j] - cstart;
231: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
232: } else {
233: if (!aij->colmap) {
234: CreateColmap_MPIAIJ_Private(mat);
235: }
236: #if defined (PETSC_USE_CTABLE)
237: PetscTableFind(aij->colmap,idxn[j]+1,&col);
238: col --;
239: #else
240: col = aij->colmap[idxn[j]] - 1;
241: #endif
242: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243: else {
244: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
245: }
246: }
247: }
248: } else {
249: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250: }
251: }
252: return(0);
253: }
257: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258: {
259: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
261: PetscInt nstash,reallocs;
262: InsertMode addv;
265: if (aij->donotstash) {
266: return(0);
267: }
269: /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
270: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
271: if (addv == (ADD_VALUES|INSERT_VALUES)) {
272: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
273: }
274: mat->insertmode = addv; /* in case this processor had no cache */
276: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
277: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
278: PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));
279: return(0);
280: }
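/*
   Sketch of why the MPI_BOR reduction above detects mixed insert modes (this assumes the
   usual InsertMode encoding INSERT_VALUES == 1 and ADD_VALUES == 2):

     bitwise-or over all ranks:
       every rank inserted (or set nothing)  ->  1 (or 0)
       every rank added    (or set nothing)  ->  2 (or 0)
       some inserted while others added      ->  3 == (ADD_VALUES|INSERT_VALUES)  ->  error
*/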
284: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285: {
286: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
287: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
289: PetscMPIInt n;
290: PetscInt i,j,rstart,ncols,flg;
291: PetscInt *row,*col,other_disassembled;
292: PetscScalar *val;
293: InsertMode addv = mat->insertmode;
296: if (!aij->donotstash) {
297: while (1) {
298: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
299: if (!flg) break;
301: for (i=0; i<n;) {
302: /* Now identify the consecutive vals belonging to the same row */
303: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304: if (j < n) ncols = j-i;
305: else ncols = n-i;
306: /* Now assemble all these values with a single function call */
307: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
308: i = j;
309: }
310: }
311: MatStashScatterEnd_Private(&mat->stash);
312: }
313: a->compressedrow.use = PETSC_FALSE;
314: MatAssemblyBegin(aij->A,mode);
315: MatAssemblyEnd(aij->A,mode);
317: /* determine if any processor has disassembled; if so we must
318: also disassemble ourselves, in order that we may reassemble. */
319: /*
320: if the nonzero structure of submatrix B cannot change then we know that
321: no processor disassembled, so we can skip this step
322: */
323: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
324: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
325: if (mat->was_assembled && !other_disassembled) {
326: DisAssemble_MPIAIJ(mat);
327: }
328: }
329: /* re-access b because aij->B was changed in MatSetValues() or DisAssemble() */
330: b = (Mat_SeqAIJ *)aij->B->data;
332: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333: MatSetUpMultiply_MPIAIJ(mat);
334: }
335: MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);
336: b->compressedrow.use = PETSC_TRUE;
337: MatAssemblyBegin(aij->B,mode);
338: MatAssemblyEnd(aij->B,mode);
340: if (aij->rowvalues) {
341: PetscFree(aij->rowvalues);
342: aij->rowvalues = 0;
343: }
345: /* used by MatAXPY() */
346: a->xtoy = 0; b->xtoy = 0;
347: a->XtoY = 0; b->XtoY = 0;
349: return(0);
350: }
354: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355: {
356: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
360: MatZeroEntries(l->A);
361: MatZeroEntries(l->B);
362: return(0);
363: }
367: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
368: {
369: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
371: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372: PetscInt i,*owners = l->rowners;
373: PetscInt *nprocs,j,idx,nsends,row;
374: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
375: PetscInt *rvalues,count,base,slen,*source;
376: PetscInt *lens,*lrows,*values,rstart=l->rstart;
377: MPI_Comm comm = A->comm;
378: MPI_Request *send_waits,*recv_waits;
379: MPI_Status recv_status,*send_status;
380: #if defined(PETSC_DEBUG)
381: PetscTruth found = PETSC_FALSE;
382: #endif
385: /* first count number of contributors to each processor */
386: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
387: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
388: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
389: j = 0;
390: for (i=0; i<N; i++) {
391: if (lastidx > (idx = rows[i])) j = 0;
392: lastidx = idx;
393: for (; j<size; j++) {
394: if (idx >= owners[j] && idx < owners[j+1]) {
395: nprocs[2*j]++;
396: nprocs[2*j+1] = 1;
397: owner[i] = j;
398: #if defined(PETSC_DEBUG)
399: found = PETSC_TRUE;
400: #endif
401: break;
402: }
403: }
404: #if defined(PETSC_DEBUG)
405: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
406: found = PETSC_FALSE;
407: #endif
408: }
409: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
411: /* inform other processors of number of messages and max length*/
412: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
414: /* post receives: */
415: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
416: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
417: for (i=0; i<nrecvs; i++) {
418: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
419: }
421: /* do sends:
422: 1) starts[i] gives the starting index in svalues for stuff going to
423: the ith processor
424: */
425: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
426: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
427: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
428: starts[0] = 0;
429: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
430: for (i=0; i<N; i++) {
431: svalues[starts[owner[i]]++] = rows[i];
432: }
434: starts[0] = 0;
435: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
436: count = 0;
437: for (i=0; i<size; i++) {
438: if (nprocs[2*i+1]) {
439: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
440: }
441: }
442: PetscFree(starts);
444: base = owners[rank];
446: /* wait on receives */
447: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
448: source = lens + nrecvs;
449: count = nrecvs; slen = 0;
450: while (count) {
451: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
452: /* unpack receives into our local space */
453: MPI_Get_count(&recv_status,MPIU_INT,&n);
454: source[imdex] = recv_status.MPI_SOURCE;
455: lens[imdex] = n;
456: slen += n;
457: count--;
458: }
459: PetscFree(recv_waits);
460:
461: /* move the data into the send scatter */
462: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
463: count = 0;
464: for (i=0; i<nrecvs; i++) {
465: values = rvalues + i*nmax;
466: for (j=0; j<lens[i]; j++) {
467: lrows[count++] = values[j] - base;
468: }
469: }
470: PetscFree(rvalues);
471: PetscFree(lens);
472: PetscFree(owner);
473: PetscFree(nprocs);
474:
475: /* actually zap the local rows */
476: /*
477: Zero the required rows. If the "diagonal block" of the matrix
478: is square and the user wishes to set the diagonal we use separate
479: code so that MatSetValues() is not called for each diagonal entry
480: (which would allocate new memory, triggering many mallocs and slowing things down).
482: Contributed by: Matthew Knepley
483: */
484: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
485: MatZeroRows(l->B,slen,lrows,0.0);
486: if ((diag != 0.0) && (l->A->M == l->A->N)) {
487: MatZeroRows(l->A,slen,lrows,diag);
488: } else if (diag != 0.0) {
489: MatZeroRows(l->A,slen,lrows,0.0);
490: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
491: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
492: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
493: }
494: for (i = 0; i < slen; i++) {
495: row = lrows[i] + rstart;
496: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
497: }
498: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
499: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
500: } else {
501: MatZeroRows(l->A,slen,lrows,0.0);
502: }
503: PetscFree(lrows);
505: /* wait on sends */
506: if (nsends) {
507: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
508: MPI_Waitall(nsends,send_waits,send_status);
509: PetscFree(send_status);
510: }
511: PetscFree(send_waits);
512: PetscFree(svalues);
514: return(0);
515: }
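/*
   Illustrative caller-side sketch (A and M are placeholders): the global row indices
   passed to MatZeroRows() may refer to rows owned by any process; the code above routes
   each one to its owner. A nonzero diag puts that value on the diagonal of every zeroed row.

     PetscInt    rows[2];
     PetscScalar one = 1.0;
     rows[0] = 0; rows[1] = M-1;            // e.g. Dirichlet boundary rows
     MatZeroRows(A,2,rows,one);
*/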
519: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
520: {
521: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
523: PetscInt nt;
526: VecGetLocalSize(xx,&nt);
527: if (nt != A->n) {
528: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
529: }
530: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
531: (*a->A->ops->mult)(a->A,xx,yy);
532: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
533: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
534: return(0);
535: }
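/*
   Sketch of the algebra performed above: with the local matrix split into the diagonal
   block a->A and the off-diagonal block a->B,

     y = A_diag * x_local + A_off * x_ghost

   where x_ghost (a->lvec) holds the off-process entries of x gathered by the VecScatter;
   the scatter is started before the local product so that communication overlaps the
   A_diag*x computation.
*/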
539: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
540: {
541: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
545: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
546: (*a->A->ops->multadd)(a->A,xx,yy,zz);
547: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
548: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
549: return(0);
550: }
554: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
555: {
556: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
558: PetscTruth merged;
561: VecScatterGetMerged(a->Mvctx,&merged);
562: /* do nondiagonal part */
563: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
564: if (!merged) {
565: /* send it on its way */
566: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
567: /* do local part */
568: (*a->A->ops->multtranspose)(a->A,xx,yy);
569: /* receive remote parts: note this assumes the values are not actually */
570: /* added into yy until the next line */
571: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
572: } else {
573: /* do local part */
574: (*a->A->ops->multtranspose)(a->A,xx,yy);
575: /* send it on its way */
576: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
577: /* values actually were received in the Begin() but we need to call this nop */
578: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
579: }
580: return(0);
581: }
586: PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
587: {
588: MPI_Comm comm;
589: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
590: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
591: IS Me,Notme;
593: PetscInt M,N,first,last,*notme,i;
594: PetscMPIInt size;
598: /* Easy test: symmetric diagonal block */
599: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
600: MatIsTranspose(Adia,Bdia,tol,f);
601: if (!*f) return(0);
602: PetscObjectGetComm((PetscObject)Amat,&comm);
603: MPI_Comm_size(comm,&size);
604: if (size == 1) return(0);
606: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
607: MatGetSize(Amat,&M,&N);
608: MatGetOwnershipRange(Amat,&first,&last);
609: PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);
610: for (i=0; i<first; i++) notme[i] = i;
611: for (i=last; i<M; i++) notme[i-last+first] = i;
612: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
613: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
614: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
615: Aoff = Aoffs[0];
616: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
617: Boff = Boffs[0];
618: MatIsTranspose(Aoff,Boff,tol,f);
619: MatDestroyMatrices(1,&Aoffs);
620: MatDestroyMatrices(1,&Boffs);
621: ISDestroy(Me);
622: ISDestroy(Notme);
624: return(0);
625: }
630: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631: {
632: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
636: /* do nondiagonal part */
637: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
638: /* send it on its way */
639: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
640: /* do local part */
641: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
642: /* receive remote parts */
643: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
644: return(0);
645: }
647: /*
648: This only works correctly for square matrices where the subblock A->A is the
649: diagonal block
650: */
653: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654: {
656: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
659: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660: if (a->rstart != a->cstart || a->rend != a->cend) {
661: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662: }
663: MatGetDiagonal(a->A,v);
664: return(0);
665: }
669: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
670: {
671: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
675: MatScale(a->A,aa);
676: MatScale(a->B,aa);
677: return(0);
678: }
682: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
683: {
684: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
688: #if defined(PETSC_USE_LOG)
689: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
690: #endif
691: MatStashDestroy_Private(&mat->stash);
692: PetscFree(aij->rowners);
693: MatDestroy(aij->A);
694: MatDestroy(aij->B);
695: #if defined (PETSC_USE_CTABLE)
696: if (aij->colmap) {PetscTableDelete(aij->colmap);}
697: #else
698: if (aij->colmap) {PetscFree(aij->colmap);}
699: #endif
700: if (aij->garray) {PetscFree(aij->garray);}
701: if (aij->lvec) {VecDestroy(aij->lvec);}
702: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
703: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
704: PetscFree(aij);
706: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
707: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
708: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
709: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
710: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
711: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
712: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
713: return(0);
714: }
718: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
719: {
720: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
721: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
722: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
723: PetscErrorCode ierr;
724: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
725: int fd;
726: PetscInt nz,header[4],*row_lengths,*range,rlen,i;
727: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
728: PetscScalar *column_values;
731: MPI_Comm_rank(mat->comm,&rank);
732: MPI_Comm_size(mat->comm,&size);
733: nz = A->nz + B->nz;
734: if (!rank) {
735: header[0] = MAT_FILE_COOKIE;
736: header[1] = mat->M;
737: header[2] = mat->N;
738: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);
739: PetscViewerBinaryGetDescriptor(viewer,&fd);
740: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
741: /* get largest number of rows any processor has */
742: rlen = mat->m;
743: PetscMapGetGlobalRange(mat->rmap,&range);
744: for (i=1; i<size; i++) {
745: rlen = PetscMax(rlen,range[i+1] - range[i]);
746: }
747: } else {
748: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);
749: rlen = mat->m;
750: }
752: /* load up the local row counts */
753: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
754: for (i=0; i<mat->m; i++) {
755: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
756: }
758: /* store the row lengths to the file */
759: if (!rank) {
760: MPI_Status status;
761: PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);
762: for (i=1; i<size; i++) {
763: rlen = range[i+1] - range[i];
764: MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);
765: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
766: }
767: } else {
768: MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);
769: }
770: PetscFree(row_lengths);
772: /* load up the local column indices */
773: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
774: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);
775: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
776: cnt = 0;
777: for (i=0; i<mat->m; i++) {
778: for (j=B->i[i]; j<B->i[i+1]; j++) {
779: if ( (col = garray[B->j[j]]) > cstart) break;
780: column_indices[cnt++] = col;
781: }
782: for (k=A->i[i]; k<A->i[i+1]; k++) {
783: column_indices[cnt++] = A->j[k] + cstart;
784: }
785: for (; j<B->i[i+1]; j++) {
786: column_indices[cnt++] = garray[B->j[j]];
787: }
788: }
789: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
791: /* store the column indices to the file */
792: if (!rank) {
793: MPI_Status status;
794: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
795: for (i=1; i<size; i++) {
796: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
797: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
798: MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);
799: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
800: }
801: } else {
802: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
803: MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);
804: }
805: PetscFree(column_indices);
807: /* load up the local column values */
808: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
809: cnt = 0;
810: for (i=0; i<mat->m; i++) {
811: for (j=B->i[i]; j<B->i[i+1]; j++) {
812: if ( garray[B->j[j]] > cstart) break;
813: column_values[cnt++] = B->a[j];
814: }
815: for (k=A->i[i]; k<A->i[i+1]; k++) {
816: column_values[cnt++] = A->a[k];
817: }
818: for (; j<B->i[i+1]; j++) {
819: column_values[cnt++] = B->a[j];
820: }
821: }
822: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
824: /* store the column values to the file */
825: if (!rank) {
826: MPI_Status status;
827: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
828: for (i=1; i<size; i++) {
829: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
830: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
831: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
832: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
833: }
834: } else {
835: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
836: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
837: }
838: PetscFree(column_values);
839: return(0);
840: }
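/*
   Illustrative caller-side sketch (assuming the PetscViewerBinaryOpen()/FILE_MODE_* and
   MatLoad(viewer,type,&mat) calling sequences of this PETSc generation; "matrix.dat" and
   comm are placeholders). The format written above is the one MatLoad_MPIAIJ() below reads.

     PetscViewer viewer;
     PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_WRITE,&viewer);
     MatView(A,viewer);                     // dispatches to MatView_MPIAIJ_Binary()
     PetscViewerDestroy(viewer);

     PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);
     MatLoad(viewer,MATMPIAIJ,&B);
     PetscViewerDestroy(viewer);
*/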
844: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
845: {
846: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
847: PetscErrorCode ierr;
848: PetscMPIInt rank = aij->rank,size = aij->size;
849: PetscTruth isdraw,iascii,isbinary;
850: PetscViewer sviewer;
851: PetscViewerFormat format;
854: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
855: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
856: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
857: if (iascii) {
858: PetscViewerGetFormat(viewer,&format);
859: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
860: MatInfo info;
861: PetscTruth inodes;
863: MPI_Comm_rank(mat->comm,&rank);
864: MatGetInfo(mat,MAT_LOCAL,&info);
865: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
866: if (!inodes) {
867: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
868: rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
869: } else {
870: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
871: rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
872: }
873: MatGetInfo(aij->A,MAT_LOCAL,&info);
874: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
875: MatGetInfo(aij->B,MAT_LOCAL,&info);
876: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
877: PetscViewerFlush(viewer);
878: VecScatterView(aij->Mvctx,viewer);
879: return(0);
880: } else if (format == PETSC_VIEWER_ASCII_INFO) {
881: PetscInt inodecount,inodelimit,*inodes;
882: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
883: if (inodes) {
884: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
885: } else {
886: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
887: }
888: return(0);
889: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
890: return(0);
891: }
892: } else if (isbinary) {
893: if (size == 1) {
894: PetscObjectSetName((PetscObject)aij->A,mat->name);
895: MatView(aij->A,viewer);
896: } else {
897: MatView_MPIAIJ_Binary(mat,viewer);
898: }
899: return(0);
900: } else if (isdraw) {
901: PetscDraw draw;
902: PetscTruth isnull;
903: PetscViewerDrawGetDraw(viewer,0,&draw);
904: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
905: }
907: if (size == 1) {
908: PetscObjectSetName((PetscObject)aij->A,mat->name);
909: MatView(aij->A,viewer);
910: } else {
911: /* assemble the entire matrix onto first processor. */
912: Mat A;
913: Mat_SeqAIJ *Aloc;
914: PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
915: PetscScalar *a;
917: MatCreate(mat->comm,&A);
918: if (!rank) {
919: MatSetSizes(A,M,N,M,N);
920: } else {
921: MatSetSizes(A,0,0,M,N);
922: }
923: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
924: MatSetType(A,MATMPIAIJ);
925: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
926: PetscLogObjectParent(mat,A);
928: /* copy over the A part */
929: Aloc = (Mat_SeqAIJ*)aij->A->data;
930: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
931: row = aij->rstart;
932: for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
933: for (i=0; i<m; i++) {
934: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
935: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
936: }
937: aj = Aloc->j;
938: for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
940: /* copy over the B part */
941: Aloc = (Mat_SeqAIJ*)aij->B->data;
942: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943: row = aij->rstart;
944: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
945: ct = cols;
946: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
947: for (i=0; i<m; i++) {
948: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
949: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
950: }
951: PetscFree(ct);
952: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
953: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
954: /*
955: Every process has to participate in drawing the matrix since the graphics waits are
956: synchronized across all processors that share the PetscDraw object
957: */
958: PetscViewerGetSingleton(viewer,&sviewer);
959: if (!rank) {
960: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
961: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
962: }
963: PetscViewerRestoreSingleton(viewer,&sviewer);
964: MatDestroy(A);
965: }
966: return(0);
967: }
971: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
972: {
974: PetscTruth iascii,isdraw,issocket,isbinary;
975:
977: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
978: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
979: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
980: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
981: if (iascii || isdraw || isbinary || issocket) {
982: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
983: } else {
984: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
985: }
986: return(0);
987: }
993: PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
994: {
995: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
997: Vec bb1;
998: PetscScalar mone=-1.0;
1001: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1003: VecDuplicate(bb,&bb1);
1005: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1006: if (flag & SOR_ZERO_INITIAL_GUESS) {
1007: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1008: its--;
1009: }
1010:
1011: while (its--) {
1012: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1013: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1015: /* update rhs: bb1 = bb - B*x */
1016: VecScale(mat->lvec,mone);
1017: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1019: /* local sweep */
1020: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1021:
1022: }
1023: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1024: if (flag & SOR_ZERO_INITIAL_GUESS) {
1025: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1026: its--;
1027: }
1028: while (its--) {
1029: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1030: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1032: /* update rhs: bb1 = bb - B*x */
1033: VecScale(mat->lvec,mone);
1034: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1036: /* local sweep */
1037: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1038:
1039: }
1040: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1041: if (flag & SOR_ZERO_INITIAL_GUESS) {
1042: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1043: its--;
1044: }
1045: while (its--) {
1046: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1047: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1049: /* update rhs: bb1 = bb - B*x */
1050: VecScale(mat->lvec,mone);
1051: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1053: /* local sweep */
1054: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1055:
1056: }
1057: } else {
1058: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1059: }
1061: VecDestroy(bb1);
1062: return(0);
1063: }
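/*
   Sketch of what each "local" sweep above does on every process (xg denotes the ghost
   values of x gathered into mat->lvec):

     bb1 = bb - A_off * xg                  (the VecScale(-1) / multadd pair)
     relax  A_diag * x = bb1                (forward, backward, or symmetric local SOR)

   i.e. a block Jacobi outer iteration with SOR applied to each diagonal block.
*/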
1067: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1068: {
1069: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1070: Mat A = mat->A,B = mat->B;
1072: PetscReal isend[5],irecv[5];
1075: info->block_size = 1.0;
1076: MatGetInfo(A,MAT_LOCAL,info);
1077: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1078: isend[3] = info->memory; isend[4] = info->mallocs;
1079: MatGetInfo(B,MAT_LOCAL,info);
1080: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1081: isend[3] += info->memory; isend[4] += info->mallocs;
1082: if (flag == MAT_LOCAL) {
1083: info->nz_used = isend[0];
1084: info->nz_allocated = isend[1];
1085: info->nz_unneeded = isend[2];
1086: info->memory = isend[3];
1087: info->mallocs = isend[4];
1088: } else if (flag == MAT_GLOBAL_MAX) {
1089: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1090: info->nz_used = irecv[0];
1091: info->nz_allocated = irecv[1];
1092: info->nz_unneeded = irecv[2];
1093: info->memory = irecv[3];
1094: info->mallocs = irecv[4];
1095: } else if (flag == MAT_GLOBAL_SUM) {
1096: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1097: info->nz_used = irecv[0];
1098: info->nz_allocated = irecv[1];
1099: info->nz_unneeded = irecv[2];
1100: info->memory = irecv[3];
1101: info->mallocs = irecv[4];
1102: }
1103: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1104: info->fill_ratio_needed = 0;
1105: info->factor_mallocs = 0;
1106: info->rows_global = (double)matin->M;
1107: info->columns_global = (double)matin->N;
1108: info->rows_local = (double)matin->m;
1109: info->columns_local = (double)matin->N;
1111: return(0);
1112: }
1116: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1117: {
1118: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1122: switch (op) {
1123: case MAT_NO_NEW_NONZERO_LOCATIONS:
1124: case MAT_YES_NEW_NONZERO_LOCATIONS:
1125: case MAT_COLUMNS_UNSORTED:
1126: case MAT_COLUMNS_SORTED:
1127: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1128: case MAT_KEEP_ZEROED_ROWS:
1129: case MAT_NEW_NONZERO_LOCATION_ERR:
1130: case MAT_USE_INODES:
1131: case MAT_DO_NOT_USE_INODES:
1132: case MAT_IGNORE_ZERO_ENTRIES:
1133: MatSetOption(a->A,op);
1134: MatSetOption(a->B,op);
1135: break;
1136: case MAT_ROW_ORIENTED:
1137: a->roworiented = PETSC_TRUE;
1138: MatSetOption(a->A,op);
1139: MatSetOption(a->B,op);
1140: break;
1141: case MAT_ROWS_SORTED:
1142: case MAT_ROWS_UNSORTED:
1143: case MAT_YES_NEW_DIAGONALS:
1144: PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));
1145: break;
1146: case MAT_COLUMN_ORIENTED:
1147: a->roworiented = PETSC_FALSE;
1148: MatSetOption(a->A,op);
1149: MatSetOption(a->B,op);
1150: break;
1151: case MAT_IGNORE_OFF_PROC_ENTRIES:
1152: a->donotstash = PETSC_TRUE;
1153: break;
1154: case MAT_NO_NEW_DIAGONALS:
1155: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1156: case MAT_SYMMETRIC:
1157: case MAT_STRUCTURALLY_SYMMETRIC:
1158: case MAT_HERMITIAN:
1159: case MAT_SYMMETRY_ETERNAL:
1160: MatSetOption(a->A,op);
1161: break;
1162: case MAT_NOT_SYMMETRIC:
1163: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1164: case MAT_NOT_HERMITIAN:
1165: case MAT_NOT_SYMMETRY_ETERNAL:
1166: break;
1167: default:
1168: SETERRQ(PETSC_ERR_SUP,"unknown option");
1169: }
1170: return(0);
1171: }
1175: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1176: {
1177: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1178: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1180: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1181: PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1182: PetscInt *cmap,*idx_p;
1185: if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1186: mat->getrowactive = PETSC_TRUE;
1188: if (!mat->rowvalues && (idx || v)) {
1189: /*
1190: allocate enough space to hold information from the longest row.
1191: */
1192: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1193: PetscInt max = 1,tmp;
1194: for (i=0; i<matin->m; i++) {
1195: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1196: if (max < tmp) { max = tmp; }
1197: }
1198: PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1199: mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1200: }
1202: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1203: lrow = row - rstart;
1205: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1206: if (!v) {pvA = 0; pvB = 0;}
1207: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1208: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1209: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1210: nztot = nzA + nzB;
1212: cmap = mat->garray;
1213: if (v || idx) {
1214: if (nztot) {
1215: /* Sort by increasing column numbers, assuming A and B already sorted */
1216: PetscInt imark = -1;
1217: if (v) {
1218: *v = v_p = mat->rowvalues;
1219: for (i=0; i<nzB; i++) {
1220: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1221: else break;
1222: }
1223: imark = i;
1224: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1225: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1226: }
1227: if (idx) {
1228: *idx = idx_p = mat->rowindices;
1229: if (imark > -1) {
1230: for (i=0; i<imark; i++) {
1231: idx_p[i] = cmap[cworkB[i]];
1232: }
1233: } else {
1234: for (i=0; i<nzB; i++) {
1235: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1236: else break;
1237: }
1238: imark = i;
1239: }
1240: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1241: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1242: }
1243: } else {
1244: if (idx) *idx = 0;
1245: if (v) *v = 0;
1246: }
1247: }
1248: *nz = nztot;
1249: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1250: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1251: return(0);
1252: }
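/*
   Illustrative caller-side sketch (error checking omitted to match this listing): only
   locally owned rows may be requested, and MatRestoreRow() must be called before the next
   MatGetRow() because the routine reuses the single rowvalues/rowindices workspace
   allocated above.

     PetscInt    row,rstart,rend,ncols,*cols;
     PetscScalar *vals;
     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       // cols[0..ncols-1] are global column numbers in increasing order
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/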
1256: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1257: {
1258: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1261: if (!aij->getrowactive) {
1262: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1263: }
1264: aij->getrowactive = PETSC_FALSE;
1265: return(0);
1266: }
1270: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1271: {
1272: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1273: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1275: PetscInt i,j,cstart = aij->cstart;
1276: PetscReal sum = 0.0;
1277: PetscScalar *v;
1280: if (aij->size == 1) {
1281: MatNorm(aij->A,type,norm);
1282: } else {
1283: if (type == NORM_FROBENIUS) {
1284: v = amat->a;
1285: for (i=0; i<amat->nz; i++) {
1286: #if defined(PETSC_USE_COMPLEX)
1287: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1288: #else
1289: sum += (*v)*(*v); v++;
1290: #endif
1291: }
1292: v = bmat->a;
1293: for (i=0; i<bmat->nz; i++) {
1294: #if defined(PETSC_USE_COMPLEX)
1295: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1296: #else
1297: sum += (*v)*(*v); v++;
1298: #endif
1299: }
1300: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1301: *norm = sqrt(*norm);
1302: } else if (type == NORM_1) { /* max column norm */
1303: PetscReal *tmp,*tmp2;
1304: PetscInt *jj,*garray = aij->garray;
1305: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1306: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1307: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1308: *norm = 0.0;
1309: v = amat->a; jj = amat->j;
1310: for (j=0; j<amat->nz; j++) {
1311: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1312: }
1313: v = bmat->a; jj = bmat->j;
1314: for (j=0; j<bmat->nz; j++) {
1315: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1316: }
1317: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1318: for (j=0; j<mat->N; j++) {
1319: if (tmp2[j] > *norm) *norm = tmp2[j];
1320: }
1321: PetscFree(tmp);
1322: PetscFree(tmp2);
1323: } else if (type == NORM_INFINITY) { /* max row norm */
1324: PetscReal ntemp = 0.0;
1325: for (j=0; j<aij->A->m; j++) {
1326: v = amat->a + amat->i[j];
1327: sum = 0.0;
1328: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1329: sum += PetscAbsScalar(*v); v++;
1330: }
1331: v = bmat->a + bmat->i[j];
1332: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1333: sum += PetscAbsScalar(*v); v++;
1334: }
1335: if (sum > ntemp) ntemp = sum;
1336: }
1337: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1338: } else {
1339: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1340: }
1341: }
1342: return(0);
1343: }
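/*
   The norms computed above, written out (sketch):

     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )   local sums, MPI_SUM reduction
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|        per-column sums, MPI_SUM then max
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|        per-row sums, MPI_MAX reduction
*/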
1347: PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1348: {
1349: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1350: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1352: PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1353: Mat B;
1354: PetscScalar *array;
1357: if (!matout && M != N) {
1358: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1359: }
1361: MatCreate(A->comm,&B);
1362: MatSetSizes(B,A->n,A->m,N,M);
1363: MatSetType(B,A->type_name);
1364: MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
1366: /* copy over the A part */
1367: Aloc = (Mat_SeqAIJ*)a->A->data;
1368: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1369: row = a->rstart;
1370: for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1371: for (i=0; i<m; i++) {
1372: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1373: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1374: }
1375: aj = Aloc->j;
1376: for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1378: /* copy over the B part */
1379: Aloc = (Mat_SeqAIJ*)a->B->data;
1380: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1381: row = a->rstart;
1382: PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);
1383: ct = cols;
1384: for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1385: for (i=0; i<m; i++) {
1386: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1387: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1388: }
1389: PetscFree(ct);
1390: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1391: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1392: if (matout) {
1393: *matout = B;
1394: } else {
1395: MatHeaderCopy(A,B);
1396: }
1397: return(0);
1398: }
1402: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1403: {
1404: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1405: Mat a = aij->A,b = aij->B;
1407: PetscInt s1,s2,s3;
1410: MatGetLocalSize(mat,&s2,&s3);
1411: if (rr) {
1412: VecGetLocalSize(rr,&s1);
1413: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1414: /* Overlap communication with computation. */
1415: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1416: }
1417: if (ll) {
1418: VecGetLocalSize(ll,&s1);
1419: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1420: (*b->ops->diagonalscale)(b,ll,0);
1421: }
1422: /* scale the diagonal block */
1423: (*a->ops->diagonalscale)(a,ll,rr);
1425: if (rr) {
1426: /* Do a scatter end and then right scale the off-diagonal block */
1427: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1428: (*b->ops->diagonalscale)(b,0,aij->lvec);
1429: }
1430:
1431: return(0);
1432: }
1437: PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1438: {
1439: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1443: if (!a->rank) {
1444: MatPrintHelp_SeqAIJ(a->A);
1445: }
1446: return(0);
1447: }
1451: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1452: {
1453: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1457: MatSetBlockSize(a->A,bs);
1458: MatSetBlockSize(a->B,bs);
1459: return(0);
1460: }
1463: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1464: {
1465: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1469: MatSetUnfactored(a->A);
1470: return(0);
1471: }
1475: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1476: {
1477: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1478: Mat a,b,c,d;
1479: PetscTruth flg;
1483: a = matA->A; b = matA->B;
1484: c = matB->A; d = matB->B;
1486: MatEqual(a,c,&flg);
1487: if (flg) {
1488: MatEqual(b,d,&flg);
1489: }
1490: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1491: return(0);
1492: }
1496: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1497: {
1499: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1500: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1503: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1504: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1505: /* because of the column compression in the off-processor part of the matrix a->B,
1506: the number of columns in a->B and b->B may be different, hence we cannot call
1507: MatCopy() directly on the two parts. If need be, we could provide a more
1508: efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1509: and then copying the submatrices */
1510: MatCopy_Basic(A,B,str);
1511: } else {
1512: MatCopy(a->A,b->A,str);
1513: MatCopy(a->B,b->B,str);
1514: }
1515: return(0);
1516: }
1520: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1521: {
1525: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1526: return(0);
1527: }
1529: #include "petscblaslapack.h"
1532: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1533: {
1535: PetscInt i;
1536: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1537: PetscBLASInt bnz,one=1;
1538: Mat_SeqAIJ *x,*y;
1541: if (str == SAME_NONZERO_PATTERN) {
1542: PetscScalar alpha = a;
1543: x = (Mat_SeqAIJ *)xx->A->data;
1544: y = (Mat_SeqAIJ *)yy->A->data;
1545: bnz = (PetscBLASInt)x->nz;
1546: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1547: x = (Mat_SeqAIJ *)xx->B->data;
1548: y = (Mat_SeqAIJ *)yy->B->data;
1549: bnz = (PetscBLASInt)x->nz;
1550: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1551: } else if (str == SUBSET_NONZERO_PATTERN) {
1552: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
1554: x = (Mat_SeqAIJ *)xx->B->data;
1555: y = (Mat_SeqAIJ *)yy->B->data;
1556: if (y->xtoy && y->XtoY != xx->B) {
1557: PetscFree(y->xtoy);
1558: MatDestroy(y->XtoY);
1559: }
1560: if (!y->xtoy) { /* get xtoy */
1561: MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1562: y->XtoY = xx->B;
1563: }
1564: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1565: } else {
1566: MatAXPY_Basic(Y,a,X,str);
1567: }
1568: return(0);
1569: }
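/*
   Illustrative caller-side sketch: Y <- Y + a*X. SAME_NONZERO_PATTERN takes the raw BLAS
   axpy path above, SUBSET_NONZERO_PATTERN builds (and caches) an x-to-y index map for the
   off-diagonal parts, and any other structure flag falls back to MatAXPY_Basic().

     PetscScalar a = 2.0;
     MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);
*/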
1571: EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1575: PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1576: {
1577: #if defined(PETSC_USE_COMPLEX)
1579: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1582: MatConjugate_SeqAIJ(aij->A);
1583: MatConjugate_SeqAIJ(aij->B);
1584: #else
1586: #endif
1587: return(0);
1588: }
1590: /* -------------------------------------------------------------------*/
1591: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1592: MatGetRow_MPIAIJ,
1593: MatRestoreRow_MPIAIJ,
1594: MatMult_MPIAIJ,
1595: /* 4*/ MatMultAdd_MPIAIJ,
1596: MatMultTranspose_MPIAIJ,
1597: MatMultTransposeAdd_MPIAIJ,
1598: 0,
1599: 0,
1600: 0,
1601: /*10*/ 0,
1602: 0,
1603: 0,
1604: MatRelax_MPIAIJ,
1605: MatTranspose_MPIAIJ,
1606: /*15*/ MatGetInfo_MPIAIJ,
1607: MatEqual_MPIAIJ,
1608: MatGetDiagonal_MPIAIJ,
1609: MatDiagonalScale_MPIAIJ,
1610: MatNorm_MPIAIJ,
1611: /*20*/ MatAssemblyBegin_MPIAIJ,
1612: MatAssemblyEnd_MPIAIJ,
1613: 0,
1614: MatSetOption_MPIAIJ,
1615: MatZeroEntries_MPIAIJ,
1616: /*25*/ MatZeroRows_MPIAIJ,
1617: 0,
1618: 0,
1619: 0,
1620: 0,
1621: /*30*/ MatSetUpPreallocation_MPIAIJ,
1622: 0,
1623: 0,
1624: 0,
1625: 0,
1626: /*35*/ MatDuplicate_MPIAIJ,
1627: 0,
1628: 0,
1629: 0,
1630: 0,
1631: /*40*/ MatAXPY_MPIAIJ,
1632: MatGetSubMatrices_MPIAIJ,
1633: MatIncreaseOverlap_MPIAIJ,
1634: MatGetValues_MPIAIJ,
1635: MatCopy_MPIAIJ,
1636: /*45*/ MatPrintHelp_MPIAIJ,
1637: MatScale_MPIAIJ,
1638: 0,
1639: 0,
1640: 0,
1641: /*50*/ MatSetBlockSize_MPIAIJ,
1642: 0,
1643: 0,
1644: 0,
1645: 0,
1646: /*55*/ MatFDColoringCreate_MPIAIJ,
1647: 0,
1648: MatSetUnfactored_MPIAIJ,
1649: 0,
1650: 0,
1651: /*60*/ MatGetSubMatrix_MPIAIJ,
1652: MatDestroy_MPIAIJ,
1653: MatView_MPIAIJ,
1654: MatGetPetscMaps_Petsc,
1655: 0,
1656: /*65*/ 0,
1657: 0,
1658: 0,
1659: 0,
1660: 0,
1661: /*70*/ 0,
1662: 0,
1663: MatSetColoring_MPIAIJ,
1664: #if defined(PETSC_HAVE_ADIC)
1665: MatSetValuesAdic_MPIAIJ,
1666: #else
1667: 0,
1668: #endif
1669: MatSetValuesAdifor_MPIAIJ,
1670: /*75*/ 0,
1671: 0,
1672: 0,
1673: 0,
1674: 0,
1675: /*80*/ 0,
1676: 0,
1677: 0,
1678: 0,
1679: /*84*/ MatLoad_MPIAIJ,
1680: 0,
1681: 0,
1682: 0,
1683: 0,
1684: 0,
1685: /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1686: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1687: MatMatMultNumeric_MPIAIJ_MPIAIJ,
1688: MatPtAP_Basic,
1689: MatPtAPSymbolic_MPIAIJ,
1690: /*95*/ MatPtAPNumeric_MPIAIJ,
1691: 0,
1692: 0,
1693: 0,
1694: 0,
1695: /*100*/0,
1696: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1697: MatPtAPNumeric_MPIAIJ_MPIAIJ,
1698: MatConjugate_MPIAIJ
1699: };
1701: /* ----------------------------------------------------------------------------------------*/
1706: PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1707: {
1708: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1712: MatStoreValues(aij->A);
1713: MatStoreValues(aij->B);
1714: return(0);
1715: }
1721: PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1722: {
1723: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1727: MatRetrieveValues(aij->A);
1728: MatRetrieveValues(aij->B);
1729: return(0);
1730: }
1733: #include "petscpc.h"
1737: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1738: {
1739: Mat_MPIAIJ *b;
1741: PetscInt i;
1744: B->preallocated = PETSC_TRUE;
1745: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1746: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1747: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1748: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1749: if (d_nnz) {
1750: for (i=0; i<B->m; i++) {
1751: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1752: }
1753: }
1754: if (o_nnz) {
1755: for (i=0; i<B->m; i++) {
1756: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1757: }
1758: }
1759: b = (Mat_MPIAIJ*)B->data;
1760: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
1761: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
1763: return(0);
1764: }
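/*
   Illustrative caller-side sketch (A and m, the number of local rows, are placeholders):
   d_nz/d_nnz preallocate the diagonal block (columns owned by this process), o_nz/o_nnz
   the off-diagonal block; passing PETSC_NULL for an array means "use the single d_nz
   (resp. o_nz) value for every local row".

     PetscInt *d_nnz,*o_nnz;
     PetscMalloc(m*sizeof(PetscInt),&d_nnz);
     PetscMalloc(m*sizeof(PetscInt),&o_nnz);
     // fill d_nnz[i] and o_nnz[i] with exact per-row counts, then:
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
     PetscFree(d_nnz); PetscFree(o_nnz);
*/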
1769: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1770: {
1771: Mat mat;
1772: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1776: *newmat = 0;
1777: MatCreate(matin->comm,&mat);
1778: MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);
1779: MatSetType(mat,matin->type_name);
1780: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
1781: a = (Mat_MPIAIJ*)mat->data;
1782:
1783: mat->factor = matin->factor;
1784: mat->bs = matin->bs;
1785: mat->assembled = PETSC_TRUE;
1786: mat->insertmode = NOT_SET_VALUES;
1787: mat->preallocated = PETSC_TRUE;
1789: a->rstart = oldmat->rstart;
1790: a->rend = oldmat->rend;
1791: a->cstart = oldmat->cstart;
1792: a->cend = oldmat->cend;
1793: a->size = oldmat->size;
1794: a->rank = oldmat->rank;
1795: a->donotstash = oldmat->donotstash;
1796: a->roworiented = oldmat->roworiented;
1797: a->rowindices = 0;
1798: a->rowvalues = 0;
1799: a->getrowactive = PETSC_FALSE;
1801: PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));
1802: MatStashCreate_Private(matin->comm,1,&mat->stash);
1803: if (oldmat->colmap) {
1804: #if defined (PETSC_USE_CTABLE)
1805: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1806: #else
1807: PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);
1808: PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1809: PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));
1810: #endif
1811: } else a->colmap = 0;
1812: if (oldmat->garray) {
1813: PetscInt len;
1814: len = oldmat->B->n;
1815: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
1816: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1817: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
1818: } else a->garray = 0;
1819:
1820: VecDuplicate(oldmat->lvec,&a->lvec);
1821: PetscLogObjectParent(mat,a->lvec);
1822: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1823: PetscLogObjectParent(mat,a->Mvctx);
1824: MatDestroy(a->A);
1825: MatDuplicate(oldmat->A,cpvalues,&a->A);
1826: PetscLogObjectParent(mat,a->A);
1827: MatDestroy(a->B);
1828: MatDuplicate(oldmat->B,cpvalues,&a->B);
1829: PetscLogObjectParent(mat,a->B);
1830: PetscFListDuplicate(matin->qlist,&mat->qlist);
1831: *newmat = mat;
1832: return(0);
1833: }
1835: #include "petscsys.h"
1839: PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1840: {
1841: Mat A;
1842: PetscScalar *vals,*svals;
1843: MPI_Comm comm = ((PetscObject)viewer)->comm;
1844: MPI_Status status;
1846: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1847: PetscInt i,nz,j,rstart,rend,mmax;
1848: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
1849: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1850: PetscInt cend,cstart,n,*rowners;
1851: int fd;
1854: MPI_Comm_size(comm,&size);
1855: MPI_Comm_rank(comm,&rank);
1856: if (!rank) {
1857: PetscViewerBinaryGetDescriptor(viewer,&fd);
1858: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1859: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1860: }
1862: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1863: M = header[1]; N = header[2];
1864: /* determine ownership of all rows */
1865: m = M/size + ((M % size) > rank);
1866: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
1867: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1869: /* First process needs enough room for the process with the most rows */
1870: if (!rank) {
1871: mmax = rowners[1];
1872: for (i=2; i<size; i++) {
1873: mmax = PetscMax(mmax,rowners[i]);
1874: }
1875: } else mmax = m;
1877: rowners[0] = 0;
1878: for (i=2; i<=size; i++) {
1879: mmax = PetscMax(mmax,rowners[i]);
1880: rowners[i] += rowners[i-1];
1881: }
1882: rstart = rowners[rank];
1883: rend = rowners[rank+1];
1885: /* distribute row lengths to all processors */
1886: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
1887: if (!rank) {
1888: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
1889: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
1890: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1891: PetscMemzero(procsnz,size*sizeof(PetscInt));
1892: for (j=0; j<m; j++) {
1893: procsnz[0] += ourlens[j];
1894: }
1895: for (i=1; i<size; i++) {
1896: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
1897: /* calculate the number of nonzeros on each processor */
1898: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1899: procsnz[i] += rowlengths[j];
1900: }
1901: MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
1902: }
1903: PetscFree(rowlengths);
1904: } else {
1905: MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);
1906: }
1908: if (!rank) {
1909: /* determine max buffer needed and allocate it */
1910: maxnz = 0;
1911: for (i=0; i<size; i++) {
1912: maxnz = PetscMax(maxnz,procsnz[i]);
1913: }
1914: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1916: /* read in my part of the matrix column indices */
1917: nz = procsnz[0];
1918: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1919: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1921: /* read in everyone else's and ship off */
1922: for (i=1; i<size; i++) {
1923: nz = procsnz[i];
1924: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1925: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1926: }
1927: PetscFree(cols);
1928: } else {
1929: /* determine buffer space needed for message */
1930: nz = 0;
1931: for (i=0; i<m; i++) {
1932: nz += ourlens[i];
1933: }
1934: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1936: /* receive message of column indices */
1937: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1938: MPI_Get_count(&status,MPIU_INT,&maxnz);
1939: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1940: }
1942: /* determine column ownership if matrix is not square */
1943: if (N != M) {
1944: n = N/size + ((N % size) > rank);
1945: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
1946: cstart = cend - n;
1947: } else {
1948: cstart = rstart;
1949: cend = rend;
1950: n = cend - cstart;
1951: }
1953: /* loop over local rows, determining number of off diagonal entries */
1954: PetscMemzero(offlens,m*sizeof(PetscInt));
1955: jj = 0;
1956: for (i=0; i<m; i++) {
1957: for (j=0; j<ourlens[i]; j++) {
1958: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1959: jj++;
1960: }
1961: }
1963: /* create our matrix */
1964: for (i=0; i<m; i++) {
1965: ourlens[i] -= offlens[i];
1966: }
1967: MatCreate(comm,&A);
1968: MatSetSizes(A,m,n,M,N);
1969: MatSetType(A,type);
1970: MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);
1972: MatSetOption(A,MAT_COLUMNS_SORTED);
1973: for (i=0; i<m; i++) {
1974: ourlens[i] += offlens[i];
1975: }
1977: if (!rank) {
1978: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
1980: /* read in my part of the matrix numerical values */
1981: nz = procsnz[0];
1982: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1983:
1984: /* insert into matrix */
1985: jj = rstart;
1986: smycols = mycols;
1987: svals = vals;
1988: for (i=0; i<m; i++) {
1989: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1990: smycols += ourlens[i];
1991: svals += ourlens[i];
1992: jj++;
1993: }
1995: /* read in other processors and ship out */
1996: for (i=1; i<size; i++) {
1997: nz = procsnz[i];
1998: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1999: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2000: }
2001: PetscFree(procsnz);
2002: } else {
2003: /* receive numeric values */
2004: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2006: /* receive message of values*/
2007: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2008: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2009: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2011: /* insert into matrix */
2012: jj = rstart;
2013: smycols = mycols;
2014: svals = vals;
2015: for (i=0; i<m; i++) {
2016: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2017: smycols += ourlens[i];
2018: svals += ourlens[i];
2019: jj++;
2020: }
2021: }
2022: PetscFree2(ourlens,offlens);
2023: PetscFree(vals);
2024: PetscFree(mycols);
2025: PetscFree(rowners);
2027: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2028: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2029: *newmat = A;
2030: return(0);
2031: }
2035: /*
2036:     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2037:   on each process, and then the end result built by concatenating the local matrices.
2038: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2039: */
2040: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2041: {
2043: PetscMPIInt rank,size;
2044: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
2045: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2046: Mat *local,M,Mreuse;
2047: PetscScalar *vwork,*aa;
2048: MPI_Comm comm = mat->comm;
2049: Mat_SeqAIJ *aij;
2053: MPI_Comm_rank(comm,&rank);
2054: MPI_Comm_size(comm,&size);
2056: if (call == MAT_REUSE_MATRIX) {
2057: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2058: if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2059: local = &Mreuse;
2060: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2061: } else {
2062: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2063: Mreuse = *local;
2064: PetscFree(local);
2065: }
2067: /*
2068: m - number of local rows
2069: n - number of columns (same on all processors)
2070: rstart - first row in new global matrix generated
2071: */
2072: MatGetSize(Mreuse,&m,&n);
2073: if (call == MAT_INITIAL_MATRIX) {
2074: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2075: ii = aij->i;
2076: jj = aij->j;
2078: /*
2079: Determine the number of non-zeros in the diagonal and off-diagonal
2080: portions of the matrix in order to do correct preallocation
2081: */
2083: /* first get start and end of "diagonal" columns */
2084: if (csize == PETSC_DECIDE) {
2085: ISGetSize(isrow,&mglobal);
2086: if (mglobal == n) { /* square matrix */
2087: nlocal = m;
2088: } else {
2089: nlocal = n/size + ((n % size) > rank);
2090: }
2091: } else {
2092: nlocal = csize;
2093: }
2094: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2095: rstart = rend - nlocal;
2096: if (rank == size - 1 && rend != n) {
2097: SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2098: }
2100: /* next, compute all the lengths */
2101: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
2102: olens = dlens + m;
2103: for (i=0; i<m; i++) {
2104: jend = ii[i+1] - ii[i];
2105: olen = 0;
2106: dlen = 0;
2107: for (j=0; j<jend; j++) {
2108: if (*jj < rstart || *jj >= rend) olen++;
2109: else dlen++;
2110: jj++;
2111: }
2112: olens[i] = olen;
2113: dlens[i] = dlen;
2114: }
2115: MatCreate(comm,&M);
2116: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
2117: MatSetType(M,mat->type_name);
2118: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2119: PetscFree(dlens);
2120: } else {
2121: PetscInt ml,nl;
2123: M = *newmat;
2124: MatGetLocalSize(M,&ml,&nl);
2125: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2126: MatZeroEntries(M);
2127: /*
2128: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2129: rather than the slower MatSetValues().
2130: */
2131: M->was_assembled = PETSC_TRUE;
2132: M->assembled = PETSC_FALSE;
2133: }
2134: MatGetOwnershipRange(M,&rstart,&rend);
2135: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2136: ii = aij->i;
2137: jj = aij->j;
2138: aa = aij->a;
2139: for (i=0; i<m; i++) {
2140: row = rstart + i;
2141: nz = ii[i+1] - ii[i];
2142: cwork = jj; jj += nz;
2143: vwork = aa; aa += nz;
2144: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2145: }
2147: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2148: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2149: *newmat = M;
2151: /* save submatrix used in processor for next request */
2152: if (call == MAT_INITIAL_MATRIX) {
2153: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2154: PetscObjectDereference((PetscObject)Mreuse);
2155: }
2157: return(0);
2158: }
2163: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2164: {
2165: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2166: PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2167: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2168: const PetscInt *JJ;
2169: PetscScalar *values;
2173: #if defined(PETSC_OPT_g)
2174: if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2175: #endif
2176: PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2177: o_nnz = d_nnz + m;
2179: for (i=0; i<m; i++) {
2180: nnz = I[i+1]- I[i];
2181: JJ = J + I[i];
2182: nnz_max = PetscMax(nnz_max,nnz);
2183: #if defined(PETSC_OPT_g)
2184: if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2185: #endif
2186: for (j=0; j<nnz; j++) {
2187: if (*JJ >= cstart) break;
2188: JJ++;
2189: }
2190: d = 0;
2191: for (; j<nnz; j++) {
2192: if (*JJ++ >= cend) break;
2193: d++;
2194: }
2195: d_nnz[i] = d;
2196: o_nnz[i] = nnz - d;
2197: }
2198: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2199: PetscFree(d_nnz);
2201: if (v) values = (PetscScalar*)v;
2202: else {
2203: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
2204: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
2205: }
2207: MatSetOption(B,MAT_COLUMNS_SORTED);
2208: for (i=0; i<m; i++) {
2209: ii = i + rstart;
2210: nnz = I[i+1]- I[i];
2211: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);
2212: }
2213: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2214: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2215: MatSetOption(B,MAT_COLUMNS_UNSORTED);
2217: if (!v) {
2218: PetscFree(values);
2219: }
2220: return(0);
2221: }
2226: /*@C
2227: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2228: (the default parallel PETSc format).
2230: Collective on MPI_Comm
2232: Input Parameters:
2233: + B - the matrix
2234: . i - the indices into j for the start of each local row (starts with zero)
2235: . j - the column indices for each local row (starts with zero); these must be sorted within each row
2236: - v - optional values in the matrix
2238: Level: developer
2240: .keywords: matrix, aij, compressed row, sparse, parallel
2242: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2243: @*/
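/*
   Usage sketch (added illustration, not part of the original source): preallocate and
   fill a matrix from a caller-owned CSR triple.  Here comm, m, M, N, ia, ja and va are
   hypothetical caller-supplied values; ia has m+1 entries with ia[0] == 0 and the
   column indices in ja are sorted within each row.

     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,PETSC_DECIDE,M,N);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,ia,ja,va);
*/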
2244: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2245: {
2246: PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2249: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);
2250: if (f) {
2251: (*f)(B,i,j,v);
2252: }
2253: return(0);
2254: }
2258: /*@C
2259: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2260: (the default parallel PETSc format). For good matrix assembly performance
2261: the user should preallocate the matrix storage by setting the parameters
2262: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2263: performance can be increased by more than a factor of 50.
2265: Collective on MPI_Comm
2267: Input Parameters:
2268: + B - the matrix
2269: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2270: (same value is used for all local rows)
2271: . d_nnz - array containing the number of nonzeros in the various rows of the
2272: DIAGONAL portion of the local submatrix (possibly different for each row)
2273: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2274: The size of this array is equal to the number of local rows, i.e. 'm'.
2275: You must leave room for the diagonal entry even if it is zero.
2276: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2277: submatrix (same value is used for all local rows).
2278: - o_nnz - array containing the number of nonzeros in the various rows of the
2279: OFF-DIAGONAL portion of the local submatrix (possibly different for
2280: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2281: structure. The size of this array is equal to the number
2282: of local rows, i.e. 'm'.
2284: If the *_nnz parameter is given then the *_nz parameter is ignored
2286: The AIJ format (also called the Yale sparse matrix format or
2287: compressed row storage (CSR)), is fully compatible with standard Fortran 77
2288: storage. The stored row and column indices begin with zero. See the users manual for details.
2290: The parallel matrix is partitioned such that the first m0 rows belong to
2291: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2292: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2294: The DIAGONAL portion of the local submatrix of a processor can be defined
2295: as the submatrix which is obtained by extracting the part corresponding
2296: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2297: first row that belongs to the processor, and r2 is the last row belonging
2298: to this processor. This is a square mxm matrix. The remaining portion
2299: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2301: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2303: Example usage:
2304:
2305: Consider the following 8x8 matrix with 34 non-zero values, that is
2306: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2307: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2308: as follows:
2310: .vb
2311: 1 2 0 | 0 3 0 | 0 4
2312: Proc0 0 5 6 | 7 0 0 | 8 0
2313: 9 0 10 | 11 0 0 | 12 0
2314: -------------------------------------
2315: 13 0 14 | 15 16 17 | 0 0
2316: Proc1 0 18 0 | 19 20 21 | 0 0
2317: 0 0 0 | 22 23 0 | 24 0
2318: -------------------------------------
2319: Proc2 25 26 27 | 0 0 28 | 29 0
2320: 30 0 0 | 31 32 33 | 0 34
2321: .ve
2323: This can be represented as a collection of submatrices as:
2325: .vb
2326: A B C
2327: D E F
2328: G H I
2329: .ve
2331: Where the submatrices A,B,C are owned by proc0, D,E,F are
2332: owned by proc1, G,H,I are owned by proc2.
2334: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2335: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2336: The 'M','N' parameters are 8,8, and have the same values on all procs.
2338: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2339: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2340: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2341: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2342: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2343: matrix, and [DF] as another SeqAIJ matrix.
2345: When d_nz, o_nz parameters are specified, d_nz storage elements are
2346: allocated for every row of the local diagonal submatrix, and o_nz
2347: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2348: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2349: row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2350: In this case, the values of d_nz,o_nz are:
2351: .vb
2352: proc0 : d_nz = 2, o_nz = 2
2353: proc1 : d_nz = 3, o_nz = 2
2354: proc2 : d_nz = 1, o_nz = 4
2355: .ve
2356: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2357: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2358: for proc2. That is, we are using 12+15+10=37 storage locations to store
2359: 34 values.
2361: When d_nnz, o_nnz parameters are specified, the storage is specified
2362: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2363: In the above case the values for d_nnz,o_nnz are:
2364: .vb
2365: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2366: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2367: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2368: .ve
2369: Here the space allocated is the sum of all the above values, i.e. 34, and
2370: hence pre-allocation is perfect.
2372: Level: intermediate
2374: .keywords: matrix, aij, compressed row, sparse, parallel
2376: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2377: MPIAIJ
2378: @*/
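/*
   Usage sketch (added illustration, not part of the original source): preallocating the
   8x8 example above on 3 processes with the per-row counts listed there.  comm is a
   hypothetical communicator with exactly 3 ranks.

     On rank 0:  PetscInt m = 3, d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     On rank 1:  PetscInt m = 3, d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     On rank 2:  PetscInt m = 2, d_nnz[2] = {1,1},   o_nnz[2] = {4,4};

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,m,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
*/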
2379: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2380: {
2381: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2384: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
2385: if (f) {
2386: (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
2387: }
2388: return(0);
2389: }
2393: /*@C
2394: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2395: (the default parallel PETSc format). For good matrix assembly performance
2396: the user should preallocate the matrix storage by setting the parameters
2397: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2398: performance can be increased by more than a factor of 50.
2400: Collective on MPI_Comm
2402: Input Parameters:
2403: + comm - MPI communicator
2404: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2405: This value should be the same as the local size used in creating the
2406: y vector for the matrix-vector product y = Ax.
2407: . n - This value should be the same as the local size used in creating the
2408: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2409: it calculated if N is given) For square matrices n is almost always m.
2410: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2411: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2412: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2413: (same value is used for all local rows)
2414: . d_nnz - array containing the number of nonzeros in the various rows of the
2415: DIAGONAL portion of the local submatrix (possibly different for each row)
2416: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2417: The size of this array is equal to the number of local rows, i.e. 'm'.
2418: You must leave room for the diagonal entry even if it is zero.
2419: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2420: submatrix (same value is used for all local rows).
2421: - o_nnz - array containing the number of nonzeros in the various rows of the
2422: OFF-DIAGONAL portion of the local submatrix (possibly different for
2423: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2424: structure. The size of this array is equal to the number
2425: of local rows, i.e. 'm'.
2427: Output Parameter:
2428: . A - the matrix
2430: Notes:
2431: If the *_nnz parameter is given then the *_nz parameter is ignored
2433: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2434: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2435: storage requirements for this matrix.
2437: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2438: processor then it must be used on all processors that share the object for
2439: that argument.
2441: The user MUST specify either the local or global matrix dimensions
2442: (possibly both).
2444: The parallel matrix is partitioned such that the first m0 rows belong to
2445: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2446: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2448: The DIAGONAL portion of the local submatrix of a processor can be defined
2449: as the submatrix which is obtained by extracting the part corresponding
2450: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2451: first row that belongs to the processor, and r2 is the last row belonging
2452: to this processor. This is a square mxm matrix. The remaining portion
2453: of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2455: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2457: When calling this routine with a single process communicator, a matrix of
2458: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
2459: type of communicator, use the construction mechanism:
2460: MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2462: By default, this format uses inodes (identical nodes) when possible.
2463: We search for consecutive rows with the same nonzero structure, thereby
2464: reusing matrix information to achieve increased efficiency.
2466: Options Database Keys:
2467: + -mat_no_inode - Do not use inodes
2468: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2469: - -mat_aij_oneindex - Internally use indexing starting at 1
2470: rather than 0. Note that when calling MatSetValues(),
2471: the user still MUST index entries starting at 0!
2474: Example usage:
2475:
2476: Consider the following 8x8 matrix with 34 non-zero values, that is
2477: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2478: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2479: as follows:
2481: .vb
2482: 1 2 0 | 0 3 0 | 0 4
2483: Proc0 0 5 6 | 7 0 0 | 8 0
2484: 9 0 10 | 11 0 0 | 12 0
2485: -------------------------------------
2486: 13 0 14 | 15 16 17 | 0 0
2487: Proc1 0 18 0 | 19 20 21 | 0 0
2488: 0 0 0 | 22 23 0 | 24 0
2489: -------------------------------------
2490: Proc2 25 26 27 | 0 0 28 | 29 0
2491: 30 0 0 | 31 32 33 | 0 34
2492: .ve
2494: This can be represented as a collection of submatrices as:
2496: .vb
2497: A B C
2498: D E F
2499: G H I
2500: .ve
2502: Where the submatrices A,B,C are owned by proc0, D,E,F are
2503: owned by proc1, G,H,I are owned by proc2.
2505: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2506: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2507: The 'M','N' parameters are 8,8, and have the same values on all procs.
2509: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2510: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2511: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2512: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2513: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2514: matrix, and [DF] as another SeqAIJ matrix.
2516: When d_nz, o_nz parameters are specified, d_nz storage elements are
2517: allocated for every row of the local diagonal submatrix, and o_nz
2518: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2519: One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2520: row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2521: In this case, the values of d_nz,o_nz are:
2522: .vb
2523: proc0 : d_nz = 2, o_nz = 2
2524: proc1 : d_nz = 3, o_nz = 2
2525: proc2 : d_nz = 1, o_nz = 4
2526: .ve
2527: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2528: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2529: for proc2. That is, we are using 12+15+10=37 storage locations to store
2530: 34 values.
2532: When d_nnz, o_nnz parameters are specified, the storage is specified
2533: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2534: In the above case the values for d_nnz,o_nnz are:
2535: .vb
2536: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2537: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2538: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2539: .ve
2540: Here the space allocated is the sum of all the above values, i.e. 34, and
2541: hence pre-allocation is perfect.
2543: Level: intermediate
2545: .keywords: matrix, aij, compressed row, sparse, parallel
2547: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2548: MPIAIJ
2549: @*/
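/*
   Usage sketch (added illustration, not part of the original source): creating the 8x8
   example above with the single d_nz/o_nz values listed there; comm is a hypothetical
   communicator with exactly 3 ranks, and the matrix is then filled with MatSetValues()
   followed by MatAssemblyBegin()/MatAssemblyEnd().

     Mat A;
     On rank 0:  MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);
     On rank 1:  MatCreateMPIAIJ(comm,3,3,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);
     On rank 2:  MatCreateMPIAIJ(comm,2,2,8,8,1,PETSC_NULL,4,PETSC_NULL,&A);
*/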
2550: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2551: {
2553: PetscMPIInt size;
2556: MatCreate(comm,A);
2557: MatSetSizes(*A,m,n,M,N);
2558: MPI_Comm_size(comm,&size);
2559: if (size > 1) {
2560: MatSetType(*A,MATMPIAIJ);
2561: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2562: } else {
2563: MatSetType(*A,MATSEQAIJ);
2564: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2565: }
2566: return(0);
2567: }
2571: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2572: {
2573: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2576: *Ad = a->A;
2577: *Ao = a->B;
2578: *colmap = a->garray;
2579: return(0);
2580: }
2584: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2585: {
2587: PetscInt i;
2588: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2591: if (coloring->ctype == IS_COLORING_LOCAL) {
2592: ISColoringValue *allcolors,*colors;
2593: ISColoring ocoloring;
2595: /* set coloring for diagonal portion */
2596: MatSetColoring_SeqAIJ(a->A,coloring);
2598: /* set coloring for off-diagonal portion */
2599: ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2600: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2601: for (i=0; i<a->B->n; i++) {
2602: colors[i] = allcolors[a->garray[i]];
2603: }
2604: PetscFree(allcolors);
2605: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2606: MatSetColoring_SeqAIJ(a->B,ocoloring);
2607: ISColoringDestroy(ocoloring);
2608: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2609: ISColoringValue *colors;
2610: PetscInt *larray;
2611: ISColoring ocoloring;
2613: /* set coloring for diagonal portion */
2614: PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);
2615: for (i=0; i<a->A->n; i++) {
2616: larray[i] = i + a->cstart;
2617: }
2618: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2619: PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);
2620: for (i=0; i<a->A->n; i++) {
2621: colors[i] = coloring->colors[larray[i]];
2622: }
2623: PetscFree(larray);
2624: ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2625: MatSetColoring_SeqAIJ(a->A,ocoloring);
2626: ISColoringDestroy(ocoloring);
2628: /* set coloring for off-diagonal portion */
2629: PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);
2630: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2631: PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);
2632: for (i=0; i<a->B->n; i++) {
2633: colors[i] = coloring->colors[larray[i]];
2634: }
2635: PetscFree(larray);
2636: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2637: MatSetColoring_SeqAIJ(a->B,ocoloring);
2638: ISColoringDestroy(ocoloring);
2639: } else {
2640: SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2641: }
2643: return(0);
2644: }
2646: #if defined(PETSC_HAVE_ADIC)
2649: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2650: {
2651: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2655: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2656: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2657: return(0);
2658: }
2659: #endif
2663: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2664: {
2665: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2669: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2670: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2671: return(0);
2672: }
2676: /*@C
2677: MatMerge - Creates a single large PETSc matrix by concatenating sequential
2678: matrices from each processor
2680: Collective on MPI_Comm
2682: Input Parameters:
2683: + comm - the communicator the parallel matrix will live on
2684: . inmat - the input sequential matrix (one per process)
2685: . n - number of local columns (or PETSC_DECIDE)
2686: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2688: Output Parameter:
2689: . outmat - the parallel matrix generated
2691: Level: advanced
2693: Notes: The number of columns of the matrix in EACH processor MUST be the same.
2695: @*/
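/*
   Usage sketch (added illustration, not part of the original source): gluing one
   assembled SeqAIJ matrix per process (all with the same number of columns) into a
   single parallel matrix.  localmat is a hypothetical name; note that the routine
   destroys inmat (see the MatDestroy() call in the body below).

     Mat C;
     MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
*/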
2696: PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2697: {
2699: PetscInt m,N,i,rstart,nnz,I,*dnz,*onz;
2700: PetscInt *indx;
2701: PetscScalar *values;
2702: PetscMap columnmap,rowmap;
2705: MatGetSize(inmat,&m,&N);
2706: /*
2707: PetscMPIInt rank;
2708: MPI_Comm_rank(comm,&rank);
2709: PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2710: */
2711: if (scall == MAT_INITIAL_MATRIX){
2712: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2713: if (n == PETSC_DECIDE){
2714: PetscMapCreate(comm,&columnmap);
2715: PetscMapSetSize(columnmap,N);
2716: PetscMapSetType(columnmap,MAP_MPI);
2717: PetscMapGetLocalSize(columnmap,&n);
2718: PetscMapDestroy(columnmap);
2719: }
2721: PetscMapCreate(comm,&rowmap);
2722: PetscMapSetLocalSize(rowmap,m);
2723: PetscMapSetType(rowmap,MAP_MPI);
2724: PetscMapGetLocalRange(rowmap,&rstart,0);
2725: PetscMapDestroy(rowmap);
2727: MatPreallocateInitialize(comm,m,n,dnz,onz);
2728: for (i=0;i<m;i++) {
2729: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
2730: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2731: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
2732: }
2733: /* This routine will ONLY return an MPIAIJ type matrix */
2734: MatCreate(comm,outmat);
2735: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
2736: MatSetType(*outmat,MATMPIAIJ);
2737: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
2738: MatPreallocateFinalize(dnz,onz);
2739:
2740: } else if (scall == MAT_REUSE_MATRIX){
2741: MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
2742: } else {
2743: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2744: }
2746: for (i=0;i<m;i++) {
2747: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
2748: I = i + rstart;
2749: MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2750: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
2751: }
2752: MatDestroy(inmat);
2753: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2754: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
2756: return(0);
2757: }
2761: PetscErrorCode MatFileSplit(Mat A,char *outfile)
2762: {
2763: PetscErrorCode ierr;
2764: PetscMPIInt rank;
2765: PetscInt m,N,i,rstart,nnz;
2766: size_t len;
2767: const PetscInt *indx;
2768: PetscViewer out;
2769: char *name;
2770: Mat B;
2771: const PetscScalar *values;
2774: MatGetLocalSize(A,&m,0);
2775: MatGetSize(A,0,&N);
2776: /* Should this be the type of the diagonal block of A? */
2777: MatCreate(PETSC_COMM_SELF,&B);
2778: MatSetSizes(B,m,N,m,N);
2779: MatSetType(B,MATSEQAIJ);
2780: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
2781: MatGetOwnershipRange(A,&rstart,0);
2782: for (i=0;i<m;i++) {
2783: MatGetRow(A,i+rstart,&nnz,&indx,&values);
2784: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2785: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2786: }
2787: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2788: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2790: MPI_Comm_rank(A->comm,&rank);
2791: PetscStrlen(outfile,&len);
2792: PetscMalloc((len+5)*sizeof(char),&name);
2793: sprintf(name,"%s.%d",outfile,rank);
2794: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);
2795: PetscFree(name);
2796: MatView(B,out);
2797: PetscViewerDestroy(out);
2798: MatDestroy(B);
2799: return(0);
2800: }
2802: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2805: PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2806: {
2807: PetscErrorCode ierr;
2808: Mat_Merge_SeqsToMPI *merge;
2809: PetscObjectContainer container;
2812: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
2813: if (container) {
2814: PetscObjectContainerGetPointer(container,(void **)&merge);
2815: PetscFree(merge->id_r);
2816: PetscFree(merge->len_s);
2817: PetscFree(merge->len_r);
2818: PetscFree(merge->bi);
2819: PetscFree(merge->bj);
2820: PetscFree(merge->buf_ri);
2821: PetscFree(merge->buf_rj);
2822: PetscMapDestroy(merge->rowmap);
2823: if (merge->coi){PetscFree(merge->coi);}
2824: if (merge->coj){PetscFree(merge->coj);}
2825: if (merge->owners_co){PetscFree(merge->owners_co);}
2826:
2827: PetscObjectContainerDestroy(container);
2828: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
2829: }
2830: PetscFree(merge);
2832: MatDestroy_MPIAIJ(A);
2833: return(0);
2834: }
2836: #include src/mat/utils/freespace.h
2837: #include petscbt.h
2838: static PetscEvent logkey_seqstompinum = 0;
2841: /*@C
2842: MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
2843: matrices from each processor
2845: Collective on MPI_Comm
2847: Input Parameters:
2848: + comm - the communicator the parallel matrix will live on
2849: . seqmat - the input sequential matrices
2850: . m - number of local rows (or PETSC_DECIDE)
2851: . n - number of local columns (or PETSC_DECIDE)
2852: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2854: Output Parameter:
2855: . mpimat - the parallel matrix generated
2857: Level: advanced
2859: Notes:
2860: The dimensions of the sequential matrix in each processor MUST be the same.
2861: The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2862: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2863: @*/
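/*
   Usage sketch (added illustration, not part of the original source): the driver
   MatMerge_SeqsToMPI() further below performs the symbolic phase once and the numeric
   phase on every call.  seqmat is a hypothetical assembled SeqAIJ matrix with the same
   dimensions on every process.

     Mat C;
     MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     ... update the numerical values of seqmat, keeping its nonzero pattern ...
     MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
*/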
2864: PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2865: {
2866: PetscErrorCode ierr;
2867: MPI_Comm comm=mpimat->comm;
2868: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
2869: PetscMPIInt size,rank,taga,*len_s;
2870: PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2871: PetscInt proc,m;
2872: PetscInt **buf_ri,**buf_rj;
2873: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2874: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
2875: MPI_Request *s_waits,*r_waits;
2876: MPI_Status *status;
2877: MatScalar *aa=a->a,**abuf_r,*ba_i;
2878: Mat_Merge_SeqsToMPI *merge;
2879: PetscObjectContainer container;
2880:
2882: if (!logkey_seqstompinum) {
2883: PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2884: }
2885: PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);
2887: MPI_Comm_size(comm,&size);
2888: MPI_Comm_rank(comm,&rank);
2890: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
2891: if (container) {
2892: PetscObjectContainerGetPointer(container,(void **)&merge);
2893: }
2894: bi = merge->bi;
2895: bj = merge->bj;
2896: buf_ri = merge->buf_ri;
2897: buf_rj = merge->buf_rj;
2899: PetscMalloc(size*sizeof(MPI_Status),&status);
2900: PetscMapGetGlobalRange(merge->rowmap,&owners);
2901: len_s = merge->len_s;
2903: /* send and recv matrix values */
2904: /*-----------------------------*/
2905: PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);
2906: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
2908: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
2909: for (proc=0,k=0; proc<size; proc++){
2910: if (!len_s[proc]) continue;
2911: i = owners[proc];
2912: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
2913: k++;
2914: }
2916: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
2917: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
2918: PetscFree(status);
2920: PetscFree(s_waits);
2921: PetscFree(r_waits);
2923: /* insert mat values of mpimat */
2924: /*----------------------------*/
2925: PetscMalloc(N*sizeof(MatScalar),&ba_i);
2926: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
2927: nextrow = buf_ri_k + merge->nrecv;
2928: nextai = nextrow + merge->nrecv;
2930: for (k=0; k<merge->nrecv; k++){
2931: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2932: nrows = *(buf_ri_k[k]);
2933: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
2934: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
2935: }
2937: /* set values of ba */
2938: PetscMapGetLocalSize(merge->rowmap,&m);
2939: for (i=0; i<m; i++) {
2940: arow = owners[rank] + i;
2941: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
2942: bnzi = bi[i+1] - bi[i];
2943: PetscMemzero(ba_i,bnzi*sizeof(MatScalar));
2945: /* add local non-zero vals of this proc's seqmat into ba */
2946: anzi = ai[arow+1] - ai[arow];
2947: aj = a->j + ai[arow];
2948: aa = a->a + ai[arow];
2949: nextaj = 0;
2950: for (j=0; nextaj<anzi; j++){
2951: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2952: ba_i[j] += aa[nextaj++];
2953: }
2954: }
2956: /* add received vals into ba */
2957: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2958: /* i-th row */
2959: if (i == *nextrow[k]) {
2960: anzi = *(nextai[k]+1) - *nextai[k];
2961: aj = buf_rj[k] + *(nextai[k]);
2962: aa = abuf_r[k] + *(nextai[k]);
2963: nextaj = 0;
2964: for (j=0; nextaj<anzi; j++){
2965: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2966: ba_i[j] += aa[nextaj++];
2967: }
2968: }
2969: nextrow[k]++; nextai[k]++;
2970: }
2971: }
2972: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
2973: }
2974: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
2975: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
2977: PetscFree(abuf_r);
2978: PetscFree(ba_i);
2979: PetscFree(buf_ri_k);
2980: PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);
2981: return(0);
2982: }
2984: static PetscEvent logkey_seqstompisym = 0;
2987: PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2988: {
2989: PetscErrorCode ierr;
2990: Mat B_mpi;
2991: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
2992: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
2993: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
2994: PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
2995: PetscInt len,proc,*dnz,*onz;
2996: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
2997: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
2998: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
2999: MPI_Status *status;
3000: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
3001: PetscBT lnkbt;
3002: Mat_Merge_SeqsToMPI *merge;
3003: PetscObjectContainer container;
3006: if (!logkey_seqstompisym) {
3007: PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3008: }
3009: PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);
3011: /* make sure it is a PETSc comm */
3012: PetscCommDuplicate(comm,&comm,PETSC_NULL);
3013: MPI_Comm_size(comm,&size);
3014: MPI_Comm_rank(comm,&rank);
3015:
3016: PetscNew(Mat_Merge_SeqsToMPI,&merge);
3017: PetscMalloc(size*sizeof(MPI_Status),&status);
3019: /* determine row ownership */
3020: /*---------------------------------------------------------*/
3021: PetscMapCreate(comm,&merge->rowmap);
3022: if (m == PETSC_DECIDE) {
3023: PetscMapSetSize(merge->rowmap,M);
3024: } else {
3025: PetscMapSetLocalSize(merge->rowmap,m);
3026: }
3027: PetscMapSetType(merge->rowmap,MAP_MPI);
3028: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
3029: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
3030:
3031: if (m == PETSC_DECIDE) {PetscMapGetLocalSize(merge->rowmap,&m); }
3032: PetscMapGetGlobalRange(merge->rowmap,&owners);
3034: /* determine the number of messages to send, their lengths */
3035: /*---------------------------------------------------------*/
3036: len_s = merge->len_s;
3038: len = 0; /* length of buf_si[] */
3039: merge->nsend = 0;
3040: for (proc=0; proc<size; proc++){
3041: len_si[proc] = 0;
3042: if (proc == rank){
3043: len_s[proc] = 0;
3044: } else {
3045: len_si[proc] = owners[proc+1] - owners[proc] + 1;
3046: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3047: }
3048: if (len_s[proc]) {
3049: merge->nsend++;
3050: nrows = 0;
3051: for (i=owners[proc]; i<owners[proc+1]; i++){
3052: if (ai[i+1] > ai[i]) nrows++;
3053: }
3054: len_si[proc] = 2*(nrows+1);
3055: len += len_si[proc];
3056: }
3057: }
3059: /* determine the number and length of messages to receive for ij-structure */
3060: /*-------------------------------------------------------------------------*/
3061: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
3062: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
3064: /* post the Irecv of j-structure */
3065: /*-------------------------------*/
3066: PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);
3067: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
3069: /* post the Isend of j-structure */
3070: /*--------------------------------*/
3071: PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);
3072: sj_waits = si_waits + merge->nsend;
3074: for (proc=0, k=0; proc<size; proc++){
3075: if (!len_s[proc]) continue;
3076: i = owners[proc];
3077: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
3078: k++;
3079: }
3081: /* receives and sends of j-structure are complete */
3082: /*------------------------------------------------*/
3083: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
3084: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
3085:
3086: /* send and recv i-structure */
3087: /*---------------------------*/
3088: PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);
3089: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
3090:
3091: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
3092: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
3093: for (proc=0,k=0; proc<size; proc++){
3094: if (!len_s[proc]) continue;
3095: /* form outgoing message for i-structure:
3096: buf_si[0]: nrows to be sent
3097: [1:nrows]: row index (local to the receiving process)
3098: [nrows+1:2*nrows+1]: i-structure index
3099: */
3100: /*-------------------------------------------*/
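/*
       For instance (illustrative values): sending two nonempty rows with local row
       indices 5 and 7 on the receiving process, holding 3 and 2 nonzeros respectively,
       gives buf_si = {2, 5, 7, 0, 3, 5}.
*/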
3101: nrows = len_si[proc]/2 - 1;
3102: buf_si_i = buf_si + nrows+1;
3103: buf_si[0] = nrows;
3104: buf_si_i[0] = 0;
3105: nrows = 0;
3106: for (i=owners[proc]; i<owners[proc+1]; i++){
3107: anzi = ai[i+1] - ai[i];
3108: if (anzi) {
3109: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3110: buf_si[nrows+1] = i-owners[proc]; /* local row index */
3111: nrows++;
3112: }
3113: }
3114: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
3115: k++;
3116: buf_si += len_si[proc];
3117: }
3119: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
3120: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
3122: PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));
3123: for (i=0; i<merge->nrecv; i++){
3124: PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
3125: }
3127: PetscFree(len_si);
3128: PetscFree(len_ri);
3129: PetscFree(rj_waits);
3130: PetscFree(si_waits);
3131: PetscFree(ri_waits);
3132: PetscFree(buf_s);
3133: PetscFree(status);
3135: /* compute a local seq matrix in each processor */
3136: /*----------------------------------------------*/
3137: /* allocate bi array and free space for accumulating nonzero column info */
3138: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
3139: bi[0] = 0;
3141: /* create and initialize a linked list */
3142: nlnk = N+1;
3143: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
3144:
3145: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3146: len = 0;
3147: len = ai[owners[rank+1]] - ai[owners[rank]];
3148: GetMoreSpace((PetscInt)(2*len+1),&free_space);
3149: current_space = free_space;
3151: /* determine symbolic info for each local row */
3152: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3153: nextrow = buf_ri_k + merge->nrecv;
3154: nextai = nextrow + merge->nrecv;
3155: for (k=0; k<merge->nrecv; k++){
3156: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3157: nrows = *buf_ri_k[k];
3158: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
3159: nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
3160: }
3162: MatPreallocateInitialize(comm,m,n,dnz,onz);
3163: len = 0;
3164: for (i=0;i<m;i++) {
3165: bnzi = 0;
3166: /* add local non-zero cols of this proc's seqmat into lnk */
3167: arow = owners[rank] + i;
3168: anzi = ai[arow+1] - ai[arow];
3169: aj = a->j + ai[arow];
3170: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3171: bnzi += nlnk;
3172: /* add received col data into lnk */
3173: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3174: if (i == *nextrow[k]) { /* i-th row */
3175: anzi = *(nextai[k]+1) - *nextai[k];
3176: aj = buf_rj[k] + *nextai[k];
3177: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3178: bnzi += nlnk;
3179: nextrow[k]++; nextai[k]++;
3180: }
3181: }
3182: if (len < bnzi) len = bnzi; /* =max(bnzi) */
3184: /* if free space is not available, make more free space */
3185: if (current_space->local_remaining<bnzi) {
3186: GetMoreSpace(current_space->total_array_size,&current_space);
3187: nspacedouble++;
3188: }
3189: /* copy data into free space, then initialize lnk */
3190: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
3191: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
3193: current_space->array += bnzi;
3194: current_space->local_used += bnzi;
3195: current_space->local_remaining -= bnzi;
3196:
3197: bi[i+1] = bi[i] + bnzi;
3198: }
3199:
3200: PetscFree(buf_ri_k);
3202: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
3203: MakeSpaceContiguous(&free_space,bj);
3204: PetscLLDestroy(lnk,lnkbt);
3206: /* create symbolic parallel matrix B_mpi */
3207: /*---------------------------------------*/
3208: MatCreate(comm,&B_mpi);
3209: if (n==PETSC_DECIDE) {
3210: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
3211: } else {
3212: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3213: }
3214: MatSetType(B_mpi,MATMPIAIJ);
3215: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
3216: MatPreallocateFinalize(dnz,onz);
3218: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3219: B_mpi->assembled = PETSC_FALSE;
3220: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
3221: merge->bi = bi;
3222: merge->bj = bj;
3223: merge->buf_ri = buf_ri;
3224: merge->buf_rj = buf_rj;
3225: merge->coi = PETSC_NULL;
3226: merge->coj = PETSC_NULL;
3227: merge->owners_co = PETSC_NULL;
3229: /* attach the supporting struct to B_mpi for reuse */
3230: PetscObjectContainerCreate(PETSC_COMM_SELF,&container);
3231: PetscObjectContainerSetPointer(container,merge);
3232: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
3233: *mpimat = B_mpi;
3235: PetscCommDestroy(&comm);
3236: PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);
3237: return(0);
3238: }
3240: static PetscEvent logkey_seqstompi = 0;
3243: PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3244: {
3245: PetscErrorCode ierr;
3248: if (!logkey_seqstompi) {
3249: PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3250: }
3251: PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);
3252: if (scall == MAT_INITIAL_MATRIX){
3253: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
3254: }
3255: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
3256: PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);
3257: return(0);
3258: }
3259: static PetscEvent logkey_getlocalmat = 0;
3262: /*@C
3263: MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix
3265: Not Collective
3267: Input Parameters:
3268: + A - the matrix
3269: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3271: Output Parameter:
3272: . A_loc - the local sequential matrix generated
3274: Level: developer
3276: @*/
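/*
   Usage sketch (added illustration, not part of the original source): A is a
   hypothetical assembled MPIAIJ matrix; the returned A_loc has A->m rows and A->N
   columns in global column numbering, and can be refreshed after the numerical
   values of A change (same nonzero pattern).

     Mat A_loc;
     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ... later, after the values of A have changed ...
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
*/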
3277: PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3278: {
3279: PetscErrorCode ierr;
3280: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
3281: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3282: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3283: PetscScalar *aa=a->a,*ba=b->a,*ca;
3284: PetscInt am=A->m,i,j,k,cstart=mpimat->cstart;
3285: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
3288: if (!logkey_getlocalmat) {
3289: PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3290: }
3291: PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);
3292: if (scall == MAT_INITIAL_MATRIX){
3293: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
3294: ci[0] = 0;
3295: for (i=0; i<am; i++){
3296: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3297: }
3298: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
3299: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
3300: k = 0;
3301: for (i=0; i<am; i++) {
3302: ncols_o = bi[i+1] - bi[i];
3303: ncols_d = ai[i+1] - ai[i];
3304: /* off-diagonal portion of A */
3305: for (jo=0; jo<ncols_o; jo++) {
3306: col = cmap[*bj];
3307: if (col >= cstart) break;
3308: cj[k] = col; bj++;
3309: ca[k++] = *ba++;
3310: }
3311: /* diagonal portion of A */
3312: for (j=0; j<ncols_d; j++) {
3313: cj[k] = cstart + *aj++;
3314: ca[k++] = *aa++;
3315: }
3316: /* off-diagonal portion of A */
3317: for (j=jo; j<ncols_o; j++) {
3318: cj[k] = cmap[*bj++];
3319: ca[k++] = *ba++;
3320: }
3321: }
3322: /* put together the new matrix */
3323: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);
3324: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3325: /* Since these are PETSc arrays, change flags to free them as necessary. */
3326: mat = (Mat_SeqAIJ*)(*A_loc)->data;
3327: mat->freedata = PETSC_TRUE;
3328: mat->nonew = 0;
3329: } else if (scall == MAT_REUSE_MATRIX){
3330: mat=(Mat_SeqAIJ*)(*A_loc)->data;
3331: ci = mat->i; cj = mat->j; ca = mat->a;
3332: for (i=0; i<am; i++) {
3333: /* off-diagonal portion of A */
3334: ncols_o = bi[i+1] - bi[i];
3335: for (jo=0; jo<ncols_o; jo++) {
3336: col = cmap[*bj];
3337: if (col >= cstart) break;
3338: *ca++ = *ba++; bj++;
3339: }
3340: /* diagonal portion of A */
3341: ncols_d = ai[i+1] - ai[i];
3342: for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3343: /* off-diagonal portion of A */
3344: for (j=jo; j<ncols_o; j++) {
3345: *ca++ = *ba++; bj++;
3346: }
3347: }
3348: } else {
3349: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3350: }
3352: PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);
3353: return(0);
3354: }
3356: static PetscEvent logkey_getlocalmatcondensed = 0;
3359: /*@C
3360: MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and NON-ZERO columns of a parallel MPIAIJ matrix
3362: Not Collective
3364: Input Parameters:
3365: + A - the matrix
3366: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3367: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
3369: Output Parameter:
3370: . A_loc - the local sequential matrix generated
3372: Level: developer
3374: @*/
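/*
   Usage sketch (added illustration, not part of the original source): A is a
   hypothetical assembled MPIAIJ matrix; passing PETSC_NULL for row and col lets the
   routine build the default index sets (all local rows, all locally nonzero columns)
   itself.

     Mat A_loc;
     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
*/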
3375: PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3376: {
3377: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
3378: PetscErrorCode ierr;
3379: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3380: IS isrowa,iscola;
3381: Mat *aloc;
3384: if (!logkey_getlocalmatcondensed) {
3385: PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3386: }
3387: PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);
3388: if (!row){
3389: start = a->rstart; end = a->rend;
3390: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
3391: } else {
3392: isrowa = *row;
3393: }
3394: if (!col){
3395: start = a->cstart;
3396: cmap = a->garray;
3397: nzA = a->A->n;
3398: nzB = a->B->n;
3399: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
3400: ncols = 0;
3401: for (i=0; i<nzB; i++) {
3402: if (cmap[i] < start) idx[ncols++] = cmap[i];
3403: else break;
3404: }
3405: imark = i;
3406: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3407: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3408: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);
3409: PetscFree(idx);
3410: } else {
3411: iscola = *col;
3412: }
3413: if (scall != MAT_INITIAL_MATRIX){
3414: PetscMalloc(sizeof(Mat),&aloc);
3415: aloc[0] = *A_loc;
3416: }
3417: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
3418: *A_loc = aloc[0];
3419: PetscFree(aloc);
3420: if (!row){
3421: ISDestroy(isrowa);
3422: }
3423: if (!col){
3424: ISDestroy(iscola);
3425: }
3426: PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);
3427: return(0);
3428: }
3430: static PetscEvent logkey_GetBrowsOfAcols = 0;
3433: /*@C
3434: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3436: Collective on Mat
3438: Input Parameters:
3439: + A,B - the matrices in mpiaij format
3440: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3441: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3443: Output Parameter:
3444: + rowb, colb - index sets of rows and columns of B to extract
3445: . brstart - row index of B_seq from which the next B->m rows are taken from B's local rows
3446: - B_seq - the sequential matrix generated
3448: Level: developer
3450: @*/
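/*
   Usage sketch (added illustration, not part of the original source): A and B are
   hypothetical MPIAIJ matrices with compatible layouts (A's column ownership equals
   B's row ownership); the index sets and brstart are returned so a later call with
   MAT_REUSE_MATRIX can reuse them.

     Mat      B_seq;
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
*/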
3451: PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3452: {
3453: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3454: PetscErrorCode ierr;
3455: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3456: IS isrowb,iscolb;
3457: Mat *bseq;
3458:
3460: if (a->cstart != b->rstart || a->cend != b->rend){
3461: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3462: }
3463: if (!logkey_GetBrowsOfAcols) {
3464: PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3465: }
3466: PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);
3467:
3468: if (scall == MAT_INITIAL_MATRIX){
3469: start = a->cstart;
3470: cmap = a->garray;
3471: nzA = a->A->n;
3472: nzB = a->B->n;
3473: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
3474: ncols = 0;
3475: for (i=0; i<nzB; i++) { /* row < local row index */
3476: if (cmap[i] < start) idx[ncols++] = cmap[i];
3477: else break;
3478: }
3479: imark = i;
3480: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
3481: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3482: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);
3483: PetscFree(idx);
3484: *brstart = imark;
3485: ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);
3486: } else {
3487: if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3488: isrowb = *rowb; iscolb = *colb;
3489: PetscMalloc(sizeof(Mat),&bseq);
3490: bseq[0] = *B_seq;
3491: }
3492: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
3493: *B_seq = bseq[0];
3494: PetscFree(bseq);
3495: if (!rowb){
3496: ISDestroy(isrowb);
3497: } else {
3498: *rowb = isrowb;
3499: }
3500: if (!colb){
3501: ISDestroy(iscolb);
3502: } else {
3503: *colb = iscolb;
3504: }
3505: PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);
3506: return(0);
3507: }
3509: static PetscEvent logkey_GetBrowsOfAocols = 0;
3512: /*@C
3513: MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3514: of the OFF-DIAGONAL portion of local A
3516: Collective on Mat
3518: Input Parameters:
3519: + A,B - the matrices in mpiaij format
3520: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3521: . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3522: - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3524: Output Parameter:
3525: . B_oth - the sequential matrix generated
3527: Level: developer
3529: @*/
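/*
   Usage sketch (added illustration, not part of the original source): A and B are
   hypothetical MPIAIJ matrices with compatible layouts; startsj and bufa are returned
   so a later call with MAT_REUSE_MATRIX can skip the symbolic setup.

     Mat         B_oth;
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
*/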
3530: PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3531: {
3532: VecScatter_MPI_General *gen_to,*gen_from;
3533: PetscErrorCode ierr;
3534: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3535: Mat_SeqAIJ *b_oth;
3536: VecScatter ctx=a->Mvctx;
3537: MPI_Comm comm=ctx->comm;
3538: PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank;
3539: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3540: PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA;
3541: PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3542: MPI_Request *rwaits,*swaits;
3543: MPI_Status *sstatus,rstatus;
3544: PetscInt *cols;
3545: PetscScalar *vals;
3546: PetscMPIInt j;
3547:
3549: if (a->cstart != b->rstart || a->cend != b->rend){
3550: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3551: }
3552: if (!logkey_GetBrowsOfAocols) {
3553: PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3554: }
3555: PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);
3556: MPI_Comm_rank(comm,&rank);
3558: gen_to = (VecScatter_MPI_General*)ctx->todata;
3559: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3560: rvalues = gen_from->values; /* holds the lengths of the rows to be received */
3561: svalues = gen_to->values; /* holds the lengths of the rows to be sent */
3562: nrecvs = gen_from->n;
3563: nsends = gen_to->n;
3564: rwaits = gen_from->requests;
3565: swaits = gen_to->requests;
3566: srow = gen_to->indices; /* local row index to be sent */
3567: rstarts = gen_from->starts;
3568: sstarts = gen_to->starts;
3569: rprocs = gen_from->procs;
3570: sprocs = gen_to->procs;
3571: sstatus = gen_to->sstatus;
3573: if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
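   /* The exchange below proceeds in three phases: row lengths (i-array), column
      indices (j-array), and numerical values (a-array).  For MAT_REUSE_MATRIX only
      the a-array phase is repeated, reusing the offsets (sstartsj/rstartsj) and the
      send buffer (bufa) saved by the initial call. */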
3574: if (scall == MAT_INITIAL_MATRIX){
3575: /* i-array */
3576: /*---------*/
3577: /* post receives */
3578: for (i=0; i<nrecvs; i++){
3579: rowlen = (PetscInt*)rvalues + rstarts[i];
3580: nrows = rstarts[i+1]-rstarts[i];
3581: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
3582: }
3584: /* pack the outgoing message */
3585: PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);
3586: rstartsj = sstartsj + nsends +1;
3587: sstartsj[0] = 0; rstartsj[0] = 0;
3588: len = 0; /* total length of j or a array to be sent */
3589: k = 0;
3590: for (i=0; i<nsends; i++){
3591: rowlen = (PetscInt*)svalues + sstarts[i];
3592: nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3593: for (j=0; j<nrows; j++) {
3594: row = srow[k] + b->rowners[rank]; /* global row idx */
3595: MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL); /* rowlength */
3596: len += rowlen[j];
3597: MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);
3598: k++;
3599: }
3600: MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);
3601: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3602: }
3603: /* recvs and sends of i-array are completed */
3604: i = nrecvs;
3605: while (i--) {
3606: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3607: }
3608: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
3609: /* allocate buffers for sending j and a arrays */
3610: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
3611: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
3613: /* create i-array of B_oth */
3614: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
3615: b_othi[0] = 0;
3616: len = 0; /* total length of j or a array to be received */
3617: k = 0;
3618: for (i=0; i<nrecvs; i++){
3619: rowlen = (PetscInt*)rvalues + rstarts[i];
3620: nrows = rstarts[i+1]-rstarts[i];
3621: for (j=0; j<nrows; j++) {
3622: b_othi[k+1] = b_othi[k] + rowlen[j];
3623: len += rowlen[j]; k++;
3624: }
3625: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3626: }
3628: /* allocate space for j and a arrays of B_oth */
3629: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
3630: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);
3632: /* j-array */
3633: /*---------*/
3634: /* post receives of j-array */
3635: for (i=0; i<nrecvs; i++){
3636: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3637: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
3638: }
3639: k = 0;
3640: for (i=0; i<nsends; i++){
3641: nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3642: bufJ = bufj+sstartsj[i];
3643: for (j=0; j<nrows; j++) {
3644: row = srow[k++] + b->rowners[rank]; /* global row idx */
3645: MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);
3646: for (l=0; l<ncols; l++){
3647: *bufJ++ = cols[l];
3648: }
3649: MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);
3650: }
3651: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
3652: }
3654: /* recvs and sends of j-array are completed */
3655: i = nrecvs;
3656: while (i--) {
3657: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3658: }
3659: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
3660: } else if (scall == MAT_REUSE_MATRIX){
3661: sstartsj = *startsj;
3662: rstartsj = sstartsj + nsends +1;
3663: bufa = *bufa_ptr;
3664: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
3665: b_otha = b_oth->a;
3666: } else {
3667: SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "MatReuse scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
3668: }
3670: /* a-array */
3671: /*---------*/
3672: /* post receives of a-array */
3673: for (i=0; i<nrecvs; i++){
3674: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3675: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
3676: }
3677: k = 0;
3678: for (i=0; i<nsends; i++){
3679: nrows = sstarts[i+1]-sstarts[i];
3680: bufA = bufa+sstartsj[i];
3681: for (j=0; j<nrows; j++) {
3682: row = srow[k++] + b->rowners[rank]; /* global row idx */
3683: MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);
3684: for (l=0; l<ncols; l++){
3685: *bufA++ = vals[l];
3686: }
3687: MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);
3689: }
3690: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
3691: }
3692: /* recvs and sends of a-array are completed */
3693: i = nrecvs;
3694: while (i--) {
3695: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
3696: }
3697: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
3698:
3699: if (scall == MAT_INITIAL_MATRIX){
3700: /* put together the new matrix */
3701: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);
3703: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3704: /* Since these are PETSc arrays, change flags to free them as necessary. */
3705: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3706: b_oth->freedata = PETSC_TRUE;
3707: b_oth->nonew = 0;
3709: PetscFree(bufj);
3710: if (!startsj || !bufa_ptr){
3711: PetscFree(sstartsj);
3712: PetscFree(bufa); /* the caller did not ask to keep the send buffer */
3713: } else {
3714: *startsj = sstartsj;
3715: *bufa_ptr = bufa;
3716: }
3717: }
3718: PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);
3719:
3720: return(0);
3721: }
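
   An illustrative usage sketch (not part of the original source); A and B are assumed
   to be assembled MPIAIJ matrices with compatible local dimensions:

      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      Mat         B_oth;

      /* first call allocates the communication buffers and builds B_oth */
      MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
      /* after B's numerical values change (same pattern) only the a-array is re-communicated */
      MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);

      PetscFree(startsj);
      PetscFree(bufa);
      MatDestroy(B_oth);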
3723: /*MC
3724: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3726: Options Database Keys:
3727: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3729: Level: beginner
3731: .seealso: MatCreateMPIAIJ
3732: M*/
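
   A minimal creation sketch for this matrix type (illustrative; comm, M, N and the
   preallocation counts are placeholders supplied by the caller):

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATMPIAIJ);          /* or -mat_type mpiaij with MatSetFromOptions() */
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);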
3737: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3738: {
3739: Mat_MPIAIJ *b;
3741: PetscInt i;
3742: PetscMPIInt size;
3745: MPI_Comm_size(B->comm,&size);
3747: PetscNew(Mat_MPIAIJ,&b);
3748: B->data = (void*)b;
3749: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3750: B->factor = 0;
3751: B->bs = 1;
3752: B->assembled = PETSC_FALSE;
3753: B->mapping = 0;
3755: B->insertmode = NOT_SET_VALUES;
3756: b->size = size;
3757: MPI_Comm_rank(B->comm,&b->rank);
3759: PetscSplitOwnership(B->comm,&B->m,&B->M);
3760: PetscSplitOwnership(B->comm,&B->n,&B->N);
3762: /* the information in the maps duplicates the information computed below, eventually
3763: we should remove the duplicate information that is not contained in the maps */
3764: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
3765: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
3767: /* build local table of row and column ownerships */
3768: PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);
3769: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3770: b->cowners = b->rowners + b->size + 2;
3771: MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);
3772: b->rowners[0] = 0;
3773: for (i=2; i<=b->size; i++) {
3774: b->rowners[i] += b->rowners[i-1];
3775: }
3776: b->rstart = b->rowners[b->rank];
3777: b->rend = b->rowners[b->rank+1];
3778: MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);
3779: b->cowners[0] = 0;
3780: for (i=2; i<=b->size; i++) {
3781: b->cowners[i] += b->cowners[i-1];
3782: }
3783: b->cstart = b->cowners[b->rank];
3784: b->cend = b->cowners[b->rank+1];
3786: /* build cache for off array entries formed */
3787: MatStashCreate_Private(B->comm,1,&B->stash);
3788: b->donotstash = PETSC_FALSE;
3789: b->colmap = 0;
3790: b->garray = 0;
3791: b->roworiented = PETSC_TRUE;
3793: /* stuff used for matrix vector multiply */
3794: b->lvec = PETSC_NULL;
3795: b->Mvctx = PETSC_NULL;
3797: /* stuff for MatGetRow() */
3798: b->rowindices = 0;
3799: b->rowvalues = 0;
3800: b->getrowactive = PETSC_FALSE;
3802: /* Explicitly create 2 MATSEQAIJ matrices: b->A holds the diagonal block and b->B the
3802:    off-diagonal block, which keeps global column indices (hence B->N columns) until assembly compacts it. */
3803: MatCreate(PETSC_COMM_SELF,&b->A);
3804: MatSetSizes(b->A,B->m,B->n,B->m,B->n);
3805: MatSetType(b->A,MATSEQAIJ);
3806: PetscLogObjectParent(B,b->A);
3807: MatCreate(PETSC_COMM_SELF,&b->B);
3808: MatSetSizes(b->B,B->m,B->N,B->m,B->N);
3809: MatSetType(b->B,MATSEQAIJ);
3810: PetscLogObjectParent(B,b->B);
3812: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3813: "MatStoreValues_MPIAIJ",
3814: MatStoreValues_MPIAIJ);
3815: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3816: "MatRetrieveValues_MPIAIJ",
3817: MatRetrieveValues_MPIAIJ);
3818: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3819: "MatGetDiagonalBlock_MPIAIJ",
3820: MatGetDiagonalBlock_MPIAIJ);
3821: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3822: "MatIsTranspose_MPIAIJ",
3823: MatIsTranspose_MPIAIJ);
3824: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3825: "MatMPIAIJSetPreallocation_MPIAIJ",
3826: MatMPIAIJSetPreallocation_MPIAIJ);
3827: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3828: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3829: MatMPIAIJSetPreallocationCSR_MPIAIJ);
3830: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3831: "MatDiagonalScaleLocal_MPIAIJ",
3832: MatDiagonalScaleLocal_MPIAIJ);
3833: return(0);
3834: }