Actual source code: mpiaij.c
1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/vec/vecimpl.h"
5: #include "src/inline/spops.h"
7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8: EXTERN int DisAssemble_MPIAIJ(Mat);
9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12: EXTERN int MatPrintHelp_SeqAIJ(Mat);
13: EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
14: EXTERN int MatUseSpooles_MPIAIJ(Mat);
16: /*
17: Local utility routine that creates a mapping from the global column
18: number to the local number in the off-diagonal part of the local
19: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
20: a slightly higher hash table cost; without it, it is not scalable (each processor
21: has an order-N integer array) but it is fast to access.
22: */
23: int CreateColmap_MPIAIJ_Private(Mat mat)
24: {
25: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26: int n = aij->B->n,i,ierr;
29: #if defined (PETSC_USE_CTABLE)
30: PetscTableCreate(n,&aij->colmap);
31: for (i=0; i<n; i++){
32: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
33: }
34: #else
35: PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
36: PetscLogObjectMemory(mat,mat->N*sizeof(int));
37: PetscMemzero(aij->colmap,mat->N*sizeof(int));
38: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39: #endif
40: return(0);
41: }
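/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical): how the
   colmap built above is consumed.  Given a global column number gcol belonging to the
   off-diagonal part, the lookup returns the local column index, or -1 if that global
   column is not (yet) present; this mirrors the lookup done inside MatSetValues_MPIAIJ()
   below.
*/
static int SketchColmapLookup(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  if (!aij->colmap) {ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* table stores "local index + 1"; 0 means absent */
#else
  *lcol = aij->colmap[gcol] - 1;  /* array stores "local index + 1"; 0 means absent */
#endif
  return(0);
}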
43: #define CHUNKSIZE 15
44: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv)
45: {
46:
47: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
48: rmax = aimax[row]; nrow = ailen[row];
49: col1 = col - shift;
50:
51: low = 0; high = nrow;
52: while (high-low > 5) {
53: t = (low+high)/2;
54: if (rp[t] > col) high = t;
55: else low = t;
56: }
57: for (_i=low; _i<high; _i++) {
58: if (rp[_i] > col1) break;
59: if (rp[_i] == col1) {
60: if (addv == ADD_VALUES) ap[_i] += value;
61: else ap[_i] = value;
62: goto a_noinsert;
63: }
64: }
65: if (nonew == 1) goto a_noinsert;
66: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
67: if (nrow >= rmax) {
68: /* there is no extra room in row, therefore enlarge */
69: int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j;
70: PetscScalar *new_a;
71:
72: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
73:
74: /* malloc new storage space */
75: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int);
76: ierr = PetscMalloc(len,&new_a);
77: new_j = (int*)(new_a + new_nz);
78: new_i = new_j + new_nz;
79:
80: /* copy over old data into new slots */
81: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
82: for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
83: PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));
84: len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift);
85: PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,
86: len*sizeof(int));
87: PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));
88: PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,
89: len*sizeof(PetscScalar));
90: /* free up old matrix storage */
91:
92: PetscFree(a->a);
93: if (!a->singlemalloc) {
94: PetscFree(a->i);
95: PetscFree(a->j);
96: }
97: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
98: a->singlemalloc = PETSC_TRUE;
99:
100: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
101: rmax = aimax[row] = aimax[row] + CHUNKSIZE;
102: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
103: a->maxnz += CHUNKSIZE;
104: a->reallocs++;
105: }
106: N = nrow++ - 1; a->nz++;
107: /* shift up all the later entries in this row */
108: for (ii=N; ii>=_i; ii--) {
109: rp[ii+1] = rp[ii];
110: ap[ii+1] = ap[ii];
111: }
112: rp[_i] = col1;
113: ap[_i] = value;
114: a_noinsert: ;
115: ailen[row] = nrow;
116: }
118: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv)
119: {
120:
121: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
122: rmax = bimax[row]; nrow = bilen[row];
123: col1 = col - shift;
124:
125: low = 0; high = nrow;
126: while (high-low > 5) {
127: t = (low+high)/2;
128: if (rp[t] > col) high = t;
129: else low = t;
130: }
131: for (_i=low; _i<high; _i++) {
132: if (rp[_i] > col1) break;
133: if (rp[_i] == col1) {
134: if (addv == ADD_VALUES) ap[_i] += value;
135: else ap[_i] = value;
136: goto b_noinsert;
137: }
138: }
139: if (nonew == 1) goto b_noinsert;
140: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
141: if (nrow >= rmax) {
142: /* there is no extra room in row, therefore enlarge */
143: int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j;
144: PetscScalar *new_a;
145:
146: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
147:
148: /* malloc new storage space */
149: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int);
150: ierr = PetscMalloc(len,&new_a);
151: new_j = (int*)(new_a + new_nz);
152: new_i = new_j + new_nz;
153:
154: /* copy over old data into new slots */
155: for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];}
156: for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;}
157: PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));
158: len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift);
159: PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,
160: len*sizeof(int));
161: PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));
162: PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,
163: len*sizeof(PetscScalar));
164: /* free up old matrix storage */
165:
166: PetscFree(b->a);
167: if (!b->singlemalloc) {
168: PetscFree(b->i);
169: PetscFree(b->j);
170: }
171: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;
172: b->singlemalloc = PETSC_TRUE;
173:
174: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
175: rmax = bimax[row] = bimax[row] + CHUNKSIZE;
176: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
177: b->maxnz += CHUNKSIZE;
178: b->reallocs++;
179: }
180: N = nrow++ - 1; b->nz++;
181: /* shift up all the later entries in this row */
182: for (ii=N; ii>=_i; ii--) {
183: rp[ii+1] = rp[ii];
184: ap[ii+1] = ap[ii];
185: }
186: rp[_i] = col1;
187: ap[_i] = value;
188: b_noinsert: ;
189: bilen[row] = nrow;
190: }
192: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
193: {
194: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
195: PetscScalar value;
196: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
197: int cstart = aij->cstart,cend = aij->cend,row,col;
198: PetscTruth roworiented = aij->roworiented;
200: /* Some Variables required in the macro */
201: Mat A = aij->A;
202: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
203: int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
204: PetscScalar *aa = a->a;
205: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
206: Mat B = aij->B;
207: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
208: int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
209: PetscScalar *ba = b->a;
211: int *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
212: int nonew = a->nonew,shift = a->indexshift;
213: PetscScalar *ap;
216: for (i=0; i<m; i++) {
217: if (im[i] < 0) continue;
218: #if defined(PETSC_USE_BOPT_g)
219: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
220: #endif
221: if (im[i] >= rstart && im[i] < rend) {
222: row = im[i] - rstart;
223: for (j=0; j<n; j++) {
224: if (in[j] >= cstart && in[j] < cend){
225: col = in[j] - cstart;
226: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227: if (ignorezeroentries && value == 0.0) continue;
228: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
229: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
230: } else if (in[j] < 0) continue;
231: #if defined(PETSC_USE_BOPT_g)
232: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
233: #endif
234: else {
235: if (mat->was_assembled) {
236: if (!aij->colmap) {
237: CreateColmap_MPIAIJ_Private(mat);
238: }
239: #if defined (PETSC_USE_CTABLE)
240: PetscTableFind(aij->colmap,in[j]+1,&col);
241: col--;
242: #else
243: col = aij->colmap[in[j]] - 1;
244: #endif
245: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
246: DisAssemble_MPIAIJ(mat);
247: col = in[j];
248: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
249: B = aij->B;
250: b = (Mat_SeqAIJ*)B->data;
251: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
252: ba = b->a;
253: }
254: } else col = in[j];
255: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
256: if (ignorezeroentries && value == 0.0) continue;
257: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
258: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
259: }
260: }
261: } else {
262: if (!aij->donotstash) {
263: if (roworiented) {
264: if (ignorezeroentries && v[i*n] == 0.0) continue;
265: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
266: } else {
267: if (ignorezeroentries && v[i] == 0.0) continue;
268: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
269: }
270: }
271: }
272: }
273: return(0);
274: }
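/*
   Illustrative sketch (not part of mpiaij.c; names and values are hypothetical):
   caller-side use of the routine above through the public MatSetValues() interface.
   Entries in locally owned rows go directly into aij->A (diagonal block) or aij->B
   (off-diagonal block); entries in rows owned by another process are cached in
   mat->stash and only communicated during MatAssemblyBegin/End().
*/
static int SketchSetValues(Mat A)
{
  int         ierr,row = 0,cols[2];
  PetscScalar vals[2];

  cols[0] = 0; cols[1] = 5;        /* one "diagonal" and (likely) one "off-diagonal" column */
  vals[0] = 1.0; vals[1] = 2.0;
  ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); /* stashed if row 0 is remote */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return(0);
}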
276: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
277: {
278: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
279: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
280: int cstart = aij->cstart,cend = aij->cend,row,col;
283: for (i=0; i<m; i++) {
284: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
285: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
286: if (idxm[i] >= rstart && idxm[i] < rend) {
287: row = idxm[i] - rstart;
288: for (j=0; j<n; j++) {
289: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
290: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
291: if (idxn[j] >= cstart && idxn[j] < cend){
292: col = idxn[j] - cstart;
293: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
294: } else {
295: if (!aij->colmap) {
296: CreateColmap_MPIAIJ_Private(mat);
297: }
298: #if defined (PETSC_USE_CTABLE)
299: PetscTableFind(aij->colmap,idxn[j]+1,&col);
300: col --;
301: #else
302: col = aij->colmap[idxn[j]] - 1;
303: #endif
304: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305: else {
306: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
307: }
308: }
309: }
310: } else {
311: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312: }
313: }
314: return(0);
315: }
317: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318: {
319: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
320: int ierr,nstash,reallocs;
321: InsertMode addv;
324: if (aij->donotstash) {
325: return(0);
326: }
328: /* make sure all processors are either in INSERTMODE or ADDMODE */
329: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
330: if (addv == (ADD_VALUES|INSERT_VALUES)) {
331: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
332: }
333: mat->insertmode = addv; /* in case this processor had no cache */
335: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
336: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
337: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
338: return(0);
339: }
342: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343: {
344: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
345: int i,j,rstart,ncols,n,ierr,flg;
346: int *row,*col,other_disassembled;
347: PetscScalar *val;
348: InsertMode addv = mat->insertmode;
349: #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
350: PetscTruth flag;
351: #endif
354: if (!aij->donotstash) {
355: while (1) {
356: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
357: if (!flg) break;
359: for (i=0; i<n;) {
360: /* Now identify the consecutive vals belonging to the same row */
361: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
362: if (j < n) ncols = j-i;
363: else ncols = n-i;
364: /* Now assemble all these values with a single function call */
365: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
366: i = j;
367: }
368: }
369: MatStashScatterEnd_Private(&mat->stash);
370: }
371:
372: MatAssemblyBegin(aij->A,mode);
373: MatAssemblyEnd(aij->A,mode);
375: /* determine if any processor has disassembled; if so we must
376: also disassemble ourselves, in order that we may reassemble. */
377: /*
378: if the nonzero structure of submatrix B cannot change, then we know that
379: no processor disassembled, thus we can skip this stuff
380: */
381: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
382: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
383: if (mat->was_assembled && !other_disassembled) {
384: DisAssemble_MPIAIJ(mat);
385: }
386: }
388: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
389: MatSetUpMultiply_MPIAIJ(mat);
390: }
391: MatAssemblyBegin(aij->B,mode);
392: MatAssemblyEnd(aij->B,mode);
394: if (aij->rowvalues) {
395: PetscFree(aij->rowvalues);
396: aij->rowvalues = 0;
397: }
398: #if defined(PETSC_HAVE_SUPERLUDIST)
399: PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);
400: if (flag) { MatUseSuperLU_DIST_MPIAIJ(mat); }
401: #endif
403: #if defined(PETSC_HAVE_SPOOLES)
404: PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);
405: if (flag) { MatUseSpooles_MPIAIJ(mat); }
406: #endif
407: return(0);
408: }
410: int MatZeroEntries_MPIAIJ(Mat A)
411: {
412: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413: int ierr;
416: MatZeroEntries(l->A);
417: MatZeroEntries(l->B);
418: return(0);
419: }
421: int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
422: {
423: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
424: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425: int *procs,*nprocs,j,idx,nsends,*work,row;
426: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427: int *rvalues,tag = A->tag,count,base,slen,n,*source;
428: int *lens,imdex,*lrows,*values,rstart=l->rstart;
429: MPI_Comm comm = A->comm;
430: MPI_Request *send_waits,*recv_waits;
431: MPI_Status recv_status,*send_status;
432: IS istmp;
433: PetscTruth found;
436: ISGetLocalSize(is,&N);
437: ISGetIndices(is,&rows);
439: /* first count number of contributors to each processor */
440: PetscMalloc(2*size*sizeof(int),&nprocs);
441: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
442: procs = nprocs + size;
443: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
444: for (i=0; i<N; i++) {
445: idx = rows[i];
446: found = PETSC_FALSE;
447: for (j=0; j<size; j++) {
448: if (idx >= owners[j] && idx < owners[j+1]) {
449: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450: }
451: }
452: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453: }
454: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
456: /* inform other processors of number of messages and max length*/
457: PetscMalloc(2*size*sizeof(int),&work);
458: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
459: nrecvs = work[size+rank];
460: nmax = work[rank];
461: ierr = PetscFree(work);
463: /* post receives: */
464: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
465: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
466: for (i=0; i<nrecvs; i++) {
467: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
468: }
470: /* do sends:
471: 1) starts[i] gives the starting index in svalues for stuff going to
472: the ith processor
473: */
474: PetscMalloc((N+1)*sizeof(int),&svalues);
475: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
476: PetscMalloc((size+1)*sizeof(int),&starts);
477: starts[0] = 0;
478: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479: for (i=0; i<N; i++) {
480: svalues[starts[owner[i]]++] = rows[i];
481: }
482: ISRestoreIndices(is,&rows);
484: starts[0] = 0;
485: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486: count = 0;
487: for (i=0; i<size; i++) {
488: if (procs[i]) {
489: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
490: }
491: }
492: PetscFree(starts);
494: base = owners[rank];
496: /* wait on receives */
497: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
498: source = lens + nrecvs;
499: count = nrecvs; slen = 0;
500: while (count) {
501: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
502: /* unpack receives into our local space */
503: MPI_Get_count(&recv_status,MPI_INT,&n);
504: source[imdex] = recv_status.MPI_SOURCE;
505: lens[imdex] = n;
506: slen += n;
507: count--;
508: }
509: PetscFree(recv_waits);
510:
511: /* move the data into the send scatter */
512: PetscMalloc((slen+1)*sizeof(int),&lrows);
513: count = 0;
514: for (i=0; i<nrecvs; i++) {
515: values = rvalues + i*nmax;
516: for (j=0; j<lens[i]; j++) {
517: lrows[count++] = values[j] - base;
518: }
519: }
520: PetscFree(rvalues);
521: PetscFree(lens);
522: PetscFree(owner);
523: PetscFree(nprocs);
524:
525: /* actually zap the local rows */
526: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
527: PetscLogObjectParent(A,istmp);
529: /*
530: Zero the required rows. If the "diagonal block" of the matrix
531: is square and the user wishes to set the diagonal, we use separate
532: code so that MatSetValues() is not called for each diagonal entry,
533: which would allocate new memory with many mallocs and slow things down.
535: Contributed by: Mathew Knepley
536: */
537: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538: MatZeroRows(l->B,istmp,0);
539: if (diag && (l->A->M == l->A->N)) {
540: ierr = MatZeroRows(l->A,istmp,diag);
541: } else if (diag) {
542: MatZeroRows(l->A,istmp,0);
543: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546: }
547: for (i = 0; i < slen; i++) {
548: row = lrows[i] + rstart;
549: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
550: }
551: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
552: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
553: } else {
554: MatZeroRows(l->A,istmp,0);
555: }
556: ISDestroy(istmp);
557: PetscFree(lrows);
559: /* wait on sends */
560: if (nsends) {
561: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
562: MPI_Waitall(nsends,send_waits,send_status);
563: PetscFree(send_status);
564: }
565: PetscFree(send_waits);
566: PetscFree(svalues);
568: return(0);
569: }
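/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical): zeroing
   rows through the public MatZeroRows() interface.  Each process may list any global rows;
   the routine above routes each row number to its owner before zeroing.  Passing a non-null
   diag places that value on the diagonal of the zeroed rows.
*/
static int SketchZeroRows(Mat A,int nrows,int *grows)
{
  IS          is;
  PetscScalar diag = 1.0;
  int         ierr;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,grows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
  return(0);
}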
571: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572: {
573: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574: int ierr,nt;
577: VecGetLocalSize(xx,&nt);
578: if (nt != A->n) {
579: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
580: }
581: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
582: (*a->A->ops->mult)(a->A,xx,yy);
583: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
584: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
585: return(0);
586: }
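/*
   Illustrative sketch (not part of mpiaij.c; names are hypothetical and the vector
   creation calls are assumed from the standard PETSc Vec interface): y = A*x with
   conforming layouts.  The local column size of A must match the local size of x
   (checked above), and the scatter a->Mvctx overlaps communication of the ghost
   values needed by the off-diagonal block with the multiply by the diagonal block.
*/
static int SketchMult(Mat A)
{
  Vec         x,y;
  PetscScalar one = 1.0;
  int         ierr,m,n;

  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,n,PETSC_DECIDE,&x);CHKERRQ(ierr);  /* conforms to local columns */
  ierr = VecCreateMPI(A->comm,m,PETSC_DECIDE,&y);CHKERRQ(ierr);  /* conforms to local rows    */
  ierr = VecSet(&one,x);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  return(0);
}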
588: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
589: {
590: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
591: int ierr;
594: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
595: (*a->A->ops->multadd)(a->A,xx,yy,zz);
596: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
597: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
598: return(0);
599: }
601: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602: {
603: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604: int ierr;
607: /* do nondiagonal part */
608: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
609: /* send it on its way */
610: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
611: /* do local part */
612: (*a->A->ops->multtranspose)(a->A,xx,yy);
613: /* receive remote parts: note this assumes the values are not actually */
614: /* inserted in yy until the next line, which is true for my implementation*/
615: /* but may not always be true. */
616: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
617: return(0);
618: }
620: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
621: {
622: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
623: int ierr;
626: /* do nondiagonal part */
627: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
628: /* send it on its way */
629: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
630: /* do local part */
631: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
632: /* receive remote parts: note this assumes the values are not actually */
633: /* inserted in zz until the next line, which is true for my implementation */
634: /* but may not always be true. */
635: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
636: return(0);
637: }
639: /*
640: This only works correctly for square matrices where the subblock A->A is the
641: diagonal block
642: */
643: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
644: {
645: int ierr;
646: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
649: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
650: if (a->rstart != a->cstart || a->rend != a->cend) {
651: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
652: }
653: MatGetDiagonal(a->A,v);
654: return(0);
655: }
657: int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
658: {
659: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
660: int ierr;
663: MatScale(aa,a->A);
664: MatScale(aa,a->B);
665: return(0);
666: }
668: int MatDestroy_MPIAIJ(Mat mat)
669: {
670: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
671: int ierr;
674: #if defined(PETSC_USE_LOG)
675: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
676: #endif
677: MatStashDestroy_Private(&mat->stash);
678: PetscFree(aij->rowners);
679: MatDestroy(aij->A);
680: MatDestroy(aij->B);
681: #if defined (PETSC_USE_CTABLE)
682: if (aij->colmap) {PetscTableDelete(aij->colmap);}
683: #else
684: if (aij->colmap) {PetscFree(aij->colmap);}
685: #endif
686: if (aij->garray) {PetscFree(aij->garray);}
687: if (aij->lvec) {VecDestroy(aij->lvec);}
688: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
689: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
690: PetscFree(aij);
691: return(0);
692: }
694: extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
695: extern int MatFactorInfo_Spooles(Mat,PetscViewer);
697: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
698: {
699: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
700: Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
701: int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
702: PetscTruth isdraw,isascii,flg;
703: PetscViewer sviewer;
704: PetscViewerFormat format;
707: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
708: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
709: if (isascii) {
710: PetscViewerGetFormat(viewer,&format);
711: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
712: MatInfo info;
713: MPI_Comm_rank(mat->comm,&rank);
714: MatGetInfo(mat,MAT_LOCAL,&info);
715: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
716: if (flg) {
717: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
718: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
719: } else {
720: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
721: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
722: }
723: MatGetInfo(aij->A,MAT_LOCAL,&info);
724: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
725: MatGetInfo(aij->B,MAT_LOCAL,&info);
726: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
727: PetscViewerFlush(viewer);
728: VecScatterView(aij->Mvctx,viewer);
729: return(0);
730: } else if (format == PETSC_VIEWER_ASCII_INFO) {
731: return(0);
732: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
733: #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
734: MatMPIAIJFactorInfo_SuperLu(mat,viewer);
735: #endif
736: #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
737: MatFactorInfo_Spooles(mat,viewer);
738: #endif
739: return(0);
740: }
741: } else if (isdraw) {
742: PetscDraw draw;
743: PetscTruth isnull;
744: PetscViewerDrawGetDraw(viewer,0,&draw);
745: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
746: }
748: if (size == 1) {
749: PetscObjectSetName((PetscObject)aij->A,mat->name);
750: MatView(aij->A,viewer);
751: } else {
752: /* assemble the entire matrix onto first processor. */
753: Mat A;
754: Mat_SeqAIJ *Aloc;
755: int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
756: PetscScalar *a;
758: if (!rank) {
759: MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
760: } else {
761: MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
762: }
763: PetscLogObjectParent(mat,A);
765: /* copy over the A part */
766: Aloc = (Mat_SeqAIJ*)aij->A->data;
767: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
768: row = aij->rstart;
769: for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
770: for (i=0; i<m; i++) {
771: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
772: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
773: }
774: aj = Aloc->j;
775: for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
777: /* copy over the B part */
778: Aloc = (Mat_SeqAIJ*)aij->B->data;
779: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
780: row = aij->rstart;
781: PetscMalloc((ai[m]+1)*sizeof(int),&cols);
782: ct = cols;
783: for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
784: for (i=0; i<m; i++) {
785: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
786: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
787: }
788: PetscFree(ct);
789: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
790: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
791: /*
792: Everyone has to call to draw the matrix since the graphics waits are
793: synchronized across all processors that share the PetscDraw object
794: */
795: PetscViewerGetSingleton(viewer,&sviewer);
796: if (!rank) {
797: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
798: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
799: }
800: PetscViewerRestoreSingleton(viewer,&sviewer);
801: MatDestroy(A);
802: }
803: return(0);
804: }
806: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
807: {
808: int ierr;
809: PetscTruth isascii,isdraw,issocket,isbinary;
810:
812: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
813: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
814: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
815: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
816: if (isascii || isdraw || isbinary || issocket) {
817: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
818: } else {
819: SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
820: }
821: return(0);
822: }
826: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
827: {
828: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
829: int ierr;
830: Vec bb1;
831: PetscScalar mone=-1.0;
834: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
836: VecDuplicate(bb,&bb1);
838: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
839: if (flag & SOR_ZERO_INITIAL_GUESS) {
840: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
841: its--;
842: }
843:
844: while (its--) {
845: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
846: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
848: /* update rhs: bb1 = bb - B*x */
849: VecScale(&mone,mat->lvec);
850: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
852: /* local sweep */
853: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
854:
855: }
856: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
857: if (flag & SOR_ZERO_INITIAL_GUESS) {
858: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
859: its--;
860: }
861: while (its--) {
862: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
863: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
865: /* update rhs: bb1 = bb - B*x */
866: VecScale(&mone,mat->lvec);
867: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
869: /* local sweep */
870: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
871:
872: }
873: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
874: if (flag & SOR_ZERO_INITIAL_GUESS) {
875: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
876: its--;
877: }
878: while (its--) {
879: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
880: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
882: /* update rhs: bb1 = bb - B*x */
883: VecScale(&mone,mat->lvec);
884: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
886: /* local sweep */
887: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
888:
889: }
890: } else {
891: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
892: }
894: VecDestroy(bb1);
895: return(0);
896: }
898: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
899: {
900: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
901: Mat A = mat->A,B = mat->B;
902: int ierr;
903: PetscReal isend[5],irecv[5];
906: info->block_size = 1.0;
907: MatGetInfo(A,MAT_LOCAL,info);
908: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
909: isend[3] = info->memory; isend[4] = info->mallocs;
910: MatGetInfo(B,MAT_LOCAL,info);
911: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
912: isend[3] += info->memory; isend[4] += info->mallocs;
913: if (flag == MAT_LOCAL) {
914: info->nz_used = isend[0];
915: info->nz_allocated = isend[1];
916: info->nz_unneeded = isend[2];
917: info->memory = isend[3];
918: info->mallocs = isend[4];
919: } else if (flag == MAT_GLOBAL_MAX) {
920: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
921: info->nz_used = irecv[0];
922: info->nz_allocated = irecv[1];
923: info->nz_unneeded = irecv[2];
924: info->memory = irecv[3];
925: info->mallocs = irecv[4];
926: } else if (flag == MAT_GLOBAL_SUM) {
927: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
928: info->nz_used = irecv[0];
929: info->nz_allocated = irecv[1];
930: info->nz_unneeded = irecv[2];
931: info->memory = irecv[3];
932: info->mallocs = irecv[4];
933: }
934: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
935: info->fill_ratio_needed = 0;
936: info->factor_mallocs = 0;
937: info->rows_global = (double)matin->M;
938: info->columns_global = (double)matin->N;
939: info->rows_local = (double)matin->m;
940: info->columns_local = (double)matin->N;
942: return(0);
943: }
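/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical):
   querying assembly statistics.  MAT_LOCAL returns this process's counts; MAT_GLOBAL_SUM
   and MAT_GLOBAL_MAX reduce the five fields accumulated above across the communicator.
*/
static int SketchGetInfo(Mat A)
{
  MatInfo info;
  int     ierr;

  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(A->comm,"nonzeros used %d allocated %d mallocs %d\n",
                     (int)info.nz_used,(int)info.nz_allocated,(int)info.mallocs);CHKERRQ(ierr);
  return(0);
}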
945: int MatSetOption_MPIAIJ(Mat A,MatOption op)
946: {
947: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
948: int ierr;
951: switch (op) {
952: case MAT_NO_NEW_NONZERO_LOCATIONS:
953: case MAT_YES_NEW_NONZERO_LOCATIONS:
954: case MAT_COLUMNS_UNSORTED:
955: case MAT_COLUMNS_SORTED:
956: case MAT_NEW_NONZERO_ALLOCATION_ERR:
957: case MAT_KEEP_ZEROED_ROWS:
958: case MAT_NEW_NONZERO_LOCATION_ERR:
959: case MAT_USE_INODES:
960: case MAT_DO_NOT_USE_INODES:
961: case MAT_IGNORE_ZERO_ENTRIES:
962: MatSetOption(a->A,op);
963: MatSetOption(a->B,op);
964: break;
965: case MAT_ROW_ORIENTED:
966: a->roworiented = PETSC_TRUE;
967: MatSetOption(a->A,op);
968: MatSetOption(a->B,op);
969: break;
970: case MAT_ROWS_SORTED:
971: case MAT_ROWS_UNSORTED:
972: case MAT_YES_NEW_DIAGONALS:
973: case MAT_USE_SINGLE_PRECISION_SOLVES:
974: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
975: break;
976: case MAT_COLUMN_ORIENTED:
977: a->roworiented = PETSC_FALSE;
978: MatSetOption(a->A,op);
979: MatSetOption(a->B,op);
980: break;
981: case MAT_IGNORE_OFF_PROC_ENTRIES:
982: a->donotstash = PETSC_TRUE;
983: break;
984: case MAT_NO_NEW_DIAGONALS:
985: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
986: default:
987: SETERRQ(PETSC_ERR_SUP,"unknown option");
988: }
989: return(0);
990: }
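/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical): options
   handled above.  MAT_IGNORE_OFF_PROC_ENTRIES sets a->donotstash so off-process entries
   are dropped instead of stashed; most other options are simply forwarded to both the
   diagonal (a->A) and off-diagonal (a->B) SeqAIJ blocks.
*/
static int SketchSetOptions(Mat A)
{
  int ierr;

  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);   /* never stash remote rows  */
  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);  /* freeze nonzero structure */
  return(0);
}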
992: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
993: {
994: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
995: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
996: int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
997: int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
998: int *cmap,*idx_p;
1001: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1002: mat->getrowactive = PETSC_TRUE;
1004: if (!mat->rowvalues && (idx || v)) {
1005: /*
1006: allocate enough space to hold information from the longest row.
1007: */
1008: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1009: int max = 1,tmp;
1010: for (i=0; i<matin->m; i++) {
1011: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1012: if (max < tmp) { max = tmp; }
1013: }
1014: PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1015: mat->rowindices = (int*)(mat->rowvalues + max);
1016: }
1018: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1019: lrow = row - rstart;
1021: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1022: if (!v) {pvA = 0; pvB = 0;}
1023: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1024: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1025: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1026: nztot = nzA + nzB;
1028: cmap = mat->garray;
1029: if (v || idx) {
1030: if (nztot) {
1031: /* Sort by increasing column numbers, assuming A and B already sorted */
1032: int imark = -1;
1033: if (v) {
1034: *v = v_p = mat->rowvalues;
1035: for (i=0; i<nzB; i++) {
1036: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1037: else break;
1038: }
1039: imark = i;
1040: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1041: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1042: }
1043: if (idx) {
1044: *idx = idx_p = mat->rowindices;
1045: if (imark > -1) {
1046: for (i=0; i<imark; i++) {
1047: idx_p[i] = cmap[cworkB[i]];
1048: }
1049: } else {
1050: for (i=0; i<nzB; i++) {
1051: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1052: else break;
1053: }
1054: imark = i;
1055: }
1056: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1057: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1058: }
1059: } else {
1060: if (idx) *idx = 0;
1061: if (v) *v = 0;
1062: }
1063: }
1064: *nz = nztot;
1065: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1066: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1067: return(0);
1068: }
1070: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1071: {
1072: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1075: if (aij->getrowactive == PETSC_FALSE) {
1076: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1077: }
1078: aij->getrowactive = PETSC_FALSE;
1079: return(0);
1080: }
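/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical):
   MatGetRow() may only be asked for locally owned rows (checked above), and each
   MatGetRow() must be paired with MatRestoreRow() before the next row is requested,
   since the column indices and values are merged into a single shared work array
   (mat->rowvalues/rowindices).
*/
static int SketchRowTraversal(Mat A)
{
  int         ierr,i,nz,*cols,rstart,rend;
  PetscScalar *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect nz, cols[], vals[] ... */
    ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  return(0);
}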
1082: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1083: {
1084: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1085: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1086: int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1087: PetscReal sum = 0.0;
1088: PetscScalar *v;
1091: if (aij->size == 1) {
1092: MatNorm(aij->A,type,norm);
1093: } else {
1094: if (type == NORM_FROBENIUS) {
1095: v = amat->a;
1096: for (i=0; i<amat->nz; i++) {
1097: #if defined(PETSC_USE_COMPLEX)
1098: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1099: #else
1100: sum += (*v)*(*v); v++;
1101: #endif
1102: }
1103: v = bmat->a;
1104: for (i=0; i<bmat->nz; i++) {
1105: #if defined(PETSC_USE_COMPLEX)
1106: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1107: #else
1108: sum += (*v)*(*v); v++;
1109: #endif
1110: }
1111: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1112: *norm = sqrt(*norm);
1113: } else if (type == NORM_1) { /* max column norm */
1114: PetscReal *tmp,*tmp2;
1115: int *jj,*garray = aij->garray;
1116: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1117: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1118: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1119: *norm = 0.0;
1120: v = amat->a; jj = amat->j;
1121: for (j=0; j<amat->nz; j++) {
1122: tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
1123: }
1124: v = bmat->a; jj = bmat->j;
1125: for (j=0; j<bmat->nz; j++) {
1126: tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1127: }
1128: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1129: for (j=0; j<mat->N; j++) {
1130: if (tmp2[j] > *norm) *norm = tmp2[j];
1131: }
1132: PetscFree(tmp);
1133: PetscFree(tmp2);
1134: } else if (type == NORM_INFINITY) { /* max row norm */
1135: PetscReal ntemp = 0.0;
1136: for (j=0; j<aij->A->m; j++) {
1137: v = amat->a + amat->i[j] + shift;
1138: sum = 0.0;
1139: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1140: sum += PetscAbsScalar(*v); v++;
1141: }
1142: v = bmat->a + bmat->i[j] + shift;
1143: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1144: sum += PetscAbsScalar(*v); v++;
1145: }
1146: if (sum > ntemp) ntemp = sum;
1147: }
1148: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1149: } else {
1150: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1151: }
1152: }
1153: return(0);
1154: }
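/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical): the three
   norms supported above.  NORM_1 is the maximum column sum, NORM_INFINITY the maximum row
   sum, and NORM_FROBENIUS the square root of the globally summed squares; NORM_2 is not
   supported for this format.
*/
static int SketchNorms(Mat A)
{
  PetscReal nrm1,nrmf,nrmi;
  int       ierr;

  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&nrmi);CHKERRQ(ierr);
  return(0);
}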
1156: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1157: {
1158: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1159: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1160: int ierr,shift = Aloc->indexshift;
1161: int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1162: Mat B;
1163: PetscScalar *array;
1166: if (!matout && M != N) {
1167: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1168: }
1170: MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1172: /* copy over the A part */
1173: Aloc = (Mat_SeqAIJ*)a->A->data;
1174: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1175: row = a->rstart;
1176: for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1177: for (i=0; i<m; i++) {
1178: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1179: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1180: }
1181: aj = Aloc->j;
1182: for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1184: /* copy over the B part */
1185: Aloc = (Mat_SeqAIJ*)a->B->data;
1186: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1187: row = a->rstart;
1188: PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1189: ct = cols;
1190: for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1191: for (i=0; i<m; i++) {
1192: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1193: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1194: }
1195: PetscFree(ct);
1196: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1197: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1198: if (matout) {
1199: *matout = B;
1200: } else {
1201: MatHeaderCopy(A,B);
1202: }
1203: return(0);
1204: }
1206: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1207: {
1208: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1209: Mat a = aij->A,b = aij->B;
1210: int ierr,s1,s2,s3;
1213: MatGetLocalSize(mat,&s2,&s3);
1214: if (rr) {
1215: VecGetLocalSize(rr,&s1);
1216: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1217: /* Overlap communication with computation. */
1218: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1219: }
1220: if (ll) {
1221: VecGetLocalSize(ll,&s1);
1222: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1223: (*b->ops->diagonalscale)(b,ll,0);
1224: }
1225: /* scale the diagonal block */
1226: (*a->ops->diagonalscale)(a,ll,rr);
1228: if (rr) {
1229: /* Do a scatter end and then right scale the off-diagonal block */
1230: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1231: (*b->ops->diagonalscale)(b,0,aij->lvec);
1232: }
1233:
1234: return(0);
1235: }
1238: int MatPrintHelp_MPIAIJ(Mat A)
1239: {
1240: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1241: int ierr;
1244: if (!a->rank) {
1245: MatPrintHelp_SeqAIJ(a->A);
1246: }
1247: return(0);
1248: }
1250: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1251: {
1253: *bs = 1;
1254: return(0);
1255: }
1256: int MatSetUnfactored_MPIAIJ(Mat A)
1257: {
1258: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1259: int ierr;
1262: MatSetUnfactored(a->A);
1263: return(0);
1264: }
1266: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1267: {
1268: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1269: Mat a,b,c,d;
1270: PetscTruth flg;
1271: int ierr;
1274: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1275: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1276: a = matA->A; b = matA->B;
1277: c = matB->A; d = matB->B;
1279: MatEqual(a,c,&flg);
1280: if (flg == PETSC_TRUE) {
1281: MatEqual(b,d,&flg);
1282: }
1283: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1284: return(0);
1285: }
1287: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1288: {
1289: int ierr;
1290: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1291: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1292: PetscTruth flg;
1295: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1296: if (str != SAME_NONZERO_PATTERN || !flg) {
1297: /* because of the column compression in the off-processor part of the matrix a->B,
1298: the number of columns in a->B and b->B may be different, hence we cannot call
1299: the MatCopy() directly on the two parts. If need be, we can provide a more
1300: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1301: then copying the submatrices */
1302: MatCopy_Basic(A,B,str);
1303: } else {
1304: MatCopy(a->A,b->A,str);
1305: MatCopy(a->B,b->B,str);
1306: }
1307: return(0);
1308: }
1310: int MatSetUpPreallocation_MPIAIJ(Mat A)
1311: {
1312: int ierr;
1315: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1316: return(0);
1317: }
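/*
   Illustrative sketch (not part of mpiaij.c; the function name and counts are
   hypothetical): explicit preallocation.  The routine above supplies defaults when the
   user gave none; much better assembly performance is obtained by stating, per local row,
   how many nonzeros fall in the diagonal block (columns owned by this process) and how
   many in the off-diagonal block, which is what MatMPIAIJSetPreallocation() records.
*/
static int SketchPreallocate(Mat A)
{
  int ierr;
  /* uniform estimates: 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  return(0);
}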
1319: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1320: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1321: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1322: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1323: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1324: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1325: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1326: #endif
1328: #include "petscblaslapack.h"
1330: int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1331: {
1332: int ierr,one = 1;
1333: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1334: Mat_SeqAIJ *x,*y;
1337: if (str == SAME_NONZERO_PATTERN) {
1338: x = (Mat_SeqAIJ *)xx->A->data;
1339: y = (Mat_SeqAIJ *)yy->A->data;
1340: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1341: x = (Mat_SeqAIJ *)xx->B->data;
1342: y = (Mat_SeqAIJ *)yy->B->data;
1343: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1344: } else {
1345: MatAXPY_Basic(a,X,Y,str);
1346: }
1347: return(0);
1348: }
1350: /* -------------------------------------------------------------------*/
1351: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1352: MatGetRow_MPIAIJ,
1353: MatRestoreRow_MPIAIJ,
1354: MatMult_MPIAIJ,
1355: MatMultAdd_MPIAIJ,
1356: MatMultTranspose_MPIAIJ,
1357: MatMultTransposeAdd_MPIAIJ,
1358: 0,
1359: 0,
1360: 0,
1361: 0,
1362: 0,
1363: 0,
1364: MatRelax_MPIAIJ,
1365: MatTranspose_MPIAIJ,
1366: MatGetInfo_MPIAIJ,
1367: MatEqual_MPIAIJ,
1368: MatGetDiagonal_MPIAIJ,
1369: MatDiagonalScale_MPIAIJ,
1370: MatNorm_MPIAIJ,
1371: MatAssemblyBegin_MPIAIJ,
1372: MatAssemblyEnd_MPIAIJ,
1373: 0,
1374: MatSetOption_MPIAIJ,
1375: MatZeroEntries_MPIAIJ,
1376: MatZeroRows_MPIAIJ,
1377: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1378: MatLUFactorSymbolic_MPIAIJ_TFS,
1379: #else
1380: 0,
1381: #endif
1382: 0,
1383: 0,
1384: 0,
1385: MatSetUpPreallocation_MPIAIJ,
1386: 0,
1387: 0,
1388: 0,
1389: 0,
1390: MatDuplicate_MPIAIJ,
1391: 0,
1392: 0,
1393: 0,
1394: 0,
1395: MatAXPY_MPIAIJ,
1396: MatGetSubMatrices_MPIAIJ,
1397: MatIncreaseOverlap_MPIAIJ,
1398: MatGetValues_MPIAIJ,
1399: MatCopy_MPIAIJ,
1400: MatPrintHelp_MPIAIJ,
1401: MatScale_MPIAIJ,
1402: 0,
1403: 0,
1404: 0,
1405: MatGetBlockSize_MPIAIJ,
1406: 0,
1407: 0,
1408: 0,
1409: 0,
1410: MatFDColoringCreate_MPIAIJ,
1411: 0,
1412: MatSetUnfactored_MPIAIJ,
1413: 0,
1414: 0,
1415: MatGetSubMatrix_MPIAIJ,
1416: MatDestroy_MPIAIJ,
1417: MatView_MPIAIJ,
1418: MatGetPetscMaps_Petsc,
1419: 0,
1420: 0,
1421: 0,
1422: 0,
1423: 0,
1424: 0,
1425: 0,
1426: 0,
1427: MatSetColoring_MPIAIJ,
1428: MatSetValuesAdic_MPIAIJ,
1429: MatSetValuesAdifor_MPIAIJ
1430: };
1432: /* ----------------------------------------------------------------------------------------*/
1434: EXTERN_C_BEGIN
1435: int MatStoreValues_MPIAIJ(Mat mat)
1436: {
1437: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1438: int ierr;
1441: MatStoreValues(aij->A);
1442: MatStoreValues(aij->B);
1443: return(0);
1444: }
1445: EXTERN_C_END
1447: EXTERN_C_BEGIN
1448: int MatRetrieveValues_MPIAIJ(Mat mat)
1449: {
1450: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1451: int ierr;
1454: MatRetrieveValues(aij->A);
1455: MatRetrieveValues(aij->B);
1456: return(0);
1457: }
1458: EXTERN_C_END
1460: #include "petscpc.h"
1461: EXTERN_C_BEGIN
1462: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1463: EXTERN_C_END
1465: EXTERN_C_BEGIN
1466: int MatCreate_MPIAIJ(Mat B)
1467: {
1468: Mat_MPIAIJ *b;
1469: int ierr,i,size;
1472: MPI_Comm_size(B->comm,&size);
1474: ierr = PetscNew(Mat_MPIAIJ,&b);
1475: B->data = (void*)b;
1476: ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1477: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1478: B->factor = 0;
1479: B->assembled = PETSC_FALSE;
1480: B->mapping = 0;
1482: B->insertmode = NOT_SET_VALUES;
1483: b->size = size;
1484: MPI_Comm_rank(B->comm,&b->rank);
1486: PetscSplitOwnership(B->comm,&B->m,&B->M);
1487: PetscSplitOwnership(B->comm,&B->n,&B->N);
1489: /* the information in the maps duplicates the information computed below, eventually
1490: we should remove the duplicate information that is not contained in the maps */
1491: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1492: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1494: /* build local table of row and column ownerships */
1495: PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1496: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1497: b->cowners = b->rowners + b->size + 2;
1498: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1499: b->rowners[0] = 0;
1500: for (i=2; i<=b->size; i++) {
1501: b->rowners[i] += b->rowners[i-1];
1502: }
1503: b->rstart = b->rowners[b->rank];
1504: b->rend = b->rowners[b->rank+1];
1505: MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1506: b->cowners[0] = 0;
1507: for (i=2; i<=b->size; i++) {
1508: b->cowners[i] += b->cowners[i-1];
1509: }
1510: b->cstart = b->cowners[b->rank];
1511: b->cend = b->cowners[b->rank+1];
1513: /* build cache for off array entries formed */
1514: MatStashCreate_Private(B->comm,1,&B->stash);
1515: b->donotstash = PETSC_FALSE;
1516: b->colmap = 0;
1517: b->garray = 0;
1518: b->roworiented = PETSC_TRUE;
1520: /* stuff used for matrix vector multiply */
1521: b->lvec = PETSC_NULL;
1522: b->Mvctx = PETSC_NULL;
1524: /* stuff for MatGetRow() */
1525: b->rowindices = 0;
1526: b->rowvalues = 0;
1527: b->getrowactive = PETSC_FALSE;
1529: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1530: "MatStoreValues_MPIAIJ",
1531: MatStoreValues_MPIAIJ);
1532: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1533: "MatRetrieveValues_MPIAIJ",
1534: MatRetrieveValues_MPIAIJ);
1535: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1536: "MatGetDiagonalBlock_MPIAIJ",
1537: MatGetDiagonalBlock_MPIAIJ);
1539: return(0);
1540: }
1541: EXTERN_C_END
1543: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1544: {
1545: Mat mat;
1546: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1547: int ierr;
1550: *newmat = 0;
1551: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1552: MatSetType(mat,MATMPIAIJ);
1553: a = (Mat_MPIAIJ*)mat->data;
1554: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1555: mat->factor = matin->factor;
1556: mat->assembled = PETSC_TRUE;
1557: mat->insertmode = NOT_SET_VALUES;
1558: mat->preallocated = PETSC_TRUE;
1560: a->rstart = oldmat->rstart;
1561: a->rend = oldmat->rend;
1562: a->cstart = oldmat->cstart;
1563: a->cend = oldmat->cend;
1564: a->size = oldmat->size;
1565: a->rank = oldmat->rank;
1566: a->donotstash = oldmat->donotstash;
1567: a->roworiented = oldmat->roworiented;
1568: a->rowindices = 0;
1569: a->rowvalues = 0;
1570: a->getrowactive = PETSC_FALSE;
1572: ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1573: ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);
1574: if (oldmat->colmap) {
1575: #if defined (PETSC_USE_CTABLE)
1576: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1577: #else
1578: PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1579: PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1580: ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1581: #endif
1582: } else a->colmap = 0;
1583: if (oldmat->garray) {
1584: int len;
1585: len = oldmat->B->n;
1586: PetscMalloc((len+1)*sizeof(int),&a->garray);
1587: PetscLogObjectMemory(mat,len*sizeof(int));
1588: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1589: } else a->garray = 0;
1590:
1591: VecDuplicate(oldmat->lvec,&a->lvec);
1592: PetscLogObjectParent(mat,a->lvec);
1593: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1594: PetscLogObjectParent(mat,a->Mvctx);
1595: MatDuplicate(oldmat->A,cpvalues,&a->A);
1596: PetscLogObjectParent(mat,a->A);
1597: MatDuplicate(oldmat->B,cpvalues,&a->B);
1598: PetscLogObjectParent(mat,a->B);
1599: PetscFListDuplicate(matin->qlist,&mat->qlist);
1600: *newmat = mat;
1601: return(0);
1602: }
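/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical, and
   MAT_COPY_VALUES is the assumed MatDuplicateOption name): duplicating a parallel AIJ
   matrix.  The routine above copies the layout, column map, garray, local-to-ghost
   scatter, and both SeqAIJ blocks; with MAT_COPY_VALUES the numerical entries are
   copied as well, so the duplicate compares equal to the original.
*/
static int SketchDuplicate(Mat A)
{
  Mat        B;
  PetscTruth eq;
  int        ierr;

  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);   /* eq should be PETSC_TRUE */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  return(0);
}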
1604: #include "petscsys.h"
1606: EXTERN_C_BEGIN
1607: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1608: {
1609: Mat A;
1610: PetscScalar *vals,*svals;
1611: MPI_Comm comm = ((PetscObject)viewer)->comm;
1612: MPI_Status status;
1613: int i,nz,ierr,j,rstart,rend,fd;
1614: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1615: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1616: int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1619: MPI_Comm_size(comm,&size);
1620: MPI_Comm_rank(comm,&rank);
1621: if (!rank) {
1622: PetscViewerBinaryGetDescriptor(viewer,&fd);
1623: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1624: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1625: if (header[3] < 0) {
1626: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1627: }
1628: }
1630: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1631: M = header[1]; N = header[2];
1632: /* determine ownership of all rows */
1633: m = M/size + ((M % size) > rank);
1634: PetscMalloc((size+2)*sizeof(int),&rowners);
1635: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1636: rowners[0] = 0;
1637: for (i=2; i<=size; i++) {
1638: rowners[i] += rowners[i-1];
1639: }
1640: rstart = rowners[rank];
1641: rend = rowners[rank+1];
1643: /* distribute row lengths to all processors */
1644: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1645: offlens = ourlens + (rend-rstart);
1646: if (!rank) {
1647: PetscMalloc(M*sizeof(int),&rowlengths);
1648: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1649: PetscMalloc(size*sizeof(int),&sndcounts);
1650: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1651: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1652: PetscFree(sndcounts);
1653: } else {
1654: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1655: }
1657: if (!rank) {
1658: /* calculate the number of nonzeros on each processor */
1659: PetscMalloc(size*sizeof(int),&procsnz);
1660: PetscMemzero(procsnz,size*sizeof(int));
1661: for (i=0; i<size; i++) {
1662: for (j=rowners[i]; j< rowners[i+1]; j++) {
1663: procsnz[i] += rowlengths[j];
1664: }
1665: }
1666: PetscFree(rowlengths);
1668: /* determine max buffer needed and allocate it */
1669: maxnz = 0;
1670: for (i=0; i<size; i++) {
1671: maxnz = PetscMax(maxnz,procsnz[i]);
1672: }
1673: PetscMalloc(maxnz*sizeof(int),&cols);
1675: /* read in my part of the matrix column indices */
1676: nz = procsnz[0];
1677: PetscMalloc(nz*sizeof(int),&mycols);
1678: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1680: /* read in everyone else's part and ship it off */
1681: for (i=1; i<size; i++) {
1682: nz = procsnz[i];
1683: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1684: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1685: }
1686: PetscFree(cols);
1687: } else {
1688: /* determine buffer space needed for message */
1689: nz = 0;
1690: for (i=0; i<m; i++) {
1691: nz += ourlens[i];
1692: }
1693: PetscMalloc((nz+1)*sizeof(int),&mycols);
1695: /* receive message of column indices*/
1696: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1697: MPI_Get_count(&status,MPI_INT,&maxnz);
1698: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1699: }
1701: /* determine column ownership if matrix is not square */
1702: if (N != M) {
1703: n = N/size + ((N % size) > rank);
1704: ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1705: cstart = cend - n;
1706: } else {
1707: cstart = rstart;
1708: cend = rend;
1709: n = cend - cstart;
1710: }
1712: /* loop over local rows, determining number of off diagonal entries */
1713: PetscMemzero(offlens,m*sizeof(int));
1714: jj = 0;
1715: for (i=0; i<m; i++) {
1716: for (j=0; j<ourlens[i]; j++) {
1717: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1718: jj++;
1719: }
1720: }
1722: /* create our matrix */
1723: for (i=0; i<m; i++) {
1724: ourlens[i] -= offlens[i];
1725: }
1726: MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1727: A = *newmat;
1728: MatSetOption(A,MAT_COLUMNS_SORTED);
1729: for (i=0; i<m; i++) {
1730: ourlens[i] += offlens[i];
1731: }
1733: if (!rank) {
1734: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1736: /* read in my part of the matrix numerical values */
1737: nz = procsnz[0];
1738: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1739:
1740: /* insert into matrix */
1741: jj = rstart;
1742: smycols = mycols;
1743: svals = vals;
1744: for (i=0; i<m; i++) {
1745: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1746: smycols += ourlens[i];
1747: svals += ourlens[i];
1748: jj++;
1749: }
1751: /* read in other processors and ship out */
1752: for (i=1; i<size; i++) {
1753: nz = procsnz[i];
1754: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1755: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1756: }
1757: PetscFree(procsnz);
1758: } else {
1759: /* receive numeric values */
1760: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1762: /* receive message of values*/
1763: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1764: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1765: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1767: /* insert into matrix */
1768: jj = rstart;
1769: smycols = mycols;
1770: svals = vals;
1771: for (i=0; i<m; i++) {
1772: ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1773: smycols += ourlens[i];
1774: svals += ourlens[i];
1775: jj++;
1776: }
1777: }
1778: PetscFree(ourlens);
1779: PetscFree(vals);
1780: PetscFree(mycols);
1781: PetscFree(rowners);
1783: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1784: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1785: return(0);
1786: }
1787: EXTERN_C_END
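/*
   Illustrative sketch (not part of mpiaij.c; the function name is hypothetical): loading
   a matrix with the routine above.  Given a binary viewer (opened elsewhere, e.g. with
   PetscViewerBinaryOpen()) positioned at a stored matrix, MatLoad() splits the rows
   roughly evenly across the communicator, reads the row lengths and column indices on
   process 0, and ships each process its slice.
*/
static int SketchLoad(PetscViewer viewer,Mat *A)
{
  int ierr;
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr);
  return(0);
}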
1789: /*
1790: Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
1791: copy of the local piece, and then the end result formed by concatenating the local matrices.
1792: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1793: */
1794: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1795: {
1796: int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1797: int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1798: Mat *local,M,Mreuse;
1799: PetscScalar *vwork,*aa;
1800: MPI_Comm comm = mat->comm;
1801: Mat_SeqAIJ *aij;
1805: MPI_Comm_rank(comm,&rank);
1806: MPI_Comm_size(comm,&size);
1808: if (call == MAT_REUSE_MATRIX) {
1809: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
1810: if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1811: local = &Mreuse;
1812: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
1813: } else {
1814: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
1815: Mreuse = *local;
1816: ierr = PetscFree(local);
1817: }
1819: /*
1820: m - number of local rows
1821: n - number of columns (same on all processors)
1822: rstart - first row in new global matrix generated
1823: */
1824: MatGetSize(Mreuse,&m,&n);
1825: if (call == MAT_INITIAL_MATRIX) {
1826: aij = (Mat_SeqAIJ*)(Mreuse)->data;
1827: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1828: ii = aij->i;
1829: jj = aij->j;
1831: /*
1832: Determine the number of non-zeros in the diagonal and off-diagonal
1833: portions of the matrix in order to do correct preallocation
1834: */
1836: /* first get start and end of "diagonal" columns */
1837: if (csize == PETSC_DECIDE) {
1838: nlocal = n/size + ((n % size) > rank);
1839: } else {
1840: nlocal = csize;
1841: }
1842: ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
1843: rstart = rend - nlocal;
1844: if (rank == size - 1 && rend != n) {
1845: SETERRQ(1,"Local column sizes do not add up to total number of columns");
1846: }
1848: /* next, compute all the lengths */
1849: ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);
1850: olens = dlens + m;
1851: for (i=0; i<m; i++) {
1852: jend = ii[i+1] - ii[i];
1853: olen = 0;
1854: dlen = 0;
1855: for (j=0; j<jend; j++) {
1856: if (*jj < rstart || *jj >= rend) olen++;
1857: else dlen++;
1858: jj++;
1859: }
1860: olens[i] = olen;
1861: dlens[i] = dlen;
1862: }
1863: MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
1864: PetscFree(dlens);
1865: } else {
1866: int ml,nl;
1868: M = *newmat;
1869: MatGetLocalSize(M,&ml,&nl);
1870: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1871: MatZeroEntries(M);
1872: /*
1873: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1874: rather than the slower MatSetValues().
1875: */
1876: M->was_assembled = PETSC_TRUE;
1877: M->assembled = PETSC_FALSE;
1878: }
1879: MatGetOwnershipRange(M,&rstart,&rend);
1880: aij = (Mat_SeqAIJ*)(Mreuse)->data;
1881: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1882: ii = aij->i;
1883: jj = aij->j;
1884: aa = aij->a;
1885: for (i=0; i<m; i++) {
1886: row = rstart + i;
1887: nz = ii[i+1] - ii[i];
1888: cwork = jj; jj += nz;
1889: vwork = aa; aa += nz;
1890: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
1891: }
1893: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
1894: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
1895: *newmat = M;
1897: /* save submatrix used in processor for next request */
1898: if (call == MAT_INITIAL_MATRIX) {
1899: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
1900: PetscObjectDereference((PetscObject)Mreuse);
1901: }
1903: return(0);
1904: }
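/*
   A minimal usage sketch of the routine above, assuming the index sets isrow/iscol are
   created by the caller (this helper is illustrative only and is not used elsewhere in
   the file).  The first call with MAT_INITIAL_MATRIX builds the submatrix and caches the
   sequential copy under the name "SubMatrix"; a later call with MAT_REUSE_MATRIX and the
   same index sets refills that cached copy instead of rebuilding it.
*/
static int MatGetSubMatrix_MPIAIJ_UsageSketch(Mat mat,IS isrow,IS iscol,Mat *sub)
{
  int ierr;

  /* first extraction: allocates the new parallel matrix and the cached SeqAIJ piece */
  ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);
  /* after the numerical values of mat change (same nonzero pattern), refresh in place */
  ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,sub);CHKERRQ(ierr);
  return(0);
}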
1906: /*@C
1907:    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ
1908:    format (the default parallel PETSc format). For good matrix assembly performance
1909: the user should preallocate the matrix storage by setting the parameters
1910: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
1911: performance can be increased by more than a factor of 50.
1913: Collective on MPI_Comm
1915: Input Parameters:
1916: +  B - the matrix
1917: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
1918: (same value is used for all local rows)
1919: . d_nnz - array containing the number of nonzeros in the various rows of the
1920: DIAGONAL portion of the local submatrix (possibly different for each row)
1921: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1922:        The size of this array is equal to the number of local rows, i.e., 'm'.
1923: You must leave room for the diagonal entry even if it is zero.
1924: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
1925: submatrix (same value is used for all local rows).
1926: - o_nnz - array containing the number of nonzeros in the various rows of the
1927: OFF-DIAGONAL portion of the local submatrix (possibly different for
1928: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1929: structure. The size of this array is equal to the number
1930:        of local rows, i.e., 'm'.
1932: The AIJ format (also called the Yale sparse matrix format or
1933:    compressed row storage) is fully compatible with standard Fortran 77
1934: storage. That is, the stored row and column indices can begin at
1935: either one (as in Fortran) or zero. See the users manual for details.
1937: The user MUST specify either the local or global matrix dimensions
1938: (possibly both).
1940: The parallel matrix is partitioned such that the first m0 rows belong to
1941: process 0, the next m1 rows belong to process 1, the next m2 rows belong
1942:    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
1944: The DIAGONAL portion of the local submatrix of a processor can be defined
1945:    as the submatrix which is obtained by extracting the part corresponding
1946:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1947:    first row that belongs to the processor and r2 is the last row belonging
1948:    to this processor. This is a square mxm matrix. The remaining portion
1949:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
1951: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1953: By default, this format uses inodes (identical nodes) when possible.
1954: We search for consecutive rows with the same nonzero structure, thereby
1955: reusing matrix information to achieve increased efficiency.
1957: Options Database Keys:
1958: + -mat_aij_no_inode - Do not use inodes
1959: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1960: - -mat_aij_oneindex - Internally use indexing starting at 1
1961: rather than 0. Note that when calling MatSetValues(),
1962: the user still MUST index entries starting at 0!
1964: Example usage:
1965:
1966: Consider the following 8x8 matrix with 34 non-zero values, that is
1967:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1968:    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
1969: as follows:
1971: .vb
1972: 1 2 0 | 0 3 0 | 0 4
1973: Proc0 0 5 6 | 7 0 0 | 8 0
1974: 9 0 10 | 11 0 0 | 12 0
1975: -------------------------------------
1976: 13 0 14 | 15 16 17 | 0 0
1977: Proc1 0 18 0 | 19 20 21 | 0 0
1978: 0 0 0 | 22 23 0 | 24 0
1979: -------------------------------------
1980: Proc2 25 26 27 | 0 0 28 | 29 0
1981: 30 0 0 | 31 32 33 | 0 34
1982: .ve
1984: This can be represented as a collection of submatrices as:
1986: .vb
1987: A B C
1988: D E F
1989: G H I
1990: .ve
1992:    Here the submatrices A,B,C are owned by proc0, D,E,F are
1993:    owned by proc1, and G,H,I are owned by proc2.
1995: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1996: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1997: The 'M','N' parameters are 8,8, and have the same values on all procs.
1999: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2000: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2001: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2002:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2003:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2004:    matrix and [DF] as another SeqAIJ matrix.
2006: When d_nz, o_nz parameters are specified, d_nz storage elements are
2007: allocated for every row of the local diagonal submatrix, and o_nz
2008: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2009:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2010:    row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2011: In this case, the values of d_nz,o_nz are:
2012: .vb
2013:     proc0 : d_nz = 2, o_nz = 2
2014:     proc1 : d_nz = 3, o_nz = 2
2015:     proc2 : d_nz = 1, o_nz = 4
2016: .ve
2017: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2018:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2019:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2020: 34 values.
2022: When d_nnz, o_nnz parameters are specified, the storage is specified
2023:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2024: In the above case the values for d_nnz,o_nnz are:
2025: .vb
2026: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2027: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2028: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2029: .ve
2030:    Here the space allocated is the sum of all the above values, i.e., 34, and
2031:    hence the preallocation is perfect.
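   For illustration, a minimal sketch of the corresponding calls on proc1 (comm is
   assumed to be the communicator shared by the three processes):
.vb
      Mat B;
      int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      MatCreate(comm,3,3,8,8,&B);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve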
2033: Level: intermediate
2035: .keywords: matrix, aij, compressed row, sparse, parallel
2037: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2038: @*/
2039: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2040: {
2041: Mat_MPIAIJ *b;
2042: int ierr,i;
2043: PetscTruth flg2;
2046: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2047: if (!flg2) return(0);
2048: B->preallocated = PETSC_TRUE;
2049: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2050: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2051: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2052: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2053: if (d_nnz) {
2054: for (i=0; i<B->m; i++) {
2055: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2056: }
2057: }
2058: if (o_nnz) {
2059: for (i=0; i<B->m; i++) {
2060: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2061: }
2062: }
2063: b = (Mat_MPIAIJ*)B->data;
2065: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2066: PetscLogObjectParent(B,b->A);
2067: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2068: PetscLogObjectParent(B,b->B);
2070: return(0);
2071: }
2073: /*@C
2074: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2075: (the default parallel PETSc format). For good matrix assembly performance
2076: the user should preallocate the matrix storage by setting the parameters
2077: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2078: performance can be increased by more than a factor of 50.
2080: Collective on MPI_Comm
2082: Input Parameters:
2083: + comm - MPI communicator
2084: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2085: This value should be the same as the local size used in creating the
2086: y vector for the matrix-vector product y = Ax.
2087: . n - This value should be the same as the local size used in creating the
2088: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2089: calculated if N is given) For square matrices n is almost always m.
2090: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2091: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2092: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2093: (same value is used for all local rows)
2094: . d_nnz - array containing the number of nonzeros in the various rows of the
2095: DIAGONAL portion of the local submatrix (possibly different for each row)
2096: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2097:        The size of this array is equal to the number of local rows, i.e., 'm'.
2098: You must leave room for the diagonal entry even if it is zero.
2099: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2100: submatrix (same value is used for all local rows).
2101: - o_nnz - array containing the number of nonzeros in the various rows of the
2102: OFF-DIAGONAL portion of the local submatrix (possibly different for
2103: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2104: structure. The size of this array is equal to the number
2105:        of local rows, i.e., 'm'.
2107: Output Parameter:
2108: . A - the matrix
2110: Notes:
2111: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2112: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2113: storage requirements for this matrix.
2115: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2116:    processor, then it must be used on all processors that share the object for
2117: that argument.
2119: The AIJ format (also called the Yale sparse matrix format or
2120:    compressed row storage) is fully compatible with standard Fortran 77
2121: storage. That is, the stored row and column indices can begin at
2122: either one (as in Fortran) or zero. See the users manual for details.
2124: The user MUST specify either the local or global matrix dimensions
2125: (possibly both).
2127: The parallel matrix is partitioned such that the first m0 rows belong to
2128: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2129:    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2131: The DIAGONAL portion of the local submatrix of a processor can be defined
2132:    as the submatrix which is obtained by extracting the part corresponding
2133:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2134:    first row that belongs to the processor and r2 is the last row belonging
2135:    to this processor. This is a square mxm matrix. The remaining portion
2136:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2138: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2140: By default, this format uses inodes (identical nodes) when possible.
2141: We search for consecutive rows with the same nonzero structure, thereby
2142: reusing matrix information to achieve increased efficiency.
2144: Options Database Keys:
2145: + -mat_aij_no_inode - Do not use inodes
2146: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2147: - -mat_aij_oneindex - Internally use indexing starting at 1
2148: rather than 0. Note that when calling MatSetValues(),
2149: the user still MUST index entries starting at 0!
2152: Example usage:
2153:
2154: Consider the following 8x8 matrix with 34 non-zero values, that is
2155:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2156:    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2157: as follows:
2159: .vb
2160: 1 2 0 | 0 3 0 | 0 4
2161: Proc0 0 5 6 | 7 0 0 | 8 0
2162: 9 0 10 | 11 0 0 | 12 0
2163: -------------------------------------
2164: 13 0 14 | 15 16 17 | 0 0
2165: Proc1 0 18 0 | 19 20 21 | 0 0
2166: 0 0 0 | 22 23 0 | 24 0
2167: -------------------------------------
2168: Proc2 25 26 27 | 0 0 28 | 29 0
2169: 30 0 0 | 31 32 33 | 0 34
2170: .ve
2172: This can be represented as a collection of submatrices as:
2174: .vb
2175: A B C
2176: D E F
2177: G H I
2178: .ve
2180:    Here the submatrices A,B,C are owned by proc0, D,E,F are
2181:    owned by proc1, and G,H,I are owned by proc2.
2183: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2184: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2185: The 'M','N' parameters are 8,8, and have the same values on all procs.
2187: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2188: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2189: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2190:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2191:    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2192:    matrix and [DF] as another SeqAIJ matrix.
2194: When d_nz, o_nz parameters are specified, d_nz storage elements are
2195: allocated for every row of the local diagonal submatrix, and o_nz
2196: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2197:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2198:    row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2199: In this case, the values of d_nz,o_nz are:
2200: .vb
2201:     proc0 : d_nz = 2, o_nz = 2
2202:     proc1 : d_nz = 3, o_nz = 2
2203:     proc2 : d_nz = 1, o_nz = 4
2204: .ve
2205: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2206:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2207:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2208: 34 values.
2210: When d_nnz, o_nnz parameters are specified, the storage is specified
2211:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2212: In the above case the values for d_nnz,o_nnz are:
2213: .vb
2214: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2215: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2216: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2217: .ve
2218:    Here the space allocated is the sum of all the above values, i.e., 34, and
2219:    hence the preallocation is perfect.
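   For illustration, a minimal sketch of the corresponding call on proc1 (comm is
   assumed to be the communicator shared by the three processes):
.vb
      Mat A;
      int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve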
2221: Level: intermediate
2223: .keywords: matrix, aij, compressed row, sparse, parallel
2225: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2226: @*/
2227: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2228: {
2229: int ierr,size;
2232: MatCreate(comm,m,n,M,N,A);
2233: MPI_Comm_size(comm,&size);
2234: if (size > 1) {
2235: MatSetType(*A,MATMPIAIJ);
2236: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2237: } else {
2238: MatSetType(*A,MATSEQAIJ);
2239: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2240: }
2241: return(0);
2242: }
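/*
   MatMPIAIJGetSeqAIJ - gives access to the two SeqAIJ matrices that make up an MPIAIJ
   matrix: Ad is the local diagonal block, Ao is the local off-diagonal block, and
   colmap is garray, which maps each local column of Ao to its global column number.
*/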
2244: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2245: {
2246: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2248: *Ad = a->A;
2249: *Ao = a->B;
2250: *colmap = a->garray;
2251: return(0);
2252: }
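/*
   MatSetColoring_MPIAIJ - hands a coloring down to the diagonal and off-diagonal SeqAIJ
   pieces. For IS_COLORING_LOCAL the colors are gathered from all processes so that the
   ghost columns of the off-diagonal part (indexed through garray) can be colored; for
   IS_COLORING_GHOSTED the needed colors are looked up through the matrix's
   local-to-global mapping.
*/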
2254: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2255: {
2256: int ierr;
2257: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2260: if (coloring->ctype == IS_COLORING_LOCAL) {
2261: int *allcolors,*colors,i;
2262: ISColoring ocoloring;
2264: /* set coloring for diagonal portion */
2265: MatSetColoring_SeqAIJ(a->A,coloring);
2267: /* set coloring for off-diagonal portion */
2268: ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2269: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2270: for (i=0; i<a->B->n; i++) {
2271: colors[i] = allcolors[a->garray[i]];
2272: }
2273: PetscFree(allcolors);
2274: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2275: MatSetColoring_SeqAIJ(a->B,ocoloring);
2276: ISColoringDestroy(ocoloring);
2277: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2278: int *colors,i,*larray;
2279: ISColoring ocoloring;
2281: /* set coloring for diagonal portion */
2282: PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2283: for (i=0; i<a->A->n; i++) {
2284: larray[i] = i + a->cstart;
2285: }
2286: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2287: PetscMalloc((a->A->n+1)*sizeof(int),&colors);
2288: for (i=0; i<a->A->n; i++) {
2289: colors[i] = coloring->colors[larray[i]];
2290: }
2291: PetscFree(larray);
2292: ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2293: MatSetColoring_SeqAIJ(a->A,ocoloring);
2294: ISColoringDestroy(ocoloring);
2296: /* set coloring for off-diagonal portion */
2297: PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2298: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2299: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2300: for (i=0; i<a->B->n; i++) {
2301: colors[i] = coloring->colors[larray[i]];
2302: }
2303: PetscFree(larray);
2304: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2305: MatSetColoring_SeqAIJ(a->B,ocoloring);
2306: ISColoringDestroy(ocoloring);
2307: } else {
2308: SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2309: }
2311: return(0);
2312: }
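/*
   The two routines below forward ADIC- and ADIFOR-generated values to the diagonal
   and off-diagonal SeqAIJ pieces of the matrix.
*/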
2314: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2315: {
2316: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2317: int ierr;
2320: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2321: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2322: return(0);
2323: }
2325: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2326: {
2327: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2328: int ierr;
2331: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2332: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2333: return(0);
2334: }