Actual source code: mpiaij.c

  1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

 3:  #include "src/mat/impls/aij/mpi/mpiaij.h"
 4:  #include "src/vec/vecimpl.h"
 5:  #include "src/inline/spops.h"

  7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
  8: EXTERN int DisAssemble_MPIAIJ(Mat);
  9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
 10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
 11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
 12: EXTERN int MatPrintHelp_SeqAIJ(Mat);
 13: EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat);
 14: EXTERN int MatUseSpooles_MPIAIJ(Mat);

 16: /* 
 17:   Local utility routine that creates a mapping from the global column 
 18: number to the local number in the off-diagonal part of the local 
 19: storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at 
 20: a slightly higher hash table cost; without it, it is not scalable (each processor
 21: has an order N integer array) but is fast to access.
 22: */
 23: int CreateColmap_MPIAIJ_Private(Mat mat)
 24: {
 25:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
 26:   int        n = aij->B->n,i,ierr;

 29: #if defined (PETSC_USE_CTABLE)
 30:   PetscTableCreate(n,&aij->colmap);
 31:   for (i=0; i<n; i++){
 32:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
 33:   }
 34: #else
 35:   PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
 36:   PetscLogObjectMemory(mat,mat->N*sizeof(int));
 37:   PetscMemzero(aij->colmap,mat->N*sizeof(int));
 38:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
 39: #endif
 40:   return(0);
 41: }
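
/*
   Illustrative sketch (not part of the original source): the #else branch above simply
   builds a dense inverse of garray[].  The hypothetical helper below shows the same idea
   in isolation on plain arrays; nlocal is the number of off-diagonal columns, Nglobal the
   global column count, and the stored value is "local index + 1" so that 0 can mean
   "this global column is not present locally".
*/
static int CreateColmapSketch(int nlocal,const int *garray,int Nglobal,int **colmap_out)
{
  int i,ierr,*colmap;

  ierr = PetscMalloc((Nglobal+1)*sizeof(int),&colmap);CHKERRQ(ierr);
  ierr = PetscMemzero(colmap,Nglobal*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nlocal; i++) colmap[garray[i]] = i + 1;   /* invert the local-to-global map */
  *colmap_out = colmap;
  return 0;
}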

 43: #define CHUNKSIZE   15
 44: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
 45: { \
 46:  \
 47:     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
 48:     rmax = aimax[row]; nrow = ailen[row]; \
 49:     col1 = col - shift; \
 50:  \
 51:     low = 0; high = nrow; \
 52:     while (high-low > 5) { \
 53:       t = (low+high)/2; \
 54:       if (rp[t] > col) high = t; \
 55:       else             low  = t; \
 56:     } \
 57:       for (_i=low; _i<high; _i++) { \
 58:         if (rp[_i] > col1) break; \
 59:         if (rp[_i] == col1) { \
 60:           if (addv == ADD_VALUES) ap[_i] += value; \
 61:           else                  ap[_i] = value; \
 62:           goto a_noinsert; \
 63:         } \
 64:       } \
 65:       if (nonew == 1) goto a_noinsert; \
 66:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
 67:       if (nrow >= rmax) { \
 68:         /* there is no extra room in row, therefore enlarge */ \
 69:         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
 70:         PetscScalar *new_a; \
 71:  \
 72:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 73:  \
 74:         /* malloc new storage space */ \
 75:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
 76:         ierr    = PetscMalloc(len,&new_a); \
 77:         new_j   = (int*)(new_a + new_nz); \
 78:         new_i   = new_j + new_nz; \
 79:  \
 80:         /* copy over old data into new slots */ \
 81:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
 82:         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
 83:         PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
 84:         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
 85:         PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
 86:                                                            len*sizeof(int)); \
 87:         PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar)); \
 88:         PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
 89:                                                            len*sizeof(PetscScalar)); \
 90:         /* free up old matrix storage */ \
 91:  \
 92:         PetscFree(a->a); \
 93:         if (!a->singlemalloc) { \
 94:            PetscFree(a->i); \
 95:            PetscFree(a->j); \
 96:         } \
 97:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
 98:         a->singlemalloc = PETSC_TRUE; \
 99:  \
100:         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101:         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103:         a->maxnz += CHUNKSIZE; \
104:         a->reallocs++; \
105:       } \
106:       N = nrow++ - 1; a->nz++; \
107:       /* shift up all the later entries in this row */ \
108:       for (ii=N; ii>=_i; ii--) { \
109:         rp[ii+1] = rp[ii]; \
110:         ap[ii+1] = ap[ii]; \
111:       } \
112:       rp[_i] = col1; \
113:       ap[_i] = value; \
114:       a_noinsert: ; \
115:       ailen[row] = nrow; \
116: }

118: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119: { \
120:  \
121:     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122:     rmax = bimax[row]; nrow = bilen[row]; \
123:     col1 = col - shift; \
124:  \
125:     low = 0; high = nrow; \
126:     while (high-low > 5) { \
127:       t = (low+high)/2; \
128:       if (rp[t] > col) high = t; \
129:       else             low  = t; \
130:     } \
131:       for (_i=low; _i<high; _i++) { \
132:         if (rp[_i] > col1) break; \
133:         if (rp[_i] == col1) { \
134:           if (addv == ADD_VALUES) ap[_i] += value; \
135:           else                  ap[_i] = value; \
136:           goto b_noinsert; \
137:         } \
138:       } \
139:       if (nonew == 1) goto b_noinsert; \
140:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141:       if (nrow >= rmax) { \
142:         /* there is no extra room in row, therefore enlarge */ \
143:         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144:         PetscScalar *new_a; \
145:  \
146:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147:  \
148:         /* malloc new storage space */ \
149:         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150:         ierr    = PetscMalloc(len,&new_a); \
151:         new_j   = (int*)(new_a + new_nz); \
152:         new_i   = new_j + new_nz; \
153:  \
154:         /* copy over old data into new slots */ \
155:         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156:         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157:         PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
158:         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159:         PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160:                                                            len*sizeof(int)); \
161:         PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar)); \
162:         PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163:                                                            len*sizeof(PetscScalar)); \
164:         /* free up old matrix storage */ \
165:  \
166:         PetscFree(b->a); \
167:         if (!b->singlemalloc) { \
168:           PetscFree(b->i); \
169:           PetscFree(b->j); \
170:         } \
171:         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
172:         b->singlemalloc = PETSC_TRUE; \
173:  \
174:         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175:         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176:         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177:         b->maxnz += CHUNKSIZE; \
178:         b->reallocs++; \
179:       } \
180:       N = nrow++ - 1; b->nz++; \
181:       /* shift up all the later entries in this row */ \
182:       for (ii=N; ii>=_i; ii--) { \
183:         rp[ii+1] = rp[ii]; \
184:         ap[ii+1] = ap[ii]; \
185:       } \
186:       rp[_i] = col1; \
187:       ap[_i] = value; \
188:       b_noinsert: ; \
189:       bilen[row] = nrow; \
190: }
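
/*
   Illustrative sketch (not part of the original source): both macros above insert
   (col,value) into one CSR row that is kept sorted by column, and enlarge the storage by
   CHUNKSIZE entries when the row is full.  The hypothetical routine below shows just the
   search/shift/insert step on a single row held in plain arrays; the reallocation branch
   is replaced by an error to keep the sketch short.
*/
static int InsertIntoSortedRowSketch(int *cols,PetscScalar *vals,int *nrow,int rmax,
                                     int col,PetscScalar value,InsertMode addv)
{
  int low = 0,high = *nrow,t,i;

  while (high-low > 5) {                  /* same coarse bisection as the macros */
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {              /* finish with a linear scan */
    if (cols[i] > col) break;
    if (cols[i] == col) {                 /* existing nonzero: add or overwrite */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return 0;
    }
  }
  if (*nrow >= rmax) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row full; the real macros enlarge by CHUNKSIZE here");
  for (t=*nrow-1; t>=i; t--) {            /* shift later entries up one slot */
    cols[t+1] = cols[t];
    vals[t+1] = vals[t];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
  return 0;
}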

192: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
193: {
194:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
195:   PetscScalar  value;
196:   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
197:   int          cstart = aij->cstart,cend = aij->cend,row,col;
198:   PetscTruth   roworiented = aij->roworiented;

200:   /* Some Variables required in the macro */
201:   Mat          A = aij->A;
202:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
203:   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
204:   PetscScalar  *aa = a->a;
205:   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
206:   Mat          B = aij->B;
207:   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
208:   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
209:   PetscScalar  *ba = b->a;

211:   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
212:   int          nonew = a->nonew,shift = a->indexshift;
213:   PetscScalar  *ap;

216:   for (i=0; i<m; i++) {
217:     if (im[i] < 0) continue;
218: #if defined(PETSC_USE_BOPT_g)
219:     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
220: #endif
221:     if (im[i] >= rstart && im[i] < rend) {
222:       row = im[i] - rstart;
223:       for (j=0; j<n; j++) {
224:         if (in[j] >= cstart && in[j] < cend){
225:           col = in[j] - cstart;
226:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227:           if (ignorezeroentries && value == 0.0) continue;
228:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
229:           /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
230:         } else if (in[j] < 0) continue;
231: #if defined(PETSC_USE_BOPT_g)
232:         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
233: #endif
234:         else {
235:           if (mat->was_assembled) {
236:             if (!aij->colmap) {
237:               CreateColmap_MPIAIJ_Private(mat);
238:             }
239: #if defined (PETSC_USE_CTABLE)
240:             PetscTableFind(aij->colmap,in[j]+1,&col);
241:             col--;
242: #else
243:             col = aij->colmap[in[j]] - 1;
244: #endif
245:             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
246:               DisAssemble_MPIAIJ(mat);
247:               col =  in[j];
248:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
249:               B = aij->B;
250:               b = (Mat_SeqAIJ*)B->data;
251:               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
252:               ba = b->a;
253:             }
254:           } else col = in[j];
255:           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
256:           if (ignorezeroentries && value == 0.0) continue;
257:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
258:           /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
259:         }
260:       }
261:     } else {
262:       if (!aij->donotstash) {
263:         if (roworiented) {
264:           if (ignorezeroentries && v[i*n] == 0.0) continue;
265:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
266:         } else {
267:           if (ignorezeroentries && v[i] == 0.0) continue;
268:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
269:         }
270:       }
271:     }
272:   }
273:   return(0);
274: }
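
/*
   Illustrative usage sketch (not part of the original source): MatSetValues_MPIAIJ above
   is reached through the public MatSetValues() on a matrix created with MatCreateMPIAIJ().
   Locally owned rows go straight into aij->A (diagonal block) or aij->B (off-diagonal
   block); rows owned by another process are stashed and shipped at assembly time.  The
   sizes below are made up for the example.
*/
static int MatSetValuesUsageSketch(MPI_Comm comm)
{
  Mat         M;
  int         ierr,row = 0,col = 0;
  PetscScalar one = 1.0;

  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,10,10,5,PETSC_NULL,2,PETSC_NULL,&M);CHKERRQ(ierr);
  ierr = MatSetValues(M,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr); /* stashed if row 0 is not local */
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           /* ships any stashed entries */
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(M);CHKERRQ(ierr);
  return 0;
}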

276: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
277: {
278:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
279:   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
280:   int        cstart = aij->cstart,cend = aij->cend,row,col;

283:   for (i=0; i<m; i++) {
284:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
285:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
286:     if (idxm[i] >= rstart && idxm[i] < rend) {
287:       row = idxm[i] - rstart;
288:       for (j=0; j<n; j++) {
289:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
290:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
291:         if (idxn[j] >= cstart && idxn[j] < cend){
292:           col = idxn[j] - cstart;
293:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
294:         } else {
295:           if (!aij->colmap) {
296:             CreateColmap_MPIAIJ_Private(mat);
297:           }
298: #if defined (PETSC_USE_CTABLE)
299:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
300:           col --;
301: #else
302:           col = aij->colmap[idxn[j]] - 1;
303: #endif
304:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305:           else {
306:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
307:           }
308:         }
309:       }
310:     } else {
311:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312:     }
313:   }
314:   return(0);
315: }

317: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318: {
319:   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320:   int         ierr,nstash,reallocs;
321:   InsertMode  addv;

324:   if (aij->donotstash) {
325:     return(0);
326:   }

328:   /* make sure all processors are either in INSERTMODE or ADDMODE */
329:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
330:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
331:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
332:   }
333:   mat->insertmode = addv; /* in case this processor had no cache */

335:   MatStashScatterBegin_Private(&mat->stash,aij->rowners);
336:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 337:   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
338:   return(0);
339: }
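
/*
   Illustrative note (not part of the original source): the MPI_BOR reduction above detects
   mixed insert/add usage.  Assuming the usual InsertMode values NOT_SET_VALUES = 0,
   INSERT_VALUES = 1 and ADD_VALUES = 2, OR-ing the per-process modes gives 1|1 = 1 and
   2|2 = 2, but 1|2 = 3 = (ADD_VALUES|INSERT_VALUES); so the reduced value equals that
   combination exactly when some processes inserted while others added.  Processes that set
   no values contribute 0 and do not disturb the result.
*/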


342: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343: {
344:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
345:   int         i,j,rstart,ncols,n,ierr,flg;
346:   int         *row,*col,other_disassembled;
347:   PetscScalar *val;
348:   InsertMode  addv = mat->insertmode;
349: #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 
350:   PetscTruth  flag;
351: #endif

354:   if (!aij->donotstash) {
355:     while (1) {
356:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
357:       if (!flg) break;

359:       for (i=0; i<n;) {
360:         /* Now identify the consecutive vals belonging to the same row */
361:         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
362:         if (j < n) ncols = j-i;
363:         else       ncols = n-i;
364:         /* Now assemble all these values with a single function call */
365:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
366:         i = j;
367:       }
368:     }
369:     MatStashScatterEnd_Private(&mat->stash);
370:   }
371: 
372:   MatAssemblyBegin(aij->A,mode);
373:   MatAssemblyEnd(aij->A,mode);

375:   /* determine if any processor has disassembled, if so we must 
 376:      also disassemble ourselves, in order that we may reassemble. */
377:   /*
378:      if nonzero structure of submatrix B cannot change then we know that
379:      no processor disassembled thus we can skip this stuff
380:   */
381:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
382:     MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
383:     if (mat->was_assembled && !other_disassembled) {
384:       DisAssemble_MPIAIJ(mat);
385:     }
386:   }

388:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
389:     MatSetUpMultiply_MPIAIJ(mat);
390:   }
391:   MatAssemblyBegin(aij->B,mode);
392:   MatAssemblyEnd(aij->B,mode);

394:   if (aij->rowvalues) {
395:     PetscFree(aij->rowvalues);
396:     aij->rowvalues = 0;
397:   }
398: #if defined(PETSC_HAVE_SUPERLUDIST) 
399:   PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);
400:   if (flag) { MatUseSuperLU_DIST_MPIAIJ(mat); }
401: #endif 

403: #if defined(PETSC_HAVE_SPOOLES) 
404:   PetscOptionsHasName(PETSC_NULL,"-mat_aij_spooles",&flag);
405:   if (flag) { MatUseSpooles_MPIAIJ(mat); }
406: #endif 
407:   return(0);
408: }
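
/*
   Illustrative note (not part of the original source): the MPI_PROD reduction above treats
   mat->was_assembled as a 0/1 flag, so other_disassembled is nonzero only if every process
   was still assembled.  If any single process has disassembled (flag 0), the product is 0
   and the remaining assembled processes call DisAssemble_MPIAIJ() as well, so that all
   processes rebuild the off-diagonal block B consistently before reassembling.
*/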

410: int MatZeroEntries_MPIAIJ(Mat A)
411: {
412:   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413:   int        ierr;

416:   MatZeroEntries(l->A);
417:   MatZeroEntries(l->B);
418:   return(0);
419: }

421: int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
422: {
423:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
425:   int            *procs,*nprocs,j,idx,nsends,*work,row;
426:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
427:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
428:   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
429:   MPI_Comm       comm = A->comm;
430:   MPI_Request    *send_waits,*recv_waits;
431:   MPI_Status     recv_status,*send_status;
432:   IS             istmp;
433:   PetscTruth     found;

436:   ISGetLocalSize(is,&N);
437:   ISGetIndices(is,&rows);

439:   /*  first count number of contributors to each processor */
440:   PetscMalloc(2*size*sizeof(int),&nprocs);
441:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
442:   procs  = nprocs + size;
443:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
444:   for (i=0; i<N; i++) {
445:     idx = rows[i];
446:     found = PETSC_FALSE;
447:     for (j=0; j<size; j++) {
448:       if (idx >= owners[j] && idx < owners[j+1]) {
449:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
450:       }
451:     }
452:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
453:   }
454:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

456:   /* inform other processors of number of messages and max length*/
457:   PetscMalloc(2*size*sizeof(int),&work);
458:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
459:   nrecvs = work[size+rank];
460:   nmax   = work[rank];
461:   ierr   = PetscFree(work);

463:   /* post receives:   */
464:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
465:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
466:   for (i=0; i<nrecvs; i++) {
467:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
468:   }

470:   /* do sends:
471:       1) starts[i] gives the starting index in svalues for stuff going to 
472:          the ith processor
473:   */
474:   PetscMalloc((N+1)*sizeof(int),&svalues);
475:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
476:   PetscMalloc((size+1)*sizeof(int),&starts);
477:   starts[0] = 0;
478:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
479:   for (i=0; i<N; i++) {
480:     svalues[starts[owner[i]]++] = rows[i];
481:   }
482:   ISRestoreIndices(is,&rows);

484:   starts[0] = 0;
485:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
486:   count = 0;
487:   for (i=0; i<size; i++) {
488:     if (procs[i]) {
489:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
490:     }
491:   }
492:   PetscFree(starts);

494:   base = owners[rank];

496:   /*  wait on receives */
497:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
498:   source = lens + nrecvs;
499:   count  = nrecvs; slen = 0;
500:   while (count) {
501:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
502:     /* unpack receives into our local space */
503:     MPI_Get_count(&recv_status,MPI_INT,&n);
504:     source[imdex]  = recv_status.MPI_SOURCE;
505:     lens[imdex]    = n;
506:     slen          += n;
507:     count--;
508:   }
509:   PetscFree(recv_waits);
510: 
511:   /* move the data into the send scatter */
512:   PetscMalloc((slen+1)*sizeof(int),&lrows);
513:   count = 0;
514:   for (i=0; i<nrecvs; i++) {
515:     values = rvalues + i*nmax;
516:     for (j=0; j<lens[i]; j++) {
517:       lrows[count++] = values[j] - base;
518:     }
519:   }
520:   PetscFree(rvalues);
521:   PetscFree(lens);
522:   PetscFree(owner);
523:   PetscFree(nprocs);
524: 
525:   /* actually zap the local rows */
526:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
527:   PetscLogObjectParent(A,istmp);

529:   /*
530:         Zero the required rows. If the "diagonal block" of the matrix
 531:      is square and the user wishes to set the diagonal, we use separate
532:      code so that MatSetValues() is not called for each diagonal allocating
533:      new memory, thus calling lots of mallocs and slowing things down.

 535:        Contributed by: Matthew Knepley
536:   */
537:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538:   MatZeroRows(l->B,istmp,0);
539:   if (diag && (l->A->M == l->A->N)) {
540:     ierr      = MatZeroRows(l->A,istmp,diag);
541:   } else if (diag) {
542:     MatZeroRows(l->A,istmp,0);
543:     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
 544:       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
 545: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546:     }
547:     for (i = 0; i < slen; i++) {
548:       row  = lrows[i] + rstart;
549:       MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
550:     }
551:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
552:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
553:   } else {
554:     MatZeroRows(l->A,istmp,0);
555:   }
556:   ISDestroy(istmp);
557:   PetscFree(lrows);

559:   /* wait on sends */
560:   if (nsends) {
561:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
562:     MPI_Waitall(nsends,send_waits,send_status);
563:     PetscFree(send_status);
564:   }
565:   PetscFree(send_waits);
566:   PetscFree(svalues);

568:   return(0);
569: }
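
/*
   Illustrative usage sketch (not part of the original source): callers reach the routine
   above through MatZeroRows() with an index set of global row numbers; any process may
   list any rows, and the code above forwards each row to its owner.  A hypothetical call
   that zeros one global row and places 1.0 on its diagonal:
*/
static int MatZeroRowsUsageSketch(Mat A,int globalrow)
{
  IS          is;
  int         ierr;
  PetscScalar diag = 1.0;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,1,&globalrow,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
  return 0;
}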

571: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572: {
573:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574:   int        ierr,nt;

577:   VecGetLocalSize(xx,&nt);
578:   if (nt != A->n) {
579:     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
580:   }
581:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
582:   (*a->A->ops->mult)(a->A,xx,yy);
583:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
584:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
585:   return(0);
586: }
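
/*
   Illustrative note (not part of the original source): MatMult above uses the standard
   MPIAIJ splitting

        y = A_diag * x_local + B_offdiag * x_ghost,

   where x_ghost (a->lvec) holds the off-process entries of x gathered by the a->Mvctx
   scatter.  The VecScatterBegin/End pair brackets the diagonal-block multiply so that the
   ghost-value communication overlaps with local computation.
*/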

588: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
589: {
590:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
591:   int        ierr;

594:   VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
595:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
596:   VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
597:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
598:   return(0);
599: }

601: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602: {
603:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604:   int        ierr;

607:   /* do nondiagonal part */
608:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
609:   /* send it on its way */
610:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
611:   /* do local part */
612:   (*a->A->ops->multtranspose)(a->A,xx,yy);
613:   /* receive remote parts: note this assumes the values are not actually */
614:   /* inserted in yy until the next line, which is true for my implementation*/
 615:   /* but is perhaps not always true. */
616:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
617:   return(0);
618: }

620: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
621: {
622:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
623:   int        ierr;

626:   /* do nondiagonal part */
627:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
628:   /* send it on its way */
629:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
630:   /* do local part */
631:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
632:   /* receive remote parts: note this assumes the values are not actually */
 633:   /* inserted in zz until the next line, which is true for my implementation*/
 634:   /* but is perhaps not always true. */
635:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
636:   return(0);
637: }
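
/*
   Illustrative note (not part of the original source): the two transpose routines above
   compute

        y = A_diag^T * x_local  +  (reverse scatter-add of  B_offdiag^T * x_local),

   i.e. each process forms its off-diagonal contribution into a->lvec and then adds it into
   the owning processes with a SCATTER_REVERSE/ADD_VALUES scatter; hence the warning in the
   comments that the destination vector must not receive those values before VecScatterEnd()
   returns.
*/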

639: /*
640:   This only works correctly for square matrices where the subblock A->A is the 
641:    diagonal block
642: */
643: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
644: {
645:   int        ierr;
646:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

649:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
650:   if (a->rstart != a->cstart || a->rend != a->cend) {
651:     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
652:   }
653:   MatGetDiagonal(a->A,v);
654:   return(0);
655: }

657: int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
658: {
659:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
660:   int        ierr;

663:   MatScale(aa,a->A);
664:   MatScale(aa,a->B);
665:   return(0);
666: }

668: int MatDestroy_MPIAIJ(Mat mat)
669: {
670:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
671:   int        ierr;

674: #if defined(PETSC_USE_LOG)
675:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
676: #endif
677:   MatStashDestroy_Private(&mat->stash);
678:   PetscFree(aij->rowners);
679:   MatDestroy(aij->A);
680:   MatDestroy(aij->B);
681: #if defined (PETSC_USE_CTABLE)
682:   if (aij->colmap) {PetscTableDelete(aij->colmap);}
683: #else
684:   if (aij->colmap) {PetscFree(aij->colmap);}
685: #endif
686:   if (aij->garray) {PetscFree(aij->garray);}
687:   if (aij->lvec)   {VecDestroy(aij->lvec);}
688:   if (aij->Mvctx)  {VecScatterDestroy(aij->Mvctx);}
689:   if (aij->rowvalues) {PetscFree(aij->rowvalues);}
690:   PetscFree(aij);
691:   return(0);
692: }

694: extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
695: extern int MatFactorInfo_Spooles(Mat,PetscViewer);

697: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
698: {
699:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
700:   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
701:   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
702:   PetscTruth        isdraw,isascii,flg;
703:   PetscViewer       sviewer;
704:   PetscViewerFormat format;

707:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
708:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
709:   if (isascii) {
710:     PetscViewerGetFormat(viewer,&format);
711:     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
712:       MatInfo info;
713:       MPI_Comm_rank(mat->comm,&rank);
714:       MatGetInfo(mat,MAT_LOCAL,&info);
715:       PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
716:       if (flg) {
 717:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
 718:                                               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
 719:       } else {
 720:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
 721:                     rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
 722:       }
 723:       MatGetInfo(aij->A,MAT_LOCAL,&info);
 724:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
 725:       MatGetInfo(aij->B,MAT_LOCAL,&info);
 726:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
727:       PetscViewerFlush(viewer);
728:       VecScatterView(aij->Mvctx,viewer);
729:       return(0);
730:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
731:       return(0);
732:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
733: #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
734:       MatMPIAIJFactorInfo_SuperLu(mat,viewer);
735: #endif
736: #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
737:       MatFactorInfo_Spooles(mat,viewer);
738: #endif
739:       return(0);
740:     }
741:   } else if (isdraw) {
742:     PetscDraw       draw;
743:     PetscTruth isnull;
744:     PetscViewerDrawGetDraw(viewer,0,&draw);
745:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
746:   }

748:   if (size == 1) {
749:     PetscObjectSetName((PetscObject)aij->A,mat->name);
750:     MatView(aij->A,viewer);
751:   } else {
752:     /* assemble the entire matrix onto first processor. */
753:     Mat         A;
754:     Mat_SeqAIJ *Aloc;
755:     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
756:     PetscScalar *a;

758:     if (!rank) {
759:       MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
760:     } else {
761:       MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
762:     }
763:     PetscLogObjectParent(mat,A);

765:     /* copy over the A part */
766:     Aloc = (Mat_SeqAIJ*)aij->A->data;
767:     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
768:     row = aij->rstart;
769:     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
770:     for (i=0; i<m; i++) {
771:       MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
772:       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
773:     }
774:     aj = Aloc->j;
775:     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

777:     /* copy over the B part */
778:     Aloc = (Mat_SeqAIJ*)aij->B->data;
779:     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
780:     row  = aij->rstart;
781:     PetscMalloc((ai[m]+1)*sizeof(int),&cols);
782:     ct   = cols;
783:     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
784:     for (i=0; i<m; i++) {
785:       MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
786:       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
787:     }
788:     PetscFree(ct);
789:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
790:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
791:     /* 
792:        Everyone has to call to draw the matrix since the graphics waits are
793:        synchronized across all processors that share the PetscDraw object
794:     */
795:     PetscViewerGetSingleton(viewer,&sviewer);
796:     if (!rank) {
797:       PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
798:       MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
799:     }
800:     PetscViewerRestoreSingleton(viewer,&sviewer);
801:     MatDestroy(A);
802:   }
803:   return(0);
804: }

806: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
807: {
808:   int        ierr;
809:   PetscTruth isascii,isdraw,issocket,isbinary;
810: 
812:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
813:   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
814:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
815:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
816:   if (isascii || isdraw || isbinary || issocket) {
817:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
818:   } else {
819:     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
820:   }
821:   return(0);
822: }



826: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
827: {
828:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
829:   int          ierr;
830:   Vec          bb1;
831:   PetscScalar  mone=-1.0;

834:   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

836:   VecDuplicate(bb,&bb1);

838:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
839:     if (flag & SOR_ZERO_INITIAL_GUESS) {
840:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
841:       its--;
842:     }
843: 
844:     while (its--) {
845:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
846:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

848:       /* update rhs: bb1 = bb - B*x */
849:       VecScale(&mone,mat->lvec);
850:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

852:       /* local sweep */
853:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
854: 
855:     }
856:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
857:     if (flag & SOR_ZERO_INITIAL_GUESS) {
858:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
859:       its--;
860:     }
861:     while (its--) {
862:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
863:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

865:       /* update rhs: bb1 = bb - B*x */
866:       VecScale(&mone,mat->lvec);
867:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

869:       /* local sweep */
870:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
871: 
872:     }
873:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
874:     if (flag & SOR_ZERO_INITIAL_GUESS) {
875:       (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
876:       its--;
877:     }
878:     while (its--) {
879:       VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
880:       VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);

882:       /* update rhs: bb1 = bb - B*x */
883:       VecScale(&mone,mat->lvec);
884:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

886:       /* local sweep */
887:       (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
888: 
889:     }
890:   } else {
891:     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
892:   }

894:   VecDestroy(bb1);
895:   return(0);
896: }

898: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
899: {
900:   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
901:   Mat        A = mat->A,B = mat->B;
902:   int        ierr;
903:   PetscReal  isend[5],irecv[5];

906:   info->block_size     = 1.0;
907:   MatGetInfo(A,MAT_LOCAL,info);
908:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
909:   isend[3] = info->memory;  isend[4] = info->mallocs;
910:   MatGetInfo(B,MAT_LOCAL,info);
911:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
912:   isend[3] += info->memory;  isend[4] += info->mallocs;
913:   if (flag == MAT_LOCAL) {
914:     info->nz_used      = isend[0];
915:     info->nz_allocated = isend[1];
916:     info->nz_unneeded  = isend[2];
917:     info->memory       = isend[3];
918:     info->mallocs      = isend[4];
919:   } else if (flag == MAT_GLOBAL_MAX) {
920:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
921:     info->nz_used      = irecv[0];
922:     info->nz_allocated = irecv[1];
923:     info->nz_unneeded  = irecv[2];
924:     info->memory       = irecv[3];
925:     info->mallocs      = irecv[4];
926:   } else if (flag == MAT_GLOBAL_SUM) {
927:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
928:     info->nz_used      = irecv[0];
929:     info->nz_allocated = irecv[1];
930:     info->nz_unneeded  = irecv[2];
931:     info->memory       = irecv[3];
932:     info->mallocs      = irecv[4];
933:   }
934:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
935:   info->fill_ratio_needed = 0;
936:   info->factor_mallocs    = 0;
937:   info->rows_global       = (double)matin->M;
938:   info->columns_global    = (double)matin->N;
939:   info->rows_local        = (double)matin->m;
940:   info->columns_local     = (double)matin->N;

942:   return(0);
943: }

945: int MatSetOption_MPIAIJ(Mat A,MatOption op)
946: {
947:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
948:   int        ierr;

951:   switch (op) {
952:   case MAT_NO_NEW_NONZERO_LOCATIONS:
953:   case MAT_YES_NEW_NONZERO_LOCATIONS:
954:   case MAT_COLUMNS_UNSORTED:
955:   case MAT_COLUMNS_SORTED:
956:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
957:   case MAT_KEEP_ZEROED_ROWS:
958:   case MAT_NEW_NONZERO_LOCATION_ERR:
959:   case MAT_USE_INODES:
960:   case MAT_DO_NOT_USE_INODES:
961:   case MAT_IGNORE_ZERO_ENTRIES:
962:     MatSetOption(a->A,op);
963:     MatSetOption(a->B,op);
964:     break;
965:   case MAT_ROW_ORIENTED:
966:     a->roworiented = PETSC_TRUE;
967:     MatSetOption(a->A,op);
968:     MatSetOption(a->B,op);
969:     break;
970:   case MAT_ROWS_SORTED:
971:   case MAT_ROWS_UNSORTED:
972:   case MAT_YES_NEW_DIAGONALS:
973:   case MAT_USE_SINGLE_PRECISION_SOLVES:
 974:     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
975:     break;
976:   case MAT_COLUMN_ORIENTED:
977:     a->roworiented = PETSC_FALSE;
978:     MatSetOption(a->A,op);
979:     MatSetOption(a->B,op);
980:     break;
981:   case MAT_IGNORE_OFF_PROC_ENTRIES:
982:     a->donotstash = PETSC_TRUE;
983:     break;
984:   case MAT_NO_NEW_DIAGONALS:
985:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
986:   default:
987:     SETERRQ(PETSC_ERR_SUP,"unknown option");
988:   }
989:   return(0);
990: }

992: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
993: {
994:   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
995:   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
996:   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
997:   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
998:   int          *cmap,*idx_p;

1001:   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1002:   mat->getrowactive = PETSC_TRUE;

1004:   if (!mat->rowvalues && (idx || v)) {
1005:     /*
1006:         allocate enough space to hold information from the longest row.
1007:     */
1008:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1009:     int     max = 1,tmp;
1010:     for (i=0; i<matin->m; i++) {
1011:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1012:       if (max < tmp) { max = tmp; }
1013:     }
1014:     PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1015:     mat->rowindices = (int*)(mat->rowvalues + max);
1016:   }

1018:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1019:   lrow = row - rstart;

1021:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1022:   if (!v)   {pvA = 0; pvB = 0;}
1023:   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1024:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1025:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1026:   nztot = nzA + nzB;

1028:   cmap  = mat->garray;
1029:   if (v  || idx) {
1030:     if (nztot) {
1031:       /* Sort by increasing column numbers, assuming A and B already sorted */
1032:       int imark = -1;
1033:       if (v) {
1034:         *v = v_p = mat->rowvalues;
1035:         for (i=0; i<nzB; i++) {
1036:           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1037:           else break;
1038:         }
1039:         imark = i;
1040:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1041:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1042:       }
1043:       if (idx) {
1044:         *idx = idx_p = mat->rowindices;
1045:         if (imark > -1) {
1046:           for (i=0; i<imark; i++) {
1047:             idx_p[i] = cmap[cworkB[i]];
1048:           }
1049:         } else {
1050:           for (i=0; i<nzB; i++) {
1051:             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1052:             else break;
1053:           }
1054:           imark = i;
1055:         }
1056:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1057:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1058:       }
1059:     } else {
1060:       if (idx) *idx = 0;
1061:       if (v)   *v   = 0;
1062:     }
1063:   }
1064:   *nz = nztot;
1065:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1066:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1067:   return(0);
1068: }
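
/*
   Illustrative note (not part of the original source): because the off-diagonal block B
   stores, in sorted order, only columns outside [cstart,cend), the merged row returned by
   MatGetRow_MPIAIJ above has the layout

        { B columns < cstart } , { A columns + cstart } , { B columns >= cend },

   which is globally sorted as long as A and B each keep their rows sorted; imark is the
   split point inside the B row.
*/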

1070: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1071: {
1072:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1075:   if (aij->getrowactive == PETSC_FALSE) {
1076:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1077:   }
1078:   aij->getrowactive = PETSC_FALSE;
1079:   return(0);
1080: }

1082: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1083: {
1084:   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1085:   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1086:   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1087:   PetscReal    sum = 0.0;
1088:   PetscScalar  *v;

1091:   if (aij->size == 1) {
1092:      MatNorm(aij->A,type,norm);
1093:   } else {
1094:     if (type == NORM_FROBENIUS) {
1095:       v = amat->a;
1096:       for (i=0; i<amat->nz; i++) {
1097: #if defined(PETSC_USE_COMPLEX)
1098:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1099: #else
1100:         sum += (*v)*(*v); v++;
1101: #endif
1102:       }
1103:       v = bmat->a;
1104:       for (i=0; i<bmat->nz; i++) {
1105: #if defined(PETSC_USE_COMPLEX)
1106:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1107: #else
1108:         sum += (*v)*(*v); v++;
1109: #endif
1110:       }
1111:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1112:       *norm = sqrt(*norm);
1113:     } else if (type == NORM_1) { /* max column norm */
1114:       PetscReal *tmp,*tmp2;
1115:       int    *jj,*garray = aij->garray;
1116:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1117:       PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1118:       PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1119:       *norm = 0.0;
1120:       v = amat->a; jj = amat->j;
1121:       for (j=0; j<amat->nz; j++) {
1122:         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1123:       }
1124:       v = bmat->a; jj = bmat->j;
1125:       for (j=0; j<bmat->nz; j++) {
1126:         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1127:       }
1128:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1129:       for (j=0; j<mat->N; j++) {
1130:         if (tmp2[j] > *norm) *norm = tmp2[j];
1131:       }
1132:       PetscFree(tmp);
1133:       PetscFree(tmp2);
1134:     } else if (type == NORM_INFINITY) { /* max row norm */
1135:       PetscReal ntemp = 0.0;
1136:       for (j=0; j<aij->A->m; j++) {
1137:         v = amat->a + amat->i[j] + shift;
1138:         sum = 0.0;
1139:         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1140:           sum += PetscAbsScalar(*v); v++;
1141:         }
1142:         v = bmat->a + bmat->i[j] + shift;
1143:         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1144:           sum += PetscAbsScalar(*v); v++;
1145:         }
1146:         if (sum > ntemp) ntemp = sum;
1147:       }
1148:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1149:     } else {
1150:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1151:     }
1152:   }
1153:   return(0);
1154: }

1156: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1157: {
1158:   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1159:   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1160:   int          ierr,shift = Aloc->indexshift;
1161:   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1162:   Mat          B;
1163:   PetscScalar  *array;

1166:   if (!matout && M != N) {
1167:     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1168:   }

1170:   MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);

1172:   /* copy over the A part */
1173:   Aloc = (Mat_SeqAIJ*)a->A->data;
1174:   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1175:   row = a->rstart;
1176:   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1177:   for (i=0; i<m; i++) {
1178:     MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1179:     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1180:   }
1181:   aj = Aloc->j;
1182:   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

1184:   /* copy over the B part */
1185:   Aloc = (Mat_SeqAIJ*)a->B->data;
1186:   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1187:   row  = a->rstart;
1188:   PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1189:   ct   = cols;
1190:   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1191:   for (i=0; i<m; i++) {
1192:     MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1193:     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1194:   }
1195:   PetscFree(ct);
1196:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1197:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1198:   if (matout) {
1199:     *matout = B;
1200:   } else {
1201:     MatHeaderCopy(A,B);
1202:   }
1203:   return(0);
1204: }

1206: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1207: {
1208:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1209:   Mat        a = aij->A,b = aij->B;
1210:   int        ierr,s1,s2,s3;

1213:   MatGetLocalSize(mat,&s2,&s3);
1214:   if (rr) {
1215:     VecGetLocalSize(rr,&s1);
1216:     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1217:     /* Overlap communication with computation. */
1218:     VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1219:   }
1220:   if (ll) {
1221:     VecGetLocalSize(ll,&s1);
1222:     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1223:     (*b->ops->diagonalscale)(b,ll,0);
1224:   }
1225:   /* scale  the diagonal block */
1226:   (*a->ops->diagonalscale)(a,ll,rr);

1228:   if (rr) {
1229:     /* Do a scatter end and then right scale the off-diagonal block */
1230:     VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1231:     (*b->ops->diagonalscale)(b,0,aij->lvec);
1232:   }
1233: 
1234:   return(0);
1235: }


1238: int MatPrintHelp_MPIAIJ(Mat A)
1239: {
1240:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1241:   int        ierr;

1244:   if (!a->rank) {
1245:     MatPrintHelp_SeqAIJ(a->A);
1246:   }
1247:   return(0);
1248: }

1250: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1251: {
1253:   *bs = 1;
1254:   return(0);
1255: }
1256: int MatSetUnfactored_MPIAIJ(Mat A)
1257: {
1258:   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1259:   int        ierr;

1262:   MatSetUnfactored(a->A);
1263:   return(0);
1264: }

1266: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1267: {
1268:   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1269:   Mat        a,b,c,d;
1270:   PetscTruth flg;
1271:   int        ierr;

1274:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1275:   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1276:   a = matA->A; b = matA->B;
1277:   c = matB->A; d = matB->B;

1279:   MatEqual(a,c,&flg);
1280:   if (flg == PETSC_TRUE) {
1281:     MatEqual(b,d,&flg);
1282:   }
1283:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1284:   return(0);
1285: }

1287: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1288: {
1289:   int        ierr;
1290:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1291:   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1292:   PetscTruth flg;

1295:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1296:   if (str != SAME_NONZERO_PATTERN || !flg) {
1297:     /* because of the column compression in the off-processor part of the matrix a->B,
1298:        the number of columns in a->B and b->B may be different, hence we cannot call
1299:        the MatCopy() directly on the two parts. If need be, we can provide a more 
1300:        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1301:        then copying the submatrices */
1302:     MatCopy_Basic(A,B,str);
1303:   } else {
1304:     MatCopy(a->A,b->A,str);
1305:     MatCopy(a->B,b->B,str);
1306:   }
1307:   return(0);
1308: }

1310: int MatSetUpPreallocation_MPIAIJ(Mat A)
1311: {
1312:   int        ierr;

1315:    MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1316:   return(0);
1317: }

1319: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1320: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1321: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1322: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1323: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1324: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1325: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1326: #endif

 1328:  #include "petscblaslapack.h"

1330: int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1331: {
 1332:   int        ierr,one = 1;
1333:   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1334:   Mat_SeqAIJ *x,*y;

1337:   if (str == SAME_NONZERO_PATTERN) {
1338:     x  = (Mat_SeqAIJ *)xx->A->data;
1339:     y  = (Mat_SeqAIJ *)yy->A->data;
1340:     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1341:     x  = (Mat_SeqAIJ *)xx->B->data;
1342:     y  = (Mat_SeqAIJ *)yy->B->data;
1343:     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1344:   } else {
1345:     MatAXPY_Basic(a,X,Y,str);
1346:   }
1347:   return(0);
1348: }

1350: /* -------------------------------------------------------------------*/
1351: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1352:        MatGetRow_MPIAIJ,
1353:        MatRestoreRow_MPIAIJ,
1354:        MatMult_MPIAIJ,
1355:        MatMultAdd_MPIAIJ,
1356:        MatMultTranspose_MPIAIJ,
1357:        MatMultTransposeAdd_MPIAIJ,
1358:        0,
1359:        0,
1360:        0,
1361:        0,
1362:        0,
1363:        0,
1364:        MatRelax_MPIAIJ,
1365:        MatTranspose_MPIAIJ,
1366:        MatGetInfo_MPIAIJ,
1367:        MatEqual_MPIAIJ,
1368:        MatGetDiagonal_MPIAIJ,
1369:        MatDiagonalScale_MPIAIJ,
1370:        MatNorm_MPIAIJ,
1371:        MatAssemblyBegin_MPIAIJ,
1372:        MatAssemblyEnd_MPIAIJ,
1373:        0,
1374:        MatSetOption_MPIAIJ,
1375:        MatZeroEntries_MPIAIJ,
1376:        MatZeroRows_MPIAIJ,
1377: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1378:        MatLUFactorSymbolic_MPIAIJ_TFS,
1379: #else
1380:        0,
1381: #endif
1382:        0,
1383:        0,
1384:        0,
1385:        MatSetUpPreallocation_MPIAIJ,
1386:        0,
1387:        0,
1388:        0,
1389:        0,
1390:        MatDuplicate_MPIAIJ,
1391:        0,
1392:        0,
1393:        0,
1394:        0,
1395:        MatAXPY_MPIAIJ,
1396:        MatGetSubMatrices_MPIAIJ,
1397:        MatIncreaseOverlap_MPIAIJ,
1398:        MatGetValues_MPIAIJ,
1399:        MatCopy_MPIAIJ,
1400:        MatPrintHelp_MPIAIJ,
1401:        MatScale_MPIAIJ,
1402:        0,
1403:        0,
1404:        0,
1405:        MatGetBlockSize_MPIAIJ,
1406:        0,
1407:        0,
1408:        0,
1409:        0,
1410:        MatFDColoringCreate_MPIAIJ,
1411:        0,
1412:        MatSetUnfactored_MPIAIJ,
1413:        0,
1414:        0,
1415:        MatGetSubMatrix_MPIAIJ,
1416:        MatDestroy_MPIAIJ,
1417:        MatView_MPIAIJ,
1418:        MatGetPetscMaps_Petsc,
1419:        0,
1420:        0,
1421:        0,
1422:        0,
1423:        0,
1424:        0,
1425:        0,
1426:        0,
1427:        MatSetColoring_MPIAIJ,
1428:        MatSetValuesAdic_MPIAIJ,
1429:        MatSetValuesAdifor_MPIAIJ
1430: };

1432: /* ----------------------------------------------------------------------------------------*/

1434: EXTERN_C_BEGIN
1435: int MatStoreValues_MPIAIJ(Mat mat)
1436: {
1437:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1438:   int        ierr;

1441:   MatStoreValues(aij->A);
1442:   MatStoreValues(aij->B);
1443:   return(0);
1444: }
1445: EXTERN_C_END

1447: EXTERN_C_BEGIN
1448: int MatRetrieveValues_MPIAIJ(Mat mat)
1449: {
1450:   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1451:   int        ierr;

1454:   MatRetrieveValues(aij->A);
1455:   MatRetrieveValues(aij->B);
1456:   return(0);
1457: }
1458: EXTERN_C_END

 1460:  #include "petscpc.h"
1461: EXTERN_C_BEGIN
1462: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1463: EXTERN_C_END

1465: EXTERN_C_BEGIN
1466: int MatCreate_MPIAIJ(Mat B)
1467: {
1468:   Mat_MPIAIJ *b;
1469:   int        ierr,i,size;

1472:   MPI_Comm_size(B->comm,&size);

1474:   ierr            = PetscNew(Mat_MPIAIJ,&b);
1475:   B->data         = (void*)b;
1476:   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1477:   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1478:   B->factor       = 0;
1479:   B->assembled    = PETSC_FALSE;
1480:   B->mapping      = 0;

1482:   B->insertmode      = NOT_SET_VALUES;
1483:   b->size            = size;
1484:   MPI_Comm_rank(B->comm,&b->rank);

1486:   PetscSplitOwnership(B->comm,&B->m,&B->M);
1487:   PetscSplitOwnership(B->comm,&B->n,&B->N);

1489:   /* the information in the maps duplicates the information computed below, eventually 
1490:      we should remove the duplicate information that is not contained in the maps */
1491:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1492:   PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);

1494:   /* build local table of row and column ownerships */
1495:   PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1496:   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1497:   b->cowners = b->rowners + b->size + 2;
1498:   MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1499:   b->rowners[0] = 0;
1500:   for (i=2; i<=b->size; i++) {
1501:     b->rowners[i] += b->rowners[i-1];
1502:   }
1503:   b->rstart = b->rowners[b->rank];
1504:   b->rend   = b->rowners[b->rank+1];
1505:   MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1506:   b->cowners[0] = 0;
1507:   for (i=2; i<=b->size; i++) {
1508:     b->cowners[i] += b->cowners[i-1];
1509:   }
1510:   b->cstart = b->cowners[b->rank];
1511:   b->cend   = b->cowners[b->rank+1];

1513:   /* build cache for off array entries formed */
1514:   MatStashCreate_Private(B->comm,1,&B->stash);
1515:   b->donotstash  = PETSC_FALSE;
1516:   b->colmap      = 0;
1517:   b->garray      = 0;
1518:   b->roworiented = PETSC_TRUE;

1520:   /* stuff used for matrix vector multiply */
1521:   b->lvec      = PETSC_NULL;
1522:   b->Mvctx     = PETSC_NULL;

1524:   /* stuff for MatGetRow() */
1525:   b->rowindices   = 0;
1526:   b->rowvalues    = 0;
1527:   b->getrowactive = PETSC_FALSE;

1529:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1530:                                      "MatStoreValues_MPIAIJ",
1531:                                      MatStoreValues_MPIAIJ);
1532:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1533:                                      "MatRetrieveValues_MPIAIJ",
1534:                                      MatRetrieveValues_MPIAIJ);
1535:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1536:                                      "MatGetDiagonalBlock_MPIAIJ",
1537:                                      MatGetDiagonalBlock_MPIAIJ);

1539:   return(0);
1540: }
1541: EXTERN_C_END
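
/*
   Illustrative sketch (not part of the original source): the rowners[]/cowners[] tables
   built in MatCreate_MPIAIJ above are exclusive prefix sums of the per-process local
   sizes, so process r owns global rows [owners[r],owners[r+1]).  A hypothetical standalone
   version of that step, assuming owners has length at least size+1:
*/
static int BuildOwnershipSketch(MPI_Comm comm,int mlocal,int size,int *owners)
{
  int i,ierr;

  owners[0] = 0;
  ierr = MPI_Allgather(&mlocal,1,MPI_INT,owners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  for (i=2; i<=size; i++) owners[i] += owners[i-1];   /* running sum -> ownership ranges */
  return 0;
}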

1543: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1544: {
1545:   Mat        mat;
1546:   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1547:   int        ierr;

1550:   *newmat       = 0;
1551:   MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1552:   MatSetType(mat,MATMPIAIJ);
1553:   a    = (Mat_MPIAIJ*)mat->data;
1554:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1555:   mat->factor       = matin->factor;
1556:   mat->assembled    = PETSC_TRUE;
1557:   mat->insertmode   = NOT_SET_VALUES;
1558:   mat->preallocated = PETSC_TRUE;

1560:   a->rstart       = oldmat->rstart;
1561:   a->rend         = oldmat->rend;
1562:   a->cstart       = oldmat->cstart;
1563:   a->cend         = oldmat->cend;
1564:   a->size         = oldmat->size;
1565:   a->rank         = oldmat->rank;
1566:   a->donotstash   = oldmat->donotstash;
1567:   a->roworiented  = oldmat->roworiented;
1568:   a->rowindices   = 0;
1569:   a->rowvalues    = 0;
1570:   a->getrowactive = PETSC_FALSE;

1572:   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1573:   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);
1574:   if (oldmat->colmap) {
1575: #if defined (PETSC_USE_CTABLE)
1576:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1577: #else
1578:     PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1579:     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1580:     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1581: #endif
1582:   } else a->colmap = 0;
1583:   if (oldmat->garray) {
1584:     int len;
1585:     len  = oldmat->B->n;
1586:     PetscMalloc((len+1)*sizeof(int),&a->garray);
1587:     PetscLogObjectMemory(mat,len*sizeof(int));
1588:     if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1589:   } else a->garray = 0;
1590: 
1591:   VecDuplicate(oldmat->lvec,&a->lvec);
1592:   PetscLogObjectParent(mat,a->lvec);
1593:   VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1594:   PetscLogObjectParent(mat,a->Mvctx);
1595:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1596:   PetscLogObjectParent(mat,a->A);
1597:   MatDuplicate(oldmat->B,cpvalues,&a->B);
1598:   PetscLogObjectParent(mat,a->B);
1599:   PetscFListDuplicate(matin->qlist,&mat->qlist);
1600:   *newmat = mat;
1601:   return(0);
1602: }

1604:  #include petscsys.h

1606: EXTERN_C_BEGIN
1607: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1608: {
1609:   Mat          A;
1610:   PetscScalar  *vals,*svals;
1611:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1612:   MPI_Status   status;
1613:   int          i,nz,ierr,j,rstart,rend,fd;
1614:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1615:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1616:   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

1619:   MPI_Comm_size(comm,&size);
1620:   MPI_Comm_rank(comm,&rank);
1621:   if (!rank) {
1622:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1623:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1624:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1625:     if (header[3] < 0) {
1626:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1627:     }
1628:   }

1630:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1631:   M = header[1]; N = header[2];
1632:   /* determine ownership of all rows */
1633:   m = M/size + ((M % size) > rank);
1634:   PetscMalloc((size+2)*sizeof(int),&rowners);
1635:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1636:   rowners[0] = 0;
1637:   for (i=2; i<=size; i++) {
1638:     rowners[i] += rowners[i-1];
1639:   }
1640:   rstart = rowners[rank];
1641:   rend   = rowners[rank+1];

1643:   /* distribute row lengths to all processors */
1644:   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1645:   offlens = ourlens + (rend-rstart);
1646:   if (!rank) {
1647:     PetscMalloc(M*sizeof(int),&rowlengths);
1648:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1649:     PetscMalloc(size*sizeof(int),&sndcounts);
1650:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1651:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1652:     PetscFree(sndcounts);
1653:   } else {
1654:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1655:   }

1657:   if (!rank) {
1658:     /* calculate the number of nonzeros on each processor */
1659:     PetscMalloc(size*sizeof(int),&procsnz);
1660:     PetscMemzero(procsnz,size*sizeof(int));
1661:     for (i=0; i<size; i++) {
1662:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1663:         procsnz[i] += rowlengths[j];
1664:       }
1665:     }
1666:     PetscFree(rowlengths);

1668:     /* determine max buffer needed and allocate it */
1669:     maxnz = 0;
1670:     for (i=0; i<size; i++) {
1671:       maxnz = PetscMax(maxnz,procsnz[i]);
1672:     }
1673:     PetscMalloc(maxnz*sizeof(int),&cols);

1675:     /* read in my part of the matrix column indices  */
1676:     nz   = procsnz[0];
1677:     PetscMalloc(nz*sizeof(int),&mycols);
1678:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1680:     /* read in everyone else's and ship them off */
1681:     for (i=1; i<size; i++) {
1682:       nz   = procsnz[i];
1683:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1684:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1685:     }
1686:     PetscFree(cols);
1687:   } else {
1688:     /* determine buffer space needed for message */
1689:     nz = 0;
1690:     for (i=0; i<m; i++) {
1691:       nz += ourlens[i];
1692:     }
1693:     PetscMalloc((nz+1)*sizeof(int),&mycols);

1695:     /* receive message of column indices */
1696:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1697:     MPI_Get_count(&status,MPI_INT,&maxnz);
1698:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1699:   }

1701:   /* determine column ownership if matrix is not square */
1702:   if (N != M) {
1703:     n      = N/size + ((N % size) > rank);
1704:     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1705:     cstart = cend - n;
1706:   } else {
1707:     cstart = rstart;
1708:     cend   = rend;
1709:     n      = cend - cstart;
1710:   }

1712:   /* loop over local rows, determining number of off-diagonal entries */
1713:   PetscMemzero(offlens,m*sizeof(int));
1714:   jj = 0;
1715:   for (i=0; i<m; i++) {
1716:     for (j=0; j<ourlens[i]; j++) {
1717:       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1718:       jj++;
1719:     }
1720:   }

1722:   /* create our matrix */
1723:   for (i=0; i<m; i++) {
1724:     ourlens[i] -= offlens[i];
1725:   }
1726:   MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1727:   A = *newmat;
1728:   MatSetOption(A,MAT_COLUMNS_SORTED);
1729:   for (i=0; i<m; i++) {
1730:     ourlens[i] += offlens[i];
1731:   }

1733:   if (!rank) {
1734:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1736:     /* read in my part of the matrix numerical values  */
1737:     nz   = procsnz[0];
1738:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1739: 
1740:     /* insert into matrix */
1741:     jj      = rstart;
1742:     smycols = mycols;
1743:     svals   = vals;
1744:     for (i=0; i<m; i++) {
1745:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1746:       smycols += ourlens[i];
1747:       svals   += ourlens[i];
1748:       jj++;
1749:     }

1751:     /* read in other processors and ship out */
1752:     for (i=1; i<size; i++) {
1753:       nz   = procsnz[i];
1754:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1755:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1756:     }
1757:     PetscFree(procsnz);
1758:   } else {
1759:     /* receive numeric values */
1760:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

1762:     /* receive message of values */
1763:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1764:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1765:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1767:     /* insert into matrix */
1768:     jj      = rstart;
1769:     smycols = mycols;
1770:     svals   = vals;
1771:     for (i=0; i<m; i++) {
1772:       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1773:       smycols += ourlens[i];
1774:       svals   += ourlens[i];
1775:       jj++;
1776:     }
1777:   }
1778:   PetscFree(ourlens);
1779:   PetscFree(vals);
1780:   PetscFree(mycols);
1781:   PetscFree(rowners);

1783:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1784:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1785:   return(0);
1786: }
1787: EXTERN_C_END

1789: /*
1790:     Not great since it makes two copies of the submatrix: first a SeqAIJ 
1791:   copy on each process, and then the end result formed by concatenating the 
1792:   local matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
1793: */
1794: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1795: {
1796:   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1797:   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
1798:   Mat          *local,M,Mreuse;
1799:   PetscScalar  *vwork,*aa;
1800:   MPI_Comm     comm = mat->comm;
1801:   Mat_SeqAIJ   *aij;


1805:   MPI_Comm_rank(comm,&rank);
1806:   MPI_Comm_size(comm,&size);

1808:   if (call ==  MAT_REUSE_MATRIX) {
1809:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
1810:     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
1811:     local = &Mreuse;
1812:     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
1813:   } else {
1814:     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
1815:     Mreuse = *local;
1816:     ierr   = PetscFree(local);
1817:   }

1819:   /* 
1820:       m - number of local rows
1821:       n - number of columns (same on all processors)
1822:       rstart - first row in new global matrix generated
1823:   */
1824:   MatGetSize(Mreuse,&m,&n);
1825:   if (call == MAT_INITIAL_MATRIX) {
1826:     aij = (Mat_SeqAIJ*)(Mreuse)->data;
1827:     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1828:     ii  = aij->i;
1829:     jj  = aij->j;

1831:     /*
1832:         Determine the number of non-zeros in the diagonal and off-diagonal 
1833:         portions of the matrix in order to do correct preallocation
1834:     */

1836:     /* first get start and end of "diagonal" columns */
1837:     if (csize == PETSC_DECIDE) {
1838:       nlocal = n/size + ((n % size) > rank);
1839:     } else {
1840:       nlocal = csize;
1841:     }
1842:     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
1843:     rstart = rend - nlocal;
1844:     if (rank == size - 1 && rend != n) {
1845:       SETERRQ(1,"Local column sizes do not add up to total number of columns");
1846:     }

1848:     /* next, compute all the lengths */
1849:     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);
1850:     olens = dlens + m;
1851:     for (i=0; i<m; i++) {
1852:       jend = ii[i+1] - ii[i];
1853:       olen = 0;
1854:       dlen = 0;
1855:       for (j=0; j<jend; j++) {
1856:         if (*jj < rstart || *jj >= rend) olen++;
1857:         else dlen++;
1858:         jj++;
1859:       }
1860:       olens[i] = olen;
1861:       dlens[i] = dlen;
1862:     }
1863:     MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
1864:     PetscFree(dlens);
1865:   } else {
1866:     int ml,nl;

1868:     M = *newmat;
1869:     MatGetLocalSize(M,&ml,&nl);
1870:     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1871:     MatZeroEntries(M);
1872:     /*
1873:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1874:        rather than the slower MatSetValues().
1875:     */
1876:     M->was_assembled = PETSC_TRUE;
1877:     M->assembled     = PETSC_FALSE;
1878:   }
1879:   MatGetOwnershipRange(M,&rstart,&rend);
1880:   aij = (Mat_SeqAIJ*)(Mreuse)->data;
1881:   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
1882:   ii  = aij->i;
1883:   jj  = aij->j;
1884:   aa  = aij->a;
1885:   for (i=0; i<m; i++) {
1886:     row   = rstart + i;
1887:     nz    = ii[i+1] - ii[i];
1888:     cwork = jj;     jj += nz;
1889:     vwork = aa;     aa += nz;
1890:     MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
1891:   }

1893:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
1894:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
1895:   *newmat = M;

1897:   /* save the submatrix used on this processor for the next request */
1898:   if (call ==  MAT_INITIAL_MATRIX) {
1899:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
1900:     PetscObjectDereference((PetscObject)Mreuse);
1901:   }

1903:   return(0);
1904: }
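/*
   Illustrative usage sketch (not part of mpiaij.c): the routine above composes the
   sequential submatrix onto the result under the key "SubMatrix", so a later call
   with MAT_REUSE_MATRIX can refill the same parallel matrix instead of rebuilding
   it.  A minimal sketch of that calling pattern follows; the helper name
   ExtractTwice() and the assumption that isrow/iscol keep the same layout on the
   second call are ours, not part of the PETSc API.
*/
int ExtractTwice(Mat mat,IS isrow,IS iscol,Mat *sub)
{
  int ierr;

  /* first call: builds the submatrix and caches the sequential "SubMatrix" object */
  ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,sub);CHKERRQ(ierr);

  /* ... the numerical values of mat may change here (same nonzero pattern) ... */

  /* second call: reuses the cached sequential submatrix; only the values are refilled */
  ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,sub);CHKERRQ(ierr);
  return(0);
}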

1906: /*@C
1907:    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in 
1908:    AIJ format (the default parallel PETSc format).  For good matrix assembly 
1909:    performance the user should preallocate the matrix storage by setting the 
1910:    parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters 
1911:    accurately, performance can be increased by more than a factor of 50.

1913:    Collective on MPI_Comm

1915:    Input Parameters:
1916: +  B - the matrix 
1917: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1918:            (same value is used for all local rows)
1919: .  d_nnz - array containing the number of nonzeros in the various rows of the 
1920:            DIAGONAL portion of the local submatrix (possibly different for each row)
1921:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
1922:            The size of this array is equal to the number of local rows, i.e. 'm'. 
1923:            You must leave room for the diagonal entry even if it is zero.
1924: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1925:            submatrix (same value is used for all local rows).
1926: -  o_nnz - array containing the number of nonzeros in the various rows of the
1927:            OFF-DIAGONAL portion of the local submatrix (possibly different for
1928:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
1929:            structure. The size of this array is equal to the number 
1930:            of local rows, i.e. 'm'. 

1932:    The AIJ format (also called the Yale sparse matrix format or
1933:    compressed row storage), is fully compatible with standard Fortran 77
1934:    storage.  That is, the stored row and column indices can begin at
1935:    either one (as in Fortran) or zero.  See the users manual for details.

1937:    The user MUST specify either the local or global matrix dimensions
1938:    (possibly both).

1940:    The parallel matrix is partitioned such that the first m0 rows belong to 
1941:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
1942:    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.

1944:    The DIAGONAL portion of the local submatrix of a processor can be defined 
1945:    as the submatrix which is obtained by extracting the part corresponding 
1946:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
1947:    first row that belongs to the processor, and r2 is the last row belonging 
1948:    to this processor. This is a square mxm matrix. The remaining portion 
1949:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

1951:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

1953:    By default, this format uses inodes (identical nodes) when possible.
1954:    We search for consecutive rows with the same nonzero structure, thereby
1955:    reusing matrix information to achieve increased efficiency.

1957:    Options Database Keys:
1958: +  -mat_aij_no_inode  - Do not use inodes
1959: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1960: -  -mat_aij_oneindex - Internally use indexing starting at 1
1961:         rather than 0.  Note that when calling MatSetValues(),
1962:         the user still MUST index entries starting at 0!

1964:    Example usage:
1965:   
1966:    Consider the following 8x8 matrix with 34 nonzero values, which is 
1967:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
1968:    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 
1969:    as follows:

1971: .vb
1972:             1  2  0  |  0  3  0  |  0  4
1973:     Proc0   0  5  6  |  7  0  0  |  8  0
1974:             9  0 10  | 11  0  0  | 12  0
1975:     -------------------------------------
1976:            13  0 14  | 15 16 17  |  0  0
1977:     Proc1   0 18  0  | 19 20 21  |  0  0 
1978:             0  0  0  | 22 23  0  | 24  0
1979:     -------------------------------------
1980:     Proc2  25 26 27  |  0  0 28  | 29  0
1981:            30  0  0  | 31 32 33  |  0 34
1982: .ve

1984:    This can be represented as the following collection of submatrices:

1986: .vb
1987:       A B C
1988:       D E F
1989:       G H I
1990: .ve

1992:    Here the submatrices A,B,C are owned by proc0, D,E,F are
1993:    owned by proc1, and G,H,I are owned by proc2.

1995:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1996:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1997:    The 'M','N' parameters are 8,8, and have the same values on all procs.

1999:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2000:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2001:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2002:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2003:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2004:    matrix, and [DF] as another SeqAIJ matrix.

2006:    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2007:    allocated for every row of the local diagonal submatrix, and o_nz
2008:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2009:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2010:    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2011:    In this case, the values of d_nz,o_nz are:
2012: .vb
2013:      proc0 : d_nz = 2, o_nz = 2
2014:      proc1 : d_nz = 3, o_nz = 2
2015:      proc2 : d_nz = 1, o_nz = 4
2016: .ve
2017:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2018:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2019:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2020:    34 values.

2022:    When d_nnz, o_nnz parameters are specified, the storage is specified
2023:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2024:    In the above case the values for d_nnz,o_nnz are:
2025: .vb
2026:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2027:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2028:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2029: .ve
2030:    Here the space allocated is the sum of all the above values, i.e. 34, and
2031:    hence the preallocation is perfect.

2033:    Level: intermediate

2035: .keywords: matrix, aij, compressed row, sparse, parallel

2037: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2038: @*/
2039: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2040: {
2041:   Mat_MPIAIJ   *b;
2042:   int          ierr,i;
2043:   PetscTruth   flg2;

2046:   PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2047:   if (!flg2) return(0);
2048:   B->preallocated = PETSC_TRUE;
2049:   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2050:   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2051:   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2052:   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2053:   if (d_nnz) {
2054:     for (i=0; i<B->m; i++) {
2055:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2056:     }
2057:   }
2058:   if (o_nnz) {
2059:     for (i=0; i<B->m; i++) {
2060:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2061:     }
2062:   }
2063:   b = (Mat_MPIAIJ*)B->data;

2065:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2066:   PetscLogObjectParent(B,b->A);
2067:   MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2068:   PetscLogObjectParent(B,b->B);

2070:   return(0);
2071: }
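/*
   Illustrative usage sketch (not part of mpiaij.c): preallocating the 8x8 example
   matrix from the manual page above on 3 processes, using the per-row d_nnz/o_nnz
   values listed there.  The helper name PreallocateExample() and the choice of
   local column size n = m are ours; error handling uses the standard CHKERRQ.
*/
int PreallocateExample(MPI_Comm comm,Mat *A)
{
  int ierr,rank,m;
  int d0[3] = {2,2,2},o0[3] = {2,2,2};     /* proc0 owns rows 0-2 */
  int d1[3] = {3,3,2},o1[3] = {2,1,1};     /* proc1 owns rows 3-5 */
  int d2[2] = {1,1},  o2[2] = {4,4};       /* proc2 owns rows 6-7 */
  int *d_nnz,*o_nnz;

  MPI_Comm_rank(comm,&rank);
  if (rank == 0)      {m = 3; d_nnz = d0; o_nnz = o0;}
  else if (rank == 1) {m = 3; d_nnz = d1; o_nnz = o1;}
  else                {m = 2; d_nnz = d2; o_nnz = o2;}

  ierr = MatCreate(comm,m,m,8,8,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* d_nz/o_nz are ignored here because the per-row arrays are provided */
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  return(0);
}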

2073: /*@C
2074:    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2075:    (the default parallel PETSc format).  For good matrix assembly performance
2076:    the user should preallocate the matrix storage by setting the parameters 
2077:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2078:    performance can be increased by more than a factor of 50.

2080:    Collective on MPI_Comm

2082:    Input Parameters:
2083: +  comm - MPI communicator
2084: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2085:            This value should be the same as the local size used in creating the 
2086:            y vector for the matrix-vector product y = Ax.
2087: .  n - This value should be the same as the local size used in creating the 
2088:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2089:        calculated if N is given) For square matrices n is almost always m.
2090: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2091: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2092: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2093:            (same value is used for all local rows)
2094: .  d_nnz - array containing the number of nonzeros in the various rows of the 
2095:            DIAGONAL portion of the local submatrix (possibly different for each row)
2096:            or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
2097:            The size of this array is equal to the number of local rows, i.e. 'm'. 
2098:            You must leave room for the diagonal entry even if it is zero.
2099: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2100:            submatrix (same value is used for all local rows).
2101: -  o_nnz - array containing the number of nonzeros in the various rows of the
2102:            OFF-DIAGONAL portion of the local submatrix (possibly different for
2103:            each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
2104:            structure. The size of this array is equal to the number 
2105:            of local rows, i.e. 'm'. 

2107:    Output Parameter:
2108: .  A - the matrix 

2110:    Notes:
2111:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2112:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2113:    storage requirements for this matrix.

2115:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
2116:    processor then it must be used on all processors that share the object for 
2117:    that argument.

2119:    The AIJ format (also called the Yale sparse matrix format or
2120:    compressed row storage), is fully compatible with standard Fortran 77
2121:    storage.  That is, the stored row and column indices can begin at
2122:    either one (as in Fortran) or zero.  See the users manual for details.

2124:    The user MUST specify either the local or global matrix dimensions
2125:    (possibly both).

2127:    The parallel matrix is partitioned such that the first m0 rows belong to 
2128:    process 0, the next m1 rows belong to process 1, the next m2 rows belong 
2129:    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.

2131:    The DIAGONAL portion of the local submatrix of a processor can be defined 
2132:    as the submatrix which is obtained by extracting the part corresponding 
2133:    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
2134:    first row that belongs to the processor, and r2 is the last row belonging 
2135:    to this processor. This is a square mxm matrix. The remaining portion 
2136:    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

2138:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

2140:    By default, this format uses inodes (identical nodes) when possible.
2141:    We search for consecutive rows with the same nonzero structure, thereby
2142:    reusing matrix information to achieve increased efficiency.

2144:    Options Database Keys:
2145: +  -mat_aij_no_inode  - Do not use inodes
2146: .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2147: -  -mat_aij_oneindex - Internally use indexing starting at 1
2148:         rather than 0.  Note that when calling MatSetValues(),
2149:         the user still MUST index entries starting at 0!


2152:    Example usage:
2153:   
2154:    Consider the following 8x8 matrix with 34 nonzero values, which is 
2155:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2156:    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 
2157:    as follows:

2159: .vb
2160:             1  2  0  |  0  3  0  |  0  4
2161:     Proc0   0  5  6  |  7  0  0  |  8  0
2162:             9  0 10  | 11  0  0  | 12  0
2163:     -------------------------------------
2164:            13  0 14  | 15 16 17  |  0  0
2165:     Proc1   0 18  0  | 19 20 21  |  0  0 
2166:             0  0  0  | 22 23  0  | 24  0
2167:     -------------------------------------
2168:     Proc2  25 26 27  |  0  0 28  | 29  0
2169:            30  0  0  | 31 32 33  |  0 34
2170: .ve

2172:    This can be represented as the following collection of submatrices:

2174: .vb
2175:       A B C
2176:       D E F
2177:       G H I
2178: .ve

2180:    Here the submatrices A,B,C are owned by proc0, D,E,F are
2181:    owned by proc1, and G,H,I are owned by proc2.

2183:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2184:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2185:    The 'M','N' parameters are 8,8, and have the same values on all procs.

2187:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2188:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2189:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2190:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2191:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2192:    matrix, and [DF] as another SeqAIJ matrix.

2194:    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2195:    allocated for every row of the local diagonal submatrix, and o_nz
2196:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2197:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2198:    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2199:    In this case, the values of d_nz,o_nz are:
2200: .vb
2201:      proc0 : d_nz = 2, o_nz = 2
2202:      proc1 : d_nz = 3, o_nz = 2
2203:      proc2 : d_nz = 1, o_nz = 4
2204: .ve
2205:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2206:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2207:    for proc2, i.e. we are using 12+15+10=37 storage locations to store 
2208:    34 values.

2210:    When d_nnz, o_nnz parameters are specified, the storage is specified
2211:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2212:    In the above case the values for d_nnz,o_nnz are:
2213: .vb
2214:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2215:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2216:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2217: .ve
2218:    Here the space allocated is the sum of all the above values, i.e. 34, and
2219:    hence the preallocation is perfect.

2221:    Level: intermediate

2223: .keywords: matrix, aij, compressed row, sparse, parallel

2225: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2226: @*/
2227: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2228: {
2229:   int ierr,size;

2232:   MatCreate(comm,m,n,M,N,A);
2233:   MPI_Comm_size(comm,&size);
2234:   if (size > 1) {
2235:     MatSetType(*A,MATMPIAIJ);
2236:     MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2237:   } else {
2238:     MatSetType(*A,MATSEQAIJ);
2239:     MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2240:   }
2241:   return(0);
2242: }
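/*
   Illustrative usage sketch (not part of mpiaij.c): MatCreateMPIAIJ() in one call,
   letting PETSc choose the row/column distribution and using a flat preallocation
   of 3 diagonal and 2 off-diagonal nonzeros per row, then assembling a 1D
   Laplacian stencil.  The helper name AssembleLaplacian() is ours.
*/
int AssembleLaplacian(MPI_Comm comm,int N,Mat *A)
{
  int         ierr,i,nc,rstart,rend,cols[3];
  PetscScalar vals[3];

  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    nc = 0;
    if (i > 0)   {cols[nc] = i-1; vals[nc++] = -1.0;}
    cols[nc] = i; vals[nc++] = 2.0;
    if (i < N-1) {cols[nc] = i+1; vals[nc++] = -1.0;}
    ierr = MatSetValues(*A,1,&i,nc,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return(0);
}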

2244: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2245: {
2246:   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2248:   *Ad     = a->A;
2249:   *Ao     = a->B;
2250:   *colmap = a->garray;
2251:   return(0);
2252: }

2254: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2255: {
2256:   int        ierr;
2257:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

2260:   if (coloring->ctype == IS_COLORING_LOCAL) {
2261:     int        *allcolors,*colors,i;
2262:     ISColoring ocoloring;

2264:     /* set coloring for diagonal portion */
2265:     MatSetColoring_SeqAIJ(a->A,coloring);

2267:     /* set coloring for off-diagonal portion */
2268:     ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2269:     PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2270:     for (i=0; i<a->B->n; i++) {
2271:       colors[i] = allcolors[a->garray[i]];
2272:     }
2273:     PetscFree(allcolors);
2274:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2275:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2276:     ISColoringDestroy(ocoloring);
2277:   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2278:     int        *colors,i,*larray;
2279:     ISColoring ocoloring;

2281:     /* set coloring for diagonal portion */
2282:     PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2283:     for (i=0; i<a->A->n; i++) {
2284:       larray[i] = i + a->cstart;
2285:     }
2286:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2287:     PetscMalloc((a->A->n+1)*sizeof(int),&colors);
2288:     for (i=0; i<a->A->n; i++) {
2289:       colors[i] = coloring->colors[larray[i]];
2290:     }
2291:     PetscFree(larray);
2292:     ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2293:     MatSetColoring_SeqAIJ(a->A,ocoloring);
2294:     ISColoringDestroy(ocoloring);

2296:     /* set coloring for off-diagonal portion */
2297:     PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2298:     ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2299:     PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2300:     for (i=0; i<a->B->n; i++) {
2301:       colors[i] = coloring->colors[larray[i]];
2302:     }
2303:     PetscFree(larray);
2304:     ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2305:     MatSetColoring_SeqAIJ(a->B,ocoloring);
2306:     ISColoringDestroy(ocoloring);
2307:   } else {
2308:     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2309:   }

2311:   return(0);
2312: }

2314: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2315: {
2316:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2317:   int        ierr;

2320:   MatSetValuesAdic_SeqAIJ(a->A,advalues);
2321:   MatSetValuesAdic_SeqAIJ(a->B,advalues);
2322:   return(0);
2323: }

2325: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2326: {
2327:   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2328:   int        ierr;

2331:   MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2332:   MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2333:   return(0);
2334: }