Actual source code: mpidense.c

  1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

  3: /*
  4:    Basic functions for parallel dense matrices.
  5: */
  6: 
 7:  #include "src/mat/impls/dense/mpi/mpidense.h"
 8:  #include "src/vec/vecimpl.h"

 10: EXTERN_C_BEGIN
 11: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 12: {
 13:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
 14:   int          m = A->m,rstart = mdn->rstart,ierr;
 15:   PetscScalar  *array;
 16:   MPI_Comm     comm;

 19:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

 21:   /* The reuse aspect is not implemented efficiently */
 22:   if (reuse) { MatDestroy(*B);}

 24:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 25:   MatGetArray(mdn->A,&array);
 26:   MatCreateSeqDense(comm,m,m,array+m*rstart,B);
 27:   MatRestoreArray(mdn->A,&array);
 28:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
 29:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
 30: 
 31:   *iscopy = PETSC_TRUE;
 32:   return(0);
 33: }
 34: EXTERN_C_END

 36: EXTERN int MatSetUpMultiply_MPIDense(Mat);

 38: int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
 39: {
 40:   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
 41:   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
 42:   PetscTruth   roworiented = A->roworiented;

 45:   for (i=0; i<m; i++) {
 46:     if (idxm[i] < 0) continue;
 47:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 48:     if (idxm[i] >= rstart && idxm[i] < rend) {
 49:       row = idxm[i] - rstart;
 50:       if (roworiented) {
 51:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
 52:       } else {
 53:         for (j=0; j<n; j++) {
 54:           if (idxn[j] < 0) continue;
 55:           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 56:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 57:         }
 58:       }
 59:     } else {
 60:       if (!A->donotstash) {
 61:         if (roworiented) {
 62:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 63:         } else {
 64:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 65:         }
 66:       }
 67:     }
 68:   }
 69:   return(0);
 70: }

 72: int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
 73: {
 74:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
 75:   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

 78:   for (i=0; i<m; i++) {
 79:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 80:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 81:     if (idxm[i] >= rstart && idxm[i] < rend) {
 82:       row = idxm[i] - rstart;
 83:       for (j=0; j<n; j++) {
 84:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 85:         if (idxn[j] >= mat->N) {
 86:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 87:         }
 88:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
 89:       }
 90:     } else {
 91:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 92:     }
 93:   }
 94:   return(0);
 95: }

 97: int MatGetArray_MPIDense(Mat A,PetscScalar **array)
 98: {
 99:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
100:   int          ierr;

103:   MatGetArray(a->A,array);
104:   return(0);
105: }

107: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
108: {
109:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
110:   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
111:   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
112:   PetscScalar  *av,*bv,*v = lmat->v;
113:   Mat          newmat;

116:   ISGetIndices(isrow,&irow);
117:   ISGetIndices(iscol,&icol);
118:   ISGetLocalSize(isrow,&nrows);
119:   ISGetLocalSize(iscol,&ncols);

 121:   /* No parallel redistribution currently supported! Should really check each index set
 122:      to confirm that it is OK.  ... Currently supports only submatrices with the same
 123:      partitioning as the original matrix! */

125:   MatGetLocalSize(A,&nlrows,&nlcols);
126:   MatGetOwnershipRange(A,&rstart,&rend);
127: 
128:   /* Check submatrix call */
129:   if (scall == MAT_REUSE_MATRIX) {
130:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
 131:     /* Really need to test the row and column sizes! */
132:     newmat = *B;
133:   } else {
134:     /* Create and fill new matrix */
135:     MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
136:   }

138:   /* Now extract the data pointers and do the copy, column at a time */
139:   newmatd = (Mat_MPIDense*)newmat->data;
140:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
141: 
142:   for (i=0; i<ncols; i++) {
143:     av = v + nlrows*icol[i];
144:     for (j=0; j<nrows; j++) {
145:       *bv++ = av[irow[j] - rstart];
146:     }
147:   }

149:   /* Assemble the matrices so that the correct flags are set */
150:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
151:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

153:   /* Free work space */
154:   ISRestoreIndices(isrow,&irow);
155:   ISRestoreIndices(iscol,&icol);
156:   *B = newmat;
157:   return(0);
158: }

160: int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
161: {
163:   return(0);
164: }

166: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
167: {
168:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
169:   MPI_Comm     comm = mat->comm;
170:   int          ierr,nstash,reallocs;
171:   InsertMode   addv;

 174:   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode, never a mix; the bitwise OR reduction below detects mixing */
175:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
176:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
177:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
178:   }
179:   mat->insertmode = addv; /* in case this processor had no cache */

181:   MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
182:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 183:   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
184:   return(0);
185: }

187: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
188: {
189:   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
190:   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
191:   PetscScalar  *val;
192:   InsertMode   addv=mat->insertmode;

195:   /*  wait on receives */
196:   while (1) {
197:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
198:     if (!flg) break;
199: 
200:     for (i=0; i<n;) {
201:       /* Now identify the consecutive vals belonging to the same row */
202:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
203:       if (j < n) ncols = j-i;
204:       else       ncols = n-i;
205:       /* Now assemble all these values with a single function call */
206:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
207:       i = j;
208:     }
209:   }
210:   MatStashScatterEnd_Private(&mat->stash);
211: 
212:   MatAssemblyBegin(mdn->A,mode);
213:   MatAssemblyEnd(mdn->A,mode);

215:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
216:     MatSetUpMultiply_MPIDense(mat);
217:   }
218:   return(0);
219: }

221: int MatZeroEntries_MPIDense(Mat A)
222: {
223:   int          ierr;
224:   Mat_MPIDense *l = (Mat_MPIDense*)A->data;

227:   MatZeroEntries(l->A);
228:   return(0);
229: }

231: int MatGetBlockSize_MPIDense(Mat A,int *bs)
232: {
234:   *bs = 1;
235:   return(0);
236: }

 238: /* the code does not handle the diagonal entries correctly unless the
 239:    matrix is square and the column and row ownerships are identical.
 240:    This is a BUG. The only way to fix it seems to be to access
 241:    mdn->A and mdn->B directly and not through the MatZeroRows()
 242:    routine.
 243: */
244: int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
245: {
246:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
247:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
248:   int            *procs,*nprocs,j,idx,nsends,*work;
249:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
250:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
251:   int            *lens,imdex,*lrows,*values;
252:   MPI_Comm       comm = A->comm;
253:   MPI_Request    *send_waits,*recv_waits;
254:   MPI_Status     recv_status,*send_status;
255:   IS             istmp;
256:   PetscTruth     found;

259:   ISGetLocalSize(is,&N);
260:   ISGetIndices(is,&rows);
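  /* Communication summary (editorial note): each process first bins the global rows it was
     handed by owning process, an allreduce with PetscMaxSum_Op then tells it how many
     messages to expect (nrecvs) and their maximum length (nmax), the row indices are
     exchanged with MPI_Irecv/MPI_Isend, and finally each process converts the indices it
     received to local numbering and zeros them in its local block l->A. */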

262:   /*  first count number of contributors to each processor */
263:   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);
264:   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));
265:   procs = nprocs + size;
266:   ierr  = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
267:   for (i=0; i<N; i++) {
268:     idx = rows[i];
269:     found = PETSC_FALSE;
270:     for (j=0; j<size; j++) {
271:       if (idx >= owners[j] && idx < owners[j+1]) {
272:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
273:       }
274:     }
275:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
276:   }
277:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

279:   /* inform other processors of number of messages and max length*/
280:   ierr   = PetscMalloc(2*size*sizeof(int),&work);
281:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
282:   nmax   = work[rank];
283:   nrecvs = work[size+rank];
284:   ierr   = PetscFree(work);

286:   /* post receives:   */
287:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
288:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
289:   for (i=0; i<nrecvs; i++) {
290:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
291:   }

293:   /* do sends:
294:       1) starts[i] gives the starting index in svalues for stuff going to 
295:          the ith processor
296:   */
297:   PetscMalloc((N+1)*sizeof(int),&svalues);
298:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
299:   PetscMalloc((size+1)*sizeof(int),&starts);
300:   starts[0]  = 0;
301:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
302:   for (i=0; i<N; i++) {
303:     svalues[starts[owner[i]]++] = rows[i];
304:   }
305:   ISRestoreIndices(is,&rows);

307:   starts[0] = 0;
308:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
309:   count = 0;
310:   for (i=0; i<size; i++) {
311:     if (procs[i]) {
312:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
313:     }
314:   }
315:   PetscFree(starts);

317:   base = owners[rank];

319:   /*  wait on receives */
320:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
321:   source = lens + nrecvs;
322:   count  = nrecvs; slen = 0;
323:   while (count) {
324:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
325:     /* unpack receives into our local space */
326:     MPI_Get_count(&recv_status,MPI_INT,&n);
327:     source[imdex]  = recv_status.MPI_SOURCE;
328:     lens[imdex]  = n;
329:     slen += n;
330:     count--;
331:   }
332:   PetscFree(recv_waits);
333: 
 334:   /* convert the received global row indices into local row numbering */
335:   PetscMalloc((slen+1)*sizeof(int),&lrows);
336:   count = 0;
337:   for (i=0; i<nrecvs; i++) {
338:     values = rvalues + i*nmax;
339:     for (j=0; j<lens[i]; j++) {
340:       lrows[count++] = values[j] - base;
341:     }
342:   }
343:   PetscFree(rvalues);
344:   PetscFree(lens);
345:   PetscFree(owner);
346:   PetscFree(nprocs);
347: 
348:   /* actually zap the local rows */
349:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
350:   PetscLogObjectParent(A,istmp);
351:   PetscFree(lrows);
352:   MatZeroRows(l->A,istmp,diag);
353:   ISDestroy(istmp);

355:   /* wait on sends */
356:   if (nsends) {
357:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
358:     MPI_Waitall(nsends,send_waits,send_status);
359:     PetscFree(send_status);
360:   }
361:   PetscFree(send_waits);
362:   PetscFree(svalues);

364:   return(0);
365: }

367: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
368: {
369:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
370:   int          ierr;

373:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
374:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
375:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
376:   return(0);
377: }

379: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
380: {
381:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
382:   int          ierr;

385:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
386:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
387:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
388:   return(0);
389: }

391: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
392: {
393:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
394:   int          ierr;
395:   PetscScalar  zero = 0.0;

398:   VecSet(&zero,yy);
399:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
400:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
401:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
402:   return(0);
403: }

405: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
406: {
407:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
408:   int          ierr;

411:   VecCopy(yy,zz);
412:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
413:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
414:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
415:   return(0);
416: }

418: int MatGetDiagonal_MPIDense(Mat A,Vec v)
419: {
420:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
421:   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
422:   int          ierr,len,i,n,m = A->m,radd;
423:   PetscScalar  *x,zero = 0.0;
424: 
426:   VecSet(&zero,v);
427:   VecGetArray(v,&x);
428:   VecGetSize(v,&n);
429:   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
430:   len  = PetscMin(a->A->m,a->A->n);
431:   radd = a->rstart*m;
432:   for (i=0; i<len; i++) {
433:     x[i] = aloc->v[radd + i*m + i];
434:   }
435:   VecRestoreArray(v,&x);
436:   return(0);
437: }

439: int MatDestroy_MPIDense(Mat mat)
440: {
441:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
442:   int          ierr;


446: #if defined(PETSC_USE_LOG)
447:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
448: #endif
449:   MatStashDestroy_Private(&mat->stash);
450:   PetscFree(mdn->rowners);
451:   MatDestroy(mdn->A);
452:   if (mdn->lvec)   VecDestroy(mdn->lvec);
453:   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
454:   if (mdn->factor) {
455:     if (mdn->factor->temp)   {PetscFree(mdn->factor->temp);}
456:     if (mdn->factor->tag)    {PetscFree(mdn->factor->tag);}
457:     if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
458:     PetscFree(mdn->factor);
459:   }
460:   PetscFree(mdn);
461:   return(0);
462: }

464: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
465: {
466:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
467:   int          ierr;

470:   if (mdn->size == 1) {
471:     MatView(mdn->A,viewer);
472:   }
473:   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
474:   return(0);
475: }

477: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
478: {
479:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
480:   int               ierr,size = mdn->size,rank = mdn->rank;
481:   PetscViewerType   vtype;
482:   PetscTruth        isascii,isdraw;
483:   PetscViewer       sviewer;
484:   PetscViewerFormat format;

487:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
488:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
489:   if (isascii) {
490:     PetscViewerGetType(viewer,&vtype);
491:     PetscViewerGetFormat(viewer,&format);
492:     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
493:       MatInfo info;
494:       MatGetInfo(mat,MAT_LOCAL,&info);
 495:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
496:                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
497:       PetscViewerFlush(viewer);
498:       VecScatterView(mdn->Mvctx,viewer);
499:       return(0);
500:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
501:       return(0);
502:     }
503:   } else if (isdraw) {
504:     PetscDraw       draw;
505:     PetscTruth isnull;

507:     PetscViewerDrawGetDraw(viewer,0,&draw);
508:     PetscDrawIsNull(draw,&isnull);
509:     if (isnull) return(0);
510:   }

512:   if (size == 1) {
513:     MatView(mdn->A,viewer);
514:   } else {
 515:     /* assemble the entire matrix onto the first processor */
516:     Mat          A;
517:     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
518:     PetscScalar  *vals;

520:     if (!rank) {
521:       MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
522:     } else {
523:       MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
524:     }
525:     PetscLogObjectParent(mat,A);

527:     /* Copy the matrix ... This isn't the most efficient means,
528:        but it's quick for now */
529:     row = mdn->rstart; m = mdn->A->m;
530:     for (i=0; i<m; i++) {
531:       MatGetRow(mat,row,&nz,&cols,&vals);
532:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
533:       MatRestoreRow(mat,row,&nz,&cols,&vals);
534:       row++;
535:     }

537:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
538:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
539:     PetscViewerGetSingleton(viewer,&sviewer);
540:     if (!rank) {
541:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
542:     }
543:     PetscViewerRestoreSingleton(viewer,&sviewer);
544:     PetscViewerFlush(viewer);
545:     MatDestroy(A);
546:   }
547:   return(0);
548: }

550: int MatView_MPIDense(Mat mat,PetscViewer viewer)
551: {
552:   int        ierr;
553:   PetscTruth isascii,isbinary,isdraw,issocket;
554: 
556: 
557:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
558:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
559:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
560:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

562:   if (isascii || issocket || isdraw) {
563:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
564:   } else if (isbinary) {
565:     MatView_MPIDense_Binary(mat,viewer);
566:   } else {
567:     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
568:   }
569:   return(0);
570: }

572: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
573: {
574:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
575:   Mat          mdn = mat->A;
576:   int          ierr;
577:   PetscReal    isend[5],irecv[5];

580:   info->rows_global    = (double)A->M;
581:   info->columns_global = (double)A->N;
582:   info->rows_local     = (double)A->m;
583:   info->columns_local  = (double)A->N;
584:   info->block_size     = 1.0;
585:   MatGetInfo(mdn,MAT_LOCAL,info);
586:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
587:   isend[3] = info->memory;  isend[4] = info->mallocs;
588:   if (flag == MAT_LOCAL) {
589:     info->nz_used      = isend[0];
590:     info->nz_allocated = isend[1];
591:     info->nz_unneeded  = isend[2];
592:     info->memory       = isend[3];
593:     info->mallocs      = isend[4];
594:   } else if (flag == MAT_GLOBAL_MAX) {
595:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
596:     info->nz_used      = irecv[0];
597:     info->nz_allocated = irecv[1];
598:     info->nz_unneeded  = irecv[2];
599:     info->memory       = irecv[3];
600:     info->mallocs      = irecv[4];
601:   } else if (flag == MAT_GLOBAL_SUM) {
602:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
603:     info->nz_used      = irecv[0];
604:     info->nz_allocated = irecv[1];
605:     info->nz_unneeded  = irecv[2];
606:     info->memory       = irecv[3];
607:     info->mallocs      = irecv[4];
608:   }
609:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
610:   info->fill_ratio_needed = 0;
611:   info->factor_mallocs    = 0;
612:   return(0);
613: }

615: int MatSetOption_MPIDense(Mat A,MatOption op)
616: {
617:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
618:   int          ierr;

621:   switch (op) {
622:   case MAT_NO_NEW_NONZERO_LOCATIONS:
623:   case MAT_YES_NEW_NONZERO_LOCATIONS:
624:   case MAT_NEW_NONZERO_LOCATION_ERR:
625:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
626:   case MAT_COLUMNS_SORTED:
627:   case MAT_COLUMNS_UNSORTED:
628:     MatSetOption(a->A,op);
629:     break;
630:   case MAT_ROW_ORIENTED:
631:     a->roworiented = PETSC_TRUE;
632:     MatSetOption(a->A,op);
633:     break;
634:   case MAT_ROWS_SORTED:
635:   case MAT_ROWS_UNSORTED:
636:   case MAT_YES_NEW_DIAGONALS:
637:   case MAT_USE_HASH_TABLE:
638:   case MAT_USE_SINGLE_PRECISION_SOLVES:
 639:     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
640:     break;
641:   case MAT_COLUMN_ORIENTED:
642:     a->roworiented = PETSC_FALSE;
643:     MatSetOption(a->A,op);
644:     break;
645:   case MAT_IGNORE_OFF_PROC_ENTRIES:
646:     a->donotstash = PETSC_TRUE;
647:     break;
648:   case MAT_NO_NEW_DIAGONALS:
649:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
650:   default:
651:     SETERRQ(PETSC_ERR_SUP,"unknown option");
652:   }
653:   return(0);
654: }

656: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
657: {
658:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
659:   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

 662:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
663:   lrow = row - rstart;
664:   MatGetRow(mat->A,lrow,nz,idx,v);
665:   return(0);
666: }

668: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
669: {

673:   if (idx) {PetscFree(*idx);}
674:   if (v) {PetscFree(*v);}
675:   return(0);
676: }

678: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
679: {
680:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
681:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
682:   PetscScalar  *l,*r,x,*v;
683:   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

686:   MatGetLocalSize(A,&s2,&s3);
687:   if (ll) {
688:     VecGetLocalSize(ll,&s2a);
689:     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
690:     VecGetArray(ll,&l);
691:     for (i=0; i<m; i++) {
692:       x = l[i];
693:       v = mat->v + i;
694:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
695:     }
696:     VecRestoreArray(ll,&l);
697:     PetscLogFlops(n*m);
698:   }
699:   if (rr) {
700:     VecGetSize(rr,&s3a);
701:     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
702:     VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
703:     VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
704:     VecGetArray(mdn->lvec,&r);
705:     for (i=0; i<n; i++) {
706:       x = r[i];
707:       v = mat->v + i*m;
708:       for (j=0; j<m; j++) { (*v++) *= x;}
709:     }
710:     VecRestoreArray(mdn->lvec,&r);
711:     PetscLogFlops(n*m);
712:   }
713:   return(0);
714: }

716: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
717: {
718:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
719:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
720:   int          ierr,i,j;
721:   PetscReal    sum = 0.0;
722:   PetscScalar  *v = mat->v;

725:   if (mdn->size == 1) {
726:      MatNorm(mdn->A,type,nrm);
727:   } else {
728:     if (type == NORM_FROBENIUS) {
729:       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
730: #if defined(PETSC_USE_COMPLEX)
731:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
732: #else
733:         sum += (*v)*(*v); v++;
734: #endif
735:       }
736:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
737:       *nrm = sqrt(*nrm);
738:       PetscLogFlops(2*mdn->A->n*mdn->A->m);
739:     } else if (type == NORM_1) {
740:       PetscReal *tmp,*tmp2;
741:       PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
742:       tmp2 = tmp + A->N;
743:       PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
744:       *nrm = 0.0;
745:       v = mat->v;
746:       for (j=0; j<mdn->A->n; j++) {
747:         for (i=0; i<mdn->A->m; i++) {
748:           tmp[j] += PetscAbsScalar(*v);  v++;
749:         }
750:       }
751:       MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
752:       for (j=0; j<A->N; j++) {
753:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
754:       }
755:       PetscFree(tmp);
756:       PetscLogFlops(A->n*A->m);
757:     } else if (type == NORM_INFINITY) { /* max row norm */
758:       PetscReal ntemp;
759:       MatNorm(mdn->A,type,&ntemp);
760:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
761:     } else {
762:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
763:     }
764:   }
765:   return(0);
766: }

768: int MatTranspose_MPIDense(Mat A,Mat *matout)
769: {
770:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
771:   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
772:   Mat          B;
773:   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
774:   int          j,i,ierr;
775:   PetscScalar  *v;

778:   if (!matout && M != N) {
779:     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
780:   }
781:   MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);

783:   m = a->A->m; n = a->A->n; v = Aloc->v;
 784:   PetscMalloc(m*sizeof(int),&rwork);   /* rwork holds the m global row indices of A */
785:   for (j=0; j<n; j++) {
786:     for (i=0; i<m; i++) rwork[i] = rstart + i;
787:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
788:     v   += m;
789:   }
790:   PetscFree(rwork);
791:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
792:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
793:   if (matout) {
794:     *matout = B;
795:   } else {
796:     MatHeaderCopy(A,B);
797:   }
798:   return(0);
799: }

 801:  #include "petscblaslapack.h"
802: int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
803: {
804:   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
805:   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
806:   int          one = 1,nz;

809:   nz = inA->m*inA->N;
810:   BLscal_(&nz,alpha,a->v,&one);
811:   PetscLogFlops(nz);
812:   return(0);
813: }

815: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
816: EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

818: int MatSetUpPreallocation_MPIDense(Mat A)
819: {
820:   int        ierr;

823:    MatMPIDenseSetPreallocation(A,0);
824:   return(0);
825: }

827: /* -------------------------------------------------------------------*/
828: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
829:        MatGetRow_MPIDense,
830:        MatRestoreRow_MPIDense,
831:        MatMult_MPIDense,
832:        MatMultAdd_MPIDense,
833:        MatMultTranspose_MPIDense,
834:        MatMultTransposeAdd_MPIDense,
835:        0,
836:        0,
837:        0,
838:        0,
839:        0,
840:        0,
841:        0,
842:        MatTranspose_MPIDense,
843:        MatGetInfo_MPIDense,0,
844:        MatGetDiagonal_MPIDense,
845:        MatDiagonalScale_MPIDense,
846:        MatNorm_MPIDense,
847:        MatAssemblyBegin_MPIDense,
848:        MatAssemblyEnd_MPIDense,
849:        0,
850:        MatSetOption_MPIDense,
851:        MatZeroEntries_MPIDense,
852:        MatZeroRows_MPIDense,
853:        0,
854:        0,
855:        0,
856:        0,
857:        MatSetUpPreallocation_MPIDense,
858:        0,
859:        0,
860:        MatGetArray_MPIDense,
861:        MatRestoreArray_MPIDense,
862:        MatDuplicate_MPIDense,
863:        0,
864:        0,
865:        0,
866:        0,
867:        0,
868:        MatGetSubMatrices_MPIDense,
869:        0,
870:        MatGetValues_MPIDense,
871:        0,
872:        0,
873:        MatScale_MPIDense,
874:        0,
875:        0,
876:        0,
877:        MatGetBlockSize_MPIDense,
878:        0,
879:        0,
880:        0,
881:        0,
882:        0,
883:        0,
884:        0,
885:        0,
886:        0,
887:        MatGetSubMatrix_MPIDense,
888:        MatDestroy_MPIDense,
889:        MatView_MPIDense,
890:        MatGetPetscMaps_Petsc};

892: EXTERN_C_BEGIN
893: int MatCreate_MPIDense(Mat mat)
894: {
895:   Mat_MPIDense *a;
896:   int          ierr,i;

899:   ierr              = PetscNew(Mat_MPIDense,&a);
900:   mat->data         = (void*)a;
901:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
902:   mat->factor       = 0;
903:   mat->mapping      = 0;

905:   a->factor       = 0;
906:   mat->insertmode = NOT_SET_VALUES;
907:   MPI_Comm_rank(mat->comm,&a->rank);
908:   MPI_Comm_size(mat->comm,&a->size);

910:   PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
911:   PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
912:   a->nvec = mat->n;

914:   /* the information in the maps duplicates the information computed below, eventually 
915:      we should remove the duplicate information that is not contained in the maps */
916:   PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
917:   PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);

919:   /* build local table of row and column ownerships */
920:   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
921:   a->cowners = a->rowners + a->size + 1;
922:   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
923:   MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
924:   a->rowners[0] = 0;
925:   for (i=2; i<=a->size; i++) {
926:     a->rowners[i] += a->rowners[i-1];
927:   }
928:   a->rstart = a->rowners[a->rank];
929:   a->rend   = a->rowners[a->rank+1];
930:   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
931:   a->cowners[0] = 0;
932:   for (i=2; i<=a->size; i++) {
933:     a->cowners[i] += a->cowners[i-1];
934:   }

936:   /* build cache for off array entries formed */
937:   a->donotstash = PETSC_FALSE;
938:   MatStashCreate_Private(mat->comm,1,&mat->stash);

940:   /* stuff used for matrix vector multiply */
941:   a->lvec        = 0;
942:   a->Mvctx       = 0;
943:   a->roworiented = PETSC_TRUE;

945:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
946:                                      "MatGetDiagonalBlock_MPIDense",
947:                                      MatGetDiagonalBlock_MPIDense);
948:   return(0);
949: }
950: EXTERN_C_END

952: /*@C
953:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

955:    Not collective

957:    Input Parameters:
958: .  A - the matrix
959: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
960:    to control all matrix memory allocation.

962:    Notes:
963:    The dense format is fully compatible with standard Fortran 77
964:    storage by columns.

966:    The data input variable is intended primarily for Fortran programmers
967:    who wish to allocate their own matrix memory space.  Most users should
968:    set data=PETSC_NULL.

970:    Level: intermediate

972: .keywords: matrix,dense, parallel

974: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
975: @*/
976: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
977: {
978:   Mat_MPIDense *a;
979:   int          ierr;
980:   PetscTruth   flg2;

983:   PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);
984:   if (!flg2) return(0);
985:   mat->preallocated = PETSC_TRUE;
986:   /* Note:  For now, when data is specified above, this assumes the user correctly
987:    allocates the local dense storage space.  We should add error checking. */

989:   a    = (Mat_MPIDense*)mat->data;
990:   MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
991:   PetscLogObjectParent(mat,a->A);
992:   return(0);
993: }
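
A minimal usage sketch (editorial addition, not part of mpidense.c), assuming comm, m, n, M, and N have already been chosen: this is the two-step creation path that MatCreateMPIDense() below wraps. Error checking is omitted to match the listing style.

   Mat A;

   MatCreate(comm,m,n,M,N,&A);
   MatSetType(A,MATMPIDENSE);
   MatMPIDenseSetPreallocation(A,PETSC_NULL);   /* PETSc allocates the local m by N block */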

995: /*@C
 996:    MatCreateMPIDense - Creates a parallel matrix in dense format.

998:    Collective on MPI_Comm

1000:    Input Parameters:
1001: +  comm - MPI communicator
1002: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1003: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1004: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1005: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1006: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1007:    to control all matrix memory allocation.

1009:    Output Parameter:
1010: .  A - the matrix

1012:    Notes:
1013:    The dense format is fully compatible with standard Fortran 77
1014:    storage by columns.

1016:    The data input variable is intended primarily for Fortran programmers
1017:    who wish to allocate their own matrix memory space.  Most users should
1018:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1020:    The user MUST specify either the local or global matrix dimensions
1021:    (possibly both).

1023:    Level: intermediate

1025: .keywords: matrix,dense, parallel

1027: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1028: @*/
1029: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1030: {
1031:   int ierr,size;

1034:   MatCreate(comm,m,n,M,N,A);
1035:   MPI_Comm_size(comm,&size);
1036:   if (size > 1) {
1037:     MatSetType(*A,MATMPIDENSE);
1038:     MatMPIDenseSetPreallocation(*A,data);
1039:   } else {
1040:     MatSetType(*A,MATSEQDENSE);
1041:     MatSeqDenseSetPreallocation(*A,data);
1042:   }
1043:   return(0);
1044: }
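
A hedged usage sketch (editorial addition, not part of mpidense.c): creating a distributed dense matrix with MatCreateMPIDense() and filling only the locally owned rows, which MatSetValues_MPIDense() and the assembly routines above then handle without any stashing. The 100 by 100 size and the entry values are arbitrary illustrations; error checking is omitted to match the listing style.

   Mat         A;
   int         i,j,rstart,rend,M = 100,N = 100;
   PetscScalar v;

   MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
   MatGetOwnershipRange(A,&rstart,&rend);
   for (i=rstart; i<rend; i++) {
     for (j=0; j<N; j++) {
       v = (PetscScalar)(i + j);
       MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
     }
   }
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);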

1046: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1047: {
1048:   Mat          mat;
1049:   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1050:   int          ierr;

1053:   *newmat       = 0;
1054:   MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1055:   MatSetType(mat,MATMPIDENSE);
1056:   ierr              = PetscNew(Mat_MPIDense,&a);
1057:   mat->data         = (void*)a;
1058:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1059:   mat->factor       = A->factor;
1060:   mat->assembled    = PETSC_TRUE;
1061:   mat->preallocated = PETSC_TRUE;

1063:   a->rstart       = oldmat->rstart;
1064:   a->rend         = oldmat->rend;
1065:   a->size         = oldmat->size;
1066:   a->rank         = oldmat->rank;
1067:   mat->insertmode = NOT_SET_VALUES;
1068:   a->nvec         = oldmat->nvec;
1069:   a->donotstash   = oldmat->donotstash;
1070:   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1071:   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1072:   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1073:   MatStashCreate_Private(A->comm,1,&mat->stash);

1075:   MatSetUpMultiply_MPIDense(mat);
1076:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1077:   PetscLogObjectParent(mat,a->A);
1078:   *newmat = mat;
1079:   return(0);
1080: }

 1082:  #include "petscsys.h"

1084: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1085: {
1086:   int          *rowners,i,size,rank,m,ierr,nz,j;
1087:   PetscScalar  *array,*vals,*vals_ptr;
1088:   MPI_Status   status;

1091:   MPI_Comm_rank(comm,&rank);
1092:   MPI_Comm_size(comm,&size);

1094:   /* determine ownership of all rows */
1095:   m          = M/size + ((M % size) > rank);
1096:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1097:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1098:   rowners[0] = 0;
1099:   for (i=2; i<=size; i++) {
1100:     rowners[i] += rowners[i-1];
1101:   }

1103:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1104:   MatGetArray(*newmat,&array);

1106:   if (!rank) {
1107:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1109:     /* read in my part of the matrix numerical values  */
1110:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1111: 
 1112:     /* insert into matrix row by row (this is why we cannot read directly into the array) */
1113:     vals_ptr = vals;
1114:     for (i=0; i<m; i++) {
1115:       for (j=0; j<N; j++) {
1116:         array[i + j*m] = *vals_ptr++;
1117:       }
1118:     }

 1120:     /* read in the other processors' parts and ship them out */
1121:     for (i=1; i<size; i++) {
1122:       nz   = (rowners[i+1] - rowners[i])*N;
1123:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1124:       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1125:     }
1126:   } else {
1127:     /* receive numeric values */
1128:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1130:     /* receive message of values*/
1131:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

 1133:     /* insert into matrix row by row (this is why we cannot read directly into the array) */
1134:     vals_ptr = vals;
1135:     for (i=0; i<m; i++) {
1136:       for (j=0; j<N; j++) {
1137:         array[i + j*m] = *vals_ptr++;
1138:       }
1139:     }
1140:   }
1141:   PetscFree(rowners);
1142:   PetscFree(vals);
1143:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1144:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1145:   return(0);
1146: }

1148: EXTERN_C_BEGIN
1149: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1150: {
1151:   Mat          A;
1152:   PetscScalar  *vals,*svals;
1153:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1154:   MPI_Status   status;
1155:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1156:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1157:   int          tag = ((PetscObject)viewer)->tag;
1158:   int          i,nz,ierr,j,rstart,rend,fd;

1161:   MPI_Comm_size(comm,&size);
1162:   MPI_Comm_rank(comm,&rank);
1163:   if (!rank) {
1164:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1165:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1166:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1167:   }

1169:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1170:   M = header[1]; N = header[2]; nz = header[3];

1172:   /*
1173:        Handle case where matrix is stored on disk as a dense matrix 
1174:   */
1175:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1176:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1177:     return(0);
1178:   }

1180:   /* determine ownership of all rows */
1181:   m          = M/size + ((M % size) > rank);
1182:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1183:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1184:   rowners[0] = 0;
1185:   for (i=2; i<=size; i++) {
1186:     rowners[i] += rowners[i-1];
1187:   }
1188:   rstart = rowners[rank];
1189:   rend   = rowners[rank+1];

1191:   /* distribute row lengths to all processors */
1192:   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1193:   offlens = ourlens + (rend-rstart);
1194:   if (!rank) {
1195:     PetscMalloc(M*sizeof(int),&rowlengths);
1196:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1197:     PetscMalloc(size*sizeof(int),&sndcounts);
1198:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1199:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1200:     PetscFree(sndcounts);
1201:   } else {
1202:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1203:   }

1205:   if (!rank) {
1206:     /* calculate the number of nonzeros on each processor */
1207:     PetscMalloc(size*sizeof(int),&procsnz);
1208:     PetscMemzero(procsnz,size*sizeof(int));
1209:     for (i=0; i<size; i++) {
1210:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1211:         procsnz[i] += rowlengths[j];
1212:       }
1213:     }
1214:     PetscFree(rowlengths);

1216:     /* determine max buffer needed and allocate it */
1217:     maxnz = 0;
1218:     for (i=0; i<size; i++) {
1219:       maxnz = PetscMax(maxnz,procsnz[i]);
1220:     }
1221:     PetscMalloc(maxnz*sizeof(int),&cols);

1223:     /* read in my part of the matrix column indices  */
1224:     nz = procsnz[0];
1225:     PetscMalloc(nz*sizeof(int),&mycols);
1226:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

 1228:     /* read in everyone else's part and ship it off */
1229:     for (i=1; i<size; i++) {
1230:       nz   = procsnz[i];
1231:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1232:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1233:     }
1234:     PetscFree(cols);
1235:   } else {
1236:     /* determine buffer space needed for message */
1237:     nz = 0;
1238:     for (i=0; i<m; i++) {
1239:       nz += ourlens[i];
1240:     }
1241:     PetscMalloc(nz*sizeof(int),&mycols);

1243:     /* receive message of column indices*/
1244:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1245:     MPI_Get_count(&status,MPI_INT,&maxnz);
1246:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1247:   }

1249:   /* loop over local rows, determining number of off diagonal entries */
1250:   PetscMemzero(offlens,m*sizeof(int));
1251:   jj = 0;
1252:   for (i=0; i<m; i++) {
1253:     for (j=0; j<ourlens[i]; j++) {
1254:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1255:       jj++;
1256:     }
1257:   }

1259:   /* create our matrix */
1260:   for (i=0; i<m; i++) {
1261:     ourlens[i] -= offlens[i];
1262:   }
1263:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1264:   A = *newmat;
1265:   for (i=0; i<m; i++) {
1266:     ourlens[i] += offlens[i];
1267:   }

1269:   if (!rank) {
1270:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1272:     /* read in my part of the matrix numerical values  */
1273:     nz = procsnz[0];
1274:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1275: 
1276:     /* insert into matrix */
1277:     jj      = rstart;
1278:     smycols = mycols;
1279:     svals   = vals;
1280:     for (i=0; i<m; i++) {
1281:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1282:       smycols += ourlens[i];
1283:       svals   += ourlens[i];
1284:       jj++;
1285:     }

 1287:     /* read in the other processors' parts and ship them out */
1288:     for (i=1; i<size; i++) {
1289:       nz   = procsnz[i];
1290:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1291:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1292:     }
1293:     PetscFree(procsnz);
1294:   } else {
1295:     /* receive numeric values */
1296:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1298:     /* receive message of values*/
1299:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1300:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1301:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1303:     /* insert into matrix */
1304:     jj      = rstart;
1305:     smycols = mycols;
1306:     svals   = vals;
1307:     for (i=0; i<m; i++) {
1308:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1309:       smycols += ourlens[i];
1310:       svals   += ourlens[i];
1311:       jj++;
1312:     }
1313:   }
1314:   PetscFree(ourlens);
1315:   PetscFree(vals);
1316:   PetscFree(mycols);
1317:   PetscFree(rowners);

1319:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1320:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1321:   return(0);
1322: }
1323: EXTERN_C_END