Actual source code: mpidense.c

  1: #define PETSCMAT_DLL

  3: /*
  4:    Basic functions for parallel dense matrices.
  5: */
  6: 
 7:  #include "src/mat/impls/dense/mpi/mpidense.h"

 11: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 12: {
 13:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
 15:   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;

 18:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
 19:   lrow = row - rstart;
 20:   MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
 21:   return(0);
 22: }

 26: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 27: {

 31:   if (idx) {PetscFree(*idx);}
 32:   if (v) {PetscFree(*v);}
 33:   return(0);
 34: }
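/*
   Example (illustrative sketch, not part of mpidense.c): because MatGetRow_MPIDense()
   above only supports locally owned rows, callers are expected to restrict themselves
   to the ownership range.  The helper name PrintLocalRowCounts is invented for this
   example; the PETSc calls used are the standard public interface.
*/
static PetscErrorCode PrintLocalRowCounts(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,nz;
  const PetscInt    *cols;
  const PetscScalar *vals;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {            /* only rows in [rstart,rend) are legal */
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"row %D has %D stored columns\n",row,nz);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  return(0);
}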

 39: PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 40: {
 41:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
 43:   PetscInt          m = A->m,rstart = mdn->rstart;
 44:   PetscScalar  *array;
 45:   MPI_Comm     comm;

 48:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

 50:   /* The reuse aspect is not implemented efficiently */
 51:   if (reuse) { MatDestroy(*B);}

 53:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 54:   MatGetArray(mdn->A,&array);
 55:   MatCreate(comm,B);
 56:   MatSetSizes(*B,m,m,m,m);
 57:   MatSetType(*B,mdn->A->type_name);
 58:   MatSeqDenseSetPreallocation(*B,array+m*rstart);
 59:   MatRestoreArray(mdn->A,&array);
 60:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
 61:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
 62: 
 63:   *iscopy = PETSC_TRUE;
 64:   return(0);
 65: }

 70: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
 71: {
 72:   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
 74:   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
 75:   PetscTruth   roworiented = A->roworiented;

 78:   for (i=0; i<m; i++) {
 79:     if (idxm[i] < 0) continue;
 80:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 81:     if (idxm[i] >= rstart && idxm[i] < rend) {
 82:       row = idxm[i] - rstart;
 83:       if (roworiented) {
 84:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
 85:       } else {
 86:         for (j=0; j<n; j++) {
 87:           if (idxn[j] < 0) continue;
 88:           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 89:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 90:         }
 91:       }
 92:     } else {
 93:       if (!A->donotstash) {
 94:         if (roworiented) {
 95:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 96:         } else {
 97:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 98:         }
 99:       }
100:     }
101:   }
102:   return(0);
103: }

107: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
108: {
109:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
111:   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;

114:   for (i=0; i<m; i++) {
115:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
116:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
117:     if (idxm[i] >= rstart && idxm[i] < rend) {
118:       row = idxm[i] - rstart;
119:       for (j=0; j<n; j++) {
120:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
121:         if (idxn[j] >= mat->N) {
122:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
123:         }
124:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
125:       }
126:     } else {
127:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
128:     }
129:   }
130:   return(0);
131: }

135: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
136: {
137:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;

141:   MatGetArray(a->A,array);
142:   return(0);
143: }

147: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
148: {
149:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
150:   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
152:   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
153:   PetscScalar  *av,*bv,*v = lmat->v;
154:   Mat          newmat;

157:   ISGetIndices(isrow,&irow);
158:   ISGetIndices(iscol,&icol);
159:   ISGetLocalSize(isrow,&nrows);
160:   ISGetLocalSize(iscol,&ncols);

 162:   /* No parallel redistribution is currently supported! Should really check each index set
 163:      to confirm that it is OK.  ... Currently only supports a submatrix with the same
 164:      partitioning as the original matrix! */

166:   MatGetLocalSize(A,&nlrows,&nlcols);
167:   MatGetOwnershipRange(A,&rstart,&rend);
168: 
169:   /* Check submatrix call */
170:   if (scall == MAT_REUSE_MATRIX) {
171:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
172:     /* Really need to test rows and column sizes! */
173:     newmat = *B;
174:   } else {
175:     /* Create and fill new matrix */
176:     MatCreate(A->comm,&newmat);
177:     MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);
178:     MatSetType(newmat,A->type_name);
179:     MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
180:   }

182:   /* Now extract the data pointers and do the copy, column at a time */
183:   newmatd = (Mat_MPIDense*)newmat->data;
184:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
185: 
186:   for (i=0; i<ncols; i++) {
187:     av = v + nlrows*icol[i];
188:     for (j=0; j<nrows; j++) {
189:       *bv++ = av[irow[j] - rstart];
190:     }
191:   }

193:   /* Assemble the matrices so that the correct flags are set */
194:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
195:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

197:   /* Free work space */
198:   ISRestoreIndices(isrow,&irow);
199:   ISRestoreIndices(iscol,&icol);
200:   *B = newmat;
201:   return(0);
202: }

206: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
207: {
209:   return(0);
210: }

214: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
215: {
216:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
217:   MPI_Comm     comm = mat->comm;
219:   PetscInt          nstash,reallocs;
220:   InsertMode   addv;

223:   /* make sure all processors are either in INSERTMODE or ADDMODE */
224:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
225:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
226:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
227:   }
228:   mat->insertmode = addv; /* in case this processor had no cache */

230:   MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
231:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
232:   PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));
233:   return(0);
234: }

238: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
239: {
240:   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
241:   PetscErrorCode  ierr;
242:   PetscInt        i,*row,*col,flg,j,rstart,ncols;
243:   PetscMPIInt     n;
244:   PetscScalar       *val;
245:   InsertMode      addv=mat->insertmode;

248:   /*  wait on receives */
249:   while (1) {
250:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
251:     if (!flg) break;
252: 
253:     for (i=0; i<n;) {
254:       /* Now identify the consecutive vals belonging to the same row */
255:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
256:       if (j < n) ncols = j-i;
257:       else       ncols = n-i;
258:       /* Now assemble all these values with a single function call */
259:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
260:       i = j;
261:     }
262:   }
263:   MatStashScatterEnd_Private(&mat->stash);
264: 
265:   MatAssemblyBegin(mdn->A,mode);
266:   MatAssemblyEnd(mdn->A,mode);

268:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
269:     MatSetUpMultiply_MPIDense(mat);
270:   }
271:   return(0);
272: }

276: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
277: {
279:   Mat_MPIDense *l = (Mat_MPIDense*)A->data;

282:   MatZeroEntries(l->A);
283:   return(0);
284: }

 286: /* The code does not handle the diagonal entries correctly unless the
 287:    matrix is square and the column and row ownerships are identical.
 288:    This is a BUG. The only way to fix it seems to be to access
 289:    mdn->A and mdn->B directly and not through the MatZeroRows()
 290:    routine.
 291: */
294: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
295: {
296:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
298:   PetscInt       i,*owners = l->rowners;
299:   PetscInt       *nprocs,j,idx,nsends;
300:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
301:   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
302:   PetscInt       *lens,*lrows,*values;
303:   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
304:   MPI_Comm       comm = A->comm;
305:   MPI_Request    *send_waits,*recv_waits;
306:   MPI_Status     recv_status,*send_status;
307:   PetscTruth     found;

310:   /*  first count number of contributors to each processor */
311:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
312:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
313:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
314:   for (i=0; i<N; i++) {
315:     idx = rows[i];
316:     found = PETSC_FALSE;
317:     for (j=0; j<size; j++) {
318:       if (idx >= owners[j] && idx < owners[j+1]) {
319:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
320:       }
321:     }
322:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
323:   }
324:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

326:   /* inform other processors of number of messages and max length*/
327:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

329:   /* post receives:   */
330:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
331:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
332:   for (i=0; i<nrecvs; i++) {
333:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
334:   }

336:   /* do sends:
337:       1) starts[i] gives the starting index in svalues for stuff going to 
338:          the ith processor
339:   */
340:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
341:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
342:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
343:   starts[0]  = 0;
344:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
345:   for (i=0; i<N; i++) {
346:     svalues[starts[owner[i]]++] = rows[i];
347:   }

349:   starts[0] = 0;
350:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
351:   count = 0;
352:   for (i=0; i<size; i++) {
353:     if (nprocs[2*i+1]) {
354:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
355:     }
356:   }
357:   PetscFree(starts);

359:   base = owners[rank];

361:   /*  wait on receives */
362:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
363:   source = lens + nrecvs;
364:   count  = nrecvs; slen = 0;
365:   while (count) {
366:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
367:     /* unpack receives into our local space */
368:     MPI_Get_count(&recv_status,MPIU_INT,&n);
369:     source[imdex]  = recv_status.MPI_SOURCE;
370:     lens[imdex]    = n;
371:     slen += n;
372:     count--;
373:   }
374:   PetscFree(recv_waits);
375: 
376:   /* move the data into the send scatter */
377:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
378:   count = 0;
379:   for (i=0; i<nrecvs; i++) {
380:     values = rvalues + i*nmax;
381:     for (j=0; j<lens[i]; j++) {
382:       lrows[count++] = values[j] - base;
383:     }
384:   }
385:   PetscFree(rvalues);
386:   PetscFree(lens);
387:   PetscFree(owner);
388:   PetscFree(nprocs);
389: 
390:   /* actually zap the local rows */
391:   MatZeroRows(l->A,slen,lrows,diag);
392:   PetscFree(lrows);

394:   /* wait on sends */
395:   if (nsends) {
396:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
397:     MPI_Waitall(nsends,send_waits,send_status);
398:     PetscFree(send_status);
399:   }
400:   PetscFree(send_waits);
401:   PetscFree(svalues);

403:   return(0);
404: }

408: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
409: {
410:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

414:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
415:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
416:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
417:   return(0);
418: }

422: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
423: {
424:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

428:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
429:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
430:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
431:   return(0);
432: }

436: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
437: {
438:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
440:   PetscScalar  zero = 0.0;

443:   VecSet(yy,zero);
444:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
445:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
446:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
447:   return(0);
448: }

452: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
453: {
454:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;

458:   VecCopy(yy,zz);
459:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
460:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
461:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
462:   return(0);
463: }

467: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
468: {
469:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
470:   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
472:   PetscInt          len,i,n,m = A->m,radd;
473:   PetscScalar  *x,zero = 0.0;
474: 
476:   VecSet(v,zero);
477:   VecGetArray(v,&x);
478:   VecGetSize(v,&n);
479:   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
480:   len  = PetscMin(a->A->m,a->A->n);
481:   radd = a->rstart*m;
482:   for (i=0; i<len; i++) {
483:     x[i] = aloc->v[radd + i*m + i];
484:   }
485:   VecRestoreArray(v,&x);
486:   return(0);
487: }

491: PetscErrorCode MatDestroy_MPIDense(Mat mat)
492: {
493:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;


498: #if defined(PETSC_USE_LOG)
499:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
500: #endif
501:   MatStashDestroy_Private(&mat->stash);
502:   PetscFree(mdn->rowners);
503:   MatDestroy(mdn->A);
504:   if (mdn->lvec)   VecDestroy(mdn->lvec);
505:   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
506:   if (mdn->factor) {
507:     if (mdn->factor->temp)   {PetscFree(mdn->factor->temp);}
508:     if (mdn->factor->tag)    {PetscFree(mdn->factor->tag);}
509:     if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
510:     PetscFree(mdn->factor);
511:   }
512:   PetscFree(mdn);
513:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
514:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
515:   return(0);
516: }

520: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
521: {
522:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

526:   if (mdn->size == 1) {
527:     MatView(mdn->A,viewer);
528:   }
529:   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
530:   return(0);
531: }

535: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
536: {
537:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
538:   PetscErrorCode    ierr;
539:   PetscMPIInt       size = mdn->size,rank = mdn->rank;
540:   PetscViewerType   vtype;
541:   PetscTruth        iascii,isdraw;
542:   PetscViewer       sviewer;
543:   PetscViewerFormat format;

546:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
547:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
548:   if (iascii) {
549:     PetscViewerGetType(viewer,&vtype);
550:     PetscViewerGetFormat(viewer,&format);
551:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
552:       MatInfo info;
553:       MatGetInfo(mat,MAT_LOCAL,&info);
554:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
555:                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
556:       PetscViewerFlush(viewer);
557:       VecScatterView(mdn->Mvctx,viewer);
558:       return(0);
559:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
560:       return(0);
561:     }
562:   } else if (isdraw) {
563:     PetscDraw       draw;
564:     PetscTruth isnull;

566:     PetscViewerDrawGetDraw(viewer,0,&draw);
567:     PetscDrawIsNull(draw,&isnull);
568:     if (isnull) return(0);
569:   }

571:   if (size == 1) {
572:     MatView(mdn->A,viewer);
573:   } else {
 574:     /* assemble the entire matrix onto the first processor. */
575:     Mat         A;
576:     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
577:     PetscInt    *cols;
578:     PetscScalar   *vals;

580:     MatCreate(mat->comm,&A);
581:     if (!rank) {
582:       MatSetSizes(A,M,N,M,N);
583:     } else {
584:       MatSetSizes(A,0,0,M,N);
585:     }
586:     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
587:     MatSetType(A,MATMPIDENSE);
588:     MatMPIDenseSetPreallocation(A,PETSC_NULL);
589:     PetscLogObjectParent(mat,A);

591:     /* Copy the matrix ... This isn't the most efficient means,
592:        but it's quick for now */
593:     A->insertmode = INSERT_VALUES;
594:     row = mdn->rstart; m = mdn->A->m;
595:     for (i=0; i<m; i++) {
596:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
597:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
598:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
599:       row++;
600:     }

602:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
603:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
604:     PetscViewerGetSingleton(viewer,&sviewer);
605:     if (!rank) {
606:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
607:     }
608:     PetscViewerRestoreSingleton(viewer,&sviewer);
609:     PetscViewerFlush(viewer);
610:     MatDestroy(A);
611:   }
612:   return(0);
613: }

617: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
618: {
620:   PetscTruth iascii,isbinary,isdraw,issocket;
621: 
623: 
624:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
625:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
626:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
627:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

629:   if (iascii || issocket || isdraw) {
630:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
631:   } else if (isbinary) {
632:     MatView_MPIDense_Binary(mat,viewer);
633:   } else {
634:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
635:   }
636:   return(0);
637: }

641: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
642: {
643:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
644:   Mat          mdn = mat->A;
646:   PetscReal    isend[5],irecv[5];

649:   info->rows_global    = (double)A->M;
650:   info->columns_global = (double)A->N;
651:   info->rows_local     = (double)A->m;
652:   info->columns_local  = (double)A->N;
653:   info->block_size     = 1.0;
654:   MatGetInfo(mdn,MAT_LOCAL,info);
655:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
656:   isend[3] = info->memory;  isend[4] = info->mallocs;
657:   if (flag == MAT_LOCAL) {
658:     info->nz_used      = isend[0];
659:     info->nz_allocated = isend[1];
660:     info->nz_unneeded  = isend[2];
661:     info->memory       = isend[3];
662:     info->mallocs      = isend[4];
663:   } else if (flag == MAT_GLOBAL_MAX) {
664:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
665:     info->nz_used      = irecv[0];
666:     info->nz_allocated = irecv[1];
667:     info->nz_unneeded  = irecv[2];
668:     info->memory       = irecv[3];
669:     info->mallocs      = irecv[4];
670:   } else if (flag == MAT_GLOBAL_SUM) {
671:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
672:     info->nz_used      = irecv[0];
673:     info->nz_allocated = irecv[1];
674:     info->nz_unneeded  = irecv[2];
675:     info->memory       = irecv[3];
676:     info->mallocs      = irecv[4];
677:   }
678:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
679:   info->fill_ratio_needed = 0;
680:   info->factor_mallocs    = 0;
681:   return(0);
682: }

686: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
687: {
688:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;

692:   switch (op) {
693:   case MAT_NO_NEW_NONZERO_LOCATIONS:
694:   case MAT_YES_NEW_NONZERO_LOCATIONS:
695:   case MAT_NEW_NONZERO_LOCATION_ERR:
696:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
697:   case MAT_COLUMNS_SORTED:
698:   case MAT_COLUMNS_UNSORTED:
699:     MatSetOption(a->A,op);
700:     break;
701:   case MAT_ROW_ORIENTED:
702:     a->roworiented = PETSC_TRUE;
703:     MatSetOption(a->A,op);
704:     break;
705:   case MAT_ROWS_SORTED:
706:   case MAT_ROWS_UNSORTED:
707:   case MAT_YES_NEW_DIAGONALS:
708:   case MAT_USE_HASH_TABLE:
709:     PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));
710:     break;
711:   case MAT_COLUMN_ORIENTED:
712:     a->roworiented = PETSC_FALSE;
713:     MatSetOption(a->A,op);
714:     break;
715:   case MAT_IGNORE_OFF_PROC_ENTRIES:
716:     a->donotstash = PETSC_TRUE;
717:     break;
718:   case MAT_NO_NEW_DIAGONALS:
719:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
720:   case MAT_SYMMETRIC:
721:   case MAT_STRUCTURALLY_SYMMETRIC:
722:   case MAT_NOT_SYMMETRIC:
723:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
724:   case MAT_HERMITIAN:
725:   case MAT_NOT_HERMITIAN:
726:   case MAT_SYMMETRY_ETERNAL:
727:   case MAT_NOT_SYMMETRY_ETERNAL:
728:     break;
729:   default:
730:     SETERRQ(PETSC_ERR_SUP,"unknown option");
731:   }
732:   return(0);
733: }


738: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
739: {
740:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
741:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
742:   PetscScalar  *l,*r,x,*v;
744:   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

747:   MatGetLocalSize(A,&s2,&s3);
748:   if (ll) {
749:     VecGetLocalSize(ll,&s2a);
750:     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
751:     VecGetArray(ll,&l);
752:     for (i=0; i<m; i++) {
753:       x = l[i];
754:       v = mat->v + i;
755:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
756:     }
757:     VecRestoreArray(ll,&l);
758:     PetscLogFlops(n*m);
759:   }
760:   if (rr) {
761:     VecGetSize(rr,&s3a);
762:     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
763:     VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
764:     VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
765:     VecGetArray(mdn->lvec,&r);
766:     for (i=0; i<n; i++) {
767:       x = r[i];
768:       v = mat->v + i*m;
769:       for (j=0; j<m; j++) { (*v++) *= x;}
770:     }
771:     VecRestoreArray(mdn->lvec,&r);
772:     PetscLogFlops(n*m);
773:   }
774:   return(0);
775: }

779: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
780: {
781:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
782:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
784:   PetscInt          i,j;
785:   PetscReal    sum = 0.0;
786:   PetscScalar  *v = mat->v;

789:   if (mdn->size == 1) {
790:      MatNorm(mdn->A,type,nrm);
791:   } else {
792:     if (type == NORM_FROBENIUS) {
793:       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
794: #if defined(PETSC_USE_COMPLEX)
795:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
796: #else
797:         sum += (*v)*(*v); v++;
798: #endif
799:       }
800:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
801:       *nrm = sqrt(*nrm);
802:       PetscLogFlops(2*mdn->A->n*mdn->A->m);
803:     } else if (type == NORM_1) {
804:       PetscReal *tmp,*tmp2;
805:       PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
806:       tmp2 = tmp + A->N;
807:       PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
808:       *nrm = 0.0;
809:       v = mat->v;
810:       for (j=0; j<mdn->A->n; j++) {
811:         for (i=0; i<mdn->A->m; i++) {
812:           tmp[j] += PetscAbsScalar(*v);  v++;
813:         }
814:       }
815:       MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
816:       for (j=0; j<A->N; j++) {
817:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
818:       }
819:       PetscFree(tmp);
820:       PetscLogFlops(A->n*A->m);
821:     } else if (type == NORM_INFINITY) { /* max row norm */
822:       PetscReal ntemp;
823:       MatNorm(mdn->A,type,&ntemp);
824:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
825:     } else {
826:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
827:     }
828:   }
829:   return(0);
830: }

834: PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
835: {
836:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
837:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
838:   Mat            B;
839:   PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
841:   PetscInt       j,i;
842:   PetscScalar      *v;

845:   if (!matout && M != N) {
846:     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
847:   }
848:   MatCreate(A->comm,&B);
849:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);
850:   MatSetType(B,A->type_name);
851:   MatMPIDenseSetPreallocation(B,PETSC_NULL);

853:   m = a->A->m; n = a->A->n; v = Aloc->v;
854:   PetscMalloc(n*sizeof(PetscInt),&rwork);
855:   for (j=0; j<n; j++) {
856:     for (i=0; i<m; i++) rwork[i] = rstart + i;
857:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
858:     v   += m;
859:   }
860:   PetscFree(rwork);
861:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
862:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
863:   if (matout) {
864:     *matout = B;
865:   } else {
866:     MatHeaderCopy(A,B);
867:   }
868:   return(0);
869: }

 871:  #include "petscblaslapack.h"
874: PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
875: {
876:   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
877:   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
878:   PetscScalar  oalpha = alpha;
879:   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;

883:   BLASscal_(&nz,&oalpha,a->v,&one);
884:   PetscLogFlops(nz);
885:   return(0);
886: }

888: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

892: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
893: {

 897:   MatMPIDenseSetPreallocation(A,0);
898:   return(0);
899: }

901: /* -------------------------------------------------------------------*/
902: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
903:        MatGetRow_MPIDense,
904:        MatRestoreRow_MPIDense,
905:        MatMult_MPIDense,
906: /* 4*/ MatMultAdd_MPIDense,
907:        MatMultTranspose_MPIDense,
908:        MatMultTransposeAdd_MPIDense,
909:        0,
910:        0,
911:        0,
912: /*10*/ 0,
913:        0,
914:        0,
915:        0,
916:        MatTranspose_MPIDense,
917: /*15*/ MatGetInfo_MPIDense,
918:        0,
919:        MatGetDiagonal_MPIDense,
920:        MatDiagonalScale_MPIDense,
921:        MatNorm_MPIDense,
922: /*20*/ MatAssemblyBegin_MPIDense,
923:        MatAssemblyEnd_MPIDense,
924:        0,
925:        MatSetOption_MPIDense,
926:        MatZeroEntries_MPIDense,
927: /*25*/ MatZeroRows_MPIDense,
928:        0,
929:        0,
930:        0,
931:        0,
932: /*30*/ MatSetUpPreallocation_MPIDense,
933:        0,
934:        0,
935:        MatGetArray_MPIDense,
936:        MatRestoreArray_MPIDense,
937: /*35*/ MatDuplicate_MPIDense,
938:        0,
939:        0,
940:        0,
941:        0,
942: /*40*/ 0,
943:        MatGetSubMatrices_MPIDense,
944:        0,
945:        MatGetValues_MPIDense,
946:        0,
947: /*45*/ 0,
948:        MatScale_MPIDense,
949:        0,
950:        0,
951:        0,
952: /*50*/ 0,
953:        0,
954:        0,
955:        0,
956:        0,
957: /*55*/ 0,
958:        0,
959:        0,
960:        0,
961:        0,
962: /*60*/ MatGetSubMatrix_MPIDense,
963:        MatDestroy_MPIDense,
964:        MatView_MPIDense,
965:        MatGetPetscMaps_Petsc,
966:        0,
967: /*65*/ 0,
968:        0,
969:        0,
970:        0,
971:        0,
972: /*70*/ 0,
973:        0,
974:        0,
975:        0,
976:        0,
977: /*75*/ 0,
978:        0,
979:        0,
980:        0,
981:        0,
982: /*80*/ 0,
983:        0,
984:        0,
985:        0,
986: /*84*/ MatLoad_MPIDense,
987:        0,
988:        0,
989:        0,
990:        0,
991:        0,
992: /*90*/ 0,
993:        0,
994:        0,
995:        0,
996:        0,
997: /*95*/ 0,
998:        0,
999:        0,
1000:        0};

1005: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1006: {
1007:   Mat_MPIDense *a;

1011:   mat->preallocated = PETSC_TRUE;
1012:   /* Note:  For now, when data is specified above, this assumes the user correctly
1013:    allocates the local dense storage space.  We should add error checking. */

1015:   a    = (Mat_MPIDense*)mat->data;
1016:   MatCreate(PETSC_COMM_SELF,&a->A);
1017:   MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);
1018:   MatSetType(a->A,MATSEQDENSE);
1019:   MatSeqDenseSetPreallocation(a->A,data);
1020:   PetscLogObjectParent(mat,a->A);
1021:   return(0);
1022: }

1025: /*MC
1026:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1028:    Options Database Keys:
1029: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

1031:   Level: beginner

1033: .seealso: MatCreateMPIDense
1034: M*/

1039: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1040: {
1041:   Mat_MPIDense *a;
1043:   PetscInt          i;

1046:   PetscNew(Mat_MPIDense,&a);
1047:   mat->data         = (void*)a;
1048:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1049:   mat->factor       = 0;
1050:   mat->mapping      = 0;

1052:   a->factor       = 0;
1053:   mat->insertmode = NOT_SET_VALUES;
1054:   MPI_Comm_rank(mat->comm,&a->rank);
1055:   MPI_Comm_size(mat->comm,&a->size);

1057:   PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
1058:   PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
1059:   a->nvec = mat->n;

1061:   /* the information in the maps duplicates the information computed below, eventually 
1062:      we should remove the duplicate information that is not contained in the maps */
1063:   PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
1064:   PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);

1066:   /* build local table of row and column ownerships */
1067:   PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);
1068:   a->cowners = a->rowners + a->size + 1;
1069:   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1070:   MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);
1071:   a->rowners[0] = 0;
1072:   for (i=2; i<=a->size; i++) {
1073:     a->rowners[i] += a->rowners[i-1];
1074:   }
1075:   a->rstart = a->rowners[a->rank];
1076:   a->rend   = a->rowners[a->rank+1];
1077:   MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);
1078:   a->cowners[0] = 0;
1079:   for (i=2; i<=a->size; i++) {
1080:     a->cowners[i] += a->cowners[i-1];
1081:   }

 1083:   /* build cache for off-processor entries formed during assembly */
1084:   a->donotstash = PETSC_FALSE;
1085:   MatStashCreate_Private(mat->comm,1,&mat->stash);

1087:   /* stuff used for matrix vector multiply */
1088:   a->lvec        = 0;
1089:   a->Mvctx       = 0;
1090:   a->roworiented = PETSC_TRUE;

1092:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1093:                                      "MatGetDiagonalBlock_MPIDense",
1094:                                      MatGetDiagonalBlock_MPIDense);
1095:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1096:                                      "MatMPIDenseSetPreallocation_MPIDense",
1097:                                      MatMPIDenseSetPreallocation_MPIDense);
1098:   return(0);
1099: }
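/*
   Example (illustrative sketch, not part of mpidense.c): selecting this matrix class at
   run time with -mat_type mpidense, as described in the MATMPIDENSE manual page above.
   The function name and the global sizes M,N are arbitrary choices for the example.
*/
static PetscErrorCode ExampleCreateFromOptions(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);   /* -mat_type mpidense dispatches to MatCreate_MPIDense() */
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return(0);
}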

1102: /*MC
1103:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1105:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1106:    and MATMPIDENSE otherwise.

1108:    Options Database Keys:
1109: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1111:   Level: beginner

1113: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1114: M*/

1119: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1120: {
1122:   PetscMPIInt    size;

1125:   PetscObjectChangeTypeName((PetscObject)A,MATDENSE);
1126:   MPI_Comm_size(A->comm,&size);
1127:   if (size == 1) {
1128:     MatSetType(A,MATSEQDENSE);
1129:   } else {
1130:     MatSetType(A,MATMPIDENSE);
1131:   }
1132:   return(0);
1133: }
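/*
   Example (illustrative sketch, not part of mpidense.c): requesting the generic MATDENSE
   type directly, which resolves to MATSEQDENSE on a single process and MATMPIDENSE
   otherwise, as implemented by MatCreate_Dense() above.  Names and sizes are arbitrary.
*/
static PetscErrorCode ExampleCreateDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);  /* dispatches to MatCreate_Dense() */
  return(0);
}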

1138: /*@C
1139:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1141:    Not collective

1143:    Input Parameters:
1144: .  A - the matrix
1145: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1146:    to control all matrix memory allocation.

1148:    Notes:
1149:    The dense format is fully compatible with standard Fortran 77
1150:    storage by columns.

1152:    The data input variable is intended primarily for Fortran programmers
1153:    who wish to allocate their own matrix memory space.  Most users should
1154:    set data=PETSC_NULL.

1156:    Level: intermediate

1158: .keywords: matrix,dense, parallel

1160: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1161: @*/
1162: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1163: {
1164:   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

1167:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1168:   if (f) {
1169:     (*f)(mat,data);
1170:   }
1171:   return(0);
1172: }
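/*
   Example (illustrative sketch, not part of mpidense.c): typical use of
   MatMPIDenseSetPreallocation() when the type is set explicitly, mirroring the pattern
   used elsewhere in this file.  Passing PETSC_NULL lets PETSc allocate the local
   column-major storage; the function name and sizes are arbitrary.
*/
static PetscErrorCode ExamplePreallocate(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,PETSC_NULL);CHKERRQ(ierr);
  return(0);
}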

1176: /*@C
 1177:    MatCreateMPIDense - Creates a parallel matrix in dense format.

1179:    Collective on MPI_Comm

1181:    Input Parameters:
1182: +  comm - MPI communicator
 1183: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
 1184: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
 1185: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
 1186: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1187: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1188:    to control all matrix memory allocation.

1190:    Output Parameter:
1191: .  A - the matrix

1193:    Notes:
1194:    The dense format is fully compatible with standard Fortran 77
1195:    storage by columns.

1197:    The data input variable is intended primarily for Fortran programmers
1198:    who wish to allocate their own matrix memory space.  Most users should
1199:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1201:    The user MUST specify either the local or global matrix dimensions
1202:    (possibly both).

1204:    Level: intermediate

1206: .keywords: matrix,dense, parallel

1208: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1209: @*/
1210: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1211: {
1213:   PetscMPIInt    size;

1216:   MatCreate(comm,A);
1217:   MatSetSizes(*A,m,n,M,N);
1218:   MPI_Comm_size(comm,&size);
1219:   if (size > 1) {
1220:     MatSetType(*A,MATMPIDENSE);
1221:     MatMPIDenseSetPreallocation(*A,data);
1222:   } else {
1223:     MatSetType(*A,MATSEQDENSE);
1224:     MatSeqDenseSetPreallocation(*A,data);
1225:   }
1226:   return(0);
1227: }
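/*
   Example (illustrative sketch, not part of mpidense.c): building a parallel dense matrix
   with MatCreateMPIDense() and filling the locally owned rows.  The diagonal fill is an
   arbitrary pattern chosen to show the usual MatSetValues()/MatAssembly sequence; the
   function name is invented for the example.
*/
static PetscErrorCode ExampleFillDiagonal(MPI_Comm comm,PetscInt M,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend;
  PetscScalar    one = 1.0;

  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(*A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return(0);
}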

1231: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1232: {
1233:   Mat          mat;
1234:   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;

1238:   *newmat       = 0;
1239:   MatCreate(A->comm,&mat);
1240:   MatSetSizes(mat,A->m,A->n,A->M,A->N);
1241:   MatSetType(mat,A->type_name);
1242:   PetscNew(Mat_MPIDense,&a);
1243:   mat->data         = (void*)a;
1244:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1245:   mat->factor       = A->factor;
1246:   mat->assembled    = PETSC_TRUE;
1247:   mat->preallocated = PETSC_TRUE;

1249:   a->rstart       = oldmat->rstart;
1250:   a->rend         = oldmat->rend;
1251:   a->size         = oldmat->size;
1252:   a->rank         = oldmat->rank;
1253:   mat->insertmode = NOT_SET_VALUES;
1254:   a->nvec         = oldmat->nvec;
1255:   a->donotstash   = oldmat->donotstash;
1256:   PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);
1257:   PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1258:   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));
1259:   MatStashCreate_Private(A->comm,1,&mat->stash);

1261:   MatSetUpMultiply_MPIDense(mat);
1262:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1263:   PetscLogObjectParent(mat,a->A);
1264:   *newmat = mat;
1265:   return(0);
1266: }

 1268:  #include "petscsys.h"

1272: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1273: {
1275:   PetscMPIInt    rank,size;
1276:   PetscInt            *rowners,i,m,nz,j;
1277:   PetscScalar    *array,*vals,*vals_ptr;
1278:   MPI_Status     status;

1281:   MPI_Comm_rank(comm,&rank);
1282:   MPI_Comm_size(comm,&size);

1284:   /* determine ownership of all rows */
1285:   m          = M/size + ((M % size) > rank);
1286:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1287:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1288:   rowners[0] = 0;
1289:   for (i=2; i<=size; i++) {
1290:     rowners[i] += rowners[i-1];
1291:   }

1293:   MatCreate(comm,newmat);
1294:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1295:   MatSetType(*newmat,type);
1296:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1297:   MatGetArray(*newmat,&array);

1299:   if (!rank) {
1300:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1302:     /* read in my part of the matrix numerical values  */
1303:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1304: 
 1305:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1306:     vals_ptr = vals;
1307:     for (i=0; i<m; i++) {
1308:       for (j=0; j<N; j++) {
1309:         array[i + j*m] = *vals_ptr++;
1310:       }
1311:     }

 1313:     /* read in the other processors' parts and ship them out */
1314:     for (i=1; i<size; i++) {
1315:       nz   = (rowners[i+1] - rowners[i])*N;
1316:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1317:       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1318:     }
1319:   } else {
1320:     /* receive numeric values */
1321:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1323:     /* receive message of values*/
1324:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

 1326:     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1327:     vals_ptr = vals;
1328:     for (i=0; i<m; i++) {
1329:       for (j=0; j<N; j++) {
1330:         array[i + j*m] = *vals_ptr++;
1331:       }
1332:     }
1333:   }
1334:   PetscFree(rowners);
1335:   PetscFree(vals);
1336:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1337:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1338:   return(0);
1339: }

1343: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1344: {
1345:   Mat            A;
1346:   PetscScalar    *vals,*svals;
1347:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1348:   MPI_Status     status;
1349:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1350:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1351:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1352:   PetscInt       i,nz,j,rstart,rend;
1353:   int            fd;

1357:   MPI_Comm_size(comm,&size);
1358:   MPI_Comm_rank(comm,&rank);
1359:   if (!rank) {
1360:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1361:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1362:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1363:   }

1365:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1366:   M = header[1]; N = header[2]; nz = header[3];

1368:   /*
1369:        Handle case where matrix is stored on disk as a dense matrix 
1370:   */
1371:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1372:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1373:     return(0);
1374:   }

1376:   /* determine ownership of all rows */
1377:   m          = M/size + ((M % size) > rank);
1378:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1379:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1380:   rowners[0] = 0;
1381:   for (i=2; i<=size; i++) {
1382:     rowners[i] += rowners[i-1];
1383:   }
1384:   rstart = rowners[rank];
1385:   rend   = rowners[rank+1];

1387:   /* distribute row lengths to all processors */
1388:   PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);
1389:   offlens = ourlens + (rend-rstart);
1390:   if (!rank) {
1391:     PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1392:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1393:     PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1394:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1395:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1396:     PetscFree(sndcounts);
1397:   } else {
1398:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1399:   }

1401:   if (!rank) {
1402:     /* calculate the number of nonzeros on each processor */
1403:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
1404:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1405:     for (i=0; i<size; i++) {
1406:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1407:         procsnz[i] += rowlengths[j];
1408:       }
1409:     }
1410:     PetscFree(rowlengths);

1412:     /* determine max buffer needed and allocate it */
1413:     maxnz = 0;
1414:     for (i=0; i<size; i++) {
1415:       maxnz = PetscMax(maxnz,procsnz[i]);
1416:     }
1417:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

1419:     /* read in my part of the matrix column indices  */
1420:     nz = procsnz[0];
1421:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
1422:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

 1424:     /* read in everyone else's part and ship it off */
1425:     for (i=1; i<size; i++) {
1426:       nz   = procsnz[i];
1427:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1428:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1429:     }
1430:     PetscFree(cols);
1431:   } else {
1432:     /* determine buffer space needed for message */
1433:     nz = 0;
1434:     for (i=0; i<m; i++) {
1435:       nz += ourlens[i];
1436:     }
1437:     PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);

1439:     /* receive message of column indices*/
1440:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1441:     MPI_Get_count(&status,MPIU_INT,&maxnz);
 1442:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1443:   }

1445:   /* loop over local rows, determining number of off diagonal entries */
1446:   PetscMemzero(offlens,m*sizeof(PetscInt));
1447:   jj = 0;
1448:   for (i=0; i<m; i++) {
1449:     for (j=0; j<ourlens[i]; j++) {
1450:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1451:       jj++;
1452:     }
1453:   }

1455:   /* create our matrix */
1456:   for (i=0; i<m; i++) {
1457:     ourlens[i] -= offlens[i];
1458:   }
1459:   MatCreate(comm,newmat);
1460:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1461:   MatSetType(*newmat,type);
1462:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1463:   A = *newmat;
1464:   for (i=0; i<m; i++) {
1465:     ourlens[i] += offlens[i];
1466:   }

1468:   if (!rank) {
1469:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1471:     /* read in my part of the matrix numerical values  */
1472:     nz = procsnz[0];
1473:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1474: 
1475:     /* insert into matrix */
1476:     jj      = rstart;
1477:     smycols = mycols;
1478:     svals   = vals;
1479:     for (i=0; i<m; i++) {
1480:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1481:       smycols += ourlens[i];
1482:       svals   += ourlens[i];
1483:       jj++;
1484:     }

 1486:     /* read in the other processors' parts and ship them out */
1487:     for (i=1; i<size; i++) {
1488:       nz   = procsnz[i];
1489:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1490:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1491:     }
1492:     PetscFree(procsnz);
1493:   } else {
1494:     /* receive numeric values */
1495:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

1497:     /* receive message of values*/
1498:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1499:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
 1500:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

1502:     /* insert into matrix */
1503:     jj      = rstart;
1504:     smycols = mycols;
1505:     svals   = vals;
1506:     for (i=0; i<m; i++) {
1507:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1508:       smycols += ourlens[i];
1509:       svals   += ourlens[i];
1510:       jj++;
1511:     }
1512:   }
1513:   PetscFree(ourlens);
1514:   PetscFree(vals);
1515:   PetscFree(mycols);
1516:   PetscFree(rowners);

1518:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1519:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1520:   return(0);
1521: }