Actual source code: mpibdiag.c

  1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
  2: /*
  3:    The basic matrix operations for the Block diagonal parallel 
  4:   matrices.
  5: */
 6:  #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
 7:  #include "src/vec/vecimpl.h"

  9: int MatSetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
 10: {
 11:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 12:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
 13:   PetscTruth   roworiented = mbd->roworiented;

 16:   for (i=0; i<m; i++) {
 17:     if (idxm[i] < 0) continue;
 18:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 19:     if (idxm[i] >= rstart && idxm[i] < rend) {
 20:       row = idxm[i] - rstart;
 21:       for (j=0; j<n; j++) {
 22:         if (idxn[j] < 0) continue;
 23:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 24:         if (roworiented) {
 25:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
 26:         } else {
 27:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 28:         }
 29:       }
 30:     } else {
 31:       if (!mbd->donotstash) {
 32:         if (roworiented) {
 33:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 34:         } else {
 35:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 36:         }
 37:       }
 38:     }
 39:   }
 40:   return(0);
 41: }

 43: int MatGetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
 44: {
 45:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 46:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;

 49:   for (i=0; i<m; i++) {
 50:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 51:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 52:     if (idxm[i] >= rstart && idxm[i] < rend) {
 53:       row = idxm[i] - rstart;
 54:       for (j=0; j<n; j++) {
 55:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 56:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 57:         MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
 58:       }
 59:     } else {
 60:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 61:     }
 62:   }
 63:   return(0);
 64: }

 66: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
 67: {
 68:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 69:   MPI_Comm     comm = mat->comm;
 70:   int          ierr,nstash,reallocs;
 71:   InsertMode   addv;

 74:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
 75:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
 76:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
 77:   }
 78:   mat->insertmode = addv; /* in case this processor had no cache */
 79:   MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
 80:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 81:   PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
 82:   return(0);
 83: }
 84: EXTERN int MatSetUpMultiply_MPIBDiag(Mat);

 86: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
 87: {
 88:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 89:   Mat_SeqBDiag *mlocal;
 90:   int          i,n,*row,*col;
 91:   int          *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
 92:   PetscScalar  *val;
 93:   InsertMode   addv = mat->insertmode;


 97:   while (1) {
 98:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
 99:     if (!flg) break;
100: 
101:     for (i=0; i<n;) {
102:       /* Now identify the consecutive vals belonging to the same row */
103:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
104:       if (j < n) ncols = j-i;
105:       else       ncols = n-i;
106:       /* Now assemble all these values with a single function call */
107:       MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
108:       i = j;
109:     }
110:   }
111:   MatStashScatterEnd_Private(&mat->stash);

113:   MatAssemblyBegin(mbd->A,mode);
114:   MatAssemblyEnd(mbd->A,mode);

116:   /* Fix main diagonal location and determine global diagonals */
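       /* Each process marks, in a scratch array, which global block diagonals it
          holds: the local diagonal numbers are shifted by brstart to make them
          global, and by Mblock so every index is nonnegative.  The MPI_Allreduce
          sum below merges these markers across processes, and a nonzero entry i
          then corresponds to global diagonal number i - Mblock. */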
117:   mlocal         = (Mat_SeqBDiag*)mbd->A->data;
118:   Mblock         = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
119:   len            = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
120:   ierr           = PetscMalloc(2*len*sizeof(int),&tmp1);
121:   tmp2           = tmp1 + len;
122:   ierr           = PetscMemzero(tmp1,2*len*sizeof(int));
123:   mlocal->mainbd = -1;
124:   for (i=0; i<mlocal->nd; i++) {
125:     if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
126:     tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
127:   }
128:   MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
129:   ict  = 0;
130:   for (i=0; i<len; i++) {
131:     if (tmp2[i]) {
132:       mbd->gdiag[ict] = i - Mblock;
133:       ict++;
134:     }
135:   }
136:   mbd->gnd = ict;
137:   PetscFree(tmp1);

139:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
140:     MatSetUpMultiply_MPIBDiag(mat);
141:   }
142:   return(0);
143: }

145: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
146: {
147:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
148:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;

151:   *bs = dmat->bs;
152:   return(0);
153: }

155: int MatZeroEntries_MPIBDiag(Mat A)
156: {
157:   Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
158:   int          ierr;

161:   MatZeroEntries(l->A);
162:   return(0);
163: }

 165: /* again this uses the same basic strategy as in the assembly and 
 166:    scatter create routines; we should try to do it systematically 
 167:    if we can figure out the proper level of generality. */

169: /* the code does not do the diagonal entries correctly unless the 
 170:    matrix is square and the column and row ownerships are identical.
171:    This is a BUG. The only way to fix it seems to be to access 
172:    aij->A and aij->B directly and not through the MatZeroRows() 
173:    routine. 
174: */

176: int MatZeroRows_MPIBDiag(Mat A,IS is,PetscScalar *diag)
177: {
178:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;
179:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
180:   int            *procs,*nprocs,j,idx,nsends,*work;
181:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
182:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
183:   int            *lens,imdex,*lrows,*values;
184:   MPI_Comm       comm = A->comm;
185:   MPI_Request    *send_waits,*recv_waits;
186:   MPI_Status     recv_status,*send_status;
187:   IS             istmp;
188:   PetscTruth     found;

191:   ISGetLocalSize(is,&N);
192:   ISGetIndices(is,&rows);

194:   /*  first count number of contributors to each processor */
195:   ierr   = PetscMalloc(2*size*sizeof(int),&nprocs);
196:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
197:   procs  = nprocs + size;
198:   ierr   = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
199:   for (i=0; i<N; i++) {
200:     idx = rows[i];
201:     found = PETSC_FALSE;
202:     for (j=0; j<size; j++) {
203:       if (idx >= owners[j] && idx < owners[j+1]) {
204:         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
205:       }
206:     }
207:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
208:   }
209:   nsends = 0;  for (i=0; i<size; i++) {nsends += procs[i];}

211:   /* inform other processors of number of messages and max length*/
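       /* nprocs[j] counts the rows this process sends to process j and procs[j]
          flags whether it sends anything to j at all; after the PetscMaxSum_Op
          reduction below, work[rank] holds the length of the longest message this
          process will receive and work[size+rank] the number of messages it will
          receive. */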
212:   ierr   = PetscMalloc(2*size*sizeof(int),&work);
213:   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
214:   nmax   = work[rank];
215:   nrecvs = work[size+rank];
216:   ierr   = PetscFree(work);

218:   /* post receives:   */
219:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
220:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
221:   for (i=0; i<nrecvs; i++) {
222:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
223:   }

225:   /* do sends:
226:       1) starts[i] gives the starting index in svalues for stuff going to 
227:          the ith processor
228:   */
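       /* For example, with 3 processes and nprocs = {2,0,3}, starts becomes {0,2,2}:
          the two rows destined for process 0 are packed into svalues[0..1] and the
          three rows for process 2 into svalues[2..4]. */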
229:   PetscMalloc((N+1)*sizeof(int),&svalues);
230:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
231:   PetscMalloc((size+1)*sizeof(int),&starts);
232:   starts[0] = 0;
233:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
234:   for (i=0; i<N; i++) {
235:     svalues[starts[owner[i]]++] = rows[i];
236:   }
237:   ISRestoreIndices(is,&rows);

239:   starts[0] = 0;
240:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
241:   count = 0;
242:   for (i=0; i<size; i++) {
243:     if (procs[i]) {
244:       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
245:     }
246:   }
247:   PetscFree(starts);

249:   base = owners[rank];

251:   /*  wait on receives */
252:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
253:   source = lens + nrecvs;
254:   count  = nrecvs;
255:   slen   = 0;
256:   while (count) {
257:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
258:     /* unpack receives into our local space */
259:     MPI_Get_count(&recv_status,MPI_INT,&n);
260:     source[imdex]  = recv_status.MPI_SOURCE;
261:     lens[imdex]  = n;
262:     slen += n;
263:     count--;
264:   }
265:   PetscFree(recv_waits);
266: 
 267:   /* convert the received global row numbers into the local list of rows to zero */
268:   ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);
269:   count = 0;
270:   for (i=0; i<nrecvs; i++) {
271:     values = rvalues + i*nmax;
272:     for (j=0; j<lens[i]; j++) {
273:       lrows[count++] = values[j] - base;
274:     }
275:   }
276:   PetscFree(rvalues);
277:   PetscFree(lens);
278:   PetscFree(owner);
279:   PetscFree(nprocs);
280: 
281:   /* actually zap the local rows */
282:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
283:   PetscLogObjectParent(A,istmp);
284:   PetscFree(lrows);
285:   MatZeroRows(l->A,istmp,diag);
286:   ISDestroy(istmp);

288:   /* wait on sends */
289:   if (nsends) {
290:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
291:     MPI_Waitall(nsends,send_waits,send_status);
292:     PetscFree(send_status);
293:   }
294:   PetscFree(send_waits);
295:   PetscFree(svalues);

297:   return(0);
298: }

300: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
301: {
302:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
303:   int          ierr;

306:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
307:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
308:   (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
309:   return(0);
310: }

312: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
313: {
314:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
315:   int          ierr;

318:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
319:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
320:   (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
321:   return(0);
322: }

324: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
325: {
326:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
327:   int          ierr;
328:   PetscScalar  zero = 0.0;

331:   VecSet(&zero,yy);
332:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
333:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
334:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
335:   return(0);
336: }

338: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
339: {
340:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
341:   int          ierr;

344:   VecCopy(yy,zz);
345:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
346:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
347:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
348:   return(0);
349: }

351: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
352: {
353:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
354:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
355:   int          ierr;
356:   PetscReal    isend[5],irecv[5];

359:   info->block_size     = (PetscReal)dmat->bs;
360:   MatGetInfo(mat->A,MAT_LOCAL,info);
361:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
362:   isend[3] = info->memory;  isend[4] = info->mallocs;
363:   if (flag == MAT_LOCAL) {
364:     info->nz_used      = isend[0];
365:     info->nz_allocated = isend[1];
366:     info->nz_unneeded  = isend[2];
367:     info->memory       = isend[3];
368:     info->mallocs      = isend[4];
369:   } else if (flag == MAT_GLOBAL_MAX) {
370:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
371:     info->nz_used      = irecv[0];
372:     info->nz_allocated = irecv[1];
373:     info->nz_unneeded  = irecv[2];
374:     info->memory       = irecv[3];
375:     info->mallocs      = irecv[4];
376:   } else if (flag == MAT_GLOBAL_SUM) {
377:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
378:     info->nz_used      = irecv[0];
379:     info->nz_allocated = irecv[1];
380:     info->nz_unneeded  = irecv[2];
381:     info->memory       = irecv[3];
382:     info->mallocs      = irecv[4];
383:   }
384:   info->rows_global    = (double)matin->M;
385:   info->columns_global = (double)matin->N;
386:   info->rows_local     = (double)matin->m;
387:   info->columns_local  = (double)matin->N;
388:   return(0);
389: }

391: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
392: {
393:   int          ierr;
394:   Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;

397:   MatGetDiagonal(A->A,v);
398:   return(0);
399: }

401: int MatDestroy_MPIBDiag(Mat mat)
402: {
403:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
404:   int          ierr;
405: #if defined(PETSC_USE_LOG)
406:   Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;

409:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
410: #else
412: #endif
413:   MatStashDestroy_Private(&mat->stash);
414:   PetscFree(mbd->rowners);
415:   PetscFree(mbd->gdiag);
416:   MatDestroy(mbd->A);
417:   if (mbd->lvec) {VecDestroy(mbd->lvec);}
418:   if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
419:   PetscFree(mbd);
420:   return(0);
421: }


424: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
425: {
426:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
427:   int          ierr;

430:   if (mbd->size == 1) {
431:     MatView(mbd->A,viewer);
432:   } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
433:   return(0);
434: }

436: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
437: {
438:   Mat_MPIBDiag      *mbd = (Mat_MPIBDiag*)mat->data;
439:   Mat_SeqBDiag      *dmat = (Mat_SeqBDiag*)mbd->A->data;
440:   int               ierr,i,size = mbd->size,rank = mbd->rank;
441:   PetscTruth        isascii,isdraw;
442:   PetscViewer       sviewer;
443:   PetscViewerFormat format;

446:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
447:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
448:   if (isascii) {
449:     PetscViewerGetFormat(viewer,&format);
450:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
451:       int nline = PetscMin(10,mbd->gnd),k,nk,np;
 452:       PetscViewerASCIIPrintf(viewer,"  block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
453:       nk = (mbd->gnd-1)/nline + 1;
454:       for (k=0; k<nk; k++) {
455:         PetscViewerASCIIPrintf(viewer,"  global diag numbers:");
456:         np = PetscMin(nline,mbd->gnd - nline*k);
457:         for (i=0; i<np; i++) {
458:           PetscViewerASCIIPrintf(viewer,"  %d",mbd->gdiag[i+nline*k]);
459:         }
 460:         PetscViewerASCIIPrintf(viewer,"\n");
461:       }
462:       if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
463:         MatInfo info;
464:         MPI_Comm_rank(mat->comm,&rank);
465:         MatGetInfo(mat,MAT_LOCAL,&info);
 466:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
467:             (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
468:         PetscViewerFlush(viewer);
469:         VecScatterView(mbd->Mvctx,viewer);
470:       }
471:       return(0);
472:     }
473:   }

475:   if (isdraw) {
476:     PetscDraw       draw;
477:     PetscTruth isnull;
478:     PetscViewerDrawGetDraw(viewer,0,&draw);
479:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
480:   }

482:   if (size == 1) {
483:     MatView(mbd->A,viewer);
484:   } else {
485:     /* assemble the entire matrix onto first processor. */
486:     Mat          A;
487:     int          M = mat->M,N = mat->N,m,row,nz,*cols;
488:     PetscScalar  *vals;
489:     Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;

491:     if (!rank) {
492:       MatCreateMPIBDiag(mat->comm,M,M,N,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL,&A);
493:     } else {
494:       MatCreateMPIBDiag(mat->comm,0,M,N,0,Ambd->bs,PETSC_NULL,PETSC_NULL,&A);
495:     }
496:     PetscLogObjectParent(mat,A);

498:     /* Copy the matrix ... This isn't the most efficient means,
499:        but it's quick for now */
500:     row = mbd->rstart;
501:     m = mbd->A->m;
502:     for (i=0; i<m; i++) {
503:       MatGetRow(mat,row,&nz,&cols,&vals);
504:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
505:       MatRestoreRow(mat,row,&nz,&cols,&vals);
506:       row++;
507:     }
508:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
509:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
510:     PetscViewerGetSingleton(viewer,&sviewer);
511:     if (!rank) {
512:       MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
513:     }
514:     PetscViewerRestoreSingleton(viewer,&sviewer);
515:     PetscViewerFlush(viewer);
516:     MatDestroy(A);
517:   }
518:   return(0);
519: }

521: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
522: {
523:   int        ierr;
524:   PetscTruth isascii,isdraw,isbinary;

527:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
528:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
529:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
530:   if (isascii || isdraw) {
531:     MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
532:   } else if (isbinary) {
533:     MatView_MPIBDiag_Binary(mat,viewer);
534:   } else {
535:     SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
536:   }
537:   return(0);
538: }

540: int MatSetOption_MPIBDiag(Mat A,MatOption op)
541: {
542:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
543:   int          ierr;

545:   switch (op) {
546:   case MAT_NO_NEW_NONZERO_LOCATIONS:
547:   case MAT_YES_NEW_NONZERO_LOCATIONS:
548:   case MAT_NEW_NONZERO_LOCATION_ERR:
549:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
550:   case MAT_NO_NEW_DIAGONALS:
551:   case MAT_YES_NEW_DIAGONALS:
552:     MatSetOption(mbd->A,op);
553:     break;
554:   case MAT_ROW_ORIENTED:
555:     mbd->roworiented = PETSC_TRUE;
556:     MatSetOption(mbd->A,op);
557:     break;
558:   case MAT_COLUMN_ORIENTED:
559:     mbd->roworiented = PETSC_FALSE;
560:     MatSetOption(mbd->A,op);
561:     break;
562:   case MAT_IGNORE_OFF_PROC_ENTRIES:
563:     mbd->donotstash = PETSC_TRUE;
564:     break;
565:   case MAT_ROWS_SORTED:
566:   case MAT_ROWS_UNSORTED:
567:   case MAT_COLUMNS_SORTED:
568:   case MAT_COLUMNS_UNSORTED:
569:   case MAT_USE_SINGLE_PRECISION_SOLVES:
 570:     PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
571:     break;
572:   default:
573:     SETERRQ(PETSC_ERR_SUP,"unknown option");
574:   }
575:   return(0);
576: }

578: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
579: {
580:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
581:   int          lrow,ierr;

584:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows")
585:   lrow = row - mat->rstart;
586:   MatGetRow(mat->A,lrow,nz,idx,v);
587:   return(0);
588: }

590: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
591:                                   PetscScalar **v)
592: {
593:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
594:   int          lrow,ierr;

597:   lrow = row - mat->rstart;
598:   MatRestoreRow(mat->A,lrow,nz,idx,v);
599:   return(0);
600: }


603: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
604: {
605:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
606:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
607:   PetscReal    sum = 0.0;
608:   int          ierr,d,i,nd = a->nd,bs = a->bs,len;
609:   PetscScalar  *dv;

612:   if (type == NORM_FROBENIUS) {
613:     for (d=0; d<nd; d++) {
614:       dv   = a->diagv[d];
615:       len  = a->bdlen[d]*bs*bs;
616:       for (i=0; i<len; i++) {
617: #if defined(PETSC_USE_COMPLEX)
618:         sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
619: #else
620:         sum += dv[i]*dv[i];
621: #endif
622:       }
623:     }
624:     MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
625:     *nrm = sqrt(*nrm);
626:     PetscLogFlops(2*A->n*A->m);
627:   } else if (type == NORM_1) { /* max column norm */
628:     PetscReal *tmp,*tmp2;
629:     int    j;
630:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
631:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
632:     MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
633:     *nrm = 0.0;
634:     MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
635:     for (j=0; j<mbd->A->n; j++) {
636:       if (tmp2[j] > *nrm) *nrm = tmp2[j];
637:     }
638:     PetscFree(tmp);
639:     PetscFree(tmp2);
640:   } else if (type == NORM_INFINITY) { /* max row norm */
641:     PetscReal normtemp;
642:     MatNorm(mbd->A,type,&normtemp);
643:     MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
644:   }
645:   return(0);
646: }

648: EXTERN int MatPrintHelp_SeqBDiag(Mat);
649: int MatPrintHelp_MPIBDiag(Mat A)
650: {
651:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
652:   int          ierr;

655:   if (!a->rank) {
656:     MatPrintHelp_SeqBDiag(a->A);
657:   }
658:   return(0);
659: }

661: EXTERN int MatScale_SeqBDiag(PetscScalar*,Mat);
662: int MatScale_MPIBDiag(PetscScalar *alpha,Mat A)
663: {
664:   int          ierr;
665:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;

668:   MatScale_SeqBDiag(alpha,a->A);
669:   return(0);
670: }

672: int MatSetUpPreallocation_MPIBDiag(Mat A)
673: {
674:   int        ierr;

677:    MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
678:   return(0);
679: }

681: /* -------------------------------------------------------------------*/

683: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
684:        MatGetRow_MPIBDiag,
685:        MatRestoreRow_MPIBDiag,
686:        MatMult_MPIBDiag,
687:        MatMultAdd_MPIBDiag,
688:        MatMultTranspose_MPIBDiag,
689:        MatMultTransposeAdd_MPIBDiag,
690:        0,
691:        0,
692:        0,
693:        0,
694:        0,
695:        0,
696:        0,
697:        0,
698:        MatGetInfo_MPIBDiag,0,
699:        MatGetDiagonal_MPIBDiag,
700:        0,
701:        MatNorm_MPIBDiag,
702:        MatAssemblyBegin_MPIBDiag,
703:        MatAssemblyEnd_MPIBDiag,
704:        0,
705:        MatSetOption_MPIBDiag,
706:        MatZeroEntries_MPIBDiag,
707:        MatZeroRows_MPIBDiag,
708:        0,
709:        0,
710:        0,
711:        0,
712:        MatSetUpPreallocation_MPIBDiag,
713:        0,
714:        0,
715:        0,
716:        0,
717:        0,
718:        0,
719:        0,
720:        0,
721:        0,
722:        0,
723:        0,
724:        0,
725:        MatGetValues_MPIBDiag,
726:        0,
727:        MatPrintHelp_MPIBDiag,
728:        MatScale_MPIBDiag,
729:        0,
730:        0,
731:        0,
732:        MatGetBlockSize_MPIBDiag,
733:        0,
734:        0,
735:        0,
736:        0,
737:        0,
738:        0,
739:        0,
740:        0,
741:        0,
742:        0,
743:        MatDestroy_MPIBDiag,
744:        MatView_MPIBDiag,
745:        MatGetPetscMaps_Petsc};

747: EXTERN_C_BEGIN
748: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
749: {
750:   Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
751:   int          ierr,lrows,lcols,rstart,rend;
752:   IS           localc,localr;

755:   MatGetLocalSize(A,&lrows,&lcols);
756:   MatGetOwnershipRange(A,&rstart,&rend);
757:   ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
758:   ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
759:   MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
760:   ISDestroy(localr);
761:   ISDestroy(localc);

763:   *iscopy = PETSC_TRUE;
764:   return(0);
765: }
766: EXTERN_C_END

768: EXTERN_C_BEGIN
769: int MatCreate_MPIBDiag(Mat B)
770: {
771:   Mat_MPIBDiag *b;
772:   int          ierr;

775:   ierr            = PetscNew(Mat_MPIBDiag,&b);
776:   B->data         = (void*)b;
777:   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
778:   B->factor       = 0;
779:   B->mapping      = 0;

781:   B->insertmode = NOT_SET_VALUES;
782:   MPI_Comm_rank(B->comm,&b->rank);
783:   MPI_Comm_size(B->comm,&b->size);

785:   /* build local table of row ownerships */
786:   PetscMalloc((b->size+2)*sizeof(int),&b->rowners);

788:   /* build cache for off array entries formed */
789:   MatStashCreate_Private(B->comm,1,&B->stash);
790:   b->donotstash = PETSC_FALSE;

792:   /* stuff used for matrix-vector multiply */
793:   b->lvec        = 0;
794:   b->Mvctx       = 0;

796:   /* used for MatSetValues() input */
797:   b->roworiented = PETSC_TRUE;

799:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
800:                                      "MatGetDiagonalBlock_MPIBDiag",
801:                                       MatGetDiagonalBlock_MPIBDiag);
802:   return(0);
803: }
804: EXTERN_C_END

806: /*@C
 807:    MatMPIBDiagSetPreallocation - Preallocates storage for a parallel block diagonal matrix.

809:    Collective on Mat

811:    Input Parameters:
812: +  A - the matrix 
813: .  nd - number of block diagonals (global) (optional)
 814: .  bs - each element of a diagonal is a bs x bs dense matrix
815: .  diag - optional array of block diagonal numbers (length nd).
816:    For a matrix element A[i,j], where i=row and j=column, the
817:    diagonal number is
818: $     diag = i/bs - j/bs  (integer division)
819:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
820:    needed (expensive).
821: -  diagv  - pointer to actual diagonals (in same order as diag array), 
822:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
823:    to control memory allocation.


826:    Options Database Keys:
827: .  -mat_block_size <bs> - Sets blocksize
828: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

830:    Notes:
 831:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
 832:    then it must be used on all processors that share the object for that argument.

834:    The parallel matrix is partitioned across the processors by rows, where
835:    each local rectangular matrix is stored in the uniprocessor block 
836:    diagonal format.  See the users manual for further details.

838:    The user MUST specify either the local or global numbers of rows
839:    (possibly both).

841:    The case bs=1 (conventional diagonal storage) is implemented as
842:    a special case.

844:    Fortran Notes:
845:    Fortran programmers cannot set diagv; this variable is ignored.

847:    Level: intermediate

849: .keywords: matrix, block, diagonal, parallel, sparse

851: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
852: @*/
853: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
854: {
855:   Mat_MPIBDiag *b;
856:   int          ierr,i,k,*ldiag,len,nd2;
857:   PetscScalar  **ldiagv = 0;
858:   PetscTruth   flg2;

861:   PetscTypeCompare((PetscObject)B,MATMPIBDIAG,&flg2);
862:   if (!flg2) return(0);
863:   B->preallocated = PETSC_TRUE;
864:   if (bs == PETSC_DEFAULT) bs = 1;
865:   if (nd == PETSC_DEFAULT) nd = 0;
866:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
867:   PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
868:   PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
869:   if (nd && !diag) {
870:     PetscMalloc(nd*sizeof(int),&diag);
871:     nd2  = nd;
872:     PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
873:     if (nd2 != nd) {
874:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
875:     }
876:   } else if (flg2) {
877:     SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
878:   }

880:   if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");

882:   PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
883:   B->n = B->N = PetscMax(B->n,B->N);

885:   if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
886:   if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
887:   if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");

889:   /* the information in the maps duplicates the information computed below, eventually 
890:      we should remove the duplicate information that is not contained in the maps */
891:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
892:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);


895:   b          = (Mat_MPIBDiag*)B->data;
896:   b->gnd     = nd;

898:   ierr          = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
899:   b->rowners[0] = 0;
900:   for (i=2; i<=b->size; i++) {
901:     b->rowners[i] += b->rowners[i-1];
902:   }
903:   b->rstart  = b->rowners[b->rank];
904:   b->rend    = b->rowners[b->rank+1];
905:   b->brstart = (b->rstart)/bs;
906:   b->brend   = (b->rend)/bs;


909:   /* Determine local diagonals; for now, assume global rows = global cols */
910:   /* These are sorted in MatCreateSeqBDiag */
911:   PetscMalloc((nd+1)*sizeof(int),&ldiag);
912:   len  = B->M/bs + B->N/bs + 1;
913:   PetscMalloc(len*sizeof(int),&b->gdiag);
914:   k    = 0;
915:   PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
916:                         + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
917:   if (diagv) {
918:     PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
919:   }
920:   for (i=0; i<nd; i++) {
921:     b->gdiag[i] = diag[i];
922:     if (diag[i] > 0) { /* lower triangular */
923:       if (diag[i] < b->brend) {
924:         ldiag[k] = diag[i] - b->brstart;
925:         if (diagv) ldiagv[k] = diagv[i];
926:         k++;
927:       }
928:     } else { /* upper triangular */
929:       if (B->M/bs - diag[i] > B->N/bs) {
930:         if (B->M/bs + diag[i] > b->brstart) {
931:           ldiag[k] = diag[i] - b->brstart;
932:           if (diagv) ldiagv[k] = diagv[i];
933:           k++;
934:         }
935:       } else {
936:         if (B->M/bs > b->brstart) {
937:           ldiag[k] = diag[i] - b->brstart;
938:           if (diagv) ldiagv[k] = diagv[i];
939:           k++;
940:         }
941:       }
942:     }
943:   }

945:   /* Form local matrix */
946:   MatCreateSeqBDiag(PETSC_COMM_SELF,B->m,B->n,k,bs,ldiag,ldiagv,&b->A);
947:   PetscLogObjectParent(B,b->A);
948:   PetscFree(ldiag);
949:   if (ldiagv) {PetscFree(ldiagv);}

951:   return(0);
952: }
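
/*
   A minimal usage sketch (not part of mpibdiag.c): create a matrix of type
   MATMPIBDIAG with MatCreate()/MatSetType() and then preallocate it with
   MatMPIBDiagSetPreallocation(), mirroring the code path taken by
   MatCreateMPIBDiag() below.  The helper name, the 8 x 8 global size, the block
   size 2, and the particular diagonals are illustrative assumptions only.
*/
#include "petscmat.h"

int ExampleBDiagPreallocation(MPI_Comm comm,Mat *A)
{
  int ierr;
  int bs      = 2;          /* each diagonal element is a 2 x 2 dense block */
  int diag[3] = {0,1,-1};   /* main diagonal plus the first lower and upper block
                               diagonals; element A[i,j] lies on diagonal i/bs - j/bs */

  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIBDIAG);CHKERRQ(ierr);
  /* diagv = PETSC_NULL lets PETSc allocate the diagonal storage itself */
  ierr = MatMPIBDiagSetPreallocation(*A,3,bs,diag,PETSC_NULL);CHKERRQ(ierr);
  /* ... fill with MatSetValues() and assemble before use ... */
  return 0;
}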

954: /*@C
955:    MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.

957:    Collective on MPI_Comm

959:    Input Parameters:
960: +  comm - MPI communicator
961: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
962: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
963: .  N - number of columns (local and global)
964: .  nd - number of block diagonals (global) (optional)
 965: .  bs - each element of a diagonal is a bs x bs dense matrix
966: .  diag - optional array of block diagonal numbers (length nd).
967:    For a matrix element A[i,j], where i=row and j=column, the
968:    diagonal number is
969: $     diag = i/bs - j/bs  (integer division)
970:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
971:    needed (expensive).
972: -  diagv  - pointer to actual diagonals (in same order as diag array), 
973:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
974:    to control memory allocation.

976:    Output Parameter:
977: .  A - the matrix 

979:    Options Database Keys:
980: .  -mat_block_size <bs> - Sets blocksize
981: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

983:    Notes:
 984:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
 985:    then it must be used on all processors that share the object for that argument.

987:    The parallel matrix is partitioned across the processors by rows, where
988:    each local rectangular matrix is stored in the uniprocessor block 
989:    diagonal format.  See the users manual for further details.

991:    The user MUST specify either the local or global numbers of rows
992:    (possibly both).

994:    The case bs=1 (conventional diagonal storage) is implemented as
995:    a special case.

997:    Fortran Notes:
998:    Fortran programmers cannot set diagv; this variable is ignored.

1000:    Level: intermediate

1002: .keywords: matrix, block, diagonal, parallel, sparse

1004: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1005: @*/
1006: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,int *diag,PetscScalar **diagv,Mat *A)
1007: {
1008:   int ierr,size;

1011:   MatCreate(comm,m,m,M,N,A);
1012:   MPI_Comm_size(comm,&size);
1013:   if (size > 1) {
1014:     MatSetType(*A,MATMPIBDIAG);
1015:     MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1016:   } else {
1017:     MatSetType(*A,MATSEQBDIAG);
1018:     MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1019:   }
1020:   return(0);
1021: }
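
/*
   A brief illustrative call (not part of mpibdiag.c): the convenience routine above
   bundles the MatCreate()/MatSetType()/preallocation sequence in one step.  The
   100 x 100 size and the tridiagonal structure are assumptions chosen to show the
   bs = 1 special case mentioned in the notes; the helper name is hypothetical.
*/
#include "petscmat.h"

int ExampleCreateTridiagonal(MPI_Comm comm,Mat *T)
{
  int ierr;
  int diag[3] = {0,1,-1};   /* main, first lower, and first upper diagonal */

  /* PETSC_DECIDE lets PETSc split the 100 global rows across the processes */
  ierr = MatCreateMPIBDiag(comm,PETSC_DECIDE,100,100,3,1,diag,PETSC_NULL,T);CHKERRQ(ierr);
  /* fill with MatSetValues() and assemble before use */
  return 0;
}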

1023: /*@C
1024:    MatBDiagGetData - Gets the data for the block diagonal matrix format.
1025:    For the parallel case, this returns information for the local submatrix.

1027:    Input Parameters:
1028: .  mat - the matrix, stored in block diagonal format.

1030:    Not Collective

1032:    Output Parameters:
 1035: +  nd - number of block diagonals
 1036: .  bs - each element of a diagonal is a bs x bs dense matrix
1037: .  bdlen - array of total block lengths of block diagonals
 1038: .  diag - array of block diagonal numbers (length nd).
 1039:    For a matrix element A[i,j], where i=row and j=column, the
 1040:    diagonal number is
 1041: $     diag = i/bs - j/bs  (integer division)
 1044: -  diagv - pointer to the actual diagonals (in same order as the diag array)

1046:    Level: advanced

1048:    Notes:
1049:    See the users manual for further details regarding this storage format.

1051: .keywords: matrix, block, diagonal, get, data

1053: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1054: @*/
1055: int MatBDiagGetData(Mat mat,int *nd,int *bs,int **diag,int **bdlen,PetscScalar ***diagv)
1056: {
1057:   Mat_MPIBDiag *pdmat;
1058:   Mat_SeqBDiag *dmat = 0;
1059:   PetscTruth   isseq,ismpi;
1060:   int          ierr;

1064:   PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1065:   PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1066:   if (isseq) {
1067:     dmat = (Mat_SeqBDiag*)mat->data;
1068:   } else if (ismpi) {
1069:     pdmat = (Mat_MPIBDiag*)mat->data;
1070:     dmat = (Mat_SeqBDiag*)pdmat->A->data;
1071:   } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1072:   *nd    = dmat->nd;
1073:   *bs    = dmat->bs;
1074:   *diag  = dmat->diag;
1075:   *bdlen = dmat->bdlen;
1076:   *diagv = dmat->diagv;
1077:   return(0);
1078: }
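
/*
   A small sketch (not part of mpibdiag.c) of how the accessor above might be used
   to inspect the block diagonal structure.  The matrix A is assumed to already
   exist in MATSEQBDIAG or MATMPIBDIAG format; the helper name is hypothetical.
*/
#include "petscmat.h"

int ExampleInspectBDiag(Mat A)
{
  int          ierr,i,nd,bs,*diag,*bdlen;
  PetscScalar  **diagv;    /* direct pointers to the stored diagonal values */

  ierr = MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);CHKERRQ(ierr);
  for (i=0; i<nd; i++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal %d: number %d, %d blocks of size %d x %d\n",
                       i,diag[i],bdlen[i],bs,bs);CHKERRQ(ierr);
  }
  return 0;
}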

 1080:  #include "petscsys.h"

1082: EXTERN_C_BEGIN
1083: int MatLoad_MPIBDiag(PetscViewer viewer,MatType type,Mat *newmat)
1084: {
1085:   Mat          A;
1086:   PetscScalar  *vals,*svals;
1087:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1088:   MPI_Status   status;
1089:   int          bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1090:   int          header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1091:   int          *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1092:   int          tag = ((PetscObject)viewer)->tag,extra_rows;

1095:   MPI_Comm_size(comm,&size);
1096:   MPI_Comm_rank(comm,&rank);
1097:   if (!rank) {
1098:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1099:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1100:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1101:     if (header[3] < 0) {
1102:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIBDiag");
1103:     }
1104:   }
1105:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1106:   M = header[1]; N = header[2];

1108:   bs = 1;   /* uses a block size of 1 by default; */
1109:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1111:   /* 
1112:      This code adds extra rows to make sure the number of rows is 
1113:      divisible by the blocksize
1114:   */
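       /* e.g. for M = 10 and bs = 4: Mbs = 2 and extra_rows = 4 - 10 + 8 = 2, so Mbs
          is bumped to 3 and the matrix is padded to 12 rows; when M is already a
          multiple of bs the formula gives extra_rows = bs, which is reset to 0 below. */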
1115:   Mbs        = M/bs;
1116:   extra_rows = bs - M + bs*(Mbs);
1117:   if (extra_rows == bs) extra_rows = 0;
1118:   else                  Mbs++;
1119:   if (extra_rows && !rank) {
 1120:     PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1121:   }

1123:   /* determine ownership of all rows */
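       /* each process gets Mbs/size blocks of bs rows, and the first Mbs%size
          processes take one extra block; e.g. Mbs = 5 blocks on 2 processes gives
          process 0 three blocks and process 1 two */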
1124:   m          = bs*(Mbs/size + ((Mbs % size) > rank));
1125:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1126:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1127:   rowners[0] = 0;
1128:   for (i=2; i<=size; i++) {
1129:     rowners[i] += rowners[i-1];
1130:   }
1131:   rstart = rowners[rank];
1132:   rend   = rowners[rank+1];

1134:   /* distribute row lengths to all processors */
1135:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1136:   if (!rank) {
1137:     PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1138:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1139:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1140:     PetscMalloc(size*sizeof(int),&sndcounts);
1141:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1142:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1143:     PetscFree(sndcounts);
1144:   } else {
1145:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1146:   }

1148:   if (!rank) {
1149:     /* calculate the number of nonzeros on each processor */
1150:     PetscMalloc(size*sizeof(int),&procsnz);
1151:     PetscMemzero(procsnz,size*sizeof(int));
1152:     for (i=0; i<size; i++) {
1153:       for (j=rowners[i]; j<rowners[i+1]; j++) {
1154:         procsnz[i] += rowlengths[j];
1155:       }
1156:     }
1157:     PetscFree(rowlengths);

1159:     /* determine max buffer needed and allocate it */
1160:     maxnz = 0;
1161:     for (i=0; i<size; i++) {
1162:       maxnz = PetscMax(maxnz,procsnz[i]);
1163:     }
1164:     PetscMalloc(maxnz*sizeof(int),&cols);

1166:     /* read in my part of the matrix column indices  */
1167:     nz   = procsnz[0];
1168:     PetscMalloc(nz*sizeof(int),&mycols);
1169:     if (size == 1)  nz -= extra_rows;
1170:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1171:     if (size == 1)  for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

 1173:     /* read in everyone else's and ship it off */
1174:     for (i=1; i<size-1; i++) {
1175:       nz   = procsnz[i];
1176:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1177:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1178:     }
1179:     /* read in the stuff for the last proc */
1180:     if (size != 1) {
1181:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1182:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1183:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1184:       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1185:     }
1186:     PetscFree(cols);
1187:   } else {
1188:     /* determine buffer space needed for message */
1189:     nz = 0;
1190:     for (i=0; i<m; i++) {
1191:       nz += ourlens[i];
1192:     }
1193:     PetscMalloc(nz*sizeof(int),&mycols);

1195:     /* receive message of column indices*/
1196:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1197:     MPI_Get_count(&status,MPI_INT,&maxnz);
1198:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1199:   }

1201:   MatCreateMPIBDiag(comm,m,M+extra_rows,N+extra_rows,PETSC_NULL,bs,PETSC_NULL,PETSC_NULL,newmat);
1202:   A = *newmat;

1204:   if (!rank) {
1205:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1207:     /* read in my part of the matrix numerical values  */
1208:     nz = procsnz[0];
1209:     if (size == 1)  nz -= extra_rows;
1210:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1211:     if (size == 1)  for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

1213:     /* insert into matrix */
1214:     jj      = rstart;
1215:     smycols = mycols;
1216:     svals   = vals;
1217:     for (i=0; i<m; i++) {
1218:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1219:       smycols += ourlens[i];
1220:       svals   += ourlens[i];
1221:       jj++;
1222:     }

1224:     /* read in other processors (except the last one) and ship out */
1225:     for (i=1; i<size-1; i++) {
1226:       nz   = procsnz[i];
1227:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1228:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1229:     }
1230:     /* the last proc */
1231:     if (size != 1){
1232:       nz   = procsnz[i] - extra_rows;
1233:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1234:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1235:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1236:     }
1237:     PetscFree(procsnz);
1238:   } else {
1239:     /* receive numeric values */
1240:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1242:     /* receive message of values*/
1243:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1244:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1245:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1247:     /* insert into matrix */
1248:     jj      = rstart;
1249:     smycols = mycols;
1250:     svals   = vals;
1251:     for (i=0; i<m; i++) {
1252:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1253:       smycols += ourlens[i];
1254:       svals   += ourlens[i];
1255:       jj++;
1256:     }
1257:   }
1258:   PetscFree(ourlens);
1259:   PetscFree(vals);
1260:   PetscFree(mycols);
1261:   PetscFree(rowners);

1263:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1264:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1265:   return(0);
1266: }
1267: EXTERN_C_END