Actual source code: mpibdiag.c
1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
2: /*
3: The basic matrix operations for the Block diagonal parallel
4: matrices.
5: */
6: #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
7: #include "src/vec/vecimpl.h"
9: int MatSetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
10: {
11: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
12: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
13: PetscTruth roworiented = mbd->roworiented;
16: for (i=0; i<m; i++) {
17: if (idxm[i] < 0) continue;
18: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
19: if (idxm[i] >= rstart && idxm[i] < rend) {
20: row = idxm[i] - rstart;
21: for (j=0; j<n; j++) {
22: if (idxn[j] < 0) continue;
23: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
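        /* the input block v is m-by-n: row-oriented input is stored row-major
           (entry (i,j) at v[i*n+j]), column-oriented input is stored column-major
           (entry (i,j) at v[i+j*m]) */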
24: if (roworiented) {
25: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
26: } else {
27: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
28: }
29: }
30: } else {
31: if (!mbd->donotstash) {
32: if (roworiented) {
33: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
34: } else {
35: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
36: }
37: }
38: }
39: }
40: return(0);
41: }
43: int MatGetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
44: {
45: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
46: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
49: for (i=0; i<m; i++) {
50: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
51: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52: if (idxm[i] >= rstart && idxm[i] < rend) {
53: row = idxm[i] - rstart;
54: for (j=0; j<n; j++) {
55: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
56: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
57: MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
58: }
59: } else {
60: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
61: }
62: }
63: return(0);
64: }
66: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
67: {
68: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
69: MPI_Comm comm = mat->comm;
70: int ierr,nstash,reallocs;
71: InsertMode addv;
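  /* take the bitwise OR of the insert modes across all processes; if some used
     INSERT_VALUES and others ADD_VALUES the OR equals (ADD_VALUES|INSERT_VALUES),
     which is rejected below */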
74: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
75: if (addv == (ADD_VALUES|INSERT_VALUES)) {
76: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
77: }
78: mat->insertmode = addv; /* in case this processor had no cache */
79: MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
80: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
81: PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
82: return(0);
83: }
84: EXTERN int MatSetUpMultiply_MPIBDiag(Mat);
86: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
87: {
88: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
89: Mat_SeqBDiag *mlocal;
90: int i,n,*row,*col;
91: int *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
92: PetscScalar *val;
93: InsertMode addv = mat->insertmode;
97: while (1) {
98: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
99: if (!flg) break;
100:
101: for (i=0; i<n;) {
102: /* Now identify the consecutive vals belonging to the same row */
103: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
104: if (j < n) ncols = j-i;
105: else ncols = n-i;
106: /* Now assemble all these values with a single function call */
107: MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
108: i = j;
109: }
110: }
111: MatStashScatterEnd_Private(&mat->stash);
113: MatAssemblyBegin(mbd->A,mode);
114: MatAssemblyEnd(mbd->A,mode);
116: /* Fix main diagonal location and determine global diagonals */
117: mlocal = (Mat_SeqBDiag*)mbd->A->data;
118: Mblock = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
119: len = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
120: ierr = PetscMalloc(2*len*sizeof(int),&tmp1);
121: tmp2 = tmp1 + len;
122: ierr = PetscMemzero(tmp1,2*len*sizeof(int));
123: mlocal->mainbd = -1;
124: for (i=0; i<mlocal->nd; i++) {
125: if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
126: tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
127: }
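  /* tmp1 flags which global block diagonals occur locally (shifted by Mblock so
     the index is nonnegative); the Allreduce merges the flags so every process
     can build the global diagonal list gdiag */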
128: MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
129: ict = 0;
130: for (i=0; i<len; i++) {
131: if (tmp2[i]) {
132: mbd->gdiag[ict] = i - Mblock;
133: ict++;
134: }
135: }
136: mbd->gnd = ict;
137: PetscFree(tmp1);
139: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
140: MatSetUpMultiply_MPIBDiag(mat);
141: }
142: return(0);
143: }
145: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
146: {
147: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
148: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
151: *bs = dmat->bs;
152: return(0);
153: }
155: int MatZeroEntries_MPIBDiag(Mat A)
156: {
157: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
158: int ierr;
161: MatZeroEntries(l->A);
162: return(0);
163: }
165: /* again this uses the same basic strategy as in the assembly and
166: scatter create routines, we should try to do it systematically
167: if we can figure out the proper level of generality. */
169: /* the code does not do the diagonal entries correctly unless the
170: matrix is square and the column and row ownerships are identical.
171: This is a BUG. The only way to fix it seems to be to access
172: aij->A and aij->B directly and not through the MatZeroRows()
173: routine.
174: */
176: int MatZeroRows_MPIBDiag(Mat A,IS is,PetscScalar *diag)
177: {
178: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
179: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
180: int *procs,*nprocs,j,idx,nsends,*work;
181: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
182: int *rvalues,tag = A->tag,count,base,slen,n,*source;
183: int *lens,imdex,*lrows,*values;
184: MPI_Comm comm = A->comm;
185: MPI_Request *send_waits,*recv_waits;
186: MPI_Status recv_status,*send_status;
187: IS istmp;
188: PetscTruth found;
191: ISGetLocalSize(is,&N);
192: ISGetIndices(is,&rows);
194: /* first count number of contributors to each processor */
195: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
196: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
197: procs = nprocs + size;
198: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
199: for (i=0; i<N; i++) {
200: idx = rows[i];
201: found = PETSC_FALSE;
202: for (j=0; j<size; j++) {
203: if (idx >= owners[j] && idx < owners[j+1]) {
204: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
205: }
206: }
207: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
208: }
209: nsends = 0; for (i=0; i<size; i++) {nsends += procs[i];}
211: /* inform other processors of number of messages and max length*/
212: ierr = PetscMalloc(2*size*sizeof(int),&work);
213: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
214: nmax = work[rank];
215: nrecvs = work[size+rank];
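  /* after the PetscMaxSum reduction, nmax is the length of the longest row-index
     message this process will receive and nrecvs is the number of such messages */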
216: ierr = PetscFree(work);
218: /* post receives: */
219: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
220: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
221: for (i=0; i<nrecvs; i++) {
222: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
223: }
225: /* do sends:
226: 1) starts[i] gives the starting index in svalues for stuff going to
227: the ith processor
228: */
229: PetscMalloc((N+1)*sizeof(int),&svalues);
230: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
231: PetscMalloc((size+1)*sizeof(int),&starts);
232: starts[0] = 0;
233: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
234: for (i=0; i<N; i++) {
235: svalues[starts[owner[i]]++] = rows[i];
236: }
237: ISRestoreIndices(is,&rows);
239: starts[0] = 0;
240: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
241: count = 0;
242: for (i=0; i<size; i++) {
243: if (procs[i]) {
244: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
245: }
246: }
247: PetscFree(starts);
249: base = owners[rank];
251: /* wait on receives */
252: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
253: source = lens + nrecvs;
254: count = nrecvs;
255: slen = 0;
256: while (count) {
257: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
258: /* unpack receives into our local space */
259: MPI_Get_count(&recv_status,MPI_INT,&n);
260: source[imdex] = recv_status.MPI_SOURCE;
261: lens[imdex] = n;
262: slen += n;
263: count--;
264: }
265: PetscFree(recv_waits);
266:
267: /* move the data into the send scatter */
268: ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);
269: count = 0;
270: for (i=0; i<nrecvs; i++) {
271: values = rvalues + i*nmax;
272: for (j=0; j<lens[i]; j++) {
273: lrows[count++] = values[j] - base;
274: }
275: }
276: PetscFree(rvalues);
277: PetscFree(lens);
278: PetscFree(owner);
279: PetscFree(nprocs);
280:
281: /* actually zap the local rows */
282: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
283: PetscLogObjectParent(A,istmp);
284: PetscFree(lrows);
285: MatZeroRows(l->A,istmp,diag);
286: ISDestroy(istmp);
288: /* wait on sends */
289: if (nsends) {
290: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
291: MPI_Waitall(nsends,send_waits,send_status);
292: PetscFree(send_status);
293: }
294: PetscFree(send_waits);
295: PetscFree(svalues);
297: return(0);
298: }
300: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
301: {
302: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
303: int ierr;
306: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
307: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
308: (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
309: return(0);
310: }
312: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
313: {
314: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
315: int ierr;
318: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
319: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
320: (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
321: return(0);
322: }
324: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
325: {
326: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
327: int ierr;
328: PetscScalar zero = 0.0;
331: VecSet(&zero,yy);
332: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
333: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
334: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
335: return(0);
336: }
338: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
339: {
340: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
341: int ierr;
344: VecCopy(yy,zz);
345: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
346: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
347: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
348: return(0);
349: }
351: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
352: {
353: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
354: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
355: int ierr;
356: PetscReal isend[5],irecv[5];
359: info->block_size = (PetscReal)dmat->bs;
360: MatGetInfo(mat->A,MAT_LOCAL,info);
361: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
362: isend[3] = info->memory; isend[4] = info->mallocs;
363: if (flag == MAT_LOCAL) {
364: info->nz_used = isend[0];
365: info->nz_allocated = isend[1];
366: info->nz_unneeded = isend[2];
367: info->memory = isend[3];
368: info->mallocs = isend[4];
369: } else if (flag == MAT_GLOBAL_MAX) {
370: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
371: info->nz_used = irecv[0];
372: info->nz_allocated = irecv[1];
373: info->nz_unneeded = irecv[2];
374: info->memory = irecv[3];
375: info->mallocs = irecv[4];
376: } else if (flag == MAT_GLOBAL_SUM) {
377: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
378: info->nz_used = irecv[0];
379: info->nz_allocated = irecv[1];
380: info->nz_unneeded = irecv[2];
381: info->memory = irecv[3];
382: info->mallocs = irecv[4];
383: }
384: info->rows_global = (double)matin->M;
385: info->columns_global = (double)matin->N;
386: info->rows_local = (double)matin->m;
387: info->columns_local = (double)matin->N;
388: return(0);
389: }
391: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
392: {
393: int ierr;
394: Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;
397: MatGetDiagonal(A->A,v);
398: return(0);
399: }
401: int MatDestroy_MPIBDiag(Mat mat)
402: {
403: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
404: int ierr;
405: #if defined(PETSC_USE_LOG)
406: Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;
409: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
410: #else
412: #endif
413: MatStashDestroy_Private(&mat->stash);
414: PetscFree(mbd->rowners);
415: PetscFree(mbd->gdiag);
416: MatDestroy(mbd->A);
417: if (mbd->lvec) {VecDestroy(mbd->lvec);}
418: if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
419: PetscFree(mbd);
420: return(0);
421: }
424: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
425: {
426: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
427: int ierr;
430: if (mbd->size == 1) {
431: MatView(mbd->A,viewer);
432: } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
433: return(0);
434: }
436: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
437: {
438: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
439: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
440: int ierr,i,size = mbd->size,rank = mbd->rank;
441: PetscTruth isascii,isdraw;
442: PetscViewer sviewer;
443: PetscViewerFormat format;
446: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
447: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
448: if (isascii) {
449: PetscViewerGetFormat(viewer,&format);
450: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
451: int nline = PetscMin(10,mbd->gnd),k,nk,np;
452: PetscViewerASCIIPrintf(viewer," block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
453: nk = (mbd->gnd-1)/nline + 1;
454: for (k=0; k<nk; k++) {
455: PetscViewerASCIIPrintf(viewer," global diag numbers:");
456: np = PetscMin(nline,mbd->gnd - nline*k);
457: for (i=0; i<np; i++) {
458: PetscViewerASCIIPrintf(viewer," %d",mbd->gdiag[i+nline*k]);
459: }
460: PetscViewerASCIIPrintf(viewer,"\n");
461: }
462: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
463: MatInfo info;
464: MPI_Comm_rank(mat->comm,&rank);
465: MatGetInfo(mat,MAT_LOCAL,&info);
466: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
467: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
468: PetscViewerFlush(viewer);
469: VecScatterView(mbd->Mvctx,viewer);
470: }
471: return(0);
472: }
473: }
475: if (isdraw) {
476: PetscDraw draw;
477: PetscTruth isnull;
478: PetscViewerDrawGetDraw(viewer,0,&draw);
479: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
480: }
482: if (size == 1) {
483: MatView(mbd->A,viewer);
484: } else {
485: /* assemble the entire matrix onto first processor. */
486: Mat A;
487: int M = mat->M,N = mat->N,m,row,nz,*cols;
488: PetscScalar *vals;
489: Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;
491: if (!rank) {
492: MatCreateMPIBDiag(mat->comm,M,M,N,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL,&A);
493: } else {
494: MatCreateMPIBDiag(mat->comm,0,M,N,0,Ambd->bs,PETSC_NULL,PETSC_NULL,&A);
495: }
496: PetscLogObjectParent(mat,A);
498: /* Copy the matrix ... This isn't the most efficient means,
499: but it's quick for now */
500: row = mbd->rstart;
501: m = mbd->A->m;
502: for (i=0; i<m; i++) {
503: MatGetRow(mat,row,&nz,&cols,&vals);
504: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
505: MatRestoreRow(mat,row,&nz,&cols,&vals);
506: row++;
507: }
508: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
509: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
510: PetscViewerGetSingleton(viewer,&sviewer);
511: if (!rank) {
512: MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
513: }
514: PetscViewerRestoreSingleton(viewer,&sviewer);
515: PetscViewerFlush(viewer);
516: MatDestroy(A);
517: }
518: return(0);
519: }
521: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
522: {
523: int ierr;
524: PetscTruth isascii,isdraw,isbinary;
527: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
528: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
529: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
530: if (isascii || isdraw) {
531: MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
532: } else if (isbinary) {
533: MatView_MPIBDiag_Binary(mat,viewer);
534: } else {
535: SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
536: }
537: return(0);
538: }
540: int MatSetOption_MPIBDiag(Mat A,MatOption op)
541: {
542: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
543: int ierr;
545: switch (op) {
546: case MAT_NO_NEW_NONZERO_LOCATIONS:
547: case MAT_YES_NEW_NONZERO_LOCATIONS:
548: case MAT_NEW_NONZERO_LOCATION_ERR:
549: case MAT_NEW_NONZERO_ALLOCATION_ERR:
550: case MAT_NO_NEW_DIAGONALS:
551: case MAT_YES_NEW_DIAGONALS:
552: MatSetOption(mbd->A,op);
553: break;
554: case MAT_ROW_ORIENTED:
555: mbd->roworiented = PETSC_TRUE;
556: MatSetOption(mbd->A,op);
557: break;
558: case MAT_COLUMN_ORIENTED:
559: mbd->roworiented = PETSC_FALSE;
560: MatSetOption(mbd->A,op);
561: break;
562: case MAT_IGNORE_OFF_PROC_ENTRIES:
563: mbd->donotstash = PETSC_TRUE;
564: break;
565: case MAT_ROWS_SORTED:
566: case MAT_ROWS_UNSORTED:
567: case MAT_COLUMNS_SORTED:
568: case MAT_COLUMNS_UNSORTED:
569: case MAT_USE_SINGLE_PRECISION_SOLVES:
570: PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
571: break;
572: default:
573: SETERRQ(PETSC_ERR_SUP,"unknown option");
574: }
575: return(0);
576: }
578: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
579: {
580: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
581: int lrow,ierr;
584: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows");
585: lrow = row - mat->rstart;
586: MatGetRow(mat->A,lrow,nz,idx,v);
587: return(0);
588: }
590: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
591: PetscScalar **v)
592: {
593: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
594: int lrow,ierr;
597: lrow = row - mat->rstart;
598: MatRestoreRow(mat->A,lrow,nz,idx,v);
599: return(0);
600: }
603: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
604: {
605: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
606: Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
607: PetscReal sum = 0.0;
608: int ierr,d,i,nd = a->nd,bs = a->bs,len;
609: PetscScalar *dv;
612: if (type == NORM_FROBENIUS) {
613: for (d=0; d<nd; d++) {
614: dv = a->diagv[d];
615: len = a->bdlen[d]*bs*bs;
616: for (i=0; i<len; i++) {
617: #if defined(PETSC_USE_COMPLEX)
618: sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
619: #else
620: sum += dv[i]*dv[i];
621: #endif
622: }
623: }
624: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
625: *nrm = sqrt(*nrm);
626: PetscLogFlops(2*A->n*A->m);
627: } else if (type == NORM_1) { /* max column norm */
628: PetscReal *tmp,*tmp2;
629: int j;
630: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
631: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
632: MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
633: *nrm = 0.0;
634: MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
635: for (j=0; j<mbd->A->n; j++) {
636: if (tmp2[j] > *nrm) *nrm = tmp2[j];
637: }
638: PetscFree(tmp);
639: PetscFree(tmp2);
640: } else if (type == NORM_INFINITY) { /* max row norm */
641: PetscReal normtemp;
642: MatNorm(mbd->A,type,&normtemp);
643: MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
644: }
645: return(0);
646: }
648: EXTERN int MatPrintHelp_SeqBDiag(Mat);
649: int MatPrintHelp_MPIBDiag(Mat A)
650: {
651: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
652: int ierr;
655: if (!a->rank) {
656: MatPrintHelp_SeqBDiag(a->A);
657: }
658: return(0);
659: }
661: EXTERN int MatScale_SeqBDiag(PetscScalar*,Mat);
662: int MatScale_MPIBDiag(PetscScalar *alpha,Mat A)
663: {
664: int ierr;
665: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
668: MatScale_SeqBDiag(alpha,a->A);
669: return(0);
670: }
672: int MatSetUpPreallocation_MPIBDiag(Mat A)
673: {
674: int ierr;
677: MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
678: return(0);
679: }
681: /* -------------------------------------------------------------------*/
683: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
684: MatGetRow_MPIBDiag,
685: MatRestoreRow_MPIBDiag,
686: MatMult_MPIBDiag,
687: MatMultAdd_MPIBDiag,
688: MatMultTranspose_MPIBDiag,
689: MatMultTransposeAdd_MPIBDiag,
690: 0,
691: 0,
692: 0,
693: 0,
694: 0,
695: 0,
696: 0,
697: 0,
698: MatGetInfo_MPIBDiag,0,
699: MatGetDiagonal_MPIBDiag,
700: 0,
701: MatNorm_MPIBDiag,
702: MatAssemblyBegin_MPIBDiag,
703: MatAssemblyEnd_MPIBDiag,
704: 0,
705: MatSetOption_MPIBDiag,
706: MatZeroEntries_MPIBDiag,
707: MatZeroRows_MPIBDiag,
708: 0,
709: 0,
710: 0,
711: 0,
712: MatSetUpPreallocation_MPIBDiag,
713: 0,
714: 0,
715: 0,
716: 0,
717: 0,
718: 0,
719: 0,
720: 0,
721: 0,
722: 0,
723: 0,
724: 0,
725: MatGetValues_MPIBDiag,
726: 0,
727: MatPrintHelp_MPIBDiag,
728: MatScale_MPIBDiag,
729: 0,
730: 0,
731: 0,
732: MatGetBlockSize_MPIBDiag,
733: 0,
734: 0,
735: 0,
736: 0,
737: 0,
738: 0,
739: 0,
740: 0,
741: 0,
742: 0,
743: MatDestroy_MPIBDiag,
744: MatView_MPIBDiag,
745: MatGetPetscMaps_Petsc};
747: EXTERN_C_BEGIN
748: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
749: {
750: Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
751: int ierr,lrows,lcols,rstart,rend;
752: IS localc,localr;
755: MatGetLocalSize(A,&lrows,&lcols);
756: MatGetOwnershipRange(A,&rstart,&rend);
757: ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
758: ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
759: MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
760: ISDestroy(localr);
761: ISDestroy(localc);
763: *iscopy = PETSC_TRUE;
764: return(0);
765: }
766: EXTERN_C_END
768: EXTERN_C_BEGIN
769: int MatCreate_MPIBDiag(Mat B)
770: {
771: Mat_MPIBDiag *b;
772: int ierr;
775: ierr = PetscNew(Mat_MPIBDiag,&b);
776: B->data = (void*)b;
777: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
778: B->factor = 0;
779: B->mapping = 0;
781: B->insertmode = NOT_SET_VALUES;
782: MPI_Comm_rank(B->comm,&b->rank);
783: MPI_Comm_size(B->comm,&b->size);
785: /* build local table of row ownerships */
786: PetscMalloc((b->size+2)*sizeof(int),&b->rowners);
788: /* build cache for off array entries formed */
789: MatStashCreate_Private(B->comm,1,&B->stash);
790: b->donotstash = PETSC_FALSE;
792: /* stuff used for matrix-vector multiply */
793: b->lvec = 0;
794: b->Mvctx = 0;
796: /* used for MatSetValues() input */
797: b->roworiented = PETSC_TRUE;
799: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
800: "MatGetDiagonalBlock_MPIBDiag",
801: MatGetDiagonalBlock_MPIBDiag);
802: return(0);
803: }
804: EXTERN_C_END
806: /*@C
807: MatMPIBDiagSetPreallocation - Sets the block-diagonal structure and preallocates storage for a sparse parallel matrix in MPIBDiag format.
809: Collective on Mat
811: Input Parameters:
812: + A - the matrix
813: . nd - number of block diagonals (global) (optional)
814: . bs - each element of a diagonal is a bs x bs dense matrix
815: . diag - optional array of block diagonal numbers (length nd).
816: For a matrix element A[i,j], where i=row and j=column, the
817: diagonal number is
818: $ diag = i/bs - j/bs (integer division)
819: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
820: needed (expensive).
821: - diagv - pointer to actual diagonals (in same order as diag array),
822: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
823: to control memory allocation.
826: Options Database Keys:
827: . -mat_block_size <bs> - Sets blocksize
828: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
830: Notes:
831: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
832: then it must be used on all processors that share the object for that argument.
834: The parallel matrix is partitioned across the processors by rows, where
835: each local rectangular matrix is stored in the uniprocessor block
836: diagonal format. See the users manual for further details.
838: The user MUST specify either the local or global numbers of rows
839: (possibly both).
841: The case bs=1 (conventional diagonal storage) is implemented as
842: a special case.
844: Fortran Notes:
845: Fortran programmers cannot set diagv; this variable is ignored.
847: Level: intermediate
849: .keywords: matrix, block, diagonal, parallel, sparse
851: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
852: @*/
853: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
854: {
855: Mat_MPIBDiag *b;
856: int ierr,i,k,*ldiag,len,nd2;
857: PetscScalar **ldiagv = 0;
858: PetscTruth flg2;
861: PetscTypeCompare((PetscObject)B,MATMPIBDIAG,&flg2);
862: if (!flg2) return(0);
863: B->preallocated = PETSC_TRUE;
864: if (bs == PETSC_DEFAULT) bs = 1;
865: if (nd == PETSC_DEFAULT) nd = 0;
866: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
867: PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
868: PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
869: if (nd && !diag) {
870: PetscMalloc(nd*sizeof(int),&diag);
871: nd2 = nd;
872: PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
873: if (nd2 != nd) {
874: SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
875: }
876: } else if (flg2) {
877: SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
878: }
880: if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");
882: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
883: B->n = B->N = PetscMax(B->n,B->N);
885: if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
886: if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
887: if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
889: /* the information in the maps duplicates the information computed below, eventually
890: we should remove the duplicate information that is not contained in the maps */
891: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
892: PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);
895: b = (Mat_MPIBDiag*)B->data;
896: b->gnd = nd;
898: ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
899: b->rowners[0] = 0;
900: for (i=2; i<=b->size; i++) {
901: b->rowners[i] += b->rowners[i-1];
902: }
903: b->rstart = b->rowners[b->rank];
904: b->rend = b->rowners[b->rank+1];
905: b->brstart = (b->rstart)/bs;
906: b->brend = (b->rend)/bs;
909: /* Determine local diagonals; for now, assume global rows = global cols */
910: /* These are sorted in MatCreateSeqBDiag */
911: PetscMalloc((nd+1)*sizeof(int),&ldiag);
912: len = B->M/bs + B->N/bs + 1;
913: PetscMalloc(len*sizeof(int),&b->gdiag);
914: k = 0;
915: PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
916: + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
917: if (diagv) {
918: PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
919: }
920: for (i=0; i<nd; i++) {
921: b->gdiag[i] = diag[i];
922: if (diag[i] > 0) { /* lower triangular */
923: if (diag[i] < b->brend) {
924: ldiag[k] = diag[i] - b->brstart;
925: if (diagv) ldiagv[k] = diagv[i];
926: k++;
927: }
928: } else { /* upper triangular */
929: if (B->M/bs - diag[i] > B->N/bs) {
930: if (B->M/bs + diag[i] > b->brstart) {
931: ldiag[k] = diag[i] - b->brstart;
932: if (diagv) ldiagv[k] = diagv[i];
933: k++;
934: }
935: } else {
936: if (B->M/bs > b->brstart) {
937: ldiag[k] = diag[i] - b->brstart;
938: if (diagv) ldiagv[k] = diagv[i];
939: k++;
940: }
941: }
942: }
943: }
945: /* Form local matrix */
946: MatCreateSeqBDiag(PETSC_COMM_SELF,B->m,B->n,k,bs,ldiag,ldiagv,&b->A);
947: PetscLogObjectParent(B,b->A);
948: PetscFree(ldiag);
949: if (ldiagv) {PetscFree(ldiagv);}
951: return(0);
952: }
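/*
   A minimal usage sketch for MatMPIBDiagSetPreallocation(); the sizes, block size,
   and diagonal numbers are illustrative assumptions, not taken from this file.
   With bs = 2, diagonals {-1,0,1} describe a block-tridiagonal matrix
   (diagonal number = i/bs - j/bs):

      Mat B;
      int ierr,diag[3] = {-1,0,1};
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&B);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIBDIAG);CHKERRQ(ierr);
      ierr = MatMPIBDiagSetPreallocation(B,3,2,diag,PETSC_NULL);CHKERRQ(ierr);

   Passing nd = 0 and diag = PETSC_NULL instead lets PETSc allocate the diagonals
   dynamically as values are inserted, which the manual page above notes is expensive.
*/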
954: /*@C
955: MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.
957: Collective on MPI_Comm
959: Input Parameters:
960: + comm - MPI communicator
961: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
962: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
963: . N - number of columns (local and global)
964: . nd - number of block diagonals (global) (optional)
965: . bs - each element of a diagonal is a bs x bs dense matrix
966: . diag - optional array of block diagonal numbers (length nd).
967: For a matrix element A[i,j], where i=row and j=column, the
968: diagonal number is
969: $ diag = i/bs - j/bs (integer division)
970: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
971: needed (expensive).
972: - diagv - pointer to actual diagonals (in same order as diag array),
973: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
974: to control memory allocation.
976: Output Parameter:
977: . A - the matrix
979: Options Database Keys:
980: . -mat_block_size <bs> - Sets blocksize
981: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
983: Notes:
984: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
985: then it must be used on all processors that share the object for that argument.
987: The parallel matrix is partitioned across the processors by rows, where
988: each local rectangular matrix is stored in the uniprocessor block
989: diagonal format. See the users manual for further details.
991: The user MUST specify either the local or global numbers of rows
992: (possibly both).
994: The case bs=1 (conventional diagonal storage) is implemented as
995: a special case.
997: Fortran Notes:
998: Fortran programmers cannot set diagv; this variable is ignored.
1000: Level: intermediate
1002: .keywords: matrix, block, diagonal, parallel, sparse
1004: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1005: @*/
1006: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,int *diag,PetscScalar **diagv,Mat *A)
1007: {
1008: int ierr,size;
1011: MatCreate(comm,m,m,M,N,A);
1012: MPI_Comm_size(comm,&size);
1013: if (size > 1) {
1014: MatSetType(*A,MATMPIBDIAG);
1015: MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1016: } else {
1017: MatSetType(*A,MATSEQBDIAG);
1018: MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1019: }
1020: return(0);
1021: }
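/*
   An illustrative call of MatCreateMPIBDiag(); all sizes and diagonal numbers are
   assumptions for the example.  It is equivalent to the MatCreate()/MatSetType()/
   MatMPIBDiagSetPreallocation() sequence, and on a single process it produces a
   MATSEQBDIAG matrix instead, as the code above shows:

      Mat A;
      int ierr,diag[3] = {-1,0,1};
      ierr = MatCreateMPIBDiag(PETSC_COMM_WORLD,PETSC_DECIDE,100,100,3,2,diag,
                               PETSC_NULL,&A);CHKERRQ(ierr);
*/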
1023: /*@C
1024: MatBDiagGetData - Gets the data for the block diagonal matrix format.
1025: For the parallel case, this returns information for the local submatrix.
1027: Input Parameters:
1028: . mat - the matrix, stored in block diagonal format.
1030: Not Collective
1032: Output Parameters:
1035: + nd - number of block diagonals
1036: . bs - each element of a diagonal is a bs x bs dense matrix
1037: . bdlen - array of total block lengths of block diagonals
1038: . diag - array of block diagonal numbers (length nd).
1039: For a matrix element A[i,j], where i=row and j=column, the
1040: diagonal number is
1041: $ diag = i/bs - j/bs (integer division)
1044: - diagv - pointer to the actual diagonals (in same order as the diag array)
1046: Level: advanced
1048: Notes:
1049: See the users manual for further details regarding this storage format.
1051: .keywords: matrix, block, diagonal, get, data
1053: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1054: @*/
1055: int MatBDiagGetData(Mat mat,int *nd,int *bs,int **diag,int **bdlen,PetscScalar ***diagv)
1056: {
1057: Mat_MPIBDiag *pdmat;
1058: Mat_SeqBDiag *dmat = 0;
1059: PetscTruth isseq,ismpi;
1060: int ierr;
1064: PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1065: PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1066: if (isseq) {
1067: dmat = (Mat_SeqBDiag*)mat->data;
1068: } else if (ismpi) {
1069: pdmat = (Mat_MPIBDiag*)mat->data;
1070: dmat = (Mat_SeqBDiag*)pdmat->A->data;
1071: } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1072: *nd = dmat->nd;
1073: *bs = dmat->bs;
1074: *diag = dmat->diag;
1075: *bdlen = dmat->bdlen;
1076: *diagv = dmat->diagv;
1077: return(0);
1078: }
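/*
   A short illustrative use of MatBDiagGetData(); the variable names are assumptions.
   On a parallel MATMPIBDIAG matrix the data returned describes only the local submatrix:

      int         ierr,i,nd,bs,*diag,*bdlen;
      PetscScalar **diagv;
      ierr = MatBDiagGetData(mat,&nd,&bs,&diag,&bdlen,&diagv);CHKERRQ(ierr);
      for (i=0; i<nd; i++) {
        PetscPrintf(PETSC_COMM_SELF,"diagonal %d: %d blocks of size %d\n",diag[i],bdlen[i],bs);
      }
*/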
1080: #include "petscsys.h"
1082: EXTERN_C_BEGIN
1083: int MatLoad_MPIBDiag(PetscViewer viewer,MatType type,Mat *newmat)
1084: {
1085: Mat A;
1086: PetscScalar *vals,*svals;
1087: MPI_Comm comm = ((PetscObject)viewer)->comm;
1088: MPI_Status status;
1089: int bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1090: int header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1091: int *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1092: int tag = ((PetscObject)viewer)->tag,extra_rows;
1095: MPI_Comm_size(comm,&size);
1096: MPI_Comm_rank(comm,&rank);
1097: if (!rank) {
1098: PetscViewerBinaryGetDescriptor(viewer,&fd);
1099: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1100: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1101: if (header[3] < 0) {
1102: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIBDiag");
1103: }
1104: }
1105: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1106: M = header[1]; N = header[2];
1108: bs = 1; /* uses a block size of 1 by default; */
1109: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1111: /*
1112: This code adds extra rows to make sure the number of rows is
1113: divisible by the blocksize
1114: */
1115: Mbs = M/bs;
1116: extra_rows = bs - M + bs*(Mbs);
1117: if (extra_rows == bs) extra_rows = 0;
1118: else Mbs++;
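  /* e.g. with bs=4: M=10 gives Mbs=2 and extra_rows=2, Mbs is bumped to 3 and the
     matrix is padded to 12 rows; M=8 gives extra_rows=bs, which is reset to 0,
     so no padding occurs (illustrative values) */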
1119: if (extra_rows && !rank) {
1120: PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1121: }
1123: /* determine ownership of all rows */
1124: m = bs*(Mbs/size + ((Mbs % size) > rank));
1125: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1126: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1127: rowners[0] = 0;
1128: for (i=2; i<=size; i++) {
1129: rowners[i] += rowners[i-1];
1130: }
1131: rstart = rowners[rank];
1132: rend = rowners[rank+1];
1134: /* distribute row lengths to all processors */
1135: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1136: if (!rank) {
1137: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1138: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1139: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1140: PetscMalloc(size*sizeof(int),&sndcounts);
1141: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1142: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1143: PetscFree(sndcounts);
1144: } else {
1145: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1146: }
1148: if (!rank) {
1149: /* calculate the number of nonzeros on each processor */
1150: PetscMalloc(size*sizeof(int),&procsnz);
1151: PetscMemzero(procsnz,size*sizeof(int));
1152: for (i=0; i<size; i++) {
1153: for (j=rowners[i]; j<rowners[i+1]; j++) {
1154: procsnz[i] += rowlengths[j];
1155: }
1156: }
1157: PetscFree(rowlengths);
1159: /* determine max buffer needed and allocate it */
1160: maxnz = 0;
1161: for (i=0; i<size; i++) {
1162: maxnz = PetscMax(maxnz,procsnz[i]);
1163: }
1164: PetscMalloc(maxnz*sizeof(int),&cols);
1166: /* read in my part of the matrix column indices */
1167: nz = procsnz[0];
1168: PetscMalloc(nz*sizeof(int),&mycols);
1169: if (size == 1) nz -= extra_rows;
1170: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1171: if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
1173: /* read in everyone else's part and ship it off */
1174: for (i=1; i<size-1; i++) {
1175: nz = procsnz[i];
1176: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1177: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1178: }
1179: /* read in the stuff for the last proc */
1180: if (size != 1) {
1181: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1182: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1183: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1184: MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1185: }
1186: PetscFree(cols);
1187: } else {
1188: /* determine buffer space needed for message */
1189: nz = 0;
1190: for (i=0; i<m; i++) {
1191: nz += ourlens[i];
1192: }
1193: PetscMalloc(nz*sizeof(int),&mycols);
1195: /* receive message of column indices*/
1196: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1197: MPI_Get_count(&status,MPI_INT,&maxnz);
1198: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1199: }
1201: MatCreateMPIBDiag(comm,m,M+extra_rows,N+extra_rows,PETSC_NULL,bs,PETSC_NULL,PETSC_NULL,newmat);
1202: A = *newmat;
1204: if (!rank) {
1205: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1207: /* read in my part of the matrix numerical values */
1208: nz = procsnz[0];
1209: if (size == 1) nz -= extra_rows;
1210: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1211: if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
1213: /* insert into matrix */
1214: jj = rstart;
1215: smycols = mycols;
1216: svals = vals;
1217: for (i=0; i<m; i++) {
1218: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1219: smycols += ourlens[i];
1220: svals += ourlens[i];
1221: jj++;
1222: }
1224: /* read in other processors (except the last one) and ship out */
1225: for (i=1; i<size-1; i++) {
1226: nz = procsnz[i];
1227: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1228: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1229: }
1230: /* the last proc */
1231: if (size != 1){
1232: nz = procsnz[i] - extra_rows;
1233: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1234: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1235: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1236: }
1237: PetscFree(procsnz);
1238: } else {
1239: /* receive numeric values */
1240: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1242: /* receive message of values*/
1243: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1244: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1245: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1247: /* insert into matrix */
1248: jj = rstart;
1249: smycols = mycols;
1250: svals = vals;
1251: for (i=0; i<m; i++) {
1252: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1253: smycols += ourlens[i];
1254: svals += ourlens[i];
1255: jj++;
1256: }
1257: }
1258: PetscFree(ourlens);
1259: PetscFree(vals);
1260: PetscFree(mycols);
1261: PetscFree(rowners);
1263: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1264: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1265: return(0);
1266: }
1267: EXTERN_C_END
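/*
   A hedged sketch of loading a matrix in this format from a PETSc binary file;
   the file name and the PetscViewerBinaryOpen() mode flag are assumptions based
   on the PETSc 2.1-era API:

      PetscViewer viewer;
      Mat         A;
      int         ierr;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIBDIAG,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The option -matload_block_size <bs> selects the block size used by MatLoad_MPIBDiag()
   above; rows are padded so the global row count is divisible by bs.
*/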