Actual source code: mpidense.c
1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
3: /*
4: Basic functions for basic parallel dense matrices.
5: */
6:
7: #include "src/mat/impls/dense/mpi/mpidense.h"
8: #include "src/vec/vecimpl.h"
10: EXTERN_C_BEGIN
11: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
12: {
13: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
14: int m = A->m,rstart = mdn->rstart,ierr;
15: PetscScalar *array;
16: MPI_Comm comm;
19: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
21: /* The reuse aspect is not implemented efficiently */
22: if (reuse) { MatDestroy(*B);}
24: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
25: MatGetArray(mdn->A,&array);
26: MatCreateSeqDense(comm,m,m,array+m*rstart,B);
27: MatRestoreArray(mdn->A,&array);
28: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
29: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
30:
31: *iscopy = PETSC_TRUE;
32: return(0);
33: }
34: EXTERN_C_END
36: EXTERN int MatSetUpMultiply_MPIDense(Mat);
38: int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
39: {
40: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
41: int ierr,i,j,rstart = A->rstart,rend = A->rend,row;
42: PetscTruth roworiented = A->roworiented;
45: for (i=0; i<m; i++) {
46: if (idxm[i] < 0) continue;
47: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
48: if (idxm[i] >= rstart && idxm[i] < rend) {
49: row = idxm[i] - rstart;
50: if (roworiented) {
51: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
52: } else {
53: for (j=0; j<n; j++) {
54: if (idxn[j] < 0) continue;
55: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
56: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
57: }
58: }
59: } else {
60: if (!A->donotstash) {
61: if (roworiented) {
62: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
63: } else {
64: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
65: }
66: }
67: }
68: }
69: return(0);
70: }
72: int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
73: {
74: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
75: int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
78: for (i=0; i<m; i++) {
79: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
80: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
81: if (idxm[i] >= rstart && idxm[i] < rend) {
82: row = idxm[i] - rstart;
83: for (j=0; j<n; j++) {
84: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
85: if (idxn[j] >= mat->N) {
86: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
87: }
88: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
89: }
90: } else {
91: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
92: }
93: }
94: return(0);
95: }
97: int MatGetArray_MPIDense(Mat A,PetscScalar **array)
98: {
99: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
100: int ierr;
103: MatGetArray(a->A,array);
104: return(0);
105: }
107: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
108: {
109: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
110: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
111: int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
112: PetscScalar *av,*bv,*v = lmat->v;
113: Mat newmat;
116: ISGetIndices(isrow,&irow);
117: ISGetIndices(iscol,&icol);
118: ISGetLocalSize(isrow,&nrows);
119: ISGetLocalSize(iscol,&ncols);
121: /* No parallel redistribution currently supported! Should really check each index set
122: to confirm that it is OK. ... Currently supports only a submatrix with the same partitioning as
123: the original matrix! */
125: MatGetLocalSize(A,&nlrows,&nlcols);
126: MatGetOwnershipRange(A,&rstart,&rend);
127:
128: /* Check submatrix call */
129: if (scall == MAT_REUSE_MATRIX) {
130: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
131: /* Really need to test row and column sizes! */
132: newmat = *B;
133: } else {
134: /* Create and fill new matrix */
135: MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
136: }
138: /* Now extract the data pointers and do the copy, column at a time */
139: newmatd = (Mat_MPIDense*)newmat->data;
140: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
141:
142: for (i=0; i<ncols; i++) {
143: av = v + nlrows*icol[i];
144: for (j=0; j<nrows; j++) {
145: *bv++ = av[irow[j] - rstart];
146: }
147: }
149: /* Assemble the matrices so that the correct flags are set */
150: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
151: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
153: /* Free work space */
154: ISRestoreIndices(isrow,&irow);
155: ISRestoreIndices(iscol,&icol);
156: *B = newmat;
157: return(0);
158: }
160: int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
161: {
163: return(0);
164: }
166: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
167: {
168: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
169: MPI_Comm comm = mat->comm;
170: int ierr,nstash,reallocs;
171: InsertMode addv;
174: /* make sure all processors are either in INSERTMODE or ADDMODE */
175: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
176: if (addv == (ADD_VALUES|INSERT_VALUES)) {
177: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
178: }
179: mat->insertmode = addv; /* in case this processor had no cache */
181: MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
182: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
183: PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
184: return(0);
185: }
187: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
188: {
189: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
190: int i,n,ierr,*row,*col,flg,j,rstart,ncols;
191: PetscScalar *val;
192: InsertMode addv=mat->insertmode;
195: /* wait on receives */
196: while (1) {
197: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
198: if (!flg) break;
199:
200: for (i=0; i<n;) {
201: /* Now identify the consecutive vals belonging to the same row */
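/* e.g. (illustrative, not in the original source): if the stash delivers
   row = {3,3,5,5,5,7}, the loop below groups indices {0,1}, {2,3,4}, {5} so
   that all consecutive values belonging to one row are assembled together */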
202: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
203: if (j < n) ncols = j-i;
204: else ncols = n-i;
205: /* Now assemble all these values with a single function call */
206: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
207: i = j;
208: }
209: }
210: MatStashScatterEnd_Private(&mat->stash);
211:
212: MatAssemblyBegin(mdn->A,mode);
213: MatAssemblyEnd(mdn->A,mode);
215: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
216: MatSetUpMultiply_MPIDense(mat);
217: }
218: return(0);
219: }
221: int MatZeroEntries_MPIDense(Mat A)
222: {
223: int ierr;
224: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
227: MatZeroEntries(l->A);
228: return(0);
229: }
231: int MatGetBlockSize_MPIDense(Mat A,int *bs)
232: {
234: *bs = 1;
235: return(0);
236: }
238: /* the code does not do the diagonal entries correctly unless the
239: matrix is square and the column and row ownerships are identical.
240: This is a BUG. The only way to fix it seems to be to access
241: mdn->A and mdn->B directly and not through the MatZeroRows()
242: routine.
243: */
244: int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
245: {
246: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
247: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
248: int *procs,*nprocs,j,idx,nsends,*work;
249: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
250: int *rvalues,tag = A->tag,count,base,slen,n,*source;
251: int *lens,imdex,*lrows,*values;
252: MPI_Comm comm = A->comm;
253: MPI_Request *send_waits,*recv_waits;
254: MPI_Status recv_status,*send_status;
255: IS istmp;
256: PetscTruth found;
259: ISGetLocalSize(is,&N);
260: ISGetIndices(is,&rows);
262: /* first count number of contributors to each processor */
263: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
264: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
265: procs = nprocs + size;
266: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
267: for (i=0; i<N; i++) {
268: idx = rows[i];
269: found = PETSC_FALSE;
270: for (j=0; j<size; j++) {
271: if (idx >= owners[j] && idx < owners[j+1]) {
272: nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
273: }
274: }
275: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
276: }
277: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
279: /* inform other processors of number of messages and max length*/
280: ierr = PetscMalloc(2*size*sizeof(int),&work);
281: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
282: nmax = work[rank];
283: nrecvs = work[size+rank];
284: ierr = PetscFree(work);
286: /* post receives: */
287: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
288: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
289: for (i=0; i<nrecvs; i++) {
290: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
291: }
293: /* do sends:
294: 1) starts[i] gives the starting index in svalues for stuff going to
295: the ith processor
296: */
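/* e.g. (illustrative, not in the original source): with size = 3 and
   nprocs = {2,0,3}, the prefix sums below give starts = {0,2,2}, so the two
   rows destined for process 0 are packed into svalues[0..1] and the three
   rows for process 2 into svalues[2..4] */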
297: PetscMalloc((N+1)*sizeof(int),&svalues);
298: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
299: PetscMalloc((size+1)*sizeof(int),&starts);
300: starts[0] = 0;
301: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
302: for (i=0; i<N; i++) {
303: svalues[starts[owner[i]]++] = rows[i];
304: }
305: ISRestoreIndices(is,&rows);
307: starts[0] = 0;
308: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
309: count = 0;
310: for (i=0; i<size; i++) {
311: if (procs[i]) {
312: MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
313: }
314: }
315: PetscFree(starts);
317: base = owners[rank];
319: /* wait on receives */
320: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
321: source = lens + nrecvs;
322: count = nrecvs; slen = 0;
323: while (count) {
324: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
325: /* unpack receives into our local space */
326: MPI_Get_count(&recv_status,MPI_INT,&n);
327: source[imdex] = recv_status.MPI_SOURCE;
328: lens[imdex] = n;
329: slen += n;
330: count--;
331: }
332: PetscFree(recv_waits);
333:
334: /* move the data into the send scatter */
335: PetscMalloc((slen+1)*sizeof(int),&lrows);
336: count = 0;
337: for (i=0; i<nrecvs; i++) {
338: values = rvalues + i*nmax;
339: for (j=0; j<lens[i]; j++) {
340: lrows[count++] = values[j] - base;
341: }
342: }
343: PetscFree(rvalues);
344: PetscFree(lens);
345: PetscFree(owner);
346: PetscFree(nprocs);
347:
348: /* actually zap the local rows */
349: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
350: PetscLogObjectParent(A,istmp);
351: PetscFree(lrows);
352: MatZeroRows(l->A,istmp,diag);
353: ISDestroy(istmp);
355: /* wait on sends */
356: if (nsends) {
357: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
358: MPI_Waitall(nsends,send_waits,send_status);
359: PetscFree(send_status);
360: }
361: PetscFree(send_waits);
362: PetscFree(svalues);
364: return(0);
365: }
367: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
368: {
369: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
370: int ierr;
373: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
374: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
375: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
376: return(0);
377: }
379: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
380: {
381: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
382: int ierr;
385: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
386: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
387: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
388: return(0);
389: }
391: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
392: {
393: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
394: int ierr;
395: PetscScalar zero = 0.0;
398: VecSet(&zero,yy);
399: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
400: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
401: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
402: return(0);
403: }
405: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
406: {
407: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
408: int ierr;
411: VecCopy(yy,zz);
412: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
413: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
414: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
415: return(0);
416: }
418: int MatGetDiagonal_MPIDense(Mat A,Vec v)
419: {
420: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
421: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
422: int ierr,len,i,n,m = A->m,radd;
423: PetscScalar *x,zero = 0.0;
424:
426: VecSet(&zero,v);
427: VecGetArray(v,&x);
428: VecGetSize(v,&n);
429: if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
430: len = PetscMin(a->A->m,a->A->n);
431: radd = a->rstart*m;
432: for (i=0; i<len; i++) {
433: x[i] = aloc->v[radd + i*m + i];
434: }
435: VecRestoreArray(v,&x);
436: return(0);
437: }
439: int MatDestroy_MPIDense(Mat mat)
440: {
441: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
442: int ierr;
446: #if defined(PETSC_USE_LOG)
447: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
448: #endif
449: MatStashDestroy_Private(&mat->stash);
450: PetscFree(mdn->rowners);
451: MatDestroy(mdn->A);
452: if (mdn->lvec) VecDestroy(mdn->lvec);
453: if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
454: if (mdn->factor) {
455: if (mdn->factor->temp) {PetscFree(mdn->factor->temp);}
456: if (mdn->factor->tag) {PetscFree(mdn->factor->tag);}
457: if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
458: PetscFree(mdn->factor);
459: }
460: PetscFree(mdn);
461: return(0);
462: }
464: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
465: {
466: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
467: int ierr;
470: if (mdn->size == 1) {
471: MatView(mdn->A,viewer);
472: }
473: else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
474: return(0);
475: }
477: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
478: {
479: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
480: int ierr,size = mdn->size,rank = mdn->rank;
481: PetscViewerType vtype;
482: PetscTruth isascii,isdraw;
483: PetscViewer sviewer;
484: PetscViewerFormat format;
487: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
488: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
489: if (isascii) {
490: PetscViewerGetType(viewer,&vtype);
491: PetscViewerGetFormat(viewer,&format);
492: if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
493: MatInfo info;
494: MatGetInfo(mat,MAT_LOCAL,&info);
495: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
496: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
497: PetscViewerFlush(viewer);
498: VecScatterView(mdn->Mvctx,viewer);
499: return(0);
500: } else if (format == PETSC_VIEWER_ASCII_INFO) {
501: return(0);
502: }
503: } else if (isdraw) {
504: PetscDraw draw;
505: PetscTruth isnull;
507: PetscViewerDrawGetDraw(viewer,0,&draw);
508: PetscDrawIsNull(draw,&isnull);
509: if (isnull) return(0);
510: }
512: if (size == 1) {
513: MatView(mdn->A,viewer);
514: } else {
515: /* assemble the entire matrix onto first processor. */
516: Mat A;
517: int M = mat->M,N = mat->N,m,row,i,nz,*cols;
518: PetscScalar *vals;
520: if (!rank) {
521: MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
522: } else {
523: MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
524: }
525: PetscLogObjectParent(mat,A);
527: /* Copy the matrix ... This isn't the most efficient means,
528: but it's quick for now */
529: row = mdn->rstart; m = mdn->A->m;
530: for (i=0; i<m; i++) {
531: MatGetRow(mat,row,&nz,&cols,&vals);
532: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
533: MatRestoreRow(mat,row,&nz,&cols,&vals);
534: row++;
535: }
537: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
538: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
539: PetscViewerGetSingleton(viewer,&sviewer);
540: if (!rank) {
541: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
542: }
543: PetscViewerRestoreSingleton(viewer,&sviewer);
544: PetscViewerFlush(viewer);
545: MatDestroy(A);
546: }
547: return(0);
548: }
550: int MatView_MPIDense(Mat mat,PetscViewer viewer)
551: {
552: int ierr;
553: PetscTruth isascii,isbinary,isdraw,issocket;
554:
556:
557: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
558: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
559: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
560: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
562: if (isascii || issocket || isdraw) {
563: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
564: } else if (isbinary) {
565: MatView_MPIDense_Binary(mat,viewer);
566: } else {
567: SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
568: }
569: return(0);
570: }
572: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
573: {
574: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
575: Mat mdn = mat->A;
576: int ierr;
577: PetscReal isend[5],irecv[5];
580: info->rows_global = (double)A->M;
581: info->columns_global = (double)A->N;
582: info->rows_local = (double)A->m;
583: info->columns_local = (double)A->N;
584: info->block_size = 1.0;
585: MatGetInfo(mdn,MAT_LOCAL,info);
586: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
587: isend[3] = info->memory; isend[4] = info->mallocs;
588: if (flag == MAT_LOCAL) {
589: info->nz_used = isend[0];
590: info->nz_allocated = isend[1];
591: info->nz_unneeded = isend[2];
592: info->memory = isend[3];
593: info->mallocs = isend[4];
594: } else if (flag == MAT_GLOBAL_MAX) {
595: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
596: info->nz_used = irecv[0];
597: info->nz_allocated = irecv[1];
598: info->nz_unneeded = irecv[2];
599: info->memory = irecv[3];
600: info->mallocs = irecv[4];
601: } else if (flag == MAT_GLOBAL_SUM) {
602: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
603: info->nz_used = irecv[0];
604: info->nz_allocated = irecv[1];
605: info->nz_unneeded = irecv[2];
606: info->memory = irecv[3];
607: info->mallocs = irecv[4];
608: }
609: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
610: info->fill_ratio_needed = 0;
611: info->factor_mallocs = 0;
612: return(0);
613: }
615: int MatSetOption_MPIDense(Mat A,MatOption op)
616: {
617: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
618: int ierr;
621: switch (op) {
622: case MAT_NO_NEW_NONZERO_LOCATIONS:
623: case MAT_YES_NEW_NONZERO_LOCATIONS:
624: case MAT_NEW_NONZERO_LOCATION_ERR:
625: case MAT_NEW_NONZERO_ALLOCATION_ERR:
626: case MAT_COLUMNS_SORTED:
627: case MAT_COLUMNS_UNSORTED:
628: MatSetOption(a->A,op);
629: break;
630: case MAT_ROW_ORIENTED:
631: a->roworiented = PETSC_TRUE;
632: MatSetOption(a->A,op);
633: break;
634: case MAT_ROWS_SORTED:
635: case MAT_ROWS_UNSORTED:
636: case MAT_YES_NEW_DIAGONALS:
637: case MAT_USE_HASH_TABLE:
638: case MAT_USE_SINGLE_PRECISION_SOLVES:
639: PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
640: break;
641: case MAT_COLUMN_ORIENTED:
642: a->roworiented = PETSC_FALSE;
643: MatSetOption(a->A,op);
644: break;
645: case MAT_IGNORE_OFF_PROC_ENTRIES:
646: a->donotstash = PETSC_TRUE;
647: break;
648: case MAT_NO_NEW_DIAGONALS:
649: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
650: default:
651: SETERRQ(PETSC_ERR_SUP,"unknown option");
652: }
653: return(0);
654: }
656: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
657: {
658: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
659: int lrow,rstart = mat->rstart,rend = mat->rend,ierr;
662: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
663: lrow = row - rstart;
664: MatGetRow(mat->A,lrow,nz,idx,v);
665: return(0);
666: }
668: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
669: {
673: if (idx) {PetscFree(*idx);}
674: if (v) {PetscFree(*v);}
675: return(0);
676: }
678: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
679: {
680: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
681: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
682: PetscScalar *l,*r,x,*v;
683: int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
686: MatGetLocalSize(A,&s2,&s3);
687: if (ll) {
688: VecGetLocalSize(ll,&s2a);
689: if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
690: VecGetArray(ll,&l);
691: for (i=0; i<m; i++) {
692: x = l[i];
693: v = mat->v + i;
694: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
695: }
696: VecRestoreArray(ll,&l);
697: PetscLogFlops(n*m);
698: }
699: if (rr) {
700: VecGetSize(rr,&s3a);
701: if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
702: VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
703: VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
704: VecGetArray(mdn->lvec,&r);
705: for (i=0; i<n; i++) {
706: x = r[i];
707: v = mat->v + i*m;
708: for (j=0; j<m; j++) { (*v++) *= x;}
709: }
710: VecRestoreArray(mdn->lvec,&r);
711: PetscLogFlops(n*m);
712: }
713: return(0);
714: }
716: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
717: {
718: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
719: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
720: int ierr,i,j;
721: PetscReal sum = 0.0;
722: PetscScalar *v = mat->v;
725: if (mdn->size == 1) {
726: MatNorm(mdn->A,type,nrm);
727: } else {
728: if (type == NORM_FROBENIUS) {
729: for (i=0; i<mdn->A->n*mdn->A->m; i++) {
730: #if defined(PETSC_USE_COMPLEX)
731: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
732: #else
733: sum += (*v)*(*v); v++;
734: #endif
735: }
736: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
737: *nrm = sqrt(*nrm);
738: PetscLogFlops(2*mdn->A->n*mdn->A->m);
739: } else if (type == NORM_1) {
740: PetscReal *tmp,*tmp2;
741: PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
742: tmp2 = tmp + A->N;
743: PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
744: *nrm = 0.0;
745: v = mat->v;
746: for (j=0; j<mdn->A->n; j++) {
747: for (i=0; i<mdn->A->m; i++) {
748: tmp[j] += PetscAbsScalar(*v); v++;
749: }
750: }
751: MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
752: for (j=0; j<A->N; j++) {
753: if (tmp2[j] > *nrm) *nrm = tmp2[j];
754: }
755: PetscFree(tmp);
756: PetscLogFlops(A->n*A->m);
757: } else if (type == NORM_INFINITY) { /* max row norm */
758: PetscReal ntemp;
759: MatNorm(mdn->A,type,&ntemp);
760: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
761: } else {
762: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
763: }
764: }
765: return(0);
766: }
768: int MatTranspose_MPIDense(Mat A,Mat *matout)
769: {
770: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
771: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
772: Mat B;
773: int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
774: int j,i,ierr;
775: PetscScalar *v;
778: if (!matout && M != N) {
779: SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
780: }
781: MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
783: m = a->A->m; n = a->A->n; v = Aloc->v;
784: PetscMalloc(n*sizeof(int),&rwork);
785: for (j=0; j<n; j++) {
786: for (i=0; i<m; i++) rwork[i] = rstart + i;
787: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
788: v += m;
789: }
790: PetscFree(rwork);
791: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
792: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
793: if (matout) {
794: *matout = B;
795: } else {
796: MatHeaderCopy(A,B);
797: }
798: return(0);
799: }
801: #include "petscblaslapack.h"
802: int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
803: {
804: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
805: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
806: int one = 1,nz;
809: nz = inA->m*inA->N;
810: BLscal_(&nz,alpha,a->v,&one);
811: PetscLogFlops(nz);
812: return(0);
813: }
815: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
816: EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
818: int MatSetUpPreallocation_MPIDense(Mat A)
819: {
820: int ierr;
823: MatMPIDenseSetPreallocation(A,0);
824: return(0);
825: }
827: /* -------------------------------------------------------------------*/
828: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
829: MatGetRow_MPIDense,
830: MatRestoreRow_MPIDense,
831: MatMult_MPIDense,
832: MatMultAdd_MPIDense,
833: MatMultTranspose_MPIDense,
834: MatMultTransposeAdd_MPIDense,
835: 0,
836: 0,
837: 0,
838: 0,
839: 0,
840: 0,
841: 0,
842: MatTranspose_MPIDense,
843: MatGetInfo_MPIDense,0,
844: MatGetDiagonal_MPIDense,
845: MatDiagonalScale_MPIDense,
846: MatNorm_MPIDense,
847: MatAssemblyBegin_MPIDense,
848: MatAssemblyEnd_MPIDense,
849: 0,
850: MatSetOption_MPIDense,
851: MatZeroEntries_MPIDense,
852: MatZeroRows_MPIDense,
853: 0,
854: 0,
855: 0,
856: 0,
857: MatSetUpPreallocation_MPIDense,
858: 0,
859: 0,
860: MatGetArray_MPIDense,
861: MatRestoreArray_MPIDense,
862: MatDuplicate_MPIDense,
863: 0,
864: 0,
865: 0,
866: 0,
867: 0,
868: MatGetSubMatrices_MPIDense,
869: 0,
870: MatGetValues_MPIDense,
871: 0,
872: 0,
873: MatScale_MPIDense,
874: 0,
875: 0,
876: 0,
877: MatGetBlockSize_MPIDense,
878: 0,
879: 0,
880: 0,
881: 0,
882: 0,
883: 0,
884: 0,
885: 0,
886: 0,
887: MatGetSubMatrix_MPIDense,
888: MatDestroy_MPIDense,
889: MatView_MPIDense,
890: MatGetPetscMaps_Petsc};
892: EXTERN_C_BEGIN
893: int MatCreate_MPIDense(Mat mat)
894: {
895: Mat_MPIDense *a;
896: int ierr,i;
899: ierr = PetscNew(Mat_MPIDense,&a);
900: mat->data = (void*)a;
901: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
902: mat->factor = 0;
903: mat->mapping = 0;
905: a->factor = 0;
906: mat->insertmode = NOT_SET_VALUES;
907: MPI_Comm_rank(mat->comm,&a->rank);
908: MPI_Comm_size(mat->comm,&a->size);
910: PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
911: PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
912: a->nvec = mat->n;
914: /* the information in the maps duplicates the information computed below, eventually
915: we should remove the duplicate information that is not contained in the maps */
916: PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
917: PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);
919: /* build local table of row and column ownerships */
920: ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
921: a->cowners = a->rowners + a->size + 1;
922: PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
923: MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
924: a->rowners[0] = 0;
925: for (i=2; i<=a->size; i++) {
926: a->rowners[i] += a->rowners[i-1];
927: }
928: a->rstart = a->rowners[a->rank];
929: a->rend = a->rowners[a->rank+1];
930: ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
931: a->cowners[0] = 0;
932: for (i=2; i<=a->size; i++) {
933: a->cowners[i] += a->cowners[i-1];
934: }
936: /* build cache for off array entries formed */
937: a->donotstash = PETSC_FALSE;
938: MatStashCreate_Private(mat->comm,1,&mat->stash);
940: /* stuff used for matrix vector multiply */
941: a->lvec = 0;
942: a->Mvctx = 0;
943: a->roworiented = PETSC_TRUE;
945: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
946: "MatGetDiagonalBlock_MPIDense",
947: MatGetDiagonalBlock_MPIDense);
948: return(0);
949: }
950: EXTERN_C_END
952: /*@C
953: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
955: Not collective
957: Input Parameters:
958: + A - the matrix
959: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
960: to control all matrix memory allocation.
962: Notes:
963: The dense format is fully compatible with standard Fortran 77
964: storage by columns.
966: The data input variable is intended primarily for Fortran programmers
967: who wish to allocate their own matrix memory space. Most users should
968: set data=PETSC_NULL.
970: Level: intermediate
972: .keywords: matrix,dense, parallel
974: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
975: @*/
976: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
977: {
978: Mat_MPIDense *a;
979: int ierr;
980: PetscTruth flg2;
983: PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);
984: if (!flg2) return(0);
985: mat->preallocated = PETSC_TRUE;
986: /* Note: For now, when data is specified above, this assumes the user correctly
987: allocates the local dense storage space. We should add error checking. */
989: a = (Mat_MPIDense*)mat->data;
990: MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
991: PetscLogObjectParent(mat,a->A);
992: return(0);
993: }
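/*
   Usage sketch (illustrative, not part of the original source): creating an
   MPIDense matrix whose local storage is supplied by the caller, mirroring
   the body of MatCreateMPIDense() below.  The routine name ExampleUserStorage
   and its arguments are hypothetical; based on the MatCreateSeqDense() call
   above, the user array is assumed to hold (local rows)*(global columns)
   entries in column-major order.  It relies only on the headers already
   included at the top of this file, and error checking is omitted as in the
   listing above.
*/
int ExampleUserStorage(MPI_Comm comm,int m,int N,PetscScalar *user_array,Mat *A)
{
  int ierr;

  ierr = MatCreate(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,A);   /* m local rows, N global columns */
  ierr = MatSetType(*A,MATMPIDENSE);
  ierr = MatMPIDenseSetPreallocation(*A,user_array);        /* caller-provided m*N storage */
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return 0;
}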
995: /*@C
996: MatCreateMPIDense - Creates a parallel matrix in dense format.
998: Collective on MPI_Comm
1000: Input Parameters:
1001: + comm - MPI communicator
1002: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1003: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1004: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1005: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1006: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1007: to control all matrix memory allocation.
1009: Output Parameter:
1010: . A - the matrix
1012: Notes:
1013: The dense format is fully compatible with standard Fortran 77
1014: storage by columns.
1016: The data input variable is intended primarily for Fortran programmers
1017: who wish to allocate their own matrix memory space. Most users should
1018: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1020: The user MUST specify either the local or global matrix dimensions
1021: (possibly both).
1023: Level: intermediate
1025: .keywords: matrix,dense, parallel
1027: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1028: @*/
1029: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1030: {
1031: int ierr,size;
1034: MatCreate(comm,m,n,M,N,A);
1035: MPI_Comm_size(comm,&size);
1036: if (size > 1) {
1037: MatSetType(*A,MATMPIDENSE);
1038: MatMPIDenseSetPreallocation(*A,data);
1039: } else {
1040: MatSetType(*A,MATSEQDENSE);
1041: MatSeqDenseSetPreallocation(*A,data);
1042: }
1043: return(0);
1044: }
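/*
   Usage sketch (illustrative, not part of the original source): letting PETSc
   allocate the storage (data = PETSC_NULL), filling only the locally owned
   rows, and assembling.  The routine name ExampleCreateAndFill and the matrix
   size are hypothetical; error checking is omitted as in the listing above.
*/
int ExampleCreateAndFill(MPI_Comm comm,Mat *A)
{
  int         ierr,i,rstart,rend,N = 10;
  PetscScalar one = 1.0;

  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,PETSC_NULL,A);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);
  for (i=rstart; i<rend; i++) {                   /* each process inserts only rows it owns */
    ierr = MatSetValues(*A,1,&i,1,&i,&one,INSERT_VALUES);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return 0;
}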
1046: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1047: {
1048: Mat mat;
1049: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1050: int ierr;
1053: *newmat = 0;
1054: MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1055: MatSetType(mat,MATMPIDENSE);
1056: ierr = PetscNew(Mat_MPIDense,&a);
1057: mat->data = (void*)a;
1058: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1059: mat->factor = A->factor;
1060: mat->assembled = PETSC_TRUE;
1061: mat->preallocated = PETSC_TRUE;
1063: a->rstart = oldmat->rstart;
1064: a->rend = oldmat->rend;
1065: a->size = oldmat->size;
1066: a->rank = oldmat->rank;
1067: mat->insertmode = NOT_SET_VALUES;
1068: a->nvec = oldmat->nvec;
1069: a->donotstash = oldmat->donotstash;
1070: ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1071: PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1072: PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1073: MatStashCreate_Private(A->comm,1,&mat->stash);
1075: MatSetUpMultiply_MPIDense(mat);
1076: MatDuplicate(oldmat->A,cpvalues,&a->A);
1077: PetscLogObjectParent(mat,a->A);
1078: *newmat = mat;
1079: return(0);
1080: }
1082: #include "petscsys.h"
1084: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1085: {
1086: int *rowners,i,size,rank,m,ierr,nz,j;
1087: PetscScalar *array,*vals,*vals_ptr;
1088: MPI_Status status;
1091: MPI_Comm_rank(comm,&rank);
1092: MPI_Comm_size(comm,&size);
1094: /* determine ownership of all rows */
1095: m = M/size + ((M % size) > rank);
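/* e.g. (illustrative, not in the original source): M = 10 rows on size = 3
   processes gives m = 4,3,3 for ranks 0,1,2, since the first (M % size)
   ranks each take one extra row */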
1096: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1097: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1098: rowners[0] = 0;
1099: for (i=2; i<=size; i++) {
1100: rowners[i] += rowners[i-1];
1101: }
1103: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1104: MatGetArray(*newmat,&array);
1106: if (!rank) {
1107: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1109: /* read in my part of the matrix numerical values */
1110: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1111:
1112: /* insert into matrix by row (this is why we cannot read directly into the array) */
1113: vals_ptr = vals;
1114: for (i=0; i<m; i++) {
1115: for (j=0; j<N; j++) {
1116: array[i + j*m] = *vals_ptr++;
1117: }
1118: }
1120: /* read in the other processors' parts and ship them out */
1121: for (i=1; i<size; i++) {
1122: nz = (rowners[i+1] - rowners[i])*N;
1123: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1124: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1125: }
1126: } else {
1127: /* receive numeric values */
1128: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1130: /* receive message of values*/
1131: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1133: /* insert into matrix by row (this is why we cannot read directly into the array) */
1134: vals_ptr = vals;
1135: for (i=0; i<m; i++) {
1136: for (j=0; j<N; j++) {
1137: array[i + j*m] = *vals_ptr++;
1138: }
1139: }
1140: }
1141: PetscFree(rowners);
1142: PetscFree(vals);
1143: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1144: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1145: return(0);
1146: }
1148: EXTERN_C_BEGIN
1149: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1150: {
1151: Mat A;
1152: PetscScalar *vals,*svals;
1153: MPI_Comm comm = ((PetscObject)viewer)->comm;
1154: MPI_Status status;
1155: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1156: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1157: int tag = ((PetscObject)viewer)->tag;
1158: int i,nz,ierr,j,rstart,rend,fd;
1161: MPI_Comm_size(comm,&size);
1162: MPI_Comm_rank(comm,&rank);
1163: if (!rank) {
1164: PetscViewerBinaryGetDescriptor(viewer,&fd);
1165: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1166: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1167: }
1169: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1170: M = header[1]; N = header[2]; nz = header[3];
1172: /*
1173: Handle case where matrix is stored on disk as a dense matrix
1174: */
1175: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1176: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1177: return(0);
1178: }
1180: /* determine ownership of all rows */
1181: m = M/size + ((M % size) > rank);
1182: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1183: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1184: rowners[0] = 0;
1185: for (i=2; i<=size; i++) {
1186: rowners[i] += rowners[i-1];
1187: }
1188: rstart = rowners[rank];
1189: rend = rowners[rank+1];
1191: /* distribute row lengths to all processors */
1192: ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1193: offlens = ourlens + (rend-rstart);
1194: if (!rank) {
1195: PetscMalloc(M*sizeof(int),&rowlengths);
1196: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1197: PetscMalloc(size*sizeof(int),&sndcounts);
1198: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1199: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1200: PetscFree(sndcounts);
1201: } else {
1202: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1203: }
1205: if (!rank) {
1206: /* calculate the number of nonzeros on each processor */
1207: PetscMalloc(size*sizeof(int),&procsnz);
1208: PetscMemzero(procsnz,size*sizeof(int));
1209: for (i=0; i<size; i++) {
1210: for (j=rowners[i]; j< rowners[i+1]; j++) {
1211: procsnz[i] += rowlengths[j];
1212: }
1213: }
1214: PetscFree(rowlengths);
1216: /* determine max buffer needed and allocate it */
1217: maxnz = 0;
1218: for (i=0; i<size; i++) {
1219: maxnz = PetscMax(maxnz,procsnz[i]);
1220: }
1221: PetscMalloc(maxnz*sizeof(int),&cols);
1223: /* read in my part of the matrix column indices */
1224: nz = procsnz[0];
1225: PetscMalloc(nz*sizeof(int),&mycols);
1226: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1228: /* read in everyone else's part and ship it off */
1229: for (i=1; i<size; i++) {
1230: nz = procsnz[i];
1231: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1232: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1233: }
1234: PetscFree(cols);
1235: } else {
1236: /* determine buffer space needed for message */
1237: nz = 0;
1238: for (i=0; i<m; i++) {
1239: nz += ourlens[i];
1240: }
1241: PetscMalloc(nz*sizeof(int),&mycols);
1243: /* receive message of column indices*/
1244: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1245: MPI_Get_count(&status,MPI_INT,&maxnz);
1246: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1247: }
1249: /* loop over local rows, determining number of off diagonal entries */
1250: PetscMemzero(offlens,m*sizeof(int));
1251: jj = 0;
1252: for (i=0; i<m; i++) {
1253: for (j=0; j<ourlens[i]; j++) {
1254: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1255: jj++;
1256: }
1257: }
1259: /* create our matrix */
1260: for (i=0; i<m; i++) {
1261: ourlens[i] -= offlens[i];
1262: }
1263: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1264: A = *newmat;
1265: for (i=0; i<m; i++) {
1266: ourlens[i] += offlens[i];
1267: }
1269: if (!rank) {
1270: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1272: /* read in my part of the matrix numerical values */
1273: nz = procsnz[0];
1274: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1275:
1276: /* insert into matrix */
1277: jj = rstart;
1278: smycols = mycols;
1279: svals = vals;
1280: for (i=0; i<m; i++) {
1281: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1282: smycols += ourlens[i];
1283: svals += ourlens[i];
1284: jj++;
1285: }
1287: /* read in the other processors' parts and ship them out */
1288: for (i=1; i<size; i++) {
1289: nz = procsnz[i];
1290: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1291: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1292: }
1293: PetscFree(procsnz);
1294: } else {
1295: /* receive numeric values */
1296: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1298: /* receive message of values*/
1299: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1300: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1301: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1303: /* insert into matrix */
1304: jj = rstart;
1305: smycols = mycols;
1306: svals = vals;
1307: for (i=0; i<m; i++) {
1308: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1309: smycols += ourlens[i];
1310: svals += ourlens[i];
1311: jj++;
1312: }
1313: }
1314: PetscFree(ourlens);
1315: PetscFree(vals);
1316: PetscFree(mycols);
1317: PetscFree(rowners);
1319: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1320: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1321: return(0);
1322: }
1323: EXTERN_C_END