Actual source code: mpidense.c
1: #define PETSCMAT_DLL
3: /*
4: Basic functions for parallel dense matrices.
5: */
6:
7: #include "src/mat/impls/dense/mpi/mpidense.h"
11: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
12: {
13: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
15: PetscInt lrow,rstart = mat->rstart,rend = mat->rend;
18: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
19: lrow = row - rstart;
20: MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
21: return(0);
22: }
26: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
27: {
31: if (idx) {PetscFree(*idx);}
32: if (v) {PetscFree(*v);}
33: return(0);
34: }
39: PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
40: {
41: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
43: PetscInt m = A->m,rstart = mdn->rstart;
44: PetscScalar *array;
45: MPI_Comm comm;
48: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
50: /* The reuse aspect is not implemented efficiently */
51: if (reuse) { MatDestroy(*B);}
53: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
54: MatGetArray(mdn->A,&array);
55: MatCreate(comm,B);
56: MatSetSizes(*B,m,m,m,m);
57: MatSetType(*B,mdn->A->type_name);
58: MatSeqDenseSetPreallocation(*B,array+m*rstart);
59: MatRestoreArray(mdn->A,&array);
60: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
61: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
62:
63: *iscopy = PETSC_TRUE;
64: return(0);
65: }
70: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
71: {
72: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
74: PetscInt i,j,rstart = A->rstart,rend = A->rend,row;
75: PetscTruth roworiented = A->roworiented;
78: for (i=0; i<m; i++) {
79: if (idxm[i] < 0) continue;
80: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
81: if (idxm[i] >= rstart && idxm[i] < rend) {
82: row = idxm[i] - rstart;
83: if (roworiented) {
84: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
85: } else {
86: for (j=0; j<n; j++) {
87: if (idxn[j] < 0) continue;
88: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
89: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
90: }
91: }
92: } else {
93: if (!A->donotstash) {
94: if (roworiented) {
95: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
96: } else {
97: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
98: }
99: }
100: }
101: }
102: return(0);
103: }
107: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
108: {
109: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
111: PetscInt i,j,rstart = mdn->rstart,rend = mdn->rend,row;
114: for (i=0; i<m; i++) {
115: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
116: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
117: if (idxm[i] >= rstart && idxm[i] < rend) {
118: row = idxm[i] - rstart;
119: for (j=0; j<n; j++) {
120: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
121: if (idxn[j] >= mat->N) {
122: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
123: }
124: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
125: }
126: } else {
127: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
128: }
129: }
130: return(0);
131: }
135: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
136: {
137: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
141: MatGetArray(a->A,array);
142: return(0);
143: }
147: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
148: {
149: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
150: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
152: PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
153: PetscScalar *av,*bv,*v = lmat->v;
154: Mat newmat;
157: ISGetIndices(isrow,&irow);
158: ISGetIndices(iscol,&icol);
159: ISGetLocalSize(isrow,&nrows);
160: ISGetLocalSize(iscol,&ncols);
162: /* No parallel redistribution currently supported! Should really check each index set
163: to confirm that it is OK. ... Currently supports only a submatrix with the same
164: partitioning as the original matrix! */
166: MatGetLocalSize(A,&nlrows,&nlcols);
167: MatGetOwnershipRange(A,&rstart,&rend);
168:
169: /* Check submatrix call */
170: if (scall == MAT_REUSE_MATRIX) {
171: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
172: /* Really need to test rows and column sizes! */
173: newmat = *B;
174: } else {
175: /* Create and fill new matrix */
176: MatCreate(A->comm,&newmat);
177: MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);
178: MatSetType(newmat,A->type_name);
179: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
180: }
182: /* Now extract the data pointers and do the copy, column at a time */
183: newmatd = (Mat_MPIDense*)newmat->data;
184: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
185:
186: for (i=0; i<ncols; i++) {
187: av = v + nlrows*icol[i];
188: for (j=0; j<nrows; j++) {
189: *bv++ = av[irow[j] - rstart];
190: }
191: }
193: /* Assemble the matrices so that the correct flags are set */
194: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
195: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
197: /* Free work space */
198: ISRestoreIndices(isrow,&irow);
199: ISRestoreIndices(iscol,&icol);
200: *B = newmat;
201: return(0);
202: }
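/*
   A minimal usage sketch of the restriction noted above (hypothetical caller code,
   variables assumed declared, matrix assumed to live on PETSC_COMM_WORLD; error
   checking omitted to match this file): the row index set must contain only locally
   owned rows, since no redistribution is performed.

      IS       isrow,iscol;
      Mat      sub;
      PetscInt rstart,rend,N;

      MatGetOwnershipRange(A,&rstart,&rend);
      MatGetSize(A,PETSC_NULL,&N);
      ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);  /* exactly my rows    */
      ISCreateStride(PETSC_COMM_WORLD,N,0,1,&iscol);                 /* any global columns */
      MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
*/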
206: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
207: {
209: return(0);
210: }
214: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
215: {
216: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
217: MPI_Comm comm = mat->comm;
219: PetscInt nstash,reallocs;
220: InsertMode addv;
223: /* make sure all processors are either in INSERTMODE or ADDMODE */
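/* (a bitwise OR across ranks equals ADD_VALUES|INSERT_VALUES exactly when some ranks
   inserted while others added, which is what the test below detects) */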
224: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
225: if (addv == (ADD_VALUES|INSERT_VALUES)) {
226: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
227: }
228: mat->insertmode = addv; /* in case this processor had no cache */
230: MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
231: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
232: PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));
233: return(0);
234: }
238: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
239: {
240: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
241: PetscErrorCode ierr;
242: PetscInt i,*row,*col,flg,j,rstart,ncols;
243: PetscMPIInt n;
244: PetscScalar *val;
245: InsertMode addv=mat->insertmode;
248: /* wait on receives */
249: while (1) {
250: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
251: if (!flg) break;
252:
253: for (i=0; i<n;) {
254: /* Now identify the consecutive vals belonging to the same row */
255: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
256: if (j < n) ncols = j-i;
257: else ncols = n-i;
258: /* Now assemble all these values with a single function call */
259: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
260: i = j;
261: }
262: }
263: MatStashScatterEnd_Private(&mat->stash);
264:
265: MatAssemblyBegin(mdn->A,mode);
266: MatAssemblyEnd(mdn->A,mode);
268: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
269: MatSetUpMultiply_MPIDense(mat);
270: }
271: return(0);
272: }
276: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
277: {
279: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
282: MatZeroEntries(l->A);
283: return(0);
284: }
286: /* the code does not do the diagonal entries correctly unless the
287: matrix is square and the column and row ownerships are identical.
288: This is a BUG. The only way to fix it seems to be to access
289: the local matrix (l->A) directly and not through the MatZeroRows()
290: routine.
291: */
294: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
295: {
296: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
298: PetscInt i,*owners = l->rowners;
299: PetscInt *nprocs,j,idx,nsends;
300: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
301: PetscInt *rvalues,tag = A->tag,count,base,slen,*source;
302: PetscInt *lens,*lrows,*values;
303: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
304: MPI_Comm comm = A->comm;
305: MPI_Request *send_waits,*recv_waits;
306: MPI_Status recv_status,*send_status;
307: PetscTruth found;
310: /* first count number of contributors to each processor */
311: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
312: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
313: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
314: for (i=0; i<N; i++) {
315: idx = rows[i];
316: found = PETSC_FALSE;
317: for (j=0; j<size; j++) {
318: if (idx >= owners[j] && idx < owners[j+1]) {
319: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
320: }
321: }
322: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
323: }
324: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
326: /* inform other processors of number of messages and max length*/
327: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
329: /* post receives: */
330: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
331: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
332: for (i=0; i<nrecvs; i++) {
333: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
334: }
336: /* do sends:
337: 1) starts[i] gives the starting index in svalues for stuff going to
338: the ith processor
339: */
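/* starts[] is a running (prefix) sum of the per-processor counts in nprocs[2*i]; the
   first fill loop below advances starts[owner[i]] past each value placed, so starts[]
   is rebuilt once more before the actual MPI_Isend calls */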
340: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
341: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
342: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
343: starts[0] = 0;
344: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
345: for (i=0; i<N; i++) {
346: svalues[starts[owner[i]]++] = rows[i];
347: }
349: starts[0] = 0;
350: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
351: count = 0;
352: for (i=0; i<size; i++) {
353: if (nprocs[2*i+1]) {
354: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
355: }
356: }
357: PetscFree(starts);
359: base = owners[rank];
361: /* wait on receives */
362: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
363: source = lens + nrecvs;
364: count = nrecvs; slen = 0;
365: while (count) {
366: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
367: /* unpack receives into our local space */
368: MPI_Get_count(&recv_status,MPIU_INT,&n);
369: source[imdex] = recv_status.MPI_SOURCE;
370: lens[imdex] = n;
371: slen += n;
372: count--;
373: }
374: PetscFree(recv_waits);
375:
376: /* move the data into the send scatter */
377: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
378: count = 0;
379: for (i=0; i<nrecvs; i++) {
380: values = rvalues + i*nmax;
381: for (j=0; j<lens[i]; j++) {
382: lrows[count++] = values[j] - base;
383: }
384: }
385: PetscFree(rvalues);
386: PetscFree(lens);
387: PetscFree(owner);
388: PetscFree(nprocs);
389:
390: /* actually zap the local rows */
391: MatZeroRows(l->A,slen,lrows,diag);
392: PetscFree(lrows);
394: /* wait on sends */
395: if (nsends) {
396: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
397: MPI_Waitall(nsends,send_waits,send_status);
398: PetscFree(send_status);
399: }
400: PetscFree(send_waits);
401: PetscFree(svalues);
403: return(0);
404: }
408: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
409: {
410: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
414: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
415: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
416: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
417: return(0);
418: }
422: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
423: {
424: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
428: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
429: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
430: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
431: return(0);
432: }
436: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
437: {
438: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
440: PetscScalar zero = 0.0;
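/* y = A^T x is formed as (A_local)^T applied to the local part of x, accumulated into
   the work vector lvec, and then summed into the distributed y by a reverse scatter
   with ADD_VALUES */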
443: VecSet(yy,zero);
444: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
445: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
446: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
447: return(0);
448: }
452: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
453: {
454: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
458: VecCopy(yy,zz);
459: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
460: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
461: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
462: return(0);
463: }
467: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
468: {
469: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
470: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
472: PetscInt len,i,n,m = A->m,radd;
473: PetscScalar *x,zero = 0.0;
474:
476: VecSet(v,zero);
477: VecGetArray(v,&x);
478: VecGetSize(v,&n);
479: if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
480: len = PetscMin(a->A->m,a->A->n);
481: radd = a->rstart*m;
482: for (i=0; i<len; i++) {
483: x[i] = aloc->v[radd + i*m + i];
484: }
485: VecRestoreArray(v,&x);
486: return(0);
487: }
491: PetscErrorCode MatDestroy_MPIDense(Mat mat)
492: {
493: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498: #if defined(PETSC_USE_LOG)
499: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
500: #endif
501: MatStashDestroy_Private(&mat->stash);
502: PetscFree(mdn->rowners);
503: MatDestroy(mdn->A);
504: if (mdn->lvec) VecDestroy(mdn->lvec);
505: if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
506: if (mdn->factor) {
507: if (mdn->factor->temp) {PetscFree(mdn->factor->temp);}
508: if (mdn->factor->tag) {PetscFree(mdn->factor->tag);}
509: if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
510: PetscFree(mdn->factor);
511: }
512: PetscFree(mdn);
513: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
514: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
515: return(0);
516: }
520: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
521: {
522: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
526: if (mdn->size == 1) {
527: MatView(mdn->A,viewer);
528: }
529: else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
530: return(0);
531: }
535: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
536: {
537: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
538: PetscErrorCode ierr;
539: PetscMPIInt size = mdn->size,rank = mdn->rank;
540: PetscViewerType vtype;
541: PetscTruth iascii,isdraw;
542: PetscViewer sviewer;
543: PetscViewerFormat format;
546: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
547: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
548: if (iascii) {
549: PetscViewerGetType(viewer,&vtype);
550: PetscViewerGetFormat(viewer,&format);
551: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
552: MatInfo info;
553: MatGetInfo(mat,MAT_LOCAL,&info);
554: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
555: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
556: PetscViewerFlush(viewer);
557: VecScatterView(mdn->Mvctx,viewer);
558: return(0);
559: } else if (format == PETSC_VIEWER_ASCII_INFO) {
560: return(0);
561: }
562: } else if (isdraw) {
563: PetscDraw draw;
564: PetscTruth isnull;
566: PetscViewerDrawGetDraw(viewer,0,&draw);
567: PetscDrawIsNull(draw,&isnull);
568: if (isnull) return(0);
569: }
571: if (size == 1) {
572: MatView(mdn->A,viewer);
573: } else {
574: /* assemble the entire matrix onto first processor. */
575: Mat A;
576: PetscInt M = mat->M,N = mat->N,m,row,i,nz;
577: PetscInt *cols;
578: PetscScalar *vals;
580: MatCreate(mat->comm,&A);
581: if (!rank) {
582: MatSetSizes(A,M,N,M,N);
583: } else {
584: MatSetSizes(A,0,0,M,N);
585: }
586: /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
587: MatSetType(A,MATMPIDENSE);
588: MatMPIDenseSetPreallocation(A,PETSC_NULL);
589: PetscLogObjectParent(mat,A);
591: /* Copy the matrix ... This isn't the most efficient means,
592: but it's quick for now */
593: A->insertmode = INSERT_VALUES;
594: row = mdn->rstart; m = mdn->A->m;
595: for (i=0; i<m; i++) {
596: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
597: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
598: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
599: row++;
600: }
602: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
603: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
604: PetscViewerGetSingleton(viewer,&sviewer);
605: if (!rank) {
606: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
607: }
608: PetscViewerRestoreSingleton(viewer,&sviewer);
609: PetscViewerFlush(viewer);
610: MatDestroy(A);
611: }
612: return(0);
613: }
617: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
618: {
620: PetscTruth iascii,isbinary,isdraw,issocket;
621:
623:
624: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
625: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
626: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
627: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
629: if (iascii || issocket || isdraw) {
630: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
631: } else if (isbinary) {
632: MatView_MPIDense_Binary(mat,viewer);
633: } else {
634: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
635: }
636: return(0);
637: }
641: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
642: {
643: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
644: Mat mdn = mat->A;
646: PetscReal isend[5],irecv[5];
649: info->rows_global = (double)A->M;
650: info->columns_global = (double)A->N;
651: info->rows_local = (double)A->m;
652: info->columns_local = (double)A->N;
653: info->block_size = 1.0;
654: MatGetInfo(mdn,MAT_LOCAL,info);
655: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
656: isend[3] = info->memory; isend[4] = info->mallocs;
657: if (flag == MAT_LOCAL) {
658: info->nz_used = isend[0];
659: info->nz_allocated = isend[1];
660: info->nz_unneeded = isend[2];
661: info->memory = isend[3];
662: info->mallocs = isend[4];
663: } else if (flag == MAT_GLOBAL_MAX) {
664: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
665: info->nz_used = irecv[0];
666: info->nz_allocated = irecv[1];
667: info->nz_unneeded = irecv[2];
668: info->memory = irecv[3];
669: info->mallocs = irecv[4];
670: } else if (flag == MAT_GLOBAL_SUM) {
671: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
672: info->nz_used = irecv[0];
673: info->nz_allocated = irecv[1];
674: info->nz_unneeded = irecv[2];
675: info->memory = irecv[3];
676: info->mallocs = irecv[4];
677: }
678: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
679: info->fill_ratio_needed = 0;
680: info->factor_mallocs = 0;
681: return(0);
682: }
686: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
687: {
688: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
692: switch (op) {
693: case MAT_NO_NEW_NONZERO_LOCATIONS:
694: case MAT_YES_NEW_NONZERO_LOCATIONS:
695: case MAT_NEW_NONZERO_LOCATION_ERR:
696: case MAT_NEW_NONZERO_ALLOCATION_ERR:
697: case MAT_COLUMNS_SORTED:
698: case MAT_COLUMNS_UNSORTED:
699: MatSetOption(a->A,op);
700: break;
701: case MAT_ROW_ORIENTED:
702: a->roworiented = PETSC_TRUE;
703: MatSetOption(a->A,op);
704: break;
705: case MAT_ROWS_SORTED:
706: case MAT_ROWS_UNSORTED:
707: case MAT_YES_NEW_DIAGONALS:
708: case MAT_USE_HASH_TABLE:
709: PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));
710: break;
711: case MAT_COLUMN_ORIENTED:
712: a->roworiented = PETSC_FALSE;
713: MatSetOption(a->A,op);
714: break;
715: case MAT_IGNORE_OFF_PROC_ENTRIES:
716: a->donotstash = PETSC_TRUE;
717: break;
718: case MAT_NO_NEW_DIAGONALS:
719: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
720: case MAT_SYMMETRIC:
721: case MAT_STRUCTURALLY_SYMMETRIC:
722: case MAT_NOT_SYMMETRIC:
723: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
724: case MAT_HERMITIAN:
725: case MAT_NOT_HERMITIAN:
726: case MAT_SYMMETRY_ETERNAL:
727: case MAT_NOT_SYMMETRY_ETERNAL:
728: break;
729: default:
730: SETERRQ(PETSC_ERR_SUP,"unknown option");
731: }
732: return(0);
733: }
738: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
739: {
740: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
741: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
742: PetscScalar *l,*r,x,*v;
744: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
747: MatGetLocalSize(A,&s2,&s3);
748: if (ll) {
749: VecGetLocalSize(ll,&s2a);
750: if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
751: VecGetArray(ll,&l);
752: for (i=0; i<m; i++) {
753: x = l[i];
754: v = mat->v + i;
755: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
756: }
757: VecRestoreArray(ll,&l);
758: PetscLogFlops(n*m);
759: }
760: if (rr) {
761: VecGetSize(rr,&s3a);
762: if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
763: VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
764: VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
765: VecGetArray(mdn->lvec,&r);
766: for (i=0; i<n; i++) {
767: x = r[i];
768: v = mat->v + i*m;
769: for (j=0; j<m; j++) { (*v++) *= x;}
770: }
771: VecRestoreArray(mdn->lvec,&r);
772: PetscLogFlops(n*m);
773: }
774: return(0);
775: }
779: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
780: {
781: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
782: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
784: PetscInt i,j;
785: PetscReal sum = 0.0;
786: PetscScalar *v = mat->v;
789: if (mdn->size == 1) {
790: MatNorm(mdn->A,type,nrm);
791: } else {
792: if (type == NORM_FROBENIUS) {
793: for (i=0; i<mdn->A->n*mdn->A->m; i++) {
794: #if defined(PETSC_USE_COMPLEX)
795: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
796: #else
797: sum += (*v)*(*v); v++;
798: #endif
799: }
800: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
801: *nrm = sqrt(*nrm);
802: PetscLogFlops(2*mdn->A->n*mdn->A->m);
803: } else if (type == NORM_1) {
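/* 1-norm = max over columns of the sum of absolute values: each process sums its local
   rows column by column, the partial column sums are added across processes, and the
   maximum of the combined sums is taken */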
804: PetscReal *tmp,*tmp2;
805: PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
806: tmp2 = tmp + A->N;
807: PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
808: *nrm = 0.0;
809: v = mat->v;
810: for (j=0; j<mdn->A->n; j++) {
811: for (i=0; i<mdn->A->m; i++) {
812: tmp[j] += PetscAbsScalar(*v); v++;
813: }
814: }
815: MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
816: for (j=0; j<A->N; j++) {
817: if (tmp2[j] > *nrm) *nrm = tmp2[j];
818: }
819: PetscFree(tmp);
820: PetscLogFlops(A->n*A->m);
821: } else if (type == NORM_INFINITY) { /* max row norm */
822: PetscReal ntemp;
823: MatNorm(mdn->A,type,&ntemp);
824: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
825: } else {
826: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
827: }
828: }
829: return(0);
830: }
834: PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
835: {
836: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
837: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
838: Mat B;
839: PetscInt M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
841: PetscInt j,i;
842: PetscScalar *v;
845: if (!matout && M != N) {
846: SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
847: }
848: MatCreate(A->comm,&B);
849: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);
850: MatSetType(B,A->type_name);
851: MatMPIDenseSetPreallocation(B,PETSC_NULL);
853: m = a->A->m; n = a->A->n; v = Aloc->v;
854: PetscMalloc(m*sizeof(PetscInt),&rwork); /* rwork needs m entries: one global row index per local row */
855: for (j=0; j<n; j++) {
856: for (i=0; i<m; i++) rwork[i] = rstart + i;
857: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
858: v += m;
859: }
860: PetscFree(rwork);
861: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
862: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
863: if (matout) {
864: *matout = B;
865: } else {
866: MatHeaderCopy(A,B);
867: }
868: return(0);
869: }
871: #include "petscblaslapack.h"
874: PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
875: {
876: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
877: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
878: PetscScalar oalpha = alpha;
879: PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
883: BLASscal_(&nz,&oalpha,a->v,&one);
884: PetscLogFlops(nz);
885: return(0);
886: }
888: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
892: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
893: {
897: MatMPIDenseSetPreallocation(A,0);
898: return(0);
899: }
901: /* -------------------------------------------------------------------*/
902: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
903: MatGetRow_MPIDense,
904: MatRestoreRow_MPIDense,
905: MatMult_MPIDense,
906: /* 4*/ MatMultAdd_MPIDense,
907: MatMultTranspose_MPIDense,
908: MatMultTransposeAdd_MPIDense,
909: 0,
910: 0,
911: 0,
912: /*10*/ 0,
913: 0,
914: 0,
915: 0,
916: MatTranspose_MPIDense,
917: /*15*/ MatGetInfo_MPIDense,
918: 0,
919: MatGetDiagonal_MPIDense,
920: MatDiagonalScale_MPIDense,
921: MatNorm_MPIDense,
922: /*20*/ MatAssemblyBegin_MPIDense,
923: MatAssemblyEnd_MPIDense,
924: 0,
925: MatSetOption_MPIDense,
926: MatZeroEntries_MPIDense,
927: /*25*/ MatZeroRows_MPIDense,
928: 0,
929: 0,
930: 0,
931: 0,
932: /*30*/ MatSetUpPreallocation_MPIDense,
933: 0,
934: 0,
935: MatGetArray_MPIDense,
936: MatRestoreArray_MPIDense,
937: /*35*/ MatDuplicate_MPIDense,
938: 0,
939: 0,
940: 0,
941: 0,
942: /*40*/ 0,
943: MatGetSubMatrices_MPIDense,
944: 0,
945: MatGetValues_MPIDense,
946: 0,
947: /*45*/ 0,
948: MatScale_MPIDense,
949: 0,
950: 0,
951: 0,
952: /*50*/ 0,
953: 0,
954: 0,
955: 0,
956: 0,
957: /*55*/ 0,
958: 0,
959: 0,
960: 0,
961: 0,
962: /*60*/ MatGetSubMatrix_MPIDense,
963: MatDestroy_MPIDense,
964: MatView_MPIDense,
965: MatGetPetscMaps_Petsc,
966: 0,
967: /*65*/ 0,
968: 0,
969: 0,
970: 0,
971: 0,
972: /*70*/ 0,
973: 0,
974: 0,
975: 0,
976: 0,
977: /*75*/ 0,
978: 0,
979: 0,
980: 0,
981: 0,
982: /*80*/ 0,
983: 0,
984: 0,
985: 0,
986: /*84*/ MatLoad_MPIDense,
987: 0,
988: 0,
989: 0,
990: 0,
991: 0,
992: /*90*/ 0,
993: 0,
994: 0,
995: 0,
996: 0,
997: /*95*/ 0,
998: 0,
999: 0,
1000: 0};
1005: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1006: {
1007: Mat_MPIDense *a;
1011: mat->preallocated = PETSC_TRUE;
1012: /* Note: For now, when data is specified above, this assumes the user correctly
1013: allocates the local dense storage space. We should add error checking. */
1015: a = (Mat_MPIDense*)mat->data;
1016: MatCreate(PETSC_COMM_SELF,&a->A);
1017: MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);
1018: MatSetType(a->A,MATSEQDENSE);
1019: MatSeqDenseSetPreallocation(a->A,data);
1020: PetscLogObjectParent(mat,a->A);
1021: return(0);
1022: }
1025: /*MC
1026: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1028: Options Database Keys:
1029: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1031: Level: beginner
1033: .seealso: MatCreateMPIDense
1034: M*/
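/*
   A minimal usage sketch for MATMPIDENSE (hypothetical caller code; M and N assumed
   declared, error checking omitted to match this file):

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATMPIDENSE);                  /* or -mat_type mpidense with MatSetFromOptions() */
      MatMPIDenseSetPreallocation(A,PETSC_NULL);  /* PETSc allocates the local storage */
      ... calls to MatSetValues() ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/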
1039: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1040: {
1041: Mat_MPIDense *a;
1043: PetscInt i;
1046: PetscNew(Mat_MPIDense,&a);
1047: mat->data = (void*)a;
1048: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1049: mat->factor = 0;
1050: mat->mapping = 0;
1052: a->factor = 0;
1053: mat->insertmode = NOT_SET_VALUES;
1054: MPI_Comm_rank(mat->comm,&a->rank);
1055: MPI_Comm_size(mat->comm,&a->size);
1057: PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
1058: PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
1059: a->nvec = mat->n;
1061: /* the information in the maps duplicates the information computed below, eventually
1062: we should remove the duplicate information that is not contained in the maps */
1063: PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
1064: PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);
1066: /* build local table of row and column ownerships */
1067: PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);
1068: a->cowners = a->rowners + a->size + 1;
1069: PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1070: MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);
1071: a->rowners[0] = 0;
1072: for (i=2; i<=a->size; i++) {
1073: a->rowners[i] += a->rowners[i-1];
1074: }
1075: a->rstart = a->rowners[a->rank];
1076: a->rend = a->rowners[a->rank+1];
1077: MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);
1078: a->cowners[0] = 0;
1079: for (i=2; i<=a->size; i++) {
1080: a->cowners[i] += a->cowners[i-1];
1081: }
1083: /* build cache for off-processor entries */
1084: a->donotstash = PETSC_FALSE;
1085: MatStashCreate_Private(mat->comm,1,&mat->stash);
1087: /* stuff used for matrix vector multiply */
1088: a->lvec = 0;
1089: a->Mvctx = 0;
1090: a->roworiented = PETSC_TRUE;
1092: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1093: "MatGetDiagonalBlock_MPIDense",
1094: MatGetDiagonalBlock_MPIDense);
1095: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1096: "MatMPIDenseSetPreallocation_MPIDense",
1097: MatMPIDenseSetPreallocation_MPIDense);
1098: return(0);
1099: }
1102: /*MC
1103: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1105: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1106: and MATMPIDENSE otherwise.
1108: Options Database Keys:
1109: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1111: Level: beginner
1113: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1114: M*/
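/*
   A minimal sketch of how MATDENSE resolves (hypothetical caller code; variables assumed
   declared): MatCreate_Dense() below simply re-sets the type according to the communicator
   size, so the same code works both sequentially and in parallel:

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATDENSE);   /* becomes MATSEQDENSE on one process, MATMPIDENSE otherwise */
*/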
1119: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1120: {
1122: PetscMPIInt size;
1125: PetscObjectChangeTypeName((PetscObject)A,MATDENSE);
1126: MPI_Comm_size(A->comm,&size);
1127: if (size == 1) {
1128: MatSetType(A,MATSEQDENSE);
1129: } else {
1130: MatSetType(A,MATMPIDENSE);
1131: }
1132: return(0);
1133: }
1138: /*@C
1139: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1141: Not collective
1143: Input Parameters:
1144: + A - the matrix
1145: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1146: to control all matrix memory allocation.
1148: Notes:
1149: The dense format is fully compatible with standard Fortran 77
1150: storage by columns.
1152: The data input variable is intended primarily for Fortran programmers
1153: who wish to allocate their own matrix memory space. Most users should
1154: set data=PETSC_NULL.
1156: Level: intermediate
1158: .keywords: matrix,dense, parallel
1160: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1161: @*/
1162: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1163: {
1164: PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1167: PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1168: if (f) {
1169: (*f)(mat,data);
1170: }
1171: return(0);
1172: }
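/*
   A minimal sketch of supplying user storage to MatMPIDenseSetPreallocation (hypothetical
   caller code; m and N assumed declared): the array must hold at least m*N scalars, stored
   by columns as noted above, and is used directly (not copied) by the local SeqDense block
   created in MatMPIDenseSetPreallocation_MPIDense():

      Mat         A;
      PetscScalar *mydata;
      PetscMalloc(m*N*sizeof(PetscScalar),&mydata);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,mydata);
*/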
1176: /*@C
1177: MatCreateMPIDense - Creates a parallel matrix in dense format.
1179: Collective on MPI_Comm
1181: Input Parameters:
1182: + comm - MPI communicator
1183: . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1184: . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1185: . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1186: . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1187: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1188: to control all matrix memory allocation.
1190: Output Parameter:
1191: . A - the matrix
1193: Notes:
1194: The dense format is fully compatible with standard Fortran 77
1195: storage by columns.
1197: The data input variable is intended primarily for Fortran programmers
1198: who wish to allocate their own matrix memory space. Most users should
1199: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1201: The user MUST specify either the local or global matrix dimensions
1202: (possibly both).
1204: Level: intermediate
1206: .keywords: matrix,dense, parallel
1208: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1209: @*/
1210: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1211: {
1213: PetscMPIInt size;
1216: MatCreate(comm,A);
1217: MatSetSizes(*A,m,n,M,N);
1218: MPI_Comm_size(comm,&size);
1219: if (size > 1) {
1220: MatSetType(*A,MATMPIDENSE);
1221: MatMPIDenseSetPreallocation(*A,data);
1222: } else {
1223: MatSetType(*A,MATSEQDENSE);
1224: MatSeqDenseSetPreallocation(*A,data);
1225: }
1226: return(0);
1227: }
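/*
   A minimal usage sketch for MatCreateMPIDense (hypothetical caller code; M and N assumed
   declared): let PETSc choose the row distribution and allocate the storage:

      Mat A;
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ... calls to MatSetValues() ...
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/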
1231: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1232: {
1233: Mat mat;
1234: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1238: *newmat = 0;
1239: MatCreate(A->comm,&mat);
1240: MatSetSizes(mat,A->m,A->n,A->M,A->N);
1241: MatSetType(mat,A->type_name);
1242: PetscNew(Mat_MPIDense,&a);
1243: mat->data = (void*)a;
1244: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1245: mat->factor = A->factor;
1246: mat->assembled = PETSC_TRUE;
1247: mat->preallocated = PETSC_TRUE;
1249: a->rstart = oldmat->rstart;
1250: a->rend = oldmat->rend;
1251: a->size = oldmat->size;
1252: a->rank = oldmat->rank;
1253: mat->insertmode = NOT_SET_VALUES;
1254: a->nvec = oldmat->nvec;
1255: a->donotstash = oldmat->donotstash;
1256: PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);
1257: PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1258: PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));
1259: MatStashCreate_Private(A->comm,1,&mat->stash);
1261: MatSetUpMultiply_MPIDense(mat);
1262: MatDuplicate(oldmat->A,cpvalues,&a->A);
1263: PetscLogObjectParent(mat,a->A);
1264: *newmat = mat;
1265: return(0);
1266: }
1268: #include "petscsys.h"
1272: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1273: {
1275: PetscMPIInt rank,size;
1276: PetscInt *rowners,i,m,nz,j;
1277: PetscScalar *array,*vals,*vals_ptr;
1278: MPI_Status status;
1281: MPI_Comm_rank(comm,&rank);
1282: MPI_Comm_size(comm,&size);
1284: /* determine ownership of all rows */
1285: m = M/size + ((M % size) > rank);
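/* each process gets floor(M/size) rows, with the first (M % size) ranks getting one
   extra; e.g. (illustrative numbers only) M=10, size=4 gives local sizes 3,3,2,2 */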
1286: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1287: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1288: rowners[0] = 0;
1289: for (i=2; i<=size; i++) {
1290: rowners[i] += rowners[i-1];
1291: }
1293: MatCreate(comm,newmat);
1294: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1295: MatSetType(*newmat,type);
1296: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1297: MatGetArray(*newmat,&array);
1299: if (!rank) {
1300: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1302: /* read in my part of the matrix numerical values */
1303: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1304:
1305: /* insert into matrix by row (this is why we cannot read directly into the array) */
1306: vals_ptr = vals;
1307: for (i=0; i<m; i++) {
1308: for (j=0; j<N; j++) {
1309: array[i + j*m] = *vals_ptr++;
1310: }
1311: }
1313: /* read in the other processors' parts and ship them out */
1314: for (i=1; i<size; i++) {
1315: nz = (rowners[i+1] - rowners[i])*N;
1316: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1317: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1318: }
1319: } else {
1320: /* receive numeric values */
1321: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1323: /* receive message of values*/
1324: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1326: /* insert into matrix by row (this is why we cannot read directly into the array) */
1327: vals_ptr = vals;
1328: for (i=0; i<m; i++) {
1329: for (j=0; j<N; j++) {
1330: array[i + j*m] = *vals_ptr++;
1331: }
1332: }
1333: }
1334: PetscFree(rowners);
1335: PetscFree(vals);
1336: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1337: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1338: return(0);
1339: }
1343: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1344: {
1345: Mat A;
1346: PetscScalar *vals,*svals;
1347: MPI_Comm comm = ((PetscObject)viewer)->comm;
1348: MPI_Status status;
1349: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1350: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1351: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1352: PetscInt i,nz,j,rstart,rend;
1353: int fd;
1357: MPI_Comm_size(comm,&size);
1358: MPI_Comm_rank(comm,&rank);
1359: if (!rank) {
1360: PetscViewerBinaryGetDescriptor(viewer,&fd);
1361: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1362: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1363: }
1365: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1366: M = header[1]; N = header[2]; nz = header[3];
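/* the binary file starts with four PETSC_INT values: the matrix cookie, M, N and nz;
   nz == MATRIX_BINARY_FORMAT_DENSE marks a dense row-by-row layout handled just below,
   otherwise M row lengths, then nz column indices, then nz values follow and are read
   and inserted row by row */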
1368: /*
1369: Handle case where matrix is stored on disk as a dense matrix
1370: */
1371: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1372: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1373: return(0);
1374: }
1376: /* determine ownership of all rows */
1377: m = M/size + ((M % size) > rank);
1378: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1379: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1380: rowners[0] = 0;
1381: for (i=2; i<=size; i++) {
1382: rowners[i] += rowners[i-1];
1383: }
1384: rstart = rowners[rank];
1385: rend = rowners[rank+1];
1387: /* distribute row lengths to all processors */
1388: PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);
1389: offlens = ourlens + (rend-rstart);
1390: if (!rank) {
1391: PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1392: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1393: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1394: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1395: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1396: PetscFree(sndcounts);
1397: } else {
1398: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1399: }
1401: if (!rank) {
1402: /* calculate the number of nonzeros on each processor */
1403: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1404: PetscMemzero(procsnz,size*sizeof(PetscInt));
1405: for (i=0; i<size; i++) {
1406: for (j=rowners[i]; j< rowners[i+1]; j++) {
1407: procsnz[i] += rowlengths[j];
1408: }
1409: }
1410: PetscFree(rowlengths);
1412: /* determine max buffer needed and allocate it */
1413: maxnz = 0;
1414: for (i=0; i<size; i++) {
1415: maxnz = PetscMax(maxnz,procsnz[i]);
1416: }
1417: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1419: /* read in my part of the matrix column indices */
1420: nz = procsnz[0];
1421: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1422: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1424: /* read in everyone else's part and ship it off */
1425: for (i=1; i<size; i++) {
1426: nz = procsnz[i];
1427: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1428: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1429: }
1430: PetscFree(cols);
1431: } else {
1432: /* determine buffer space needed for message */
1433: nz = 0;
1434: for (i=0; i<m; i++) {
1435: nz += ourlens[i];
1436: }
1437: PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);
1439: /* receive message of column indices*/
1440: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1441: MPI_Get_count(&status,MPIU_INT,&maxnz);
1442: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1443: }
1445: /* loop over local rows, determining number of off diagonal entries */
1446: PetscMemzero(offlens,m*sizeof(PetscInt));
1447: jj = 0;
1448: for (i=0; i<m; i++) {
1449: for (j=0; j<ourlens[i]; j++) {
1450: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1451: jj++;
1452: }
1453: }
1455: /* create our matrix */
1456: for (i=0; i<m; i++) {
1457: ourlens[i] -= offlens[i];
1458: }
1459: MatCreate(comm,newmat);
1460: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1461: MatSetType(*newmat,type);
1462: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1463: A = *newmat;
1464: for (i=0; i<m; i++) {
1465: ourlens[i] += offlens[i];
1466: }
1468: if (!rank) {
1469: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1471: /* read in my part of the matrix numerical values */
1472: nz = procsnz[0];
1473: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1474:
1475: /* insert into matrix */
1476: jj = rstart;
1477: smycols = mycols;
1478: svals = vals;
1479: for (i=0; i<m; i++) {
1480: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1481: smycols += ourlens[i];
1482: svals += ourlens[i];
1483: jj++;
1484: }
1486: /* read in the other processors' parts and ship them out */
1487: for (i=1; i<size; i++) {
1488: nz = procsnz[i];
1489: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1490: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1491: }
1492: PetscFree(procsnz);
1493: } else {
1494: /* receive numeric values */
1495: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1497: /* receive message of values*/
1498: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1499: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1500: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1502: /* insert into matrix */
1503: jj = rstart;
1504: smycols = mycols;
1505: svals = vals;
1506: for (i=0; i<m; i++) {
1507: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1508: smycols += ourlens[i];
1509: svals += ourlens[i];
1510: jj++;
1511: }
1512: }
1513: PetscFree(ourlens);
1514: PetscFree(vals);
1515: PetscFree(mycols);
1516: PetscFree(rowners);
1518: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1519: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1520: return(0);
1521: }