Actual source code: mpirowbs.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/rowbs/mpi/mpirowbs.h"
5: #define CHUNCKSIZE_LOCAL 10
9: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
10: {
14: if (v) {
15: #if defined(PETSC_USE_LOG)
16: int len = -n*(sizeof(int)+sizeof(PetscScalar));
17: #endif
18: PetscFree(v);
19: PetscLogObjectMemory(A,len);
20: }
21: return(0);
22: }
26: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
27: {
29: int len;
32: if (!n) {
33: *i = 0; *v = 0;
34: } else {
35: len = n*(sizeof(int) + sizeof(PetscScalar));
36: PetscMalloc(len,v);
37: PetscLogObjectMemory(A,len);
38: *i = (int*)(*v + n);
39: }
40: return(0);
41: }
45: PetscErrorCode MatScale_MPIRowbs(Mat inA,PetscScalar alpha)
46: {
47: Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
48: BSspmat *A = a->A;
49: BSsprow *vs;
50: PetscScalar *ap;
51: int i,m = inA->m,nrow,j;
55: for (i=0; i<m; i++) {
56: vs = A->rows[i];
57: nrow = vs->length;
58: ap = vs->nz;
59: for (j=0; j<nrow; j++) {
60: ap[j] *= alpha;
61: }
62: }
63: PetscLogFlops(a->nz);
64: return(0);
65: }
67: /* ----------------------------------------------------------------- */
70: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
71: {
72: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
74: int i,len,m = A->m,*tnnz;
75: BSspmat *bsmat;
76: BSsprow *vs;
79: PetscMalloc((m+1)*sizeof(int),&tnnz);
80: if (!nnz) {
81: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
82: if (nz <= 0) nz = 1;
83: for (i=0; i<m; i++) tnnz[i] = nz;
84: nz = nz*m;
85: } else {
86: nz = 0;
87: for (i=0; i<m; i++) {
88: if (nnz[i] <= 0) tnnz[i] = 1;
89: else tnnz[i] = nnz[i];
90: nz += tnnz[i];
91: }
92: }
94: /* Allocate BlockSolve matrix context */
95: PetscNew(BSspmat,&bsif->A);
96: bsmat = bsif->A;
97: BSset_mat_icc_storage(bsmat,PETSC_FALSE);
98: BSset_mat_symmetric(bsmat,PETSC_FALSE);
99: len = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
100: PetscMalloc(len,&bsmat->rows);
101: bsmat->num_rows = m;
102: bsmat->global_num_rows = A->M;
103: bsmat->map = bsif->bsmap;
104: vs = (BSsprow*)(bsmat->rows + m);
105: for (i=0; i<m; i++) {
106: bsmat->rows[i] = vs;
107: bsif->imax[i] = tnnz[i];
108: vs->diag_ind = -1;
109: MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
110: /* put zero on diagonal */
111: /*vs->length = 1;
112: vs->col[0] = i + bsif->rstart;
113: vs->nz[0] = 0.0;*/
114: vs->length = 0;
115: vs++;
116: }
117: PetscLogObjectMemory(A,sizeof(BSspmat) + len);
118: bsif->nz = 0;
119: bsif->maxnz = nz;
120: bsif->sorted = 0;
121: bsif->roworiented = PETSC_TRUE;
122: bsif->nonew = 0;
123: bsif->bs_color_single = 0;
125: PetscFree(tnnz);
126: return(0);
127: }
131: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
132: {
133: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
134: BSspmat *A = mat->A;
135: BSsprow *vs;
137: int *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
138: int *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
139: PetscScalar *ap,value;
142: for (k=0; k<m; k++) { /* loop over added rows */
143: row = im[k];
144: if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
145: if (row >= AA->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->m-1);
146: vs = A->rows[row];
147: ap = vs->nz; rp = vs->col;
148: rmax = imax[row]; nrow = vs->length;
149: a = 0;
150: for (l=0; l<n; l++) { /* loop over added columns */
151: if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
152: if (in[l] >= AA->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->N-1);
153: col = in[l]; value = *v++;
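/* locate the insertion point for col: bisect the sorted column list of this row down to a few candidates, then scan linearly */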
154: if (!sorted) a = 0; b = nrow;
155: while (b-a > 5) {
156: t = (b+a)/2;
157: if (rp[t] > col) b = t;
158: else a = t;
159: }
160: for (i=a; i<b; i++) {
161: if (rp[i] > col) break;
162: if (rp[i] == col) {
163: if (addv == ADD_VALUES) ap[i] += value;
164: else ap[i] = value;
165: goto noinsert;
166: }
167: }
168: if (nonew) goto noinsert;
169: if (nrow >= rmax) {
170: /* there is no extra room in row, therefore enlarge */
171: int *itemp,*iout,*iin = vs->col;
172: PetscScalar *vout,*vin = vs->nz,*vtemp;
174: /* malloc new storage space */
175: imax[row] += CHUNCKSIZE_LOCAL;
176: MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
177: vout = vtemp; iout = itemp;
178: for (ii=0; ii<i; ii++) {
179: vout[ii] = vin[ii];
180: iout[ii] = iin[ii];
181: }
182: vout[i] = value;
183: iout[i] = col;
184: for (ii=i+1; ii<=nrow; ii++) {
185: vout[ii] = vin[ii-1];
186: iout[ii] = iin[ii-1];
187: }
188: /* free old row storage */
189: if (rmax > 0) {
190: MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
191: }
192: vs->col = iout; vs->nz = vout;
193: rmax = imax[row];
194: mat->maxnz += CHUNCKSIZE_LOCAL;
195: mat->reallocs++;
196: } else {
197: /* shift higher columns over to make room for the new entry */
198: for (ii=nrow-1; ii>=i; ii--) {
199: rp[ii+1] = rp[ii];
200: ap[ii+1] = ap[ii];
201: }
202: rp[i] = col;
203: ap[i] = value;
204: }
205: nrow++;
206: mat->nz++;
207: AA->same_nonzero = PETSC_FALSE;
208: noinsert:;
209: a = i + 1;
210: }
211: vs->length = nrow;
212: }
213: return(0);
214: }
219: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
220: {
222: return(0);
223: }
227: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
228: {
229: Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
230: BSspmat *A = a->A;
231: BSsprow *vs;
232: int i,j,rstart = a->rstart;
235: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
237: /* Mark location of diagonal */
238: for (i=0; i<AA->m; i++) {
239: vs = A->rows[i];
240: for (j=0; j<vs->length; j++) {
241: if (vs->col[j] == i + rstart) {
242: vs->diag_ind = j;
243: break;
244: }
245: }
246: if (vs->diag_ind == -1) {
247: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
248: }
249: }
250: return(0);
251: }
255: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,PetscInt N,const PetscInt rz[],PetscScalar diag)
256: {
257: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
258: BSspmat *l = a->A;
260: int i,m = A->m - 1,col,base=a->rowners[a->rank];
263: if (a->keepzeroedrows) {
264: for (i=0; i<N; i++) {
265: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
266: PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
267: if (diag != 0.0) {
268: col=rz[i]+base;
269: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
270: }
271: }
272: } else {
273: if (diag != 0.0) {
274: for (i=0; i<N; i++) {
275: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
276: if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
277: l->rows[rz[i]]->length = 1;
278: l->rows[rz[i]]->nz[0] = diag;
279: l->rows[rz[i]]->col[0] = a->rstart + rz[i];
280: } else {
281: col=rz[i]+base;
282: MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
283: }
284: }
285: } else {
286: for (i=0; i<N; i++) {
287: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
288: l->rows[rz[i]]->length = 0;
289: }
290: }
291: A->same_nonzero = PETSC_FALSE;
292: }
293: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
294: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
295: return(0);
296: }
300: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
301: {
302: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
303: BSsprow *vs,**rs;
304: PetscScalar *xv;
305: PetscReal sum = 0.0;
307: int *xi,nz,i,j;
310: rs = mat->A->rows;
311: if (type == NORM_FROBENIUS) {
312: for (i=0; i<A->m; i++) {
313: vs = *rs++;
314: nz = vs->length;
315: xv = vs->nz;
316: while (nz--) {
317: #if defined(PETSC_USE_COMPLEX)
318: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
319: #else
320: sum += (*xv)*(*xv); xv++;
321: #endif
322: }
323: }
324: *norm = sqrt(sum);
325: } else if (type == NORM_1) { /* max column norm */
326: PetscReal *tmp;
327: PetscMalloc(A->n*sizeof(PetscReal),&tmp);
328: PetscMemzero(tmp,A->n*sizeof(PetscReal));
329: *norm = 0.0;
330: for (i=0; i<A->m; i++) {
331: vs = *rs++;
332: nz = vs->length;
333: xi = vs->col;
334: xv = vs->nz;
335: while (nz--) {
336: tmp[*xi] += PetscAbsScalar(*xv);
337: xi++; xv++;
338: }
339: }
340: for (j=0; j<A->n; j++) {
341: if (tmp[j] > *norm) *norm = tmp[j];
342: }
343: PetscFree(tmp);
344: } else if (type == NORM_INFINITY) { /* max row norm */
345: *norm = 0.0;
346: for (i=0; i<A->m; i++) {
347: vs = *rs++;
348: nz = vs->length;
349: xv = vs->nz;
350: sum = 0.0;
351: while (nz--) {
352: sum += PetscAbsScalar(*xv); xv++;
353: }
354: if (sum > *norm) *norm = sum;
355: }
356: } else {
357: SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
358: }
359: return(0);
360: }
362: /* ----------------------------------------------------------------- */
366: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
367: {
368: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
370: int i,j,row,col,rstart = a->rstart,rend = a->rend;
371: PetscTruth roworiented = a->roworiented;
374: /* Note: There's no need to "unscale" the matrix, since scaling is
375: confined to a->pA, and we're working with a->A here */
376: for (i=0; i<m; i++) {
377: if (im[i] < 0) continue;
378: if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
379: if (im[i] >= rstart && im[i] < rend) {
380: row = im[i] - rstart;
381: for (j=0; j<n; j++) {
382: if (in[j] < 0) continue;
383: if (in[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);
384: if (in[j] >= 0 && in[j] < mat->N){
385: col = in[j];
386: if (roworiented) {
387: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
388: } else {
389: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
390: }
391: } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
392: }
393: } else {
394: if (!a->donotstash) {
395: if (roworiented) {
396: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
397: } else {
398: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
399: }
400: }
401: }
402: }
403: return(0);
404: }
408: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
409: {
410: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
411: MPI_Comm comm = mat->comm;
413: int nstash,reallocs;
414: InsertMode addv;
417: /* Note: There's no need to "unscale" the matrix, since scaling is
418: confined to a->pA, and we're working with a->A here */
420: /* make sure all processors are either in INSERTMODE or ADDMODE */
421: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
422: if (addv == (ADD_VALUES|INSERT_VALUES)) {
423: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
424: }
425: mat->insertmode = addv; /* in case this processor had no cache */
427: MatStashScatterBegin_Private(&mat->stash,a->rowners);
428: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
429: PetscLogInfo((0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs));
430: return(0);
431: }
435: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
436: {
437: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
439: int i,j;
440: PetscTruth iascii;
441: BSspmat *A = a->A;
442: BSsprow **rs = A->rows;
443: PetscViewerFormat format;
446: PetscViewerGetFormat(viewer,&format);
447: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
449: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
450: int ind_l,ind_g,clq_l,clq_g,color;
451: ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
452: ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
453: clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
454: clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
455: color = BSnum_colors(a->pA);CHKERRBS(0);
456: PetscViewerASCIIPrintf(viewer," %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
457: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
458: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
459: for (i=0; i<A->num_rows; i++) {
460: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
461: for (j=0; j<rs[i]->length; j++) {
462: if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
463: }
464: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
465: }
466: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
467: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
468: } else {
469: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
470: for (i=0; i<A->num_rows; i++) {
471: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
472: for (j=0; j<rs[i]->length; j++) {
473: PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
474: }
475: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
476: }
477: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
478: }
479: PetscViewerFlush(viewer);
480: return(0);
481: }
485: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
486: {
487: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
489: PetscMPIInt rank,size;
490: PetscInt i,M,m,*sbuff,*rowlengths;
491: PetscInt *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
492: BSspmat *A = a->A;
493: BSsprow **rs = A->rows;
494: MPI_Comm comm = mat->comm;
495: MPI_Status status;
496: PetscScalar *vals;
497: MatInfo info;
500: MPI_Comm_size(comm,&size);
501: MPI_Comm_rank(comm,&rank);
503: M = mat->M; m = mat->m;
504: /* First gather together on the first processor the lengths of
505: each row, and write them out to the file */
506: PetscMalloc(m*sizeof(int),&sbuff);
507: for (i=0; i<A->num_rows; i++) {
508: sbuff[i] = rs[i]->length;
509: }
510: MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
511: if (!rank) {
512: PetscViewerBinaryGetDescriptor(viewer,&fd);
513: PetscMalloc((4+M)*sizeof(int),&rowlengths);
514: PetscMalloc(size*sizeof(int),&recvcts);
515: recvdisp = a->rowners;
516: for (i=0; i<size; i++) {
517: recvcts[i] = recvdisp[i+1] - recvdisp[i];
518: }
519: /* first four elements of rowlengths are the header */
520: rowlengths[0] = mat->cookie;
521: rowlengths[1] = mat->M;
522: rowlengths[2] = mat->N;
523: rowlengths[3] = (int)info.nz_used;
524: MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
525: PetscFree(sbuff);
526: PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
527: /* count the number of nonzeros on each processor */
528: PetscMemzero(recvcts,size*sizeof(int));
529: for (i=0; i<size; i++) {
530: for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
531: recvcts[i] += rowlengths[j+4];
532: }
533: }
534: /* allocate buffer long enough to hold largest one */
535: maxnz = 0;
536: for (i=0; i<size; i++) {
537: maxnz = PetscMax(maxnz,recvcts[i]);
538: }
539: PetscFree(rowlengths);
540: PetscFree(recvcts);
541: PetscMalloc(maxnz*sizeof(int),&cols);
543: /* binary store column indices for 0th processor */
544: nz = 0;
545: for (i=0; i<A->num_rows; i++) {
546: for (j=0; j<rs[i]->length; j++) {
547: cols[nz++] = rs[i]->col[j];
548: }
549: }
550: PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
552: /* receive and store column indices for all other processors */
553: for (i=1; i<size; i++) {
554: /* should tell processor that I am now ready and to begin the send */
555: MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
556: MPI_Get_count(&status,MPI_INT,&nz);
557: PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
558: }
559: PetscFree(cols);
560: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
562: /* binary store values for 0th processor */
563: nz = 0;
564: for (i=0; i<A->num_rows; i++) {
565: for (j=0; j<rs[i]->length; j++) {
566: vals[nz++] = rs[i]->nz[j];
567: }
568: }
569: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
571: /* receive and store nonzeros for all other processors */
572: for (i=1; i<size; i++) {
573: /* should tell processor that I am now ready and to begin the send */
574: MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
575: MPI_Get_count(&status,MPIU_SCALAR,&nz);
576: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
577: }
578: PetscFree(vals);
579: } else {
580: MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
581: PetscFree(sbuff);
583: /* count local nonzeros */
584: nz = 0;
585: for (i=0; i<A->num_rows; i++) {
586: for (j=0; j<rs[i]->length; j++) {
587: nz++;
588: }
589: }
590: /* copy into buffer column indices */
591: PetscMalloc(nz*sizeof(int),&cols);
592: nz = 0;
593: for (i=0; i<A->num_rows; i++) {
594: for (j=0; j<rs[i]->length; j++) {
595: cols[nz++] = rs[i]->col[j];
596: }
597: }
598: /* send */ /* should wait until processor zero tells me to go */
599: MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
600: PetscFree(cols);
602: /* copy into buffer column values */
603: PetscMalloc(nz*sizeof(PetscScalar),&vals);
604: nz = 0;
605: for (i=0; i<A->num_rows; i++) {
606: for (j=0; j<rs[i]->length; j++) {
607: vals[nz++] = rs[i]->nz[j];
608: }
609: }
610: /* send */ /* should wait until processor zero tells me to go */
611: MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
612: PetscFree(vals);
613: }
615: return(0);
616: }
620: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
621: {
622: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
624: PetscTruth iascii,isbinary;
627: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
628: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
629: if (!bsif->blocksolveassembly) {
630: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
631: }
632: if (iascii) {
633: MatView_MPIRowbs_ASCII(mat,viewer);
634: } else if (isbinary) {
635: MatView_MPIRowbs_Binary(mat,viewer);
636: } else {
637: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
638: }
639: return(0);
640: }
641:
644: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
645: {
646: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
647: BSspmat *A = a->A;
648: BSsprow *vs;
649: int size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
650: int msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
652: int ctr_j,*sbuf1_j,k;
653: PetscScalar val=0.0;
654: MPI_Comm comm;
655: MPI_Request *s_waits1,*r_waits1;
656: MPI_Status *s_status,*r_status;
659: comm = mat->comm;
660: tag = mat->tag;
661: size = a->size;
662: rank = a->rank;
663: M = mat->M;
664: rstart = a->rstart;
666: PetscMalloc(M*sizeof(int),&rtable);
667: /* Create hash table for the mapping :row -> proc */
668: for (i=0,j=0; i<size; i++) {
669: len = a->rowners[i+1];
670: for (; j<len; j++) {
671: rtable[j] = i;
672: }
673: }
675: /* Evaluate communication - mesg to whom, length of mesg, and buffer space
676: required. Based on this, buffers are allocated, and data copied into them. */
677: PetscMalloc(size*4*sizeof(int),&w1);/* mesg size */
678: w3 = w1 + 2*size; /* no of IS that needs to be sent to proc i */
679: w4 = w3 + size; /* temp work space used in determining w1, w3 */
680: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */
682: for (i=0; i<mat->m; i++) {
683: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
684: vs = A->rows[i];
685: for (j=0; j<vs->length; j++) {
686: proc = rtable[vs->col[j]];
687: w4[proc]++;
688: }
689: for (j=0; j<size; j++) {
690: if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
691: }
692: }
693:
694: nrqs = 0; /* number of outgoing messages */
695: msz = 0; /* total mesg length (for all proc */
696: w1[2*rank] = 0; /* no mesg sent to itself */
697: w3[rank] = 0;
698: for (i=0; i<size; i++) {
699: if (w1[2*i]) {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
700: }
701: /* pa - is list of processors to communicate with */
702: PetscMalloc((nrqs+1)*sizeof(int),&pa);
703: for (i=0,j=0; i<size; i++) {
704: if (w1[2*i]) {pa[j] = i; j++;}
705: }
707: /* Each message would have a header = 1 + 2*(no of ROWS) + data */
708: for (i=0; i<nrqs; i++) {
709: j = pa[i];
710: w1[2*j] += w1[2*j+1] + 2*w3[j];
711: msz += w1[2*j];
712: }
713:
714: /* Do a global reduction to determine how many messages to expect */
715: PetscMaxSum(comm,w1,&bsz,&nrqr);
717: /* Allocate memory for recv buffers (possibly none needed if nrqr = 0) */
718: len = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
719: PetscMalloc(len,&rbuf1);
720: rbuf1[0] = (int*)(rbuf1 + nrqr);
721: for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;
723: /* Post the receives */
724: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
725: for (i=0; i<nrqr; ++i){
726: MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
727: }
728:
729: /* Allocate Memory for outgoing messages */
730: len = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
731: PetscMalloc(len,&sbuf1);
732: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
733: PetscMemzero(sbuf1,2*size*sizeof(int*));
734: tmp = (int*)(sbuf1 + 2*size);
735: ctr = tmp + msz;
737: {
738: int *iptr = tmp,ict = 0;
739: for (i=0; i<nrqs; i++) {
740: j = pa[i];
741: iptr += ict;
742: sbuf1[j] = iptr;
743: ict = w1[2*j];
744: }
745: }
747: /* Form the outgoing messages */
748: /* Clean up the header space */
749: for (i=0; i<nrqs; i++) {
750: j = pa[i];
751: sbuf1[j][0] = 0;
752: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
753: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
754: }
756: /* Parse the matrix and copy the data into sbuf1 */
757: for (i=0; i<mat->m; i++) {
758: PetscMemzero(ctr,size*sizeof(int));
759: vs = A->rows[i];
760: for (j=0; j<vs->length; j++) {
761: col = vs->col[j];
762: proc = rtable[col];
763: if (proc != rank) { /* copy to the outgoing buffer */
764: ctr[proc]++;
765: *ptr[proc] = col;
766: ptr[proc]++;
767: } else {
768: row = col - rstart;
769: col = i + rstart;
770: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
771: }
772: }
773: /* Update the headers for the current row */
774: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
775: if ((ctr_j = ctr[j])) {
776: sbuf1_j = sbuf1[j];
777: k = ++sbuf1_j[0];
778: sbuf1_j[2*k] = ctr_j;
779: sbuf1_j[2*k-1] = i + rstart;
780: }
781: }
782: }
783: /* Check Validity of the outgoing messages */
784: {
785: int sum;
786: for (i=0 ; i<nrqs ; i++) {
787: j = pa[i];
788: if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
789: }
791: for (i=0 ; i<nrqs ; i++) {
792: j = pa[i];
793: sum = 1;
794: for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
795: if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
796: }
797: }
798:
799: /* Now post the sends */
800: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
801: for (i=0; i<nrqs; ++i) {
802: j = pa[i];
803: MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
804: }
805:
806: /* Receive messages*/
807: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
808: for (i=0; i<nrqr; ++i) {
809: MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
810: /* Process the Message */
811: {
812: int *rbuf1_i,n_row,ct1;
814: rbuf1_i = rbuf1[idx];
815: n_row = rbuf1_i[0];
816: ct1 = 2*n_row+1;
817: val = 0.0;
818: /* Optimise this later */
819: for (j=1; j<=n_row; j++) {
820: col = rbuf1_i[2*j-1];
821: for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
822: row = rbuf1_i[ct1] - rstart;
823: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
824: }
825: }
826: }
827: }
829: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
830: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}
832: PetscFree(rtable);
833: PetscFree(w1);
834: PetscFree(pa);
835: PetscFree(rbuf1);
836: PetscFree(sbuf1);
837: PetscFree(r_waits1);
838: PetscFree(s_waits1);
839: PetscFree(r_status);
840: PetscFree(s_status);
841: return(0);
842: }
844: /*
845: This does the BlockSolve portion of the matrix assembly.
846: It is provided in a separate routine so that users can
847: operate on the matrix (using MatScale(), MatShift() etc.) after
848: the matrix has been assembled but before BlockSolve has sucked it
849: in and devoured it. (See the usage sketch following this routine.)
850: */
853: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
854: {
855: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
857: int ldim,low,high,i;
858: PetscScalar *diag;
861: if ((mat->was_assembled) && (!mat->same_nonzero)) { /* Free the old info */
862: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
863: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
864: }
866: if ((!mat->same_nonzero) || (!mat->was_assembled)) {
867: /* Indicates bypassing cliques in coloring */
868: if (a->bs_color_single) {
869: BSctx_set_si(a->procinfo,100);
870: }
871: /* Form permuted matrix for efficient parallel execution */
872: a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
873: /* Set up the communication */
874: a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
875: } else {
876: /* Repermute the matrix */
877: BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
878: }
880: /* Symmetrically scale the matrix by the diagonal */
881: BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);
883: /* Store inverse of square root of permuted diagonal scaling matrix */
884: VecGetLocalSize(a->diag,&ldim);
885: VecGetOwnershipRange(a->diag,&low,&high);
886: VecGetArray(a->diag,&diag);
887: for (i=0; i<ldim; i++) {
888: if (a->pA->scale_diag[i] != 0.0) {
889: diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
890: } else {
891: diag[i] = 1.0;
892: }
893: }
894: VecRestoreArray(a->diag,&diag);
895: a->assembled_icc_storage = a->A->icc_storage;
896: a->blocksolveassembly = 1;
897: mat->was_assembled = PETSC_TRUE;
898: mat->same_nonzero = PETSC_TRUE;
899: PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n"));
900: return(0);
901: }
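/*
   Usage sketch (illustrative only, not part of the original source): the calling
   sequence described in the comment preceding MatAssemblyEnd_MPIRowbs_ForBlockSolve().
   The matrix A, the vectors x,y and the scalar alpha are hypothetical user variables;
   any operation that needs the BlockSolve form (for example MatMult(), see
   MatMult_MPIRowbs below) triggers this routine internally.

     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatScale(A,alpha);      <-- still operates only on the PETSc-side storage a->A
     MatMult(A,x,y);         <-- first use performs the BlockSolve assembly
*/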
905: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
906: {
907: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
909: int i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
910: PetscScalar *vals,val;
911: InsertMode addv = mat->insertmode;
914: while (1) {
915: MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
916: if (!flg) break;
917:
918: for (i=0; i<n;) {
919: /* Now identify the consecutive vals belonging to the same row */
920: for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
921: if (j < n) ncols = j-i;
922: else ncols = n-i;
923: /* Now assemble all these values with a single function call */
924: MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
925: i = j;
926: }
927: }
928: MatStashScatterEnd_Private(&mat->stash);
930: rstart = a->rstart;
931: nzcount = a->nz; /* This is the number of nonzeros entered by the user */
932: /* BlockSolve requires that the matrix is structurally symmetric */
933: if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
934: MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
935: }
936:
937: /* BlockSolve requires that all the diagonal elements are set */
938: val = 0.0;
939: for (i=0; i<mat->m; i++) {
940: row = i; col = i + rstart;
941: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
942: }
943:
944: MatAssemblyBegin_MPIRowbs_local(mat,mode);
945: MatAssemblyEnd_MPIRowbs_local(mat,mode);
946:
947: a->blocksolveassembly = 0;
948: PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz));
949: PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount));
950: PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs));
951: return(0);
952: }
956: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
957: {
958: Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
959: BSspmat *A = l->A;
960: BSsprow *vs;
961: int i,j;
964: for (i=0; i <mat->m; i++) {
965: vs = A->rows[i];
966: for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
967: }
968: return(0);
969: }
971: /* the code does not do the diagonal entries correctly unless the
972: matrix is square and the column and row ownerships are identical.
973: This is a BUG.
974: */
978: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
979: {
980: Mat_MPIRowbs *l = (Mat_MPIRowbs*)A->data;
982: int i,*owners = l->rowners,size = l->size;
983: int *nprocs,j,idx,nsends;
984: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
985: int *rvalues,tag = A->tag,count,base,slen,n,*source;
986: int *lens,imdex,*lrows,*values;
987: MPI_Comm comm = A->comm;
988: MPI_Request *send_waits,*recv_waits;
989: MPI_Status recv_status,*send_status;
990: PetscTruth found;
993: /* first count number of contributors to each processor */
994: PetscMalloc(2*size*sizeof(int),&nprocs);
995: PetscMemzero(nprocs,2*size*sizeof(int));
996: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
997: for (i=0; i<N; i++) {
998: idx = rows[i];
999: found = PETSC_FALSE;
1000: for (j=0; j<size; j++) {
1001: if (idx >= owners[j] && idx < owners[j+1]) {
1002: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1003: }
1004: }
1005: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1006: }
1007: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
1009: /* inform other processors of number of messages and max length*/
1010: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1012: /* post receives: */
1013: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1014: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1015: for (i=0; i<nrecvs; i++) {
1016: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1017: }
1019: /* do sends:
1020: 1) starts[i] gives the starting index in svalues for stuff going to
1021: the ith processor
1022: */
1023: PetscMalloc((N+1)*sizeof(int),&svalues);
1024: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1025: PetscMalloc((size+1)*sizeof(int),&starts);
1026: starts[0] = 0;
1027: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1028: for (i=0; i<N; i++) {
1029: svalues[starts[owner[i]]++] = rows[i];
1030: }
1032: starts[0] = 0;
1033: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1034: count = 0;
1035: for (i=0; i<size; i++) {
1036: if (nprocs[2*i+1]) {
1037: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1038: }
1039: }
1040: PetscFree(starts);
1042: base = owners[rank];
1044: /* wait on receives */
1045: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1046: source = lens + nrecvs;
1047: count = nrecvs; slen = 0;
1048: while (count) {
1049: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1050: /* unpack receives into our local space */
1051: MPI_Get_count(&recv_status,MPI_INT,&n);
1052: source[imdex] = recv_status.MPI_SOURCE;
1053: lens[imdex] = n;
1054: slen += n;
1055: count--;
1056: }
1057: PetscFree(recv_waits);
1058:
1059: /* move the data into the send scatter */
1060: PetscMalloc((slen+1)*sizeof(int),&lrows);
1061: count = 0;
1062: for (i=0; i<nrecvs; i++) {
1063: values = rvalues + i*nmax;
1064: for (j=0; j<lens[i]; j++) {
1065: lrows[count++] = values[j] - base;
1066: }
1067: }
1068: PetscFree(rvalues);
1069: PetscFree(lens);
1070: PetscFree(owner);
1071: PetscFree(nprocs);
1072:
1073: /* actually zap the local rows */
1074: MatZeroRows_MPIRowbs_local(A,slen,lrows,diag);
1075: PetscFree(lrows);
1077: /* wait on sends */
1078: if (nsends) {
1079: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1080: MPI_Waitall(nsends,send_waits,send_status);
1081: PetscFree(send_status);
1082: }
1083: PetscFree(send_waits);
1084: PetscFree(svalues);
1086: return(0);
1087: }
1091: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1092: {
1093: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1094: BSsprow *vs,**rs;
1095: PetscScalar *xv;
1096: PetscReal sum = 0.0;
1098: int *xi,nz,i,j;
1101: if (a->size == 1) {
1102: MatNorm_MPIRowbs_local(mat,type,norm);
1103: } else {
1104: rs = a->A->rows;
1105: if (type == NORM_FROBENIUS) {
1106: for (i=0; i<mat->m; i++) {
1107: vs = *rs++;
1108: nz = vs->length;
1109: xv = vs->nz;
1110: while (nz--) {
1111: #if defined(PETSC_USE_COMPLEX)
1112: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1113: #else
1114: sum += (*xv)*(*xv); xv++;
1115: #endif
1116: }
1117: }
1118: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1119: *norm = sqrt(*norm);
1120: } else if (type == NORM_1) { /* max column norm */
1121: PetscReal *tmp,*tmp2;
1122: PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1123: PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1124: PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1125: *norm = 0.0;
1126: for (i=0; i<mat->m; i++) {
1127: vs = *rs++;
1128: nz = vs->length;
1129: xi = vs->col;
1130: xv = vs->nz;
1131: while (nz--) {
1132: tmp[*xi] += PetscAbsScalar(*xv);
1133: xi++; xv++;
1134: }
1135: }
1136: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1137: for (j=0; j<mat->n; j++) {
1138: if (tmp2[j] > *norm) *norm = tmp2[j];
1139: }
1140: PetscFree(tmp);
1141: PetscFree(tmp2);
1142: } else if (type == NORM_INFINITY) { /* max row norm */
1143: PetscReal ntemp = 0.0;
1144: for (i=0; i<mat->m; i++) {
1145: vs = *rs++;
1146: nz = vs->length;
1147: xv = vs->nz;
1148: sum = 0.0;
1149: while (nz--) {
1150: sum += PetscAbsScalar(*xv); xv++;
1151: }
1152: if (sum > ntemp) ntemp = sum;
1153: }
1154: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1155: } else {
1156: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1157: }
1158: }
1159: return(0);
1160: }
1164: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1165: {
1166: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1167: BSprocinfo *bspinfo = bsif->procinfo;
1168: PetscScalar *xxa,*xworka,*yya;
1172: if (!bsif->blocksolveassembly) {
1173: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1174: }
1176: /* Permute and apply diagonal scaling: [ xwork = D^{1/2} * x ] */
1177: if (!bsif->vecs_permscale) {
1178: VecGetArray(bsif->xwork,&xworka);
1179: VecGetArray(xx,&xxa);
1180: BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1181: VecRestoreArray(bsif->xwork,&xworka);
1182: VecRestoreArray(xx,&xxa);
1183: VecPointwiseDivide(xx,bsif->xwork,bsif->diag);
1184: }
1186: VecGetArray(xx,&xxa);
1187: VecGetArray(yy,&yya);
1188: /* Do lower triangular multiplication: [ y = L * xwork ] */
1189: if (bspinfo->single) {
1190: BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1191: } else {
1192: BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1193: }
1194:
1195: /* Do upper triangular multiplication: [ y = y + L^{T} * xwork ] */
1196: if (mat->symmetric) {
1197: if (bspinfo->single){
1198: BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1199: } else {
1200: BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1201: }
1202: }
1203: /* not needed for ILU version since forward does it all */
1204: VecRestoreArray(xx,&xxa);
1205: VecRestoreArray(yy,&yya);
1207: /* Apply diagonal scaling to vector: [ y = D^{1/2} * y ] */
1208: if (!bsif->vecs_permscale) {
1209: VecGetArray(bsif->xwork,&xworka);
1210: VecGetArray(xx,&xxa);
1211: BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1212: VecRestoreArray(bsif->xwork,&xworka);
1213: VecRestoreArray(xx,&xxa);
1214: VecPointwiseDivide(bsif->xwork,yy,bsif->diag);
1215: VecGetArray(bsif->xwork,&xworka);
1216: VecGetArray(yy,&yya);
1217: BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1218: VecRestoreArray(bsif->xwork,&xworka);
1219: VecRestoreArray(yy,&yya);
1220: }
1221: PetscLogFlops(2*bsif->nz - mat->m);
1223: return(0);
1224: }
1228: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1229: {
1231: PetscScalar one = 1.0;
1234: (*mat->ops->mult)(mat,xx,zz);
1235: VecAXPY(zz,one,yy);
1236: return(0);
1237: }
1241: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1242: {
1243: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1244: PetscReal isend[5],irecv[5];
1248: info->rows_global = (double)A->M;
1249: info->columns_global = (double)A->N;
1250: info->rows_local = (double)A->m;
1251: info->columns_local = (double)A->N;
1252: info->block_size = 1.0;
1253: info->mallocs = (double)mat->reallocs;
1254: isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] = mat->maxnz - mat->nz;
1255: isend[3] = A->mem; isend[4] = info->mallocs;
1257: if (flag == MAT_LOCAL) {
1258: info->nz_used = isend[0];
1259: info->nz_allocated = isend[1];
1260: info->nz_unneeded = isend[2];
1261: info->memory = isend[3];
1262: info->mallocs = isend[4];
1263: } else if (flag == MAT_GLOBAL_MAX) {
1264: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1265: info->nz_used = irecv[0];
1266: info->nz_allocated = irecv[1];
1267: info->nz_unneeded = irecv[2];
1268: info->memory = irecv[3];
1269: info->mallocs = irecv[4];
1270: } else if (flag == MAT_GLOBAL_SUM) {
1271: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1272: info->nz_used = irecv[0];
1273: info->nz_allocated = irecv[1];
1274: info->nz_unneeded = irecv[2];
1275: info->memory = irecv[3];
1276: info->mallocs = irecv[4];
1277: }
1278: return(0);
1279: }
1283: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1284: {
1285: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1286: BSsprow **rs = a->A->rows;
1288: int i,n;
1289: PetscScalar *x,zero = 0.0;
1292: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1293: if (!a->blocksolveassembly) {
1294: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1295: }
1297: VecSet(v,zero);
1298: VecGetLocalSize(v,&n);
1299: if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1300: VecGetArray(v,&x);
1301: for (i=0; i<mat->m; i++) {
1302: x[i] = rs[i]->nz[rs[i]->diag_ind];
1303: }
1304: VecRestoreArray(v,&x);
1305: return(0);
1306: }
1310: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1311: {
1312: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1313: BSspmat *A = a->A;
1314: BSsprow *vs;
1316: int i;
1319: #if defined(PETSC_USE_LOG)
1320: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1321: #endif
1322: PetscFree(a->rowners);
1323: MatStashDestroy_Private(&mat->stash);
1324: if (a->bsmap) {
1325: if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1326: if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1327: if (a->bsmap->vglobal2proc) (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1328: PetscFree(a->bsmap);
1329: }
1331: if (A) {
1332: for (i=0; i<mat->m; i++) {
1333: vs = A->rows[i];
1334: MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1335: }
1336: /* Note: A->map = a->bsmap is freed above */
1337: PetscFree(A->rows);
1338: PetscFree(A);
1339: }
1340: if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1341: if (a->diag) {VecDestroy(a->diag);}
1342: if (a->xwork) {VecDestroy(a->xwork);}
1343: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
1344: if (a->fpA) {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1345: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1346: if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1347: if (a->imax) {PetscFree(a->imax);}
1348: MPI_Comm_free(&(a->comm_mpirowbs));
1349: PetscFree(a);
1350: PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1351: return(0);
1352: }
1356: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op)
1357: {
1358: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
1362: switch (op) {
1363: case MAT_ROW_ORIENTED:
1364: a->roworiented = PETSC_TRUE;
1365: break;
1366: case MAT_COLUMN_ORIENTED:
1367: a->roworiented = PETSC_FALSE;
1368: break;
1369: case MAT_COLUMNS_SORTED:
1370: a->sorted = 1;
1371: break;
1372: case MAT_COLUMNS_UNSORTED:
1373: a->sorted = 0;
1374: break;
1375: case MAT_NO_NEW_NONZERO_LOCATIONS:
1376: a->nonew = 1;
1377: break;
1378: case MAT_YES_NEW_NONZERO_LOCATIONS:
1379: a->nonew = 0;
1380: break;
1381: case MAT_DO_NOT_USE_INODES:
1382: a->bs_color_single = 1;
1383: break;
1384: case MAT_YES_NEW_DIAGONALS:
1385: case MAT_ROWS_SORTED:
1386: case MAT_NEW_NONZERO_LOCATION_ERR:
1387: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1388: case MAT_ROWS_UNSORTED:
1389: case MAT_USE_HASH_TABLE:
1390: PetscLogInfo((A,"MatSetOption_MPIRowbs:Option ignored\n"));
1391: break;
1392: case MAT_IGNORE_OFF_PROC_ENTRIES:
1393: a->donotstash = PETSC_TRUE;
1394: break;
1395: case MAT_NO_NEW_DIAGONALS:
1396: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1397: break;
1398: case MAT_KEEP_ZEROED_ROWS:
1399: a->keepzeroedrows = PETSC_TRUE;
1400: break;
1401: case MAT_SYMMETRIC:
1402: BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1403: break;
1404: case MAT_STRUCTURALLY_SYMMETRIC:
1405: case MAT_NOT_SYMMETRIC:
1406: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1407: case MAT_HERMITIAN:
1408: case MAT_NOT_HERMITIAN:
1409: case MAT_SYMMETRY_ETERNAL:
1410: case MAT_NOT_SYMMETRY_ETERNAL:
1411: break;
1412: default:
1413: SETERRQ(PETSC_ERR_SUP,"unknown option");
1414: break;
1415: }
1416: return(0);
1417: }
1421: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1422: {
1423: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1424: BSspmat *A = mat->A;
1425: BSsprow *rs;
1426:
1428: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1430: rs = A->rows[row - mat->rstart];
1431: *nz = rs->length;
1432: if (v) *v = rs->nz;
1433: if (idx) *idx = rs->col;
1434: return(0);
1435: }
1439: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1440: {
1442: return(0);
1443: }
1445: /* ------------------------------------------------------------------ */
1449: PetscErrorCode MatPrintHelp_MPIRowbs(Mat A)
1450: {
1451: static PetscTruth called = PETSC_FALSE;
1452: MPI_Comm comm = A->comm;
1456: if (called) {return(0);} else called = PETSC_TRUE;
1457: (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1458: (*PetscHelpPrintf)(comm," -mat_rowbs_no_inode - Do not use inodes\n");
1459: return(0);
1460: }
1464: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1465: {
1469: MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1470: return(0);
1471: }
1473: /* -------------------------------------------------------------------*/
1474: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1475: MatGetRow_MPIRowbs,
1476: MatRestoreRow_MPIRowbs,
1477: MatMult_MPIRowbs,
1478: /* 4*/ MatMultAdd_MPIRowbs,
1479: MatMult_MPIRowbs,
1480: MatMultAdd_MPIRowbs,
1481: MatSolve_MPIRowbs,
1482: 0,
1483: 0,
1484: /*10*/ 0,
1485: 0,
1486: 0,
1487: 0,
1488: 0,
1489: /*15*/ MatGetInfo_MPIRowbs,
1490: 0,
1491: MatGetDiagonal_MPIRowbs,
1492: 0,
1493: MatNorm_MPIRowbs,
1494: /*20*/ MatAssemblyBegin_MPIRowbs,
1495: MatAssemblyEnd_MPIRowbs,
1496: 0,
1497: MatSetOption_MPIRowbs,
1498: MatZeroEntries_MPIRowbs,
1499: /*25*/ MatZeroRows_MPIRowbs,
1500: 0,
1501: MatLUFactorNumeric_MPIRowbs,
1502: 0,
1503: MatCholeskyFactorNumeric_MPIRowbs,
1504: /*30*/ MatSetUpPreallocation_MPIRowbs,
1505: MatILUFactorSymbolic_MPIRowbs,
1506: MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1507: 0,
1508: 0,
1509: /*35*/ 0,
1510: MatForwardSolve_MPIRowbs,
1511: MatBackwardSolve_MPIRowbs,
1512: 0,
1513: 0,
1514: /*40*/ 0,
1515: MatGetSubMatrices_MPIRowbs,
1516: 0,
1517: 0,
1518: 0,
1519: /*45*/ MatPrintHelp_MPIRowbs,
1520: MatScale_MPIRowbs,
1521: 0,
1522: 0,
1523: 0,
1524: /*50*/ 0,
1525: 0,
1526: 0,
1527: 0,
1528: 0,
1529: /*55*/ 0,
1530: 0,
1531: 0,
1532: 0,
1533: 0,
1534: /*60*/ MatGetSubMatrix_MPIRowbs,
1535: MatDestroy_MPIRowbs,
1536: MatView_MPIRowbs,
1537: MatGetPetscMaps_Petsc,
1538: MatUseScaledForm_MPIRowbs,
1539: /*65*/ MatScaleSystem_MPIRowbs,
1540: MatUnScaleSystem_MPIRowbs,
1541: 0,
1542: 0,
1543: 0,
1544: /*70*/ 0,
1545: 0,
1546: 0,
1547: 0,
1548: 0,
1549: /*75*/ 0,
1550: 0,
1551: 0,
1552: 0,
1553: 0,
1554: /*80*/ 0,
1555: 0,
1556: 0,
1557: 0,
1558: MatLoad_MPIRowbs,
1559: /*85*/ 0,
1560: 0,
1561: 0,
1562: 0,
1563: 0,
1564: /*90*/ 0,
1565: 0,
1566: 0,
1567: 0,
1568: 0,
1569: /*95*/ 0,
1570: 0,
1571: 0,
1572: 0};
1574: /* ------------------------------------------------------------------- */
1579: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1580: {
1584: mat->preallocated = PETSC_TRUE;
1585: MatCreateMPIRowbs_local(mat,nz,nnz);
1586: return(0);
1587: }
1590: /*MC
1591: MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1592: with the external package BlockSolve95. If BlockSolve95 is installed (see the manual for instructions
1593: on how to declare the existence of external packages), a matrix type can be constructed which invokes
1594: BlockSolve95 preconditioners and solvers.
1596: Options Database Keys:
1597: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()
1599: Level: beginner
1601: .seealso: MatCreateMPIRowbs
1602: M*/
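/*
   Illustrative sketch (not from the original source): creating a MATMPIROWBS matrix.
   The variables comm, m, M and A are hypothetical; the calls themselves are the
   standard PETSc creation sequence used elsewhere in this file (see MatLoad_MPIRowbs).

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,m,M,M);
     MatSetType(A,MATMPIROWBS);
     MatSetFromOptions(A);        (or run with -mat_type mpirowbs)
*/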
1607: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIRowbs(Mat A)
1608: {
1609: Mat_MPIRowbs *a;
1610: BSmapping *bsmap;
1611: BSoff_map *bsoff;
1613: int i,*offset,m,M;
1614: PetscTruth flg1,flg2,flg3;
1615: BSprocinfo *bspinfo;
1616: MPI_Comm comm;
1617:
1619: comm = A->comm;
1620: m = A->m;
1621: M = A->M;
1623: PetscNew(Mat_MPIRowbs,&a);
1624: A->data = (void*)a;
1625: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1626: A->factor = 0;
1627: A->mapping = 0;
1628: a->vecs_permscale = PETSC_FALSE;
1629: A->insertmode = NOT_SET_VALUES;
1630: a->blocksolveassembly = 0;
1631: a->keepzeroedrows = PETSC_FALSE;
1633: MPI_Comm_rank(comm,&a->rank);
1634: MPI_Comm_size(comm,&a->size);
1636: PetscSplitOwnership(comm,&m,&M);
1638: A->N = M;
1639: A->M = M;
1640: A->m = m;
1641: A->n = A->N; /* each row stores all columns */
1642: PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1643: a->reallocs = 0;
1645: /* the information in the maps duplicates the information computed below, eventually
1646: we should remove the duplicate information that is not contained in the maps */
1647: PetscMapCreateMPI(comm,m,M,&A->rmap);
1648: PetscMapCreateMPI(comm,m,M,&A->cmap);
1650: /* build local table of row ownerships */
1651: PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1652: MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1653: a->rowners[0] = 0;
1654: for (i=2; i<=a->size; i++) {
1655: a->rowners[i] += a->rowners[i-1];
1656: }
1657: a->rstart = a->rowners[a->rank];
1658: a->rend = a->rowners[a->rank+1];
1659: PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));
1661: /* build cache for off array entries formed */
1662: MatStashCreate_Private(A->comm,1,&A->stash);
1663: a->donotstash = PETSC_FALSE;
1665: /* Initialize BlockSolve information */
1666: a->A = 0;
1667: a->pA = 0;
1668: a->comm_pA = 0;
1669: a->fpA = 0;
1670: a->comm_fpA = 0;
1671: a->alpha = 1.0;
1672: a->ierr = 0;
1673: a->failures = 0;
1674: MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));
1675: VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1676: VecDuplicate(a->diag,&(a->xwork));
1677: PetscLogObjectParent(A,a->diag); PetscLogObjectParent(A,a->xwork);
1678: PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1679: bspinfo = BScreate_ctx();CHKERRBS(0);
1680: a->procinfo = bspinfo;
1681: BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1682: BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1683: BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1684: BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1685: BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1686: BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1687: #if defined(PETSC_USE_DEBUG)
1688: BSctx_set_err(bspinfo,1);CHKERRBS(0); /* BS error checking */
1689: #endif
1690: BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1691: PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1692: if (flg1) {
1693: BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1694: }
1695: PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1696: PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1697: PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1698: if (flg1 || flg2 || flg3) {
1699: BSctx_set_si(bspinfo,1);CHKERRBS(0);
1700: } else {
1701: BSctx_set_si(bspinfo,0);CHKERRBS(0);
1702: }
1703: #if defined(PETSC_USE_LOG)
1704: MLOG_INIT(); /* Initialize logging */
1705: #endif
1707: /* Compute global offsets */
1708: offset = &a->rstart;
1710: PetscNew(BSmapping,&a->bsmap);
1711: PetscLogObjectMemory(A,sizeof(BSmapping));
1712: bsmap = a->bsmap;
1713: PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1714: *((int*)bsmap->vlocal2global) = (*offset);
1715: bsmap->flocal2global = BSloc2glob;
1716: bsmap->free_l2g = 0;
1717: PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1718: *((int*)bsmap->vglobal2local) = (*offset);
1719: bsmap->fglobal2local = BSglob2loc;
1720: bsmap->free_g2l = 0;
1721: bsoff = BSmake_off_map(*offset,bspinfo,A->M);
1722: bsmap->vglobal2proc = (void*)bsoff;
1723: bsmap->fglobal2proc = BSglob2proc;
1724: bsmap->free_g2p = (void(*)(void*)) BSfree_off_map;
1725: PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1726: "MatMPIRowbsSetPreallocation_MPIRowbs",
1727: MatMPIRowbsSetPreallocation_MPIRowbs);
1728: return(0);
1729: }
1734: /* @
1735: MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros
1736: per row in the matrix.
1738: Input Parameter:
1739: + mat - matrix
1740: . nz - maximum expected for any row
1741: - nnz - number expected in each row
1743: Note:
1744: This routine is valid only for matrices stored in the MATMPIROWBS
1745: format. (See the usage sketch following this routine.)
1746: @ */
1747: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1748: {
1749: PetscErrorCode ierr,(*f)(Mat,int,const int[]);
1752: PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1753: if (f) {
1754: (*f)(mat,nz,nnz);
1755: }
1756: return(0);
1757: }
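/*
   Illustrative sketch (not from the original source): preallocating a MATMPIROWBS
   matrix, either with a single estimate nz applied to every row or with a per-row
   array.  A and nnz are hypothetical user variables.

     MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);    (about 5 nonzeros expected per row)
     MatMPIRowbsSetPreallocation(A,0,nnz);           (exact per-row counts given in nnz[])
*/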
1759: /* --------------- extra BlockSolve-specific routines -------------- */
1762: /* @
1763: MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1764: user can then manipulate to alter the default parameters.
1766: Input Parameter:
1767: mat - matrix
1769: Output Parameter:
1770: procinfo - processor information context
1772: Note:
1773: This routine is valid only for matrices stored in the MATMPIROWBS
1774: format.
1775: @ */
1776: PetscErrorCode PETSCMAT_DLLEXPORT MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1777: {
1778: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1779: PetscTruth ismpirowbs;
1783: PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1784: if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1785: procinfo = a->procinfo;
1786: return(0);
1787: }
1791: PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1792: {
1793: Mat_MPIRowbs *a;
1794: BSspmat *A;
1795: BSsprow **rs;
1796: Mat mat;
1798: int i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1799: int header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1800: PetscScalar *vals;
1801: MPI_Comm comm = ((PetscObject)viewer)->comm;
1802: MPI_Status status;
1805: MPI_Comm_size(comm,&size);
1806: MPI_Comm_rank(comm,&rank);
1807: if (!rank) {
1808: PetscViewerBinaryGetDescriptor(viewer,&fd);
1809: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1810: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1811: if (header[3] < 0) {
1812: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1813: }
1814: }
1816: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1817: M = header[1];
1818: /* determine ownership of all rows */
1819: m = M/size + ((M % size) > rank);
1820: PetscMalloc((size+2)*sizeof(int),&rowners);
1821: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1822: rowners[0] = 0;
1823: for (i=2; i<=size; i++) {
1824: rowners[i] += rowners[i-1];
1825: }
1826: rstart = rowners[rank];
1827: rend = rowners[rank+1];
1829: /* distribute row lengths to all processors */
1830: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1831: if (!rank) {
1832: PetscMalloc(M*sizeof(int),&rowlengths);
1833: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1834: PetscMalloc(size*sizeof(int),&sndcounts);
1835: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1836: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1837: PetscFree(sndcounts);
1838: } else {
1839: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1840: }
1842: /* create our matrix */
1843: MatCreate(comm,newmat);
1844: MatSetSizes(*newmat,m,m,M,M);
1845: MatSetType(*newmat,type);
1846: MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1847: mat = *newmat;
1848: PetscFree(ourlens);
1850: a = (Mat_MPIRowbs*)mat->data;
1851: A = a->A;
1852: rs = A->rows;
1854: if (!rank) {
1855: /* calculate the number of nonzeros on each processor */
1856: PetscMalloc(size*sizeof(int),&procsnz);
1857: PetscMemzero(procsnz,size*sizeof(int));
1858: for (i=0; i<size; i++) {
1859: for (j=rowners[i]; j< rowners[i+1]; j++) {
1860: procsnz[i] += rowlengths[j];
1861: }
1862: }
1863: PetscFree(rowlengths);
1865: /* determine max buffer needed and allocate it */
1866: maxnz = 0;
1867: for (i=0; i<size; i++) {
1868: maxnz = PetscMax(maxnz,procsnz[i]);
1869: }
1870: PetscMalloc(maxnz*sizeof(int),&cols);
1872: /* read in my part of the matrix column indices */
1873: nz = procsnz[0];
1874: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1875:
1876: /* insert it into my part of matrix */
1877: nz = 0;
1878: for (i=0; i<A->num_rows; i++) {
1879: for (j=0; j<a->imax[i]; j++) {
1880: rs[i]->col[j] = cols[nz++];
1881: }
1882: rs[i]->length = a->imax[i];
1883: }
1884: /* read in parts for all other processors */
1885: for (i=1; i<size; i++) {
1886: nz = procsnz[i];
1887: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1888: MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1889: }
1890: PetscFree(cols);
1891: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1893: /* read in my part of the matrix numerical values */
1894: nz = procsnz[0];
1895: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1896:
1897: /* insert it into my part of matrix */
1898: nz = 0;
1899: for (i=0; i<A->num_rows; i++) {
1900: for (j=0; j<a->imax[i]; j++) {
1901: rs[i]->nz[j] = vals[nz++];
1902: }
1903: }
1904: /* read in parts for all other processors */
1905: for (i=1; i<size; i++) {
1906: nz = procsnz[i];
1907: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1908: MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1909: }
1910: PetscFree(vals);
1911: PetscFree(procsnz);
1912: } else {
1913: /* determine buffer space needed for message */
1914: nz = 0;
1915: for (i=0; i<A->num_rows; i++) {
1916: nz += a->imax[i];
1917: }
1918: PetscMalloc(nz*sizeof(int),&cols);
1920: /* receive message of column indices*/
1921: MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1922: MPI_Get_count(&status,MPI_INT,&maxnz);
1923: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1925: /* insert it into my part of matrix */
1926: nz = 0;
1927: for (i=0; i<A->num_rows; i++) {
1928: for (j=0; j<a->imax[i]; j++) {
1929: rs[i]->col[j] = cols[nz++];
1930: }
1931: rs[i]->length = a->imax[i];
1932: }
1933: PetscFree(cols);
1934: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1936: /* receive message of values*/
1937: MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1938: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1939: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1941: /* insert it into my part of matrix */
1942: nz = 0;
1943: for (i=0; i<A->num_rows; i++) {
1944: for (j=0; j<a->imax[i]; j++) {
1945: rs[i]->nz[j] = vals[nz++];
1946: }
1947: rs[i]->length = a->imax[i];
1948: }
1949: PetscFree(vals);
1950: }
1951: PetscFree(rowners);
1952: a->nz = a->maxnz;
1953: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1954: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1955: return(0);
1956: }
1958: /*
1959: Special destroy and view routines for factored matrices
1960: */
1963: static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat)
1964: {
1966: #if defined(PETSC_USE_LOG)
1967: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1968: #endif
1969: return(0);
1970: }
1974: static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1975: {
1979: MatView((Mat) mat->data,viewer);
1980: return(0);
1981: }
1985: PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact)
1986: {
1987: /* Note: f is not currently used in BlockSolve */
1988: Mat newmat;
1989: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1991: PetscTruth idn;
1994: if (isrow) {
1995: ISIdentity(isrow,&idn);
1996: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1997: }
1999: if (!mat->symmetric) {
2000: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
2001: preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
2002: symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
2003: }
2005: /* If the icc_storage flag wasn't set before the last blocksolveassembly, */
2006: /* we must completely redo the assembly as a different storage format is required. */
2007: if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
2008: mat->same_nonzero = PETSC_FALSE;
2009: mbs->blocksolveassembly = 0;
2010: }
2012: if (!mbs->blocksolveassembly) {
2013: BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
2014: BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
2015: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2016: }
2018: /* Copy permuted matrix */
2019: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2020: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2022: /* Set up the communication for factorization */
2023: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2024: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2026: /*
2027: Create a new Mat structure to hold the "factored" matrix,
2028:       note that this merely contains a pointer to the original matrix, since
2029:       the original matrix contains the factor information.
2030: */
2031: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2032: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2034: newmat->data = (void*)mat;
2035: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2036: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2037: newmat->ops->view = MatView_MPIRowbs_Factored;
2038: newmat->factor = 1;
2039: newmat->preallocated = PETSC_TRUE;
2040: newmat->M = mat->M;
2041: newmat->N = mat->N;
2042: newmat->m = mat->m;
2043: newmat->n = mat->n;
2044: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2046: *newfact = newmat;
2047: return(0);
2048: }
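/*
   Hedged sketch (not part of the original source): the usual way to exercise the
   routine above is through an ICC preconditioner on a MATMPIROWBS matrix that was
   declared symmetric before assembly.  The KSP/PC calls are standard PETSc API of
   this release; wiring them to BlockSolve95's ICC(0) through this routine is the
   assumption being illustrated.  A, b, x are supplied by the caller.

   KSP ksp;
   PC  pc;

   MatSetOption(A,MAT_SYMMETRIC);
   MatSetOption(A,MAT_SYMMETRY_ETERNAL);
   (insert values and assemble A here)
   KSPCreate(PETSC_COMM_WORLD,&ksp);
   KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);
   KSPGetPC(ksp,&pc);
   PCSetType(pc,PCICC);
   KSPSetFromOptions(ksp);
   KSPSolve(ksp,b,x);
*/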
2052: PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact)
2053: {
2054: Mat newmat;
2055: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
2057: PetscTruth idn;
2060: if (info->levels) SETERRQ(PETSC_ERR_SUP,"Blocksolve ILU only supports 0 fill");
2061: if (isrow) {
2062: ISIdentity(isrow,&idn);
2063: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2064: }
2065: if (iscol) {
2066: ISIdentity(iscol,&idn);
2067: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2068: }
2070: if (!mbs->blocksolveassembly) {
2071: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2072: }
2073:
2074: /* if (mat->symmetric) { */
2075: /* SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2076: /* MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2077: /* using the option MatSetOption(A,MAT_SYMMETRIC)"); */
2078: /* } */
2080: /* Copy permuted matrix */
2081: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2082: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
2084: /* Set up the communication for factorization */
2085: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2086: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2088: /*
2089: Create a new Mat structure to hold the "factored" matrix,
2090:       note that this merely contains a pointer to the original matrix, since
2091:       the original matrix contains the factor information.
2092: */
2093: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2094: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2096: newmat->data = (void*)mat;
2097: PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2098: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2099: newmat->ops->view = MatView_MPIRowbs_Factored;
2100: newmat->factor = 1;
2101: newmat->preallocated = PETSC_TRUE;
2102: newmat->M = mat->M;
2103: newmat->N = mat->N;
2104: newmat->m = mat->m;
2105: newmat->n = mat->n;
2106: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2108: *newfact = newmat;
2109: return(0);
2110: }
2114: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2115: {
2121: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2122: ISColoringCreate(mat->comm,mat->m,0,coloring);
2124: return(0);
2125: }
2129: /*@C
2130: MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2131: format. This format is intended primarily as an interface for BlockSolve95.
2133: Collective on MPI_Comm
2135: Input Parameters:
2136: + comm - MPI communicator
2137: .  m - number of local rows (or PETSC_DECIDE to have it calculated)
2138: .  M - number of global rows (or PETSC_DECIDE to have it calculated)
2139: . nz - number of nonzeros per row (same for all local rows)
2140: - nnz - number of nonzeros per row (possibly different for each row).
2142: Output Parameter:
2143: . newA - the matrix
2145: Notes:
2146: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2147:    then it must be used on all processors that share the object for that argument.
2149: The user MUST specify either the local or global matrix dimensions
2150: (possibly both).
2152: Specify the preallocated storage with either nz or nnz (not both). Set
2153: nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2154: allocation.
2156: Notes:
2157: By default, the matrix is assumed to be nonsymmetric; the user can
2158: take advantage of special optimizations for symmetric matrices by calling
2159: $ MatSetOption(mat,MAT_SYMMETRIC)
2160: $ MatSetOption(mat,MAT_SYMMETRY_ETERNAL)
2161: BEFORE calling the routine MatAssemblyBegin().
2163: Internally, the MATMPIROWBS format inserts zero elements to the
2164: matrix if necessary, so that nonsymmetric matrices are considered
2165: to be symmetric in terms of their sparsity structure; this format
2166: is required for use of the parallel communication routines within
2167: BlockSolve95. In particular, if the matrix element A[i,j] exists,
2168: then PETSc will internally allocate a 0 value for the element
2169: A[j,i] during MatAssemblyEnd() if the user has not already set
2170: a value for the matrix element A[j,i].
2172: Options Database Keys:
2173: . -mat_rowbs_no_inode - Do not use inodes.
2175: Level: intermediate
2176:
2177: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve
2179: .seealso: MatCreate(), MatSetValues()
2180: @*/
2181: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2182: {
2184:
2186: MatCreate(comm,newA);
2187: MatSetSizes(*newA,m,m,M,M);
2188: MatSetType(*newA,MATMPIROWBS);
2189: MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2190: return(0);
2191: }
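/*
   Hedged usage sketch (not part of the original source): a minimal driver showing the
   calling sequence documented above.  The global size 100, the per-row estimate of 5
   nonzeros, and the diagonal-only insertion loop are illustrative assumptions only;
   note that the MatSetOption() calls must precede MatAssemblyBegin() if the ICC path
   is to be used later.

   Mat         A;
   int         i,rstart,rend;
   PetscScalar v = 2.0;

   MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,100,5,PETSC_NULL,&A);
   MatSetOption(A,MAT_SYMMETRIC);
   MatSetOption(A,MAT_SYMMETRY_ETERNAL);
   MatGetOwnershipRange(A,&rstart,&rend);
   for (i=rstart; i<rend; i++) {
     MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);
   }
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
   MatDestroy(A);
*/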
2194: /* -------------------------------------------------------------------------*/
2196: #include src/mat/impls/aij/seq/aij.h
2197: #include src/mat/impls/aij/mpi/mpiaij.h
2201: PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2202: {
2204: int nmax,nstages_local,nstages,i,pos,max_no;
2208: /* Allocate memory to hold all the submatrices */
2209: if (scall != MAT_REUSE_MATRIX) {
2210: PetscMalloc((ismax+1)*sizeof(Mat),submat);
2211: }
2212:
2213: /* Determine the number of stages through which submatrices are done */
2214: nmax = 20*1000000 / (C->N * sizeof(int));
2215: if (!nmax) nmax = 1;
2216: nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
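  /* annotation (not in the original source): nmax caps how many index sets are handled
     per stage so that the per-stage column maps of roughly ismax*C->N integers allocated
     in MatGetSubMatrices_MPIRowbs_Local stay near 20 MB; nstages_local is then the number
     of passes this process needs to cover all ismax index sets. */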
2218: /* Make sure every processor loops through the nstages */
2219: MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);
2221: for (i=0,pos=0; i<nstages; i++) {
2222: if (pos+nmax <= ismax) max_no = nmax;
2223: else if (pos == ismax) max_no = 0;
2224: else max_no = ismax-pos;
2225: MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2226: pos += max_no;
2227: }
2228: return(0);
2229: }
2230: /* -------------------------------------------------------------------------*/
2231: /* For now MatGetSubMatrices_MPIRowbs_Local gets SeqAIJ submatrices of the input
2232:    matrix and preserves the zeros that come from structural symmetry
2233: */
2236: PetscErrorCode MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2237: {
2238: Mat_MPIRowbs *c = (Mat_MPIRowbs *)(C->data);
2239: BSspmat *A = c->A;
2240: Mat_SeqAIJ *mat;
2242: int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2243: int **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
2244: int nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2245: int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2246: int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
2247: int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2248: int *rmap_i,tag0,tag1,tag2,tag3;
2249: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2250: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2251: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2252: MPI_Status *r_status3,*r_status4,*s_status4;
2253: MPI_Comm comm;
2254: FLOAT **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2255: PetscScalar *mat_a;
2256: PetscTruth sorted;
2257: int *onodes1,*olengths1;
2260: comm = C->comm;
2261: tag0 = C->tag;
2262: size = c->size;
2263: rank = c->rank;
2264: m = C->M;
2265:
2266: /* Get some new tags to keep the communication clean */
2267: PetscObjectGetNewTag((PetscObject)C,&tag1);
2268: PetscObjectGetNewTag((PetscObject)C,&tag2);
2269: PetscObjectGetNewTag((PetscObject)C,&tag3);
2271: /* Check if the col indices are sorted */
2272: for (i=0; i<ismax; i++) {
2273: ISSorted(isrow[i],&sorted);
2274: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2275: ISSorted(iscol[i],&sorted);
2276: /* if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2277: }
2279: len = (2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int);
2280: PetscMalloc(len,&irow);
2281: icol = irow + ismax;
2282: nrow = (int*)(icol + ismax);
2283: ncol = nrow + ismax;
2284: rtable = ncol + ismax;
2286: for (i=0; i<ismax; i++) {
2287: ISGetIndices(isrow[i],&irow[i]);
2288: ISGetIndices(iscol[i],&icol[i]);
2289: ISGetLocalSize(isrow[i],&nrow[i]);
2290: ISGetLocalSize(iscol[i],&ncol[i]);
2291: }
2293:   /* Create a lookup table for the mapping: row -> owning proc */
2294: for (i=0,j=0; i<size; i++) {
2295: jmax = c->rowners[i+1];
2296: for (; j<jmax; j++) {
2297: rtable[j] = i;
2298: }
2299: }
2301: /* evaluate communication - mesg to who, length of mesg, and buffer space
2302: required. Based on this, buffers are allocated, and data copied into them*/
2303: PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2304: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2305: w3 = w2 + size; /* no of IS that needs to be sent to proc i */
2306: w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */
2307: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2308: for (i=0; i<ismax; i++) {
2309: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2310: jmax = nrow[i];
2311: irow_i = irow[i];
2312: for (j=0; j<jmax; j++) {
2313: row = irow_i[j];
2314: proc = rtable[row];
2315: w4[proc]++;
2316: }
2317: for (j=0; j<size; j++) {
2318: if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
2319: }
2320: }
2321:
2322: nrqs = 0; /* no of outgoing messages */
2323: msz = 0; /* total mesg length (for all procs) */
2324: w1[rank] = 0; /* no mesg sent to self */
2325: w3[rank] = 0;
2326: for (i=0; i<size; i++) {
2327: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2328: }
2329: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2330: for (i=0,j=0; i<size; i++) {
2331: if (w1[i]) { pa[j] = i; j++; }
2332: }
2334: /* Each message would have a header = 1 + 2*(no of IS) + data */
2335: for (i=0; i<nrqs; i++) {
2336: j = pa[i];
2337: w1[j] += w2[j] + 2* w3[j];
2338: msz += w1[j];
2339: }
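  /* annotation (not in the original source): after this loop each outgoing request
     sbuf1[j] has the layout
       [ number of IS blocks | (is_no_1,nrows_1) ... (is_no_k,nrows_k) | rows of block 1 | ... ]
     i.e. 1 count, 2*w3[j] header entries filled in below, and w1[j]-2*w3[j]-1 row indices. */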
2341:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2342: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2343: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2345: /* Now post the Irecvs corresponding to these messages */
2346: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2347:
2348: PetscFree(onodes1);
2349: PetscFree(olengths1);
2350:
2351: /* Allocate Memory for outgoing messages */
2352: len = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
2353: PetscMalloc(len,&sbuf1);
2354: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2355: PetscMemzero(sbuf1,2*size*sizeof(int*));
2356: /* allocate memory for outgoing data + buf to receive the first reply */
2357: tmp = (int*)(ptr + size);
2358: ctr = tmp + 2*msz;
2360: {
2361: int *iptr = tmp,ict = 0;
2362: for (i=0; i<nrqs; i++) {
2363: j = pa[i];
2364: iptr += ict;
2365: sbuf1[j] = iptr;
2366: ict = w1[j];
2367: }
2368: }
2370: /* Form the outgoing messages */
2371: /* Initialize the header space */
2372: for (i=0; i<nrqs; i++) {
2373: j = pa[i];
2374: sbuf1[j][0] = 0;
2375: PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2376: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
2377: }
2378:
2379: /* Parse the isrow and copy data into outbuf */
2380: for (i=0; i<ismax; i++) {
2381: PetscMemzero(ctr,size*sizeof(int));
2382: irow_i = irow[i];
2383: jmax = nrow[i];
2384: for (j=0; j<jmax; j++) { /* parse the indices of each IS */
2385: row = irow_i[j];
2386: proc = rtable[row];
2387: if (proc != rank) { /* copy to the outgoing buf*/
2388: ctr[proc]++;
2389: *ptr[proc] = row;
2390: ptr[proc]++;
2391: }
2392: }
2393: /* Update the headers for the current IS */
2394: for (j=0; j<size; j++) { /* Can Optimise this loop too */
2395: if ((ctr_j = ctr[j])) {
2396: sbuf1_j = sbuf1[j];
2397: k = ++sbuf1_j[0];
2398: sbuf1_j[2*k] = ctr_j;
2399: sbuf1_j[2*k-1] = i;
2400: }
2401: }
2402: }
2404: /* Now post the sends */
2405: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2406: for (i=0; i<nrqs; ++i) {
2407: j = pa[i];
2408: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2409: }
2411: /* Post Receives to capture the buffer size */
2412: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2413: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2414: rbuf2[0] = tmp + msz;
2415: for (i=1; i<nrqs; ++i) {
2416: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2417: }
2418: for (i=0; i<nrqs; ++i) {
2419: j = pa[i];
2420: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2421: }
2423: /* Send to other procs the buf size they should allocate */
2424:
2426: /* Receive messages*/
2427: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2428: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2429: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2430: PetscMalloc(len,&sbuf2);
2431: req_size = (int*)(sbuf2 + nrqr);
2432: req_source = req_size + nrqr;
2433:
2434: {
2435: BSsprow **sAi = A->rows;
2436: int id,rstart = c->rstart;
2437: int *sbuf2_i;
2439: for (i=0; i<nrqr; ++i) {
2440: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2441: req_size[idx] = 0;
2442: rbuf1_i = rbuf1[idx];
2443: start = 2*rbuf1_i[0] + 1;
2444: MPI_Get_count(r_status1+i,MPI_INT,&end);
2445: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2446: sbuf2_i = sbuf2[idx];
2447: for (j=start; j<end; j++) {
2448: id = rbuf1_i[j] - rstart;
2449: ncols = (sAi[id])->length;
2450: sbuf2_i[j] = ncols;
2451: req_size[idx] += ncols;
2452: }
2453: req_source[idx] = r_status1[i].MPI_SOURCE;
2454: /* form the header */
2455: sbuf2_i[0] = req_size[idx];
2456: for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2457: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2458: }
2459: }
2460: PetscFree(r_status1);
2461: PetscFree(r_waits1);
2463: /* recv buffer sizes */
2464: /* Receive messages*/
2465:
2466: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2467: PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2468: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2469: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2470: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
2472: for (i=0; i<nrqs; ++i) {
2473: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2474: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2475: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2476: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2477: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2478: }
2479: PetscFree(r_status2);
2480: PetscFree(r_waits2);
2481:
2482: /* Wait on sends1 and sends2 */
2483: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2484: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
2486: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
2487: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
2488: PetscFree(s_status1);
2489: PetscFree(s_status2);
2490: PetscFree(s_waits1);
2491: PetscFree(s_waits2);
2493: /* Now allocate buffers for a->j, and send them off */
2494: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2495: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2496: PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2497: for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2498:
2499: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2500: {
2501: BSsprow *brow;
2502: int *Acol;
2503: int rstart = c->rstart;
2505: for (i=0; i<nrqr; i++) {
2506: rbuf1_i = rbuf1[i];
2507: sbuf_aj_i = sbuf_aj[i];
2508: ct1 = 2*rbuf1_i[0] + 1;
2509: ct2 = 0;
2510: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2511: kmax = rbuf1[i][2*j];
2512: for (k=0; k<kmax; k++,ct1++) {
2513: brow = A->rows[rbuf1_i[ct1] - rstart];
2514: ncols = brow->length;
2515: Acol = brow->col;
2516: /* load the column indices for this row into cols*/
2517: cols = sbuf_aj_i + ct2;
2518: PetscMemcpy(cols,Acol,ncols*sizeof(int));
2519:         /*for (l=0; l<ncols;l++) cols[l]=Acol[l];*/
2520:         /* how does this interact with the column mappings? */
2521: ct2 += ncols;
2522: }
2523: }
2524: MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2525: }
2526: }
2527: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2528: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
2530: /* Allocate buffers for a->a, and send them off */
2531: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2532: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2533: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2534: for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2535:
2536: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2537: {
2538: BSsprow *brow;
2539: FLOAT *Aval;
2540: int rstart = c->rstart;
2541:
2542: for (i=0; i<nrqr; i++) {
2543: rbuf1_i = rbuf1[i];
2544: sbuf_aa_i = sbuf_aa[i];
2545: ct1 = 2*rbuf1_i[0]+1;
2546: ct2 = 0;
2547: for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2548: kmax = rbuf1_i[2*j];
2549: for (k=0; k<kmax; k++,ct1++) {
2550: brow = A->rows[rbuf1_i[ct1] - rstart];
2551: ncols = brow->length;
2552: Aval = brow->nz;
2553: /* load the column values for this row into vals*/
2554: vals = sbuf_aa_i+ct2;
2555: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2556: ct2 += ncols;
2557: }
2558: }
2559: MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2560: }
2561: }
2562: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2563: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2564: PetscFree(rbuf1);
2566: /* Form the matrix */
2567: /* create col map */
2568: {
2569: int *icol_i;
2570:
2571: len = (1+ismax)*sizeof(int*)+ ismax*C->N*sizeof(int);
2572: PetscMalloc(len,&cmap);
2573: cmap[0] = (int*)(cmap + ismax);
2574: PetscMemzero(cmap[0],(1+ismax*C->N)*sizeof(int));
2575: for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->N; }
2576: for (i=0; i<ismax; i++) {
2577: jmax = ncol[i];
2578: icol_i = icol[i];
2579: cmap_i = cmap[i];
2580: for (j=0; j<jmax; j++) {
2581: cmap_i[icol_i[j]] = j+1;
2582: }
2583: }
2584: }
2586: /* Create lens which is required for MatCreate... */
2587: for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2588: len = (1+ismax)*sizeof(int*)+ j*sizeof(int);
2589: PetscMalloc(len,&lens);
2590: lens[0] = (int*)(lens + ismax);
2591: PetscMemzero(lens[0],j*sizeof(int));
2592: for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2593:
2594: /* Update lens from local data */
2595: { BSsprow *Arow;
2596: for (i=0; i<ismax; i++) {
2597: jmax = nrow[i];
2598: cmap_i = cmap[i];
2599: irow_i = irow[i];
2600: lens_i = lens[i];
2601: for (j=0; j<jmax; j++) {
2602: row = irow_i[j];
2603: proc = rtable[row];
2604: if (proc == rank) {
2605: Arow=A->rows[row-c->rstart];
2606: ncols=Arow->length;
2607: cols=Arow->col;
2608: for (k=0; k<ncols; k++) {
2609: if (cmap_i[cols[k]]) { lens_i[j]++;}
2610: }
2611: }
2612: }
2613: }
2614: }
2615:
2616: /* Create row map*/
2617: len = (1+ismax)*sizeof(int*)+ ismax*C->M*sizeof(int);
2618: PetscMalloc(len,&rmap);
2619: rmap[0] = (int*)(rmap + ismax);
2620: PetscMemzero(rmap[0],ismax*C->M*sizeof(int));
2621: for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->M;}
2622: for (i=0; i<ismax; i++) {
2623: rmap_i = rmap[i];
2624: irow_i = irow[i];
2625: jmax = nrow[i];
2626: for (j=0; j<jmax; j++) {
2627: rmap_i[irow_i[j]] = j;
2628: }
2629: }
2630:
2631: /* Update lens from offproc data */
2632: {
2633: int *rbuf2_i,*rbuf3_i,*sbuf1_i;
2635: for (tmp2=0; tmp2<nrqs; tmp2++) {
2636: MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2637: idx = pa[i];
2638: sbuf1_i = sbuf1[idx];
2639: jmax = sbuf1_i[0];
2640: ct1 = 2*jmax+1;
2641: ct2 = 0;
2642: rbuf2_i = rbuf2[i];
2643: rbuf3_i = rbuf3[i];
2644: for (j=1; j<=jmax; j++) {
2645: is_no = sbuf1_i[2*j-1];
2646: max1 = sbuf1_i[2*j];
2647: lens_i = lens[is_no];
2648: cmap_i = cmap[is_no];
2649: rmap_i = rmap[is_no];
2650: for (k=0; k<max1; k++,ct1++) {
2651:         row = rmap_i[sbuf1_i[ct1]]; /* the row index in the new submatrix */
2652: max2 = rbuf2_i[ct1];
2653: for (l=0; l<max2; l++,ct2++) {
2654: if (cmap_i[rbuf3_i[ct2]]) {
2655: lens_i[row]++;
2656: }
2657: }
2658: }
2659: }
2660: }
2661: }
2662: PetscFree(r_status3);
2663: PetscFree(r_waits3);
2664: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
2665: PetscFree(s_status3);
2666: PetscFree(s_waits3);
2668: /* Create the submatrices */
2669: if (scall == MAT_REUSE_MATRIX) {
2670: PetscTruth same;
2671:
2672: /*
2673:       Assumes the new rows are the same length as the old rows - hence a bug!
2674: */
2675: for (i=0; i<ismax; i++) {
2676: PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2677: if (!same) {
2678: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2679: }
2680: mat = (Mat_SeqAIJ*)(submats[i]->data);
2681: if ((submats[i]->m != nrow[i]) || (submats[i]->n != ncol[i])) {
2682: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2683: }
2684: PetscMemcmp(mat->ilen,lens[i],submats[i]->m*sizeof(int),&same);
2685: if (!same) {
2686: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2687: }
2688:       /* Initialize the matrix as if empty */
2689: PetscMemzero(mat->ilen,submats[i]->m*sizeof(int));
2690: submats[i]->factor = C->factor;
2691: }
2692: } else {
2693: for (i=0; i<ismax; i++) {
2694: /* Here we want to explicitly generate SeqAIJ matrices */
2695: MatCreate(PETSC_COMM_SELF,submats+i);
2696: MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);
2697: MatSetType(submats[i],MATSEQAIJ);
2698: MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2699: }
2700: }
2702: /* Assemble the matrices */
2703: /* First assemble the local rows */
2704: {
2705: int ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2706: PetscScalar *imat_a;
2707: BSsprow *Arow;
2708:
2709: for (i=0; i<ismax; i++) {
2710: mat = (Mat_SeqAIJ*)submats[i]->data;
2711: imat_ilen = mat->ilen;
2712: imat_j = mat->j;
2713: imat_i = mat->i;
2714: imat_a = mat->a;
2715: cmap_i = cmap[i];
2716: rmap_i = rmap[i];
2717: irow_i = irow[i];
2718: jmax = nrow[i];
2719: for (j=0; j<jmax; j++) {
2720: row = irow_i[j];
2721: proc = rtable[row];
2722: if (proc == rank) {
2723: old_row = row;
2724: row = rmap_i[row];
2725: ilen_row = imat_ilen[row];
2726:
2727: Arow=A->rows[old_row-c->rstart];
2728: ncols=Arow->length;
2729: cols=Arow->col;
2730: vals=Arow->nz;
2731:
2732: mat_i = imat_i[row];
2733: mat_a = imat_a + mat_i;
2734: mat_j = imat_j + mat_i;
2735: for (k=0; k<ncols; k++) {
2736: if ((tcol = cmap_i[cols[k]])) {
2737: *mat_j++ = tcol - 1;
2738: *mat_a++ = (PetscScalar)vals[k];
2739: ilen_row++;
2740: }
2741: }
2742: imat_ilen[row] = ilen_row;
2743: }
2744: }
2745: }
2746: }
2748: /* Now assemble the off proc rows*/
2749: {
2750: int *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2751: int *imat_j,*imat_i;
2752: PetscScalar *imat_a;
2753: FLOAT *rbuf4_i;
2754:
2755: for (tmp2=0; tmp2<nrqs; tmp2++) {
2756: MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2757: idx = pa[i];
2758: sbuf1_i = sbuf1[idx];
2759: jmax = sbuf1_i[0];
2760: ct1 = 2*jmax + 1;
2761: ct2 = 0;
2762: rbuf2_i = rbuf2[i];
2763: rbuf3_i = rbuf3[i];
2764: rbuf4_i = rbuf4[i];
2765: for (j=1; j<=jmax; j++) {
2766: is_no = sbuf1_i[2*j-1];
2767: rmap_i = rmap[is_no];
2768: cmap_i = cmap[is_no];
2769: mat = (Mat_SeqAIJ*)submats[is_no]->data;
2770: imat_ilen = mat->ilen;
2771: imat_j = mat->j;
2772: imat_i = mat->i;
2773: imat_a = mat->a;
2774: max1 = sbuf1_i[2*j];
2775: for (k=0; k<max1; k++,ct1++) {
2776: row = sbuf1_i[ct1];
2777: row = rmap_i[row];
2778: ilen = imat_ilen[row];
2779: mat_i = imat_i[row];
2780: mat_a = imat_a + mat_i;
2781: mat_j = imat_j + mat_i;
2782: max2 = rbuf2_i[ct1];
2783: for (l=0; l<max2; l++,ct2++) {
2784: if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2785: *mat_j++ = tcol - 1;
2786: *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2787: ilen++;
2788: }
2789: }
2790: imat_ilen[row] = ilen;
2791: }
2792: }
2793: }
2794: }
2795: PetscFree(r_status4);
2796: PetscFree(r_waits4);
2797: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
2798: PetscFree(s_waits4);
2799: PetscFree(s_status4);
2801: /* Restore the indices */
2802: for (i=0; i<ismax; i++) {
2803: ISRestoreIndices(isrow[i],irow+i);
2804: ISRestoreIndices(iscol[i],icol+i);
2805: }
2807: /* Destroy allocated memory */
2808: PetscFree(irow);
2809: PetscFree(w1);
2810: PetscFree(pa);
2812: PetscFree(sbuf1);
2813: PetscFree(rbuf2);
2814: for (i=0; i<nrqr; ++i) {
2815: PetscFree(sbuf2[i]);
2816: }
2817: for (i=0; i<nrqs; ++i) {
2818: PetscFree(rbuf3[i]);
2819: PetscFree(rbuf4[i]);
2820: }
2822: PetscFree(sbuf2);
2823: PetscFree(rbuf3);
2824: PetscFree(rbuf4);
2825: PetscFree(sbuf_aj[0]);
2826: PetscFree(sbuf_aj);
2827: PetscFree(sbuf_aa[0]);
2828: PetscFree(sbuf_aa);
2829:
2830: PetscFree(cmap);
2831: PetscFree(rmap);
2832: PetscFree(lens);
2834: for (i=0; i<ismax; i++) {
2835: MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2836: MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2837: }
2838: return(0);
2839: }
2841: /*
2842:    this can be optimized by sending only the nonzeros selected by the iscol IS -
2843:    i.e. by prebuilding the submatrix on the sending side, including the A,B partitioning
2844: */
2847: #include src/vec/is/impls/general/general.h
2848: PetscErrorCode MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2849: {
2850: Mat_MPIRowbs *c = (Mat_MPIRowbs*)C->data;
2851: BSspmat *A = c->A;
2852: BSsprow *Arow;
2853:   Mat_SeqAIJ    *matA,*matB; /* on-proc (diagonal) and off-proc (off-diagonal) parts of submat */
2854: Mat_MPIAIJ *mat; /* submat->data */
2856: int *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2857: int *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2858: int i,j,k,l,len,jmax,proc,idx;
2859: int **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2860: FLOAT **rbuf4,**sbuf4; /* FLOAT is from Block Solve 95 library */
2862: int *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2863: int *req_source;
2864: int ncols_t;
2865:
2866:
2867: MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2868: MPI_Request *r_waits4,*s_waits3,*s_waits4;
2869:
2870: MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2871: MPI_Status *r_status3,*r_status4,*s_status4;
2872: MPI_Comm comm;
2876: comm = C->comm;
2877: tag0 = C->tag;
2878: size = c->size;
2879: rank = c->rank;
2881: if (size==1) {
2882: if (scall == MAT_REUSE_MATRIX) {
2883: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2884: return(0);
2885: } else {
2886: Mat *newsubmat;
2887:
2888: ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2889: *submat=*newsubmat;
2890: ierr=PetscFree(newsubmat);
2891: return(0);
2892: }
2893: }
2894:
2895: /* Get some new tags to keep the communication clean */
2896: PetscObjectGetNewTag((PetscObject)C,&tag1);
2897: PetscObjectGetNewTag((PetscObject)C,&tag2);
2898: PetscObjectGetNewTag((PetscObject)C,&tag3);
2900: /* Check if the col indices are sorted */
2901: {PetscTruth sorted;
2902: ISSorted(isrow,&sorted);
2903: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2904: ISSorted(iscol,&sorted);
2905: if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2906: }
2907:
2908: ISGetIndices(isrow,&irow);
2909: ISGetIndices(iscol,&icol);
2910: ISGetLocalSize(isrow,&nrow);
2911: ISGetLocalSize(iscol,&ncol);
2912:
2913: if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2914: if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2915:
2916:
2917: len = (C->M+1)*sizeof(int);
2918: PetscMalloc(len,&rtable);
2919:   /* Create a lookup table for the mapping: row -> owning proc */
2920: for (i=0,j=0; i<size; i++) {
2921: jmax = c->rowners[i+1];
2922: for (; j<jmax; j++) {
2923: rtable[j] = i;
2924: }
2925: }
2927: /* evaluate communication - mesg to who, length of mesg, and buffer space
2928: required. Based on this, buffers are allocated, and data copied into them*/
2929: PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2930: w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/
2931: PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2932: for (j=0; j<nrow; j++) {
2933: row_t = irow[j];
2934: proc = rtable[row_t];
2935: w1[proc]++;
2936: }
2937: nrqs = 0; /* no of outgoing messages */
2938: msz = 0; /* total mesg length (for all procs) */
2939: w1[rank] = 0; /* no mesg sent to self */
2940: for (i=0; i<size; i++) {
2941: if (w1[i]) { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2942: }
2943:
2944: PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2945: for (i=0,j=0; i<size; i++) {
2946: if (w1[i]) {
2947: pa[j++] = i;
2948: w1[i]++; /* header for return data */
2949: msz+=w1[i];
2950: }
2951: }
2952:
2953: {int *onodes1,*olengths1;
2954:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2955: PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2956: PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2957: /* Now post the Irecvs corresponding to these messages */
2958: PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2959: PetscFree(onodes1);
2960: PetscFree(olengths1);
2961: }
2962:
2963: { int **ptr,*iptr,*tmp;
2964: /* Allocate Memory for outgoing messages */
2965: len = 2*size*sizeof(int*) + msz*sizeof(int);
2966: PetscMalloc(len,&sbuf1);
2967: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
2968: PetscMemzero(sbuf1,2*size*sizeof(int*));
2969: /* allocate memory for outgoing data + buf to receive the first reply */
2970: tmp = (int*)(ptr + size);
2972: for (i=0,iptr=tmp; i<nrqs; i++) {
2973: j = pa[i];
2974: sbuf1[j] = iptr;
2975: iptr += w1[j];
2976: }
2978: /* Form the outgoing messages */
2979: for (i=0; i<nrqs; i++) {
2980: j = pa[i];
2981: sbuf1[j][0] = 0; /*header */
2982: ptr[j] = sbuf1[j] + 1;
2983: }
2984:
2985: /* Parse the isrow and copy data into outbuf */
2986: for (j=0; j<nrow; j++) {
2987: row_t = irow[j];
2988: proc = rtable[row_t];
2989: if (proc != rank) { /* copy to the outgoing buf*/
2990: sbuf1[proc][0]++;
2991: *ptr[proc] = row_t;
2992: ptr[proc]++;
2993: }
2994: }
2995: } /* block */
2997: /* Now post the sends */
2998:
2999:   /* structure of sbuf1[i]/rbuf1[i]: 1 header entry (number of requested rows) followed
3000:    * by the global numbers of the requested (non-local) rows */
3002: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
3003: for (i=0; i<nrqs; ++i) {
3004: j = pa[i];
3005: MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
3006: }
3008: /* Post Receives to capture the buffer size */
3009: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
3010: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
3011: PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
3012: for (i=1; i<nrqs; ++i) {
3013: rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
3014: }
3015: for (i=0; i<nrqs; ++i) {
3016: j = pa[i];
3017: MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
3018: }
3020: /* Send to other procs the buf size they should allocate */
3021:   /* structure of sbuf2[i]/rbuf2[i]: 1 header entry (total buffer size to allocate) followed
3022:    * by the length of each requested row */
3024: /* Receive messages*/
3025: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
3026: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
3027: len = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
3028: PetscMalloc(len,&sbuf2);
3029: req_size = (int*)(sbuf2 + nrqr);
3030: req_source = req_size + nrqr;
3031:
3032: {
3033: BSsprow **sAi = A->rows;
3034: int id,rstart = c->rstart;
3035: int *sbuf2_i,*rbuf1_i,end;
3037: for (i=0; i<nrqr; ++i) {
3038: MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
3039: req_size[idx] = 0;
3040: rbuf1_i = rbuf1[idx];
3041: MPI_Get_count(r_status1+i,MPI_INT,&end);
3042: PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
3043: sbuf2_i = sbuf2[idx];
3044: for (j=1; j<end; j++) {
3045: id = rbuf1_i[j] - rstart;
3046: ncols_t = (sAi[id])->length;
3047: sbuf2_i[j] = ncols_t;
3048: req_size[idx] += ncols_t;
3049: }
3050: req_source[idx] = r_status1[i].MPI_SOURCE;
3051: /* form the header */
3052: sbuf2_i[0] = req_size[idx];
3053: MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
3054: }
3055: }
3056: PetscFree(r_status1);
3057: PetscFree(r_waits1);
3059: /* recv buffer sizes */
3060: /* Receive messages*/
3061:
3062: PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
3063: PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
3064: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
3065: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
3066: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);
3068: for (i=0; i<nrqs; ++i) {
3069: MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
3070: PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
3071: PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
3072: MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
3073: MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
3074: }
3075: PetscFree(r_status2);
3076: PetscFree(r_waits2);
3077:
3078: /* Wait on sends1 and sends2 */
3079: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
3080: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);
3082: if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
3083: if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
3084: PetscFree(s_status1);
3085: PetscFree(s_status2);
3086: PetscFree(s_waits1);
3087: PetscFree(s_waits2);
3089: /* Now allocate buffers for a->j, and send them off */
3090:   /* structure of sbuf3[i]/rbuf3[i] and sbuf4[i]/rbuf4[i]: req_size[i] entries (the columns,
3091:    * resp. values, of all requested rows; the row lengths were sent in rbuf2; values are of FLOAT type) */
3092:
3093: PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3094: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3095: PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3096: for (i=1; i<nrqr; i++) sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3097:
3098: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3099: {
3100: int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3101: int rstart = c->rstart;
3103: for (i=0; i<nrqr; i++) {
3104: rbuf1_i = rbuf1[i];
3105: sbuf3_i = sbuf3[i];
3106: noutcols = 0;
3107: kmax = rbuf1_i[0]; /* num. of req. rows */
3108: for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3109: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3110: ncols = Arow->length;
3111: Acol = Arow->col;
3112: /* load the column indices for this row into cols*/
3113: cols = sbuf3_i + noutcols;
3114: PetscMemcpy(cols,Acol,ncols*sizeof(int));
3115:         /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* how does this interact with the column mappings? */
3116: noutcols += ncols;
3117: }
3118: MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3119: }
3120: }
3121: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3122: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);
3124: /* Allocate buffers for a->a, and send them off */
3125:   /* can be optimized by merging with the previous block */
3126: PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3127: for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3128: PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3129: for (i=1; i<nrqr; i++) sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3130:
3131: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3132: {
3133: FLOAT *Aval,*vals,*sbuf4_i;
3134: int rstart = c->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3135:
3136:
3137: for (i=0; i<nrqr; i++) {
3138: rbuf1_i = rbuf1[i];
3139: sbuf4_i = sbuf4[i];
3140: rqrow = 1;
3141: noutvals = 0;
3142: kmax = rbuf1_i[0]; /* num of req. rows */
3143: for (k=0; k<kmax; k++,rqrow++) {
3144: Arow = A->rows[rbuf1_i[rqrow] - rstart];
3145: ncols = Arow->length;
3146: Aval = Arow->nz;
3147: /* load the column values for this row into vals*/
3148: vals = sbuf4_i+noutvals;
3149: PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3150: noutvals += ncols;
3151: }
3152: MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3153: }
3154: }
3155: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3156: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3157: PetscFree(rbuf1);
3159: /* Form the matrix */
3161: /* create col map */
3162: len = C->N*sizeof(int)+1;
3163: PetscMalloc(len,&cmap);
3164: PetscMemzero(cmap,C->N*sizeof(int));
3165: for (j=0; j<ncol; j++) {
3166: cmap[icol[j]] = j+1;
3167: }
3168:
3169:   /* Create row map (a global row map may be needed eventually; this one is local) */
3170: len = C->M*sizeof(int)+1;
3171: PetscMalloc(len,&rmap);
3172: PetscMemzero(rmap,C->M*sizeof(int));
3173: for (j=0; j<nrow; j++) {
3174: rmap[irow[j]] = j;
3175: }
3177: /*
3178: Determine the number of non-zeros in the diagonal and off-diagonal
3179: portions of the matrix in order to do correct preallocation
3180: */
3182: /* first get start and end of "diagonal" columns */
3183: if (csize == PETSC_DECIDE) {
3184: nlocal = ncol/size + ((ncol % size) > rank);
3185: } else {
3186: nlocal = csize;
3187: }
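  /* annotation (not in the original source): example of the PETSC_DECIDE split - with
     ncol=10 selected columns on size=4 processes, nlocal is 3,3,2,2 (ranks 0 and 1 take
     the remainder), so the MPI_Scan below yields cend = 3,6,8,10 and cstart = 0,3,6,8. */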
3188: {
3189: int ncols,*cols,olen,dlen,thecol;
3190: int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3191:
3192: MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3193: cstart = cend - nlocal;
3194: if (rank == size - 1 && cend != ncol) {
3195: SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns");
3196: }
3198: PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3199: o_nz = d_nz + nrow;
3200:
3201: /* Update lens from local data */
3202: for (j=0; j<nrow; j++) {
3203: row = irow[j];
3204: proc = rtable[row];
3205: if (proc == rank) {
3206: Arow=A->rows[row-c->rstart];
3207: ncols=Arow->length;
3208: cols=Arow->col;
3209: olen=dlen=0;
3210: for (k=0; k<ncols; k++) {
3211: if ((thecol=cmap[cols[k]])) {
3212:             if (cstart<thecol && thecol<=cend) dlen++; /* thecol is 1-based */
3213: else olen++;
3214: }
3215: }
3216: o_nz[j]=olen;
3217: d_nz[j]=dlen;
3218: } else d_nz[j]=o_nz[j]=0;
3219: }
3220: /* Update lens from offproc data and done waits */
3221:     /* this will be much simpler once only the appropriate columns are sent */
3222: for (j=0; j<nrqs;j++) {
3223: MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3224: proc = pa[i];
3225: sbuf1_i = sbuf1[proc];
3226: cidx = 0;
3227: rbuf2_i = rbuf2[i];
3228: rbuf3_i = rbuf3[i];
3229: kmax = sbuf1_i[0]; /*num of rq. rows*/
3230: for (k=1; k<=kmax; k++) {
3231:         row = rmap[sbuf1_i[k]]; /* the row index in the new submatrix */
3232: for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3233: if ((thecol=cmap[rbuf3_i[cidx]])) {
3234:
3235:             if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is 1-based */
3236: else o_nz[row]++;
3237: }
3238: }
3239: }
3240: }
3241: }
3242: PetscFree(r_status3);
3243: PetscFree(r_waits3);
3244: if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
3245: PetscFree(s_status3);
3246: PetscFree(s_waits3);
3248: if (scall == MAT_INITIAL_MATRIX) {
3249: MatCreate(comm,submat);
3250: MatSetSizes(*submat,nrow,nlocal,PETSC_DECIDE,ncol);
3251:     MatSetType(*submat,MATMPIAIJ); /* the submatrix is assembled as MPIAIJ (matA/matB below) */
3252: MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3253: mat=(Mat_MPIAIJ *)((*submat)->data);
3254: matA=(Mat_SeqAIJ *)(mat->A->data);
3255: matB=(Mat_SeqAIJ *)(mat->B->data);
3256:
3257: } else {
3258: PetscTruth same;
3259:     /* the following consistency checks could optionally be skipped for debugged user
3260:      * programs, but there is no PETSc option to switch off such safety tests; in the
3261:      * same way the counting of o_nz,d_nz could be skipped for a REUSE
3262:      * matrix */
3263:
3264: PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3265: if (!same) {
3266: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3267: }
3268: if (((*submat)->m != nrow) || ((*submat)->N != ncol)) {
3269: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3270: }
3271: mat=(Mat_MPIAIJ *)((*submat)->data);
3272: matA=(Mat_SeqAIJ *)(mat->A->data);
3273: matB=(Mat_SeqAIJ *)(mat->B->data);
3274: PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3275: if (!same) {
3276: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3277: }
3278: PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3279: if (!same) {
3280: SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3281: }
3282:     /* Initialize the matrix as if empty */
3283: PetscMemzero(matA->ilen,nrow*sizeof(int));
3284: PetscMemzero(matB->ilen,nrow*sizeof(int));
3285:     /* Perhaps MatZeroEntries would be better - check exactly what it does, since all
3286:      * possibly stale information must be cleared */
3287:     /*submats[i]->factor = C->factor; !!! ??? if the factor is to be kept the same then the
3288:      * factor information must be copied as well - where is it stored? */
3289: (*submat)->was_assembled=PETSC_FALSE;
3290: (*submat)->assembled=PETSC_FALSE;
3291:
3292: }
3293: PetscFree(d_nz);
3295: /* Assemble the matrix */
3296: /* First assemble from local rows */
3297: {
3298: int i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3299: FLOAT *vals;
3300: PetscScalar *matA_a,*matB_a;
3301:
3302: for (j=0; j<nrow; j++) {
3303: oldrow = irow[j];
3304: proc = rtable[oldrow];
3305: if (proc == rank) {
3306: row = rmap[oldrow];
3307:
3308: Arow = A->rows[oldrow-c->rstart];
3309: ncols = Arow->length;
3310: cols = Arow->col;
3311: vals = Arow->nz;
3312:
3313: i_row = matA->i[row];
3314: matA_a = matA->a + i_row;
3315: matA_j = matA->j + i_row;
3316: i_row = matB->i[row];
3317: matB_a = matB->a + i_row;
3318: matB_j = matB->j + i_row;
3319: for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3320: if ((tcol = cmap[cols[k]])) {
3321: if (tcol<=cstart) {
3322: *matB_j++ = tcol-1;
3323: *matB_a++ = vals[k];
3324: ilenB++;
3325: } else if (tcol<=cend) {
3326: *matA_j++ = (tcol-1)-cstart;
3327: *matA_a++ = (PetscScalar)(vals[k]);
3328: ilenA++;
3329: } else {
3330: *matB_j++ = tcol-1;
3331: *matB_a++ = vals[k];
3332: ilenB++;
3333: }
3334: }
3335: }
3336: matA->ilen[row]=ilenA;
3337: matB->ilen[row]=ilenB;
3338:
3339: }
3340: }
3341: }
3343: /* Now assemble the off proc rows*/
3344: {
3345: int *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3346: int *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3347: PetscScalar *matA_a,*matB_a;
3348: FLOAT *rbuf4_i;
3350: for (j=0; j<nrqs; j++) {
3351: MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3352: proc = pa[i];
3353: sbuf1_i = sbuf1[proc];
3354:
3355: cidx = 0;
3356: rbuf2_i = rbuf2[i];
3357: rbuf3_i = rbuf3[i];
3358: rbuf4_i = rbuf4[i];
3359: kmax = sbuf1_i[0];
3360: for (k=1; k<=kmax; k++) {
3361: row = rmap[sbuf1_i[k]];
3362:
3363: i_row = matA->i[row];
3364: matA_a = matA->a + i_row;
3365: matA_j = matA->j + i_row;
3366: i_row = matB->i[row];
3367: matB_a = matB->a + i_row;
3368: matB_j = matB->j + i_row;
3369:
3370: lmax = rbuf2_i[k];
3371: for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3372: if ((tcol = cmap[rbuf3_i[cidx]])) {
3373: if (tcol<=cstart) {
3374: *matB_j++ = tcol-1;
3375:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3376: ilenB++;
3377: } else if (tcol<=cend) {
3378: *matA_j++ = (tcol-1)-cstart;
3379: *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3380: ilenA++;
3381: } else {
3382: *matB_j++ = tcol-1;
3383: *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3384: ilenB++;
3385: }
3386: }
3387: }
3388: matA->ilen[row]=ilenA;
3389: matB->ilen[row]=ilenB;
3390: }
3391: }
3392: }
3394: PetscFree(r_status4);
3395: PetscFree(r_waits4);
3396: if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
3397: PetscFree(s_waits4);
3398: PetscFree(s_status4);
3400: /* Restore the indices */
3401: ISRestoreIndices(isrow,&irow);
3402: ISRestoreIndices(iscol,&icol);
3404: /* Destroy allocated memory */
3405: PetscFree(rtable);
3406: PetscFree(w1);
3407: PetscFree(pa);
3409: PetscFree(sbuf1);
3410: PetscFree(rbuf2[0]);
3411: PetscFree(rbuf2);
3412: for (i=0; i<nrqr; ++i) {
3413: PetscFree(sbuf2[i]);
3414: }
3415: for (i=0; i<nrqs; ++i) {
3416: PetscFree(rbuf3[i]);
3417: PetscFree(rbuf4[i]);
3418: }
3420: PetscFree(sbuf2);
3421: PetscFree(rbuf3);
3422: PetscFree(rbuf4);
3423: PetscFree(sbuf3[0]);
3424: PetscFree(sbuf3);
3425: PetscFree(sbuf4[0]);
3426: PetscFree(sbuf4);
3427:
3428: PetscFree(cmap);
3429: PetscFree(rmap);
3432: MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3433: MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);
3436: return(0);
3437: }
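/*
   Hedged usage sketch (not part of the original source): extracting a parallel
   submatrix of a MATMPIROWBS matrix C through the routine above.  The stride bounds
   nrowloc, rowfirst, ncolglob, colfirst are hypothetical values chosen by the caller;
   both index sets must be sorted (as checked at the top of MatGetSubMatrix_MPIRowbs),
   isrow holds this process's requested rows, and iscol is typically the same set of
   retained columns on every process.

   IS  isrow,iscol;
   Mat S;

   ISCreateStride(PETSC_COMM_WORLD,nrowloc,rowfirst,1,&isrow);
   ISCreateStride(PETSC_COMM_WORLD,ncolglob,colfirst,1,&iscol);
   MatGetSubMatrix(C,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);
   (use S here)
   MatDestroy(S);
   ISDestroy(isrow);
   ISDestroy(iscol);
*/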