Actual source code: mpirowbs.c

  1: #define PETSCMAT_DLL

 3:  #include "src/mat/impls/rowbs/mpi/mpirowbs.h"

  5: #define CHUNCKSIZE_LOCAL   10

  9: static PetscErrorCode MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
 10: {

 14:   if (v) {
 15: #if defined(PETSC_USE_LOG)
 16:     int len = -n*(sizeof(int)+sizeof(PetscScalar));
 17: #endif
 18:     PetscFree(v);
 19:     PetscLogObjectMemory(A,len);
 20:   }
 21:   return(0);
 22: }

 26: static PetscErrorCode MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
 27: {
 29:   int len;

 32:   if (!n) {
 33:     *i = 0; *v = 0;
 34:   } else {
 35:     len = n*(sizeof(int) + sizeof(PetscScalar));
 36:     PetscMalloc(len,v);
 37:     PetscLogObjectMemory(A,len);
 38:     *i = (int*)(*v + n);
 39:   }
 40:   return(0);
 41: }

 45: PetscErrorCode MatScale_MPIRowbs(Mat inA,PetscScalar alpha)
 46: {
 47:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)inA->data;
 48:   BSspmat        *A = a->A;
 49:   BSsprow        *vs;
 50:   PetscScalar    *ap;
 51:   int            i,m = inA->m,nrow,j;

 55:   for (i=0; i<m; i++) {
 56:     vs   = A->rows[i];
 57:     nrow = vs->length;
 58:     ap   = vs->nz;
 59:     for (j=0; j<nrow; j++) {
 60:       ap[j] *= alpha;
 61:     }
 62:   }
 63:   PetscLogFlops(a->nz);
 64:   return(0);
 65: }

 67: /* ----------------------------------------------------------------- */
 70: static PetscErrorCode MatCreateMPIRowbs_local(Mat A,int nz,const int nnz[])
 71: {
 72:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
 74:   int   i,len,m = A->m,*tnnz;
 75:   BSspmat      *bsmat;
 76:   BSsprow      *vs;

 79:   PetscMalloc((m+1)*sizeof(int),&tnnz);
 80:   if (!nnz) {
 81:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
 82:     if (nz <= 0)             nz = 1;
 83:     for (i=0; i<m; i++) tnnz[i] = nz;
 84:     nz      = nz*m;
 85:   } else {
 86:     nz = 0;
 87:     for (i=0; i<m; i++) {
 88:       if (nnz[i] <= 0) tnnz[i] = 1;
 89:       else             tnnz[i] = nnz[i];
 90:       nz += tnnz[i];
 91:     }
 92:   }

 94:   /* Allocate BlockSolve matrix context */
 95:   PetscNew(BSspmat,&bsif->A);
 96:   bsmat = bsif->A;
 97:   BSset_mat_icc_storage(bsmat,PETSC_FALSE);
 98:   BSset_mat_symmetric(bsmat,PETSC_FALSE);
 99:   len                    = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
100:   PetscMalloc(len,&bsmat->rows);
101:   bsmat->num_rows        = m;
102:   bsmat->global_num_rows = A->M;
103:   bsmat->map             = bsif->bsmap;
104:   vs                     = (BSsprow*)(bsmat->rows + m);
105:   for (i=0; i<m; i++) {
106:     bsmat->rows[i]  = vs;
107:     bsif->imax[i]   = tnnz[i];
108:     vs->diag_ind    = -1;
109:     MatMallocRowbs_Private(A,tnnz[i],&(vs->col),&(vs->nz));
110:     /* put zero on diagonal */
111:     /*vs->length            = 1;
112:     vs->col[0]      = i + bsif->rstart;
113:     vs->nz[0]       = 0.0;*/
114:     vs->length = 0;
115:     vs++;
116:   }
117:   PetscLogObjectMemory(A,sizeof(BSspmat) + len);
118:   bsif->nz               = 0;
119:   bsif->maxnz            = nz;
120:   bsif->sorted           = 0;
121:   bsif->roworiented      = PETSC_TRUE;
122:   bsif->nonew            = 0;
123:   bsif->bs_color_single  = 0;

125:   PetscFree(tnnz);
126:   return(0);
127: }

131: static PetscErrorCode MatSetValues_MPIRowbs_local(Mat AA,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
132: {
133:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
134:   BSspmat      *A = mat->A;
135:   BSsprow      *vs;
137:   int          *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax;
138:   int          *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
139:   PetscScalar  *ap,value;

142:   for (k=0; k<m; k++) { /* loop over added rows */
143:     row = im[k];
144:     if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row);
145:     if (row >= AA->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,AA->m-1);
146:     vs   = A->rows[row];
147:     ap   = vs->nz; rp = vs->col;
148:     rmax = imax[row]; nrow = vs->length;
149:     a    = 0;
150:     for (l=0; l<n; l++) { /* loop over added columns */
151:       if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative col: %d",in[l]);
152:       if (in[l] >= AA->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],AA->N-1);
153:       col = in[l]; value = *v++;
154:       if (!sorted) a = 0; b = nrow;
155:       while (b-a > 5) {
156:         t = (b+a)/2;
157:         if (rp[t] > col) b = t;
158:         else             a = t;
159:       }
160:       for (i=a; i<b; i++) {
161:         if (rp[i] > col) break;
162:         if (rp[i] == col) {
163:           if (addv == ADD_VALUES) ap[i] += value;
164:           else                    ap[i] = value;
165:           goto noinsert;
166:         }
167:       }
168:       if (nonew) goto noinsert;
169:       if (nrow >= rmax) {
170:         /* there is no extra room in row, therefore enlarge */
171:         int    *itemp,*iout,*iin = vs->col;
172:         PetscScalar *vout,*vin = vs->nz,*vtemp;

174:         /* malloc new storage space */
175:         imax[row] += CHUNCKSIZE_LOCAL;
176:         MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
177:         vout = vtemp; iout = itemp;
178:         for (ii=0; ii<i; ii++) {
179:           vout[ii] = vin[ii];
180:           iout[ii] = iin[ii];
181:         }
182:         vout[i] = value;
183:         iout[i] = col;
184:         for (ii=i+1; ii<=nrow; ii++) {
185:           vout[ii] = vin[ii-1];
186:           iout[ii] = iin[ii-1];
187:         }
188:         /* free old row storage */
189:         if (rmax > 0) {
190:           MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
191:         }
192:         vs->col           =  iout; vs->nz = vout;
193:         rmax              =  imax[row];
194:         mat->maxnz        += CHUNCKSIZE_LOCAL;
195:         mat->reallocs++;
196:       } else {
197:         /* shift higher columns over to make room for newie */
198:         for (ii=nrow-1; ii>=i; ii--) {
199:           rp[ii+1] = rp[ii];
200:           ap[ii+1] = ap[ii];
201:         }
202:         rp[i] = col;
203:         ap[i] = value;
204:       }
205:       nrow++;
206:       mat->nz++;
207:       AA->same_nonzero = PETSC_FALSE;
208:       noinsert:;
209:       a = i + 1;
210:     }
211:     vs->length = nrow;
212:   }
213:   return(0);
214: }


219: static PetscErrorCode MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
220: {
222:   return(0);
223: }

227: static PetscErrorCode MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
228: {
229:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
230:   BSspmat      *A = a->A;
231:   BSsprow      *vs;
232:   int          i,j,rstart = a->rstart;

235:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

237:   /* Mark location of diagonal */
238:   for (i=0; i<AA->m; i++) {
239:     vs = A->rows[i];
240:     for (j=0; j<vs->length; j++) {
241:       if (vs->col[j] == i + rstart) {
242:         vs->diag_ind = j;
243:         break;
244:       }
245:     }
246:     if (vs->diag_ind == -1) {
247:       SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
248:     }
249:   }
250:   return(0);
251: }

255: static PetscErrorCode MatZeroRows_MPIRowbs_local(Mat A,PetscInt N,const PetscInt rz[],PetscScalar diag)
256: {
257:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
258:   BSspmat      *l = a->A;
260:   int          i,m = A->m - 1,col,base=a->rowners[a->rank];

263:   if (a->keepzeroedrows) {
264:     for (i=0; i<N; i++) {
265:       if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
266:       PetscMemzero(l->rows[rz[i]]->nz,l->rows[rz[i]]->length*sizeof(PetscScalar));
267:       if (diag != 0.0) {
268:         col=rz[i]+base;
269:         MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
270:       }
271:     }
272:   } else {
273:     if (diag != 0.0) {
274:       for (i=0; i<N; i++) {
275:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
276:         if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
277:           l->rows[rz[i]]->length = 1;
278:           l->rows[rz[i]]->nz[0]  = diag;
279:           l->rows[rz[i]]->col[0] = a->rstart + rz[i];
280:         } else {
281:           col=rz[i]+base;
282:           MatSetValues_MPIRowbs_local(A,1,&rz[i],1,&col,&diag,INSERT_VALUES);
283:         }
284:       }
285:     } else {
286:       for (i=0; i<N; i++) {
287:         if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
288:         l->rows[rz[i]]->length = 0;
289:       }
290:     }
291:     A->same_nonzero = PETSC_FALSE;
292:   }
293:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
294:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
295:   return(0);
296: }

300: static PetscErrorCode MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
301: {
302:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
303:   BSsprow      *vs,**rs;
304:   PetscScalar  *xv;
305:   PetscReal    sum = 0.0;
307:   int          *xi,nz,i,j;

310:   rs = mat->A->rows;
311:   if (type == NORM_FROBENIUS) {
312:     for (i=0; i<A->m; i++) {
313:       vs = *rs++;
314:       nz = vs->length;
315:       xv = vs->nz;
316:       while (nz--) {
317: #if defined(PETSC_USE_COMPLEX)
318:         sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
319: #else
320:         sum += (*xv)*(*xv); xv++;
321: #endif
322:       }
323:     }
324:     *norm = sqrt(sum);
325:   } else if (type == NORM_1) { /* max column norm */
326:     PetscReal *tmp;
327:     PetscMalloc(A->n*sizeof(PetscReal),&tmp);
328:     PetscMemzero(tmp,A->n*sizeof(PetscReal));
329:     *norm = 0.0;
330:     for (i=0; i<A->m; i++) {
331:       vs = *rs++;
332:       nz = vs->length;
333:       xi = vs->col;
334:       xv = vs->nz;
335:       while (nz--) {
336:         tmp[*xi] += PetscAbsScalar(*xv);
337:         xi++; xv++;
338:       }
339:     }
340:     for (j=0; j<A->n; j++) {
341:       if (tmp[j] > *norm) *norm = tmp[j];
342:     }
343:     PetscFree(tmp);
344:   } else if (type == NORM_INFINITY) { /* max row norm */
345:     *norm = 0.0;
346:     for (i=0; i<A->m; i++) {
347:       vs = *rs++;
348:       nz = vs->length;
349:       xv = vs->nz;
350:       sum = 0.0;
351:       while (nz--) {
352:         sum += PetscAbsScalar(*xv); xv++;
353:       }
354:       if (sum > *norm) *norm = sum;
355:     }
356:   } else {
357:     SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
358:   }
359:   return(0);
360: }

362: /* ----------------------------------------------------------------- */

366: PetscErrorCode MatSetValues_MPIRowbs(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode av)
367: {
368:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
370:   int   i,j,row,col,rstart = a->rstart,rend = a->rend;
371:   PetscTruth   roworiented = a->roworiented;

374:   /* Note:  There's no need to "unscale" the matrix, since scaling is
375:      confined to a->pA, and we're working with a->A here */
376:   for (i=0; i<m; i++) {
377:     if (im[i] < 0) continue;
378:     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
379:     if (im[i] >= rstart && im[i] < rend) {
380:       row = im[i] - rstart;
381:       for (j=0; j<n; j++) {
382:         if (in[j] < 0) continue;
383:         if (in[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);
384:         if (in[j] >= 0 && in[j] < mat->N){
385:           col = in[j];
386:           if (roworiented) {
387:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
388:           } else {
389:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
390:           }
391:         } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
392:       }
393:     } else {
394:       if (!a->donotstash) {
395:         if (roworiented) {
396:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
397:         } else {
398:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
399:         }
400:       }
401:     }
402:   }
403:   return(0);
404: }
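/*
   Usage sketch (illustrative values; A is assumed to be a MATMPIROWBS matrix that
   has already been created and preallocated): the routine above inserts locally
   owned entries directly and stashes off-processor entries until assembly, so a
   typical calling sequence is

     PetscScalar v   = 1.0;
     int         row = 0, col = 0;              // global indices

     MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);    // ships stashed off-processor entries
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/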

408: PetscErrorCode MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
409: {
410:   Mat_MPIRowbs  *a = (Mat_MPIRowbs*)mat->data;
411:   MPI_Comm      comm = mat->comm;
413:   int         nstash,reallocs;
414:   InsertMode    addv;

417:   /* Note:  There's no need to "unscale" the matrix, since scaling is
418:             confined to a->pA, and we're working with a->A here */

420:   /* make sure all processors are either in INSERTMODE or ADDMODE */
421:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
422:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
423:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
424:   }
425:   mat->insertmode = addv; /* in case this processor had no cache */

427:   MatStashScatterBegin_Private(&mat->stash,a->rowners);
428:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
429:   PetscLogInfo((0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs));
430:   return(0);
431: }

435: static PetscErrorCode MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
436: {
437:   Mat_MPIRowbs      *a = (Mat_MPIRowbs*)mat->data;
439:   int               i,j;
440:   PetscTruth        iascii;
441:   BSspmat           *A = a->A;
442:   BSsprow           **rs = A->rows;
443:   PetscViewerFormat format;

446:   PetscViewerGetFormat(viewer,&format);
447:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);

449:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
450:     int ind_l,ind_g,clq_l,clq_g,color;
451:     ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
452:     ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
453:     clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
454:     clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
455:     color = BSnum_colors(a->pA);CHKERRBS(0);
456:     PetscViewerASCIIPrintf(viewer,"  %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
457:     PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
458:   } else  if (format == PETSC_VIEWER_ASCII_COMMON) {
459:     for (i=0; i<A->num_rows; i++) {
460:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
461:       for (j=0; j<rs[i]->length; j++) {
462:         if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
463:       }
464:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
465:     }
466:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
467:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
468:   } else {
469:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
470:     for (i=0; i<A->num_rows; i++) {
471:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
472:       for (j=0; j<rs[i]->length; j++) {
473:         PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
474:       }
475:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
476:     }
477:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
478:   }
479:   PetscViewerFlush(viewer);
480:   return(0);
481: }

485: static PetscErrorCode MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
486: {
487:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)mat->data;
489:   PetscMPIInt    rank,size;
490:   PetscInt       i,M,m,*sbuff,*rowlengths;
491:   PetscInt       *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
492:   BSspmat        *A = a->A;
493:   BSsprow        **rs = A->rows;
494:   MPI_Comm       comm = mat->comm;
495:   MPI_Status     status;
496:   PetscScalar    *vals;
497:   MatInfo        info;

500:   MPI_Comm_size(comm,&size);
501:   MPI_Comm_rank(comm,&rank);

503:   M = mat->M; m = mat->m;
504:   /* First gather together on the first processor the lengths of 
505:      each row, and write them out to the file */
506:   PetscMalloc(m*sizeof(int),&sbuff);
507:   for (i=0; i<A->num_rows; i++) {
508:     sbuff[i] = rs[i]->length;
509:   }
510:   MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
511:   if (!rank) {
512:     PetscViewerBinaryGetDescriptor(viewer,&fd);
513:     PetscMalloc((4+M)*sizeof(int),&rowlengths);
514:     PetscMalloc(size*sizeof(int),&recvcts);
515:     recvdisp = a->rowners;
516:     for (i=0; i<size; i++) {
517:       recvcts[i] = recvdisp[i+1] - recvdisp[i];
518:     }
519:     /* first four elements of rowlength are the header */
520:     rowlengths[0] = mat->cookie;
521:     rowlengths[1] = mat->M;
522:     rowlengths[2] = mat->N;
523:     rowlengths[3] = (int)info.nz_used;
524:     MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
525:     PetscFree(sbuff);
526:     PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,PETSC_FALSE);
527:     /* count the number of nonzeros on each processor */
528:     PetscMemzero(recvcts,size*sizeof(int));
529:     for (i=0; i<size; i++) {
530:       for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
531:         recvcts[i] += rowlengths[j+4];  /* row lengths begin after the 4-element header */
532:       }
533:     }
534:     /* allocate buffer long enough to hold largest one */
535:     maxnz = 0;
536:     for (i=0; i<size; i++) {
537:       maxnz = PetscMax(maxnz,recvcts[i]);
538:     }
539:     PetscFree(rowlengths);
540:     PetscFree(recvcts);
541:     PetscMalloc(maxnz*sizeof(int),&cols);

543:     /* binary store column indices for 0th processor */
544:     nz = 0;
545:     for (i=0; i<A->num_rows; i++) {
546:       for (j=0; j<rs[i]->length; j++) {
547:         cols[nz++] = rs[i]->col[j];
548:       }
549:     }
550:     PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);

552:     /* receive and store column indices for all other processors */
553:     for (i=1; i<size; i++) {
554:       /* should tell processor that I am now ready and to begin the send */
555:       MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
556:       MPI_Get_count(&status,MPI_INT,&nz);
557:       PetscBinaryWrite(fd,cols,nz,PETSC_INT,PETSC_FALSE);
558:     }
559:     PetscFree(cols);
560:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

562:     /* binary store values for 0th processor */
563:     nz = 0;
564:     for (i=0; i<A->num_rows; i++) {
565:       for (j=0; j<rs[i]->length; j++) {
566:         vals[nz++] = rs[i]->nz[j];
567:       }
568:     }
569:     PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);

571:     /* receive and store nonzeros for all other processors */
572:     for (i=1; i<size; i++) {
573:       /* should tell processor that I am now ready and to begin the send */
574:       MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
575:       MPI_Get_count(&status,MPIU_SCALAR,&nz);
576:       PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,PETSC_FALSE);
577:     }
578:     PetscFree(vals);
579:   } else {
580:     MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
581:     PetscFree(sbuff);

583:     /* count local nonzeros */
584:     nz = 0;
585:     for (i=0; i<A->num_rows; i++) {
586:       for (j=0; j<rs[i]->length; j++) {
587:         nz++;
588:       }
589:     }
590:     /* copy into buffer column indices */
591:     PetscMalloc(nz*sizeof(int),&cols);
592:     nz = 0;
593:     for (i=0; i<A->num_rows; i++) {
594:       for (j=0; j<rs[i]->length; j++) {
595:         cols[nz++] = rs[i]->col[j];
596:       }
597:     }
598:     /* send */  /* should wait until processor zero tells me to go */
599:     MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
600:     PetscFree(cols);

602:     /* copy into buffer column values */
603:     PetscMalloc(nz*sizeof(PetscScalar),&vals);
604:     nz   = 0;
605:     for (i=0; i<A->num_rows; i++) {
606:       for (j=0; j<rs[i]->length; j++) {
607:         vals[nz++] = rs[i]->nz[j];
608:       }
609:     }
610:     /* send */  /* should wait until processor zero tells me to go */
611:     MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
612:     PetscFree(vals);
613:   }

615:   return(0);
616: }

620: PetscErrorCode MatView_MPIRowbs(Mat mat,PetscViewer viewer)
621: {
622:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
624:   PetscTruth   iascii,isbinary;

627:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
628:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
629:   if (!bsif->blocksolveassembly) {
630:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
631:   }
632:   if (iascii) {
633:     MatView_MPIRowbs_ASCII(mat,viewer);
634:   } else if (isbinary) {
635:     MatView_MPIRowbs_Binary(mat,viewer);
636:   } else {
637:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
638:   }
639:   return(0);
640: }
641: 
644: static PetscErrorCode MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
645: {
646:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
647:   BSspmat      *A = a->A;
648:   BSsprow      *vs;
649:   int          size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
650:   int          msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,idx,row;
652:   int          ctr_j,*sbuf1_j,k;
653:   PetscScalar  val=0.0;
654:   MPI_Comm     comm;
655:   MPI_Request  *s_waits1,*r_waits1;
656:   MPI_Status   *s_status,*r_status;

659:   comm   = mat->comm;
660:   tag    = mat->tag;
661:   size   = a->size;
662:   rank   = a->rank;
663:   M      = mat->M;
664:   rstart = a->rstart;

666:   PetscMalloc(M*sizeof(int),&rtable);
667:   /* Create hash table for the mapping :row -> proc */
668:   for (i=0,j=0; i<size; i++) {
669:     len = a->rowners[i+1];
670:     for (; j<len; j++) {
671:       rtable[j] = i;
672:     }
673:   }

675:   /* Evaluate communication - mesg to whom, length of mesg, and buffer space
676:      required. Based on this, buffers are allocated, and data copied into them. */
677:   PetscMalloc(size*4*sizeof(int),&w1);/*  mesg size */
678:   w3   = w1 + 2*size;       /* no of IS that needs to be sent to proc i */
679:   w4   = w3 + size;       /* temp work space used in determining w1,  w3 */
680:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */

682:   for (i=0;  i<mat->m; i++) {
683:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
684:     vs = A->rows[i];
685:     for (j=0; j<vs->length; j++) {
686:       proc = rtable[vs->col[j]];
687:       w4[proc]++;
688:     }
689:     for (j=0; j<size; j++) {
690:       if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
691:     }
692:   }
693: 
694:   nrqs       = 0;              /* number of outgoing messages */
695:   msz        = 0;              /* total mesg length (for all procs) */
696:   w1[2*rank] = 0;              /* no mesg sent to itself */
697:   w3[rank]   = 0;
698:   for (i=0; i<size; i++) {
699:     if (w1[2*i])  {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
700:   }
701:   /* pa - is list of processors to communicate with */
702:   PetscMalloc((nrqs+1)*sizeof(int),&pa);
703:   for (i=0,j=0; i<size; i++) {
704:     if (w1[2*i]) {pa[j] = i; j++;}
705:   }

707:   /* Each message would have a header = 1 + 2*(no of ROWS) + data */
708:   for (i=0; i<nrqs; i++) {
709:     j       = pa[i];
710:     w1[2*j] += w1[2*j+1] + 2*w3[j];
711:     msz     += w1[2*j];
712:   }
713: 
714:   /* Do a global reduction to determine how many messages to expect */
715:   PetscMaxSum(comm,w1,&bsz,&nrqr);

717:   /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */
718:   len      = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
719:   PetscMalloc(len,&rbuf1);
720:   rbuf1[0] = (int*)(rbuf1 + nrqr);
721:   for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

723:   /* Post the receives */
724:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
725:   for (i=0; i<nrqr; ++i){
726:     MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
727:   }
728: 
729:   /* Allocate Memory for outgoing messages */
730:   len   = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
731:   PetscMalloc(len,&sbuf1);
732:   ptr   = sbuf1 + size;     /* Pointers to the data in outgoing buffers */
733:   PetscMemzero(sbuf1,2*size*sizeof(int*));
734:   tmp   = (int*)(sbuf1 + 2*size);
735:   ctr   = tmp + msz;

737:   {
738:     int *iptr = tmp,ict  = 0;
739:     for (i=0; i<nrqs; i++) {
740:       j        = pa[i];
741:       iptr    += ict;
742:       sbuf1[j] = iptr;
743:       ict      = w1[2*j];
744:     }
745:   }

747:   /* Form the outgoing messages */
748:   /* Clean up the header space */
749:   for (i=0; i<nrqs; i++) {
750:     j           = pa[i];
751:     sbuf1[j][0] = 0;
752:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
753:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
754:   }

756:   /* Parse the matrix and copy the data into sbuf1 */
757:   for (i=0; i<mat->m; i++) {
758:     PetscMemzero(ctr,size*sizeof(int));
759:     vs = A->rows[i];
760:     for (j=0; j<vs->length; j++) {
761:       col  = vs->col[j];
762:       proc = rtable[col];
763:       if (proc != rank) { /* copy to the outgoing buffer */
764:         ctr[proc]++;
765:         *ptr[proc] = col;
766:         ptr[proc]++;
767:       } else {
768:         row = col - rstart;
769:         col = i + rstart;
770:         MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
771:       }
772:     }
773:     /* Update the headers for the current row */
774:     for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
775:       if ((ctr_j = ctr[j])) {
776:         sbuf1_j        = sbuf1[j];
777:         k               = ++sbuf1_j[0];
778:         sbuf1_j[2*k]   = ctr_j;
779:         sbuf1_j[2*k-1] = i + rstart;
780:       }
781:     }
782:   }
783:    /* Check Validity of the outgoing messages */
784:   {
785:     int sum;
786:     for (i=0 ; i<nrqs ; i++) {
787:       j = pa[i];
788:       if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
789:     }

791:     for (i=0 ; i<nrqs ; i++) {
792:       j = pa[i];
793:       sum = 1;
794:       for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
795:       if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
796:     }
797:   }
798: 
799:   /* Now post the sends */
800:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
801:   for (i=0; i<nrqs; ++i) {
802:     j    = pa[i];
803:     MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
804:   }
805: 
806:   /* Receive messages*/
807:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
808:   for (i=0; i<nrqr; ++i) {
809:     MPI_Waitany(nrqr,r_waits1,&idx,r_status+i);
810:     /* Process the Message */
811:     {
812:       int    *rbuf1_i,n_row,ct1;

814:       rbuf1_i = rbuf1[idx];
815:       n_row   = rbuf1_i[0];
816:       ct1     = 2*n_row+1;
817:       val     = 0.0;
818:       /* Optimise this later */
819:       for (j=1; j<=n_row; j++) {
820:         col = rbuf1_i[2*j-1];
821:         for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
822:           row = rbuf1_i[ct1] - rstart;
823:           MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
824:         }
825:       }
826:     }
827:   }

829:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
830:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

832:   PetscFree(rtable);
833:   PetscFree(w1);
834:   PetscFree(pa);
835:   PetscFree(rbuf1);
836:   PetscFree(sbuf1);
837:   PetscFree(r_waits1);
838:   PetscFree(s_waits1);
839:   PetscFree(r_status);
840:   PetscFree(s_status);
841:   return(0);
842: }

844: /*
845:      This does the BlockSolve portion of the matrix assembly.
846:    It is provided in a separate routine so that users can
847:    operate on the matrix (using MatScale(), MatShift(), etc.) after
848:    the matrix has been assembled but before BlockSolve has sucked it
849:    in and devoured it.  (A usage sketch follows this routine.)
850: */
853: PetscErrorCode MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
854: {
855:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
857:   int          ldim,low,high,i;
858:   PetscScalar  *diag;

861:   if ((mat->was_assembled) && (!mat->same_nonzero)) {  /* Free the old info */
862:     if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
863:     if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
864:   }

866:   if ((!mat->same_nonzero) || (!mat->was_assembled)) {
867:     /* Indicates bypassing cliques in coloring */
868:     if (a->bs_color_single) {
869:       BSctx_set_si(a->procinfo,100);
870:     }
871:     /* Form permuted matrix for efficient parallel execution */
872:     a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
873:     /* Set up the communication */
874:     a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
875:   } else {
876:     /* Repermute the matrix */
877:     BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
878:   }

880:   /* Symmetrically scale the matrix by the diagonal */
881:   BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);

883:   /* Store inverse of square root of permuted diagonal scaling matrix */
884:   VecGetLocalSize(a->diag,&ldim);
885:   VecGetOwnershipRange(a->diag,&low,&high);
886:   VecGetArray(a->diag,&diag);
887:   for (i=0; i<ldim; i++) {
888:     if (a->pA->scale_diag[i] != 0.0) {
889:       diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
890:     } else {
891:       diag[i] = 1.0;
892:     }
893:   }
894:   VecRestoreArray(a->diag,&diag);
895:   a->assembled_icc_storage = a->A->icc_storage;
896:   a->blocksolveassembly = 1;
897:   mat->was_assembled    = PETSC_TRUE;
898:   mat->same_nonzero     = PETSC_TRUE;
899:   PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n"));
900:   return(0);
901: }
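/*
   Usage sketch for the note above: a minimal, illustrative sequence that modifies
   the matrix after the PETSc assembly but before the BlockSolve95 assembly, which
   is performed lazily by operations such as MatMult().  The vectors x and y and the
   scaling factor are placeholders; the MatScale() calling sequence assumed here
   matches MatScale_MPIRowbs() earlier in this file.

     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);   // PETSc-side assembly only
     MatScale(A,2.0);                        // operates on the rowbs storage a->A
     MatMult(A,x,y);                         // first use triggers
                                             // MatAssemblyEnd_MPIRowbs_ForBlockSolve()
*/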

905: PetscErrorCode MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
906: {
907:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
909:   int          i,n,row,col,*rows,*cols,rstart,nzcount,flg,j,ncols;
910:   PetscScalar  *vals,val;
911:   InsertMode   addv = mat->insertmode;

914:   while (1) {
915:     MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
916:     if (!flg) break;
917: 
918:     for (i=0; i<n;) {
919:       /* Now identify the consecutive vals belonging to the same row */
920:       for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
921:       if (j < n) ncols = j-i;
922:       else       ncols = n-i;
923:       /* Now assemble all these values with a single function call */
924:       MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
925:       i = j;
926:     }
927:   }
928:   MatStashScatterEnd_Private(&mat->stash);

930:   rstart = a->rstart;
931:   nzcount = a->nz; /* This is the number of nonzeros entered by the user */
932:   /* BlockSolve requires that the matrix is structurally symmetric */
933:   if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
934:     MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
935:   }
936: 
937:   /* BlockSolve requires that all the diagonal elements are set */
938:   val  = 0.0;
939:   for (i=0; i<mat->m; i++) {
940:     row = i; col = i + rstart;
941:     MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
942:   }
943: 
944:   MatAssemblyBegin_MPIRowbs_local(mat,mode);
945:   MatAssemblyEnd_MPIRowbs_local(mat,mode);
946: 
947:   a->blocksolveassembly = 0;
948:   PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz));
949:   PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount));
950:   PetscLogInfo((mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs));
951:   return(0);
952: }

956: PetscErrorCode MatZeroEntries_MPIRowbs(Mat mat)
957: {
958:   Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
959:   BSspmat      *A = l->A;
960:   BSsprow      *vs;
961:   int          i,j;

964:   for (i=0; i <mat->m; i++) {
965:     vs = A->rows[i];
966:     for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
967:   }
968:   return(0);
969: }

971: /* the code does not do the diagonal entries correctly unless the 
972:    matrix is square and the column and row ownerships are identical.
973:    This is a BUG.
974: */

978: PetscErrorCode MatZeroRows_MPIRowbs(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
979: {
980:   Mat_MPIRowbs   *l = (Mat_MPIRowbs*)A->data;
982:   int            i,*owners = l->rowners,size = l->size;
983:   int            *nprocs,j,idx,nsends;
984:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
985:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
986:   int            *lens,imdex,*lrows,*values;
987:   MPI_Comm       comm = A->comm;
988:   MPI_Request    *send_waits,*recv_waits;
989:   MPI_Status     recv_status,*send_status;
990:   PetscTruth     found;

993:   /*  first count number of contributors to each processor */
994:   PetscMalloc(2*size*sizeof(int),&nprocs);
995:   PetscMemzero(nprocs,2*size*sizeof(int));
996:   PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
997:   for (i=0; i<N; i++) {
998:     idx   = rows[i];
999:     found = PETSC_FALSE;
1000:     for (j=0; j<size; j++) {
1001:       if (idx >= owners[j] && idx < owners[j+1]) {
1002:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1003:       }
1004:     }
1005:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
1006:   }
1007:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

1009:   /* inform other processors of number of messages and max length*/
1010:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

1012:   /* post receives:   */
1013:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1014:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1015:   for (i=0; i<nrecvs; i++) {
1016:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1017:   }

1019:   /* do sends:
1020:       1) starts[i] gives the starting index in svalues for stuff going to 
1021:          the ith processor
1022:   */
1023:   PetscMalloc((N+1)*sizeof(int),&svalues);
1024:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1025:   PetscMalloc((size+1)*sizeof(int),&starts);
1026:   starts[0] = 0;
1027:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1028:   for (i=0; i<N; i++) {
1029:     svalues[starts[owner[i]]++] = rows[i];
1030:   }

1032:   starts[0] = 0;
1033:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1034:   count = 0;
1035:   for (i=0; i<size; i++) {
1036:     if (nprocs[2*i+1]) {
1037:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1038:     }
1039:   }
1040:   PetscFree(starts);

1042:   base = owners[rank];

1044:   /*  wait on receives */
1045:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1046:   source = lens + nrecvs;
1047:   count = nrecvs; slen = 0;
1048:   while (count) {
1049:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1050:     /* unpack receives into our local space */
1051:     MPI_Get_count(&recv_status,MPI_INT,&n);
1052:     source[imdex]  = recv_status.MPI_SOURCE;
1053:     lens[imdex]    = n;
1054:     slen           += n;
1055:     count--;
1056:   }
1057:   PetscFree(recv_waits);
1058: 
1059:   /* move the data into the send scatter */
1060:   PetscMalloc((slen+1)*sizeof(int),&lrows);
1061:   count = 0;
1062:   for (i=0; i<nrecvs; i++) {
1063:     values = rvalues + i*nmax;
1064:     for (j=0; j<lens[i]; j++) {
1065:       lrows[count++] = values[j] - base;
1066:     }
1067:   }
1068:   PetscFree(rvalues);
1069:   PetscFree(lens);
1070:   PetscFree(owner);
1071:   PetscFree(nprocs);
1072: 
1073:   /* actually zap the local rows */
1074:   MatZeroRows_MPIRowbs_local(A,slen,lrows,diag);
1075:   PetscFree(lrows);

1077:   /* wait on sends */
1078:   if (nsends) {
1079:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1080:     MPI_Waitall(nsends,send_waits,send_status);
1081:     PetscFree(send_status);
1082:   }
1083:   PetscFree(send_waits);
1084:   PetscFree(svalues);

1086:   return(0);
1087: }

1091: PetscErrorCode MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1092: {
1093:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1094:   BSsprow      *vs,**rs;
1095:   PetscScalar  *xv;
1096:   PetscReal    sum = 0.0;
1098:   int          *xi,nz,i,j;

1101:   if (a->size == 1) {
1102:     MatNorm_MPIRowbs_local(mat,type,norm);
1103:   } else {
1104:     rs = a->A->rows;
1105:     if (type == NORM_FROBENIUS) {
1106:       for (i=0; i<mat->m; i++) {
1107:         vs = *rs++;
1108:         nz = vs->length;
1109:         xv = vs->nz;
1110:         while (nz--) {
1111: #if defined(PETSC_USE_COMPLEX)
1112:           sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1113: #else
1114:           sum += (*xv)*(*xv); xv++;
1115: #endif
1116:         }
1117:       }
1118:       MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1119:       *norm = sqrt(*norm);
1120:     } else if (type == NORM_1) { /* max column norm */
1121:       PetscReal *tmp,*tmp2;
1122:       PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1123:       PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1124:       PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1125:       *norm = 0.0;
1126:       for (i=0; i<mat->m; i++) {
1127:         vs = *rs++;
1128:         nz = vs->length;
1129:         xi = vs->col;
1130:         xv = vs->nz;
1131:         while (nz--) {
1132:           tmp[*xi] += PetscAbsScalar(*xv);
1133:           xi++; xv++;
1134:         }
1135:       }
1136:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1137:       for (j=0; j<mat->n; j++) {
1138:         if (tmp2[j] > *norm) *norm = tmp2[j];
1139:       }
1140:       PetscFree(tmp);
1141:       PetscFree(tmp2);
1142:     } else if (type == NORM_INFINITY) { /* max row norm */
1143:       PetscReal ntemp = 0.0;
1144:       for (i=0; i<mat->m; i++) {
1145:         vs = *rs++;
1146:         nz = vs->length;
1147:         xv = vs->nz;
1148:         sum = 0.0;
1149:         while (nz--) {
1150:           sum += PetscAbsScalar(*xv); xv++;
1151:         }
1152:         if (sum > ntemp) ntemp = sum;
1153:       }
1154:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1155:     } else {
1156:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1157:     }
1158:   }
1159:   return(0);
1160: }

1164: PetscErrorCode MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1165: {
1166:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1167:   BSprocinfo   *bspinfo = bsif->procinfo;
1168:   PetscScalar  *xxa,*xworka,*yya;

1172:   if (!bsif->blocksolveassembly) {
1173:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1174:   }

1176:   /* Permute and apply diagonal scaling:  [ xwork = D^{1/2} * x ] */
1177:   if (!bsif->vecs_permscale) {
1178:     VecGetArray(bsif->xwork,&xworka);
1179:     VecGetArray(xx,&xxa);
1180:     BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1181:     VecRestoreArray(bsif->xwork,&xworka);
1182:     VecRestoreArray(xx,&xxa);
1183:     VecPointwiseDivide(xx,bsif->xwork,bsif->diag);
1184:   }

1186:   VecGetArray(xx,&xxa);
1187:   VecGetArray(yy,&yya);
1188:   /* Do lower triangular multiplication:  [ y = L * xwork ] */
1189:   if (bspinfo->single) {
1190:     BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1191:   }  else {
1192:     BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1193:   }
1194: 
1195:   /* Do upper triangular multiplication:  [ y = y + L^{T} * xwork ] */
1196:   if (mat->symmetric) {
1197:     if (bspinfo->single){
1198:       BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1199:     } else {
1200:       BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1201:     }
1202:   }
1203:   /* not needed for ILU version since forward does it all */
1204:   VecRestoreArray(xx,&xxa);
1205:   VecRestoreArray(yy,&yya);

1207:   /* Apply diagonal scaling to vector:  [  y = D^{1/2} * y ] */
1208:   if (!bsif->vecs_permscale) {
1209:     VecGetArray(bsif->xwork,&xworka);
1210:     VecGetArray(xx,&xxa);
1211:     BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1212:     VecRestoreArray(bsif->xwork,&xworka);
1213:     VecRestoreArray(xx,&xxa);
1214:     VecPointwiseDivide(bsif->xwork,yy,bsif->diag);
1215:     VecGetArray(bsif->xwork,&xworka);
1216:     VecGetArray(yy,&yya);
1217:     BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1218:     VecRestoreArray(bsif->xwork,&xworka);
1219:     VecRestoreArray(yy,&yya);
1220:   }
1221:   PetscLogFlops(2*bsif->nz - mat->m);

1223:   return(0);
1224: }

1228: PetscErrorCode MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1229: {
1231:   PetscScalar  one = 1.0;

1234:   (*mat->ops->mult)(mat,xx,zz);
1235:   VecAXPY(zz,one,yy);
1236:   return(0);
1237: }

1241: PetscErrorCode MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1242: {
1243:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1244:   PetscReal    isend[5],irecv[5];

1248:   info->rows_global    = (double)A->M;
1249:   info->columns_global = (double)A->N;
1250:   info->rows_local     = (double)A->m;
1251:   info->columns_local  = (double)A->N;
1252:   info->block_size     = 1.0;
1253:   info->mallocs        = (double)mat->reallocs;
1254:   isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] =  mat->maxnz -  mat->nz;
1255:   isend[3] = A->mem;  isend[4] = info->mallocs;

1257:   if (flag == MAT_LOCAL) {
1258:     info->nz_used      = isend[0];
1259:     info->nz_allocated = isend[1];
1260:     info->nz_unneeded  = isend[2];
1261:     info->memory       = isend[3];
1262:     info->mallocs      = isend[4];
1263:   } else if (flag == MAT_GLOBAL_MAX) {
1264:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1265:     info->nz_used      = irecv[0];
1266:     info->nz_allocated = irecv[1];
1267:     info->nz_unneeded  = irecv[2];
1268:     info->memory       = irecv[3];
1269:     info->mallocs      = irecv[4];
1270:   } else if (flag == MAT_GLOBAL_SUM) {
1271:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1272:     info->nz_used      = irecv[0];
1273:     info->nz_allocated = irecv[1];
1274:     info->nz_unneeded  = irecv[2];
1275:     info->memory       = irecv[3];
1276:     info->mallocs      = irecv[4];
1277:   }
1278:   return(0);
1279: }

1283: PetscErrorCode MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1284: {
1285:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1286:   BSsprow      **rs = a->A->rows;
1288:   int          i,n;
1289:   PetscScalar  *x,zero = 0.0;

1292:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1293:   if (!a->blocksolveassembly) {
1294:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1295:   }

1297:   VecSet(v,zero);
1298:   VecGetLocalSize(v,&n);
1299:   if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1300:   VecGetArray(v,&x);
1301:   for (i=0; i<mat->m; i++) {
1302:     x[i] = rs[i]->nz[rs[i]->diag_ind];
1303:   }
1304:   VecRestoreArray(v,&x);
1305:   return(0);
1306: }

1310: PetscErrorCode MatDestroy_MPIRowbs(Mat mat)
1311: {
1312:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1313:   BSspmat      *A = a->A;
1314:   BSsprow      *vs;
1316:   int          i;

1319: #if defined(PETSC_USE_LOG)
1320:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1321: #endif
1322:   PetscFree(a->rowners);
1323:   MatStashDestroy_Private(&mat->stash);
1324:   if (a->bsmap) {
1325:     if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1326:     if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1327:     if (a->bsmap->vglobal2proc)  (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1328:     PetscFree(a->bsmap);
1329:   }

1331:   if (A) {
1332:     for (i=0; i<mat->m; i++) {
1333:       vs = A->rows[i];
1334:       MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1335:     }
1336:     /* Note: A->map = a->bsmap is freed above */
1337:     PetscFree(A->rows);
1338:     PetscFree(A);
1339:   }
1340:   if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1341:   if (a->diag)     {VecDestroy(a->diag);}
1342:   if (a->xwork)    {VecDestroy(a->xwork);}
1343:   if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
1344:   if (a->fpA)      {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1345:   if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1346:   if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1347:   if (a->imax)     {PetscFree(a->imax);}
1348:   MPI_Comm_free(&(a->comm_mpirowbs));
1349:   PetscFree(a);
1350:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C","",PETSC_NULL);
1351:   return(0);
1352: }

1356: PetscErrorCode MatSetOption_MPIRowbs(Mat A,MatOption op)
1357: {
1358:   Mat_MPIRowbs   *a = (Mat_MPIRowbs*)A->data;

1362:   switch (op) {
1363:   case MAT_ROW_ORIENTED:
1364:     a->roworiented = PETSC_TRUE;
1365:     break;
1366:   case MAT_COLUMN_ORIENTED:
1367:     a->roworiented = PETSC_FALSE;
1368:     break;
1369:   case MAT_COLUMNS_SORTED:
1370:     a->sorted      = 1;
1371:     break;
1372:   case MAT_COLUMNS_UNSORTED:
1373:     a->sorted      = 0;
1374:     break;
1375:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1376:     a->nonew       = 1;
1377:     break;
1378:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1379:     a->nonew       = 0;
1380:     break;
1381:   case MAT_DO_NOT_USE_INODES:
1382:     a->bs_color_single = 1;
1383:     break;
1384:   case MAT_YES_NEW_DIAGONALS:
1385:   case MAT_ROWS_SORTED:
1386:   case MAT_NEW_NONZERO_LOCATION_ERR:
1387:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1388:   case MAT_ROWS_UNSORTED:
1389:   case MAT_USE_HASH_TABLE:
1390:     PetscLogInfo((A,"MatSetOption_MPIRowbs:Option ignored\n"));
1391:     break;
1392:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1393:     a->donotstash = PETSC_TRUE;
1394:     break;
1395:   case MAT_NO_NEW_DIAGONALS:
1396:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1397:     break;
1398:   case MAT_KEEP_ZEROED_ROWS:
1399:     a->keepzeroedrows    = PETSC_TRUE;
1400:     break;
1401:   case MAT_SYMMETRIC:
1402:     BSset_mat_symmetric(a->A,PETSC_TRUE);CHKERRBS(0);
1403:     break;
1404:   case MAT_STRUCTURALLY_SYMMETRIC:
1405:   case MAT_NOT_SYMMETRIC:
1406:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1407:   case MAT_HERMITIAN:
1408:   case MAT_NOT_HERMITIAN:
1409:   case MAT_SYMMETRY_ETERNAL:
1410:   case MAT_NOT_SYMMETRY_ETERNAL:
1411:     break;
1412:   default:
1413:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1414:     break;
1415:   }
1416:   return(0);
1417: }

1421: PetscErrorCode MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1422: {
1423:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1424:   BSspmat      *A = mat->A;
1425:   BSsprow      *rs;
1426: 
1428:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");

1430:   rs  = A->rows[row - mat->rstart];
1431:   *nz = rs->length;
1432:   if (v)   *v   = rs->nz;
1433:   if (idx) *idx = rs->col;
1434:   return(0);
1435: }

1439: PetscErrorCode MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1440: {
1442:   return(0);
1443: }
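/*
   Usage sketch: MatGetRow() on this format returns pointers directly into the
   BlockSolve row storage and, as checked above, is valid only for locally owned
   rows.  The loop below is illustrative; rstart and rend would normally come from
   MatGetOwnershipRange().

     int         row,nz,*cols,rstart,rend;
     PetscScalar *vals;

     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&nz,&cols,&vals);
       // ... read nz, cols[], vals[]; the arrays belong to the matrix ...
       MatRestoreRow(A,row,&nz,&cols,&vals);
     }
*/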

1445: /* ------------------------------------------------------------------ */

1449: PetscErrorCode MatPrintHelp_MPIRowbs(Mat A)
1450: {
1451:   static PetscTruth called = PETSC_FALSE;
1452:   MPI_Comm          comm = A->comm;

1456:   if (called) {return(0);} else called = PETSC_TRUE;
1457:   (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1458:   (*PetscHelpPrintf)(comm,"  -mat_rowbs_no_inode  - Do not use inodes\n");
1459:   return(0);
1460: }

1464: PetscErrorCode MatSetUpPreallocation_MPIRowbs(Mat A)
1465: {

1469:    MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1470:   return(0);
1471: }

1473: /* -------------------------------------------------------------------*/
1474: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1475:        MatGetRow_MPIRowbs,
1476:        MatRestoreRow_MPIRowbs,
1477:        MatMult_MPIRowbs,
1478: /* 4*/ MatMultAdd_MPIRowbs,
1479:        MatMult_MPIRowbs,
1480:        MatMultAdd_MPIRowbs,
1481:        MatSolve_MPIRowbs,
1482:        0,
1483:        0,
1484: /*10*/ 0,
1485:        0,
1486:        0,
1487:        0,
1488:        0,
1489: /*15*/ MatGetInfo_MPIRowbs,
1490:        0,
1491:        MatGetDiagonal_MPIRowbs,
1492:        0,
1493:        MatNorm_MPIRowbs,
1494: /*20*/ MatAssemblyBegin_MPIRowbs,
1495:        MatAssemblyEnd_MPIRowbs,
1496:        0,
1497:        MatSetOption_MPIRowbs,
1498:        MatZeroEntries_MPIRowbs,
1499: /*25*/ MatZeroRows_MPIRowbs,
1500:        0,
1501:        MatLUFactorNumeric_MPIRowbs,
1502:        0,
1503:        MatCholeskyFactorNumeric_MPIRowbs,
1504: /*30*/ MatSetUpPreallocation_MPIRowbs,
1505:        MatILUFactorSymbolic_MPIRowbs,
1506:        MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1507:        0,
1508:        0,
1509: /*35*/ 0,
1510:        MatForwardSolve_MPIRowbs,
1511:        MatBackwardSolve_MPIRowbs,
1512:        0,
1513:        0,
1514: /*40*/ 0,
1515:        MatGetSubMatrices_MPIRowbs,
1516:        0,
1517:        0,
1518:        0,
1519: /*45*/ MatPrintHelp_MPIRowbs,
1520:        MatScale_MPIRowbs,
1521:        0,
1522:        0,
1523:        0,
1524: /*50*/ 0,
1525:        0,
1526:        0,
1527:        0,
1528:        0,
1529: /*55*/ 0,
1530:        0,
1531:        0,
1532:        0,
1533:        0,
1534: /*60*/ MatGetSubMatrix_MPIRowbs,
1535:        MatDestroy_MPIRowbs,
1536:        MatView_MPIRowbs,
1537:        MatGetPetscMaps_Petsc,
1538:        MatUseScaledForm_MPIRowbs,
1539: /*65*/ MatScaleSystem_MPIRowbs,
1540:        MatUnScaleSystem_MPIRowbs,
1541:        0,
1542:        0,
1543:        0,
1544: /*70*/ 0,
1545:        0,
1546:        0,
1547:        0,
1548:        0,
1549: /*75*/ 0,
1550:        0,
1551:        0,
1552:        0,
1553:        0,
1554: /*80*/ 0,
1555:        0,
1556:        0,
1557:        0,
1558:        MatLoad_MPIRowbs,
1559: /*85*/ 0,
1560:        0,
1561:        0,
1562:        0,
1563:        0,
1564: /*90*/ 0,
1565:        0,
1566:        0,
1567:        0,
1568:        0,
1569: /*95*/ 0,
1570:        0,
1571:        0,
1572:        0};

1574: /* ------------------------------------------------------------------- */

1579: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation_MPIRowbs(Mat mat,int nz,const int nnz[])
1580: {

1584:   mat->preallocated = PETSC_TRUE;
1585:   MatCreateMPIRowbs_local(mat,nz,nnz);
1586:   return(0);
1587: }

1590: /*MC
1591:    MATMPIROWBS - MATMPIROWBS = "mpirowbs" - A matrix type providing ILU and ICC for distributed sparse matrices for use
1592:    with the external package BlockSolve95.  If BlockSolve95 is installed (see the manual for instructions
1593:    on how to declare the existence of external packages), a matrix type can be constructed which invokes
1594:    BlockSolve95 preconditioners and solvers. 

1596:    Options Database Keys:
1597: . -mat_type mpirowbs - sets the matrix type to "mpirowbs" during a call to MatSetFromOptions()

1599:   Level: beginner

1601: .seealso: MatCreateMPIRowbs
1602: M*/
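/*
   Usage sketch (illustrative; m local rows and M global rows are assumed to be
   set by the caller): constructing a matrix of this type follows the same pattern
   used in MatLoad_MPIRowbs() below.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,m,M,M);                      // the format stores square matrices
     MatSetType(A,MATMPIROWBS);
     MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);
*/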

1607: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIRowbs(Mat A)
1608: {
1609:   Mat_MPIRowbs *a;
1610:   BSmapping    *bsmap;
1611:   BSoff_map    *bsoff;
1613:   int          i,*offset,m,M;
1614:   PetscTruth   flg1,flg2,flg3;
1615:   BSprocinfo   *bspinfo;
1616:   MPI_Comm     comm;
1617: 
1619:   comm = A->comm;
1620:   m    = A->m;
1621:   M    = A->M;

1623:   PetscNew(Mat_MPIRowbs,&a);
1624:   A->data               = (void*)a;
1625:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1626:   A->factor             = 0;
1627:   A->mapping            = 0;
1628:   a->vecs_permscale     = PETSC_FALSE;
1629:   A->insertmode         = NOT_SET_VALUES;
1630:   a->blocksolveassembly = 0;
1631:   a->keepzeroedrows     = PETSC_FALSE;

1633:   MPI_Comm_rank(comm,&a->rank);
1634:   MPI_Comm_size(comm,&a->size);

1636:   PetscSplitOwnership(comm,&m,&M);

1638:   A->N = M;
1639:   A->M = M;
1640:   A->m = m;
1641:   A->n = A->N;  /* each row stores all columns */
1642:   PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1643:   a->reallocs                      = 0;

1645:   /* the information in the maps duplicates the information computed below, eventually 
1646:      we should remove the duplicate information that is not contained in the maps */
1647:   PetscMapCreateMPI(comm,m,M,&A->rmap);
1648:   PetscMapCreateMPI(comm,m,M,&A->cmap);

1650:   /* build local table of row ownerships */
1651:   PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1652:   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1653:   a->rowners[0] = 0;
1654:   for (i=2; i<=a->size; i++) {
1655:     a->rowners[i] += a->rowners[i-1];
1656:   }
1657:   a->rstart = a->rowners[a->rank];
1658:   a->rend   = a->rowners[a->rank+1];
1659:   PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));

1661:   /* build cache for off array entries formed */
1662:   MatStashCreate_Private(A->comm,1,&A->stash);
1663:   a->donotstash = PETSC_FALSE;

1665:   /* Initialize BlockSolve information */
1666:   a->A        = 0;
1667:   a->pA       = 0;
1668:   a->comm_pA  = 0;
1669:   a->fpA      = 0;
1670:   a->comm_fpA = 0;
1671:   a->alpha    = 1.0;
1672:   a->ierr     = 0;
1673:   a->failures = 0;
1674:   MPI_Comm_dup(A->comm,&(a->comm_mpirowbs));
1675:   VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1676:   VecDuplicate(a->diag,&(a->xwork));
1677:   PetscLogObjectParent(A,a->diag);  PetscLogObjectParent(A,a->xwork);
1678:   PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1679:   bspinfo = BScreate_ctx();CHKERRBS(0);
1680:   a->procinfo = bspinfo;
1681:   BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1682:   BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1683:   BSctx_set_ps(bspinfo,a->comm_mpirowbs);CHKERRBS(0);
1684:   BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1685:   BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1686:   BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1687: #if defined(PETSC_USE_DEBUG)
1688:   BSctx_set_err(bspinfo,1);CHKERRBS(0);  /* BS error checking */
1689: #endif
1690:   BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1691:   PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1692:   if (flg1) {
1693:     BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1694:   }
1695:   PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1696:   PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1697:   PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1698:   if (flg1 || flg2 || flg3) {
1699:     BSctx_set_si(bspinfo,1);CHKERRBS(0);
1700:   } else {
1701:     BSctx_set_si(bspinfo,0);CHKERRBS(0);
1702:   }
1703: #if defined(PETSC_USE_LOG)
1704:   MLOG_INIT();  /* Initialize logging */
1705: #endif

1707:   /* Compute global offsets */
1708:   offset = &a->rstart;

1710:   PetscNew(BSmapping,&a->bsmap);
1711:   PetscLogObjectMemory(A,sizeof(BSmapping));
1712:   bsmap = a->bsmap;
1713:   PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1714:   *((int*)bsmap->vlocal2global) = (*offset);
1715:   bsmap->flocal2global = BSloc2glob;
1716:   bsmap->free_l2g      = 0;
1717:   PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1718:   *((int*)bsmap->vglobal2local) = (*offset);
1719:   bsmap->fglobal2local = BSglob2loc;
1720:   bsmap->free_g2l      = 0;
1721:   bsoff                = BSmake_off_map(*offset,bspinfo,A->M);
1722:   bsmap->vglobal2proc  = (void*)bsoff;
1723:   bsmap->fglobal2proc  = BSglob2proc;
1724:   bsmap->free_g2p      = (void(*)(void*))BSfree_off_map;
1725:   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIRowbsSetPreallocation_C",
1726:                                     "MatMPIRowbsSetPreallocation_MPIRowbs",
1727:                                      MatMPIRowbsSetPreallocation_MPIRowbs);
1728:   return(0);
1729: }

1734: /* @
1735:   MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros 
1736:   per row in the matrix.

1738:   Input Parameters:
1739: +  mat - matrix
1740: .  nz - maximum number of nonzeros expected in any row
1741: -  nnz - number of nonzeros expected in each row (or PETSC_NULL, in which case nz is used for every row)

1743:   Note:
1744:   This routine is valid only for matrices stored in the MATMPIROWBS
1745:   format.
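
  Example (illustrative; the per-row counts below are hypothetical, one entry per
  local row):
.vb
     int nnz[] = {3,5,5,3};
     MatMPIRowbsSetPreallocation(mat,0,nnz);
.ve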
1746: @ */
1747: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsSetPreallocation(Mat mat,int nz,const int nnz[])
1748: {
1749:   PetscErrorCode ierr,(*f)(Mat,int,const int[]);

1752:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIRowbsSetPreallocation_C",(void (**)(void))&f);
1753:   if (f) {
1754:     (*f)(mat,nz,nnz);
1755:   }
1756:   return(0);
1757: }

1759: /* --------------- extra BlockSolve-specific routines -------------- */
1762: /* @
1763:   MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1764:   user can then manipulate to alter the default parameters.

1766:   Input Parameter:
1767:   mat - matrix

1769:   Output Parameter:
1770:   procinfo - processor information context

1772:   Note:
1773:   This routine is valid only for matrices stored in the MATMPIROWBS
1774:   format.
1775: @ */
1776: PetscErrorCode PETSCMAT_DLLEXPORT MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1777: {
1778:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1779:   PetscTruth   ismpirowbs;

1783:   PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1784:   if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1785:   *procinfo = *(a->procinfo);
1786:   return(0);
1787: }

1791: PetscErrorCode MatLoad_MPIRowbs(PetscViewer viewer,const MatType type,Mat *newmat)
1792: {
1793:   Mat_MPIRowbs *a;
1794:   BSspmat      *A;
1795:   BSsprow      **rs;
1796:   Mat          mat;
1798:   int          i,nz,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1799:   int          header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1800:   PetscScalar  *vals;
1801:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1802:   MPI_Status   status;

1805:   MPI_Comm_size(comm,&size);
1806:   MPI_Comm_rank(comm,&rank);
1807:   if (!rank) {
1808:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1809:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1810:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1811:     if (header[3] < 0) {
1812:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1813:     }
1814:   }

1816:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1817:   M = header[1];
1818:   /* determine ownership of all rows */
1819:   m          = M/size + ((M % size) > rank);
1820:   PetscMalloc((size+2)*sizeof(int),&rowners);
1821:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1822:   rowners[0] = 0;
1823:   for (i=2; i<=size; i++) {
1824:     rowners[i] += rowners[i-1];
1825:   }
1826:   rstart = rowners[rank];
1827:   rend   = rowners[rank+1];

1829:   /* distribute row lengths to all processors */
1830:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1831:   if (!rank) {
1832:     PetscMalloc(M*sizeof(int),&rowlengths);
1833:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1834:     PetscMalloc(size*sizeof(int),&sndcounts);
1835:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1836:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1837:     PetscFree(sndcounts);
1838:   } else {
1839:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1840:   }

1842:   /* create our matrix */
1843:   MatCreate(comm,newmat);
1844:   MatSetSizes(*newmat,m,m,M,M);
1845:   MatSetType(*newmat,type);
1846:   MatMPIRowbsSetPreallocation(*newmat,0,ourlens);
1847:   mat = *newmat;
1848:   PetscFree(ourlens);

1850:   a = (Mat_MPIRowbs*)mat->data;
1851:   A = a->A;
1852:   rs = A->rows;

1854:   if (!rank) {
1855:     /* calculate the number of nonzeros on each processor */
1856:     PetscMalloc(size*sizeof(int),&procsnz);
1857:     PetscMemzero(procsnz,size*sizeof(int));
1858:     for (i=0; i<size; i++) {
1859:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1860:         procsnz[i] += rowlengths[j];
1861:       }
1862:     }
1863:     PetscFree(rowlengths);

1865:     /* determine max buffer needed and allocate it */
1866:     maxnz = 0;
1867:     for (i=0; i<size; i++) {
1868:       maxnz = PetscMax(maxnz,procsnz[i]);
1869:     }
1870:     PetscMalloc(maxnz*sizeof(int),&cols);

1872:     /* read in my part of the matrix column indices  */
1873:     nz = procsnz[0];
1874:     PetscBinaryRead(fd,cols,nz,PETSC_INT);
1875: 
1876:     /* insert it into my part of matrix */
1877:     nz = 0;
1878:     for (i=0; i<A->num_rows; i++) {
1879:       for (j=0; j<a->imax[i]; j++) {
1880:         rs[i]->col[j] = cols[nz++];
1881:       }
1882:       rs[i]->length = a->imax[i];
1883:     }
1884:     /* read in parts for all other processors */
1885:     for (i=1; i<size; i++) {
1886:       nz   = procsnz[i];
1887:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1888:       MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1889:     }
1890:     PetscFree(cols);
1891:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1893:     /* read in my part of the matrix numerical values  */
1894:     nz   = procsnz[0];
1895:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1896: 
1897:     /* insert it into my part of matrix */
1898:     nz = 0;
1899:     for (i=0; i<A->num_rows; i++) {
1900:       for (j=0; j<a->imax[i]; j++) {
1901:         rs[i]->nz[j] = vals[nz++];
1902:       }
1903:     }
1904:     /* read in parts for all other processors */
1905:     for (i=1; i<size; i++) {
1906:       nz   = procsnz[i];
1907:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1908:       MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1909:     }
1910:     PetscFree(vals);
1911:     PetscFree(procsnz);
1912:   } else {
1913:     /* determine buffer space needed for message */
1914:     nz = 0;
1915:     for (i=0; i<A->num_rows; i++) {
1916:       nz += a->imax[i];
1917:     }
1918:     PetscMalloc(nz*sizeof(int),&cols);

1920:     /* receive message of column indices*/
1921:     MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1922:     MPI_Get_count(&status,MPI_INT,&maxnz);
1922:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received column index count does not match expected number of nonzeros");

1925:     /* insert it into my part of matrix */
1926:     nz = 0;
1927:     for (i=0; i<A->num_rows; i++) {
1928:       for (j=0; j<a->imax[i]; j++) {
1929:         rs[i]->col[j] = cols[nz++];
1930:       }
1931:       rs[i]->length = a->imax[i];
1932:     }
1933:     PetscFree(cols);
1934:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1936:     /* receive message of values*/
1937:     MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1938:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1938:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received value count does not match expected number of nonzeros");

1941:     /* insert it into my part of matrix */
1942:     nz = 0;
1943:     for (i=0; i<A->num_rows; i++) {
1944:       for (j=0; j<a->imax[i]; j++) {
1945:         rs[i]->nz[j] = vals[nz++];
1946:       }
1947:       rs[i]->length = a->imax[i];
1948:     }
1949:     PetscFree(vals);
1950:   }
1951:   PetscFree(rowners);
1952:   a->nz = a->maxnz;
1953:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1954:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1955:   return(0);
1956: }
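/*
   A sketch of the calling sequence that reaches MatLoad_MPIRowbs() above,
   assuming PETSc has been initialized; "matrix.dat" is a placeholder file
   name, and the binary open mode constant may be spelled differently in
   other PETSc versions.

     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatLoad(viewer,MATMPIROWBS,&A);
     PetscViewerDestroy(viewer);
*/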

1958: /* 
1959:     Special destroy and view routines for factored matrices 
1960: */
1963: static PetscErrorCode MatDestroy_MPIRowbs_Factored(Mat mat)
1964: {
1966: #if defined(PETSC_USE_LOG)
1967:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1968: #endif
1969:   return(0);
1970: }

1974: static PetscErrorCode MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1975: {

1979:   MatView((Mat) mat->data,viewer);
1980:   return(0);
1981: }

1985: PetscErrorCode MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,MatFactorInfo *info,Mat *newfact)
1986: {
1987:   /* Note: the fill amount (info->fill) is not currently used by BlockSolve */
1988:   Mat          newmat;
1989:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1991:   PetscTruth   idn;

1994:   if (isrow) {
1995:     ISIdentity(isrow,&idn);
1996:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1997:   }

1999:   if (!mat->symmetric) {
2000:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
2001:         preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
2002:         symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
2003:   }

2005:   /* If the icc_storage flag wasn't set before the last blocksolveassembly,          */
2006:   /* we must completely redo the assembly as a different storage format is required. */
2007:   if (mbs->blocksolveassembly && !mbs->assembled_icc_storage) {
2008:     mat->same_nonzero       = PETSC_FALSE;
2009:     mbs->blocksolveassembly = 0;
2010:   }

2012:   if (!mbs->blocksolveassembly) {
2013:     BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
2014:     BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
2015:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2016:   }

2018:   /* Copy permuted matrix */
2019:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2020:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

2022:   /* Set up the communication for factorization */
2023:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2024:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2026:   /* 
2027:       Create a new Mat structure to hold the "factored" matrix, 
2028:     note that this merely contains a pointer to the original matrix, since
2029:     the original matrix contains the factor information.
2030:   */
2031:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2032:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

2034:   newmat->data         = (void*)mat;
2035:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2036:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2037:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2038:   newmat->factor       = 1;
2039:   newmat->preallocated = PETSC_TRUE;
2040:   newmat->M            = mat->M;
2041:   newmat->N            = mat->N;
2042:   newmat->m            = mat->m;
2043:   newmat->n            = mat->n;
2044:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

2046:   *newfact = newmat;
2047:   return(0);
2048: }
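/*
   The ICC path above is normally reached through a preconditioner rather than
   by calling this routine directly.  A hedged sketch, assuming A is a
   MATMPIROWBS matrix and b, x are compatible vectors; as required above, the
   matrix must be flagged symmetric before assembly.

     KSP ksp;
     PC  pc;
     MatSetOption(A,MAT_SYMMETRIC);
     MatSetOption(A,MAT_SYMMETRY_ETERNAL);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCICC);
     KSPSolve(ksp,b,x);
*/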

2052: PetscErrorCode MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatFactorInfo* info,Mat *newfact)
2053: {
2054:   Mat          newmat;
2055:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
2057:   PetscTruth   idn;

2060:   if (info->levels) SETERRQ(PETSC_ERR_SUP,"Blocksolve ILU only supports 0 fill");
2061:   if (isrow) {
2062:     ISIdentity(isrow,&idn);
2063:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
2064:   }
2065:   if (iscol) {
2066:     ISIdentity(iscol,&idn);
2067:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
2068:   }

2070:   if (!mbs->blocksolveassembly) {
2071:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
2072:   }
2073: 
2074: /*   if (mat->symmetric) { */
2075: /*     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with \n\ */
2076: /*         MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix\n\ */
2077: /*         using the option MatSetOption(A,MAT_SYMMETRIC)"); */
2078: /*   } */

2080:   /* Copy permuted matrix */
2081:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
2082:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

2084:   /* Set up the communication for factorization */
2085:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
2086:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2088:   /* 
2089:       Create a new Mat structure to hold the "factored" matrix,
2090:     note that this merely contains a pointer to the original matrix, since
2091:     the original matrix contains the factor information.
2092:   */
2093:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2094:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

2096:   newmat->data         = (void*)mat;
2097:   PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2098:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2099:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2100:   newmat->factor       = 1;
2101:   newmat->preallocated = PETSC_TRUE;
2102:   newmat->M            = mat->M;
2103:   newmat->N            = mat->N;
2104:   newmat->m            = mat->m;
2105:   newmat->n            = mat->n;
2106:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

2108:   *newfact = newmat;
2109:   return(0);
2110: }
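/*
   Likewise, the ILU path above supports only zero levels of fill (the
   default), so a sketch of driving it through a KSP preconditioner needs no
   extra options; ksp, pc, b and x are assumed to exist as in the previous
   sketch.

     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCILU);
     KSPSolve(ksp,b,x);
*/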

2114: PetscErrorCode PETSCMAT_DLLEXPORT MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2115: {

2121:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2122:   ISColoringCreate(mat->comm,mat->m,0,coloring);

2124:   return(0);
2125: }
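/*
   A sketch of retrieving and releasing the coloring provided above, assuming
   A is an assembled MATMPIROWBS matrix:

     ISColoring coloring;
     MatMPIRowbsGetColor(A,&coloring);
     ... use the coloring ...
     ISColoringDestroy(coloring);
*/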

2129: /*@C
2130:    MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2131:    format.  This format is intended primarily as an interface for BlockSolve95.

2133:    Collective on MPI_Comm

2135:    Input Parameters:
2136: +  comm - MPI communicator
2137: .  m - number of local rows (or PETSC_DECIDE to have calculated)
2138: .  M - number of global rows (or PETSC_DECIDE to have calculated)
2139: .  nz - number of nonzeros per row (same for all local rows)
2140: -  nnz - number of nonzeros per row (possibly different for each row).

2142:    Output Parameter:
2143: .  newA - the matrix 

2145:    Notes:
2146:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2147:    then it must be used on all processors that share the object for that argument.

2149:    The user MUST specify either the local or global matrix dimensions
2150:    (possibly both).

2152:    Specify the preallocated storage with either nz or nnz (not both).  Set 
2153:    nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2154:    allocation.

2156:    Notes:
2157:    By default, the matrix is assumed to be nonsymmetric; the user can
2158:    take advantage of special optimizations for symmetric matrices by calling
2159: $     MatSetOption(mat,MAT_SYMMETRIC)
2160: $     MatSetOption(mat,MAT_SYMMETRY_ETERNAL)
2161:    BEFORE calling the routine MatAssemblyBegin().

2163:    Internally, the MATMPIROWBS format inserts zero elements to the
2164:    matrix if necessary, so that nonsymmetric matrices are considered
2165:    to be symmetric in terms of their sparsity structure; this format
2166:    is required for use of the parallel communication routines within
2167:    BlockSolve95. In particular, if the matrix element A[i,j] exists,
2168:    then PETSc will internally allocate a 0 value for the element
2169:    A[j,i] during MatAssemblyEnd() if the user has not already set
2170:    a value for the matrix element A[j,i].

2172:    Options Database Keys:
2173: .  -mat_rowbs_no_inode - Do not use inodes.

2175:    Level: intermediate
2176:   
2177: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve

2179: .seealso: MatCreate(), MatSetValues()
2180: @*/
2181: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,const int nnz[],Mat *newA)
2182: {
2184: 
2186:   MatCreate(comm,newA);
2187:   MatSetSizes(*newA,m,m,M,M);
2188:   MatSetType(*newA,MATMPIROWBS);
2189:   MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2190:   return(0);
2191: }
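/*
   A small end-to-end sketch for MatCreateMPIRowbs(), assuming PETSc has been
   initialized and N is a placeholder for the global size; the stencil filled
   into ncols, cols[] and vals[] is left schematic.

     Mat         A;
     int         i,rstart,rend,ncols,cols[3];
     PetscScalar vals[3];
     MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,N,3,PETSC_NULL,&A);
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       ... fill ncols, cols[] and vals[] for row i ...
       MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/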


2194: /* -------------------------------------------------------------------------*/

2196:  #include src/mat/impls/aij/seq/aij.h
2197:  #include src/mat/impls/aij/mpi/mpiaij.h

2201: PetscErrorCode MatGetSubMatrices_MPIRowbs(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
2202: {
2204:   int         nmax,nstages_local,nstages,i,pos,max_no;


2208:   /* Allocate memory to hold all the submatrices */
2209:   if (scall != MAT_REUSE_MATRIX) {
2210:     PetscMalloc((ismax+1)*sizeof(Mat),submat);
2211:   }
2212: 
2213:   /* Determine the number of stages through which submatrices are done */
2214:   nmax          = 20*1000000 / (C->N * sizeof(int));
2215:   if (!nmax) nmax = 1;
2216:   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

2218:   /* Make sure every processor loops through the nstages */
2219:   MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);

2221:   for (i=0,pos=0; i<nstages; i++) {
2222:     if (pos+nmax <= ismax) max_no = nmax;
2223:     else if (pos == ismax) max_no = 0;
2224:     else                   max_no = ismax-pos;
2225:     MatGetSubMatrices_MPIRowbs_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);
2226:     pos += max_no;
2227:   }
2228:   return(0);
2229: }
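/*
   A sketch of how the staged extraction above is reached from user code,
   assuming C is an assembled MATMPIROWBS matrix; nloc and first are
   placeholders describing a contiguous, sorted index range on this process.

     IS  isrow,iscol;
     Mat *submats;
     ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&isrow);
     ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&iscol);
     MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&submats);
     ... submats[0] is a sequential AIJ matrix ...
     MatDestroyMatrices(1,&submats);
     ISDestroy(isrow); ISDestroy(iscol);
*/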
2230: /* -------------------------------------------------------------------------*/
2231: /* For now MatGetSubMatrices_MPIRowbs_Local gets SeqAIJ submatrices of the input
2232:    matrix and preserves the zeros that arise from structural symmetry
2233:  */
2236: PetscErrorCode MatGetSubMatrices_MPIRowbs_Local(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
2237: {
2238:   Mat_MPIRowbs  *c = (Mat_MPIRowbs *)(C->data);
2239:   BSspmat       *A = c->A;
2240:   Mat_SeqAIJ    *mat;
2242:   int         **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
2243:   int         **sbuf1,**sbuf2,rank,m,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
2244:   int         nrqs,msz,**ptr,idx,*req_size,*ctr,*pa,*tmp,tcol,nrqr;
2245:   int         **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2,**rmap;
2246:   int         **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i;
2247:   int         len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i;
2248:   int         *rmap_i,tag0,tag1,tag2,tag3;
2249:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2250:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2251:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2252:   MPI_Status  *r_status3,*r_status4,*s_status4;
2253:   MPI_Comm    comm;
2254:   FLOAT       **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i;
2255:   PetscScalar *mat_a;
2256:   PetscTruth  sorted;
2257:   int         *onodes1,*olengths1;

2260:   comm   = C->comm;
2261:   tag0   = C->tag;
2262:   size   = c->size;
2263:   rank   = c->rank;
2264:   m      = C->M;
2265: 
2266:   /* Get some new tags to keep the communication clean */
2267:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2268:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2269:   PetscObjectGetNewTag((PetscObject)C,&tag3);

2271:   /* Check that the row indices are sorted (the column check is currently disabled) */
2272:   for (i=0; i<ismax; i++) {
2273:     ISSorted(isrow[i],&sorted);
2274:     if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2275:     ISSorted(iscol[i],&sorted);
2276:     /*    if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted"); */
2277:   }

2279:   len    = (2*ismax+1)*(sizeof(int*)+ sizeof(int)) + (m+1)*sizeof(int);
2280:   PetscMalloc(len,&irow);
2281:   icol   = irow + ismax;
2282:   nrow   = (int*)(icol + ismax);
2283:   ncol   = nrow + ismax;
2284:   rtable = ncol + ismax;

2286:   for (i=0; i<ismax; i++) {
2287:     ISGetIndices(isrow[i],&irow[i]);
2288:     ISGetIndices(iscol[i],&icol[i]);
2289:     ISGetLocalSize(isrow[i],&nrow[i]);
2290:     ISGetLocalSize(iscol[i],&ncol[i]);
2291:   }

2293:   /* Create hash table for the mapping :row -> proc*/
2294:   for (i=0,j=0; i<size; i++) {
2295:     jmax = c->rowners[i+1];
2296:     for (; j<jmax; j++) {
2297:       rtable[j] = i;
2298:     }
2299:   }

2301:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2302:      required. Based on this, buffers are allocated, and data copied into them*/
2303:   PetscMalloc(size*4*sizeof(int),&w1); /* mesg size */
2304:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2305:   w3     = w2 + size;      /* no of IS that needs to be sent to proc i */
2306:   w4     = w3 + size;      /* temp work space used in determining w1, w2, w3 */
2307:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector*/
2308:   for (i=0; i<ismax; i++) {
2309:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector*/
2310:     jmax   = nrow[i];
2311:     irow_i = irow[i];
2312:     for (j=0; j<jmax; j++) {
2313:       row  = irow_i[j];
2314:       proc = rtable[row];
2315:       w4[proc]++;
2316:     }
2317:     for (j=0; j<size; j++) {
2318:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
2319:     }
2320:   }
2321: 
2322:   nrqs     = 0;              /* no of outgoing messages */
2323:   msz      = 0;              /* total mesg length (for all procs) */
2324:   w1[rank] = 0;              /* no mesg sent to self */
2325:   w3[rank] = 0;
2326:   for (i=0; i<size; i++) {
2327:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2328:   }
2329:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2330:   for (i=0,j=0; i<size; i++) {
2331:     if (w1[i]) { pa[j] = i; j++; }
2332:   }

2334:   /* Each message would have a header = 1 + 2*(no of IS) + data */
2335:   for (i=0; i<nrqs; i++) {
2336:     j     = pa[i];
2337:     w1[j] += w2[j] + 2* w3[j];
2338:     msz   += w1[j];
2339:   }

2341:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2342:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2343:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

2345:   /* Now post the Irecvs corresponding to these messages */
2346:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2347: 
2348:   PetscFree(onodes1);
2349:   PetscFree(olengths1);
2350: 
2351:   /* Allocate Memory for outgoing messages */
2352:   len      = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
2353:   PetscMalloc(len,&sbuf1);
2354:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2355:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2356:   /* allocate memory for outgoing data + buf to receive the first reply */
2357:   tmp      = (int*)(ptr + size);
2358:   ctr      = tmp + 2*msz;

2360:   {
2361:     int *iptr = tmp,ict = 0;
2362:     for (i=0; i<nrqs; i++) {
2363:       j         = pa[i];
2364:       iptr     += ict;
2365:       sbuf1[j]  = iptr;
2366:       ict       = w1[j];
2367:     }
2368:   }

2370:   /* Form the outgoing messages */
2371:   /* Initialize the header space */
2372:   for (i=0; i<nrqs; i++) {
2373:     j           = pa[i];
2374:     sbuf1[j][0] = 0;
2375:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
2376:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
2377:   }
2378: 
2379:   /* Parse the isrow and copy data into outbuf */
2380:   for (i=0; i<ismax; i++) {
2381:     PetscMemzero(ctr,size*sizeof(int));
2382:     irow_i = irow[i];
2383:     jmax   = nrow[i];
2384:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
2385:       row  = irow_i[j];
2386:       proc = rtable[row];
2387:       if (proc != rank) { /* copy to the outgoing buf*/
2388:         ctr[proc]++;
2389:         *ptr[proc] = row;
2390:         ptr[proc]++;
2391:       }
2392:     }
2393:     /* Update the headers for the current IS */
2394:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
2395:       if ((ctr_j = ctr[j])) {
2396:         sbuf1_j        = sbuf1[j];
2397:         k              = ++sbuf1_j[0];
2398:         sbuf1_j[2*k]   = ctr_j;
2399:         sbuf1_j[2*k-1] = i;
2400:       }
2401:     }
2402:   }

2404:   /*  Now  post the sends */
2405:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
2406:   for (i=0; i<nrqs; ++i) {
2407:     j    = pa[i];
2408:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
2409:   }

2411:   /* Post Receives to capture the buffer size */
2412:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
2413:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
2414:   rbuf2[0] = tmp + msz;
2415:   for (i=1; i<nrqs; ++i) {
2416:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
2417:   }
2418:   for (i=0; i<nrqs; ++i) {
2419:     j    = pa[i];
2420:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
2421:   }

2423:   /* Send to other procs the buf size they should allocate */
2424: 

2426:   /* Receive messages*/
2427:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
2428:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
2429:   len         = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
2430:   PetscMalloc(len,&sbuf2);
2431:   req_size    = (int*)(sbuf2 + nrqr);
2432:   req_source  = req_size + nrqr;
2433: 
2434:   {
2435:     BSsprow    **sAi = A->rows;
2436:     int        id,rstart = c->rstart;
2437:     int        *sbuf2_i;

2439:     for (i=0; i<nrqr; ++i) {
2440:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
2441:       req_size[idx]   = 0;
2442:       rbuf1_i         = rbuf1[idx];
2443:       start           = 2*rbuf1_i[0] + 1;
2444:       MPI_Get_count(r_status1+i,MPI_INT,&end);
2445:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
2446:       sbuf2_i         = sbuf2[idx];
2447:       for (j=start; j<end; j++) {
2448:         id               = rbuf1_i[j] - rstart;
2449:         ncols            = (sAi[id])->length;
2450:         sbuf2_i[j]       = ncols;
2451:         req_size[idx]   += ncols;
2452:       }
2453:       req_source[idx] = r_status1[i].MPI_SOURCE;
2454:       /* form the header */
2455:       sbuf2_i[0]   = req_size[idx];
2456:       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
2457:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
2458:     }
2459:   }
2460:   PetscFree(r_status1);
2461:   PetscFree(r_waits1);

2463:   /*  recv buffer sizes */
2464:   /* Receive messages*/
2465: 
2466:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
2467:   PetscMalloc((nrqs+1)*sizeof(FLOAT *),&rbuf4);
2468:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
2469:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
2470:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

2472:   for (i=0; i<nrqs; ++i) {
2473:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
2474:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
2475:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
2476:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
2477:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
2478:   }
2479:   PetscFree(r_status2);
2480:   PetscFree(r_waits2);
2481: 
2482:   /* Wait on sends1 and sends2 */
2483:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
2484:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

2486:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
2487:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
2488:   PetscFree(s_status1);
2489:   PetscFree(s_status2);
2490:   PetscFree(s_waits1);
2491:   PetscFree(s_waits2);

2493:   /* Now allocate buffers for a->j, and send them off */
2494:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf_aj);
2495:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2496:   PetscMalloc((j+1)*sizeof(int),&sbuf_aj[0]);
2497:   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
2498: 
2499:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
2500:   {
2501:     BSsprow *brow;
2502:     int *Acol;
2503:     int rstart = c->rstart;

2505:     for (i=0; i<nrqr; i++) {
2506:       rbuf1_i   = rbuf1[i];
2507:       sbuf_aj_i = sbuf_aj[i];
2508:       ct1       = 2*rbuf1_i[0] + 1;
2509:       ct2       = 0;
2510:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2511:         kmax = rbuf1[i][2*j];
2512:         for (k=0; k<kmax; k++,ct1++) {
2513:           brow   = A->rows[rbuf1_i[ct1] - rstart];
2514:           ncols  = brow->length;
2515:           Acol   = brow->col;
2516:           /* load the column indices for this row into cols*/
2517:           cols  = sbuf_aj_i + ct2;
2518:           PetscMemcpy(cols,Acol,ncols*sizeof(int));
2519:           /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* open question: do the
2520:                                                            column indices need mapping? */
2521:           ct2 += ncols;
2522:         }
2523:       }
2524:       MPI_Isend(sbuf_aj_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
2525:     }
2526:   }
2527:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
2528:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

2530:   /* Allocate buffers for a->a, and send them off */
2531:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf_aa);
2532:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
2533:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf_aa[0]);
2534:   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];
2535: 
2536:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
2537:   {
2538:     BSsprow *brow;
2539:     FLOAT *Aval;
2540:     int rstart = c->rstart;
2541: 
2542:     for (i=0; i<nrqr; i++) {
2543:       rbuf1_i   = rbuf1[i];
2544:       sbuf_aa_i = sbuf_aa[i];
2545:       ct1       = 2*rbuf1_i[0]+1;
2546:       ct2       = 0;
2547:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
2548:         kmax = rbuf1_i[2*j];
2549:         for (k=0; k<kmax; k++,ct1++) {
2550:           brow  = A->rows[rbuf1_i[ct1] - rstart];
2551:           ncols = brow->length;
2552:           Aval  = brow->nz;
2553:           /* load the column values for this row into vals*/
2554:           vals  = sbuf_aa_i+ct2;
2555:           PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
2556:           ct2 += ncols;
2557:         }
2558:       }
2559:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
2560:     }
2561:   }
2562:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
2563:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
2564:   PetscFree(rbuf1);

2566:   /* Form the matrix */
2567:   /* create col map */
2568:   {
2569:     int *icol_i;
2570: 
2571:     len     = (1+ismax)*sizeof(int*)+ ismax*C->N*sizeof(int);
2572:     PetscMalloc(len,&cmap);
2573:     cmap[0] = (int*)(cmap + ismax);
2574:     PetscMemzero(cmap[0],(1+ismax*C->N)*sizeof(int));
2575:     for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + C->N; }
2576:     for (i=0; i<ismax; i++) {
2577:       jmax   = ncol[i];
2578:       icol_i = icol[i];
2579:       cmap_i = cmap[i];
2580:       for (j=0; j<jmax; j++) {
2581:         cmap_i[icol_i[j]] = j+1;
2582:       }
2583:     }
2584:   }

2586:   /* Create lens which is required for MatCreate... */
2587:   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
2588:   len     = (1+ismax)*sizeof(int*)+ j*sizeof(int);
2589:   PetscMalloc(len,&lens);
2590:   lens[0] = (int*)(lens + ismax);
2591:   PetscMemzero(lens[0],j*sizeof(int));
2592:   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
2593: 
2594:   /* Update lens from local data */
2595:   { BSsprow *Arow;
2596:     for (i=0; i<ismax; i++) {
2597:       jmax   = nrow[i];
2598:       cmap_i = cmap[i];
2599:       irow_i = irow[i];
2600:       lens_i = lens[i];
2601:       for (j=0; j<jmax; j++) {
2602:         row  = irow_i[j];
2603:         proc = rtable[row];
2604:         if (proc == rank) {
2605:           Arow=A->rows[row-c->rstart];
2606:           ncols=Arow->length;
2607:           cols=Arow->col;
2608:           for (k=0; k<ncols; k++) {
2609:             if (cmap_i[cols[k]]) { lens_i[j]++;}
2610:           }
2611:         }
2612:       }
2613:     }
2614:   }
2615: 
2616:   /* Create row map*/
2617:   len     = (1+ismax)*sizeof(int*)+ ismax*C->M*sizeof(int);
2618:   PetscMalloc(len,&rmap);
2619:   rmap[0] = (int*)(rmap + ismax);
2620:   PetscMemzero(rmap[0],ismax*C->M*sizeof(int));
2621:   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->M;}
2622:   for (i=0; i<ismax; i++) {
2623:     rmap_i = rmap[i];
2624:     irow_i = irow[i];
2625:     jmax   = nrow[i];
2626:     for (j=0; j<jmax; j++) {
2627:       rmap_i[irow_i[j]] = j;
2628:     }
2629:   }
2630: 
2631:   /* Update lens from offproc data */
2632:   {
2633:     int *rbuf2_i,*rbuf3_i,*sbuf1_i;

2635:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2636:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+tmp2);
2637:       idx     = pa[i];
2638:       sbuf1_i = sbuf1[idx];
2639:       jmax    = sbuf1_i[0];
2640:       ct1     = 2*jmax+1;
2641:       ct2     = 0;
2642:       rbuf2_i = rbuf2[i];
2643:       rbuf3_i = rbuf3[i];
2644:       for (j=1; j<=jmax; j++) {
2645:         is_no   = sbuf1_i[2*j-1];
2646:         max1    = sbuf1_i[2*j];
2647:         lens_i  = lens[is_no];
2648:         cmap_i  = cmap[is_no];
2649:         rmap_i  = rmap[is_no];
2650:         for (k=0; k<max1; k++,ct1++) {
2651:           row  = rmap_i[sbuf1_i[ct1]]; /* the row index in the new submatrix */
2652:           max2 = rbuf2_i[ct1];
2653:           for (l=0; l<max2; l++,ct2++) {
2654:             if (cmap_i[rbuf3_i[ct2]]) {
2655:               lens_i[row]++;
2656:             }
2657:           }
2658:         }
2659:       }
2660:     }
2661:   }
2662:   PetscFree(r_status3);
2663:   PetscFree(r_waits3);
2664:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
2665:   PetscFree(s_status3);
2666:   PetscFree(s_waits3);

2668:   /* Create the submatrices */
2669:   if (scall == MAT_REUSE_MATRIX) {
2670:     PetscTruth same;
2671: 
2672:     /*
2673:         Assumes the new rows are the same length as the old rows, hence a bug!
2674:     */
2675:     for (i=0; i<ismax; i++) {
2676:       PetscTypeCompare((PetscObject)(submats[i]),MATSEQAIJ,&same);
2677:       if (!same) {
2678:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
2679:       }
2680:       mat = (Mat_SeqAIJ*)(submats[i]->data);
2681:       if ((submats[i]->m != nrow[i]) || (submats[i]->n != ncol[i])) {
2682:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
2683:       }
2684:       PetscMemcmp(mat->ilen,lens[i],submats[i]->m*sizeof(int),&same);
2685:       if (!same) {
2686:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
2687:       }
2688:       /* Initialize the matrix as if it were empty */
2689:       PetscMemzero(mat->ilen,submats[i]->m*sizeof(int));
2690:       submats[i]->factor = C->factor;
2691:     }
2692:   } else {
2693:     for (i=0; i<ismax; i++) {
2694:       /* Here we want to explicitly generate SeqAIJ matrices */
2695:       MatCreate(PETSC_COMM_SELF,submats+i);
2696:       MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);
2697:       MatSetType(submats[i],MATSEQAIJ);
2698:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
2699:     }
2700:   }

2702:   /* Assemble the matrices */
2703:   /* First assemble the local rows */
2704:   {
2705:     int    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
2706:     PetscScalar *imat_a;
2707:     BSsprow *Arow;
2708: 
2709:     for (i=0; i<ismax; i++) {
2710:       mat       = (Mat_SeqAIJ*)submats[i]->data;
2711:       imat_ilen = mat->ilen;
2712:       imat_j    = mat->j;
2713:       imat_i    = mat->i;
2714:       imat_a    = mat->a;
2715:       cmap_i    = cmap[i];
2716:       rmap_i    = rmap[i];
2717:       irow_i    = irow[i];
2718:       jmax      = nrow[i];
2719:       for (j=0; j<jmax; j++) {
2720:         row      = irow_i[j];
2721:         proc     = rtable[row];
2722:         if (proc == rank) {
2723:           old_row  = row;
2724:           row      = rmap_i[row];
2725:           ilen_row = imat_ilen[row];
2726: 
2727:           Arow=A->rows[old_row-c->rstart];
2728:           ncols=Arow->length;
2729:           cols=Arow->col;
2730:           vals=Arow->nz;
2731: 
2732:           mat_i    = imat_i[row];
2733:           mat_a    = imat_a + mat_i;
2734:           mat_j    = imat_j + mat_i;
2735:           for (k=0; k<ncols; k++) {
2736:             if ((tcol = cmap_i[cols[k]])) {
2737:               *mat_j++ = tcol - 1;
2738:               *mat_a++ = (PetscScalar)vals[k];
2739:               ilen_row++;
2740:             }
2741:           }
2742:           imat_ilen[row] = ilen_row;
2743:         }
2744:       }
2745:     }
2746:   }

2748:   /*   Now assemble the off proc rows*/
2749:   {
2750:     int    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
2751:     int    *imat_j,*imat_i;
2752:     PetscScalar *imat_a;
2753:     FLOAT *rbuf4_i;
2754: 
2755:     for (tmp2=0; tmp2<nrqs; tmp2++) {
2756:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+tmp2);
2757:       idx     = pa[i];
2758:       sbuf1_i = sbuf1[idx];
2759:       jmax    = sbuf1_i[0];
2760:       ct1     = 2*jmax + 1;
2761:       ct2     = 0;
2762:       rbuf2_i = rbuf2[i];
2763:       rbuf3_i = rbuf3[i];
2764:       rbuf4_i = rbuf4[i];
2765:       for (j=1; j<=jmax; j++) {
2766:         is_no     = sbuf1_i[2*j-1];
2767:         rmap_i    = rmap[is_no];
2768:         cmap_i    = cmap[is_no];
2769:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
2770:         imat_ilen = mat->ilen;
2771:         imat_j    = mat->j;
2772:         imat_i    = mat->i;
2773:         imat_a    = mat->a;
2774:         max1      = sbuf1_i[2*j];
2775:         for (k=0; k<max1; k++,ct1++) {
2776:           row   = sbuf1_i[ct1];
2777:           row   = rmap_i[row];
2778:           ilen  = imat_ilen[row];
2779:           mat_i = imat_i[row];
2780:           mat_a = imat_a + mat_i;
2781:           mat_j = imat_j + mat_i;
2782:           max2 = rbuf2_i[ct1];
2783:           for (l=0; l<max2; l++,ct2++) {
2784:             if ((tcol = cmap_i[rbuf3_i[ct2]])) {
2785:               *mat_j++ = tcol - 1;
2786:               *mat_a++ = (PetscScalar)rbuf4_i[ct2];
2787:               ilen++;
2788:             }
2789:           }
2790:           imat_ilen[row] = ilen;
2791:         }
2792:       }
2793:     }
2794:   }
2795:   PetscFree(r_status4);
2796:   PetscFree(r_waits4);
2797:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
2798:   PetscFree(s_waits4);
2799:   PetscFree(s_status4);

2801:   /* Restore the indices */
2802:   for (i=0; i<ismax; i++) {
2803:     ISRestoreIndices(isrow[i],irow+i);
2804:     ISRestoreIndices(iscol[i],icol+i);
2805:   }

2807:   /* Destroy allocated memory */
2808:   PetscFree(irow);
2809:   PetscFree(w1);
2810:   PetscFree(pa);

2812:   PetscFree(sbuf1);
2813:   PetscFree(rbuf2);
2814:   for (i=0; i<nrqr; ++i) {
2815:     PetscFree(sbuf2[i]);
2816:   }
2817:   for (i=0; i<nrqs; ++i) {
2818:     PetscFree(rbuf3[i]);
2819:     PetscFree(rbuf4[i]);
2820:   }

2822:   PetscFree(sbuf2);
2823:   PetscFree(rbuf3);
2824:   PetscFree(rbuf4);
2825:   PetscFree(sbuf_aj[0]);
2826:   PetscFree(sbuf_aj);
2827:   PetscFree(sbuf_aa[0]);
2828:   PetscFree(sbuf_aa);
2829: 
2830:   PetscFree(cmap);
2831:   PetscFree(rmap);
2832:   PetscFree(lens);

2834:   for (i=0; i<ismax; i++) {
2835:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
2836:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
2837:   }
2838:   return(0);
2839: }

2841: /*
2842:   This can be optimized by sending only the nonzeros selected by the iscol IS,
2843:   i.e. by prebuilding the submatrix on the sending side, including the A,B partitioning.
2844:   */
2847:  #include src/vec/is/impls/general/general.h
2848: PetscErrorCode MatGetSubMatrix_MPIRowbs(Mat C,IS isrow,IS iscol,int csize,MatReuse scall,Mat *submat)
2849: {
2850:   Mat_MPIRowbs  *c = (Mat_MPIRowbs*)C->data;
2851:   BSspmat       *A = c->A;
2852:   BSsprow *Arow;
2853:   Mat_SeqAIJ    *matA,*matB; /* on-proc and off-proc parts of submat */
2854:   Mat_MPIAIJ    *mat;  /* submat->data */
2856:   int    *irow,*icol,nrow,ncol,*rtable,size,rank,tag0,tag1,tag2,tag3;
2857:   int    *w1,*w2,*pa,nrqs,nrqr,msz,row_t;
2858:   int    i,j,k,l,len,jmax,proc,idx;
2859:   int    **sbuf1,**sbuf2,**rbuf1,**rbuf2,*req_size,**sbuf3,**rbuf3;
2860:   FLOAT  **rbuf4,**sbuf4; /* FLOAT is from Block Solve 95 library */

2862:   int    *cmap,*rmap,nlocal,*o_nz,*d_nz,cstart,cend;
2863:   int    *req_source;
2864:   int    ncols_t;
2865: 
2866: 
2867:   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
2868:   MPI_Request *r_waits4,*s_waits3,*s_waits4;
2869: 
2870:   MPI_Status  *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
2871:   MPI_Status  *r_status3,*r_status4,*s_status4;
2872:   MPI_Comm    comm;


2876:   comm   = C->comm;
2877:   tag0   = C->tag;
2878:   size   = c->size;
2879:   rank   = c->rank;

2881:   if (size==1) {
2882:     if (scall == MAT_REUSE_MATRIX) {
2883:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_REUSE_MATRIX,&submat);
2884:       return(0);
2885:     } else {
2886:       Mat *newsubmat;
2887: 
2888:       ierr=MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&newsubmat);
2889:       *submat=*newsubmat;
2890:       ierr=PetscFree(newsubmat);
2891:       return(0);
2892:     }
2893:   }
2894: 
2895:   /* Get some new tags to keep the communication clean */
2896:   PetscObjectGetNewTag((PetscObject)C,&tag1);
2897:   PetscObjectGetNewTag((PetscObject)C,&tag2);
2898:   PetscObjectGetNewTag((PetscObject)C,&tag3);

2900:   /* Check that the index sets are non-empty and that their indices are sorted */
2901:   if (!isrow) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty ISrow");
2902:   if (!iscol) SETERRQ(PETSC_ERR_ARG_SIZ,"Empty IScol");
2903:   {PetscTruth sorted;
2904:   ISSorted(isrow,&sorted);
2905:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
2906:   ISSorted(iscol,&sorted);
2907:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
2908:   }
2909: 
2910:   ISGetIndices(isrow,&irow);
2911:   ISGetIndices(iscol,&icol);
2912:   ISGetLocalSize(isrow,&nrow);
2913:   ISGetLocalSize(iscol,&ncol);
2914: 
2915: 
2916: 
2917:   len    = (C->M+1)*sizeof(int);
2918:   PetscMalloc(len,&rtable);
2919:   /* Create hash table for the mapping :row -> proc*/
2920:   for (i=0,j=0; i<size; i++) {
2921:     jmax = c->rowners[i+1];
2922:     for (; j<jmax; j++) {
2923:       rtable[j] = i;
2924:     }
2925:   }

2927:   /* evaluate communication - mesg to who, length of mesg, and buffer space
2928:      required. Based on this, buffers are allocated, and data copied into them*/
2929:   PetscMalloc(size*2*sizeof(int),&w1); /* mesg size */
2930:   w2     = w1 + size;      /* if w2[i] marked, then a message to proc i*/
2931:   PetscMemzero(w1,size*2*sizeof(int)); /* initialize work vector*/
2932:   for (j=0; j<nrow; j++) {
2933:     row_t  = irow[j];
2934:     proc   = rtable[row_t];
2935:     w1[proc]++;
2936:   }
2937:   nrqs     = 0;              /* no of outgoing messages */
2938:   msz      = 0;              /* total mesg length (for all procs) */
2939:   w1[rank] = 0;              /* no mesg sent to self */
2940:   for (i=0; i<size; i++) {
2941:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
2942:   }
2943: 
2944:   PetscMalloc((nrqs+1)*sizeof(int),&pa); /*(proc -array)*/
2945:   for (i=0,j=0; i<size; i++) {
2946:     if (w1[i]) {
2947:       pa[j++] = i;
2948:       w1[i]++;  /* header for return data */
2949:       msz+=w1[i];
2950:     }
2951:   }
2952: 
2953:   {int  *onodes1,*olengths1;
2954:   /* Determine the number of messages to expect, their lengths, and their from-ids */
2955:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
2956:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);
2957:   /* Now post the Irecvs corresponding to these messages */
2958:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);
2959:   PetscFree(onodes1);
2960:   PetscFree(olengths1);
2961:   }
2962: 
2963: { int **ptr,*iptr,*tmp;
2964:   /* Allocate Memory for outgoing messages */
2965:   len      = 2*size*sizeof(int*) + msz*sizeof(int);
2966:   PetscMalloc(len,&sbuf1);
2967:   ptr      = sbuf1 + size;   /* Pointers to the data in outgoing buffers */
2968:   PetscMemzero(sbuf1,2*size*sizeof(int*));
2969:   /* allocate memory for outgoing data + buf to receive the first reply */
2970:   tmp      = (int*)(ptr + size);

2972:   for (i=0,iptr=tmp; i<nrqs; i++) {
2973:     j         = pa[i];
2974:     sbuf1[j]  = iptr;
2975:     iptr     += w1[j];
2976:   }

2978:   /* Form the outgoing messages */
2979:   for (i=0; i<nrqs; i++) {
2980:     j           = pa[i];
2981:     sbuf1[j][0] = 0;   /*header */
2982:     ptr[j]      = sbuf1[j] + 1;
2983:   }
2984: 
2985:   /* Parse the isrow and copy data into outbuf */
2986:   for (j=0; j<nrow; j++) {
2987:     row_t  = irow[j];
2988:     proc = rtable[row_t];
2989:     if (proc != rank) { /* copy to the outgoing buf*/
2990:       sbuf1[proc][0]++;
2991:       *ptr[proc] = row_t;
2992:       ptr[proc]++;
2993:     }
2994:   }
2995: } /* block */

2997:   /*  Now  post the sends */
2998: 
2999:   /* structure of sbuf1[i]/rbuf1[i] : 1 (number of requested rows) followed by the
3000:    * global numbers of the requested rows */

3002:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
3003:   for (i=0; i<nrqs; ++i) {
3004:     j    = pa[i];
3005:     MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);
3006:   }

3008:   /* Post Receives to capture the buffer size */
3009:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);
3010:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf2);
3011:   PetscMalloc(msz*sizeof(int)+1,&(rbuf2[0]));
3012:   for (i=1; i<nrqs; ++i) {
3013:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
3014:   }
3015:   for (i=0; i<nrqs; ++i) {
3016:     j    = pa[i];
3017:     MPI_Irecv(rbuf2[i],w1[j],MPI_INT,j,tag1,comm,r_waits2+i);
3018:   }

3020:   /* Send to other procs the buf size they should allocate */
3021:   /* structure of sbuf2[i]/rbuf2[i]: 1 (total size to allocate) followed by the
3022:    * individual row sizes */

3024:   /* Receive messages*/
3025:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);
3026:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);
3027:   len         = 2*nrqr*sizeof(int) + (nrqr+1)*sizeof(int*);
3028:   PetscMalloc(len,&sbuf2);
3029:   req_size    = (int*)(sbuf2 + nrqr);
3030:   req_source  = req_size + nrqr;
3031: 
3032:   {
3033:     BSsprow    **sAi = A->rows;
3034:     int        id,rstart = c->rstart;
3035:     int        *sbuf2_i,*rbuf1_i,end;

3037:     for (i=0; i<nrqr; ++i) {
3038:       MPI_Waitany(nrqr,r_waits1,&idx,r_status1+i);
3039:       req_size[idx]   = 0;
3040:       rbuf1_i         = rbuf1[idx];
3041:       MPI_Get_count(r_status1+i,MPI_INT,&end);
3042:       PetscMalloc((end+1)*sizeof(int),&sbuf2[idx]);
3043:       sbuf2_i         = sbuf2[idx];
3044:       for (j=1; j<end; j++) {
3045:         id               = rbuf1_i[j] - rstart;
3046:         ncols_t          = (sAi[id])->length;
3047:         sbuf2_i[j]       = ncols_t;
3048:         req_size[idx]   += ncols_t;
3049:       }
3050:       req_source[idx] = r_status1[i].MPI_SOURCE;
3051:       /* form the header */
3052:       sbuf2_i[0]   = req_size[idx];
3053:       MPI_Isend(sbuf2_i,end,MPI_INT,req_source[idx],tag1,comm,s_waits2+i);
3054:     }
3055:   }
3056:   PetscFree(r_status1);
3057:   PetscFree(r_waits1);

3059:   /*  recv buffer sizes */
3060:   /* Receive messages*/
3061: 
3062:   PetscMalloc((nrqs+1)*sizeof(int*),&rbuf3);
3063:   PetscMalloc((nrqs+1)*sizeof(FLOAT*),&rbuf4);
3064:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);
3065:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);
3066:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);

3068:   for (i=0; i<nrqs; ++i) {
3069:     MPI_Waitany(nrqs,r_waits2,&idx,r_status2+i);
3070:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(int),&rbuf3[idx]);
3071:     PetscMalloc((rbuf2[idx][0]+1)*sizeof(FLOAT),&rbuf4[idx]);
3072:     MPI_Irecv(rbuf3[idx],rbuf2[idx][0],MPI_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idx);
3073:     MPI_Irecv(rbuf4[idx],rbuf2[idx][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idx);
3074:   }
3075:   PetscFree(r_status2);
3076:   PetscFree(r_waits2);
3077: 
3078:   /* Wait on sends1 and sends2 */
3079:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);
3080:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);

3082:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
3083:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
3084:   PetscFree(s_status1);
3085:   PetscFree(s_status2);
3086:   PetscFree(s_waits1);
3087:   PetscFree(s_waits2);

3089:   /* Now allocate buffers for a->j, and send them off */
3090:   /* structure of sbuf3[i]/rbuf3[i] and sbuf4[i]/rbuf4[i]: req_size[i] entries (the cols,
3091:    * resp. vals, of all requested rows; the row sizes were sent in rbuf2; vals are of FLOAT type) */
3092: 
3093:   PetscMalloc((nrqr+1)*sizeof(int*),&sbuf3);
3094:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3095:   PetscMalloc((j+1)*sizeof(int),&sbuf3[0]);
3096:   for (i=1; i<nrqr; i++)  sbuf3[i] = sbuf3[i-1] + req_size[i-1];
3097: 
3098:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);
3099:   {
3100:     int *Acol,*rbuf1_i,*sbuf3_i,rqrow,noutcols,kmax,*cols,ncols;
3101:     int rstart = c->rstart;

3103:     for (i=0; i<nrqr; i++) {
3104:       rbuf1_i   = rbuf1[i];
3105:       sbuf3_i   = sbuf3[i];
3106:       noutcols  = 0;
3107:       kmax = rbuf1_i[0];  /* num. of req. rows */
3108:       for (k=0,rqrow=1; k<kmax; k++,rqrow++) {
3109:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3110:         ncols  = Arow->length;
3111:         Acol   = Arow->col;
3112:         /* load the column indices for this row into cols*/
3113:         cols  = sbuf3_i + noutcols;
3114:         PetscMemcpy(cols,Acol,ncols*sizeof(int));
3115:         /*for (l=0; l<ncols;l++) cols[l]=Acol[l]; */ /* open question: do the column indices need mapping? */
3116:         noutcols += ncols;
3117:       }
3118:       MPI_Isend(sbuf3_i,req_size[i],MPI_INT,req_source[i],tag2,comm,s_waits3+i);
3119:     }
3120:   }
3121:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);
3122:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);

3124:   /* Allocate buffers for a->a, and send them off */
3125:   /* can be optimized by merging with the previous block */
3126:   PetscMalloc((nrqr+1)*sizeof(FLOAT*),&sbuf4);
3127:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
3128:   PetscMalloc((j+1)*sizeof(FLOAT),&sbuf4[0]);
3129:   for (i=1; i<nrqr; i++)  sbuf4[i] = sbuf4[i-1] + req_size[i-1];
3130: 
3131:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);
3132:   {
3133:     FLOAT *Aval,*vals,*sbuf4_i;
3134:     int rstart = c->rstart,*rbuf1_i,rqrow,noutvals,kmax,ncols;
3135: 
3136: 
3137:     for (i=0; i<nrqr; i++) {
3138:       rbuf1_i   = rbuf1[i];
3139:       sbuf4_i   = sbuf4[i];
3140:       rqrow     = 1;
3141:       noutvals  = 0;
3142:       kmax      = rbuf1_i[0];  /* num of req. rows */
3143:       for (k=0; k<kmax; k++,rqrow++) {
3144:         Arow    = A->rows[rbuf1_i[rqrow] - rstart];
3145:         ncols  = Arow->length;
3146:         Aval = Arow->nz;
3147:         /* load the column values for this row into vals*/
3148:         vals  = sbuf4_i+noutvals;
3149:         PetscMemcpy(vals,Aval,ncols*sizeof(FLOAT));
3150:         noutvals += ncols;
3151:       }
3152:       MPI_Isend(sbuf4_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
3153:     }
3154:   }
3155:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);
3156:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);
3157:   PetscFree(rbuf1);

3159:   /* Form the matrix */

3161:   /* create col map */
3162:   len     = C->N*sizeof(int)+1;
3163:   PetscMalloc(len,&cmap);
3164:   PetscMemzero(cmap,C->N*sizeof(int));
3165:   for (j=0; j<ncol; j++) {
3166:       cmap[icol[j]] = j+1;
3167:   }
3168: 
3169:   /* Create row map (a global row map may eventually be needed; this one is local) */
3170:   len     = C->M*sizeof(int)+1;
3171:   PetscMalloc(len,&rmap);
3172:   PetscMemzero(rmap,C->M*sizeof(int));
3173:   for (j=0; j<nrow; j++) {
3174:     rmap[irow[j]] = j;
3175:   }

3177:   /*
3178:      Determine the number of non-zeros in the diagonal and off-diagonal 
3179:      portions of the matrix in order to do correct preallocation
3180:    */

3182:   /* first get start and end of "diagonal" columns */
3183:   if (csize == PETSC_DECIDE) {
3184:     nlocal = ncol/size + ((ncol % size) > rank);
3185:   } else {
3186:     nlocal = csize;
3187:   }
3188:   {
3189:     int ncols,*cols,olen,dlen,thecol;
3190:     int *rbuf2_i,*rbuf3_i,*sbuf1_i,row,kmax,cidx;
3191: 
3192:     MPI_Scan(&nlocal,&cend,1,MPI_INT,MPI_SUM,comm);
3193:     cstart = cend - nlocal;
3194:     if (rank == size - 1 && cend != ncol) {
3195:       SETERRQ(PETSC_ERR_ARG_SIZ,"Local column sizes do not add up to total number of columns");
3196:     }

3198:     PetscMalloc((2*nrow+1)*sizeof(int),&d_nz);
3199:     o_nz = d_nz + nrow;
3200: 
3201:     /* Update lens from local data */
3202:     for (j=0; j<nrow; j++) {
3203:       row  = irow[j];
3204:       proc = rtable[row];
3205:       if (proc == rank) {
3206:         Arow=A->rows[row-c->rstart];
3207:         ncols=Arow->length;
3208:         cols=Arow->col;
3209:         olen=dlen=0;
3210:         for (k=0; k<ncols; k++) {
3211:           if ((thecol=cmap[cols[k]])) {
3212:             if (cstart<thecol && thecol<=cend) dlen++; /* thecol is 1-based */
3213:             else olen++;
3214:           }
3215:         }
3216:         o_nz[j]=olen;
3217:         d_nz[j]=dlen;
3218:       } else d_nz[j]=o_nz[j]=0;
3219:     }
3220:     /* Update lens from off-processor data and complete the waits */
3221:     /* this will be much simpler once only the needed columns are sent */
3222:     for (j=0; j<nrqs;j++) {
3223:       MPI_Waitany(nrqs,r_waits3,&i,r_status3+j);
3224:       proc   = pa[i];
3225:       sbuf1_i = sbuf1[proc];
3226:       cidx    = 0;
3227:       rbuf2_i = rbuf2[i];
3228:       rbuf3_i = rbuf3[i];
3229:       kmax    = sbuf1_i[0]; /*num of rq. rows*/
3230:       for (k=1; k<=kmax; k++) {
3231:         row  = rmap[sbuf1_i[k]]; /* the row index in the new submatrix */
3232:         for (l=0; l<rbuf2_i[k]; l++,cidx++) {
3233:           if ((thecol=cmap[rbuf3_i[cidx]])) {
3234: 
3235:             if (cstart<thecol && thecol<=cend) d_nz[row]++; /* thecol is 1-based */
3236:             else o_nz[row]++;
3237:           }
3238:         }
3239:       }
3240:     }
3241:   }
3242:   PetscFree(r_status3);
3243:   PetscFree(r_waits3);
3244:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
3245:   PetscFree(s_status3);
3246:   PetscFree(s_waits3);

3248:   if (scall ==  MAT_INITIAL_MATRIX) {
3249:     MatCreate(comm,submat);
3250:     MatSetSizes(*submat,nrow,nlocal,PETSC_DECIDE,ncol);
3251:     MatSetType(*submat,MATMPIAIJ);
3252:     MatMPIAIJSetPreallocation(*submat,0,d_nz,0,o_nz);
3253:     mat=(Mat_MPIAIJ *)((*submat)->data);
3254:     matA=(Mat_SeqAIJ *)(mat->A->data);
3255:     matB=(Mat_SeqAIJ *)(mat->B->data);
3256: 
3257:   } else {
3258:     PetscTruth same;
3259:     /* the following code could optionally be dropped for debugged versions of a user's
3260:      * program, but there is no PETSc option known that switches off such safety
3261:      * tests - in the same way the counting of o_nz,d_nz could be dropped for the REUSE
3262:      * case */
3263: 
3264:     PetscTypeCompare((PetscObject)(*submat),MATMPIAIJ,&same);
3265:     if (!same) {
3266:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong type");
3267:     }
3268:     if (((*submat)->m != nrow) || ((*submat)->N != ncol)) {
3269:         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
3270:     }
3271:     mat=(Mat_MPIAIJ *)((*submat)->data);
3272:     matA=(Mat_SeqAIJ *)(mat->A->data);
3273:     matB=(Mat_SeqAIJ *)(mat->B->data);
3274:     PetscMemcmp(matA->ilen,d_nz,nrow*sizeof(int),&same);
3275:     if (!same) {
3276:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3277:     }
3278:     PetscMemcmp(matB->ilen,o_nz,nrow*sizeof(int),&same);
3279:     if (!same) {
3280:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
3281:     }
3282:     /* Initialize the matrix as if it were empty */
3283:     PetscMemzero(matA->ilen,nrow*sizeof(int));
3284:     PetscMemzero(matB->ilen,nrow*sizeof(int));
3285:     /* Perhaps MatZeroEntries would be better - check exactly what it does; all
3286:      * possibly stale information must be deleted here */
3287:     /*submats[i]->factor = C->factor; !!! ??? if the factor is to stay the same then some
3288:      * factor information must be copied - where is it stored? */
3289:     (*submat)->was_assembled=PETSC_FALSE;
3290:     (*submat)->assembled=PETSC_FALSE;
3291: 
3292:   }
3293:   PetscFree(d_nz);

3295:   /* Assemble the matrix */
3296:   /* First assemble from local rows */
3297:   {
3298:     int    i_row,oldrow,row,ncols,*cols,*matA_j,*matB_j,ilenA,ilenB,tcol;
3299:     FLOAT  *vals;
3300:     PetscScalar *matA_a,*matB_a;
3301: 
3302:     for (j=0; j<nrow; j++) {
3303:       oldrow = irow[j];
3304:       proc   = rtable[oldrow];
3305:       if (proc == rank) {
3306:         row  = rmap[oldrow];
3307: 
3308:         Arow  = A->rows[oldrow-c->rstart];
3309:         ncols = Arow->length;
3310:         cols  = Arow->col;
3311:         vals  = Arow->nz;
3312: 
3313:         i_row   = matA->i[row];
3314:         matA_a = matA->a + i_row;
3315:         matA_j = matA->j + i_row;
3316:         i_row   = matB->i[row];
3317:         matB_a = matB->a + i_row;
3318:         matB_j = matB->j + i_row;
3319:         for (k=0,ilenA=0,ilenB=0; k<ncols; k++) {
3320:           if ((tcol = cmap[cols[k]])) {
3321:             if (tcol<=cstart) {
3322:               *matB_j++ = tcol-1;
3323:               *matB_a++ = vals[k];
3324:               ilenB++;
3325:             } else if (tcol<=cend) {
3326:               *matA_j++ = (tcol-1)-cstart;
3327:               *matA_a++ = (PetscScalar)(vals[k]);
3328:               ilenA++;
3329:             } else {
3330:               *matB_j++ = tcol-1;
3331:               *matB_a++ = vals[k];
3332:               ilenB++;
3333:             }
3334:           }
3335:         }
3336:         matA->ilen[row]=ilenA;
3337:         matB->ilen[row]=ilenB;
3338: 
3339:       }
3340:     }
3341:   }

3343:   /*   Now assemble the off proc rows*/
3344:   {
3345:     int  *sbuf1_i,*rbuf2_i,*rbuf3_i,cidx,kmax,row,i_row;
3346:     int  *matA_j,*matB_j,lmax,tcol,ilenA,ilenB;
3347:     PetscScalar *matA_a,*matB_a;
3348:     FLOAT *rbuf4_i;

3350:     for (j=0; j<nrqs; j++) {
3351:       MPI_Waitany(nrqs,r_waits4,&i,r_status4+j);
3352:       proc   = pa[i];
3353:       sbuf1_i = sbuf1[proc];
3354: 
3355:       cidx    = 0;
3356:       rbuf2_i = rbuf2[i];
3357:       rbuf3_i = rbuf3[i];
3358:       rbuf4_i = rbuf4[i];
3359:       kmax    = sbuf1_i[0];
3360:       for (k=1; k<=kmax; k++) {
3361:         row = rmap[sbuf1_i[k]];
3362: 
3363:         i_row  = matA->i[row];
3364:         matA_a = matA->a + i_row;
3365:         matA_j = matA->j + i_row;
3366:         i_row  = matB->i[row];
3367:         matB_a = matB->a + i_row;
3368:         matB_j = matB->j + i_row;
3369: 
3370:         lmax = rbuf2_i[k];
3371:         for (l=0,ilenA=0,ilenB=0; l<lmax; l++,cidx++) {
3372:           if ((tcol = cmap[rbuf3_i[cidx]])) {
3373:             if (tcol<=cstart) {
3374:               *matB_j++ = tcol-1;
3375:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3376:               ilenB++;
3377:             } else if (tcol<=cend) {
3378:               *matA_j++ = (tcol-1)-cstart;
3379:               *matA_a++ = (PetscScalar)(rbuf4_i[cidx]);
3380:               ilenA++;
3381:             } else {
3382:               *matB_j++ = tcol-1;
3383:               *matB_a++ = (PetscScalar)(rbuf4_i[cidx]);
3384:               ilenB++;
3385:             }
3386:           }
3387:         }
3388:         matA->ilen[row]=ilenA;
3389:         matB->ilen[row]=ilenB;
3390:       }
3391:     }
3392:   }

3394:   PetscFree(r_status4);
3395:   PetscFree(r_waits4);
3396:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
3397:   PetscFree(s_waits4);
3398:   PetscFree(s_status4);

3400:   /* Restore the indices */
3401:   ISRestoreIndices(isrow,&irow);
3402:   ISRestoreIndices(iscol,&icol);

3404:   /* Destroy allocated memory */
3405:   PetscFree(rtable);
3406:   PetscFree(w1);
3407:   PetscFree(pa);

3409:   PetscFree(sbuf1);
3410:   PetscFree(rbuf2[0]);
3411:   PetscFree(rbuf2);
3412:   for (i=0; i<nrqr; ++i) {
3413:     PetscFree(sbuf2[i]);
3414:   }
3415:   for (i=0; i<nrqs; ++i) {
3416:     PetscFree(rbuf3[i]);
3417:     PetscFree(rbuf4[i]);
3418:   }

3420:   PetscFree(sbuf2);
3421:   PetscFree(rbuf3);
3422:   PetscFree(rbuf4);
3423:   PetscFree(sbuf3[0]);
3424:   PetscFree(sbuf3);
3425:   PetscFree(sbuf4[0]);
3426:   PetscFree(sbuf4);
3427: 
3428:   PetscFree(cmap);
3429:   PetscFree(rmap);


3432:   MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3433:   MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);


3436:   return(0);
3437: }
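/*
   Finally, a sketch of calling the parallel extraction routine above,
   assuming C is an assembled MATMPIROWBS matrix and isrow/iscol are sorted
   index sets as the checks above require:

     Mat sub;
     MatGetSubMatrix(C,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
     ... sub is an MPIAIJ matrix distributed across the same communicator ...
     MatDestroy(sub);
*/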