Actual source code: matstash.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/matimpl.h

  5: /*
  6:        The input to the stash is ALWAYS in MatScalar precision, and the 
  7:     internal storage and output is also in MatScalar.
  8: */
  9: #define DEFAULT_STASH_SIZE   10000

 11: /*
 12:   MatStashCreate_Private - Creates a stash,currently used for all the parallel 
 13:   matrix implementations. The stash is where elements of a matrix destined 
 14:   to be stored on other processors are kept until matrix assembly is done.

 16:   This is a simple minded stash. Simply adds entries to end of stash.

 18:   Input Parameters:
 19:   comm - communicator, required for scatters.
 20:   bs   - stash block size. used when stashing blocks of values

 22:   Output Parameters:
 23:   stash    - the newly created stash
 24: */
 27: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
 28: {
 30:   PetscInt       max,*opt,nopt;
 31:   PetscTruth     flg;

 34:   /* Require 2 tags,get the second using PetscCommGetNewTag() */
 35:   stash->comm = comm;
 36:   PetscCommGetNewTag(stash->comm,&stash->tag1);
 37:   PetscCommGetNewTag(stash->comm,&stash->tag2);
 38:   MPI_Comm_size(stash->comm,&stash->size);
 39:   MPI_Comm_rank(stash->comm,&stash->rank);

 41:   nopt = stash->size;
 42:   PetscMalloc(nopt*sizeof(PetscInt),&opt);
 43:   PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
 44:   if (flg) {
 45:     if (nopt == 1)                max = opt[0];
 46:     else if (nopt == stash->size) max = opt[stash->rank];
 47:     else if (stash->rank < nopt)  max = opt[stash->rank];
 48:     else                          max = 0; /* Use default */
 49:     stash->umax = max;
 50:   } else {
 51:     stash->umax = 0;
 52:   }
 53:   PetscFree(opt);
 54:   if (bs <= 0) bs = 1;

 56:   stash->bs       = bs;
 57:   stash->nmax     = 0;
 58:   stash->oldnmax  = 0;
 59:   stash->n        = 0;
 60:   stash->reallocs = -1;
 61:   stash->idx      = 0;
 62:   stash->idy      = 0;
 63:   stash->array    = 0;

 65:   stash->send_waits  = 0;
 66:   stash->recv_waits  = 0;
 67:   stash->send_status = 0;
 68:   stash->nsends      = 0;
 69:   stash->nrecvs      = 0;
 70:   stash->svalues     = 0;
 71:   stash->rvalues     = 0;
 72:   stash->rindices    = 0;
 73:   stash->nprocs      = 0;
 74:   stash->nprocessed  = 0;
 75:   return(0);
 76: }

 78: /* 
 79:    MatStashDestroy_Private - Destroy the stash
 80: */
 83: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
 84: {

 88:   if (stash->array) {
 89:     PetscFree(stash->array);
 90:     stash->array = 0;
 91:   }
 92:   return(0);
 93: }

 95: /* 
 96:    MatStashScatterEnd_Private - This is called as the fial stage of
 97:    scatter. The final stages of messagepassing is done here, and
 98:    all the memory used for messagepassing is cleanedu up. This
 99:    routine also resets the stash, and deallocates the memory used
100:    for the stash. It also keeps track of the current memory usage
101:    so that the same value can be used the next time through.
102: */
105: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
106: {
108:   int         nsends=stash->nsends,bs2,oldnmax;
109:   MPI_Status  *send_status;

112:   /* wait on sends */
113:   if (nsends) {
114:     PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
115:     MPI_Waitall(2*nsends,stash->send_waits,send_status);
116:     PetscFree(send_status);
117:   }

119:   /* Now update nmaxold to be app 10% more than max n used, this way the
120:      wastage of space is reduced the next time this stash is used.
121:      Also update the oldmax, only if it increases */
122:   if (stash->n) {
123:     bs2      = stash->bs*stash->bs;
124:     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
125:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
126:   }

128:   stash->nmax       = 0;
129:   stash->n          = 0;
130:   stash->reallocs   = -1;
131:   stash->nprocessed = 0;

133:   if (stash->array) {
134:     PetscFree(stash->array);
135:     stash->array = 0;
136:     stash->idx   = 0;
137:     stash->idy   = 0;
138:   }
139:   if (stash->send_waits) {
140:     PetscFree(stash->send_waits);
141:     stash->send_waits = 0;
142:   }
143:   if (stash->recv_waits) {
144:     PetscFree(stash->recv_waits);
145:     stash->recv_waits = 0;
146:   }
147:   if (stash->svalues) {
148:     PetscFree(stash->svalues);
149:     stash->svalues = 0;
150:   }
151:   if (stash->rvalues) {
152:     PetscFree(stash->rvalues);
153:     stash->rvalues = 0;
154:   }
155:   if (stash->rindices) {
156:     PetscFree(stash->rindices);
157:     stash->rindices = 0;
158:   }
159:   if (stash->nprocs) {
160:     PetscFree(stash->nprocs);
161:     stash->nprocs = 0;
162:   }

164:   return(0);
165: }

167: /* 
168:    MatStashGetInfo_Private - Gets the relavant statistics of the stash

170:    Input Parameters:
171:    stash    - the stash
172:    nstash   - the size of the stash. Indicates the number of values stored.
173:    reallocs - the number of additional mallocs incurred.
174:    
175: */
178: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
179: {
180:   PetscInt bs2 = stash->bs*stash->bs;

183:   if (nstash) *nstash   = stash->n*bs2;
184:   if (reallocs) {
185:     if (stash->reallocs < 0) *reallocs = 0;
186:     else                     *reallocs = stash->reallocs;
187:   }
188:   return(0);
189: }


192: /* 
193:    MatStashSetInitialSize_Private - Sets the initial size of the stash

195:    Input Parameters:
196:    stash  - the stash
197:    max    - the value that is used as the max size of the stash. 
198:             this value is used while allocating memory.
199: */
202: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
203: {
205:   stash->umax = max;
206:   return(0);
207: }

209: /* MatStashExpand_Private - Expand the stash. This function is called
210:    when the space in the stash is not sufficient to add the new values
211:    being inserted into the stash.
212:    
213:    Input Parameters:
214:    stash - the stash
215:    incr  - the minimum increase requested
216:    
217:    Notes: 
218:    This routine doubles the currently used memory. 
219:  */
222: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
223: {
225:   PetscInt       *n_idx,*n_idy,newnmax,bs2;
226:   MatScalar *n_array;

229:   /* allocate a larger stash */
230:   bs2     = stash->bs*stash->bs;
231:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
232:     if (stash->umax)                  newnmax = stash->umax/bs2;
233:     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
234:   } else if (!stash->nmax) { /* resuing stash */
235:     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
236:     else                              newnmax = stash->oldnmax/bs2;
237:   } else                              newnmax = stash->nmax*2;
238:   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

240:   PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);
241:   n_idx = (PetscInt*)(n_array + bs2*newnmax);
242:   n_idy = (PetscInt*)(n_idx + newnmax);
243:   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));
244:   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
245:   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));
246:   if (stash->array) {PetscFree(stash->array);}
247:   stash->array   = n_array;
248:   stash->idx     = n_idx;
249:   stash->idy     = n_idy;
250:   stash->nmax    = newnmax;
251:   stash->reallocs++;
252:   return(0);
253: }
254: /*
255:   MatStashValuesRow_Private - inserts values into the stash. This function
256:   expects the values to be roworiented. Multiple columns belong to the same row
257:   can be inserted with a single call to this function.

259:   Input Parameters:
260:   stash  - the stash
261:   row    - the global row correspoiding to the values
262:   n      - the number of elements inserted. All elements belong to the above row.
263:   idxn   - the global column indices corresponding to each of the values.
264:   values - the values inserted
265: */
268: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
269: {
271:   PetscInt i;

274:   /* Check and see if we have sufficient memory */
275:   if ((stash->n + n) > stash->nmax) {
276:     MatStashExpand_Private(stash,n);
277:   }
278:   for (i=0; i<n; i++) {
279:     stash->idx[stash->n]   = row;
280:     stash->idy[stash->n]   = idxn[i];
281:     stash->array[stash->n] = values[i];
282:     stash->n++;
283:   }
284:   return(0);
285: }
286: /*
287:   MatStashValuesCol_Private - inserts values into the stash. This function
288:   expects the values to be columnoriented. Multiple columns belong to the same row
289:   can be inserted with a single call to this function.

291:   Input Parameters:
292:   stash   - the stash
293:   row     - the global row correspoiding to the values
294:   n       - the number of elements inserted. All elements belong to the above row.
295:   idxn    - the global column indices corresponding to each of the values.
296:   values  - the values inserted
297:   stepval - the consecutive values are sepated by a distance of stepval.
298:             this happens because the input is columnoriented.
299: */
302: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
303: {
305:   PetscInt i;

308:   /* Check and see if we have sufficient memory */
309:   if ((stash->n + n) > stash->nmax) {
310:     MatStashExpand_Private(stash,n);
311:   }
312:   for (i=0; i<n; i++) {
313:     stash->idx[stash->n]   = row;
314:     stash->idy[stash->n]   = idxn[i];
315:     stash->array[stash->n] = values[i*stepval];
316:     stash->n++;
317:   }
318:   return(0);
319: }

321: /*
322:   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 
323:   This function expects the values to be roworiented. Multiple columns belong 
324:   to the same block-row can be inserted with a single call to this function.
325:   This function extracts the sub-block of values based on the dimensions of
326:   the original input block, and the row,col values corresponding to the blocks.

328:   Input Parameters:
329:   stash  - the stash
330:   row    - the global block-row correspoiding to the values
331:   n      - the number of elements inserted. All elements belong to the above row.
332:   idxn   - the global block-column indices corresponding to each of the blocks of 
333:            values. Each block is of size bs*bs.
334:   values - the values inserted
335:   rmax   - the number of block-rows in the original block.
336:   cmax   - the number of block-columsn on the original block.
337:   idx    - the index of the current block-row in the original block.
338: */
341: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
342: {
344:   PetscInt i,j,k,bs2,bs=stash->bs;
345:   const MatScalar *vals;
346:   MatScalar       *array;

349:   bs2 = bs*bs;
350:   if ((stash->n+n) > stash->nmax) {
351:     MatStashExpand_Private(stash,n);
352:   }
353:   for (i=0; i<n; i++) {
354:     stash->idx[stash->n]   = row;
355:     stash->idy[stash->n] = idxn[i];
356:     /* Now copy over the block of values. Store the values column oriented.
357:        This enables inserting multiple blocks belonging to a row with a single
358:        funtion call */
359:     array = stash->array + bs2*stash->n;
360:     vals  = values + idx*bs2*n + bs*i;
361:     for (j=0; j<bs; j++) {
362:       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
363:       array += 1;
364:       vals  += cmax*bs;
365:     }
366:     stash->n++;
367:   }
368:   return(0);
369: }

371: /*
372:   MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 
373:   This function expects the values to be roworiented. Multiple columns belong 
374:   to the same block-row can be inserted with a single call to this function.
375:   This function extracts the sub-block of values based on the dimensions of
376:   the original input block, and the row,col values corresponding to the blocks.

378:   Input Parameters:
379:   stash  - the stash
380:   row    - the global block-row correspoiding to the values
381:   n      - the number of elements inserted. All elements belong to the above row.
382:   idxn   - the global block-column indices corresponding to each of the blocks of 
383:            values. Each block is of size bs*bs.
384:   values - the values inserted
385:   rmax   - the number of block-rows in the original block.
386:   cmax   - the number of block-columsn on the original block.
387:   idx    - the index of the current block-row in the original block.
388: */
391: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
392: {
394:   PetscInt i,j,k,bs2,bs=stash->bs;
395:   const MatScalar *vals;
396:   MatScalar       *array;

399:   bs2 = bs*bs;
400:   if ((stash->n+n) > stash->nmax) {
401:     MatStashExpand_Private(stash,n);
402:   }
403:   for (i=0; i<n; i++) {
404:     stash->idx[stash->n]   = row;
405:     stash->idy[stash->n] = idxn[i];
406:     /* Now copy over the block of values. Store the values column oriented.
407:      This enables inserting multiple blocks belonging to a row with a single
408:      funtion call */
409:     array = stash->array + bs2*stash->n;
410:     vals  = values + idx*bs + bs2*rmax*i;
411:     for (j=0; j<bs; j++) {
412:       for (k=0; k<bs; k++) {array[k] = vals[k];}
413:       array += bs;
414:       vals  += rmax*bs;
415:     }
416:     stash->n++;
417:   }
418:   return(0);
419: }
420: /*
421:   MatStashScatterBegin_Private - Initiates the transfer of values to the
422:   correct owners. This function goes through the stash, and check the
423:   owners of each stashed value, and sends the values off to the owner
424:   processors.

426:   Input Parameters:
427:   stash  - the stash
428:   owners - an array of size 'no-of-procs' which gives the ownership range
429:            for each node.

431:   Notes: The 'owners' array in the cased of the blocked-stash has the 
432:   ranges specified blocked global indices, and for the regular stash in
433:   the proper global indices.
434: */
437: PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
438: {
439:   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
440:   PetscInt       size=stash->size,nsends;
442:   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx;
443:   MatScalar      **rvalues,*svalues;
444:   MPI_Comm       comm = stash->comm;
445:   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
446:   PetscMPIInt    *nprocs,*nlengths,nreceives;


450:   bs2   = stash->bs*stash->bs;
451:   /*  first count number of contributors to each processor */
452:   PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);
453:   PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));
454:   PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);

456:   nlengths = nprocs+size;
457:   j        = 0;
458:   lastidx  = -1;
459:   for (i=0; i<stash->n; i++) {
460:     /* if indices are NOT locally sorted, need to start search at the beginning */
461:     if (lastidx > (idx = stash->idx[i])) j = 0;
462:     lastidx = idx;
463:     for (; j<size; j++) {
464:       if (idx >= owners[j] && idx < owners[j+1]) {
465:         nlengths[j]++; owner[i] = j; break;
466:       }
467:     }
468:   }
469:   /* Now check what procs get messages - and compute nsends. */
470:   for (i=0, nsends=0 ; i<size; i++) {
471:     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
472:   }

474:   { int  *onodes,*olengths;
475:   /* Determine the number of messages to expect, their lengths, from from-ids */
476:   PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);
477:   PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
478:   /* since clubbing row,col - lengths are multiplied by 2 */
479:   for (i=0; i<nreceives; i++) olengths[i] *=2;
480:   PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
481:   /* values are size 'bs2' lengths (and remove earlier factor 2 */
482:   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
483:   PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
484:   PetscFree(onodes);
485:   PetscFree(olengths);
486:   }

488:   /* do sends:
489:       1) starts[i] gives the starting index in svalues for stuff going to 
490:          the ith processor
491:   */
492:   PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);
493:   sindices = (PetscInt*)(svalues + bs2*stash->n);
494:   PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
495:   PetscMalloc(2*size*sizeof(PetscInt),&startv);
496:   starti   = startv + size;
497:   /* use 2 sends the first with all_a, the next with all_i and all_j */
498:   startv[0]  = 0; starti[0] = 0;
499:   for (i=1; i<size; i++) {
500:     startv[i] = startv[i-1] + nlengths[i-1];
501:     starti[i] = starti[i-1] + nlengths[i-1]*2;
502:   }
503:   for (i=0; i<stash->n; i++) {
504:     j = owner[i];
505:     if (bs2 == 1) {
506:       svalues[startv[j]]              = stash->array[i];
507:     } else {
508:       PetscInt       k;
509:       MatScalar *buf1,*buf2;
510:       buf1 = svalues+bs2*startv[j];
511:       buf2 = stash->array+bs2*i;
512:       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
513:     }
514:     sindices[starti[j]]               = stash->idx[i];
515:     sindices[starti[j]+nlengths[j]]   = stash->idy[i];
516:     startv[j]++;
517:     starti[j]++;
518:   }
519:   startv[0] = 0;
520:   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}

522:   for (i=0,count=0; i<size; i++) {
523:     if (nprocs[i]) {
524:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
525:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);
526:     }
527:   }
528: #if defined(PETSC_USE_DEBUG)
529:   PetscLogInfo((0,"MatStashScatterBegin_Private: No of messages: %d \n",nsends));
530:   for (i=0; i<size; i++) {
531:     if (nprocs[i]) {
532:       PetscLogInfo((0,"MatStashScatterBegin_Private: Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt)));
533:     }
534:   }
535: #endif
536:   PetscFree(owner);
537:   PetscFree(startv);
538:   /* This memory is reused in scatter end  for a different purpose*/
539:   for (i=0; i<2*size; i++) nprocs[i] = -1;
540:   stash->nprocs      = nprocs;
541: 
542:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
543:   PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);

545:   for (i=0; i<nreceives; i++) {
546:     recv_waits[2*i]   = recv_waits1[i];
547:     recv_waits[2*i+1] = recv_waits2[i];
548:   }
549:   stash->recv_waits = recv_waits;
550:   PetscFree(recv_waits1);
551:   PetscFree(recv_waits2);

553:   stash->svalues    = svalues;    stash->rvalues     = rvalues;
554:   stash->rindices   = rindices;   stash->send_waits  = send_waits;
555:   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
556:   return(0);
557: }

559: /* 
560:    MatStashScatterGetMesg_Private - This function waits on the receives posted 
561:    in the function MatStashScatterBegin_Private() and returns one message at 
562:    a time to the calling function. If no messages are left, it indicates this
563:    by setting flg = 0, else it sets flg = 1.

565:    Input Parameters:
566:    stash - the stash

568:    Output Parameters:
569:    nvals - the number of entries in the current message.
570:    rows  - an array of row indices (or blocked indices) corresponding to the values
571:    cols  - an array of columnindices (or blocked indices) corresponding to the values
572:    vals  - the values
573:    flg   - 0 indicates no more message left, and the current call has no values associated.
574:            1 indicates that the current call successfully received a message, and the
575:              other output parameters nvals,rows,cols,vals are set appropriately.
576: */
579: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
580: {
582:   PetscMPIInt    i,*flg_v,i1,i2;
583:   PetscInt       bs2;
584:   MPI_Status     recv_status;
585:   PetscTruth     match_found = PETSC_FALSE;


589:   *flg = 0; /* When a message is discovered this is reset to 1 */
590:   /* Return if no more messages to process */
591:   if (stash->nprocessed == stash->nrecvs) { return(0); }

593:   flg_v = stash->nprocs;
594:   bs2   = stash->bs*stash->bs;
595:   /* If a matching pair of receieves are found, process them, and return the data to
596:      the calling function. Until then keep receiving messages */
597:   while (!match_found) {
598:     MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
599:     /* Now pack the received message into a structure which is useable by others */
600:     if (i % 2) {
601:       MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
602:       flg_v[2*recv_status.MPI_SOURCE] = i/2;
603:       *nvals = *nvals/bs2;
604:     } else {
605:       MPI_Get_count(&recv_status,MPIU_INT,nvals);
606:       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
607:       *nvals = *nvals/2; /* This message has both row indices and col indices */
608:     }
609: 
610:     /* Check if we have both the messages from this proc */
611:     i1 = flg_v[2*recv_status.MPI_SOURCE];
612:     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
613:     if (i1 != -1 && i2 != -1) {
614:       *rows       = stash->rindices[i2];
615:       *cols       = *rows + *nvals;
616:       *vals       = stash->rvalues[i1];
617:       *flg        = 1;
618:       stash->nprocessed ++;
619:       match_found = PETSC_TRUE;
620:     }
621:   }
622:   return(0);
623: }