Actual source code: matstash.c

  1: /*$Id: matstash.c,v 1.50 2001/03/23 23:22:45 balay Exp $*/

 3:  #include "src/mat/matimpl.h"

  5: /*
  6:        The input to the stash is ALWAYS in MatScalar precision, and the 
  7:     internal storage and the output are also in MatScalar.
  8: */
  9: #define DEFAULT_STASH_SIZE   10000

 11: /*
 12:   MatStashCreate_Private - Creates a stash, currently used for all the parallel 
 13:   matrix implementations. The stash is where elements of a matrix destined 
 14:   to be stored on other processors are kept until matrix assembly is done.

 16:   This is a simple-minded stash; it simply adds entries to the end of the stash.

 18:   Input Parameters:
 19:   comm - communicator, required for scatters.
 20:   bs   - stash block size; used when stashing blocks of values

 22:   Output Parameters:
 23:   stash    - the newly created stash
 24: */
 25: int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
 26: {
 27:   int        ierr,max,*opt,nopt;
 28:   PetscTruth flg;

 31:   /* Require 2 tags, get the second using PetscCommGetNewTag() */
 32:   stash->comm = comm;
 33:   PetscCommGetNewTag(stash->comm,&stash->tag1);
 34:   PetscCommGetNewTag(stash->comm,&stash->tag2);
 35:   MPI_Comm_size(stash->comm,&stash->size);
 36:   MPI_Comm_rank(stash->comm,&stash->rank);

 38:   nopt = stash->size;
 39:   PetscMalloc(nopt*sizeof(int),&opt);
 40:   PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
 41:   if (flg) {
 42:     if (nopt == 1)                max = opt[0];
 43:     else if (nopt == stash->size) max = opt[stash->rank];
 44:     else if (stash->rank < nopt)  max = opt[stash->rank];
 45:     else                          max = 0; /* Use default */
 46:     stash->umax = max;
 47:   } else {
 48:     stash->umax = 0;
 49:   }
 50:   PetscFree(opt);
 51:   if (bs <= 0) bs = 1;

 53:   stash->bs       = bs;
 54:   stash->nmax     = 0;
 55:   stash->oldnmax  = 0;
 56:   stash->n        = 0;
 57:   stash->reallocs = -1;
 58:   stash->idx      = 0;
 59:   stash->idy      = 0;
 60:   stash->array    = 0;

 62:   stash->send_waits  = 0;
 63:   stash->recv_waits  = 0;
 64:   stash->send_status = 0;
 65:   stash->nsends      = 0;
 66:   stash->nrecvs      = 0;
 67:   stash->svalues     = 0;
 68:   stash->rvalues     = 0;
 69:   stash->rmax        = 0;
 70:   stash->nprocs      = 0;
 71:   stash->nprocessed  = 0;
 72:   return(0);
 73: }
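/*
   Usage note (illustrative; not part of the original source): the initial stash
   size can be set at runtime through the -matstash_initial_size option parsed
   above.  Assuming the usual comma-separated syntax for PETSc integer-array
   options and a hypothetical executable ./ex1 run on 4 processes:

       mpirun -np 4 ./ex1 -matstash_initial_size 10000
       mpirun -np 4 ./ex1 -matstash_initial_size 5000,20000,20000,5000

   The first form gives every process umax = 10000; the second gives ranks 1 and 2
   umax = 20000 and ranks 0 and 3 umax = 5000.  If fewer values than processes are
   supplied, the trailing ranks fall back to the default stash size.
*/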

 75: /* 
 76:    MatStashDestroy_Private - Destroy the stash
 77: */
 78: int MatStashDestroy_Private(MatStash *stash)
 79: {

 83:   if (stash->array) {
 84:     PetscFree(stash->array);
 85:     stash->array = 0;
 86:   }
 87:   return(0);
 88: }

 90: /* 
 91:    MatStashScatterEnd_Private - This is called as the final stage of the
 92:    scatter. The final stages of message passing are done here, and
 93:    all the memory used for message passing is cleaned up. This
 94:    routine also resets the stash, and deallocates the memory used
 95:    for the stash. It also keeps track of the current memory usage
 96:    so that the same value can be used the next time through.
 97: */
 98: int MatStashScatterEnd_Private(MatStash *stash)
 99: {
100:   int         nsends=stash->nsends,ierr,bs2,oldnmax;
101:   MPI_Status  *send_status;

104:   /* wait on sends */
105:   if (nsends) {
106:     PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
107:     MPI_Waitall(2*nsends,stash->send_waits,send_status);
108:     PetscFree(send_status);
109:   }

111:   /* Now update oldnmax to be approximately 10% more than the max n used; this way the
112:      wastage of space is reduced the next time this stash is used.
113:      Also update oldnmax, only if it increases */
114:   if (stash->n) {
115:     bs2      = stash->bs*stash->bs;
116:     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
117:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
118:   }

120:   stash->nmax       = 0;
121:   stash->n          = 0;
122:   stash->reallocs   = -1;
123:   stash->rmax       = 0;
124:   stash->nprocessed = 0;

126:   if (stash->array) {
127:     ierr         = PetscFree(stash->array);
128:     stash->array = 0;
129:     stash->idx   = 0;
130:     stash->idy   = 0;
131:   }
132:   if (stash->send_waits) {
133:     PetscFree(stash->send_waits);
134:     stash->send_waits = 0;
135:   }
136:   if (stash->recv_waits) {
137:     PetscFree(stash->recv_waits);
138:     stash->recv_waits = 0;
139:   }
140:   if (stash->svalues) {
141:     PetscFree(stash->svalues);
142:     stash->svalues = 0;
143:   }
144:   if (stash->rvalues) {
145:     PetscFree(stash->rvalues);
146:     stash->rvalues = 0;
147:   }
148:   if (stash->nprocs) {
149:     PetscFree(stash->nprocs);
150:     stash->nprocs = 0;
151:   }

153:   return(0);
154: }
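/*
   Worked example (illustrative; not part of the original source): if an assembly
   stashed n = 1000 entries with bs = 1, the update above yields
   oldnmax = ((int)(1000*1.1) + 5)*1 = (1100 + 5)*1 = 1105, so the next assembly
   that reuses this stash has room for 1105 entries before any reallocation.
*/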

156: /* 
157:    MatStashGetInfo_Private - Gets the relevant statistics of the stash

159:    Input Parameters:
160:    stash    - the stash
161:    nstash   - the size of the stash, i.e. the number of values stored.
162:    reallocs - the number of additional mallocs incurred.
163:    
164: */
165: int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
166: {
167:   int bs2 = stash->bs*stash->bs;

170:   *nstash   = stash->n*bs2;
171:   if (stash->reallocs < 0) *reallocs = 0;
172:   else                     *reallocs = stash->reallocs;
173:   return(0);
174: }


177: /* 
178:    MatStashSetInitialSize_Private - Sets the initial size of the stash

180:    Input Parameters:
181:    stash  - the stash
182:    max    - the value that is used as the max size of the stash;
183:             this value is used while allocating memory.
184: */
185: int MatStashSetInitialSize_Private(MatStash *stash,int max)
186: {
188:   stash->umax = max;
189:   return(0);
190: }

192: /* MatStashExpand_Private - Expand the stash. This function is called
193:    when the space in the stash is not sufficient to add the new values
194:    being inserted into the stash.
195:    
196:    Input Parameters:
197:    stash - the stash
198:    incr  - the minimum increase requested
199:    
200:    Notes: 
201:    This routine doubles the currently used memory. 
202:  */
203: static int MatStashExpand_Private(MatStash *stash,int incr)
204: {
205:   int       *n_idx,*n_idy,newnmax,bs2,ierr;
206:   MatScalar *n_array;

209:   /* allocate a larger stash */
210:   bs2     = stash->bs*stash->bs;
211:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
212:     if (stash->umax)                  newnmax = stash->umax/bs2;
213:     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
214:   } else if (!stash->nmax) { /* reusing stash */
215:     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
216:     else                              newnmax = stash->oldnmax/bs2;
217:   } else                              newnmax = stash->nmax*2;
218:   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

220:   ierr  = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);
221:   n_idx = (int*)(n_array + bs2*newnmax);
222:   n_idy = (int*)(n_idx + newnmax);
223:   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));
224:   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
225:   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
226:   if (stash->array) {PetscFree(stash->array);}
227:   stash->array   = n_array;
228:   stash->idx     = n_idx;
229:   stash->idy     = n_idy;
230:   stash->nmax    = newnmax;
231:   stash->reallocs++;
232:   return(0);
233: }
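/*
   Layout note (illustrative; not part of the original source): the stash keeps
   its three arrays inside the single allocation made above, in this order:

       n_array : newnmax*bs2 MatScalar values
       n_idx   : newnmax int row indices  (starts at n_array + bs2*newnmax)
       n_idy   : newnmax int col indices  (starts at n_idx + newnmax)

   For example, with bs = 2 (bs2 = 4) and newnmax = 100 the block holds 400
   MatScalars followed by 100 + 100 ints; entry i's values sit at
   stash->array + 4*i, and its indices at stash->idx[i] and stash->idy[i].
*/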
234: /*
235:   MatStashValuesRow_Private - inserts values into the stash. This function
236:   expects the values to be row-oriented. Multiple columns belonging to the same
237:   row can be inserted with a single call to this function.

239:   Input Parameters:
240:   stash  - the stash
241:   row    - the global row corresponding to the values
242:   n      - the number of elements inserted. All elements belong to the above row.
243:   idxn   - the global column indices corresponding to each of the values.
244:   values - the values inserted
245: */
246: int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values)
247: {
248:   int    ierr,i;

251:   /* Check and see if we have sufficient memory */
252:   if ((stash->n + n) > stash->nmax) {
253:     MatStashExpand_Private(stash,n);
254:   }
255:   for (i=0; i<n; i++) {
256:     stash->idx[stash->n]   = row;
257:     stash->idy[stash->n]   = idxn[i];
258:     stash->array[stash->n] = values[i];
259:     stash->n++;
260:   }
261:   return(0);
262: }
263: /*
264:   MatStashValuesCol_Private - inserts values into the stash. This function
265:   expects the values to be column-oriented. Multiple columns belonging to the same
266:   row can be inserted with a single call to this function.

268:   Input Parameters:
269:   stash   - the stash
270:   row     - the global row corresponding to the values
271:   n       - the number of elements inserted. All elements belong to the above row.
272:   idxn    - the global column indices corresponding to each of the values.
273:   values  - the values inserted
274:   stepval - the consecutive values are separated by a distance of stepval.
275:             This happens because the input is column-oriented.
276: */
277: int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int stepval)
278: {
279:   int    ierr,i;

282:   /* Check and see if we have sufficient memory */
283:   if ((stash->n + n) > stash->nmax) {
284:     MatStashExpand_Private(stash,n);
285:   }
286:   for (i=0; i<n; i++) {
287:     stash->idx[stash->n]   = row;
288:     stash->idy[stash->n]   = idxn[i];
289:     stash->array[stash->n] = values[i*stepval];
290:     stash->n++;
291:   }
292:   return(0);
293: }
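/*
   Example (illustrative; not part of the original source): to stash row r of an
   m-by-n dense block stored column-oriented (column-major) in an array v, a
   caller could pass values = v + r and stepval = m; then values[i*stepval]
   picks out v[r + i*m], i.e. the entry in row r and column i.
*/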

295: /*
296:   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 
297:   This function expects the values to be row-oriented. Multiple columns belonging
298:   to the same block-row can be inserted with a single call to this function.
299:   This function extracts the sub-block of values based on the dimensions of
300:   the original input block, and the row,col values corresponding to the blocks.

302:   Input Parameters:
303:   stash  - the stash
304:   row    - the global block-row corresponding to the values
305:   n      - the number of elements inserted. All elements belong to the above row.
306:   idxn   - the global block-column indices corresponding to each of the blocks of 
307:            values. Each block is of size bs*bs.
308:   values - the values inserted
309:   rmax   - the number of block-rows in the original block.
310:   cmax   - the number of block-columns in the original block.
311:   idx    - the index of the current block-row in the original block.
312: */
313: int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
314: {
315:   int       ierr,i,j,k,bs2,bs=stash->bs;
316:   MatScalar *vals,*array;

319:   bs2 = bs*bs;
320:   if ((stash->n+n) > stash->nmax) {
321:     MatStashExpand_Private(stash,n);
322:   }
323:   for (i=0; i<n; i++) {
324:     stash->idx[stash->n]   = row;
325:     stash->idy[stash->n] = idxn[i];
326:     /* Now copy over the block of values. Store the values column-oriented.
327:        This enables inserting multiple blocks belonging to a row with a single
328:        function call */
329:     array = stash->array + bs2*stash->n;
330:     vals  = values + idx*bs2*n + bs*i;
331:     for (j=0; j<bs; j++) {
332:       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
333:       array += 1;
334:       vals  += cmax*bs;
335:     }
336:     stash->n++;
337:   }
338:   return(0);
339: }
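/*
   Worked example (illustrative; not part of the original source): with bs = 2 a
   block supplied row-oriented as

       a b
       c d

   is copied into stash->array in the order a,c,b,d (column-major).  In the loop
   above, iteration j walks the j-th row of the input block (vals advances by the
   input row length cmax*bs) and scatters it with stride bs into the stash block
   (array[k*bs]), which produces the column-oriented storage.
*/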

341: /*
342:   MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 
343:   This function expects the values to be column-oriented. Multiple columns belonging
344:   to the same block-row can be inserted with a single call to this function.
345:   This function extracts the sub-block of values based on the dimensions of
346:   the original input block, and the row,col values corresponding to the blocks.

348:   Input Parameters:
349:   stash  - the stash
350:   row    - the global block-row corresponding to the values
351:   n      - the number of elements inserted. All elements belong to the above row.
352:   idxn   - the global block-column indices corresponding to each of the blocks of 
353:            values. Each block is of size bs*bs.
354:   values - the values inserted
355:   rmax   - the number of block-rows in the original block.
356:   cmax   - the number of block-columns in the original block.
357:   idx    - the index of the current block-row in the original block.
358: */
359: int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
360: {
361:   int       ierr,i,j,k,bs2,bs=stash->bs;
362:   MatScalar *vals,*array;

365:   bs2 = bs*bs;
366:   if ((stash->n+n) > stash->nmax) {
367:     MatStashExpand_Private(stash,n);
368:   }
369:   for (i=0; i<n; i++) {
370:     stash->idx[stash->n]   = row;
371:     stash->idy[stash->n] = idxn[i];
372:     /* Now copy over the block of values. Store the values column-oriented.
373:      This enables inserting multiple blocks belonging to a row with a single
374:      function call */
375:     array = stash->array + bs2*stash->n;
376:     vals  = values + idx*bs + bs2*rmax*i;
377:     for (j=0; j<bs; j++) {
378:       for (k=0; k<bs; k++) {array[k] = vals[k];}
379:       array += bs;
380:       vals  += rmax*bs;
381:     }
382:     stash->n++;
383:   }
384:   return(0);
385: }
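/*
   Indexing note (illustrative; not part of the original source): for a
   column-oriented input of rmax-by-cmax blocks the full array has leading
   dimension rmax*bs, so the first entry of block (idx,i) sits at
   values[(i*bs)*(rmax*bs) + idx*bs] = values[idx*bs + bs2*rmax*i], which is the
   offset used above; each vals += rmax*bs then steps to the next column of that
   block.
*/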
386: /*
387:   MatStashScatterBegin_Private - Initiates the transfer of values to the
388:   correct owners. This function goes through the stash, checks the
389:   owner of each stashed value, and sends the values off to the owner
390:   processors.

392:   Input Parameters:
393:   stash  - the stash
394:   owners - an array of size 'no-of-procs' which gives the ownership range
395:            for each node.

397:   Notes: In the case of the blocked stash the 'owners' array has the
398:   ranges specified in blocked global indices, and for the regular stash
399:   in regular global indices.
400: */
401: int MatStashScatterBegin_Private(MatStash *stash,int *owners)
402: {
403:   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
404:   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
405:   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
406:   MatScalar   *rvalues,*svalues;
407:   MPI_Comm    comm = stash->comm;
408:   MPI_Request *send_waits,*recv_waits;


412:   bs2   = stash->bs*stash->bs;
413:   /*  first count number of contributors to each processor */
414:   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);
415:   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));
416:   procs = nprocs + size;
417:   ierr  = PetscMalloc((stash->n+1)*sizeof(int),&owner);

419:   for (i=0; i<stash->n; i++) {
420:     idx = stash->idx[i];
421:     for (j=0; j<size; j++) {
422:       if (idx >= owners[j] && idx < owners[j+1]) {
423:         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
424:       }
425:     }
426:   }
427:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
428: 
429:   /* inform other processors of number of messages and max length*/
430:   ierr      = PetscMalloc(2*size*sizeof(int),&work);
431:   ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
432:   nmax      = work[rank];
433:   nreceives = work[size+rank];
434:   ierr      = PetscFree(work);
435:   /* post receives: 
436:      since we don't know how long each individual message is we 
437:      allocate the largest needed buffer for each receive. Potentially 
438:      this is a lot of wasted space.
439:   */
440:   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);
441:   rindices = (int*)(rvalues + bs2*nreceives*nmax);
442:   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
443:   for (i=0,count=0; i<nreceives; i++) {
444:     MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
445:                      recv_waits+count++);
446:     MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
447:   }

449:   /* do sends:
450:       1) startv[i] gives the starting index in svalues for stuff going to 
451:          the ith processor
452:   */
453:   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);
454:   sindices = (int*)(svalues + bs2*stash->n);
455:   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
456:   ierr     = PetscMalloc(2*size*sizeof(int),&startv);
457:   starti   = startv + size;
458:   /* use 2 sends per destination: the first with all_a, the next with all_i and all_j */
459:   startv[0]  = 0; starti[0] = 0;
460:   for (i=1; i<size; i++) {
461:     startv[i] = startv[i-1] + nprocs[i-1];
462:     starti[i] = starti[i-1] + nprocs[i-1]*2;
463:   }
464:   for (i=0; i<stash->n; i++) {
465:     j = owner[i];
466:     if (bs2 == 1) {
467:       svalues[startv[j]]              = stash->array[i];
468:     } else {
469:       int       k;
470:       MatScalar *buf1,*buf2;
471:       buf1 = svalues+bs2*startv[j];
472:       buf2 = stash->array+bs2*i;
473:       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
474:     }
475:     sindices[starti[j]]             = stash->idx[i];
476:     sindices[starti[j]+nprocs[j]]   = stash->idy[i];
477:     startv[j]++;
478:     starti[j]++;
479:   }
480:   startv[0] = 0;
481:   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[i-1];}
482:   for (i=0,count=0; i<size; i++) {
483:     if (procs[i]) {
484:       MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_MATSCALAR,i,tag1,comm,
485:                        send_waits+count++);
486:       MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
487:                        send_waits+count++);
488:     }
489:   }
490:   PetscFree(owner);
491:   PetscFree(startv);
492:   /* This memory is reused in the scatter end for a different purpose */
493:   for (i=0; i<2*size; i++) nprocs[i] = -1;
494:   stash->nprocs      = nprocs;

496:   stash->svalues    = svalues;    stash->rvalues    = rvalues;
497:   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
498:   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
499:   stash->rmax       = nmax;
500:   return(0);
501: }
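/*
   Protocol note (illustrative; not part of the original source): for each
   destination process j that will receive entries from this process, two
   messages are posted above:

       tag1: bs2*nprocs[j] MatScalars - the (block) values
       tag2: 2*nprocs[j]   ints       - all row indices, then all column indices

   A receiver only knows nmax, an upper bound on the number of stash entries in
   any incoming message (from the MPI_Allreduce above), so each of its
   2*nreceives MPI_Irecv buffers is sized for nmax entries (bs2*nmax MatScalars
   or 2*nmax ints); the actual message lengths are recovered later with
   MPI_Get_count in MatStashScatterGetMesg_Private().
*/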

503: /* 
504:    MatStashScatterGetMesg_Private - This function waits on the receives posted 
505:    in the function MatStashScatterBegin_Private() and returns one message at 
506:    a time to the calling function. If no messages are left, it indicates this
507:    by setting flg = 0, else it sets flg = 1.

509:    Input Parameters:
510:    stash - the stash

512:    Output Parameters:
513:    nvals - the number of entries in the current message.
514:    rows  - an array of row indices (or blocked indices) corresponding to the values
515:    cols  - an array of column indices (or blocked indices) corresponding to the values
516:    vals  - the values
517:    flg   - 0 indicates no more messages are left, and the current call has no values associated with it.
518:            1 indicates that the current call successfully received a message, and the
519:              other output parameters nvals,rows,cols,vals are set appropriately.
520: */
521: int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
522: {
523:   int         i,ierr,size=stash->size,*flg_v,*flg_i,i1,i2,*rindices,bs2;
524:   MPI_Status  recv_status;
525:   PetscTruth  match_found = PETSC_FALSE;


529:   *flg = 0; /* When a message is discovered this is reset to 1 */
530:   /* Return if no more messages to process */
531:   if (stash->nprocessed == stash->nrecvs) { return(0); }

533:   flg_v = stash->nprocs;
534:   flg_i = flg_v + size;
535:   bs2   = stash->bs*stash->bs;
536:   /* If a matching pair of receives is found, process them, and return the data to
537:      the calling function. Until then keep receiving messages */
538:   while (!match_found) {
539:     MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
540:     /* Now pack the received message into a structure which is usable by others */
541:     if (i % 2) {
542:       MPI_Get_count(&recv_status,MPI_INT,nvals);
543:       flg_i[recv_status.MPI_SOURCE] = i/2;
544:       *nvals = *nvals/2; /* This message has both row indices and col indices */
545:     } else {
546:       MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
547:       flg_v[recv_status.MPI_SOURCE] = i/2;
548:       *nvals = *nvals/bs2;
549:     }
550: 
551:     /* Check if we have both the messages from this proc */
552:     i1 = flg_v[recv_status.MPI_SOURCE];
553:     i2 = flg_i[recv_status.MPI_SOURCE];
554:     if (i1 != -1 && i2 != -1) {
555:       rindices    = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
556:       *rows       = rindices + 2*i2*stash->rmax;
557:       *cols       = *rows + *nvals;
558:       *vals       = stash->rvalues + i1*bs2*stash->rmax;
559:       *flg        = 1;
560:       stash->nprocessed ++;
561:       match_found = PETSC_TRUE;
562:     }
563:   }
564:   return(0);
565: }
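/*
   Usage sketch (illustrative; not part of the original source): a parallel matrix
   implementation is expected to drive the stash roughly as follows during
   assembly; for simplicity assume bs = 1.  Here mat->stash, owners and
   MatSetValues_Local() are placeholders for the particular matrix type's stash,
   ownership-range array and local insertion routine.

       MatScalar *vals;
       int       *rows,*cols,i,n,flg,ierr;

       ierr = MatStashScatterBegin_Private(&mat->stash,owners);
       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
       while (flg) {
         for (i=0; i<n; i++) {
           MatSetValues_Local(mat,rows[i],cols[i],vals[i]);
         }
         ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
       }
       ierr = MatStashScatterEnd_Private(&mat->stash);
*/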