Actual source code: vecstash.c

#define PETSCVEC_DLL

#include "vecimpl.h"

#define DEFAULT_STASH_SIZE   100

/*
  VecStashCreate_Private - Creates a stash, currently used by all the parallel
  vector implementations. The stash is where vector entries destined to be
  stored on other processors are kept until vector assembly is done.

  This is a simple-minded stash: entries are simply appended to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameters:
  stash    - the newly created stash
*/
PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
{
  PetscInt       max,*opt,nopt;
  PetscTruth     flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);

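  /* The -vecstash_initial_size option lets the user preallocate the stash:
     a single value applies to every process, while an array with one entry
     per process sets a per-process size (see the handling of nopt below). */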
  nopt = stash->size;
  PetscMalloc(nopt*sizeof(PetscInt),&opt);
  PetscOptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs       = bs;
  stash->nmax     = 0;
  stash->oldnmax  = 0;
  stash->n        = 0;
  stash->reallocs = -1;
  stash->idx      = 0;
  stash->array    = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rmax        = 0;
  stash->nprocs      = 0;
  stash->nprocessed  = 0;
  stash->donotstash  = PETSC_FALSE;
  return(0);
}

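/*
   For orientation: the stash is created empty above (array, idx, and all the
   communication buffers are 0); memory is only allocated when the first value
   is stashed, via VecStashExpand_Private() below. A rough sketch of the
   VecStash fields this file relies on, inferred from their use here (the
   authoritative definition lives in vecimpl.h and may differ in detail):

     comm, size, rank, tag1, tag2  - communicator, its size/rank, and the two message tags
     bs                            - block size of stashed entries
     n, nmax, oldnmax, umax        - entries used, current capacity, capacity remembered from
                                     the previous assembly, user-set initial size (in values)
     reallocs                      - number of expansions since the last scatter
     idx, array                    - global (block) indices and the stashed values (bs per entry)
     svalues, rvalues, send_waits, recv_waits, send_status
                                   - send/receive buffers and MPI request/status arrays
     nprocs, nsends, nrecvs, rmax, nprocessed, donotstash
                                   - bookkeeping for the scatter routines
*/
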
/* 
   VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{

  if (stash->array) {
    PetscFree(stash->array);
    stash->array = 0;
  }
  if (stash->bowners) {
    PetscFree(stash->bowners);
  }
  return(0);
}

/* 
   VecStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are completed here, and
   all the memory used for message passing is cleaned up. This routine
   also resets the stash, and deallocates the memory used for the stash.
   It also keeps track of the current memory usage so that the same value
   can be used the next time through.
*/
PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
{
  PetscInt       nsends=stash->nsends,oldnmax;
  MPI_Status     *send_status;

  /* wait on sends */
  if (nsends) {
    PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be roughly 10% more than the number of entries just
     used, so that less space is wasted the next time this stash is used.
     oldnmax is only updated if this would increase it. */
  if (stash->n) {
    oldnmax  = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  if (stash->array) {
    PetscFree(stash->array);
    stash->array = 0;
    stash->idx   = 0;
  }
  if (stash->send_waits) {
    PetscFree(stash->send_waits);
    stash->send_waits = 0;
  }
  if (stash->recv_waits) {
    PetscFree(stash->recv_waits);
    stash->recv_waits = 0;
  }
  if (stash->svalues) {
    PetscFree(stash->svalues);
    stash->svalues = 0;
  }
  if (stash->rvalues) {
    PetscFree(stash->rvalues);
    stash->rvalues = 0;
  }
  if (stash->nprocs) {
    PetscFree(stash->nprocs);
    stash->nprocs = 0;
  }
  return(0);
}

/* 
   VecStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the number of stashed values
   reallocs - the number of additional mallocs incurred
*/
PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
{

  if (nstash)  *nstash   = stash->n*stash->bs;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}


/* 
   VecStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash  - the stash
   max    - the value used as the maximum size of the stash.
            This value is used when allocating memory; it specifies the
            number of individual values stored, even for the block stash.
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}

/* VecStashExpand_Private - Expands the stash. This function is called
   when the space in the stash is not sufficient for the new values
   being inserted.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
{
  PetscInt       *n_idx,newnmax,bs=stash->bs;
  PetscScalar    *n_array;

  /* allocate a larger stash. */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax)                  newnmax = stash->umax/bs;
    else                              newnmax = DEFAULT_STASH_SIZE/bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
    else                              newnmax = stash->oldnmax/bs;
  } else                              newnmax = stash->nmax*2;

  if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

  /* values and indices share one allocation: bs*newnmax scalars followed by newnmax integer indices */
  PetscMalloc((newnmax)*(sizeof(PetscInt)+bs*sizeof(PetscScalar)),&n_array);
  n_idx = (PetscInt*)(n_array + bs*newnmax);
  PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  if (stash->array) {PetscFree(stash->array);}
  stash->array   = n_array;
  stash->idx     = n_idx;
  stash->nmax    = newnmax;
  stash->reallocs++;
  return(0);
}
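
/*
   Illustrative sketch (not part of PETSc): how a caller such as VecSetValues()
   might append one entry to the stash, growing it with VecStashExpand_Private()
   when needed. The helper name below is hypothetical; the real insertion code
   lives with the vector implementations.
*/
static PetscErrorCode VecStashAppendExample(VecStash *stash,PetscInt row,const PetscScalar *values)
{
  PetscInt i,bs = stash->bs;

  /* grow the stash if there is no room for one more (block) entry */
  if (stash->n + 1 > stash->nmax) {
    VecStashExpand_Private(stash,1);
  }
  /* record the (block) index and copy the bs values for this entry */
  stash->idx[stash->n] = row;
  for (i=0; i<bs; i++) stash->array[bs*stash->n + i] = values[i];
  stash->n++;
  return(0);
}
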
/*
  VecStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the owner
  of each stashed value, and sends the values off to the owner processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs'+1 giving the ownership range for
           each processor: index idx belongs to processor j when
           owners[j] <= idx < owners[j+1]. For example, with three processors
           each owning four entries, owners[] = {0,4,8,12}.

  Notes: For the blocked stash the 'owners' ranges are given in blocked
  global indices; for the regular stash they are given in regular global
  indices.
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
{
  PetscMPIInt    size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
  PetscInt       *owner,*start,*nprocs,nsends,nreceives;
  PetscInt       nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
  PetscScalar    *rvalues,*svalues;
  MPI_Comm       comm = stash->comm;
  MPI_Request    *send_waits,*recv_waits;

  /*  first count number of contributors to each processor */
  PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
  PetscMemzero(nprocs,2*size*sizeof(PetscInt));
  PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);

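  /* nprocs is laid out as size pairs: nprocs[2*j] counts the stashed entries
     destined for processor j, and nprocs[2*j+1] is set to 1 if anything at all
     will be sent to processor j (used below to count messages and by PetscMaxSum) */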
  j       = 0;
  lastidx = -1;
  for (i=0; i<stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  PetscMaxSum(comm,nprocs,&nmax,&nreceives);

  /* post receives: 
     since we don't know how long each individual message is we 
     allocate the largest needed buffer for each receive. Potentially 
     this is a lot of wasted space.
  */
  PetscMalloc((nreceives+1)*(nmax+1)*(bs*sizeof(PetscScalar)+sizeof(PetscInt)),&rvalues);
  rindices = (PetscInt*)(rvalues + bs*nreceives*nmax);
  PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
  for (i=0,count=0; i<nreceives; i++) {
    MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
    MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
  }

  /* do sends:
      1) start[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscMalloc((stash->n+1)*(bs*sizeof(PetscScalar)+sizeof(PetscInt)),&svalues);
  sindices   = (PetscInt*)(svalues + bs*stash->n);
  PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
  PetscMalloc(size*sizeof(PetscInt),&start);
  /* use 2 sends per processor: the first with all the values, the next with all the indices */
  start[0] = 0;
  for (i=1; i<size; i++) {
    start[i] = start[i-1] + nprocs[2*i-2];
  }
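  /* scatter the stashed values and indices into the send buffers, grouped by
     destination; start[j] is advanced as entries are copied, so it is rebuilt
     below before posting the sends */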
  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs == 1) {
      svalues[start[j]] = stash->array[i];
    } else {
      PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
    }
    sindices[start[j]]  = stash->idx[i];
    start[j]++;
  }
  start[0] = 0;
  for (i=1; i<size; i++) { start[i] = start[i-1] + nprocs[2*i-2];}
  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
      MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
    }
  }
  PetscFree(owner);
  PetscFree(start);
  /* This memory is reused as the message-tracking array in VecStashScatterGetMesg_Private(),
     and freed in VecStashScatterEnd_Private() */
  for (i=0; i<2*size; i++) nprocs[i] = -1;
  stash->nprocs      = nprocs;

  stash->svalues    = svalues;    stash->rvalues    = rvalues;
  stash->nsends     = nsends;     stash->nrecvs     = nreceives;
  stash->send_waits = send_waits; stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  return(0);
}

/* 
   VecStashScatterGetMesg_Private - This function waits on the receives posted 
   in VecStashScatterBegin_Private() and returns one message at a time to the
   calling function. If no messages are left, it indicates this by setting
   flg = 0; otherwise it sets flg = 1.

   Input Parameters:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message
   rows  - an array of row indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages are left, and the current call has no values associated;
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals,rows,vals are set appropriately.
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt    i;
  PetscInt       *flg_v;
  PetscInt       i1,i2,*rindices,bs=stash->bs;
  MPI_Status     recv_status;
  PetscTruth     match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) { return(0); }

  flg_v = stash->nprocs;
  /* flg_v records, for each source processor, which receive-buffer slot holds its
     values message (even entry) and which holds its indices message (odd entry);
     -1 means that message has not arrived yet. If a matching pair of receives is
     found, process them and return the data to the calling function. Until then
     keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
    } else {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      rindices    = (PetscInt*)(stash->rvalues + bs*stash->rmax*stash->nrecvs);
      *rows       = rindices + i2*stash->rmax;
      *vals       = stash->rvalues + i1*bs*stash->rmax;
      *flg        = 1;
      stash->nprocessed ++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
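
/*
   Illustrative sketch (not part of PETSc): how a parallel vector assembly
   routine might drive the three scatter calls above. The ownership array,
   rstart (first global index owned by this process), and the way received
   values are applied are assumptions for the example; the actual logic lives
   in the MPI vector implementation (used by VecAssemblyBegin/VecAssemblyEnd).
   For simplicity this sketch assumes bs == 1.
*/
static PetscErrorCode VecStashAssemblyExample(VecStash *stash,PetscInt *owners,PetscInt rstart,PetscScalar *localarray)
{
  PetscMPIInt  nvals;
  PetscInt     *rows,i,flg;
  PetscScalar  *vals;

  /* send stashed entries to their owners and post the matching receives */
  VecStashScatterBegin_Private(stash,owners);

  /* drain the incoming messages one at a time until flg comes back 0 */
  VecStashScatterGetMesg_Private(stash,&nvals,&rows,&vals,&flg);
  while (flg) {
    for (i=0; i<nvals; i++) {
      /* here we simply insert; an implementation could also add the values instead */
      localarray[rows[i] - rstart] = vals[i];
    }
    VecStashScatterGetMesg_Private(stash,&nvals,&rows,&vals,&flg);
  }

  /* wait on the sends, free the communication buffers, and reset the stash */
  VecStashScatterEnd_Private(stash);
  return(0);
}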