Actual source code: vecstash.c
/*$Id: vecstash.c,v 1.29 2001/09/07 20:08:55 bsmith Exp $*/

#include "src/vec/vecimpl.h"

#define DEFAULT_STASH_SIZE 100
/*
   VecStashCreate_Private - Creates a stash, currently used by all the parallel
   vector implementations. The stash is where elements of a vector destined
   to be stored on other processors are kept until vector assembly is done.

   This is a simple-minded stash that simply adds entries to the end of the stash.

   Input Parameters:
   comm - communicator, required for scatters
   bs   - stash block size, used when stashing blocks of values

   Output Parameter:
   stash - the newly created stash
*/
int VecStashCreate_Private(MPI_Comm comm,int bs,VecStash *stash)
{
  int        ierr,max,*opt,nopt;
  PetscTruth flg;
  /* Require 2 tags; get them using PetscCommGetNewTag() */
  stash->comm = comm;
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
  ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
  nopt = stash->size;
  ierr = PetscMalloc(nopt*sizeof(int),&opt);CHKERRQ(ierr);
  ierr = PetscOptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  ierr = PetscFree(opt);CHKERRQ(ierr);
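  /*
     Example (illustrative values): with 4 processes,
       -vecstash_initial_size 500       -> every process uses umax = 500
       -vecstash_initial_size 10,20,30  -> ranks 0-2 use 10, 20 and 30; rank 3
                                           falls back to the default size
  */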
  if (bs <= 0) bs = 1;

  stash->bs       = bs;
  stash->nmax     = 0;
  stash->oldnmax  = 0;
  stash->n        = 0;
  stash->reallocs = -1;
  stash->idx      = 0;
  stash->array    = 0;
  stash->bowners  = 0; /* tested in VecStashDestroy_Private(), so must start out null */

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rmax        = 0;
  stash->nprocs      = 0;
  stash->nprocessed  = 0;
  stash->donotstash  = PETSC_FALSE;
  return(0);
}
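/*
   Intended call sequence (a sketch; the parallel vector assembly code
   drives this):

     VecStashCreate_Private()         - once, when the vector is created
       ... values for off-process indices are added to the stash ...
     VecStashScatterBegin_Private()   - in assembly-begin: posts sends/receives
     VecStashScatterGetMesg_Private() - in assembly-end: called repeatedly,
                                        returning one message at a time
     VecStashScatterEnd_Private()     - in assembly-end: waits on sends, frees buffers
     VecStashDestroy_Private()        - once, when the vector is destroyed
*/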
/*
   VecStashDestroy_Private - Destroys the stash
*/
int VecStashDestroy_Private(VecStash *stash)
{
  int ierr;

  if (stash->array) {
    ierr         = PetscFree(stash->array);CHKERRQ(ierr);
    stash->array = 0;
  }
  if (stash->bowners) {
    ierr = PetscFree(stash->bowners);CHKERRQ(ierr);
  }
  return(0);
}
/*
   VecStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are completed here, and
   all the memory used for message passing is cleaned up. This routine
   also resets the stash and deallocates the memory used for the stash.
   It also keeps track of the current memory usage so that the same value
   can be used the next time through.
*/
int VecStashScatterEnd_Private(VecStash *stash)
{
  int        nsends=stash->nsends,ierr,oldnmax;
  MPI_Status *send_status;

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  /* Now update oldnmax to be approximately 10% more than the maximum n
     used this round, so that less space is wasted the next time this
     stash is used. Only update oldnmax if it increases. */
  if (stash->n) {
    oldnmax = ((int)(stash->n * 1.1) + 5)*stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }
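  /*
     Worked example (illustrative numbers): if stash->n = 1000 blocks and
     stash->bs = 2, then oldnmax = ((int)(1000*1.1) + 5)*2 = 2210 scalar
     entries, so the next use of this stash starts with room for 1105
     blocks, roughly 10% headroom over the 1000 blocks just stashed.
  */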
  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  if (stash->array) {
    ierr         = PetscFree(stash->array);CHKERRQ(ierr);
    stash->array = 0;
    stash->idx   = 0;
  }
  if (stash->send_waits) {
    ierr              = PetscFree(stash->send_waits);CHKERRQ(ierr);
    stash->send_waits = 0;
  }
  if (stash->recv_waits) {
    ierr              = PetscFree(stash->recv_waits);CHKERRQ(ierr);
    stash->recv_waits = 0;
  }
  if (stash->svalues) {
    ierr           = PetscFree(stash->svalues);CHKERRQ(ierr);
    stash->svalues = 0;
  }
  if (stash->rvalues) {
    ierr           = PetscFree(stash->rvalues);CHKERRQ(ierr);
    stash->rvalues = 0;
  }
  if (stash->nprocs) {
    ierr          = PetscFree(stash->nprocs);CHKERRQ(ierr);
    stash->nprocs = 0;
  }

  return(0);
}
/*
   VecStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash - the stash

   Output Parameters:
   nstash   - the size of the stash
   reallocs - the number of additional mallocs incurred
*/
int VecStashGetInfo_Private(VecStash *stash,int *nstash,int *reallocs)
{
  *nstash = stash->n*stash->bs;
  if (stash->reallocs < 0) *reallocs = 0;
  else                     *reallocs = stash->reallocs;

  return(0);
}
/*
   VecStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value used as the maximum size of the stash;
           this value is used when allocating memory. It specifies
           the number of values stored, even for the block-stash
*/
int VecStashSetInitialSize_Private(VecStash *stash,int max)
{
  stash->umax = max;
  return(0);
}
/* VecStashExpand_Private - Expands the stash. This function is called
   when the space in the stash is not sufficient to hold the new values
   being inserted.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
int VecStashExpand_Private(VecStash *stash,int incr)
{
  int         *n_idx,newnmax,bs=stash->bs,ierr;
  PetscScalar *n_array;

  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs;
    else             newnmax = DEFAULT_STASH_SIZE/bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
    else                              newnmax = stash->oldnmax/bs;
  } else newnmax = stash->nmax*2;

  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* the values and indices share one allocation: bs*newnmax scalars
     followed by newnmax ints */
  ierr  = PetscMalloc((newnmax)*(sizeof(int)+bs*sizeof(PetscScalar)),&n_array);CHKERRQ(ierr);
  n_idx = (int*)(n_array + bs*newnmax);
  ierr  = PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
  if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
  stash->array = n_array;
  stash->idx   = n_idx;
  stash->nmax  = newnmax;
  stash->reallocs++;
  return(0);
}
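/*
   Growth trace (illustrative, bs = 1, no -vecstash_initial_size given):
   the first expansion allocates DEFAULT_STASH_SIZE = 100 entries, and each
   subsequent expansion doubles: 100 -> 200 -> 400 -> ...  On reuse after
   VecStashScatterEnd_Private(), the first allocation instead starts at
   oldnmax, the high-water mark (plus ~10%) from the previous assembly.
*/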
/*
   VecStashScatterBegin_Private - Initiates the transfer of values to the
   correct owners. This function goes through the stash, checks the owner
   of each stashed value, and sends the values off to the owner processors.

   Input Parameters:
   stash  - the stash
   owners - an array of size 'no-of-procs'+1 which gives the ownership
            range for each process

   Notes: In the case of the blocked stash the 'owners' array has the
   ranges specified in blocked global indices, and for the regular stash
   in regular global indices.
*/
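/*
   Example (illustrative numbers): with size = 3 and owners = {0,4,8,12},
   a stashed index idx = 5 satisfies owners[1] <= idx < owners[2], so the
   entry is sent to process 1.
*/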
int VecStashScatterBegin_Private(VecStash *stash,int *owners)
{
  int         *owner,*start,tag1=stash->tag1,tag2=stash->tag2;
  int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
  int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx,bs=stash->bs;
  PetscScalar *rvalues,*svalues;
  MPI_Comm    comm = stash->comm;
  MPI_Request *send_waits,*recv_waits;
  /* first count the number of contributions destined for each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((stash->n+1)*sizeof(int),&owner);CHKERRQ(ierr);

  for (i=0; i<stash->n; i++) {
    idx = stash->idx[i];
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of the number of messages and the max length */
  ierr      = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax      = work[rank];
  nreceives = work[size+rank];
  ierr      = PetscFree(work);CHKERRQ(ierr);
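  /*
     A sketch of what the reduction above computes (assuming PetscMaxSum_Op
     takes the MAX over the first 'size' entries and the SUM over the last
     'size' entries, which is how the paired nprocs/procs layout is used here):

       on process i:  nprocs[j] = number of entries i will send to j
                      procs[j]  = 1 if i sends to j, else 0

       after reduce:  work[rank]      = max_i nprocs_i[rank]  (longest incoming message)
                      work[size+rank] = sum_i procs_i[rank]   (number of incoming messages)

     E.g. with 2 processes where each sends 3 entries to the other,
     every process ends up with nmax = 3 and nreceives = 1.
  */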
  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs*sizeof(PetscScalar)+sizeof(int)),&rvalues);CHKERRQ(ierr);
  rindices = (int*)(rvalues + bs*nreceives*nmax);
  ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0,count=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
  }
  /* do sends:
     1) start[i] gives the starting index in svalues for the entries going
        to the ith processor
  */
  ierr     = PetscMalloc((stash->n+1)*(bs*sizeof(PetscScalar)+sizeof(int)),&svalues);CHKERRQ(ierr);
  sindices = (int*)(svalues + bs*stash->n);
  ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr     = PetscMalloc(size*sizeof(int),&start);CHKERRQ(ierr);
  /* use 2 sends per processor: the first carries the values, the next the indices */
  start[0] = 0;
  for (i=1; i<size; i++) {
    start[i] = start[i-1] + nprocs[i-1];
  }
  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs == 1) {
      svalues[start[j]] = stash->array[i];
    } else {
      ierr = PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
  start[0] = 0;
  for (i=1; i<size; i++) { start[i] = start[i-1] + nprocs[i-1];}
  for (i=0,count=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+bs*start[i],bs*nprocs[i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(sindices+start[i],nprocs[i],MPI_INT,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(start);CHKERRQ(ierr);
  /* This memory is reused in VecStashScatterGetMesg_Private() for a different purpose */
  for (i=0; i<2*size; i++) nprocs[i] = -1;
  stash->nprocs     = nprocs;

  stash->svalues    = svalues;    stash->rvalues    = rvalues;
  stash->nsends     = nsends;     stash->nrecvs     = nreceives;
  stash->send_waits = send_waits; stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  return(0);
}
/*
   VecStashScatterGetMesg_Private - This function waits on the receives posted
   in the function VecStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message
   rows  - an array of indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals,rows,vals are set appropriately.
*/
int VecStashScatterGetMesg_Private(VecStash *stash,int *nvals,int **rows,PetscScalar **vals,int *flg)
{
  int        i,ierr,size=stash->size,*flg_v,*flg_i;
  int        i1,i2,*rindices,bs=stash->bs;
  MPI_Status recv_status;
  PetscTruth match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if there are no more messages to process */
  if (stash->nprocessed == stash->nrecvs) { return(0); }

  flg_v = stash->nprocs;
  flg_i = flg_v + size;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
      flg_i[recv_status.MPI_SOURCE] = i/2;
    } else {
      ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr);
      flg_v[recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs;
    }

    /* Check if we have both of the messages from this proc */
    i1 = flg_v[recv_status.MPI_SOURCE];
    i2 = flg_i[recv_status.MPI_SOURCE];
    if (i1 != -1 && i2 != -1) {
      rindices = (int*)(stash->rvalues + bs*stash->rmax*stash->nrecvs);
      *rows    = rindices + i2*stash->rmax;
      *vals    = stash->rvalues + i1*bs*stash->rmax;
      *flg     = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
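/*
   Example (a sketch, not part of the library): how a caller might drain the
   stash into local vector storage after VecStashScatterBegin_Private() has
   been called. The 'varray' pointer and 'rstart' offset are hypothetical
   names for the local array and the first locally owned index; ADD_VALUES
   semantics and bs == 1 are assumed.
*/
#if 0
static int VecStashDrainExample(VecStash *stash,PetscScalar *varray,int rstart)
{
  int         ierr,nvals,*rows,i,flg;
  PetscScalar *vals;

  /* keep receiving (index,value) message pairs until none remain */
  ierr = VecStashScatterGetMesg_Private(stash,&nvals,&rows,&vals,&flg);CHKERRQ(ierr);
  while (flg) {
    for (i=0; i<nvals; i++) varray[rows[i]-rstart] += vals[i]; /* ADD_VALUES, bs == 1 */
    ierr = VecStashScatterGetMesg_Private(stash,&nvals,&rows,&vals,&flg);CHKERRQ(ierr);
  }
  ierr = VecStashScatterEnd_Private(stash);CHKERRQ(ierr); /* waits on sends, frees buffers */
  return 0;
}
#endif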