Actual source code: matstash.c
1: /*$Id: matstash.c,v 1.50 2001/03/23 23:22:45 balay Exp $*/
3: #include src/mat/matimpl.h
5: /*
6: The input to the stash is ALWAYS in MatScalar precision, and the
7: internal storage and output is also in MatScalar.
8: */
9: #define DEFAULT_STASH_SIZE 10000
11: /*
12: MatStashCreate_Private - Creates a stash,currently used for all the parallel
13: matrix implementations. The stash is where elements of a matrix destined
14: to be stored on other processors are kept until matrix assembly is done.
16: This is a simple minded stash. Simply adds entries to end of stash.
18: Input Parameters:
19: comm - communicator, required for scatters.
20: bs - stash block size. used when stashing blocks of values
22: Output Parameters:
23: stash - the newly created stash
24: */
25: int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
26: {
27: int ierr,max,*opt,nopt;
28: PetscTruth flg;
31: /* Require 2 tags,get the second using PetscCommGetNewTag() */
32: stash->comm = comm;
33: PetscCommGetNewTag(stash->comm,&stash->tag1);
34: PetscCommGetNewTag(stash->comm,&stash->tag2);
35: MPI_Comm_size(stash->comm,&stash->size);
36: MPI_Comm_rank(stash->comm,&stash->rank);
38: nopt = stash->size;
39: PetscMalloc(nopt*sizeof(int),&opt);
40: PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
41: if (flg) {
42: if (nopt == 1) max = opt[0];
43: else if (nopt == stash->size) max = opt[stash->rank];
44: else if (stash->rank < nopt) max = opt[stash->rank];
45: else max = 0; /* Use default */
46: stash->umax = max;
47: } else {
48: stash->umax = 0;
49: }
50: PetscFree(opt);
51: if (bs <= 0) bs = 1;
53: stash->bs = bs;
54: stash->nmax = 0;
55: stash->oldnmax = 0;
56: stash->n = 0;
57: stash->reallocs = -1;
58: stash->idx = 0;
59: stash->idy = 0;
60: stash->array = 0;
62: stash->send_waits = 0;
63: stash->recv_waits = 0;
64: stash->send_status = 0;
65: stash->nsends = 0;
66: stash->nrecvs = 0;
67: stash->svalues = 0;
68: stash->rvalues = 0;
69: stash->rmax = 0;
70: stash->nprocs = 0;
71: stash->nprocessed = 0;
72: return(0);
73: }
75: /*
76: MatStashDestroy_Private - Destroy the stash
77: */
78: int MatStashDestroy_Private(MatStash *stash)
79: {
83: if (stash->array) {
84: PetscFree(stash->array);
85: stash->array = 0;
86: }
87: return(0);
88: }
90: /*
91: MatStashScatterEnd_Private - This is called as the fial stage of
92: scatter. The final stages of messagepassing is done here, and
93: all the memory used for messagepassing is cleanedu up. This
94: routine also resets the stash, and deallocates the memory used
95: for the stash. It also keeps track of the current memory usage
96: so that the same value can be used the next time through.
97: */
98: int MatStashScatterEnd_Private(MatStash *stash)
99: {
100: int nsends=stash->nsends,ierr,bs2,oldnmax;
101: MPI_Status *send_status;
104: /* wait on sends */
105: if (nsends) {
106: PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
107: MPI_Waitall(2*nsends,stash->send_waits,send_status);
108: PetscFree(send_status);
109: }
111: /* Now update nmaxold to be app 10% more than max n used, this way the
112: wastage of space is reduced the next time this stash is used.
113: Also update the oldmax, only if it increases */
114: if (stash->n) {
115: bs2 = stash->bs*stash->bs;
116: oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
117: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
118: }
120: stash->nmax = 0;
121: stash->n = 0;
122: stash->reallocs = -1;
123: stash->rmax = 0;
124: stash->nprocessed = 0;
126: if (stash->array) {
127: ierr = PetscFree(stash->array);
128: stash->array = 0;
129: stash->idx = 0;
130: stash->idy = 0;
131: }
132: if (stash->send_waits) {
133: PetscFree(stash->send_waits);
134: stash->send_waits = 0;
135: }
136: if (stash->recv_waits) {
137: PetscFree(stash->recv_waits);
138: stash->recv_waits = 0;
139: }
140: if (stash->svalues) {
141: PetscFree(stash->svalues);
142: stash->svalues = 0;
143: }
144: if (stash->rvalues) {
145: PetscFree(stash->rvalues);
146: stash->rvalues = 0;
147: }
148: if (stash->nprocs) {
149: PetscFree(stash->nprocs);
150: stash->nprocs = 0;
151: }
153: return(0);
154: }
156: /*
157: MatStashGetInfo_Private - Gets the relavant statistics of the stash
159: Input Parameters:
160: stash - the stash
161: nstash - the size of the stash. Indicates the number of values stored.
162: reallocs - the number of additional mallocs incurred.
163:
164: */
165: int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
166: {
167: int bs2 = stash->bs*stash->bs;
170: *nstash = stash->n*bs2;
171: if (stash->reallocs < 0) *reallocs = 0;
172: else *reallocs = stash->reallocs;
173: return(0);
174: }
177: /*
178: MatStashSetInitialSize_Private - Sets the initial size of the stash
180: Input Parameters:
181: stash - the stash
182: max - the value that is used as the max size of the stash.
183: this value is used while allocating memory.
184: */
185: int MatStashSetInitialSize_Private(MatStash *stash,int max)
186: {
188: stash->umax = max;
189: return(0);
190: }
192: /* MatStashExpand_Private - Expand the stash. This function is called
193: when the space in the stash is not sufficient to add the new values
194: being inserted into the stash.
195:
196: Input Parameters:
197: stash - the stash
198: incr - the minimum increase requested
199:
200: Notes:
201: This routine doubles the currently used memory.
202: */
203: static int MatStashExpand_Private(MatStash *stash,int incr)
204: {
205: int *n_idx,*n_idy,newnmax,bs2,ierr;
206: MatScalar *n_array;
209: /* allocate a larger stash */
210: bs2 = stash->bs*stash->bs;
211: if (!stash->oldnmax && !stash->nmax) { /* new stash */
212: if (stash->umax) newnmax = stash->umax/bs2;
213: else newnmax = DEFAULT_STASH_SIZE/bs2;
214: } else if (!stash->nmax) { /* resuing stash */
215: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
216: else newnmax = stash->oldnmax/bs2;
217: } else newnmax = stash->nmax*2;
218: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
220: ierr = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);
221: n_idx = (int*)(n_array + bs2*newnmax);
222: n_idy = (int*)(n_idx + newnmax);
223: ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));
224: ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
225: ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
226: if (stash->array) {PetscFree(stash->array);}
227: stash->array = n_array;
228: stash->idx = n_idx;
229: stash->idy = n_idy;
230: stash->nmax = newnmax;
231: stash->reallocs++;
232: return(0);
233: }
234: /*
235: MatStashValuesRow_Private - inserts values into the stash. This function
236: expects the values to be roworiented. Multiple columns belong to the same row
237: can be inserted with a single call to this function.
239: Input Parameters:
240: stash - the stash
241: row - the global row correspoiding to the values
242: n - the number of elements inserted. All elements belong to the above row.
243: idxn - the global column indices corresponding to each of the values.
244: values - the values inserted
245: */
246: int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values)
247: {
248: int ierr,i;
251: /* Check and see if we have sufficient memory */
252: if ((stash->n + n) > stash->nmax) {
253: MatStashExpand_Private(stash,n);
254: }
255: for (i=0; i<n; i++) {
256: stash->idx[stash->n] = row;
257: stash->idy[stash->n] = idxn[i];
258: stash->array[stash->n] = values[i];
259: stash->n++;
260: }
261: return(0);
262: }
263: /*
264: MatStashValuesCol_Private - inserts values into the stash. This function
265: expects the values to be columnoriented. Multiple columns belong to the same row
266: can be inserted with a single call to this function.
268: Input Parameters:
269: stash - the stash
270: row - the global row correspoiding to the values
271: n - the number of elements inserted. All elements belong to the above row.
272: idxn - the global column indices corresponding to each of the values.
273: values - the values inserted
274: stepval - the consecutive values are sepated by a distance of stepval.
275: this happens because the input is columnoriented.
276: */
277: int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int stepval)
278: {
279: int ierr,i;
282: /* Check and see if we have sufficient memory */
283: if ((stash->n + n) > stash->nmax) {
284: MatStashExpand_Private(stash,n);
285: }
286: for (i=0; i<n; i++) {
287: stash->idx[stash->n] = row;
288: stash->idy[stash->n] = idxn[i];
289: stash->array[stash->n] = values[i*stepval];
290: stash->n++;
291: }
292: return(0);
293: }
295: /*
296: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
297: This function expects the values to be roworiented. Multiple columns belong
298: to the same block-row can be inserted with a single call to this function.
299: This function extracts the sub-block of values based on the dimensions of
300: the original input block, and the row,col values corresponding to the blocks.
302: Input Parameters:
303: stash - the stash
304: row - the global block-row correspoiding to the values
305: n - the number of elements inserted. All elements belong to the above row.
306: idxn - the global block-column indices corresponding to each of the blocks of
307: values. Each block is of size bs*bs.
308: values - the values inserted
309: rmax - the number of block-rows in the original block.
310: cmax - the number of block-columsn on the original block.
311: idx - the index of the current block-row in the original block.
312: */
313: int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
314: {
315: int ierr,i,j,k,bs2,bs=stash->bs;
316: MatScalar *vals,*array;
319: bs2 = bs*bs;
320: if ((stash->n+n) > stash->nmax) {
321: MatStashExpand_Private(stash,n);
322: }
323: for (i=0; i<n; i++) {
324: stash->idx[stash->n] = row;
325: stash->idy[stash->n] = idxn[i];
326: /* Now copy over the block of values. Store the values column oriented.
327: This enables inserting multiple blocks belonging to a row with a single
328: funtion call */
329: array = stash->array + bs2*stash->n;
330: vals = values + idx*bs2*n + bs*i;
331: for (j=0; j<bs; j++) {
332: for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
333: array += 1;
334: vals += cmax*bs;
335: }
336: stash->n++;
337: }
338: return(0);
339: }
341: /*
342: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
343: This function expects the values to be roworiented. Multiple columns belong
344: to the same block-row can be inserted with a single call to this function.
345: This function extracts the sub-block of values based on the dimensions of
346: the original input block, and the row,col values corresponding to the blocks.
348: Input Parameters:
349: stash - the stash
350: row - the global block-row correspoiding to the values
351: n - the number of elements inserted. All elements belong to the above row.
352: idxn - the global block-column indices corresponding to each of the blocks of
353: values. Each block is of size bs*bs.
354: values - the values inserted
355: rmax - the number of block-rows in the original block.
356: cmax - the number of block-columsn on the original block.
357: idx - the index of the current block-row in the original block.
358: */
359: int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
360: {
361: int ierr,i,j,k,bs2,bs=stash->bs;
362: MatScalar *vals,*array;
365: bs2 = bs*bs;
366: if ((stash->n+n) > stash->nmax) {
367: MatStashExpand_Private(stash,n);
368: }
369: for (i=0; i<n; i++) {
370: stash->idx[stash->n] = row;
371: stash->idy[stash->n] = idxn[i];
372: /* Now copy over the block of values. Store the values column oriented.
373: This enables inserting multiple blocks belonging to a row with a single
374: funtion call */
375: array = stash->array + bs2*stash->n;
376: vals = values + idx*bs + bs2*rmax*i;
377: for (j=0; j<bs; j++) {
378: for (k=0; k<bs; k++) {array[k] = vals[k];}
379: array += bs;
380: vals += rmax*bs;
381: }
382: stash->n++;
383: }
384: return(0);
385: }
386: /*
387: MatStashScatterBegin_Private - Initiates the transfer of values to the
388: correct owners. This function goes through the stash, and check the
389: owners of each stashed value, and sends the values off to the owner
390: processors.
392: Input Parameters:
393: stash - the stash
394: owners - an array of size 'no-of-procs' which gives the ownership range
395: for each node.
397: Notes: The 'owners' array in the cased of the blocked-stash has the
398: ranges specified blocked global indices, and for the regular stash in
399: the proper global indices.
400: */
401: int MatStashScatterBegin_Private(MatStash *stash,int *owners)
402: {
403: int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
404: int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
405: int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx;
406: MatScalar *rvalues,*svalues;
407: MPI_Comm comm = stash->comm;
408: MPI_Request *send_waits,*recv_waits;
412: bs2 = stash->bs*stash->bs;
413: /* first count number of contributors to each processor */
414: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
415: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
416: procs = nprocs + size;
417: ierr = PetscMalloc((stash->n+1)*sizeof(int),&owner);
419: for (i=0; i<stash->n; i++) {
420: idx = stash->idx[i];
421: for (j=0; j<size; j++) {
422: if (idx >= owners[j] && idx < owners[j+1]) {
423: nprocs[j]++; procs[j] = 1; owner[i] = j; break;
424: }
425: }
426: }
427: nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}
428:
429: /* inform other processors of number of messages and max length*/
430: ierr = PetscMalloc(2*size*sizeof(int),&work);
431: ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
432: nmax = work[rank];
433: nreceives = work[size+rank];
434: ierr = PetscFree(work);
435: /* post receives:
436: since we don't know how long each individual message is we
437: allocate the largest needed buffer for each receive. Potentially
438: this is a lot of wasted space.
439: */
440: ierr = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);
441: rindices = (int*)(rvalues + bs2*nreceives*nmax);
442: ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
443: for (i=0,count=0; i<nreceives; i++) {
444: MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
445: recv_waits+count++);
446: MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
447: }
449: /* do sends:
450: 1) starts[i] gives the starting index in svalues for stuff going to
451: the ith processor
452: */
453: ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);
454: sindices = (int*)(svalues + bs2*stash->n);
455: ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
456: ierr = PetscMalloc(2*size*sizeof(int),&startv);
457: starti = startv + size;
458: /* use 2 sends the first with all_a, the next with all_i and all_j */
459: startv[0] = 0; starti[0] = 0;
460: for (i=1; i<size; i++) {
461: startv[i] = startv[i-1] + nprocs[i-1];
462: starti[i] = starti[i-1] + nprocs[i-1]*2;
463: }
464: for (i=0; i<stash->n; i++) {
465: j = owner[i];
466: if (bs2 == 1) {
467: svalues[startv[j]] = stash->array[i];
468: } else {
469: int k;
470: MatScalar *buf1,*buf2;
471: buf1 = svalues+bs2*startv[j];
472: buf2 = stash->array+bs2*i;
473: for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
474: }
475: sindices[starti[j]] = stash->idx[i];
476: sindices[starti[j]+nprocs[j]] = stash->idy[i];
477: startv[j]++;
478: starti[j]++;
479: }
480: startv[0] = 0;
481: for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[i-1];}
482: for (i=0,count=0; i<size; i++) {
483: if (procs[i]) {
484: MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_MATSCALAR,i,tag1,comm,
485: send_waits+count++);
486: MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm,
487: send_waits+count++);
488: }
489: }
490: PetscFree(owner);
491: PetscFree(startv);
492: /* This memory is reused in scatter end for a different purpose*/
493: for (i=0; i<2*size; i++) nprocs[i] = -1;
494: stash->nprocs = nprocs;
496: stash->svalues = svalues; stash->rvalues = rvalues;
497: stash->nsends = nsends; stash->nrecvs = nreceives;
498: stash->send_waits = send_waits; stash->recv_waits = recv_waits;
499: stash->rmax = nmax;
500: return(0);
501: }
503: /*
504: MatStashScatterGetMesg_Private - This function waits on the receives posted
505: in the function MatStashScatterBegin_Private() and returns one message at
506: a time to the calling function. If no messages are left, it indicates this
507: by setting flg = 0, else it sets flg = 1.
509: Input Parameters:
510: stash - the stash
512: Output Parameters:
513: nvals - the number of entries in the current message.
514: rows - an array of row indices (or blocked indices) corresponding to the values
515: cols - an array of columnindices (or blocked indices) corresponding to the values
516: vals - the values
517: flg - 0 indicates no more message left, and the current call has no values associated.
518: 1 indicates that the current call successfully received a message, and the
519: other output parameters nvals,rows,cols,vals are set appropriately.
520: */
521: int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
522: {
523: int i,ierr,size=stash->size,*flg_v,*flg_i,i1,i2,*rindices,bs2;
524: MPI_Status recv_status;
525: PetscTruth match_found = PETSC_FALSE;
529: *flg = 0; /* When a message is discovered this is reset to 1 */
530: /* Return if no more messages to process */
531: if (stash->nprocessed == stash->nrecvs) { return(0); }
533: flg_v = stash->nprocs;
534: flg_i = flg_v + size;
535: bs2 = stash->bs*stash->bs;
536: /* If a matching pair of receieves are found, process them, and return the data to
537: the calling function. Until then keep receiving messages */
538: while (!match_found) {
539: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
540: /* Now pack the received message into a structure which is useable by others */
541: if (i % 2) {
542: MPI_Get_count(&recv_status,MPI_INT,nvals);
543: flg_i[recv_status.MPI_SOURCE] = i/2;
544: *nvals = *nvals/2; /* This message has both row indices and col indices */
545: } else {
546: MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
547: flg_v[recv_status.MPI_SOURCE] = i/2;
548: *nvals = *nvals/bs2;
549: }
550:
551: /* Check if we have both the messages from this proc */
552: i1 = flg_v[recv_status.MPI_SOURCE];
553: i2 = flg_i[recv_status.MPI_SOURCE];
554: if (i1 != -1 && i2 != -1) {
555: rindices = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
556: *rows = rindices + 2*i2*stash->rmax;
557: *cols = *rows + *nvals;
558: *vals = stash->rvalues + i1*bs2*stash->rmax;
559: *flg = 1;
560: stash->nprocessed ++;
561: match_found = PETSC_TRUE;
562: }
563: }
564: return(0);
565: }