Actual source code: matstash.c

#define PETSCMAT_DLL

#include "src/mat/matimpl.h"

/*
  The input to the stash is ALWAYS in MatScalar precision, and the
  internal storage and output are also in MatScalar.
*/
#define DEFAULT_STASH_SIZE   10000

/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash; it simply appends entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscInt   max,*opt,nopt;
  PetscTruth flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);

  nopt = stash->size;
  PetscMalloc(nopt*sizeof(PetscInt),&opt);
  PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);
  if (bs <= 0) bs = 1;

  stash->bs          = bs;
  stash->nmax        = 0;
  stash->oldnmax     = 0;
  stash->n           = 0;
  stash->reallocs    = -1;
  stash->idx         = 0;
  stash->idy         = 0;
  stash->array       = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocs      = 0;
  stash->nprocessed  = 0;
  return(0);
}
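
/*
   Illustrative usage of the -matstash_initial_size option handled above (a hedged
   sketch; the exact caller and the field names mat->stash and mat->bstash are
   assumptions about the parallel matrix implementations):

       MatStashCreate_Private(comm,1,&mat->stash);     (scalar stash)
       MatStashCreate_Private(comm,bs,&mat->bstash);   (block stash, bs > 1)

   On the command line a single value applies to every process, while a
   comma-separated list supplies one value per rank:

       -matstash_initial_size 10000
       -matstash_initial_size 500,20000,500,20000
*/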

/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  if (stash->array) {
    PetscFree(stash->array);
    stash->array = 0;
  }
  return(0);
}

/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  int        nsends=stash->nsends,bs2,oldnmax;
  MPI_Status *send_status;

  /* wait on sends */
  if (nsends) {
    PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
     the wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases. */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  if (stash->array) {
    PetscFree(stash->array);
    stash->array = 0;
    stash->idx   = 0;
    stash->idy   = 0;
  }
  if (stash->send_waits) {
    PetscFree(stash->send_waits);
    stash->send_waits = 0;
  }
  if (stash->recv_waits) {
    PetscFree(stash->recv_waits);
    stash->recv_waits = 0;
  }
  if (stash->svalues) {
    PetscFree(stash->svalues);
    stash->svalues = 0;
  }
  if (stash->rvalues) {
    PetscFree(stash->rvalues);
    stash->rvalues = 0;
  }
  if (stash->rindices) {
    PetscFree(stash->rindices);
    stash->rindices = 0;
  }
  if (stash->nprocs) {
    PetscFree(stash->nprocs);
    stash->nprocs = 0;
  }

  return(0);
}
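
/*
   A worked example of the resizing heuristic above (illustrative only): if an
   assembly stashed stash->n = 1000 entries with bs = 2 (so bs2 = 4), then

       oldnmax = ((int)(1000 * 1.1) + 5)*4 = 1105*4 = 4420

   scalar locations are remembered. The next time this stash is filled,
   MatStashExpand_Private() divides by bs2 again and starts with room for 1105
   blocks, so a modest growth in the number of stashed entries does not trigger
   a reallocation.
*/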

/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash, i.e. the number of values stored.
   reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}
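
/*
   Illustrative sketch (an assumption about the callers, not part of this file):
   an assembly routine can use these statistics to report stash traffic, e.g.

       PetscInt nstash,reallocs;
       MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
       PetscLogInfo((0,"MatAssemblyBegin: stash has %d entries, used %d mallocs.\n",nstash,reallocs));
*/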

/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value that is used as the max size of the stash.
           This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}

/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscInt  *n_idx,*n_idy,newnmax,bs2;
  MatScalar *n_array;

  /* allocate a larger stash */
  bs2 = stash->bs*stash->bs;
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);
  n_idx = (PetscInt*)(n_array + bs2*newnmax);
  n_idy = (PetscInt*)(n_idx + newnmax);
  PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));
  if (stash->array) {PetscFree(stash->array);}
  stash->array = n_array;
  stash->idx   = n_idx;
  stash->idy   = n_idy;
  stash->nmax  = newnmax;
  stash->reallocs++;
  return(0);
}
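
/*
   Memory layout produced by MatStashExpand_Private() above (descriptive sketch):
   one allocation of newnmax*(bs2*sizeof(MatScalar) + 2*sizeof(PetscInt)) bytes is
   carved into three arrays,

       n_array : bs2*newnmax MatScalars  (the stashed values, one bs x bs block per entry)
       n_idx   : newnmax     PetscInts   (global (block-)row index of each entry)
       n_idy   : newnmax     PetscInts   (global (block-)column index of each entry)

   so a single PetscFree(stash->array) releases all three. For example, with bs = 1
   and no user override, the first allocation holds DEFAULT_STASH_SIZE = 10000 values
   plus 10000 row indices and 10000 column indices.
*/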

/*
   MatStashValuesRow_Private - inserts values into the stash. This function
   expects the values to be row-oriented. Multiple columns belonging to the same
   row can be inserted with a single call to this function.

   Input Parameters:
   stash  - the stash
   row    - the global row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global column indices corresponding to each of the values.
   values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
{
  PetscInt i;

  /* Check and see if we have sufficient memory */
  if ((stash->n + n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n]   = row;
    stash->idy[stash->n]   = idxn[i];
    stash->array[stash->n] = values[i];
    stash->n++;
  }
  return(0);
}
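
/*
   Illustrative sketch of a caller (an assumption about the parallel MatSetValues()
   implementations, not part of this file): locally owned rows are inserted directly,
   everything else is stashed and forwarded at assembly time, roughly

       if (row >= rstart && row < rend) {
         ... insert into the local diagonal/off-diagonal part ...
       } else {
         MatStashValuesRow_Private(&mat->stash,row,n,idxn,values);
       }

   The stashed entries reach their owners later via MatStashScatterBegin_Private()
   and MatStashScatterGetMesg_Private().
*/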

/*
   MatStashValuesCol_Private - inserts values into the stash. This function
   expects the values to be column-oriented. Multiple columns belonging to the same
   row can be inserted with a single call to this function.

   Input Parameters:
   stash   - the stash
   row     - the global row corresponding to the values
   n       - the number of elements inserted. All elements belong to the above row.
   idxn    - the global column indices corresponding to each of the values.
   values  - the values inserted
   stepval - consecutive values are separated by a distance of stepval.
             This happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
{
  PetscInt i;

  /* Check and see if we have sufficient memory */
  if ((stash->n + n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n]   = row;
    stash->idy[stash->n]   = idxn[i];
    stash->array[stash->n] = values[i*stepval];
    stash->n++;
  }
  return(0);
}
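
/*
   Example of the stepval stride (illustrative): if the caller holds an m x n input
   block in column-major order, the entries of row r are values[r], values[r+m],
   values[r+2*m], ..., so the caller passes values+r together with stepval = m.
   With m = 3 and r = 1, the loop above picks up values[1], values[4], values[7], ...
   from the caller's array.
*/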

/*
   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
   This function expects the values to be row-oriented. Multiple columns belonging
   to the same block-row can be inserted with a single call to this function.
   This function extracts the sub-block of values based on the dimensions of
   the original input block, and the row,col values corresponding to the blocks.

   Input Parameters:
   stash  - the stash
   row    - the global block-row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global block-column indices corresponding to each of the blocks of
            values. Each block is of size bs*bs.
   values - the values inserted
   rmax   - the number of block-rows in the original block.
   cmax   - the number of block-columns in the original block.
   idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscInt        i,j,k,bs2,bs=stash->bs;
  const MatScalar *vals;
  MatScalar       *array;

  bs2 = bs*bs;
  if ((stash->n+n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n] = row;
    stash->idy[stash->n] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call. */
    array = stash->array + bs2*stash->n;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
      array += 1;
      vals  += cmax*bs;
    }
    stash->n++;
  }
  return(0);
}
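
/*
   Example of the copy loop above (illustrative): with bs = 2, a sub-block that the
   row-oriented input presents as

       a b
       c d        (row stride in the input is cmax*bs)

   is stored in stash->array as the column-oriented sequence a, c, b, d. Keeping
   every stashed bs x bs block column-oriented means the receiving side can assume
   a single layout, whichever orientation the caller supplied.
*/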

/*
   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
   This function expects the values to be column-oriented. Multiple columns belonging
   to the same block-row can be inserted with a single call to this function.
   This function extracts the sub-block of values based on the dimensions of
   the original input block, and the row,col values corresponding to the blocks.

   Input Parameters:
   stash  - the stash
   row    - the global block-row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global block-column indices corresponding to each of the blocks of
            values. Each block is of size bs*bs.
   values - the values inserted
   rmax   - the number of block-rows in the original block.
   cmax   - the number of block-columns in the original block.
   idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscInt        i,j,k,bs2,bs=stash->bs;
  const MatScalar *vals;
  MatScalar       *array;

  bs2 = bs*bs;
  if ((stash->n+n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n] = row;
    stash->idy[stash->n] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call. */
    array = stash->array + bs2*stash->n;
    vals  = values + idx*bs + bs2*rmax*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) {array[k] = vals[k];}
      array += bs;
      vals  += rmax*bs;
    }
    stash->n++;
  }
  return(0);
}
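
/*
   Worked example of the input addressing above (illustrative): with bs = 2, rmax = 2
   and cmax = 3 the original input is a 4 x 6 element block stored column-major with
   column stride rmax*bs = 4. The sub-block for (idx = 1, i = 2) therefore starts at

       values + idx*bs + bs2*rmax*i = values + 2 + 16 = values + 18

   and its second column starts rmax*bs = 4 entries later, at values + 22. The
   stashed copy is column-oriented, exactly as in MatStashValuesRowBlocked_Private().
*/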

/*
   MatStashScatterBegin_Private - Initiates the transfer of values to the
   correct owners. This function goes through the stash, checks the owner
   of each stashed value, and sends the values off to the owner processors.

   Input Parameters:
   stash  - the stash
   owners - an array of size 'no-of-procs' which gives the ownership range
            for each node.

   Notes: In the case of the blocked stash the 'owners' array has the ranges
   specified in blocked global indices, and for the regular stash in the
   regular global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
{
  PetscInt    *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt    size=stash->size,nsends;
  PetscInt    count,*sindices,**rindices,i,j,idx,lastidx;
  MatScalar   **rvalues,*svalues;
  MPI_Comm    comm = stash->comm;
  MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt *nprocs,*nlengths,nreceives;

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);
  PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));
  PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);

  nlengths = nprocs+size;
  j        = 0;
  lastidx  = -1;
  for (i=0; i<stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nlengths[j]++; owner[i] = j; break;
      }
    }
  }
  /* Now check which procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) { nprocs[i] = 1; nsends++; }
  }

  { int *onodes,*olengths;
    /* Determine the number of messages to expect, their lengths, and their sources (from-ids) */
    PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);
    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
    /* since the row and column indices are sent together, the lengths are multiplied by 2 */
    for (i=0; i<nreceives; i++) olengths[i] *= 2;
    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
    /* the values are bs2 scalars per entry (so remove the earlier factor of 2) */
    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
    PetscFree(onodes);
    PetscFree(olengths);
  }

  /* do sends:
      1) startv[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);
  sindices = (PetscInt*)(svalues + bs2*stash->n);
  PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
  PetscMalloc(2*size*sizeof(PetscInt),&startv);
  starti = startv + size;
  /* use 2 sends per destination: one with all_a (the values), the other with all_i and all_j (the indices) */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + nlengths[i-1]*2;
  }
  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs2 == 1) {
      svalues[startv[j]] = stash->array[i];
    } else {
      PetscInt  k;
      MatScalar *buf1,*buf2;
      buf1 = svalues+bs2*startv[j];
      buf2 = stash->array+bs2*i;
      for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
    }
    sindices[starti[j]]             = stash->idx[i];
    sindices[starti[j]+nlengths[j]] = stash->idy[i];
    startv[j]++;
    starti[j]++;
  }
  /* reset startv, which was advanced in the packing loop above */
  startv[0] = 0;
  for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}

  for (i=0,count=0; i<size; i++) {
    if (nprocs[i]) {
      MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
      MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);
    }
  }
#if defined(PETSC_USE_DEBUG)
  PetscLogInfo((0,"MatStashScatterBegin_Private: No of messages: %d \n",nsends));
  for (i=0; i<size; i++) {
    if (nprocs[i]) {
      PetscLogInfo((0,"MatStashScatterBegin_Private: Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt)));
    }
  }
#endif
  PetscFree(owner);
  PetscFree(startv);
  /* This memory is reused in MatStashScatterGetMesg_Private() for a different purpose */
  for (i=0; i<2*size; i++) nprocs[i] = -1;
  stash->nprocs = nprocs;

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;
  PetscFree(recv_waits1);
  PetscFree(recv_waits2);

  stash->svalues  = svalues;  stash->rvalues    = rvalues;
  stash->rindices = rindices; stash->send_waits = send_waits;
  stash->nsends   = nsends;   stash->nrecvs     = nreceives;
  return(0);
}
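
/*
   Message protocol used above (descriptive sketch): every destination processor j
   with nlengths[j] stashed entries is sent two messages,

       tag1 (MPIU_INT):       2*nlengths[j] entries   - all row indices, then all column indices
       tag2 (MPIU_MATSCALAR): bs2*nlengths[j] entries - the stashed values, bs2 scalars per entry

   The matching receives were already posted through PetscPostIrecvInt() and
   PetscPostIrecvScalar(), so MatStashScatterGetMesg_Private() only has to wait on
   the interleaved request array stash->recv_waits.
*/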

/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages are left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
             other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,MatScalar **vals,PetscInt *flg)
{
  PetscMPIInt i,*flg_v,i1,i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscTruth  match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) { return(0); }

  flg_v = stash->nprocs;
  bs2   = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) { /* odd request indices are the value messages (tag2) */
      MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs2;
    } else {     /* even request indices are the index messages (tag1) */
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
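
/*
   Illustrative sketch of the consumer side (a hedged assumption about how the
   assembly routines drive this file; the insertion step is only indicated):

       MatStashScatterBegin_Private(&mat->stash,owners);
       ... finish any local work ...
       while (1) {
         MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
         if (!flg) break;
         for (i=0; i<n; i++) {
           ... insert (row[i],col[i],val[i]) - or the corresponding bs x bs block -
               into the locally owned part of the matrix ...
         }
       }
       MatStashScatterEnd_Private(&mat->stash);

   Each pass through the while loop returns one complete (indices,values) pair from
   some sending processor; flg is set to 0 only after all stash->nrecvs pairs have
   been processed.
*/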