Actual source code: comb.c
1: /*$Id: comb.c,v 1.40 2001/09/07 20:08:55 bsmith Exp $*/
3: /*
4: Split phase global vector reductions with support for combining the
5: communication portion of several operations. Using MPI-1.1 support only
7: The idea for this and much of the initial code was contributed by
8: Victor Eijkhout.
10: Usage:
11: VecDotBegin(Vec,Vec,PetscScalar *);
12: VecNormBegin(Vec,NormType,PetscReal *);
13: ....
14: VecDotEnd(Vec,Vec,PetscScalar *);
15: VecNormEnd(Vec,NormType,PetscReal *);
17: Limitations:
18: - The xxxEnd() functions MUST be called in the same order as
19: the corresponding xxxBegin() calls. There is extensive error checking to try to
20: ensure that the user calls the routines in the correct order
21: */
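/*
   Usage example (a minimal sketch, not part of this file; it assumes the
   caller has already created parallel vectors x, y, and z and declared ierr):
   the dot product and the norm below share a single MPI_Allreduce(), which is
   performed by the first of the End calls.

      PetscScalar dot;
      PetscReal   nrm;

      ierr = VecDotBegin(x,y,&dot);CHKERRQ(ierr);
      ierr = VecNormBegin(z,NORM_2,&nrm);CHKERRQ(ierr);
      ierr = VecDotEnd(x,y,&dot);CHKERRQ(ierr);
      ierr = VecNormEnd(z,NORM_2,&nrm);CHKERRQ(ierr);
*/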
23: #include "src/vec/vecimpl.h"
25: #define STATE_BEGIN 0
26: #define STATE_END 1
28: #define REDUCE_SUM 0
29: #define REDUCE_MAX 1
30: #define REDUCE_MIN 2
32: typedef struct {
33: MPI_Comm comm;
34: PetscScalar *lvalues; /* these are the local values before the call to MPI_Allreduce() */
35: PetscScalar *gvalues; /* values after the call to MPI_Allreduce() */
36: void **invecs; /* for debugging only, vector/memory used with each op */
37: int *reducetype; /* is a particular value to be reduced with sum, max, or min? */
38: int state; /* are we calling xxxBegin() or xxxEnd()? */
39: int maxops; /* total amount of space we have for requests */
40: int numopsbegin; /* number of requests that have been queued in */
41: int numopsend; /* number of requests that have been gotten by user */
42: } PetscSplitReduction;
43: /*
44: Note: lvalues and gvalues are twice as long as maxops; this allows the second half of
45: the entries to hold a flag indicating whether each value is a REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN. These flags are used by
46: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a single reduction involves
47: some of each.
48: */
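/*
   Illustration (not from the original source): with three queued operations
   of types sum, max, sum on a real-scalar build, PetscSplitReductionApply()
   below copies the flags into the upper half, so the buffer handed to
   MPI_Allreduce() with PetscSplitReduction_Op looks like

      lvalues = [ v0, v1, v2, REDUCE_SUM, REDUCE_MAX, REDUCE_SUM ]

   and PetscSplitReduction_Local() can then decide, entry by entry, how to
   combine xin[i] into xout[i].
*/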
50: /*
51: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
52: */
53: int PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
54: {
58: ierr = PetscNew(PetscSplitReduction,sr);
59: (*sr)->numopsbegin = 0;
60: (*sr)->numopsend = 0;
61: (*sr)->state = STATE_BEGIN;
62: (*sr)->maxops = 32;
63: ierr = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
64: ierr = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
65: ierr = PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
66: (*sr)->comm = comm;
67: ierr = PetscMalloc(32*sizeof(int),&(*sr)->reducetype);
68: return(0);
69: }
71: /*
72: This function is the MPI reduction operation used when there is
73: a combination of sums, maxes, and/or mins in the reduction. The call below to
74: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
75: MPI operator PetscSplitReduction_Op.
76: */
77: MPI_Op PetscSplitReduction_Op = 0;
79: EXTERN_C_BEGIN
80: void PetscSplitReduction_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
81: {
82: PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
83: int i,count = *cnt;
86: if (*datatype != MPIU_REAL) {
87: (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
88: MPI_Abort(MPI_COMM_WORLD,1);
89: }
90: #if defined(PETSC_USE_COMPLEX)
91: count = count/2;
92: #endif
93: count = count/2;
94: for (i=0; i<count; i++) {
95: if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
96: xout[i] += xin[i];
97: } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
98: xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
99: } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
100: xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
101: } else {
102: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
103: MPI_Abort(MPI_COMM_WORLD,1);
104: }
105: }
106: PetscStackPop; /* since the function returns void we cannot use PetscFunctionReturn() */
107: return;
108: }
109: EXTERN_C_END
111: /*
112: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
113: */
114: int PetscSplitReductionApply(PetscSplitReduction *sr)
115: {
116: int size,ierr,i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
117: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
118: int sum_flg = 0,max_flg = 0, min_flg = 0;
119: MPI_Comm comm = sr->comm;
122: if (sr->numopsend > 0) {
123: SETERRQ(1,"Cannot call this after VecxxxEnd() has been called");
124: }
126: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
127: ierr = MPI_Comm_size(sr->comm,&size);
128: if (size == 1) {
129: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
130: } else {
131: /* determine if all reductions are sum, max, or min */
132: for (i=0; i<numops; i++) {
133: if (reducetype[i] == REDUCE_MAX) {
134: max_flg = 1;
135: } else if (reducetype[i] == REDUCE_SUM) {
136: sum_flg = 1;
137: } else if (reducetype[i] == REDUCE_MIN) {
138: min_flg = 1;
139: } else {
140: SETERRQ(1,"Error in PetscSplitReduction data structure, probably memory corruption");
141: }
142: }
143: if (sum_flg + max_flg + min_flg > 1) {
144: /*
145: after all the entries in lvalues we store the reducetype flags to indicate
146: to the reduction operation which entries are sums and which are maxes or mins
147: */
148: for (i=0; i<numops; i++) {
149: lvalues[numops+i] = reducetype[i];
150: }
151: #if defined(PETSC_USE_COMPLEX)
152: MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
153: #else
154: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
155: #endif
156: } else if (max_flg) {
157: #if defined(PETSC_USE_COMPLEX)
158: /*
159: in the complex case we max both the real and imaginary parts; the imaginary part
160: is just ignored later
161: */
162: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
163: #else
164: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
165: #endif
166: } else if (min_flg) {
167: #if defined(PETSC_USE_COMPLEX)
168: /*
169: in the complex case we min both the real and imaginary parts; the imaginary part
170: is just ignored later
171: */
172: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
173: #else
174: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
175: #endif
176: } else {
177: MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
178: }
179: }
180: sr->state = STATE_END;
181: sr->numopsend = 0;
182: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
183: return(0);
184: }
187: /*
188: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
189: */
190: int PetscSplitReductionExtend(PetscSplitReduction *sr)
191: {
192: int maxops = sr->maxops,*reducetype = sr->reducetype,ierr;
193: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
194: void *invecs = sr->invecs;
197: sr->maxops = 2*maxops;
198: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
199: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
200: PetscMalloc(2*maxops*sizeof(int),&sr->reducetype);
201: PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
202: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
203: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
204: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(int));
205: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
206: PetscFree(lvalues);
207: PetscFree(gvalues);
208: PetscFree(reducetype);
209: PetscFree(invecs);
210: return(0);
211: }
213: int PetscSplitReductionDestroy(PetscSplitReduction *sr)
214: {
218: PetscFree(sr->lvalues);
219: PetscFree(sr->gvalues);
220: PetscFree(sr->reducetype);
221: PetscFree(sr->invecs);
222: PetscFree(sr);
223: return(0);
224: }
226: static int Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
228: EXTERN_C_BEGIN
229: /*
230: Private routine to delete internal storage when a communicator is freed.
231: This is called by MPI, not by users.
233: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
234: it was MPI_Comm *comm.
235: */
236: int Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
237: {
241: PetscLogInfo(0,"Petsc_DelReduction:Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
242: PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
243: return(0);
244: }
245: EXTERN_C_END
247: /*
248: PetscSplitReductionGet - Gets the split reduction object associated with a
249: communicator, creating it if it does not exist.
251: */
252: int PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
253: {
254: int ierr,flag;
257: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
258: /*
259: The calling sequence of the 2nd argument to this function changed
260: between MPI Standard 1.0 and revision 1.1. Here we match the
261: new standard; if you are using an MPI implementation that follows
262: the older version you will get a warning message about the next line;
263: it is only a warning message and should do no harm.
264: */
265: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
266: /*
267: Also create the special MPI reduction operation that may be needed
268: */
269: MPI_Op_create(PetscSplitReduction_Local,1,&PetscSplitReduction_Op);
270: }
271: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
272: if (!flag) { /* doesn't exist yet so create it and put it in */
273: PetscSplitReductionCreate(comm,sr);
274: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
275: PetscLogInfo(0,"PetscSplitReductionGet:Putting reduction data in an MPI_Comm %ld\n",(long)comm);
276: }
278: return(0);
279: }
281: /* ----------------------------------------------------------------------------------------------------*/
283: /*@
284: VecDotBegin - Starts a split phase dot product computation.
286: Input Parameters:
287: + x - the first vector
288: . y - the second vector
289: - result - where the result will go (can be PETSC_NULL)
291: Level: advanced
293: Notes:
294: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
296: .seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
297: VecTDotBegin(), VecTDotEnd()
298: @*/
299: int VecDotBegin(Vec x,Vec y,PetscScalar *result)
300: {
301: int ierr;
302: PetscSplitReduction *sr;
303: MPI_Comm comm;
306: PetscObjectGetComm((PetscObject)x,&comm);
307: PetscSplitReductionGet(comm,&sr);
308: if (sr->state == STATE_END) {
309: SETERRQ(1,"Called before all VecxxxEnd() have been called");
310: }
311: if (sr->numopsbegin >= sr->maxops) {
312: PetscSplitReductionExtend(sr);
313: }
314: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
315: sr->invecs[sr->numopsbegin] = (void*)x;
316: if (!x->ops->dot_local) SETERRQ(1,"Vector does not support local dots");
317: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
318: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
319: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
320: return(0);
321: }
323: /*@
324: VecDotEnd - Ends a split phase dot product computation.
326: Input Parameters:
327: + x - the first vector (can be PETSC_NULL)
328: . y - the second vector (can be PETSC_NULL)
329: - result - where the result will go
331: Level: advanced
333: Notes:
334: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
336: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
337: VecTDotBegin(),VecTDotEnd()
339: @*/
340: int VecDotEnd(Vec x,Vec y,PetscScalar *result)
341: {
342: int ierr;
343: PetscSplitReduction *sr;
344: MPI_Comm comm;
347: PetscObjectGetComm((PetscObject)x,&comm);
348: PetscSplitReductionGet(comm,&sr);
349:
350: if (sr->state != STATE_END) {
351: /* this is the first call to VecxxxEnd() so do the communication */
352: PetscSplitReductionApply(sr);
353: }
355: if (sr->numopsend >= sr->numopsbegin) {
356: SETERRQ(1,"Called VecxxxEnd() more times than VecxxxBegin()");
357: }
358: if (x && (void*) x != sr->invecs[sr->numopsend]) {
359: SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
360: }
361: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
362: SETERRQ(1,"Called VecDotEnd() on a reduction started with VecNormBegin()");
363: }
364: *result = sr->gvalues[sr->numopsend++];
366: /*
367: We are finished getting all the results so reset to no outstanding requests
368: */
369: if (sr->numopsend == sr->numopsbegin) {
370: sr->state = STATE_BEGIN;
371: sr->numopsend = 0;
372: sr->numopsbegin = 0;
373: }
374: return(0);
375: }
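/*
   Ordering illustration (a sketch, assuming vectors x, y, z, a PetscScalar a,
   and a PetscReal b declared by the caller): the End calls must mirror the
   Begin calls, so the second sequence would fail the "different order or with
   a different vector" check above.

      correct:   VecDotBegin(x,y,&a);  VecNormBegin(z,NORM_1,&b);
                 VecDotEnd(x,y,&a);    VecNormEnd(z,NORM_1,&b);

      incorrect: VecDotBegin(x,y,&a);  VecNormBegin(z,NORM_1,&b);
                 VecNormEnd(z,NORM_1,&b);  VecDotEnd(x,y,&a);
*/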
377: /*@
378: VecTDotBegin - Starts a split phase transpose dot product computation.
380: Input Parameters:
381: + x - the first vector
382: . y - the second vector
383: - result - where the result will go (can be PETSC_NULL)
385: Level: advanced
387: Notes:
388: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
390: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
391: VecDotBegin(), VecDotEnd()
393: @*/
394: int VecTDotBegin(Vec x,Vec y,PetscScalar *result)
395: {
396: int ierr;
397: PetscSplitReduction *sr;
398: MPI_Comm comm;
401: PetscObjectGetComm((PetscObject)x,&comm);
402: PetscSplitReductionGet(comm,&sr);
403: if (sr->state == STATE_END) {
404: SETERRQ(1,"Called before all VecxxxEnd() have been called");
405: }
406: if (sr->numopsbegin >= sr->maxops) {
407: PetscSplitReductionExtend(sr);
408: }
409: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
410: sr->invecs[sr->numopsbegin] = (void*)x;
411: if (!x->ops->tdot_local) SETERRQ(1,"Vector does not support local dots");
412: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
413: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
414: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
415: return(0);
416: }
418: /*@
419: VecTDotEnd - Ends a split phase transpose dot product computation.
421: Input Parameters:
422: + x - the first vector (can be PETSC_NULL)
423: . y - the second vector (can be PETSC_NULL)
424: - result - where the result will go
426: Level: advanced
428: Notes:
429: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
431: .seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
432: VecDotBegin(), VecDotEnd()
433: @*/
434: int VecTDotEnd(Vec x,Vec y,PetscScalar *result)
435: {
436: int ierr;
439: /*
440: TDotEnd() is the same as DotEnd() so reuse the code
441: */
442: VecDotEnd(x,y,result);
443: return(0);
444: }
446: /* -------------------------------------------------------------------------*/
448: /*@
449: VecNormBegin - Starts a split phase norm computation.
451: Input Parameters:
452: + x - the first vector
453: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
454: - result - where the result will go (can be PETSC_NULL)
456: Level: advanced
458: Notes:
459: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
461: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
463: @*/
464: int VecNormBegin(Vec x,NormType ntype,PetscReal *result)
465: {
466: int ierr;
467: PetscSplitReduction *sr;
468: PetscReal lresult[2];
469: MPI_Comm comm;
472: PetscObjectGetComm((PetscObject)x,&comm);
473: PetscSplitReductionGet(comm,&sr);
474: if (sr->state == STATE_END) {
475: SETERRQ(1,"Called before all VecxxxEnd() have been called");
476: }
477: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
478: PetscSplitReductionExtend(sr);
479: }
480:
481: sr->invecs[sr->numopsbegin] = (void*)x;
482: if (!x->ops->norm_local) SETERRQ(1,"Vector does not support local norms");
483: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
484: (*x->ops->norm_local)(x,ntype,lresult);
485: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
486: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
487: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
488: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
489: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
490: sr->lvalues[sr->numopsbegin++] = lresult[0];
491: if (ntype == NORM_1_AND_2) {
492: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
493: sr->lvalues[sr->numopsbegin++] = lresult[1];
494: }
495: return(0);
496: }
498: /*@
499: VecNormEnd - Ends a split phase norm computation.
501: Input Parameters:
502: + x - the first vector (can be PETSC_NULL)
503: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
504: - result - where the result will go
506: Level: advanced
508: Notes:
509: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
511: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
513: @*/
514: int VecNormEnd(Vec x,NormType ntype,PetscReal *result)
515: {
516: int ierr;
517: PetscSplitReduction *sr;
518: MPI_Comm comm;
521: PetscObjectGetComm((PetscObject)x,&comm);
522: PetscSplitReductionGet(comm,&sr);
523:
524: if (sr->state != STATE_END) {
525: /* this is the first call to VecxxxEnd() so do the communication */
526: PetscSplitReductionApply(sr);
527: }
529: if (sr->numopsend >= sr->numopsbegin) {
530: SETERRQ(1,"Called VecxxxEnd() more times than VecxxxBegin()");
531: }
532: if (x && (void*)x != sr->invecs[sr->numopsend]) {
533: SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
534: }
535: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
536: SETERRQ(1,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
537: }
538: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
539: if (ntype == NORM_2) {
540: result[0] = sqrt(result[0]);
541: } else if (ntype == NORM_1_AND_2) {
542: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
543: result[1] = sqrt(result[1]);
544: }
546: if (sr->numopsend == sr->numopsbegin) {
547: sr->state = STATE_BEGIN;
548: sr->numopsend = 0;
549: sr->numopsbegin = 0;
550: }
551: return(0);
552: }
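/*
   Example (a minimal sketch, assuming a parallel vector x and a declared ierr):
   NORM_1_AND_2 queues two reductions in VecNormBegin() and consumes two result
   slots in VecNormEnd(), so result must point to an array of length two;
   norms[0] receives the 1-norm and norms[1] the 2-norm.

      PetscReal norms[2];

      ierr = VecNormBegin(x,NORM_1_AND_2,norms);CHKERRQ(ierr);
      ierr = VecNormEnd(x,NORM_1_AND_2,norms);CHKERRQ(ierr);
*/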
554: /*
555: Possibly add
557: PetscReductionSumBegin/End()
558: PetscReductionMaxBegin/End()
559: PetscReductionMinBegin/End()
560: or make it more like MPI, with a single function that takes a flag for the Op? The first approach seems better
561: */