Actual source code: comb.c
1: #define PETSCVEC_DLL
/*
      Split phase global vector reductions with support for combining the
   communication portion of several operations. Uses MPI-1.1 features only.

      The idea for this and much of the initial code was contributed by
   Victor Eijkhout.

       Usage:
             VecDotBegin(Vec,Vec,PetscScalar *);
             VecNormBegin(Vec,NormType,PetscReal *);
             ....
             VecDotEnd(Vec,Vec,PetscScalar *);
             VecNormEnd(Vec,NormType,PetscReal *);

       Limitations:
         - The xxxEnd() functions MUST be called in the same order
           as the corresponding xxxBegin() functions. There is extensive
           error checking to try to ensure that the user calls the routines
           in the correct order.
*/
#include "vecimpl.h"
24: #define STATE_BEGIN 0
25: #define STATE_END 1
27: #define REDUCE_SUM 0
28: #define REDUCE_MAX 1
29: #define REDUCE_MIN 2
31: typedef struct {
32: MPI_Comm comm;
33: PetscScalar *lvalues; /* this are the reduced values before call to MPI_Allreduce() */
34: PetscScalar *gvalues; /* values after call to MPI_Allreduce() */
35: void **invecs; /* for debugging only, vector/memory used with each op */
36: PetscInt *reducetype; /* is particular value to be summed or maxed? */
37: PetscInt state; /* are we calling xxxBegin() or xxxEnd()? */
38: PetscInt maxops; /* total amount of space we have for requests */
39: PetscInt numopsbegin; /* number of requests that have been queued in */
40: PetscInt numopsend; /* number of requests that have been gotten by user */
41: } PetscSplitReduction;
42: /*
43: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
44: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
45: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
46: some of each.
47: */
51: /*
52: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
53: */
54: PetscErrorCode PETSCVEC_DLLEXPORT PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
55: {
59: PetscNew(PetscSplitReduction,sr);
60: (*sr)->numopsbegin = 0;
61: (*sr)->numopsend = 0;
62: (*sr)->state = STATE_BEGIN;
63: (*sr)->maxops = 32;
64: PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
65: PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
66: PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
67: (*sr)->comm = comm;
68: PetscMalloc(32*sizeof(PetscInt),&(*sr)->reducetype);
69: return(0);
70: }
72: /*
73: This function is the MPI reduction operation used when there is
74: a combination of sums and max in the reduction. The call below to
75: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
76: MPI operator PetscSplitReduction_Op.
77: */
78: MPI_Op PetscSplitReduction_Op = 0;
83: void PETSCVEC_DLLEXPORT PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
84: {
85: PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
86: PetscInt i,count = (PetscInt)*cnt;
89: if (*datatype != MPIU_REAL) {
90: (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
91: MPI_Abort(MPI_COMM_WORLD,1);
92: }
93: #if defined(PETSC_USE_COMPLEX)
94: count = count/2;
95: #endif
96: count = count/2;
97: for (i=0; i<count; i++) {
98: if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
99: xout[i] += xin[i];
100: } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
101: xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
102: } else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
103: xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
104: } else {
105: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
106: MPI_Abort(MPI_COMM_WORLD,1);
107: }
108: }
109: PetscStackPop; /* since function returns void cannot use PetscFunctionReturn(); */
110: return;
111: }
116: /*
117: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
118: */
119: PetscErrorCode PETSCVEC_DLLEXPORT PetscSplitReductionApply(PetscSplitReduction *sr)
120: {
122: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
123: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
124: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
125: MPI_Comm comm = sr->comm;
126: PetscMPIInt size;
129: if (sr->numopsend > 0) {
130: SETERRQ(PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
131: }
133: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
134: MPI_Comm_size(sr->comm,&size);
135: if (size == 1) {
136: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
137: } else {
138: /* determine if all reductions are sum, max, or min */
139: for (i=0; i<numops; i++) {
140: if (reducetype[i] == REDUCE_MAX) {
141: max_flg = 1;
142: } else if (reducetype[i] == REDUCE_SUM) {
143: sum_flg = 1;
144: } else if (reducetype[i] == REDUCE_MIN) {
145: min_flg = 1;
146: } else {
147: SETERRQ(PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
148: }
149: }
150: if (sum_flg + max_flg + min_flg > 1) {
151: /*
152: after all the entires in lvalues we store the reducetype flags to indicate
153: to the reduction operations what are sums and what are max
154: */
155: for (i=0; i<numops; i++) {
156: lvalues[numops+i] = reducetype[i];
157: }
158: #if defined(PETSC_USE_COMPLEX)
159: MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
160: #else
161: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
162: #endif
163: } else if (max_flg) {
164: #if defined(PETSC_USE_COMPLEX)
165: /*
166: complex case we max both the real and imaginary parts, the imaginary part
167: is just ignored later
168: */
169: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
170: #else
171: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
172: #endif
173: } else if (min_flg) {
174: #if defined(PETSC_USE_COMPLEX)
175: /*
176: complex case we min both the real and imaginary parts, the imaginary part
177: is just ignored later
178: */
179: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
180: #else
181: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
182: #endif
183: } else {
184: MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
185: }
186: }
187: sr->state = STATE_END;
188: sr->numopsend = 0;
189: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
190: return(0);
191: }
196: /*
197: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
198: */
199: PetscErrorCode PETSCVEC_DLLEXPORT PetscSplitReductionExtend(PetscSplitReduction *sr)
200: {
202: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
203: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
204: void *invecs = sr->invecs;
207: sr->maxops = 2*maxops;
208: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
209: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
210: PetscMalloc(2*maxops*sizeof(PetscInt),&sr->reducetype);
211: PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
212: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
213: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
214: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
215: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
216: PetscFree(lvalues);
217: PetscFree(gvalues);
218: PetscFree(reducetype);
219: PetscFree(invecs);
220: return(0);
221: }
225: PetscErrorCode PETSCVEC_DLLEXPORT PetscSplitReductionDestroy(PetscSplitReduction *sr)
226: {
230: PetscFree(sr->lvalues);
231: PetscFree(sr->gvalues);
232: PetscFree(sr->reducetype);
233: PetscFree(sr->invecs);
234: PetscFree(sr);
235: return(0);
236: }
238: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
243: /*
244: Private routine to delete internal storage when a communicator is freed.
245: This is called by MPI, not by users.
247: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
248: it was MPI_Comm *comm.
249: */
250: int PETSCVEC_DLLEXPORT Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
251: {
255: PetscLogInfo((0,"Petsc_DelReduction:Deleting reduction data in an MPI_Comm %ld\n",(long)comm));
256: PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
257: return(0);
258: }
261: /*
262: PetscSplitReductionGet - Gets the split reduction object from a
263: PETSc vector, creates if it does not exit.
265: */
268: PetscErrorCode PETSCVEC_DLLEXPORT PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
269: {
271: PetscMPIInt flag;
274: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
275: /*
276: The calling sequence of the 2nd argument to this function changed
277: between MPI Standard 1.0 and the revisions 1.1 Here we match the
278: new standard, if you are using an MPI implementation that uses
279: the older version you will get a warning message about the next line;
280: it is only a warning message and should do no harm.
281: */
282: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
283: }
284: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
285: if (!flag) { /* doesn't exist yet so create it and put it in */
286: PetscSplitReductionCreate(comm,sr);
287: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
288: PetscLogInfo((0,"PetscSplitReductionGet:Putting reduction data in an MPI_Comm %ld\n",(long)comm));
289: }
291: return(0);
292: }
294: /* ----------------------------------------------------------------------------------------------------*/
298: /*@
299: VecDotBegin - Starts a split phase dot product computation.
301: Input Parameters:
302: + x - the first vector
303: . y - the second vector
304: - result - where the result will go (can be PETSC_NULL)
306: Level: advanced
308: Notes:
309: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
311: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
312: VecTDotBegin(), VecTDotEnd()
313: @*/
314: PetscErrorCode PETSCVEC_DLLEXPORT VecDotBegin(Vec x,Vec y,PetscScalar *result)
315: {
316: PetscErrorCode ierr;
317: PetscSplitReduction *sr;
318: MPI_Comm comm;
321: PetscObjectGetComm((PetscObject)x,&comm);
322: PetscSplitReductionGet(comm,&sr);
323: if (sr->state == STATE_END) {
324: SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
325: }
326: if (sr->numopsbegin >= sr->maxops) {
327: PetscSplitReductionExtend(sr);
328: }
329: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
330: sr->invecs[sr->numopsbegin] = (void*)x;
331: if (!x->ops->dot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
332: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
333: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
334: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
335: return(0);
336: }
340: /*@
341: VecDotEnd - Ends a split phase dot product computation.
343: Input Parameters:
344: + x - the first vector (can be PETSC_NULL)
345: . y - the second vector (can be PETSC_NULL)
346: - result - where the result will go
348: Level: advanced
350: Notes:
351: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
353: seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
354: VecTDotBegin(),VecTDotEnd()
356: @*/
357: PetscErrorCode PETSCVEC_DLLEXPORT VecDotEnd(Vec x,Vec y,PetscScalar *result)
358: {
359: PetscErrorCode ierr;
360: PetscSplitReduction *sr;
361: MPI_Comm comm;
364: PetscObjectGetComm((PetscObject)x,&comm);
365: PetscSplitReductionGet(comm,&sr);
366:
367: if (sr->state != STATE_END) {
368: /* this is the first call to VecxxxEnd() so do the communication */
369: PetscSplitReductionApply(sr);
370: }
372: if (sr->numopsend >= sr->numopsbegin) {
373: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
374: }
375: if (x && (void*) x != sr->invecs[sr->numopsend]) {
376: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
377: }
378: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
379: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
380: }
381: *result = sr->gvalues[sr->numopsend++];
383: /*
384: We are finished getting all the results so reset to no outstanding requests
385: */
386: if (sr->numopsend == sr->numopsbegin) {
387: sr->state = STATE_BEGIN;
388: sr->numopsend = 0;
389: sr->numopsbegin = 0;
390: }
391: return(0);
392: }
396: /*@
397: VecTDotBegin - Starts a split phase transpose dot product computation.
399: Input Parameters:
400: + x - the first vector
401: . y - the second vector
402: - result - where the result will go (can be PETSC_NULL)
404: Level: advanced
406: Notes:
407: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
409: seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
410: VecDotBegin(), VecDotEnd()
412: @*/
413: PetscErrorCode PETSCVEC_DLLEXPORT VecTDotBegin(Vec x,Vec y,PetscScalar *result)
414: {
415: PetscErrorCode ierr;
416: PetscSplitReduction *sr;
417: MPI_Comm comm;
420: PetscObjectGetComm((PetscObject)x,&comm);
421: PetscSplitReductionGet(comm,&sr);
422: if (sr->state == STATE_END) {
423: SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
424: }
425: if (sr->numopsbegin >= sr->maxops) {
426: PetscSplitReductionExtend(sr);
427: }
428: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
429: sr->invecs[sr->numopsbegin] = (void*)x;
430: if (!x->ops->tdot_local) SETERRQ(PETSC_ERR_SUP,"Vector does not suppport local dots");
431: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
432: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
433: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
434: return(0);
435: }
439: /*@
440: VecTDotEnd - Ends a split phase transpose dot product computation.
442: Input Parameters:
443: + x - the first vector (can be PETSC_NULL)
444: . y - the second vector (can be PETSC_NULL)
445: - result - where the result will go
447: Level: advanced
449: Notes:
450: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
452: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
453: VecDotBegin(), VecDotEnd()
454: @*/
455: PetscErrorCode PETSCVEC_DLLEXPORT VecTDotEnd(Vec x,Vec y,PetscScalar *result)
456: {
460: /*
461: TDotEnd() is the same as DotEnd() so reuse the code
462: */
463: VecDotEnd(x,y,result);
464: return(0);
465: }
467: /* -------------------------------------------------------------------------*/
471: /*@
472: VecNormBegin - Starts a split phase norm computation.
474: Input Parameters:
475: + x - the first vector
476: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
477: - result - where the result will go (can be PETSC_NULL)
479: Level: advanced
481: Notes:
482: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
484: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
486: @*/
487: PetscErrorCode PETSCVEC_DLLEXPORT VecNormBegin(Vec x,NormType ntype,PetscReal *result)
488: {
489: PetscErrorCode ierr;
490: PetscSplitReduction *sr;
491: PetscReal lresult[2];
492: MPI_Comm comm;
495: PetscObjectGetComm((PetscObject)x,&comm);
496: PetscSplitReductionGet(comm,&sr);
497: if (sr->state == STATE_END) {
498: SETERRQ(PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
499: }
500: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
501: PetscSplitReductionExtend(sr);
502: }
503:
504: sr->invecs[sr->numopsbegin] = (void*)x;
505: if (!x->ops->norm_local) SETERRQ(PETSC_ERR_SUP,"Vector does not support local norms");
506: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
507: (*x->ops->norm_local)(x,ntype,lresult);
508: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
509: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
510: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
511: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
512: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
513: sr->lvalues[sr->numopsbegin++] = lresult[0];
514: if (ntype == NORM_1_AND_2) {
515: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
516: sr->lvalues[sr->numopsbegin++] = lresult[1];
517: }
518: return(0);
519: }
523: /*@
524: VecNormEnd - Ends a split phase norm computation.
526: Input Parameters:
527: + x - the first vector (can be PETSC_NULL)
528: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
529: - result - where the result will go
531: Level: advanced
533: Notes:
534: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
536: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
538: @*/
539: PetscErrorCode PETSCVEC_DLLEXPORT VecNormEnd(Vec x,NormType ntype,PetscReal *result)
540: {
541: PetscErrorCode ierr;
542: PetscInt type_id;
543: PetscSplitReduction *sr;
544: MPI_Comm comm;
547: VecNormComposedDataID(ntype,&type_id);
549: PetscObjectGetComm((PetscObject)x,&comm);
550: PetscSplitReductionGet(comm,&sr);
551:
552: if (sr->state != STATE_END) {
553: /* this is the first call to VecxxxEnd() so do the communication */
554: PetscSplitReductionApply(sr);
555: }
557: if (sr->numopsend >= sr->numopsbegin) {
558: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
559: }
560: if (x && (void*)x != sr->invecs[sr->numopsend]) {
561: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
562: }
563: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
564: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
565: }
566: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
568: if (ntype == NORM_2) {
569: result[0] = sqrt(result[0]);
570: } else if (ntype == NORM_1_AND_2) {
571: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
572: result[1] = sqrt(result[1]);
573: }
574: if (ntype!=NORM_1_AND_2) {
575: PetscObjectComposedDataSetReal((PetscObject)x,type_id,result[0]);
576: }
578: if (sr->numopsend == sr->numopsbegin) {
579: sr->state = STATE_BEGIN;
580: sr->numopsend = 0;
581: sr->numopsbegin = 0;
582: }
583: return(0);
584: }
/*
   Possibly add

      PetscReductionSumBegin/End()
      PetscReductionMaxBegin/End()
      PetscReductionMinBegin/End()

   or, more like MPI, provide a single function that takes a flag for the Op?
   The first option seems better.
*/
593: */