Actual source code: comb.c

  1: /*$Id: comb.c,v 1.40 2001/09/07 20:08:55 bsmith Exp $*/

  3: /*
  4:       Split phase global vector reductions with support for combining the
  5:    communication portion of several operations. Using MPI-1.1 support only

  7:       The idea for this and much of the initial code is contributed by 
  8:    Victor Eijkhout.

 10:        Usage:
 11:              VecDotBegin(Vec,Vec,PetscScalar *);
 12:              VecNormBegin(Vec,NormType,PetscReal *);
 13:              ....
 14:              VecDotEnd(Vec,Vec,PetscScalar *);
 15:              VecNormEnd(Vec,NormType,PetscReal *);

 17:        Limitations: 
 18:          - The order of the xxxEnd() functions MUST be in the same order
 19:            as the xxxBegin(). There is extensive error checking to try to 
 20:            insure that the user calls the routines in the correct order
 21: */

 23:  #include src/vec/vecimpl.h

 25: #define STATE_BEGIN 0
 26: #define STATE_END   1

 28: #define REDUCE_SUM  0
 29: #define REDUCE_MAX  1
 30: #define REDUCE_MIN  2

 32: typedef struct {
 33:   MPI_Comm     comm;
 34:   PetscScalar  *lvalues;    /* this are the reduced values before call to MPI_Allreduce() */
 35:   PetscScalar  *gvalues;    /* values after call to MPI_Allreduce() */
 36:   void         **invecs;    /* for debugging only, vector/memory used with each op */
 37:   int          *reducetype; /* is particular value to be summed or maxed? */
 38:   int          state;       /* are we calling xxxBegin() or xxxEnd()? */
 39:   int          maxops;      /* total amount of space we have for requests */
 40:   int          numopsbegin; /* number of requests that have been queued in */
 41:   int          numopsend;   /* number of requests that have been gotten by user */
 42: } PetscSplitReduction;
 43: /*
 44:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 45: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by 
 46: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 47: some of each.
 48: */

 50: /*
 51:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 52: */
 53: int PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 54: {

 58:   ierr               = PetscNew(PetscSplitReduction,sr);
 59:   (*sr)->numopsbegin = 0;
 60:   (*sr)->numopsend   = 0;
 61:   (*sr)->state       = STATE_BEGIN;
 62:   (*sr)->maxops      = 32;
 63:   ierr               = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
 64:   ierr               = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
 65:   ierr               = PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
 66:   (*sr)->comm        = comm;
 67:   ierr               = PetscMalloc(32*sizeof(int),&(*sr)->reducetype);
 68:   return(0);
 69: }

 71: /*
 72:        This function is the MPI reduction operation used when there is 
 73:    a combination of sums and max in the reduction. The call below to 
 74:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the 
 75:    MPI operator PetscSplitReduction_Op.
 76: */
 77: MPI_Op PetscSplitReduction_Op = 0;

 79: EXTERN_C_BEGIN
 80: void PetscSplitReduction_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
 81: {
 82:   PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
 83:   int         i,count = *cnt;

 86:   if (*datatype != MPIU_REAL) {
 87:     (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
 88:     MPI_Abort(MPI_COMM_WORLD,1);
 89:   }
 90: #if defined(PETSC_USE_COMPLEX)
 91:   count = count/2;
 92: #endif
 93:   count = count/2;
 94:   for (i=0; i<count; i++) {
 95:     if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
 96:       xout[i] += xin[i];
 97:     } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
 98:       xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
 99:     } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
100:       xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
101:     } else {
102:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
103:       MPI_Abort(MPI_COMM_WORLD,1);
104:     }
105:   }
106:   PetscStackPop; /* since function returns void cannot use PetscFunctionReturn(); */
107:   return;
108: }
109: EXTERN_C_END

111: /*
112:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
113: */
114: int PetscSplitReductionApply(PetscSplitReduction *sr)
115: {
116:   int         size,ierr,i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
117:   PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
118:   int         sum_flg = 0,max_flg = 0, min_flg = 0;
119:   MPI_Comm    comm = sr->comm;

122:   if (sr->numopsend > 0) {
123:     SETERRQ(1,"Cannot call this after VecxxxEnd() has been called");
124:   }

126:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
127:   ierr  = MPI_Comm_size(sr->comm,&size);
128:   if (size == 1) {
129:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
130:   } else {
131:     /* determine if all reductions are sum, max, or min */
132:     for (i=0; i<numops; i++) {
133:       if (reducetype[i] == REDUCE_MAX) {
134:         max_flg = 1;
135:       } else if (reducetype[i] == REDUCE_SUM) {
136:         sum_flg = 1;
137:       } else if (reducetype[i] == REDUCE_MIN) {
138:         min_flg = 1;
139:       } else {
140:         SETERRQ(1,"Error in PetscSplitReduction data structure, probably memory corruption");
141:       }
142:     }
143:     if (sum_flg + max_flg + min_flg > 1) {
144:       /* 
145:          after all the entires in lvalues we store the reducetype flags to indicate
146:          to the reduction operations what are sums and what are max
147:       */
148:       for (i=0; i<numops; i++) {
149:         lvalues[numops+i] = reducetype[i];
150:       }
151: #if defined(PETSC_USE_COMPLEX)
152:       MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
153: #else
154:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
155: #endif
156:     } else if (max_flg) {
157: #if defined(PETSC_USE_COMPLEX)
158:       /* 
159:         complex case we max both the real and imaginary parts, the imaginary part
160:         is just ignored later
161:       */
162:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
163: #else
164:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
165: #endif
166:     } else if (min_flg) {
167: #if defined(PETSC_USE_COMPLEX)
168:       /* 
169:         complex case we min both the real and imaginary parts, the imaginary part
170:         is just ignored later
171:       */
172:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
173: #else
174:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
175: #endif
176:     } else {
177:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
178:     }
179:   }
180:   sr->state     = STATE_END;
181:   sr->numopsend = 0;
182:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
183:   return(0);
184: }


187: /*
188:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
189: */
190: int PetscSplitReductionExtend(PetscSplitReduction *sr)
191: {
192:   int         maxops = sr->maxops,*reducetype = sr->reducetype,ierr;
193:   PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
194:   void        *invecs = sr->invecs;

197:   sr->maxops     = 2*maxops;
198:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
199:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
200:   PetscMalloc(2*maxops*sizeof(int),&sr->reducetype);
201:   PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
202:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
203:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
204:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(int));
205:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
206:   PetscFree(lvalues);
207:   PetscFree(gvalues);
208:   PetscFree(reducetype);
209:   PetscFree(invecs);
210:   return(0);
211: }

213: int PetscSplitReductionDestroy(PetscSplitReduction *sr)
214: {

218:   PetscFree(sr->lvalues);
219:   PetscFree(sr->gvalues);
220:   PetscFree(sr->reducetype);
221:   PetscFree(sr->invecs);
222:   PetscFree(sr);
223:   return(0);
224: }

226: static int Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

228: EXTERN_C_BEGIN
229: /*
230:    Private routine to delete internal storage when a communicator is freed.
231:   This is called by MPI, not by users.

233:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
234:   it was MPI_Comm *comm.  
235: */
236: int Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
237: {

241:   PetscLogInfo(0,"Petsc_DelReduction:Deleting reduction data in an MPI_Comm %ldn",(long)comm);
242:   PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
243:   return(0);
244: }
245: EXTERN_C_END

247: /*
248:      PetscSplitReductionGet - Gets the split reduction object from a 
249:         PETSc vector, creates if it does not exit.

251: */
252: int PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
253: {
254:   int      ierr,flag;

257:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
258:     /* 
259:        The calling sequence of the 2nd argument to this function changed
260:        between MPI Standard 1.0 and the revisions 1.1 Here we match the 
261:        new standard, if you are using an MPI implementation that uses 
262:        the older version you will get a warning message about the next line;
263:        it is only a warning message and should do no harm.
264:     */
265:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
266:     /*
267:          Also create the special MPI reduction operation that may be needed 
268:     */
269:     MPI_Op_create(PetscSplitReduction_Local,1,&PetscSplitReduction_Op);
270:   }
271:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
272:   if (!flag) {  /* doesn't exist yet so create it and put it in */
273:     PetscSplitReductionCreate(comm,sr);
274:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
275:     PetscLogInfo(0,"PetscSplitReductionGet:Putting reduction data in an MPI_Comm %ldn",(long)comm);
276:   }

278:   return(0);
279: }

281: /* ----------------------------------------------------------------------------------------------------*/

283: /*@
284:    VecDotBegin - Starts a split phase dot product computation.

286:    Input Parameters:
287: +   x - the first vector
288: .   y - the second vector
289: -   result - where the result will go (can be PETSC_NULL)

291:    Level: advanced

293:    Notes:
294:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

296: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
297:          VecTDotBegin(), VecTDotEnd()
298: @*/
299: int VecDotBegin(Vec x,Vec y,PetscScalar *result)
300: {
301:   int                 ierr;
302:   PetscSplitReduction *sr;
303:   MPI_Comm            comm;

306:   PetscObjectGetComm((PetscObject)x,&comm);
307:   PetscSplitReductionGet(comm,&sr);
308:   if (sr->state == STATE_END) {
309:     SETERRQ(1,"Called before all VecxxxEnd() called");
310:   }
311:   if (sr->numopsbegin >= sr->maxops) {
312:     PetscSplitReductionExtend(sr);
313:   }
314:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
315:   sr->invecs[sr->numopsbegin]     = (void*)x;
316:   if (!x->ops->dot_local) SETERRQ(1,"Vector does not suppport local dots");
317:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
318:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
319:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
320:   return(0);
321: }

323: /*@
324:    VecDotEnd - Ends a split phase dot product computation.

326:    Input Parameters:
327: +  x - the first vector (can be PETSC_NULL)
328: .  y - the second vector (can be PETSC_NULL)
329: -  result - where the result will go

331:    Level: advanced

333:    Notes:
334:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

336: seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
337:          VecTDotBegin(),VecTDotEnd()

339: @*/
340: int VecDotEnd(Vec x,Vec y,PetscScalar *result)
341: {
342:   int                 ierr;
343:   PetscSplitReduction *sr;
344:   MPI_Comm            comm;

347:   PetscObjectGetComm((PetscObject)x,&comm);
348:   PetscSplitReductionGet(comm,&sr);
349: 
350:   if (sr->state != STATE_END) {
351:     /* this is the first call to VecxxxEnd() so do the communication */
352:     PetscSplitReductionApply(sr);
353:   }

355:   if (sr->numopsend >= sr->numopsbegin) {
356:     SETERRQ(1,"Called VecxxxEnd() more times then VecxxxBegin()");
357:   }
358:   if (x && (void*) x != sr->invecs[sr->numopsend]) {
359:     SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
360:   }
361:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
362:     SETERRQ(1,"Called VecDotEnd() on a reduction started with VecNormBegin()");
363:   }
364:   *result = sr->gvalues[sr->numopsend++];

366:   /*
367:      We are finished getting all the results so reset to no outstanding requests
368:   */
369:   if (sr->numopsend == sr->numopsbegin) {
370:     sr->state        = STATE_BEGIN;
371:     sr->numopsend    = 0;
372:     sr->numopsbegin  = 0;
373:   }
374:   return(0);
375: }

377: /*@
378:    VecTDotBegin - Starts a split phase transpose dot product computation.

380:    Input Parameters:
381: +  x - the first vector
382: .  y - the second vector
383: -  result - where the result will go (can be PETSC_NULL)

385:    Level: advanced

387:    Notes:
388:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

390: seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
391:          VecDotBegin(), VecDotEnd()

393: @*/
394: int VecTDotBegin(Vec x,Vec y,PetscScalar *result)
395: {
396:   int                 ierr;
397:   PetscSplitReduction *sr;
398:   MPI_Comm            comm;

401:   PetscObjectGetComm((PetscObject)x,&comm);
402:   PetscSplitReductionGet(comm,&sr);
403:   if (sr->state == STATE_END) {
404:     SETERRQ(1,"Called before all VecxxxEnd() called");
405:   }
406:   if (sr->numopsbegin >= sr->maxops) {
407:     PetscSplitReductionExtend(sr);
408:   }
409:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
410:   sr->invecs[sr->numopsbegin]     = (void*)x;
411:   if (!x->ops->tdot_local) SETERRQ(1,"Vector does not suppport local dots");
412:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
413:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
414:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
415:   return(0);
416: }

418: /*@
419:    VecTDotEnd - Ends a split phase transpose dot product computation.

421:    Input Parameters:
422: +  x - the first vector (can be PETSC_NULL)
423: .  y - the second vector (can be PETSC_NULL)
424: -  result - where the result will go

426:    Level: advanced

428:    Notes:
429:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

431: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
432:          VecDotBegin(), VecDotEnd()
433: @*/
434: int VecTDotEnd(Vec x,Vec y,PetscScalar *result)
435: {
436:   int               ierr;

439:   /*
440:       TDotEnd() is the same as DotEnd() so reuse the code
441:   */
442:   VecDotEnd(x,y,result);
443:   return(0);
444: }

446: /* -------------------------------------------------------------------------*/

448: /*@
449:    VecNormBegin - Starts a split phase norm computation.

451:    Input Parameters:
452: +  x - the first vector
453: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
454: -  result - where the result will go (can be PETSC_NULL)

456:    Level: advanced

458:    Notes:
459:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

461: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

463: @*/
464: int VecNormBegin(Vec x,NormType ntype,PetscReal *result)
465: {
466:   int                 ierr;
467:   PetscSplitReduction *sr;
468:   PetscReal           lresult[2];
469:   MPI_Comm            comm;

472:   PetscObjectGetComm((PetscObject)x,&comm);
473:   PetscSplitReductionGet(comm,&sr);
474:   if (sr->state == STATE_END) {
475:     SETERRQ(1,"Called before all VecxxxEnd() called");
476:   }
477:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
478:     PetscSplitReductionExtend(sr);
479:   }
480: 
481:   sr->invecs[sr->numopsbegin]     = (void*)x;
482:   if (!x->ops->norm_local) SETERRQ(1,"Vector does not support local norms");
483:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
484:   (*x->ops->norm_local)(x,ntype,lresult);
485:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
486:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
487:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
488:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
489:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
490:   sr->lvalues[sr->numopsbegin++] = lresult[0];
491:   if (ntype == NORM_1_AND_2) {
492:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
493:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
494:   }
495:   return(0);
496: }

498: /*@
499:    VecNormEnd - Ends a split phase norm computation.

501:    Input Parameters:
502: +  x - the first vector (can be PETSC_NULL)
503: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
504: -  result - where the result will go

506:    Level: advanced

508:    Notes:
509:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

511: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

513: @*/
514: int VecNormEnd(Vec x,NormType ntype,PetscReal *result)
515: {
516:   int                 ierr;
517:   PetscSplitReduction *sr;
518:   MPI_Comm            comm;

521:   PetscObjectGetComm((PetscObject)x,&comm);
522:   PetscSplitReductionGet(comm,&sr);
523: 
524:   if (sr->state != STATE_END) {
525:     /* this is the first call to VecxxxEnd() so do the communication */
526:     PetscSplitReductionApply(sr);
527:   }

529:   if (sr->numopsend >= sr->numopsbegin) {
530:     SETERRQ(1,"Called VecxxxEnd() more times then VecxxxBegin()");
531:   }
532:   if (x && (void*)x != sr->invecs[sr->numopsend]) {
533:     SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
534:   }
535:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
536:     SETERRQ(1,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
537:   }
538:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
539:   if (ntype == NORM_2) {
540:     result[0] = sqrt(result[0]);
541:   } else if (ntype == NORM_1_AND_2) {
542:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
543:     result[1] = sqrt(result[1]);
544:   }

546:   if (sr->numopsend == sr->numopsbegin) {
547:     sr->state        = STATE_BEGIN;
548:     sr->numopsend    = 0;
549:     sr->numopsbegin  = 0;
550:   }
551:   return(0);
552: }

554: /*
555:    Possibly add

557:      PetscReductionSumBegin/End()
558:      PetscReductionMaxBegin/End()
559:      PetscReductionMinBegin/End()
560:    or have more like MPI with a single function with flag for Op? Like first better
561: */