Actual source code: comm.c

  1: #define PETSCKSP_DLL

  3: /***********************************comm.c*************************************

  5: Author: Henry M. Tufo III

  7: e-mail: hmt@cs.brown.edu

  9: snail-mail:
 10: Division of Applied Mathematics
 11: Brown University
 12: Providence, RI 02912

 14: Last Modification: 
 15: 11.21.97
 16: ***********************************comm.c*************************************/

 18: /***********************************comm.c*************************************
 19: File Description:
 20: -----------------

 22: ***********************************comm.c*************************************/
 23:  #include "src/ksp/pc/impls/tfs/tfs.h"


 26: /* global program control variables - explicitly exported */
 27: int my_id            = 0;
 28: int num_nodes        = 1;
 29: int floor_num_nodes  = 0;
 30: int i_log2_num_nodes = 0;

 32: /* global program control variables */
 33: static int p_init = 0;
 34: static int modfl_num_nodes;
 35: static int edge_not_pow_2;

 37: static unsigned int edge_node[sizeof(PetscInt)*32];

 39: /***********************************comm.c*************************************
 40: Function: comm_init()

 42: Input : 
 43: Output: 
 44: Return: 
 45: Description: initialize rank/size and the hypercube (power-of-2) decomposition
 46: ***********************************comm.c*************************************/
 47: void
 48: comm_init (void)
 49: {

 51:   if (p_init++) return;

 53: #if PETSC_SIZEOF_INT != 4
 54:   error_msg_warning("Int != 4 Bytes!");
 55: #endif


 58:   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
 59:   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

 61:   if (num_nodes> (INT_MAX >> 1))
 62:   {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

 64:   ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

 66:   floor_num_nodes = 1;
 67:   i_log2_num_nodes = modfl_num_nodes = 0;
 68:   while (floor_num_nodes <= num_nodes)
 69:     {
 70:       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
 71:       floor_num_nodes <<= 1;
 72:       i_log2_num_nodes++;
 73:     }

 75:   i_log2_num_nodes--;
 76:   floor_num_nodes >>= 1;
 77:   modfl_num_nodes = (num_nodes - floor_num_nodes);

 79:   if ((my_id > 0) && (my_id <= modfl_num_nodes))
 80:     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
 81:   else if (my_id >= floor_num_nodes)
 82:     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
 83:     }
 84:   else
 85:     {edge_not_pow_2 = 0;}

 87: }
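
For reference, the decomposition built above can be reproduced in isolation. The following sketch is not part of comm.c; it recomputes floor_num_nodes, i_log2_num_nodes, modfl_num_nodes and edge_not_pow_2 for one hypothetical (num_nodes, my_id) pair. With num_nodes = 6 and my_id = 5 it reports a 4-node cube plus 2 excess nodes, node 5 collapsing onto node 2 before the power-of-2 exchange.

#include <stdio.h>

int main(void)
{
  int num_nodes = 6, my_id = 5;       /* hypothetical values */
  int floor_num_nodes = 1, i_log2_num_nodes = 0, modfl_num_nodes, edge_not_pow_2;

  /* largest power of 2 not exceeding num_nodes, and its log */
  while (floor_num_nodes <= num_nodes)
    {floor_num_nodes <<= 1; i_log2_num_nodes++;}
  i_log2_num_nodes--;
  floor_num_nodes >>= 1;
  modfl_num_nodes = num_nodes - floor_num_nodes;

  /* same partner rule as comm_init() */
  if ((my_id > 0) && (my_id <= modfl_num_nodes))
    {edge_not_pow_2 = (my_id | floor_num_nodes) - 1;}   /* partner is an excess node */
  else if (my_id >= floor_num_nodes)
    {edge_not_pow_2 = (my_id ^ floor_num_nodes) + 1;}   /* partner inside the 2^k cube */
  else
    {edge_not_pow_2 = 0;}                               /* no partner needed */

  printf("floor=%d log2=%d modfl=%d partner=%d\n",
         floor_num_nodes, i_log2_num_nodes, modfl_num_nodes, edge_not_pow_2);
  return 0;
}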



 91: /***********************************comm.c*************************************
 92: Function: giop()

 94: Input : 
 95: Output: 
 96: Return: 
 97: Description: fan-in/out version
 98: ***********************************comm.c*************************************/
 99: void
100: giop(int *vals, int *work, int n, int *oprs)
101: {
102:    int mask, edge;
103:   int type, dest;
104:   vfp fp;
105:   MPI_Status  status;


108: #ifdef SAFE
109:   /* ok ... should have some data, work, and operator(s) */
110:   if (!vals||!work||!oprs)
111:     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

113:   /* non-uniform should have at least two entries */
114:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
115:     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

117:   /* check to make sure comm package has been initialized */
118:   if (!p_init)
119:     {comm_init();}
120: #endif

122:   /* if there's nothing to do return */
123:   if ((num_nodes<2)||(!n))
124:     {
125:       return;
126:     }

128:   /* a negative number of items to send ==> fatal */
129:   if (n<0)
130:     {error_msg_fatal("giop() :: n=%D<0?",n);}

132:   /* advance to list of n operations for custom */
133:   if ((type=oprs[0])==NON_UNIFORM)
134:     {oprs++;}

136:   /* major league hack */
137:   if (!(fp = (vfp) ivec_fct_addr(type))) {
138:     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
139:     fp = (vfp) oprs;
140:   }

142:   /* all msgs will be of the same length */
143:   /* if not a hypercube must collapse partial dim */
144:   if (edge_not_pow_2)
145:     {
146:       if (my_id >= floor_num_nodes)
147:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
148:       else
149:         {
150:           MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
151:                    MPI_COMM_WORLD,&status);
152:           (*fp)(vals,work,n,oprs);
153:         }
154:     }

156:   /* implement the mesh fan in/out exchange algorithm */
157:   if (my_id<floor_num_nodes)
158:     {
159:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
160:         {
161:           dest = my_id^mask;
162:           if (my_id > dest)
163:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
164:           else
165:             {
166:               MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
167:                        MPI_COMM_WORLD, &status);
168:               (*fp)(vals, work, n, oprs);
169:             }
170:         }

172:       mask=floor_num_nodes>>1;
173:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
174:         {
175:           if (my_id%mask)
176:             {continue;}
177: 
178:           dest = my_id^mask;
179:           if (my_id < dest)
180:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
181:           else
182:             {
183:               MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
184:                        MPI_COMM_WORLD, &status);
185:             }
186:         }
187:     }

189:   /* if not a hypercube must expand to partial dim */
190:   if (edge_not_pow_2)
191:     {
192:       if (my_id >= floor_num_nodes)
193:         {
194:           MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
195:                    MPI_COMM_WORLD,&status);
196:         }
197:       else
198:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
199:     }
200: }
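
Not part of comm.c: a minimal giop() usage sketch. Each node passes its local values, a scratch buffer of the same length, and an op-code array; the uniform op-code name GL_ADD is an assumption taken from the tfs headers (only NON_UNIFORM is referenced in this file). On return every node holds the element-wise global result in vals.

#include <stdlib.h>
#include "src/ksp/pc/impls/tfs/tfs.h"

/* hypothetical caller: element-wise global sum of n local integers */
void global_int_sum(int *vals, int n)
{
  int *work = (int *) malloc(n*sizeof(int));   /* scratch, same length as vals */
  int  op[] = {GL_ADD};                        /* op-code name assumed from tfs.h */

  giop(vals, work, n, op);                     /* on return: vals[i] = sum over all nodes */
  free(work);
}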

202: /***********************************comm.c*************************************
203: Function: grop()

205: Input : 
206: Output: 
207: Return: 
208: Description: fan-in/out version
209: ***********************************comm.c*************************************/
210: void
211: grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
212: {
213:    int mask, edge;
214:   int type, dest;
215:   vfp fp;
216:   MPI_Status  status;


219: #ifdef SAFE
220:   /* ok ... should have some data, work, and operator(s) */
221:   if (!vals||!work||!oprs)
222:     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

224:   /* non-uniform should have at least two entries */
225:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
226:     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

228:   /* check to make sure comm package has been initialized */
229:   if (!p_init)
230:     {comm_init();}
231: #endif

233:   /* if there's nothing to do return */
234:   if ((num_nodes<2)||(!n))
235:     {return;}

237:   /* a negative number of items to send ==> fatal */
238:   if (n<0)
239:     {error_msg_fatal("grop() :: n=%D<0?",n);}

241:   /* advance to list of n operations for custom */
242:   if ((type=oprs[0])==NON_UNIFORM)
243:     {oprs++;}

245:   if (!(fp = (vfp) rvec_fct_addr(type))) {
246:     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
247:     fp = (vfp) oprs;
248:   }

250:   /* all msgs will be of the same length */
251:   /* if not a hypercube must collapse partial dim */
252:   if (edge_not_pow_2)
253:     {
254:       if (my_id >= floor_num_nodes)
255:         {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
256:                   MPI_COMM_WORLD);}
257:       else
258:         {
259:           MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
260:                    MPI_COMM_WORLD,&status);
261:           (*fp)(vals,work,n,oprs);
262:         }
263:     }

265:   /* implement the mesh fan in/out exchange algorithm */
266:   if (my_id<floor_num_nodes)
267:     {
268:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
269:         {
270:           dest = my_id^mask;
271:           if (my_id > dest)
272:             {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
273:           else
274:             {
275:               MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
276:                        MPI_COMM_WORLD, &status);
277:               (*fp)(vals, work, n, oprs);
278:             }
279:         }

281:       mask=floor_num_nodes>>1;
282:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
283:         {
284:           if (my_id%mask)
285:             {continue;}
286: 
287:           dest = my_id^mask;
288:           if (my_id < dest)
289:             {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
290:           else
291:             {
292:               MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
293:                        MPI_COMM_WORLD, &status);
294:             }
295:         }
296:     }

298:   /* if not a hypercube must expand to partial dim */
299:   if (edge_not_pow_2)
300:     {
301:       if (my_id >= floor_num_nodes)
302:         {
303:           MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
304:                    MPI_COMM_WORLD,&status);
305:         }
306:       else
307:         {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
308:                   MPI_COMM_WORLD);}
309:     }
310: }


313: /***********************************comm.c*************************************
314: Function: grop_hc()

316: Input : 
317: Output: 
318: Return: 
319: Description: fan-in/out version

321: note good only for num_nodes=2^k!!!

323: ***********************************comm.c*************************************/
324: void
325: grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
326: {
327:    int mask, edge;
328:   int type, dest;
329:   vfp fp;
330:   MPI_Status  status;

332: #ifdef SAFE
333:   /* ok ... should have some data, work, and operator(s) */
334:   if (!vals||!work||!oprs)
335:     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

337:   /* non-uniform should have at least two entries */
338:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
339:     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

341:   /* check to make sure comm package has been initialized */
342:   if (!p_init)
343:     {comm_init();}
344: #endif

346:   /* if there's nothing to do return */
347:   if ((num_nodes<2)||(!n)||(dim<=0))
348:     {return;}

350:   /* the error msg says it all!!! */
351:   if (modfl_num_nodes)
352:     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

354:   /* a negative number of items to send ==> fatal */
355:   if (n<0)
356:     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

358:   /* can't do more dimensions than exist */
359:   dim = PetscMin(dim,i_log2_num_nodes);

361:   /* advance to list of n operations for custom */
362:   if ((type=oprs[0])==NON_UNIFORM)
363:     {oprs++;}

365:   if (!(fp = (vfp) rvec_fct_addr(type))) {
366:     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
367:     fp = (vfp) oprs;
368:   }

370:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
371:     {
372:       dest = my_id^mask;
373:       if (my_id > dest)
374:         {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
375:       else
376:         {
377:           MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
378:                    &status);
379:           (*fp)(vals, work, n, oprs);
380:         }
381:     }

383:   if (edge==dim)
384:     {mask>>=1;}
385:   else
386:     {while (++edge<dim) {mask<<=1;}}

388:   for (edge=0; edge<dim; edge++,mask>>=1)
389:     {
390:       if (my_id%mask)
391:         {continue;}
392: 
393:       dest = my_id^mask;
394:       if (my_id < dest)
395:         {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
396:       else
397:         {
398:           MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
399:                    &status);
400:         }
401:     }
402: }
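
Not part of comm.c: a sketch of the dim argument. grop_hc() exchanges only over the low dim hypercube dimensions, so with dim = 1 each node combines with its neighbor across dimension 0 and both members of the pair end up with the pair's element-wise result. GL_ADD is again an assumed op-code name from the tfs headers.

#include "src/ksp/pc/impls/tfs/tfs.h"

/* hypothetical caller: node r combines with node r^1 only */
void pairwise_sum(PetscScalar *vals, PetscScalar *work, int n)
{
  int op[] = {GL_ADD};             /* assumed op-code name */
  grop_hc(vals, work, n, op, 1);   /* both nodes of each pair get the pair's sum */
}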


405: /***********************************comm.c*************************************
406: Function: gfop()

408: Input : 
409: Output: 
410: Return: 
411: Description: fan-in/out version
412: ***********************************comm.c*************************************/
413: void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
414: {
415:    int mask, edge;
416:   int dest;
417:   MPI_Status  status;
418:   MPI_Op op;

420: #ifdef SAFE
421:   /* check to make sure comm package has been initialized */
422:   if (!p_init)
423:     {comm_init();}

425:   /* ok ... should have some data, work, and operator(s) */
426:   if (!vals||!work||!fp)
427:     {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}
428: #endif

430:   /* if there's nothing to do return */
431:   if ((num_nodes<2)||(!n))
432:     {return;}

434:   /* a negative number of items to send ==> fatal */
435:   if (n<0)
436:     {error_msg_fatal("gfop() :: n=%D<0?",n);}

438:   if (comm_type==MPI)
439:     {
440:       MPI_Op_create(fp,TRUE,&op);
441:       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
442:       MPI_Op_free(&op);
443:       return;
444:     }

446:   /* if not a hypercube must collapse partial dim */
447:   if (edge_not_pow_2)
448:     {
449:       if (my_id >= floor_num_nodes)
450:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
451:                   MPI_COMM_WORLD);}
452:       else
453:         {
454:           MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
455:                    MPI_COMM_WORLD,&status);
456:           (*fp)(vals,work,&n,&dt);
457:         }
458:     }

460:   /* implement the mesh fan in/out exchange algorithm */
461:   if (my_id<floor_num_nodes)
462:     {
463:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
464:         {
465:           dest = my_id^mask;
466:           if (my_id > dest)
467:             {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
468:           else
469:             {
470:               MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
471:                        MPI_COMM_WORLD, &status);
472:               (*fp)(vals, work, &n, &dt);
473:             }
474:         }

476:       mask=floor_num_nodes>>1;
477:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
478:         {
479:           if (my_id%mask)
480:             {continue;}
481: 
482:           dest = my_id^mask;
483:           if (my_id < dest)
484:             {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
485:           else
486:             {
487:               MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
488:                        MPI_COMM_WORLD, &status);
489:             }
490:         }
491:     }

493:   /* if not a hypercube must expand to partial dim */
494:   if (edge_not_pow_2)
495:     {
496:       if (my_id >= floor_num_nodes)
497:         {
498:           MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
499:                    MPI_COMM_WORLD,&status);
500:         }
501:       else
502:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
503:                   MPI_COMM_WORLD);}
504:     }
505: }
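
Not part of comm.c: a sketch of driving gfop() through its MPI branch. The combine function is written against the MPI_User_function signature that MPI_Op_create() expects; note that on this branch the reduced result is returned in work (MPI_Allreduce writes there), while the hand-rolled fan-in/out path above accumulates into vals. The names int_add and global_sum, and the use of the MPI value for comm_type, are illustrative assumptions.

#include "src/ksp/pc/impls/tfs/tfs.h"

/* hypothetical commutative combine: element-wise integer add,
   signature compatible with MPI_User_function */
static void int_add(void *in, void *inout, int *n, MPI_Datatype *dt)
{
  int i, *a = (int *) in, *b = (int *) inout;
  for (i = 0; i < *n; i++) {b[i] += a[i];}
}

/* illustrative caller: comm_type == MPI selects the MPI_Allreduce branch */
void global_sum(int *vals, int *work, int n)
{
  gfop(vals, work, n, int_add, MPI_INT, MPI);  /* result lands in work */
}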






512: /******************************************************************************
513: Function: ssgl_radd()

515: Input : 
516: Output: 
517: Return: 
518: Description: 
519:  
520: segs has level+1 entries :: segs[0] .. segs[level]

522: ******************************************************************************/
523: void 
524: ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, 
525:            int *segs)
526: {
527:    int edge, type, dest, mask;
528:    int stage_n;
529:   MPI_Status  status;

531: #ifdef SAFE
532:   /* check to make sure comm package has been initialized */
533:   if (!p_init)
534:     {comm_init();}
535: #endif


538:   /* all msgs are *NOT* the same length */
539:   /* implement the mesh fan in/out exchange algorithm */
540:   for (mask=0, edge=0; edge<level; edge++, mask++)
541:     {
542:       stage_n = (segs[level] - segs[edge]);
543:       if (stage_n && !(my_id & mask))
544:         {
545:           dest = edge_node[edge];
546:           type = MSGTAG3 + my_id + (num_nodes*edge);
547:           if (my_id>dest)
548:           {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
549:                       MPI_COMM_WORLD);}
550:           else
551:             {
552:               type =  type - my_id + dest;
553:               MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
554:                        MPI_COMM_WORLD,&status);
555:               rvec_add(vals+segs[edge], work, stage_n);
556:             }
557:         }
558:       mask <<= 1;
559:     }
560:   mask>>=1;
561:   for (edge=0; edge<level; edge++)
562:     {
563:       stage_n = (segs[level] - segs[level-1-edge]);
564:       if (stage_n && !(my_id & mask))
565:         {
566:           dest = edge_node[level-edge-1];
567:           type = MSGTAG6 + my_id + (num_nodes*edge);
568:           if (my_id<dest)
569:             {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
570:                       MPI_COMM_WORLD);}
571:           else
572:             {
573:               type =  type - my_id + dest;
574:               MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
575:                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
576:             }
577:         }
578:       mask >>= 1;
579:     }
580: }
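
Not part of comm.c: a standalone illustration of the segs[] convention used by ssgl_radd() (and by new_ssgl_radd() below). segs holds level+1 monotone offsets into vals; fan-in stage e reduces the tail of vals starting at segs[e], and the fan-out stages replay the same tails in reverse order. The segs values here are hypothetical.

#include <stdio.h>

int main(void)
{
  int level  = 2;
  int segs[] = {0, 4, 10};     /* level+1 entries: segs[0] .. segs[level] */
  int edge;

  for (edge = 0; edge < level; edge++)     /* fan-in: tails shrink as edge grows */
    printf("fan-in  stage %d: offset %d, length %d\n",
           edge, segs[edge], segs[level] - segs[edge]);
  for (edge = 0; edge < level; edge++)     /* fan-out: same tails, reverse order */
    printf("fan-out stage %d: offset %d, length %d\n",
           edge, segs[level-1-edge], segs[level] - segs[level-1-edge]);
  return 0;
}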



584: /***********************************comm.c*************************************
585: Function: grop_hc_vvl()

587: Input : 
588: Output: 
589: Return: 
590: Description: fan-in/out version

592: note good only for num_nodes=2^k!!!

594: ***********************************comm.c*************************************/
595: void
596: grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
597: {
598:    int mask, edge, n;
599:   int type, dest;
600:   vfp fp;
601:   MPI_Status  status;

603:   error_msg_fatal("grop_hc_vvl() :: is not working!\n");

605: #ifdef SAFE
606:   /* ok ... should have some data, work, and operator(s) */
607:   if (!vals||!work||!oprs||!segs)
608:     {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

610:   /* non-uniform should have at least two entries */

612:   /* check to make sure comm package has been initialized */
613:   if (!p_init)
614:     {comm_init();}
615: #endif

617:   /* if there's nothing to do return */
618:   if ((num_nodes<2)||(dim<=0))
619:     {return;}

621:   /* the error msg says it all!!! */
622:   if (modfl_num_nodes)
623:     {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

625:   /* can't do more dimensions than exist */
626:   dim = PetscMin(dim,i_log2_num_nodes);

628:   /* advance to list of n operations for custom */
629:   if ((type=oprs[0])==NON_UNIFORM)
630:     {oprs++;}

632:   if (!(fp = (vfp) rvec_fct_addr(type))){
633:     error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
634:     fp = (vfp) oprs;
635:   }

637:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
638:     {
639:       n = segs[dim]-segs[edge];
640:       dest = my_id^mask;
641:       if (my_id > dest)
642:         {MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
643:       else
644:         {
645:           MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
646:                    MPI_COMM_WORLD, &status);
647:           (*fp)(vals+segs[edge], work, n, oprs);
648:         }
649:     }

651:   if (edge==dim)
652:     {mask>>=1;}
653:   else
654:     {while (++edge<dim) {mask<<=1;}}

656:   for (edge=0; edge<dim; edge++,mask>>=1)
657:     {
658:       if (my_id%mask)
659:         {continue;}
660: 
661:       n = (segs[dim]-segs[dim-1-edge]);
662: 
663:       dest = my_id^mask;
664:       if (my_id < dest)
665:         {MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
666:                   MPI_COMM_WORLD);}
667:       else
668:         {
669:           MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
670:                    MSGTAG4+dest,MPI_COMM_WORLD, &status);
671:         }
672:     }
673: }

675: /******************************************************************************
676: Function: new_ssgl_radd()

678: Input : 
679: Output: 
680: Return: 
681: Description: 
682:  
683: segs has level+1 entries :: segs[0] .. segs[level]

685: ******************************************************************************/
686: void new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
687: {
688:    int edge, type, dest, mask;
689:    int stage_n;
690:   MPI_Status  status;

692: #ifdef SAFE
693:   /* check to make sure comm package has been initialized */
694:   if (!p_init)
695:     {comm_init();}
696: #endif

698:   /* all msgs are *NOT* the same length */
699:   /* implement the mesh fan in/out exchange algorithm */
700:   for (mask=0, edge=0; edge<level; edge++, mask++)
701:     {
702:       stage_n = (segs[level] - segs[edge]);
703:       if (stage_n && !(my_id & mask))
704:         {
705:           dest = edge_node[edge];
706:           type = MSGTAG3 + my_id + (num_nodes*edge);
707:           if (my_id>dest)
708:           {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
709:                       MPI_COMM_WORLD);}
710:           else
711:             {
712:               type =  type - my_id + dest;
713:               MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
714:                        MPI_COMM_WORLD,&status);
715:               rvec_add(vals+segs[edge], work, stage_n);
716:             }
717:         }
718:       mask <<= 1;
719:     }
720:   mask>>=1;
721:   for (edge=0; edge<level; edge++)
722:     {
723:       stage_n = (segs[level] - segs[level-1-edge]);
724:       if (stage_n && !(my_id & mask))
725:         {
726:           dest = edge_node[level-edge-1];
727:           type = MSGTAG6 + my_id + (num_nodes*edge);
728:           if (my_id<dest)
729:             {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
730:                       MPI_COMM_WORLD);}
731:           else
732:             {
733:               type =  type - my_id + dest;
734:               MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
735:                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
736:             }
737:         }
738:       mask >>= 1;
739:     }
740: }



744: /***********************************comm.c*************************************
745: Function: giop_hc()

747: Input : 
748: Output: 
749: Return: 
750: Description: fan-in/out version

752: note good only for num_nodes=2^k!!!

754: ***********************************comm.c*************************************/
755: void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
756: {
757:    int mask, edge;
758:   int type, dest;
759:   vfp fp;
760:   MPI_Status  status;

762: #ifdef SAFE
763:   /* ok ... should have some data, work, and operator(s) */
764:   if (!vals||!work||!oprs)
765:     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

767:   /* non-uniform should have at least two entries */
768:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
769:     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

771:   /* check to make sure comm package has been initialized */
772:   if (!p_init)
773:     {comm_init();}
774: #endif

776:   /* if there's nothing to do return */
777:   if ((num_nodes<2)||(!n)||(dim<=0))
778:     {return;}

780:   /* the error msg says it all!!! */
781:   if (modfl_num_nodes)
782:     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

784:   /* a negative number of items to send ==> fatal */
785:   if (n<0)
786:     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

788:   /* can't do more dimensions than exist */
789:   dim = PetscMin(dim,i_log2_num_nodes);

791:   /* advance to list of n operations for custom */
792:   if ((type=oprs[0])==NON_UNIFORM)
793:     {oprs++;}

795:   if (!(fp = (vfp) ivec_fct_addr(type))){
796:     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
797:     fp = (vfp) oprs;
798:   }

800:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
801:     {
802:       dest = my_id^mask;
803:       if (my_id > dest)
804:         {MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
805:       else
806:         {
807:           MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
808:                    &status);
809:           (*fp)(vals, work, n, oprs);
810:         }
811:     }

813:   if (edge==dim)
814:     {mask>>=1;}
815:   else
816:     {while (++edge<dim) {mask<<=1;}}

818:   for (edge=0; edge<dim; edge++,mask>>=1)
819:     {
820:       if (my_id%mask)
821:         {continue;}
822: 
823:       dest = my_id^mask;
824:       if (my_id < dest)
825:         {MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
826:       else
827:         {
828:           MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
829:                    &status);
830:         }
831:     }
832: }