Actual source code: gs.c

  1: #define PETSCKSP_DLL

  3: /***********************************gs.c***************************************

  5: Author: Henry M. Tufo III

  7: e-mail: hmt@cs.brown.edu

  9: snail-mail:
 10: Division of Applied Mathematics
 11: Brown University
 12: Providence, RI 02912

 14: Last Modification: 
 15: 6.21.97
 16: ************************************gs.c**************************************/

 18: /***********************************gs.c***************************************
 19: File Description:
 20: -----------------

 22: ************************************gs.c**************************************/

 24:  #include src/ksp/pc/impls/tfs/tfs.h

 26: /* default length of number of items via tree - doubles if exceeded */
 27: #define TREE_BUF_SZ 2048;
 28: #define GS_VEC_SZ   1



 32: /***********************************gs.c***************************************
 33: Type: struct gather_scatter_id 
 34: ------------------------------

 36: ************************************gs.c**************************************/
 37: typedef struct gather_scatter_id {
 38:   int id;
 39:   int nel_min;
 40:   int nel_max;
 41:   int nel_sum;
 42:   int negl;
 43:   int gl_max;
 44:   int gl_min;
 45:   int repeats;
 46:   int ordered;
 47:   int positive;
 48:   PetscScalar *vals;

 50:   /* bit mask info */
 51:   int *my_proc_mask;
 52:   int mask_sz;
 53:   int *ngh_buf;
 54:   int ngh_buf_sz;
 55:   int *nghs;
 56:   int num_nghs;
 57:   int max_nghs;
 58:   int *pw_nghs;
 59:   int num_pw_nghs;
 60:   int *tree_nghs;
 61:   int num_tree_nghs;

 63:   int num_loads;

 65:   /* repeats == true -> local info */
 66:   int nel;         /* number of unique elememts */
 67:   int *elms;       /* of size nel */
 68:   int nel_total;
 69:   int *local_elms; /* of size nel_total */
 70:   int *companion;  /* of size nel_total */

 72:   /* local info */
 73:   int num_local_total;
 74:   int local_strength;
 75:   int num_local;
 76:   int *num_local_reduce;
 77:   int **local_reduce;
 78:   int num_local_gop;
 79:   int *num_gop_local_reduce;
 80:   int **gop_local_reduce;

 82:   /* pairwise info */
 83:   int level;
 84:   int num_pairs;
 85:   int max_pairs;
 86:   int loc_node_pairs;
 87:   int max_node_pairs;
 88:   int min_node_pairs;
 89:   int avg_node_pairs;
 90:   int *pair_list;
 91:   int *msg_sizes;
 92:   int **node_list;
 93:   int len_pw_list;
 94:   int *pw_elm_list;
 95:   PetscScalar *pw_vals;

 97:   MPI_Request *msg_ids_in;
 98:   MPI_Request *msg_ids_out;

100:   PetscScalar *out;
101:   PetscScalar *in;
102:   int msg_total;

104:   /* tree - crystal accumulator info */
105:   int max_left_over;
106:   int *pre;
107:   int *in_num;
108:   int *out_num;
109:   int **in_list;
110:   int **out_list;

112:   /* new tree work*/
113:   int  tree_nel;
114:   int *tree_elms;
115:   PetscScalar *tree_buf;
116:   PetscScalar *tree_work;

118:   int  tree_map_sz;
119:   int *tree_map_in;
120:   int *tree_map_out;

122:   /* current memory status */
123:   int gl_bss_min;
124:   int gl_perm_min;

126:   /* max segment size for gs_gop_vec() */
127:   int vec_sz;

129:   /* hack to make paul happy */
130:   MPI_Comm gs_comm;

132: } gs_id;


135: /* to be made public */

137: /* PRIVATE - and definitely not exported */
138: /*static void gs_print_template( gs_id* gs, int who);*/
139: /*static void gs_print_stemplate( gs_id* gs, int who);*/

141: static gs_id *gsi_check_args(int *elms, int nel, int level);
142: static void gsi_via_bit_mask(gs_id *gs);
143: static void get_ngh_buf(gs_id *gs);
144: static void set_pairwise(gs_id *gs);
145: static gs_id * gsi_new(void);
146: static void set_tree(gs_id *gs);

148: /* same for all but vector flavor */
149: static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150: /* vector flavor */
151: static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);

153: static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154: static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155: static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156: static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157: static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);


160: static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161: static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162: static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163: static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164: static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);

166: static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167: static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168: static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);

170: static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171: static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172: static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173: static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174: static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);

176: static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177: static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178: static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179: static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180: static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);

182: static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183: static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184: static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185: static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186: static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);

188: static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189: static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190: static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191: static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192: static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);

194: static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195: static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196: static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197: static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198: static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);

200: static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201: static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202: static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203: static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204: static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);

206: static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207: static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208: static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209: static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);



213: /* global vars */
214: /* from comm.c module */

216: /* module state inf and fortran interface */
217: static int num_gs_ids = 0;

219: /* should make this dynamic ... later */
220: static int msg_buf=MAX_MSG_BUF;
221: static int vec_sz=GS_VEC_SZ;
222: static int *tree_buf=NULL;
223: static int tree_buf_sz=0;
224: static int ntree=0;


227: /******************************************************************************
228: Function: gs_init_()

230: Input : 
231: Output: 
232: Return: 
233: Description:  
234: ******************************************************************************/
235: void gs_init_vec_sz(int size)
236: {
237:   /*  vec_ch = TRUE; */

239:   vec_sz = size;
240: }

242: /******************************************************************************
243: Function: gs_init_()

245: Input : 
246: Output: 
247: Return: 
248: Description:  
249: ******************************************************************************/
250: void gs_init_msg_buf_sz(int buf_size)
251: {
252:   /*  msg_ch = TRUE; */

254:   msg_buf = buf_size;
255: }

257: /******************************************************************************
258: Function: gs_init()

260: Input : 

262: Output: 

264: RETURN: 

266: Description:  
267: ******************************************************************************/
268: gs_id *
269: gs_init( int *elms, int nel, int level)
270: {
271:    gs_id *gs;
272:   MPI_Group gs_group;
273:   MPI_Comm  gs_comm;

275:   /* ensure that communication package has been initialized */
276:   comm_init();


279:   /* determines if we have enough dynamic/semi-static memory */
280:   /* checks input, allocs and sets gd_id template            */
281:   gs = gsi_check_args(elms,nel,level);

283:   /* only bit mask version up and working for the moment    */
284:   /* LATER :: get int list version working for sparse pblms */
285:   gsi_via_bit_mask(gs);


288:   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289:   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290:   gs->gs_comm=gs_comm;

292:   return(gs);
293: }



297: /******************************************************************************
298: Function: gsi_new()

300: Input : 
301: Output: 
302: Return: 
303: Description: 

305: elm list must >= 0!!!
306: elm repeats allowed
307: ******************************************************************************/
308: static
309: gs_id *
310: gsi_new(void)
311: {
312:   gs_id *gs;
313:   gs = (gs_id *) malloc(sizeof(gs_id));
314:   PetscMemzero(gs,sizeof(gs_id));
315:   return(gs);
316: }



320: /******************************************************************************
321: Function: gsi_check_args()

323: Input : 
324: Output: 
325: Return: 
326: Description: 

328: elm list must >= 0!!!
329: elm repeats allowed
330: local working copy of elms is sorted
331: ******************************************************************************/
332: static
333: gs_id *
334: gsi_check_args(int *in_elms, int nel, int level)
335: {
336:    int i, j, k, t2;
337:   int *companion, *elms, *unique, *iptr;
338:   int num_local=0, *num_to_reduce, **local_reduce;
339:   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
340:   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
341:   int work[sizeof(oprs)/sizeof(oprs[0])-1];
342:   gs_id *gs;



346: #ifdef SAFE
347:   if (!in_elms)
348:     {error_msg_fatal("elms point to nothing!!!\n");}

350:   if (nel<0)
351:     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}

353:   if (nel==0)
354:     {error_msg_warning("I don't have any elements!!!\n");}
355: #endif

357:   /* get space for gs template */
358:   gs = gsi_new();
359:   gs->id = ++num_gs_ids;

361:   /* hmt 6.4.99                                            */
362:   /* caller can set global ids that don't participate to 0 */
363:   /* gs_init ignores all zeros in elm list                 */
364:   /* negative global ids are still invalid                 */
365:   for (i=j=0;i<nel;i++)
366:     {if (in_elms[i]!=0) {j++;}}

368:   k=nel; nel=j;

370:   /* copy over in_elms list and create inverse map */
371:   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
372:   companion = (int*) malloc(nel*sizeof(PetscInt));
373:   /* ivec_c_index(companion,nel); */
374:   /* ivec_copy(elms,in_elms,nel); */
375:   for (i=j=0;i<k;i++)
376:     {
377:       if (in_elms[i]!=0)
378:         {elms[j] = in_elms[i]; companion[j++] = i;}
379:     }

381:   if (j!=nel)
382:     {error_msg_fatal("nel j mismatch!\n");}

384: #ifdef SAFE
385:   /* pre-pass ... check to see if sorted */
386:   elms[nel] = INT_MAX;
387:   iptr = elms;
388:   unique = elms+1;
389:   j=0;
390:   while (*iptr!=INT_MAX)
391:     {
392:       if (*iptr++>*unique++)
393:         {j=1; break;}
394:     }

396:   /* set up inverse map */
397:   if (j)
398:     {
399:       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
400:       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
401:     }
402:   else
403:     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
404: #else
405:   SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
406: #endif
407:   elms[nel] = INT_MIN;

409:   /* first pass */
410:   /* determine number of unique elements, check pd */
411:   for (i=k=0;i<nel;i+=j)
412:     {
413:       t2 = elms[i];
414:       j=++i;
415: 
416:       /* clump 'em for now */
417:       while (elms[j]==t2) {j++;}
418: 
419:       /* how many together and num local */
420:       if (j-=i)
421:         {num_local++; k+=j;}
422:     }

424:   /* how many unique elements? */
425:   gs->repeats=k;
426:   gs->nel = nel-k;


429:   /* number of repeats? */
430:   gs->num_local = num_local;
431:   num_local+=2;
432:   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
433:   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));

435:   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
436:   gs->elms = unique;
437:   gs->nel_total = nel;
438:   gs->local_elms = elms;
439:   gs->companion = companion;

441:   /* compess map as well as keep track of local ops */
442:   for (num_local=i=j=0;i<gs->nel;i++)
443:     {
444:       k=j;
445:       t2 = unique[i] = elms[j];
446:       companion[i] = companion[j];
447: 
448:       while (elms[j]==t2) {j++;}

450:       if ((t2=(j-k))>1)
451:         {
452:           /* number together */
453:           num_to_reduce[num_local] = t2++;
454:           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));

456:           /* to use binary searching don't remap until we check intersection */
457:           *iptr++ = i;
458: 
459:           /* note that we're skipping the first one */
460:           while (++k<j)
461:             {*(iptr++) = companion[k];}
462:           *iptr = -1;
463:         }
464:     }

466:   /* sentinel for ngh_buf */
467:   unique[gs->nel]=INT_MAX;

469:   /* for two partition sort hack */
470:   num_to_reduce[num_local] = 0;
471:   local_reduce[num_local] = NULL;
472:   num_to_reduce[++num_local] = 0;
473:   local_reduce[num_local] = NULL;

475:   /* load 'em up */
476:   /* note one extra to hold NON_UNIFORM flag!!! */
477:   vals[2] = vals[1] = vals[0] = nel;
478:   if (gs->nel>0)
479:     {
480:        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
481:        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
482:     }
483:   else
484:     {
485:        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
486:        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
487:     }
488:   vals[5] = level;
489:   vals[6] = num_gs_ids;

491:   /* GLOBAL: send 'em out */
492:   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);

494:   /* must be semi-pos def - only pairwise depends on this */
495:   /* LATER - remove this restriction */
496:   if (vals[3]<0)
497:     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}

499:   if (vals[4]==INT_MAX)
500:     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}

502:   gs->nel_min = vals[0];
503:   gs->nel_max = vals[1];
504:   gs->nel_sum = vals[2];
505:   gs->gl_min  = vals[3];
506:   gs->gl_max  = vals[4];
507:   gs->negl    = vals[4]-vals[3]+1;

509:   if (gs->negl<=0)
510:     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
511: 
512:   /* LATER :: add level == -1 -> program selects level */
513:   if (vals[5]<0)
514:     {vals[5]=0;}
515:   else if (vals[5]>num_nodes)
516:     {vals[5]=num_nodes;}
517:   gs->level = vals[5];

519:   return(gs);
520: }


523: /******************************************************************************
524: Function: gsi_via_bit_mask()

526: Input : 
527: Output: 
528: Return: 
529: Description: 


532: ******************************************************************************/
533: static
534: void
535: gsi_via_bit_mask(gs_id *gs)
536: {
537:    int i, nel, *elms;
538:   int t1;
539:   int **reduce;
540:   int *map;

542:   /* totally local removes ... ct_bits == 0 */
543:   get_ngh_buf(gs);

545:   if (gs->level)
546:     {set_pairwise(gs);}

548:   if (gs->max_left_over)
549:     {set_tree(gs);}

551:   /* intersection local and pairwise/tree? */
552:   gs->num_local_total = gs->num_local;
553:   gs->gop_local_reduce = gs->local_reduce;
554:   gs->num_gop_local_reduce = gs->num_local_reduce;

556:   map = gs->companion;

558:   /* is there any local compression */
559:   if (!gs->num_local) {
560:     gs->local_strength = NONE;
561:     gs->num_local_gop = 0;
562:   } else {
563:       /* ok find intersection */
564:       map = gs->companion;
565:       reduce = gs->local_reduce;
566:       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
567:         {
568:           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
569:               ||
570:               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
571:             {
572:               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
573:               t1++;
574:               if (gs->num_local_reduce[i]<=0)
575:                 {error_msg_fatal("nobody in list?");}
576:               gs->num_local_reduce[i] *= -1;
577:             }
578:            **reduce=map[**reduce];
579:         }

581:       /* intersection is empty */
582:       if (!t1)
583:         {
584:           gs->local_strength = FULL;
585:           gs->num_local_gop = 0;
586:         }
587:       /* intersection not empty */
588:       else
589:         {
590:           gs->local_strength = PARTIAL;
591:           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
592:                    gs->num_local + 1, SORT_INT_PTR);

594:           gs->num_local_gop = t1;
595:           gs->num_local_total =  gs->num_local;
596:           gs->num_local    -= t1;
597:           gs->gop_local_reduce = gs->local_reduce;
598:           gs->num_gop_local_reduce = gs->num_local_reduce;

600:           for (i=0; i<t1; i++)
601:             {
602:               if (gs->num_gop_local_reduce[i]>=0)
603:                 {error_msg_fatal("they aren't negative?");}
604:               gs->num_gop_local_reduce[i] *= -1;
605:               gs->local_reduce++;
606:               gs->num_local_reduce++;
607:             }
608:           gs->local_reduce++;
609:           gs->num_local_reduce++;
610:         }
611:     }

613:   elms = gs->pw_elm_list;
614:   nel  = gs->len_pw_list;
615:   for (i=0; i<nel; i++)
616:     {elms[i] = map[elms[i]];}

618:   elms = gs->tree_map_in;
619:   nel  = gs->tree_map_sz;
620:   for (i=0; i<nel; i++)
621:     {elms[i] = map[elms[i]];}

623:   /* clean up */
624:   free((void*) gs->local_elms);
625:   free((void*) gs->companion);
626:   free((void*) gs->elms);
627:   free((void*) gs->ngh_buf);
628:   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
629: }



633: /******************************************************************************
634: Function: place_in_tree()

636: Input : 
637: Output: 
638: Return: 
639: Description: 


642: ******************************************************************************/
643: static
644: void
645: place_in_tree( int elm)
646: {
647:    int *tp, n;


650:   if (ntree==tree_buf_sz)
651:     {
652:       if (tree_buf_sz)
653:         {
654:           tp = tree_buf;
655:           n = tree_buf_sz;
656:           tree_buf_sz<<=1;
657:           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
658:           ivec_copy(tree_buf,tp,n);
659:           free(tp);
660:         }
661:       else
662:         {
663:           tree_buf_sz = TREE_BUF_SZ;
664:           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
665:         }
666:     }

668:   tree_buf[ntree++] = elm;
669: }



673: /******************************************************************************
674: Function: get_ngh_buf()

676: Input : 
677: Output: 
678: Return: 
679: Description: 


682: ******************************************************************************/
683: static
684: void
685: get_ngh_buf(gs_id *gs)
686: {
687:    int i, j, npw=0, ntree_map=0;
688:   int p_mask_size, ngh_buf_size, buf_size;
689:   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
690:   int *ngh_buf, *buf1, *buf2;
691:   int offset, per_load, num_loads, or_ct, start, end;
692:   int *ptr1, *ptr2, i_start, negl, nel, *elms;
693:   int oper=GL_B_OR;
694:   int *ptr3, *t_mask, level, ct1, ct2;

696:   /* to make life easier */
697:   nel   = gs->nel;
698:   elms  = gs->elms;
699:   level = gs->level;
700: 
701:   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
702:   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
703:   set_bit_mask(p_mask,p_mask_size,my_id);

705:   /* allocate space for masks and info bufs */
706:   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
707:   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
708:   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
709:   t_mask = (int*) malloc(p_mask_size);
710:   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);

712:   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
713:   /* had thought I could exploit rendezvous threshold */

715:   /* default is one pass */
716:   per_load = negl  = gs->negl;
717:   gs->num_loads = num_loads = 1;
718:   i=p_mask_size*negl;

720:   /* possible overflow on buffer size */
721:   /* overflow hack                    */
722:   if (i<0) {i=INT_MAX;}

724:   buf_size = PetscMin(msg_buf,i);

726:   /* can we do it? */
727:   if (p_mask_size>buf_size)
728:     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}

730:   /* get giop buf space ... make *only* one malloc */
731:   buf1 = (int*) malloc(buf_size<<1);

733:   /* more than one gior exchange needed? */
734:   if (buf_size!=i)
735:     {
736:       per_load = buf_size/p_mask_size;
737:       buf_size = per_load*p_mask_size;
738:       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
739:     }


742:   /* convert buf sizes from #bytes to #ints - 32 bit only! */
743: #ifdef SAFE  
744:   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
745: #else  
746:   p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
747: #endif
748: 
749:   /* find giop work space */
750:   buf2 = buf1+buf_size;

752:   /* hold #ints needed for processor masks */
753:   gs->mask_sz=p_mask_size;

755:   /* init buffers */
756:   ivec_zero(sh_proc_mask,p_mask_size);
757:   ivec_zero(pw_sh_proc_mask,p_mask_size);
758:   ivec_zero(ngh_buf,ngh_buf_size);

760:   /* HACK reset tree info */
761:   tree_buf=NULL;
762:   tree_buf_sz=ntree=0;

764:   /* queue the tree elements for now */
765:   /* elms_q = new_queue(); */
766: 
767:   /* can also queue tree info for pruned or forest implememtation */
768:   /*  mask_q = new_queue(); */

770:   /* ok do it */
771:   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
772:     {
773:       /* identity for bitwise or is 000...000 */
774:       ivec_zero(buf1,buf_size);

776:       /* load msg buffer */
777:       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
778:         {
779:           offset = (offset-start)*p_mask_size;
780:           ivec_copy(buf1+offset,p_mask,p_mask_size);
781:         }

783:       /* GLOBAL: pass buffer */
784:       giop(buf1,buf2,buf_size,&oper);


787:       /* unload buffer into ngh_buf */
788:       ptr2=(elms+i_start);
789:       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
790:         {
791:           /* I own it ... may have to pairwise it */
792:           if (j==*ptr2)
793:             {
794:               /* do i share it w/anyone? */
795: #ifdef SAFE
796:               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
797: #else
798:               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
799: #endif
800:               /* guess not */
801:               if (ct1<2)
802:                 {ptr2++; ptr1+=p_mask_size; continue;}

804:               /* i do ... so keep info and turn off my bit */
805:               ivec_copy(ptr1,ptr3,p_mask_size);
806:               ivec_xor(ptr1,p_mask,p_mask_size);
807:               ivec_or(sh_proc_mask,ptr1,p_mask_size);
808: 
809:               /* is it to be done pairwise? */
810:               if (--ct1<=level)
811:                 {
812:                   npw++;
813: 
814:                   /* turn on high bit to indicate pw need to process */
815:                   *ptr2++ |= TOP_BIT;
816:                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
817:                   ptr1+=p_mask_size;
818:                   continue;
819:                 }

821:               /* get set for next and note that I have a tree contribution */
822:               /* could save exact elm index for tree here -> save a search */
823:               ptr2++; ptr1+=p_mask_size; ntree_map++;
824:             }
825:           /* i don't but still might be involved in tree */
826:           else
827:             {

829:               /* shared by how many? */
830: #ifdef SAFE
831:               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
832: #else
833:               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
834: #endif

836:               /* none! */
837:               if (ct1<2)
838:                 {continue;}

840:               /* is it going to be done pairwise? but not by me of course!*/
841:               if (--ct1<=level)
842:                 {continue;}
843:             }
844:           /* LATER we're going to have to process it NOW */
845:           /* nope ... tree it */
846:           place_in_tree(j);
847:         }
848:     }

850:   free((void*)t_mask);
851:   free((void*)buf1);

853:   gs->len_pw_list=npw;
854:   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));

856:   /* expand from bit mask list to int list and save ngh list */
857:   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
858:   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);

860:   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));

862:   oper = GL_MAX;
863:   ct1 = gs->num_nghs;
864:   giop(&ct1,&ct2,1,&oper);
865:   gs->max_nghs = ct1;

867:   gs->tree_map_sz  = ntree_map;
868:   gs->max_left_over=ntree;

870:   free((void*)p_mask);
871:   free((void*)sh_proc_mask);

873: }





879: /******************************************************************************
880: Function: pairwise_init()

882: Input : 
883: Output: 
884: Return: 
885: Description: 

887: if an element is shared by fewer that level# of nodes do pairwise exch 
888: ******************************************************************************/
889: static
890: void
891: set_pairwise(gs_id *gs)
892: {
893:    int i, j;
894:   int p_mask_size;
895:   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
896:   int *ngh_buf, *buf2;
897:   int offset;
898:   int *msg_list, *msg_size, **msg_nodes, nprs;
899:   int *pairwise_elm_list, len_pair_list=0;
900:   int *iptr, t1, i_start, nel, *elms;
901:   int ct;



905:   /* to make life easier */
906:   nel  = gs->nel;
907:   elms = gs->elms;
908:   ngh_buf = gs->ngh_buf;
909:   sh_proc_mask  = gs->pw_nghs;

911:   /* need a few temp masks */
912:   p_mask_size   = len_bit_mask(num_nodes);
913:   p_mask        = (int*) malloc(p_mask_size);
914:   tmp_proc_mask = (int*) malloc(p_mask_size);

916:   /* set mask to my my_id's bit mask */
917:   set_bit_mask(p_mask,p_mask_size,my_id);

919: #ifdef SAFE
920:   p_mask_size /= sizeof(PetscInt);
921: #else
922:   p_mask_size >>= 2;
923: #endif
924: 
925:   len_pair_list=gs->len_pw_list;
926:   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));

928:   /* how many processors (nghs) do we have to exchange with? */
929:   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));


932:   /* allocate space for gs_gop() info */
933:   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
934:   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
935:   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));

937:   /* init msg_size list */
938:   ivec_zero(msg_size,nprs);

940:   /* expand from bit mask list to int list */
941:   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
942: 
943:   /* keep list of elements being handled pairwise */
944:   for (i=j=0;i<nel;i++)
945:     {
946:       if (elms[i] & TOP_BIT)
947:         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
948:     }
949:   pairwise_elm_list[j] = -1;

951:   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
952:   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
953:   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
954:   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
955:   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);

957:   /* find who goes to each processor */
958:   for (i_start=i=0;i<nprs;i++)
959:     {
960:       /* processor i's mask */
961:       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);

963:       /* det # going to processor i */
964:       for (ct=j=0;j<len_pair_list;j++)
965:         {
966:           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
967:           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
968:           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
969:             {ct++;}
970:         }
971:       msg_size[i] = ct;
972:       i_start = PetscMax(i_start,ct);

974:       /*space to hold nodes in message to first neighbor */
975:       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));

977:       for (j=0;j<len_pair_list;j++)
978:         {
979:           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
980:           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
981:           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
982:             {*iptr++ = j;}
983:         }
984:       *iptr = -1;
985:     }
986:   msg_nodes[nprs] = NULL;

988:   j=gs->loc_node_pairs=i_start;
989:   t1 = GL_MAX;
990:   giop(&i_start,&offset,1,&t1);
991:   gs->max_node_pairs = i_start;

993:   i_start=j;
994:   t1 = GL_MIN;
995:   giop(&i_start,&offset,1,&t1);
996:   gs->min_node_pairs = i_start;

998:   i_start=j;
999:   t1 = GL_ADD;
1000:   giop(&i_start,&offset,1,&t1);
1001:   gs->avg_node_pairs = i_start/num_nodes + 1;

1003:   i_start=nprs;
1004:   t1 = GL_MAX;
1005:   giop(&i_start,&offset,1,&t1);
1006:   gs->max_pairs = i_start;


1009:   /* remap pairwise in tail of gsi_via_bit_mask() */
1010:   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1011:   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1012:   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);

1014:   /* reset malloc pool */
1015:   free((void*)p_mask);
1016:   free((void*)tmp_proc_mask);

1018: }



1022: /******************************************************************************
1023: Function: set_tree()

1025: Input : 
1026: Output: 
1027: Return: 
1028: Description: 

1030: to do pruned tree just save ngh buf copy for each one and decode here!
1031: ******************************************************************************/
1032: static
1033: void
1034: set_tree(gs_id *gs)
1035: {
1036:    int i, j, n, nel;
1037:    int *iptr_in, *iptr_out, *tree_elms, *elms;


1040:   /* local work ptrs */
1041:   elms = gs->elms;
1042:   nel     = gs->nel;

1044:   /* how many via tree */
1045:   gs->tree_nel  = n = ntree;
1046:   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1047:   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1048:   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1049:   j=gs->tree_map_sz;
1050:   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1051:   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));

1053:   /* search the longer of the two lists */
1054:   /* note ... could save this info in get_ngh_buf and save searches */
1055:   if (n<=nel)
1056:     {
1057:       /* bijective fct w/remap - search elm list */
1058:       for (i=0; i<n; i++)
1059:         {
1060:           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1061:             {*iptr_in++ = j; *iptr_out++ = i;}
1062:         }
1063:     }
1064:   else
1065:     {
1066:       for (i=0; i<nel; i++)
1067:         {
1068:           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1069:             {*iptr_in++ = i; *iptr_out++ = j;}
1070:         }
1071:     }

1073:   /* sentinel */
1074:   *iptr_in = *iptr_out = -1;

1076: }


1079: /******************************************************************************
1080: Function: gather_scatter

1082: Input : 
1083: Output: 
1084: Return: 
1085: Description: 
1086: ******************************************************************************/
1087: static
1088: void
1089: gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1090: {
1091:    int *num, *map, **reduce;
1092:    PetscScalar tmp;


1095:   num    = gs->num_gop_local_reduce;
1096:   reduce = gs->gop_local_reduce;
1097:   while ((map = *reduce++))
1098:     {
1099:       /* wall */
1100:       if (*num == 2)
1101:         {
1102:           num ++;
1103:           vals[map[1]] = vals[map[0]];
1104:         }
1105:       /* corner shared by three elements */
1106:       else if (*num == 3)
1107:         {
1108:           num ++;
1109:           vals[map[2]] = vals[map[1]] = vals[map[0]];
1110:         }
1111:       /* corner shared by four elements */
1112:       else if (*num == 4)
1113:         {
1114:           num ++;
1115:           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1116:         }
1117:       /* general case ... odd geoms ... 3D*/
1118:       else
1119:         {
1120:           num++;
1121:           tmp = *(vals + *map++);
1122:           while (*map >= 0)
1123:             {*(vals + *map++) = tmp;}
1124:         }
1125:     }
1126: }



1130: /******************************************************************************
1131: Function: gather_scatter

1133: Input : 
1134: Output: 
1135: Return: 
1136: Description: 
1137: ******************************************************************************/
1138: void
1139: gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1140: {
1141:   /* local only operations!!! */
1142:   if (gs->num_local)
1143:     {gs_gop_local_binary(gs,vals,fct);}
1144: 
1145:   /* if intersection tree/pairwise and local isn't empty */
1146:   if (gs->num_local_gop)
1147:     {
1148:       gs_gop_local_in_binary(gs,vals,fct);
1149: 
1150:       /* pairwise */
1151:       if (gs->num_pairs)
1152:         {gs_gop_pairwise_binary(gs,vals,fct);}
1153: 
1154:       /* tree */
1155:       else if (gs->max_left_over)
1156:         {gs_gop_tree_binary(gs,vals,fct);}
1157: 
1158:       gs_gop_local_out(gs,vals);
1159:     }
1160:   /* if intersection tree/pairwise and local is empty */
1161:   else
1162:     {
1163:       /* pairwise */
1164:       if (gs->num_pairs)
1165:         {gs_gop_pairwise_binary(gs,vals,fct);}
1166: 
1167:       /* tree */
1168:       else if (gs->max_left_over)
1169:         {gs_gop_tree_binary(gs,vals,fct);}
1170:     }
1171: }



1175: /******************************************************************************
1176: Function: gather_scatter

1178: Input : 
1179: Output: 
1180: Return: 
1181: Description: 
1182: ******************************************************************************/
1183: static
1184: void
1185: gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1186: {
1187:    int *num, *map, **reduce;
1188:   PetscScalar tmp;


1191:   num    = gs->num_local_reduce;
1192:   reduce = gs->local_reduce;
1193:   while ((map = *reduce))
1194:     {
1195:       num ++;
1196:       (*fct)(&tmp,NULL,1);
1197:       /* tmp = 0.0; */
1198:       while (*map >= 0)
1199:         {(*fct)(&tmp,(vals + *map),1); map++;}
1200:         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1201: 
1202:       map = *reduce++;
1203:       while (*map >= 0)
1204:         {*(vals + *map++) = tmp;}
1205:     }
1206: }



1210: /******************************************************************************
1211: Function: gather_scatter

1213: Input : 
1214: Output: 
1215: Return: 
1216: Description: 
1217: ******************************************************************************/
1218: static
1219: void
1220: gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1221: {
1222:    int *num, *map, **reduce;
1223:    PetscScalar *base;


1226:   num    = gs->num_gop_local_reduce;

1228:   reduce = gs->gop_local_reduce;
1229:   while ((map = *reduce++))
1230:     {
1231:       num++;
1232:       base = vals + *map++;
1233:       while (*map >= 0)
1234:         {(*fct)(base,(vals + *map),1); map++;}
1235:     }
1236: }



1240: /******************************************************************************
1241: Function: gather_scatter

1243: VERSION 3 :: 

1245: Input : 
1246: Output: 
1247: Return: 
1248: Description: 
1249: ******************************************************************************/
1250: static
1251: void
1252: gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1253:                         rbfp fct)
1254: {
1255:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1256:    int *iptr, *msg_list, *msg_size, **msg_nodes;
1257:    int *pw, *list, *size, **nodes;
1258:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1259:   MPI_Status status;


1262:   /* strip and load s */
1263:   msg_list =list         = gs->pair_list;
1264:   msg_size =size         = gs->msg_sizes;
1265:   msg_nodes=nodes        = gs->node_list;
1266:   iptr=pw                = gs->pw_elm_list;
1267:   dptr1=dptr3            = gs->pw_vals;
1268:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1269:   msg_ids_out = ids_out  = gs->msg_ids_out;
1270:   dptr2                  = gs->out;
1271:   in1=in2                = gs->in;

1273:   /* post the receives */
1274:   /*  msg_nodes=nodes; */
1275:   do
1276:     {
1277:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1278:          second one *list and do list++ afterwards */
1279:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1280:                 gs->gs_comm, msg_ids_in++);
1281:       in1 += *size++;
1282:     }
1283:   while (*++msg_nodes);
1284:   msg_nodes=nodes;

1286:   /* load gs values into in out gs buffers */
1287:   while (*iptr >= 0)
1288:     {*dptr3++ = *(in_vals + *iptr++);}

1290:   /* load out buffers and post the sends */
1291:   while ((iptr = *msg_nodes++))
1292:     {
1293:       dptr3 = dptr2;
1294:       while (*iptr >= 0)
1295:         {*dptr2++ = *(dptr1 + *iptr++);}
1296:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1297:       /* is msg_ids_out++ correct? */
1298:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1299:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1300:     }

1302:   if (gs->max_left_over)
1303:     {gs_gop_tree_binary(gs,in_vals,fct);}

1305:   /* process the received data */
1306:   msg_nodes=nodes;
1307:   while ((iptr = *nodes++))
1308:     {
1309:       /* Should I check the return value of MPI_Wait() or status? */
1310:       /* Can this loop be replaced by a call to MPI_Waitall()? */
1311:       MPI_Wait(ids_in++, &status);
1312:       while (*iptr >= 0)
1313:         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1314:       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1315:     }

1317:   /* replace vals */
1318:   while (*pw >= 0)
1319:     {*(in_vals + *pw++) = *dptr1++;}

1321:   /* clear isend message handles */
1322:   /* This changed for clarity though it could be the same */
1323:   while (*msg_nodes++)
1324:     /* Should I check the return value of MPI_Wait() or status? */
1325:     /* Can this loop be replaced by a call to MPI_Waitall()? */
1326:     {MPI_Wait(ids_out++, &status);}
1327: }



1331: /******************************************************************************
1332: Function: gather_scatter

1334: Input : 
1335: Output: 
1336: Return: 
1337: Description: 
1338: ******************************************************************************/
1339: static
1340: void
1341: gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1342: {
1343:   int size;
1344:   int *in, *out;
1345:   PetscScalar *buf, *work;

1347:   in   = gs->tree_map_in;
1348:   out  = gs->tree_map_out;
1349:   buf  = gs->tree_buf;
1350:   work = gs->tree_work;
1351:   size = gs->tree_nel;

1353:   /* load vals vector w/identity */
1354:   (*fct)(buf,NULL,size);
1355: 
1356:   /* load my contribution into val vector */
1357:   while (*in >= 0)
1358:     {(*fct)((buf + *out++),(vals + *in++),-1);}

1360:   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);

1362:   in   = gs->tree_map_in;
1363:   out  = gs->tree_map_out;
1364:   while (*in >= 0)
1365:     {(*fct)((vals + *in++),(buf + *out++),-1);}

1367: }




1372: /******************************************************************************
1373: Function: gather_scatter

1375: Input : 
1376: Output: 
1377: Return: 
1378: Description: 
1379: ******************************************************************************/
1380: void
1381: gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1382: {
1383:   switch (*op) {
1384:   case '+':
1385:     gs_gop_plus(gs,vals);
1386:     break;
1387:   case '*':
1388:     gs_gop_times(gs,vals);
1389:     break;
1390:   case 'a':
1391:     gs_gop_min_abs(gs,vals);
1392:     break;
1393:   case 'A':
1394:     gs_gop_max_abs(gs,vals);
1395:     break;
1396:   case 'e':
1397:     gs_gop_exists(gs,vals);
1398:     break;
1399:   case 'm':
1400:     gs_gop_min(gs,vals);
1401:     break;
1402:   case 'M':
1403:     gs_gop_max(gs,vals); break;
1404:     /*
1405:     if (*(op+1)=='\0')
1406:       {gs_gop_max(gs,vals); break;}
1407:     else if (*(op+1)=='X')
1408:       {gs_gop_max_abs(gs,vals); break;}
1409:     else if (*(op+1)=='N')
1410:       {gs_gop_min_abs(gs,vals); break;}
1411:     */
1412:   default:
1413:     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1414:     error_msg_warning("gs_gop() :: default :: plus");
1415:     gs_gop_plus(gs,vals);
1416:     break;
1417:   }
1418: }


1421: /******************************************************************************
1422: Function: gather_scatter

1424: Input : 
1425: Output: 
1426: Return: 
1427: Description: 
1428: ******************************************************************************/
1429: static void
1430: gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1431: {
1432:   /* local only operations!!! */
1433:   if (gs->num_local)
1434:     {gs_gop_local_exists(gs,vals);}

1436:   /* if intersection tree/pairwise and local isn't empty */
1437:   if (gs->num_local_gop)
1438:     {
1439:       gs_gop_local_in_exists(gs,vals);

1441:       /* pairwise */
1442:       if (gs->num_pairs)
1443:         {gs_gop_pairwise_exists(gs,vals);}
1444: 
1445:       /* tree */
1446:       else if (gs->max_left_over)
1447:         {gs_gop_tree_exists(gs,vals);}
1448: 
1449:       gs_gop_local_out(gs,vals);
1450:     }
1451:   /* if intersection tree/pairwise and local is empty */
1452:   else
1453:     {
1454:       /* pairwise */
1455:       if (gs->num_pairs)
1456:         {gs_gop_pairwise_exists(gs,vals);}
1457: 
1458:       /* tree */
1459:       else if (gs->max_left_over)
1460:         {gs_gop_tree_exists(gs,vals);}
1461:     }
1462: }



1466: /******************************************************************************
1467: Function: gather_scatter

1469: Input : 
1470: Output: 
1471: Return: 
1472: Description: 
1473: ******************************************************************************/
1474: static
1475: void
1476: gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1477: {
1478:    int *num, *map, **reduce;
1479:    PetscScalar tmp;


1482:   num    = gs->num_local_reduce;
1483:   reduce = gs->local_reduce;
1484:   while ((map = *reduce))
1485:     {
1486:       num ++;
1487:       tmp = 0.0;
1488:       while (*map >= 0)
1489:         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1490: 
1491:       map = *reduce++;
1492:       while (*map >= 0)
1493:         {*(vals + *map++) = tmp;}
1494:     }
1495: }



1499: /******************************************************************************
1500: Function: gather_scatter

1502: Input : 
1503: Output: 
1504: Return: 
1505: Description: 
1506: ******************************************************************************/
1507: static
1508: void
1509: gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1510: {
1511:    int *num, *map, **reduce;
1512:    PetscScalar *base;


1515:   num    = gs->num_gop_local_reduce;
1516:   reduce = gs->gop_local_reduce;
1517:   while ((map = *reduce++))
1518:     {
1519:       num++;
1520:       base = vals + *map++;
1521:       while (*map >= 0)
1522:         {*base = EXISTS(*base,*(vals + *map)); map++;}
1523:     }
1524: }



1528: /******************************************************************************
1529: Function: gather_scatter

1531: VERSION 3 :: 

1533: Input : 
1534: Output: 
1535: Return: 
1536: Description: 
1537: ******************************************************************************/
1538: static
1539: void
1540: gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1541: {
1542:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1543:    int *iptr, *msg_list, *msg_size, **msg_nodes;
1544:    int *pw, *list, *size, **nodes;
1545:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1546:   MPI_Status status;


1549:   /* strip and load s */
1550:   msg_list =list         = gs->pair_list;
1551:   msg_size =size         = gs->msg_sizes;
1552:   msg_nodes=nodes        = gs->node_list;
1553:   iptr=pw                = gs->pw_elm_list;
1554:   dptr1=dptr3            = gs->pw_vals;
1555:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1556:   msg_ids_out = ids_out  = gs->msg_ids_out;
1557:   dptr2                  = gs->out;
1558:   in1=in2                = gs->in;

1560:   /* post the receives */
1561:   /*  msg_nodes=nodes; */
1562:   do
1563:     {
1564:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1565:          second one *list and do list++ afterwards */
1566:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1567:                 gs->gs_comm, msg_ids_in++);
1568:       in1 += *size++;
1569:     }
1570:   while (*++msg_nodes);
1571:   msg_nodes=nodes;

1573:   /* load gs values into in out gs buffers */
1574:   while (*iptr >= 0)
1575:     {*dptr3++ = *(in_vals + *iptr++);}

1577:   /* load out buffers and post the sends */
1578:   while ((iptr = *msg_nodes++))
1579:     {
1580:       dptr3 = dptr2;
1581:       while (*iptr >= 0)
1582:         {*dptr2++ = *(dptr1 + *iptr++);}
1583:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1584:       /* is msg_ids_out++ correct? */
1585:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1586:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1587:     }

1589:   if (gs->max_left_over)
1590:     {gs_gop_tree_exists(gs,in_vals);}

1592:   /* process the received data */
1593:   msg_nodes=nodes;
1594:   while ((iptr = *nodes++))
1595:     {
1596:       /* Should I check the return value of MPI_Wait() or status? */
1597:       /* Can this loop be replaced by a call to MPI_Waitall()? */
1598:       MPI_Wait(ids_in++, &status);
1599:       while (*iptr >= 0)
1600:         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1601:     }

1603:   /* replace vals */
1604:   while (*pw >= 0)
1605:     {*(in_vals + *pw++) = *dptr1++;}

1607:   /* clear isend message handles */
1608:   /* This changed for clarity though it could be the same */
1609:   while (*msg_nodes++)
1610:     /* Should I check the return value of MPI_Wait() or status? */
1611:     /* Can this loop be replaced by a call to MPI_Waitall()? */
1612:     {MPI_Wait(ids_out++, &status);}
1613: }



1617: /******************************************************************************
1618: Function: gather_scatter

1620: Input : 
1621: Output: 
1622: Return: 
1623: Description: 
1624: ******************************************************************************/
1625: static
1626: void
1627: gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1628: {
1629:   int size;
1630:   int *in, *out;
1631:   PetscScalar *buf, *work;
1632:   int op[] = {GL_EXISTS,0};


1635:   in   = gs->tree_map_in;
1636:   out  = gs->tree_map_out;
1637:   buf  = gs->tree_buf;
1638:   work = gs->tree_work;
1639:   size = gs->tree_nel;

1641:   rvec_zero(buf,size);

1643:   while (*in >= 0)
1644:     {
1645:       /*
1646:       printf("%d :: out=%d\n",my_id,*out);
1647:       printf("%d :: in=%d\n",my_id,*in);
1648:       */
1649:       *(buf + *out++) = *(vals + *in++);
1650:     }

1652:   grop(buf,work,size,op);

1654:   in   = gs->tree_map_in;
1655:   out  = gs->tree_map_out;

1657:   while (*in >= 0)
1658:     {*(vals + *in++) = *(buf + *out++);}

1660: }



1664: /******************************************************************************
1665: Function: gather_scatter

1667: Input : 
1668: Output: 
1669: Return: 
1670: Description: 
1671: ******************************************************************************/
1672: static void
1673: gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1674: {
1675:   /* local only operations!!! */
1676:   if (gs->num_local)
1677:     {gs_gop_local_max_abs(gs,vals);}

1679:   /* if intersection tree/pairwise and local isn't empty */
1680:   if (gs->num_local_gop)
1681:     {
1682:       gs_gop_local_in_max_abs(gs,vals);

1684:       /* pairwise */
1685:       if (gs->num_pairs)
1686:         {gs_gop_pairwise_max_abs(gs,vals);}
1687: 
1688:       /* tree */
1689:       else if (gs->max_left_over)
1690:         {gs_gop_tree_max_abs(gs,vals);}
1691: 
1692:       gs_gop_local_out(gs,vals);
1693:     }
1694:   /* if intersection tree/pairwise and local is empty */
1695:   else
1696:     {
1697:       /* pairwise */
1698:       if (gs->num_pairs)
1699:         {gs_gop_pairwise_max_abs(gs,vals);}
1700: 
1701:       /* tree */
1702:       else if (gs->max_left_over)
1703:         {gs_gop_tree_max_abs(gs,vals);}
1704:     }
1705: }



1709: /******************************************************************************
1710: Function: gather_scatter

1712: Input : 
1713: Output: 
1714: Return: 
1715: Description: 
1716: ******************************************************************************/
1717: static
1718: void
1719: gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1720: {
1721:    int *num, *map, **reduce;
1722:    PetscScalar tmp;


1725:   num    = gs->num_local_reduce;
1726:   reduce = gs->local_reduce;
1727:   while ((map = *reduce))
1728:     {
1729:       num ++;
1730:       tmp = 0.0;
1731:       while (*map >= 0)
1732:         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1733: 
1734:       map = *reduce++;
1735:       while (*map >= 0)
1736:         {*(vals + *map++) = tmp;}
1737:     }
1738: }



1742: /******************************************************************************
1743: Function: gather_scatter

1745: Input : 
1746: Output: 
1747: Return: 
1748: Description: 
1749: ******************************************************************************/
1750: static
1751: void
1752: gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1753: {
1754:    int *num, *map, **reduce;
1755:    PetscScalar *base;


1758:   num    = gs->num_gop_local_reduce;
1759:   reduce = gs->gop_local_reduce;
1760:   while ((map = *reduce++))
1761:     {
1762:       num++;
1763:       base = vals + *map++;
1764:       while (*map >= 0)
1765:         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1766:     }
1767: }



1771: /******************************************************************************
1772: Function: gather_scatter

1774: VERSION 3 :: 

1776: Input : 
1777: Output: 
1778: Return: 
1779: Description: 
1780: ******************************************************************************/
1781: static
1782: void
1783: gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1784: {
1785:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1786:    int *iptr, *msg_list, *msg_size, **msg_nodes;
1787:    int *pw, *list, *size, **nodes;
1788:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1789:   MPI_Status status;


1792:   /* strip and load s */
1793:   msg_list =list         = gs->pair_list;
1794:   msg_size =size         = gs->msg_sizes;
1795:   msg_nodes=nodes        = gs->node_list;
1796:   iptr=pw                = gs->pw_elm_list;
1797:   dptr1=dptr3            = gs->pw_vals;
1798:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1799:   msg_ids_out = ids_out  = gs->msg_ids_out;
1800:   dptr2                  = gs->out;
1801:   in1=in2                = gs->in;

1803:   /* post the receives */
1804:   /*  msg_nodes=nodes; */
1805:   do
1806:     {
1807:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1808:          second one *list and do list++ afterwards */
1809:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1810:                 gs->gs_comm, msg_ids_in++);
1811:       in1 += *size++;
1812:     }
1813:   while (*++msg_nodes);
1814:   msg_nodes=nodes;

1816:   /* load gs values into in out gs buffers */
1817:   while (*iptr >= 0)
1818:     {*dptr3++ = *(in_vals + *iptr++);}

1820:   /* load out buffers and post the sends */
1821:   while ((iptr = *msg_nodes++))
1822:     {
1823:       dptr3 = dptr2;
1824:       while (*iptr >= 0)
1825:         {*dptr2++ = *(dptr1 + *iptr++);}
1826:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1827:       /* is msg_ids_out++ correct? */
1828:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1829:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1830:     }

1832:   if (gs->max_left_over)
1833:     {gs_gop_tree_max_abs(gs,in_vals);}

1835:   /* process the received data */
1836:   msg_nodes=nodes;
1837:   while ((iptr = *nodes++))
1838:     {
1839:       /* Should I check the return value of MPI_Wait() or status? */
1840:       /* Can this loop be replaced by a call to MPI_Waitall()? */
1841:       MPI_Wait(ids_in++, &status);
1842:       while (*iptr >= 0)
1843:         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1844:     }

1846:   /* replace vals */
1847:   while (*pw >= 0)
1848:     {*(in_vals + *pw++) = *dptr1++;}

1850:   /* clear isend message handles */
1851:   /* This changed for clarity though it could be the same */
1852:   while (*msg_nodes++)
1853:     /* Should I check the return value of MPI_Wait() or status? */
1854:     /* Can this loop be replaced by a call to MPI_Waitall()? */
1855:     {MPI_Wait(ids_out++, &status);}
1856: }



1860: /******************************************************************************
1861: Function: gather_scatter

1863: Input : 
1864: Output: 
1865: Return: 
1866: Description: 
1867: ******************************************************************************/
1868: static
1869: void
1870: gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1871: {
1872:   int size;
1873:   int *in, *out;
1874:   PetscScalar *buf, *work;
1875:   int op[] = {GL_MAX_ABS,0};


1878:   in   = gs->tree_map_in;
1879:   out  = gs->tree_map_out;
1880:   buf  = gs->tree_buf;
1881:   work = gs->tree_work;
1882:   size = gs->tree_nel;

1884:   rvec_zero(buf,size);

1886:   while (*in >= 0)
1887:     {
1888:       /*
1889:       printf("%d :: out=%d\n",my_id,*out);
1890:       printf("%d :: in=%d\n",my_id,*in);
1891:       */
1892:       *(buf + *out++) = *(vals + *in++);
1893:     }

1895:   grop(buf,work,size,op);

1897:   in   = gs->tree_map_in;
1898:   out  = gs->tree_map_out;

1900:   while (*in >= 0)
1901:     {*(vals + *in++) = *(buf + *out++);}

1903: }



1907: /******************************************************************************
1908: Function: gather_scatter

1910: Input : 
1911: Output: 
1912: Return: 
1913: Description: 
1914: ******************************************************************************/
1915: static void
1916: gs_gop_max( gs_id *gs,  PetscScalar *vals)
1917: {

1919:   /* local only operations!!! */
1920:   if (gs->num_local)
1921:     {gs_gop_local_max(gs,vals);}

1923:   /* if intersection tree/pairwise and local isn't empty */
1924:   if (gs->num_local_gop)
1925:     {
1926:       gs_gop_local_in_max(gs,vals);

1928:       /* pairwise */
1929:       if (gs->num_pairs)
1930:         {gs_gop_pairwise_max(gs,vals);}
1931: 
1932:       /* tree */
1933:       else if (gs->max_left_over)
1934:         {gs_gop_tree_max(gs,vals);}
1935: 
1936:       gs_gop_local_out(gs,vals);
1937:     }
1938:   /* if intersection tree/pairwise and local is empty */
1939:   else
1940:     {
1941:       /* pairwise */
1942:       if (gs->num_pairs)
1943:         {gs_gop_pairwise_max(gs,vals);}
1944: 
1945:       /* tree */
1946:       else if (gs->max_left_over)
1947:         {gs_gop_tree_max(gs,vals);}
1948:     }
1949: }



1953: /******************************************************************************
1954: Function: gather_scatter

1956: Input : 
1957: Output: 
1958: Return: 
1959: Description: 
1960: ******************************************************************************/
1961: static
1962: void
1963: gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1964: {
1965:    int *num, *map, **reduce;
1966:    PetscScalar tmp;


1969:   num    = gs->num_local_reduce;
1970:   reduce = gs->local_reduce;
1971:   while ((map = *reduce))
1972:     {
1973:       num ++;
1974:       tmp = -REAL_MAX;
1975:       while (*map >= 0)
1976:         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1977: 
1978:       map = *reduce++;
1979:       while (*map >= 0)
1980:         {*(vals + *map++) = tmp;}
1981:     }
1982: }



1986: /******************************************************************************
1987: Function: gather_scatter

1989: Input : 
1990: Output: 
1991: Return: 
1992: Description: 
1993: ******************************************************************************/
1994: static
1995: void
1996: gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1997: {
1998:    int *num, *map, **reduce;
1999:    PetscScalar *base;


2002:   num    = gs->num_gop_local_reduce;
2003:   reduce = gs->gop_local_reduce;
2004:   while ((map = *reduce++))
2005:     {
2006:       num++;
2007:       base = vals + *map++;
2008:       while (*map >= 0)
2009:         {*base = PetscMax(*base,*(vals + *map)); map++;}
2010:     }
2011: }



2015: /******************************************************************************
2016: Function: gather_scatter

2018: VERSION 3 :: 

2020: Input : 
2021: Output: 
2022: Return: 
2023: Description: 
2024: ******************************************************************************/
2025: static
2026: void
2027: gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2028: {
2029:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2030:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2031:    int *pw, *list, *size, **nodes;
2032:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2033:   MPI_Status status;


2036:   /* strip and load s */
2037:   msg_list =list         = gs->pair_list;
2038:   msg_size =size         = gs->msg_sizes;
2039:   msg_nodes=nodes        = gs->node_list;
2040:   iptr=pw                = gs->pw_elm_list;
2041:   dptr1=dptr3            = gs->pw_vals;
2042:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2043:   msg_ids_out = ids_out  = gs->msg_ids_out;
2044:   dptr2                  = gs->out;
2045:   in1=in2                = gs->in;

2047:   /* post the receives */
2048:   /*  msg_nodes=nodes; */
2049:   do
2050:     {
2051:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2052:          second one *list and do list++ afterwards */
2053:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2054:                 gs->gs_comm, msg_ids_in++);
2055:       in1 += *size++;
2056:     }
2057:   while (*++msg_nodes);
2058:   msg_nodes=nodes;

2060:   /* load gs values into in out gs buffers */
2061:   while (*iptr >= 0)
2062:     {*dptr3++ = *(in_vals + *iptr++);}

2064:   /* load out buffers and post the sends */
2065:   while ((iptr = *msg_nodes++))
2066:     {
2067:       dptr3 = dptr2;
2068:       while (*iptr >= 0)
2069:         {*dptr2++ = *(dptr1 + *iptr++);}
2070:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2071:       /* is msg_ids_out++ correct? */
2072:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2073:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2074:     }

2076:   if (gs->max_left_over)
2077:     {gs_gop_tree_max(gs,in_vals);}

2079:   /* process the received data */
2080:   msg_nodes=nodes;
2081:   while ((iptr = *nodes++))
2082:     {
2083:       /* Should I check the return value of MPI_Wait() or status? */
2084:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2085:       MPI_Wait(ids_in++, &status);
2086:       while (*iptr >= 0)
2087:         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2088:     }

2090:   /* replace vals */
2091:   while (*pw >= 0)
2092:     {*(in_vals + *pw++) = *dptr1++;}

2094:   /* clear isend message handles */
2095:   /* This changed for clarity though it could be the same */
2096:   while (*msg_nodes++)
2097:     /* Should I check the return value of MPI_Wait() or status? */
2098:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2099:     {MPI_Wait(ids_out++, &status);}
2100: }



2104: /******************************************************************************
2105: Function: gather_scatter

2107: Input : 
2108: Output: 
2109: Return: 
2110: Description: 
2111: ******************************************************************************/
2112: static
2113: void
2114: gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2115: {
2116:   int size;
2117:   int *in, *out;
2118:   PetscScalar *buf, *work;
2119: 
2120:   in   = gs->tree_map_in;
2121:   out  = gs->tree_map_out;
2122:   buf  = gs->tree_buf;
2123:   work = gs->tree_work;
2124:   size = gs->tree_nel;

2126:   rvec_set(buf,-REAL_MAX,size);

2128:   while (*in >= 0)
2129:     {*(buf + *out++) = *(vals + *in++);}

2131:   in   = gs->tree_map_in;
2132:   out  = gs->tree_map_out;
2133:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2134:   while (*in >= 0)
2135:     {*(vals + *in++) = *(work + *out++);}

2137: }



2141: /******************************************************************************
2142: Function: gather_scatter

2144: Input : 
2145: Output: 
2146: Return: 
2147: Description: 
2148: ******************************************************************************/
2149: static void
2150: gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2151: {

2153:   /* local only operations!!! */
2154:   if (gs->num_local)
2155:     {gs_gop_local_min_abs(gs,vals);}

2157:   /* if intersection tree/pairwise and local isn't empty */
2158:   if (gs->num_local_gop)
2159:     {
2160:       gs_gop_local_in_min_abs(gs,vals);

2162:       /* pairwise */
2163:       if (gs->num_pairs)
2164:         {gs_gop_pairwise_min_abs(gs,vals);}
2165: 
2166:       /* tree */
2167:       else if (gs->max_left_over)
2168:         {gs_gop_tree_min_abs(gs,vals);}
2169: 
2170:       gs_gop_local_out(gs,vals);
2171:     }
2172:   /* if intersection tree/pairwise and local is empty */
2173:   else
2174:     {
2175:       /* pairwise */
2176:       if (gs->num_pairs)
2177:         {gs_gop_pairwise_min_abs(gs,vals);}
2178: 
2179:       /* tree */
2180:       else if (gs->max_left_over)
2181:         {gs_gop_tree_min_abs(gs,vals);}
2182:     }
2183: }



2187: /******************************************************************************
2188: Function: gather_scatter

2190: Input : 
2191: Output: 
2192: Return: 
2193: Description: 
2194: ******************************************************************************/
2195: static
2196: void
2197: gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2198: {
2199:    int *num, *map, **reduce;
2200:    PetscScalar tmp;


2203:   num    = gs->num_local_reduce;
2204:   reduce = gs->local_reduce;
2205:   while ((map = *reduce))
2206:     {
2207:       num ++;
2208:       tmp = REAL_MAX;
2209:       while (*map >= 0)
2210:         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2211: 
2212:       map = *reduce++;
2213:       while (*map >= 0)
2214:         {*(vals + *map++) = tmp;}
2215:     }
2216: }



2220: /******************************************************************************
2221: Function: gather_scatter

2223: Input : 
2224: Output: 
2225: Return: 
2226: Description: 
2227: ******************************************************************************/
2228: static
2229: void
2230: gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2231: {
2232:    int *num, *map, **reduce;
2233:    PetscScalar *base;

2235:   num    = gs->num_gop_local_reduce;
2236:   reduce = gs->gop_local_reduce;
2237:   while ((map = *reduce++))
2238:     {
2239:       num++;
2240:       base = vals + *map++;
2241:       while (*map >= 0)
2242:         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2243:     }
2244: }



2248: /******************************************************************************
2249: Function: gather_scatter

2251: VERSION 3 :: 

2253: Input : 
2254: Output: 
2255: Return: 
2256: Description: 
2257: ******************************************************************************/
2258: static
2259: void
2260: gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2261: {
2262:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2263:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2264:    int *pw, *list, *size, **nodes;
2265:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2266:   MPI_Status status;


2269:   /* strip and load s */
2270:   msg_list =list         = gs->pair_list;
2271:   msg_size =size         = gs->msg_sizes;
2272:   msg_nodes=nodes        = gs->node_list;
2273:   iptr=pw                = gs->pw_elm_list;
2274:   dptr1=dptr3            = gs->pw_vals;
2275:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2276:   msg_ids_out = ids_out  = gs->msg_ids_out;
2277:   dptr2                  = gs->out;
2278:   in1=in2                = gs->in;

2280:   /* post the receives */
2281:   /*  msg_nodes=nodes; */
2282:   do
2283:     {
2284:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2285:          second one *list and do list++ afterwards */
2286:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2287:                 gs->gs_comm, msg_ids_in++);
2288:       in1 += *size++;
2289:     }
2290:   while (*++msg_nodes);
2291:   msg_nodes=nodes;

2293:   /* load gs values into in out gs buffers */
2294:   while (*iptr >= 0)
2295:     {*dptr3++ = *(in_vals + *iptr++);}

2297:   /* load out buffers and post the sends */
2298:   while ((iptr = *msg_nodes++))
2299:     {
2300:       dptr3 = dptr2;
2301:       while (*iptr >= 0)
2302:         {*dptr2++ = *(dptr1 + *iptr++);}
2303:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2304:       /* is msg_ids_out++ correct? */
2305:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2306:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2307:     }

2309:   if (gs->max_left_over)
2310:     {gs_gop_tree_min_abs(gs,in_vals);}

2312:   /* process the received data */
2313:   msg_nodes=nodes;
2314:   while ((iptr = *nodes++))
2315:     {
2316:       /* Should I check the return value of MPI_Wait() or status? */
2317:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2318:       MPI_Wait(ids_in++, &status);
2319:       while (*iptr >= 0)
2320:         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2321:     }

2323:   /* replace vals */
2324:   while (*pw >= 0)
2325:     {*(in_vals + *pw++) = *dptr1++;}

2327:   /* clear isend message handles */
2328:   /* This changed for clarity though it could be the same */
2329:   while (*msg_nodes++)
2330:     /* Should I check the return value of MPI_Wait() or status? */
2331:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2332:     {MPI_Wait(ids_out++, &status);}
2333: }



2337: /******************************************************************************
2338: Function: gather_scatter

2340: Input : 
2341: Output: 
2342: Return: 
2343: Description: 
2344: ******************************************************************************/
2345: static
2346: void
2347: gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2348: {
2349:   int size;
2350:   int *in, *out;
2351:   PetscScalar *buf, *work;
2352:   int op[] = {GL_MIN_ABS,0};


2355:   in   = gs->tree_map_in;
2356:   out  = gs->tree_map_out;
2357:   buf  = gs->tree_buf;
2358:   work = gs->tree_work;
2359:   size = gs->tree_nel;

2361:   rvec_set(buf,REAL_MAX,size);

2363:   while (*in >= 0)
2364:     {*(buf + *out++) = *(vals + *in++);}

2366:   in   = gs->tree_map_in;
2367:   out  = gs->tree_map_out;
2368:   grop(buf,work,size,op);
2369:   while (*in >= 0)
2370:     {*(vals + *in++) = *(buf + *out++);}

2372: }



2376: /******************************************************************************
2377: Function: gather_scatter

2379: Input : 
2380: Output: 
2381: Return: 
2382: Description: 
2383: ******************************************************************************/
2384: static void
2385: gs_gop_min( gs_id *gs,  PetscScalar *vals)
2386: {

2388:   /* local only operations!!! */
2389:   if (gs->num_local)
2390:     {gs_gop_local_min(gs,vals);}

2392:   /* if intersection tree/pairwise and local isn't empty */
2393:   if (gs->num_local_gop)
2394:     {
2395:       gs_gop_local_in_min(gs,vals);

2397:       /* pairwise */
2398:       if (gs->num_pairs)
2399:         {gs_gop_pairwise_min(gs,vals);}
2400: 
2401:       /* tree */
2402:       else if (gs->max_left_over)
2403:         {gs_gop_tree_min(gs,vals);}
2404: 
2405:       gs_gop_local_out(gs,vals);
2406:     }
2407:   /* if intersection tree/pairwise and local is empty */
2408:   else
2409:     {
2410:       /* pairwise */
2411:       if (gs->num_pairs)
2412:         {gs_gop_pairwise_min(gs,vals);}
2413: 
2414:       /* tree */
2415:       else if (gs->max_left_over)
2416:         {gs_gop_tree_min(gs,vals);}
2417:     }
2418: }



2422: /******************************************************************************
2423: Function: gather_scatter

2425: Input : 
2426: Output: 
2427: Return: 
2428: Description: 
2429: ******************************************************************************/
2430: static
2431: void
2432: gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2433: {
2434:    int *num, *map, **reduce;
2435:    PetscScalar tmp;

2437:   num    = gs->num_local_reduce;
2438:   reduce = gs->local_reduce;
2439:   while ((map = *reduce))
2440:     {
2441:       num ++;
2442:       tmp = REAL_MAX;
2443:       while (*map >= 0)
2444:         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2445: 
2446:       map = *reduce++;
2447:       while (*map >= 0)
2448:         {*(vals + *map++) = tmp;}
2449:     }
2450: }



2454: /******************************************************************************
2455: Function: gather_scatter

2457: Input : 
2458: Output: 
2459: Return: 
2460: Description: 
2461: ******************************************************************************/
2462: static
2463: void
2464: gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2465: {
2466:    int *num, *map, **reduce;
2467:    PetscScalar *base;

2469:   num    = gs->num_gop_local_reduce;
2470:   reduce = gs->gop_local_reduce;
2471:   while ((map = *reduce++))
2472:     {
2473:       num++;
2474:       base = vals + *map++;
2475:       while (*map >= 0)
2476:         {*base = PetscMin(*base,*(vals + *map)); map++;}
2477:     }
2478: }



2482: /******************************************************************************
2483: Function: gather_scatter

2485: VERSION 3 :: 

2487: Input : 
2488: Output: 
2489: Return: 
2490: Description: 
2491: ******************************************************************************/
2492: static
2493: void
2494: gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2495: {
2496:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2497:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2498:    int *pw, *list, *size, **nodes;
2499:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2500:   MPI_Status status;


2503:   /* strip and load s */
2504:   msg_list =list         = gs->pair_list;
2505:   msg_size =size         = gs->msg_sizes;
2506:   msg_nodes=nodes        = gs->node_list;
2507:   iptr=pw                = gs->pw_elm_list;
2508:   dptr1=dptr3            = gs->pw_vals;
2509:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2510:   msg_ids_out = ids_out  = gs->msg_ids_out;
2511:   dptr2                  = gs->out;
2512:   in1=in2                = gs->in;

2514:   /* post the receives */
2515:   /*  msg_nodes=nodes; */
2516:   do
2517:     {
2518:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2519:          second one *list and do list++ afterwards */
2520:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2521:                 gs->gs_comm, msg_ids_in++);
2522:       in1 += *size++;
2523:     }
2524:   while (*++msg_nodes);
2525:   msg_nodes=nodes;

2527:   /* load gs values into in out gs buffers */
2528:   while (*iptr >= 0)
2529:     {*dptr3++ = *(in_vals + *iptr++);}

2531:   /* load out buffers and post the sends */
2532:   while ((iptr = *msg_nodes++))
2533:     {
2534:       dptr3 = dptr2;
2535:       while (*iptr >= 0)
2536:         {*dptr2++ = *(dptr1 + *iptr++);}
2537:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2538:       /* is msg_ids_out++ correct? */
2539:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2540:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2541:     }

2543:   /* process the received data */
2544:   if (gs->max_left_over)
2545:     {gs_gop_tree_min(gs,in_vals);}

2547:   msg_nodes=nodes;
2548:   while ((iptr = *nodes++))
2549:     {
2550:       /* Should I check the return value of MPI_Wait() or status? */
2551:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2552:       MPI_Wait(ids_in++, &status);
2553:       while (*iptr >= 0)
2554:         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2555:     }

2557:   /* replace vals */
2558:   while (*pw >= 0)
2559:     {*(in_vals + *pw++) = *dptr1++;}

2561:   /* clear isend message handles */
2562:   /* This changed for clarity though it could be the same */
2563:   while (*msg_nodes++)
2564:     /* Should I check the return value of MPI_Wait() or status? */
2565:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2566:     {MPI_Wait(ids_out++, &status);}
2567: }



2571: /******************************************************************************
2572: Function: gather_scatter

2574: Input : 
2575: Output: 
2576: Return: 
2577: Description: 
2578: ******************************************************************************/
2579: static
2580: void
2581: gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2582: {
2583:   int size;
2584:   int *in, *out;
2585:   PetscScalar *buf, *work;
2586: 
2587:   in   = gs->tree_map_in;
2588:   out  = gs->tree_map_out;
2589:   buf  = gs->tree_buf;
2590:   work = gs->tree_work;
2591:   size = gs->tree_nel;

2593:   rvec_set(buf,REAL_MAX,size);

2595:   while (*in >= 0)
2596:     {*(buf + *out++) = *(vals + *in++);}

2598:   in   = gs->tree_map_in;
2599:   out  = gs->tree_map_out;
2600:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2601:   while (*in >= 0)
2602:     {*(vals + *in++) = *(work + *out++);}
2603: }



2607: /******************************************************************************
2608: Function: gather_scatter

2610: Input : 
2611: Output: 
2612: Return: 
2613: Description: 
2614: ******************************************************************************/
2615: static void
2616: gs_gop_times( gs_id *gs,  PetscScalar *vals)
2617: {

2619:   /* local only operations!!! */
2620:   if (gs->num_local)
2621:     {gs_gop_local_times(gs,vals);}

2623:   /* if intersection tree/pairwise and local isn't empty */
2624:   if (gs->num_local_gop)
2625:     {
2626:       gs_gop_local_in_times(gs,vals);

2628:       /* pairwise */
2629:       if (gs->num_pairs)
2630:         {gs_gop_pairwise_times(gs,vals);}
2631: 
2632:       /* tree */
2633:       else if (gs->max_left_over)
2634:         {gs_gop_tree_times(gs,vals);}
2635: 
2636:       gs_gop_local_out(gs,vals);
2637:     }
2638:   /* if intersection tree/pairwise and local is empty */
2639:   else
2640:     {
2641:       /* pairwise */
2642:       if (gs->num_pairs)
2643:         {gs_gop_pairwise_times(gs,vals);}
2644: 
2645:       /* tree */
2646:       else if (gs->max_left_over)
2647:         {gs_gop_tree_times(gs,vals);}
2648:     }
2649: }



2653: /******************************************************************************
2654: Function: gather_scatter

2656: Input : 
2657: Output: 
2658: Return: 
2659: Description: 
2660: ******************************************************************************/
2661: static
2662: void
2663: gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2664: {
2665:    int *num, *map, **reduce;
2666:    PetscScalar tmp;

2668:   num    = gs->num_local_reduce;
2669:   reduce = gs->local_reduce;
2670:   while ((map = *reduce))
2671:     {
2672:       /* wall */
2673:       if (*num == 2)
2674:         {
2675:           num ++; reduce++;
2676:           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2677:         }
2678:       /* corner shared by three elements */
2679:       else if (*num == 3)
2680:         {
2681:           num ++; reduce++;
2682:           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2683:         }
2684:       /* corner shared by four elements */
2685:       else if (*num == 4)
2686:         {
2687:           num ++; reduce++;
2688:           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2689:                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2690:         }
2691:       /* general case ... odd geoms ... 3D*/
2692:       else
2693:         {
2694:           num ++;
2695:           tmp = 1.0;
2696:           while (*map >= 0)
2697:             {tmp *= *(vals + *map++);}

2699:           map = *reduce++;
2700:           while (*map >= 0)
2701:             {*(vals + *map++) = tmp;}
2702:         }
2703:     }
2704: }



2708: /******************************************************************************
2709: Function: gather_scatter

2711: Input : 
2712: Output: 
2713: Return: 
2714: Description: 
2715: ******************************************************************************/
2716: static
2717: void
2718: gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2719: {
2720:    int *num, *map, **reduce;
2721:    PetscScalar *base;

2723:   num    = gs->num_gop_local_reduce;
2724:   reduce = gs->gop_local_reduce;
2725:   while ((map = *reduce++))
2726:     {
2727:       /* wall */
2728:       if (*num == 2)
2729:         {
2730:           num ++;
2731:           vals[map[0]] *= vals[map[1]];
2732:         }
2733:       /* corner shared by three elements */
2734:       else if (*num == 3)
2735:         {
2736:           num ++;
2737:           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2738:         }
2739:       /* corner shared by four elements */
2740:       else if (*num == 4)
2741:         {
2742:           num ++;
2743:           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2744:         }
2745:       /* general case ... odd geoms ... 3D*/
2746:       else
2747:         {
2748:           num++;
2749:           base = vals + *map++;
2750:           while (*map >= 0)
2751:             {*base *= *(vals + *map++);}
2752:         }
2753:     }
2754: }



2758: /******************************************************************************
2759: Function: gather_scatter

2761: VERSION 3 :: 

2763: Input : 
2764: Output: 
2765: Return: 
2766: Description: 
2767: ******************************************************************************/
2768: static
2769: void
2770: gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2771: {
2772:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2773:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2774:    int *pw, *list, *size, **nodes;
2775:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2776:   MPI_Status status;


2779:   /* strip and load s */
2780:   msg_list =list         = gs->pair_list;
2781:   msg_size =size         = gs->msg_sizes;
2782:   msg_nodes=nodes        = gs->node_list;
2783:   iptr=pw                = gs->pw_elm_list;
2784:   dptr1=dptr3            = gs->pw_vals;
2785:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2786:   msg_ids_out = ids_out  = gs->msg_ids_out;
2787:   dptr2                  = gs->out;
2788:   in1=in2                = gs->in;

2790:   /* post the receives */
2791:   /*  msg_nodes=nodes; */
2792:   do
2793:     {
2794:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2795:          second one *list and do list++ afterwards */
2796:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2797:                 gs->gs_comm, msg_ids_in++);
2798:       in1 += *size++;
2799:     }
2800:   while (*++msg_nodes);
2801:   msg_nodes=nodes;

2803:   /* load gs values into in out gs buffers */
2804:   while (*iptr >= 0)
2805:     {*dptr3++ = *(in_vals + *iptr++);}

2807:   /* load out buffers and post the sends */
2808:   while ((iptr = *msg_nodes++))
2809:     {
2810:       dptr3 = dptr2;
2811:       while (*iptr >= 0)
2812:         {*dptr2++ = *(dptr1 + *iptr++);}
2813:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2814:       /* is msg_ids_out++ correct? */
2815:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2816:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2817:     }

2819:   if (gs->max_left_over)
2820:     {gs_gop_tree_times(gs,in_vals);}

2822:   /* process the received data */
2823:   msg_nodes=nodes;
2824:   while ((iptr = *nodes++))
2825:     {
2826:       /* Should I check the return value of MPI_Wait() or status? */
2827:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2828:       MPI_Wait(ids_in++, &status);
2829:       while (*iptr >= 0)
2830:         {*(dptr1 + *iptr++) *= *in2++;}
2831:     }

2833:   /* replace vals */
2834:   while (*pw >= 0)
2835:     {*(in_vals + *pw++) = *dptr1++;}

2837:   /* clear isend message handles */
2838:   /* This changed for clarity though it could be the same */
2839:   while (*msg_nodes++)
2840:     /* Should I check the return value of MPI_Wait() or status? */
2841:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2842:     {MPI_Wait(ids_out++, &status);}
2843: }



2847: /******************************************************************************
2848: Function: gather_scatter

2850: Input : 
2851: Output: 
2852: Return: 
2853: Description: 
2854: ******************************************************************************/
2855: static
2856: void
2857: gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2858: {
2859:   int size;
2860:   int *in, *out;
2861:   PetscScalar *buf, *work;
2862: 
2863:   in   = gs->tree_map_in;
2864:   out  = gs->tree_map_out;
2865:   buf  = gs->tree_buf;
2866:   work = gs->tree_work;
2867:   size = gs->tree_nel;

2869:   rvec_one(buf,size);

2871:   while (*in >= 0)
2872:     {*(buf + *out++) = *(vals + *in++);}

2874:   in   = gs->tree_map_in;
2875:   out  = gs->tree_map_out;
2876:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2877:   while (*in >= 0)
2878:     {*(vals + *in++) = *(work + *out++);}

2880: }



2884: /******************************************************************************
2885: Function: gather_scatter


2888: Input : 
2889: Output: 
2890: Return: 
2891: Description: 
2892: ******************************************************************************/
2893: static void
2894: gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2895: {

2897:   /* local only operations!!! */
2898:   if (gs->num_local)
2899:     {gs_gop_local_plus(gs,vals);}

2901:   /* if intersection tree/pairwise and local isn't empty */
2902:   if (gs->num_local_gop)
2903:     {
2904:       gs_gop_local_in_plus(gs,vals);

2906:       /* pairwise will NOT do tree inside ... */
2907:       if (gs->num_pairs)
2908:         {gs_gop_pairwise_plus(gs,vals);}

2910:       /* tree */
2911:       if (gs->max_left_over)
2912:         {gs_gop_tree_plus(gs,vals);}
2913: 
2914:       gs_gop_local_out(gs,vals);
2915:     }
2916:   /* if intersection tree/pairwise and local is empty */
2917:   else
2918:     {
2919:       /* pairwise will NOT do tree inside */
2920:       if (gs->num_pairs)
2921:         {gs_gop_pairwise_plus(gs,vals);}
2922: 
2923:       /* tree */
2924:       if (gs->max_left_over)
2925:         {gs_gop_tree_plus(gs,vals);}
2926:     }

2928: }



2932: /******************************************************************************
2933: Function: gather_scatter

2935: Input : 
2936: Output: 
2937: Return: 
2938: Description: 
2939: ******************************************************************************/
2940: static
2941: void
2942: gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2943: {
2944:    int *num, *map, **reduce;
2945:    PetscScalar tmp;


2948:   num    = gs->num_local_reduce;
2949:   reduce = gs->local_reduce;
2950:   while ((map = *reduce))
2951:     {
2952:       /* wall */
2953:       if (*num == 2)
2954:         {
2955:           num ++; reduce++;
2956:           vals[map[1]] = vals[map[0]] += vals[map[1]];
2957:         }
2958:       /* corner shared by three elements */
2959:       else if (*num == 3)
2960:         {
2961:           num ++; reduce++;
2962:           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2963:         }
2964:       /* corner shared by four elements */
2965:       else if (*num == 4)
2966:         {
2967:           num ++; reduce++;
2968:           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2969:                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2970:         }
2971:       /* general case ... odd geoms ... 3D*/
2972:       else
2973:         {
2974:           num ++;
2975:           tmp = 0.0;
2976:           while (*map >= 0)
2977:             {tmp += *(vals + *map++);}

2979:           map = *reduce++;
2980:           while (*map >= 0)
2981:             {*(vals + *map++) = tmp;}
2982:         }
2983:     }
2984: }



2988: /******************************************************************************
2989: Function: gather_scatter

2991: Input : 
2992: Output: 
2993: Return: 
2994: Description: 
2995: ******************************************************************************/
2996: static
2997: void
2998: gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
2999: {
3000:    int *num, *map, **reduce;
3001:    PetscScalar *base;


3004:   num    = gs->num_gop_local_reduce;
3005:   reduce = gs->gop_local_reduce;
3006:   while ((map = *reduce++))
3007:     {
3008:       /* wall */
3009:       if (*num == 2)
3010:         {
3011:           num ++;
3012:           vals[map[0]] += vals[map[1]];
3013:         }
3014:       /* corner shared by three elements */
3015:       else if (*num == 3)
3016:         {
3017:           num ++;
3018:           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3019:         }
3020:       /* corner shared by four elements */
3021:       else if (*num == 4)
3022:         {
3023:           num ++;
3024:           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3025:         }
3026:       /* general case ... odd geoms ... 3D*/
3027:       else
3028:         {
3029:           num++;
3030:           base = vals + *map++;
3031:           while (*map >= 0)
3032:             {*base += *(vals + *map++);}
3033:         }
3034:     }
3035: }



3039: /******************************************************************************
3040: Function: gather_scatter

3042: VERSION 3 :: 

3044: Input : 
3045: Output: 
3046: Return: 
3047: Description: 
3048: ******************************************************************************/
3049: static
3050: void
3051: gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3052: {
3053:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3054:    int *iptr, *msg_list, *msg_size, **msg_nodes;
3055:    int *pw, *list, *size, **nodes;
3056:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3057:   MPI_Status status;


3060:   /* strip and load s */
3061:   msg_list =list         = gs->pair_list;
3062:   msg_size =size         = gs->msg_sizes;
3063:   msg_nodes=nodes        = gs->node_list;
3064:   iptr=pw                = gs->pw_elm_list;
3065:   dptr1=dptr3            = gs->pw_vals;
3066:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3067:   msg_ids_out = ids_out  = gs->msg_ids_out;
3068:   dptr2                  = gs->out;
3069:   in1=in2                = gs->in;

3071:   /* post the receives */
3072:   /*  msg_nodes=nodes; */
3073:   do
3074:     {
3075:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3076:          second one *list and do list++ afterwards */
3077:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3078:                 gs->gs_comm, msg_ids_in++);
3079:       in1 += *size++;
3080:     }
3081:   while (*++msg_nodes);
3082:   msg_nodes=nodes;

3084:   /* load gs values into in out gs buffers */
3085:   while (*iptr >= 0)
3086:     {*dptr3++ = *(in_vals + *iptr++);}

3088:   /* load out buffers and post the sends */
3089:   while ((iptr = *msg_nodes++))
3090:     {
3091:       dptr3 = dptr2;
3092:       while (*iptr >= 0)
3093:         {*dptr2++ = *(dptr1 + *iptr++);}
3094:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3095:       /* is msg_ids_out++ correct? */
3096:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3097:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3098:     }

3100:   /* do the tree while we're waiting */
3101:   if (gs->max_left_over)
3102:     {gs_gop_tree_plus(gs,in_vals);}

3104:   /* process the received data */
3105:   msg_nodes=nodes;
3106:   while ((iptr = *nodes++))
3107:     {
3108:       /* Should I check the return value of MPI_Wait() or status? */
3109:       /* Can this loop be replaced by a call to MPI_Waitall()? */
3110:       MPI_Wait(ids_in++, &status);
3111:       while (*iptr >= 0)
3112:         {*(dptr1 + *iptr++) += *in2++;}
3113:     }

3115:   /* replace vals */
3116:   while (*pw >= 0)
3117:     {*(in_vals + *pw++) = *dptr1++;}

3119:   /* clear isend message handles */
3120:   /* This changed for clarity though it could be the same */
3121:   while (*msg_nodes++)
3122:     /* Should I check the return value of MPI_Wait() or status? */
3123:     /* Can this loop be replaced by a call to MPI_Waitall()? */
3124:     {MPI_Wait(ids_out++, &status);}

3126: }



3130: /******************************************************************************
3131: Function: gather_scatter

3133: Input : 
3134: Output: 
3135: Return: 
3136: Description: 
3137: ******************************************************************************/
3138: static
3139: void
3140: gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3141: {
3142:   int size;
3143:   int *in, *out;
3144:   PetscScalar *buf, *work;
3145: 
3146:   in   = gs->tree_map_in;
3147:   out  = gs->tree_map_out;
3148:   buf  = gs->tree_buf;
3149:   work = gs->tree_work;
3150:   size = gs->tree_nel;

3152:   rvec_zero(buf,size);

3154:   while (*in >= 0)
3155:     {*(buf + *out++) = *(vals + *in++);}

3157:   in   = gs->tree_map_in;
3158:   out  = gs->tree_map_out;
3159:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3160:   while (*in >= 0)
3161:     {*(vals + *in++) = *(work + *out++);}

3163: }

3165: /******************************************************************************
3166: Function: gs_free()

3168: Input : 

3170: Output: 

3172: Return: 

3174: Description:  
3175:   if (gs->sss) {free((void*) gs->sss);}
3176: ******************************************************************************/
3177: void
3178: gs_free( gs_id *gs)
3179: {
3180:    int i;


3183:   if (gs->nghs) {free((void*) gs->nghs);}
3184:   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}

3186:   /* tree */
3187:   if (gs->max_left_over)
3188:     {
3189:       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3190:       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3191:       if (gs->tree_work) {free((void*) gs->tree_work);}
3192:       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3193:       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3194:     }

3196:   /* pairwise info */
3197:   if (gs->num_pairs)
3198:     {
3199:       /* should be NULL already */
3200:       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3201:       if (gs->elms) {free((void*) gs->elms);}
3202:       if (gs->local_elms) {free((void*) gs->local_elms);}
3203:       if (gs->companion) {free((void*) gs->companion);}
3204: 
3205:       /* only set if pairwise */
3206:       if (gs->vals) {free((void*) gs->vals);}
3207:       if (gs->in) {free((void*) gs->in);}
3208:       if (gs->out) {free((void*) gs->out);}
3209:       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3210:       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3211:       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3212:       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3213:       if (gs->node_list)
3214:         {
3215:           for (i=0;i<gs->num_pairs;i++)
3216:             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3217:           free((void*) gs->node_list);
3218:         }
3219:       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3220:       if (gs->pair_list) {free((void*) gs->pair_list);}
3221:     }

3223:   /* local info */
3224:   if (gs->num_local_total>=0)
3225:     {
3226:       for (i=0;i<gs->num_local_total+1;i++)
3227:         /*      for (i=0;i<gs->num_local_total;i++) */
3228:         {
3229:           if (gs->num_gop_local_reduce[i])
3230:             {free((void*) gs->gop_local_reduce[i]);}
3231:         }
3232:     }

3234:   /* if intersection tree/pairwise and local isn't empty */
3235:   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3236:   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}

3238:   free((void*) gs);
3239: }






3246: /******************************************************************************
3247: Function: gather_scatter

3249: Input : 
3250: Output: 
3251: Return: 
3252: Description: 
3253: ******************************************************************************/
3254: void
3255: gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3256: {

3258:   switch (*op) {
3259:   case '+':
3260:     gs_gop_vec_plus(gs,vals,step);
3261:     break;
3262:   default:
3263:     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3264:     error_msg_warning("gs_gop_vec() :: default :: plus");
3265:     gs_gop_vec_plus(gs,vals,step);
3266:     break;
3267:   }
3268: }



3272: /******************************************************************************
3273: Function: gather_scatter

3275: Input : 
3276: Output: 
3277: Return: 
3278: Description: 
3279: ******************************************************************************/
3280: static void
3281: gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3282: {
3283:   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}

3285:   /* local only operations!!! */
3286:   if (gs->num_local)
3287:     {gs_gop_vec_local_plus(gs,vals,step);}

3289:   /* if intersection tree/pairwise and local isn't empty */
3290:   if (gs->num_local_gop)
3291:     {
3292:       gs_gop_vec_local_in_plus(gs,vals,step);

3294:       /* pairwise */
3295:       if (gs->num_pairs)
3296:         {gs_gop_vec_pairwise_plus(gs,vals,step);}

3298:       /* tree */
3299:       else if (gs->max_left_over)
3300:         {gs_gop_vec_tree_plus(gs,vals,step);}

3302:       gs_gop_vec_local_out(gs,vals,step);
3303:     }
3304:   /* if intersection tree/pairwise and local is empty */
3305:   else
3306:     {
3307:       /* pairwise */
3308:       if (gs->num_pairs)
3309:         {gs_gop_vec_pairwise_plus(gs,vals,step);}

3311:       /* tree */
3312:       else if (gs->max_left_over)
3313:         {gs_gop_vec_tree_plus(gs,vals,step);}
3314:     }
3315: }



3319: /******************************************************************************
3320: Function: gather_scatter

3322: Input : 
3323: Output: 
3324: Return: 
3325: Description: 
3326: ******************************************************************************/
3327: static
3328: void
3329: gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, 
3330:                        int step)
3331: {
3332:    int *num, *map, **reduce;
3333:    PetscScalar *base;


3336:   num    = gs->num_local_reduce;
3337:   reduce = gs->local_reduce;
3338:   while ((map = *reduce))
3339:     {
3340:       base = vals + map[0] * step;

3342:       /* wall */
3343:       if (*num == 2)
3344:         {
3345:           num++; reduce++;
3346:           rvec_add (base,vals+map[1]*step,step);
3347:           rvec_copy(vals+map[1]*step,base,step);
3348:         }
3349:       /* corner shared by three elements */
3350:       else if (*num == 3)
3351:         {
3352:           num++; reduce++;
3353:           rvec_add (base,vals+map[1]*step,step);
3354:           rvec_add (base,vals+map[2]*step,step);
3355:           rvec_copy(vals+map[2]*step,base,step);
3356:           rvec_copy(vals+map[1]*step,base,step);
3357:         }
3358:       /* corner shared by four elements */
3359:       else if (*num == 4)
3360:         {
3361:           num++; reduce++;
3362:           rvec_add (base,vals+map[1]*step,step);
3363:           rvec_add (base,vals+map[2]*step,step);
3364:           rvec_add (base,vals+map[3]*step,step);
3365:           rvec_copy(vals+map[3]*step,base,step);
3366:           rvec_copy(vals+map[2]*step,base,step);
3367:           rvec_copy(vals+map[1]*step,base,step);
3368:         }
3369:       /* general case ... odd geoms ... 3D */
3370:       else
3371:         {
3372:           num++;
3373:           while (*++map >= 0)
3374:             {rvec_add (base,vals+*map*step,step);}
3375: 
3376:           map = *reduce;
3377:           while (*++map >= 0)
3378:             {rvec_copy(vals+*map*step,base,step);}
3379: 
3380:           reduce++;
3381:         }
3382:     }
3383: }



3387: /******************************************************************************
3388: Function: gather_scatter

3390: Input : 
3391: Output: 
3392: Return: 
3393: Description: 
3394: ******************************************************************************/
3395: static
3396: void
3397: gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, 
3398:                           int step)
3399: {
3400:    int  *num, *map, **reduce;
3401:    PetscScalar *base;

3403:   num    = gs->num_gop_local_reduce;
3404:   reduce = gs->gop_local_reduce;
3405:   while ((map = *reduce++))
3406:     {
3407:       base = vals + map[0] * step;

3409:       /* wall */
3410:       if (*num == 2)
3411:         {
3412:           num ++;
3413:           rvec_add(base,vals+map[1]*step,step);
3414:         }
3415:       /* corner shared by three elements */
3416:       else if (*num == 3)
3417:         {
3418:           num ++;
3419:           rvec_add(base,vals+map[1]*step,step);
3420:           rvec_add(base,vals+map[2]*step,step);
3421:         }
3422:       /* corner shared by four elements */
3423:       else if (*num == 4)
3424:         {
3425:           num ++;
3426:           rvec_add(base,vals+map[1]*step,step);
3427:           rvec_add(base,vals+map[2]*step,step);
3428:           rvec_add(base,vals+map[3]*step,step);
3429:         }
3430:       /* general case ... odd geoms ... 3D*/
3431:       else
3432:         {
3433:           num++;
3434:           while (*++map >= 0)
3435:             {rvec_add(base,vals+*map*step,step);}
3436:         }
3437:     }
3438: }


3441: /******************************************************************************
3442: Function: gather_scatter

3444: Input : 
3445: Output: 
3446: Return: 
3447: Description: 
3448: ******************************************************************************/
3449: static
3450: void
3451: gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, 
3452:                       int step)
3453: {
3454:    int *num, *map, **reduce;
3455:    PetscScalar *base;


3458:   num    = gs->num_gop_local_reduce;
3459:   reduce = gs->gop_local_reduce;
3460:   while ((map = *reduce++))
3461:     {
3462:       base = vals + map[0] * step;

3464:       /* wall */
3465:       if (*num == 2)
3466:         {
3467:           num ++;
3468:           rvec_copy(vals+map[1]*step,base,step);
3469:         }
3470:       /* corner shared by three elements */
3471:       else if (*num == 3)
3472:         {
3473:           num ++;
3474:           rvec_copy(vals+map[1]*step,base,step);
3475:           rvec_copy(vals+map[2]*step,base,step);
3476:         }
3477:       /* corner shared by four elements */
3478:       else if (*num == 4)
3479:         {
3480:           num ++;
3481:           rvec_copy(vals+map[1]*step,base,step);
3482:           rvec_copy(vals+map[2]*step,base,step);
3483:           rvec_copy(vals+map[3]*step,base,step);
3484:         }
3485:       /* general case ... odd geoms ... 3D*/
3486:       else
3487:         {
3488:           num++;
3489:           while (*++map >= 0)
3490:             {rvec_copy(vals+*map*step,base,step);}
3491:         }
3492:     }
3493: }



3497: /******************************************************************************
3498: Function: gather_scatter

3500: VERSION 3 :: 

3502: Input : 
3503: Output: 
3504: Return: 
3505: Description: 
3506: ******************************************************************************/
3507: static
3508: void
3509: gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3510:                           int step)
3511: {
3512:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3513:    int *iptr, *msg_list, *msg_size, **msg_nodes;
3514:    int *pw, *list, *size, **nodes;
3515:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3516:   MPI_Status status;
3517:   PetscBLASInt i1;


3520:   /* strip and load s */
3521:   msg_list =list         = gs->pair_list;
3522:   msg_size =size         = gs->msg_sizes;
3523:   msg_nodes=nodes        = gs->node_list;
3524:   iptr=pw                = gs->pw_elm_list;
3525:   dptr1=dptr3            = gs->pw_vals;
3526:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3527:   msg_ids_out = ids_out  = gs->msg_ids_out;
3528:   dptr2                  = gs->out;
3529:   in1=in2                = gs->in;

3531:   /* post the receives */
3532:   /*  msg_nodes=nodes; */
3533:   do
3534:     {
3535:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3536:          second one *list and do list++ afterwards */
3537:       MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3538:                 gs->gs_comm, msg_ids_in++);
3539:       in1 += *size++ *step;
3540:     }
3541:   while (*++msg_nodes);
3542:   msg_nodes=nodes;

3544:   /* load gs values into in out gs buffers */
3545:   while (*iptr >= 0)
3546:     {
3547:       rvec_copy(dptr3,in_vals + *iptr*step,step);
3548:       dptr3+=step;
3549:       iptr++;
3550:     }

3552:   /* load out buffers and post the sends */
3553:   while ((iptr = *msg_nodes++))
3554:     {
3555:       dptr3 = dptr2;
3556:       while (*iptr >= 0)
3557:         {
3558:           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3559:           dptr2+=step;
3560:           iptr++;
3561:         }
3562:       MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3563:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3564:     }

3566:   /* tree */
3567:   if (gs->max_left_over)
3568:     {gs_gop_vec_tree_plus(gs,in_vals,step);}

3570:   /* process the received data */
3571:   msg_nodes=nodes;
3572:   while ((iptr = *nodes++)){
3573:     PetscScalar d1 = 1.0;
3574:       /* Should I check the return value of MPI_Wait() or status? */
3575:       /* Can this loop be replaced by a call to MPI_Waitall()? */
3576:       MPI_Wait(ids_in++, &status);
3577:       while (*iptr >= 0) {
3578:           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3579:           in2+=step;
3580:           iptr++;
3581:       }
3582:   }

3584:   /* replace vals */
3585:   while (*pw >= 0)
3586:     {
3587:       rvec_copy(in_vals + *pw*step,dptr1,step);
3588:       dptr1+=step;
3589:       pw++;
3590:     }

3592:   /* clear isend message handles */
3593:   /* This changed for clarity though it could be the same */
3594:   while (*msg_nodes++)
3595:     /* Should I check the return value of MPI_Wait() or status? */
3596:     /* Can this loop be replaced by a call to MPI_Waitall()? */
3597:     {MPI_Wait(ids_out++, &status);}


3600: }



3604: /******************************************************************************
3605: Function: gather_scatter

3607: Input : 
3608: Output: 
3609: Return: 
3610: Description: 
3611: ******************************************************************************/
3612: static
3613: void
3614: gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3615: {
3616:   int size, *in, *out;
3617:   PetscScalar *buf, *work;
3618:   int op[] = {GL_ADD,0};
3619:   PetscBLASInt i1 = 1;


3622:   /* copy over to local variables */
3623:   in   = gs->tree_map_in;
3624:   out  = gs->tree_map_out;
3625:   buf  = gs->tree_buf;
3626:   work = gs->tree_work;
3627:   size = gs->tree_nel*step;

3629:   /* zero out collection buffer */
3630:   rvec_zero(buf,size);


3633:   /* copy over my contributions */
3634:   while (*in >= 0)
3635:     {
3636:       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3637:     }

3639:   /* perform fan in/out on full buffer */
3640:   /* must change grop to handle the blas */
3641:   grop(buf,work,size,op);

3643:   /* reset */
3644:   in   = gs->tree_map_in;
3645:   out  = gs->tree_map_out;

3647:   /* get the portion of the results I need */
3648:   while (*in >= 0)
3649:     {
3650:       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3651:     }

3653: }



3657: /******************************************************************************
3658: Function: gather_scatter

3660: Input : 
3661: Output: 
3662: Return: 
3663: Description: 
3664: ******************************************************************************/
3665: void
3666: gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3667: {

3669:   switch (*op) {
3670:   case '+':
3671:     gs_gop_plus_hc(gs,vals,dim);
3672:     break;
3673:   default:
3674:     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3675:     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3676:     gs_gop_plus_hc(gs,vals,dim);
3677:     break;
3678:   }
3679: }



3683: /******************************************************************************
3684: Function: gather_scatter

3686: Input : 
3687: Output: 
3688: Return: 
3689: Description: 
3690: ******************************************************************************/
3691: static void
3692: gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3693: {
3694:   /* if there's nothing to do return */
3695:   if (dim<=0)
3696:     {return;}

3698:   /* can't do more dimensions then exist */
3699:   dim = PetscMin(dim,i_log2_num_nodes);

3701:   /* local only operations!!! */
3702:   if (gs->num_local)
3703:     {gs_gop_local_plus(gs,vals);}

3705:   /* if intersection tree/pairwise and local isn't empty */
3706:   if (gs->num_local_gop)
3707:     {
3708:       gs_gop_local_in_plus(gs,vals);

3710:       /* pairwise will do tree inside ... */
3711:       if (gs->num_pairs)
3712:         {gs_gop_pairwise_plus_hc(gs,vals,dim);}

3714:       /* tree only */
3715:       else if (gs->max_left_over)
3716:         {gs_gop_tree_plus_hc(gs,vals,dim);}
3717: 
3718:       gs_gop_local_out(gs,vals);
3719:     }
3720:   /* if intersection tree/pairwise and local is empty */
3721:   else
3722:     {
3723:       /* pairwise will do tree inside */
3724:       if (gs->num_pairs)
3725:         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3726: 
3727:       /* tree */
3728:       else if (gs->max_left_over)
3729:         {gs_gop_tree_plus_hc(gs,vals,dim);}
3730:     }

3732: }


3735: /******************************************************************************
3736: VERSION 3 :: 

3738: Input : 
3739: Output: 
3740: Return: 
3741: Description: 
3742: ******************************************************************************/
3743: static
3744: void
3745: gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3746: {
3747:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3748:    int *iptr, *msg_list, *msg_size, **msg_nodes;
3749:    int *pw, *list, *size, **nodes;
3750:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3751:   MPI_Status status;
3752:   int i, mask=1;

3754:   for (i=1; i<dim; i++)
3755:     {mask<<=1; mask++;}


3758:   /* strip and load s */
3759:   msg_list =list         = gs->pair_list;
3760:   msg_size =size         = gs->msg_sizes;
3761:   msg_nodes=nodes        = gs->node_list;
3762:   iptr=pw                = gs->pw_elm_list;
3763:   dptr1=dptr3            = gs->pw_vals;
3764:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3765:   msg_ids_out = ids_out  = gs->msg_ids_out;
3766:   dptr2                  = gs->out;
3767:   in1=in2                = gs->in;

3769:   /* post the receives */
3770:   /*  msg_nodes=nodes; */
3771:   do
3772:     {
3773:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3774:          second one *list and do list++ afterwards */
3775:       if ((my_id|mask)==(*list|mask))
3776:         {
3777:           MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3778:                     gs->gs_comm, msg_ids_in++);
3779:           in1 += *size++;
3780:         }
3781:       else
3782:         {list++; size++;}
3783:     }
3784:   while (*++msg_nodes);

3786:   /* load gs values into in out gs buffers */
3787:   while (*iptr >= 0)
3788:     {*dptr3++ = *(in_vals + *iptr++);}

3790:   /* load out buffers and post the sends */
3791:   msg_nodes=nodes;
3792:   list = msg_list;
3793:   while ((iptr = *msg_nodes++))
3794:     {
3795:       if ((my_id|mask)==(*list|mask))
3796:         {
3797:           dptr3 = dptr2;
3798:           while (*iptr >= 0)
3799:             {*dptr2++ = *(dptr1 + *iptr++);}
3800:           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3801:           /* is msg_ids_out++ correct? */
3802:           MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3803:                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3804:         }
3805:       else
3806:         {list++; msg_size++;}
3807:     }

3809:   /* do the tree while we're waiting */
3810:   if (gs->max_left_over)
3811:     {gs_gop_tree_plus_hc(gs,in_vals,dim);}

3813:   /* process the received data */
3814:   msg_nodes=nodes;
3815:   list = msg_list;
3816:   while ((iptr = *nodes++))
3817:     {
3818:       if ((my_id|mask)==(*list|mask))
3819:         {
3820:           /* Should I check the return value of MPI_Wait() or status? */
3821:           /* Can this loop be replaced by a call to MPI_Waitall()? */
3822:           MPI_Wait(ids_in++, &status);
3823:           while (*iptr >= 0)
3824:             {*(dptr1 + *iptr++) += *in2++;}
3825:         }
3826:       list++;
3827:     }

3829:   /* replace vals */
3830:   while (*pw >= 0)
3831:     {*(in_vals + *pw++) = *dptr1++;}

3833:   /* clear isend message handles */
3834:   /* This changed for clarity though it could be the same */
3835:   while (*msg_nodes++)
3836:     {
3837:       if ((my_id|mask)==(*msg_list|mask))
3838:         {
3839:           /* Should I check the return value of MPI_Wait() or status? */
3840:           /* Can this loop be replaced by a call to MPI_Waitall()? */
3841:           MPI_Wait(ids_out++, &status);
3842:         }
3843:       msg_list++;
3844:     }


3847: }



3851: /******************************************************************************
3852: Function: gather_scatter

3854: Input : 
3855: Output: 
3856: Return: 
3857: Description: 
3858: ******************************************************************************/
3859: static
3860: void
3861: gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3862: {
3863:   int size;
3864:   int *in, *out;
3865:   PetscScalar *buf, *work;
3866:   int op[] = {GL_ADD,0};

3868:   in   = gs->tree_map_in;
3869:   out  = gs->tree_map_out;
3870:   buf  = gs->tree_buf;
3871:   work = gs->tree_work;
3872:   size = gs->tree_nel;

3874:   rvec_zero(buf,size);

3876:   while (*in >= 0)
3877:     {*(buf + *out++) = *(vals + *in++);}

3879:   in   = gs->tree_map_in;
3880:   out  = gs->tree_map_out;

3882:   grop_hc(buf,work,size,op,dim);

3884:   while (*in >= 0)
3885:     {*(vals + *in++) = *(buf + *out++);}

3887: }