Actual source code: xxt.c

#define PETSCKSP_DLL

/*************************************xxt.c************************************
Module Name: xxt
Module Info:

author:  Henry M. Tufo III
e-mail:  hmt@asci.uchicago.edu
contact:
+--------------------------------+--------------------------------+
|MCS Division - Building 221     |Department of Computer Science  |
|Argonne National Laboratory     |Ryerson 152                     |
|9700 S. Cass Avenue             |The University of Chicago       |
|Argonne, IL  60439              |Chicago, IL  60637              |
|(630) 252-5354/5986 ph/fx       |(773) 702-6019/8487 ph/fx       |
+--------------------------------+--------------------------------+

Last Modification: 3.20.01
**************************************xxt.c***********************************/


/*************************************xxt.c************************************
NOTES ON USAGE:

**************************************xxt.c***********************************/
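/* A minimal usage sketch (illustrative only: mymatvec, my_l2g, my_n, my_m,
   my_grid_data, x, and b are hypothetical caller-supplied names, and error
   checking is omitted):

     xxt_ADT handle = XXT_new();
     XXT_factor(handle, my_l2g, my_n, my_m, (void*)mymatvec, my_grid_data);
     XXT_solve(handle, x, b);
     XXT_stats(handle);
     XXT_free(handle);
*/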
#include "src/ksp/pc/impls/tfs/tfs.h"

#define LEFT  -1
#define RIGHT  1
#define BOTH   0
#define MAX_FORTRAN_HANDLES  10

typedef struct xxt_solver_info {
  int n, m, n_global, m_global;         /* local/global dims of X            */
  int nnz, max_nnz, msg_buf_sz;         /* nnz in X, allocated nnz, comm buf */
  int *nsep, *lnsep, *fo, nfo, *stages; /* separator sizes and firing order  */
  int *col_sz, *col_indices;            /* column lengths, (off,len) pairs   */
  PetscScalar **col_vals, *x, *solve_uu, *solve_w; /* columns, packed values,
                                                      solve workspace        */
  int nsolves;                          /* number of solves performed        */
  PetscScalar tot_solve_time;           /* accumulated solve time            */
} xxt_info;

typedef struct matvec_info {
  int n, m, n_global, m_global;  /* local/global row and column counts       */
  int *local2global;             /* local-to-global map, INT_MAX terminated  */
  gs_ADT gs_handle;              /* gather/scatter handle for shared dofs    */
  PetscErrorCode (*matvec)(struct matvec_info*,PetscScalar*,PetscScalar*);
  void *grid_data;               /* opaque user data passed to matvec        */
} mv_info;

struct xxt_CDT{
  int id;          /* unique handle id                          */
  int ns;          /* dimension of null space (0 => full rank)  */
  int level;       /* log2(num_nodes)                           */
  xxt_info *info;
  mv_info  *mvi;
};
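/* The solver computes a sparse factor X with X^T.A.X = I, so that
   A^{-1} = X.X^T and each solve reduces to two passes over the stored
   columns plus one fan-in/fan-out of length-m data (see do_xxt_solve).
   xxt_generate builds the columns of X one separator dof at a time. */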

static int n_xxt=0;         /* rolling count of xxt contexts ever created */
static int n_xxt_handles=0; /* number of currently allocated handles      */

/* prototypes */
static void do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *rhs);
static void check_init(void);
static void check_handle(xxt_ADT xxt_handle);
static void det_separators(xxt_ADT xxt_handle);
static void do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u);
static int xxt_generate(xxt_ADT xxt_handle);
static int do_xxt_factor(xxt_ADT xxt_handle);
static mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data);



/*************************************xxt.c************************************
Function: XXT_new()

Input : none
Output: none
Return: handle to a freshly allocated xxt context
Description: allocate an xxt handle; factor it with XXT_factor() before use
**************************************xxt.c***********************************/
xxt_ADT
XXT_new(void)
{
  xxt_ADT xxt_handle;

  /* rolling count on n_xxt ... pot. problem here */
  n_xxt_handles++;
  xxt_handle       = (xxt_ADT)malloc(sizeof(struct xxt_CDT));
  xxt_handle->id   = ++n_xxt;
  xxt_handle->info = NULL; xxt_handle->mvi  = NULL;

  return(xxt_handle);
}


/*************************************xxt.c************************************
Function: XXT_factor()

Input : prev. allocated xxt handle, local-to-global map, local dims, matvec, grid data
Output: factored handle ready for use by XXT_solve()
Return: 0 on success
Description: determine separators, set up communication, and compute the X factor
**************************************xxt.c***********************************/
int
XXT_factor(xxt_ADT xxt_handle, /* prev. allocated xxt  handle */
           int *local2global,  /* global column mapping       */
           int n,              /* local num rows              */
           int m,              /* local num cols              */
           void *matvec,       /* b_loc=A_local.x_loc         */
           void *grid_data     /* grid data for matvec        */
           )
{
  check_init();
  check_handle(xxt_handle);

  /* only 2^k for now and all nodes participating */
  if ((1<<(xxt_handle->level=i_log2_num_nodes))!=num_nodes)
    {error_msg_fatal("only 2^k for now and MPI_COMM_WORLD!!! %d != %d\n",1<<i_log2_num_nodes,num_nodes);}

  /* space for X info */
  xxt_handle->info = (xxt_info*)malloc(sizeof(xxt_info));

  /* set up matvec handles */
  xxt_handle->mvi  = set_mvi(local2global, n, m, matvec, grid_data);

  /* matrix is assumed to be of full rank */
  /* LATER we can reset to indicate rank def. */
  xxt_handle->ns=0;

  /* determine separators and generate firing order - NB xxt info set here */
  det_separators(xxt_handle);

  return(do_xxt_factor(xxt_handle));
}


/*************************************xxt.c************************************
Function: XXT_solve

Input : xxt handle, x, and optionally b (if non-NULL, b is copied into x first)
Output: x contains the solution
Return: 0
Description: solve A.x = b via x = X.X^T.b using the factor built by XXT_factor()
**************************************xxt.c***********************************/
int
XXT_solve(xxt_ADT xxt_handle, double *x, double *b)
{
  check_init();
  check_handle(xxt_handle);

  /* need to copy b into x? */
  if (b)
    {rvec_copy(x,b,xxt_handle->mvi->n);}
  do_xxt_solve(xxt_handle,x);

  return(0);
}


/*************************************xxt.c************************************
Function: XXT_free()

Input : xxt handle
Output: none
Return: 0
Description: release all storage associated with the handle
**************************************xxt.c***********************************/
int
XXT_free(xxt_ADT xxt_handle)
{
  check_init();
  check_handle(xxt_handle);
  n_xxt_handles--;

  free(xxt_handle->info->nsep);
  free(xxt_handle->info->lnsep);
  free(xxt_handle->info->fo);
  free(xxt_handle->info->stages);
  free(xxt_handle->info->solve_uu);
  free(xxt_handle->info->solve_w);
  free(xxt_handle->info->x);
  free(xxt_handle->info->col_vals);
  free(xxt_handle->info->col_sz);
  free(xxt_handle->info->col_indices);
  free(xxt_handle->info);
  free(xxt_handle->mvi->local2global);
  gs_free(xxt_handle->mvi->gs_handle);
  free(xxt_handle->mvi);
  free(xxt_handle);

  /* if the check fails we nuke */
  /* if NULL pointer passed to free we nuke */
  /* if the calls to free fail that's not my problem */
  return(0);
}



/*************************************xxt.c************************************
Function: XXT_stats()

Input : xxt handle
Output: node 0 prints global min/max/avg/tot for nnz, n, buffer size, and solve time
Return: 0 on success, 1 if no factorization is available
Description: gather and report factorization statistics
**************************************xxt.c***********************************/
int
XXT_stats(xxt_ADT xxt_handle)
{
  int  op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD};
  int fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
  int   vals[9],  work[9];
  PetscScalar fvals[3], fwork[3];

  check_init();
  check_handle(xxt_handle);

  /* if factorization not done there are no stats */
  if (!xxt_handle->info||!xxt_handle->mvi)
    {
      if (!my_id)
        {printf("XXT_stats() :: no stats available!\n");}
      return 1;
    }

  vals[0]=vals[1]=vals[2]=xxt_handle->info->nnz;
  vals[3]=vals[4]=vals[5]=xxt_handle->mvi->n;
  vals[6]=vals[7]=vals[8]=xxt_handle->info->msg_buf_sz;
  giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);

  /* average solve time per solve; assumes at least one solve has been performed */
  fvals[0]=fvals[1]=fvals[2]
    =xxt_handle->info->tot_solve_time/xxt_handle->info->nsolves++;
  grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop);

  if (!my_id)
    {
      printf("%d :: min   xxt_nnz=%d\n",my_id,vals[0]);
      printf("%d :: max   xxt_nnz=%d\n",my_id,vals[1]);
      printf("%d :: avg   xxt_nnz=%g\n",my_id,1.0*vals[2]/num_nodes);
      printf("%d :: tot   xxt_nnz=%d\n",my_id,vals[2]);
      printf("%d :: xxt   C(2d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.5)));
      printf("%d :: xxt   C(3d)  =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.6667)));
      printf("%d :: min   xxt_n  =%d\n",my_id,vals[3]);
      printf("%d :: max   xxt_n  =%d\n",my_id,vals[4]);
      printf("%d :: avg   xxt_n  =%g\n",my_id,1.0*vals[5]/num_nodes);
      printf("%d :: tot   xxt_n  =%d\n",my_id,vals[5]);
      printf("%d :: min   xxt_buf=%d\n",my_id,vals[6]);
      printf("%d :: max   xxt_buf=%d\n",my_id,vals[7]);
      printf("%d :: avg   xxt_buf=%g\n",my_id,1.0*vals[8]/num_nodes);
      printf("%d :: min   xxt_slv=%g\n",my_id,fvals[0]);
      printf("%d :: max   xxt_slv=%g\n",my_id,fvals[1]);
      printf("%d :: avg   xxt_slv=%g\n",my_id,fvals[2]/num_nodes);
    }

  return(0);
}


/*************************************xxt.c************************************
Function: do_xxt_factor

Input : xxt handle
Output: none
Return: flag from xxt_generate() (0 on success)
Description: get A_local, local portion of global coarse matrix which
is a row dist. nxm matrix w/ n<m.
   o my_ml holds address of ML struct associated w/A_local and coarse grid
   o local2global holds global number of column i (i=0,...,m-1)
   o local2global holds global number of row    i (i=0,...,n-1)
   o mylocmatvec performs A_local . vec_local (note that gs is performed using
   gs_init/gop).

mylocmatvec = my_ml->Amat[grid_tag].matvec->external;
mylocmatvec (void :: void *data, double *in, double *out)
**************************************xxt.c***********************************/
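/* A sketch of a conforming local matvec (hypothetical: my_grid_data and the
   body are illustrative; only the signature is what XXT_factor() expects
   through its void *matvec argument, per the note above):

     void mymatvec(void *data, double *in, double *out)
     {
       my_grid_data *g = (my_grid_data*)data;
       ... compute out = A_local . in using g ...
     }
*/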
static
int
do_xxt_factor(xxt_ADT xxt_handle)
{
  int flag;

  flag=xxt_generate(xxt_handle);

  return(flag);
}


/*************************************xxt.c************************************
Function: xxt_generate

Input : xxt handle with separators and matvec info set
Output: xxt_handle->info populated with the computed X factor
Return: 0 on success
Description: generate the columns of X, one separator dof at a time
**************************************xxt.c***********************************/
static
int
xxt_generate(xxt_ADT xxt_handle)
{
  int i,j,k,idex;
  int dim, col;
  PetscScalar *u, *uu, *v, *z, *w, alpha, alpha_w;
  int *segs;
  int op[] = {GL_ADD,0};
  int off, len;
  PetscScalar *x_ptr;
  int *iptr, flag;
  int start=0, end, work;
  int op2[] = {GL_MIN,0};
  gs_ADT gs_handle;
  int *nsep, *lnsep, *fo;
  int a_n=xxt_handle->mvi->n;
  int a_m=xxt_handle->mvi->m;
  int *a_local2global=xxt_handle->mvi->local2global;
  int level;
  int xxt_nnz=0, xxt_max_nnz=0;
  int n, m;
  int *col_sz, *col_indices, *stages;
  PetscScalar **col_vals, *x;
  int n_global;
  int xxt_zero_nnz=0;
  int xxt_zero_nnz_0=0;
  PetscBLASInt i1 = 1;
  PetscScalar dm1 = -1.0;

  n=xxt_handle->mvi->n;
  nsep=xxt_handle->info->nsep;
  lnsep=xxt_handle->info->lnsep;
  fo=xxt_handle->info->fo;
  end=lnsep[0];
  level=xxt_handle->level;
  gs_handle=xxt_handle->mvi->gs_handle;

  /* is there a null space? */
  /* LATER add in ability to detect null space by checking alpha */
  for (i=0, j=0; i<=level; i++)
    {j+=nsep[i];}

  m = j-xxt_handle->ns;
  if (m!=j)
    {printf("xxt_generate() :: null space exists %d %d %d\n",m,j,xxt_handle->ns);}

  /* get and initialize storage for x local         */
  /* note that x local is nxm and stored by columns */
  col_sz = (int*) malloc(m*sizeof(int));
  col_indices = (int*) malloc((2*m+1)*sizeof(int));
  col_vals = (PetscScalar **) malloc(m*sizeof(PetscScalar *));
  for (i=j=0; i<m; i++, j+=2)
    {
      col_indices[j]=col_indices[j+1]=col_sz[i]=-1;
      col_vals[i] = NULL;
    }
  col_indices[j]=-1;

  /* size of separators for each sub-hc working from bottom of tree to top */
  /* this looks like nsep[]=segments */
  stages = (int*) malloc((level+1)*sizeof(int));
  segs   = (int*) malloc((level+1)*sizeof(int));
  ivec_zero(stages,level+1);
  ivec_copy(segs,nsep,level+1);
  for (i=0; i<level; i++)
    {segs[i+1] += segs[i];}
  stages[0] = segs[0];

  /* temporary vectors  */
  u  = (PetscScalar *) malloc(n*sizeof(PetscScalar));
  z  = (PetscScalar *) malloc(n*sizeof(PetscScalar));
  v  = (PetscScalar *) malloc(a_m*sizeof(PetscScalar));
  uu = (PetscScalar *) malloc(m*sizeof(PetscScalar));
  w  = (PetscScalar *) malloc(m*sizeof(PetscScalar));

  /* extra nnz due to replication of vertices across separators */
  for (i=1, j=0; i<=level; i++)
    {j+=nsep[i];}

  /* storage for sparse x values */
  n_global = xxt_handle->info->n_global;
  xxt_max_nnz = (int)(2.5*pow(1.0*n_global,1.6667) + j*n/2)/num_nodes;
  x = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar));
  xxt_nnz = 0;

  /* LATER - can embed next sep to fire in gs */
  /* time to make the donuts - generate X factor */
  for (dim=i=j=0;i<m;i++)
    {
      /* time to move to the next level? */
      while (i==segs[dim])
        {
#ifdef SAFE
          if (dim==level)
            {error_msg_fatal("dim about to exceed level\n"); break;}
#endif

          stages[dim++]=i;
          end+=lnsep[dim];
        }
      stages[dim]=i;

      /* which column are we firing? */
      /* i.e. set v_l */
      /* use new seps and do global min across hc to determine which one to fire */
      (start<end) ? (col=fo[start]) : (col=INT_MAX);
      giop_hc(&col,&work,1,op2,dim);

      /* shouldn't need this */
      if (col==INT_MAX)
        {
          error_msg_warning("hey ... col==INT_MAX??\n");
          continue;
        }

      /* do I own it? I should */
      rvec_zero(v ,a_m);
      if (col==fo[start])
        {
          start++;
          idex=ivec_linear_search(col, a_local2global, a_n);
          if (idex!=-1)
            {v[idex] = 1.0; j++;}
          else
            {error_msg_fatal("NOT FOUND!\n");}
        }
      else
        {
          idex=ivec_linear_search(col, a_local2global, a_m);
          if (idex!=-1)
            {v[idex] = 1.0;}
        }

      /* perform u = A.v_l */
      rvec_zero(u,n);
      do_matvec(xxt_handle->mvi,v,u);

      /* uu =  X^T.u_l (local portion) */
      /* technically only need to zero out first i entries */
      /* later turn this into an XXT_solve call ? */
      rvec_zero(uu,m);
      x_ptr=x;
      iptr = col_indices;
      for (k=0; k<i; k++)
        {
          off = *iptr++;
          len = *iptr++;

          uu[k] = BLASdot_(&len,u+off,&i1,x_ptr,&i1);
          x_ptr+=len;
        }

      /* uu = X^T.u_l (comm portion) */
      ssgl_radd  (uu, w, dim, stages);

      /* z = X.uu */
      rvec_zero(z,n);
      x_ptr=x;
      iptr = col_indices;
      for (k=0; k<i; k++)
        {
          off = *iptr++;
          len = *iptr++;

          BLASaxpy_(&len,&uu[k],x_ptr,&i1,z+off,&i1);
          x_ptr+=len;
        }

      /* compute v_l = v_l - z */
      rvec_zero(v+a_n,a_m-a_n);
      BLASaxpy_(&n,&dm1,z,&i1,v,&i1);

      /* compute u_l = A.v_l */
      if (a_n!=a_m)
        {gs_gop_hc(gs_handle,v,"+\0",dim);}
      rvec_zero(u,n);
      do_matvec(xxt_handle->mvi,v,u);

      /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */
      alpha = BLASdot_(&n,u,&i1,v,&i1);
      /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */
      grop_hc(&alpha, &alpha_w, 1, op, dim);

      alpha = (PetscScalar) sqrt((double)alpha);

      /* check for small alpha                             */
      /* LATER use this to detect and determine null space */
      if (fabs(alpha)<1.0e-14)
        {error_msg_fatal("bad alpha! %g\n",alpha);}

      /* compute v_l = v_l/sqrt(alpha) */
      rvec_scale(v,1.0/alpha,n);

      /* add newly generated column, v_l, to X */
      flag = 1;
      off=len=0;
      for (k=0; k<n; k++)
        {
          if (v[k]!=0.0)
            {
              len=k;
              if (flag)
                {off=k; flag=0;}
            }
        }

      /* off/len now hold the first/last nonzero index; convert to run length */
      len -= (off-1);

      if (len>0)
        {
          if ((xxt_nnz+len)>xxt_max_nnz)
            {
              error_msg_warning("increasing space for X by 2x!\n");
              xxt_max_nnz *= 2;
              x_ptr = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar));
              rvec_copy(x_ptr,x,xxt_nnz);
              free(x);
              x = x_ptr;
              x_ptr+=xxt_nnz;
            }
          xxt_nnz += len;
          rvec_copy(x_ptr,v+off,len);

          /* keep track of number of zeros */
          if (dim)
            {
              for (k=0; k<len; k++)
                {
                  if (x_ptr[k]==0.0)
                    {xxt_zero_nnz++;}
                }
            }
          else
            {
              for (k=0; k<len; k++)
                {
                  if (x_ptr[k]==0.0)
                    {xxt_zero_nnz_0++;}
                }
            }
          col_indices[2*i] = off;
          col_sz[i] = col_indices[2*i+1] = len;
          col_vals[i] = x_ptr;
        }
      else
        {
          col_indices[2*i] = 0;
          col_sz[i] = col_indices[2*i+1] = 0;
          col_vals[i] = x_ptr;
        }
    }

  /* close off stages for execution phase */
  while (dim!=level)
    {
      stages[dim++]=i;
      error_msg_warning("disconnected!!! dim(%d)!=level(%d)\n",dim,level);
    }
  stages[dim]=i;

  xxt_handle->info->n=xxt_handle->mvi->n;
  xxt_handle->info->m=m;
  xxt_handle->info->nnz=xxt_nnz;
  xxt_handle->info->max_nnz=xxt_max_nnz;
  xxt_handle->info->msg_buf_sz=stages[level]-stages[0];
  xxt_handle->info->solve_uu = (PetscScalar *) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->solve_w  = (PetscScalar *) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->x=x;
  xxt_handle->info->col_vals=col_vals;
  xxt_handle->info->col_sz=col_sz;
  xxt_handle->info->col_indices=col_indices;
  xxt_handle->info->stages=stages;
  xxt_handle->info->nsolves=0;
  xxt_handle->info->tot_solve_time=0.0;

  free(segs);
  free(u);
  free(v);
  free(uu);
  free(z);
  free(w);

  return(0);
}


/*************************************xxt.c************************************
Function: do_xxt_solve

Input : xxt handle, uc = rhs
Output: uc overwritten with the solution
Return: none
Description: apply the factored inverse, uc = X.X^T.uc
**************************************xxt.c***********************************/
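/* Solve sketch: with A^{-1} = X.X^T the solve proceeds in three steps,
     uu = X^T.uc         (local dot products against the stored columns)
     uu = sum across hc  (ssgl_radd folds the partial sums stage by stage)
     uc = X.uu           (local axpys back into uc)
   so uc is overwritten with the solution. */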
static
void
do_xxt_solve(xxt_ADT xxt_handle,  PetscScalar *uc)
{
  int off, len, *iptr;
  int level       =xxt_handle->level;
  int n           =xxt_handle->info->n;
  int m           =xxt_handle->info->m;
  int *stages     =xxt_handle->info->stages;
  int *col_indices=xxt_handle->info->col_indices;
  PetscScalar *x_ptr, *uu_ptr;
  PetscScalar *solve_uu=xxt_handle->info->solve_uu;
  PetscScalar *solve_w =xxt_handle->info->solve_w;
  PetscScalar *x       =xxt_handle->info->x;
  PetscBLASInt i1 = 1;

  uu_ptr=solve_uu;
  rvec_zero(uu_ptr,m);

  /* x  = X.X^T.b */
  /* uu = X^T.b */
  for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len)
    {
      off=*iptr++; len=*iptr++;
      *uu_ptr++ = BLASdot_(&len,uc+off,&i1,x_ptr,&i1);
    }

  /* communication of beta */
  uu_ptr=solve_uu;
  if (level) {ssgl_radd(uu_ptr, solve_w, level, stages);}

  rvec_zero(uc,n);

  /* x = X.uu */
  for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len)
    {
      off=*iptr++; len=*iptr++;
      BLASaxpy_(&len,uu_ptr++,x_ptr,&i1,uc+off,&i1);
    }
}


/*************************************xxt.c************************************
Function: check_init

Input : none
Output: none
Return: none
Description: ensure the comm package has been initialized
**************************************xxt.c***********************************/
static
void
check_init(void)
{
  comm_init();
}


/*************************************xxt.c************************************
Function: check_handle()

Input : xxt handle
Output: none
Return: none
Description: fatal error if the handle is NULL or its id disagrees across nodes
**************************************xxt.c***********************************/
static
void
check_handle(xxt_ADT xxt_handle)
{
#ifdef SAFE
  int vals[2], work[2], op[] = {NON_UNIFORM,GL_MIN,GL_MAX};
#endif

  if (xxt_handle==NULL)
    {error_msg_fatal("check_handle() :: bad handle :: NULL %p\n",(void*)xxt_handle);}

#ifdef SAFE
  vals[0]=vals[1]=xxt_handle->id;
  giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);
  if ((vals[0]!=vals[1])||(xxt_handle->id<=0))
    {error_msg_fatal("check_handle() :: bad handle :: id mismatch min/max %d/%d %d\n",
                     vals[0],vals[1], xxt_handle->id);}
#endif
}


/*************************************xxt.c************************************
Function: det_separators

Input : xxt handle (matvec info must already be set)
Output: nsep, lnsep, fo, and nfo stored in xxt_handle->info
Return: none
Description: determine separator sets top down and generate the firing order;
  called as det_separators(xxt_handle) from XXT_factor()
**************************************xxt.c***********************************/
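/* Separator sketch: working from the top of the hypercube down
   (edge=level,...,1), each pass splits the active cube in half (mask),
   fires a signal from one half, and marks the dofs in the other half that
   receive it as that level's separator. E.g. with num_nodes=4 (level=2):
   edge 2 separates nodes {0,1} from {2,3}; edge 1 separates 0 from 1 and
   2 from 3; the remainder is purely local (level 0). Separator dofs are
   written into fo[] from the back, so reading fo[] from the front gives
   the bottom-up firing order used by xxt_generate(). */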
static
void
det_separators(xxt_ADT xxt_handle)
{
  int i, ct, id;
  int mask, edge, *iptr;
  int *dir, *used;
  int sum[4], w[4];
  PetscScalar rsum[4], rw[4];
  int op[] = {GL_ADD,0};
  PetscScalar *lhs, *rhs;
  int *nsep, *lnsep, *fo, nfo=0;
  gs_ADT gs_handle=xxt_handle->mvi->gs_handle;
  int *local2global=xxt_handle->mvi->local2global;
  int  n=xxt_handle->mvi->n;
  int  m=xxt_handle->mvi->m;
  int level=xxt_handle->level;
  int shared=FALSE;

  dir  = (int*)malloc(sizeof(int)*(level+1));
  nsep = (int*)malloc(sizeof(int)*(level+1));
  lnsep= (int*)malloc(sizeof(int)*(level+1));
  fo   = (int*)malloc(sizeof(int)*(n+1));
  used = (int*)malloc(sizeof(int)*n);

  ivec_zero(dir  ,level+1);
  ivec_zero(nsep ,level+1);
  ivec_zero(lnsep,level+1);
  ivec_set (fo   ,-1,n+1);
  ivec_zero(used,n);

  lhs  = (PetscScalar*)malloc(sizeof(PetscScalar)*m);
  rhs  = (PetscScalar*)malloc(sizeof(PetscScalar)*m);

  /* determine the # of unique dof */
  rvec_zero(lhs,m);
  rvec_set(lhs,1.0,n);
  gs_gop_hc(gs_handle,lhs,"+\0",level);
  rvec_zero(rsum,2);
  for (ct=i=0;i<n;i++)
    {
      if (lhs[i]!=0.0)
        {rsum[0]+=1.0/lhs[i]; rsum[1]+=lhs[i];}
    }
  grop_hc(rsum,rw,2,op,level);
  rsum[0]+=0.1;
  rsum[1]+=0.1;
  /*  if (!my_id)
    {
      printf("xxt n unique = %d (%g)\n",(int) rsum[0], rsum[0]);
      printf("xxt n shared = %d (%g)\n",(int) rsum[1], rsum[1]);
      }*/

  if (fabs(rsum[0]-rsum[1])>EPS)
    {shared=TRUE;}

  xxt_handle->info->n_global=xxt_handle->info->m_global=(int) rsum[0];
  xxt_handle->mvi->n_global =xxt_handle->mvi->m_global =(int) rsum[0];

  /* determine separator sets top down */
  if (shared)
    {
      for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
        {
          /* set rsh of hc, fire, and collect lhs responses */
          (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
          gs_gop_hc(gs_handle,lhs,"+\0",edge);

          /* set lsh of hc, fire, and collect rhs responses */
          (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
          gs_gop_hc(gs_handle,rhs,"+\0",edge);

          for (i=0;i<n;i++)
            {
              if (id< mask)
                {
                  if (lhs[i]!=0.0)
                    {lhs[i]=1.0;}
                }
              if (id>=mask)
                {
                  if (rhs[i]!=0.0)
                    {rhs[i]=1.0;}
                }
            }

          if (id< mask)
            {gs_gop_hc(gs_handle,lhs,"+\0",edge-1);}
          else
            {gs_gop_hc(gs_handle,rhs,"+\0",edge-1);}

          /* count number of dofs I own that have signal and not in sep set */
          rvec_zero(rsum,4);
          for (ivec_zero(sum,4),ct=i=0;i<n;i++)
            {
              if (!used[i])
                {
                  /* number of unmarked dofs on node */
                  ct++;
                  /* number of dofs to be marked on lhs hc */
                  if (id< mask)
                    {
                      if (lhs[i]!=0.0)
                        {sum[0]++; rsum[0]+=1.0/lhs[i];}
                    }
                  /* number of dofs to be marked on rhs hc */
                  if (id>=mask)
                    {
                      if (rhs[i]!=0.0)
                        {sum[1]++; rsum[1]+=1.0/rhs[i];}
                    }
                }
            }

          /* go for load balance - choose half with most unmarked dofs, bias LHS */
          (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
          (id<mask) ? (rsum[2]=ct) : (rsum[3]=ct);
          giop_hc(sum,w,4,op,edge);
          grop_hc(rsum,rw,4,op,edge);
          rsum[0]+=0.1; rsum[1]+=0.1; rsum[2]+=0.1; rsum[3]+=0.1;

          if (id<mask)
            {
              /* mark dofs I own that have signal and not in sep set */
              for (ct=i=0;i<n;i++)
                {
                  if ((!used[i])&&(lhs[i]!=0.0))
                    {
                      ct++; nfo++;

                      if (nfo>n)
                        {error_msg_fatal("nfo about to exceed n\n");}

                      *--iptr = local2global[i];
                      used[i]=edge;
                    }
                }
              if (ct>1) {ivec_sort(iptr,ct);}

              lnsep[edge]=ct;
              nsep[edge]=(int) rsum[0];
              dir [edge]=LEFT;
            }

          if (id>=mask)
            {
              /* mark dofs I own that have signal and not in sep set */
              for (ct=i=0;i<n;i++)
                {
                  if ((!used[i])&&(rhs[i]!=0.0))
                    {
                      ct++; nfo++;

                      if (nfo>n)
                        {error_msg_fatal("nfo about to exceed n\n");}

                      *--iptr = local2global[i];
                      used[i]=edge;
                    }
                }
              if (ct>1) {ivec_sort(iptr,ct);}

              lnsep[edge]=ct;
              nsep[edge]= (int) rsum[1];
              dir [edge]=RIGHT;
            }

          /* LATER or we can recur on these to order seps at this level */
          /* do we need full set of separators for this?                */

          /* fold rhs hc into lower */
          if (id>=mask)
            {id-=mask;}
        }
    }
  else
    {
      for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1)
        {
          /* set rsh of hc, fire, and collect lhs responses */
          (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m);
          gs_gop_hc(gs_handle,lhs,"+\0",edge);

          /* set lsh of hc, fire, and collect rhs responses */
          (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m);
          gs_gop_hc(gs_handle,rhs,"+\0",edge);

          /* count number of dofs I own that have signal and not in sep set */
          for (ivec_zero(sum,4),ct=i=0;i<n;i++)
            {
              if (!used[i])
                {
                  /* number of unmarked dofs on node */
                  ct++;
                  /* number of dofs to be marked on lhs hc */
                  if ((id< mask)&&(lhs[i]!=0.0)) {sum[0]++;}
                  /* number of dofs to be marked on rhs hc */
                  if ((id>=mask)&&(rhs[i]!=0.0)) {sum[1]++;}
                }
            }

          /* go for load balance - choose half with most unmarked dofs, bias LHS */
          (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
          giop_hc(sum,w,4,op,edge);

          /* lhs hc wins */
          if (sum[2]>=sum[3])
            {
              if (id<mask)
                {
                  /* mark dofs I own that have signal and not in sep set */
                  for (ct=i=0;i<n;i++)
                    {
                      if ((!used[i])&&(lhs[i]!=0.0))
                        {
                          ct++; nfo++;
                          *--iptr = local2global[i];
                          used[i]=edge;
                        }
                    }
                  if (ct>1) {ivec_sort(iptr,ct);}
                  lnsep[edge]=ct;
                }
              nsep[edge]=sum[0];
              dir [edge]=LEFT;
            }
          /* rhs hc wins */
          else
            {
              if (id>=mask)
                {
                  /* mark dofs I own that have signal and not in sep set */
                  for (ct=i=0;i<n;i++)
                    {
                      if ((!used[i])&&(rhs[i]!=0.0))
                        {
                          ct++; nfo++;
                          *--iptr = local2global[i];
                          used[i]=edge;
                        }
                    }
                  if (ct>1) {ivec_sort(iptr,ct);}
                  lnsep[edge]=ct;
                }
              nsep[edge]=sum[1];
              dir [edge]=RIGHT;
            }
          /* LATER or we can recur on these to order seps at this level */
          /* do we need full set of separators for this?                */

          /* fold rhs hc into lower */
          if (id>=mask)
            {id-=mask;}
        }
    }


  /* level 0 is on processor case - so mark the remainder */
  for (ct=i=0;i<n;i++)
    {
      if (!used[i])
        {
          ct++; nfo++;
          *--iptr = local2global[i];
          used[i]=edge;
        }
    }
  if (ct>1) {ivec_sort(iptr,ct);}
  lnsep[edge]=ct;
  nsep [edge]=ct;
  dir  [edge]=LEFT;

  xxt_handle->info->nsep=nsep;
  xxt_handle->info->lnsep=lnsep;
  xxt_handle->info->fo=fo;
  xxt_handle->info->nfo=nfo;

  free(dir);
  free(lhs);
  free(rhs);
  free(used);
}


/*************************************xxt.c************************************
Function: set_mvi

Input : local2global map, local dims n and m, matvec function, grid data
Output: none
Return: newly allocated and initialized mv_info
Description: package the local matvec and its communication handle
**************************************xxt.c***********************************/
static
mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data)
{
  mv_info *mvi;

  mvi = (mv_info*)malloc(sizeof(mv_info));
  mvi->n=n;
  mvi->m=m;
  mvi->n_global=-1;
  mvi->m_global=-1;
  mvi->local2global=(int*)malloc((m+1)*sizeof(int));
  ivec_copy(mvi->local2global,local2global,m);
  mvi->local2global[m] = INT_MAX;
  mvi->matvec=(PetscErrorCode (*)(mv_info*,PetscScalar*,PetscScalar*))matvec;
  mvi->grid_data=grid_data;

  /* set xxt communication handle to perform restricted matvec */
  mvi->gs_handle = gs_init(local2global, m, num_nodes);

  return(mvi);
}


/*************************************xxt.c************************************
Function: do_matvec

Input : mv_info A, input vector v
Output: u = A.v
Return: none
Description:
      computes u = A.v
      do_matvec(xxt_handle->mvi,v,u);
**************************************xxt.c***********************************/
static
void do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u)
{
  /* grid_data is passed as the first argument, cast to match the stored pointer type */
  A->matvec((mv_info*)A->grid_data,v,u);
}