Actual source code: nn.c

  1: #define PETSCKSP_DLL

 3:  #include src/ksp/pc/impls/is/nn/nn.h

  5: /* -------------------------------------------------------------------------- */
  6: /*
  7:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  8:                     by setting data structures and options.   

 10:    Input Parameter:
 11: .  pc - the preconditioner context

 13:    Application Interface Routine: PCSetUp()

 15:    Notes:
 16:    The interface routine PCSetUp() is not usually called directly by
 17:    the user, but instead is called by PCApply() if necessary.
 18: */
 21: static PetscErrorCode PCSetUp_NN(PC pc)
 22: {
 24: 
 26:   if (!pc->setupcalled) {
 27:     /* Set up all the "iterative substructuring" common block */
 28:     PCISSetUp(pc);
 29:     /* Create the coarse matrix. */
 30:     PCNNCreateCoarseMatrix(pc);
 31:   }
 32:   return(0);
 33: }

 35: /* -------------------------------------------------------------------------- */
 36: /*
 37:    PCApply_NN - Applies the NN preconditioner to a vector.

 39:    Input Parameters:
 40: .  pc - the preconditioner context
 41: .  r - input vector (global)

 43:    Output Parameter:
 44: .  z - output vector (global)

 46:    Application Interface Routine: PCApply()
 47:  */
 50: static PetscErrorCode PCApply_NN(PC pc,Vec r,Vec z)
 51: {
 52:   PC_IS          *pcis = (PC_IS*)(pc->data);
 54:   PetscScalar    m_one = -1.0;
 55:   Vec            w = pcis->vec1_global;

 58:   /*
 59:     Dirichlet solvers.
 60:     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
 61:     Storing the local results at vec2_D
 62:   */
 63:   VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 64:   VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
 65:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 66: 
 67:   /*
 68:     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 69:     Storing the result in the interface portion of the global vector w.
 70:   */
 71:   MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
 72:   VecScale(pcis->vec1_B,m_one);
 73:   VecCopy(r,w);
 74:   VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
 75:   VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);

 77:   /*
 78:     Apply the interface preconditioner
 79:   */
 80:   PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
 81:                                           pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

 83:   /*
 84:     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
 85:     The result is stored in vec1_D.
 86:   */
 87:   VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 88:   VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
 89:   MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

 91:   /*
 92:     Dirichlet solvers.
 93:     Computing $ B_I^{(i)}t_I^{(i)} $ and sticking into the global vector the blocks
 94:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $.
 95:   */
 96:   VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 97:   VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
 98:   KSPSolve(pcis->ksp_D,pcis->vec1_D,pcis->vec2_D);
 99:   VecScale(pcis->vec2_D,m_one);
100:   VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
101:   VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
102:   return(0);
103: }

105: /* -------------------------------------------------------------------------- */
106: /*
107:    PCDestroy_NN - Destroys the private context for the NN preconditioner
108:    that was created with PCCreate_NN().

110:    Input Parameter:
111: .  pc - the preconditioner context

113:    Application Interface Routine: PCDestroy()
114: */
117: static PetscErrorCode PCDestroy_NN(PC pc)
118: {
119:   PC_NN          *pcnn = (PC_NN*)pc->data;

123:   PCISDestroy(pc);

125:   if (pcnn->coarse_mat)  {MatDestroy(pcnn->coarse_mat);}
126:   if (pcnn->coarse_x)    {VecDestroy(pcnn->coarse_x);}
127:   if (pcnn->coarse_b)    {VecDestroy(pcnn->coarse_b);}
128:   if (pcnn->ksp_coarse) {KSPDestroy(pcnn->ksp_coarse);}
129:   if (pcnn->DZ_IN) {
130:     if (pcnn->DZ_IN[0]) {PetscFree(pcnn->DZ_IN[0]);}
131:     PetscFree(pcnn->DZ_IN);
132:   }

134:   /*
135:       Free the private data structure that was hanging off the PC
136:   */
137:   PetscFree(pcnn);
138:   return(0);
139: }

141: /* -------------------------------------------------------------------------- */
142: /*MC
143:    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

145:    Options Database Keys:
146: +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
147:                                        (this skips the first coarse grid solve in the preconditioner)
148: .    -pc_nn_turn_off_second_balancing - do not balance the solution solving the local Neumann problems
149:                                        (this skips the second coarse grid solve in the preconditioner)
150: .    -pc_is_damp_fixed <fact> -
151: .    -pc_is_remove_nullspace_fixed -
152: .    -pc_is_set_damping_factor_floating <fact> -
153: .    -pc_is_not_damp_floating -
154: +    -pc_is_not_remove_nullspace_floating - 

156:    Level: intermediate

158:    Notes: The matrix used with this preconditioner must be of type MATIS 

160:           Unlike more 'conventional' Neumann-Neumann preconditioners this iterates over ALL the
161:           degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
162:           on the subdomains; though in our experience using approximate solvers is slower.).

164:           Options for the coarse grid preconditioner can be set with -nn_coarse_pc_xxx
165:           Options for the Dirichlet subproblem preconditioner can be set with -is_localD_pc_xxx
166:           Options for the Neumann subproblem preconditioner can be set with -is_localN_pc_xxx

168:    Contributed by Paulo Goldfeld

170: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,  MatIS
171: M*/
175: PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_NN(PC pc)
176: {
178:   PC_NN          *pcnn;

181:   /*
182:      Creates the private data structure for this preconditioner and
183:      attach it to the PC object.
184:   */
185:   PetscNew(PC_NN,&pcnn);
186:   pc->data  = (void*)pcnn;

188:   /*
189:      Logs the memory usage; this is not needed but allows PETSc to 
190:      monitor how much memory is being used for various purposes.
191:   */
192:   PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

194:   PCISCreate(pc);
195:   pcnn->coarse_mat  = 0;
196:   pcnn->coarse_x    = 0;
197:   pcnn->coarse_b    = 0;
198:   pcnn->ksp_coarse = 0;
199:   pcnn->DZ_IN       = 0;

201:   /*
202:       Set the pointers for the functions that are provided above.
203:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
204:       are called, they will automatically call these functions.  Note we
205:       choose not to provide a couple of these functions since they are
206:       not needed.
207:   */
208:   pc->ops->apply               = PCApply_NN;
209:   pc->ops->applytranspose      = 0;
210:   pc->ops->setup               = PCSetUp_NN;
211:   pc->ops->destroy             = PCDestroy_NN;
212:   pc->ops->view                = 0;
213:   pc->ops->applyrichardson     = 0;
214:   pc->ops->applysymmetricleft  = 0;
215:   pc->ops->applysymmetricright = 0;
216:   return(0);
217: }


221: /* -------------------------------------------------------------------------- */
222: /*
223:    PCNNCreateCoarseMatrix - 
224: */
227: PetscErrorCode PCNNCreateCoarseMatrix (PC pc)
228: {
229:   MPI_Request    *send_request, *recv_request;
231:   PetscInt       i, j, k;
232:   PetscScalar*   mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
233:   PetscScalar**  DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

235:   /* aliasing some names */
236:   PC_IS*         pcis     = (PC_IS*)(pc->data);
237:   PC_NN*         pcnn     = (PC_NN*)pc->data;
238:   PetscInt       n_neigh  = pcis->n_neigh;
239:   PetscInt*      neigh    = pcis->neigh;
240:   PetscInt*      n_shared = pcis->n_shared;
241:   PetscInt**     shared   = pcis->shared;
242:   PetscScalar**  DZ_IN;   /* Must be initialized after memory allocation. */

245:   /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
246:   PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);

248:   /* Allocate memory for DZ */
249:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
250:   /* This is just in order to DZ_OUT and DZ_IN to have exactly the same form. */
251:   {
252:     PetscInt size_of_Z = 0;
253:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
254:     DZ_IN = pcnn->DZ_IN;
255:     PetscMalloc ((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
256:     for (i=0; i<n_neigh; i++) {
257:       size_of_Z += n_shared[i];
258:     }
259:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
260:     PetscMalloc ((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
261:   }
262:   for (i=1; i<n_neigh; i++) {
263:     DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
264:     DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
265:   }

267:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
268:   /* First, set the auxiliary array pcis->work_N. */
269:   PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
270:   for (i=1; i<n_neigh; i++){
271:     for (j=0; j<n_shared[i]; j++) {
272:       DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
273:     }
274:   }

276:   /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
277:   /* Notice that send_request[] and recv_request[] could have one less element. */
278:   /* We make them longer to have request[i] corresponding to neigh[i].          */
279:   {
280:     PetscMPIInt tag;
281:     PetscObjectGetNewTag((PetscObject)pc,&tag);
282:     PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
283:     recv_request = send_request + (n_neigh);
284:     for (i=1; i<n_neigh; i++) {
285:       MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
286:       MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
287:     }
288:   }

290:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
291:   for(j=0; j<n_shared[0]; j++) {
292:     DZ_IN[0][j] = pcis->work_N[shared[0][j]];
293:   }

295:   /* Start computing with local D*Z while communication goes on.    */
296:   /* Apply Schur complement. The result is "stored" in vec (more    */
297:   /* precisely, vec points to the result, stored in pc_nn->vec1_B)  */
298:   /* and also scattered to pcnn->work_N.                            */
299:   PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
300:                                pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

302:   /* Compute the first column, while completing the receiving. */
303:   for (i=0; i<n_neigh; i++) {
304:     MPI_Status  stat;
305:     PetscMPIInt ind=0;
306:     if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
307:     mat[ind*n_neigh+0] = 0.0;
308:     for (k=0; k<n_shared[ind]; k++) {
309:       mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
310:     }
311:   }

313:   /* Compute the remaining of the columns */
314:   for (j=1; j<n_neigh; j++) {
315:     PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
316:                                  pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
317:     for (i=0; i<n_neigh; i++) {
318:       mat[i*n_neigh+j] = 0.0;
319:       for (k=0; k<n_shared[i]; k++) {
320:         mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
321:       }
322:     }
323:   }

325:   /* Complete the sending. */
326:   if (n_neigh>1) {
327:     MPI_Status *stat;
328:     PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
329:     if (n_neigh-1) {MPI_Waitall(n_neigh-1,&(send_request[1]),stat);}
330:     PetscFree(stat);
331:   }

333:   /* Free the memory for the MPI requests */
334:   PetscFree(send_request);

336:   /* Free the memory for DZ_OUT */
337:   if (DZ_OUT) {
338:     if (DZ_OUT[0]) { PetscFree(DZ_OUT[0]); }
339:     PetscFree(DZ_OUT);
340:   }

342:   {
343:     PetscMPIInt size;
344:     MPI_Comm_size(pc->comm,&size);
345:     /* Create the global coarse vectors (rhs and solution). */
346:     VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
347:     VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
348:     /* Create and set the global coarse AIJ matrix. */
349:     MatCreate(pc->comm,&(pcnn->coarse_mat));
350:     MatSetSizes(pcnn->coarse_mat,1,1,size,size);
351:     MatSetType(pcnn->coarse_mat,MATAIJ);
352:     MatSeqAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL);
353:     MatMPIAIJSetPreallocation(pcnn->coarse_mat,1,PETSC_NULL,1,PETSC_NULL);
354:     MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
355:     MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
356:     MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
357:   }

359:   {
360:     PetscMPIInt rank;
361:     PetscScalar one = 1.0;
362:     MPI_Comm_rank(pc->comm,&rank);
363:     /* "Zero out" rows of not-purely-Neumann subdomains */
364:     if (pcis->pure_neumann) {  /* does NOT zero the row; create an empty index set. The reason is that MatZeroRows() is collective. */
365:       MatZeroRows(pcnn->coarse_mat,0,PETSC_NULL,one);
366:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
367:       PetscInt row = (PetscInt) rank;
368:       MatZeroRows(pcnn->coarse_mat,1,&row,one);
369:     }
370:   }

372:   /* Create the coarse linear solver context */
373:   {
374:     PC  pc_ctx, inner_pc;
375:     KSPCreate(pc->comm,&pcnn->ksp_coarse);
376:     KSPSetOperators(pcnn->ksp_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
377:     KSPGetPC(pcnn->ksp_coarse,&pc_ctx);
378:     PCSetType(pc_ctx,PCREDUNDANT);
379:     KSPSetType(pcnn->ksp_coarse,KSPPREONLY);
380:     PCRedundantGetPC(pc_ctx,&inner_pc);
381:     PCSetType(inner_pc,PCLU);
382:     KSPSetOptionsPrefix(pcnn->ksp_coarse,"nn_coarse_");
383:     KSPSetFromOptions(pcnn->ksp_coarse);
384:     /* the vectors in the following line are dummy arguments, just telling the KSP the vector size. Values are not used */
385:     KSPSetUp(pcnn->ksp_coarse);
386:   }

388:   /* Free the memory for mat */
389:   PetscFree(mat);

391:   /* for DEBUGGING, save the coarse matrix to a file. */
392:   {
393:     PetscTruth flg;
394:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_save_coarse_matrix",&flg);
395:     if (flg) {
396:       PetscViewer viewer;
397:       PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
398:       PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
399:       MatView(pcnn->coarse_mat,viewer);
400:       PetscViewerDestroy(viewer);
401:     }
402:   }

404:   /*  Set the variable pcnn->factor_coarse_rhs. */
405:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

407:   /* See historical note 02, at the bottom of this file. */
408:   return(0);
409: }

411: /* -------------------------------------------------------------------------- */
412: /*
413:    PCNNApplySchurToChunk - 

415:    Input parameters:
416: .  pcnn
417: .  n - size of chunk
418: .  idx - indices of chunk
419: .  chunk - values

421:    Output parameters:
422: .  array_N - result of Schur complement applied to chunk, scattered to big array
423: .  vec1_B  - result of Schur complement applied to chunk
424: .  vec2_B  - garbage (used as work space)
425: .  vec1_D  - garbage (used as work space)
426: .  vec2_D  - garbage (used as work space)

428: */
431: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
432: {
434:   PetscInt       i;
435:   PC_IS          *pcis = (PC_IS*)(pc->data);

438:   PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
439:   for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
440:   PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
441:   PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
442:   PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);
443:   return(0);
444: }

446: /* -------------------------------------------------------------------------- */
447: /*
448:    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e., 
449:                                       the preconditioner for the Schur complement.

451:    Input parameter:
452: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

454:    Output parameters:
455: .  z - global vector of interior and interface nodes. The values on the interface are the result of
456:        the application of the interface preconditioner to the interface part of r. The values on the
457:        interior nodes are garbage.
458: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
459: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
460: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
461: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
462: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
463: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
464: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
465: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

467: */
470: PetscErrorCode PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
471:                                       Vec vec2_D, Vec vec1_N, Vec vec2_N)
472: {
474:   PC_IS*         pcis = (PC_IS*)(pc->data);

477:   /*
478:     First balancing step.
479:   */
480:   {
481:     PetscTruth flg;
482:     PetscOptionsHasName(PETSC_NULL,"-pc_nn_turn_off_first_balancing",&flg);
483:     if (!flg) {
484:       PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
485:     } else {
486:       VecCopy(r,z);
487:     }
488:   }

490:   /*
491:     Extract the local interface part of z and scale it by D 
492:   */
493:   VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
494:   VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
495:   VecPointwiseMult(vec2_B,pcis->D,vec1_B);

497:   /* Neumann Solver */
498:   PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

500:   /*
501:     Second balancing step.
502:   */
503:   {
504:     PetscTruth flg;
505:     PetscOptionsHasName(PETSC_NULL,"-pc_turn_off_second_balancing",&flg);
506:     if (!flg) {
507:       PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
508:     } else {
509:       PetscScalar zero = 0.0;
510:       VecPointwiseMult(vec2_B,pcis->D,vec1_B);
511:       VecSet(z,zero);
512:       VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
513:       VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
514:     }
515:   }
516:   return(0);
517: }

519: /* -------------------------------------------------------------------------- */
520: /*
521:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
522:                    input argument u is provided), or s, as given in equations
523:                    (12) and (13), if the input argument u is a null vector.
524:                    Notice that the input argument u plays the role of u_i in
525:                    equation (14). The equation numbers refer to [Man93].

527:    Input Parameters:
528: .  pcnn - NN preconditioner context.
529: .  r - MPI vector of all nodes (interior and interface). It's preserved.
530: .  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

532:    Output Parameters:
533: .  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
534: .  vec1_B - Sequential vector of local interface nodes. Workspace.
535: .  vec2_B - Sequential vector of local interface nodes. Workspace.
536: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
537: .  vec1_D - Sequential vector of local interior nodes. Workspace.
538: .  vec2_D - Sequential vector of local interior nodes. Workspace.
539: .  work_N - Array of all local nodes (interior and interface). Workspace.

541: */
544: PetscErrorCode PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
545:                    Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
546: {
548:   PetscInt       k;
549:   PetscScalar    zero     =  0.0;
550:   PetscScalar    m_one    = -1.0;
551:   PetscScalar    value;
552:   PetscScalar*   lambda;
553:   PC_NN*         pcnn     = (PC_NN*)(pc->data);
554:   PC_IS*         pcis     = (PC_IS*)(pc->data);

557:   PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);
558:   if (u) {
559:     if (!vec3_B) { vec3_B = u; }
560:     VecPointwiseMult(vec1_B,pcis->D,u);
561:     VecSet(z,zero);
562:     VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
563:     VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
564:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
565:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
566:     PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
567:     VecScale(vec3_B,m_one);
568:     VecCopy(r,z);
569:     VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
570:     VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
571:   } else {
572:     VecCopy(r,z);
573:   }
574:   VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
575:   VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
576:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
577:   for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
578:   value *= pcnn->factor_coarse_rhs;  /* This factor is set in CreateCoarseMatrix(). */
579:   {
580:     PetscMPIInt rank;
581:     MPI_Comm_rank(pc->comm,&rank);
582:     VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
583:     /*
584:        Since we are only inserting local values (one value actually) we don't need to do the 
585:        reduction that tells us there is no data that needs to be moved. Hence we comment out these
586:        VecAssemblyBegin(pcnn->coarse_b); 
587:        VecAssemblyEnd  (pcnn->coarse_b);
588:     */
589:   }
590:   KSPSolve(pcnn->ksp_coarse,pcnn->coarse_b,pcnn->coarse_x);
591:   if (!u) { VecScale(pcnn->coarse_x,m_one); }
592:   VecGetArray(pcnn->coarse_x,&lambda);
593:   for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
594:   VecRestoreArray(pcnn->coarse_x,&lambda);
595:   PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
596:   VecSet(z,zero);
597:   VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
598:   VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
599:   if (!u) {
600:     VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
601:     VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
602:     PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
603:     VecCopy(r,z);
604:   }
605:   VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
606:   VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
607:   PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);
608:   return(0);
609: }




615: /*  -------   E N D   O F   T H E   C O D E   -------  */
616: /*                                                     */
617: /*  From now on, "footnotes" (or "historical notes").  */
618: /*                                                     */
619: /*  -------------------------------------------------  */



623: /* --------------------------------------------------------------------------
624:    Historical note 01 
625:    -------------------------------------------------------------------------- */
626: /*
627:    We considered the possibility of an alternative D_i that would still
628:    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
629:    The basic principle was still the pseudo-inverse of the counting
630:    function; the difference was that we would not count subdomains
631:    that do not contribute to the coarse space (i.e., not pure-Neumann
632:    subdomains).

634:    This turned out to be a bad idea:  we would solve trivial Neumann
635:    problems in the not pure-Neumann subdomains, since we would be scaling
636:    the balanced residual by zero.
637: */




642: /* --------------------------------------------------------------------------
643:    Historical note 02 
644:    -------------------------------------------------------------------------- */
645: /*
646:    We tried an alternative coarse problem, that would eliminate exactly a
647:    constant error. Turned out not to improve the overall convergence.
648: */