Actual source code: nn.c
/*$Id: nn.c,v 1.13 2001/08/07 03:03:41 balay Exp $*/

#include "src/sles/pc/impls/is/nn/nn.h"

/* -------------------------------------------------------------------------- */
/*
   PCSetUp_NN - Prepares for the use of the NN preconditioner
                by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Notes:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
static int PCSetUp_NN(PC pc)
{
  if (!pc->setupcalled) {
    /* Set up all the "iterative substructuring" common block */
    PCISSetUp(pc);
    /* Create the coarse matrix. */
    PCNNCreateCoarseMatrix(pc);
  }
  return(0);
}
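
/*
   Usage sketch (illustrative only, not part of the original file): an
   application would normally reach this preconditioner through the generic
   PC interface, along the lines of

       SLES sles;
       PC   pc;
       int  ierr;
       ierr = SLESCreate(PETSC_COMM_WORLD,&sles);CHKERRQ(ierr);
       ierr = SLESGetPC(sles,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCNN);CHKERRQ(ierr);   /* or -pc_type nn on the command line */

   after which the routines in this file (PCSetUp_NN(), PCApply_NN(), etc.)
   are invoked through the corresponding interface routines PCSetUp(),
   PCApply(), and so on.
*/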
/* -------------------------------------------------------------------------- */
/*
   PCApply_NN - Applies the NN preconditioner to a vector.

   Input Parameters:
.  pc - the preconditioner context
.  r - input vector (global)

   Output Parameter:
.  z - output vector (global)

   Application Interface Routine: PCApply()
*/
static int PCApply_NN(PC pc,Vec r,Vec z)
{
  PC_IS       *pcis = (PC_IS*)(pc->data);
  int         ierr,its;
  PetscScalar m_one = -1.0;
  Vec         w     = pcis->vec1_global;

  /*
     Dirichlet solvers.
     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
     Storing the local results at vec2_D
  */
  VecScatterBegin(r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
  VecScatterEnd  (r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_D);
  SLESSolve(pcis->sles_D,pcis->vec1_D,pcis->vec2_D,&its);

  /*
     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $.
     Storing the result in the interface portion of the global vector w.
  */
  MatMult(pcis->A_BI,pcis->vec2_D,pcis->vec1_B);
  VecScale(&m_one,pcis->vec1_B);
  VecCopy(r,w);
  VecScatterBegin(pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
  VecScatterEnd  (pcis->vec1_B,w,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);

  /*
     Apply the interface preconditioner
  */
  PCNNApplyInterfacePreconditioner(pc,w,z,pcis->work_N,pcis->vec1_B,pcis->vec2_B,pcis->vec3_B,pcis->vec1_D,
                                   pcis->vec3_D,pcis->vec1_N,pcis->vec2_N);

  /*
     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $.
     The result is stored in vec1_D.
  */
  VecScatterBegin(z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
  VecScatterEnd  (z,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
  MatMult(pcis->A_IB,pcis->vec1_B,pcis->vec1_D);

  /*
     Dirichlet solvers.
     Computing $ B_I^{(i)}t_I^{(i)} $ and inserting the blocks
     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $ into the global vector.
  */
  VecScatterBegin(pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
  VecScatterEnd  (pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE,pcis->global_to_D);
  SLESSolve(pcis->sles_D,pcis->vec1_D,pcis->vec2_D,&its);
  VecScale(&m_one,pcis->vec2_D);
  VecScatterBegin(pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);
  VecScatterEnd  (pcis->vec2_D,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_D);

  return(0);
}
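
/*
   Note (summary added for readability; in the notation of the comments above):
   PCApply_NN() first condenses the residual onto the interface,
   $ w_B = r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} $,
   then applies the (balanced) interface preconditioner to obtain z_B, and
   finally recovers the interior values,
   $ z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B ) $,
   with one more local Dirichlet solve per subdomain.
*/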
/* -------------------------------------------------------------------------- */
/*
   PCDestroy_NN - Destroys the private context for the NN preconditioner
   that was created with PCCreate_NN().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
static int PCDestroy_NN(PC pc)
{
  PC_NN *pcnn = (PC_NN*)pc->data;
  int   ierr;

  PCISDestroy(pc);

  if (pcnn->coarse_mat)  {MatDestroy(pcnn->coarse_mat);}
  if (pcnn->coarse_x)    {VecDestroy(pcnn->coarse_x);}
  if (pcnn->coarse_b)    {VecDestroy(pcnn->coarse_b);}
  if (pcnn->sles_coarse) {SLESDestroy(pcnn->sles_coarse);}
  if (pcnn->DZ_IN) {
    if (pcnn->DZ_IN[0]) {PetscFree(pcnn->DZ_IN[0]);}
    PetscFree(pcnn->DZ_IN);
  }

  /*
     Free the private data structure that was hanging off the PC
  */
  PetscFree(pcnn);
  return(0);
}
/* -------------------------------------------------------------------------- */
/*
   PCCreate_NN - Creates an NN preconditioner context, PC_NN,
   and sets this as the private data within the generic preconditioning
   context, PC, that was created within PCCreate().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCCreate()
*/
EXTERN_C_BEGIN
int PCCreate_NN(PC pc)
{
  int   ierr;
  PC_NN *pcnn;

  /*
     Creates the private data structure for this preconditioner and
     attaches it to the PC object.
  */
  ierr     = PetscNew(PC_NN,&pcnn);
  pc->data = (void*)pcnn;

  /*
     Logs the memory usage; this is not needed but allows PETSc to
     monitor how much memory is being used for various purposes.
  */
  PetscLogObjectMemory(pc,sizeof(PC_NN)+sizeof(PC_IS)); /* Is this the right thing to do? */

  PCISCreate(pc);
  pcnn->coarse_mat  = 0;
  pcnn->coarse_x    = 0;
  pcnn->coarse_b    = 0;
  pcnn->sles_coarse = 0;
  pcnn->DZ_IN       = 0;

  /*
     Set the pointers for the functions that are provided above.
     Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
     are called, they will automatically call these functions. Note we
     choose not to provide a couple of these functions since they are
     not needed.
  */
  pc->ops->apply               = PCApply_NN;
  pc->ops->applytranspose      = 0;
  pc->ops->setup               = PCSetUp_NN;
  pc->ops->destroy             = PCDestroy_NN;
  pc->ops->view                = 0;
  pc->ops->applyrichardson     = 0;
  pc->ops->applysymmetricleft  = 0;
  pc->ops->applysymmetricright = 0;

  return(0);
}
EXTERN_C_END
/* -------------------------------------------------------------------------- */
/*
   PCNNCreateCoarseMatrix - Creates and assembles the coarse problem for the
   NN preconditioner (one unknown per subdomain), together with the coarse
   vectors and the coarse linear solver.
*/
int PCNNCreateCoarseMatrix (PC pc)
{
  MPI_Request   *send_request, *recv_request;
  int           i, j, k, ierr;
  PetscScalar*  mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
  PetscScalar** DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

  /* aliasing some names */
  PC_IS*        pcis     = (PC_IS*)(pc->data);
  PC_NN*        pcnn     = (PC_NN*)pc->data;
  int           n_neigh  = pcis->n_neigh;
  int*          neigh    = pcis->neigh;
  int*          n_shared = pcis->n_shared;
  int**         shared   = pcis->shared;
  PetscScalar** DZ_IN;   /* Must be initialized after memory allocation. */

  /* Allocate memory for mat (the +1 is to handle the case n_neigh equal to zero) */
  PetscMalloc((n_neigh*n_neigh+1)*sizeof(PetscScalar),&mat);

  /* Allocate memory for DZ */
  /* Notice that DZ_OUT[0] is allocated some space that is never used.       */
  /* This is done just so that DZ_OUT and DZ_IN have exactly the same layout. */
  {
    int size_of_Z = 0;
    ierr  = PetscMalloc((n_neigh+1)*sizeof(PetscScalar*),&pcnn->DZ_IN);
    DZ_IN = pcnn->DZ_IN;
    ierr  = PetscMalloc((n_neigh+1)*sizeof(PetscScalar*),&DZ_OUT);
    for (i=0; i<n_neigh; i++) {
      size_of_Z += n_shared[i];
    }
    PetscMalloc((size_of_Z+1)*sizeof(PetscScalar),&DZ_IN[0]);
    PetscMalloc((size_of_Z+1)*sizeof(PetscScalar),&DZ_OUT[0]);
  }
  for (i=1; i<n_neigh; i++) {
    DZ_IN[i]  = DZ_IN [i-1] + n_shared[i-1];
    DZ_OUT[i] = DZ_OUT[i-1] + n_shared[i-1];
  }
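
  /*
     Note on the layout (added for readability): DZ_IN[i] and DZ_OUT[i] are not
     separate allocations; they are pointers into the two contiguous buffers
     allocated above. DZ_IN[i][0..n_shared[i]-1] holds the values of the scaled
     nullspace D*Z on the interface nodes shared with neighbour neigh[i], while
     DZ_IN[0] holds this subdomain's own chunk (recall neigh[0] is the local
     rank).
  */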
  /* Set the values of DZ_OUT, in order to send this info to the neighbours */
  /* First, set the auxiliary array pcis->work_N. */
  PCISScatterArrayNToVecB(pcis->work_N,pcis->D,INSERT_VALUES,SCATTER_REVERSE,pc);
  for (i=1; i<n_neigh; i++) {
    for (j=0; j<n_shared[i]; j++) {
      DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
    }
  }

  /* Non-blocking send/receive the common-interface chunks of scaled nullspaces */
  /* Notice that send_request[] and recv_request[] could have one element fewer. */
  /* We make them longer so that request[i] corresponds to neigh[i].              */
  {
    int tag;
    PetscObjectGetNewTag((PetscObject)pc,&tag);
    PetscMalloc((2*(n_neigh)+1)*sizeof(MPI_Request),&send_request);
    recv_request = send_request + (n_neigh);
    for (i=1; i<n_neigh; i++) {
      MPI_Isend((void*)(DZ_OUT[i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(send_request[i]));
      MPI_Irecv((void*)(DZ_IN [i]),n_shared[i],MPIU_SCALAR,neigh[i],tag,pc->comm,&(recv_request[i]));
    }
  }

  /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
  for (j=0; j<n_shared[0]; j++) {
    DZ_IN[0][j] = pcis->work_N[shared[0][j]];
  }

  /* Start computing with local D*Z while communication goes on.      */
  /* Apply the Schur complement: the result is returned in            */
  /* pcis->vec1_B and is also scattered back to the array pcis->work_N. */
  PCNNApplySchurToChunk(pc,n_shared[0],shared[0],DZ_IN[0],pcis->work_N,pcis->vec1_B,
                        pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);

  /* Compute the first column of mat, while completing the receives. */
  for (i=0; i<n_neigh; i++) {
    MPI_Status stat;
    int        ind=0;
    if (i>0) { MPI_Waitany(n_neigh-1,recv_request+1,&ind,&stat); ind++;}
    mat[ind*n_neigh+0] = 0.0;
    for (k=0; k<n_shared[ind]; k++) {
      mat[ind*n_neigh+0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
    }
  }

  /* Compute the remaining columns */
  for (j=1; j<n_neigh; j++) {
    PCNNApplySchurToChunk(pc,n_shared[j],shared[j],DZ_IN[j],pcis->work_N,pcis->vec1_B,
                          pcis->vec2_B,pcis->vec1_D,pcis->vec2_D);
    for (i=0; i<n_neigh; i++) {
      mat[i*n_neigh+j] = 0.0;
      for (k=0; k<n_shared[i]; k++) {
        mat[i*n_neigh+j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
      }
    }
  }
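
  /*
     Note (added for readability): after the two loops above, each local entry
     is the inner product, over the nodes shared with neigh[i], of DZ_IN[i]
     with the Schur complement applied to DZ_IN[j], i.e. it approximates the
     coarse Galerkin value $ (D_i Z_i)^T S (D_j Z_j) $ restricted to the locally
     shared interface nodes. MatSetValues() with ADD_VALUES below then
     accumulates the contributions coming from the different subdomains.
  */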
  /* Complete the sending. */
  if (n_neigh>1) {
    MPI_Status *stat;
    PetscMalloc((n_neigh-1)*sizeof(MPI_Status),&stat);
    MPI_Waitall(n_neigh-1,&(send_request[1]),stat);
    PetscFree(stat);
  }

  /* Free the memory for the MPI requests */
  PetscFree(send_request);

  /* Free the memory for DZ_OUT */
  if (DZ_OUT) {
    if (DZ_OUT[0]) { PetscFree(DZ_OUT[0]); }
    PetscFree(DZ_OUT);
  }

  {
    int size,n_neigh_m1;
    MPI_Comm_size(pc->comm,&size);
    n_neigh_m1 = (n_neigh) ? n_neigh-1 : 0;
    /* Create the global coarse vectors (rhs and solution). */
    VecCreateMPI(pc->comm,1,size,&(pcnn->coarse_b));
    VecDuplicate(pcnn->coarse_b,&(pcnn->coarse_x));
    /* Create and set the global coarse matrix. */
    MatCreateMPIAIJ(pc->comm,1,1,size,size,1,PETSC_NULL,n_neigh_m1,PETSC_NULL,&(pcnn->coarse_mat));
    MatSetValues(pcnn->coarse_mat,n_neigh,neigh,n_neigh,neigh,mat,ADD_VALUES);
    MatAssemblyBegin(pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd  (pcnn->coarse_mat,MAT_FINAL_ASSEMBLY);
  }
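
  /*
     Note (added for readability): the coarse problem has exactly one unknown
     per subdomain, hence the coarse vectors and matrix are created with local
     size 1 and global size equal to the number of processes. Row 'rank' of
     the coarse matrix couples this subdomain only with its n_neigh-1
     neighbours, which is why the preallocation above requests 1 diagonal and
     n_neigh-1 off-diagonal nonzeros per row.
  */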
  {
    int         rank;
    PetscScalar one = 1.0;
    IS          is;
    MPI_Comm_rank(pc->comm,&rank);
    /* "Zero out" rows of not-purely-Neumann subdomains */
    if (pcis->pure_neumann) { /* does NOT zero the row; creates an empty index set instead, because MatZeroRows() is collective. */
      ISCreateStride(pc->comm,0,0,0,&is);
    } else { /* here it DOES zero the row, since it's not a floating subdomain. */
      ISCreateStride(pc->comm,1,rank,0,&is);
    }
    MatZeroRows(pcnn->coarse_mat,is,&one);
    ISDestroy(is);
  }

  /* Create the coarse linear solver context */
  {
    PC  pc_ctx, inner_pc;
    KSP ksp_ctx;
    SLESCreate(pc->comm,&pcnn->sles_coarse);
    SLESSetOperators(pcnn->sles_coarse,pcnn->coarse_mat,pcnn->coarse_mat,SAME_PRECONDITIONER);
    SLESGetKSP(pcnn->sles_coarse,&ksp_ctx);
    SLESGetPC(pcnn->sles_coarse,&pc_ctx);
    PCSetType(pc_ctx,PCREDUNDANT);
    KSPSetType(ksp_ctx,KSPPREONLY);
    PCRedundantGetPC(pc_ctx,&inner_pc);
    PCSetType(inner_pc,PCLU);
    SLESSetOptionsPrefix(pcnn->sles_coarse,"coarse_");
    SLESSetFromOptions(pcnn->sles_coarse);
    /* The vectors in the following line are dummy arguments, just telling the SLES the vector size. Their values are not used. */
    SLESSetUp(pcnn->sles_coarse,pcnn->coarse_x,pcnn->coarse_b);
  }
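
  /*
     Note (added for readability): the default coarse solve is configured as a
     redundant direct solve (KSPPREONLY with PCREDUNDANT wrapping PCLU). Since
     the coarse matrix has only one row per process, it is cheap for each
     process to factor its own copy, which avoids an extra parallel solve. The
     "coarse_" options prefix lets the user override this choice at run time.
  */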
  /* Free the memory for mat */
  PetscFree(mat);

  /* for DEBUGGING, save the coarse matrix to a file. */
  {
    PetscTruth flg;
    PetscOptionsHasName(PETSC_NULL,"-save_coarse_matrix",&flg);
    if (flg) {
      PetscViewer viewer;
      PetscViewerASCIIOpen(PETSC_COMM_WORLD,"coarse.m",&viewer);
      PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
      MatView(pcnn->coarse_mat,viewer);
      PetscViewerDestroy(viewer);
    }
  }

  /* Set the variable pcnn->factor_coarse_rhs. */
  pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

  /* See historical note 02, at the bottom of this file. */
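
  /*
     Note (added for readability): factor_coarse_rhs multiplies this
     subdomain's contribution to the coarse right-hand side in PCNNBalancing(),
     so only pure-Neumann (floating) subdomains contribute a nonzero coarse
     residual.
  */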
  return(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCNNApplySchurToChunk - Applies the Schur complement to a chunk of values
                           given on the interface nodes listed in idx.

   Input parameters:
.  pc - the preconditioner context
.  n - size of chunk
.  idx - indices of chunk
.  chunk - values

   Output parameters:
.  array_N - result of Schur complement applied to chunk, scattered to big array
.  vec1_B  - result of Schur complement applied to chunk
.  vec2_B  - garbage (used as work space)
.  vec1_D  - garbage (used as work space)
.  vec2_D  - garbage (used as work space)

*/
int PCNNApplySchurToChunk(PC pc, int n, int* idx, PetscScalar *chunk, PetscScalar* array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
{
  int   i, ierr;
  PC_IS *pcis = (PC_IS*)(pc->data);

  PetscMemzero((void*)array_N, pcis->n*sizeof(PetscScalar));
  for (i=0; i<n; i++) { array_N[idx[i]] = chunk[i]; }
  PCISScatterArrayNToVecB(array_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
  PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
  PCISScatterArrayNToVecB(array_N,vec1_B,INSERT_VALUES,SCATTER_REVERSE,pc);

  return(0);
}
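
/*
   Note (added for readability): in effect the routine above computes
   vec1_B = S*e, where e is the local interface vector with e[idx[i]] = chunk[i]
   and zero elsewhere; array_N returns the same result expressed in the full
   local (interior plus interface) numbering.
*/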
/* -------------------------------------------------------------------------- */
/*
   PCNNApplyInterfacePreconditioner - Applies the interface preconditioner, i.e.,
                                      the preconditioner for the Schur complement.

   Input parameter:
.  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

   Output parameters:
.  z - global vector of interior and interface nodes. The values on the interface are the result of
       the application of the interface preconditioner to the interface part of r. The values on the
       interior nodes are garbage.
.  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
.  vec1_D - vector of local interior nodes; returns garbage (used as work space)
.  vec2_D - vector of local interior nodes; returns garbage (used as work space)
.  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
.  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

*/
int PCNNApplyInterfacePreconditioner (PC pc, Vec r, Vec z, PetscScalar* work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D,
                                      Vec vec2_D, Vec vec1_N, Vec vec2_N)
{
  int    ierr;
  PC_IS* pcis = (PC_IS*)(pc->data);

  /*
     First balancing step.
  */
  {
    PetscTruth flg;
    PetscOptionsHasName(PETSC_NULL,"-turn_off_first_balancing",&flg);
    if (!flg) {
      PCNNBalancing(pc,r,(Vec)0,z,vec1_B,vec2_B,(Vec)0,vec1_D,vec2_D,work_N);
    } else {
      VecCopy(r,z);
    }
  }

  /*
     Extract the local interface part of z and scale it by D
  */
  VecScatterBegin(z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
  VecScatterEnd  (z,vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
  VecPointwiseMult(pcis->D,vec1_B,vec2_B);

  /* Neumann Solver */
  PCISApplyInvSchur(pc,vec2_B,vec1_B,vec1_N,vec2_N);

  /*
     Second balancing step.
  */
  {
    PetscTruth flg;
    PetscOptionsHasName(PETSC_NULL,"-turn_off_second_balancing",&flg);
    if (!flg) {
      PCNNBalancing(pc,r,vec1_B,z,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D,work_N);
    } else {
      PetscScalar zero = 0.0;
      VecPointwiseMult(pcis->D,vec1_B,vec2_B);
      VecSet(&zero,z);
      VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
      VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
    }
  }

  return(0);
}
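
/*
   Note (summary added for readability): with both balancing steps enabled,
   the interface part of z produced above is

      z_B = "balance" ( D S^+ D ( "balance" ( r_B ) ) ),

   where D is the diagonal scaling pcis->D, S^+ denotes the local Neumann
   solves applied through PCISApplyInvSchur(), and each "balance" is a coarse
   correction performed by PCNNBalancing(). The run-time options
   -turn_off_first_balancing and -turn_off_second_balancing reduce this to the
   plain (unbalanced) Neumann-Neumann preconditioner.
*/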
/* -------------------------------------------------------------------------- */
/*
   PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
                   input argument u is provided), or s, as given in equations
                   (12) and (13), if the input argument u is a null vector.
                   Notice that the input argument u plays the role of u_i in
                   equation (14). The equation numbers refer to [Man93].

   Input Parameters:
.  pc - the NN preconditioner context.
.  r - MPI vector of all nodes (interior and interface). It's preserved.
.  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

   Output Parameters:
.  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
.  vec1_B - Sequential vector of local interface nodes. Workspace.
.  vec2_B - Sequential vector of local interface nodes. Workspace.
.  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
.  vec1_D - Sequential vector of local interior nodes. Workspace.
.  vec2_D - Sequential vector of local interior nodes. Workspace.
.  work_N - Array of all local nodes (interior and interface). Workspace.

*/
int PCNNBalancing (PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B,
                   Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
{
  int          k, ierr, its;
  PetscScalar  zero  = 0.0;
  PetscScalar  m_one = -1.0;
  PetscScalar  value;
  PetscScalar* lambda;
  PC_NN*       pcnn  = (PC_NN*)(pc->data);
  PC_IS*       pcis  = (PC_IS*)(pc->data);

  PetscLogEventBegin(PC_ApplyCoarse,0,0,0,0);

  if (u) {
    if (!vec3_B) { vec3_B = u; }
    VecPointwiseMult(pcis->D,u,vec1_B);
    VecSet(&zero,z);
    VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
    VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
    VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
    VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
    PCISApplySchur(pc,vec2_B,vec3_B,(Vec)0,vec1_D,vec2_D);
    VecScale(&m_one,vec3_B);
    VecCopy(r,z);
    VecScatterBegin(vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
    VecScatterEnd  (vec3_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
  } else {
    VecCopy(r,z);
  }
  VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
  VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
  PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_REVERSE,pc);
  for (k=0, value=0.0; k<pcis->n_shared[0]; k++) { value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]]; }
  value *= pcnn->factor_coarse_rhs; /* This factor is set in PCNNCreateCoarseMatrix(). */
  {
    int rank;
    MPI_Comm_rank(pc->comm,&rank);
    VecSetValue(pcnn->coarse_b,rank,value,INSERT_VALUES);
    /*
       Since we are only inserting local values (one value, actually), we do not need the
       reduction that tells us there is no data to be moved. Hence we can skip the calls to
       VecAssemblyBegin(pcnn->coarse_b) and VecAssemblyEnd(pcnn->coarse_b).
    */
  }
  SLESSolve(pcnn->sles_coarse,pcnn->coarse_b,pcnn->coarse_x,&its);
  if (!u) { VecScale(&m_one,pcnn->coarse_x); }
  VecGetArray(pcnn->coarse_x,&lambda);
  for (k=0; k<pcis->n_shared[0]; k++) { work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k]; }
  VecRestoreArray(pcnn->coarse_x,&lambda);
  PCISScatterArrayNToVecB(work_N,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pc);
  VecSet(&zero,z);
  VecScatterBegin(vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
  VecScatterEnd  (vec2_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
  if (!u) {
    VecScatterBegin(z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
    VecScatterEnd  (z,vec2_B,INSERT_VALUES,SCATTER_FORWARD,pcis->global_to_B);
    PCISApplySchur(pc,vec2_B,vec1_B,(Vec)0,vec1_D,vec2_D);
    VecCopy(r,z);
  }
  VecScatterBegin(vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
  VecScatterEnd  (vec1_B,z,ADD_VALUES,SCATTER_REVERSE,pcis->global_to_B);
  PetscLogEventEnd(PC_ApplyCoarse,0,0,0,0);

  return(0);
}
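
/*
   Note (summary added for readability): each call to PCNNBalancing() solves
   the coarse problem assembled in PCNNCreateCoarseMatrix() (one unknown,
   lambda, per subdomain) and adds the correction lambda * D*Z back to the
   interface values, so that the local Neumann problems on the floating
   (pure-Neumann) subdomains remain solvable; this is the classical role of
   balancing described in equations (12)-(16) of [Man93], referenced in the
   header above.
*/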
/* -------  E N D   O F   T H E   C O D E  ------- */
/*                                                 */
/* From now on, "footnotes" (or "historical notes"). */
/*                                                 */
/* ------------------------------------------------- */

#ifdef __HISTORICAL_NOTES___do_not_compile__

/* --------------------------------------------------------------------------
   Historical note 01
   -------------------------------------------------------------------------- */
/*
   We considered the possibility of an alternative D_i that would still
   provide a partition of unity (i.e., $ \sum_i N_i D_i N_i^T = I $).
   The basic principle was still the pseudo-inverse of the counting
   function; the difference was that we would not count subdomains
   that do not contribute to the coarse space (i.e., non-pure-Neumann
   subdomains).

   This turned out to be a bad idea: we would solve trivial Neumann
   problems in the non-pure-Neumann subdomains, since we would be scaling
   the balanced residual by zero.
*/

  {
    PetscTruth flg;
    PetscOptionsHasName(PETSC_NULL,"-pcnn_new_scaling",&flg);
    if (flg) {
      Vec counter;
      PetscScalar one=1.0, zero=0.0;
      VecDuplicate(pc->vec,&counter);
      VecSet(&zero,counter);
      if (pcnn->pure_neumann) {
        VecSet(&one,pcnn->D);
      } else {
        VecSet(&zero,pcnn->D);
      }
      VecScatterBegin(pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
      VecScatterEnd  (pcnn->D,counter,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
      VecScatterBegin(counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
      VecScatterEnd  (counter,pcnn->D,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
      VecDestroy(counter);
      if (pcnn->pure_neumann) {
        VecReciprocal(pcnn->D);
      } else {
        VecSet(&zero,pcnn->D);
      }
    }
  }

/* --------------------------------------------------------------------------
   Historical note 02
   -------------------------------------------------------------------------- */
/*
   We tried an alternative coarse problem that would eliminate a constant
   error exactly. It turned out not to improve the overall convergence.
*/

  /* Set the variable pcnn->factor_coarse_rhs. */
  {
    PetscTruth flg;
    PetscOptionsHasName(PETSC_NULL,"-enforce_preserving_constants",&flg);
    if (!flg) { pcnn->factor_coarse_rhs = (pcnn->pure_neumann) ? 1.0 : 0.0; }
    else {
      PetscScalar zero = 0.0, one = 1.0;
      VecSet(&one,pcnn->vec1_B);
      ApplySchurComplement(pcnn,pcnn->vec1_B,pcnn->vec2_B,(Vec)0,pcnn->vec1_D,pcnn->vec2_D);
      VecSet(&zero,pcnn->vec1_global);
      VecScatterBegin(pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
      VecScatterEnd  (pcnn->vec2_B,pcnn->vec1_global,ADD_VALUES,SCATTER_REVERSE,pcnn->global_to_B);
      VecScatterBegin(pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
      VecScatterEnd  (pcnn->vec1_global,pcnn->vec1_B,INSERT_VALUES,SCATTER_FORWARD,pcnn->global_to_B);
      if (pcnn->pure_neumann) { pcnn->factor_coarse_rhs = 1.0; }
      else {
        ScatterArrayNToVecB(pcnn->work_N,pcnn->vec1_B,INSERT_VALUES,SCATTER_REVERSE,pcnn);
        for (k=0, pcnn->factor_coarse_rhs=0.0; k<pcnn->n_shared[0]; k++) {
          pcnn->factor_coarse_rhs += pcnn->work_N[pcnn->shared[0][k]] * pcnn->DZ_IN[0][k];
        }
        if (pcnn->factor_coarse_rhs) { pcnn->factor_coarse_rhs = 1.0 / pcnn->factor_coarse_rhs; }
        else { SETERRQ(1,"Constants cannot be preserved. Remove the \"-enforce_preserving_constants\" option."); }
      }
    }
  }

#endif /* __HISTORICAL_NOTES___do_not_compile__ */