Actual source code: ml.c

  1: #ifdef PETSC_RCS_HEADER
  2: static char vcid[] = "$Id: mlNew.c,v 1.18 2001/01/27 21:32:36 knepley Exp $";
  3: #endif
  4: /*
  5:    Defines a multilevel preconditioner due to Vivek Sarin for AIJ matrices
  6: */
 7:  #include "src/sles/pc/pcimpl.h"
 8:  #include "src/mesh/impls/triangular/triimpl.h"
 9:  #include "src/grid/gridimpl.h"
 10: #include <fcntl.h>
 11: #if defined(PETSC_HAVE_UNISTD_H)
 12: #include <unistd.h>
 13: #endif
 14: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
 15:   #include <sys/procfs.h>
 16:   #include <sys/hwperftypes.h>
 17:   #include <sys/hwperfmacros.h>
 18: #endif
 19:  #include "ml.h"

 21: int PC_MLEvents[PC_ML_MAX_EVENTS];

 23: /*@C
 24:   PCMultiLevelInitializePackage - This function initializes everything in the ML package. It is called
 25:   from PetscDLLibraryRegister() when using dynamic libraries, and on the first call to PCCreate()
 26:   when using static libraries.

 28:   Input Parameter:
 29:   path - The dynamic library path, or PETSC_NULL

 31:   Level: developer

 33: .keywords: PC, multilevel, initialize, package
 34: .seealso: PetscInitialize()
 35: @*/
 36: int PCMultiLevelInitializePackage(char *path) {
 37:   static PetscTruth initialized = PETSC_FALSE;
 38:   int               ierr;

 41:   if (initialized == PETSC_TRUE) return(0);
 42:   initialized = PETSC_TRUE;
 43:   /* Register Classes */
 44:   /* Register Constructors and Serializers */
 45:   /* Register Events */
 46:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpInit],                        "MLSetUpInit",      PC_COOKIE);
 47:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrained],                 "MLSetUpConstr",    PC_COOKIE);
 48:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrainedBd],               "MLSetUpConstrBd",  PC_COOKIE);
 49:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpParallel],                    "MLSetUpParallel",  PC_COOKIE);
 50:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionMesh],              "MLRdPartMesh",     PC_COOKIE);
 51:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionRowCol],            "MLRdPartRowCol",   PC_COOKIE);
 52:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceFactor],                     "MLRdFactor",       PC_COOKIE);
 53:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGrad],                     "MLRdBdGrad",       PC_COOKIE);
 54:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradExtract],              "MLRdBdGradExtrct", PC_COOKIE);
 55:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradRowPartLocalToGlobal], "MLRdBdGrdRwPtL2G", PC_COOKIE);
 56:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceShrinkMesh],                 "MLRdShrinkMesh",   PC_COOKIE);
 57:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricLeftParallel],       "MLApplySymmLeftP", PC_COOKIE);
 58:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricRightParallel],      "MLApplySymmRghtP", PC_COOKIE);
 59:   PetscLogEventRegister(&PC_MLEvents[PC_ML_QRFactorization],                  "MLQRFact",         PC_COOKIE);
 60:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplyQR],                          "MLQRApply",        PC_COOKIE);
 61:   PetscLogEventRegister(&PC_MLEvents[PC_ML_CreateBdGrad],                     "MLCreateBdGrad",   PC_COOKIE);
 62:   /* Process info exclusions */
 63:   /* Process summary exclusions */
 64:   /* Add options checkers */
 65:   return(0);
 66: }
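/* A minimal profiling sketch (not from the original file; it assumes the
   PC_MLLogEventBegin/End macros used throughout this file wrap
   PetscLogEventBegin/End on the PC_MLEvents[] handles registered above): */
#if 0
int SomeMLPhase(PC pc)
{
  PC_MLLogEventBegin(PC_ML_SetUpInit, pc, 0, 0, 0);
  /* ... timed phase body ... */
  PC_MLLogEventEnd(PC_ML_SetUpInit, pc, 0, 0, 0);
  return(0);
}
#endif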

 68: int PCConvert_Multilevel(PC pc)
 69: {
 70:   PC_Multilevel *ml = (PC_Multilevel *) pc->data;
 71: #ifdef PETSC_HAVE_MATHEMATICA
 72:   int            ierr;
 73: #endif

 76:   /* Convert a Mathematica MLData object into a PC_Multilevel object */
 77:   if (ml->mathViewer != PETSC_NULL) {
 78: #ifdef PETSC_HAVE_MATHEMATICA
 79:     PetscViewerMathematicaMultiLevelConvert(ml->mathViewer, pc);
 80: #else
 81:     SETERRQ(PETSC_ERR_SUP, "Mathematica support not compiled into this PETSc build");
 82: #endif
 83:   }
 84:   return(0);
 85: }
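/* A minimal sketch of the Mathematica path (assuming a build with
   PETSC_HAVE_MATHEMATICA and a pc whose data is the PC_Multilevel ml): the
   viewer is normally opened in PCSetUp_Multilevel when -pc_ml_math is given,
   after which the conversion above can run. */
#if 0
  PetscViewerMathematicaOpen(pc->comm, PETSC_DECIDE, PETSC_NULL, PETSC_NULL, &ml->mathViewer);
  PCConvert_Multilevel(pc);
#endif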

 87: static int PCDestroyStructures_Multilevel(PC pc)
 88: {
 89:   PC_Multilevel *ml = (PC_Multilevel *) pc->data;
 90:   int            numProcs, numRows, numCols;
 91:   int            level, part;
 92:   int            ierr;

 95:   /* Destroy gradient structures */
 96:   MPI_Comm_size(pc->comm, &numProcs);
 97:   VarOrderingDestroy(ml->sOrder);
 98:   VarOrderingDestroy(ml->tOrder);
 99:   LocalVarOrderingDestroy(ml->sLocOrder);
100:   LocalVarOrderingDestroy(ml->tLocOrder);
101:   GMatDestroy(ml->B);
102:   if (ml->reduceSol != PETSC_NULL)
103:     {GVecDestroy(ml->reduceSol);                                                                   }
104:   if (ml->diag      != PETSC_NULL)
105:     {GVecDestroy(ml->diag);                                                                        }

107:   /* Destroy mesh structures */
108:   if (ml->useMath == PETSC_FALSE) {
109:     PetscFree(ml->numElements);
110:     PetscFree(ml->numVertices);
111:     for(level = 0; level <= ml->numLevels; level++) {
112:       PetscFree(ml->meshes[level][MESH_OFFSETS]);
113:       PetscFree(ml->meshes[level][MESH_ADJ]);
114:       PetscFree(ml->meshes[level][MESH_ELEM]);
115:       PetscFree(ml->meshes[level]);
116:     }
117:     PetscFree(ml->vertices);
118:     PetscFree(ml->meshes);
119:   }

121:   /* Destroy factorization structures */
122:   if (ml->useMath == PETSC_FALSE)
123:   {
124:     for(level = 0; level < ml->numLevels; level++)
125:     {
126:       if (ml->numPartitions[level] == 0)
127:         continue;
128:       for(part = 0; part < ml->numPartitions[level]; part++)
129:       {
130:         numRows = ml->numPartitionRows[level][part];
131:         numCols = ml->numPartitionCols[level][part];
132:         if (numRows > 0)
133:         {
134:           PetscFree(ml->factors[level][part][FACT_U]);
135:           if (PCMultiLevelDoQR_Private(pc, numRows, numCols) == PETSC_TRUE)
136:           {
137:             PetscFree(ml->factors[level][part][FACT_QR]);
138:             PetscFree(ml->factors[level][part][FACT_TAU]);
139:           }
140:         }
141:         if (numCols > 0)
142:         {
143:           PetscFree(ml->factors[level][part][FACT_DINV]);
144:           PetscFree(ml->factors[level][part][FACT_V]);
145:         }
146:         PetscFree(ml->factors[level][part]);
147:       }
148:       PetscFree(ml->factors[level]);
149:       MatDestroy(ml->grads[level]);
150:       VecDestroy(ml->bdReduceVecs[level]);
151:       VecDestroy(ml->colReduceVecs[level]);
152:       VecDestroy(ml->colReduceVecs2[level]);
153:     }
154:     PetscFree(ml->factors);
155:     PetscFree(ml->grads);
156:     PetscFree(ml->bdReduceVecs);
157:     PetscFree(ml->colReduceVecs);
158:     PetscFree(ml->colReduceVecs2);
159:     PetscFree(ml->interiorWork);
160:     PetscFree(ml->interiorWork2);
161:     if (ml->svdWork != PETSC_NULL)
162:       {PetscFree(ml->svdWork);                                                                     }
163:   }

165:   /* Destroy numbering structures */
166:   if (ml->useMath == PETSC_FALSE)
167:   {
168:     for(level = 0; level < ml->numLevels; level++)
169:     {
170:       if (ml->numPartitions[level] == 0)
171:         continue;
172:       for(part = 0; part < ml->numPartitions[level]; part++)
173:       {
174:         if (ml->numPartitionCols[level][part] > 0)
175:           {PetscFree(ml->colPartition[level][part]);                                               }
176:         if (ml->numPartitionRows[level][part] > 0)
177:           {PetscFree(ml->rowPartition[level][PART_ROW_INT][part]);                                 }
178:       }
179:       if (ml->numPartitionRows[level][ml->numPartitions[level]] > 0)
180:         {PetscFree(ml->rowPartition[level][PART_ROW_BD][0]);                                       }
181:       if (ml->numPartitionRows[level][ml->numPartitions[level]+1] > 0)
182:         {PetscFree(ml->rowPartition[level][PART_ROW_RES][0]);                                      }
183:       PetscFree(ml->rowPartition[level][PART_ROW_INT]);
184:       PetscFree(ml->rowPartition[level][PART_ROW_BD]);
185:       PetscFree(ml->rowPartition[level][PART_ROW_RES]);
186:       PetscFree(ml->numPartitionCols[level]);
187:       PetscFree(ml->colPartition[level]);
188:       PetscFree(ml->numPartitionRows[level]);
189:       PetscFree(ml->rowPartition[level]);
190:     }
191:     PetscFree(ml->numPartitions);
192:     PetscFree(ml->numPartitionCols);
193:     PetscFree(ml->numPartitionRows);
194:     PetscFree(ml->colPartition);
195:     PetscFree(ml->rowPartition);
196:   }
197:   if (ml->range    != PETSC_NULL) {
198:     PetscFree(ml->range);
199:   }
200:   VecScatterDestroy(ml->rangeScatter);
201:   if (ml->nullCols != PETSC_NULL) {
202:     PetscFree(ml->nullCols);
203:   }

205:   /* Destroy parallel structures */
206:   if (numProcs > 1)
207:   {
208:     MatDestroy(ml->locB);
209:     MatDestroy(ml->interfaceB);
210:     VecDestroy(ml->interiorRhs);
211:     VecScatterDestroy(ml->interiorScatter);
212:     VecDestroy(ml->interfaceRhs);
213:     VecScatterDestroy(ml->interfaceScatter);
214: #ifndef PETSC_HAVE_PLAPACK
215:     VecDestroy(ml->locInterfaceRhs);
216:     VecScatterDestroy(ml->locInterfaceScatter);
217: #endif
218:     VecDestroy(ml->interfaceColRhs);
219:     VecScatterDestroy(ml->interfaceColScatter);
220:     VecDestroy(ml->colWorkVec);
221:     PetscFree(ml->interfaceRows);
222:     PetscFree(ml->firstInterfaceRow);
223:     PetscFree(ml->firstNullCol);
224: #ifdef PETSC_HAVE_PLAPACK
225:     PLA_Obj_free(&ml->interfaceQR);
226:     PLA_Obj_free(&ml->interfaceTAU);
227:     PLA_Obj_free(&ml->PLArhsD);
228:     PLA_Obj_free(&ml->PLArhsP);
229:     PLA_Temp_free(&ml->PLAlayout);
230: #else
231:     PetscFree(ml->interfaceQR);
232:     PetscFree(ml->interfaceTAU);
233:     PetscFree(ml->work);
234: #endif
235:   }
236:   return(0);
237: }

239: static int PCSetUp_Multilevel(PC pc)
240: {
241:   PC_Multilevel   *ml = (PC_Multilevel *) pc->data;
242:   GMat             A  = pc->mat;
243:   Grid             grid;
244:   Mesh             mesh;
245:   Mesh_Triangular *tri;
246:   Partition        p;
247:   /* Mesh support */
248:   char            *typeName;
249:   int              numVertices, numNodes, numEdges, numElements;
250:   int             *bcNodes;
251:   /* Parallel support */
252:   PetscTruth       isInterfaceRow;
253:   int             *diagInterfaceRows;
254:   int             *offdiagInterfaceRows;
255:   int             *interfaceRows;
256:   int             *interfaceCols;
257:   int             *interiorRows;
258:   int              numNewRange;
259:   int             *newRange;
260:   int             *rangeRows;
261:   int             *nullCols;
262:   int              rowLen;
263:   int             *rowLens;
264:   int             *cols;
265:   PetscScalar     *vals;
266:   Vec              tempV, tempB;
267:   PetscScalar     *arrayB;
268:   IS               localIS, globalIS;
269:   int             *recvCounts;
270:   int              numProcs, rank, proc;
271:   PetscScalar      one  = 1.0;
272:   int              elem, newElem, corner, row, row2, col, nullCol, rangeCol, bc, count, lossCount;
273: #ifdef PETSC_HAVE_PLAPACK
274:   double          *locB_I;
275: #else
276:   int              minSize;
277: #endif
278: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
279:   int              pid, fd;
280:   char             procfile[64];
281:   int              event, event0, event1;
282:   int              gen_start, gen_read;
283:   long long        count0, globalCount0;
284:   long long        count1, globalCount1;
285:   hwperf_profevctrarg_t evctr_args;
286:   hwperf_cntr_t         counts;
287: #endif
288:   PetscTruth       opt, match;
289:   int              ierr;

292:   /* We need this so that setup is not called recursively by PCMultiLevelApplyXXX */
293:   if (pc->setupcalled > 0) {
294:     return(0);
295:   }
296:   pc->setupcalled = 2;

298:   /* Destroy old structures -- This should not happen anymore */
299:   if (ml->numLevels >= 0) {
300:     SETERRQ(PETSC_ERR_ARG_WRONG, "Should not reform an existing decomposition");
301:     /* PCDestroyStructures_Multilevel(pc);                                                          */
302:   }

304:   PC_MLLogEventBegin(PC_ML_SetUpInit, pc, 0, 0, 0);
305:   /* Get the grid */
306:   GMatGetGrid(A, &grid);
307:   ml->grid = grid;

309:   /* Get mesh information */
310:   GridGetMesh(grid, &mesh);
311:   MeshGetType(mesh, &typeName);
312:   PetscTypeCompare((PetscObject) mesh, MESH_TRIANGULAR_2D, &match);
313:   if (match == PETSC_FALSE) SETERRQ(PETSC_ERR_SUP, "Multilevel preconditioner only supports 2D triangular meshes");
314:   tri  = (Mesh_Triangular *) mesh->data;
315:   /* Set target partition size */
316:   ml->partSize  = 10;
317:   PetscOptionsGetInt(pc->prefix, "-pc_ml_partition_size", &ml->partSize, &opt);

319:   /* Setup parallel information */
320:   p        = mesh->part;
321:   numProcs = p->numProcs;
322:   rank     = p->rank;

324:   /* Enable Mathematica support */
325:   PetscOptionsHasName(pc->prefix, "-pc_ml_math", &opt);
326:   if (opt == PETSC_TRUE) {
327:     ml->useMath = PETSC_TRUE;
328:     PetscViewerMathematicaOpen(pc->comm, PETSC_DECIDE, PETSC_NULL, PETSC_NULL, &ml->mathViewer);
329:   }

331:   PetscOptionsHasName(pc->prefix, "-pc_ml_mesh_view_draw", &opt);
332:   if (opt == PETSC_TRUE) {
333:     PetscViewerDrawOpen(pc->comm, 0, 0, 0, 0, 300, 300, &ml->factorizationViewer);
334:   }

336:   /* Setup allocation */
337:   PetscOptionsGetInt(pc->prefix, "-pc_ml_max_levels", &ml->maxLevels, &opt);
338:   if (ml->maxLevels < 0)
339:     ml->maxLevels = 25;

341:   /* Set the threshold for a final QR factorization */
342:   PetscOptionsGetInt(pc->prefix, "-pc_ml_qr_thresh", &ml->QRthresh, &opt);
343:   PetscOptionsGetReal(pc->prefix, "-pc_ml_qr_factor", &ml->DoQRFactor, &opt);

345:   /* Allocate mesh storage */
346:   ml->numMeshes = 1;
347:   MeshGetInfo(mesh, &numVertices, &numNodes, &numEdges, &numElements);
348:   PetscMalloc((ml->maxLevels+1) * sizeof(int),    &ml->numElements);
349:   PetscMalloc((ml->maxLevels+1) * sizeof(int),    &ml->numVertices);
350:   PetscMalloc((ml->maxLevels+1) * sizeof(int **), &ml->meshes);
351:   PetscMalloc(NUM_MESH_DIV      * sizeof(int *),  &ml->meshes[0]);
352:   PetscMalloc((numElements*3)   * sizeof(int),    &ml->meshes[0][MESH_ELEM]);
353:   PetscMalloc(grid->numPointBC  * sizeof(int),    &bcNodes);
354:   PetscLogObjectMemory(pc, (ml->maxLevels+1)*2 * sizeof(int) + (ml->maxLevels+1) * sizeof(int **) + NUM_MESH_DIV * sizeof(int *) +
355:                        (numElements*3) * sizeof(int) + grid->numPointBC * sizeof(int));

357:   /* Create the local graph */
358:   for(bc = 0; bc < grid->numPointBC; bc++) {
359:     bcNodes[bc] = grid->pointBC[bc].node;
360:   }
361:   MeshCreateLocalCSR(mesh, &ml->numVertices[0], &ml->numEdges, &ml->meshes[0][MESH_OFFSETS],
362:                             &ml->meshes[0][MESH_ADJ], &ml->vertices, grid->numPointBC, bcNodes, PETSC_FALSE);
363: 

365:   /* Create the element descriptions */
366:   ml->numElements[0] = 0;
367:   for(elem = 0; elem < numElements; elem++) {
368:     /* Remove all elements containing a constrained node */
369:     for(bc = 0; bc < grid->numPointBC; bc++)
370:       if ((tri->faces[elem*mesh->numCorners+0] == bcNodes[bc]) ||
371:           (tri->faces[elem*mesh->numCorners+1] == bcNodes[bc]) ||
372:           (tri->faces[elem*mesh->numCorners+2] == bcNodes[bc]))
373:         break;
374:     if (bc < grid->numPointBC)
375:       continue;

377:     /* Remove elements with off-processor nodes */
378:     if ((tri->faces[elem*mesh->numCorners+0] >= numNodes) ||
379:         (tri->faces[elem*mesh->numCorners+1] >= numNodes) ||
380:         (tri->faces[elem*mesh->numCorners+2] >= numNodes))
381:       continue;

383:     /* Add the element */
384:     for(corner = 0; corner < 3; corner++) {
385:       /* Decrement node numbers to account for constrained nodes */
386:       newElem = tri->faces[elem*mesh->numCorners+corner];
387:       for(bc = 0; bc < grid->numPointBC; bc++)
388:         if ((bcNodes[bc] >= 0) && (tri->faces[elem*mesh->numCorners+corner] > bcNodes[bc]))
389:           newElem--;
390:       ml->meshes[0][MESH_ELEM][ml->numElements[0]*3+corner] = newElem+1;
391:     }
392:     ml->numElements[0]++;
393:   }

395:   if (ml->factorizationViewer != PETSC_NULL) {
396:     PetscTruth isPeriodic;
397:     PetscDraw  draw;

399:     MeshView(mesh, ml->factorizationViewer);
400:     PetscViewerFlush(ml->factorizationViewer);
401:     PetscViewerDrawGetDraw(ml->factorizationViewer, 0, &draw);
402:     MeshIsPeriodic(mesh, &isPeriodic);
403:     if (isPeriodic == PETSC_FALSE) {
404:       /* Does not work in periodic case */
405:       PCMultiLevelView_Private(pc, ml->factorizationViewer, 0, ml->numVertices[0], ml->numElements[0], ml->meshes[0]);
406: 
407:     }
408:   }

410:   /* Cleanup */
411:   PetscFree(bcNodes);
412: #ifdef PETSC_USE_BOPT_g
413: #endif

415:   if ((ml->sField < 0) || (ml->tField < 0)) {
 416:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Shape and test function fields must be set up.");
417:   }
418:   /* Create shape function variable orderings */
419:   VarOrderingCreateGeneral(grid, 1, &ml->sField, &ml->sOrder);
420:   LocalVarOrderingCreate(grid, 1, &ml->sField, &ml->sLocOrder);
421:   /* Create test function variable orderings */
422:   VarOrderingCreateGeneral(grid, 1, &ml->tField, &ml->tOrder);
423:   LocalVarOrderingCreate(grid, 1, &ml->tField, &ml->tLocOrder);
424:   /* Check orderings */
425:   if (ml->numVertices[0] != ml->sOrder->numLocVars) {
426:     SETERRQ2(PETSC_ERR_ARG_CORRUPT, "Invalid local graph: %d vertices != %d vars", ml->numVertices[0], ml->sOrder->numLocVars);
427:   }
428:   /* Allocate the operator */
429:   GMatCreateRectangular(grid, ml->sOrder, ml->tOrder, &ml->B);
430:   /* Create the multiplier solution vector */
431:   GVecCreateRectangular(ml->grid, ml->tOrder, &ml->reduceSol);
432:   /* Construct the operator */
433:   MatSetOption(ml->B, MAT_NEW_NONZERO_ALLOCATION_ERR);
434:   GMatEvaluateOperatorGalerkin(ml->B, PETSC_NULL, 1, &ml->sField, &ml->tField, ml->gradOp,
435:                                       ml->gradAlpha, MAT_FINAL_ASSEMBLY, ml);
436: 

438:   /* Precondition the initial system with its diagonal */
439:   PetscOptionsHasName(pc->prefix, "-pc_ml_jacobi", &opt);
440:   if (opt == PETSC_TRUE) {
441:     GVecCreateConstrained(ml->grid, &ml->diag);
442:     GMatGetDiagonalConstrained(pc->mat, ml->diag);
443:     VecReciprocal(ml->diag);
444:     VecSqrt(ml->diag);
445:     MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
446:   }
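  /* With -pc_ml_jacobi the factorization below operates on the row-scaled
     gradient D^{-1/2} B, where D = diag(A); the scaling is undone by the
     matching VecReciprocal/MatDiagonalScale/VecReciprocal sequence near the
     end of this function. */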

448:   /* Parallel Support */
449:   if (numProcs > 1) {
450:     PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
451:     /* Separate interior and interface rows */
452:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &interiorRows);
453:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &ml->interfaceRows);
454:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &diagInterfaceRows);
455:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &offdiagInterfaceRows);
456:     PetscMemzero(diagInterfaceRows,    ml->tOrder->numLocVars * sizeof(int));
457:     PetscMemzero(offdiagInterfaceRows, ml->tOrder->numLocVars * sizeof(int));
458:     PetscLogObjectMemory(pc, ml->tOrder->numLocVars * sizeof(int));
459:     ml->numInteriorRows     = 0;
460:     ml->numLocInterfaceRows = 0;
461:     for(row = ml->tOrder->firstVar[rank]; row < ml->tOrder->firstVar[rank+1]; row++) {
462:       MatGetRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
463: #ifdef PETSC_USE_BOPT_g
464:       if (rowLen <= 0) {
 465:         PetscPrintf(PETSC_COMM_SELF, "row %d is null\n", row);
466:         SETERRQ(PETSC_ERR_ARG_CORRUPT, "Null row in gradient");
467:       }
468: #endif
469:       /* Find rows on domain boundaries */
470:       for(col = 0, isInterfaceRow = PETSC_FALSE; col < rowLen; col++) {
471:         if ((cols[col] < ml->sOrder->firstVar[rank]) || (cols[col] >= ml->sOrder->firstVar[rank+1])) {
472:           isInterfaceRow = PETSC_TRUE;
473:           offdiagInterfaceRows[ml->numLocInterfaceRows]++;
474:         }
475:       }
476:       if (isInterfaceRow == PETSC_FALSE) {
477:         interiorRows[ml->numInteriorRows++] = row;
478:       } else {
479:         ml->interfaceRows[ml->numLocInterfaceRows]   = row;
 480:         diagInterfaceRows[ml->numLocInterfaceRows]   = rowLen - offdiagInterfaceRows[ml->numLocInterfaceRows]; ml->numLocInterfaceRows++;
481:       }

483:       MatRestoreRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
484:     }
485:     if (ml->numInteriorRows + ml->numLocInterfaceRows != ml->tOrder->numLocVars) {
486:       SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
487:     }

489:     /* Place interior rows after interface rows */
490:     ml->interiorRows = ml->interfaceRows + ml->numLocInterfaceRows;
491:     PetscMemcpy(ml->interiorRows, interiorRows, ml->numInteriorRows * sizeof(int));
492:     PetscFree(interiorRows);

494:     /* Calculate global size of the interface matrix */
495:     PetscMalloc((numProcs+1) * sizeof(int), &ml->firstInterfaceRow);
496:     PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
497:     MPI_Allgather(&ml->numLocInterfaceRows, 1, MPI_INT, &ml->firstInterfaceRow[1], 1, MPI_INT, pc->comm);
498: 
499:     ml->firstInterfaceRow[0] = 0;
500:     for(proc = 1; proc < numProcs+1; proc++)
501:       ml->firstInterfaceRow[proc] += ml->firstInterfaceRow[proc-1];
502:     ml->numInterfaceRows = ml->firstInterfaceRow[numProcs];

504:     /* Setup allocation */
505:     PetscMalloc((ml->numInteriorRows+1) * sizeof(int), &rowLens);
506:     for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++) {
507:       if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row)) {
508:         lossCount++;
509:         continue;
510:       } else {
511:         MatGetRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
512:         rowLens[count] = rowLen;
513:         MatRestoreRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
514:         count++;
515:       }
516:     }
517:     if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
518:     if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");

520:     /* Create the gradient local to this processor */
521:     MatCreateSeqAIJ(PETSC_COMM_SELF, ml->numInteriorRows, ml->sOrder->numLocVars, 0, rowLens, &ml->locB);
522: 
523:     PetscFree(rowLens);
524:     for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++)
525:     {
526:       if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row))
527:       {
528:         lossCount++;
529:         continue;
530:       }
531:       else
532:       {
533:         MatGetRow(ml->B, row, &rowLen, &cols, &vals);
534:         /* This is definitely cheating */
535:         for(col = 0; col < rowLen; col++)
536:           cols[col] -= ml->sOrder->firstVar[rank];
537:         MatSetValues(ml->locB, 1, &count, rowLen, cols, vals, INSERT_VALUES);
538:         MatRestoreRow(ml->B, row, &rowLen, &cols, &vals);
539:         count++;
540:       }
541:     }
542:     MatAssemblyBegin(ml->locB, MAT_FINAL_ASSEMBLY);
543:     MatAssemblyEnd(ml->locB, MAT_FINAL_ASSEMBLY);
544:     if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
545:     if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
546:     PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
547:   } else {
548:     ml->locB = ml->B;
549:   }
550:   MatGetLocalSize(ml->locB, &ml->numRows, &ml->numCols);
551: #ifdef PETSC_USE_BOPT_g
552: #endif

554: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
 555:   /* Events for the SGI R10000 hardware counters; add 16 to each event for Counter 1.

557:      Event Counter 0                                            Counter 1
558:      0     Cycles                                               Cycles
559:      1     Instructions Issued                                  Instructions Graduated
560:      2     Load/prefetch/sync Issued                            Load/prefetch/sync Graduated
561:      3     Stores Issued                                        Stores Graduated
562:      4     Store conditional Issued                             Store conditional Graduated
563:      5     Failed store conditional                             Floating point instructions Graduated
564:      6     Branches decoded                                     Write back from data cache to secondary cache
565:      7     Write back from secondary cache to system interface  TLB refill exceptions
566:      8     Single-bit ECC errors on secondary cache data        Branches mispredicted
567:      9     Instruction cache misses                             Data cache misses
568:      10    Secondary instruction cache misses                   Secondary data cache misses
569:      11    Secondary instruction cache way mispredicted         Secondary data cache way mispredicted
570:      12    External intervention requests                       External intervention hits
571:      13    External invalidation requests                       External invalidation hits
572:      14    Virtual coherency                                    Upgrade requests on clean secondary cache lines
573:      15    Instructions graduated                               Upgrade requests on shared secondary cache lines
574:    */
575:   /* Setup events */
576:   event0 = 0;
577:   event1 = 21;
578:   PetscOptionsHasName(pc->prefix, "-pc_ml_flops_monitor", &opt);
579:   if (opt == PETSC_TRUE)
580:     event1 = 21;
581:   PetscOptionsHasName(pc->prefix, "-pc_ml_cache_monitor", &opt);
582:   if (opt == PETSC_TRUE)
583:     event1 = 25;
584:   PetscOptionsHasName(pc->prefix, "-pc_ml_sec_cache_monitor", &opt);
585:   if (opt == PETSC_TRUE)
586:     event1 = 26;

 588:   /* Open the proc file for ioctl() */
589:   pid = getpid();
590:   sprintf(procfile, "/proc/%010d", pid);
591:   if ((fd  = open(procfile, O_RDWR)) < 0) SETERRQ(PETSC_ERR_FILE_OPEN, "Could not open proc file");
592:   /* Set the event for counter 0 */
593:   for (event = 0; event < HWPERF_EVENTMAX; event++) {
594:     if (event == event0) {
595:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
596:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie   = 1;
597:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev   = event;
598:       evctr_args.hwp_ovflw_freq[event]                                = 0;
599:     } else {
600:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
601:       evctr_args.hwp_ovflw_freq[event]                       = 0;
602:     }
603:   }
604:   /* Set the event for counter 1 */
605:   for (event = HWPERF_CNT1BASE; event < HWPERF_EVENTMAX; event++) {
606:     if (event == event1) {
607:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
608:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie   = 1;
609:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev   = event - HWPERF_CNT1BASE;
610:       evctr_args.hwp_ovflw_freq[event]                                = 0;
611:     } else {
612:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
613:       evctr_args.hwp_ovflw_freq[event]                       = 0;
614:     }
615:   }
616:   evctr_args.hwp_ovflw_sig = 0;
617:   /* Start SGI hardware counters */
618:   if ((gen_start = ioctl(fd, PIOCENEVCTRS, (void *) &evctr_args)) < 0) {
619:     SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
620:   }
621: #endif
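  /* The counters started here are read back with PIOCGETEVCTRS, summed across
     processes, and released with PIOCRELEVCTRS after the factorization below. */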
622:   PC_MLLogEventEnd(PC_ML_SetUpInit, pc, 0, 0, 0);

624:   /* Factorize the gradient matrix */
625:   PCMultiLevelReduce(pc);

627:   /* Parallel support */
628:   if (numProcs > 1) {
629:     PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
630:     /* Get size information */
631:     PetscMalloc(numProcs     * sizeof(int), &nullCols);
632:     PetscMalloc((numProcs+1) * sizeof(int), &ml->firstNullCol);
633:     PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
634:     MPI_Allreduce(&ml->numLocNullCols, &ml->numNullCols, 1, MPI_INT, MPI_SUM, pc->comm);
635:     MPI_Allgather(&ml->numLocNullCols, 1, MPI_INT, nullCols, 1, MPI_INT, pc->comm);
636:     for(proc = 1, ml->firstNullCol[0] = 0; proc <= numProcs; proc++) {
637:       ml->firstNullCol[proc] = nullCols[proc-1] + ml->firstNullCol[proc-1];
638:     }
 639:     PetscLogInfo(pc, "Interface rows: %d Total rows: %d NullCols: %d\n", ml->numInterfaceRows, ml->tOrder->numVars, ml->numNullCols);

641:     /* Get all interface rows */
642:     PetscMalloc(ml->numInterfaceRows * sizeof(int), &interfaceRows);
643:     PetscMalloc(numProcs             * sizeof(int), &recvCounts);
644:     for(proc = 0; proc < numProcs; proc++)
645:       recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
646:     MPI_Allgatherv(ml->interfaceRows, ml->numLocInterfaceRows,           MPI_INT,
647:                    interfaceRows,     recvCounts, ml->firstInterfaceRow, MPI_INT, pc->comm);
648:     PetscFree(recvCounts);

650:     /* Get all the interface columns */
651:     PetscMalloc(ml->numNullCols    * sizeof(int), &interfaceCols);
652:     if (ml->numLocNullCols > 0) {
653:       PetscMalloc(ml->numLocNullCols * sizeof(int), &cols);
654:     }
655:     for(col = 0; col < ml->numLocNullCols; col++) {
656:       cols[col] = ml->nullCols[col] + ml->sOrder->firstVar[rank];
657:     }
658:     MPI_Allgatherv(cols,          ml->numLocNullCols,     MPI_INT,
659:                    interfaceCols, nullCols, ml->firstNullCol, MPI_INT, pc->comm);
660:     if (ml->numLocNullCols > 0) {
661:       PetscFree(cols);
662:     }

664:     /* Get the rows for the range split onto processors like a column vector
665:          Here we are looping over all local columns (ml->sOrder->numLocVars)
666:        using the local column number (col). The number of null columns discovered
 667:        (nullCol) should equal the number found in the serial factorization
 668:        (ml->numLocNullCols), and the number of range columns (rangeCol) should
669:        agree with the local rank (ml->rank).
670:      */
671:     PetscMalloc(ml->sOrder->numLocVars * sizeof(int), &rangeRows);
672:     for(col = 0, nullCol = 0, rangeCol = 0; col < ml->sOrder->numLocVars; col++) {
673:       if ((nullCol < ml->numLocNullCols) && (ml->nullCols[nullCol] == col)) {
674:         rangeRows[col] = interfaceRows[ml->firstNullCol[rank]+nullCol++];
675:       } else {
676:         rangeRows[col] = ml->interiorRows[ml->range[rangeCol++]];
677:       }
678:     }
679:     if ((rangeCol != ml->rank) || (nullCol != ml->numLocNullCols)) {
 680:       PetscPrintf(PETSC_COMM_SELF, "[%d]rank: %d rangeCol: %d null: %d nullCol: %d\n",
681:                   rank, ml->rank, rangeCol, ml->numLocNullCols, nullCol);
682:       SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space");
683:     }

685:     /* Renumber and enlarge range[] to account for interface rows */
686:     ml->localRank = ml->rank;
687:     if (ml->numNullCols < ml->firstInterfaceRow[rank])
688:       numNewRange = 0;
689:     else
690:       numNewRange = PetscMin(ml->firstInterfaceRow[rank+1], ml->numNullCols) - ml->firstInterfaceRow[rank];
691:     if (ml->rank + numNewRange > 0)
692:     {
693:       /* We maintain the range[] in local row numbers */
694:       PetscMalloc((ml->rank + numNewRange) * sizeof(int), &newRange);
695:       for(row = 0; row < ml->rank; row++)
696:         newRange[row] = ml->interiorRows[ml->range[row]] - ml->tOrder->firstVar[rank];
697:       for(row = 0; row < numNewRange; row++)
698:         newRange[ml->rank+row] = ml->interfaceRows[row] - ml->tOrder->firstVar[rank];
699:       ml->rank += numNewRange;
700:       if (ml->range != PETSC_NULL)
701:         {PetscFree(ml->range);                                                                     }
702:       ml->range = newRange;
703:     }
704:     MPI_Allreduce(&ml->rank, &ml->globalRank, 1, MPI_INT, MPI_SUM, pc->comm);

706:     /* Create work vectors for applying P in parallel */
707:     GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);

709:     /* Create scatter from the solution vector to a local interior vector */
710:     ISCreateStride(PETSC_COMM_SELF, ml->numInteriorRows, 0, 1, &localIS);
711:     ISCreateGeneral(PETSC_COMM_SELF, ml->numInteriorRows, ml->interiorRows, &globalIS);
712:     VecCreateSeq(PETSC_COMM_SELF, ml->numInteriorRows, &ml->interiorRhs);
713:     VecScatterCreate(pc->vec, globalIS, ml->interiorRhs, localIS, &ml->interiorScatter);
714:     ISDestroy(localIS);
715:     ISDestroy(globalIS);

717:     /* Create scatter from the solution vector to a local interface vector */
718:     ISCreateStride(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->firstInterfaceRow[rank], 1, &localIS);
719:     ISCreateGeneral(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->interfaceRows, &globalIS);
720:     VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &ml->interfaceRhs);
721:     VecScatterCreate(pc->vec, globalIS, ml->interfaceRhs, localIS, &ml->interfaceScatter);
722:     ISDestroy(localIS);
723:     ISDestroy(globalIS);

725: #ifndef PETSC_HAVE_PLAPACK
726:     /* Create scatter from the solution vector to an interface vector */
727:     if (rank == 0) {
728:       ISCreateStride(PETSC_COMM_SELF, ml->numInterfaceRows, 0, 1, &localIS);
729:       ISCreateGeneral(PETSC_COMM_SELF, ml->numInterfaceRows, interfaceRows, &globalIS);
730:       VecCreateSeq(PETSC_COMM_SELF, ml->numInterfaceRows, &ml->locInterfaceRhs);
731:       VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
732:     } else {
733:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
734:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
735:       VecCreateSeq(PETSC_COMM_SELF, 0, &ml->locInterfaceRhs);
736:       VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
737:     }
738:     ISDestroy(localIS);
739:     ISDestroy(globalIS);
740: #endif

742:     /* Create scatter from a column vector to a column interface vector */
743: #ifdef PETSC_HAVE_PLAPACK
744:     ISCreateStride(PETSC_COMM_SELF, ml->numLocNullCols, ml->firstNullCol[rank], 1, &localIS);
745:     ISCreateGeneral(PETSC_COMM_SELF, ml->numLocNullCols, &interfaceCols[ml->firstNullCol[rank]], &globalIS);
746: 
747:     VecCreateMPI(pc->comm, ml->numLocNullCols, ml->numNullCols, &ml->interfaceColRhs);
748:     VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
749: 
750:     ISDestroy(localIS);
751:     ISDestroy(globalIS);
752: #else
753:     if (rank == 0) {
754:       ISCreateStride(PETSC_COMM_SELF, ml->numNullCols, 0, 1, &localIS);
755:       ISCreateGeneral(PETSC_COMM_SELF, ml->numNullCols, interfaceCols, &globalIS);
756:       VecCreateSeq(PETSC_COMM_SELF, ml->numNullCols, &ml->interfaceColRhs);
757:       VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
758: 
759:     } else {
760:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
761:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
762:       VecCreateSeq(PETSC_COMM_SELF, 0, &ml->interfaceColRhs);
763:       VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
764: 
765:     }
766:     ISDestroy(localIS);
767:     ISDestroy(globalIS);
768: #endif

770:     /* Create scatter from a solution vector to a column vector */
771:     ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
772:     ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
773:     VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
774:     ISDestroy(localIS);
775:     ISDestroy(globalIS);

777:     /* Create sparse B_I */
778:     MatCreateMPIAIJ(pc->comm, ml->numLocInterfaceRows, ml->sOrder->numLocVars, ml->numInterfaceRows,
779:                            ml->sOrder->numVars, 0, diagInterfaceRows, 0, offdiagInterfaceRows, &ml->interfaceB);
780: 
781:     PetscFree(diagInterfaceRows);
782:     PetscFree(offdiagInterfaceRows);

784:     for(row = 0; row < ml->numLocInterfaceRows; row++) {
785:       /* Copy row into B^p_I */
786:       row2 = row + ml->firstInterfaceRow[rank];
787:       MatGetRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
788:       MatSetValues(ml->interfaceB, 1, &row2, rowLen, cols, vals, INSERT_VALUES);
789:       MatRestoreRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
790:     }
791:     MatAssemblyBegin(ml->interfaceB, MAT_FINAL_ASSEMBLY);
792:     MatAssemblyEnd(ml->interfaceB, MAT_FINAL_ASSEMBLY);

794:     /* Get V_I: The rows of V corresponding to the zero singular values for each domain */
795:     VecDuplicateVecs(ml->colWorkVec, ml->numNullCols, &ml->interfaceV);
796:     for(col = 0; col < ml->numNullCols; col++) {
797:       tempV   = ml->interfaceV[col];
798:       if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
799:         nullCol = ml->nullCols[col-ml->firstNullCol[rank]]+ml->sOrder->firstVar[rank];
800:         ierr    = VecSetValues(tempV, 1, &nullCol, &one, INSERT_VALUES);
801:       }
802:       ierr      = VecAssemblyBegin(tempV);
803:       ierr      = VecAssemblyEnd(tempV);
804:       if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
805:         ierr    = PCMultiLevelApplyV(pc, tempV, tempV);
806:       }
807:     }

809: #ifdef PETSC_HAVE_PLAPACK
810:     /* You MUST initialize all PLAPACK variables to PETSC_NULL */
811:     ml->PLAlayout    = PETSC_NULL;
812:     ml->interfaceQR  = PETSC_NULL;
813:     ml->interfaceTAU = PETSC_NULL;
814:     ml->PLArhsD      = PETSC_NULL;
815:     ml->PLArhsP      = PETSC_NULL;
816:     /* Create the template for vector and matrix alignment */
817:     PLA_Temp_create(ml->numNullCols, 0, &ml->PLAlayout);
818:     /* Create the interface matrix B_Gamma */
819:     PLA_Matrix_create(MPI_DOUBLE, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, 0, 0, &ml->interfaceQR);
820: 
821:     /* Create the vector of scaling factors Tau */
822:     PLA_Vector_create(MPI_DOUBLE, ml->numNullCols, ml->PLAlayout, 0, &ml->interfaceTAU);
823:     /* Create the storage vectors for application of D and P */
824:     PLA_Vector_create(MPI_DOUBLE, ml->numNullCols,      ml->PLAlayout, 0, &ml->PLArhsD);
825:     PLA_Vector_create(MPI_DOUBLE, ml->numInterfaceRows, ml->PLAlayout, 0, &ml->PLArhsP);
826:     /* Create temporary space for the local rows of the interface matrix */
827:     PetscMalloc(ml->numLocInterfaceRows*ml->numNullCols * sizeof(double), &locB_I);

829:     /* Construct B_I V_I */
830:     ierr  = VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
831:     for(col = 0; col < ml->numNullCols; col++) {
832:       MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
833:       /* Copy into storage -- column-major order */
834:       ierr  = VecGetArray(tempB, &arrayB);
835:       PetscMemcpy(&locB_I[ml->numLocInterfaceRows*col], arrayB, ml->numLocInterfaceRows * sizeof(double));
836: 
837:       VecRestoreArray(tempB, &arrayB);
838:     }
839:     VecDestroy(tempB);
840:     VecDestroyVecs(ml->interfaceV, ml->numNullCols);

842:     /* Set the matrix entries */
843:     PLA_Obj_set_to_zero(ml->interfaceQR);
844:     PLA_API_begin();
845:     PLA_Obj_API_open(ml->interfaceQR);
846:     PLA_API_axpy_matrix_to_global(ml->numLocInterfaceRows, ml->numNullCols, &one, locB_I,
847:                                          ml->numLocInterfaceRows, ml->interfaceQR, ml->firstInterfaceRow[rank], 0);
848: 
849:     PLA_Obj_API_close(ml->interfaceQR);
850:     PLA_API_end();
851:     PetscFree(locB_I);
852: #else
853:     /* Allocate storage for the QR factorization */
854:     minSize = PetscMin(ml->numInterfaceRows, ml->numNullCols);
855:     PetscMalloc(ml->numInterfaceRows*ml->numNullCols * sizeof(double), &ml->interfaceQR);
856:     PetscMalloc(minSize                              * sizeof(double), &ml->interfaceTAU);
857:     PetscLogObjectMemory(pc, (ml->numInterfaceRows*ml->numNullCols + minSize)*sizeof(double));
858:     PetscMemzero(ml->interfaceQR,  ml->numInterfaceRows*ml->numNullCols * sizeof(double));
859:     PetscMemzero(ml->interfaceTAU, minSize                              * sizeof(double));

861:     /* Construct B_I V_I */
862:     VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
863:     for(col = 0; col < ml->numNullCols; col++) {
864:       MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
865:       /* Copy into storage -- column-major order */
866:       VecGetArray(tempB, &arrayB);
867:       PetscMemcpy(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], arrayB,
868:                          ml->numLocInterfaceRows * sizeof(double));
869: 
870:       VecRestoreArray(tempB, &arrayB);
871:     }
872:     VecDestroy(tempB);
873:     VecDestroyVecs(ml->interfaceV, ml->numNullCols);

 875:     /* Gather matrix / B^1_I V^1_I | ... | B^1_I V^p_I \
 876:                      | B^2_I V^1_I | ... | B^2_I V^p_I |
 877:                                      ...
 878:                       \ B^p_I V^1_I | ... | B^p_I V^p_I /

880:        Note: It has already been put in column-major order
881:     */
882:     PetscMalloc(numProcs * sizeof(int), &recvCounts);
883:     for(proc = 0; proc < numProcs; proc++)
884:       recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
885:     for(col = 0; col < ml->numNullCols; col++) {
886:       MPI_Gatherv(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], ml->numLocInterfaceRows,
887:                          MPI_DOUBLE, &ml->interfaceQR[ml->numInterfaceRows*col], recvCounts, ml->firstInterfaceRow,
888:                          MPI_DOUBLE, 0, pc->comm);
889: 
890:     }
891:     PetscFree(recvCounts);

893:     /* Allocate workspace -- Also used in applying P */
894:     ml->workLen = ml->numInterfaceRows;
895:     PetscMalloc(ml->workLen * sizeof(double), &ml->work);
896:     PetscLogObjectMemory(pc, ml->workLen * sizeof(double));
897: #endif

899:     /* Do QR factorization on one processor */
900:     PC_MLLogEventBegin(PC_ML_QRFactorization, pc, 0, 0, 0);
901: #ifdef PETSC_HAVE_PLAPACK
902:     PLA_QR(ml->interfaceQR, ml->interfaceTAU);
903: #else
904:     if (rank == 0) {
905:       LAgeqrf_(&ml->numInterfaceRows, &ml->numNullCols, ml->interfaceQR,
906:              &ml->numInterfaceRows, ml->interfaceTAU, ml->work, &ml->workLen, &ierr);
907:       if (ierr) SETERRQ(PETSC_ERR_LIB, "Invalid interface QR");
908:     }
909: #endif
910:     PC_MLLogEventEnd(PC_ML_QRFactorization, pc, 0, 0, 0);

912: #ifdef PETSC_USE_BOPT_g
913:     /* Diagnostics */
914:     PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug_qr", &opt);
915:     if (opt == PETSC_TRUE) {
916:       PetscReal *intQR;
917:       PetscReal *intTAU;
918:       int     r, c;
919: #ifdef PETSC_HAVE_PLAPACK
920:       PLA_Obj tau = PETSC_NULL;
921:       PLA_Obj qr  = PETSC_NULL;
922:       int     length, width, stride, ldim;

924:       PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numNullCols, 1, ml->PLAlayout, &tau);
925:       PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, &qr);
926:       PLA_Copy(ml->interfaceTAU, tau);
927:       PLA_Copy(ml->interfaceQR,  qr);
928:       PLA_Obj_local_info(tau, &length, &width, (void **) &intTAU, &stride, &ldim);
929:       if ((rank == 0) && ((length != ml->numNullCols) || (width != 1) || (stride != 1)))
930:         SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface tau vector");
931:       PLA_Obj_local_info(qr,  &length, &width, (void **) &intQR,  &stride, &ldim);
932:       if ((rank == 0) && ((length != ml->numInterfaceRows) || (width != ml->numNullCols) || (ldim != ml->numInterfaceRows)))
933:         SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface qr matrix");
934: #else
935:       intTAU = ml->interfaceTAU;
936:       intQR  = ml->interfaceQR;
937: #endif
938:       if (rank == 0) {
939:         for(c = 0; c < ml->numNullCols; c++)
940:           PetscPrintf(pc->comm, "%.4g ", intTAU[c]);
 941:         PetscPrintf(pc->comm, "\n");
942:         for(r = 0; r < ml->numInterfaceRows; r++) {
943:           for(c = 0; c < ml->numNullCols; c++)
944:             PetscPrintf(pc->comm, "%.4g ", intQR[ml->numInterfaceRows*c+r]);
 945:           PetscPrintf(pc->comm, "\n");
946:         }
947:       }
948: #ifdef PETSC_HAVE_PLAPACK
949:       PLA_Obj_free(&tau);
950:       PLA_Obj_free(&qr);
951: #endif
952:     }
953: #endif

955:     /* Scatter Cleanup */
956:     PetscFree(rangeRows);
957:     PetscFree(nullCols);
958:     PetscFree(interfaceRows);
959:     PetscFree(interfaceCols);
960:     PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
961:   } else {
962:     /* Get the rows for the range split onto processors like a column vector */
963:     PetscMalloc(ml->rank * sizeof(int), &rangeRows);
964:     for(row = 0; row < ml->rank; row++)
965:       rangeRows[row] = ml->range[row];
966:     GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);

968:     /* Create scatter from a solution vector to a column vector */
969:     ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
970:     ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
971:     VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
972:     ISDestroy(localIS);
973:     ISDestroy(globalIS);

975:     /* Cleanup */
976:     PetscFree(rangeRows);
977:     VecDestroy(ml->colWorkVec);
978:   }

980:   if (ml->diag != PETSC_NULL) {
981:     /* Recover original B */
982:     VecReciprocal(ml->diag);
983:     MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
984:     VecReciprocal(ml->diag);
985:   }

987: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
988:   /* Read the hardware counter values */
989:   if ((gen_read = ioctl(fd, PIOCGETEVCTRS, (void *) &counts)) < 0) {
990:     SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
991:   }
992:   /* Generation number should be the same */
993:   if (gen_start != gen_read) SETERRQ(PETSC_ERR_LIB, "Lost SGI hardware counters");
994:   /* Release the counters */
995:   if ((ioctl(fd, PIOCRELEVCTRS)) < 0) SETERRQ(PETSC_ERR_LIB, "Could not free SGI hardware counters");
996:   close(fd);
997:   count0 = counts.hwp_evctr[event0];
998:   count1 = counts.hwp_evctr[event1];
999:   MPI_Reduce(&count0, &globalCount0, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1000:   MPI_Reduce(&count1, &globalCount1, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1001:   /* Use hardware counter data */
1002:   switch (event1) {
1003:   case 21:
 1004:     PetscPrintf(pc->comm, "Flops: %ld\n", globalCount1);
1005:     break;
1006:   case 25:
 1007:     PetscPrintf(pc->comm, "Data cache misses: %ld\n", globalCount1);
1008:     break;
1009:   case 26:
 1010:     PetscPrintf(pc->comm, "Secondary data cache misses: %ld\n", globalCount1);
1011:     break;
1012:   }
1013: #endif

1015:   PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug", &opt);
1016:   if (opt == PETSC_TRUE) {
1017:     PCDebug_Multilevel(pc);
1018:   }

1020:   return(0);
1021: }
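/* Setup recap: PCSetUp_Multilevel builds the local mesh graph, assembles the
   gradient B, separates interior from interface rows, factors the local part
   through PCMultiLevelReduce(), and finishes with a QR factorization of the
   interface block B_I V_I (PLAPACK when available, otherwise LAPACK geqrf on
   process 0). */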

1023: static int PCPreSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1024: {
1025:   PC_Multilevel *ml   = (PC_Multilevel *) pc->data;
1026:   Grid           grid = ml->grid;
1027:   GVec           bdReduceVec, bdReduceVec2;
1028:   GVec           reduceVec;
1029:   GMat           bdReduceMat;
1030:   VarOrdering    sOrder;
1031:   PetscScalar    one = 1.0, reduceAlpha;
1032:   int            bc;
1033:   int            ierr;

1036:   /* Create -g = B^T u + B^T_Gamma f_Gamma */
1037:   GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec);
1038:   VarOrderingCreateSubset(grid->reduceOrder, 1, &ml->tField, PETSC_FALSE, &sOrder);
1039: #define OLD_REDUCTION
1040: #ifdef OLD_REDUCTION
1041:   GMatCreateRectangular(grid, sOrder, ml->sOrder, &bdReduceMat);
1042:   MatSetOption(bdReduceMat, MAT_NEW_NONZERO_ALLOCATION_ERR);
1043:   for(bc = 0; bc < grid->numBC; bc++) {
1044:     /* Conditions on A */
1045:     if (ml->tField == grid->bc[bc].field) {
1046:       GMatEvaluateOperatorGalerkin(bdReduceMat, PETSC_NULL, 1, &ml->tField, &ml->sField, ml->divOp,
1047:                                           1.0, MAT_FINAL_ASSEMBLY, ml);
1048: 
1049:     }
1050:     /* Conditions on B^T only show up in the u equations, so they are already handled */
1051:   }
1052:   for(bc = 0; bc < grid->numPointBC; bc++) {
1053:     /* Conditions on A */
1054:     if (ml->tField == grid->pointBC[bc].field) {
1055:       GVecEvaluateOperatorGalerkin(bdReduceVec, grid->bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1056:                                           ml->divOp, 1.0, ml);
1057: 
1058:     }
1059:     /* Conditions on B^T only show up in the u equations, so they are already handled */
1060: #ifdef PETSC_USE_BOPT_g
1061: #endif
1062:   }
1063: #else
1064:   GVecEvaluateOperatorGalerkin(bdReduceVec, bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1065:                                       ml->divOp, 1.0, ml);
1066: 
1067: #endif
1068:   VarOrderingDestroy(sOrder);
1069: #ifdef OLD_REDUCTION
1070:   /* Should be taken out when I fix the MF version */
1071:   MatMult(bdReduceMat, grid->bdReduceVec, bdReduceVec);
1072:   GMatDestroy(bdReduceMat);
1073: #endif

1075:   /* Add in the B^T u part */
1076:   if (ml->nonlinearIterate != PETSC_NULL) {
1077:     GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec2);
1078:     PCMultiLevelApplyGradientTrans(pc, ml->nonlinearIterate, bdReduceVec2);
1079:     VecAXPY(&one, bdReduceVec2, bdReduceVec);
1080:     GVecDestroy(bdReduceVec2);
1081:   }

1083:   /* Calculate -D^{-T} Z^T g = D^{-T} Z^T B^T_Gamma f_Gamma */
1084:   PCMultiLevelApplyVTrans(pc, bdReduceVec, bdReduceVec);
1085:   PCMultiLevelApplyDInvTrans(pc, bdReduceVec, bdReduceVec);

1087:   /* Calculate -P_1 D^{-1} Z^T g */
1088:   PCMultiLevelApplyP1(pc, bdReduceVec, ml->reduceSol);

1090:   /* Calculate -A P_1 D^{-1} Z^T g -- This does not seem like good design, but I will deal with it later */
1091:   GVecCreateRectangular(grid, ml->tOrder, &reduceVec);
1092:   MatMult(pc->mat, ml->reduceSol, reduceVec);

1094:   /* f --> f - A P_1 D^{-1} Z^T g */
1095:   GridGetBCMultiplier(grid, &reduceAlpha);
1096:   VecAXPY(&reduceAlpha, reduceVec, rhs);

1098:   /* Cleanup */
1099:   GVecDestroy(reduceVec);
1100:   GVecDestroy(bdReduceVec);
1101:   return(0);
1102: }
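/* Presolve recap: the routine above assembles -g = B^T u + B^T_Gamma f_Gamma,
   applies Z^T and D^{-T} to it, forms reduceSol = -P_1 D^{-1} Z^T g, and then
   updates the right-hand side as f --> f - A P_1 D^{-1} Z^T g (scaled by the
   boundary-condition multiplier). PCPostSolve_Multilevel below intentionally
   does nothing. */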

1104: static int PCPostSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1105: {
1107:   return(0);
1108: }

1110: static int PCDestroy_Multilevel(PC pc)
1111: {
1112:   PC_Multilevel *ml = (PC_Multilevel *) pc->data;
1113:   int            ierr;

1116:   if (ml->numLevels >= 0) {
1117:     PCDestroyStructures_Multilevel(pc);
1118:   }
1119:   if (ml->mathViewer != PETSC_NULL) {
1120:     PetscViewerDestroy(ml->mathViewer);
1121:   }
1122:   if (ml->factorizationViewer != PETSC_NULL) {
1123:     PetscViewerDestroy(ml->factorizationViewer);
1124:   }
1125:   PetscFree(ml);
1126:   return(0);
1127: }

1129: int PCApply_Multilevel(PC pc, Vec x, Vec y)
1130: {

1134:   PCMultiLevelApplyP(pc, x, y);
1135:   return(0);
1136: }

1138: int PCApplyTrans_Multilevel(PC pc, Vec x, Vec y)
1139: {

1143:   PCMultiLevelApplyPTrans(pc, x, y);
1144:   return(0);
1145: }

1147: int PCApplySymmetricLeft_Multilevel(PC pc, Vec x, Vec y)
1148: {

1152:   PCMultiLevelApplyP2Trans(pc, x, y);
1153:   return(0);
1154: }

1156: int PCApplySymmetricRight_Multilevel(PC pc, Vec x, Vec y)
1157: {

1161:   PCMultiLevelApplyP2(pc, x, y);
1162:   return(0);
1163: }
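/* The four routines above expose the factored preconditioner to KSP: PCApply
   computes y = P x, PCApplyTrans computes y = P^T x, and the symmetric
   left/right pair splits the application as P2^T A P2 for symmetrically
   preconditioned solves. A sketch with hypothetical vectors u, v, w: */
#if 0
  PCApplySymmetricLeft_Multilevel(pc, u, v);   /* v = P2^T u */
  PCApplySymmetricRight_Multilevel(pc, v, w);  /* w = P2 v   */
#endif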

1165: EXTERN_C_BEGIN
1166: int PCCreate_Multilevel(PC pc)
1167: {
1168:   PC_Multilevel *ml;
1169:   int            ierr;

1172:   PCMultiLevelInitializePackage(PETSC_NULL);

1174:   PetscNew(PC_Multilevel, &ml);
1175:   PetscLogObjectMemory(pc, sizeof(PC_Multilevel));
1176:   pc->data                     = (void *) ml;

1178:   pc->ops->setup               = PCSetUp_Multilevel;
1179:   pc->ops->apply               = PCApply_Multilevel;
1180:   pc->ops->applyrichardson     = PETSC_NULL;
1181:   pc->ops->applyBA             = PETSC_NULL;
1182:   pc->ops->applytranspose      = PCApplyTrans_Multilevel;
1183:   pc->ops->applyBAtranspose    = PETSC_NULL;
1184:   pc->ops->setfromoptions      = PETSC_NULL;
1185:   pc->ops->presolve            = PCPreSolve_Multilevel;
1186:   pc->ops->postsolve           = PCPostSolve_Multilevel;
1187:   pc->ops->getfactoredmatrix   = PETSC_NULL;
1188:   pc->ops->applysymmetricleft  = PCApplySymmetricLeft_Multilevel;
1189:   pc->ops->applysymmetricright = PCApplySymmetricRight_Multilevel;
1190:   pc->ops->setuponblocks       = PETSC_NULL;
1191:   pc->ops->destroy             = PCDestroy_Multilevel;
1192:   pc->ops->view                = PETSC_NULL;
1193:   pc->modifysubmatrices        = PETSC_NULL;

1195:   ml->grid                = PETSC_NULL;
1196:   ml->useMath             = PETSC_FALSE;
1197:   ml->mathViewer          = PETSC_NULL;
1198:   ml->factorizationViewer = PETSC_NULL;

1200:   ml->gradOp              = GRADIENT;
1201:   ml->divOp               = DIVERGENCE;
1202:   ml->sField              = -1;
1203:   ml->tField              = -1;
1204:   ml->sOrder              = PETSC_NULL;
1205:   ml->sLocOrder           = PETSC_NULL;
1206:   ml->tOrder              = PETSC_NULL;
1207:   ml->tLocOrder           = PETSC_NULL;
1208:   ml->gradAlpha           = 1.0;
1209:   ml->B                   = PETSC_NULL;
1210:   ml->locB                = PETSC_NULL;
1211:   ml->reduceSol           = PETSC_NULL;
1212:   ml->nonlinearIterate    = PETSC_NULL;
1213:   ml->diag                = PETSC_NULL;

1215:   ml->numMeshes           = -1;
1216:   ml->numElements         = PETSC_NULL;
1217:   ml->numVertices         = PETSC_NULL;
1218:   ml->vertices            = PETSC_NULL;
1219:   ml->numEdges            = -1;
1220:   ml->meshes              = PETSC_NULL;

1222:   ml->numPartitions       = PETSC_NULL;
1223:   ml->numPartitionCols    = PETSC_NULL;
1224:   ml->colPartition        = PETSC_NULL;
1225:   ml->numPartitionRows    = PETSC_NULL;
1226:   ml->rowPartition        = PETSC_NULL;
1227:   ml->rank                = -1;
1228:   ml->localRank           = -1;
1229:   ml->range               = PETSC_NULL;
1230:   ml->rangeScatter        = PETSC_NULL;
1231:   ml->numLocNullCols      = -1;
1232:   ml->nullCols            = PETSC_NULL;

1234:   ml->numRows             = -1;
1235:   ml->numCols             = -1;
1236:   ml->numLevels           = -1;
1237:   ml->maxLevels           = -1;
1238:   ml->QRthresh            = 0;
1239:   ml->DoQRFactor          = 2;
1240:   ml->zeroTol             = 1.0e-10;
1241:   ml->factors             = PETSC_NULL;
1242:   ml->grads               = PETSC_NULL;

1244:   ml->globalRank          = -1;
1245:   ml->numNullCols         = -1;
1246:   ml->numLocInterfaceRows = -1;
1247:   ml->numInterfaceRows    = -1;
1248:   ml->interfaceRows       = PETSC_NULL;
1249:   ml->firstInterfaceRow   = PETSC_NULL;
1250:   ml->numInteriorRows     = -1;
1251:   ml->interiorRows        = PETSC_NULL;
1252:   ml->interfaceQR         = PETSC_NULL;
1253:   ml->interfaceTAU        = PETSC_NULL;
1254:   ml->interfaceV          = PETSC_NULL;
1255:   ml->interfaceB          = PETSC_NULL;
1256:   ml->interiorRhs         = PETSC_NULL;
1257:   ml->interiorScatter     = PETSC_NULL;
1258:   ml->interfaceRhs        = PETSC_NULL;
1259:   ml->interfaceScatter    = PETSC_NULL;
1260: #ifndef PETSC_HAVE_PLAPACK
1261:   ml->locInterfaceRhs     = PETSC_NULL;
1262:   ml->locInterfaceScatter = PETSC_NULL;
1263: #endif
1264:   ml->interfaceColRhs     = PETSC_NULL;
1265:   ml->interfaceColScatter = PETSC_NULL;
1266:   ml->reduceScatter       = PETSC_NULL;
1267:   ml->colWorkVec          = PETSC_NULL;
1268:   ml->globalColWorkVec    = PETSC_NULL;

1270:   ml->workLen             = -1;
1271:   ml->work                = PETSC_NULL;
1272:   ml->svdWorkLen          = -1;
1273:   ml->svdWork             = PETSC_NULL;
1274:   return(0);
1275: }
1276: EXTERN_C_END
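/* A minimal creation sketch (assumptions: this component is registered with
   the PC class under a name such as "multilevel" -- the registration call is
   not in this file -- and the shape/test fields sField/tField are set before
   setup runs, as required by PCSetUp_Multilevel): */
#if 0
  PC pc;
  PCCreate(comm, &pc);            /* comm is a hypothetical MPI communicator */
  PCSetType(pc, "multilevel");    /* assumed registration name */
  PCSetFromOptions(pc);           /* picks up -pc_ml_* options */
#endif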