Actual source code: ml.c
1: #ifdef PETSC_RCS_HEADER
2: static char vcid[] = "$Id: mlNew.c,v 1.18 2001/01/27 21:32:36 knepley Exp $";
3: #endif
4: /*
5: Defines a multilevel preconditioner due to Vivek Sarin for AIJ matrices
6: */
7: #include "src/sles/pc/pcimpl.h"
8: #include "src/mesh/impls/triangular/triimpl.h"
9: #include "src/grid/gridimpl.h"
10: #include <fcntl.h>
11: #if defined(PETSC_HAVE_UNISTD_H)
12: #include <unistd.h>
13: #endif
14: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
15: #include <sys/procfs.h>
16: #include <sys/hwperftypes.h>
17: #include <sys/hwperfmacros.h>
18: #endif
19: #include "ml.h"
21: int PC_MLEvents[PC_ML_MAX_EVENTS];
23: /*@C
24: PCMultiLevelInitializePackage - This function initializes everything in the ML package. It is called
25: from PetscDLLibraryRegister() when using dynamic libraries, and on the first call to PCCreate()
26: when using static libraries.
28: Input Parameter:
29: path - The dynamic library path, or PETSC_NULL
31: Level: developer
33: .keywords: PC, multilevel, initialize, package
34: .seealso: PetscInitialize()
35: @*/
36: int PCMultiLevelInitializePackage(char *path) {
37: static PetscTruth initialized = PETSC_FALSE;
38: int ierr;
41: if (initialized == PETSC_TRUE) return(0);
42: initialized = PETSC_TRUE;
43: /* Register Classes */
44: /* Register Constructors and Serializers */
45: /* Register Events */
46: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpInit], "MLSetUpInit", PC_COOKIE);
47: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrained], "MLSetUpConstr", PC_COOKIE);
48: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrainedBd], "MLSetUpConstrBd", PC_COOKIE);
49: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpParallel], "MLSetUpParallel", PC_COOKIE);
50: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionMesh], "MLRdPartMesh", PC_COOKIE);
51: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionRowCol], "MLRdPartRowCol", PC_COOKIE);
52: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceFactor], "MLRdFactor", PC_COOKIE);
53: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGrad], "MLRdBdGrad", PC_COOKIE);
54: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradExtract], "MLRdBdGradExtrct", PC_COOKIE);
55: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradRowPartLocalToGlobal], "MLRdBdGrdRwPtL2G", PC_COOKIE);
56: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceShrinkMesh], "MLRdShrinkMesh", PC_COOKIE);
57: PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricLeftParallel], "MLApplySymmLeftP", PC_COOKIE);
58: PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricRightParallel], "MLApplySymmRghtP", PC_COOKIE);
59: PetscLogEventRegister(&PC_MLEvents[PC_ML_QRFactorization], "MLQRFact", PC_COOKIE);
60: PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplyQR], "MLQRApply", PC_COOKIE);
61: PetscLogEventRegister(&PC_MLEvents[PC_ML_CreateBdGrad], "MLCreateBdGrad", PC_COOKIE);
62: /* Process info exclusions */
63: /* Process summary exclusions */
64: /* Add options checkers */
65: return(0);
66: }
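/* Editorial sketch (not part of the original source): the comment above states that this
   routine is invoked from PetscDLLibraryRegister() when the preconditioner is built as a
   dynamic library.  A minimal registration entry point would simply forward the library
   path; the function name and the guard macro used here are hypothetical. */
#ifdef PC_ML_SKETCH_DYNAMIC_REGISTRATION
EXTERN_C_BEGIN
int PetscDLLibraryRegister_MLSketch(char *path)
{
  int ierr;

  /* Register the ML logging events (and, eventually, classes and serializers) exactly once */
  ierr = PCMultiLevelInitializePackage(path);
  if (ierr) return(ierr);
  return(0);
}
EXTERN_C_END
#endif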
68: int PCConvert_Multilevel(PC pc)
69: {
70: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
71: #ifdef PETSC_HAVE_MATHEMATICA
72: int ierr;
73: #endif
76: /* Convert a Mathematica MLData object into a PC_Multilevel object */
77: if (ml->mathViewer != PETSC_NULL) {
78: #ifdef PETSC_HAVE_MATHEMATICA
79: PetscViewerMathematicaMultiLevelConvert(ml->mathViewer, pc);
80: #else
81: SETERRQ(PETSC_ERR_SUP, "Mathematica support not available");
82: #endif
83: }
84: return(0);
85: }
87: static int PCDestroyStructures_Multilevel(PC pc)
88: {
89: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
90: int numProcs, numRows, numCols;
91: int level, part;
92: int ierr;
95: /* Destroy gradient structures */
96: MPI_Comm_size(pc->comm, &numProcs);
97: VarOrderingDestroy(ml->sOrder);
98: VarOrderingDestroy(ml->tOrder);
99: LocalVarOrderingDestroy(ml->sLocOrder);
100: LocalVarOrderingDestroy(ml->tLocOrder);
101: GMatDestroy(ml->B);
102: if (ml->reduceSol != PETSC_NULL)
103: {GVecDestroy(ml->reduceSol); }
104: if (ml->diag != PETSC_NULL)
105: {GVecDestroy(ml->diag); }
107: /* Destroy mesh structures */
108: if (ml->useMath == PETSC_FALSE) {
109: PetscFree(ml->numElements);
110: PetscFree(ml->numVertices);
111: for(level = 0; level <= ml->numLevels; level++) {
112: PetscFree(ml->meshes[level][MESH_OFFSETS]);
113: PetscFree(ml->meshes[level][MESH_ADJ]);
114: PetscFree(ml->meshes[level][MESH_ELEM]);
115: PetscFree(ml->meshes[level]);
116: }
117: PetscFree(ml->vertices);
118: PetscFree(ml->meshes);
119: }
121: /* Destroy factorization structures */
122: if (ml->useMath == PETSC_FALSE)
123: {
124: for(level = 0; level < ml->numLevels; level++)
125: {
126: if (ml->numPartitions[level] == 0)
127: continue;
128: for(part = 0; part < ml->numPartitions[level]; part++)
129: {
130: numRows = ml->numPartitionRows[level][part];
131: numCols = ml->numPartitionCols[level][part];
132: if (numRows > 0)
133: {
134: PetscFree(ml->factors[level][part][FACT_U]);
135: if (PCMultiLevelDoQR_Private(pc, numRows, numCols) == PETSC_TRUE)
136: {
137: PetscFree(ml->factors[level][part][FACT_QR]);
138: PetscFree(ml->factors[level][part][FACT_TAU]);
139: }
140: }
141: if (numCols > 0)
142: {
143: PetscFree(ml->factors[level][part][FACT_DINV]);
144: PetscFree(ml->factors[level][part][FACT_V]);
145: }
146: PetscFree(ml->factors[level][part]);
147: }
148: PetscFree(ml->factors[level]);
149: MatDestroy(ml->grads[level]);
150: VecDestroy(ml->bdReduceVecs[level]);
151: VecDestroy(ml->colReduceVecs[level]);
152: VecDestroy(ml->colReduceVecs2[level]);
153: }
154: PetscFree(ml->factors);
155: PetscFree(ml->grads);
156: PetscFree(ml->bdReduceVecs);
157: PetscFree(ml->colReduceVecs);
158: PetscFree(ml->colReduceVecs2);
159: PetscFree(ml->interiorWork);
160: PetscFree(ml->interiorWork2);
161: if (ml->svdWork != PETSC_NULL)
162: {PetscFree(ml->svdWork); }
163: }
165: /* Destroy numbering structures */
166: if (ml->useMath == PETSC_FALSE)
167: {
168: for(level = 0; level < ml->numLevels; level++)
169: {
170: if (ml->numPartitions[level] == 0)
171: continue;
172: for(part = 0; part < ml->numPartitions[level]; part++)
173: {
174: if (ml->numPartitionCols[level][part] > 0)
175: {PetscFree(ml->colPartition[level][part]); }
176: if (ml->numPartitionRows[level][part] > 0)
177: {PetscFree(ml->rowPartition[level][PART_ROW_INT][part]); }
178: }
179: if (ml->numPartitionRows[level][ml->numPartitions[level]] > 0)
180: {PetscFree(ml->rowPartition[level][PART_ROW_BD][0]); }
181: if (ml->numPartitionRows[level][ml->numPartitions[level]+1] > 0)
182: {PetscFree(ml->rowPartition[level][PART_ROW_RES][0]); }
183: PetscFree(ml->rowPartition[level][PART_ROW_INT]);
184: PetscFree(ml->rowPartition[level][PART_ROW_BD]);
185: PetscFree(ml->rowPartition[level][PART_ROW_RES]);
186: PetscFree(ml->numPartitionCols[level]);
187: PetscFree(ml->colPartition[level]);
188: PetscFree(ml->numPartitionRows[level]);
189: PetscFree(ml->rowPartition[level]);
190: }
191: PetscFree(ml->numPartitions);
192: PetscFree(ml->numPartitionCols);
193: PetscFree(ml->numPartitionRows);
194: PetscFree(ml->colPartition);
195: PetscFree(ml->rowPartition);
196: }
197: if (ml->range != PETSC_NULL) {
198: PetscFree(ml->range);
199: }
200: VecScatterDestroy(ml->rangeScatter);
201: if (ml->nullCols != PETSC_NULL) {
202: PetscFree(ml->nullCols);
203: }
205: /* Destroy parallel structures */
206: if (numProcs > 1)
207: {
208: MatDestroy(ml->locB);
209: MatDestroy(ml->interfaceB);
210: VecDestroy(ml->interiorRhs);
211: VecScatterDestroy(ml->interiorScatter);
212: VecDestroy(ml->interfaceRhs);
213: VecScatterDestroy(ml->interfaceScatter);
214: #ifndef PETSC_HAVE_PLAPACK
215: VecDestroy(ml->locInterfaceRhs);
216: VecScatterDestroy(ml->locInterfaceScatter);
217: #endif
218: VecDestroy(ml->interfaceColRhs);
219: VecScatterDestroy(ml->interfaceColScatter);
220: VecDestroy(ml->colWorkVec);
221: PetscFree(ml->interfaceRows);
222: PetscFree(ml->firstInterfaceRow);
223: PetscFree(ml->firstNullCol);
224: #ifdef PETSC_HAVE_PLAPACK
225: PLA_Obj_free(&ml->interfaceQR);
226: PLA_Obj_free(&ml->interfaceTAU);
227: PLA_Obj_free(&ml->PLArhsD);
228: PLA_Obj_free(&ml->PLArhsP);
229: PLA_Temp_free(&ml->PLAlayout);
230: #else
231: PetscFree(ml->interfaceQR);
232: PetscFree(ml->interfaceTAU);
233: PetscFree(ml->work);
234: #endif
235: }
236: return(0);
237: }
239: static int PCSetUp_Multilevel(PC pc)
240: {
241: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
242: GMat A = pc->mat;
243: Grid grid;
244: Mesh mesh;
245: Mesh_Triangular *tri;
246: Partition p;
247: /* Mesh support */
248: char *typeName;
249: int numVertices, numNodes, numEdges, numElements;
250: int *bcNodes;
251: /* Parallel support */
252: PetscTruth isInterfaceRow;
253: int *diagInterfaceRows;
254: int *offdiagInterfaceRows;
255: int *interfaceRows;
256: int *interfaceCols;
257: int *interiorRows;
258: int numNewRange;
259: int *newRange;
260: int *rangeRows;
261: int *nullCols;
262: int rowLen;
263: int *rowLens;
264: int *cols;
265: PetscScalar *vals;
266: Vec tempV, tempB;
267: PetscScalar *arrayB;
268: IS localIS, globalIS;
269: int *recvCounts;
270: int numProcs, rank, proc;
271: PetscScalar one = 1.0;
272: int elem, newElem, corner, row, row2, col, nullCol, rangeCol, bc, count, lossCount;
273: #ifdef PETSC_HAVE_PLAPACK
274: double *locB_I;
275: #else
276: int minSize;
277: #endif
278: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
279: int pid, fd;
280: char procfile[64];
281: int event, event0, event1;
282: int gen_start, gen_read;
283: long long count0, globalCount0;
284: long long count1, globalCount1;
285: hwperf_profevctrarg_t evctr_args;
286: hwperf_cntr_t counts;
287: #endif
288: PetscTruth opt, match;
289: int ierr;
292: /* We need this so that setup is not called recursively by PCMultiLevelApplyXXX */
293: if (pc->setupcalled > 0) {
294: return(0);
295: }
296: pc->setupcalled = 2;
298: /* Destroy old structures -- This should not happen anymore */
299: if (ml->numLevels >= 0) {
300: SETERRQ(PETSC_ERR_ARG_WRONG, "Should not reform an existing decomposition");
301: /* PCDestroyStructures_Multilevel(pc); */
302: }
304: PC_MLLogEventBegin(PC_ML_SetUpInit, pc, 0, 0, 0);
305: /* Get the grid */
306: GMatGetGrid(A, &grid);
307: ml->grid = grid;
309: /* Get mesh information */
310: GridGetMesh(grid, &mesh);
311: MeshGetType(mesh, &typeName);
312: PetscTypeCompare((PetscObject) mesh, MESH_TRIANGULAR_2D, &match);
313: if (match == PETSC_FALSE) SETERRQ(PETSC_ERR_SUP, "Multilevel preconditioner only supports 2D triangular meshes");
314: tri = (Mesh_Triangular *) mesh->data;
315: /* Set target partition size */
316: ml->partSize = 10;
317: PetscOptionsGetInt(pc->prefix, "-pc_ml_partition_size", &ml->partSize, &opt);
319: /* Setup parallel information */
320: p = mesh->part;
321: numProcs = p->numProcs;
322: rank = p->rank;
324: /* Enable Mathematica support */
325: PetscOptionsHasName(pc->prefix, "-pc_ml_math", &opt);
326: if (opt == PETSC_TRUE) {
327: ml->useMath = PETSC_TRUE;
328: PetscViewerMathematicaOpen(pc->comm, PETSC_DECIDE, PETSC_NULL, PETSC_NULL, &ml->mathViewer);
329: }
331: PetscOptionsHasName(pc->prefix, "-pc_ml_mesh_view_draw", &opt);
332: if (opt == PETSC_TRUE) {
333: PetscViewerDrawOpen(pc->comm, 0, 0, 0, 0, 300, 300, &ml->factorizationViewer);
334: }
336: /* Setup allocation */
337: PetscOptionsGetInt(pc->prefix, "-pc_ml_max_levels", &ml->maxLevels, &opt);
338: if (ml->maxLevels < 0)
339: ml->maxLevels = 25;
341: /* Set the threshold for a final QR factorization */
342: PetscOptionsGetInt(pc->prefix, "-pc_ml_qr_thresh", &ml->QRthresh, &opt);
343: PetscOptionsGetReal(pc->prefix, "-pc_ml_qr_factor", &ml->DoQRFactor, &opt);
345: /* Allocate mesh storage */
346: ml->numMeshes = 1;
347: MeshGetInfo(mesh, &numVertices, &numNodes, &numEdges, &numElements);
348: PetscMalloc((ml->maxLevels+1) * sizeof(int), &ml->numElements);
349: PetscMalloc((ml->maxLevels+1) * sizeof(int), &ml->numVertices);
350: PetscMalloc((ml->maxLevels+1) * sizeof(int **), &ml->meshes);
351: PetscMalloc(NUM_MESH_DIV * sizeof(int *), &ml->meshes[0]);
352: PetscMalloc((numElements*3) * sizeof(int), &ml->meshes[0][MESH_ELEM]);
353: PetscMalloc(grid->numPointBC * sizeof(int), &bcNodes);
354: PetscLogObjectMemory(pc, (ml->maxLevels+1)*2 * sizeof(int) + (ml->maxLevels+1) * sizeof(int **) + NUM_MESH_DIV * sizeof(int *) +
355: (numElements*3) * sizeof(int) + grid->numPointBC * sizeof(int));
357: /* Create the local graph */
358: for(bc = 0; bc < grid->numPointBC; bc++) {
359: bcNodes[bc] = grid->pointBC[bc].node;
360: }
361: MeshCreateLocalCSR(mesh, &ml->numVertices[0], &ml->numEdges, &ml->meshes[0][MESH_OFFSETS],
362: &ml->meshes[0][MESH_ADJ], &ml->vertices, grid->numPointBC, bcNodes, PETSC_FALSE);
363:
365: /* Create the element descriptions */
366: ml->numElements[0] = 0;
367: for(elem = 0; elem < numElements; elem++) {
368: /* Remove all elements containing a constrained node */
369: for(bc = 0; bc < grid->numPointBC; bc++)
370: if ((tri->faces[elem*mesh->numCorners+0] == bcNodes[bc]) ||
371: (tri->faces[elem*mesh->numCorners+1] == bcNodes[bc]) ||
372: (tri->faces[elem*mesh->numCorners+2] == bcNodes[bc]))
373: break;
374: if (bc < grid->numPointBC)
375: continue;
377: /* Remove elements with off-processor nodes */
378: if ((tri->faces[elem*mesh->numCorners+0] >= numNodes) ||
379: (tri->faces[elem*mesh->numCorners+1] >= numNodes) ||
380: (tri->faces[elem*mesh->numCorners+2] >= numNodes))
381: continue;
383: /* Add the element */
384: for(corner = 0; corner < 3; corner++) {
385: /* Decrement node numbers to account for constrained nodes */
386: newElem = tri->faces[elem*mesh->numCorners+corner];
387: for(bc = 0; bc < grid->numPointBC; bc++)
388: if ((bcNodes[bc] >= 0) && (tri->faces[elem*mesh->numCorners+corner] > bcNodes[bc]))
389: newElem--;
390: ml->meshes[0][MESH_ELEM][ml->numElements[0]*3+corner] = newElem+1;
391: }
392: ml->numElements[0]++;
393: }
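/* Editorial note on the renumbering above: a corner node keeps its index if it lies below
   every constrained node, drops by one for each constrained node with a smaller index, and
   is then stored 1-based.  For example, with a single constrained node 3, the corner nodes
   (0, 2, 5) are stored as (1, 3, 5). */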
395: if (ml->factorizationViewer != PETSC_NULL) {
396: PetscTruth isPeriodic;
397: PetscDraw draw;
399: MeshView(mesh, ml->factorizationViewer);
400: PetscViewerFlush(ml->factorizationViewer);
401: PetscViewerDrawGetDraw(ml->factorizationViewer, 0, &draw);
402: MeshIsPeriodic(mesh, &isPeriodic);
403: if (isPeriodic == PETSC_FALSE) {
404: /* Does not work in periodic case */
405: PCMultiLevelView_Private(pc, ml->factorizationViewer, 0, ml->numVertices[0], ml->numElements[0], ml->meshes[0]);
406:
407: }
408: }
410: /* Cleanup */
411: PetscFree(bcNodes);
412: #ifdef PETSC_USE_BOPT_g
413: #endif
415: if ((ml->sField < 0) || (ml->tField < 0)) {
416: SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Shape and test function fields must be set up.");
417: }
418: /* Create shape function variable orderings */
419: VarOrderingCreateGeneral(grid, 1, &ml->sField, &ml->sOrder);
420: LocalVarOrderingCreate(grid, 1, &ml->sField, &ml->sLocOrder);
421: /* Create test function variable orderings */
422: VarOrderingCreateGeneral(grid, 1, &ml->tField, &ml->tOrder);
423: LocalVarOrderingCreate(grid, 1, &ml->tField, &ml->tLocOrder);
424: /* Check orderings */
425: if (ml->numVertices[0] != ml->sOrder->numLocVars) {
426: SETERRQ2(PETSC_ERR_ARG_CORRUPT, "Invalid local graph: %d vertices != %d vars", ml->numVertices[0], ml->sOrder->numLocVars);
427: }
428: /* Allocate the operator */
429: GMatCreateRectangular(grid, ml->sOrder, ml->tOrder, &ml->B);
430: /* Create the multiplier solution vector */
431: GVecCreateRectangular(ml->grid, ml->tOrder, &ml->reduceSol);
432: /* Construct the operator */
433: MatSetOption(ml->B, MAT_NEW_NONZERO_ALLOCATION_ERR);
434: GMatEvaluateOperatorGalerkin(ml->B, PETSC_NULL, 1, &ml->sField, &ml->tField, ml->gradOp,
435: ml->gradAlpha, MAT_FINAL_ASSEMBLY, ml);
436:
438: /* Precondition the initial system with its diagonal */
439: PetscOptionsHasName(pc->prefix, "-pc_ml_jacobi", &opt);
440: if (opt == PETSC_TRUE) {
441: GVecCreateConstrained(ml->grid, &ml->diag);
442: GMatGetDiagonalConstrained(pc->mat, ml->diag);
443: VecReciprocal(ml->diag);
444: VecSqrt(ml->diag);
445: MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
446: }
448: /* Parallel Support */
449: if (numProcs > 1) {
450: PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
451: /* Separate interior and interface rows */
452: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &interiorRows);
453: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &ml->interfaceRows);
454: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &diagInterfaceRows);
455: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &offdiagInterfaceRows);
456: PetscMemzero(diagInterfaceRows, ml->tOrder->numLocVars * sizeof(int));
457: PetscMemzero(offdiagInterfaceRows, ml->tOrder->numLocVars * sizeof(int));
458: PetscLogObjectMemory(pc, ml->tOrder->numLocVars * sizeof(int));
459: ml->numInteriorRows = 0;
460: ml->numLocInterfaceRows = 0;
461: for(row = ml->tOrder->firstVar[rank]; row < ml->tOrder->firstVar[rank+1]; row++) {
462: MatGetRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
463: #ifdef PETSC_USE_BOPT_g
464: if (rowLen <= 0) {
465: PetscPrintf(PETSC_COMM_SELF, "row %d is null\n", row);
466: SETERRQ(PETSC_ERR_ARG_CORRUPT, "Null row in gradient");
467: }
468: #endif
469: /* Find rows on domain boundaries */
470: for(col = 0, isInterfaceRow = PETSC_FALSE; col < rowLen; col++) {
471: if ((cols[col] < ml->sOrder->firstVar[rank]) || (cols[col] >= ml->sOrder->firstVar[rank+1])) {
472: isInterfaceRow = PETSC_TRUE;
473: offdiagInterfaceRows[ml->numLocInterfaceRows]++;
474: }
475: }
476: if (isInterfaceRow == PETSC_FALSE) {
477: interiorRows[ml->numInteriorRows++] = row;
478: } else {
479: ml->interfaceRows[ml->numLocInterfaceRows] = row;
480: diagInterfaceRows[ml->numLocInterfaceRows] = rowLen - offdiagInterfaceRows[ml->numLocInterfaceRows]; ml->numLocInterfaceRows++;
481: }
483: MatRestoreRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
484: }
485: if (ml->numInteriorRows + ml->numLocInterfaceRows != ml->tOrder->numLocVars) {
486: SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
487: }
489: /* Place interior rows after interface rows */
490: ml->interiorRows = ml->interfaceRows + ml->numLocInterfaceRows;
491: PetscMemcpy(ml->interiorRows, interiorRows, ml->numInteriorRows * sizeof(int));
492: PetscFree(interiorRows);
494: /* Calculate global size of the interface matrix */
495: PetscMalloc((numProcs+1) * sizeof(int), &ml->firstInterfaceRow);
496: PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
497: MPI_Allgather(&ml->numLocInterfaceRows, 1, MPI_INT, &ml->firstInterfaceRow[1], 1, MPI_INT, pc->comm);
498:
499: ml->firstInterfaceRow[0] = 0;
500: for(proc = 1; proc < numProcs+1; proc++)
501: ml->firstInterfaceRow[proc] += ml->firstInterfaceRow[proc-1];
502: ml->numInterfaceRows = ml->firstInterfaceRow[numProcs];
504: /* Setup allocation */
505: PetscMalloc((ml->numInteriorRows+1) * sizeof(int), &rowLens);
506: for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++) {
507: if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row)) {
508: lossCount++;
509: continue;
510: } else {
511: MatGetRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
512: rowLens[count] = rowLen;
513: MatRestoreRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
514: count++;
515: }
516: }
517: if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
518: if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
520: /* Create the gradient local to this processor */
521: MatCreateSeqAIJ(PETSC_COMM_SELF, ml->numInteriorRows, ml->sOrder->numLocVars, 0, rowLens, &ml->locB);
522:
523: PetscFree(rowLens);
524: for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++)
525: {
526: if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row))
527: {
528: lossCount++;
529: continue;
530: }
531: else
532: {
533: MatGetRow(ml->B, row, &rowLen, &cols, &vals);
534: /* This is definitely cheating */
535: for(col = 0; col < rowLen; col++)
536: cols[col] -= ml->sOrder->firstVar[rank];
537: MatSetValues(ml->locB, 1, &count, rowLen, cols, vals, INSERT_VALUES);
538: MatRestoreRow(ml->B, row, &rowLen, &cols, &vals);
539: count++;
540: }
541: }
542: MatAssemblyBegin(ml->locB, MAT_FINAL_ASSEMBLY);
543: MatAssemblyEnd(ml->locB, MAT_FINAL_ASSEMBLY);
544: if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
545: if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
546: PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
547: } else {
548: ml->locB = ml->B;
549: }
550: MatGetLocalSize(ml->locB, &ml->numRows, &ml->numCols);
551: #ifdef PETSC_USE_BOPT_g
552: #endif
554: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
555: /* Events for the SGI R10000 hardware counters. Add 16 to each event number for Counter 1.
557:      Event  Counter 0                                             Counter 1
558:        0    Cycles                                                Cycles
559:        1    Instructions Issued                                   Instructions Graduated
560:        2    Load/prefetch/sync Issued                             Load/prefetch/sync Graduated
561:        3    Stores Issued                                         Stores Graduated
562:        4    Store conditional Issued                              Store conditional Graduated
563:        5    Failed store conditional                              Floating point instructions Graduated
564:        6    Branches decoded                                      Write back from data cache to secondary cache
565:        7    Write back from secondary cache to system interface   TLB refill exceptions
566:        8    Single-bit ECC errors on secondary cache data         Branches mispredicted
567:        9    Instruction cache misses                              Data cache misses
568:       10    Secondary instruction cache misses                    Secondary data cache misses
569:       11    Secondary instruction cache way mispredicted          Secondary data cache way mispredicted
570:       12    External intervention requests                        External intervention hits
571:       13    External invalidation requests                        External invalidation hits
572:       14    Virtual coherency                                     Upgrade requests on clean secondary cache lines
573:       15    Instructions graduated                                Upgrade requests on shared secondary cache lines
574: */
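/* Editorial note: counter-1 event numbers are offset by 16 (HWPERF_CNT1BASE), so the values
   chosen below decode as 21 -> 5 (floating point instructions graduated), 25 -> 9 (data
   cache misses), and 26 -> 10 (secondary data cache misses), matching the statistics
   printed after the factorization. */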
575: /* Setup events */
576: event0 = 0;
577: event1 = 21;
578: PetscOptionsHasName(pc->prefix, "-pc_ml_flops_monitor", &opt);
579: if (opt == PETSC_TRUE)
580: event1 = 21;
581: PetscOptionsHasName(pc->prefix, "-pc_ml_cache_monitor", &opt);
582: if (opt == PETSC_TRUE)
583: event1 = 25;
584: PetscOptionsHasName(pc->prefix, "-pc_ml_sec_cache_monitor", &opt);
585: if (opt == PETSC_TRUE)
586: event1 = 26;
588: /* Open the proc file for ioctl() */
589: pid = getpid();
590: sprintf(procfile, "/proc/%010d", pid);
591: if ((fd = open(procfile, O_RDWR)) < 0) SETERRQ(PETSC_ERR_FILE_OPEN, "Could not open proc file");
592: /* Set the event for counter 0 */
593: for (event = 0; event < HWPERF_EVENTMAX; event++) {
594: if (event == event0) {
595: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
596: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie = 1;
597: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev = event;
598: evctr_args.hwp_ovflw_freq[event] = 0;
599: } else {
600: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
601: evctr_args.hwp_ovflw_freq[event] = 0;
602: }
603: }
604: /* Set the event for counter 1 */
605: for (event = HWPERF_CNT1BASE; event < HWPERF_EVENTMAX; event++) {
606: if (event == event1) {
607: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
608: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie = 1;
609: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev = event - HWPERF_CNT1BASE;
610: evctr_args.hwp_ovflw_freq[event] = 0;
611: } else {
612: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
613: evctr_args.hwp_ovflw_freq[event] = 0;
614: }
615: }
616: evctr_args.hwp_ovflw_sig = 0;
617: /* Start SGI hardware counters */
618: if ((gen_start = ioctl(fd, PIOCENEVCTRS, (void *) &evctr_args)) < 0) {
619: SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
620: }
621: #endif
622: PC_MLLogEventEnd(PC_ML_SetUpInit, pc, 0, 0, 0);
624: /* Factorize the gradient matrix */
625: PCMultiLevelReduce(pc);
627: /* Parallel support */
628: if (numProcs > 1) {
629: PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
630: /* Get size information */
631: PetscMalloc(numProcs * sizeof(int), &nullCols);
632: PetscMalloc((numProcs+1) * sizeof(int), &ml->firstNullCol);
633: PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
634: MPI_Allreduce(&ml->numLocNullCols, &ml->numNullCols, 1, MPI_INT, MPI_SUM, pc->comm);
635: MPI_Allgather(&ml->numLocNullCols, 1, MPI_INT, nullCols, 1, MPI_INT, pc->comm);
636: for(proc = 1, ml->firstNullCol[0] = 0; proc <= numProcs; proc++) {
637: ml->firstNullCol[proc] = nullCols[proc-1] + ml->firstNullCol[proc-1];
638: }
639: PetscLogInfo(pc, "Interface rows: %d Total rows: %d NullCols: %d\n", ml->numInterfaceRows, ml->tOrder->numVars, ml->numNullCols);
641: /* Get all interface rows */
642: PetscMalloc(ml->numInterfaceRows * sizeof(int), &interfaceRows);
643: PetscMalloc(numProcs * sizeof(int), &recvCounts);
644: for(proc = 0; proc < numProcs; proc++)
645: recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
646: MPI_Allgatherv(ml->interfaceRows, ml->numLocInterfaceRows, MPI_INT,
647: interfaceRows, recvCounts, ml->firstInterfaceRow, MPI_INT, pc->comm);
648: PetscFree(recvCounts);
650: /* Get all the interface columns */
651: PetscMalloc(ml->numNullCols * sizeof(int), &interfaceCols);
652: if (ml->numLocNullCols > 0) {
653: PetscMalloc(ml->numLocNullCols * sizeof(int), &cols);
654: }
655: for(col = 0; col < ml->numLocNullCols; col++) {
656: cols[col] = ml->nullCols[col] + ml->sOrder->firstVar[rank];
657: }
658: MPI_Allgatherv(cols, ml->numLocNullCols, MPI_INT,
659: interfaceCols, nullCols, ml->firstNullCol, MPI_INT, pc->comm);
660: if (ml->numLocNullCols > 0) {
661: PetscFree(cols);
662: }
664: /* Get the rows for the range split onto processors like a column vector
665: Here we are looping over all local columns (ml->sOrder->numLocVars)
666: using the local column number (col). The number of null columns discovered
667: (nullCol) should equal the number found in the serial factorization
668: (ml->numLocNullCols), just as the number of range columns (rangeCol) should
669: agree with the local rank (ml->rank).
670: */
671: PetscMalloc(ml->sOrder->numLocVars * sizeof(int), &rangeRows);
672: for(col = 0, nullCol = 0, rangeCol = 0; col < ml->sOrder->numLocVars; col++) {
673: if ((nullCol < ml->numLocNullCols) && (ml->nullCols[nullCol] == col)) {
674: rangeRows[col] = interfaceRows[ml->firstNullCol[rank]+nullCol++];
675: } else {
676: rangeRows[col] = ml->interiorRows[ml->range[rangeCol++]];
677: }
678: }
679: if ((rangeCol != ml->rank) || (nullCol != ml->numLocNullCols)) {
680: PetscPrintf(PETSC_COMM_SELF, "[%d]rank: %d rangeCol: %d null: %d nullCol: %d\n",
681: rank, ml->rank, rangeCol, ml->numLocNullCols, nullCol);
682: SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space");
683: }
685: /* Renumber and enlarge range[] to account for interface rows */
686: ml->localRank = ml->rank;
687: if (ml->numNullCols < ml->firstInterfaceRow[rank])
688: numNewRange = 0;
689: else
690: numNewRange = PetscMin(ml->firstInterfaceRow[rank+1], ml->numNullCols) - ml->firstInterfaceRow[rank];
691: if (ml->rank + numNewRange > 0)
692: {
693: /* We maintain the range[] in local row numbers */
694: PetscMalloc((ml->rank + numNewRange) * sizeof(int), &newRange);
695: for(row = 0; row < ml->rank; row++)
696: newRange[row] = ml->interiorRows[ml->range[row]] - ml->tOrder->firstVar[rank];
697: for(row = 0; row < numNewRange; row++)
698: newRange[ml->rank+row] = ml->interfaceRows[row] - ml->tOrder->firstVar[rank];
699: ml->rank += numNewRange;
700: if (ml->range != PETSC_NULL)
701: {PetscFree(ml->range); }
702: ml->range = newRange;
703: }
704: MPI_Allreduce(&ml->rank, &ml->globalRank, 1, MPI_INT, MPI_SUM, pc->comm);
706: /* Create work vectors for applying P in parallel */
707: GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);
709: /* Create scatter from the solution vector to a local interior vector */
710: ISCreateStride(PETSC_COMM_SELF, ml->numInteriorRows, 0, 1, &localIS);
711: ISCreateGeneral(PETSC_COMM_SELF, ml->numInteriorRows, ml->interiorRows, &globalIS);
712: VecCreateSeq(PETSC_COMM_SELF, ml->numInteriorRows, &ml->interiorRhs);
713: VecScatterCreate(pc->vec, globalIS, ml->interiorRhs, localIS, &ml->interiorScatter);
714: ISDestroy(localIS);
715: ISDestroy(globalIS);
717: /* Create scatter from the solution vector to a local interface vector */
718: ISCreateStride(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->firstInterfaceRow[rank], 1, &localIS);
719: ISCreateGeneral(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->interfaceRows, &globalIS);
720: VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &ml->interfaceRhs);
721: VecScatterCreate(pc->vec, globalIS, ml->interfaceRhs, localIS, &ml->interfaceScatter);
722: ISDestroy(localIS);
723: ISDestroy(globalIS);
725: #ifndef PETSC_HAVE_PLAPACK
726: /* Create scatter from the solution vector to an interface vector */
727: if (rank == 0) {
728: ISCreateStride(PETSC_COMM_SELF, ml->numInterfaceRows, 0, 1, &localIS);
729: ISCreateGeneral(PETSC_COMM_SELF, ml->numInterfaceRows, interfaceRows, &globalIS);
730: VecCreateSeq(PETSC_COMM_SELF, ml->numInterfaceRows, &ml->locInterfaceRhs);
731: VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
732: } else {
733: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
734: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
735: VecCreateSeq(PETSC_COMM_SELF, 0, &ml->locInterfaceRhs);
736: VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
737: }
738: ISDestroy(localIS);
739: ISDestroy(globalIS);
740: #endif
742: /* Create scatter from a column vector to a column interface vector */
743: #ifdef PETSC_HAVE_PLAPACK
744: ISCreateStride(PETSC_COMM_SELF, ml->numLocNullCols, ml->firstNullCol[rank], 1, &localIS);
745: ISCreateGeneral(PETSC_COMM_SELF, ml->numLocNullCols, &interfaceCols[ml->firstNullCol[rank]], &globalIS);
746:
747: VecCreateMPI(pc->comm, ml->numLocNullCols, ml->numNullCols, &ml->interfaceColRhs);
748: VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
749:
750: ISDestroy(localIS);
751: ISDestroy(globalIS);
752: #else
753: if (rank == 0) {
754: ISCreateStride(PETSC_COMM_SELF, ml->numNullCols, 0, 1, &localIS);
755: ISCreateGeneral(PETSC_COMM_SELF, ml->numNullCols, interfaceCols, &globalIS);
756: VecCreateSeq(PETSC_COMM_SELF, ml->numNullCols, &ml->interfaceColRhs);
757: VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
758:
759: } else {
760: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
761: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
762: VecCreateSeq(PETSC_COMM_SELF, 0, &ml->interfaceColRhs);
763: VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
764:
765: }
766: ISDestroy(localIS);
767: ISDestroy(globalIS);
768: #endif
770: /* Create scatter from a solution vector to a column vector */
771: ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
772: ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
773: VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
774: ISDestroy(localIS);
775: ISDestroy(globalIS);
777: /* Create sparse B_I */
778: MatCreateMPIAIJ(pc->comm, ml->numLocInterfaceRows, ml->sOrder->numLocVars, ml->numInterfaceRows,
779: ml->sOrder->numVars, 0, diagInterfaceRows, 0, offdiagInterfaceRows, &ml->interfaceB);
780:
781: PetscFree(diagInterfaceRows);
782: PetscFree(offdiagInterfaceRows);
784: for(row = 0; row < ml->numLocInterfaceRows; row++) {
785: /* Copy row into B^p_I */
786: row2 = row + ml->firstInterfaceRow[rank];
787: MatGetRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
788: MatSetValues(ml->interfaceB, 1, &row2, rowLen, cols, vals, INSERT_VALUES);
789: MatRestoreRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
790: }
791: MatAssemblyBegin(ml->interfaceB, MAT_FINAL_ASSEMBLY);
792: MatAssemblyEnd(ml->interfaceB, MAT_FINAL_ASSEMBLY);
794: /* Get V_I: The rows of V corresponding to the zero singular values for each domain */
795: VecDuplicateVecs(ml->colWorkVec, ml->numNullCols, &ml->interfaceV);
796: for(col = 0; col < ml->numNullCols; col++) {
797: tempV = ml->interfaceV[col];
798: if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
799: nullCol = ml->nullCols[col-ml->firstNullCol[rank]]+ml->sOrder->firstVar[rank];
800: ierr = VecSetValues(tempV, 1, &nullCol, &one, INSERT_VALUES);
801: }
802: ierr = VecAssemblyBegin(tempV);
803: ierr = VecAssemblyEnd(tempV);
804: if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
805: ierr = PCMultiLevelApplyV(pc, tempV, tempV);
806: }
807: }
809: #ifdef PETSC_HAVE_PLAPACK
810: /* You MUST initialize all PLAPACK variables to PETSC_NULL */
811: ml->PLAlayout = PETSC_NULL;
812: ml->interfaceQR = PETSC_NULL;
813: ml->interfaceTAU = PETSC_NULL;
814: ml->PLArhsD = PETSC_NULL;
815: ml->PLArhsP = PETSC_NULL;
816: /* Create the template for vector and matrix alignment */
817: PLA_Temp_create(ml->numNullCols, 0, &ml->PLAlayout);
818: /* Create the interface matrix B_Gamma */
819: PLA_Matrix_create(MPI_DOUBLE, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, 0, 0, &ml->interfaceQR);
820:
821: /* Create the vector of scaling factors Tau */
822: PLA_Vector_create(MPI_DOUBLE, ml->numNullCols, ml->PLAlayout, 0, &ml->interfaceTAU);
823: /* Create the storage vectors for application of D and P */
824: PLA_Vector_create(MPI_DOUBLE, ml->numNullCols, ml->PLAlayout, 0, &ml->PLArhsD);
825: PLA_Vector_create(MPI_DOUBLE, ml->numInterfaceRows, ml->PLAlayout, 0, &ml->PLArhsP);
826: /* Create temporary space for the local rows of the interface matrix */
827: PetscMalloc(ml->numLocInterfaceRows*ml->numNullCols * sizeof(double), &locB_I);
829: /* Construct B_I V_I */
830: ierr = VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
831: for(col = 0; col < ml->numNullCols; col++) {
832: MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
833: /* Copy into storage -- column-major order */
834: ierr = VecGetArray(tempB, &arrayB);
835: PetscMemcpy(&locB_I[ml->numLocInterfaceRows*col], arrayB, ml->numLocInterfaceRows * sizeof(double));
836:
837: VecRestoreArray(tempB, &arrayB);
838: }
839: VecDestroy(tempB);
840: VecDestroyVecs(ml->interfaceV, ml->numNullCols);
842: /* Set the matrix entries */
843: PLA_Obj_set_to_zero(ml->interfaceQR);
844: PLA_API_begin();
845: PLA_Obj_API_open(ml->interfaceQR);
846: PLA_API_axpy_matrix_to_global(ml->numLocInterfaceRows, ml->numNullCols, &one, locB_I,
847: ml->numLocInterfaceRows, ml->interfaceQR, ml->firstInterfaceRow[rank], 0);
848:
849: PLA_Obj_API_close(ml->interfaceQR);
850: PLA_API_end();
851: PetscFree(locB_I);
852: #else
853: /* Allocate storage for the QR factorization */
854: minSize = PetscMin(ml->numInterfaceRows, ml->numNullCols);
855: PetscMalloc(ml->numInterfaceRows*ml->numNullCols * sizeof(double), &ml->interfaceQR);
856: PetscMalloc(minSize * sizeof(double), &ml->interfaceTAU);
857: PetscLogObjectMemory(pc, (ml->numInterfaceRows*ml->numNullCols + minSize)*sizeof(double));
858: PetscMemzero(ml->interfaceQR, ml->numInterfaceRows*ml->numNullCols * sizeof(double));
859: PetscMemzero(ml->interfaceTAU, minSize * sizeof(double));
861: /* Construct B_I V_I */
862: VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
863: for(col = 0; col < ml->numNullCols; col++) {
864: MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
865: /* Copy into storage -- column-major order */
866: VecGetArray(tempB, &arrayB);
867: PetscMemcpy(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], arrayB,
868: ml->numLocInterfaceRows * sizeof(double));
869:
870: VecRestoreArray(tempB, &arrayB);
871: }
872: VecDestroy(tempB);
873: VecDestroyVecs(ml->interfaceV, ml->numNullCols);
875: /* Gather matrix  / B^1_I V^1_I | ... | B^1_I V^p_I \
876:                  |  B^2_I V^1_I | ... | B^2_I V^p_I  |
877:                  |       ...                         |
878:                   \ B^p_I V^1_I | ... | B^p_I V^p_I  /
880: Note: It has already been put in column-major order
881: */
882: PetscMalloc(numProcs * sizeof(int), &recvCounts);
883: for(proc = 0; proc < numProcs; proc++)
884: recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
885: for(col = 0; col < ml->numNullCols; col++) {
886: MPI_Gatherv(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], ml->numLocInterfaceRows,
887: MPI_DOUBLE, &ml->interfaceQR[ml->numInterfaceRows*col], recvCounts, ml->firstInterfaceRow,
888: MPI_DOUBLE, 0, pc->comm);
889:
890: }
891: PetscFree(recvCounts);
893: /* Allocate workspace -- Also used in applying P */
894: ml->workLen = ml->numInterfaceRows;
895: PetscMalloc(ml->workLen * sizeof(double), &ml->work);
896: PetscLogObjectMemory(pc, ml->workLen * sizeof(double));
897: #endif
899: /* Do QR factorization on one processor */
900: PC_MLLogEventBegin(PC_ML_QRFactorization, pc, 0, 0, 0);
901: #ifdef PETSC_HAVE_PLAPACK
902: PLA_QR(ml->interfaceQR, ml->interfaceTAU);
903: #else
904: if (rank == 0) {
905: LAgeqrf_(&ml->numInterfaceRows, &ml->numNullCols, ml->interfaceQR,
906: &ml->numInterfaceRows, ml->interfaceTAU, ml->work, &ml->workLen, &ierr);
907: if (ierr) SETERRQ(PETSC_ERR_LIB, "Invalid interface QR");
908: }
909: #endif
910: PC_MLLogEventEnd(PC_ML_QRFactorization, pc, 0, 0, 0);
912: #ifdef PETSC_USE_BOPT_g
913: /* Diagnostics */
914: PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug_qr", &opt);
915: if (opt == PETSC_TRUE) {
916: PetscReal *intQR;
917: PetscReal *intTAU;
918: int r, c;
919: #ifdef PETSC_HAVE_PLAPACK
920: PLA_Obj tau = PETSC_NULL;
921: PLA_Obj qr = PETSC_NULL;
922: int length, width, stride, ldim;
924: PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numNullCols, 1, ml->PLAlayout, &tau);
925: PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, &qr);
926: PLA_Copy(ml->interfaceTAU, tau);
927: PLA_Copy(ml->interfaceQR, qr);
928: PLA_Obj_local_info(tau, &length, &width, (void **) &intTAU, &stride, &ldim);
929: if ((rank == 0) && ((length != ml->numNullCols) || (width != 1) || (stride != 1)))
930: SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface tau vector");
931: PLA_Obj_local_info(qr, &length, &width, (void **) &intQR, &stride, &ldim);
932: if ((rank == 0) && ((length != ml->numInterfaceRows) || (width != ml->numNullCols) || (ldim != ml->numInterfaceRows)))
933: SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface qr matrix");
934: #else
935: intTAU = ml->interfaceTAU;
936: intQR = ml->interfaceQR;
937: #endif
938: if (rank == 0) {
939: for(c = 0; c < ml->numNullCols; c++)
940: PetscPrintf(pc->comm, "%.4g ", intTAU[c]);
941: PetscPrintf(pc->comm, "\n");
942: for(r = 0; r < ml->numInterfaceRows; r++) {
943: for(c = 0; c < ml->numNullCols; c++)
944: PetscPrintf(pc->comm, "%.4g ", intQR[ml->numInterfaceRows*c+r]);
945: PetscPrintf(pc->comm, "\n");
946: }
947: }
948: #ifdef PETSC_HAVE_PLAPACK
949: PLA_Obj_free(&tau);
950: PLA_Obj_free(&qr);
951: #endif
952: }
953: #endif
955: /* Scatter Cleanup */
956: PetscFree(rangeRows);
957: PetscFree(nullCols);
958: PetscFree(interfaceRows);
959: PetscFree(interfaceCols);
960: PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
961: } else {
962: /* Get the rows for the range split onto processors like a column vector */
963: PetscMalloc(ml->rank * sizeof(int), &rangeRows);
964: for(row = 0; row < ml->rank; row++)
965: rangeRows[row] = ml->range[row];
966: GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);
968: /* Create scatter from a solution vector to a column vector */
969: ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
970: ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
971: VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
972: ISDestroy(localIS);
973: ISDestroy(globalIS);
975: /* Cleanup */
976: PetscFree(rangeRows);
977: VecDestroy(ml->colWorkVec);
978: }
980: if (ml->diag != PETSC_NULL) {
981: /* Recover original B */
982: VecReciprocal(ml->diag);
983: MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
984: VecReciprocal(ml->diag);
985: }
987: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
988: /* Read the hardware counter values */
989: if ((gen_read = ioctl(fd, PIOCGETEVCTRS, (void *) &counts)) < 0) {
990: SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
991: }
992: /* Generation number should be the same */
993: if (gen_start != gen_read) SETERRQ(PETSC_ERR_LIB, "Lost SGI hardware counters");
994: /* Release the counters */
995: if ((ioctl(fd, PIOCRELEVCTRS)) < 0) SETERRQ(PETSC_ERR_LIB, "Could not free SGI hardware counters");
996: close(fd);
997: count0 = counts.hwp_evctr[event0];
998: count1 = counts.hwp_evctr[event1];
999: MPI_Reduce(&count0, &globalCount0, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1000: MPI_Reduce(&count1, &globalCount1, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1001: /* Use hardware counter data */
1002: switch (event1) {
1003: case 21:
1004: PetscPrintf(pc->comm, "Flops: %ldn", globalCount1);
1005: break;
1006: case 25:
1007: PetscPrintf(pc->comm, "Data cache misses: %ldn", globalCount1);
1008: break;
1009: case 26:
1010: PetscPrintf(pc->comm, "Secondary data cache misses: %ldn", globalCount1);
1011: break;
1012: }
1013: #endif
1015: PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug", &opt);
1016: if (opt == PETSC_TRUE) {
1017: PCDebug_Multilevel(pc);
1018: }
1020: return(0);
1021: }
1023: static int PCPreSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1024: {
1025: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
1026: Grid grid = ml->grid;
1027: GVec bdReduceVec, bdReduceVec2;
1028: GVec reduceVec;
1029: GMat bdReduceMat;
1030: VarOrdering sOrder;
1031: PetscScalar one = 1.0, reduceAlpha;
1032: int bc;
1033: int ierr;
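/* Editorial summary of the pre-solve (derived from the steps below): the boundary
   contribution bdReduceVec = B^T u + B^T_Gamma f_Gamma is pushed through V^T and D^{-T},
   prolonged by P_1 into ml->reduceSol, and the right-hand side is then updated by the
   grid's BC multiplier times A applied to that vector. */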
1036: /* Create -g = B^T u + B^T_Gamma f_Gamma */
1037: GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec);
1038: VarOrderingCreateSubset(grid->reduceOrder, 1, &ml->tField, PETSC_FALSE, &sOrder);
1039: #define OLD_REDUCTION
1040: #ifdef OLD_REDUCTION
1041: GMatCreateRectangular(grid, sOrder, ml->sOrder, &bdReduceMat);
1042: MatSetOption(bdReduceMat, MAT_NEW_NONZERO_ALLOCATION_ERR);
1043: for(bc = 0; bc < grid->numBC; bc++) {
1044: /* Conditions on A */
1045: if (ml->tField == grid->bc[bc].field) {
1046: GMatEvaluateOperatorGalerkin(bdReduceMat, PETSC_NULL, 1, &ml->tField, &ml->sField, ml->divOp,
1047: 1.0, MAT_FINAL_ASSEMBLY, ml);
1048:
1049: }
1050: /* Conditions on B^T only show up in the u equations, so they are already handled */
1051: }
1052: for(bc = 0; bc < grid->numPointBC; bc++) {
1053: /* Conditions on A */
1054: if (ml->tField == grid->pointBC[bc].field) {
1055: GVecEvaluateOperatorGalerkin(bdReduceVec, grid->bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1056: ml->divOp, 1.0, ml);
1057:
1058: }
1059: /* Conditions on B^T only show up in the u equations, so they are already handled */
1060: #ifdef PETSC_USE_BOPT_g
1061: #endif
1062: }
1063: #else
1064: GVecEvaluateOperatorGalerkin(bdReduceVec, bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1065: ml->divOp, 1.0, ml);
1066:
1067: #endif
1068: VarOrderingDestroy(sOrder);
1069: #ifdef OLD_REDUCTION
1070: /* Should be taken out when I fix the MF version */
1071: MatMult(bdReduceMat, grid->bdReduceVec, bdReduceVec);
1072: GMatDestroy(bdReduceMat);
1073: #endif
1075: /* Add in the B^T u part */
1076: if (ml->nonlinearIterate != PETSC_NULL) {
1077: GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec2);
1078: PCMultiLevelApplyGradientTrans(pc, ml->nonlinearIterate, bdReduceVec2);
1079: VecAXPY(&one, bdReduceVec2, bdReduceVec);
1080: GVecDestroy(bdReduceVec2);
1081: }
1083: /* Calculate -D^{-T} Z^T g = D^{-T} Z^T B^T_Gamma f_Gamma */
1084: PCMultiLevelApplyVTrans(pc, bdReduceVec, bdReduceVec);
1085: PCMultiLevelApplyDInvTrans(pc, bdReduceVec, bdReduceVec);
1087: /* Calculate -P_1 D^{-1} Z^T g */
1088: PCMultiLevelApplyP1(pc, bdReduceVec, ml->reduceSol);
1090: /* Calculate -A P_1 D^{-1} Z^T g -- This does not seem like good design, but I will deal with it later */
1091: GVecCreateRectangular(grid, ml->tOrder, &reduceVec);
1092: MatMult(pc->mat, ml->reduceSol, reduceVec);
1094: /* f --> f - A P_1 D^{-1} Z^T g */
1095: GridGetBCMultiplier(grid, &reduceAlpha);
1096: VecAXPY(&reduceAlpha, reduceVec, rhs);
1098: /* Cleanup */
1099: GVecDestroy(reduceVec);
1100: GVecDestroy(bdReduceVec);
1101: return(0);
1102: }
1104: static int PCPostSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1105: {
1107: return(0);
1108: }
1110: static int PCDestroy_Multilevel(PC pc)
1111: {
1112: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
1113: int ierr;
1116: if (ml->numLevels >= 0) {
1117: PCDestroyStructures_Multilevel(pc);
1118: }
1119: if (ml->mathViewer != PETSC_NULL) {
1120: PetscViewerDestroy(ml->mathViewer);
1121: }
1122: if (ml->factorizationViewer != PETSC_NULL) {
1123: PetscViewerDestroy(ml->factorizationViewer);
1124: }
1125: PetscFree(ml);
1126: return(0);
1127: }
1129: int PCApply_Multilevel(PC pc, Vec x, Vec y)
1130: {
1134: PCMultiLevelApplyP(pc, x, y);
1135: return(0);
1136: }
1138: int PCApplyTrans_Multilevel(PC pc, Vec x, Vec y)
1139: {
1143: PCMultiLevelApplyPTrans(pc, x, y);
1144: return(0);
1145: }
1147: int PCApplySymmetricLeft_Multilevel(PC pc, Vec x, Vec y)
1148: {
1152: PCMultiLevelApplyP2Trans(pc, x, y);
1153: return(0);
1154: }
1156: int PCApplySymmetricRight_Multilevel(PC pc, Vec x, Vec y)
1157: {
1161: PCMultiLevelApplyP2(pc, x, y);
1162: return(0);
1163: }
1165: EXTERN_C_BEGIN
1166: int PCCreate_Multilevel(PC pc)
1167: {
1168: PC_Multilevel *ml;
1169: int ierr;
1172: PCMultiLevelInitializePackage(PETSC_NULL);
1174: PetscNew(PC_Multilevel, &ml);
1175: PetscLogObjectMemory(pc, sizeof(PC_Multilevel));
1176: pc->data = (void *) ml;
1178: pc->ops->setup = PCSetUp_Multilevel;
1179: pc->ops->apply = PCApply_Multilevel;
1180: pc->ops->applyrichardson = PETSC_NULL;
1181: pc->ops->applyBA = PETSC_NULL;
1182: pc->ops->applytranspose = PCApplyTrans_Multilevel;
1183: pc->ops->applyBAtranspose = PETSC_NULL;
1184: pc->ops->setfromoptions = PETSC_NULL;
1185: pc->ops->presolve = PCPreSolve_Multilevel;
1186: pc->ops->postsolve = PCPostSolve_Multilevel;
1187: pc->ops->getfactoredmatrix = PETSC_NULL;
1188: pc->ops->applysymmetricleft = PCApplySymmetricLeft_Multilevel;
1189: pc->ops->applysymmetricright = PCApplySymmetricRight_Multilevel;
1190: pc->ops->setuponblocks = PETSC_NULL;
1191: pc->ops->destroy = PCDestroy_Multilevel;
1192: pc->ops->view = PETSC_NULL;
1193: pc->modifysubmatrices = PETSC_NULL;
1195: ml->grid = PETSC_NULL;
1196: ml->useMath = PETSC_FALSE;
1197: ml->mathViewer = PETSC_NULL;
1198: ml->factorizationViewer = PETSC_NULL;
1200: ml->gradOp = GRADIENT;
1201: ml->divOp = DIVERGENCE;
1202: ml->sField = -1;
1203: ml->tField = -1;
1204: ml->sOrder = PETSC_NULL;
1205: ml->sLocOrder = PETSC_NULL;
1206: ml->tOrder = PETSC_NULL;
1207: ml->tLocOrder = PETSC_NULL;
1208: ml->gradAlpha = 1.0;
1209: ml->B = PETSC_NULL;
1210: ml->locB = PETSC_NULL;
1211: ml->reduceSol = PETSC_NULL;
1212: ml->nonlinearIterate = PETSC_NULL;
1213: ml->diag = PETSC_NULL;
1215: ml->numMeshes = -1;
1216: ml->numElements = PETSC_NULL;
1217: ml->numVertices = PETSC_NULL;
1218: ml->vertices = PETSC_NULL;
1219: ml->numEdges = -1;
1220: ml->meshes = PETSC_NULL;
1222: ml->numPartitions = PETSC_NULL;
1223: ml->numPartitionCols = PETSC_NULL;
1224: ml->colPartition = PETSC_NULL;
1225: ml->numPartitionRows = PETSC_NULL;
1226: ml->rowPartition = PETSC_NULL;
1227: ml->rank = -1;
1228: ml->localRank = -1;
1229: ml->range = PETSC_NULL;
1230: ml->rangeScatter = PETSC_NULL;
1231: ml->numLocNullCols = -1;
1232: ml->nullCols = PETSC_NULL;
1234: ml->numRows = -1;
1235: ml->numCols = -1;
1236: ml->numLevels = -1;
1237: ml->maxLevels = -1;
1238: ml->QRthresh = 0;
1239: ml->DoQRFactor = 2;
1240: ml->zeroTol = 1.0e-10;
1241: ml->factors = PETSC_NULL;
1242: ml->grads = PETSC_NULL;
1244: ml->globalRank = -1;
1245: ml->numNullCols = -1;
1246: ml->numLocInterfaceRows = -1;
1247: ml->numInterfaceRows = -1;
1248: ml->interfaceRows = PETSC_NULL;
1249: ml->firstInterfaceRow = PETSC_NULL;
1250: ml->numInteriorRows = -1;
1251: ml->interiorRows = PETSC_NULL;
1252: ml->interfaceQR = PETSC_NULL;
1253: ml->interfaceTAU = PETSC_NULL;
1254: ml->interfaceV = PETSC_NULL;
1255: ml->interfaceB = PETSC_NULL;
1256: ml->interiorRhs = PETSC_NULL;
1257: ml->interiorScatter = PETSC_NULL;
1258: ml->interfaceRhs = PETSC_NULL;
1259: ml->interfaceScatter = PETSC_NULL;
1260: #ifndef PETSC_HAVE_PLAPACK
1261: ml->locInterfaceRhs = PETSC_NULL;
1262: ml->locInterfaceScatter = PETSC_NULL;
1263: #endif
1264: ml->interfaceColRhs = PETSC_NULL;
1265: ml->interfaceColScatter = PETSC_NULL;
1266: ml->reduceScatter = PETSC_NULL;
1267: ml->colWorkVec = PETSC_NULL;
1268: ml->globalColWorkVec = PETSC_NULL;
1270: ml->workLen = -1;
1271: ml->work = PETSC_NULL;
1272: ml->svdWorkLen = -1;
1273: ml->svdWork = PETSC_NULL;
1274: return(0);
1275: }
1276: EXTERN_C_END
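/* Editorial sketch (not part of the original source): one plausible way the constructor
   above would be registered and selected from application code.  The type name
   "multilevel" and the exact PCRegisterDynamic() argument order are assumptions, not
   taken from this file. */
#ifdef PC_ML_SKETCH_USAGE
int PCMLSketchSelect(PC pc)
{
  int ierr;

  /* Register the constructor under an assumed type name, then select it for this PC */
  ierr = PCRegisterDynamic("multilevel", PETSC_NULL, "PCCreate_Multilevel", PCCreate_Multilevel);
  if (ierr) return(ierr);
  ierr = PCSetType(pc, "multilevel");
  if (ierr) return(ierr);
  return(0);
}
#endif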