Actual source code: mpispooles.c
1: #define PETSCMAT_DLL
3: /*
4: Provides an interface to the Spooles parallel sparse solver (MPI SPOOLES)
5: */
7: #include src/mat/impls/aij/seq/aij.h
8: #include src/mat/impls/sbaij/seq/sbaij.h
9: #include src/mat/impls/baij/seq/baij.h
10: #include src/mat/impls/aij/mpi/mpiaij.h
11: #include src/mat/impls/sbaij/mpi/mpisbaij.h
12: #include src/mat/impls/aij/seq/spooles/spooles.h
14: EXTERN int SetSpoolesOptions(Mat, Spooles_options *);
18: PetscErrorCode MatDestroy_MPIAIJSpooles(Mat A)
19: {
20: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
22:
24: if (lu->CleanUpSpooles) {
25: FrontMtx_free(lu->frontmtx);
26: IV_free(lu->newToOldIV);
27: IV_free(lu->oldToNewIV);
28: IV_free(lu->vtxmapIV);
29: InpMtx_free(lu->mtxA);
30: ETree_free(lu->frontETree);
31: IVL_free(lu->symbfacIVL);
32: SubMtxManager_free(lu->mtxmanager);
33: DenseMtx_free(lu->mtxX);
34: DenseMtx_free(lu->mtxY);
35: MPI_Comm_free(&(lu->comm_spooles));
36: if ( lu->scat ){
37: VecDestroy(lu->vec_spooles);
38: ISDestroy(lu->iden);
39: ISDestroy(lu->is_petsc);
40: VecScatterDestroy(lu->scat);
41: }
42: }
43: MatConvert_Spooles_Base(A,lu->basetype,MAT_REUSE_MATRIX,&A);
44: (*A->ops->destroy)(A);
46: return(0);
47: }
51: PetscErrorCode MatSolve_MPIAIJSpooles(Mat A,Vec b,Vec x)
52: {
53: Mat_Spooles *lu = (Mat_Spooles*)A->spptr;
55: int size,rank,m=A->m,irow,*rowindY;
56: PetscScalar *array;
57: DenseMtx *newY ;
58: SubMtxManager *solvemanager ;
59: #if defined(PETSC_USE_COMPLEX)
60: double x_real,x_imag;
61: #endif
64: MPI_Comm_size(A->comm,&size);
65: MPI_Comm_rank(A->comm,&rank);
66:
67: /* copy b into spooles' rhs mtxY */
68: DenseMtx_init(lu->mtxY, lu->options.typeflag, 0, 0, m, 1, 1, m);
69: VecGetArray(b,&array);
71: DenseMtx_rowIndices(lu->mtxY, &m, &rowindY); /* get m, rowind */
72: for ( irow = 0 ; irow < m ; irow++ ) {
73: rowindY[irow] = irow + lu->rstart; /* global rowind */
74: #if !defined(PETSC_USE_COMPLEX)
75: DenseMtx_setRealEntry(lu->mtxY, irow, 0, *array++);
76: #else
77: DenseMtx_setComplexEntry(lu->mtxY,irow,0,PetscRealPart(*array),PetscImaginaryPart(*array));
78: array++;
79: #endif
80: }
81: VecRestoreArray(b,&array);
82:
83: if ( lu->options.msglvl > 2 ) {
84: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n 1 matrix in original ordering");
85: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
86: fflush(lu->options.msgFile);
87: }
88:
89: /* permute and redistribute Y if necessary */
90: DenseMtx_permuteRows(lu->mtxY, lu->oldToNewIV);
91: if ( lu->options.msglvl > 2 ) {
92: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix in new ordering");
93: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
94: fflush(lu->options.msgFile);
95: }
97: MPI_Barrier(A->comm); /* for initializing firsttag, because the num. of tags used
98: by FrontMtx_MPI_split() is unknown */
99: lu->firsttag = 0;
100: newY = DenseMtx_MPI_splitByRows(lu->mtxY, lu->vtxmapIV, lu->stats, lu->options.msglvl,
101: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
102: DenseMtx_free(lu->mtxY);
103: lu->mtxY = newY ;
104: lu->firsttag += size ;
105: if ( lu->options.msglvl > 2 ) {
106: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split DenseMtx Y");
107: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
108: fflush(lu->options.msgFile);
109: }
111: if ( FRONTMTX_IS_PIVOTING(lu->frontmtx) ) {
112: /* pivoting has taken place, redistribute the right hand side
113: to match the final rows and columns in the fronts */
114: IV *rowmapIV ;
115: rowmapIV = FrontMtx_MPI_rowmapIV(lu->frontmtx, lu->ownersIV, lu->options.msglvl,
116: lu->options.msgFile, lu->comm_spooles);
117: newY = DenseMtx_MPI_splitByRows(lu->mtxY, rowmapIV, lu->stats, lu->options.msglvl,
118: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
119: DenseMtx_free(lu->mtxY);
120: lu->mtxY = newY ;
121: IV_free(rowmapIV);
122: lu->firsttag += size;
123: }
124: if ( lu->options.msglvl > 2 ) {
125: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n rhs matrix after split");
126: DenseMtx_writeForHumanEye(lu->mtxY, lu->options.msgFile);
127: fflush(lu->options.msgFile);
128: }
130: if ( lu->nmycol > 0 ) IVcopy(lu->nmycol,lu->rowindX,IV_entries(lu->ownedColumnsIV)); /* must do for each solve */
131:
132: /* solve the linear system */
133: solvemanager = SubMtxManager_new();
134: SubMtxManager_init(solvemanager, NO_LOCK, 0);
135: FrontMtx_MPI_solve(lu->frontmtx, lu->mtxX, lu->mtxY, solvemanager, lu->solvemap, lu->cpus,
136: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
137: SubMtxManager_free(solvemanager);
138: if ( lu->options.msglvl > 2 ) {
139: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n solution in new ordering");
140: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
141: }
143: /* permute the solution into the original ordering */
144: DenseMtx_permuteRows(lu->mtxX, lu->newToOldIV);
145: if ( lu->options.msglvl > 2 ) {
146: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n solution in old ordering");
147: DenseMtx_writeForHumanEye(lu->mtxX, lu->options.msgFile);
148: fflush(lu->options.msgFile);
149: }
150:
151: /* scatter local solution mtxX into mpi vector x */
152: if( !lu->scat ){ /* create followings once for each numfactorization */
153: /* vec_spooles <- mtxX */
154: #if !defined(PETSC_USE_COMPLEX)
155: VecCreateSeqWithArray(PETSC_COMM_SELF,lu->nmycol,lu->entX,&lu->vec_spooles);
156: #else
157: VecCreateSeq(PETSC_COMM_SELF,lu->nmycol,&lu->vec_spooles);
158: VecGetArray(lu->vec_spooles,&array);
159: for (irow = 0; irow < lu->nmycol; irow++){
160: DenseMtx_complexEntry(lu->mtxX,irow,0,&x_real,&x_imag);
161: array[irow] = x_real+x_imag*PETSC_i;
162: }
163: VecRestoreArray(lu->vec_spooles,&array);
164: #endif
165: ISCreateStride(PETSC_COMM_SELF,lu->nmycol,0,1,&lu->iden);
166: ISCreateGeneral(PETSC_COMM_SELF,lu->nmycol,lu->rowindX,&lu->is_petsc);
167: VecScatterCreate(lu->vec_spooles,lu->iden,x,lu->is_petsc,&lu->scat);
168: }
170: VecScatterBegin(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
171: VecScatterEnd(lu->vec_spooles,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
172:
173: return(0);
174: }
178: PetscErrorCode MatFactorNumeric_MPIAIJSpooles(Mat A,MatFactorInfo *info,Mat *F)
179: {
180: Mat_Spooles *lu = (Mat_Spooles*)(*F)->spptr;
181: PetscErrorCode ierr;
182: int rank,size,lookahead=0,sierr;
183: ChvManager *chvmanager ;
184: Chv *rootchv ;
185: Graph *graph ;
186: IVL *adjIVL;
187: DV *cumopsDV ;
188: double droptol=0.0,*opcounts,minops,cutoff;
189: #if !defined(PETSC_USE_COMPLEX)
190: double *val;
191: #endif
192: InpMtx *newA ;
193: PetscScalar *av, *bv;
194: int *ai, *aj, *bi,*bj, nz, *ajj, *bjj, *garray,
195: i,j,irow,jcol,countA,countB,jB,*row,*col,colA_start,jj;
196: int M=A->M,m=A->m,root,nedges,tagbound,lasttag;
197:
199: MPI_Comm_size(A->comm,&size);
200: MPI_Comm_rank(A->comm,&rank);
202: if (lu->flg == DIFFERENT_NONZERO_PATTERN) { /* first numeric factorization */
203: /* get input parameters */
204: SetSpoolesOptions(A, &lu->options);
206: (*F)->ops->solve = MatSolve_MPIAIJSpooles;
207: (*F)->ops->destroy = MatDestroy_MPIAIJSpooles;
208: (*F)->assembled = PETSC_TRUE;
210: /* to be used by MatSolve() */
211: lu->mtxY = DenseMtx_new();
212: lu->mtxX = DenseMtx_new();
213: lu->scat = PETSC_NULL;
215: IVzero(20, lu->stats);
216: DVzero(20, lu->cpus);
218: lu->mtxA = InpMtx_new();
219: }
220:
221: /* copy A to Spooles' InpMtx object */
222: if ( lu->options.symflag == SPOOLES_NONSYMMETRIC ) {
223: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
224: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)(mat->A)->data;
225: Mat_SeqAIJ *bb=(Mat_SeqAIJ*)(mat->B)->data;
226: ai=aa->i; aj=aa->j; av=aa->a;
227: bi=bb->i; bj=bb->j; bv=bb->a;
228: lu->rstart = mat->rstart;
229: nz = aa->nz + bb->nz;
230: garray = mat->garray;
231: } else { /* SPOOLES_SYMMETRIC */
232: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
233: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
234: Mat_SeqBAIJ *bb=(Mat_SeqBAIJ*)(mat->B)->data;
235: ai=aa->i; aj=aa->j; av=aa->a;
236: bi=bb->i; bj=bb->j; bv=bb->a;
237: lu->rstart = mat->rstart;
238: nz = aa->nz + bb->nz;
239: garray = mat->garray;
240: }
241:
242: InpMtx_init(lu->mtxA, INPMTX_BY_ROWS, lu->options.typeflag, nz, 0);
243: row = InpMtx_ivec1(lu->mtxA);
244: col = InpMtx_ivec2(lu->mtxA);
245: #if !defined(PETSC_USE_COMPLEX)
246: val = InpMtx_dvec(lu->mtxA);
247: #endif
249: jj = 0; irow = lu->rstart;
250: for ( i=0; i<m; i++ ) {
251: ajj = aj + ai[i]; /* ptr to the beginning of this row */
252: countA = ai[i+1] - ai[i];
253: countB = bi[i+1] - bi[i];
254: bjj = bj + bi[i];
255: jB = 0;
256:
257: if (lu->options.symflag == SPOOLES_NONSYMMETRIC ){
258: /* B part, smaller col index */
259: colA_start = lu->rstart + ajj[0]; /* the smallest col index for A */
260: for (j=0; j<countB; j++){
261: jcol = garray[bjj[j]];
262: if (jcol > colA_start) {
263: jB = j;
264: break;
265: }
266: row[jj] = irow; col[jj] = jcol;
267: #if !defined(PETSC_USE_COMPLEX)
268: val[jj++] = *bv++;
269: #else
270: InpMtx_inputComplexEntry(lu->mtxA,irow,jcol,PetscRealPart(*bv),PetscImaginaryPart(*bv));
271: bv++; jj++;
272: #endif
273: if (j==countB-1) jB = countB;
274: }
275: }
276: /* A part */
277: for (j=0; j<countA; j++){
278: row[jj] = irow; col[jj] = lu->rstart + ajj[j];
279: #if !defined(PETSC_USE_COMPLEX)
280: val[jj++] = *av++;
281: #else
282: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*av),PetscImaginaryPart(*av));
283: av++; jj++;
284: #endif
285: }
286: /* B part, larger col index */
287: for (j=jB; j<countB; j++){
288: row[jj] = irow; col[jj] = garray[bjj[j]];
289: #if !defined(PETSC_USE_COMPLEX)
290: val[jj++] = *bv++;
291: #else
292: InpMtx_inputComplexEntry(lu->mtxA,irow,col[jj],PetscRealPart(*bv),PetscImaginaryPart(*bv));
293: bv++; jj++;
294: #endif
295: }
296: irow++;
297: }
298: #if !defined(PETSC_USE_COMPLEX)
299: InpMtx_inputRealTriples(lu->mtxA, nz, row, col, val);
300: #endif
301: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
302: if ( lu->options.msglvl > 0 ) {
303: printf("[%d] input matrix\n",rank);
304: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n [%d] input matrix\n",rank);
305: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
306: fflush(lu->options.msgFile);
307: }
309: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
310: /*
311: find a low-fill ordering
312: (1) create the Graph object
313: (2) order the graph using multiple minimum degree
314: (3) find out who has the best ordering w.r.t. op count,
315: and broadcast that front tree object
316: */
317: graph = Graph_new();
318: adjIVL = InpMtx_MPI_fullAdjacency(lu->mtxA, lu->stats,
319: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
320: nedges = IVL_tsize(adjIVL);
321: Graph_init2(graph, 0, M, 0, nedges, M, nedges, adjIVL, NULL, NULL);
322: if ( lu->options.msglvl > 2 ) {
323: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n graph of the input matrix");
324: Graph_writeForHumanEye(graph, lu->options.msgFile);
325: fflush(lu->options.msgFile);
326: }
328: switch (lu->options.ordering) {
329: case 0:
330: lu->frontETree = orderViaBestOfNDandMS(graph,
331: lu->options.maxdomainsize, lu->options.maxzeros, lu->options.maxsize,
332: lu->options.seed + rank, lu->options.msglvl, lu->options.msgFile); break;
333: case 1:
334: lu->frontETree = orderViaMMD(graph,lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
335: case 2:
336: lu->frontETree = orderViaMS(graph, lu->options.maxdomainsize,
337: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
338: case 3:
339: lu->frontETree = orderViaND(graph, lu->options.maxdomainsize,
340: lu->options.seed + rank,lu->options.msglvl,lu->options.msgFile); break;
341: default:
342: SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown Spooles's ordering");
343: }
345: Graph_free(graph);
346: if ( lu->options.msglvl > 2 ) {
347: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n front tree from ordering");
348: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
349: fflush(lu->options.msgFile);
350: }
352: opcounts = DVinit(size, 0.0);
353: opcounts[rank] = ETree_nFactorOps(lu->frontETree, lu->options.typeflag, lu->options.symflag);
354: MPI_Allgather((void*) &opcounts[rank], 1, MPI_DOUBLE,
355: (void*) opcounts, 1, MPI_DOUBLE, A->comm);
356: minops = DVmin(size, opcounts, &root);
357: DVfree(opcounts);
358:
359: lu->frontETree = ETree_MPI_Bcast(lu->frontETree, root,
360: lu->options.msglvl, lu->options.msgFile, lu->comm_spooles);
361: if ( lu->options.msglvl > 2 ) {
362: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n best front tree");
363: ETree_writeForHumanEye(lu->frontETree, lu->options.msgFile);
364: fflush(lu->options.msgFile);
365: }
366:
367: /* get the permutations, permute the front tree, permute the matrix */
368: lu->oldToNewIV = ETree_oldToNewVtxPerm(lu->frontETree);
369: lu->newToOldIV = ETree_newToOldVtxPerm(lu->frontETree);
371: ETree_permuteVertices(lu->frontETree, lu->oldToNewIV);
373: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
374:
375: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
377: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
378: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
380: /* generate the owners map IV object and the map from vertices to owners */
381: cutoff = 1./(2*size);
382: cumopsDV = DV_new();
383: DV_init(cumopsDV, size, NULL);
384: lu->ownersIV = ETree_ddMap(lu->frontETree,
385: lu->options.typeflag, lu->options.symflag, cumopsDV, cutoff);
386: DV_free(cumopsDV);
387: lu->vtxmapIV = IV_new();
388: IV_init(lu->vtxmapIV, M, NULL);
389: IVgather(M, IV_entries(lu->vtxmapIV),
390: IV_entries(lu->ownersIV), ETree_vtxToFront(lu->frontETree));
391: if ( lu->options.msglvl > 2 ) {
392: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from fronts to owning processes");
393: IV_writeForHumanEye(lu->ownersIV, lu->options.msgFile);
394: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n map from vertices to owning processes");
395: IV_writeForHumanEye(lu->vtxmapIV, lu->options.msgFile);
396: fflush(lu->options.msgFile);
397: }
399: /* redistribute the matrix */
400: lu->firsttag = 0 ;
401: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
402: lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
403: lu->firsttag += size ;
405: InpMtx_free(lu->mtxA);
406: lu->mtxA = newA ;
407: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
408: if ( lu->options.msglvl > 2 ) {
409: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
410: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
411: fflush(lu->options.msgFile);
412: }
413:
414: /* compute the symbolic factorization */
415: lu->symbfacIVL = SymbFac_MPI_initFromInpMtx(lu->frontETree, lu->ownersIV, lu->mtxA,
416: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
417: lu->firsttag += lu->frontETree->nfront ;
418: if ( lu->options.msglvl > 2 ) {
419: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n local symbolic factorization");
420: IVL_writeForHumanEye(lu->symbfacIVL, lu->options.msgFile);
421: fflush(lu->options.msgFile);
422: }
424: lu->mtxmanager = SubMtxManager_new();
425: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
426: lu->frontmtx = FrontMtx_new();
428: } else { /* new num factorization using previously computed symbolic factor */
429: if (lu->options.pivotingflag) { /* different FrontMtx is required */
430: FrontMtx_free(lu->frontmtx);
431: lu->frontmtx = FrontMtx_new();
432: }
434: SubMtxManager_free(lu->mtxmanager);
435: lu->mtxmanager = SubMtxManager_new();
436: SubMtxManager_init(lu->mtxmanager, NO_LOCK, 0);
438: /* permute mtxA */
439: InpMtx_permute(lu->mtxA, IV_entries(lu->oldToNewIV), IV_entries(lu->oldToNewIV));
440: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) InpMtx_mapToUpperTriangle(lu->mtxA);
441:
442: InpMtx_changeCoordType(lu->mtxA, INPMTX_BY_CHEVRONS);
443: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
445: /* redistribute the matrix */
446: MPI_Barrier(A->comm);
447: lu->firsttag = 0;
448: newA = InpMtx_MPI_split(lu->mtxA, lu->vtxmapIV, lu->stats,
449: lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
450: lu->firsttag += size ;
452: InpMtx_free(lu->mtxA);
453: lu->mtxA = newA ;
454: InpMtx_changeStorageMode(lu->mtxA, INPMTX_BY_VECTORS);
455: if ( lu->options.msglvl > 2 ) {
456: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n split InpMtx");
457: InpMtx_writeForHumanEye(lu->mtxA, lu->options.msgFile);
458: fflush(lu->options.msgFile);
459: }
460: } /* end of if ( lu->flg == DIFFERENT_NONZERO_PATTERN) */
462: FrontMtx_init(lu->frontmtx, lu->frontETree, lu->symbfacIVL, lu->options.typeflag, lu->options.symflag,
463: FRONTMTX_DENSE_FRONTS, lu->options.pivotingflag, NO_LOCK, rank,
464: lu->ownersIV, lu->mtxmanager, lu->options.msglvl, lu->options.msgFile);
466: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
467: if ( lu->options.patchAndGoFlag == 1 ) {
468: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
469: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 1, lu->options.toosmall, lu->options.fudge,
470: lu->options.storeids, lu->options.storevalues);
471: } else if ( lu->options.patchAndGoFlag == 2 ) {
472: lu->frontmtx->patchinfo = PatchAndGoInfo_new();
473: PatchAndGoInfo_init(lu->frontmtx->patchinfo, 2, lu->options.toosmall, lu->options.fudge,
474: lu->options.storeids, lu->options.storevalues);
475: }
476: }
478: /* numerical factorization */
479: chvmanager = ChvManager_new();
480: ChvManager_init(chvmanager, NO_LOCK, 0);
482: tagbound = maxTagMPI(lu->comm_spooles);
483: lasttag = lu->firsttag + 3*lu->frontETree->nfront + 2;
484: /* if(!rank) PetscPrintf(PETSC_COMM_SELF,"\n firsttag: %d, nfront: %d\n",lu->firsttag, lu->frontETree->nfront);*/
485: if ( lasttag > tagbound ) {
486: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_factorInpMtx(), tag range is [%d,%d], tag_bound = %d",\
487: lu->firsttag, lasttag, tagbound);
488: }
489: rootchv = FrontMtx_MPI_factorInpMtx(lu->frontmtx, lu->mtxA, lu->options.tau, droptol,
490: chvmanager, lu->ownersIV, lookahead, &sierr, lu->cpus,
491: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag,lu->comm_spooles);
492: ChvManager_free(chvmanager);
493: lu->firsttag = lasttag;
494: if ( lu->options.msglvl > 2 ) {
495: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization");
496: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
497: fflush(lu->options.msgFile);
498: }
500: if ( lu->options.symflag == SPOOLES_SYMMETRIC ) {
501: if ( lu->options.patchAndGoFlag == 1 ) {
502: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
503: if (lu->options.msglvl > 0 ){
504: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
505: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
506: }
507: }
508: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
509: } else if ( lu->options.patchAndGoFlag == 2 ) {
510: if (lu->options.msglvl > 0 ){
511: if ( lu->frontmtx->patchinfo->fudgeIV != NULL ) {
512: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n small pivots found at these locations");
513: IV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeIV, lu->options.msgFile);
514: }
515: if ( lu->frontmtx->patchinfo->fudgeDV != NULL ) {
516: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n perturbations");
517: DV_writeForHumanEye(lu->frontmtx->patchinfo->fudgeDV, lu->options.msgFile);
518: }
519: }
520: PatchAndGoInfo_free(lu->frontmtx->patchinfo);
521: }
522: }
523: if ( sierr >= 0 ) SETERRQ2(PETSC_ERR_LIB,"\n proc %d : factorization error at front %d", rank, sierr);
524:
525: /* post-process the factorization and split
526: the factor matrices into submatrices */
527: lasttag = lu->firsttag + 5*size;
528: if ( lasttag > tagbound ) {
529: SETERRQ3(PETSC_ERR_LIB,"fatal error in FrontMtx_MPI_postProcess(), tag range is [%d,%d], tag_bound = %d",\
530: lu->firsttag, lasttag, tagbound);
531: }
532: FrontMtx_MPI_postProcess(lu->frontmtx, lu->ownersIV, lu->stats, lu->options.msglvl,
533: lu->options.msgFile, lu->firsttag, lu->comm_spooles);
534: lu->firsttag += 5*size ;
535: if ( lu->options.msglvl > 2 ) {
536: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after post-processing");
537: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
538: fflush(lu->options.msgFile);
539: }
540:
541: /* create the solve map object */
542: lu->solvemap = SolveMap_new();
543: SolveMap_ddMap(lu->solvemap, lu->frontmtx->symmetryflag,
544: FrontMtx_upperBlockIVL(lu->frontmtx),
545: FrontMtx_lowerBlockIVL(lu->frontmtx),
546: size, lu->ownersIV, FrontMtx_frontTree(lu->frontmtx),
547: lu->options.seed, lu->options.msglvl, lu->options.msgFile);
548: if ( lu->options.msglvl > 2 ) {
549: SolveMap_writeForHumanEye(lu->solvemap, lu->options.msgFile);
550: fflush(lu->options.msgFile);
551: }
553: /* redistribute the submatrices of the factors */
554: FrontMtx_MPI_split(lu->frontmtx, lu->solvemap,
555: lu->stats, lu->options.msglvl, lu->options.msgFile, lu->firsttag, lu->comm_spooles);
556: if ( lu->options.msglvl > 2 ) {
557: PetscFPrintf(PETSC_COMM_SELF,lu->options.msgFile, "\n\n numeric factorization after split");
558: FrontMtx_writeForHumanEye(lu->frontmtx, lu->options.msgFile);
559: fflush(lu->options.msgFile);
560: }
562: /* create a solution DenseMtx object */
563: lu->ownedColumnsIV = FrontMtx_ownedColumnsIV(lu->frontmtx, rank, lu->ownersIV,
564: lu->options.msglvl, lu->options.msgFile);
565: lu->nmycol = IV_size(lu->ownedColumnsIV);
566: if ( lu->nmycol > 0) {
567: DenseMtx_init(lu->mtxX, lu->options.typeflag, 0, 0, lu->nmycol, 1, 1, lu->nmycol);
568: /* get pointers rowindX and entX */
569: DenseMtx_rowIndices(lu->mtxX, &lu->nmycol, &lu->rowindX);
570: lu->entX = DenseMtx_entries(lu->mtxX);
571: } else { /* lu->nmycol == 0 */
572: lu->entX = 0;
573: lu->rowindX = 0;
574: }
576: if ( lu->scat ){
577: VecDestroy(lu->vec_spooles);
578: ISDestroy(lu->iden);
579: ISDestroy(lu->is_petsc);
580: VecScatterDestroy(lu->scat);
581: }
582: lu->scat = PETSC_NULL;
583: lu->flg = SAME_NONZERO_PATTERN;
585: lu->CleanUpSpooles = PETSC_TRUE;
586: return(0);
587: }
592: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJSpooles(Mat A,MatType type,MatReuse reuse,Mat *newmat)
593: {
594: /* This routine is only called to convert a MATMPIAIJ matrix */
595: /* to a MATMPIAIJSPOOLES matrix, so we will ignore 'MatType type'. */
597: Mat B=*newmat;
598: Mat_Spooles *lu;
601: if (reuse == MAT_INITIAL_MATRIX) {
602: /* This routine is inherited, so we know the type is correct. */
603: MatDuplicate(A,MAT_COPY_VALUES,&B);
604: }
606: PetscNew(Mat_Spooles,&lu);
607: B->spptr = (void*)lu;
609: lu->basetype = MATMPIAIJ;
610: lu->CleanUpSpooles = PETSC_FALSE;
611: lu->MatDuplicate = A->ops->duplicate;
612: lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic;
613: lu->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
614: lu->MatView = A->ops->view;
615: lu->MatAssemblyEnd = A->ops->assemblyend;
616: lu->MatDestroy = A->ops->destroy;
618: B->ops->duplicate = MatDuplicate_Spooles;
619: B->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJSpooles;
620: B->ops->view = MatView_SeqAIJSpooles;
621: B->ops->assemblyend = MatAssemblyEnd_MPIAIJSpooles;
622: B->ops->destroy = MatDestroy_MPIAIJSpooles;
624: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaijspooles_mpiaij_C",
625: "MatConvert_Spooles_Base",MatConvert_Spooles_Base);
626: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijspooles_C",
627: "MatConvert_MPIAIJ_MPIAIJSpooles",MatConvert_MPIAIJ_MPIAIJSpooles);
628: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJSPOOLES);
629: *newmat = B;
630: return(0);
631: }
634: /*MC
635: MATMPIAIJSPOOLES - MATMPIAIJSPOOLES = "mpiaijspooles" - A matrix type providing direct solvers (LU) for distributed matrices
636: via the external package Spooles.
638: If MPIAIJSPOOLES is installed (see the manual for
639: instructions on how to declare the existence of external packages),
640: a matrix type can be constructed which invokes SPOOLES solvers.
641: After calling MatCreate(...,A), simply call MatSetType(A,MATMPIAIJSPOOLES).
643: This matrix inherits from MATMPIAIJ. As a result, MatMPIAIJSetPreallocation is
644: supported for this matrix type. One can also call MatConvert for an inplace conversion to or from
645: the MATMPIAIJ type without data copy.
647: Consult Spooles documentation for more information about the options database keys below.
649: Options Database Keys:
650: + -mat_type mpiaijspooles - sets the matrix type to "mpiaijspooles" during a call to MatSetFromOptions()
651: . -mat_spooles_tau <tau> - upper bound on the magnitude of the largest element in L or U
652: . -mat_spooles_seed <seed> - random number seed used for ordering
653: . -mat_spooles_msglvl <msglvl> - message output level
654: . -mat_spooles_ordering <BestOfNDandMS,MMD,MS,ND> - ordering used
655: . -mat_spooles_maxdomainsize <n> - maximum subgraph size used by Spooles orderings
656: . -mat_spooles_maxzeros <n> - maximum number of zeros inside a supernode
657: . -mat_spooles_maxsize <n> - maximum size of a supernode
658: . -mat_spooles_FrontMtxInfo <true,fase> - print Spooles information about the computed factorization
659: . -mat_spooles_symmetryflag <0,1,2> - 0: SPOOLES_SYMMETRIC, 1: SPOOLES_HERMITIAN, 2: SPOOLES_NONSYMMETRIC
660: . -mat_spooles_patchAndGoFlag <0,1,2> - 0: no patch, 1: use PatchAndGo strategy 1, 2: use PatchAndGo strategy 2
661: . -mat_spooles_toosmall <dt> - drop tolerance for PatchAndGo strategy 1
662: . -mat_spooles_storeids <bool integer> - if nonzero, stores row and col numbers where patches were applied in an IV object
663: . -mat_spooles_fudge <delta> - fudge factor for rescaling diagonals with PatchAndGo strategy 2
664: - -mat_spooles_storevalues <bool integer> - if nonzero and PatchAndGo strategy 2 is used, store change in diagonal value in a DV object
666: Level: beginner
668: .seealso: PCLU
669: M*/
674: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJSpooles(Mat A)
675: {
677: Mat A_diag;
680: /* Change type name before calling MatSetType to force proper construction of MPIAIJ and MPIAIJSpooles types */
681: PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJSPOOLES);
682: MatSetType(A,MATMPIAIJ);
683: A_diag = ((Mat_MPIAIJ *)A->data)->A;
684: MatConvert_SeqAIJ_SeqAIJSpooles(A_diag,MATSEQAIJSPOOLES,MAT_REUSE_MATRIX,&A_diag);
685: MatConvert_MPIAIJ_MPIAIJSpooles(A,MATMPIAIJSPOOLES,MAT_REUSE_MATRIX,&A);
686: return(0);
687: }