Actual source code: superlu_dist.c
/*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
/*
   Provides an interface to the SuperLU_DIST sparse solver
*/

#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#if defined(PETSC_HAVE_STDLIB_H) /* This is to get around a weird problem with SuperLU on Cray */
#include "stdlib.h"
#endif

#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)

EXTERN_C_BEGIN
#include "superlu_ddefs.h"
EXTERN_C_END
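
/*
   Mat_MPIAIJ_SuperLU_DIST - private context attached to the factored matrix
   (via Mat->spptr).  It carries the SuperLU_DIST process grid, the solver
   options, the global matrix in compressed column form, and the scaling,
   permutation and LU data structures produced by the factorization.
*/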
typedef struct {
  int               nprow,npcol;
  gridinfo_t        grid;
  superlu_options_t options;
  SuperMatrix       A_sup;
  ScalePermstruct_t ScalePermstruct;
  LUstruct_t        LUstruct;
  int               StatPrint;
} Mat_MPIAIJ_SuperLU_DIST;

extern int MatDestroy_MPIAIJ(Mat);
extern int MatDestroy_SeqAIJ(Mat);

#if !defined(PETSC_HAVE_SUPERLU)
/* SuperLU routine: convert compressed row storage into compressed column storage. */
void dCompRow_to_CompCol(int m, int n, int nnz,
                         double *a, int *colind, int *rowptr,
                         double **at, int_t **rowind, int_t **colptr)
{
  int i, j, col, relpos, *marker;

  /* Allocate storage for another copy of the matrix. */
  *at     = (double *) doubleMalloc_dist(nnz);
  *rowind = (int_t *) intMalloc_dist(nnz);
  *colptr = (int_t *) intMalloc_dist(n+1);
  marker  = (int *) intCalloc_dist(n);

  /* Get counts of each column of A, and set up column pointers */
  for (i = 0; i < m; ++i)
    for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
  (*colptr)[0] = 0;
  for (j = 0; j < n; ++j) {
    (*colptr)[j+1] = (*colptr)[j] + marker[j];
    marker[j] = (*colptr)[j];
  }

  /* Transfer the matrix into the compressed column storage. */
  for (i = 0; i < m; ++i) {
    for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
      col               = colind[j];
      relpos            = marker[col];
      (*rowind)[relpos] = i;
      (*at)[relpos]     = a[j];
      ++marker[col];
    }
  }

  SUPERLU_FREE(marker);
}
#else
EXTERN_C_BEGIN
extern void dCompRow_to_CompCol(int,int,int,double*,int*,int*,double**,int_t**,int_t**);
EXTERN_C_END
#endif /* PETSC_HAVE_SUPERLU */
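
/*
   MatDestroy_MPIAIJ_SuperLU_DIST - frees the SuperLU_DIST factorization data
   (the compressed column matrix, LU structures, scaling/permutation data and
   the process grid), then calls the usual SeqAIJ or MPIAIJ destroy routine.
*/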
int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A)
{
  Mat_MPIAIJ              *a  = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
  int                     ierr, size=a->size;

  /* Deallocate SuperLU_DIST storage */
  Destroy_CompCol_Matrix_dist(&lu->A_sup);
  Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
  ScalePermstructFree(&lu->ScalePermstruct);
  LUstructFree(&lu->LUstruct);

  /* Release the SuperLU_DIST process grid. */
  superlu_gridexit(&lu->grid);

  PetscFree(lu);

  if (size == 1){
    MatDestroy_SeqAIJ(A);
  } else {
    MatDestroy_MPIAIJ(A);
  }
  return(0);
}
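
/*
   MatSolve_MPIAIJ_SuperLU_DIST - solves A x = b with the previously computed
   factors.  In parallel the right-hand side is first gathered into a
   sequential vector on every process (pdgssvx_ABglobal expects the global
   right-hand side), the triangular solves are done with options.Fact set to
   FACTORED, and the result is scattered back into the parallel vector x.
*/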
int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_MPIAIJ              *aa = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
  int                     ierr, size=aa->size;
  int                     m=A->M, N=A->N;
  superlu_options_t       options=lu->options;
  SuperLUStat_t           stat;
  double                  berr[1],*bptr;
  int                     info, nrhs=1;
  Vec                     x_seq;
  IS                      iden;
  VecScatter              scat;
  PetscLogDouble          time0,time,time_min,time_max;

  if (size > 1) { /* convert the parallel vector b_mpi to the sequential vector x_seq */
    VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
    ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
    VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
    ISDestroy(iden);

    VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
    VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
    VecGetArray(x_seq,&bptr);
  } else {
    VecCopy(b_mpi,x);
    VecGetArray(x,&bptr);
  }

  options.Fact = FACTORED; /* The factored form of A is supplied; local option used by this function only. */
  PStatInit(&stat);        /* Initialize the statistics variables. */
  if (lu->StatPrint) {
    MPI_Barrier(A->comm);  /* to be removed */
    PetscGetTime(&time0);  /* to be removed */
  }
  pdgssvx_ABglobal(&options, &lu->A_sup, &lu->ScalePermstruct, bptr, m, nrhs,
                   &lu->grid, &lu->LUstruct, berr, &stat, &info);
  if (lu->StatPrint) {
    PetscGetTime(&time);          /* to be removed */
    PStatPrint(&stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1) { /* scatter the sequential solution back into the parallel vector x */
    VecRestoreArray(x_seq,&bptr);
    VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
    VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
    VecScatterDestroy(scat);
    VecDestroy(x_seq);
  } else {
    VecRestoreArray(x,&bptr);
  }
  if (lu->StatPrint) {
    time0 = time - time0;
    MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
    MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
    MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
    time = time/size; /* average time */
    PetscPrintf(A->comm, " Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);
  }
  return(0);
}
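
/*
   MatLUFactorNumeric_MPIAIJ_SuperLU_DIST - computes the LU factorization.
   In parallel the whole matrix is first gathered onto each process with
   MatGetSubMatrices(), converted from PETSc compressed row (AIJ) storage to
   SuperLU_DIST compressed column storage, and then factored with
   pdgssvx_ABglobal() using nrhs = 0 (factorization only, no solve).
*/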
int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F)
{
  Mat_MPIAIJ              *fac = (Mat_MPIAIJ*)(*F)->data;
  Mat                     *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ              *aa;
  Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr;
  int                     M=A->M,N=A->N,info,ierr,size=fac->size,i;
  SuperLUStat_t           stat;
  double                  *berr=0, *bptr=0;
  int_t                   *asub, *xa;
  double                  *a;
  SuperMatrix             A_sup;
  IS                      isrow;
  PetscLogDouble          time0[2],time[2],time_min[2],time_max[2];

  if (lu->StatPrint) {
    MPI_Barrier(A->comm);
    PetscGetTime(&time0[0]);
  }

  if (size > 1) { /* gather the parallel matrix A into the sequential matrix A_seq */
    ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
    MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
    ISDestroy(isrow);

    A_seq = *tseq;
    PetscFree(tseq);
    aa    = (Mat_SeqAIJ*)A_seq->data;
  } else {
    aa = (Mat_SeqAIJ*)A->data;
  }

  /* Allocate storage for the compressed column representation. */
  dallocateA_dist(N, aa->nz, &a, &asub, &xa);

  /* Convert PETSc compressed row (NR) storage to SuperLU_DIST compressed column (NC) storage */
  dCompRow_to_CompCol(M,N,aa->nz,aa->a,aa->j,aa->i,&a, &asub, &xa);

  if (lu->StatPrint) {
    PetscGetTime(&time[0]);
    time0[0] = time[0] - time0[0];
  }

  /* Create the compressed column matrix A_sup. */
  dCreate_CompCol_Matrix_dist(&A_sup, M, N, aa->nz, a, asub, xa, NC, D, GE);

  /* Factor the matrix. */
  PStatInit(&stat); /* Initialize the statistics variables. */
  if (lu->StatPrint) {
    MPI_Barrier(A->comm);
    PetscGetTime(&time0[1]);
  }
  pdgssvx_ABglobal(&lu->options, &A_sup, &lu->ScalePermstruct, bptr, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &info);
  if (lu->StatPrint) {
    PetscGetTime(&time[1]); /* to be removed */
    time0[1] = time[1] - time0[1];
    PStatPrint(&stat, &lu->grid); /* Print the statistics. */
  }
  PStatFree(&stat);

  lu->A_sup        = A_sup;
  lu->options.Fact = SamePattern; /* The sparsity pattern of A and perm_c can be reused. */
  if (size > 1){
    MatDestroy(A_seq);
  }

  if (lu->StatPrint) {
    MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
    MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
    MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
    for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
    PetscPrintf(A->comm, " Time for mat conversion (max/min/avg): %g / %g / %g\n",time_max[0],time_min[0],time[0]);
    PetscPrintf(A->comm, " Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]);
  }
  (*F)->assembled = PETSC_TRUE;
  return(0);
}
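
/*
   MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST - creates the factor matrix F,
   processes the -mat_aij_superlu_dist_* runtime options, and sets up the
   SuperLU_DIST process grid together with the ScalePermstruct and LUstruct
   data.  The symbolic analysis itself is left to SuperLU_DIST and happens
   during the numeric factorization above.
*/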
/* Note the PETSc r and c permutations are ignored */
int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
{
  Mat_MPIAIJ              *fac;
  Mat_MPIAIJ_SuperLU_DIST *lu;
  int                     ierr,M=A->M,N=A->N,size;
  gridinfo_t              grid;
  superlu_options_t       options;
  ScalePermstruct_t       ScalePermstruct;
  LUstruct_t              LUstruct;
  char                    buff[32];
  PetscTruth              flg;
  char                    *ptype[]  = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
  char                    *prtype[] = {"LargeDiag","NATURAL"};

  PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);

  /* Create the factorization matrix F */
  MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,M,N,0,PETSC_NULL,0,PETSC_NULL,F);

  (*F)->ops->lufactornumeric = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
  (*F)->ops->solve           = MatSolve_MPIAIJ_SuperLU_DIST;
  (*F)->ops->destroy         = MatDestroy_MPIAIJ_SuperLU_DIST;
  (*F)->factor               = FACTOR_LU;
  (*F)->spptr                = (void*)lu;
  fac                        = (Mat_MPIAIJ*)(*F)->data;

  /* Set the default input options */
  set_default_options(&options);

  MPI_Comm_size(A->comm,&size);
  lu->nprow = size/2;             /* Default number of process rows. */
  if (lu->nprow == 0) lu->nprow = 1;
  lu->npcol = size/lu->nprow;     /* Default number of process columns. */

  PetscOptionsBegin(A->comm,A->prefix,"SuperLU_DIST Options","Mat");

  PetscOptionsInt("-mat_aij_superlu_dist_r","Number of rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
  PetscOptionsInt("-mat_aij_superlu_dist_c","Number of columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
  if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");

  PetscOptionsLogical("-mat_aij_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
  if (!flg) {
    options.Equil = NO;
  }

  PetscOptionsEList("-mat_aij_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);
  while (flg) {
    PetscStrcmp(buff,"LargeDiag",&flg);
    if (flg) {
      options.RowPerm = LargeDiag;
      break;
    }
    PetscStrcmp(buff,"NATURAL",&flg);
    if (flg) {
      options.RowPerm = NOROWPERM;
      break;
    }
    SETERRQ1(1,"Unknown row permutation %s",buff);
  }

  PetscOptionsEList("-mat_aij_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);
  while (flg) {
    PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);
    if (flg) {
      options.ColPerm = MMD_AT_PLUS_A;
      break;
    }
    PetscStrcmp(buff,"NATURAL",&flg);
    if (flg) {
      options.ColPerm = NATURAL;
      break;
    }
    PetscStrcmp(buff,"MMD_ATA",&flg);
    if (flg) {
      options.ColPerm = MMD_ATA;
      break;
    }
    PetscStrcmp(buff,"COLAMD",&flg);
    if (flg) {
      options.ColPerm = COLAMD;
      break;
    }
    SETERRQ1(1,"Unknown column permutation %s",buff);
  }

  PetscOptionsLogical("-mat_aij_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
  if (!flg) {
    options.ReplaceTinyPivot = NO;
  }

  options.IterRefine = NOREFINE;
  PetscOptionsLogical("-mat_aij_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
  if (flg) {
    options.IterRefine = DOUBLE;
  }

  if (PetscLogPrintInfo) {
    lu->StatPrint = (int)PETSC_TRUE;
  } else {
    lu->StatPrint = (int)PETSC_FALSE;
  }
  PetscOptionsLogical("-mat_aij_superlu_dist_statprint","Print factorization information","None",
                      (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);
  PetscOptionsEnd();

  /* Initialize the SuperLU_DIST process grid. */
  superlu_gridinit(A->comm, lu->nprow, lu->npcol, &grid);

  /* Initialize ScalePermstruct and LUstruct. */
  ScalePermstructInit(M, N, &ScalePermstruct);
  LUstructInit(M, N, &LUstruct);

  lu->ScalePermstruct = ScalePermstruct;
  lu->LUstruct        = LUstruct;
  lu->options         = options;
  lu->grid            = grid;
  fac->size           = size;

  return(0);
}

int MatUseSuperLU_DIST_MPIAIJ(Mat A)
{
  A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
  A->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
  return(0);
}
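
/*
   Minimal usage sketch (comments only, not compiled here, and only a sketch):
   assuming an assembled parallel MPIAIJ matrix A of global size M and
   assembled vectors b and x, a caller could select this interface roughly as
   follows.  The index set perm and the MatLUInfo argument are placeholders --
   the r and c permutations are ignored by this interface (see the note above
   MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST()).

       Mat       F;
       IS        perm;
       MatLUInfo luinfo;

       MatUseSuperLU_DIST_MPIAIJ(A);                  install the factor ops on A
       ISCreateStride(PETSC_COMM_SELF,M,0,1,&perm);   identity ordering, ignored here
       MatLUFactorSymbolic(A,perm,perm,&luinfo,&F);
       MatLUFactorNumeric(A,&F);                      gathers A, calls pdgssvx_ABglobal
       MatSolve(F,b,x);                               triangular solves with stored factors
       MatDestroy(F); ISDestroy(perm);
*/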

int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer)
{
  Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
  superlu_options_t       options;
  int                     ierr;
  char                    *colperm;

  /* Check whether the matrix is of SuperLU_DIST type */
  if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) return(0);

  options = lu->options;
  PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
  PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");
  PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");
  PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");
  PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
  PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
  if (options.ColPerm == NATURAL) {
    colperm = "NATURAL";
  } else if (options.ColPerm == MMD_AT_PLUS_A) {
    colperm = "MMD_AT_PLUS_A";
  } else if (options.ColPerm == MMD_ATA) {
    colperm = "MMD_ATA";
  } else if (options.ColPerm == COLAMD) {
    colperm = "COLAMD";
  } else {
    SETERRQ(1,"Unknown column permutation");
  }
  PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);
  return(0);
}

#else

int MatUseSuperLU_DIST_MPIAIJ(Mat A)
{
  return(0);
}

#endif