Actual source code: superlu.c
1: /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
3: /*
4: Provides an interface to the SuperLU sparse solver
5: Modified for SuperLU 2.0 by Matthew Knepley
7: */
9: #include src/mat/impls/aij/seq/aij.h
11: #if defined(PETSC_HAVE_SUPERLU) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
12: EXTERN_C_BEGIN
13: #include "dsp_defs.h"
14: #include "util.h"
15: EXTERN_C_END
18: typedef struct {
19: SuperMatrix A;
20: SuperMatrix B;
21: SuperMatrix AC;
22: SuperMatrix L;
23: SuperMatrix U;
24: int *perm_r;
25: int *perm_c;
26: int relax;
27: int panel_size;
28: double pivot_threshold;
29: NCformat *store;
30: MatStructure flg;
31: } Mat_SeqAIJ_SuperLU;
34: extern int MatDestroy_SeqAIJ(Mat);
36: int MatDestroy_SeqAIJ_SuperLU(Mat A)
37: {
38: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
39: int ierr;
42: if (--A->refct > 0)return(0);
43: /* We have to free the global data or SuperLU crashes (sucky design)*/
44: /* Since we don't know if more solves on other matrices may be done
45: we cannot free the yucky SuperLU global data
46: StatFree();
47: */
49: /* Free the SuperLU datastructures */
50: Destroy_CompCol_Permuted(&lu->AC);
51: Destroy_SuperNode_Matrix(&lu->L);
52: Destroy_CompCol_Matrix(&lu->U);
53: PetscFree(lu->B.Store);
54: PetscFree(lu->A.Store);
55: PetscFree(lu->perm_r);
56: PetscFree(lu->perm_c);
57: PetscFree(lu);
58: MatDestroy_SeqAIJ(A);
59: return(0);
60: }
62: #include src/mat/impls/dense/seq/dense.h
63: int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat)
64: {
65: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
66: int numRows = A->m;
67: int numCols = A->n;
68: SCformat *Lstore;
69: int numNullCols,size;
70: PetscScalar *nullVals,*workVals;
71: int row,newRow,col,newCol,block,b;
72: int ierr;
77: if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix");
78: numNullCols = numCols - numRows;
79: if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems");
80: /* Create the null matrix */
81: MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);
82: if (numNullCols == 0) {
83: MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
84: MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
85: return(0);
86: }
87: nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v;
88: /* Copy in the columns */
89: Lstore = (SCformat*)lu->L.Store;
90: for(block = 0; block <= Lstore->nsuper; block++) {
91: newRow = Lstore->sup_to_col[block];
92: size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block];
93: for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) {
94: newCol = Lstore->rowind[col];
95: if (newCol >= numRows) {
96: for(b = 0; b < size; b++)
97: nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
98: }
99: }
100: }
101: /* Permute rhs to form P^T_c B */
102: PetscMalloc(numRows*sizeof(double),&workVals);
103: for(b = 0; b < numNullCols; b++) {
104: for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row];
105: for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row];
106: }
107: /* Backward solve the upper triangle A x = b */
108: for(b = 0; b < numNullCols; b++) {
109: sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
110: if (ierr < 0)
111: SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr);
112: }
113: PetscFree(workVals);
115: MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
116: MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
117: return(0);
118: }
120: int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x)
121: {
122: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
123: PetscScalar *array;
124: int m;
125: int ierr;
128: VecGetLocalSize(b,&m);
129: VecCopy(b,x);
130: VecGetArray(x,&array);
131: /* Create the Rhs */
132: lu->B.Stype = DN;
133: lu->B.Dtype = _D;
134: lu->B.Mtype = GE;
135: lu->B.nrow = m;
136: lu->B.ncol = 1;
137: ((DNformat*)lu->B.Store)->lda = m;
138: ((DNformat*)lu->B.Store)->nzval = array;
139: dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
140: if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
141: VecRestoreArray(x,&array);
142: return(0);
143: }
145: static int StatInitCalled = 0;
147: int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F)
148: {
149: Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data;
150: Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr;
151: int *etree,i,ierr;
154: /* Create the SuperMatrix for A^T:
156: Since SuperLU only likes column-oriented matrices,we pass it the transpose,
157: and then solve A^T X = B in MatSolve().
158: */
159: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */
160: lu->A.Stype = NC;
161: lu->A.Dtype = _D;
162: lu->A.Mtype = GE;
163: lu->A.nrow = A->n;
164: lu->A.ncol = A->m;
165:
166: PetscMalloc(sizeof(NCformat),&lu->store);
167: PetscMalloc(sizeof(DNformat),&lu->B.Store);
168: }
169: lu->store->nnz = aa->nz;
170: lu->store->colptr = aa->i;
171: lu->store->rowind = aa->j;
172: lu->store->nzval = aa->a;
173: lu->A.Store = lu->store;
174:
175: /* Shift indices down */
176: if (aa->indexshift) {
177: for(i = 0; i < A->m+1; i++) aa->i[i]--;
178: for(i = 0; i < aa->nz; i++) aa->j[i]--;
179: }
180:
181: /* Set SuperLU options */
182: lu->relax = sp_ienv(2);
183: lu->panel_size = sp_ienv(1);
184: /* We have to initialize global data or SuperLU crashes (sucky design) */
185: if (!StatInitCalled) {
186: StatInit(lu->panel_size,lu->relax);
187: }
188: StatInitCalled++;
190: /* Create the elimination tree */
191: ierr = PetscMalloc(A->n*sizeof(int),&etree);
192: sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC);
193: /* Factor the matrix */
194: dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
195: if (ierr < 0) {
196: SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
197: } else if (ierr > 0) {
198: if (ierr <= A->m) {
199: SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr);
200: } else {
201: SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m);
202: }
203: }
205: /* Shift indices up */
206: if (aa->indexshift) {
207: for (i = 0; i < A->n+1; i++) aa->i[i]++;
208: for (i = 0; i < aa->nz; i++) aa->j[i]++;
209: }
211: /* Cleanup */
212: PetscFree(etree);
214: lu->flg = SAME_NONZERO_PATTERN;
215: return(0);
216: }
218: /*
219: Note the r permutation is ignored
220: */
221: int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
222: {
223: Mat B;
224: Mat_SeqAIJ_SuperLU *lu;
225: int ierr,*ca;
228:
229: ierr = MatCreateSeqAIJ(A->comm,A->m,A->n,0,PETSC_NULL,F);
230: B = *F;
231: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU;
232: B->ops->solve = MatSolve_SeqAIJ_SuperLU;
233: B->ops->destroy = MatDestroy_SeqAIJ_SuperLU;
234: B->factor = FACTOR_LU;
235:
236: ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);
237: B->spptr = (void*)lu;
238: PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU",
239: (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);
241: /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */
242: PetscMalloc(A->n*sizeof(int),&lu->perm_r);
243: PetscMalloc(A->m*sizeof(int),&lu->perm_c);
244: ISGetIndices(c,&ca);
245: PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));
246: ISRestoreIndices(c,&ca);
247:
248: if (info) {
249: lu->pivot_threshold = info->dtcol;
250: } else {
251: lu->pivot_threshold = 0.0; /* no pivoting */
252: }
254: PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU));
256: lu->flg = DIFFERENT_NONZERO_PATTERN;
257: return(0);
258: }
260: int MatUseSuperLU_SeqAIJ(Mat A)
261: {
262: PetscTruth flg;
263: int ierr;
267: PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
268: if (!flg) return(0);
270: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU;
272: return(0);
273: }
275: #else
277: int MatUseSuperLU_SeqAIJ(Mat A)
278: {
280: return(0);
281: }
283: #endif