Actual source code: superlu.c

  1: /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/

  3: /* 
  4:         Provides an interface to the SuperLU sparse solver
  5:           Modified for SuperLU 2.0 by Matthew Knepley

  7: */

 9:  #include src/mat/impls/aij/seq/aij.h

 11: #if defined(PETSC_HAVE_SUPERLU) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
 12: EXTERN_C_BEGIN
 13: #include "dsp_defs.h"
 14: #include "util.h"
 15: EXTERN_C_END


 18: typedef struct {
 19:   SuperMatrix  A;
 20:   SuperMatrix  B;
 21:   SuperMatrix  AC;
 22:   SuperMatrix  L;
 23:   SuperMatrix  U;
 24:   int          *perm_r;
 25:   int          *perm_c;
 26:   int          relax;
 27:   int          panel_size;
 28:   double       pivot_threshold;
 29:   NCformat     *store;
 30:   MatStructure flg;
 31: } Mat_SeqAIJ_SuperLU;


 34: extern int MatDestroy_SeqAIJ(Mat);

 36: int MatDestroy_SeqAIJ_SuperLU(Mat A)
 37: {
 38:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
 39:   int                ierr;

 42:   if (--A->refct > 0)return(0);
 43:   /* We have to free the global data or SuperLU crashes (sucky design)*/
 44:   /* Since we don't know if more solves on other matrices may be done
 45:      we cannot free the yucky SuperLU global data
 46:     StatFree(); 
 47:   */

 49:   /* Free the SuperLU datastructures */
 50:   Destroy_CompCol_Permuted(&lu->AC);
 51:   Destroy_SuperNode_Matrix(&lu->L);
 52:   Destroy_CompCol_Matrix(&lu->U);
 53:   PetscFree(lu->B.Store);
 54:   PetscFree(lu->A.Store);
 55:   PetscFree(lu->perm_r);
 56:   PetscFree(lu->perm_c);
 57:   PetscFree(lu);
 58:   MatDestroy_SeqAIJ(A);
 59:   return(0);
 60: }

 62:  #include src/mat/impls/dense/seq/dense.h
 63: int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat)
 64: {
 65:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
 66:   int                 numRows = A->m;
 67:   int                 numCols = A->n;
 68:   SCformat           *Lstore;
 69:   int                 numNullCols,size;
 70:   PetscScalar             *nullVals,*workVals;
 71:   int                 row,newRow,col,newCol,block,b;
 72:   int                 ierr;

 77:   if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix");
 78:   numNullCols = numCols - numRows;
 79:   if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems");
 80:   /* Create the null matrix */
 81:   MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);
 82:   if (numNullCols == 0) {
 83:     MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
 84:     MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
 85:     return(0);
 86:   }
 87:   nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v;
 88:   /* Copy in the columns */
 89:   Lstore = (SCformat*)lu->L.Store;
 90:   for(block = 0; block <= Lstore->nsuper; block++) {
 91:     newRow = Lstore->sup_to_col[block];
 92:     size   = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block];
 93:     for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) {
 94:       newCol = Lstore->rowind[col];
 95:       if (newCol >= numRows) {
 96:         for(b = 0; b < size; b++)
 97:           nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
 98:       }
 99:     }
100:   }
101:   /* Permute rhs to form P^T_c B */
102:   PetscMalloc(numRows*sizeof(double),&workVals);
103:   for(b = 0; b < numNullCols; b++) {
104:     for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row];
105:     for(row = 0; row < numRows; row++) nullVals[b*numRows+row]   = workVals[row];
106:   }
107:   /* Backward solve the upper triangle A x = b */
108:   for(b = 0; b < numNullCols; b++) {
109:     sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
110:     if (ierr < 0)
111:       SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr);
112:   }
113:   PetscFree(workVals);

115:   MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);
116:   MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);
117:   return(0);
118: }

120: int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x)
121: {
122:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
123:   PetscScalar        *array;
124:   int                m;
125:   int                ierr;

128:   VecGetLocalSize(b,&m);
129:   VecCopy(b,x);
130:   VecGetArray(x,&array);
131:   /* Create the Rhs */
132:   lu->B.Stype        = DN;
133:   lu->B.Dtype        = _D;
134:   lu->B.Mtype        = GE;
135:   lu->B.nrow         = m;
136:   lu->B.ncol         = 1;
137:   ((DNformat*)lu->B.Store)->lda   = m;
138:   ((DNformat*)lu->B.Store)->nzval = array;
139:   dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
140:   if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
141:   VecRestoreArray(x,&array);
142:   return(0);
143: }

145: static int StatInitCalled = 0;

147: int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F)
148: {
149:   Mat_SeqAIJ         *aa = (Mat_SeqAIJ*)(A)->data;
150:   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr;
151:   int                *etree,i,ierr;

154:   /* Create the SuperMatrix for A^T:

156:        Since SuperLU only likes column-oriented matrices,we pass it the transpose,
157:        and then solve A^T X = B in MatSolve().
158:   */
159:   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */
160:     lu->A.Stype   = NC;
161:     lu->A.Dtype   = _D;
162:     lu->A.Mtype   = GE;
163:     lu->A.nrow    = A->n;
164:     lu->A.ncol    = A->m;
165: 
166:     PetscMalloc(sizeof(NCformat),&lu->store);
167:     PetscMalloc(sizeof(DNformat),&lu->B.Store);
168:   }
169:   lu->store->nnz    = aa->nz;
170:   lu->store->colptr = aa->i;
171:   lu->store->rowind = aa->j;
172:   lu->store->nzval  = aa->a;
173:   lu->A.Store       = lu->store;
174: 
175:   /* Shift indices down */
176:   if (aa->indexshift) {
177:     for(i = 0; i < A->m+1; i++) aa->i[i]--;
178:     for(i = 0; i < aa->nz; i++) aa->j[i]--;
179:   }
180: 
181:   /* Set SuperLU options */
182:   lu->relax      = sp_ienv(2);
183:   lu->panel_size = sp_ienv(1);
184:   /* We have to initialize global data or SuperLU crashes (sucky design) */
185:   if (!StatInitCalled) {
186:     StatInit(lu->panel_size,lu->relax);
187:   }
188:   StatInitCalled++;

190:   /* Create the elimination tree */
191:   ierr           = PetscMalloc(A->n*sizeof(int),&etree);
192:   sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC);
193:   /* Factor the matrix */
194:   dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
195:   if (ierr < 0) {
196:     SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
197:   } else if (ierr > 0) {
198:     if (ierr <= A->m) {
199:       SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr);
200:     } else {
201:       SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m);
202:     }
203:   }

205:   /* Shift indices up */
206:   if (aa->indexshift) {
207:     for (i = 0; i < A->n+1; i++)  aa->i[i]++;
208:     for (i = 0; i < aa->nz; i++)  aa->j[i]++;
209:   }

211:   /* Cleanup */
212:   PetscFree(etree);

214:   lu->flg = SAME_NONZERO_PATTERN;
215:   return(0);
216: }

218: /*
219:    Note the r permutation is ignored
220: */
221: int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
222: {
223:   Mat                 B;
224:   Mat_SeqAIJ_SuperLU  *lu;
225:   int                 ierr,*ca;

228: 
229:   ierr            = MatCreateSeqAIJ(A->comm,A->m,A->n,0,PETSC_NULL,F);
230:   B               = *F;
231:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU;
232:   B->ops->solve           = MatSolve_SeqAIJ_SuperLU;
233:   B->ops->destroy         = MatDestroy_SeqAIJ_SuperLU;
234:   B->factor               = FACTOR_LU;
235: 
236:   ierr            = PetscNew(Mat_SeqAIJ_SuperLU,&lu);
237:   B->spptr        = (void*)lu;
238:   PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU",
239:                                     (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);

241:   /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */
242:   PetscMalloc(A->n*sizeof(int),&lu->perm_r);
243:   PetscMalloc(A->m*sizeof(int),&lu->perm_c);
244:   ISGetIndices(c,&ca);
245:   PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));
246:   ISRestoreIndices(c,&ca);
247: 
248:   if (info) {
249:     lu->pivot_threshold = info->dtcol;
250:   } else {
251:     lu->pivot_threshold = 0.0;   /* no pivoting */
252:   }

254:   PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU));

256:   lu->flg = DIFFERENT_NONZERO_PATTERN;
257:   return(0);
258: }

260: int MatUseSuperLU_SeqAIJ(Mat A)
261: {
262:   PetscTruth flg;
263:   int        ierr;

267:   PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
268:   if (!flg) return(0);

270:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU;

272:   return(0);
273: }

275: #else

277: int MatUseSuperLU_SeqAIJ(Mat A)
278: {
280:   return(0);
281: }

283: #endif