Actual source code: superlu_dist.c

  1: /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
  2: /* 
  3:         Provides an interface to the SuperLU_DIST sparse solver
  4: */

 6:  #include src/mat/impls/aij/seq/aij.h
 7:  #include src/mat/impls/aij/mpi/mpiaij.h
  8: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
  9: #include "stdlib.h"
 10: #endif

 12: #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)

 14: EXTERN_C_BEGIN
 15: #include "superlu_ddefs.h"
 16: EXTERN_C_END

 18: typedef struct {
 19:   int                     nprow,npcol;
 20:   gridinfo_t              grid;
 21:   superlu_options_t       options;
 22:   SuperMatrix             A_sup;
 23:   ScalePermstruct_t       ScalePermstruct;
 24:   LUstruct_t              LUstruct;
 25:   int                     StatPrint;
 26: } Mat_MPIAIJ_SuperLU_DIST;

 28: extern int MatDestroy_MPIAIJ(Mat);
 29: extern int MatDestroy_SeqAIJ(Mat);

 31: #if !defined(PETSC_HAVE_SUPERLU)
 32: /* SuperLU function: Convert a row compressed storage into a column compressed storage. */
 33: void dCompRow_to_CompCol(int m, int n, int nnz,
 34:                     double *a, int *colind, int *rowptr,
 35:                     double **at, int_t **rowind, int_t **colptr)
 36: {
 37:     int i, j, col, relpos, *marker;

 39:     /* Allocate storage for another copy of the matrix. */
 40:     *at = (double *) doubleMalloc_dist(nnz);
 41:     *rowind = (int *) intMalloc_dist(nnz);
 42:     *colptr = (int *) intMalloc_dist(n+1);
 43:     marker = (int *) intCalloc_dist(n);

 45:     /* Get counts of each column of A, and set up column pointers */
 46:     for (i = 0; i < m; ++i)
 47:         for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
 48:     (*colptr)[0] = 0;
 49:     for (j = 0; j < n; ++j) {
 50:         (*colptr)[j+1] = (*colptr)[j] + marker[j];
 51:         marker[j] = (*colptr)[j];
 52:     }

 54:     /* Transfer the matrix into the compressed column storage. */
 55:     for (i = 0; i < m; ++i) {
 56:         for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
 57:             col = colind[j];
 58:             relpos = marker[col];
 59:             (*rowind)[relpos] = i;
 60:             (*at)[relpos] = a[j];
 61:             ++marker[col];
 62:         }
 63:     }

 65:     SUPERLU_FREE(marker);
 66: }
 67: #else
 68: EXTERN_C_BEGIN
 69: extern void dCompRow_to_CompCol(int,int,int,double*,int*,int*,double**,int_t**,int_t**);
 70: EXTERN_C_END
 71: #endif /* PETSC_HAVE_SUPERLU*/

 73: int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A)
 74: {
 75:   Mat_MPIAIJ              *a  = (Mat_MPIAIJ*)A->data;
 76:   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
 77:   int                     ierr, size=a->size;
 78: 
 80:   /* Deallocate SuperLU_DIST storage */
 81:   Destroy_CompCol_Matrix_dist(&lu->A_sup);
 82:   Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
 83:   ScalePermstructFree(&lu->ScalePermstruct);
 84:   LUstructFree(&lu->LUstruct);

 86:   /* Release the SuperLU_DIST process grid. */
 87:   superlu_gridexit(&lu->grid);

 89:   PetscFree(lu);
 90: 
 91:   if (size == 1){
 92:     MatDestroy_SeqAIJ(A);
 93:   } else {
 94:     MatDestroy_MPIAIJ(A);
 95:   }
 96: 
 97:   return(0);
 98: }

100: int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
101: {
102:   Mat_MPIAIJ              *aa = (Mat_MPIAIJ*)A->data;
103:   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
104:   int                     ierr, size=aa->size;
105:   int                     m=A->M, N=A->N;
106:   superlu_options_t       options=lu->options;
107:   SuperLUStat_t           stat;
108:   double                  berr[1],*bptr;
109:   int                     info, nrhs=1;
110:   Vec                     x_seq;
111:   IS                      iden;
112:   VecScatter              scat;
113:   PetscLogDouble          time0,time,time_min,time_max;
114: 
116:   if (size > 1) {  /* convert mpi vector b to seq vector x_seq */
117:     VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
118:     ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
119:     VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
120:     ISDestroy(iden);

122:     VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
123:     VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);
124:     VecGetArray(x_seq,&bptr);
125:   } else {
126:     VecCopy(b_mpi,x);
127:     VecGetArray(x,&bptr);
128:   }
129: 
130:   options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/

132:   PStatInit(&stat);        /* Initialize the statistics variables. */
133:   if (lu->StatPrint) {
134:     MPI_Barrier(A->comm); /* to be removed */
135:     PetscGetTime(&time0);  /* to be removed */
136:   }
137:   pdgssvx_ABglobal(&options, &lu->A_sup, &lu->ScalePermstruct, bptr, m, nrhs,
138:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
139:   if (lu->StatPrint) {
140:     PetscGetTime(&time);  /* to be removed */
141:      PStatPrint(&stat, &lu->grid);     /* Print the statistics. */
142:   }
143:   PStatFree(&stat);
144: 
145:   if (size > 1) {    /* convert seq x to mpi x */
146:     VecRestoreArray(x_seq,&bptr);
147:     VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
148:     VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);
149:     VecScatterDestroy(scat);
150:     VecDestroy(x_seq);
151:   } else {
152:     VecRestoreArray(x,&bptr);
153:   }
154:   if (lu->StatPrint) {
155:     time0 = time - time0;
156:     MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
157:     MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
158:     MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
159:     time = time/size; /* average time */
160:     PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %gnn",time_max,time_min,time);
161:   }

163:   return(0);
164: }

166: int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F)
167: {
168:   Mat_MPIAIJ              *fac = (Mat_MPIAIJ*)(*F)->data;
169:   Mat                     *tseq,A_seq = PETSC_NULL;
170:   Mat_SeqAIJ              *aa;
171:   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr;
172:   int                     M=A->M,N=A->N,info,ierr,size=fac->size,i;
173:   SuperLUStat_t           stat;
174:   double                  *berr=0, *bptr=0;
175:   int_t                   *asub, *xa;
176:   double                  *a;
177:   SuperMatrix             A_sup;
178:   IS                      isrow;
179:   PetscLogDouble          time0[2],time[2],time_min[2],time_max[2];

182:   if (lu->StatPrint) {
183:     MPI_Barrier(A->comm);
184:     PetscGetTime(&time0[0]);
185:   }

187:   if (size > 1) { /* convert mpi A to seq mat A */
188:     ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
189:     MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
190:     ISDestroy(isrow);
191: 
192:     A_seq = *tseq;
193:     PetscFree(tseq);
194:     aa =  (Mat_SeqAIJ*)A_seq->data;
195:   } else {
196:     aa =  (Mat_SeqAIJ*)A->data;
197:   }

199:   /* Allocate storage for compressed column representation. */
200:   dallocateA_dist(N, aa->nz, &a, &asub, &xa);
201: 
202:   /* Convert Petsc NR matrix storage to SuperLU_DIST NC storage */
203:   dCompRow_to_CompCol(M,N,aa->nz,aa->a,aa->j,aa->i,&a, &asub, &xa);

205:   if (lu->StatPrint) {
206:     PetscGetTime(&time[0]);
207:     time0[0] = time[0] - time0[0];
208:   }

210:   /* Create compressed column matrix A_sup. */
211:   dCreate_CompCol_Matrix_dist(&A_sup, M, N, aa->nz, a, asub, xa, NC, D, GE);

213:   /* Factor the matrix. */
214:   PStatInit(&stat);                /* Initialize the statistics variables. */

216:   if (lu->StatPrint) {
217:     MPI_Barrier(A->comm);
218:     PetscGetTime(&time0[1]);
219:   }
220:   pdgssvx_ABglobal(&lu->options, &A_sup, &lu->ScalePermstruct, bptr, M, 0,
221:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
222:   if (lu->StatPrint) {
223:     PetscGetTime(&time[1]);  /* to be removed */
224:     time0[1] = time[1] - time0[1];
225:     if (lu->StatPrint) PStatPrint(&stat, &lu->grid);        /* Print the statistics. */
226:   }
227:   PStatFree(&stat);

229:   lu->A_sup        = A_sup;
230:   lu->options.Fact = SamePattern; /* Sparsity pattern of A and perm_c can be reused. */
231:   if (size > 1){
232:     MatDestroy(A_seq);
233:   }

235:   if (lu->StatPrint) {
236:     MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
237:     MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
238:     MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
239:     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
240:     PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %gn",time_max[0],time_min[0],time[0]);
241:     PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %gnn",time_max[1],time_min[1],time[1]);
242:   }
243:   (*F)->assembled             = PETSC_TRUE;
244:   return(0);
245: }

247: /* Note the Petsc r and c permutations are ignored */
248: int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatLUInfo *info,Mat *F)
249: {
250:   Mat_MPIAIJ              *fac;
251:   Mat_MPIAIJ_SuperLU_DIST *lu;
252:   int                     ierr,M=A->M,N=A->N,size;
253:   gridinfo_t              grid;
254:   superlu_options_t       options;
255:   ScalePermstruct_t       ScalePermstruct;
256:   LUstruct_t              LUstruct;
257:   char                    buff[32];
258:   PetscTruth              flg;
259:   char                    *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
260:   char                    *prtype[] = {"LargeDiag","NATURAL"};
262: 
263:   PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);

265:   /* Create the factorization matrix F */
266:   MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,M,N,0,PETSC_NULL,0,PETSC_NULL,F);

268:   (*F)->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
269:   (*F)->ops->solve            = MatSolve_MPIAIJ_SuperLU_DIST;
270:   (*F)->ops->destroy          = MatDestroy_MPIAIJ_SuperLU_DIST;
271:   (*F)->factor                = FACTOR_LU;
272:   (*F)->spptr                  = (void*)lu;
273:   fac                         = (Mat_MPIAIJ*)(*F)->data;

275:   /* Set the input options */
276:   set_default_options(&options);

278:   MPI_Comm_size(A->comm,&size);
279:   lu->nprow = size/2;               /* Default process rows.      */
280:   if (lu->nprow == 0) lu->nprow = 1;
281:   lu->npcol = size/lu->nprow;           /* Default process columns.   */

283:   PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");
284: 
285:     PetscOptionsInt("-mat_aij_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
286:     PetscOptionsInt("-mat_aij_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
287:     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
288: 
289:     PetscOptionsLogical("-mat_aij_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
290:     if (!flg) {
291:       options.Equil = NO;
292:     }

294:     PetscOptionsEList("-mat_aij_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);
295:     while (flg) {
296:       PetscStrcmp(buff,"LargeDiag",&flg);
297:       if (flg) {
298:         options.RowPerm = LargeDiag;
299:         break;
300:       }
301:       PetscStrcmp(buff,"NATURAL",&flg);
302:       if (flg) {
303:         options.RowPerm = NOROWPERM;
304:         break;
305:       }
306:       SETERRQ1(1,"Unknown row permutation %s",buff);
307:     }

309:     PetscOptionsEList("-mat_aij_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);
310:     while (flg) {
311:       PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);
312:       if (flg) {
313:         options.ColPerm = MMD_AT_PLUS_A;
314:         break;
315:       }
316:       PetscStrcmp(buff,"NATURAL",&flg);
317:       if (flg) {
318:         options.ColPerm = NATURAL;
319:         break;
320:       }
321:       PetscStrcmp(buff,"MMD_ATA",&flg);
322:       if (flg) {
323:         options.ColPerm = MMD_ATA;
324:         break;
325:       }
326:       PetscStrcmp(buff,"COLAMD",&flg);
327:       if (flg) {
328:         options.ColPerm = COLAMD;
329:         break;
330:       }
331:       SETERRQ1(1,"Unknown column permutation %s",buff);
332:     }

334:     PetscOptionsLogical("-mat_aij_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
335:     if (!flg) {
336:       options.ReplaceTinyPivot = NO;
337:     }

339:     options.IterRefine = NOREFINE;
340:     PetscOptionsLogical("-mat_aij_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
341:     if (flg) {
342:       options.IterRefine = DOUBLE;
343:     }

345:     if (PetscLogPrintInfo) {
346:       lu->StatPrint = (int)PETSC_TRUE;
347:     } else {
348:       lu->StatPrint = (int)PETSC_FALSE;
349:     }
350:     PetscOptionsLogical("-mat_aij_superlu_dist_statprint","Print factorization information","None",
351:                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);
352:   PetscOptionsEnd();

354:   /* Initialize the SuperLU process grid. */
355:   superlu_gridinit(A->comm, lu->nprow, lu->npcol, &grid);

357:   /* Initialize ScalePermstruct and LUstruct. */
358:   ScalePermstructInit(M, N, &ScalePermstruct);
359:   LUstructInit(M, N, &LUstruct);

361:   lu->ScalePermstruct = ScalePermstruct;
362:   lu->LUstruct        = LUstruct;
363:   lu->options         = options;
364:   lu->grid            = grid;
365:   fac->size           = size;

367:   return(0);
368: }

370: int MatUseSuperLU_DIST_MPIAIJ(Mat A)
371: {
373:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
374:   A->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
375:   return(0);
376: }

378: int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer)
379: {
380:   Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
381:   superlu_options_t       options;
382:   int                     ierr;
383:   char                    *colperm;

386:   /* check if matrix is superlu_dist type */
387:   if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) return(0);

389:   options = lu->options;
390:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:n");
391:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s n",(options.Equil != NO) ? "true": "false");
392:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s n",(options.ReplaceTinyPivot != NO) ? "true": "false");
393:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s n",(options.IterRefine == DOUBLE) ? "true": "false");
394:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d n",lu->nprow,lu->npcol);
395:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
396:   if (options.ColPerm == NATURAL) {
397:     colperm = "NATURAL";
398:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
399:     colperm = "MMD_AT_PLUS_A";
400:   } else if (options.ColPerm == MMD_ATA) {
401:     colperm = "MMD_ATA";
402:   } else if (options.ColPerm == COLAMD) {
403:     colperm = "COLAMD";
404:   } else {
405:     SETERRQ(1,"Unknown column permutation");
406:   }
407:   PetscViewerASCIIPrintf(viewer,"  Column permutation %s n",colperm);
408:   return(0);
409: }

411: #else

413: int MatUseSuperLU_DIST_MPIAIJ(Mat A)
414: {
416:   return(0);
417: }

419: #endif