Actual source code: dscpack.c
1: /*$Id: dscpack.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2: /*
3: Provides an interface to the DSCPACK (Domain-Separator Codes) sparse direct solver
4: */
6: #include src/mat/impls/baij/seq/baij.h
7: #include src/mat/impls/baij/mpi/mpibaij.h
9: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
11: EXTERN_C_BEGIN
12: #include "dscmain.h"
13: EXTERN_C_END
15: typedef struct {
16: DSC_Solver My_DSC_Solver;
17: int num_local_strucs, *local_struc_old_num,
18: num_local_cols, num_local_nonz,
19: *global_struc_new_col_num,
20: *global_struc_new_num, *global_struc_owner,
21: dsc_id,bs,*local_cols_old_num,*replication;
22: int order_code,scheme_code,factor_type, stat,
23: LBLASLevel,DBLASLevel,max_mem_allowed;
24: MatStructure flg;
25: IS my_cols,iden,iden_dsc;
26: Vec vec_dsc;
27: VecScatter scat;
28: } Mat_MPIBAIJ_DSC;
30: extern int MatDestroy_MPIBAIJ(Mat);
31: extern int MatDestroy_SeqBAIJ(Mat);
33: /* DSC function */
34: void isort2(int size, int *list, int *index)
35: {
36: /* in increasing order */
37: /* index will contain indices such that */
38: /* list can be accessed in sorted order */
39: int i, j, x, y;
41: for (i=0; i<size; i++) index[i] =i;
43: for (i=1; i<size; i++){
44: y= index[i];
45: x=list[index[i]];
46: for (j=i-1; ((j>=0) && (x<list[index[j]])); j--)
47: index[j+1]=index[j];
48: index[j+1]=y;
49: }
50: }/*end isort2*/
52: int BAIJtoMyANonz( int *AIndex, int *AStruct, int bs,
53: RealNumberType *ANonz, int NumLocalStructs,
54: int NumLocalNonz, int *GlobalStructNewColNum,
55: int *LocalStructOldNum,
56: int *LocalStructLocalNum,
57: RealNumberType **adr_MyANonz)
58: /*
59: Extract non-zero values of lower triangular part
60: of the permuted matrix that belong to this processor.
62: Only output parameter is adr_MyANonz -- is malloced and changed.
63: Rest are input parameters left unchanged.
65: When LocalStructLocalNum == PETSC_NULL,
66: AIndex, AStruct, and ANonz contain entire original matrix A
67: in PETSc SeqBAIJ format,
68: otherwise,
69: AIndex, AStruct, and ANonz are indeces for the submatrix
70: of A whose colomns (in increasing order) belong to this processor.
72: Other variables supply information on ownership of columns
73: and the new numbering in a fill-reducing permutation
75: This information is used to setup lower half of A nonzeroes
76: for columns owned by this processor
77: */
78: {
79: int i, j, k, iold,inew, jj, kk,ierr, bs2=bs*bs,
80: *idx, *NewColNum,
81: MyANonz_last, max_struct=0, struct_size;
82: RealNumberType *MyANonz;
86: /* loop: to find maximum number of subscripts over columns
87: assigned to this processor */
88: for (i=0; i <NumLocalStructs; i++) {
89: /* for each struct i (local) assigned to this processor */
90: if (LocalStructLocalNum){
91: iold = LocalStructLocalNum[i];
92: } else {
93: iold = LocalStructOldNum[i];
94: }
95:
96: struct_size = AIndex[iold+1] - AIndex[iold];
97: if ( max_struct <= struct_size) max_struct = struct_size;
98: }
100: /* allocate tmp arrays large enough to hold densest struct */
101: PetscMalloc((2*max_struct+1)*sizeof(int),&NewColNum);
102: idx = NewColNum + max_struct;
103:
104: PetscMalloc(NumLocalNonz*sizeof(RealNumberType),&MyANonz);
105: *adr_MyANonz = MyANonz;
107: /* loop to set up nonzeroes in MyANonz */
108: MyANonz_last = 0 ; /* points to first empty space in MyANonz */
109: for (i=0; i <NumLocalStructs; i++) {
111: /* for each struct i (local) assigned to this processor */
112: if (LocalStructLocalNum){
113: iold = LocalStructLocalNum[i];
114: } else {
115: iold = LocalStructOldNum[i];
116: }
118: struct_size = AIndex[iold+1] - AIndex[iold];
119: for (k=0, j=AIndex[iold]; j<AIndex[iold+1]; j++){
120: NewColNum[k] = GlobalStructNewColNum[AStruct[j]];
121: k++;
122: }
123: isort2(struct_size, NewColNum, idx);
124:
125: kk = AIndex[iold]*bs2; /* points to 1st element of iold block col in ANonz */
126: inew = GlobalStructNewColNum[LocalStructOldNum[i]];
128: for (jj = 0; jj < bs; jj++) {
129: for (j=0; j<struct_size; j++){
130: for ( k = 0; k<bs; k++){
131: if (NewColNum[idx[j]] + k >= inew)
132: MyANonz[MyANonz_last++] = ANonz[kk + idx[j]*bs2 + k*bs + jj];
133: }
134: }
135: inew++;
136: }
137: } /* end outer loop for i */
139: PetscFree(NewColNum);
140: if (MyANonz_last != NumLocalNonz)
141: SETERRQ2(1,"MyANonz_last %d != NumLocalNonz %dn",MyANonz_last, NumLocalNonz);
142: return(0);
143: }
145: int MatDestroy_MPIBAIJ_DSCPACK(Mat A)
146: {
147: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)A->spptr;
148: int ierr, size;
149:
151: MPI_Comm_size(A->comm,&size);
153: if (lu->dsc_id != -1) {
154: if(lu->stat) DSC_DoStats(lu->My_DSC_Solver);
155: DSC_FreeAll(lu->My_DSC_Solver);
156: DSC_Close0(lu->My_DSC_Solver);
158: PetscFree(lu->local_cols_old_num);
159: }
160: DSC_End(lu->My_DSC_Solver);
161:
162: ISDestroy(lu->my_cols);
163: PetscFree(lu->replication);
164: VecDestroy(lu->vec_dsc);
165: ISDestroy(lu->iden_dsc);
166: VecScatterDestroy(lu->scat);
167:
168: if (size >1) ISDestroy(lu->iden);
169: PetscFree(lu);
171: if (size == 1){
172: MatDestroy_SeqBAIJ(A);
173: } else {
174: MatDestroy_MPIBAIJ(A);
175: }
176:
177: return(0);
178: }
180: int MatSolve_MPIBAIJ_DSCPACK(Mat A,Vec b,Vec x)
181: {
182: Mat_MPIBAIJ_DSC *lu= (Mat_MPIBAIJ_DSC*)A->spptr;
183: int ierr;
184: RealNumberType *solution_vec, *rhs_vec;
187: /* scatter b into seq vec_dsc */
188: if ( !lu->scat ) {
189: VecScatterCreate(b,lu->my_cols,lu->vec_dsc,lu->iden_dsc,&lu->scat);
190: }
191: VecScatterBegin(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
192: VecScatterEnd(b,lu->vec_dsc,INSERT_VALUES,SCATTER_FORWARD,lu->scat);
194: if (lu->dsc_id != -1){
195: VecGetArray(lu->vec_dsc,&rhs_vec);
196: DSC_InputRhsLocalVec(lu->My_DSC_Solver, rhs_vec, lu->num_local_cols);
197: VecRestoreArray(lu->vec_dsc,&rhs_vec);
198:
199: DSC_Solve(lu->My_DSC_Solver);
200: if (ierr != DSC_NO_ERROR) {
201: DSC_ErrorDisplay(lu->My_DSC_Solver);
202: SETERRQ(1,"Error in calling DSC_Solve");
203: }
205: /* get the permuted local solution */
206: VecGetArray(lu->vec_dsc,&solution_vec);
207: DSC_GetLocalSolution(lu->My_DSC_Solver,solution_vec, lu->num_local_cols);
208: VecRestoreArray(lu->vec_dsc,&solution_vec);
210: } /* end of if (lu->dsc_id != -1) */
212: /* put permuted local solution solution_vec into x in the original order */
213: VecScatterBegin(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
214: VecScatterEnd(lu->vec_dsc,x,INSERT_VALUES,SCATTER_REVERSE,lu->scat);
216: return(0);
217: }
219: int MatCholeskyFactorNumeric_MPIBAIJ_DSCPACK(Mat A,Mat *F)
220: {
221: Mat_SeqBAIJ *a_seq;
222: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)(*F)->spptr;
223: Mat *tseq,A_seq;
224: RealNumberType *my_a_nonz;
225: int ierr, M=A->M, Mbs=M/lu->bs, size,
226: max_mem_estimate, max_single_malloc_blk,
227: number_of_procs,i,j,next,iold,
228: *idx,*iidx,*itmp;
229: IS my_cols_sorted;
230:
232: MPI_Comm_size(A->comm,&size);
233:
234: if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
236: /* convert A to A_seq */
237: if (size > 1) {
238: ISCreateStride(PETSC_COMM_SELF,M,0,1,&lu->iden);
239: MatGetSubMatrices(A,1,&lu->iden,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
240:
241: A_seq = *tseq;
242: PetscFree(tseq);
243: a_seq = (Mat_SeqBAIJ*)A_seq->data;
244: } else {
245: a_seq = (Mat_SeqBAIJ*)A->data;
246: }
247:
248: PetscMalloc(Mbs*sizeof(int),&lu->replication);
249: for (i=0; i<Mbs; i++) lu->replication[i] = lu->bs;
251: number_of_procs = DSC_Analyze(Mbs, a_seq->i, a_seq->j, lu->replication);
252:
253: i = size;
254: if ( number_of_procs < i ) i = number_of_procs;
255: number_of_procs = 1;
256: while ( i > 1 ){
257: number_of_procs *= 2; i /= 2;
258: }
260: /* DSC_Solver starts */
261: lu->My_DSC_Solver = DSC_Begin();
262: DSC_Open0( lu->My_DSC_Solver, number_of_procs, &lu->dsc_id, PETSC_COMM_WORLD );
264: if (lu->dsc_id != -1) {
265: DSC_Order(lu->My_DSC_Solver,lu->order_code,Mbs,a_seq->i,a_seq->j,lu->replication,
266: &M,&lu->num_local_strucs,
267: &lu->num_local_cols, &lu->num_local_nonz, &lu->global_struc_new_col_num,
268: &lu->global_struc_new_num, &lu->global_struc_owner,
269: &lu->local_struc_old_num);
270: if (ierr != DSC_NO_ERROR) {
271: DSC_ErrorDisplay(lu->My_DSC_Solver);
272: SETERRQ(1,"Error when use DSC_Order()");
273: }
275: DSC_SFactor(lu->My_DSC_Solver,&max_mem_estimate,&max_single_malloc_blk,
276: lu->max_mem_allowed, lu->LBLASLevel, lu->DBLASLevel);
277: if (ierr != DSC_NO_ERROR) {
278: DSC_ErrorDisplay(lu->My_DSC_Solver);
279: SETERRQ(1,"Error when use DSC_Order");
280: }
282: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
283: lu->num_local_strucs, lu->num_local_nonz,
284: lu->global_struc_new_col_num,
285: lu->local_struc_old_num,
286: PETSC_NULL,
287: &my_a_nonz);
288: if (ierr <0) {
289: DSC_ErrorDisplay(lu->My_DSC_Solver);
290: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
291: }
293: /* get local_cols_old_num and IS my_cols to be used later */
294: PetscMalloc(lu->num_local_cols*sizeof(int),&lu->local_cols_old_num);
295: for (next = 0, i=0; i<lu->num_local_strucs; i++){
296: iold = lu->bs*lu->local_struc_old_num[i];
297: for (j=0; j<lu->bs; j++)
298: lu->local_cols_old_num[next++] = iold++;
299: }
300: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
301:
302: } else { /* lu->dsc_id == -1 */
303: lu->num_local_cols = 0;
304: lu->local_cols_old_num = 0;
305: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,lu->local_cols_old_num,&lu->my_cols);
306: }
307: /* generate vec_dsc and iden_dsc to be used later */
308: VecCreateSeq(PETSC_COMM_SELF,lu->num_local_cols,&lu->vec_dsc);
309: ISCreateStride(PETSC_COMM_SELF,lu->num_local_cols,0,1,&lu->iden_dsc);
310: lu->scat = PETSC_NULL;
312: if ( size>1 ) {MatDestroy(A_seq); }
314: } else { /* use previously computed symbolic factor */
315: /* convert A to my A_seq */
316: if (size > 1) {
317: if (lu->dsc_id == -1) {
318: itmp = 0;
319: } else {
320: PetscMalloc(2*lu->num_local_strucs*sizeof(int),&idx);
321: iidx = idx + lu->num_local_strucs;
322: PetscMalloc(lu->num_local_cols*sizeof(int),&itmp);
323:
324: isort2(lu->num_local_strucs, lu->local_struc_old_num, idx);
325: for (next=0, i=0; i< lu->num_local_strucs; i++) {
326: iold = lu->bs*lu->local_struc_old_num[idx[i]];
327: for (j=0; j<lu->bs; j++){
328: itmp[next++] = iold++; /* sorted local_cols_old_num */
329: }
330: }
331: for (i=0; i< lu->num_local_strucs; i++) {
332: iidx[idx[i]] = i; /* inverse of idx */
333: }
334: } /* end of (lu->dsc_id == -1) */
335: ISCreateGeneral(PETSC_COMM_SELF,lu->num_local_cols,itmp,&my_cols_sorted);
336: MatGetSubMatrices(A,1,&my_cols_sorted,&lu->iden,MAT_INITIAL_MATRIX,&tseq);
337: ISDestroy(my_cols_sorted);
338:
339: A_seq = *tseq;
340: PetscFree(tseq);
341:
342: if (lu->dsc_id != -1) {
343: DSC_ReFactorInitialize(lu->My_DSC_Solver);
345: a_seq = (Mat_SeqBAIJ*)A_seq->data;
346: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
347: lu->num_local_strucs, lu->num_local_nonz,
348: lu->global_struc_new_col_num,
349: lu->local_struc_old_num,
350: iidx,
351: &my_a_nonz);
352: if (ierr <0) {
353: DSC_ErrorDisplay(lu->My_DSC_Solver);
354: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
355: }
356:
357: PetscFree(idx);
358: PetscFree(itmp);
359: } /* end of if(lu->dsc_id != -1) */
360: } else { /* size == 1 */
361: a_seq = (Mat_SeqBAIJ*)A->data;
362:
363: BAIJtoMyANonz(a_seq->i, a_seq->j, lu->bs, a_seq->a,
364: lu->num_local_strucs, lu->num_local_nonz,
365: lu->global_struc_new_col_num,
366: lu->local_struc_old_num,
367: PETSC_NULL,
368: &my_a_nonz);
369: if (ierr <0) {
370: DSC_ErrorDisplay(lu->My_DSC_Solver);
371: SETERRQ1(1,"Error setting local nonzeroes at processor %d n", lu->dsc_id);
372: }
373: }
374: if ( size>1 ) {MatDestroy(A_seq); }
375: }
376:
377: if (lu->dsc_id != -1) {
378: DSC_NFactor(lu->My_DSC_Solver, lu->scheme_code, my_a_nonz, lu->factor_type, lu->LBLASLevel, lu->DBLASLevel);
379: PetscFree(my_a_nonz);
380: }
381:
382: (*F)->assembled = PETSC_TRUE;
383: lu->flg = SAME_NONZERO_PATTERN;
385: return(0);
386: }
388: /* Note the Petsc permutation r is ignored */
389: int MatCholeskyFactorSymbolic_MPIBAIJ_DSCPACK(Mat A,IS r,PetscReal f,Mat *F)
390: {
391: Mat_MPIBAIJ_DSC *lu;
392: int ierr,M=A->M,size;
393: PetscTruth flg;
394: char buff[32], *ftype[] = {"LLT","LDLT"},
395: *ltype[] = {"LBLAS1","LBLAS2","LBLAS3"},
396: *dtype[] = {"DBLAS1","DBLAS2"};
399: PetscNew(Mat_MPIBAIJ_DSC,&lu);
401: /* Create the factorization matrix F */
402: MatGetBlockSize(A,&lu->bs);
403: MatCreateMPIBAIJ(A->comm,lu->bs,PETSC_DECIDE,PETSC_DECIDE,M,M,0,PETSC_NULL,0,PETSC_NULL,F);
404:
405: (*F)->spptr = (Mat_MPIBAIJ_DSC*)lu;
406: (*F)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIBAIJ_DSCPACK;
407: (*F)->ops->solve = MatSolve_MPIBAIJ_DSCPACK;
408: (*F)->ops->destroy = MatDestroy_MPIBAIJ_DSCPACK;
409: (*F)->factor = FACTOR_CHOLESKY;
411: /* Set the default input options */
412: lu->order_code = 2;
413: lu->scheme_code = 1;
414: lu->factor_type = 1;
415: lu->stat = 0; /* do not display stats */
416: lu->LBLASLevel = DSC_LBLAS3;
417: lu->DBLASLevel = DSC_DBLAS2;
418: lu->max_mem_allowed = 256;
420: /* Get the runtime input options */
421: PetscOptionsBegin(A->comm,A->prefix,"DSCPACK Options","Mat");
423: PetscOptionsInt("-mat_dscpack_order","order_code: n
424: 1 = ND, 2 = Hybrid with Minimum Degree, 3 = Hybrid with Minimum Deficiency", 425: "None",
426: lu->order_code,&lu->order_code,PETSC_NULL);
428: PetscOptionsInt("-mat_dscpack_scheme","scheme_code: n
429: 1 = standard factorization, 2 = factorization + selective inversion", 430: "None",
431: lu->scheme_code,&lu->scheme_code,PETSC_NULL);
432:
433: PetscOptionsEList("-mat_dscpack_factor","factor_type","None",
434: ftype,2,ftype[0],buff,32,&flg);
435: while (flg) {
436: PetscStrcmp(buff,"LLT",&flg);
437: if (flg) {
438: lu->factor_type = DSC_LLT;
439: break;
440: }
441: PetscStrcmp(buff,"LDLT",&flg);
442: if (flg) {
443: lu->factor_type = DSC_LDLT;
444: break;
445: }
446: SETERRQ1(1,"Unknown factor type %s",buff);
447: }
448: PetscOptionsInt("-mat_dscpack_MaxMemAllowed","", 449: "None",
450: lu->max_mem_allowed,&lu->max_mem_allowed,PETSC_NULL);
452: PetscOptionsInt("-mat_dscpack_stats","display stats: 0 = no display, 1 = display",
453: "None", lu->stat,&lu->stat,PETSC_NULL);
454:
455: PetscOptionsEList("-mat_dscpack_LBLAS","BLAS level used in the local phase","None",
456: ltype,3,ltype[2],buff,32,&flg);
457: while (flg) {
458: PetscStrcmp(buff,"LBLAS1",&flg);
459: if (flg) {
460: lu->LBLASLevel = DSC_LBLAS1;
461: break;
462: }
463: PetscStrcmp(buff,"LBLAS2",&flg);
464: if (flg) {
465: lu->LBLASLevel = DSC_LBLAS2;
466: break;
467: }
468: PetscStrcmp(buff,"LBLAS3",&flg);
469: if (flg) {
470: lu->LBLASLevel = DSC_LBLAS3;
471: break;
472: }
473: SETERRQ1(1,"Unknown local phase BLAS level %s",buff);
474: }
476: PetscOptionsEList("-mat_dscpack_DBLAS","BLAS level used in the distributed phase","None",
477: dtype,2,dtype[1],buff,32,&flg);
478: while (flg) {
479: PetscStrcmp(buff,"DBLAS1",&flg);
480: if (flg) {
481: lu->DBLASLevel = DSC_DBLAS1;
482: break;
483: }
484: PetscStrcmp(buff,"DBLAS2",&flg);
485: if (flg) {
486: lu->DBLASLevel = DSC_DBLAS2;
487: break;
488: }
489: SETERRQ1(1,"Unknown distributed phase BLAS level %s",buff);
490: }
492: PetscOptionsEnd();
493:
494: lu->flg = DIFFERENT_NONZERO_PATTERN;
495: return(0);
496: }
498: int MatUseDSCPACK_MPIBAIJ(Mat A)
499: {
501: A->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIBAIJ_DSCPACK;
502: return(0);
503: }
505: int MatMPIBAIJFactorInfo_DSCPACK(Mat A,PetscViewer viewer)
506: {
507: Mat_MPIBAIJ_DSC *lu=(Mat_MPIBAIJ_DSC*)A->spptr;
508: int ierr;
509: char *s;
510:
512: /* check if matrix is dscpack type */
513: if (A->ops->solve != MatSolve_MPIBAIJ_DSCPACK) return(0);
515: PetscViewerASCIIPrintf(viewer,"DSCPACK run parameters:n");
517: switch (lu->order_code) {
518: case 1: s = "ND"; break;
519: case 2: s = "Hybrid with Minimum Degree"; break;
520: case 3: s = "Hybrid with Minimum Deficiency"; break;
521: }
522: PetscViewerASCIIPrintf(viewer," order_code: %s n",s);
524: switch (lu->scheme_code) {
525: case 1: s = "standard factorization"; break;
526: case 2: s = "factorization + selective inversion"; break;
527: }
528: PetscViewerASCIIPrintf(viewer," scheme_code: %s n",s);
530: switch (lu->stat) {
531: case 0: s = "NO"; break;
532: case 1: s = "YES"; break;
533: }
534: PetscViewerASCIIPrintf(viewer," display stats: %s n",s);
535:
536: if ( lu->factor_type == DSC_LLT) {
537: s = "LLT";
538: } else if ( lu->factor_type == DSC_LDLT){
539: s = "LDLT";
540: } else {
541: SETERRQ(1,"Unknown factor type");
542: }
543: PetscViewerASCIIPrintf(viewer," factor type: %s n",s);
545: if ( lu->LBLASLevel == DSC_LBLAS1) {
546: s = "BLAS1";
547: } else if ( lu->LBLASLevel == DSC_LBLAS2){
548: s = "BLAS2";
549: } else if ( lu->LBLASLevel == DSC_LBLAS3){
550: s = "BLAS3";
551: } else {
552: SETERRQ(1,"Unknown local phase BLAS level");
553: }
554: PetscViewerASCIIPrintf(viewer," local phase BLAS level: %s n",s);
556: if ( lu->DBLASLevel == DSC_DBLAS1) {
557: s = "BLAS1";
558: } else if ( lu->DBLASLevel == DSC_DBLAS2){
559: s = "BLAS2";
560: } else {
561: SETERRQ(1,"Unknown distributed phase BLAS level");
562: }
563: PetscViewerASCIIPrintf(viewer," distributed phase BLAS level: %s n",s);
564: return(0);
565: }
567: #else
569: int MatUseDSCPACK_MPIBAIJ(Mat A)
570: {
572: return(0);
573: }
575: #endif