Actual source code: cholbs.c

#define PETSCMAT_DLL

#include "petsc.h"

/* We must define MLOG for BlockSolve logging */
#if defined(PETSC_USE_LOG)
#define MLOG
#endif

#include "src/mat/impls/rowbs/mpi/mpirowbs.h"

PetscErrorCode MatCholeskyFactorNumeric_MPIRowbs(Mat mat,MatFactorInfo *info,Mat *factp)
{
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_CHOLESKY) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete Cholesky factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift and retry the factorization */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscLogInfo((mat,"MatCholeskyFactorNumeric_MPIRowbs:BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",
                  mbs->failures,mbs->ierr,mbs->alpha));
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  mbs->factor = FACTOR_CHOLESKY;
  return(0);
}
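/*
   Usage sketch: the routine above is the MATMPIROWBS implementation behind the
   generic factorization interface. A minimal calling sequence (assuming the
   PETSc 2.3-era API, where the factor is returned in a separate Mat, and using
   illustrative variable names) would look like

       Mat           A,F;          (A created with type MATMPIROWBS)
       IS            perm;         (ordering, e.g. from MatGetOrdering())
       MatFactorInfo info;
       Vec           b,x;

       MatCholeskyFactorSymbolic(A,perm,&info,&F);
       MatCholeskyFactorNumeric(A,&info,&F);
       MatSolve(F,b,x);            (dispatches to MatSolve_MPIRowbs() below)
*/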

PetscErrorCode MatLUFactorNumeric_MPIRowbs(Mat mat,MatFactorInfo *info,Mat *factp)
{
  Mat_MPIRowbs   *mbs = (Mat_MPIRowbs*)mat->data;

#if defined(PETSC_USE_LOG)
  PetscReal      flop1 = BSlocal_flops();
#endif

  if (!mbs->blocksolveassembly) {
    MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
  }

  /* Do prep work if same nonzero structure as previously factored matrix */
  if (mbs->factor == FACTOR_LU) {
    /* Copy the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
  }
  /* Form incomplete LU factor */
  mbs->ierr = 0; mbs->failures = 0; mbs->alpha = 1.0;
  while ((mbs->ierr = BSfactor(mbs->fpA,mbs->comm_fpA,mbs->procinfo))) {
    CHKERRBS(0); mbs->failures++;
    /* Copy only the nonzeros */
    BScopy_nz(mbs->pA,mbs->fpA);CHKERRBS(0);
    /* Increment the diagonal shift and retry the factorization */
    mbs->alpha += 0.1;
    BSset_diag(mbs->fpA,mbs->alpha,mbs->procinfo);CHKERRBS(0);
    PetscLogInfo((mat,"MatLUFactorNumeric_MPIRowbs:BlockSolve95: %d failed factor(s), err=%d, alpha=%g\n",
                  mbs->failures,mbs->ierr,mbs->alpha));
  }
  mbs->factor = FACTOR_LU;
  (*factp)->assembled = PETSC_TRUE;
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
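/*
   Note on the retry loop used in both factorization routines above: BlockSolve
   factors the permuted, symmetrically scaled matrix, so (assuming BSset_diag()
   overwrites the diagonal of that scaled matrix, which has unit diagonal after
   scaling by D^{-1/2}) each retry attempts an incomplete factorization of the
   shifted matrix

       A_scaled + (alpha - 1) I,   alpha = 1.1, 1.2, ...

   until BSfactor() succeeds; the number of failures, the BlockSolve error code,
   and the final shift are reported through PetscLogInfo().
*/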
/* ------------------------------------------------------------------- */
PetscErrorCode MatSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat)mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute the vector and apply the diagonal scaling; mbs->diag stores D^{-1/2} */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use the BlockSolve routines for matrices without cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply the diagonal scaling (D^{-1/2}) and unpermute */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(mbs->xwork,y,mbs->diag);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
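/*
   What the routine above computes (reading the code, with P the BlockSolve
   permutation and D^{-1/2} the scaling stored in mbs->diag): when the vectors
   are not already permuted and scaled, the solve applies

       y = P^{-1} D^{-1/2} (D^{-1/2} P A P^{-1} D^{-1/2})^{-1} D^{-1/2} P x,

   which equals A^{-1} x when the factorization is exact; with the incomplete
   factor held in mbs->fpA it is the corresponding approximate inverse. The
   triangular solves act in the scaled, permuted space, and the surrounding
   scaling and (un)permutation map back to the original ordering.
*/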

/* ------------------------------------------------------------------- */
PetscErrorCode MatForwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat)mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xa,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  /* Permute the vector and apply the diagonal scaling; mbs->diag stores D^{-1/2} */
  if (!mbs->vecs_permscale) {
    VecGetArray(x,&xa);
    VecGetArray(mbs->xwork,&xworka);
    BSperm_dvec(xa,xworka,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(x,&xa);
    VecRestoreArray(mbs->xwork,&xworka);
    VecPointwiseMult(y,mbs->diag,mbs->xwork);
  } else {
    VecCopy(x,y);
  }

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use the BlockSolve routine for matrices without cliques/inodes */
    BSfor_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSfor_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif

  return(0);
}

/* ------------------------------------------------------------------- */
PetscErrorCode MatBackwardSolve_MPIRowbs(Mat mat,Vec x,Vec y)
{
  Mat          submat = (Mat)mat->data;
  Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)submat->data;
  PetscScalar  *ya,*xworka;

#if defined(PETSC_USE_LOG)
  PetscReal flop1 = BSlocal_flops();
#endif

  VecCopy(x,y);

  VecGetArray(y,&ya);
  if (mbs->procinfo->single) {
    /* Use the BlockSolve routine for matrices without cliques/inodes */
    BSback_solve1(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  } else {
    BSback_solve(mbs->fpA,ya,mbs->comm_pA,mbs->procinfo);CHKERRBS(0);
  }
  VecRestoreArray(y,&ya);

  /* Apply the diagonal scaling (D^{-1/2}) and unpermute */
  if (!mbs->vecs_permscale) {
    VecPointwiseMult(mbs->xwork,y,mbs->diag);
    VecGetArray(y,&ya);
    VecGetArray(mbs->xwork,&xworka);
    BSiperm_dvec(xworka,ya,mbs->pA->perm);CHKERRBS(0);
    VecRestoreArray(y,&ya);
    VecRestoreArray(mbs->xwork,&xworka);
  }
#if defined(PETSC_USE_LOG)
  PetscLogFlops((int)(BSlocal_flops()-flop1));
#endif
  return(0);
}
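/*
   Usage sketch: MatForwardSolve_MPIRowbs() and MatBackwardSolve_MPIRowbs() split
   MatSolve_MPIRowbs() into its two triangular stages; the input permutation and
   scaling are applied in the forward stage and the output scaling and
   unpermutation in the backward stage, so chaining them through the generic
   interface reproduces a full solve (a sketch, assuming a work vector w
   compatible with the factor F):

       MatForwardSolve(F,b,w);
       MatBackwardSolve(F,w,x);      (equivalent to MatSolve(F,b,x))
*/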


/*
    The logging variables required by BlockSolve.

    This is an ugly hack that allows PETSc to run properly with BlockSolve regardless
  of whether PETSc or BlockSolve is compiled with logging turned on.

    It is fragile because it relies on BlockSolve's logging internals not changing,
  but we have no choice; it is also unlikely that BlockSolve will see further
  development in the near future anyway.
*/
PETSCMAT_DLLEXPORT double         MLOG_flops;
PETSCMAT_DLLEXPORT double         MLOG_event_flops;
PETSCMAT_DLLEXPORT double         MLOG_time_stamp;
PETSCMAT_DLLEXPORT PetscErrorCode MLOG_sequence_num;
#if defined(MLOG_MAX_EVNTS)
PETSCMAT_DLLEXPORT MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
PETSCMAT_DLLEXPORT MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#else
typedef struct __MLOG_log_type {
  double time_stamp;
  double total_time;
  double flops;
  int    event_num;
} MLOG_log_type;
#define MLOG_MAX_EVNTS 1300
#define MLOG_MAX_ACCUM 75
PETSCMAT_DLLEXPORT MLOG_log_type MLOG_event_log[MLOG_MAX_EVNTS];
PETSCMAT_DLLEXPORT MLOG_log_type MLOG_accum_log[MLOG_MAX_ACCUM];
#endif