Actual source code: aijfact.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/aij/seq/aij.h
 4:  #include src/inline/dot.h
 5:  #include src/inline/spops.h
 6:  #include petscbt.h
 7:  #include src/mat/utils/freespace.h

 11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
 12: {

 15:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 16: #if !defined(PETSC_USE_DEBUG)
 17:   return(0);
 18: #endif
 19: }


 22: EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);

 24: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 25: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
 26: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
 27: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
 28: #endif

 32:   /* ------------------------------------------------------------

 34:           This interface was contribed by Tony Caola

 36:      This routine is an interface to the pivoting drop-tolerance 
 37:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 38:      SPARSEKIT2.

 40:      The SPARSEKIT2 routines used here are covered by the GNU 
 41:      copyright; see the file gnu in this directory.

 43:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 44:      help in getting this routine ironed out.

 46:      The major drawback to this routine is that if info->fill is 
 47:      not large enough it fails rather than allocating more space;
 48:      this can be fixed by hacking/improving the f2c version of 
 49:      Yousef Saad's code.

 51:      ------------------------------------------------------------
 52: */
 53: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
 54: {
 55: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 57:   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
 58:   You can obtain the drop tolerance routines by installing PETSc from\n\
 59:   www.mcs.anl.gov/petsc\n");
 60: #else
 61:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
 62:   IS             iscolf,isicol,isirow;
 63:   PetscTruth     reorder;
 64:   PetscErrorCode ierr,sierr;
 65:   PetscInt       *c,*r,*ic,i,n = A->m;
 66:   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 67:   PetscInt       *ordcol,*iwk,*iperm,*jw;
 68:   PetscInt       jmax,lfill,job,*o_i,*o_j;
 69:   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 70:   PetscReal      af;


 74:   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
 75:   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
 76:   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
 77:   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
 78:   lfill   = (PetscInt)(info->dtcount/2.0);
 79:   jmax    = (PetscInt)(info->fill*a->nz);


 82:   /* ------------------------------------------------------------
 83:      If reorder=.TRUE., then the original matrix has to be 
 84:      reordered to reflect the user selected ordering scheme, and
 85:      then de-reordered so it is in it's original format.  
 86:      Because Saad's dperm() is NOT in place, we have to copy 
 87:      the original matrix and allocate more storage. . . 
 88:      ------------------------------------------------------------
 89:   */

 91:   /* set reorder to true if either isrow or iscol is not identity */
 92:   ISIdentity(isrow,&reorder);
 93:   if (reorder) {ISIdentity(iscol,&reorder);}
 94:   reorder = PetscNot(reorder);

 96: 
 97:   /* storage for ilu factor */
 98:   PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
 99:   PetscMalloc(jmax*sizeof(PetscInt),&new_j);
100:   PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
101:   PetscMalloc(n*sizeof(PetscInt),&ordcol);

103:   /* ------------------------------------------------------------
104:      Make sure that everything is Fortran formatted (1-Based)
105:      ------------------------------------------------------------
106:   */
107:   for (i=old_i[0];i<old_i[n];i++) {
108:     old_j[i]++;
109:   }
110:   for(i=0;i<n+1;i++) {
111:     old_i[i]++;
112:   };
113: 

115:   if (reorder) {
116:     ISGetIndices(iscol,&c);
117:     ISGetIndices(isrow,&r);
118:     for(i=0;i<n;i++) {
119:       r[i]  = r[i]+1;
120:       c[i]  = c[i]+1;
121:     }
122:     PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
123:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
124:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
125:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126:     for (i=0;i<n;i++) {
127:       r[i]  = r[i]-1;
128:       c[i]  = c[i]-1;
129:     }
130:     ISRestoreIndices(iscol,&c);
131:     ISRestoreIndices(isrow,&r);
132:     o_a = old_a2;
133:     o_j = old_j2;
134:     o_i = old_i2;
135:   } else {
136:     o_a = old_a;
137:     o_j = old_j;
138:     o_i = old_i;
139:   }

141:   /* ------------------------------------------------------------
142:      Call Saad's ilutp() routine to generate the factorization
143:      ------------------------------------------------------------
144:   */

146:   PetscMalloc(2*n*sizeof(PetscInt),&iperm);
147:   PetscMalloc(2*n*sizeof(PetscInt),&jw);
148:   PetscMalloc(n*sizeof(PetscScalar),&w);

150:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151:   if (sierr) {
152:     switch (sierr) {
153:       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154:       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155:       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156:       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157:       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158:       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159:     }
160:   }

162:   PetscFree(w);
163:   PetscFree(jw);

165:   /* ------------------------------------------------------------
166:      Saad's routine gives the result in Modified Sparse Row (msr)
167:      Convert to Compressed Sparse Row format (csr) 
168:      ------------------------------------------------------------
169:   */

171:   PetscMalloc(n*sizeof(PetscScalar),&wk);
172:   PetscMalloc((n+1)*sizeof(PetscInt),&iwk);

174:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

176:   PetscFree(iwk);
177:   PetscFree(wk);

179:   if (reorder) {
180:     PetscFree(old_a2);
181:     PetscFree(old_j2);
182:     PetscFree(old_i2);
183:   } else {
184:     /* fix permutation of old_j that the factorization introduced */
185:     for (i=old_i[0]; i<old_i[n]; i++) {
186:       old_j[i-1] = iperm[old_j[i-1]-1];
187:     }
188:   }

190:   /* get rid of the shift to indices starting at 1 */
191:   for (i=0; i<n+1; i++) {
192:     old_i[i]--;
193:   }
194:   for (i=old_i[0];i<old_i[n];i++) {
195:     old_j[i]--;
196:   }
197: 
198:   /* Make the factored matrix 0-based */
199:   for (i=0; i<n+1; i++) {
200:     new_i[i]--;
201:   }
202:   for (i=new_i[0];i<new_i[n];i++) {
203:     new_j[i]--;
204:   }

206:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207:   /*-- permute the right-hand-side and solution vectors           --*/
208:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
210:   ISGetIndices(isicol,&ic);
211:   for(i=0; i<n; i++) {
212:     ordcol[i] = ic[iperm[i]-1];
213:   };
214:   ISRestoreIndices(isicol,&ic);
215:   ISDestroy(isicol);

217:   PetscFree(iperm);

219:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
220:   PetscFree(ordcol);

222:   /*----- put together the new matrix -----*/

224:   MatCreate(A->comm,fact);
225:   MatSetSizes(*fact,n,n,n,n);
226:   MatSetType(*fact,A->type_name);
227:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
228:   (*fact)->factor    = FACTOR_LU;
229:   (*fact)->assembled = PETSC_TRUE;

231:   b = (Mat_SeqAIJ*)(*fact)->data;
232:   b->freedata      = PETSC_TRUE;
233:   b->sorted        = PETSC_FALSE;
234:   b->singlemalloc  = PETSC_FALSE;
235:   b->a             = new_a;
236:   b->j             = new_j;
237:   b->i             = new_i;
238:   b->ilen          = 0;
239:   b->imax          = 0;
240:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241:   b->row           = isirow;
242:   b->col           = iscolf;
243:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
244:   b->maxnz = b->nz = new_i[n];
245:   MatMarkDiagonal_SeqAIJ(*fact);
246:   (*fact)->info.factor_mallocs = 0;

248:   MatMarkDiagonal_SeqAIJ(A);

250:   af = ((double)b->nz)/((double)a->nz) + .001;
251:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));
252:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));
253:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));
254:   PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));

256:   MatILUDTFactor_Inode(A,isrow,iscol,info,fact);

258:   return(0);
259: #endif
260: }

264: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265: {
266:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
267:   IS             isicol;
269:   PetscInt       *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270:   PetscInt       *bi,*bj,*ajtmp;
271:   PetscInt       *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272:   PetscReal      f;
273:   PetscInt       nlnk,*lnk,k,*cols,**bi_ptr;
274:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
275:   PetscBT        lnkbt;

278:   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
280:   ISGetIndices(isrow,&r);
281:   ISGetIndices(isicol,&ic);

283:   /* get new row pointers */
284:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
285:   bi[0] = 0;

287:   /* bdiag is location of diagonal in factor */
288:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
289:   bdiag[0] = 0;

291:   /* linked list for storing column indices of the active row */
292:   nlnk = n + 1;
293:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

295:   PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);
296:   im     = cols + n;
297:   bi_ptr = (PetscInt**)(im + n);

299:   /* initial FreeSpace size is f*(ai[n]+1) */
300:   f = info->fill;
301:   GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
302:   current_space = free_space;

304:   for (i=0; i<n; i++) {
305:     /* copy previous fill into linked list */
306:     nzi = 0;
307:     nnz = ai[r[i]+1] - ai[r[i]];
308:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309:     ajtmp = aj + ai[r[i]];
310:     for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
311:     PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);
312:     nzi += nlnk;

314:     /* add pivot rows into linked list */
315:     row = lnk[n];
316:     while (row < i) {
317:       nzbd    = bdiag[row] - bi[row] + 1;
318:       ajtmp   = bi_ptr[row] + nzbd;
319:       nnz     = im[row] - nzbd; /* num of columns with row<indices<=i */
320:       im[row] = nzbd;
321:       PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);
322:       nzi     += nlnk;
323:       im[row] += nzbd;  /* update im[row]: num of cols with index<=i */

325:       row = lnk[row];
326:     }

328:     bi[i+1] = bi[i] + nzi;
329:     im[i]   = nzi;

331:     /* mark bdiag */
332:     nzbd = 0;
333:     nnz  = nzi;
334:     k    = lnk[n];
335:     while (nnz-- && k < i){
336:       nzbd++;
337:       k = lnk[k];
338:     }
339:     bdiag[i] = bi[i] + nzbd;

341:     /* if free space is not available, make more free space */
342:     if (current_space->local_remaining<nzi) {
343:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
344:       GetMoreSpace(nnz,&current_space);
345:       reallocs++;
346:     }

348:     /* copy data into free space, then initialize lnk */
349:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
350:     bi_ptr[i] = current_space->array;
351:     current_space->array           += nzi;
352:     current_space->local_used      += nzi;
353:     current_space->local_remaining -= nzi;
354:   }
355: #if defined(PETSC_USE_DEBUG)
356:   if (ai[n] != 0) {
357:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
358:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
359:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));
360:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));
361:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));
362:   } else {
363:     PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));
364:   }
365: #endif

367:   ISRestoreIndices(isrow,&r);
368:   ISRestoreIndices(isicol,&ic);

370:   /* destroy list of free space and other temporary array(s) */
371:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
372:   MakeSpaceContiguous(&free_space,bj);
373:   PetscLLDestroy(lnk,lnkbt);
374:   PetscFree(cols);

376:   /* put together the new matrix */
377:   MatCreate(A->comm,B);
378:   MatSetSizes(*B,n,n,n,n);
379:   MatSetType(*B,A->type_name);
380:   MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
381:   PetscLogObjectParent(*B,isicol);
382:   b    = (Mat_SeqAIJ*)(*B)->data;
383:   b->freedata     = PETSC_TRUE;
384:   b->singlemalloc = PETSC_FALSE;
385:   PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
386:   b->j          = bj;
387:   b->i          = bi;
388:   b->diag       = bdiag;
389:   b->ilen       = 0;
390:   b->imax       = 0;
391:   b->row        = isrow;
392:   b->col        = iscol;
393:   PetscObjectReference((PetscObject)isrow);
394:   PetscObjectReference((PetscObject)iscol);
395:   b->icol       = isicol;
396:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

398:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
399:   PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
400:   b->maxnz = b->nz = bi[n] ;

402:   (*B)->factor                 =  FACTOR_LU;
403:   (*B)->info.factor_mallocs    = reallocs;
404:   (*B)->info.fill_ratio_given  = f;

406:   if (ai[n] != 0) {
407:     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
408:   } else {
409:     (*B)->info.fill_ratio_needed = 0.0;
410:   }
411:   MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
412:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
413:   return(0);
414: }

416: /* ----------------------------------------------------------- */
419: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
420: {
421:   Mat            C=*B;
422:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
423:   IS             isrow = b->row,isicol = b->icol;
425:   PetscInt       *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
426:   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
427:   PetscInt       *diag_offset = b->diag,diag,*pj;
428:   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
429:   PetscScalar    d;
430:   PetscReal      rs;
431:   LUShift_Ctx    sctx;
432:   PetscInt       newshift;

435:   ISGetIndices(isrow,&r);
436:   ISGetIndices(isicol,&ic);
437:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
438:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
439:   rtmps = rtmp; ics = ic;

441:   if (!a->diag) {
442:     MatMarkDiagonal_SeqAIJ(A);
443:   }
444:   /* if both shift schemes are chosen by user, only use info->shiftpd */
445:   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
446:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
447:     PetscInt *aai = a->i,*ddiag = a->diag;
448:     sctx.shift_top = 0;
449:     for (i=0; i<n; i++) {
450:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
451:       d  = (a->a)[ddiag[i]];
452:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
453:       v  = a->a+aai[i];
454:       nz = aai[i+1] - aai[i];
455:       for (j=0; j<nz; j++)
456:         rs += PetscAbsScalar(v[j]);
457:       if (rs>sctx.shift_top) sctx.shift_top = rs;
458:     }
459:     if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
460:     sctx.shift_top    *= 1.1;
461:     sctx.nshift_max   = 5;
462:     sctx.shift_lo     = 0.;
463:     sctx.shift_hi     = 1.;
464:   }

466:   sctx.shift_amount = 0;
467:   sctx.nshift       = 0;
468:   do {
469:     sctx.lushift = PETSC_FALSE;
470:     for (i=0; i<n; i++){
471:       nz    = bi[i+1] - bi[i];
472:       bjtmp = bj + bi[i];
473:       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;

475:       /* load in initial (unfactored row) */
476:       nz    = a->i[r[i]+1] - a->i[r[i]];
477:       ajtmp = a->j + a->i[r[i]];
478:       v     = a->a + a->i[r[i]];
479:       for (j=0; j<nz; j++) {
480:         rtmp[ics[ajtmp[j]]] = v[j];
481:       }
482:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

484:       row = *bjtmp++;
485:       while  (row < i) {
486:         pc = rtmp + row;
487:         if (*pc != 0.0) {
488:           pv         = b->a + diag_offset[row];
489:           pj         = b->j + diag_offset[row] + 1;
490:           multiplier = *pc / *pv++;
491:           *pc        = multiplier;
492:           nz         = bi[row+1] - diag_offset[row] - 1;
493:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
494:           PetscLogFlops(2*nz);
495:         }
496:         row = *bjtmp++;
497:       }
498:       /* finished row so stick it into b->a */
499:       pv   = b->a + bi[i] ;
500:       pj   = b->j + bi[i] ;
501:       nz   = bi[i+1] - bi[i];
502:       diag = diag_offset[i] - bi[i];
503:       rs   = 0.0;
504:       for (j=0; j<nz; j++) {
505:         pv[j] = rtmps[pj[j]];
506:         if (j != diag) rs += PetscAbsScalar(pv[j]);
507:       }

509:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
510:       sctx.rs  = rs;
511:       sctx.pv  = pv[diag];
512:       MatLUCheckShift_inline(info,sctx,newshift);
513:       if (newshift == 1){
514:         break;    /* sctx.shift_amount is updated */
515:       } else if (newshift == -1){
516:         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
517:       }
518:     }

520:     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
521:       /*
522:        * if no shift in this attempt & shifting & started shifting & can refine,
523:        * then try lower shift
524:        */
525:       sctx.shift_hi        = info->shift_fraction;
526:       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
527:       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
528:       sctx.lushift         = PETSC_TRUE;
529:       sctx.nshift++;
530:     }
531:   } while (sctx.lushift);

533:   /* invert diagonal entries for simplier triangular solves */
534:   for (i=0; i<n; i++) {
535:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
536:   }

538:   PetscFree(rtmp);
539:   ISRestoreIndices(isicol,&ic);
540:   ISRestoreIndices(isrow,&r);
541:   C->factor = FACTOR_LU;
542:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
543:   C->assembled = PETSC_TRUE;
544:   PetscLogFlops(C->n);
545:   if (sctx.nshift){
546:     if (info->shiftnz) {
547:       PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
548:     } else if (info->shiftpd) {
549:       PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));
550:     }
551:   }
552:   return(0);
553: }

557: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
558: {
560:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
561:   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
562:   return(0);
563: }


566: /* ----------------------------------------------------------- */
569: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
570: {
572:   Mat            C;

575:   MatLUFactorSymbolic(A,row,col,info,&C);
576:   MatLUFactorNumeric(A,info,&C);
577:   MatHeaderCopy(A,C);
578:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
579:   return(0);
580: }
581: /* ----------------------------------------------------------- */
584: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
585: {
586:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
587:   IS             iscol = a->col,isrow = a->row;
589:   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
590:   PetscInt       nz,*rout,*cout;
591:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

594:   if (!n) return(0);

596:   VecGetArray(bb,&b);
597:   VecGetArray(xx,&x);
598:   tmp  = a->solve_work;

600:   ISGetIndices(isrow,&rout); r = rout;
601:   ISGetIndices(iscol,&cout); c = cout + (n-1);

603:   /* forward solve the lower triangular */
604:   tmp[0] = b[*r++];
605:   tmps   = tmp;
606:   for (i=1; i<n; i++) {
607:     v   = aa + ai[i] ;
608:     vi  = aj + ai[i] ;
609:     nz  = a->diag[i] - ai[i];
610:     sum = b[*r++];
611:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
612:     tmp[i] = sum;
613:   }

615:   /* backward solve the upper triangular */
616:   for (i=n-1; i>=0; i--){
617:     v   = aa + a->diag[i] + 1;
618:     vi  = aj + a->diag[i] + 1;
619:     nz  = ai[i+1] - a->diag[i] - 1;
620:     sum = tmp[i];
621:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
622:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
623:   }

625:   ISRestoreIndices(isrow,&rout);
626:   ISRestoreIndices(iscol,&cout);
627:   VecRestoreArray(bb,&b);
628:   VecRestoreArray(xx,&x);
629:   PetscLogFlops(2*a->nz - A->n);
630:   return(0);
631: }

633: /* ----------------------------------------------------------- */
636: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
637: {
638:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
640:   PetscInt       n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
641:   PetscScalar    *x,*b,*aa = a->a;
642: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
643:   PetscInt       adiag_i,i,*vi,nz,ai_i;
644:   PetscScalar    *v,sum;
645: #endif

648:   if (!n) return(0);

650:   VecGetArray(bb,&b);
651:   VecGetArray(xx,&x);

653: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
654:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
655: #else
656:   /* forward solve the lower triangular */
657:   x[0] = b[0];
658:   for (i=1; i<n; i++) {
659:     ai_i = ai[i];
660:     v    = aa + ai_i;
661:     vi   = aj + ai_i;
662:     nz   = adiag[i] - ai_i;
663:     sum  = b[i];
664:     while (nz--) sum -= *v++ * x[*vi++];
665:     x[i] = sum;
666:   }

668:   /* backward solve the upper triangular */
669:   for (i=n-1; i>=0; i--){
670:     adiag_i = adiag[i];
671:     v       = aa + adiag_i + 1;
672:     vi      = aj + adiag_i + 1;
673:     nz      = ai[i+1] - adiag_i - 1;
674:     sum     = x[i];
675:     while (nz--) sum -= *v++ * x[*vi++];
676:     x[i]    = sum*aa[adiag_i];
677:   }
678: #endif
679:   PetscLogFlops(2*a->nz - A->n);
680:   VecRestoreArray(bb,&b);
681:   VecRestoreArray(xx,&x);
682:   return(0);
683: }

687: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
688: {
689:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
690:   IS             iscol = a->col,isrow = a->row;
692:   PetscInt       *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
693:   PetscInt       nz,*rout,*cout;
694:   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;

697:   if (yy != xx) {VecCopy(yy,xx);}

699:   VecGetArray(bb,&b);
700:   VecGetArray(xx,&x);
701:   tmp  = a->solve_work;

703:   ISGetIndices(isrow,&rout); r = rout;
704:   ISGetIndices(iscol,&cout); c = cout + (n-1);

706:   /* forward solve the lower triangular */
707:   tmp[0] = b[*r++];
708:   for (i=1; i<n; i++) {
709:     v   = aa + ai[i] ;
710:     vi  = aj + ai[i] ;
711:     nz  = a->diag[i] - ai[i];
712:     sum = b[*r++];
713:     while (nz--) sum -= *v++ * tmp[*vi++ ];
714:     tmp[i] = sum;
715:   }

717:   /* backward solve the upper triangular */
718:   for (i=n-1; i>=0; i--){
719:     v   = aa + a->diag[i] + 1;
720:     vi  = aj + a->diag[i] + 1;
721:     nz  = ai[i+1] - a->diag[i] - 1;
722:     sum = tmp[i];
723:     while (nz--) sum -= *v++ * tmp[*vi++ ];
724:     tmp[i] = sum*aa[a->diag[i]];
725:     x[*c--] += tmp[i];
726:   }

728:   ISRestoreIndices(isrow,&rout);
729:   ISRestoreIndices(iscol,&cout);
730:   VecRestoreArray(bb,&b);
731:   VecRestoreArray(xx,&x);
732:   PetscLogFlops(2*a->nz);

734:   return(0);
735: }
736: /* -------------------------------------------------------------------*/
739: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
740: {
741:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
742:   IS             iscol = a->col,isrow = a->row;
744:   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
745:   PetscInt       nz,*rout,*cout,*diag = a->diag;
746:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;

749:   VecGetArray(bb,&b);
750:   VecGetArray(xx,&x);
751:   tmp  = a->solve_work;

753:   ISGetIndices(isrow,&rout); r = rout;
754:   ISGetIndices(iscol,&cout); c = cout;

756:   /* copy the b into temp work space according to permutation */
757:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

759:   /* forward solve the U^T */
760:   for (i=0; i<n; i++) {
761:     v   = aa + diag[i] ;
762:     vi  = aj + diag[i] + 1;
763:     nz  = ai[i+1] - diag[i] - 1;
764:     s1  = tmp[i];
765:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
766:     while (nz--) {
767:       tmp[*vi++ ] -= (*v++)*s1;
768:     }
769:     tmp[i] = s1;
770:   }

772:   /* backward solve the L^T */
773:   for (i=n-1; i>=0; i--){
774:     v   = aa + diag[i] - 1 ;
775:     vi  = aj + diag[i] - 1 ;
776:     nz  = diag[i] - ai[i];
777:     s1  = tmp[i];
778:     while (nz--) {
779:       tmp[*vi-- ] -= (*v--)*s1;
780:     }
781:   }

783:   /* copy tmp into x according to permutation */
784:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

786:   ISRestoreIndices(isrow,&rout);
787:   ISRestoreIndices(iscol,&cout);
788:   VecRestoreArray(bb,&b);
789:   VecRestoreArray(xx,&x);

791:   PetscLogFlops(2*a->nz-A->n);
792:   return(0);
793: }

797: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
798: {
799:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
800:   IS             iscol = a->col,isrow = a->row;
802:   PetscInt       *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
803:   PetscInt       nz,*rout,*cout,*diag = a->diag;
804:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;

807:   if (zz != xx) {VecCopy(zz,xx);}

809:   VecGetArray(bb,&b);
810:   VecGetArray(xx,&x);
811:   tmp = a->solve_work;

813:   ISGetIndices(isrow,&rout); r = rout;
814:   ISGetIndices(iscol,&cout); c = cout;

816:   /* copy the b into temp work space according to permutation */
817:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

819:   /* forward solve the U^T */
820:   for (i=0; i<n; i++) {
821:     v   = aa + diag[i] ;
822:     vi  = aj + diag[i] + 1;
823:     nz  = ai[i+1] - diag[i] - 1;
824:     tmp[i] *= *v++;
825:     while (nz--) {
826:       tmp[*vi++ ] -= (*v++)*tmp[i];
827:     }
828:   }

830:   /* backward solve the L^T */
831:   for (i=n-1; i>=0; i--){
832:     v   = aa + diag[i] - 1 ;
833:     vi  = aj + diag[i] - 1 ;
834:     nz  = diag[i] - ai[i];
835:     while (nz--) {
836:       tmp[*vi-- ] -= (*v--)*tmp[i];
837:     }
838:   }

840:   /* copy tmp into x according to permutation */
841:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

843:   ISRestoreIndices(isrow,&rout);
844:   ISRestoreIndices(iscol,&cout);
845:   VecRestoreArray(bb,&b);
846:   VecRestoreArray(xx,&x);

848:   PetscLogFlops(2*a->nz);
849:   return(0);
850: }
851: /* ----------------------------------------------------------------*/
852: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);

856: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
857: {
858:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
859:   IS             isicol;
861:   PetscInt       *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
862:   PetscInt       *bi,*cols,nnz,*cols_lvl;
863:   PetscInt       *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
864:   PetscInt       i,levels,diagonal_fill;
865:   PetscTruth     col_identity,row_identity;
866:   PetscReal      f;
867:   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
868:   PetscBT        lnkbt;
869:   PetscInt       nzi,*bj,**bj_ptr,**bjlvl_ptr;
870:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
871:   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
872: 
874:   f             = info->fill;
875:   levels        = (PetscInt)info->levels;
876:   diagonal_fill = (PetscInt)info->diagonal_fill;
877:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

879:   /* special case that simply copies fill pattern */
880:   ISIdentity(isrow,&row_identity);
881:   ISIdentity(iscol,&col_identity);
882:   if (!levels && row_identity && col_identity) {
883:     MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
884:     (*fact)->factor = FACTOR_LU;
885:     b               = (Mat_SeqAIJ*)(*fact)->data;
886:     if (!b->diag) {
887:       MatMarkDiagonal_SeqAIJ(*fact);
888:     }
889:     MatMissingDiagonal_SeqAIJ(*fact);
890:     b->row              = isrow;
891:     b->col              = iscol;
892:     b->icol             = isicol;
893:     PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
894:     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
895:     PetscObjectReference((PetscObject)isrow);
896:     PetscObjectReference((PetscObject)iscol);
897:     return(0);
898:   }

900:   ISGetIndices(isrow,&r);
901:   ISGetIndices(isicol,&ic);

903:   /* get new row pointers */
904:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
905:   bi[0] = 0;
906:   /* bdiag is location of diagonal in factor */
907:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
908:   bdiag[0]  = 0;

910:   PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
911:   bjlvl_ptr = (PetscInt**)(bj_ptr + n);

913:   /* create a linked list for storing column indices of the active row */
914:   nlnk = n + 1;
915:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

917:   /* initial FreeSpace size is f*(ai[n]+1) */
918:   GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
919:   current_space = free_space;
920:   GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
921:   current_space_lvl = free_space_lvl;
922: 
923:   for (i=0; i<n; i++) {
924:     nzi = 0;
925:     /* copy current row into linked list */
926:     nnz  = ai[r[i]+1] - ai[r[i]];
927:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
928:     cols = aj + ai[r[i]];
929:     lnk[i] = -1; /* marker to indicate if diagonal exists */
930:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
931:     nzi += nlnk;

933:     /* make sure diagonal entry is included */
934:     if (diagonal_fill && lnk[i] == -1) {
935:       fm = n;
936:       while (lnk[fm] < i) fm = lnk[fm];
937:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
938:       lnk[fm]    = i;
939:       lnk_lvl[i] = 0;
940:       nzi++; dcount++;
941:     }

943:     /* add pivot rows into the active row */
944:     nzbd = 0;
945:     prow = lnk[n];
946:     while (prow < i) {
947:       nnz      = bdiag[prow];
948:       cols     = bj_ptr[prow] + nnz + 1;
949:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
950:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
951:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
952:       nzi += nlnk;
953:       prow = lnk[prow];
954:       nzbd++;
955:     }
956:     bdiag[i] = nzbd;
957:     bi[i+1]  = bi[i] + nzi;

959:     /* if free space is not available, make more free space */
960:     if (current_space->local_remaining<nzi) {
961:       nnz = nzi*(n - i); /* estimated and max additional space needed */
962:       GetMoreSpace(nnz,&current_space);
963:       GetMoreSpace(nnz,&current_space_lvl);
964:       reallocs++;
965:     }

967:     /* copy data into free_space and free_space_lvl, then initialize lnk */
968:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
969:     bj_ptr[i]    = current_space->array;
970:     bjlvl_ptr[i] = current_space_lvl->array;

972:     /* make sure the active row i has diagonal entry */
973:     if (*(bj_ptr[i]+bdiag[i]) != i) {
974:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
975:     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
976:     }

978:     current_space->array           += nzi;
979:     current_space->local_used      += nzi;
980:     current_space->local_remaining -= nzi;
981:     current_space_lvl->array           += nzi;
982:     current_space_lvl->local_used      += nzi;
983:     current_space_lvl->local_remaining -= nzi;
984:   }

986:   ISRestoreIndices(isrow,&r);
987:   ISRestoreIndices(isicol,&ic);

989:   /* destroy list of free space and other temporary arrays */
990:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
991:   MakeSpaceContiguous(&free_space,bj);
992:   PetscIncompleteLLDestroy(lnk,lnkbt);
993:   DestroySpace(free_space_lvl);
994:   PetscFree(bj_ptr);

996: #if defined(PETSC_USE_DEBUG)
997:   {
998:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
999:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
1000:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));
1001:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));
1002:     PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));
1003:     if (diagonal_fill) {
1004:       PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));
1005:     }
1006:   }
1007: #endif

1009:   /* put together the new matrix */
1010:   MatCreate(A->comm,fact);
1011:   MatSetSizes(*fact,n,n,n,n);
1012:   MatSetType(*fact,A->type_name);
1013:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1014:   PetscLogObjectParent(*fact,isicol);
1015:   b = (Mat_SeqAIJ*)(*fact)->data;
1016:   b->freedata     = PETSC_TRUE;
1017:   b->singlemalloc = PETSC_FALSE;
1018:   len = (bi[n] )*sizeof(PetscScalar);
1019:   PetscMalloc(len+1,&b->a);
1020:   b->j          = bj;
1021:   b->i          = bi;
1022:   for (i=0; i<n; i++) bdiag[i] += bi[i];
1023:   b->diag       = bdiag;
1024:   b->ilen       = 0;
1025:   b->imax       = 0;
1026:   b->row        = isrow;
1027:   b->col        = iscol;
1028:   PetscObjectReference((PetscObject)isrow);
1029:   PetscObjectReference((PetscObject)iscol);
1030:   b->icol       = isicol;
1031:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1032:   /* In b structure:  Free imax, ilen, old a, old j.  
1033:      Allocate bdiag, solve_work, new a, new j */
1034:   PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1035:   b->maxnz             = b->nz = bi[n] ;
1036:   (*fact)->factor = FACTOR_LU;
1037:   (*fact)->info.factor_mallocs    = reallocs;
1038:   (*fact)->info.fill_ratio_given  = f;
1039:   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);

1041:   MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1042:   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

1044:   return(0);
1045: }

1047:  #include src/mat/impls/sbaij/seq/sbaij.h
1050: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1051: {
1052:   Mat            C = *B;
1053:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1054:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1055:   IS             ip=b->row;
1057:   PetscInt       *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1058:   PetscInt       *ai=a->i,*aj=a->j;
1059:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1060:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1061:   PetscReal      zeropivot,rs,shiftnz;
1062:   PetscTruth     shiftpd;
1063:   ChShift_Ctx    sctx;
1064:   PetscInt       newshift;

1067:   shiftnz   = info->shiftnz;
1068:   shiftpd   = info->shiftpd;
1069:   zeropivot = info->zeropivot;

1071:   ISGetIndices(ip,&rip);
1072: 
1073:   /* initialization */
1074:   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1075:   PetscMalloc(nz,&il);
1076:   jl   = il + mbs;
1077:   rtmp = (MatScalar*)(jl + mbs);

1079:   sctx.shift_amount = 0;
1080:   sctx.nshift       = 0;
1081:   do {
1082:     sctx.chshift = PETSC_FALSE;
1083:     for (i=0; i<mbs; i++) {
1084:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1085:     }
1086: 
1087:     for (k = 0; k<mbs; k++){
1088:       bval = ba + bi[k];
1089:       /* initialize k-th row by the perm[k]-th row of A */
1090:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1091:       for (j = jmin; j < jmax; j++){
1092:         col = rip[aj[j]];
1093:         if (col >= k){ /* only take upper triangular entry */
1094:           rtmp[col] = aa[j];
1095:           *bval++  = 0.0; /* for in-place factorization */
1096:         }
1097:       }
1098:       /* shift the diagonal of the matrix */
1099:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1101:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1102:       dk = rtmp[k];
1103:       i = jl[k]; /* first row to be added to k_th row  */

1105:       while (i < k){
1106:         nexti = jl[i]; /* next row to be added to k_th row */

1108:         /* compute multiplier, update diag(k) and U(i,k) */
1109:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1110:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1111:         dk += uikdi*ba[ili];
1112:         ba[ili] = uikdi; /* -U(i,k) */

1114:         /* add multiple of row i to k-th row */
1115:         jmin = ili + 1; jmax = bi[i+1];
1116:         if (jmin < jmax){
1117:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1118:           /* update il and jl for row i */
1119:           il[i] = jmin;
1120:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1121:         }
1122:         i = nexti;
1123:       }

1125:       /* shift the diagonals when zero pivot is detected */
1126:       /* compute rs=sum of abs(off-diagonal) */
1127:       rs   = 0.0;
1128:       jmin = bi[k]+1;
1129:       nz   = bi[k+1] - jmin;
1130:       if (nz){
1131:         bcol = bj + jmin;
1132:         while (nz--){
1133:           rs += PetscAbsScalar(rtmp[*bcol]);
1134:           bcol++;
1135:         }
1136:       }

1138:       sctx.rs = rs;
1139:       sctx.pv = dk;
1140:       MatCholeskyCheckShift_inline(info,sctx,newshift);
1141:       if (newshift == 1){
1142:         break;    /* sctx.shift_amount is updated */
1143:       } else if (newshift == -1){
1144:         SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1145:       }
1146: 
1147:       /* copy data into U(k,:) */
1148:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1149:       jmin = bi[k]+1; jmax = bi[k+1];
1150:       if (jmin < jmax) {
1151:         for (j=jmin; j<jmax; j++){
1152:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1153:         }
1154:         /* add the k-th row into il and jl */
1155:         il[k] = jmin;
1156:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1157:       }
1158:     }
1159:   } while (sctx.chshift);
1160:   PetscFree(il);

1162:   ISRestoreIndices(ip,&rip);
1163:   C->factor       = FACTOR_CHOLESKY;
1164:   C->assembled    = PETSC_TRUE;
1165:   C->preallocated = PETSC_TRUE;
1166:   PetscLogFlops(C->m);
1167:   if (sctx.nshift){
1168:     if (shiftnz) {
1169:       PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1170:     } else if (shiftpd) {
1171:       PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1172:     }
1173:   }
1174:   return(0);
1175: }

1179: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1180: {
1181:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1182:   Mat_SeqSBAIJ   *b;
1183:   Mat            B;
1185:   PetscTruth     perm_identity;
1186:   PetscInt       reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1187:   PetscInt       jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1188:   PetscInt       nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1189:   PetscInt       ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1190:   PetscReal      fill=info->fill,levels=info->levels;
1191:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1192:   FreeSpaceList  free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1193:   PetscBT        lnkbt;
1194: 
1196:   ISIdentity(perm,&perm_identity);
1197:   ISGetIndices(perm,&rip);

1199:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1200:   ui[0] = 0;

1202:   /* special case that simply copies fill pattern */
1203:   if (!levels && perm_identity) {
1204:     MatMarkDiagonal_SeqAIJ(A);
1205:     for (i=0; i<am; i++) {
1206:       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1207:     }
1208:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1209:     cols = uj;
1210:     for (i=0; i<am; i++) {
1211:       aj    = a->j + a->diag[i];
1212:       ncols = ui[i+1] - ui[i];
1213:       for (j=0; j<ncols; j++) *cols++ = *aj++;
1214:     }
1215:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1216:     /* initialization */
1217:     PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);

1219:     /* jl: linked list for storing indices of the pivot rows 
1220:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1221:     PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1222:     il         = jl + am;
1223:     uj_ptr     = (PetscInt**)(il + am);
1224:     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1225:     for (i=0; i<am; i++){
1226:       jl[i] = am; il[i] = 0;
1227:     }

1229:     /* create and initialize a linked list for storing column indices of the active row k */
1230:     nlnk = am + 1;
1231:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

1233:     /* initial FreeSpace size is fill*(ai[am]+1) */
1234:     GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1235:     current_space = free_space;
1236:     GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1237:     current_space_lvl = free_space_lvl;

1239:     for (k=0; k<am; k++){  /* for each active row k */
1240:       /* initialize lnk by the column indices of row rip[k] of A */
1241:       nzk   = 0;
1242:       ncols = ai[rip[k]+1] - ai[rip[k]];
1243:       ncols_upper = 0;
1244:       for (j=0; j<ncols; j++){
1245:         i = *(aj + ai[rip[k]] + j);
1246:         if (rip[i] >= k){ /* only take upper triangular entry */
1247:           ajtmp[ncols_upper] = i;
1248:           ncols_upper++;
1249:         }
1250:       }
1251:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
1252:       nzk += nlnk;

1254:       /* update lnk by computing fill-in for each pivot row to be merged in */
1255:       prow = jl[k]; /* 1st pivot row */
1256: 
1257:       while (prow < k){
1258:         nextprow = jl[prow];
1259: 
1260:         /* merge prow into k-th row */
1261:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1262:         jmax = ui[prow+1];
1263:         ncols = jmax-jmin;
1264:         i     = jmin - ui[prow];
1265:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1266:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1267:         j     = *(uj - 1);
1268:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1269:         nzk += nlnk;

1271:         /* update il and jl for prow */
1272:         if (jmin < jmax){
1273:           il[prow] = jmin;
1274:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1275:         }
1276:         prow = nextprow;
1277:       }

1279:       /* if free space is not available, make more free space */
1280:       if (current_space->local_remaining<nzk) {
1281:         i = am - k + 1; /* num of unfactored rows */
1282:         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1283:         GetMoreSpace(i,&current_space);
1284:         GetMoreSpace(i,&current_space_lvl);
1285:         reallocs++;
1286:       }

1288:       /* copy data into free_space and free_space_lvl, then initialize lnk */
1289:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

1291:       /* add the k-th row into il and jl */
1292:       if (nzk > 1){
1293:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1294:         jl[k] = jl[i]; jl[i] = k;
1295:         il[k] = ui[k] + 1;
1296:       }
1297:       uj_ptr[k]     = current_space->array;
1298:       uj_lvl_ptr[k] = current_space_lvl->array;

1300:       current_space->array           += nzk;
1301:       current_space->local_used      += nzk;
1302:       current_space->local_remaining -= nzk;

1304:       current_space_lvl->array           += nzk;
1305:       current_space_lvl->local_used      += nzk;
1306:       current_space_lvl->local_remaining -= nzk;

1308:       ui[k+1] = ui[k] + nzk;
1309:     }

1311: #if defined(PETSC_USE_DEBUG)
1312:     if (ai[am] != 0) {
1313:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1314:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1315:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1316:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1317:     } else {
1318:       PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1319:     }
1320: #endif

1322:     ISRestoreIndices(perm,&rip);
1323:     PetscFree(jl);
1324:     PetscFree(ajtmp);

1326:     /* destroy list of free space and other temporary array(s) */
1327:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1328:     MakeSpaceContiguous(&free_space,uj);
1329:     PetscIncompleteLLDestroy(lnk,lnkbt);
1330:     DestroySpace(free_space_lvl);

1332:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

1334:   /* put together the new matrix in MATSEQSBAIJ format */
1335:   MatCreate(PETSC_COMM_SELF,fact);
1336:   MatSetSizes(*fact,am,am,am,am);
1337:   B = *fact;
1338:   MatSetType(B,MATSEQSBAIJ);
1339:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1341:   b    = (Mat_SeqSBAIJ*)B->data;
1342:   b->singlemalloc = PETSC_FALSE;
1343:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1344:   b->j    = uj;
1345:   b->i    = ui;
1346:   b->diag = 0;
1347:   b->ilen = 0;
1348:   b->imax = 0;
1349:   b->row  = perm;
1350:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1351:   PetscObjectReference((PetscObject)perm);
1352:   b->icol = perm;
1353:   PetscObjectReference((PetscObject)perm);
1354:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1355:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1356:   b->maxnz = b->nz = ui[am];
1357: 
1358:   B->factor                 = FACTOR_CHOLESKY;
1359:   B->info.factor_mallocs    = reallocs;
1360:   B->info.fill_ratio_given  = fill;
1361:   if (ai[am] != 0) {
1362:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1363:   } else {
1364:     B->info.fill_ratio_needed = 0.0;
1365:   }
1366:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1367:   if (perm_identity){
1368:     B->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1369:     B->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1370:   }
1371:   return(0);
1372: }

1376: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1377: {
1378:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1379:   Mat_SeqSBAIJ   *b;
1380:   Mat            B;
1382:   PetscTruth     perm_identity;
1383:   PetscReal      fill = info->fill;
1384:   PetscInt       *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1385:   PetscInt       *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1386:   PetscInt       nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1387:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1388:   PetscBT        lnkbt;
1389:   IS             iperm;

1392:   /* check whether perm is the identity mapping */
1393:   ISIdentity(perm,&perm_identity);
1394:   ISGetIndices(perm,&rip);

1396:   if (!perm_identity){
1397:     /* check if perm is symmetric! */
1398:     ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1399:     ISGetIndices(iperm,&riip);
1400:     for (i=0; i<am; i++) {
1401:       if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1402:     }
1403:     ISRestoreIndices(iperm,&riip);
1404:     ISDestroy(iperm);
1405:   }

1407:   /* initialization */
1408:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1409:   ui[0] = 0;

1411:   /* jl: linked list for storing indices of the pivot rows 
1412:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1413:   PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1414:   il     = jl + am;
1415:   cols   = il + am;
1416:   ui_ptr = (PetscInt**)(cols + am);
1417:   for (i=0; i<am; i++){
1418:     jl[i] = am; il[i] = 0;
1419:   }

1421:   /* create and initialize a linked list for storing column indices of the active row k */
1422:   nlnk = am + 1;
1423:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

1425:   /* initial FreeSpace size is fill*(ai[am]+1) */
1426:   GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1427:   current_space = free_space;

1429:   for (k=0; k<am; k++){  /* for each active row k */
1430:     /* initialize lnk by the column indices of row rip[k] of A */
1431:     nzk   = 0;
1432:     ncols = ai[rip[k]+1] - ai[rip[k]];
1433:     ncols_upper = 0;
1434:     for (j=0; j<ncols; j++){
1435:       i = rip[*(aj + ai[rip[k]] + j)];
1436:       if (i >= k){ /* only take upper triangular entry */
1437:         cols[ncols_upper] = i;
1438:         ncols_upper++;
1439:       }
1440:     }
1441:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1442:     nzk += nlnk;

1444:     /* update lnk by computing fill-in for each pivot row to be merged in */
1445:     prow = jl[k]; /* 1st pivot row */
1446: 
1447:     while (prow < k){
1448:       nextprow = jl[prow];
1449:       /* merge prow into k-th row */
1450:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1451:       jmax = ui[prow+1];
1452:       ncols = jmax-jmin;
1453:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1454:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1455:       nzk += nlnk;

1457:       /* update il and jl for prow */
1458:       if (jmin < jmax){
1459:         il[prow] = jmin;
1460:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1461:       }
1462:       prow = nextprow;
1463:     }

1465:     /* if free space is not available, make more free space */
1466:     if (current_space->local_remaining<nzk) {
1467:       i = am - k + 1; /* num of unfactored rows */
1468:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1469:       GetMoreSpace(i,&current_space);
1470:       reallocs++;
1471:     }

1473:     /* copy data into free space, then initialize lnk */
1474:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

1476:     /* add the k-th row into il and jl */
1477:     if (nzk-1 > 0){
1478:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1479:       jl[k] = jl[i]; jl[i] = k;
1480:       il[k] = ui[k] + 1;
1481:     }
1482:     ui_ptr[k] = current_space->array;
1483:     current_space->array           += nzk;
1484:     current_space->local_used      += nzk;
1485:     current_space->local_remaining -= nzk;

1487:     ui[k+1] = ui[k] + nzk;
1488:   }

1490: #if defined(PETSC_USE_DEBUG)
1491:   if (ai[am] != 0) {
1492:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1493:     PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1494:     PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1495:     PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1496:   } else {
1497:      PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1498:   }
1499: #endif

1501:   ISRestoreIndices(perm,&rip);
1502:   PetscFree(jl);

1504:   /* destroy list of free space and other temporary array(s) */
1505:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1506:   MakeSpaceContiguous(&free_space,uj);
1507:   PetscLLDestroy(lnk,lnkbt);

1509:   /* put together the new matrix in MATSEQSBAIJ format */
1510:   MatCreate(PETSC_COMM_SELF,fact);
1511:   MatSetSizes(*fact,am,am,am,am);
1512:   B    = *fact;
1513:   MatSetType(B,MATSEQSBAIJ);
1514:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1516:   b = (Mat_SeqSBAIJ*)B->data;
1517:   b->singlemalloc = PETSC_FALSE;
1518:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1519:   b->j    = uj;
1520:   b->i    = ui;
1521:   b->diag = 0;
1522:   b->ilen = 0;
1523:   b->imax = 0;
1524:   b->row  = perm;
1525:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1526:   PetscObjectReference((PetscObject)perm);
1527:   b->icol = perm;
1528:   PetscObjectReference((PetscObject)perm);
1529:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1530:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1531:   b->maxnz = b->nz = ui[am];
1532: 
1533:   B->factor                 = FACTOR_CHOLESKY;
1534:   B->info.factor_mallocs    = reallocs;
1535:   B->info.fill_ratio_given  = fill;
1536:   if (ai[am] != 0) {
1537:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1538:   } else {
1539:     B->info.fill_ratio_needed = 0.0;
1540:   }
1541:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1542:   if (perm_identity){
1543:     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1544:     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1545:   }
1546:   return(0);
1547: }