Actual source code: aijfact.c

  1: /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/

 3:  #include src/mat/impls/aij/seq/aij.h
 4:  #include src/vec/vecimpl.h
 5:  #include src/inline/dot.h

  7: int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
  8: {

 11:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 12: #if !defined(PETSC_USE_DEBUG)
 13:   return(0);
 14: #endif
 15: }


 18: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
 19: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);

 21: EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
 22: EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
 23: EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);

 25:   /* ------------------------------------------------------------

 27:           This interface was contribed by Tony Caola

 29:      This routine is an interface to the pivoting drop-tolerance 
 30:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 31:      SPARSEKIT2.

 33:      The SPARSEKIT2 routines used here are covered by the GNU 
 34:      copyright; see the file gnu in this directory.

 36:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 37:      help in getting this routine ironed out.

 39:      The major drawback to this routine is that if info->fill is 
 40:      not large enough it fails rather than allocating more space;
 41:      this can be fixed by hacking/improving the f2c version of 
 42:      Yousef Saad's code.

 44:      ishift = 0, for indices start at 1
 45:      ishift = 1, for indices starting at 0
 46:      ------------------------------------------------------------
 47:   */

 49: int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
 50: {
 51:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b;
 52:   IS           iscolf,isicol,isirow;
 53:   PetscTruth   reorder;
 54:   int          *c,*r,*ic,ierr,i,n = A->m;
 55:   int          *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 56:   int          *ordcol,*iwk,*iperm,*jw;
 57:   int          ishift = !a->indexshift;
 58:   int          jmax,lfill,job,*o_i,*o_j;
 59:   PetscScalar  *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 60:   PetscReal    permtol,af;


 64:   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
 65:   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
 66:   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
 67:   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
 68:   lfill   = (int)(info->dtcount/2.0);
 69:   jmax    = (int)(info->fill*a->nz);
 70:   permtol = info->dtcol;


 73:   /* ------------------------------------------------------------
 74:      If reorder=.TRUE., then the original matrix has to be 
 75:      reordered to reflect the user selected ordering scheme, and
 76:      then de-reordered so it is in it's original format.  
 77:      Because Saad's dperm() is NOT in place, we have to copy 
 78:      the original matrix and allocate more storage. . . 
 79:      ------------------------------------------------------------
 80:   */

 82:   /* set reorder to true if either isrow or iscol is not identity */
 83:   ISIdentity(isrow,&reorder);
 84:   if (reorder) {ISIdentity(iscol,&reorder);}
 85:   reorder = PetscNot(reorder);

 87: 
 88:   /* storage for ilu factor */
 89:   PetscMalloc((n+1)*sizeof(int),&new_i);
 90:   PetscMalloc(jmax*sizeof(int),&new_j);
 91:   PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
 92:   PetscMalloc(n*sizeof(int),&ordcol);

 94:   /* ------------------------------------------------------------
 95:      Make sure that everything is Fortran formatted (1-Based)
 96:      ------------------------------------------------------------
 97:   */
 98:   if (ishift) {
 99:     for (i=old_i[0];i<old_i[n];i++) {
100:       old_j[i]++;
101:     }
102:     for(i=0;i<n+1;i++) {
103:       old_i[i]++;
104:     };
105:   };

107:   if (reorder) {
108:     ISGetIndices(iscol,&c);
109:     ISGetIndices(isrow,&r);
110:     for(i=0;i<n;i++) {
111:       r[i]  = r[i]+1;
112:       c[i]  = c[i]+1;
113:     }
114:     PetscMalloc((n+1)*sizeof(int),&old_i2);
115:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
116:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
117:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
118:     for (i=0;i<n;i++) {
119:       r[i]  = r[i]-1;
120:       c[i]  = c[i]-1;
121:     }
122:     ISRestoreIndices(iscol,&c);
123:     ISRestoreIndices(isrow,&r);
124:     o_a = old_a2;
125:     o_j = old_j2;
126:     o_i = old_i2;
127:   } else {
128:     o_a = old_a;
129:     o_j = old_j;
130:     o_i = old_i;
131:   }

133:   /* ------------------------------------------------------------
134:      Call Saad's ilutp() routine to generate the factorization
135:      ------------------------------------------------------------
136:   */

138:   PetscMalloc(2*n*sizeof(int),&iperm);
139:   PetscMalloc(2*n*sizeof(int),&jw);
140:   PetscMalloc(n*sizeof(PetscScalar),&w);

142:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
143:   if (ierr) {
144:     switch (ierr) {
145:       case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
146:       case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
147:       case -5: SETERRQ(1,"ilutp(), zero row encountered");
148:       case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
149:       case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
150:       default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
151:     }
152:   }

154:   PetscFree(w);
155:   PetscFree(jw);

157:   /* ------------------------------------------------------------
158:      Saad's routine gives the result in Modified Sparse Row (msr)
159:      Convert to Compressed Sparse Row format (csr) 
160:      ------------------------------------------------------------
161:   */

163:   PetscMalloc(n*sizeof(PetscScalar),&wk);
164:   PetscMalloc((n+1)*sizeof(int),&iwk);

166:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

168:   PetscFree(iwk);
169:   PetscFree(wk);

171:   if (reorder) {
172:     PetscFree(old_a2);
173:     PetscFree(old_j2);
174:     PetscFree(old_i2);
175:   } else {
176:     /* fix permutation of old_j that the factorization introduced */
177:     for (i=old_i[0]; i<old_i[n]; i++) {
178:       old_j[i-1] = iperm[old_j[i-1]-1];
179:     }
180:   }

182:   /* get rid of the shift to indices starting at 1 */
183:   if (ishift) {
184:     for (i=0; i<n+1; i++) {
185:       old_i[i]--;
186:     }
187:     for (i=old_i[0];i<old_i[n];i++) {
188:       old_j[i]--;
189:     }
190:   }

192:   /* Make the factored matrix 0-based */
193:   if (ishift) {
194:     for (i=0; i<n+1; i++) {
195:       new_i[i]--;
196:     }
197:     for (i=new_i[0];i<new_i[n];i++) {
198:       new_j[i]--;
199:     }
200:   }

202:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203:   /*-- permute the right-hand-side and solution vectors           --*/
204:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
205:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
206:   ISGetIndices(isicol,&ic);
207:   for(i=0; i<n; i++) {
208:     ordcol[i] = ic[iperm[i]-1];
209:   };
210:   ISRestoreIndices(isicol,&ic);
211:   ISDestroy(isicol);

213:   PetscFree(iperm);

215:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
216:   PetscFree(ordcol);

218:   /*----- put together the new matrix -----*/

220:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
221:   (*fact)->factor    = FACTOR_LU;
222:   (*fact)->assembled = PETSC_TRUE;

224:   b = (Mat_SeqAIJ*)(*fact)->data;
225:   PetscFree(b->imax);
226:   b->sorted        = PETSC_FALSE;
227:   b->singlemalloc  = PETSC_FALSE;
228:   /* the next line frees the default space generated by the MatCreate() */
229:   ierr             = PetscFree(b->a);
230:   ierr             = PetscFree(b->ilen);
231:   b->a             = new_a;
232:   b->j             = new_j;
233:   b->i             = new_i;
234:   b->ilen          = 0;
235:   b->imax          = 0;
236:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
237:   b->row           = isirow;
238:   b->col           = iscolf;
239:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
240:   b->maxnz = b->nz = new_i[n];
241:   b->indexshift    = a->indexshift;
242:   MatMarkDiagonal_SeqAIJ(*fact);
243:   (*fact)->info.factor_mallocs = 0;

245:   MatMarkDiagonal_SeqAIJ(A);

247:   /* check out for identical nodes. If found, use inode functions */
248:   Mat_AIJ_CheckInode(*fact,PETSC_FALSE);

250:   af = ((double)b->nz)/((double)a->nz) + .001;
251:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %gn",info->fill,af);
252:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
253:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);n",af);
254:   PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.n");

256:   return(0);
257: }

259: /*
260:     Factorization code for AIJ format. 
261: */
262: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
263: {
264:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
265:   IS         isicol;
266:   int        *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
267:   int        *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift;
268:   int        *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
269:   PetscReal  f;

272:   if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
273:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
274:   ISGetIndices(isrow,&r);
275:   ISGetIndices(isicol,&ic);

277:   /* get new row pointers */
278:   PetscMalloc((n+1)*sizeof(int),&ainew);
279:   ainew[0] = -shift;
280:   /* don't know how many column pointers are needed so estimate */
281:   if (info) f = info->fill; else f = 1.0;
282:   jmax  = (int)(f*ai[n]+(!shift));
283:   PetscMalloc((jmax)*sizeof(int),&ajnew);
284:   /* fill is a linked list of nonzeros in active row */
285:   PetscMalloc((2*n+1)*sizeof(int),&fill);
286:   im   = fill + n + 1;
287:   /* idnew is location of diagonal in factor */
288:   PetscMalloc((n+1)*sizeof(int),&idnew);
289:   idnew[0] = -shift;

291:   for (i=0; i<n; i++) {
292:     /* first copy previous fill into linked list */
293:     nnz     = nz    = ai[r[i]+1] - ai[r[i]];
294:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
295:     ajtmp   = aj + ai[r[i]] + shift;
296:     fill[n] = n;
297:     while (nz--) {
298:       fm  = n;
299:       idx = ic[*ajtmp++ + shift];
300:       do {
301:         m  = fm;
302:         fm = fill[m];
303:       } while (fm < idx);
304:       fill[m]   = idx;
305:       fill[idx] = fm;
306:     }
307:     row = fill[n];
308:     while (row < i) {
309:       ajtmp = ajnew + idnew[row] + (!shift);
310:       nzbd  = 1 + idnew[row] - ainew[row];
311:       nz    = im[row] - nzbd;
312:       fm    = row;
313:       while (nz-- > 0) {
314:         idx = *ajtmp++ + shift;
315:         nzbd++;
316:         if (idx == i) im[row] = nzbd;
317:         do {
318:           m  = fm;
319:           fm = fill[m];
320:         } while (fm < idx);
321:         if (fm != idx) {
322:           fill[m]   = idx;
323:           fill[idx] = fm;
324:           fm        = idx;
325:           nnz++;
326:         }
327:       }
328:       row = fill[row];
329:     }
330:     /* copy new filled row into permanent storage */
331:     ainew[i+1] = ainew[i] + nnz;
332:     if (ainew[i+1] > jmax) {

334:       /* estimate how much additional space we will need */
335:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
336:       /* just double the memory each time */
337:       int maxadd = jmax;
338:       /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
339:       if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
340:       jmax += maxadd;

342:       /* allocate a longer ajnew */
343:       PetscMalloc(jmax*sizeof(int),&ajtmp);
344:       ierr  = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
345:       ierr  = PetscFree(ajnew);
346:       ajnew = ajtmp;
347:       realloc++; /* count how many times we realloc */
348:     }
349:     ajtmp = ajnew + ainew[i] + shift;
350:     fm    = fill[n];
351:     nzi   = 0;
352:     im[i] = nnz;
353:     while (nnz--) {
354:       if (fm < i) nzi++;
355:       *ajtmp++ = fm - shift;
356:       fm       = fill[fm];
357:     }
358:     idnew[i] = ainew[i] + nzi;
359:   }
360:   if (ai[n] != 0) {
361:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
362:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
363:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use n",af);
364:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);n",af);
365:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.n");
366:   } else {
367:     PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrixn");
368:   }

370:   ISRestoreIndices(isrow,&r);
371:   ISRestoreIndices(isicol,&ic);

373:   PetscFree(fill);

375:   /* put together the new matrix */
376:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);
377:   PetscLogObjectParent(*B,isicol);
378:   b = (Mat_SeqAIJ*)(*B)->data;
379:   PetscFree(b->imax);
380:   b->singlemalloc = PETSC_FALSE;
381:   /* the next line frees the default space generated by the Create() */
382:   PetscFree(b->a);
383:   PetscFree(b->ilen);
384:   ierr          = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);
385:   b->j          = ajnew;
386:   b->i          = ainew;
387:   b->diag       = idnew;
388:   b->ilen       = 0;
389:   b->imax       = 0;
390:   b->row        = isrow;
391:   b->col        = iscol;
392:   if (info) {
393:     b->lu_damp      = (PetscTruth) ((int)info->damp);
394:     b->lu_damping   = info->damping;
395:     b->lu_zeropivot = info->zeropivot;
396:   } else {
397:     b->lu_damp      = PETSC_FALSE;
398:     b->lu_damping   = 0.0;
399:     b->lu_zeropivot = 1.e-12;
400:   }
401:   ierr          = PetscObjectReference((PetscObject)isrow);
402:   ierr          = PetscObjectReference((PetscObject)iscol);
403:   b->icol       = isicol;
404:   ierr          = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
405:   /* In b structure:  Free imax, ilen, old a, old j.  
406:      Allocate idnew, solve_work, new a, new j */
407:   PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar)));
408:   b->maxnz = b->nz = ainew[n] + shift;

410:   (*B)->factor                 =  FACTOR_LU;
411:   (*B)->info.factor_mallocs    = realloc;
412:   (*B)->info.fill_ratio_given  = f;
413:   Mat_AIJ_CheckInode(*B,PETSC_FALSE);
414:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

416:   if (ai[n] != 0) {
417:     (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
418:   } else {
419:     (*B)->info.fill_ratio_needed = 0.0;
420:   }
421:   return(0);
422: }
423: /* ----------------------------------------------------------- */
424: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);

426: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
427: {
428:   Mat          C = *B;
429:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
430:   IS           isrow = b->row,isicol = b->icol;
431:   int          *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
432:   int          *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift;
433:   int          *diag_offset = b->diag,diag;
434:   int          *pj,ndamp = 0;
435:   PetscScalar  *rtmp,*v,*pc,multiplier,*pv,*rtmps;
436:   PetscReal    damping = b->lu_damping, zeropivot = b->lu_zeropivot;
437:   PetscTruth   damp;

440:   ierr  = ISGetIndices(isrow,&r);
441:   ierr  = ISGetIndices(isicol,&ic);
442:   ierr  = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
443:   ierr  = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
444:   rtmps = rtmp + shift; ics = ic + shift;

446:   do {
447:     damp = PETSC_FALSE;
448:     for (i=0; i<n; i++) {
449:       nz    = ai[i+1] - ai[i];
450:       ajtmp = aj + ai[i] + shift;
451:       for  (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;

453:       /* load in initial (unfactored row) */
454:       nz       = a->i[r[i]+1] - a->i[r[i]];
455:       ajtmpold = a->j + a->i[r[i]] + shift;
456:       v        = a->a + a->i[r[i]] + shift;
457:       for (j=0; j<nz; j++) {
458:         rtmp[ics[ajtmpold[j]]] = v[j];
459:         if (ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */
460:           rtmp[ics[ajtmpold[j]]] += damping;
461:         }
462:       }

464:       row = *ajtmp++ + shift;
465:       while  (row < i) {
466:         pc = rtmp + row;
467:         if (*pc != 0.0) {
468:           pv         = b->a + diag_offset[row] + shift;
469:           pj         = b->j + diag_offset[row] + (!shift);
470:           multiplier = *pc / *pv++;
471:           *pc        = multiplier;
472:           nz         = ai[row+1] - diag_offset[row] - 1;
473:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
474:           PetscLogFlops(2*nz);
475:         }
476:         row = *ajtmp++ + shift;
477:       }
478:       /* finished row so stick it into b->a */
479:       pv = b->a + ai[i] + shift;
480:       pj = b->j + ai[i] + shift;
481:       nz = ai[i+1] - ai[i];
482:       for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];}
483:       diag = diag_offset[i] - ai[i];
484:       if (PetscAbsScalar(pv[diag]) < zeropivot) {
485:         if (b->lu_damp) {
486:           damp = PETSC_TRUE;
487:           if (damping) damping *= 2.0;
488:           else damping = 1.e-12;
489:           ndamp++;
490:           break;
491:         } else {
492:           SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",i,PetscAbsScalar(pv[diag]),zeropivot);
493:         }
494:       }
495:     }
496:   } while (damp);

498:   /* invert diagonal entries for simplier triangular solves */
499:   for (i=0; i<n; i++) {
500:     b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
501:   }

503:   PetscFree(rtmp);
504:   ISRestoreIndices(isicol,&ic);
505:   ISRestoreIndices(isrow,&r);
506:   C->factor = FACTOR_LU;
507:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
508:   C->assembled = PETSC_TRUE;
509:   PetscLogFlops(C->n);
510:   if (ndamp || b->lu_damping) {
511:     PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %gn",ndamp,damping);
512:   }
513:   return(0);
514: }
515: /* ----------------------------------------------------------- */
516: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info)
517: {
519:   Mat C;

522:   MatLUFactorSymbolic(A,row,col,info,&C);
523:   MatLUFactorNumeric(A,&C);
524:   MatHeaderCopy(A,C);
525:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
526:   return(0);
527: }
528: /* ----------------------------------------------------------- */
529: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
530: {
531:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
532:   IS           iscol = a->col,isrow = a->row;
533:   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
534:   int          nz,shift = a->indexshift,*rout,*cout;
535:   PetscScalar  *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

538:   if (!n) return(0);

540:   VecGetArray(bb,&b);
541:   VecGetArray(xx,&x);
542:   tmp  = a->solve_work;

544:   ISGetIndices(isrow,&rout); r = rout;
545:   ISGetIndices(iscol,&cout); c = cout + (n-1);

547:   /* forward solve the lower triangular */
548:   tmp[0] = b[*r++];
549:   tmps   = tmp + shift;
550:   for (i=1; i<n; i++) {
551:     v   = aa + ai[i] + shift;
552:     vi  = aj + ai[i] + shift;
553:     nz  = a->diag[i] - ai[i];
554:     sum = b[*r++];
555:     while (nz--) sum -= *v++ * tmps[*vi++];
556:     tmp[i] = sum;
557:   }

559:   /* backward solve the upper triangular */
560:   for (i=n-1; i>=0; i--){
561:     v   = aa + a->diag[i] + (!shift);
562:     vi  = aj + a->diag[i] + (!shift);
563:     nz  = ai[i+1] - a->diag[i] - 1;
564:     sum = tmp[i];
565:     while (nz--) sum -= *v++ * tmps[*vi++];
566:     x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
567:   }

569:   ISRestoreIndices(isrow,&rout);
570:   ISRestoreIndices(iscol,&cout);
571:   VecRestoreArray(bb,&b);
572:   VecRestoreArray(xx,&x);
573:   PetscLogFlops(2*a->nz - A->n);
574:   return(0);
575: }

577: /* ----------------------------------------------------------- */
578: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
579: {
580:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
581:   int          n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
582:   PetscScalar  *x,*b,*aa = a->a;
583: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
584:   int          adiag_i,i,*vi,nz,ai_i;
585:   PetscScalar  *v,sum;
586: #endif

589:   if (!n) return(0);
590:   if (a->indexshift) {
591:      MatSolve_SeqAIJ(A,bb,xx);
592:      return(0);
593:   }

595:   VecGetArray(bb,&b);
596:   VecGetArray(xx,&x);

598: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
599:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
600: #else
601:   /* forward solve the lower triangular */
602:   x[0] = b[0];
603:   for (i=1; i<n; i++) {
604:     ai_i = ai[i];
605:     v    = aa + ai_i;
606:     vi   = aj + ai_i;
607:     nz   = adiag[i] - ai_i;
608:     sum  = b[i];
609:     while (nz--) sum -= *v++ * x[*vi++];
610:     x[i] = sum;
611:   }

613:   /* backward solve the upper triangular */
614:   for (i=n-1; i>=0; i--){
615:     adiag_i = adiag[i];
616:     v       = aa + adiag_i + 1;
617:     vi      = aj + adiag_i + 1;
618:     nz      = ai[i+1] - adiag_i - 1;
619:     sum     = x[i];
620:     while (nz--) sum -= *v++ * x[*vi++];
621:     x[i]    = sum*aa[adiag_i];
622:   }
623: #endif
624:   PetscLogFlops(2*a->nz - A->n);
625:   VecRestoreArray(bb,&b);
626:   VecRestoreArray(xx,&x);
627:   return(0);
628: }

630: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
631: {
632:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
633:   IS           iscol = a->col,isrow = a->row;
634:   int          *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
635:   int          nz,shift = a->indexshift,*rout,*cout;
636:   PetscScalar  *x,*b,*tmp,*aa = a->a,sum,*v;

639:   if (yy != xx) {VecCopy(yy,xx);}

641:   VecGetArray(bb,&b);
642:   VecGetArray(xx,&x);
643:   tmp  = a->solve_work;

645:   ISGetIndices(isrow,&rout); r = rout;
646:   ISGetIndices(iscol,&cout); c = cout + (n-1);

648:   /* forward solve the lower triangular */
649:   tmp[0] = b[*r++];
650:   for (i=1; i<n; i++) {
651:     v   = aa + ai[i] + shift;
652:     vi  = aj + ai[i] + shift;
653:     nz  = a->diag[i] - ai[i];
654:     sum = b[*r++];
655:     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
656:     tmp[i] = sum;
657:   }

659:   /* backward solve the upper triangular */
660:   for (i=n-1; i>=0; i--){
661:     v   = aa + a->diag[i] + (!shift);
662:     vi  = aj + a->diag[i] + (!shift);
663:     nz  = ai[i+1] - a->diag[i] - 1;
664:     sum = tmp[i];
665:     while (nz--) sum -= *v++ * tmp[*vi++ + shift];
666:     tmp[i] = sum*aa[a->diag[i]+shift];
667:     x[*c--] += tmp[i];
668:   }

670:   ISRestoreIndices(isrow,&rout);
671:   ISRestoreIndices(iscol,&cout);
672:   VecRestoreArray(bb,&b);
673:   VecRestoreArray(xx,&x);
674:   PetscLogFlops(2*a->nz);

676:   return(0);
677: }
678: /* -------------------------------------------------------------------*/
679: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
680: {
681:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
682:   IS           iscol = a->col,isrow = a->row;
683:   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
684:   int          nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
685:   PetscScalar  *x,*b,*tmp,*aa = a->a,*v,s1;

688:   VecGetArray(bb,&b);
689:   VecGetArray(xx,&x);
690:   tmp  = a->solve_work;

692:   ISGetIndices(isrow,&rout); r = rout;
693:   ISGetIndices(iscol,&cout); c = cout;

695:   /* copy the b into temp work space according to permutation */
696:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

698:   /* forward solve the U^T */
699:   for (i=0; i<n; i++) {
700:     v   = aa + diag[i] + shift;
701:     vi  = aj + diag[i] + (!shift);
702:     nz  = ai[i+1] - diag[i] - 1;
703:     s1  = tmp[i];
704:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
705:     while (nz--) {
706:       tmp[*vi++ + shift] -= (*v++)*s1;
707:     }
708:     tmp[i] = s1;
709:   }

711:   /* backward solve the L^T */
712:   for (i=n-1; i>=0; i--){
713:     v   = aa + diag[i] - 1 + shift;
714:     vi  = aj + diag[i] - 1 + shift;
715:     nz  = diag[i] - ai[i];
716:     s1  = tmp[i];
717:     while (nz--) {
718:       tmp[*vi-- + shift] -= (*v--)*s1;
719:     }
720:   }

722:   /* copy tmp into x according to permutation */
723:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

725:   ISRestoreIndices(isrow,&rout);
726:   ISRestoreIndices(iscol,&cout);
727:   VecRestoreArray(bb,&b);
728:   VecRestoreArray(xx,&x);

730:   PetscLogFlops(2*a->nz-A->n);
731:   return(0);
732: }

734: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
735: {
736:   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
737:   IS           iscol = a->col,isrow = a->row;
738:   int          *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
739:   int          nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
740:   PetscScalar  *x,*b,*tmp,*aa = a->a,*v;

743:   if (zz != xx) VecCopy(zz,xx);

745:   VecGetArray(bb,&b);
746:   VecGetArray(xx,&x);
747:   tmp = a->solve_work;

749:   ISGetIndices(isrow,&rout); r = rout;
750:   ISGetIndices(iscol,&cout); c = cout;

752:   /* copy the b into temp work space according to permutation */
753:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

755:   /* forward solve the U^T */
756:   for (i=0; i<n; i++) {
757:     v   = aa + diag[i] + shift;
758:     vi  = aj + diag[i] + (!shift);
759:     nz  = ai[i+1] - diag[i] - 1;
760:     tmp[i] *= *v++;
761:     while (nz--) {
762:       tmp[*vi++ + shift] -= (*v++)*tmp[i];
763:     }
764:   }

766:   /* backward solve the L^T */
767:   for (i=n-1; i>=0; i--){
768:     v   = aa + diag[i] - 1 + shift;
769:     vi  = aj + diag[i] - 1 + shift;
770:     nz  = diag[i] - ai[i];
771:     while (nz--) {
772:       tmp[*vi-- + shift] -= (*v--)*tmp[i];
773:     }
774:   }

776:   /* copy tmp into x according to permutation */
777:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

779:   ISRestoreIndices(isrow,&rout);
780:   ISRestoreIndices(iscol,&cout);
781:   VecRestoreArray(bb,&b);
782:   VecRestoreArray(xx,&x);

784:   PetscLogFlops(2*a->nz);
785:   return(0);
786: }
787: /* ----------------------------------------------------------------*/
788: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);

790: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
791: {
792:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
793:   IS         isicol;
794:   int        *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
795:   int        *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
796:   int        *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
797:   int        incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
798:   PetscTruth col_identity,row_identity;
799:   PetscReal  f;
800: 
802:   if (info) {
803:     f             = info->fill;
804:     levels        = (int)info->levels;
805:     diagonal_fill = (int)info->diagonal_fill;
806:   } else {
807:     f             = 1.0;
808:     levels        = 0;
809:     diagonal_fill = 0;
810:   }
811:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

813:   /* special case that simply copies fill pattern */
814:   ISIdentity(isrow,&row_identity);
815:   ISIdentity(iscol,&col_identity);
816:   if (!levels && row_identity && col_identity) {
817:     MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
818:     (*fact)->factor = FACTOR_LU;
819:     b               = (Mat_SeqAIJ*)(*fact)->data;
820:     if (!b->diag) {
821:       MatMarkDiagonal_SeqAIJ(*fact);
822:     }
823:     MatMissingDiagonal_SeqAIJ(*fact);
824:     b->row              = isrow;
825:     b->col              = iscol;
826:     b->icol             = isicol;
827:     if (info) {
828:       b->lu_damp      = (PetscTruth)((int)info->damp);
829:       b->lu_damping   = info->damping;
830:       b->lu_zeropivot = info->zeropivot;
831:     } else {
832:       b->lu_damp      = PETSC_FALSE;
833:       b->lu_damping   = 0.0;
834:       b->lu_zeropivot = 1.e-12;
835:     }
836:     ierr                = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
837:     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
838:     ierr                = PetscObjectReference((PetscObject)isrow);
839:     ierr                = PetscObjectReference((PetscObject)iscol);
840:     return(0);
841:   }

843:   ISGetIndices(isrow,&r);
844:   ISGetIndices(isicol,&ic);

846:   /* get new row pointers */
847:   PetscMalloc((n+1)*sizeof(int),&ainew);
848:   ainew[0] = -shift;
849:   /* don't know how many column pointers are needed so estimate */
850:   jmax = (int)(f*(ai[n]+!shift));
851:   PetscMalloc((jmax)*sizeof(int),&ajnew);
852:   /* ajfill is level of fill for each fill entry */
853:   PetscMalloc((jmax)*sizeof(int),&ajfill);
854:   /* fill is a linked list of nonzeros in active row */
855:   PetscMalloc((n+1)*sizeof(int),&fill);
856:   /* im is level for each filled value */
857:   PetscMalloc((n+1)*sizeof(int),&im);
858:   /* dloc is location of diagonal in factor */
859:   PetscMalloc((n+1)*sizeof(int),&dloc);
860:   dloc[0]  = 0;
861:   for (prow=0; prow<n; prow++) {

863:     /* copy current row into linked list */
864:     nzf     = nz  = ai[r[prow]+1] - ai[r[prow]];
865:     if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
866:     xi      = aj + ai[r[prow]] + shift;
867:     fill[n]    = n;
868:     fill[prow] = -1; /* marker to indicate if diagonal exists */
869:     while (nz--) {
870:       fm  = n;
871:       idx = ic[*xi++ + shift];
872:       do {
873:         m  = fm;
874:         fm = fill[m];
875:       } while (fm < idx);
876:       fill[m]   = idx;
877:       fill[idx] = fm;
878:       im[idx]   = 0;
879:     }

881:     /* make sure diagonal entry is included */
882:     if (diagonal_fill && fill[prow] == -1) {
883:       fm = n;
884:       while (fill[fm] < prow) fm = fill[fm];
885:       fill[prow] = fill[fm]; /* insert diagonal into linked list */
886:       fill[fm]   = prow;
887:       im[prow]   = 0;
888:       nzf++;
889:       dcount++;
890:     }

892:     nzi = 0;
893:     row = fill[n];
894:     while (row < prow) {
895:       incrlev = im[row] + 1;
896:       nz      = dloc[row];
897:       xi      = ajnew  + ainew[row] + shift + nz + 1;
898:       flev    = ajfill + ainew[row] + shift + nz + 1;
899:       nnz     = ainew[row+1] - ainew[row] - nz - 1;
900:       fm      = row;
901:       while (nnz-- > 0) {
902:         idx = *xi++ + shift;
903:         if (*flev + incrlev > levels) {
904:           flev++;
905:           continue;
906:         }
907:         do {
908:           m  = fm;
909:           fm = fill[m];
910:         } while (fm < idx);
911:         if (fm != idx) {
912:           im[idx]   = *flev + incrlev;
913:           fill[m]   = idx;
914:           fill[idx] = fm;
915:           fm        = idx;
916:           nzf++;
917:         } else {
918:           if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
919:         }
920:         flev++;
921:       }
922:       row = fill[row];
923:       nzi++;
924:     }
925:     /* copy new filled row into permanent storage */
926:     ainew[prow+1] = ainew[prow] + nzf;
927:     if (ainew[prow+1] > jmax-shift) {

929:       /* estimate how much additional space we will need */
930:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
931:       /* just double the memory each time */
932:       /*  maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
933:       int maxadd = jmax;
934:       if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
935:       jmax += maxadd;

937:       /* allocate a longer ajnew and ajfill */
938:       ierr   = PetscMalloc(jmax*sizeof(int),&xi);
939:       ierr   = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
940:       ierr   = PetscFree(ajnew);
941:       ajnew  = xi;
942:       ierr   = PetscMalloc(jmax*sizeof(int),&xi);
943:       ierr   = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
944:       ierr   = PetscFree(ajfill);
945:       ajfill = xi;
946:       realloc++; /* count how many times we realloc */
947:     }
948:     xi          = ajnew + ainew[prow] + shift;
949:     flev        = ajfill + ainew[prow] + shift;
950:     dloc[prow]  = nzi;
951:     fm          = fill[n];
952:     while (nzf--) {
953:       *xi++   = fm - shift;
954:       *flev++ = im[fm];
955:       fm      = fill[fm];
956:     }
957:     /* make sure row has diagonal entry */
958:     if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
959:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrixn
960:     try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
961:     }
962:   }
963:   PetscFree(ajfill);
964:   ISRestoreIndices(isrow,&r);
965:   ISRestoreIndices(isicol,&ic);
966:   PetscFree(fill);
967:   PetscFree(im);

969:   {
970:     PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
971:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
972:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use n",af);
973:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);n",af);
974:     PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.n");
975:     if (diagonal_fill) {
976:       PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
977:     }
978:   }

980:   /* put together the new matrix */
981:   MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
982:   PetscLogObjectParent(*fact,isicol);
983:   b = (Mat_SeqAIJ*)(*fact)->data;
984:   PetscFree(b->imax);
985:   b->singlemalloc = PETSC_FALSE;
986:   len = (ainew[n] + shift)*sizeof(PetscScalar);
987:   /* the next line frees the default space generated by the Create() */
988:   PetscFree(b->a);
989:   PetscFree(b->ilen);
990:   PetscMalloc(len+1,&b->a);
991:   b->j          = ajnew;
992:   b->i          = ainew;
993:   for (i=0; i<n; i++) dloc[i] += ainew[i];
994:   b->diag       = dloc;
995:   b->ilen       = 0;
996:   b->imax       = 0;
997:   b->row        = isrow;
998:   b->col        = iscol;
999:   ierr          = PetscObjectReference((PetscObject)isrow);
1000:   ierr          = PetscObjectReference((PetscObject)iscol);
1001:   b->icol       = isicol;
1002:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1003:   /* In b structure:  Free imax, ilen, old a, old j.  
1004:      Allocate dloc, solve_work, new a, new j */
1005:   PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar)));
1006:   b->maxnz          = b->nz = ainew[n] + shift;
1007:   if (info) {
1008:     b->lu_damp      = (PetscTruth)((int)info->damp);
1009:     b->lu_damping   = info->damping;
1010:     b->lu_zeropivot = info->zeropivot;
1011:   } else {
1012:     b->lu_damp      = PETSC_FALSE;
1013:     b->lu_damping   = 0.0;
1014:     b->lu_zeropivot = 1.e-12;
1015:   }
1016:   (*fact)->factor   = FACTOR_LU;
1017:   Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
1018:   (*fact)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

1020:   (*fact)->info.factor_mallocs    = realloc;
1021:   (*fact)->info.fill_ratio_given  = f;
1022:   (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1023:   (*fact)->factor                 =  FACTOR_LU;
1024:   return(0);
1025: }