Actual source code: aijfact.c
1: /*$Id: aijfact.c,v 1.167 2001/09/11 16:32:26 bsmith Exp $*/
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/vec/vecimpl.h
5: #include src/inline/dot.h
7: int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
8: {
11: SETERRQ(PETSC_ERR_SUP,"Code not written");
12: #if !defined(PETSC_USE_DEBUG)
13: return(0);
14: #endif
15: }
18: EXTERN int MatMarkDiagonal_SeqAIJ(Mat);
19: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
21: EXTERN int SPARSEKIT2dperm(int*,PetscScalar*,int*,int*,PetscScalar*,int*,int*,int*,int*,int*);
22: EXTERN int SPARSEKIT2ilutp(int*,PetscScalar*,int*,int*,int*,PetscReal,PetscReal*,int*,PetscScalar*,int*,int*,int*,PetscScalar*,int*,int*,int*);
23: EXTERN int SPARSEKIT2msrcsr(int*,PetscScalar*,int*,PetscScalar*,int*,int*,PetscScalar*,int*);
25: /* ------------------------------------------------------------
27: This interface was contribed by Tony Caola
29: This routine is an interface to the pivoting drop-tolerance
30: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
31: SPARSEKIT2.
33: The SPARSEKIT2 routines used here are covered by the GNU
34: copyright; see the file gnu in this directory.
36: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
37: help in getting this routine ironed out.
39: The major drawback to this routine is that if info->fill is
40: not large enough it fails rather than allocating more space;
41: this can be fixed by hacking/improving the f2c version of
42: Yousef Saad's code.
44: ishift = 0, for indices start at 1
45: ishift = 1, for indices starting at 0
46: ------------------------------------------------------------
47: */
49: int MatILUDTFactor_SeqAIJ(Mat A,MatILUInfo *info,IS isrow,IS iscol,Mat *fact)
50: {
51: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
52: IS iscolf,isicol,isirow;
53: PetscTruth reorder;
54: int *c,*r,*ic,ierr,i,n = A->m;
55: int *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
56: int *ordcol,*iwk,*iperm,*jw;
57: int ishift = !a->indexshift;
58: int jmax,lfill,job,*o_i,*o_j;
59: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
60: PetscReal permtol,af;
64: if (info->dt == PETSC_DEFAULT) info->dt = .005;
65: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (int)(1.5*a->rmax);
66: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
67: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
68: lfill = (int)(info->dtcount/2.0);
69: jmax = (int)(info->fill*a->nz);
70: permtol = info->dtcol;
73: /* ------------------------------------------------------------
74: If reorder=.TRUE., then the original matrix has to be
75: reordered to reflect the user selected ordering scheme, and
76: then de-reordered so it is in it's original format.
77: Because Saad's dperm() is NOT in place, we have to copy
78: the original matrix and allocate more storage. . .
79: ------------------------------------------------------------
80: */
82: /* set reorder to true if either isrow or iscol is not identity */
83: ISIdentity(isrow,&reorder);
84: if (reorder) {ISIdentity(iscol,&reorder);}
85: reorder = PetscNot(reorder);
87:
88: /* storage for ilu factor */
89: PetscMalloc((n+1)*sizeof(int),&new_i);
90: PetscMalloc(jmax*sizeof(int),&new_j);
91: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
92: PetscMalloc(n*sizeof(int),&ordcol);
94: /* ------------------------------------------------------------
95: Make sure that everything is Fortran formatted (1-Based)
96: ------------------------------------------------------------
97: */
98: if (ishift) {
99: for (i=old_i[0];i<old_i[n];i++) {
100: old_j[i]++;
101: }
102: for(i=0;i<n+1;i++) {
103: old_i[i]++;
104: };
105: };
107: if (reorder) {
108: ISGetIndices(iscol,&c);
109: ISGetIndices(isrow,&r);
110: for(i=0;i<n;i++) {
111: r[i] = r[i]+1;
112: c[i] = c[i]+1;
113: }
114: PetscMalloc((n+1)*sizeof(int),&old_i2);
115: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(int),&old_j2);
116: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
117: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
118: for (i=0;i<n;i++) {
119: r[i] = r[i]-1;
120: c[i] = c[i]-1;
121: }
122: ISRestoreIndices(iscol,&c);
123: ISRestoreIndices(isrow,&r);
124: o_a = old_a2;
125: o_j = old_j2;
126: o_i = old_i2;
127: } else {
128: o_a = old_a;
129: o_j = old_j;
130: o_i = old_i;
131: }
133: /* ------------------------------------------------------------
134: Call Saad's ilutp() routine to generate the factorization
135: ------------------------------------------------------------
136: */
138: PetscMalloc(2*n*sizeof(int),&iperm);
139: PetscMalloc(2*n*sizeof(int),&jw);
140: PetscMalloc(n*sizeof(PetscScalar),&w);
142: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&permtol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&ierr);
143: if (ierr) {
144: switch (ierr) {
145: case -3: SETERRQ2(1,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
146: case -2: SETERRQ2(1,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %d",info->fill,jmax);
147: case -5: SETERRQ(1,"ilutp(), zero row encountered");
148: case -1: SETERRQ(1,"ilutp(), input matrix may be wrong");
149: case -4: SETERRQ1(1,"ilutp(), illegal info->fill value %d",jmax);
150: default: SETERRQ1(1,"ilutp(), zero pivot detected on row %d",ierr);
151: }
152: }
154: PetscFree(w);
155: PetscFree(jw);
157: /* ------------------------------------------------------------
158: Saad's routine gives the result in Modified Sparse Row (msr)
159: Convert to Compressed Sparse Row format (csr)
160: ------------------------------------------------------------
161: */
163: PetscMalloc(n*sizeof(PetscScalar),&wk);
164: PetscMalloc((n+1)*sizeof(int),&iwk);
166: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
168: PetscFree(iwk);
169: PetscFree(wk);
171: if (reorder) {
172: PetscFree(old_a2);
173: PetscFree(old_j2);
174: PetscFree(old_i2);
175: } else {
176: /* fix permutation of old_j that the factorization introduced */
177: for (i=old_i[0]; i<old_i[n]; i++) {
178: old_j[i-1] = iperm[old_j[i-1]-1];
179: }
180: }
182: /* get rid of the shift to indices starting at 1 */
183: if (ishift) {
184: for (i=0; i<n+1; i++) {
185: old_i[i]--;
186: }
187: for (i=old_i[0];i<old_i[n];i++) {
188: old_j[i]--;
189: }
190: }
192: /* Make the factored matrix 0-based */
193: if (ishift) {
194: for (i=0; i<n+1; i++) {
195: new_i[i]--;
196: }
197: for (i=new_i[0];i<new_i[n];i++) {
198: new_j[i]--;
199: }
200: }
202: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
203: /*-- permute the right-hand-side and solution vectors --*/
204: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
205: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
206: ISGetIndices(isicol,&ic);
207: for(i=0; i<n; i++) {
208: ordcol[i] = ic[iperm[i]-1];
209: };
210: ISRestoreIndices(isicol,&ic);
211: ISDestroy(isicol);
213: PetscFree(iperm);
215: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
216: PetscFree(ordcol);
218: /*----- put together the new matrix -----*/
220: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
221: (*fact)->factor = FACTOR_LU;
222: (*fact)->assembled = PETSC_TRUE;
224: b = (Mat_SeqAIJ*)(*fact)->data;
225: PetscFree(b->imax);
226: b->sorted = PETSC_FALSE;
227: b->singlemalloc = PETSC_FALSE;
228: /* the next line frees the default space generated by the MatCreate() */
229: ierr = PetscFree(b->a);
230: ierr = PetscFree(b->ilen);
231: b->a = new_a;
232: b->j = new_j;
233: b->i = new_i;
234: b->ilen = 0;
235: b->imax = 0;
236: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
237: b->row = isirow;
238: b->col = iscolf;
239: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
240: b->maxnz = b->nz = new_i[n];
241: b->indexshift = a->indexshift;
242: MatMarkDiagonal_SeqAIJ(*fact);
243: (*fact)->info.factor_mallocs = 0;
245: MatMarkDiagonal_SeqAIJ(A);
247: /* check out for identical nodes. If found, use inode functions */
248: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
250: af = ((double)b->nz)/((double)a->nz) + .001;
251: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %gn",info->fill,af);
252: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use n",af);
253: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);n",af);
254: PetscLogInfo(A,"MatILUDTFactor_SeqAIJ:for best performance.n");
256: return(0);
257: }
259: /*
260: Factorization code for AIJ format.
261: */
262: int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatLUInfo *info,Mat *B)
263: {
264: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
265: IS isicol;
266: int *r,*ic,ierr,i,n = A->m,*ai = a->i,*aj = a->j;
267: int *ainew,*ajnew,jmax,*fill,*ajtmp,nz,shift = a->indexshift;
268: int *idnew,idx,row,m,fm,nnz,nzi,realloc = 0,nzbd,*im;
269: PetscReal f;
272: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
273: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
274: ISGetIndices(isrow,&r);
275: ISGetIndices(isicol,&ic);
277: /* get new row pointers */
278: PetscMalloc((n+1)*sizeof(int),&ainew);
279: ainew[0] = -shift;
280: /* don't know how many column pointers are needed so estimate */
281: if (info) f = info->fill; else f = 1.0;
282: jmax = (int)(f*ai[n]+(!shift));
283: PetscMalloc((jmax)*sizeof(int),&ajnew);
284: /* fill is a linked list of nonzeros in active row */
285: PetscMalloc((2*n+1)*sizeof(int),&fill);
286: im = fill + n + 1;
287: /* idnew is location of diagonal in factor */
288: PetscMalloc((n+1)*sizeof(int),&idnew);
289: idnew[0] = -shift;
291: for (i=0; i<n; i++) {
292: /* first copy previous fill into linked list */
293: nnz = nz = ai[r[i]+1] - ai[r[i]];
294: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
295: ajtmp = aj + ai[r[i]] + shift;
296: fill[n] = n;
297: while (nz--) {
298: fm = n;
299: idx = ic[*ajtmp++ + shift];
300: do {
301: m = fm;
302: fm = fill[m];
303: } while (fm < idx);
304: fill[m] = idx;
305: fill[idx] = fm;
306: }
307: row = fill[n];
308: while (row < i) {
309: ajtmp = ajnew + idnew[row] + (!shift);
310: nzbd = 1 + idnew[row] - ainew[row];
311: nz = im[row] - nzbd;
312: fm = row;
313: while (nz-- > 0) {
314: idx = *ajtmp++ + shift;
315: nzbd++;
316: if (idx == i) im[row] = nzbd;
317: do {
318: m = fm;
319: fm = fill[m];
320: } while (fm < idx);
321: if (fm != idx) {
322: fill[m] = idx;
323: fill[idx] = fm;
324: fm = idx;
325: nnz++;
326: }
327: }
328: row = fill[row];
329: }
330: /* copy new filled row into permanent storage */
331: ainew[i+1] = ainew[i] + nnz;
332: if (ainew[i+1] > jmax) {
334: /* estimate how much additional space we will need */
335: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
336: /* just double the memory each time */
337: int maxadd = jmax;
338: /* maxadd = (int)((f*(ai[n]+(!shift))*(n-i+5))/n); */
339: if (maxadd < nnz) maxadd = (n-i)*(nnz+1);
340: jmax += maxadd;
342: /* allocate a longer ajnew */
343: PetscMalloc(jmax*sizeof(int),&ajtmp);
344: ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));
345: ierr = PetscFree(ajnew);
346: ajnew = ajtmp;
347: realloc++; /* count how many times we realloc */
348: }
349: ajtmp = ajnew + ainew[i] + shift;
350: fm = fill[n];
351: nzi = 0;
352: im[i] = nnz;
353: while (nnz--) {
354: if (fm < i) nzi++;
355: *ajtmp++ = fm - shift;
356: fm = fill[fm];
357: }
358: idnew[i] = ainew[i] + nzi;
359: }
360: if (ai[n] != 0) {
361: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
362: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
363: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use n",af);
364: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);n",af);
365: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.n");
366: } else {
367: PetscLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrixn");
368: }
370: ISRestoreIndices(isrow,&r);
371: ISRestoreIndices(isicol,&ic);
373: PetscFree(fill);
375: /* put together the new matrix */
376: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);
377: PetscLogObjectParent(*B,isicol);
378: b = (Mat_SeqAIJ*)(*B)->data;
379: PetscFree(b->imax);
380: b->singlemalloc = PETSC_FALSE;
381: /* the next line frees the default space generated by the Create() */
382: PetscFree(b->a);
383: PetscFree(b->ilen);
384: ierr = PetscMalloc((ainew[n]+shift+1)*sizeof(PetscScalar),&b->a);
385: b->j = ajnew;
386: b->i = ainew;
387: b->diag = idnew;
388: b->ilen = 0;
389: b->imax = 0;
390: b->row = isrow;
391: b->col = iscol;
392: if (info) {
393: b->lu_damp = (PetscTruth) ((int)info->damp);
394: b->lu_damping = info->damping;
395: b->lu_zeropivot = info->zeropivot;
396: } else {
397: b->lu_damp = PETSC_FALSE;
398: b->lu_damping = 0.0;
399: b->lu_zeropivot = 1.e-12;
400: }
401: ierr = PetscObjectReference((PetscObject)isrow);
402: ierr = PetscObjectReference((PetscObject)iscol);
403: b->icol = isicol;
404: ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
405: /* In b structure: Free imax, ilen, old a, old j.
406: Allocate idnew, solve_work, new a, new j */
407: PetscLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(PetscScalar)));
408: b->maxnz = b->nz = ainew[n] + shift;
410: (*B)->factor = FACTOR_LU;
411: (*B)->info.factor_mallocs = realloc;
412: (*B)->info.fill_ratio_given = f;
413: Mat_AIJ_CheckInode(*B,PETSC_FALSE);
414: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
416: if (ai[n] != 0) {
417: (*B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
418: } else {
419: (*B)->info.fill_ratio_needed = 0.0;
420: }
421: return(0);
422: }
423: /* ----------------------------------------------------------- */
424: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
426: int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B)
427: {
428: Mat C = *B;
429: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
430: IS isrow = b->row,isicol = b->icol;
431: int *r,*ic,ierr,i,j,n = A->m,*ai = b->i,*aj = b->j;
432: int *ajtmpold,*ajtmp,nz,row,*ics,shift = a->indexshift;
433: int *diag_offset = b->diag,diag;
434: int *pj,ndamp = 0;
435: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
436: PetscReal damping = b->lu_damping, zeropivot = b->lu_zeropivot;
437: PetscTruth damp;
440: ierr = ISGetIndices(isrow,&r);
441: ierr = ISGetIndices(isicol,&ic);
442: ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
443: ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
444: rtmps = rtmp + shift; ics = ic + shift;
446: do {
447: damp = PETSC_FALSE;
448: for (i=0; i<n; i++) {
449: nz = ai[i+1] - ai[i];
450: ajtmp = aj + ai[i] + shift;
451: for (j=0; j<nz; j++) rtmps[ajtmp[j]] = 0.0;
453: /* load in initial (unfactored row) */
454: nz = a->i[r[i]+1] - a->i[r[i]];
455: ajtmpold = a->j + a->i[r[i]] + shift;
456: v = a->a + a->i[r[i]] + shift;
457: for (j=0; j<nz; j++) {
458: rtmp[ics[ajtmpold[j]]] = v[j];
459: if (ajtmpold[j] == r[i]) { /* damp the diagonal of the matrix */
460: rtmp[ics[ajtmpold[j]]] += damping;
461: }
462: }
464: row = *ajtmp++ + shift;
465: while (row < i) {
466: pc = rtmp + row;
467: if (*pc != 0.0) {
468: pv = b->a + diag_offset[row] + shift;
469: pj = b->j + diag_offset[row] + (!shift);
470: multiplier = *pc / *pv++;
471: *pc = multiplier;
472: nz = ai[row+1] - diag_offset[row] - 1;
473: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
474: PetscLogFlops(2*nz);
475: }
476: row = *ajtmp++ + shift;
477: }
478: /* finished row so stick it into b->a */
479: pv = b->a + ai[i] + shift;
480: pj = b->j + ai[i] + shift;
481: nz = ai[i+1] - ai[i];
482: for (j=0; j<nz; j++) {pv[j] = rtmps[pj[j]];}
483: diag = diag_offset[i] - ai[i];
484: if (PetscAbsScalar(pv[diag]) < zeropivot) {
485: if (b->lu_damp) {
486: damp = PETSC_TRUE;
487: if (damping) damping *= 2.0;
488: else damping = 1.e-12;
489: ndamp++;
490: break;
491: } else {
492: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",i,PetscAbsScalar(pv[diag]),zeropivot);
493: }
494: }
495: }
496: } while (damp);
498: /* invert diagonal entries for simplier triangular solves */
499: for (i=0; i<n; i++) {
500: b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift];
501: }
503: PetscFree(rtmp);
504: ISRestoreIndices(isicol,&ic);
505: ISRestoreIndices(isrow,&r);
506: C->factor = FACTOR_LU;
507: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
508: C->assembled = PETSC_TRUE;
509: PetscLogFlops(C->n);
510: if (ndamp || b->lu_damping) {
511: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ: number of damping tries %d damping value %gn",ndamp,damping);
512: }
513: return(0);
514: }
515: /* ----------------------------------------------------------- */
516: int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatLUInfo *info)
517: {
519: Mat C;
522: MatLUFactorSymbolic(A,row,col,info,&C);
523: MatLUFactorNumeric(A,&C);
524: MatHeaderCopy(A,C);
525: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
526: return(0);
527: }
528: /* ----------------------------------------------------------- */
529: int MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
530: {
531: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
532: IS iscol = a->col,isrow = a->row;
533: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
534: int nz,shift = a->indexshift,*rout,*cout;
535: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
538: if (!n) return(0);
540: VecGetArray(bb,&b);
541: VecGetArray(xx,&x);
542: tmp = a->solve_work;
544: ISGetIndices(isrow,&rout); r = rout;
545: ISGetIndices(iscol,&cout); c = cout + (n-1);
547: /* forward solve the lower triangular */
548: tmp[0] = b[*r++];
549: tmps = tmp + shift;
550: for (i=1; i<n; i++) {
551: v = aa + ai[i] + shift;
552: vi = aj + ai[i] + shift;
553: nz = a->diag[i] - ai[i];
554: sum = b[*r++];
555: while (nz--) sum -= *v++ * tmps[*vi++];
556: tmp[i] = sum;
557: }
559: /* backward solve the upper triangular */
560: for (i=n-1; i>=0; i--){
561: v = aa + a->diag[i] + (!shift);
562: vi = aj + a->diag[i] + (!shift);
563: nz = ai[i+1] - a->diag[i] - 1;
564: sum = tmp[i];
565: while (nz--) sum -= *v++ * tmps[*vi++];
566: x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift];
567: }
569: ISRestoreIndices(isrow,&rout);
570: ISRestoreIndices(iscol,&cout);
571: VecRestoreArray(bb,&b);
572: VecRestoreArray(xx,&x);
573: PetscLogFlops(2*a->nz - A->n);
574: return(0);
575: }
577: /* ----------------------------------------------------------- */
578: int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
579: {
580: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
581: int n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag,ierr;
582: PetscScalar *x,*b,*aa = a->a;
583: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
584: int adiag_i,i,*vi,nz,ai_i;
585: PetscScalar *v,sum;
586: #endif
589: if (!n) return(0);
590: if (a->indexshift) {
591: MatSolve_SeqAIJ(A,bb,xx);
592: return(0);
593: }
595: VecGetArray(bb,&b);
596: VecGetArray(xx,&x);
598: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
599: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
600: #else
601: /* forward solve the lower triangular */
602: x[0] = b[0];
603: for (i=1; i<n; i++) {
604: ai_i = ai[i];
605: v = aa + ai_i;
606: vi = aj + ai_i;
607: nz = adiag[i] - ai_i;
608: sum = b[i];
609: while (nz--) sum -= *v++ * x[*vi++];
610: x[i] = sum;
611: }
613: /* backward solve the upper triangular */
614: for (i=n-1; i>=0; i--){
615: adiag_i = adiag[i];
616: v = aa + adiag_i + 1;
617: vi = aj + adiag_i + 1;
618: nz = ai[i+1] - adiag_i - 1;
619: sum = x[i];
620: while (nz--) sum -= *v++ * x[*vi++];
621: x[i] = sum*aa[adiag_i];
622: }
623: #endif
624: PetscLogFlops(2*a->nz - A->n);
625: VecRestoreArray(bb,&b);
626: VecRestoreArray(xx,&x);
627: return(0);
628: }
630: int MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
631: {
632: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
633: IS iscol = a->col,isrow = a->row;
634: int *r,*c,ierr,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
635: int nz,shift = a->indexshift,*rout,*cout;
636: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
639: if (yy != xx) {VecCopy(yy,xx);}
641: VecGetArray(bb,&b);
642: VecGetArray(xx,&x);
643: tmp = a->solve_work;
645: ISGetIndices(isrow,&rout); r = rout;
646: ISGetIndices(iscol,&cout); c = cout + (n-1);
648: /* forward solve the lower triangular */
649: tmp[0] = b[*r++];
650: for (i=1; i<n; i++) {
651: v = aa + ai[i] + shift;
652: vi = aj + ai[i] + shift;
653: nz = a->diag[i] - ai[i];
654: sum = b[*r++];
655: while (nz--) sum -= *v++ * tmp[*vi++ + shift];
656: tmp[i] = sum;
657: }
659: /* backward solve the upper triangular */
660: for (i=n-1; i>=0; i--){
661: v = aa + a->diag[i] + (!shift);
662: vi = aj + a->diag[i] + (!shift);
663: nz = ai[i+1] - a->diag[i] - 1;
664: sum = tmp[i];
665: while (nz--) sum -= *v++ * tmp[*vi++ + shift];
666: tmp[i] = sum*aa[a->diag[i]+shift];
667: x[*c--] += tmp[i];
668: }
670: ISRestoreIndices(isrow,&rout);
671: ISRestoreIndices(iscol,&cout);
672: VecRestoreArray(bb,&b);
673: VecRestoreArray(xx,&x);
674: PetscLogFlops(2*a->nz);
676: return(0);
677: }
678: /* -------------------------------------------------------------------*/
679: int MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
680: {
681: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
682: IS iscol = a->col,isrow = a->row;
683: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
684: int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
685: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
688: VecGetArray(bb,&b);
689: VecGetArray(xx,&x);
690: tmp = a->solve_work;
692: ISGetIndices(isrow,&rout); r = rout;
693: ISGetIndices(iscol,&cout); c = cout;
695: /* copy the b into temp work space according to permutation */
696: for (i=0; i<n; i++) tmp[i] = b[c[i]];
698: /* forward solve the U^T */
699: for (i=0; i<n; i++) {
700: v = aa + diag[i] + shift;
701: vi = aj + diag[i] + (!shift);
702: nz = ai[i+1] - diag[i] - 1;
703: s1 = tmp[i];
704: s1 *= (*v++); /* multiply by inverse of diagonal entry */
705: while (nz--) {
706: tmp[*vi++ + shift] -= (*v++)*s1;
707: }
708: tmp[i] = s1;
709: }
711: /* backward solve the L^T */
712: for (i=n-1; i>=0; i--){
713: v = aa + diag[i] - 1 + shift;
714: vi = aj + diag[i] - 1 + shift;
715: nz = diag[i] - ai[i];
716: s1 = tmp[i];
717: while (nz--) {
718: tmp[*vi-- + shift] -= (*v--)*s1;
719: }
720: }
722: /* copy tmp into x according to permutation */
723: for (i=0; i<n; i++) x[r[i]] = tmp[i];
725: ISRestoreIndices(isrow,&rout);
726: ISRestoreIndices(iscol,&cout);
727: VecRestoreArray(bb,&b);
728: VecRestoreArray(xx,&x);
730: PetscLogFlops(2*a->nz-A->n);
731: return(0);
732: }
734: int MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
735: {
736: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
737: IS iscol = a->col,isrow = a->row;
738: int *r,*c,ierr,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
739: int nz,shift = a->indexshift,*rout,*cout,*diag = a->diag;
740: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
743: if (zz != xx) VecCopy(zz,xx);
745: VecGetArray(bb,&b);
746: VecGetArray(xx,&x);
747: tmp = a->solve_work;
749: ISGetIndices(isrow,&rout); r = rout;
750: ISGetIndices(iscol,&cout); c = cout;
752: /* copy the b into temp work space according to permutation */
753: for (i=0; i<n; i++) tmp[i] = b[c[i]];
755: /* forward solve the U^T */
756: for (i=0; i<n; i++) {
757: v = aa + diag[i] + shift;
758: vi = aj + diag[i] + (!shift);
759: nz = ai[i+1] - diag[i] - 1;
760: tmp[i] *= *v++;
761: while (nz--) {
762: tmp[*vi++ + shift] -= (*v++)*tmp[i];
763: }
764: }
766: /* backward solve the L^T */
767: for (i=n-1; i>=0; i--){
768: v = aa + diag[i] - 1 + shift;
769: vi = aj + diag[i] - 1 + shift;
770: nz = diag[i] - ai[i];
771: while (nz--) {
772: tmp[*vi-- + shift] -= (*v--)*tmp[i];
773: }
774: }
776: /* copy tmp into x according to permutation */
777: for (i=0; i<n; i++) x[r[i]] += tmp[i];
779: ISRestoreIndices(isrow,&rout);
780: ISRestoreIndices(iscol,&cout);
781: VecRestoreArray(bb,&b);
782: VecRestoreArray(xx,&x);
784: PetscLogFlops(2*a->nz);
785: return(0);
786: }
787: /* ----------------------------------------------------------------*/
788: EXTERN int MatMissingDiagonal_SeqAIJ(Mat);
790: int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact)
791: {
792: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
793: IS isicol;
794: int *r,*ic,ierr,prow,n = A->m,*ai = a->i,*aj = a->j;
795: int *ainew,*ajnew,jmax,*fill,*xi,nz,*im,*ajfill,*flev;
796: int *dloc,idx,row,m,fm,nzf,nzi,len, realloc = 0,dcount = 0;
797: int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill;
798: PetscTruth col_identity,row_identity;
799: PetscReal f;
800:
802: if (info) {
803: f = info->fill;
804: levels = (int)info->levels;
805: diagonal_fill = (int)info->diagonal_fill;
806: } else {
807: f = 1.0;
808: levels = 0;
809: diagonal_fill = 0;
810: }
811: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
813: /* special case that simply copies fill pattern */
814: ISIdentity(isrow,&row_identity);
815: ISIdentity(iscol,&col_identity);
816: if (!levels && row_identity && col_identity) {
817: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
818: (*fact)->factor = FACTOR_LU;
819: b = (Mat_SeqAIJ*)(*fact)->data;
820: if (!b->diag) {
821: MatMarkDiagonal_SeqAIJ(*fact);
822: }
823: MatMissingDiagonal_SeqAIJ(*fact);
824: b->row = isrow;
825: b->col = iscol;
826: b->icol = isicol;
827: if (info) {
828: b->lu_damp = (PetscTruth)((int)info->damp);
829: b->lu_damping = info->damping;
830: b->lu_zeropivot = info->zeropivot;
831: } else {
832: b->lu_damp = PETSC_FALSE;
833: b->lu_damping = 0.0;
834: b->lu_zeropivot = 1.e-12;
835: }
836: ierr = PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
837: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
838: ierr = PetscObjectReference((PetscObject)isrow);
839: ierr = PetscObjectReference((PetscObject)iscol);
840: return(0);
841: }
843: ISGetIndices(isrow,&r);
844: ISGetIndices(isicol,&ic);
846: /* get new row pointers */
847: PetscMalloc((n+1)*sizeof(int),&ainew);
848: ainew[0] = -shift;
849: /* don't know how many column pointers are needed so estimate */
850: jmax = (int)(f*(ai[n]+!shift));
851: PetscMalloc((jmax)*sizeof(int),&ajnew);
852: /* ajfill is level of fill for each fill entry */
853: PetscMalloc((jmax)*sizeof(int),&ajfill);
854: /* fill is a linked list of nonzeros in active row */
855: PetscMalloc((n+1)*sizeof(int),&fill);
856: /* im is level for each filled value */
857: PetscMalloc((n+1)*sizeof(int),&im);
858: /* dloc is location of diagonal in factor */
859: PetscMalloc((n+1)*sizeof(int),&dloc);
860: dloc[0] = 0;
861: for (prow=0; prow<n; prow++) {
863: /* copy current row into linked list */
864: nzf = nz = ai[r[prow]+1] - ai[r[prow]];
865: if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
866: xi = aj + ai[r[prow]] + shift;
867: fill[n] = n;
868: fill[prow] = -1; /* marker to indicate if diagonal exists */
869: while (nz--) {
870: fm = n;
871: idx = ic[*xi++ + shift];
872: do {
873: m = fm;
874: fm = fill[m];
875: } while (fm < idx);
876: fill[m] = idx;
877: fill[idx] = fm;
878: im[idx] = 0;
879: }
881: /* make sure diagonal entry is included */
882: if (diagonal_fill && fill[prow] == -1) {
883: fm = n;
884: while (fill[fm] < prow) fm = fill[fm];
885: fill[prow] = fill[fm]; /* insert diagonal into linked list */
886: fill[fm] = prow;
887: im[prow] = 0;
888: nzf++;
889: dcount++;
890: }
892: nzi = 0;
893: row = fill[n];
894: while (row < prow) {
895: incrlev = im[row] + 1;
896: nz = dloc[row];
897: xi = ajnew + ainew[row] + shift + nz + 1;
898: flev = ajfill + ainew[row] + shift + nz + 1;
899: nnz = ainew[row+1] - ainew[row] - nz - 1;
900: fm = row;
901: while (nnz-- > 0) {
902: idx = *xi++ + shift;
903: if (*flev + incrlev > levels) {
904: flev++;
905: continue;
906: }
907: do {
908: m = fm;
909: fm = fill[m];
910: } while (fm < idx);
911: if (fm != idx) {
912: im[idx] = *flev + incrlev;
913: fill[m] = idx;
914: fill[idx] = fm;
915: fm = idx;
916: nzf++;
917: } else {
918: if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev;
919: }
920: flev++;
921: }
922: row = fill[row];
923: nzi++;
924: }
925: /* copy new filled row into permanent storage */
926: ainew[prow+1] = ainew[prow] + nzf;
927: if (ainew[prow+1] > jmax-shift) {
929: /* estimate how much additional space we will need */
930: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
931: /* just double the memory each time */
932: /* maxadd = (int)((f*(ai[n]+!shift)*(n-prow+5))/n); */
933: int maxadd = jmax;
934: if (maxadd < nzf) maxadd = (n-prow)*(nzf+1);
935: jmax += maxadd;
937: /* allocate a longer ajnew and ajfill */
938: ierr = PetscMalloc(jmax*sizeof(int),&xi);
939: ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));
940: ierr = PetscFree(ajnew);
941: ajnew = xi;
942: ierr = PetscMalloc(jmax*sizeof(int),&xi);
943: ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));
944: ierr = PetscFree(ajfill);
945: ajfill = xi;
946: realloc++; /* count how many times we realloc */
947: }
948: xi = ajnew + ainew[prow] + shift;
949: flev = ajfill + ainew[prow] + shift;
950: dloc[prow] = nzi;
951: fm = fill[n];
952: while (nzf--) {
953: *xi++ = fm - shift;
954: *flev++ = im[fm];
955: fm = fill[fm];
956: }
957: /* make sure row has diagonal entry */
958: if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) {
959: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %d has missing diagonal in factored matrixn
960: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow);
961: }
962: }
963: PetscFree(ajfill);
964: ISRestoreIndices(isrow,&r);
965: ISRestoreIndices(isicol,&ic);
966: PetscFree(fill);
967: PetscFree(im);
969: {
970: PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]);
971: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
972: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use n",af);
973: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);n",af);
974: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.n");
975: if (diagonal_fill) {
976: PetscLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %d missing diagonals",dcount);
977: }
978: }
980: /* put together the new matrix */
981: MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);
982: PetscLogObjectParent(*fact,isicol);
983: b = (Mat_SeqAIJ*)(*fact)->data;
984: PetscFree(b->imax);
985: b->singlemalloc = PETSC_FALSE;
986: len = (ainew[n] + shift)*sizeof(PetscScalar);
987: /* the next line frees the default space generated by the Create() */
988: PetscFree(b->a);
989: PetscFree(b->ilen);
990: PetscMalloc(len+1,&b->a);
991: b->j = ajnew;
992: b->i = ainew;
993: for (i=0; i<n; i++) dloc[i] += ainew[i];
994: b->diag = dloc;
995: b->ilen = 0;
996: b->imax = 0;
997: b->row = isrow;
998: b->col = iscol;
999: ierr = PetscObjectReference((PetscObject)isrow);
1000: ierr = PetscObjectReference((PetscObject)iscol);
1001: b->icol = isicol;
1002: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1003: /* In b structure: Free imax, ilen, old a, old j.
1004: Allocate dloc, solve_work, new a, new j */
1005: PetscLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(PetscScalar)));
1006: b->maxnz = b->nz = ainew[n] + shift;
1007: if (info) {
1008: b->lu_damp = (PetscTruth)((int)info->damp);
1009: b->lu_damping = info->damping;
1010: b->lu_zeropivot = info->zeropivot;
1011: } else {
1012: b->lu_damp = PETSC_FALSE;
1013: b->lu_damping = 0.0;
1014: b->lu_zeropivot = 1.e-12;
1015: }
1016: (*fact)->factor = FACTOR_LU;
1017: Mat_AIJ_CheckInode(*fact,PETSC_FALSE);
1018: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1020: (*fact)->info.factor_mallocs = realloc;
1021: (*fact)->info.fill_ratio_given = f;
1022: (*fact)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]);
1023: (*fact)->factor = FACTOR_LU;
1024: return(0);
1025: }