Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
6: #include petscbt.h
7: #include src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
24: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28: #endif
32: /* ------------------------------------------------------------
34: This interface was contribed by Tony Caola
36: This routine is an interface to the pivoting drop-tolerance
37: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38: SPARSEKIT2.
40: The SPARSEKIT2 routines used here are covered by the GNU
41: copyright; see the file gnu in this directory.
43: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44: help in getting this routine ironed out.
46: The major drawback to this routine is that if info->fill is
47: not large enough it fails rather than allocating more space;
48: this can be fixed by hacking/improving the f2c version of
49: Yousef Saad's code.
51: ------------------------------------------------------------
52: */
53: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54: {
55: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
57: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58: You can obtain the drop tolerance routines by installing PETSc from\n\
59: www.mcs.anl.gov/petsc\n");
60: #else
61: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
62: IS iscolf,isicol,isirow;
63: PetscTruth reorder;
64: PetscErrorCode ierr,sierr;
65: PetscInt *c,*r,*ic,i,n = A->m;
66: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67: PetscInt *ordcol,*iwk,*iperm,*jw;
68: PetscInt jmax,lfill,job,*o_i,*o_j;
69: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70: PetscReal af;
74: if (info->dt == PETSC_DEFAULT) info->dt = .005;
75: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
77: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
78: lfill = (PetscInt)(info->dtcount/2.0);
79: jmax = (PetscInt)(info->fill*a->nz);
82: /* ------------------------------------------------------------
83: If reorder=.TRUE., then the original matrix has to be
84: reordered to reflect the user selected ordering scheme, and
85: then de-reordered so it is in it's original format.
86: Because Saad's dperm() is NOT in place, we have to copy
87: the original matrix and allocate more storage. . .
88: ------------------------------------------------------------
89: */
91: /* set reorder to true if either isrow or iscol is not identity */
92: ISIdentity(isrow,&reorder);
93: if (reorder) {ISIdentity(iscol,&reorder);}
94: reorder = PetscNot(reorder);
96:
97: /* storage for ilu factor */
98: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
99: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
100: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
101: PetscMalloc(n*sizeof(PetscInt),&ordcol);
103: /* ------------------------------------------------------------
104: Make sure that everything is Fortran formatted (1-Based)
105: ------------------------------------------------------------
106: */
107: for (i=old_i[0];i<old_i[n];i++) {
108: old_j[i]++;
109: }
110: for(i=0;i<n+1;i++) {
111: old_i[i]++;
112: };
113:
115: if (reorder) {
116: ISGetIndices(iscol,&c);
117: ISGetIndices(isrow,&r);
118: for(i=0;i<n;i++) {
119: r[i] = r[i]+1;
120: c[i] = c[i]+1;
121: }
122: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
123: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
124: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
125: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126: for (i=0;i<n;i++) {
127: r[i] = r[i]-1;
128: c[i] = c[i]-1;
129: }
130: ISRestoreIndices(iscol,&c);
131: ISRestoreIndices(isrow,&r);
132: o_a = old_a2;
133: o_j = old_j2;
134: o_i = old_i2;
135: } else {
136: o_a = old_a;
137: o_j = old_j;
138: o_i = old_i;
139: }
141: /* ------------------------------------------------------------
142: Call Saad's ilutp() routine to generate the factorization
143: ------------------------------------------------------------
144: */
146: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
147: PetscMalloc(2*n*sizeof(PetscInt),&jw);
148: PetscMalloc(n*sizeof(PetscScalar),&w);
150: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151: if (sierr) {
152: switch (sierr) {
153: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
154: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %g space allocated %D",info->fill,jmax);
155: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159: }
160: }
162: PetscFree(w);
163: PetscFree(jw);
165: /* ------------------------------------------------------------
166: Saad's routine gives the result in Modified Sparse Row (msr)
167: Convert to Compressed Sparse Row format (csr)
168: ------------------------------------------------------------
169: */
171: PetscMalloc(n*sizeof(PetscScalar),&wk);
172: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
174: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
176: PetscFree(iwk);
177: PetscFree(wk);
179: if (reorder) {
180: PetscFree(old_a2);
181: PetscFree(old_j2);
182: PetscFree(old_i2);
183: } else {
184: /* fix permutation of old_j that the factorization introduced */
185: for (i=old_i[0]; i<old_i[n]; i++) {
186: old_j[i-1] = iperm[old_j[i-1]-1];
187: }
188: }
190: /* get rid of the shift to indices starting at 1 */
191: for (i=0; i<n+1; i++) {
192: old_i[i]--;
193: }
194: for (i=old_i[0];i<old_i[n];i++) {
195: old_j[i]--;
196: }
197:
198: /* Make the factored matrix 0-based */
199: for (i=0; i<n+1; i++) {
200: new_i[i]--;
201: }
202: for (i=new_i[0];i<new_i[n];i++) {
203: new_j[i]--;
204: }
206: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207: /*-- permute the right-hand-side and solution vectors --*/
208: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
210: ISGetIndices(isicol,&ic);
211: for(i=0; i<n; i++) {
212: ordcol[i] = ic[iperm[i]-1];
213: };
214: ISRestoreIndices(isicol,&ic);
215: ISDestroy(isicol);
217: PetscFree(iperm);
219: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
220: PetscFree(ordcol);
222: /*----- put together the new matrix -----*/
224: MatCreate(A->comm,fact);
225: MatSetSizes(*fact,n,n,n,n);
226: MatSetType(*fact,A->type_name);
227: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
228: (*fact)->factor = FACTOR_LU;
229: (*fact)->assembled = PETSC_TRUE;
231: b = (Mat_SeqAIJ*)(*fact)->data;
232: b->freedata = PETSC_TRUE;
233: b->sorted = PETSC_FALSE;
234: b->singlemalloc = PETSC_FALSE;
235: b->a = new_a;
236: b->j = new_j;
237: b->i = new_i;
238: b->ilen = 0;
239: b->imax = 0;
240: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241: b->row = isirow;
242: b->col = iscolf;
243: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
244: b->maxnz = b->nz = new_i[n];
245: MatMarkDiagonal_SeqAIJ(*fact);
246: (*fact)->info.factor_mallocs = 0;
248: MatMarkDiagonal_SeqAIJ(A);
250: af = ((double)b->nz)/((double)a->nz) + .001;
251: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Fill ratio:given %g needed %g\n",info->fill,af));
252: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af));
253: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:PCILUSetFill(pc,%g);\n",af));
254: PetscLogInfo((A,"MatILUDTFactor_SeqAIJ:for best performance.\n"));
256: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
258: return(0);
259: #endif
260: }
264: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265: {
266: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
267: IS isicol;
269: PetscInt *r,*ic,i,n=A->m,*ai=a->i,*aj=a->j;
270: PetscInt *bi,*bj,*ajtmp;
271: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272: PetscReal f;
273: PetscInt nlnk,*lnk,k,*cols,**bi_ptr;
274: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275: PetscBT lnkbt;
278: if (A->M != A->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
280: ISGetIndices(isrow,&r);
281: ISGetIndices(isicol,&ic);
283: /* get new row pointers */
284: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
285: bi[0] = 0;
287: /* bdiag is location of diagonal in factor */
288: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
289: bdiag[0] = 0;
291: /* linked list for storing column indices of the active row */
292: nlnk = n + 1;
293: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
295: PetscMalloc((2*n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&cols);
296: im = cols + n;
297: bi_ptr = (PetscInt**)(im + n);
299: /* initial FreeSpace size is f*(ai[n]+1) */
300: f = info->fill;
301: GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
302: current_space = free_space;
304: for (i=0; i<n; i++) {
305: /* copy previous fill into linked list */
306: nzi = 0;
307: nnz = ai[r[i]+1] - ai[r[i]];
308: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
309: ajtmp = aj + ai[r[i]];
310: for (k=0; k<nnz; k++) cols[k] = ic[*(ajtmp+k)]; /* note: cols is not sorted when iscol!=indentity */
311: PetscLLAdd(nnz,cols,n,nlnk,lnk,lnkbt);
312: nzi += nlnk;
314: /* add pivot rows into linked list */
315: row = lnk[n];
316: while (row < i) {
317: nzbd = bdiag[row] - bi[row] + 1;
318: ajtmp = bi_ptr[row] + nzbd;
319: nnz = im[row] - nzbd; /* num of columns with row<indices<=i */
320: im[row] = nzbd;
321: PetscLLAddSortedLU(nnz,ajtmp,row,nlnk,lnk,lnkbt,i,nzbd);
322: nzi += nlnk;
323: im[row] += nzbd; /* update im[row]: num of cols with index<=i */
325: row = lnk[row];
326: }
328: bi[i+1] = bi[i] + nzi;
329: im[i] = nzi;
331: /* mark bdiag */
332: nzbd = 0;
333: nnz = nzi;
334: k = lnk[n];
335: while (nnz-- && k < i){
336: nzbd++;
337: k = lnk[k];
338: }
339: bdiag[i] = bi[i] + nzbd;
341: /* if free space is not available, make more free space */
342: if (current_space->local_remaining<nzi) {
343: nnz = (n - i)*nzi; /* estimated and max additional space needed */
344: GetMoreSpace(nnz,¤t_space);
345: reallocs++;
346: }
348: /* copy data into free space, then initialize lnk */
349: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
350: bi_ptr[i] = current_space->array;
351: current_space->array += nzi;
352: current_space->local_used += nzi;
353: current_space->local_remaining -= nzi;
354: }
355: #if defined(PETSC_USE_DEBUG)
356: if (ai[n] != 0) {
357: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
358: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
359: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %G or use \n",af));
360: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af));
361: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"));
362: } else {
363: PetscLogInfo((A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"));
364: }
365: #endif
367: ISRestoreIndices(isrow,&r);
368: ISRestoreIndices(isicol,&ic);
370: /* destroy list of free space and other temporary array(s) */
371: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
372: MakeSpaceContiguous(&free_space,bj);
373: PetscLLDestroy(lnk,lnkbt);
374: PetscFree(cols);
376: /* put together the new matrix */
377: MatCreate(A->comm,B);
378: MatSetSizes(*B,n,n,n,n);
379: MatSetType(*B,A->type_name);
380: MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
381: PetscLogObjectParent(*B,isicol);
382: b = (Mat_SeqAIJ*)(*B)->data;
383: b->freedata = PETSC_TRUE;
384: b->singlemalloc = PETSC_FALSE;
385: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
386: b->j = bj;
387: b->i = bi;
388: b->diag = bdiag;
389: b->ilen = 0;
390: b->imax = 0;
391: b->row = isrow;
392: b->col = iscol;
393: PetscObjectReference((PetscObject)isrow);
394: PetscObjectReference((PetscObject)iscol);
395: b->icol = isicol;
396: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
398: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
399: PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
400: b->maxnz = b->nz = bi[n] ;
402: (*B)->factor = FACTOR_LU;
403: (*B)->info.factor_mallocs = reallocs;
404: (*B)->info.fill_ratio_given = f;
406: if (ai[n] != 0) {
407: (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
408: } else {
409: (*B)->info.fill_ratio_needed = 0.0;
410: }
411: MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
412: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
413: return(0);
414: }
416: /* ----------------------------------------------------------- */
419: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
420: {
421: Mat C=*B;
422: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
423: IS isrow = b->row,isicol = b->icol;
425: PetscInt *r,*ic,i,j,n=A->m,*bi=b->i,*bj=b->j;
426: PetscInt *ajtmp,*bjtmp,nz,row,*ics;
427: PetscInt *diag_offset = b->diag,diag,*pj;
428: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
429: PetscScalar d;
430: PetscReal rs;
431: LUShift_Ctx sctx;
432: PetscInt newshift;
435: ISGetIndices(isrow,&r);
436: ISGetIndices(isicol,&ic);
437: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
438: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
439: rtmps = rtmp; ics = ic;
441: if (!a->diag) {
442: MatMarkDiagonal_SeqAIJ(A);
443: }
444: /* if both shift schemes are chosen by user, only use info->shiftpd */
445: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
446: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
447: PetscInt *aai = a->i,*ddiag = a->diag;
448: sctx.shift_top = 0;
449: for (i=0; i<n; i++) {
450: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
451: d = (a->a)[ddiag[i]];
452: rs = -PetscAbsScalar(d) - PetscRealPart(d);
453: v = a->a+aai[i];
454: nz = aai[i+1] - aai[i];
455: for (j=0; j<nz; j++)
456: rs += PetscAbsScalar(v[j]);
457: if (rs>sctx.shift_top) sctx.shift_top = rs;
458: }
459: if (sctx.shift_top == 0.0) sctx.shift_top += 1.e-12;
460: sctx.shift_top *= 1.1;
461: sctx.nshift_max = 5;
462: sctx.shift_lo = 0.;
463: sctx.shift_hi = 1.;
464: }
466: sctx.shift_amount = 0;
467: sctx.nshift = 0;
468: do {
469: sctx.lushift = PETSC_FALSE;
470: for (i=0; i<n; i++){
471: nz = bi[i+1] - bi[i];
472: bjtmp = bj + bi[i];
473: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
475: /* load in initial (unfactored row) */
476: nz = a->i[r[i]+1] - a->i[r[i]];
477: ajtmp = a->j + a->i[r[i]];
478: v = a->a + a->i[r[i]];
479: for (j=0; j<nz; j++) {
480: rtmp[ics[ajtmp[j]]] = v[j];
481: }
482: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
484: row = *bjtmp++;
485: while (row < i) {
486: pc = rtmp + row;
487: if (*pc != 0.0) {
488: pv = b->a + diag_offset[row];
489: pj = b->j + diag_offset[row] + 1;
490: multiplier = *pc / *pv++;
491: *pc = multiplier;
492: nz = bi[row+1] - diag_offset[row] - 1;
493: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
494: PetscLogFlops(2*nz);
495: }
496: row = *bjtmp++;
497: }
498: /* finished row so stick it into b->a */
499: pv = b->a + bi[i] ;
500: pj = b->j + bi[i] ;
501: nz = bi[i+1] - bi[i];
502: diag = diag_offset[i] - bi[i];
503: rs = 0.0;
504: for (j=0; j<nz; j++) {
505: pv[j] = rtmps[pj[j]];
506: if (j != diag) rs += PetscAbsScalar(pv[j]);
507: }
509: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
510: sctx.rs = rs;
511: sctx.pv = pv[diag];
512: MatLUCheckShift_inline(info,sctx,newshift);
513: if (newshift == 1){
514: break; /* sctx.shift_amount is updated */
515: } else if (newshift == -1){
516: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
517: }
518: }
520: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
521: /*
522: * if no shift in this attempt & shifting & started shifting & can refine,
523: * then try lower shift
524: */
525: sctx.shift_hi = info->shift_fraction;
526: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
527: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
528: sctx.lushift = PETSC_TRUE;
529: sctx.nshift++;
530: }
531: } while (sctx.lushift);
533: /* invert diagonal entries for simplier triangular solves */
534: for (i=0; i<n; i++) {
535: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
536: }
538: PetscFree(rtmp);
539: ISRestoreIndices(isicol,&ic);
540: ISRestoreIndices(isrow,&r);
541: C->factor = FACTOR_LU;
542: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
543: C->assembled = PETSC_TRUE;
544: PetscLogFlops(C->n);
545: if (sctx.nshift){
546: if (info->shiftnz) {
547: PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
548: } else if (info->shiftpd) {
549: PetscLogInfo((0,"MatLUFactorNumeric_SeqAIJ: number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top));
550: }
551: }
552: return(0);
553: }
557: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
558: {
560: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
561: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
562: return(0);
563: }
566: /* ----------------------------------------------------------- */
569: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
570: {
572: Mat C;
575: MatLUFactorSymbolic(A,row,col,info,&C);
576: MatLUFactorNumeric(A,info,&C);
577: MatHeaderCopy(A,C);
578: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
579: return(0);
580: }
581: /* ----------------------------------------------------------- */
584: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
585: {
586: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
587: IS iscol = a->col,isrow = a->row;
589: PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
590: PetscInt nz,*rout,*cout;
591: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
594: if (!n) return(0);
596: VecGetArray(bb,&b);
597: VecGetArray(xx,&x);
598: tmp = a->solve_work;
600: ISGetIndices(isrow,&rout); r = rout;
601: ISGetIndices(iscol,&cout); c = cout + (n-1);
603: /* forward solve the lower triangular */
604: tmp[0] = b[*r++];
605: tmps = tmp;
606: for (i=1; i<n; i++) {
607: v = aa + ai[i] ;
608: vi = aj + ai[i] ;
609: nz = a->diag[i] - ai[i];
610: sum = b[*r++];
611: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
612: tmp[i] = sum;
613: }
615: /* backward solve the upper triangular */
616: for (i=n-1; i>=0; i--){
617: v = aa + a->diag[i] + 1;
618: vi = aj + a->diag[i] + 1;
619: nz = ai[i+1] - a->diag[i] - 1;
620: sum = tmp[i];
621: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
622: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
623: }
625: ISRestoreIndices(isrow,&rout);
626: ISRestoreIndices(iscol,&cout);
627: VecRestoreArray(bb,&b);
628: VecRestoreArray(xx,&x);
629: PetscLogFlops(2*a->nz - A->n);
630: return(0);
631: }
633: /* ----------------------------------------------------------- */
636: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
637: {
638: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
640: PetscInt n = A->m,*ai = a->i,*aj = a->j,*adiag = a->diag;
641: PetscScalar *x,*b,*aa = a->a;
642: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
643: PetscInt adiag_i,i,*vi,nz,ai_i;
644: PetscScalar *v,sum;
645: #endif
648: if (!n) return(0);
650: VecGetArray(bb,&b);
651: VecGetArray(xx,&x);
653: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
654: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
655: #else
656: /* forward solve the lower triangular */
657: x[0] = b[0];
658: for (i=1; i<n; i++) {
659: ai_i = ai[i];
660: v = aa + ai_i;
661: vi = aj + ai_i;
662: nz = adiag[i] - ai_i;
663: sum = b[i];
664: while (nz--) sum -= *v++ * x[*vi++];
665: x[i] = sum;
666: }
668: /* backward solve the upper triangular */
669: for (i=n-1; i>=0; i--){
670: adiag_i = adiag[i];
671: v = aa + adiag_i + 1;
672: vi = aj + adiag_i + 1;
673: nz = ai[i+1] - adiag_i - 1;
674: sum = x[i];
675: while (nz--) sum -= *v++ * x[*vi++];
676: x[i] = sum*aa[adiag_i];
677: }
678: #endif
679: PetscLogFlops(2*a->nz - A->n);
680: VecRestoreArray(bb,&b);
681: VecRestoreArray(xx,&x);
682: return(0);
683: }
687: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
688: {
689: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
690: IS iscol = a->col,isrow = a->row;
692: PetscInt *r,*c,i, n = A->m,*vi,*ai = a->i,*aj = a->j;
693: PetscInt nz,*rout,*cout;
694: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
697: if (yy != xx) {VecCopy(yy,xx);}
699: VecGetArray(bb,&b);
700: VecGetArray(xx,&x);
701: tmp = a->solve_work;
703: ISGetIndices(isrow,&rout); r = rout;
704: ISGetIndices(iscol,&cout); c = cout + (n-1);
706: /* forward solve the lower triangular */
707: tmp[0] = b[*r++];
708: for (i=1; i<n; i++) {
709: v = aa + ai[i] ;
710: vi = aj + ai[i] ;
711: nz = a->diag[i] - ai[i];
712: sum = b[*r++];
713: while (nz--) sum -= *v++ * tmp[*vi++ ];
714: tmp[i] = sum;
715: }
717: /* backward solve the upper triangular */
718: for (i=n-1; i>=0; i--){
719: v = aa + a->diag[i] + 1;
720: vi = aj + a->diag[i] + 1;
721: nz = ai[i+1] - a->diag[i] - 1;
722: sum = tmp[i];
723: while (nz--) sum -= *v++ * tmp[*vi++ ];
724: tmp[i] = sum*aa[a->diag[i]];
725: x[*c--] += tmp[i];
726: }
728: ISRestoreIndices(isrow,&rout);
729: ISRestoreIndices(iscol,&cout);
730: VecRestoreArray(bb,&b);
731: VecRestoreArray(xx,&x);
732: PetscLogFlops(2*a->nz);
734: return(0);
735: }
736: /* -------------------------------------------------------------------*/
739: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
740: {
741: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
742: IS iscol = a->col,isrow = a->row;
744: PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
745: PetscInt nz,*rout,*cout,*diag = a->diag;
746: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
749: VecGetArray(bb,&b);
750: VecGetArray(xx,&x);
751: tmp = a->solve_work;
753: ISGetIndices(isrow,&rout); r = rout;
754: ISGetIndices(iscol,&cout); c = cout;
756: /* copy the b into temp work space according to permutation */
757: for (i=0; i<n; i++) tmp[i] = b[c[i]];
759: /* forward solve the U^T */
760: for (i=0; i<n; i++) {
761: v = aa + diag[i] ;
762: vi = aj + diag[i] + 1;
763: nz = ai[i+1] - diag[i] - 1;
764: s1 = tmp[i];
765: s1 *= (*v++); /* multiply by inverse of diagonal entry */
766: while (nz--) {
767: tmp[*vi++ ] -= (*v++)*s1;
768: }
769: tmp[i] = s1;
770: }
772: /* backward solve the L^T */
773: for (i=n-1; i>=0; i--){
774: v = aa + diag[i] - 1 ;
775: vi = aj + diag[i] - 1 ;
776: nz = diag[i] - ai[i];
777: s1 = tmp[i];
778: while (nz--) {
779: tmp[*vi-- ] -= (*v--)*s1;
780: }
781: }
783: /* copy tmp into x according to permutation */
784: for (i=0; i<n; i++) x[r[i]] = tmp[i];
786: ISRestoreIndices(isrow,&rout);
787: ISRestoreIndices(iscol,&cout);
788: VecRestoreArray(bb,&b);
789: VecRestoreArray(xx,&x);
791: PetscLogFlops(2*a->nz-A->n);
792: return(0);
793: }
797: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
798: {
799: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
800: IS iscol = a->col,isrow = a->row;
802: PetscInt *r,*c,i,n = A->m,*vi,*ai = a->i,*aj = a->j;
803: PetscInt nz,*rout,*cout,*diag = a->diag;
804: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
807: if (zz != xx) {VecCopy(zz,xx);}
809: VecGetArray(bb,&b);
810: VecGetArray(xx,&x);
811: tmp = a->solve_work;
813: ISGetIndices(isrow,&rout); r = rout;
814: ISGetIndices(iscol,&cout); c = cout;
816: /* copy the b into temp work space according to permutation */
817: for (i=0; i<n; i++) tmp[i] = b[c[i]];
819: /* forward solve the U^T */
820: for (i=0; i<n; i++) {
821: v = aa + diag[i] ;
822: vi = aj + diag[i] + 1;
823: nz = ai[i+1] - diag[i] - 1;
824: tmp[i] *= *v++;
825: while (nz--) {
826: tmp[*vi++ ] -= (*v++)*tmp[i];
827: }
828: }
830: /* backward solve the L^T */
831: for (i=n-1; i>=0; i--){
832: v = aa + diag[i] - 1 ;
833: vi = aj + diag[i] - 1 ;
834: nz = diag[i] - ai[i];
835: while (nz--) {
836: tmp[*vi-- ] -= (*v--)*tmp[i];
837: }
838: }
840: /* copy tmp into x according to permutation */
841: for (i=0; i<n; i++) x[r[i]] += tmp[i];
843: ISRestoreIndices(isrow,&rout);
844: ISRestoreIndices(iscol,&cout);
845: VecRestoreArray(bb,&b);
846: VecRestoreArray(xx,&x);
848: PetscLogFlops(2*a->nz);
849: return(0);
850: }
851: /* ----------------------------------------------------------------*/
852: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
856: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
857: {
858: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
859: IS isicol;
861: PetscInt *r,*ic,n=A->m,*ai=a->i,*aj=a->j;
862: PetscInt *bi,*cols,nnz,*cols_lvl;
863: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
864: PetscInt i,levels,diagonal_fill;
865: PetscTruth col_identity,row_identity;
866: PetscReal f;
867: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
868: PetscBT lnkbt;
869: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
870: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
871: FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
872:
874: f = info->fill;
875: levels = (PetscInt)info->levels;
876: diagonal_fill = (PetscInt)info->diagonal_fill;
877: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
879: /* special case that simply copies fill pattern */
880: ISIdentity(isrow,&row_identity);
881: ISIdentity(iscol,&col_identity);
882: if (!levels && row_identity && col_identity) {
883: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
884: (*fact)->factor = FACTOR_LU;
885: b = (Mat_SeqAIJ*)(*fact)->data;
886: if (!b->diag) {
887: MatMarkDiagonal_SeqAIJ(*fact);
888: }
889: MatMissingDiagonal_SeqAIJ(*fact);
890: b->row = isrow;
891: b->col = iscol;
892: b->icol = isicol;
893: PetscMalloc(((*fact)->m+1)*sizeof(PetscScalar),&b->solve_work);
894: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
895: PetscObjectReference((PetscObject)isrow);
896: PetscObjectReference((PetscObject)iscol);
897: return(0);
898: }
900: ISGetIndices(isrow,&r);
901: ISGetIndices(isicol,&ic);
903: /* get new row pointers */
904: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
905: bi[0] = 0;
906: /* bdiag is location of diagonal in factor */
907: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
908: bdiag[0] = 0;
910: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
911: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
913: /* create a linked list for storing column indices of the active row */
914: nlnk = n + 1;
915: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
917: /* initial FreeSpace size is f*(ai[n]+1) */
918: GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space);
919: current_space = free_space;
920: GetMoreSpace((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
921: current_space_lvl = free_space_lvl;
922:
923: for (i=0; i<n; i++) {
924: nzi = 0;
925: /* copy current row into linked list */
926: nnz = ai[r[i]+1] - ai[r[i]];
927: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
928: cols = aj + ai[r[i]];
929: lnk[i] = -1; /* marker to indicate if diagonal exists */
930: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
931: nzi += nlnk;
933: /* make sure diagonal entry is included */
934: if (diagonal_fill && lnk[i] == -1) {
935: fm = n;
936: while (lnk[fm] < i) fm = lnk[fm];
937: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
938: lnk[fm] = i;
939: lnk_lvl[i] = 0;
940: nzi++; dcount++;
941: }
943: /* add pivot rows into the active row */
944: nzbd = 0;
945: prow = lnk[n];
946: while (prow < i) {
947: nnz = bdiag[prow];
948: cols = bj_ptr[prow] + nnz + 1;
949: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
950: nnz = bi[prow+1] - bi[prow] - nnz - 1;
951: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
952: nzi += nlnk;
953: prow = lnk[prow];
954: nzbd++;
955: }
956: bdiag[i] = nzbd;
957: bi[i+1] = bi[i] + nzi;
959: /* if free space is not available, make more free space */
960: if (current_space->local_remaining<nzi) {
961: nnz = nzi*(n - i); /* estimated and max additional space needed */
962: GetMoreSpace(nnz,¤t_space);
963: GetMoreSpace(nnz,¤t_space_lvl);
964: reallocs++;
965: }
967: /* copy data into free_space and free_space_lvl, then initialize lnk */
968: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
969: bj_ptr[i] = current_space->array;
970: bjlvl_ptr[i] = current_space_lvl->array;
972: /* make sure the active row i has diagonal entry */
973: if (*(bj_ptr[i]+bdiag[i]) != i) {
974: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
975: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
976: }
978: current_space->array += nzi;
979: current_space->local_used += nzi;
980: current_space->local_remaining -= nzi;
981: current_space_lvl->array += nzi;
982: current_space_lvl->local_used += nzi;
983: current_space_lvl->local_remaining -= nzi;
984: }
986: ISRestoreIndices(isrow,&r);
987: ISRestoreIndices(isicol,&ic);
989: /* destroy list of free space and other temporary arrays */
990: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
991: MakeSpaceContiguous(&free_space,bj);
992: PetscIncompleteLLDestroy(lnk,lnkbt);
993: DestroySpace(free_space_lvl);
994: PetscFree(bj_ptr);
996: #if defined(PETSC_USE_DEBUG)
997: {
998: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
999: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,f,af));
1000: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Run with -[sub_]pc_ilu_fill %g or use \n",af));
1001: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill([sub]pc,%g);\n",af));
1002: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"));
1003: if (diagonal_fill) {
1004: PetscLogInfo((A,"MatILUFactorSymbolic_SeqAIJ:Detected and replaced %D missing diagonals",dcount));
1005: }
1006: }
1007: #endif
1009: /* put together the new matrix */
1010: MatCreate(A->comm,fact);
1011: MatSetSizes(*fact,n,n,n,n);
1012: MatSetType(*fact,A->type_name);
1013: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1014: PetscLogObjectParent(*fact,isicol);
1015: b = (Mat_SeqAIJ*)(*fact)->data;
1016: b->freedata = PETSC_TRUE;
1017: b->singlemalloc = PETSC_FALSE;
1018: len = (bi[n] )*sizeof(PetscScalar);
1019: PetscMalloc(len+1,&b->a);
1020: b->j = bj;
1021: b->i = bi;
1022: for (i=0; i<n; i++) bdiag[i] += bi[i];
1023: b->diag = bdiag;
1024: b->ilen = 0;
1025: b->imax = 0;
1026: b->row = isrow;
1027: b->col = iscol;
1028: PetscObjectReference((PetscObject)isrow);
1029: PetscObjectReference((PetscObject)iscol);
1030: b->icol = isicol;
1031: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1032: /* In b structure: Free imax, ilen, old a, old j.
1033: Allocate bdiag, solve_work, new a, new j */
1034: PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1035: b->maxnz = b->nz = bi[n] ;
1036: (*fact)->factor = FACTOR_LU;
1037: (*fact)->info.factor_mallocs = reallocs;
1038: (*fact)->info.fill_ratio_given = f;
1039: (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1041: MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1042: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1044: return(0);
1045: }
1047: #include src/mat/impls/sbaij/seq/sbaij.h
1050: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1051: {
1052: Mat C = *B;
1053: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1054: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1055: IS ip=b->row;
1057: PetscInt *rip,i,j,mbs=A->m,*bi=b->i,*bj=b->j,*bcol;
1058: PetscInt *ai=a->i,*aj=a->j;
1059: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1060: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1061: PetscReal zeropivot,rs,shiftnz;
1062: PetscTruth shiftpd;
1063: ChShift_Ctx sctx;
1064: PetscInt newshift;
1067: shiftnz = info->shiftnz;
1068: shiftpd = info->shiftpd;
1069: zeropivot = info->zeropivot;
1071: ISGetIndices(ip,&rip);
1072:
1073: /* initialization */
1074: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1075: PetscMalloc(nz,&il);
1076: jl = il + mbs;
1077: rtmp = (MatScalar*)(jl + mbs);
1079: sctx.shift_amount = 0;
1080: sctx.nshift = 0;
1081: do {
1082: sctx.chshift = PETSC_FALSE;
1083: for (i=0; i<mbs; i++) {
1084: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1085: }
1086:
1087: for (k = 0; k<mbs; k++){
1088: bval = ba + bi[k];
1089: /* initialize k-th row by the perm[k]-th row of A */
1090: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1091: for (j = jmin; j < jmax; j++){
1092: col = rip[aj[j]];
1093: if (col >= k){ /* only take upper triangular entry */
1094: rtmp[col] = aa[j];
1095: *bval++ = 0.0; /* for in-place factorization */
1096: }
1097: }
1098: /* shift the diagonal of the matrix */
1099: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1101: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1102: dk = rtmp[k];
1103: i = jl[k]; /* first row to be added to k_th row */
1105: while (i < k){
1106: nexti = jl[i]; /* next row to be added to k_th row */
1108: /* compute multiplier, update diag(k) and U(i,k) */
1109: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1110: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1111: dk += uikdi*ba[ili];
1112: ba[ili] = uikdi; /* -U(i,k) */
1114: /* add multiple of row i to k-th row */
1115: jmin = ili + 1; jmax = bi[i+1];
1116: if (jmin < jmax){
1117: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1118: /* update il and jl for row i */
1119: il[i] = jmin;
1120: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1121: }
1122: i = nexti;
1123: }
1125: /* shift the diagonals when zero pivot is detected */
1126: /* compute rs=sum of abs(off-diagonal) */
1127: rs = 0.0;
1128: jmin = bi[k]+1;
1129: nz = bi[k+1] - jmin;
1130: if (nz){
1131: bcol = bj + jmin;
1132: while (nz--){
1133: rs += PetscAbsScalar(rtmp[*bcol]);
1134: bcol++;
1135: }
1136: }
1138: sctx.rs = rs;
1139: sctx.pv = dk;
1140: MatCholeskyCheckShift_inline(info,sctx,newshift);
1141: if (newshift == 1){
1142: break; /* sctx.shift_amount is updated */
1143: } else if (newshift == -1){
1144: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g * rs %g",k,PetscAbsScalar(dk),zeropivot,rs);
1145: }
1146:
1147: /* copy data into U(k,:) */
1148: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1149: jmin = bi[k]+1; jmax = bi[k+1];
1150: if (jmin < jmax) {
1151: for (j=jmin; j<jmax; j++){
1152: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1153: }
1154: /* add the k-th row into il and jl */
1155: il[k] = jmin;
1156: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1157: }
1158: }
1159: } while (sctx.chshift);
1160: PetscFree(il);
1162: ISRestoreIndices(ip,&rip);
1163: C->factor = FACTOR_CHOLESKY;
1164: C->assembled = PETSC_TRUE;
1165: C->preallocated = PETSC_TRUE;
1166: PetscLogFlops(C->m);
1167: if (sctx.nshift){
1168: if (shiftnz) {
1169: PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1170: } else if (shiftpd) {
1171: PetscLogInfo((0,"MatCholeskyFactorNumeric_SeqAIJ: number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,sctx.shift_amount));
1172: }
1173: }
1174: return(0);
1175: }
1179: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1180: {
1181: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1182: Mat_SeqSBAIJ *b;
1183: Mat B;
1185: PetscTruth perm_identity;
1186: PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->m,*ui;
1187: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1188: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1189: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1190: PetscReal fill=info->fill,levels=info->levels;
1191: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1192: FreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1193: PetscBT lnkbt;
1194:
1196: ISIdentity(perm,&perm_identity);
1197: ISGetIndices(perm,&rip);
1199: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1200: ui[0] = 0;
1202: /* special case that simply copies fill pattern */
1203: if (!levels && perm_identity) {
1204: MatMarkDiagonal_SeqAIJ(A);
1205: for (i=0; i<am; i++) {
1206: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1207: }
1208: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1209: cols = uj;
1210: for (i=0; i<am; i++) {
1211: aj = a->j + a->diag[i];
1212: ncols = ui[i+1] - ui[i];
1213: for (j=0; j<ncols; j++) *cols++ = *aj++;
1214: }
1215: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1216: /* initialization */
1217: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1219: /* jl: linked list for storing indices of the pivot rows
1220: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1221: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1222: il = jl + am;
1223: uj_ptr = (PetscInt**)(il + am);
1224: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1225: for (i=0; i<am; i++){
1226: jl[i] = am; il[i] = 0;
1227: }
1229: /* create and initialize a linked list for storing column indices of the active row k */
1230: nlnk = am + 1;
1231: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1233: /* initial FreeSpace size is fill*(ai[am]+1) */
1234: GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1235: current_space = free_space;
1236: GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1237: current_space_lvl = free_space_lvl;
1239: for (k=0; k<am; k++){ /* for each active row k */
1240: /* initialize lnk by the column indices of row rip[k] of A */
1241: nzk = 0;
1242: ncols = ai[rip[k]+1] - ai[rip[k]];
1243: ncols_upper = 0;
1244: for (j=0; j<ncols; j++){
1245: i = *(aj + ai[rip[k]] + j);
1246: if (rip[i] >= k){ /* only take upper triangular entry */
1247: ajtmp[ncols_upper] = i;
1248: ncols_upper++;
1249: }
1250: }
1251: PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
1252: nzk += nlnk;
1254: /* update lnk by computing fill-in for each pivot row to be merged in */
1255: prow = jl[k]; /* 1st pivot row */
1256:
1257: while (prow < k){
1258: nextprow = jl[prow];
1259:
1260: /* merge prow into k-th row */
1261: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1262: jmax = ui[prow+1];
1263: ncols = jmax-jmin;
1264: i = jmin - ui[prow];
1265: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1266: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1267: j = *(uj - 1);
1268: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1269: nzk += nlnk;
1271: /* update il and jl for prow */
1272: if (jmin < jmax){
1273: il[prow] = jmin;
1274: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1275: }
1276: prow = nextprow;
1277: }
1279: /* if free space is not available, make more free space */
1280: if (current_space->local_remaining<nzk) {
1281: i = am - k + 1; /* num of unfactored rows */
1282: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1283: GetMoreSpace(i,¤t_space);
1284: GetMoreSpace(i,¤t_space_lvl);
1285: reallocs++;
1286: }
1288: /* copy data into free_space and free_space_lvl, then initialize lnk */
1289: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1291: /* add the k-th row into il and jl */
1292: if (nzk > 1){
1293: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1294: jl[k] = jl[i]; jl[i] = k;
1295: il[k] = ui[k] + 1;
1296: }
1297: uj_ptr[k] = current_space->array;
1298: uj_lvl_ptr[k] = current_space_lvl->array;
1300: current_space->array += nzk;
1301: current_space->local_used += nzk;
1302: current_space->local_remaining -= nzk;
1304: current_space_lvl->array += nzk;
1305: current_space_lvl->local_used += nzk;
1306: current_space_lvl->local_remaining -= nzk;
1308: ui[k+1] = ui[k] + nzk;
1309: }
1311: #if defined(PETSC_USE_DEBUG)
1312: if (ai[am] != 0) {
1313: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1314: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1315: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1316: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1317: } else {
1318: PetscLogInfo((A,"MatICCFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1319: }
1320: #endif
1322: ISRestoreIndices(perm,&rip);
1323: PetscFree(jl);
1324: PetscFree(ajtmp);
1326: /* destroy list of free space and other temporary array(s) */
1327: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1328: MakeSpaceContiguous(&free_space,uj);
1329: PetscIncompleteLLDestroy(lnk,lnkbt);
1330: DestroySpace(free_space_lvl);
1332: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1334: /* put together the new matrix in MATSEQSBAIJ format */
1335: MatCreate(PETSC_COMM_SELF,fact);
1336: MatSetSizes(*fact,am,am,am,am);
1337: B = *fact;
1338: MatSetType(B,MATSEQSBAIJ);
1339: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1341: b = (Mat_SeqSBAIJ*)B->data;
1342: b->singlemalloc = PETSC_FALSE;
1343: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1344: b->j = uj;
1345: b->i = ui;
1346: b->diag = 0;
1347: b->ilen = 0;
1348: b->imax = 0;
1349: b->row = perm;
1350: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1351: PetscObjectReference((PetscObject)perm);
1352: b->icol = perm;
1353: PetscObjectReference((PetscObject)perm);
1354: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1355: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1356: b->maxnz = b->nz = ui[am];
1357:
1358: B->factor = FACTOR_CHOLESKY;
1359: B->info.factor_mallocs = reallocs;
1360: B->info.fill_ratio_given = fill;
1361: if (ai[am] != 0) {
1362: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1363: } else {
1364: B->info.fill_ratio_needed = 0.0;
1365: }
1366: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1367: if (perm_identity){
1368: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1369: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1370: }
1371: return(0);
1372: }
1376: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1377: {
1378: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1379: Mat_SeqSBAIJ *b;
1380: Mat B;
1382: PetscTruth perm_identity;
1383: PetscReal fill = info->fill;
1384: PetscInt *rip,*riip,i,am=A->m,*ai=a->i,*aj=a->j,reallocs=0,prow;
1385: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1386: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1387: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1388: PetscBT lnkbt;
1389: IS iperm;
1392: /* check whether perm is the identity mapping */
1393: ISIdentity(perm,&perm_identity);
1394: ISGetIndices(perm,&rip);
1396: if (!perm_identity){
1397: /* check if perm is symmetric! */
1398: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1399: ISGetIndices(iperm,&riip);
1400: for (i=0; i<am; i++) {
1401: if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1402: }
1403: ISRestoreIndices(iperm,&riip);
1404: ISDestroy(iperm);
1405: }
1407: /* initialization */
1408: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1409: ui[0] = 0;
1411: /* jl: linked list for storing indices of the pivot rows
1412: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1413: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1414: il = jl + am;
1415: cols = il + am;
1416: ui_ptr = (PetscInt**)(cols + am);
1417: for (i=0; i<am; i++){
1418: jl[i] = am; il[i] = 0;
1419: }
1421: /* create and initialize a linked list for storing column indices of the active row k */
1422: nlnk = am + 1;
1423: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1425: /* initial FreeSpace size is fill*(ai[am]+1) */
1426: GetMoreSpace((PetscInt)(fill*(ai[am]+1)),&free_space);
1427: current_space = free_space;
1429: for (k=0; k<am; k++){ /* for each active row k */
1430: /* initialize lnk by the column indices of row rip[k] of A */
1431: nzk = 0;
1432: ncols = ai[rip[k]+1] - ai[rip[k]];
1433: ncols_upper = 0;
1434: for (j=0; j<ncols; j++){
1435: i = rip[*(aj + ai[rip[k]] + j)];
1436: if (i >= k){ /* only take upper triangular entry */
1437: cols[ncols_upper] = i;
1438: ncols_upper++;
1439: }
1440: }
1441: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1442: nzk += nlnk;
1444: /* update lnk by computing fill-in for each pivot row to be merged in */
1445: prow = jl[k]; /* 1st pivot row */
1446:
1447: while (prow < k){
1448: nextprow = jl[prow];
1449: /* merge prow into k-th row */
1450: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1451: jmax = ui[prow+1];
1452: ncols = jmax-jmin;
1453: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1454: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1455: nzk += nlnk;
1457: /* update il and jl for prow */
1458: if (jmin < jmax){
1459: il[prow] = jmin;
1460: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1461: }
1462: prow = nextprow;
1463: }
1465: /* if free space is not available, make more free space */
1466: if (current_space->local_remaining<nzk) {
1467: i = am - k + 1; /* num of unfactored rows */
1468: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1469: GetMoreSpace(i,¤t_space);
1470: reallocs++;
1471: }
1473: /* copy data into free space, then initialize lnk */
1474: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1476: /* add the k-th row into il and jl */
1477: if (nzk-1 > 0){
1478: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1479: jl[k] = jl[i]; jl[i] = k;
1480: il[k] = ui[k] + 1;
1481: }
1482: ui_ptr[k] = current_space->array;
1483: current_space->array += nzk;
1484: current_space->local_used += nzk;
1485: current_space->local_remaining -= nzk;
1487: ui[k+1] = ui[k] + nzk;
1488: }
1490: #if defined(PETSC_USE_DEBUG)
1491: if (ai[am] != 0) {
1492: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1493: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Reallocs %D Fill ratio:given %g needed %g\n",reallocs,fill,af));
1494: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Run with -pc_cholesky_fill %g or use \n",af));
1495: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:PCCholeskySetFill(pc,%g) for best performance.\n",af));
1496: } else {
1497: PetscLogInfo((A,"MatCholeskyFactorSymbolic_SeqAIJ:Empty matrix.\n"));
1498: }
1499: #endif
1501: ISRestoreIndices(perm,&rip);
1502: PetscFree(jl);
1504: /* destroy list of free space and other temporary array(s) */
1505: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1506: MakeSpaceContiguous(&free_space,uj);
1507: PetscLLDestroy(lnk,lnkbt);
1509: /* put together the new matrix in MATSEQSBAIJ format */
1510: MatCreate(PETSC_COMM_SELF,fact);
1511: MatSetSizes(*fact,am,am,am,am);
1512: B = *fact;
1513: MatSetType(B,MATSEQSBAIJ);
1514: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1516: b = (Mat_SeqSBAIJ*)B->data;
1517: b->singlemalloc = PETSC_FALSE;
1518: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1519: b->j = uj;
1520: b->i = ui;
1521: b->diag = 0;
1522: b->ilen = 0;
1523: b->imax = 0;
1524: b->row = perm;
1525: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1526: PetscObjectReference((PetscObject)perm);
1527: b->icol = perm;
1528: PetscObjectReference((PetscObject)perm);
1529: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1530: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1531: b->maxnz = b->nz = ui[am];
1532:
1533: B->factor = FACTOR_CHOLESKY;
1534: B->info.factor_mallocs = reallocs;
1535: B->info.fill_ratio_given = fill;
1536: if (ai[am] != 0) {
1537: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1538: } else {
1539: B->info.fill_ratio_needed = 0.0;
1540: }
1541: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1542: if (perm_identity){
1543: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1544: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1545: }
1546: return(0);
1547: }