Actual source code: sbaijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/baij/seq/baij.h
4: #include src/mat/impls/sbaij/seq/sbaij.h
5: #include src/inline/ilu.h
6: #include include/petscis.h
8: #if !defined(PETSC_USE_COMPLEX)
9: /*
10: input:
11: F -- numeric factor
12: output:
13: nneg, nzero, npos: matrix inertia
14: */
18: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneig,PetscInt *nzero,PetscInt *npos)
19: {
20: Mat_SeqSBAIJ *fact_ptr = (Mat_SeqSBAIJ*)F->data;
21: PetscScalar *dd = fact_ptr->a;
22: PetscInt mbs=fact_ptr->mbs,bs=F->rmap.bs,i,nneig_tmp,npos_tmp,*fi = fact_ptr->i;
25: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);
26: nneig_tmp = 0; npos_tmp = 0;
27: for (i=0; i<mbs; i++){
28: if (PetscRealPart(dd[*fi]) > 0.0){
29: npos_tmp++;
30: } else if (PetscRealPart(dd[*fi]) < 0.0){
31: nneig_tmp++;
32: }
33: fi++;
34: }
35: if (nneig) *nneig = nneig_tmp;
36: if (npos) *npos = npos_tmp;
37: if (nzero) *nzero = mbs - nneig_tmp - npos_tmp;
39: return(0);
40: }
41: #endif /* !defined(PETSC_USE_COMPLEX) */
43: /*
44: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
45: Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
46: */
49: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
50: {
51: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
53: PetscInt *rip,i,mbs = a->mbs,*ai,*aj;
54: PetscInt *jutmp,bs = A->rmap.bs,bs2=a->bs2;
55: PetscInt m,reallocs = 0,prow;
56: PetscInt *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
57: PetscReal f = info->fill;
58: PetscTruth perm_identity;
61: /* check whether perm is the identity mapping */
62: ISIdentity(perm,&perm_identity);
63: ISGetIndices(perm,&rip);
64:
65: if (perm_identity){ /* without permutation */
66: a->permute = PETSC_FALSE;
67: ai = a->i; aj = a->j;
68: } else { /* non-trivial permutation */
69: a->permute = PETSC_TRUE;
70: MatReorderingSeqSBAIJ(A,perm);
71: ai = a->inew; aj = a->jnew;
72: }
73:
74: /* initialization */
75: PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
76: umax = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
77: PetscMalloc(umax*sizeof(PetscInt),&ju);
78: iu[0] = mbs+1;
79: juidx = mbs + 1; /* index for ju */
80: PetscMalloc(2*mbs*sizeof(PetscInt),&jl); /* linked list for pivot row */
81: q = jl + mbs; /* linked list for col index */
82: for (i=0; i<mbs; i++){
83: jl[i] = mbs;
84: q[i] = 0;
85: }
87: /* for each row k */
88: for (k=0; k<mbs; k++){
89: for (i=0; i<mbs; i++) q[i] = 0; /* to be removed! */
90: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
91: q[k] = mbs;
92: /* initialize nonzero structure of k-th row to row rip[k] of A */
93: jmin = ai[rip[k]] +1; /* exclude diag[k] */
94: jmax = ai[rip[k]+1];
95: for (j=jmin; j<jmax; j++){
96: vj = rip[aj[j]]; /* col. value */
97: if(vj > k){
98: qm = k;
99: do {
100: m = qm; qm = q[m];
101: } while(qm < vj);
102: if (qm == vj) {
103: SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
104: }
105: nzk++;
106: q[m] = vj;
107: q[vj] = qm;
108: } /* if(vj > k) */
109: } /* for (j=jmin; j<jmax; j++) */
111: /* modify nonzero structure of k-th row by computing fill-in
112: for each row i to be merged in */
113: prow = k;
114: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
115:
116: while (prow < k){
117: /* merge row prow into k-th row */
118: jmin = iu[prow] + 1; jmax = iu[prow+1];
119: qm = k;
120: for (j=jmin; j<jmax; j++){
121: vj = ju[j];
122: do {
123: m = qm; qm = q[m];
124: } while (qm < vj);
125: if (qm != vj){
126: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
127: }
128: }
129: prow = jl[prow]; /* next pivot row */
130: }
131:
132: /* add k to row list for first nonzero element in k-th row */
133: if (nzk > 0){
134: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
135: jl[k] = jl[i]; jl[i] = k;
136: }
137: iu[k+1] = iu[k] + nzk;
139: /* allocate more space to ju if needed */
140: if (iu[k+1] > umax) {
141: /* estimate how much additional space we will need */
142: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143: /* just double the memory each time */
144: maxadd = umax;
145: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
146: umax += maxadd;
148: /* allocate a longer ju */
149: PetscMalloc(umax*sizeof(PetscInt),&jutmp);
150: PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
151: PetscFree(ju);
152: ju = jutmp;
153: reallocs++; /* count how many times we realloc */
154: }
156: /* save nonzero structure of k-th row in ju */
157: i=k;
158: while (nzk --) {
159: i = q[i];
160: ju[juidx++] = i;
161: }
162: }
164: #if defined(PETSC_USE_INFO)
165: if (ai[mbs] != 0) {
166: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
167: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
168: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
169: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
170: PetscInfo(A,"for best performance.\n");
171: } else {
172: PetscInfo(A,"Empty matrix.\n");
173: }
174: #endif
176: ISRestoreIndices(perm,&rip);
177: PetscFree(jl);
179: /* put together the new matrix */
180: MatCreate(A->comm,B);
181: MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);
182: MatSetType(*B,A->type_name);
183: MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
185: /* PetscLogObjectParent(*B,iperm); */
186: b = (Mat_SeqSBAIJ*)(*B)->data;
187: b->singlemalloc = PETSC_FALSE;
188: b->free_a = PETSC_TRUE;
189: b->free_ij = PETSC_TRUE;
190: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
191: b->j = ju;
192: b->i = iu;
193: b->diag = 0;
194: b->ilen = 0;
195: b->imax = 0;
196: b->row = perm;
197: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
198: PetscObjectReference((PetscObject)perm);
199: b->icol = perm;
200: PetscObjectReference((PetscObject)perm);
201: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
202: /* In b structure: Free imax, ilen, old a, old j.
203: Allocate idnew, solve_work, new a, new j */
204: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
205: b->maxnz = b->nz = iu[mbs];
206:
207: (*B)->factor = FACTOR_CHOLESKY;
208: (*B)->info.factor_mallocs = reallocs;
209: (*B)->info.fill_ratio_given = f;
210: if (ai[mbs] != 0) {
211: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
212: } else {
213: (*B)->info.fill_ratio_needed = 0.0;
214: }
216: if (perm_identity){
217: switch (bs) {
218: case 1:
219: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
220: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
221: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");
222: break;
223: case 2:
224: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
225: (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
226: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");
227: break;
228: case 3:
229: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
230: (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
231: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");
232: break;
233: case 4:
234: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
235: (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
236: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");
237: break;
238: case 5:
239: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
240: (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
241: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");
242: break;
243: case 6:
244: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
245: (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
246: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");
247: break;
248: case 7:
249: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
250: (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
251: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");
252: break;
253: default:
254: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
255: (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
256: PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");
257: break;
258: }
259: }
260: return(0);
261: }
262: /*
263: Symbolic U^T*D*U factorization for SBAIJ format.
264: */
265: #include petscbt.h
266: #include src/mat/utils/freespace.h
269: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
270: {
271: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
272: Mat_SeqSBAIJ *b;
273: Mat B;
274: PetscErrorCode ierr;
275: PetscTruth perm_identity;
276: PetscReal fill = info->fill;
277: PetscInt *rip,i,mbs=a->mbs,bs=A->rmap.bs,*ai,*aj,reallocs=0,prow;
278: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
279: PetscInt nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
280: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
281: PetscBT lnkbt;
284: /*
285: This code originally uses Modified Sparse Row (MSR) storage
286: (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
287: Then it is rewritten so the factor B takes seqsbaij format. However the associated
288: MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
289: thus the original code in MSR format is still used for these cases.
290: The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
291: MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
292: */
293: if (bs > 1){
294: MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);
295: return(0);
296: }
298: /* check whether perm is the identity mapping */
299: ISIdentity(perm,&perm_identity);
301: if (perm_identity){
302: a->permute = PETSC_FALSE;
303: ai = a->i; aj = a->j;
304: } else {
305: a->permute = PETSC_TRUE;
306: MatReorderingSeqSBAIJ(A,perm);
307: ai = a->inew; aj = a->jnew;
308: }
309: ISGetIndices(perm,&rip);
311: /* initialization */
312: PetscMalloc((mbs+1)*sizeof(PetscInt),&ui);
313: ui[0] = 0;
315: /* jl: linked list for storing indices of the pivot rows
316: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
317: PetscMalloc((3*mbs+1)*sizeof(PetscInt)+mbs*sizeof(PetscInt*),&jl);
318: il = jl + mbs;
319: cols = il + mbs;
320: ui_ptr = (PetscInt**)(cols + mbs);
321:
322: for (i=0; i<mbs; i++){
323: jl[i] = mbs; il[i] = 0;
324: }
326: /* create and initialize a linked list for storing column indices of the active row k */
327: nlnk = mbs + 1;
328: PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);
330: /* initial FreeSpace size is fill*(ai[mbs]+1) */
331: PetscFreeSpaceGet((PetscInt)(fill*(ai[mbs]+1)),&free_space);
332: current_space = free_space;
334: for (k=0; k<mbs; k++){ /* for each active row k */
335: /* initialize lnk by the column indices of row rip[k] of A */
336: nzk = 0;
337: ncols = ai[rip[k]+1] - ai[rip[k]];
338: for (j=0; j<ncols; j++){
339: i = *(aj + ai[rip[k]] + j);
340: cols[j] = rip[i];
341: }
342: PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);
343: nzk += nlnk;
345: /* update lnk by computing fill-in for each pivot row to be merged in */
346: prow = jl[k]; /* 1st pivot row */
347:
348: while (prow < k){
349: nextprow = jl[prow];
350: /* merge prow into k-th row */
351: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
352: jmax = ui[prow+1];
353: ncols = jmax-jmin;
354: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
355: PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
356: nzk += nlnk;
358: /* update il and jl for prow */
359: if (jmin < jmax){
360: il[prow] = jmin;
361: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
362: }
363: prow = nextprow;
364: }
366: /* if free space is not available, make more free space */
367: if (current_space->local_remaining<nzk) {
368: i = mbs - k + 1; /* num of unfactored rows */
369: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
370: PetscFreeSpaceGet(i,¤t_space);
371: reallocs++;
372: }
374: /* copy data into free space, then initialize lnk */
375: PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);
377: /* add the k-th row into il and jl */
378: if (nzk-1 > 0){
379: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
380: jl[k] = jl[i]; jl[i] = k;
381: il[k] = ui[k] + 1;
382: }
383: ui_ptr[k] = current_space->array;
384: current_space->array += nzk;
385: current_space->local_used += nzk;
386: current_space->local_remaining -= nzk;
388: ui[k+1] = ui[k] + nzk;
389: }
391: #if defined(PETSC_USE_INFO)
392: if (ai[mbs] != 0) {
393: PetscReal af = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
394: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
395: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
396: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
397: } else {
398: PetscInfo(A,"Empty matrix.\n");
399: }
400: #endif
402: ISRestoreIndices(perm,&rip);
403: PetscFree(jl);
405: /* destroy list of free space and other temporary array(s) */
406: PetscMalloc((ui[mbs]+1)*sizeof(PetscInt),&uj);
407: PetscFreeSpaceContiguous(&free_space,uj);
408: PetscLLDestroy(lnk,lnkbt);
410: /* put together the new matrix in MATSEQSBAIJ format */
411: MatCreate(PETSC_COMM_SELF,fact);
412: MatSetSizes(*fact,mbs,mbs,mbs,mbs);
413: B = *fact;
414: MatSetType(B,MATSEQSBAIJ);
415: MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
417: b = (Mat_SeqSBAIJ*)B->data;
418: b->singlemalloc = PETSC_FALSE;
419: b->free_a = PETSC_TRUE;
420: b->free_ij = PETSC_TRUE;
421: PetscMalloc((ui[mbs]+1)*sizeof(MatScalar),&b->a);
422: b->j = uj;
423: b->i = ui;
424: b->diag = 0;
425: b->ilen = 0;
426: b->imax = 0;
427: b->row = perm;
428: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
429: PetscObjectReference((PetscObject)perm);
430: b->icol = perm;
431: PetscObjectReference((PetscObject)perm);
432: PetscMalloc((mbs+1)*sizeof(PetscScalar),&b->solve_work);
433: PetscLogObjectMemory(B,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
434: b->maxnz = b->nz = ui[mbs];
435:
436: B->factor = FACTOR_CHOLESKY;
437: B->info.factor_mallocs = reallocs;
438: B->info.fill_ratio_given = fill;
439: if (ai[mbs] != 0) {
440: B->info.fill_ratio_needed = ((PetscReal)ui[mbs])/((PetscReal)ai[mbs]);
441: } else {
442: B->info.fill_ratio_needed = 0.0;
443: }
445: if (perm_identity){
446: switch (bs) {
447: case 1:
448: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
449: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
450: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");
451: break;
452: case 2:
453: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
454: B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
455: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");
456: break;
457: case 3:
458: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
459: B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
460: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");
461: break;
462: case 4:
463: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
464: B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
465: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");
466: break;
467: case 5:
468: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
469: B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
470: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");
471: break;
472: case 6:
473: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
474: B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
475: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");
476: break;
477: case 7:
478: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
479: B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
480: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");
481: break;
482: default:
483: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
484: B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
485: PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");
486: break;
487: }
488: }
489: return(0);
490: }
493: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat A,MatFactorInfo *info,Mat *B)
494: {
495: Mat C = *B;
496: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
497: IS perm = b->row;
499: PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
500: PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
501: PetscInt bs=A->rmap.bs,bs2 = a->bs2,bslog = 0;
502: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
503: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
504: MatScalar *work;
505: PetscInt *pivots;
508: /* initialization */
509: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
510: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
511: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
512: jl = il + mbs;
513: for (i=0; i<mbs; i++) {
514: jl[i] = mbs; il[0] = 0;
515: }
516: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
517: uik = dk + bs2;
518: work = uik + bs2;
519: PetscMalloc(bs*sizeof(PetscInt),&pivots);
520:
521: ISGetIndices(perm,&perm_ptr);
522:
523: /* check permutation */
524: if (!a->permute){
525: ai = a->i; aj = a->j; aa = a->a;
526: } else {
527: ai = a->inew; aj = a->jnew;
528: PetscMalloc(bs2*ai[mbs]*sizeof(MatScalar),&aa);
529: PetscMemcpy(aa,a->a,bs2*ai[mbs]*sizeof(MatScalar));
530: PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
531: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));
533: /* flops in while loop */
534: bslog = 2*bs*bs2;
536: for (i=0; i<mbs; i++){
537: jmin = ai[i]; jmax = ai[i+1];
538: for (j=jmin; j<jmax; j++){
539: while (a2anew[j] != j){
540: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
541: for (k1=0; k1<bs2; k1++){
542: dk[k1] = aa[k*bs2+k1];
543: aa[k*bs2+k1] = aa[j*bs2+k1];
544: aa[j*bs2+k1] = dk[k1];
545: }
546: }
547: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
548: if (i > aj[j]){
549: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
550: ap = aa + j*bs2; /* ptr to the beginning of j-th block of aa */
551: for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
552: for (k=0; k<bs; k++){ /* j-th block of aa <- dk^T */
553: for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
554: }
555: }
556: }
557: }
558: PetscFree(a2anew);
559: }
560:
561: /* for each row k */
562: for (k = 0; k<mbs; k++){
564: /*initialize k-th row with elements nonzero in row perm(k) of A */
565: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
566:
567: ap = aa + jmin*bs2;
568: for (j = jmin; j < jmax; j++){
569: vj = perm_ptr[aj[j]]; /* block col. index */
570: rtmp_ptr = rtmp + vj*bs2;
571: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
572: }
574: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
575: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
576: i = jl[k]; /* first row to be added to k_th row */
578: while (i < k){
579: nexti = jl[i]; /* next row to be added to k_th row */
581: /* compute multiplier */
582: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
584: /* uik = -inv(Di)*U_bar(i,k) */
585: diag = ba + i*bs2;
586: u = ba + ili*bs2;
587: PetscMemzero(uik,bs2*sizeof(MatScalar));
588: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
589:
590: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
591: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
592: PetscLogFlops(bslog*2);
593:
594: /* update -U(i,k) */
595: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
597: /* add multiple of row i to k-th row ... */
598: jmin = ili + 1; jmax = bi[i+1];
599: if (jmin < jmax){
600: for (j=jmin; j<jmax; j++) {
601: /* rtmp += -U(i,k)^T * U_bar(i,j) */
602: rtmp_ptr = rtmp + bj[j]*bs2;
603: u = ba + j*bs2;
604: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
605: }
606: PetscLogFlops(bslog*(jmax-jmin));
607:
608: /* ... add i to row list for next nonzero entry */
609: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
610: j = bj[jmin];
611: jl[i] = jl[j]; jl[j] = i; /* update jl */
612: }
613: i = nexti;
614: }
616: /* save nonzero entries in k-th row of U ... */
618: /* invert diagonal block */
619: diag = ba+k*bs2;
620: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
621: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
622:
623: jmin = bi[k]; jmax = bi[k+1];
624: if (jmin < jmax) {
625: for (j=jmin; j<jmax; j++){
626: vj = bj[j]; /* block col. index of U */
627: u = ba + j*bs2;
628: rtmp_ptr = rtmp + vj*bs2;
629: for (k1=0; k1<bs2; k1++){
630: *u++ = *rtmp_ptr;
631: *rtmp_ptr++ = 0.0;
632: }
633: }
634:
635: /* ... add k to row list for first nonzero entry in k-th row */
636: il[k] = jmin;
637: i = bj[jmin];
638: jl[k] = jl[i]; jl[i] = k;
639: }
640: }
642: PetscFree(rtmp);
643: PetscFree(il);
644: PetscFree(dk);
645: PetscFree(pivots);
646: if (a->permute){
647: PetscFree(aa);
648: }
650: ISRestoreIndices(perm,&perm_ptr);
651: C->factor = FACTOR_CHOLESKY;
652: C->assembled = PETSC_TRUE;
653: C->preallocated = PETSC_TRUE;
654: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
655: return(0);
656: }
660: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
661: {
662: Mat C = *B;
663: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
665: PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
666: PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
667: PetscInt bs=A->rmap.bs,bs2 = a->bs2,bslog;
668: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
669: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
670: MatScalar *work;
671: PetscInt *pivots;
674: PetscMalloc(bs2*mbs*sizeof(MatScalar),&rtmp);
675: PetscMemzero(rtmp,bs2*mbs*sizeof(MatScalar));
676: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
677: jl = il + mbs;
678: for (i=0; i<mbs; i++) {
679: jl[i] = mbs; il[0] = 0;
680: }
681: PetscMalloc((2*bs2+bs)*sizeof(MatScalar),&dk);
682: uik = dk + bs2;
683: work = uik + bs2;
684: PetscMalloc(bs*sizeof(PetscInt),&pivots);
685:
686: ai = a->i; aj = a->j; aa = a->a;
688: /* flops in while loop */
689: bslog = 2*bs*bs2;
690:
691: /* for each row k */
692: for (k = 0; k<mbs; k++){
694: /*initialize k-th row with elements nonzero in row k of A */
695: jmin = ai[k]; jmax = ai[k+1];
696: ap = aa + jmin*bs2;
697: for (j = jmin; j < jmax; j++){
698: vj = aj[j]; /* block col. index */
699: rtmp_ptr = rtmp + vj*bs2;
700: for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
701: }
703: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
704: PetscMemcpy(dk,rtmp+k*bs2,bs2*sizeof(MatScalar));
705: i = jl[k]; /* first row to be added to k_th row */
707: while (i < k){
708: nexti = jl[i]; /* next row to be added to k_th row */
710: /* compute multiplier */
711: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
713: /* uik = -inv(Di)*U_bar(i,k) */
714: diag = ba + i*bs2;
715: u = ba + ili*bs2;
716: PetscMemzero(uik,bs2*sizeof(MatScalar));
717: Kernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);
718:
719: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
720: Kernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
721: PetscLogFlops(bslog*2);
722:
723: /* update -U(i,k) */
724: PetscMemcpy(ba+ili*bs2,uik,bs2*sizeof(MatScalar));
726: /* add multiple of row i to k-th row ... */
727: jmin = ili + 1; jmax = bi[i+1];
728: if (jmin < jmax){
729: for (j=jmin; j<jmax; j++) {
730: /* rtmp += -U(i,k)^T * U_bar(i,j) */
731: rtmp_ptr = rtmp + bj[j]*bs2;
732: u = ba + j*bs2;
733: Kernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
734: }
735: PetscLogFlops(bslog*(jmax-jmin));
736:
737: /* ... add i to row list for next nonzero entry */
738: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
739: j = bj[jmin];
740: jl[i] = jl[j]; jl[j] = i; /* update jl */
741: }
742: i = nexti;
743: }
745: /* save nonzero entries in k-th row of U ... */
747: /* invert diagonal block */
748: diag = ba+k*bs2;
749: PetscMemcpy(diag,dk,bs2*sizeof(MatScalar));
750: Kernel_A_gets_inverse_A(bs,diag,pivots,work);
751:
752: jmin = bi[k]; jmax = bi[k+1];
753: if (jmin < jmax) {
754: for (j=jmin; j<jmax; j++){
755: vj = bj[j]; /* block col. index of U */
756: u = ba + j*bs2;
757: rtmp_ptr = rtmp + vj*bs2;
758: for (k1=0; k1<bs2; k1++){
759: *u++ = *rtmp_ptr;
760: *rtmp_ptr++ = 0.0;
761: }
762: }
763:
764: /* ... add k to row list for first nonzero entry in k-th row */
765: il[k] = jmin;
766: i = bj[jmin];
767: jl[k] = jl[i]; jl[i] = k;
768: }
769: }
771: PetscFree(rtmp);
772: PetscFree(il);
773: PetscFree(dk);
774: PetscFree(pivots);
776: C->factor = FACTOR_CHOLESKY;
777: C->assembled = PETSC_TRUE;
778: C->preallocated = PETSC_TRUE;
779: PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
780: return(0);
781: }
783: /*
784: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
785: Version for blocks 2 by 2.
786: */
789: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat A,MatFactorInfo *info,Mat *B)
790: {
791: Mat C = *B;
792: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
793: IS perm = b->row;
795: PetscInt *perm_ptr,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
796: PetscInt *ai,*aj,*a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
797: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
798: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
801: /* initialization */
802: /* il and jl record the first nonzero element in each row of the accessing
803: window U(0:k, k:mbs-1).
804: jl: list of rows to be added to uneliminated rows
805: i>= k: jl(i) is the first row to be added to row i
806: i< k: jl(i) is the row following row i in some list of rows
807: jl(i) = mbs indicates the end of a list
808: il(i): points to the first nonzero element in columns k,...,mbs-1 of
809: row i of U */
810: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
811: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
812: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
813: jl = il + mbs;
814: for (i=0; i<mbs; i++) {
815: jl[i] = mbs; il[0] = 0;
816: }
817: PetscMalloc(8*sizeof(MatScalar),&dk);
818: uik = dk + 4;
819: ISGetIndices(perm,&perm_ptr);
821: /* check permutation */
822: if (!a->permute){
823: ai = a->i; aj = a->j; aa = a->a;
824: } else {
825: ai = a->inew; aj = a->jnew;
826: PetscMalloc(4*ai[mbs]*sizeof(MatScalar),&aa);
827: PetscMemcpy(aa,a->a,4*ai[mbs]*sizeof(MatScalar));
828: PetscMalloc(ai[mbs]*sizeof(PetscInt),&a2anew);
829: PetscMemcpy(a2anew,a->a2anew,(ai[mbs])*sizeof(PetscInt));
831: for (i=0; i<mbs; i++){
832: jmin = ai[i]; jmax = ai[i+1];
833: for (j=jmin; j<jmax; j++){
834: while (a2anew[j] != j){
835: k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
836: for (k1=0; k1<4; k1++){
837: dk[k1] = aa[k*4+k1];
838: aa[k*4+k1] = aa[j*4+k1];
839: aa[j*4+k1] = dk[k1];
840: }
841: }
842: /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
843: if (i > aj[j]){
844: /* printf("change orientation, row: %d, col: %d\n",i,aj[j]); */
845: ap = aa + j*4; /* ptr to the beginning of the block */
846: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
847: ap[1] = ap[2];
848: ap[2] = dk[1];
849: }
850: }
851: }
852: PetscFree(a2anew);
853: }
855: /* for each row k */
856: for (k = 0; k<mbs; k++){
858: /*initialize k-th row with elements nonzero in row perm(k) of A */
859: jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
860: ap = aa + jmin*4;
861: for (j = jmin; j < jmax; j++){
862: vj = perm_ptr[aj[j]]; /* block col. index */
863: rtmp_ptr = rtmp + vj*4;
864: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
865: }
867: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
868: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
869: i = jl[k]; /* first row to be added to k_th row */
871: while (i < k){
872: nexti = jl[i]; /* next row to be added to k_th row */
874: /* compute multiplier */
875: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
877: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
878: diag = ba + i*4;
879: u = ba + ili*4;
880: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
881: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
882: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
883: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
884:
885: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
886: dk[0] += uik[0]*u[0] + uik[1]*u[1];
887: dk[1] += uik[2]*u[0] + uik[3]*u[1];
888: dk[2] += uik[0]*u[2] + uik[1]*u[3];
889: dk[3] += uik[2]*u[2] + uik[3]*u[3];
891: PetscLogFlops(16*2);
893: /* update -U(i,k): ba[ili] = uik */
894: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
896: /* add multiple of row i to k-th row ... */
897: jmin = ili + 1; jmax = bi[i+1];
898: if (jmin < jmax){
899: for (j=jmin; j<jmax; j++) {
900: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
901: rtmp_ptr = rtmp + bj[j]*4;
902: u = ba + j*4;
903: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
904: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
905: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
906: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
907: }
908: PetscLogFlops(16*(jmax-jmin));
909:
910: /* ... add i to row list for next nonzero entry */
911: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
912: j = bj[jmin];
913: jl[i] = jl[j]; jl[j] = i; /* update jl */
914: }
915: i = nexti;
916: }
918: /* save nonzero entries in k-th row of U ... */
920: /* invert diagonal block */
921: diag = ba+k*4;
922: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
923: Kernel_A_gets_inverse_A_2(diag);
924:
925: jmin = bi[k]; jmax = bi[k+1];
926: if (jmin < jmax) {
927: for (j=jmin; j<jmax; j++){
928: vj = bj[j]; /* block col. index of U */
929: u = ba + j*4;
930: rtmp_ptr = rtmp + vj*4;
931: for (k1=0; k1<4; k1++){
932: *u++ = *rtmp_ptr;
933: *rtmp_ptr++ = 0.0;
934: }
935: }
936:
937: /* ... add k to row list for first nonzero entry in k-th row */
938: il[k] = jmin;
939: i = bj[jmin];
940: jl[k] = jl[i]; jl[i] = k;
941: }
942: }
944: PetscFree(rtmp);
945: PetscFree(il);
946: PetscFree(dk);
947: if (a->permute) {
948: PetscFree(aa);
949: }
950: ISRestoreIndices(perm,&perm_ptr);
951: C->factor = FACTOR_CHOLESKY;
952: C->assembled = PETSC_TRUE;
953: C->preallocated = PETSC_TRUE;
954: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
955: return(0);
956: }
958: /*
959: Version for when blocks are 2 by 2 Using natural ordering
960: */
963: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
964: {
965: Mat C = *B;
966: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ *)C->data;
968: PetscInt i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
969: PetscInt *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
970: MatScalar *ba = b->a,*aa,*ap,*dk,*uik;
971: MatScalar *u,*diag,*rtmp,*rtmp_ptr;
974: /* initialization */
975: /* il and jl record the first nonzero element in each row of the accessing
976: window U(0:k, k:mbs-1).
977: jl: list of rows to be added to uneliminated rows
978: i>= k: jl(i) is the first row to be added to row i
979: i< k: jl(i) is the row following row i in some list of rows
980: jl(i) = mbs indicates the end of a list
981: il(i): points to the first nonzero element in columns k,...,mbs-1 of
982: row i of U */
983: PetscMalloc(4*mbs*sizeof(MatScalar),&rtmp);
984: PetscMemzero(rtmp,4*mbs*sizeof(MatScalar));
985: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
986: jl = il + mbs;
987: for (i=0; i<mbs; i++) {
988: jl[i] = mbs; il[0] = 0;
989: }
990: PetscMalloc(8*sizeof(MatScalar),&dk);
991: uik = dk + 4;
992:
993: ai = a->i; aj = a->j; aa = a->a;
995: /* for each row k */
996: for (k = 0; k<mbs; k++){
998: /*initialize k-th row with elements nonzero in row k of A */
999: jmin = ai[k]; jmax = ai[k+1];
1000: ap = aa + jmin*4;
1001: for (j = jmin; j < jmax; j++){
1002: vj = aj[j]; /* block col. index */
1003: rtmp_ptr = rtmp + vj*4;
1004: for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1005: }
1006:
1007: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1008: PetscMemcpy(dk,rtmp+k*4,4*sizeof(MatScalar));
1009: i = jl[k]; /* first row to be added to k_th row */
1011: while (i < k){
1012: nexti = jl[i]; /* next row to be added to k_th row */
1014: /* compute multiplier */
1015: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1017: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1018: diag = ba + i*4;
1019: u = ba + ili*4;
1020: uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1021: uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1022: uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1023: uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);
1024:
1025: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1026: dk[0] += uik[0]*u[0] + uik[1]*u[1];
1027: dk[1] += uik[2]*u[0] + uik[3]*u[1];
1028: dk[2] += uik[0]*u[2] + uik[1]*u[3];
1029: dk[3] += uik[2]*u[2] + uik[3]*u[3];
1031: PetscLogFlops(16*2);
1033: /* update -U(i,k): ba[ili] = uik */
1034: PetscMemcpy(ba+ili*4,uik,4*sizeof(MatScalar));
1036: /* add multiple of row i to k-th row ... */
1037: jmin = ili + 1; jmax = bi[i+1];
1038: if (jmin < jmax){
1039: for (j=jmin; j<jmax; j++) {
1040: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1041: rtmp_ptr = rtmp + bj[j]*4;
1042: u = ba + j*4;
1043: rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1044: rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1045: rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1046: rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1047: }
1048: PetscLogFlops(16*(jmax-jmin));
1050: /* ... add i to row list for next nonzero entry */
1051: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1052: j = bj[jmin];
1053: jl[i] = jl[j]; jl[j] = i; /* update jl */
1054: }
1055: i = nexti;
1056: }
1058: /* save nonzero entries in k-th row of U ... */
1060: /* invert diagonal block */
1061: diag = ba+k*4;
1062: PetscMemcpy(diag,dk,4*sizeof(MatScalar));
1063: Kernel_A_gets_inverse_A_2(diag);
1064:
1065: jmin = bi[k]; jmax = bi[k+1];
1066: if (jmin < jmax) {
1067: for (j=jmin; j<jmax; j++){
1068: vj = bj[j]; /* block col. index of U */
1069: u = ba + j*4;
1070: rtmp_ptr = rtmp + vj*4;
1071: for (k1=0; k1<4; k1++){
1072: *u++ = *rtmp_ptr;
1073: *rtmp_ptr++ = 0.0;
1074: }
1075: }
1076:
1077: /* ... add k to row list for first nonzero entry in k-th row */
1078: il[k] = jmin;
1079: i = bj[jmin];
1080: jl[k] = jl[i]; jl[i] = k;
1081: }
1082: }
1084: PetscFree(rtmp);
1085: PetscFree(il);
1086: PetscFree(dk);
1088: C->factor = FACTOR_CHOLESKY;
1089: C->assembled = PETSC_TRUE;
1090: C->preallocated = PETSC_TRUE;
1091: PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1092: return(0);
1093: }
1095: /*
1096: Numeric U^T*D*U factorization for SBAIJ format.
1097: Version for blocks are 1 by 1.
1098: */
1101: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat A,MatFactorInfo *info,Mat *B)
1102: {
1103: Mat C = *B;
1104: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1105: IS ip=b->row;
1107: PetscInt *rip,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1108: PetscInt *ai,*aj,*a2anew;
1109: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1110: MatScalar *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1111: PetscReal zeropivot,rs,shiftnz;
1112: PetscReal shiftpd;
1113: ChShift_Ctx sctx;
1114: PetscInt newshift;
1117: /* initialization */
1118: shiftnz = info->shiftnz;
1119: shiftpd = info->shiftpd;
1120: zeropivot = info->zeropivot;
1122: ISGetIndices(ip,&rip);
1123: if (!a->permute){
1124: ai = a->i; aj = a->j; aa = a->a;
1125: } else {
1126: ai = a->inew; aj = a->jnew;
1127: nz = ai[mbs];
1128: PetscMalloc(nz*sizeof(MatScalar),&aa);
1129: a2anew = a->a2anew;
1130: bval = a->a;
1131: for (j=0; j<nz; j++){
1132: aa[a2anew[j]] = *(bval++);
1133: }
1134: }
1135:
1136: /* initialization */
1137: /* il and jl record the first nonzero element in each row of the accessing
1138: window U(0:k, k:mbs-1).
1139: jl: list of rows to be added to uneliminated rows
1140: i>= k: jl(i) is the first row to be added to row i
1141: i< k: jl(i) is the row following row i in some list of rows
1142: jl(i) = mbs indicates the end of a list
1143: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1144: row i of U */
1145: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1146: PetscMalloc(nz,&il);
1147: jl = il + mbs;
1148: rtmp = (MatScalar*)(jl + mbs);
1150: sctx.shift_amount = 0;
1151: sctx.nshift = 0;
1152: do {
1153: sctx.chshift = PETSC_FALSE;
1154: for (i=0; i<mbs; i++) {
1155: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1156: }
1157:
1158: for (k = 0; k<mbs; k++){
1159: /*initialize k-th row by the perm[k]-th row of A */
1160: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1161: bval = ba + bi[k];
1162: for (j = jmin; j < jmax; j++){
1163: col = rip[aj[j]];
1164: rtmp[col] = aa[j];
1165: *bval++ = 0.0; /* for in-place factorization */
1166: }
1168: /* shift the diagonal of the matrix */
1169: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1171: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1172: dk = rtmp[k];
1173: i = jl[k]; /* first row to be added to k_th row */
1175: while (i < k){
1176: nexti = jl[i]; /* next row to be added to k_th row */
1178: /* compute multiplier, update diag(k) and U(i,k) */
1179: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1180: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1181: dk += uikdi*ba[ili];
1182: ba[ili] = uikdi; /* -U(i,k) */
1184: /* add multiple of row i to k-th row */
1185: jmin = ili + 1; jmax = bi[i+1];
1186: if (jmin < jmax){
1187: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1188: PetscLogFlops(2*(jmax-jmin));
1190: /* update il and jl for row i */
1191: il[i] = jmin;
1192: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1193: }
1194: i = nexti;
1195: }
1197: /* shift the diagonals when zero pivot is detected */
1198: /* compute rs=sum of abs(off-diagonal) */
1199: rs = 0.0;
1200: jmin = bi[k]+1;
1201: nz = bi[k+1] - jmin;
1202: if (nz){
1203: bcol = bj + jmin;
1204: while (nz--){
1205: rs += PetscAbsScalar(rtmp[*bcol]);
1206: bcol++;
1207: }
1208: }
1210: sctx.rs = rs;
1211: sctx.pv = dk;
1212: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1213: if (newshift == 1) break; /* sctx.shift_amount is updated */
1214:
1215: /* copy data into U(k,:) */
1216: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1217: jmin = bi[k]+1; jmax = bi[k+1];
1218: if (jmin < jmax) {
1219: for (j=jmin; j<jmax; j++){
1220: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1221: }
1222: /* add the k-th row into il and jl */
1223: il[k] = jmin;
1224: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1225: }
1226: }
1227: } while (sctx.chshift);
1228: PetscFree(il);
1229: if (a->permute){PetscFree(aa);}
1231: ISRestoreIndices(ip,&rip);
1232: C->factor = FACTOR_CHOLESKY;
1233: C->assembled = PETSC_TRUE;
1234: C->preallocated = PETSC_TRUE;
1235: PetscLogFlops(C->rmap.N);
1236: if (sctx.nshift){
1237: if (shiftnz) {
1238: PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1239: } else if (shiftpd) {
1240: PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1241: }
1242: }
1243: return(0);
1244: }
1246: /*
1247: Version for when blocks are 1 by 1 Using natural ordering
1248: */
1251: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat A,MatFactorInfo *info,Mat *B)
1252: {
1253: Mat C = *B;
1254: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ *)C->data;
1256: PetscInt i,j,mbs = a->mbs;
1257: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1258: PetscInt k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1259: MatScalar *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1260: PetscReal zeropivot,rs,shiftnz;
1261: PetscReal shiftpd;
1262: ChShift_Ctx sctx;
1263: PetscInt newshift;
1266: /* initialization */
1267: shiftnz = info->shiftnz;
1268: shiftpd = info->shiftpd;
1269: zeropivot = info->zeropivot;
1271: /* il and jl record the first nonzero element in each row of the accessing
1272: window U(0:k, k:mbs-1).
1273: jl: list of rows to be added to uneliminated rows
1274: i>= k: jl(i) is the first row to be added to row i
1275: i< k: jl(i) is the row following row i in some list of rows
1276: jl(i) = mbs indicates the end of a list
1277: il(i): points to the first nonzero element in U(i,k:mbs-1)
1278: */
1279: PetscMalloc(mbs*sizeof(MatScalar),&rtmp);
1280: PetscMalloc(2*mbs*sizeof(PetscInt),&il);
1281: jl = il + mbs;
1283: sctx.shift_amount = 0;
1284: sctx.nshift = 0;
1285: do {
1286: sctx.chshift = PETSC_FALSE;
1287: for (i=0; i<mbs; i++) {
1288: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1289: }
1291: for (k = 0; k<mbs; k++){
1292: /*initialize k-th row with elements nonzero in row perm(k) of A */
1293: nz = ai[k+1] - ai[k];
1294: acol = aj + ai[k];
1295: aval = aa + ai[k];
1296: bval = ba + bi[k];
1297: while (nz -- ){
1298: rtmp[*acol++] = *aval++;
1299: *bval++ = 0.0; /* for in-place factorization */
1300: }
1302: /* shift the diagonal of the matrix */
1303: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1304:
1305: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1306: dk = rtmp[k];
1307: i = jl[k]; /* first row to be added to k_th row */
1309: while (i < k){
1310: nexti = jl[i]; /* next row to be added to k_th row */
1311: /* compute multiplier, update D(k) and U(i,k) */
1312: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1313: uikdi = - ba[ili]*ba[bi[i]];
1314: dk += uikdi*ba[ili];
1315: ba[ili] = uikdi; /* -U(i,k) */
1317: /* add multiple of row i to k-th row ... */
1318: jmin = ili + 1;
1319: nz = bi[i+1] - jmin;
1320: if (nz > 0){
1321: bcol = bj + jmin;
1322: bval = ba + jmin;
1323: PetscLogFlops(2*nz);
1324: while (nz --) rtmp[*bcol++] += uikdi*(*bval++);
1325:
1326: /* update il and jl for i-th row */
1327: il[i] = jmin;
1328: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1329: }
1330: i = nexti;
1331: }
1333: /* shift the diagonals when zero pivot is detected */
1334: /* compute rs=sum of abs(off-diagonal) */
1335: rs = 0.0;
1336: jmin = bi[k]+1;
1337: nz = bi[k+1] - jmin;
1338: if (nz){
1339: bcol = bj + jmin;
1340: while (nz--){
1341: rs += PetscAbsScalar(rtmp[*bcol]);
1342: bcol++;
1343: }
1344: }
1346: sctx.rs = rs;
1347: sctx.pv = dk;
1348: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1349: if (newshift == 1) break; /* sctx.shift_amount is updated */
1350:
1351: /* copy data into U(k,:) */
1352: ba[bi[k]] = 1.0/dk;
1353: jmin = bi[k]+1;
1354: nz = bi[k+1] - jmin;
1355: if (nz){
1356: bcol = bj + jmin;
1357: bval = ba + jmin;
1358: while (nz--){
1359: *bval++ = rtmp[*bcol];
1360: rtmp[*bcol++] = 0.0;
1361: }
1362: /* add k-th row into il and jl */
1363: il[k] = jmin;
1364: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1365: }
1366: } /* end of for (k = 0; k<mbs; k++) */
1367: } while (sctx.chshift);
1368: PetscFree(rtmp);
1369: PetscFree(il);
1370:
1371: C->factor = FACTOR_CHOLESKY;
1372: C->assembled = PETSC_TRUE;
1373: C->preallocated = PETSC_TRUE;
1374: PetscLogFlops(C->rmap.N);
1375: if (sctx.nshift){
1376: if (shiftnz) {
1377: PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1378: } else if (shiftpd) {
1379: PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1380: }
1381: }
1382: return(0);
1383: }
1387: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info)
1388: {
1390: Mat C;
1393: MatCholeskyFactorSymbolic(A,perm,info,&C);
1394: MatCholeskyFactorNumeric(A,info,&C);
1395: MatHeaderCopy(A,C);
1396: return(0);
1397: }