Actual source code: baijfact.c
/*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/
/*
    Factorization code for BAIJ format.
*/
#include "src/mat/impls/baij/seq/baij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/ilu.h"

/* ------------------------------------------------------------*/
/*
   Version for when blocks are 2 by 2
*/
int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
{
  Mat         C = *B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data;
  IS          isrow = b->row,isicol = b->icol;
  int         *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
  int         *ajtmpold,*ajtmp,nz,row;
  int         *diag_offset = b->diag,idx,*ai = a->i,*aj = a->j,*pj;
  MatScalar   *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
  MatScalar   p1,p2,p3,p4;
  MatScalar   *ba = b->a,*aa = a->a;

  ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
  ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
  ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);

  for (i=0; i<n; i++) {
    nz    = bi[i+1] - bi[i];
    ajtmp = bj + bi[i];
    for (j=0; j<nz; j++) {
      x = rtmp + 4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
    }
    /* load in initial (unfactored row) */
    idx      = r[i];
    nz       = ai[idx+1] - ai[idx];
    ajtmpold = aj + ai[idx];
    v        = aa + 4*ai[idx];
    for (j=0; j<nz; j++) {
      x    = rtmp + 4*ic[ajtmpold[j]];
      x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
      v   += 4;
    }
    row = *ajtmp++;
    while (row < i) {
      pc = rtmp + 4*row;
      p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
      if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
        pv = ba + 4*diag_offset[row];
        pj = bj + diag_offset[row] + 1;
        x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
        /* multiplier block M = A(i,row)*inv(A(row,row)); 2x2 blocks are stored column major */
        pc[0] = m1 = p1*x1 + p3*x2;
        pc[1] = m2 = p2*x1 + p4*x2;
        pc[2] = m3 = p1*x3 + p3*x4;
        pc[3] = m4 = p2*x3 + p4*x4;
        nz  = bi[row+1] - diag_offset[row] - 1;
        pv += 4;
        /* update the remaining blocks of row i: A(i,col) -= M*A(row,col) */
        for (j=0; j<nz; j++) {
          x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
          x  = rtmp + 4*pj[j];
          x[0] -= m1*x1 + m3*x2;
          x[1] -= m2*x1 + m4*x2;
          x[2] -= m1*x3 + m3*x4;
          x[3] -= m2*x3 + m4*x4;
          pv   += 4;
        }
        PetscLogFlops(16*nz+12);
      }
      row = *ajtmp++;
    }
    /* finished row so stick it into b->a */
    pv = ba + 4*bi[i];
    pj = bj + bi[i];
    nz = bi[i+1] - bi[i];
    for (j=0; j<nz; j++) {
      x     = rtmp + 4*pj[j];
      pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
      pv   += 4;
    }
    /* invert diagonal block */
    w = ba + 4*diag_offset[i];
    Kernel_A_gets_inverse_A_2(w);
  }

  ierr = PetscFree(rtmp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
  ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
  C->factor    = FACTOR_LU;
  C->assembled = PETSC_TRUE;
  PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
  return(0);
}
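
/*
   Illustrative sketch only (not part of the original file, never called): what an
   in-place inverse of a 2x2 block stored column major -- the layout used by the
   kernels above, i.e. a = [ a[0] a[2] ; a[1] a[3] ] -- can look like.  The real
   Kernel_A_gets_inverse_A_2() comes from the inline kernel headers and may be
   implemented differently (for example, without the zero-determinant guard shown
   here).  The function name is hypothetical.
*/
static void Illustrate_2x2_inverse_inplace(MatScalar *a)
{
  MatScalar det = a[0]*a[3] - a[1]*a[2];      /* determinant of the block */
  MatScalar idet,t;

  if (det == 0.0) return;                     /* zero pivot; real code would raise an error */
  idet = 1.0/det;
  t    = a[0];
  a[0] = a[3]*idet;  a[3] = t*idet;           /* swap and scale the diagonal */
  a[1] = -a[1]*idet; a[2] = -a[2]*idet;       /* negate and scale the off-diagonal */
}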

/*
   Version for when blocks are 2 by 2 and the natural ordering is used, so no
   row or column permutations (isrow/isicol) are needed
*/
int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
{
  Mat         C = *B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data;
  int         ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
  int         *ajtmpold,*ajtmp,nz,row;
  int         *diag_offset = b->diag,*ai = a->i,*aj = a->j,*pj;
  MatScalar   *pv,*v,*rtmp,*pc,*w,*x;
  MatScalar   p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
  MatScalar   *ba = b->a,*aa = a->a;

  ierr = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);

  for (i=0; i<n; i++) {
    nz    = bi[i+1] - bi[i];
    ajtmp = bj + bi[i];
    for (j=0; j<nz; j++) {
      x    = rtmp + 4*ajtmp[j];
      x[0] = x[1] = x[2] = x[3] = 0.0;
    }
    /* load in initial (unfactored row) */
    nz       = ai[i+1] - ai[i];
    ajtmpold = aj + ai[i];
    v        = aa + 4*ai[i];
    for (j=0; j<nz; j++) {
      x    = rtmp + 4*ajtmpold[j];
      x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
      v   += 4;
    }
    row = *ajtmp++;
    while (row < i) {
      pc = rtmp + 4*row;
      p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
      if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
        pv = ba + 4*diag_offset[row];
        pj = bj + diag_offset[row] + 1;
        x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
        pc[0] = m1 = p1*x1 + p3*x2;
        pc[1] = m2 = p2*x1 + p4*x2;
        pc[2] = m3 = p1*x3 + p3*x4;
        pc[3] = m4 = p2*x3 + p4*x4;
        nz  = bi[row+1] - diag_offset[row] - 1;
        pv += 4;
        for (j=0; j<nz; j++) {
          x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
          x  = rtmp + 4*pj[j];
          x[0] -= m1*x1 + m3*x2;
          x[1] -= m2*x1 + m4*x2;
          x[2] -= m1*x3 + m3*x4;
          x[3] -= m2*x3 + m4*x4;
          pv   += 4;
        }
        PetscLogFlops(16*nz+12);
      }
      row = *ajtmp++;
    }
    /* finished row so stick it into b->a */
    pv = ba + 4*bi[i];
    pj = bj + bi[i];
    nz = bi[i+1] - bi[i];
    for (j=0; j<nz; j++) {
      x     = rtmp + 4*pj[j];
      pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
      pv   += 4;
    }
    /* invert diagonal block */
    w = ba + 4*diag_offset[i];
    Kernel_A_gets_inverse_A_2(w);
    /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
  }

  ierr = PetscFree(rtmp);CHKERRQ(ierr);
  C->factor    = FACTOR_LU;
  C->assembled = PETSC_TRUE;
  PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
  return(0);
}

/* ----------------------------------------------------------- */
/*
   Version for when blocks are 1 by 1.
*/
int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
{
  Mat         C = *B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)C->data;
  IS          isrow = b->row,isicol = b->icol;
  int         *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
  int         *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
  int         *diag_offset = b->diag,diag,*pj;
  MatScalar   *pv,*v,*rtmp,multiplier,*pc;
  MatScalar   *ba = b->a,*aa = a->a;

  ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
  ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
  ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr);

  for (i=0; i<n; i++) {
    nz    = bi[i+1] - bi[i];
    ajtmp = bj + bi[i];
    for (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;

    /* load in initial (unfactored row) */
    nz       = ai[r[i]+1] - ai[r[i]];
    ajtmpold = aj + ai[r[i]];
    v        = aa + ai[r[i]];
    for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];

    row = *ajtmp++;
    while (row < i) {
      pc = rtmp + row;
      if (*pc != 0.0) {
        pv = ba + diag_offset[row];
        pj = bj + diag_offset[row] + 1;
        /* the stored diagonal is already the reciprocal of the pivot */
        multiplier = *pc * *pv++;
        *pc        = multiplier;
        nz         = bi[row+1] - diag_offset[row] - 1;
        for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
        PetscLogFlops(1+2*nz);
      }
      row = *ajtmp++;
    }
    /* finished row so stick it into b->a */
    pv = ba + bi[i];
    pj = bj + bi[i];
    nz = bi[i+1] - bi[i];
    for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
    diag = diag_offset[i] - bi[i];
    /* check pivot entry for current row */
    if (pv[diag] == 0.0) {
      SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
    }
    /* store the reciprocal of the pivot (used as a multiply during the solve) */
    pv[diag] = 1.0/pv[diag];
  }

  ierr = PetscFree(rtmp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
  ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
  C->factor    = FACTOR_LU;
  C->assembled = PETSC_TRUE;
  PetscLogFlops(C->n);
  return(0);
}
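
/*
   Illustrative sketch only (not part of the original file, never called): the
   dense analogue of the 1 by 1 kernel above, written to make its conventions
   explicit -- the multiplier overwrites the L(i,row) entry, the rest of row i
   is updated in place, and the diagonal of U is stored as its reciprocal
   (matching pv[diag] = 1.0/pv[diag]).  The function name is hypothetical and,
   unlike the sparse code above, no zero-pivot check is done here.
*/
static void Illustrate_dense_lu_recip_diag(MatScalar *a,int n)
{
  int       i,row,j;
  MatScalar mult;

  for (i=0; i<n; i++) {
    for (row=0; row<i; row++) {
      mult       = a[i*n+row]*a[row*n+row];  /* a[row*n+row] already holds 1/U(row,row) */
      a[i*n+row] = mult;                     /* store L(i,row) */
      for (j=row+1; j<n; j++) a[i*n+j] -= mult*a[row*n+j];
    }
    a[i*n+i] = 1.0/a[i*n+i];                 /* store the reciprocal of the pivot */
  }
}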

/* ----------------------------------------------------------- */
int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatLUInfo *info)
{
  int ierr;
  Mat C;

  ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr);
  ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr);
  /* copy the factor C into A's header so the factorization appears in place */
  ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
  PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
  return(0);
}
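
/*
   Hypothetical usage sketch (not part of the original file, never called):
   roughly how the routines in this file are reached from user code.  The exact
   calling sequence, the ordering type name, and the legality of passing
   PETSC_NULL for the MatLUInfo argument are assumptions about the PETSc API of
   this era; the function name is invented for illustration.
*/
static int Illustrate_baij_lu_usage(void)
{
  Mat A;
  IS  row,col;
  int ierr,nblocks = 10,bs = 2;

  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,bs*nblocks,bs*nblocks,3,PETSC_NULL,&A);CHKERRQ(ierr);
  /* ... insert block entries (e.g. with MatSetValuesBlocked()) and assemble ... */
  ierr = MatGetOrdering(A,MATORDERING_NATURAL,&row,&col);CHKERRQ(ierr);
  /* MatLUFactor() dispatches to MatLUFactor_SeqBAIJ() above, which performs the
     symbolic factorization and then calls the block-size specific numeric
     routine (for bs == 2, the kernels in this file) */
  ierr = MatLUFactor(A,row,col,PETSC_NULL);CHKERRQ(ierr);
  return(0);
}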