Actual source code: baijfact.c

  1: /*$Id: baijfact.c,v 1.90 2001/03/23 23:22:07 balay Exp $*/
  2: /*
  3:     Factorization code for BAIJ format. 
  4: */
 5:  #include src/mat/impls/baij/seq/baij.h
 6:  #include src/vec/vecimpl.h
 7:  #include src/inline/ilu.h

  9: /* ------------------------------------------------------------*/
 10: /*
 11:       Version for when blocks are 2 by 2
 12: */
 13: int MatLUFactorNumeric_SeqBAIJ_2(Mat A,Mat *B)
 14: {
 15:   Mat                C = *B;
 16:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
 17:   IS                 isrow = b->row,isicol = b->icol;
 18:   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
 19:   int                *ajtmpold,*ajtmp,nz,row;
 20:   int                *diag_offset=b->diag,idx,*ai=a->i,*aj=a->j,*pj;
 21:   MatScalar          *pv,*v,*rtmp,m1,m2,m3,m4,*pc,*w,*x,x1,x2,x3,x4;
 22:   MatScalar          p1,p2,p3,p4;
 23:   MatScalar          *ba = b->a,*aa = a->a;

 26:   ierr  = ISGetIndices(isrow,&r);
 27:   ierr  = ISGetIndices(isicol,&ic);
 28:   ierr  = PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);

 30:   for (i=0; i<n; i++) {
 31:     nz    = bi[i+1] - bi[i];
 32:     ajtmp = bj + bi[i];
 33:     for  (j=0; j<nz; j++) {
 34:       x = rtmp+4*ajtmp[j]; x[0] = x[1] = x[2] = x[3] = 0.0;
 35:     }
 36:     /* load in initial (unfactored row) */
 37:     idx      = r[i];
 38:     nz       = ai[idx+1] - ai[idx];
 39:     ajtmpold = aj + ai[idx];
 40:     v        = aa + 4*ai[idx];
 41:     for (j=0; j<nz; j++) {
 42:       x    = rtmp+4*ic[ajtmpold[j]];
 43:       x[0] = v[0]; x[1] = v[1]; x[2] = v[2]; x[3] = v[3];
 44:       v    += 4;
 45:     }
 46:     row = *ajtmp++;
 47:     while (row < i) {
 48:       pc = rtmp + 4*row;
 49:       p1 = pc[0]; p2 = pc[1]; p3 = pc[2]; p4 = pc[3];
 50:       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
 51:         pv = ba + 4*diag_offset[row];
 52:         pj = bj + diag_offset[row] + 1;
 53:         x1 = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
 54:         pc[0] = m1 = p1*x1 + p3*x2;
 55:         pc[1] = m2 = p2*x1 + p4*x2;
 56:         pc[2] = m3 = p1*x3 + p3*x4;
 57:         pc[3] = m4 = p2*x3 + p4*x4;
 58:         nz = bi[row+1] - diag_offset[row] - 1;
 59:         pv += 4;
 60:         for (j=0; j<nz; j++) {
 61:           x1   = pv[0]; x2 = pv[1]; x3 = pv[2]; x4 = pv[3];
 62:           x    = rtmp + 4*pj[j];
 63:           x[0] -= m1*x1 + m3*x2;
 64:           x[1] -= m2*x1 + m4*x2;
 65:           x[2] -= m1*x3 + m3*x4;
 66:           x[3] -= m2*x3 + m4*x4;
 67:           pv   += 4;
 68:         }
 69:         PetscLogFlops(16*nz+12);
 70:       }
 71:       row = *ajtmp++;
 72:     }
 73:     /* finished row so stick it into b->a */
 74:     pv = ba + 4*bi[i];
 75:     pj = bj + bi[i];
 76:     nz = bi[i+1] - bi[i];
 77:     for (j=0; j<nz; j++) {
 78:       x     = rtmp+4*pj[j];
 79:       pv[0] = x[0]; pv[1] = x[1]; pv[2] = x[2]; pv[3] = x[3];
 80:       pv   += 4;
 81:     }
 82:     /* invert diagonal block */
 83:     w = ba + 4*diag_offset[i];
 84:     Kernel_A_gets_inverse_A_2(w);
 85:   }

 87:   PetscFree(rtmp);
 88:   ISRestoreIndices(isicol,&ic);
 89:   ISRestoreIndices(isrow,&r);
 90:   C->factor = FACTOR_LU;
 91:   C->assembled = PETSC_TRUE;
 92:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
 93:   return(0);
 94: }
 95: /*
 96:       Version for when blocks are 2 by 2 Using natural ordering
 97: */
 98: int MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat A,Mat *B)
 99: {
100:   Mat                C = *B;
101:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
102:   int                ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
103:   int                *ajtmpold,*ajtmp,nz,row;
104:   int                *diag_offset = b->diag,*ai=a->i,*aj=a->j,*pj;
105:   MatScalar          *pv,*v,*rtmp,*pc,*w,*x;
106:   MatScalar          p1,p2,p3,p4,m1,m2,m3,m4,x1,x2,x3,x4;
107:   MatScalar          *ba = b->a,*aa = a->a;

110:   PetscMalloc(4*(n+1)*sizeof(MatScalar),&rtmp);

112:   for (i=0; i<n; i++) {
113:     nz    = bi[i+1] - bi[i];
114:     ajtmp = bj + bi[i];
115:     for  (j=0; j<nz; j++) {
116:       x = rtmp+4*ajtmp[j];
117:       x[0]  = x[1]  = x[2]  = x[3]  = 0.0;
118:     }
119:     /* load in initial (unfactored row) */
120:     nz       = ai[i+1] - ai[i];
121:     ajtmpold = aj + ai[i];
122:     v        = aa + 4*ai[i];
123:     for (j=0; j<nz; j++) {
124:       x    = rtmp+4*ajtmpold[j];
125:       x[0]  = v[0];  x[1]  = v[1];  x[2]  = v[2];  x[3]  = v[3];
126:       v    += 4;
127:     }
128:     row = *ajtmp++;
129:     while (row < i) {
130:       pc  = rtmp + 4*row;
131:       p1  = pc[0];  p2  = pc[1];  p3  = pc[2];  p4  = pc[3];
132:       if (p1 != 0.0 || p2 != 0.0 || p3 != 0.0 || p4 != 0.0) {
133:         pv = ba + 4*diag_offset[row];
134:         pj = bj + diag_offset[row] + 1;
135:         x1  = pv[0];  x2  = pv[1];  x3  = pv[2];  x4  = pv[3];
136:         pc[0] = m1 = p1*x1 + p3*x2;
137:         pc[1] = m2 = p2*x1 + p4*x2;
138:         pc[2] = m3 = p1*x3 + p3*x4;
139:         pc[3] = m4 = p2*x3 + p4*x4;
140:         nz = bi[row+1] - diag_offset[row] - 1;
141:         pv += 4;
142:         for (j=0; j<nz; j++) {
143:           x1   = pv[0];  x2  = pv[1];   x3 = pv[2];  x4  = pv[3];
144:           x    = rtmp + 4*pj[j];
145:           x[0] -= m1*x1 + m3*x2;
146:           x[1] -= m2*x1 + m4*x2;
147:           x[2] -= m1*x3 + m3*x4;
148:           x[3] -= m2*x3 + m4*x4;
149:           pv   += 4;
150:         }
151:         PetscLogFlops(16*nz+12);
152:       }
153:       row = *ajtmp++;
154:     }
155:     /* finished row so stick it into b->a */
156:     pv = ba + 4*bi[i];
157:     pj = bj + bi[i];
158:     nz = bi[i+1] - bi[i];
159:     for (j=0; j<nz; j++) {
160:       x      = rtmp+4*pj[j];
161:       pv[0]  = x[0];  pv[1]  = x[1];  pv[2]  = x[2];  pv[3]  = x[3];
162:       pv   += 4;
163:     }
164:     /* invert diagonal block */
165:     w = ba + 4*diag_offset[i];
166:     Kernel_A_gets_inverse_A_2(w);
167:     /*Kernel_A_gets_inverse_A(bs,w,v_pivots,v_work);*/
168:   }

170:   PetscFree(rtmp);
171:   C->factor    = FACTOR_LU;
172:   C->assembled = PETSC_TRUE;
173:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
174:   return(0);
175: }

177: /* ----------------------------------------------------------- */
178: /*
179:      Version for when blocks are 1 by 1.
180: */
181: int MatLUFactorNumeric_SeqBAIJ_1(Mat A,Mat *B)
182: {
183:   Mat                C = *B;
184:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ *)C->data;
185:   IS                 isrow = b->row,isicol = b->icol;
186:   int                *r,*ic,ierr,i,j,n = a->mbs,*bi = b->i,*bj = b->j;
187:   int                *ajtmpold,*ajtmp,nz,row,*ai = a->i,*aj = a->j;
188:   int                *diag_offset = b->diag,diag,*pj;
189:   MatScalar          *pv,*v,*rtmp,multiplier,*pc;
190:   MatScalar          *ba = b->a,*aa = a->a;

193:   ierr  = ISGetIndices(isrow,&r);
194:   ierr  = ISGetIndices(isicol,&ic);
195:   ierr  = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);

197:   for (i=0; i<n; i++) {
198:     nz    = bi[i+1] - bi[i];
199:     ajtmp = bj + bi[i];
200:     for  (j=0; j<nz; j++) rtmp[ajtmp[j]] = 0.0;

202:     /* load in initial (unfactored row) */
203:     nz       = ai[r[i]+1] - ai[r[i]];
204:     ajtmpold = aj + ai[r[i]];
205:     v        = aa + ai[r[i]];
206:     for (j=0; j<nz; j++) rtmp[ic[ajtmpold[j]]] =  v[j];

208:     row = *ajtmp++;
209:     while (row < i) {
210:       pc = rtmp + row;
211:       if (*pc != 0.0) {
212:         pv         = ba + diag_offset[row];
213:         pj         = bj + diag_offset[row] + 1;
214:         multiplier = *pc * *pv++;
215:         *pc        = multiplier;
216:         nz         = bi[row+1] - diag_offset[row] - 1;
217:         for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
218:         PetscLogFlops(1+2*nz);
219:       }
220:       row = *ajtmp++;
221:     }
222:     /* finished row so stick it into b->a */
223:     pv = ba + bi[i];
224:     pj = bj + bi[i];
225:     nz = bi[i+1] - bi[i];
226:     for (j=0; j<nz; j++) {pv[j] = rtmp[pj[j]];}
227:     diag = diag_offset[i] - bi[i];
228:     /* check pivot entry for current row */
229:     if (pv[diag] == 0.0) {
230:       SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot");
231:     }
232:     pv[diag] = 1.0/pv[diag];
233:   }

235:   PetscFree(rtmp);
236:   ISRestoreIndices(isicol,&ic);
237:   ISRestoreIndices(isrow,&r);
238:   C->factor    = FACTOR_LU;
239:   C->assembled = PETSC_TRUE;
240:   PetscLogFlops(C->n);
241:   return(0);
242: }


245: /* ----------------------------------------------------------- */
246: int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,MatLUInfo *info)
247: {
248:   int         ierr;
249:   Mat         C;

252:   MatLUFactorSymbolic(A,row,col,info,&C);
253:   MatLUFactorNumeric(A,&C);
254:   MatHeaderCopy(A,C);
255:   PetscLogObjectParent(A,((Mat_SeqBAIJ*)(A->data))->icol);
256:   return(0);
257: }