Actual source code: sbaijfact2.c
1: /*$Id: sbaijfact2.c,v 1.2.1.41 2001/08/07 03:03:01 balay Exp $*/
2: /*
3: Factorization code for SBAIJ format.
4: */
6: #include src/mat/impls/sbaij/seq/sbaij.h
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/ilu.h
10: #include src/inline/dot.h
12: int MatSolveTranspose_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
13: {
15: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
16: }
18: int MatSolveTranspose_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
19: {
21: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
22: }
24: int MatSolveTranspose_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
25: {
27: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
28: }
30: int MatSolveTranspose_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
31: {
33: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
34: }
36: int MatSolveTranspose_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
37: {
39: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
40: }
42: int MatSolveTranspose_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
43: {
45: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
46: }
48: int MatSolveTranspose_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
49: {
51: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
52: }
54: int MatSolveTranspose_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
55: {
57: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
58: }
60: int MatSolveTranspose_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
61: {
63: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
64: }
66: int MatSolveTranspose_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
67: {
69: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
70: }
72: int MatSolveTranspose_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
73: {
75: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
76: }
78: int MatSolveTranspose_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
79: {
81: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
82: }
84: int MatSolveTranspose_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
85: {
87: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
88: }
90: int MatSolveTranspose_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
91: {
93: SETERRQ(1,"Function not needed for SBAIJ format. Call MatSolve().");
94: }
96: int MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
97: {
98: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
99: IS isrow=a->row;
100: int mbs=a->mbs,*ai=a->i,*aj=a->j;
101: int nz,*vj,k,ierr,*r,idx,k1;
102: int bs=a->bs,bs2 = a->bs2;
103: MatScalar *aa=a->a,*v,*diag;
104: PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t;
107: VecGetArray(bb,&b);
108: VecGetArray(xx,&x);
109: t = a->solve_work;
110: ISGetIndices(isrow,&r);
111: PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);
113: /* solve U^T * D * y = b by forward substitution */
114: xk = t;
115: for (k=0; k<mbs; k++) { /* t <- perm(b) */
116: idx = bs*r[k];
117: for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
118: }
119: for (k=0; k<mbs; k++){
120: v = aa + bs2*ai[k];
121: xk = t + k*bs; /* Dk*xk = k-th block of x */
122: PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar)); /* xk_tmp <- xk */
123: nz = ai[k+1] - ai[k];
124: vj = aj + ai[k];
125: xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */
126: while (nz--) {
127: /* x(:) += U(k,:)^T*(Dk*xk) */
128: Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
129: vj++; xj = t + (*vj)*bs;
130: v += bs2;
131: }
132: /* xk = inv(Dk)*(Dk*xk) */
133: diag = aa+k*bs2; /* ptr to inv(Dk) */
134: Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
135: }
137: /* solve U*x = y by back substitution */
138: for (k=mbs-1; k>=0; k--){
139: v = aa + bs2*ai[k];
140: xk = t + k*bs; /* xk */
141: nz = ai[k+1] - ai[k];
142: vj = aj + ai[k];
143: xj = t + (*vj)*bs;
144: while (nz--) {
145: /* xk += U(k,:)*x(:) */
146: Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
147: vj++;
148: v += bs2; xj = t + (*vj)*bs;
149: }
150: idx = bs*r[k];
151: for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
152: }
154: PetscFree(xk_tmp);
155: VecRestoreArray(bb,&b);
156: VecRestoreArray(xx,&x);
157: PetscLogFlops(bs2*(2*a->s_nz + mbs));
158: return(0);
159: }
161: int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
162: {
163: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
164: int mbs=a->mbs,*ai=a->i,*aj=a->j;
165: int nz,*vj,k,ierr;
166: int bs=a->bs,bs2 = a->bs2;
167: MatScalar *aa=a->a,*v,*diag;
168: PetscScalar *x,*xk,*xj,*b,*xk_tmp;
171:
172: VecGetArray(bb,&b);
173: VecGetArray(xx,&x);
175: PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);
177: /* solve U^T * D * y = b by forward substitution */
178: PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar)); /* x <- b */
179: for (k=0; k<mbs; k++){
180: v = aa + bs2*ai[k];
181: xk = x + k*bs; /* Dk*xk = k-th block of x */
182: PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar)); /* xk_tmp <- xk */
183: nz = ai[k+1] - ai[k];
184: vj = aj + ai[k];
185: xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */
186: while (nz--) {
187: /* x(:) += U(k,:)^T*(Dk*xk) */
188: Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
189: vj++; xj = x + (*vj)*bs;
190: v += bs2;
191: }
192: /* xk = inv(Dk)*(Dk*xk) */
193: diag = aa+k*bs2; /* ptr to inv(Dk) */
194: Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
195: }
197: /* solve U*x = y by back substitution */
198: for (k=mbs-1; k>=0; k--){
199: v = aa + bs2*ai[k];
200: xk = x + k*bs; /* xk */
201: nz = ai[k+1] - ai[k];
202: vj = aj + ai[k];
203: xj = x + (*vj)*bs;
204: while (nz--) {
205: /* xk += U(k,:)*x(:) */
206: Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
207: vj++;
208: v += bs2; xj = x + (*vj)*bs;
209: }
210: }
212: PetscFree(xk_tmp);
213: VecRestoreArray(bb,&b);
214: VecRestoreArray(xx,&x);
215: PetscLogFlops(bs2*(2*a->s_nz + mbs));
216: return(0);
217: }
219: int MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
220: {
221: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
222: IS isrow=a->row;
223: int mbs=a->mbs,*ai=a->i,*aj=a->j;
224: int nz,*vj,k,ierr,*r,idx;
225: MatScalar *aa=a->a,*v,*d;
226: PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
229: VecGetArray(bb,&b);
230: VecGetArray(xx,&x);
231: t = a->solve_work;
232: ISGetIndices(isrow,&r);
234: /* solve U^T * D * y = b by forward substitution */
235: tp = t;
236: for (k=0; k<mbs; k++) { /* t <- perm(b) */
237: idx = 7*r[k];
238: tp[0] = b[idx];
239: tp[1] = b[idx+1];
240: tp[2] = b[idx+2];
241: tp[3] = b[idx+3];
242: tp[4] = b[idx+4];
243: tp[5] = b[idx+5];
244: tp[6] = b[idx+6];
245: tp += 7;
246: }
247:
248: for (k=0; k<mbs; k++){
249: v = aa + 49*ai[k];
250: vj = aj + ai[k];
251: tp = t + k*7;
252: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
253: nz = ai[k+1] - ai[k];
254: tp = t + (*vj)*7;
255: while (nz--) {
256: tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
257: tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
258: tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
259: tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
260: tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
261: tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
262: tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
263: vj++; tp = t + (*vj)*7;
264: v += 49;
265: }
267: /* xk = inv(Dk)*(Dk*xk) */
268: d = aa+k*49; /* ptr to inv(Dk) */
269: tp = t + k*7;
270: tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
271: tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
272: tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
273: tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
274: tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
275: tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
276: tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
277: }
279: /* solve U*x = y by back substitution */
280: for (k=mbs-1; k>=0; k--){
281: v = aa + 49*ai[k];
282: vj = aj + ai[k];
283: tp = t + k*7;
284: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */
285: nz = ai[k+1] - ai[k];
286:
287: tp = t + (*vj)*7;
288: while (nz--) {
289: /* xk += U(k,:)*x(:) */
290: x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
291: x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
292: x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
293: x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
294: x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
295: x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
296: x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
297: vj++; tp = t + (*vj)*7;
298: v += 49;
299: }
300: tp = t + k*7;
301: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
302: idx = 7*r[k];
303: x[idx] = x0;
304: x[idx+1] = x1;
305: x[idx+2] = x2;
306: x[idx+3] = x3;
307: x[idx+4] = x4;
308: x[idx+5] = x5;
309: x[idx+6] = x6;
310: }
312: VecRestoreArray(bb,&b);
313: VecRestoreArray(xx,&x);
314: PetscLogFlops(49*(2*a->s_nz + mbs));
315: return(0);
316: }
318: int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
319: {
320: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
321: int mbs=a->mbs,*ai=a->i,*aj=a->j;
322: MatScalar *aa=a->a,*v,*d;
323: PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5,x6;
324: int nz,*vj,k,ierr;
327:
328: VecGetArray(bb,&b);
329: VecGetArray(xx,&x);
330:
331: /* solve U^T * D * y = b by forward substitution */
332: PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar)); /* x <- b */
333: for (k=0; k<mbs; k++){
334: v = aa + 49*ai[k];
335: xp = x + k*7;
336: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
337: nz = ai[k+1] - ai[k];
338: vj = aj + ai[k];
339: xp = x + (*vj)*7;
340: while (nz--) {
341: /* x(:) += U(k,:)^T*(Dk*xk) */
342: xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
343: xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
344: xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
345: xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
346: xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
347: xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
348: xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
349: vj++; xp = x + (*vj)*7;
350: v += 49;
351: }
352: /* xk = inv(Dk)*(Dk*xk) */
353: d = aa+k*49; /* ptr to inv(Dk) */
354: xp = x + k*7;
355: xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
356: xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
357: xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
358: xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
359: xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
360: xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
361: xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
362: }
364: /* solve U*x = y by back substitution */
365: for (k=mbs-1; k>=0; k--){
366: v = aa + 49*ai[k];
367: xp = x + k*7;
368: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
369: nz = ai[k+1] - ai[k];
370: vj = aj + ai[k];
371: xp = x + (*vj)*7;
372: while (nz--) {
373: /* xk += U(k,:)*x(:) */
374: x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
375: x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
376: x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
377: x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
378: x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
379: x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
380: x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
381: vj++;
382: v += 49; xp = x + (*vj)*7;
383: }
384: xp = x + k*7;
385: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
386: }
388: VecRestoreArray(bb,&b);
389: VecRestoreArray(xx,&x);
390: PetscLogFlops(49*(2*a->s_nz + mbs));
391: return(0);
392: }
394: int MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
395: {
396: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
397: IS isrow=a->row;
398: int mbs=a->mbs,*ai=a->i,*aj=a->j;
399: int nz,*vj,k,ierr,*r,idx;
400: MatScalar *aa=a->a,*v,*d;
401: PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
404: VecGetArray(bb,&b);
405: VecGetArray(xx,&x);
406: t = a->solve_work;
407: ISGetIndices(isrow,&r);
409: /* solve U^T * D * y = b by forward substitution */
410: tp = t;
411: for (k=0; k<mbs; k++) { /* t <- perm(b) */
412: idx = 6*r[k];
413: tp[0] = b[idx];
414: tp[1] = b[idx+1];
415: tp[2] = b[idx+2];
416: tp[3] = b[idx+3];
417: tp[4] = b[idx+4];
418: tp[5] = b[idx+5];
419: tp += 6;
420: }
421:
422: for (k=0; k<mbs; k++){
423: v = aa + 36*ai[k];
424: vj = aj + ai[k];
425: tp = t + k*6;
426: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
427: nz = ai[k+1] - ai[k];
428: tp = t + (*vj)*6;
429: while (nz--) {
430: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
431: tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
432: tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
433: tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
434: tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
435: tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
436: vj++; tp = t + (*vj)*6;
437: v += 36;
438: }
440: /* xk = inv(Dk)*(Dk*xk) */
441: d = aa+k*36; /* ptr to inv(Dk) */
442: tp = t + k*6;
443: tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
444: tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
445: tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
446: tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
447: tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
448: tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
449: }
451: /* solve U*x = y by back substitution */
452: for (k=mbs-1; k>=0; k--){
453: v = aa + 36*ai[k];
454: vj = aj + ai[k];
455: tp = t + k*6;
456: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */
457: nz = ai[k+1] - ai[k];
458:
459: tp = t + (*vj)*6;
460: while (nz--) {
461: /* xk += U(k,:)*x(:) */
462: x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
463: x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
464: x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
465: x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
466: x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
467: x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
468: vj++; tp = t + (*vj)*6;
469: v += 36;
470: }
471: tp = t + k*6;
472: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
473: idx = 6*r[k];
474: x[idx] = x0;
475: x[idx+1] = x1;
476: x[idx+2] = x2;
477: x[idx+3] = x3;
478: x[idx+4] = x4;
479: x[idx+5] = x5;
480: }
482: VecRestoreArray(bb,&b);
483: VecRestoreArray(xx,&x);
484: PetscLogFlops(36*(2*a->s_nz + mbs));
485: return(0);
486: }
488: int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
489: {
490: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
491: int mbs=a->mbs,*ai=a->i,*aj=a->j;
492: MatScalar *aa=a->a,*v,*d;
493: PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4,x5;
494: int nz,*vj,k,ierr;
497:
498: VecGetArray(bb,&b);
499: VecGetArray(xx,&x);
500:
501: /* solve U^T * D * y = b by forward substitution */
502: PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar)); /* x <- b */
503: for (k=0; k<mbs; k++){
504: v = aa + 36*ai[k];
505: xp = x + k*6;
506: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
507: nz = ai[k+1] - ai[k];
508: vj = aj + ai[k];
509: xp = x + (*vj)*6;
510: while (nz--) {
511: /* x(:) += U(k,:)^T*(Dk*xk) */
512: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
513: xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
514: xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
515: xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
516: xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
517: xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
518: vj++; xp = x + (*vj)*6;
519: v += 36;
520: }
521: /* xk = inv(Dk)*(Dk*xk) */
522: d = aa+k*36; /* ptr to inv(Dk) */
523: xp = x + k*6;
524: xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
525: xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
526: xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
527: xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
528: xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
529: xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
530: }
532: /* solve U*x = y by back substitution */
533: for (k=mbs-1; k>=0; k--){
534: v = aa + 36*ai[k];
535: xp = x + k*6;
536: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
537: nz = ai[k+1] - ai[k];
538: vj = aj + ai[k];
539: xp = x + (*vj)*6;
540: while (nz--) {
541: /* xk += U(k,:)*x(:) */
542: x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
543: x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
544: x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
545: x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
546: x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
547: x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
548: vj++;
549: v += 36; xp = x + (*vj)*6;
550: }
551: xp = x + k*6;
552: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
553: }
555: VecRestoreArray(bb,&b);
556: VecRestoreArray(xx,&x);
557: PetscLogFlops(36*(2*a->s_nz + mbs));
558: return(0);
559: }
561: int MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
562: {
563: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
564: IS isrow=a->row;
565: int mbs=a->mbs,*ai=a->i,*aj=a->j;
566: int nz,*vj,k,ierr,*r,idx;
567: MatScalar *aa=a->a,*v,*diag;
568: PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp;
571: VecGetArray(bb,&b);
572: VecGetArray(xx,&x);
573: t = a->solve_work;
574: ISGetIndices(isrow,&r);
576: /* solve U^T * D * y = b by forward substitution */
577: tp = t;
578: for (k=0; k<mbs; k++) { /* t <- perm(b) */
579: idx = 5*r[k];
580: tp[0] = b[idx];
581: tp[1] = b[idx+1];
582: tp[2] = b[idx+2];
583: tp[3] = b[idx+3];
584: tp[4] = b[idx+4];
585: tp += 5;
586: }
587:
588: for (k=0; k<mbs; k++){
589: v = aa + 25*ai[k];
590: vj = aj + ai[k];
591: tp = t + k*5;
592: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
593: nz = ai[k+1] - ai[k];
595: tp = t + (*vj)*5;
596: while (nz--) {
597: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
598: tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
599: tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
600: tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
601: tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
602: vj++; tp = t + (*vj)*5;
603: v += 25;
604: }
606: /* xk = inv(Dk)*(Dk*xk) */
607: diag = aa+k*25; /* ptr to inv(Dk) */
608: tp = t + k*5;
609: tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
610: tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
611: tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
612: tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
613: tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
614: }
616: /* solve U*x = y by back substitution */
617: for (k=mbs-1; k>=0; k--){
618: v = aa + 25*ai[k];
619: vj = aj + ai[k];
620: tp = t + k*5;
621: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
622: nz = ai[k+1] - ai[k];
623:
624: tp = t + (*vj)*5;
625: while (nz--) {
626: /* xk += U(k,:)*x(:) */
627: x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
628: x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
629: x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
630: x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
631: x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
632: vj++; tp = t + (*vj)*5;
633: v += 25;
634: }
635: tp = t + k*5;
636: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
637: idx = 5*r[k];
638: x[idx] = x0;
639: x[idx+1] = x1;
640: x[idx+2] = x2;
641: x[idx+3] = x3;
642: x[idx+4] = x4;
643: }
645: VecRestoreArray(bb,&b);
646: VecRestoreArray(xx,&x);
647: PetscLogFlops(25*(2*a->s_nz + mbs));
648: return(0);
649: }
651: int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
652: {
653: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
654: int mbs=a->mbs,*ai=a->i,*aj=a->j;
655: MatScalar *aa=a->a,*v,*diag;
656: PetscScalar *x,*xp,*b,x0,x1,x2,x3,x4;
657: int nz,*vj,k,ierr;
660:
661: VecGetArray(bb,&b);
662: VecGetArray(xx,&x);
664: /* solve U^T * D * y = b by forward substitution */
665: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
666: for (k=0; k<mbs; k++){
667: v = aa + 25*ai[k];
668: xp = x + k*5;
669: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
670: nz = ai[k+1] - ai[k];
671: vj = aj + ai[k];
672: xp = x + (*vj)*5;
673: while (nz--) {
674: /* x(:) += U(k,:)^T*(Dk*xk) */
675: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
676: xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
677: xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
678: xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
679: xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
680: vj++; xp = x + (*vj)*5;
681: v += 25;
682: }
683: /* xk = inv(Dk)*(Dk*xk) */
684: diag = aa+k*25; /* ptr to inv(Dk) */
685: xp = x + k*5;
686: xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
687: xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
688: xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
689: xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
690: xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
691: }
693: /* solve U*x = y by back substitution */
694: for (k=mbs-1; k>=0; k--){
695: v = aa + 25*ai[k];
696: xp = x + k*5;
697: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
698: nz = ai[k+1] - ai[k];
699: vj = aj + ai[k];
700: xp = x + (*vj)*5;
701: while (nz--) {
702: /* xk += U(k,:)*x(:) */
703: x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
704: x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
705: x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
706: x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
707: x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
708: vj++;
709: v += 25; xp = x + (*vj)*5;
710: }
711: xp = x + k*5;
712: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
713: }
715: VecRestoreArray(bb,&b);
716: VecRestoreArray(xx,&x);
717: PetscLogFlops(25*(2*a->s_nz + mbs));
718: return(0);
719: }
721: int MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
722: {
723: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
724: IS isrow=a->row;
725: int mbs=a->mbs,*ai=a->i,*aj=a->j;
726: int nz,*vj,k,ierr,*r,idx;
727: MatScalar *aa=a->a,*v,*diag;
728: PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp;
731: VecGetArray(bb,&b);
732: VecGetArray(xx,&x);
733: t = a->solve_work;
734: ISGetIndices(isrow,&r);
736: /* solve U^T * D * y = b by forward substitution */
737: tp = t;
738: for (k=0; k<mbs; k++) { /* t <- perm(b) */
739: idx = 4*r[k];
740: tp[0] = b[idx];
741: tp[1] = b[idx+1];
742: tp[2] = b[idx+2];
743: tp[3] = b[idx+3];
744: tp += 4;
745: }
746:
747: for (k=0; k<mbs; k++){
748: v = aa + 16*ai[k];
749: vj = aj + ai[k];
750: tp = t + k*4;
751: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
752: nz = ai[k+1] - ai[k];
754: tp = t + (*vj)*4;
755: while (nz--) {
756: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
757: tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
758: tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
759: tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
760: vj++; tp = t + (*vj)*4;
761: v += 16;
762: }
764: /* xk = inv(Dk)*(Dk*xk) */
765: diag = aa+k*16; /* ptr to inv(Dk) */
766: tp = t + k*4;
767: tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
768: tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
769: tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
770: tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
771: }
773: /* solve U*x = y by back substitution */
774: for (k=mbs-1; k>=0; k--){
775: v = aa + 16*ai[k];
776: vj = aj + ai[k];
777: tp = t + k*4;
778: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
779: nz = ai[k+1] - ai[k];
780:
781: tp = t + (*vj)*4;
782: while (nz--) {
783: /* xk += U(k,:)*x(:) */
784: x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
785: x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
786: x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
787: x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
788: vj++; tp = t + (*vj)*4;
789: v += 16;
790: }
791: tp = t + k*4;
792: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
793: idx = 4*r[k];
794: x[idx] = x0;
795: x[idx+1] = x1;
796: x[idx+2] = x2;
797: x[idx+3] = x3;
798: }
800: VecRestoreArray(bb,&b);
801: VecRestoreArray(xx,&x);
802: PetscLogFlops(16*(2*a->s_nz + mbs));
803: return(0);
804: }
806: /*
807: Special case where the matrix was factored in the natural ordering.
808: This eliminates the need for the column and row permutation.
809: */
810: int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
811: {
812: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
813: int mbs=a->mbs,*ai=a->i,*aj=a->j;
814: MatScalar *aa=a->a,*v,*diag;
815: PetscScalar *x,*xp,*b,x0,x1,x2,x3;
816: int nz,*vj,k,ierr;
819:
820: VecGetArray(bb,&b);
821: VecGetArray(xx,&x);
823: /* solve U^T * D * y = b by forward substitution */
824: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
825: for (k=0; k<mbs; k++){
826: v = aa + 16*ai[k];
827: xp = x + k*4;
828: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
829: nz = ai[k+1] - ai[k];
830: vj = aj + ai[k];
831: xp = x + (*vj)*4;
832: while (nz--) {
833: /* x(:) += U(k,:)^T*(Dk*xk) */
834: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
835: xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
836: xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
837: xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
838: vj++; xp = x + (*vj)*4;
839: v += 16;
840: }
841: /* xk = inv(Dk)*(Dk*xk) */
842: diag = aa+k*16; /* ptr to inv(Dk) */
843: xp = x + k*4;
844: xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
845: xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
846: xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
847: xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
848: }
850: /* solve U*x = y by back substitution */
851: for (k=mbs-1; k>=0; k--){
852: v = aa + 16*ai[k];
853: xp = x + k*4;
854: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
855: nz = ai[k+1] - ai[k];
856: vj = aj + ai[k];
857: xp = x + (*vj)*4;
858: while (nz--) {
859: /* xk += U(k,:)*x(:) */
860: x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
861: x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
862: x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
863: x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
864: vj++;
865: v += 16; xp = x + (*vj)*4;
866: }
867: xp = x + k*4;
868: xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
869: }
871: VecRestoreArray(bb,&b);
872: VecRestoreArray(xx,&x);
873: PetscLogFlops(16*(2*a->s_nz + mbs));
874: return(0);
875: }
877: int MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
878: {
879: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
880: IS isrow=a->row;
881: int mbs=a->mbs,*ai=a->i,*aj=a->j;
882: int nz,*vj,k,ierr,*r,idx;
883: MatScalar *aa=a->a,*v,*diag;
884: PetscScalar *x,*b,x0,x1,x2,*t,*tp;
887: VecGetArray(bb,&b);
888: VecGetArray(xx,&x);
889: t = a->solve_work;
890: ISGetIndices(isrow,&r);
892: /* solve U^T * D * y = b by forward substitution */
893: tp = t;
894: for (k=0; k<mbs; k++) { /* t <- perm(b) */
895: idx = 3*r[k];
896: tp[0] = b[idx];
897: tp[1] = b[idx+1];
898: tp[2] = b[idx+2];
899: tp += 3;
900: }
901:
902: for (k=0; k<mbs; k++){
903: v = aa + 9*ai[k];
904: vj = aj + ai[k];
905: tp = t + k*3;
906: x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
907: nz = ai[k+1] - ai[k];
909: tp = t + (*vj)*3;
910: while (nz--) {
911: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
912: tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
913: tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
914: vj++; tp = t + (*vj)*3;
915: v += 9;
916: }
918: /* xk = inv(Dk)*(Dk*xk) */
919: diag = aa+k*9; /* ptr to inv(Dk) */
920: tp = t + k*3;
921: tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
922: tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
923: tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
924: }
926: /* solve U*x = y by back substitution */
927: for (k=mbs-1; k>=0; k--){
928: v = aa + 9*ai[k];
929: vj = aj + ai[k];
930: tp = t + k*3;
931: x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */
932: nz = ai[k+1] - ai[k];
933:
934: tp = t + (*vj)*3;
935: while (nz--) {
936: /* xk += U(k,:)*x(:) */
937: x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
938: x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
939: x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
940: vj++; tp = t + (*vj)*3;
941: v += 9;
942: }
943: tp = t + k*3;
944: tp[0] = x0; tp[1] = x1; tp[2] = x2;
945: idx = 3*r[k];
946: x[idx] = x0;
947: x[idx+1] = x1;
948: x[idx+2] = x2;
949: }
951: VecRestoreArray(bb,&b);
952: VecRestoreArray(xx,&x);
953: PetscLogFlops(9*(2*a->s_nz + mbs));
954: return(0);
955: }
957: /*
958: Special case where the matrix was factored in the natural ordering.
959: This eliminates the need for the column and row permutation.
960: */
961: int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
962: {
963: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
964: int mbs=a->mbs,*ai=a->i,*aj=a->j;
965: MatScalar *aa=a->a,*v,*diag;
966: PetscScalar *x,*xp,*b,x0,x1,x2;
967: int nz,*vj,k,ierr;
970:
971: VecGetArray(bb,&b);
972: VecGetArray(xx,&x);
974: /* solve U^T * D * y = b by forward substitution */
975: PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));
976: for (k=0; k<mbs; k++){
977: v = aa + 9*ai[k];
978: xp = x + k*3;
979: x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
980: nz = ai[k+1] - ai[k];
981: vj = aj + ai[k];
982: xp = x + (*vj)*3;
983: while (nz--) {
984: /* x(:) += U(k,:)^T*(Dk*xk) */
985: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
986: xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
987: xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
988: vj++; xp = x + (*vj)*3;
989: v += 9;
990: }
991: /* xk = inv(Dk)*(Dk*xk) */
992: diag = aa+k*9; /* ptr to inv(Dk) */
993: xp = x + k*3;
994: xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
995: xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
996: xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
997: }
999: /* solve U*x = y by back substitution */
1000: for (k=mbs-1; k>=0; k--){
1001: v = aa + 9*ai[k];
1002: xp = x + k*3;
1003: x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */
1004: nz = ai[k+1] - ai[k];
1005: vj = aj + ai[k];
1006: xp = x + (*vj)*3;
1007: while (nz--) {
1008: /* xk += U(k,:)*x(:) */
1009: x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1010: x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1011: x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1012: vj++;
1013: v += 9; xp = x + (*vj)*3;
1014: }
1015: xp = x + k*3;
1016: xp[0] = x0; xp[1] = x1; xp[2] = x2;
1017: }
1019: VecRestoreArray(bb,&b);
1020: VecRestoreArray(xx,&x);
1021: PetscLogFlops(9*(2*a->s_nz + mbs));
1022: return(0);
1023: }
1025: int MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1026: {
1027: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data;
1028: IS isrow=a->row;
1029: int mbs=a->mbs,*ai=a->i,*aj=a->j;
1030: int nz,*vj,k,k2,ierr,*r,idx;
1031: MatScalar *aa=a->a,*v,*diag;
1032: PetscScalar *x,*b,x0,x1,*t;
1035: VecGetArray(bb,&b);
1036: VecGetArray(xx,&x);
1037: t = a->solve_work;
1038: /* printf("called MatSolve_SeqSBAIJ_2n"); */
1039: ISGetIndices(isrow,&r);
1041: /* solve U^T * D * y = perm(b) by forward substitution */
1042: for (k=0; k<mbs; k++) { /* t <- perm(b) */
1043: idx = 2*r[k];
1044: t[k*2] = b[idx];
1045: t[k*2+1] = b[idx+1];
1046: }
1047: for (k=0; k<mbs; k++){
1048: v = aa + 4*ai[k];
1049: vj = aj + ai[k];
1050: k2 = k*2;
1051: x0 = t[k2]; x1 = t[k2+1];
1052: nz = ai[k+1] - ai[k];
1053: while (nz--) {
1054: t[(*vj)*2] += v[0]*x0 + v[1]*x1;
1055: t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1056: vj++; v += 4;
1057: }
1058: diag = aa+k*4; /* ptr to inv(Dk) */
1059: t[k2] = diag[0]*x0 + diag[2]*x1;
1060: t[k2+1] = diag[1]*x0 + diag[3]*x1;
1061: }
1063: /* solve U*x = y by back substitution */
1064: for (k=mbs-1; k>=0; k--){
1065: v = aa + 4*ai[k];
1066: vj = aj + ai[k];
1067: k2 = k*2;
1068: x0 = t[k2]; x1 = t[k2+1];
1069: nz = ai[k+1] - ai[k];
1070: while (nz--) {
1071: x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1072: x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1073: vj++; v += 4;
1074: }
1075: t[k2] = x0;
1076: t[k2+1] = x1;
1077: idx = 2*r[k];
1078: x[idx] = x0;
1079: x[idx+1] = x1;
1080: }
1082: ISRestoreIndices(isrow,&r);
1083: VecRestoreArray(bb,&b);
1084: VecRestoreArray(xx,&x);
1085: PetscLogFlops(4*(2*a->s_nz + mbs));
1086: return(0);
1087: }
1089: /*
1090: Special case where the matrix was factored in the natural ordering.
1091: This eliminates the need for the column and row permutation.
1092: */
1093: int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1094: {
1095: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1096: int mbs=a->mbs,*ai=a->i,*aj=a->j;
1097: MatScalar *aa=a->a,*v,*diag;
1098: PetscScalar *x,*b,x0,x1;
1099: int nz,*vj,k,k2,ierr;
1102:
1103: VecGetArray(bb,&b);
1104: VecGetArray(xx,&x);
1106: /* solve U^T * D * y = b by forward substitution */
1107: PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));
1108: for (k=0; k<mbs; k++){
1109: v = aa + 4*ai[k];
1110: vj = aj + ai[k];
1111: k2 = k*2;
1112: x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */
1113: nz = ai[k+1] - ai[k];
1114:
1115: while (nz--) {
1116: /* x(:) += U(k,:)^T*(Dk*xk) */
1117: x[(*vj)*2] += v[0]*x0 + v[1]*x1;
1118: x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1119: vj++; v += 4;
1120: }
1121: /* xk = inv(Dk)*(Dk*xk) */
1122: diag = aa+k*4; /* ptr to inv(Dk) */
1123: x[k2] = diag[0]*x0 + diag[2]*x1;
1124: x[k2+1] = diag[1]*x0 + diag[3]*x1;
1125: }
1127: /* solve U*x = y by back substitution */
1128: for (k=mbs-1; k>=0; k--){
1129: v = aa + 4*ai[k];
1130: vj = aj + ai[k];
1131: k2 = k*2;
1132: x0 = x[k2]; x1 = x[k2+1]; /* xk */
1133: nz = ai[k+1] - ai[k];
1134: while (nz--) {
1135: /* xk += U(k,:)*x(:) */
1136: x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1137: x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1138: vj++; v += 4;
1139: }
1140: x[k2] = x0;
1141: x[k2+1] = x1;
1142: }
1144: VecRestoreArray(bb,&b);
1145: VecRestoreArray(xx,&x);
1146: PetscLogFlops(4*(2*a->s_nz + mbs)); /* bs2*(2*a->s_nz + mbs) */
1147: return(0);
1148: }
1150: int MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1151: {
1152: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1153: IS isrow=a->row;
1154: int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr,*rip;
1155: MatScalar *aa=a->a,*v;
1156: PetscScalar *x,*b,xk,*t;
1157: int nz,*vj,k;
1160: if (!mbs) return(0);
1162: VecGetArray(bb,&b);
1163: VecGetArray(xx,&x);
1164: t = a->solve_work;
1166: ISGetIndices(isrow,&rip);
1167:
1168: /* solve U^T*D*y = perm(b) by forward substitution */
1169: for (k=0; k<mbs; k++) t[k] = b[rip[k]];
1170: for (k=0; k<mbs; k++){
1171: v = aa + ai[k];
1172: vj = aj + ai[k];
1173: xk = t[k];
1174: nz = ai[k+1] - ai[k];
1175: while (nz--) t[*vj++] += (*v++) * xk;
1176: t[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */
1177: }
1179: /* solve U*x = y by back substitution */
1180: for (k=mbs-1; k>=0; k--){
1181: v = aa + ai[k];
1182: vj = aj + ai[k];
1183: xk = t[k];
1184: nz = ai[k+1] - ai[k];
1185: while (nz--) xk += (*v++) * t[*vj++];
1186: t[k] = xk;
1187: x[rip[k]] = xk;
1188: }
1190: ISRestoreIndices(isrow,&rip);
1191: VecRestoreArray(bb,&b);
1192: VecRestoreArray(xx,&x);
1193: PetscLogFlops(4*a->s_nz + A->m);
1194: return(0);
1195: }
1197: /*
1198: Special case where the matrix was ILU(0) factored in the natural
1199: ordering. This eliminates the need for the column and row permutation.
1200: */
1201: int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1202: {
1203: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1204: int mbs=a->mbs,*ai=a->i,*aj=a->j,ierr;
1205: MatScalar *aa=a->a,*v;
1206: PetscScalar *x,*b,xk;
1207: int nz,*vj,k;
1210:
1211: VecGetArray(bb,&b);
1212: VecGetArray(xx,&x);
1213:
1214: /* solve U^T*D*y = b by forward substitution */
1215: PetscMemcpy(x,b,mbs*sizeof(PetscScalar));
1216: for (k=0; k<mbs; k++){
1217: v = aa + ai[k];
1218: vj = aj + ai[k];
1219: xk = x[k];
1220: nz = ai[k+1] - ai[k];
1221: while (nz--) x[*vj++] += (*v++) * xk;
1222: x[k] = xk*aa[k]; /* note: aa[k] = 1/D(k) */
1223: }
1225: /* solve U*x = y by back substitution */
1226: for (k=mbs-2; k>=0; k--){
1227: v = aa + ai[k];
1228: vj = aj + ai[k];
1229: xk = x[k];
1230: nz = ai[k+1] - ai[k];
1231: while (nz--) xk += (*v++) * x[*vj++];
1232: x[k] = xk;
1233: }
1235: VecRestoreArray(bb,&b);
1236: VecRestoreArray(xx,&x);
1237: PetscLogFlops(4*a->s_nz + A->m);
1238: return(0);
1239: }
1241: int MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,PetscReal f,int levels,Mat *B)
1242: {
1243: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
1244: int *rip,ierr,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1245: int *jutmp,bs = a->bs,bs2=a->bs2;
1246: int m,nzi,realloc = 0,*levtmp;
1247: int *jl,*q,jumin,jmin,jmax,juptr,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1248: int incrlev,*lev,lev_ik,shift;
1249: PetscTruth perm_identity;
1252:
1253: /* check whether perm is the identity mapping */
1254: ISIdentity(perm,&perm_identity);
1255: if (!perm_identity) a->permute = PETSC_TRUE;
1256: if (perm_identity){
1257: ai = a->i; aj = a->j;
1258: } else { /* non-trivial permutation */
1259: MatReorderingSeqSBAIJ(A, perm);
1260: ai = a->inew; aj = a->jnew;
1261: }
1262:
1263: /* initialization */
1264: /* Don't know how many column pointers are needed so estimate.
1265: Use Modified Sparse Row storage for u and ju, see Sasd pp.85 */
1266: ierr = ISGetIndices(perm,&rip);
1267: umax = (int)(f*ai[mbs] + 1);
1268: ierr = PetscMalloc(umax*sizeof(int),&lev);
1269: umax += mbs + 1;
1270: shift = mbs + 1;
1271: ierr = PetscMalloc((mbs+1)*sizeof(int),&iu);
1272: ierr = PetscMalloc(umax*sizeof(int),&ju);
1273: iu[0] = mbs+1;
1274: juptr = mbs;
1275: ierr = PetscMalloc(mbs*sizeof(int),&jl);
1276: ierr = PetscMalloc(mbs*sizeof(int),&q);
1277: ierr = PetscMalloc((mbs+1)*sizeof(int),&levtmp);
1278: for (i=0; i<mbs; i++){
1279: jl[i] = mbs; q[i] = 0;
1280: }
1282: /* for each row k */
1283: for (k=0; k<mbs; k++){
1284: nzk = 0;
1285: q[k] = mbs;
1286: /* initialize nonzero structure of k-th row to row rip[k] of A */
1287: jmin = ai[rip[k]];
1288: jmax = ai[rip[k]+1];
1289: for (j=jmin; j<jmax; j++){
1290: vj = rip[aj[j]];
1291: if (vj > k){
1292: qm = k;
1293: do {
1294: m = qm; qm = q[m];
1295: } while(qm < vj);
1296: if (qm == vj) {
1297: printf(" error: duplicate entry in An"); break;
1298: }
1299: nzk++;
1300: q[m] = vj;
1301: q[vj] = qm;
1302: levtmp[vj] = 0; /* initialize lev for nonzero element */
1303: }
1304: }
1306: /* modify nonzero structure of k-th row by computing fill-in
1307: for each row i to be merged in */
1308: i = k;
1309: i = jl[i]; /* next pivot row (== 0 for symbolic factorization) */
1310:
1311: while (i < mbs){
1312: /* merge row i into k-th row */
1313: j=iu[i];
1314: lev_ik = lev[j-shift];
1315: nzi = iu[i+1] - (iu[i]+1);
1316: jmin = iu[i] + 1; jmax = iu[i] + nzi;
1317: qm = k;
1318: for (j=jmin; j<jmax+1; j++){
1319: vj = ju[j];
1320: incrlev = lev[j-shift]+lev_ik+1;
1322: if (incrlev > levels) continue;
1323: do {
1324: m = qm; qm = q[m];
1325: } while (qm < vj);
1326: if (qm != vj){ /* a fill */
1327: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1328: levtmp[vj] = incrlev;
1329: }
1330: else { /* already a nonzero element */
1331: if (levtmp[vj]>incrlev) levtmp[vj] = incrlev;
1332: }
1333: }
1334: i = jl[i]; /* next pivot row */
1335: }
1336:
1337: /* add k to row list for first nonzero element in k-th row */
1338: if (nzk > 1){
1339: i = q[k]; /* col value of first nonzero element in k_th row of U */
1340: jl[k] = jl[i]; jl[i] = k;
1341: }
1342: iu[k+1] = iu[k] + nzk;
1344: /* allocate more space to ju and lev if needed */
1345: if (iu[k+1] > umax) {
1346: printf("allocate more space, iu[%d]=%d > umax=%dn",k+1, iu[k+1],umax);
1347: /* estimate how much additional space we will need */
1348: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1349: /* just double the memory each time */
1350: maxadd = umax;
1351: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1352: umax += maxadd;
1354: /* allocate a longer ju */
1355: PetscMalloc(umax*sizeof(int),&jutmp);
1356: PetscMemcpy(jutmp,ju,iu[k]*sizeof(int));
1357: PetscFree(ju);
1358: ju = jutmp;
1360: ierr = PetscMalloc(umax*sizeof(int),&jutmp);
1361: ierr = PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(int));
1362: ierr = PetscFree(lev);
1363: lev = jutmp;
1364: realloc += 2; /* count how many times we realloc */
1365: }
1367: /* save nonzero structure of k-th row in ju */
1368: i=k;
1369: jumin = juptr + 1; juptr += nzk;
1370: for (j=jumin; j<juptr+1; j++){
1371: i = q[i];
1372: ju[j] = i;
1373: lev[j-shift] = levtmp[i];
1374: }
1375: }
1376:
1377: if (ai[mbs] != 0) {
1378: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1379: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Reallocs %d Fill ratio:given %g needed %gn",realloc,f,af);
1380: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Run with -pc_lu_fill %g or use n",af);
1381: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:PCLUSetFill(pc,%g);n",af);
1382: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:for best performance.n");
1383: } else {
1384: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Empty matrix.n");
1385: }
1387: ISRestoreIndices(perm,&rip);
1388: PetscFree(q);
1389: PetscFree(jl);
1390: PetscFree(lev);
1391: PetscFree(levtmp);
1393: /* put together the new matrix */
1394: MatCreateSeqSBAIJ(A->comm,bs,bs*mbs,bs*mbs,0,PETSC_NULL,B);
1395: /* PetscLogObjectParent(*B,iperm); */
1396: b = (Mat_SeqSBAIJ*)(*B)->data;
1397: PetscFree(b->imax);
1398: b->singlemalloc = PETSC_FALSE;
1399: /* the next line frees the default space generated by the Create() */
1400: PetscFree(b->a);
1401: PetscFree(b->ilen);
1402: ierr = PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
1403: b->j = ju;
1404: b->i = iu;
1405: b->diag = 0;
1406: b->ilen = 0;
1407: b->imax = 0;
1408:
1409: if (b->row) {
1410: ISDestroy(b->row);
1411: }
1412: if (b->icol) {
1413: ISDestroy(b->icol);
1414: }
1415: b->row = perm;
1416: b->icol = perm;
1417: ierr = PetscObjectReference((PetscObject)perm);
1418: ierr = PetscObjectReference((PetscObject)perm);
1419: ierr = PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
1420: /* In b structure: Free imax, ilen, old a, old j.
1421: Allocate idnew, solve_work, new a, new j */
1422: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(int)+sizeof(MatScalar)));
1423: b->s_maxnz = b->s_nz = iu[mbs];
1424:
1425: (*B)->factor = FACTOR_CHOLESKY;
1426: (*B)->info.factor_mallocs = realloc;
1427: (*B)->info.fill_ratio_given = f;
1428: if (ai[mbs] != 0) {
1429: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
1430: } else {
1431: (*B)->info.fill_ratio_needed = 0.0;
1432: }
1434: if (perm_identity){
1435: switch (bs) {
1436: case 1:
1437: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
1438: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1439: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=1n");
1440: break;
1441: case 2:
1442: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
1443: (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
1444: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2n");
1445: break;
1446: case 3:
1447: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
1448: (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
1449: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:sing special in-place natural ordering factor and solve BS=3n");
1450: break;
1451: case 4:
1452: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
1453: (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
1454: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4n");
1455: break;
1456: case 5:
1457: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
1458: (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
1459: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5n");
1460: break;
1461: case 6:
1462: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
1463: (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
1464: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6n");
1465: break;
1466: case 7:
1467: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
1468: (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
1469: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7n");
1470: break;
1471: default:
1472: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
1473: (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
1474: PetscLogInfo(A,"MatICCFactorSymbolic_SeqSBAIJ:Using special in-place natural ordering factor and solve BS>7n");
1475: break;
1476: }
1477: }
1479: return(0);
1480: }