Actual source code: pbjacobi.c
1: /*$Id: pbjacobi.c,v 1.4 2001/08/07 03:03:42 balay Exp $*/
3: /*
4: Include files needed for the PBJacobi preconditioner:
5: pcimpl.h - private include file intended for use by all preconditioners
6: */
8: #include src/sles/pc/pcimpl.h
10: /*
11: Private context (data structure) for the PBJacobi preconditioner.
12: */
13: typedef struct {
14: PetscScalar *diag;
15: int bs,mbs;
16: } PC_PBJacobi;
18: /*
19: Currently only implemented for baij matrices and directly access baij
20: data structures.
21: */
22: #include src/mat/impls/baij/mpi/mpibaij.h
23: #include src/inline/ilu.h
25: static int PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
26: {
27: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
28: int ierr,i,m = jac->mbs;
29: PetscScalar *diag = jac->diag,x0,x1,*xx,*yy;
30:
32: VecGetArray(x,&xx);
33: VecGetArray(y,&yy);
34: for (i=0; i<m; i++) {
35: x0 = xx[2*i]; x1 = xx[2*i+1];
36: yy[2*i] = diag[0]*x0 + diag[2]*x1;
37: yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
38: diag += 4;
39: }
40: VecRestoreArray(x,&xx);
41: VecRestoreArray(y,&yy);
42: PetscLogFlops(6*m);
43: return(0);
44: }
45: static int PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
46: {
47: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
48: int ierr,i,m = jac->mbs;
49: PetscScalar *diag = jac->diag,x0,x1,x2,*xx,*yy;
50:
52: VecGetArray(x,&xx);
53: VecGetArray(y,&yy);
54: for (i=0; i<m; i++) {
55: x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];
56: yy[3*i] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
57: yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
58: yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
59: diag += 9;
60: }
61: VecRestoreArray(x,&xx);
62: VecRestoreArray(y,&yy);
63: PetscLogFlops(15*m);
64: return(0);
65: }
66: static int PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
67: {
68: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
69: int ierr,i,m = jac->mbs;
70: PetscScalar *diag = jac->diag,x0,x1,x2,x3,*xx,*yy;
71:
73: VecGetArray(x,&xx);
74: VecGetArray(y,&yy);
75: for (i=0; i<m; i++) {
76: x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];
77: yy[4*i] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
78: yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
79: yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
80: yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
81: diag += 16;
82: }
83: VecRestoreArray(x,&xx);
84: VecRestoreArray(y,&yy);
85: PetscLogFlops(28*m);
86: return(0);
87: }
88: static int PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
89: {
90: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
91: int ierr,i,m = jac->mbs;
92: PetscScalar *diag = jac->diag,x0,x1,x2,x3,x4,*xx,*yy;
93:
95: VecGetArray(x,&xx);
96: VecGetArray(y,&yy);
97: for (i=0; i<m; i++) {
98: x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];
99: yy[5*i] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
100: yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
101: yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
102: yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
103: yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
104: diag += 25;
105: }
106: VecRestoreArray(x,&xx);
107: VecRestoreArray(y,&yy);
108: PetscLogFlops(45*m);
109: return(0);
110: }
111: /* -------------------------------------------------------------------------- */
112: static int PCSetUp_PBJacobi(PC pc)
113: {
114: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
115: int ierr,i,*diag_offset,bs2;
116: PetscTruth seqbaij,mpibaij;
117: Mat A = pc->pmat;
118: PetscScalar *diag,*odiag,*v;
119: Mat_SeqBAIJ *a;
122: PetscTypeCompare((PetscObject)pc->pmat,MATSEQBAIJ,&seqbaij);
123: PetscTypeCompare((PetscObject)pc->pmat,MATMPIBAIJ,&mpibaij);
124: if (!seqbaij && !mpibaij) {
125: SETERRQ(1,"Currently only supports BAIJ matrices");
126: }
127: if (mpibaij) A = ((Mat_MPIBAIJ*)A->data)->A;
128: if (A->m != A->n) SETERRQ(1,"Supported only for square matrices and square storage");
129: ierr = MatMarkDiagonal_SeqBAIJ(A);
130: a = (Mat_SeqBAIJ*)A->data;
131: bs2 = a->bs*a->bs;
132: diag_offset = a->diag;
133: v = a->a;
134: ierr = PetscMalloc((bs2*a->mbs+1)*sizeof(PetscScalar),&diag);
135: PetscLogObjectMemory(pc,bs2*a->mbs*sizeof(PetscScalar));
136: jac->diag = diag;
137: jac->bs = a->bs;
138: jac->mbs = a->mbs;
140: /* factor and invert each block */
141: switch (a->bs){
142: case 2:
143: for (i=0; i<a->mbs; i++) {
144: odiag = v + 4*diag_offset[i];
145: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
146: ierr = Kernel_A_gets_inverse_A_2(diag);
147: diag += 4;
148: }
149: pc->ops->apply = PCApply_PBJacobi_2;
150: break;
151: case 3:
152: for (i=0; i<a->mbs; i++) {
153: odiag = v + 9*diag_offset[i];
154: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
155: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
156: diag[8] = odiag[8]; diag[9] = odiag[9];
157: ierr = Kernel_A_gets_inverse_A_3(diag);
158: diag += 9;
159: }
160: pc->ops->apply = PCApply_PBJacobi_3;
161: break;
162: case 4:
163: for (i=0; i<a->mbs; i++) {
164: odiag = v + 16*diag_offset[i];
165: ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
166: ierr = Kernel_A_gets_inverse_A_4(diag);
167: diag += 16;
168: }
169: pc->ops->apply = PCApply_PBJacobi_4;
170: break;
171: case 5:
172: for (i=0; i<a->mbs; i++) {
173: odiag = v + 25*diag_offset[i];
174: ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
175: ierr = Kernel_A_gets_inverse_A_5(diag);
176: diag += 25;
177: }
178: pc->ops->apply = PCApply_PBJacobi_5;
179: break;
180: default:
181: SETERRQ1(1,"not supported for block size %d",a->bs);
182: }
184: return(0);
185: }
186: /* -------------------------------------------------------------------------- */
187: static int PCDestroy_PBJacobi(PC pc)
188: {
189: PC_PBJacobi *jac = (PC_PBJacobi*)pc->data;
190: int ierr;
193: if (jac->diag) {PetscFree(jac->diag);}
195: /*
196: Free the private data structure that was hanging off the PC
197: */
198: PetscFree(jac);
199: return(0);
200: }
201: /* -------------------------------------------------------------------------- */
202: EXTERN_C_BEGIN
203: int PCCreate_PBJacobi(PC pc)
204: {
205: PC_PBJacobi *jac;
206: int ierr;
210: /*
211: Creates the private data structure for this preconditioner and
212: attach it to the PC object.
213: */
214: ierr = PetscNew(PC_PBJacobi,&jac);
215: pc->data = (void*)jac;
217: /*
218: Logs the memory usage; this is not needed but allows PETSc to
219: monitor how much memory is being used for various purposes.
220: */
221: PetscLogObjectMemory(pc,sizeof(PC_PBJacobi));
223: /*
224: Initialize the pointers to vectors to ZERO; these will be used to store
225: diagonal entries of the matrix for fast preconditioner application.
226: */
227: jac->diag = 0;
229: /*
230: Set the pointers for the functions that are provided above.
231: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
232: are called, they will automatically call these functions. Note we
233: choose not to provide a couple of these functions since they are
234: not needed.
235: */
236: pc->ops->apply = 0; /*set depending on the block size */
237: pc->ops->applytranspose = 0;
238: pc->ops->setup = PCSetUp_PBJacobi;
239: pc->ops->destroy = PCDestroy_PBJacobi;
240: pc->ops->setfromoptions = 0;
241: pc->ops->view = 0;
242: pc->ops->applyrichardson = 0;
243: pc->ops->applysymmetricleft = 0;
244: pc->ops->applysymmetricright = 0;
245: return(0);
246: }
247: EXTERN_C_END