Actual source code: baij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/inline/spops.h
9: #include petscsys.h
11: #include src/inline/ilu.h
15: /*@C
16: MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
18: Collective on Mat
20: Input Parameters:
21: . mat - the matrix
23: Level: advanced
24: @*/
25: PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
26: {
27: PetscErrorCode ierr,(*f)(Mat);
31: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
34: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
35: if (f) {
36: (*f)(mat);
37: } else {
38: SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39: }
40: return(0);
41: }
46: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47: {
48: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
50: PetscInt *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51: PetscScalar *v = a->a,*odiag,*diag,*mdiag;
54: if (a->idiagvalid) return(0);
55: MatMarkDiagonal_SeqBAIJ(A);
56: diag_offset = a->diag;
57: if (!a->idiag) {
58: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
59: }
60: diag = a->idiag;
61: mdiag = a->idiag+bs*bs*mbs;
62: /* factor and invert each block */
63: switch (bs){
64: case 2:
65: for (i=0; i<mbs; i++) {
66: odiag = v + 4*diag_offset[i];
67: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69: Kernel_A_gets_inverse_A_2(diag);
70: diag += 4;
71: mdiag += 4;
72: }
73: break;
74: case 3:
75: for (i=0; i<mbs; i++) {
76: odiag = v + 9*diag_offset[i];
77: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79: diag[8] = odiag[8];
80: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82: mdiag[8] = odiag[8];
83: Kernel_A_gets_inverse_A_3(diag);
84: diag += 9;
85: mdiag += 9;
86: }
87: break;
88: case 4:
89: for (i=0; i<mbs; i++) {
90: odiag = v + 16*diag_offset[i];
91: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
92: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
93: Kernel_A_gets_inverse_A_4(diag);
94: diag += 16;
95: mdiag += 16;
96: }
97: break;
98: case 5:
99: for (i=0; i<mbs; i++) {
100: odiag = v + 25*diag_offset[i];
101: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
102: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
103: Kernel_A_gets_inverse_A_5(diag);
104: diag += 25;
105: mdiag += 25;
106: }
107: break;
108: default:
109: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110: }
111: a->idiagvalid = PETSC_TRUE;
112: return(0);
113: }
118: PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119: {
120: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
121: PetscScalar *x,x1,x2,s1,s2;
122: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
123: PetscErrorCode ierr;
124: PetscInt m = a->mbs,i,i2,nz,idx;
125: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
128: its = its*lits;
129: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
137: diag = a->diag;
138: idiag = a->idiag;
139: VecGetArray(xx,&x);
140: VecGetArray(bb,(PetscScalar**)&b);
142: if (flag & SOR_ZERO_INITIAL_GUESS) {
143: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146: i2 = 2;
147: idiag += 4;
148: for (i=1; i<m; i++) {
149: v = aa + 4*ai[i];
150: vi = aj + ai[i];
151: nz = diag[i] - ai[i];
152: s1 = b[i2]; s2 = b[i2+1];
153: while (nz--) {
154: idx = 2*(*vi++);
155: x1 = x[idx]; x2 = x[1+idx];
156: s1 -= v[0]*x1 + v[2]*x2;
157: s2 -= v[1]*x1 + v[3]*x2;
158: v += 4;
159: }
160: x[i2] = idiag[0]*s1 + idiag[2]*s2;
161: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162: idiag += 4;
163: i2 += 2;
164: }
165: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166: PetscLogFlops(4*(a->nz));
167: }
168: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170: i2 = 0;
171: mdiag = a->idiag+4*a->mbs;
172: for (i=0; i<m; i++) {
173: x1 = x[i2]; x2 = x[i2+1];
174: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
175: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176: mdiag += 4;
177: i2 += 2;
178: }
179: PetscLogFlops(6*m);
180: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
182: }
183: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184: idiag = a->idiag+4*a->mbs - 4;
185: i2 = 2*m - 2;
186: x1 = x[i2]; x2 = x[i2+1];
187: x[i2] = idiag[0]*x1 + idiag[2]*x2;
188: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189: idiag -= 4;
190: i2 -= 2;
191: for (i=m-2; i>=0; i--) {
192: v = aa + 4*(diag[i]+1);
193: vi = aj + diag[i] + 1;
194: nz = ai[i+1] - diag[i] - 1;
195: s1 = x[i2]; s2 = x[i2+1];
196: while (nz--) {
197: idx = 2*(*vi++);
198: x1 = x[idx]; x2 = x[1+idx];
199: s1 -= v[0]*x1 + v[2]*x2;
200: s2 -= v[1]*x1 + v[3]*x2;
201: v += 4;
202: }
203: x[i2] = idiag[0]*s1 + idiag[2]*s2;
204: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205: idiag -= 4;
206: i2 -= 2;
207: }
208: PetscLogFlops(4*(a->nz));
209: }
210: } else {
211: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212: }
213: VecRestoreArray(xx,&x);
214: VecRestoreArray(bb,(PetscScalar**)&b);
215: return(0);
216: }
220: PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221: {
222: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
223: PetscScalar *x,x1,x2,x3,s1,s2,s3;
224: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
225: PetscErrorCode ierr;
226: PetscInt m = a->mbs,i,i2,nz,idx;
227: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
230: its = its*lits;
231: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
239: diag = a->diag;
240: idiag = a->idiag;
241: VecGetArray(xx,&x);
242: VecGetArray(bb,(PetscScalar**)&b);
244: if (flag & SOR_ZERO_INITIAL_GUESS) {
245: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249: i2 = 3;
250: idiag += 9;
251: for (i=1; i<m; i++) {
252: v = aa + 9*ai[i];
253: vi = aj + ai[i];
254: nz = diag[i] - ai[i];
255: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256: while (nz--) {
257: idx = 3*(*vi++);
258: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262: v += 9;
263: }
264: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267: idiag += 9;
268: i2 += 3;
269: }
270: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271: PetscLogFlops(9*(a->nz));
272: }
273: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275: i2 = 0;
276: mdiag = a->idiag+9*a->mbs;
277: for (i=0; i<m; i++) {
278: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282: mdiag += 9;
283: i2 += 3;
284: }
285: PetscLogFlops(15*m);
286: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
288: }
289: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290: idiag = a->idiag+9*a->mbs - 9;
291: i2 = 3*m - 3;
292: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296: idiag -= 9;
297: i2 -= 3;
298: for (i=m-2; i>=0; i--) {
299: v = aa + 9*(diag[i]+1);
300: vi = aj + diag[i] + 1;
301: nz = ai[i+1] - diag[i] - 1;
302: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303: while (nz--) {
304: idx = 3*(*vi++);
305: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309: v += 9;
310: }
311: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314: idiag -= 9;
315: i2 -= 3;
316: }
317: PetscLogFlops(9*(a->nz));
318: }
319: } else {
320: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321: }
322: VecRestoreArray(xx,&x);
323: VecRestoreArray(bb,(PetscScalar**)&b);
324: return(0);
325: }
329: PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330: {
331: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
332: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
333: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
334: PetscErrorCode ierr;
335: PetscInt m = a->mbs,i,i2,nz,idx;
336: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
339: its = its*lits;
340: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
348: diag = a->diag;
349: idiag = a->idiag;
350: VecGetArray(xx,&x);
351: VecGetArray(bb,(PetscScalar**)&b);
353: if (flag & SOR_ZERO_INITIAL_GUESS) {
354: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
356: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
357: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359: i2 = 4;
360: idiag += 16;
361: for (i=1; i<m; i++) {
362: v = aa + 16*ai[i];
363: vi = aj + ai[i];
364: nz = diag[i] - ai[i];
365: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366: while (nz--) {
367: idx = 4*(*vi++);
368: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
370: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
371: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373: v += 16;
374: }
375: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
376: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
377: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379: idiag += 16;
380: i2 += 4;
381: }
382: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383: PetscLogFlops(16*(a->nz));
384: }
385: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387: i2 = 0;
388: mdiag = a->idiag+16*a->mbs;
389: for (i=0; i<m; i++) {
390: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
392: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
393: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395: mdiag += 16;
396: i2 += 4;
397: }
398: PetscLogFlops(28*m);
399: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
401: }
402: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403: idiag = a->idiag+16*a->mbs - 16;
404: i2 = 4*m - 4;
405: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
407: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
408: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410: idiag -= 16;
411: i2 -= 4;
412: for (i=m-2; i>=0; i--) {
413: v = aa + 16*(diag[i]+1);
414: vi = aj + diag[i] + 1;
415: nz = ai[i+1] - diag[i] - 1;
416: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417: while (nz--) {
418: idx = 4*(*vi++);
419: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
421: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
422: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424: v += 16;
425: }
426: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
427: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
428: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430: idiag -= 16;
431: i2 -= 4;
432: }
433: PetscLogFlops(16*(a->nz));
434: }
435: } else {
436: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437: }
438: VecRestoreArray(xx,&x);
439: VecRestoreArray(bb,(PetscScalar**)&b);
440: return(0);
441: }
445: PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446: {
447: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
448: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449: const PetscScalar *v,*aa = a->a, *b, *idiag,*mdiag;
450: PetscErrorCode ierr;
451: PetscInt m = a->mbs,i,i2,nz,idx;
452: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
455: its = its*lits;
456: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459: if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
464: diag = a->diag;
465: idiag = a->idiag;
466: VecGetArray(xx,&x);
467: VecGetArray(bb,(PetscScalar**)&b);
469: if (flag & SOR_ZERO_INITIAL_GUESS) {
470: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476: i2 = 5;
477: idiag += 25;
478: for (i=1; i<m; i++) {
479: v = aa + 25*ai[i];
480: vi = aj + ai[i];
481: nz = diag[i] - ai[i];
482: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483: while (nz--) {
484: idx = 5*(*vi++);
485: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491: v += 25;
492: }
493: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498: idiag += 25;
499: i2 += 5;
500: }
501: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502: PetscLogFlops(25*(a->nz));
503: }
504: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506: i2 = 0;
507: mdiag = a->idiag+25*a->mbs;
508: for (i=0; i<m; i++) {
509: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515: mdiag += 25;
516: i2 += 5;
517: }
518: PetscLogFlops(45*m);
519: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520: PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));
521: }
522: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523: idiag = a->idiag+25*a->mbs - 25;
524: i2 = 5*m - 5;
525: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531: idiag -= 25;
532: i2 -= 5;
533: for (i=m-2; i>=0; i--) {
534: v = aa + 25*(diag[i]+1);
535: vi = aj + diag[i] + 1;
536: nz = ai[i+1] - diag[i] - 1;
537: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538: while (nz--) {
539: idx = 5*(*vi++);
540: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546: v += 25;
547: }
548: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553: idiag -= 25;
554: i2 -= 5;
555: }
556: PetscLogFlops(25*(a->nz));
557: }
558: } else {
559: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560: }
561: VecRestoreArray(xx,&x);
562: VecRestoreArray(bb,(PetscScalar**)&b);
563: return(0);
564: }
566: /*
567: Special version for direct calls from Fortran (Used in PETSc-fun3d)
568: */
569: #if defined(PETSC_HAVE_FORTRAN_CAPS)
570: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572: #define matsetvaluesblocked4_ matsetvaluesblocked4
573: #endif
578: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579: {
580: Mat A = *AA;
581: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
582: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583: PetscInt *ai=a->i,*ailen=a->ilen;
584: PetscInt *aj=a->j,stepval,lastcol = -1;
585: const PetscScalar *value = v;
586: MatScalar *ap,*aa = a->a,*bap;
589: if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
590: stepval = (n-1)*4;
591: for (k=0; k<m; k++) { /* loop over added rows */
592: row = im[k];
593: rp = aj + ai[row];
594: ap = aa + 16*ai[row];
595: nrow = ailen[row];
596: low = 0;
597: high = nrow;
598: for (l=0; l<n; l++) { /* loop over added columns */
599: col = in[l];
600: if (col <= lastcol) low = 0; else high = nrow;
601: lastcol = col;
602: value = v + k*(stepval+4 + l)*4;
603: while (high-low > 7) {
604: t = (low+high)/2;
605: if (rp[t] > col) high = t;
606: else low = t;
607: }
608: for (i=low; i<high; i++) {
609: if (rp[i] > col) break;
610: if (rp[i] == col) {
611: bap = ap + 16*i;
612: for (ii=0; ii<4; ii++,value+=stepval) {
613: for (jj=ii; jj<16; jj+=4) {
614: bap[jj] += *value++;
615: }
616: }
617: goto noinsert2;
618: }
619: }
620: N = nrow++ - 1;
621: high++; /* added new column index thus must search to one higher than before */
622: /* shift up all the later entries in this row */
623: for (ii=N; ii>=i; ii--) {
624: rp[ii+1] = rp[ii];
625: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
626: }
627: if (N >= i) {
628: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
629: }
630: rp[i] = col;
631: bap = ap + 16*i;
632: for (ii=0; ii<4; ii++,value+=stepval) {
633: for (jj=ii; jj<16; jj+=4) {
634: bap[jj] = *value++;
635: }
636: }
637: noinsert2:;
638: low = i;
639: }
640: ailen[row] = nrow;
641: }
642: PetscFunctionReturnVoid();
643: }
646: #if defined(PETSC_HAVE_FORTRAN_CAPS)
647: #define matsetvalues4_ MATSETVALUES4
648: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
649: #define matsetvalues4_ matsetvalues4
650: #endif
655: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
656: {
657: Mat A = *AA;
658: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
659: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
660: PetscInt *ai=a->i,*ailen=a->ilen;
661: PetscInt *aj=a->j,brow,bcol;
662: PetscInt ridx,cidx,lastcol = -1;
663: MatScalar *ap,value,*aa=a->a,*bap;
664:
666: for (k=0; k<m; k++) { /* loop over added rows */
667: row = im[k]; brow = row/4;
668: rp = aj + ai[brow];
669: ap = aa + 16*ai[brow];
670: nrow = ailen[brow];
671: low = 0;
672: high = nrow;
673: for (l=0; l<n; l++) { /* loop over added columns */
674: col = in[l]; bcol = col/4;
675: ridx = row % 4; cidx = col % 4;
676: value = v[l + k*n];
677: if (col <= lastcol) low = 0; else high = nrow;
678: lastcol = col;
679: while (high-low > 7) {
680: t = (low+high)/2;
681: if (rp[t] > bcol) high = t;
682: else low = t;
683: }
684: for (i=low; i<high; i++) {
685: if (rp[i] > bcol) break;
686: if (rp[i] == bcol) {
687: bap = ap + 16*i + 4*cidx + ridx;
688: *bap += value;
689: goto noinsert1;
690: }
691: }
692: N = nrow++ - 1;
693: high++; /* added new column thus must search to one higher than before */
694: /* shift up all the later entries in this row */
695: for (ii=N; ii>=i; ii--) {
696: rp[ii+1] = rp[ii];
697: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
698: }
699: if (N>=i) {
700: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
701: }
702: rp[i] = bcol;
703: ap[16*i + 4*cidx + ridx] = value;
704: noinsert1:;
705: low = i;
706: }
707: ailen[brow] = nrow;
708: }
709: PetscFunctionReturnVoid();
710: }
713: /* UGLY, ugly, ugly
714: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
715: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
716: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
717: converts the entries into single precision and then calls ..._MatScalar() to put them
718: into the single precision data structures.
719: */
720: #if defined(PETSC_USE_MAT_SINGLE)
721: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
722: #else
723: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
724: #endif
726: #define CHUNKSIZE 10
728: /*
729: Checks for missing diagonals
730: */
733: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
734: {
735: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
737: PetscInt *diag,*jj = a->j,i;
740: MatMarkDiagonal_SeqBAIJ(A);
741: diag = a->diag;
742: for (i=0; i<a->mbs; i++) {
743: if (jj[diag[i]] != i) {
744: SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
745: }
746: }
747: return(0);
748: }
752: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
753: {
754: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
756: PetscInt i,j,m = a->mbs;
759: if (!a->diag) {
760: PetscMalloc(m*sizeof(PetscInt),&a->diag);
761: }
762: for (i=0; i<m; i++) {
763: a->diag[i] = a->i[i+1];
764: for (j=a->i[i]; j<a->i[i+1]; j++) {
765: if (a->j[j] == i) {
766: a->diag[i] = j;
767: break;
768: }
769: }
770: }
771: return(0);
772: }
775: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
779: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
780: {
781: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
783: PetscInt n = a->mbs,i;
786: *nn = n;
787: if (!ia) return(0);
788: if (symmetric) {
789: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
790: } else if (oshift == 1) {
791: /* temporarily add 1 to i and j indices */
792: PetscInt nz = a->i[n];
793: for (i=0; i<nz; i++) a->j[i]++;
794: for (i=0; i<n+1; i++) a->i[i]++;
795: *ia = a->i; *ja = a->j;
796: } else {
797: *ia = a->i; *ja = a->j;
798: }
800: return(0);
801: }
805: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
806: {
807: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
809: PetscInt i,n = a->mbs;
812: if (!ia) return(0);
813: if (symmetric) {
814: PetscFree(*ia);
815: PetscFree(*ja);
816: } else if (oshift == 1) {
817: PetscInt nz = a->i[n]-1;
818: for (i=0; i<nz; i++) a->j[i]--;
819: for (i=0; i<n+1; i++) a->i[i]--;
820: }
821: return(0);
822: }
826: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
827: {
828: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
832: #if defined(PETSC_USE_LOG)
833: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
834: #endif
835: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
836: if (a->row) {
837: ISDestroy(a->row);
838: }
839: if (a->col) {
840: ISDestroy(a->col);
841: }
842: PetscFree(a->diag);
843: PetscFree(a->idiag);
844: PetscFree2(a->imax,a->ilen);
845: PetscFree(a->solve_work);
846: PetscFree(a->mult_work);
847: if (a->icol) {ISDestroy(a->icol);}
848: PetscFree(a->saved_values);
849: #if defined(PETSC_USE_MAT_SINGLE)
850: PetscFree(a->setvaluescopy);
851: #endif
852: PetscFree(a->xtoy);
853: if (a->compressedrow.use){PetscFree(a->compressedrow.i);}
855: PetscFree(a);
857: PetscObjectChangeTypeName((PetscObject)A,0);
858: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
859: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
860: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
861: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
862: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
863: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
864: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
865: return(0);
866: }
870: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
871: {
872: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
876: switch (op) {
877: case MAT_ROW_ORIENTED:
878: a->roworiented = PETSC_TRUE;
879: break;
880: case MAT_COLUMN_ORIENTED:
881: a->roworiented = PETSC_FALSE;
882: break;
883: case MAT_COLUMNS_SORTED:
884: a->sorted = PETSC_TRUE;
885: break;
886: case MAT_COLUMNS_UNSORTED:
887: a->sorted = PETSC_FALSE;
888: break;
889: case MAT_KEEP_ZEROED_ROWS:
890: a->keepzeroedrows = PETSC_TRUE;
891: break;
892: case MAT_NO_NEW_NONZERO_LOCATIONS:
893: a->nonew = 1;
894: break;
895: case MAT_NEW_NONZERO_LOCATION_ERR:
896: a->nonew = -1;
897: break;
898: case MAT_NEW_NONZERO_ALLOCATION_ERR:
899: a->nonew = -2;
900: break;
901: case MAT_YES_NEW_NONZERO_LOCATIONS:
902: a->nonew = 0;
903: break;
904: case MAT_ROWS_SORTED:
905: case MAT_ROWS_UNSORTED:
906: case MAT_YES_NEW_DIAGONALS:
907: case MAT_IGNORE_OFF_PROC_ENTRIES:
908: case MAT_USE_HASH_TABLE:
909: PetscInfo1(A,"Option %d ignored\n",op);
910: break;
911: case MAT_NO_NEW_DIAGONALS:
912: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
913: case MAT_SYMMETRIC:
914: case MAT_STRUCTURALLY_SYMMETRIC:
915: case MAT_NOT_SYMMETRIC:
916: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
917: case MAT_HERMITIAN:
918: case MAT_NOT_HERMITIAN:
919: case MAT_SYMMETRY_ETERNAL:
920: case MAT_NOT_SYMMETRY_ETERNAL:
921: break;
922: default:
923: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
924: }
925: return(0);
926: }
930: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
931: {
932: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
934: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
935: MatScalar *aa,*aa_i;
936: PetscScalar *v_i;
939: bs = A->rmap.bs;
940: ai = a->i;
941: aj = a->j;
942: aa = a->a;
943: bs2 = a->bs2;
944:
945: if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
946:
947: bn = row/bs; /* Block number */
948: bp = row % bs; /* Block Position */
949: M = ai[bn+1] - ai[bn];
950: *nz = bs*M;
951:
952: if (v) {
953: *v = 0;
954: if (*nz) {
955: PetscMalloc((*nz)*sizeof(PetscScalar),v);
956: for (i=0; i<M; i++) { /* for each block in the block row */
957: v_i = *v + i*bs;
958: aa_i = aa + bs2*(ai[bn] + i);
959: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
960: }
961: }
962: }
964: if (idx) {
965: *idx = 0;
966: if (*nz) {
967: PetscMalloc((*nz)*sizeof(PetscInt),idx);
968: for (i=0; i<M; i++) { /* for each block in the block row */
969: idx_i = *idx + i*bs;
970: itmp = bs*aj[ai[bn] + i];
971: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
972: }
973: }
974: }
975: return(0);
976: }
980: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
981: {
985: if (idx) {PetscFree(*idx);}
986: if (v) {PetscFree(*v);}
987: return(0);
988: }
992: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
993: {
994: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
995: Mat C;
997: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
998: PetscInt *rows,*cols,bs2=a->bs2;
999: PetscScalar *array;
1002: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1003: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1004: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1006: #if defined(PETSC_USE_MAT_SINGLE)
1007: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
1008: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1009: #else
1010: array = a->a;
1011: #endif
1013: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1014: MatCreate(A->comm,&C);
1015: MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);
1016: MatSetType(C,A->type_name);
1017: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1018: PetscFree(col);
1019: PetscMalloc(2*bs*sizeof(PetscInt),&rows);
1020: cols = rows + bs;
1021: for (i=0; i<mbs; i++) {
1022: cols[0] = i*bs;
1023: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1024: len = ai[i+1] - ai[i];
1025: for (j=0; j<len; j++) {
1026: rows[0] = (*aj++)*bs;
1027: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1028: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
1029: array += bs2;
1030: }
1031: }
1032: PetscFree(rows);
1033: #if defined(PETSC_USE_MAT_SINGLE)
1034: PetscFree(array);
1035: #endif
1036:
1037: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1038: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1039:
1040: if (B) {
1041: *B = C;
1042: } else {
1043: MatHeaderCopy(A,C);
1044: }
1045: return(0);
1046: }
1050: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1051: {
1052: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1054: PetscInt i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1055: int fd;
1056: PetscScalar *aa;
1057: FILE *file;
1060: PetscViewerBinaryGetDescriptor(viewer,&fd);
1061: PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);
1062: col_lens[0] = MAT_FILE_COOKIE;
1064: col_lens[1] = A->rmap.N;
1065: col_lens[2] = A->cmap.n;
1066: col_lens[3] = a->nz*bs2;
1068: /* store lengths of each row and write (including header) to file */
1069: count = 0;
1070: for (i=0; i<a->mbs; i++) {
1071: for (j=0; j<bs; j++) {
1072: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1073: }
1074: }
1075: PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);
1076: PetscFree(col_lens);
1078: /* store column indices (zero start index) */
1079: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1080: count = 0;
1081: for (i=0; i<a->mbs; i++) {
1082: for (j=0; j<bs; j++) {
1083: for (k=a->i[i]; k<a->i[i+1]; k++) {
1084: for (l=0; l<bs; l++) {
1085: jj[count++] = bs*a->j[k] + l;
1086: }
1087: }
1088: }
1089: }
1090: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1091: PetscFree(jj);
1093: /* store nonzero values */
1094: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1095: count = 0;
1096: for (i=0; i<a->mbs; i++) {
1097: for (j=0; j<bs; j++) {
1098: for (k=a->i[i]; k<a->i[i+1]; k++) {
1099: for (l=0; l<bs; l++) {
1100: aa[count++] = a->a[bs2*k + l*bs + j];
1101: }
1102: }
1103: }
1104: }
1105: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1106: PetscFree(aa);
1108: PetscViewerBinaryGetInfoPointer(viewer,&file);
1109: if (file) {
1110: fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1111: }
1112: return(0);
1113: }
1117: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1118: {
1119: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1120: PetscErrorCode ierr;
1121: PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1122: PetscViewerFormat format;
1125: PetscViewerGetFormat(viewer,&format);
1126: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1127: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1128: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1129: Mat aij;
1130: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1131: MatView(aij,viewer);
1132: MatDestroy(aij);
1133: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1134: return(0);
1135: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1136: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1137: for (i=0; i<a->mbs; i++) {
1138: for (j=0; j<bs; j++) {
1139: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1140: for (k=a->i[i]; k<a->i[i+1]; k++) {
1141: for (l=0; l<bs; l++) {
1142: #if defined(PETSC_USE_COMPLEX)
1143: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1145: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1146: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1147: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1148: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1149: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1150: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1151: }
1152: #else
1153: if (a->a[bs2*k + l*bs + j] != 0.0) {
1154: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1155: }
1156: #endif
1157: }
1158: }
1159: PetscViewerASCIIPrintf(viewer,"\n");
1160: }
1161: }
1162: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1163: } else {
1164: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1165: for (i=0; i<a->mbs; i++) {
1166: for (j=0; j<bs; j++) {
1167: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1168: for (k=a->i[i]; k<a->i[i+1]; k++) {
1169: for (l=0; l<bs; l++) {
1170: #if defined(PETSC_USE_COMPLEX)
1171: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1172: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1173: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1174: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1175: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1176: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1177: } else {
1178: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1179: }
1180: #else
1181: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1182: #endif
1183: }
1184: }
1185: PetscViewerASCIIPrintf(viewer,"\n");
1186: }
1187: }
1188: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1189: }
1190: PetscViewerFlush(viewer);
1191: return(0);
1192: }
1196: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1197: {
1198: Mat A = (Mat) Aa;
1199: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1201: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1202: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1203: MatScalar *aa;
1204: PetscViewer viewer;
1208: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1209: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1211: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1213: /* loop over matrix elements drawing boxes */
1214: color = PETSC_DRAW_BLUE;
1215: for (i=0,row=0; i<mbs; i++,row+=bs) {
1216: for (j=a->i[i]; j<a->i[i+1]; j++) {
1217: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1218: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1219: aa = a->a + j*bs2;
1220: for (k=0; k<bs; k++) {
1221: for (l=0; l<bs; l++) {
1222: if (PetscRealPart(*aa++) >= 0.) continue;
1223: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1224: }
1225: }
1226: }
1227: }
1228: color = PETSC_DRAW_CYAN;
1229: for (i=0,row=0; i<mbs; i++,row+=bs) {
1230: for (j=a->i[i]; j<a->i[i+1]; j++) {
1231: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1232: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1233: aa = a->a + j*bs2;
1234: for (k=0; k<bs; k++) {
1235: for (l=0; l<bs; l++) {
1236: if (PetscRealPart(*aa++) != 0.) continue;
1237: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1238: }
1239: }
1240: }
1241: }
1243: color = PETSC_DRAW_RED;
1244: for (i=0,row=0; i<mbs; i++,row+=bs) {
1245: for (j=a->i[i]; j<a->i[i+1]; j++) {
1246: y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1247: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1248: aa = a->a + j*bs2;
1249: for (k=0; k<bs; k++) {
1250: for (l=0; l<bs; l++) {
1251: if (PetscRealPart(*aa++) <= 0.) continue;
1252: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1253: }
1254: }
1255: }
1256: }
1257: return(0);
1258: }
1262: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1263: {
1265: PetscReal xl,yl,xr,yr,w,h;
1266: PetscDraw draw;
1267: PetscTruth isnull;
1271: PetscViewerDrawGetDraw(viewer,0,&draw);
1272: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1274: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1275: xr = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1276: xr += w; yr += h; xl = -w; yl = -h;
1277: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1278: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1279: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1280: return(0);
1281: }
1285: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1286: {
1288: PetscTruth iascii,isbinary,isdraw;
1291: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1292: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1293: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1294: if (iascii){
1295: MatView_SeqBAIJ_ASCII(A,viewer);
1296: } else if (isbinary) {
1297: MatView_SeqBAIJ_Binary(A,viewer);
1298: } else if (isdraw) {
1299: MatView_SeqBAIJ_Draw(A,viewer);
1300: } else {
1301: Mat B;
1302: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1303: MatView(B,viewer);
1304: MatDestroy(B);
1305: }
1306: return(0);
1307: }
1312: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1313: {
1314: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1315: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1316: PetscInt *ai = a->i,*ailen = a->ilen;
1317: PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1318: MatScalar *ap,*aa = a->a,zero = 0.0;
1321: for (k=0; k<m; k++) { /* loop over rows */
1322: row = im[k]; brow = row/bs;
1323: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1324: if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1325: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1326: nrow = ailen[brow];
1327: for (l=0; l<n; l++) { /* loop over columns */
1328: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1329: if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1330: col = in[l] ;
1331: bcol = col/bs;
1332: cidx = col%bs;
1333: ridx = row%bs;
1334: high = nrow;
1335: low = 0; /* assume unsorted */
1336: while (high-low > 5) {
1337: t = (low+high)/2;
1338: if (rp[t] > bcol) high = t;
1339: else low = t;
1340: }
1341: for (i=low; i<high; i++) {
1342: if (rp[i] > bcol) break;
1343: if (rp[i] == bcol) {
1344: *v++ = ap[bs2*i+bs*cidx+ridx];
1345: goto finished;
1346: }
1347: }
1348: *v++ = zero;
1349: finished:;
1350: }
1351: }
1352: return(0);
1353: }
1355: #if defined(PETSC_USE_MAT_SINGLE)
1358: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1359: {
1360: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1362: PetscInt i,N = m*n*b->bs2;
1363: MatScalar *vsingle;
1366: if (N > b->setvalueslen) {
1367: PetscFree(b->setvaluescopy);
1368: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
1369: b->setvalueslen = N;
1370: }
1371: vsingle = b->setvaluescopy;
1372: for (i=0; i<N; i++) {
1373: vsingle[i] = v[i];
1374: }
1375: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
1376: return(0);
1377: }
1378: #endif
1383: PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1384: {
1385: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1386: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1387: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1388: PetscErrorCode ierr;
1389: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1390: PetscTruth roworiented=a->roworiented;
1391: const MatScalar *value = v;
1392: MatScalar *ap,*aa = a->a,*bap;
1395: if (roworiented) {
1396: stepval = (n-1)*bs;
1397: } else {
1398: stepval = (m-1)*bs;
1399: }
1400: for (k=0; k<m; k++) { /* loop over added rows */
1401: row = im[k];
1402: if (row < 0) continue;
1403: #if defined(PETSC_USE_DEBUG)
1404: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1405: #endif
1406: rp = aj + ai[row];
1407: ap = aa + bs2*ai[row];
1408: rmax = imax[row];
1409: nrow = ailen[row];
1410: low = 0;
1411: high = nrow;
1412: for (l=0; l<n; l++) { /* loop over added columns */
1413: if (in[l] < 0) continue;
1414: #if defined(PETSC_USE_DEBUG)
1415: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1416: #endif
1417: col = in[l];
1418: if (roworiented) {
1419: value = v + k*(stepval+bs)*bs + l*bs;
1420: } else {
1421: value = v + l*(stepval+bs)*bs + k*bs;
1422: }
1423: if (col <= lastcol) low = 0; else high = nrow;
1424: lastcol = col;
1425: while (high-low > 7) {
1426: t = (low+high)/2;
1427: if (rp[t] > col) high = t;
1428: else low = t;
1429: }
1430: for (i=low; i<high; i++) {
1431: if (rp[i] > col) break;
1432: if (rp[i] == col) {
1433: bap = ap + bs2*i;
1434: if (roworiented) {
1435: if (is == ADD_VALUES) {
1436: for (ii=0; ii<bs; ii++,value+=stepval) {
1437: for (jj=ii; jj<bs2; jj+=bs) {
1438: bap[jj] += *value++;
1439: }
1440: }
1441: } else {
1442: for (ii=0; ii<bs; ii++,value+=stepval) {
1443: for (jj=ii; jj<bs2; jj+=bs) {
1444: bap[jj] = *value++;
1445: }
1446: }
1447: }
1448: } else {
1449: if (is == ADD_VALUES) {
1450: for (ii=0; ii<bs; ii++,value+=stepval) {
1451: for (jj=0; jj<bs; jj++) {
1452: *bap++ += *value++;
1453: }
1454: }
1455: } else {
1456: for (ii=0; ii<bs; ii++,value+=stepval) {
1457: for (jj=0; jj<bs; jj++) {
1458: *bap++ = *value++;
1459: }
1460: }
1461: }
1462: }
1463: goto noinsert2;
1464: }
1465: }
1466: if (nonew == 1) goto noinsert2;
1467: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1468: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew);
1469: N = nrow++ - 1; high++;
1470: /* shift up all the later entries in this row */
1471: for (ii=N; ii>=i; ii--) {
1472: rp[ii+1] = rp[ii];
1473: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1474: }
1475: if (N >= i) {
1476: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1477: }
1478: rp[i] = col;
1479: bap = ap + bs2*i;
1480: if (roworiented) {
1481: for (ii=0; ii<bs; ii++,value+=stepval) {
1482: for (jj=ii; jj<bs2; jj+=bs) {
1483: bap[jj] = *value++;
1484: }
1485: }
1486: } else {
1487: for (ii=0; ii<bs; ii++,value+=stepval) {
1488: for (jj=0; jj<bs; jj++) {
1489: *bap++ = *value++;
1490: }
1491: }
1492: }
1493: noinsert2:;
1494: low = i;
1495: }
1496: ailen[row] = nrow;
1497: }
1498: return(0);
1499: }
1503: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1504: {
1505: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1506: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1507: PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen;
1509: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1510: MatScalar *aa = a->a,*ap;
1511: PetscReal ratio=0.6;
1514: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1516: if (m) rmax = ailen[0];
1517: for (i=1; i<mbs; i++) {
1518: /* move each row back by the amount of empty slots (fshift) before it*/
1519: fshift += imax[i-1] - ailen[i-1];
1520: rmax = PetscMax(rmax,ailen[i]);
1521: if (fshift) {
1522: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1523: N = ailen[i];
1524: for (j=0; j<N; j++) {
1525: ip[j-fshift] = ip[j];
1526: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1527: }
1528: }
1529: ai[i] = ai[i-1] + ailen[i-1];
1530: }
1531: if (mbs) {
1532: fshift += imax[mbs-1] - ailen[mbs-1];
1533: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1534: }
1535: /* reset ilen and imax for each row */
1536: for (i=0; i<mbs; i++) {
1537: ailen[i] = imax[i] = ai[i+1] - ai[i];
1538: }
1539: a->nz = ai[mbs];
1541: /* diagonals may have moved, so kill the diagonal pointers */
1542: a->idiagvalid = PETSC_FALSE;
1543: if (fshift && a->diag) {
1544: PetscFree(a->diag);
1545: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1546: a->diag = 0;
1547: }
1548: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);
1549: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1550: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1551: a->reallocs = 0;
1552: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1554: /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1555: if (a->compressedrow.use){
1556: Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1557: }
1559: A->same_nonzero = PETSC_TRUE;
1560: return(0);
1561: }
1563: /*
1564: This function returns an array of flags which indicate the locations of contiguous
1565: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1566: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1567: Assume: sizes should be long enough to hold all the values.
1568: */
1571: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1572: {
1573: PetscInt i,j,k,row;
1574: PetscTruth flg;
1577: for (i=0,j=0; i<n; j++) {
1578: row = idx[i];
1579: if (row%bs!=0) { /* Not the begining of a block */
1580: sizes[j] = 1;
1581: i++;
1582: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1583: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1584: i++;
1585: } else { /* Begining of the block, so check if the complete block exists */
1586: flg = PETSC_TRUE;
1587: for (k=1; k<bs; k++) {
1588: if (row+k != idx[i+k]) { /* break in the block */
1589: flg = PETSC_FALSE;
1590: break;
1591: }
1592: }
1593: if (flg) { /* No break in the bs */
1594: sizes[j] = bs;
1595: i+= bs;
1596: } else {
1597: sizes[j] = 1;
1598: i++;
1599: }
1600: }
1601: }
1602: *bs_max = j;
1603: return(0);
1604: }
1605:
1608: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1609: {
1610: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1612: PetscInt i,j,k,count,*rows;
1613: PetscInt bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1614: PetscScalar zero = 0.0;
1615: MatScalar *aa;
1618: /* Make a copy of the IS and sort it */
1619: /* allocate memory for rows,sizes */
1620: PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);
1621: sizes = rows + is_n;
1623: /* copy IS values to rows, and sort them */
1624: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1625: PetscSortInt(is_n,rows);
1626: if (baij->keepzeroedrows) {
1627: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1628: bs_max = is_n;
1629: A->same_nonzero = PETSC_TRUE;
1630: } else {
1631: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1632: A->same_nonzero = PETSC_FALSE;
1633: }
1635: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1636: row = rows[j];
1637: if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1638: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1639: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1640: if (sizes[i] == bs && !baij->keepzeroedrows) {
1641: if (diag != 0.0) {
1642: if (baij->ilen[row/bs] > 0) {
1643: baij->ilen[row/bs] = 1;
1644: baij->j[baij->i[row/bs]] = row/bs;
1645: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1646: }
1647: /* Now insert all the diagonal values for this bs */
1648: for (k=0; k<bs; k++) {
1649: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
1650: }
1651: } else { /* (diag == 0.0) */
1652: baij->ilen[row/bs] = 0;
1653: } /* end (diag == 0.0) */
1654: } else { /* (sizes[i] != bs) */
1655: #if defined (PETSC_USE_DEBUG)
1656: if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1657: #endif
1658: for (k=0; k<count; k++) {
1659: aa[0] = zero;
1660: aa += bs;
1661: }
1662: if (diag != 0.0) {
1663: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
1664: }
1665: }
1666: }
1668: PetscFree(rows);
1669: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1670: return(0);
1671: }
1675: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1676: {
1677: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1678: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1679: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1680: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1682: PetscInt ridx,cidx,bs2=a->bs2;
1683: PetscTruth roworiented=a->roworiented;
1684: MatScalar *ap,value,*aa=a->a,*bap;
1687: for (k=0; k<m; k++) { /* loop over added rows */
1688: row = im[k];
1689: brow = row/bs;
1690: if (row < 0) continue;
1691: #if defined(PETSC_USE_DEBUG)
1692: if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1693: #endif
1694: rp = aj + ai[brow];
1695: ap = aa + bs2*ai[brow];
1696: rmax = imax[brow];
1697: nrow = ailen[brow];
1698: low = 0;
1699: high = nrow;
1700: for (l=0; l<n; l++) { /* loop over added columns */
1701: if (in[l] < 0) continue;
1702: #if defined(PETSC_USE_DEBUG)
1703: if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1704: #endif
1705: col = in[l]; bcol = col/bs;
1706: ridx = row % bs; cidx = col % bs;
1707: if (roworiented) {
1708: value = v[l + k*n];
1709: } else {
1710: value = v[k + l*m];
1711: }
1712: if (col <= lastcol) low = 0; else high = nrow;
1713: lastcol = col;
1714: while (high-low > 7) {
1715: t = (low+high)/2;
1716: if (rp[t] > bcol) high = t;
1717: else low = t;
1718: }
1719: for (i=low; i<high; i++) {
1720: if (rp[i] > bcol) break;
1721: if (rp[i] == bcol) {
1722: bap = ap + bs2*i + bs*cidx + ridx;
1723: if (is == ADD_VALUES) *bap += value;
1724: else *bap = value;
1725: goto noinsert1;
1726: }
1727: }
1728: if (nonew == 1) goto noinsert1;
1729: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1730: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew);
1731: N = nrow++ - 1; high++;
1732: /* shift up all the later entries in this row */
1733: for (ii=N; ii>=i; ii--) {
1734: rp[ii+1] = rp[ii];
1735: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1736: }
1737: if (N>=i) {
1738: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1739: }
1740: rp[i] = bcol;
1741: ap[bs2*i + bs*cidx + ridx] = value;
1742: a->nz++;
1743: noinsert1:;
1744: low = i;
1745: }
1746: ailen[brow] = nrow;
1747: }
1748: A->same_nonzero = PETSC_FALSE;
1749: return(0);
1750: }
1755: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1756: {
1757: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1758: Mat outA;
1760: PetscTruth row_identity,col_identity;
1763: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1764: ISIdentity(row,&row_identity);
1765: ISIdentity(col,&col_identity);
1766: if (!row_identity || !col_identity) {
1767: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1768: }
1770: outA = inA;
1771: inA->factor = FACTOR_LU;
1773: MatMarkDiagonal_SeqBAIJ(inA);
1775: a->row = row;
1776: a->col = col;
1777: PetscObjectReference((PetscObject)row);
1778: PetscObjectReference((PetscObject)col);
1779:
1780: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1781: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1782: PetscLogObjectParent(inA,a->icol);
1783:
1784: /*
1785: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1786: for ILU(0) factorization with natural ordering
1787: */
1788: if (inA->rmap.bs < 8) {
1789: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1790: } else {
1791: if (!a->solve_work) {
1792: PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);
1793: PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));
1794: }
1795: }
1797: MatLUFactorNumeric(inA,info,&outA);
1799: return(0);
1800: }
1805: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1806: {
1807: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1808: PetscInt i,nz,nbs;
1811: nz = baij->maxnz/baij->bs2;
1812: nbs = baij->nbs;
1813: for (i=0; i<nz; i++) {
1814: baij->j[i] = indices[i];
1815: }
1816: baij->nz = nz;
1817: for (i=0; i<nbs; i++) {
1818: baij->ilen[i] = baij->imax[i];
1819: }
1821: return(0);
1822: }
1827: /*@
1828: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1829: in the matrix.
1831: Input Parameters:
1832: + mat - the SeqBAIJ matrix
1833: - indices - the column indices
1835: Level: advanced
1837: Notes:
1838: This can be called if you have precomputed the nonzero structure of the
1839: matrix and want to provide it to the matrix object to improve the performance
1840: of the MatSetValues() operation.
1842: You MUST have set the correct numbers of nonzeros per row in the call to
1843: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1845: MUST be called before any calls to MatSetValues();
1847: @*/
1848: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1849: {
1850: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1855: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1856: if (f) {
1857: (*f)(mat,indices);
1858: } else {
1859: SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1860: }
1861: return(0);
1862: }
1866: PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1867: {
1868: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1870: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
1871: PetscReal atmp;
1872: PetscScalar *x,zero = 0.0;
1873: MatScalar *aa;
1874: PetscInt ncols,brow,krow,kcol;
1877: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1878: bs = A->rmap.bs;
1879: aa = a->a;
1880: ai = a->i;
1881: aj = a->j;
1882: mbs = a->mbs;
1884: VecSet(v,zero);
1885: VecGetArray(v,&x);
1886: VecGetLocalSize(v,&n);
1887: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1888: for (i=0; i<mbs; i++) {
1889: ncols = ai[1] - ai[0]; ai++;
1890: brow = bs*i;
1891: for (j=0; j<ncols; j++){
1892: /* bcol = bs*(*aj); */
1893: for (kcol=0; kcol<bs; kcol++){
1894: for (krow=0; krow<bs; krow++){
1895: atmp = PetscAbsScalar(*aa); aa++;
1896: row = brow + krow; /* row index */
1897: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1898: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1899: }
1900: }
1901: aj++;
1902: }
1903: }
1904: VecRestoreArray(v,&x);
1905: return(0);
1906: }
1910: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1911: {
1915: /* If the two matrices have the same copy implementation, use fast copy. */
1916: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1917: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1918: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1920: if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1921: SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1922: }
1923: PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));
1924: } else {
1925: MatCopy_Basic(A,B,str);
1926: }
1927: return(0);
1928: }
1932: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1933: {
1937: MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);
1938: return(0);
1939: }
1943: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1944: {
1945: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1947: *array = a->a;
1948: return(0);
1949: }
1953: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1954: {
1956: return(0);
1957: }
1959: #include petscblaslapack.h
1962: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1963: {
1964: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1966: PetscInt i,bs=Y->rmap.bs,j,bs2;
1967: PetscBLASInt one=1,bnz = (PetscBLASInt)x->nz;
1970: if (str == SAME_NONZERO_PATTERN) {
1971: PetscScalar alpha = a;
1972: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1973: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1974: if (y->xtoy && y->XtoY != X) {
1975: PetscFree(y->xtoy);
1976: MatDestroy(y->XtoY);
1977: }
1978: if (!y->xtoy) { /* get xtoy */
1979: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
1980: y->XtoY = X;
1981: }
1982: bs2 = bs*bs;
1983: for (i=0; i<x->nz; i++) {
1984: j = 0;
1985: while (j < bs2){
1986: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1987: j++;
1988: }
1989: }
1990: PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1991: } else {
1992: MatAXPY_Basic(Y,a,X,str);
1993: }
1994: return(0);
1995: }
1999: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2000: {
2001: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2002: PetscInt i,nz = a->bs2*a->i[a->mbs];
2003: PetscScalar *aa = a->a;
2006: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2007: return(0);
2008: }
2012: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2013: {
2014: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2015: PetscInt i,nz = a->bs2*a->i[a->mbs];
2016: PetscScalar *aa = a->a;
2019: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2020: return(0);
2021: }
2024: /* -------------------------------------------------------------------*/
2025: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2026: MatGetRow_SeqBAIJ,
2027: MatRestoreRow_SeqBAIJ,
2028: MatMult_SeqBAIJ_N,
2029: /* 4*/ MatMultAdd_SeqBAIJ_N,
2030: MatMultTranspose_SeqBAIJ,
2031: MatMultTransposeAdd_SeqBAIJ,
2032: MatSolve_SeqBAIJ_N,
2033: 0,
2034: 0,
2035: /*10*/ 0,
2036: MatLUFactor_SeqBAIJ,
2037: 0,
2038: 0,
2039: MatTranspose_SeqBAIJ,
2040: /*15*/ MatGetInfo_SeqBAIJ,
2041: MatEqual_SeqBAIJ,
2042: MatGetDiagonal_SeqBAIJ,
2043: MatDiagonalScale_SeqBAIJ,
2044: MatNorm_SeqBAIJ,
2045: /*20*/ 0,
2046: MatAssemblyEnd_SeqBAIJ,
2047: 0,
2048: MatSetOption_SeqBAIJ,
2049: MatZeroEntries_SeqBAIJ,
2050: /*25*/ MatZeroRows_SeqBAIJ,
2051: MatLUFactorSymbolic_SeqBAIJ,
2052: MatLUFactorNumeric_SeqBAIJ_N,
2053: MatCholeskyFactorSymbolic_SeqBAIJ,
2054: MatCholeskyFactorNumeric_SeqBAIJ_N,
2055: /*30*/ MatSetUpPreallocation_SeqBAIJ,
2056: MatILUFactorSymbolic_SeqBAIJ,
2057: MatICCFactorSymbolic_SeqBAIJ,
2058: MatGetArray_SeqBAIJ,
2059: MatRestoreArray_SeqBAIJ,
2060: /*35*/ MatDuplicate_SeqBAIJ,
2061: 0,
2062: 0,
2063: MatILUFactor_SeqBAIJ,
2064: 0,
2065: /*40*/ MatAXPY_SeqBAIJ,
2066: MatGetSubMatrices_SeqBAIJ,
2067: MatIncreaseOverlap_SeqBAIJ,
2068: MatGetValues_SeqBAIJ,
2069: MatCopy_SeqBAIJ,
2070: /*45*/ 0,
2071: MatScale_SeqBAIJ,
2072: 0,
2073: 0,
2074: 0,
2075: /*50*/ 0,
2076: MatGetRowIJ_SeqBAIJ,
2077: MatRestoreRowIJ_SeqBAIJ,
2078: 0,
2079: 0,
2080: /*55*/ 0,
2081: 0,
2082: 0,
2083: 0,
2084: MatSetValuesBlocked_SeqBAIJ,
2085: /*60*/ MatGetSubMatrix_SeqBAIJ,
2086: MatDestroy_SeqBAIJ,
2087: MatView_SeqBAIJ,
2088: 0,
2089: 0,
2090: /*65*/ 0,
2091: 0,
2092: 0,
2093: 0,
2094: 0,
2095: /*70*/ MatGetRowMax_SeqBAIJ,
2096: MatConvert_Basic,
2097: 0,
2098: 0,
2099: 0,
2100: /*75*/ 0,
2101: 0,
2102: 0,
2103: 0,
2104: 0,
2105: /*80*/ 0,
2106: 0,
2107: 0,
2108: 0,
2109: MatLoad_SeqBAIJ,
2110: /*85*/ 0,
2111: 0,
2112: 0,
2113: 0,
2114: 0,
2115: /*90*/ 0,
2116: 0,
2117: 0,
2118: 0,
2119: 0,
2120: /*95*/ 0,
2121: 0,
2122: 0,
2123: 0,
2124: 0,
2125: /*100*/0,
2126: 0,
2127: 0,
2128: 0,
2129: 0,
2130: /*105*/0,
2131: MatRealPart_SeqBAIJ,
2132: MatImaginaryPart_SeqBAIJ
2133: };
2138: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2139: {
2140: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2141: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2145: if (aij->nonew != 1) {
2146: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2147: }
2149: /* allocate space for values if not already there */
2150: if (!aij->saved_values) {
2151: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2152: }
2154: /* copy values over */
2155: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2156: return(0);
2157: }
2163: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2164: {
2165: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2167: PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2170: if (aij->nonew != 1) {
2171: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2172: }
2173: if (!aij->saved_values) {
2174: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2175: }
2177: /* copy values over */
2178: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2179: return(0);
2180: }
2191: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2192: {
2193: Mat_SeqBAIJ *b;
2195: PetscInt i,mbs,nbs,bs2,newbs = bs;
2196: PetscTruth flg,skipallocation = PETSC_FALSE;
2200: if (nz == MAT_SKIP_ALLOCATION) {
2201: skipallocation = PETSC_TRUE;
2202: nz = 0;
2203: }
2205: PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");
2206: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);
2207: PetscOptionsEnd();
2209: if (nnz && newbs != bs) {
2210: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2211: }
2212: bs = newbs;
2214: B->rmap.bs = B->cmap.bs = bs;
2215: PetscMapInitialize(B->comm,&B->rmap);
2216: PetscMapInitialize(B->comm,&B->cmap);
2218: B->preallocated = PETSC_TRUE;
2220: mbs = B->rmap.n/bs;
2221: nbs = B->cmap.n/bs;
2222: bs2 = bs*bs;
2224: if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2225: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2226: }
2228: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2229: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2230: if (nnz) {
2231: for (i=0; i<mbs; i++) {
2232: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2233: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2234: }
2235: }
2237: b = (Mat_SeqBAIJ*)B->data;
2238: PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");
2239: PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2240: PetscOptionsEnd();
2242: B->ops->solve = MatSolve_SeqBAIJ_Update;
2243: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
2244: if (!flg) {
2245: switch (bs) {
2246: case 1:
2247: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2248: B->ops->mult = MatMult_SeqBAIJ_1;
2249: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2250: break;
2251: case 2:
2252: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2253: B->ops->mult = MatMult_SeqBAIJ_2;
2254: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2255: B->ops->pbrelax = MatPBRelax_SeqBAIJ_2;
2256: break;
2257: case 3:
2258: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2259: B->ops->mult = MatMult_SeqBAIJ_3;
2260: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2261: B->ops->pbrelax = MatPBRelax_SeqBAIJ_3;
2262: break;
2263: case 4:
2264: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2265: B->ops->mult = MatMult_SeqBAIJ_4;
2266: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2267: B->ops->pbrelax = MatPBRelax_SeqBAIJ_4;
2268: break;
2269: case 5:
2270: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2271: B->ops->mult = MatMult_SeqBAIJ_5;
2272: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2273: B->ops->pbrelax = MatPBRelax_SeqBAIJ_5;
2274: break;
2275: case 6:
2276: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2277: B->ops->mult = MatMult_SeqBAIJ_6;
2278: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2279: break;
2280: case 7:
2281: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2282: B->ops->mult = MatMult_SeqBAIJ_7;
2283: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2284: break;
2285: default:
2286: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2287: B->ops->mult = MatMult_SeqBAIJ_N;
2288: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2289: break;
2290: }
2291: }
2292: B->rmap.bs = bs;
2293: b->mbs = mbs;
2294: b->nbs = nbs;
2295: if (!skipallocation) {
2296: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2297: /* b->ilen will count nonzeros in each block row so far. */
2298: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2299: if (!nnz) {
2300: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2301: else if (nz <= 0) nz = 1;
2302: for (i=0; i<mbs; i++) b->imax[i] = nz;
2303: nz = nz*mbs;
2304: } else {
2305: nz = 0;
2306: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2307: }
2309: /* allocate the matrix space */
2310: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);
2311: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2312: PetscMemzero(b->j,nz*sizeof(PetscInt));
2313: b->singlemalloc = PETSC_TRUE;
2315: b->i[0] = 0;
2316: for (i=1; i<mbs+1; i++) {
2317: b->i[i] = b->i[i-1] + b->imax[i-1];
2318: }
2319: b->free_a = PETSC_TRUE;
2320: b->free_ij = PETSC_TRUE;
2321: } else {
2322: b->free_a = PETSC_FALSE;
2323: b->free_ij = PETSC_FALSE;
2324: }
2326: B->rmap.bs = bs;
2327: b->bs2 = bs2;
2328: b->mbs = mbs;
2329: b->nz = 0;
2330: b->maxnz = nz*bs2;
2331: B->info.nz_unneeded = (PetscReal)b->maxnz;
2332: return(0);
2333: }
2336: /*MC
2337: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2338: block sparse compressed row format.
2340: Options Database Keys:
2341: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2343: Level: beginner
2345: .seealso: MatCreateSeqBAIJ()
2346: M*/
2351: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2352: {
2354: PetscMPIInt size;
2355: Mat_SeqBAIJ *b;
2358: MPI_Comm_size(B->comm,&size);
2359: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2361: PetscNew(Mat_SeqBAIJ,&b);
2362: B->data = (void*)b;
2363: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2364: B->factor = 0;
2365: B->mapping = 0;
2366: b->row = 0;
2367: b->col = 0;
2368: b->icol = 0;
2369: b->reallocs = 0;
2370: b->saved_values = 0;
2371: #if defined(PETSC_USE_MAT_SINGLE)
2372: b->setvalueslen = 0;
2373: b->setvaluescopy = PETSC_NULL;
2374: #endif
2376: b->sorted = PETSC_FALSE;
2377: b->roworiented = PETSC_TRUE;
2378: b->nonew = 0;
2379: b->diag = 0;
2380: b->solve_work = 0;
2381: b->mult_work = 0;
2382: B->spptr = 0;
2383: B->info.nz_unneeded = (PetscReal)b->maxnz;
2384: b->keepzeroedrows = PETSC_FALSE;
2385: b->xtoy = 0;
2386: b->XtoY = 0;
2387: b->compressedrow.use = PETSC_FALSE;
2388: b->compressedrow.nrows = 0;
2389: b->compressedrow.i = PETSC_NULL;
2390: b->compressedrow.rindex = PETSC_NULL;
2391: b->compressedrow.checked = PETSC_FALSE;
2392: B->same_nonzero = PETSC_FALSE;
2394: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2395: "MatInvertBlockDiagonal_SeqBAIJ",
2396: MatInvertBlockDiagonal_SeqBAIJ);
2397: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2398: "MatStoreValues_SeqBAIJ",
2399: MatStoreValues_SeqBAIJ);
2400: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2401: "MatRetrieveValues_SeqBAIJ",
2402: MatRetrieveValues_SeqBAIJ);
2403: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2404: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2405: MatSeqBAIJSetColumnIndices_SeqBAIJ);
2406: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2407: "MatConvert_SeqBAIJ_SeqAIJ",
2408: MatConvert_SeqBAIJ_SeqAIJ);
2409: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2410: "MatConvert_SeqBAIJ_SeqSBAIJ",
2411: MatConvert_SeqBAIJ_SeqSBAIJ);
2412: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2413: "MatSeqBAIJSetPreallocation_SeqBAIJ",
2414: MatSeqBAIJSetPreallocation_SeqBAIJ);
2415: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
2416: return(0);
2417: }
2422: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2423: {
2424: Mat C;
2425: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2427: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2430: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2432: *B = 0;
2433: MatCreate(A->comm,&C);
2434: MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);
2435: MatSetType(C,A->type_name);
2436: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
2437: c = (Mat_SeqBAIJ*)C->data;
2439: C->rmap.N = A->rmap.N;
2440: C->cmap.N = A->cmap.N;
2441: C->rmap.bs = A->rmap.bs;
2442: c->bs2 = a->bs2;
2443: c->mbs = a->mbs;
2444: c->nbs = a->nbs;
2446: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
2447: for (i=0; i<mbs; i++) {
2448: c->imax[i] = a->imax[i];
2449: c->ilen[i] = a->ilen[i];
2450: }
2452: /* allocate the matrix space */
2453: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
2454: c->singlemalloc = PETSC_TRUE;
2455: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
2456: if (mbs > 0) {
2457: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
2458: if (cpvalues == MAT_COPY_VALUES) {
2459: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
2460: } else {
2461: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
2462: }
2463: }
2464: c->sorted = a->sorted;
2465: c->roworiented = a->roworiented;
2466: c->nonew = a->nonew;
2468: if (a->diag) {
2469: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
2470: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
2471: for (i=0; i<mbs; i++) {
2472: c->diag[i] = a->diag[i];
2473: }
2474: } else c->diag = 0;
2475: c->nz = a->nz;
2476: c->maxnz = a->maxnz;
2477: c->solve_work = 0;
2478: c->mult_work = 0;
2479: c->free_a = PETSC_TRUE;
2480: c->free_ij = PETSC_TRUE;
2481: C->preallocated = PETSC_TRUE;
2482: C->assembled = PETSC_TRUE;
2484: c->compressedrow.use = a->compressedrow.use;
2485: c->compressedrow.nrows = a->compressedrow.nrows;
2486: c->compressedrow.checked = a->compressedrow.checked;
2487: if ( a->compressedrow.checked && a->compressedrow.use){
2488: i = a->compressedrow.nrows;
2489: PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);
2490: c->compressedrow.rindex = c->compressedrow.i + i + 1;
2491: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
2492: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
2493: } else {
2494: c->compressedrow.use = PETSC_FALSE;
2495: c->compressedrow.i = PETSC_NULL;
2496: c->compressedrow.rindex = PETSC_NULL;
2497: }
2498: C->same_nonzero = A->same_nonzero;
2499: *B = C;
2500: PetscFListDuplicate(A->qlist,&C->qlist);
2501: return(0);
2502: }
2506: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2507: {
2508: Mat_SeqBAIJ *a;
2509: Mat B;
2511: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
2512: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2513: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
2514: PetscInt *masked,nmask,tmp,bs2,ishift;
2515: PetscMPIInt size;
2516: int fd;
2517: PetscScalar *aa;
2518: MPI_Comm comm = ((PetscObject)viewer)->comm;
2521: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
2522: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
2523: PetscOptionsEnd();
2524: bs2 = bs*bs;
2526: MPI_Comm_size(comm,&size);
2527: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2528: PetscViewerBinaryGetDescriptor(viewer,&fd);
2529: PetscBinaryRead(fd,header,4,PETSC_INT);
2530: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2531: M = header[1]; N = header[2]; nz = header[3];
2533: if (header[3] < 0) {
2534: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2535: }
2537: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2539: /*
2540: This code adds extra rows to make sure the number of rows is
2541: divisible by the blocksize
2542: */
2543: mbs = M/bs;
2544: extra_rows = bs - M + bs*(mbs);
2545: if (extra_rows == bs) extra_rows = 0;
2546: else mbs++;
2547: if (extra_rows) {
2548: PetscInfo(0,"Padding loaded matrix to match blocksize\n");
2549: }
2551: /* read in row lengths */
2552: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
2553: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2554: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2556: /* read in column indices */
2557: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
2558: PetscBinaryRead(fd,jj,nz,PETSC_INT);
2559: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2561: /* loop over row lengths determining block row lengths */
2562: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
2563: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
2564: PetscMalloc(2*mbs*sizeof(PetscInt),&mask);
2565: PetscMemzero(mask,mbs*sizeof(PetscInt));
2566: masked = mask + mbs;
2567: rowcount = 0; nzcount = 0;
2568: for (i=0; i<mbs; i++) {
2569: nmask = 0;
2570: for (j=0; j<bs; j++) {
2571: kmax = rowlengths[rowcount];
2572: for (k=0; k<kmax; k++) {
2573: tmp = jj[nzcount++]/bs;
2574: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2575: }
2576: rowcount++;
2577: }
2578: browlengths[i] += nmask;
2579: /* zero out the mask elements we set */
2580: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2581: }
2583: /* create our matrix */
2584: MatCreate(comm,&B);
2585: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2586: MatSetType(B,type);
2587: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
2588: a = (Mat_SeqBAIJ*)B->data;
2590: /* set matrix "i" values */
2591: a->i[0] = 0;
2592: for (i=1; i<= mbs; i++) {
2593: a->i[i] = a->i[i-1] + browlengths[i-1];
2594: a->ilen[i-1] = browlengths[i-1];
2595: }
2596: a->nz = 0;
2597: for (i=0; i<mbs; i++) a->nz += browlengths[i];
2599: /* read in nonzero values */
2600: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
2601: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
2602: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2604: /* set "a" and "j" values into matrix */
2605: nzcount = 0; jcount = 0;
2606: for (i=0; i<mbs; i++) {
2607: nzcountb = nzcount;
2608: nmask = 0;
2609: for (j=0; j<bs; j++) {
2610: kmax = rowlengths[i*bs+j];
2611: for (k=0; k<kmax; k++) {
2612: tmp = jj[nzcount++]/bs;
2613: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2614: }
2615: }
2616: /* sort the masked values */
2617: PetscSortInt(nmask,masked);
2619: /* set "j" values into matrix */
2620: maskcount = 1;
2621: for (j=0; j<nmask; j++) {
2622: a->j[jcount++] = masked[j];
2623: mask[masked[j]] = maskcount++;
2624: }
2625: /* set "a" values into matrix */
2626: ishift = bs2*a->i[i];
2627: for (j=0; j<bs; j++) {
2628: kmax = rowlengths[i*bs+j];
2629: for (k=0; k<kmax; k++) {
2630: tmp = jj[nzcountb]/bs ;
2631: block = mask[tmp] - 1;
2632: point = jj[nzcountb] - bs*tmp;
2633: idx = ishift + bs2*block + j + bs*point;
2634: a->a[idx] = (MatScalar)aa[nzcountb++];
2635: }
2636: }
2637: /* zero out the mask elements we set */
2638: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2639: }
2640: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2642: PetscFree(rowlengths);
2643: PetscFree(browlengths);
2644: PetscFree(aa);
2645: PetscFree(jj);
2646: PetscFree(mask);
2648: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2649: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2650: MatView_Private(B);
2652: *A = B;
2653: return(0);
2654: }
2658: /*@C
2659: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2660: compressed row) format. For good matrix assembly performance the
2661: user should preallocate the matrix storage by setting the parameter nz
2662: (or the array nnz). By setting these parameters accurately, performance
2663: during matrix assembly can be increased by more than a factor of 50.
2665: Collective on MPI_Comm
2667: Input Parameters:
2668: + comm - MPI communicator, set to PETSC_COMM_SELF
2669: . bs - size of block
2670: . m - number of rows
2671: . n - number of columns
2672: . nz - number of nonzero blocks per block row (same for all rows)
2673: - nnz - array containing the number of nonzero blocks in the various block rows
2674: (possibly different for each block row) or PETSC_NULL
2676: Output Parameter:
2677: . A - the matrix
2679: Options Database Keys:
2680: . -mat_no_unroll - uses code that does not unroll the loops in the
2681: block calculations (much slower)
2682: . -mat_block_size - size of the blocks to use
2684: Level: intermediate
2686: Notes:
2687: The number of rows and columns must be divisible by blocksize.
2689: If the nnz parameter is given then the nz parameter is ignored
2691: A nonzero block is any block that as 1 or more nonzeros in it
2693: The block AIJ format is fully compatible with standard Fortran 77
2694: storage. That is, the stored row and column indices can begin at
2695: either one (as in Fortran) or zero. See the users' manual for details.
2697: Specify the preallocated storage with either nz or nnz (not both).
2698: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2699: allocation. For additional details, see the users manual chapter on
2700: matrices.
2702: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2703: @*/
2704: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2705: {
2707:
2709: MatCreate(comm,A);
2710: MatSetSizes(*A,m,n,m,n);
2711: MatSetType(*A,MATSEQBAIJ);
2712: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
2713: return(0);
2714: }
2718: /*@C
2719: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2720: per row in the matrix. For good matrix assembly performance the
2721: user should preallocate the matrix storage by setting the parameter nz
2722: (or the array nnz). By setting these parameters accurately, performance
2723: during matrix assembly can be increased by more than a factor of 50.
2725: Collective on MPI_Comm
2727: Input Parameters:
2728: + A - the matrix
2729: . bs - size of block
2730: . nz - number of block nonzeros per block row (same for all rows)
2731: - nnz - array containing the number of block nonzeros in the various block rows
2732: (possibly different for each block row) or PETSC_NULL
2734: Options Database Keys:
2735: . -mat_no_unroll - uses code that does not unroll the loops in the
2736: block calculations (much slower)
2737: . -mat_block_size - size of the blocks to use
2739: Level: intermediate
2741: Notes:
2742: If the nnz parameter is given then the nz parameter is ignored
2744: The block AIJ format is fully compatible with standard Fortran 77
2745: storage. That is, the stored row and column indices can begin at
2746: either one (as in Fortran) or zero. See the users' manual for details.
2748: Specify the preallocated storage with either nz or nnz (not both).
2749: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2750: allocation. For additional details, see the users manual chapter on
2751: matrices.
2753: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2754: @*/
2755: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2756: {
2757: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2760: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
2761: if (f) {
2762: (*f)(B,bs,nz,nnz);
2763: }
2764: return(0);
2765: }
2769: /*@
2770: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2771: (upper triangular entries in CSR format) provided by the user.
2773: Collective on MPI_Comm
2775: Input Parameters:
2776: + comm - must be an MPI communicator of size 1
2777: . bs - size of block
2778: . m - number of rows
2779: . n - number of columns
2780: . i - row indices
2781: . j - column indices
2782: - a - matrix values
2784: Output Parameter:
2785: . mat - the matrix
2787: Level: intermediate
2789: Notes:
2790: The i, j, and a arrays are not copied by this routine, the user must free these arrays
2791: once the matrix is destroyed
2793: You cannot set new nonzero locations into this matrix, that will generate an error.
2795: The i and j indices are 0 based
2797: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2799: @*/
2800: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2801: {
2803: PetscInt ii;
2804: Mat_SeqBAIJ *baij;
2807: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2808: if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2809:
2810: MatCreate(comm,mat);
2811: MatSetSizes(*mat,m,n,m,n);
2812: MatSetType(*mat,MATSEQBAIJ);
2813: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
2814: baij = (Mat_SeqBAIJ*)(*mat)->data;
2815: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
2817: baij->i = i;
2818: baij->j = j;
2819: baij->a = a;
2820: baij->singlemalloc = PETSC_FALSE;
2821: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2822: baij->free_a = PETSC_FALSE;
2823: baij->free_ij = PETSC_FALSE;
2825: for (ii=0; ii<m; ii++) {
2826: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2827: #if defined(PETSC_USE_DEBUG)
2828: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2829: #endif
2830: }
2831: #if defined(PETSC_USE_DEBUG)
2832: for (ii=0; ii<baij->i[m]; ii++) {
2833: if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2834: if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2835: }
2836: #endif
2838: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
2839: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
2840: return(0);
2841: }