Actual source code: bdfact.c
1: /*$Id: bdfact.c,v 1.65 2001/08/07 03:02:53 balay Exp $*/
3: /* Block diagonal matrix format - factorization and triangular solves */
5: #include src/mat/impls/bdiag/seq/bdiag.h
6: #include src/vec/vecimpl.h
7: #include src/inline/ilu.h
9: int MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *B)
10: {
11: PetscTruth idn;
12: int ierr;
15: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
16: if (isrow) {
17: ISIdentity(isrow,&idn);
18: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
19: }
20: if (iscol) {
21: ISIdentity(iscol,&idn);
22: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
23: }
24: if (info && info->levels != 0) {
25: SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
26: }
27: MatConvert(A,MATSAME,B);
29: /* Must set to zero for repeated calls with different nonzero structure */
30: (*B)->factor = 0;
31: return(0);
32: }
34: int MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatILUInfo *info)
35: {
36: PetscTruth idn;
37: int ierr;
40: /* For now, no fill is allocated in symbolic factorization phase, so we
41: directly use the input matrix for numeric factorization. */
42: if (A->m != A->n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
43: if (isrow) {
44: ISIdentity(isrow,&idn);
45: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
46: }
47: if (iscol) {
48: ISIdentity(iscol,&idn);
49: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
50: }
51: if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
52: MatLUFactorNumeric(A,&A);
53: return(0);
54: }
56: /* --------------------------------------------------------------------------*/
57: int MatLUFactorNumeric_SeqBDiag_N(Mat A,Mat *B)
58: {
59: Mat C = *B;
60: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
61: int k,d,d2,dgk,elim_row,elim_col,bs = a->bs,knb,knb2,bs2 = bs*bs;
62: int dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock,ierr;
63: int *diag = a->diag, m = A->m,mainbd = a->mainbd,*dgptr,len,i;
64: PetscScalar **dv = a->diagv,*dd = dv[mainbd],*v_work;
65: PetscScalar *multiplier;
68: /* Copy input matrix to factored matrix if we've already factored the
69: matrix before AND the nonzero structure remains the same. This is done
70: in symbolic factorization the first time through, but there's no symbolic
71: factorization for successive calls with same matrix sparsity structure. */
72: if (C->factor == FACTOR_LU) {
73: for (i=0; i<a->nd; i++) {
74: len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
75: d = diag[i];
76: if (d > 0) {
77: PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
78: } else {
79: PetscMemcpy(dv[i],a1->diagv[i],len);
80: }
81: }
82: }
84: if (!a->pivot) {
85: PetscMalloc((m+1)*sizeof(int),&a->pivot);
86: PetscLogObjectMemory(C,m*sizeof(int));
87: }
88: ierr = PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
89: multiplier = v_work + bs;
90: ierr = PetscMalloc((mblock+nblock+1)*sizeof(int),&dgptr);
91: ierr = PetscMemzero(dgptr,(mblock+nblock)*sizeof(int));
92: for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
93: for (k=0; k<mblock; k++) { /* k = block pivot_row */
94: knb = k*bs; knb2 = knb*bs;
95: /* invert the diagonal block */
96: Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
97: for (d=mainbd-1; d>=0; d--) {
98: elim_row = k + diag[d];
99: if (elim_row < mblock) { /* sweep down */
100: /* dv[d][knb2]: test if entire block is zero? */
101: Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
102: for (d2=d+1; d2<nd; d2++) {
103: elim_col = elim_row - diag[d2];
104: if (elim_col >=0 && elim_col < nblock) {
105: dgk = k - elim_col;
106: if ((dnum = dgptr[dgk+mblock])) {
107: Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
108: &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
109: }
110: }
111: }
112: }
113: }
114: }
115: PetscFree(dgptr);
116: PetscFree(v_work);
117: C->factor = FACTOR_LU;
118: return(0);
119: }
121: int MatLUFactorNumeric_SeqBDiag_1(Mat A,Mat *B)
122: {
123: Mat C = *B;
124: Mat_SeqBDiag *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
125: int k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len,ierr;
126: int *diag = a->diag,n = A->n,m = A->m,mainbd = a->mainbd,*dgptr;
127: PetscScalar **dv = a->diagv,*dd = dv[mainbd],mult;
130: /* Copy input matrix to factored matrix if we've already factored the
131: matrix before AND the nonzero structure remains the same. This is done
132: in symbolic factorization the first time through, but there's no symbolic
133: factorization for successive calls with same matrix sparsity structure. */
134: if (C->factor == FACTOR_LU) {
135: for (i=0; i<nd; i++) {
136: len = a->bdlen[i] * sizeof(PetscScalar);
137: d = diag[i];
138: if (d > 0) {
139: PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
140: } else {
141: PetscMemcpy(dv[i],a1->diagv[i],len);
142: }
143: }
144: }
146: PetscMalloc((m+n+1)*sizeof(int),&dgptr);
147: ierr = PetscMemzero(dgptr,(m+n)*sizeof(int));
148: for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
149: for (k=0; k<m; k++) { /* k = pivot_row */
150: dd[k] = 1.0/dd[k];
151: for (d=mainbd-1; d>=0; d--) {
152: elim_row = k + diag[d];
153: if (elim_row < m) { /* sweep down */
154: if (dv[d][elim_row] != 0.0) {
155: dv[d][elim_row] *= dd[k];
156: mult = dv[d][elim_row];
157: for (d2=d+1; d2<nd; d2++) {
158: elim_col = elim_row - diag[d2];
159: dgk = k - elim_col;
160: if (elim_col >=0 && elim_col < n) {
161: if ((dnum = dgptr[dgk+m])) {
162: dv[d2][elim_row] -= mult * dv[dnum-1][k];
163: }
164: }
165: }
166: }
167: }
168: }
169: }
170: PetscFree(dgptr);
171: C->factor = FACTOR_LU;
172: return(0);
173: }
175: /* -----------------------------------------------------------------*/
177: int MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
178: {
179: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
180: int ierr,i,d,loc,mainbd = a->mainbd;
181: int n = A->n,m = A->m,*diag = a->diag,col;
182: PetscScalar *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;
185: VecGetArray(xx,&x);
186: VecGetArray(yy,&y);
187: /* forward solve the lower triangular part */
188: for (i=0; i<m; i++) {
189: sum = x[i];
190: for (d=0; d<mainbd; d++) {
191: loc = i - diag[d];
192: if (loc >= 0) sum -= dv[d][i] * y[loc];
193: }
194: y[i] = sum;
195: }
196: /* backward solve the upper triangular part */
197: for (i=m-1; i>=0; i--) {
198: sum = y[i];
199: for (d=mainbd+1; d<a->nd; d++) {
200: col = i - diag[d];
201: if (col < n) sum -= dv[d][i] * y[col];
202: }
203: y[i] = sum*dd[i];
204: }
205: VecRestoreArray(xx,&x);
206: VecRestoreArray(yy,&y);
207: PetscLogFlops(2*a->nz - A->n);
208: return(0);
209: }
211: int MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
212: {
213: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
214: int i,d,loc,mainbd = a->mainbd;
215: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
216: int ierr,m = A->m,*diag = a->diag,col;
217: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
218: PetscScalar w0,w1,sum0,sum1;
221: VecGetArray(xx,&x);
222: VecGetArray(yy,&y);
223: PetscMemcpy(y,x,m*sizeof(PetscScalar));
225: /* forward solve the lower triangular part */
226: if (mainbd != 0) {
227: inb = 0;
228: for (i=0; i<mblock; i++) {
229: sum0 = sum1 = 0.0;
230: for (d=0; d<mainbd; d++) {
231: loc = 2*(i - diag[d]);
232: if (loc >= 0) {
233: dvt = &dv[d][4*i];
234: w0 = y[loc]; w1 = y[loc+1];
235: sum0 += dvt[0]*w0 + dvt[2]*w1;
236: sum1 += dvt[1]*w0 + dvt[3]*w1;
237: }
238: }
239: y[inb] -= sum0; y[inb+1] -= sum1;
241: inb += 2;
242: }
243: }
244: /* backward solve the upper triangular part */
245: inb = 2*(mblock-1); inb2 = 2*inb;
246: for (i=mblock-1; i>=0; i--) {
247: sum0 = y[inb]; sum1 = y[inb+1];
248: for (d=mainbd+1; d<a->nd; d++) {
249: col = 2*(i - diag[d]);
250: if (col < 2*nblock) {
251: dvt = &dv[d][4*i];
252: w0 = y[col]; w1 = y[col+1];
253: sum0 -= dvt[0]*w0 + dvt[2]*w1;
254: sum1 -= dvt[1]*w0 + dvt[3]*w1;
255: }
256: }
257: dvt = dd+inb2;
258: y[inb] = dvt[0]*sum0 + dvt[2]*sum1;
259: y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
260: inb -= 2; inb2 -= 4;
261: }
262: VecRestoreArray(xx,&x);
263: VecRestoreArray(yy,&y);
264: PetscLogFlops(2*a->nz - A->n);
265: return(0);
266: }
268: int MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
269: {
270: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
271: int i,d,loc,mainbd = a->mainbd;
272: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
273: int ierr,m = A->m,*diag = a->diag,col;
274: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
275: PetscScalar w0,w1,w2,sum0,sum1,sum2;
278: VecGetArray(xx,&x);
279: VecGetArray(yy,&y);
280: PetscMemcpy(y,x,m*sizeof(PetscScalar));
282: /* forward solve the lower triangular part */
283: if (mainbd != 0) {
284: inb = 0;
285: for (i=0; i<mblock; i++) {
286: sum0 = sum1 = sum2 = 0.0;
287: for (d=0; d<mainbd; d++) {
288: loc = 3*(i - diag[d]);
289: if (loc >= 0) {
290: dvt = &dv[d][9*i];
291: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
292: sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
293: sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
294: sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
295: }
296: }
297: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
298: inb += 3;
299: }
300: }
301: /* backward solve the upper triangular part */
302: inb = 3*(mblock-1); inb2 = 3*inb;
303: for (i=mblock-1; i>=0; i--) {
304: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2];
305: for (d=mainbd+1; d<a->nd; d++) {
306: col = 3*(i - diag[d]);
307: if (col < 3*nblock) {
308: dvt = &dv[d][9*i];
309: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
310: sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
311: sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
312: sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
313: }
314: }
315: dvt = dd+inb2;
316: y[inb] = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
317: y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
318: y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
319: inb -= 3; inb2 -= 9;
320: }
321: VecRestoreArray(xx,&x);
322: VecRestoreArray(yy,&y);
323: PetscLogFlops(2*a->nz - A->n);
324: return(0);
325: }
327: int MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
328: {
329: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
330: int i,d,loc,mainbd = a->mainbd;
331: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
332: int ierr,m = A->m,*diag = a->diag,col;
333: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
334: PetscScalar w0,w1,w2,w3,sum0,sum1,sum2,sum3;
337: VecGetArray(xx,&x);
338: VecGetArray(yy,&y);
339: PetscMemcpy(y,x,m*sizeof(PetscScalar));
341: /* forward solve the lower triangular part */
342: if (mainbd != 0) {
343: inb = 0;
344: for (i=0; i<mblock; i++) {
345: sum0 = sum1 = sum2 = sum3 = 0.0;
346: for (d=0; d<mainbd; d++) {
347: loc = 4*(i - diag[d]);
348: if (loc >= 0) {
349: dvt = &dv[d][16*i];
350: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
351: sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
352: sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
353: sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
354: sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
355: }
356: }
357: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
358: inb += 4;
359: }
360: }
361: /* backward solve the upper triangular part */
362: inb = 4*(mblock-1); inb2 = 4*inb;
363: for (i=mblock-1; i>=0; i--) {
364: sum0 = y[inb]; sum1 = y[inb+1]; sum2 = y[inb+2]; sum3 = y[inb+3];
365: for (d=mainbd+1; d<a->nd; d++) {
366: col = 4*(i - diag[d]);
367: if (col < 4*nblock) {
368: dvt = &dv[d][16*i];
369: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
370: sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2 + dvt[12]*w3;
371: sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2 + dvt[13]*w3;
372: sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
373: sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
374: }
375: }
376: dvt = dd+inb2;
377: y[inb] = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
378: y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
379: y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
380: y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
381: inb -= 4; inb2 -= 16;
382: }
383: VecRestoreArray(xx,&x);
384: VecRestoreArray(yy,&y);
385: PetscLogFlops(2*a->nz - A->n);
386: return(0);
387: }
389: int MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
390: {
391: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
392: int i,d,loc,mainbd = a->mainbd;
393: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
394: int ierr,m = A->m,*diag = a->diag,col;
395: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
396: PetscScalar w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;
399: VecGetArray(xx,&x);
400: VecGetArray(yy,&y);
401: PetscMemcpy(y,x,m*sizeof(PetscScalar));
403: /* forward solve the lower triangular part */
404: if (mainbd != 0) {
405: inb = 0;
406: for (i=0; i<mblock; i++) {
407: sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
408: for (d=0; d<mainbd; d++) {
409: loc = 5*(i - diag[d]);
410: if (loc >= 0) {
411: dvt = &dv[d][25*i];
412: w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
413: sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
414: sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
415: sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
416: sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
417: sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
418: }
419: }
420: y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
421: y[inb+4] -= sum4;
422: inb += 5;
423: }
424: }
425: /* backward solve the upper triangular part */
426: inb = 5*(mblock-1); inb2 = 5*inb;
427: for (i=mblock-1; i>=0; i--) {
428: sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
429: for (d=mainbd+1; d<a->nd; d++) {
430: col = 5*(i - diag[d]);
431: if (col < 5*nblock) {
432: dvt = &dv[d][25*i];
433: w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
434: sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
435: sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
436: sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
437: sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
438: sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
439: }
440: }
441: dvt = dd+inb2;
442: y[inb] = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
443: + dvt[20]*sum4;
444: y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
445: + dvt[21]*sum4;
446: y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
447: + dvt[22]*sum4;
448: y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
449: + dvt[23]*sum4;
450: y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
451: + dvt[24]*sum4;
452: inb -= 5; inb2 -= 25;
453: }
454: VecRestoreArray(xx,&x);
455: VecRestoreArray(yy,&y);
456: PetscLogFlops(2*a->nz - A->n);
457: return(0);
458: }
460: int MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
461: {
462: Mat_SeqBDiag *a = (Mat_SeqBDiag*)A->data;
463: int i,d,loc,mainbd = a->mainbd;
464: int mblock = a->mblock,nblock = a->nblock,inb,inb2;
465: int ierr,bs = a->bs,m = A->m,*diag = a->diag,col,bs2 = bs*bs;
466: PetscScalar *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
467: PetscScalar work[25];
470: VecGetArray(xx,&x);
471: VecGetArray(yy,&y);
472: if (bs > 25) SETERRQ(PETSC_ERR_SUP,"Blocks must be smaller then 25");
473: PetscMemcpy(y,x,m*sizeof(PetscScalar));
475: /* forward solve the lower triangular part */
476: if (mainbd != 0) {
477: inb = 0;
478: for (i=0; i<mblock; i++) {
479: for (d=0; d<mainbd; d++) {
480: loc = i - diag[d];
481: if (loc >= 0) {
482: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
483: }
484: }
485: inb += bs;
486: }
487: }
488: /* backward solve the upper triangular part */
489: inb = bs*(mblock-1); inb2 = inb*bs;
490: for (i=mblock-1; i>=0; i--) {
491: for (d=mainbd+1; d<a->nd; d++) {
492: col = i - diag[d];
493: if (col < nblock) {
494: Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
495: }
496: }
497: Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
498: PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
499: inb -= bs; inb2 -= bs2;
500: }
501: VecRestoreArray(xx,&x);
502: VecRestoreArray(yy,&y);
503: PetscLogFlops(2*a->nz - A->n);
504: return(0);
505: }