Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include src/mat/impls/maij/maij.h
21: #include src/mat/utils/freespace.h
22: #include vecimpl.h
26: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: return(0);
129: }
131: /*MC
132: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
133: multicomponent problems, interpolating or restricting each component the same way independently.
134: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
136: Operations provided:
137: . MatMult
138: . MatMultTranspose
139: . MatMultAdd
140: . MatMultTransposeAdd
142: Level: advanced
144: .seealso: MatCreateSeqDense
145: M*/
150: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
151: {
153: Mat_MPIMAIJ *b;
156: PetscNew(Mat_MPIMAIJ,&b);
157: A->data = (void*)b;
158: PetscMemzero(A->ops,sizeof(struct _MatOps));
159: A->factor = 0;
160: A->mapping = 0;
162: b->AIJ = 0;
163: b->dof = 0;
164: b->OAIJ = 0;
165: b->ctx = 0;
166: b->w = 0;
167: return(0);
168: }
171: /* --------------------------------------------------------------------------------------*/
174: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
175: {
176: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
177: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
178: PetscScalar *x,*y,*v,sum1, sum2;
180: PetscInt m = b->AIJ->m,*idx,*ii;
181: PetscInt n,i,jrow,j;
184: VecGetArray(xx,&x);
185: VecGetArray(yy,&y);
186: idx = a->j;
187: v = a->a;
188: ii = a->i;
190: for (i=0; i<m; i++) {
191: jrow = ii[i];
192: n = ii[i+1] - jrow;
193: sum1 = 0.0;
194: sum2 = 0.0;
195: for (j=0; j<n; j++) {
196: sum1 += v[jrow]*x[2*idx[jrow]];
197: sum2 += v[jrow]*x[2*idx[jrow]+1];
198: jrow++;
199: }
200: y[2*i] = sum1;
201: y[2*i+1] = sum2;
202: }
204: PetscLogFlops(4*a->nz - 2*m);
205: VecRestoreArray(xx,&x);
206: VecRestoreArray(yy,&y);
207: return(0);
208: }
212: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
213: {
214: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
215: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
216: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
218: PetscInt m = b->AIJ->m,n,i,*idx;
221: VecSet(yy,zero);
222: VecGetArray(xx,&x);
223: VecGetArray(yy,&y);
224:
225: for (i=0; i<m; i++) {
226: idx = a->j + a->i[i] ;
227: v = a->a + a->i[i] ;
228: n = a->i[i+1] - a->i[i];
229: alpha1 = x[2*i];
230: alpha2 = x[2*i+1];
231: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
232: }
233: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
234: VecRestoreArray(xx,&x);
235: VecRestoreArray(yy,&y);
236: return(0);
237: }
241: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
242: {
243: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
244: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
245: PetscScalar *x,*y,*v,sum1, sum2;
247: PetscInt m = b->AIJ->m,*idx,*ii;
248: PetscInt n,i,jrow,j;
251: if (yy != zz) {VecCopy(yy,zz);}
252: VecGetArray(xx,&x);
253: VecGetArray(zz,&y);
254: idx = a->j;
255: v = a->a;
256: ii = a->i;
258: for (i=0; i<m; i++) {
259: jrow = ii[i];
260: n = ii[i+1] - jrow;
261: sum1 = 0.0;
262: sum2 = 0.0;
263: for (j=0; j<n; j++) {
264: sum1 += v[jrow]*x[2*idx[jrow]];
265: sum2 += v[jrow]*x[2*idx[jrow]+1];
266: jrow++;
267: }
268: y[2*i] += sum1;
269: y[2*i+1] += sum2;
270: }
272: PetscLogFlops(4*a->nz - 2*m);
273: VecRestoreArray(xx,&x);
274: VecRestoreArray(zz,&y);
275: return(0);
276: }
279: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
280: {
281: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
282: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
283: PetscScalar *x,*y,*v,alpha1,alpha2;
285: PetscInt m = b->AIJ->m,n,i,*idx;
288: if (yy != zz) {VecCopy(yy,zz);}
289: VecGetArray(xx,&x);
290: VecGetArray(zz,&y);
291:
292: for (i=0; i<m; i++) {
293: idx = a->j + a->i[i] ;
294: v = a->a + a->i[i] ;
295: n = a->i[i+1] - a->i[i];
296: alpha1 = x[2*i];
297: alpha2 = x[2*i+1];
298: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
299: }
300: PetscLogFlops(4*a->nz - 2*b->AIJ->n);
301: VecRestoreArray(xx,&x);
302: VecRestoreArray(zz,&y);
303: return(0);
304: }
305: /* --------------------------------------------------------------------------------------*/
308: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309: {
310: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
311: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
312: PetscScalar *x,*y,*v,sum1, sum2, sum3;
314: PetscInt m = b->AIJ->m,*idx,*ii;
315: PetscInt n,i,jrow,j;
318: VecGetArray(xx,&x);
319: VecGetArray(yy,&y);
320: idx = a->j;
321: v = a->a;
322: ii = a->i;
324: for (i=0; i<m; i++) {
325: jrow = ii[i];
326: n = ii[i+1] - jrow;
327: sum1 = 0.0;
328: sum2 = 0.0;
329: sum3 = 0.0;
330: for (j=0; j<n; j++) {
331: sum1 += v[jrow]*x[3*idx[jrow]];
332: sum2 += v[jrow]*x[3*idx[jrow]+1];
333: sum3 += v[jrow]*x[3*idx[jrow]+2];
334: jrow++;
335: }
336: y[3*i] = sum1;
337: y[3*i+1] = sum2;
338: y[3*i+2] = sum3;
339: }
341: PetscLogFlops(6*a->nz - 3*m);
342: VecRestoreArray(xx,&x);
343: VecRestoreArray(yy,&y);
344: return(0);
345: }
349: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
350: {
351: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
352: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
353: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
355: PetscInt m = b->AIJ->m,n,i,*idx;
358: VecSet(yy,zero);
359: VecGetArray(xx,&x);
360: VecGetArray(yy,&y);
361:
362: for (i=0; i<m; i++) {
363: idx = a->j + a->i[i];
364: v = a->a + a->i[i];
365: n = a->i[i+1] - a->i[i];
366: alpha1 = x[3*i];
367: alpha2 = x[3*i+1];
368: alpha3 = x[3*i+2];
369: while (n-->0) {
370: y[3*(*idx)] += alpha1*(*v);
371: y[3*(*idx)+1] += alpha2*(*v);
372: y[3*(*idx)+2] += alpha3*(*v);
373: idx++; v++;
374: }
375: }
376: PetscLogFlops(6*a->nz - 3*b->AIJ->n);
377: VecRestoreArray(xx,&x);
378: VecRestoreArray(yy,&y);
379: return(0);
380: }
384: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385: {
386: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
387: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
388: PetscScalar *x,*y,*v,sum1, sum2, sum3;
390: PetscInt m = b->AIJ->m,*idx,*ii;
391: PetscInt n,i,jrow,j;
394: if (yy != zz) {VecCopy(yy,zz);}
395: VecGetArray(xx,&x);
396: VecGetArray(zz,&y);
397: idx = a->j;
398: v = a->a;
399: ii = a->i;
401: for (i=0; i<m; i++) {
402: jrow = ii[i];
403: n = ii[i+1] - jrow;
404: sum1 = 0.0;
405: sum2 = 0.0;
406: sum3 = 0.0;
407: for (j=0; j<n; j++) {
408: sum1 += v[jrow]*x[3*idx[jrow]];
409: sum2 += v[jrow]*x[3*idx[jrow]+1];
410: sum3 += v[jrow]*x[3*idx[jrow]+2];
411: jrow++;
412: }
413: y[3*i] += sum1;
414: y[3*i+1] += sum2;
415: y[3*i+2] += sum3;
416: }
418: PetscLogFlops(6*a->nz);
419: VecRestoreArray(xx,&x);
420: VecRestoreArray(zz,&y);
421: return(0);
422: }
425: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
426: {
427: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
428: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
429: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
431: PetscInt m = b->AIJ->m,n,i,*idx;
434: if (yy != zz) {VecCopy(yy,zz);}
435: VecGetArray(xx,&x);
436: VecGetArray(zz,&y);
437: for (i=0; i<m; i++) {
438: idx = a->j + a->i[i] ;
439: v = a->a + a->i[i] ;
440: n = a->i[i+1] - a->i[i];
441: alpha1 = x[3*i];
442: alpha2 = x[3*i+1];
443: alpha3 = x[3*i+2];
444: while (n-->0) {
445: y[3*(*idx)] += alpha1*(*v);
446: y[3*(*idx)+1] += alpha2*(*v);
447: y[3*(*idx)+2] += alpha3*(*v);
448: idx++; v++;
449: }
450: }
451: PetscLogFlops(6*a->nz);
452: VecRestoreArray(xx,&x);
453: VecRestoreArray(zz,&y);
454: return(0);
455: }
457: /* ------------------------------------------------------------------------------*/
460: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
461: {
462: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
463: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
464: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
466: PetscInt m = b->AIJ->m,*idx,*ii;
467: PetscInt n,i,jrow,j;
470: VecGetArray(xx,&x);
471: VecGetArray(yy,&y);
472: idx = a->j;
473: v = a->a;
474: ii = a->i;
476: for (i=0; i<m; i++) {
477: jrow = ii[i];
478: n = ii[i+1] - jrow;
479: sum1 = 0.0;
480: sum2 = 0.0;
481: sum3 = 0.0;
482: sum4 = 0.0;
483: for (j=0; j<n; j++) {
484: sum1 += v[jrow]*x[4*idx[jrow]];
485: sum2 += v[jrow]*x[4*idx[jrow]+1];
486: sum3 += v[jrow]*x[4*idx[jrow]+2];
487: sum4 += v[jrow]*x[4*idx[jrow]+3];
488: jrow++;
489: }
490: y[4*i] = sum1;
491: y[4*i+1] = sum2;
492: y[4*i+2] = sum3;
493: y[4*i+3] = sum4;
494: }
496: PetscLogFlops(8*a->nz - 4*m);
497: VecRestoreArray(xx,&x);
498: VecRestoreArray(yy,&y);
499: return(0);
500: }
504: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
505: {
506: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
507: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
508: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
510: PetscInt m = b->AIJ->m,n,i,*idx;
513: VecSet(yy,zero);
514: VecGetArray(xx,&x);
515: VecGetArray(yy,&y);
516: for (i=0; i<m; i++) {
517: idx = a->j + a->i[i] ;
518: v = a->a + a->i[i] ;
519: n = a->i[i+1] - a->i[i];
520: alpha1 = x[4*i];
521: alpha2 = x[4*i+1];
522: alpha3 = x[4*i+2];
523: alpha4 = x[4*i+3];
524: while (n-->0) {
525: y[4*(*idx)] += alpha1*(*v);
526: y[4*(*idx)+1] += alpha2*(*v);
527: y[4*(*idx)+2] += alpha3*(*v);
528: y[4*(*idx)+3] += alpha4*(*v);
529: idx++; v++;
530: }
531: }
532: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
533: VecRestoreArray(xx,&x);
534: VecRestoreArray(yy,&y);
535: return(0);
536: }
540: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
541: {
542: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
543: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
544: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
546: PetscInt m = b->AIJ->m,*idx,*ii;
547: PetscInt n,i,jrow,j;
550: if (yy != zz) {VecCopy(yy,zz);}
551: VecGetArray(xx,&x);
552: VecGetArray(zz,&y);
553: idx = a->j;
554: v = a->a;
555: ii = a->i;
557: for (i=0; i<m; i++) {
558: jrow = ii[i];
559: n = ii[i+1] - jrow;
560: sum1 = 0.0;
561: sum2 = 0.0;
562: sum3 = 0.0;
563: sum4 = 0.0;
564: for (j=0; j<n; j++) {
565: sum1 += v[jrow]*x[4*idx[jrow]];
566: sum2 += v[jrow]*x[4*idx[jrow]+1];
567: sum3 += v[jrow]*x[4*idx[jrow]+2];
568: sum4 += v[jrow]*x[4*idx[jrow]+3];
569: jrow++;
570: }
571: y[4*i] += sum1;
572: y[4*i+1] += sum2;
573: y[4*i+2] += sum3;
574: y[4*i+3] += sum4;
575: }
577: PetscLogFlops(8*a->nz - 4*m);
578: VecRestoreArray(xx,&x);
579: VecRestoreArray(zz,&y);
580: return(0);
581: }
584: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
585: {
586: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
587: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
588: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
590: PetscInt m = b->AIJ->m,n,i,*idx;
593: if (yy != zz) {VecCopy(yy,zz);}
594: VecGetArray(xx,&x);
595: VecGetArray(zz,&y);
596:
597: for (i=0; i<m; i++) {
598: idx = a->j + a->i[i] ;
599: v = a->a + a->i[i] ;
600: n = a->i[i+1] - a->i[i];
601: alpha1 = x[4*i];
602: alpha2 = x[4*i+1];
603: alpha3 = x[4*i+2];
604: alpha4 = x[4*i+3];
605: while (n-->0) {
606: y[4*(*idx)] += alpha1*(*v);
607: y[4*(*idx)+1] += alpha2*(*v);
608: y[4*(*idx)+2] += alpha3*(*v);
609: y[4*(*idx)+3] += alpha4*(*v);
610: idx++; v++;
611: }
612: }
613: PetscLogFlops(8*a->nz - 4*b->AIJ->n);
614: VecRestoreArray(xx,&x);
615: VecRestoreArray(zz,&y);
616: return(0);
617: }
618: /* ------------------------------------------------------------------------------*/
622: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
623: {
624: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
625: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
626: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
628: PetscInt m = b->AIJ->m,*idx,*ii;
629: PetscInt n,i,jrow,j;
632: VecGetArray(xx,&x);
633: VecGetArray(yy,&y);
634: idx = a->j;
635: v = a->a;
636: ii = a->i;
638: for (i=0; i<m; i++) {
639: jrow = ii[i];
640: n = ii[i+1] - jrow;
641: sum1 = 0.0;
642: sum2 = 0.0;
643: sum3 = 0.0;
644: sum4 = 0.0;
645: sum5 = 0.0;
646: for (j=0; j<n; j++) {
647: sum1 += v[jrow]*x[5*idx[jrow]];
648: sum2 += v[jrow]*x[5*idx[jrow]+1];
649: sum3 += v[jrow]*x[5*idx[jrow]+2];
650: sum4 += v[jrow]*x[5*idx[jrow]+3];
651: sum5 += v[jrow]*x[5*idx[jrow]+4];
652: jrow++;
653: }
654: y[5*i] = sum1;
655: y[5*i+1] = sum2;
656: y[5*i+2] = sum3;
657: y[5*i+3] = sum4;
658: y[5*i+4] = sum5;
659: }
661: PetscLogFlops(10*a->nz - 5*m);
662: VecRestoreArray(xx,&x);
663: VecRestoreArray(yy,&y);
664: return(0);
665: }
669: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
670: {
671: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
672: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
673: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
675: PetscInt m = b->AIJ->m,n,i,*idx;
678: VecSet(yy,zero);
679: VecGetArray(xx,&x);
680: VecGetArray(yy,&y);
681:
682: for (i=0; i<m; i++) {
683: idx = a->j + a->i[i] ;
684: v = a->a + a->i[i] ;
685: n = a->i[i+1] - a->i[i];
686: alpha1 = x[5*i];
687: alpha2 = x[5*i+1];
688: alpha3 = x[5*i+2];
689: alpha4 = x[5*i+3];
690: alpha5 = x[5*i+4];
691: while (n-->0) {
692: y[5*(*idx)] += alpha1*(*v);
693: y[5*(*idx)+1] += alpha2*(*v);
694: y[5*(*idx)+2] += alpha3*(*v);
695: y[5*(*idx)+3] += alpha4*(*v);
696: y[5*(*idx)+4] += alpha5*(*v);
697: idx++; v++;
698: }
699: }
700: PetscLogFlops(10*a->nz - 5*b->AIJ->n);
701: VecRestoreArray(xx,&x);
702: VecRestoreArray(yy,&y);
703: return(0);
704: }
708: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
709: {
710: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
711: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
712: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
714: PetscInt m = b->AIJ->m,*idx,*ii;
715: PetscInt n,i,jrow,j;
718: if (yy != zz) {VecCopy(yy,zz);}
719: VecGetArray(xx,&x);
720: VecGetArray(zz,&y);
721: idx = a->j;
722: v = a->a;
723: ii = a->i;
725: for (i=0; i<m; i++) {
726: jrow = ii[i];
727: n = ii[i+1] - jrow;
728: sum1 = 0.0;
729: sum2 = 0.0;
730: sum3 = 0.0;
731: sum4 = 0.0;
732: sum5 = 0.0;
733: for (j=0; j<n; j++) {
734: sum1 += v[jrow]*x[5*idx[jrow]];
735: sum2 += v[jrow]*x[5*idx[jrow]+1];
736: sum3 += v[jrow]*x[5*idx[jrow]+2];
737: sum4 += v[jrow]*x[5*idx[jrow]+3];
738: sum5 += v[jrow]*x[5*idx[jrow]+4];
739: jrow++;
740: }
741: y[5*i] += sum1;
742: y[5*i+1] += sum2;
743: y[5*i+2] += sum3;
744: y[5*i+3] += sum4;
745: y[5*i+4] += sum5;
746: }
748: PetscLogFlops(10*a->nz);
749: VecRestoreArray(xx,&x);
750: VecRestoreArray(zz,&y);
751: return(0);
752: }
756: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
757: {
758: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
759: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
760: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
762: PetscInt m = b->AIJ->m,n,i,*idx;
765: if (yy != zz) {VecCopy(yy,zz);}
766: VecGetArray(xx,&x);
767: VecGetArray(zz,&y);
768:
769: for (i=0; i<m; i++) {
770: idx = a->j + a->i[i] ;
771: v = a->a + a->i[i] ;
772: n = a->i[i+1] - a->i[i];
773: alpha1 = x[5*i];
774: alpha2 = x[5*i+1];
775: alpha3 = x[5*i+2];
776: alpha4 = x[5*i+3];
777: alpha5 = x[5*i+4];
778: while (n-->0) {
779: y[5*(*idx)] += alpha1*(*v);
780: y[5*(*idx)+1] += alpha2*(*v);
781: y[5*(*idx)+2] += alpha3*(*v);
782: y[5*(*idx)+3] += alpha4*(*v);
783: y[5*(*idx)+4] += alpha5*(*v);
784: idx++; v++;
785: }
786: }
787: PetscLogFlops(10*a->nz);
788: VecRestoreArray(xx,&x);
789: VecRestoreArray(zz,&y);
790: return(0);
791: }
793: /* ------------------------------------------------------------------------------*/
796: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
797: {
798: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
799: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
800: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
802: PetscInt m = b->AIJ->m,*idx,*ii;
803: PetscInt n,i,jrow,j;
806: VecGetArray(xx,&x);
807: VecGetArray(yy,&y);
808: idx = a->j;
809: v = a->a;
810: ii = a->i;
812: for (i=0; i<m; i++) {
813: jrow = ii[i];
814: n = ii[i+1] - jrow;
815: sum1 = 0.0;
816: sum2 = 0.0;
817: sum3 = 0.0;
818: sum4 = 0.0;
819: sum5 = 0.0;
820: sum6 = 0.0;
821: for (j=0; j<n; j++) {
822: sum1 += v[jrow]*x[6*idx[jrow]];
823: sum2 += v[jrow]*x[6*idx[jrow]+1];
824: sum3 += v[jrow]*x[6*idx[jrow]+2];
825: sum4 += v[jrow]*x[6*idx[jrow]+3];
826: sum5 += v[jrow]*x[6*idx[jrow]+4];
827: sum6 += v[jrow]*x[6*idx[jrow]+5];
828: jrow++;
829: }
830: y[6*i] = sum1;
831: y[6*i+1] = sum2;
832: y[6*i+2] = sum3;
833: y[6*i+3] = sum4;
834: y[6*i+4] = sum5;
835: y[6*i+5] = sum6;
836: }
838: PetscLogFlops(12*a->nz - 6*m);
839: VecRestoreArray(xx,&x);
840: VecRestoreArray(yy,&y);
841: return(0);
842: }
846: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
847: {
848: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
849: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
850: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
852: PetscInt m = b->AIJ->m,n,i,*idx;
855: VecSet(yy,zero);
856: VecGetArray(xx,&x);
857: VecGetArray(yy,&y);
859: for (i=0; i<m; i++) {
860: idx = a->j + a->i[i] ;
861: v = a->a + a->i[i] ;
862: n = a->i[i+1] - a->i[i];
863: alpha1 = x[6*i];
864: alpha2 = x[6*i+1];
865: alpha3 = x[6*i+2];
866: alpha4 = x[6*i+3];
867: alpha5 = x[6*i+4];
868: alpha6 = x[6*i+5];
869: while (n-->0) {
870: y[6*(*idx)] += alpha1*(*v);
871: y[6*(*idx)+1] += alpha2*(*v);
872: y[6*(*idx)+2] += alpha3*(*v);
873: y[6*(*idx)+3] += alpha4*(*v);
874: y[6*(*idx)+4] += alpha5*(*v);
875: y[6*(*idx)+5] += alpha6*(*v);
876: idx++; v++;
877: }
878: }
879: PetscLogFlops(12*a->nz - 6*b->AIJ->n);
880: VecRestoreArray(xx,&x);
881: VecRestoreArray(yy,&y);
882: return(0);
883: }
887: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
888: {
889: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
890: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
891: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
893: PetscInt m = b->AIJ->m,*idx,*ii;
894: PetscInt n,i,jrow,j;
897: if (yy != zz) {VecCopy(yy,zz);}
898: VecGetArray(xx,&x);
899: VecGetArray(zz,&y);
900: idx = a->j;
901: v = a->a;
902: ii = a->i;
904: for (i=0; i<m; i++) {
905: jrow = ii[i];
906: n = ii[i+1] - jrow;
907: sum1 = 0.0;
908: sum2 = 0.0;
909: sum3 = 0.0;
910: sum4 = 0.0;
911: sum5 = 0.0;
912: sum6 = 0.0;
913: for (j=0; j<n; j++) {
914: sum1 += v[jrow]*x[6*idx[jrow]];
915: sum2 += v[jrow]*x[6*idx[jrow]+1];
916: sum3 += v[jrow]*x[6*idx[jrow]+2];
917: sum4 += v[jrow]*x[6*idx[jrow]+3];
918: sum5 += v[jrow]*x[6*idx[jrow]+4];
919: sum6 += v[jrow]*x[6*idx[jrow]+5];
920: jrow++;
921: }
922: y[6*i] += sum1;
923: y[6*i+1] += sum2;
924: y[6*i+2] += sum3;
925: y[6*i+3] += sum4;
926: y[6*i+4] += sum5;
927: y[6*i+5] += sum6;
928: }
930: PetscLogFlops(12*a->nz);
931: VecRestoreArray(xx,&x);
932: VecRestoreArray(zz,&y);
933: return(0);
934: }
938: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
939: {
940: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
941: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
942: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
944: PetscInt m = b->AIJ->m,n,i,*idx;
947: if (yy != zz) {VecCopy(yy,zz);}
948: VecGetArray(xx,&x);
949: VecGetArray(zz,&y);
950:
951: for (i=0; i<m; i++) {
952: idx = a->j + a->i[i] ;
953: v = a->a + a->i[i] ;
954: n = a->i[i+1] - a->i[i];
955: alpha1 = x[6*i];
956: alpha2 = x[6*i+1];
957: alpha3 = x[6*i+2];
958: alpha4 = x[6*i+3];
959: alpha5 = x[6*i+4];
960: alpha6 = x[6*i+5];
961: while (n-->0) {
962: y[6*(*idx)] += alpha1*(*v);
963: y[6*(*idx)+1] += alpha2*(*v);
964: y[6*(*idx)+2] += alpha3*(*v);
965: y[6*(*idx)+3] += alpha4*(*v);
966: y[6*(*idx)+4] += alpha5*(*v);
967: y[6*(*idx)+5] += alpha6*(*v);
968: idx++; v++;
969: }
970: }
971: PetscLogFlops(12*a->nz);
972: VecRestoreArray(xx,&x);
973: VecRestoreArray(zz,&y);
974: return(0);
975: }
977: /* ------------------------------------------------------------------------------*/
980: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
981: {
982: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
983: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
984: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
986: PetscInt m = b->AIJ->m,*idx,*ii;
987: PetscInt n,i,jrow,j;
990: VecGetArray(xx,&x);
991: VecGetArray(yy,&y);
992: idx = a->j;
993: v = a->a;
994: ii = a->i;
996: for (i=0; i<m; i++) {
997: jrow = ii[i];
998: n = ii[i+1] - jrow;
999: sum1 = 0.0;
1000: sum2 = 0.0;
1001: sum3 = 0.0;
1002: sum4 = 0.0;
1003: sum5 = 0.0;
1004: sum6 = 0.0;
1005: sum7 = 0.0;
1006: for (j=0; j<n; j++) {
1007: sum1 += v[jrow]*x[7*idx[jrow]];
1008: sum2 += v[jrow]*x[7*idx[jrow]+1];
1009: sum3 += v[jrow]*x[7*idx[jrow]+2];
1010: sum4 += v[jrow]*x[7*idx[jrow]+3];
1011: sum5 += v[jrow]*x[7*idx[jrow]+4];
1012: sum6 += v[jrow]*x[7*idx[jrow]+5];
1013: sum7 += v[jrow]*x[7*idx[jrow]+6];
1014: jrow++;
1015: }
1016: y[7*i] = sum1;
1017: y[7*i+1] = sum2;
1018: y[7*i+2] = sum3;
1019: y[7*i+3] = sum4;
1020: y[7*i+4] = sum5;
1021: y[7*i+5] = sum6;
1022: y[7*i+6] = sum7;
1023: }
1025: PetscLogFlops(14*a->nz - 7*m);
1026: VecRestoreArray(xx,&x);
1027: VecRestoreArray(yy,&y);
1028: return(0);
1029: }
1033: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1034: {
1035: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1036: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1037: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1039: PetscInt m = b->AIJ->m,n,i,*idx;
1042: VecSet(yy,zero);
1043: VecGetArray(xx,&x);
1044: VecGetArray(yy,&y);
1046: for (i=0; i<m; i++) {
1047: idx = a->j + a->i[i] ;
1048: v = a->a + a->i[i] ;
1049: n = a->i[i+1] - a->i[i];
1050: alpha1 = x[7*i];
1051: alpha2 = x[7*i+1];
1052: alpha3 = x[7*i+2];
1053: alpha4 = x[7*i+3];
1054: alpha5 = x[7*i+4];
1055: alpha6 = x[7*i+5];
1056: alpha7 = x[7*i+6];
1057: while (n-->0) {
1058: y[7*(*idx)] += alpha1*(*v);
1059: y[7*(*idx)+1] += alpha2*(*v);
1060: y[7*(*idx)+2] += alpha3*(*v);
1061: y[7*(*idx)+3] += alpha4*(*v);
1062: y[7*(*idx)+4] += alpha5*(*v);
1063: y[7*(*idx)+5] += alpha6*(*v);
1064: y[7*(*idx)+6] += alpha7*(*v);
1065: idx++; v++;
1066: }
1067: }
1068: PetscLogFlops(14*a->nz - 7*b->AIJ->n);
1069: VecRestoreArray(xx,&x);
1070: VecRestoreArray(yy,&y);
1071: return(0);
1072: }
1076: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1077: {
1078: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1079: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1080: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1082: PetscInt m = b->AIJ->m,*idx,*ii;
1083: PetscInt n,i,jrow,j;
1086: if (yy != zz) {VecCopy(yy,zz);}
1087: VecGetArray(xx,&x);
1088: VecGetArray(zz,&y);
1089: idx = a->j;
1090: v = a->a;
1091: ii = a->i;
1093: for (i=0; i<m; i++) {
1094: jrow = ii[i];
1095: n = ii[i+1] - jrow;
1096: sum1 = 0.0;
1097: sum2 = 0.0;
1098: sum3 = 0.0;
1099: sum4 = 0.0;
1100: sum5 = 0.0;
1101: sum6 = 0.0;
1102: sum7 = 0.0;
1103: for (j=0; j<n; j++) {
1104: sum1 += v[jrow]*x[7*idx[jrow]];
1105: sum2 += v[jrow]*x[7*idx[jrow]+1];
1106: sum3 += v[jrow]*x[7*idx[jrow]+2];
1107: sum4 += v[jrow]*x[7*idx[jrow]+3];
1108: sum5 += v[jrow]*x[7*idx[jrow]+4];
1109: sum6 += v[jrow]*x[7*idx[jrow]+5];
1110: sum7 += v[jrow]*x[7*idx[jrow]+6];
1111: jrow++;
1112: }
1113: y[7*i] += sum1;
1114: y[7*i+1] += sum2;
1115: y[7*i+2] += sum3;
1116: y[7*i+3] += sum4;
1117: y[7*i+4] += sum5;
1118: y[7*i+5] += sum6;
1119: y[7*i+6] += sum7;
1120: }
1122: PetscLogFlops(14*a->nz);
1123: VecRestoreArray(xx,&x);
1124: VecRestoreArray(zz,&y);
1125: return(0);
1126: }
1130: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1131: {
1132: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1133: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1134: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1136: PetscInt m = b->AIJ->m,n,i,*idx;
1139: if (yy != zz) {VecCopy(yy,zz);}
1140: VecGetArray(xx,&x);
1141: VecGetArray(zz,&y);
1142: for (i=0; i<m; i++) {
1143: idx = a->j + a->i[i] ;
1144: v = a->a + a->i[i] ;
1145: n = a->i[i+1] - a->i[i];
1146: alpha1 = x[7*i];
1147: alpha2 = x[7*i+1];
1148: alpha3 = x[7*i+2];
1149: alpha4 = x[7*i+3];
1150: alpha5 = x[7*i+4];
1151: alpha6 = x[7*i+5];
1152: alpha7 = x[7*i+6];
1153: while (n-->0) {
1154: y[7*(*idx)] += alpha1*(*v);
1155: y[7*(*idx)+1] += alpha2*(*v);
1156: y[7*(*idx)+2] += alpha3*(*v);
1157: y[7*(*idx)+3] += alpha4*(*v);
1158: y[7*(*idx)+4] += alpha5*(*v);
1159: y[7*(*idx)+5] += alpha6*(*v);
1160: y[7*(*idx)+6] += alpha7*(*v);
1161: idx++; v++;
1162: }
1163: }
1164: PetscLogFlops(14*a->nz);
1165: VecRestoreArray(xx,&x);
1166: VecRestoreArray(zz,&y);
1167: return(0);
1168: }
1172: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1173: {
1174: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1175: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1176: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1178: PetscInt m = b->AIJ->m,*idx,*ii;
1179: PetscInt n,i,jrow,j;
1182: VecGetArray(xx,&x);
1183: VecGetArray(yy,&y);
1184: idx = a->j;
1185: v = a->a;
1186: ii = a->i;
1188: for (i=0; i<m; i++) {
1189: jrow = ii[i];
1190: n = ii[i+1] - jrow;
1191: sum1 = 0.0;
1192: sum2 = 0.0;
1193: sum3 = 0.0;
1194: sum4 = 0.0;
1195: sum5 = 0.0;
1196: sum6 = 0.0;
1197: sum7 = 0.0;
1198: sum8 = 0.0;
1199: for (j=0; j<n; j++) {
1200: sum1 += v[jrow]*x[8*idx[jrow]];
1201: sum2 += v[jrow]*x[8*idx[jrow]+1];
1202: sum3 += v[jrow]*x[8*idx[jrow]+2];
1203: sum4 += v[jrow]*x[8*idx[jrow]+3];
1204: sum5 += v[jrow]*x[8*idx[jrow]+4];
1205: sum6 += v[jrow]*x[8*idx[jrow]+5];
1206: sum7 += v[jrow]*x[8*idx[jrow]+6];
1207: sum8 += v[jrow]*x[8*idx[jrow]+7];
1208: jrow++;
1209: }
1210: y[8*i] = sum1;
1211: y[8*i+1] = sum2;
1212: y[8*i+2] = sum3;
1213: y[8*i+3] = sum4;
1214: y[8*i+4] = sum5;
1215: y[8*i+5] = sum6;
1216: y[8*i+6] = sum7;
1217: y[8*i+7] = sum8;
1218: }
1220: PetscLogFlops(16*a->nz - 8*m);
1221: VecRestoreArray(xx,&x);
1222: VecRestoreArray(yy,&y);
1223: return(0);
1224: }
1228: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1229: {
1230: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1231: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1232: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1234: PetscInt m = b->AIJ->m,n,i,*idx;
1237: VecSet(yy,zero);
1238: VecGetArray(xx,&x);
1239: VecGetArray(yy,&y);
1241: for (i=0; i<m; i++) {
1242: idx = a->j + a->i[i] ;
1243: v = a->a + a->i[i] ;
1244: n = a->i[i+1] - a->i[i];
1245: alpha1 = x[8*i];
1246: alpha2 = x[8*i+1];
1247: alpha3 = x[8*i+2];
1248: alpha4 = x[8*i+3];
1249: alpha5 = x[8*i+4];
1250: alpha6 = x[8*i+5];
1251: alpha7 = x[8*i+6];
1252: alpha8 = x[8*i+7];
1253: while (n-->0) {
1254: y[8*(*idx)] += alpha1*(*v);
1255: y[8*(*idx)+1] += alpha2*(*v);
1256: y[8*(*idx)+2] += alpha3*(*v);
1257: y[8*(*idx)+3] += alpha4*(*v);
1258: y[8*(*idx)+4] += alpha5*(*v);
1259: y[8*(*idx)+5] += alpha6*(*v);
1260: y[8*(*idx)+6] += alpha7*(*v);
1261: y[8*(*idx)+7] += alpha8*(*v);
1262: idx++; v++;
1263: }
1264: }
1265: PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1266: VecRestoreArray(xx,&x);
1267: VecRestoreArray(yy,&y);
1268: return(0);
1269: }
1273: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1274: {
1275: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1276: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1277: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1279: PetscInt m = b->AIJ->m,*idx,*ii;
1280: PetscInt n,i,jrow,j;
1283: if (yy != zz) {VecCopy(yy,zz);}
1284: VecGetArray(xx,&x);
1285: VecGetArray(zz,&y);
1286: idx = a->j;
1287: v = a->a;
1288: ii = a->i;
1290: for (i=0; i<m; i++) {
1291: jrow = ii[i];
1292: n = ii[i+1] - jrow;
1293: sum1 = 0.0;
1294: sum2 = 0.0;
1295: sum3 = 0.0;
1296: sum4 = 0.0;
1297: sum5 = 0.0;
1298: sum6 = 0.0;
1299: sum7 = 0.0;
1300: sum8 = 0.0;
1301: for (j=0; j<n; j++) {
1302: sum1 += v[jrow]*x[8*idx[jrow]];
1303: sum2 += v[jrow]*x[8*idx[jrow]+1];
1304: sum3 += v[jrow]*x[8*idx[jrow]+2];
1305: sum4 += v[jrow]*x[8*idx[jrow]+3];
1306: sum5 += v[jrow]*x[8*idx[jrow]+4];
1307: sum6 += v[jrow]*x[8*idx[jrow]+5];
1308: sum7 += v[jrow]*x[8*idx[jrow]+6];
1309: sum8 += v[jrow]*x[8*idx[jrow]+7];
1310: jrow++;
1311: }
1312: y[8*i] += sum1;
1313: y[8*i+1] += sum2;
1314: y[8*i+2] += sum3;
1315: y[8*i+3] += sum4;
1316: y[8*i+4] += sum5;
1317: y[8*i+5] += sum6;
1318: y[8*i+6] += sum7;
1319: y[8*i+7] += sum8;
1320: }
1322: PetscLogFlops(16*a->nz);
1323: VecRestoreArray(xx,&x);
1324: VecRestoreArray(zz,&y);
1325: return(0);
1326: }
1330: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1331: {
1332: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1333: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1334: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1336: PetscInt m = b->AIJ->m,n,i,*idx;
1339: if (yy != zz) {VecCopy(yy,zz);}
1340: VecGetArray(xx,&x);
1341: VecGetArray(zz,&y);
1342: for (i=0; i<m; i++) {
1343: idx = a->j + a->i[i] ;
1344: v = a->a + a->i[i] ;
1345: n = a->i[i+1] - a->i[i];
1346: alpha1 = x[8*i];
1347: alpha2 = x[8*i+1];
1348: alpha3 = x[8*i+2];
1349: alpha4 = x[8*i+3];
1350: alpha5 = x[8*i+4];
1351: alpha6 = x[8*i+5];
1352: alpha7 = x[8*i+6];
1353: alpha8 = x[8*i+7];
1354: while (n-->0) {
1355: y[8*(*idx)] += alpha1*(*v);
1356: y[8*(*idx)+1] += alpha2*(*v);
1357: y[8*(*idx)+2] += alpha3*(*v);
1358: y[8*(*idx)+3] += alpha4*(*v);
1359: y[8*(*idx)+4] += alpha5*(*v);
1360: y[8*(*idx)+5] += alpha6*(*v);
1361: y[8*(*idx)+6] += alpha7*(*v);
1362: y[8*(*idx)+7] += alpha8*(*v);
1363: idx++; v++;
1364: }
1365: }
1366: PetscLogFlops(16*a->nz);
1367: VecRestoreArray(xx,&x);
1368: VecRestoreArray(zz,&y);
1369: return(0);
1370: }
1373: /* ------------------------------------------------------------------------------*/
1376: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1377: {
1378: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1379: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1380: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1382: PetscInt m = b->AIJ->m,*idx,*ii;
1383: PetscInt n,i,jrow,j;
1386: VecGetArray(xx,&x);
1387: VecGetArray(yy,&y);
1388: idx = a->j;
1389: v = a->a;
1390: ii = a->i;
1392: for (i=0; i<m; i++) {
1393: jrow = ii[i];
1394: n = ii[i+1] - jrow;
1395: sum1 = 0.0;
1396: sum2 = 0.0;
1397: sum3 = 0.0;
1398: sum4 = 0.0;
1399: sum5 = 0.0;
1400: sum6 = 0.0;
1401: sum7 = 0.0;
1402: sum8 = 0.0;
1403: sum9 = 0.0;
1404: for (j=0; j<n; j++) {
1405: sum1 += v[jrow]*x[9*idx[jrow]];
1406: sum2 += v[jrow]*x[9*idx[jrow]+1];
1407: sum3 += v[jrow]*x[9*idx[jrow]+2];
1408: sum4 += v[jrow]*x[9*idx[jrow]+3];
1409: sum5 += v[jrow]*x[9*idx[jrow]+4];
1410: sum6 += v[jrow]*x[9*idx[jrow]+5];
1411: sum7 += v[jrow]*x[9*idx[jrow]+6];
1412: sum8 += v[jrow]*x[9*idx[jrow]+7];
1413: sum9 += v[jrow]*x[9*idx[jrow]+8];
1414: jrow++;
1415: }
1416: y[9*i] = sum1;
1417: y[9*i+1] = sum2;
1418: y[9*i+2] = sum3;
1419: y[9*i+3] = sum4;
1420: y[9*i+4] = sum5;
1421: y[9*i+5] = sum6;
1422: y[9*i+6] = sum7;
1423: y[9*i+7] = sum8;
1424: y[9*i+8] = sum9;
1425: }
1427: PetscLogFlops(18*a->nz - 9*m);
1428: VecRestoreArray(xx,&x);
1429: VecRestoreArray(yy,&y);
1430: return(0);
1431: }
1435: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1436: {
1437: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1438: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1439: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1441: PetscInt m = b->AIJ->m,n,i,*idx;
1444: VecSet(yy,zero);
1445: VecGetArray(xx,&x);
1446: VecGetArray(yy,&y);
1448: for (i=0; i<m; i++) {
1449: idx = a->j + a->i[i] ;
1450: v = a->a + a->i[i] ;
1451: n = a->i[i+1] - a->i[i];
1452: alpha1 = x[9*i];
1453: alpha2 = x[9*i+1];
1454: alpha3 = x[9*i+2];
1455: alpha4 = x[9*i+3];
1456: alpha5 = x[9*i+4];
1457: alpha6 = x[9*i+5];
1458: alpha7 = x[9*i+6];
1459: alpha8 = x[9*i+7];
1460: alpha9 = x[9*i+8];
1461: while (n-->0) {
1462: y[9*(*idx)] += alpha1*(*v);
1463: y[9*(*idx)+1] += alpha2*(*v);
1464: y[9*(*idx)+2] += alpha3*(*v);
1465: y[9*(*idx)+3] += alpha4*(*v);
1466: y[9*(*idx)+4] += alpha5*(*v);
1467: y[9*(*idx)+5] += alpha6*(*v);
1468: y[9*(*idx)+6] += alpha7*(*v);
1469: y[9*(*idx)+7] += alpha8*(*v);
1470: y[9*(*idx)+8] += alpha9*(*v);
1471: idx++; v++;
1472: }
1473: }
1474: PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1475: VecRestoreArray(xx,&x);
1476: VecRestoreArray(yy,&y);
1477: return(0);
1478: }
1482: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1483: {
1484: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1485: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1486: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1488: PetscInt m = b->AIJ->m,*idx,*ii;
1489: PetscInt n,i,jrow,j;
1492: if (yy != zz) {VecCopy(yy,zz);}
1493: VecGetArray(xx,&x);
1494: VecGetArray(zz,&y);
1495: idx = a->j;
1496: v = a->a;
1497: ii = a->i;
1499: for (i=0; i<m; i++) {
1500: jrow = ii[i];
1501: n = ii[i+1] - jrow;
1502: sum1 = 0.0;
1503: sum2 = 0.0;
1504: sum3 = 0.0;
1505: sum4 = 0.0;
1506: sum5 = 0.0;
1507: sum6 = 0.0;
1508: sum7 = 0.0;
1509: sum8 = 0.0;
1510: sum9 = 0.0;
1511: for (j=0; j<n; j++) {
1512: sum1 += v[jrow]*x[9*idx[jrow]];
1513: sum2 += v[jrow]*x[9*idx[jrow]+1];
1514: sum3 += v[jrow]*x[9*idx[jrow]+2];
1515: sum4 += v[jrow]*x[9*idx[jrow]+3];
1516: sum5 += v[jrow]*x[9*idx[jrow]+4];
1517: sum6 += v[jrow]*x[9*idx[jrow]+5];
1518: sum7 += v[jrow]*x[9*idx[jrow]+6];
1519: sum8 += v[jrow]*x[9*idx[jrow]+7];
1520: sum9 += v[jrow]*x[9*idx[jrow]+8];
1521: jrow++;
1522: }
1523: y[9*i] += sum1;
1524: y[9*i+1] += sum2;
1525: y[9*i+2] += sum3;
1526: y[9*i+3] += sum4;
1527: y[9*i+4] += sum5;
1528: y[9*i+5] += sum6;
1529: y[9*i+6] += sum7;
1530: y[9*i+7] += sum8;
1531: y[9*i+8] += sum9;
1532: }
1534: PetscLogFlops(18*a->nz);
1535: VecRestoreArray(xx,&x);
1536: VecRestoreArray(zz,&y);
1537: return(0);
1538: }
1542: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1543: {
1544: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1545: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1546: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1548: PetscInt m = b->AIJ->m,n,i,*idx;
1551: if (yy != zz) {VecCopy(yy,zz);}
1552: VecGetArray(xx,&x);
1553: VecGetArray(zz,&y);
1554: for (i=0; i<m; i++) {
1555: idx = a->j + a->i[i] ;
1556: v = a->a + a->i[i] ;
1557: n = a->i[i+1] - a->i[i];
1558: alpha1 = x[9*i];
1559: alpha2 = x[9*i+1];
1560: alpha3 = x[9*i+2];
1561: alpha4 = x[9*i+3];
1562: alpha5 = x[9*i+4];
1563: alpha6 = x[9*i+5];
1564: alpha7 = x[9*i+6];
1565: alpha8 = x[9*i+7];
1566: alpha9 = x[9*i+8];
1567: while (n-->0) {
1568: y[9*(*idx)] += alpha1*(*v);
1569: y[9*(*idx)+1] += alpha2*(*v);
1570: y[9*(*idx)+2] += alpha3*(*v);
1571: y[9*(*idx)+3] += alpha4*(*v);
1572: y[9*(*idx)+4] += alpha5*(*v);
1573: y[9*(*idx)+5] += alpha6*(*v);
1574: y[9*(*idx)+6] += alpha7*(*v);
1575: y[9*(*idx)+7] += alpha8*(*v);
1576: y[9*(*idx)+8] += alpha9*(*v);
1577: idx++; v++;
1578: }
1579: }
1580: PetscLogFlops(18*a->nz);
1581: VecRestoreArray(xx,&x);
1582: VecRestoreArray(zz,&y);
1583: return(0);
1584: }
1586: /*--------------------------------------------------------------------------------------------*/
1589: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1590: {
1591: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1592: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1593: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1594: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1596: PetscInt m = b->AIJ->m,*idx,*ii;
1597: PetscInt n,i,jrow,j;
1600: VecGetArray(xx,&x);
1601: VecGetArray(yy,&y);
1602: idx = a->j;
1603: v = a->a;
1604: ii = a->i;
1606: for (i=0; i<m; i++) {
1607: jrow = ii[i];
1608: n = ii[i+1] - jrow;
1609: sum1 = 0.0;
1610: sum2 = 0.0;
1611: sum3 = 0.0;
1612: sum4 = 0.0;
1613: sum5 = 0.0;
1614: sum6 = 0.0;
1615: sum7 = 0.0;
1616: sum8 = 0.0;
1617: sum9 = 0.0;
1618: sum10 = 0.0;
1619: sum11 = 0.0;
1620: sum12 = 0.0;
1621: sum13 = 0.0;
1622: sum14 = 0.0;
1623: sum15 = 0.0;
1624: sum16 = 0.0;
1625: for (j=0; j<n; j++) {
1626: sum1 += v[jrow]*x[16*idx[jrow]];
1627: sum2 += v[jrow]*x[16*idx[jrow]+1];
1628: sum3 += v[jrow]*x[16*idx[jrow]+2];
1629: sum4 += v[jrow]*x[16*idx[jrow]+3];
1630: sum5 += v[jrow]*x[16*idx[jrow]+4];
1631: sum6 += v[jrow]*x[16*idx[jrow]+5];
1632: sum7 += v[jrow]*x[16*idx[jrow]+6];
1633: sum8 += v[jrow]*x[16*idx[jrow]+7];
1634: sum9 += v[jrow]*x[16*idx[jrow]+8];
1635: sum10 += v[jrow]*x[16*idx[jrow]+9];
1636: sum11 += v[jrow]*x[16*idx[jrow]+10];
1637: sum12 += v[jrow]*x[16*idx[jrow]+11];
1638: sum13 += v[jrow]*x[16*idx[jrow]+12];
1639: sum14 += v[jrow]*x[16*idx[jrow]+13];
1640: sum15 += v[jrow]*x[16*idx[jrow]+14];
1641: sum16 += v[jrow]*x[16*idx[jrow]+15];
1642: jrow++;
1643: }
1644: y[16*i] = sum1;
1645: y[16*i+1] = sum2;
1646: y[16*i+2] = sum3;
1647: y[16*i+3] = sum4;
1648: y[16*i+4] = sum5;
1649: y[16*i+5] = sum6;
1650: y[16*i+6] = sum7;
1651: y[16*i+7] = sum8;
1652: y[16*i+8] = sum9;
1653: y[16*i+9] = sum10;
1654: y[16*i+10] = sum11;
1655: y[16*i+11] = sum12;
1656: y[16*i+12] = sum13;
1657: y[16*i+13] = sum14;
1658: y[16*i+14] = sum15;
1659: y[16*i+15] = sum16;
1660: }
1662: PetscLogFlops(32*a->nz - 16*m);
1663: VecRestoreArray(xx,&x);
1664: VecRestoreArray(yy,&y);
1665: return(0);
1666: }
1670: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1671: {
1672: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1673: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1674: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1675: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1677: PetscInt m = b->AIJ->m,n,i,*idx;
1680: VecSet(yy,zero);
1681: VecGetArray(xx,&x);
1682: VecGetArray(yy,&y);
1684: for (i=0; i<m; i++) {
1685: idx = a->j + a->i[i] ;
1686: v = a->a + a->i[i] ;
1687: n = a->i[i+1] - a->i[i];
1688: alpha1 = x[16*i];
1689: alpha2 = x[16*i+1];
1690: alpha3 = x[16*i+2];
1691: alpha4 = x[16*i+3];
1692: alpha5 = x[16*i+4];
1693: alpha6 = x[16*i+5];
1694: alpha7 = x[16*i+6];
1695: alpha8 = x[16*i+7];
1696: alpha9 = x[16*i+8];
1697: alpha10 = x[16*i+9];
1698: alpha11 = x[16*i+10];
1699: alpha12 = x[16*i+11];
1700: alpha13 = x[16*i+12];
1701: alpha14 = x[16*i+13];
1702: alpha15 = x[16*i+14];
1703: alpha16 = x[16*i+15];
1704: while (n-->0) {
1705: y[16*(*idx)] += alpha1*(*v);
1706: y[16*(*idx)+1] += alpha2*(*v);
1707: y[16*(*idx)+2] += alpha3*(*v);
1708: y[16*(*idx)+3] += alpha4*(*v);
1709: y[16*(*idx)+4] += alpha5*(*v);
1710: y[16*(*idx)+5] += alpha6*(*v);
1711: y[16*(*idx)+6] += alpha7*(*v);
1712: y[16*(*idx)+7] += alpha8*(*v);
1713: y[16*(*idx)+8] += alpha9*(*v);
1714: y[16*(*idx)+9] += alpha10*(*v);
1715: y[16*(*idx)+10] += alpha11*(*v);
1716: y[16*(*idx)+11] += alpha12*(*v);
1717: y[16*(*idx)+12] += alpha13*(*v);
1718: y[16*(*idx)+13] += alpha14*(*v);
1719: y[16*(*idx)+14] += alpha15*(*v);
1720: y[16*(*idx)+15] += alpha16*(*v);
1721: idx++; v++;
1722: }
1723: }
1724: PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1725: VecRestoreArray(xx,&x);
1726: VecRestoreArray(yy,&y);
1727: return(0);
1728: }
1732: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1733: {
1734: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1735: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1736: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1737: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1739: PetscInt m = b->AIJ->m,*idx,*ii;
1740: PetscInt n,i,jrow,j;
1743: if (yy != zz) {VecCopy(yy,zz);}
1744: VecGetArray(xx,&x);
1745: VecGetArray(zz,&y);
1746: idx = a->j;
1747: v = a->a;
1748: ii = a->i;
1750: for (i=0; i<m; i++) {
1751: jrow = ii[i];
1752: n = ii[i+1] - jrow;
1753: sum1 = 0.0;
1754: sum2 = 0.0;
1755: sum3 = 0.0;
1756: sum4 = 0.0;
1757: sum5 = 0.0;
1758: sum6 = 0.0;
1759: sum7 = 0.0;
1760: sum8 = 0.0;
1761: sum9 = 0.0;
1762: sum10 = 0.0;
1763: sum11 = 0.0;
1764: sum12 = 0.0;
1765: sum13 = 0.0;
1766: sum14 = 0.0;
1767: sum15 = 0.0;
1768: sum16 = 0.0;
1769: for (j=0; j<n; j++) {
1770: sum1 += v[jrow]*x[16*idx[jrow]];
1771: sum2 += v[jrow]*x[16*idx[jrow]+1];
1772: sum3 += v[jrow]*x[16*idx[jrow]+2];
1773: sum4 += v[jrow]*x[16*idx[jrow]+3];
1774: sum5 += v[jrow]*x[16*idx[jrow]+4];
1775: sum6 += v[jrow]*x[16*idx[jrow]+5];
1776: sum7 += v[jrow]*x[16*idx[jrow]+6];
1777: sum8 += v[jrow]*x[16*idx[jrow]+7];
1778: sum9 += v[jrow]*x[16*idx[jrow]+8];
1779: sum10 += v[jrow]*x[16*idx[jrow]+9];
1780: sum11 += v[jrow]*x[16*idx[jrow]+10];
1781: sum12 += v[jrow]*x[16*idx[jrow]+11];
1782: sum13 += v[jrow]*x[16*idx[jrow]+12];
1783: sum14 += v[jrow]*x[16*idx[jrow]+13];
1784: sum15 += v[jrow]*x[16*idx[jrow]+14];
1785: sum16 += v[jrow]*x[16*idx[jrow]+15];
1786: jrow++;
1787: }
1788: y[16*i] += sum1;
1789: y[16*i+1] += sum2;
1790: y[16*i+2] += sum3;
1791: y[16*i+3] += sum4;
1792: y[16*i+4] += sum5;
1793: y[16*i+5] += sum6;
1794: y[16*i+6] += sum7;
1795: y[16*i+7] += sum8;
1796: y[16*i+8] += sum9;
1797: y[16*i+9] += sum10;
1798: y[16*i+10] += sum11;
1799: y[16*i+11] += sum12;
1800: y[16*i+12] += sum13;
1801: y[16*i+13] += sum14;
1802: y[16*i+14] += sum15;
1803: y[16*i+15] += sum16;
1804: }
1806: PetscLogFlops(32*a->nz);
1807: VecRestoreArray(xx,&x);
1808: VecRestoreArray(zz,&y);
1809: return(0);
1810: }
1814: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1815: {
1816: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1817: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1818: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1819: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1821: PetscInt m = b->AIJ->m,n,i,*idx;
1824: if (yy != zz) {VecCopy(yy,zz);}
1825: VecGetArray(xx,&x);
1826: VecGetArray(zz,&y);
1827: for (i=0; i<m; i++) {
1828: idx = a->j + a->i[i] ;
1829: v = a->a + a->i[i] ;
1830: n = a->i[i+1] - a->i[i];
1831: alpha1 = x[16*i];
1832: alpha2 = x[16*i+1];
1833: alpha3 = x[16*i+2];
1834: alpha4 = x[16*i+3];
1835: alpha5 = x[16*i+4];
1836: alpha6 = x[16*i+5];
1837: alpha7 = x[16*i+6];
1838: alpha8 = x[16*i+7];
1839: alpha9 = x[16*i+8];
1840: alpha10 = x[16*i+9];
1841: alpha11 = x[16*i+10];
1842: alpha12 = x[16*i+11];
1843: alpha13 = x[16*i+12];
1844: alpha14 = x[16*i+13];
1845: alpha15 = x[16*i+14];
1846: alpha16 = x[16*i+15];
1847: while (n-->0) {
1848: y[16*(*idx)] += alpha1*(*v);
1849: y[16*(*idx)+1] += alpha2*(*v);
1850: y[16*(*idx)+2] += alpha3*(*v);
1851: y[16*(*idx)+3] += alpha4*(*v);
1852: y[16*(*idx)+4] += alpha5*(*v);
1853: y[16*(*idx)+5] += alpha6*(*v);
1854: y[16*(*idx)+6] += alpha7*(*v);
1855: y[16*(*idx)+7] += alpha8*(*v);
1856: y[16*(*idx)+8] += alpha9*(*v);
1857: y[16*(*idx)+9] += alpha10*(*v);
1858: y[16*(*idx)+10] += alpha11*(*v);
1859: y[16*(*idx)+11] += alpha12*(*v);
1860: y[16*(*idx)+12] += alpha13*(*v);
1861: y[16*(*idx)+13] += alpha14*(*v);
1862: y[16*(*idx)+14] += alpha15*(*v);
1863: y[16*(*idx)+15] += alpha16*(*v);
1864: idx++; v++;
1865: }
1866: }
1867: PetscLogFlops(32*a->nz);
1868: VecRestoreArray(xx,&x);
1869: VecRestoreArray(zz,&y);
1870: return(0);
1871: }
1873: /*===================================================================================*/
1876: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1877: {
1878: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1882: /* start the scatter */
1883: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1884: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1885: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1886: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1887: return(0);
1888: }
1892: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1893: {
1894: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1898: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1899: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1900: VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1901: VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1902: return(0);
1903: }
1907: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1908: {
1909: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1913: /* start the scatter */
1914: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1915: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1916: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1917: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1918: return(0);
1919: }
1923: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1924: {
1925: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1929: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1930: VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1931: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1932: VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1933: return(0);
1934: }
1938: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
1939: {
1940: /* This routine requires testing -- but it's getting better. */
1942: FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1943: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
1944: Mat P=pp->AIJ;
1945: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
1946: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
1947: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
1948: PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn;
1949: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
1950: MatScalar *ca;
1953: /* Start timer */
1954: PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);
1956: /* Get ij structure of P^T */
1957: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
1959: cn = pn*ppdof;
1960: /* Allocate ci array, arrays for fill computation and */
1961: /* free space for accumulating nonzero column info */
1962: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
1963: ci[0] = 0;
1965: /* Work arrays for rows of P^T*A */
1966: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
1967: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
1968: ptasparserow = ptadenserow + an;
1969: denserow = ptasparserow + an;
1970: sparserow = denserow + cn;
1972: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
1973: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1974: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
1975: GetMoreSpace((ai[am]/pm)*pn,&free_space);
1976: current_space = free_space;
1978: /* Determine symbolic info for each row of C: */
1979: for (i=0;i<pn;i++) {
1980: ptnzi = pti[i+1] - pti[i];
1981: ptJ = ptj + pti[i];
1982: for (dof=0;dof<ppdof;dof++) {
1983: ptanzi = 0;
1984: /* Determine symbolic row of PtA: */
1985: for (j=0;j<ptnzi;j++) {
1986: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
1987: arow = ptJ[j]*ppdof + dof;
1988: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
1989: anzj = ai[arow+1] - ai[arow];
1990: ajj = aj + ai[arow];
1991: for (k=0;k<anzj;k++) {
1992: if (!ptadenserow[ajj[k]]) {
1993: ptadenserow[ajj[k]] = -1;
1994: ptasparserow[ptanzi++] = ajj[k];
1995: }
1996: }
1997: }
1998: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
1999: ptaj = ptasparserow;
2000: cnzi = 0;
2001: for (j=0;j<ptanzi;j++) {
2002: /* Get offset within block of P */
2003: pshift = *ptaj%ppdof;
2004: /* Get block row of P */
2005: prow = (*ptaj++)/ppdof; /* integer division */
2006: /* P has same number of nonzeros per row as the compressed form */
2007: pnzj = pi[prow+1] - pi[prow];
2008: pjj = pj + pi[prow];
2009: for (k=0;k<pnzj;k++) {
2010: /* Locations in C are shifted by the offset within the block */
2011: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2012: if (!denserow[pjj[k]*ppdof+pshift]) {
2013: denserow[pjj[k]*ppdof+pshift] = -1;
2014: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2015: }
2016: }
2017: }
2019: /* sort sparserow */
2020: PetscSortInt(cnzi,sparserow);
2021:
2022: /* If free space is not available, make more free space */
2023: /* Double the amount of total space in the list */
2024: if (current_space->local_remaining<cnzi) {
2025: GetMoreSpace(current_space->total_array_size,¤t_space);
2026: }
2028: /* Copy data into free space, and zero out denserows */
2029: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2030: current_space->array += cnzi;
2031: current_space->local_used += cnzi;
2032: current_space->local_remaining -= cnzi;
2034: for (j=0;j<ptanzi;j++) {
2035: ptadenserow[ptasparserow[j]] = 0;
2036: }
2037: for (j=0;j<cnzi;j++) {
2038: denserow[sparserow[j]] = 0;
2039: }
2040: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2041: /* For now, we will recompute what is needed. */
2042: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2043: }
2044: }
2045: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2046: /* Allocate space for cj, initialize cj, and */
2047: /* destroy list of free space and other temporary array(s) */
2048: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2049: MakeSpaceContiguous(&free_space,cj);
2050: PetscFree(ptadenserow);
2051:
2052: /* Allocate space for ca */
2053: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2054: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2055:
2056: /* put together the new matrix */
2057: MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);
2059: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2060: /* Since these are PETSc arrays, change flags to free them as necessary. */
2061: c = (Mat_SeqAIJ *)((*C)->data);
2062: c->freedata = PETSC_TRUE;
2063: c->nonew = 0;
2065: /* Clean up. */
2066: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2068: PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2069: return(0);
2070: }
2074: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2075: {
2076: /* This routine requires testing -- first draft only */
2078: PetscInt flops=0;
2079: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2080: Mat P=pp->AIJ;
2081: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2082: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2083: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
2084: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2085: PetscInt *ci=c->i,*cj=c->j,*cjj;
2086: PetscInt am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof;
2087: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2088: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2091: /* Allocate temporary array for storage of one row of A*P */
2092: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2093: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2095: apj = (PetscInt *)(apa + cn);
2096: apjdense = apj + cn;
2098: /* Clear old values in C */
2099: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2101: for (i=0;i<am;i++) {
2102: /* Form sparse row of A*P */
2103: anzi = ai[i+1] - ai[i];
2104: apnzj = 0;
2105: for (j=0;j<anzi;j++) {
2106: /* Get offset within block of P */
2107: pshift = *aj%ppdof;
2108: /* Get block row of P */
2109: prow = *aj++/ppdof; /* integer division */
2110: pnzj = pi[prow+1] - pi[prow];
2111: pjj = pj + pi[prow];
2112: paj = pa + pi[prow];
2113: for (k=0;k<pnzj;k++) {
2114: poffset = pjj[k]*ppdof+pshift;
2115: if (!apjdense[poffset]) {
2116: apjdense[poffset] = -1;
2117: apj[apnzj++] = poffset;
2118: }
2119: apa[poffset] += (*aa)*paj[k];
2120: }
2121: flops += 2*pnzj;
2122: aa++;
2123: }
2125: /* Sort the j index array for quick sparse axpy. */
2126: /* Note: a array does not need sorting as it is in dense storage locations. */
2127: PetscSortInt(apnzj,apj);
2129: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2130: prow = i/ppdof; /* integer division */
2131: pshift = i%ppdof;
2132: poffset = pi[prow];
2133: pnzi = pi[prow+1] - poffset;
2134: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2135: pJ = pj+poffset;
2136: pA = pa+poffset;
2137: for (j=0;j<pnzi;j++) {
2138: crow = (*pJ)*ppdof+pshift;
2139: cjj = cj + ci[crow];
2140: caj = ca + ci[crow];
2141: pJ++;
2142: /* Perform sparse axpy operation. Note cjj includes apj. */
2143: for (k=0,nextap=0;nextap<apnzj;k++) {
2144: if (cjj[k]==apj[nextap]) {
2145: caj[k] += (*pA)*apa[apj[nextap++]];
2146: }
2147: }
2148: flops += 2*apnzj;
2149: pA++;
2150: }
2152: /* Zero the current row info for A*P */
2153: for (j=0;j<apnzj;j++) {
2154: apa[apj[j]] = 0.;
2155: apjdense[apj[j]] = 0;
2156: }
2157: }
2159: /* Assemble the final matrix and clean up */
2160: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2161: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2162: PetscFree(apa);
2163: PetscLogFlops(flops);
2165: return(0);
2166: }
2171: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2172: {
2173: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2174: Mat a = b->AIJ,B;
2175: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2176: PetscErrorCode ierr;
2177: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2178: PetscInt *cols;
2179: PetscScalar *vals;
2182: MatGetSize(a,&m,&n);
2183: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2184: for (i=0; i<m; i++) {
2185: nmax = PetscMax(nmax,aij->ilen[i]);
2186: for (j=0; j<dof; j++) {
2187: ilen[dof*i+j] = aij->ilen[i];
2188: }
2189: }
2190: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2191: MatSetOption(B,MAT_COLUMNS_SORTED);
2192: PetscFree(ilen);
2193: PetscMalloc(nmax*sizeof(PetscInt),&icols);
2194: ii = 0;
2195: for (i=0; i<m; i++) {
2196: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2197: for (j=0; j<dof; j++) {
2198: for (k=0; k<ncols; k++) {
2199: icols[k] = dof*cols[k]+j;
2200: }
2201: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2202: ii++;
2203: }
2204: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2205: }
2206: PetscFree(icols);
2207: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2208: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2210: if (reuse == MAT_REUSE_MATRIX) {
2211: MatHeaderReplace(A,B);
2212: } else {
2213: *newmat = B;
2214: }
2215: return(0);
2216: }
2219: #include src/mat/impls/aij/mpi/mpiaij.h
2224: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2225: {
2226: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
2227: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2228: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2229: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2230: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2231: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2232: PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
2233: PetscInt rstart,cstart,*garray,ii,k;
2234: PetscErrorCode ierr;
2235: PetscScalar *vals,*ovals;
2238: PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);
2239: for (i=0; i<A->m/dof; i++) {
2240: nmax = PetscMax(nmax,AIJ->ilen[i]);
2241: onmax = PetscMax(onmax,OAIJ->ilen[i]);
2242: for (j=0; j<dof; j++) {
2243: dnz[dof*i+j] = AIJ->ilen[i];
2244: onz[dof*i+j] = OAIJ->ilen[i];
2245: }
2246: }
2247: MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);
2248: MatSetOption(B,MAT_COLUMNS_SORTED);
2249: PetscFree2(dnz,onz);
2251: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2252: rstart = dof*mpiaij->rstart;
2253: cstart = dof*mpiaij->cstart;
2254: garray = mpiaij->garray;
2256: ii = rstart;
2257: for (i=0; i<A->m/dof; i++) {
2258: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2259: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2260: for (j=0; j<dof; j++) {
2261: for (k=0; k<ncols; k++) {
2262: icols[k] = cstart + dof*cols[k]+j;
2263: }
2264: for (k=0; k<oncols; k++) {
2265: oicols[k] = dof*garray[ocols[k]]+j;
2266: }
2267: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2268: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2269: ii++;
2270: }
2271: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2272: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2273: }
2274: PetscFree2(icols,oicols);
2276: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2277: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2279: if (reuse == MAT_REUSE_MATRIX) {
2280: MatHeaderReplace(A,B);
2281: } else {
2282: *newmat = B;
2283: }
2284: return(0);
2285: }
2289: /* ---------------------------------------------------------------------------------- */
2290: /*MC
2291: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2292: operations for multicomponent problems. It interpolates each component the same
2293: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
2294: and MATMPIAIJ for distributed matrices.
2296: Operations provided:
2297: + MatMult
2298: . MatMultTranspose
2299: . MatMultAdd
2300: . MatMultTransposeAdd
2301: - MatView
2303: Level: advanced
2305: M*/
2308: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2309: {
2311: PetscMPIInt size;
2312: PetscInt n;
2313: Mat_MPIMAIJ *b;
2314: Mat B;
2317: PetscObjectReference((PetscObject)A);
2319: if (dof == 1) {
2320: *maij = A;
2321: } else {
2322: MatCreate(A->comm,&B);
2323: MatSetSizes(B,dof*A->m,dof*A->n,dof*A->M,dof*A->N);
2324: B->assembled = PETSC_TRUE;
2326: MPI_Comm_size(A->comm,&size);
2327: if (size == 1) {
2328: MatSetType(B,MATSEQMAIJ);
2329: B->ops->destroy = MatDestroy_SeqMAIJ;
2330: B->ops->view = MatView_SeqMAIJ;
2331: b = (Mat_MPIMAIJ*)B->data;
2332: b->dof = dof;
2333: b->AIJ = A;
2334: if (dof == 2) {
2335: B->ops->mult = MatMult_SeqMAIJ_2;
2336: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
2337: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
2338: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2339: } else if (dof == 3) {
2340: B->ops->mult = MatMult_SeqMAIJ_3;
2341: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
2342: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
2343: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2344: } else if (dof == 4) {
2345: B->ops->mult = MatMult_SeqMAIJ_4;
2346: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
2347: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
2348: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2349: } else if (dof == 5) {
2350: B->ops->mult = MatMult_SeqMAIJ_5;
2351: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
2352: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
2353: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2354: } else if (dof == 6) {
2355: B->ops->mult = MatMult_SeqMAIJ_6;
2356: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
2357: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
2358: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2359: } else if (dof == 7) {
2360: B->ops->mult = MatMult_SeqMAIJ_7;
2361: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
2362: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
2363: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2364: } else if (dof == 8) {
2365: B->ops->mult = MatMult_SeqMAIJ_8;
2366: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
2367: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
2368: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2369: } else if (dof == 9) {
2370: B->ops->mult = MatMult_SeqMAIJ_9;
2371: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
2372: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
2373: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2374: } else if (dof == 16) {
2375: B->ops->mult = MatMult_SeqMAIJ_16;
2376: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
2377: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
2378: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2379: } else {
2380: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2381: }
2382: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2383: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2384: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2385: } else {
2386: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2387: IS from,to;
2388: Vec gvec;
2389: PetscInt *garray,i;
2391: MatSetType(B,MATMPIMAIJ);
2392: B->ops->destroy = MatDestroy_MPIMAIJ;
2393: B->ops->view = MatView_MPIMAIJ;
2394: b = (Mat_MPIMAIJ*)B->data;
2395: b->dof = dof;
2396: b->A = A;
2397: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2398: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
2400: VecGetSize(mpiaij->lvec,&n);
2401: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2402: VecSetBlockSize(b->w,dof);
2404: /* create two temporary Index sets for build scatter gather */
2405: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2406: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2407: ISCreateBlock(A->comm,dof,n,garray,&from);
2408: PetscFree(garray);
2409: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
2411: /* create temporary global vector to generate scatter context */
2412: VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);
2413: VecSetBlockSize(gvec,dof);
2415: /* generate the scatter context */
2416: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
2418: ISDestroy(from);
2419: ISDestroy(to);
2420: VecDestroy(gvec);
2422: B->ops->mult = MatMult_MPIMAIJ_dof;
2423: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
2424: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
2425: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2426: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2427: }
2428: *maij = B;
2429: MatView_Private(B);
2430: }
2431: return(0);
2432: }