Actual source code: maij.c

  1: /*$Id: maij.c,v 1.19 2001/08/07 03:03:00 balay Exp $*/
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19:  #include src/mat/impls/maij/maij.h

 21: int MatMAIJGetAIJ(Mat A,Mat *B)
 22: {
 23:   int         ierr;
 24:   PetscTruth  ismpimaij,isseqmaij;

 27:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 28:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 29:   if (ismpimaij) {
 30:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 32:     *B = b->A;
 33:   } else if (isseqmaij) {
 34:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 36:     *B = b->AIJ;
 37:   } else {
 38:     *B = A;
 39:   }
 40:   return(0);
 41: }

 43: int MatMAIJRedimension(Mat A,int dof,Mat *B)
 44: {
 46:   Mat Aij;

 49:   MatMAIJGetAIJ(A,&Aij);
 50:   MatCreateMAIJ(Aij,dof,B);
 51:   return(0);
 52: }

 54: int MatDestroy_SeqMAIJ(Mat A)
 55: {
 56:   int         ierr;
 57:   Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 60:   if (b->AIJ) {
 61:     MatDestroy(b->AIJ);
 62:   }
 63:   PetscFree(b);
 64:   return(0);
 65: }

 67: int MatDestroy_MPIMAIJ(Mat A)
 68: {
 69:   int         ierr;
 70:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 73:   if (b->AIJ) {
 74:     MatDestroy(b->AIJ);
 75:   }
 76:   if (b->OAIJ) {
 77:     MatDestroy(b->OAIJ);
 78:   }
 79:   if (b->A) {
 80:     MatDestroy(b->A);
 81:   }
 82:   if (b->ctx) {
 83:     VecScatterDestroy(b->ctx);
 84:   }
 85:   if (b->w) {
 86:     VecDestroy(b->w);
 87:   }
 88:   PetscFree(b);
 89:   return(0);
 90: }

 92: EXTERN_C_BEGIN
 93: int MatCreate_MAIJ(Mat A)
 94: {
 95:   int         ierr;
 96:   Mat_MPIMAIJ *b;

 99:   ierr     = PetscNew(Mat_MPIMAIJ,&b);
100:   A->data  = (void*)b;
101:   PetscMemzero(b,sizeof(Mat_MPIMAIJ));
102:   PetscMemzero(A->ops,sizeof(struct _MatOps));
103:   A->factor           = 0;
104:   A->mapping          = 0;

106:   b->AIJ  = 0;
107:   b->dof  = 0;
108:   b->OAIJ = 0;
109:   b->ctx  = 0;
110:   b->w    = 0;
111:   return(0);
112: }
113: EXTERN_C_END

115: /* --------------------------------------------------------------------------------------*/
116: int MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
117: {
118:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
119:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
120:   PetscScalar   *x,*y,*v,sum1, sum2;
121:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
122:   int           n,i,jrow,j;

125:   VecGetArray(xx,&x);
126:   VecGetArray(yy,&y);
127:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
128:   idx  = a->j;
129:   v    = a->a;
130:   ii   = a->i;

132:   v    += shift; /* shift for Fortran start by 1 indexing */
133:   idx  += shift;
134:   for (i=0; i<m; i++) {
135:     jrow = ii[i];
136:     n    = ii[i+1] - jrow;
137:     sum1  = 0.0;
138:     sum2  = 0.0;
139:     for (j=0; j<n; j++) {
140:       sum1 += v[jrow]*x[2*idx[jrow]];
141:       sum2 += v[jrow]*x[2*idx[jrow]+1];
142:       jrow++;
143:      }
144:     y[2*i]   = sum1;
145:     y[2*i+1] = sum2;
146:   }

148:   PetscLogFlops(4*a->nz - 2*m);
149:   VecRestoreArray(xx,&x);
150:   VecRestoreArray(yy,&y);
151:   return(0);
152: }

154: int MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
155: {
156:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
157:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
158:   PetscScalar   *x,*y,*v,alpha1,alpha2,zero = 0.0;
159:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

162:   VecSet(&zero,yy);
163:   VecGetArray(xx,&x);
164:   VecGetArray(yy,&y);
165:   y = y + shift; /* shift for Fortran start by 1 indexing */
166:   for (i=0; i<m; i++) {
167:     idx    = a->j + a->i[i] + shift;
168:     v      = a->a + a->i[i] + shift;
169:     n      = a->i[i+1] - a->i[i];
170:     alpha1 = x[2*i];
171:     alpha2 = x[2*i+1];
172:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
173:   }
174:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
175:   VecRestoreArray(xx,&x);
176:   VecRestoreArray(yy,&y);
177:   return(0);
178: }

180: int MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
181: {
182:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
183:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
184:   PetscScalar   *x,*y,*v,sum1, sum2;
185:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
186:   int           n,i,jrow,j;

189:   if (yy != zz) {VecCopy(yy,zz);}
190:   VecGetArray(xx,&x);
191:   VecGetArray(zz,&y);
192:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
193:   idx  = a->j;
194:   v    = a->a;
195:   ii   = a->i;

197:   v    += shift; /* shift for Fortran start by 1 indexing */
198:   idx  += shift;
199:   for (i=0; i<m; i++) {
200:     jrow = ii[i];
201:     n    = ii[i+1] - jrow;
202:     sum1  = 0.0;
203:     sum2  = 0.0;
204:     for (j=0; j<n; j++) {
205:       sum1 += v[jrow]*x[2*idx[jrow]];
206:       sum2 += v[jrow]*x[2*idx[jrow]+1];
207:       jrow++;
208:      }
209:     y[2*i]   += sum1;
210:     y[2*i+1] += sum2;
211:   }

213:   PetscLogFlops(4*a->nz - 2*m);
214:   VecRestoreArray(xx,&x);
215:   VecRestoreArray(zz,&y);
216:   return(0);
217: }
218: int MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
219: {
220:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
221:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
222:   PetscScalar   *x,*y,*v,alpha1,alpha2;
223:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

226:   if (yy != zz) {VecCopy(yy,zz);}
227:   VecGetArray(xx,&x);
228:   VecGetArray(zz,&y);
229:   y = y + shift; /* shift for Fortran start by 1 indexing */
230:   for (i=0; i<m; i++) {
231:     idx   = a->j + a->i[i] + shift;
232:     v     = a->a + a->i[i] + shift;
233:     n     = a->i[i+1] - a->i[i];
234:     alpha1 = x[2*i];
235:     alpha2 = x[2*i+1];
236:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
237:   }
238:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
239:   VecRestoreArray(xx,&x);
240:   VecRestoreArray(zz,&y);
241:   return(0);
242: }
243: /* --------------------------------------------------------------------------------------*/
244: int MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
245: {
246:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
247:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
248:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
249:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
250:   int           n,i,jrow,j;

253:   VecGetArray(xx,&x);
254:   VecGetArray(yy,&y);
255:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
256:   idx  = a->j;
257:   v    = a->a;
258:   ii   = a->i;

260:   v    += shift; /* shift for Fortran start by 1 indexing */
261:   idx  += shift;
262:   for (i=0; i<m; i++) {
263:     jrow = ii[i];
264:     n    = ii[i+1] - jrow;
265:     sum1  = 0.0;
266:     sum2  = 0.0;
267:     sum3  = 0.0;
268:     for (j=0; j<n; j++) {
269:       sum1 += v[jrow]*x[3*idx[jrow]];
270:       sum2 += v[jrow]*x[3*idx[jrow]+1];
271:       sum3 += v[jrow]*x[3*idx[jrow]+2];
272:       jrow++;
273:      }
274:     y[3*i]   = sum1;
275:     y[3*i+1] = sum2;
276:     y[3*i+2] = sum3;
277:   }

279:   PetscLogFlops(6*a->nz - 3*m);
280:   VecRestoreArray(xx,&x);
281:   VecRestoreArray(yy,&y);
282:   return(0);
283: }

285: int MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
286: {
287:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
288:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
289:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
290:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

293:   VecSet(&zero,yy);
294:   VecGetArray(xx,&x);
295:   VecGetArray(yy,&y);
296:   y = y + shift; /* shift for Fortran start by 1 indexing */
297:   for (i=0; i<m; i++) {
298:     idx    = a->j + a->i[i] + shift;
299:     v      = a->a + a->i[i] + shift;
300:     n      = a->i[i+1] - a->i[i];
301:     alpha1 = x[3*i];
302:     alpha2 = x[3*i+1];
303:     alpha3 = x[3*i+2];
304:     while (n-->0) {
305:       y[3*(*idx)]   += alpha1*(*v);
306:       y[3*(*idx)+1] += alpha2*(*v);
307:       y[3*(*idx)+2] += alpha3*(*v);
308:       idx++; v++;
309:     }
310:   }
311:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
312:   VecRestoreArray(xx,&x);
313:   VecRestoreArray(yy,&y);
314:   return(0);
315: }

317: int MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
318: {
319:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
320:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
321:   PetscScalar   *x,*y,*v,sum1, sum2, sum3;
322:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
323:   int           n,i,jrow,j;

326:   if (yy != zz) {VecCopy(yy,zz);}
327:   VecGetArray(xx,&x);
328:   VecGetArray(zz,&y);
329:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
330:   idx  = a->j;
331:   v    = a->a;
332:   ii   = a->i;

334:   v    += shift; /* shift for Fortran start by 1 indexing */
335:   idx  += shift;
336:   for (i=0; i<m; i++) {
337:     jrow = ii[i];
338:     n    = ii[i+1] - jrow;
339:     sum1  = 0.0;
340:     sum2  = 0.0;
341:     sum3  = 0.0;
342:     for (j=0; j<n; j++) {
343:       sum1 += v[jrow]*x[3*idx[jrow]];
344:       sum2 += v[jrow]*x[3*idx[jrow]+1];
345:       sum3 += v[jrow]*x[3*idx[jrow]+2];
346:       jrow++;
347:      }
348:     y[3*i]   += sum1;
349:     y[3*i+1] += sum2;
350:     y[3*i+2] += sum3;
351:   }

353:   PetscLogFlops(6*a->nz);
354:   VecRestoreArray(xx,&x);
355:   VecRestoreArray(zz,&y);
356:   return(0);
357: }
358: int MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
359: {
360:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
361:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
362:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3;
363:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

366:   if (yy != zz) {VecCopy(yy,zz);}
367:   VecGetArray(xx,&x);
368:   VecGetArray(zz,&y);
369:   y = y + shift; /* shift for Fortran start by 1 indexing */
370:   for (i=0; i<m; i++) {
371:     idx    = a->j + a->i[i] + shift;
372:     v      = a->a + a->i[i] + shift;
373:     n      = a->i[i+1] - a->i[i];
374:     alpha1 = x[3*i];
375:     alpha2 = x[3*i+1];
376:     alpha3 = x[3*i+2];
377:     while (n-->0) {
378:       y[3*(*idx)]   += alpha1*(*v);
379:       y[3*(*idx)+1] += alpha2*(*v);
380:       y[3*(*idx)+2] += alpha3*(*v);
381:       idx++; v++;
382:     }
383:   }
384:   PetscLogFlops(6*a->nz);
385:   VecRestoreArray(xx,&x);
386:   VecRestoreArray(zz,&y);
387:   return(0);
388: }

390: /* ------------------------------------------------------------------------------*/
391: int MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
392: {
393:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
394:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
395:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
396:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
397:   int           n,i,jrow,j;

400:   VecGetArray(xx,&x);
401:   VecGetArray(yy,&y);
402:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
403:   idx  = a->j;
404:   v    = a->a;
405:   ii   = a->i;

407:   v    += shift; /* shift for Fortran start by 1 indexing */
408:   idx  += shift;
409:   for (i=0; i<m; i++) {
410:     jrow = ii[i];
411:     n    = ii[i+1] - jrow;
412:     sum1  = 0.0;
413:     sum2  = 0.0;
414:     sum3  = 0.0;
415:     sum4  = 0.0;
416:     for (j=0; j<n; j++) {
417:       sum1 += v[jrow]*x[4*idx[jrow]];
418:       sum2 += v[jrow]*x[4*idx[jrow]+1];
419:       sum3 += v[jrow]*x[4*idx[jrow]+2];
420:       sum4 += v[jrow]*x[4*idx[jrow]+3];
421:       jrow++;
422:      }
423:     y[4*i]   = sum1;
424:     y[4*i+1] = sum2;
425:     y[4*i+2] = sum3;
426:     y[4*i+3] = sum4;
427:   }

429:   PetscLogFlops(8*a->nz - 4*m);
430:   VecRestoreArray(xx,&x);
431:   VecRestoreArray(yy,&y);
432:   return(0);
433: }

435: int MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
436: {
437:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
438:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
439:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
440:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

443:   VecSet(&zero,yy);
444:   VecGetArray(xx,&x);
445:   VecGetArray(yy,&y);
446:   y = y + shift; /* shift for Fortran start by 1 indexing */
447:   for (i=0; i<m; i++) {
448:     idx    = a->j + a->i[i] + shift;
449:     v      = a->a + a->i[i] + shift;
450:     n      = a->i[i+1] - a->i[i];
451:     alpha1 = x[4*i];
452:     alpha2 = x[4*i+1];
453:     alpha3 = x[4*i+2];
454:     alpha4 = x[4*i+3];
455:     while (n-->0) {
456:       y[4*(*idx)]   += alpha1*(*v);
457:       y[4*(*idx)+1] += alpha2*(*v);
458:       y[4*(*idx)+2] += alpha3*(*v);
459:       y[4*(*idx)+3] += alpha4*(*v);
460:       idx++; v++;
461:     }
462:   }
463:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
464:   VecRestoreArray(xx,&x);
465:   VecRestoreArray(yy,&y);
466:   return(0);
467: }

469: int MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
470: {
471:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
472:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
473:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4;
474:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
475:   int           n,i,jrow,j;

478:   if (yy != zz) {VecCopy(yy,zz);}
479:   VecGetArray(xx,&x);
480:   VecGetArray(zz,&y);
481:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
482:   idx  = a->j;
483:   v    = a->a;
484:   ii   = a->i;

486:   v    += shift; /* shift for Fortran start by 1 indexing */
487:   idx  += shift;
488:   for (i=0; i<m; i++) {
489:     jrow = ii[i];
490:     n    = ii[i+1] - jrow;
491:     sum1  = 0.0;
492:     sum2  = 0.0;
493:     sum3  = 0.0;
494:     sum4  = 0.0;
495:     for (j=0; j<n; j++) {
496:       sum1 += v[jrow]*x[4*idx[jrow]];
497:       sum2 += v[jrow]*x[4*idx[jrow]+1];
498:       sum3 += v[jrow]*x[4*idx[jrow]+2];
499:       sum4 += v[jrow]*x[4*idx[jrow]+3];
500:       jrow++;
501:      }
502:     y[4*i]   += sum1;
503:     y[4*i+1] += sum2;
504:     y[4*i+2] += sum3;
505:     y[4*i+3] += sum4;
506:   }

508:   PetscLogFlops(8*a->nz - 4*m);
509:   VecRestoreArray(xx,&x);
510:   VecRestoreArray(zz,&y);
511:   return(0);
512: }
513: int MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
514: {
515:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
516:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
517:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
518:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

521:   if (yy != zz) {VecCopy(yy,zz);}
522:   VecGetArray(xx,&x);
523:   VecGetArray(zz,&y);
524:   y = y + shift; /* shift for Fortran start by 1 indexing */
525:   for (i=0; i<m; i++) {
526:     idx    = a->j + a->i[i] + shift;
527:     v      = a->a + a->i[i] + shift;
528:     n      = a->i[i+1] - a->i[i];
529:     alpha1 = x[4*i];
530:     alpha2 = x[4*i+1];
531:     alpha3 = x[4*i+2];
532:     alpha4 = x[4*i+3];
533:     while (n-->0) {
534:       y[4*(*idx)]   += alpha1*(*v);
535:       y[4*(*idx)+1] += alpha2*(*v);
536:       y[4*(*idx)+2] += alpha3*(*v);
537:       y[4*(*idx)+3] += alpha4*(*v);
538:       idx++; v++;
539:     }
540:   }
541:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
542:   VecRestoreArray(xx,&x);
543:   VecRestoreArray(zz,&y);
544:   return(0);
545: }
546: /* ------------------------------------------------------------------------------*/

548: int MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
549: {
550:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
551:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
552:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
553:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
554:   int           n,i,jrow,j;

557:   VecGetArray(xx,&x);
558:   VecGetArray(yy,&y);
559:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
560:   idx  = a->j;
561:   v    = a->a;
562:   ii   = a->i;

564:   v    += shift; /* shift for Fortran start by 1 indexing */
565:   idx  += shift;
566:   for (i=0; i<m; i++) {
567:     jrow = ii[i];
568:     n    = ii[i+1] - jrow;
569:     sum1  = 0.0;
570:     sum2  = 0.0;
571:     sum3  = 0.0;
572:     sum4  = 0.0;
573:     sum5  = 0.0;
574:     for (j=0; j<n; j++) {
575:       sum1 += v[jrow]*x[5*idx[jrow]];
576:       sum2 += v[jrow]*x[5*idx[jrow]+1];
577:       sum3 += v[jrow]*x[5*idx[jrow]+2];
578:       sum4 += v[jrow]*x[5*idx[jrow]+3];
579:       sum5 += v[jrow]*x[5*idx[jrow]+4];
580:       jrow++;
581:      }
582:     y[5*i]   = sum1;
583:     y[5*i+1] = sum2;
584:     y[5*i+2] = sum3;
585:     y[5*i+3] = sum4;
586:     y[5*i+4] = sum5;
587:   }

589:   PetscLogFlops(10*a->nz - 5*m);
590:   VecRestoreArray(xx,&x);
591:   VecRestoreArray(yy,&y);
592:   return(0);
593: }

595: int MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
596: {
597:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
598:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
599:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
600:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

603:   VecSet(&zero,yy);
604:   VecGetArray(xx,&x);
605:   VecGetArray(yy,&y);
606:   y = y + shift; /* shift for Fortran start by 1 indexing */
607:   for (i=0; i<m; i++) {
608:     idx    = a->j + a->i[i] + shift;
609:     v      = a->a + a->i[i] + shift;
610:     n      = a->i[i+1] - a->i[i];
611:     alpha1 = x[5*i];
612:     alpha2 = x[5*i+1];
613:     alpha3 = x[5*i+2];
614:     alpha4 = x[5*i+3];
615:     alpha5 = x[5*i+4];
616:     while (n-->0) {
617:       y[5*(*idx)]   += alpha1*(*v);
618:       y[5*(*idx)+1] += alpha2*(*v);
619:       y[5*(*idx)+2] += alpha3*(*v);
620:       y[5*(*idx)+3] += alpha4*(*v);
621:       y[5*(*idx)+4] += alpha5*(*v);
622:       idx++; v++;
623:     }
624:   }
625:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
626:   VecRestoreArray(xx,&x);
627:   VecRestoreArray(yy,&y);
628:   return(0);
629: }

631: int MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
632: {
633:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
634:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
635:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
636:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
637:   int           n,i,jrow,j;

640:   if (yy != zz) {VecCopy(yy,zz);}
641:   VecGetArray(xx,&x);
642:   VecGetArray(zz,&y);
643:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
644:   idx  = a->j;
645:   v    = a->a;
646:   ii   = a->i;

648:   v    += shift; /* shift for Fortran start by 1 indexing */
649:   idx  += shift;
650:   for (i=0; i<m; i++) {
651:     jrow = ii[i];
652:     n    = ii[i+1] - jrow;
653:     sum1  = 0.0;
654:     sum2  = 0.0;
655:     sum3  = 0.0;
656:     sum4  = 0.0;
657:     sum5  = 0.0;
658:     for (j=0; j<n; j++) {
659:       sum1 += v[jrow]*x[5*idx[jrow]];
660:       sum2 += v[jrow]*x[5*idx[jrow]+1];
661:       sum3 += v[jrow]*x[5*idx[jrow]+2];
662:       sum4 += v[jrow]*x[5*idx[jrow]+3];
663:       sum5 += v[jrow]*x[5*idx[jrow]+4];
664:       jrow++;
665:      }
666:     y[5*i]   += sum1;
667:     y[5*i+1] += sum2;
668:     y[5*i+2] += sum3;
669:     y[5*i+3] += sum4;
670:     y[5*i+4] += sum5;
671:   }

673:   PetscLogFlops(10*a->nz);
674:   VecRestoreArray(xx,&x);
675:   VecRestoreArray(zz,&y);
676:   return(0);
677: }

679: int MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
680: {
681:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
682:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
683:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
684:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

687:   if (yy != zz) {VecCopy(yy,zz);}
688:   VecGetArray(xx,&x);
689:   VecGetArray(zz,&y);
690:   y = y + shift; /* shift for Fortran start by 1 indexing */
691:   for (i=0; i<m; i++) {
692:     idx    = a->j + a->i[i] + shift;
693:     v      = a->a + a->i[i] + shift;
694:     n      = a->i[i+1] - a->i[i];
695:     alpha1 = x[5*i];
696:     alpha2 = x[5*i+1];
697:     alpha3 = x[5*i+2];
698:     alpha4 = x[5*i+3];
699:     alpha5 = x[5*i+4];
700:     while (n-->0) {
701:       y[5*(*idx)]   += alpha1*(*v);
702:       y[5*(*idx)+1] += alpha2*(*v);
703:       y[5*(*idx)+2] += alpha3*(*v);
704:       y[5*(*idx)+3] += alpha4*(*v);
705:       y[5*(*idx)+4] += alpha5*(*v);
706:       idx++; v++;
707:     }
708:   }
709:   PetscLogFlops(10*a->nz);
710:   VecRestoreArray(xx,&x);
711:   VecRestoreArray(zz,&y);
712:   return(0);
713: }

715: /* ------------------------------------------------------------------------------*/
716: int MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
717: {
718:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
719:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
720:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
721:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
722:   int           n,i,jrow,j;

725:   VecGetArray(xx,&x);
726:   VecGetArray(yy,&y);
727:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
728:   idx  = a->j;
729:   v    = a->a;
730:   ii   = a->i;

732:   v    += shift; /* shift for Fortran start by 1 indexing */
733:   idx  += shift;
734:   for (i=0; i<m; i++) {
735:     jrow = ii[i];
736:     n    = ii[i+1] - jrow;
737:     sum1  = 0.0;
738:     sum2  = 0.0;
739:     sum3  = 0.0;
740:     sum4  = 0.0;
741:     sum5  = 0.0;
742:     sum6  = 0.0;
743:     for (j=0; j<n; j++) {
744:       sum1 += v[jrow]*x[6*idx[jrow]];
745:       sum2 += v[jrow]*x[6*idx[jrow]+1];
746:       sum3 += v[jrow]*x[6*idx[jrow]+2];
747:       sum4 += v[jrow]*x[6*idx[jrow]+3];
748:       sum5 += v[jrow]*x[6*idx[jrow]+4];
749:       sum6 += v[jrow]*x[6*idx[jrow]+5];
750:       jrow++;
751:      }
752:     y[6*i]   = sum1;
753:     y[6*i+1] = sum2;
754:     y[6*i+2] = sum3;
755:     y[6*i+3] = sum4;
756:     y[6*i+4] = sum5;
757:     y[6*i+5] = sum6;
758:   }

760:   PetscLogFlops(12*a->nz - 6*m);
761:   VecRestoreArray(xx,&x);
762:   VecRestoreArray(yy,&y);
763:   return(0);
764: }

766: int MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
767: {
768:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
769:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
770:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
771:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

774:   VecSet(&zero,yy);
775:   VecGetArray(xx,&x);
776:   VecGetArray(yy,&y);
777:   y = y + shift; /* shift for Fortran start by 1 indexing */
778:   for (i=0; i<m; i++) {
779:     idx    = a->j + a->i[i] + shift;
780:     v      = a->a + a->i[i] + shift;
781:     n      = a->i[i+1] - a->i[i];
782:     alpha1 = x[6*i];
783:     alpha2 = x[6*i+1];
784:     alpha3 = x[6*i+2];
785:     alpha4 = x[6*i+3];
786:     alpha5 = x[6*i+4];
787:     alpha6 = x[6*i+5];
788:     while (n-->0) {
789:       y[6*(*idx)]   += alpha1*(*v);
790:       y[6*(*idx)+1] += alpha2*(*v);
791:       y[6*(*idx)+2] += alpha3*(*v);
792:       y[6*(*idx)+3] += alpha4*(*v);
793:       y[6*(*idx)+4] += alpha5*(*v);
794:       y[6*(*idx)+5] += alpha6*(*v);
795:       idx++; v++;
796:     }
797:   }
798:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
799:   VecRestoreArray(xx,&x);
800:   VecRestoreArray(yy,&y);
801:   return(0);
802: }

804: int MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
805: {
806:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
807:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
808:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
809:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
810:   int           n,i,jrow,j;

813:   if (yy != zz) {VecCopy(yy,zz);}
814:   VecGetArray(xx,&x);
815:   VecGetArray(zz,&y);
816:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
817:   idx  = a->j;
818:   v    = a->a;
819:   ii   = a->i;

821:   v    += shift; /* shift for Fortran start by 1 indexing */
822:   idx  += shift;
823:   for (i=0; i<m; i++) {
824:     jrow = ii[i];
825:     n    = ii[i+1] - jrow;
826:     sum1  = 0.0;
827:     sum2  = 0.0;
828:     sum3  = 0.0;
829:     sum4  = 0.0;
830:     sum5  = 0.0;
831:     sum6  = 0.0;
832:     for (j=0; j<n; j++) {
833:       sum1 += v[jrow]*x[6*idx[jrow]];
834:       sum2 += v[jrow]*x[6*idx[jrow]+1];
835:       sum3 += v[jrow]*x[6*idx[jrow]+2];
836:       sum4 += v[jrow]*x[6*idx[jrow]+3];
837:       sum5 += v[jrow]*x[6*idx[jrow]+4];
838:       sum6 += v[jrow]*x[6*idx[jrow]+5];
839:       jrow++;
840:      }
841:     y[6*i]   += sum1;
842:     y[6*i+1] += sum2;
843:     y[6*i+2] += sum3;
844:     y[6*i+3] += sum4;
845:     y[6*i+4] += sum5;
846:     y[6*i+5] += sum6;
847:   }

849:   PetscLogFlops(12*a->nz);
850:   VecRestoreArray(xx,&x);
851:   VecRestoreArray(zz,&y);
852:   return(0);
853: }

855: int MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
856: {
857:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
858:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
859:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
860:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

863:   if (yy != zz) {VecCopy(yy,zz);}
864:   VecGetArray(xx,&x);
865:   VecGetArray(zz,&y);
866:   y = y + shift; /* shift for Fortran start by 1 indexing */
867:   for (i=0; i<m; i++) {
868:     idx    = a->j + a->i[i] + shift;
869:     v      = a->a + a->i[i] + shift;
870:     n      = a->i[i+1] - a->i[i];
871:     alpha1 = x[6*i];
872:     alpha2 = x[6*i+1];
873:     alpha3 = x[6*i+2];
874:     alpha4 = x[6*i+3];
875:     alpha5 = x[6*i+4];
876:     alpha6 = x[6*i+5];
877:     while (n-->0) {
878:       y[6*(*idx)]   += alpha1*(*v);
879:       y[6*(*idx)+1] += alpha2*(*v);
880:       y[6*(*idx)+2] += alpha3*(*v);
881:       y[6*(*idx)+3] += alpha4*(*v);
882:       y[6*(*idx)+4] += alpha5*(*v);
883:       y[6*(*idx)+5] += alpha6*(*v);
884:       idx++; v++;
885:     }
886:   }
887:   PetscLogFlops(12*a->nz);
888:   VecRestoreArray(xx,&x);
889:   VecRestoreArray(zz,&y);
890:   return(0);
891: }

893: /* ------------------------------------------------------------------------------*/
894: int MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
895: {
896:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
897:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
898:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
899:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
900:   int           n,i,jrow,j;

903:   VecGetArray(xx,&x);
904:   VecGetArray(yy,&y);
905:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
906:   idx  = a->j;
907:   v    = a->a;
908:   ii   = a->i;

910:   v    += shift; /* shift for Fortran start by 1 indexing */
911:   idx  += shift;
912:   for (i=0; i<m; i++) {
913:     jrow = ii[i];
914:     n    = ii[i+1] - jrow;
915:     sum1  = 0.0;
916:     sum2  = 0.0;
917:     sum3  = 0.0;
918:     sum4  = 0.0;
919:     sum5  = 0.0;
920:     sum6  = 0.0;
921:     sum7  = 0.0;
922:     sum8  = 0.0;
923:     for (j=0; j<n; j++) {
924:       sum1 += v[jrow]*x[8*idx[jrow]];
925:       sum2 += v[jrow]*x[8*idx[jrow]+1];
926:       sum3 += v[jrow]*x[8*idx[jrow]+2];
927:       sum4 += v[jrow]*x[8*idx[jrow]+3];
928:       sum5 += v[jrow]*x[8*idx[jrow]+4];
929:       sum6 += v[jrow]*x[8*idx[jrow]+5];
930:       sum7 += v[jrow]*x[8*idx[jrow]+6];
931:       sum8 += v[jrow]*x[8*idx[jrow]+7];
932:       jrow++;
933:      }
934:     y[8*i]   = sum1;
935:     y[8*i+1] = sum2;
936:     y[8*i+2] = sum3;
937:     y[8*i+3] = sum4;
938:     y[8*i+4] = sum5;
939:     y[8*i+5] = sum6;
940:     y[8*i+6] = sum7;
941:     y[8*i+7] = sum8;
942:   }

944:   PetscLogFlops(16*a->nz - 8*m);
945:   VecRestoreArray(xx,&x);
946:   VecRestoreArray(yy,&y);
947:   return(0);
948: }

950: int MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
951: {
952:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
953:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
954:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
955:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

958:   VecSet(&zero,yy);
959:   VecGetArray(xx,&x);
960:   VecGetArray(yy,&y);
961:   y = y + shift; /* shift for Fortran start by 1 indexing */
962:   for (i=0; i<m; i++) {
963:     idx    = a->j + a->i[i] + shift;
964:     v      = a->a + a->i[i] + shift;
965:     n      = a->i[i+1] - a->i[i];
966:     alpha1 = x[8*i];
967:     alpha2 = x[8*i+1];
968:     alpha3 = x[8*i+2];
969:     alpha4 = x[8*i+3];
970:     alpha5 = x[8*i+4];
971:     alpha6 = x[8*i+5];
972:     alpha7 = x[8*i+6];
973:     alpha8 = x[8*i+7];
974:     while (n-->0) {
975:       y[8*(*idx)]   += alpha1*(*v);
976:       y[8*(*idx)+1] += alpha2*(*v);
977:       y[8*(*idx)+2] += alpha3*(*v);
978:       y[8*(*idx)+3] += alpha4*(*v);
979:       y[8*(*idx)+4] += alpha5*(*v);
980:       y[8*(*idx)+5] += alpha6*(*v);
981:       y[8*(*idx)+6] += alpha7*(*v);
982:       y[8*(*idx)+7] += alpha8*(*v);
983:       idx++; v++;
984:     }
985:   }
986:   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
987:   VecRestoreArray(xx,&x);
988:   VecRestoreArray(yy,&y);
989:   return(0);
990: }

992: int MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
993: {
994:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
995:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
996:   PetscScalar   *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
997:   int           ierr,m = b->AIJ->m,*idx,shift = a->indexshift,*ii;
998:   int           n,i,jrow,j;

1001:   if (yy != zz) {VecCopy(yy,zz);}
1002:   VecGetArray(xx,&x);
1003:   VecGetArray(zz,&y);
1004:   x    = x + shift;    /* shift for Fortran start by 1 indexing */
1005:   idx  = a->j;
1006:   v    = a->a;
1007:   ii   = a->i;

1009:   v    += shift; /* shift for Fortran start by 1 indexing */
1010:   idx  += shift;
1011:   for (i=0; i<m; i++) {
1012:     jrow = ii[i];
1013:     n    = ii[i+1] - jrow;
1014:     sum1  = 0.0;
1015:     sum2  = 0.0;
1016:     sum3  = 0.0;
1017:     sum4  = 0.0;
1018:     sum5  = 0.0;
1019:     sum6  = 0.0;
1020:     sum7  = 0.0;
1021:     sum8  = 0.0;
1022:     for (j=0; j<n; j++) {
1023:       sum1 += v[jrow]*x[8*idx[jrow]];
1024:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1025:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1026:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1027:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1028:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1029:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1030:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1031:       jrow++;
1032:      }
1033:     y[8*i]   += sum1;
1034:     y[8*i+1] += sum2;
1035:     y[8*i+2] += sum3;
1036:     y[8*i+3] += sum4;
1037:     y[8*i+4] += sum5;
1038:     y[8*i+5] += sum6;
1039:     y[8*i+6] += sum7;
1040:     y[8*i+7] += sum8;
1041:   }

1043:   PetscLogFlops(16*a->nz);
1044:   VecRestoreArray(xx,&x);
1045:   VecRestoreArray(zz,&y);
1046:   return(0);
1047: }

1049: int MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1050: {
1051:   Mat_SeqMAIJ   *b = (Mat_SeqMAIJ*)A->data;
1052:   Mat_SeqAIJ    *a = (Mat_SeqAIJ*)b->AIJ->data;
1053:   PetscScalar   *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1054:   int           ierr,m = b->AIJ->m,n,i,*idx,shift = a->indexshift;

1057:   if (yy != zz) {VecCopy(yy,zz);}
1058:   VecGetArray(xx,&x);
1059:   VecGetArray(zz,&y);
1060:   y = y + shift; /* shift for Fortran start by 1 indexing */
1061:   for (i=0; i<m; i++) {
1062:     idx    = a->j + a->i[i] + shift;
1063:     v      = a->a + a->i[i] + shift;
1064:     n      = a->i[i+1] - a->i[i];
1065:     alpha1 = x[8*i];
1066:     alpha2 = x[8*i+1];
1067:     alpha3 = x[8*i+2];
1068:     alpha4 = x[8*i+3];
1069:     alpha5 = x[8*i+4];
1070:     alpha6 = x[8*i+5];
1071:     alpha7 = x[8*i+6];
1072:     alpha8 = x[8*i+7];
1073:     while (n-->0) {
1074:       y[8*(*idx)]   += alpha1*(*v);
1075:       y[8*(*idx)+1] += alpha2*(*v);
1076:       y[8*(*idx)+2] += alpha3*(*v);
1077:       y[8*(*idx)+3] += alpha4*(*v);
1078:       y[8*(*idx)+4] += alpha5*(*v);
1079:       y[8*(*idx)+5] += alpha6*(*v);
1080:       y[8*(*idx)+6] += alpha7*(*v);
1081:       y[8*(*idx)+7] += alpha8*(*v);
1082:       idx++; v++;
1083:     }
1084:   }
1085:   PetscLogFlops(16*a->nz);
1086:   VecRestoreArray(xx,&x);
1087:   VecRestoreArray(zz,&y);
1088:   return(0);
1089: }

1091: /*===================================================================================*/
1092: int MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1093: {
1094:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1095:   int         ierr;

1098:   /* start the scatter */
1099:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1100:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1101:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1102:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1103:   return(0);
1104: }

1106: int MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1107: {
1108:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1109:   int         ierr;
1111:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1112:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1113:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1114:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1115:   return(0);
1116: }

1118: int MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1119: {
1120:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1121:   int         ierr;

1124:   /* start the scatter */
1125:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1126:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1127:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1128:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,zz);
1129:   return(0);
1130: }

1132: int MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1133: {
1134:   Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
1135:   int         ierr;
1137:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1138:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1139:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1140:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1141:   return(0);
1142: }

1144: /* ---------------------------------------------------------------------------------- */
1145: int MatCreateMAIJ(Mat A,int dof,Mat *maij)
1146: {
1147:   int         ierr,size,n;
1148:   Mat_MPIMAIJ *b;
1149:   Mat         B;

1152:   ierr   = PetscObjectReference((PetscObject)A);

1154:   if (dof == 1) {
1155:     *maij = A;
1156:   } else {
1157:     MatCreate(A->comm,dof*A->m,dof*A->n,dof*A->M,dof*A->N,&B);
1158:     B->assembled    = PETSC_TRUE;

1160:     MPI_Comm_size(A->comm,&size);
1161:     if (size == 1) {
1162:       MatSetType(B,MATSEQMAIJ);
1163:       B->ops->destroy = MatDestroy_SeqMAIJ;
1164:       b      = (Mat_MPIMAIJ*)B->data;
1165:       b->dof = dof;
1166:       b->AIJ = A;
1167:       if (dof == 2) {
1168:         B->ops->mult             = MatMult_SeqMAIJ_2;
1169:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
1170:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
1171:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1172:       } else if (dof == 3) {
1173:         B->ops->mult             = MatMult_SeqMAIJ_3;
1174:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
1175:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
1176:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1177:       } else if (dof == 4) {
1178:         B->ops->mult             = MatMult_SeqMAIJ_4;
1179:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
1180:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
1181:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1182:       } else if (dof == 5) {
1183:         B->ops->mult             = MatMult_SeqMAIJ_5;
1184:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
1185:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
1186:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1187:       } else if (dof == 6) {
1188:         B->ops->mult             = MatMult_SeqMAIJ_6;
1189:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
1190:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
1191:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1192:       } else if (dof == 8) {
1193:         B->ops->mult             = MatMult_SeqMAIJ_8;
1194:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
1195:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
1196:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1197:       } else {
1198:         SETERRQ1(1,"Cannot handle a dof of %d. Send request for code to petsc-maint@mcs.anl.govn",dof);
1199:       }
1200:     } else {
1201:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1202:       IS         from,to;
1203:       Vec        gvec;
1204:       int        *garray,i;

1206:       MatSetType(B,MATMPIMAIJ);
1207:       B->ops->destroy = MatDestroy_MPIMAIJ;
1208:       b      = (Mat_MPIMAIJ*)B->data;
1209:       b->dof = dof;
1210:       b->A   = A;
1211:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
1212:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

1214:       VecGetSize(mpiaij->lvec,&n);
1215:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);

1217:       /* create two temporary Index sets for build scatter gather */
1218:       PetscMalloc((n+1)*sizeof(int),&garray);
1219:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
1220:       ISCreateBlock(A->comm,dof,n,garray,&from);
1221:       PetscFree(garray);
1222:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

1224:       /* create temporary global vector to generate scatter context */
1225:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);

1227:       /* generate the scatter context */
1228:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

1230:       ISDestroy(from);
1231:       ISDestroy(to);
1232:       VecDestroy(gvec);

1234:       B->ops->mult             = MatMult_MPIMAIJ_dof;
1235:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
1236:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
1237:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1238:     }
1239:     *maij = B;
1240:   }
1241:   return(0);
1242: }