Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include src/mat/impls/maij/maij.h
 21:  #include src/mat/utils/freespace.h
 22:  #include vecimpl.h

 26: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
 27: {
 29:   PetscTruth     ismpimaij,isseqmaij;

 32:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 33:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 34:   if (ismpimaij) {
 35:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 37:     *B = b->A;
 38:   } else if (isseqmaij) {
 39:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 41:     *B = b->AIJ;
 42:   } else {
 43:     *B = A;
 44:   }
 45:   return(0);
 46: }

 50: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 51: {
 53:   Mat            Aij;

 56:   MatMAIJGetAIJ(A,&Aij);
 57:   MatCreateMAIJ(Aij,dof,B);
 58:   return(0);
 59: }

 63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 64: {
 66:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 69:   if (b->AIJ) {
 70:     MatDestroy(b->AIJ);
 71:   }
 72:   PetscFree(b);
 73:   return(0);
 74: }

 78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
 79: {
 81:   Mat            B;

 84:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
 85:   MatView(B,viewer);
 86:   MatDestroy(B);
 87:   return(0);
 88: }

 92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
 93: {
 95:   Mat            B;

 98:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
 99:   MatView(B,viewer);
100:   MatDestroy(B);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

112:   if (b->AIJ) {
113:     MatDestroy(b->AIJ);
114:   }
115:   if (b->OAIJ) {
116:     MatDestroy(b->OAIJ);
117:   }
118:   if (b->A) {
119:     MatDestroy(b->A);
120:   }
121:   if (b->ctx) {
122:     VecScatterDestroy(b->ctx);
123:   }
124:   if (b->w) {
125:     VecDestroy(b->w);
126:   }
127:   PetscFree(b);
128:   return(0);
129: }

131: /*MC
132:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
133:   multicomponent problems, interpolating or restricting each component the same way independently.
134:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

136:   Operations provided:
137: . MatMult
138: . MatMultTranspose
139: . MatMultAdd
140: . MatMultTransposeAdd

142:   Level: advanced

144: .seealso: MatCreateSeqDense
145: M*/

150: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
151: {
153:   Mat_MPIMAIJ    *b;

156:   PetscNew(Mat_MPIMAIJ,&b);
157:   A->data  = (void*)b;
158:   PetscMemzero(A->ops,sizeof(struct _MatOps));
159:   A->factor           = 0;
160:   A->mapping          = 0;

162:   b->AIJ  = 0;
163:   b->dof  = 0;
164:   b->OAIJ = 0;
165:   b->ctx  = 0;
166:   b->w    = 0;
167:   return(0);
168: }

171: /* --------------------------------------------------------------------------------------*/
174: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
175: {
176:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
177:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
178:   PetscScalar    *x,*y,*v,sum1, sum2;
180:   PetscInt       m = b->AIJ->m,*idx,*ii;
181:   PetscInt       n,i,jrow,j;

184:   VecGetArray(xx,&x);
185:   VecGetArray(yy,&y);
186:   idx  = a->j;
187:   v    = a->a;
188:   ii   = a->i;

190:   for (i=0; i<m; i++) {
191:     jrow = ii[i];
192:     n    = ii[i+1] - jrow;
193:     sum1  = 0.0;
194:     sum2  = 0.0;
195:     for (j=0; j<n; j++) {
196:       sum1 += v[jrow]*x[2*idx[jrow]];
197:       sum2 += v[jrow]*x[2*idx[jrow]+1];
198:       jrow++;
199:      }
200:     y[2*i]   = sum1;
201:     y[2*i+1] = sum2;
202:   }

204:   PetscLogFlops(4*a->nz - 2*m);
205:   VecRestoreArray(xx,&x);
206:   VecRestoreArray(yy,&y);
207:   return(0);
208: }

212: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
213: {
214:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
215:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
216:   PetscScalar    *x,*y,*v,alpha1,alpha2,zero = 0.0;
218:   PetscInt       m = b->AIJ->m,n,i,*idx;

221:   VecSet(yy,zero);
222:   VecGetArray(xx,&x);
223:   VecGetArray(yy,&y);
224: 
225:   for (i=0; i<m; i++) {
226:     idx    = a->j + a->i[i] ;
227:     v      = a->a + a->i[i] ;
228:     n      = a->i[i+1] - a->i[i];
229:     alpha1 = x[2*i];
230:     alpha2 = x[2*i+1];
231:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
232:   }
233:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
234:   VecRestoreArray(xx,&x);
235:   VecRestoreArray(yy,&y);
236:   return(0);
237: }

241: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
242: {
243:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
244:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
245:   PetscScalar    *x,*y,*v,sum1, sum2;
247:   PetscInt       m = b->AIJ->m,*idx,*ii;
248:   PetscInt       n,i,jrow,j;

251:   if (yy != zz) {VecCopy(yy,zz);}
252:   VecGetArray(xx,&x);
253:   VecGetArray(zz,&y);
254:   idx  = a->j;
255:   v    = a->a;
256:   ii   = a->i;

258:   for (i=0; i<m; i++) {
259:     jrow = ii[i];
260:     n    = ii[i+1] - jrow;
261:     sum1  = 0.0;
262:     sum2  = 0.0;
263:     for (j=0; j<n; j++) {
264:       sum1 += v[jrow]*x[2*idx[jrow]];
265:       sum2 += v[jrow]*x[2*idx[jrow]+1];
266:       jrow++;
267:      }
268:     y[2*i]   += sum1;
269:     y[2*i+1] += sum2;
270:   }

272:   PetscLogFlops(4*a->nz - 2*m);
273:   VecRestoreArray(xx,&x);
274:   VecRestoreArray(zz,&y);
275:   return(0);
276: }
279: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
280: {
281:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
282:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
283:   PetscScalar    *x,*y,*v,alpha1,alpha2;
285:   PetscInt       m = b->AIJ->m,n,i,*idx;

288:   if (yy != zz) {VecCopy(yy,zz);}
289:   VecGetArray(xx,&x);
290:   VecGetArray(zz,&y);
291: 
292:   for (i=0; i<m; i++) {
293:     idx   = a->j + a->i[i] ;
294:     v     = a->a + a->i[i] ;
295:     n     = a->i[i+1] - a->i[i];
296:     alpha1 = x[2*i];
297:     alpha2 = x[2*i+1];
298:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
299:   }
300:   PetscLogFlops(4*a->nz - 2*b->AIJ->n);
301:   VecRestoreArray(xx,&x);
302:   VecRestoreArray(zz,&y);
303:   return(0);
304: }
305: /* --------------------------------------------------------------------------------------*/
308: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309: {
310:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
311:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
312:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
314:   PetscInt       m = b->AIJ->m,*idx,*ii;
315:   PetscInt       n,i,jrow,j;

318:   VecGetArray(xx,&x);
319:   VecGetArray(yy,&y);
320:   idx  = a->j;
321:   v    = a->a;
322:   ii   = a->i;

324:   for (i=0; i<m; i++) {
325:     jrow = ii[i];
326:     n    = ii[i+1] - jrow;
327:     sum1  = 0.0;
328:     sum2  = 0.0;
329:     sum3  = 0.0;
330:     for (j=0; j<n; j++) {
331:       sum1 += v[jrow]*x[3*idx[jrow]];
332:       sum2 += v[jrow]*x[3*idx[jrow]+1];
333:       sum3 += v[jrow]*x[3*idx[jrow]+2];
334:       jrow++;
335:      }
336:     y[3*i]   = sum1;
337:     y[3*i+1] = sum2;
338:     y[3*i+2] = sum3;
339:   }

341:   PetscLogFlops(6*a->nz - 3*m);
342:   VecRestoreArray(xx,&x);
343:   VecRestoreArray(yy,&y);
344:   return(0);
345: }

349: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
350: {
351:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
352:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
353:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
355:   PetscInt       m = b->AIJ->m,n,i,*idx;

358:   VecSet(yy,zero);
359:   VecGetArray(xx,&x);
360:   VecGetArray(yy,&y);
361: 
362:   for (i=0; i<m; i++) {
363:     idx    = a->j + a->i[i];
364:     v      = a->a + a->i[i];
365:     n      = a->i[i+1] - a->i[i];
366:     alpha1 = x[3*i];
367:     alpha2 = x[3*i+1];
368:     alpha3 = x[3*i+2];
369:     while (n-->0) {
370:       y[3*(*idx)]   += alpha1*(*v);
371:       y[3*(*idx)+1] += alpha2*(*v);
372:       y[3*(*idx)+2] += alpha3*(*v);
373:       idx++; v++;
374:     }
375:   }
376:   PetscLogFlops(6*a->nz - 3*b->AIJ->n);
377:   VecRestoreArray(xx,&x);
378:   VecRestoreArray(yy,&y);
379:   return(0);
380: }

384: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385: {
386:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
387:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
388:   PetscScalar    *x,*y,*v,sum1, sum2, sum3;
390:   PetscInt       m = b->AIJ->m,*idx,*ii;
391:   PetscInt       n,i,jrow,j;

394:   if (yy != zz) {VecCopy(yy,zz);}
395:   VecGetArray(xx,&x);
396:   VecGetArray(zz,&y);
397:   idx  = a->j;
398:   v    = a->a;
399:   ii   = a->i;

401:   for (i=0; i<m; i++) {
402:     jrow = ii[i];
403:     n    = ii[i+1] - jrow;
404:     sum1  = 0.0;
405:     sum2  = 0.0;
406:     sum3  = 0.0;
407:     for (j=0; j<n; j++) {
408:       sum1 += v[jrow]*x[3*idx[jrow]];
409:       sum2 += v[jrow]*x[3*idx[jrow]+1];
410:       sum3 += v[jrow]*x[3*idx[jrow]+2];
411:       jrow++;
412:      }
413:     y[3*i]   += sum1;
414:     y[3*i+1] += sum2;
415:     y[3*i+2] += sum3;
416:   }

418:   PetscLogFlops(6*a->nz);
419:   VecRestoreArray(xx,&x);
420:   VecRestoreArray(zz,&y);
421:   return(0);
422: }
425: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
426: {
427:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
428:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
429:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3;
431:   PetscInt       m = b->AIJ->m,n,i,*idx;

434:   if (yy != zz) {VecCopy(yy,zz);}
435:   VecGetArray(xx,&x);
436:   VecGetArray(zz,&y);
437:   for (i=0; i<m; i++) {
438:     idx    = a->j + a->i[i] ;
439:     v      = a->a + a->i[i] ;
440:     n      = a->i[i+1] - a->i[i];
441:     alpha1 = x[3*i];
442:     alpha2 = x[3*i+1];
443:     alpha3 = x[3*i+2];
444:     while (n-->0) {
445:       y[3*(*idx)]   += alpha1*(*v);
446:       y[3*(*idx)+1] += alpha2*(*v);
447:       y[3*(*idx)+2] += alpha3*(*v);
448:       idx++; v++;
449:     }
450:   }
451:   PetscLogFlops(6*a->nz);
452:   VecRestoreArray(xx,&x);
453:   VecRestoreArray(zz,&y);
454:   return(0);
455: }

457: /* ------------------------------------------------------------------------------*/
460: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
461: {
462:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
463:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
464:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
466:   PetscInt       m = b->AIJ->m,*idx,*ii;
467:   PetscInt       n,i,jrow,j;

470:   VecGetArray(xx,&x);
471:   VecGetArray(yy,&y);
472:   idx  = a->j;
473:   v    = a->a;
474:   ii   = a->i;

476:   for (i=0; i<m; i++) {
477:     jrow = ii[i];
478:     n    = ii[i+1] - jrow;
479:     sum1  = 0.0;
480:     sum2  = 0.0;
481:     sum3  = 0.0;
482:     sum4  = 0.0;
483:     for (j=0; j<n; j++) {
484:       sum1 += v[jrow]*x[4*idx[jrow]];
485:       sum2 += v[jrow]*x[4*idx[jrow]+1];
486:       sum3 += v[jrow]*x[4*idx[jrow]+2];
487:       sum4 += v[jrow]*x[4*idx[jrow]+3];
488:       jrow++;
489:      }
490:     y[4*i]   = sum1;
491:     y[4*i+1] = sum2;
492:     y[4*i+2] = sum3;
493:     y[4*i+3] = sum4;
494:   }

496:   PetscLogFlops(8*a->nz - 4*m);
497:   VecRestoreArray(xx,&x);
498:   VecRestoreArray(yy,&y);
499:   return(0);
500: }

504: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
505: {
506:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
507:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
508:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
510:   PetscInt       m = b->AIJ->m,n,i,*idx;

513:   VecSet(yy,zero);
514:   VecGetArray(xx,&x);
515:   VecGetArray(yy,&y);
516:   for (i=0; i<m; i++) {
517:     idx    = a->j + a->i[i] ;
518:     v      = a->a + a->i[i] ;
519:     n      = a->i[i+1] - a->i[i];
520:     alpha1 = x[4*i];
521:     alpha2 = x[4*i+1];
522:     alpha3 = x[4*i+2];
523:     alpha4 = x[4*i+3];
524:     while (n-->0) {
525:       y[4*(*idx)]   += alpha1*(*v);
526:       y[4*(*idx)+1] += alpha2*(*v);
527:       y[4*(*idx)+2] += alpha3*(*v);
528:       y[4*(*idx)+3] += alpha4*(*v);
529:       idx++; v++;
530:     }
531:   }
532:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
533:   VecRestoreArray(xx,&x);
534:   VecRestoreArray(yy,&y);
535:   return(0);
536: }

540: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
541: {
542:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
543:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
544:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4;
546:   PetscInt       m = b->AIJ->m,*idx,*ii;
547:   PetscInt       n,i,jrow,j;

550:   if (yy != zz) {VecCopy(yy,zz);}
551:   VecGetArray(xx,&x);
552:   VecGetArray(zz,&y);
553:   idx  = a->j;
554:   v    = a->a;
555:   ii   = a->i;

557:   for (i=0; i<m; i++) {
558:     jrow = ii[i];
559:     n    = ii[i+1] - jrow;
560:     sum1  = 0.0;
561:     sum2  = 0.0;
562:     sum3  = 0.0;
563:     sum4  = 0.0;
564:     for (j=0; j<n; j++) {
565:       sum1 += v[jrow]*x[4*idx[jrow]];
566:       sum2 += v[jrow]*x[4*idx[jrow]+1];
567:       sum3 += v[jrow]*x[4*idx[jrow]+2];
568:       sum4 += v[jrow]*x[4*idx[jrow]+3];
569:       jrow++;
570:      }
571:     y[4*i]   += sum1;
572:     y[4*i+1] += sum2;
573:     y[4*i+2] += sum3;
574:     y[4*i+3] += sum4;
575:   }

577:   PetscLogFlops(8*a->nz - 4*m);
578:   VecRestoreArray(xx,&x);
579:   VecRestoreArray(zz,&y);
580:   return(0);
581: }
584: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
585: {
586:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
587:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
588:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
590:   PetscInt       m = b->AIJ->m,n,i,*idx;

593:   if (yy != zz) {VecCopy(yy,zz);}
594:   VecGetArray(xx,&x);
595:   VecGetArray(zz,&y);
596: 
597:   for (i=0; i<m; i++) {
598:     idx    = a->j + a->i[i] ;
599:     v      = a->a + a->i[i] ;
600:     n      = a->i[i+1] - a->i[i];
601:     alpha1 = x[4*i];
602:     alpha2 = x[4*i+1];
603:     alpha3 = x[4*i+2];
604:     alpha4 = x[4*i+3];
605:     while (n-->0) {
606:       y[4*(*idx)]   += alpha1*(*v);
607:       y[4*(*idx)+1] += alpha2*(*v);
608:       y[4*(*idx)+2] += alpha3*(*v);
609:       y[4*(*idx)+3] += alpha4*(*v);
610:       idx++; v++;
611:     }
612:   }
613:   PetscLogFlops(8*a->nz - 4*b->AIJ->n);
614:   VecRestoreArray(xx,&x);
615:   VecRestoreArray(zz,&y);
616:   return(0);
617: }
618: /* ------------------------------------------------------------------------------*/

622: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
623: {
624:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
625:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
626:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
628:   PetscInt       m = b->AIJ->m,*idx,*ii;
629:   PetscInt       n,i,jrow,j;

632:   VecGetArray(xx,&x);
633:   VecGetArray(yy,&y);
634:   idx  = a->j;
635:   v    = a->a;
636:   ii   = a->i;

638:   for (i=0; i<m; i++) {
639:     jrow = ii[i];
640:     n    = ii[i+1] - jrow;
641:     sum1  = 0.0;
642:     sum2  = 0.0;
643:     sum3  = 0.0;
644:     sum4  = 0.0;
645:     sum5  = 0.0;
646:     for (j=0; j<n; j++) {
647:       sum1 += v[jrow]*x[5*idx[jrow]];
648:       sum2 += v[jrow]*x[5*idx[jrow]+1];
649:       sum3 += v[jrow]*x[5*idx[jrow]+2];
650:       sum4 += v[jrow]*x[5*idx[jrow]+3];
651:       sum5 += v[jrow]*x[5*idx[jrow]+4];
652:       jrow++;
653:      }
654:     y[5*i]   = sum1;
655:     y[5*i+1] = sum2;
656:     y[5*i+2] = sum3;
657:     y[5*i+3] = sum4;
658:     y[5*i+4] = sum5;
659:   }

661:   PetscLogFlops(10*a->nz - 5*m);
662:   VecRestoreArray(xx,&x);
663:   VecRestoreArray(yy,&y);
664:   return(0);
665: }

669: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
670: {
671:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
672:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
673:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
675:   PetscInt       m = b->AIJ->m,n,i,*idx;

678:   VecSet(yy,zero);
679:   VecGetArray(xx,&x);
680:   VecGetArray(yy,&y);
681: 
682:   for (i=0; i<m; i++) {
683:     idx    = a->j + a->i[i] ;
684:     v      = a->a + a->i[i] ;
685:     n      = a->i[i+1] - a->i[i];
686:     alpha1 = x[5*i];
687:     alpha2 = x[5*i+1];
688:     alpha3 = x[5*i+2];
689:     alpha4 = x[5*i+3];
690:     alpha5 = x[5*i+4];
691:     while (n-->0) {
692:       y[5*(*idx)]   += alpha1*(*v);
693:       y[5*(*idx)+1] += alpha2*(*v);
694:       y[5*(*idx)+2] += alpha3*(*v);
695:       y[5*(*idx)+3] += alpha4*(*v);
696:       y[5*(*idx)+4] += alpha5*(*v);
697:       idx++; v++;
698:     }
699:   }
700:   PetscLogFlops(10*a->nz - 5*b->AIJ->n);
701:   VecRestoreArray(xx,&x);
702:   VecRestoreArray(yy,&y);
703:   return(0);
704: }

708: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
709: {
710:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
711:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
712:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
714:   PetscInt       m = b->AIJ->m,*idx,*ii;
715:   PetscInt       n,i,jrow,j;

718:   if (yy != zz) {VecCopy(yy,zz);}
719:   VecGetArray(xx,&x);
720:   VecGetArray(zz,&y);
721:   idx  = a->j;
722:   v    = a->a;
723:   ii   = a->i;

725:   for (i=0; i<m; i++) {
726:     jrow = ii[i];
727:     n    = ii[i+1] - jrow;
728:     sum1  = 0.0;
729:     sum2  = 0.0;
730:     sum3  = 0.0;
731:     sum4  = 0.0;
732:     sum5  = 0.0;
733:     for (j=0; j<n; j++) {
734:       sum1 += v[jrow]*x[5*idx[jrow]];
735:       sum2 += v[jrow]*x[5*idx[jrow]+1];
736:       sum3 += v[jrow]*x[5*idx[jrow]+2];
737:       sum4 += v[jrow]*x[5*idx[jrow]+3];
738:       sum5 += v[jrow]*x[5*idx[jrow]+4];
739:       jrow++;
740:      }
741:     y[5*i]   += sum1;
742:     y[5*i+1] += sum2;
743:     y[5*i+2] += sum3;
744:     y[5*i+3] += sum4;
745:     y[5*i+4] += sum5;
746:   }

748:   PetscLogFlops(10*a->nz);
749:   VecRestoreArray(xx,&x);
750:   VecRestoreArray(zz,&y);
751:   return(0);
752: }

756: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
757: {
758:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
759:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
760:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
762:   PetscInt       m = b->AIJ->m,n,i,*idx;

765:   if (yy != zz) {VecCopy(yy,zz);}
766:   VecGetArray(xx,&x);
767:   VecGetArray(zz,&y);
768: 
769:   for (i=0; i<m; i++) {
770:     idx    = a->j + a->i[i] ;
771:     v      = a->a + a->i[i] ;
772:     n      = a->i[i+1] - a->i[i];
773:     alpha1 = x[5*i];
774:     alpha2 = x[5*i+1];
775:     alpha3 = x[5*i+2];
776:     alpha4 = x[5*i+3];
777:     alpha5 = x[5*i+4];
778:     while (n-->0) {
779:       y[5*(*idx)]   += alpha1*(*v);
780:       y[5*(*idx)+1] += alpha2*(*v);
781:       y[5*(*idx)+2] += alpha3*(*v);
782:       y[5*(*idx)+3] += alpha4*(*v);
783:       y[5*(*idx)+4] += alpha5*(*v);
784:       idx++; v++;
785:     }
786:   }
787:   PetscLogFlops(10*a->nz);
788:   VecRestoreArray(xx,&x);
789:   VecRestoreArray(zz,&y);
790:   return(0);
791: }

793: /* ------------------------------------------------------------------------------*/
796: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
797: {
798:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
799:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
800:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
802:   PetscInt       m = b->AIJ->m,*idx,*ii;
803:   PetscInt       n,i,jrow,j;

806:   VecGetArray(xx,&x);
807:   VecGetArray(yy,&y);
808:   idx  = a->j;
809:   v    = a->a;
810:   ii   = a->i;

812:   for (i=0; i<m; i++) {
813:     jrow = ii[i];
814:     n    = ii[i+1] - jrow;
815:     sum1  = 0.0;
816:     sum2  = 0.0;
817:     sum3  = 0.0;
818:     sum4  = 0.0;
819:     sum5  = 0.0;
820:     sum6  = 0.0;
821:     for (j=0; j<n; j++) {
822:       sum1 += v[jrow]*x[6*idx[jrow]];
823:       sum2 += v[jrow]*x[6*idx[jrow]+1];
824:       sum3 += v[jrow]*x[6*idx[jrow]+2];
825:       sum4 += v[jrow]*x[6*idx[jrow]+3];
826:       sum5 += v[jrow]*x[6*idx[jrow]+4];
827:       sum6 += v[jrow]*x[6*idx[jrow]+5];
828:       jrow++;
829:      }
830:     y[6*i]   = sum1;
831:     y[6*i+1] = sum2;
832:     y[6*i+2] = sum3;
833:     y[6*i+3] = sum4;
834:     y[6*i+4] = sum5;
835:     y[6*i+5] = sum6;
836:   }

838:   PetscLogFlops(12*a->nz - 6*m);
839:   VecRestoreArray(xx,&x);
840:   VecRestoreArray(yy,&y);
841:   return(0);
842: }

846: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
847: {
848:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
849:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
850:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
852:   PetscInt       m = b->AIJ->m,n,i,*idx;

855:   VecSet(yy,zero);
856:   VecGetArray(xx,&x);
857:   VecGetArray(yy,&y);

859:   for (i=0; i<m; i++) {
860:     idx    = a->j + a->i[i] ;
861:     v      = a->a + a->i[i] ;
862:     n      = a->i[i+1] - a->i[i];
863:     alpha1 = x[6*i];
864:     alpha2 = x[6*i+1];
865:     alpha3 = x[6*i+2];
866:     alpha4 = x[6*i+3];
867:     alpha5 = x[6*i+4];
868:     alpha6 = x[6*i+5];
869:     while (n-->0) {
870:       y[6*(*idx)]   += alpha1*(*v);
871:       y[6*(*idx)+1] += alpha2*(*v);
872:       y[6*(*idx)+2] += alpha3*(*v);
873:       y[6*(*idx)+3] += alpha4*(*v);
874:       y[6*(*idx)+4] += alpha5*(*v);
875:       y[6*(*idx)+5] += alpha6*(*v);
876:       idx++; v++;
877:     }
878:   }
879:   PetscLogFlops(12*a->nz - 6*b->AIJ->n);
880:   VecRestoreArray(xx,&x);
881:   VecRestoreArray(yy,&y);
882:   return(0);
883: }

887: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
888: {
889:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
890:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
891:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
893:   PetscInt       m = b->AIJ->m,*idx,*ii;
894:   PetscInt       n,i,jrow,j;

897:   if (yy != zz) {VecCopy(yy,zz);}
898:   VecGetArray(xx,&x);
899:   VecGetArray(zz,&y);
900:   idx  = a->j;
901:   v    = a->a;
902:   ii   = a->i;

904:   for (i=0; i<m; i++) {
905:     jrow = ii[i];
906:     n    = ii[i+1] - jrow;
907:     sum1  = 0.0;
908:     sum2  = 0.0;
909:     sum3  = 0.0;
910:     sum4  = 0.0;
911:     sum5  = 0.0;
912:     sum6  = 0.0;
913:     for (j=0; j<n; j++) {
914:       sum1 += v[jrow]*x[6*idx[jrow]];
915:       sum2 += v[jrow]*x[6*idx[jrow]+1];
916:       sum3 += v[jrow]*x[6*idx[jrow]+2];
917:       sum4 += v[jrow]*x[6*idx[jrow]+3];
918:       sum5 += v[jrow]*x[6*idx[jrow]+4];
919:       sum6 += v[jrow]*x[6*idx[jrow]+5];
920:       jrow++;
921:      }
922:     y[6*i]   += sum1;
923:     y[6*i+1] += sum2;
924:     y[6*i+2] += sum3;
925:     y[6*i+3] += sum4;
926:     y[6*i+4] += sum5;
927:     y[6*i+5] += sum6;
928:   }

930:   PetscLogFlops(12*a->nz);
931:   VecRestoreArray(xx,&x);
932:   VecRestoreArray(zz,&y);
933:   return(0);
934: }

938: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
939: {
940:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
941:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
942:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
944:   PetscInt       m = b->AIJ->m,n,i,*idx;

947:   if (yy != zz) {VecCopy(yy,zz);}
948:   VecGetArray(xx,&x);
949:   VecGetArray(zz,&y);
950: 
951:   for (i=0; i<m; i++) {
952:     idx    = a->j + a->i[i] ;
953:     v      = a->a + a->i[i] ;
954:     n      = a->i[i+1] - a->i[i];
955:     alpha1 = x[6*i];
956:     alpha2 = x[6*i+1];
957:     alpha3 = x[6*i+2];
958:     alpha4 = x[6*i+3];
959:     alpha5 = x[6*i+4];
960:     alpha6 = x[6*i+5];
961:     while (n-->0) {
962:       y[6*(*idx)]   += alpha1*(*v);
963:       y[6*(*idx)+1] += alpha2*(*v);
964:       y[6*(*idx)+2] += alpha3*(*v);
965:       y[6*(*idx)+3] += alpha4*(*v);
966:       y[6*(*idx)+4] += alpha5*(*v);
967:       y[6*(*idx)+5] += alpha6*(*v);
968:       idx++; v++;
969:     }
970:   }
971:   PetscLogFlops(12*a->nz);
972:   VecRestoreArray(xx,&x);
973:   VecRestoreArray(zz,&y);
974:   return(0);
975: }

977: /* ------------------------------------------------------------------------------*/
980: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
981: {
982:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
983:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
984:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
986:   PetscInt       m = b->AIJ->m,*idx,*ii;
987:   PetscInt       n,i,jrow,j;

990:   VecGetArray(xx,&x);
991:   VecGetArray(yy,&y);
992:   idx  = a->j;
993:   v    = a->a;
994:   ii   = a->i;

996:   for (i=0; i<m; i++) {
997:     jrow = ii[i];
998:     n    = ii[i+1] - jrow;
999:     sum1  = 0.0;
1000:     sum2  = 0.0;
1001:     sum3  = 0.0;
1002:     sum4  = 0.0;
1003:     sum5  = 0.0;
1004:     sum6  = 0.0;
1005:     sum7  = 0.0;
1006:     for (j=0; j<n; j++) {
1007:       sum1 += v[jrow]*x[7*idx[jrow]];
1008:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1009:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1010:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1011:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1012:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1013:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1014:       jrow++;
1015:      }
1016:     y[7*i]   = sum1;
1017:     y[7*i+1] = sum2;
1018:     y[7*i+2] = sum3;
1019:     y[7*i+3] = sum4;
1020:     y[7*i+4] = sum5;
1021:     y[7*i+5] = sum6;
1022:     y[7*i+6] = sum7;
1023:   }

1025:   PetscLogFlops(14*a->nz - 7*m);
1026:   VecRestoreArray(xx,&x);
1027:   VecRestoreArray(yy,&y);
1028:   return(0);
1029: }

1033: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1034: {
1035:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1036:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1037:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1039:   PetscInt       m = b->AIJ->m,n,i,*idx;

1042:   VecSet(yy,zero);
1043:   VecGetArray(xx,&x);
1044:   VecGetArray(yy,&y);

1046:   for (i=0; i<m; i++) {
1047:     idx    = a->j + a->i[i] ;
1048:     v      = a->a + a->i[i] ;
1049:     n      = a->i[i+1] - a->i[i];
1050:     alpha1 = x[7*i];
1051:     alpha2 = x[7*i+1];
1052:     alpha3 = x[7*i+2];
1053:     alpha4 = x[7*i+3];
1054:     alpha5 = x[7*i+4];
1055:     alpha6 = x[7*i+5];
1056:     alpha7 = x[7*i+6];
1057:     while (n-->0) {
1058:       y[7*(*idx)]   += alpha1*(*v);
1059:       y[7*(*idx)+1] += alpha2*(*v);
1060:       y[7*(*idx)+2] += alpha3*(*v);
1061:       y[7*(*idx)+3] += alpha4*(*v);
1062:       y[7*(*idx)+4] += alpha5*(*v);
1063:       y[7*(*idx)+5] += alpha6*(*v);
1064:       y[7*(*idx)+6] += alpha7*(*v);
1065:       idx++; v++;
1066:     }
1067:   }
1068:   PetscLogFlops(14*a->nz - 7*b->AIJ->n);
1069:   VecRestoreArray(xx,&x);
1070:   VecRestoreArray(yy,&y);
1071:   return(0);
1072: }

1076: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1077: {
1078:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1079:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1080:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1082:   PetscInt       m = b->AIJ->m,*idx,*ii;
1083:   PetscInt       n,i,jrow,j;

1086:   if (yy != zz) {VecCopy(yy,zz);}
1087:   VecGetArray(xx,&x);
1088:   VecGetArray(zz,&y);
1089:   idx  = a->j;
1090:   v    = a->a;
1091:   ii   = a->i;

1093:   for (i=0; i<m; i++) {
1094:     jrow = ii[i];
1095:     n    = ii[i+1] - jrow;
1096:     sum1  = 0.0;
1097:     sum2  = 0.0;
1098:     sum3  = 0.0;
1099:     sum4  = 0.0;
1100:     sum5  = 0.0;
1101:     sum6  = 0.0;
1102:     sum7  = 0.0;
1103:     for (j=0; j<n; j++) {
1104:       sum1 += v[jrow]*x[7*idx[jrow]];
1105:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1106:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1107:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1108:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1109:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1110:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1111:       jrow++;
1112:      }
1113:     y[7*i]   += sum1;
1114:     y[7*i+1] += sum2;
1115:     y[7*i+2] += sum3;
1116:     y[7*i+3] += sum4;
1117:     y[7*i+4] += sum5;
1118:     y[7*i+5] += sum6;
1119:     y[7*i+6] += sum7;
1120:   }

1122:   PetscLogFlops(14*a->nz);
1123:   VecRestoreArray(xx,&x);
1124:   VecRestoreArray(zz,&y);
1125:   return(0);
1126: }

1130: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1131: {
1132:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1133:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1134:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1136:   PetscInt       m = b->AIJ->m,n,i,*idx;

1139:   if (yy != zz) {VecCopy(yy,zz);}
1140:   VecGetArray(xx,&x);
1141:   VecGetArray(zz,&y);
1142:   for (i=0; i<m; i++) {
1143:     idx    = a->j + a->i[i] ;
1144:     v      = a->a + a->i[i] ;
1145:     n      = a->i[i+1] - a->i[i];
1146:     alpha1 = x[7*i];
1147:     alpha2 = x[7*i+1];
1148:     alpha3 = x[7*i+2];
1149:     alpha4 = x[7*i+3];
1150:     alpha5 = x[7*i+4];
1151:     alpha6 = x[7*i+5];
1152:     alpha7 = x[7*i+6];
1153:     while (n-->0) {
1154:       y[7*(*idx)]   += alpha1*(*v);
1155:       y[7*(*idx)+1] += alpha2*(*v);
1156:       y[7*(*idx)+2] += alpha3*(*v);
1157:       y[7*(*idx)+3] += alpha4*(*v);
1158:       y[7*(*idx)+4] += alpha5*(*v);
1159:       y[7*(*idx)+5] += alpha6*(*v);
1160:       y[7*(*idx)+6] += alpha7*(*v);
1161:       idx++; v++;
1162:     }
1163:   }
1164:   PetscLogFlops(14*a->nz);
1165:   VecRestoreArray(xx,&x);
1166:   VecRestoreArray(zz,&y);
1167:   return(0);
1168: }

1172: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1173: {
1174:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1175:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1176:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1178:   PetscInt       m = b->AIJ->m,*idx,*ii;
1179:   PetscInt       n,i,jrow,j;

1182:   VecGetArray(xx,&x);
1183:   VecGetArray(yy,&y);
1184:   idx  = a->j;
1185:   v    = a->a;
1186:   ii   = a->i;

1188:   for (i=0; i<m; i++) {
1189:     jrow = ii[i];
1190:     n    = ii[i+1] - jrow;
1191:     sum1  = 0.0;
1192:     sum2  = 0.0;
1193:     sum3  = 0.0;
1194:     sum4  = 0.0;
1195:     sum5  = 0.0;
1196:     sum6  = 0.0;
1197:     sum7  = 0.0;
1198:     sum8  = 0.0;
1199:     for (j=0; j<n; j++) {
1200:       sum1 += v[jrow]*x[8*idx[jrow]];
1201:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1202:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1203:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1204:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1205:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1206:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1207:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1208:       jrow++;
1209:      }
1210:     y[8*i]   = sum1;
1211:     y[8*i+1] = sum2;
1212:     y[8*i+2] = sum3;
1213:     y[8*i+3] = sum4;
1214:     y[8*i+4] = sum5;
1215:     y[8*i+5] = sum6;
1216:     y[8*i+6] = sum7;
1217:     y[8*i+7] = sum8;
1218:   }

1220:   PetscLogFlops(16*a->nz - 8*m);
1221:   VecRestoreArray(xx,&x);
1222:   VecRestoreArray(yy,&y);
1223:   return(0);
1224: }

1228: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1229: {
1230:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1231:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1232:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1234:   PetscInt       m = b->AIJ->m,n,i,*idx;

1237:   VecSet(yy,zero);
1238:   VecGetArray(xx,&x);
1239:   VecGetArray(yy,&y);

1241:   for (i=0; i<m; i++) {
1242:     idx    = a->j + a->i[i] ;
1243:     v      = a->a + a->i[i] ;
1244:     n      = a->i[i+1] - a->i[i];
1245:     alpha1 = x[8*i];
1246:     alpha2 = x[8*i+1];
1247:     alpha3 = x[8*i+2];
1248:     alpha4 = x[8*i+3];
1249:     alpha5 = x[8*i+4];
1250:     alpha6 = x[8*i+5];
1251:     alpha7 = x[8*i+6];
1252:     alpha8 = x[8*i+7];
1253:     while (n-->0) {
1254:       y[8*(*idx)]   += alpha1*(*v);
1255:       y[8*(*idx)+1] += alpha2*(*v);
1256:       y[8*(*idx)+2] += alpha3*(*v);
1257:       y[8*(*idx)+3] += alpha4*(*v);
1258:       y[8*(*idx)+4] += alpha5*(*v);
1259:       y[8*(*idx)+5] += alpha6*(*v);
1260:       y[8*(*idx)+6] += alpha7*(*v);
1261:       y[8*(*idx)+7] += alpha8*(*v);
1262:       idx++; v++;
1263:     }
1264:   }
1265:   PetscLogFlops(16*a->nz - 8*b->AIJ->n);
1266:   VecRestoreArray(xx,&x);
1267:   VecRestoreArray(yy,&y);
1268:   return(0);
1269: }

1273: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1274: {
1275:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1276:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1277:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1279:   PetscInt       m = b->AIJ->m,*idx,*ii;
1280:   PetscInt       n,i,jrow,j;

1283:   if (yy != zz) {VecCopy(yy,zz);}
1284:   VecGetArray(xx,&x);
1285:   VecGetArray(zz,&y);
1286:   idx  = a->j;
1287:   v    = a->a;
1288:   ii   = a->i;

1290:   for (i=0; i<m; i++) {
1291:     jrow = ii[i];
1292:     n    = ii[i+1] - jrow;
1293:     sum1  = 0.0;
1294:     sum2  = 0.0;
1295:     sum3  = 0.0;
1296:     sum4  = 0.0;
1297:     sum5  = 0.0;
1298:     sum6  = 0.0;
1299:     sum7  = 0.0;
1300:     sum8  = 0.0;
1301:     for (j=0; j<n; j++) {
1302:       sum1 += v[jrow]*x[8*idx[jrow]];
1303:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1304:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1305:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1306:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1307:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1308:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1309:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1310:       jrow++;
1311:      }
1312:     y[8*i]   += sum1;
1313:     y[8*i+1] += sum2;
1314:     y[8*i+2] += sum3;
1315:     y[8*i+3] += sum4;
1316:     y[8*i+4] += sum5;
1317:     y[8*i+5] += sum6;
1318:     y[8*i+6] += sum7;
1319:     y[8*i+7] += sum8;
1320:   }

1322:   PetscLogFlops(16*a->nz);
1323:   VecRestoreArray(xx,&x);
1324:   VecRestoreArray(zz,&y);
1325:   return(0);
1326: }

1330: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1331: {
1332:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1333:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1334:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1336:   PetscInt       m = b->AIJ->m,n,i,*idx;

1339:   if (yy != zz) {VecCopy(yy,zz);}
1340:   VecGetArray(xx,&x);
1341:   VecGetArray(zz,&y);
1342:   for (i=0; i<m; i++) {
1343:     idx    = a->j + a->i[i] ;
1344:     v      = a->a + a->i[i] ;
1345:     n      = a->i[i+1] - a->i[i];
1346:     alpha1 = x[8*i];
1347:     alpha2 = x[8*i+1];
1348:     alpha3 = x[8*i+2];
1349:     alpha4 = x[8*i+3];
1350:     alpha5 = x[8*i+4];
1351:     alpha6 = x[8*i+5];
1352:     alpha7 = x[8*i+6];
1353:     alpha8 = x[8*i+7];
1354:     while (n-->0) {
1355:       y[8*(*idx)]   += alpha1*(*v);
1356:       y[8*(*idx)+1] += alpha2*(*v);
1357:       y[8*(*idx)+2] += alpha3*(*v);
1358:       y[8*(*idx)+3] += alpha4*(*v);
1359:       y[8*(*idx)+4] += alpha5*(*v);
1360:       y[8*(*idx)+5] += alpha6*(*v);
1361:       y[8*(*idx)+6] += alpha7*(*v);
1362:       y[8*(*idx)+7] += alpha8*(*v);
1363:       idx++; v++;
1364:     }
1365:   }
1366:   PetscLogFlops(16*a->nz);
1367:   VecRestoreArray(xx,&x);
1368:   VecRestoreArray(zz,&y);
1369:   return(0);
1370: }


1373: /* ------------------------------------------------------------------------------*/
1376: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1377: {
1378:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1379:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1380:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1382:   PetscInt       m = b->AIJ->m,*idx,*ii;
1383:   PetscInt       n,i,jrow,j;

1386:   VecGetArray(xx,&x);
1387:   VecGetArray(yy,&y);
1388:   idx  = a->j;
1389:   v    = a->a;
1390:   ii   = a->i;

1392:   for (i=0; i<m; i++) {
1393:     jrow = ii[i];
1394:     n    = ii[i+1] - jrow;
1395:     sum1  = 0.0;
1396:     sum2  = 0.0;
1397:     sum3  = 0.0;
1398:     sum4  = 0.0;
1399:     sum5  = 0.0;
1400:     sum6  = 0.0;
1401:     sum7  = 0.0;
1402:     sum8  = 0.0;
1403:     sum9  = 0.0;
1404:     for (j=0; j<n; j++) {
1405:       sum1 += v[jrow]*x[9*idx[jrow]];
1406:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1407:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1408:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1409:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1410:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1411:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1412:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1413:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1414:       jrow++;
1415:      }
1416:     y[9*i]   = sum1;
1417:     y[9*i+1] = sum2;
1418:     y[9*i+2] = sum3;
1419:     y[9*i+3] = sum4;
1420:     y[9*i+4] = sum5;
1421:     y[9*i+5] = sum6;
1422:     y[9*i+6] = sum7;
1423:     y[9*i+7] = sum8;
1424:     y[9*i+8] = sum9;
1425:   }

1427:   PetscLogFlops(18*a->nz - 9*m);
1428:   VecRestoreArray(xx,&x);
1429:   VecRestoreArray(yy,&y);
1430:   return(0);
1431: }

1435: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1436: {
1437:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1438:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1439:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1441:   PetscInt       m = b->AIJ->m,n,i,*idx;

1444:   VecSet(yy,zero);
1445:   VecGetArray(xx,&x);
1446:   VecGetArray(yy,&y);

1448:   for (i=0; i<m; i++) {
1449:     idx    = a->j + a->i[i] ;
1450:     v      = a->a + a->i[i] ;
1451:     n      = a->i[i+1] - a->i[i];
1452:     alpha1 = x[9*i];
1453:     alpha2 = x[9*i+1];
1454:     alpha3 = x[9*i+2];
1455:     alpha4 = x[9*i+3];
1456:     alpha5 = x[9*i+4];
1457:     alpha6 = x[9*i+5];
1458:     alpha7 = x[9*i+6];
1459:     alpha8 = x[9*i+7];
1460:     alpha9 = x[9*i+8];
1461:     while (n-->0) {
1462:       y[9*(*idx)]   += alpha1*(*v);
1463:       y[9*(*idx)+1] += alpha2*(*v);
1464:       y[9*(*idx)+2] += alpha3*(*v);
1465:       y[9*(*idx)+3] += alpha4*(*v);
1466:       y[9*(*idx)+4] += alpha5*(*v);
1467:       y[9*(*idx)+5] += alpha6*(*v);
1468:       y[9*(*idx)+6] += alpha7*(*v);
1469:       y[9*(*idx)+7] += alpha8*(*v);
1470:       y[9*(*idx)+8] += alpha9*(*v);
1471:       idx++; v++;
1472:     }
1473:   }
1474:   PetscLogFlops(18*a->nz - 9*b->AIJ->n);
1475:   VecRestoreArray(xx,&x);
1476:   VecRestoreArray(yy,&y);
1477:   return(0);
1478: }

1482: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1483: {
1484:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1485:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1486:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1488:   PetscInt       m = b->AIJ->m,*idx,*ii;
1489:   PetscInt       n,i,jrow,j;

1492:   if (yy != zz) {VecCopy(yy,zz);}
1493:   VecGetArray(xx,&x);
1494:   VecGetArray(zz,&y);
1495:   idx  = a->j;
1496:   v    = a->a;
1497:   ii   = a->i;

1499:   for (i=0; i<m; i++) {
1500:     jrow = ii[i];
1501:     n    = ii[i+1] - jrow;
1502:     sum1  = 0.0;
1503:     sum2  = 0.0;
1504:     sum3  = 0.0;
1505:     sum4  = 0.0;
1506:     sum5  = 0.0;
1507:     sum6  = 0.0;
1508:     sum7  = 0.0;
1509:     sum8  = 0.0;
1510:     sum9  = 0.0;
1511:     for (j=0; j<n; j++) {
1512:       sum1 += v[jrow]*x[9*idx[jrow]];
1513:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1514:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1515:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1516:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1517:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1518:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1519:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1520:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1521:       jrow++;
1522:      }
1523:     y[9*i]   += sum1;
1524:     y[9*i+1] += sum2;
1525:     y[9*i+2] += sum3;
1526:     y[9*i+3] += sum4;
1527:     y[9*i+4] += sum5;
1528:     y[9*i+5] += sum6;
1529:     y[9*i+6] += sum7;
1530:     y[9*i+7] += sum8;
1531:     y[9*i+8] += sum9;
1532:   }

1534:   PetscLogFlops(18*a->nz);
1535:   VecRestoreArray(xx,&x);
1536:   VecRestoreArray(zz,&y);
1537:   return(0);
1538: }

1542: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1543: {
1544:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1545:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1546:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1548:   PetscInt       m = b->AIJ->m,n,i,*idx;

1551:   if (yy != zz) {VecCopy(yy,zz);}
1552:   VecGetArray(xx,&x);
1553:   VecGetArray(zz,&y);
1554:   for (i=0; i<m; i++) {
1555:     idx    = a->j + a->i[i] ;
1556:     v      = a->a + a->i[i] ;
1557:     n      = a->i[i+1] - a->i[i];
1558:     alpha1 = x[9*i];
1559:     alpha2 = x[9*i+1];
1560:     alpha3 = x[9*i+2];
1561:     alpha4 = x[9*i+3];
1562:     alpha5 = x[9*i+4];
1563:     alpha6 = x[9*i+5];
1564:     alpha7 = x[9*i+6];
1565:     alpha8 = x[9*i+7];
1566:     alpha9 = x[9*i+8];
1567:     while (n-->0) {
1568:       y[9*(*idx)]   += alpha1*(*v);
1569:       y[9*(*idx)+1] += alpha2*(*v);
1570:       y[9*(*idx)+2] += alpha3*(*v);
1571:       y[9*(*idx)+3] += alpha4*(*v);
1572:       y[9*(*idx)+4] += alpha5*(*v);
1573:       y[9*(*idx)+5] += alpha6*(*v);
1574:       y[9*(*idx)+6] += alpha7*(*v);
1575:       y[9*(*idx)+7] += alpha8*(*v);
1576:       y[9*(*idx)+8] += alpha9*(*v);
1577:       idx++; v++;
1578:     }
1579:   }
1580:   PetscLogFlops(18*a->nz);
1581:   VecRestoreArray(xx,&x);
1582:   VecRestoreArray(zz,&y);
1583:   return(0);
1584: }

1586: /*--------------------------------------------------------------------------------------------*/
1589: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1590: {
1591:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1592:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1593:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1594:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1596:   PetscInt       m = b->AIJ->m,*idx,*ii;
1597:   PetscInt       n,i,jrow,j;

1600:   VecGetArray(xx,&x);
1601:   VecGetArray(yy,&y);
1602:   idx  = a->j;
1603:   v    = a->a;
1604:   ii   = a->i;

1606:   for (i=0; i<m; i++) {
1607:     jrow = ii[i];
1608:     n    = ii[i+1] - jrow;
1609:     sum1  = 0.0;
1610:     sum2  = 0.0;
1611:     sum3  = 0.0;
1612:     sum4  = 0.0;
1613:     sum5  = 0.0;
1614:     sum6  = 0.0;
1615:     sum7  = 0.0;
1616:     sum8  = 0.0;
1617:     sum9  = 0.0;
1618:     sum10 = 0.0;
1619:     sum11 = 0.0;
1620:     sum12 = 0.0;
1621:     sum13 = 0.0;
1622:     sum14 = 0.0;
1623:     sum15 = 0.0;
1624:     sum16 = 0.0;
1625:     for (j=0; j<n; j++) {
1626:       sum1  += v[jrow]*x[16*idx[jrow]];
1627:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1628:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1629:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1630:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1631:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1632:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1633:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1634:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1635:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1636:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1637:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1638:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1639:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1640:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1641:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1642:       jrow++;
1643:      }
1644:     y[16*i]    = sum1;
1645:     y[16*i+1]  = sum2;
1646:     y[16*i+2]  = sum3;
1647:     y[16*i+3]  = sum4;
1648:     y[16*i+4]  = sum5;
1649:     y[16*i+5]  = sum6;
1650:     y[16*i+6]  = sum7;
1651:     y[16*i+7]  = sum8;
1652:     y[16*i+8]  = sum9;
1653:     y[16*i+9]  = sum10;
1654:     y[16*i+10] = sum11;
1655:     y[16*i+11] = sum12;
1656:     y[16*i+12] = sum13;
1657:     y[16*i+13] = sum14;
1658:     y[16*i+14] = sum15;
1659:     y[16*i+15] = sum16;
1660:   }

1662:   PetscLogFlops(32*a->nz - 16*m);
1663:   VecRestoreArray(xx,&x);
1664:   VecRestoreArray(yy,&y);
1665:   return(0);
1666: }

1670: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1671: {
1672:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1673:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1674:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1675:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1677:   PetscInt       m = b->AIJ->m,n,i,*idx;

1680:   VecSet(yy,zero);
1681:   VecGetArray(xx,&x);
1682:   VecGetArray(yy,&y);

1684:   for (i=0; i<m; i++) {
1685:     idx    = a->j + a->i[i] ;
1686:     v      = a->a + a->i[i] ;
1687:     n      = a->i[i+1] - a->i[i];
1688:     alpha1  = x[16*i];
1689:     alpha2  = x[16*i+1];
1690:     alpha3  = x[16*i+2];
1691:     alpha4  = x[16*i+3];
1692:     alpha5  = x[16*i+4];
1693:     alpha6  = x[16*i+5];
1694:     alpha7  = x[16*i+6];
1695:     alpha8  = x[16*i+7];
1696:     alpha9  = x[16*i+8];
1697:     alpha10 = x[16*i+9];
1698:     alpha11 = x[16*i+10];
1699:     alpha12 = x[16*i+11];
1700:     alpha13 = x[16*i+12];
1701:     alpha14 = x[16*i+13];
1702:     alpha15 = x[16*i+14];
1703:     alpha16 = x[16*i+15];
1704:     while (n-->0) {
1705:       y[16*(*idx)]    += alpha1*(*v);
1706:       y[16*(*idx)+1]  += alpha2*(*v);
1707:       y[16*(*idx)+2]  += alpha3*(*v);
1708:       y[16*(*idx)+3]  += alpha4*(*v);
1709:       y[16*(*idx)+4]  += alpha5*(*v);
1710:       y[16*(*idx)+5]  += alpha6*(*v);
1711:       y[16*(*idx)+6]  += alpha7*(*v);
1712:       y[16*(*idx)+7]  += alpha8*(*v);
1713:       y[16*(*idx)+8]  += alpha9*(*v);
1714:       y[16*(*idx)+9]  += alpha10*(*v);
1715:       y[16*(*idx)+10] += alpha11*(*v);
1716:       y[16*(*idx)+11] += alpha12*(*v);
1717:       y[16*(*idx)+12] += alpha13*(*v);
1718:       y[16*(*idx)+13] += alpha14*(*v);
1719:       y[16*(*idx)+14] += alpha15*(*v);
1720:       y[16*(*idx)+15] += alpha16*(*v);
1721:       idx++; v++;
1722:     }
1723:   }
1724:   PetscLogFlops(32*a->nz - 16*b->AIJ->n);
1725:   VecRestoreArray(xx,&x);
1726:   VecRestoreArray(yy,&y);
1727:   return(0);
1728: }

1732: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1733: {
1734:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1735:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1736:   PetscScalar    *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1737:   PetscScalar    sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1739:   PetscInt       m = b->AIJ->m,*idx,*ii;
1740:   PetscInt       n,i,jrow,j;

1743:   if (yy != zz) {VecCopy(yy,zz);}
1744:   VecGetArray(xx,&x);
1745:   VecGetArray(zz,&y);
1746:   idx  = a->j;
1747:   v    = a->a;
1748:   ii   = a->i;

1750:   for (i=0; i<m; i++) {
1751:     jrow = ii[i];
1752:     n    = ii[i+1] - jrow;
1753:     sum1  = 0.0;
1754:     sum2  = 0.0;
1755:     sum3  = 0.0;
1756:     sum4  = 0.0;
1757:     sum5  = 0.0;
1758:     sum6  = 0.0;
1759:     sum7  = 0.0;
1760:     sum8  = 0.0;
1761:     sum9  = 0.0;
1762:     sum10 = 0.0;
1763:     sum11 = 0.0;
1764:     sum12 = 0.0;
1765:     sum13 = 0.0;
1766:     sum14 = 0.0;
1767:     sum15 = 0.0;
1768:     sum16 = 0.0;
1769:     for (j=0; j<n; j++) {
1770:       sum1  += v[jrow]*x[16*idx[jrow]];
1771:       sum2  += v[jrow]*x[16*idx[jrow]+1];
1772:       sum3  += v[jrow]*x[16*idx[jrow]+2];
1773:       sum4  += v[jrow]*x[16*idx[jrow]+3];
1774:       sum5  += v[jrow]*x[16*idx[jrow]+4];
1775:       sum6  += v[jrow]*x[16*idx[jrow]+5];
1776:       sum7  += v[jrow]*x[16*idx[jrow]+6];
1777:       sum8  += v[jrow]*x[16*idx[jrow]+7];
1778:       sum9  += v[jrow]*x[16*idx[jrow]+8];
1779:       sum10 += v[jrow]*x[16*idx[jrow]+9];
1780:       sum11 += v[jrow]*x[16*idx[jrow]+10];
1781:       sum12 += v[jrow]*x[16*idx[jrow]+11];
1782:       sum13 += v[jrow]*x[16*idx[jrow]+12];
1783:       sum14 += v[jrow]*x[16*idx[jrow]+13];
1784:       sum15 += v[jrow]*x[16*idx[jrow]+14];
1785:       sum16 += v[jrow]*x[16*idx[jrow]+15];
1786:       jrow++;
1787:      }
1788:     y[16*i]    += sum1;
1789:     y[16*i+1]  += sum2;
1790:     y[16*i+2]  += sum3;
1791:     y[16*i+3]  += sum4;
1792:     y[16*i+4]  += sum5;
1793:     y[16*i+5]  += sum6;
1794:     y[16*i+6]  += sum7;
1795:     y[16*i+7]  += sum8;
1796:     y[16*i+8]  += sum9;
1797:     y[16*i+9]  += sum10;
1798:     y[16*i+10] += sum11;
1799:     y[16*i+11] += sum12;
1800:     y[16*i+12] += sum13;
1801:     y[16*i+13] += sum14;
1802:     y[16*i+14] += sum15;
1803:     y[16*i+15] += sum16;
1804:   }

1806:   PetscLogFlops(32*a->nz);
1807:   VecRestoreArray(xx,&x);
1808:   VecRestoreArray(zz,&y);
1809:   return(0);
1810: }

1814: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1815: {
1816:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;
1817:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)b->AIJ->data;
1818:   PetscScalar    *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1819:   PetscScalar    alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1821:   PetscInt       m = b->AIJ->m,n,i,*idx;

1824:   if (yy != zz) {VecCopy(yy,zz);}
1825:   VecGetArray(xx,&x);
1826:   VecGetArray(zz,&y);
1827:   for (i=0; i<m; i++) {
1828:     idx    = a->j + a->i[i] ;
1829:     v      = a->a + a->i[i] ;
1830:     n      = a->i[i+1] - a->i[i];
1831:     alpha1 = x[16*i];
1832:     alpha2 = x[16*i+1];
1833:     alpha3 = x[16*i+2];
1834:     alpha4 = x[16*i+3];
1835:     alpha5 = x[16*i+4];
1836:     alpha6 = x[16*i+5];
1837:     alpha7 = x[16*i+6];
1838:     alpha8 = x[16*i+7];
1839:     alpha9  = x[16*i+8];
1840:     alpha10 = x[16*i+9];
1841:     alpha11 = x[16*i+10];
1842:     alpha12 = x[16*i+11];
1843:     alpha13 = x[16*i+12];
1844:     alpha14 = x[16*i+13];
1845:     alpha15 = x[16*i+14];
1846:     alpha16 = x[16*i+15];
1847:     while (n-->0) {
1848:       y[16*(*idx)]   += alpha1*(*v);
1849:       y[16*(*idx)+1] += alpha2*(*v);
1850:       y[16*(*idx)+2] += alpha3*(*v);
1851:       y[16*(*idx)+3] += alpha4*(*v);
1852:       y[16*(*idx)+4] += alpha5*(*v);
1853:       y[16*(*idx)+5] += alpha6*(*v);
1854:       y[16*(*idx)+6] += alpha7*(*v);
1855:       y[16*(*idx)+7] += alpha8*(*v);
1856:       y[16*(*idx)+8]  += alpha9*(*v);
1857:       y[16*(*idx)+9]  += alpha10*(*v);
1858:       y[16*(*idx)+10] += alpha11*(*v);
1859:       y[16*(*idx)+11] += alpha12*(*v);
1860:       y[16*(*idx)+12] += alpha13*(*v);
1861:       y[16*(*idx)+13] += alpha14*(*v);
1862:       y[16*(*idx)+14] += alpha15*(*v);
1863:       y[16*(*idx)+15] += alpha16*(*v);
1864:       idx++; v++;
1865:     }
1866:   }
1867:   PetscLogFlops(32*a->nz);
1868:   VecRestoreArray(xx,&x);
1869:   VecRestoreArray(zz,&y);
1870:   return(0);
1871: }

1873: /*===================================================================================*/
1876: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1877: {
1878:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1882:   /* start the scatter */
1883:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1884:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
1885:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1886:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
1887:   return(0);
1888: }

1892: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
1893: {
1894:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1898:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1899:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
1900:   VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1901:   VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1902:   return(0);
1903: }

1907: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1908: {
1909:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1913:   /* start the scatter */
1914:   VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1915:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
1916:   VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
1917:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
1918:   return(0);
1919: }

1923: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
1924: {
1925:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

1929:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
1930:   VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1931:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
1932:   VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
1933:   return(0);
1934: }

1938: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
1939: {
1940:   /* This routine requires testing -- but it's getting better. */
1942:   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1943:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
1944:   Mat            P=pp->AIJ;
1945:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
1946:   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
1947:   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
1948:   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof,cn;
1949:   PetscInt       i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
1950:   MatScalar      *ca;

1953:   /* Start timer */
1954:   PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);

1956:   /* Get ij structure of P^T */
1957:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

1959:   cn = pn*ppdof;
1960:   /* Allocate ci array, arrays for fill computation and */
1961:   /* free space for accumulating nonzero column info */
1962:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
1963:   ci[0] = 0;

1965:   /* Work arrays for rows of P^T*A */
1966:   PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
1967:   PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
1968:   ptasparserow = ptadenserow  + an;
1969:   denserow     = ptasparserow + an;
1970:   sparserow    = denserow     + cn;

1972:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
1973:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1974:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
1975:   GetMoreSpace((ai[am]/pm)*pn,&free_space);
1976:   current_space = free_space;

1978:   /* Determine symbolic info for each row of C: */
1979:   for (i=0;i<pn;i++) {
1980:     ptnzi  = pti[i+1] - pti[i];
1981:     ptJ    = ptj + pti[i];
1982:     for (dof=0;dof<ppdof;dof++) {
1983:       ptanzi = 0;
1984:       /* Determine symbolic row of PtA: */
1985:       for (j=0;j<ptnzi;j++) {
1986:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
1987:         arow = ptJ[j]*ppdof + dof;
1988:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
1989:         anzj = ai[arow+1] - ai[arow];
1990:         ajj  = aj + ai[arow];
1991:         for (k=0;k<anzj;k++) {
1992:           if (!ptadenserow[ajj[k]]) {
1993:             ptadenserow[ajj[k]]    = -1;
1994:             ptasparserow[ptanzi++] = ajj[k];
1995:           }
1996:         }
1997:       }
1998:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
1999:       ptaj = ptasparserow;
2000:       cnzi   = 0;
2001:       for (j=0;j<ptanzi;j++) {
2002:         /* Get offset within block of P */
2003:         pshift = *ptaj%ppdof;
2004:         /* Get block row of P */
2005:         prow = (*ptaj++)/ppdof; /* integer division */
2006:         /* P has same number of nonzeros per row as the compressed form */
2007:         pnzj = pi[prow+1] - pi[prow];
2008:         pjj  = pj + pi[prow];
2009:         for (k=0;k<pnzj;k++) {
2010:           /* Locations in C are shifted by the offset within the block */
2011:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2012:           if (!denserow[pjj[k]*ppdof+pshift]) {
2013:             denserow[pjj[k]*ppdof+pshift] = -1;
2014:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2015:           }
2016:         }
2017:       }

2019:       /* sort sparserow */
2020:       PetscSortInt(cnzi,sparserow);
2021: 
2022:       /* If free space is not available, make more free space */
2023:       /* Double the amount of total space in the list */
2024:       if (current_space->local_remaining<cnzi) {
2025:         GetMoreSpace(current_space->total_array_size,&current_space);
2026:       }

2028:       /* Copy data into free space, and zero out denserows */
2029:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2030:       current_space->array           += cnzi;
2031:       current_space->local_used      += cnzi;
2032:       current_space->local_remaining -= cnzi;

2034:       for (j=0;j<ptanzi;j++) {
2035:         ptadenserow[ptasparserow[j]] = 0;
2036:       }
2037:       for (j=0;j<cnzi;j++) {
2038:         denserow[sparserow[j]] = 0;
2039:       }
2040:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2041:       /*        For now, we will recompute what is needed. */
2042:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2043:     }
2044:   }
2045:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2046:   /* Allocate space for cj, initialize cj, and */
2047:   /* destroy list of free space and other temporary array(s) */
2048:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2049:   MakeSpaceContiguous(&free_space,cj);
2050:   PetscFree(ptadenserow);
2051: 
2052:   /* Allocate space for ca */
2053:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2054:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2055: 
2056:   /* put together the new matrix */
2057:   MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);

2059:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2060:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2061:   c = (Mat_SeqAIJ *)((*C)->data);
2062:   c->freedata = PETSC_TRUE;
2063:   c->nonew    = 0;

2065:   /* Clean up. */
2066:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2068:   PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2069:   return(0);
2070: }

2074: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2075: {
2076:   /* This routine requires testing -- first draft only */
2078:   PetscInt       flops=0;
2079:   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
2080:   Mat            P=pp->AIJ;
2081:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
2082:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
2083:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
2084:   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2085:   PetscInt       *ci=c->i,*cj=c->j,*cjj;
2086:   PetscInt       am=A->M,cn=C->N,cm=C->M,ppdof=pp->dof;
2087:   PetscInt       i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2088:   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

2091:   /* Allocate temporary array for storage of one row of A*P */
2092:   PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2093:   PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));

2095:   apj      = (PetscInt *)(apa + cn);
2096:   apjdense = apj + cn;

2098:   /* Clear old values in C */
2099:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

2101:   for (i=0;i<am;i++) {
2102:     /* Form sparse row of A*P */
2103:     anzi  = ai[i+1] - ai[i];
2104:     apnzj = 0;
2105:     for (j=0;j<anzi;j++) {
2106:       /* Get offset within block of P */
2107:       pshift = *aj%ppdof;
2108:       /* Get block row of P */
2109:       prow   = *aj++/ppdof; /* integer division */
2110:       pnzj = pi[prow+1] - pi[prow];
2111:       pjj  = pj + pi[prow];
2112:       paj  = pa + pi[prow];
2113:       for (k=0;k<pnzj;k++) {
2114:         poffset = pjj[k]*ppdof+pshift;
2115:         if (!apjdense[poffset]) {
2116:           apjdense[poffset] = -1;
2117:           apj[apnzj++]      = poffset;
2118:         }
2119:         apa[poffset] += (*aa)*paj[k];
2120:       }
2121:       flops += 2*pnzj;
2122:       aa++;
2123:     }

2125:     /* Sort the j index array for quick sparse axpy. */
2126:     /* Note: a array does not need sorting as it is in dense storage locations. */
2127:     PetscSortInt(apnzj,apj);

2129:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2130:     prow    = i/ppdof; /* integer division */
2131:     pshift  = i%ppdof;
2132:     poffset = pi[prow];
2133:     pnzi = pi[prow+1] - poffset;
2134:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2135:     pJ   = pj+poffset;
2136:     pA   = pa+poffset;
2137:     for (j=0;j<pnzi;j++) {
2138:       crow   = (*pJ)*ppdof+pshift;
2139:       cjj    = cj + ci[crow];
2140:       caj    = ca + ci[crow];
2141:       pJ++;
2142:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2143:       for (k=0,nextap=0;nextap<apnzj;k++) {
2144:         if (cjj[k]==apj[nextap]) {
2145:           caj[k] += (*pA)*apa[apj[nextap++]];
2146:         }
2147:       }
2148:       flops += 2*apnzj;
2149:       pA++;
2150:     }

2152:     /* Zero the current row info for A*P */
2153:     for (j=0;j<apnzj;j++) {
2154:       apa[apj[j]]      = 0.;
2155:       apjdense[apj[j]] = 0;
2156:     }
2157:   }

2159:   /* Assemble the final matrix and clean up */
2160:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2161:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2162:   PetscFree(apa);
2163:   PetscLogFlops(flops);

2165:   return(0);
2166: }

2171: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2172: {
2173:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2174:   Mat               a = b->AIJ,B;
2175:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
2176:   PetscErrorCode    ierr;
2177:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2178:   PetscInt          *cols;
2179:   PetscScalar       *vals;

2182:   MatGetSize(a,&m,&n);
2183:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2184:   for (i=0; i<m; i++) {
2185:     nmax = PetscMax(nmax,aij->ilen[i]);
2186:     for (j=0; j<dof; j++) {
2187:       ilen[dof*i+j] = aij->ilen[i];
2188:     }
2189:   }
2190:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2191:   MatSetOption(B,MAT_COLUMNS_SORTED);
2192:   PetscFree(ilen);
2193:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
2194:   ii   = 0;
2195:   for (i=0; i<m; i++) {
2196:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2197:     for (j=0; j<dof; j++) {
2198:       for (k=0; k<ncols; k++) {
2199:         icols[k] = dof*cols[k]+j;
2200:       }
2201:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2202:       ii++;
2203:     }
2204:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2205:   }
2206:   PetscFree(icols);
2207:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2208:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2210:   if (reuse == MAT_REUSE_MATRIX) {
2211:     MatHeaderReplace(A,B);
2212:   } else {
2213:     *newmat = B;
2214:   }
2215:   return(0);
2216: }

2219:  #include src/mat/impls/aij/mpi/mpiaij.h

2224: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2225: {
2226:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
2227:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2228:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2229:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2230:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2231:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2232:   PetscInt          dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
2233:   PetscInt          rstart,cstart,*garray,ii,k;
2234:   PetscErrorCode    ierr;
2235:   PetscScalar       *vals,*ovals;

2238:   PetscMalloc2(A->m,PetscInt,&dnz,A->m,PetscInt,&onz);
2239:   for (i=0; i<A->m/dof; i++) {
2240:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
2241:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
2242:     for (j=0; j<dof; j++) {
2243:       dnz[dof*i+j] = AIJ->ilen[i];
2244:       onz[dof*i+j] = OAIJ->ilen[i];
2245:     }
2246:   }
2247:   MatCreateMPIAIJ(A->comm,A->m,A->n,A->M,A->N,0,dnz,0,onz,&B);
2248:   MatSetOption(B,MAT_COLUMNS_SORTED);
2249:   PetscFree2(dnz,onz);

2251:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2252:   rstart = dof*mpiaij->rstart;
2253:   cstart = dof*mpiaij->cstart;
2254:   garray = mpiaij->garray;

2256:   ii = rstart;
2257:   for (i=0; i<A->m/dof; i++) {
2258:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2259:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2260:     for (j=0; j<dof; j++) {
2261:       for (k=0; k<ncols; k++) {
2262:         icols[k] = cstart + dof*cols[k]+j;
2263:       }
2264:       for (k=0; k<oncols; k++) {
2265:         oicols[k] = dof*garray[ocols[k]]+j;
2266:       }
2267:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2268:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2269:       ii++;
2270:     }
2271:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2272:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2273:   }
2274:   PetscFree2(icols,oicols);

2276:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2277:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

2279:   if (reuse == MAT_REUSE_MATRIX) {
2280:     MatHeaderReplace(A,B);
2281:   } else {
2282:     *newmat = B;
2283:   }
2284:   return(0);
2285: }


2289: /* ---------------------------------------------------------------------------------- */
2290: /*MC
2291:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 
2292:   operations for multicomponent problems.  It interpolates each component the same
2293:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
2294:   and MATMPIAIJ for distributed matrices.

2296:   Operations provided:
2297: + MatMult
2298: . MatMultTranspose
2299: . MatMultAdd
2300: . MatMultTransposeAdd
2301: - MatView

2303:   Level: advanced

2305: M*/
2308: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2309: {
2311:   PetscMPIInt    size;
2312:   PetscInt       n;
2313:   Mat_MPIMAIJ    *b;
2314:   Mat            B;

2317:   PetscObjectReference((PetscObject)A);

2319:   if (dof == 1) {
2320:     *maij = A;
2321:   } else {
2322:     MatCreate(A->comm,&B);
2323:     MatSetSizes(B,dof*A->m,dof*A->n,dof*A->M,dof*A->N);
2324:     B->assembled    = PETSC_TRUE;

2326:     MPI_Comm_size(A->comm,&size);
2327:     if (size == 1) {
2328:       MatSetType(B,MATSEQMAIJ);
2329:       B->ops->destroy = MatDestroy_SeqMAIJ;
2330:       B->ops->view    = MatView_SeqMAIJ;
2331:       b      = (Mat_MPIMAIJ*)B->data;
2332:       b->dof = dof;
2333:       b->AIJ = A;
2334:       if (dof == 2) {
2335:         B->ops->mult             = MatMult_SeqMAIJ_2;
2336:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
2337:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
2338:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2339:       } else if (dof == 3) {
2340:         B->ops->mult             = MatMult_SeqMAIJ_3;
2341:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
2342:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
2343:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2344:       } else if (dof == 4) {
2345:         B->ops->mult             = MatMult_SeqMAIJ_4;
2346:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
2347:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
2348:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2349:       } else if (dof == 5) {
2350:         B->ops->mult             = MatMult_SeqMAIJ_5;
2351:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
2352:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
2353:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2354:       } else if (dof == 6) {
2355:         B->ops->mult             = MatMult_SeqMAIJ_6;
2356:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
2357:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
2358:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2359:       } else if (dof == 7) {
2360:         B->ops->mult             = MatMult_SeqMAIJ_7;
2361:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
2362:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
2363:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2364:       } else if (dof == 8) {
2365:         B->ops->mult             = MatMult_SeqMAIJ_8;
2366:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
2367:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
2368:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2369:       } else if (dof == 9) {
2370:         B->ops->mult             = MatMult_SeqMAIJ_9;
2371:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
2372:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
2373:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2374:       } else if (dof == 16) {
2375:         B->ops->mult             = MatMult_SeqMAIJ_16;
2376:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
2377:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
2378:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2379:       } else {
2380:         SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2381:       }
2382:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2383:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2384:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2385:     } else {
2386:       Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2387:       IS         from,to;
2388:       Vec        gvec;
2389:       PetscInt   *garray,i;

2391:       MatSetType(B,MATMPIMAIJ);
2392:       B->ops->destroy = MatDestroy_MPIMAIJ;
2393:       B->ops->view    = MatView_MPIMAIJ;
2394:       b      = (Mat_MPIMAIJ*)B->data;
2395:       b->dof = dof;
2396:       b->A   = A;
2397:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2398:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

2400:       VecGetSize(mpiaij->lvec,&n);
2401:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2402:       VecSetBlockSize(b->w,dof);

2404:       /* create two temporary Index sets for build scatter gather */
2405:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2406:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2407:       ISCreateBlock(A->comm,dof,n,garray,&from);
2408:       PetscFree(garray);
2409:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

2411:       /* create temporary global vector to generate scatter context */
2412:       VecCreateMPI(A->comm,dof*A->n,dof*A->N,&gvec);
2413:       VecSetBlockSize(gvec,dof);

2415:       /* generate the scatter context */
2416:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

2418:       ISDestroy(from);
2419:       ISDestroy(to);
2420:       VecDestroy(gvec);

2422:       B->ops->mult             = MatMult_MPIMAIJ_dof;
2423:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
2424:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
2425:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2426:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2427:     }
2428:     *maij = B;
2429:     MatView_Private(B);
2430:   }
2431:   return(0);
2432: }