Actual source code: dvec2.c

  2: /*$Id: dvec2.c,v 1.91 2001/09/11 18:14:31 bsmith Exp $*/

  4: /* 
  5:    Defines some vector operation functions that are shared by 
  6:   sequential and parallel vectors.
  7: */
 8:  #include src/vec/impls/dvecimpl.h
 9:  #include src/inline/dot.h
 10:  #include src/inline/setval.h
 11:  #include src/inline/axpy.h

 13: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 14: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
 15: {
 16:   Vec_Seq     *xv = (Vec_Seq *)xin->data;
 17:   int         i,nv_rem,n = xin->n,ierr;
 18:   PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,*x;
 19:   Vec         *yy;

 22:   sum0 = 0;
 23:   sum1 = 0;
 24:   sum2 = 0;

 26:   i      = nv;
 27:   nv_rem = nv&0x3;
 28:   yy     = (Vec*)yin;
 29:   x      = xv->array;

 31:   switch (nv_rem) {
 32:   case 3:
 33:     VecGetArrayFast(yy[0],&yy0);
 34:     VecGetArrayFast(yy[1],&yy1);
 35:     VecGetArrayFast(yy[2],&yy2);
 36:     fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
 37:     VecRestoreArrayFast(yy[0],&yy0);
 38:     VecRestoreArrayFast(yy[1],&yy1);
 39:     VecRestoreArrayFast(yy[2],&yy2);
 40:     z[0] = sum0;
 41:     z[1] = sum1;
 42:     z[2] = sum2;
 43:     break;
 44:   case 2:
 45:     VecGetArrayFast(yy[0],&yy0);
 46:     VecGetArrayFast(yy[1],&yy1);
 47:     fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
 48:     VecRestoreArrayFast(yy[0],&yy0);
 49:     VecRestoreArrayFast(yy[1],&yy1);
 50:     z[0] = sum0;
 51:     z[1] = sum1;
 52:     break;
 53:   case 1:
 54:     VecGetArrayFast(yy[0],&yy0);
 55:     fortranmdot1_(x,yy0,&n,&sum0);
 56:     VecRestoreArrayFast(yy[0],&yy0);
 57:     z[0] = sum0;
 58:     break;
 59:   case 0:
 60:     break;
 61:   }
 62:   z  += nv_rem;
 63:   i  -= nv_rem;
 64:   yy += nv_rem;

 66:   while (i >0) {
 67:     sum0 = 0;
 68:     sum1 = 0;
 69:     sum2 = 0;
 70:     sum3 = 0;
 71:     VecGetArrayFast(yy[0],&yy0);
 72:     VecGetArrayFast(yy[1],&yy1);
 73:     VecGetArrayFast(yy[2],&yy2);
 74:     VecGetArrayFast(yy[3],&yy3);
 75:     fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
 76:     VecRestoreArrayFast(yy[0],&yy0);
 77:     VecRestoreArrayFast(yy[1],&yy1);
 78:     VecRestoreArrayFast(yy[2],&yy2);
 79:     VecRestoreArrayFast(yy[3],&yy3);
 80:     yy  += 4;
 81:     z[0] = sum0;
 82:     z[1] = sum1;
 83:     z[2] = sum2;
 84:     z[3] = sum3;
 85:     z   += 4;
 86:     i   -= 4;
 87:   }
 88:   PetscLogFlops(nv*(2*xin->n-1));
 89:   return(0);
 90: }

 92: #else
 93: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar * restrict z)
 94: {
 95:   Vec_Seq     *xv = (Vec_Seq *)xin->data;
 96:   int          n = xin->n,i,j,nv_rem,j_rem,ierr;
 97:   PetscScalar  sum0,sum1,sum2,sum3,x0,x1,x2,x3,* restrict x;
 98:   PetscScalar  * restrict yy0,* restrict yy1,* restrict yy2,*restrict yy3;
 99:   Vec          *yy;

102:   sum0 = 0;
103:   sum1 = 0;
104:   sum2 = 0;

106:   i      = nv;
107:   nv_rem = nv&0x3;
108:   yy     = (Vec *)yin;
109:   j      = n;
110:   x      = xv->array;

112:   switch (nv_rem) {
113:   case 3:
114:     VecGetArrayFast(yy[0],&yy0);
115:     VecGetArrayFast(yy[1],&yy1);
116:     VecGetArrayFast(yy[2],&yy2);
117:     switch (j_rem=j&0x3) {
118:     case 3:
119:       x2 = x[2];
120:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
121:       sum2 += x2*PetscConj(yy2[2]);
122:     case 2:
123:       x1 = x[1];
124:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
125:       sum2 += x1*PetscConj(yy2[1]);
126:     case 1:
127:       x0 = x[0];
128:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
129:       sum2 += x0*PetscConj(yy2[0]);
130:     case 0:
131:       x   += j_rem;
132:       yy0 += j_rem;
133:       yy1 += j_rem;
134:       yy2 += j_rem;
135:       j   -= j_rem;
136:       break;
137:     }
138:     while (j>0) {
139:       x0 = x[0];
140:       x1 = x[1];
141:       x2 = x[2];
142:       x3 = x[3];
143:       x += 4;
144: 
145:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
146:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
147:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
148:       j -= 4;
149:     }
150:     z[0] = sum0;
151:     z[1] = sum1;
152:     z[2] = sum2;
153:     VecRestoreArrayFast(yy[0],&yy0);
154:     VecRestoreArrayFast(yy[1],&yy1);
155:     VecRestoreArrayFast(yy[2],&yy2);
156:     break;
157:   case 2:
158:     VecGetArrayFast(yy[0],&yy0);
159:     VecGetArrayFast(yy[1],&yy1);
160:     switch (j_rem=j&0x3) {
161:     case 3:
162:       x2 = x[2];
163:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
164:     case 2:
165:       x1 = x[1];
166:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
167:     case 1:
168:       x0 = x[0];
169:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
170:     case 0:
171:       x   += j_rem;
172:       yy0 += j_rem;
173:       yy1 += j_rem;
174:       j   -= j_rem;
175:       break;
176:     }
177:     while (j>0) {
178:       x0 = x[0];
179:       x1 = x[1];
180:       x2 = x[2];
181:       x3 = x[3];
182:       x += 4;
183: 
184:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
185:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
186:       j -= 4;
187:     }
188:     z[0] = sum0;
189:     z[1] = sum1;
190: 
191:     VecRestoreArrayFast(yy[0],&yy0);
192:     VecRestoreArrayFast(yy[1],&yy1);
193:     break;
194:   case 1:
195:     VecGetArrayFast(yy[0],&yy0);
196:     switch (j_rem=j&0x3) {
197:     case 3:
198:       x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
199:     case 2:
200:       x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
201:     case 1:
202:       x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
203:     case 0:
204:       x   += j_rem;
205:       yy0 += j_rem;
206:       j   -= j_rem;
207:       break;
208:     }
209:     while (j>0) {
210:       sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
211:             + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
212:       yy0+=4;
213:       j -= 4; x+=4;
214:     }
215:     z[0] = sum0;

217:     VecRestoreArrayFast(yy[0],&yy0);
218:     break;
219:   case 0:
220:     break;
221:   }
222:   z  += nv_rem;
223:   i  -= nv_rem;
224:   yy += nv_rem;

226:   while (i >0) {
227:     sum0 = 0;
228:     sum1 = 0;
229:     sum2 = 0;
230:     sum3 = 0;
231:     VecGetArrayFast(yy[0],&yy0);
232:     VecGetArrayFast(yy[1],&yy1);
233:     VecGetArrayFast(yy[2],&yy2);
234:     VecGetArrayFast(yy[3],&yy3);

236:     j = n;
237:     x = xv->array;
238:     switch (j_rem=j&0x3) {
239:     case 3:
240:       x2 = x[2];
241:       sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
242:       sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
243:     case 2:
244:       x1 = x[1];
245:       sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
246:       sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
247:     case 1:
248:       x0 = x[0];
249:       sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
250:       sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
251:     case 0:
252:       x   += j_rem;
253:       yy0 += j_rem;
254:       yy1 += j_rem;
255:       yy2 += j_rem;
256:       yy3 += j_rem;
257:       j   -= j_rem;
258:       break;
259:     }
260:     while (j>0) {
261:       x0 = x[0];
262:       x1 = x[1];
263:       x2 = x[2];
264:       x3 = x[3];
265:       x += 4;
266: 
267:       sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
268:       sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
269:       sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
270:       sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
271:       j -= 4;
272:     }
273:     z[0] = sum0;
274:     z[1] = sum1;
275:     z[2] = sum2;
276:     z[3] = sum3;
277:     z   += 4;
278:     i   -= 4;
279:     VecRestoreArrayFast(yy[0],&yy0);
280:     VecRestoreArrayFast(yy[1],&yy1);
281:     VecRestoreArrayFast(yy[2],&yy2);
282:     VecRestoreArrayFast(yy[3],&yy3);
283:     yy  += 4;
284:   }
285:   PetscLogFlops(nv*(2*xin->n-1));
286:   return(0);
287: }
288: #endif

290: /* ----------------------------------------------------------------------------*/
291: int VecMTDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
292: {
293:   Vec_Seq      *xv = (Vec_Seq *)xin->data;
294:   int          n = xin->n,i,j,nv_rem,j_rem,ierr;
295:   PetscScalar  sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
296:   Vec          *yy;
297: 

300:   sum0 = 0;
301:   sum1 = 0;
302:   sum2 = 0;

304:   i      = nv;
305:   nv_rem = nv&0x3;
306:   yy     = (Vec*)yin;
307:   j    = n;
308:   x    = xv->array;

310:   switch (nv_rem) {
311:   case 3:
312:     VecGetArrayFast(yy[0],&yy0);
313:     VecGetArrayFast(yy[1],&yy1);
314:     VecGetArrayFast(yy[2],&yy2);
315:     switch (j_rem=j&0x3) {
316:     case 3:
317:       x2 = x[2];
318:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
319:       sum2 += x2*yy2[2];
320:     case 2:
321:       x1 = x[1];
322:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
323:       sum2 += x1*yy2[1];
324:     case 1:
325:       x0 = x[0];
326:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
327:       sum2 += x0*yy2[0];
328:     case 0:
329:       x  += j_rem;
330:       yy0 += j_rem;
331:       yy1 += j_rem;
332:       yy2 += j_rem;
333:       j  -= j_rem;
334:       break;
335:     }
336:     while (j>0) {
337:       x0 = x[0];
338:       x1 = x[1];
339:       x2 = x[2];
340:       x3 = x[3];
341:       x += 4;
342: 
343:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
344:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
345:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
346:       j -= 4;
347:     }
348:     z[0] = sum0;
349:     z[1] = sum1;
350:     z[2] = sum2;
351:     VecRestoreArrayFast(yy[0],&yy0);
352:     VecRestoreArrayFast(yy[1],&yy1);
353:     VecRestoreArrayFast(yy[2],&yy2);
354:     break;
355:   case 2:
356:     VecGetArrayFast(yy[0],&yy0);
357:     VecGetArrayFast(yy[1],&yy1);
358:     switch (j_rem=j&0x3) {
359:     case 3:
360:       x2 = x[2];
361:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
362:     case 2:
363:       x1 = x[1];
364:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
365:     case 1:
366:       x0 = x[0];
367:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
368:     case 0:
369:       x  += j_rem;
370:       yy0 += j_rem;
371:       yy1 += j_rem;
372:       j  -= j_rem;
373:       break;
374:     }
375:     while (j>0) {
376:       x0 = x[0];
377:       x1 = x[1];
378:       x2 = x[2];
379:       x3 = x[3];
380:       x += 4;
381: 
382:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
383:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
384:       j -= 4;
385:     }
386:     z[0] = sum0;
387:     z[1] = sum1;
388: 
389:     VecRestoreArrayFast(yy[0],&yy0);
390:     VecRestoreArrayFast(yy[1],&yy1);
391:     break;
392:   case 1:
393:     VecGetArrayFast(yy[0],&yy0);
394:     switch (j_rem=j&0x3) {
395:     case 3:
396:       x2 = x[2]; sum0 += x2*yy0[2];
397:     case 2:
398:       x1 = x[1]; sum0 += x1*yy0[1];
399:     case 1:
400:       x0 = x[0]; sum0 += x0*yy0[0];
401:     case 0:
402:       x  += j_rem;
403:       yy0 += j_rem;
404:       j  -= j_rem;
405:       break;
406:     }
407:     while (j>0) {
408:       sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
409:       j -= 4; x+=4;
410:     }
411:     z[0] = sum0;

413:     VecRestoreArrayFast(yy[0],&yy0);
414:     break;
415:   case 0:
416:     break;
417:   }
418:   z  += nv_rem;
419:   i  -= nv_rem;
420:   yy += nv_rem;

422:   while (i >0) {
423:     sum0 = 0;
424:     sum1 = 0;
425:     sum2 = 0;
426:     sum3 = 0;
427:     VecGetArrayFast(yy[0],&yy0);
428:     VecGetArrayFast(yy[1],&yy1);
429:     VecGetArrayFast(yy[2],&yy2);
430:     VecGetArrayFast(yy[3],&yy3);

432:     j = n;
433:     x = xv->array;
434:     switch (j_rem=j&0x3) {
435:     case 3:
436:       x2 = x[2];
437:       sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
438:       sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
439:     case 2:
440:       x1 = x[1];
441:       sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
442:       sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
443:     case 1:
444:       x0 = x[0];
445:       sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
446:       sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
447:     case 0:
448:       x  += j_rem;
449:       yy0 += j_rem;
450:       yy1 += j_rem;
451:       yy2 += j_rem;
452:       yy3 += j_rem;
453:       j  -= j_rem;
454:       break;
455:     }
456:     while (j>0) {
457:       x0 = x[0];
458:       x1 = x[1];
459:       x2 = x[2];
460:       x3 = x[3];
461:       x += 4;
462: 
463:       sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
464:       sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
465:       sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
466:       sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
467:       j -= 4;
468:     }
469:     z[0] = sum0;
470:     z[1] = sum1;
471:     z[2] = sum2;
472:     z[3] = sum3;
473:     z   += 4;
474:     i   -= 4;
475:     VecRestoreArrayFast(yy[0],&yy0);
476:     VecRestoreArrayFast(yy[1],&yy1);
477:     VecRestoreArrayFast(yy[2],&yy2);
478:     VecRestoreArrayFast(yy[3],&yy3);
479:     yy  += 4;
480:   }
481:   PetscLogFlops(nv*(2*xin->n-1));
482:   return(0);
483: }
484: 

486: int VecMax_Seq(Vec xin,int* idx,PetscReal * z)
487: {
488:   Vec_Seq      *x = (Vec_Seq*)xin->data;
489:   int          i,j=0,n = xin->n;
490:   PetscReal    max,tmp;
491:   PetscScalar  *xx = x->array;

494:   if (!n) {
495:     max = PETSC_MIN;
496:     j   = -1;
497:   } else {
498: #if defined(PETSC_USE_COMPLEX)
499:       max = PetscRealPart(*xx++); j = 0;
500: #else
501:       max = *xx++; j = 0;
502: #endif
503:     for (i=1; i<n; i++) {
504: #if defined(PETSC_USE_COMPLEX)
505:       if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
506: #else
507:       if ((tmp = *xx++) > max) { j = i; max = tmp; }
508: #endif
509:     }
510:   }
511:   *z   = max;
512:   if (idx) *idx = j;
513:   return(0);
514: }

516: int VecMin_Seq(Vec xin,int* idx,PetscReal * z)
517: {
518:   Vec_Seq      *x = (Vec_Seq*)xin->data;
519:   int          i,j=0,n = xin->n;
520:   PetscReal    min,tmp;
521:   PetscScalar  *xx = x->array;

524:   if (!n) {
525:     min = PETSC_MAX;
526:     j   = -1;
527:   } else {
528: #if defined(PETSC_USE_COMPLEX)
529:     min = PetscRealPart(*xx++); j = 0;
530: #else
531:     min = *xx++; j = 0;
532: #endif
533:     for (i=1; i<n; i++) {
534: #if defined(PETSC_USE_COMPLEX)
535:       if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
536: #else
537:       if ((tmp = *xx++) < min) { j = i; min = tmp; }
538: #endif
539:     }
540:   }
541:   *z   = min;
542:   if (idx) *idx = j;
543:   return(0);
544: }

546: int VecSet_Seq(const PetscScalar* alpha,Vec xin)
547: {
548:   Vec_Seq      *x = (Vec_Seq *)xin->data;
549:   int          n = xin->n,ierr;
550:   PetscScalar  *xx = x->array,oalpha = *alpha;

553:   if (oalpha == 0.0) {
554:     PetscMemzero(xx,n*sizeof(PetscScalar));
555:   }
556:   else {
557:     SET(xx,n,oalpha);
558:   }
559:   return(0);
560: }

562: int VecSetRandom_Seq(PetscRandom r,Vec xin)
563: {
564:   int          n = xin->n,i,ierr;
565:   PetscScalar  *xx;

568:   VecGetArrayFast(xin,&xx);
569:   for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
570:   VecRestoreArrayFast(xin,&xx);
571:   return(0);
572: }

574: int VecMAXPY_Seq(int nv,const PetscScalar *alpha,Vec xin,Vec *y)
575: {
576:   Vec_Seq      *xdata = (Vec_Seq*)xin->data;
577:   int          n = xin->n,ierr,j,j_rem;
578:   PetscScalar  *xx,*yy0,*yy1,*yy2,*yy3,alpha0,alpha1,alpha2,alpha3;

580: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
581: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
582: #endif

585:   PetscLogFlops(nv*2*n);

587:   xx = xdata->array;
588:   switch (j_rem=nv&0x3) {
589:   case 3:
590:     VecGetArrayFast(y[0],&yy0);
591:     VecGetArrayFast(y[1],&yy1);
592:     VecGetArrayFast(y[2],&yy2);
593:     alpha0 = alpha[0];
594:     alpha1 = alpha[1];
595:     alpha2 = alpha[2];
596:     alpha += 3;
597:     APXY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
598:     VecRestoreArrayFast(y[0],&yy0);
599:     VecRestoreArrayFast(y[1],&yy1);
600:     VecRestoreArrayFast(y[2],&yy2);
601:     y     += 3;
602:     break;
603:   case 2:
604:     VecGetArrayFast(y[0],&yy0);
605:     VecGetArrayFast(y[1],&yy1);
606:     alpha0 = alpha[0];
607:     alpha1 = alpha[1];
608:     alpha +=2;
609:     APXY2(xx,alpha0,alpha1,yy0,yy1,n);
610:     VecRestoreArrayFast(y[0],&yy0);
611:     VecRestoreArrayFast(y[1],&yy1);
612:     y     +=2;
613:     break;
614:   case 1:
615:     VecGetArrayFast(y[0],&yy0);
616:     alpha0 = *alpha++; APXY(xx,alpha0,yy0,n);
617:     VecRestoreArrayFast(y[0],&yy0);
618:     y     +=1;
619:     break;
620:   }
621:   for (j=j_rem; j<nv; j+=4) {
622:     VecGetArrayFast(y[0],&yy0);
623:     VecGetArrayFast(y[1],&yy1);
624:     VecGetArrayFast(y[2],&yy2);
625:     VecGetArrayFast(y[3],&yy3);
626:     alpha0 = alpha[0];
627:     alpha1 = alpha[1];
628:     alpha2 = alpha[2];
629:     alpha3 = alpha[3];
630:     alpha  += 4;

632:     APXY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
633:     VecRestoreArrayFast(y[0],&yy0);
634:     VecRestoreArrayFast(y[1],&yy1);
635:     VecRestoreArrayFast(y[2],&yy2);
636:     VecRestoreArrayFast(y[3],&yy3);
637:     y      += 4;
638:   }
639:   return(0);
640: }

642: int VecAYPX_Seq(const PetscScalar *alpha,Vec xin,Vec yin)
643: {
644:   Vec_Seq      *x = (Vec_Seq *)xin->data;
645:   int          n = xin->n,ierr;
646:   PetscScalar  *xx = x->array,*yy;

649:   VecGetArrayFast(yin,&yy);
650: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
651:   fortranaypx_(&n,alpha,xx,yy);
652: #else
653:   {
654:     int i;
655:     PetscScalar oalpha = *alpha;
656:     for (i=0; i<n; i++) {
657:       yy[i] = xx[i] + oalpha*yy[i];
658:     }
659:   }
660: #endif
661:   VecRestoreArrayFast(yin,&yy);
662:   PetscLogFlops(2*n);
663:   return(0);
664: }

666: /*
667:    IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
668:   to be slower than a regular C loop.  Hence,we do not include it.
669:   void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
670: */

672: int VecWAXPY_Seq(const PetscScalar* alpha,Vec xin,Vec yin,Vec win)
673: {
674:   Vec_Seq      *x = (Vec_Seq *)xin->data;
675:   int          i,n = xin->n,ierr;
676:   PetscScalar  *xx = x->array,*yy,*ww,oalpha = *alpha;

679:   VecGetArrayFast(yin,&yy);
680:   VecGetArrayFast(win,&ww);
681:   if (oalpha == 1.0) {
682:     PetscLogFlops(n);
683:     /* could call BLAS axpy after call to memcopy, but may be slower */
684:     for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
685:   } else if (oalpha == -1.0) {
686:     PetscLogFlops(n);
687:     for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
688:   } else if (oalpha == 0.0) {
689:     PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
690:   } else {
691: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
692:     fortranwaxpy_(&n,alpha,xx,yy,ww);
693: #else
694:     for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
695: #endif
696:     PetscLogFlops(2*n);
697:   }
698:   VecRestoreArrayFast(yin,&yy);
699:   VecRestoreArrayFast(win,&ww);
700:   return(0);
701: }

703: int VecPointwiseMult_Seq(Vec xin,Vec yin,Vec win)
704: {
705:   Vec_Seq      *x = (Vec_Seq *)xin->data;
706:   int          n = xin->n,i,ierr;
707:   PetscScalar  *xx = x->array,*yy,*ww;

710:   VecGetArrayFast(yin,&yy);
711:   if (yin != win) {VecGetArrayFast(win,&ww);}
712:   else ww = yy;

714:   if (ww == xx) {
715:     for (i=0; i<n; i++) ww[i] *= yy[i];
716:   } else if (ww == yy) {
717:     for (i=0; i<n; i++) ww[i] *= xx[i];
718:   } else {
719:     /*  This was suppose to help on SGI but didn't really seem to
720:           PetscReal * __restrict www = ww;
721:           PetscReal * __restrict yyy = yy;
722:           PetscReal * __restrict xxx = xx;
723:           for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
724:     */
725: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
726:     fortranxtimesy_(xx,yy,ww,&n);
727: #else
728:     for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
729: #endif
730:   }
731:   VecRestoreArrayFast(yin,&yy);
732:   if (yin != win) {VecRestoreArrayFast(win,&ww);}
733:   PetscLogFlops(n);
734:   return(0);
735: }

737: int VecPointwiseDivide_Seq(Vec xin,Vec yin,Vec win)
738: {
739:   Vec_Seq      *x = (Vec_Seq *)xin->data;
740:   int          n = xin->n,i,ierr;
741:   PetscScalar  *xx = x->array,*yy,*ww;

744:   VecGetArrayFast(yin,&yy);
745:   if (yin != win) {VecGetArrayFast(win,&ww);}
746:   else {ww = yy;}
747:   for (i=0; i<n; i++) ww[i] = xx[i] / yy[i];
748:   VecRestoreArrayFast(yin,&yy);
749:   if (yin != win) {VecRestoreArrayFast(win,&ww);}
750:   PetscLogFlops(n);
751:   return(0);
752: }

754: int VecGetArray_Seq(Vec vin,PetscScalar *a[])
755: {
756:   Vec_Seq *v = (Vec_Seq *)vin->data;
757:   int     ierr;

760:   if (vin->array_gotten) {
761:     SETERRQ(1,"Array has already been gotten for this vector,you mayn
762:     have forgotten a call to VecRestoreArray()");
763:   }
764:   vin->array_gotten = PETSC_TRUE;

766:   *a =  v->array;
767:   PetscObjectTakeAccess(vin);
768:   return(0);
769: }

771: int VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
772: {


777:   if (!vin->array_gotten) {
778:     SETERRQ(1,"Array has not been gotten for this vector, you mayn
779:     have forgotten a call to VecGetArray()");
780:   }
781:   vin->array_gotten = PETSC_FALSE;
782:   if (a) *a         = 0; /* now user cannot accidently use it again */

784:   PetscObjectGrantAccess(vin);
785:   return(0);
786: }

788: int VecResetArray_Seq(Vec vin)
789: {
790:   Vec_Seq *v = (Vec_Seq *)vin->data;

793:   v->array = v->array_allocated;
794:   return(0);
795: }

797: int VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
798: {
799:   Vec_Seq *v = (Vec_Seq *)vin->data;

802:   v->array = (PetscScalar *)a;
803:   return(0);
804: }

806: int VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
807: {
808:   Vec_Seq *v = (Vec_Seq *)vin->data;
809:   int     ierr;

812:   if (v->array_allocated) {PetscFree(v->array_allocated);}
813:   v->array_allocated = v->array = (PetscScalar *)a;
814:   return(0);
815: }

817: int VecGetSize_Seq(Vec vin,int *size)
818: {
820:   *size = vin->n;
821:   return(0);
822: }

824: int VecConjugate_Seq(Vec xin)
825: {
826:   PetscScalar *x = ((Vec_Seq *)xin->data)->array;
827:   int         n = xin->n;

830:   while (n-->0) {
831:     *x = PetscConj(*x);
832:     x++;
833:   }
834:   return(0);
835: }
836: