Actual source code: dvec2.c
2: /*$Id: dvec2.c,v 1.91 2001/09/11 18:14:31 bsmith Exp $*/
4: /*
5: Defines some vector operation functions that are shared by
6: sequential and parallel vectors.
7: */
8: #include src/vec/impls/dvecimpl.h
9: #include src/inline/dot.h
10: #include src/inline/setval.h
11: #include src/inline/axpy.h
13: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
14: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
15: {
16: Vec_Seq *xv = (Vec_Seq *)xin->data;
17: int i,nv_rem,n = xin->n,ierr;
18: PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,*x;
19: Vec *yy;
22: sum0 = 0;
23: sum1 = 0;
24: sum2 = 0;
26: i = nv;
27: nv_rem = nv&0x3;
28: yy = (Vec*)yin;
29: x = xv->array;
31: switch (nv_rem) {
32: case 3:
33: VecGetArrayFast(yy[0],&yy0);
34: VecGetArrayFast(yy[1],&yy1);
35: VecGetArrayFast(yy[2],&yy2);
36: fortranmdot3_(x,yy0,yy1,yy2,&n,&sum0,&sum1,&sum2);
37: VecRestoreArrayFast(yy[0],&yy0);
38: VecRestoreArrayFast(yy[1],&yy1);
39: VecRestoreArrayFast(yy[2],&yy2);
40: z[0] = sum0;
41: z[1] = sum1;
42: z[2] = sum2;
43: break;
44: case 2:
45: VecGetArrayFast(yy[0],&yy0);
46: VecGetArrayFast(yy[1],&yy1);
47: fortranmdot2_(x,yy0,yy1,&n,&sum0,&sum1);
48: VecRestoreArrayFast(yy[0],&yy0);
49: VecRestoreArrayFast(yy[1],&yy1);
50: z[0] = sum0;
51: z[1] = sum1;
52: break;
53: case 1:
54: VecGetArrayFast(yy[0],&yy0);
55: fortranmdot1_(x,yy0,&n,&sum0);
56: VecRestoreArrayFast(yy[0],&yy0);
57: z[0] = sum0;
58: break;
59: case 0:
60: break;
61: }
62: z += nv_rem;
63: i -= nv_rem;
64: yy += nv_rem;
66: while (i >0) {
67: sum0 = 0;
68: sum1 = 0;
69: sum2 = 0;
70: sum3 = 0;
71: VecGetArrayFast(yy[0],&yy0);
72: VecGetArrayFast(yy[1],&yy1);
73: VecGetArrayFast(yy[2],&yy2);
74: VecGetArrayFast(yy[3],&yy3);
75: fortranmdot4_(x,yy0,yy1,yy2,yy3,&n,&sum0,&sum1,&sum2,&sum3);
76: VecRestoreArrayFast(yy[0],&yy0);
77: VecRestoreArrayFast(yy[1],&yy1);
78: VecRestoreArrayFast(yy[2],&yy2);
79: VecRestoreArrayFast(yy[3],&yy3);
80: yy += 4;
81: z[0] = sum0;
82: z[1] = sum1;
83: z[2] = sum2;
84: z[3] = sum3;
85: z += 4;
86: i -= 4;
87: }
88: PetscLogFlops(nv*(2*xin->n-1));
89: return(0);
90: }
92: #else
93: int VecMDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar * restrict z)
94: {
95: Vec_Seq *xv = (Vec_Seq *)xin->data;
96: int n = xin->n,i,j,nv_rem,j_rem,ierr;
97: PetscScalar sum0,sum1,sum2,sum3,x0,x1,x2,x3,* restrict x;
98: PetscScalar * restrict yy0,* restrict yy1,* restrict yy2,*restrict yy3;
99: Vec *yy;
102: sum0 = 0;
103: sum1 = 0;
104: sum2 = 0;
106: i = nv;
107: nv_rem = nv&0x3;
108: yy = (Vec *)yin;
109: j = n;
110: x = xv->array;
112: switch (nv_rem) {
113: case 3:
114: VecGetArrayFast(yy[0],&yy0);
115: VecGetArrayFast(yy[1],&yy1);
116: VecGetArrayFast(yy[2],&yy2);
117: switch (j_rem=j&0x3) {
118: case 3:
119: x2 = x[2];
120: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
121: sum2 += x2*PetscConj(yy2[2]);
122: case 2:
123: x1 = x[1];
124: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
125: sum2 += x1*PetscConj(yy2[1]);
126: case 1:
127: x0 = x[0];
128: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
129: sum2 += x0*PetscConj(yy2[0]);
130: case 0:
131: x += j_rem;
132: yy0 += j_rem;
133: yy1 += j_rem;
134: yy2 += j_rem;
135: j -= j_rem;
136: break;
137: }
138: while (j>0) {
139: x0 = x[0];
140: x1 = x[1];
141: x2 = x[2];
142: x3 = x[3];
143: x += 4;
144:
145: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
146: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
147: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
148: j -= 4;
149: }
150: z[0] = sum0;
151: z[1] = sum1;
152: z[2] = sum2;
153: VecRestoreArrayFast(yy[0],&yy0);
154: VecRestoreArrayFast(yy[1],&yy1);
155: VecRestoreArrayFast(yy[2],&yy2);
156: break;
157: case 2:
158: VecGetArrayFast(yy[0],&yy0);
159: VecGetArrayFast(yy[1],&yy1);
160: switch (j_rem=j&0x3) {
161: case 3:
162: x2 = x[2];
163: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
164: case 2:
165: x1 = x[1];
166: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
167: case 1:
168: x0 = x[0];
169: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
170: case 0:
171: x += j_rem;
172: yy0 += j_rem;
173: yy1 += j_rem;
174: j -= j_rem;
175: break;
176: }
177: while (j>0) {
178: x0 = x[0];
179: x1 = x[1];
180: x2 = x[2];
181: x3 = x[3];
182: x += 4;
183:
184: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
185: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
186: j -= 4;
187: }
188: z[0] = sum0;
189: z[1] = sum1;
190:
191: VecRestoreArrayFast(yy[0],&yy0);
192: VecRestoreArrayFast(yy[1],&yy1);
193: break;
194: case 1:
195: VecGetArrayFast(yy[0],&yy0);
196: switch (j_rem=j&0x3) {
197: case 3:
198: x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
199: case 2:
200: x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
201: case 1:
202: x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
203: case 0:
204: x += j_rem;
205: yy0 += j_rem;
206: j -= j_rem;
207: break;
208: }
209: while (j>0) {
210: sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
211: + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]);
212: yy0+=4;
213: j -= 4; x+=4;
214: }
215: z[0] = sum0;
217: VecRestoreArrayFast(yy[0],&yy0);
218: break;
219: case 0:
220: break;
221: }
222: z += nv_rem;
223: i -= nv_rem;
224: yy += nv_rem;
226: while (i >0) {
227: sum0 = 0;
228: sum1 = 0;
229: sum2 = 0;
230: sum3 = 0;
231: VecGetArrayFast(yy[0],&yy0);
232: VecGetArrayFast(yy[1],&yy1);
233: VecGetArrayFast(yy[2],&yy2);
234: VecGetArrayFast(yy[3],&yy3);
236: j = n;
237: x = xv->array;
238: switch (j_rem=j&0x3) {
239: case 3:
240: x2 = x[2];
241: sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]);
242: sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
243: case 2:
244: x1 = x[1];
245: sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]);
246: sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
247: case 1:
248: x0 = x[0];
249: sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]);
250: sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
251: case 0:
252: x += j_rem;
253: yy0 += j_rem;
254: yy1 += j_rem;
255: yy2 += j_rem;
256: yy3 += j_rem;
257: j -= j_rem;
258: break;
259: }
260: while (j>0) {
261: x0 = x[0];
262: x1 = x[1];
263: x2 = x[2];
264: x3 = x[3];
265: x += 4;
266:
267: sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
268: sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
269: sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
270: sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
271: j -= 4;
272: }
273: z[0] = sum0;
274: z[1] = sum1;
275: z[2] = sum2;
276: z[3] = sum3;
277: z += 4;
278: i -= 4;
279: VecRestoreArrayFast(yy[0],&yy0);
280: VecRestoreArrayFast(yy[1],&yy1);
281: VecRestoreArrayFast(yy[2],&yy2);
282: VecRestoreArrayFast(yy[3],&yy3);
283: yy += 4;
284: }
285: PetscLogFlops(nv*(2*xin->n-1));
286: return(0);
287: }
288: #endif
290: /* ----------------------------------------------------------------------------*/
291: int VecMTDot_Seq(int nv,Vec xin,const Vec yin[],PetscScalar *z)
292: {
293: Vec_Seq *xv = (Vec_Seq *)xin->data;
294: int n = xin->n,i,j,nv_rem,j_rem,ierr;
295: PetscScalar sum0,sum1,sum2,sum3,*yy0,*yy1,*yy2,*yy3,x0,x1,x2,x3,*x;
296: Vec *yy;
297:
300: sum0 = 0;
301: sum1 = 0;
302: sum2 = 0;
304: i = nv;
305: nv_rem = nv&0x3;
306: yy = (Vec*)yin;
307: j = n;
308: x = xv->array;
310: switch (nv_rem) {
311: case 3:
312: VecGetArrayFast(yy[0],&yy0);
313: VecGetArrayFast(yy[1],&yy1);
314: VecGetArrayFast(yy[2],&yy2);
315: switch (j_rem=j&0x3) {
316: case 3:
317: x2 = x[2];
318: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
319: sum2 += x2*yy2[2];
320: case 2:
321: x1 = x[1];
322: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
323: sum2 += x1*yy2[1];
324: case 1:
325: x0 = x[0];
326: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
327: sum2 += x0*yy2[0];
328: case 0:
329: x += j_rem;
330: yy0 += j_rem;
331: yy1 += j_rem;
332: yy2 += j_rem;
333: j -= j_rem;
334: break;
335: }
336: while (j>0) {
337: x0 = x[0];
338: x1 = x[1];
339: x2 = x[2];
340: x3 = x[3];
341: x += 4;
342:
343: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
344: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
345: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
346: j -= 4;
347: }
348: z[0] = sum0;
349: z[1] = sum1;
350: z[2] = sum2;
351: VecRestoreArrayFast(yy[0],&yy0);
352: VecRestoreArrayFast(yy[1],&yy1);
353: VecRestoreArrayFast(yy[2],&yy2);
354: break;
355: case 2:
356: VecGetArrayFast(yy[0],&yy0);
357: VecGetArrayFast(yy[1],&yy1);
358: switch (j_rem=j&0x3) {
359: case 3:
360: x2 = x[2];
361: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
362: case 2:
363: x1 = x[1];
364: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
365: case 1:
366: x0 = x[0];
367: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
368: case 0:
369: x += j_rem;
370: yy0 += j_rem;
371: yy1 += j_rem;
372: j -= j_rem;
373: break;
374: }
375: while (j>0) {
376: x0 = x[0];
377: x1 = x[1];
378: x2 = x[2];
379: x3 = x[3];
380: x += 4;
381:
382: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
383: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
384: j -= 4;
385: }
386: z[0] = sum0;
387: z[1] = sum1;
388:
389: VecRestoreArrayFast(yy[0],&yy0);
390: VecRestoreArrayFast(yy[1],&yy1);
391: break;
392: case 1:
393: VecGetArrayFast(yy[0],&yy0);
394: switch (j_rem=j&0x3) {
395: case 3:
396: x2 = x[2]; sum0 += x2*yy0[2];
397: case 2:
398: x1 = x[1]; sum0 += x1*yy0[1];
399: case 1:
400: x0 = x[0]; sum0 += x0*yy0[0];
401: case 0:
402: x += j_rem;
403: yy0 += j_rem;
404: j -= j_rem;
405: break;
406: }
407: while (j>0) {
408: sum0 += x[0]*yy0[0] + x[1]*yy0[1] + x[2]*yy0[2] + x[3]*yy0[3]; yy0+=4;
409: j -= 4; x+=4;
410: }
411: z[0] = sum0;
413: VecRestoreArrayFast(yy[0],&yy0);
414: break;
415: case 0:
416: break;
417: }
418: z += nv_rem;
419: i -= nv_rem;
420: yy += nv_rem;
422: while (i >0) {
423: sum0 = 0;
424: sum1 = 0;
425: sum2 = 0;
426: sum3 = 0;
427: VecGetArrayFast(yy[0],&yy0);
428: VecGetArrayFast(yy[1],&yy1);
429: VecGetArrayFast(yy[2],&yy2);
430: VecGetArrayFast(yy[3],&yy3);
432: j = n;
433: x = xv->array;
434: switch (j_rem=j&0x3) {
435: case 3:
436: x2 = x[2];
437: sum0 += x2*yy0[2]; sum1 += x2*yy1[2];
438: sum2 += x2*yy2[2]; sum3 += x2*yy3[2];
439: case 2:
440: x1 = x[1];
441: sum0 += x1*yy0[1]; sum1 += x1*yy1[1];
442: sum2 += x1*yy2[1]; sum3 += x1*yy3[1];
443: case 1:
444: x0 = x[0];
445: sum0 += x0*yy0[0]; sum1 += x0*yy1[0];
446: sum2 += x0*yy2[0]; sum3 += x0*yy3[0];
447: case 0:
448: x += j_rem;
449: yy0 += j_rem;
450: yy1 += j_rem;
451: yy2 += j_rem;
452: yy3 += j_rem;
453: j -= j_rem;
454: break;
455: }
456: while (j>0) {
457: x0 = x[0];
458: x1 = x[1];
459: x2 = x[2];
460: x3 = x[3];
461: x += 4;
462:
463: sum0 += x0*yy0[0] + x1*yy0[1] + x2*yy0[2] + x3*yy0[3]; yy0+=4;
464: sum1 += x0*yy1[0] + x1*yy1[1] + x2*yy1[2] + x3*yy1[3]; yy1+=4;
465: sum2 += x0*yy2[0] + x1*yy2[1] + x2*yy2[2] + x3*yy2[3]; yy2+=4;
466: sum3 += x0*yy3[0] + x1*yy3[1] + x2*yy3[2] + x3*yy3[3]; yy3+=4;
467: j -= 4;
468: }
469: z[0] = sum0;
470: z[1] = sum1;
471: z[2] = sum2;
472: z[3] = sum3;
473: z += 4;
474: i -= 4;
475: VecRestoreArrayFast(yy[0],&yy0);
476: VecRestoreArrayFast(yy[1],&yy1);
477: VecRestoreArrayFast(yy[2],&yy2);
478: VecRestoreArrayFast(yy[3],&yy3);
479: yy += 4;
480: }
481: PetscLogFlops(nv*(2*xin->n-1));
482: return(0);
483: }
484:
486: int VecMax_Seq(Vec xin,int* idx,PetscReal * z)
487: {
488: Vec_Seq *x = (Vec_Seq*)xin->data;
489: int i,j=0,n = xin->n;
490: PetscReal max,tmp;
491: PetscScalar *xx = x->array;
494: if (!n) {
495: max = PETSC_MIN;
496: j = -1;
497: } else {
498: #if defined(PETSC_USE_COMPLEX)
499: max = PetscRealPart(*xx++); j = 0;
500: #else
501: max = *xx++; j = 0;
502: #endif
503: for (i=1; i<n; i++) {
504: #if defined(PETSC_USE_COMPLEX)
505: if ((tmp = PetscRealPart(*xx++)) > max) { j = i; max = tmp;}
506: #else
507: if ((tmp = *xx++) > max) { j = i; max = tmp; }
508: #endif
509: }
510: }
511: *z = max;
512: if (idx) *idx = j;
513: return(0);
514: }
516: int VecMin_Seq(Vec xin,int* idx,PetscReal * z)
517: {
518: Vec_Seq *x = (Vec_Seq*)xin->data;
519: int i,j=0,n = xin->n;
520: PetscReal min,tmp;
521: PetscScalar *xx = x->array;
524: if (!n) {
525: min = PETSC_MAX;
526: j = -1;
527: } else {
528: #if defined(PETSC_USE_COMPLEX)
529: min = PetscRealPart(*xx++); j = 0;
530: #else
531: min = *xx++; j = 0;
532: #endif
533: for (i=1; i<n; i++) {
534: #if defined(PETSC_USE_COMPLEX)
535: if ((tmp = PetscRealPart(*xx++)) < min) { j = i; min = tmp;}
536: #else
537: if ((tmp = *xx++) < min) { j = i; min = tmp; }
538: #endif
539: }
540: }
541: *z = min;
542: if (idx) *idx = j;
543: return(0);
544: }
546: int VecSet_Seq(const PetscScalar* alpha,Vec xin)
547: {
548: Vec_Seq *x = (Vec_Seq *)xin->data;
549: int n = xin->n,ierr;
550: PetscScalar *xx = x->array,oalpha = *alpha;
553: if (oalpha == 0.0) {
554: PetscMemzero(xx,n*sizeof(PetscScalar));
555: }
556: else {
557: SET(xx,n,oalpha);
558: }
559: return(0);
560: }
562: int VecSetRandom_Seq(PetscRandom r,Vec xin)
563: {
564: int n = xin->n,i,ierr;
565: PetscScalar *xx;
568: VecGetArrayFast(xin,&xx);
569: for (i=0; i<n; i++) {PetscRandomGetValue(r,&xx[i]);}
570: VecRestoreArrayFast(xin,&xx);
571: return(0);
572: }
574: int VecMAXPY_Seq(int nv,const PetscScalar *alpha,Vec xin,Vec *y)
575: {
576: Vec_Seq *xdata = (Vec_Seq*)xin->data;
577: int n = xin->n,ierr,j,j_rem;
578: PetscScalar *xx,*yy0,*yy1,*yy2,*yy3,alpha0,alpha1,alpha2,alpha3;
580: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
581: #pragma disjoint(*xx,*yy0,*yy1,*yy2,*yy3,*alpha)
582: #endif
585: PetscLogFlops(nv*2*n);
587: xx = xdata->array;
588: switch (j_rem=nv&0x3) {
589: case 3:
590: VecGetArrayFast(y[0],&yy0);
591: VecGetArrayFast(y[1],&yy1);
592: VecGetArrayFast(y[2],&yy2);
593: alpha0 = alpha[0];
594: alpha1 = alpha[1];
595: alpha2 = alpha[2];
596: alpha += 3;
597: APXY3(xx,alpha0,alpha1,alpha2,yy0,yy1,yy2,n);
598: VecRestoreArrayFast(y[0],&yy0);
599: VecRestoreArrayFast(y[1],&yy1);
600: VecRestoreArrayFast(y[2],&yy2);
601: y += 3;
602: break;
603: case 2:
604: VecGetArrayFast(y[0],&yy0);
605: VecGetArrayFast(y[1],&yy1);
606: alpha0 = alpha[0];
607: alpha1 = alpha[1];
608: alpha +=2;
609: APXY2(xx,alpha0,alpha1,yy0,yy1,n);
610: VecRestoreArrayFast(y[0],&yy0);
611: VecRestoreArrayFast(y[1],&yy1);
612: y +=2;
613: break;
614: case 1:
615: VecGetArrayFast(y[0],&yy0);
616: alpha0 = *alpha++; APXY(xx,alpha0,yy0,n);
617: VecRestoreArrayFast(y[0],&yy0);
618: y +=1;
619: break;
620: }
621: for (j=j_rem; j<nv; j+=4) {
622: VecGetArrayFast(y[0],&yy0);
623: VecGetArrayFast(y[1],&yy1);
624: VecGetArrayFast(y[2],&yy2);
625: VecGetArrayFast(y[3],&yy3);
626: alpha0 = alpha[0];
627: alpha1 = alpha[1];
628: alpha2 = alpha[2];
629: alpha3 = alpha[3];
630: alpha += 4;
632: APXY4(xx,alpha0,alpha1,alpha2,alpha3,yy0,yy1,yy2,yy3,n);
633: VecRestoreArrayFast(y[0],&yy0);
634: VecRestoreArrayFast(y[1],&yy1);
635: VecRestoreArrayFast(y[2],&yy2);
636: VecRestoreArrayFast(y[3],&yy3);
637: y += 4;
638: }
639: return(0);
640: }
642: int VecAYPX_Seq(const PetscScalar *alpha,Vec xin,Vec yin)
643: {
644: Vec_Seq *x = (Vec_Seq *)xin->data;
645: int n = xin->n,ierr;
646: PetscScalar *xx = x->array,*yy;
649: VecGetArrayFast(yin,&yy);
650: #if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
651: fortranaypx_(&n,alpha,xx,yy);
652: #else
653: {
654: int i;
655: PetscScalar oalpha = *alpha;
656: for (i=0; i<n; i++) {
657: yy[i] = xx[i] + oalpha*yy[i];
658: }
659: }
660: #endif
661: VecRestoreArrayFast(yin,&yy);
662: PetscLogFlops(2*n);
663: return(0);
664: }
666: /*
667: IBM ESSL contains a routine dzaxpy() that is our WAXPY() but it appears
668: to be slower than a regular C loop. Hence,we do not include it.
669: void ?zaxpy(int*,PetscScalar*,PetscScalar*,int*,PetscScalar*,int*,PetscScalar*,int*);
670: */
672: int VecWAXPY_Seq(const PetscScalar* alpha,Vec xin,Vec yin,Vec win)
673: {
674: Vec_Seq *x = (Vec_Seq *)xin->data;
675: int i,n = xin->n,ierr;
676: PetscScalar *xx = x->array,*yy,*ww,oalpha = *alpha;
679: VecGetArrayFast(yin,&yy);
680: VecGetArrayFast(win,&ww);
681: if (oalpha == 1.0) {
682: PetscLogFlops(n);
683: /* could call BLAS axpy after call to memcopy, but may be slower */
684: for (i=0; i<n; i++) ww[i] = yy[i] + xx[i];
685: } else if (oalpha == -1.0) {
686: PetscLogFlops(n);
687: for (i=0; i<n; i++) ww[i] = yy[i] - xx[i];
688: } else if (oalpha == 0.0) {
689: PetscMemcpy(ww,yy,n*sizeof(PetscScalar));
690: } else {
691: #if defined(PETSC_USE_FORTRAN_KERNEL_WAXPY)
692: fortranwaxpy_(&n,alpha,xx,yy,ww);
693: #else
694: for (i=0; i<n; i++) ww[i] = yy[i] + oalpha * xx[i];
695: #endif
696: PetscLogFlops(2*n);
697: }
698: VecRestoreArrayFast(yin,&yy);
699: VecRestoreArrayFast(win,&ww);
700: return(0);
701: }
703: int VecPointwiseMult_Seq(Vec xin,Vec yin,Vec win)
704: {
705: Vec_Seq *x = (Vec_Seq *)xin->data;
706: int n = xin->n,i,ierr;
707: PetscScalar *xx = x->array,*yy,*ww;
710: VecGetArrayFast(yin,&yy);
711: if (yin != win) {VecGetArrayFast(win,&ww);}
712: else ww = yy;
714: if (ww == xx) {
715: for (i=0; i<n; i++) ww[i] *= yy[i];
716: } else if (ww == yy) {
717: for (i=0; i<n; i++) ww[i] *= xx[i];
718: } else {
719: /* This was suppose to help on SGI but didn't really seem to
720: PetscReal * __restrict www = ww;
721: PetscReal * __restrict yyy = yy;
722: PetscReal * __restrict xxx = xx;
723: for (i=0; i<n; i++) www[i] = xxx[i] * yyy[i];
724: */
725: #if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
726: fortranxtimesy_(xx,yy,ww,&n);
727: #else
728: for (i=0; i<n; i++) ww[i] = xx[i] * yy[i];
729: #endif
730: }
731: VecRestoreArrayFast(yin,&yy);
732: if (yin != win) {VecRestoreArrayFast(win,&ww);}
733: PetscLogFlops(n);
734: return(0);
735: }
737: int VecPointwiseDivide_Seq(Vec xin,Vec yin,Vec win)
738: {
739: Vec_Seq *x = (Vec_Seq *)xin->data;
740: int n = xin->n,i,ierr;
741: PetscScalar *xx = x->array,*yy,*ww;
744: VecGetArrayFast(yin,&yy);
745: if (yin != win) {VecGetArrayFast(win,&ww);}
746: else {ww = yy;}
747: for (i=0; i<n; i++) ww[i] = xx[i] / yy[i];
748: VecRestoreArrayFast(yin,&yy);
749: if (yin != win) {VecRestoreArrayFast(win,&ww);}
750: PetscLogFlops(n);
751: return(0);
752: }
754: int VecGetArray_Seq(Vec vin,PetscScalar *a[])
755: {
756: Vec_Seq *v = (Vec_Seq *)vin->data;
757: int ierr;
760: if (vin->array_gotten) {
761: SETERRQ(1,"Array has already been gotten for this vector,you mayn
762: have forgotten a call to VecRestoreArray()");
763: }
764: vin->array_gotten = PETSC_TRUE;
766: *a = v->array;
767: PetscObjectTakeAccess(vin);
768: return(0);
769: }
771: int VecRestoreArray_Seq(Vec vin,PetscScalar *a[])
772: {
777: if (!vin->array_gotten) {
778: SETERRQ(1,"Array has not been gotten for this vector, you mayn
779: have forgotten a call to VecGetArray()");
780: }
781: vin->array_gotten = PETSC_FALSE;
782: if (a) *a = 0; /* now user cannot accidently use it again */
784: PetscObjectGrantAccess(vin);
785: return(0);
786: }
788: int VecResetArray_Seq(Vec vin)
789: {
790: Vec_Seq *v = (Vec_Seq *)vin->data;
793: v->array = v->array_allocated;
794: return(0);
795: }
797: int VecPlaceArray_Seq(Vec vin,const PetscScalar *a)
798: {
799: Vec_Seq *v = (Vec_Seq *)vin->data;
802: v->array = (PetscScalar *)a;
803: return(0);
804: }
806: int VecReplaceArray_Seq(Vec vin,const PetscScalar *a)
807: {
808: Vec_Seq *v = (Vec_Seq *)vin->data;
809: int ierr;
812: if (v->array_allocated) {PetscFree(v->array_allocated);}
813: v->array_allocated = v->array = (PetscScalar *)a;
814: return(0);
815: }
817: int VecGetSize_Seq(Vec vin,int *size)
818: {
820: *size = vin->n;
821: return(0);
822: }
824: int VecConjugate_Seq(Vec xin)
825: {
826: PetscScalar *x = ((Vec_Seq *)xin->data)->array;
827: int n = xin->n;
830: while (n-->0) {
831: *x = PetscConj(*x);
832: x++;
833: }
834: return(0);
835: }
836: