Actual source code: dot.h

  2: #ifndef DOT
 3:  #include "petsc.h"


  7: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
  8: #if defined(PETSC_HAVE_FORTRAN_CAPS)
  9: #define fortranmdot4_      FORTRANMDOT4
 10: #define fortranmdot3_      FORTRANMDOT3
 11: #define fortranmdot2_      FORTRANMDOT2
 12: #define fortranmdot1_      FORTRANMDOT1
 13: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 14: #define fortranmdot4_      fortranmdot4
 15: #define fortranmdot3_      fortranmdot3
 16: #define fortranmdot2_      fortranmdot2
 17: #define fortranmdot1_      fortranmdot1
 18: #endif
 19: EXTERN void fortranmdot4_(void*,void*,void*,void*,void*,PetscInt*,void*,void*,void*,void*);
 20: EXTERN void fortranmdot3_(void*,void*,void*,void*,PetscInt*,void*,void*,void*);
 21: EXTERN void fortranmdot2_(void*,void*,void*,PetscInt*,void*,void*);
 22: EXTERN void fortranmdot1_(void*,void*,PetscInt*,void*);
 23: #endif

 25: #if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
 26: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 27: #define fortrannormsqr_    FORTRANNORMSQR
 28: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 29: #define fortrannormsqr_    fortrannormsqr
 30: #endif
 31: EXTERN void fortrannormsqr_(void*,PetscInt*,void*);
 32: #endif

 34: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJ)
 35: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 36: #define fortranmultaij_    FORTRANMULTAIJ
 37: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 38: #define fortranmultaij_    fortranmultaij
 39: #endif
 40: EXTERN void fortranmultaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*);
 41: #endif

 43: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ)
 44: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 45: #define fortranmulttransposeaddaij_    FORTRANMULTTRANSPOSEADDAIJ
 46: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 47: #define fortranmulttransposeaddaij_    fortranmulttransposeaddaij
 48: #endif
 49: EXTERN void fortranmulttransposeaddaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*);
 50: #endif

 52: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ)
 53: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 54: #define fortranmultaddaij_ FORTRANMULTADDAIJ
 55: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 56: #define fortranmultaddaij_ fortranmultaddaij
 57: #endif
 58: EXTERN void fortranmultaddaij_(PetscInt*,void*,PetscInt*,PetscInt*,void*,void*,void*);
 59: #endif

 61: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
 62: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 63: #define fortransolveaij_   FORTRANSOLVEAIJ
 64: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 65: #define fortransolveaij_   fortransolveaij
 66: #endif
 67: EXTERN void fortransolveaij_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*);
 68: #endif

 70: #if defined(PETSC_USE_FORTRAN_KERNEL_RELAXAIJ)
 71: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 72: #define fortranrelaxaijforward_   FORTRANRELAXAIJFORWARD
 73: #define fortranrelaxaijbackward_   FORTRANRELAXAIJBACKWARD
 74: #define fortranrelaxaijforwardzero_   FORTRANRELAXAIJFORWARDZERO
 75: #define fortranrelaxaijbackwardzero_   FORTRANRELAXAIJBACKWARDZERO
 76: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 77: #define fortranrelaxaijforward_   fortranrelaxaijforward
 78: #define fortranrelaxaijbackward_   fortranrelaxaijbackward
 79: #define fortranrelaxaijforwardzero_   fortranrelaxaijforwardzero
 80: #define fortranrelaxaijbackwardzero_   fortranrelaxaijbackwardzero
 81: #endif
 82: EXTERN void fortranrelaxaijforward_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*);
 83: EXTERN void fortranrelaxaijbackward_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*);
 84: EXTERN void fortranrelaxaijforwardzero_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*,void*);
 85: EXTERN void fortranrelaxaijbackwardzero_(PetscInt*,PetscReal*,void*,PetscInt*,PetscInt*,const PetscInt*,void*,void*,void*);
 86: #endif

 88: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
 89: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 90: #define fortransolvebaij4_         FORTRANSOLVEBAIJ4
 91: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 92: #define fortransolvebaij4_          fortransolvebaij4
 93: #endif
 94: EXTERN void fortransolvebaij4_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*,void*);
 95: #endif

 97: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
 98: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 99: #define fortransolvebaij4unroll_   FORTRANSOLVEBAIJ4UNROLL
100: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
101: #define fortransolvebaij4unroll_    fortransolvebaij4unroll
102: #endif
103: EXTERN void fortransolvebaij4unroll_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*);
104: #endif

/*
   Prototype for the Fortran kernel fortransolvebaij4blas_, visible only
   when PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS is defined.

   The macro rewrites the trailing-underscore spelling to upper case when
   PETSC_HAVE_FORTRAN_CAPS is defined, or drops the underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; otherwise the name is
   used unchanged.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4blas_ FORTRANSOLVEBAIJ4BLAS
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4blas_ fortransolvebaij4blas
#  endif
EXTERN void fortransolvebaij4blas_(PetscInt*,void*,PetscInt*,PetscInt*,PetscInt*,void*,void*,void*);
#endif

/*
   Prototype for the Fortran kernel fortranxtimesy_, visible only when
   PETSC_USE_FORTRAN_KERNEL_XTIMESY is defined.

   The macro rewrites the trailing-underscore spelling to upper case when
   PETSC_HAVE_FORTRAN_CAPS is defined, or drops the underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; otherwise the name is
   used unchanged.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
#  if defined(PETSC_HAVE_FORTRAN_CAPS)
#    define fortranxtimesy_ FORTRANXTIMESY
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranxtimesy_ fortranxtimesy
#  endif
EXTERN void fortranxtimesy_(void*,void*,void*,PetscInt*);
#endif


126: /* ------------------------------------------------------------------- */


129: #if !defined(PETSC_USE_COMPLEX)

/*
   Inner-product kernels without conjugation.  One implementation of DOT,
   DOT2 and SQR is chosen at compile time:

     PETSC_USE_UNROLL_KERNELS   hand-unrolled loops
     PETSC_USE_WHILE_KERNELS    pointer-walking while loops
     PETSC_USE_BLAS_KERNELS     BLASdot_ for DOT and SQR, indexed loop for DOT2
     (none of the above)        indexed for loops

     DOT(sum,x,y,n)              sum  += x[i]*y[i]
     DOT2(sum1,sum2,x,y1,y2,n)   sum1 += x[i]*y1[i];  sum2 += x[i]*y2[i]
     SQR(sum,x,n)                sum  += x[i]*x[i]
   for i = 0 .. n-1.

   Arguments are substituted textually.  The unrolled and while variants
   advance the pointer arguments and modify n, so those must be modifiable
   lvalues; the indexed variants leave them untouched.  The BLAS variants
   of DOT and SQR assign to sum rather than accumulating into it.
*/
#if defined(PETSC_USE_UNROLL_KERNELS)

/* Handle the n mod 4 leading entries in the switch, then four per pass.
   The "n -= 4" inside the switch leaves n at (remaining - 4 + n mod 4),
   which still makes the loop run once per remaining group of four. */
#define DOT(sum,x,y,n) {\
  switch (n & 0x3) {\
    case 3: sum += (*x) * (*y); x++; y++; /* fall through */\
    case 2: sum += (*x) * (*y); x++; y++; /* fall through */\
    case 1: sum += (*x) * (*y); x++; y++;\
            n -= 4;\
    case 0: break;\
  }\
  for (; n > 0; n -= 4) {\
    sum += x[0]*y[0]+x[1]*y[1]+x[2]*y[2]+x[3]*y[3];\
    x += 4; y += 4;\
  }\
}

/* Handle one leading entry when n is odd, then two per pass. */
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if (n & 0x1) {\
    sum1 += (*x) * (*y1); y1++;\
    sum2 += (*x) * (*y2); y2++;\
    x++; n--;\
  }\
  for (; n > 0; n -= 2) {\
    sum1 += x[0]*y1[0]+x[1]*y1[1];\
    sum2 += x[0]*y2[0]+x[1]*y2[1];\
    x += 2; y1 += 2; y2 += 2;\
  }\
}

/* Same unrolling scheme as DOT, with x multiplied by itself. */
#define SQR(sum,x,n) {\
  switch (n & 0x3) {\
    case 3: sum += (*x) * (*x); x++; /* fall through */\
    case 2: sum += (*x) * (*x); x++; /* fall through */\
    case 1: sum += (*x) * (*x); x++;\
            n -= 4;\
    case 0: break;\
  }\
  for (; n > 0; n -= 4) {\
    sum += x[0]*x[0]+x[1]*x[1]+x[2]*x[2]+x[3]*x[3];\
    x += 4;\
  }\
}

#elif defined(PETSC_USE_WHILE_KERNELS)

/* One entry per pass; n counts down and ends at -1. */
#define DOT(sum,x,y,n) {\
  while (n--) { sum += (*x) * (*y); x++; y++; }\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while (n--) {\
    sum1 += (*x) * (*y1); y1++;\
    sum2 += (*x) * (*y2); y2++;\
    x++;\
  }\
}
#define SQR(sum,x,n) {\
  while (n--) { sum += (*x) * (*x); x++; }\
}

#elif defined(PETSC_USE_BLAS_KERNELS)

/* NOTE(review): &n is handed straight to BLASdot_ while the stride is a
   PetscBLASInt; if PetscInt and PetscBLASInt differ in width this looks
   unsafe -- confirm against the BLASdot_ prototype. */
#define DOT(sum,x,y,n) {\
  PetscBLASInt one = 1;\
  sum = BLASdot_(&n,x,&one,y,&one);\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt __i;\
  for (__i = 0; __i < n; __i++) {\
    sum1 += x[__i]*y1[__i];\
    sum2 += x[__i]*y2[__i];\
  }\
}
#define SQR(sum,x,n) {\
  PetscBLASInt one = 1;\
  sum = BLASdot_(&n,x,&one,x,&one);\
}

#else

/* Portable fallback: indexed loops that leave x, y and n unchanged. */
#define DOT(sum,x,y,n) {\
  PetscInt __i;\
  for (__i = 0; __i < n; __i++) sum += x[__i]*y[__i];\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt __i;\
  for (__i = 0; __i < n; __i++) {\
    sum1 += x[__i]*y1[__i];\
    sum2 += x[__i]*y2[__i];\
  }\
}
#define SQR(sum,x,n) {\
  PetscInt __i;\
  for (__i = 0; __i < n; __i++) sum += x[__i]*x[__i];\
}

#endif

178: #else

/*
   Inner-product kernels that conjugate the second operand.  One
   implementation of DOT, DOT2 and SQR is chosen at compile time:

     PETSC_USE_UNROLL_KERNELS   hand-unrolled loops
     PETSC_USE_WHILE_KERNELS    pointer-walking while loops
     (neither)                  indexed for loops

     DOT(sum,x,y,n)              sum  += x[i]*conj(y[i])
     DOT2(sum1,sum2,x,y1,y2,n)   sum1 += x[i]*conj(y1[i]);  sum2 += x[i]*conj(y2[i])
     SQR(sum,x,n)                sum  += x[i]*conj(x[i])
   for i = 0 .. n-1.

   Arguments are substituted textually.  The unrolled and while variants
   advance the pointer arguments and modify n, so those must be modifiable
   lvalues; the indexed variants leave them untouched.  conj() is not
   declared here and must already be in scope where the macros are used.
*/
#ifdef PETSC_USE_UNROLL_KERNELS

/* Handle the n mod 4 leading entries in the switch, then four per pass.
   The "n -= 4" inside the switch leaves n at (remaining - 4 + n mod 4),
   which still makes the loop run once per remaining group of four. */
#define DOT(sum,x,y,n) {\
  switch (n & 0x3) {\
    case 3: sum += *x * conj(*y); x++; y++; /* fall through */\
    case 2: sum += *x * conj(*y); x++; y++; /* fall through */\
    case 1: sum += *x * conj(*y); x++; y++;\
            n -= 4;\
    case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*conj(y[0])+x[1]*conj(y[1])+x[2]*conj(y[2])+x[3]*conj(y[3]);\
    x += 4; y += 4;\
    n -= 4;\
  }\
}

/* Handle one leading entry when n is odd, then two per pass.  conj()
   yields a value, not an lvalue, so the pointers are advanced in separate
   statements instead of with a postfix ++ on the conj() result. */
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if (n & 0x1) {\
    sum1 += *x * conj(*y1); y1++;\
    sum2 += *x * conj(*y2); y2++;\
    x++; n--;\
  }\
  while (n > 0) {\
    sum1 += x[0]*conj(y1[0])+x[1]*conj(y1[1]);\
    sum2 += x[0]*conj(y2[0])+x[1]*conj(y2[1]);\
    x += 2; y1 += 2; y2 += 2;\
    n -= 2;\
  }\
}

/* Same unrolling scheme as DOT, with x multiplied by its own conjugate. */
#define SQR(sum,x,n) {\
  switch (n & 0x3) {\
    case 3: sum += *x * conj(*x); x++; /* fall through */\
    case 2: sum += *x * conj(*x); x++; /* fall through */\
    case 1: sum += *x * conj(*x); x++;\
            n -= 4;\
    case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*conj(x[0])+x[1]*conj(x[1])+x[2]*conj(x[2])+x[3]*conj(x[3]);\
    x += 4;\
    n -= 4;\
  }\
}

#elif defined(PETSC_USE_WHILE_KERNELS)

/* One entry per pass; n counts down and ends at -1.  Every line of a
   macro body except the last must end in a backslash. */
#define DOT(sum,x,y,n) {\
  while (n--) sum += *x++ * conj(*y++);\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while (n--) {\
    sum1 += *x * conj(*y1);\
    sum2 += *x * conj(*y2);\
    x++; y1++; y2++;\
  }\
}
#define SQR(sum,x,n) {\
  while (n--) { sum += *x * conj(*x); x++; }\
}

#else

/* Portable fallback: indexed loops that leave x, y and n unchanged. */
#define DOT(sum,x,y,n) {\
  PetscInt __i;\
  for (__i = 0; __i < n; __i++) sum += x[__i]*conj(y[__i]);\
}
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  PetscInt __i;\
  for (__i = 0; __i < n; __i++) {\
    sum1 += x[__i]*conj(y1[__i]);\
    sum2 += x[__i]*conj(y2[__i]);\
  }\
}
#define SQR(sum,x,n) {\
  PetscInt __i;\
  for (__i = 0; __i < n; __i++) sum += x[__i]*conj(x[__i]);\
}

#endif

219: #endif

221: #endif