3 #ifndef EVAL_CONTAINER_TRIPLE_INT_H
4 #define EVAL_CONTAINER_TRIPLE_INT_H
// Intel and clang builds: include the SSE2 intrinsics header and alias the
// GCC-style builtin names to the matching intrinsics (128-bit xor, 32-bit
// lane subtract, 32-bit lane add).
// NOTE(review): the #endif closing this conditional is not visible in this excerpt.
11 #if (defined __INTEL_COMPILER || defined __clang__)
12 # include <emmintrin.h>
13 # define __builtin_ia32_pxor128 _mm_xor_si128
14 # define __builtin_ia32_psubd128 _mm_sub_epi32
15 # define __builtin_ia32_paddd128 _mm_add_epi32
// On x86-64 and i386 targets, define OSL_USE_SSE (presumably the switch that
// selects the SIMD code paths -- confirm against its uses).
19 #if (defined __x86_64__) || (defined __i386__)
21 # define OSL_USE_SSE 1
// Compile-time diagnostic for builds without SSE.
// NOTE(review): presumably sits in the alternative branch of the conditional;
// the directive separating it from the #define is not visible in this excerpt.
24 # warning "TripleInt without SSE"
// Array-backed definitions of the vector type names: v4si holds four int32_t
// elements and v2di holds two int64_t elements.
// NOTE(review): the preprocessor condition that selects these definitions is
// not visible in this excerpt.
33 typedef CArray<int32_t,4>
v4si;
34 typedef CArray<int64_t,2>
v2di;
35 #elif defined __INTEL_COMPILER
// Debug-only check (compiled out under NDEBUG) that this object starts on a
// 16-byte boundary -- presumably required by the SSE operations on it.
55 assert(reinterpret_cast<size_t>(
this) % 16 == 0);
// Debug-only check (compiled out under NDEBUG) that this object starts on a
// 16-byte boundary.
61 assert(reinterpret_cast<size_t>(
this) % 16 == 0);
// Copy the three components from si into this object, one element at a time.
// Only indices 0..2 are written; index 3 of iv is left untouched here.
64 for(
int i=0;i<3;i++)
v.
iv[i]=si.
v.
iv[i];
// Debug-only check (compiled out under NDEBUG) that this object starts on a
// 16-byte boundary.
69 assert(reinterpret_cast<size_t>(
this) % 16 == 0);
// Set the components to (a, b, c) and explicitly zero the fourth slot.
// The parenthesized-type brace form is a compound literal, which is a
// compiler extension in C++ rather than standard syntax.
72 v.
iv = (CArray<int,4>){{a, b, c, 0}};
// Reset the three components to zero; index 3 of iv is not written here.
82 for(
int i=0;i<3;i++)
v.
iv[i]=0;
// SIMD form: lane-wise 32-bit subtraction ret = ret - this.
// NOTE(review): this is a negation only if ret holds zeros at this point;
// the initialization of ret is not visible in this excerpt -- confirm.
94 ret.
v.
v4=__builtin_ia32_psubd128(ret.
v.
v4,
v.
v4);
// Scalar form: store the negation of each of the three components into ret.
// NOTE(review): presumably the non-SSE alternative to the statement above;
// the preprocessor directives separating them are not visible here.
96 for(
int i=0;i<3;i++) ret.
v.
iv[i]= -
v.
iv[i];
// SIMD form: add si to this object lane by lane (32-bit lanes), covering all
// four lanes in a single operation.
102 v.
v4=__builtin_ia32_paddd128(
v.
v4,si.
v.
v4);
// Scalar form: add the three components of si element by element.
// NOTE(review): presumably the non-SSE alternative to the statement above;
// the preprocessor directives separating them are not visible here.
104 for(
int i=0;i<3;i++)
v.
iv[i]+=si.
v.
iv[i];
// SIMD form: subtract si from this object lane by lane (32-bit lanes),
// covering all four lanes in a single operation.
110 v.
v4=__builtin_ia32_psubd128(
v.
v4,si.
v.
v4);
// Scalar form: subtract the three components of si element by element.
// NOTE(review): presumably the non-SSE alternative to the statement above;
// the preprocessor directives separating them are not visible here.
112 for(
int i=0;i<3;i++)
v.
iv[i]-=si.
v.
iv[i];
// SIMD form: build a vector holding scale in every 32-bit lane, then
// multiply lane by lane.
// Going through unsigned int first keeps a negative scale from sign-extending
// into the upper 32 bits of the 64-bit value.
119 unsigned long long scalescale=(
unsigned long long )((
unsigned int)scale);
// Duplicate the 32-bit pattern into the upper half of the 64-bit word.
120 scalescale|=scalescale<<32ull;
// Write that 64-bit word into both 64-bit elements (index 0 and 1) of val.
// NOTE(review): assumes val.v2 and val.v4 view the same storage; the
// declaration of val is not visible in this excerpt -- confirm.
121 val.
v2=__builtin_ia32_vec_set_v2di(val.
v2,(
long long)scalescale,0);
122 val.
v2=__builtin_ia32_vec_set_v2di(val.
v2,(
long long)scalescale,1);
// Packed 32-bit multiply keeping the low 32 bits of each product.
// NOTE(review): pmulld is an SSE4.1 instruction, unlike the SSE2 add and
// subtract builtins -- confirm the build enables it.
123 v.
v4=__builtin_ia32_pmulld128(
v.
v4,val.
v4);
// Scalar form: multiply each of the three components by scale.
// NOTE(review): presumably the non-SSE alternative to the statements above;
// the preprocessor directives separating them are not visible here.
125 for(
int i=0;i<3;i++)
v.
iv[i]*=scale;
// Divide each of the three components by div in place.
// No check for div == 0 is visible here; the caller must supply a non-zero
// divisor (assuming integer operands -- the type of div is not visible).
131 for(
int i=0;i<3;i++)
v.
iv[i] /= div;
// SIMD form: arithmetic (sign-preserving) right shift of every 32-bit lane
// by shift bits.
137 v.
v4= __builtin_ia32_psradi128 (
v.
v4, shift);
// Scalar form: right-shift each of the three components by shift bits.
// NOTE(review): presumably the non-SSE alternative to the statement above.
// For negative values, >> is implementation-defined before C++20, so
// agreement with the arithmetic SIMD shift depends on the compiler.
139 for(
int i=0;i<3;i++)
v.
iv[i] >>= shift;
// Returns the constant 3. Presumably the number of components of the
// enclosing type, matching the three-element loops elsewhere in this file.
143 static size_t size() {
return 3; }
// Storage for two TripleInt values in a fixed-size array.
// NOTE(review): presumably the data member of TripleIntPair; the enclosing
// class header is not visible in this excerpt.
172 CArray<TripleInt,2>
v;
// True only when both indexed elements (0 and 1) of l and r compare equal;
// element 1 is compared only if element 0 matched (&& short-circuits).
212 return l[0] == r[0] && l[1] == r[1];
// Stream-insertion operator for TripleInt (declaration only; the definition
// and the output format are not visible in this excerpt).
// @param os  output stream to write to
// @param ti  value to write
// @return    a std::ostream reference (conventionally os, to allow chaining)
214 std::ostream&
operator<<(std::ostream& os,TripleInt
const& ti);
// Bring TripleInt and TripleIntPair from namespace container into the
// enclosing scope so they can be named without the container:: qualifier.
216 using container::TripleInt;
217 using container::TripleIntPair;
219 #endif // EVAL_CONTAINER_TRIPLE_INT_H