tbb_machine.h

/*
    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/

#ifndef __TBB_machine_H
#define __TBB_machine_H

#include "tbb_stddef.h"

#if _WIN32||_WIN64

#ifdef _MANAGED
#pragma managed(push, off)
#endif

#if __MINGW64__
#include "machine/linux_intel64.h"
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield()  SwitchToThread()
#elif __MINGW32__
#include "machine/linux_ia32.h"
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield()  SwitchToThread()
#elif defined(_M_IX86)
#include "machine/windows_ia32.h"
#elif defined(_M_AMD64)
#include "machine/windows_intel64.h"
#elif _XBOX
#include "machine/xbox360_ppc.h"
#endif

#ifdef _MANAGED
#pragma managed(pop)
#endif

#elif __linux__ || __FreeBSD__ || __NetBSD__

#if __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __ia64__
#include "machine/linux_ia64.h"
#elif __powerpc__
#include "machine/mac_ppc.h"
#endif
#include "machine/linux_common.h"

#elif __APPLE__

#if __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __POWERPC__
#include "machine/mac_ppc.h"
#endif
#include "machine/macos_common.h"

#elif _AIX

#include "machine/ibm_aix51.h"

#elif __sun || __SUNPRO_CC

#define __asm__ asm
#define __volatile__ volatile

#if __i386 || __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __sparc
#include "machine/sunos_sparc.h"
#endif
#include <sched.h>

#define __TBB_Yield() sched_yield()

#endif /* Sun */

#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 1
#endif

#if    !defined(__TBB_CompareAndSwap4) \
    || !defined(__TBB_CompareAndSwap8) && __TBB_64BIT_ATOMICS \
    || !defined(__TBB_Yield)           \
    || !defined(__TBB_full_memory_fence)    \
    || !defined(__TBB_release_consistency_helper)
#error Minimal requirements for tbb_machine.h not satisfied; platform is not supported.
#endif

#ifndef __TBB_Pause
    inline void __TBB_Pause(int32_t) {
        __TBB_Yield();
    }
#endif

namespace tbb {

//! Full memory fence.
inline void atomic_fence () { __TBB_full_memory_fence(); }

namespace internal {

//! Class that implements exponential backoff.
class atomic_backoff : no_copy {
    //! Time delay, in units of "pause" instructions.
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    atomic_backoff() : count(1) {}

    //! Pause, doubling the delay each time; past the limit, yield to the OS scheduler instead.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield CPU to scheduler.
            __TBB_Yield();
        }
    }

    //! Pause a bounded number of times; return false once the limit on pauses is reached.
    bool bounded_pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() {
        count = 1;
    }
};
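// A minimal usage sketch (illustrative, not part of the original header):
// poll a flag with exponential backoff, exactly as the spin_wait helpers
// below do.  'flag' is a hypothetical variable.
//
//     tbb::internal::atomic_backoff backoff;
//     while( !flag ) backoff.pause();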
//! Spin WHILE the value of the variable is equal to a given value.
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable is equal to a given value.
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}

// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value which might change during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_CompareAndSwap4 presumed to have full fence.
        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        if(  result==old_value               // CAS succeeded
          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
            break;
        else                                 // CAS failed but the bits of interest left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
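// Worked example (little-endian, purely illustrative): for S==2 and a
// uint16_t at byte offset 2 within its aligned 4-byte word, bitoffset is
// 8*2 == 16 and mask is ((1<<16)-1)<<16 == 0xFFFF0000, so only the upper
// half-word participates in the compare-and-swap.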
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

#if __TBB_64BIT_ATOMICS
template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}
#endif

template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
        b.pause();
    }
    return result;
}
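// Illustration: __TBB_FetchAndAddGeneric<4,uint32_t>(&x,1) atomically
// increments x and returns its value from before the increment.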
template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
        b.pause();
    }
    return result;
}

// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
// strict as type T.  The type should have a trivial default constructor and destructor, so that
// arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __GNUC__ || __SUNPRO_CC || __IBMCPP__
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER || (defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__<=2)
// Work around a bug in MSVC and in GCC 3.2 and earlier: the compiler may
// return 0 for __alignof(T) when T has not yet been instantiated.
// Computing sizeof(T) first forces the instantiation.
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC || __IBMCPP__
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
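// A minimal usage sketch (illustrative; the names are hypothetical):
// declaring raw storage whose alignment is sufficient for objects of type T.
//
//     typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) aligned_unit;
//     aligned_unit storage[(sizeof(T)+sizeof(aligned_unit)-1)/sizeof(aligned_unit)];
//     T* object = reinterpret_cast<T*>(storage);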
// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
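// Sanity check by inspection: byte_table[0x01] == 0x80 and byte_table[0x0F] == 0xF0,
// the bit-reversals of 00000001 and 00001111 respectively.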

} // namespace internal
} // namespace tbb

// Provide defaults for the byte/halfword/word CAS, fetch-and-add, and
// fetch-and-store entry points in terms of the generic implementations above.
#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#if __TBB_DECL_FENCED_ATOMICS
// Unless the platform header supplies specialized variants, the acquire and
// release flavors default to the full-fence versions.

#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif

#endif // __TBB_DECL_FENCED_ATOMICS

// Special atomic functions
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif

template <typename T, size_t S>
struct __TBB_machine_load_store {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = location;
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T &location, T value) {
        __TBB_release_consistency_helper();
        location = value;
    }
};
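// Note on the generic load/store above: load_with_acquire performs the read
// and then applies __TBB_release_consistency_helper so that later accesses
// cannot be hoisted above it; store_with_release applies the helper first so
// that earlier accesses cannot sink below the write.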
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
#if _MSC_VER
using tbb::internal::int64_t;
#endif
// On 32-bit platforms, definitions of __TBB_Store8 and __TBB_Load8 are required.
#ifndef __TBB_Store8
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    for(;;) {
        int64_t result = *(int64_t *)ptr;
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
    }
}
#endif

#ifndef __TBB_Load8
inline int64_t __TBB_Load8 (const volatile void *ptr) {
    const int64_t anyvalue = 3264; // Could be anything, just the same for comparand and new value
    // A CAS whose comparand equals its new value never changes *ptr, yet it
    // returns the current contents, which makes this an atomic 64-bit read.
    return __TBB_CompareAndSwap8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
}
#endif

template <typename T>
struct __TBB_machine_load_store<T,8> {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = (T)__TBB_Load8((const volatile void*)&location);
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T& location, T value) {
        __TBB_release_consistency_helper();
        __TBB_Store8((volatile void *)&location,(int64_t)value);
    }
};
#endif /* __TBB_WORDSIZE==4 */

#ifndef __TBB_load_with_acquire
template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
    return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
}
#endif

#ifndef __TBB_store_with_release
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,T(value));
}
// Non-template overload for size_t arguments.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
    __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value);
}
#endif
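// A minimal usage sketch (illustrative; 'ready' is a hypothetical
// volatile size_t flag): release/acquire pairing to publish data
// between threads.
//
//     __TBB_store_with_release( ready, 1u );       // writer: publish
//     while( !__TBB_load_with_acquire(ready) )     // reader: consume
//         __TBB_Yield();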

#ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
#if __TBB_WORDSIZE>=8
    if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2)? result+1: result;
}
#endif
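// Examples: __TBB_Log2(1)==0 and __TBB_Log2(40)==5, i.e. floor(log2(x));
// __TBB_Log2(0) returns -1.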

#ifndef __TBB_AtomicOR
// Atomically perform *operand |= addend, via a compare-and-swap loop.
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_AtomicAND
// Atomically perform *operand &= addend, via a compare-and-swap loop.
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_Byte
typedef unsigned char __TBB_Byte;
#endif

#ifndef __TBB_TryLockByte
// Try to acquire the byte lock: CAS the flag from 0 to 1; true on success.
inline bool __TBB_TryLockByte( __TBB_Byte &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif
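// Illustration: __TBB_TryLockByte returns true exactly when it transitions
// the flag from 0 (unlocked) to 1 (locked).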

#ifndef __TBB_LockByte
// Spin, with exponential backoff, until the byte lock is acquired.
inline uintptr_t __TBB_LockByte( __TBB_Byte& flag ) {
    if ( !__TBB_TryLockByte(flag) ) {
        tbb::internal::atomic_backoff b;
        do {
            b.pause();
        } while ( !__TBB_TryLockByte(flag) );
    }
    return 0;
}
#endif

#define __TBB_UnlockByte __TBB_store_with_release
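// A minimal spin-lock sketch built from these primitives (illustrative;
// 'lock_flag' is a hypothetical __TBB_Byte initialized to 0):
//
//     __TBB_LockByte( lock_flag );        // spin with backoff until acquired
//     /* ...critical section... */
//     __TBB_UnlockByte( lock_flag, 0 );   // release the lock with a release store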

#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

// Reverse the order of bits in src: swap the bytes end for end and bit-reverse
// each byte via the lookup table.
template<typename T>
T __TBB_ReverseBits(T src)
{
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
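// Example: __TBB_ReverseBits<uint32_t>(0x1u) == 0x80000000u, independent of
// endianness, since both the byte order and the bits within each byte are reversed.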

#endif /* __TBB_machine_H */
