tbb_machine.h

/*
    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/

#ifndef __TBB_machine_H
#define __TBB_machine_H

#include "tbb_stddef.h"

#if _WIN32||_WIN64

#ifdef _MANAGED
#pragma managed(push, off)
#endif

#if __MINGW32__
#include "machine/linux_ia32.h"
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield()  SwitchToThread()
#elif defined(_M_IX86)
#include "machine/windows_ia32.h"
#elif defined(_M_AMD64)
#include "machine/windows_intel64.h"
#elif _XBOX
#include "machine/xbox360_ppc.h"
#else
#error Unsupported platform
#endif

#ifdef _MANAGED
#pragma managed(pop)
#endif

#elif __linux__ || __FreeBSD__

#if __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __ia64__
#include "machine/linux_ia64.h"
#endif

#elif __APPLE__

#if __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __POWERPC__
#include "machine/mac_ppc.h"
#endif

#elif _AIX

#include "machine/ibm_aix51.h"

#elif __sun || __SUNPRO_CC

#define __asm__ asm
#define __volatile__ volatile
#if __i386  || __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __sparc
#include "machine/sunos_sparc.h"
#endif

#endif


// Verify that the machine-specific header selected above provided the
// mandatory primitives.
#if    !defined(__TBB_CompareAndSwap4) \
    || !defined(__TBB_CompareAndSwap8) \
    || !defined(__TBB_Yield)           \
    || !defined(__TBB_full_memory_fence)    \
    || !defined(__TBB_release_consistency_helper)
#error Minimal requirements for tbb_machine.h not satisfied
#endif

#ifndef __TBB_load_with_acquire
    //! Generic load with acquire semantics; the trailing fence keeps later
    //! memory operations from being reordered before the load.
    template<typename T>
    inline T __TBB_load_with_acquire(const volatile T& location) {
        T temp = location;
        __TBB_release_consistency_helper();
        return temp;
    }
#endif

#ifndef __TBB_store_with_release
    //! Generic store with release semantics; the leading fence keeps earlier
    //! memory operations from being reordered after the store.
    template<typename T, typename V>
    inline void __TBB_store_with_release(volatile T& location, V value) {
        __TBB_release_consistency_helper();
        location = T(value);
    }
#endif

#ifndef __TBB_Pause
    //! Default pause: just yield if the port provides no cheaper spin hint.
    inline void __TBB_Pause(int32_t) {
        __TBB_Yield();
    }
#endif

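// Illustrative sketch (hypothetical, not part of TBB): the canonical pairing
// of the two primitives above. A producer writes its payload and then
// publishes a flag with a release store; a consumer polls the flag with an
// acquire load, so once it observes the flag it also observes the payload.
inline void example_publish( int& payload, volatile int& flag, int value ) {
    payload = value;
    __TBB_store_with_release( flag, 1 );    // release: payload write cannot sink below this
}
inline int example_consume( const int& payload, const volatile int& flag ) {
    while( !__TBB_load_with_acquire(flag) ) // acquire: payload read cannot rise above this
        __TBB_Yield();
    return payload;
}
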
namespace tbb {
namespace internal {

//! Class that implements exponential backoff.
/** See implementation of spin_wait_while_eq for an example. */
class atomic_backoff {
    //! Time delay, in units of "pause" instructions.
    /** Should be equal to approximately the number of "pause" instructions
        that take the same time as a context switch. */
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    atomic_backoff() : count(1) {}

    //! Pause for a while.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield CPU to scheduler.
            __TBB_Yield();
        }
    }

    //! Pause a bounded number of times; return false once the budget is exhausted.
    bool bounded_pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() {
        count = 1;
    }
};
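
// Illustrative sketch (hypothetical, not part of TBB): try an operation a
// bounded number of times with exponential backoff, then give up so the
// caller can fall back to a slower path.
template<typename Pred>
bool example_try_briefly( Pred done ) {
    atomic_backoff backoff;
    while( !done() )
        if( !backoff.bounded_pause() )
            return false; // pause budget exhausted
    return true;
}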

//! Spin WHILE the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}
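
// Illustrative sketch (hypothetical, not part of TBB): block until another
// thread publishes a nonzero "ready" flag.
inline void example_wait_until_ready( const volatile int& ready ) {
    spin_wait_until_eq( ready, 1 );
}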

// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value, which might change during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_CompareAndSwap4 is presumed to have a full fence.
        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        if(  result==old_value               // CAS succeeded
          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
            break;
        else                                 // CAS failed but the bits of interest were left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
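
// Worked example (illustrative): for a uint16_t (S==2) whose address has
// (ptr & 0x3)==2, on a little-endian machine bitoffset==16 and
// mask==0xFFFF0000, so the CAS above operates on the upper half of the
// enclosing 32-bit word; on a big-endian machine bitoffset==8*(4-2-2)==0 and
// mask==0x0000FFFF, selecting the same two bytes of memory.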

template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}

template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
        b.pause();
    }
    return result;
}

template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
        b.pause();
    }
    return result;
}
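
// Illustrative sketch (hypothetical, not part of TBB): an atomic increment
// built on the generic CAS loop above; returns the value observed before the
// increment.
inline uint32_t example_fetch_and_increment( volatile uint32_t& counter ) {
    return __TBB_FetchAndAddGeneric<4,uint32_t>( &counter, 1 );
}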

// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
// strict as type T.  The type should have a trivial default constructor and destructor, so that
// arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __GNUC__ || __SUNPRO_CC
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2
//! Work-around for bug in GNU 3.2 and MSVC compilers.
/** The bug is that the compiler sometimes returns 0 for __alignof(T) when T has
    not yet been instantiated; forcing computation of sizeof(T) first avoids it. */
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
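
// Illustrative sketch (hypothetical, not part of TBB): typical use of the
// macro is to obtain raw, suitably aligned storage for an object that will be
// constructed later, in the spirit of tbb::aligned_space.
template<typename T>
struct example_aligned_storage {
    typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
    element_type array[(sizeof(T)+sizeof(element_type)-1)/sizeof(element_type)];
    T* begin() { return reinterpret_cast<T*>(this); }
};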

// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
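
// Illustrative sketch (hypothetical, not part of TBB): the same per-byte
// reversal computed on the fly with shift/mask steps, as the comment above
// mentions; useful where the 256-byte table is unwanted.
inline unsigned char example_reverse_byte_on_the_fly( unsigned char b ) {
    b = (unsigned char)( ((b & 0xF0) >> 4) | ((b & 0x0F) << 4) ); // swap nibbles
    b = (unsigned char)( ((b & 0xCC) >> 2) | ((b & 0x33) << 2) ); // swap bit pairs
    b = (unsigned char)( ((b & 0xAA) >> 1) | ((b & 0x55) << 1) ); // swap adjacent bits
    return b;
}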

} // namespace internal
} // namespace tbb

#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#if __TBB_DECL_FENCED_ATOMICS

#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif
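
// Illustrative sketch (hypothetical, not part of TBB): the fenced names
// compose a base operation with a memory-semantics suffix, and the defaults
// above conservatively map acquire/release variants to the full-fence form,
// which is always correct, if potentially slower.
inline uint32_t example_acquire_increment( volatile uint32_t& counter ) {
    return __TBB_FetchAndAdd4acquire( &counter, 1 );
}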

#endif // __TBB_DECL_FENCED_ATOMICS

// Special atomic functions
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif

#if __TBB_WORDSIZE==4
// On 32-bit platforms, "atomic.h" requires definition of __TBB_Store8 and __TBB_Load8
#ifndef __TBB_Store8
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        int64_t result = *(volatile int64_t *)ptr; // volatile read: must not be hoisted out of the loop
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_Load8
inline int64_t __TBB_Load8 (const volatile void *ptr) {
    int64_t result = *(volatile int64_t *)ptr; // initial guess; may be a torn read
    // CAS with identical comparand and value atomically returns the current
    // 64-bit contents without changing them.
    result = __TBB_CompareAndSwap8((volatile void *)ptr,result,result);
    return result;
}
#endif
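
// Illustrative sketch (hypothetical, not part of TBB): on a 32-bit platform a
// plain 64-bit load could tear, so a shared 64-bit value is read via __TBB_Load8.
inline int64_t example_read_shared64( const volatile int64_t& value ) {
    return __TBB_Load8( &value );
}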
#endif /* __TBB_WORDSIZE==4 */

#ifndef __TBB_Log2
// Generic log2: returns the index of the most significant set bit of x
// (i.e. floor(log2(x))), or -1 if x is 0.
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
#if __TBB_WORDSIZE>=8
    if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2)? result+1: result;
}
#endif
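
// Worked example (illustrative): for x==40 (binary 101000), only the x>>4
// step fires, leaving x==2 and result==4; the final (x&2) test then adds 1,
// giving 5 == floor(log2(40)). For x==1 no step fires and the result is 0.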

#ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_AtomicAND
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif
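
// Illustrative sketch (hypothetical, not part of TBB): setting and clearing a
// single flag bit in a shared word with the primitives above.
const uintptr_t example_flag_bit = 0x1;
inline void example_set_flag( volatile uintptr_t& word )   { __TBB_AtomicOR( &word, example_flag_bit ); }
inline void example_clear_flag( volatile uintptr_t& word ) { __TBB_AtomicAND( &word, ~example_flag_bit ); }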

#ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( unsigned char &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif

#ifndef __TBB_LockByte
inline uintptr_t __TBB_LockByte( unsigned char& flag ) {
    if ( !__TBB_TryLockByte(flag) ) {
        tbb::internal::atomic_backoff b;
        do {
            b.pause();
        } while ( !__TBB_TryLockByte(flag) );
    }
    return 0;
}
#endif
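
// Illustrative sketch (hypothetical, not part of TBB): a minimal spin lock
// built from the byte-lock primitives, with 1 meaning "held". Releasing the
// lock is a release store of 0, so writes made inside the critical section
// are visible to the next owner.
inline void example_spin_acquire( unsigned char& flag ) { __TBB_LockByte( flag ); }
inline void example_spin_release( unsigned char& flag ) { __TBB_store_with_release( flag, (unsigned char)0 ); }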

#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

template<typename T>
T __TBB_ReverseBits(T src)
{
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    // Reverse the bits of each byte while simultaneously reversing the byte order.
    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
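
// Illustrative example (hypothetical, not part of TBB): full bit reversal of
// a 32-bit value; e.g. __TBB_ReverseBits on uint32_t 0x00000001 yields
// 0x80000000.
inline uint32_t example_reverse32( uint32_t x ) {
    return __TBB_ReverseBits( x );
}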

#endif /* __TBB_machine_H */
