parallel_reduce.h

00001 /*
00002     Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_reduce_H
00022 #define __TBB_parallel_reduce_H
00023 
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include "partitioner.h"
00027 #include "tbb_profiling.h"
00028 #include <new>
00029 
00030 namespace tbb {
00031 
00033 namespace internal {
00035 
00036     typedef char reduction_context;
00037 
00039 
00040     template<typename Body>
00041     class finish_reduce: public task {
00043         Body* my_body;
00044         bool has_right_zombie;
00045         const reduction_context my_context;
00046         aligned_space<Body,1> zombie_space;
00047         finish_reduce( reduction_context context_ ) : 
00048             my_body(NULL),
00049             has_right_zombie(false),
00050             my_context(context_)
00051         {
00052         }
00053         task* execute() {
00054             if( has_right_zombie ) {
00055                 // Right child was stolen.
00056                 Body* s = zombie_space.begin();
00057                 my_body->join( *s );
00058                 s->~Body();
00059             }
00060             if( my_context==1 )  // left child
00061                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
00062             return NULL;
00063         }
00064         template<typename Range,typename Body_, typename Partitioner>
00065         friend class start_reduce;
00066     };
00067 
00069 
00070     template<typename Range, typename Body, typename Partitioner>
00071     class start_reduce: public task {
00072         typedef finish_reduce<Body> finish_type;
00073         Body* my_body;
00074         Range my_range;
00075         typename Partitioner::partition_type my_partition;
00076         reduction_context my_context;
00077         /*override*/ task* execute();
00078         template<typename Body_>
00079         friend class finish_reduce;
00080     
00082         start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
00083             my_body(body),
00084             my_range(range),
00085             my_partition(partitioner),
00086             my_context(0)
00087         {
00088         }
00090 
00091         start_reduce( start_reduce& parent_, split ) :
00092             my_body(parent_.my_body),
00093             my_range(parent_.my_range,split()),
00094             my_partition(parent_.my_partition,split()),
00095             my_context(2)
00096         {
00097             my_partition.set_affinity(*this);
00098             parent_.my_context = 1;
00099         }
00101         /*override*/ void note_affinity( affinity_id id ) {
00102             my_partition.note_affinity( id );
00103         }
00104 
00105 public:
00106         static void run( const Range& range, Body& body, Partitioner& partitioner ) {
00107             if( !range.empty() ) {
00108 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00109                 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
00110 #else
00111                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
00112                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
00113                 task_group_context context;
00114                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00115 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00116             }
00117         }
00118 #if __TBB_TASK_GROUP_CONTEXT
00119         static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
00120             if( !range.empty() ) 
00121                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00122         }
00123 #endif /* __TBB_TASK_GROUP_CONTEXT */
00124     };
00125 
00126     template<typename Range, typename Body, typename Partitioner>
00127     task* start_reduce<Range,Body,Partitioner>::execute() {
00128         if( my_context==2 ) { // right child
00129             finish_type* p = static_cast<finish_type*>(parent());
00130             if( !itt_load_word_with_acquire(p->my_body) ) {
00131                 my_body = new( p->zombie_space.begin() ) Body(*my_body,split());
00132                 p->has_right_zombie = true;
00133             }
00134         }
00135         if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
00136             (*my_body)( my_range );
00137             if( my_context==1 ) 
00138                 itt_store_word_with_release(static_cast<finish_type*>(parent())->my_body, my_body );
00139             return my_partition.continue_after_execute_range();
00140         } else {
00141             finish_type& c = *new( allocate_continuation()) finish_type(my_context);
00142             recycle_as_child_of(c);
00143             c.set_ref_count(2);    
00144             bool delay = my_partition.decide_whether_to_delay();
00145             start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split());
00146             my_partition.spawn_or_delay(delay,b);
00147             return this;
00148         }
00149     }
00150 
00152 
00156     template<typename Range, typename Value, typename RealBody, typename Reduction>
00157     class lambda_reduce_body {
00158 
00159 //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
00160 //       (might require some performance measurements)
00161 
00162         const Value&     identity_element;
00163         const RealBody&  my_real_body;
00164         const Reduction& my_reduction;
00165         Value            my_value;
00166         lambda_reduce_body& operator= ( const lambda_reduce_body& other );
00167     public:
00168         lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
00169             : identity_element(identity)
00170             , my_real_body(body)
00171             , my_reduction(reduction)
00172             , my_value(identity)
00173         { }
00174         lambda_reduce_body( const lambda_reduce_body& other )
00175             : identity_element(other.identity_element)
00176             , my_real_body(other.my_real_body)
00177             , my_reduction(other.my_reduction)
00178             , my_value(other.my_value)
00179         { }
00180         lambda_reduce_body( lambda_reduce_body& other, tbb::split )
00181             : identity_element(other.identity_element)
00182             , my_real_body(other.my_real_body)
00183             , my_reduction(other.my_reduction)
00184             , my_value(other.identity_element)
00185         { }
00186         void operator()(Range& range) {
00187             my_value = my_real_body(range, const_cast<const Value&>(my_value));
00188         }
00189         void join( lambda_reduce_body& rhs ) {
00190             my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
00191         }
00192         Value result() const {
00193             return my_value;
00194         }
00195     };
00196 
00197 } // namespace internal
00199 
00200 // Requirements on Range concept are documented in blocked_range.h
00201 
00220 
00222 
00223 template<typename Range, typename Body>
00224 void parallel_reduce( const Range& range, Body& body ) {
00225     internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
00226 }
00227 
00229 
00230 template<typename Range, typename Body>
00231 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00232     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
00233 }
00234 
00236 
00237 template<typename Range, typename Body>
00238 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00239     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
00240 }
00241 
00243 
00244 template<typename Range, typename Body>
00245 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
00246     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
00247 }
00248 
00249 #if __TBB_TASK_GROUP_CONTEXT
00251 
00252 template<typename Range, typename Body>
00253 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
00254     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
00255 }
00256 
00258 
00259 template<typename Range, typename Body>
00260 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
00261     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
00262 }
00263 
00265 
00266 template<typename Range, typename Body>
00267 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
00268     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
00269 }
00270 #endif /* __TBB_TASK_GROUP_CONTEXT */
00271 
00275 
00276 
00277 template<typename Range, typename Value, typename RealBody, typename Reduction>
00278 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00279     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00280     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
00281                           ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
00282     return body.result();
00283 }
00284 
00286 
00287 template<typename Range, typename Value, typename RealBody, typename Reduction>
00288 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00289                        const simple_partitioner& partitioner ) {
00290     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00291     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00292                           ::run(range, body, partitioner );
00293     return body.result();
00294 }
00295 
00297 
00298 template<typename Range, typename Value, typename RealBody, typename Reduction>
00299 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00300                        const auto_partitioner& partitioner ) {
00301     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00302     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00303                           ::run( range, body, partitioner );
00304     return body.result();
00305 }
00306 
00308 
00309 template<typename Range, typename Value, typename RealBody, typename Reduction>
00310 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00311                        affinity_partitioner& partitioner ) {
00312     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00313     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00314                                         ::run( range, body, partitioner );
00315     return body.result();
00316 }
00317 
00318 #if __TBB_TASK_GROUP_CONTEXT
00320 
00321 template<typename Range, typename Value, typename RealBody, typename Reduction>
00322 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00323                        const simple_partitioner& partitioner, task_group_context& context ) {
00324     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00325     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00326                           ::run( range, body, partitioner, context );
00327     return body.result();
00328 }
00329 
00331 
00332 template<typename Range, typename Value, typename RealBody, typename Reduction>
00333 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00334                        const auto_partitioner& partitioner, task_group_context& context ) {
00335     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00336     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00337                           ::run( range, body, partitioner, context );
00338     return body.result();
00339 }
00340 
00342 
00343 template<typename Range, typename Value, typename RealBody, typename Reduction>
00344 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00345                        affinity_partitioner& partitioner, task_group_context& context ) {
00346     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00347     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00348                                         ::run( range, body, partitioner, context );
00349     return body.result();
00350 }
00351 #endif /* __TBB_TASK_GROUP_CONTEXT */
00352 
00353 
00354 } // namespace tbb
00355 
00356 #endif /* __TBB_parallel_reduce_H */
00357 

Copyright © 2005-2011 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.