parallel_reduce.h

/*
    Copyright 2005-2009 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/

#ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H

#include "task.h"
#include "aligned_space.h"
#include "partitioner.h"
#include <new>

namespace tbb {

namespace internal {

    //! ITT instrumented routine that stores src into location pointed to by dst.
    void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3( void* dst, void* src );

    //! ITT instrumented routine that loads pointer from location pointed to by src.
    void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3( const void* src );

    //! Publish a body pointer with release semantics, so a reader sees a fully constructed body.
    template<typename T> inline void parallel_reduce_store_body( T*& dst, T* src ) {
#if TBB_USE_THREADING_TOOLS
        itt_store_pointer_with_release_v3(&dst,src);
#else
        __TBB_store_with_release(dst,src);
#endif /* TBB_USE_THREADING_TOOLS */
    }

    //! Read a body pointer with acquire semantics; pairs with parallel_reduce_store_body.
    template<typename T> inline T* parallel_reduce_load_body( T*& src ) {
#if TBB_USE_THREADING_TOOLS
        return static_cast<T*>(itt_load_pointer_with_acquire_v3(&src));
#else
        return __TBB_load_with_acquire(src);
#endif /* TBB_USE_THREADING_TOOLS */
    }
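
    /* Illustrative sketch (not part of TBB): the two helpers above implement the classic
       release/acquire pointer-publication pattern.  A rough standard C++11 analogue, with
       hypothetical names, would look like this:

           #include <atomic>

           template<typename T>
           struct published_ptr {
               std::atomic<T*> ptr;
               published_ptr() : ptr(0) {}
               // Writer: finish constructing *p, then publish it with release semantics.
               void publish( T* p ) { ptr.store( p, std::memory_order_release ); }
               // Reader: a non-NULL acquire load guarantees *p is fully visible.
               T* peek() const { return ptr.load( std::memory_order_acquire ); }
           };

       parallel_reduce uses this so a left child can hand its Body to the parent finish task
       while a stolen right child safely checks whether that handoff has already happened. */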

    //! 0 if root task, 1 if a left subtask, 2 if a right subtask.
    typedef char reduction_context;

    //! Task type used to combine the partial results of parallel_reduce.
    template<typename Body>
    class finish_reduce: public task {
        //! Pointer to body of left child, or NULL if the left child has not finished yet.
        Body* my_body;
        bool has_right_zombie;
        const reduction_context my_context;
        aligned_space<Body,1> zombie_space;
        finish_reduce( char context ) :
            my_body(NULL),
            has_right_zombie(false),
            my_context(context)
        {
        }
        task* execute() {
            if( has_right_zombie ) {
                // Right child was stolen; join its result and destroy the zombie body.
                Body* s = zombie_space.begin();
                my_body->join( *s );
                s->~Body();
            }
            if( my_context==1 )
                parallel_reduce_store_body( static_cast<finish_reduce*>(parent())->my_body, my_body );
            return NULL;
        }
        template<typename Range,typename Body_, typename Partitioner>
        friend class start_reduce;
    };

    //! Task type used to split the work of parallel_reduce.
    template<typename Range, typename Body, typename Partitioner>
    class start_reduce: public task {
        typedef finish_reduce<Body> finish_type;
        Body* my_body;
        Range my_range;
        typename Partitioner::partition_type my_partition;
        reduction_context my_context;
        /*override*/ task* execute();
        template<typename Body_>
        friend class finish_reduce;

        //! Constructor used for root task.
        start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
            my_body(body),
            my_range(range),
            my_partition(partitioner),
            my_context(0)
        {
        }
        //! Splitting constructor used to generate children.
        /** this becomes the left child; the newly constructed object is the right child. */
        start_reduce( start_reduce& parent, split ) :
            my_body(parent.my_body),
            my_range(parent.my_range,split()),
            my_partition(parent.my_partition,split()),
            my_context(2)
        {
            my_partition.set_affinity(*this);
            parent.my_context = 1;
        }
        //! Update affinity info, if any.
        /*override*/ void note_affinity( affinity_id id ) {
            my_partition.note_affinity( id );
        }

public:
        static void run( const Range& range, Body& body, Partitioner& partitioner ) {
            if( !range.empty() ) {
#if !__TBB_EXCEPTIONS || TBB_JOIN_OUTER_TASK_GROUP
                task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
#else
                // A bound context prevents exceptions thrown by the body from affecting nesting or sibling
                // algorithms, and lets users handle exceptions safely by wrapping parallel_reduce in a try-block.
                // (See the usage example after the task_group_context overloads below.)
                task_group_context context;
                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
#endif /* __TBB_EXCEPTIONS && !TBB_JOIN_OUTER_TASK_GROUP */
            }
        }
#if __TBB_EXCEPTIONS
        static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
            if( !range.empty() )
                task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
        }
#endif /* __TBB_EXCEPTIONS */
    };

    template<typename Range, typename Body, typename Partitioner>
    task* start_reduce<Range,Body,Partitioner>::execute() {
        if( my_context==2 ) {
            finish_type* p = static_cast<finish_type*>(parent() );
            if( !parallel_reduce_load_body(p->my_body) ) {
                // This right subtask started before the left sibling finished (it was stolen),
                // so give it its own Body copy, constructed in the parent's zombie_space.
                my_body = new( p->zombie_space.begin() ) Body(*my_body,split());
                p->has_right_zombie = true;
            }
        }
        if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
            (*my_body)( my_range );
            if( my_context==1 )
                parallel_reduce_store_body(static_cast<finish_type*>(parent())->my_body, my_body );
            return my_partition.continue_after_execute_range(*this);
        } else {
            finish_type& c = *new( allocate_continuation()) finish_type(my_context);
            recycle_as_child_of(c);
            c.set_ref_count(2);
            bool delay = my_partition.decide_whether_to_delay();
            start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split());
            my_partition.spawn_or_delay(delay,*this,b);
            return this;
        }
    }
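
    /* Illustrative sketch (not part of TBB): ignoring tasks, stealing, and partitioners,
       the recursion above computes the same result as this plain divide-and-conquer
       reduction over a Range/Body pair (hypothetical helper name):

           template<typename Range, typename Body>
           void sequential_reduce_sketch( Range range, Body& body ) {
               if( !range.is_divisible() ) {
                   body( range );                      // accumulate leaf range into body
               } else {
                   Range right( range, split() );      // splits range; 'range' keeps the left half
                   Body right_body( body, split() );   // fresh accumulator for the right half
                   sequential_reduce_sketch( range, body );
                   sequential_reduce_sketch( right, right_body );
                   body.join( right_body );            // combine partial results
               }
           }

       The task-based version differs in that the right half may run on another thread, and
       a fresh right Body is created lazily, only when the right subtask is actually stolen
       before the left one finishes. */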

    //! Auxiliary body class for the functional form of parallel_reduce.
    /** Adapts an identity value, a range-accumulation functor, and a reduction functor
        to the Body concept used by start_reduce. */
    template<typename Range, typename Value, typename RealBody, typename Reduction>
    class lambda_reduce_body {

//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
//       (might require some performance measurements)

        const Value&     identity_element;
        const RealBody&  my_real_body;
        const Reduction& my_reduction;
        Value            my_value;
    public:
        lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
            : identity_element(identity)
            , my_real_body(body)
            , my_reduction(reduction)
            , my_value(identity)
        { }
        lambda_reduce_body( const lambda_reduce_body& other )
            : identity_element(other.identity_element)
            , my_real_body(other.my_real_body)
            , my_reduction(other.my_reduction)
            , my_value(other.my_value)
        { }
        lambda_reduce_body( lambda_reduce_body& other, tbb::split )
            : identity_element(other.identity_element)
            , my_real_body(other.my_real_body)
            , my_reduction(other.my_reduction)
            , my_value(other.identity_element)
        { }
        void operator()(Range& range) {
            my_value = my_real_body(range, const_cast<const Value&>(my_value));
        }
        void join( lambda_reduce_body& rhs ) {
            my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
        }
        Value result() const {
            return my_value;
        }
    };

} // namespace internal

// Requirements on Range concept are documented in blocked_range.h

//! Parallel iteration with reduction and default partitioner.
/** The Body must provide operator()(Range&), a splitting constructor Body(Body&,split),
    and a join(Body&) method that combines partial results.  See also the Body-form
    example below. */
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body ) {
    internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}
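
/* Illustrative sketch (not part of TBB): a minimal Body type for the overloads above,
   summing a float array over a blocked_range<size_t> (names are hypothetical):

       #include "tbb/blocked_range.h"
       #include "tbb/parallel_reduce.h"

       struct SumBody {
           const float* a;
           float sum;
           SumBody( const float* a_ ) : a(a_), sum(0) {}
           SumBody( SumBody& other, tbb::split ) : a(other.a), sum(0) {}   // start right half at identity
           void operator()( const tbb::blocked_range<size_t>& r ) {
               for( size_t i=r.begin(); i!=r.end(); ++i ) sum += a[i];     // accumulate subrange
           }
           void join( SumBody& rhs ) { sum += rhs.sum; }                   // combine partial sums
       };

       // SumBody body(a);
       // tbb::parallel_reduce( tbb::blocked_range<size_t>(0,n), body );
       // float total = body.sum;
*/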

//! Parallel iteration with reduction and simple_partitioner.
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and auto_partitioner.
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and affinity_partitioner.
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}
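
/* Illustrative sketch (not part of TBB): an affinity_partitioner is taken by non-const
   reference because it records which worker processed which subrange; reusing the same
   object across repeated calls over the same data improves cache affinity.  Names below
   are hypothetical:

       static tbb::affinity_partitioner ap;          // keeps replay state across iterations
       for( int step=0; step<num_steps; ++step ) {
           tbb::parallel_reduce( tbb::blocked_range<size_t>(0,n), body, ap );
       }
*/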

#if __TBB_EXCEPTIONS
//! Parallel iteration with reduction, simple_partitioner and user-supplied context.
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context.
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context.
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
#endif /* __TBB_EXCEPTIONS */
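
/* Illustrative sketch (not part of TBB): as noted in start_reduce::run above, running under
   an explicit task_group_context bounds cancellation and exception propagation, so an
   exception thrown by the body can be caught around the call.  Names below are hypothetical:

       tbb::task_group_context ctx;
       try {
           tbb::parallel_reduce( tbb::blocked_range<size_t>(0,n), body,
                                 tbb::simple_partitioner(), ctx );
       } catch( const std::exception& e ) {
           // An exception from the body propagates here; sibling algorithms are unaffected.
       }
*/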

// Functional form of parallel_reduce: identity value, range-accumulation functor, and reduction functor.

//! Parallel iteration with reduction and default partitioner.
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
                          ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
    return body.result();
}
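
/* Illustrative sketch (not part of TBB): the functional form builds a lambda_reduce_body
   from an identity value, a subrange accumulator, and a reduction of two partial results.
   With lambdas (hypothetical names):

       float total = tbb::parallel_reduce(
           tbb::blocked_range<size_t>(0,n),
           0.0f,                                                          // identity element
           [=]( const tbb::blocked_range<size_t>& r, float init ) {
               for( size_t i=r.begin(); i!=r.end(); ++i ) init += a[i];   // accumulate subrange
               return init;
           },
           []( float x, float y ) { return x + y; } );                    // reduce two partial sums
*/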

//! Parallel iteration with reduction and simple_partitioner.
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run(range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and auto_partitioner.
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and affinity_partitioner.
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner );
    return body.result();
}

#if __TBB_EXCEPTIONS
//! Parallel iteration with reduction, simple_partitioner and user-supplied context.
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context.
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context.
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner, context );
    return body.result();
}
#endif /* __TBB_EXCEPTIONS */

} // namespace tbb

#endif /* __TBB_parallel_reduce_H */
