00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef __TBB_parallel_scan_H
00030 #define __TBB_parallel_scan_H
00031
00032 #include "task.h"
00033 #include "aligned_space.h"
00034 #include <new>
00035 #include "partitioner.h"
00036
00037 namespace tbb {
00038
00040
//! Tag type handed to a scan Body's operator() when a pre-scan pass is requested.
/** A Body can tell the two passes apart either by overloading on the tag type
    or by calling is_final_scan() on the tag it receives. */
struct pre_scan_tag {
    //! Always false: this tag never denotes the final scan.
    static bool is_final_scan() {return false;}
};
00044
00046
//! Tag type handed to a scan Body's operator() when the final scan pass is requested.
/** A Body can tell the two passes apart either by overloading on the tag type
    or by calling is_final_scan() on the tag it receives. */
struct final_scan_tag {
    //! Always true: this tag denotes the final scan.
    static bool is_final_scan() {return true;}
};
00050
00052 namespace internal {
00053
00055
00056 template<typename Range, typename Body>
00057 class final_sum: public task {
00058 public:
00059 Body my_body;
00060 private:
00061 aligned_space<Range,1> my_range;
00063 Body* my_stuff_last;
00064 public:
00065 final_sum( Body& body_ ) :
00066 my_body(body_,split())
00067 {
00068 poison_pointer(my_stuff_last);
00069 }
00070 ~final_sum() {
00071 my_range.begin()->~Range();
00072 }
00073 void finish_construction( const Range& range_, Body* stuff_last_ ) {
00074 new( my_range.begin() ) Range(range_);
00075 my_stuff_last = stuff_last_;
00076 }
00077 private:
00078 task* execute() {
00079 my_body( *my_range.begin(), final_scan_tag() );
00080 if( my_stuff_last )
00081 my_stuff_last->assign(my_body);
00082 return NULL;
00083 }
00084 };
00085
00087
00088 template<typename Range, typename Body>
00089 class sum_node: public task {
00090 typedef final_sum<Range,Body> final_sum_type;
00091 public:
00092 final_sum_type *my_incoming;
00093 final_sum_type *my_body;
00094 Body *my_stuff_last;
00095 private:
00096 final_sum_type *my_left_sum;
00097 sum_node *my_left;
00098 sum_node *my_right;
00099 bool my_left_is_final;
00100 Range my_range;
00101 sum_node( const Range range_, bool left_is_final_ ) :
00102 my_left_sum(NULL),
00103 my_left(NULL),
00104 my_right(NULL),
00105 my_left_is_final(left_is_final_),
00106 my_range(range_)
00107 {
00108
00109 poison_pointer(my_body);
00110 poison_pointer(my_incoming);
00111 }
00112 task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
00113 if( !n ) {
00114 f.recycle_as_child_of( *this );
00115 f.finish_construction( range_, stuff_last_ );
00116 return &f;
00117 } else {
00118 n->my_body = &f;
00119 n->my_incoming = incoming_;
00120 n->my_stuff_last = stuff_last_;
00121 return n;
00122 }
00123 }
00124 task* execute() {
00125 if( my_body ) {
00126 if( my_incoming )
00127 my_left_sum->my_body.reverse_join( my_incoming->my_body );
00128 recycle_as_continuation();
00129 sum_node& c = *this;
00130 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
00131 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
00132 set_ref_count( (a!=NULL)+(b!=NULL) );
00133 my_body = NULL;
00134 if( a ) spawn(*b);
00135 else a = b;
00136 return a;
00137 } else {
00138 return NULL;
00139 }
00140 }
00141 template<typename Range_,typename Body_,typename Partitioner_>
00142 friend class start_scan;
00143
00144 template<typename Range_,typename Body_>
00145 friend class finish_scan;
00146 };
00147
00149
00150 template<typename Range, typename Body>
00151 class finish_scan: public task {
00152 typedef sum_node<Range,Body> sum_node_type;
00153 typedef final_sum<Range,Body> final_sum_type;
00154 final_sum_type** const my_sum;
00155 sum_node_type*& my_return_slot;
00156 public:
00157 final_sum_type* my_right_zombie;
00158 sum_node_type& my_result;
00159
00160 task* execute() {
00161 __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
00162 if( my_result.my_left )
00163 my_result.my_left_is_final = false;
00164 if( my_right_zombie && my_sum )
00165 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
00166 __TBB_ASSERT( !my_return_slot, NULL );
00167 if( my_right_zombie || my_result.my_right ) {
00168 my_return_slot = &my_result;
00169 } else {
00170 destroy( my_result );
00171 }
00172 if( my_right_zombie && !my_sum && !my_result.my_right ) {
00173 destroy(*my_right_zombie);
00174 my_right_zombie = NULL;
00175 }
00176 return NULL;
00177 }
00178
00179 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
00180 my_sum(sum_),
00181 my_return_slot(return_slot_),
00182 my_right_zombie(NULL),
00183 my_result(result_)
00184 {
00185 __TBB_ASSERT( !my_return_slot, NULL );
00186 }
00187 };
00188
00190
00191 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00192 class start_scan: public task {
00193 typedef sum_node<Range,Body> sum_node_type;
00194 typedef final_sum<Range,Body> final_sum_type;
00195 final_sum_type* my_body;
00197 final_sum_type** my_sum;
00198 sum_node_type** my_return_slot;
00200 sum_node_type* my_parent_sum;
00201 bool my_is_final;
00202 bool my_is_right_child;
00203 Range my_range;
00204 typename Partitioner::partition_type my_partition;
00205 task* execute();
00206 public:
00207 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
00208 my_body(parent_.my_body),
00209 my_sum(parent_.my_sum),
00210 my_return_slot(&return_slot_),
00211 my_parent_sum(parent_sum_),
00212 my_is_final(parent_.my_is_final),
00213 my_is_right_child(false),
00214 my_range(parent_.my_range,split()),
00215 my_partition(parent_.my_partition,split())
00216 {
00217 __TBB_ASSERT( !*my_return_slot, NULL );
00218 }
00219
00220 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
00221 my_body(&body_),
00222 my_sum(NULL),
00223 my_return_slot(&return_slot_),
00224 my_parent_sum(NULL),
00225 my_is_final(true),
00226 my_is_right_child(false),
00227 my_range(range_),
00228 my_partition(partitioner_)
00229 {
00230 __TBB_ASSERT( !*my_return_slot, NULL );
00231 }
00232
00233 static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
00234 if( !range_.empty() ) {
00235 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00236 internal::sum_node<Range,Body>* root = NULL;
00237 typedef internal::final_sum<Range,Body> final_sum_type;
00238 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
00239 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00240 root,
00241 range_,
00242 *temp_body,
00243 partitioner_ );
00244 task::spawn_root_and_wait( pass1 );
00245 if( root ) {
00246 root->my_body = temp_body;
00247 root->my_incoming = NULL;
00248 root->my_stuff_last = &body_;
00249 task::spawn_root_and_wait( *root );
00250 } else {
00251 body_.assign(temp_body->my_body);
00252 temp_body->finish_construction( range_, NULL );
00253 temp_body->destroy(*temp_body);
00254 }
00255 }
00256 }
00257 };
00258
00259 template<typename Range, typename Body, typename Partitioner>
00260 task* start_scan<Range,Body,Partitioner>::execute() {
00261 typedef internal::finish_scan<Range,Body> finish_pass1_type;
00262 finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00263
00264
00265
00266 bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
00267 if( treat_as_stolen ) {
00268
00269 p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
00270 my_is_final = false;
00271 }
00272 task* next_task = NULL;
00273 if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
00274 if( my_is_final )
00275 (my_body->my_body)( my_range, final_scan_tag() );
00276 else if( my_sum )
00277 (my_body->my_body)( my_range, pre_scan_tag() );
00278 if( my_sum )
00279 *my_sum = my_body;
00280 __TBB_ASSERT( !*my_return_slot, NULL );
00281 } else {
00282 sum_node_type* result;
00283 if( my_parent_sum )
00284 result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,my_is_final);
00285 else
00286 result = new(task::allocate_root()) sum_node_type(my_range,my_is_final);
00287 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
00288
00289 start_scan& b = *new( c.allocate_child() ) start_scan( result->my_right, *this, result );
00290 b.my_is_right_child = true;
00291
00292
00293
00294 recycle_as_child_of(c);
00295 c.set_ref_count(2);
00296 c.spawn(b);
00297 my_sum = &result->my_left_sum;
00298 my_return_slot = &result->my_left;
00299 my_is_right_child = false;
00300 next_task = this;
00301 my_parent_sum = result;
00302 __TBB_ASSERT( !*my_return_slot, NULL );
00303 }
00304 return next_task;
00305 }
00306 }
00308
00309
00310
00328
00330
00331 template<typename Range, typename Body>
00332 void parallel_scan( const Range& range, Body& body ) {
00333 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
00334 }
00335
00337
00338 template<typename Range, typename Body>
00339 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00340 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
00341 }
00342
00344
00345 template<typename Range, typename Body>
00346 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00347 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
00348 }
00350
00351 }
00352
00353 #endif
00354