00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef __TBB_partitioner_H
00030 #define __TBB_partitioner_H
00031
// Compile-time tuning constants for the partitioners in this header.
// Each one can be predefined by the including code; all three are #undef'ed
// again at the end of this header.

// Multiplier applied to get_initial_auto_partitioner_divisor() when the
// auto/affinity partition types compute their initial divisor.
00032 #ifndef __TBB_INITIAL_CHUNKS
00033 #define __TBB_INITIAL_CHUNKS 2
00034 #endif
// Value used for range_pool_size, i.e. the capacity of the range pool built
// by the auto and affinity partition types.
00035 #ifndef __TBB_RANGE_POOL_CAPACITY
00036 #define __TBB_RANGE_POOL_CAPACITY 8
00037 #endif
// Initial value of my_max_depth in auto_partition_type_base.
00038 #ifndef __TBB_INIT_DEPTH
00039 #define __TBB_INIT_DEPTH 5
00040 #endif
00041
00042 #include "task.h"
00043 #include "aligned_space.h"
00044 #include "atomic.h"
00045
00046 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
00047
00048 #pragma warning (push)
00049 #pragma warning (disable: 4244)
00050 #endif
00051
00052 namespace tbb {
00053
// Forward declarations of the public partitioner classes; their definitions
// appear at the end of this header.
00054 class auto_partitioner;
00055 class simple_partitioner;
00056 class affinity_partitioner;
// Forward declaration of the affinity partition type so that
// affinity_partitioner_base_v3 can name it as a friend before it is defined.
00057 namespace interface6 {
00058 namespace internal {
00059 class affinity_partition_type;
00060 }
00061 }
00062
00063 namespace internal {
// Exported function declared here and defined outside this header. Its result
// (scaled by __TBB_INITIAL_CHUNKS/4) seeds my_divisor of
// auto_partition_type_base and num_chunks of old_auto_partition_type.
00064 size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
00065
00067 class affinity_partitioner_base_v3: no_copy {
00068 friend class tbb::affinity_partitioner;
00069 friend class tbb::interface6::internal::affinity_partition_type;
00071
00072 affinity_id* my_array;
00074 size_t my_size;
00076 affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
00078 ~affinity_partitioner_base_v3() {resize(0);}
00080
00081 void __TBB_EXPORTED_METHOD resize( unsigned factor );
00082 };
00083
00085 class partition_type_base {
00086 public:
00087 void set_affinity( task & ) {}
00088 void note_affinity( task::affinity_id ) {}
00089 task* continue_after_execute_range() {return NULL;}
00090 bool decide_whether_to_delay() {return false;}
00091 void spawn_or_delay( bool, task& b ) {
00092 task::spawn(b);
00093 }
00094 };
00095
// Forward declaration only; the partitioner classes at the end of this header
// declare this template as a friend. It is not defined in this header.
00096 template<typename Range, typename Body, typename Partitioner> class start_scan;
00097
00098 }
00100
// Forward declaration of the start_for template in the serial namespace; the
// partitioner classes at the end of this header declare it as a friend.
00101 namespace serial {
00102 namespace interface6 {
00103 template<typename Range, typename Body, typename Partitioner> class start_for;
00104 }
00105 }
00106
00107 namespace interface6 {
00109 namespace internal {
// Makes the names of tbb::internal visible unqualified inside
// tbb::interface6::internal.
00110 using namespace tbb::internal;
// Forward declarations only; the partitioner classes at the end of this header
// declare these templates as friends. They are not defined in this header.
00111 template<typename Range, typename Body, typename Partitioner> class start_for;
00112 template<typename Range, typename Body, typename Partitioner> class start_reduce;
00113
00115 class flag_task: public task {
00116 public:
00117 tbb::atomic<bool> my_child_stolen;
00118 flag_task() { my_child_stolen = false; }
00119 task* execute() { return NULL; }
00120 static void mark_task_stolen(task &t) {
00121 tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen;
00122 #if TBB_USE_THREADING_TOOLS
00123
00124 flag.fetch_and_store<release>(true);
00125 #else
00126 flag = true;
00127 #endif //TBB_USE_THREADING_TOOLS
00128 }
00129 static bool is_peer_stolen(task &t) {
00130 return static_cast<flag_task*>(t.parent())->my_child_stolen;
00131 }
00132 };
00133
00135 class signal_task: public task {
00136 public:
00137 task* execute() {
00138 if( is_stolen_task() ) {
00139 flag_task::mark_task_stolen(*this);
00140 }
00141 return NULL;
00142 }
00143 };
00144
// Small unsigned type used below for split depths (my_depth, my_max_depth) and
// for the indices and size of range_vector.
00148 typedef unsigned char depth_t;
00149
00151 template <typename T, depth_t MaxCapacity>
00152 class range_vector {
00153 depth_t my_head;
00154 depth_t my_tail;
00155 depth_t my_size;
00156 depth_t my_depth[MaxCapacity];
00157 tbb::aligned_space<T, MaxCapacity> my_pool;
00158
00159 public:
00161 range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
00162 my_depth[0] = 0;
00163 new( my_pool.begin() ) T(elem);
00164 }
00165 ~range_vector() {
00166 while( !empty() ) pop_back();
00167 }
00168 bool empty() const { return my_size == 0; }
00169 depth_t size() const { return my_size; }
00172 void split_to_fill(depth_t max_depth) {
00173 while( my_size < MaxCapacity && my_depth[my_head] < max_depth
00174 && my_pool.begin()[my_head].is_divisible() ) {
00175 depth_t prev = my_head;
00176 my_head = (my_head + 1) % MaxCapacity;
00177 new(my_pool.begin()+my_head) T(my_pool.begin()[prev]);
00178 my_pool.begin()[prev].~T();
00179 new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split());
00180 my_depth[my_head] = ++my_depth[prev];
00181 my_size++;
00182 }
00183 }
00184 void pop_back() {
00185 __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
00186 my_pool.begin()[my_head].~T();
00187 my_size--;
00188 my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
00189 }
00190 void pop_front() {
00191 __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
00192 my_pool.begin()[my_tail].~T();
00193 my_size--;
00194 my_tail = (my_tail + 1) % MaxCapacity;
00195 }
00196 T& back() {
00197 __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
00198 return my_pool.begin()[my_head];
00199 }
00200 T& front() {
00201 __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
00202 return my_pool.begin()[my_tail];
00203 }
00205 depth_t front_depth() {
00206 __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
00207 return my_depth[my_tail];
00208 }
00209 };
00210
00212 template <typename Partition>
00213 struct partition_type_base {
00214
00215 void set_affinity( task & ) {}
00216 void note_affinity( task::affinity_id ) {}
00217 bool check_being_stolen(task &) { return false; }
00218 bool check_for_demand(task &) { return false; }
00219 bool divisions_left() { return true; }
00220 bool should_create_trap() { return false; }
00221 depth_t max_depth() { return 0; }
00222 void align_depth(depth_t) { }
00223
00224 Partition& derived() { return *static_cast<Partition*>(this); }
00225 template<typename StartType>
00226 flag_task* split_work(StartType &start) {
00227 flag_task* parent_ptr = start.create_continuation();
00228 start.set_parent(parent_ptr);
00229 parent_ptr->set_ref_count(2);
00230 StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, split());
00231 start.spawn(right_work);
00232 return parent_ptr;
00233 }
00234 template<typename StartType, typename Range>
00235 void execute(StartType &start, Range &range) {
00236
00237
00238
00239
00240
00241
00242
00243 task* parent_ptr = start.parent();
00244 if( range.is_divisible() ) {
00245 if( derived().divisions_left() )
00246 do parent_ptr = split_work(start);
00247 while( range.is_divisible() && derived().divisions_left() );
00248 if( derived().should_create_trap() ) {
00249 if( parent_ptr->ref_count() > 1 ) {
00250 parent_ptr = start.create_continuation();
00251 start.set_parent(parent_ptr);
00252 } else __TBB_ASSERT(parent_ptr->ref_count() == 1, NULL);
00253 parent_ptr->set_ref_count(2);
00254 signal_task& right_signal = *new( parent_ptr->allocate_child() ) signal_task();
00255 start.spawn(right_signal);
00256 }
00257 }
00258 if( !range.is_divisible() || !derived().max_depth() )
00259 start.run_body( range );
00260 else {
00261 internal::range_vector<Range, Partition::range_pool_size> range_pool(range);
00262 do {
00263 range_pool.split_to_fill(derived().max_depth());
00264 if( derived().check_for_demand( start ) ) {
00265 if( range_pool.size() > 1 ) {
00266 parent_ptr = start.create_continuation();
00267 start.set_parent(parent_ptr);
00268 parent_ptr->set_ref_count(2);
00269 StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, range_pool.front(), range_pool.front_depth());
00270 start.spawn(right_work);
00271 range_pool.pop_front();
00272 continue;
00273 }
00274 if( range_pool.back().is_divisible() )
00275 continue;
00276 }
00277 start.run_body( range_pool.back() );
00278 range_pool.pop_back();
00279 } while( !range_pool.empty() && !start.is_cancelled() );
00280 }
00281 }
00282 };
00283
00285 template <typename Partition>
00286 struct auto_partition_type_base : partition_type_base<Partition> {
00287 size_t my_divisor;
00288 depth_t my_max_depth;
00289 auto_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) {
00290 my_divisor = tbb::internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4;
00291 __TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partitioner_divisor() is not valid");
00292 }
00293 auto_partition_type_base(auto_partition_type_base &src, split) {
00294 my_max_depth = src.my_max_depth;
00295 #if __TBB_INITIAL_TASK_IMBALANCE
00296 if( src.my_divisor <= 1 ) my_divisor = 0;
00297 else my_divisor = src.my_divisor = (src.my_divisor+1u) / 2u;
00298 #else
00299 my_divisor = src.my_divisor / 2u;
00300 src.my_divisor = src.my_divisor - my_divisor;
00301 if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(src.my_divisor/my_divisor));
00302 #endif
00303 }
00304 bool check_being_stolen( task &t) {
00305 if( !my_divisor ) {
00306 my_divisor = 1;
00307 if( t.is_stolen_task() ) {
00308 #if TBB_USE_EXCEPTIONS
00309
00310 __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
00311
00312
00313
00314 #endif
00315 flag_task::mark_task_stolen(t);
00316 my_max_depth++;
00317 return true;
00318 }
00319 }
00320 return false;
00321 }
00322 bool divisions_left() {
00323 if( my_divisor > 1 ) return true;
00324 if( my_divisor && my_max_depth > 1 ) {
00325
00326 my_max_depth--;
00327 my_divisor = 0;
00328 return true;
00329 } else return false;
00330 }
00331 bool should_create_trap() {
00332 return my_divisor > 0;
00333 }
00334 bool check_for_demand(task &t) {
00335 if( flag_task::is_peer_stolen(t) ) {
00336 my_max_depth++;
00337 return true;
00338 } else return false;
00339 }
00340 void align_depth(depth_t base) {
00341 __TBB_ASSERT(base <= my_max_depth, 0);
00342 my_max_depth -= base;
00343 }
00344 depth_t max_depth() { return my_max_depth; }
00345 };
00346
00348 class affinity_partition_type : public auto_partition_type_base<affinity_partition_type> {
00349 static const unsigned factor_power = 4;
00350 static const unsigned factor = 1<<factor_power;
00351 bool my_delay;
00352 unsigned map_begin, map_end, map_mid;
00353 tbb::internal::affinity_id* my_array;
00354 void set_mid() {
00355 unsigned d = (map_end - map_begin)/2;
00356 if( d > factor )
00357 d &= 0u-factor;
00358 map_mid = map_end - d;
00359 }
00360 public:
00361 affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) {
00362 __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
00363 ap.resize(factor);
00364 my_array = ap.my_array;
00365 map_begin = 0;
00366 map_end = unsigned(ap.my_size);
00367 set_mid();
00368 my_delay = true;
00369 my_divisor /= __TBB_INITIAL_CHUNKS;
00370 my_max_depth = factor_power+1;
00371 __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
00372 }
00373 affinity_partition_type(affinity_partition_type& p, split)
00374 : auto_partition_type_base<affinity_partition_type>(p, split()), my_array(p.my_array) {
00375 __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );
00376 map_end = p.map_end;
00377 map_begin = p.map_end = p.map_mid;
00378 set_mid(); p.set_mid();
00379 my_delay = p.my_delay;
00380 }
00381 void set_affinity( task &t ) {
00382 if( map_begin<map_end )
00383 t.set_affinity( my_array[map_begin] );
00384 }
00385 void note_affinity( task::affinity_id id ) {
00386 if( map_begin<map_end )
00387 my_array[map_begin] = id;
00388 }
00389 bool check_for_demand( task &t ) {
00390 if( !my_delay ) {
00391 if( map_mid<map_end ) {
00392 __TBB_ASSERT(my_max_depth>__TBB_Log2(map_end-map_mid), 0);
00393 return true;
00394 }
00395 if( flag_task::is_peer_stolen(t) ) {
00396 my_max_depth++;
00397 return true;
00398 }
00399 } else my_delay = false;
00400 return false;
00401 }
00402 bool divisions_left() {
00403 return my_divisor > 1;
00404 }
00405 bool should_create_trap() {
00406 return true;
00407 }
00408 static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
00409 };
00410
00411 class auto_partition_type: public auto_partition_type_base<auto_partition_type> {
00412 public:
00413 auto_partition_type( const auto_partitioner& ) {}
00414 auto_partition_type( auto_partition_type& src, split)
00415 : auto_partition_type_base<auto_partition_type>(src, split()) {}
00416 static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
00417 };
00418
00419 class simple_partition_type: public partition_type_base<simple_partition_type> {
00420 public:
00421 simple_partition_type( const simple_partitioner& ) {}
00422 simple_partition_type( const simple_partition_type&, split ) {}
00424 template<typename StartType, typename Range>
00425 void execute(StartType &start, Range &range) {
00426 while( range.is_divisible() )
00427 split_work( start );
00428 start.run_body( range );
00429 }
00430
00431 };
00432
00434 class old_auto_partition_type: public tbb::internal::partition_type_base {
00435 size_t num_chunks;
00436 static const size_t VICTIM_CHUNKS = 4;
00437 public:
00438 bool should_execute_range(const task &t) {
00439 if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
00440 num_chunks = VICTIM_CHUNKS;
00441 return num_chunks==1;
00442 }
00443 old_auto_partition_type( const auto_partitioner& )
00444 : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
00445 old_auto_partition_type( const affinity_partitioner& )
00446 : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
00447 old_auto_partition_type( old_auto_partition_type& pt, split ) {
00448 num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
00449 }
00450 };
00451
00452 }
00454 }
00455
00457
00459 class simple_partitioner {
00460 public:
00461 simple_partitioner() {}
00462 private:
00463 template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
00464 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
00465 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
00466 template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
00467
00468 class partition_type: public internal::partition_type_base {
00469 public:
00470 bool should_execute_range(const task& ) {return false;}
00471 partition_type( const simple_partitioner& ) {}
00472 partition_type( const partition_type&, split ) {}
00473 };
00474
00475 typedef interface6::internal::simple_partition_type task_partition_type;
00476 };
00477
00479
00482 class auto_partitioner {
00483 public:
00484 auto_partitioner() {}
00485
00486 private:
00487 template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
00488 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
00489 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
00490 template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
00491
00492 typedef interface6::internal::old_auto_partition_type partition_type;
00493
00494 typedef interface6::internal::auto_partition_type task_partition_type;
00495 };
00496
00498 class affinity_partitioner: internal::affinity_partitioner_base_v3 {
00499 public:
00500 affinity_partitioner() {}
00501
00502 private:
00503 template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
00504 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
00505 template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
00506 template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
00507
00508 typedef interface6::internal::old_auto_partition_type partition_type;
00509
00510 typedef interface6::internal::affinity_partition_type task_partition_type;
00511 };
00512
00513 }
00514
00515 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
00516 #pragma warning (pop)
00517 #endif // warning 4244 is back
00518 #undef __TBB_INITIAL_CHUNKS
00519 #undef __TBB_RANGE_POOL_CAPACITY
00520 #undef __TBB_INIT_DEPTH
00521 #endif