#ifndef TURBO_IPC_ABSTRACT_DOMAIN

#ifdef NO_CONCURRENT_MANAGED_MEMORY
  using ConcurrentAllocator = bt::pinned_allocator;
#else
  using ConcurrentAllocator = bt::managed_allocator;
#endif

using GridCP = AbstractDomains<Itv,
  bt::statistics_allocator<ConcurrentAllocator>,
  bt::statistics_allocator<UniqueLightAlloc<ConcurrentAllocator, 0>>,
  bt::statistics_allocator<UniqueLightAlloc<ConcurrentAllocator, 1>>>;
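// Data shared between the host (CPU) and the device (GPU): a copy of the root problem, the flag
// raised when solving must stop, and the memory configuration of the blocks. It is allocated with
// the concurrent allocator (managed or pinned memory) so that both sides can access it.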
struct UnifiedData {
  GridCP root;
  cuda::std::atomic_flag stop;
  MemoryConfig mem_config;

  UnifiedData(const CP<Itv>& cp, MemoryConfig mem_config)
   : root(GridCP::tag_gpu_block_copy{}, false, cp)
   , mem_config(mem_config)
  {
    size_t num_subproblems = 1;
    num_subproblems <<= root.config.subproblems_power;
    root.stats.eps_num_subproblems = num_subproblems;
  }
};
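// Types used inside each block: the store of variables and the PIR propagators are parametrized
// by a pool allocator so they can be placed in shared memory, while `UB` is the atomic
// upper-bound lattice (grid scope) used to share the best objective bound between blocks.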
using IStore = VStore<Itv, bt::pool_allocator>;
using IProp = PIR<IStore, bt::pool_allocator>;
using UB = ZUB<typename Itv::LB::value_type, bt::atomic_memory_grid>;
using strategies_type = bt::vector<StrategyType<bt::global_allocator>, bt::global_allocator>;
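// Data local to one CUDA block. Each block solves one subproblem at a time with its own copy of
// the store and of the propagators, and keeps its own best solution and search state.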
struct BlockData {
  // Store at the root of the current subproblem, used to restore `store` when backjumping.
  abstract_ptr<VStore<Itv, bt::global_allocator>> root_store;
  // Store of the best solution found by this block.
  abstract_ptr<VStore<Itv, bt::global_allocator>> best_store;
  // Current store of variables and propagators (allocated in the pools of this block).
  abstract_ptr<IStore> store;
  abstract_ptr<IProp> iprop;
  // Index of the subproblem currently solved by this block.
  size_t subproblem_idx;
  // Position in the search strategies (strategy index and first possibly unassigned variable).
  int current_strategy;
  int next_unassigned_var;
  // Snapshot of the strategy position at depth 1, restored when backjumping.
  int snapshot_root_strategy;
  int snapshot_next_unassigned_var;
  // Trail of the branching decisions taken since the root of the current subproblem.
  bt::vector<LightBranch<Itv>, bt::global_allocator> decisions;
  // Per-block timer used for the statistics.
  cuda::std::chrono::system_clock::time_point timer;

  __device__ BlockData()
   : current_strategy(0)
   , next_unassigned_var(0)
  {}
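  // Allocate the stores and the propagators of this block. The pools for the store and the
  // propagators are carved out of the block's shared memory buffer according to `mem_config`;
  // the root and best stores are kept in global memory. Only thread 0 allocates.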
  __device__ void allocate(const UnifiedData& unified_data, const GridData& grid_data, unsigned char* shared_mem) {
    if(threadIdx.x == 0) {
      subproblem_idx = blockIdx.x;
      const MemoryConfig& mem_config = unified_data.mem_config;
      const auto& u_store = *(unified_data.root.store);
      const auto& u_iprop = *(unified_data.root.iprop);
      bt::pool_allocator shared_mem_pool(mem_config.make_shared_pool(shared_mem));
      bt::pool_allocator store_allocator(mem_config.make_store_pool(shared_mem_pool));
      bt::pool_allocator prop_allocator(mem_config.make_prop_pool(shared_mem_pool));
      root_store = bt::make_shared<VStore<Itv, bt::global_allocator>, bt::global_allocator>(u_store);
      best_store = bt::make_shared<VStore<Itv, bt::global_allocator>, bt::global_allocator>(u_store);
      store = bt::allocate_shared<IStore, bt::pool_allocator>(store_allocator, u_store, store_allocator);
      iprop = bt::allocate_shared<IProp, bt::pool_allocator>(prop_allocator, u_iprop, store, prop_allocator);
    }
  }
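  // Release the abstract elements allocated in the pools of this block before the kernel exits.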
  __device__ void deallocate_shared_data() {
    if(threadIdx.x == 0) {
      store = abstract_ptr<IStore>();
      iprop = abstract_ptr<IProp>();
    }
  }
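  // Create the next branching decision in `decisions[depth]` by selecting an unassigned variable
  // according to the search strategies, tried in order starting from `current_strategy`.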
  __device__ INLINE void split(bool& has_changed, const strategies_type& strategies) {
    using LB2 = typename Itv::LB::local_type;
    using UB2 = typename Itv::UB::local_type;
    __shared__ local::ZUB idx;
    decisions[depth].var = AVar{};
    int currentDepth = depth;
    for(int i = current_strategy; i < strategies.size(); ++i) {
      switch(strategies[i].var_order) {
        case VariableOrder::INPUT_ORDER: {
          input_order_split(has_changed, idx, strategies[i]);
          break;
        }
        case VariableOrder::FIRST_FAIL: {
          // Select the variable with the smallest domain.
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return UB2(u.ub().value() - u.lb().value()); });
          break;
        }
        case VariableOrder::ANTI_FIRST_FAIL: {
          // Select the variable with the largest domain.
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return LB2(u.ub().value() - u.lb().value()); });
          break;
        }
        case VariableOrder::LARGEST: {
          // Select the variable with the largest upper bound.
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return LB2(u.ub().value()); });
          break;
        }
        case VariableOrder::SMALLEST: {
          // Select the variable with the smallest lower bound.
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return UB2(u.lb().value()); });
          break;
        }
        default: assert(false);
      }
      // A decision has been created by the current strategy: we are done.
      if(!decisions[currentDepth].var.is_untyped()) {
        return;
      }
      // Otherwise, try the next strategy, scanning again from the first variable.
      if(threadIdx.x == 0) {
        next_unassigned_var = 0;
      }
    }
  }
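  // INPUT_ORDER strategy: the threads scan the variables in parallel (strided by blockDim.x)
  // and keep in the upper-bound lattice `idx` the smallest index of an unassigned variable.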
  __device__ INLINE void input_order_split(bool& has_changed, local::ZUB& idx,
    const StrategyType<bt::global_allocator>& strategy)
  {
    bool split_in_store = strategy.vars.empty();
    int n = split_in_store ? store->vars() : strategy.vars.size();
    if(threadIdx.x == 0) {
      idx = local::ZUB(n);
    }
    __syncthreads();
    for(int i = next_unassigned_var + threadIdx.x; i < n; i += blockDim.x) {
      const auto& dom = (*store)[split_in_store ? i : strategy.vars[i].vid()];
      if(dom.lb().value() != dom.ub().value() && !dom.lb().is_top() && !dom.ub().is_top()) {
        if(idx.meet(local::ZUB(i))) {
          break;
        }
      }
    }
    __syncthreads();
    if(threadIdx.x == 0) {
      next_unassigned_var = idx.value();
    }
    if(idx.value() != n) {
      push_decision(strategy.val_order, split_in_store ? AVar{store->aty(), idx.value()} : strategy.vars[idx.value()]);
    }
  }
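  // Generic strategy: select an unassigned variable minimizing (in the lattice returned by `f`)
  // the criterion `f(dom)`, e.g. the domain size for FIRST_FAIL or the lower bound for SMALLEST.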
  template <class F>
  __device__ INLINE void lattice_smallest_split(bool& has_changed, local::ZUB& idx,
    const StrategyType<bt::global_allocator>& strategy, F f)
  {
    using T = decltype(f(Itv{}));
    __shared__ T value;
    bool split_in_store = strategy.vars.empty();
    int n = split_in_store ? store->vars() : strategy.vars.size();
    if(threadIdx.x == 0) {
      idx = local::ZUB(n);
      value = T::top();
    }
    __syncthreads();
    // First pass: compute in `value` the best criterion among the unassigned variables.
    for(int i = next_unassigned_var + threadIdx.x; i < n; i += blockDim.x) {
      const auto& dom = (*store)[split_in_store ? i : strategy.vars[i].vid()];
      if(dom.lb().value() != dom.ub().value() && !dom.lb().is_top() && !dom.ub().is_top()) {
        if(value.meet(f(dom))) {
          if(idx.meet(local::ZUB(i))) {}
        }
      }
    }
    __syncthreads();
    if(!value.is_top()) {
      if(threadIdx.x == 0) {
        next_unassigned_var = idx.value();
      }
      __syncthreads();
      // Second pass: find the smallest index of a variable reaching the best criterion `value`.
      for(int i = next_unassigned_var + threadIdx.x; i < n; i += blockDim.x) {
        const auto& dom = (*store)[split_in_store ? i : strategy.vars[i].vid()];
        if(dom.lb().value() != dom.ub().value() && !dom.lb().is_top() && !dom.ub().is_top() && f(dom) == value) {
          if(idx.meet(local::ZUB(i))) {}
        }
      }
      __syncthreads();
      assert(idx.value() < n);
      push_decision(strategy.val_order, split_in_store ? AVar{store->aty(), idx.value()} : strategy.vars[idx.value()]);
    }
  }
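  // Record the decision on `var` at the current depth: the two children intervals are computed
  // from the value ordering (MIN, MAX, SPLIT, REVERSE_SPLIT) and the ropes encode where to
  // backjump once a child has been fully explored.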
  __device__ INLINE void push_decision(ValueOrder val_order, AVar var) {
    using value_type = typename Itv::LB::value_type;
    if(threadIdx.x == 0) {
      decisions[depth].var = var;
      decisions[depth].current_idx = -1;
      const auto& dom = store->project(decisions[depth].var);
      assert(dom.lb().value() != dom.ub().value());
      switch(val_order) {
        case ValueOrder::MIN: {
          decisions[depth].children[0] = Itv(dom.lb().value());
          decisions[depth].children[1] = Itv(dom.lb().value() + value_type{1}, dom.ub());
          break;
        }
        case ValueOrder::MAX: {
          decisions[depth].children[0] = Itv(dom.ub().value());
          decisions[depth].children[1] = Itv(dom.lb(), dom.ub().value() - value_type{1});
          break;
        }
        case ValueOrder::SPLIT: {
          auto mid = dom.lb().value() + (dom.ub().value() - dom.lb().value()) / value_type{2};
          decisions[depth].children[0] = Itv(dom.lb(), mid);
          decisions[depth].children[1] = Itv(mid + value_type{1}, dom.ub());
          break;
        }
        case ValueOrder::REVERSE_SPLIT: {
          auto mid = dom.lb().value() + (dom.ub().value() - dom.lb().value()) / value_type{2};
          decisions[depth].children[0] = Itv(mid + value_type{1}, dom.ub());
          decisions[depth].children[1] = Itv(dom.lb(), mid);
          break;
        }
        default: assert(false);
      }
      // Child 0 continues the dive (depth+1); child 1 backjumps to the pending alternative of the parent.
      decisions[depth].ropes[0] = depth + 1;
      decisions[depth].ropes[1] = depth > 0 ? decisions[depth-1].ropes[decisions[depth-1].current_idx] : -1;
      depth++;
      if(decisions.size() == depth) {
        printf("%% resize to %d\n", (int)decisions.size() * 2);
        decisions.resize(decisions.size() * 2);
      }
    }
  }
};
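// Data shared by all blocks of the grid: the per-block data, the index of the next subproblem to
// dispatch, the best objective bound found so far, a lock serializing the printing, the search
// strategies and the objective variable.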
struct GridData {
  bt::vector<BlockData, bt::global_allocator> blocks;
  ZLB<size_t, bt::atomic_memory_grid> next_subproblem;
  UB appx_best_bound;
  cuda::binary_semaphore<cuda::thread_scope_device> print_lock;
  strategies_type search_strategies;
  AVar obj_var;

  __device__ GridData(const GridCP& root)
   : blocks(root.stats.num_blocks)
   // Blocks 0..num_blocks-1 start on the subproblems 0..num_blocks-1; the next one to dispatch is num_blocks.
   , next_subproblem(root.stats.num_blocks)
   , print_lock(1)
   , search_strategies(root.split->strategies_())
   , obj_var(root.minimize_obj_var)
  {}
};
MemoryConfig configure_gpu_barebones(CP<Itv>&);
__global__ void initialize_global_data(UnifiedData*, bt::unique_ptr<GridData, bt::global_allocator>*);
__global__ void gpu_barebones_solve(UnifiedData*, GridData*);
template <class FPEngine>
__device__ INLINE void propagate(UnifiedData& unified_data, GridData& grid_data, BlockData& block_data,
  FPEngine& fp_engine, bool& stop, bool& has_changed, bool& is_leaf_node);
__global__ void reduce_blocks(UnifiedData*, GridData*);
__global__ void deallocate_global_data(bt::unique_ptr<GridData, bt::global_allocator>*);
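// Host side of the barebones solver: configure the memory layout, copy the problem to the device,
// launch the solving kernel, wait until solving ends (or a timeout/interruption occurs), then
// reduce the per-block results and print the statistics.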
  printf("%% WARNING: -arch barebones is incompatible with -i and -a (it cannot print intermediate solutions).\n");
  auto start = std::chrono::steady_clock::now();
  check_support_managed_memory();
  check_support_concurrent_managed_memory();
  // The root node is already inconsistent after preprocessing.
  if(cp.iprop->is_bot()) {
    /* ... */
  }
  MemoryConfig mem_config = configure_gpu_barebones(cp);
  auto unified_data = bt::make_unique<UnifiedData, ConcurrentAllocator>(cp, mem_config);
  auto grid_data = bt::make_unique<bt::unique_ptr<GridData, bt::global_allocator>, ConcurrentAllocator>();
  initialize_global_data<<<1,1>>>(unified_data.get(), grid_data.get());
  CUDAEX(cudaDeviceSynchronize());
  gpu_barebones_solve
    <<<cp.stats.num_blocks,
       CUDA_THREADS_PER_BLOCK,
       mem_config.shared_bytes>>>
    (unified_data.get(), grid_data->get());
  bool interrupted = wait_solving_ends(unified_data->stop, unified_data->root, start);
  CUDAEX(cudaDeviceSynchronize());
  reduce_blocks<<<1,1>>>(unified_data.get(), grid_data->get());
  CUDAEX(cudaDeviceSynchronize());
  auto& uroot = unified_data->root;
  if(uroot.stats.solutions > 0) {
    /* ... */
  }
  uroot.stats.print_mzn_final_separator();
  if(uroot.config.print_statistics) {
    uroot.config.print_mzn_statistics();
    uroot.stats.print_mzn_statistics(uroot.config.verbose_solving);
    if(uroot.bab->is_optimization() && uroot.stats.solutions > 0) {
      uroot.stats.print_mzn_objective(uroot.best->project(uroot.bab->objective_var()), uroot.bab->is_minimization());
    }
    unified_data->root.stats.print_mzn_end_stats();
  }
  deallocate_global_data<<<1,1>>>(grid_data.get());
  CUDAEX(cudaDeviceSynchronize());
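// Compute the number of blocks to launch and the memory configuration of each block (shared or
// global memory for the store and the propagators), based on the device properties and the size
// of the problem, and set the CUDA heap and stack limits accordingly.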
MemoryConfig configure_gpu_barebones(CP<Itv>& cp) {
  auto& config = cp.config;
  cudaDeviceProp deviceProp;
  cudaGetDeviceProperties(&deviceProp, 0);
  int max_block_per_sm;
  cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_block_per_sm, (void*) gpu_barebones_solve, CUDA_THREADS_PER_BLOCK, 0);
  if(config.verbose_solving) {
    printf("%% max_blocks_per_sm=%d\n", max_block_per_sm);
  }
  // The number of blocks (-or) cannot usefully exceed the number of blocks the GPU can run simultaneously.
  if(cp.config.or_nodes > max_block_per_sm * deviceProp.multiProcessorCount) {
    cp.stats.num_blocks = max_block_per_sm * deviceProp.multiProcessorCount;
    printf("%% WARNING: -or %d is too high on your GPU architecture, it has been reduced to %d.\n",
      (int)cp.config.or_nodes, cp.stats.num_blocks);
  }
  else if(cp.config.or_nodes != 0) {
    cp.stats.num_blocks = cp.config.or_nodes;
  }
  else {
    cp.stats.num_blocks = max_block_per_sm * deviceProp.multiProcessorCount;
  }
  size_t store_bytes = gpu_sizeof<IStore>() + gpu_sizeof<abstract_ptr<IStore>>() + cp.store->vars() * gpu_sizeof<Itv>();
  size_t iprop_bytes = gpu_sizeof<IProp>() + gpu_sizeof<abstract_ptr<IProp>>() + cp.iprop->num_deductions() * gpu_sizeof<bytecode_type>() + gpu_sizeof<typename IProp::bytecodes_type>();
  // Very large problems: keep only one block per SM to limit the memory pressure.
  if(cp.iprop->num_deductions() + cp.store->vars() > 1000000) {
    printf("%% WARNING: Large problem detected, reducing to 1 block per SM.\n");
    cp.stats.num_blocks = deviceProp.multiProcessorCount;
  }
  int blocks_per_sm = (cp.stats.num_blocks + deviceProp.multiProcessorCount - 1) / deviceProp.multiProcessorCount;
  MemoryConfig mem_config;
  if(config.only_global_memory) {
    mem_config = MemoryConfig(store_bytes, iprop_bytes);
  }
  else {
    mem_config = MemoryConfig((void*) gpu_barebones_solve, config.verbose_solving, blocks_per_sm, store_bytes, iprop_bytes);
  }
  mem_config.print_mzn_statistics(config, cp.stats);
  // Conservative estimation of the global memory needed by the solver.
  size_t nblocks = cp.stats.num_blocks;
  size_t estimated_global_mem = gpu_sizeof<UnifiedData>() + store_bytes * size_t{5} + iprop_bytes +
    gpu_sizeof<GridData>() +
    nblocks * gpu_sizeof<BlockData>() +
    nblocks * store_bytes * size_t{3} +
    nblocks * iprop_bytes * size_t{2} +
    nblocks * cp.iprop->num_deductions() * size_t{4} * gpu_sizeof<int>() +
    nblocks * (gpu_sizeof<int>() + gpu_sizeof<LightBranch<Itv>>()) * size_t{MAX_SEARCH_DEPTH};
  size_t required_global_mem = std::max(deviceProp.totalGlobalMem / 2, estimated_global_mem);
  if(estimated_global_mem > deviceProp.totalGlobalMem / 2) {
    printf("%% WARNING: The estimated global memory is larger than half of the total global memory.\n\
  We reduce the number of blocks to avoid running out of memory.\n");
    /* ... */
  }
  CUDAEX(cudaDeviceSetLimit(cudaLimitMallocHeapSize, required_global_mem));
  if(config.verbose_solving) {
    printf("%% estimated_global_mem=%zu\n", estimated_global_mem);
  }
  if(deviceProp.totalGlobalMem < required_global_mem) {
    printf("%% WARNING: The total global memory available is less than the required global memory.\n\
  As our memory estimation is very conservative, it might still work, but it is not guaranteed.\n");
  }
  CUDAEX(cudaDeviceSetLimit(cudaLimitStackSize, config.stack_kb*1000));
  size_t total_stack_size = deviceProp.multiProcessorCount * deviceProp.maxThreadsPerMultiProcessor * config.stack_kb * 1000;
  /* ... */
  return mem_config;
}
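// Allocate the grid-wide data on the device so that its components live in global memory
// allocated from the device-side heap.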
__global__ void initialize_global_data(
  UnifiedData* unified_data,
  bt::unique_ptr<GridData, bt::global_allocator>* grid_data_ptr)
{
  *grid_data_ptr = bt::make_unique<GridData, bt::global_allocator>(unified_data->root);
}
#define TIMEPOINT(KIND) \
  if(threadIdx.x == 0) { \
    block_data.timer = block_data.stats.stop_timer(Timer::KIND, block_data.timer); \
  }
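// Main solving kernel. Each block repeatedly grabs a subproblem index, "dives" to the
// corresponding subproblem by committing to the branches encoded in the bits of the index, then
// solves it by interleaving propagation and search, sharing the best bound through `grid_data`.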
__global__ void gpu_barebones_solve(UnifiedData* unified_data, GridData* grid_data) {
  extern __shared__ unsigned char shared_mem[];
  auto& config = unified_data->root.config;
  BlockData& block_data = grid_data->blocks[blockIdx.x];
  if(config.verbose_solving && blockIdx.x == 0 && threadIdx.x == 0) {
    printf("%% GPU kernel started, starting solving...\n");
  }
  block_data.allocate(*unified_data, *grid_data, shared_mem);
  IProp& iprop = *block_data.iprop;
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
  __shared__ BlockAsynchronousFixpointGPU<true> fp_engine;
#else
  // Fixpoint engine able to deactivate entailed propagators across iterations.
  __shared__ FixpointSubsetGPU<BlockAsynchronousFixpointGPU<true>, bt::global_allocator, CUDA_THREADS_PER_BLOCK> fp_engine;
  fp_engine.init(iprop.num_deductions());
#endif
  __shared__ bool stop;
  __shared__ bool has_changed;
  __shared__ bool is_leaf_node;
  __shared__ int remaining_depth;
  auto group = cooperative_groups::this_thread_block();
  if(threadIdx.x == 0) {
    block_data.timer = block_data.stats.start_timer_device();
    stop = false;
  }
  __syncthreads();
  size_t num_subproblems = unified_data->root.stats.eps_num_subproblems;
  // Each block iterates over the subproblems of the EPS decomposition until they are all solved
  // or until the stop flag is raised.
  while(block_data.subproblem_idx < num_subproblems && !stop) {
    if(config.verbose_solving) {
      grid_data->print_lock.acquire();
      printf("%% Block %d solves subproblem num %" PRIu64 "\n", blockIdx.x, block_data.subproblem_idx);
      grid_data->print_lock.release();
    }
    // Reset the search state of the block and restore the root problem.
    block_data.current_strategy = 0;
    block_data.next_unassigned_var = 0;
    block_data.depth = 0;
    unified_data->root.store->copy_to(group, *block_data.store);
#ifndef TURBO_NO_ENTAILED_PROP_REMOVAL
    fp_engine.reset(iprop.num_deductions());
#endif
    /* ... */
    is_leaf_node = false;
    // Dive towards the subproblem by committing to the branches encoded in `subproblem_idx`.
    while(remaining_depth > 0 && !is_leaf_node && !stop) {
      propagate(*unified_data, *grid_data, block_data, fp_engine, stop, has_changed, is_leaf_node);
      block_data.split(has_changed, grid_data->search_strategies);
      if(block_data.decisions[0].var.is_untyped()) {
        block_data.stats.exhaustive = false;
      }
      else if(threadIdx.x == 0) {
        // Bit `remaining_depth` of the subproblem index selects the child to commit to.
        size_t branch_idx = (block_data.subproblem_idx & (size_t{1} << remaining_depth)) >> remaining_depth;
        block_data.store->embed(block_data.decisions[0].var, block_data.decisions[0].children[branch_idx]);
      }
      /* ... */
    }
    // The dive ended on a leaf node: all the subproblems sharing the current prefix are subsumed,
    // hence we advance the global subproblem counter past them.
    if(is_leaf_node && !stop) {
      /* ... */
      if(threadIdx.x == 0) {
        size_t next_subproblem_idx = ((block_data.subproblem_idx >> remaining_depth) + size_t{1}) << remaining_depth;
        while(grid_data->next_subproblem.meet(ZLB<size_t, bt::local_memory>(next_subproblem_idx))) {}
        // Only the first block of the skipped range accounts for the skipped subproblems.
        if((block_data.subproblem_idx & ((size_t{1} << remaining_depth) - size_t{1})) == size_t{0}) {
          block_data.stats.eps_skipped_subproblems += next_subproblem_idx - block_data.subproblem_idx;
        }
      }
    }
    // Solve the subproblem with an iterative propagate-and-split loop; the ropes of the
    // decisions indicate where to backjump once a leaf node is reached.
    while(!stop) {
      propagate(*unified_data, *grid_data, block_data, fp_engine, stop, has_changed, is_leaf_node);
      if(!is_leaf_node) {
        if(block_data.depth == 0) {
          // Snapshot the store at the root of the subproblem, to restore it when backjumping.
          block_data.store->copy_to(group, *block_data.root_store);
        }
        block_data.split(has_changed, grid_data->search_strategies);
        if(block_data.decisions[block_data.depth - 1].var.is_untyped()) {
          // No decision could be created: the exploration of this subproblem is incomplete.
          block_data.stats.exhaustive = false;
        }
        else if(threadIdx.x == 0) {
          if(block_data.depth == 1) {
            block_data.snapshot_root_strategy = block_data.current_strategy;
            block_data.snapshot_next_unassigned_var = block_data.next_unassigned_var;
          }
          // Commit to the first child of the new decision.
          block_data.store->embed(block_data.decisions[block_data.depth-1].var, block_data.decisions[block_data.depth-1].next());
        }
      }
      else {
        // Leaf node: backjump to the next pending decision using the ropes.
        if(block_data.depth == 0) {
          break;
        }
        if(threadIdx.x == 0) {
          block_data.depth = block_data.decisions[block_data.depth-1].ropes[block_data.decisions[block_data.depth-1].current_idx];
        }
        __syncthreads();
        if(block_data.depth == -1) {
          break;
        }
#ifndef TURBO_NO_ENTAILED_PROP_REMOVAL
        fp_engine.reset(iprop.num_deductions());
#endif
        block_data.root_store->copy_to(group, *block_data.store);
        for(int i = threadIdx.x; i < block_data.depth - 1; i += blockDim.x) {
          if(block_data.store->embed(block_data.decisions[i].var, block_data.decisions[i].current())) {
            has_changed = true;
          }
        }
        if(threadIdx.x == 0) {
          block_data.store->embed(block_data.decisions[block_data.depth - 1].var, block_data.decisions[block_data.depth - 1].next());
          block_data.current_strategy = block_data.snapshot_root_strategy;
          block_data.next_unassigned_var = block_data.snapshot_next_unassigned_var;
        }
      }
    }
    // The subproblem has been fully explored and solving was not interrupted.
    if(threadIdx.x == 0 /* ... */
      && !unified_data->stop.test())
    {
      block_data.stats.eps_solved_subproblems += 1;
    }
    // Grab the next subproblem to solve.
    if(threadIdx.x == 0 && !stop) {
      block_data.subproblem_idx = grid_data->next_subproblem.atomic()++;
    }
    __syncthreads();
  }
  // All the subproblems have been solved without interruption: this block terminated exhaustively.
  if(threadIdx.x == 0 /* ... */
    && !unified_data->stop.test())
  {
    block_data.stats.num_blocks_done = 1;
  }
#ifndef TURBO_NO_ENTAILED_PROP_REMOVAL
  /* ... */
#endif
  block_data.deallocate_shared_data();
}
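// Propagate the constraints of the current node until a fixpoint is reached, tighten the
// objective variable with the best known bounds, detect leaf nodes (failed or solution nodes)
// and update the corresponding statistics.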
template <class FPEngine>
__device__ INLINE void propagate(UnifiedData& unified_data, GridData& grid_data, BlockData& block_data,
  FPEngine& fp_engine, bool& stop, bool& has_changed, bool& is_leaf_node)
{
  __shared__ int warp_iterations[CUDA_THREADS_PER_BLOCK/32];
  warp_iterations[threadIdx.x / 32] = 0;
  auto& config = unified_data.root.config;
  IProp& iprop = *block_data.iprop;
  auto group = cooperative_groups::this_thread_block();
  // For optimization problems, constrain the objective variable with the best bounds known so far
  // (the approximate bound shared across blocks and the bound found by this block).
  if(threadIdx.x == 0 && !grid_data.obj_var.is_untyped()) {
    if(!grid_data.appx_best_bound.is_top()) {
      block_data.store->embed(grid_data.obj_var,
        Itv(Itv::LB::top(), Itv::UB(grid_data.appx_best_bound.value() - 1)));
      block_data.store->embed(grid_data.obj_var,
        Itv(Itv::LB::top(), Itv::UB(block_data.best_bound.value() - 1)));
    }
    if(grid_data.appx_best_bound.is_bot()) {
      /* ... */
    }
  }
  is_leaf_node = false;
  __syncthreads();
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
  int num_active = iprop.num_deductions();
#else
  int num_active = fp_engine.num_active();
#endif
  int fp_iterations;
  // Several fixpoint variants are available, selected by the solver configuration.
  if(/* ... */) {
    fp_iterations = fp_engine.fixpoint(
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
      iprop.num_deductions(),
#endif
      [&](int i){ return iprop.deduce(i); },
      [&](){ return iprop.is_bot(); });
    if(threadIdx.x == 0) {
      block_data.stats.num_deductions += fp_iterations * num_active;
    }
  }
  else if(/* ... */) {
    fp_iterations = fp_engine.fixpoint(
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
      iprop.num_deductions(),
#endif
      [&](int i){ return iprop.deduce(i); },
      [&](){ return iprop.is_bot(); });
    if(threadIdx.x == 0) {
      block_data.stats.num_deductions += fp_iterations * num_active;
    }
  }
  else {
    // Warp-based fixpoint: the iterations are counted per warp in `warp_iterations`.
    fp_iterations = fp_engine.fixpoint(
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
      iprop.num_deductions(),
#endif
      [&](int i){ return warp_fixpoint<CUDA_THREADS_PER_BLOCK>(iprop, i, warp_iterations); },
      [&](){ return iprop.is_bot(); });
    if(threadIdx.x == 0) {
      for(int i = 0; i < CUDA_THREADS_PER_BLOCK/32; ++i) {
        block_data.stats.num_deductions += warp_iterations[i] * 32;
      }
    }
  }
  // The current node is a leaf if the store is inconsistent (bot) or if all propagators are entailed.
  if(!iprop.is_bot()) {
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
    has_changed = false;
    __syncthreads();
    // `has_changed` records whether at least one propagator is not yet entailed.
    for(int i = threadIdx.x; !has_changed && i < iprop.num_deductions(); i += blockDim.x) {
      if(!iprop.ask(i)) {
        has_changed = true;
      }
    }
    __syncthreads();
    num_active = has_changed;
#else
    // Deactivate the entailed propagators for the subsequent fixpoint computations.
    fp_engine.select([&](int i) { return !iprop.ask(i); });
    num_active = fp_engine.num_active();
#endif
    // No active propagator left: the store is a solution.
    if(num_active == 0) {
      if(threadIdx.x == 0) {
        block_data.best_bound.meet(Itv::UB(block_data.store->project(grid_data.obj_var).lb().value()));
        grid_data.appx_best_bound.meet(block_data.best_bound);
      }
      block_data.store->copy_to(group, *block_data.best_store);
      if(threadIdx.x == 0) {
        block_data.stats.solutions++;
        if(config.verbose_solving) {
          grid_data.print_lock.acquire();
          printf("%% objective="); block_data.best_bound.print(); printf("\n");
          grid_data.print_lock.release();
        }
      }
      is_leaf_node = true;
    }
  }
  else {
    is_leaf_node = true;
  }
  // Per-node statistics.
  if(threadIdx.x == 0) {
    block_data.stats.fixpoint_iterations += fp_iterations;
    block_data.stats.nodes++;
    block_data.stats.fails += (iprop.is_bot() ? 1 : 0);
    block_data.stats.depth_max = battery::max(block_data.stats.depth_max, block_data.depth);
    // Stop the block if solving was interrupted (timeout, CTRL-C, ...).
    if(/* ... */
      || unified_data.stop.test())
    {
      block_data.stats.exhaustive = false;
      stop = true;
    }
  }
  __syncthreads();
}
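// Merge the statistics and the best solutions of all blocks into the root (single-threaded kernel).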
__global__ void reduce_blocks(UnifiedData* unified_data, GridData* grid_data) {
  auto& root = unified_data->root;
  for(int i = 0; i < grid_data->blocks.size(); ++i) {
    root.stats.meet(grid_data->blocks[i].stats);
  }
  // For satisfaction problems, any block's solution is a witness; for optimization problems,
  // first compute the best bound across all blocks.
  for(int i = 0; i < grid_data->blocks.size(); ++i) {
    auto& block = grid_data->blocks[i];
    if(block.stats.solutions > 0) {
      if(root.bab->is_satisfaction()) {
        block.best_store->extract(*root.best);
      }
      else {
        grid_data->appx_best_bound.meet(block.best_bound);
      }
    }
  }
  // Copy the store of a block reaching the best bound into the root's best solution.
  if(!grid_data->appx_best_bound.is_top()) {
    for(int i = 0; i < grid_data->blocks.size(); ++i) {
      auto& block = grid_data->blocks[i];
      if(block.best_bound == grid_data->appx_best_bound
        && block.best_store->project(grid_data->obj_var).lb() == block.best_bound)
      {
        block.best_store->copy_to(*root.best);
      }
    }
  }
}
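// Free the grid-wide data allocated by `initialize_global_data`.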
__global__ void deallocate_global_data(bt::unique_ptr<GridData, bt::global_allocator>* grid_data) {
  /* ... */
}
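// Fallback when the barebones architecture cannot be used: either the IPC abstract domain was
// requested, or the file is not compiled with a CUDA compiler.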
#if defined(TURBO_IPC_ABSTRACT_DOMAIN) || !defined(__CUDACC__)

#ifdef TURBO_IPC_ABSTRACT_DOMAIN
  std::cerr << "-arch barebones does not support IPC abstract domain." << std::endl;
#else
  std::cerr << "You must use a CUDA compiler (nvcc or clang) to compile Turbo on GPU." << std::endl;
#endif