#ifndef TURBO_IPC_ABSTRACT_DOMAIN
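
/** Allocator for the data shared between the CPU and the GPU: pinned host
 * memory when concurrent managed memory is not supported, unified (managed)
 * memory otherwise. */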
#ifdef NO_CONCURRENT_MANAGED_MEMORY
  using ConcurrentAllocator = bt::pinned_allocator;
#else
  using ConcurrentAllocator = bt::managed_allocator;
#endif

  bt::statistics_allocator<ConcurrentAllocator>,
  bt::statistics_allocator<UniqueLightAlloc<ConcurrentAllocator, 0>>,
  bt::statistics_allocator<UniqueLightAlloc<ConcurrentAllocator, 1>>>;
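
/** `UnifiedData` groups the data shared between the host and the device: the
 * root problem (a `GridCP` instantiated with the statistics allocators above),
 * the stop flag used to communicate interruption, and the memory configuration
 * of the blocks. */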
 
  cuda::std::atomic_flag stop;
  MemoryConfig mem_config;

  UnifiedData(const CP<Itv>& cp, MemoryConfig mem_config)
   : root(GridCP::tag_gpu_block_copy{}, false, cp)
   , mem_config(mem_config)
  {
    size_t num_subproblems = 1;
    num_subproblems <<= root.config.subproblems_power;
    root.stats.eps_num_subproblems = num_subproblems;
  }
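
/** Block-local abstract domain types: the store of variables and the
 * propagators are allocated through a pool allocator prepared by
 * `MemoryConfig`, `UB` is an upper-bound lattice with grid-scope atomic
 * memory, and `strategies_type` holds the search strategies. */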
 
using IStore = VStore<Itv, bt::pool_allocator>;
using IProp = PIR<IStore, bt::pool_allocator>;
using UB = ZUB<typename Itv::LB::value_type, bt::atomic_memory_grid>;
using strategies_type = bt::vector<StrategyType<bt::global_allocator>, bt::global_allocator>;
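
/** Per-block search state (members of `BlockData`): global-memory snapshots of
 * the subproblem root and of the best solution found by this block, the
 * working store and propagators, and the bookkeeping needed to dive to a
 * subproblem and run the depth-first search. */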
 
  abstract_ptr<VStore<Itv, bt::global_allocator>> root_store;
  abstract_ptr<VStore<Itv, bt::global_allocator>> best_store;
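
  /** Working copies of the store and of the propagators for this block,
   * created in `allocate()` from the pools prepared by `MemoryConfig`. */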
 
  abstract_ptr<IStore> store;
  abstract_ptr<IProp> iprop;

  size_t subproblem_idx;
  int current_strategy;
  int next_unassigned_var;
  int snapshot_root_strategy;
  int snapshot_next_unassigned_var;
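
  /** Trail of branching decisions indexed by depth. Each `LightBranch` stores
   * the decided variable, the two children of the split and the `ropes`,
   * i.e. the depth at which the search resumes when a child is exhausted. */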
 
  bt::vector<LightBranch<Itv>, bt::global_allocator> decisions;
  cuda::std::chrono::system_clock::time_point timer;
 
  __device__ BlockData()

   , current_strategy(0)
   , next_unassigned_var(0)
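
  /** Allocates the block-local data: thread 0 carves pool allocators out of
   * this block's shared memory (as described by `MemoryConfig`) and copies the
   * root store and propagators into them; the root and best stores are
   * allocated in global memory. */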
 
  __device__ void allocate(const UnifiedData& unified_data, const GridData& grid_data, unsigned char* shared_mem) {
    if(threadIdx.x == 0) {
      subproblem_idx = blockIdx.x;
      const MemoryConfig& mem_config = unified_data.mem_config;
      const auto& u_store = *(unified_data.root.store);
      const auto& u_iprop = *(unified_data.root.iprop);
      bt::pool_allocator shared_mem_pool(mem_config.make_shared_pool(shared_mem));
      bt::pool_allocator store_allocator(mem_config.make_store_pool(shared_mem_pool));
      bt::pool_allocator prop_allocator(mem_config.make_prop_pool(shared_mem_pool));
      root_store = bt::make_shared<VStore<Itv, bt::global_allocator>, bt::global_allocator>(u_store);
      best_store = bt::make_shared<VStore<Itv, bt::global_allocator>, bt::global_allocator>(u_store);
      store = bt::allocate_shared<IStore, bt::pool_allocator>(store_allocator, u_store, store_allocator);
      iprop = bt::allocate_shared<IProp, bt::pool_allocator>(prop_allocator, u_iprop, store, prop_allocator);
 
  __device__ void deallocate_shared_data() {
    if(threadIdx.x == 0) {
      store = abstract_ptr<IStore>();
      iprop = abstract_ptr<IProp>();
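
  /** Variable selection. The strategies are tried in order starting from
   * `current_strategy`: as soon as one of them finds a non-assigned variable,
   * a decision is pushed on it; otherwise the search moves on to the next
   * strategy and `next_unassigned_var` is reset for it. */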
 
  __device__ INLINE void split(bool& has_changed, const strategies_type& strategies) {
    using LB2 = typename Itv::LB::local_type;
    using UB2 = typename Itv::UB::local_type;
    __shared__ local::ZUB idx;
    decisions[depth].var = AVar{};
    int currentDepth = depth;
    for(int i = current_strategy; i < strategies.size(); ++i) {
      switch(strategies[i].var_order) {
        case VariableOrder::INPUT_ORDER: {
          input_order_split(has_changed, idx, strategies[i]);

        case VariableOrder::FIRST_FAIL: {
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return UB2(u.ub().value() - u.lb().value()); });

        case VariableOrder::ANTI_FIRST_FAIL: {
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return LB2(u.ub().value() - u.lb().value()); });

        case VariableOrder::LARGEST: {
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return LB2(u.ub().value()); });

        case VariableOrder::SMALLEST: {
          lattice_smallest_split(has_changed, idx, strategies[i],
            [](const Itv& u) { return UB2(u.lb().value()); });

        default: assert(false);

      if(!decisions[currentDepth].var.is_untyped()) {

      if(threadIdx.x == 0) {

        next_unassigned_var = 0;
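
  /** `INPUT_ORDER` strategy: the threads of the block scan the variables in
   * parallel and meet the shared lattice `idx` with the smallest index of a
   * non-assigned variable at or after `next_unassigned_var`. If one is found
   * (`idx.value() != n`), a decision is pushed on it. */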
 
  __device__ INLINE void input_order_split(bool& has_changed, local::ZUB& idx, const StrategyType<bt::global_allocator>& strategy)
  {
    bool split_in_store = strategy.vars.empty();
    int n = split_in_store ? store->vars() : strategy.vars.size();

    if(threadIdx.x == 0) {

      for(int i = next_unassigned_var + threadIdx.x; i < n; i += blockDim.x) {
        const auto& dom = (*store)[split_in_store ? i : strategy.vars[i].vid()];
        if(dom.lb().value() != dom.ub().value() && !dom.lb().is_top() && !dom.ub().is_top()) {
          if(idx.meet(local::ZUB(i))) {

    if(threadIdx.x == 0) {
      next_unassigned_var = idx.value();

    if(idx.value() != n) {
      push_decision(strategy.val_order, split_in_store ? AVar{store->aty(), idx.value()} : strategy.vars[idx.value()]);
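
  /** Strategies that select the variable optimizing a measure `f` of its
   * domain (FIRST_FAIL, ANTI_FIRST_FAIL, LARGEST, SMALLEST): a first parallel
   * pass computes the best value of `f` among the non-assigned variables, a
   * second pass finds the smallest index reaching that value, and a decision
   * is pushed on that variable. */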
 
  template <class F>
  __device__ INLINE void lattice_smallest_split(bool& has_changed, local::ZUB& idx,
    const StrategyType<bt::global_allocator>& strategy, F f)
  {
    using T = decltype(f(Itv{}));

    bool split_in_store = strategy.vars.empty();
    int n = split_in_store ? store->vars() : strategy.vars.size();

    if(threadIdx.x == 0) {

      for(int i = next_unassigned_var + threadIdx.x; i < n; i += blockDim.x) {
        const auto& dom = (*store)[split_in_store ? i : strategy.vars[i].vid()];
        if(dom.lb().value() != dom.ub().value() && !dom.lb().is_top() && !dom.ub().is_top()) {
          if(value.meet(f(dom))) {

          if(idx.meet(local::ZUB(i))) {

    if(!value.is_top()) {
      if(threadIdx.x == 0) {
        next_unassigned_var = idx.value();

        for(int i = next_unassigned_var + threadIdx.x; i < n; i += blockDim.x) {
          const auto& dom = (*store)[split_in_store ? i : strategy.vars[i].vid()];
          if(dom.lb().value() != dom.ub().value() && !dom.lb().is_top() && !dom.ub().is_top() && f(dom) == value) {
            if(idx.meet(local::ZUB(i))) {

      assert(idx.value() < n);
      push_decision(strategy.val_order, split_in_store ? AVar{store->aty(), idx.value()} : strategy.vars[idx.value()]);
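
  /** Records a decision on `var` at the current depth. The two children are
   * derived from the value ordering (MIN, MAX, SPLIT, REVERSE_SPLIT);
   * `ropes[0]` is set to `depth + 1` so that once the first child is
   * exhausted the search comes back to this decision and takes the second
   * child, and `ropes[1]` inherits the parent's pending rope (or -1 at the
   * root). The decisions vector is doubled when it becomes full. */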
 
  __device__ INLINE void push_decision(ValueOrder val_order, AVar var) {
    using value_type = typename Itv::LB::value_type;
    if(threadIdx.x == 0) {
      decisions[depth].var = var;
      decisions[depth].current_idx = -1;
      const auto& dom = store->project(decisions[depth].var);
      assert(dom.lb().value() != dom.ub().value());
      switch(val_order) {
        case ValueOrder::MIN: {
          decisions[depth].children[0] = Itv(dom.lb().value());
          decisions[depth].children[1] = Itv(dom.lb().value() + value_type{1}, dom.ub());

        case ValueOrder::MAX: {
          decisions[depth].children[0] = Itv(dom.ub().value());
          decisions[depth].children[1] = Itv(dom.lb(), dom.ub().value() - value_type{1});

        case ValueOrder::SPLIT: {
          auto mid = dom.lb().value() + (dom.ub().value() - dom.lb().value()) / value_type{2};
          decisions[depth].children[0] = Itv(dom.lb(), mid);
          decisions[depth].children[1] = Itv(mid + value_type{1}, dom.ub());

        case ValueOrder::REVERSE_SPLIT: {
          auto mid = dom.lb().value() + (dom.ub().value() - dom.lb().value()) / value_type{2};
          decisions[depth].children[0] = Itv(mid + value_type{1}, dom.ub());
          decisions[depth].children[1] = Itv(dom.lb(), mid);

        default: assert(false);

      decisions[depth].ropes[0] = depth + 1;
      decisions[depth].ropes[1] = depth > 0 ? decisions[depth-1].ropes[decisions[depth-1].current_idx] : -1;

      if(decisions.size() == depth) {
        printf("resize to %d\n", (int)decisions.size() * 2);
        decisions.resize(decisions.size() * 2);
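
/** Grid-wide state (members of `GridData`): one `BlockData` per block, the
 * index of the next subproblem to solve, a lock serializing `printf` output,
 * the search strategies, the objective variable and the best objective bound
 * found so far. */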
 
  bt::vector<BlockData, bt::global_allocator> blocks;
  ZLB<size_t, bt::atomic_memory_grid> next_subproblem;
  cuda::binary_semaphore<cuda::thread_scope_device> print_lock;
  strategies_type search_strategies;

  __device__ GridData(const GridCP& root)
   : blocks(root.stats.num_blocks)
   , next_subproblem(root.stats.num_blocks)

   , search_strategies(root.split->strategies_())
   , obj_var(root.minimize_obj_var)
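
/** Forward declarations of the host configuration function and of the CUDA
 * kernels: global data initialization, the main solving kernel, the reduction
 * of the per-block results and the deallocation of the global data. */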
 
MemoryConfig configure_gpu_barebones(CP<Itv>&);
__global__ void initialize_global_data(UnifiedData*, bt::unique_ptr<GridData, bt::global_allocator>*);
__global__ void gpu_barebones_solve(UnifiedData*, GridData*);
template <class FPEngine>
__device__ INLINE void propagate(UnifiedData& unified_data, GridData& grid_data, BlockData& block_data,
  FPEngine& fp_engine, bool& stop, bool& has_changed, bool& is_leaf_node);
__global__ void reduce_blocks(UnifiedData*, GridData*);
__global__ void deallocate_global_data(bt::unique_ptr<GridData, bt::global_allocator>*);
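
/** Host-side driver: it checks the memory support of the platform, configures
 * the GPU, allocates the unified and grid data, launches the solving kernel,
 * waits for termination (or interruption), reduces the per-block results and
 * prints the MiniZinc statistics before releasing the device memory. */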
 
    printf("%% WARNING: -arch barebones is incompatible with -i and -a (it cannot print intermediate solutions).");

  auto start = std::chrono::steady_clock::now();
  check_support_managed_memory();
  check_support_concurrent_managed_memory();

  if(cp.iprop->is_bot()) {

  MemoryConfig mem_config = configure_gpu_barebones(cp);
  auto unified_data = bt::make_unique<UnifiedData, ConcurrentAllocator>(cp, mem_config);
  auto grid_data = bt::make_unique<bt::unique_ptr<GridData, bt::global_allocator>, ConcurrentAllocator>();
  initialize_global_data<<<1,1>>>(unified_data.get(), grid_data.get());
  CUDAEX(cudaDeviceSynchronize());

      CUDA_THREADS_PER_BLOCK,
      mem_config.shared_bytes>>>
    (unified_data.get(), grid_data->get());
  bool interrupted = wait_solving_ends(unified_data->stop, unified_data->root, start);
  CUDAEX(cudaDeviceSynchronize());
  reduce_blocks<<<1,1>>>(unified_data.get(), grid_data->get());
  CUDAEX(cudaDeviceSynchronize());
  auto& uroot = unified_data->root;
  if(uroot.stats.solutions > 0) {

  uroot.stats.print_mzn_final_separator();
  if(uroot.config.print_statistics) {
    uroot.config.print_mzn_statistics();
    uroot.stats.print_mzn_statistics(uroot.config.verbose_solving);
    if(uroot.bab->is_optimization() && uroot.stats.solutions > 0) {
      uroot.stats.print_mzn_objective(uroot.best->project(uroot.bab->objective_var()), uroot.bab->is_minimization());

    unified_data->root.stats.print_mzn_end_stats();

  deallocate_global_data<<<1,1>>>(grid_data.get());
  CUDAEX(cudaDeviceSynchronize());
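
/** Decides how many blocks to launch and how the memory is laid out. The block
 * count is derived from the occupancy of `gpu_barebones_solve` (and reduced
 * for very large problems), the sizes of the store and of the propagators are
 * estimated to build the `MemoryConfig`, and the CUDA heap and stack limits
 * are raised according to a conservative estimate of the required global
 * memory. */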
 
MemoryConfig configure_gpu_barebones(CP<Itv>& cp) {

  cudaDeviceProp deviceProp;
  cudaGetDeviceProperties(&deviceProp, 0);
  int max_block_per_sm;
  cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_block_per_sm, (void*) gpu_barebones_solve, CUDA_THREADS_PER_BLOCK, 0);

    printf("%% max_blocks_per_sm=%d\n", max_block_per_sm);

      printf("%% WARNING: -or %d is too high on your GPU architecture, it has been reduced to %d.\n", (int)cp.config.or_nodes, cp.stats.num_blocks);

    cp.stats.num_blocks = max_block_per_sm * deviceProp.multiProcessorCount;

  size_t store_bytes = gpu_sizeof<IStore>() + gpu_sizeof<abstract_ptr<IStore>>() + cp.store->vars() * gpu_sizeof<Itv>();
  size_t iprop_bytes = gpu_sizeof<IProp>() + gpu_sizeof<abstract_ptr<IProp>>() + cp.iprop->num_deductions() * gpu_sizeof<bytecode_type>() + gpu_sizeof<typename IProp::bytecodes_type>();

  if(cp.iprop->num_deductions() + cp.store->vars() > 1000000) {

    printf("%% WARNING: Large problem detected, reducing to 1 block per SM.\n");

  int blocks_per_sm = (cp.stats.num_blocks + deviceProp.multiProcessorCount - 1) / deviceProp.multiProcessorCount;
  MemoryConfig mem_config;

    mem_config = MemoryConfig(store_bytes, iprop_bytes);

    mem_config = MemoryConfig((void*) gpu_barebones_solve, config.verbose_solving, blocks_per_sm, store_bytes, iprop_bytes);

  mem_config.print_mzn_statistics(config, cp.stats);

  size_t estimated_global_mem = gpu_sizeof<UnifiedData>() + store_bytes * size_t{5} + iprop_bytes +
    gpu_sizeof<GridData>() +
    nblocks * gpu_sizeof<BlockData>() +
    nblocks * store_bytes * size_t{3} +
    nblocks * iprop_bytes * size_t{2} +
    nblocks * cp.iprop->num_deductions() * size_t{4} * gpu_sizeof<int>() +
    nblocks * (gpu_sizeof<int>() + gpu_sizeof<LightBranch<Itv>>()) * size_t{MAX_SEARCH_DEPTH};
  size_t required_global_mem = std::max(deviceProp.totalGlobalMem / 2, estimated_global_mem);
  if(estimated_global_mem > deviceProp.totalGlobalMem / 2) {
    printf("%% WARNING: The estimated global memory is larger than half of the total global memory.\n\
    We reduce the number of blocks to avoid running out of memory.\n");

  CUDAEX(cudaDeviceSetLimit(cudaLimitMallocHeapSize, required_global_mem));

    printf("%% estimated_global_mem=%zu\n", estimated_global_mem);

  if(deviceProp.totalGlobalMem < required_global_mem) {
    printf("%% WARNING: The total global memory available is less than the required global memory.\n\
    As our memory estimation is very conservative, it might still work, but it is not guaranteed.\n");

    CUDAEX(cudaDeviceSetLimit(cudaLimitStackSize, config.stack_kb*1000));

    size_t total_stack_size = deviceProp.multiProcessorCount * deviceProp.maxThreadsPerMultiProcessor * config.stack_kb * 1000;
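
/** Builds the `GridData` from the root problem. It runs as a single-thread
 * kernel so the allocation is performed on the device, in the global-memory
 * heap whose size was raised in `configure_gpu_barebones`. */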
 
__global__ void initialize_global_data(
  UnifiedData* unified_data,
  bt::unique_ptr<GridData, bt::global_allocator>* grid_data_ptr)
{
  *grid_data_ptr = bt::make_unique<GridData, bt::global_allocator>(unified_data->root);
}
 
#define TIMEPOINT(KIND) \
  if(threadIdx.x == 0) { \
    block_data.timer = block_data.stats.stop_timer(Timer::KIND, block_data.timer); \
  }
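
/** Main solving kernel. Each block repeatedly takes a subproblem index: it
 * first "dives" to that subproblem by propagating and committing, at each
 * level, the branch selected by the corresponding bit of the index, then runs
 * a propagate-and-search loop on the subproblem, backtracking through the
 * ropes of the decisions trail. Further subproblems are obtained by
 * incrementing `grid_data->next_subproblem` until all of them are solved or
 * the stop flag is set. */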
__global__ void gpu_barebones_solve(UnifiedData* unified_data, GridData* grid_data) {
  extern __shared__ unsigned char shared_mem[];
  auto& config = unified_data->root.config;
  BlockData& block_data = grid_data->blocks[blockIdx.x];

    printf("%% GPU kernel started, starting solving...\n");

  block_data.allocate(*unified_data, *grid_data, shared_mem);

  IProp& iprop = *block_data.iprop;
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
  __shared__ BlockAsynchronousFixpointGPU<true> fp_engine;
#else
  __shared__ FixpointSubsetGPU<BlockAsynchronousFixpointGPU<true>, bt::global_allocator, CUDA_THREADS_PER_BLOCK> fp_engine;
  fp_engine.init(iprop.num_deductions());
#endif

  __shared__ bool stop;
  __shared__ bool has_changed;
  __shared__ bool is_leaf_node;
  __shared__ int remaining_depth;

  auto group = cooperative_groups::this_thread_block();
  if(threadIdx.x == 0) {
    block_data.timer = block_data.stats.start_timer_device();

  size_t num_subproblems = unified_data->root.stats.eps_num_subproblems;
  while(block_data.subproblem_idx < num_subproblems && !stop) {

      grid_data->print_lock.acquire();
      printf("%% Block %d solves subproblem num %" PRIu64 "\n", blockIdx.x, block_data.subproblem_idx);
      grid_data->print_lock.release();

    block_data.current_strategy = 0;
    block_data.next_unassigned_var = 0;
    block_data.depth = 0;
    unified_data->root.store->copy_to(group, *block_data.store);
#ifndef TURBO_NO_ENTAILED_PROP_REMOVAL
    fp_engine.reset(iprop.num_deductions());
#endif

    is_leaf_node = false;
    while(remaining_depth > 0 && !is_leaf_node && !stop) {
      propagate(*unified_data, *grid_data, block_data, fp_engine, stop, has_changed, is_leaf_node);

        block_data.split(has_changed, grid_data->search_strategies);

        if(block_data.decisions[0].var.is_untyped()) {

          block_data.stats.exhaustive = false;

        else if(threadIdx.x == 0) {

          size_t branch_idx = (block_data.subproblem_idx & (size_t{1} << remaining_depth)) >> remaining_depth;

          block_data.store->embed(block_data.decisions[0].var, block_data.decisions[0].children[branch_idx]);

    if(is_leaf_node && !stop) {

      if(threadIdx.x == 0) {
        size_t next_subproblem_idx = ((block_data.subproblem_idx >> remaining_depth) + size_t{1}) << remaining_depth;

        while(grid_data->next_subproblem.meet(ZLB<size_t, bt::local_memory>(next_subproblem_idx))) {}

        if((block_data.subproblem_idx & ((size_t{1} << remaining_depth) - size_t{1})) == size_t{0}) {
          block_data.stats.eps_skipped_subproblems += next_subproblem_idx - block_data.subproblem_idx;

        propagate(*unified_data, *grid_data, block_data, fp_engine, stop, has_changed, is_leaf_node);

          if(block_data.depth == 0) {
            block_data.store->copy_to(group, *block_data.root_store);

          block_data.split(has_changed, grid_data->search_strategies);

          if(block_data.decisions[block_data.depth - 1].var.is_untyped()) {

            block_data.stats.exhaustive = false;

          else if(threadIdx.x == 0) {

            if(block_data.depth == 1) {
              block_data.snapshot_root_strategy = block_data.current_strategy;
              block_data.snapshot_next_unassigned_var = block_data.next_unassigned_var;

            block_data.store->embed(block_data.decisions[block_data.depth-1].var, block_data.decisions[block_data.depth-1].next());

          if(block_data.depth == 0) {

          if(threadIdx.x == 0) {
            block_data.depth = block_data.decisions[block_data.depth-1].ropes[block_data.decisions[block_data.depth-1].current_idx];

          if(block_data.depth == -1) {

#ifndef TURBO_NO_ENTAILED_PROP_REMOVAL
          fp_engine.reset(iprop.num_deductions());
#endif
          block_data.root_store->copy_to(group, *block_data.store);

            for(int i = threadIdx.x; i < block_data.depth - 1; i += blockDim.x) {
              if(block_data.store->embed(block_data.decisions[i].var, block_data.decisions[i].current())) {

          if(threadIdx.x == 0) {
            block_data.store->embed(block_data.decisions[block_data.depth - 1].var, block_data.decisions[block_data.depth - 1].next());

            block_data.current_strategy = block_data.snapshot_root_strategy;
            block_data.next_unassigned_var = block_data.snapshot_next_unassigned_var;

        && !unified_data->stop.test())

        block_data.stats.eps_solved_subproblems += 1;

    if(threadIdx.x == 0 && !stop) {

      block_data.subproblem_idx = grid_data->next_subproblem.atomic()++;

      && !unified_data->stop.test())

    block_data.stats.num_blocks_done = 1;

#ifndef TURBO_NO_ENTAILED_PROP_REMOVAL

  block_data.deallocate_shared_data();
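
/** Propagation step shared by the dive and the search loop: the objective
 * upper bound is tightened using the best bounds known so far, the fixpoint
 * engine is run on the propagators, entailed propagators are removed from the
 * active set (unless TURBO_NO_ENTAILED_PROP_REMOVAL is defined), and when no
 * propagator remains active on a non-failed store a new solution is recorded
 * in `best_store` and its objective value may be printed. Thread 0 updates
 * the node, failure, depth and fixpoint statistics. */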
 
template <class FPEngine>
__device__ INLINE void propagate(UnifiedData& unified_data, GridData& grid_data, BlockData& block_data,
  FPEngine& fp_engine, bool& stop, bool& has_changed, bool& is_leaf_node)
{
  __shared__ int warp_iterations[CUDA_THREADS_PER_BLOCK/32];
  warp_iterations[threadIdx.x / 32] = 0;
  auto& config = unified_data.root.config;
  IProp& iprop = *block_data.iprop;
  auto group = cooperative_groups::this_thread_block();

  if(threadIdx.x == 0 && !grid_data.obj_var.is_untyped()) {

    if(!grid_data.appx_best_bound.is_top()) {
      block_data.store->embed(grid_data.obj_var,
        Itv(Itv::LB::top(), Itv::UB(grid_data.appx_best_bound.value() - 1)));
      block_data.store->embed(grid_data.obj_var,
        Itv(Itv::LB::top(), Itv::UB(block_data.best_bound.value() - 1)));

  if(grid_data.appx_best_bound.is_bot()) {

  is_leaf_node = false;

#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
  int num_active = iprop.num_deductions();
#else
  int num_active = fp_engine.num_active();
#endif

      fp_iterations = fp_engine.fixpoint(
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
        iprop.num_deductions(),
#endif
        [&](int i){ return iprop.deduce(i); },
        [&](){ return iprop.is_bot(); });
      if(threadIdx.x == 0) {
        block_data.stats.num_deductions += fp_iterations * num_active;

        fp_iterations = fp_engine.fixpoint(
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
        iprop.num_deductions(),
#endif
          [&](int i){ return iprop.deduce(i); },
          [&](){ return iprop.is_bot(); });
        if(threadIdx.x == 0) {
          block_data.stats.num_deductions += fp_iterations * num_active;

        fp_iterations = fp_engine.fixpoint(
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL
          iprop.num_deductions(),
#endif
          [&](int i){ return warp_fixpoint<CUDA_THREADS_PER_BLOCK>(iprop, i, warp_iterations); },
          [&](){ return iprop.is_bot(); });
        if(threadIdx.x == 0) {
          for(int i = 0; i < CUDA_THREADS_PER_BLOCK/32; ++i) {
            block_data.stats.num_deductions += warp_iterations[i] * 32;

  if(!iprop.is_bot()) {
#ifdef TURBO_NO_ENTAILED_PROP_REMOVAL

    for(int i = threadIdx.x; !has_changed && i < iprop.num_deductions(); i += blockDim.x) {

    num_active = has_changed;
#else
    fp_engine.select([&](int i) { return !iprop.ask(i); });
    num_active = fp_engine.num_active();
#endif

    if(num_active == 0) {

      if(threadIdx.x == 0) {
        block_data.best_bound.meet(Itv::UB(block_data.store->project(grid_data.obj_var).lb().value()));
        grid_data.appx_best_bound.meet(block_data.best_bound);

      block_data.store->copy_to(group, *block_data.best_store);
      if(threadIdx.x == 0) {
        block_data.stats.solutions++;

          grid_data.print_lock.acquire();
          printf("%% objective="); block_data.best_bound.print(); printf("\n");
          grid_data.print_lock.release();

  if(threadIdx.x == 0) {
    block_data.stats.fixpoint_iterations += fp_iterations;
    block_data.stats.nodes++;
    block_data.stats.fails += (iprop.is_bot() ? 1 : 0);
    block_data.stats.depth_max = battery::max(block_data.stats.depth_max, block_data.depth);

      || unified_data.stop.test())

      block_data.stats.exhaustive = false;
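
/** Aggregates the per-block results on the device: the statistics of every
 * block are met into the root statistics, and the best store among the blocks
 * (any solution for satisfaction, the store matching the best bound for
 * optimization) is copied into `root.best`. */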
 
__global__ void reduce_blocks(UnifiedData* unified_data, GridData* grid_data) {
  auto& root = unified_data->root;
  for(int i = 0; i < grid_data->blocks.size(); ++i) {
    root.stats.meet(grid_data->blocks[i].stats);
  }
  for(int i = 0; i < grid_data->blocks.size(); ++i) {
    auto& block = grid_data->blocks[i];
    if(block.stats.solutions > 0) {
      if(root.bab->is_satisfaction()) {
        block.best_store->extract(*root.best);

        grid_data->appx_best_bound.meet(block.best_bound);

  if(!grid_data->appx_best_bound.is_top()) {
    for(int i = 0; i < grid_data->blocks.size(); ++i) {
      auto& block = grid_data->blocks[i];
      if(block.best_bound == grid_data->appx_best_bound
       && block.best_store->project(grid_data->obj_var).lb() == block.best_bound)
      {
        block.best_store->copy_to(*root.best);
 
__global__ void deallocate_global_data(bt::unique_ptr<GridData, bt::global_allocator>* grid_data) {
 
#if defined(TURBO_IPC_ABSTRACT_DOMAIN) || !defined(__CUDACC__)

#ifdef TURBO_IPC_ABSTRACT_DOMAIN
  std::cerr << "-arch barebones does not support IPC abstract domain." << std::endl;
#else
  std::cerr << "You must use a CUDA compiler (nvcc or clang) to compile Turbo on GPU." << std::endl;