Lattice Land Core Library
fixpoint.hpp
// Copyright 2022 Pierre Talbot

#ifndef LALA_CORE_FIXPOINT_HPP
#define LALA_CORE_FIXPOINT_HPP

#include "logic/logic.hpp"
#include "b.hpp"
#include "battery/memory.hpp"
#include "battery/vector.hpp"

#ifdef __CUDACC__
  #include <cooperative_groups.h>
  #include <cub/block/block_scan.cuh>
#endif

namespace lala {
/** A simple form of sequential fixpoint computation based on Kleene fixpoint.
 * At each iteration, the deduction operations \f$ f_1, \ldots, f_n \f$ are composed by functional composition \f$ f = f_n \circ \ldots \circ f_1 \f$.
 * This strategy corresponds to the Gauss-Seidel iteration method. */
class GaussSeidelIteration {
public:
  CUDA void barrier() {}

  /** We call the function `f` `n` times: \f$ f(0); f(1); \ldots; f(n-1); \f$
   * \param `n` the number of calls to `f`.
   * \param `bool f(int i)` returns `true` if something has changed for `i`.
   * \return `true` if `f(i)` returned `true` for some `i`, `false` otherwise.
   */
  template <class F>
  CUDA local::B iterate(int n, const F& f) const {
    bool has_changed = false;
    for(int i = 0; i < n; ++i) {
      has_changed |= f(i);
    }
    return has_changed;
  }

  /** We execute `iterate(n, f)` until we reach a fixpoint or `must_stop()` returns `true`.
   * \param `n` the number of calls to `f`.
   * \param `bool f(int i)` returns `true` if something has changed for `i`.
   * \param `bool must_stop()` returns `true` if we must stop the fixpoint computation early.
   * \param `has_changed` is set to `true` if we were not yet at a fixpoint.
   * \return The number of iterations performed before reaching a fixpoint or before `must_stop()` returned `true`.
   */
  template <class F, class StopFun, class M>
  CUDA int fixpoint(int n, const F& f, const StopFun& must_stop, B<M>& has_changed) {
    int iterations = 0;
    local::B changed(true);
    while(changed && !must_stop()) {
      changed = iterate(n, f);
      has_changed.join(changed);
      iterations++;
    }
    return iterations;
  }

  /** Same as `fixpoint` above without `has_changed`. */
  template <class F, class StopFun>
  CUDA int fixpoint(int n, const F& f, const StopFun& must_stop) {
    local::B has_changed(false);
    return fixpoint(n, f, must_stop, has_changed);
  }

  /** Same as `fixpoint` above with `must_stop` always returning `false`. */
  template <class F, class M>
  CUDA int fixpoint(int n, const F& f, B<M>& has_changed) {
    return fixpoint(n, f, [](){ return false; }, has_changed);
  }

  /** Same as `fixpoint` above without `has_changed` and with `must_stop` always returning `false`. */
  template <class F>
  CUDA int fixpoint(int n, const F& f) {
    local::B has_changed(false);
    return fixpoint(n, f, has_changed);
  }
};
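
/** Example (editor's sketch, not part of the library): computing the fixpoint of
 * two hypothetical deduction functions narrowing an interval `[lb..ub]`.
 * \code
 * GaussSeidelIteration fp;
 * int lb = 0, ub = 10;
 * int iterations = fp.fixpoint(2, [&](int i) {
 *   if(i == 0 && lb < 5) { lb++; return true; }  // f_0: raise the lower bound.
 *   if(i == 1 && ub > 5) { ub--; return true; }  // f_1: lower the upper bound.
 *   return false;
 * });
 * // Upon termination, lb == 5 and ub == 5.
 * \endcode
 */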


/** Add the ability to deactivate functions in a fixpoint computation.
 * Given a function `g`, we keep only the functions \f$ f_{i_1}; \ldots; f_{i_k} \f$ for which \f$ g(i_j) \f$ is `true`, and compute the subsequent fixpoints without the others.
 */
template <class FixpointEngine>
class FixpointSubsetCPU {
private:
  FixpointEngine fp_engine;

  /** The indexes of all functions. */
  battery::vector<int> indexes;

  /** The active functions are those at positions `0..n-1` of `indexes`. */
  int n;

public:
  FixpointSubsetCPU(int n) : indexes(n), n(n) {
    for(int i = 0; i < n; ++i) {
      indexes[i] = i;
    }
  }

  template <class F>
  bool iterate(const F& f) {
    return fp_engine.iterate(n, [&](int i) { return f(indexes[i]); });
  }

  template <class F>
  int fixpoint(const F& f) {
    return fp_engine.fixpoint(n, [&](int i) { return f(indexes[i]); });
  }

  template <class F, class StopFun>
  int fixpoint(const F& f, const StopFun& g) {
    return fp_engine.fixpoint(n, [&](int i) { return f(indexes[i]); }, g);
  }

  template <class F, class StopFun, class M>
  int fixpoint(const F& f, const StopFun& g, B<M>& has_changed) {
    return fp_engine.fixpoint(n, [&](int i) { return f(indexes[i]); }, g, has_changed);
  }

  /** \return the number of active functions. */
  int num_active() const {
    return n;
  }

  void reset() {
    n = indexes.size();
  }

  /** Compute the subset of the functions that are still active.
   * Subsequent calls to `fixpoint` will only consider the functions `f_i` for which `g(i)` is `true`. */
  template <class G>
  void select(const G& g) {
    for(int i = 0; i < n; ++i) {
      if(!g(indexes[i])) {
        battery::swap(indexes[i], indexes[--n]);
        i--;
      }
    }
  }

  using snapshot_type = int;

  snapshot_type snapshot() const {
    return snapshot_type(n);
  }

  void restore(const snapshot_type& snap) {
    n = snap;
  }
};
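
/** Example (editor's sketch): deactivating entailed functions between two fixpoint
 * computations; `num_functions`, `deduce` and `is_entailed` are hypothetical.
 * \code
 * FixpointSubsetCPU<GaussSeidelIteration> fp(num_functions);
 * fp.fixpoint([&](int i) { return deduce(i); });
 * auto snap = fp.snapshot();
 * fp.select([&](int i) { return !is_entailed(i); }); // Keep only the non-entailed functions.
 * fp.fixpoint([&](int i) { return deduce(i); });     // Iterate over the active subset only.
 * fp.restore(snap);                                  // Reactivate the functions of the snapshot.
 * \endcode
 */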

#ifdef __CUDACC__

/** This fixpoint engine is parametrized (via CRTP) by an iterator engine `I` providing a method `iterate(n,f)`, a barrier `barrier()` and a function `is_thread0()` returning `true` for exactly one thread.
 * `AsynchronousFixpoint` provides the `fixpoint` functions built on top of `I::iterate`. */
template <class IteratorEngine>
class AsynchronousFixpoint {
  /** We do not use atomics because tearing is seemingly not possible in CUDA (according to information given by NVIDIA engineers during a hackathon). */
  local::B changed[3];
  local::B stop[3];

  CUDA void reset() {
    if(is_thread0()) {
      changed[0] = true;
      changed[1] = false;
      changed[2] = false;
      for(int i = 0; i < 3; ++i) {
        stop[i] = false;
      }
    }
  }

public:
  CUDA INLINE bool is_thread0() {
    return static_cast<IteratorEngine*>(this)->is_thread0();
  }

  CUDA INLINE void barrier() {
    static_cast<IteratorEngine*>(this)->barrier();
  }

  template <class F>
  CUDA INLINE local::B iterate(int n, const F& f) const {
    return static_cast<const IteratorEngine*>(this)->iterate(n, f);
  }

  /** We execute `I::iterate(n, f)` until we reach a fixpoint or `must_stop()` returns `true`.
   * \param `n` the number of calls to `f`.
   * \param `bool f(int i)` returns `true` if something has changed for `i`.
   * \param `bool must_stop()` returns `true` if we must stop the fixpoint computation early. This function is called by the first thread only.
   * \param `has_changed` is set to `true` if we were not yet at a fixpoint.
   * \return The number of iterations performed before reaching a fixpoint or before `must_stop()` returned `true`.
   */
  template <class F, class StopFun, class M>
  CUDA int fixpoint(int n, const F& f, const StopFun& must_stop, B<M>& has_changed) {
    reset();
    barrier();
    int i;
    for(i = 1; changed[(i-1)%3] && !stop[(i-1)%3]; ++i) {
      changed[i%3].join(iterate(n, f));
      if(is_thread0()) {
        changed[(i+1)%3].meet(false); // Reinitialize `changed` for the next iteration.
        stop[i%3].join(must_stop());
      }
      barrier();
    }
    // The abstract domain has changed if we performed more than one iteration (i > 2), or if the first iteration changed it (changed[1]).
    if(is_thread0()) {
      has_changed.join(changed[1] || i > 2);
    }
    return i - 1;
  }

  /** Same as `fixpoint` above, but additionally executes the function `h()` between two consecutive iterations (all threads are synchronized before and after `h`), and without `has_changed`. */
  template <class F, class Iter, class StopFun>
  CUDA int fixpoint(int n, const F& f, const Iter& h, const StopFun& must_stop) {
    reset();
    barrier();
    int i;
    for(i = 1; changed[(i-1)%3] && !stop[(i-1)%3]; ++i) {
      changed[i%3].join(iterate(n, f));
      if(is_thread0()) {
        changed[(i+1)%3].meet(false); // Reinitialize `changed` for the next iteration.
        stop[i%3].join(must_stop());
      }
      barrier();
      h();
      barrier();
    }
    return i - 1;
  }

  /** Same as `fixpoint` above with a new function defined by `g(i) = f(indexes[i])` and `n = indexes.size()`. */
  template <class Alloc, class F, class Iter, class StopFun>
  CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, const Iter& h, const StopFun& must_stop) {
    return fixpoint(indexes.size(), [&](int i) { return f(indexes[i]); }, h, must_stop);
  }

  /** Same as `fixpoint` above without `has_changed`. */
  template <class F, class StopFun>
  CUDA INLINE int fixpoint(int n, const F& f, const StopFun& must_stop) {
    local::B has_changed(false);
    return fixpoint(n, f, must_stop, has_changed);
  }

  /** Same as `fixpoint` above with `must_stop` always returning `false`. */
  template <class F, class M>
  CUDA INLINE int fixpoint(int n, const F& f, B<M>& has_changed) {
    return fixpoint(n, f, [](){ return false; }, has_changed);
  }

  /** Same as `fixpoint` above without `has_changed` and with `must_stop` always returning `false`. */
  template <class F>
  CUDA INLINE int fixpoint(int n, const F& f) {
    local::B has_changed(false);
    return fixpoint(n, f, has_changed);
  }

  /** Same as `fixpoint` with a new function defined by `g(i) = f(indexes[i])` and `n = indexes.size()`. */
  template <class Alloc, class F, class StopFun, class M>
  CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, const StopFun& must_stop, B<M>& has_changed) {
    return fixpoint(indexes.size(), [&](int i) { return f(indexes[i]); }, must_stop, has_changed);
  }

  /** Same as `fixpoint` with `g(i) = f(indexes[i])` and `n = indexes.size()`, without `has_changed`. */
  template <class Alloc, class F, class StopFun>
  CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, const StopFun& must_stop) {
    local::B has_changed(false);
    return fixpoint(indexes, f, must_stop, has_changed);
  }

  /** Same as `fixpoint` above with `must_stop` always returning `false`. */
  template <class Alloc, class F, class M>
  CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, B<M>& has_changed) {
    return fixpoint(indexes, f, [](){ return false; }, has_changed);
  }

  /** Same as `fixpoint` above without `has_changed` and with `must_stop` always returning `false`. */
  template <class Alloc, class F>
  CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f) {
    local::B has_changed(false);
    return fixpoint(indexes, f, has_changed);
  }
};
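
/** Example (editor's sketch): `AsynchronousFixpoint` uses CRTP, so any engine
 * providing `is_thread0`, `barrier` and `iterate` inherits all the `fixpoint`
 * overloads. A minimal single-threaded engine, for illustration only:
 * \code
 * class SeqEngine : public AsynchronousFixpoint<SeqEngine> {
 * public:
 *   CUDA bool is_thread0() const { return true; }
 *   CUDA void barrier() {}
 *   template <class F>
 *   CUDA local::B iterate(int n, const F& f) const {
 *     bool has_changed = false;
 *     for(int i = 0; i < n; ++i) { has_changed |= f(i); }
 *     return has_changed;
 *   }
 * };
 * \endcode
 */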

/** A simple form of fixpoint computation based on Kleene fixpoint.
 * At each iteration, the functions \f$ f_1, \ldots, f_n \f$ are composed by parallel composition \f$ f = f_1 \| \ldots \| f_n \f$, meaning they are executed in parallel by different threads.
 * This is called an asynchronous iteration, due to (Cousot, Asynchronous iterative methods for solving a fixed point system of monotone equations in a complete lattice, 1977).
 * \tparam Group is a CUDA cooperative group class (note that we provide a more efficient implementation for block groups below in `BlockAsynchronousFixpointGPU`).
*/
template <class Group>
class AsynchronousIterationGPU : public AsynchronousFixpoint<AsynchronousIterationGPU<Group>> {
public:
  using group_type = Group;
private:
  Group group;

  CUDA void assert_cuda_arch() const {
    printf("AsynchronousIterationGPU must be used on the GPU device only.\n");
    assert(0);
  }

public:
  CUDA AsynchronousIterationGPU(const Group& group):
    group(group)
  {}

  CUDA void reset() const {}

  CUDA INLINE bool is_thread0() const {
  #ifndef __CUDA_ARCH__
    assert_cuda_arch();
    return false;
  #else
    return group.thread_rank() == 0;
  #endif
  }

  /** A barrier used to synchronize the threads within the group between iterations. */
  CUDA INLINE void barrier() {
  #ifndef __CUDA_ARCH__
    assert_cuda_arch();
  #else
    group.sync();
  #endif
  }

  /** The function `f` is called `n` times in parallel: \f$ f(0) \| f(1) \| \ldots \| f(n-1) \f$.
   * \param `n` the number of calls to `f`.
   * \param `bool f(int i)` returns `true` if something has changed for `i`.
   * \return `true` if `f(i)` returned `true` for some `i`, `false` otherwise.
   */
  template <class F>
  CUDA INLINE bool iterate(int n, const F& f) const {
  #ifndef __CUDA_ARCH__
    assert_cuda_arch();
    return false;
  #else
    bool has_changed = false;
    for (int i = group.thread_rank(); i < n; i += group.num_threads()) {
      has_changed |= f(i);
    }
    return has_changed;
  #endif
  }
};

using GridAsynchronousFixpointGPU = AsynchronousIterationGPU<cooperative_groups::grid_group>;
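
/** Example (editor's sketch): a grid-wide fixpoint. Since the `changed` and
 * `stop` flags of `AsynchronousFixpoint` are read by all threads, the engine
 * must live in memory visible to the whole grid (here, constructed by thread 0
 * into pre-allocated global memory), and the kernel must be launched with
 * `cudaLaunchCooperativeKernel` for `grid_group::sync()` to be valid.
 * `deduce` is a hypothetical device function.
 * \code
 * __global__ void grid_fixpoint_kernel(GridAsynchronousFixpointGPU* fp, int num_functions) {
 *   auto grid = cooperative_groups::this_grid();
 *   if(grid.thread_rank() == 0) { new(fp) GridAsynchronousFixpointGPU(grid); }
 *   grid.sync();
 *   fp->fixpoint(num_functions, [&](int i) { return deduce(i); });
 * }
 * \endcode
 */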

/** An optimized version of `AsynchronousIterationGPU` for when the fixpoint is computed on a single block.
 * We avoid cooperative groups, which take extra memory space.
 * The Boolean `syncwarp` indicates whether `f` in `iterate` synchronizes the warp internally; if it does and `syncwarp` is `true`, `iterate` rounds the number of iterations up to a multiple of 32 by repeating the last index if necessary, so that all threads of a warp reach the synchronization.
 */
template <bool syncwarp = false>
class BlockAsynchronousFixpointGPU : public AsynchronousFixpoint<BlockAsynchronousFixpointGPU<syncwarp>> {
private:
  CUDA void assert_cuda_arch() const {
    printf("BlockAsynchronousFixpointGPU must be used on the GPU device only.\n");
    assert(0);
  }

public:
  BlockAsynchronousFixpointGPU() = default;

  CUDA INLINE bool is_thread0() const {
  #ifndef __CUDA_ARCH__
    assert_cuda_arch();
    return false;
  #else
    return threadIdx.x == 0;
  #endif
  }

  CUDA INLINE void barrier() {
  #ifndef __CUDA_ARCH__
    assert_cuda_arch();
  #else
    __syncthreads();
  #endif
  }

  /** The function `f` is called `n` times in parallel: \f$ f(0) \| f(1) \| \ldots \| f(n-1) \f$.
   * If `n` is greater than the number of threads in the block, we perform a stride loop, without synchronization between two iterations.
   * \param `n` the number of calls to `f`.
   * \param `bool f(int i)` returns `true` if something has changed for `i`.
   * \return `true` if `f(i)` returned `true` for some `i`, `false` otherwise.
   */
  template <class F>
  CUDA INLINE bool iterate(int n, const F& f) const {
  #ifndef __CUDA_ARCH__
    assert_cuda_arch();
    return false;
  #else
    bool has_changed = false;
    // With `syncwarp`, round `n` up to the next multiple of 32 so that all threads of a warp execute the same number of iterations.
    int n2 = syncwarp ? ((n + 31) / 32) * 32 : n;
    for (int i = threadIdx.x; i < n2; i += blockDim.x) {
      has_changed |= f(syncwarp ? (i >= n ? n-1 : i) : i);
    }
    return has_changed;
  #endif
  }
};
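
/** Example (editor's sketch): a per-block fixpoint. As above, the engine state
 * must be visible to all threads of the block, hence the `__shared__` storage
 * (assuming the engine is trivially constructible); `deduce` is hypothetical.
 * \code
 * __global__ void block_fixpoint_kernel(int num_functions) {
 *   __shared__ BlockAsynchronousFixpointGPU<> fp;
 *   fp.fixpoint(num_functions, [&](int i) { return deduce(i); });
 * }
 * \endcode
 */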

#ifdef __CUDACC__

/** This function can be passed to `iterate` of a fixpoint engine in order to compute a local fixpoint per warp.
 * It expects the deduction operation to be split into a `load_deduce` and a `deduce` operation.
 * \tparam TPB the number of threads per block (must be a multiple of 32).
*/
template <int TPB, class A>
__device__ local::B warp_fixpoint(A& a, int i) {
  auto ded = a.load_deduce(i);
  local::B has_changed = false;
  __shared__ bool warp_changed[TPB/32];
  int warp_id = threadIdx.x / 32;
  warp_changed[warp_id] = true;
  while(warp_changed[warp_id]) {
    __syncwarp();
    warp_changed[warp_id] = false;
    __syncwarp();
    if(a.deduce(ded)) {
      has_changed = true;
      /** If something changed, we continue to iterate only if we did not reach bot. */
      if(!a.is_bot()) {
        warp_changed[warp_id] = true;
      }
    }
    __syncwarp();
  }
  return has_changed;
}
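
/** Example (editor's sketch): plugging `warp_fixpoint` into a block engine.
 * Since `warp_fixpoint` synchronizes the warp, the engine must be instantiated
 * with `syncwarp = true` so that all threads of a warp reach the `__syncwarp`
 * calls. Here `a` is a hypothetical abstract domain providing `load_deduce`,
 * `deduce` and `is_bot`, `fp` is a `BlockAsynchronousFixpointGPU<true>` shared
 * by a block of 256 threads, and `n` is the number of deduction operations.
 * \code
 * fp.fixpoint(n, [&](int i) { return warp_fixpoint<256>(a, i); });
 * \endcode
 */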

#endif

/** Add the ability to deactivate functions in a fixpoint computation.
 * Given a function `g`, we keep only the functions \f$ f_{i_1} \| \ldots \| f_{i_k} \f$ for which \f$ g(i_j) \f$ is `true`, and compute the subsequent fixpoints without the others.
 */
template <class FixpointEngine, class Allocator, int TPB>
class FixpointSubsetGPU {
public:
  using allocator_type = Allocator;

private:
  FixpointEngine fp_engine;

  /** The indexes of the functions that are active. */
  battery::vector<int, allocator_type> indexes;

  /** A mask to know which functions are still active.
   * We have `mask[i] <=> g(indexes[i])`.
   */
  battery::vector<bool, allocator_type> mask;

  /** A temporary array to compute the prefix sum of `mask`, in order to copy the active indexes into `indexes2`. */
  battery::vector<int, allocator_type> sum;

  /** A temporary array used when copying the new active functions. */
  battery::vector<int, allocator_type> indexes2;

  /** The CUB prefix sum temporary storage. */
  using BlockScan = cub::BlockScan<int, TPB>;
  typename BlockScan::TempStorage cub_prefixsum_tmp;

  // We round `n` up to the next multiple of the block dimension (which is expected to equal TPB).
  __device__ INLINE int round_multiple_TPB(int n) {
    return n + ((blockDim.x - n % blockDim.x) % blockDim.x);
  }

public:
  FixpointSubsetGPU() = default;

  __device__ void reset(int n) {
    if(threadIdx.x == 0) {
      indexes.resize(n);
      indexes2.resize(n);
    }
    __syncthreads();
    for(int i = threadIdx.x; i < indexes.size(); i += blockDim.x) {
      indexes[i] = i;
    }
  }

  __device__ void init(int n, const allocator_type& allocator = allocator_type()) {
    if(threadIdx.x == 0) {
      indexes = battery::vector<int, allocator_type>(n, allocator);
      indexes2 = battery::vector<int, allocator_type>(n, allocator);
      mask = battery::vector<bool, allocator_type>(round_multiple_TPB(n), false, allocator);
      sum = battery::vector<int, allocator_type>(round_multiple_TPB(n), 0, allocator);
    }
    __syncthreads();
    for(int i = threadIdx.x; i < indexes.size(); i += blockDim.x) {
      indexes[i] = i;
    }
  }

  __device__ void destroy() {
    if(threadIdx.x == 0) {
      indexes = battery::vector<int, allocator_type>();
      indexes2 = battery::vector<int, allocator_type>();
      mask = battery::vector<bool, allocator_type>();
      sum = battery::vector<int, allocator_type>();
    }
    __syncthreads();
  }

  CUDA INLINE bool is_thread0() const {
    return fp_engine.is_thread0();
  }

  CUDA INLINE void barrier() {
    fp_engine.barrier();
  }

  template <class F>
  CUDA INLINE bool iterate(const F& f) {
    return fp_engine.iterate(indexes.size(), [&](int i) { return f(indexes[i]); });
  }

  template <class F>
  CUDA INLINE int fixpoint(const F& f) {
    return fp_engine.fixpoint(indexes, f);
  }

  template <class F, class StopFun>
  CUDA INLINE int fixpoint(const F& f, const StopFun& g) {
    return fp_engine.fixpoint(indexes, f, g);
  }

  template <class F, class Iter, class StopFun>
  CUDA INLINE int fixpoint(const F& f, const Iter& h, const StopFun& g) {
    return fp_engine.fixpoint(indexes, f, h, g);
  }

  template <class F, class StopFun, class M>
  CUDA INLINE int fixpoint(const F& f, const StopFun& g, B<M>& has_changed) {
    return fp_engine.fixpoint(indexes, f, g, has_changed);
  }

  /** \return the number of active functions. */
  CUDA int num_active() const {
    return indexes.size();
  }

  /** Compute the subset of the functions that are still active.
   * Subsequent calls to `fixpoint` will only consider the functions `f_i` for which `g(i)` is `true`. */
  template <class G>
  __device__ void select(const G& g) {
    assert(TPB == blockDim.x);
    // indexes:  0 1 2 3  (indexes of the propagators)
    // mask:     1 0 0 1  (filtering entailed functions)
    // sum:      1 1 1 2  (inclusive prefix sum of mask)
    // indexes2: 0 3      (new indexes of the propagators)
    if(indexes.size() == 0) {
      return;
    }

    /** I. We perform a parallel map to detect the active functions. */
    for(int i = threadIdx.x; i < indexes.size(); i += blockDim.x) {
      mask[i] = g(indexes[i]);
    }

    /** II. We compute the prefix sum of the mask in order to obtain the new indexes of the active functions. */
    int n = round_multiple_TPB(indexes.size());
    for(int i = threadIdx.x; i < n; i += blockDim.x) {
      BlockScan(cub_prefixsum_tmp).InclusiveSum(mask[i], sum[i]);
      __syncthreads(); // Required by BlockScan to reuse the temporary storage.
    }
    for(int i = blockDim.x + threadIdx.x; i < n; i += blockDim.x) {
      sum[i] += sum[i - threadIdx.x - 1];
      __syncthreads();
    }

    /** III. We compute the new indexes of the active functions. */
    if(threadIdx.x == 0) {
      battery::swap(indexes, indexes2);
      indexes.resize(sum[indexes2.size()-1]);
    }
    __syncthreads();
    for(int i = threadIdx.x; i < indexes2.size(); i += blockDim.x) {
      if(mask[i]) {
        indexes[sum[i]-1] = indexes2[i];
      }
    }
  }

  template <class Alloc = allocator_type>
  using snapshot_type = battery::vector<int, Alloc>;

  template <class Alloc = allocator_type>
  CUDA snapshot_type<Alloc> snapshot(const Alloc& alloc = Alloc()) const {
    return snapshot_type<Alloc>(indexes, alloc);
  }

  template <class Alloc>
  __device__ void restore_par(const snapshot_type<Alloc>& snap) {
    for(int i = threadIdx.x; i < snap.size(); i += blockDim.x) {
      indexes[i] = snap[i];
    }
    if(threadIdx.x == 0) {
      assert(snap.size() <= indexes.capacity());
      indexes.resize(snap.size());
    }
  }
};
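
/** Example (editor's sketch): a typical round-trip with `FixpointSubsetGPU`
 * within one block, assuming `fp` refers to an engine in memory visible to the
 * block; `battery::global_allocator` is assumed available, and `deduce` and
 * `is_entailed` are hypothetical.
 * \code
 * using FP = FixpointSubsetGPU<BlockAsynchronousFixpointGPU<>, battery::global_allocator, 256>;
 * __device__ void solve(FP& fp, int num_functions) {
 *   fp.init(num_functions);
 *   fp.fixpoint([&](int i) { return deduce(i); });
 *   fp.select([&](int i) { return !is_entailed(i); }); // Deactivate entailed functions.
 *   fp.fixpoint([&](int i) { return deduce(i); });     // Iterate over the active subset only.
 *   fp.destroy();
 * }
 * \endcode
 */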

#endif

} // namespace lala

#endif