Lattice Land Core Library
fixpoint.hpp
1// Copyright 2022 Pierre Talbot
2
3#ifndef LALA_CORE_FIXPOINT_HPP
4#define LALA_CORE_FIXPOINT_HPP
5
6#include "logic/logic.hpp"
7#include "b.hpp"
8#include "battery/memory.hpp"
9#include "battery/vector.hpp"
10
11#ifdef __CUDACC__
12 #include <cooperative_groups.h>
13 #include <cub/block/block_scan.cuh>
14#endif
15
16namespace lala {
17
18/** A simple form of sequential fixpoint computation based on Kleene fixpoint.
19 * At each iteration, the deduction operations \f$ f_1, \ldots, f_n \f$ are simply composed by functional composition \f$ f = f_n \circ \ldots \circ f_1 \f$.
20 * This strategy basically corresponds to the Gauss-Seidel iteration method. */
21class GaussSeidelIteration {
22public:
23 CUDA void barrier() {}
24
25 /** We iterate the function `f` `n` times: \f$ f(0); f(1); \ldots ; f(n-1); \f$
26 * \param `n` the number of calls to `f`.
27 * \param `bool f(int i)` returns `true` if something has changed for `i`.
28 * \return `true` if for some `i`, `f(i)` returned `true`, `false` otherwise.
29 */
30 template <class F>
31 CUDA local::B iterate(int n, const F& f) const {
32 bool has_changed = false;
33 for(int i = 0; i < n; ++i) {
34 has_changed |= f(i);
35 }
36 return has_changed;
37 }
38
39 /** We execute `iterate(n, f)` until we reach a fixpoint or `must_stop()` returns `true`.
40 * \param `n` the number of calls to `f`.
41 * \param `bool f(int i)` returns `true` if something has changed for `i`.
42 * \param `bool must_stop()` returns `true` if we must stop the fixpoint computation early.
43 * \param `has_changed` is set to `true` if we were not yet in a fixpoint.
44 * \return The number of iterations required to reach a fixpoint or until `must_stop()` returns `true`.
45 */
46 template <class F, class StopFun, class M>
47 CUDA int fixpoint(int n, const F& f, const StopFun& must_stop, B<M>& has_changed) {
48 int iterations = 0;
49 local::B changed(true);
50 while(changed && !must_stop()) {
51 changed = iterate(n, f);
52 has_changed.join(changed);
53 iterations++;
54 }
55 return iterations;
56 }
57
58 /** Same as `fixpoint` above without `has_changed`. */
59 template <class F, class StopFun>
60 CUDA int fixpoint(int n, const F& f, const StopFun& must_stop) {
61 local::B has_changed(false);
62 return fixpoint(n, f, must_stop, has_changed);
63 }
64
65 /** Same as `fixpoint` above with `must_stop` always returning `false`. */
66 template <class F, class M>
67 CUDA int fixpoint(int n, const F& f, B<M>& has_changed) {
68 return fixpoint(n, f, [](){ return false; }, has_changed);
69 }
70
71 /** Same as `fixpoint` above without `has_changed` and with `must_stop` always returning `false`. */
72 template <class F>
73 CUDA int fixpoint(int n, const F& f) {
74 local::B has_changed(false);
75 return fixpoint(n, f, has_changed);
76 }
77};
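// Illustrative usage sketch (not part of the original header): driving a set of
// deduction operations to a fixpoint with the sequential engine above. `store` and
// `deductions` are hypothetical names for an abstract domain and its deduction operators.
//
//   GaussSeidelIteration seq;
//   local::B has_changed(false);
//   int iterations = seq.fixpoint(
//     deductions.size(),
//     [&](int i) { return deductions[i].deduce(store); },  // `true` if `store` changed.
//     has_changed);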
78
79
80/** Add the ability to deactivate functions in a fixpoint computation.
81 * Given a predicate `g`, we keep only the functions \f$ f_{i_1} ; \ldots ; f_{i_k} \f$ for which \f$ g(i_j) \f$ is `true`, and compute subsequent fixpoints on this subset only.
82 */
83template <class FixpointEngine>
84class FixpointSubsetCPU {
85private:
86 FixpointEngine fp_engine;
87
88 /** The indexes of all functions. */
89 battery::vector<int> indexes;
90
91 /** The active subset of the functions is `indexes[0..n-1]`. */
92 int n;
93
94public:
95 FixpointSubsetCPU(int n) : indexes(n), n(n) {
96 for(int i = 0; i < n; ++i) {
97 indexes[i] = i;
98 }
99 }
100
101 template <class F>
102 bool iterate(const F& f) {
103 return fp_engine.iterate(n, [&](int i) { return f(indexes[i]); });
104 }
105
106 template <class F>
107 int fixpoint(const F& f) {
108 return fp_engine.fixpoint(n, [&](int i) { return f(indexes[i]); });
109 }
110
111 template <class F, class StopFun>
112 int fixpoint(const F& f, const StopFun& g) {
113 return fp_engine.fixpoint(n, [&](int i) { return f(indexes[i]); }, g);
114 }
115
116 template <class F, class StopFun, class M>
117 int fixpoint(const F& f, const StopFun& g, B<M>& has_changed) {
118 return fp_engine.fixpoint(n, [&](int i) { return f(indexes[i]); }, g, has_changed);
119 }
120
121 /** \return the number of active functions. */
122 int num_active() const {
123 return n;
124 }
125
126 void reset() {
127 n = indexes.size();
128 }
129
130 /** Compute the subset of the functions that are still active.
131 * Subsequent calls to `fixpoint` will only consider the functions `f_i` for which `g(i)` is `true`. */
132 template <class G>
133 void select(const G& g) {
134 for(int i = 0; i < n; ++i) {
135 if(!g(indexes[i])) {
136 battery::swap(indexes[i], indexes[--n]);
137 i--;
138 }
139 }
140 }
141
142 using snapshot_type = int;
143
144 snapshot_type snapshot() const {
145 return snapshot_type(n);
146 }
147
148 void restore(const snapshot_type& snap) {
149 n = snap;
150 }
151};
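// Illustrative usage sketch (not part of the original header): restricting the
// fixpoint to non-entailed functions between two computations. `store`, `deductions`
// and `is_entailed` are hypothetical.
//
//   FixpointSubsetCPU<GaussSeidelIteration> fp(deductions.size());
//   auto snap = fp.snapshot();
//   fp.fixpoint([&](int i) { return deductions[i].deduce(store); });
//   fp.select([&](int i) { return !is_entailed(i); });  // keep only the functions not yet entailed.
//   // ... later (e.g. after backtracking), reactivate the previously active subset:
//   fp.restore(snap);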
152
153#ifdef __CUDACC__
154
155/** This fixpoint engine is parametrized by an iterator engine `I` providing a method `iterate(n,f)`, a synchronization barrier `barrier()` and a function `is_thread0()` returning `true` for exactly one of the participating threads.
156 * AsynchronousFixpoint provides a `fixpoint` function using `iterate` of `I`. */
157template <class IteratorEngine>
158class AsynchronousFixpoint {
159 /** We do not use atomics because tearing is seemingly not possible in CUDA (according to information given by Nvidia engineers during a hackathon). */
160 local::B changed[3];
161 local::B stop[3];
162
163 CUDA void reset() {
164 if(is_thread0()) {
165 changed[0] = true;
166 changed[1] = false;
167 changed[2] = false;
168 for(int i = 0; i < 3; ++i) {
169 stop[i] = false;
170 }
171 }
172 }
173
174public:
175 CUDA INLINE bool is_thread0() {
176 return static_cast<IteratorEngine*>(this)->is_thread0();
177 }
178
179 CUDA INLINE void barrier() {
180 static_cast<IteratorEngine*>(this)->barrier();
181 }
182
183 template <class F>
184 CUDA INLINE local::B iterate(int n, const F& f) const {
185 return static_cast<const IteratorEngine*>(this)->iterate(n, f);
186 }
187
188 /** We execute `I::iterate(n, f)` until we reach a fixpoint or `must_stop()` returns `true`.
189 * \param `n` the number of calls to `f`.
190 * \param `bool f(int i)` returns `true` if something has changed for `i`.
191 * \param `bool must_stop()` returns `true` if we must stop the fixpoint computation early. This function is called by the first thread only.
192 * \param `has_changed` is set to `true` if we were not yet in a fixpoint.
193 * \return The number of iterations required to reach a fixpoint or until `must_stop()` returns `true`.
194 */
195 template <class F, class StopFun, class M>
196 CUDA int fixpoint(int n, const F& f, const StopFun& must_stop, B<M>& has_changed) {
197 reset();
198 barrier();
199 int i;
200 for(i = 1; changed[(i-1)%3] && !stop[(i-1)%3]; ++i) {
201 changed[i%3].join(iterate(n, f));
202 if(is_thread0()) {
203 changed[(i+1)%3].meet(false); // reinitialize changed for the next iteration.
204 stop[i%3].join(must_stop());
205 }
206 barrier();
207 }
208 // It has changed if we performed several iterations, or if the first iteration changed the abstract domain.
209 if(is_thread0()) {
210 has_changed.join(changed[1] || i > 2);
211 }
212 return i - 1;
213 }
214
/** Same as `fixpoint` above, but an extra function `h()` is executed by all threads after each iteration (between two barriers), and no `has_changed` flag is maintained. */
215 template <class F, class Iter, class StopFun>
216 CUDA int fixpoint(int n, const F& f, const Iter& h, const StopFun& must_stop) {
217 reset();
218 barrier();
219 int i;
220 for(i = 1; changed[(i-1)%3] && !stop[(i-1)%3]; ++i) {
221 changed[i%3].join(iterate(n, f));
222 if(is_thread0()) {
223 changed[(i+1)%3].meet(false); // reinitialize changed for the next iteration.
224 stop[i%3].join(must_stop());
225 }
226 barrier();
227 h();
228 barrier();
229 }
230 return i - 1;
231 }
232
/** Same as `fixpoint` above with `g(i) = f(indexes[i])` and `n = indexes.size()`. */
233 template <class Alloc, class F, class Iter, class StopFun>
234 CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, const Iter& h, const StopFun& must_stop) {
235 return fixpoint(indexes.size(), [&](int i) { return f(indexes[i]); }, h, must_stop);
236 }
237
238 /** Same as `fixpoint` above without `has_changed`. */
239 template <class F, class StopFun>
240 CUDA INLINE int fixpoint(int n, const F& f, const StopFun& must_stop) {
241 local::B has_changed(false);
242 return fixpoint(n, f, must_stop, has_changed);
243 }
244
245 /** Same as `fixpoint` above with `must_stop` always returning `false`. */
246 template <class F, class M>
247 CUDA INLINE int fixpoint(int n, const F& f, B<M>& has_changed) {
248 return fixpoint(n, f, [](){ return false; }, has_changed);
249 }
250
251 /** Same as `fixpoint` above without `has_changed` and with `must_stop` always returning `false`. */
252 template <class F>
253 CUDA INLINE int fixpoint(int n, const F& f) {
254 local::B has_changed(false);
255 return fixpoint(n, f, has_changed);
256 }
257
258 /** Same as `fixpoint` with a new function defined by `g(i) = f(indexes[i])` and `n = indexes.size()`. */
259 template <class Alloc, class F, class StopFun, class M>
260 CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, const StopFun& must_stop, B<M>& has_changed) {
261 return fixpoint(indexes.size(), [&](int i) { return f(indexes[i]); }, must_stop, has_changed);
262 }
263
264 /** Same as `fixpoint` with `g(i) = f(indexes[i])` and `n = indexes.size()`, without `has_changed`. */
265 template <class Alloc, class F, class StopFun>
266 CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, const StopFun& must_stop) {
267 local::B has_changed(false);
268 return fixpoint(indexes, f, must_stop, has_changed);
269 }
270
271 /** Same as `fixpoint` above with `must_stop` always returning `false`. */
272 template <class Alloc, class F, class M>
273 CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f, B<M>& has_changed) {
274 return fixpoint(indexes, f, [](){ return false; }, has_changed);
275 }
276
277 /** Same as `fixpoint` above without `has_changed` and with `must_stop` always returning `false`. */
278 template <class Alloc, class F>
279 CUDA INLINE int fixpoint(const battery::vector<int, Alloc>& indexes, const F& f) {
280 local::B has_changed(false);
281 return fixpoint(indexes, f, has_changed);
282 }
283};
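// Illustrative sketch (not part of the original header): the minimal CRTP interface an
// iterator engine must expose to reuse `AsynchronousFixpoint`. `MyEngine` is hypothetical;
// `AsynchronousIterationGPU` and `BlockAsynchronousFixpointGPU` below are the real instances.
//
//   class MyEngine : public AsynchronousFixpoint<MyEngine> {
//   public:
//     CUDA bool is_thread0() const;  // `true` for exactly one of the participating threads.
//     CUDA void barrier();           // synchronize all participating threads.
//     template <class F>
//     CUDA local::B iterate(int n, const F& f) const;  // apply f(0)..f(n-1), `true` if anything changed.
//   };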
284
285/** A simple form of fixpoint computation based on Kleene fixpoint.
286 * At each iteration, the functions \f$ f_1, \ldots, f_n \f$ are composed by parallel composition \f$ f = f_1 \| \ldots \| f_n \f$ meaning they are executed in parallel by different threads.
287 * This is called an asynchronous iteration and it is due to (Cousot, Asynchronous iterative methods for solving a fixed point system of monotone equations in a complete lattice, 1977).
288 * \tparam Group is a CUDA cooperative group class (note that we provide a more efficient implementation for block groups below in `BlockAsynchronousFixpointGPU`).
289*/
290template <class Group>
291class AsynchronousIterationGPU : public AsynchronousFixpoint<AsynchronousIterationGPU<Group>> {
292public:
293 using group_type = Group;
294private:
295 Group group;
296
297 CUDA void assert_cuda_arch() const {
298 printf("AsynchronousIterationGPU must be used on the GPU device only.\n");
299 assert(0);
300 }
301
302public:
303 CUDA AsynchronousIterationGPU(const Group& group):
304 group(group)
305 {}
306
307 CUDA void reset() const {}
308
309 CUDA INLINE bool is_thread0() const {
310 #ifndef __CUDA_ARCH__
311 assert_cuda_arch();
312 return false;
313 #else
314 return group.thread_rank() == 0;
315 #endif
316 }
317
318 /** A barrier used to synchronize the threads within the group between iterations. */
319 CUDA INLINE void barrier() {
320 #ifndef __CUDA_ARCH__
321 assert_cuda_arch();
322 #else
323 group.sync();
324 #endif
325 }
326
327 /** The function `f` is called `n` times in parallel: \f$ f(0) \| f(1) \| \ldots \| f(n-1) \f$.
328 * \param `n` the number of calls to `f`.
329 * \param `bool f(int i)` returns `true` if something has changed for `i`.
330 * \return `true` if for some `i`, `f(i)` returned `true`, `false` otherwise.
331 */
332 template <class F>
333 CUDA INLINE bool iterate(int n, const F& f) const {
334 #ifndef __CUDA_ARCH__
335 assert_cuda_arch();
336 return false;
337 #else
338 bool has_changed = false;
339 for (int i = group.thread_rank(); i < n; i += group.num_threads()) {
340 has_changed |= f(i);
341 }
342 return has_changed;
343 #endif
344 }
345};
346
347using GridAsynchronousFixpointGPU = AsynchronousIterationGPU<cooperative_groups::grid_group>;
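// Illustrative note (not part of the original header): grid-wide synchronization through
// `cooperative_groups::grid_group` requires the kernel to be launched cooperatively (e.g. with
// `cudaLaunchCooperativeKernel`), otherwise `sync()` on the grid group is invalid. A minimal
// sketch with hypothetical kernel and argument names:
//
//   __global__ void grid_fixpoint_kernel(Store* store, Deductions* deds) {
//     GridAsynchronousFixpointGPU fp(cooperative_groups::this_grid());
//     fp.fixpoint(deds->size(), [&](int i) { return (*deds)[i].deduce(*store); });
//   }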
348
349/** An optimized version of `AsynchronousIterationGPU` for when the fixpoint is computed on a single block.
350 * It avoids cooperative groups, which take extra memory space.
351 * `syncwarp` tells whether `f` in `iterate` synchronizes its warp. If so (and `syncwarp` is `true`), `iterate` rounds the number of iterations up to a multiple of 32 by repeating the last index, so that every thread of a warp reaches the synchronization point.
352 */
353template <bool syncwarp = false>
354class BlockAsynchronousFixpointGPU : public AsynchronousFixpoint<BlockAsynchronousFixpointGPU<syncwarp>> {
355private:
356 CUDA void assert_cuda_arch() const {
357 printf("BlockAsynchronousFixpointGPU must be used on the GPU device only.\n");
358 assert(0);
359 }
360
361public:
362 BlockAsynchronousFixpointGPU() = default;
363
364 CUDA INLINE bool is_thread0() const {
365 #ifndef __CUDA_ARCH__
366 assert_cuda_arch();
367 return false;
368 #else
369 return threadIdx.x == 0;
370 #endif
371 }
372
373 CUDA INLINE void barrier() {
374 #ifndef __CUDA_ARCH__
375 assert_cuda_arch();
376 #else
377 __syncthreads();
378 #endif
379 }
380
381 /** The function `f` is called `n` times in parallel: \f$ f(0) \| f(1) \| \ldots \| f(n-1) \f$.
382 * If `n` is greater than the number of threads in the block, we perform a stride loop, without synchronization between two iterations.
383 * \param `n` the number of calls to `f`.
384 * \param `bool f(int i)` returns `true` if something has changed for `i`.
385 * \return `true` if for some `i`, `f(i)` returned `true`, `false` otherwise.
386 */
387 template <class F>
388 CUDA INLINE bool iterate(int n, const F& f) const {
389 #ifndef __CUDA_ARCH__
390 assert_cuda_arch();
391 return false;
392 #else
393 bool has_changed = false;
// Round `n` up to a multiple of 32 (extra iterations repeat the last valid index `n-1`) so that every thread of a warp calls `f`, and thus any warp synchronization inside `f`, the same number of times.
394 int n2 = syncwarp && n != 0 ? max(n,n+(32-(n%32))) : n;
395 for (int i = threadIdx.x; i < n2; i += blockDim.x) {
396 has_changed |= f(syncwarp ? (i >= n ? n-1 : i) : i);
397 }
398 return has_changed;
399 #endif
400 }
401};
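// Illustrative usage sketch (not part of the original header): one block computing a local
// fixpoint. The engine must be visible to every thread of the block (its `changed`/`stop`
// flags are read and written cooperatively), so we construct it in shared memory here.
// `Store` and `Deductions` are hypothetical.
//
//   __global__ void block_fixpoint_kernel(Store* store, Deductions* deds) {
//     __shared__ alignas(BlockAsynchronousFixpointGPU<>) unsigned char raw[sizeof(BlockAsynchronousFixpointGPU<>)];
//     auto* fp = reinterpret_cast<BlockAsynchronousFixpointGPU<>*>(raw);
//     if(threadIdx.x == 0) { new(raw) BlockAsynchronousFixpointGPU<>(); }
//     __syncthreads();
//     fp->fixpoint(deds->size(), [&](int i) { return (*deds)[i].deduce(*store); });
//   }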
402
403#ifdef __CUDACC__
404
405/** This function can be passed to `iterate` of a fixpoint engine in order to perform a local fixpoint per warp.
406 * It expects the deduction operation to be split into a `load_deduce` and a `deduce`.
407 * Note that `load_deduce` can simply be the identity if it is not useful.
408 * TPB: the number of threads per block.
409*/
410template <int TPB, class A>
411__device__ local::B warp_fixpoint(A& a, int i, int* warp_iterations) {
412 auto ded = a.load_deduce(i);
413 local::B has_changed = false;
414 __shared__ bool warp_changed[TPB/32];
415 int warp_id = threadIdx.x / 32;
416 warp_changed[warp_id] = true;
417 while(warp_changed[warp_id]) {
418 __syncwarp();
419 warp_changed[warp_id] = false;
420 __syncwarp();
421 if(a.deduce(ded)) {
422 has_changed = true;
423 /** If something changed, we continue to iterate only if we did not reach bot. */
424 if(!a.is_bot()) {
425 warp_changed[warp_id] = true;
426 }
427 }
428 if(threadIdx.x % 32 == 0) {
429 warp_iterations[warp_id]++;
430 }
431 __syncwarp();
432 }
433 return has_changed;
434}
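// Illustrative usage sketch (not part of the original header): plugging `warp_fixpoint` into a
// block-level engine so that each warp locally closes its own deduction before the global
// iteration continues. `fp`, `a` (an abstract domain providing `load_deduce`/`deduce`/`is_bot`)
// and `warp_iters` are hypothetical; `warp_iters` must be zero-initialized beforehand.
//
//   __shared__ int warp_iters[TPB / 32];
//   fp.fixpoint(num_deductions,
//     [&](int i) { return warp_fixpoint<TPB>(a, i, warp_iters); });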
435
436#endif
437
438/** Add the ability to deactivate functions in a fixpoint computation.
439 * Given a predicate `g`, we keep only the functions \f$ f_{i_1} \| \ldots \| f_{i_k} \f$ for which \f$ g(i_j) \f$ is `true`, and compute subsequent fixpoints on this subset only.
440 */
441template <class FixpointEngine, class Allocator, int TPB>
442class FixpointSubsetGPU {
443public:
444 using allocator_type = Allocator;
445
446private:
447 FixpointEngine fp_engine;
448
449 /** The indexes of functions that are active. */
450 battery::vector<int, allocator_type> indexes;
451
452 /** A mask to know which functions are still active.
453 * We have `mask[i] <=> g(indexes[i])`.
454 */
455 battery::vector<bool, allocator_type> mask;
456
457 /** A temporary array to compute the prefix sum of `mask`, in order to copy indexes into `indexes2`. */
458 battery::vector<int, allocator_type> sum;
459
460 /** A temporary array when copying the new active functions. */
461 battery::vector<int, allocator_type> indexes2;
462
463 /** The CUB prefix sum temporary storage. */
464 using BlockScan = cub::BlockScan<int, TPB>;
465 typename BlockScan::TempStorage cub_prefixsum_tmp;
466
467 // We round n to the next multiple of TPB (the maximum dimension of the block, for now).
468 __device__ INLINE int round_multiple_TPB(int n) {
469 return n + ((blockDim.x - n % blockDim.x) % blockDim.x);
470 }
471
472public:
473 FixpointSubsetGPU() = default;
474
475 __device__ void reset(int n) {
476 if(threadIdx.x == 0) {
477 indexes.resize(n);
478 indexes2.resize(n);
479 }
480 __syncthreads();
481 for(int i = threadIdx.x; i < indexes.size(); i += blockDim.x) {
482 indexes[i] = i;
483 }
484 }
485
486 __device__ void init(int n, const allocator_type& allocator = allocator_type()) {
487 if(threadIdx.x == 0) {
488 indexes = battery::vector<int, allocator_type>(n, allocator);
489 indexes2 = battery::vector<int, allocator_type>(n, allocator);
490 mask = battery::vector<bool, allocator_type>(round_multiple_TPB(n), false, allocator);
491 sum = battery::vector<int, allocator_type>(round_multiple_TPB(n), 0, allocator);
492 }
493 __syncthreads();
494 for(int i = threadIdx.x; i < indexes.size(); i += blockDim.x) {
495 indexes[i] = i;
496 }
497 }
498
499 __device__ void destroy() {
500 if(threadIdx.x == 0) {
501 indexes = battery::vector<int, allocator_type>();
502 indexes2 = battery::vector<int, allocator_type>();
503 mask = battery::vector<bool, allocator_type>();
504 sum = battery::vector<int, allocator_type>();
505 }
506 __syncthreads();
507 }
508
509 CUDA INLINE bool is_thread0() const {
510 return fp_engine.is_thread0();
511 }
512
513 CUDA INLINE void barrier() {
514 fp_engine.barrier();
515 }
516
517 template <class F>
518 CUDA INLINE bool iterate(const F& f) {
519 return fp_engine.iterate(indexes, f);
520 }
521
522 template <class F>
523 CUDA INLINE int fixpoint(const F& f) {
524 return fp_engine.fixpoint(indexes, f);
525 }
526
527 template <class F, class StopFun>
528 CUDA INLINE int fixpoint(const F& f, const StopFun& g) {
529 return fp_engine.fixpoint(indexes, f, g);
530 }
531
532 template <class F, class Iter, class StopFun>
533 CUDA INLINE int fixpoint(const F& f, const Iter& h, const StopFun& g) {
534 return fp_engine.fixpoint(indexes, f, h, g);
535 }
536
537 template <class F, class StopFun, class M>
538 CUDA INLINE int fixpoint(const F& f, const StopFun& g, B<M>& has_changed) {
539 return fp_engine.fixpoint(indexes, f, g, has_changed);
540 }
541
542 /** \return the number of active functions. */
543 CUDA int num_active() const {
544 return indexes.size();
545 }
546
547 /** Compute the subset of the functions that are still active.
548 * Subsequent calls to `fixpoint` will only consider the functions `f_i` for which `g(i)` is `true`. */
549 template <class G>
550 __device__ void select(const G& g) {
551 assert(TPB == blockDim.x);
552 // indexes: 0 1 2 3 (indexes of the propagators)
553 // mask: 1 0 0 1 (filtering entailed functions)
554 // sum: 1 1 1 2 (inclusive prefix sum)
555 // indexes2: 0 3 (new indexes of the propagators)
556 if(indexes.size() == 0) {
557 return;
558 }
559
560 /** I. We perform a parallel map to detect the active functions. */
561 for(int i = threadIdx.x; i < indexes.size(); i += blockDim.x) {
562 mask[i] = g(indexes[i]);
563 }
564
565 /** II. We then compute the prefix sum of the mask in order to compute the new indexes of the active functions. */
566 int n = round_multiple_TPB(indexes.size());
567 for(int i = threadIdx.x; i < n; i += blockDim.x) {
568 BlockScan(cub_prefixsum_tmp).InclusiveSum(mask[i], sum[i]);
569 __syncthreads(); // required by BlockScan to reuse the temporary storage.
570 }
571 for(int i = blockDim.x + threadIdx.x; i < n; i += blockDim.x) {
572 sum[i] += sum[i - threadIdx.x - 1];
573 __syncthreads();
574 }
575
576 /** III. We compute the new indexes of the active functions. */
577 if(threadIdx.x == 0) {
578 battery::swap(indexes, indexes2);
579 indexes.resize(sum[indexes2.size()-1]);
580 }
581 __syncthreads();
582 for(int i = threadIdx.x; i < indexes2.size(); i += blockDim.x) {
583 if(mask[i]) {
584 indexes[sum[i]-1] = indexes2[i];
585 }
586 }
587 }
588
589 template <class Alloc = allocator_type>
590 using snapshot_type = battery::vector<int, Alloc>;
591
592 template <class Alloc = allocator_type>
593 CUDA snapshot_type<Alloc> snapshot(const Alloc& alloc = Alloc()) const {
594 return snapshot_type<Alloc>(indexes, alloc);
595 }
596
597 template <class Alloc>
598 __device__ void restore_par(const snapshot_type<Alloc>& snap) {
599 for(int i = threadIdx.x; i < snap.size(); i += blockDim.x) {
600 indexes[i] = snap[i];
601 }
602 if(threadIdx.x == 0) {
603 assert(snap.size() <= indexes.capacity());
604 indexes.resize(snap.size());
605 }
606 }
607};
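// Illustrative usage sketch (not part of the original header): a block-local fixpoint that
// progressively deactivates entailed functions. `fp` is assumed to be a
// `FixpointSubsetGPU<BlockAsynchronousFixpointGPU<>, Alloc, TPB>` living in memory visible to
// the whole block; `store`, `deds` and `is_entailed` are hypothetical.
//
//   fp.init(deds->size());
//   fp.fixpoint([&](int i) { return (*deds)[i].deduce(*store); });
//   fp.select([&](int i) { return !is_entailed(i); });  // keep only the non-entailed functions.
//   fp.fixpoint([&](int i) { return (*deds)[i].deduce(*store); });
//   fp.destroy();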
608
609#endif
610
611} // namespace lala
612
613#endif