Lattice Land Core Library
fixpoint.hpp
// Copyright 2022 Pierre Talbot

#ifndef FIXPOINT_HPP
#define FIXPOINT_HPP

#include "logic/logic.hpp"
#include "b.hpp"
#include "battery/memory.hpp"
#include "battery/vector.hpp"

#ifdef __CUDACC__
  #include <cooperative_groups.h>
#endif

namespace lala {

/** A simple form of sequential fixpoint computation based on the Kleene fixpoint iteration.
 * At each iteration, the deduction operations \f$ f_1, \ldots, f_n \f$ are composed by functional composition \f$ f = f_n \circ \ldots \circ f_1 \f$.
 * This strategy corresponds to the Gauss-Seidel iteration method. */
class GaussSeidelIteration {
public:
  CUDA void barrier() {}

  template <class A>
  CUDA local::B iterate(A& a) {
    size_t n = a.num_deductions();
    bool has_changed = false;
    for(size_t i = 0; i < n; ++i) {
      has_changed |= a.deduce(i);
    }
    return has_changed;
  }

  template <class A>
  CUDA size_t fixpoint(A& a, local::B& has_changed) {
    size_t iterations = 0;
    local::B changed(true);
    while(changed && !a.is_bot()) {
      changed = iterate(a);
      has_changed.join(changed);
      iterations++;
    }
    return iterations;
  }

  template <class A>
  CUDA local::B fixpoint(A& a) {
    local::B has_changed(false);
    fixpoint(a, has_changed);
    return has_changed;
  }
};
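
/* Usage sketch (illustration only, not part of the library): `MyDomain` is a hypothetical
 * abstract domain exposing the interface expected by the fixpoint engines: `num_deductions()`
 * returns the number of deduction functions, `deduce(i)` applies the ith one and returns `true`
 * if the domain changed, and `is_bot()` signals failure (bottom).
 *
 *   struct MyDomain {
 *     int lb = 0, ub = 10;                        // a toy integer interval [lb..ub].
 *     CUDA size_t num_deductions() const { return 2; }
 *     CUDA bool deduce(size_t i) {                // two toy deductions tightening the bounds.
 *       if(i == 0 && lb < 3) { lb = 3; return true; }
 *       if(i == 1 && ub > 7) { ub = 7; return true; }
 *       return false;
 *     }
 *     CUDA bool is_bot() const { return lb > ub; }
 *   };
 *
 *   MyDomain a;
 *   GaussSeidelIteration fp;
 *   local::B has_changed = fp.fixpoint(a);        // iterate until no deduction changes `a`.
 */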

#ifdef __CUDACC__

/** A simple form of fixpoint computation based on the Kleene fixpoint iteration.
 * At each iteration, the deduction operations \f$ f_1, \ldots, f_n \f$ are composed by parallel composition \f$ f = f_1 \| \ldots \| f_n \f$, meaning they are executed in parallel by different threads.
 * This is called asynchronous iteration and is due to (Cousot, Asynchronous iterative methods for solving a fixed point system of monotone equations in a complete lattice, 1977).
 * The underlying lattice on which we iterate must provide two methods:
 * - `a.deduce(int)`: calls the ith deduction function and returns `true` if `a` has changed.
 * - `a.num_deductions()`: returns the number of deduction functions.
 * \tparam Group is a CUDA cooperative group class.
 * \tparam Memory is an atomic memory that must be compatible with the chosen cooperative group (e.g., do not use atomic_memory_block if the group contains multiple blocks). */
template <class Group, class Memory>
class AsynchronousIterationGPU {
public:
  using memory_type = Memory;
  using group_type = Group;
private:
  using atomic_bool = B<memory_type>;
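  // Flags indexed modulo 3 by the iteration number `i` of `fixpoint`: slot i%3 accumulates the
  // changes of the current iteration, slot (i-1)%3 holds the result of the previous iteration
  // (read by the loop condition), and slot (i+1)%3 is cleared for the next one. Three slots
  // (rather than two) presumably ensure that a thread clearing the next slot cannot overwrite
  // the slot another thread is still reading.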
  atomic_bool changed[3];
  atomic_bool is_bot[3];
  Group group;

  CUDA void assert_cuda_arch() {
    printf("AsynchronousIterationGPU must be used on the GPU device only.\n");
    assert(0);
  }

  CUDA void reset() {
    changed[0].join(true);
    changed[1].meet(false);
    changed[2].meet(false);
    for(int i = 0; i < 3; ++i) {
      is_bot[i].meet(false);
    }
  }

public:
  CUDA AsynchronousIterationGPU(const Group& group):
    group(group)
  {}

  CUDA void barrier() {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
    #else
      cooperative_groups::sync(group);
    #endif
  }

  template <class A>
  CUDA bool iterate(A& a) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return false;
    #else
      size_t n = a.num_deductions();
      bool has_changed = false;
      for (size_t t = group.thread_rank(); t < n; t += group.num_threads()) {
        has_changed |= a.deduce(t);
        if((t-group.thread_rank()) + group.num_threads() < n) __syncwarp();
      }
      return has_changed;
    #endif
  }

  template <class A, class M>
  CUDA size_t fixpoint(A& a, B<M>& has_changed, volatile bool* stop) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return 0;
    #else
      reset();
      barrier();
      size_t i;
      for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
        changed[i%3].join(iterate(a));
        changed[(i+1)%3].meet(false); // reinitialize changed for the next iteration.
        is_bot[i%3].join(a.is_bot());
        is_bot[i%3].join(*stop);
        barrier();
      }
      // It has changed if we performed several iterations, or if the first iteration changed the abstract domain.
      has_changed.join(changed[1]);
      has_changed.join(changed[2]);
      return i - 1;
    #endif
  }

  template <class A>
  CUDA size_t fixpoint(A& a) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return 0;
    #else
      reset();
      barrier();
      size_t i;
      for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
        changed[i%3].join(iterate(a));
        changed[(i+1)%3].meet(false); // reinitialize changed for the next iteration.
        is_bot[i%3].join(a.is_bot());
        barrier();
      }
      return i - 1;
    #endif
  }
};
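
/* Usage sketch (hypothetical, not part of the library): the engine and its atomic flags must be
 * visible to every thread of the group, so a grid-wide engine is typically constructed once in
 * global (or managed) memory and shared by the whole grid; the kernel must be launched
 * cooperatively (e.g., via cudaLaunchCooperativeKernel) for grid-wide synchronization to be legal.
 * `fixpoint_kernel` and `engine_mem` below are illustrative names only.
 *
 *   // GridFP is the grid-wide instantiation aliased below as GridAsynchronousIterationGPU.
 *   using GridFP = AsynchronousIterationGPU<cooperative_groups::grid_group, battery::atomic_memory_grid>;
 *
 *   template <class A>
 *   __global__ void fixpoint_kernel(A* a, void* engine_mem) {
 *     auto grid = cooperative_groups::this_grid();
 *     auto* fp = static_cast<GridFP*>(engine_mem);
 *     if(grid.thread_rank() == 0) {
 *       new(engine_mem) GridFP(grid);   // construct the shared engine once.
 *     }
 *     grid.sync();
 *     fp->fixpoint(*a);                 // every thread of the grid participates.
 *   }
 */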

// using BlockAsynchronousIterationGPU = AsynchronousIterationGPU<cooperative_groups::thread_block, battery::atomic_memory_block>;
using GridAsynchronousIterationGPU = AsynchronousIterationGPU<cooperative_groups::grid_group, battery::atomic_memory_grid>;

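/** Same asynchronous Kleene iteration as `AsynchronousIterationGPU`, but specialized to a single
 * CUDA block: threads are identified by `threadIdx.x`, synchronization uses `__syncthreads()`, and
 * the `changed`/`is_bot` flags follow the same rotating scheme in `battery::atomic_memory_block`.
 * It additionally provides `iterate` and `fixpoint` overloads restricted to a subset of the
 * deduction functions given by their indexes. */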
class BlockAsynchronousIterationGPU {
public:
  using memory_type = battery::atomic_memory_block;
private:
  using atomic_bool = B<memory_type>;
  atomic_bool changed[3];
  atomic_bool is_bot[3];

  CUDA void assert_cuda_arch() {
    printf("BlockAsynchronousIterationGPU must be used on the GPU device only.\n");
    assert(0);
  }

  CUDA void reset() {
    changed[0].join(true);
    changed[1].meet(false);
    changed[2].meet(false);
    for(int i = 0; i < 3; ++i) {
      is_bot[i].meet(false);
    }
  }

public:
  BlockAsynchronousIterationGPU() = default;

  CUDA void barrier() {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
    #else
      __syncthreads();
    #endif
  }

  template <class A>
  CUDA bool iterate(A& a) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return false;
    #else
      size_t n = a.num_deductions();
      bool has_changed = false;
      for (size_t t = threadIdx.x; t < n; t += blockDim.x) {
        has_changed |= a.deduce(t);
        if((t-threadIdx.x) + blockDim.x < n) __syncwarp();
      }
      return has_changed;
    #endif
  }

  template <class A, class M>
  CUDA size_t fixpoint(A& a, B<M>& has_changed, volatile bool* stop) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return 0;
    #else
      reset();
      barrier();
      size_t i;
      for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
        changed[i%3].join(iterate(a));
        changed[(i+1)%3].meet(false); // reinitialize changed for the next iteration.
        is_bot[i%3].join(a.is_bot());
        is_bot[i%3].join(*stop);
        barrier();
      }
      // It has changed if we performed several iterations, or if the first iteration changed the abstract domain.
      has_changed.join(changed[1]);
      has_changed.join(changed[2]);
      return i - 1;
    #endif
  }

  template <class A>
  CUDA size_t fixpoint(A& a) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return 0;
    #else
      reset();
      barrier();
      size_t i;
      for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
        changed[i%3].join(iterate(a));
        changed[(i+1)%3].meet(false); // reinitialize changed for the next iteration.
        is_bot[i%3].join(a.is_bot());
        barrier();
      }
      return i - 1;
    #endif
  }

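  /** Same as `iterate(A&)`, but only the deduction functions whose indexes appear in `indexes`
   * are applied (one per thread, striding over the block). */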
  template <class Alloc, class A>
  CUDA bool iterate(const battery::vector<int, Alloc>& indexes, A& a) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return false;
    #else
      assert(a.num_deductions() >= indexes.size());
      bool has_changed = false;
      for (size_t t = threadIdx.x; t < indexes.size(); t += blockDim.x) {
        has_changed |= a.deduce(indexes[t]);
        if((t-threadIdx.x) + blockDim.x < indexes.size()) __syncwarp();
      }
      return has_changed;
    #endif
  }

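  /** Same as `fixpoint(A&)`, but the Kleene iteration is restricted to the deduction functions
   * whose indexes appear in `indexes`. */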
  template <class Alloc, class A>
  CUDA size_t fixpoint(const battery::vector<int, Alloc>& indexes, A& a) {
    #ifndef __CUDA_ARCH__
      assert_cuda_arch();
      return 0;
    #else
      reset();
      barrier();
      size_t i;
      for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
        changed[i%3].join(iterate(indexes, a));
        changed[(i+1)%3].meet(false); // reinitialize changed for the next iteration.
        is_bot[i%3].join(a.is_bot());
        barrier();
      }
      return i - 1;
    #endif
  }
};

#endif

} // namespace lala

#endif