8#include "battery/memory.hpp"
9#include "battery/vector.hpp"
12 #include <cooperative_groups.h>
26 size_t n = a.num_deductions();
27 bool has_changed =
false;
28 for(
size_t i = 0; i < n; ++i) {
29 has_changed |= a.deduce(i);
36 size_t iterations = 0;
38 while(changed && !a.is_bot()) {
40 has_changed.join(changed);
// NOTE(review): this listing is an extraction-garbled fragment — documentation
// line numbers are fused into the code text and several statements, braces and
// preprocessor branches are truncated. Comments below describe only what the
// visible lines establish; verify against the full source.
//
// Parallel fixpoint engine parameterized by a cooperative-groups group type
// (e.g. grid_group) and a memory/atomicity policy. The `changed` / `is_bot`
// flags are triple-buffered and rotated modulo 3 across fixpoint iterations.
64template <
class Group,
class Memory>
65class AsynchronousIterationGPU {
67 using memory_type = Memory;
68 using group_type = Group;
70 using atomic_bool = B<memory_type>;
  // Triple-buffered status flags, indexed (i % 3) in fixpoint() below.
71 atomic_bool changed[3];
72 atomic_bool is_bot[3];
  // Diagnostic for the non-device compilation path: this class is intended
  // for GPU-device execution only (message printed when reached off-device).
75 CUDA
void assert_cuda_arch() {
76 printf(
"AsynchronousIterationGPU must be used on the GPU device only.\n");
  // (Re)initialize the rotating flags: changed[0] joined with `true` so the
  // first fixpoint iteration runs; the other slots are lowered to `false`.
81 changed[0].join(
true);
82 changed[1].meet(
false);
83 changed[2].meet(
false);
84 for(
int i = 0; i < 3; ++i) {
85 is_bot[i].meet(
false);
  // Constructor taking the cooperative group (initializer list truncated in
  // this listing); synchronizes the whole group before use.
90 CUDA AsynchronousIterationGPU(
const Group& group):
98 cooperative_groups::sync(group);
  // One deduction pass over abstract element `a`: thread `t` of the group
  // handles deduction indices t, t+num_threads, t+2*num_threads, ... and
  // ORs into `has_changed` whether any deduce() call changed `a`.
103 CUDA
bool iterate(A& a) {
104 #ifndef __CUDA_ARCH__
108 size_t n = a.num_deductions();
109 bool has_changed =
false;
110 for (
size_t t = group.thread_rank(); t < n; t += group.num_threads()) {
111 has_changed |= a.deduce(t);
  // __syncwarp() between strides, skipped on the final stride.
  // NOTE(review): presumably keeps warp lanes converged between deduction
  // rounds — confirm intent against the full source.
112 if((t-group.thread_rank()) + group.num_threads() < n) __syncwarp();
  // Fixpoint loop with external cancellation: iterate while the previous
  // slot reported a change and the element has not become bottom.
118 template <
class A,
class M>
119 CUDA
size_t fixpoint(A& a, B<M>& has_changed,
volatile bool* stop) {
120 #ifndef __CUDA_ARCH__
  // Slot (i-1)%3 holds the previous iteration's status; slot (i+1)%3 is
  // lowered to `false` to prepare it for the next iteration.
127 for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
128 changed[i%3].join(iterate(a));
129 changed[(i+1)%3].
meet(
false);
130 is_bot[i%3].join(a.is_bot());
  // Joining *stop into is_bot lets an external flag terminate the loop.
131 is_bot[i%3].join(*stop);
  // Fold the last iterations' change flags into the caller's accumulator.
135 has_changed.join(changed[1]);
136 has_changed.join(changed[2]);
// Grid-wide instantiation: fixpoint iteration across all blocks of a
// cooperative kernel launch, using cooperative_groups::grid_group for
// synchronization. NOTE(review): atomic_memory_grid presumably provides
// grid-scoped atomicity for the flags — confirm in battery/memory.hpp.
using GridAsynchronousIterationGPU = AsynchronousIterationGPU<cooperative_groups::grid_group, battery::atomic_memory_grid>;
// NOTE(review): extraction-garbled fragment (fused listing numbers, truncated
// bodies); comments describe only what the visible lines establish.
//
// Block-local variant of the fixpoint engine: one CUDA block iterates to a
// fixpoint, parallelizing each deduction pass over threadIdx.x / blockDim.x
// with block-scoped atomic flags. Same triple-buffered (mod 3) flag scheme
// as AsynchronousIterationGPU.
164class BlockAsynchronousIterationGPU {
166 using memory_type = battery::atomic_memory_block;
168 using atomic_bool = B<memory_type>;
  // Triple-buffered status flags, indexed (i % 3) in fixpoint() below.
169 atomic_bool changed[3];
170 atomic_bool is_bot[3];
  // Diagnostic for the non-device compilation path: device-only class.
172 CUDA
void assert_cuda_arch() {
173 printf(
"BlockAsynchronousIterationGPU must be used on the GPU device only.\n");
  // (Re)initialize the rotating flags: changed[0] joined with `true` so the
  // first fixpoint iteration runs; the other slots are lowered to `false`.
178 changed[0].join(
true);
179 changed[1].meet(
false);
180 changed[2].meet(
false);
181 for(
int i = 0; i < 3; ++i) {
182 is_bot[i].meet(
false);
187 BlockAsynchronousIterationGPU() =
default;
  // Block-level barrier; body truncated in this listing beyond the
  // host-compilation guard.
189 CUDA
void barrier() {
190 #ifndef __CUDA_ARCH__
  // One deduction pass: thread threadIdx.x handles deduction indices
  // t, t+blockDim.x, t+2*blockDim.x, ... and ORs whether any deduce()
  // call changed `a`.
198 CUDA
bool iterate(A& a) {
199 #ifndef __CUDA_ARCH__
203 size_t n = a.num_deductions();
204 bool has_changed =
false;
205 for (
size_t t = threadIdx.x; t < n; t += blockDim.x) {
206 has_changed |= a.deduce(t);
  // __syncwarp() between strides, skipped on the final stride.
  // NOTE(review): presumably keeps warp lanes converged between deduction
  // rounds — confirm against the full source.
207 if((t-threadIdx.x) + blockDim.x < n) __syncwarp();
  // Fixpoint loop with external cancellation via *stop (joined into is_bot).
213 template <
class A,
class M>
214 CUDA
size_t fixpoint(A& a, B<M>& has_changed,
volatile bool* stop) {
215 #ifndef __CUDA_ARCH__
  // Slot (i-1)%3 holds the previous iteration's status; slot (i+1)%3 is
  // lowered to `false` to prepare it for the next iteration.
222 for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
223 changed[i%3].join(iterate(a));
224 changed[(i+1)%3].
meet(
false);
225 is_bot[i%3].join(a.is_bot());
226 is_bot[i%3].join(*stop);
  // Fold the last iterations' change flags into the caller's accumulator.
230 has_changed.join(changed[1]);
231 has_changed.join(changed[2]);
  // Overload without the has_changed accumulator or external stop flag.
237 CUDA
size_t fixpoint(A& a) {
238 #ifndef __CUDA_ARCH__
245 for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
246 changed[i%3].join(iterate(a));
247 changed[(i+1)%3].
meet(
false);
248 is_bot[i%3].join(a.is_bot());
  // Restricted deduction pass: only the deduction indices listed in
  // `indexes` are run (indexes.size() <= a.num_deductions() is asserted).
255 template <
class Alloc,
class A>
256 CUDA
bool iterate(
const battery::vector<int, Alloc>& indexes, A& a) {
257 #ifndef __CUDA_ARCH__
261 assert(a.num_deductions() >= indexes.size());
262 bool has_changed =
false;
263 for (
size_t t = threadIdx.x; t < indexes.size(); t += blockDim.x) {
264 has_changed |= a.deduce(indexes[t]);
  // Same stride-convergence __syncwarp() pattern as the full iterate().
265 if((t-threadIdx.x) + blockDim.x < indexes.size()) __syncwarp();
  // Fixpoint restricted to the deductions listed in `indexes`.
271 template <
class Alloc,
class A>
272 CUDA
size_t fixpoint(
const battery::vector<int, Alloc>& indexes, A& a) {
273 #ifndef __CUDA_ARCH__
280 for(i = 1; changed[(i-1)%3] && !is_bot[(i-1)%3]; ++i) {
281 changed[i%3].join(iterate(indexes, a));
282 changed[(i+1)%3].
meet(
false);
283 is_bot[i%3].join(a.is_bot());
Definition fixpoint.hpp:20
CUDA local::B iterate(A &a)
Definition fixpoint.hpp:25
CUDA local::B fixpoint(A &a)
Definition fixpoint.hpp:47
CUDA void barrier()
Definition fixpoint.hpp:22
CUDA size_t fixpoint(A &a, local::B &has_changed)
Definition fixpoint.hpp:35
Definition abstract_deps.hpp:14
CUDA constexpr auto meet(const Interval< L > &, const Interval< K > &)