8#include "battery/memory.hpp"
11 #include <cooperative_groups.h>
25 size_t n = a.num_refinements();
26 for(
size_t i = 0; i < n; ++i) {
27 a.refine(i, has_changed);
33 size_t iterations = 0;
35 while(changed && !a.
is_top()) {
38 has_changed.
tell(changed);
62template <
class Group,
class Memory,
class Allocator>
63class AsynchronousIterationGPU {
65 using memory_type = Memory;
66 using allocator_type = Allocator;
67 using group_type = Group;
69 using atomic_binc = BInc<memory_type>;
70 battery::vector<atomic_binc, allocator_type> changed;
71 battery::vector<atomic_binc, allocator_type> is_top;
74 CUDA
void assert_cuda_arch() {
75 printf(
"AsynchronousIterationGPU must be used on the GPU device only.\n");
80 changed[0].tell_top();
81 changed[1].dtell_bot();
82 changed[2].dtell_bot();
83 for(
int i = 0; i < is_top.size(); ++i) {
84 is_top[i].dtell_bot();
89 CUDA AsynchronousIterationGPU(
const Group& group,
const allocator_type& alloc = allocator_type()):
90 group(group), changed(3, alloc), is_top(3, alloc)
97 cooperative_groups::sync(group);
101 template <
class A,
class M>
102 CUDA
void iterate(A& a, BInc<M>& has_changed) {
103 #ifndef __CUDA_ARCH__
106 size_t n = a.num_refinements();
107 for (
size_t t = group.thread_rank(); t < n; t += group.num_threads()) {
108 a.refine(t, has_changed);
109 if((t-group.thread_rank()) + group.num_threads() < n) __syncwarp();
114 template <
class A,
class M>
115 CUDA
size_t fixpoint(A& a, BInc<M>& has_changed,
volatile bool* stop) {
116 #ifndef __CUDA_ARCH__
123 for(i = 1; changed[(i-1)%3] && !is_top[(i-1)%3]; ++i) {
124 iterate(a, changed[i%3]);
125 changed[(i+1)%3].dtell_bot();
126 is_top[i%3].tell(a.is_top());
127 is_top[i%3].tell(local::BInc{*stop});
131 has_changed.tell(changed[1]);
132 has_changed.tell(changed[2]);
137 template <
class A,
class M>
138 CUDA
size_t fixpoint(A& a, BInc<M>& has_changed) {
140 return fixpoint(a, has_changed, &stop);
144 CUDA local::BInc fixpoint(A& a) {
145 local::BInc has_changed(
false);
146 fixpoint(a, has_changed);
151template <
class Allocator>
152using BlockAsynchronousIterationGPU = AsynchronousIterationGPU<cooperative_groups::thread_block, battery::atomic_memory_block, Allocator>;
154template <
class Allocator>
155using GridAsynchronousIterationGPU = AsynchronousIterationGPU<cooperative_groups::grid_group, battery::atomic_memory_grid, Allocator>;
Definition fixpoint.hpp:19
CUDA void iterate(A &a, local::BInc &has_changed)
Definition fixpoint.hpp:24
CUDA local::BInc fixpoint(A &a)
Definition fixpoint.hpp:45
CUDA void barrier()
Definition fixpoint.hpp:21
CUDA size_t fixpoint(A &a, local::BInc &has_changed)
Definition fixpoint.hpp:32
Definition primitive_upset.hpp:118
CUDA constexpr local::BInc is_top() const
Definition primitive_upset.hpp:224
CUDA constexpr this_type & dtell_bot()
Definition primitive_upset.hpp:259
CUDA constexpr this_type & tell(const this_type2< M1 > &other, BInc< M2 > &has_changed)
Definition primitive_upset.hpp:239
Definition abstract_deps.hpp:14