Cuda battery library
Loading...
Searching...
No Matches
memory.hpp
Go to the documentation of this file.
1// Copyright 2022 Pierre Talbot
2
3#ifndef CUDA_BATTERY_MEMORY_HPP
4#define CUDA_BATTERY_MEMORY_HPP
5
/** \file memory.hpp
 * An abstraction of memory load and store, useful for writing a single version of an algorithm that works either sequentially or in parallel, and on CPU or GPU.
 * Note that these classes are mainly designed to work with a relaxed memory ordering; we are unsure of their applicability to other kinds of memory ordering.
 */
10
11#include <cassert>
12#include <type_traits>
13#include <utility>
14
15#ifdef __CUDACC__
16 #include <cuda/atomic>
17#else
18 #include <atomic>
19#endif
20
21#include "utility.hpp"
22
23namespace battery {
24
/** This is to be deleted in the future, just there because atomic in CUDA 12.4 does not support ::value_type. */
namespace impl {
  /** Maps an atomic-like type `T` to the type of the value it holds.
   * The primary template simply forwards to the nested alias `T::value_type`. */
  template <class T>
  struct value_type_of {
    using type = typename T::value_type;
  };
#ifdef __CUDACC__
  /** Specialization for `cuda::atomic<V, Scope>`: the held type is read from
   * the template argument `V` instead of a nested `value_type` alias. */
  template <class V, cuda::thread_scope Scope>
  struct value_type_of<cuda::atomic<V, Scope>> {
    using type = V;
  };
#endif
}
38
39
40template <class A>
41class copyable_atomic: public A {
42public:
43 using value_type = typename impl::value_type_of<A>::type;
44 copyable_atomic() = default;
46 copyable_atomic(const copyable_atomic& other): A(other.load()) {}
47 copyable_atomic(copyable_atomic&& other): A(other.load()) {}
49 this->store(other.load());
50 return *this;
51 }
53 this->store(other.load());
54 return *this;
55 }
56};
57
58/** Represent the memory of a variable that cannot be accessed by multiple threads. */
59template <bool read_only>
60class memory {
61public:
62 template <class T> using atomic_type = T;
63
64 /** Indicate this memory is written by a single thread. */
65 constexpr static const bool sequential = true;
66
67public:
68 template <class T>
69 CUDA INLINE static constexpr T load(const atomic_type<T>& a) {
70 return a;
71 }
72
73 template <class T>
74 CUDA INLINE static constexpr std::enable_if_t<!read_only, void> store(atomic_type<T>& a, T v) {
75 a = v;
76 }
77
78 template <class T>
79 CUDA INLINE static constexpr std::enable_if_t<!read_only, T> exchange(atomic_type<T>& a, T v) {
80 return std::exchange(a, std::move(v));
81 }
82};
83
86
87#ifdef __CUDACC__
88
89/** Memory load and store operations relative to a cuda scope (per-thread, block, grid, ...) and given a certain memory order (by default relaxed). */
90template <cuda::thread_scope scope, cuda::memory_order mem_order = cuda::memory_order_relaxed>
91class atomic_memory_scoped {
92public:
93 template <class T> using atomic_type = copyable_atomic<cuda::atomic<T, scope>>;
94 constexpr static const bool sequential = false;
95
96 template <class T>
97 CUDA INLINE static T load(const atomic_type<T>& a) {
98 return a.load(mem_order);
99 }
100
101 template <class T>
102 CUDA INLINE static void store(atomic_type<T>& a, T v) {
103 a.store(v, mem_order);
104 }
105
106 template <class T>
107 CUDA INLINE static T exchange(atomic_type<T>& a, T v) {
108 return a.exchange(v, mem_order);
109 }
110};
111
112using atomic_memory_block = atomic_memory_scoped<cuda::thread_scope_block>;
113using atomic_memory_grid = atomic_memory_scoped<cuda::thread_scope_device>;
114using atomic_memory_multi_grid = atomic_memory_scoped<cuda::thread_scope_system>;
115
116#endif // __CUDACC__
117
// Select the atomic type and memory-order constants once, so that the code below
// can be written identically whether or not a CUDA compiler is used.
#ifdef __CUDACC__
  /// @private
  namespace impl {
    /** Atomic type used under a CUDA compiler: `cuda::std::atomic<T>`. */
    template <class T>
    using atomic_t = cuda::std::atomic<T>;
  }
  /// @private
  using memory_order = cuda::std::memory_order;
  /// @private
  constexpr memory_order memory_order_relaxed = cuda::std::memory_order_relaxed;
  /// @private
  constexpr memory_order memory_order_seq_cst = cuda::std::memory_order_seq_cst;
#else
  /// @private
  namespace impl {
    /** Atomic type used otherwise: `std::atomic<T>`. */
    template <class T>
    using atomic_t = std::atomic<T>;
  }
  /// @private
  using memory_order = std::memory_order;
  /// @private
  constexpr memory_order memory_order_relaxed = std::memory_order_relaxed;
  /// @private
  constexpr memory_order memory_order_seq_cst = std::memory_order_seq_cst;
#endif
143
144/** Use the standard C++ atomic type, either provided by libcudacxx if we compile with a CUDA compiler, or through the STL otherwise. */
145template <memory_order mem_order = memory_order_relaxed>
147public:
149 constexpr static const bool sequential = false;
150
151 template <class T>
152 CUDA INLINE static T load(const atomic_type<T>& a) {
153 return a.load(mem_order);
154 }
155
156 template <class T>
157 CUDA INLINE static void store(atomic_type<T>& a, T v) {
158 a.store(v, mem_order);
159 }
160
161 template <class T>
163 return a.exchange(v, mem_order);
164 }
165};
166
167}
168
169#endif
Definition memory.hpp:146
CUDA static INLINE void store(atomic_type< T > &a, T v)
Definition memory.hpp:157
CUDA static INLINE T load(const atomic_type< T > &a)
Definition memory.hpp:152
static constexpr const bool sequential
Definition memory.hpp:149
CUDA static INLINE T exchange(atomic_type< T > &a, T v)
Definition memory.hpp:162
copyable_atomic< impl::atomic_t< T > > atomic_type
Definition memory.hpp:148
Definition memory.hpp:41
typename impl::value_type_of< A >::type value_type
Definition memory.hpp:43
copyable_atomic(copyable_atomic &&other)
Definition memory.hpp:47
copyable_atomic(const copyable_atomic &other)
Definition memory.hpp:46
copyable_atomic & operator=(copyable_atomic &&other)
Definition memory.hpp:52
CUDA copyable_atomic(value_type x)
Definition memory.hpp:45
copyable_atomic & operator=(const copyable_atomic &other)
Definition memory.hpp:48
Definition memory.hpp:60
CUDA static INLINE constexpr T load(const atomic_type< T > &a)
Definition memory.hpp:69
static constexpr const bool sequential
Definition memory.hpp:65
T atomic_type
Definition memory.hpp:62
CUDA static INLINE constexpr std::enable_if_t<!read_only, void > store(atomic_type< T > &a, T v)
Definition memory.hpp:74
CUDA static INLINE constexpr std::enable_if_t<!read_only, T > exchange(atomic_type< T > &a, T v)
Definition memory.hpp:79
Definition algorithm.hpp:10
#define INLINE
Definition utility.hpp:63
#define CUDA
Definition utility.hpp:59