3#ifndef CUDA_BATTERY_ALLOCATOR_HPP
4#define CUDA_BATTERY_ALLOCATOR_HPP
29class global_allocator {
31 CUDA NI void* allocate(
size_t bytes) {
36 void* data = std::malloc(bytes);
37 if (data ==
nullptr) {
38 printf(
"Allocation of device memory failed\n");
43 cudaError_t rc = cudaMalloc(&data, bytes);
44 if (rc != cudaSuccess) {
45 std::cerr <<
"Allocation of global memory failed: " << cudaGetErrorString(rc) << std::endl;
52 CUDA NI void deallocate(
void* data) {
56 cudaError_t rc = cudaFree(data);
57 if (rc != cudaSuccess) {
58 std::cerr <<
"Free of global memory failed: " << cudaGetErrorString(rc) << std::endl;
63 CUDA bool operator==(
const global_allocator&)
const {
return true; }
68class managed_allocator {
70 CUDA NI void* allocate(
size_t bytes) {
72 return global_allocator{}.allocate(bytes);
78 cudaError_t rc = cudaMallocManaged(&data, bytes);
79 if (rc != cudaSuccess) {
80 std::cerr <<
"Allocation of managed memory failed: " << cudaGetErrorString(rc) << std::endl;
87 CUDA NI void deallocate(
void* data) {
89 return global_allocator{}.deallocate(data);
91 cudaError_t rc = cudaFree(data);
92 if (rc != cudaSuccess) {
93 std::cerr <<
"Free of managed memory failed: " << cudaGetErrorString(rc) << std::endl;
98 CUDA bool operator==(
const managed_allocator&)
const {
return true; }
110class pinned_allocator {
112 CUDA NI void* allocate(
size_t bytes) {
114 return global_allocator{}.allocate(bytes);
119 void* data =
nullptr;
120 cudaError_t rc = cudaMallocHost(&data, bytes);
121 if (rc != cudaSuccess) {
122 std::cerr <<
"Allocation of pinned memory failed: " << cudaGetErrorString(rc) << std::endl;
129 CUDA NI void deallocate(
void* data) {
131 return global_allocator{}.deallocate(data);
133 cudaError_t rc = cudaFreeHost(data);
134 if (rc != cudaSuccess) {
135 std::cerr <<
"Free of pinned memory failed: " << cudaGetErrorString(rc) << std::endl;
140 CUDA bool operator==(
const pinned_allocator&)
const {
return true; }
145CUDA inline void*
operator new(
size_t bytes, battery::managed_allocator& p) {
146 return p.allocate(bytes);
149CUDA inline void operator delete(
void* ptr, battery::managed_allocator& p) {
150 return p.deallocate(ptr);
153CUDA inline void*
operator new(
size_t bytes, battery::global_allocator& p) {
154 return p.allocate(bytes);
157CUDA inline void operator delete(
void* ptr, battery::global_allocator& p) {
161CUDA inline void*
operator new(
size_t bytes, battery::pinned_allocator& p) {
162 return p.allocate(bytes);
165CUDA inline void operator delete(
void* ptr, battery::pinned_allocator& p) {
177static const int power2[17] = {0, 1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 16};
187 struct control_block {
192 size_t num_deallocations;
193 size_t num_allocations;
194 size_t unaligned_wasted_bytes;
197 CUDA control_block(
unsigned char* mem,
size_t capacity,
size_t alignment)
198 : mem(mem), capacity(capacity), offset(0), alignment(alignment), num_deallocations(0), num_allocations(0), unaligned_wasted_bytes(0), counter(1)
201 CUDA void* allocate(
size_t bytes) {
206 size_t smallest_alignment = (bytes > alignment || alignment > 16) ? alignment : impl::power2[bytes];
207 if(
size_t(&mem[offset]) % smallest_alignment != 0) {
208 size_t old_offset = offset;
209 offset += smallest_alignment - (size_t(&mem[offset]) % smallest_alignment);
210 unaligned_wasted_bytes += (offset - old_offset);
212 assert(offset + bytes <= capacity);
213 assert((
size_t)&mem[offset] % smallest_alignment == 0);
214 void* m = (
void*)&mem[offset];
220 CUDA void deallocate(
void* ptr) {
227 control_block* block;
235 if(block !=
nullptr) {
243 other.block =
nullptr;
247 : block(::new control_block(mem,
capacity, alignment))
251 CUDA void destroy() {
253 if(block->counter == 0) {
260 if(block !=
nullptr) {
266 if(block !=
nullptr) {
270 other.block =
nullptr;
275 size_t old = block->alignment;
276 block->alignment = alignment;
281 return block->allocate(bytes);
285 block->deallocate(ptr);
290 printf(
"%% %" PRIu64
" / %" PRIu64
" used [%" PRIu64
"/%" PRIu64
"]KB [%" PRIu64
"/%" PRIu64
"]MB\n",
291 block->offset, block->capacity,
292 block->offset/1000, block->capacity/1000,
293 block->offset/1000/1000, block->capacity/1000/1000);
294 printf(
"%% %" PRIu64
" / %" PRIu64
" wasted for alignment [%" PRIu64
"/%" PRIu64
"]KB [%" PRIu64
"/%" PRIu64
"]MB\n",
295 block->unaligned_wasted_bytes, block->offset,
296 block->unaligned_wasted_bytes/1000, block->offset/1000,
297 block->unaligned_wasted_bytes/1000/1000, block->offset/1000/1000);
298 printf(
"%% %" PRIu64
" allocations and %" PRIu64
" deallocations\n", block->num_allocations, block->num_deallocations);
302 return block->offset;
306 return block->capacity;
310 return block->num_deallocations;
314 return block->num_allocations;
318 return block->unaligned_wasted_bytes;
322 return block == rhs.block;
328 return p.allocate(bytes);
332 return p.deallocate(ptr);
344 return std::malloc(bytes);
356 return p.allocate(bytes);
360 return p.deallocate(ptr);
365 template <
class Allocator,
class InternalAllocator = Allocator>
367 struct control_block {
370 size_t num_deallocations;
371 size_t num_allocations;
372 size_t total_bytes_allocated;
374 CUDA control_block(
const Allocator& allocator)
375 : allocator(allocator), counter(1), num_deallocations(0), num_allocations(0), total_bytes_allocated(0)
378 CUDA NI void* allocate(
size_t bytes) {
380 total_bytes_allocated += bytes;
381 return allocator.allocate(bytes);
384 CUDA NI void deallocate(
void* ptr) {
387 allocator.deallocate(ptr);
392 InternalAllocator internal_allocator;
393 control_block* block;
399 : internal_allocator(other.internal_allocator), block(other.block)
405 : internal_allocator(internal_allocator)
407 block =
static_cast<control_block*
>(this->internal_allocator.allocate(
sizeof(control_block)));
408 new(block) control_block(allocator);
413 if(block->counter == 0) {
414 internal_allocator.deallocate(block);
419 return block->allocate(bytes);
423 block->deallocate(ptr);
427 return block->num_allocations;
431 return block->num_deallocations;
435 return block->total_bytes_allocated;
439 return block == rhs.block;
Definition allocator.hpp:185
CUDA NI ~pool_allocator()
Definition allocator.hpp:259
CUDA size_t used() const
Definition allocator.hpp:301
CUDA NI void deallocate(void *ptr)
Definition allocator.hpp:284
CUDA NI pool_allocator(unsigned char *mem, size_t capacity, size_t alignment=alignof(std::max_align_t))
Definition allocator.hpp:246
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:280
CUDA NI pool_allocator()=default
CUDA size_t unaligned_wasted_bytes() const
Definition allocator.hpp:317
CUDA size_t num_allocations() const
Definition allocator.hpp:313
CUDA bool operator==(const pool_allocator &rhs) const
Definition allocator.hpp:321
CUDA NI pool_allocator(const pool_allocator &other)
Definition allocator.hpp:232
CUDA NI pool_allocator & operator=(pool_allocator &&other)
Definition allocator.hpp:265
CUDA size_t align_at(size_t alignment)
Definition allocator.hpp:274
CUDA size_t capacity() const
Definition allocator.hpp:305
CUDA NI pool_allocator(pool_allocator &&other)
Definition allocator.hpp:240
CUDA NI void print() const
Definition allocator.hpp:288
CUDA size_t num_deallocations() const
Definition allocator.hpp:309
Definition allocator.hpp:338
CUDA NI void deallocate(void *data)
Definition allocator.hpp:347
CUDA bool operator==(const standard_allocator &) const
Definition allocator.hpp:351
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:340
Definition allocator.hpp:366
CUDA NI statistics_allocator(const statistics_allocator &other)
Definition allocator.hpp:398
CUDA size_t num_deallocations() const
Definition allocator.hpp:430
CUDA bool operator==(const this_type &rhs) const
Definition allocator.hpp:438
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:418
CUDA size_t total_bytes_allocated() const
Definition allocator.hpp:434
CUDA NI statistics_allocator(const Allocator &allocator=Allocator(), const InternalAllocator &internal_allocator=InternalAllocator())
Definition allocator.hpp:404
CUDA NI void deallocate(void *ptr)
Definition allocator.hpp:422
CUDA NI ~statistics_allocator()
Definition allocator.hpp:411
CUDA size_t num_allocations() const
Definition allocator.hpp:426
Definition algorithm.hpp:10
CUDA bool operator==(const string< Alloc1 > &lhs, const string< Alloc2 > &rhs)
Definition string.hpp:110
#define CUDA
Definition utility.hpp:59
#define NI
Definition utility.hpp:62