3#ifndef CUDA_BATTERY_ALLOCATOR_HPP
4#define CUDA_BATTERY_ALLOCATOR_HPP
29class global_allocator {
31 CUDA NI void* allocate(
size_t bytes) {
36 void* data = std::malloc(bytes);
37 if (data ==
nullptr) {
38 printf(
"Allocation of device memory failed\n");
43 cudaError_t rc = cudaMalloc(&data, bytes);
44 if (rc != cudaSuccess) {
45 std::cerr <<
"Allocation of global memory failed: " << cudaGetErrorString(rc) << std::endl;
52 CUDA NI void deallocate(
void* data) {
56 cudaError_t rc = cudaFree(data);
57 if (rc != cudaSuccess) {
58 std::cerr <<
"Free of global memory failed: " << cudaGetErrorString(rc) << std::endl;
63 CUDA bool operator==(
const global_allocator&)
const {
return true; }
68class managed_allocator {
70 CUDA NI void* allocate(
size_t bytes) {
72 return global_allocator{}.allocate(bytes);
78 cudaError_t rc = cudaMallocManaged(&data, bytes);
79 if (rc != cudaSuccess) {
80 std::cerr <<
"Allocation of managed memory failed: " << cudaGetErrorString(rc) << std::endl;
87 CUDA NI void deallocate(
void* data) {
89 return global_allocator{}.deallocate(data);
91 cudaError_t rc = cudaFree(data);
92 if (rc != cudaSuccess) {
93 std::cerr <<
"Free of managed memory failed: " << cudaGetErrorString(rc) << std::endl;
98 CUDA bool operator==(
const managed_allocator&)
const {
return true; }
110class pinned_allocator {
112 CUDA NI void* allocate(
size_t bytes) {
114 return global_allocator{}.allocate(bytes);
119 void* data =
nullptr;
120 cudaError_t rc = cudaMallocHost(&data, bytes);
121 if (rc != cudaSuccess) {
122 std::cerr <<
"Allocation of pinned memory failed: " << cudaGetErrorString(rc) << std::endl;
129 CUDA NI void deallocate(
void* data) {
131 return global_allocator{}.deallocate(data);
133 cudaError_t rc = cudaFreeHost(data);
134 if (rc != cudaSuccess) {
135 std::cerr <<
"Free of pinned memory failed: " << cudaGetErrorString(rc) << std::endl;
140 CUDA bool operator==(
const pinned_allocator&)
const {
return true; }
145CUDA inline void*
operator new(
size_t bytes, battery::managed_allocator& p) {
146 return p.allocate(bytes);
149CUDA inline void operator delete(
void* ptr, battery::managed_allocator& p) {
150 return p.deallocate(ptr);
153CUDA inline void*
operator new(
size_t bytes, battery::global_allocator& p) {
154 return p.allocate(bytes);
157CUDA inline void operator delete(
void* ptr, battery::global_allocator& p) {
161CUDA inline void*
operator new(
size_t bytes, battery::pinned_allocator& p) {
162 return p.allocate(bytes);
165CUDA inline void operator delete(
void* ptr, battery::pinned_allocator& p) {
177static const int power2[17] = {0, 1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 16};
187 struct control_block {
192 size_t num_deallocations;
193 size_t num_allocations;
194 size_t unaligned_wasted_bytes;
197 CUDA control_block(
unsigned char* mem,
size_t capacity,
size_t alignment)
198 : mem(mem), capacity(capacity), offset(0), alignment(alignment), num_deallocations(0), num_allocations(0), unaligned_wasted_bytes(0), counter(1)
201 CUDA void* allocate(
size_t bytes) {
206 size_t smallest_alignment = (bytes > alignment || alignment > 16) ? alignment : impl::power2[bytes];
207 if(
size_t(&mem[offset]) % smallest_alignment != 0) {
208 size_t old_offset = offset;
209 offset += smallest_alignment - (size_t(&mem[offset]) % smallest_alignment);
210 unaligned_wasted_bytes += (offset - old_offset);
212 assert(offset + bytes <= capacity);
213 assert((
size_t)&mem[offset] % smallest_alignment == 0);
214 void* m = (
void*)&mem[offset];
220 CUDA void deallocate(
void* ptr) {
227 control_block* block;
233 if(block !=
nullptr) {
239 : block(::new control_block(mem,
capacity, alignment))
243 if(block !=
nullptr) {
245 if(block->counter == 0) {
252 size_t old = block->alignment;
253 block->alignment = alignment;
258 return block->allocate(bytes);
262 block->deallocate(ptr);
267 printf(
"%% %" PRIu64
" / %" PRIu64
" used [%" PRIu64
"/%" PRIu64
"]KB [%" PRIu64
"/%" PRIu64
"]MB\n",
268 block->offset, block->capacity,
269 block->offset/1000, block->capacity/1000,
270 block->offset/1000/1000, block->capacity/1000/1000);
271 printf(
"%% %" PRIu64
" / %" PRIu64
" wasted for alignment [%" PRIu64
"/%" PRIu64
"]KB [%" PRIu64
"/%" PRIu64
"]MB\n",
272 block->unaligned_wasted_bytes, block->offset,
273 block->unaligned_wasted_bytes/1000, block->offset/1000,
274 block->unaligned_wasted_bytes/1000/1000, block->offset/1000/1000);
275 printf(
"%% %" PRIu64
" allocations and %" PRIu64
" deallocations\n", block->num_allocations, block->num_deallocations);
279 return block->offset;
283 return block->capacity;
287 return block->num_deallocations;
291 return block->num_allocations;
295 return block->unaligned_wasted_bytes;
299 return block == rhs.block;
305 return p.allocate(bytes);
309 return p.deallocate(ptr);
321 return std::malloc(bytes);
333 return p.allocate(bytes);
337 return p.deallocate(ptr);
342 template <
class Allocator,
class InternalAllocator = Allocator>
344 struct control_block {
347 size_t num_deallocations;
348 size_t num_allocations;
349 size_t total_bytes_allocated;
351 CUDA control_block(
const Allocator& allocator)
352 : allocator(allocator), counter(1), num_deallocations(0), num_allocations(0), total_bytes_allocated(0)
355 CUDA NI void* allocate(
size_t bytes) {
357 total_bytes_allocated += bytes;
358 return allocator.allocate(bytes);
361 CUDA NI void deallocate(
void* ptr) {
364 allocator.deallocate(ptr);
369 InternalAllocator internal_allocator;
370 control_block* block;
376 : internal_allocator(other.internal_allocator), block(other.block)
382 : internal_allocator(internal_allocator)
384 block =
static_cast<control_block*
>(this->internal_allocator.allocate(
sizeof(control_block)));
385 new(block) control_block(allocator);
390 if(block->counter == 0) {
391 internal_allocator.deallocate(block);
396 return block->allocate(bytes);
400 block->deallocate(ptr);
404 return block->num_allocations;
408 return block->num_deallocations;
412 return block->total_bytes_allocated;
416 return block == rhs.block;
Definition allocator.hpp:185
CUDA NI ~pool_allocator()
Definition allocator.hpp:242
CUDA size_t used() const
Definition allocator.hpp:278
CUDA NI void deallocate(void *ptr)
Definition allocator.hpp:261
CUDA NI pool_allocator(unsigned char *mem, size_t capacity, size_t alignment=alignof(std::max_align_t))
Definition allocator.hpp:238
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:257
CUDA size_t unaligned_wasted_bytes() const
Definition allocator.hpp:294
CUDA size_t num_allocations() const
Definition allocator.hpp:290
CUDA bool operator==(const pool_allocator &rhs) const
Definition allocator.hpp:298
CUDA NI pool_allocator(const pool_allocator &other)
Definition allocator.hpp:230
CUDA size_t align_at(size_t alignment)
Definition allocator.hpp:251
CUDA size_t capacity() const
Definition allocator.hpp:282
CUDA NI void print() const
Definition allocator.hpp:265
CUDA size_t num_deallocations() const
Definition allocator.hpp:286
Definition allocator.hpp:315
CUDA NI void deallocate(void *data)
Definition allocator.hpp:324
CUDA bool operator==(const standard_allocator &) const
Definition allocator.hpp:328
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:317
Definition allocator.hpp:343
CUDA NI statistics_allocator(const statistics_allocator &other)
Definition allocator.hpp:375
CUDA size_t num_deallocations() const
Definition allocator.hpp:407
CUDA bool operator==(const this_type &rhs) const
Definition allocator.hpp:415
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:395
CUDA size_t total_bytes_allocated() const
Definition allocator.hpp:411
CUDA NI statistics_allocator(const Allocator &allocator=Allocator(), const InternalAllocator &internal_allocator=InternalAllocator())
Definition allocator.hpp:381
CUDA NI void deallocate(void *ptr)
Definition allocator.hpp:399
CUDA NI ~statistics_allocator()
Definition allocator.hpp:388
CUDA size_t num_allocations() const
Definition allocator.hpp:403
Definition algorithm.hpp:10
CUDA bool operator==(const string< Alloc1 > &lhs, const string< Alloc2 > &rhs)
Definition string.hpp:110
#define CUDA
Definition utility.hpp:59
#define NI
Definition utility.hpp:62