3#ifndef CUDA_BATTERY_ALLOCATOR_HPP
4#define CUDA_BATTERY_ALLOCATOR_HPP
31 return std::malloc(bytes);
43 return p.allocate(bytes);
47 return p.deallocate(ptr);
57class global_allocator {
59 CUDA NI void* allocate(
size_t bytes) {
64 void* data = std::malloc(bytes);
65 if (data ==
nullptr) {
66 printf(
"Allocation of device memory failed\n");
71 cudaError_t rc = cudaMalloc(&data, bytes);
72 if (rc != cudaSuccess) {
73 std::cerr <<
"Allocation of global memory failed: " << cudaGetErrorString(rc) << std::endl;
80 CUDA NI void deallocate(
void* data) {
84 cudaError_t rc = cudaFree(data);
85 if (rc != cudaSuccess) {
86 std::cerr <<
"Free of global memory failed: " << cudaGetErrorString(rc) << std::endl;
91 CUDA bool operator==(
const global_allocator&)
const {
return true; }
95class managed_allocator {
97 CUDA NI void* allocate(
size_t bytes) {
99 printf(
"Managed memory cannot be allocated in device functions.\n");
106 void* data =
nullptr;
107 cudaError_t rc = cudaMallocManaged(&data, bytes);
108 if (rc != cudaSuccess) {
109 std::cerr <<
"Allocation of managed memory failed: " << cudaGetErrorString(rc) << std::endl;
116 CUDA NI void deallocate(
void* data) {
118 printf(
"Managed memory cannot be freed in device functions.\n");
121 cudaError_t rc = cudaFree(data);
122 if (rc != cudaSuccess) {
123 std::cerr <<
"Free of managed memory failed: " << cudaGetErrorString(rc) << std::endl;
128 CUDA bool operator==(
const managed_allocator&)
const {
return true; }
140class pinned_allocator {
142 CUDA NI void* allocate(
size_t bytes) {
144 return global_allocator{}.allocate(bytes);
149 void* data =
nullptr;
150 cudaError_t rc = cudaMallocHost(&data, bytes);
151 if (rc != cudaSuccess) {
152 std::cerr <<
"Allocation of pinned memory failed: " << cudaGetErrorString(rc) << std::endl;
159 CUDA NI void deallocate(
void* data) {
161 return global_allocator{}.deallocate(data);
163 cudaError_t rc = cudaFreeHost(data);
164 if (rc != cudaSuccess) {
165 std::cerr <<
"Free of pinned memory failed: " << cudaGetErrorString(rc) << std::endl;
170 CUDA bool operator==(
const pinned_allocator&)
const {
return true; }
175CUDA inline void*
operator new(
size_t bytes, battery::managed_allocator& p) {
176 return p.allocate(bytes);
179CUDA inline void operator delete(
void* ptr, battery::managed_allocator& p) {
180 return p.deallocate(ptr);
183CUDA inline void*
operator new(
size_t bytes, battery::global_allocator& p) {
184 return p.allocate(bytes);
187CUDA inline void operator delete(
void* ptr, battery::global_allocator& p) {
191CUDA inline void*
operator new(
size_t bytes, battery::pinned_allocator& p) {
192 return p.allocate(bytes);
195CUDA inline void operator delete(
void* ptr, battery::pinned_allocator& p) {
207static const int power2[17] = {0, 1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 16};
217 struct control_block {
222 size_t num_deallocations;
223 size_t num_allocations;
224 size_t unaligned_wasted_bytes;
227 CUDA control_block(
unsigned char* mem,
size_t capacity,
size_t alignment)
228 : mem(mem), capacity(capacity), offset(0), alignment(alignment), num_deallocations(0), num_allocations(0), unaligned_wasted_bytes(0), counter(1)
231 CUDA void* allocate(
size_t bytes) {
236 size_t smallest_alignment = (bytes > alignment || alignment > 16) ? alignment : impl::power2[bytes];
237 if(
size_t(&mem[offset]) % smallest_alignment != 0) {
238 size_t old_offset = offset;
239 offset += smallest_alignment - (size_t(&mem[offset]) % smallest_alignment);
240 unaligned_wasted_bytes += (offset - old_offset);
242 assert(offset + bytes <= capacity);
243 assert((
size_t)&mem[offset] % smallest_alignment == 0);
244 void* m = (
void*)&mem[offset];
250 CUDA void deallocate(
void* ptr) {
257 control_block* block;
265 if(block !=
nullptr) {
273 other.block =
nullptr;
277 : block(new control_block(mem,
capacity, alignment))
281 CUDA void destroy() {
283 if(block->counter == 0) {
286 #ifdef CUDA_THREADS_PER_BLOCK
295 if(block !=
nullptr) {
301 if(block !=
nullptr) {
305 other.block =
nullptr;
310 size_t old = block->alignment;
311 block->alignment = alignment;
316 return block->allocate(bytes);
320 block->deallocate(ptr);
325 printf(
"%% %" PRIu64
" / %" PRIu64
" used [%" PRIu64
"/%" PRIu64
"]KB [%" PRIu64
"/%" PRIu64
"]MB\n",
326 block->offset, block->capacity,
327 block->offset/1000, block->capacity/1000,
328 block->offset/1000/1000, block->capacity/1000/1000);
329 printf(
"%% %" PRIu64
" / %" PRIu64
" wasted for alignment [%" PRIu64
"/%" PRIu64
"]KB [%" PRIu64
"/%" PRIu64
"]MB\n",
330 block->unaligned_wasted_bytes, block->offset,
331 block->unaligned_wasted_bytes/1000, block->offset/1000,
332 block->unaligned_wasted_bytes/1000/1000, block->offset/1000/1000);
333 printf(
"%% %" PRIu64
" allocations and %" PRIu64
" deallocations\n", block->num_allocations, block->num_deallocations);
337 return block->offset;
341 return block->capacity;
345 return block->num_deallocations;
349 return block->num_allocations;
353 return block->unaligned_wasted_bytes;
357 return block == rhs.block;
363 return p.allocate(bytes);
367 return p.deallocate(ptr);
371 template <
class Allocator,
class InternalAllocator = Allocator>
373 struct control_block {
376 size_t num_deallocations;
377 size_t num_allocations;
378 size_t total_bytes_allocated;
380 CUDA control_block(
const Allocator& allocator)
381 : allocator(allocator), counter(1), num_deallocations(0), num_allocations(0), total_bytes_allocated(0)
384 CUDA NI void* allocate(
size_t bytes) {
386 total_bytes_allocated += bytes;
387 return allocator.allocate(bytes);
390 CUDA NI void deallocate(
void* ptr) {
393 allocator.deallocate(ptr);
398 InternalAllocator internal_allocator;
399 control_block* block;
405 : internal_allocator(other.internal_allocator), block(other.block)
411 : internal_allocator(internal_allocator)
413 block =
static_cast<control_block*
>(this->internal_allocator.allocate(
sizeof(control_block)));
414 new(block) control_block(allocator);
419 if(block->counter == 0) {
420 internal_allocator.deallocate(block);
425 return block->allocate(bytes);
429 block->deallocate(ptr);
433 return block->num_allocations;
437 return block->num_deallocations;
441 return block->total_bytes_allocated;
445 return block == rhs.block;
Definition allocator.hpp:215
CUDA NI ~pool_allocator()
Definition allocator.hpp:294
CUDA size_t used() const
Definition allocator.hpp:336
CUDA NI void deallocate(void *ptr)
Definition allocator.hpp:319
CUDA NI pool_allocator(unsigned char *mem, size_t capacity, size_t alignment=alignof(std::max_align_t))
Definition allocator.hpp:276
NI pool_allocator()=default
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:315
CUDA size_t unaligned_wasted_bytes() const
Definition allocator.hpp:352
CUDA size_t num_allocations() const
Definition allocator.hpp:348
CUDA bool operator==(const pool_allocator &rhs) const
Definition allocator.hpp:356
CUDA NI pool_allocator(const pool_allocator &other)
Definition allocator.hpp:262
CUDA NI pool_allocator & operator=(pool_allocator &&other)
Definition allocator.hpp:300
CUDA size_t align_at(size_t alignment)
Definition allocator.hpp:309
CUDA size_t capacity() const
Definition allocator.hpp:340
CUDA NI pool_allocator(pool_allocator &&other)
Definition allocator.hpp:270
CUDA NI void print() const
Definition allocator.hpp:323
CUDA size_t num_deallocations() const
Definition allocator.hpp:344
Definition allocator.hpp:25
CUDA NI void deallocate(void *data)
Definition allocator.hpp:34
CUDA bool operator==(const standard_allocator &) const
Definition allocator.hpp:38
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:27
Definition allocator.hpp:372
CUDA NI statistics_allocator(const statistics_allocator &other)
Definition allocator.hpp:404
CUDA size_t num_deallocations() const
Definition allocator.hpp:436
CUDA bool operator==(const this_type &rhs) const
Definition allocator.hpp:444
CUDA NI void * allocate(size_t bytes)
Definition allocator.hpp:424
CUDA size_t total_bytes_allocated() const
Definition allocator.hpp:440
CUDA NI statistics_allocator(const Allocator &allocator=Allocator(), const InternalAllocator &internal_allocator=InternalAllocator())
Definition allocator.hpp:410
CUDA NI void deallocate(void *ptr)
Definition allocator.hpp:428
CUDA NI ~statistics_allocator()
Definition allocator.hpp:417
CUDA size_t num_allocations() const
Definition allocator.hpp:432
Definition algorithm.hpp:10
CUDA bool operator==(const string< Alloc1 > &lhs, const string< Alloc2 > &rhs)
Definition string.hpp:110
#define CUDA
Definition utility.hpp:59
#define NI
Definition utility.hpp:62