3#ifndef CUDA_BATTERY_UTILITY_HPP
4#define CUDA_BATTERY_UTILITY_HPP
17 #define CUDA_GLOBAL __global__
19 #ifdef REDUCE_PTX_SIZE
21 #define NI __noinline__
27 #define INLINE __forceinline__
30 #define CUDA __device__ __host__
34 CUDA NI inline void gpuAssert(cudaError_t code,
const char *file,
int line,
bool abort=
true) {
35 if (code != cudaSuccess) {
36 printf(
"%s:%d CUDA runtime error %s\n", file, line, cudaGetErrorString(code));
51 #define CUDAE(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, false); }
55 #define CUDAEX(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, true); }
69 template<
class T>
CUDA constexpr inline void swap(T& a, T& b) {
75 CUDA constexpr inline size_t strlen(
const char* str) {
77 while(str[n] !=
'\0') { ++n; }
82 CUDA constexpr inline int strcmp(
const char* s1,
const char* s2) {
83 while(*s1 && (*s1 == *s2)) {
87 return *(
const unsigned char*)s1 - *(
const unsigned char*)s2;
91template<
class T>
CUDA constexpr inline void swap(T& a, T& b) {
101 return impl::strlen(str);
103 return std::strlen(str);
110 return impl::strcmp(s1, s2);
112 return std::strcmp(s1, s2);
124 return std::min(a, b);
136 return std::max(a, b);
144 return std::isnan(a);
150# define CONSTEXPR_NEXTAFTER
152# define CONSTEXPR_NEXTAFTER constexpr
157 return ::nextafterf(f, dir);
159 return std::nextafterf(f, dir);
165 return ::nextafter(f, dir);
167 return std::nextafter(f, dir);
179 if constexpr (std::is_floating_point<T>()) {
180 return -std::numeric_limits<T>::infinity();
182 return std::numeric_limits<T>::min();
184 static constexpr T
inf() {
185 if constexpr (std::is_floating_point<T>()) {
186 return std::numeric_limits<T>::infinity();
188 return std::numeric_limits<T>::max();
192#define MAP_LIMITS(x, From, To) \
193 if(x == 0) { return 0; } \
194 if(x == limits<From>::neg_inf()) {\
195 return limits<To>::neg_inf(); \
197 if(x == limits<From>::inf()) {\
198 return limits<To>::inf(); \
208template<
class To,
class From,
bool map_limits = true>
210 if constexpr(std::is_same_v<To, From>) {
213 if constexpr(map_limits) {
214 MAP_LIMITS(x, From, To)
218 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
219 if constexpr(std::is_same_v<From, unsigned long long>) {
220 if constexpr(std::is_same_v<To, float>) {
221 return __ull2float_ru(x);
223 else if constexpr(std::is_same_v<To, double>) {
224 return __ull2double_ru(x);
227 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in ru_cast.");
230 else if constexpr(std::is_same_v<From, int>) {
231 if constexpr(std::is_same_v<To, float>) {
232 return __int2float_ru(x);
234 else if constexpr(std::is_same_v<To, double>) {
235 return __int2double_rn(x);
238 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in ru_cast.");
242 static_assert(
sizeof(
long long int) >=
sizeof(From));
243 if constexpr(std::is_same_v<To, float>) {
244 return __ll2float_ru(x);
246 else if constexpr(std::is_same_v<To, double>) {
247 return __ll2double_ru(x);
250 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in ru_cast.");
255 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
256 if constexpr(std::is_same_v<From, float>) {
257 return static_cast<To
>(__float2ll_ru(x));
259 else if constexpr(std::is_same_v<From, double>) {
260 return static_cast<To
>(__double2ll_ru(x));
263 static_assert(std::is_same_v<From, float>,
"Unsupported combination of types in ru_cast.");
267 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
268 return __double2float_ru(x);
272 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
273 #if !defined(__GNUC__) && !defined(_MSC_VER)
274 #pragma STDC FENV_ACCESS ON
276 int r = std::fesetround(FE_UPWARD);
278 return static_cast<To
>(x);
281 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
282 return static_cast<To
>(std::ceil(x));
285 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
286 #if !defined(__GNUC__) && !defined(_MSC_VER)
287 #pragma STDC FENV_ACCESS ON
289 int r = std::fesetround(FE_UPWARD);
291 return static_cast<To
>(x);
294 return static_cast<To
>(x);
304template<
class To,
class From,
bool map_limits=true>
306 if constexpr(std::is_same_v<To, From>) {
309 if constexpr(map_limits) {
310 MAP_LIMITS(x, From, To)
314 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
315 if constexpr(std::is_same_v<From, unsigned long long>) {
316 if constexpr(std::is_same_v<To, float>) {
317 return __ull2float_rd(x);
319 else if constexpr(std::is_same_v<To, double>) {
320 return __ull2double_rd(x);
323 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
326 else if constexpr(std::is_same_v<From, int>) {
327 if constexpr(std::is_same_v<To, float>) {
328 return __int2float_rd(x);
330 else if constexpr(std::is_same_v<To, double>) {
331 return __int2double_rn(x);
334 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
338 static_assert(
sizeof(
long long int) >=
sizeof(From));
339 if constexpr(std::is_same_v<To, float>) {
340 return __ll2float_rd(x);
342 else if constexpr(std::is_same_v<To, double>) {
343 return __ll2double_rd(x);
346 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
351 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
352 if constexpr(std::is_same_v<From, float>) {
353 return static_cast<To
>(__float2ll_rd(x));
355 else if constexpr(std::is_same_v<From, double>) {
356 return static_cast<To
>(__double2ll_rd(x));
359 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
363 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
364 return __double2float_rd(x);
368 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
369 #if !defined(__GNUC__) && !defined(_MSC_VER)
370 #pragma STDC FENV_ACCESS ON
372 int r = std::fesetround(FE_DOWNWARD);
374 return static_cast<To
>(x);
377 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
378 return static_cast<To
>(std::floor(x));
381 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
382 #if !defined(__GNUC__) && !defined(_MSC_VER)
383 #pragma STDC FENV_ACCESS ON
385 int r = std::fesetround(FE_DOWNWARD);
387 return static_cast<To
>(x);
390 return static_cast<To
>(x);
395 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"popcount only works on unsigned integers");
397 if constexpr(std::is_same_v<T, unsigned int>) {
400 else if constexpr(std::is_same_v<T, unsigned long long>) {
404 return __popcll(
static_cast<unsigned long long>(x));
406 #elif __cpp_lib_bitops
407 return std::popcount(x);
410 for(
int i = 0; i <
sizeof(T) * CHAR_BIT && x != 0; ++i) {
420 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countl_zero only works on unsigned integers");
423 if constexpr(
sizeof(T) <=
sizeof(int)) {
424 return __clz(x) - ((
sizeof(int) -
sizeof(T)) * CHAR_BIT);
426 else if constexpr(
sizeof(T) <=
sizeof(
long long int)) {
427 return __clzll(x) - ((
sizeof(
long long int) -
sizeof(T)) * CHAR_BIT);
430 static_assert(
sizeof(T) <
sizeof(
long long int),
"countX_Y (CUDA version) only supports types smaller than long long int.");
432 #elif __cpp_lib_bitops
433 return std::countl_zero(x);
436 constexpr int bits =
sizeof(T) * CHAR_BIT;
437 constexpr T mask = (T)1 << (bits - 1);
438 for(
int i = 0; i < bits && (x & mask) == 0; ++i) {
439 c += (x & mask) == 0;
448 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countl_one only works on unsigned integers");
451 #elif __cpp_lib_bitops
452 return std::countl_one(x);
455 constexpr int bits =
sizeof(T) * CHAR_BIT;
456 constexpr T mask = (T)1 << (bits - 1);
457 for(
int i = 0; i < bits && (x & mask) > 0; ++i) {
467 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countl_zero only works on unsigned integers");
470 return sizeof(T) * CHAR_BIT;
472 if constexpr(
sizeof(T) <=
sizeof(int)) {
475 else if constexpr(
sizeof(T) <=
sizeof(
long long int)) {
476 return __ffsll(x) - 1;
479 static_assert(
sizeof(T) <
sizeof(
long long int),
"countr_zero (CUDA version) only supports types smaller or equal to long long int.");
481 #elif __cpp_lib_bitops
482 return std::countr_zero(x);
485 constexpr int bits =
sizeof(T) * CHAR_BIT;
486 constexpr T mask = 1;
487 for(
int i = 0; i < bits && (x & mask) == 0; ++i) {
488 c += (x & mask) == 0;
497 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countr_one only works on unsigned integers");
500 #elif __cpp_lib_bitops
501 return std::countr_one(x);
504 constexpr int bits =
sizeof(T) * CHAR_BIT;
505 constexpr T mask = 1;
506 for(
int i = 0; i < bits && (x & mask) > 0; ++i) {
517 return (T(0) < val) - (val < T(0));
523 static_assert(std::is_integral_v<T>,
"ipow is only working on integer value.");
542#define FLOAT_ARITHMETIC_CUDA_IMPL(name, cudaname) \
543 if constexpr(std::is_same_v<T, float>) { \
544 return __f ## cudaname(x, y); \
546 else if constexpr(std::is_same_v<T, double>) { \
547 return __d ## cudaname(x, y); \
550 static_assert(std::is_same_v<T, float>, #name " (CUDA version) only support float or double types."); \
553#define FLOAT_ARITHMETIC_CPP_IMPL(cppop, cppround) \
554 int r = std::fesetround(cppround); \
561 FLOAT_ARITHMETIC_CUDA_IMPL(
add_up, add_ru)
563 #if !defined(__GNUC__) && !defined(_MSC_VER)
564 #pragma STDC FENV_ACCESS ON
566 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_UPWARD)
573 FLOAT_ARITHMETIC_CUDA_IMPL(
add_down, add_rd)
575 #if !defined(__GNUC__) && !defined(_MSC_VER)
576 #pragma STDC FENV_ACCESS ON
578 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_DOWNWARD)
585 FLOAT_ARITHMETIC_CUDA_IMPL(
sub_up, sub_ru)
587 #if !defined(__GNUC__) && !defined(_MSC_VER)
588 #pragma STDC FENV_ACCESS ON
590 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_UPWARD)
597 FLOAT_ARITHMETIC_CUDA_IMPL(
sub_down, sub_rd)
599 #if !defined(__GNUC__) && !defined(_MSC_VER)
600 #pragma STDC FENV_ACCESS ON
602 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_DOWNWARD)
609 FLOAT_ARITHMETIC_CUDA_IMPL(
mul_up, mul_ru)
611 #if !defined(__GNUC__) && !defined(_MSC_VER)
612 #pragma STDC FENV_ACCESS ON
614 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_UPWARD)
621 FLOAT_ARITHMETIC_CUDA_IMPL(
mul_down, mul_rd)
623 #if !defined(__GNUC__) && !defined(_MSC_VER)
624 #pragma STDC FENV_ACCESS ON
626 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_DOWNWARD)
633 FLOAT_ARITHMETIC_CUDA_IMPL(
div_up, div_ru)
635 #if !defined(__GNUC__) && !defined(_MSC_VER)
636 #pragma STDC FENV_ACCESS ON
638 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_UPWARD)
645 FLOAT_ARITHMETIC_CUDA_IMPL(
div_down, div_rd)
647 #if !defined(__GNUC__) && !defined(_MSC_VER)
648 #pragma STDC FENV_ACCESS ON
650 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_DOWNWARD)
657 static_assert(std::is_integral_v<T>,
"tdiv only works on integer values.");
663 static_assert(std::is_integral_v<T>,
"tdiv only works on integer values.");
670 static_assert(std::is_integral_v<T>,
"fdiv only works on integer values.");
676 static_assert(std::is_integral_v<T>,
"fmod only works on integer values.");
683 static_assert(std::is_integral_v<T>,
"cdiv only works on integer values.");
689 static_assert(std::is_integral_v<T>,
"cmod only works on integer values.");
696 static_assert(std::is_integral_v<T>,
"ediv only works on integer values.");
702 static_assert(std::is_integral_v<T>,
"emod only works on integer values.");
710template<>
CUDA NI inline void print(
const bool &x) { x ? printf(
"true") : printf(
"false"); }
711template<>
CUDA NI inline void print(
const char &x) { printf(
"%c", x); }
712template<>
CUDA NI inline void print(
const short &x) { printf(
"%d", (
int)x); }
713template<>
CUDA NI inline void print(
const int &x) { printf(
"%d", x); }
714template<>
CUDA NI inline void print(
const long long int &x) { printf(
"%lld", x); }
715template<>
CUDA NI inline void print(
const long int &x) { printf(
"%ld", x); }
716template<>
CUDA NI inline void print(
const unsigned char &x) { printf(
"%d", (
int)x); }
717template<>
CUDA NI inline void print(
const unsigned short &x) { printf(
"%d", (
int)x); }
718template<>
CUDA NI inline void print(
const unsigned int &x) { printf(
"%u", x); }
719template<>
CUDA NI inline void print(
const unsigned long &x) { printf(
"%lu", x); }
720template<>
CUDA NI inline void print(
const unsigned long long &x) { printf(
"%llu", x); }
721template<>
CUDA NI inline void print(
const float &x) { printf(
"%f", x); }
722template<>
CUDA NI inline void print(
const double &x) { printf(
"%lf", x); }
723template<>
CUDA NI inline void print(
char const*
const &x) { printf(
"%s", x); }
Definition algorithm.hpp:10
CUDA NI constexpr int countl_zero(T x)
Definition utility.hpp:419
CUDA constexpr T mul_down(T x, T y)
Definition utility.hpp:619
CUDA NI constexpr int countr_one(T x)
Definition utility.hpp:496
CUDA constexpr T ediv(T x, T y)
Definition utility.hpp:695
CUDA INLINE constexpr T min(T a, T b)
Definition utility.hpp:116
CUDA constexpr T add_down(T x, T y)
Definition utility.hpp:571
CUDA constexpr T div_down(T x, T y)
Definition utility.hpp:643
CUDA constexpr T add_up(T x, T y)
Definition utility.hpp:559
CUDA INLINE constexpr T max(T a, T b)
Definition utility.hpp:128
CUDA constexpr T tdiv(T x, T y)
Definition utility.hpp:656
CUDA constexpr T fmod(T x, T y)
Definition utility.hpp:675
CUDA NI constexpr int countr_zero(T x)
Definition utility.hpp:466
CUDA constexpr T fdiv(T x, T y)
Definition utility.hpp:669
CUDA constexpr void swap(T &a, T &b)
Definition utility.hpp:91
CUDA NI void print(const T &t)
Definition utility.hpp:707
CUDA constexpr T tmod(T x, T y)
Definition utility.hpp:662
CUDA constexpr T div_up(T x, T y)
Definition utility.hpp:631
CUDA NI constexpr int popcount(T x)
Definition utility.hpp:394
CUDA constexpr T emod(T x, T y)
Definition utility.hpp:701
CUDA constexpr T mul_up(T x, T y)
Definition utility.hpp:607
CUDA size_t strlen(const char *str)
Definition utility.hpp:99
CUDA constexpr T sub_down(T x, T y)
Definition utility.hpp:595
CUDA constexpr T isnan(T a)
Definition utility.hpp:140
CUDA NI constexpr To rd_cast(From x)
Definition utility.hpp:305
CUDA NI constexpr int countl_one(T x)
Definition utility.hpp:447
CUDA int strcmp(const char *s1, const char *s2)
Definition utility.hpp:108
CUDA NI constexpr To ru_cast(From x)
Definition utility.hpp:209
CUDA constexpr T sub_up(T x, T y)
Definition utility.hpp:583
CUDA constexpr T cmod(T x, T y)
Definition utility.hpp:688
CUDA CONSTEXPR_NEXTAFTER float nextafter(float f, float dir)
Definition utility.hpp:155
CUDA constexpr int signum(T val)
Definition utility.hpp:516
CUDA NI constexpr T ipow(T a, T b)
Definition utility.hpp:522
CUDA constexpr T cdiv(T x, T y)
Definition utility.hpp:682
Definition utility.hpp:177
static constexpr T neg_inf()
Definition utility.hpp:178
static constexpr T inf()
Definition utility.hpp:184
#define INLINE
Definition utility.hpp:63
#define CONSTEXPR_NEXTAFTER
Definition utility.hpp:152
#define CUDA
Definition utility.hpp:59
#define NI
Definition utility.hpp:62