3#ifndef CUDA_BATTERY_UTILITY_HPP
4#define CUDA_BATTERY_UTILITY_HPP
17 #define CUDA_GLOBAL __global__
19 #ifdef REDUCE_PTX_SIZE
21 #define NI __noinline__
27 #define INLINE __forceinline__
30 #define CUDA __device__ __host__
34 CUDA NI inline void gpuAssert(cudaError_t code,
const char *file,
int line,
bool abort=
true) {
35 if (code != cudaSuccess) {
36 printf(
"%s:%d CUDA runtime error %s\n", file, line, cudaGetErrorString(code));
51 #define CUDAE(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, false); }
55 #define CUDAEX(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, true); }
69 template<
class T>
CUDA constexpr inline void swap(T& a, T& b) {
75 CUDA constexpr inline size_t strlen(
const char* str) {
77 while(str[n] !=
'\0') { ++n; }
82 CUDA constexpr inline int strcmp(
const char* s1,
const char* s2) {
83 while(*s1 && (*s1 == *s2)) {
87 return *(
const unsigned char*)s1 - *(
const unsigned char*)s2;
91template<
class T>
CUDA constexpr inline void swap(T& a, T& b) {
101 return impl::strlen(str);
103 return std::strlen(str);
110 return impl::strcmp(s1, s2);
112 return std::strcmp(s1, s2);
124 return std::min(a, b);
136 return std::max(a, b);
144 return std::isnan(a);
148CUDA constexpr double pow(
double a,
double b) {
152 return std::pow(a, b);
158# define CONSTEXPR_NEXTAFTER
160# define CONSTEXPR_NEXTAFTER constexpr
165 return ::nextafterf(f, dir);
167 return std::nextafterf(f, dir);
173 return ::nextafter(f, dir);
175 return std::nextafter(f, dir);
187 if constexpr (std::is_floating_point<T>()) {
188 return -std::numeric_limits<T>::infinity();
190 return std::numeric_limits<T>::min();
192 static constexpr T
inf() {
193 if constexpr (std::is_floating_point<T>()) {
194 return std::numeric_limits<T>::infinity();
196 return std::numeric_limits<T>::max();
200#define MAP_LIMITS(x, From, To) \
201 if(x == 0) { return 0; } \
202 if(x == limits<From>::neg_inf()) {\
203 return limits<To>::neg_inf(); \
205 if(x == limits<From>::inf()) {\
206 return limits<To>::inf(); \
216template<
class To,
class From,
bool map_limits = true>
218 if constexpr(std::is_same_v<To, From>) {
221 if constexpr(map_limits) {
222 MAP_LIMITS(x, From, To)
226 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
227 if constexpr(std::is_same_v<From, unsigned long long>) {
228 if constexpr(std::is_same_v<To, float>) {
229 return __ull2float_ru(x);
231 else if constexpr(std::is_same_v<To, double>) {
232 return __ull2double_ru(x);
235 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in ru_cast.");
238 else if constexpr(std::is_same_v<From, int>) {
239 if constexpr(std::is_same_v<To, float>) {
240 return __int2float_ru(x);
242 else if constexpr(std::is_same_v<To, double>) {
243 return __int2double_rn(x);
246 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in ru_cast.");
250 static_assert(
sizeof(
long long int) >=
sizeof(From));
251 if constexpr(std::is_same_v<To, float>) {
252 return __ll2float_ru(x);
254 else if constexpr(std::is_same_v<To, double>) {
255 return __ll2double_ru(x);
258 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in ru_cast.");
263 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
264 if constexpr(std::is_same_v<From, float>) {
265 return static_cast<To
>(__float2ll_ru(x));
267 else if constexpr(std::is_same_v<From, double>) {
268 return static_cast<To
>(__double2ll_ru(x));
271 static_assert(std::is_same_v<From, float>,
"Unsupported combination of types in ru_cast.");
275 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
276 return __double2float_ru(x);
280 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
281 #if !defined(__GNUC__) && !defined(_MSC_VER)
282 #pragma STDC FENV_ACCESS ON
284 int r = std::fesetround(FE_UPWARD);
286 return static_cast<To
>(x);
289 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
290 return static_cast<To
>(std::ceil(x));
293 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
294 #if !defined(__GNUC__) && !defined(_MSC_VER)
295 #pragma STDC FENV_ACCESS ON
297 int r = std::fesetround(FE_UPWARD);
299 return static_cast<To
>(x);
302 return static_cast<To
>(x);
312template<
class To,
class From,
bool map_limits=true>
314 if constexpr(std::is_same_v<To, From>) {
317 if constexpr(map_limits) {
318 MAP_LIMITS(x, From, To)
322 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
323 if constexpr(std::is_same_v<From, unsigned long long>) {
324 if constexpr(std::is_same_v<To, float>) {
325 return __ull2float_rd(x);
327 else if constexpr(std::is_same_v<To, double>) {
328 return __ull2double_rd(x);
331 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
334 else if constexpr(std::is_same_v<From, int>) {
335 if constexpr(std::is_same_v<To, float>) {
336 return __int2float_rd(x);
338 else if constexpr(std::is_same_v<To, double>) {
339 return __int2double_rn(x);
342 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
346 static_assert(
sizeof(
long long int) >=
sizeof(From));
347 if constexpr(std::is_same_v<To, float>) {
348 return __ll2float_rd(x);
350 else if constexpr(std::is_same_v<To, double>) {
351 return __ll2double_rd(x);
354 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
359 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
360 if constexpr(std::is_same_v<From, float>) {
361 return static_cast<To
>(__float2ll_rd(x));
363 else if constexpr(std::is_same_v<From, double>) {
364 return static_cast<To
>(__double2ll_rd(x));
367 static_assert(std::is_same_v<To, float>,
"Unsupported combination of types in rd_cast.");
371 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
372 return __double2float_rd(x);
376 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
377 #if !defined(__GNUC__) && !defined(_MSC_VER)
378 #pragma STDC FENV_ACCESS ON
380 int r = std::fesetround(FE_DOWNWARD);
382 return static_cast<To
>(x);
385 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
386 return static_cast<To
>(std::floor(x));
389 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
390 #if !defined(__GNUC__) && !defined(_MSC_VER)
391 #pragma STDC FENV_ACCESS ON
393 int r = std::fesetround(FE_DOWNWARD);
395 return static_cast<To
>(x);
398 return static_cast<To
>(x);
403 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"popcount only works on unsigned integers");
405 if constexpr(std::is_same_v<T, unsigned int>) {
408 else if constexpr(std::is_same_v<T, unsigned long long>) {
412 return __popcll(
static_cast<unsigned long long>(x));
414 #elif __cpp_lib_bitops
415 return std::popcount(x);
418 for(
int i = 0; i <
sizeof(T) * CHAR_BIT && x != 0; ++i) {
428 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countl_zero only works on unsigned integers");
431 if constexpr(
sizeof(T) <=
sizeof(
int)) {
432 return __clz(x) - ((
sizeof(int) -
sizeof(T)) * CHAR_BIT);
434 else if constexpr(
sizeof(T) <=
sizeof(
long long int)) {
435 return __clzll(x) - ((
sizeof(
long long int) -
sizeof(T)) * CHAR_BIT);
438 static_assert(
sizeof(T) <
sizeof(
long long int),
"countX_Y (CUDA version) only supports types smaller than long long int.");
440 #elif __cpp_lib_bitops
441 return std::countl_zero(x);
444 constexpr int bits =
sizeof(T) * CHAR_BIT;
445 constexpr T mask = (T)1 << (bits - 1);
446 for(
int i = 0; i < bits && (x & mask) == 0; ++i) {
447 c += (x & mask) == 0;
456 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countl_one only works on unsigned integers");
459 #elif __cpp_lib_bitops
460 return std::countl_one(x);
463 constexpr int bits =
sizeof(T) * CHAR_BIT;
464 constexpr T mask = (T)1 << (bits - 1);
465 for(
int i = 0; i < bits && (x & mask) > 0; ++i) {
475 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countl_zero only works on unsigned integers");
478 return sizeof(T) * CHAR_BIT;
480 if constexpr(
sizeof(T) <=
sizeof(
int)) {
483 else if constexpr(
sizeof(T) <=
sizeof(
long long int)) {
484 return __ffsll(x) - 1;
487 static_assert(
sizeof(T) <
sizeof(
long long int),
"countr_zero (CUDA version) only supports types smaller or equal to long long int.");
489 #elif __cpp_lib_bitops
490 return std::countr_zero(x);
493 constexpr int bits =
sizeof(T) * CHAR_BIT;
494 constexpr T mask = 1;
495 for(
int i = 0; i < bits && (x & mask) == 0; ++i) {
496 c += (x & mask) == 0;
505 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
"countr_one only works on unsigned integers");
508 #elif __cpp_lib_bitops
509 return std::countr_one(x);
512 constexpr int bits =
sizeof(T) * CHAR_BIT;
513 constexpr T mask = 1;
514 for(
int i = 0; i < bits && (x & mask) > 0; ++i) {
525 return (T(0) < val) - (val < T(0));
531 static_assert(std::is_integral_v<T>,
"ipow is only working on integer value.");
552 static_assert(std::is_integral_v<T>,
"iroots_down is only working on integer value.");
553 T l =
static_cast<T
>(
pow(x, 1.0 / r));
554 while ((l + 1) * l * l <= x) l++;
555 while (l * l * l > x) l--;
561 static_assert(std::is_integral_v<T>,
"iroots_down is only working on integer value.");
562 T u =
static_cast<T
>(
pow(x, 1.0 / r));
563 while (u * u * u < x) u++;
567#define FLOAT_ARITHMETIC_CUDA_IMPL(name, cudaname) \
568 if constexpr(std::is_same_v<T, float>) { \
569 return __f ## cudaname(x, y); \
571 else if constexpr(std::is_same_v<T, double>) { \
572 return __d ## cudaname(x, y); \
575 static_assert(std::is_same_v<T, float>, #name " (CUDA version) only support float or double types."); \
578#define FLOAT_ARITHMETIC_CPP_IMPL(cppop, cppround) \
579 int r = std::fesetround(cppround); \
586 FLOAT_ARITHMETIC_CUDA_IMPL(
add_up, add_ru)
588 #if !defined(__GNUC__) && !defined(_MSC_VER)
589 #pragma STDC FENV_ACCESS ON
591 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_UPWARD)
598 FLOAT_ARITHMETIC_CUDA_IMPL(
add_down, add_rd)
600 #if !defined(__GNUC__) && !defined(_MSC_VER)
601 #pragma STDC FENV_ACCESS ON
603 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_DOWNWARD)
610 FLOAT_ARITHMETIC_CUDA_IMPL(
sub_up, sub_ru)
612 #if !defined(__GNUC__) && !defined(_MSC_VER)
613 #pragma STDC FENV_ACCESS ON
615 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_UPWARD)
622 FLOAT_ARITHMETIC_CUDA_IMPL(
sub_down, sub_rd)
624 #if !defined(__GNUC__) && !defined(_MSC_VER)
625 #pragma STDC FENV_ACCESS ON
627 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_DOWNWARD)
634 FLOAT_ARITHMETIC_CUDA_IMPL(
mul_up, mul_ru)
636 #if !defined(__GNUC__) && !defined(_MSC_VER)
637 #pragma STDC FENV_ACCESS ON
639 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_UPWARD)
646 FLOAT_ARITHMETIC_CUDA_IMPL(
mul_down, mul_rd)
648 #if !defined(__GNUC__) && !defined(_MSC_VER)
649 #pragma STDC FENV_ACCESS ON
651 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_DOWNWARD)
658 FLOAT_ARITHMETIC_CUDA_IMPL(
div_up, div_ru)
660 #if !defined(__GNUC__) && !defined(_MSC_VER)
661 #pragma STDC FENV_ACCESS ON
663 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_UPWARD)
670 FLOAT_ARITHMETIC_CUDA_IMPL(
div_down, div_rd)
672 #if !defined(__GNUC__) && !defined(_MSC_VER)
673 #pragma STDC FENV_ACCESS ON
675 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_DOWNWARD)
682 static_assert(std::is_integral_v<T>,
"tdiv only works on integer values.");
688 static_assert(std::is_integral_v<T>,
"tdiv only works on integer values.");
695 static_assert(std::is_integral_v<T>,
"fdiv only works on integer values.");
701 static_assert(std::is_integral_v<T>,
"fmod only works on integer values.");
708 static_assert(std::is_integral_v<T>,
"cdiv only works on integer values.");
714 static_assert(std::is_integral_v<T>,
"cmod only works on integer values.");
721 static_assert(std::is_integral_v<T>,
"ediv only works on integer values.");
727 static_assert(std::is_integral_v<T>,
"emod only works on integer values.");
735template<>
CUDA NI inline void print(
const bool &x) { x ? printf(
"true") : printf(
"false"); }
736template<>
CUDA NI inline void print(
const char &x) { printf(
"%c", x); }
737template<>
CUDA NI inline void print(
const short &x) { printf(
"%d", (
int)x); }
738template<>
CUDA NI inline void print(
const int &x) { printf(
"%d", x); }
739template<>
CUDA NI inline void print(
const long long int &x) { printf(
"%lld", x); }
740template<>
CUDA NI inline void print(
const long int &x) { printf(
"%ld", x); }
741template<>
CUDA NI inline void print(
const unsigned char &x) { printf(
"%d", (
int)x); }
742template<>
CUDA NI inline void print(
const unsigned short &x) { printf(
"%d", (
int)x); }
743template<>
CUDA NI inline void print(
const unsigned int &x) { printf(
"%u", x); }
744template<>
CUDA NI inline void print(
const unsigned long &x) { printf(
"%lu", x); }
745template<>
CUDA NI inline void print(
const unsigned long long &x) { printf(
"%llu", x); }
746template<>
CUDA NI inline void print(
const float &x) { printf(
"%f", x); }
747template<>
CUDA NI inline void print(
const double &x) { printf(
"%lf", x); }
748template<>
CUDA NI inline void print(
char const*
const &x) { printf(
"%s", x); }
Definition algorithm.hpp:10
CUDA NI constexpr int countl_zero(T x)
Definition utility.hpp:427
CUDA constexpr T mul_down(T x, T y)
Definition utility.hpp:644
CUDA T iroots_up(T x, int r)
Definition utility.hpp:560
CUDA NI constexpr int countr_one(T x)
Definition utility.hpp:504
CUDA constexpr T ediv(T x, T y)
Definition utility.hpp:720
CUDA INLINE constexpr T min(T a, T b)
Definition utility.hpp:116
CUDA constexpr T add_down(T x, T y)
Definition utility.hpp:596
CUDA constexpr T div_down(T x, T y)
Definition utility.hpp:668
CUDA constexpr T add_up(T x, T y)
Definition utility.hpp:584
CUDA INLINE constexpr T max(T a, T b)
Definition utility.hpp:128
CUDA T iroots_down(T x, int r)
Definition utility.hpp:551
CUDA constexpr T tdiv(T x, T y)
Definition utility.hpp:681
CUDA constexpr T fmod(T x, T y)
Definition utility.hpp:700
CUDA NI constexpr int countr_zero(T x)
Definition utility.hpp:474
CUDA constexpr T fdiv(T x, T y)
Definition utility.hpp:694
CUDA constexpr void swap(T &a, T &b)
Definition utility.hpp:91
CUDA NI void print(const T &t)
Definition utility.hpp:732
CUDA constexpr T tmod(T x, T y)
Definition utility.hpp:687
CUDA constexpr T div_up(T x, T y)
Definition utility.hpp:656
CUDA NI constexpr int popcount(T x)
Definition utility.hpp:402
CUDA constexpr T emod(T x, T y)
Definition utility.hpp:726
CUDA constexpr T mul_up(T x, T y)
Definition utility.hpp:632
CUDA size_t strlen(const char *str)
Definition utility.hpp:99
CUDA constexpr T sub_down(T x, T y)
Definition utility.hpp:620
CUDA constexpr T isnan(T a)
Definition utility.hpp:140
CUDA NI constexpr To rd_cast(From x)
Definition utility.hpp:313
CUDA NI constexpr int countl_one(T x)
Definition utility.hpp:455
CUDA int strcmp(const char *s1, const char *s2)
Definition utility.hpp:108
CUDA NI constexpr To ru_cast(From x)
Definition utility.hpp:217
CUDA constexpr T sub_up(T x, T y)
Definition utility.hpp:608
CUDA constexpr T cmod(T x, T y)
Definition utility.hpp:713
CUDA CONSTEXPR_NEXTAFTER float nextafter(float f, float dir)
Definition utility.hpp:163
CUDA constexpr int signum(T val)
Definition utility.hpp:524
CUDA constexpr double pow(double a, double b)
Definition utility.hpp:148
CUDA NI constexpr T ipow(T a, T b)
Definition utility.hpp:530
CUDA constexpr T cdiv(T x, T y)
Definition utility.hpp:707
Definition utility.hpp:185
static constexpr T neg_inf()
Definition utility.hpp:186
static constexpr T inf()
Definition utility.hpp:192
#define INLINE
Definition utility.hpp:63
#define CONSTEXPR_NEXTAFTER
Definition utility.hpp:160
#define CUDA
Definition utility.hpp:59
#define NI
Definition utility.hpp:62