Cuda battery library
Loading...
Searching...
No Matches
utility.hpp
Go to the documentation of this file.
1// Copyright 2021 Pierre Talbot, Frédéric Pinel
2
3#ifndef CUDA_BATTERY_UTILITY_HPP
4#define CUDA_BATTERY_UTILITY_HPP
5
6#include <cstdio>
7#include <cassert>
8#include <limits>
9#include <climits>
10#include <algorithm>
11#include <cstring>
12#include <cmath>
13#include <cfenv>
14#include <bit>
15
16#ifdef __CUDACC__
17 #define CUDA_GLOBAL __global__
18
19 #ifdef REDUCE_PTX_SIZE
20 /** `NI` stands for noinline, to hint `nvcc` the function should not be inlined. */
21 #define NI __noinline__
22 #else
23 #define NI
24 #endif
25
26 /** Request a function to be inlined. */
27 #define INLINE __forceinline__
28
29 /** `CUDA` is a macro indicating that a function can be executed on a GPU. It is defined to `__device__ __host__` when the code is compiled with `nvcc`. */
30 #define CUDA __device__ __host__
31
32 namespace battery {
33 namespace impl {
34 CUDA NI inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
35 if (code != cudaSuccess) {
36 printf("%s:%d CUDA runtime error %s\n", file, line, cudaGetErrorString(code));
37 if (abort) {
38 #ifdef __CUDA_ARCH__
39 assert(0);
40 #else
41 exit(code);
42 #endif
43 }
44 }
45 }
46 }}
47
48 /** A macro checking the result of a CUDA API call.
49 * It prints an error message if an error occured.
50 */
51 #define CUDAE(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, false); }
52
53 /** Similar to CUDAE but abort the computation in addition, either with `assert` (GPU) or `exit` (CPU).
54 */
55 #define CUDAEX(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, true); }
56
57#else
58 #define CUDA_GLOBAL
59 #define CUDA
60 #define CUDAE(S) S
61 #define CUDAEX(S) S
62 #define NI
63 #define INLINE inline
64#endif
65
66namespace battery {
67
68namespace impl {
69 template<class T> CUDA constexpr inline void swap(T& a, T& b) {
70 T c(std::move(a));
71 a = std::move(b);
72 b = std::move(c);
73 }
74
75 CUDA constexpr inline size_t strlen(const char* str) {
76 size_t n = 0;
77 while(str[n] != '\0') { ++n; }
78 return n;
79 }
80
81 /** See https://stackoverflow.com/a/34873406/2231159 */
82 CUDA constexpr inline int strcmp(const char* s1, const char* s2) {
83 while(*s1 && (*s1 == *s2)) {
84 s1++;
85 s2++;
86 }
87 return *(const unsigned char*)s1 - *(const unsigned char*)s2;
88 }
89}
90
91template<class T> CUDA constexpr inline void swap(T& a, T& b) {
92 #ifdef __CUDA_ARCH__
93 impl::swap(a, b);
94 #else
95 std::swap(a, b);
96 #endif
97}
98
99CUDA inline size_t strlen(const char* str) {
100 #ifdef __CUDA_ARCH__
101 return impl::strlen(str);
102 #else
103 return std::strlen(str);
104 #endif
105}
106
107/** See https://stackoverflow.com/a/34873406/2231159 */
108CUDA inline int strcmp(const char* s1, const char* s2) {
109 #ifdef __CUDA_ARCH__
110 return impl::strcmp(s1, s2);
111 #else
112 return std::strcmp(s1, s2);
113 #endif
114}
115
116template<class T> CUDA INLINE constexpr T min(T a, T b) {
117 #ifdef __CUDA_ARCH__
118 // When C++23 is available
119 // if !consteval { return ::min(a, b); }
120 // else { return std::min(a, b); }
121 // return a < b ? a : b;
122 return ::min(a, b);
123 #else
124 return std::min(a, b);
125 #endif
126}
127
128template<class T> CUDA INLINE constexpr T max(T a, T b) {
129 #ifdef __CUDA_ARCH__
130 // When C++23 is available
131 // if !consteval { return ::max(a, b); }
132 // else { return std::max(a, b); }
133 // return a > b ? a : b;
134 return ::max(a, b); // a > b ? a : b;
135 #else
136 return std::max(a, b);
137 #endif
138}
139
140template<class T> CUDA constexpr T isnan(T a) {
141 #ifdef __CUDA_ARCH__
142 return ::isnan(a);
143 #else
144 return std::isnan(a);
145 #endif
146}
147
148CUDA constexpr double pow(double a, double b) {
149 #ifdef __CUDA_ARCH__
150 return ::pow(a, b);
151 #else
152 return std::pow(a, b);
153 #endif
154}
155
156#ifdef _MSC_VER
157// MSVC omits constexpr for nextafter (officially not available until C++23)
158# define CONSTEXPR_NEXTAFTER
159#else
160# define CONSTEXPR_NEXTAFTER constexpr
161#endif
162
163CUDA CONSTEXPR_NEXTAFTER inline float nextafter(float f, float dir) {
164 #ifdef __CUDA_ARCH__
165 return ::nextafterf(f, dir);
166 #else
167 return std::nextafterf(f, dir);
168 #endif
169}
170
171CUDA CONSTEXPR_NEXTAFTER inline double nextafter(double f, double dir) {
172 #ifdef __CUDA_ARCH__
173 return ::nextafter(f, dir);
174 #else
175 return std::nextafter(f, dir);
176 #endif
177}
178
179/** `limits` is a structure to get "infinity points" of primitive types including integers.
180 * For floating-point numbers, we use their built-in representation of infinity.
181 * For integers, we use the minimal and maximal values of the underlying type to represent infinities.
182 * When converting using `ru_cast` and `rd_cast`, the infinities will be preserved across types.
183 */
184template<class T>
185struct limits {
186 static constexpr T neg_inf() {
187 if constexpr (std::is_floating_point<T>()) {
188 return -std::numeric_limits<T>::infinity();
189 }
190 return std::numeric_limits<T>::min();
191 }
192 static constexpr T inf() {
193 if constexpr (std::is_floating_point<T>()) {
194 return std::numeric_limits<T>::infinity();
195 }
196 return std::numeric_limits<T>::max();
197 }
198};
199
200#define MAP_LIMITS(x, From, To) \
201 if(x == 0) { return 0; } \
202 if(x == limits<From>::neg_inf()) {\
203 return limits<To>::neg_inf(); \
204 } \
205 if(x == limits<From>::inf()) {\
206 return limits<To>::inf(); \
207 }
208
209/** Cast the variable `x` from type `From` to type `To` following upper rounding rule (cast in the direction of infinity).
210 Minimal and maximal values of `From` are interpreted as infinities, and are therefore mapped to the infinities of the new types accordingly (e.g., float INF maps to int MAX_INT).
211
212 - On CPU: Rounding mode is UPWARD after this operation.
213 - On GPU: CUDA intrinsics are used.
214
215 Overflow: Nothing is done to prevent overflow, it mostly behaves as with `static_cast`. */
216template<class To, class From, bool map_limits = true>
217CUDA NI constexpr To ru_cast(From x) {
218 if constexpr(std::is_same_v<To, From>) {
219 return x;
220 }
221 if constexpr(map_limits) {
222 MAP_LIMITS(x, From, To)
223 }
224 #ifdef __CUDA_ARCH__
225 // Integer to floating-point number cast.
226 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
227 if constexpr(std::is_same_v<From, unsigned long long>) {
228 if constexpr(std::is_same_v<To, float>) {
229 return __ull2float_ru(x);
230 }
231 else if constexpr(std::is_same_v<To, double>) {
232 return __ull2double_ru(x);
233 }
234 else {
235 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in ru_cast.");
236 }
237 }
238 else if constexpr(std::is_same_v<From, int>) {
239 if constexpr(std::is_same_v<To, float>) {
240 return __int2float_ru(x);
241 }
242 else if constexpr(std::is_same_v<To, double>) {
243 return __int2double_rn(x);
244 }
245 else {
246 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in ru_cast.");
247 }
248 }
249 else {
250 static_assert(sizeof(long long int) >= sizeof(From));
251 if constexpr(std::is_same_v<To, float>) {
252 return __ll2float_ru(x);
253 }
254 else if constexpr(std::is_same_v<To, double>) {
255 return __ll2double_ru(x);
256 }
257 else {
258 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in ru_cast.");
259 }
260 }
261 }
262 // Floating-point number to integer number.
263 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
264 if constexpr(std::is_same_v<From, float>) {
265 return static_cast<To>(__float2ll_ru(x));
266 }
267 else if constexpr(std::is_same_v<From, double>) {
268 return static_cast<To>(__double2ll_ru(x));
269 }
270 else {
271 static_assert(std::is_same_v<From, float>, "Unsupported combination of types in ru_cast.");
272 }
273 }
274 // Floating-point to floating-point.
275 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
276 return __double2float_ru(x);
277 }
278 #else
279 // Integer to floating-point number cast.
280 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
281 #if !defined(__GNUC__) && !defined(_MSC_VER)
282 #pragma STDC FENV_ACCESS ON
283 #endif
284 int r = std::fesetround(FE_UPWARD);
285 assert(r == 0);
286 return static_cast<To>(x);
287 }
288 // Floating-point number to integer number.
289 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
290 return static_cast<To>(std::ceil(x));
291 }
292 // Floating-point to floating-point.
293 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
294 #if !defined(__GNUC__) && !defined(_MSC_VER)
295 #pragma STDC FENV_ACCESS ON
296 #endif
297 int r = std::fesetround(FE_UPWARD);
298 assert(r == 0);
299 return static_cast<To>(x);
300 }
301 #endif
302 return static_cast<To>(x);
303}
304
305/** Cast the variable `x` from type `From` to type `To` following down rounding rule (cast in the direction of negative infinity).
306 Minimal and maximal values of `From` are interpreted as infinities, and are therefore mapped to the infinities of the new types accordingly (e.g., float INF maps to int MAX_INT).
307
308 - On CPU: Rounding mode is DOWNWARD after this operation.
309 - On GPU: CUDA intrinsics are used.
310
311 Overflow: Nothing is done to prevent overflow, it mostly behaves as with `static_cast`. */
312template<class To, class From, bool map_limits=true>
313CUDA NI constexpr To rd_cast(From x) {
314 if constexpr(std::is_same_v<To, From>) {
315 return x;
316 }
317 if constexpr(map_limits) {
318 MAP_LIMITS(x, From, To)
319 }
320 #ifdef __CUDA_ARCH__
321 // Integer to floating-point number cast.
322 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
323 if constexpr(std::is_same_v<From, unsigned long long>) {
324 if constexpr(std::is_same_v<To, float>) {
325 return __ull2float_rd(x);
326 }
327 else if constexpr(std::is_same_v<To, double>) {
328 return __ull2double_rd(x);
329 }
330 else {
331 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
332 }
333 }
334 else if constexpr(std::is_same_v<From, int>) {
335 if constexpr(std::is_same_v<To, float>) {
336 return __int2float_rd(x);
337 }
338 else if constexpr(std::is_same_v<To, double>) {
339 return __int2double_rn(x);
340 }
341 else {
342 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
343 }
344 }
345 else {
346 static_assert(sizeof(long long int) >= sizeof(From));
347 if constexpr(std::is_same_v<To, float>) {
348 return __ll2float_rd(x);
349 }
350 else if constexpr(std::is_same_v<To, double>) {
351 return __ll2double_rd(x);
352 }
353 else {
354 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
355 }
356 }
357 }
358 // Floating-point number to integer number.
359 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
360 if constexpr(std::is_same_v<From, float>) {
361 return static_cast<To>(__float2ll_rd(x));
362 }
363 else if constexpr(std::is_same_v<From, double>) {
364 return static_cast<To>(__double2ll_rd(x));
365 }
366 else {
367 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
368 }
369 }
370 // Floating-point to floating-point.
371 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
372 return __double2float_rd(x);
373 }
374 #else
375 // Integer to floating-point number cast.
376 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
377 #if !defined(__GNUC__) && !defined(_MSC_VER)
378 #pragma STDC FENV_ACCESS ON
379 #endif
380 int r = std::fesetround(FE_DOWNWARD);
381 assert(r == 0);
382 return static_cast<To>(x);
383 }
384 // Floating-point number to integer number.
385 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
386 return static_cast<To>(std::floor(x));
387 }
388 // Floating-point to floating-point.
389 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
390 #if !defined(__GNUC__) && !defined(_MSC_VER)
391 #pragma STDC FENV_ACCESS ON
392 #endif
393 int r = std::fesetround(FE_DOWNWARD);
394 assert(r == 0);
395 return static_cast<To>(x);
396 }
397 #endif
398 return static_cast<To>(x);
399}
400
401template<class T>
402CUDA NI constexpr int popcount(T x) {
403 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "popcount only works on unsigned integers");
404 #ifdef __CUDA_ARCH__
405 if constexpr(std::is_same_v<T, unsigned int>) {
406 return __popc(x);
407 }
408 else if constexpr(std::is_same_v<T, unsigned long long>) {
409 return __popcll(x);
410 }
411 else {
412 return __popcll(static_cast<unsigned long long>(x));
413 }
414 #elif __cpp_lib_bitops
415 return std::popcount(x);
416 #else
417 int c = 0;
418 for(int i = 0; i < sizeof(T) * CHAR_BIT && x != 0; ++i) {
419 c += (x & 1);
420 x >>= 1;
421 }
422 return c;
423 #endif
424}
425
426template<class T>
427CUDA NI constexpr int countl_zero(T x) {
428 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countl_zero only works on unsigned integers");
429 #ifdef __CUDA_ARCH__
430 // If the size of `T` is smaller than `int` or `long long int` we must remove the extra zeroes that are added after conversion.
431 if constexpr(sizeof(T) <= sizeof(int)) {
432 return __clz(x) - ((sizeof(int) - sizeof(T)) * CHAR_BIT);
433 }
434 else if constexpr(sizeof(T) <= sizeof(long long int)) {
435 return __clzll(x) - ((sizeof(long long int) - sizeof(T)) * CHAR_BIT);
436 }
437 else {
438 static_assert(sizeof(T) < sizeof(long long int), "countX_Y (CUDA version) only supports types smaller than long long int.");
439 }
440 #elif __cpp_lib_bitops
441 return std::countl_zero(x);
442 #else
443 int c = 0;
444 constexpr int bits = sizeof(T) * CHAR_BIT;
445 constexpr T mask = (T)1 << (bits - 1);
446 for(int i = 0; i < bits && (x & mask) == 0; ++i) {
447 c += (x & mask) == 0;
448 x <<= 1;
449 }
450 return c;
451 #endif
452}
453
454template<class T>
455CUDA NI constexpr int countl_one(T x) {
456 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countl_one only works on unsigned integers");
457 #ifdef __CUDA_ARCH__
458 return countl_zero((T)~x);
459 #elif __cpp_lib_bitops
460 return std::countl_one(x);
461 #else
462 int c = 0;
463 constexpr int bits = sizeof(T) * CHAR_BIT;
464 constexpr T mask = (T)1 << (bits - 1);
465 for(int i = 0; i < bits && (x & mask) > 0; ++i) {
466 c += (x & mask) > 0;
467 x <<= 1;
468 }
469 return c;
470 #endif
471}
472
473template<class T>
474CUDA NI constexpr int countr_zero(T x) {
475 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countl_zero only works on unsigned integers");
476 #ifdef __CUDA_ARCH__
477 if(x == 0) {
478 return sizeof(T) * CHAR_BIT;
479 }
480 if constexpr(sizeof(T) <= sizeof(int)) {
481 return __ffs(x) - 1;
482 }
483 else if constexpr(sizeof(T) <= sizeof(long long int)) {
484 return __ffsll(x) - 1;
485 }
486 else {
487 static_assert(sizeof(T) < sizeof(long long int), "countr_zero (CUDA version) only supports types smaller or equal to long long int.");
488 }
489 #elif __cpp_lib_bitops
490 return std::countr_zero(x);
491 #else
492 int c = 0;
493 constexpr int bits = sizeof(T) * CHAR_BIT;
494 constexpr T mask = 1;
495 for(int i = 0; i < bits && (x & mask) == 0; ++i) {
496 c += (x & mask) == 0;
497 x >>= 1;
498 }
499 return c;
500 #endif
501}
502
503template<class T>
504CUDA NI constexpr int countr_one(T x) {
505 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countr_one only works on unsigned integers");
506 #ifdef __CUDA_ARCH__
507 return countr_zero((T)~x);
508 #elif __cpp_lib_bitops
509 return std::countr_one(x);
510 #else
511 int c = 0;
512 constexpr int bits = sizeof(T) * CHAR_BIT;
513 constexpr T mask = 1;
514 for(int i = 0; i < bits && (x & mask) > 0; ++i) {
515 c += (x & mask) > 0;
516 x >>= 1;
517 }
518 return c;
519 #endif
520}
521
522/** Signum function, https://stackoverflow.com/a/4609795/2231159 */
523template <class T>
524CUDA constexpr int signum(T val) {
525 return (T(0) < val) - (val < T(0));
526}
527
528/** Precondition: T is an integer with b >= 0.*/
529template <class T>
530CUDA NI constexpr T ipow(T a, T b) {
531 static_assert(std::is_integral_v<T>, "ipow is only working on integer value.");
532 assert(b >= 0);
533 if(b == 2) {
534 return a*a;
535 }
536 // Code taken from GeCode implementation.
537 T p = 1;
538 do {
539 if (b % 2 == 0) {
540 a *= a;
541 b >>= 1;
542 } else {
543 p *= a;
544 b--;
545 }
546 } while (b > 0);
547 return p;
548}
549
550template <class T>
551CUDA T iroots_down(T x, int r) {
552 static_assert(std::is_integral_v<T>, "iroots_down is only working on integer value.");
553 T l = static_cast<T>(pow(x, 1.0 / r)); // Initial estimate
554 while ((l + 1) * l * l <= x) l++; // Adjust upwards if needed
555 while (l * l * l > x) l--; // Adjust downwards if overestimated
556 return l;
557}
558
559template <class T>
560CUDA T iroots_up(T x, int r) {
561 static_assert(std::is_integral_v<T>, "iroots_down is only working on integer value.");
562 T u = static_cast<T>(pow(x, 1.0 / r)); // Initial estimate
563 while (u * u * u < x) u++; // Adjust upwards if underestimated
564 return u;
565}
566
567#define FLOAT_ARITHMETIC_CUDA_IMPL(name, cudaname) \
568 if constexpr(std::is_same_v<T, float>) { \
569 return __f ## cudaname(x, y); \
570 } \
571 else if constexpr(std::is_same_v<T, double>) { \
572 return __d ## cudaname(x, y); \
573 } \
574 else { \
575 static_assert(std::is_same_v<T, float>, #name " (CUDA version) only support float or double types."); \
576 }
577
578#define FLOAT_ARITHMETIC_CPP_IMPL(cppop, cppround) \
579 int r = std::fesetround(cppround); \
580 assert(r == 0); \
581 return x cppop y;
582
583template <class T>
584CUDA constexpr T add_up(T x, T y) {
585 #ifdef __CUDA_ARCH__
586 FLOAT_ARITHMETIC_CUDA_IMPL(add_up, add_ru)
587 #else
588 #if !defined(__GNUC__) && !defined(_MSC_VER)
589 #pragma STDC FENV_ACCESS ON
590 #endif
591 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_UPWARD)
592 #endif
593}
594
595template <class T>
596CUDA constexpr T add_down(T x, T y) {
597 #ifdef __CUDA_ARCH__
598 FLOAT_ARITHMETIC_CUDA_IMPL(add_down, add_rd)
599 #else
600 #if !defined(__GNUC__) && !defined(_MSC_VER)
601 #pragma STDC FENV_ACCESS ON
602 #endif
603 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_DOWNWARD)
604 #endif
605}
606
607template <class T>
608CUDA constexpr T sub_up(T x, T y) {
609 #ifdef __CUDA_ARCH__
610 FLOAT_ARITHMETIC_CUDA_IMPL(sub_up, sub_ru)
611 #else
612 #if !defined(__GNUC__) && !defined(_MSC_VER)
613 #pragma STDC FENV_ACCESS ON
614 #endif
615 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_UPWARD)
616 #endif
617}
618
619template <class T>
620CUDA constexpr T sub_down(T x, T y) {
621 #ifdef __CUDA_ARCH__
622 FLOAT_ARITHMETIC_CUDA_IMPL(sub_down, sub_rd)
623 #else
624 #if !defined(__GNUC__) && !defined(_MSC_VER)
625 #pragma STDC FENV_ACCESS ON
626 #endif
627 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_DOWNWARD)
628 #endif
629}
630
631template <class T>
632CUDA constexpr T mul_up(T x, T y) {
633 #ifdef __CUDA_ARCH__
634 FLOAT_ARITHMETIC_CUDA_IMPL(mul_up, mul_ru)
635 #else
636 #if !defined(__GNUC__) && !defined(_MSC_VER)
637 #pragma STDC FENV_ACCESS ON
638 #endif
639 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_UPWARD)
640 #endif
641}
642
643template <class T>
644CUDA constexpr T mul_down(T x, T y) {
645 #ifdef __CUDA_ARCH__
646 FLOAT_ARITHMETIC_CUDA_IMPL(mul_down, mul_rd)
647 #else
648 #if !defined(__GNUC__) && !defined(_MSC_VER)
649 #pragma STDC FENV_ACCESS ON
650 #endif
651 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_DOWNWARD)
652 #endif
653}
654
655template <class T>
656CUDA constexpr T div_up(T x, T y) {
657 #ifdef __CUDA_ARCH__
658 FLOAT_ARITHMETIC_CUDA_IMPL(div_up, div_ru)
659 #else
660 #if !defined(__GNUC__) && !defined(_MSC_VER)
661 #pragma STDC FENV_ACCESS ON
662 #endif
663 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_UPWARD)
664 #endif
665}
666
667template <class T>
668CUDA constexpr T div_down(T x, T y) {
669 #ifdef __CUDA_ARCH__
670 FLOAT_ARITHMETIC_CUDA_IMPL(div_down, div_rd)
671 #else
672 #if !defined(__GNUC__) && !defined(_MSC_VER)
673 #pragma STDC FENV_ACCESS ON
674 #endif
675 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_DOWNWARD)
676 #endif
677}
678
679// Truncated division and modulus, by default in C++.
680template <class T>
681CUDA constexpr T tdiv(T x, T y) {
682 static_assert(std::is_integral_v<T>, "tdiv only works on integer values.");
683 return x / y;
684}
685
686template <class T>
687CUDA constexpr T tmod(T x, T y) {
688 static_assert(std::is_integral_v<T>, "tdiv only works on integer values.");
689 return x % y;
690}
691
692// Floor division and modulus, see (Leijen D. (2003). Division and Modulus for Computer Scientists).
693template <class T>
694CUDA constexpr T fdiv(T x, T y) {
695 static_assert(std::is_integral_v<T>, "fdiv only works on integer values.");
696 return x / y - (battery::signum(x % y) == -battery::signum(y));
697}
698
699template <class T>
700CUDA constexpr T fmod(T x, T y) {
701 static_assert(std::is_integral_v<T>, "fmod only works on integer values.");
702 return x % y + y * (battery::signum(x % y) == -battery::signum(y));
703}
704
705// Ceil division and modulus.
706template <class T>
707CUDA constexpr T cdiv(T x, T y) {
708 static_assert(std::is_integral_v<T>, "cdiv only works on integer values.");
709 return x / y + (battery::signum(x % y) == battery::signum(y));
710}
711
712template <class T>
713CUDA constexpr T cmod(T x, T y) {
714 static_assert(std::is_integral_v<T>, "cmod only works on integer values.");
715 return x % y - y * (battery::signum(x % y) == battery::signum(y));
716}
717
718// Euclidean division and modulus, see (Leijen D. (2003). Division and Modulus for Computer Scientists).
719template <class T>
720CUDA constexpr T ediv(T x, T y) {
721 static_assert(std::is_integral_v<T>, "ediv only works on integer values.");
722 return x / y - ((x % y >= 0) ? 0 : battery::signum(y));
723}
724
725template <class T>
726CUDA constexpr T emod(T x, T y) {
727 static_assert(std::is_integral_v<T>, "emod only works on integer values.");
728 return x % y + y * ((x % y >= 0) ? 0 : battery::signum(y));
729}
730
731template<typename T>
732CUDA NI void print(const T& t) {
733 t.print();
734}
735template<> CUDA NI inline void print(const bool &x) { x ? printf("true") : printf("false"); }
736template<> CUDA NI inline void print(const char &x) { printf("%c", x); }
737template<> CUDA NI inline void print(const short &x) { printf("%d", (int)x); }
738template<> CUDA NI inline void print(const int &x) { printf("%d", x); }
739template<> CUDA NI inline void print(const long long int &x) { printf("%lld", x); }
740template<> CUDA NI inline void print(const long int &x) { printf("%ld", x); }
741template<> CUDA NI inline void print(const unsigned char &x) { printf("%d", (int)x); }
742template<> CUDA NI inline void print(const unsigned short &x) { printf("%d", (int)x); }
743template<> CUDA NI inline void print(const unsigned int &x) { printf("%u", x); }
744template<> CUDA NI inline void print(const unsigned long &x) { printf("%lu", x); }
745template<> CUDA NI inline void print(const unsigned long long &x) { printf("%llu", x); }
746template<> CUDA NI inline void print(const float &x) { printf("%f", x); }
747template<> CUDA NI inline void print(const double &x) { printf("%lf", x); }
748template<> CUDA NI inline void print(char const* const &x) { printf("%s", x); }
749
750} // namespace battery
751
752#endif // UTILITY_HPP
Definition algorithm.hpp:10
CUDA NI constexpr int countl_zero(T x)
Definition utility.hpp:427
CUDA constexpr T mul_down(T x, T y)
Definition utility.hpp:644
CUDA T iroots_up(T x, int r)
Definition utility.hpp:560
CUDA NI constexpr int countr_one(T x)
Definition utility.hpp:504
CUDA constexpr T ediv(T x, T y)
Definition utility.hpp:720
CUDA INLINE constexpr T min(T a, T b)
Definition utility.hpp:116
CUDA constexpr T add_down(T x, T y)
Definition utility.hpp:596
CUDA constexpr T div_down(T x, T y)
Definition utility.hpp:668
CUDA constexpr T add_up(T x, T y)
Definition utility.hpp:584
CUDA INLINE constexpr T max(T a, T b)
Definition utility.hpp:128
CUDA T iroots_down(T x, int r)
Definition utility.hpp:551
CUDA constexpr T tdiv(T x, T y)
Definition utility.hpp:681
CUDA constexpr T fmod(T x, T y)
Definition utility.hpp:700
CUDA NI constexpr int countr_zero(T x)
Definition utility.hpp:474
CUDA constexpr T fdiv(T x, T y)
Definition utility.hpp:694
CUDA constexpr void swap(T &a, T &b)
Definition utility.hpp:91
CUDA NI void print(const T &t)
Definition utility.hpp:732
CUDA constexpr T tmod(T x, T y)
Definition utility.hpp:687
CUDA constexpr T div_up(T x, T y)
Definition utility.hpp:656
CUDA NI constexpr int popcount(T x)
Definition utility.hpp:402
CUDA constexpr T emod(T x, T y)
Definition utility.hpp:726
CUDA constexpr T mul_up(T x, T y)
Definition utility.hpp:632
CUDA size_t strlen(const char *str)
Definition utility.hpp:99
CUDA constexpr T sub_down(T x, T y)
Definition utility.hpp:620
CUDA constexpr T isnan(T a)
Definition utility.hpp:140
CUDA NI constexpr To rd_cast(From x)
Definition utility.hpp:313
CUDA NI constexpr int countl_one(T x)
Definition utility.hpp:455
CUDA int strcmp(const char *s1, const char *s2)
Definition utility.hpp:108
CUDA NI constexpr To ru_cast(From x)
Definition utility.hpp:217
CUDA constexpr T sub_up(T x, T y)
Definition utility.hpp:608
CUDA constexpr T cmod(T x, T y)
Definition utility.hpp:713
CUDA CONSTEXPR_NEXTAFTER float nextafter(float f, float dir)
Definition utility.hpp:163
CUDA constexpr int signum(T val)
Definition utility.hpp:524
CUDA constexpr double pow(double a, double b)
Definition utility.hpp:148
CUDA NI constexpr T ipow(T a, T b)
Definition utility.hpp:530
CUDA constexpr T cdiv(T x, T y)
Definition utility.hpp:707
Definition utility.hpp:185
static constexpr T neg_inf()
Definition utility.hpp:186
static constexpr T inf()
Definition utility.hpp:192
#define INLINE
Definition utility.hpp:63
#define CONSTEXPR_NEXTAFTER
Definition utility.hpp:160
#define CUDA
Definition utility.hpp:59
#define NI
Definition utility.hpp:62