Cuda battery library
Loading...
Searching...
No Matches
utility.hpp
Go to the documentation of this file.
1// Copyright 2021 Pierre Talbot, Frédéric Pinel
2
3#ifndef CUDA_BATTERY_UTILITY_HPP
4#define CUDA_BATTERY_UTILITY_HPP
5
6#include <cstdio>
7#include <cassert>
8#include <limits>
9#include <climits>
10#include <algorithm>
11#include <cstring>
12#include <cmath>
13#include <cfenv>
14#include <bit>
15
16#ifdef __CUDACC__
17 #define CUDA_GLOBAL __global__
18
19 #ifdef REDUCE_PTX_SIZE
20 /** `NI` stands for noinline, to hint `nvcc` the function should not be inlined. */
21 #define NI __noinline__
22 #else
23 #define NI
24 #endif
25
26 /** Request a function to be inlined. */
27 #define INLINE __forceinline__
28
29 /** `CUDA` is a macro indicating that a function can be executed on a GPU. It is defined to `__device__ __host__` when the code is compiled with `nvcc`. */
30 #define CUDA __device__ __host__
31
32 namespace battery {
33 namespace impl {
34 CUDA NI inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
35 if (code != cudaSuccess) {
36 printf("%s:%d CUDA runtime error %s\n", file, line, cudaGetErrorString(code));
37 if (abort) {
38 #ifdef __CUDA_ARCH__
39 assert(0);
40 #else
41 exit(code);
42 #endif
43 }
44 }
45 }
46 }}
47
48 /** A macro checking the result of a CUDA API call.
49 * It prints an error message if an error occured.
50 */
51 #define CUDAE(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, false); }
52
53 /** Similar to CUDAE but abort the computation in addition, either with `assert` (GPU) or `exit` (CPU).
54 */
55 #define CUDAEX(result) { ::battery::impl::gpuAssert((result), __FILE__, __LINE__, true); }
56
57#else
58 #define CUDA_GLOBAL
59 #define CUDA
60 #define CUDAE(S) S
61 #define CUDAEX(S) S
62 #define NI
63 #define INLINE inline
64#endif
65
66namespace battery {
67
68namespace impl {
69 template<class T> CUDA constexpr inline void swap(T& a, T& b) {
70 T c(std::move(a));
71 a = std::move(b);
72 b = std::move(c);
73 }
74
75 CUDA constexpr inline size_t strlen(const char* str) {
76 size_t n = 0;
77 while(str[n] != '\0') { ++n; }
78 return n;
79 }
80
81 /** See https://stackoverflow.com/a/34873406/2231159 */
82 CUDA constexpr inline int strcmp(const char* s1, const char* s2) {
83 while(*s1 && (*s1 == *s2)) {
84 s1++;
85 s2++;
86 }
87 return *(const unsigned char*)s1 - *(const unsigned char*)s2;
88 }
89}
90
91template<class T> CUDA constexpr inline void swap(T& a, T& b) {
92 #ifdef __CUDA_ARCH__
93 impl::swap(a, b);
94 #else
95 std::swap(a, b);
96 #endif
97}
98
99CUDA inline size_t strlen(const char* str) {
100 #ifdef __CUDA_ARCH__
101 return impl::strlen(str);
102 #else
103 return std::strlen(str);
104 #endif
105}
106
107/** See https://stackoverflow.com/a/34873406/2231159 */
108CUDA inline int strcmp(const char* s1, const char* s2) {
109 #ifdef __CUDA_ARCH__
110 return impl::strcmp(s1, s2);
111 #else
112 return std::strcmp(s1, s2);
113 #endif
114}
115
116template<class T> CUDA INLINE constexpr T min(T a, T b) {
117 #ifdef __CUDA_ARCH__
118 // When C++23 is available
119 // if !consteval { return ::min(a, b); }
120 // else { return std::min(a, b); }
121 // return a < b ? a : b;
122 return ::min(a, b);
123 #else
124 return std::min(a, b);
125 #endif
126}
127
128template<class T> CUDA INLINE constexpr T max(T a, T b) {
129 #ifdef __CUDA_ARCH__
130 // When C++23 is available
131 // if !consteval { return ::max(a, b); }
132 // else { return std::max(a, b); }
133 // return a > b ? a : b;
134 return ::max(a, b); // a > b ? a : b;
135 #else
136 return std::max(a, b);
137 #endif
138}
139
140template<class T> CUDA constexpr T isnan(T a) {
141 #ifdef __CUDA_ARCH__
142 return ::isnan(a);
143 #else
144 return std::isnan(a);
145 #endif
146}
147
148#ifdef _MSC_VER
149// MSVC omits constexpr for nextafter (officially not available until C++23)
150# define CONSTEXPR_NEXTAFTER
151#else
152# define CONSTEXPR_NEXTAFTER constexpr
153#endif
154
155CUDA CONSTEXPR_NEXTAFTER inline float nextafter(float f, float dir) {
156 #ifdef __CUDA_ARCH__
157 return ::nextafterf(f, dir);
158 #else
159 return std::nextafterf(f, dir);
160 #endif
161}
162
163CUDA CONSTEXPR_NEXTAFTER inline double nextafter(double f, double dir) {
164 #ifdef __CUDA_ARCH__
165 return ::nextafter(f, dir);
166 #else
167 return std::nextafter(f, dir);
168 #endif
169}
170
171/** `limits` is a structure to get "infinity points" of primitive types including integers.
172 * For floating-point numbers, we use their built-in representation of infinity.
173 * For integers, we use the minimal and maximal values of the underlying type to represent infinities.
174 * When converting using `ru_cast` and `rd_cast`, the infinities will be preserved across types.
175 */
176template<class T>
177struct limits {
178 static constexpr T neg_inf() {
179 if constexpr (std::is_floating_point<T>()) {
180 return -std::numeric_limits<T>::infinity();
181 }
182 return std::numeric_limits<T>::min();
183 }
184 static constexpr T inf() {
185 if constexpr (std::is_floating_point<T>()) {
186 return std::numeric_limits<T>::infinity();
187 }
188 return std::numeric_limits<T>::max();
189 }
190};
191
192#define MAP_LIMITS(x, From, To) \
193 if(x == 0) { return 0; } \
194 if(x == limits<From>::neg_inf()) {\
195 return limits<To>::neg_inf(); \
196 } \
197 if(x == limits<From>::inf()) {\
198 return limits<To>::inf(); \
199 }
200
201/** Cast the variable `x` from type `From` to type `To` following upper rounding rule (cast in the direction of infinity).
202 Minimal and maximal values of `From` are interpreted as infinities, and are therefore mapped to the infinities of the new types accordingly (e.g., float INF maps to int MAX_INT).
203
204 - On CPU: Rounding mode is UPWARD after this operation.
205 - On GPU: CUDA intrinsics are used.
206
207 Overflow: Nothing is done to prevent overflow, it mostly behaves as with `static_cast`. */
208template<class To, class From, bool map_limits = true>
209CUDA NI constexpr To ru_cast(From x) {
210 if constexpr(std::is_same_v<To, From>) {
211 return x;
212 }
213 if constexpr(map_limits) {
214 MAP_LIMITS(x, From, To)
215 }
216 #ifdef __CUDA_ARCH__
217 // Integer to floating-point number cast.
218 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
219 if constexpr(std::is_same_v<From, unsigned long long>) {
220 if constexpr(std::is_same_v<To, float>) {
221 return __ull2float_ru(x);
222 }
223 else if constexpr(std::is_same_v<To, double>) {
224 return __ull2double_ru(x);
225 }
226 else {
227 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in ru_cast.");
228 }
229 }
230 else if constexpr(std::is_same_v<From, int>) {
231 if constexpr(std::is_same_v<To, float>) {
232 return __int2float_ru(x);
233 }
234 else if constexpr(std::is_same_v<To, double>) {
235 return __int2double_rn(x);
236 }
237 else {
238 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in ru_cast.");
239 }
240 }
241 else {
242 static_assert(sizeof(long long int) >= sizeof(From));
243 if constexpr(std::is_same_v<To, float>) {
244 return __ll2float_ru(x);
245 }
246 else if constexpr(std::is_same_v<To, double>) {
247 return __ll2double_ru(x);
248 }
249 else {
250 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in ru_cast.");
251 }
252 }
253 }
254 // Floating-point number to integer number.
255 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
256 if constexpr(std::is_same_v<From, float>) {
257 return static_cast<To>(__float2ll_ru(x));
258 }
259 else if constexpr(std::is_same_v<From, double>) {
260 return static_cast<To>(__double2ll_ru(x));
261 }
262 else {
263 static_assert(std::is_same_v<From, float>, "Unsupported combination of types in ru_cast.");
264 }
265 }
266 // Floating-point to floating-point.
267 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
268 return __double2float_ru(x);
269 }
270 #else
271 // Integer to floating-point number cast.
272 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
273 #if !defined(__GNUC__) && !defined(_MSC_VER)
274 #pragma STDC FENV_ACCESS ON
275 #endif
276 int r = std::fesetround(FE_UPWARD);
277 assert(r == 0);
278 return static_cast<To>(x);
279 }
280 // Floating-point number to integer number.
281 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
282 return static_cast<To>(std::ceil(x));
283 }
284 // Floating-point to floating-point.
285 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
286 #if !defined(__GNUC__) && !defined(_MSC_VER)
287 #pragma STDC FENV_ACCESS ON
288 #endif
289 int r = std::fesetround(FE_UPWARD);
290 assert(r == 0);
291 return static_cast<To>(x);
292 }
293 #endif
294 return static_cast<To>(x);
295}
296
297/** Cast the variable `x` from type `From` to type `To` following down rounding rule (cast in the direction of negative infinity).
298 Minimal and maximal values of `From` are interpreted as infinities, and are therefore mapped to the infinities of the new types accordingly (e.g., float INF maps to int MAX_INT).
299
300 - On CPU: Rounding mode is DOWNWARD after this operation.
301 - On GPU: CUDA intrinsics are used.
302
303 Overflow: Nothing is done to prevent overflow, it mostly behaves as with `static_cast`. */
304template<class To, class From, bool map_limits=true>
305CUDA NI constexpr To rd_cast(From x) {
306 if constexpr(std::is_same_v<To, From>) {
307 return x;
308 }
309 if constexpr(map_limits) {
310 MAP_LIMITS(x, From, To)
311 }
312 #ifdef __CUDA_ARCH__
313 // Integer to floating-point number cast.
314 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
315 if constexpr(std::is_same_v<From, unsigned long long>) {
316 if constexpr(std::is_same_v<To, float>) {
317 return __ull2float_rd(x);
318 }
319 else if constexpr(std::is_same_v<To, double>) {
320 return __ull2double_rd(x);
321 }
322 else {
323 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
324 }
325 }
326 else if constexpr(std::is_same_v<From, int>) {
327 if constexpr(std::is_same_v<To, float>) {
328 return __int2float_rd(x);
329 }
330 else if constexpr(std::is_same_v<To, double>) {
331 return __int2double_rn(x);
332 }
333 else {
334 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
335 }
336 }
337 else {
338 static_assert(sizeof(long long int) >= sizeof(From));
339 if constexpr(std::is_same_v<To, float>) {
340 return __ll2float_rd(x);
341 }
342 else if constexpr(std::is_same_v<To, double>) {
343 return __ll2double_rd(x);
344 }
345 else {
346 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
347 }
348 }
349 }
350 // Floating-point number to integer number.
351 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
352 if constexpr(std::is_same_v<From, float>) {
353 return static_cast<To>(__float2ll_rd(x));
354 }
355 else if constexpr(std::is_same_v<From, double>) {
356 return static_cast<To>(__double2ll_rd(x));
357 }
358 else {
359 static_assert(std::is_same_v<To, float>, "Unsupported combination of types in rd_cast.");
360 }
361 }
362 // Floating-point to floating-point.
363 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
364 return __double2float_rd(x);
365 }
366 #else
367 // Integer to floating-point number cast.
368 if constexpr(std::is_integral_v<From> && std::is_floating_point_v<To>) {
369 #if !defined(__GNUC__) && !defined(_MSC_VER)
370 #pragma STDC FENV_ACCESS ON
371 #endif
372 int r = std::fesetround(FE_DOWNWARD);
373 assert(r == 0);
374 return static_cast<To>(x);
375 }
376 // Floating-point number to integer number.
377 else if constexpr(std::is_floating_point_v<From> && std::is_integral_v<To>) {
378 return static_cast<To>(std::floor(x));
379 }
380 // Floating-point to floating-point.
381 else if constexpr(std::is_same_v<From, double> && std::is_same_v<To, float>) {
382 #if !defined(__GNUC__) && !defined(_MSC_VER)
383 #pragma STDC FENV_ACCESS ON
384 #endif
385 int r = std::fesetround(FE_DOWNWARD);
386 assert(r == 0);
387 return static_cast<To>(x);
388 }
389 #endif
390 return static_cast<To>(x);
391}
392
393template<class T>
394CUDA NI constexpr int popcount(T x) {
395 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "popcount only works on unsigned integers");
396 #ifdef __CUDA_ARCH__
397 if constexpr(std::is_same_v<T, unsigned int>) {
398 return __popc(x);
399 }
400 else if constexpr(std::is_same_v<T, unsigned long long>) {
401 return __popcll(x);
402 }
403 else {
404 return __popcll(static_cast<unsigned long long>(x));
405 }
406 #elif __cpp_lib_bitops
407 return std::popcount(x);
408 #else
409 int c = 0;
410 for(int i = 0; i < sizeof(T) * CHAR_BIT && x != 0; ++i) {
411 c += (x & 1);
412 x >>= 1;
413 }
414 return c;
415 #endif
416}
417
418template<class T>
419CUDA NI constexpr int countl_zero(T x) {
420 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countl_zero only works on unsigned integers");
421 #ifdef __CUDA_ARCH__
422 // If the size of `T` is smaller than `int` or `long long int` we must remove the extra zeroes that are added after conversion.
423 if constexpr(sizeof(T) <= sizeof(int)) {
424 return __clz(x) - ((sizeof(int) - sizeof(T)) * CHAR_BIT);
425 }
426 else if constexpr(sizeof(T) <= sizeof(long long int)) {
427 return __clzll(x) - ((sizeof(long long int) - sizeof(T)) * CHAR_BIT);
428 }
429 else {
430 static_assert(sizeof(T) < sizeof(long long int), "countX_Y (CUDA version) only supports types smaller than long long int.");
431 }
432 #elif __cpp_lib_bitops
433 return std::countl_zero(x);
434 #else
435 int c = 0;
436 constexpr int bits = sizeof(T) * CHAR_BIT;
437 constexpr T mask = (T)1 << (bits - 1);
438 for(int i = 0; i < bits && (x & mask) == 0; ++i) {
439 c += (x & mask) == 0;
440 x <<= 1;
441 }
442 return c;
443 #endif
444}
445
446template<class T>
447CUDA NI constexpr int countl_one(T x) {
448 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countl_one only works on unsigned integers");
449 #ifdef __CUDA_ARCH__
450 return countl_zero((T)~x);
451 #elif __cpp_lib_bitops
452 return std::countl_one(x);
453 #else
454 int c = 0;
455 constexpr int bits = sizeof(T) * CHAR_BIT;
456 constexpr T mask = (T)1 << (bits - 1);
457 for(int i = 0; i < bits && (x & mask) > 0; ++i) {
458 c += (x & mask) > 0;
459 x <<= 1;
460 }
461 return c;
462 #endif
463}
464
465template<class T>
466CUDA NI constexpr int countr_zero(T x) {
467 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countl_zero only works on unsigned integers");
468 #ifdef __CUDA_ARCH__
469 if(x == 0) {
470 return sizeof(T) * CHAR_BIT;
471 }
472 if constexpr(sizeof(T) <= sizeof(int)) {
473 return __ffs(x) - 1;
474 }
475 else if constexpr(sizeof(T) <= sizeof(long long int)) {
476 return __ffsll(x) - 1;
477 }
478 else {
479 static_assert(sizeof(T) < sizeof(long long int), "countr_zero (CUDA version) only supports types smaller or equal to long long int.");
480 }
481 #elif __cpp_lib_bitops
482 return std::countr_zero(x);
483 #else
484 int c = 0;
485 constexpr int bits = sizeof(T) * CHAR_BIT;
486 constexpr T mask = 1;
487 for(int i = 0; i < bits && (x & mask) == 0; ++i) {
488 c += (x & mask) == 0;
489 x >>= 1;
490 }
491 return c;
492 #endif
493}
494
495template<class T>
496CUDA NI constexpr int countr_one(T x) {
497 static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>, "countr_one only works on unsigned integers");
498 #ifdef __CUDA_ARCH__
499 return countr_zero((T)~x);
500 #elif __cpp_lib_bitops
501 return std::countr_one(x);
502 #else
503 int c = 0;
504 constexpr int bits = sizeof(T) * CHAR_BIT;
505 constexpr T mask = 1;
506 for(int i = 0; i < bits && (x & mask) > 0; ++i) {
507 c += (x & mask) > 0;
508 x >>= 1;
509 }
510 return c;
511 #endif
512}
513
514/** Signum function, https://stackoverflow.com/a/4609795/2231159 */
515template <class T>
516CUDA constexpr int signum(T val) {
517 return (T(0) < val) - (val < T(0));
518}
519
520/** Precondition: T is an integer with b >= 0.*/
521template <class T>
522CUDA NI constexpr T ipow(T a, T b) {
523 static_assert(std::is_integral_v<T>, "ipow is only working on integer value.");
524 assert(b >= 0);
525 if(b == 2) {
526 return a*a;
527 }
528 // Code taken from GeCode implementation.
529 T p = 1;
530 do {
531 if (b % 2 == 0) {
532 a *= a;
533 b >>= 1;
534 } else {
535 p *= a;
536 b--;
537 }
538 } while (b > 0);
539 return p;
540}
541
542#define FLOAT_ARITHMETIC_CUDA_IMPL(name, cudaname) \
543 if constexpr(std::is_same_v<T, float>) { \
544 return __f ## cudaname(x, y); \
545 } \
546 else if constexpr(std::is_same_v<T, double>) { \
547 return __d ## cudaname(x, y); \
548 } \
549 else { \
550 static_assert(std::is_same_v<T, float>, #name " (CUDA version) only support float or double types."); \
551 }
552
553#define FLOAT_ARITHMETIC_CPP_IMPL(cppop, cppround) \
554 int r = std::fesetround(cppround); \
555 assert(r == 0); \
556 return x cppop y;
557
558template <class T>
559CUDA constexpr T add_up(T x, T y) {
560 #ifdef __CUDA_ARCH__
561 FLOAT_ARITHMETIC_CUDA_IMPL(add_up, add_ru)
562 #else
563 #if !defined(__GNUC__) && !defined(_MSC_VER)
564 #pragma STDC FENV_ACCESS ON
565 #endif
566 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_UPWARD)
567 #endif
568}
569
570template <class T>
571CUDA constexpr T add_down(T x, T y) {
572 #ifdef __CUDA_ARCH__
573 FLOAT_ARITHMETIC_CUDA_IMPL(add_down, add_rd)
574 #else
575 #if !defined(__GNUC__) && !defined(_MSC_VER)
576 #pragma STDC FENV_ACCESS ON
577 #endif
578 FLOAT_ARITHMETIC_CPP_IMPL(+, FE_DOWNWARD)
579 #endif
580}
581
582template <class T>
583CUDA constexpr T sub_up(T x, T y) {
584 #ifdef __CUDA_ARCH__
585 FLOAT_ARITHMETIC_CUDA_IMPL(sub_up, sub_ru)
586 #else
587 #if !defined(__GNUC__) && !defined(_MSC_VER)
588 #pragma STDC FENV_ACCESS ON
589 #endif
590 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_UPWARD)
591 #endif
592}
593
594template <class T>
595CUDA constexpr T sub_down(T x, T y) {
596 #ifdef __CUDA_ARCH__
597 FLOAT_ARITHMETIC_CUDA_IMPL(sub_down, sub_rd)
598 #else
599 #if !defined(__GNUC__) && !defined(_MSC_VER)
600 #pragma STDC FENV_ACCESS ON
601 #endif
602 FLOAT_ARITHMETIC_CPP_IMPL(-, FE_DOWNWARD)
603 #endif
604}
605
606template <class T>
607CUDA constexpr T mul_up(T x, T y) {
608 #ifdef __CUDA_ARCH__
609 FLOAT_ARITHMETIC_CUDA_IMPL(mul_up, mul_ru)
610 #else
611 #if !defined(__GNUC__) && !defined(_MSC_VER)
612 #pragma STDC FENV_ACCESS ON
613 #endif
614 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_UPWARD)
615 #endif
616}
617
618template <class T>
619CUDA constexpr T mul_down(T x, T y) {
620 #ifdef __CUDA_ARCH__
621 FLOAT_ARITHMETIC_CUDA_IMPL(mul_down, mul_rd)
622 #else
623 #if !defined(__GNUC__) && !defined(_MSC_VER)
624 #pragma STDC FENV_ACCESS ON
625 #endif
626 FLOAT_ARITHMETIC_CPP_IMPL(*, FE_DOWNWARD)
627 #endif
628}
629
630template <class T>
631CUDA constexpr T div_up(T x, T y) {
632 #ifdef __CUDA_ARCH__
633 FLOAT_ARITHMETIC_CUDA_IMPL(div_up, div_ru)
634 #else
635 #if !defined(__GNUC__) && !defined(_MSC_VER)
636 #pragma STDC FENV_ACCESS ON
637 #endif
638 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_UPWARD)
639 #endif
640}
641
642template <class T>
643CUDA constexpr T div_down(T x, T y) {
644 #ifdef __CUDA_ARCH__
645 FLOAT_ARITHMETIC_CUDA_IMPL(div_down, div_rd)
646 #else
647 #if !defined(__GNUC__) && !defined(_MSC_VER)
648 #pragma STDC FENV_ACCESS ON
649 #endif
650 FLOAT_ARITHMETIC_CPP_IMPL(/, FE_DOWNWARD)
651 #endif
652}
653
654// Truncated division and modulus, by default in C++.
655template <class T>
656CUDA constexpr T tdiv(T x, T y) {
657 static_assert(std::is_integral_v<T>, "tdiv only works on integer values.");
658 return x / y;
659}
660
661template <class T>
662CUDA constexpr T tmod(T x, T y) {
663 static_assert(std::is_integral_v<T>, "tdiv only works on integer values.");
664 return x % y;
665}
666
667// Floor division and modulus, see (Leijen D. (2003). Division and Modulus for Computer Scientists).
668template <class T>
669CUDA constexpr T fdiv(T x, T y) {
670 static_assert(std::is_integral_v<T>, "fdiv only works on integer values.");
671 return x / y - (battery::signum(x % y) == -battery::signum(y));
672}
673
674template <class T>
675CUDA constexpr T fmod(T x, T y) {
676 static_assert(std::is_integral_v<T>, "fmod only works on integer values.");
677 return x % y + y * (battery::signum(x % y) == -battery::signum(y));
678}
679
680// Ceil division and modulus.
681template <class T>
682CUDA constexpr T cdiv(T x, T y) {
683 static_assert(std::is_integral_v<T>, "cdiv only works on integer values.");
684 return x / y + (battery::signum(x % y) == battery::signum(y));
685}
686
687template <class T>
688CUDA constexpr T cmod(T x, T y) {
689 static_assert(std::is_integral_v<T>, "cmod only works on integer values.");
690 return x % y - y * (battery::signum(x % y) == battery::signum(y));
691}
692
693// Euclidean division and modulus, see (Leijen D. (2003). Division and Modulus for Computer Scientists).
694template <class T>
695CUDA constexpr T ediv(T x, T y) {
696 static_assert(std::is_integral_v<T>, "ediv only works on integer values.");
697 return x / y - ((x % y >= 0) ? 0 : battery::signum(y));
698}
699
700template <class T>
701CUDA constexpr T emod(T x, T y) {
702 static_assert(std::is_integral_v<T>, "emod only works on integer values.");
703 return x % y + y * ((x % y >= 0) ? 0 : battery::signum(y));
704}
705
706template<typename T>
707CUDA NI void print(const T& t) {
708 t.print();
709}
710template<> CUDA NI inline void print(const bool &x) { x ? printf("true") : printf("false"); }
711template<> CUDA NI inline void print(const char &x) { printf("%c", x); }
712template<> CUDA NI inline void print(const short &x) { printf("%d", (int)x); }
713template<> CUDA NI inline void print(const int &x) { printf("%d", x); }
714template<> CUDA NI inline void print(const long long int &x) { printf("%lld", x); }
715template<> CUDA NI inline void print(const long int &x) { printf("%ld", x); }
716template<> CUDA NI inline void print(const unsigned char &x) { printf("%d", (int)x); }
717template<> CUDA NI inline void print(const unsigned short &x) { printf("%d", (int)x); }
718template<> CUDA NI inline void print(const unsigned int &x) { printf("%u", x); }
719template<> CUDA NI inline void print(const unsigned long &x) { printf("%lu", x); }
720template<> CUDA NI inline void print(const unsigned long long &x) { printf("%llu", x); }
721template<> CUDA NI inline void print(const float &x) { printf("%f", x); }
722template<> CUDA NI inline void print(const double &x) { printf("%lf", x); }
723template<> CUDA NI inline void print(char const* const &x) { printf("%s", x); }
724
725} // namespace battery
726
727#endif // UTILITY_HPP
Definition algorithm.hpp:10
CUDA NI constexpr int countl_zero(T x)
Definition utility.hpp:419
CUDA constexpr T mul_down(T x, T y)
Definition utility.hpp:619
CUDA NI constexpr int countr_one(T x)
Definition utility.hpp:496
CUDA constexpr T ediv(T x, T y)
Definition utility.hpp:695
CUDA INLINE constexpr T min(T a, T b)
Definition utility.hpp:116
CUDA constexpr T add_down(T x, T y)
Definition utility.hpp:571
CUDA constexpr T div_down(T x, T y)
Definition utility.hpp:643
CUDA constexpr T add_up(T x, T y)
Definition utility.hpp:559
CUDA INLINE constexpr T max(T a, T b)
Definition utility.hpp:128
CUDA constexpr T tdiv(T x, T y)
Definition utility.hpp:656
CUDA constexpr T fmod(T x, T y)
Definition utility.hpp:675
CUDA NI constexpr int countr_zero(T x)
Definition utility.hpp:466
CUDA constexpr T fdiv(T x, T y)
Definition utility.hpp:669
CUDA constexpr void swap(T &a, T &b)
Definition utility.hpp:91
CUDA NI void print(const T &t)
Definition utility.hpp:707
CUDA constexpr T tmod(T x, T y)
Definition utility.hpp:662
CUDA constexpr T div_up(T x, T y)
Definition utility.hpp:631
CUDA NI constexpr int popcount(T x)
Definition utility.hpp:394
CUDA constexpr T emod(T x, T y)
Definition utility.hpp:701
CUDA constexpr T mul_up(T x, T y)
Definition utility.hpp:607
CUDA size_t strlen(const char *str)
Definition utility.hpp:99
CUDA constexpr T sub_down(T x, T y)
Definition utility.hpp:595
CUDA constexpr T isnan(T a)
Definition utility.hpp:140
CUDA NI constexpr To rd_cast(From x)
Definition utility.hpp:305
CUDA NI constexpr int countl_one(T x)
Definition utility.hpp:447
CUDA int strcmp(const char *s1, const char *s2)
Definition utility.hpp:108
CUDA NI constexpr To ru_cast(From x)
Definition utility.hpp:209
CUDA constexpr T sub_up(T x, T y)
Definition utility.hpp:583
CUDA constexpr T cmod(T x, T y)
Definition utility.hpp:688
CUDA CONSTEXPR_NEXTAFTER float nextafter(float f, float dir)
Definition utility.hpp:155
CUDA constexpr int signum(T val)
Definition utility.hpp:516
CUDA NI constexpr T ipow(T a, T b)
Definition utility.hpp:522
CUDA constexpr T cdiv(T x, T y)
Definition utility.hpp:682
Definition utility.hpp:177
static constexpr T neg_inf()
Definition utility.hpp:178
static constexpr T inf()
Definition utility.hpp:184
#define INLINE
Definition utility.hpp:63
#define CONSTEXPR_NEXTAFTER
Definition utility.hpp:152
#define CUDA
Definition utility.hpp:59
#define NI
Definition utility.hpp:62