12#ifndef MFEM_REDUCERS_HPP
13#define MFEM_REDUCERS_HPP
68 static_assert(std::is_integral<T>::value,
"Only works for integral types");
86 static_assert(std::is_integral<T>::value,
"Only works for integral types");
113 static constexpr T
max_val = std::numeric_limits<T>::max();
159 static constexpr T
min_val = std::numeric_limits<T>::min();
198 if (
b.first <
a.first)
202 if (
b.second >
a.second)
212 static constexpr T
min_val = std::numeric_limits<T>::min();
213 static constexpr T
max_val = std::numeric_limits<T>::max();
226 a.first = fmin(
a.first,
b.first);
227 a.second = fmax(
a.second,
b.second);
241 a.first = fmin(
a.first,
b.first);
242 a.second = fmax(
a.second,
b.second);
260 if (
b.first <=
a.first)
272 std::integral_constant<T, std::numeric_limits<T>::max()>::value, I{0}};
281 if (
b.first <=
a.first)
297 if (
b.first <=
a.first)
317 if (
a.first <=
b.first)
329 std::integral_constant<T, std::numeric_limits<T>::max()>::value, I{0}};
338 if (
a.first <=
b.first)
354 if (
a.first <=
b.first)
370 if (
b.min_val <=
a.min_val)
372 a.min_val =
b.min_val;
373 a.min_loc =
b.min_loc;
375 if (
b.max_val >=
a.max_val)
377 a.max_val =
b.max_val;
378 a.max_loc =
b.max_loc;
389 std::integral_constant<T, std::numeric_limits<T>::max()>::value,
390 std::integral_constant<T, std::numeric_limits<T>::min()>::value, I(0),
400 if (
b.min_val <=
a.min_val)
402 a.min_val =
b.min_val;
403 a.min_loc =
b.min_loc;
405 if (
b.max_val >=
a.max_val)
407 a.max_val =
b.max_val;
408 a.max_loc =
b.max_loc;
423 if (
b.min_val <=
a.min_val)
425 a.min_val =
b.min_val;
426 a.min_loc =
b.min_loc;
428 if (
b.max_val >=
a.max_val)
430 a.max_val =
b.max_val;
431 a.max_loc =
b.max_loc;
451template<
class B,
class R>
struct reduction_kernel
454 using value_type =
typename R::value_type;
456 mutable value_type *work;
462 int items_per_thread;
464 constexpr static MFEM_HOST_DEVICE
int max_blocksize() {
return 256; }
/// Returns the bit width of @a N, i.e. the number of bits needed to
/// represent it: floor(log2(N)) + 1 for N > 0, and 0 for N == 0.
///
/// As a consequence, (1 << block_log2(N)) is the smallest power of two that
/// is strictly greater than N.
///
/// @param N value whose bit width is requested.
/// @return the bit width of @a N, in the range [0, 8 * sizeof(unsigned)].
static int block_log2(unsigned N)
{
#if defined(__GNUC__) || defined(__clang__)
   // __builtin_clz has an undefined result for 0, so handle it explicitly.
   return N ? static_cast<int>(sizeof(unsigned) * 8 - __builtin_clz(N)) : 0;
#else
   // Portable path, also used for MSVC: it needs no <intrin.h> include and
   // does not rely on the CPU implementing the LZCNT instruction. This runs
   // on the host once per reduction, so the loop cost is irrelevant.
   int width = 0;
   while (N)
   {
      N >>= 1;
      ++width;
   }
   return width;
#endif
}
484 MFEM_HOST_DEVICE
void operator()(
int work_idx)
const
486 MFEM_SHARED value_type buffer[max_blocksize()];
487 reducer.SetInitialValue(buffer[MFEM_THREAD_ID(x)]);
489 for (
int idx = 0; idx < items_per_thread; ++idx)
491 int i = MFEM_THREAD_ID(x) +
492 (idx + work_idx * items_per_thread) * MFEM_THREAD_SIZE(x);
495 body(i, buffer[MFEM_THREAD_ID(x)]);
503 for (
int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
506 if (MFEM_THREAD_ID(x) < i)
508 reducer.Join(buffer[MFEM_THREAD_ID(x)], buffer[MFEM_THREAD_ID(x) + i]);
511 if (MFEM_THREAD_ID(x) == 0)
513 work[work_idx] = buffer[0];
531template <
class T,
class B,
class R>
532void reduce(
int N, T &res, B &&body,
const R &reducer,
bool use_dev,
540#if defined(MFEM_USE_CUDA_OR_HIP)
545 using red_type = internal::reduction_kernel<typename std::decay<B>::type,
546 typename std::decay<R>::type>;
548 int block_size = std::min<int>(red_type::max_blocksize(),
549 1ll << red_type::block_log2(N));
552#if defined(MFEM_USE_CUDA)
554 constexpr int mp_sat = 8;
555#elif defined(MFEM_USE_HIP)
557 constexpr int mp_sat = 4;
560 constexpr int mp_sat = 1;
564 int nblocks = std::min(mp_sat * num_mp, (N + block_size - 1) / block_size);
565 int items_per_thread =
566 (N + block_size * nblocks - 1) / (block_size * nblocks);
568 red_type red{
nullptr, std::forward<B>(body), reducer, N, items_per_thread};
570 auto mt = workspace.
GetMemory().GetMemoryType();
575 workspace.
SetSize(nblocks, mt);
578 forall_2D(nblocks, block_size, 1, std::move(red));
581 for (
int i = 0; i < nblocks; ++i)
583 reducer.Join(res, work[i]);
589 for (
int i = 0; i < N; ++i)
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
T * HostWrite()
Shortcut for mfem::Write(a.GetMemory(), a.Size(), false).
static int NumMultiprocessors()
Same as NumMultiprocessors(int), for the currently active device.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
static int GetId()
Get the device ID of the configured device.
void reduce(int N, T &res, B &&body, const R &reducer, bool use_dev, Array< T > &workspace)
Performs a 1D reduction on the range [0,N). res holds the initial value and is where the result will be written....
void forall_2D(int N, int X, int Y, lambda &&body)
@ HOST_PINNED
Host memory: pinned (page-locked)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(T &a)
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Pair of values which can be used in device code.
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static constexpr T min_val
Two pairs for the min/max values and their location indices.
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static constexpr T max_val
static constexpr T min_val
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
static constexpr T max_val
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)