MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
reducers.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_REDUCERS_HPP
13#define MFEM_REDUCERS_HPP
14
15#include "array.hpp"
16#include "forall.hpp"
17
18#include <cmath>
19#include <limits>
20#include <type_traits>
21
22namespace mfem
23{
24
25/// Pair of values which can be used in device code
26template <class A, class B> struct DevicePair
27{
30};
31
32/// Two pairs for the min/max values and their location indices
33template <class A, class B> struct MinMaxLocScalar
34{
39};
40
41/// @brief a += b
42template <class T> struct SumReducer
43{
44 using value_type = T;
45 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
46 {
47 a += b;
48 }
49
50 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a) { a = T(0); }
51};
52
53/// @brief a *= b
54template <class T> struct MultReducer
55{
56 using value_type = T;
57 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
58 {
59 a *= b;
60 }
61
62 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a) { a = T(1); }
63};
64
65/// @brief a &= b
66template <class T> struct BAndReducer
67{
68 static_assert(std::is_integral<T>::value, "Only works for integral types");
69 using value_type = T;
70 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
71 {
72 a &= b;
73 }
74
75 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
76 {
77 // sets all bits, does not work for floating point types
78 // bitwise operators are not defined for floating point types anyways
79 a = ~T(0);
80 }
81};
82
83/// @brief a |= b
84template <class T> struct BOrReducer
85{
86 static_assert(std::is_integral<T>::value, "Only works for integral types");
87 using value_type = T;
88 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
89 {
90 a |= b;
91 }
92
93 static MFEM_HOST_DEVICE void SetInitialValue(T &a) { a = T(0); }
94};
95
96/// @brief a = min(a,b)
97template <class T> struct MinReducer
98{
99 using value_type = T;
100
101 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
102 {
103 if (b < a)
104 {
105 a = b;
106 }
107 }
108
109 // If we use std::numeric_limits<T>::max() in host-device method, Cuda
110 // complains about calling host-only constexpr functions in device code
111 // without --expt-relaxed-constexpr, so we define the following constant as a
112 // workaround for the Cuda warning.
113 static constexpr T max_val = std::numeric_limits<T>::max();
114
115 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
116 {
117 a = max_val;
118 }
119};
120
121template <> struct MinReducer<float>
122{
123 using value_type = float;
124 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
125 {
126 a = fmin(a, b);
127 }
128
129 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a) { a = HUGE_VALF; }
130};
131
132template <> struct MinReducer<double>
133{
134 using value_type = double;
135 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
136 {
137 a = fmin(a, b);
138 }
139
140 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a) { a = HUGE_VAL; }
141};
142
143/// @brief a = max(a,b)
144template <class T> struct MaxReducer
145{
146 using value_type = T;
147 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
148 {
149 if (a < b)
150 {
151 a = b;
152 }
153 }
154
155 // If we use std::numeric_limits<T>::min() in host-device method, Cuda
156 // complains about calling host-only constexpr functions in device code
157 // without --expt-relaxed-constexpr, so we define the following constant as a
158 // workaround for the Cuda warning.
159 static constexpr T min_val = std::numeric_limits<T>::min();
160
161 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
162 {
163 a = min_val;
164 }
165};
166
167template <> struct MaxReducer<float>
168{
169 using value_type = float;
170 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
171 {
172 a = fmax(a, b);
173 }
174
175 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
176 {
177 a = -HUGE_VALF;
178 }
179};
180
181template <> struct MaxReducer<double>
182{
183 using value_type = double;
184 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
185 {
186 a = fmax(a, b);
187 }
188
189 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a) { a = -HUGE_VAL; }
190};
191
192/// @brief a = minmax(a,b)
193template <class T> struct MinMaxReducer
194{
196 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
197 {
198 if (b.first < a.first)
199 {
200 a.first = b.first;
201 }
202 if (b.second > a.second)
203 {
204 a.second = b.second;
205 }
206 }
207
208 // If we use std::numeric_limits<T>::min() (or max()) in host-device method,
209 // Cuda complains about calling host-only constexpr functions in device code
210 // without --expt-relaxed-constexpr, so we define the following constants as
211 // a workaround for the Cuda warning.
212 static constexpr T min_val = std::numeric_limits<T>::min();
213 static constexpr T max_val = std::numeric_limits<T>::max();
214
215 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
216 {
218 }
219};
220
221template <> struct MinMaxReducer<float>
222{
224 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
225 {
226 a.first = fmin(a.first, b.first);
227 a.second = fmax(a.second, b.second);
228 }
229
230 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
231 {
232 a = value_type{HUGE_VALF, -HUGE_VALF};
233 }
234};
235
236template <> struct MinMaxReducer<double>
237{
239 static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
240 {
241 a.first = fmin(a.first, b.first);
242 a.second = fmax(a.second, b.second);
243 }
244
245 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
246 {
247 a = value_type{HUGE_VAL, -HUGE_VAL};
248 }
249};
250
251/// @brief i = argmin(a[i], a[j])
252///
253/// Note: for ties the returned index can correspond to any min entry, not
254/// necesarily the first one
255template <class T, class I> struct ArgMinReducer
256{
258 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
259 {
260 if (b.first <= a.first)
261 {
262 a = b;
263 }
264 }
265 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
266 {
267 // Cuda complains about calling host-only constexpr functions in device
268 // code without --expt-relaxed-constexpr, wrap into integral_constant to
269 // get around this
270 a = value_type
271 {
272 std::integral_constant<T, std::numeric_limits<T>::max()>::value, I{0}};
273 }
274};
275
276template <class I> struct ArgMinReducer<float, I>
277{
279 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
280 {
281 if (b.first <= a.first)
282 {
283 a = b;
284 }
285 }
286 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
287 {
288 a = value_type{HUGE_VALF, I{0}};
289 }
290};
291
292template <class I> struct ArgMinReducer<double, I>
293{
295 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
296 {
297 if (b.first <= a.first)
298 {
299 a = b;
300 }
301 }
302 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
303 {
304 a = value_type{HUGE_VAL, I{0}};
305 }
306};
307
308/// @brief i = argmax(a[i], a[j])
309///
310/// Note: for ties the returned index can correspond to any min entry, not
311/// necesarily the first one.
312template <class T, class I> struct ArgMaxReducer
313{
315 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
316 {
317 if (a.first <= b.first)
318 {
319 a = b;
320 }
321 }
322 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
323 {
324 // Cuda complains about calling host-only constexpr functions in device
325 // code without --expt-relaxed-constexpr, wrap into integral_constant to
326 // get around this
327 a = value_type
328 {
329 std::integral_constant<T, std::numeric_limits<T>::max()>::value, I{0}};
330 }
331};
332
333template <class I> struct ArgMaxReducer<float, I>
334{
336 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
337 {
338 if (a.first <= b.first)
339 {
340 a = b;
341 }
342 }
343 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
344 {
345 a = value_type{-HUGE_VALF, I{0}};
346 }
347};
348
349template <class I> struct ArgMaxReducer<double, I>
350{
352 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
353 {
354 if (a.first <= b.first)
355 {
356 a = b;
357 }
358 }
359 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
360 {
361 a = value_type{-HUGE_VAL, I{0}};
362 }
363};
364
365template <class T, class I> struct ArgMinMaxReducer
366{
368 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
369 {
370 if (b.min_val <= a.min_val)
371 {
372 a.min_val = b.min_val;
373 a.min_loc = b.min_loc;
374 }
375 if (b.max_val >= a.max_val)
376 {
377 a.max_val = b.max_val;
378 a.max_loc = b.max_loc;
379 }
380 }
381
382 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
383 {
384 // Cuda complains about calling host-only constexpr functions in device
385 // code without --expt-relaxed-constexpr, wrap into integral_constant to
386 // get around this
387 a = value_type
388 {
389 std::integral_constant<T, std::numeric_limits<T>::max()>::value,
390 std::integral_constant<T, std::numeric_limits<T>::min()>::value, I(0),
391 I(0)};
392 }
393};
394
395template <class I> struct ArgMinMaxReducer<float, I>
396{
398 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
399 {
400 if (b.min_val <= a.min_val)
401 {
402 a.min_val = b.min_val;
403 a.min_loc = b.min_loc;
404 }
405 if (b.max_val >= a.max_val)
406 {
407 a.max_val = b.max_val;
408 a.max_loc = b.max_loc;
409 }
410 }
411
412 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
413 {
414 a = value_type{HUGE_VALF, -HUGE_VALF, I(0), I(0)};
415 }
416};
417
418template <class I> struct ArgMinMaxReducer<double, I>
419{
421 static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
422 {
423 if (b.min_val <= a.min_val)
424 {
425 a.min_val = b.min_val;
426 a.min_loc = b.min_loc;
427 }
428 if (b.max_val >= a.max_val)
429 {
430 a.max_val = b.max_val;
431 a.max_loc = b.max_loc;
432 }
433 }
434
435 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
436 {
437 a = value_type{HUGE_VAL, -HUGE_VAL, I(0), I(0)};
438 }
439};
440
441namespace internal
442{
443
444/**
445 @brief Device portion of a reduction over a 1D sequence [0, N)
446 @tparam B Reduction body. Must be callable with the signature void(int i, value_type&
447 v), where i is the index to evaluate and v is the value to update.
448 @tparam R Reducer capable of combining values of type value_type. See reducers.hpp for
449 pre-defined reducers.
450 */
451template<class B, class R> struct reduction_kernel
452{
453 /// value type body and reducer operate on.
454 using value_type = typename R::value_type;
455 /// workspace for the intermediate reduction results
456 mutable value_type *work;
457 B body;
458 R reducer;
459 /// Length of sequence to reduce over.
460 int N;
461 /// How many items is each thread responsible for during the serial phase
462 int items_per_thread;
463
464 constexpr static MFEM_HOST_DEVICE int max_blocksize() { return 256; }
465
466 /// helper for computing the reduction block size
467 static int block_log2(unsigned N)
468 {
469#if defined(__GNUC__) || defined(__clang__)
470 return N ? (sizeof(unsigned) * 8 - __builtin_clz(N)) : 0;
471#elif defined(_MSC_VER)
472 return sizeof(unsigned) * 8 - __lzclz(N);
473#else
474 int res = 0;
475 while (N)
476 {
477 N >>= 1;
478 ++res;
479 }
480 return res;
481#endif
482 }
483
484 MFEM_HOST_DEVICE void operator()(int work_idx) const
485 {
486 MFEM_SHARED value_type buffer[max_blocksize()];
487 reducer.SetInitialValue(buffer[MFEM_THREAD_ID(x)]);
488 // serial part
489 for (int idx = 0; idx < items_per_thread; ++idx)
490 {
491 int i = MFEM_THREAD_ID(x) +
492 (idx + work_idx * items_per_thread) * MFEM_THREAD_SIZE(x);
493 if (i < N)
494 {
495 body(i, buffer[MFEM_THREAD_ID(x)]);
496 }
497 else
498 {
499 break;
500 }
501 }
502 // binary tree reduction
503 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
504 {
505 MFEM_SYNC_THREAD;
506 if (MFEM_THREAD_ID(x) < i)
507 {
508 reducer.Join(buffer[MFEM_THREAD_ID(x)], buffer[MFEM_THREAD_ID(x) + i]);
509 }
510 }
511 if (MFEM_THREAD_ID(x) == 0)
512 {
513 work[work_idx] = buffer[0];
514 }
515 }
516};
517}
518
519/**
520 @brief Performs a 1D reduction on the range [0,N).
521 @a res initial value and where the result will be written.
522 @a body reduction function body.
523 @a reducer helper for joining two reduced values.
524 @a use_dev true to perform the reduction on the device, if possible.
525 @a workspace temporary workspace used for device reductions. May be resized to
526 a larger capacity as needed. Preferably should have MemoryType::MANAGED or
527 MemoryType::HOST_PINNED. TODO: replace with internal temporary workspace
528 vectors once that's added to the memory manager.
529 @tparam T value_type to operate on
530 */
531template <class T, class B, class R>
532void reduce(int N, T &res, B &&body, const R &reducer, bool use_dev,
533 Array<T> &workspace)
534{
535 if (N == 0)
536 {
537 return;
538 }
539
540#if defined(MFEM_USE_CUDA_OR_HIP)
541 if (use_dev &&
544 {
545 using red_type = internal::reduction_kernel<typename std::decay<B>::type,
546 typename std::decay<R>::type>;
547 // max block size is 256, but can be smaller
548 int block_size = std::min<int>(red_type::max_blocksize(),
549 1ll << red_type::block_log2(N));
550
552#if defined(MFEM_USE_CUDA)
553 // good value of mp_sat found experimentally on Lassen
554 constexpr int mp_sat = 8;
555#elif defined(MFEM_USE_HIP)
556 // good value of mp_sat found experimentally on Tuolumne
557 constexpr int mp_sat = 4;
558#else
559 num_mp = 1;
560 constexpr int mp_sat = 1;
561#endif
562 // determine how many items each thread should sum during the serial
563 // portion
564 int nblocks = std::min(mp_sat * num_mp, (N + block_size - 1) / block_size);
565 int items_per_thread =
566 (N + block_size * nblocks - 1) / (block_size * nblocks);
567
568 red_type red{nullptr, std::forward<B>(body), reducer, N, items_per_thread};
569 // allocate res to fit block_size entries
570 auto mt = workspace.GetMemory().GetMemoryType();
572 {
574 }
575 workspace.SetSize(nblocks, mt);
576 auto work = workspace.HostWrite();
577 red.work = work;
578 forall_2D(nblocks, block_size, 1, std::move(red));
579 // wait for results
580 MFEM_DEVICE_SYNC;
581 for (int i = 0; i < nblocks; ++i)
582 {
583 reducer.Join(res, work[i]);
584 }
585 return;
586 }
587#endif
588
589 for (int i = 0; i < N; ++i)
590 {
591 body(i, res);
592 }
593}
594
595} // namespace mfem
596
597#endif // MFEM_REDUCERS_HPP
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition array.hpp:145
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:840
T * HostWrite()
Shortcut for mfem::Write(a.GetMemory(), a.Size(), false).
Definition array.hpp:393
static int NumMultiprocessors()
Same as NumMultiprocessors(int), for the currently active device.
Definition device.cpp:736
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:262
static int GetId()
Get the device ID of the configured device.
Definition device.hpp:253
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
void reduce(int N, T &res, B &&body, const R &reducer, bool use_dev, Array< T > &workspace)
Performs a 1D reduction on the range [0,N). res initial value and where the result will be written....
Definition reducers.hpp:532
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:925
@ HOST_PINNED
Host memory: pinned (page-locked)
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:359
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:352
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:336
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:343
i = argmax(a[i], a[j])
Definition reducers.hpp:313
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:315
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:322
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:421
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:435
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:398
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:412
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:382
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:368
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:295
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:302
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:286
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:279
i = argmin(a[i], a[j])
Definition reducers.hpp:256
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:265
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:258
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:70
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:75
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:88
static MFEM_HOST_DEVICE void SetInitialValue(T &a)
Definition reducers.hpp:93
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition device.hpp:51
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition device.hpp:42
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
Definition device.hpp:54
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition device.hpp:40
Pair of values which can be used in device code.
Definition reducers.hpp:27
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:184
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:189
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:170
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:175
a = max(a,b)
Definition reducers.hpp:145
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:147
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:161
static constexpr T min_val
Definition reducers.hpp:159
Two pairs for the min/max values and their location indices.
Definition reducers.hpp:34
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:245
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:239
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:224
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:230
a = minmax(a,b)
Definition reducers.hpp:194
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:196
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:215
static constexpr T max_val
Definition reducers.hpp:213
static constexpr T min_val
Definition reducers.hpp:212
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:135
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:140
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:124
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:129
a = min(a,b)
Definition reducers.hpp:98
static MFEM_HOST_DEVICE void Join(value_type &a, value_type b)
Definition reducers.hpp:101
static constexpr T max_val
Definition reducers.hpp:113
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:115
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:62
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:57
static MFEM_HOST_DEVICE void Join(value_type &a, const value_type &b)
Definition reducers.hpp:45
static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
Definition reducers.hpp:50