#ifndef MFEM_FORALL_HPP
#define MFEM_FORALL_HPP

#include <_hypre_utilities.h>
struct DofQuadLimits_CUDA
{
   static constexpr int MAX_D1D = 14;
   static constexpr int MAX_Q1D = 14;
   static constexpr int MAX_T1D = 32;
   static constexpr int HCURL_MAX_D1D = 5;
   static constexpr int HCURL_MAX_Q1D = 6;
   static constexpr int HDIV_MAX_D1D = 5;
   static constexpr int HDIV_MAX_Q1D = 6;
   static constexpr int MAX_INTERP_1D = 8;
   static constexpr int MAX_DET_1D = 6;
};
struct DofQuadLimits_HIP
{
   static constexpr int MAX_D1D = 10;
   static constexpr int MAX_Q1D = 10;
   static constexpr int MAX_T1D = 32;
   static constexpr int HCURL_MAX_D1D = 5;
   static constexpr int HCURL_MAX_Q1D = 5;
   static constexpr int HDIV_MAX_D1D = 5;
   static constexpr int HDIV_MAX_Q1D = 6;
   static constexpr int MAX_INTERP_1D = 8;
   static constexpr int MAX_DET_1D = 6;
};
struct DofQuadLimits_CPU
{
#ifndef _WIN32
   static constexpr int MAX_D1D = 24;
   static constexpr int MAX_Q1D = 24;
#else
   static constexpr int MAX_D1D = 14;
   static constexpr int MAX_Q1D = 14;
#endif
   static constexpr int MAX_T1D = 32;
   static constexpr int HCURL_MAX_D1D = 10;
   static constexpr int HCURL_MAX_Q1D = 10;
   static constexpr int HDIV_MAX_D1D = 10;
   static constexpr int HDIV_MAX_Q1D = 10;
   static constexpr int MAX_INTERP_1D = MAX_D1D;
   static constexpr int MAX_DET_1D = MAX_D1D;
};
// Maximum number of 1D DOFs or quadrature points for the architecture
// currently being compiled for.
#if defined(__CUDA_ARCH__)
using DofQuadLimits = internal::DofQuadLimits_CUDA;
#elif defined(__HIP_DEVICE_COMPILE__)
using DofQuadLimits = internal::DofQuadLimits_HIP;
#else
using DofQuadLimits = internal::DofQuadLimits_CPU;
#endif

// Maximum number of 1D DOFs or quadrature points for the current runtime
// configuration of the Device.
struct DeviceDofQuadLimits
{
   int MAX_D1D;       ///< Maximum number of 1D nodal points.
   int MAX_Q1D;       ///< Maximum number of 1D quadrature points.
   int HCURL_MAX_D1D; ///< Maximum number of 1D nodal points for H(curl).
   int HCURL_MAX_Q1D; ///< Maximum number of 1D quadrature points for H(curl).
   int HDIV_MAX_D1D;  ///< Maximum number of 1D nodal points for H(div).
   int HDIV_MAX_Q1D;  ///< Maximum number of 1D quadrature points for H(div).
   int MAX_INTERP_1D; ///< Maximum number of points for use in QuadratureInterpolator.
   int MAX_DET_1D;    ///< Maximum number of points for determinant computation in QuadratureInterpolator.

   /// Return a const reference to the DeviceDofQuadLimits singleton.
   static const DeviceDofQuadLimits &Get()
   {
      static const DeviceDofQuadLimits dof_quad_limits;
      return dof_quad_limits;
   }

private:
   DeviceDofQuadLimits()
   {
      if (Device::Allows(Backend::CUDA_MASK)) { Populate<internal::DofQuadLimits_CUDA>(); }
      else if (Device::Allows(Backend::HIP_MASK)) { Populate<internal::DofQuadLimits_HIP>(); }
      else { Populate<internal::DofQuadLimits_CPU>(); }
   }

   template <typename T> void Populate()
   {
      MAX_D1D = T::MAX_D1D;
      MAX_Q1D = T::MAX_Q1D;
      HCURL_MAX_D1D = T::HCURL_MAX_D1D;
      HCURL_MAX_Q1D = T::HCURL_MAX_Q1D;
      HDIV_MAX_D1D = T::HDIV_MAX_D1D;
      HDIV_MAX_Q1D = T::HDIV_MAX_Q1D;
      MAX_INTERP_1D = T::MAX_INTERP_1D;
      MAX_DET_1D = T::MAX_DET_1D;
   }
};
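// Example (editor's sketch, not part of the original header): runtime code
// typically validates its 1D sizes against the singleton; `d1d` and `q1d`
// are hypothetical basis and quadrature sizes.
//
//   const DeviceDofQuadLimits &limits = DeviceDofQuadLimits::Get();
//   MFEM_VERIFY(d1d <= limits.MAX_D1D && q1d <= limits.MAX_Q1D,
//               "order not supported by the fallback kernels");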
// MFEM pragma macro that can be used inside MFEM_FORALL macros.
#define MFEM_PRAGMA(X) _Pragma(#X)

// MFEM_UNROLL pragma macro that can be used inside MFEM_FORALL macros.
#if defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)
#define MFEM_UNROLL(N) MFEM_PRAGMA(unroll(N))
#elif defined(MFEM_USE_HIP) && defined(__HIP_DEVICE_COMPILE__)
#define MFEM_UNROLL(N) MFEM_PRAGMA(unroll N)
#else
#define MFEM_UNROLL(N)
#endif
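// Example (editor's sketch): MFEM_UNROLL inside a kernel body; `Q1D` is a
// hypothetical compile-time constant, `B` and `x` hypothetical local arrays.
//
//   double s = 0.0;
//   MFEM_UNROLL(Q1D)
//   for (int q = 0; q < Q1D; ++q) { s += B[q] * x[q]; }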
// The MFEM_GPU_FORALL macro executes the loop directly on the GPU; it is a
// no-op when MFEM is built without CUDA or HIP.
#if defined(MFEM_USE_CUDA) && defined(__CUDACC__)
#define MFEM_GPU_FORALL(i, N,...) CuWrap1D(N, [=] MFEM_DEVICE \
                                           (int i) {__VA_ARGS__})
#elif defined(MFEM_USE_HIP) && defined(__HIP__)
#define MFEM_GPU_FORALL(i, N,...) HipWrap1D(N, [=] MFEM_DEVICE \
                                            (int i) {__VA_ARGS__})
#else
#define MFEM_GPU_FORALL(i, N,...) do { } while (false)
#endif
// MFEM_FORALL: "parallel for" over a 1D index range [0, N).
#define MFEM_FORALL(i,N,...) \
   ForallWrap<1>(true,N,[=] MFEM_HOST_DEVICE (int i) {__VA_ARGS__})

// MFEM_FORALL_2D: N batches of X x Y x BZ threads.
#define MFEM_FORALL_2D(i,N,X,Y,BZ,...) \
   ForallWrap<2>(true,N,[=] MFEM_HOST_DEVICE (int i) {__VA_ARGS__},X,Y,BZ)

// MFEM_FORALL_3D: N blocks of X x Y x Z threads.
#define MFEM_FORALL_3D(i,N,X,Y,Z,...) \
   ForallWrap<3>(true,N,[=] MFEM_HOST_DEVICE (int i) {__VA_ARGS__},X,Y,Z)

// MFEM_FORALL_3D_GRID: like MFEM_FORALL_3D, with an explicit grid size G.
#define MFEM_FORALL_3D_GRID(i,N,X,Y,Z,G,...) \
   ForallWrap<3>(true,N,[=] MFEM_HOST_DEVICE (int i) {__VA_ARGS__},X,Y,Z,G)

// MFEM_FORALL_SWITCH: like MFEM_FORALL, but device execution can be disabled
// at runtime via the use_dev boolean.
#define MFEM_FORALL_SWITCH(use_dev,i,N,...) \
   ForallWrap<1>(use_dev,N,[=] MFEM_HOST_DEVICE (int i) {__VA_ARGS__})
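// Example (editor's sketch): an axpy-style MFEM_FORALL over n entries, with
// hypothetical device pointers d_x, d_y (e.g. from Vector::Read() and
// Vector::ReadWrite()) and a scalar a.
//
//   MFEM_FORALL(i, n, d_y[i] += a * d_x[i];);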
/// OpenMP backend.
template <typename HBODY>
void OmpWrap(const int N, HBODY &&h_body)
{
#ifdef MFEM_USE_OPENMP
   #pragma omp parallel for
   for (int k = 0; k < N; k++)
   {
      h_body(k);
   }
#else
   MFEM_CONTRACT_VAR(N);
   MFEM_CONTRACT_VAR(h_body);
   MFEM_ABORT("OpenMP requested for MFEM but OpenMP is not enabled!");
#endif
}
template <typename HBODY>
void OmpWrap2D(const int Nx, const int Ny, HBODY &&h_body)
{
#ifdef MFEM_USE_OPENMP
   #pragma omp parallel for collapse(2)
   for (int j = 0; j < Ny; j++)
   {
      for (int i = 0; i < Nx; i++)
      {
         h_body(i, j);
      }
   }
#else
   MFEM_CONTRACT_VAR(Nx);
   MFEM_CONTRACT_VAR(Ny);
   MFEM_CONTRACT_VAR(h_body);
   MFEM_ABORT("OpenMP requested for MFEM but OpenMP is not enabled!");
#endif
}
template <typename HBODY>
void OmpWrap3D(const int Nx, const int Ny, const int Nz, HBODY &&h_body)
{
#ifdef MFEM_USE_OPENMP
   #pragma omp parallel for collapse(3)
   for (int k = 0; k < Nz; k++)
   {
      for (int j = 0; j < Ny; j++)
      {
         for (int i = 0; i < Nx; i++)
         {
            h_body(i, j, k);
         }
      }
   }
#else
   MFEM_CONTRACT_VAR(Nx);
   MFEM_CONTRACT_VAR(Ny);
   MFEM_CONTRACT_VAR(Nz);
   MFEM_CONTRACT_VAR(h_body);
   MFEM_ABORT("OpenMP requested for MFEM but OpenMP is not enabled!");
#endif
}
/// RAJA CUDA and HIP backends.
#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_CUDA) && defined(__CUDACC__)
using cuda_launch_policy =
   RAJA::LaunchPolicy<RAJA::cuda_launch_t<true>>;
using cuda_teams_x =
   RAJA::LoopPolicy<RAJA::cuda_block_x_direct>;
using cuda_threads_z =
   RAJA::LoopPolicy<RAJA::cuda_thread_z_direct>;
#endif

#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_HIP) && defined(__HIP__)
using hip_launch_policy =
   RAJA::LaunchPolicy<RAJA::hip_launch_t<true>>;
using hip_teams_x =
   RAJA::LoopPolicy<RAJA::hip_block_x_direct>;
using hip_threads_z =
   RAJA::LoopPolicy<RAJA::hip_thread_z_direct>;
#endif
#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_CUDA) && defined(__CUDACC__)
template <const int BLOCKS = MFEM_CUDA_BLOCKS, typename DBODY>
void RajaCuWrap1D(const int N, DBODY &&d_body)
{
   // 'true' launches the kernel asynchronously
   RAJA::forall<RAJA::cuda_exec<BLOCKS,true>>(RAJA::RangeSegment(0,N),d_body);
}

template <typename DBODY>
void RajaCuWrap2D(const int N, DBODY &&d_body,
                  const int X, const int Y, const int BZ)
{
   MFEM_VERIFY(BZ>0, "");
   const int G = (N+BZ-1)/BZ;

   using namespace RAJA;
   using RAJA::RangeSegment;

   launch<cuda_launch_policy>
   (LaunchParams(Teams(G), Threads(X, Y, BZ)),
    [=] RAJA_DEVICE (LaunchContext ctx)
   {
      loop<cuda_teams_x>(ctx, RangeSegment(0, G), [&] (const int n)
      {
         loop<cuda_threads_z>(ctx, RangeSegment(0, BZ), [&] (const int tz)
         {
            const int k = n*BZ + tz;
            if (k >= N) { return; }
            d_body(k);
         });
      });
   });

   MFEM_GPU_CHECK(cudaGetLastError());
}
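// Editor's note: with N batch elements and BZ elements stacked per team in
// z, the launch above uses G = (N+BZ-1)/BZ teams, and team n, z-thread tz
// handles element k = n*BZ + tz. E.g. N=10, BZ=4 gives G=3 teams covering
// k = 0..11; the `k >= N` guard masks the two excess (n,tz) pairs.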
template <typename DBODY>
void RajaCuWrap3D(const int N, DBODY &&d_body,
                  const int X, const int Y, const int Z, const int G)
{
   const int GRID = G == 0 ? N : G;
   using namespace RAJA;
   using RAJA::RangeSegment;

   launch<cuda_launch_policy>
   (LaunchParams(Teams(GRID), Threads(X, Y, Z)),
    [=] RAJA_DEVICE (LaunchContext ctx)
   {
      loop<cuda_teams_x>(ctx, RangeSegment(0, N), d_body);
   });

   MFEM_GPU_CHECK(cudaGetLastError());
}
template <int Dim>
struct RajaCuWrap;

template <>
struct RajaCuWrap<1>
{
   template <const int BLCK = MFEM_CUDA_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      RajaCuWrap1D<BLCK>(N, d_body);
   }
};

template <>
struct RajaCuWrap<2>
{
   template <const int BLCK = MFEM_CUDA_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      RajaCuWrap2D(N, d_body, X, Y, Z);
   }
};

template <>
struct RajaCuWrap<3>
{
   template <const int BLCK = MFEM_CUDA_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      RajaCuWrap3D(N, d_body, X, Y, Z, G);
   }
};

#endif
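// Editor's note: ForallWrap<DIM> (defined below) dispatches through these
// specializations; e.g. for DIM=3 a call sketch is
//
//   RajaCuWrap<3>::run(N, body, Q1D, Q1D, Q1D, /*G=*/0);
//
// which forwards to RajaCuWrap3D and launches N teams of Q1D^3 threads
// (Q1D being a hypothetical per-dimension thread count).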
#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_HIP) && defined(__HIP__)
template <const int BLOCKS = MFEM_HIP_BLOCKS, typename DBODY>
void RajaHipWrap1D(const int N, DBODY &&d_body)
{
   // 'true' launches the kernel asynchronously
   RAJA::forall<RAJA::hip_exec<BLOCKS,true>>(RAJA::RangeSegment(0,N),d_body);
}

template <typename DBODY>
void RajaHipWrap2D(const int N, DBODY &&d_body,
                   const int X, const int Y, const int BZ)
{
   MFEM_VERIFY(BZ>0, "");
   const int G = (N+BZ-1)/BZ;

   using namespace RAJA;
   using RAJA::RangeSegment;

   launch<hip_launch_policy>
   (LaunchParams(Teams(G), Threads(X, Y, BZ)),
    [=] RAJA_DEVICE (LaunchContext ctx)
   {
      loop<hip_teams_x>(ctx, RangeSegment(0, G), [&] (const int n)
      {
         loop<hip_threads_z>(ctx, RangeSegment(0, BZ), [&] (const int tz)
         {
            const int k = n*BZ + tz;
            if (k >= N) { return; }
            d_body(k);
         });
      });
   });

   MFEM_GPU_CHECK(hipGetLastError());
}

template <typename DBODY>
void RajaHipWrap3D(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
{
   const int GRID = G == 0 ? N : G;
   using namespace RAJA;
   using RAJA::RangeSegment;

   launch<hip_launch_policy>
   (LaunchParams(Teams(GRID), Threads(X, Y, Z)),
    [=] RAJA_DEVICE (LaunchContext ctx)
   {
      loop<hip_teams_x>(ctx, RangeSegment(0, N), d_body);
   });

   MFEM_GPU_CHECK(hipGetLastError());
}
template <int Dim>
struct RajaHipWrap;

template <>
struct RajaHipWrap<1>
{
   template <const int BLCK = MFEM_HIP_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      RajaHipWrap1D<BLCK>(N, d_body);
   }
};

template <>
struct RajaHipWrap<2>
{
   template <const int BLCK = MFEM_HIP_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      RajaHipWrap2D(N, d_body, X, Y, Z);
   }
};

template <>
struct RajaHipWrap<3>
{
   template <const int BLCK = MFEM_HIP_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      RajaHipWrap3D(N, d_body, X, Y, Z, G);
   }
};

#endif
#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_OPENMP)
/// RAJA OpenMP backend.
template <typename HBODY>
void RajaOmpWrap(const int N, HBODY &&h_body)
{
   RAJA::forall<RAJA::omp_parallel_for_exec>(RAJA::RangeSegment(0,N), h_body);
}

template <typename HBODY>
void RajaOmpWrap2D(const int Nx, const int Ny, HBODY &&h_body)
{
   using omp_launch_policy = RAJA::LaunchPolicy<RAJA::omp_launch_t>;
   using global_thread_xy = RAJA::LoopPolicy<RAJA::omp_for_exec>;
   RAJA::RangeSegment xrange(0, Nx);
   RAJA::RangeSegment yrange(0, Ny);
   RAJA::launch<omp_launch_policy>(RAJA::ExecPlace::HOST, RAJA::LaunchParams(),
                                   [=](RAJA::LaunchContext ctx)
   {
      RAJA::expt::loop<global_thread_xy>(ctx, xrange, yrange, [&](int i, int j)
      { h_body(i, j); });
   });
}

template <typename HBODY>
void RajaOmpWrap3D(const int Nx, const int Ny, const int Nz, HBODY &&h_body)
{
   using omp_launch_policy = RAJA::LaunchPolicy<RAJA::omp_launch_t>;
   using global_thread_xyz = RAJA::LoopPolicy<RAJA::omp_for_exec>;
   RAJA::RangeSegment xrange(0, Nx);
   RAJA::RangeSegment yrange(0, Ny);
   RAJA::RangeSegment zrange(0, Nz);
   RAJA::launch<omp_launch_policy>(RAJA::ExecPlace::HOST, RAJA::LaunchParams(),
                                   [=](RAJA::LaunchContext ctx)
   {
      RAJA::expt::loop<global_thread_xyz>(ctx, xrange, yrange, zrange,
                                          [&](int i, int j, int k)
      { h_body(i, j, k); });
   });
}
#endif
/// RAJA sequential loop backend.
template <typename HBODY>
void RajaSeqWrap(const int N, HBODY &&h_body)
{
#ifdef MFEM_USE_RAJA
#if (RAJA_VERSION_MAJOR >= 2023)
   // RAJA::loop_exec was deprecated in RAJA 2023.06 in favor of seq_exec.
   using raja_forall_pol = RAJA::seq_exec;
#else
   using raja_forall_pol = RAJA::loop_exec;
#endif
   RAJA::forall<raja_forall_pol>(RAJA::RangeSegment(0,N), h_body);
#else
   MFEM_CONTRACT_VAR(N);
   MFEM_CONTRACT_VAR(h_body);
   MFEM_ABORT("RAJA requested but RAJA is not enabled!");
#endif
}
/// CUDA backend.
#if defined(MFEM_USE_CUDA) && defined(__CUDACC__)

template <typename BODY> __global__ static
void CuKernel1D(const int N, BODY body)
{
   const int k = blockDim.x*blockIdx.x + threadIdx.x;
   if (k >= N) { return; }
   body(k);
}

template <typename BODY> __global__ static
void CuKernel2D(const int N, BODY body)
{
   const int k = blockIdx.x*blockDim.z + threadIdx.z;
   if (k >= N) { return; }
   body(k);
}

template <typename BODY> __global__ static
void CuKernel3D(const int N, BODY body)
{
   for (int k = blockIdx.x; k < N; k += gridDim.x) { body(k); }
}
template <const int BLCK = MFEM_CUDA_BLOCKS, typename DBODY>
void CuWrap1D(const int N, DBODY &&d_body)
{
   if (N==0) { return; }
   const int GRID = (N+BLCK-1)/BLCK;
   CuKernel1D<<<GRID,BLCK>>>(N, d_body);
   MFEM_GPU_CHECK(cudaGetLastError());
}

template <typename DBODY>
void CuWrap2D(const int N, DBODY &&d_body,
              const int X, const int Y, const int BZ)
{
   if (N==0) { return; }
   MFEM_VERIFY(BZ>0, "");
   const int GRID = (N+BZ-1)/BZ;
   const dim3 BLCK(X,Y,BZ);
   CuKernel2D<<<GRID,BLCK>>>(N,d_body);
   MFEM_GPU_CHECK(cudaGetLastError());
}

template <typename DBODY>
void CuWrap3D(const int N, DBODY &&d_body,
              const int X, const int Y, const int Z, const int G)
{
   if (N==0) { return; }
   const int GRID = G == 0 ? N : G;
   const dim3 BLCK(X,Y,Z);
   CuKernel3D<<<GRID,BLCK>>>(N,d_body);
   MFEM_GPU_CHECK(cudaGetLastError());
}
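// Editor's note: the grid sizes above are integer ceiling divisions. E.g. in
// CuWrap1D with N = 1000 and the default BLCK = 256, GRID = (1000+255)/256
// = 4 blocks (1024 threads), and the `k >= N` guard in CuKernel1D masks the
// final 24 threads.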
template <int Dim>
struct CuWrap;

template <>
struct CuWrap<1>
{
   template <const int BLCK = MFEM_CUDA_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      CuWrap1D<BLCK>(N, d_body);
   }
};

template <>
struct CuWrap<2>
{
   template <const int BLCK = MFEM_CUDA_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      CuWrap2D(N, d_body, X, Y, Z);
   }
};

template <>
struct CuWrap<3>
{
   template <const int BLCK = MFEM_CUDA_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      CuWrap3D(N, d_body, X, Y, Z, G);
   }
};

#endif
/// HIP backend.
#if defined(MFEM_USE_HIP) && defined(__HIP__)

template <typename BODY> __global__ static
void HipKernel1D(const int N, BODY body)
{
   const int k = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
   if (k >= N) { return; }
   body(k);
}

template <typename BODY> __global__ static
void HipKernel2D(const int N, BODY body)
{
   const int k = hipBlockIdx_x*hipBlockDim_z + hipThreadIdx_z;
   if (k >= N) { return; }
   body(k);
}

template <typename BODY> __global__ static
void HipKernel3D(const int N, BODY body)
{
   for (int k = hipBlockIdx_x; k < N; k += hipGridDim_x) { body(k); }
}
template <const int BLCK = MFEM_HIP_BLOCKS, typename DBODY>
void HipWrap1D(const int N, DBODY &&d_body)
{
   if (N==0) { return; }
   const int GRID = (N+BLCK-1)/BLCK;
   hipLaunchKernelGGL(HipKernel1D,GRID,BLCK,0,nullptr,N,d_body);
   MFEM_GPU_CHECK(hipGetLastError());
}

template <typename DBODY>
void HipWrap2D(const int N, DBODY &&d_body,
               const int X, const int Y, const int BZ)
{
   if (N==0) { return; }
   const int GRID = (N+BZ-1)/BZ;
   const dim3 BLCK(X,Y,BZ);
   hipLaunchKernelGGL(HipKernel2D,GRID,BLCK,0,nullptr,N,d_body);
   MFEM_GPU_CHECK(hipGetLastError());
}

template <typename DBODY>
void HipWrap3D(const int N, DBODY &&d_body,
               const int X, const int Y, const int Z, const int G)
{
   if (N==0) { return; }
   const int GRID = G == 0 ? N : G;
   const dim3 BLCK(X,Y,Z);
   hipLaunchKernelGGL(HipKernel3D,GRID,BLCK,0,nullptr,N,d_body);
   MFEM_GPU_CHECK(hipGetLastError());
}
template <int Dim>
struct HipWrap;

template <>
struct HipWrap<1>
{
   template <const int BLCK = MFEM_HIP_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      HipWrap1D<BLCK>(N, d_body);
   }
};

template <>
struct HipWrap<2>
{
   template <const int BLCK = MFEM_HIP_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      HipWrap2D(N, d_body, X, Y, Z);
   }
};

template <>
struct HipWrap<3>
{
   template <const int BLCK = MFEM_HIP_BLOCKS, typename DBODY>
   static void run(const int N, DBODY &&d_body,
                   const int X, const int Y, const int Z, const int G)
   {
      HipWrap3D(N, d_body, X, Y, Z, G);
   }
};

#endif
/// The forall kernel body wrapper.
template <const int DIM, typename d_lambda, typename h_lambda>
inline void ForallWrap(const bool use_dev, const int N,
                       d_lambda &&d_body, h_lambda &&h_body,
                       const int X=0, const int Y=0, const int Z=0,
                       const int G=0)
{
   MFEM_CONTRACT_VAR(X);
   MFEM_CONTRACT_VAR(Y);
   MFEM_CONTRACT_VAR(Z);
   MFEM_CONTRACT_VAR(G);
   MFEM_CONTRACT_VAR(d_body);
   if (!use_dev) { goto backend_cpu; }

#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_CUDA) && defined(__CUDACC__)
   // If Backend::RAJA_CUDA is allowed, use it
   if (Device::Allows(Backend::RAJA_CUDA))
   { return RajaCuWrap<DIM>::run(N, d_body, X, Y, Z, G); }
#endif

#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_HIP) && defined(__HIP__)
   // If Backend::RAJA_HIP is allowed, use it
   if (Device::Allows(Backend::RAJA_HIP))
   { return RajaHipWrap<DIM>::run(N, d_body, X, Y, Z, G); }
#endif

#if defined(MFEM_USE_CUDA) && defined(__CUDACC__)
   // If Backend::CUDA is allowed, use it
   if (Device::Allows(Backend::CUDA))
   { return CuWrap<DIM>::run(N, d_body, X, Y, Z, G); }
#endif

#if defined(MFEM_USE_HIP) && defined(__HIP__)
   // If Backend::HIP is allowed, use it
   if (Device::Allows(Backend::HIP))
   { return HipWrap<DIM>::run(N, d_body, X, Y, Z, G); }
#endif

   // If Backend::DEBUG_DEVICE is allowed, use the CPU fallback below
   if (Device::Allows(Backend::DEBUG_DEVICE)) { goto backend_cpu; }

#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_OPENMP)
   // If Backend::RAJA_OMP is allowed, use it
   if (Device::Allows(Backend::RAJA_OMP)) { return RajaOmpWrap(N, h_body); }
#endif

#ifdef MFEM_USE_OPENMP
   // If Backend::OMP is allowed, use it
   if (Device::Allows(Backend::OMP)) { return OmpWrap(N, h_body); }
#endif

#ifdef MFEM_USE_RAJA
   // If Backend::RAJA_CPU is allowed, use it
   if (Device::Allows(Backend::RAJA_CPU)) { return RajaSeqWrap(N, h_body); }
#endif

backend_cpu:
   // Handle Backend::CPU; also a fallback for any allowed backend not
   // handled above.
   for (int k = 0; k < N; k++) { h_body(k); }
}
template <const int DIM, typename lambda>
inline void ForallWrap(const bool use_dev, const int N, lambda &&body,
                       const int X=0, const int Y=0, const int Z=0,
                       const int G=0)
{
   ForallWrap<DIM>(use_dev, N, body, body, X, Y, Z, G);
}
template<typename lambda>
inline void forall(int N, lambda &&body) { ForallWrap<1>(true, N, body); }
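// Example (editor's sketch): a typical mfem::forall over the entries of two
// hypothetical mfem::Vector objects x and y.
//
//   const double *d_x = x.Read();    // device-visible pointers via the
//   double *d_y = y.ReadWrite();     // memory manager
//   mfem::forall(y.Size(), [=] MFEM_HOST_DEVICE (int i)
//   { d_y[i] += d_x[i]; });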
template<typename lambda>
inline void forall(int Nx, int Ny, lambda &&body)
{
   // On device backends, run the flattened 1D loop
   if (Device::Allows(Backend::DEVICE_MASK))
   {
      forall(Nx * Ny, [=] MFEM_HOST_DEVICE(int idx)
      {
         const int i = idx % Nx;
         const int j = idx / Nx;
         body(i, j);
      });
      return;
   }

#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_OPENMP)
   if (Device::Allows(Backend::RAJA_OMP)) { return RajaOmpWrap2D(Nx, Ny, body); }
#endif

#ifdef MFEM_USE_OPENMP
   if (Device::Allows(Backend::OMP)) { return OmpWrap2D(Nx, Ny, body); }
#endif

   for (int j = 0; j < Ny; ++j)
   {
      for (int i = 0; i < Nx; ++i)
      {
         body(i, j);
      }
   }
}
template<typename lambda>
inline void forall(int Nx, int Ny, int Nz, lambda &&body)
{
   // On device backends, run the flattened 1D loop
   if (Device::Allows(Backend::DEVICE_MASK))
   {
      forall(Nx * Ny * Nz, [=] MFEM_HOST_DEVICE(int idx)
      {
         const int i = idx % Nx;
         const int j = (idx / Nx) % Ny;
         const int k = idx / (Nx * Ny);
         body(i, j, k);
      });
      return;
   }

#if defined(MFEM_USE_RAJA) && defined(RAJA_ENABLE_OPENMP)
   if (Device::Allows(Backend::RAJA_OMP)) { return RajaOmpWrap3D(Nx, Ny, Nz, body); }
#endif

#ifdef MFEM_USE_OPENMP
   if (Device::Allows(Backend::OMP)) { return OmpWrap3D(Nx, Ny, Nz, body); }
#endif

   for (int k = 0; k < Nz; ++k)
   {
      for (int j = 0; j < Ny; ++j)
      {
         for (int i = 0; i < Nx; ++i)
         {
            body(i, j, k);
         }
      }
   }
}
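// Editor's note: the device paths above flatten (i,j[,k]) with i fastest,
// idx = i + Nx*(j + Ny*k), and invert it with i = idx % Nx,
// j = (idx / Nx) % Ny, k = idx / (Nx*Ny). E.g. Nx=4, Ny=3, idx=17 recovers
// (i,j,k) = (1,1,1), since 1 + 4*(1 + 3*1) = 17.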
template<typename lambda>
inline void forall_switch(bool use_dev, int N, lambda &&body)
{
   ForallWrap<1>(use_dev, N, body);
}

template<typename lambda>
inline void forall_2D(int N, int X, int Y, lambda &&body)
{
   ForallWrap<2>(true, N, body, X, Y, 1);
}

template<typename lambda>
inline void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
{
   ForallWrap<2>(true, N, body, X, Y, BZ);
}

template<typename lambda>
inline void forall_3D(int N, int X, int Y, int Z, lambda &&body)
{
   ForallWrap<3>(true, N, body, X, Y, Z, 0);
}

template<typename lambda>
inline void forall_3D_grid(int N, int X, int Y, int Z, int G, lambda &&body)
{
   ForallWrap<3>(true, N, body, X, Y, Z, G);
}
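// Example (editor's sketch): a batched element kernel with forall_3D, using
// MFEM's MFEM_FOREACH_THREAD/MFEM_SYNC_THREAD macros; NE and Q1D are
// hypothetical element and quadrature counts.
//
//   forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
//   {
//      MFEM_FOREACH_THREAD(qz,z,Q1D)
//      {
//         MFEM_FOREACH_THREAD(qy,y,Q1D)
//         {
//            MFEM_FOREACH_THREAD(qx,x,Q1D) { /* work on (e,qx,qy,qz) */ }
//         }
//      }
//      MFEM_SYNC_THREAD;
//   });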
// Function mfem::hypre_forall_cpu() similar to mfem::forall, but it always
// executes on the CPU, using OpenMP when enabled in hypre.
template<typename lambda>
inline void hypre_forall_cpu(int N, lambda &&body)
{
#ifdef HYPRE_USING_OPENMP
   #pragma omp parallel for HYPRE_SMP_SCHEDULE
#endif
   for (int i = 0; i < N; i++) { body(i); }
}
#if defined(HYPRE_USING_GPU)
// Function mfem::hypre_forall_gpu() similar to mfem::forall, but it always
// executes on the GPU device that hypre was configured with.
template<typename lambda>
inline void hypre_forall_gpu(int N, lambda &&body)
{
#if defined(HYPRE_USING_CUDA)
   CuWrap1D(N, body);
#elif defined(HYPRE_USING_HIP)
   HipWrap1D(N, body);
#else
#error Unknown HYPRE GPU backend!
#endif
}
#endif
// Function mfem::hypre_forall() similar to mfem::forall, but it executes on
// the device, CPU or GPU, that hypre was configured with.
template<typename lambda>
inline void hypre_forall(int N, lambda &&body)
{
#if !defined(HYPRE_USING_GPU)
   hypre_forall_cpu(N, body);
#elif MFEM_HYPRE_VERSION < 23100
   hypre_forall_gpu(N, body);
#else // HYPRE >= 2.31.0 selects the device at runtime
   if (HypreUsingGPU()) { hypre_forall_gpu(N, body); }
   else { hypre_forall_cpu(N, body); }
#endif
}
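// Example (editor's sketch): hypre_forall runs where hypre's data lives;
// d_data, n, and s are hypothetical.
//
//   hypre_forall(n, [=] MFEM_HOST_DEVICE (int i) { d_data[i] *= s; });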
#endif // MFEM_FORALL_HPP