12#ifndef MFEM_DGMASSINV_KERNELS_HPP 
   13#define MFEM_DGMASSINV_KERNELS_HPP 
   25template <
int DIM, 
int D1D, 
int Q1D>
 
   26MFEM_HOST_DEVICE 
inline 
   27void DGMassApply(
const int e,
 
   37   constexpr bool use_smem = (D1D > 0 && Q1D > 0);
 
   38   constexpr bool ACCUM = 
false;
 
   39   constexpr int NBZ = 1;
 
   44      constexpr int TD1D = D1D ? D1D : 1;
 
   45      constexpr int TQ1D = Q1D ? Q1D : 1;
 
   48         SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);
 
   52         SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);
 
   56         MFEM_ABORT_KERNEL(
"Unsupported dimension.");
 
   63         PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
 
   67         PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
 
   71         MFEM_ABORT_KERNEL(
"Unsupported dimension.");
 
   76MFEM_HOST_DEVICE 
inline 
   77void DGMassPreconditioner(
const int e,
 
   88   const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
 
   89   const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
 
   91   for (
int i = tid; i < ND; i += bxy)
 
   93      Y(i, e) = D(i, e)*X(i, e);
 
   98MFEM_HOST_DEVICE 
inline 
   99void DGMassAxpy(
const int e,
 
  112   const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
 
  113   const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
 
  115   for (
int i = tid; i < ND; i += bxy)
 
  117      Z(i, e) = 
a*X(i, e) + 
b*Y(i, e);
 
  123MFEM_HOST_DEVICE 
inline 
  124real_t DGMassDot(
const int e,
 
  133   const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
 
  134   const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
 
  136   MFEM_SHARED 
real_t s_dot[NB*NB];
 
  139   for (
int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }
 
  142   if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }
 
  145   if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }
 
  148   if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }
 
  151   if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }
 
  154   if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }
 
  157   if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }
 
  160   if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }
 
  163   if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }
 
  166   if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }
 
  169   if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }
 
  175template<
int T_D1D = 0>
 
  176MFEM_HOST_DEVICE 
inline 
  177void DGMassBasis2D(
const int e,
 
  184   constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
  185   const int D1D = T_D1D ? T_D1D : d1d;
 
  187   const auto b = 
Reshape(b_, D1D, D1D);
 
  188   const auto x = 
Reshape(x_, D1D, D1D, NE);
 
  189   auto y = 
Reshape(y_, D1D, D1D, NE);
 
  191   MFEM_SHARED 
real_t sB[MD1*MD1];
 
  192   MFEM_SHARED 
real_t sm0[MD1*MD1];
 
  193   MFEM_SHARED 
real_t sm1[MD1*MD1];
 
  195   kernels::internal::LoadB<MD1,MD1>(D1D,D1D,
b,sB);
 
  202   kernels::internal::LoadX(e,D1D,x,DD);
 
  203   kernels::internal::EvalX(D1D,D1D,B,DD,DQ);
 
  204   kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);
 
  206   MFEM_FOREACH_THREAD(qy,y,D1D)
 
  208      MFEM_FOREACH_THREAD(qx,x,D1D)
 
  210         y(qx,qy,e) = QQ(qx,qy);
 
  216template<
int T_D1D = 0>
 
  217MFEM_HOST_DEVICE 
inline 
  218void DGMassBasis3D(
const int e,
 
  225   const int D1D = T_D1D ? T_D1D : d1d;
 
  227   const auto b = 
Reshape(b_, D1D, D1D);
 
  228   const auto x = 
Reshape(x_, D1D, D1D, D1D, NE);
 
  229   auto y = 
Reshape(y_, D1D, D1D, D1D, NE);
 
  231   constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
  233   MFEM_SHARED 
real_t sB[MD1*MD1];
 
  234   MFEM_SHARED 
real_t sm0[MD1*MD1*MD1];
 
  235   MFEM_SHARED 
real_t sm1[MD1*MD1*MD1];
 
  237   kernels::internal::LoadB<MD1,MD1>(D1D,D1D,
b,sB);
 
  245   kernels::internal::LoadX(e,D1D,x,DDD);
 
  246   kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);
 
  247   kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);
 
  248   kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);
 
  250   MFEM_FOREACH_THREAD(qz,z,D1D)
 
  252      MFEM_FOREACH_THREAD(qy,y,D1D)
 
  254         for (
int qx = 0; qx < D1D; ++qx)
 
  256            y(qx,qy,qz,e) = QQQ(qz,qy,qx);
 
  263template<
int DIM, 
int T_D1D = 0>
 
  264MFEM_HOST_DEVICE 
inline 
  265void DGMassBasis(
const int e,
 
  274      DGMassBasis2D<T_D1D>(e, NE, b_, x_, y_, d1d);
 
  278      DGMassBasis3D<T_D1D>(e, NE, b_, x_, y_, d1d);
 
  282      MFEM_ABORT_KERNEL(
"Dimension not supported.");
 
DeviceTensor< 2, real_t > DeviceMatrix
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
DeviceTensor< 2, const real_t > ConstDeviceMatrix
DeviceTensor< 3, real_t > DeviceCube