26namespace quadrature_interpolator
 
   29static void Det1D(
const int NE,
 
   39   MFEM_CONTRACT_VAR(d_buff);
 
   40   const auto G = 
Reshape(g, q1d, d1d);
 
   41   const auto X = 
Reshape(x, d1d, NE);
 
   47      for (
int q = 0; q < q1d; q++)
 
   50         for (
int d = 0; d < d1d; d++)
 
   52            u += G(q, d) * X(d, e);
 
   59template<
int T_D1D = 0, 
int T_Q1D = 0>
 
   60static void Det2D(
const int NE,
 
   69   MFEM_CONTRACT_VAR(d_buff);
 
   70   static constexpr int SDIM = 2;
 
   71   static constexpr int NBZ = 1;
 
   73   const int D1D = T_D1D ? T_D1D : d1d;
 
   74   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   77   const auto G = 
Reshape(g, Q1D, D1D);
 
   79   auto Y = 
Reshape(y, Q1D, Q1D, NE);
 
   83      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
 
   84      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
   85      const int D1D = T_D1D ? T_D1D : d1d;
 
   86      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   88      MFEM_SHARED 
real_t BG[2][MQ1*MD1];
 
   93      kernels::internal::LoadX<MD1,NBZ>(e,D1D,X,XY);
 
   94      kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
 
   96      kernels::internal::GradX<MD1,MQ1,NBZ>(D1D,Q1D,BG,XY,DQ);
 
   97      kernels::internal::GradY<MD1,MQ1,NBZ>(D1D,Q1D,BG,DQ,QQ);
 
   99      MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  101         MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  104            kernels::internal::PullGrad<MQ1,NBZ>(Q1D,qx,qy,QQ,J);
 
  111template<
int T_D1D = 0, 
int T_Q1D = 0>
 
  112static void Det2DSurface(
const int NE,
 
  121   MFEM_CONTRACT_VAR(d_buff);
 
  123   static constexpr int SDIM = 3;
 
  124   static constexpr int NBZ = 1;
 
  126   const int D1D = T_D1D ? T_D1D : d1d;
 
  127   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  129   const auto B = 
Reshape(
b, Q1D, D1D);
 
  130   const auto G = 
Reshape(g, Q1D, D1D);
 
  132   auto Y = 
Reshape(y, Q1D, Q1D, NE);
 
  136      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
 
  137      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
  138      const int D1D = T_D1D ? T_D1D : d1d;
 
  139      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  140      const int tidz = MFEM_THREAD_ID(z);
 
  142      MFEM_SHARED 
real_t BG[2][MQ1*MD1];
 
  146      kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
 
  149      MFEM_FOREACH_THREAD(dy,y,D1D)
 
  151         MFEM_FOREACH_THREAD(dx,x,D1D)
 
  153            for (
int d = 0; d < 
SDIM; ++d)
 
  155               XYZ[d][tidz][dx + dy*D1D] = X(dx,dy,d,e);
 
  165      MFEM_FOREACH_THREAD(dy,y,D1D)
 
  167         MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  169            for (
int d = 0; d < 
SDIM; ++d)
 
  173               for (
int dx = 0; dx < D1D; ++dx)
 
  175                  const real_t xval = XYZ[d][tidz][dx + dy*D1D];
 
  176                  u += xval * G_mat(dx,qx);
 
  177                  v += xval * B_mat(dx,qx);
 
  179               DQ[d][tidz][dy + qx*D1D] = 
u;
 
  180               DQ[3 + d][tidz][dy + qx*D1D] = v;
 
  186      MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  188         MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  190            real_t J_[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
 
  191            for (
int d = 0; d < 
SDIM; ++d)
 
  193               for (
int dy = 0; dy < D1D; ++dy)
 
  195                  J_[d] += DQ[d][tidz][dy + qx*D1D] * B_mat(dy,qy);
 
  196                  J_[3 + d] += DQ[3 + d][tidz][dy + qx*D1D] * G_mat(dy,qy);
 
  200            const real_t E = J(0,0)*J(0,0) + J(1,0)*J(1,0) + J(2,0)*J(2,0);
 
  201            const real_t F = J(0,0)*J(0,1) + J(1,0)*J(1,1) + J(2,0)*J(2,1);
 
  202            const real_t G = J(0,1)*J(0,1) + J(1,1)*J(1,1) + J(2,1)*J(2,1);
 
  203            Y(qx,qy,e) = sqrt(E*G - F*F);
 
  209template<
int T_D1D = 0, 
int T_Q1D = 0, 
bool SMEM = true>
 
  210static void Det3D(
const int NE,
 
  219   constexpr int DIM = 3;
 
  220   static constexpr int GRID = SMEM ? 0 : 128;
 
  222   const int D1D = T_D1D ? T_D1D : d1d;
 
  223   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  225   const auto B = 
Reshape(
b, Q1D, D1D);
 
  226   const auto G = 
Reshape(g, Q1D, D1D);
 
  227   const auto X = 
Reshape(x, D1D, D1D, D1D, 
DIM, NE);
 
  228   auto Y = 
Reshape(y, Q1D, Q1D, Q1D, NE);
 
  234      const int max_q1d = T_Q1D ? T_Q1D : limits.
MAX_Q1D;
 
  235      const int max_d1d = T_D1D ? T_D1D : limits.
MAX_D1D;
 
  236      const int max_qd = std::max(max_q1d, max_d1d);
 
  237      const int mem_size = max_qd * max_qd * max_qd * 9;
 
  238      d_buff->SetSize(2*mem_size*GRID);
 
  239      GM = d_buff->Write();
 
  244      static constexpr int MQ1 = T_Q1D ? T_Q1D :
 
  245                                 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_Q1D);
 
  246      static constexpr int MD1 = T_D1D ? T_D1D :
 
  247                                 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_D1D);
 
  248      static constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
 
  249      static constexpr int MSZ = MDQ * MDQ * MDQ * 9;
 
  251      const int bid = MFEM_BLOCK_ID(x);
 
  252      MFEM_SHARED 
real_t BG[2][MQ1*MD1];
 
  253      MFEM_SHARED 
real_t SM0[SMEM?MSZ:1];
 
  254      MFEM_SHARED 
real_t SM1[SMEM?MSZ:1];
 
  255      real_t *lm0 = SMEM ? SM0 : GM + MSZ*bid;
 
  256      real_t *lm1 = SMEM ? SM1 : GM + MSZ*(GRID+bid);
 
  257      real_t (*DDD)[MD1*MD1*MD1] = (
real_t (*)[MD1*MD1*MD1]) (lm0);
 
  258      real_t (*DDQ)[MD1*MD1*MQ1] = (
real_t (*)[MD1*MD1*MQ1]) (lm1);
 
  259      real_t (*DQQ)[MD1*MQ1*MQ1] = (
real_t (*)[MD1*MQ1*MQ1]) (lm0);
 
  260      real_t (*QQQ)[MQ1*MQ1*MQ1] = (
real_t (*)[MQ1*MQ1*MQ1]) (lm1);
 
  262      kernels::internal::LoadX<MD1>(e,D1D,X,DDD);
 
  263      kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
 
  265      kernels::internal::GradX<MD1,MQ1>(D1D,Q1D,BG,DDD,DDQ);
 
  266      kernels::internal::GradY<MD1,MQ1>(D1D,Q1D,BG,DDQ,DQQ);
 
  267      kernels::internal::GradZ<MD1,MQ1>(D1D,Q1D,BG,DQQ,QQQ);
 
  269      MFEM_FOREACH_THREAD(qz,z,Q1D)
 
  271         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  273            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  276               kernels::internal::PullGrad<MQ1>(Q1D, qx,qy,qz, QQQ, J);
 
  286   using k = QuadratureInterpolator::DetKernels;
 
  288   k::Specialization<2,2,2,2>::Add();
 
  289   k::Specialization<2,2,2,3>::Add();
 
  290   k::Specialization<2,2,2,4>::Add();
 
  291   k::Specialization<2,2,2,6>::Add();
 
  292   k::Specialization<2,2,3,4>::Add();
 
  293   k::Specialization<2,2,3,6>::Add();
 
  294   k::Specialization<2,2,4,4>::Add();
 
  295   k::Specialization<2,2,4,6>::Add();
 
  296   k::Specialization<2,2,5,6>::Add();
 
  298   k::Specialization<3,3,2,4>::Add();
 
  299   k::Specialization<3,3,3,3>::Add();
 
  300   k::Specialization<3,3,3,5>::Add();
 
  301   k::Specialization<3,3,3,6>::Add();
 
  315template<
int DIM, 
int SDIM, 
int D1D, 
int Q1D>
 
  316DetKernel QuadratureInterpolator::DetKernels::Kernel()
 
  318   if (
DIM == 1) { 
return internal::quadrature_interpolator::Det1D; }
 
  319   else if (
DIM == 2 && 
SDIM == 2) { 
return internal::quadrature_interpolator::Det2D<D1D, Q1D>; }
 
  320   else if (
DIM == 2 && 
SDIM == 3) { 
return internal::quadrature_interpolator::Det2DSurface<D1D, Q1D>; }
 
  321   else if (
DIM == 3) { 
return internal::quadrature_interpolator::Det3D<D1D, Q1D>; }
 
  322   else { MFEM_ABORT(
""); }
 
  325DetKernel QuadratureInterpolator::DetKernels::Fallback(
 
  326   int DIM, 
int SDIM, 
int D1D, 
int Q1D)
 
  328   if (
DIM == 1) { 
return internal::quadrature_interpolator::Det1D; }
 
  329   else if (
DIM == 2 && 
SDIM == 2) { 
return internal::quadrature_interpolator::Det2D; }
 
  330   else if (
DIM == 2 && 
SDIM == 3) { 
return internal::quadrature_interpolator::Det2DSurface; }
 
  335      if (D1D <= MD && Q1D <= MQ) { 
return internal::quadrature_interpolator::Det3D<0,0,true>; }
 
  336      else { 
return internal::quadrature_interpolator::Det3D<0,0,false>; }
 
  338   else { MFEM_ABORT(
""); }
 
A basic generic Tensor class, appropriate for use on the GPU.
void(*)(const int NE, const real_t *, const real_t *, const real_t *, real_t *, const int, const int, Vector *) DetKernelType
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
MFEM_HOST_DEVICE real_t Det3D(DeviceMatrix &J)
void forall_3D_grid(int N, int X, int Y, int Z, int G, lambda &&body)
MFEM_HOST_DEVICE real_t Det2D(DeviceMatrix &J)
void forall(int N, lambda &&body)
Maximum number of 1D DOFs or quadrature points for the current runtime configuration of the Device (u...
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
int MAX_DET_1D
Maximum number of points for determinant computation in QuadratureInterpolator.
int MAX_D1D
Maximum number of 1D nodal points.
int MAX_Q1D
Maximum number of 1D quadrature points.