31   int r_MAX_D1D, r_MAX_Q1D;
 
   34      r_MAX_D1D = 6; r_MAX_Q1D = 7;
 
   38      r_MAX_D1D = 7; r_MAX_Q1D = 7;
 
   46   constexpr int DIM = 3;
 
   47   const int D1D = T_D1D ? T_D1D : d1d;
 
   48   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   49   MFEM_VERIFY(D1D <= r_MAX_D1D,
 
   50               "D1D: " << D1D << 
", r_MAX_D1D: " << r_MAX_D1D);
 
   51   MFEM_VERIFY(Q1D <= r_MAX_Q1D,
 
   52               "Q1D: " << Q1D << 
", r_MAX_Q1D: " << r_MAX_Q1D);
 
   54   const auto B = 
Reshape(
b.Read(), Q1D, D1D);
 
   64#if defined(__CUDA_ARCH__) 
   65      constexpr int MAX_D1D = 6;
 
   66      constexpr int MAX_Q1D = 7;
 
   67#elif defined(__HIP_DEVICE_COMPILE__) 
   68      constexpr int MAX_D1D = 7;
 
   69      constexpr int MAX_Q1D = 7;
 
   71      constexpr int MAX_D1D = DofQuadLimits::MAX_D1D;
 
   72      constexpr int MAX_Q1D = DofQuadLimits::MAX_Q1D;
 
   75      constexpr int DIM = 3;
 
   76      const int D1D = T_D1D ? T_D1D : d1d;
 
   77      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   78      constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
 
   79      constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
 
   81      MFEM_SHARED 
real_t bg[2*MQ1*MD1];
 
   92      MFEM_FOREACH_THREAD(q,x,Q1D)
 
   94         MFEM_FOREACH_THREAD(d,y,D1D)
 
   96            MFEM_FOREACH_THREAD(dummy,z,1)
 
  104      for (
int v = 0; v < 
DIM; ++v)
 
  107         MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  109            MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  111               MFEM_FOREACH_THREAD(qz,z,Q1D)
 
  113                  const real_t *Jtr = &J(0,0,qx,qy,qz,e);
 
  120                  for (
int s = 0; s < 
DIM; s++)
 
  122                     for (
int t = 0; t < 
DIM; t++)
 
  124                        H_loc(s,t) = H(v,s,v,t,qx,qy,qz,e);
 
  128                  for (
int m = 0; m < 
DIM; m++)
 
  130                     for (
int n = 0; n < 
DIM; n++)
 
  134                        Href(m,n,qx,qy,qz) = 0.0;
 
  135                        for (
int s = 0; s < 
DIM; s++)
 
  137                           for (
int t = 0; t < 
DIM; t++)
 
  139                              Href(m,n,qx,qy,qz) +=
 
  140                                 Jrt(m,s) * H_loc(s,t) * Jrt(n,t);
 
  151         MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  153            MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  155               MFEM_FOREACH_THREAD(dz,z,D1D)
 
  157                  for (
int m = 0; m < 
DIM; m++)
 
  159                     for (
int n = 0; n < 
DIM; n++)
 
  161                        QQD(m,n,qx,qy,dz) = 0.0;
 
  166                  for (
int qz = 0; qz < Q1D; ++qz)
 
  168                     const real_t Bz = B_sm(qz,dz);
 
  169                     const real_t Gz = G_sm(qz,dz);
 
  170                     for (
int m = 0; m < 
DIM; m++)
 
  172                        for (
int n = 0; n < 
DIM; n++)
 
  174                           const real_t L = (m == 2 ? Gz : Bz);
 
  175                           const real_t R = (n == 2 ? Gz : Bz);
 
  176                           QQD(m,n,qx,qy,dz) += L * Href(m,n,qx,qy,qz) * R;
 
  186         MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  188            MFEM_FOREACH_THREAD(dz,z,D1D)
 
  190               MFEM_FOREACH_THREAD(dy,y,D1D)
 
  192                  for (
int m = 0; m < 
DIM; m++)
 
  194                     for (
int n = 0; n < 
DIM; n++)
 
  196                        QDD(m,n,qx,dy,dz) = 0.0;
 
  201                  for (
int qy = 0; qy < Q1D; ++qy)
 
  203                     const real_t By = B_sm(qy,dy);
 
  204                     const real_t Gy = G_sm(qy,dy);
 
  205                     for (
int m = 0; m < 
DIM; m++)
 
  207                        for (
int n = 0; n < 
DIM; n++)
 
  209                           const real_t L = (m == 1 ? Gy : By);
 
  210                           const real_t R = (n == 1 ? Gy : By);
 
  211                           QDD(m,n,qx,dy,dz) += L * QQD(m,n,qx,qy,dz) * R;
 
  221         MFEM_FOREACH_THREAD(dz,z,D1D)
 
  223            MFEM_FOREACH_THREAD(dy,y,D1D)
 
  225               MFEM_FOREACH_THREAD(dx,x,D1D)
 
  229                  for (
int qx = 0; qx < Q1D; ++qx)
 
  231                     const real_t Bx = B_sm(qx,dx);
 
  232                     const real_t Gx = G_sm(qx,dx);
 
  233                     for (
int m = 0; m < 
DIM; m++)
 
  235                        for (
int n = 0; n < 
DIM; n++)
 
  237                           const real_t L = (m == 0 ? Gx : Bx);
 
  238                           const real_t R = (n == 0 ? Gx : Bx);
 
  239                           d += L * QDD(m,n,qx,dy,dz) * R;
 
  243                  D(dx,dy,dz,v,e) += d;
 
 
MFEM_REGISTER_TMOP_KERNELS(void, DatcSize, const int NE, const int ncomp, const int sizeidx, const real_t input_min_size, const DenseMatrix &w_, const Array< real_t > &b_, const Vector &x_, const Vector &nc_reduce, DenseTensor &j_, const int d1d, const int q1d)