19template<
int T_D1D = 0, 
int T_Q1D = 0>
 
   20static void EADiffusionAssemble1D(
const int NE,
 
   21                                  const Array<real_t> &
b,
 
   22                                  const Array<real_t> &g,
 
   29   const int D1D = T_D1D ? T_D1D : d1d;
 
   30   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   33   auto G = 
Reshape(g.Read(), Q1D, D1D);
 
   34   auto D = 
Reshape(padata.Read(), Q1D, NE);
 
   35   auto A = 
Reshape(eadata.ReadWrite(), D1D, D1D, NE);
 
   38      const int D1D = T_D1D ? T_D1D : d1d;
 
   39      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   40      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
 
   43      for (
int q = 0; q < Q1D; q++)
 
   45         r_Gi[q] = G(q,MFEM_THREAD_ID(x));
 
   46         r_Gj[q] = G(q,MFEM_THREAD_ID(y));
 
   48      MFEM_FOREACH_THREAD(i1,x,D1D)
 
   50         MFEM_FOREACH_THREAD(j1,y,D1D)
 
   53            for (
int k1 = 0; k1 < Q1D; ++k1)
 
   55               val += r_Gj[k1] * D(k1, e) * r_Gi[k1];
 
   70template<
int T_D1D = 0, 
int T_Q1D = 0>
 
   71static void EADiffusionAssemble2D(
const int NE,
 
   72                                  const Array<real_t> &
b,
 
   73                                  const Array<real_t> &g,
 
   80   const int D1D = T_D1D ? T_D1D : d1d;
 
   81   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   84   auto B = 
Reshape(
b.Read(), Q1D, D1D);
 
   85   auto G = 
Reshape(g.Read(), Q1D, D1D);
 
   86   auto D = 
Reshape(padata.Read(), Q1D, Q1D, 3, NE);
 
   87   auto A = 
Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, NE);
 
   90      const int D1D = T_D1D ? T_D1D : d1d;
 
   91      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
   92      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
   93      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
 
   96      for (
int d = 0; d < D1D; d++)
 
   98         for (
int q = 0; q < Q1D; q++)
 
  105      MFEM_FOREACH_THREAD(i1,x,D1D)
 
  107         MFEM_FOREACH_THREAD(i2,y,D1D)
 
  109            for (
int j1 = 0; j1 < D1D; ++j1)
 
  111               for (
int j2 = 0; j2 < D1D; ++j2)
 
  114                  for (
int k1 = 0; k1 < Q1D; ++k1)
 
  116                     for (
int k2 = 0; k2 < Q1D; ++k2)
 
  118                        real_t bgi = r_G[k1][i1] * r_B[k2][i2];
 
  119                        real_t gbi = r_B[k1][i1] * r_G[k2][i2];
 
  120                        real_t bgj = r_G[k1][j1] * r_B[k2][j2];
 
  121                        real_t gbj = r_B[k1][j1] * r_G[k2][j2];
 
  122                        real_t D00 = D(k1,k2,0,e);
 
  123                        real_t D10 = D(k1,k2,1,e);
 
  125                        real_t D11 = D(k1,k2,2,e);
 
  126                        val += bgi * D00 * bgj
 
  134                     A(i1, i2, j1, j2, e) += val;
 
  138                     A(i1, i2, j1, j2, e) = val;
 
  147template<
int T_D1D = 0, 
int T_Q1D = 0>
 
  148static void EADiffusionAssemble3D(
const int NE,
 
  149                                  const Array<real_t> &
b,
 
  150                                  const Array<real_t> &g,
 
  151                                  const Vector &padata,
 
  157   const int D1D = T_D1D ? T_D1D : d1d;
 
  158   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  161   auto B = 
Reshape(
b.Read(), Q1D, D1D);
 
  162   auto G = 
Reshape(g.Read(), Q1D, D1D);
 
  163   auto D = 
Reshape(padata.Read(), Q1D, Q1D, Q1D, 6, NE);
 
  164   auto A = 
Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, D1D, D1D, NE);
 
  167      const int D1D = T_D1D ? T_D1D : d1d;
 
  168      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  169      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
  170      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
 
  173      for (
int d = 0; d < D1D; d++)
 
  175         for (
int q = 0; q < Q1D; q++)
 
  182      MFEM_FOREACH_THREAD(i1,x,D1D)
 
  184         MFEM_FOREACH_THREAD(i2,y,D1D)
 
  186            MFEM_FOREACH_THREAD(i3,z,D1D)
 
  188               for (
int j1 = 0; j1 < D1D; ++j1)
 
  190                  for (
int j2 = 0; j2 < D1D; ++j2)
 
  192                     for (
int j3 = 0; j3 < D1D; ++j3)
 
  195                        for (
int k1 = 0; k1 < Q1D; ++k1)
 
  197                           for (
int k2 = 0; k2 < Q1D; ++k2)
 
  199                              for (
int k3 = 0; k3 < Q1D; ++k3)
 
  201                                 real_t bbgi = r_G[k1][i1] * r_B[k2][i2] * r_B[k3][i3];
 
  202                                 real_t bgbi = r_B[k1][i1] * r_G[k2][i2] * r_B[k3][i3];
 
  203                                 real_t gbbi = r_B[k1][i1] * r_B[k2][i2] * r_G[k3][i3];
 
  204                                 real_t bbgj = r_G[k1][j1] * r_B[k2][j2] * r_B[k3][j3];
 
  205                                 real_t bgbj = r_B[k1][j1] * r_G[k2][j2] * r_B[k3][j3];
 
  206                                 real_t gbbj = r_B[k1][j1] * r_B[k2][j2] * r_G[k3][j3];
 
  207                                 real_t D00 = D(k1,k2,k3,0,e);
 
  208                                 real_t D10 = D(k1,k2,k3,1,e);
 
  209                                 real_t D20 = D(k1,k2,k3,2,e);
 
  211                                 real_t D11 = D(k1,k2,k3,3,e);
 
  212                                 real_t D21 = D(k1,k2,k3,4,e);
 
  215                                 real_t D22 = D(k1,k2,k3,5,e);
 
  216                                 val += bbgi * D00 * bbgj
 
  230                           A(i1, i2, i3, j1, j2, j3, e) += val;
 
  234                           A(i1, i2, i3, j1, j2, j3, e) = val;
 
  255      switch ((dofs1D << 4 ) | quad1D)
 
  257         case 0x22: 
return EADiffusionAssemble1D<2,2>(ne,B,G,pa_data,ea_data,
add);
 
  258         case 0x33: 
return EADiffusionAssemble1D<3,3>(ne,B,G,pa_data,ea_data,
add);
 
  259         case 0x44: 
return EADiffusionAssemble1D<4,4>(ne,B,G,pa_data,ea_data,
add);
 
  260         case 0x55: 
return EADiffusionAssemble1D<5,5>(ne,B,G,pa_data,ea_data,
add);
 
  261         case 0x66: 
return EADiffusionAssemble1D<6,6>(ne,B,G,pa_data,ea_data,
add);
 
  262         case 0x77: 
return EADiffusionAssemble1D<7,7>(ne,B,G,pa_data,ea_data,
add);
 
  263         case 0x88: 
return EADiffusionAssemble1D<8,8>(ne,B,G,pa_data,ea_data,
add);
 
  264         case 0x99: 
return EADiffusionAssemble1D<9,9>(ne,B,G,pa_data,ea_data,
add);
 
  265         default:   
return EADiffusionAssemble1D(ne,B,G,pa_data,ea_data,
add,
 
  271      switch ((dofs1D << 4 ) | quad1D)
 
  273         case 0x22: 
return EADiffusionAssemble2D<2,2>(ne,B,G,pa_data,ea_data,
add);
 
  274         case 0x33: 
return EADiffusionAssemble2D<3,3>(ne,B,G,pa_data,ea_data,
add);
 
  275         case 0x44: 
return EADiffusionAssemble2D<4,4>(ne,B,G,pa_data,ea_data,
add);
 
  276         case 0x55: 
return EADiffusionAssemble2D<5,5>(ne,B,G,pa_data,ea_data,
add);
 
  277         case 0x66: 
return EADiffusionAssemble2D<6,6>(ne,B,G,pa_data,ea_data,
add);
 
  278         case 0x77: 
return EADiffusionAssemble2D<7,7>(ne,B,G,pa_data,ea_data,
add);
 
  279         case 0x88: 
return EADiffusionAssemble2D<8,8>(ne,B,G,pa_data,ea_data,
add);
 
  280         case 0x99: 
return EADiffusionAssemble2D<9,9>(ne,B,G,pa_data,ea_data,
add);
 
  281         default:   
return EADiffusionAssemble2D(ne,B,G,pa_data,ea_data,
add,
 
  287      switch ((dofs1D << 4 ) | quad1D)
 
  289         case 0x23: 
return EADiffusionAssemble3D<2,3>(ne,B,G,pa_data,ea_data,
add);
 
  290         case 0x34: 
return EADiffusionAssemble3D<3,4>(ne,B,G,pa_data,ea_data,
add);
 
  291         case 0x45: 
return EADiffusionAssemble3D<4,5>(ne,B,G,pa_data,ea_data,
add);
 
  292         case 0x56: 
return EADiffusionAssemble3D<5,6>(ne,B,G,pa_data,ea_data,
add);
 
  293         case 0x67: 
return EADiffusionAssemble3D<6,7>(ne,B,G,pa_data,ea_data,
add);
 
  294         case 0x78: 
return EADiffusionAssemble3D<7,8>(ne,B,G,pa_data,ea_data,
add);
 
  295         case 0x89: 
return EADiffusionAssemble3D<8,9>(ne,B,G,pa_data,ea_data,
add);
 
  296         default:   
return EADiffusionAssemble3D(ne,B,G,pa_data,ea_data,
add,
 
  300   MFEM_ABORT(
"Unknown kernel.");
 
 
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
void AssembleEA(const FiniteElementSpace &fes, Vector &emat, const bool add) override
Method defining element assembly.
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Array< real_t > B
Basis functions evaluated at quadrature points.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Mesh * GetMesh() const
Returns the mesh.
int GetNE() const
Returns number of elements.
void add(const Vector &v1, const Vector &v2, Vector &v)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D(int N, int X, int Y, lambda &&body)
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.