19template<
int T_D1D = 0,
int T_Q1D = 0>
20static void EADiffusionAssemble1D(
const int NE,
21 const Array<real_t> &
b,
22 const Array<real_t> &g,
29 const int D1D = T_D1D ? T_D1D : d1d;
30 const int Q1D = T_Q1D ? T_Q1D : q1d;
33 auto G =
Reshape(g.Read(), Q1D, D1D);
34 auto D =
Reshape(padata.Read(), Q1D, NE);
35 auto A =
Reshape(eadata.ReadWrite(), D1D, D1D, NE);
38 const int D1D = T_D1D ? T_D1D : d1d;
39 const int Q1D = T_Q1D ? T_Q1D : q1d;
40 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
43 for (
int q = 0; q < Q1D; q++)
45 r_Gi[q] = G(q,MFEM_THREAD_ID(x));
46 r_Gj[q] = G(q,MFEM_THREAD_ID(y));
48 MFEM_FOREACH_THREAD(i1,x,D1D)
50 MFEM_FOREACH_THREAD(j1,y,D1D)
53 for (
int k1 = 0; k1 < Q1D; ++k1)
55 val += r_Gj[k1] * D(k1, e) * r_Gi[k1];
70template<
int T_D1D = 0,
int T_Q1D = 0>
71static void EADiffusionAssemble2D(
const int NE,
72 const Array<real_t> &
b,
73 const Array<real_t> &g,
80 const int D1D = T_D1D ? T_D1D : d1d;
81 const int Q1D = T_Q1D ? T_Q1D : q1d;
84 auto B =
Reshape(
b.Read(), Q1D, D1D);
85 auto G =
Reshape(g.Read(), Q1D, D1D);
86 auto D =
Reshape(padata.Read(), Q1D, Q1D, 3, NE);
87 auto A =
Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, NE);
90 const int D1D = T_D1D ? T_D1D : d1d;
91 const int Q1D = T_Q1D ? T_Q1D : q1d;
92 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
93 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
96 for (
int d = 0; d < D1D; d++)
98 for (
int q = 0; q < Q1D; q++)
105 MFEM_FOREACH_THREAD(i1,x,D1D)
107 MFEM_FOREACH_THREAD(i2,y,D1D)
109 for (
int j1 = 0; j1 < D1D; ++j1)
111 for (
int j2 = 0; j2 < D1D; ++j2)
114 for (
int k1 = 0; k1 < Q1D; ++k1)
116 for (
int k2 = 0; k2 < Q1D; ++k2)
118 real_t bgi = r_G[k1][i1] * r_B[k2][i2];
119 real_t gbi = r_B[k1][i1] * r_G[k2][i2];
120 real_t bgj = r_G[k1][j1] * r_B[k2][j2];
121 real_t gbj = r_B[k1][j1] * r_G[k2][j2];
122 real_t D00 = D(k1,k2,0,e);
123 real_t D10 = D(k1,k2,1,e);
125 real_t D11 = D(k1,k2,2,e);
126 val += bgi * D00 * bgj
134 A(i1, i2, j1, j2, e) += val;
138 A(i1, i2, j1, j2, e) = val;
147template<
int T_D1D = 0,
int T_Q1D = 0>
148static void EADiffusionAssemble3D(
const int NE,
149 const Array<real_t> &
b,
150 const Array<real_t> &g,
151 const Vector &padata,
157 const int D1D = T_D1D ? T_D1D : d1d;
158 const int Q1D = T_Q1D ? T_Q1D : q1d;
161 auto B =
Reshape(
b.Read(), Q1D, D1D);
162 auto G =
Reshape(g.Read(), Q1D, D1D);
163 auto D =
Reshape(padata.Read(), Q1D, Q1D, Q1D, 6, NE);
164 auto A =
Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, D1D, D1D, NE);
167 const int D1D = T_D1D ? T_D1D : d1d;
168 const int Q1D = T_Q1D ? T_Q1D : q1d;
169 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
170 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
173 for (
int d = 0; d < D1D; d++)
175 for (
int q = 0; q < Q1D; q++)
182 MFEM_FOREACH_THREAD(i1,x,D1D)
184 MFEM_FOREACH_THREAD(i2,y,D1D)
186 MFEM_FOREACH_THREAD(i3,z,D1D)
188 for (
int j1 = 0; j1 < D1D; ++j1)
190 for (
int j2 = 0; j2 < D1D; ++j2)
192 for (
int j3 = 0; j3 < D1D; ++j3)
195 for (
int k1 = 0; k1 < Q1D; ++k1)
197 for (
int k2 = 0; k2 < Q1D; ++k2)
199 for (
int k3 = 0; k3 < Q1D; ++k3)
201 real_t bbgi = r_G[k1][i1] * r_B[k2][i2] * r_B[k3][i3];
202 real_t bgbi = r_B[k1][i1] * r_G[k2][i2] * r_B[k3][i3];
203 real_t gbbi = r_B[k1][i1] * r_B[k2][i2] * r_G[k3][i3];
204 real_t bbgj = r_G[k1][j1] * r_B[k2][j2] * r_B[k3][j3];
205 real_t bgbj = r_B[k1][j1] * r_G[k2][j2] * r_B[k3][j3];
206 real_t gbbj = r_B[k1][j1] * r_B[k2][j2] * r_G[k3][j3];
207 real_t D00 = D(k1,k2,k3,0,e);
208 real_t D10 = D(k1,k2,k3,1,e);
209 real_t D20 = D(k1,k2,k3,2,e);
211 real_t D11 = D(k1,k2,k3,3,e);
212 real_t D21 = D(k1,k2,k3,4,e);
215 real_t D22 = D(k1,k2,k3,5,e);
216 val += bbgi * D00 * bbgj
230 A(i1, i2, i3, j1, j2, j3, e) += val;
234 A(i1, i2, i3, j1, j2, j3, e) = val;
255 switch ((dofs1D << 4 ) | quad1D)
257 case 0x22:
return EADiffusionAssemble1D<2,2>(ne,B,G,pa_data,ea_data,
add);
258 case 0x33:
return EADiffusionAssemble1D<3,3>(ne,B,G,pa_data,ea_data,
add);
259 case 0x44:
return EADiffusionAssemble1D<4,4>(ne,B,G,pa_data,ea_data,
add);
260 case 0x55:
return EADiffusionAssemble1D<5,5>(ne,B,G,pa_data,ea_data,
add);
261 case 0x66:
return EADiffusionAssemble1D<6,6>(ne,B,G,pa_data,ea_data,
add);
262 case 0x77:
return EADiffusionAssemble1D<7,7>(ne,B,G,pa_data,ea_data,
add);
263 case 0x88:
return EADiffusionAssemble1D<8,8>(ne,B,G,pa_data,ea_data,
add);
264 case 0x99:
return EADiffusionAssemble1D<9,9>(ne,B,G,pa_data,ea_data,
add);
265 default:
return EADiffusionAssemble1D(ne,B,G,pa_data,ea_data,
add,
271 switch ((dofs1D << 4 ) | quad1D)
273 case 0x22:
return EADiffusionAssemble2D<2,2>(ne,B,G,pa_data,ea_data,
add);
274 case 0x33:
return EADiffusionAssemble2D<3,3>(ne,B,G,pa_data,ea_data,
add);
275 case 0x44:
return EADiffusionAssemble2D<4,4>(ne,B,G,pa_data,ea_data,
add);
276 case 0x55:
return EADiffusionAssemble2D<5,5>(ne,B,G,pa_data,ea_data,
add);
277 case 0x66:
return EADiffusionAssemble2D<6,6>(ne,B,G,pa_data,ea_data,
add);
278 case 0x77:
return EADiffusionAssemble2D<7,7>(ne,B,G,pa_data,ea_data,
add);
279 case 0x88:
return EADiffusionAssemble2D<8,8>(ne,B,G,pa_data,ea_data,
add);
280 case 0x99:
return EADiffusionAssemble2D<9,9>(ne,B,G,pa_data,ea_data,
add);
281 default:
return EADiffusionAssemble2D(ne,B,G,pa_data,ea_data,
add,
287 switch ((dofs1D << 4 ) | quad1D)
289 case 0x23:
return EADiffusionAssemble3D<2,3>(ne,B,G,pa_data,ea_data,
add);
290 case 0x34:
return EADiffusionAssemble3D<3,4>(ne,B,G,pa_data,ea_data,
add);
291 case 0x45:
return EADiffusionAssemble3D<4,5>(ne,B,G,pa_data,ea_data,
add);
292 case 0x56:
return EADiffusionAssemble3D<5,6>(ne,B,G,pa_data,ea_data,
add);
293 case 0x67:
return EADiffusionAssemble3D<6,7>(ne,B,G,pa_data,ea_data,
add);
294 case 0x78:
return EADiffusionAssemble3D<7,8>(ne,B,G,pa_data,ea_data,
add);
295 case 0x89:
return EADiffusionAssemble3D<8,9>(ne,B,G,pa_data,ea_data,
add);
296 default:
return EADiffusionAssemble3D(ne,B,G,pa_data,ea_data,
add,
300 MFEM_ABORT(
"Unknown kernel.");
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
void AssembleEA(const FiniteElementSpace &fes, Vector &emat, const bool add) override
Method defining element assembly.
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Array< real_t > B
Basis functions evaluated at quadrature points.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Mesh * GetMesh() const
Returns the mesh.
int GetNE() const
Returns number of elements.
void add(const Vector &v1, const Vector &v2, Vector &v)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D(int N, int X, int Y, lambda &&body)
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.