12 #include "../general/forall.hpp"
19 template<
int T_D1D = 0,
int T_Q1D = 0>
20 static void EADiffusionAssemble1D(
const int NE,
21 const Array<double> &
b,
22 const Array<double> &g,
29 const int D1D = T_D1D ? T_D1D : d1d;
30 const int Q1D = T_Q1D ? T_Q1D : q1d;
31 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
32 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
33 auto G =
Reshape(g.Read(), Q1D, D1D);
34 auto D =
Reshape(padata.Read(), Q1D, NE);
35 auto A =
Reshape(eadata.ReadWrite(), D1D, D1D, NE);
36 MFEM_FORALL_3D(e, NE, D1D, D1D, 1,
38 const int D1D = T_D1D ? T_D1D : d1d;
39 const int Q1D = T_Q1D ? T_Q1D : q1d;
40 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
43 for (
int q = 0; q < Q1D; q++)
45 r_Gi[q] = G(q,MFEM_THREAD_ID(x));
46 r_Gj[q] = G(q,MFEM_THREAD_ID(y));
48 MFEM_FOREACH_THREAD(i1,x,D1D)
50 MFEM_FOREACH_THREAD(j1,y,D1D)
53 for (
int k1 = 0; k1 < Q1D; ++k1)
55 val += r_Gj[k1] * D(k1, e) * r_Gi[k1];
70 template<
int T_D1D = 0,
int T_Q1D = 0>
71 static void EADiffusionAssemble2D(
const int NE,
72 const Array<double> &b,
73 const Array<double> &g,
80 const int D1D = T_D1D ? T_D1D : d1d;
81 const int Q1D = T_Q1D ? T_Q1D : q1d;
82 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
83 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
84 auto B =
Reshape(b.Read(), Q1D, D1D);
85 auto G =
Reshape(g.Read(), Q1D, D1D);
86 auto D =
Reshape(padata.Read(), Q1D, Q1D, 3, NE);
87 auto A =
Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, NE);
88 MFEM_FORALL_3D(e, NE, D1D, D1D, 1,
90 const int D1D = T_D1D ? T_D1D : d1d;
91 const int Q1D = T_Q1D ? T_Q1D : q1d;
92 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
93 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
96 for (
int d = 0; d < D1D; d++)
98 for (
int q = 0; q < Q1D; q++)
105 MFEM_FOREACH_THREAD(i1,x,D1D)
107 MFEM_FOREACH_THREAD(i2,y,D1D)
109 for (
int j1 = 0; j1 < D1D; ++j1)
111 for (
int j2 = 0; j2 < D1D; ++j2)
114 for (
int k1 = 0; k1 < Q1D; ++k1)
116 for (
int k2 = 0; k2 < Q1D; ++k2)
118 double bgi = r_G[k1][i1] * r_B[k2][i2];
119 double gbi = r_B[k1][i1] * r_G[k2][i2];
120 double bgj = r_G[k1][j1] * r_B[k2][j2];
121 double gbj = r_B[k1][j1] * r_G[k2][j2];
122 double D00 = D(k1,k2,0,e);
123 double D10 = D(k1,k2,1,e);
125 double D11 = D(k1,k2,2,e);
126 val += bgi * D00 * bgj
134 A(i1, i2, j1, j2, e) += val;
138 A(i1, i2, j1, j2, e) = val;
147 template<
int T_D1D = 0,
int T_Q1D = 0>
148 static void EADiffusionAssemble3D(
const int NE,
149 const Array<double> &b,
150 const Array<double> &g,
151 const Vector &padata,
157 const int D1D = T_D1D ? T_D1D : d1d;
158 const int Q1D = T_Q1D ? T_Q1D : q1d;
159 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
160 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
161 auto B =
Reshape(b.Read(), Q1D, D1D);
162 auto G =
Reshape(g.Read(), Q1D, D1D);
163 auto D =
Reshape(padata.Read(), Q1D, Q1D, Q1D, 6, NE);
164 auto A =
Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, D1D, D1D, NE);
165 MFEM_FORALL_3D(e, NE, D1D, D1D, D1D,
167 const int D1D = T_D1D ? T_D1D : d1d;
168 const int Q1D = T_Q1D ? T_Q1D : q1d;
169 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
170 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
171 double r_B[MQ1][MD1];
172 double r_G[MQ1][MD1];
173 for (
int d = 0; d < D1D; d++)
175 for (
int q = 0; q < Q1D; q++)
182 MFEM_FOREACH_THREAD(i1,x,D1D)
184 MFEM_FOREACH_THREAD(i2,y,D1D)
186 MFEM_FOREACH_THREAD(i3,z,D1D)
188 for (
int j1 = 0; j1 < D1D; ++j1)
190 for (
int j2 = 0; j2 < D1D; ++j2)
192 for (
int j3 = 0; j3 < D1D; ++j3)
195 for (
int k1 = 0; k1 < Q1D; ++k1)
197 for (
int k2 = 0; k2 < Q1D; ++k2)
199 for (
int k3 = 0; k3 < Q1D; ++k3)
201 double bbgi = r_G[k1][i1] * r_B[k2][i2] * r_B[k3][i3];
202 double bgbi = r_B[k1][i1] * r_G[k2][i2] * r_B[k3][i3];
203 double gbbi = r_B[k1][i1] * r_B[k2][i2] * r_G[k3][i3];
204 double bbgj = r_G[k1][j1] * r_B[k2][j2] * r_B[k3][j3];
205 double bgbj = r_B[k1][j1] * r_G[k2][j2] * r_B[k3][j3];
206 double gbbj = r_B[k1][j1] * r_B[k2][j2] * r_G[k3][j3];
207 double D00 = D(k1,k2,k3,0,e);
208 double D10 = D(k1,k2,k3,1,e);
209 double D20 = D(k1,k2,k3,2,e);
211 double D11 = D(k1,k2,k3,3,e);
212 double D21 = D(k1,k2,k3,4,e);
215 double D22 = D(k1,k2,k3,5,e);
216 val += bbgi * D00 * bbgj
230 A(i1, i2, i3, j1, j2, j3, e) += val;
234 A(i1, i2, i3, j1, j2, j3, e) = val;
255 switch ((dofs1D << 4 ) | quad1D)
257 case 0x22:
return EADiffusionAssemble1D<2,2>(ne,B,G,pa_data,ea_data,
add);
258 case 0x33:
return EADiffusionAssemble1D<3,3>(ne,B,G,pa_data,ea_data,
add);
259 case 0x44:
return EADiffusionAssemble1D<4,4>(ne,B,G,pa_data,ea_data,
add);
260 case 0x55:
return EADiffusionAssemble1D<5,5>(ne,B,G,pa_data,ea_data,
add);
261 case 0x66:
return EADiffusionAssemble1D<6,6>(ne,B,G,pa_data,ea_data,
add);
262 case 0x77:
return EADiffusionAssemble1D<7,7>(ne,B,G,pa_data,ea_data,
add);
263 case 0x88:
return EADiffusionAssemble1D<8,8>(ne,B,G,pa_data,ea_data,
add);
264 case 0x99:
return EADiffusionAssemble1D<9,9>(ne,B,G,pa_data,ea_data,
add);
265 default:
return EADiffusionAssemble1D(ne,B,G,pa_data,ea_data,add,
271 switch ((dofs1D << 4 ) | quad1D)
273 case 0x22:
return EADiffusionAssemble2D<2,2>(ne,B,G,pa_data,ea_data,
add);
274 case 0x33:
return EADiffusionAssemble2D<3,3>(ne,B,G,pa_data,ea_data,
add);
275 case 0x44:
return EADiffusionAssemble2D<4,4>(ne,B,G,pa_data,ea_data,
add);
276 case 0x55:
return EADiffusionAssemble2D<5,5>(ne,B,G,pa_data,ea_data,
add);
277 case 0x66:
return EADiffusionAssemble2D<6,6>(ne,B,G,pa_data,ea_data,
add);
278 case 0x77:
return EADiffusionAssemble2D<7,7>(ne,B,G,pa_data,ea_data,
add);
279 case 0x88:
return EADiffusionAssemble2D<8,8>(ne,B,G,pa_data,ea_data,
add);
280 case 0x99:
return EADiffusionAssemble2D<9,9>(ne,B,G,pa_data,ea_data,
add);
281 default:
return EADiffusionAssemble2D(ne,B,G,pa_data,ea_data,add,
287 switch ((dofs1D << 4 ) | quad1D)
289 case 0x23:
return EADiffusionAssemble3D<2,3>(ne,B,G,pa_data,ea_data,
add);
290 case 0x34:
return EADiffusionAssemble3D<3,4>(ne,B,G,pa_data,ea_data,
add);
291 case 0x45:
return EADiffusionAssemble3D<4,5>(ne,B,G,pa_data,ea_data,
add);
292 case 0x56:
return EADiffusionAssemble3D<5,6>(ne,B,G,pa_data,ea_data,
add);
293 case 0x67:
return EADiffusionAssemble3D<6,7>(ne,B,G,pa_data,ea_data,
add);
294 case 0x78:
return EADiffusionAssemble3D<7,8>(ne,B,G,pa_data,ea_data,
add);
295 case 0x89:
return EADiffusionAssemble3D<8,9>(ne,B,G,pa_data,ea_data,
add);
296 default:
return EADiffusionAssemble3D(ne,B,G,pa_data,ea_data,add,
300 MFEM_ABORT(
"Unknown kernel.");
int GetNE() const
Returns number of elements.
void add(const Vector &v1, const Vector &v2, Vector &v)
Mesh * GetMesh() const
Returns the mesh.
virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat, const bool add)
Method defining element assembly.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Array< double > B
Basis functions evaluated at quadrature points.
virtual void AssemblePA(const FiniteElementSpace &fes)
Method defining partial assembly.
Array< double > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.