MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_convection_ea.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../bilininteg.hpp"
14#include "../gridfunc.hpp"
15
16namespace mfem
17{
18
19template<int T_D1D = 0, int T_Q1D = 0>
20static void EAConvectionAssemble1D(const int NE,
21 const Array<real_t> &b,
22 const Array<real_t> &g,
23 const Vector &padata,
24 Vector &eadata,
25 const bool add,
26 const int d1d = 0,
27 const int q1d = 0)
28{
29 const int D1D = T_D1D ? T_D1D : d1d;
30 const int Q1D = T_Q1D ? T_Q1D : q1d;
31 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
32 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
33 auto B = Reshape(b.Read(), Q1D, D1D);
34 auto G = Reshape(g.Read(), Q1D, D1D);
35 auto D = Reshape(padata.Read(), Q1D, NE);
36 auto A = Reshape(eadata.ReadWrite(), D1D, D1D, NE);
37 mfem::forall_2D(NE, D1D, D1D, [=] MFEM_HOST_DEVICE (int e)
38 {
39 const int D1D = T_D1D ? T_D1D : d1d;
40 const int Q1D = T_Q1D ? T_Q1D : q1d;
41 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
42 real_t r_Gi[MQ1];
43 real_t r_Bj[MQ1];
44 for (int q = 0; q < Q1D; q++)
45 {
46 r_Gi[q] = G(q,MFEM_THREAD_ID(x));
47 r_Bj[q] = B(q,MFEM_THREAD_ID(y));
48 }
49 MFEM_FOREACH_THREAD(i1,x,D1D)
50 {
51 MFEM_FOREACH_THREAD(j1,y,D1D)
52 {
53 real_t val = 0.0;
54 for (int k1 = 0; k1 < Q1D; ++k1)
55 {
56 val += r_Bj[k1] * D(k1, e) * r_Gi[k1];
57 }
58 if (add)
59 {
60 A(i1, j1, e) += val;
61 }
62 else
63 {
64 A(i1, j1, e) = val;
65 }
66 }
67 }
68 });
69}
70
71template<int T_D1D = 0, int T_Q1D = 0>
72static void EAConvectionAssemble2D(const int NE,
73 const Array<real_t> &b,
74 const Array<real_t> &g,
75 const Vector &padata,
76 Vector &eadata,
77 const bool add,
78 const int d1d = 0,
79 const int q1d = 0)
80{
81 const int D1D = T_D1D ? T_D1D : d1d;
82 const int Q1D = T_Q1D ? T_Q1D : q1d;
83 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
84 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
85 auto B = Reshape(b.Read(), Q1D, D1D);
86 auto G = Reshape(g.Read(), Q1D, D1D);
87 auto D = Reshape(padata.Read(), Q1D, Q1D, 2, NE);
88 auto A = Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, NE);
89 mfem::forall_2D(NE, D1D, D1D, [=] MFEM_HOST_DEVICE (int e)
90 {
91 const int D1D = T_D1D ? T_D1D : d1d;
92 const int Q1D = T_Q1D ? T_Q1D : q1d;
93 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
94 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
95 real_t r_B[MQ1][MD1];
96 real_t r_G[MQ1][MD1];
97 for (int d = 0; d < D1D; d++)
98 {
99 for (int q = 0; q < Q1D; q++)
100 {
101 r_B[q][d] = B(q,d);
102 r_G[q][d] = G(q,d);
103 }
104 }
105 MFEM_SHARED real_t s_D[MQ1][MQ1][2];
106 MFEM_FOREACH_THREAD(k1,x,Q1D)
107 {
108 MFEM_FOREACH_THREAD(k2,y,Q1D)
109 {
110 s_D[k1][k2][0] = D(k1,k2,0,e);
111 s_D[k1][k2][1] = D(k1,k2,1,e);
112 }
113 }
114 MFEM_SYNC_THREAD;
115 MFEM_FOREACH_THREAD(i1,x,D1D)
116 {
117 MFEM_FOREACH_THREAD(i2,y,D1D)
118 {
119 for (int j1 = 0; j1 < D1D; ++j1)
120 {
121 for (int j2 = 0; j2 < D1D; ++j2)
122 {
123 real_t val = 0.0;
124 for (int k1 = 0; k1 < Q1D; ++k1)
125 {
126 for (int k2 = 0; k2 < Q1D; ++k2)
127 {
128 val += (r_G[k1][i1] * r_B[k2][i2] * s_D[k1][k2][0]
129 + r_B[k1][i1] * r_G[k2][i2] * s_D[k1][k2][1])
130 * r_B[k1][j1]* r_B[k2][j2];
131 }
132 }
133 if (add)
134 {
135 A(i1, i2, j1, j2, e) += val;
136 }
137 else
138 {
139 A(i1, i2, j1, j2, e) = val;
140 }
141 }
142 }
143 }
144 }
145 });
146}
147
148template<int T_D1D = 0, int T_Q1D = 0>
149static void EAConvectionAssemble3D(const int NE,
150 const Array<real_t> &b,
151 const Array<real_t> &g,
152 const Vector &padata,
153 Vector &eadata,
154 const bool add,
155 const int d1d = 0,
156 const int q1d = 0)
157{
158 const int D1D = T_D1D ? T_D1D : d1d;
159 const int Q1D = T_Q1D ? T_Q1D : q1d;
160 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
161 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
162 auto B = Reshape(b.Read(), Q1D, D1D);
163 auto G = Reshape(g.Read(), Q1D, D1D);
164 auto D = Reshape(padata.Read(), Q1D, Q1D, Q1D, 3, NE);
165 auto A = Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, D1D, D1D, NE);
166 mfem::forall_3D(NE, D1D, D1D, D1D, [=] MFEM_HOST_DEVICE (int e)
167 {
168 const int D1D = T_D1D ? T_D1D : d1d;
169 const int Q1D = T_Q1D ? T_Q1D : q1d;
170 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
171 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
172 real_t r_B[MQ1][MD1];
173 real_t r_G[MQ1][MD1];
174 for (int d = 0; d < D1D; d++)
175 {
176 for (int q = 0; q < Q1D; q++)
177 {
178 r_B[q][d] = B(q,d);
179 r_G[q][d] = G(q,d);
180 }
181 }
182 MFEM_FOREACH_THREAD(i1,x,D1D)
183 {
184 MFEM_FOREACH_THREAD(i2,y,D1D)
185 {
186 MFEM_FOREACH_THREAD(i3,z,D1D)
187 {
188 for (int j1 = 0; j1 < D1D; ++j1)
189 {
190 for (int j2 = 0; j2 < D1D; ++j2)
191 {
192 for (int j3 = 0; j3 < D1D; ++j3)
193 {
194 real_t val = 0.0;
195 for (int k1 = 0; k1 < Q1D; ++k1)
196 {
197 for (int k2 = 0; k2 < Q1D; ++k2)
198 {
199 for (int k3 = 0; k3 < Q1D; ++k3)
200 {
201 real_t D0 = D(k1,k2,k3,0,e);
202 real_t D1 = D(k1,k2,k3,1,e);
203 real_t D2 = D(k1,k2,k3,2,e);
204 val += (r_G[k1][i1] * r_B[k2][i2] * r_B[k3][i3] * D0
205 + r_B[k1][i1] * r_G[k2][i2] * r_B[k3][i3] * D1
206 + r_B[k1][i1] * r_B[k2][i2] * r_G[k3][i3] * D2)
207 * r_B[k1][j1] * r_B[k2][j2] * r_B[k3][j3];
208 }
209 }
210 }
211 if (add)
212 {
213 A(i1, i2, i3, j1, j2, j3, e) += val;
214 }
215 else
216 {
217 A(i1, i2, i3, j1, j2, j3, e) = val;
218 }
219 }
220 }
221 }
222 }
223 }
224 }
225 });
226}
227
229 Vector &ea_data,
230 const bool add)
231{
232 AssemblePA(fes);
233 ne = fes.GetMesh()->GetNE();
234 const Array<real_t> &B = maps->B;
235 const Array<real_t> &G = maps->G;
236 if (dim == 1)
237 {
238 switch ((dofs1D << 4 ) | quad1D)
239 {
240 case 0x22: return EAConvectionAssemble1D<2,2>(ne,B,G,pa_data,ea_data,add);
241 case 0x33: return EAConvectionAssemble1D<3,3>(ne,B,G,pa_data,ea_data,add);
242 case 0x44: return EAConvectionAssemble1D<4,4>(ne,B,G,pa_data,ea_data,add);
243 case 0x55: return EAConvectionAssemble1D<5,5>(ne,B,G,pa_data,ea_data,add);
244 case 0x66: return EAConvectionAssemble1D<6,6>(ne,B,G,pa_data,ea_data,add);
245 case 0x77: return EAConvectionAssemble1D<7,7>(ne,B,G,pa_data,ea_data,add);
246 case 0x88: return EAConvectionAssemble1D<8,8>(ne,B,G,pa_data,ea_data,add);
247 case 0x99: return EAConvectionAssemble1D<9,9>(ne,B,G,pa_data,ea_data,add);
248 default: return EAConvectionAssemble1D(ne,B,G,pa_data,ea_data,add,
249 dofs1D,quad1D);
250 }
251 }
252 else if (dim == 2)
253 {
254 switch ((dofs1D << 4 ) | quad1D)
255 {
256 case 0x22: return EAConvectionAssemble2D<2,2>(ne,B,G,pa_data,ea_data,add);
257 case 0x33: return EAConvectionAssemble2D<3,3>(ne,B,G,pa_data,ea_data,add);
258 case 0x44: return EAConvectionAssemble2D<4,4>(ne,B,G,pa_data,ea_data,add);
259 case 0x55: return EAConvectionAssemble2D<5,5>(ne,B,G,pa_data,ea_data,add);
260 case 0x66: return EAConvectionAssemble2D<6,6>(ne,B,G,pa_data,ea_data,add);
261 case 0x77: return EAConvectionAssemble2D<7,7>(ne,B,G,pa_data,ea_data,add);
262 case 0x88: return EAConvectionAssemble2D<8,8>(ne,B,G,pa_data,ea_data,add);
263 case 0x99: return EAConvectionAssemble2D<9,9>(ne,B,G,pa_data,ea_data,add);
264 default: return EAConvectionAssemble2D(ne,B,G,pa_data,ea_data,add,
265 dofs1D,quad1D);
266 }
267 }
268 else if (dim == 3)
269 {
270 switch ((dofs1D << 4 ) | quad1D)
271 {
272 case 0x23: return EAConvectionAssemble3D<2,3>(ne,B,G,pa_data,ea_data,add);
273 case 0x34: return EAConvectionAssemble3D<3,4>(ne,B,G,pa_data,ea_data,add);
274 case 0x45: return EAConvectionAssemble3D<4,5>(ne,B,G,pa_data,ea_data,add);
275 case 0x56: return EAConvectionAssemble3D<5,6>(ne,B,G,pa_data,ea_data,add);
276 case 0x67: return EAConvectionAssemble3D<6,7>(ne,B,G,pa_data,ea_data,add);
277 case 0x78: return EAConvectionAssemble3D<7,8>(ne,B,G,pa_data,ea_data,add);
278 case 0x89: return EAConvectionAssemble3D<8,9>(ne,B,G,pa_data,ea_data,add);
279 default: return EAConvectionAssemble3D(ne,B,G,pa_data,ea_data,add,
280 dofs1D,quad1D);
281 }
282 }
283 MFEM_ABORT("Unknown kernel.");
284}
285
286}
const DofToQuad * maps
Not owned.
void AssemblePA(const FiniteElementSpace &) override
Method defining partial assembly.
void AssembleEA(const FiniteElementSpace &fes, Vector &emat, const bool add) override
Method defining element assembly.
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:679
int GetNE() const
Returns number of elements.
Definition mesh.hpp:1282
Vector data type.
Definition vector.hpp:82
real_t b
Definition lissajous.cpp:42
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:391
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:762
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128