MFEM v4.8.0
Finite element discretization library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_diffusion_ea.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../bilininteg.hpp"
14#include "../gridfunc.hpp"
15
16namespace mfem
17{
18
19template<int T_D1D = 0, int T_Q1D = 0>
20static void EADiffusionAssemble1D(const int NE,
21 const Array<real_t> &b,
22 const Array<real_t> &g,
23 const Vector &padata,
24 Vector &eadata,
25 const bool add,
26 const int d1d = 0,
27 const int q1d = 0)
28{
29 const int D1D = T_D1D ? T_D1D : d1d;
30 const int Q1D = T_Q1D ? T_Q1D : q1d;
31 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
32 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
33 auto G = Reshape(g.Read(), Q1D, D1D);
34 auto D = Reshape(padata.Read(), Q1D, NE);
35 auto A = Reshape(eadata.ReadWrite(), D1D, D1D, NE);
36 mfem::forall_2D(NE, D1D, D1D, [=] MFEM_HOST_DEVICE (int e)
37 {
38 const int D1D = T_D1D ? T_D1D : d1d;
39 const int Q1D = T_Q1D ? T_Q1D : q1d;
40 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
41 real_t r_Gi[MQ1];
42 real_t r_Gj[MQ1];
43 for (int q = 0; q < Q1D; q++)
44 {
45 r_Gi[q] = G(q,MFEM_THREAD_ID(x));
46 r_Gj[q] = G(q,MFEM_THREAD_ID(y));
47 }
48 MFEM_FOREACH_THREAD(i1,x,D1D)
49 {
50 MFEM_FOREACH_THREAD(j1,y,D1D)
51 {
52 real_t val = 0.0;
53 for (int k1 = 0; k1 < Q1D; ++k1)
54 {
55 val += r_Gj[k1] * D(k1, e) * r_Gi[k1];
56 }
57 if (add)
58 {
59 A(i1, j1, e) += val;
60 }
61 else
62 {
63 A(i1, j1, e) = val;
64 }
65 }
66 }
67 });
68}
69
70template<int T_D1D = 0, int T_Q1D = 0>
71static void EADiffusionAssemble2D(const int NE,
72 const Array<real_t> &b,
73 const Array<real_t> &g,
74 const Vector &padata,
75 Vector &eadata,
76 const bool add,
77 const int d1d = 0,
78 const int q1d = 0)
79{
80 const int D1D = T_D1D ? T_D1D : d1d;
81 const int Q1D = T_Q1D ? T_Q1D : q1d;
82 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
83 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
84 auto B = Reshape(b.Read(), Q1D, D1D);
85 auto G = Reshape(g.Read(), Q1D, D1D);
86 auto D = Reshape(padata.Read(), Q1D, Q1D, 3, NE);
87 auto A = Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, NE);
88 mfem::forall_2D(NE, D1D, D1D, [=] MFEM_HOST_DEVICE (int e)
89 {
90 const int D1D = T_D1D ? T_D1D : d1d;
91 const int Q1D = T_Q1D ? T_Q1D : q1d;
92 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
93 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
94 real_t r_B[MQ1][MD1];
95 real_t r_G[MQ1][MD1];
96 for (int d = 0; d < D1D; d++)
97 {
98 for (int q = 0; q < Q1D; q++)
99 {
100 r_B[q][d] = B(q,d);
101 r_G[q][d] = G(q,d);
102 }
103 }
104 MFEM_SYNC_THREAD;
105 MFEM_FOREACH_THREAD(i1,x,D1D)
106 {
107 MFEM_FOREACH_THREAD(i2,y,D1D)
108 {
109 for (int j1 = 0; j1 < D1D; ++j1)
110 {
111 for (int j2 = 0; j2 < D1D; ++j2)
112 {
113 real_t val = 0.0;
114 for (int k1 = 0; k1 < Q1D; ++k1)
115 {
116 for (int k2 = 0; k2 < Q1D; ++k2)
117 {
118 real_t bgi = r_G[k1][i1] * r_B[k2][i2];
119 real_t gbi = r_B[k1][i1] * r_G[k2][i2];
120 real_t bgj = r_G[k1][j1] * r_B[k2][j2];
121 real_t gbj = r_B[k1][j1] * r_G[k2][j2];
122 real_t D00 = D(k1,k2,0,e);
123 real_t D10 = D(k1,k2,1,e);
124 real_t D01 = D10;
125 real_t D11 = D(k1,k2,2,e);
126 val += bgi * D00 * bgj
127 + gbi * D01 * bgj
128 + bgi * D10 * gbj
129 + gbi * D11 * gbj;
130 }
131 }
132 if (add)
133 {
134 A(i1, i2, j1, j2, e) += val;
135 }
136 else
137 {
138 A(i1, i2, j1, j2, e) = val;
139 }
140 }
141 }
142 }
143 }
144 });
145}
146
147template<int T_D1D = 0, int T_Q1D = 0>
148static void EADiffusionAssemble3D(const int NE,
149 const Array<real_t> &b,
150 const Array<real_t> &g,
151 const Vector &padata,
152 Vector &eadata,
153 const bool add,
154 const int d1d = 0,
155 const int q1d = 0)
156{
157 const int D1D = T_D1D ? T_D1D : d1d;
158 const int Q1D = T_Q1D ? T_Q1D : q1d;
159 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
160 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
161 auto B = Reshape(b.Read(), Q1D, D1D);
162 auto G = Reshape(g.Read(), Q1D, D1D);
163 auto D = Reshape(padata.Read(), Q1D, Q1D, Q1D, 6, NE);
164 auto A = Reshape(eadata.ReadWrite(), D1D, D1D, D1D, D1D, D1D, D1D, NE);
165 mfem::forall_3D(NE, D1D, D1D, D1D, [=] MFEM_HOST_DEVICE (int e)
166 {
167 const int D1D = T_D1D ? T_D1D : d1d;
168 const int Q1D = T_Q1D ? T_Q1D : q1d;
169 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
170 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
171 real_t r_B[MQ1][MD1];
172 real_t r_G[MQ1][MD1];
173 for (int d = 0; d < D1D; d++)
174 {
175 for (int q = 0; q < Q1D; q++)
176 {
177 r_B[q][d] = B(q,d);
178 r_G[q][d] = G(q,d);
179 }
180 }
181 MFEM_SYNC_THREAD;
182 MFEM_FOREACH_THREAD(i1,x,D1D)
183 {
184 MFEM_FOREACH_THREAD(i2,y,D1D)
185 {
186 MFEM_FOREACH_THREAD(i3,z,D1D)
187 {
188 for (int j1 = 0; j1 < D1D; ++j1)
189 {
190 for (int j2 = 0; j2 < D1D; ++j2)
191 {
192 for (int j3 = 0; j3 < D1D; ++j3)
193 {
194 real_t val = 0.0;
195 for (int k1 = 0; k1 < Q1D; ++k1)
196 {
197 for (int k2 = 0; k2 < Q1D; ++k2)
198 {
199 for (int k3 = 0; k3 < Q1D; ++k3)
200 {
201 real_t bbgi = r_G[k1][i1] * r_B[k2][i2] * r_B[k3][i3];
202 real_t bgbi = r_B[k1][i1] * r_G[k2][i2] * r_B[k3][i3];
203 real_t gbbi = r_B[k1][i1] * r_B[k2][i2] * r_G[k3][i3];
204 real_t bbgj = r_G[k1][j1] * r_B[k2][j2] * r_B[k3][j3];
205 real_t bgbj = r_B[k1][j1] * r_G[k2][j2] * r_B[k3][j3];
206 real_t gbbj = r_B[k1][j1] * r_B[k2][j2] * r_G[k3][j3];
207 real_t D00 = D(k1,k2,k3,0,e);
208 real_t D10 = D(k1,k2,k3,1,e);
209 real_t D20 = D(k1,k2,k3,2,e);
210 real_t D01 = D10;
211 real_t D11 = D(k1,k2,k3,3,e);
212 real_t D21 = D(k1,k2,k3,4,e);
213 real_t D02 = D20;
214 real_t D12 = D21;
215 real_t D22 = D(k1,k2,k3,5,e);
216 val += bbgi * D00 * bbgj
217 + bgbi * D10 * bbgj
218 + gbbi * D20 * bbgj
219 + bbgi * D01 * bgbj
220 + bgbi * D11 * bgbj
221 + gbbi * D21 * bgbj
222 + bbgi * D02 * gbbj
223 + bgbi * D12 * gbbj
224 + gbbi * D22 * gbbj;
225 }
226 }
227 }
228 if (add)
229 {
230 A(i1, i2, i3, j1, j2, j3, e) += val;
231 }
232 else
233 {
234 A(i1, i2, i3, j1, j2, j3, e) = val;
235 }
236 }
237 }
238 }
239 }
240 }
241 }
242 });
243}
244
246 Vector &ea_data,
247 const bool add)
248{
249 AssemblePA(fes);
250 ne = fes.GetMesh()->GetNE();
251 const Array<real_t> &B = maps->B;
252 const Array<real_t> &G = maps->G;
253 if (dim == 1)
254 {
255 switch ((dofs1D << 4 ) | quad1D)
256 {
257 case 0x22: return EADiffusionAssemble1D<2,2>(ne,B,G,pa_data,ea_data,add);
258 case 0x33: return EADiffusionAssemble1D<3,3>(ne,B,G,pa_data,ea_data,add);
259 case 0x44: return EADiffusionAssemble1D<4,4>(ne,B,G,pa_data,ea_data,add);
260 case 0x55: return EADiffusionAssemble1D<5,5>(ne,B,G,pa_data,ea_data,add);
261 case 0x66: return EADiffusionAssemble1D<6,6>(ne,B,G,pa_data,ea_data,add);
262 case 0x77: return EADiffusionAssemble1D<7,7>(ne,B,G,pa_data,ea_data,add);
263 case 0x88: return EADiffusionAssemble1D<8,8>(ne,B,G,pa_data,ea_data,add);
264 case 0x99: return EADiffusionAssemble1D<9,9>(ne,B,G,pa_data,ea_data,add);
265 default: return EADiffusionAssemble1D(ne,B,G,pa_data,ea_data,add,
266 dofs1D,quad1D);
267 }
268 }
269 else if (dim == 2)
270 {
271 switch ((dofs1D << 4 ) | quad1D)
272 {
273 case 0x22: return EADiffusionAssemble2D<2,2>(ne,B,G,pa_data,ea_data,add);
274 case 0x33: return EADiffusionAssemble2D<3,3>(ne,B,G,pa_data,ea_data,add);
275 case 0x44: return EADiffusionAssemble2D<4,4>(ne,B,G,pa_data,ea_data,add);
276 case 0x55: return EADiffusionAssemble2D<5,5>(ne,B,G,pa_data,ea_data,add);
277 case 0x66: return EADiffusionAssemble2D<6,6>(ne,B,G,pa_data,ea_data,add);
278 case 0x77: return EADiffusionAssemble2D<7,7>(ne,B,G,pa_data,ea_data,add);
279 case 0x88: return EADiffusionAssemble2D<8,8>(ne,B,G,pa_data,ea_data,add);
280 case 0x99: return EADiffusionAssemble2D<9,9>(ne,B,G,pa_data,ea_data,add);
281 default: return EADiffusionAssemble2D(ne,B,G,pa_data,ea_data,add,
282 dofs1D,quad1D);
283 }
284 }
285 else if (dim == 3)
286 {
287 switch ((dofs1D << 4 ) | quad1D)
288 {
289 case 0x23: return EADiffusionAssemble3D<2,3>(ne,B,G,pa_data,ea_data,add);
290 case 0x34: return EADiffusionAssemble3D<3,4>(ne,B,G,pa_data,ea_data,add);
291 case 0x45: return EADiffusionAssemble3D<4,5>(ne,B,G,pa_data,ea_data,add);
292 case 0x56: return EADiffusionAssemble3D<5,6>(ne,B,G,pa_data,ea_data,add);
293 case 0x67: return EADiffusionAssemble3D<6,7>(ne,B,G,pa_data,ea_data,add);
294 case 0x78: return EADiffusionAssemble3D<7,8>(ne,B,G,pa_data,ea_data,add);
295 case 0x89: return EADiffusionAssemble3D<8,9>(ne,B,G,pa_data,ea_data,add);
296 default: return EADiffusionAssemble3D(ne,B,G,pa_data,ea_data,add,
297 dofs1D,quad1D);
298 }
299 }
300 MFEM_ABORT("Unknown kernel.");
301}
302
303}
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
void AssembleEA(const FiniteElementSpace &fes, Vector &emat, const bool add) override
Method defining element assembly.
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:679
int GetNE() const
Returns number of elements.
Definition mesh.hpp:1282
Vector data type.
Definition vector.hpp:82
real_t b
Definition lissajous.cpp:42
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:391
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:762
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128