MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_hdiv_ea.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../bilininteg.hpp"
14#include "../gridfunc.hpp"
15
16namespace mfem
17{
18
19// For H(div) mass, Bo and Bc are the basis evaluation operators, and the
20// pa_data corresponds to a (potentially symmetric) matrix coefficient.
21// coeff_dim must be 3 or 4 depending on symmetry.
22//
23// For div-div, Bc is the derivative evaluation operator, and pa_data
24// corresponds to a scalar coefficient. coeff_dim must be 1.
25//
26// These two integrators are distinguished using coeff_dim.
27template<int T_D1D = 0, int T_Q1D = 0>
28static void EAHdivAssemble2D(const int NE,
29 const Array<real_t> &Bo_,
30 const Array<real_t> &Bc_,
31 const int coeff_dim,
32 const Vector &pa_data,
33 Vector &ea_data,
34 const bool add,
35 const int d1d = 0,
36 const int q1d = 0)
37{
38 const int D1D = T_D1D ? T_D1D : d1d;
39 const int Q1D = T_Q1D ? T_Q1D : q1d;
40 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().HDIV_MAX_D1D, "");
41 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().HDIV_MAX_Q1D, "");
42 const int NDOF = 2*(D1D-1)*D1D;
43 const auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
44 const auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
45 const auto D = Reshape(pa_data.Read(), Q1D, Q1D, coeff_dim, NE);
46 const bool symmetric = (coeff_dim == 3);
47 auto M = Reshape(add ? ea_data.ReadWrite() : ea_data.Write(), NDOF, NDOF, NE);
48 mfem::forall_2D(NE, NDOF, 1, [=] MFEM_HOST_DEVICE (int e)
49 {
50 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
51 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
52 // Load Bo and Bc matrices into registers
53 real_t r_Bo[MQ1][MD1];
54 real_t r_Bc[MQ1][MD1];
55 for (int d = 0; d < D1D; d++)
56 {
57 for (int q = 0; q < Q1D; q++)
58 {
59 if (d < D1D - 1) { r_Bo[q][d] = Bo(q,d); }
60 r_Bc[q][d] = Bc(q,d);
61 }
62 }
63 // Store PA data in shared memory
64 MFEM_SHARED real_t s_D[4][MQ1][MQ1];
65 MFEM_FOREACH_THREAD(idx_q, x, Q1D*Q1D)
66 {
67 const int qx = idx_q % Q1D;
68 const int qy = idx_q / Q1D;
69 if (coeff_dim == 1)
70 {
71 const real_t val = D(qx, qy, 0, e);
72 for (int i = 0; i < 4; ++i) { s_D[i][qx][qy] = val; }
73 }
74 else
75 {
76 s_D[0][qx][qy] = D(qx, qy, 0, e);
77 s_D[1][qx][qy] = D(qx, qy, 1, e);
78 s_D[2][qx][qy] = (symmetric) ? s_D[1][qx][qy] : D(qx, qy, 2, e);
79 s_D[3][qx][qy] = (symmetric) ? D(qx, qy, 2, e) : D(qx, qy, 3, e);
80 }
81 }
82 MFEM_SYNC_THREAD;
83 // Assemble (one row per thread)
84 MFEM_FOREACH_THREAD(idx_i, x, NDOF)
85 {
86 const int ic = idx_i / D1D / (D1D-1);
87 const int idx_ii = idx_i % (D1D * (D1D-1));
88 const int ix = (ic == 0) ? idx_ii%D1D : idx_ii%(D1D-1);
89 const int iy = (ic == 0) ? idx_ii/D1D : idx_ii/(D1D-1);
90
91 const real_t (&Bi1)[MQ1][MD1] = (ic == 0) ? r_Bc : r_Bo;
92 const real_t (&Bi2)[MQ1][MD1] = (ic == 0) ? r_Bo : r_Bc;
93
94 for (int idx_j = 0; idx_j < NDOF; ++idx_j)
95 {
96 const int jc = idx_j / (D1D*(D1D-1));
97 const int idx_jj = idx_j % (D1D * (D1D-1));
98 const int jx = (jc == 0) ? idx_jj%D1D : idx_jj%(D1D-1);
99 const int jy = (jc == 0) ? idx_jj/D1D : idx_jj/(D1D-1);
100
101 const real_t (&Bj1)[MQ1][MD1] = (jc == 0) ? r_Bc : r_Bo;
102 const real_t (&Bj2)[MQ1][MD1] = (jc == 0) ? r_Bo : r_Bc;
103
104 real_t val = 0.0;
105 for (int qx = 0; qx < Q1D; ++qx)
106 {
107 for (int qy = 0; qy < Q1D; ++qy)
108 {
109 const double coeff = s_D[ic + jc*2][qx][qy];
110 val += coeff*Bi1[qx][ix]*Bi2[qy][iy]*Bj1[qx][jx]*Bj2[qy][jy];
111 }
112 }
113 if (add)
114 {
115 M(idx_i, idx_j, e) += val;
116 }
117 else
118 {
119 M(idx_i, idx_j, e) = val;
120 }
121 }
122 }
123 });
124}
125
126// For H(div) mass, Bo and Bc are the basis evaluation operators, and the
127// pa_data corresponds to a (potentially symmetric) matrix coefficient.
128// coeff_dim must be 6 or 9 depending on symmetry.
129//
130// For div-div, Bc is the derivative evaluation operator, and pa_data
131// corresponds to a scalar coefficient. coeff_dim must be 1.
132//
133// These two integrators are distinguished using coeff_dim.
134template<int T_D1D = 0, int T_Q1D = 0>
135static void EAHdivAssemble3D(const int NE,
136 const Array<real_t> &Bo_,
137 const Array<real_t> &Bc_,
138 const int coeff_dim,
139 const Vector &pa_data,
140 Vector &ea_data,
141 const bool add,
142 const int d1d = 0,
143 const int q1d = 0)
144{
145 const int D1D = T_D1D ? T_D1D : d1d;
146 const int Q1D = T_Q1D ? T_Q1D : q1d;
147 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().HDIV_MAX_D1D, "");
148 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().HDIV_MAX_Q1D, "");
149 const int NDOF_C = (D1D-1)*(D1D-1)*D1D;
150 const int NDOF = 3*NDOF_C;
151 const auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
152 const auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
153 const auto D = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, coeff_dim, NE);
154 const bool symmetric = (coeff_dim == 6);
155 auto M = Reshape(add ? ea_data.ReadWrite() : ea_data.Write(), NDOF, NDOF, NE);
156 mfem::forall_2D(NE, NDOF, 1, [=] MFEM_HOST_DEVICE (int e)
157 {
158 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
159 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
160 // Load Bo and Bc matrices into registers
161 real_t r_Bo[MQ1][MD1];
162 real_t r_Bc[MQ1][MD1];
163 for (int d = 0; d < D1D; d++)
164 {
165 for (int q = 0; q < Q1D; q++)
166 {
167 if (d < D1D - 1) { r_Bo[q][d] = Bo(q,d); }
168 r_Bc[q][d] = Bc(q,d);
169 }
170 }
171 // Store PA data in shared memory
172 MFEM_SHARED real_t s_D[9][MQ1][MQ1][MQ1];
173 MFEM_FOREACH_THREAD(idx_q, x, Q1D*Q1D*Q1D)
174 {
175 const int qx = idx_q % Q1D;
176 const int qy = (idx_q / Q1D) % Q1D;
177 const int qz = (idx_q / Q1D) / Q1D;
178 if (coeff_dim == 1)
179 {
180 const real_t val = D(qx,qy,qz,0,e);
181 for (int i = 0; i < 9; ++i) { s_D[i][qx][qy][qz] = val; }
182 }
183 else
184 {
185 s_D[0][qx][qy][qz] = D(qx,qy,qz,0,e);
186 s_D[1][qx][qy][qz] = D(qx,qy,qz,1,e);
187 s_D[2][qx][qy][qz] = D(qx,qy,qz,2,e);
188 s_D[3][qx][qy][qz] = symmetric ? s_D[1][qx][qy][qz] : D(qx,qy,qz,3,e);
189 s_D[4][qx][qy][qz] = symmetric ? D(qx,qy,qz,3,e) : D(qx,qy,qz,4,e);
190 s_D[5][qx][qy][qz] = symmetric ? D(qx,qy,qz,4,e) : D(qx,qy,qz,5,e);
191 s_D[6][qx][qy][qz] = symmetric ? s_D[2][qx][qy][qz] : D(qx,qy,qz,6,e);
192 s_D[7][qx][qy][qz] = symmetric ? s_D[5][qx][qy][qz] : D(qx,qy,qz,7,e);
193 s_D[8][qx][qy][qz] = symmetric ? D(qx,qy,qz,5,e) : D(qx,qy,qz,8,e);
194 }
195 }
196 MFEM_SYNC_THREAD;
197 // Assemble (one row per thread)
198 MFEM_FOREACH_THREAD(idx_i, x, NDOF)
199 {
200 const int ic = idx_i / NDOF_C;
201 const int idx_ii = idx_i % NDOF_C;
202
203 const int nx_i = (ic == 0) ? D1D : D1D-1;
204 const int ny_i = (ic == 1) ? D1D : D1D-1;
205
206 const int ix = idx_ii % nx_i;
207 const int iy = (idx_ii / nx_i) % ny_i;
208 const int iz = (idx_ii / nx_i) / ny_i;
209
210 const real_t (&Bi1)[MQ1][MD1] = (ic == 0) ? r_Bc : r_Bo;
211 const real_t (&Bi2)[MQ1][MD1] = (ic == 1) ? r_Bc : r_Bo;
212 const real_t (&Bi3)[MQ1][MD1] = (ic == 2) ? r_Bc : r_Bo;
213
214 for (int idx_j = 0; idx_j < NDOF; ++idx_j)
215 {
216 const int jc = idx_j / NDOF_C;
217 const int idx_jj = idx_j % NDOF_C;
218
219 const int nx_j = (jc == 0) ? D1D : D1D-1;
220 const int ny_j = (jc == 1) ? D1D : D1D-1;
221
222 const int jx = idx_jj % nx_j;
223 const int jy = (idx_jj / nx_j) % ny_j;
224 const int jz = (idx_jj / nx_j) / ny_j;
225
226 const real_t (&Bj1)[MQ1][MD1] = (jc == 0) ? r_Bc : r_Bo;
227 const real_t (&Bj2)[MQ1][MD1] = (jc == 1) ? r_Bc : r_Bo;
228 const real_t (&Bj3)[MQ1][MD1] = (jc == 2) ? r_Bc : r_Bo;
229
230 real_t val = 0.0;
231 for (int qx = 0; qx < Q1D; ++qx)
232 {
233 for (int qy = 0; qy < Q1D; ++qy)
234 {
235 for (int qz = 0; qz < Q1D; ++qz)
236 {
237 const double coeff = s_D[ic + jc*3][qx][qy][qz];
238 val += coeff*Bi1[qx][ix]*Bi2[qy][iy]*Bi3[qz][iz]*
239 Bj1[qx][jx]*Bj2[qy][jy]*Bj3[qz][jz];
240 }
241 }
242 }
243 if (add)
244 {
245 M(idx_i, idx_j, e) += val;
246 }
247 else
248 {
249 M(idx_i, idx_j, e) = val;
250 }
251 }
252 }
253 });
254}
255
257 Vector &ea_data,
258 const bool add)
259{
260 AssemblePA(fes);
261
264 {
265 MFEM_ABORT("Unsupported kernel.");
266 }
267
268 const Array<real_t> &Bo = mapsO->B;
269 const Array<real_t> &Bc = mapsC->B;
270
271 if (dim == 2)
272 {
273 const int coeff_dim = symmetric ? 3 : 4;
274 auto kernel = EAHdivAssemble2D<0,0>;
275 switch ((dofs1D << 4 ) | quad1D)
276 {
277 case 0x22: kernel = EAHdivAssemble2D<2,2>; break;
278 case 0x33: kernel = EAHdivAssemble2D<3,3>; break;
279 case 0x44: kernel = EAHdivAssemble2D<4,4>; break;
280 case 0x55: kernel = EAHdivAssemble2D<5,5>; break;
281 }
282 return kernel(ne,Bo,Bc,coeff_dim,pa_data,ea_data,add,dofs1D,quad1D);
283 }
284 else if (dim == 3)
285 {
286 const int coeff_dim = symmetric ? 6 : 9;
287 auto kernel = EAHdivAssemble3D<0,0>;
288 switch ((dofs1D << 4 ) | quad1D)
289 {
290 case 0x23: kernel = EAHdivAssemble3D<2,3>; break;
291 case 0x34: kernel = EAHdivAssemble3D<3,4>; break;
292 case 0x45: kernel = EAHdivAssemble3D<4,5>; break;
293 case 0x56: kernel = EAHdivAssemble3D<5,6>; break;
294 }
295 return kernel(ne,Bo,Bc,coeff_dim,pa_data,ea_data,add,dofs1D,quad1D);
296 }
297 MFEM_ABORT("Unknown kernel.");
298}
299
301 Vector &ea_data,
302 const bool add)
303{
304 AssemblePA(fes);
305
306 const Array<real_t> &Bo = mapsO->B;
307 const Array<real_t> &Gc = mapsC->G;
308
309 if (dim == 2)
310 {
311 auto kernel = EAHdivAssemble2D<0,0>;
312 switch ((dofs1D << 4 ) | quad1D)
313 {
314 case 0x22: kernel = EAHdivAssemble2D<2,2>; break;
315 case 0x33: kernel = EAHdivAssemble2D<3,3>; break;
316 case 0x44: kernel = EAHdivAssemble2D<4,4>; break;
317 case 0x55: kernel = EAHdivAssemble2D<5,5>; break;
318 }
319 return kernel(ne,Bo,Gc,1,pa_data,ea_data,add,dofs1D,quad1D);
320 }
321 else if (dim == 3)
322 {
323 auto kernel = EAHdivAssemble3D<0,0>;
324 switch ((dofs1D << 4 ) | quad1D)
325 {
326 case 0x23: kernel = EAHdivAssemble3D<2,3>; break;
327 case 0x34: kernel = EAHdivAssemble3D<3,4>; break;
328 case 0x45: kernel = EAHdivAssemble3D<4,5>; break;
329 case 0x56: kernel = EAHdivAssemble3D<5,6>; break;
330 }
331 return kernel(ne,Bo,Gc,1,pa_data,ea_data,add,dofs1D,quad1D);
332 }
333 MFEM_ABORT("Unknown kernel.");
334}
335
336}
void AssembleEA(const FiniteElementSpace &fes, Vector &emat, const bool add) override
Method defining element assembly.
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
@ DIV
Implements CalcDivShape methods.
Definition fe_base.hpp:306
const DofToQuad * mapsO
Not owned. DOF-to-quad map, open.
void AssembleEA(const FiniteElementSpace &fes, Vector &emat, const bool add) override
Method defining element assembly.
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
bool symmetric
False if using a nonsymmetric matrix coefficient.
const DofToQuad * mapsC
Not owned. DOF-to-quad map, closed.
Vector data type.
Definition vector.hpp:82
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:358
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:391
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:762
float real_t
Definition config.hpp:43
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128