MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
eval_hdiv.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12// Internal header, included only by .cpp files.
13// Template function implementations.
14
15#ifndef MFEM_QUADINTERP_EVAL_HDIV_HPP
16#define MFEM_QUADINTERP_EVAL_HDIV_HPP
17
22
23namespace mfem
24{
25
26namespace internal
27{
28namespace quadrature_interpolator
29{
30
31// Evaluate values in H(div) on quads: DIM = SDIM = 2
32// For RT(p): D1D = p + 2
33template<QVectorLayout Q_LAYOUT, unsigned FLAGS, int T_D1D = 0, int T_Q1D = 0>
34inline void EvalHDiv2D(const int NE,
35 const real_t *Bo_,
36 const real_t *Bc_,
37 const real_t *J_,
38 const real_t *x_,
39 real_t *y_,
40 const int d1d = 0,
41 const int q1d = 0)
42{
43 static constexpr int DIM = 2;
44
45 const int D1D = T_D1D ? T_D1D : d1d;
46 const int Q1D = T_Q1D ? T_Q1D : q1d;
47
48 const auto bo = Reshape(Bo_, Q1D, D1D-1);
49 const auto bc = Reshape(Bc_, Q1D, D1D);
50 // J is used only when PHYS is true, otherwise J_ can be nullptr.
51 const auto J = Reshape(J_, Q1D, Q1D, DIM, DIM, NE);
52 const auto x = Reshape(x_, D1D*(D1D-1), DIM, NE);
53 auto y = (FLAGS & (QuadratureInterpolator::VALUES |
55 ((Q_LAYOUT == QVectorLayout::byNODES) ?
56 Reshape(y_, Q1D, Q1D, DIM, NE) :
57 Reshape(y_, DIM, Q1D, Q1D, NE)) :
58 Reshape(y_, Q1D, Q1D, 1, NE);
59
60 mfem::forall_3D(NE, Q1D, Q1D, DIM, [=] MFEM_HOST_DEVICE (int e)
61 {
62 const int tidz = MFEM_THREAD_ID(z);
63
64 const int D1D = T_D1D ? T_D1D : d1d;
65 const int Q1D = T_Q1D ? T_Q1D : q1d;
66
67 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
68 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
69 constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
70
71 MFEM_SHARED real_t smo[MQ1*(MD1-1)];
72 DeviceMatrix Bo(smo, D1D-1, Q1D);
73
74 MFEM_SHARED real_t smc[MQ1*MD1];
75 DeviceMatrix Bc(smc, D1D, Q1D);
76
77 MFEM_SHARED real_t sm0[DIM*MDQ*MDQ];
78 MFEM_SHARED real_t sm1[DIM*MDQ*MDQ];
79 DeviceMatrix X(sm0, D1D*(D1D-1), DIM);
80 DeviceCube QD(sm1, Q1D, D1D, DIM);
81 DeviceCube QQ(sm0, Q1D, Q1D, DIM);
82
83 // Load X, Bo and Bc into shared memory
84 MFEM_FOREACH_THREAD(vd,z,DIM)
85 {
86 MFEM_FOREACH_THREAD(dy,y,D1D)
87 {
88 MFEM_FOREACH_THREAD(qx,x,Q1D)
89 {
90 if (qx < D1D && dy < (D1D-1))
91 {
92 X(qx + dy*D1D,vd) = x(qx+dy*D1D,vd,e);
93 }
94 if (tidz == 0)
95 {
96 if (dy < (D1D-1)) { Bo(dy,qx) = bo(qx,dy); }
97 Bc(dy,qx) = bc(qx,dy);
98 }
99 }
100 }
101 }
102 MFEM_SYNC_THREAD;
103 // Apply B operator
104 MFEM_FOREACH_THREAD(vd,z,DIM)
105 {
106 const int nx = (vd == 0) ? D1D : D1D-1;
107 const int ny = (vd == 1) ? D1D : D1D-1;
108 DeviceCube Xxy(X, nx, ny, DIM);
109 DeviceMatrix Bx = (vd == 0) ? Bc : Bo;
110 MFEM_FOREACH_THREAD(dy,y,ny)
111 {
112 MFEM_FOREACH_THREAD(qx,x,Q1D)
113 {
114 real_t dq = 0.0;
115 for (int dx = 0; dx < nx; ++dx)
116 {
117 dq += Xxy(dx,dy,vd) * Bx(dx,qx);
118 }
119 QD(qx,dy,vd) = dq;
120 }
121 }
122 }
123 MFEM_SYNC_THREAD;
124 MFEM_FOREACH_THREAD(vd,z,DIM)
125 {
126 const int ny = (vd == 1) ? D1D : D1D-1;
127 DeviceMatrix By = (vd == 1) ? Bc : Bo;
128 MFEM_FOREACH_THREAD(qy,y,Q1D)
129 {
130 MFEM_FOREACH_THREAD(qx,x,Q1D)
131 {
132 real_t qq = 0.0;
133 for (int dy = 0; dy < ny; ++dy)
134 {
135 qq += QD(qx,dy,vd) * By(dy,qy);
136 }
139 {
140 QQ(qx,qy,vd) = qq;
141 }
142 else if (Q_LAYOUT == QVectorLayout::byNODES)
143 {
144 y(qx,qy,vd,e) = qq;
145 }
146 else // Q_LAYOUT == QVectorLayout::byVDIM
147 {
148 y(vd,qx,qy,e) = qq;
149 }
150 }
151 }
152 }
153 MFEM_SYNC_THREAD;
156 {
157 if (tidz == 0)
158 {
159 MFEM_FOREACH_THREAD(qy,y,Q1D)
160 {
161 MFEM_FOREACH_THREAD(qx,x,Q1D)
162 {
163 real_t u_ref[DIM], u_phys[DIM];
164 real_t J_loc[DIM*DIM];
165 // Piola transformation: u_phys = J/det(J) * u_ref
166 MFEM_UNROLL(DIM)
167 for (int d = 0; d < DIM; d++)
168 {
169 u_ref[d] = QQ(qx,qy,d);
170 MFEM_UNROLL(DIM)
171 for (int sd = 0; sd < DIM; sd++)
172 {
173 J_loc[sd+DIM*d] = J(qx,qy,sd,d,e);
174 }
175 }
176 const real_t detJ = kernels::Det<DIM>(J_loc);
177 kernels::Mult(DIM, DIM, J_loc, u_ref, u_phys);
178 kernels::Set(DIM, 1, 1_r/detJ, u_phys, u_phys);
180 {
181 MFEM_UNROLL(DIM)
182 for (int sd = 0; sd < DIM; sd++)
183 {
184 if (Q_LAYOUT == QVectorLayout::byNODES)
185 {
186 y(qx,qy,sd,e) = u_phys[sd];
187 }
188 else // Q_LAYOUT == QVectorLayout::byVDIM
189 {
190 y(sd,qx,qy,e) = u_phys[sd];
191 }
192 }
193 }
195 {
196 y(qx,qy,0,e) = kernels::Norml2(DIM, u_phys);
197 }
198 }
199 }
200 }
201 MFEM_SYNC_THREAD;
202 }
203 });
204}
205
206// Evaluate values in H(div) on hexes: DIM = SDIM = 3
207// For RT(p): D1D = p + 2
208template<QVectorLayout Q_LAYOUT, unsigned FLAGS, int T_D1D = 0, int T_Q1D = 0>
209inline void EvalHDiv3D(const int NE,
210 const real_t *Bo_,
211 const real_t *Bc_,
212 const real_t *J_,
213 const real_t *x_,
214 real_t *y_,
215 const int d1d = 0,
216 const int q1d = 0)
217{
218 static constexpr int DIM = 3;
219
220 const int D1D = T_D1D ? T_D1D : d1d;
221 const int Q1D = T_Q1D ? T_Q1D : q1d;
222
223 const auto bo = Reshape(Bo_, Q1D, D1D-1);
224 const auto bc = Reshape(Bc_, Q1D, D1D);
225 // J is used only when PHYS is true, otherwise J_ can be nullptr.
226 const auto J = Reshape(J_, Q1D, Q1D, Q1D, DIM, DIM, NE);
227 const auto x = Reshape(x_, D1D*(D1D-1)*(D1D-1), DIM, NE);
228 auto y = (FLAGS & (QuadratureInterpolator::VALUES |
230 ((Q_LAYOUT == QVectorLayout::byNODES) ?
231 Reshape(y_, Q1D, Q1D, Q1D, DIM, NE) :
232 Reshape(y_, DIM, Q1D, Q1D, Q1D, NE)) :
233 Reshape(y_, Q1D, Q1D, Q1D, 1, NE);
234
235 mfem::forall_3D(NE, Q1D, Q1D, DIM, [=] MFEM_HOST_DEVICE (int e)
236 {
237 const int tidz = MFEM_THREAD_ID(z);
238
239 const int D1D = T_D1D ? T_D1D : d1d;
240 const int Q1D = T_Q1D ? T_Q1D : q1d;
241
242 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
243 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
244 constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
245
246 MFEM_SHARED real_t smo[MQ1*(MD1-1)];
247 DeviceMatrix Bo(smo, D1D-1, Q1D);
248
249 MFEM_SHARED real_t smc[MQ1*MD1];
250 DeviceMatrix Bc(smc, D1D, Q1D);
251
252 MFEM_SHARED real_t sm0[DIM*MDQ*MDQ*MDQ];
253 MFEM_SHARED real_t sm1[DIM*MDQ*MDQ*MDQ];
254 DeviceMatrix X(sm0, D1D*(D1D-1)*(D1D-1), DIM);
255 DeviceTensor<4> QDD(sm1, Q1D, D1D, D1D, DIM);
256 DeviceTensor<4> QQD(sm0, Q1D, Q1D, D1D, DIM);
257 DeviceTensor<4> QQQ(sm1, Q1D, Q1D, Q1D, DIM);
258
259 // Load X into shared memory
260 MFEM_FOREACH_THREAD(vd,z,DIM)
261 {
262 MFEM_FOREACH_THREAD(dz,y,D1D-1)
263 {
264 MFEM_FOREACH_THREAD(dy,x,D1D-1)
265 {
266 MFEM_UNROLL(MD1)
267 for (int dx = 0; dx < D1D; ++dx)
268 {
269 X(dx+(dy+dz*(D1D-1))*D1D,vd) = x(dx+(dy+dz*(D1D-1))*D1D,vd,e);
270 }
271 }
272 }
273 }
274 // Load Bo and Bc into shared memory
275 if (tidz == 0)
276 {
277 MFEM_FOREACH_THREAD(d,y,D1D-1)
278 {
279 MFEM_FOREACH_THREAD(q,x,Q1D)
280 {
281 Bo(d,q) = bo(q,d);
282 }
283 }
284 MFEM_FOREACH_THREAD(d,y,D1D)
285 {
286 MFEM_FOREACH_THREAD(q,x,Q1D)
287 {
288 Bc(d,q) = bc(q,d);
289 }
290 }
291 }
292 MFEM_SYNC_THREAD;
293 // Apply B operator
294 MFEM_FOREACH_THREAD(vd,z,DIM)
295 {
296 const int nx = (vd == 0) ? D1D : D1D-1;
297 const int ny = (vd == 1) ? D1D : D1D-1;
298 const int nz = (vd == 2) ? D1D : D1D-1;
299 DeviceTensor<4> Xxyz(X, nx, ny, nz, DIM);
300 DeviceMatrix Bx = (vd == 0) ? Bc : Bo;
301 MFEM_FOREACH_THREAD(dy,y,ny)
302 {
303 MFEM_FOREACH_THREAD(qx,x,Q1D)
304 {
305 real_t u[MD1];
306 MFEM_UNROLL(MD1)
307 for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
308 MFEM_UNROLL(MD1)
309 for (int dx = 0; dx < nx; ++dx)
310 {
311 MFEM_UNROLL(MD1)
312 for (int dz = 0; dz < nz; ++dz)
313 {
314 u[dz] += Xxyz(dx,dy,dz,vd) * Bx(dx,qx);
315 }
316 }
317 MFEM_UNROLL(MD1)
318 for (int dz = 0; dz < nz; ++dz) { QDD(qx,dy,dz,vd) = u[dz]; }
319 }
320 }
321 }
322 MFEM_SYNC_THREAD;
323 MFEM_FOREACH_THREAD(vd,z,DIM)
324 {
325 const int ny = (vd == 1) ? D1D : D1D-1;
326 const int nz = (vd == 2) ? D1D : D1D-1;
327 DeviceMatrix By = (vd == 1) ? Bc : Bo;
328 MFEM_FOREACH_THREAD(qy,y,Q1D)
329 {
330 MFEM_FOREACH_THREAD(qx,x,Q1D)
331 {
332 real_t u[MD1];
333 MFEM_UNROLL(MD1)
334 for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
335 MFEM_UNROLL(MD1)
336 for (int dy = 0; dy < ny; ++dy)
337 {
338 MFEM_UNROLL(MD1)
339 for (int dz = 0; dz < nz; ++dz)
340 {
341 u[dz] += QDD(qx,dy,dz,vd) * By(dy,qy);
342 }
343 }
344 MFEM_UNROLL(MD1)
345 for (int dz = 0; dz < nz; ++dz) { QQD(qx,qy,dz,vd) = u[dz]; }
346 }
347 }
348 }
349 MFEM_SYNC_THREAD;
350 MFEM_FOREACH_THREAD(vd,z,DIM)
351 {
352 const int nz = (vd == 2) ? D1D : D1D-1;
353 DeviceMatrix Bz = (vd == 2) ? Bc : Bo;
354 MFEM_FOREACH_THREAD(qy,y,Q1D)
355 {
356 MFEM_FOREACH_THREAD(qx,x,Q1D)
357 {
358 real_t u[MQ1];
359 MFEM_UNROLL(MQ1)
360 for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
361 MFEM_UNROLL(MD1)
362 for (int dz = 0; dz < nz; ++dz)
363 {
364 MFEM_UNROLL(MQ1)
365 for (int qz = 0; qz < Q1D; ++qz)
366 {
367 u[qz] += QQD(qx,qy,dz,vd) * Bz(dz,qz);
368 }
369 }
370 MFEM_UNROLL(MQ1)
371 for (int qz = 0; qz < Q1D; ++qz)
372 {
375 {
376 QQQ(qx,qy,qz,vd) = u[qz];
377 }
378 else if (Q_LAYOUT == QVectorLayout::byNODES)
379 {
380 y(qx,qy,qz,vd,e) = u[qz];
381 }
382 else // Q_LAYOUT == QVectorLayout::byVDIM
383 {
384 y(vd,qx,qy,qz,e) = u[qz];
385 }
386 }
387 }
388 }
389 }
390 MFEM_SYNC_THREAD;
393 {
394 MFEM_FOREACH_THREAD(qz,z,Q1D)
395 {
396 MFEM_FOREACH_THREAD(qy,y,Q1D)
397 {
398 MFEM_FOREACH_THREAD(qx,x,Q1D)
399 {
400 real_t u_ref[DIM], u_phys[DIM];
401 real_t J_loc[DIM*DIM];
402 // Piola transformation: u_phys = J/det(J) * u_ref
403 MFEM_UNROLL(DIM)
404 for (int d = 0; d < DIM; d++)
405 {
406 u_ref[d] = QQQ(qx,qy,qz,d);
407 MFEM_UNROLL(DIM)
408 for (int sd = 0; sd < DIM; sd++)
409 {
410 J_loc[sd+DIM*d] = J(qx,qy,qz,sd,d,e);
411 }
412 }
413 const real_t detJ = kernels::Det<DIM>(J_loc);
414 kernels::Mult(DIM, DIM, J_loc, u_ref, u_phys);
415 kernels::Set(DIM, 1, 1_r/detJ, u_phys, u_phys);
417 {
418 MFEM_UNROLL(DIM)
419 for (int sd = 0; sd < DIM; sd++)
420 {
421 if (Q_LAYOUT == QVectorLayout::byNODES)
422 {
423 y(qx,qy,qz,sd,e) = u_phys[sd];
424 }
425 else // Q_LAYOUT == QVectorLayout::byVDIM
426 {
427 y(sd,qx,qy,qz,e) = u_phys[sd];
428 }
429 }
430 }
432 {
433 y(qx,qy,qz,0,e) = kernels::Norml2(DIM, u_phys);
434 }
435 }
436 }
437 }
438 MFEM_SYNC_THREAD;
439 }
440 });
441}
442
443} // namespace quadrature_interpolator
444} // namespace internal
445
446/// @cond Suppress_Doxygen_warnings
447
448template<int DIM, QVectorLayout Q_LAYOUT, unsigned FLAGS, int D1D, int Q1D>
450QuadratureInterpolator::TensorEvalHDivKernels::Kernel()
451{
452 using namespace internal::quadrature_interpolator;
453 static_assert(DIM == 2 || DIM == 3, "only DIM=2 and DIM=3 are implemented!");
454 if (DIM == 2) { return EvalHDiv2D<Q_LAYOUT, FLAGS, D1D, Q1D>; }
455 return EvalHDiv3D<Q_LAYOUT, FLAGS, D1D, Q1D>;
456}
457
458/// @endcond
459
460} // namespace mfem
461
462#endif // MFEM_QUADINTERP_EVAL_HDIV_HPP
@ VALUES
Evaluate the values at quadrature points.
void(*)(const int, const real_t *, const real_t *, const real_t *, const real_t *, real_t *, const int, const int) TensorEvalHDivKernelType
constexpr int DIM
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data,...
Definition kernels.hpp:163
MFEM_HOST_DEVICE real_t Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition kernels.hpp:133
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
Definition kernels.hpp:326
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition kernels.hpp:237
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
DeviceTensor< 2, real_t > DeviceMatrix
Definition dtensor.hpp:143
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
DeviceTensor< 3, real_t > DeviceCube
Definition dtensor.hpp:146