MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
eval_hdiv.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12// Internal header, included only by .cpp files.
13// Template function implementations.
14
15#ifndef MFEM_QUADINTERP_EVAL_HDIV_HPP
16#define MFEM_QUADINTERP_EVAL_HDIV_HPP
17
22
23namespace mfem
24{
25
26namespace internal
27{
28namespace quadrature_interpolator
29{
30
31// Evaluate values in H(div) on quads: DIM = SDIM = 2
32// For RT(p): D1D = p + 2
33template<QVectorLayout Q_LAYOUT, unsigned FLAGS, int T_D1D = 0, int T_Q1D = 0>
34inline void EvalHDiv2D(const int NE,
35 const real_t *Bo_,
36 const real_t *Bc_,
37 const real_t *J_,
38 const real_t *x_,
39 real_t *y_,
40 const int d1d = 0,
41 const int q1d = 0)
42{
43 static constexpr int DIM = 2;
44
45 const int D1D = T_D1D ? T_D1D : d1d;
46 const int Q1D = T_Q1D ? T_Q1D : q1d;
47 const int M1D = (Q1D > D1D) ? Q1D : D1D;
48
49 const auto bo = Reshape(Bo_, Q1D, D1D-1);
50 const auto bc = Reshape(Bc_, Q1D, D1D);
51 // J is used only when PHYS is true, otherwise J_ can be nullptr.
52 const auto J = Reshape(J_, Q1D, Q1D, DIM, DIM, NE);
53 const auto x = Reshape(x_, D1D*(D1D-1), DIM, NE);
54 auto y = (FLAGS & (QuadratureInterpolator::VALUES |
56 ((Q_LAYOUT == QVectorLayout::byNODES) ?
57 Reshape(y_, Q1D, Q1D, DIM, NE) :
58 Reshape(y_, DIM, Q1D, Q1D, NE)) :
59 Reshape(y_, Q1D, Q1D, 1, NE);
60
61 mfem::forall_3D(NE, M1D, M1D, DIM, [=] MFEM_HOST_DEVICE (int e)
62 {
63 const int tidz = MFEM_THREAD_ID(z);
64
65 const int D1D = T_D1D ? T_D1D : d1d;
66 const int Q1D = T_Q1D ? T_Q1D : q1d;
67 const int M1D = (Q1D > D1D) ? Q1D : D1D;
68
69 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
70 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
71 constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
72
73 MFEM_SHARED real_t smo[MQ1*(MD1-1)];
74 DeviceMatrix Bo(smo, D1D-1, Q1D);
75
76 MFEM_SHARED real_t smc[MQ1*MD1];
77 DeviceMatrix Bc(smc, D1D, Q1D);
78
79 MFEM_SHARED real_t sm0[DIM*MDQ*MDQ];
80 MFEM_SHARED real_t sm1[DIM*MDQ*MDQ];
81 DeviceMatrix X(sm0, D1D*(D1D-1), DIM);
82 DeviceCube QD(sm1, Q1D, D1D, DIM);
83 DeviceCube QQ(sm0, Q1D, Q1D, DIM);
84
85 // Load X, Bo and Bc into shared memory
86 MFEM_FOREACH_THREAD(vd,z,DIM)
87 {
88 MFEM_FOREACH_THREAD(dy,y,D1D)
89 {
90 MFEM_FOREACH_THREAD(qx,x,M1D)
91 {
92 if (qx < D1D && dy < (D1D-1))
93 {
94 X(qx + dy*D1D,vd) = x(qx+dy*D1D,vd,e);
95 }
96 if (tidz == 0 && qx < Q1D)
97 {
98 if (dy < (D1D-1)) { Bo(dy,qx) = bo(qx,dy); }
99 Bc(dy,qx) = bc(qx,dy);
100 }
101 }
102 }
103 }
104 MFEM_SYNC_THREAD;
105 // Apply B operator
106 MFEM_FOREACH_THREAD(vd,z,DIM)
107 {
108 const int nx = (vd == 0) ? D1D : D1D-1;
109 const int ny = (vd == 1) ? D1D : D1D-1;
110 DeviceCube Xxy(X, nx, ny, DIM);
111 DeviceMatrix Bx = (vd == 0) ? Bc : Bo;
112 MFEM_FOREACH_THREAD(dy,y,ny)
113 {
114 MFEM_FOREACH_THREAD(qx,x,Q1D)
115 {
116 real_t dq = 0.0;
117 for (int dx = 0; dx < nx; ++dx)
118 {
119 dq += Xxy(dx,dy,vd) * Bx(dx,qx);
120 }
121 QD(qx,dy,vd) = dq;
122 }
123 }
124 }
125 MFEM_SYNC_THREAD;
126 MFEM_FOREACH_THREAD(vd,z,DIM)
127 {
128 const int ny = (vd == 1) ? D1D : D1D-1;
129 DeviceMatrix By = (vd == 1) ? Bc : Bo;
130 MFEM_FOREACH_THREAD(qy,y,Q1D)
131 {
132 MFEM_FOREACH_THREAD(qx,x,Q1D)
133 {
134 real_t qq = 0.0;
135 for (int dy = 0; dy < ny; ++dy)
136 {
137 qq += QD(qx,dy,vd) * By(dy,qy);
138 }
141 {
142 QQ(qx,qy,vd) = qq;
143 }
144 else if (Q_LAYOUT == QVectorLayout::byNODES)
145 {
146 y(qx,qy,vd,e) = qq;
147 }
148 else // Q_LAYOUT == QVectorLayout::byVDIM
149 {
150 y(vd,qx,qy,e) = qq;
151 }
152 }
153 }
154 }
155 MFEM_SYNC_THREAD;
158 {
159 if (tidz == 0)
160 {
161 MFEM_FOREACH_THREAD(qy,y,Q1D)
162 {
163 MFEM_FOREACH_THREAD(qx,x,Q1D)
164 {
165 real_t u_ref[DIM], u_phys[DIM];
166 real_t J_loc[DIM*DIM];
167 // Piola transformation: u_phys = J/det(J) * u_ref
168 MFEM_UNROLL(DIM)
169 for (int d = 0; d < DIM; d++)
170 {
171 u_ref[d] = QQ(qx,qy,d);
172 MFEM_UNROLL(DIM)
173 for (int sd = 0; sd < DIM; sd++)
174 {
175 J_loc[sd+DIM*d] = J(qx,qy,sd,d,e);
176 }
177 }
178 const real_t detJ = kernels::Det<DIM>(J_loc);
179 kernels::Mult(DIM, DIM, J_loc, u_ref, u_phys);
180 kernels::Set(DIM, 1, 1_r/detJ, u_phys, u_phys);
182 {
183 MFEM_UNROLL(DIM)
184 for (int sd = 0; sd < DIM; sd++)
185 {
186 if (Q_LAYOUT == QVectorLayout::byNODES)
187 {
188 y(qx,qy,sd,e) = u_phys[sd];
189 }
190 else // Q_LAYOUT == QVectorLayout::byVDIM
191 {
192 y(sd,qx,qy,e) = u_phys[sd];
193 }
194 }
195 }
197 {
198 y(qx,qy,0,e) = kernels::Norml2(DIM, u_phys);
199 }
200 }
201 }
202 }
203 MFEM_SYNC_THREAD;
204 }
205 });
206}
207
208// Evaluate values in H(div) on hexes: DIM = SDIM = 3
209// For RT(p): D1D = p + 2
210template<QVectorLayout Q_LAYOUT, unsigned FLAGS, int T_D1D = 0, int T_Q1D = 0>
211inline void EvalHDiv3D(const int NE,
212 const real_t *Bo_,
213 const real_t *Bc_,
214 const real_t *J_,
215 const real_t *x_,
216 real_t *y_,
217 const int d1d = 0,
218 const int q1d = 0)
219{
220 static constexpr int DIM = 3;
221
222 const int D1D = T_D1D ? T_D1D : d1d;
223 const int Q1D = T_Q1D ? T_Q1D : q1d;
224
225 const auto bo = Reshape(Bo_, Q1D, D1D-1);
226 const auto bc = Reshape(Bc_, Q1D, D1D);
227 // J is used only when PHYS is true, otherwise J_ can be nullptr.
228 const auto J = Reshape(J_, Q1D, Q1D, Q1D, DIM, DIM, NE);
229 const auto x = Reshape(x_, D1D*(D1D-1)*(D1D-1), DIM, NE);
230 auto y = (FLAGS & (QuadratureInterpolator::VALUES |
232 ((Q_LAYOUT == QVectorLayout::byNODES) ?
233 Reshape(y_, Q1D, Q1D, Q1D, DIM, NE) :
234 Reshape(y_, DIM, Q1D, Q1D, Q1D, NE)) :
235 Reshape(y_, Q1D, Q1D, Q1D, 1, NE);
236
237 mfem::forall_3D(NE, Q1D, Q1D, DIM, [=] MFEM_HOST_DEVICE (int e)
238 {
239 const int tidz = MFEM_THREAD_ID(z);
240
241 const int D1D = T_D1D ? T_D1D : d1d;
242 const int Q1D = T_Q1D ? T_Q1D : q1d;
243
244 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
245 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
246 constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
247
248 MFEM_SHARED real_t smo[MQ1*(MD1-1)];
249 DeviceMatrix Bo(smo, D1D-1, Q1D);
250
251 MFEM_SHARED real_t smc[MQ1*MD1];
252 DeviceMatrix Bc(smc, D1D, Q1D);
253
254 MFEM_SHARED real_t sm0[DIM*MDQ*MDQ*MDQ];
255 MFEM_SHARED real_t sm1[DIM*MDQ*MDQ*MDQ];
256 DeviceMatrix X(sm0, D1D*(D1D-1)*(D1D-1), DIM);
257 DeviceTensor<4> QDD(sm1, Q1D, D1D, D1D, DIM);
258 DeviceTensor<4> QQD(sm0, Q1D, Q1D, D1D, DIM);
259 DeviceTensor<4> QQQ(sm1, Q1D, Q1D, Q1D, DIM);
260
261 // Load X into shared memory
262 MFEM_FOREACH_THREAD(vd,z,DIM)
263 {
264 MFEM_FOREACH_THREAD(dz,y,D1D-1)
265 {
266 MFEM_FOREACH_THREAD(dy,x,D1D-1)
267 {
268 MFEM_UNROLL(MD1)
269 for (int dx = 0; dx < D1D; ++dx)
270 {
271 X(dx+(dy+dz*(D1D-1))*D1D,vd) = x(dx+(dy+dz*(D1D-1))*D1D,vd,e);
272 }
273 }
274 }
275 }
276 // Load Bo and Bc into shared memory
277 if (tidz == 0)
278 {
279 MFEM_FOREACH_THREAD(d,y,D1D-1)
280 {
281 MFEM_FOREACH_THREAD(q,x,Q1D)
282 {
283 Bo(d,q) = bo(q,d);
284 }
285 }
286 MFEM_FOREACH_THREAD(d,y,D1D)
287 {
288 MFEM_FOREACH_THREAD(q,x,Q1D)
289 {
290 Bc(d,q) = bc(q,d);
291 }
292 }
293 }
294 MFEM_SYNC_THREAD;
295 // Apply B operator
296 MFEM_FOREACH_THREAD(vd,z,DIM)
297 {
298 const int nx = (vd == 0) ? D1D : D1D-1;
299 const int ny = (vd == 1) ? D1D : D1D-1;
300 const int nz = (vd == 2) ? D1D : D1D-1;
301 DeviceTensor<4> Xxyz(X, nx, ny, nz, DIM);
302 DeviceMatrix Bx = (vd == 0) ? Bc : Bo;
303 MFEM_FOREACH_THREAD(dy,y,ny)
304 {
305 MFEM_FOREACH_THREAD(qx,x,Q1D)
306 {
307 real_t u[MD1];
308 MFEM_UNROLL(MD1)
309 for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
310 MFEM_UNROLL(MD1)
311 for (int dx = 0; dx < nx; ++dx)
312 {
313 MFEM_UNROLL(MD1)
314 for (int dz = 0; dz < nz; ++dz)
315 {
316 u[dz] += Xxyz(dx,dy,dz,vd) * Bx(dx,qx);
317 }
318 }
319 MFEM_UNROLL(MD1)
320 for (int dz = 0; dz < nz; ++dz) { QDD(qx,dy,dz,vd) = u[dz]; }
321 }
322 }
323 }
324 MFEM_SYNC_THREAD;
325 MFEM_FOREACH_THREAD(vd,z,DIM)
326 {
327 const int ny = (vd == 1) ? D1D : D1D-1;
328 const int nz = (vd == 2) ? D1D : D1D-1;
329 DeviceMatrix By = (vd == 1) ? Bc : Bo;
330 MFEM_FOREACH_THREAD(qy,y,Q1D)
331 {
332 MFEM_FOREACH_THREAD(qx,x,Q1D)
333 {
334 real_t u[MD1];
335 MFEM_UNROLL(MD1)
336 for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
337 MFEM_UNROLL(MD1)
338 for (int dy = 0; dy < ny; ++dy)
339 {
340 MFEM_UNROLL(MD1)
341 for (int dz = 0; dz < nz; ++dz)
342 {
343 u[dz] += QDD(qx,dy,dz,vd) * By(dy,qy);
344 }
345 }
346 MFEM_UNROLL(MD1)
347 for (int dz = 0; dz < nz; ++dz) { QQD(qx,qy,dz,vd) = u[dz]; }
348 }
349 }
350 }
351 MFEM_SYNC_THREAD;
352 MFEM_FOREACH_THREAD(vd,z,DIM)
353 {
354 const int nz = (vd == 2) ? D1D : D1D-1;
355 DeviceMatrix Bz = (vd == 2) ? Bc : Bo;
356 MFEM_FOREACH_THREAD(qy,y,Q1D)
357 {
358 MFEM_FOREACH_THREAD(qx,x,Q1D)
359 {
360 real_t u[MQ1];
361 MFEM_UNROLL(MQ1)
362 for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
363 MFEM_UNROLL(MD1)
364 for (int dz = 0; dz < nz; ++dz)
365 {
366 MFEM_UNROLL(MQ1)
367 for (int qz = 0; qz < Q1D; ++qz)
368 {
369 u[qz] += QQD(qx,qy,dz,vd) * Bz(dz,qz);
370 }
371 }
372 MFEM_UNROLL(MQ1)
373 for (int qz = 0; qz < Q1D; ++qz)
374 {
377 {
378 QQQ(qx,qy,qz,vd) = u[qz];
379 }
380 else if (Q_LAYOUT == QVectorLayout::byNODES)
381 {
382 y(qx,qy,qz,vd,e) = u[qz];
383 }
384 else // Q_LAYOUT == QVectorLayout::byVDIM
385 {
386 y(vd,qx,qy,qz,e) = u[qz];
387 }
388 }
389 }
390 }
391 }
392 MFEM_SYNC_THREAD;
395 {
396 MFEM_FOREACH_THREAD(qz,z,Q1D)
397 {
398 MFEM_FOREACH_THREAD(qy,y,Q1D)
399 {
400 MFEM_FOREACH_THREAD(qx,x,Q1D)
401 {
402 real_t u_ref[DIM], u_phys[DIM];
403 real_t J_loc[DIM*DIM];
404 // Piola transformation: u_phys = J/det(J) * u_ref
405 MFEM_UNROLL(DIM)
406 for (int d = 0; d < DIM; d++)
407 {
408 u_ref[d] = QQQ(qx,qy,qz,d);
409 MFEM_UNROLL(DIM)
410 for (int sd = 0; sd < DIM; sd++)
411 {
412 J_loc[sd+DIM*d] = J(qx,qy,qz,sd,d,e);
413 }
414 }
415 const real_t detJ = kernels::Det<DIM>(J_loc);
416 kernels::Mult(DIM, DIM, J_loc, u_ref, u_phys);
417 kernels::Set(DIM, 1, 1_r/detJ, u_phys, u_phys);
419 {
420 MFEM_UNROLL(DIM)
421 for (int sd = 0; sd < DIM; sd++)
422 {
423 if (Q_LAYOUT == QVectorLayout::byNODES)
424 {
425 y(qx,qy,qz,sd,e) = u_phys[sd];
426 }
427 else // Q_LAYOUT == QVectorLayout::byVDIM
428 {
429 y(sd,qx,qy,qz,e) = u_phys[sd];
430 }
431 }
432 }
434 {
435 y(qx,qy,qz,0,e) = kernels::Norml2(DIM, u_phys);
436 }
437 }
438 }
439 }
440 MFEM_SYNC_THREAD;
441 }
442 });
443}
444
445} // namespace quadrature_interpolator
446} // namespace internal
447
448/// @cond Suppress_Doxygen_warnings
449
450template<int DIM, QVectorLayout Q_LAYOUT, unsigned FLAGS, int D1D, int Q1D>
452QuadratureInterpolator::TensorEvalHDivKernels::Kernel()
453{
454 using namespace internal::quadrature_interpolator;
455 static_assert(DIM == 2 || DIM == 3, "only DIM=2 and DIM=3 are implemented!");
456 if (DIM == 2) { return EvalHDiv2D<Q_LAYOUT, FLAGS, D1D, Q1D>; }
457 return EvalHDiv3D<Q_LAYOUT, FLAGS, D1D, Q1D>;
458}
459
460/// @endcond
461
462} // namespace mfem
463
464#endif // MFEM_QUADINTERP_EVAL_HDIV_HPP
@ VALUES
Evaluate the values at quadrature points.
void(*)(const int, const real_t *, const real_t *, const real_t *, const real_t *, real_t *, const int, const int) TensorEvalHDivKernelType
constexpr int DIM
mfem::real_t real_t
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data,...
Definition kernels.hpp:160
MFEM_HOST_DEVICE real_t Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition kernels.hpp:130
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
Definition kernels.hpp:386
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition kernels.hpp:297
DeviceTensor< 3, real_t > DeviceCube
Definition dtensor.hpp:153
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:937
DeviceTensor< 2, real_t > DeviceMatrix
Definition dtensor.hpp:150