MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
kernel_helpers.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_ELASTICITY_KERNEL_HELPERS_HPP
13#define MFEM_ELASTICITY_KERNEL_HELPERS_HPP
14
15#include "mfem.hpp"
16#include "linalg/tensor.hpp"
17
18namespace mfem
19{
20
21namespace KernelHelpers
22{
23
25
26// MFEM_SHARED_3D_BLOCK_TENSOR definition
27// Should be moved in backends/cuda/hip header files.
28#if defined(__CUDA_ARCH__)
29#define MFEM_SHARED_3D_BLOCK_TENSOR(name,T,bx,by,bz,...)\
30MFEM_SHARED tensor<T,bx,by,bz,__VA_ARGS__> name;\
31name(threadIdx.x, threadIdx.y, threadIdx.z) = tensor<T,__VA_ARGS__> {};
32#else
33#define MFEM_SHARED_3D_BLOCK_TENSOR(name,...) tensor<__VA_ARGS__> name {};
34#endif
35
36// Kernel helper functions
37
38/**
39 * @brief Runtime check for memory restrictions that are determined at compile
40 * time.
41 *
42 * @param d1d Number of degrees of freedom in 1D.
43 * @param q1d Number of quadrature points in 1D.
44 */
45inline void CheckMemoryRestriction(int d1d, int q1d)
46{
47 MFEM_VERIFY(d1d <= q1d,
48 "There should be more or equal quadrature points "
49 "as there are dofs");
50 MFEM_VERIFY(d1d <= DeviceDofQuadLimits::Get().MAX_D1D,
51 "Maximum number of degrees of freedom in 1D reached."
52 "This number can be increased globally in general/forall.hpp if "
53 "device memory allows.");
54 MFEM_VERIFY(q1d <= DeviceDofQuadLimits::Get().MAX_Q1D,
55 "Maximum quadrature points 1D reached."
56 "This number can be increased globally in "
57 "general/forall.hpp if device memory allows.");
58}
59
60/**
61 * @brief Multi-component gradient evaluation from DOFs to quadrature points in
62 * reference coordinates.
63 *
64 * The implementation exploits sum factorization.
65 *
66 * @note DeviceTensor<2> means RANK=2
67 *
68 * @tparam dim Dimension.
69 * @tparam d1d Number of degrees of freedom in 1D.
70 * @tparam q1d er of quadrature points in 1D.
71 * @param B Basis functions evaluated at quadrature points in column-major
72 * layout q1d x d1d.
73 * @param G Gradients of basis functions evaluated at quadrature points in
74 * column major layout q1d x d1d.
75 * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
76 * q1d x q1d x q1d.
77 * @param U Input vector d1d x d1d x d1d x dim.
78 * @param dUdxi Gradient of the input vector wrt to reference coordinates. Size
79 * needed q1d x q1d x q1d x dim x dim.
80 */
81template <int dim, int d1d, int q1d>
82static inline MFEM_HOST_DEVICE void
83CalcGrad(const tensor<real_t, q1d, d1d> &B,
84 const tensor<real_t, q1d, d1d> &G,
85 tensor<real_t,2,3,q1d,q1d,q1d> &smem,
87 tensor<real_t, q1d, q1d, q1d, dim, dim> &dUdxi)
88{
89 for (int c = 0; c < dim; ++c)
90 {
91 MFEM_FOREACH_THREAD(dz,z,d1d)
92 {
93 MFEM_FOREACH_THREAD(dy,y,d1d)
94 {
95 MFEM_FOREACH_THREAD(dx,x,d1d)
96 {
97 smem(0,0,dx,dy,dz) = U(dx,dy,dz,c);
98 }
99 }
100 }
101 MFEM_SYNC_THREAD;
102 MFEM_FOREACH_THREAD(dz,z,d1d)
103 {
104 MFEM_FOREACH_THREAD(dy,y,d1d)
105 {
106 MFEM_FOREACH_THREAD(qx,x,q1d)
107 {
108 real_t u = 0.0;
109 real_t v = 0.0;
110 for (int dx = 0; dx < d1d; ++dx)
111 {
112 const real_t input = smem(0,0,dx,dy,dz);
113 u += input * B(qx,dx);
114 v += input * G(qx,dx);
115 }
116 smem(0,1,dz,dy,qx) = u;
117 smem(0,2,dz,dy,qx) = v;
118 }
119 }
120 }
121 MFEM_SYNC_THREAD;
122 MFEM_FOREACH_THREAD(dz,z,d1d)
123 {
124 MFEM_FOREACH_THREAD(qy,y,q1d)
125 {
126 MFEM_FOREACH_THREAD(qx,x,q1d)
127 {
128 real_t u = 0.0;
129 real_t v = 0.0;
130 real_t w = 0.0;
131 for (int dy = 0; dy < d1d; ++dy)
132 {
133 u += smem(0,2,dz,dy,qx) * B(qy,dy);
134 v += smem(0,1,dz,dy,qx) * G(qy,dy);
135 w += smem(0,1,dz,dy,qx) * B(qy,dy);
136 }
137 smem(1,0,dz,qy,qx) = u;
138 smem(1,1,dz,qy,qx) = v;
139 smem(1,2,dz,qy,qx) = w;
140 }
141 }
142 }
143 MFEM_SYNC_THREAD;
144 MFEM_FOREACH_THREAD(qz,z,q1d)
145 {
146 MFEM_FOREACH_THREAD(qy,y,q1d)
147 {
148 MFEM_FOREACH_THREAD(qx,x,q1d)
149 {
150 real_t u = 0.0;
151 real_t v = 0.0;
152 real_t w = 0.0;
153 for (int dz = 0; dz < d1d; ++dz)
154 {
155 u += smem(1,0,dz,qy,qx) * B(qz,dz);
156 v += smem(1,1,dz,qy,qx) * B(qz,dz);
157 w += smem(1,2,dz,qy,qx) * G(qz,dz);
158 }
159 dUdxi(qz,qy,qx,c,0) += u;
160 dUdxi(qz,qy,qx,c,1) += v;
161 dUdxi(qz,qy,qx,c,2) += w;
162 }
163 }
164 }
165 MFEM_SYNC_THREAD;
166 } // vdim
167}
168
169/**
170 * @brief Multi-component transpose gradient evaluation from DOFs to quadrature
171 * points in reference coordinates with contraction of the D vector.
172 *
173 * @tparam dim Dimension.
174 * @tparam d1d Number of degrees of freedom in 1D.
175 * @tparam q1d er of quadrature points in 1D.
176 * @param B Basis functions evaluated at quadrature points in column-major
177 * layout q1d x d1d.
178 * @param G Gradients of basis functions evaluated at quadrature points in
179 * column major layout q1d x d1d.
180 * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
181 * q1d x q1d x q1d.
182 * @param U Input vector q1d x q1d x q1d x dim.
183 * @param F Output vector that applied the gradient evaluation from DOFs to
184 * quadrature points in reference coordinates with contraction of the D operator
185 * on the input vector. Size is d1d x d1d x d1d x dim.
186 */
187template <int dim, int d1d, int q1d>
188static inline MFEM_HOST_DEVICE void
189CalcGradTSum(const tensor<real_t, q1d, d1d> &B,
190 const tensor<real_t, q1d, d1d> &G,
191 tensor<real_t, 2, 3, q1d, q1d, q1d> &smem,
192 const tensor<real_t, q1d, q1d, q1d, dim, dim> &U,
193 DeviceTensor<4, real_t> &F)
194{
195 for (int c = 0; c < dim; ++c)
196 {
197 MFEM_FOREACH_THREAD(qz, z, q1d)
198 {
199 MFEM_FOREACH_THREAD(qy, y, q1d)
200 {
201 MFEM_FOREACH_THREAD(dx, x, d1d)
202 {
203 real_t u = 0.0, v = 0.0, w = 0.0;
204 for (int qx = 0; qx < q1d; ++qx)
205 {
206 u += U(qx, qy, qz, 0, c) * G(qx, dx);
207 v += U(qx, qy, qz, 1, c) * B(qx, dx);
208 w += U(qx, qy, qz, 2, c) * B(qx, dx);
209 }
210 smem(0, 0, qz, qy, dx) = u;
211 smem(0, 1, qz, qy, dx) = v;
212 smem(0, 2, qz, qy, dx) = w;
213 }
214 }
215 }
216 MFEM_SYNC_THREAD;
217 MFEM_FOREACH_THREAD(qz, z, q1d)
218 {
219 MFEM_FOREACH_THREAD(dy, y, d1d)
220 {
221 MFEM_FOREACH_THREAD(dx, x, d1d)
222 {
223 real_t u = 0.0, v = 0.0, w = 0.0;
224 for (int qy = 0; qy < q1d; ++qy)
225 {
226 u += smem(0, 0, qz, qy, dx) * B(qy, dy);
227 v += smem(0, 1, qz, qy, dx) * G(qy, dy);
228 w += smem(0, 2, qz, qy, dx) * B(qy, dy);
229 }
230 smem(1, 0, qz, dy, dx) = u;
231 smem(1, 1, qz, dy, dx) = v;
232 smem(1, 2, qz, dy, dx) = w;
233 }
234 }
235 }
236 MFEM_SYNC_THREAD;
237 MFEM_FOREACH_THREAD(dz, z, d1d)
238 {
239 MFEM_FOREACH_THREAD(dy, y, d1d)
240 {
241 MFEM_FOREACH_THREAD(dx, x, d1d)
242 {
243 real_t u = 0.0, v = 0.0, w = 0.0;
244 for (int qz = 0; qz < q1d; ++qz)
245 {
246 u += smem(1, 0, qz, dy, dx) * B(qz, dz);
247 v += smem(1, 1, qz, dy, dx) * B(qz, dz);
248 w += smem(1, 2, qz, dy, dx) * G(qz, dz);
249 }
250 const real_t sum = u + v + w;
251 F(dx, dy, dz, c) += sum;
252 }
253 }
254 }
255 MFEM_SYNC_THREAD;
256 }
257}
258
259/**
260 * @brief Compute the gradient of all shape functions.
261 *
262 * @note TODO: Does not make use of shared memory on the GPU.
263 *
264 * @tparam dim Dimension.
265 * @tparam d1d Number of degrees of freedom in 1D.
266 * @tparam q1d er of quadrature points in 1D.
267 * @param qx Quadrature point x index.
268 * @param qy Quadrature point y index.
269 * @param qz Quadrature point z index.
270 * @param B Basis functions evaluated at quadrature points in column-major
271 * layout q1d x d1d.
272 * @param G Gradients of basis functions evaluated at quadrature points in
273 * column major layout q1d x d1d.
274 * @param invJ Inverse of the Jacobian at the quadrature point. Size is dim x
275 * dim.
276 *
277 * @return Gradient of all shape functions at the quadrature point. Size is d1d
278 * x d1d x d1d x dim.
279 */
280template <int dim, int d1d, int q1d>
281static inline MFEM_HOST_DEVICE tensor<real_t, d1d, d1d, d1d, dim>
282GradAllShapeFunctions(int qx, int qy, int qz,
283 const tensor<real_t, q1d, d1d> &B,
284 const tensor<real_t, q1d, d1d> &G,
285 const tensor<real_t, dim, dim> &invJ)
286{
287 tensor<real_t, d1d, d1d, d1d, dim> dphi_dx;
288 // G (x) B (x) B
289 // B (x) G (x) B
290 // B (x) B (x) G
291 for (int dx = 0; dx < d1d; dx++)
292 {
293 for (int dy = 0; dy < d1d; dy++)
294 {
295 for (int dz = 0; dz < d1d; dz++)
296 {
297 dphi_dx(dx,dy,dz) =
298 transpose(invJ) *
299 tensor<real_t, dim> {G(qx, dx) * B(qy, dy) * B(qz, dz),
300 B(qx, dx) * G(qy, dy) * B(qz, dz),
301 B(qx, dx) * B(qy, dy) * G(qz, dz)
302 };
303 }
304 }
305 }
306 return dphi_dx;
307}
308
309} // namespace KernelHelpers
310
311} // namespace mfem
312
313#endif
A basic generic Tensor class, appropriate for use on the GPU.
Definition dtensor.hpp:84
int dim
Definition ex24.cpp:53
mfem::real_t real_t
void CheckMemoryRestriction(int d1d, int q1d)
Runtime check for memory restrictions that are determined at compile time.
MFEM_HOST_DEVICE tensor< T, n, m > transpose(const tensor< T, m, n > &A)
Returns the transpose of the matrix.
Definition tensor.hpp:1388
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128
Implementation of the tensor class.