MFEM  v4.6.0
Finite element discretization library
kernel_helpers.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_ELASTICITY_KERNEL_HELPERS_HPP
13 #define MFEM_ELASTICITY_KERNEL_HELPERS_HPP
14 
15 #include "mfem.hpp"
16 #include "general/forall.hpp"
17 #include "linalg/tensor.hpp"
18 
19 using mfem::internal::tensor;
20 
21 namespace mfem
22 {
23 
24 namespace KernelHelpers
25 {
26 
27 // MFEM_SHARED_3D_BLOCK_TENSOR definition
28 // Should be moved in backends/cuda/hip header files.
29 #if defined(__CUDA_ARCH__)
30 #define MFEM_SHARED_3D_BLOCK_TENSOR(name,T,bx,by,bz,...)\
31 MFEM_SHARED tensor<T,bx,by,bz,__VA_ARGS__> name;\
32 name(threadIdx.x, threadIdx.y, threadIdx.z) = tensor<T,__VA_ARGS__> {};
33 #else
34 #define MFEM_SHARED_3D_BLOCK_TENSOR(name,...) tensor<__VA_ARGS__> name {};
35 #endif
36 
37 // Kernel helper functions
38 
39 /**
40  * @brief Runtime check for memory restrictions that are determined at compile
41  * time.
42  *
43  * @param d1d Number of degrees of freedom in 1D.
44  * @param q1d Number of quadrature points in 1D.
45  */
46 inline void CheckMemoryRestriction(int d1d, int q1d)
47 {
48  MFEM_VERIFY(d1d <= q1d,
49  "There should be more or equal quadrature points "
50  "as there are dofs");
51  MFEM_VERIFY(d1d <= DeviceDofQuadLimits::Get().MAX_D1D,
52  "Maximum number of degrees of freedom in 1D reached."
53  "This number can be increased globally in general/forall.hpp if "
54  "device memory allows.");
55  MFEM_VERIFY(q1d <= DeviceDofQuadLimits::Get().MAX_Q1D,
56  "Maximum quadrature points 1D reached."
57  "This number can be increased globally in "
58  "general/forall.hpp if device memory allows.");
59 }
60 
61 /**
62  * @brief Multi-component gradient evaluation from DOFs to quadrature points in
63  * reference coordinates.
64  *
65  * The implementation exploits sum factorization.
66  *
67  * @note DeviceTensor<2> means RANK=2
68  *
69  * @tparam dim Dimension.
70  * @tparam d1d Number of degrees of freedom in 1D.
71  * @tparam q1d er of quadrature points in 1D.
72  * @param B Basis functions evaluated at quadrature points in column-major
73  * layout q1d x d1d.
74  * @param G Gradients of basis functions evaluated at quadrature points in
75  * column major layout q1d x d1d.
76  * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
77  * q1d x q1d x q1d.
78  * @param U Input vector d1d x d1d x d1d x dim.
79  * @param dUdxi Gradient of the input vector wrt to reference coordinates. Size
80  * needed q1d x q1d x q1d x dim x dim.
81  */
82 template <int dim, int d1d, int q1d>
83 static inline MFEM_HOST_DEVICE void
84 CalcGrad(const tensor<double, q1d, d1d> &B,
85  const tensor<double, q1d, d1d> &G,
86  tensor<double,2,3,q1d,q1d,q1d> &smem,
88  tensor<double, q1d, q1d, q1d, dim, dim> &dUdxi)
89 {
90  for (int c = 0; c < dim; ++c)
91  {
92  MFEM_FOREACH_THREAD(dz,z,d1d)
93  {
94  MFEM_FOREACH_THREAD(dy,y,d1d)
95  {
96  MFEM_FOREACH_THREAD(dx,x,d1d)
97  {
98  smem(0,0,dx,dy,dz) = U(dx,dy,dz,c);
99  }
100  }
101  }
102  MFEM_SYNC_THREAD;
103  MFEM_FOREACH_THREAD(dz,z,d1d)
104  {
105  MFEM_FOREACH_THREAD(dy,y,d1d)
106  {
107  MFEM_FOREACH_THREAD(qx,x,q1d)
108  {
109  double u = 0.0;
110  double v = 0.0;
111  for (int dx = 0; dx < d1d; ++dx)
112  {
113  const double input = smem(0,0,dx,dy,dz);
114  u += input * B(qx,dx);
115  v += input * G(qx,dx);
116  }
117  smem(0,1,dz,dy,qx) = u;
118  smem(0,2,dz,dy,qx) = v;
119  }
120  }
121  }
122  MFEM_SYNC_THREAD;
123  MFEM_FOREACH_THREAD(dz,z,d1d)
124  {
125  MFEM_FOREACH_THREAD(qy,y,q1d)
126  {
127  MFEM_FOREACH_THREAD(qx,x,q1d)
128  {
129  double u = 0.0;
130  double v = 0.0;
131  double w = 0.0;
132  for (int dy = 0; dy < d1d; ++dy)
133  {
134  u += smem(0,2,dz,dy,qx) * B(qy,dy);
135  v += smem(0,1,dz,dy,qx) * G(qy,dy);
136  w += smem(0,1,dz,dy,qx) * B(qy,dy);
137  }
138  smem(1,0,dz,qy,qx) = u;
139  smem(1,1,dz,qy,qx) = v;
140  smem(1,2,dz,qy,qx) = w;
141  }
142  }
143  }
144  MFEM_SYNC_THREAD;
145  MFEM_FOREACH_THREAD(qz,z,q1d)
146  {
147  MFEM_FOREACH_THREAD(qy,y,q1d)
148  {
149  MFEM_FOREACH_THREAD(qx,x,q1d)
150  {
151  double u = 0.0;
152  double v = 0.0;
153  double w = 0.0;
154  for (int dz = 0; dz < d1d; ++dz)
155  {
156  u += smem(1,0,dz,qy,qx) * B(qz,dz);
157  v += smem(1,1,dz,qy,qx) * B(qz,dz);
158  w += smem(1,2,dz,qy,qx) * G(qz,dz);
159  }
160  dUdxi(qz,qy,qx,c,0) += u;
161  dUdxi(qz,qy,qx,c,1) += v;
162  dUdxi(qz,qy,qx,c,2) += w;
163  }
164  }
165  }
166  MFEM_SYNC_THREAD;
167  } // vdim
168 }
169 
170 /**
171  * @brief Multi-component transpose gradient evaluation from DOFs to quadrature
172  * points in reference coordinates with contraction of the D vector.
173  *
174  * @tparam dim Dimension.
175  * @tparam d1d Number of degrees of freedom in 1D.
176  * @tparam q1d er of quadrature points in 1D.
177  * @param B Basis functions evaluated at quadrature points in column-major
178  * layout q1d x d1d.
179  * @param G Gradients of basis functions evaluated at quadrature points in
180  * column major layout q1d x d1d.
181  * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
182  * q1d x q1d x q1d.
183  * @param U Input vector q1d x q1d x q1d x dim.
184  * @param F Output vector that applied the gradient evaluation from DOFs to
185  * quadrature points in reference coordinates with contraction of the D operator
186  * on the input vector. Size is d1d x d1d x d1d x dim.
187  */
188 template <int dim, int d1d, int q1d>
189 static inline MFEM_HOST_DEVICE void
190 CalcGradTSum(const tensor<double, q1d, d1d> &B,
191  const tensor<double, q1d, d1d> &G,
192  tensor<double, 2, 3, q1d, q1d, q1d> &smem,
193  const tensor<double, q1d, q1d, q1d, dim, dim> &U,
194  DeviceTensor<4, double> &F)
195 {
196  for (int c = 0; c < dim; ++c)
197  {
198  MFEM_FOREACH_THREAD(qz, z, q1d)
199  {
200  MFEM_FOREACH_THREAD(qy, y, q1d)
201  {
202  MFEM_FOREACH_THREAD(dx, x, d1d)
203  {
204  double u = 0.0, v = 0.0, w = 0.0;
205  for (int qx = 0; qx < q1d; ++qx)
206  {
207  u += U(qx, qy, qz, 0, c) * G(qx, dx);
208  v += U(qx, qy, qz, 1, c) * B(qx, dx);
209  w += U(qx, qy, qz, 2, c) * B(qx, dx);
210  }
211  smem(0, 0, qz, qy, dx) = u;
212  smem(0, 1, qz, qy, dx) = v;
213  smem(0, 2, qz, qy, dx) = w;
214  }
215  }
216  }
217  MFEM_SYNC_THREAD;
218  MFEM_FOREACH_THREAD(qz, z, q1d)
219  {
220  MFEM_FOREACH_THREAD(dy, y, d1d)
221  {
222  MFEM_FOREACH_THREAD(dx, x, d1d)
223  {
224  double u = 0.0, v = 0.0, w = 0.0;
225  for (int qy = 0; qy < q1d; ++qy)
226  {
227  u += smem(0, 0, qz, qy, dx) * B(qy, dy);
228  v += smem(0, 1, qz, qy, dx) * G(qy, dy);
229  w += smem(0, 2, qz, qy, dx) * B(qy, dy);
230  }
231  smem(1, 0, qz, dy, dx) = u;
232  smem(1, 1, qz, dy, dx) = v;
233  smem(1, 2, qz, dy, dx) = w;
234  }
235  }
236  }
237  MFEM_SYNC_THREAD;
238  MFEM_FOREACH_THREAD(dz, z, d1d)
239  {
240  MFEM_FOREACH_THREAD(dy, y, d1d)
241  {
242  MFEM_FOREACH_THREAD(dx, x, d1d)
243  {
244  double u = 0.0, v = 0.0, w = 0.0;
245  for (int qz = 0; qz < q1d; ++qz)
246  {
247  u += smem(1, 0, qz, dy, dx) * B(qz, dz);
248  v += smem(1, 1, qz, dy, dx) * B(qz, dz);
249  w += smem(1, 2, qz, dy, dx) * G(qz, dz);
250  }
251  const double sum = u + v + w;
252  F(dx, dy, dz, c) += sum;
253  }
254  }
255  }
256  MFEM_SYNC_THREAD;
257  }
258 }
259 
260 /**
261  * @brief Compute the gradient of all shape functions.
262  *
263  * @note TODO: Does not make use of shared memory on the GPU.
264  *
265  * @tparam dim Dimension.
266  * @tparam d1d Number of degrees of freedom in 1D.
267  * @tparam q1d er of quadrature points in 1D.
268  * @param qx Quadrature point x index.
269  * @param qy Quadrature point y index.
270  * @param qz Quadrature point z index.
271  * @param B Basis functions evaluated at quadrature points in column-major
272  * layout q1d x d1d.
273  * @param G Gradients of basis functions evaluated at quadrature points in
274  * column major layout q1d x d1d.
275  * @param invJ Inverse of the Jacobian at the quadrature point. Size is dim x
276  * dim.
277  *
278  * @return Gradient of all shape functions at the quadrature point. Size is d1d
279  * x d1d x d1d x dim.
280  */
281 template <int dim, int d1d, int q1d>
282 static inline MFEM_HOST_DEVICE tensor<double, d1d, d1d, d1d, dim>
283 GradAllShapeFunctions(int qx, int qy, int qz,
284  const tensor<double, q1d, d1d> &B,
285  const tensor<double, q1d, d1d> &G,
286  const tensor<double, dim, dim> &invJ)
287 {
288  tensor<double, d1d, d1d, d1d, dim> dphi_dx;
289  // G (x) B (x) B
290  // B (x) G (x) B
291  // B (x) B (x) G
292  for (int dx = 0; dx < d1d; dx++)
293  {
294  for (int dy = 0; dy < d1d; dy++)
295  {
296  for (int dz = 0; dz < d1d; dz++)
297  {
298  dphi_dx(dx,dy,dz) =
299  transpose(invJ) *
300  tensor<double, dim> {G(qx, dx) * B(qy, dy) * B(qz, dz),
301  B(qx, dx) * G(qy, dy) * B(qz, dz),
302  B(qx, dx) * B(qy, dy) * G(qz, dz)
303  };
304  }
305  }
306  }
307  return dphi_dx;
308 }
309 
310 } // namespace KernelHelpers
311 
312 } // namespace mfem
313 
314 #endif
void CheckMemoryRestriction(int d1d, int q1d)
Runtime check for memory restrictions that are determined at compile time.
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:81
Implementation of the tensor class.
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition: forall.hpp:122
int dim
Definition: ex24.cpp:53
double u(const Vector &xvec)
Definition: lor_mms.hpp:22