MFEM  v4.5.2
Finite element discretization library
kernel_helpers.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_ELASTICITY_KERNEL_HELPERS_HPP
13 #define MFEM_ELASTICITY_KERNEL_HELPERS_HPP
14 
15 #include "mfem.hpp"
16 #include "general/forall.hpp"
17 #include "linalg/tensor.hpp"
18 
19 using mfem::internal::tensor;
20 
21 namespace mfem
22 {
23 
24 namespace KernelHelpers
25 {
26 
27 // MFEM_SHARED_3D_BLOCK_TENSOR definition
28 // Should be moved in backends/cuda/hip header files.
29 #if defined(__CUDA_ARCH__)
30 #define MFEM_SHARED_3D_BLOCK_TENSOR(name,T,bx,by,bz,...)\
31 MFEM_SHARED tensor<T,bx,by,bz,__VA_ARGS__> name;\
32 name(threadIdx.x, threadIdx.y, threadIdx.z) = tensor<T,__VA_ARGS__> {};
33 #else
34 #define MFEM_SHARED_3D_BLOCK_TENSOR(name,...) tensor<__VA_ARGS__> name {};
35 #endif
36 
37 // Kernel helper functions
38 
39 /**
40  * @brief Runtime check for memory restrictions that are determined at compile
41  * time.
42  *
43  * @param d1d Number of degrees of freedom in 1D.
44  * @param q1d Number of quadrature points in 1D.
45  */
46 inline void CheckMemoryRestriction(int d1d, int q1d)
47 {
48  MFEM_VERIFY(d1d <= q1d,
49  "There should be more or equal quadrature points "
50  "as there are dofs");
51  MFEM_VERIFY(d1d <= MAX_D1D,
52  "Maximum number of degrees of freedom in 1D reached."
53  "This number can be increased globally in general/forall.hpp if "
54  "device memory allows.");
55  MFEM_VERIFY(q1d <= MAX_Q1D, "Maximum quadrature points 1D reached."
56  "This number can be increased globally in "
57  "general/forall.hpp if device memory allows.");
58 }
59 
60 /**
61  * @brief Multi-component gradient evaluation from DOFs to quadrature points in
62  * reference coordinates.
63  *
64  * The implementation exploits sum factorization.
65  *
66  * @note DeviceTensor<2> means RANK=2
67  *
68  * @tparam dim Dimension.
69  * @tparam d1d Number of degrees of freedom in 1D.
70  * @tparam q1d er of quadrature points in 1D.
71  * @param B Basis functions evaluated at quadrature points in column-major
72  * layout q1d x d1d.
73  * @param G Gradients of basis functions evaluated at quadrature points in
74  * column major layout q1d x d1d.
75  * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
76  * q1d x q1d x q1d.
77  * @param U Input vector d1d x d1d x d1d x dim.
78  * @param dUdxi Gradient of the input vector wrt to reference coordinates. Size
79  * needed q1d x q1d x q1d x dim x dim.
80  */
81 template <int dim, int d1d, int q1d>
82 static inline MFEM_HOST_DEVICE void
83 CalcGrad(const tensor<double, q1d, d1d> &B,
84  const tensor<double, q1d, d1d> &G,
85  tensor<double,2,3,q1d,q1d,q1d> &smem,
87  tensor<double, q1d, q1d, q1d, dim, dim> &dUdxi)
88 {
89  for (int c = 0; c < dim; ++c)
90  {
91  MFEM_FOREACH_THREAD(dz,z,d1d)
92  {
93  MFEM_FOREACH_THREAD(dy,y,d1d)
94  {
95  MFEM_FOREACH_THREAD(dx,x,d1d)
96  {
97  smem(0,0,dx,dy,dz) = U(dx,dy,dz,c);
98  }
99  }
100  }
101  MFEM_SYNC_THREAD;
102  MFEM_FOREACH_THREAD(dz,z,d1d)
103  {
104  MFEM_FOREACH_THREAD(dy,y,d1d)
105  {
106  MFEM_FOREACH_THREAD(qx,x,q1d)
107  {
108  double u = 0.0;
109  double v = 0.0;
110  for (int dx = 0; dx < d1d; ++dx)
111  {
112  const double input = smem(0,0,dx,dy,dz);
113  u += input * B(qx,dx);
114  v += input * G(qx,dx);
115  }
116  smem(0,1,dz,dy,qx) = u;
117  smem(0,2,dz,dy,qx) = v;
118  }
119  }
120  }
121  MFEM_SYNC_THREAD;
122  MFEM_FOREACH_THREAD(dz,z,d1d)
123  {
124  MFEM_FOREACH_THREAD(qy,y,q1d)
125  {
126  MFEM_FOREACH_THREAD(qx,x,q1d)
127  {
128  double u = 0.0;
129  double v = 0.0;
130  double w = 0.0;
131  for (int dy = 0; dy < d1d; ++dy)
132  {
133  u += smem(0,2,dz,dy,qx) * B(qy,dy);
134  v += smem(0,1,dz,dy,qx) * G(qy,dy);
135  w += smem(0,1,dz,dy,qx) * B(qy,dy);
136  }
137  smem(1,0,dz,qy,qx) = u;
138  smem(1,1,dz,qy,qx) = v;
139  smem(1,2,dz,qy,qx) = w;
140  }
141  }
142  }
143  MFEM_SYNC_THREAD;
144  MFEM_FOREACH_THREAD(qz,z,q1d)
145  {
146  MFEM_FOREACH_THREAD(qy,y,q1d)
147  {
148  MFEM_FOREACH_THREAD(qx,x,q1d)
149  {
150  double u = 0.0;
151  double v = 0.0;
152  double w = 0.0;
153  for (int dz = 0; dz < d1d; ++dz)
154  {
155  u += smem(1,0,dz,qy,qx) * B(qz,dz);
156  v += smem(1,1,dz,qy,qx) * B(qz,dz);
157  w += smem(1,2,dz,qy,qx) * G(qz,dz);
158  }
159  dUdxi(qz,qy,qx,c,0) += u;
160  dUdxi(qz,qy,qx,c,1) += v;
161  dUdxi(qz,qy,qx,c,2) += w;
162  }
163  }
164  }
165  MFEM_SYNC_THREAD;
166  } // vdim
167 }
168 
169 /**
170  * @brief Multi-component transpose gradient evaluation from DOFs to quadrature
171  * points in reference coordinates with contraction of the D vector.
172  *
173  * @tparam dim Dimension.
174  * @tparam d1d Number of degrees of freedom in 1D.
175  * @tparam q1d er of quadrature points in 1D.
176  * @param B Basis functions evaluated at quadrature points in column-major
177  * layout q1d x d1d.
178  * @param G Gradients of basis functions evaluated at quadrature points in
179  * column major layout q1d x d1d.
180  * @param smem Block of shared memory for scratch space. Size needed is 2 x 3 x
181  * q1d x q1d x q1d.
182  * @param U Input vector q1d x q1d x q1d x dim.
183  * @param F Output vector that applied the gradient evaluation from DOFs to
184  * quadrature points in reference coordinates with contraction of the D operator
185  * on the input vector. Size is d1d x d1d x d1d x dim.
186  */
187 template <int dim, int d1d, int q1d>
188 static inline MFEM_HOST_DEVICE void
189 CalcGradTSum(const tensor<double, q1d, d1d> &B,
190  const tensor<double, q1d, d1d> &G,
191  tensor<double, 2, 3, q1d, q1d, q1d> &smem,
192  const tensor<double, q1d, q1d, q1d, dim, dim> &U,
193  DeviceTensor<4, double> &F)
194 {
195  for (int c = 0; c < dim; ++c)
196  {
197  MFEM_FOREACH_THREAD(qz, z, q1d)
198  {
199  MFEM_FOREACH_THREAD(qy, y, q1d)
200  {
201  MFEM_FOREACH_THREAD(dx, x, d1d)
202  {
203  double u = 0.0, v = 0.0, w = 0.0;
204  for (int qx = 0; qx < q1d; ++qx)
205  {
206  u += U(qx, qy, qz, 0, c) * G(qx, dx);
207  v += U(qx, qy, qz, 1, c) * B(qx, dx);
208  w += U(qx, qy, qz, 2, c) * B(qx, dx);
209  }
210  smem(0, 0, qz, qy, dx) = u;
211  smem(0, 1, qz, qy, dx) = v;
212  smem(0, 2, qz, qy, dx) = w;
213  }
214  }
215  }
216  MFEM_SYNC_THREAD;
217  MFEM_FOREACH_THREAD(qz, z, q1d)
218  {
219  MFEM_FOREACH_THREAD(dy, y, d1d)
220  {
221  MFEM_FOREACH_THREAD(dx, x, d1d)
222  {
223  double u = 0.0, v = 0.0, w = 0.0;
224  for (int qy = 0; qy < q1d; ++qy)
225  {
226  u += smem(0, 0, qz, qy, dx) * B(qy, dy);
227  v += smem(0, 1, qz, qy, dx) * G(qy, dy);
228  w += smem(0, 2, qz, qy, dx) * B(qy, dy);
229  }
230  smem(1, 0, qz, dy, dx) = u;
231  smem(1, 1, qz, dy, dx) = v;
232  smem(1, 2, qz, dy, dx) = w;
233  }
234  }
235  }
236  MFEM_SYNC_THREAD;
237  MFEM_FOREACH_THREAD(dz, z, d1d)
238  {
239  MFEM_FOREACH_THREAD(dy, y, d1d)
240  {
241  MFEM_FOREACH_THREAD(dx, x, d1d)
242  {
243  double u = 0.0, v = 0.0, w = 0.0;
244  for (int qz = 0; qz < q1d; ++qz)
245  {
246  u += smem(1, 0, qz, dy, dx) * B(qz, dz);
247  v += smem(1, 1, qz, dy, dx) * B(qz, dz);
248  w += smem(1, 2, qz, dy, dx) * G(qz, dz);
249  }
250  const double sum = u + v + w;
251  F(dx, dy, dz, c) += sum;
252  }
253  }
254  }
255  MFEM_SYNC_THREAD;
256  }
257 }
258 
259 /**
260  * @brief Compute the gradient of all shape functions.
261  *
262  * @note TODO: Does not make use of shared memory on the GPU.
263  *
264  * @tparam dim Dimension.
265  * @tparam d1d Number of degrees of freedom in 1D.
266  * @tparam q1d er of quadrature points in 1D.
267  * @param qx Quadrature point x index.
268  * @param qy Quadrature point y index.
269  * @param qz Quadrature point z index.
270  * @param B Basis functions evaluated at quadrature points in column-major
271  * layout q1d x d1d.
272  * @param G Gradients of basis functions evaluated at quadrature points in
273  * column major layout q1d x d1d.
274  * @param invJ Inverse of the Jacobian at the quadrature point. Size is dim x
275  * dim.
276  *
277  * @return Gradient of all shape functions at the quadrature point. Size is d1d
278  * x d1d x d1d x dim.
279  */
280 template <int dim, int d1d, int q1d>
281 static inline MFEM_HOST_DEVICE tensor<double, d1d, d1d, d1d, dim>
282 GradAllShapeFunctions(int qx, int qy, int qz,
283  const tensor<double, q1d, d1d> &B,
284  const tensor<double, q1d, d1d> &G,
285  const tensor<double, dim, dim> &invJ)
286 {
287  tensor<double, d1d, d1d, d1d, dim> dphi_dx;
288  // G (x) B (x) B
289  // B (x) G (x) B
290  // B (x) B (x) G
291  for (int dx = 0; dx < d1d; dx++)
292  {
293  for (int dy = 0; dy < d1d; dy++)
294  {
295  for (int dz = 0; dz < d1d; dz++)
296  {
297  dphi_dx(dx,dy,dz) =
298  transpose(invJ) *
299  tensor<double, dim> {G(qx, dx) * B(qy, dy) * B(qz, dz),
300  B(qx, dx) * G(qy, dy) * B(qz, dz),
301  B(qx, dx) * B(qy, dy) * G(qz, dz)
302  };
303  }
304  }
305  }
306  return dphi_dx;
307 }
308 
309 } // namespace KernelHelpers
310 
311 } // namespace mfem
312 
313 #endif
void CheckMemoryRestriction(int d1d, int q1d)
Runtime check for memory restrictions that are determined at compile time.
const int MAX_Q1D
Definition: forall.hpp:29
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:81
Implementation of the tensor class.
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:28
double u(const Vector &xvec)
Definition: lor_mms.hpp:24