MFEM  v4.6.0
Finite element discretization library
grad.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 // Internal header, included only by .cpp files.
13 // Template function implementations.
14 
15 #include "../quadinterpolator.hpp"
16 #include "../../general/forall.hpp"
17 #include "../../linalg/dtensor.hpp"
18 #include "../../fem/kernels.hpp"
19 #include "../../linalg/kernels.hpp"
20 
21 namespace mfem
22 {
23 
24 namespace internal
25 {
26 
27 namespace quadrature_interpolator
28 {
29 
30 template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS>
31 static void Derivatives1D(const int NE,
32  const double *g_,
33  const double *j_,
34  const double *x_,
35  double *y_,
36  const int sdim,
37  const int vdim,
38  const int d1d,
39  const int q1d)
40 {
41  const auto g = Reshape(g_, q1d, d1d);
42  const auto j = Reshape(j_, q1d, sdim, NE);
43  const auto x = Reshape(x_, d1d, vdim, NE);
44  auto y = Q_LAYOUT == QVectorLayout::byNODES ?
45  Reshape(y_, q1d, vdim, sdim, NE):
46  Reshape(y_, vdim, sdim, q1d, NE);
47 
48  mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
49  {
50  for (int c = 0; c < vdim; c++)
51  {
52  for (int q = 0; q < q1d; q++)
53  {
54  double du[3] = {0.0, 0.0, 0.0};
55  for (int d = 0; d < d1d; d++)
56  {
57  du[0] += g(q, d) * x(d, c, e);
58  }
59  if (GRAD_PHYS)
60  {
61  if (sdim == 1) { du[0] /= j(q, 0, e); }
62  else if (sdim == 2)
63  {
64  const double Jloc[2] = {j(q,0,e), j(q,1,e)};
65  double Jinv[3];
67  const double U = Jinv[0]*du[0];
68  const double V = Jinv[1]*du[0];
69  du[0] = U;
70  du[1] = V;
71  }
72  else // sdim == 3
73  {
74  const double Jloc[3] = {j(q,0,e), j(q,1,e), j(q,2,e)};
75  double Jinv[3];
77  const double U = Jinv[0]*du[0];
78  const double V = Jinv[1]*du[0];
79  const double W = Jinv[2]*du[0];
80  du[0] = U;
81  du[1] = V;
82  du[2] = W;
83  }
84  }
85  for (int d = 0; d < sdim; ++d)
86  {
87  if (Q_LAYOUT == QVectorLayout::byVDIM) { y(c, d, q, e) = du[d]; }
88  if (Q_LAYOUT == QVectorLayout::byNODES) { y(q, c, d, e) = du[d]; }
89  }
90  }
91  }
92  });
93 }
94 
95 // Template compute kernel for derivatives in 2D: tensor product version.
96 template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS,
97  int T_VDIM = 0, int T_D1D = 0, int T_Q1D = 0,
98  int T_NBZ = 1>
99 static void Derivatives2D(const int NE,
100  const double *b_,
101  const double *g_,
102  const double *j_,
103  const double *x_,
104  double *y_,
105  const int sdim = 2,
106  const int vdim = 0,
107  const int d1d = 0,
108  const int q1d = 0)
109 {
110  const int D1D = T_D1D ? T_D1D : d1d;
111  const int Q1D = T_Q1D ? T_Q1D : q1d;
112  const int VDIM = T_VDIM ? T_VDIM : vdim;
113  const int SDIM = GRAD_PHYS ? sdim : 2;
114  static constexpr int NBZ = T_NBZ ? T_NBZ : 1;
115 
116  const auto b = Reshape(b_, Q1D, D1D);
117  const auto g = Reshape(g_, Q1D, D1D);
118  const auto j = Reshape(j_, Q1D, Q1D, SDIM, 2, NE);
119  const auto x = Reshape(x_, D1D, D1D, VDIM, NE);
120  auto y = Q_LAYOUT == QVectorLayout:: byNODES ?
121  Reshape(y_, Q1D, Q1D, VDIM, SDIM, NE):
122  Reshape(y_, VDIM, SDIM, Q1D, Q1D, NE);
123 
124  mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e)
125  {
126  const int D1D = T_D1D ? T_D1D : d1d;
127  const int Q1D = T_Q1D ? T_Q1D : q1d;
128  const int VDIM = T_VDIM ? T_VDIM : vdim;
129  constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
130  constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
131 
132  const int tidz = MFEM_THREAD_ID(z);
133  MFEM_SHARED double BG[2][MQ1*MD1];
134  kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,b,g,BG);
135  DeviceMatrix B(BG[0], D1D, Q1D);
136  DeviceMatrix G(BG[1], D1D, Q1D);
137 
138  MFEM_SHARED double XY[NBZ][MD1*MD1];
139  DeviceTensor<2> X((double*)(XY+tidz), D1D, D1D);
140 
141  MFEM_SHARED double s_DQ[2][NBZ][MD1*MQ1];
142  DeviceTensor<2> DQ0(s_DQ[0][tidz], D1D, Q1D);
143  DeviceTensor<2> DQ1(s_DQ[1][tidz], D1D, Q1D);
144 
145  for (int c = 0; c < VDIM; ++c)
146  {
147  kernels::internal::LoadX<MD1,NBZ>(e,D1D,c,x,XY);
148  MFEM_FOREACH_THREAD(dy,y,D1D)
149  {
150  MFEM_FOREACH_THREAD(qx,x,Q1D)
151  {
152  double u = 0.0;
153  double v = 0.0;
154  for (int dx = 0; dx < D1D; ++dx)
155  {
156  const double input = X(dx,dy);
157  u += input * B(dx,qx);
158  v += input * G(dx,qx);
159  }
160  DQ0(dy,qx) = u;
161  DQ1(dy,qx) = v;
162  }
163  }
164  MFEM_SYNC_THREAD;
165  MFEM_FOREACH_THREAD(qy,y,Q1D)
166  {
167  MFEM_FOREACH_THREAD(qx,x,Q1D)
168  {
169  double du[3] = {0.0, 0.0, 0.0};
170  for (int dy = 0; dy < D1D; ++dy)
171  {
172  du[0] += DQ1(dy,qx) * B(dy,qy);
173  du[1] += DQ0(dy,qx) * G(dy,qy);
174  }
175  if (GRAD_PHYS)
176  {
177  if (SDIM == 2)
178  {
179  double Jloc[4], Jinv[4];
180  Jloc[0] = j(qx,qy,0,0,e);
181  Jloc[1] = j(qx,qy,1,0,e);
182  Jloc[2] = j(qx,qy,0,1,e);
183  Jloc[3] = j(qx,qy,1,1,e);
184  kernels::CalcInverse<2>(Jloc, Jinv);
185  const double U = Jinv[0]*du[0] + Jinv[1]*du[1];
186  const double V = Jinv[2]*du[0] + Jinv[3]*du[1];
187  du[0] = U;
188  du[1] = V;
189  }
190  else
191  {
192  double Jloc[6], Jinv[6];
193  Jloc[0] = j(qx,qy,0,0,e);
194  Jloc[1] = j(qx,qy,1,0,e);
195  Jloc[2] = j(qx,qy,2,0,e);
196  Jloc[3] = j(qx,qy,0,1,e);
197  Jloc[4] = j(qx,qy,1,1,e);
198  Jloc[5] = j(qx,qy,2,1,e);
199  kernels::CalcLeftInverse<3,2>(Jloc, Jinv);
200  const double U = Jinv[0]*du[0] + Jinv[1]*du[1];
201  const double V = Jinv[2]*du[0] + Jinv[3]*du[1];
202  const double W = Jinv[4]*du[0] + Jinv[5]*du[1];
203  du[0] = U;
204  du[1] = V;
205  du[2] = W;
206  }
207  }
208  for (int d = 0; d < SDIM; ++d)
209  {
210  if (Q_LAYOUT == QVectorLayout::byVDIM)
211  {
212  y(c,d,qx,qy,e) = du[d];
213  }
214  else // Q_LAYOUT == QVectorLayout::byNODES
215  {
216  y(qx,qy,c,d,e) = du[d];
217  }
218  }
219  }
220  }
221  MFEM_SYNC_THREAD;
222  }
223  });
224 }
225 
226 // Template compute kernel for derivatives in 3D: tensor product version.
227 template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS,
228  int T_VDIM = 0, int T_D1D = 0, int T_Q1D = 0>
229 static void Derivatives3D(const int NE,
230  const double *b_,
231  const double *g_,
232  const double *j_,
233  const double *x_,
234  double *y_,
235  const int vdim = 0,
236  const int d1d = 0,
237  const int q1d = 0)
238 {
239  const int D1D = T_D1D ? T_D1D : d1d;
240  const int Q1D = T_Q1D ? T_Q1D : q1d;
241  const int VDIM = T_VDIM ? T_VDIM : vdim;
242 
243  const auto b = Reshape(b_, Q1D, D1D);
244  const auto g = Reshape(g_, Q1D, D1D);
245  const auto j = Reshape(j_, Q1D, Q1D, Q1D, 3, 3, NE);
246  const auto x = Reshape(x_, D1D, D1D, D1D, VDIM, NE);
247  auto y = Q_LAYOUT == QVectorLayout:: byNODES ?
248  Reshape(y_, Q1D, Q1D, Q1D, VDIM, 3, NE):
249  Reshape(y_, VDIM, 3, Q1D, Q1D, Q1D, NE);
250 
251  mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
252  {
253  const int D1D = T_D1D ? T_D1D : d1d;
254  const int Q1D = T_Q1D ? T_Q1D : q1d;
255  const int VDIM = T_VDIM ? T_VDIM : vdim;
256  constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_INTERP_1D;
257  constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_INTERP_1D;
258 
259  MFEM_SHARED double BG[2][MQ1*MD1];
260  kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,b,g,BG);
261  DeviceMatrix B(BG[0], D1D, Q1D);
262  DeviceMatrix G(BG[1], D1D, Q1D);
263 
264  MFEM_SHARED double sm0[3][MQ1*MQ1*MQ1];
265  MFEM_SHARED double sm1[3][MQ1*MQ1*MQ1];
266  DeviceTensor<3> X(sm0[2], D1D, D1D, D1D);
267  DeviceTensor<3> DDQ0(sm0[0], D1D, D1D, Q1D);
268  DeviceTensor<3> DDQ1(sm0[1], D1D, D1D, Q1D);
269  DeviceTensor<3> DQQ0(sm1[0], D1D, Q1D, Q1D);
270  DeviceTensor<3> DQQ1(sm1[1], D1D, Q1D, Q1D);
271  DeviceTensor<3> DQQ2(sm1[2], D1D, Q1D, Q1D);
272 
273  for (int c = 0; c < VDIM; ++c)
274  {
275  kernels::internal::LoadX(e,D1D,c,x,X);
276  MFEM_FOREACH_THREAD(dz,z,D1D)
277  {
278  MFEM_FOREACH_THREAD(dy,y,D1D)
279  {
280  MFEM_FOREACH_THREAD(qx,x,Q1D)
281  {
282  double u = 0.0;
283  double v = 0.0;
284  for (int dx = 0; dx < D1D; ++dx)
285  {
286  const double input = X(dx,dy,dz);
287  u += input * B(dx,qx);
288  v += input * G(dx,qx);
289  }
290  DDQ0(dz,dy,qx) = u;
291  DDQ1(dz,dy,qx) = v;
292  }
293  }
294  }
295  MFEM_SYNC_THREAD;
296  MFEM_FOREACH_THREAD(dz,z,D1D)
297  {
298  MFEM_FOREACH_THREAD(qy,y,Q1D)
299  {
300  MFEM_FOREACH_THREAD(qx,x,Q1D)
301  {
302  double u = 0.0;
303  double v = 0.0;
304  double w = 0.0;
305  for (int dy = 0; dy < D1D; ++dy)
306  {
307  u += DDQ1(dz,dy,qx) * B(dy,qy);
308  v += DDQ0(dz,dy,qx) * G(dy,qy);
309  w += DDQ0(dz,dy,qx) * B(dy,qy);
310  }
311  DQQ0(dz,qy,qx) = u;
312  DQQ1(dz,qy,qx) = v;
313  DQQ2(dz,qy,qx) = w;
314  }
315  }
316  }
317  MFEM_SYNC_THREAD;
318  MFEM_FOREACH_THREAD(qz,z,Q1D)
319  {
320  MFEM_FOREACH_THREAD(qy,y,Q1D)
321  {
322  MFEM_FOREACH_THREAD(qx,x,Q1D)
323  {
324  double u = 0.0;
325  double v = 0.0;
326  double w = 0.0;
327  for (int dz = 0; dz < D1D; ++dz)
328  {
329  u += DQQ0(dz,qy,qx) * B(dz,qz);
330  v += DQQ1(dz,qy,qx) * B(dz,qz);
331  w += DQQ2(dz,qy,qx) * G(dz,qz);
332  }
333  if (GRAD_PHYS)
334  {
335  double Jloc[9], Jinv[9];
336  for (int col = 0; col < 3; col++)
337  {
338  for (int row = 0; row < 3; row++)
339  {
340  Jloc[row+3*col] = j(qx,qy,qz,row,col,e);
341  }
342  }
343  kernels::CalcInverse<3>(Jloc, Jinv);
344  const double U = Jinv[0]*u + Jinv[1]*v + Jinv[2]*w;
345  const double V = Jinv[3]*u + Jinv[4]*v + Jinv[5]*w;
346  const double W = Jinv[6]*u + Jinv[7]*v + Jinv[8]*w;
347  u = U; v = V; w = W;
348  }
349  if (Q_LAYOUT == QVectorLayout::byVDIM)
350  {
351  y(c,0,qx,qy,qz,e) = u;
352  y(c,1,qx,qy,qz,e) = v;
353  y(c,2,qx,qy,qz,e) = w;
354  }
355  if (Q_LAYOUT == QVectorLayout::byNODES)
356  {
357  y(qx,qy,qz,c,0,e) = u;
358  y(qx,qy,qz,c,1,e) = v;
359  y(qx,qy,qz,c,2,e) = w;
360  }
361  }
362  }
363  }
364  MFEM_SYNC_THREAD;
365  }
366  });
367 }
368 
369 } // namespace quadrature_interpolator
370 
371 } // namespace internal
372 
373 } // namespace mfem
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition: forall.hpp:763
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const double *d, double *left_inv)
Definition: kernels.hpp:1080
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const double *d, double *left_inv)
Definition: kernels.hpp:1072
DeviceTensor< 2, double > DeviceMatrix
Definition: dtensor.hpp:143
double b
Definition: lissajous.cpp:42
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
Definition: forall.hpp:757
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const double *d, double *left_inv)
Definition: kernels.hpp:1089
void forall(int N, lambda &&body)
Definition: forall.hpp:742
QVectorLayout
Type describing possible layouts for Q-vectors.
Definition: fespace.hpp:52
constexpr int SDIM
NQPT x VDIM x NE (values) / NQPT x VDIM x DIM x NE (grads)
double u(const Vector &xvec)
Definition: lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
VDIM x NQPT x NE (values) / VDIM x DIM x NQPT x NE (grads)