namespace quadrature_interpolator
{
// Tensor-product evaluation of derivatives at quadrature points: 1D version.
template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS>
static void Derivatives1D(const int NE,
                          const real_t *g_, const real_t *j_,
                          const real_t *x_, real_t *y_,
                          const int sdim, const int vdim,
                          const int d1d, const int q1d)
{
   const auto g = Reshape(g_, q1d, d1d);
   const auto j = Reshape(j_, q1d, sdim, NE);
   const auto x = Reshape(x_, d1d, vdim, NE);
   auto y = Q_LAYOUT == QVectorLayout::byNODES ?
            Reshape(y_, q1d, vdim, sdim, NE) :
            Reshape(y_, vdim, sdim, q1d, NE);

   forall(NE, [=] MFEM_HOST_DEVICE (int e)
   {
      for (int c = 0; c < vdim; c++)
      {
         for (int q = 0; q < q1d; q++)
         {
            real_t du[3] = {0.0, 0.0, 0.0};
            for (int d = 0; d < d1d; d++)
            {
               du[0] += g(q, d) * x(d, c, e);
            }
            if (GRAD_PHYS)
            {
               if (sdim == 1) { du[0] /= j(q, 0, e); }
               else if (sdim == 2)
               {
                  const real_t Jloc[2] = {j(q,0,e), j(q,1,e)};
                  real_t Jinv[2];
                  kernels::CalcLeftInverse<2,1>(Jloc, Jinv);
                  const real_t U = Jinv[0]*du[0];
                  const real_t V = Jinv[1]*du[0];
                  du[0] = U; du[1] = V;
               }
               else // sdim == 3
               {
                  const real_t Jloc[3] = {j(q,0,e), j(q,1,e), j(q,2,e)};
                  real_t Jinv[3];
                  kernels::CalcLeftInverse<3,1>(Jloc, Jinv);
                  const real_t U = Jinv[0]*du[0];
                  const real_t V = Jinv[1]*du[0];
                  const real_t W = Jinv[2]*du[0];
                  du[0] = U; du[1] = V; du[2] = W;
               }
            }
            for (int d = 0; d < sdim; ++d)
            {
               if (Q_LAYOUT == QVectorLayout::byVDIM)  { y(c, d, q, e) = du[d]; }
               if (Q_LAYOUT == QVectorLayout::byNODES) { y(q, c, d, e) = du[d]; }
            }
         }
      }
   });
}
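A note added for clarity (not part of the listing): with the column-major packing of Jloc and Jinv used in these kernels, dotting du with the columns of Jinv applies Jinv^T, so the GRAD_PHYS branch computes

   \nabla_x u = J^{-T} \, \nabla_\xi u                  (square Jacobian, sdim equal to the reference dimension)
   \nabla_x u = J \, (J^T J)^{-1} \, \nabla_\xi u       (rectangular Jacobian, sdim larger than the reference dimension)

where the second form uses the left inverse (J^T J)^{-1} J^T computed by kernels::CalcLeftInverse.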
// Tensor-product evaluation of derivatives at quadrature points: 2D version.
// Shared memory holds the 1D basis (B,G) and the intermediate x-contractions.
template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS,
         int T_VDIM = 0, int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
static void Derivatives2D(const int NE,
                          const real_t *b_, const real_t *g_,
                          const real_t *j_,
                          const real_t *x_, real_t *y_,
                          const int sdim, const int vdim,
                          const int d1d, const int q1d)
{
   const int D1D = T_D1D ? T_D1D : d1d;
   const int Q1D = T_Q1D ? T_Q1D : q1d;
   const int VDIM = T_VDIM ? T_VDIM : vdim;
   const int SDIM = GRAD_PHYS ? sdim : 2;
   static constexpr int NBZ = T_NBZ ? T_NBZ : 1;

   const auto b = Reshape(b_, Q1D, D1D);
   const auto g = Reshape(g_, Q1D, D1D);
   const auto j = Reshape(j_, Q1D, Q1D, SDIM, 2, NE);
   const auto x = Reshape(x_, D1D, D1D, VDIM, NE);
   auto y = Q_LAYOUT == QVectorLayout::byNODES ?
            Reshape(y_, Q1D, Q1D, VDIM, SDIM, NE) :
            Reshape(y_, VDIM, SDIM, Q1D, Q1D, NE);

   forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e)
   {
      const int D1D = T_D1D ? T_D1D : d1d;
      const int Q1D = T_Q1D ? T_Q1D : q1d;
      const int VDIM = T_VDIM ? T_VDIM : vdim;
      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;

      const int tidz = MFEM_THREAD_ID(z);
      MFEM_SHARED real_t BG[2][MQ1*MD1];
      kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,b,g,BG);
      DeviceMatrix B(BG[0], D1D, Q1D);
      DeviceMatrix G(BG[1], D1D, Q1D);

      MFEM_SHARED real_t XY[NBZ][MD1*MD1];
      DeviceTensor<2> X((real_t*)(XY+tidz), D1D, D1D);

      MFEM_SHARED real_t s_DQ[2][NBZ][MD1*MQ1];
      DeviceTensor<2> DQ0(s_DQ[0][tidz], D1D, Q1D);
      DeviceTensor<2> DQ1(s_DQ[1][tidz], D1D, Q1D);

      for (int c = 0; c < VDIM; ++c)
      {
         kernels::internal::LoadX<MD1,NBZ>(e,D1D,c,x,XY);
         // Contract in x: values (DQ0) and x-derivatives (DQ1) on the (dy,qx) grid.
         MFEM_FOREACH_THREAD(dy,y,D1D)
         {
            MFEM_FOREACH_THREAD(qx,x,Q1D)
            {
               real_t u = 0.0, v = 0.0;
               for (int dx = 0; dx < D1D; ++dx)
               {
                  const real_t input = X(dx,dy);
                  u += input * B(dx,qx);
                  v += input * G(dx,qx);
               }
               DQ0(dy,qx) = u;
               DQ1(dy,qx) = v;
            }
         }
         MFEM_SYNC_THREAD;
         // Contract in y and, if requested, map to the physical gradient.
         MFEM_FOREACH_THREAD(qy,y,Q1D)
         {
            MFEM_FOREACH_THREAD(qx,x,Q1D)
            {
               real_t du[3] = {0.0, 0.0, 0.0};
               for (int dy = 0; dy < D1D; ++dy)
               {
                  du[0] += DQ1(dy,qx) * B(dy,qy);
                  du[1] += DQ0(dy,qx) * G(dy,qy);
               }
               if (GRAD_PHYS)
               {
                  if (SDIM == 2)
                  {
                     real_t Jloc[4], Jinv[4];
                     Jloc[0] = j(qx,qy,0,0,e);
                     Jloc[1] = j(qx,qy,1,0,e);
                     Jloc[2] = j(qx,qy,0,1,e);
                     Jloc[3] = j(qx,qy,1,1,e);
                     kernels::CalcInverse<2>(Jloc, Jinv);
                     const real_t U = Jinv[0]*du[0] + Jinv[1]*du[1];
                     const real_t V = Jinv[2]*du[0] + Jinv[3]*du[1];
                     du[0] = U; du[1] = V;
                  }
                  else // SDIM == 3: element embedded in 3D space
                  {
                     real_t Jloc[6], Jinv[6];
                     Jloc[0] = j(qx,qy,0,0,e);
                     Jloc[1] = j(qx,qy,1,0,e);
                     Jloc[2] = j(qx,qy,2,0,e);
                     Jloc[3] = j(qx,qy,0,1,e);
                     Jloc[4] = j(qx,qy,1,1,e);
                     Jloc[5] = j(qx,qy,2,1,e);
                     kernels::CalcLeftInverse<3,2>(Jloc, Jinv);
                     const real_t U = Jinv[0]*du[0] + Jinv[1]*du[1];
                     const real_t V = Jinv[2]*du[0] + Jinv[3]*du[1];
                     const real_t W = Jinv[4]*du[0] + Jinv[5]*du[1];
                     du[0] = U; du[1] = V; du[2] = W;
                  }
               }
               for (int d = 0; d < SDIM; ++d)
               {
                  if (Q_LAYOUT == QVectorLayout::byVDIM)
                  {
                     y(c,d,qx,qy,e) = du[d];
                  }
                  if (Q_LAYOUT == QVectorLayout::byNODES)
                  {
                     y(qx,qy,c,d,e) = du[d];
                  }
               }
            }
         }
         MFEM_SYNC_THREAD;
      }
   });
}
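As a usage illustration only (not taken from the listing), a direct instantiation of the 2D kernel could look like the sketch below; the sizes, the use of mfem::Vector for storage, and filling the arrays by hand are assumptions for the example. In practice these kernels are reached through the QuadratureInterpolator dispatch rather than called by hand.

   // Hypothetical sizes for illustration: p=2 basis, 4 quadrature points per direction.
   constexpr int VDIM = 1, D1D = 3, Q1D = 4;
   const int NE = 100, sdim = 2;
   Vector b(Q1D*D1D), g(Q1D*D1D);          // 1D basis values and derivatives at the points
   Vector j(Q1D*Q1D*sdim*2*NE);            // Jacobians at quadrature points
   Vector x(D1D*D1D*VDIM*NE);              // input E-vector
   Vector y(Q1D*Q1D*VDIM*sdim*NE);         // output Q-vector of gradients (byNODES)
   // ... fill b, g, j, x ...
   Derivatives2D<QVectorLayout::byNODES, true, VDIM, D1D, Q1D>(
      NE, b.Read(), g.Read(), j.Read(), x.Read(), y.Write(),
      sdim, VDIM, D1D, Q1D);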
// Tensor-product evaluation of derivatives at quadrature points: 3D version.
template<QVectorLayout Q_LAYOUT, bool GRAD_PHYS,
         int T_VDIM = 0, int T_D1D = 0, int T_Q1D = 0>
static void Derivatives3D(const int NE,
                          const real_t *b_, const real_t *g_,
                          const real_t *j_,
                          const real_t *x_, real_t *y_,
                          const int vdim, const int d1d, const int q1d)
{
   const int D1D = T_D1D ? T_D1D : d1d;
   const int Q1D = T_Q1D ? T_Q1D : q1d;
   const int VDIM = T_VDIM ? T_VDIM : vdim;

   const auto b = Reshape(b_, Q1D, D1D);
   const auto g = Reshape(g_, Q1D, D1D);
   const auto j = Reshape(j_, Q1D, Q1D, Q1D, 3, 3, NE);
   const auto x = Reshape(x_, D1D, D1D, D1D, VDIM, NE);
   auto y = Q_LAYOUT == QVectorLayout::byNODES ?
            Reshape(y_, Q1D, Q1D, Q1D, VDIM, 3, NE) :
            Reshape(y_, VDIM, 3, Q1D, Q1D, Q1D, NE);

   forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
   {
      const int D1D = T_D1D ? T_D1D : d1d;
      const int Q1D = T_Q1D ? T_Q1D : q1d;
      const int VDIM = T_VDIM ? T_VDIM : vdim;
      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_INTERP_1D;
      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_INTERP_1D;

      MFEM_SHARED real_t BG[2][MQ1*MD1];
      kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,b,g,BG);
      DeviceMatrix B(BG[0], D1D, Q1D);
      DeviceMatrix G(BG[1], D1D, Q1D);

      MFEM_SHARED real_t sm0[3][MQ1*MQ1*MQ1];
      MFEM_SHARED real_t sm1[3][MQ1*MQ1*MQ1];
      DeviceTensor<3> X(sm0[2], D1D, D1D, D1D);
      DeviceTensor<3> DDQ0(sm0[0], D1D, D1D, Q1D);
      DeviceTensor<3> DDQ1(sm0[1], D1D, D1D, Q1D);
      DeviceTensor<3> DQQ0(sm1[0], D1D, Q1D, Q1D);
      DeviceTensor<3> DQQ1(sm1[1], D1D, Q1D, Q1D);
      DeviceTensor<3> DQQ2(sm1[2], D1D, Q1D, Q1D);

      for (int c = 0; c < VDIM; ++c)
      {
         kernels::internal::LoadX(e,D1D,c,x,X);
         // Contract in x.
         MFEM_FOREACH_THREAD(dz,z,D1D)
         {
            MFEM_FOREACH_THREAD(dy,y,D1D)
            {
               MFEM_FOREACH_THREAD(qx,x,Q1D)
               {
                  real_t u = 0.0, v = 0.0;
                  for (int dx = 0; dx < D1D; ++dx)
                  {
                     const real_t input = X(dx,dy,dz);
                     u += input * B(dx,qx);
                     v += input * G(dx,qx);
                  }
                  DDQ0(dz,dy,qx) = u;
                  DDQ1(dz,dy,qx) = v;
               }
            }
         }
         MFEM_SYNC_THREAD;
         // Contract in y.
         MFEM_FOREACH_THREAD(dz,z,D1D)
         {
            MFEM_FOREACH_THREAD(qy,y,Q1D)
            {
               MFEM_FOREACH_THREAD(qx,x,Q1D)
               {
                  real_t u = 0.0, v = 0.0, w = 0.0;
                  for (int dy = 0; dy < D1D; ++dy)
                  {
                     u += DDQ1(dz,dy,qx) * B(dy,qy);
                     v += DDQ0(dz,dy,qx) * G(dy,qy);
                     w += DDQ0(dz,dy,qx) * B(dy,qy);
                  }
                  DQQ0(dz,qy,qx) = u;
                  DQQ1(dz,qy,qx) = v;
                  DQQ2(dz,qy,qx) = w;
               }
            }
         }
         MFEM_SYNC_THREAD;
         // Contract in z and, if requested, map to the physical gradient.
         MFEM_FOREACH_THREAD(qz,z,Q1D)
         {
            MFEM_FOREACH_THREAD(qy,y,Q1D)
            {
               MFEM_FOREACH_THREAD(qx,x,Q1D)
               {
                  real_t u = 0.0, v = 0.0, w = 0.0;
                  for (int dz = 0; dz < D1D; ++dz)
                  {
                     u += DQQ0(dz,qy,qx) * B(dz,qz);
                     v += DQQ1(dz,qy,qx) * B(dz,qz);
                     w += DQQ2(dz,qy,qx) * G(dz,qz);
                  }
                  if (GRAD_PHYS)
                  {
                     real_t Jloc[9], Jinv[9];
                     for (int col = 0; col < 3; col++)
                     {
                        for (int row = 0; row < 3; row++)
                        {
                           Jloc[row+3*col] = j(qx,qy,qz,row,col,e);
                        }
                     }
                     kernels::CalcInverse<3>(Jloc, Jinv);
                     const real_t U = Jinv[0]*u + Jinv[1]*v + Jinv[2]*w;
                     const real_t V = Jinv[3]*u + Jinv[4]*v + Jinv[5]*w;
                     const real_t W = Jinv[6]*u + Jinv[7]*v + Jinv[8]*w;
                     u = U; v = V; w = W;
                  }
                  if (Q_LAYOUT == QVectorLayout::byVDIM)
                  {
                     y(c,0,qx,qy,qz,e) = u;
                     y(c,1,qx,qy,qz,e) = v;
                     y(c,2,qx,qy,qz,e) = w;
                  }
                  if (Q_LAYOUT == QVectorLayout::byNODES)
                  {
                     y(qx,qy,qz,c,0,e) = u;
                     y(qx,qy,qz,c,1,e) = v;
                     y(qx,qy,qz,c,2,e) = w;
                  }
               }
            }
         }
         MFEM_SYNC_THREAD;
      }
   });
}

} // namespace quadrature_interpolator
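Because the shared-memory buffers above fall back to the DofQuadLimits maxima when the orders are not known at compile time, callers have to ensure the runtime orders fit. A host-side guard along the following lines (a sketch, not present in the listing) illustrates the constraint:

   // Assumed guard before launching the 3D kernel with runtime orders.
   MFEM_VERIFY(d1d <= DofQuadLimits::MAX_INTERP_1D &&
               q1d <= DofQuadLimits::MAX_INTERP_1D,
               "basis/quadrature order exceeds the shared-memory limits");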
Referenced symbols:

MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Compute the inverse of the square matrix of the given size stored in data, writing the result into inv_data.
MFEM_HOST_DEVICE void CalcLeftInverse<2,1>(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void CalcLeftInverse<3,2>(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void CalcLeftInverse<3,1>(const real_t *d, real_t *left_inv)
Compute the left inverse of the HEIGHT x WIDTH matrix stored in d, writing the result into left_inv.
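A minimal sketch of how these helpers are used above; the column-major storage and the numeric values are chosen only for illustration.

   // Invert a 2x2 Jacobian stored column-major.
   real_t J2[4] = {2.0, 0.0, 0.0, 4.0};      // diag(2, 4)
   real_t Ji2[4];
   kernels::CalcInverse<2>(J2, Ji2);         // Ji2 holds diag(0.5, 0.25)

   // Left inverse of a 3x2 Jacobian (surface element embedded in 3D).
   real_t J32[6] = {1.0, 0.0, 0.0,           // first column
                    0.0, 1.0, 0.0};          // second column
   real_t Li[6];
   kernels::CalcLeftInverse<3,2>(J32, Li);   // here equal to the transpose of J32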
DeviceTensor<2, real_t> DeviceMatrix
Alias for a rank-2 DeviceTensor of real_t.
MFEM_HOST_DEVICE DeviceTensor<sizeof...(Dims), T> Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
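For example (a sketch; the pointer and extents are arbitrary):

   // Wrap a raw device pointer holding D1D*D1D*VDIM*NE values as a rank-4 tensor.
   const auto x = Reshape(x_, D1D, D1D, VDIM, NE);
   const real_t v = x(dx, dy, c, e);   // indexed in the order the extents were given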
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
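A small sketch of the launch pattern these wrappers provide (the body is illustrative, not from the listing): one block of Q1D x Q1D x Q1D threads per element, with MFEM_FOREACH_THREAD distributing the quadrature points.

   auto q_data = Reshape(q_ptr, Q1D, Q1D, Q1D, NE);   // q_ptr: assumed device pointer
   forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
   {
      MFEM_FOREACH_THREAD(qz,z,Q1D)
      {
         MFEM_FOREACH_THREAD(qy,y,Q1D)
         {
            MFEM_FOREACH_THREAD(qx,x,Q1D)
            {
               q_data(qx,qy,qz,e) = 0.0;   // each thread touches one point
            }
         }
      }
   });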
QVectorLayout
Type describing possible layouts for Q-vectors.
QVectorLayout::byNODES: NQPT x VDIM x NE (values) / NQPT x VDIM x DIM x NE (grads)
QVectorLayout::byVDIM: VDIM x NQPT x NE (values) / VDIM x DIM x NQPT x NE (grads)
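In terms of the Reshape calls used by the kernels above, the two layouts of a gradient Q-vector differ only in index order (a sketch):

   // byNODES: the quadrature-point index varies fastest.
   auto y_nodes = Reshape(y_, NQPT, VDIM, DIM, NE);   // y_nodes(q, c, d, e)
   // byVDIM: the component and derivative indices vary fastest.
   auto y_vdim  = Reshape(y_, VDIM, DIM, NQPT, NE);   // y_vdim(c, d, q, e)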
void forall(int N, lambda &&body)
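This is the plain element loop used by the 1D kernel above; for instance (a sketch with assumed device pointers d_x, d_y):

   forall(N, [=] MFEM_HOST_DEVICE (int i) { d_y[i] = 2.0 * d_x[i]; });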