15#ifndef MFEM_QUADINTERP_GRAD
16#define MFEM_QUADINTERP_GRAD
30namespace quadrature_interpolator
33template<QVectorLayout Q_LAYOUT,
bool GRAD_PHYS>
34static void Derivatives1D(
const int NE,
45 MFEM_CONTRACT_VAR(b_);
46 const int SDIM = GRAD_PHYS ? sdim : 1;
47 const auto g =
Reshape(g_, q1d, d1d);
49 const auto x =
Reshape(x_, d1d, vdim, NE);
56 for (
int c = 0; c < vdim; c++)
58 for (
int q = 0; q < q1d; q++)
60 real_t du[3] = {0.0, 0.0, 0.0};
61 for (
int d = 0; d < d1d; d++)
63 du[0] += g(q, d) * x(d, c, e);
67 if (
SDIM == 1) { du[0] /= j(q, 0, e); }
70 const real_t Jloc[2] = {j(q,0,e), j(q,1,e)};
73 const real_t U = Jinv[0]*du[0];
74 const real_t V = Jinv[1]*du[0];
80 const real_t Jloc[3] = {j(q,0,e), j(q,1,e), j(q,2,e)};
83 const real_t U = Jinv[0]*du[0];
84 const real_t V = Jinv[1]*du[0];
85 const real_t W = Jinv[2]*du[0];
91 for (
int d = 0; d <
SDIM; ++d)
103 int T_VDIM = 0,
int T_D1D = 0,
int T_Q1D = 0,
105static void Derivatives2D(
const int NE,
116 const int D1D = T_D1D ? T_D1D : d1d;
117 const int Q1D = T_Q1D ? T_Q1D : q1d;
118 const int VDIM = T_VDIM ? T_VDIM : vdim;
119 const int SDIM = GRAD_PHYS ? sdim : 2;
120 static constexpr int NBZ = T_NBZ ? T_NBZ : 1;
122 const auto b =
Reshape(b_, Q1D, D1D);
123 const auto g =
Reshape(g_, Q1D, D1D);
125 const auto x =
Reshape(x_, D1D, D1D, VDIM, NE);
132 const int D1D = T_D1D ? T_D1D : d1d;
133 const int Q1D = T_Q1D ? T_Q1D : q1d;
134 const int VDIM = T_VDIM ? T_VDIM : vdim;
135 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
136 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
138 const int tidz = MFEM_THREAD_ID(z);
139 MFEM_SHARED
real_t BG[2][MQ1*MD1];
140 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,
b,g,BG);
144 MFEM_SHARED
real_t XY[NBZ][MD1*MD1];
145 DeviceTensor<2> X((
real_t*)(XY+tidz), D1D, D1D);
147 MFEM_SHARED
real_t s_DQ[2][NBZ][MD1*MQ1];
148 DeviceTensor<2> DQ0(s_DQ[0][tidz], D1D, Q1D);
149 DeviceTensor<2> DQ1(s_DQ[1][tidz], D1D, Q1D);
151 for (
int c = 0; c < VDIM; ++c)
153 kernels::internal::LoadX<MD1,NBZ>(e,D1D,c,x,XY);
154 MFEM_FOREACH_THREAD(dy,y,D1D)
156 MFEM_FOREACH_THREAD(qx,x,Q1D)
160 for (
int dx = 0; dx < D1D; ++dx)
162 const real_t input = X(dx,dy);
163 u += input * B(dx,qx);
164 v += input * G(dx,qx);
171 MFEM_FOREACH_THREAD(qy,y,Q1D)
173 MFEM_FOREACH_THREAD(qx,x,Q1D)
175 real_t du[3] = {0.0, 0.0, 0.0};
176 for (
int dy = 0; dy < D1D; ++dy)
178 du[0] += DQ1(dy,qx) * B(dy,qy);
179 du[1] += DQ0(dy,qx) * G(dy,qy);
186 Jloc[0] = j(qx,qy,0,0,e);
187 Jloc[1] = j(qx,qy,1,0,e);
188 Jloc[2] = j(qx,qy,0,1,e);
189 Jloc[3] = j(qx,qy,1,1,e);
191 const real_t U = Jinv[0]*du[0] + Jinv[1]*du[1];
192 const real_t V = Jinv[2]*du[0] + Jinv[3]*du[1];
199 Jloc[0] = j(qx,qy,0,0,e);
200 Jloc[1] = j(qx,qy,1,0,e);
201 Jloc[2] = j(qx,qy,2,0,e);
202 Jloc[3] = j(qx,qy,0,1,e);
203 Jloc[4] = j(qx,qy,1,1,e);
204 Jloc[5] = j(qx,qy,2,1,e);
206 const real_t U = Jinv[0]*du[0] + Jinv[1]*du[1];
207 const real_t V = Jinv[2]*du[0] + Jinv[3]*du[1];
208 const real_t W = Jinv[4]*du[0] + Jinv[5]*du[1];
214 for (
int d = 0; d <
SDIM; ++d)
218 y(c,d,qx,qy,e) = du[d];
222 y(qx,qy,c,d,e) = du[d];
234 int T_VDIM = 0,
int T_D1D = 0,
int T_Q1D = 0>
235static void Derivatives3D(
const int NE,
246 const int D1D = T_D1D ? T_D1D : d1d;
247 const int Q1D = T_Q1D ? T_Q1D : q1d;
248 const int VDIM = T_VDIM ? T_VDIM : vdim;
250 const auto b =
Reshape(b_, Q1D, D1D);
251 const auto g =
Reshape(g_, Q1D, D1D);
252 const auto j =
Reshape(j_, Q1D, Q1D, Q1D, 3, 3, NE);
253 const auto x =
Reshape(x_, D1D, D1D, D1D, VDIM, NE);
255 Reshape(y_, Q1D, Q1D, Q1D, VDIM, 3, NE):
256 Reshape(y_, VDIM, 3, Q1D, Q1D, Q1D, NE);
260 const int D1D = T_D1D ? T_D1D : d1d;
261 const int Q1D = T_Q1D ? T_Q1D : q1d;
262 const int VDIM = T_VDIM ? T_VDIM : vdim;
263 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_INTERP_1D;
264 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_INTERP_1D;
266 MFEM_SHARED
real_t BG[2][MQ1*MD1];
267 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,
b,g,BG);
271 MFEM_SHARED
real_t sm0[3][MQ1*MQ1*MQ1];
272 MFEM_SHARED
real_t sm1[3][MQ1*MQ1*MQ1];
273 DeviceTensor<3> X(sm0[2], D1D, D1D, D1D);
274 DeviceTensor<3> DDQ0(sm0[0], D1D, D1D, Q1D);
275 DeviceTensor<3> DDQ1(sm0[1], D1D, D1D, Q1D);
276 DeviceTensor<3> DQQ0(sm1[0], D1D, Q1D, Q1D);
277 DeviceTensor<3> DQQ1(sm1[1], D1D, Q1D, Q1D);
278 DeviceTensor<3> DQQ2(sm1[2], D1D, Q1D, Q1D);
280 for (
int c = 0; c < VDIM; ++c)
282 kernels::internal::LoadX(e,D1D,c,x,X);
283 MFEM_FOREACH_THREAD(dz,z,D1D)
285 MFEM_FOREACH_THREAD(dy,y,D1D)
287 MFEM_FOREACH_THREAD(qx,x,Q1D)
291 for (
int dx = 0; dx < D1D; ++dx)
293 const real_t input = X(dx,dy,dz);
294 u += input * B(dx,qx);
295 v += input * G(dx,qx);
303 MFEM_FOREACH_THREAD(dz,z,D1D)
305 MFEM_FOREACH_THREAD(qy,y,Q1D)
307 MFEM_FOREACH_THREAD(qx,x,Q1D)
312 for (
int dy = 0; dy < D1D; ++dy)
314 u += DDQ1(dz,dy,qx) * B(dy,qy);
315 v += DDQ0(dz,dy,qx) * G(dy,qy);
316 w += DDQ0(dz,dy,qx) * B(dy,qy);
325 MFEM_FOREACH_THREAD(qz,z,Q1D)
327 MFEM_FOREACH_THREAD(qy,y,Q1D)
329 MFEM_FOREACH_THREAD(qx,x,Q1D)
334 for (
int dz = 0; dz < D1D; ++dz)
336 u += DQQ0(dz,qy,qx) * B(dz,qz);
337 v += DQQ1(dz,qy,qx) * B(dz,qz);
338 w += DQQ2(dz,qy,qx) * G(dz,qz);
343 for (
int col = 0; col < 3; col++)
345 for (
int row = 0; row < 3; row++)
347 Jloc[row+3*col] = j(qx,qy,qz,row,col,e);
351 const real_t U = Jinv[0]*
u + Jinv[1]*v + Jinv[2]*w;
352 const real_t V = Jinv[3]*
u + Jinv[4]*v + Jinv[5]*w;
353 const real_t W = Jinv[6]*
u + Jinv[7]*v + Jinv[8]*w;
358 y(c,0,qx,qy,qz,e) =
u;
359 y(c,1,qx,qy,qz,e) = v;
360 y(c,2,qx,qy,qz,e) = w;
364 y(qx,qy,qz,c,0,e) =
u;
365 y(qx,qy,qz,c,1,e) = v;
366 y(qx,qy,qz,c,2,e) = w;
376template<QVectorLayout Q_LAYOUT,
bool GRAD_PHYS>
377static void CollocatedDerivatives1D(
const int NE,
386 Derivatives1D<Q_LAYOUT, GRAD_PHYS>(
387 NE,
nullptr, g_, j_, x_, y_, sdim, vdim, d1d, d1d);
392 int T_VDIM = 0,
int T_D1D = 0,
394static void CollocatedDerivatives2D(
const int NE,
403 const int D1D = T_D1D ? T_D1D : d1d;
404 const int VDIM = T_VDIM ? T_VDIM : vdim;
405 const int SDIM = GRAD_PHYS ? sdim : 2;
406 static constexpr int NBZ = T_NBZ ? T_NBZ : 1;
408 const auto g =
Reshape(g_, D1D, D1D);
410 const auto x =
Reshape(x_, D1D, D1D, VDIM, NE);
417 const int D1D = T_D1D ? T_D1D : d1d;
418 const int VDIM = T_VDIM ? T_VDIM : vdim;
419 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
421 const int tidz = MFEM_THREAD_ID(z);
423 MFEM_SHARED
real_t XY[NBZ][MD1*MD1];
424 DeviceTensor<2> X((
real_t*)(XY+tidz), D1D, D1D);
426 for (
int c = 0; c < VDIM; ++c)
428 kernels::internal::LoadX<MD1,NBZ>(e,D1D,c,x,XY);
429 MFEM_FOREACH_THREAD(dy,y,D1D)
431 MFEM_FOREACH_THREAD(dx,x,D1D)
436 for (
int dxy = 0; dxy < D1D; ++dxy)
438 u += X(dxy, dy) * g(dx,dxy);
439 v += X(dx, dxy) * g(dy,dxy);
447 Jloc[0] = j(dx,dy,0,0,e);
448 Jloc[1] = j(dx,dy,1,0,e);
449 Jloc[2] = j(dx,dy,0,1,e);
450 Jloc[3] = j(dx,dy,1,1,e);
452 const real_t U = Jinv[0]*
u + Jinv[1]*v;
453 const real_t V = Jinv[2]*
u + Jinv[3]*v;
460 Jloc[0] = j(dx,dy,0,0,e);
461 Jloc[1] = j(dx,dy,1,0,e);
462 Jloc[2] = j(dx,dy,2,0,e);
463 Jloc[3] = j(dx,dy,0,1,e);
464 Jloc[4] = j(dx,dy,1,1,e);
465 Jloc[5] = j(dx,dy,2,1,e);
467 const real_t U = Jinv[0]*
u + Jinv[1]*v;
468 const real_t V = Jinv[2]*
u + Jinv[3]*v;
469 const real_t W = Jinv[4]*
u + Jinv[5]*v;
480 if (
SDIM == 3) { y(c,2,dx,dy,e) = w; }
486 if (
SDIM == 3) { y(dx,dy,c,2,e) = w; }
497 int T_VDIM = 0,
int T_D1D = 0>
498static void CollocatedDerivatives3D(
const int NE,
507 MFEM_VERIFY(sdim == 3,
"");
509 const int D1D = T_D1D ? T_D1D : d1d;
510 const int VDIM = T_VDIM ? T_VDIM : vdim;
512 const auto g =
Reshape(g_, D1D, D1D);
513 const auto j =
Reshape(j_, D1D, D1D, D1D, 3, 3, NE);
514 const auto x =
Reshape(x_, D1D, D1D, D1D, VDIM, NE);
516 Reshape(y_, D1D, D1D, D1D, VDIM, 3, NE):
517 Reshape(y_, VDIM, 3, D1D, D1D, D1D, NE);
521 const int D1D = T_D1D ? T_D1D : d1d;
522 const int VDIM = T_VDIM ? T_VDIM : vdim;
523 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_INTERP_1D;
525 MFEM_SHARED
real_t uvw[MD1*MD1*MD1];
526 DeviceTensor<3> X(uvw, D1D, D1D, D1D);
528 for (
int c = 0; c < VDIM; ++c)
530 kernels::internal::LoadX(e,D1D,c,x,X);
531 MFEM_FOREACH_THREAD(dz,z,D1D)
533 MFEM_FOREACH_THREAD(dy,y,D1D)
535 MFEM_FOREACH_THREAD(dx,x,D1D)
540 for (
int dxyz = 0; dxyz < D1D; ++dxyz)
542 u += X(dxyz, dy, dz) * g(dx,dxyz);
543 v += X(dx, dxyz, dz) * g(dy,dxyz);
544 w += X(dx, dy, dxyz) * g(dz,dxyz);
550 for (
int col = 0; col < 3; col++)
552 for (
int row = 0; row < 3; row++)
554 Jloc[row+3*col] = j(dx,dy,dz,row,col,e);
558 const real_t U = Jinv[0]*
u + Jinv[1]*v + Jinv[2]*w;
559 const real_t V = Jinv[3]*
u + Jinv[4]*v + Jinv[5]*w;
560 const real_t W = Jinv[6]*
u + Jinv[7]*v + Jinv[8]*w;
565 y(c,0,dx,dy,dz,e) =
u;
566 y(c,1,dx,dy,dz,e) = v;
567 y(c,2,dx,dy,dz,e) = w;
571 y(dx,dy,dz,c,0,e) =
u;
572 y(dx,dy,dz,c,1,e) = v;
573 y(dx,dy,dz,c,2,e) = w;
593QuadratureInterpolator::GradKernels::Kernel()
595 if (
DIM == 1) {
return internal::quadrature_interpolator::Derivatives1D<Q_LAYOUT, GRAD_PHYS>; }
596 else if (
DIM == 2) {
return internal::quadrature_interpolator::Derivatives2D<Q_LAYOUT, GRAD_PHYS, VDIM, D1D, Q1D, NBZ>; }
597 else if (
DIM == 3) {
return internal::quadrature_interpolator::Derivatives3D<Q_LAYOUT, GRAD_PHYS, VDIM, D1D, Q1D>; }
598 else { MFEM_ABORT(
""); }
604QuadratureInterpolator::CollocatedGradKernels::Kernel()
606 if (
DIM == 1) {
return internal::quadrature_interpolator::CollocatedDerivatives1D<Q_LAYOUT, GRAD_PHYS>; }
607 else if (
DIM == 2) {
return internal::quadrature_interpolator::CollocatedDerivatives2D<Q_LAYOUT, GRAD_PHYS, VDIM, D1D, NBZ>; }
608 else if (
DIM == 3) {
return internal::quadrature_interpolator::CollocatedDerivatives3D<Q_LAYOUT, GRAD_PHYS, VDIM, D1D>; }
609 else { MFEM_ABORT(
""); }
void(*)(const int, const real_t *, const real_t *, const real_t *, real_t *, const int, const int, const int) CollocatedGradKernelType
void(*)(const int, const real_t *, const real_t *, const real_t *, const real_t *, real_t *, const int, const int, const int, const int) GradKernelType
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data.
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const real_t *d, real_t *left_inv)
real_t u(const Vector &xvec)
DeviceTensor< 2, real_t > DeviceMatrix
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
QVectorLayout
Type describing possible layouts for Q-vectors.
void forall(int N, lambda &&body)