26namespace quadrature_interpolator
29static void Det1D(
const int NE,
39 MFEM_CONTRACT_VAR(d_buff);
40 const auto G =
Reshape(g, q1d, d1d);
41 const auto X =
Reshape(x, d1d, NE);
47 for (
int q = 0; q < q1d; q++)
50 for (
int d = 0; d < d1d; d++)
52 u += G(q, d) * X(d, e);
59template<
int T_D1D = 0,
int T_Q1D = 0>
60static void Det2D(
const int NE,
69 MFEM_CONTRACT_VAR(d_buff);
70 static constexpr int SDIM = 2;
71 static constexpr int NBZ = 1;
73 const int D1D = T_D1D ? T_D1D : d1d;
74 const int Q1D = T_Q1D ? T_Q1D : q1d;
77 const auto G =
Reshape(g, Q1D, D1D);
79 auto Y =
Reshape(y, Q1D, Q1D, NE);
83 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
84 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
85 const int D1D = T_D1D ? T_D1D : d1d;
86 const int Q1D = T_Q1D ? T_Q1D : q1d;
88 MFEM_SHARED
real_t BG[2][MQ1*MD1];
93 kernels::internal::LoadX<MD1,NBZ>(e,D1D,X,XY);
94 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
96 kernels::internal::GradX<MD1,MQ1,NBZ>(D1D,Q1D,BG,XY,DQ);
97 kernels::internal::GradY<MD1,MQ1,NBZ>(D1D,Q1D,BG,DQ,QQ);
99 MFEM_FOREACH_THREAD(qy,y,Q1D)
101 MFEM_FOREACH_THREAD(qx,x,Q1D)
104 kernels::internal::PullGrad<MQ1,NBZ>(Q1D,qx,qy,QQ,J);
111template<
int T_D1D = 0,
int T_Q1D = 0>
112static void Det2DSurface(
const int NE,
121 MFEM_CONTRACT_VAR(d_buff);
123 static constexpr int SDIM = 3;
124 static constexpr int NBZ = 1;
126 const int D1D = T_D1D ? T_D1D : d1d;
127 const int Q1D = T_Q1D ? T_Q1D : q1d;
129 const auto B =
Reshape(
b, Q1D, D1D);
130 const auto G =
Reshape(g, Q1D, D1D);
132 auto Y =
Reshape(y, Q1D, Q1D, NE);
136 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
137 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
138 const int D1D = T_D1D ? T_D1D : d1d;
139 const int Q1D = T_Q1D ? T_Q1D : q1d;
140 const int tidz = MFEM_THREAD_ID(z);
142 MFEM_SHARED
real_t BG[2][MQ1*MD1];
146 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
149 MFEM_FOREACH_THREAD(dy,y,D1D)
151 MFEM_FOREACH_THREAD(dx,x,D1D)
153 for (
int d = 0; d <
SDIM; ++d)
155 XYZ[d][tidz][dx + dy*D1D] = X(dx,dy,d,e);
165 MFEM_FOREACH_THREAD(dy,y,D1D)
167 MFEM_FOREACH_THREAD(qx,x,Q1D)
169 for (
int d = 0; d <
SDIM; ++d)
173 for (
int dx = 0; dx < D1D; ++dx)
175 const real_t xval = XYZ[d][tidz][dx + dy*D1D];
176 u += xval * G_mat(dx,qx);
177 v += xval * B_mat(dx,qx);
179 DQ[d][tidz][dy + qx*D1D] =
u;
180 DQ[3 + d][tidz][dy + qx*D1D] = v;
186 MFEM_FOREACH_THREAD(qy,y,Q1D)
188 MFEM_FOREACH_THREAD(qx,x,Q1D)
190 real_t J_[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
191 for (
int d = 0; d <
SDIM; ++d)
193 for (
int dy = 0; dy < D1D; ++dy)
195 J_[d] += DQ[d][tidz][dy + qx*D1D] * B_mat(dy,qy);
196 J_[3 + d] += DQ[3 + d][tidz][dy + qx*D1D] * G_mat(dy,qy);
200 const real_t E = J(0,0)*J(0,0) + J(1,0)*J(1,0) + J(2,0)*J(2,0);
201 const real_t F = J(0,0)*J(0,1) + J(1,0)*J(1,1) + J(2,0)*J(2,1);
202 const real_t G = J(0,1)*J(0,1) + J(1,1)*J(1,1) + J(2,1)*J(2,1);
203 Y(qx,qy,e) = sqrt(E*G - F*F);
209template<
int T_D1D = 0,
int T_Q1D = 0,
bool SMEM = true>
210static void Det3D(
const int NE,
219 constexpr int DIM = 3;
220 static constexpr int GRID = SMEM ? 0 : 128;
222 const int D1D = T_D1D ? T_D1D : d1d;
223 const int Q1D = T_Q1D ? T_Q1D : q1d;
225 const auto B =
Reshape(
b, Q1D, D1D);
226 const auto G =
Reshape(g, Q1D, D1D);
227 const auto X =
Reshape(x, D1D, D1D, D1D,
DIM, NE);
228 auto Y =
Reshape(y, Q1D, Q1D, Q1D, NE);
234 const int max_q1d = T_Q1D ? T_Q1D : limits.
MAX_Q1D;
235 const int max_d1d = T_D1D ? T_D1D : limits.
MAX_D1D;
236 const int max_qd = std::max(max_q1d, max_d1d);
237 const int mem_size = max_qd * max_qd * max_qd * 9;
238 d_buff->SetSize(2*mem_size*GRID);
239 GM = d_buff->Write();
244 static constexpr int MQ1 = T_Q1D ? T_Q1D :
245 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_Q1D);
246 static constexpr int MD1 = T_D1D ? T_D1D :
247 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_D1D);
248 static constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
249 static constexpr int MSZ = MDQ * MDQ * MDQ * 9;
251 const int bid = MFEM_BLOCK_ID(x);
252 MFEM_SHARED
real_t BG[2][MQ1*MD1];
253 MFEM_SHARED
real_t SM0[SMEM?MSZ:1];
254 MFEM_SHARED
real_t SM1[SMEM?MSZ:1];
255 real_t *lm0 = SMEM ? SM0 : GM + MSZ*bid;
256 real_t *lm1 = SMEM ? SM1 : GM + MSZ*(GRID+bid);
257 real_t (*DDD)[MD1*MD1*MD1] = (
real_t (*)[MD1*MD1*MD1]) (lm0);
258 real_t (*DDQ)[MD1*MD1*MQ1] = (
real_t (*)[MD1*MD1*MQ1]) (lm1);
259 real_t (*DQQ)[MD1*MQ1*MQ1] = (
real_t (*)[MD1*MQ1*MQ1]) (lm0);
260 real_t (*QQQ)[MQ1*MQ1*MQ1] = (
real_t (*)[MQ1*MQ1*MQ1]) (lm1);
262 kernels::internal::LoadX<MD1>(e,D1D,X,DDD);
263 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
265 kernels::internal::GradX<MD1,MQ1>(D1D,Q1D,BG,DDD,DDQ);
266 kernels::internal::GradY<MD1,MQ1>(D1D,Q1D,BG,DDQ,DQQ);
267 kernels::internal::GradZ<MD1,MQ1>(D1D,Q1D,BG,DQQ,QQQ);
269 MFEM_FOREACH_THREAD(qz,z,Q1D)
271 MFEM_FOREACH_THREAD(qy,y,Q1D)
273 MFEM_FOREACH_THREAD(qx,x,Q1D)
276 kernels::internal::PullGrad<MQ1>(Q1D, qx,qy,qz, QQQ, J);
286 using k = QuadratureInterpolator::DetKernels;
288 k::Specialization<2,2,2,2>::Add();
289 k::Specialization<2,2,2,3>::Add();
290 k::Specialization<2,2,2,4>::Add();
291 k::Specialization<2,2,2,6>::Add();
292 k::Specialization<2,2,3,4>::Add();
293 k::Specialization<2,2,3,6>::Add();
294 k::Specialization<2,2,4,4>::Add();
295 k::Specialization<2,2,4,6>::Add();
296 k::Specialization<2,2,5,6>::Add();
298 k::Specialization<3,3,2,4>::Add();
299 k::Specialization<3,3,3,3>::Add();
300 k::Specialization<3,3,3,5>::Add();
301 k::Specialization<3,3,3,6>::Add();
315template<
int DIM,
int SDIM,
int D1D,
int Q1D>
316DetKernel QuadratureInterpolator::DetKernels::Kernel()
318 if (
DIM == 1) {
return internal::quadrature_interpolator::Det1D; }
319 else if (
DIM == 2 &&
SDIM == 2) {
return internal::quadrature_interpolator::Det2D<D1D, Q1D>; }
320 else if (
DIM == 2 &&
SDIM == 3) {
return internal::quadrature_interpolator::Det2DSurface<D1D, Q1D>; }
321 else if (
DIM == 3) {
return internal::quadrature_interpolator::Det3D<D1D, Q1D>; }
322 else { MFEM_ABORT(
""); }
325DetKernel QuadratureInterpolator::DetKernels::Fallback(
326 int DIM,
int SDIM,
int D1D,
int Q1D)
328 if (
DIM == 1) {
return internal::quadrature_interpolator::Det1D; }
329 else if (
DIM == 2 &&
SDIM == 2) {
return internal::quadrature_interpolator::Det2D; }
330 else if (
DIM == 2 &&
SDIM == 3) {
return internal::quadrature_interpolator::Det2DSurface; }
335 if (D1D <= MD && Q1D <= MQ) {
return internal::quadrature_interpolator::Det3D<0,0,true>; }
336 else {
return internal::quadrature_interpolator::Det3D<0,0,false>; }
338 else { MFEM_ABORT(
""); }
A basic generic Tensor class, appropriate for use on the GPU.
void(*)(const int NE, const real_t *, const real_t *, const real_t *, real_t *, const int, const int, Vector *) DetKernelType
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
MFEM_HOST_DEVICE real_t Det3D(DeviceMatrix &J)
void forall_3D_grid(int N, int X, int Y, int Z, int G, lambda &&body)
MFEM_HOST_DEVICE real_t Det2D(DeviceMatrix &J)
void forall(int N, lambda &&body)
Maximum number of 1D DOFs or quadrature points for the current runtime configuration of the Device (u...
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
int MAX_DET_1D
Maximum number of points for determinant computation in QuadratureInterpolator.
int MAX_D1D
Maximum number of 1D nodal points.
int MAX_Q1D
Maximum number of 1D quadrature points.