26namespace quadrature_interpolator
29static void Det1D(
const int NE,
36 const auto G =
Reshape(g, q1d, d1d);
37 const auto X =
Reshape(x, d1d, NE);
43 for (
int q = 0; q < q1d; q++)
46 for (
int d = 0; d < d1d; d++)
48 u += G(q, d) * X(d, e);
55template<
int T_D1D = 0,
int T_Q1D = 0>
56static void Det2D(
const int NE,
64 static constexpr int SDIM = 2;
65 static constexpr int NBZ = 1;
67 const int D1D = T_D1D ? T_D1D : d1d;
68 const int Q1D = T_Q1D ? T_Q1D : q1d;
71 const auto G =
Reshape(g, Q1D, D1D);
73 auto Y =
Reshape(y, Q1D, Q1D, NE);
77 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
78 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
79 const int D1D = T_D1D ? T_D1D : d1d;
80 const int Q1D = T_Q1D ? T_Q1D : q1d;
82 MFEM_SHARED
real_t BG[2][MQ1*MD1];
87 kernels::internal::LoadX<MD1,NBZ>(e,D1D,X,XY);
88 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
90 kernels::internal::GradX<MD1,MQ1,NBZ>(D1D,Q1D,BG,XY,DQ);
91 kernels::internal::GradY<MD1,MQ1,NBZ>(D1D,Q1D,BG,DQ,QQ);
93 MFEM_FOREACH_THREAD(qy,y,Q1D)
95 MFEM_FOREACH_THREAD(qx,x,Q1D)
98 kernels::internal::PullGrad<MQ1,NBZ>(Q1D,qx,qy,QQ,J);
105template<
int T_D1D = 0,
int T_Q1D = 0>
106static void Det2DSurface(
const int NE,
114 static constexpr int SDIM = 3;
115 static constexpr int NBZ = 1;
117 const int D1D = T_D1D ? T_D1D : d1d;
118 const int Q1D = T_Q1D ? T_Q1D : q1d;
120 const auto B =
Reshape(
b, Q1D, D1D);
121 const auto G =
Reshape(g, Q1D, D1D);
123 auto Y =
Reshape(y, Q1D, Q1D, NE);
127 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
128 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
129 const int D1D = T_D1D ? T_D1D : d1d;
130 const int Q1D = T_Q1D ? T_Q1D : q1d;
131 const int tidz = MFEM_THREAD_ID(z);
133 MFEM_SHARED
real_t BG[2][MQ1*MD1];
137 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
140 MFEM_FOREACH_THREAD(dy,y,D1D)
142 MFEM_FOREACH_THREAD(dx,x,D1D)
144 for (
int d = 0; d <
SDIM; ++d)
146 XYZ[d][tidz][dx + dy*D1D] = X(dx,dy,d,e);
156 MFEM_FOREACH_THREAD(dy,y,D1D)
158 MFEM_FOREACH_THREAD(qx,x,Q1D)
160 for (
int d = 0; d <
SDIM; ++d)
164 for (
int dx = 0; dx < D1D; ++dx)
166 const real_t xval = XYZ[d][tidz][dx + dy*D1D];
167 u += xval * G_mat(dx,qx);
168 v += xval * B_mat(dx,qx);
170 DQ[d][tidz][dy + qx*D1D] =
u;
171 DQ[3 + d][tidz][dy + qx*D1D] = v;
177 MFEM_FOREACH_THREAD(qy,y,Q1D)
179 MFEM_FOREACH_THREAD(qx,x,Q1D)
181 real_t J_[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
182 for (
int d = 0; d <
SDIM; ++d)
184 for (
int dy = 0; dy < D1D; ++dy)
186 J_[d] += DQ[d][tidz][dy + qx*D1D] * B_mat(dy,qy);
187 J_[3 + d] += DQ[3 + d][tidz][dy + qx*D1D] * G_mat(dy,qy);
191 const real_t E = J(0,0)*J(0,0) + J(1,0)*J(1,0) + J(2,0)*J(2,0);
192 const real_t F = J(0,0)*J(0,1) + J(1,0)*J(1,1) + J(2,0)*J(2,1);
193 const real_t G = J(0,1)*J(0,1) + J(1,1)*J(1,1) + J(2,1)*J(2,1);
194 Y(qx,qy,e) = sqrt(E*G - F*F);
200template<
int T_D1D = 0,
int T_Q1D = 0,
bool SMEM = true>
201static void Det3D(
const int NE,
210 constexpr int DIM = 3;
211 static constexpr int GRID = SMEM ? 0 : 128;
213 const int D1D = T_D1D ? T_D1D : d1d;
214 const int Q1D = T_Q1D ? T_Q1D : q1d;
216 const auto B =
Reshape(
b, Q1D, D1D);
217 const auto G =
Reshape(g, Q1D, D1D);
218 const auto X =
Reshape(x, D1D, D1D, D1D,
DIM, NE);
219 auto Y =
Reshape(y, Q1D, Q1D, Q1D, NE);
225 const int max_q1d = T_Q1D ? T_Q1D : limits.
MAX_D1D;
226 const int max_d1d = T_D1D ? T_D1D : limits.
MAX_Q1D;
227 const int max_qd = std::max(max_q1d, max_d1d);
228 const int mem_size = max_qd * max_qd * max_qd * 9;
229 d_buff->SetSize(2*mem_size*GRID);
230 GM = d_buff->Write();
235 static constexpr int MQ1 = T_Q1D ? T_Q1D :
236 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_D1D);
237 static constexpr int MD1 = T_D1D ? T_D1D :
238 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_Q1D);
239 static constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
240 static constexpr int MSZ = MDQ * MDQ * MDQ * 9;
242 const int bid = MFEM_BLOCK_ID(x);
243 MFEM_SHARED
real_t BG[2][MQ1*MD1];
244 MFEM_SHARED
real_t SM0[SMEM?MSZ:1];
245 MFEM_SHARED
real_t SM1[SMEM?MSZ:1];
246 real_t *lm0 = SMEM ? SM0 : GM + MSZ*bid;
247 real_t *lm1 = SMEM ? SM1 : GM + MSZ*(GRID+bid);
248 real_t (*DDD)[MD1*MD1*MD1] = (
real_t (*)[MD1*MD1*MD1]) (lm0);
249 real_t (*DDQ)[MD1*MD1*MQ1] = (
real_t (*)[MD1*MD1*MQ1]) (lm1);
250 real_t (*DQQ)[MD1*MQ1*MQ1] = (
real_t (*)[MD1*MQ1*MQ1]) (lm0);
251 real_t (*QQQ)[MQ1*MQ1*MQ1] = (
real_t (*)[MQ1*MQ1*MQ1]) (lm1);
253 kernels::internal::LoadX<MD1>(e,D1D,X,DDD);
254 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
256 kernels::internal::GradX<MD1,MQ1>(D1D,Q1D,BG,DDD,DDQ);
257 kernels::internal::GradY<MD1,MQ1>(D1D,Q1D,BG,DDQ,DQQ);
258 kernels::internal::GradZ<MD1,MQ1>(D1D,Q1D,BG,DQQ,QQQ);
260 MFEM_FOREACH_THREAD(qz,z,Q1D)
262 MFEM_FOREACH_THREAD(qy,y,Q1D)
264 MFEM_FOREACH_THREAD(qx,x,Q1D)
267 kernels::internal::PullGrad<MQ1>(Q1D, qx,qy,qz, QQQ, J);
277void TensorDeterminants(
const int NE,
284 if (NE == 0) {
return; }
286 const int D1D = maps.
ndof;
287 const int Q1D = maps.
nqpt;
293 const int id = (vdim<<8) | (D1D<<4) | Q1D;
299 <<
" are not supported!");
301 "Quadrature rules with more than "
303 Det1D(NE, G, X, Y, D1D, Q1D);
310 case 0x222:
return Det2D<2,2>(NE,B,G,X,Y);
311 case 0x223:
return Det2D<2,3>(NE,B,G,X,Y);
312 case 0x224:
return Det2D<2,4>(NE,B,G,X,Y);
313 case 0x226:
return Det2D<2,6>(NE,B,G,X,Y);
314 case 0x234:
return Det2D<3,4>(NE,B,G,X,Y);
315 case 0x236:
return Det2D<3,6>(NE,B,G,X,Y);
316 case 0x244:
return Det2D<4,4>(NE,B,G,X,Y);
317 case 0x246:
return Det2D<4,6>(NE,B,G,X,Y);
318 case 0x256:
return Det2D<5,6>(NE,B,G,X,Y);
323 MFEM_VERIFY(D1D <= MD,
"Orders higher than " << MD-1
324 <<
" are not supported!");
325 MFEM_VERIFY(Q1D <= MQ,
"Quadrature rules with more than "
326 << MQ <<
" 1D points are not supported!");
327 if (vdim == 2) {
Det2D(NE,B,G,X,Y,D1D,Q1D); }
328 else if (vdim == 3) { Det2DSurface(NE,B,G,X,Y,D1D,Q1D); }
329 else { MFEM_ABORT(
"Invalid space dimension."); }
338 case 0x324:
return Det3D<2,4>(NE,B,G,X,Y);
339 case 0x333:
return Det3D<3,3>(NE,B,G,X,Y);
340 case 0x335:
return Det3D<3,5>(NE,B,G,X,Y);
341 case 0x336:
return Det3D<3,6>(NE,B,G,X,Y);
347 if (D1D <= MD && Q1D <= MQ)
348 {
return Det3D<0,0,true>(NE,B,G,X,Y,D1D,Q1D); }
350 return Det3D<0,0,false>(
351 NE,B,G,X,Y,D1D,Q1D,&d_buff);
355 MFEM_ABORT(
"Kernel " << std::hex <<
id << std::dec <<
" not supported yet");
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
A basic generic Tensor class, appropriate for use on the GPU.
Structure representing the matrices/tensors needed to evaluate (in reference space) the values,...
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Array< real_t > B
Basis functions evaluated at quadrature points.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
const class FiniteElement * FE
The FiniteElement that created and owns this object.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
int GetDim() const
Returns the reference space dimension for the finite element.
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
MFEM_HOST_DEVICE real_t Det3D(DeviceMatrix &J)
void forall_3D_grid(int N, int X, int Y, int Z, int G, lambda &&body)
MFEM_HOST_DEVICE real_t Det2D(DeviceMatrix &J)
void forall(int N, lambda &&body)
Maximum number of 1D DOFs or quadrature points for the current runtime configuration of the Device (u...
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
int MAX_DET_1D
Maximum number of points for determinant computation in QuadratureInterpolator.
int MAX_D1D
Maximum number of 1D nodal points.
int MAX_Q1D
Maximum number of 1D quadrature points.