12 #include "../quadinterpolator.hpp" 13 #include "../../general/forall.hpp" 14 #include "../../linalg/dtensor.hpp" 15 #include "../../fem/kernels.hpp" 16 #include "../../linalg/kernels.hpp" 26 namespace quadrature_interpolator
29 static void Det1D(
const int NE,
36 const auto G =
Reshape(g, q1d, d1d);
37 const auto X =
Reshape(x, d1d, NE);
43 for (
int q = 0; q < q1d; q++)
46 for (
int d = 0; d < d1d; d++)
48 u += G(q, d) * X(d, e);
55 template<
int T_D1D = 0,
int T_Q1D = 0>
56 static void Det2D(
const int NE,
65 constexpr
int DIM = 2;
66 static constexpr
int NBZ = 1;
68 const int D1D = T_D1D ? T_D1D : d1d;
69 const int Q1D = T_Q1D ? T_Q1D : q1d;
72 const auto G =
Reshape(g, Q1D, D1D);
74 auto Y =
Reshape(y, Q1D, Q1D, NE);
78 constexpr
int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
79 constexpr
int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
80 const int D1D = T_D1D ? T_D1D : d1d;
81 const int Q1D = T_Q1D ? T_Q1D : q1d;
83 MFEM_SHARED
double BG[2][MQ1*MD1];
84 MFEM_SHARED
double XY[2][NBZ][MD1*MD1];
85 MFEM_SHARED
double DQ[4][NBZ][MD1*MQ1];
86 MFEM_SHARED
double QQ[4][NBZ][MQ1*MQ1];
88 kernels::internal::LoadX<MD1,NBZ>(e,D1D,X,XY);
89 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
91 kernels::internal::GradX<MD1,MQ1,NBZ>(D1D,Q1D,BG,XY,DQ);
92 kernels::internal::GradY<MD1,MQ1,NBZ>(D1D,Q1D,BG,DQ,QQ);
94 MFEM_FOREACH_THREAD(qy,y,Q1D)
96 MFEM_FOREACH_THREAD(qx,x,Q1D)
99 kernels::internal::PullGrad<MQ1,NBZ>(Q1D,qx,qy,QQ,J);
100 Y(qx,qy,e) = kernels::Det<2>(J);
106 template<
int T_D1D = 0,
int T_Q1D = 0,
bool SMEM = true>
107 static void Det3D(
const int NE,
117 constexpr
int DIM = 3;
118 static constexpr
int GRID = SMEM ? 0 : 128;
120 const int D1D = T_D1D ? T_D1D : d1d;
121 const int Q1D = T_Q1D ? T_Q1D : q1d;
123 const auto B =
Reshape(
b, Q1D, D1D);
124 const auto G =
Reshape(g, Q1D, D1D);
125 const auto X =
Reshape(x, D1D, D1D, D1D,
DIM, NE);
126 auto Y =
Reshape(y, Q1D, Q1D, Q1D, NE);
128 double *GM =
nullptr;
132 const int max_q1d = T_Q1D ? T_Q1D : limits.
MAX_D1D;
133 const int max_d1d = T_D1D ? T_D1D : limits.
MAX_Q1D;
134 const int max_qd = std::max(max_q1d, max_d1d);
135 const int mem_size = max_qd * max_qd * max_qd * 9;
136 d_buff->SetSize(2*mem_size*GRID);
137 GM = d_buff->Write();
142 static constexpr
int MQ1 = T_Q1D ? T_Q1D :
143 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_D1D);
144 static constexpr
int MD1 = T_D1D ? T_D1D :
145 (SMEM ? DofQuadLimits::MAX_DET_1D : DofQuadLimits::MAX_Q1D);
146 static constexpr
int MDQ = MQ1 > MD1 ? MQ1 : MD1;
147 static constexpr
int MSZ = MDQ * MDQ * MDQ * 9;
149 const int bid = MFEM_BLOCK_ID(x);
150 MFEM_SHARED
double BG[2][MQ1*MD1];
151 MFEM_SHARED
double SM0[SMEM?MSZ:1];
152 MFEM_SHARED
double SM1[SMEM?MSZ:1];
153 double *lm0 = SMEM ? SM0 : GM + MSZ*bid;
154 double *lm1 = SMEM ? SM1 : GM + MSZ*(GRID+bid);
155 double (*DDD)[MD1*MD1*MD1] = (double (*)[MD1*MD1*MD1]) (lm0);
156 double (*DDQ)[MD1*MD1*MQ1] = (double (*)[MD1*MD1*MQ1]) (lm1);
157 double (*DQQ)[MD1*MQ1*MQ1] = (double (*)[MD1*MQ1*MQ1]) (lm0);
158 double (*QQQ)[MQ1*MQ1*MQ1] = (double (*)[MQ1*MQ1*MQ1]) (lm1);
160 kernels::internal::LoadX<MD1>(e,D1D,X,DDD);
161 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,B,G,BG);
163 kernels::internal::GradX<MD1,MQ1>(D1D,Q1D,BG,DDD,DDQ);
164 kernels::internal::GradY<MD1,MQ1>(D1D,Q1D,BG,DDQ,DQQ);
165 kernels::internal::GradZ<MD1,MQ1>(D1D,Q1D,BG,DQQ,QQQ);
167 MFEM_FOREACH_THREAD(qz,z,Q1D)
169 MFEM_FOREACH_THREAD(qy,y,Q1D)
171 MFEM_FOREACH_THREAD(qx,x,Q1D)
174 kernels::internal::PullGrad<MQ1>(Q1D, qx,qy,qz, QQQ, J);
175 Y(qx,qy,qz,e) = kernels::Det<3>(J);
184 void TensorDeterminants(
const int NE,
191 if (NE == 0) {
return; }
193 const int D1D = maps.
ndof;
194 const int Q1D = maps.
nqpt;
195 const double *B = maps.
B.
Read();
196 const double *G = maps.
G.
Read();
197 const double *X = e_vec.
Read();
198 double *Y = q_det.
Write();
200 const int id = (vdim<<8) | (D1D<<4) | Q1D;
206 <<
" are not supported!");
208 "Quadrature rules with more than " 210 Det1D(NE, G, X, Y, D1D, Q1D);
217 case 0x222:
return Det2D<2,2>(NE,B,G,X,Y);
218 case 0x223:
return Det2D<2,3>(NE,B,G,X,Y);
219 case 0x224:
return Det2D<2,4>(NE,B,G,X,Y);
220 case 0x226:
return Det2D<2,6>(NE,B,G,X,Y);
221 case 0x234:
return Det2D<3,4>(NE,B,G,X,Y);
222 case 0x236:
return Det2D<3,6>(NE,B,G,X,Y);
223 case 0x244:
return Det2D<4,4>(NE,B,G,X,Y);
224 case 0x246:
return Det2D<4,6>(NE,B,G,X,Y);
225 case 0x256:
return Det2D<5,6>(NE,B,G,X,Y);
230 MFEM_VERIFY(D1D <= MD,
"Orders higher than " << MD-1
231 <<
" are not supported!");
232 MFEM_VERIFY(Q1D <= MQ,
"Quadrature rules with more than " 233 << MQ <<
" 1D points are not supported!");
234 Det2D(NE,B,G,X,Y,vdim,D1D,Q1D);
243 case 0x324:
return Det3D<2,4>(NE,B,G,X,Y);
244 case 0x333:
return Det3D<3,3>(NE,B,G,X,Y);
245 case 0x335:
return Det3D<3,5>(NE,B,G,X,Y);
246 case 0x336:
return Det3D<3,6>(NE,B,G,X,Y);
252 if (D1D <= MD && Q1D <= MQ)
253 {
return Det3D<0,0,true>(NE,B,G,X,Y,vdim,D1D,Q1D); }
255 return Det3D<0,0,false>(
256 NE,B,G,X,Y,vdim,D1D,Q1D,&d_buff);
260 MFEM_ABORT(
"Kernel " << std::hex <<
id << std::dec <<
" not supported yet");
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
const class FiniteElement * FE
The FiniteElement that created and owns this object.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int MAX_Q1D
Maximum number of 1D quadrature points.
int MAX_DET_1D
Maximum number of points for determinant computation in QuadratureInterpolator.
void forall_3D_grid(int N, int X, int Y, int Z, int G, lambda &&body)
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
int MAX_D1D
Maximum number of 1D nodal points.
int GetDim() const
Returns the reference space dimension for the finite element.
void forall(int N, lambda &&body)
Maximum number of 1D DOFs or quadrature points for the current runtime configuration of the Device (u...
Structure representing the matrices/tensors needed to evaluate (in reference space) the values...
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Array< double > B
Basis functions evaluated at quadrature points.
Array< double > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
double u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.