12 #ifndef MFEM_DGMASSINV_KERNELS_HPP 13 #define MFEM_DGMASSINV_KERNELS_HPP 15 #include "../linalg/kernels.hpp" 16 #include "kernels.hpp" 17 #include "integ/bilininteg_mass_kernels.hpp" 25 template <
int DIM,
int D1D,
int Q1D>
26 MFEM_HOST_DEVICE
inline 27 void DGMassApply(
const int e,
31 const double *pa_data,
37 constexpr
bool use_smem = (D1D > 0 && Q1D > 0);
38 constexpr
bool ACCUM =
false;
39 constexpr
int NBZ = 1;
44 constexpr
int TD1D = D1D ? D1D : 1;
45 constexpr
int TQ1D = Q1D ? Q1D : 1;
48 SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);
52 SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);
56 MFEM_ABORT_KERNEL(
"Unsupported dimension.");
63 PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
67 PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
71 MFEM_ABORT_KERNEL(
"Unsupported dimension.");
76 MFEM_HOST_DEVICE
inline 77 void DGMassPreconditioner(
const int e,
88 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
89 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
91 for (
int i = tid; i < ND; i += bxy)
93 Y(i, e) = D(i, e)*X(i, e);
98 MFEM_HOST_DEVICE
inline 99 void DGMassAxpy(
const int e,
112 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
113 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
115 for (
int i = tid; i < ND; i += bxy)
117 Z(i, e) =
a*X(i, e) +
b*Y(i, e);
123 MFEM_HOST_DEVICE
inline 124 double DGMassDot(
const int e,
133 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
134 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
136 MFEM_SHARED
double s_dot[NB*NB];
139 for (
int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }
142 if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }
145 if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }
148 if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }
151 if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }
154 if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }
157 if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }
160 if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }
163 if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }
166 if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }
169 if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }
175 template<
int T_D1D = 0>
176 MFEM_HOST_DEVICE
inline 177 void DGMassBasis2D(
const int e,
184 constexpr
int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
185 const int D1D = T_D1D ? T_D1D : d1d;
187 const auto b =
Reshape(b_, D1D, D1D);
188 const auto x =
Reshape(x_, D1D, D1D, NE);
189 auto y =
Reshape(y_, D1D, D1D, NE);
191 MFEM_SHARED
double sB[MD1*MD1];
192 MFEM_SHARED
double sm0[MD1*MD1];
193 MFEM_SHARED
double sm1[MD1*MD1];
195 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,
b,sB);
202 kernels::internal::LoadX(e,D1D,x,DD);
203 kernels::internal::EvalX(D1D,D1D,B,DD,DQ);
204 kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);
206 MFEM_FOREACH_THREAD(qy,y,D1D)
208 MFEM_FOREACH_THREAD(qx,x,D1D)
210 y(qx,qy,e) = QQ(qx,qy);
216 template<
int T_D1D = 0>
217 MFEM_HOST_DEVICE
inline 218 void DGMassBasis3D(
const int e,
225 const int D1D = T_D1D ? T_D1D : d1d;
227 const auto b =
Reshape(b_, D1D, D1D);
228 const auto x =
Reshape(x_, D1D, D1D, D1D, NE);
229 auto y =
Reshape(y_, D1D, D1D, D1D, NE);
231 constexpr
int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
233 MFEM_SHARED
double sB[MD1*MD1];
234 MFEM_SHARED
double sm0[MD1*MD1*MD1];
235 MFEM_SHARED
double sm1[MD1*MD1*MD1];
237 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,
b,sB);
245 kernels::internal::LoadX(e,D1D,x,DDD);
246 kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);
247 kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);
248 kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);
250 MFEM_FOREACH_THREAD(qz,z,D1D)
252 MFEM_FOREACH_THREAD(qy,y,D1D)
254 for (
int qx = 0; qx < D1D; ++qx)
256 y(qx,qy,qz,e) = QQQ(qz,qy,qx);
263 template<
int DIM,
int T_D1D = 0>
264 MFEM_HOST_DEVICE
inline 265 void DGMassBasis(
const int e,
274 DGMassBasis2D<T_D1D>(e, NE, b_, x_, y_, d1d);
278 DGMassBasis3D<T_D1D>(e, NE, b_, x_, y_, d1d);
282 MFEM_ABORT_KERNEL(
"Dimension not supported.");
DeviceTensor< 2, const double > ConstDeviceMatrix
DeviceTensor< 2, double > DeviceMatrix
DeviceTensor< 3, double > DeviceCube
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.