12 #ifndef MFEM_DGMASSINV_KERNELS_HPP
13 #define MFEM_DGMASSINV_KERNELS_HPP
16 #include "../linalg/kernels.hpp"
17 #include "kernels.hpp"
25 void MakeReciprocal(
int n,
double *x)
27 MFEM_FORALL(i, n, x[i] = 1.0/x[i]; );
30 template <
int DIM,
int D1D,
int Q1D>
31 MFEM_HOST_DEVICE
inline
32 void DGMassApply(
const int e,
36 const double *pa_data,
42 constexpr
bool use_smem = (D1D > 0 && Q1D > 0);
43 constexpr
bool ACCUM =
false;
44 constexpr
int NBZ = 1;
49 constexpr
int TD1D = D1D ? D1D : 1;
50 constexpr
int TQ1D = Q1D ? Q1D : 1;
53 SmemPAMassApply2D_Element<TD1D,TQ1D,NBZ,ACCUM>(e, NE, B, pa_data, x, y);
57 SmemPAMassApply3D_Element<TD1D,TQ1D,ACCUM>(e, NE, B, pa_data, x, y);
61 MFEM_ABORT_KERNEL(
"Unsupported dimension.");
68 PAMassApply2D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
72 PAMassApply3D_Element<ACCUM>(e, NE, B, Bt, pa_data, x, y, d1d, q1d);
76 MFEM_ABORT_KERNEL(
"Unsupported dimension.");
81 MFEM_HOST_DEVICE
inline
82 void DGMassPreconditioner(
const int e,
93 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
94 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
96 for (
int i = tid; i < ND; i += bxy)
98 Y(i, e) = D(i, e)*X(i, e);
103 MFEM_HOST_DEVICE
inline
104 void DGMassAxpy(
const int e,
117 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
118 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
120 for (
int i = tid; i < ND; i += bxy)
122 Z(i, e) = a*X(i, e) + b*Y(i, e);
128 MFEM_HOST_DEVICE
inline
129 double DGMassDot(
const int e,
138 const int tid = MFEM_THREAD_ID(x) + MFEM_THREAD_SIZE(x)*MFEM_THREAD_ID(y);
139 const int bxy = MFEM_THREAD_SIZE(x)*MFEM_THREAD_SIZE(y);
141 MFEM_SHARED
double s_dot[NB*NB];
144 for (
int i = tid; i < ND; i += bxy) { s_dot[tid] += X(i,e)*Y(i,e); }
147 if (bxy > 512 && tid + 512 < bxy) { s_dot[tid] += s_dot[tid + 512]; }
150 if (bxy > 256 && tid < 256 && tid + 256 < bxy) { s_dot[tid] += s_dot[tid + 256]; }
153 if (bxy > 128 && tid < 128 && tid + 128 < bxy) { s_dot[tid] += s_dot[tid + 128]; }
156 if (bxy > 64 && tid < 64 && tid + 64 < bxy) { s_dot[tid] += s_dot[tid + 64]; }
159 if (bxy > 32 && tid < 32 && tid + 32 < bxy) { s_dot[tid] += s_dot[tid + 32]; }
162 if (bxy > 16 && tid < 16 && tid + 16 < bxy) { s_dot[tid] += s_dot[tid + 16]; }
165 if (bxy > 8 && tid < 8 && tid + 8 < bxy) { s_dot[tid] += s_dot[tid + 8]; }
168 if (bxy > 4 && tid < 4 && tid + 4 < bxy) { s_dot[tid] += s_dot[tid + 4]; }
171 if (bxy > 2 && tid < 2 && tid + 2 < bxy) { s_dot[tid] += s_dot[tid + 2]; }
174 if (bxy > 1 && tid < 1 && tid + 1 < bxy) { s_dot[tid] += s_dot[tid + 1]; }
180 template<
int T_D1D = 0,
int MAX_D1D = 0>
181 MFEM_HOST_DEVICE
inline
182 void DGMassBasis2D(
const int e,
189 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
190 const int D1D = T_D1D ? T_D1D : d1d;
192 const auto b =
Reshape(b_, D1D, D1D);
193 const auto x =
Reshape(x_, D1D, D1D, NE);
194 auto y =
Reshape(y_, D1D, D1D, NE);
196 MFEM_SHARED
double sB[MD1*MD1];
197 MFEM_SHARED
double sm0[MD1*MD1];
198 MFEM_SHARED
double sm1[MD1*MD1];
200 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,
b,sB);
207 kernels::internal::LoadX(e,D1D,x,DD);
208 kernels::internal::EvalX(D1D,D1D,B,DD,DQ);
209 kernels::internal::EvalY(D1D,D1D,B,DQ,QQ);
211 MFEM_FOREACH_THREAD(qy,y,D1D)
213 MFEM_FOREACH_THREAD(qx,x,D1D)
215 y(qx,qy,e) = QQ(qx,qy);
221 template<
int T_D1D = 0,
int MAX_D1D = 0>
222 MFEM_HOST_DEVICE
inline
223 void DGMassBasis3D(
const int e,
230 const int D1D = T_D1D ? T_D1D : d1d;
232 const auto b =
Reshape(b_, D1D, D1D);
233 const auto x =
Reshape(x_, D1D, D1D, D1D, NE);
234 auto y =
Reshape(y_, D1D, D1D, D1D, NE);
236 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
238 MFEM_SHARED
double sB[MD1*MD1];
239 MFEM_SHARED
double sm0[MD1*MD1*MD1];
240 MFEM_SHARED
double sm1[MD1*MD1*MD1];
242 kernels::internal::LoadB<MD1,MD1>(D1D,D1D,
b,sB);
250 kernels::internal::LoadX(e,D1D,x,DDD);
251 kernels::internal::EvalX(D1D,D1D,B,DDD,DDQ);
252 kernels::internal::EvalY(D1D,D1D,B,DDQ,DQQ);
253 kernels::internal::EvalZ(D1D,D1D,B,DQQ,QQQ);
255 MFEM_FOREACH_THREAD(qz,z,D1D)
257 MFEM_FOREACH_THREAD(qy,y,D1D)
259 for (
int qx = 0; qx < D1D; ++qx)
261 y(qx,qy,qz,e) = QQQ(qz,qy,qx);
268 template<
int DIM,
int T_D1D = 0,
int MAX_D1D = 0>
269 MFEM_HOST_DEVICE
inline
270 void DGMassBasis(
const int e,
279 DGMassBasis2D<T_D1D, MAX_D1D>(e, NE, b_, x_, y_, d1d);
283 DGMassBasis3D<T_D1D, MAX_D1D>(e, NE, b_, x_, y_, d1d);
287 MFEM_ABORT_KERNEL(
"Dimension not supported.");
DeviceTensor< 2, const double > ConstDeviceMatrix
DeviceTensor< 2, double > DeviceMatrix
DeviceTensor< 3, double > DeviceCube
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.