12#ifndef MFEM_LININTEG_DOMAIN_KERNELS_HPP
13#define MFEM_LININTEG_DOMAIN_KERNELS_HPP
24template <
int T_D1D = 0,
int T_Q1D = 0>
25static void DLFEvalAssemble1D(
const int vdim,
const int ne,
const int d,
26 const int q,
const int map_type,
27 const int *markers,
const real_t *
b,
29 const Vector &coeff,
real_t *y)
32 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
33 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
34 MFEM_VERIFY(q <= Q,
"");
35 MFEM_VERIFY(d <= D,
"");
38 const auto F = coeff.Read();
40 const auto DETJ =
Reshape(detj, q, ne);
41 const bool cst = coeff.Size() == vdim;
42 const auto C = cst ?
Reshape(F, vdim, 1, 1) :
Reshape(F, vdim, q, ne);
43 auto Y =
Reshape(y, d, vdim, ne);
52 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
53 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
55 MFEM_SHARED
real_t sBt[Q * D];
57 kernels::internal::LoadB<D, Q>(d, q, B, sBt);
59 for (
int c = 0; c < vdim; ++c)
61 const real_t cst_val = C(c, 0, 0);
62 MFEM_FOREACH_THREAD(dx, x, d)
65 for (
int qx = 0; qx < q; ++qx)
69 const real_t coeff_val = cst ? cst_val : C(c, qx, e);
70 u += weights[qx] * coeff_val * detJ * Bt(dx, qx);
78template <
int T_D1D = 0,
int T_Q1D = 0>
79static void DLFEvalAssemble2D(
const int vdim,
const int ne,
const int d,
80 const int q,
const int map_type,
81 const int *markers,
const real_t *
b,
83 const Vector &coeff,
real_t *y)
86 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
87 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
88 MFEM_VERIFY(q <= Q,
"");
89 MFEM_VERIFY(d <= D,
"");
92 const auto F = coeff.Read();
94 const auto DETJ =
Reshape(detj, q, q, ne);
95 const auto W =
Reshape(weights, q, q);
96 const bool cst = coeff.Size() == vdim;
97 const auto C = cst ?
Reshape(F, vdim, 1, 1, 1) :
Reshape(F, vdim, q, q, ne);
98 auto Y =
Reshape(y, d, d, vdim, ne);
107 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
108 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
110 MFEM_SHARED
real_t sBt[Q * D];
111 MFEM_SHARED
real_t sQQ[Q * Q];
112 MFEM_SHARED
real_t sQD[Q * D];
115 kernels::internal::LoadB<D, Q>(d, q, B, sBt);
120 for (
int c = 0; c < vdim; ++c)
122 const real_t cst_val = C(c, 0, 0, 0);
123 MFEM_FOREACH_THREAD(x, x, q)
125 MFEM_FOREACH_THREAD(y, y, q)
129 const real_t coeff_val = cst ? cst_val : C(c, x, y, e);
130 QQ(y, x) = W(x, y) * coeff_val * detJ;
134 MFEM_FOREACH_THREAD(qy, y, q)
136 MFEM_FOREACH_THREAD(dx, x, d)
139 for (
int qx = 0; qx < q; ++qx)
141 u += QQ(qy, qx) * Bt(dx, qx);
147 MFEM_FOREACH_THREAD(dy, y, d)
149 MFEM_FOREACH_THREAD(dx, x, d)
152 for (
int qy = 0; qy < q; ++qy)
154 u += QD(qy, dx) * Bt(dy, qy);
156 Y(dx, dy, c, e) +=
u;
164template <
int T_D1D = 0,
int T_Q1D = 0>
165static void DLFEvalAssemble3D(
const int vdim,
const int ne,
const int d,
166 const int q,
const int map_type,
167 const int* markers,
const real_t *
b,
169 const Vector &coeff,
real_t *y)
172 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
173 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
174 MFEM_VERIFY(q <= Q,
"");
175 MFEM_VERIFY(d <= D,
"");
178 const auto F = coeff.Read();
180 const auto DETJ =
Reshape(detj, q, q, q, ne);
181 const auto W =
Reshape(weights, q, q, q);
182 const bool cst_coeff = coeff.Size() == vdim;
184 cst_coeff ?
Reshape(F, vdim, 1, 1, 1, 1) :
Reshape(F, vdim, q, q, q, ne);
186 auto Y =
Reshape(y, d, d, d, vdim, ne);
195 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
196 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
197 constexpr int MQD = (Q >= D) ? Q : D;
201 MFEM_SHARED
real_t sBt[Q * D];
203 kernels::internal::LoadB<D, Q>(d, q, B, sBt);
205 MFEM_SHARED
real_t sQQQ[MQD * MQD * MQD];
208 for (
int c = 0; c < vdim; ++c)
210 const real_t cst_val = C(c, 0, 0, 0, 0);
211 MFEM_FOREACH_THREAD(x, x, q)
213 MFEM_FOREACH_THREAD(y, y, q)
215 for (
int z = 0; z < q; ++z)
221 cst_coeff ? cst_val : C(c, x, y, z, e);
222 QQQ(z, y, x) = W(x, y, z) * coeff_val * detJ;
227 MFEM_FOREACH_THREAD(qx, x, q)
229 MFEM_FOREACH_THREAD(qy, y, q)
231 for (
int dz = 0; dz < d; ++dz)
235 for (
int qz = 0; qz < q; ++qz)
237 const real_t ZYX = QQQ(qz, qy, qx);
238 for (
int dz = 0; dz < d; ++dz)
240 u[dz] += ZYX * Bt(dz, qz);
243 for (
int dz = 0; dz < d; ++dz)
245 QQQ(dz, qy, qx) =
u[dz];
250 MFEM_FOREACH_THREAD(dz, y, d)
252 MFEM_FOREACH_THREAD(qx, x, q)
254 for (
int dy = 0; dy < d; ++dy)
258 for (
int qy = 0; qy < q; ++qy)
260 const real_t zYX = QQQ(dz, qy, qx);
261 for (
int dy = 0; dy < d; ++dy)
263 u[dy] += zYX * Bt(dy, qy);
266 for (
int dy = 0; dy < d; ++dy)
268 QQQ(dz, dy, qx) =
u[dy];
273 MFEM_FOREACH_THREAD(dz, y, d)
275 MFEM_FOREACH_THREAD(dy, x, d)
277 for (
int dx = 0; dx < d; ++dx)
281 for (
int qx = 0; qx < q; ++qx)
283 const real_t zyX = QQQ(dz, dy, qx);
284 for (
int dx = 0; dx < d; ++dx)
286 u[dx] += zyX * Bt(dx, qx);
289 for (
int dx = 0; dx < d; ++dx)
291 Y(dx, dy, dz, c, e) +=
u[dx];
300template <
int DIM,
int T_D1D,
int T_Q1D>
302DomainLFIntegrator::AssembleKernels::Kernel()
307 return DLFEvalAssemble1D<T_D1D, T_Q1D>;
309 return DLFEvalAssemble2D<T_D1D, T_Q1D>;
311 return DLFEvalAssemble3D<T_D1D, T_Q1D>;
void(*)(const int, const int, const int, const int, const int, const int *, const real_t *, const real_t *, const real_t *, const Vector &coeff, real_t *y) AssembleKernelType
args: vdim, ne, d1d, q1d, map_type, markers, B, detJ, W, coeff, y
DeviceTensor< 3, real_t > DeviceCube
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D(int N, int X, int Y, lambda &&body)
DeviceTensor< 2, real_t > DeviceMatrix