12#ifndef MFEM_BILININTEG_DGDIFFUSION_KERNELS_HPP
13#define MFEM_BILININTEG_DGDIFFUSION_KERNELS_HPP
28template <
int T_D1D = 0,
int T_Q1D = 0>
29static void PADGDiffusionApply2D(
const int NF,
const Array<real_t> &
b,
30 const Array<real_t> &bt,
31 const Array<real_t> &g,
33 const Vector &pa_data,
const Vector &x_,
34 const Vector &dxdn_, Vector &y_, Vector &dydn_,
35 const int d1d = 0,
const int q1d = 0)
37 const int D1D = T_D1D ? T_D1D : d1d;
38 const int Q1D = T_Q1D ? T_Q1D : q1d;
42 auto B_ =
Reshape(
b.Read(), Q1D, D1D);
43 auto G_ =
Reshape(g.Read(), Q1D, D1D);
46 Reshape(pa_data.Read(), 6, Q1D, NF);
48 auto x =
Reshape(x_.Read(), D1D, 2, NF);
49 auto y =
Reshape(y_.ReadWrite(), D1D, 2, NF);
50 auto dxdn =
Reshape(dxdn_.Read(), D1D, 2, NF);
51 auto dydn =
Reshape(dydn_.ReadWrite(), D1D, 2, NF);
53 const int NBX = std::max(D1D, Q1D);
57 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
58 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
60 MFEM_SHARED
real_t u0[max_D1D];
61 MFEM_SHARED
real_t u1[max_D1D];
62 MFEM_SHARED
real_t du0[max_D1D];
63 MFEM_SHARED
real_t du1[max_D1D];
65 MFEM_SHARED
real_t Bu0[max_Q1D];
66 MFEM_SHARED
real_t Bu1[max_Q1D];
67 MFEM_SHARED
real_t Bdu0[max_Q1D];
68 MFEM_SHARED
real_t Bdu1[max_Q1D];
70 MFEM_SHARED
real_t r[max_Q1D];
72 MFEM_SHARED
real_t BG[2 * max_D1D * max_Q1D];
76 if (MFEM_THREAD_ID(y) == 0)
78 MFEM_FOREACH_THREAD(
p, x, Q1D)
80 for (
int d = 0; d < D1D; ++d)
90 MFEM_FOREACH_THREAD(side, y, 2)
92 real_t *
u = (side == 0) ? u0 : u1;
93 real_t *du = (side == 0) ? du0 : du1;
94 MFEM_FOREACH_THREAD(d, x, D1D)
97 du[d] = dxdn(d, side,
f);
103 MFEM_FOREACH_THREAD(side, y, 2)
105 real_t *
u = (side == 0) ? u0 : u1;
106 real_t *du = (side == 0) ? du0 : du1;
107 real_t *Bu = (side == 0) ? Bu0 : Bu1;
108 real_t *Bdu = (side == 0) ? Bdu0 : Bdu1;
110 MFEM_FOREACH_THREAD(
p, x, Q1D)
112 const real_t Je_side[] = {pa(2 + 2 * side,
p,
f),
113 pa(2 + 2 * side + 1,
p,
f)
119 for (
int d = 0; d < D1D; ++d)
125 Bdu[
p] += Je_side[0] *
b * du[d] + Je_side[1] * g *
u[d];
132 if (MFEM_THREAD_ID(y) == 0)
134 MFEM_FOREACH_THREAD(
p, x, Q1D)
138 const real_t jump = Bu0[
p] - Bu1[
p];
139 const real_t avg = Bdu0[
p] + Bdu1[
p];
140 r[
p] = -avg + hi * q * jump;
145 MFEM_FOREACH_THREAD(d, x, D1D)
149 for (
int p = 0;
p < Q1D; ++
p)
151 Br += B(
p, d) * r[
p];
159 MFEM_FOREACH_THREAD(side, y, 2)
161 real_t *du = (side == 0) ? du0 : du1;
162 MFEM_FOREACH_THREAD(d, x, D1D) { du[d] = 0.0; }
167 MFEM_FOREACH_THREAD(side, y, 2)
169 real_t *
const du = (side == 0) ? du0 : du1;
170 real_t *
const u = (side == 0) ? u0 : u1;
172 MFEM_FOREACH_THREAD(d, x, D1D)
174 for (
int p = 0;
p < Q1D; ++
p)
176 const real_t Je[] = {pa(2 + 2 * side,
p,
f),
177 pa(2 + 2 * side + 1,
p,
f)
179 const real_t jump = Bu0[
p] - Bu1[
p];
180 const real_t r_p = Je[0] * jump;
181 const real_t w_p = Je[1] * jump;
182 du[d] +=
sigma * B(
p, d) * r_p;
189 MFEM_FOREACH_THREAD(side, y, 2)
191 real_t *
u = (side == 0) ? u0 : u1;
192 real_t *du = (side == 0) ? du0 : du1;
193 MFEM_FOREACH_THREAD(d, x, D1D)
195 y(d, side,
f) +=
u[d];
196 dydn(d, side,
f) += du[d];
202template <
int T_D1D = 0,
int T_Q1D = 0>
203static void PADGDiffusionApply3D(
const int NF,
const Array<real_t> &
b,
204 const Array<real_t> &bt,
205 const Array<real_t> &g,
207 const Vector &pa_data,
const Vector &x_,
208 const Vector &dxdn_, Vector &y_, Vector &dydn_,
209 const int d1d = 0,
const int q1d = 0)
211 const int D1D = T_D1D ? T_D1D : d1d;
212 const int Q1D = T_Q1D ? T_Q1D : q1d;
216 auto B_ =
Reshape(
b.Read(), Q1D, D1D);
217 auto G_ =
Reshape(g.Read(), Q1D, D1D);
220 auto pa =
Reshape(pa_data.Read(), 7, Q1D, Q1D, NF);
222 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
223 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
224 auto dxdn =
Reshape(dxdn_.Read(), D1D, D1D, 2, NF);
225 auto dydn =
Reshape(dydn_.ReadWrite(), D1D, D1D, 2, NF);
227 const int NBX = std::max(D1D, Q1D);
231 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
232 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
234 MFEM_SHARED
real_t u0[max_Q1D][max_Q1D];
235 MFEM_SHARED
real_t u1[max_Q1D][max_Q1D];
237 MFEM_SHARED
real_t du0[max_Q1D][max_Q1D];
238 MFEM_SHARED
real_t du1[max_Q1D][max_Q1D];
240 MFEM_SHARED
real_t Gu0[max_Q1D][max_Q1D];
241 MFEM_SHARED
real_t Gu1[max_Q1D][max_Q1D];
243 MFEM_SHARED
real_t Bu0[max_Q1D][max_Q1D];
244 MFEM_SHARED
real_t Bu1[max_Q1D][max_Q1D];
246 MFEM_SHARED
real_t Bdu0[max_Q1D][max_Q1D];
247 MFEM_SHARED
real_t Bdu1[max_Q1D][max_Q1D];
249 MFEM_SHARED
real_t kappa_Qh[max_Q1D][max_Q1D];
251 MFEM_SHARED
real_t nJe[2][max_Q1D][max_Q1D][3];
252 MFEM_SHARED
real_t BG[2 * max_D1D * max_Q1D];
255 real_t(*Bj0)[max_Q1D] = Bu0;
256 real_t(*Bj1)[max_Q1D] = Bu1;
257 real_t(*Bjn0)[max_Q1D] = Bdu0;
258 real_t(*Bjn1)[max_Q1D] = Bdu1;
259 real_t(*Gj0)[max_Q1D] = Gu0;
260 real_t(*Gj1)[max_Q1D] = Gu1;
266 MFEM_FOREACH_THREAD(side, z, 2)
268 real_t(*
u)[max_Q1D] = (side == 0) ? u0 : u1;
269 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
271 MFEM_FOREACH_THREAD(d2, x, D1D)
273 MFEM_FOREACH_THREAD(d1, y, D1D)
275 u[d2][d1] = x(d1, d2, side,
277 du[d2][d1] = dxdn(d1, d2, side,
f);
281 MFEM_FOREACH_THREAD(p1, x, Q1D)
283 MFEM_FOREACH_THREAD(p2, y, Q1D)
285 for (
int l = 0; l < 3; ++l)
287 nJe[side][p2][p1][l] = pa(3 * side + l, p1, p2,
f);
292 kappa_Qh[p2][p1] = pa(6, p1, p2,
f);
299 MFEM_FOREACH_THREAD(
p, x, Q1D)
301 MFEM_FOREACH_THREAD(d, y, D1D)
312 MFEM_FOREACH_THREAD(side, z, 2)
314 real_t(*
u)[max_Q1D] = (side == 0) ? u0 : u1;
315 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
316 real_t(*Bu)[max_Q1D] = (side == 0) ? Bu0 : Bu1;
317 real_t(*Bdu)[max_Q1D] = (side == 0) ? Bdu0 : Bdu1;
318 real_t(*Gu)[max_Q1D] = (side == 0) ? Gu0 : Gu1;
320 MFEM_FOREACH_THREAD(p1, x, Q1D)
322 MFEM_FOREACH_THREAD(d2, y, D1D)
328 for (
int d1 = 0; d1 < D1D; ++d1)
331 const real_t g = G(p1, d1);
334 bdu +=
b * du[d2][d1];
346 MFEM_FOREACH_THREAD(side, z, 2)
348 real_t(*
u)[max_Q1D] = (side == 0) ? u0 : u1;
349 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
350 real_t(*Bu)[max_Q1D] = (side == 0) ? Bu0 : Bu1;
351 real_t(*Gu)[max_Q1D] = (side == 0) ? Gu0 : Gu1;
352 real_t(*Bdu)[max_Q1D] = (side == 0) ? Bdu0 : Bdu1;
354 MFEM_FOREACH_THREAD(p2, x, Q1D)
356 MFEM_FOREACH_THREAD(p1, y, Q1D)
358 const real_t *Je = nJe[side][p2][p1];
365 for (
int d2 = 0; d2 < D1D; ++d2)
368 const real_t g = G(p2, d2);
369 bbu +=
b * Bu[p1][d2];
370 gbu += g * Bu[p1][d2];
371 bgu +=
b * Gu[p1][d2];
372 bbdu +=
b * Bdu[p1][d2];
377 du[p2][p1] = Je[0] * bbdu + Je[1] * bgu + Je[2] * gbu;
383 MFEM_FOREACH_THREAD(side, z, 2)
385 real_t(*Bj)[max_Q1D] = (side == 0) ? Bj0 : Bj1;
386 real_t(*Bjn)[max_Q1D] = (side == 0) ? Bjn0 : Bjn1;
387 real_t(*Gj)[max_Q1D] = (side == 0) ? Gj0 : Gj1;
389 MFEM_FOREACH_THREAD(d1, x, D1D)
391 MFEM_FOREACH_THREAD(p2, y, Q1D)
398 for (
int p1 = 0; p1 < Q1D; ++p1)
401 const real_t g = G(p1, d1);
403 const real_t *Je = nJe[side][p2][p1];
405 const real_t jump = u0[p2][p1] - u1[p2][p1];
406 const real_t avg = du0[p2][p1] + du1[p2][p1];
409 const real_t r = -avg + kappa_Qh[p2][p1] * jump;
412 bj +=
b * Je[0] * jump;
413 gj += g * Je[1] * jump;
414 bjn +=
b * Je[2] * jump;
419 Bj[d1][p2] =
sigma * bj;
420 Bjn[d1][p2] =
sigma * bjn;
424 const real_t sgn = (side == 0) ? 1.0 : -1.0;
425 Gj[d1][p2] = sgn * br +
sigma * gj;
431 MFEM_FOREACH_THREAD(side, z, 2)
433 real_t(*
u)[max_Q1D] = (side == 0) ? u0 : u1;
434 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
435 real_t(*Bj)[max_Q1D] = (side == 0) ? Bj0 : Bj1;
436 real_t(*Bjn)[max_Q1D] = (side == 0) ? Bjn0 : Bjn1;
437 real_t(*Gj)[max_Q1D] = (side == 0) ? Gj0 : Gj1;
439 MFEM_FOREACH_THREAD(d2, x, D1D)
441 MFEM_FOREACH_THREAD(d1, y, D1D)
447 for (
int p2 = 0; p2 < Q1D; ++p2)
450 const real_t g = G(p2, d2);
452 bbj +=
b * Bj[d1][p2];
453 bgj +=
b * Gj[d1][p2];
454 gbj += g * Bjn[d1][p2];
458 u[d2][d1] = bgj + gbj;
465 MFEM_FOREACH_THREAD(side, z, 2)
467 const real_t(*
u)[max_Q1D] = (side == 0) ? u0 : u1;
468 const real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
470 MFEM_FOREACH_THREAD(d2, x, D1D)
472 MFEM_FOREACH_THREAD(d1, y, D1D)
474 y(d1, d2, side,
f) +=
u[d2][d1];
475 dydn(d1, d2, side,
f) += du[d2][d1];
484template <
int DIM,
int D1D,
int Q1D>
486DGDiffusionIntegrator::ApplyPAKernels::Kernel()
488 if constexpr (
DIM == 2)
490 return internal::PADGDiffusionApply2D<D1D, Q1D>;
492 else if constexpr (
DIM == 3)
494 return internal::PADGDiffusionApply3D<D1D, Q1D>;
void(*)(const int, const Array< real_t > &, const Array< real_t > &, const Array< real_t > &, const Array< real_t > &, const real_t, const Vector &, const Vector &_, const Vector &, Vector &, Vector &, const int, const int) ApplyKernelType
real_t sigma(const Vector &x)
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D(int N, int X, int Y, lambda &&body)
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
std::function< real_t(const Vector &)> f(real_t mass_coeff)
DeviceTensor< 2, real_t > DeviceMatrix
real_t p(const Vector &x, real_t t)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.