MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_dgdiffusion_kernels.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_BILININTEG_DGDIFFUSION_KERNELS_HPP
13#define MFEM_BILININTEG_DGDIFFUSION_KERNELS_HPP
14
18#include "../gridfunc.hpp"
19#include "../qfunction.hpp"
20
21/// \cond DO_NOT_DOCUMENT
22namespace mfem
23{
24
25namespace internal
26{
27
28template <int T_D1D = 0, int T_Q1D = 0>
29static void PADGDiffusionApply2D(const int NF, const Array<real_t> &b,
30 const Array<real_t> &bt,
31 const Array<real_t> &g,
32 const Array<real_t> &gt, const real_t sigma,
33 const Vector &pa_data, const Vector &x_,
34 const Vector &dxdn_, Vector &y_, Vector &dydn_,
35 const int d1d = 0, const int q1d = 0)
36{
37 const int D1D = T_D1D ? T_D1D : d1d;
38 const int Q1D = T_Q1D ? T_Q1D : q1d;
39 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
40 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
41
42 auto B_ = Reshape(b.Read(), Q1D, D1D);
43 auto G_ = Reshape(g.Read(), Q1D, D1D);
44
45 auto pa =
46 Reshape(pa_data.Read(), 6, Q1D, NF); // (q, 1/h, J00, J01, J10, J11)
47
48 auto x = Reshape(x_.Read(), D1D, 2, NF);
49 auto y = Reshape(y_.ReadWrite(), D1D, 2, NF);
50 auto dxdn = Reshape(dxdn_.Read(), D1D, 2, NF);
51 auto dydn = Reshape(dydn_.ReadWrite(), D1D, 2, NF);
52
53 const int NBX = std::max(D1D, Q1D);
54
55 mfem::forall_2D(NF, NBX, 2, [=] MFEM_HOST_DEVICE(int f) -> void
56 {
57 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
58 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
59
60 MFEM_SHARED real_t u0[max_D1D];
61 MFEM_SHARED real_t u1[max_D1D];
62 MFEM_SHARED real_t du0[max_D1D];
63 MFEM_SHARED real_t du1[max_D1D];
64
65 MFEM_SHARED real_t Bu0[max_Q1D];
66 MFEM_SHARED real_t Bu1[max_Q1D];
67 MFEM_SHARED real_t Bdu0[max_Q1D];
68 MFEM_SHARED real_t Bdu1[max_Q1D];
69
70 MFEM_SHARED real_t r[max_Q1D];
71
72 MFEM_SHARED real_t BG[2 * max_D1D * max_Q1D];
73 DeviceMatrix B(BG, Q1D, D1D);
74 DeviceMatrix G(BG + D1D * Q1D, Q1D, D1D);
75
76 if (MFEM_THREAD_ID(y) == 0)
77 {
78 MFEM_FOREACH_THREAD(p, x, Q1D)
79 {
80 for (int d = 0; d < D1D; ++d)
81 {
82 B(p, d) = B_(p, d);
83 G(p, d) = G_(p, d);
84 }
85 }
86 }
87 MFEM_SYNC_THREAD;
88
89 // copy edge values to u0, u1 and copy edge normals to du0, du1
90 MFEM_FOREACH_THREAD(side, y, 2)
91 {
92 real_t *u = (side == 0) ? u0 : u1;
93 real_t *du = (side == 0) ? du0 : du1;
94 MFEM_FOREACH_THREAD(d, x, D1D)
95 {
96 u[d] = x(d, side, f);
97 du[d] = dxdn(d, side, f);
98 }
99 }
100 MFEM_SYNC_THREAD;
101
102 // eval @ quad points
103 MFEM_FOREACH_THREAD(side, y, 2)
104 {
105 real_t *u = (side == 0) ? u0 : u1;
106 real_t *du = (side == 0) ? du0 : du1;
107 real_t *Bu = (side == 0) ? Bu0 : Bu1;
108 real_t *Bdu = (side == 0) ? Bdu0 : Bdu1;
109
110 MFEM_FOREACH_THREAD(p, x, Q1D)
111 {
112 const real_t Je_side[] = {pa(2 + 2 * side, p, f),
113 pa(2 + 2 * side + 1, p, f)
114 };
115
116 Bu[p] = 0.0;
117 Bdu[p] = 0.0;
118
119 for (int d = 0; d < D1D; ++d)
120 {
121 const real_t b = B(p, d);
122 const real_t g = G(p, d);
123
124 Bu[p] += b * u[d];
125 Bdu[p] += Je_side[0] * b * du[d] + Je_side[1] * g * u[d];
126 }
127 }
128 }
129 MFEM_SYNC_THREAD;
130
131 // term - < {Q du/dn}, [v] > + kappa * < {Q/h} [u], [v] >:
132 if (MFEM_THREAD_ID(y) == 0)
133 {
134 MFEM_FOREACH_THREAD(p, x, Q1D)
135 {
136 const real_t q = pa(0, p, f);
137 const real_t hi = pa(1, p, f);
138 const real_t jump = Bu0[p] - Bu1[p];
139 const real_t avg = Bdu0[p] + Bdu1[p]; // = {Q du/dn} * w * det(J)
140 r[p] = -avg + hi * q * jump;
141 }
142 }
143 MFEM_SYNC_THREAD;
144
145 MFEM_FOREACH_THREAD(d, x, D1D)
146 {
147 real_t Br = 0.0;
148
149 for (int p = 0; p < Q1D; ++p)
150 {
151 Br += B(p, d) * r[p];
152 }
153
154 u0[d] = Br; // overwrite u0, u1
155 u1[d] = -Br;
156 } // for d
157 MFEM_SYNC_THREAD;
158
159 MFEM_FOREACH_THREAD(side, y, 2)
160 {
161 real_t *du = (side == 0) ? du0 : du1;
162 MFEM_FOREACH_THREAD(d, x, D1D) { du[d] = 0.0; }
163 }
164 MFEM_SYNC_THREAD;
165
166 // term sigma * < [u], {Q dv/dn} >
167 MFEM_FOREACH_THREAD(side, y, 2)
168 {
169 real_t *const du = (side == 0) ? du0 : du1;
170 real_t *const u = (side == 0) ? u0 : u1;
171
172 MFEM_FOREACH_THREAD(d, x, D1D)
173 {
174 for (int p = 0; p < Q1D; ++p)
175 {
176 const real_t Je[] = {pa(2 + 2 * side, p, f),
177 pa(2 + 2 * side + 1, p, f)
178 };
179 const real_t jump = Bu0[p] - Bu1[p];
180 const real_t r_p = Je[0] * jump; // normal
181 const real_t w_p = Je[1] * jump; // tangential
182 du[d] += sigma * B(p, d) * r_p;
183 u[d] += sigma * G(p, d) * w_p;
184 }
185 }
186 }
187 MFEM_SYNC_THREAD;
188
189 MFEM_FOREACH_THREAD(side, y, 2)
190 {
191 real_t *u = (side == 0) ? u0 : u1;
192 real_t *du = (side == 0) ? du0 : du1;
193 MFEM_FOREACH_THREAD(d, x, D1D)
194 {
195 y(d, side, f) += u[d];
196 dydn(d, side, f) += du[d];
197 }
198 }
199 }); // mfem::forall
200}
201
202template <int T_D1D = 0, int T_Q1D = 0>
203static void PADGDiffusionApply3D(const int NF, const Array<real_t> &b,
204 const Array<real_t> &bt,
205 const Array<real_t> &g,
206 const Array<real_t> &gt, const real_t sigma,
207 const Vector &pa_data, const Vector &x_,
208 const Vector &dxdn_, Vector &y_, Vector &dydn_,
209 const int d1d = 0, const int q1d = 0)
210{
211 const int D1D = T_D1D ? T_D1D : d1d;
212 const int Q1D = T_Q1D ? T_Q1D : q1d;
213 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
214 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
215
216 auto B_ = Reshape(b.Read(), Q1D, D1D);
217 auto G_ = Reshape(g.Read(), Q1D, D1D);
218
219 // (J0[0], J0[1], J0[2], J1[0], J1[1], J1[2], q/h)
220 auto pa = Reshape(pa_data.Read(), 7, Q1D, Q1D, NF);
221
222 auto x = Reshape(x_.Read(), D1D, D1D, 2, NF);
223 auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
224 auto dxdn = Reshape(dxdn_.Read(), D1D, D1D, 2, NF);
225 auto dydn = Reshape(dydn_.ReadWrite(), D1D, D1D, 2, NF);
226
227 const int NBX = std::max(D1D, Q1D);
228
229 mfem::forall_3D(NF, NBX, NBX, 2, [=] MFEM_HOST_DEVICE(int f) -> void
230 {
231 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
232 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
233
234 MFEM_SHARED real_t u0[max_Q1D][max_Q1D];
235 MFEM_SHARED real_t u1[max_Q1D][max_Q1D];
236
237 MFEM_SHARED real_t du0[max_Q1D][max_Q1D];
238 MFEM_SHARED real_t du1[max_Q1D][max_Q1D];
239
240 MFEM_SHARED real_t Gu0[max_Q1D][max_Q1D];
241 MFEM_SHARED real_t Gu1[max_Q1D][max_Q1D];
242
243 MFEM_SHARED real_t Bu0[max_Q1D][max_Q1D];
244 MFEM_SHARED real_t Bu1[max_Q1D][max_Q1D];
245
246 MFEM_SHARED real_t Bdu0[max_Q1D][max_Q1D];
247 MFEM_SHARED real_t Bdu1[max_Q1D][max_Q1D];
248
249 MFEM_SHARED real_t kappa_Qh[max_Q1D][max_Q1D];
250
251 MFEM_SHARED real_t nJe[2][max_Q1D][max_Q1D][3];
252 MFEM_SHARED real_t BG[2 * max_D1D * max_Q1D];
253
254 // some buffers are reused multiple times, but for clarity have new names:
255 real_t(*Bj0)[max_Q1D] = Bu0;
256 real_t(*Bj1)[max_Q1D] = Bu1;
257 real_t(*Bjn0)[max_Q1D] = Bdu0;
258 real_t(*Bjn1)[max_Q1D] = Bdu1;
259 real_t(*Gj0)[max_Q1D] = Gu0;
260 real_t(*Gj1)[max_Q1D] = Gu1;
261
262 DeviceMatrix B(BG, Q1D, D1D);
263 DeviceMatrix G(BG + D1D * Q1D, Q1D, D1D);
264
265 // copy face values to u0, u1 and copy normals to du0, du1
266 MFEM_FOREACH_THREAD(side, z, 2)
267 {
268 real_t(*u)[max_Q1D] = (side == 0) ? u0 : u1;
269 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
270
271 MFEM_FOREACH_THREAD(d2, x, D1D)
272 {
273 MFEM_FOREACH_THREAD(d1, y, D1D)
274 {
275 u[d2][d1] = x(d1, d2, side,
276 f); // copy transposed for better memory access
277 du[d2][d1] = dxdn(d1, d2, side, f);
278 }
279 }
280
281 MFEM_FOREACH_THREAD(p1, x, Q1D)
282 {
283 MFEM_FOREACH_THREAD(p2, y, Q1D)
284 {
285 for (int l = 0; l < 3; ++l)
286 {
287 nJe[side][p2][p1][l] = pa(3 * side + l, p1, p2, f);
288 }
289
290 if (side == 0)
291 {
292 kappa_Qh[p2][p1] = pa(6, p1, p2, f);
293 }
294 }
295 }
296
297 if (side == 0)
298 {
299 MFEM_FOREACH_THREAD(p, x, Q1D)
300 {
301 MFEM_FOREACH_THREAD(d, y, D1D)
302 {
303 B(p, d) = B_(p, d);
304 G(p, d) = G_(p, d);
305 }
306 }
307 }
308 }
309 MFEM_SYNC_THREAD;
310
311 // eval u and normal derivative @ quad points
312 MFEM_FOREACH_THREAD(side, z, 2)
313 {
314 real_t(*u)[max_Q1D] = (side == 0) ? u0 : u1;
315 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
316 real_t(*Bu)[max_Q1D] = (side == 0) ? Bu0 : Bu1;
317 real_t(*Bdu)[max_Q1D] = (side == 0) ? Bdu0 : Bdu1;
318 real_t(*Gu)[max_Q1D] = (side == 0) ? Gu0 : Gu1;
319
320 MFEM_FOREACH_THREAD(p1, x, Q1D)
321 {
322 MFEM_FOREACH_THREAD(d2, y, D1D)
323 {
324 real_t bu = 0.0;
325 real_t bdu = 0.0;
326 real_t gu = 0.0;
327
328 for (int d1 = 0; d1 < D1D; ++d1)
329 {
330 const real_t b = B(p1, d1);
331 const real_t g = G(p1, d1);
332
333 bu += b * u[d2][d1];
334 bdu += b * du[d2][d1];
335 gu += g * u[d2][d1];
336 }
337
338 Bu[p1][d2] = bu;
339 Bdu[p1][d2] = bdu;
340 Gu[p1][d2] = gu;
341 }
342 }
343 }
344 MFEM_SYNC_THREAD;
345
346 MFEM_FOREACH_THREAD(side, z, 2)
347 {
348 real_t(*u)[max_Q1D] = (side == 0) ? u0 : u1;
349 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
350 real_t(*Bu)[max_Q1D] = (side == 0) ? Bu0 : Bu1;
351 real_t(*Gu)[max_Q1D] = (side == 0) ? Gu0 : Gu1;
352 real_t(*Bdu)[max_Q1D] = (side == 0) ? Bdu0 : Bdu1;
353
354 MFEM_FOREACH_THREAD(p2, x, Q1D)
355 {
356 MFEM_FOREACH_THREAD(p1, y, Q1D)
357 {
358 const real_t *Je = nJe[side][p2][p1];
359
360 real_t bbu = 0.0;
361 real_t bgu = 0.0;
362 real_t gbu = 0.0;
363 real_t bbdu = 0.0;
364
365 for (int d2 = 0; d2 < D1D; ++d2)
366 {
367 const real_t b = B(p2, d2);
368 const real_t g = G(p2, d2);
369 bbu += b * Bu[p1][d2];
370 gbu += g * Bu[p1][d2];
371 bgu += b * Gu[p1][d2];
372 bbdu += b * Bdu[p1][d2];
373 }
374
375 u[p2][p1] = bbu;
376 // du <- Q du/dn * w * det(J)
377 du[p2][p1] = Je[0] * bbdu + Je[1] * bgu + Je[2] * gbu;
378 }
379 }
380 }
381 MFEM_SYNC_THREAD;
382
383 MFEM_FOREACH_THREAD(side, z, 2)
384 {
385 real_t(*Bj)[max_Q1D] = (side == 0) ? Bj0 : Bj1;
386 real_t(*Bjn)[max_Q1D] = (side == 0) ? Bjn0 : Bjn1;
387 real_t(*Gj)[max_Q1D] = (side == 0) ? Gj0 : Gj1;
388
389 MFEM_FOREACH_THREAD(d1, x, D1D)
390 {
391 MFEM_FOREACH_THREAD(p2, y, Q1D)
392 {
393 real_t bj = 0.0;
394 real_t bjn = 0.0;
395 real_t gj = 0.0;
396 real_t br = 0.0;
397
398 for (int p1 = 0; p1 < Q1D; ++p1)
399 {
400 const real_t b = B(p1, d1);
401 const real_t g = G(p1, d1);
402
403 const real_t *Je = nJe[side][p2][p1];
404
405 const real_t jump = u0[p2][p1] - u1[p2][p1];
406 const real_t avg = du0[p2][p1] + du1[p2][p1];
407
408 // r = - < {Q du/dn}, [v] > + kappa * < {Q/h} [u], [v] >
409 const real_t r = -avg + kappa_Qh[p2][p1] * jump;
410
411 // bj, gj, bjn contribute to sigma term
412 bj += b * Je[0] * jump;
413 gj += g * Je[1] * jump;
414 bjn += b * Je[2] * jump;
415
416 br += b * r;
417 }
418
419 Bj[d1][p2] = sigma * bj;
420 Bjn[d1][p2] = sigma * bjn;
421
422 // group br and gj together since we will multiply them both by B
423 // and then sum
424 const real_t sgn = (side == 0) ? 1.0 : -1.0;
425 Gj[d1][p2] = sgn * br + sigma * gj;
426 }
427 }
428 }
429 MFEM_SYNC_THREAD;
430
431 MFEM_FOREACH_THREAD(side, z, 2)
432 {
433 real_t(*u)[max_Q1D] = (side == 0) ? u0 : u1;
434 real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
435 real_t(*Bj)[max_Q1D] = (side == 0) ? Bj0 : Bj1;
436 real_t(*Bjn)[max_Q1D] = (side == 0) ? Bjn0 : Bjn1;
437 real_t(*Gj)[max_Q1D] = (side == 0) ? Gj0 : Gj1;
438
439 MFEM_FOREACH_THREAD(d2, x, D1D)
440 {
441 MFEM_FOREACH_THREAD(d1, y, D1D)
442 {
443 real_t bbj = 0.0;
444 real_t gbj = 0.0;
445 real_t bgj = 0.0;
446
447 for (int p2 = 0; p2 < Q1D; ++p2)
448 {
449 const real_t b = B(p2, d2);
450 const real_t g = G(p2, d2);
451
452 bbj += b * Bj[d1][p2];
453 bgj += b * Gj[d1][p2];
454 gbj += g * Bjn[d1][p2];
455 }
456
457 du[d2][d1] = bbj;
458 u[d2][d1] = bgj + gbj;
459 }
460 }
461 }
462 MFEM_SYNC_THREAD;
463
464 // map back to y and dydn
465 MFEM_FOREACH_THREAD(side, z, 2)
466 {
467 const real_t(*u)[max_Q1D] = (side == 0) ? u0 : u1;
468 const real_t(*du)[max_Q1D] = (side == 0) ? du0 : du1;
469
470 MFEM_FOREACH_THREAD(d2, x, D1D)
471 {
472 MFEM_FOREACH_THREAD(d1, y, D1D)
473 {
474 y(d1, d2, side, f) += u[d2][d1];
475 dydn(d1, d2, side, f) += du[d2][d1];
476 }
477 }
478 }
479 });
480}
481
482} // namespace internal
483
484template <int DIM, int D1D, int Q1D>
486DGDiffusionIntegrator::ApplyPAKernels::Kernel()
487{
488 if constexpr (DIM == 2)
489 {
490 return internal::PADGDiffusionApply2D<D1D, Q1D>;
491 }
492 else if constexpr (DIM == 3)
493 {
494 return internal::PADGDiffusionApply3D<D1D, Q1D>;
495 }
496 MFEM_ABORT("");
497}
498} // namespace mfem
499/// \endcond DO_NOT_DOCUMENT
500#endif
void(*)(const int, const Array< real_t > &, const Array< real_t > &, const Array< real_t > &, const Array< real_t > &, const real_t, const Vector &, const Vector &_, const Vector &, Vector &, Vector &, const int, const int) ApplyKernelType
real_t sigma(const Vector &x)
Definition maxwell.cpp:91
real_t b
Definition lissajous.cpp:42
constexpr int DIM
mfem::real_t real_t
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:925
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:937
float real_t
Definition config.hpp:46
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
DeviceTensor< 2, real_t > DeviceMatrix
Definition dtensor.hpp:150
real_t p(const Vector &x, real_t t)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128