MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_dgdiffusion_pa.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
14#include "../gridfunc.hpp"
15#include "../qfunction.hpp"
17
18using namespace std;
19
20namespace mfem
21{
22
23static void PADGDiffusionSetup2D(const int Q1D,
24 const int NE,
25 const int NF,
26 const Array<real_t> &w,
27 const GeometricFactors &el_geom,
28 const FaceGeometricFactors &face_geom,
29 const FaceNeighborGeometricFactors *nbr_geom,
30 const Vector &q,
31 const real_t sigma,
32 const real_t kappa,
33 Vector &pa_data,
34 const Array<int> &face_info_)
35{
36 const auto J_loc = Reshape(el_geom.J.Read(), Q1D, Q1D, 2, 2, NE);
37 const auto detJe_loc = Reshape(el_geom.detJ.Read(), Q1D, Q1D, NE);
38
39 const int n_nbr = nbr_geom ? nbr_geom->num_neighbor_elems : 0;
40 const auto J_shared = Reshape(nbr_geom ? nbr_geom->J.Read() : nullptr,
41 Q1D, Q1D, 2, 2, n_nbr);
42 const auto detJ_shared = Reshape(nbr_geom ? nbr_geom->detJ.Read() : nullptr,
43 Q1D, Q1D, n_nbr);
44
45 const auto detJf = Reshape(face_geom.detJ.Read(), Q1D, NF);
46 const auto n = Reshape(face_geom.normal.Read(), Q1D, 2, NF);
47
48 const bool const_q = (q.Size() == 1);
49 const auto Q = const_q ? Reshape(q.Read(), 1,1) : Reshape(q.Read(), Q1D,NF);
50
51 const auto W = w.Read();
52
53 // (normal0, normal1, e0, e1, fid0, fid1)
54 const auto face_info = Reshape(face_info_.Read(), 6, NF);
55
56 // (q, 1/h, J0_0, J0_1, J1_0, J1_1)
57 auto pa = Reshape(pa_data.Write(), 6, Q1D, NF);
58
59 mfem::forall(NF, [=] MFEM_HOST_DEVICE (int f) -> void
60 {
61 const int normal_dir[] = {face_info(0, f), face_info(1, f)};
62 const int fid[] = {face_info(4, f), face_info(5, f)};
63
64 int el[] = {face_info(2, f), face_info(3, f)};
65 const bool interior = el[1] >= 0;
66 const int nsides = (interior) ? 2 : 1;
67 const real_t factor = interior ? 0.5 : 1.0;
68
69 const bool shared = el[1] >= NE;
70 el[1] = shared ? el[1] - NE : el[1];
71
72 const int sgn0 = (fid[0] == 0 || fid[0] == 1) ? 1 : -1;
73 const int sgn1 = (fid[1] == 0 || fid[1] == 1) ? 1 : -1;
74
75 for (int p = 0; p < Q1D; ++p)
76 {
77 const real_t Qp = const_q ? Q(0,0) : Q(p, f);
78 pa(0, p, f) = kappa * Qp * W[p] * detJf(p, f);
79
80 real_t hi = 0.0;
81 for (int side = 0; side < nsides; ++side)
82 {
83 int i, j;
84 internal::FaceIdxToVolIdx2D(p, Q1D, fid[0], fid[1], side, i, j);
85
86 // Always opposite direction in "native" ordering
87 // Need to multiply the native=>lex0 with native=>lex1 and negate
88 const int sgn = (side == 1) ? -1*sgn0*sgn1 : 1;
89
90 const int e = el[side];
91 const auto &J = (side == 1 && shared) ? J_shared : J_loc;
92 const auto &detJ = (side == 1 && shared) ? detJ_shared : detJe_loc;
93
94 real_t nJi[2];
95 nJi[0] = n(p,0,f)*J(i,j, 1,1, e) - n(p,1,f)*J(i,j,0,1,e);
96 nJi[1] = -n(p,0,f)*J(i,j,1,0, e) + n(p,1,f)*J(i,j,0,0,e);
97
98 const real_t dJe = detJ(i,j,e);
99 const real_t dJf = detJf(p, f);
100
101 const real_t w = factor * Qp * W[p] * dJf / dJe;
102
103 const int ni = normal_dir[side];
104 const int ti = 1 - ni;
105
106 // Normal
107 pa(2 + 2*side + 0, p, f) = w * nJi[ni];
108 // Tangential
109 pa(2 + 2*side + 1, p, f) = sgn * w * nJi[ti];
110
111 hi += factor * dJf / dJe;
112 }
113
114 if (nsides == 1)
115 {
116 pa(4, p, f) = 0.0;
117 pa(5, p, f) = 0.0;
118 }
119
120 pa(1, p, f) = hi;
121 }
122 });
123}
124
125static void PADGDiffusionSetup3D(const int Q1D,
126 const int NE,
127 const int NF,
128 const Array<real_t> &w,
129 const GeometricFactors &el_geom,
130 const FaceGeometricFactors &face_geom,
131 const FaceNeighborGeometricFactors *nbr_geom,
132 const Vector &q,
133 const real_t sigma,
134 const real_t kappa,
135 Vector &pa_data,
136 const Array<int> &face_info_)
137{
138 const auto J_loc = Reshape(el_geom.J.Read(), Q1D, Q1D, Q1D, 3, 3, NE);
139 const auto detJe_loc = Reshape(el_geom.detJ.Read(), Q1D, Q1D, Q1D, NE);
140
141 const int n_nbr = nbr_geom ? nbr_geom->num_neighbor_elems : 0;
142 const auto J_shared = Reshape(nbr_geom ? nbr_geom->J.Read() : nullptr,
143 Q1D, Q1D, Q1D, 3, 3, n_nbr);
144 const auto detJ_shared = Reshape(nbr_geom ? nbr_geom->detJ.Read() : nullptr,
145 Q1D, Q1D, Q1D, n_nbr);
146
147 const auto detJf = Reshape(face_geom.detJ.Read(), Q1D, Q1D, NF);
148 const auto n = Reshape(face_geom.normal.Read(), Q1D, Q1D, 3, NF);
149
150 const bool const_q = (q.Size() == 1);
151 const auto Q = const_q ? Reshape(q.Read(), 1, 1, 1)
152 : Reshape(q.Read(), Q1D, Q1D, NF);
153
154 const auto W = Reshape(w.Read(), Q1D, Q1D);
155
156 // (perm[0], perm[1], perm[2], element_index, local_face_id, orientation)
157 const auto face_info = Reshape(face_info_.Read(), 6, 2, NF);
158 constexpr int _el_ = 3; // offset in face_info for element index
159 constexpr int _fid_ = 4; // offset in face_info for local face id
160 constexpr int _or_ = 5; // offset in face_info for orientation
161
162 // (J00, J01, J02, J10, J11, J12, q/h)
163 const auto pa = Reshape(pa_data.Write(), 7, Q1D, Q1D, NF);
164
165 mfem::forall_2D(NF, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int f) -> void
166 {
167 MFEM_SHARED int perm[2][3];
168 MFEM_SHARED int el[2];
169 MFEM_SHARED bool shared[2];
170 MFEM_SHARED int fid[2];
171 MFEM_SHARED int ortn[2];
172
173 MFEM_FOREACH_THREAD(side, x, 2)
174 {
175 MFEM_FOREACH_THREAD(i, y, 3)
176 {
177 perm[side][i] = face_info(i, side, f);
178 }
179
180 if (MFEM_THREAD_ID(y) == 0)
181 {
182 el[side] = face_info(_el_, side, f);
183 fid[side] = face_info(_fid_, side, f);
184 ortn[side] = face_info(_or_, side, f);
185
186 // If the element index is beyond the local partition NE, then the
187 // element is a "face neighbor" element.
188 shared[side] = (el[side] >= NE);
189 el[side] = shared[side] ? el[side] - NE : el[side];
190 }
191 }
192
193 MFEM_SYNC_THREAD;
194
195 const bool interior = el[1] >= 0;
196 const int nsides = interior ? 2 : 1;
197 const real_t factor = interior ? 0.5 : 1.0;
198
199 MFEM_FOREACH_THREAD(p1, x, Q1D)
200 {
201 MFEM_FOREACH_THREAD(p2, y, Q1D)
202 {
203 const real_t Qp = const_q ? Q(0,0,0) : Q(p1, p2, f);
204 const real_t dJf = detJf(p1,p2,f);
205
206 real_t hi = 0.0;
207
208 for (int side = 0; side < nsides; ++side)
209 {
210 int i, j, k;
211 internal::FaceIdxToVolIdx3D(
212 p1 + Q1D*p2, Q1D, fid[0], fid[1], side, ortn[1], i, j, k);
213
214 const int e = el[side];
215 const auto &J = shared[side] ? J_shared : J_loc;
216 const auto &detJe = shared[side] ? detJ_shared : detJe_loc;
217
218 // *INDENT-OFF*
219 real_t nJi[3];
220 nJi[0] = ( -J(i,j,k, 1,2, e)*J(i,j,k, 2,1, e) + J(i,j,k, 1,1, e)*J(i,j,k, 2,2, e)) * n(p1,p2, 0, f)
221 + ( J(i,j,k, 0,2, e)*J(i,j,k, 2,1, e) - J(i,j,k, 0,1, e)*J(i,j,k, 2,2, e)) * n(p1,p2, 1, f)
222 + (-J(i,j,k, 0,2, e)*J(i,j,k, 1,1, e) + J(i,j,k, 0,1, e)*J(i,j,k, 1,2, e)) * n(p1,p2, 2, f);
223
224 nJi[1] = ( J(i,j,k, 1,2, e)*J(i,j,k, 2,0, e) - J(i,j,k, 1,0, e)*J(i,j,k, 2,2, e)) * n(p1,p2, 0, f)
225 + (-J(i,j,k, 0,2, e)*J(i,j,k, 2,0, e) + J(i,j,k, 0,0, e)*J(i,j,k, 2,2, e)) * n(p1,p2, 1, f)
226 + ( J(i,j,k, 0,2, e)*J(i,j,k, 1,0, e) - J(i,j,k, 0,0, e)*J(i,j,k, 1,2, e)) * n(p1,p2, 2, f);
227
228 nJi[2] = ( -J(i,j,k, 1,1, e)*J(i,j,k, 2,0, e) + J(i,j,k, 1,0, e)*J(i,j,k, 2,1, e)) * n(p1,p2, 0, f)
229 + ( J(i,j,k, 0,1, e)*J(i,j,k, 2,0, e) - J(i,j,k, 0,0, e)*J(i,j,k, 2,1, e)) * n(p1,p2, 1, f)
230 + (-J(i,j,k, 0,1, e)*J(i,j,k, 1,0, e) + J(i,j,k, 0,0, e)*J(i,j,k, 1,1, e)) * n(p1,p2, 2, f);
231 // *INDENT-ON*
232
233 const real_t dJe = detJe(i,j,k,e);
234 const real_t val = factor * Qp * W(p1, p2) * dJf / dJe;
235
236 for (int d = 0; d < 3; ++d)
237 {
238 const int idx = std::abs(perm[side][d]) - 1;
239 const int sgn = (perm[side][d] < 0) ? -1 : 1;
240 pa(3*side + d, p1, p2, f) = sgn * val * nJi[idx];
241 }
242
243 hi += factor * dJf / dJe;
244 }
245
246 if (nsides == 1)
247 {
248 pa(3, p1, p2, f) = 0.0;
249 pa(4, p1, p2, f) = 0.0;
250 pa(5, p1, p2, f) = 0.0;
251 }
252
253 pa(6, p1, p2, f) = kappa * hi * Qp * W(p1, p2) * dJf;
254 }
255 }
256 });
257}
258
259static void PADGDiffusionSetupFaceInfo2D(const int nf, const Mesh &mesh,
260 const FaceType type, Array<int> &face_info_)
261{
262 const int ne = mesh.GetNE();
263
264 int fidx = 0;
265 face_info_.SetSize(nf * 6);
266
267 // normal0 and normal1 are the indices of the face normal direction relative
268 // to the element in reference coordinates, i.e. if the face is normal to the
269 // x-vector (left or right face), then it will be 0, otherwise 1.
270
271 // 2d: (normal0, normal1, e0, e1, fid0, fid1)
272 auto face_info = Reshape(face_info_.HostWrite(), 6, nf);
273 for (int f = 0; f < mesh.GetNumFaces(); ++f)
274 {
275 auto f_info = mesh.GetFaceInformation(f);
276
277 if (f_info.IsOfFaceType(type))
278 {
279 const int face_id_1 = f_info.element[0].local_face_id;
280 face_info(0, fidx) = (face_id_1 == 1 || face_id_1 == 3) ? 0 : 1;
281 face_info(2, fidx) = f_info.element[0].index;
282 face_info(4, fidx) = face_id_1;
283
284 if (f_info.IsInterior())
285 {
286 const int face_id_2 = f_info.element[1].local_face_id;
287 face_info(1, fidx) = (face_id_2 == 1 || face_id_2 == 3) ? 0 : 1;
288 if (f_info.IsShared())
289 {
290 face_info(3, fidx) = ne + f_info.element[1].index;
291 }
292 else
293 {
294 face_info(3, fidx) = f_info.element[1].index;
295 }
296 face_info(5, fidx) = face_id_2;
297 }
298 else
299 {
300 face_info(1, fidx) = -1;
301 face_info(3, fidx) = -1;
302 face_info(5, fidx) = -1;
303 }
304
305 fidx++;
306 }
307 }
308}
309
310// Assigns to perm the permutation:
311// perm[0] <- normal component
312// perm[1] <- first tangential component
313// perm[2] <- second tangential component
314//
315// (Tangential components are ordering lexicographically).
316inline void FaceNormalPermutation(int perm[3], const int face_id)
317{
318 const bool xy_plane = (face_id == 0 || face_id == 5);
319 const bool xz_plane = (face_id == 1 || face_id == 3);
320 // const bool yz_plane = (face_id == 2 || face_id == 4);
321
322 perm[0] = (xy_plane) ? 3 : (xz_plane) ? 2 : 1;
323 perm[1] = (xy_plane || xz_plane) ? 1 : 2;
324 perm[2] = (xy_plane) ? 2 : 3;
325}
326
327// Assigns to perm the permutation as in FaceNormalPermutation for the second
328// element on the face but signed to indicate the sign of the normal derivative.
329inline void SignedFaceNormalPermutation(int perm[3],
330 const int face_id1,
331 const int face_id2,
332 const int orientation)
333{
334 FaceNormalPermutation(perm, face_id2);
335
336 // Sets perm according to the inverse of PermuteFace3D
337 if (face_id2 == 3 || face_id2 == 4)
338 {
339 perm[1] *= -1;
340 }
341 else if (face_id2 == 0)
342 {
343 perm[2] *= -1;
344 }
345
346 switch (orientation)
347 {
348 case 1:
349 std::swap(perm[1], perm[2]);
350 break;
351 case 2:
352 std::swap(perm[1], perm[2]);
353 perm[1] *= -1;
354 break;
355 case 3:
356 perm[1] *= -1;
357 break;
358 case 4:
359 perm[1] *= -1;
360 perm[2] *= -1;
361 break;
362 case 5:
363 std::swap(perm[1], perm[2]);
364 perm[1] *= -1;
365 perm[2] *= -1;
366 break;
367 case 6:
368 std::swap(perm[1], perm[2]);
369 perm[2] *= -1;
370 break;
371 case 7:
372 perm[2] *= -1;
373 break;
374 default:
375 break;
376 }
377
378 if (face_id1 == 3 || face_id1 == 4)
379 {
380 perm[1] *= -1;
381 }
382 else if (face_id1 == 0)
383 {
384 perm[2] *= -1;
385 }
386}
387
388static void PADGDiffusionSetupFaceInfo3D(const int nf, const Mesh &mesh,
389 const FaceType type, Array<int> &face_info_)
390{
391 const int ne = mesh.GetNE();
392
393 int fidx = 0;
394 // face_info array has 12 entries per face, 6 for each of the adjacent elements:
395 // (perm[0], perm[1], perm[2], element_index, local_face_id, orientation)
396 face_info_.SetSize(nf * 12);
397 constexpr int _e_ = 3; // offset for element index
398 constexpr int _fid_ = 4; // offset for local face id
399 constexpr int _or_ = 5; // offset for orientation
400
401 auto face_info = Reshape(face_info_.HostWrite(), 6, 2, nf);
402 for (int f = 0; f < mesh.GetNumFaces(); ++f)
403 {
404 auto f_info = mesh.GetFaceInformation(f);
405
406 if (f_info.IsOfFaceType(type))
407 {
408 const int fid0 = f_info.element[0].local_face_id;
409 const int or0 = f_info.element[0].orientation;
410
411 face_info( _e_, 0, fidx) = f_info.element[0].index;
412 face_info(_fid_, 0, fidx) = fid0;
413 face_info( _or_, 0, fidx) = or0;
414
415 FaceNormalPermutation(&face_info(0, 0, fidx), fid0);
416
417 if (f_info.IsInterior())
418 {
419 const int fid1 = f_info.element[1].local_face_id;
420 const int or1 = f_info.element[1].orientation;
421
422 if (f_info.IsShared())
423 {
424 face_info( _e_, 1, fidx) = ne + f_info.element[1].index;
425 }
426 else
427 {
428 face_info( _e_, 1, fidx) = f_info.element[1].index;
429 }
430 face_info(_fid_, 1, fidx) = fid1;
431 face_info( _or_, 1, fidx) = or1;
432
433 SignedFaceNormalPermutation(&face_info(0, 1, fidx), fid0, fid1, or1);
434 }
435 else
436 {
437 for (int i = 0; i < 6; ++i)
438 {
439 face_info(i, 1, fidx) = -1;
440 }
441 }
442
443 fidx++;
444 }
445 }
446}
447
448void DGDiffusionIntegrator::SetupPA(const FiniteElementSpace &fes,
449 FaceType type)
450{
451 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
453
454 const int ne = fes.GetNE();
455 nf = fes.GetNFbyType(type);
456
457 // Assumes tensor-product elements
458 Mesh &mesh = *fes.GetMesh();
459 const Geometry::Type face_geom_type = mesh.GetTypicalFaceGeometry();
460 const FiniteElement &el = *fes.GetTypicalTraceElement();
461 const int ir_order = IntRule ? IntRule->GetOrder()
462 : GetRule(el.GetOrder(), face_geom_type).GetOrder();
463 const IntegrationRule &ir = irs.Get(face_geom_type, ir_order);
464 dim = mesh.Dimension();
465 const int q1d = (ir.GetOrder() + 3)/2;
466 MFEM_ASSERT(q1d == pow(real_t(ir.Size()), 1.0/(dim - 1)), "");
467
468 const auto vol_ir = irs.Get(mesh.GetTypicalElementGeometry(), ir_order);
469 const auto geom_flags = GeometricFactors::JACOBIANS |
471 const auto el_geom = mesh.GetGeometricFactors(vol_ir, geom_flags, mt);
472
473 std::unique_ptr<FaceNeighborGeometricFactors> nbr_geom;
474 if (type == FaceType::Interior)
475 {
476 nbr_geom.reset(new FaceNeighborGeometricFactors(*el_geom));
477 }
478
479 const auto face_geom_flags = FaceGeometricFactors::DETERMINANTS |
481 auto face_geom = mesh.GetFaceGeometricFactors(ir, face_geom_flags, type, mt);
482 maps = &el.GetDofToQuad(ir, DofToQuad::TENSOR);
483 dofs1D = maps->ndof;
484 quad1D = maps->nqpt;
485
486 const int pa_size = (dim == 2) ? (6 * q1d * nf) : (7 * q1d * q1d * nf);
488
489 // Evaluate the coefficient at the face quadrature points.
490 FaceQuadratureSpace fqs(mesh, ir, type);
491 CoefficientVector q(fqs, CoefficientStorage::COMPRESSED);
492 if (Q) { q.Project(*Q); }
493 else if (MQ) { MFEM_ABORT("Not yet implemented"); /* q.Project(*MQ); */ }
494 else { q.SetConstant(1.0); }
495
496 Array<int> face_info;
497 if (dim == 1)
498 {
499 MFEM_ABORT("dim==1 not supported in PADGTraceSetup");
500 }
501 else if (dim == 2)
502 {
503 PADGDiffusionSetupFaceInfo2D(nf, mesh, type, face_info);
504 PADGDiffusionSetup2D(quad1D, ne, nf, ir.GetWeights(), *el_geom, *face_geom,
505 nbr_geom.get(), q, sigma, kappa, pa_data, face_info);
506 }
507 else if (dim == 3)
508 {
509 PADGDiffusionSetupFaceInfo3D(nf, mesh, type, face_info);
510 PADGDiffusionSetup3D(quad1D, ne, nf, ir.GetWeights(), *el_geom, *face_geom,
511 nbr_geom.get(), q, sigma, kappa, pa_data, face_info);
512 }
513}
514
520
526
527template<int T_D1D = 0, int T_Q1D = 0> static
528void PADGDiffusionApply2D(const int NF,
529 const Array<real_t> &b,
530 const Array<real_t> &bt,
531 const Array<real_t>& g,
532 const Array<real_t>& gt,
533 const real_t sigma,
534 const Vector &pa_data,
535 const Vector &x_,
536 const Vector &dxdn_,
537 Vector &y_,
538 Vector &dydn_,
539 const int d1d = 0,
540 const int q1d = 0)
541{
542 const int D1D = T_D1D ? T_D1D : d1d;
543 const int Q1D = T_Q1D ? T_Q1D : q1d;
544 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
545 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
546
547 auto B_ = Reshape(b.Read(), Q1D, D1D);
548 auto G_ = Reshape(g.Read(), Q1D, D1D);
549
550 auto pa = Reshape(pa_data.Read(), 6, Q1D, NF); // (q, 1/h, J00, J01, J10, J11)
551
552 auto x = Reshape(x_.Read(), D1D, 2, NF);
553 auto y = Reshape(y_.ReadWrite(), D1D, 2, NF);
554 auto dxdn = Reshape(dxdn_.Read(), D1D, 2, NF);
555 auto dydn = Reshape(dydn_.ReadWrite(), D1D, 2, NF);
556
557 const int NBX = std::max(D1D, Q1D);
558
559 mfem::forall_2D(NF, NBX, 2, [=] MFEM_HOST_DEVICE (int f) -> void
560 {
561 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
562 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
563
564 MFEM_SHARED real_t u0[max_D1D];
565 MFEM_SHARED real_t u1[max_D1D];
566 MFEM_SHARED real_t du0[max_D1D];
567 MFEM_SHARED real_t du1[max_D1D];
568
569 MFEM_SHARED real_t Bu0[max_Q1D];
570 MFEM_SHARED real_t Bu1[max_Q1D];
571 MFEM_SHARED real_t Bdu0[max_Q1D];
572 MFEM_SHARED real_t Bdu1[max_Q1D];
573
574 MFEM_SHARED real_t r[max_Q1D];
575
576 MFEM_SHARED real_t BG[2*max_D1D*max_Q1D];
577 DeviceMatrix B(BG, Q1D, D1D);
578 DeviceMatrix G(BG + D1D*Q1D, Q1D, D1D);
579
580 if (MFEM_THREAD_ID(y) == 0)
581 {
582 MFEM_FOREACH_THREAD(p,x,Q1D)
583 {
584 for (int d = 0; d < D1D; ++d)
585 {
586 B(p,d) = B_(p,d);
587 G(p,d) = G_(p,d);
588 }
589 }
590 }
591 MFEM_SYNC_THREAD;
592
593 // copy edge values to u0, u1 and copy edge normals to du0, du1
594 MFEM_FOREACH_THREAD(side,y,2)
595 {
596 real_t *u = (side == 0) ? u0 : u1;
597 real_t *du = (side == 0) ? du0 : du1;
598 MFEM_FOREACH_THREAD(d,x,D1D)
599 {
600 u[d] = x(d, side, f);
601 du[d] = dxdn(d, side, f);
602 }
603 }
604 MFEM_SYNC_THREAD;
605
606 // eval @ quad points
607 MFEM_FOREACH_THREAD(side,y,2)
608 {
609 real_t *u = (side == 0) ? u0 : u1;
610 real_t *du = (side == 0) ? du0 : du1;
611 real_t *Bu = (side == 0) ? Bu0 : Bu1;
612 real_t *Bdu = (side == 0) ? Bdu0 : Bdu1;
613
614 MFEM_FOREACH_THREAD(p,x,Q1D)
615 {
616 const real_t Je_side[] = {pa(2 + 2*side, p, f), pa(2 + 2*side + 1, p, f)};
617
618 Bu[p] = 0.0;
619 Bdu[p] = 0.0;
620
621 for (int d = 0; d < D1D; ++d)
622 {
623 const real_t b = B(p,d);
624 const real_t g = G(p,d);
625
626 Bu[p] += b*u[d];
627 Bdu[p] += Je_side[0] * b * du[d] + Je_side[1] * g * u[d];
628 }
629 }
630 }
631 MFEM_SYNC_THREAD;
632
633 // term - < {Q du/dn}, [v] > + kappa * < {Q/h} [u], [v] >:
634 if (MFEM_THREAD_ID(y) == 0)
635 {
636 MFEM_FOREACH_THREAD(p,x,Q1D)
637 {
638 const real_t q = pa(0, p, f);
639 const real_t hi = pa(1, p, f);
640 const real_t jump = Bu0[p] - Bu1[p];
641 const real_t avg = Bdu0[p] + Bdu1[p]; // = {Q du/dn} * w * det(J)
642 r[p] = -avg + hi * q * jump;
643 }
644 }
645 MFEM_SYNC_THREAD;
646
647 MFEM_FOREACH_THREAD(d,x,D1D)
648 {
649 real_t Br = 0.0;
650
651 for (int p = 0; p < Q1D; ++p)
652 {
653 Br += B(p, d) * r[p];
654 }
655
656 u0[d] = Br; // overwrite u0, u1
657 u1[d] = -Br;
658 } // for d
659 MFEM_SYNC_THREAD;
660
661
662 MFEM_FOREACH_THREAD(side,y,2)
663 {
664 real_t *du = (side == 0) ? du0 : du1;
665 MFEM_FOREACH_THREAD(d,x,D1D)
666 {
667 du[d] = 0.0;
668 }
669 }
670 MFEM_SYNC_THREAD;
671
672 // term sigma * < [u], {Q dv/dn} >
673 MFEM_FOREACH_THREAD(side,y,2)
674 {
675 real_t * const du = (side == 0) ? du0 : du1;
676 real_t * const u = (side == 0) ? u0 : u1;
677
678 MFEM_FOREACH_THREAD(d,x,D1D)
679 {
680 for (int p = 0; p < Q1D; ++p)
681 {
682 const real_t Je[] = {pa(2 + 2*side, p, f), pa(2 + 2*side + 1, p, f)};
683 const real_t jump = Bu0[p] - Bu1[p];
684 const real_t r_p = Je[0] * jump; // normal
685 const real_t w_p = Je[1] * jump; // tangential
686 du[d] += sigma * B(p, d) * r_p;
687 u[d] += sigma * G(p, d) * w_p;
688 }
689 }
690 }
691 MFEM_SYNC_THREAD;
692
693 MFEM_FOREACH_THREAD(side,y,2)
694 {
695 real_t *u = (side == 0) ? u0 : u1;
696 real_t *du = (side == 0) ? du0 : du1;
697 MFEM_FOREACH_THREAD(d,x,D1D)
698 {
699 y(d, side, f) += u[d];
700 dydn(d, side, f) += du[d];
701 }
702 }
703 }); // mfem::forall
704}
705
706template <int T_D1D = 0, int T_Q1D = 0>
707static void PADGDiffusionApply3D(const int NF,
708 const Array<real_t>& b,
709 const Array<real_t>& bt,
710 const Array<real_t>& g,
711 const Array<real_t>& gt,
712 const real_t sigma,
713 const Vector& pa_data,
714 const Vector& x_,
715 const Vector& dxdn_,
716 Vector& y_,
717 Vector& dydn_,
718 const int d1d = 0,
719 const int q1d = 0)
720{
721 const int D1D = T_D1D ? T_D1D : d1d;
722 const int Q1D = T_Q1D ? T_Q1D : q1d;
723 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
724 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
725
726 auto B_ = Reshape(b.Read(), Q1D, D1D);
727 auto G_ = Reshape(g.Read(), Q1D, D1D);
728
729 // (J0[0], J0[1], J0[2], J1[0], J1[1], J1[2], q/h)
730 auto pa = Reshape(pa_data.Read(), 7, Q1D, Q1D, NF);
731
732 auto x = Reshape(x_.Read(), D1D, D1D, 2, NF);
733 auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
734 auto dxdn = Reshape(dxdn_.Read(), D1D, D1D, 2, NF);
735 auto dydn = Reshape(dydn_.ReadWrite(), D1D, D1D, 2, NF);
736
737 const int NBX = std::max(D1D, Q1D);
738
739 mfem::forall_3D(NF, NBX, NBX, 2, [=] MFEM_HOST_DEVICE (int f) -> void
740 {
741 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
742 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
743
744 MFEM_SHARED real_t u0[max_Q1D][max_Q1D];
745 MFEM_SHARED real_t u1[max_Q1D][max_Q1D];
746
747 MFEM_SHARED real_t du0[max_Q1D][max_Q1D];
748 MFEM_SHARED real_t du1[max_Q1D][max_Q1D];
749
750 MFEM_SHARED real_t Gu0[max_Q1D][max_Q1D];
751 MFEM_SHARED real_t Gu1[max_Q1D][max_Q1D];
752
753 MFEM_SHARED real_t Bu0[max_Q1D][max_Q1D];
754 MFEM_SHARED real_t Bu1[max_Q1D][max_Q1D];
755
756 MFEM_SHARED real_t Bdu0[max_Q1D][max_Q1D];
757 MFEM_SHARED real_t Bdu1[max_Q1D][max_Q1D];
758
759 MFEM_SHARED real_t kappa_Qh[max_Q1D][max_Q1D];
760
761 MFEM_SHARED real_t nJe[2][max_Q1D][max_Q1D][3];
762 MFEM_SHARED real_t BG[2*max_D1D*max_Q1D];
763
764 // some buffers are reused multiple times, but for clarity have new names:
765 real_t (*Bj0)[max_Q1D] = Bu0;
766 real_t (*Bj1)[max_Q1D] = Bu1;
767 real_t (*Bjn0)[max_Q1D] = Bdu0;
768 real_t (*Bjn1)[max_Q1D] = Bdu1;
769 real_t (*Gj0)[max_Q1D] = Gu0;
770 real_t (*Gj1)[max_Q1D] = Gu1;
771
772 DeviceMatrix B(BG, Q1D, D1D);
773 DeviceMatrix G(BG + D1D*Q1D, Q1D, D1D);
774
775 // copy face values to u0, u1 and copy normals to du0, du1
776 MFEM_FOREACH_THREAD(side, z, 2)
777 {
778 real_t (*u)[max_Q1D] = (side == 0) ? u0 : u1;
779 real_t (*du)[max_Q1D] = (side == 0) ? du0 : du1;
780
781 MFEM_FOREACH_THREAD(d2, x, D1D)
782 {
783 MFEM_FOREACH_THREAD(d1, y, D1D)
784 {
785 u[d2][d1] = x(d1, d2, side, f); // copy transposed for better memory access
786 du[d2][d1] = dxdn(d1, d2, side, f);
787 }
788 }
789
790 MFEM_FOREACH_THREAD(p1, x, Q1D)
791 {
792 MFEM_FOREACH_THREAD(p2, y, Q1D)
793 {
794 for (int l=0; l < 3; ++l)
795 {
796 nJe[side][p2][p1][l] = pa(3*side + l, p1, p2, f);
797 }
798
799 if (side == 0)
800 {
801 kappa_Qh[p2][p1] = pa(6, p1, p2, f);
802 }
803 }
804 }
805
806 if (side == 0)
807 {
808 MFEM_FOREACH_THREAD(p, x, Q1D)
809 {
810 MFEM_FOREACH_THREAD(d, y, D1D)
811 {
812 B(p, d) = B_(p, d);
813 G(p, d) = G_(p, d);
814 }
815 }
816 }
817 }
818 MFEM_SYNC_THREAD;
819
820 // eval u and normal derivative @ quad points
821 MFEM_FOREACH_THREAD(side, z, 2)
822 {
823 real_t (*u)[max_Q1D] = (side == 0) ? u0 : u1;
824 real_t (*du)[max_Q1D] = (side == 0) ? du0 : du1;
825 real_t (*Bu)[max_Q1D] = (side == 0) ? Bu0 : Bu1;
826 real_t (*Bdu)[max_Q1D] = (side == 0) ? Bdu0 : Bdu1;
827 real_t (*Gu)[max_Q1D] = (side == 0) ? Gu0 : Gu1;
828
829 MFEM_FOREACH_THREAD(p1, x, Q1D)
830 {
831 MFEM_FOREACH_THREAD(d2, y, D1D)
832 {
833 real_t bu = 0.0;
834 real_t bdu = 0.0;
835 real_t gu = 0.0;
836
837 for (int d1=0; d1 < D1D; ++d1)
838 {
839 const real_t b = B(p1, d1);
840 const real_t g = G(p1, d1);
841
842 bu += b * u[d2][d1];
843 bdu += b * du[d2][d1];
844 gu += g * u[d2][d1];
845 }
846
847 Bu[p1][d2] = bu;
848 Bdu[p1][d2] = bdu;
849 Gu[p1][d2] = gu;
850 }
851 }
852 }
853 MFEM_SYNC_THREAD;
854
855 MFEM_FOREACH_THREAD(side, z, 2)
856 {
857 real_t (*u)[max_Q1D] = (side == 0) ? u0 : u1;
858 real_t (*du)[max_Q1D] = (side == 0) ? du0 : du1;
859 real_t (*Bu)[max_Q1D] = (side == 0) ? Bu0 : Bu1;
860 real_t (*Gu)[max_Q1D] = (side == 0) ? Gu0 : Gu1;
861 real_t (*Bdu)[max_Q1D] = (side == 0) ? Bdu0 : Bdu1;
862
863 MFEM_FOREACH_THREAD(p2, x, Q1D)
864 {
865 MFEM_FOREACH_THREAD(p1, y, Q1D)
866 {
867 const real_t * Je = nJe[side][p2][p1];
868
869 real_t bbu = 0.0;
870 real_t bgu = 0.0;
871 real_t gbu = 0.0;
872 real_t bbdu = 0.0;
873
874 for (int d2 = 0; d2 < D1D; ++d2)
875 {
876 const real_t b = B(p2, d2);
877 const real_t g = G(p2, d2);
878 bbu += b * Bu[p1][d2];
879 gbu += g * Bu[p1][d2];
880 bgu += b * Gu[p1][d2];
881 bbdu += b * Bdu[p1][d2];
882 }
883
884 u[p2][p1] = bbu;
885 // du <- Q du/dn * w * det(J)
886 du[p2][p1] = Je[0] * bbdu + Je[1] * bgu + Je[2] * gbu;
887 }
888 }
889 }
890 MFEM_SYNC_THREAD;
891
892 MFEM_FOREACH_THREAD(side, z, 2)
893 {
894 real_t (*Bj)[max_Q1D] = (side == 0) ? Bj0 : Bj1;
895 real_t (*Bjn)[max_Q1D] = (side == 0) ? Bjn0 : Bjn1;
896 real_t (*Gj)[max_Q1D] = (side == 0) ? Gj0 : Gj1;
897
898 MFEM_FOREACH_THREAD(d1, x, D1D)
899 {
900 MFEM_FOREACH_THREAD(p2, y, Q1D)
901 {
902 real_t bj = 0.0;
903 real_t bjn = 0.0;
904 real_t gj = 0.0;
905 real_t br = 0.0;
906
907 for (int p1 = 0; p1 < Q1D; ++p1)
908 {
909 const real_t b = B(p1, d1);
910 const real_t g = G(p1, d1);
911
912 const real_t * Je = nJe[side][p2][p1];
913
914 const real_t jump = u0[p2][p1] - u1[p2][p1];
915 const real_t avg = du0[p2][p1] + du1[p2][p1];
916
917 // r = - < {Q du/dn}, [v] > + kappa * < {Q/h} [u], [v] >
918 const real_t r = -avg + kappa_Qh[p2][p1] * jump;
919
920 // bj, gj, bjn contribute to sigma term
921 bj += b * Je[0] * jump;
922 gj += g * Je[1] * jump;
923 bjn += b * Je[2] * jump;
924
925 br += b * r;
926 }
927
928 Bj[d1][p2] = sigma * bj;
929 Bjn[d1][p2] = sigma * bjn;
930
931 // group br and gj together since we will multiply them both by B
932 // and then sum
933 const real_t sgn = (side == 0) ? 1.0 : -1.0;
934 Gj[d1][p2] = sgn * br + sigma * gj;
935 }
936 }
937 }
938 MFEM_SYNC_THREAD;
939
940 MFEM_FOREACH_THREAD(side, z, 2)
941 {
942 real_t (*u)[max_Q1D] = (side == 0) ? u0 : u1;
943 real_t (*du)[max_Q1D] = (side == 0) ? du0 : du1;
944 real_t (*Bj)[max_Q1D] = (side == 0) ? Bj0 : Bj1;
945 real_t (*Bjn)[max_Q1D] = (side == 0) ? Bjn0 : Bjn1;
946 real_t (*Gj)[max_Q1D] = (side == 0) ? Gj0 : Gj1;
947
948 MFEM_FOREACH_THREAD(d2, x, D1D)
949 {
950 MFEM_FOREACH_THREAD(d1, y, D1D)
951 {
952 real_t bbj = 0.0;
953 real_t gbj = 0.0;
954 real_t bgj = 0.0;
955
956 for (int p2 = 0; p2 < Q1D; ++p2)
957 {
958 const real_t b = B(p2, d2);
959 const real_t g = G(p2, d2);
960
961 bbj += b * Bj[d1][p2];
962 bgj += b * Gj[d1][p2];
963 gbj += g * Bjn[d1][p2];
964 }
965
966 du[d2][d1] = bbj;
967 u[d2][d1] = bgj + gbj;
968 }
969 }
970 }
971 MFEM_SYNC_THREAD;
972
973 // map back to y and dydn
974 MFEM_FOREACH_THREAD(side, z, 2)
975 {
976 const real_t (*u)[max_Q1D] = (side == 0) ? u0 : u1;
977 const real_t (*du)[max_Q1D] = (side == 0) ? du0 : du1;
978
979 MFEM_FOREACH_THREAD(d2, x, D1D)
980 {
981 MFEM_FOREACH_THREAD(d1, y, D1D)
982 {
983 y(d1, d2, side, f) += u[d2][d1];
984 dydn(d1, d2, side, f) += du[d2][d1];
985 }
986 }
987 }
988 });
989}
990
991static void PADGDiffusionApply(const int dim,
992 const int D1D,
993 const int Q1D,
994 const int NF,
995 const Array<real_t> &B,
996 const Array<real_t> &Bt,
997 const Array<real_t> &G,
998 const Array<real_t> &Gt,
999 const real_t sigma,
1000 const Vector &pa_data,
1001 const Vector &x,
1002 const Vector &dxdn,
1003 Vector &y,
1004 Vector &dydn)
1005{
1006 if (dim == 2)
1007 {
1008 auto kernel = PADGDiffusionApply2D<0,0>;
1009 switch ((D1D << 4 ) | Q1D)
1010 {
1011 case 0x23: kernel = PADGDiffusionApply2D<2,3>; break;
1012 case 0x34: kernel = PADGDiffusionApply2D<3,4>; break;
1013 case 0x45: kernel = PADGDiffusionApply2D<4,5>; break;
1014 case 0x56: kernel = PADGDiffusionApply2D<5,6>; break;
1015 case 0x67: kernel = PADGDiffusionApply2D<6,7>; break;
1016 case 0x78: kernel = PADGDiffusionApply2D<7,8>; break;
1017 case 0x89: kernel = PADGDiffusionApply2D<8,9>; break;
1018 case 0x9A: kernel = PADGDiffusionApply2D<9,10>; break;
1019 }
1020 kernel(NF, B, Bt, G, Gt, sigma, pa_data, x, dxdn, y, dydn, D1D, Q1D);
1021 }
1022 else if (dim == 3)
1023 {
1024 auto kernel = PADGDiffusionApply3D<0,0>;
1025 switch ((D1D << 4) | Q1D)
1026 {
1027 case 0x24: kernel = PADGDiffusionApply3D<2,4>; break;
1028 case 0x35: kernel = PADGDiffusionApply3D<3,5>; break;
1029 case 0x46: kernel = PADGDiffusionApply3D<4,6>; break;
1030 case 0x57: kernel = PADGDiffusionApply3D<5,7>; break;
1031 case 0x68: kernel = PADGDiffusionApply3D<6,8>; break;
1032 case 0x79: kernel = PADGDiffusionApply3D<7,9>; break;
1033 case 0x8A: kernel = PADGDiffusionApply3D<8,10>; break;
1034 case 0x9B: kernel = PADGDiffusionApply3D<9,11>; break;
1035 }
1036 kernel(NF, B, Bt, G, Gt, sigma, pa_data, x, dxdn, y, dydn, D1D, Q1D);
1037 }
1038 else
1039 {
1040 MFEM_ABORT("Unsupported dimension");
1041 }
1042}
1043
1045 const Vector &x, const Vector &dxdn, Vector &y, Vector &dydn) const
1046{
1047 PADGDiffusionApply(dim, dofs1D, quad1D, nf,
1048 maps->B, maps->Bt, maps->G, maps->Gt,
1049 sigma, pa_data, x, dxdn, y, dydn);
1050}
1051
1052} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
void AssemblePAInteriorFaces(const FiniteElementSpace &fes) override
const IntegrationRule & GetRule(int order, FaceElementTransformations &T)
void AddMultPAFaceNormalDerivatives(const Vector &x, const Vector &dxdn, Vector &y, Vector &dydn) const override
Method for partially assembled action.
void AssemblePABoundaryFaces(const FiniteElementSpace &fes) override
const DofToQuad * maps
Not owned.
A basic generic Tensor class, appropriate for use on the GPU.
Definition dtensor.hpp:82
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition device.hpp:278
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition fe_base.hpp:165
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:178
Array< real_t > Gt
Transpose of G.
Definition fe_base.hpp:221
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:182
Array< real_t > Bt
Transpose of B.
Definition fe_base.hpp:199
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
int GetOrder() const
Returns the order of the integration rule.
Definition intrules.hpp:249
const IntegrationRule & Get(int GeomType, int Order)
Returns an integration rule for given GeomType and Order.
const IntegrationRule * IntRule
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
real_t sigma(const Vector &x)
Definition maxwell.cpp:102
real_t kappa
Definition ex24.cpp:54
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:341
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
DeviceTensor< 2, real_t > DeviceMatrix
Definition dtensor.hpp:143
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void SignedFaceNormalPermutation(int perm[3], const int face_id1, const int face_id2, const int orientation)
void FaceNormalPermutation(int perm[3], const int face_id)
@ COMPRESSED
Enable all above compressions.
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:762
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
void forall(int N, lambda &&body)
Definition forall.hpp:753
FaceType
Definition mesh.hpp:48
real_t p(const Vector &x, real_t t)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128