22static void PAConvectionSetup2D(
const int NQ,
24 const Array<real_t> &w,
30 constexpr int DIM = 2;
32 const bool const_v =
vel.Size() ==
DIM;
34 const auto W = w.Read();
36 const auto V = const_v ?
43 const int e = q_global / NQ;
44 const int q = q_global % NQ;
45 const real_t J11 = J(q,0,0,e);
46 const real_t J21 = J(q,1,0,e);
47 const real_t J12 = J(q,0,1,e);
48 const real_t J22 = J(q,1,1,e);
50 const real_t v0 = const_v ? V(0,0,0) : V(0,q,e);
51 const real_t v1 = const_v ? V(1,0,0) : V(1,q,e);
55 y(q,0,e) = wx * J22 - wy * J12;
56 y(q,1,e) = -wx * J21 + wy * J11;
61static void PAConvectionSetup3D(
const int NQ,
63 const Array<real_t> &w,
69 constexpr int DIM = 3;
71 const auto W =
Reshape(w.Read(), NQ);
73 const bool const_v =
vel.Size() ==
DIM;
74 const auto V = const_v ?
77 auto y =
Reshape(op.Write(), NQ,3,NE);
80 const int e = q_global / NQ;
81 const int q = q_global % NQ;
82 const real_t J11 = J(q,0,0,e);
83 const real_t J12 = J(q,0,1,e);
84 const real_t J13 = J(q,0,2,e);
85 const real_t J21 = J(q,1,0,e);
86 const real_t J22 = J(q,1,1,e);
87 const real_t J23 = J(q,1,2,e);
88 const real_t J31 = J(q,2,0,e);
89 const real_t J32 = J(q,2,1,e);
90 const real_t J33 = J(q,2,2,e);
92 const real_t v0 = const_v ? V(0,0,0) : V(0,q,e);
93 const real_t v1 = const_v ? V(1,0,0) : V(1,q,e);
94 const real_t v2 = const_v ? V(2,0,0) : V(2,q,e);
99 const real_t A11 = (J22 * J33) - (J23 * J32);
100 const real_t A12 = (J32 * J13) - (J12 * J33);
101 const real_t A13 = (J12 * J23) - (J22 * J13);
102 const real_t A21 = (J31 * J23) - (J21 * J33);
103 const real_t A22 = (J11 * J33) - (J13 * J31);
104 const real_t A23 = (J21 * J13) - (J11 * J23);
105 const real_t A31 = (J21 * J32) - (J31 * J22);
106 const real_t A32 = (J31 * J12) - (J11 * J32);
107 const real_t A33 = (J11 * J22) - (J12 * J21);
109 y(q,0,e) = wx * A11 + wy * A12 + wz * A13;
110 y(q,1,e) = wx * A21 + wy * A22 + wz * A23;
111 y(q,2,e) = wx * A31 + wy * A32 + wz * A33;
115static void PAConvectionSetup(
const int dim,
118 const Array<real_t> &W,
124 if (
dim == 1) { MFEM_ABORT(
"dim==1 not supported in PAConvectionSetup"); }
127 PAConvectionSetup2D(NQ, NE, W, J, coeff,
alpha, op);
131 PAConvectionSetup3D(NQ, NE, W, J, coeff,
alpha, op);
159 const int dims = el.
GetDim();
160 const int symmDims = dims;
185 MFEM_ABORT(
"AssembleDiagonalPA not yet implemented for"
186 " ConvectionIntegrator.");
191template<
int T_D1D = 0,
int T_Q1D = 0>
static
192void PAConvectionApply2D(
const int ne,
204 const int D1D = T_D1D ? T_D1D : d1d;
205 const int Q1D = T_Q1D ? T_Q1D : q1d;
208 auto B =
Reshape(
b.Read(), Q1D, D1D);
216 const int D1D = T_D1D ? T_D1D : d1d;
217 const int Q1D = T_Q1D ? T_Q1D : q1d;
219 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
220 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
223 for (
int dy = 0; dy < D1D; ++dy)
225 for (
int dx = 0; dx < D1D; ++dx)
227 u[dy][dx] = x(dx,dy,e);
230 real_t Bu[max_D1D][max_Q1D];
231 real_t Gu[max_D1D][max_Q1D];
232 for (
int dy = 0; dy < D1D; ++dy)
234 for (
int qx = 0; qx < Q1D; ++qx)
238 for (
int dx = 0; dx < D1D; ++dx)
240 const real_t bx = B(qx,dx);
241 const real_t gx = G(qx,dx);
243 Bu[dy][qx] += bx * x;
244 Gu[dy][qx] += gx * x;
248 real_t GBu[max_Q1D][max_Q1D];
249 real_t BGu[max_Q1D][max_Q1D];
250 for (
int qx = 0; qx < Q1D; ++qx)
252 for (
int qy = 0; qy < Q1D; ++qy)
256 for (
int dy = 0; dy < D1D; ++dy)
258 const real_t bx = B(qy,dy);
259 const real_t gx = G(qy,dy);
260 GBu[qy][qx] += gx * Bu[dy][qx];
261 BGu[qy][qx] += bx * Gu[dy][qx];
266 real_t DGu[max_Q1D][max_Q1D];
267 for (
int qy = 0; qy < Q1D; ++qy)
269 for (
int qx = 0; qx < Q1D; ++qx)
271 const real_t O1 = op(qx,qy,0,e);
272 const real_t O2 = op(qx,qy,1,e);
274 const real_t gradX = BGu[qy][qx];
275 const real_t gradY = GBu[qy][qx];
277 DGu[qy][qx] = (O1 * gradX) + (O2 * gradY);
280 real_t BDGu[max_D1D][max_Q1D];
281 for (
int qx = 0; qx < Q1D; ++qx)
283 for (
int dy = 0; dy < D1D; ++dy)
286 for (
int qy = 0; qy < Q1D; ++qy)
288 const real_t w = Bt(dy,qy);
289 BDGu[dy][qx] += w * DGu[qy][qx];
293 for (
int dx = 0; dx < D1D; ++dx)
295 for (
int dy = 0; dy < D1D; ++dy)
298 for (
int qx = 0; qx < Q1D; ++qx)
300 const real_t w = Bt(dx,qx);
301 BBDGu += w * BDGu[dy][qx];
310template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
311void SmemPAConvectionApply2D(
const int ne,
312 const Array<real_t> &
b,
313 const Array<real_t> &g,
314 const Array<real_t> &bt,
315 const Array<real_t> >,
323 const int D1D = T_D1D ? T_D1D : d1d;
324 const int Q1D = T_Q1D ? T_Q1D : q1d;
325 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
328 auto B =
Reshape(
b.Read(), Q1D, D1D);
329 auto G =
Reshape(g.Read(), Q1D, D1D);
330 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
331 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, NE);
332 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
333 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
336 const int tidz = MFEM_THREAD_ID(z);
337 const int D1D = T_D1D ? T_D1D : d1d;
338 const int Q1D = T_Q1D ? T_Q1D : q1d;
340 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
341 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
342 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
344 MFEM_SHARED
real_t u[NBZ][max_D1D][max_D1D];
345 MFEM_FOREACH_THREAD(dy,y,D1D)
347 MFEM_FOREACH_THREAD(dx,x,D1D)
350 u[tidz][dy][dx] = x(dx,dy,e);
354 MFEM_SHARED
real_t Bu[NBZ][max_D1D][max_Q1D];
355 MFEM_SHARED
real_t Gu[NBZ][max_D1D][max_Q1D];
356 MFEM_FOREACH_THREAD(dy,y,D1D)
358 MFEM_FOREACH_THREAD(qx,x,Q1D)
360 Bu[tidz][dy][qx] = 0.0;
361 Gu[tidz][dy][qx] = 0.0;
362 for (
int dx = 0; dx < D1D; ++dx)
364 const real_t bx = B(qx,dx);
365 const real_t gx = G(qx,dx);
366 const real_t x =
u[tidz][dy][dx];
367 Bu[tidz][dy][qx] += bx * x;
368 Gu[tidz][dy][qx] += gx * x;
373 MFEM_SHARED
real_t GBu[NBZ][max_Q1D][max_Q1D];
374 MFEM_SHARED
real_t BGu[NBZ][max_Q1D][max_Q1D];
375 MFEM_FOREACH_THREAD(qx,x,Q1D)
377 MFEM_FOREACH_THREAD(qy,y,Q1D)
379 GBu[tidz][qy][qx] = 0.0;
380 BGu[tidz][qy][qx] = 0.0;
381 for (
int dy = 0; dy < D1D; ++dy)
383 const real_t bx = B(qy,dy);
384 const real_t gx = G(qy,dy);
385 GBu[tidz][qy][qx] += gx * Bu[tidz][dy][qx];
386 BGu[tidz][qy][qx] += bx * Gu[tidz][dy][qx];
392 MFEM_SHARED
real_t DGu[NBZ][max_Q1D][max_Q1D];
393 MFEM_FOREACH_THREAD(qy,y,Q1D)
395 MFEM_FOREACH_THREAD(qx,x,Q1D)
397 const real_t O1 = op(qx,qy,0,e);
398 const real_t O2 = op(qx,qy,1,e);
400 const real_t gradX = BGu[tidz][qy][qx];
401 const real_t gradY = GBu[tidz][qy][qx];
403 DGu[tidz][qy][qx] = (O1 * gradX) + (O2 * gradY);
407 MFEM_SHARED
real_t BDGu[NBZ][max_D1D][max_Q1D];
408 MFEM_FOREACH_THREAD(qx,x,Q1D)
410 MFEM_FOREACH_THREAD(dy,y,D1D)
412 BDGu[tidz][dy][qx] = 0.0;
413 for (
int qy = 0; qy < Q1D; ++qy)
415 const real_t w = Bt(dy,qy);
416 BDGu[tidz][dy][qx] += w * DGu[tidz][qy][qx];
421 MFEM_FOREACH_THREAD(dx,x,D1D)
423 MFEM_FOREACH_THREAD(dy,y,D1D)
426 for (
int qx = 0; qx < Q1D; ++qx)
428 const real_t w = Bt(dx,qx);
429 BBDGu += w * BDGu[tidz][dy][qx];
438template<
int T_D1D = 0,
int T_Q1D = 0>
static
439void PAConvectionApply3D(
const int ne,
440 const Array<real_t> &
b,
441 const Array<real_t> &g,
442 const Array<real_t> &bt,
443 const Array<real_t> >,
451 const int D1D = T_D1D ? T_D1D : d1d;
452 const int Q1D = T_Q1D ? T_Q1D : q1d;
455 auto B =
Reshape(
b.Read(), Q1D, D1D);
456 auto G =
Reshape(g.Read(), Q1D, D1D);
457 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
458 auto op =
Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
459 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
460 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
463 const int D1D = T_D1D ? T_D1D : d1d;
464 const int Q1D = T_Q1D ? T_Q1D : q1d;
466 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
467 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
469 real_t u[max_D1D][max_D1D][max_D1D];
470 for (
int dz = 0; dz < D1D; ++dz)
472 for (
int dy = 0; dy < D1D; ++dy)
474 for (
int dx = 0; dx < D1D; ++dx)
476 u[dz][dy][dx] = x(dx,dy,dz,e);
480 real_t Bu[max_D1D][max_D1D][max_Q1D];
481 real_t Gu[max_D1D][max_D1D][max_Q1D];
482 for (
int dz = 0; dz < D1D; ++dz)
484 for (
int dy = 0; dy < D1D; ++dy)
486 for (
int qx = 0; qx < Q1D; ++qx)
488 Bu[dz][dy][qx] = 0.0;
489 Gu[dz][dy][qx] = 0.0;
490 for (
int dx = 0; dx < D1D; ++dx)
492 const real_t bx = B(qx,dx);
493 const real_t gx = G(qx,dx);
494 const real_t x =
u[dz][dy][dx];
495 Bu[dz][dy][qx] += bx * x;
496 Gu[dz][dy][qx] += gx * x;
501 real_t BBu[max_D1D][max_Q1D][max_Q1D];
502 real_t GBu[max_D1D][max_Q1D][max_Q1D];
503 real_t BGu[max_D1D][max_Q1D][max_Q1D];
504 for (
int dz = 0; dz < D1D; ++dz)
506 for (
int qx = 0; qx < Q1D; ++qx)
508 for (
int qy = 0; qy < Q1D; ++qy)
510 BBu[dz][qy][qx] = 0.0;
511 GBu[dz][qy][qx] = 0.0;
512 BGu[dz][qy][qx] = 0.0;
513 for (
int dy = 0; dy < D1D; ++dy)
515 const real_t bx = B(qy,dy);
516 const real_t gx = G(qy,dy);
517 BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
518 GBu[dz][qy][qx] += gx * Bu[dz][dy][qx];
519 BGu[dz][qy][qx] += bx * Gu[dz][dy][qx];
524 real_t GBBu[max_Q1D][max_Q1D][max_Q1D];
525 real_t BGBu[max_Q1D][max_Q1D][max_Q1D];
526 real_t BBGu[max_Q1D][max_Q1D][max_Q1D];
527 for (
int qx = 0; qx < Q1D; ++qx)
529 for (
int qy = 0; qy < Q1D; ++qy)
531 for (
int qz = 0; qz < Q1D; ++qz)
533 GBBu[qz][qy][qx] = 0.0;
534 BGBu[qz][qy][qx] = 0.0;
535 BBGu[qz][qy][qx] = 0.0;
536 for (
int dz = 0; dz < D1D; ++dz)
538 const real_t bx = B(qz,dz);
539 const real_t gx = G(qz,dz);
540 GBBu[qz][qy][qx] += gx * BBu[dz][qy][qx];
541 BGBu[qz][qy][qx] += bx * GBu[dz][qy][qx];
542 BBGu[qz][qy][qx] += bx * BGu[dz][qy][qx];
548 real_t DGu[max_Q1D][max_Q1D][max_Q1D];
549 for (
int qz = 0; qz < Q1D; ++qz)
551 for (
int qy = 0; qy < Q1D; ++qy)
553 for (
int qx = 0; qx < Q1D; ++qx)
555 const real_t O1 = op(qx,qy,qz,0,e);
556 const real_t O2 = op(qx,qy,qz,1,e);
557 const real_t O3 = op(qx,qy,qz,2,e);
559 const real_t gradX = BBGu[qz][qy][qx];
560 const real_t gradY = BGBu[qz][qy][qx];
561 const real_t gradZ = GBBu[qz][qy][qx];
563 DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ);
567 real_t BDGu[max_D1D][max_Q1D][max_Q1D];
568 for (
int qx = 0; qx < Q1D; ++qx)
570 for (
int qy = 0; qy < Q1D; ++qy)
572 for (
int dz = 0; dz < D1D; ++dz)
574 BDGu[dz][qy][qx] = 0.0;
575 for (
int qz = 0; qz < Q1D; ++qz)
577 const real_t w = Bt(dz,qz);
578 BDGu[dz][qy][qx] += w * DGu[qz][qy][qx];
583 real_t BBDGu[max_D1D][max_D1D][max_Q1D];
584 for (
int dz = 0; dz < D1D; ++dz)
586 for (
int qx = 0; qx < Q1D; ++qx)
588 for (
int dy = 0; dy < D1D; ++dy)
590 BBDGu[dz][dy][qx] = 0.0;
591 for (
int qy = 0; qy < Q1D; ++qy)
593 const real_t w = Bt(dy,qy);
594 BBDGu[dz][dy][qx] += w * BDGu[dz][qy][qx];
599 for (
int dz = 0; dz < D1D; ++dz)
601 for (
int dy = 0; dy < D1D; ++dy)
603 for (
int dx = 0; dx < D1D; ++dx)
606 for (
int qx = 0; qx < Q1D; ++qx)
608 const real_t w = Bt(dx,qx);
609 BBBDGu += w * BBDGu[dz][dy][qx];
611 y(dx,dy,dz,e) += BBBDGu;
619template<
int T_D1D = 0,
int T_Q1D = 0>
static
620void SmemPAConvectionApply3D(
const int ne,
621 const Array<real_t> &
b,
622 const Array<real_t> &g,
623 const Array<real_t> &bt,
624 const Array<real_t> >,
632 const int D1D = T_D1D ? T_D1D : d1d;
633 const int Q1D = T_Q1D ? T_Q1D : q1d;
636 auto B =
Reshape(
b.Read(), Q1D, D1D);
637 auto G =
Reshape(g.Read(), Q1D, D1D);
638 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
639 auto op =
Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
640 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
641 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
644 const int D1D = T_D1D ? T_D1D : d1d;
645 const int Q1D = T_Q1D ? T_Q1D : q1d;
647 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
648 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
649 constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
650 MFEM_SHARED
real_t sm0[max_DQ*max_DQ*max_DQ];
651 MFEM_SHARED
real_t sm1[max_DQ*max_DQ*max_DQ];
652 MFEM_SHARED
real_t sm2[max_DQ*max_DQ*max_DQ];
653 MFEM_SHARED
real_t sm3[max_DQ*max_DQ*max_DQ];
654 MFEM_SHARED
real_t sm4[max_DQ*max_DQ*max_DQ];
655 MFEM_SHARED
real_t sm5[max_DQ*max_DQ*max_DQ];
657 real_t (*
u)[max_D1D][max_D1D] = (
real_t (*)[max_D1D][max_D1D]) sm0;
658 MFEM_FOREACH_THREAD(dz,z,D1D)
660 MFEM_FOREACH_THREAD(dy,y,D1D)
662 MFEM_FOREACH_THREAD(dx,x,D1D)
664 u[dz][dy][dx] = x(dx,dy,dz,e);
669 real_t (*Bu)[max_D1D][max_Q1D] = (
real_t (*)[max_D1D][max_Q1D])sm1;
670 real_t (*Gu)[max_D1D][max_Q1D] = (
real_t (*)[max_D1D][max_Q1D])sm2;
671 MFEM_FOREACH_THREAD(dz,z,D1D)
673 MFEM_FOREACH_THREAD(dy,y,D1D)
675 MFEM_FOREACH_THREAD(qx,x,Q1D)
679 for (
int dx = 0; dx < D1D; ++dx)
681 const real_t bx = B(qx,dx);
682 const real_t gx = G(qx,dx);
683 const real_t x =
u[dz][dy][dx];
687 Bu[dz][dy][qx] = Bu_;
688 Gu[dz][dy][qx] = Gu_;
693 real_t (*BBu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm3;
694 real_t (*GBu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm4;
695 real_t (*BGu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm5;
696 MFEM_FOREACH_THREAD(dz,z,D1D)
698 MFEM_FOREACH_THREAD(qx,x,Q1D)
700 MFEM_FOREACH_THREAD(qy,y,Q1D)
705 for (
int dy = 0; dy < D1D; ++dy)
707 const real_t bx = B(qy,dy);
708 const real_t gx = G(qy,dy);
709 BBu_ += bx * Bu[dz][dy][qx];
710 GBu_ += gx * Bu[dz][dy][qx];
711 BGu_ += bx * Gu[dz][dy][qx];
713 BBu[dz][qy][qx] = BBu_;
714 GBu[dz][qy][qx] = GBu_;
715 BGu[dz][qy][qx] = BGu_;
720 real_t (*GBBu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm0;
721 real_t (*BGBu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm1;
722 real_t (*BBGu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm2;
723 MFEM_FOREACH_THREAD(qx,x,Q1D)
725 MFEM_FOREACH_THREAD(qy,y,Q1D)
727 MFEM_FOREACH_THREAD(qz,z,Q1D)
732 for (
int dz = 0; dz < D1D; ++dz)
734 const real_t bx = B(qz,dz);
735 const real_t gx = G(qz,dz);
736 GBBu_ += gx * BBu[dz][qy][qx];
737 BGBu_ += bx * GBu[dz][qy][qx];
738 BBGu_ += bx * BGu[dz][qy][qx];
740 GBBu[qz][qy][qx] = GBBu_;
741 BGBu[qz][qy][qx] = BGBu_;
742 BBGu[qz][qy][qx] = BBGu_;
747 real_t (*DGu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm3;
748 MFEM_FOREACH_THREAD(qz,z,Q1D)
750 MFEM_FOREACH_THREAD(qy,y,Q1D)
752 MFEM_FOREACH_THREAD(qx,x,Q1D)
754 const real_t O1 = op(qx,qy,qz,0,e);
755 const real_t O2 = op(qx,qy,qz,1,e);
756 const real_t O3 = op(qx,qy,qz,2,e);
758 const real_t gradX = BBGu[qz][qy][qx];
759 const real_t gradY = BGBu[qz][qy][qx];
760 const real_t gradZ = GBBu[qz][qy][qx];
762 DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ);
767 real_t (*BDGu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm4;
768 MFEM_FOREACH_THREAD(qx,x,Q1D)
770 MFEM_FOREACH_THREAD(qy,y,Q1D)
772 MFEM_FOREACH_THREAD(dz,z,D1D)
775 for (
int qz = 0; qz < Q1D; ++qz)
777 const real_t w = Bt(dz,qz);
778 BDGu_ += w * DGu[qz][qy][qx];
780 BDGu[dz][qy][qx] = BDGu_;
785 real_t (*BBDGu)[max_D1D][max_Q1D] = (
real_t (*)[max_D1D][max_Q1D])sm5;
786 MFEM_FOREACH_THREAD(dz,z,D1D)
788 MFEM_FOREACH_THREAD(qx,x,Q1D)
790 MFEM_FOREACH_THREAD(dy,y,D1D)
793 for (
int qy = 0; qy < Q1D; ++qy)
795 const real_t w = Bt(dy,qy);
796 BBDGu_ += w * BDGu[dz][qy][qx];
798 BBDGu[dz][dy][qx] = BBDGu_;
803 MFEM_FOREACH_THREAD(dz,z,D1D)
805 MFEM_FOREACH_THREAD(dy,y,D1D)
807 MFEM_FOREACH_THREAD(dx,x,D1D)
810 for (
int qx = 0; qx < Q1D; ++qx)
812 const real_t w = Bt(dx,qx);
813 BBBDGu += w * BBDGu[dz][dy][qx];
815 y(dx,dy,dz,e) += BBBDGu;
823template<
int T_D1D = 0,
int T_Q1D = 0>
static
824void PAConvectionApplyT2D(
const int ne,
825 const Array<real_t> &
b,
826 const Array<real_t> &g,
827 const Array<real_t> &bt,
828 const Array<real_t> >,
836 const int D1D = T_D1D ? T_D1D : d1d;
837 const int Q1D = T_Q1D ? T_Q1D : q1d;
840 auto B =
Reshape(
b.Read(), Q1D, D1D);
841 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
842 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
843 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, NE);
844 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
845 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
848 const int D1D = T_D1D ? T_D1D : d1d;
849 const int Q1D = T_Q1D ? T_Q1D : q1d;
851 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
852 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
855 for (
int dy = 0; dy < D1D; ++dy)
857 for (
int dx = 0; dx < D1D; ++dx)
859 u[dy][dx] = x(dx,dy,e);
862 real_t Bu[max_D1D][max_Q1D];
863 for (
int dy = 0; dy < D1D; ++dy)
865 for (
int qx = 0; qx < Q1D; ++qx)
868 for (
int dx = 0; dx < D1D; ++dx)
870 const real_t bx = B(qx,dx);
872 Bu[dy][qx] += bx * x;
876 real_t BBu[max_Q1D][max_Q1D];
877 for (
int qx = 0; qx < Q1D; ++qx)
879 for (
int qy = 0; qy < Q1D; ++qy)
882 for (
int dy = 0; dy < D1D; ++dy)
884 const real_t bx = B(qy,dy);
885 BBu[qy][qx] += bx * Bu[dy][qx];
890 real_t DBu[max_Q1D][max_Q1D][2];
891 for (
int qy = 0; qy < Q1D; ++qy)
893 for (
int qx = 0; qx < Q1D; ++qx)
895 const real_t O1 = op(qx,qy,0,e);
896 const real_t O2 = op(qx,qy,1,e);
898 const real_t X = BBu[qy][qx];
900 DBu[qy][qx][0] = O1 * X;
901 DBu[qy][qx][1] = O2 * X;
904 real_t GDBu[max_D1D][max_Q1D][2];
905 for (
int qx = 0; qx < Q1D; ++qx)
907 for (
int dy = 0; dy < D1D; ++dy)
909 GDBu[dy][qx][0] = 0.0;
910 GDBu[dy][qx][1] = 0.0;
911 for (
int qy = 0; qy < Q1D; ++qy)
913 const real_t by = Bt(dy,qy);
914 const real_t gy = Gt(dy,qy);
915 GDBu[dy][qx][0] += by * DBu[qy][qx][0];
916 GDBu[dy][qx][1] += gy * DBu[qy][qx][1];
920 for (
int dx = 0; dx < D1D; ++dx)
922 for (
int dy = 0; dy < D1D; ++dy)
925 for (
int qx = 0; qx < Q1D; ++qx)
927 const real_t bx = Bt(dx,qx);
928 const real_t gx = Gt(dx,qx);
929 res += gx * GDBu[dy][qx][0] + bx * GDBu[dy][qx][1];
938template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
939void SmemPAConvectionApplyT2D(
const int ne,
940 const Array<real_t> &
b,
941 const Array<real_t> &g,
942 const Array<real_t> &bt,
943 const Array<real_t> >,
951 const int D1D = T_D1D ? T_D1D : d1d;
952 const int Q1D = T_Q1D ? T_Q1D : q1d;
953 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
956 auto B =
Reshape(
b.Read(), Q1D, D1D);
957 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
958 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
959 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, NE);
960 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
961 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
964 const int tidz = MFEM_THREAD_ID(z);
965 const int D1D = T_D1D ? T_D1D : d1d;
966 const int Q1D = T_Q1D ? T_Q1D : q1d;
968 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
969 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
970 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
971 MFEM_SHARED
real_t u[NBZ][max_D1D][max_D1D];
972 MFEM_FOREACH_THREAD(dy,y,D1D)
974 MFEM_FOREACH_THREAD(dx,x,D1D)
977 u[tidz][dy][dx] = x(dx,dy,e);
981 MFEM_SHARED
real_t Bu[NBZ][max_D1D][max_Q1D];
982 MFEM_FOREACH_THREAD(dy,y,D1D)
984 MFEM_FOREACH_THREAD(qx,x,Q1D)
986 Bu[tidz][dy][qx] = 0.0;
987 for (
int dx = 0; dx < D1D; ++dx)
989 const real_t bx = B(qx,dx);
990 const real_t x =
u[tidz][dy][dx];
991 Bu[tidz][dy][qx] += bx * x;
996 MFEM_SHARED
real_t BBu[NBZ][max_Q1D][max_Q1D];
997 MFEM_FOREACH_THREAD(qx,x,Q1D)
999 MFEM_FOREACH_THREAD(qy,y,Q1D)
1001 BBu[tidz][qy][qx] = 0.0;
1002 for (
int dy = 0; dy < D1D; ++dy)
1004 const real_t bx = B(qy,dy);
1005 BBu[tidz][qy][qx] += bx * Bu[tidz][dy][qx];
1011 MFEM_SHARED
real_t DBu[NBZ][max_Q1D][max_Q1D][2];
1012 MFEM_FOREACH_THREAD(qy,y,Q1D)
1014 MFEM_FOREACH_THREAD(qx,x,Q1D)
1016 const real_t O1 = op(qx,qy,0,e);
1017 const real_t O2 = op(qx,qy,1,e);
1019 const real_t X = BBu[tidz][qy][qx];
1021 DBu[tidz][qy][qx][0] = O1 * X;
1022 DBu[tidz][qy][qx][1] = O2 * X;
1026 MFEM_SHARED
real_t GDBu[NBZ][max_D1D][max_Q1D][2];
1027 MFEM_FOREACH_THREAD(qx,x,Q1D)
1029 MFEM_FOREACH_THREAD(dy,y,D1D)
1031 GDBu[tidz][dy][qx][0] = 0.0;
1032 GDBu[tidz][dy][qx][1] = 0.0;
1033 for (
int qy = 0; qy < Q1D; ++qy)
1035 const real_t by = Bt(dy,qy);
1036 const real_t gy = Gt(dy,qy);
1037 GDBu[tidz][dy][qx][0] += by * DBu[tidz][qy][qx][0];
1038 GDBu[tidz][dy][qx][1] += gy * DBu[tidz][qy][qx][1];
1043 MFEM_FOREACH_THREAD(dx,x,D1D)
1045 MFEM_FOREACH_THREAD(dy,y,D1D)
1048 for (
int qx = 0; qx < Q1D; ++qx)
1050 const real_t bx = Bt(dx,qx);
1051 const real_t gx = Gt(dx,qx);
1052 res += gx * GDBu[tidz][dy][qx][0] + bx * GDBu[tidz][dy][qx][1];
1061template<
int T_D1D = 0,
int T_Q1D = 0>
static
1062void PAConvectionApplyT3D(
const int ne,
1063 const Array<real_t> &
b,
1064 const Array<real_t> &g,
1065 const Array<real_t> &bt,
1066 const Array<real_t> >,
1074 const int D1D = T_D1D ? T_D1D : d1d;
1075 const int Q1D = T_Q1D ? T_Q1D : q1d;
1078 auto B =
Reshape(
b.Read(), Q1D, D1D);
1079 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
1080 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
1081 auto op =
Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1082 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1083 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1086 const int D1D = T_D1D ? T_D1D : d1d;
1087 const int Q1D = T_Q1D ? T_Q1D : q1d;
1089 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
1090 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
1092 real_t u[max_D1D][max_D1D][max_D1D];
1093 for (
int dz = 0; dz < D1D; ++dz)
1095 for (
int dy = 0; dy < D1D; ++dy)
1097 for (
int dx = 0; dx < D1D; ++dx)
1099 u[dz][dy][dx] = x(dx,dy,dz,e);
1103 real_t Bu[max_D1D][max_D1D][max_Q1D];
1104 for (
int dz = 0; dz < D1D; ++dz)
1106 for (
int dy = 0; dy < D1D; ++dy)
1108 for (
int qx = 0; qx < Q1D; ++qx)
1110 Bu[dz][dy][qx] = 0.0;
1111 for (
int dx = 0; dx < D1D; ++dx)
1113 const real_t bx = B(qx,dx);
1114 const real_t x =
u[dz][dy][dx];
1115 Bu[dz][dy][qx] += bx * x;
1120 real_t BBu[max_D1D][max_Q1D][max_Q1D];
1121 for (
int dz = 0; dz < D1D; ++dz)
1123 for (
int qx = 0; qx < Q1D; ++qx)
1125 for (
int qy = 0; qy < Q1D; ++qy)
1127 BBu[dz][qy][qx] = 0.0;
1128 for (
int dy = 0; dy < D1D; ++dy)
1130 const real_t bx = B(qy,dy);
1131 BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
1136 real_t BBBu[max_Q1D][max_Q1D][max_Q1D];
1137 for (
int qx = 0; qx < Q1D; ++qx)
1139 for (
int qy = 0; qy < Q1D; ++qy)
1141 for (
int qz = 0; qz < Q1D; ++qz)
1143 BBBu[qz][qy][qx] = 0.0;
1144 for (
int dz = 0; dz < D1D; ++dz)
1146 const real_t bx = B(qz,dz);
1147 BBBu[qz][qy][qx] += bx * BBu[dz][qy][qx];
1153 real_t DBu[max_Q1D][max_Q1D][max_Q1D][3];
1154 for (
int qz = 0; qz < Q1D; ++qz)
1156 for (
int qy = 0; qy < Q1D; ++qy)
1158 for (
int qx = 0; qx < Q1D; ++qx)
1160 const real_t O1 = op(qx,qy,qz,0,e);
1161 const real_t O2 = op(qx,qy,qz,1,e);
1162 const real_t O3 = op(qx,qy,qz,2,e);
1164 const real_t X = BBBu[qz][qy][qx];
1166 DBu[qz][qy][qx][0] = O1 * X;
1167 DBu[qz][qy][qx][1] = O2 * X;
1168 DBu[qz][qy][qx][2] = O3 * X;
1172 real_t GDBu[max_D1D][max_Q1D][max_Q1D][3];
1173 for (
int qx = 0; qx < Q1D; ++qx)
1175 for (
int qy = 0; qy < Q1D; ++qy)
1177 for (
int dz = 0; dz < D1D; ++dz)
1179 GDBu[dz][qy][qx][0] = 0.0;
1180 GDBu[dz][qy][qx][1] = 0.0;
1181 GDBu[dz][qy][qx][2] = 0.0;
1182 for (
int qz = 0; qz < Q1D; ++qz)
1184 const real_t bz = Bt(dz,qz);
1185 const real_t gz = Gt(dz,qz);
1186 GDBu[dz][qy][qx][0] += bz * DBu[qz][qy][qx][0];
1187 GDBu[dz][qy][qx][1] += bz * DBu[qz][qy][qx][1];
1188 GDBu[dz][qy][qx][2] += gz * DBu[qz][qy][qx][2];
1193 real_t GGDBu[max_D1D][max_D1D][max_Q1D][3];
1194 for (
int dz = 0; dz < D1D; ++dz)
1196 for (
int qx = 0; qx < Q1D; ++qx)
1198 for (
int dy = 0; dy < D1D; ++dy)
1200 GGDBu[dz][dy][qx][0] = 0.0;
1201 GGDBu[dz][dy][qx][1] = 0.0;
1202 GGDBu[dz][dy][qx][2] = 0.0;
1203 for (
int qy = 0; qy < Q1D; ++qy)
1205 const real_t by = Bt(dy,qy);
1206 const real_t gy = Gt(dy,qy);
1207 GGDBu[dz][dy][qx][0] += by * GDBu[dz][qy][qx][0];
1208 GGDBu[dz][dy][qx][1] += gy * GDBu[dz][qy][qx][1];
1209 GGDBu[dz][dy][qx][2] += by * GDBu[dz][qy][qx][2];
1214 for (
int dz = 0; dz < D1D; ++dz)
1216 for (
int dy = 0; dy < D1D; ++dy)
1218 for (
int dx = 0; dx < D1D; ++dx)
1221 for (
int qx = 0; qx < Q1D; ++qx)
1223 const real_t bx = Bt(dx,qx);
1224 const real_t gx = Gt(dx,qx);
1225 res += gx * GGDBu[dz][dy][qx][0];
1226 res += bx * GGDBu[dz][dy][qx][1];
1227 res += bx * GGDBu[dz][dy][qx][2];
1229 y(dx,dy,dz,e) += res;
1237template<
int T_D1D = 0,
int T_Q1D = 0>
static
1238void SmemPAConvectionApplyT3D(
const int ne,
1239 const Array<real_t> &
b,
1240 const Array<real_t> &g,
1241 const Array<real_t> &bt,
1242 const Array<real_t> >,
1250 const int D1D = T_D1D ? T_D1D : d1d;
1251 const int Q1D = T_Q1D ? T_Q1D : q1d;
1254 auto B =
Reshape(
b.Read(), Q1D, D1D);
1255 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
1256 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
1257 auto op =
Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1258 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1259 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1262 const int D1D = T_D1D ? T_D1D : d1d;
1263 const int Q1D = T_Q1D ? T_Q1D : q1d;
1265 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
1266 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
1267 constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
1268 MFEM_SHARED
real_t sm0[3*max_DQ*max_DQ*max_DQ];
1269 MFEM_SHARED
real_t sm1[3*max_DQ*max_DQ*max_DQ];
1271 real_t (*
u)[max_D1D][max_D1D] = (
real_t (*)[max_D1D][max_D1D]) sm0;
1272 MFEM_FOREACH_THREAD(dz,z,D1D)
1274 MFEM_FOREACH_THREAD(dy,y,D1D)
1276 MFEM_FOREACH_THREAD(dx,x,D1D)
1278 u[dz][dy][dx] = x(dx,dy,dz,e);
1283 real_t (*Bu)[max_D1D][max_Q1D] = (
real_t (*)[max_D1D][max_Q1D])sm1;
1284 MFEM_FOREACH_THREAD(dz,z,D1D)
1286 MFEM_FOREACH_THREAD(dy,y,D1D)
1288 MFEM_FOREACH_THREAD(qx,x,Q1D)
1291 for (
int dx = 0; dx < D1D; ++dx)
1293 const real_t bx = B(qx,dx);
1294 const real_t x =
u[dz][dy][dx];
1297 Bu[dz][dy][qx] = Bu_;
1302 real_t (*BBu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm0;
1303 MFEM_FOREACH_THREAD(dz,z,D1D)
1305 MFEM_FOREACH_THREAD(qx,x,Q1D)
1307 MFEM_FOREACH_THREAD(qy,y,Q1D)
1310 for (
int dy = 0; dy < D1D; ++dy)
1312 const real_t bx = B(qy,dy);
1313 BBu_ += bx * Bu[dz][dy][qx];
1315 BBu[dz][qy][qx] = BBu_;
1320 real_t (*BBBu)[max_Q1D][max_Q1D] = (
real_t (*)[max_Q1D][max_Q1D])sm1;
1321 MFEM_FOREACH_THREAD(qx,x,Q1D)
1323 MFEM_FOREACH_THREAD(qy,y,Q1D)
1325 MFEM_FOREACH_THREAD(qz,z,Q1D)
1328 for (
int dz = 0; dz < D1D; ++dz)
1330 const real_t bx = B(qz,dz);
1331 BBBu_ += bx * BBu[dz][qy][qx];
1333 BBBu[qz][qy][qx] = BBBu_;
1338 real_t (*DBu)[max_Q1D][max_Q1D][3] = (
real_t (*)[max_Q1D][max_Q1D][3])sm0;
1339 MFEM_FOREACH_THREAD(qz,z,Q1D)
1341 MFEM_FOREACH_THREAD(qy,y,Q1D)
1343 MFEM_FOREACH_THREAD(qx,x,Q1D)
1345 const real_t O1 = op(qx,qy,qz,0,e);
1346 const real_t O2 = op(qx,qy,qz,1,e);
1347 const real_t O3 = op(qx,qy,qz,2,e);
1349 const real_t X = BBBu[qz][qy][qx];
1351 DBu[qz][qy][qx][0] = O1 * X;
1352 DBu[qz][qy][qx][1] = O2 * X;
1353 DBu[qz][qy][qx][2] = O3 * X;
1358 real_t (*GDBu)[max_Q1D][max_Q1D][3] = (
real_t (*)[max_Q1D][max_Q1D][3])sm1;
1359 MFEM_FOREACH_THREAD(qx,x,Q1D)
1361 MFEM_FOREACH_THREAD(qy,y,Q1D)
1363 MFEM_FOREACH_THREAD(dz,z,D1D)
1368 for (
int qz = 0; qz < Q1D; ++qz)
1370 const real_t bz = Bt(dz,qz);
1371 const real_t gz = Gt(dz,qz);
1372 GDBu0 += bz * DBu[qz][qy][qx][0];
1373 GDBu1 += bz * DBu[qz][qy][qx][1];
1374 GDBu2 += gz * DBu[qz][qy][qx][2];
1376 GDBu[dz][qy][qx][0] = GDBu0;
1377 GDBu[dz][qy][qx][1] = GDBu1;
1378 GDBu[dz][qy][qx][2] = GDBu2;
1383 real_t (*GGDBu)[max_D1D][max_Q1D][3] = (
real_t (*)[max_D1D][max_Q1D][3])sm0;
1384 MFEM_FOREACH_THREAD(dz,z,D1D)
1386 MFEM_FOREACH_THREAD(qx,x,Q1D)
1388 MFEM_FOREACH_THREAD(dy,y,D1D)
1393 for (
int qy = 0; qy < Q1D; ++qy)
1395 const real_t by = Bt(dy,qy);
1396 const real_t gy = Gt(dy,qy);
1397 GGDBu0 += by * GDBu[dz][qy][qx][0];
1398 GGDBu1 += gy * GDBu[dz][qy][qx][1];
1399 GGDBu2 += by * GDBu[dz][qy][qx][2];
1401 GGDBu[dz][dy][qx][0] = GGDBu0;
1402 GGDBu[dz][dy][qx][1] = GGDBu1;
1403 GGDBu[dz][dy][qx][2] = GGDBu2;
1408 MFEM_FOREACH_THREAD(dz,z,D1D)
1410 MFEM_FOREACH_THREAD(dy,y,D1D)
1412 MFEM_FOREACH_THREAD(dx,x,D1D)
1415 for (
int qx = 0; qx < Q1D; ++qx)
1417 const real_t bx = Bt(dx,qx);
1418 const real_t gx = Gt(dx,qx);
1419 res += gx * GGDBu[dz][dy][qx][0];
1420 res += bx * GGDBu[dz][dy][qx][1];
1421 res += bx * GGDBu[dz][dy][qx][2];
1423 y(dx,dy,dz,e) += res;
1430static void PAConvectionApply(
const int dim,
1434 const Array<real_t> &B,
1435 const Array<real_t> &G,
1436 const Array<real_t> &Bt,
1437 const Array<real_t> &Gt,
1444 switch ((D1D << 4 ) | Q1D)
1446 case 0x22:
return SmemPAConvectionApply2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1447 case 0x33:
return SmemPAConvectionApply2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1448 case 0x34:
return SmemPAConvectionApply2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1449 case 0x44:
return SmemPAConvectionApply2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1450 case 0x46:
return SmemPAConvectionApply2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1451 case 0x55:
return SmemPAConvectionApply2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1452 case 0x58:
return SmemPAConvectionApply2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1453 case 0x66:
return SmemPAConvectionApply2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1454 case 0x77:
return SmemPAConvectionApply2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1455 case 0x88:
return SmemPAConvectionApply2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1456 case 0x99:
return SmemPAConvectionApply2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1457 default:
return PAConvectionApply2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1462 switch ((D1D << 4 ) | Q1D)
1464 case 0x22:
return SmemPAConvectionApply3D<2,2>(NE,B,G,Bt,Gt,op,x,y);
1465 case 0x23:
return SmemPAConvectionApply3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1466 case 0x24:
return SmemPAConvectionApply3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1467 case 0x26:
return SmemPAConvectionApply3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1468 case 0x34:
return SmemPAConvectionApply3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1469 case 0x35:
return SmemPAConvectionApply3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1470 case 0x45:
return SmemPAConvectionApply3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1471 case 0x48:
return SmemPAConvectionApply3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1472 case 0x56:
return SmemPAConvectionApply3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1473 case 0x67:
return SmemPAConvectionApply3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1474 case 0x78:
return SmemPAConvectionApply3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1475 case 0x89:
return SmemPAConvectionApply3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1476 default:
return PAConvectionApply3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1479 MFEM_ABORT(
"Unknown kernel.");
1482static void PAConvectionApplyT(
const int dim,
1486 const Array<real_t> &B,
1487 const Array<real_t> &G,
1488 const Array<real_t> &Bt,
1489 const Array<real_t> &Gt,
1496 switch ((D1D << 4 ) | Q1D)
1498 case 0x22:
return SmemPAConvectionApplyT2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1499 case 0x33:
return SmemPAConvectionApplyT2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1500 case 0x34:
return SmemPAConvectionApplyT2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1501 case 0x44:
return SmemPAConvectionApplyT2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1502 case 0x46:
return SmemPAConvectionApplyT2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1503 case 0x55:
return SmemPAConvectionApplyT2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1504 case 0x58:
return SmemPAConvectionApplyT2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1505 case 0x66:
return SmemPAConvectionApplyT2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1506 case 0x77:
return SmemPAConvectionApplyT2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1507 case 0x88:
return SmemPAConvectionApplyT2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1508 case 0x99:
return SmemPAConvectionApplyT2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1509 default:
return PAConvectionApplyT2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1514 switch ((D1D << 4 ) | Q1D)
1516 case 0x22:
return SmemPAConvectionApplyT3D<2,2>(NE,B,G,Bt,Gt,op,x,y);
1517 case 0x23:
return SmemPAConvectionApplyT3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1518 case 0x24:
return SmemPAConvectionApplyT3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1519 case 0x26:
return SmemPAConvectionApplyT3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1520 case 0x34:
return SmemPAConvectionApplyT3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1521 case 0x35:
return SmemPAConvectionApplyT3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1522 case 0x45:
return SmemPAConvectionApplyT3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1523 case 0x48:
return SmemPAConvectionApplyT3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1524 case 0x56:
return SmemPAConvectionApplyT3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1525 case 0x67:
return SmemPAConvectionApplyT3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1526 case 0x78:
return SmemPAConvectionApplyT3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1527 case 0x89:
return SmemPAConvectionApplyT3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1528 default:
return PAConvectionApplyT3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1531 MFEM_ABORT(
"Unknown kernel.");
1552 MFEM_ABORT(
"AddMultPA not yet implemented with libCEED for"
1553 " ConvectionIntegrator.");
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Class to represent a coefficient evaluated at quadrature points.
const DofToQuad * maps
Not owned.
static const IntegrationRule & GetRule(const FiniteElement &el, const ElementTransformation &Trans)
void AssembleDiagonalPA(Vector &diag) override
Assemble diagonal and add it to Vector diag.
void AssemblePA(const FiniteElementSpace &) override
Method defining partial assembly.
void AddMultPA(const Vector &, Vector &) const override
Method for partially assembled action.
const GeometricFactors * geom
Not owned.
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Array< real_t > B
Basis functions evaluated at quadrature points.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Array< real_t > Gt
Transpose of G.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Array< real_t > Bt
Transpose of B.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
int GetNE() const
Returns number of elements in the mesh.
Mesh * GetMesh() const
Returns the mesh.
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Abstract class for all finite elements.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
int GetDim() const
Returns the reference space dimension for the finite element.
Vector J
Jacobians of the element transformations at all quadrature points.
Class for an integration rule - an Array of IntegrationPoint.
int GetNPoints() const
Returns the number of the points in the integration rule.
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
const IntegrationRule * IntRule
int Dimension() const
Dimension of the reference space used within the elements.
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Class representing the storage layout of a QuadratureFunction.
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void SetSize(int s)
Resize the vector to size s.
void GetDiagonal(mfem::Vector &diag) const
void AddMult(const mfem::Vector &x, mfem::Vector &y, const real_t a=1.0) const override
Operator application: y+=A(x) (default) or y+=a*A(x).
Represent a ConvectionIntegrator with AssemblyLevel::Partial using libCEED.
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
@ COMPRESSED
Enable all above compressions.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
MemoryType
Memory types supported by MFEM.
void forall(int N, lambda &&body)
void vel(const Vector &x, real_t t, Vector &u)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.