22static void PADGTraceSetup2D(
const int Q1D,
24 const Array<real_t> &w,
35 auto d =
Reshape(det.Read(), Q1D, NF);
36 auto n =
Reshape(nor.Read(), Q1D, VDIM, NF);
37 const bool const_r = rho.Size() == 1;
40 const bool const_v =
vel.Size() == 2;
44 auto qd =
Reshape(op.Write(), Q1D, 2, 2, NF);
48 const int f = tid / Q1D;
49 const int q = tid % Q1D;
51 const real_t r = const_r ? R(0,0) : R(q,
f);
52 const real_t v0 = const_v ? V(0,0,0) : V(0,q,
f);
53 const real_t v1 = const_v ? V(1,0,0) : V(1,q,
f);
54 const real_t dot = n(q,0,
f) * v0 + n(q,1,
f) * v1;
55 const real_t abs = dot > 0_r ? dot : -dot;
57 qd(q,0,0,
f) = w*(
alpha/2 * dot +
beta * abs );
58 qd(q,1,0,
f) = w*(
alpha/2 * dot -
beta * abs );
59 qd(q,0,1,
f) = w*(-
alpha/2 * dot -
beta * abs );
60 qd(q,1,1,
f) = w*(-
alpha/2 * dot +
beta * abs );
65static void PADGTraceSetup3D(
const int Q1D,
67 const Array<real_t> &w,
78 auto d =
Reshape(det.Read(), Q1D, Q1D, NF);
79 auto n =
Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
80 const bool const_r = rho.Size() == 1;
83 const bool const_v =
vel.Size() == 3;
87 auto qd =
Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
91 int f = tid / (Q1D * Q1D);
92 int q2 = (tid / Q1D) % Q1D;
96 const real_t r = const_r ? R(0,0,0) : R(q1,q2,
f);
97 const real_t v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,
f);
98 const real_t v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,
f);
99 const real_t v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,
f);
100 const real_t dot = n(q1,q2,0,
f) * v0 + n(q1,q2,1,
f) * v1 +
102 const real_t abs = dot > 0.0 ? dot : -dot;
103 const real_t w = W[q1+q2*Q1D]*r*d(q1,q2,
f);
104 qd(q1,q2,0,0,
f) = w*(
alpha/2 * dot +
beta * abs );
105 qd(q1,q2,1,0,
f) = w*(
alpha/2 * dot -
beta * abs );
106 qd(q1,q2,0,1,
f) = w*(-
alpha/2 * dot -
beta * abs );
107 qd(q1,q2,1,1,
f) = w*(-
alpha/2 * dot +
beta * abs );
113static void PADGTraceSetup(
const int dim,
117 const Array<real_t> &W,
126 if (
dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADGTraceSetup"); }
129 PADGTraceSetup2D(Q1D, NF, W, det, nor, rho,
u,
alpha,
beta, op);
133 PADGTraceSetup3D(Q1D, NF, W, det, nor, rho,
u,
alpha,
beta, op);
137void DGTraceIntegrator::SetupPA(
const FiniteElementSpace &fes,
FaceType type)
142 nf = fes.GetNFbyType(type);
143 if (
nf==0) {
return; }
145 Mesh *mesh = fes.GetMesh();
146 const FiniteElement &el = *fes.GetTypicalTraceElement();
147 const IntegrationRule *ir =
IntRule?
149 &
GetRule(el.GetGeomType(), el.GetOrder(),
150 *mesh->GetTypicalElementTransformation());
151 const int symmDims = 4;
152 nq = ir->GetNPoints();
153 dim = mesh->Dimension();
154 geom = mesh->GetFaceGeometricFactors(
163 FaceQuadratureSpace qs(*mesh, *ir, type);
171 else if (ConstantCoefficient *const_rho =
dynamic_cast<ConstantCoefficient*
>
174 r.SetConstant(const_rho->constant);
176 else if (QuadratureFunctionCoefficient* qf_rho =
177 dynamic_cast<QuadratureFunctionCoefficient*
>(
rho))
179 r.MakeRef(qf_rho->GetQuadFunction());
188 for (
int f = 0;
f < mesh->GetNumFacesWithGhost(); ++
f)
190 Mesh::FaceInformation face = mesh->GetFaceInformation(
f);
191 if (face.IsNonconformingCoarse() || !face.IsOfFaceType(type))
197 FaceElementTransformations &T =
198 *fes.GetMesh()->GetFaceElementTransformations(
f);
199 for (
int q = 0; q <
nq; ++q)
205 T.SetAllIntPoints(&ir->IntPoint(q));
206 const IntegrationPoint &eip1 = T.GetElement1IntPoint();
207 const IntegrationPoint &eip2 = T.GetElement2IntPoint();
210 if (face.IsBoundary())
212 rq =
rho->
Eval(*T.Elem1, eip1);
217 for (
int d=0; d<
dim; ++d)
219 udotn += C_vel(d,iq,f_ind)*n(iq,d,f_ind);
221 if (udotn >= 0.0) { rq =
rho->
Eval(*T.Elem2, eip2); }
222 else { rq =
rho->
Eval(*T.Elem1, eip1); }
228 MFEM_VERIFY(f_ind==
nf,
"Incorrect number of faces.");
246template<
int T_D1D = 0,
int T_Q1D = 0>
static
247void PADGTraceApply2D(
const int NF,
257 const int D1D = T_D1D ? T_D1D : d1d;
258 const int Q1D = T_Q1D ? T_Q1D : q1d;
261 auto B =
Reshape(
b.Read(), Q1D, D1D);
270 const int D1D = T_D1D ? T_D1D : d1d;
271 const int Q1D = T_Q1D ? T_Q1D : q1d;
273 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
274 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
277 for (
int d = 0; d < D1D; d++)
279 for (
int c = 0; c < VDIM; c++)
281 u0[d][c] = x(d,c,0,
f);
282 u1[d][c] = x(d,c,1,
f);
285 real_t Bu0[max_Q1D][VDIM];
286 real_t Bu1[max_Q1D][VDIM];
287 for (
int q = 0; q < Q1D; ++q)
289 for (
int c = 0; c < VDIM; c++)
294 for (
int d = 0; d < D1D; ++d)
297 for (
int c = 0; c < VDIM; c++)
299 Bu0[q][c] +=
b*u0[d][c];
300 Bu1[q][c] +=
b*u1[d][c];
304 real_t DBu[max_Q1D][VDIM];
305 for (
int q = 0; q < Q1D; ++q)
307 for (
int c = 0; c < VDIM; c++)
309 DBu[q][c] = op(q,0,0,
f)*Bu0[q][c] + op(q,1,0,
f)*Bu1[q][c];
312 real_t BDBu[max_D1D][VDIM];
313 for (
int d = 0; d < D1D; ++d)
315 for (
int c = 0; c < VDIM; c++)
319 for (
int q = 0; q < Q1D; ++q)
322 for (
int c = 0; c < VDIM; c++)
324 BDBu[d][c] +=
b*DBu[q][c];
327 for (
int c = 0; c < VDIM; c++)
329 y(d,c,0,
f) += BDBu[d][c];
330 y(d,c,1,
f) += -BDBu[d][c];
337template<
int T_D1D = 0,
int T_Q1D = 0>
static
338void PADGTraceApply3D(
const int NF,
339 const Array<real_t> &
b,
340 const Array<real_t> &bt,
348 const int D1D = T_D1D ? T_D1D : d1d;
349 const int Q1D = T_Q1D ? T_Q1D : q1d;
352 auto B =
Reshape(
b.Read(), Q1D, D1D);
353 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
354 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
355 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
356 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
361 const int D1D = T_D1D ? T_D1D : d1d;
362 const int Q1D = T_Q1D ? T_Q1D : q1d;
364 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
365 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
366 real_t u0[max_D1D][max_D1D][VDIM];
367 real_t u1[max_D1D][max_D1D][VDIM];
368 for (
int d1 = 0; d1 < D1D; d1++)
370 for (
int d2 = 0; d2 < D1D; d2++)
372 for (
int c = 0; c < VDIM; c++)
374 u0[d1][d2][c] = x(d1,d2,c,0,
f);
375 u1[d1][d2][c] = x(d1,d2,c,1,
f);
379 real_t Bu0[max_Q1D][max_D1D][VDIM];
380 real_t Bu1[max_Q1D][max_D1D][VDIM];
381 for (
int q = 0; q < Q1D; ++q)
383 for (
int d2 = 0; d2 < D1D; d2++)
385 for (
int c = 0; c < VDIM; c++)
390 for (
int d1 = 0; d1 < D1D; ++d1)
393 for (
int c = 0; c < VDIM; c++)
395 Bu0[q][d2][c] +=
b*u0[d1][d2][c];
396 Bu1[q][d2][c] +=
b*u1[d1][d2][c];
401 real_t BBu0[max_Q1D][max_Q1D][VDIM];
402 real_t BBu1[max_Q1D][max_Q1D][VDIM];
403 for (
int q1 = 0; q1 < Q1D; ++q1)
405 for (
int q2 = 0; q2 < Q1D; q2++)
407 for (
int c = 0; c < VDIM; c++)
409 BBu0[q1][q2][c] = 0.0;
410 BBu1[q1][q2][c] = 0.0;
412 for (
int d2 = 0; d2 < D1D; ++d2)
415 for (
int c = 0; c < VDIM; c++)
417 BBu0[q1][q2][c] +=
b*Bu0[q1][d2][c];
418 BBu1[q1][q2][c] +=
b*Bu1[q1][d2][c];
423 real_t DBBu[max_Q1D][max_Q1D][VDIM];
424 for (
int q1 = 0; q1 < Q1D; ++q1)
426 for (
int q2 = 0; q2 < Q1D; q2++)
428 for (
int c = 0; c < VDIM; c++)
430 DBBu[q1][q2][c] = op(q1,q2,0,0,
f)*BBu0[q1][q2][c] +
431 op(q1,q2,1,0,
f)*BBu1[q1][q2][c];
435 real_t BDBBu[max_Q1D][max_D1D][VDIM];
436 for (
int q1 = 0; q1 < Q1D; ++q1)
438 for (
int d2 = 0; d2 < D1D; d2++)
440 for (
int c = 0; c < VDIM; c++)
442 BDBBu[q1][d2][c] = 0.0;
444 for (
int q2 = 0; q2 < Q1D; ++q2)
447 for (
int c = 0; c < VDIM; c++)
449 BDBBu[q1][d2][c] +=
b*DBBu[q1][q2][c];
454 real_t BBDBBu[max_D1D][max_D1D][VDIM];
455 for (
int d1 = 0; d1 < D1D; ++d1)
457 for (
int d2 = 0; d2 < D1D; d2++)
459 for (
int c = 0; c < VDIM; c++)
461 BBDBBu[d1][d2][c] = 0.0;
463 for (
int q1 = 0; q1 < Q1D; ++q1)
466 for (
int c = 0; c < VDIM; c++)
468 BBDBBu[d1][d2][c] +=
b*BDBBu[q1][d2][c];
471 for (
int c = 0; c < VDIM; c++)
473 y(d1,d2,c,0,
f) += BBDBBu[d1][d2][c];
474 y(d1,d2,c,1,
f) += -BBDBBu[d1][d2][c];
482template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
483void SmemPADGTraceApply3D(
const int NF,
484 const Array<real_t> &
b,
485 const Array<real_t> &bt,
492 const int D1D = T_D1D ? T_D1D : d1d;
493 const int Q1D = T_Q1D ? T_Q1D : q1d;
494 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
497 auto B =
Reshape(
b.Read(), Q1D, D1D);
498 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
499 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
500 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
501 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
505 const int tidz = MFEM_THREAD_ID(z);
506 const int D1D = T_D1D ? T_D1D : d1d;
507 const int Q1D = T_Q1D ? T_Q1D : q1d;
509 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
510 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
511 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
512 MFEM_SHARED
real_t u0[NBZ][max_D1D][max_D1D];
513 MFEM_SHARED
real_t u1[NBZ][max_D1D][max_D1D];
514 MFEM_FOREACH_THREAD(d1,x,D1D)
516 MFEM_FOREACH_THREAD(d2,y,D1D)
518 u0[tidz][d1][d2] = x(d1,d2,0,
f);
519 u1[tidz][d1][d2] = x(d1,d2,1,
f);
523 MFEM_SHARED
real_t Bu0[NBZ][max_Q1D][max_D1D];
524 MFEM_SHARED
real_t Bu1[NBZ][max_Q1D][max_D1D];
525 MFEM_FOREACH_THREAD(q1,x,Q1D)
527 MFEM_FOREACH_THREAD(d2,y,D1D)
531 for (
int d1 = 0; d1 < D1D; ++d1)
534 Bu0_ +=
b*u0[tidz][d1][d2];
535 Bu1_ +=
b*u1[tidz][d1][d2];
537 Bu0[tidz][q1][d2] = Bu0_;
538 Bu1[tidz][q1][d2] = Bu1_;
542 MFEM_SHARED
real_t BBu0[NBZ][max_Q1D][max_Q1D];
543 MFEM_SHARED
real_t BBu1[NBZ][max_Q1D][max_Q1D];
544 MFEM_FOREACH_THREAD(q1,x,Q1D)
546 MFEM_FOREACH_THREAD(q2,y,Q1D)
550 for (
int d2 = 0; d2 < D1D; ++d2)
553 BBu0_ +=
b*Bu0[tidz][q1][d2];
554 BBu1_ +=
b*Bu1[tidz][q1][d2];
556 BBu0[tidz][q1][q2] = BBu0_;
557 BBu1[tidz][q1][q2] = BBu1_;
561 MFEM_SHARED
real_t DBBu[NBZ][max_Q1D][max_Q1D];
562 MFEM_FOREACH_THREAD(q1,x,Q1D)
564 MFEM_FOREACH_THREAD(q2,y,Q1D)
566 DBBu[tidz][q1][q2] = op(q1,q2,0,0,
f)*BBu0[tidz][q1][q2] +
567 op(q1,q2,1,0,
f)*BBu1[tidz][q1][q2];
571 MFEM_SHARED
real_t BDBBu[NBZ][max_Q1D][max_D1D];
572 MFEM_FOREACH_THREAD(q1,x,Q1D)
574 MFEM_FOREACH_THREAD(d2,y,D1D)
577 for (
int q2 = 0; q2 < Q1D; ++q2)
580 BDBBu_ +=
b*DBBu[tidz][q1][q2];
582 BDBBu[tidz][q1][d2] = BDBBu_;
586 MFEM_FOREACH_THREAD(d1,x,D1D)
588 MFEM_FOREACH_THREAD(d2,y,D1D)
591 for (
int q1 = 0; q1 < Q1D; ++q1)
594 BBDBBu_ +=
b*BDBBu[tidz][q1][d2];
596 y(d1,d2,0,
f) += BBDBBu_;
597 y(d1,d2,1,
f) += -BBDBBu_;
603static void PADGTraceApply(
const int dim,
607 const Array<real_t> &B,
608 const Array<real_t> &Bt,
615 switch ((D1D << 4 ) | Q1D)
617 case 0x22:
return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
618 case 0x33:
return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
619 case 0x44:
return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
620 case 0x55:
return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
621 case 0x66:
return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
622 case 0x77:
return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
623 case 0x88:
return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
624 case 0x99:
return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
625 default:
return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
630 switch ((D1D << 4 ) | Q1D)
632 case 0x22:
return SmemPADGTraceApply3D<2,2,1>(NF,B,Bt,op,x,y);
633 case 0x23:
return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
634 case 0x34:
return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
635 case 0x45:
return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
636 case 0x56:
return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
637 case 0x67:
return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
638 case 0x78:
return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
639 case 0x89:
return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
640 default:
return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
643 MFEM_ABORT(
"Unknown kernel.");
647template<
int T_D1D = 0,
int T_Q1D = 0>
static
648void PADGTraceApplyTranspose2D(
const int NF,
649 const Array<real_t> &
b,
650 const Array<real_t> &bt,
658 const int D1D = T_D1D ? T_D1D : d1d;
659 const int Q1D = T_Q1D ? T_Q1D : q1d;
662 auto B =
Reshape(
b.Read(), Q1D, D1D);
663 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
664 auto op =
Reshape(op_.Read(), Q1D, 2, 2, NF);
665 auto x =
Reshape(x_.Read(), D1D, VDIM, 2, NF);
666 auto y =
Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
671 const int D1D = T_D1D ? T_D1D : d1d;
672 const int Q1D = T_Q1D ? T_Q1D : q1d;
674 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
675 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
678 for (
int d = 0; d < D1D; d++)
680 for (
int c = 0; c < VDIM; c++)
682 u0[d][c] = x(d,c,0,
f);
683 u1[d][c] = x(d,c,1,
f);
686 real_t Bu0[max_Q1D][VDIM];
687 real_t Bu1[max_Q1D][VDIM];
688 for (
int q = 0; q < Q1D; ++q)
690 for (
int c = 0; c < VDIM; c++)
695 for (
int d = 0; d < D1D; ++d)
698 for (
int c = 0; c < VDIM; c++)
700 Bu0[q][c] +=
b*u0[d][c];
701 Bu1[q][c] +=
b*u1[d][c];
705 real_t DBu0[max_Q1D][VDIM];
706 real_t DBu1[max_Q1D][VDIM];
707 for (
int q = 0; q < Q1D; ++q)
709 for (
int c = 0; c < VDIM; c++)
711 DBu0[q][c] = op(q,0,0,
f)*Bu0[q][c] + op(q,0,1,
f)*Bu1[q][c];
712 DBu1[q][c] = op(q,1,0,
f)*Bu0[q][c] + op(q,1,1,
f)*Bu1[q][c];
715 real_t BDBu0[max_D1D][VDIM];
716 real_t BDBu1[max_D1D][VDIM];
717 for (
int d = 0; d < D1D; ++d)
719 for (
int c = 0; c < VDIM; c++)
724 for (
int q = 0; q < Q1D; ++q)
727 for (
int c = 0; c < VDIM; c++)
729 BDBu0[d][c] +=
b*DBu0[q][c];
730 BDBu1[d][c] +=
b*DBu1[q][c];
733 for (
int c = 0; c < VDIM; c++)
735 y(d,c,0,
f) += BDBu0[d][c];
736 y(d,c,1,
f) += BDBu1[d][c];
743template<
int T_D1D = 0,
int T_Q1D = 0>
static
744void PADGTraceApplyTranspose3D(
const int NF,
745 const Array<real_t> &
b,
746 const Array<real_t> &bt,
754 const int D1D = T_D1D ? T_D1D : d1d;
755 const int Q1D = T_Q1D ? T_Q1D : q1d;
758 auto B =
Reshape(
b.Read(), Q1D, D1D);
759 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
760 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
761 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
762 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
767 const int D1D = T_D1D ? T_D1D : d1d;
768 const int Q1D = T_Q1D ? T_Q1D : q1d;
770 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
771 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
772 real_t u0[max_D1D][max_D1D][VDIM];
773 real_t u1[max_D1D][max_D1D][VDIM];
774 for (
int d1 = 0; d1 < D1D; d1++)
776 for (
int d2 = 0; d2 < D1D; d2++)
778 for (
int c = 0; c < VDIM; c++)
780 u0[d1][d2][c] = x(d1,d2,c,0,
f);
781 u1[d1][d2][c] = x(d1,d2,c,1,
f);
785 real_t Bu0[max_Q1D][max_D1D][VDIM];
786 real_t Bu1[max_Q1D][max_D1D][VDIM];
787 for (
int q1 = 0; q1 < Q1D; ++q1)
789 for (
int d2 = 0; d2 < D1D; ++d2)
791 for (
int c = 0; c < VDIM; c++)
793 Bu0[q1][d2][c] = 0.0;
794 Bu1[q1][d2][c] = 0.0;
796 for (
int d1 = 0; d1 < D1D; ++d1)
799 for (
int c = 0; c < VDIM; c++)
801 Bu0[q1][d2][c] +=
b*u0[d1][d2][c];
802 Bu1[q1][d2][c] +=
b*u1[d1][d2][c];
807 real_t BBu0[max_Q1D][max_Q1D][VDIM];
808 real_t BBu1[max_Q1D][max_Q1D][VDIM];
809 for (
int q1 = 0; q1 < Q1D; ++q1)
811 for (
int q2 = 0; q2 < Q1D; ++q2)
813 for (
int c = 0; c < VDIM; c++)
815 BBu0[q1][q2][c] = 0.0;
816 BBu1[q1][q2][c] = 0.0;
818 for (
int d2 = 0; d2 < D1D; ++d2)
821 for (
int c = 0; c < VDIM; c++)
823 BBu0[q1][q2][c] +=
b*Bu0[q1][d2][c];
824 BBu1[q1][q2][c] +=
b*Bu1[q1][d2][c];
829 real_t DBu0[max_Q1D][max_Q1D][VDIM];
830 real_t DBu1[max_Q1D][max_Q1D][VDIM];
831 for (
int q1 = 0; q1 < Q1D; ++q1)
833 for (
int q2 = 0; q2 < Q1D; ++q2)
835 const real_t D00 = op(q1,q2,0,0,
f);
836 const real_t D01 = op(q1,q2,0,1,
f);
837 const real_t D10 = op(q1,q2,1,0,
f);
838 const real_t D11 = op(q1,q2,1,1,
f);
839 for (
int c = 0; c < VDIM; c++)
841 DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
842 DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
846 real_t BDBu0[max_D1D][max_Q1D][VDIM];
847 real_t BDBu1[max_D1D][max_Q1D][VDIM];
848 for (
int d1 = 0; d1 < D1D; ++d1)
850 for (
int q2 = 0; q2 < Q1D; ++q2)
852 for (
int c = 0; c < VDIM; c++)
854 BDBu0[d1][q2][c] = 0.0;
855 BDBu1[d1][q2][c] = 0.0;
857 for (
int q1 = 0; q1 < Q1D; ++q1)
860 for (
int c = 0; c < VDIM; c++)
862 BDBu0[d1][q2][c] +=
b*DBu0[q1][q2][c];
863 BDBu1[d1][q2][c] +=
b*DBu1[q1][q2][c];
868 real_t BBDBu0[max_D1D][max_D1D][VDIM];
869 real_t BBDBu1[max_D1D][max_D1D][VDIM];
870 for (
int d1 = 0; d1 < D1D; ++d1)
872 for (
int d2 = 0; d2 < D1D; ++d2)
874 for (
int c = 0; c < VDIM; c++)
876 BBDBu0[d1][d2][c] = 0.0;
877 BBDBu1[d1][d2][c] = 0.0;
879 for (
int q2 = 0; q2 < Q1D; ++q2)
882 for (
int c = 0; c < VDIM; c++)
884 BBDBu0[d1][d2][c] +=
b*BDBu0[d1][q2][c];
885 BBDBu1[d1][d2][c] +=
b*BDBu1[d1][q2][c];
888 for (
int c = 0; c < VDIM; c++)
890 y(d1,d2,c,0,
f) += BBDBu0[d1][d2][c];
891 y(d1,d2,c,1,
f) += BBDBu1[d1][d2][c];
899template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
900void SmemPADGTraceApplyTranspose3D(
const int NF,
901 const Array<real_t> &
b,
902 const Array<real_t> &bt,
909 const int D1D = T_D1D ? T_D1D : d1d;
910 const int Q1D = T_Q1D ? T_Q1D : q1d;
911 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
914 auto B =
Reshape(
b.Read(), Q1D, D1D);
915 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
916 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
917 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
918 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
922 const int tidz = MFEM_THREAD_ID(z);
923 const int D1D = T_D1D ? T_D1D : d1d;
924 const int Q1D = T_Q1D ? T_Q1D : q1d;
926 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
927 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
928 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
929 MFEM_SHARED
real_t u0[NBZ][max_D1D][max_D1D];
930 MFEM_SHARED
real_t u1[NBZ][max_D1D][max_D1D];
931 MFEM_FOREACH_THREAD(d1,x,D1D)
933 MFEM_FOREACH_THREAD(d2,y,D1D)
935 u0[tidz][d1][d2] = x(d1,d2,0,
f);
936 u1[tidz][d1][d2] = x(d1,d2,1,
f);
940 MFEM_SHARED
real_t Bu0[NBZ][max_Q1D][max_D1D];
941 MFEM_SHARED
real_t Bu1[NBZ][max_Q1D][max_D1D];
942 MFEM_FOREACH_THREAD(q1,x,Q1D)
944 MFEM_FOREACH_THREAD(d2,y,D1D)
948 for (
int d1 = 0; d1 < D1D; ++d1)
951 Bu0_ +=
b*u0[tidz][d1][d2];
952 Bu1_ +=
b*u1[tidz][d1][d2];
954 Bu0[tidz][q1][d2] = Bu0_;
955 Bu1[tidz][q1][d2] = Bu1_;
959 MFEM_SHARED
real_t BBu0[NBZ][max_Q1D][max_Q1D];
960 MFEM_SHARED
real_t BBu1[NBZ][max_Q1D][max_Q1D];
961 MFEM_FOREACH_THREAD(q1,x,Q1D)
963 MFEM_FOREACH_THREAD(q2,y,Q1D)
967 for (
int d2 = 0; d2 < D1D; ++d2)
970 BBu0_ +=
b*Bu0[tidz][q1][d2];
971 BBu1_ +=
b*Bu1[tidz][q1][d2];
973 BBu0[tidz][q1][q2] = BBu0_;
974 BBu1[tidz][q1][q2] = BBu1_;
978 MFEM_SHARED
real_t DBBu0[NBZ][max_Q1D][max_Q1D];
979 MFEM_SHARED
real_t DBBu1[NBZ][max_Q1D][max_Q1D];
980 MFEM_FOREACH_THREAD(q1,x,Q1D)
982 MFEM_FOREACH_THREAD(q2,y,Q1D)
984 const real_t D00 = op(q1,q2,0,0,
f);
985 const real_t D01 = op(q1,q2,0,1,
f);
986 const real_t D10 = op(q1,q2,1,0,
f);
987 const real_t D11 = op(q1,q2,1,1,
f);
988 const real_t u0q = BBu0[tidz][q1][q2];
989 const real_t u1q = BBu1[tidz][q1][q2];
990 DBBu0[tidz][q1][q2] = D00*u0q + D01*u1q;
991 DBBu1[tidz][q1][q2] = D10*u0q + D11*u1q;
995 MFEM_SHARED
real_t BDBBu0[NBZ][max_Q1D][max_D1D];
996 MFEM_SHARED
real_t BDBBu1[NBZ][max_Q1D][max_D1D];
997 MFEM_FOREACH_THREAD(q1,x,Q1D)
999 MFEM_FOREACH_THREAD(d2,y,D1D)
1003 for (
int q2 = 0; q2 < Q1D; ++q2)
1006 BDBBu0_ +=
b*DBBu0[tidz][q1][q2];
1007 BDBBu1_ +=
b*DBBu1[tidz][q1][q2];
1009 BDBBu0[tidz][q1][d2] = BDBBu0_;
1010 BDBBu1[tidz][q1][d2] = BDBBu1_;
1014 MFEM_FOREACH_THREAD(d1,x,D1D)
1016 MFEM_FOREACH_THREAD(d2,y,D1D)
1020 for (
int q1 = 0; q1 < Q1D; ++q1)
1023 BBDBBu0_ +=
b*BDBBu0[tidz][q1][d2];
1024 BBDBBu1_ +=
b*BDBBu1[tidz][q1][d2];
1026 y(d1,d2,0,
f) += BBDBBu0_;
1027 y(d1,d2,1,
f) += BBDBBu1_;
1033static void PADGTraceApplyTranspose(
const int dim,
1037 const Array<real_t> &B,
1038 const Array<real_t> &Bt,
1045 switch ((D1D << 4 ) | Q1D)
1047 case 0x22:
return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1048 case 0x33:
return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1049 case 0x44:
return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1050 case 0x55:
return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1051 case 0x66:
return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1052 case 0x77:
return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1053 case 0x88:
return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1054 case 0x99:
return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1055 default:
return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1060 switch ((D1D << 4 ) | Q1D)
1062 case 0x22:
return SmemPADGTraceApplyTranspose3D<2,2>(NF,B,Bt,op,x,y);
1063 case 0x23:
return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1064 case 0x34:
return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1065 case 0x45:
return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1066 case 0x56:
return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1067 case 0x67:
return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1068 case 0x78:
return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1069 case 0x89:
return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1070 default:
return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1073 MFEM_ABORT(
"Unknown kernel.");
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
virtual real_t Eval(ElementTransformation &T, const IntegrationPoint &ip)=0
Evaluate the coefficient in the element described by T at the point ip.
void AddMultPA(const Vector &, Vector &) const override
Method for partially assembled action.
void AssemblePABoundaryFaces(const FiniteElementSpace &fes) override
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
void AssemblePAInteriorFaces(const FiniteElementSpace &fes) override
static const IntegrationRule & GetRule(Geometry::Type geom, int order, const FaceElementTransformations &T)
const FaceGeometricFactors * geom
Not owned.
const DofToQuad * maps
Not owned.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Array< real_t > B
Basis functions evaluated at quadrature points.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Array< real_t > Bt
Transpose of B.
Vector normal
Normals at all quadrature points.
Vector detJ
Determinants of the Jacobians at all quadrature points.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
const IntegrationRule * IntRule
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void SetSize(int s)
Resize the vector to size s.
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Convert a dof face index from Native ordering to lexicographic ordering for quads and hexes.
@ COMPRESSED
Enable all above compressions.
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
void forall(int N, lambda &&body)
void vel(const Vector &x, real_t t, Vector &u)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.