12 #include "../general/forall.hpp"
22 static void PADGTraceSetup2D(
const int Q1D,
24 const Array<double> &w,
35 auto d =
Reshape(det.Read(), Q1D, NF);
36 auto n =
Reshape(nor.Read(), Q1D, VDIM, NF);
37 const bool const_r = rho.Size() == 1;
40 const bool const_v = vel.Size() == 2;
42 const_v ?
Reshape(vel.Read(), 2,1,1) :
Reshape(vel.Read(), 2,Q1D,NF);
44 auto qd =
Reshape(op.Write(), Q1D, 2, 2, NF);
48 for (
int q = 0; q < Q1D; ++q)
50 const double r = const_r ? R(0,0) : R(q,
f);
51 const double v0 = const_v ? V(0,0,0) : V(0,q,
f);
52 const double v1 = const_v ? V(1,0,0) : V(1,q,
f);
53 const double dot = n(q,0,
f) * v0 + n(q,1,
f) * v1;
54 const double abs = dot > 0.0 ? dot : -dot;
55 const double w = W[q]*r*d(q,
f);
56 qd(q,0,0,
f) = w*( alpha/2 * dot + beta * abs );
57 qd(q,1,0,
f) = w*( alpha/2 * dot - beta * abs );
58 qd(q,0,1,
f) = w*(-alpha/2 * dot - beta * abs );
59 qd(q,1,1,
f) = w*(-alpha/2 * dot + beta * abs );
64 static void PADGTraceSetup3D(
const int Q1D,
66 const Array<double> &w,
77 auto d =
Reshape(det.Read(), Q1D, Q1D, NF);
78 auto n =
Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
79 const bool const_r = rho.Size() == 1;
81 const_r ?
Reshape(rho.Read(), 1,1,1) :
Reshape(rho.Read(), Q1D,Q1D,NF);
82 const bool const_v = vel.Size() == 3;
84 const_v ?
Reshape(vel.Read(), 3,1,1,1) :
Reshape(vel.Read(), 3,Q1D,Q1D,NF);
86 auto qd =
Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
90 for (
int q1 = 0; q1 < Q1D; ++q1)
92 for (
int q2 = 0; q2 < Q1D; ++q2)
94 const double r = const_r ? R(0,0,0) : R(q1,q2,
f);
95 const double v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,
f);
96 const double v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,
f);
97 const double v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,
f);
98 const double dot = n(q1,q2,0,
f) * v0 + n(q1,q2,1,
f) * v1 +
100 const double abs = dot > 0.0 ? dot : -dot;
101 const double w = W[q1+q2*Q1D]*r*d(q1,q2,
f);
102 qd(q1,q2,0,0,
f) = w*( alpha/2 * dot + beta * abs );
103 qd(q1,q2,1,0,
f) = w*( alpha/2 * dot - beta * abs );
104 qd(q1,q2,0,1,
f) = w*(-alpha/2 * dot - beta * abs );
105 qd(q1,q2,1,1,
f) = w*(-alpha/2 * dot + beta * abs );
111 static void PADGTraceSetup(
const int dim,
115 const Array<double> &W,
124 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADGTraceSetup"); }
127 PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
131 PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
135 void DGTraceIntegrator::SetupPA(
const FiniteElementSpace &fes,
FaceType type)
137 nf = fes.GetNFbyType(type);
138 if (nf==0) {
return; }
140 Mesh *mesh = fes.GetMesh();
141 const FiniteElement &el =
142 *fes.GetTraceElement(0, fes.GetMesh()->GetFaceBaseGeometry(0));
143 FaceElementTransformations &T0 =
144 *fes.GetMesh()->GetFaceElementTransformations(0);
145 const IntegrationRule *ir = IntRule?
147 &GetRule(el.GetGeomType(), el.GetOrder(), T0);
148 const int symmDims = 4;
149 nq = ir->GetNPoints();
150 dim = mesh->Dimension();
151 geom = mesh->GetFaceGeometricFactors(
153 FaceGeometricFactors::DETERMINANTS |
154 FaceGeometricFactors::NORMALS, type);
155 maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
160 if (VectorConstantCoefficient *c_u = dynamic_cast<VectorConstantCoefficient*>
165 else if (VectorQuadratureFunctionCoefficient* qf_u =
166 dynamic_cast<VectorQuadratureFunctionCoefficient*>(u))
169 const QuadratureFunction &qFun = qf_u->GetQuadFunction();
170 MFEM_VERIFY(qFun.Size() == dim * nq * nf,
171 "Incompatible QuadratureFunction dimension \n");
173 MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
174 "IntegrationRule used within integrator and in"
175 " QuadratureFunction appear to be different");
177 vel.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
181 vel.SetSize(dim * nq * nf);
182 auto C =
Reshape(vel.HostWrite(),
dim, nq, nf);
185 for (
int f = 0;
f < mesh->GetNumFacesWithGhost(); ++
f)
187 Mesh::FaceInformation face = mesh->GetFaceInformation(
f);
188 if (face.IsNonconformingCoarse())
194 else if ( face.IsOfFaceType(type) )
196 const int mask = FaceElementTransformations::HAVE_ELEM1 |
197 FaceElementTransformations::HAVE_LOC1;
198 FaceElementTransformations &T =
199 *fes.GetMesh()->GetFaceElementTransformations(
f, mask);
200 for (
int q = 0; q < nq; ++q)
205 T.SetAllIntPoints(&ir->IntPoint(q));
206 const IntegrationPoint &eip1 = T.GetElement1IntPoint();
207 u->Eval(Vq, *T.Elem1, eip1);
208 for (
int i = 0; i <
dim; ++i)
210 C(i,iq,f_ind) = Vq(i);
216 MFEM_VERIFY(f_ind==nf,
"Incorrect number of faces.");
224 else if (ConstantCoefficient *c_rho = dynamic_cast<ConstantCoefficient*>(rho))
227 r(0) = c_rho->constant;
229 else if (QuadratureFunctionCoefficient* qf_rho =
230 dynamic_cast<QuadratureFunctionCoefficient*>(rho))
232 const QuadratureFunction &qFun = qf_rho->GetQuadFunction();
233 MFEM_VERIFY(qFun.Size() == nq * nf,
234 "Incompatible QuadratureFunction dimension \n");
236 MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
237 "IntegrationRule used within integrator and in"
238 " QuadratureFunction appear to be different");
240 r.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
245 auto C_vel =
Reshape(vel.HostRead(),
dim, nq, nf);
246 auto n =
Reshape(geom->normal.HostRead(), nq,
dim, nf);
247 auto C =
Reshape(r.HostWrite(), nq, nf);
249 for (
int f = 0;
f < mesh->GetNumFacesWithGhost(); ++
f)
251 Mesh::FaceInformation face = mesh->GetFaceInformation(
f);
252 if (face.IsNonconformingCoarse())
258 else if ( face.IsOfFaceType(type) )
260 FaceElementTransformations &T =
261 *fes.GetMesh()->GetFaceElementTransformations(
f);
262 for (
int q = 0; q < nq; ++q)
268 T.SetAllIntPoints(&ir->IntPoint(q));
269 const IntegrationPoint &eip1 = T.GetElement1IntPoint();
270 const IntegrationPoint &eip2 = T.GetElement2IntPoint();
273 if ( face.IsBoundary() )
275 rq = rho->Eval(*T.Elem1, eip1);
280 for (
int d=0; d<
dim; ++d)
282 udotn += C_vel(d,iq,f_ind)*n(iq,d,f_ind);
284 if (udotn >= 0.0) { rq = rho->Eval(*T.Elem2, eip2); }
285 else { rq = rho->Eval(*T.Elem1, eip1); }
292 MFEM_VERIFY(f_ind==nf,
"Incorrect number of faces.");
294 PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(),
295 geom->detJ, geom->normal, r,
vel,
296 alpha, beta, pa_data);
301 SetupPA(fes, FaceType::Interior);
306 SetupPA(fes, FaceType::Boundary);
310 template<
int T_D1D = 0,
int T_Q1D = 0>
static
311 void PADGTraceApply2D(
const int NF,
321 const int D1D = T_D1D ? T_D1D : d1d;
322 const int Q1D = T_Q1D ? T_Q1D : q1d;
323 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
324 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
334 const int D1D = T_D1D ? T_D1D : d1d;
335 const int Q1D = T_Q1D ? T_Q1D : q1d;
337 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
338 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
339 double u0[max_D1D][VDIM];
340 double u1[max_D1D][VDIM];
341 for (
int d = 0; d < D1D; d++)
343 for (
int c = 0; c < VDIM; c++)
345 u0[d][c] = x(d,c,0,
f);
346 u1[d][c] = x(d,c,1,
f);
349 double Bu0[max_Q1D][VDIM];
350 double Bu1[max_Q1D][VDIM];
351 for (
int q = 0; q < Q1D; ++q)
353 for (
int c = 0; c < VDIM; c++)
358 for (
int d = 0; d < D1D; ++d)
360 const double b = B(q,d);
361 for (
int c = 0; c < VDIM; c++)
363 Bu0[q][c] += b*u0[d][c];
364 Bu1[q][c] += b*u1[d][c];
368 double DBu[max_Q1D][VDIM];
369 for (
int q = 0; q < Q1D; ++q)
371 for (
int c = 0; c < VDIM; c++)
373 DBu[q][c] = op(q,0,0,
f)*Bu0[q][c] + op(q,1,0,
f)*Bu1[q][c];
376 double BDBu[max_D1D][VDIM];
377 for (
int d = 0; d < D1D; ++d)
379 for (
int c = 0; c < VDIM; c++)
383 for (
int q = 0; q < Q1D; ++q)
385 const double b = Bt(d,q);
386 for (
int c = 0; c < VDIM; c++)
388 BDBu[d][c] += b*DBu[q][c];
391 for (
int c = 0; c < VDIM; c++)
393 y(d,c,0,
f) += BDBu[d][c];
394 y(d,c,1,
f) += -BDBu[d][c];
401 template<
int T_D1D = 0,
int T_Q1D = 0>
static
402 void PADGTraceApply3D(
const int NF,
403 const Array<double> &b,
404 const Array<double> &bt,
412 const int D1D = T_D1D ? T_D1D : d1d;
413 const int Q1D = T_Q1D ? T_Q1D : q1d;
414 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
415 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
416 auto B =
Reshape(b.Read(), Q1D, D1D);
417 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
418 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
419 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
420 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
425 const int D1D = T_D1D ? T_D1D : d1d;
426 const int Q1D = T_Q1D ? T_Q1D : q1d;
428 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
429 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
430 double u0[max_D1D][max_D1D][VDIM];
431 double u1[max_D1D][max_D1D][VDIM];
432 for (
int d1 = 0; d1 < D1D; d1++)
434 for (
int d2 = 0; d2 < D1D; d2++)
436 for (
int c = 0; c < VDIM; c++)
438 u0[d1][d2][c] = x(d1,d2,c,0,
f);
439 u1[d1][d2][c] = x(d1,d2,c,1,
f);
443 double Bu0[max_Q1D][max_D1D][VDIM];
444 double Bu1[max_Q1D][max_D1D][VDIM];
445 for (
int q = 0; q < Q1D; ++q)
447 for (
int d2 = 0; d2 < D1D; d2++)
449 for (
int c = 0; c < VDIM; c++)
454 for (
int d1 = 0; d1 < D1D; ++d1)
456 const double b = B(q,d1);
457 for (
int c = 0; c < VDIM; c++)
459 Bu0[q][d2][c] += b*u0[d1][d2][c];
460 Bu1[q][d2][c] += b*u1[d1][d2][c];
465 double BBu0[max_Q1D][max_Q1D][VDIM];
466 double BBu1[max_Q1D][max_Q1D][VDIM];
467 for (
int q1 = 0; q1 < Q1D; ++q1)
469 for (
int q2 = 0; q2 < Q1D; q2++)
471 for (
int c = 0; c < VDIM; c++)
473 BBu0[q1][q2][c] = 0.0;
474 BBu1[q1][q2][c] = 0.0;
476 for (
int d2 = 0; d2 < D1D; ++d2)
478 const double b = B(q2,d2);
479 for (
int c = 0; c < VDIM; c++)
481 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
482 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
487 double DBBu[max_Q1D][max_Q1D][VDIM];
488 for (
int q1 = 0; q1 < Q1D; ++q1)
490 for (
int q2 = 0; q2 < Q1D; q2++)
492 for (
int c = 0; c < VDIM; c++)
494 DBBu[q1][q2][c] = op(q1,q2,0,0,
f)*BBu0[q1][q2][c] +
495 op(q1,q2,1,0,
f)*BBu1[q1][q2][c];
499 double BDBBu[max_Q1D][max_D1D][VDIM];
500 for (
int q1 = 0; q1 < Q1D; ++q1)
502 for (
int d2 = 0; d2 < D1D; d2++)
504 for (
int c = 0; c < VDIM; c++)
506 BDBBu[q1][d2][c] = 0.0;
508 for (
int q2 = 0; q2 < Q1D; ++q2)
510 const double b = Bt(d2,q2);
511 for (
int c = 0; c < VDIM; c++)
513 BDBBu[q1][d2][c] += b*DBBu[q1][q2][c];
518 double BBDBBu[max_D1D][max_D1D][VDIM];
519 for (
int d1 = 0; d1 < D1D; ++d1)
521 for (
int d2 = 0; d2 < D1D; d2++)
523 for (
int c = 0; c < VDIM; c++)
525 BBDBBu[d1][d2][c] = 0.0;
527 for (
int q1 = 0; q1 < Q1D; ++q1)
529 const double b = Bt(d1,q1);
530 for (
int c = 0; c < VDIM; c++)
532 BBDBBu[d1][d2][c] += b*BDBBu[q1][d2][c];
535 for (
int c = 0; c < VDIM; c++)
537 y(d1,d2,c,0,
f) += BBDBBu[d1][d2][c];
538 y(d1,d2,c,1,
f) += -BBDBBu[d1][d2][c];
546 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
547 void SmemPADGTraceApply3D(
const int NF,
548 const Array<double> &b,
549 const Array<double> &bt,
556 const int D1D = T_D1D ? T_D1D : d1d;
557 const int Q1D = T_Q1D ? T_Q1D : q1d;
558 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
559 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
560 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
561 auto B =
Reshape(b.Read(), Q1D, D1D);
562 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
563 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
564 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
565 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
567 MFEM_FORALL_2D(
f, NF, Q1D, Q1D, NBZ,
569 const int tidz = MFEM_THREAD_ID(z);
570 const int D1D = T_D1D ? T_D1D : d1d;
571 const int Q1D = T_Q1D ? T_Q1D : q1d;
573 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
574 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
575 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
576 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
577 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
578 MFEM_FOREACH_THREAD(d1,x,D1D)
580 MFEM_FOREACH_THREAD(d2,y,D1D)
582 u0[tidz][d1][d2] = x(d1,d2,0,
f);
583 u1[tidz][d1][d2] = x(d1,d2,1,
f);
587 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
588 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
589 MFEM_FOREACH_THREAD(q1,x,Q1D)
591 MFEM_FOREACH_THREAD(d2,y,D1D)
595 for (
int d1 = 0; d1 < D1D; ++d1)
597 const double b = B(q1,d1);
598 Bu0_ += b*u0[tidz][d1][d2];
599 Bu1_ += b*u1[tidz][d1][d2];
601 Bu0[tidz][q1][d2] = Bu0_;
602 Bu1[tidz][q1][d2] = Bu1_;
606 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
607 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
608 MFEM_FOREACH_THREAD(q1,x,Q1D)
610 MFEM_FOREACH_THREAD(q2,y,Q1D)
614 for (
int d2 = 0; d2 < D1D; ++d2)
616 const double b = B(q2,d2);
617 BBu0_ += b*Bu0[tidz][q1][d2];
618 BBu1_ += b*Bu1[tidz][q1][d2];
620 BBu0[tidz][q1][q2] = BBu0_;
621 BBu1[tidz][q1][q2] = BBu1_;
625 MFEM_SHARED
double DBBu[NBZ][max_Q1D][max_Q1D];
626 MFEM_FOREACH_THREAD(q1,x,Q1D)
628 MFEM_FOREACH_THREAD(q2,y,Q1D)
630 DBBu[tidz][q1][q2] = op(q1,q2,0,0,
f)*BBu0[tidz][q1][q2] +
631 op(q1,q2,1,0,
f)*BBu1[tidz][q1][q2];
635 MFEM_SHARED
double BDBBu[NBZ][max_Q1D][max_D1D];
636 MFEM_FOREACH_THREAD(q1,x,Q1D)
638 MFEM_FOREACH_THREAD(d2,y,D1D)
641 for (
int q2 = 0; q2 < Q1D; ++q2)
643 const double b = Bt(d2,q2);
644 BDBBu_ += b*DBBu[tidz][q1][q2];
646 BDBBu[tidz][q1][d2] = BDBBu_;
650 MFEM_FOREACH_THREAD(d1,x,D1D)
652 MFEM_FOREACH_THREAD(d2,y,D1D)
654 double BBDBBu_ = 0.0;
655 for (
int q1 = 0; q1 < Q1D; ++q1)
657 const double b = Bt(d1,q1);
658 BBDBBu_ += b*BDBBu[tidz][q1][d2];
660 y(d1,d2,0,
f) += BBDBBu_;
661 y(d1,d2,1,
f) += -BBDBBu_;
667 static void PADGTraceApply(
const int dim,
671 const Array<double> &B,
672 const Array<double> &Bt,
679 switch ((D1D << 4 ) | Q1D)
681 case 0x22:
return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
682 case 0x33:
return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
683 case 0x44:
return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
684 case 0x55:
return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
685 case 0x66:
return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
686 case 0x77:
return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
687 case 0x88:
return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
688 case 0x99:
return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
689 default:
return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
694 switch ((D1D << 4 ) | Q1D)
696 case 0x23:
return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
697 case 0x34:
return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
698 case 0x45:
return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
699 case 0x56:
return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
700 case 0x67:
return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
701 case 0x78:
return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
702 case 0x89:
return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
703 default:
return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
706 MFEM_ABORT(
"Unknown kernel.");
710 template<
int T_D1D = 0,
int T_Q1D = 0>
static
711 void PADGTraceApplyTranspose2D(
const int NF,
712 const Array<double> &b,
713 const Array<double> &bt,
721 const int D1D = T_D1D ? T_D1D : d1d;
722 const int Q1D = T_Q1D ? T_Q1D : q1d;
723 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
724 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
725 auto B =
Reshape(b.Read(), Q1D, D1D);
726 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
727 auto op =
Reshape(op_.Read(), Q1D, 2, 2, NF);
728 auto x =
Reshape(x_.Read(), D1D, VDIM, 2, NF);
729 auto y =
Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
734 const int D1D = T_D1D ? T_D1D : d1d;
735 const int Q1D = T_Q1D ? T_Q1D : q1d;
737 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
738 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
739 double u0[max_D1D][VDIM];
740 double u1[max_D1D][VDIM];
741 for (
int d = 0; d < D1D; d++)
743 for (
int c = 0; c < VDIM; c++)
745 u0[d][c] = x(d,c,0,
f);
746 u1[d][c] = x(d,c,1,
f);
749 double Bu0[max_Q1D][VDIM];
750 double Bu1[max_Q1D][VDIM];
751 for (
int q = 0; q < Q1D; ++q)
753 for (
int c = 0; c < VDIM; c++)
758 for (
int d = 0; d < D1D; ++d)
760 const double b = B(q,d);
761 for (
int c = 0; c < VDIM; c++)
763 Bu0[q][c] += b*u0[d][c];
764 Bu1[q][c] += b*u1[d][c];
768 double DBu0[max_Q1D][VDIM];
769 double DBu1[max_Q1D][VDIM];
770 for (
int q = 0; q < Q1D; ++q)
772 for (
int c = 0; c < VDIM; c++)
774 DBu0[q][c] = op(q,0,0,
f)*Bu0[q][c] + op(q,0,1,
f)*Bu1[q][c];
775 DBu1[q][c] = op(q,1,0,
f)*Bu0[q][c] + op(q,1,1,
f)*Bu1[q][c];
778 double BDBu0[max_D1D][VDIM];
779 double BDBu1[max_D1D][VDIM];
780 for (
int d = 0; d < D1D; ++d)
782 for (
int c = 0; c < VDIM; c++)
787 for (
int q = 0; q < Q1D; ++q)
789 const double b = Bt(d,q);
790 for (
int c = 0; c < VDIM; c++)
792 BDBu0[d][c] += b*DBu0[q][c];
793 BDBu1[d][c] += b*DBu1[q][c];
796 for (
int c = 0; c < VDIM; c++)
798 y(d,c,0,
f) += BDBu0[d][c];
799 y(d,c,1,
f) += BDBu1[d][c];
806 template<
int T_D1D = 0,
int T_Q1D = 0>
static
807 void PADGTraceApplyTranspose3D(
const int NF,
808 const Array<double> &b,
809 const Array<double> &bt,
817 const int D1D = T_D1D ? T_D1D : d1d;
818 const int Q1D = T_Q1D ? T_Q1D : q1d;
819 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
820 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
821 auto B =
Reshape(b.Read(), Q1D, D1D);
822 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
823 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
824 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
825 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
830 const int D1D = T_D1D ? T_D1D : d1d;
831 const int Q1D = T_Q1D ? T_Q1D : q1d;
833 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
834 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
835 double u0[max_D1D][max_D1D][VDIM];
836 double u1[max_D1D][max_D1D][VDIM];
837 for (
int d1 = 0; d1 < D1D; d1++)
839 for (
int d2 = 0; d2 < D1D; d2++)
841 for (
int c = 0; c < VDIM; c++)
843 u0[d1][d2][c] = x(d1,d2,c,0,
f);
844 u1[d1][d2][c] = x(d1,d2,c,1,
f);
848 double Bu0[max_Q1D][max_D1D][VDIM];
849 double Bu1[max_Q1D][max_D1D][VDIM];
850 for (
int q1 = 0; q1 < Q1D; ++q1)
852 for (
int d2 = 0; d2 < D1D; ++d2)
854 for (
int c = 0; c < VDIM; c++)
856 Bu0[q1][d2][c] = 0.0;
857 Bu1[q1][d2][c] = 0.0;
859 for (
int d1 = 0; d1 < D1D; ++d1)
861 const double b = B(q1,d1);
862 for (
int c = 0; c < VDIM; c++)
864 Bu0[q1][d2][c] += b*u0[d1][d2][c];
865 Bu1[q1][d2][c] += b*u1[d1][d2][c];
870 double BBu0[max_Q1D][max_Q1D][VDIM];
871 double BBu1[max_Q1D][max_Q1D][VDIM];
872 for (
int q1 = 0; q1 < Q1D; ++q1)
874 for (
int q2 = 0; q2 < Q1D; ++q2)
876 for (
int c = 0; c < VDIM; c++)
878 BBu0[q1][q2][c] = 0.0;
879 BBu1[q1][q2][c] = 0.0;
881 for (
int d2 = 0; d2 < D1D; ++d2)
883 const double b = B(q2,d2);
884 for (
int c = 0; c < VDIM; c++)
886 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
887 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
892 double DBu0[max_Q1D][max_Q1D][VDIM];
893 double DBu1[max_Q1D][max_Q1D][VDIM];
894 for (
int q1 = 0; q1 < Q1D; ++q1)
896 for (
int q2 = 0; q2 < Q1D; ++q2)
898 const double D00 = op(q1,q2,0,0,
f);
899 const double D01 = op(q1,q2,0,1,
f);
900 const double D10 = op(q1,q2,1,0,
f);
901 const double D11 = op(q1,q2,1,1,
f);
902 for (
int c = 0; c < VDIM; c++)
904 DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
905 DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
909 double BDBu0[max_D1D][max_Q1D][VDIM];
910 double BDBu1[max_D1D][max_Q1D][VDIM];
911 for (
int d1 = 0; d1 < D1D; ++d1)
913 for (
int q2 = 0; q2 < Q1D; ++q2)
915 for (
int c = 0; c < VDIM; c++)
917 BDBu0[d1][q2][c] = 0.0;
918 BDBu1[d1][q2][c] = 0.0;
920 for (
int q1 = 0; q1 < Q1D; ++q1)
922 const double b = Bt(d1,q1);
923 for (
int c = 0; c < VDIM; c++)
925 BDBu0[d1][q2][c] += b*DBu0[q1][q2][c];
926 BDBu1[d1][q2][c] += b*DBu1[q1][q2][c];
931 double BBDBu0[max_D1D][max_D1D][VDIM];
932 double BBDBu1[max_D1D][max_D1D][VDIM];
933 for (
int d1 = 0; d1 < D1D; ++d1)
935 for (
int d2 = 0; d2 < D1D; ++d2)
937 for (
int c = 0; c < VDIM; c++)
939 BBDBu0[d1][d2][c] = 0.0;
940 BBDBu1[d1][d2][c] = 0.0;
942 for (
int q2 = 0; q2 < Q1D; ++q2)
944 const double b = Bt(d2,q2);
945 for (
int c = 0; c < VDIM; c++)
947 BBDBu0[d1][d2][c] += b*BDBu0[d1][q2][c];
948 BBDBu1[d1][d2][c] += b*BDBu1[d1][q2][c];
951 for (
int c = 0; c < VDIM; c++)
953 y(d1,d2,c,0,
f) += BBDBu0[d1][d2][c];
954 y(d1,d2,c,1,
f) += BBDBu1[d1][d2][c];
962 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
963 void SmemPADGTraceApplyTranspose3D(
const int NF,
964 const Array<double> &b,
965 const Array<double> &bt,
972 const int D1D = T_D1D ? T_D1D : d1d;
973 const int Q1D = T_Q1D ? T_Q1D : q1d;
974 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
975 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
976 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
977 auto B =
Reshape(b.Read(), Q1D, D1D);
978 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
979 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
980 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
981 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
983 MFEM_FORALL_2D(
f, NF, Q1D, Q1D, NBZ,
985 const int tidz = MFEM_THREAD_ID(z);
986 const int D1D = T_D1D ? T_D1D : d1d;
987 const int Q1D = T_Q1D ? T_Q1D : q1d;
989 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
990 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
991 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
992 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
993 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
994 MFEM_FOREACH_THREAD(d1,x,D1D)
996 MFEM_FOREACH_THREAD(d2,y,D1D)
998 u0[tidz][d1][d2] = x(d1,d2,0,
f);
999 u1[tidz][d1][d2] = x(d1,d2,1,
f);
1003 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
1004 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
1005 MFEM_FOREACH_THREAD(q1,x,Q1D)
1007 MFEM_FOREACH_THREAD(d2,y,D1D)
1011 for (
int d1 = 0; d1 < D1D; ++d1)
1013 const double b = B(q1,d1);
1014 Bu0_ += b*u0[tidz][d1][d2];
1015 Bu1_ += b*u1[tidz][d1][d2];
1017 Bu0[tidz][q1][d2] = Bu0_;
1018 Bu1[tidz][q1][d2] = Bu1_;
1022 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
1023 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
1024 MFEM_FOREACH_THREAD(q1,x,Q1D)
1026 MFEM_FOREACH_THREAD(q2,y,Q1D)
1030 for (
int d2 = 0; d2 < D1D; ++d2)
1032 const double b = B(q2,d2);
1033 BBu0_ += b*Bu0[tidz][q1][d2];
1034 BBu1_ += b*Bu1[tidz][q1][d2];
1036 BBu0[tidz][q1][q2] = BBu0_;
1037 BBu1[tidz][q1][q2] = BBu1_;
1041 MFEM_SHARED
double DBBu0[NBZ][max_Q1D][max_Q1D];
1042 MFEM_SHARED
double DBBu1[NBZ][max_Q1D][max_Q1D];
1043 MFEM_FOREACH_THREAD(q1,x,Q1D)
1045 MFEM_FOREACH_THREAD(q2,y,Q1D)
1047 const double D00 = op(q1,q2,0,0,
f);
1048 const double D01 = op(q1,q2,0,1,
f);
1049 const double D10 = op(q1,q2,1,0,
f);
1050 const double D11 = op(q1,q2,1,1,
f);
1051 const double u0q = BBu0[tidz][q1][q2];
1052 const double u1q = BBu1[tidz][q1][q2];
1053 DBBu0[tidz][q1][q2] = D00*u0q + D01*u1q;
1054 DBBu1[tidz][q1][q2] = D10*u0q + D11*u1q;
1058 MFEM_SHARED
double BDBBu0[NBZ][max_Q1D][max_D1D];
1059 MFEM_SHARED
double BDBBu1[NBZ][max_Q1D][max_D1D];
1060 MFEM_FOREACH_THREAD(q1,x,Q1D)
1062 MFEM_FOREACH_THREAD(d2,y,D1D)
1064 double BDBBu0_ = 0.0;
1065 double BDBBu1_ = 0.0;
1066 for (
int q2 = 0; q2 < Q1D; ++q2)
1068 const double b = Bt(d2,q2);
1069 BDBBu0_ += b*DBBu0[tidz][q1][q2];
1070 BDBBu1_ += b*DBBu1[tidz][q1][q2];
1072 BDBBu0[tidz][q1][d2] = BDBBu0_;
1073 BDBBu1[tidz][q1][d2] = BDBBu1_;
1077 MFEM_FOREACH_THREAD(d1,x,D1D)
1079 MFEM_FOREACH_THREAD(d2,y,D1D)
1081 double BBDBBu0_ = 0.0;
1082 double BBDBBu1_ = 0.0;
1083 for (
int q1 = 0; q1 < Q1D; ++q1)
1085 const double b = Bt(d1,q1);
1086 BBDBBu0_ += b*BDBBu0[tidz][q1][d2];
1087 BBDBBu1_ += b*BDBBu1[tidz][q1][d2];
1089 y(d1,d2,0,
f) += BBDBBu0_;
1090 y(d1,d2,1,
f) += BBDBBu1_;
1096 static void PADGTraceApplyTranspose(
const int dim,
1100 const Array<double> &B,
1101 const Array<double> &Bt,
1108 switch ((D1D << 4 ) | Q1D)
1110 case 0x22:
return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1111 case 0x33:
return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1112 case 0x44:
return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1113 case 0x55:
return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1114 case 0x66:
return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1115 case 0x77:
return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1116 case 0x88:
return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1117 case 0x99:
return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1118 default:
return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1123 switch ((D1D << 4 ) | Q1D)
1125 case 0x23:
return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1126 case 0x34:
return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1127 case 0x45:
return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1128 case 0x56:
return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1129 case 0x67:
return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1130 case 0x78:
return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1131 case 0x89:
return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1132 default:
return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1135 MFEM_ABORT(
"Unknown kernel.");
1141 PADGTraceApply(dim, dofs1D, quad1D, nf,
1146 void DGTraceIntegrator::AddMultTransposePA(
const Vector &x,
Vector &y)
const
1148 PADGTraceApplyTranspose(dim, dofs1D, quad1D, nf,
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Convert a dof face index from Native ordering to lexicographic ordering for quads and hexes...
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void vel(const Vector &x, double t, Vector &u)
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
double u(const Vector &xvec)
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
double f(const Vector &p)