12 #include "../general/forall.hpp"
22 static void PADGTraceSetup2D(
const int Q1D,
24 const Array<double> &w,
35 auto d =
Reshape(det.Read(), Q1D, NF);
36 auto n =
Reshape(nor.Read(), Q1D, VDIM, NF);
37 const bool const_r = rho.Size() == 1;
40 const bool const_v = vel.Size() == 2;
42 const_v ?
Reshape(vel.Read(), 2,1,1) :
Reshape(vel.Read(), 2,Q1D,NF);
44 auto qd =
Reshape(op.Write(), Q1D, 2, 2, NF);
48 for (
int q = 0; q < Q1D; ++q)
50 const double r = const_r ? R(0,0) : R(q,
f);
51 const double v0 = const_v ? V(0,0,0) : V(0,q,
f);
52 const double v1 = const_v ? V(1,0,0) : V(1,q,
f);
53 const double dot = n(q,0,
f) * v0 + n(q,1,
f) * v1;
54 const double abs = dot > 0.0 ? dot : -dot;
55 const double w = W[q]*r*d(q,
f);
56 qd(q,0,0,
f) = w*( alpha/2 * dot + beta * abs );
57 qd(q,1,0,
f) = w*( alpha/2 * dot - beta * abs );
58 qd(q,0,1,
f) = w*(-alpha/2 * dot - beta * abs );
59 qd(q,1,1,
f) = w*(-alpha/2 * dot + beta * abs );
64 static void PADGTraceSetup3D(
const int Q1D,
66 const Array<double> &w,
77 auto d =
Reshape(det.Read(), Q1D, Q1D, NF);
78 auto n =
Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
79 const bool const_r = rho.Size() == 1;
81 const_r ?
Reshape(rho.Read(), 1,1,1) :
Reshape(rho.Read(), Q1D,Q1D,NF);
82 const bool const_v = vel.Size() == 3;
84 const_v ?
Reshape(vel.Read(), 3,1,1,1) :
Reshape(vel.Read(), 3,Q1D,Q1D,NF);
86 auto qd =
Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
90 for (
int q1 = 0; q1 < Q1D; ++q1)
92 for (
int q2 = 0; q2 < Q1D; ++q2)
94 const double r = const_r ? R(0,0,0) : R(q1,q2,
f);
95 const double v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,
f);
96 const double v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,
f);
97 const double v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,
f);
98 const double dot = n(q1,q2,0,
f) * v0 + n(q1,q2,1,
f) * v1 +
100 const double abs = dot > 0.0 ? dot : -dot;
101 const double w = W[q1+q2*Q1D]*r*d(q1,q2,
f);
102 qd(q1,q2,0,0,
f) = w*( alpha/2 * dot + beta * abs );
103 qd(q1,q2,1,0,
f) = w*( alpha/2 * dot - beta * abs );
104 qd(q1,q2,0,1,
f) = w*(-alpha/2 * dot - beta * abs );
105 qd(q1,q2,1,1,
f) = w*(-alpha/2 * dot + beta * abs );
111 static void PADGTraceSetup(
const int dim,
115 const Array<double> &W,
124 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADGTraceSetup"); }
127 PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
131 PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
135 void DGTraceIntegrator::SetupPA(
const FiniteElementSpace &fes,
FaceType type)
137 nf = fes.GetNFbyType(type);
138 if (nf==0) {
return; }
140 Mesh *mesh = fes.GetMesh();
141 const FiniteElement &el =
142 *fes.GetTraceElement(0, fes.GetMesh()->GetFaceBaseGeometry(0));
143 FaceElementTransformations &T =
144 *fes.GetMesh()->GetFaceElementTransformations(0);
145 const IntegrationRule *ir = IntRule?
147 &GetRule(el.GetGeomType(), el.GetOrder(), T);
148 const int symmDims = 4;
149 const int nq = ir->GetNPoints();
150 dim = mesh->Dimension();
151 geom = mesh->GetFaceGeometricFactors(
153 FaceGeometricFactors::DETERMINANTS |
154 FaceGeometricFactors::NORMALS, type);
155 maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
160 if (VectorConstantCoefficient *c_u = dynamic_cast<VectorConstantCoefficient*>
165 else if (VectorQuadratureFunctionCoefficient* c_u =
166 dynamic_cast<VectorQuadratureFunctionCoefficient*>(u))
169 const QuadratureFunction &qFun = c_u->GetQuadFunction();
170 MFEM_VERIFY(qFun.Size() == dim * nq * nf,
171 "Incompatible QuadratureFunction dimension \n");
173 MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
174 "IntegrationRule used within integrator and in"
175 " QuadratureFunction appear to be different");
177 vel.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
181 vel.SetSize(dim * nq * nf);
182 auto C =
Reshape(vel.HostWrite(),
dim, nq, nf);
185 for (
int f = 0;
f < fes.GetNF(); ++
f)
189 fes.GetMesh()->GetFaceElements(
f, &e1, &e2);
190 fes.GetMesh()->GetFaceInfos(
f, &inf1, &inf2);
191 int face_id = inf1 / 64;
192 if ((type==FaceType::Interior && (e2>=0 || (e2<0 && inf2>=0))) ||
193 (type==FaceType::Boundary && e2<0 && inf2<0) )
195 FaceElementTransformations &T =
196 *fes.GetMesh()->GetFaceElementTransformations(
f);
197 for (
int q = 0; q < nq; ++q)
201 T.SetAllIntPoints(&ir->IntPoint(q));
202 const IntegrationPoint &eip1 = T.GetElement1IntPoint();
203 u->Eval(Vq, *T.Elem1, eip1);
204 for (
int i = 0; i <
dim; ++i)
206 C(i,iq,f_ind) = Vq(i);
212 MFEM_VERIFY(f_ind==nf,
"Incorrect number of faces.");
220 else if (ConstantCoefficient *c_rho = dynamic_cast<ConstantCoefficient*>(rho))
223 r(0) = c_rho->constant;
225 else if (QuadratureFunctionCoefficient* c_rho =
226 dynamic_cast<QuadratureFunctionCoefficient*>(rho))
228 const QuadratureFunction &qFun = c_rho->GetQuadFunction();
229 MFEM_VERIFY(qFun.Size() == nq * nf,
230 "Incompatible QuadratureFunction dimension \n");
232 MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
233 "IntegrationRule used within integrator and in"
234 " QuadratureFunction appear to be different");
236 r.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
241 auto C_vel =
Reshape(vel.HostRead(),
dim, nq, nf);
243 auto C =
Reshape(r.HostWrite(), nq, nf);
245 for (
int f = 0;
f < fes.GetNF(); ++
f)
249 fes.GetMesh()->GetFaceElements(
f, &e1, &e2);
250 fes.GetMesh()->GetFaceInfos(
f, &inf1, &inf2);
251 int face_id = inf1 / 64;
252 if ((type==FaceType::Interior && (e2>=0 || (e2<0 && inf2>=0))) ||
253 (type==FaceType::Boundary && e2<0 && inf2<0) )
255 FaceElementTransformations &T =
256 *fes.GetMesh()->GetFaceElementTransformations(
f);
257 for (
int q = 0; q < nq; ++q)
262 T.SetAllIntPoints(&ir->IntPoint(q));
263 const IntegrationPoint &eip1 = T.GetElement1IntPoint();
264 const IntegrationPoint &eip2 = T.GetElement2IntPoint();
269 r = rho->Eval(*T.Elem1, eip1);
274 for (
int d=0; d<
dim; ++d)
276 udotn += C_vel(d,iq,f_ind)*n(iq,d,f_ind);
278 if (udotn >= 0.0) { r = rho->Eval(*T.Elem2, eip2); }
279 else { r = rho->Eval(*T.Elem1, eip1); }
286 MFEM_VERIFY(f_ind==nf,
"Incorrect number of faces.");
288 PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(),
290 alpha, beta, pa_data);
295 SetupPA(fes, FaceType::Interior);
300 SetupPA(fes, FaceType::Boundary);
304 template<
int T_D1D = 0,
int T_Q1D = 0>
static
305 void PADGTraceApply2D(
const int NF,
315 const int D1D = T_D1D ? T_D1D : d1d;
316 const int Q1D = T_Q1D ? T_Q1D : q1d;
317 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
318 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
328 const int D1D = T_D1D ? T_D1D : d1d;
329 const int Q1D = T_Q1D ? T_Q1D : q1d;
331 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
332 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
333 double u0[max_D1D][VDIM];
334 double u1[max_D1D][VDIM];
335 for (
int d = 0; d < D1D; d++)
337 for (
int c = 0; c < VDIM; c++)
339 u0[d][c] = x(d,c,0,
f);
340 u1[d][c] = x(d,c,1,
f);
343 double Bu0[max_Q1D][VDIM];
344 double Bu1[max_Q1D][VDIM];
345 for (
int q = 0; q < Q1D; ++q)
347 for (
int c = 0; c < VDIM; c++)
352 for (
int d = 0; d < D1D; ++d)
354 const double b = B(q,d);
355 for (
int c = 0; c < VDIM; c++)
357 Bu0[q][c] += b*u0[d][c];
358 Bu1[q][c] += b*u1[d][c];
362 double DBu[max_Q1D][VDIM];
363 for (
int q = 0; q < Q1D; ++q)
365 for (
int c = 0; c < VDIM; c++)
367 DBu[q][c] = op(q,0,0,
f)*Bu0[q][c] + op(q,1,0,
f)*Bu1[q][c];
370 double BDBu[max_D1D][VDIM];
371 for (
int d = 0; d < D1D; ++d)
373 for (
int c = 0; c < VDIM; c++)
377 for (
int q = 0; q < Q1D; ++q)
379 const double b = Bt(d,q);
380 for (
int c = 0; c < VDIM; c++)
382 BDBu[d][c] += b*DBu[q][c];
385 for (
int c = 0; c < VDIM; c++)
387 y(d,c,0,
f) += BDBu[d][c];
388 y(d,c,1,
f) += -BDBu[d][c];
395 template<
int T_D1D = 0,
int T_Q1D = 0>
static
396 void PADGTraceApply3D(
const int NF,
397 const Array<double> &b,
398 const Array<double> &bt,
406 const int D1D = T_D1D ? T_D1D : d1d;
407 const int Q1D = T_Q1D ? T_Q1D : q1d;
408 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
409 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
410 auto B =
Reshape(b.Read(), Q1D, D1D);
411 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
412 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
413 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
414 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
419 const int D1D = T_D1D ? T_D1D : d1d;
420 const int Q1D = T_Q1D ? T_Q1D : q1d;
422 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
423 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
424 double u0[max_D1D][max_D1D][VDIM];
425 double u1[max_D1D][max_D1D][VDIM];
426 for (
int d1 = 0; d1 < D1D; d1++)
428 for (
int d2 = 0; d2 < D1D; d2++)
430 for (
int c = 0; c < VDIM; c++)
432 u0[d1][d2][c] = x(d1,d2,c,0,
f);
433 u1[d1][d2][c] = x(d1,d2,c,1,
f);
437 double Bu0[max_Q1D][max_D1D][VDIM];
438 double Bu1[max_Q1D][max_D1D][VDIM];
439 for (
int q = 0; q < Q1D; ++q)
441 for (
int d2 = 0; d2 < D1D; d2++)
443 for (
int c = 0; c < VDIM; c++)
448 for (
int d1 = 0; d1 < D1D; ++d1)
450 const double b = B(q,d1);
451 for (
int c = 0; c < VDIM; c++)
453 Bu0[q][d2][c] += b*u0[d1][d2][c];
454 Bu1[q][d2][c] += b*u1[d1][d2][c];
459 double BBu0[max_Q1D][max_Q1D][VDIM];
460 double BBu1[max_Q1D][max_Q1D][VDIM];
461 for (
int q1 = 0; q1 < Q1D; ++q1)
463 for (
int q2 = 0; q2 < Q1D; q2++)
465 for (
int c = 0; c < VDIM; c++)
467 BBu0[q1][q2][c] = 0.0;
468 BBu1[q1][q2][c] = 0.0;
470 for (
int d2 = 0; d2 < D1D; ++d2)
472 const double b = B(q2,d2);
473 for (
int c = 0; c < VDIM; c++)
475 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
476 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
481 double DBBu[max_Q1D][max_Q1D][VDIM];
482 for (
int q1 = 0; q1 < Q1D; ++q1)
484 for (
int q2 = 0; q2 < Q1D; q2++)
486 for (
int c = 0; c < VDIM; c++)
488 DBBu[q1][q2][c] = op(q1,q2,0,0,
f)*BBu0[q1][q2][c] +
489 op(q1,q2,1,0,
f)*BBu1[q1][q2][c];
493 double BDBBu[max_Q1D][max_D1D][VDIM];
494 for (
int q1 = 0; q1 < Q1D; ++q1)
496 for (
int d2 = 0; d2 < D1D; d2++)
498 for (
int c = 0; c < VDIM; c++)
500 BDBBu[q1][d2][c] = 0.0;
502 for (
int q2 = 0; q2 < Q1D; ++q2)
504 const double b = Bt(d2,q2);
505 for (
int c = 0; c < VDIM; c++)
507 BDBBu[q1][d2][c] += b*DBBu[q1][q2][c];
512 double BBDBBu[max_D1D][max_D1D][VDIM];
513 for (
int d1 = 0; d1 < D1D; ++d1)
515 for (
int d2 = 0; d2 < D1D; d2++)
517 for (
int c = 0; c < VDIM; c++)
519 BBDBBu[d1][d2][c] = 0.0;
521 for (
int q1 = 0; q1 < Q1D; ++q1)
523 const double b = Bt(d1,q1);
524 for (
int c = 0; c < VDIM; c++)
526 BBDBBu[d1][d2][c] += b*BDBBu[q1][d2][c];
529 for (
int c = 0; c < VDIM; c++)
531 y(d1,d2,c,0,
f) += BBDBBu[d1][d2][c];
532 y(d1,d2,c,1,
f) += -BBDBBu[d1][d2][c];
540 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
541 void SmemPADGTraceApply3D(
const int NF,
542 const Array<double> &b,
543 const Array<double> &bt,
550 const int D1D = T_D1D ? T_D1D : d1d;
551 const int Q1D = T_Q1D ? T_Q1D : q1d;
552 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
553 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
554 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
555 auto B =
Reshape(b.Read(), Q1D, D1D);
556 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
557 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
558 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
559 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
561 MFEM_FORALL_2D(
f, NF, Q1D, Q1D, NBZ,
563 const int tidz = MFEM_THREAD_ID(z);
564 const int D1D = T_D1D ? T_D1D : d1d;
565 const int Q1D = T_Q1D ? T_Q1D : q1d;
567 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
568 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
569 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
570 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
571 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
572 MFEM_FOREACH_THREAD(d1,x,D1D)
574 MFEM_FOREACH_THREAD(d2,y,D1D)
576 u0[tidz][d1][d2] = x(d1,d2,0,
f+tidz);
577 u1[tidz][d1][d2] = x(d1,d2,1,
f+tidz);
581 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
582 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
583 MFEM_FOREACH_THREAD(q1,x,Q1D)
585 MFEM_FOREACH_THREAD(d2,y,D1D)
589 for (
int d1 = 0; d1 < D1D; ++d1)
591 const double b = B(q1,d1);
592 Bu0_ += b*u0[tidz][d1][d2];
593 Bu1_ += b*u1[tidz][d1][d2];
595 Bu0[tidz][q1][d2] = Bu0_;
596 Bu1[tidz][q1][d2] = Bu1_;
600 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
601 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
602 MFEM_FOREACH_THREAD(q1,x,Q1D)
604 MFEM_FOREACH_THREAD(q2,y,Q1D)
608 for (
int d2 = 0; d2 < D1D; ++d2)
610 const double b = B(q2,d2);
611 BBu0_ += b*Bu0[tidz][q1][d2];
612 BBu1_ += b*Bu1[tidz][q1][d2];
614 BBu0[tidz][q1][q2] = BBu0_;
615 BBu1[tidz][q1][q2] = BBu1_;
619 MFEM_SHARED
double DBBu[NBZ][max_Q1D][max_Q1D];
620 MFEM_FOREACH_THREAD(q1,x,Q1D)
622 MFEM_FOREACH_THREAD(q2,y,Q1D)
624 DBBu[tidz][q1][q2] = op(q1,q2,0,0,
f+tidz)*BBu0[tidz][q1][q2] +
625 op(q1,q2,1,0,
f+tidz)*BBu1[tidz][q1][q2];
629 MFEM_SHARED
double BDBBu[NBZ][max_Q1D][max_D1D];
630 MFEM_FOREACH_THREAD(q1,x,Q1D)
632 MFEM_FOREACH_THREAD(d2,y,D1D)
635 for (
int q2 = 0; q2 < Q1D; ++q2)
637 const double b = Bt(d2,q2);
638 BDBBu_ += b*DBBu[tidz][q1][q2];
640 BDBBu[tidz][q1][d2] = BDBBu_;
644 MFEM_FOREACH_THREAD(d1,x,D1D)
646 MFEM_FOREACH_THREAD(d2,y,D1D)
648 double BBDBBu_ = 0.0;
649 for (
int q1 = 0; q1 < Q1D; ++q1)
651 const double b = Bt(d1,q1);
652 BBDBBu_ += b*BDBBu[tidz][q1][d2];
654 y(d1,d2,0,
f+tidz) += BBDBBu_;
655 y(d1,d2,1,
f+tidz) += -BBDBBu_;
661 static void PADGTraceApply(
const int dim,
665 const Array<double> &B,
666 const Array<double> &Bt,
673 switch ((D1D << 4 ) | Q1D)
675 case 0x22:
return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
676 case 0x33:
return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
677 case 0x44:
return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
678 case 0x55:
return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
679 case 0x66:
return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
680 case 0x77:
return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
681 case 0x88:
return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
682 case 0x99:
return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
683 default:
return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
688 switch ((D1D << 4 ) | Q1D)
690 case 0x23:
return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
691 case 0x34:
return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
692 case 0x45:
return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
693 case 0x56:
return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
694 case 0x67:
return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
695 case 0x78:
return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
696 case 0x89:
return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
697 default:
return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
700 MFEM_ABORT(
"Unknown kernel.");
704 template<
int T_D1D = 0,
int T_Q1D = 0>
static
705 void PADGTraceApplyTranspose2D(
const int NF,
706 const Array<double> &b,
707 const Array<double> &bt,
715 const int D1D = T_D1D ? T_D1D : d1d;
716 const int Q1D = T_Q1D ? T_Q1D : q1d;
717 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
718 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
719 auto B =
Reshape(b.Read(), Q1D, D1D);
720 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
721 auto op =
Reshape(op_.Read(), Q1D, 2, 2, NF);
722 auto x =
Reshape(x_.Read(), D1D, VDIM, 2, NF);
723 auto y =
Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
728 const int D1D = T_D1D ? T_D1D : d1d;
729 const int Q1D = T_Q1D ? T_Q1D : q1d;
731 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
732 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
733 double u0[max_D1D][VDIM];
734 double u1[max_D1D][VDIM];
735 for (
int d = 0; d < D1D; d++)
737 for (
int c = 0; c < VDIM; c++)
739 u0[d][c] = x(d,c,0,
f);
740 u1[d][c] = x(d,c,1,
f);
743 double Bu0[max_Q1D][VDIM];
744 double Bu1[max_Q1D][VDIM];
745 for (
int q = 0; q < Q1D; ++q)
747 for (
int c = 0; c < VDIM; c++)
752 for (
int d = 0; d < D1D; ++d)
754 const double b = B(q,d);
755 for (
int c = 0; c < VDIM; c++)
757 Bu0[q][c] += b*u0[d][c];
758 Bu1[q][c] += b*u1[d][c];
762 double DBu0[max_Q1D][VDIM];
763 double DBu1[max_Q1D][VDIM];
764 for (
int q = 0; q < Q1D; ++q)
766 for (
int c = 0; c < VDIM; c++)
768 DBu0[q][c] = op(q,0,0,
f)*Bu0[q][c] + op(q,0,1,
f)*Bu1[q][c];
769 DBu1[q][c] = op(q,1,0,
f)*Bu0[q][c] + op(q,1,1,
f)*Bu1[q][c];
772 double BDBu0[max_D1D][VDIM];
773 double BDBu1[max_D1D][VDIM];
774 for (
int d = 0; d < D1D; ++d)
776 for (
int c = 0; c < VDIM; c++)
781 for (
int q = 0; q < Q1D; ++q)
783 const double b = Bt(d,q);
784 for (
int c = 0; c < VDIM; c++)
786 BDBu0[d][c] += b*DBu0[q][c];
787 BDBu1[d][c] += b*DBu1[q][c];
790 for (
int c = 0; c < VDIM; c++)
792 y(d,c,0,
f) += BDBu0[d][c];
793 y(d,c,1,
f) += BDBu1[d][c];
800 template<
int T_D1D = 0,
int T_Q1D = 0>
static
801 void PADGTraceApplyTranspose3D(
const int NF,
802 const Array<double> &b,
803 const Array<double> &bt,
811 const int D1D = T_D1D ? T_D1D : d1d;
812 const int Q1D = T_Q1D ? T_Q1D : q1d;
813 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
814 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
815 auto B =
Reshape(b.Read(), Q1D, D1D);
816 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
817 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
818 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
819 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
824 const int D1D = T_D1D ? T_D1D : d1d;
825 const int Q1D = T_Q1D ? T_Q1D : q1d;
827 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
828 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
829 double u0[max_D1D][max_D1D][VDIM];
830 double u1[max_D1D][max_D1D][VDIM];
831 for (
int d1 = 0; d1 < D1D; d1++)
833 for (
int d2 = 0; d2 < D1D; d2++)
835 for (
int c = 0; c < VDIM; c++)
837 u0[d1][d2][c] = x(d1,d2,c,0,
f);
838 u1[d1][d2][c] = x(d1,d2,c,1,
f);
842 double Bu0[max_Q1D][max_D1D][VDIM];
843 double Bu1[max_Q1D][max_D1D][VDIM];
844 for (
int q1 = 0; q1 < Q1D; ++q1)
846 for (
int d2 = 0; d2 < D1D; ++d2)
848 for (
int c = 0; c < VDIM; c++)
850 Bu0[q1][d2][c] = 0.0;
851 Bu1[q1][d2][c] = 0.0;
853 for (
int d1 = 0; d1 < D1D; ++d1)
855 const double b = B(q1,d1);
856 for (
int c = 0; c < VDIM; c++)
858 Bu0[q1][d2][c] += b*u0[d1][d2][c];
859 Bu1[q1][d2][c] += b*u1[d1][d2][c];
864 double BBu0[max_Q1D][max_Q1D][VDIM];
865 double BBu1[max_Q1D][max_Q1D][VDIM];
866 for (
int q1 = 0; q1 < Q1D; ++q1)
868 for (
int q2 = 0; q2 < Q1D; ++q2)
870 for (
int c = 0; c < VDIM; c++)
872 BBu0[q1][q2][c] = 0.0;
873 BBu1[q1][q2][c] = 0.0;
875 for (
int d2 = 0; d2 < D1D; ++d2)
877 const double b = B(q2,d2);
878 for (
int c = 0; c < VDIM; c++)
880 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
881 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
886 double DBu0[max_Q1D][max_Q1D][VDIM];
887 double DBu1[max_Q1D][max_Q1D][VDIM];
888 for (
int q1 = 0; q1 < Q1D; ++q1)
890 for (
int q2 = 0; q2 < Q1D; ++q2)
892 const double D00 = op(q1,q2,0,0,
f);
893 const double D01 = op(q1,q2,0,1,
f);
894 const double D10 = op(q1,q2,1,0,
f);
895 const double D11 = op(q1,q2,1,1,
f);
896 for (
int c = 0; c < VDIM; c++)
898 DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
899 DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
903 double BDBu0[max_D1D][max_Q1D][VDIM];
904 double BDBu1[max_D1D][max_Q1D][VDIM];
905 for (
int d1 = 0; d1 < D1D; ++d1)
907 for (
int q2 = 0; q2 < Q1D; ++q2)
909 for (
int c = 0; c < VDIM; c++)
911 BDBu0[d1][q2][c] = 0.0;
912 BDBu1[d1][q2][c] = 0.0;
914 for (
int q1 = 0; q1 < Q1D; ++q1)
916 const double b = Bt(d1,q1);
917 for (
int c = 0; c < VDIM; c++)
919 BDBu0[d1][q2][c] += b*DBu0[q1][q2][c];
920 BDBu1[d1][q2][c] += b*DBu1[q1][q2][c];
925 double BBDBu0[max_D1D][max_D1D][VDIM];
926 double BBDBu1[max_D1D][max_D1D][VDIM];
927 for (
int d1 = 0; d1 < D1D; ++d1)
929 for (
int d2 = 0; d2 < D1D; ++d2)
931 for (
int c = 0; c < VDIM; c++)
933 BBDBu0[d1][d2][c] = 0.0;
934 BBDBu1[d1][d2][c] = 0.0;
936 for (
int q2 = 0; q2 < Q1D; ++q2)
938 const double b = Bt(d2,q2);
939 for (
int c = 0; c < VDIM; c++)
941 BBDBu0[d1][d2][c] += b*BDBu0[d1][q2][c];
942 BBDBu1[d1][d2][c] += b*BDBu1[d1][q2][c];
945 for (
int c = 0; c < VDIM; c++)
947 y(d1,d2,c,0,
f) += BBDBu0[d1][d2][c];
948 y(d1,d2,c,1,
f) += BBDBu1[d1][d2][c];
956 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
957 void SmemPADGTraceApplyTranspose3D(
const int NF,
958 const Array<double> &b,
959 const Array<double> &bt,
966 const int D1D = T_D1D ? T_D1D : d1d;
967 const int Q1D = T_Q1D ? T_Q1D : q1d;
968 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
969 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
970 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
971 auto B =
Reshape(b.Read(), Q1D, D1D);
972 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
973 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
974 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
975 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
977 MFEM_FORALL_2D(
f, NF, Q1D, Q1D, NBZ,
979 const int tidz = MFEM_THREAD_ID(z);
980 const int D1D = T_D1D ? T_D1D : d1d;
981 const int Q1D = T_Q1D ? T_Q1D : q1d;
983 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
984 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
985 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
986 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
987 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
988 MFEM_FOREACH_THREAD(d1,x,D1D)
990 MFEM_FOREACH_THREAD(d2,y,D1D)
992 u0[tidz][d1][d2] = x(d1,d2,0,
f+tidz);
993 u1[tidz][d1][d2] = x(d1,d2,1,
f+tidz);
997 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
998 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
999 MFEM_FOREACH_THREAD(q1,x,Q1D)
1001 MFEM_FOREACH_THREAD(d2,y,D1D)
1005 for (
int d1 = 0; d1 < D1D; ++d1)
1007 const double b = B(q1,d1);
1008 Bu0_ += b*u0[tidz][d1][d2];
1009 Bu1_ += b*u1[tidz][d1][d2];
1011 Bu0[tidz][q1][d2] = Bu0_;
1012 Bu1[tidz][q1][d2] = Bu1_;
1016 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
1017 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
1018 MFEM_FOREACH_THREAD(q1,x,Q1D)
1020 MFEM_FOREACH_THREAD(q2,y,Q1D)
1024 for (
int d2 = 0; d2 < D1D; ++d2)
1026 const double b = B(q2,d2);
1027 BBu0_ += b*Bu0[tidz][q1][d2];
1028 BBu1_ += b*Bu1[tidz][q1][d2];
1030 BBu0[tidz][q1][q2] = BBu0_;
1031 BBu1[tidz][q1][q2] = BBu1_;
1035 MFEM_SHARED
double DBBu0[NBZ][max_Q1D][max_Q1D];
1036 MFEM_SHARED
double DBBu1[NBZ][max_Q1D][max_Q1D];
1037 MFEM_FOREACH_THREAD(q1,x,Q1D)
1039 MFEM_FOREACH_THREAD(q2,y,Q1D)
1041 const double D00 = op(q1,q2,0,0,
f+tidz);
1042 const double D01 = op(q1,q2,0,1,
f+tidz);
1043 const double D10 = op(q1,q2,1,0,
f+tidz);
1044 const double D11 = op(q1,q2,1,1,
f+tidz);
1045 const double u0 = BBu0[tidz][q1][q2];
1046 const double u1 = BBu1[tidz][q1][q2];
1047 DBBu0[tidz][q1][q2] = D00*u0 + D01*u1;
1048 DBBu1[tidz][q1][q2] = D10*u0 + D11*u1;
1052 MFEM_SHARED
double BDBBu0[NBZ][max_Q1D][max_D1D];
1053 MFEM_SHARED
double BDBBu1[NBZ][max_Q1D][max_D1D];
1054 MFEM_FOREACH_THREAD(q1,x,Q1D)
1056 MFEM_FOREACH_THREAD(d2,y,D1D)
1058 double BDBBu0_ = 0.0;
1059 double BDBBu1_ = 0.0;
1060 for (
int q2 = 0; q2 < Q1D; ++q2)
1062 const double b = Bt(d2,q2);
1063 BDBBu0_ += b*DBBu0[tidz][q1][q2];
1064 BDBBu1_ += b*DBBu1[tidz][q1][q2];
1066 BDBBu0[tidz][q1][d2] = BDBBu0_;
1067 BDBBu1[tidz][q1][d2] = BDBBu1_;
1071 MFEM_FOREACH_THREAD(d1,x,D1D)
1073 MFEM_FOREACH_THREAD(d2,y,D1D)
1075 double BBDBBu0_ = 0.0;
1076 double BBDBBu1_ = 0.0;
1077 for (
int q1 = 0; q1 < Q1D; ++q1)
1079 const double b = Bt(d1,q1);
1080 BBDBBu0_ += b*BDBBu0[tidz][q1][d2];
1081 BBDBBu1_ += b*BDBBu1[tidz][q1][d2];
1083 y(d1,d2,0,
f+tidz) += BBDBBu0_;
1084 y(d1,d2,1,
f+tidz) += BBDBBu1_;
1090 static void PADGTraceApplyTranspose(
const int dim,
1094 const Array<double> &B,
1095 const Array<double> &Bt,
1102 switch ((D1D << 4 ) | Q1D)
1104 case 0x22:
return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1105 case 0x33:
return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1106 case 0x44:
return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1107 case 0x55:
return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1108 case 0x66:
return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1109 case 0x77:
return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1110 case 0x88:
return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1111 case 0x99:
return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1112 default:
return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1117 switch ((D1D << 4 ) | Q1D)
1119 case 0x23:
return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1120 case 0x34:
return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1121 case 0x45:
return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1122 case 0x56:
return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1123 case 0x67:
return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1124 case 0x78:
return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1125 case 0x89:
return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1126 default:
return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1129 MFEM_ABORT(
"Unknown kernel.");
1135 PADGTraceApply(dim, dofs1D, quad1D, nf,
1140 void DGTraceIntegrator::AddMultTransposePA(
const Vector &x,
Vector &y)
const
1142 PADGTraceApplyTranspose(dim, dofs1D, quad1D, nf,
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void vel(const Vector &x, double t, Vector &u)
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
double u(const Vector &xvec)
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
double f(const Vector &p)