12 #include "../general/forall.hpp"
16 #include "restriction.hpp"
23 static void PADGTraceSetup2D(
const int Q1D,
25 const Array<double> &w,
36 auto d =
Reshape(det.Read(), Q1D, NF);
37 auto n =
Reshape(nor.Read(), Q1D, VDIM, NF);
38 const bool const_r = rho.Size() == 1;
41 const bool const_v = vel.Size() == 2;
43 const_v ?
Reshape(vel.Read(), 2,1,1) :
Reshape(vel.Read(), 2,Q1D,NF);
45 auto qd =
Reshape(op.Write(), Q1D, 2, 2, NF);
47 MFEM_FORALL(tid, Q1D*NF,
49 const int f = tid / Q1D;
50 const int q = tid % Q1D;
52 const double r = const_r ? R(0,0) : R(q,f);
53 const double v0 = const_v ? V(0,0,0) : V(0,q,f);
54 const double v1 = const_v ? V(1,0,0) : V(1,q,f);
55 const double dot = n(q,0,f) * v0 + n(q,1,f) * v1;
56 const double abs = dot > 0.0 ? dot : -dot;
57 const double w = W[q]*r*d(q,f);
58 qd(q,0,0,f) = w*( alpha/2 * dot + beta * abs );
59 qd(q,1,0,f) = w*( alpha/2 * dot - beta * abs );
60 qd(q,0,1,f) = w*(-alpha/2 * dot - beta * abs );
61 qd(q,1,1,f) = w*(-alpha/2 * dot + beta * abs );
66 static void PADGTraceSetup3D(
const int Q1D,
68 const Array<double> &w,
79 auto d =
Reshape(det.Read(), Q1D, Q1D, NF);
80 auto n =
Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
81 const bool const_r = rho.Size() == 1;
83 const_r ?
Reshape(rho.Read(), 1,1,1) :
Reshape(rho.Read(), Q1D,Q1D,NF);
84 const bool const_v = vel.Size() == 3;
86 const_v ?
Reshape(vel.Read(), 3,1,1,1) :
Reshape(vel.Read(), 3,Q1D,Q1D,NF);
88 auto qd =
Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
90 MFEM_FORALL(tid, Q1D*Q1D*NF,
92 int f = tid / (Q1D * Q1D);
93 int q2 = (tid / Q1D) % Q1D;
97 const double r = const_r ? R(0,0,0) : R(q1,q2,f);
98 const double v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,f);
99 const double v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,f);
100 const double v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,f);
101 const double dot = n(q1,q2,0,f) * v0 + n(q1,q2,1,f) * v1 +
103 const double abs = dot > 0.0 ? dot : -dot;
104 const double w = W[q1+q2*Q1D]*r*d(q1,q2,f);
105 qd(q1,q2,0,0,f) = w*( alpha/2 * dot + beta * abs );
106 qd(q1,q2,1,0,f) = w*( alpha/2 * dot - beta * abs );
107 qd(q1,q2,0,1,f) = w*(-alpha/2 * dot - beta * abs );
108 qd(q1,q2,1,1,f) = w*(-alpha/2 * dot + beta * abs );
114 static void PADGTraceSetup(
const int dim,
118 const Array<double> &W,
127 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADGTraceSetup"); }
130 PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
134 PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
138 void DGTraceIntegrator::SetupPA(
const FiniteElementSpace &fes,
FaceType type)
140 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
141 Device::GetDeviceMemoryType() : pa_mt;
143 nf = fes.GetNFbyType(type);
144 if (nf==0) {
return; }
146 Mesh *mesh = fes.GetMesh();
147 const FiniteElement &el =
148 *fes.GetTraceElement(0, fes.GetMesh()->GetFaceBaseGeometry(0));
149 FaceElementTransformations &T0 =
150 *fes.GetMesh()->GetFaceElementTransformations(0);
151 const IntegrationRule *ir = IntRule?
153 &
GetRule(el.GetGeomType(), el.GetOrder(), T0);
154 const int symmDims = 4;
156 dim = mesh->Dimension();
157 geom = mesh->GetFaceGeometricFactors(
159 FaceGeometricFactors::DETERMINANTS |
160 FaceGeometricFactors::NORMALS, type, mt);
161 maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
166 FaceQuadratureSpace qs(*mesh, *ir, type);
167 CoefficientVector
vel(*u, qs, CoefficientStorage::COMPRESSED);
169 CoefficientVector r(qs, CoefficientStorage::COMPRESSED);
174 else if (ConstantCoefficient *const_rho = dynamic_cast<ConstantCoefficient*>
177 r.SetConstant(const_rho->constant);
179 else if (QuadratureFunctionCoefficient* qf_rho =
180 dynamic_cast<QuadratureFunctionCoefficient*>(rho))
182 r.MakeRef(qf_rho->GetQuadFunction());
187 auto C_vel =
Reshape(vel.HostRead(),
dim, nq, nf);
188 auto n =
Reshape(geom->normal.HostRead(), nq,
dim, nf);
189 auto C =
Reshape(r.HostWrite(), nq, nf);
191 for (
int f = 0; f < mesh->GetNumFacesWithGhost(); ++
f)
193 Mesh::FaceInformation face = mesh->GetFaceInformation(f);
194 if (face.IsNonconformingCoarse() || !face.IsOfFaceType(type))
200 FaceElementTransformations &T =
201 *fes.GetMesh()->GetFaceElementTransformations(f);
202 for (
int q = 0; q < nq; ++q)
208 T.SetAllIntPoints(&ir->IntPoint(q));
209 const IntegrationPoint &eip1 = T.GetElement1IntPoint();
210 const IntegrationPoint &eip2 = T.GetElement2IntPoint();
213 if (face.IsBoundary())
215 rq = rho->Eval(*T.Elem1, eip1);
220 for (
int d=0; d<
dim; ++d)
222 udotn += C_vel(d,iq,f_ind)*n(iq,d,f_ind);
224 if (udotn >= 0.0) { rq = rho->Eval(*T.Elem2, eip2); }
225 else { rq = rho->Eval(*T.Elem1, eip1); }
231 MFEM_VERIFY(f_ind==nf,
"Incorrect number of faces.");
233 PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(),
234 geom->detJ, geom->normal, r,
vel,
235 alpha, beta, pa_data);
240 SetupPA(fes, FaceType::Interior);
245 SetupPA(fes, FaceType::Boundary);
249 template<
int T_D1D = 0,
int T_Q1D = 0>
static
250 void PADGTraceApply2D(
const int NF,
260 const int D1D = T_D1D ? T_D1D : d1d;
261 const int Q1D = T_Q1D ? T_Q1D : q1d;
262 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
263 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
273 const int D1D = T_D1D ? T_D1D : d1d;
274 const int Q1D = T_Q1D ? T_Q1D : q1d;
276 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
277 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
278 double u0[max_D1D][VDIM];
279 double u1[max_D1D][VDIM];
280 for (
int d = 0; d < D1D; d++)
282 for (
int c = 0; c < VDIM; c++)
284 u0[d][c] = x(d,c,0,f);
285 u1[d][c] = x(d,c,1,f);
288 double Bu0[max_Q1D][VDIM];
289 double Bu1[max_Q1D][VDIM];
290 for (
int q = 0; q < Q1D; ++q)
292 for (
int c = 0; c < VDIM; c++)
297 for (
int d = 0; d < D1D; ++d)
299 const double b = B(q,d);
300 for (
int c = 0; c < VDIM; c++)
302 Bu0[q][c] += b*u0[d][c];
303 Bu1[q][c] += b*u1[d][c];
307 double DBu[max_Q1D][VDIM];
308 for (
int q = 0; q < Q1D; ++q)
310 for (
int c = 0; c < VDIM; c++)
312 DBu[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,1,0,f)*Bu1[q][c];
315 double BDBu[max_D1D][VDIM];
316 for (
int d = 0; d < D1D; ++d)
318 for (
int c = 0; c < VDIM; c++)
322 for (
int q = 0; q < Q1D; ++q)
324 const double b = Bt(d,q);
325 for (
int c = 0; c < VDIM; c++)
327 BDBu[d][c] += b*DBu[q][c];
330 for (
int c = 0; c < VDIM; c++)
332 y(d,c,0,f) += BDBu[d][c];
333 y(d,c,1,f) += -BDBu[d][c];
340 template<
int T_D1D = 0,
int T_Q1D = 0>
static
341 void PADGTraceApply3D(
const int NF,
342 const Array<double> &b,
343 const Array<double> &bt,
351 const int D1D = T_D1D ? T_D1D : d1d;
352 const int Q1D = T_Q1D ? T_Q1D : q1d;
353 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
354 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
355 auto B =
Reshape(b.Read(), Q1D, D1D);
356 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
357 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
358 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
359 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
364 const int D1D = T_D1D ? T_D1D : d1d;
365 const int Q1D = T_Q1D ? T_Q1D : q1d;
367 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
368 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
369 double u0[max_D1D][max_D1D][VDIM];
370 double u1[max_D1D][max_D1D][VDIM];
371 for (
int d1 = 0; d1 < D1D; d1++)
373 for (
int d2 = 0; d2 < D1D; d2++)
375 for (
int c = 0; c < VDIM; c++)
377 u0[d1][d2][c] = x(d1,d2,c,0,f);
378 u1[d1][d2][c] = x(d1,d2,c,1,f);
382 double Bu0[max_Q1D][max_D1D][VDIM];
383 double Bu1[max_Q1D][max_D1D][VDIM];
384 for (
int q = 0; q < Q1D; ++q)
386 for (
int d2 = 0; d2 < D1D; d2++)
388 for (
int c = 0; c < VDIM; c++)
393 for (
int d1 = 0; d1 < D1D; ++d1)
395 const double b = B(q,d1);
396 for (
int c = 0; c < VDIM; c++)
398 Bu0[q][d2][c] += b*u0[d1][d2][c];
399 Bu1[q][d2][c] += b*u1[d1][d2][c];
404 double BBu0[max_Q1D][max_Q1D][VDIM];
405 double BBu1[max_Q1D][max_Q1D][VDIM];
406 for (
int q1 = 0; q1 < Q1D; ++q1)
408 for (
int q2 = 0; q2 < Q1D; q2++)
410 for (
int c = 0; c < VDIM; c++)
412 BBu0[q1][q2][c] = 0.0;
413 BBu1[q1][q2][c] = 0.0;
415 for (
int d2 = 0; d2 < D1D; ++d2)
417 const double b = B(q2,d2);
418 for (
int c = 0; c < VDIM; c++)
420 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
421 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
426 double DBBu[max_Q1D][max_Q1D][VDIM];
427 for (
int q1 = 0; q1 < Q1D; ++q1)
429 for (
int q2 = 0; q2 < Q1D; q2++)
431 for (
int c = 0; c < VDIM; c++)
433 DBBu[q1][q2][c] = op(q1,q2,0,0,f)*BBu0[q1][q2][c] +
434 op(q1,q2,1,0,f)*BBu1[q1][q2][c];
438 double BDBBu[max_Q1D][max_D1D][VDIM];
439 for (
int q1 = 0; q1 < Q1D; ++q1)
441 for (
int d2 = 0; d2 < D1D; d2++)
443 for (
int c = 0; c < VDIM; c++)
445 BDBBu[q1][d2][c] = 0.0;
447 for (
int q2 = 0; q2 < Q1D; ++q2)
449 const double b = Bt(d2,q2);
450 for (
int c = 0; c < VDIM; c++)
452 BDBBu[q1][d2][c] += b*DBBu[q1][q2][c];
457 double BBDBBu[max_D1D][max_D1D][VDIM];
458 for (
int d1 = 0; d1 < D1D; ++d1)
460 for (
int d2 = 0; d2 < D1D; d2++)
462 for (
int c = 0; c < VDIM; c++)
464 BBDBBu[d1][d2][c] = 0.0;
466 for (
int q1 = 0; q1 < Q1D; ++q1)
468 const double b = Bt(d1,q1);
469 for (
int c = 0; c < VDIM; c++)
471 BBDBBu[d1][d2][c] += b*BDBBu[q1][d2][c];
474 for (
int c = 0; c < VDIM; c++)
476 y(d1,d2,c,0,f) += BBDBBu[d1][d2][c];
477 y(d1,d2,c,1,f) += -BBDBBu[d1][d2][c];
485 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
486 void SmemPADGTraceApply3D(
const int NF,
487 const Array<double> &b,
488 const Array<double> &bt,
495 const int D1D = T_D1D ? T_D1D : d1d;
496 const int Q1D = T_Q1D ? T_Q1D : q1d;
497 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
498 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
499 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
500 auto B =
Reshape(b.Read(), Q1D, D1D);
501 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
502 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
503 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
504 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
506 MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
508 const int tidz = MFEM_THREAD_ID(z);
509 const int D1D = T_D1D ? T_D1D : d1d;
510 const int Q1D = T_Q1D ? T_Q1D : q1d;
512 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
513 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
514 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
515 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
516 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
517 MFEM_FOREACH_THREAD(d1,x,D1D)
519 MFEM_FOREACH_THREAD(d2,y,D1D)
521 u0[tidz][d1][d2] = x(d1,d2,0,f);
522 u1[tidz][d1][d2] = x(d1,d2,1,f);
526 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
527 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
528 MFEM_FOREACH_THREAD(q1,x,Q1D)
530 MFEM_FOREACH_THREAD(d2,y,D1D)
534 for (
int d1 = 0; d1 < D1D; ++d1)
536 const double b = B(q1,d1);
537 Bu0_ += b*u0[tidz][d1][d2];
538 Bu1_ += b*u1[tidz][d1][d2];
540 Bu0[tidz][q1][d2] = Bu0_;
541 Bu1[tidz][q1][d2] = Bu1_;
545 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
546 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
547 MFEM_FOREACH_THREAD(q1,x,Q1D)
549 MFEM_FOREACH_THREAD(q2,y,Q1D)
553 for (
int d2 = 0; d2 < D1D; ++d2)
555 const double b = B(q2,d2);
556 BBu0_ += b*Bu0[tidz][q1][d2];
557 BBu1_ += b*Bu1[tidz][q1][d2];
559 BBu0[tidz][q1][q2] = BBu0_;
560 BBu1[tidz][q1][q2] = BBu1_;
564 MFEM_SHARED
double DBBu[NBZ][max_Q1D][max_Q1D];
565 MFEM_FOREACH_THREAD(q1,x,Q1D)
567 MFEM_FOREACH_THREAD(q2,y,Q1D)
569 DBBu[tidz][q1][q2] = op(q1,q2,0,0,f)*BBu0[tidz][q1][q2] +
570 op(q1,q2,1,0,f)*BBu1[tidz][q1][q2];
574 MFEM_SHARED
double BDBBu[NBZ][max_Q1D][max_D1D];
575 MFEM_FOREACH_THREAD(q1,x,Q1D)
577 MFEM_FOREACH_THREAD(d2,y,D1D)
580 for (
int q2 = 0; q2 < Q1D; ++q2)
582 const double b = Bt(d2,q2);
583 BDBBu_ += b*DBBu[tidz][q1][q2];
585 BDBBu[tidz][q1][d2] = BDBBu_;
589 MFEM_FOREACH_THREAD(d1,x,D1D)
591 MFEM_FOREACH_THREAD(d2,y,D1D)
593 double BBDBBu_ = 0.0;
594 for (
int q1 = 0; q1 < Q1D; ++q1)
596 const double b = Bt(d1,q1);
597 BBDBBu_ += b*BDBBu[tidz][q1][d2];
599 y(d1,d2,0,f) += BBDBBu_;
600 y(d1,d2,1,f) += -BBDBBu_;
606 static void PADGTraceApply(
const int dim,
610 const Array<double> &B,
611 const Array<double> &Bt,
618 switch ((D1D << 4 ) | Q1D)
620 case 0x22:
return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
621 case 0x33:
return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
622 case 0x44:
return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
623 case 0x55:
return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
624 case 0x66:
return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
625 case 0x77:
return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
626 case 0x88:
return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
627 case 0x99:
return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
628 default:
return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
633 switch ((D1D << 4 ) | Q1D)
635 case 0x22:
return SmemPADGTraceApply3D<2,2,1>(NF,B,Bt,op,x,y);
636 case 0x23:
return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
637 case 0x34:
return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
638 case 0x45:
return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
639 case 0x56:
return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
640 case 0x67:
return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
641 case 0x78:
return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
642 case 0x89:
return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
643 default:
return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
646 MFEM_ABORT(
"Unknown kernel.");
650 template<
int T_D1D = 0,
int T_Q1D = 0>
static
651 void PADGTraceApplyTranspose2D(
const int NF,
652 const Array<double> &b,
653 const Array<double> &bt,
661 const int D1D = T_D1D ? T_D1D : d1d;
662 const int Q1D = T_Q1D ? T_Q1D : q1d;
663 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
664 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
665 auto B =
Reshape(b.Read(), Q1D, D1D);
666 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
667 auto op =
Reshape(op_.Read(), Q1D, 2, 2, NF);
668 auto x =
Reshape(x_.Read(), D1D, VDIM, 2, NF);
669 auto y =
Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
674 const int D1D = T_D1D ? T_D1D : d1d;
675 const int Q1D = T_Q1D ? T_Q1D : q1d;
677 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
678 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
679 double u0[max_D1D][VDIM];
680 double u1[max_D1D][VDIM];
681 for (
int d = 0; d < D1D; d++)
683 for (
int c = 0; c < VDIM; c++)
685 u0[d][c] = x(d,c,0,f);
686 u1[d][c] = x(d,c,1,f);
689 double Bu0[max_Q1D][VDIM];
690 double Bu1[max_Q1D][VDIM];
691 for (
int q = 0; q < Q1D; ++q)
693 for (
int c = 0; c < VDIM; c++)
698 for (
int d = 0; d < D1D; ++d)
700 const double b = B(q,d);
701 for (
int c = 0; c < VDIM; c++)
703 Bu0[q][c] += b*u0[d][c];
704 Bu1[q][c] += b*u1[d][c];
708 double DBu0[max_Q1D][VDIM];
709 double DBu1[max_Q1D][VDIM];
710 for (
int q = 0; q < Q1D; ++q)
712 for (
int c = 0; c < VDIM; c++)
714 DBu0[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,0,1,f)*Bu1[q][c];
715 DBu1[q][c] = op(q,1,0,f)*Bu0[q][c] + op(q,1,1,f)*Bu1[q][c];
718 double BDBu0[max_D1D][VDIM];
719 double BDBu1[max_D1D][VDIM];
720 for (
int d = 0; d < D1D; ++d)
722 for (
int c = 0; c < VDIM; c++)
727 for (
int q = 0; q < Q1D; ++q)
729 const double b = Bt(d,q);
730 for (
int c = 0; c < VDIM; c++)
732 BDBu0[d][c] += b*DBu0[q][c];
733 BDBu1[d][c] += b*DBu1[q][c];
736 for (
int c = 0; c < VDIM; c++)
738 y(d,c,0,f) += BDBu0[d][c];
739 y(d,c,1,f) += BDBu1[d][c];
746 template<
int T_D1D = 0,
int T_Q1D = 0>
static
747 void PADGTraceApplyTranspose3D(
const int NF,
748 const Array<double> &b,
749 const Array<double> &bt,
757 const int D1D = T_D1D ? T_D1D : d1d;
758 const int Q1D = T_Q1D ? T_Q1D : q1d;
759 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
760 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
761 auto B =
Reshape(b.Read(), Q1D, D1D);
762 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
763 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
764 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
765 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
770 const int D1D = T_D1D ? T_D1D : d1d;
771 const int Q1D = T_Q1D ? T_Q1D : q1d;
773 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
774 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
775 double u0[max_D1D][max_D1D][VDIM];
776 double u1[max_D1D][max_D1D][VDIM];
777 for (
int d1 = 0; d1 < D1D; d1++)
779 for (
int d2 = 0; d2 < D1D; d2++)
781 for (
int c = 0; c < VDIM; c++)
783 u0[d1][d2][c] = x(d1,d2,c,0,f);
784 u1[d1][d2][c] = x(d1,d2,c,1,f);
788 double Bu0[max_Q1D][max_D1D][VDIM];
789 double Bu1[max_Q1D][max_D1D][VDIM];
790 for (
int q1 = 0; q1 < Q1D; ++q1)
792 for (
int d2 = 0; d2 < D1D; ++d2)
794 for (
int c = 0; c < VDIM; c++)
796 Bu0[q1][d2][c] = 0.0;
797 Bu1[q1][d2][c] = 0.0;
799 for (
int d1 = 0; d1 < D1D; ++d1)
801 const double b = B(q1,d1);
802 for (
int c = 0; c < VDIM; c++)
804 Bu0[q1][d2][c] += b*u0[d1][d2][c];
805 Bu1[q1][d2][c] += b*u1[d1][d2][c];
810 double BBu0[max_Q1D][max_Q1D][VDIM];
811 double BBu1[max_Q1D][max_Q1D][VDIM];
812 for (
int q1 = 0; q1 < Q1D; ++q1)
814 for (
int q2 = 0; q2 < Q1D; ++q2)
816 for (
int c = 0; c < VDIM; c++)
818 BBu0[q1][q2][c] = 0.0;
819 BBu1[q1][q2][c] = 0.0;
821 for (
int d2 = 0; d2 < D1D; ++d2)
823 const double b = B(q2,d2);
824 for (
int c = 0; c < VDIM; c++)
826 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
827 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
832 double DBu0[max_Q1D][max_Q1D][VDIM];
833 double DBu1[max_Q1D][max_Q1D][VDIM];
834 for (
int q1 = 0; q1 < Q1D; ++q1)
836 for (
int q2 = 0; q2 < Q1D; ++q2)
838 const double D00 = op(q1,q2,0,0,f);
839 const double D01 = op(q1,q2,0,1,f);
840 const double D10 = op(q1,q2,1,0,f);
841 const double D11 = op(q1,q2,1,1,f);
842 for (
int c = 0; c < VDIM; c++)
844 DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
845 DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
849 double BDBu0[max_D1D][max_Q1D][VDIM];
850 double BDBu1[max_D1D][max_Q1D][VDIM];
851 for (
int d1 = 0; d1 < D1D; ++d1)
853 for (
int q2 = 0; q2 < Q1D; ++q2)
855 for (
int c = 0; c < VDIM; c++)
857 BDBu0[d1][q2][c] = 0.0;
858 BDBu1[d1][q2][c] = 0.0;
860 for (
int q1 = 0; q1 < Q1D; ++q1)
862 const double b = Bt(d1,q1);
863 for (
int c = 0; c < VDIM; c++)
865 BDBu0[d1][q2][c] += b*DBu0[q1][q2][c];
866 BDBu1[d1][q2][c] += b*DBu1[q1][q2][c];
871 double BBDBu0[max_D1D][max_D1D][VDIM];
872 double BBDBu1[max_D1D][max_D1D][VDIM];
873 for (
int d1 = 0; d1 < D1D; ++d1)
875 for (
int d2 = 0; d2 < D1D; ++d2)
877 for (
int c = 0; c < VDIM; c++)
879 BBDBu0[d1][d2][c] = 0.0;
880 BBDBu1[d1][d2][c] = 0.0;
882 for (
int q2 = 0; q2 < Q1D; ++q2)
884 const double b = Bt(d2,q2);
885 for (
int c = 0; c < VDIM; c++)
887 BBDBu0[d1][d2][c] += b*BDBu0[d1][q2][c];
888 BBDBu1[d1][d2][c] += b*BDBu1[d1][q2][c];
891 for (
int c = 0; c < VDIM; c++)
893 y(d1,d2,c,0,f) += BBDBu0[d1][d2][c];
894 y(d1,d2,c,1,f) += BBDBu1[d1][d2][c];
902 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
903 void SmemPADGTraceApplyTranspose3D(
const int NF,
904 const Array<double> &b,
905 const Array<double> &bt,
912 const int D1D = T_D1D ? T_D1D : d1d;
913 const int Q1D = T_Q1D ? T_Q1D : q1d;
914 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
915 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
916 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
917 auto B =
Reshape(b.Read(), Q1D, D1D);
918 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
919 auto op =
Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
920 auto x =
Reshape(x_.Read(), D1D, D1D, 2, NF);
921 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
923 MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
925 const int tidz = MFEM_THREAD_ID(z);
926 const int D1D = T_D1D ? T_D1D : d1d;
927 const int Q1D = T_Q1D ? T_Q1D : q1d;
929 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
930 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
931 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
932 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
933 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
934 MFEM_FOREACH_THREAD(d1,x,D1D)
936 MFEM_FOREACH_THREAD(d2,y,D1D)
938 u0[tidz][d1][d2] = x(d1,d2,0,f);
939 u1[tidz][d1][d2] = x(d1,d2,1,f);
943 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
944 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
945 MFEM_FOREACH_THREAD(q1,x,Q1D)
947 MFEM_FOREACH_THREAD(d2,y,D1D)
951 for (
int d1 = 0; d1 < D1D; ++d1)
953 const double b = B(q1,d1);
954 Bu0_ += b*u0[tidz][d1][d2];
955 Bu1_ += b*u1[tidz][d1][d2];
957 Bu0[tidz][q1][d2] = Bu0_;
958 Bu1[tidz][q1][d2] = Bu1_;
962 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
963 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
964 MFEM_FOREACH_THREAD(q1,x,Q1D)
966 MFEM_FOREACH_THREAD(q2,y,Q1D)
970 for (
int d2 = 0; d2 < D1D; ++d2)
972 const double b = B(q2,d2);
973 BBu0_ += b*Bu0[tidz][q1][d2];
974 BBu1_ += b*Bu1[tidz][q1][d2];
976 BBu0[tidz][q1][q2] = BBu0_;
977 BBu1[tidz][q1][q2] = BBu1_;
981 MFEM_SHARED
double DBBu0[NBZ][max_Q1D][max_Q1D];
982 MFEM_SHARED
double DBBu1[NBZ][max_Q1D][max_Q1D];
983 MFEM_FOREACH_THREAD(q1,x,Q1D)
985 MFEM_FOREACH_THREAD(q2,y,Q1D)
987 const double D00 = op(q1,q2,0,0,f);
988 const double D01 = op(q1,q2,0,1,f);
989 const double D10 = op(q1,q2,1,0,f);
990 const double D11 = op(q1,q2,1,1,f);
991 const double u0q = BBu0[tidz][q1][q2];
992 const double u1q = BBu1[tidz][q1][q2];
993 DBBu0[tidz][q1][q2] = D00*u0q + D01*u1q;
994 DBBu1[tidz][q1][q2] = D10*u0q + D11*u1q;
998 MFEM_SHARED
double BDBBu0[NBZ][max_Q1D][max_D1D];
999 MFEM_SHARED
double BDBBu1[NBZ][max_Q1D][max_D1D];
1000 MFEM_FOREACH_THREAD(q1,x,Q1D)
1002 MFEM_FOREACH_THREAD(d2,y,D1D)
1004 double BDBBu0_ = 0.0;
1005 double BDBBu1_ = 0.0;
1006 for (
int q2 = 0; q2 < Q1D; ++q2)
1008 const double b = Bt(d2,q2);
1009 BDBBu0_ += b*DBBu0[tidz][q1][q2];
1010 BDBBu1_ += b*DBBu1[tidz][q1][q2];
1012 BDBBu0[tidz][q1][d2] = BDBBu0_;
1013 BDBBu1[tidz][q1][d2] = BDBBu1_;
1017 MFEM_FOREACH_THREAD(d1,x,D1D)
1019 MFEM_FOREACH_THREAD(d2,y,D1D)
1021 double BBDBBu0_ = 0.0;
1022 double BBDBBu1_ = 0.0;
1023 for (
int q1 = 0; q1 < Q1D; ++q1)
1025 const double b = Bt(d1,q1);
1026 BBDBBu0_ += b*BDBBu0[tidz][q1][d2];
1027 BBDBBu1_ += b*BDBBu1[tidz][q1][d2];
1029 y(d1,d2,0,f) += BBDBBu0_;
1030 y(d1,d2,1,f) += BBDBBu1_;
1036 static void PADGTraceApplyTranspose(
const int dim,
1040 const Array<double> &B,
1041 const Array<double> &Bt,
1048 switch ((D1D << 4 ) | Q1D)
1050 case 0x22:
return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1051 case 0x33:
return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1052 case 0x44:
return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1053 case 0x55:
return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1054 case 0x66:
return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1055 case 0x77:
return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1056 case 0x88:
return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1057 case 0x99:
return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1058 default:
return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1063 switch ((D1D << 4 ) | Q1D)
1065 case 0x22:
return SmemPADGTraceApplyTranspose3D<2,2>(NF,B,Bt,op,x,y);
1066 case 0x23:
return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1067 case 0x34:
return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1068 case 0x45:
return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1069 case 0x56:
return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1070 case 0x67:
return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1071 case 0x78:
return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1072 case 0x89:
return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1073 default:
return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1076 MFEM_ABORT(
"Unknown kernel.");
1082 PADGTraceApply(dim, dofs1D, quad1D, nf,
1087 void DGTraceIntegrator::AddMultTransposePA(
const Vector &x,
Vector &y)
const
1089 PADGTraceApplyTranspose(dim, dofs1D, quad1D, nf,
int GetNPoints() const
Returns the number of the points in the integration rule.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Convert a dof face index from Native ordering to lexicographic ordering for quads and hexes...
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
MemoryType
Memory types supported by MFEM.
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void vel(const Vector &x, double t, Vector &u)
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
double u(const Vector &xvec)
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
double f(const Vector &p)