12 #include "../general/forall.hpp"
22 static void PADGTraceSetup2D(
const int Q1D,
24 const Array<double> &w,
35 auto d =
Reshape(det.Read(), Q1D, NF);
36 auto n =
Reshape(nor.Read(), Q1D, VDIM, NF);
37 const bool const_r = rho.Size() == 1;
40 const bool const_v = vel.Size() == 2;
42 const_v ?
Reshape(vel.Read(), 2,1,1) :
Reshape(vel.Read(), 2,Q1D,NF);
44 auto qd =
Reshape(op.Write(), Q1D, 2, 2, NF);
48 for (
int q = 0; q < Q1D; ++q)
50 const double r = const_r ? R(0,0) : R(q,f);
51 const double v0 = const_v ? V(0,0,0) : V(0,q,f);
52 const double v1 = const_v ? V(1,0,0) : V(1,q,f);
53 const double dot = n(q,0,f) * v0 + n(q,1,f) * v1;
54 const double abs = dot > 0.0 ? dot : -dot;
55 const double w = W[q]*r*d(q,f);
56 qd(q,0,0,f) = w*( alpha/2 * dot + beta * abs );
57 qd(q,1,0,f) = w*( alpha/2 * dot - beta * abs );
58 qd(q,0,1,f) = w*(-alpha/2 * dot - beta * abs );
59 qd(q,1,1,f) = w*(-alpha/2 * dot + beta * abs );
64 static void PADGTraceSetup3D(
const int Q1D,
66 const Array<double> &w,
77 auto d =
Reshape(det.Read(), Q1D, Q1D, NF);
78 auto n =
Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
79 const bool const_r = rho.Size() == 1;
81 const_r ?
Reshape(rho.Read(), 1,1,1) :
Reshape(rho.Read(), Q1D,Q1D,NF);
82 const bool const_v = vel.Size() == 3;
84 const_v ?
Reshape(vel.Read(), 3,1,1,1) :
Reshape(vel.Read(), 3,Q1D,Q1D,NF);
86 auto qd =
Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
90 for (
int q1 = 0; q1 < Q1D; ++q1)
92 for (
int q2 = 0; q2 < Q1D; ++q2)
94 const double r = const_r ? R(0,0,0) : R(q1,q2,f);
95 const double v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,f);
96 const double v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,f);
97 const double v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,f);
98 const double dot = n(q1,q2,0,f) * v0 + n(q1,q2,1,f) * v1 +
100 const double abs = dot > 0.0 ? dot : -dot;
101 const double w = W[q1+q2*Q1D]*r*d(q1,q2,f);
102 qd(q1,q2,0,0,f) = w*( alpha/2 * dot + beta * abs );
103 qd(q1,q2,1,0,f) = w*( alpha/2 * dot - beta * abs );
104 qd(q1,q2,0,1,f) = w*(-alpha/2 * dot - beta * abs );
105 qd(q1,q2,1,1,f) = w*(-alpha/2 * dot + beta * abs );
111 static void PADGTraceSetup(
const int dim,
115 const Array<double> &W,
124 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADGTraceSetup"); }
127 PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
131 PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
135 void DGTraceIntegrator::SetupPA(
const FiniteElementSpace &fes,
FaceType type)
137 nf = fes.GetNFbyType(type);
138 if (nf==0) {
return; }
140 Mesh *mesh = fes.GetMesh();
141 const FiniteElement &el =
142 *fes.GetTraceElement(0, fes.GetMesh()->GetFaceBaseGeometry(0));
143 FaceElementTransformations &T =
144 *fes.GetMesh()->GetFaceElementTransformations(0);
145 const IntegrationRule *ir = IntRule?
147 &GetRule(el.GetGeomType(), el.GetOrder(), T);
148 const int symmDims = 4;
149 const int nq = ir->GetNPoints();
150 dim = mesh->Dimension();
151 geom = mesh->GetFaceGeometricFactors(
153 FaceGeometricFactors::DETERMINANTS |
154 FaceGeometricFactors::NORMALS, type);
155 maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
165 else if (ConstantCoefficient *c_rho = dynamic_cast<ConstantCoefficient*>(rho))
168 r(0) = c_rho->constant;
173 auto C =
Reshape(r.HostWrite(), nq, nf);
175 for (
int f = 0; f < fes.GetNF(); ++f)
179 fes.GetMesh()->GetFaceElements(f, &e1, &e2);
180 fes.GetMesh()->GetFaceInfos(f, &inf1, &inf2);
181 int face_id = inf1 / 64;
182 if ((type==FaceType::Interior && (e2>=0 || (e2<0 && inf2>=0))) ||
183 (type==FaceType::Boundary && e2<0 && inf2<0) )
185 ElementTransformation& T = *fes.GetMesh()->GetFaceTransformation(f);
186 for (
int q = 0; q < nq; ++q)
190 C(iq,f_ind) = rho->Eval(T, ir->IntPoint(q));
195 MFEM_VERIFY(f_ind==nf,
"Incorrect number of faces.");
198 if (VectorConstantCoefficient *c_u = dynamic_cast<VectorConstantCoefficient*>
205 vel.SetSize(dim * nq * nf);
206 auto C =
Reshape(vel.HostWrite(),
dim, nq, nf);
209 for (
int f = 0; f < fes.GetNF(); ++f)
213 fes.GetMesh()->GetFaceElements(f, &e1, &e2);
214 fes.GetMesh()->GetFaceInfos(f, &inf1, &inf2);
215 int face_id = inf1 / 64;
216 if ((type==FaceType::Interior && (e2>=0 || (e2<0 && inf2>=0))) ||
217 (type==FaceType::Boundary && e2<0 && inf2<0) )
219 ElementTransformation& T = *fes.GetMesh()->GetFaceTransformation(f);
220 for (
int q = 0; q < nq; ++q)
224 u->Eval(Vq, T, ir->IntPoint(q));
225 for (
int i = 0; i <
dim; ++i)
227 C(i,iq,f_ind) = Vq(i);
233 MFEM_VERIFY(f_ind==nf,
"Incorrect number of faces.");
235 PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(),
237 alpha, beta, pa_data);
242 SetupPA(fes, FaceType::Interior);
247 SetupPA(fes, FaceType::Boundary);
251 template<
int T_D1D = 0,
int T_Q1D = 0>
static
252 void PADGTraceApply2D(
const int NF,
262 const int D1D = T_D1D ? T_D1D : d1d;
263 const int Q1D = T_Q1D ? T_Q1D : q1d;
264 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
265 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
275 const int D1D = T_D1D ? T_D1D : d1d;
276 const int Q1D = T_Q1D ? T_Q1D : q1d;
278 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
279 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
280 double u0[max_D1D][VDIM];
281 double u1[max_D1D][VDIM];
282 for (
int d = 0; d < D1D; d++)
284 for (
int c = 0; c < VDIM; c++)
286 u0[d][c] = x(d,c,0,f);
287 u1[d][c] = x(d,c,1,f);
290 double Bu0[max_Q1D][VDIM];
291 double Bu1[max_Q1D][VDIM];
292 for (
int q = 0; q < Q1D; ++q)
294 for (
int c = 0; c < VDIM; c++)
299 for (
int d = 0; d < D1D; ++d)
301 const double b = B(q,d);
302 for (
int c = 0; c < VDIM; c++)
304 Bu0[q][c] += b*u0[d][c];
305 Bu1[q][c] += b*u1[d][c];
309 double DBu[max_Q1D][VDIM];
310 for (
int q = 0; q < Q1D; ++q)
312 for (
int c = 0; c < VDIM; c++)
314 DBu[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,1,0,f)*Bu1[q][c];
317 double BDBu[max_D1D][VDIM];
318 for (
int d = 0; d < D1D; ++d)
320 for (
int c = 0; c < VDIM; c++)
324 for (
int q = 0; q < Q1D; ++q)
326 const double b = Bt(d,q);
327 for (
int c = 0; c < VDIM; c++)
329 BDBu[d][c] += b*DBu[q][c];
332 for (
int c = 0; c < VDIM; c++)
334 y(d,c,0,f) += BDBu[d][c];
335 y(d,c,1,f) += -BDBu[d][c];
342 template<
int T_D1D = 0,
int T_Q1D = 0>
static
343 void PADGTraceApply3D(
const int NF,
344 const Array<double> &b,
345 const Array<double> &bt,
353 const int D1D = T_D1D ? T_D1D : d1d;
354 const int Q1D = T_Q1D ? T_Q1D : q1d;
355 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
356 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
357 auto B =
Reshape(b.Read(), Q1D, D1D);
358 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
359 auto op =
Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
360 auto x =
Reshape(_x.Read(), D1D, D1D, VDIM, 2, NF);
361 auto y =
Reshape(_y.ReadWrite(), D1D, D1D, VDIM, 2, NF);
366 const int D1D = T_D1D ? T_D1D : d1d;
367 const int Q1D = T_Q1D ? T_Q1D : q1d;
369 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
370 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
371 double u0[max_D1D][max_D1D][VDIM];
372 double u1[max_D1D][max_D1D][VDIM];
373 for (
int d1 = 0; d1 < D1D; d1++)
375 for (
int d2 = 0; d2 < D1D; d2++)
377 for (
int c = 0; c < VDIM; c++)
379 u0[d1][d2][c] = x(d1,d2,c,0,f);
380 u1[d1][d2][c] = x(d1,d2,c,1,f);
384 double Bu0[max_Q1D][max_D1D][VDIM];
385 double Bu1[max_Q1D][max_D1D][VDIM];
386 for (
int q = 0; q < Q1D; ++q)
388 for (
int d2 = 0; d2 < D1D; d2++)
390 for (
int c = 0; c < VDIM; c++)
395 for (
int d1 = 0; d1 < D1D; ++d1)
397 const double b = B(q,d1);
398 for (
int c = 0; c < VDIM; c++)
400 Bu0[q][d2][c] += b*u0[d1][d2][c];
401 Bu1[q][d2][c] += b*u1[d1][d2][c];
406 double BBu0[max_Q1D][max_Q1D][VDIM];
407 double BBu1[max_Q1D][max_Q1D][VDIM];
408 for (
int q1 = 0; q1 < Q1D; ++q1)
410 for (
int q2 = 0; q2 < Q1D; q2++)
412 for (
int c = 0; c < VDIM; c++)
414 BBu0[q1][q2][c] = 0.0;
415 BBu1[q1][q2][c] = 0.0;
417 for (
int d2 = 0; d2 < D1D; ++d2)
419 const double b = B(q2,d2);
420 for (
int c = 0; c < VDIM; c++)
422 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
423 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
428 double DBBu[max_Q1D][max_Q1D][VDIM];
429 for (
int q1 = 0; q1 < Q1D; ++q1)
431 for (
int q2 = 0; q2 < Q1D; q2++)
433 for (
int c = 0; c < VDIM; c++)
435 DBBu[q1][q2][c] = op(q1,q2,0,0,f)*BBu0[q1][q2][c] +
436 op(q1,q2,1,0,f)*BBu1[q1][q2][c];
440 double BDBBu[max_Q1D][max_D1D][VDIM];
441 for (
int q1 = 0; q1 < Q1D; ++q1)
443 for (
int d2 = 0; d2 < D1D; d2++)
445 for (
int c = 0; c < VDIM; c++)
447 BDBBu[q1][d2][c] = 0.0;
449 for (
int q2 = 0; q2 < Q1D; ++q2)
451 const double b = Bt(d2,q2);
452 for (
int c = 0; c < VDIM; c++)
454 BDBBu[q1][d2][c] += b*DBBu[q1][q2][c];
459 double BBDBBu[max_D1D][max_D1D][VDIM];
460 for (
int d1 = 0; d1 < D1D; ++d1)
462 for (
int d2 = 0; d2 < D1D; d2++)
464 for (
int c = 0; c < VDIM; c++)
466 BBDBBu[d1][d2][c] = 0.0;
468 for (
int q1 = 0; q1 < Q1D; ++q1)
470 const double b = Bt(d1,q1);
471 for (
int c = 0; c < VDIM; c++)
473 BBDBBu[d1][d2][c] += b*BDBBu[q1][d2][c];
476 for (
int c = 0; c < VDIM; c++)
478 y(d1,d2,c,0,f) += BBDBBu[d1][d2][c];
479 y(d1,d2,c,1,f) += -BBDBBu[d1][d2][c];
487 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
488 void SmemPADGTraceApply3D(
const int NF,
489 const Array<double> &b,
490 const Array<double> &bt,
497 const int D1D = T_D1D ? T_D1D : d1d;
498 const int Q1D = T_Q1D ? T_Q1D : q1d;
499 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
500 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
501 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
502 auto B =
Reshape(b.Read(), Q1D, D1D);
503 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
504 auto op =
Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
505 auto x =
Reshape(_x.Read(), D1D, D1D, 2, NF);
506 auto y =
Reshape(_y.ReadWrite(), D1D, D1D, 2, NF);
508 MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
510 const int tidz = MFEM_THREAD_ID(z);
511 const int D1D = T_D1D ? T_D1D : d1d;
512 const int Q1D = T_Q1D ? T_Q1D : q1d;
514 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
515 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
516 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
517 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
518 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
519 MFEM_FOREACH_THREAD(d1,x,D1D)
521 MFEM_FOREACH_THREAD(d2,y,D1D)
523 u0[tidz][d1][d2] = x(d1,d2,0,f+tidz);
524 u1[tidz][d1][d2] = x(d1,d2,1,f+tidz);
528 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
529 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
530 MFEM_FOREACH_THREAD(q1,x,Q1D)
532 MFEM_FOREACH_THREAD(d2,y,D1D)
536 for (
int d1 = 0; d1 < D1D; ++d1)
538 const double b = B(q1,d1);
539 Bu0_ += b*u0[tidz][d1][d2];
540 Bu1_ += b*u1[tidz][d1][d2];
542 Bu0[tidz][q1][d2] = Bu0_;
543 Bu1[tidz][q1][d2] = Bu1_;
547 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
548 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
549 MFEM_FOREACH_THREAD(q1,x,Q1D)
551 MFEM_FOREACH_THREAD(q2,y,Q1D)
555 for (
int d2 = 0; d2 < D1D; ++d2)
557 const double b = B(q2,d2);
558 BBu0_ += b*Bu0[tidz][q1][d2];
559 BBu1_ += b*Bu1[tidz][q1][d2];
561 BBu0[tidz][q1][q2] = BBu0_;
562 BBu1[tidz][q1][q2] = BBu1_;
566 MFEM_SHARED
double DBBu[NBZ][max_Q1D][max_Q1D];
567 MFEM_FOREACH_THREAD(q1,x,Q1D)
569 MFEM_FOREACH_THREAD(q2,y,Q1D)
571 DBBu[tidz][q1][q2] = op(q1,q2,0,0,f+tidz)*BBu0[tidz][q1][q2] +
572 op(q1,q2,1,0,f+tidz)*BBu1[tidz][q1][q2];
576 MFEM_SHARED
double BDBBu[NBZ][max_Q1D][max_D1D];
577 MFEM_FOREACH_THREAD(q1,x,Q1D)
579 MFEM_FOREACH_THREAD(d2,y,D1D)
582 for (
int q2 = 0; q2 < Q1D; ++q2)
584 const double b = Bt(d2,q2);
585 BDBBu_ += b*DBBu[tidz][q1][q2];
587 BDBBu[tidz][q1][d2] = BDBBu_;
591 MFEM_FOREACH_THREAD(d1,x,D1D)
593 MFEM_FOREACH_THREAD(d2,y,D1D)
595 double BBDBBu_ = 0.0;
596 for (
int q1 = 0; q1 < Q1D; ++q1)
598 const double b = Bt(d1,q1);
599 BBDBBu_ += b*BDBBu[tidz][q1][d2];
601 y(d1,d2,0,f+tidz) += BBDBBu_;
602 y(d1,d2,1,f+tidz) += -BBDBBu_;
608 static void PADGTraceApply(
const int dim,
612 const Array<double> &B,
613 const Array<double> &Bt,
620 switch ((D1D << 4 ) | Q1D)
622 case 0x22:
return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
623 case 0x33:
return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
624 case 0x44:
return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
625 case 0x55:
return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
626 case 0x66:
return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
627 case 0x77:
return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
628 case 0x88:
return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
629 case 0x99:
return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
630 default:
return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
635 switch ((D1D << 4 ) | Q1D)
637 case 0x23:
return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
638 case 0x34:
return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
639 case 0x45:
return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
640 case 0x56:
return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
641 case 0x67:
return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
642 case 0x78:
return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
643 case 0x89:
return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
644 default:
return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
647 MFEM_ABORT(
"Unknown kernel.");
651 template<
int T_D1D = 0,
int T_Q1D = 0>
static
652 void PADGTraceApplyTranspose2D(
const int NF,
653 const Array<double> &b,
654 const Array<double> &bt,
662 const int D1D = T_D1D ? T_D1D : d1d;
663 const int Q1D = T_Q1D ? T_Q1D : q1d;
664 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
665 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
666 auto B =
Reshape(b.Read(), Q1D, D1D);
667 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
668 auto op =
Reshape(_op.Read(), Q1D, 2, 2, NF);
669 auto x =
Reshape(_x.Read(), D1D, VDIM, 2, NF);
670 auto y =
Reshape(_y.ReadWrite(), D1D, VDIM, 2, NF);
675 const int D1D = T_D1D ? T_D1D : d1d;
676 const int Q1D = T_Q1D ? T_Q1D : q1d;
678 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
679 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
680 double u0[max_D1D][VDIM];
681 double u1[max_D1D][VDIM];
682 for (
int d = 0; d < D1D; d++)
684 for (
int c = 0; c < VDIM; c++)
686 u0[d][c] = x(d,c,0,f);
687 u1[d][c] = x(d,c,1,f);
690 double Bu0[max_Q1D][VDIM];
691 double Bu1[max_Q1D][VDIM];
692 for (
int q = 0; q < Q1D; ++q)
694 for (
int c = 0; c < VDIM; c++)
699 for (
int d = 0; d < D1D; ++d)
701 const double b = B(q,d);
702 for (
int c = 0; c < VDIM; c++)
704 Bu0[q][c] += b*u0[d][c];
705 Bu1[q][c] += b*u1[d][c];
709 double DBu0[max_Q1D][VDIM];
710 double DBu1[max_Q1D][VDIM];
711 for (
int q = 0; q < Q1D; ++q)
713 for (
int c = 0; c < VDIM; c++)
715 DBu0[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,0,1,f)*Bu1[q][c];
716 DBu1[q][c] = op(q,1,0,f)*Bu0[q][c] + op(q,1,1,f)*Bu1[q][c];
719 double BDBu0[max_D1D][VDIM];
720 double BDBu1[max_D1D][VDIM];
721 for (
int d = 0; d < D1D; ++d)
723 for (
int c = 0; c < VDIM; c++)
728 for (
int q = 0; q < Q1D; ++q)
730 const double b = Bt(d,q);
731 for (
int c = 0; c < VDIM; c++)
733 BDBu0[d][c] += b*DBu0[q][c];
734 BDBu1[d][c] += b*DBu1[q][c];
737 for (
int c = 0; c < VDIM; c++)
739 y(d,c,0,f) += BDBu0[d][c];
740 y(d,c,1,f) += BDBu1[d][c];
747 template<
int T_D1D = 0,
int T_Q1D = 0>
static
748 void PADGTraceApplyTranspose3D(
const int NF,
749 const Array<double> &b,
750 const Array<double> &bt,
758 const int D1D = T_D1D ? T_D1D : d1d;
759 const int Q1D = T_Q1D ? T_Q1D : q1d;
760 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
761 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
762 auto B =
Reshape(b.Read(), Q1D, D1D);
763 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
764 auto op =
Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
765 auto x =
Reshape(_x.Read(), D1D, D1D, VDIM, 2, NF);
766 auto y =
Reshape(_y.ReadWrite(), D1D, D1D, VDIM, 2, NF);
771 const int D1D = T_D1D ? T_D1D : d1d;
772 const int Q1D = T_Q1D ? T_Q1D : q1d;
774 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
775 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
776 double u0[max_D1D][max_D1D][VDIM];
777 double u1[max_D1D][max_D1D][VDIM];
778 for (
int d1 = 0; d1 < D1D; d1++)
780 for (
int d2 = 0; d2 < D1D; d2++)
782 for (
int c = 0; c < VDIM; c++)
784 u0[d1][d2][c] = x(d1,d2,c,0,f);
785 u1[d1][d2][c] = x(d1,d2,c,1,f);
789 double Bu0[max_Q1D][max_D1D][VDIM];
790 double Bu1[max_Q1D][max_D1D][VDIM];
791 for (
int q1 = 0; q1 < Q1D; ++q1)
793 for (
int d2 = 0; d2 < D1D; ++d2)
795 for (
int c = 0; c < VDIM; c++)
797 Bu0[q1][d2][c] = 0.0;
798 Bu1[q1][d2][c] = 0.0;
800 for (
int d1 = 0; d1 < D1D; ++d1)
802 const double b = B(q1,d1);
803 for (
int c = 0; c < VDIM; c++)
805 Bu0[q1][d2][c] += b*u0[d1][d2][c];
806 Bu1[q1][d2][c] += b*u1[d1][d2][c];
811 double BBu0[max_Q1D][max_Q1D][VDIM];
812 double BBu1[max_Q1D][max_Q1D][VDIM];
813 for (
int q1 = 0; q1 < Q1D; ++q1)
815 for (
int q2 = 0; q2 < Q1D; ++q2)
817 for (
int c = 0; c < VDIM; c++)
819 BBu0[q1][q2][c] = 0.0;
820 BBu1[q1][q2][c] = 0.0;
822 for (
int d2 = 0; d2 < D1D; ++d2)
824 const double b = B(q2,d2);
825 for (
int c = 0; c < VDIM; c++)
827 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
828 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
833 double DBu0[max_Q1D][max_Q1D][VDIM];
834 double DBu1[max_Q1D][max_Q1D][VDIM];
835 for (
int q1 = 0; q1 < Q1D; ++q1)
837 for (
int q2 = 0; q2 < Q1D; ++q2)
839 const double D00 = op(q1,q2,0,0,f);
840 const double D01 = op(q1,q2,0,1,f);
841 const double D10 = op(q1,q2,1,0,f);
842 const double D11 = op(q1,q2,1,1,f);
843 for (
int c = 0; c < VDIM; c++)
845 DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
846 DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
850 double BDBu0[max_D1D][max_Q1D][VDIM];
851 double BDBu1[max_D1D][max_Q1D][VDIM];
852 for (
int d1 = 0; d1 < D1D; ++d1)
854 for (
int q2 = 0; q2 < Q1D; ++q2)
856 for (
int c = 0; c < VDIM; c++)
858 BDBu0[d1][q2][c] = 0.0;
859 BDBu1[d1][q2][c] = 0.0;
861 for (
int q1 = 0; q1 < Q1D; ++q1)
863 const double b = Bt(d1,q1);
864 for (
int c = 0; c < VDIM; c++)
866 BDBu0[d1][q2][c] += b*DBu0[q1][q2][c];
867 BDBu1[d1][q2][c] += b*DBu1[q1][q2][c];
872 double BBDBu0[max_D1D][max_D1D][VDIM];
873 double BBDBu1[max_D1D][max_D1D][VDIM];
874 for (
int d1 = 0; d1 < D1D; ++d1)
876 for (
int d2 = 0; d2 < D1D; ++d2)
878 for (
int c = 0; c < VDIM; c++)
880 BBDBu0[d1][d2][c] = 0.0;
881 BBDBu1[d1][d2][c] = 0.0;
883 for (
int q2 = 0; q2 < Q1D; ++q2)
885 const double b = Bt(d2,q2);
886 for (
int c = 0; c < VDIM; c++)
888 BBDBu0[d1][d2][c] += b*BDBu0[d1][q2][c];
889 BBDBu1[d1][d2][c] += b*BDBu1[d1][q2][c];
892 for (
int c = 0; c < VDIM; c++)
894 y(d1,d2,c,0,f) += BBDBu0[d1][d2][c];
895 y(d1,d2,c,1,f) += BBDBu1[d1][d2][c];
903 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
static
904 void SmemPADGTraceApplyTranspose3D(
const int NF,
905 const Array<double> &b,
906 const Array<double> &bt,
913 const int D1D = T_D1D ? T_D1D : d1d;
914 const int Q1D = T_Q1D ? T_Q1D : q1d;
915 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
916 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
917 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
918 auto B =
Reshape(b.Read(), Q1D, D1D);
919 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
920 auto op =
Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
921 auto x =
Reshape(_x.Read(), D1D, D1D, 2, NF);
922 auto y =
Reshape(_y.ReadWrite(), D1D, D1D, 2, NF);
924 MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
926 const int tidz = MFEM_THREAD_ID(z);
927 const int D1D = T_D1D ? T_D1D : d1d;
928 const int Q1D = T_Q1D ? T_Q1D : q1d;
930 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
931 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
932 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
933 MFEM_SHARED
double u0[NBZ][max_D1D][max_D1D];
934 MFEM_SHARED
double u1[NBZ][max_D1D][max_D1D];
935 MFEM_FOREACH_THREAD(d1,x,D1D)
937 MFEM_FOREACH_THREAD(d2,y,D1D)
939 u0[tidz][d1][d2] = x(d1,d2,0,f+tidz);
940 u1[tidz][d1][d2] = x(d1,d2,1,f+tidz);
944 MFEM_SHARED
double Bu0[NBZ][max_Q1D][max_D1D];
945 MFEM_SHARED
double Bu1[NBZ][max_Q1D][max_D1D];
946 MFEM_FOREACH_THREAD(q1,x,Q1D)
948 MFEM_FOREACH_THREAD(d2,y,D1D)
952 for (
int d1 = 0; d1 < D1D; ++d1)
954 const double b = B(q1,d1);
955 Bu0_ += b*u0[tidz][d1][d2];
956 Bu1_ += b*u1[tidz][d1][d2];
958 Bu0[tidz][q1][d2] = Bu0_;
959 Bu1[tidz][q1][d2] = Bu1_;
963 MFEM_SHARED
double BBu0[NBZ][max_Q1D][max_Q1D];
964 MFEM_SHARED
double BBu1[NBZ][max_Q1D][max_Q1D];
965 MFEM_FOREACH_THREAD(q1,x,Q1D)
967 MFEM_FOREACH_THREAD(q2,y,Q1D)
971 for (
int d2 = 0; d2 < D1D; ++d2)
973 const double b = B(q2,d2);
974 BBu0_ += b*Bu0[tidz][q1][d2];
975 BBu1_ += b*Bu1[tidz][q1][d2];
977 BBu0[tidz][q1][q2] = BBu0_;
978 BBu1[tidz][q1][q2] = BBu1_;
982 MFEM_SHARED
double DBBu0[NBZ][max_Q1D][max_Q1D];
983 MFEM_SHARED
double DBBu1[NBZ][max_Q1D][max_Q1D];
984 MFEM_FOREACH_THREAD(q1,x,Q1D)
986 MFEM_FOREACH_THREAD(q2,y,Q1D)
988 const double D00 = op(q1,q2,0,0,f+tidz);
989 const double D01 = op(q1,q2,0,1,f+tidz);
990 const double D10 = op(q1,q2,1,0,f+tidz);
991 const double D11 = op(q1,q2,1,1,f+tidz);
992 const double u0 = BBu0[tidz][q1][q2];
993 const double u1 = BBu1[tidz][q1][q2];
994 DBBu0[tidz][q1][q2] = D00*u0 + D01*u1;
995 DBBu1[tidz][q1][q2] = D10*u0 + D11*u1;
999 MFEM_SHARED
double BDBBu0[NBZ][max_Q1D][max_D1D];
1000 MFEM_SHARED
double BDBBu1[NBZ][max_Q1D][max_D1D];
1001 MFEM_FOREACH_THREAD(q1,x,Q1D)
1003 MFEM_FOREACH_THREAD(d2,y,D1D)
1005 double BDBBu0_ = 0.0;
1006 double BDBBu1_ = 0.0;
1007 for (
int q2 = 0; q2 < Q1D; ++q2)
1009 const double b = Bt(d2,q2);
1010 BDBBu0_ += b*DBBu0[tidz][q1][q2];
1011 BDBBu1_ += b*DBBu1[tidz][q1][q2];
1013 BDBBu0[tidz][q1][d2] = BDBBu0_;
1014 BDBBu1[tidz][q1][d2] = BDBBu1_;
1018 MFEM_FOREACH_THREAD(d1,x,D1D)
1020 MFEM_FOREACH_THREAD(d2,y,D1D)
1022 double BBDBBu0_ = 0.0;
1023 double BBDBBu1_ = 0.0;
1024 for (
int q1 = 0; q1 < Q1D; ++q1)
1026 const double b = Bt(d1,q1);
1027 BBDBBu0_ += b*BDBBu0[tidz][q1][d2];
1028 BBDBBu1_ += b*BDBBu1[tidz][q1][d2];
1030 y(d1,d2,0,f+tidz) += BBDBBu0_;
1031 y(d1,d2,1,f+tidz) += BBDBBu1_;
1037 static void PADGTraceApplyTranspose(
const int dim,
1041 const Array<double> &B,
1042 const Array<double> &Bt,
1049 switch ((D1D << 4 ) | Q1D)
1051 case 0x22:
return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1052 case 0x33:
return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1053 case 0x44:
return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1054 case 0x55:
return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1055 case 0x66:
return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1056 case 0x77:
return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1057 case 0x88:
return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1058 case 0x99:
return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1059 default:
return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1064 switch ((D1D << 4 ) | Q1D)
1066 case 0x23:
return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1067 case 0x34:
return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1068 case 0x45:
return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1069 case 0x56:
return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1070 case 0x67:
return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1071 case 0x78:
return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1072 case 0x89:
return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1073 default:
return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1076 MFEM_ABORT(
"Unknown kernel.");
1082 PADGTraceApply(dim, dofs1D, quad1D, nf,
1087 void DGTraceIntegrator::AddMultTransposePA(
const Vector &x,
Vector &y)
const
1089 PADGTraceApplyTranspose(dim, dofs1D, quad1D, nf,
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.