MFEM  v4.4.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_dgtrace_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "restriction.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 // PA DG Trace Integrator
22 static void PADGTraceSetup2D(const int Q1D,
23  const int NF,
24  const Array<double> &w,
25  const Vector &det,
26  const Vector &nor,
27  const Vector &rho,
28  const Vector &vel,
29  const double alpha,
30  const double beta,
31  Vector &op)
32 {
33  const int VDIM = 2;
34 
35  auto d = Reshape(det.Read(), Q1D, NF);
36  auto n = Reshape(nor.Read(), Q1D, VDIM, NF);
37  const bool const_r = rho.Size() == 1;
38  auto R =
39  const_r ? Reshape(rho.Read(), 1,1) : Reshape(rho.Read(), Q1D,NF);
40  const bool const_v = vel.Size() == 2;
41  auto V =
42  const_v ? Reshape(vel.Read(), 2,1,1) : Reshape(vel.Read(), 2,Q1D,NF);
43  auto W = w.Read();
44  auto qd = Reshape(op.Write(), Q1D, 2, 2, NF);
45 
46  MFEM_FORALL(f, NF, // can be optimized with Q1D thread for NF blocks
47  {
48  for (int q = 0; q < Q1D; ++q)
49  {
50  const double r = const_r ? R(0,0) : R(q,f);
51  const double v0 = const_v ? V(0,0,0) : V(0,q,f);
52  const double v1 = const_v ? V(1,0,0) : V(1,q,f);
53  const double dot = n(q,0,f) * v0 + n(q,1,f) * v1;
54  const double abs = dot > 0.0 ? dot : -dot;
55  const double w = W[q]*r*d(q,f);
56  qd(q,0,0,f) = w*( alpha/2 * dot + beta * abs );
57  qd(q,1,0,f) = w*( alpha/2 * dot - beta * abs );
58  qd(q,0,1,f) = w*(-alpha/2 * dot - beta * abs );
59  qd(q,1,1,f) = w*(-alpha/2 * dot + beta * abs );
60  }
61  });
62 }
63 
64 static void PADGTraceSetup3D(const int Q1D,
65  const int NF,
66  const Array<double> &w,
67  const Vector &det,
68  const Vector &nor,
69  const Vector &rho,
70  const Vector &vel,
71  const double alpha,
72  const double beta,
73  Vector &op)
74 {
75  const int VDIM = 3;
76 
77  auto d = Reshape(det.Read(), Q1D, Q1D, NF);
78  auto n = Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
79  const bool const_r = rho.Size() == 1;
80  auto R =
81  const_r ? Reshape(rho.Read(), 1,1,1) : Reshape(rho.Read(), Q1D,Q1D,NF);
82  const bool const_v = vel.Size() == 3;
83  auto V =
84  const_v ? Reshape(vel.Read(), 3,1,1,1) : Reshape(vel.Read(), 3,Q1D,Q1D,NF);
85  auto W = w.Read();
86  auto qd = Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
87 
88  MFEM_FORALL(f, NF, // can be optimized with Q1D*Q1D threads for NF blocks
89  {
90  for (int q1 = 0; q1 < Q1D; ++q1)
91  {
92  for (int q2 = 0; q2 < Q1D; ++q2)
93  {
94  const double r = const_r ? R(0,0,0) : R(q1,q2,f);
95  const double v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,f);
96  const double v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,f);
97  const double v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,f);
98  const double dot = n(q1,q2,0,f) * v0 + n(q1,q2,1,f) * v1 +
99  /* */ n(q1,q2,2,f) * v2;
100  const double abs = dot > 0.0 ? dot : -dot;
101  const double w = W[q1+q2*Q1D]*r*d(q1,q2,f);
102  qd(q1,q2,0,0,f) = w*( alpha/2 * dot + beta * abs );
103  qd(q1,q2,1,0,f) = w*( alpha/2 * dot - beta * abs );
104  qd(q1,q2,0,1,f) = w*(-alpha/2 * dot - beta * abs );
105  qd(q1,q2,1,1,f) = w*(-alpha/2 * dot + beta * abs );
106  }
107  }
108  });
109 }
110 
111 static void PADGTraceSetup(const int dim,
112  const int D1D,
113  const int Q1D,
114  const int NF,
115  const Array<double> &W,
116  const Vector &det,
117  const Vector &nor,
118  const Vector &rho,
119  const Vector &u,
120  const double alpha,
121  const double beta,
122  Vector &op)
123 {
124  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADGTraceSetup"); }
125  if (dim == 2)
126  {
127  PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
128  }
129  if (dim == 3)
130  {
131  PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
132  }
133 }
134 
135 void DGTraceIntegrator::SetupPA(const FiniteElementSpace &fes, FaceType type)
136 {
137  nf = fes.GetNFbyType(type);
138  if (nf==0) { return; }
139  // Assumes tensor-product elements
140  Mesh *mesh = fes.GetMesh();
141  const FiniteElement &el =
142  *fes.GetTraceElement(0, fes.GetMesh()->GetFaceBaseGeometry(0));
143  FaceElementTransformations &T0 =
144  *fes.GetMesh()->GetFaceElementTransformations(0);
145  const IntegrationRule *ir = IntRule?
146  IntRule:
147  &GetRule(el.GetGeomType(), el.GetOrder(), T0);
148  const int symmDims = 4;
149  nq = ir->GetNPoints();
150  dim = mesh->Dimension();
151  geom = mesh->GetFaceGeometricFactors(
152  *ir,
153  FaceGeometricFactors::DETERMINANTS |
154  FaceGeometricFactors::NORMALS, type);
155  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
156  dofs1D = maps->ndof;
157  quad1D = maps->nqpt;
158  pa_data.SetSize(symmDims * nq * nf, Device::GetMemoryType());
159  Vector vel;
160  if (VectorConstantCoefficient *c_u = dynamic_cast<VectorConstantCoefficient*>
161  (u))
162  {
163  vel = c_u->GetVec();
164  }
165  else if (VectorQuadratureFunctionCoefficient* qf_u =
166  dynamic_cast<VectorQuadratureFunctionCoefficient*>(u))
167  {
168  // Assumed to be in lexicographical ordering
169  const QuadratureFunction &qFun = qf_u->GetQuadFunction();
170  MFEM_VERIFY(qFun.Size() == dim * nq * nf,
171  "Incompatible QuadratureFunction dimension \n");
172 
173  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
174  "IntegrationRule used within integrator and in"
175  " QuadratureFunction appear to be different");
176  qFun.Read();
177  vel.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
178  }
179  else
180  {
181  vel.SetSize(dim * nq * nf);
182  auto C = Reshape(vel.HostWrite(), dim, nq, nf);
183  Vector Vq(dim);
184  int f_ind = 0;
185  for (int f = 0; f < mesh->GetNumFacesWithGhost(); ++f)
186  {
187  Mesh::FaceInformation face = mesh->GetFaceInformation(f);
188  if (face.IsNonconformingCoarse())
189  {
190  // We skip nonconforming coarse faces as they are treated
191  // by the corresponding nonconforming fine faces.
192  continue;
193  }
194  else if ( face.IsOfFaceType(type) )
195  {
196  const int mask = FaceElementTransformations::HAVE_ELEM1 |
197  FaceElementTransformations::HAVE_LOC1;
198  FaceElementTransformations &T =
199  *fes.GetMesh()->GetFaceElementTransformations(f, mask);
200  for (int q = 0; q < nq; ++q)
201  {
202  // Convert to lexicographic ordering
203  int iq = ToLexOrdering(dim, face.element[0].local_face_id,
204  quad1D, q);
205  T.SetAllIntPoints(&ir->IntPoint(q));
206  const IntegrationPoint &eip1 = T.GetElement1IntPoint();
207  u->Eval(Vq, *T.Elem1, eip1);
208  for (int i = 0; i < dim; ++i)
209  {
210  C(i,iq,f_ind) = Vq(i);
211  }
212  }
213  f_ind++;
214  }
215  }
216  MFEM_VERIFY(f_ind==nf, "Incorrect number of faces.");
217  }
218  Vector r;
219  if (rho==nullptr)
220  {
221  r.SetSize(1);
222  r(0) = 1.0;
223  }
224  else if (ConstantCoefficient *c_rho = dynamic_cast<ConstantCoefficient*>(rho))
225  {
226  r.SetSize(1);
227  r(0) = c_rho->constant;
228  }
229  else if (QuadratureFunctionCoefficient* qf_rho =
230  dynamic_cast<QuadratureFunctionCoefficient*>(rho))
231  {
232  const QuadratureFunction &qFun = qf_rho->GetQuadFunction();
233  MFEM_VERIFY(qFun.Size() == nq * nf,
234  "Incompatible QuadratureFunction dimension \n");
235 
236  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
237  "IntegrationRule used within integrator and in"
238  " QuadratureFunction appear to be different");
239  qFun.Read();
240  r.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
241  }
242  else
243  {
244  r.SetSize(nq * nf);
245  auto C_vel = Reshape(vel.HostRead(), dim, nq, nf);
246  auto n = Reshape(geom->normal.HostRead(), nq, dim, nf);
247  auto C = Reshape(r.HostWrite(), nq, nf);
248  int f_ind = 0;
249  for (int f = 0; f < mesh->GetNumFacesWithGhost(); ++f)
250  {
251  Mesh::FaceInformation face = mesh->GetFaceInformation(f);
252  if (face.IsNonconformingCoarse())
253  {
254  // We skip nonconforming coarse faces as they are treated
255  // by the corresponding nonconforming fine faces.
256  continue;
257  }
258  else if ( face.IsOfFaceType(type) )
259  {
260  FaceElementTransformations &T =
261  *fes.GetMesh()->GetFaceElementTransformations(f);
262  for (int q = 0; q < nq; ++q)
263  {
264  // Convert to lexicographic ordering
265  int iq = ToLexOrdering(dim, face.element[0].local_face_id,
266  quad1D, q);
267 
268  T.SetAllIntPoints(&ir->IntPoint(q));
269  const IntegrationPoint &eip1 = T.GetElement1IntPoint();
270  const IntegrationPoint &eip2 = T.GetElement2IntPoint();
271  double rq;
272 
273  if ( face.IsBoundary() )
274  {
275  rq = rho->Eval(*T.Elem1, eip1);
276  }
277  else
278  {
279  double udotn = 0.0;
280  for (int d=0; d<dim; ++d)
281  {
282  udotn += C_vel(d,iq,f_ind)*n(iq,d,f_ind);
283  }
284  if (udotn >= 0.0) { rq = rho->Eval(*T.Elem2, eip2); }
285  else { rq = rho->Eval(*T.Elem1, eip1); }
286  }
287  C(iq,f_ind) = rq;
288  }
289  f_ind++;
290  }
291  }
292  MFEM_VERIFY(f_ind==nf, "Incorrect number of faces.");
293  }
294  PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(),
295  geom->detJ, geom->normal, r, vel,
296  alpha, beta, pa_data);
297 }
298 
299 void DGTraceIntegrator::AssemblePAInteriorFaces(const FiniteElementSpace& fes)
300 {
301  SetupPA(fes, FaceType::Interior);
302 }
303 
304 void DGTraceIntegrator::AssemblePABoundaryFaces(const FiniteElementSpace& fes)
305 {
306  SetupPA(fes, FaceType::Boundary);
307 }
308 
309 // PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein
310 template<int T_D1D = 0, int T_Q1D = 0> static
311 void PADGTraceApply2D(const int NF,
312  const Array<double> &b,
313  const Array<double> &bt,
314  const Vector &op_,
315  const Vector &x_,
316  Vector &y_,
317  const int d1d = 0,
318  const int q1d = 0)
319 {
320  const int VDIM = 1;
321  const int D1D = T_D1D ? T_D1D : d1d;
322  const int Q1D = T_Q1D ? T_Q1D : q1d;
323  MFEM_VERIFY(D1D <= MAX_D1D, "");
324  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
325  auto B = Reshape(b.Read(), Q1D, D1D);
326  auto Bt = Reshape(bt.Read(), D1D, Q1D);
327  auto op = Reshape(op_.Read(), Q1D, 2, 2, NF);
328  auto x = Reshape(x_.Read(), D1D, VDIM, 2, NF);
329  auto y = Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
330 
331  MFEM_FORALL(f, NF,
332  {
333  const int VDIM = 1;
334  const int D1D = T_D1D ? T_D1D : d1d;
335  const int Q1D = T_Q1D ? T_Q1D : q1d;
336  // the following variables are evaluated at compile time
337  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
338  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
339  double u0[max_D1D][VDIM];
340  double u1[max_D1D][VDIM];
341  for (int d = 0; d < D1D; d++)
342  {
343  for (int c = 0; c < VDIM; c++)
344  {
345  u0[d][c] = x(d,c,0,f);
346  u1[d][c] = x(d,c,1,f);
347  }
348  }
349  double Bu0[max_Q1D][VDIM];
350  double Bu1[max_Q1D][VDIM];
351  for (int q = 0; q < Q1D; ++q)
352  {
353  for (int c = 0; c < VDIM; c++)
354  {
355  Bu0[q][c] = 0.0;
356  Bu1[q][c] = 0.0;
357  }
358  for (int d = 0; d < D1D; ++d)
359  {
360  const double b = B(q,d);
361  for (int c = 0; c < VDIM; c++)
362  {
363  Bu0[q][c] += b*u0[d][c];
364  Bu1[q][c] += b*u1[d][c];
365  }
366  }
367  }
368  double DBu[max_Q1D][VDIM];
369  for (int q = 0; q < Q1D; ++q)
370  {
371  for (int c = 0; c < VDIM; c++)
372  {
373  DBu[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,1,0,f)*Bu1[q][c];
374  }
375  }
376  double BDBu[max_D1D][VDIM];
377  for (int d = 0; d < D1D; ++d)
378  {
379  for (int c = 0; c < VDIM; c++)
380  {
381  BDBu[d][c] = 0.0;
382  }
383  for (int q = 0; q < Q1D; ++q)
384  {
385  const double b = Bt(d,q);
386  for (int c = 0; c < VDIM; c++)
387  {
388  BDBu[d][c] += b*DBu[q][c];
389  }
390  }
391  for (int c = 0; c < VDIM; c++)
392  {
393  y(d,c,0,f) += BDBu[d][c];
394  y(d,c,1,f) += -BDBu[d][c];
395  }
396  }
397  });
398 }
399 
400 // PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein
401 template<int T_D1D = 0, int T_Q1D = 0> static
402 void PADGTraceApply3D(const int NF,
403  const Array<double> &b,
404  const Array<double> &bt,
405  const Vector &op_,
406  const Vector &x_,
407  Vector &y_,
408  const int d1d = 0,
409  const int q1d = 0)
410 {
411  const int VDIM = 1;
412  const int D1D = T_D1D ? T_D1D : d1d;
413  const int Q1D = T_Q1D ? T_Q1D : q1d;
414  MFEM_VERIFY(D1D <= MAX_D1D, "");
415  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
416  auto B = Reshape(b.Read(), Q1D, D1D);
417  auto Bt = Reshape(bt.Read(), D1D, Q1D);
418  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
419  auto x = Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
420  auto y = Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
421 
422  MFEM_FORALL(f, NF,
423  {
424  const int VDIM = 1;
425  const int D1D = T_D1D ? T_D1D : d1d;
426  const int Q1D = T_Q1D ? T_Q1D : q1d;
427  // the following variables are evaluated at compile time
428  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
429  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
430  double u0[max_D1D][max_D1D][VDIM];
431  double u1[max_D1D][max_D1D][VDIM];
432  for (int d1 = 0; d1 < D1D; d1++)
433  {
434  for (int d2 = 0; d2 < D1D; d2++)
435  {
436  for (int c = 0; c < VDIM; c++)
437  {
438  u0[d1][d2][c] = x(d1,d2,c,0,f);
439  u1[d1][d2][c] = x(d1,d2,c,1,f);
440  }
441  }
442  }
443  double Bu0[max_Q1D][max_D1D][VDIM];
444  double Bu1[max_Q1D][max_D1D][VDIM];
445  for (int q = 0; q < Q1D; ++q)
446  {
447  for (int d2 = 0; d2 < D1D; d2++)
448  {
449  for (int c = 0; c < VDIM; c++)
450  {
451  Bu0[q][d2][c] = 0.0;
452  Bu1[q][d2][c] = 0.0;
453  }
454  for (int d1 = 0; d1 < D1D; ++d1)
455  {
456  const double b = B(q,d1);
457  for (int c = 0; c < VDIM; c++)
458  {
459  Bu0[q][d2][c] += b*u0[d1][d2][c];
460  Bu1[q][d2][c] += b*u1[d1][d2][c];
461  }
462  }
463  }
464  }
465  double BBu0[max_Q1D][max_Q1D][VDIM];
466  double BBu1[max_Q1D][max_Q1D][VDIM];
467  for (int q1 = 0; q1 < Q1D; ++q1)
468  {
469  for (int q2 = 0; q2 < Q1D; q2++)
470  {
471  for (int c = 0; c < VDIM; c++)
472  {
473  BBu0[q1][q2][c] = 0.0;
474  BBu1[q1][q2][c] = 0.0;
475  }
476  for (int d2 = 0; d2 < D1D; ++d2)
477  {
478  const double b = B(q2,d2);
479  for (int c = 0; c < VDIM; c++)
480  {
481  BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
482  BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
483  }
484  }
485  }
486  }
487  double DBBu[max_Q1D][max_Q1D][VDIM];
488  for (int q1 = 0; q1 < Q1D; ++q1)
489  {
490  for (int q2 = 0; q2 < Q1D; q2++)
491  {
492  for (int c = 0; c < VDIM; c++)
493  {
494  DBBu[q1][q2][c] = op(q1,q2,0,0,f)*BBu0[q1][q2][c] +
495  op(q1,q2,1,0,f)*BBu1[q1][q2][c];
496  }
497  }
498  }
499  double BDBBu[max_Q1D][max_D1D][VDIM];
500  for (int q1 = 0; q1 < Q1D; ++q1)
501  {
502  for (int d2 = 0; d2 < D1D; d2++)
503  {
504  for (int c = 0; c < VDIM; c++)
505  {
506  BDBBu[q1][d2][c] = 0.0;
507  }
508  for (int q2 = 0; q2 < Q1D; ++q2)
509  {
510  const double b = Bt(d2,q2);
511  for (int c = 0; c < VDIM; c++)
512  {
513  BDBBu[q1][d2][c] += b*DBBu[q1][q2][c];
514  }
515  }
516  }
517  }
518  double BBDBBu[max_D1D][max_D1D][VDIM];
519  for (int d1 = 0; d1 < D1D; ++d1)
520  {
521  for (int d2 = 0; d2 < D1D; d2++)
522  {
523  for (int c = 0; c < VDIM; c++)
524  {
525  BBDBBu[d1][d2][c] = 0.0;
526  }
527  for (int q1 = 0; q1 < Q1D; ++q1)
528  {
529  const double b = Bt(d1,q1);
530  for (int c = 0; c < VDIM; c++)
531  {
532  BBDBBu[d1][d2][c] += b*BDBBu[q1][d2][c];
533  }
534  }
535  for (int c = 0; c < VDIM; c++)
536  {
537  y(d1,d2,c,0,f) += BBDBBu[d1][d2][c];
538  y(d1,d2,c,1,f) += -BBDBBu[d1][d2][c];
539  }
540  }
541  }
542  });
543 }
544 
545 // Optimized PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein
546 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
547 void SmemPADGTraceApply3D(const int NF,
548  const Array<double> &b,
549  const Array<double> &bt,
550  const Vector &op_,
551  const Vector &x_,
552  Vector &y_,
553  const int d1d = 0,
554  const int q1d = 0)
555 {
556  const int D1D = T_D1D ? T_D1D : d1d;
557  const int Q1D = T_Q1D ? T_Q1D : q1d;
558  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
559  MFEM_VERIFY(D1D <= MAX_D1D, "");
560  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
561  auto B = Reshape(b.Read(), Q1D, D1D);
562  auto Bt = Reshape(bt.Read(), D1D, Q1D);
563  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
564  auto x = Reshape(x_.Read(), D1D, D1D, 2, NF);
565  auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
566 
567  MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
568  {
569  const int tidz = MFEM_THREAD_ID(z);
570  const int D1D = T_D1D ? T_D1D : d1d;
571  const int Q1D = T_Q1D ? T_Q1D : q1d;
572  // the following variables are evaluated at compile time
573  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
574  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
575  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
576  MFEM_SHARED double u0[NBZ][max_D1D][max_D1D];
577  MFEM_SHARED double u1[NBZ][max_D1D][max_D1D];
578  MFEM_FOREACH_THREAD(d1,x,D1D)
579  {
580  MFEM_FOREACH_THREAD(d2,y,D1D)
581  {
582  u0[tidz][d1][d2] = x(d1,d2,0,f);
583  u1[tidz][d1][d2] = x(d1,d2,1,f);
584  }
585  }
586  MFEM_SYNC_THREAD;
587  MFEM_SHARED double Bu0[NBZ][max_Q1D][max_D1D];
588  MFEM_SHARED double Bu1[NBZ][max_Q1D][max_D1D];
589  MFEM_FOREACH_THREAD(q1,x,Q1D)
590  {
591  MFEM_FOREACH_THREAD(d2,y,D1D)
592  {
593  double Bu0_ = 0.0;
594  double Bu1_ = 0.0;
595  for (int d1 = 0; d1 < D1D; ++d1)
596  {
597  const double b = B(q1,d1);
598  Bu0_ += b*u0[tidz][d1][d2];
599  Bu1_ += b*u1[tidz][d1][d2];
600  }
601  Bu0[tidz][q1][d2] = Bu0_;
602  Bu1[tidz][q1][d2] = Bu1_;
603  }
604  }
605  MFEM_SYNC_THREAD;
606  MFEM_SHARED double BBu0[NBZ][max_Q1D][max_Q1D];
607  MFEM_SHARED double BBu1[NBZ][max_Q1D][max_Q1D];
608  MFEM_FOREACH_THREAD(q1,x,Q1D)
609  {
610  MFEM_FOREACH_THREAD(q2,y,Q1D)
611  {
612  double BBu0_ = 0.0;
613  double BBu1_ = 0.0;
614  for (int d2 = 0; d2 < D1D; ++d2)
615  {
616  const double b = B(q2,d2);
617  BBu0_ += b*Bu0[tidz][q1][d2];
618  BBu1_ += b*Bu1[tidz][q1][d2];
619  }
620  BBu0[tidz][q1][q2] = BBu0_;
621  BBu1[tidz][q1][q2] = BBu1_;
622  }
623  }
624  MFEM_SYNC_THREAD;
625  MFEM_SHARED double DBBu[NBZ][max_Q1D][max_Q1D];
626  MFEM_FOREACH_THREAD(q1,x,Q1D)
627  {
628  MFEM_FOREACH_THREAD(q2,y,Q1D)
629  {
630  DBBu[tidz][q1][q2] = op(q1,q2,0,0,f)*BBu0[tidz][q1][q2] +
631  op(q1,q2,1,0,f)*BBu1[tidz][q1][q2];
632  }
633  }
634  MFEM_SYNC_THREAD;
635  MFEM_SHARED double BDBBu[NBZ][max_Q1D][max_D1D];
636  MFEM_FOREACH_THREAD(q1,x,Q1D)
637  {
638  MFEM_FOREACH_THREAD(d2,y,D1D)
639  {
640  double BDBBu_ = 0.0;
641  for (int q2 = 0; q2 < Q1D; ++q2)
642  {
643  const double b = Bt(d2,q2);
644  BDBBu_ += b*DBBu[tidz][q1][q2];
645  }
646  BDBBu[tidz][q1][d2] = BDBBu_;
647  }
648  }
649  MFEM_SYNC_THREAD;
650  MFEM_FOREACH_THREAD(d1,x,D1D)
651  {
652  MFEM_FOREACH_THREAD(d2,y,D1D)
653  {
654  double BBDBBu_ = 0.0;
655  for (int q1 = 0; q1 < Q1D; ++q1)
656  {
657  const double b = Bt(d1,q1);
658  BBDBBu_ += b*BDBBu[tidz][q1][d2];
659  }
660  y(d1,d2,0,f) += BBDBBu_;
661  y(d1,d2,1,f) += -BBDBBu_;
662  }
663  }
664  });
665 }
666 
667 static void PADGTraceApply(const int dim,
668  const int D1D,
669  const int Q1D,
670  const int NF,
671  const Array<double> &B,
672  const Array<double> &Bt,
673  const Vector &op,
674  const Vector &x,
675  Vector &y)
676 {
677  if (dim == 2)
678  {
679  switch ((D1D << 4 ) | Q1D)
680  {
681  case 0x22: return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
682  case 0x33: return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
683  case 0x44: return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
684  case 0x55: return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
685  case 0x66: return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
686  case 0x77: return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
687  case 0x88: return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
688  case 0x99: return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
689  default: return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
690  }
691  }
692  else if (dim == 3)
693  {
694  switch ((D1D << 4 ) | Q1D)
695  {
696  case 0x23: return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
697  case 0x34: return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
698  case 0x45: return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
699  case 0x56: return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
700  case 0x67: return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
701  case 0x78: return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
702  case 0x89: return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
703  default: return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
704  }
705  }
706  MFEM_ABORT("Unknown kernel.");
707 }
708 
709 // PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein
710 template<int T_D1D = 0, int T_Q1D = 0> static
711 void PADGTraceApplyTranspose2D(const int NF,
712  const Array<double> &b,
713  const Array<double> &bt,
714  const Vector &op_,
715  const Vector &x_,
716  Vector &y_,
717  const int d1d = 0,
718  const int q1d = 0)
719 {
720  const int VDIM = 1;
721  const int D1D = T_D1D ? T_D1D : d1d;
722  const int Q1D = T_Q1D ? T_Q1D : q1d;
723  MFEM_VERIFY(D1D <= MAX_D1D, "");
724  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
725  auto B = Reshape(b.Read(), Q1D, D1D);
726  auto Bt = Reshape(bt.Read(), D1D, Q1D);
727  auto op = Reshape(op_.Read(), Q1D, 2, 2, NF);
728  auto x = Reshape(x_.Read(), D1D, VDIM, 2, NF);
729  auto y = Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
730 
731  MFEM_FORALL(f, NF,
732  {
733  const int VDIM = 1;
734  const int D1D = T_D1D ? T_D1D : d1d;
735  const int Q1D = T_Q1D ? T_Q1D : q1d;
736  // the following variables are evaluated at compile time
737  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
738  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
739  double u0[max_D1D][VDIM];
740  double u1[max_D1D][VDIM];
741  for (int d = 0; d < D1D; d++)
742  {
743  for (int c = 0; c < VDIM; c++)
744  {
745  u0[d][c] = x(d,c,0,f);
746  u1[d][c] = x(d,c,1,f);
747  }
748  }
749  double Bu0[max_Q1D][VDIM];
750  double Bu1[max_Q1D][VDIM];
751  for (int q = 0; q < Q1D; ++q)
752  {
753  for (int c = 0; c < VDIM; c++)
754  {
755  Bu0[q][c] = 0.0;
756  Bu1[q][c] = 0.0;
757  }
758  for (int d = 0; d < D1D; ++d)
759  {
760  const double b = B(q,d);
761  for (int c = 0; c < VDIM; c++)
762  {
763  Bu0[q][c] += b*u0[d][c];
764  Bu1[q][c] += b*u1[d][c];
765  }
766  }
767  }
768  double DBu0[max_Q1D][VDIM];
769  double DBu1[max_Q1D][VDIM];
770  for (int q = 0; q < Q1D; ++q)
771  {
772  for (int c = 0; c < VDIM; c++)
773  {
774  DBu0[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,0,1,f)*Bu1[q][c];
775  DBu1[q][c] = op(q,1,0,f)*Bu0[q][c] + op(q,1,1,f)*Bu1[q][c];
776  }
777  }
778  double BDBu0[max_D1D][VDIM];
779  double BDBu1[max_D1D][VDIM];
780  for (int d = 0; d < D1D; ++d)
781  {
782  for (int c = 0; c < VDIM; c++)
783  {
784  BDBu0[d][c] = 0.0;
785  BDBu1[d][c] = 0.0;
786  }
787  for (int q = 0; q < Q1D; ++q)
788  {
789  const double b = Bt(d,q);
790  for (int c = 0; c < VDIM; c++)
791  {
792  BDBu0[d][c] += b*DBu0[q][c];
793  BDBu1[d][c] += b*DBu1[q][c];
794  }
795  }
796  for (int c = 0; c < VDIM; c++)
797  {
798  y(d,c,0,f) += BDBu0[d][c];
799  y(d,c,1,f) += BDBu1[d][c];
800  }
801  }
802  });
803 }
804 
805 // PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein
806 template<int T_D1D = 0, int T_Q1D = 0> static
807 void PADGTraceApplyTranspose3D(const int NF,
808  const Array<double> &b,
809  const Array<double> &bt,
810  const Vector &op_,
811  const Vector &x_,
812  Vector &y_,
813  const int d1d = 0,
814  const int q1d = 0)
815 {
816  const int VDIM = 1;
817  const int D1D = T_D1D ? T_D1D : d1d;
818  const int Q1D = T_Q1D ? T_Q1D : q1d;
819  MFEM_VERIFY(D1D <= MAX_D1D, "");
820  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
821  auto B = Reshape(b.Read(), Q1D, D1D);
822  auto Bt = Reshape(bt.Read(), D1D, Q1D);
823  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
824  auto x = Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
825  auto y = Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
826 
827  MFEM_FORALL(f, NF,
828  {
829  const int VDIM = 1;
830  const int D1D = T_D1D ? T_D1D : d1d;
831  const int Q1D = T_Q1D ? T_Q1D : q1d;
832  // the following variables are evaluated at compile time
833  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
834  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
835  double u0[max_D1D][max_D1D][VDIM];
836  double u1[max_D1D][max_D1D][VDIM];
837  for (int d1 = 0; d1 < D1D; d1++)
838  {
839  for (int d2 = 0; d2 < D1D; d2++)
840  {
841  for (int c = 0; c < VDIM; c++)
842  {
843  u0[d1][d2][c] = x(d1,d2,c,0,f);
844  u1[d1][d2][c] = x(d1,d2,c,1,f);
845  }
846  }
847  }
848  double Bu0[max_Q1D][max_D1D][VDIM];
849  double Bu1[max_Q1D][max_D1D][VDIM];
850  for (int q1 = 0; q1 < Q1D; ++q1)
851  {
852  for (int d2 = 0; d2 < D1D; ++d2)
853  {
854  for (int c = 0; c < VDIM; c++)
855  {
856  Bu0[q1][d2][c] = 0.0;
857  Bu1[q1][d2][c] = 0.0;
858  }
859  for (int d1 = 0; d1 < D1D; ++d1)
860  {
861  const double b = B(q1,d1);
862  for (int c = 0; c < VDIM; c++)
863  {
864  Bu0[q1][d2][c] += b*u0[d1][d2][c];
865  Bu1[q1][d2][c] += b*u1[d1][d2][c];
866  }
867  }
868  }
869  }
870  double BBu0[max_Q1D][max_Q1D][VDIM];
871  double BBu1[max_Q1D][max_Q1D][VDIM];
872  for (int q1 = 0; q1 < Q1D; ++q1)
873  {
874  for (int q2 = 0; q2 < Q1D; ++q2)
875  {
876  for (int c = 0; c < VDIM; c++)
877  {
878  BBu0[q1][q2][c] = 0.0;
879  BBu1[q1][q2][c] = 0.0;
880  }
881  for (int d2 = 0; d2 < D1D; ++d2)
882  {
883  const double b = B(q2,d2);
884  for (int c = 0; c < VDIM; c++)
885  {
886  BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
887  BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
888  }
889  }
890  }
891  }
892  double DBu0[max_Q1D][max_Q1D][VDIM];
893  double DBu1[max_Q1D][max_Q1D][VDIM];
894  for (int q1 = 0; q1 < Q1D; ++q1)
895  {
896  for (int q2 = 0; q2 < Q1D; ++q2)
897  {
898  const double D00 = op(q1,q2,0,0,f);
899  const double D01 = op(q1,q2,0,1,f);
900  const double D10 = op(q1,q2,1,0,f);
901  const double D11 = op(q1,q2,1,1,f);
902  for (int c = 0; c < VDIM; c++)
903  {
904  DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
905  DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
906  }
907  }
908  }
909  double BDBu0[max_D1D][max_Q1D][VDIM];
910  double BDBu1[max_D1D][max_Q1D][VDIM];
911  for (int d1 = 0; d1 < D1D; ++d1)
912  {
913  for (int q2 = 0; q2 < Q1D; ++q2)
914  {
915  for (int c = 0; c < VDIM; c++)
916  {
917  BDBu0[d1][q2][c] = 0.0;
918  BDBu1[d1][q2][c] = 0.0;
919  }
920  for (int q1 = 0; q1 < Q1D; ++q1)
921  {
922  const double b = Bt(d1,q1);
923  for (int c = 0; c < VDIM; c++)
924  {
925  BDBu0[d1][q2][c] += b*DBu0[q1][q2][c];
926  BDBu1[d1][q2][c] += b*DBu1[q1][q2][c];
927  }
928  }
929  }
930  }
931  double BBDBu0[max_D1D][max_D1D][VDIM];
932  double BBDBu1[max_D1D][max_D1D][VDIM];
933  for (int d1 = 0; d1 < D1D; ++d1)
934  {
935  for (int d2 = 0; d2 < D1D; ++d2)
936  {
937  for (int c = 0; c < VDIM; c++)
938  {
939  BBDBu0[d1][d2][c] = 0.0;
940  BBDBu1[d1][d2][c] = 0.0;
941  }
942  for (int q2 = 0; q2 < Q1D; ++q2)
943  {
944  const double b = Bt(d2,q2);
945  for (int c = 0; c < VDIM; c++)
946  {
947  BBDBu0[d1][d2][c] += b*BDBu0[d1][q2][c];
948  BBDBu1[d1][d2][c] += b*BDBu1[d1][q2][c];
949  }
950  }
951  for (int c = 0; c < VDIM; c++)
952  {
953  y(d1,d2,c,0,f) += BBDBu0[d1][d2][c];
954  y(d1,d2,c,1,f) += BBDBu1[d1][d2][c];
955  }
956  }
957  }
958  });
959 }
960 
961 // Optimized PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein
962 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
963 void SmemPADGTraceApplyTranspose3D(const int NF,
964  const Array<double> &b,
965  const Array<double> &bt,
966  const Vector &op_,
967  const Vector &x_,
968  Vector &y_,
969  const int d1d = 0,
970  const int q1d = 0)
971 {
972  const int D1D = T_D1D ? T_D1D : d1d;
973  const int Q1D = T_Q1D ? T_Q1D : q1d;
974  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
975  MFEM_VERIFY(D1D <= MAX_D1D, "");
976  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
977  auto B = Reshape(b.Read(), Q1D, D1D);
978  auto Bt = Reshape(bt.Read(), D1D, Q1D);
979  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
980  auto x = Reshape(x_.Read(), D1D, D1D, 2, NF);
981  auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
982 
983  MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
984  {
985  const int tidz = MFEM_THREAD_ID(z);
986  const int D1D = T_D1D ? T_D1D : d1d;
987  const int Q1D = T_Q1D ? T_Q1D : q1d;
988  // the following variables are evaluated at compile time
989  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
990  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
991  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
992  MFEM_SHARED double u0[NBZ][max_D1D][max_D1D];
993  MFEM_SHARED double u1[NBZ][max_D1D][max_D1D];
994  MFEM_FOREACH_THREAD(d1,x,D1D)
995  {
996  MFEM_FOREACH_THREAD(d2,y,D1D)
997  {
998  u0[tidz][d1][d2] = x(d1,d2,0,f);
999  u1[tidz][d1][d2] = x(d1,d2,1,f);
1000  }
1001  }
1002  MFEM_SYNC_THREAD;
1003  MFEM_SHARED double Bu0[NBZ][max_Q1D][max_D1D];
1004  MFEM_SHARED double Bu1[NBZ][max_Q1D][max_D1D];
1005  MFEM_FOREACH_THREAD(q1,x,Q1D)
1006  {
1007  MFEM_FOREACH_THREAD(d2,y,D1D)
1008  {
1009  double Bu0_ = 0.0;
1010  double Bu1_ = 0.0;
1011  for (int d1 = 0; d1 < D1D; ++d1)
1012  {
1013  const double b = B(q1,d1);
1014  Bu0_ += b*u0[tidz][d1][d2];
1015  Bu1_ += b*u1[tidz][d1][d2];
1016  }
1017  Bu0[tidz][q1][d2] = Bu0_;
1018  Bu1[tidz][q1][d2] = Bu1_;
1019  }
1020  }
1021  MFEM_SYNC_THREAD;
1022  MFEM_SHARED double BBu0[NBZ][max_Q1D][max_Q1D];
1023  MFEM_SHARED double BBu1[NBZ][max_Q1D][max_Q1D];
1024  MFEM_FOREACH_THREAD(q1,x,Q1D)
1025  {
1026  MFEM_FOREACH_THREAD(q2,y,Q1D)
1027  {
1028  double BBu0_ = 0.0;
1029  double BBu1_ = 0.0;
1030  for (int d2 = 0; d2 < D1D; ++d2)
1031  {
1032  const double b = B(q2,d2);
1033  BBu0_ += b*Bu0[tidz][q1][d2];
1034  BBu1_ += b*Bu1[tidz][q1][d2];
1035  }
1036  BBu0[tidz][q1][q2] = BBu0_;
1037  BBu1[tidz][q1][q2] = BBu1_;
1038  }
1039  }
1040  MFEM_SYNC_THREAD;
1041  MFEM_SHARED double DBBu0[NBZ][max_Q1D][max_Q1D];
1042  MFEM_SHARED double DBBu1[NBZ][max_Q1D][max_Q1D];
1043  MFEM_FOREACH_THREAD(q1,x,Q1D)
1044  {
1045  MFEM_FOREACH_THREAD(q2,y,Q1D)
1046  {
1047  const double D00 = op(q1,q2,0,0,f);
1048  const double D01 = op(q1,q2,0,1,f);
1049  const double D10 = op(q1,q2,1,0,f);
1050  const double D11 = op(q1,q2,1,1,f);
1051  const double u0q = BBu0[tidz][q1][q2];
1052  const double u1q = BBu1[tidz][q1][q2];
1053  DBBu0[tidz][q1][q2] = D00*u0q + D01*u1q;
1054  DBBu1[tidz][q1][q2] = D10*u0q + D11*u1q;
1055  }
1056  }
1057  MFEM_SYNC_THREAD;
1058  MFEM_SHARED double BDBBu0[NBZ][max_Q1D][max_D1D];
1059  MFEM_SHARED double BDBBu1[NBZ][max_Q1D][max_D1D];
1060  MFEM_FOREACH_THREAD(q1,x,Q1D)
1061  {
1062  MFEM_FOREACH_THREAD(d2,y,D1D)
1063  {
1064  double BDBBu0_ = 0.0;
1065  double BDBBu1_ = 0.0;
1066  for (int q2 = 0; q2 < Q1D; ++q2)
1067  {
1068  const double b = Bt(d2,q2);
1069  BDBBu0_ += b*DBBu0[tidz][q1][q2];
1070  BDBBu1_ += b*DBBu1[tidz][q1][q2];
1071  }
1072  BDBBu0[tidz][q1][d2] = BDBBu0_;
1073  BDBBu1[tidz][q1][d2] = BDBBu1_;
1074  }
1075  }
1076  MFEM_SYNC_THREAD;
1077  MFEM_FOREACH_THREAD(d1,x,D1D)
1078  {
1079  MFEM_FOREACH_THREAD(d2,y,D1D)
1080  {
1081  double BBDBBu0_ = 0.0;
1082  double BBDBBu1_ = 0.0;
1083  for (int q1 = 0; q1 < Q1D; ++q1)
1084  {
1085  const double b = Bt(d1,q1);
1086  BBDBBu0_ += b*BDBBu0[tidz][q1][d2];
1087  BBDBBu1_ += b*BDBBu1[tidz][q1][d2];
1088  }
1089  y(d1,d2,0,f) += BBDBBu0_;
1090  y(d1,d2,1,f) += BBDBBu1_;
1091  }
1092  }
1093  });
1094 }
1095 
1096 static void PADGTraceApplyTranspose(const int dim,
1097  const int D1D,
1098  const int Q1D,
1099  const int NF,
1100  const Array<double> &B,
1101  const Array<double> &Bt,
1102  const Vector &op,
1103  const Vector &x,
1104  Vector &y)
1105 {
1106  if (dim == 2)
1107  {
1108  switch ((D1D << 4 ) | Q1D)
1109  {
1110  case 0x22: return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1111  case 0x33: return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1112  case 0x44: return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1113  case 0x55: return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1114  case 0x66: return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1115  case 0x77: return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1116  case 0x88: return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1117  case 0x99: return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1118  default: return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1119  }
1120  }
1121  else if (dim == 3)
1122  {
1123  switch ((D1D << 4 ) | Q1D)
1124  {
1125  case 0x23: return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1126  case 0x34: return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1127  case 0x45: return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1128  case 0x56: return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1129  case 0x67: return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1130  case 0x78: return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1131  case 0x89: return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1132  default: return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1133  }
1134  }
1135  MFEM_ABORT("Unknown kernel.");
1136 }
1137 
1138 // PA DGTraceIntegrator Apply kernel
1139 void DGTraceIntegrator::AddMultPA(const Vector &x, Vector &y) const
1140 {
1141  PADGTraceApply(dim, dofs1D, quad1D, nf,
1142  maps->B, maps->Bt,
1143  pa_data, x, y);
1144 }
1145 
1146 void DGTraceIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
1147 {
1148  PADGTraceApplyTranspose(dim, dofs1D, quad1D, nf,
1149  maps->B, maps->Bt,
1150  pa_data, x, y);
1151 }
1152 
1153 } // namespace mfem
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
FaceType
Definition: mesh.hpp:45
const int MAX_Q1D
Definition: forall.hpp:29
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Convert a dof face index from Native ordering to lexicographic ordering for quads and hexes...
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:88
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:454
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:55
const int MAX_D1D
Definition: forall.hpp:28
const double alpha
Definition: ex15.cpp:369
Vector data type.
Definition: vector.hpp:60
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:438
double f(const Vector &p)