MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_dgtrace_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "restriction.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 // PA DG Trace Integrator
22 static void PADGTraceSetup2D(const int Q1D,
23  const int NF,
24  const Array<double> &w,
25  const Vector &det,
26  const Vector &nor,
27  const Vector &rho,
28  const Vector &vel,
29  const double alpha,
30  const double beta,
31  Vector &op)
32 {
33  const int VDIM = 2;
34 
35  auto d = Reshape(det.Read(), Q1D, NF);
36  auto n = Reshape(nor.Read(), Q1D, VDIM, NF);
37  const bool const_r = rho.Size() == 1;
38  auto R =
39  const_r ? Reshape(rho.Read(), 1,1) : Reshape(rho.Read(), Q1D,NF);
40  const bool const_v = vel.Size() == 2;
41  auto V =
42  const_v ? Reshape(vel.Read(), 2,1,1) : Reshape(vel.Read(), 2,Q1D,NF);
43  auto W = w.Read();
44  auto qd = Reshape(op.Write(), Q1D, 2, 2, NF);
45 
46  MFEM_FORALL(f, NF, // can be optimized with Q1D thread for NF blocks
47  {
48  for (int q = 0; q < Q1D; ++q)
49  {
50  const double r = const_r ? R(0,0) : R(q,f);
51  const double v0 = const_v ? V(0,0,0) : V(0,q,f);
52  const double v1 = const_v ? V(1,0,0) : V(1,q,f);
53  const double dot = n(q,0,f) * v0 + n(q,1,f) * v1;
54  const double abs = dot > 0.0 ? dot : -dot;
55  const double w = W[q]*r*d(q,f);
56  qd(q,0,0,f) = w*( alpha/2 * dot + beta * abs );
57  qd(q,1,0,f) = w*( alpha/2 * dot - beta * abs );
58  qd(q,0,1,f) = w*(-alpha/2 * dot - beta * abs );
59  qd(q,1,1,f) = w*(-alpha/2 * dot + beta * abs );
60  }
61  });
62 }
63 
64 static void PADGTraceSetup3D(const int Q1D,
65  const int NF,
66  const Array<double> &w,
67  const Vector &det,
68  const Vector &nor,
69  const Vector &rho,
70  const Vector &vel,
71  const double alpha,
72  const double beta,
73  Vector &op)
74 {
75  const int VDIM = 3;
76 
77  auto d = Reshape(det.Read(), Q1D, Q1D, NF);
78  auto n = Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
79  const bool const_r = rho.Size() == 1;
80  auto R =
81  const_r ? Reshape(rho.Read(), 1,1,1) : Reshape(rho.Read(), Q1D,Q1D,NF);
82  const bool const_v = vel.Size() == 3;
83  auto V =
84  const_v ? Reshape(vel.Read(), 3,1,1,1) : Reshape(vel.Read(), 3,Q1D,Q1D,NF);
85  auto W = w.Read();
86  auto qd = Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
87 
88  MFEM_FORALL(f, NF, // can be optimized with Q1D*Q1D threads for NF blocks
89  {
90  for (int q1 = 0; q1 < Q1D; ++q1)
91  {
92  for (int q2 = 0; q2 < Q1D; ++q2)
93  {
94  const double r = const_r ? R(0,0,0) : R(q1,q2,f);
95  const double v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,f);
96  const double v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,f);
97  const double v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,f);
98  const double dot = n(q1,q2,0,f) * v0 + n(q1,q2,1,f) * v1 +
99  /* */ n(q1,q2,2,f) * v2;
100  const double abs = dot > 0.0 ? dot : -dot;
101  const double w = W[q1+q2*Q1D]*r*d(q1,q2,f);
102  qd(q1,q2,0,0,f) = w*( alpha/2 * dot + beta * abs );
103  qd(q1,q2,1,0,f) = w*( alpha/2 * dot - beta * abs );
104  qd(q1,q2,0,1,f) = w*(-alpha/2 * dot - beta * abs );
105  qd(q1,q2,1,1,f) = w*(-alpha/2 * dot + beta * abs );
106  }
107  }
108  });
109 }
110 
111 static void PADGTraceSetup(const int dim,
112  const int D1D,
113  const int Q1D,
114  const int NF,
115  const Array<double> &W,
116  const Vector &det,
117  const Vector &nor,
118  const Vector &rho,
119  const Vector &u,
120  const double alpha,
121  const double beta,
122  Vector &op)
123 {
124  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADGTraceSetup"); }
125  if (dim == 2)
126  {
127  PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
128  }
129  if (dim == 3)
130  {
131  PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
132  }
133 }
134 
135 void DGTraceIntegrator::SetupPA(const FiniteElementSpace &fes, FaceType type)
136 {
137  nf = fes.GetNFbyType(type);
138  if (nf==0) { return; }
139  // Assumes tensor-product elements
140  Mesh *mesh = fes.GetMesh();
141  const FiniteElement &el =
142  *fes.GetTraceElement(0, fes.GetMesh()->GetFaceBaseGeometry(0));
143  FaceElementTransformations &T =
144  *fes.GetMesh()->GetFaceElementTransformations(0);
145  const IntegrationRule *ir = IntRule?
146  IntRule:
147  &GetRule(el.GetGeomType(), el.GetOrder(), T);
148  const int symmDims = 4;
149  const int nq = ir->GetNPoints();
150  dim = mesh->Dimension();
151  geom = mesh->GetFaceGeometricFactors(
152  *ir,
153  FaceGeometricFactors::DETERMINANTS |
154  FaceGeometricFactors::NORMALS, type);
155  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
156  dofs1D = maps->ndof;
157  quad1D = maps->nqpt;
158  pa_data.SetSize(symmDims * nq * nf, Device::GetMemoryType());
159  Vector vel;
160  if (VectorConstantCoefficient *c_u = dynamic_cast<VectorConstantCoefficient*>
161  (u))
162  {
163  vel = c_u->GetVec();
164  }
165  else if (VectorQuadratureFunctionCoefficient* c_u =
166  dynamic_cast<VectorQuadratureFunctionCoefficient*>(u))
167  {
168  // Assumed to be in lexicographical ordering
169  const QuadratureFunction &qFun = c_u->GetQuadFunction();
170  MFEM_VERIFY(qFun.Size() == dim * nq * nf,
171  "Incompatible QuadratureFunction dimension \n");
172 
173  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
174  "IntegrationRule used within integrator and in"
175  " QuadratureFunction appear to be different");
176  qFun.Read();
177  vel.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
178  }
179  else
180  {
181  vel.SetSize(dim * nq * nf);
182  auto C = Reshape(vel.HostWrite(), dim, nq, nf);
183  Vector Vq(dim);
184  int f_ind = 0;
185  for (int f = 0; f < fes.GetNF(); ++f)
186  {
187  int e1, e2;
188  int inf1, inf2;
189  fes.GetMesh()->GetFaceElements(f, &e1, &e2);
190  fes.GetMesh()->GetFaceInfos(f, &inf1, &inf2);
191  int face_id = inf1 / 64;
192  if ((type==FaceType::Interior && (e2>=0 || (e2<0 && inf2>=0))) ||
193  (type==FaceType::Boundary && e2<0 && inf2<0) )
194  {
195  FaceElementTransformations &T =
196  *fes.GetMesh()->GetFaceElementTransformations(f);
197  for (int q = 0; q < nq; ++q)
198  {
199  // Convert to lexicographic ordering
200  int iq = ToLexOrdering(dim, face_id, quad1D, q);
201  T.SetAllIntPoints(&ir->IntPoint(q));
202  const IntegrationPoint &eip1 = T.GetElement1IntPoint();
203  u->Eval(Vq, *T.Elem1, eip1);
204  for (int i = 0; i < dim; ++i)
205  {
206  C(i,iq,f_ind) = Vq(i);
207  }
208  }
209  f_ind++;
210  }
211  }
212  MFEM_VERIFY(f_ind==nf, "Incorrect number of faces.");
213  }
214  Vector r;
215  if (rho==nullptr)
216  {
217  r.SetSize(1);
218  r(0) = 1.0;
219  }
220  else if (ConstantCoefficient *c_rho = dynamic_cast<ConstantCoefficient*>(rho))
221  {
222  r.SetSize(1);
223  r(0) = c_rho->constant;
224  }
225  else if (QuadratureFunctionCoefficient* c_rho =
226  dynamic_cast<QuadratureFunctionCoefficient*>(rho))
227  {
228  const QuadratureFunction &qFun = c_rho->GetQuadFunction();
229  MFEM_VERIFY(qFun.Size() == nq * nf,
230  "Incompatible QuadratureFunction dimension \n");
231 
232  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
233  "IntegrationRule used within integrator and in"
234  " QuadratureFunction appear to be different");
235  qFun.Read();
236  r.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
237  }
238  else
239  {
240  r.SetSize(nq * nf);
241  auto C_vel = Reshape(vel.HostRead(), dim, nq, nf);
242  auto n = Reshape(geom->normal.HostRead(), nq, dim, nf);
243  auto C = Reshape(r.HostWrite(), nq, nf);
244  int f_ind = 0;
245  for (int f = 0; f < fes.GetNF(); ++f)
246  {
247  int e1, e2;
248  int inf1, inf2;
249  fes.GetMesh()->GetFaceElements(f, &e1, &e2);
250  fes.GetMesh()->GetFaceInfos(f, &inf1, &inf2);
251  int face_id = inf1 / 64;
252  if ((type==FaceType::Interior && (e2>=0 || (e2<0 && inf2>=0))) ||
253  (type==FaceType::Boundary && e2<0 && inf2<0) )
254  {
255  FaceElementTransformations &T =
256  *fes.GetMesh()->GetFaceElementTransformations(f);
257  for (int q = 0; q < nq; ++q)
258  {
259  // Convert to lexicographic ordering
260  int iq = ToLexOrdering(dim, face_id, quad1D, q);
261 
262  T.SetAllIntPoints(&ir->IntPoint(q));
263  const IntegrationPoint &eip1 = T.GetElement1IntPoint();
264  const IntegrationPoint &eip2 = T.GetElement2IntPoint();
265  double r;
266 
267  if (inf2 < 0)
268  {
269  r = rho->Eval(*T.Elem1, eip1);
270  }
271  else
272  {
273  double udotn = 0.0;
274  for (int d=0; d<dim; ++d)
275  {
276  udotn += C_vel(d,iq,f_ind)*n(iq,d,f_ind);
277  }
278  if (udotn >= 0.0) { r = rho->Eval(*T.Elem2, eip2); }
279  else { r = rho->Eval(*T.Elem1, eip1); }
280  }
281  C(iq,f_ind) = r;
282  }
283  f_ind++;
284  }
285  }
286  MFEM_VERIFY(f_ind==nf, "Incorrect number of faces.");
287  }
288  PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(),
289  geom->detJ, geom->normal, r, vel,
290  alpha, beta, pa_data);
291 }
292 
293 void DGTraceIntegrator::AssemblePAInteriorFaces(const FiniteElementSpace& fes)
294 {
295  SetupPA(fes, FaceType::Interior);
296 }
297 
298 void DGTraceIntegrator::AssemblePABoundaryFaces(const FiniteElementSpace& fes)
299 {
300  SetupPA(fes, FaceType::Boundary);
301 }
302 
303 // PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein
304 template<int T_D1D = 0, int T_Q1D = 0> static
305 void PADGTraceApply2D(const int NF,
306  const Array<double> &b,
307  const Array<double> &bt,
308  const Vector &_op,
309  const Vector &_x,
310  Vector &_y,
311  const int d1d = 0,
312  const int q1d = 0)
313 {
314  const int VDIM = 1;
315  const int D1D = T_D1D ? T_D1D : d1d;
316  const int Q1D = T_Q1D ? T_Q1D : q1d;
317  MFEM_VERIFY(D1D <= MAX_D1D, "");
318  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
319  auto B = Reshape(b.Read(), Q1D, D1D);
320  auto Bt = Reshape(bt.Read(), D1D, Q1D);
321  auto op = Reshape(_op.Read(), Q1D, 2, 2, NF);
322  auto x = Reshape(_x.Read(), D1D, VDIM, 2, NF);
323  auto y = Reshape(_y.ReadWrite(), D1D, VDIM, 2, NF);
324 
325  MFEM_FORALL(f, NF,
326  {
327  const int VDIM = 1;
328  const int D1D = T_D1D ? T_D1D : d1d;
329  const int Q1D = T_Q1D ? T_Q1D : q1d;
330  // the following variables are evaluated at compile time
331  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
332  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
333  double u0[max_D1D][VDIM];
334  double u1[max_D1D][VDIM];
335  for (int d = 0; d < D1D; d++)
336  {
337  for (int c = 0; c < VDIM; c++)
338  {
339  u0[d][c] = x(d,c,0,f);
340  u1[d][c] = x(d,c,1,f);
341  }
342  }
343  double Bu0[max_Q1D][VDIM];
344  double Bu1[max_Q1D][VDIM];
345  for (int q = 0; q < Q1D; ++q)
346  {
347  for (int c = 0; c < VDIM; c++)
348  {
349  Bu0[q][c] = 0.0;
350  Bu1[q][c] = 0.0;
351  }
352  for (int d = 0; d < D1D; ++d)
353  {
354  const double b = B(q,d);
355  for (int c = 0; c < VDIM; c++)
356  {
357  Bu0[q][c] += b*u0[d][c];
358  Bu1[q][c] += b*u1[d][c];
359  }
360  }
361  }
362  double DBu[max_Q1D][VDIM];
363  for (int q = 0; q < Q1D; ++q)
364  {
365  for (int c = 0; c < VDIM; c++)
366  {
367  DBu[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,1,0,f)*Bu1[q][c];
368  }
369  }
370  double BDBu[max_D1D][VDIM];
371  for (int d = 0; d < D1D; ++d)
372  {
373  for (int c = 0; c < VDIM; c++)
374  {
375  BDBu[d][c] = 0.0;
376  }
377  for (int q = 0; q < Q1D; ++q)
378  {
379  const double b = Bt(d,q);
380  for (int c = 0; c < VDIM; c++)
381  {
382  BDBu[d][c] += b*DBu[q][c];
383  }
384  }
385  for (int c = 0; c < VDIM; c++)
386  {
387  y(d,c,0,f) += BDBu[d][c];
388  y(d,c,1,f) += -BDBu[d][c];
389  }
390  }
391  });
392 }
393 
394 // PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein
395 template<int T_D1D = 0, int T_Q1D = 0> static
396 void PADGTraceApply3D(const int NF,
397  const Array<double> &b,
398  const Array<double> &bt,
399  const Vector &_op,
400  const Vector &_x,
401  Vector &_y,
402  const int d1d = 0,
403  const int q1d = 0)
404 {
405  const int VDIM = 1;
406  const int D1D = T_D1D ? T_D1D : d1d;
407  const int Q1D = T_Q1D ? T_Q1D : q1d;
408  MFEM_VERIFY(D1D <= MAX_D1D, "");
409  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
410  auto B = Reshape(b.Read(), Q1D, D1D);
411  auto Bt = Reshape(bt.Read(), D1D, Q1D);
412  auto op = Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
413  auto x = Reshape(_x.Read(), D1D, D1D, VDIM, 2, NF);
414  auto y = Reshape(_y.ReadWrite(), D1D, D1D, VDIM, 2, NF);
415 
416  MFEM_FORALL(f, NF,
417  {
418  const int VDIM = 1;
419  const int D1D = T_D1D ? T_D1D : d1d;
420  const int Q1D = T_Q1D ? T_Q1D : q1d;
421  // the following variables are evaluated at compile time
422  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
423  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
424  double u0[max_D1D][max_D1D][VDIM];
425  double u1[max_D1D][max_D1D][VDIM];
426  for (int d1 = 0; d1 < D1D; d1++)
427  {
428  for (int d2 = 0; d2 < D1D; d2++)
429  {
430  for (int c = 0; c < VDIM; c++)
431  {
432  u0[d1][d2][c] = x(d1,d2,c,0,f);
433  u1[d1][d2][c] = x(d1,d2,c,1,f);
434  }
435  }
436  }
437  double Bu0[max_Q1D][max_D1D][VDIM];
438  double Bu1[max_Q1D][max_D1D][VDIM];
439  for (int q = 0; q < Q1D; ++q)
440  {
441  for (int d2 = 0; d2 < D1D; d2++)
442  {
443  for (int c = 0; c < VDIM; c++)
444  {
445  Bu0[q][d2][c] = 0.0;
446  Bu1[q][d2][c] = 0.0;
447  }
448  for (int d1 = 0; d1 < D1D; ++d1)
449  {
450  const double b = B(q,d1);
451  for (int c = 0; c < VDIM; c++)
452  {
453  Bu0[q][d2][c] += b*u0[d1][d2][c];
454  Bu1[q][d2][c] += b*u1[d1][d2][c];
455  }
456  }
457  }
458  }
459  double BBu0[max_Q1D][max_Q1D][VDIM];
460  double BBu1[max_Q1D][max_Q1D][VDIM];
461  for (int q1 = 0; q1 < Q1D; ++q1)
462  {
463  for (int q2 = 0; q2 < Q1D; q2++)
464  {
465  for (int c = 0; c < VDIM; c++)
466  {
467  BBu0[q1][q2][c] = 0.0;
468  BBu1[q1][q2][c] = 0.0;
469  }
470  for (int d2 = 0; d2 < D1D; ++d2)
471  {
472  const double b = B(q2,d2);
473  for (int c = 0; c < VDIM; c++)
474  {
475  BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
476  BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
477  }
478  }
479  }
480  }
481  double DBBu[max_Q1D][max_Q1D][VDIM];
482  for (int q1 = 0; q1 < Q1D; ++q1)
483  {
484  for (int q2 = 0; q2 < Q1D; q2++)
485  {
486  for (int c = 0; c < VDIM; c++)
487  {
488  DBBu[q1][q2][c] = op(q1,q2,0,0,f)*BBu0[q1][q2][c] +
489  op(q1,q2,1,0,f)*BBu1[q1][q2][c];
490  }
491  }
492  }
493  double BDBBu[max_Q1D][max_D1D][VDIM];
494  for (int q1 = 0; q1 < Q1D; ++q1)
495  {
496  for (int d2 = 0; d2 < D1D; d2++)
497  {
498  for (int c = 0; c < VDIM; c++)
499  {
500  BDBBu[q1][d2][c] = 0.0;
501  }
502  for (int q2 = 0; q2 < Q1D; ++q2)
503  {
504  const double b = Bt(d2,q2);
505  for (int c = 0; c < VDIM; c++)
506  {
507  BDBBu[q1][d2][c] += b*DBBu[q1][q2][c];
508  }
509  }
510  }
511  }
512  double BBDBBu[max_D1D][max_D1D][VDIM];
513  for (int d1 = 0; d1 < D1D; ++d1)
514  {
515  for (int d2 = 0; d2 < D1D; d2++)
516  {
517  for (int c = 0; c < VDIM; c++)
518  {
519  BBDBBu[d1][d2][c] = 0.0;
520  }
521  for (int q1 = 0; q1 < Q1D; ++q1)
522  {
523  const double b = Bt(d1,q1);
524  for (int c = 0; c < VDIM; c++)
525  {
526  BBDBBu[d1][d2][c] += b*BDBBu[q1][d2][c];
527  }
528  }
529  for (int c = 0; c < VDIM; c++)
530  {
531  y(d1,d2,c,0,f) += BBDBBu[d1][d2][c];
532  y(d1,d2,c,1,f) += -BBDBBu[d1][d2][c];
533  }
534  }
535  }
536  });
537 }
538 
539 // Optimized PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein
540 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
541 void SmemPADGTraceApply3D(const int NF,
542  const Array<double> &b,
543  const Array<double> &bt,
544  const Vector &_op,
545  const Vector &_x,
546  Vector &_y,
547  const int d1d = 0,
548  const int q1d = 0)
549 {
550  const int D1D = T_D1D ? T_D1D : d1d;
551  const int Q1D = T_Q1D ? T_Q1D : q1d;
552  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
553  MFEM_VERIFY(D1D <= MAX_D1D, "");
554  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
555  auto B = Reshape(b.Read(), Q1D, D1D);
556  auto Bt = Reshape(bt.Read(), D1D, Q1D);
557  auto op = Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
558  auto x = Reshape(_x.Read(), D1D, D1D, 2, NF);
559  auto y = Reshape(_y.ReadWrite(), D1D, D1D, 2, NF);
560 
561  MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
562  {
563  const int tidz = MFEM_THREAD_ID(z);
564  const int D1D = T_D1D ? T_D1D : d1d;
565  const int Q1D = T_Q1D ? T_Q1D : q1d;
566  // the following variables are evaluated at compile time
567  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
568  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
569  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
570  MFEM_SHARED double u0[NBZ][max_D1D][max_D1D];
571  MFEM_SHARED double u1[NBZ][max_D1D][max_D1D];
572  MFEM_FOREACH_THREAD(d1,x,D1D)
573  {
574  MFEM_FOREACH_THREAD(d2,y,D1D)
575  {
576  u0[tidz][d1][d2] = x(d1,d2,0,f+tidz);
577  u1[tidz][d1][d2] = x(d1,d2,1,f+tidz);
578  }
579  }
580  MFEM_SYNC_THREAD;
581  MFEM_SHARED double Bu0[NBZ][max_Q1D][max_D1D];
582  MFEM_SHARED double Bu1[NBZ][max_Q1D][max_D1D];
583  MFEM_FOREACH_THREAD(q1,x,Q1D)
584  {
585  MFEM_FOREACH_THREAD(d2,y,D1D)
586  {
587  double Bu0_ = 0.0;
588  double Bu1_ = 0.0;
589  for (int d1 = 0; d1 < D1D; ++d1)
590  {
591  const double b = B(q1,d1);
592  Bu0_ += b*u0[tidz][d1][d2];
593  Bu1_ += b*u1[tidz][d1][d2];
594  }
595  Bu0[tidz][q1][d2] = Bu0_;
596  Bu1[tidz][q1][d2] = Bu1_;
597  }
598  }
599  MFEM_SYNC_THREAD;
600  MFEM_SHARED double BBu0[NBZ][max_Q1D][max_Q1D];
601  MFEM_SHARED double BBu1[NBZ][max_Q1D][max_Q1D];
602  MFEM_FOREACH_THREAD(q1,x,Q1D)
603  {
604  MFEM_FOREACH_THREAD(q2,y,Q1D)
605  {
606  double BBu0_ = 0.0;
607  double BBu1_ = 0.0;
608  for (int d2 = 0; d2 < D1D; ++d2)
609  {
610  const double b = B(q2,d2);
611  BBu0_ += b*Bu0[tidz][q1][d2];
612  BBu1_ += b*Bu1[tidz][q1][d2];
613  }
614  BBu0[tidz][q1][q2] = BBu0_;
615  BBu1[tidz][q1][q2] = BBu1_;
616  }
617  }
618  MFEM_SYNC_THREAD;
619  MFEM_SHARED double DBBu[NBZ][max_Q1D][max_Q1D];
620  MFEM_FOREACH_THREAD(q1,x,Q1D)
621  {
622  MFEM_FOREACH_THREAD(q2,y,Q1D)
623  {
624  DBBu[tidz][q1][q2] = op(q1,q2,0,0,f+tidz)*BBu0[tidz][q1][q2] +
625  op(q1,q2,1,0,f+tidz)*BBu1[tidz][q1][q2];
626  }
627  }
628  MFEM_SYNC_THREAD;
629  MFEM_SHARED double BDBBu[NBZ][max_Q1D][max_D1D];
630  MFEM_FOREACH_THREAD(q1,x,Q1D)
631  {
632  MFEM_FOREACH_THREAD(d2,y,D1D)
633  {
634  double BDBBu_ = 0.0;
635  for (int q2 = 0; q2 < Q1D; ++q2)
636  {
637  const double b = Bt(d2,q2);
638  BDBBu_ += b*DBBu[tidz][q1][q2];
639  }
640  BDBBu[tidz][q1][d2] = BDBBu_;
641  }
642  }
643  MFEM_SYNC_THREAD;
644  MFEM_FOREACH_THREAD(d1,x,D1D)
645  {
646  MFEM_FOREACH_THREAD(d2,y,D1D)
647  {
648  double BBDBBu_ = 0.0;
649  for (int q1 = 0; q1 < Q1D; ++q1)
650  {
651  const double b = Bt(d1,q1);
652  BBDBBu_ += b*BDBBu[tidz][q1][d2];
653  }
654  y(d1,d2,0,f+tidz) += BBDBBu_;
655  y(d1,d2,1,f+tidz) += -BBDBBu_;
656  }
657  }
658  });
659 }
660 
661 static void PADGTraceApply(const int dim,
662  const int D1D,
663  const int Q1D,
664  const int NF,
665  const Array<double> &B,
666  const Array<double> &Bt,
667  const Vector &op,
668  const Vector &x,
669  Vector &y)
670 {
671  if (dim == 2)
672  {
673  switch ((D1D << 4 ) | Q1D)
674  {
675  case 0x22: return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
676  case 0x33: return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
677  case 0x44: return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
678  case 0x55: return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
679  case 0x66: return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
680  case 0x77: return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
681  case 0x88: return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
682  case 0x99: return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
683  default: return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
684  }
685  }
686  else if (dim == 3)
687  {
688  switch ((D1D << 4 ) | Q1D)
689  {
690  case 0x23: return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
691  case 0x34: return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
692  case 0x45: return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
693  case 0x56: return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
694  case 0x67: return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
695  case 0x78: return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
696  case 0x89: return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
697  default: return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
698  }
699  }
700  MFEM_ABORT("Unknown kernel.");
701 }
702 
703 // PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein
704 template<int T_D1D = 0, int T_Q1D = 0> static
705 void PADGTraceApplyTranspose2D(const int NF,
706  const Array<double> &b,
707  const Array<double> &bt,
708  const Vector &_op,
709  const Vector &_x,
710  Vector &_y,
711  const int d1d = 0,
712  const int q1d = 0)
713 {
714  const int VDIM = 1;
715  const int D1D = T_D1D ? T_D1D : d1d;
716  const int Q1D = T_Q1D ? T_Q1D : q1d;
717  MFEM_VERIFY(D1D <= MAX_D1D, "");
718  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
719  auto B = Reshape(b.Read(), Q1D, D1D);
720  auto Bt = Reshape(bt.Read(), D1D, Q1D);
721  auto op = Reshape(_op.Read(), Q1D, 2, 2, NF);
722  auto x = Reshape(_x.Read(), D1D, VDIM, 2, NF);
723  auto y = Reshape(_y.ReadWrite(), D1D, VDIM, 2, NF);
724 
725  MFEM_FORALL(f, NF,
726  {
727  const int VDIM = 1;
728  const int D1D = T_D1D ? T_D1D : d1d;
729  const int Q1D = T_Q1D ? T_Q1D : q1d;
730  // the following variables are evaluated at compile time
731  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
732  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
733  double u0[max_D1D][VDIM];
734  double u1[max_D1D][VDIM];
735  for (int d = 0; d < D1D; d++)
736  {
737  for (int c = 0; c < VDIM; c++)
738  {
739  u0[d][c] = x(d,c,0,f);
740  u1[d][c] = x(d,c,1,f);
741  }
742  }
743  double Bu0[max_Q1D][VDIM];
744  double Bu1[max_Q1D][VDIM];
745  for (int q = 0; q < Q1D; ++q)
746  {
747  for (int c = 0; c < VDIM; c++)
748  {
749  Bu0[q][c] = 0.0;
750  Bu1[q][c] = 0.0;
751  }
752  for (int d = 0; d < D1D; ++d)
753  {
754  const double b = B(q,d);
755  for (int c = 0; c < VDIM; c++)
756  {
757  Bu0[q][c] += b*u0[d][c];
758  Bu1[q][c] += b*u1[d][c];
759  }
760  }
761  }
762  double DBu0[max_Q1D][VDIM];
763  double DBu1[max_Q1D][VDIM];
764  for (int q = 0; q < Q1D; ++q)
765  {
766  for (int c = 0; c < VDIM; c++)
767  {
768  DBu0[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,0,1,f)*Bu1[q][c];
769  DBu1[q][c] = op(q,1,0,f)*Bu0[q][c] + op(q,1,1,f)*Bu1[q][c];
770  }
771  }
772  double BDBu0[max_D1D][VDIM];
773  double BDBu1[max_D1D][VDIM];
774  for (int d = 0; d < D1D; ++d)
775  {
776  for (int c = 0; c < VDIM; c++)
777  {
778  BDBu0[d][c] = 0.0;
779  BDBu1[d][c] = 0.0;
780  }
781  for (int q = 0; q < Q1D; ++q)
782  {
783  const double b = Bt(d,q);
784  for (int c = 0; c < VDIM; c++)
785  {
786  BDBu0[d][c] += b*DBu0[q][c];
787  BDBu1[d][c] += b*DBu1[q][c];
788  }
789  }
790  for (int c = 0; c < VDIM; c++)
791  {
792  y(d,c,0,f) += BDBu0[d][c];
793  y(d,c,1,f) += BDBu1[d][c];
794  }
795  }
796  });
797 }
798 
799 // PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein
800 template<int T_D1D = 0, int T_Q1D = 0> static
801 void PADGTraceApplyTranspose3D(const int NF,
802  const Array<double> &b,
803  const Array<double> &bt,
804  const Vector &_op,
805  const Vector &_x,
806  Vector &_y,
807  const int d1d = 0,
808  const int q1d = 0)
809 {
810  const int VDIM = 1;
811  const int D1D = T_D1D ? T_D1D : d1d;
812  const int Q1D = T_Q1D ? T_Q1D : q1d;
813  MFEM_VERIFY(D1D <= MAX_D1D, "");
814  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
815  auto B = Reshape(b.Read(), Q1D, D1D);
816  auto Bt = Reshape(bt.Read(), D1D, Q1D);
817  auto op = Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
818  auto x = Reshape(_x.Read(), D1D, D1D, VDIM, 2, NF);
819  auto y = Reshape(_y.ReadWrite(), D1D, D1D, VDIM, 2, NF);
820 
821  MFEM_FORALL(f, NF,
822  {
823  const int VDIM = 1;
824  const int D1D = T_D1D ? T_D1D : d1d;
825  const int Q1D = T_Q1D ? T_Q1D : q1d;
826  // the following variables are evaluated at compile time
827  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
828  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
829  double u0[max_D1D][max_D1D][VDIM];
830  double u1[max_D1D][max_D1D][VDIM];
831  for (int d1 = 0; d1 < D1D; d1++)
832  {
833  for (int d2 = 0; d2 < D1D; d2++)
834  {
835  for (int c = 0; c < VDIM; c++)
836  {
837  u0[d1][d2][c] = x(d1,d2,c,0,f);
838  u1[d1][d2][c] = x(d1,d2,c,1,f);
839  }
840  }
841  }
842  double Bu0[max_Q1D][max_D1D][VDIM];
843  double Bu1[max_Q1D][max_D1D][VDIM];
844  for (int q1 = 0; q1 < Q1D; ++q1)
845  {
846  for (int d2 = 0; d2 < D1D; ++d2)
847  {
848  for (int c = 0; c < VDIM; c++)
849  {
850  Bu0[q1][d2][c] = 0.0;
851  Bu1[q1][d2][c] = 0.0;
852  }
853  for (int d1 = 0; d1 < D1D; ++d1)
854  {
855  const double b = B(q1,d1);
856  for (int c = 0; c < VDIM; c++)
857  {
858  Bu0[q1][d2][c] += b*u0[d1][d2][c];
859  Bu1[q1][d2][c] += b*u1[d1][d2][c];
860  }
861  }
862  }
863  }
864  double BBu0[max_Q1D][max_Q1D][VDIM];
865  double BBu1[max_Q1D][max_Q1D][VDIM];
866  for (int q1 = 0; q1 < Q1D; ++q1)
867  {
868  for (int q2 = 0; q2 < Q1D; ++q2)
869  {
870  for (int c = 0; c < VDIM; c++)
871  {
872  BBu0[q1][q2][c] = 0.0;
873  BBu1[q1][q2][c] = 0.0;
874  }
875  for (int d2 = 0; d2 < D1D; ++d2)
876  {
877  const double b = B(q2,d2);
878  for (int c = 0; c < VDIM; c++)
879  {
880  BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
881  BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
882  }
883  }
884  }
885  }
886  double DBu0[max_Q1D][max_Q1D][VDIM];
887  double DBu1[max_Q1D][max_Q1D][VDIM];
888  for (int q1 = 0; q1 < Q1D; ++q1)
889  {
890  for (int q2 = 0; q2 < Q1D; ++q2)
891  {
892  const double D00 = op(q1,q2,0,0,f);
893  const double D01 = op(q1,q2,0,1,f);
894  const double D10 = op(q1,q2,1,0,f);
895  const double D11 = op(q1,q2,1,1,f);
896  for (int c = 0; c < VDIM; c++)
897  {
898  DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
899  DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
900  }
901  }
902  }
903  double BDBu0[max_D1D][max_Q1D][VDIM];
904  double BDBu1[max_D1D][max_Q1D][VDIM];
905  for (int d1 = 0; d1 < D1D; ++d1)
906  {
907  for (int q2 = 0; q2 < Q1D; ++q2)
908  {
909  for (int c = 0; c < VDIM; c++)
910  {
911  BDBu0[d1][q2][c] = 0.0;
912  BDBu1[d1][q2][c] = 0.0;
913  }
914  for (int q1 = 0; q1 < Q1D; ++q1)
915  {
916  const double b = Bt(d1,q1);
917  for (int c = 0; c < VDIM; c++)
918  {
919  BDBu0[d1][q2][c] += b*DBu0[q1][q2][c];
920  BDBu1[d1][q2][c] += b*DBu1[q1][q2][c];
921  }
922  }
923  }
924  }
925  double BBDBu0[max_D1D][max_D1D][VDIM];
926  double BBDBu1[max_D1D][max_D1D][VDIM];
927  for (int d1 = 0; d1 < D1D; ++d1)
928  {
929  for (int d2 = 0; d2 < D1D; ++d2)
930  {
931  for (int c = 0; c < VDIM; c++)
932  {
933  BBDBu0[d1][d2][c] = 0.0;
934  BBDBu1[d1][d2][c] = 0.0;
935  }
936  for (int q2 = 0; q2 < Q1D; ++q2)
937  {
938  const double b = Bt(d2,q2);
939  for (int c = 0; c < VDIM; c++)
940  {
941  BBDBu0[d1][d2][c] += b*BDBu0[d1][q2][c];
942  BBDBu1[d1][d2][c] += b*BDBu1[d1][q2][c];
943  }
944  }
945  for (int c = 0; c < VDIM; c++)
946  {
947  y(d1,d2,c,0,f) += BBDBu0[d1][d2][c];
948  y(d1,d2,c,1,f) += BBDBu1[d1][d2][c];
949  }
950  }
951  }
952  });
953 }
954 
955 // Optimized PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein
956 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
957 void SmemPADGTraceApplyTranspose3D(const int NF,
958  const Array<double> &b,
959  const Array<double> &bt,
960  const Vector &_op,
961  const Vector &_x,
962  Vector &_y,
963  const int d1d = 0,
964  const int q1d = 0)
965 {
966  const int D1D = T_D1D ? T_D1D : d1d;
967  const int Q1D = T_Q1D ? T_Q1D : q1d;
968  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
969  MFEM_VERIFY(D1D <= MAX_D1D, "");
970  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
971  auto B = Reshape(b.Read(), Q1D, D1D);
972  auto Bt = Reshape(bt.Read(), D1D, Q1D);
973  auto op = Reshape(_op.Read(), Q1D, Q1D, 2, 2, NF);
974  auto x = Reshape(_x.Read(), D1D, D1D, 2, NF);
975  auto y = Reshape(_y.ReadWrite(), D1D, D1D, 2, NF);
976 
977  MFEM_FORALL_2D(f, NF, Q1D, Q1D, NBZ,
978  {
979  const int tidz = MFEM_THREAD_ID(z);
980  const int D1D = T_D1D ? T_D1D : d1d;
981  const int Q1D = T_Q1D ? T_Q1D : q1d;
982  // the following variables are evaluated at compile time
983  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
984  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
985  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
986  MFEM_SHARED double u0[NBZ][max_D1D][max_D1D];
987  MFEM_SHARED double u1[NBZ][max_D1D][max_D1D];
988  MFEM_FOREACH_THREAD(d1,x,D1D)
989  {
990  MFEM_FOREACH_THREAD(d2,y,D1D)
991  {
992  u0[tidz][d1][d2] = x(d1,d2,0,f+tidz);
993  u1[tidz][d1][d2] = x(d1,d2,1,f+tidz);
994  }
995  }
996  MFEM_SYNC_THREAD;
997  MFEM_SHARED double Bu0[NBZ][max_Q1D][max_D1D];
998  MFEM_SHARED double Bu1[NBZ][max_Q1D][max_D1D];
999  MFEM_FOREACH_THREAD(q1,x,Q1D)
1000  {
1001  MFEM_FOREACH_THREAD(d2,y,D1D)
1002  {
1003  double Bu0_ = 0.0;
1004  double Bu1_ = 0.0;
1005  for (int d1 = 0; d1 < D1D; ++d1)
1006  {
1007  const double b = B(q1,d1);
1008  Bu0_ += b*u0[tidz][d1][d2];
1009  Bu1_ += b*u1[tidz][d1][d2];
1010  }
1011  Bu0[tidz][q1][d2] = Bu0_;
1012  Bu1[tidz][q1][d2] = Bu1_;
1013  }
1014  }
1015  MFEM_SYNC_THREAD;
1016  MFEM_SHARED double BBu0[NBZ][max_Q1D][max_Q1D];
1017  MFEM_SHARED double BBu1[NBZ][max_Q1D][max_Q1D];
1018  MFEM_FOREACH_THREAD(q1,x,Q1D)
1019  {
1020  MFEM_FOREACH_THREAD(q2,y,Q1D)
1021  {
1022  double BBu0_ = 0.0;
1023  double BBu1_ = 0.0;
1024  for (int d2 = 0; d2 < D1D; ++d2)
1025  {
1026  const double b = B(q2,d2);
1027  BBu0_ += b*Bu0[tidz][q1][d2];
1028  BBu1_ += b*Bu1[tidz][q1][d2];
1029  }
1030  BBu0[tidz][q1][q2] = BBu0_;
1031  BBu1[tidz][q1][q2] = BBu1_;
1032  }
1033  }
1034  MFEM_SYNC_THREAD;
1035  MFEM_SHARED double DBBu0[NBZ][max_Q1D][max_Q1D];
1036  MFEM_SHARED double DBBu1[NBZ][max_Q1D][max_Q1D];
1037  MFEM_FOREACH_THREAD(q1,x,Q1D)
1038  {
1039  MFEM_FOREACH_THREAD(q2,y,Q1D)
1040  {
1041  const double D00 = op(q1,q2,0,0,f+tidz);
1042  const double D01 = op(q1,q2,0,1,f+tidz);
1043  const double D10 = op(q1,q2,1,0,f+tidz);
1044  const double D11 = op(q1,q2,1,1,f+tidz);
1045  const double u0 = BBu0[tidz][q1][q2];
1046  const double u1 = BBu1[tidz][q1][q2];
1047  DBBu0[tidz][q1][q2] = D00*u0 + D01*u1;
1048  DBBu1[tidz][q1][q2] = D10*u0 + D11*u1;
1049  }
1050  }
1051  MFEM_SYNC_THREAD;
1052  MFEM_SHARED double BDBBu0[NBZ][max_Q1D][max_D1D];
1053  MFEM_SHARED double BDBBu1[NBZ][max_Q1D][max_D1D];
1054  MFEM_FOREACH_THREAD(q1,x,Q1D)
1055  {
1056  MFEM_FOREACH_THREAD(d2,y,D1D)
1057  {
1058  double BDBBu0_ = 0.0;
1059  double BDBBu1_ = 0.0;
1060  for (int q2 = 0; q2 < Q1D; ++q2)
1061  {
1062  const double b = Bt(d2,q2);
1063  BDBBu0_ += b*DBBu0[tidz][q1][q2];
1064  BDBBu1_ += b*DBBu1[tidz][q1][q2];
1065  }
1066  BDBBu0[tidz][q1][d2] = BDBBu0_;
1067  BDBBu1[tidz][q1][d2] = BDBBu1_;
1068  }
1069  }
1070  MFEM_SYNC_THREAD;
1071  MFEM_FOREACH_THREAD(d1,x,D1D)
1072  {
1073  MFEM_FOREACH_THREAD(d2,y,D1D)
1074  {
1075  double BBDBBu0_ = 0.0;
1076  double BBDBBu1_ = 0.0;
1077  for (int q1 = 0; q1 < Q1D; ++q1)
1078  {
1079  const double b = Bt(d1,q1);
1080  BBDBBu0_ += b*BDBBu0[tidz][q1][d2];
1081  BBDBBu1_ += b*BDBBu1[tidz][q1][d2];
1082  }
1083  y(d1,d2,0,f+tidz) += BBDBBu0_;
1084  y(d1,d2,1,f+tidz) += BBDBBu1_;
1085  }
1086  }
1087  });
1088 }
1089 
1090 static void PADGTraceApplyTranspose(const int dim,
1091  const int D1D,
1092  const int Q1D,
1093  const int NF,
1094  const Array<double> &B,
1095  const Array<double> &Bt,
1096  const Vector &op,
1097  const Vector &x,
1098  Vector &y)
1099 {
1100  if (dim == 2)
1101  {
1102  switch ((D1D << 4 ) | Q1D)
1103  {
1104  case 0x22: return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1105  case 0x33: return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1106  case 0x44: return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1107  case 0x55: return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1108  case 0x66: return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1109  case 0x77: return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1110  case 0x88: return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1111  case 0x99: return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1112  default: return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1113  }
1114  }
1115  else if (dim == 3)
1116  {
1117  switch ((D1D << 4 ) | Q1D)
1118  {
1119  case 0x23: return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1120  case 0x34: return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1121  case 0x45: return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1122  case 0x56: return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1123  case 0x67: return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1124  case 0x78: return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1125  case 0x89: return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1126  default: return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1127  }
1128  }
1129  MFEM_ABORT("Unknown kernel.");
1130 }
1131 
1132 // PA DGTraceIntegrator Apply kernel
1133 void DGTraceIntegrator::AddMultPA(const Vector &x, Vector &y) const
1134 {
1135  PADGTraceApply(dim, dofs1D, quad1D, nf,
1136  maps->B, maps->Bt,
1137  pa_data, x, y);
1138 }
1139 
1140 void DGTraceIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
1141 {
1142  PADGTraceApplyTranspose(dim, dofs1D, quad1D, nf,
1143  maps->B, maps->Bt,
1144  pa_data, x, y);
1145 }
1146 
1147 } // namespace mfem
const Geometry::Type geom
Definition: ex1.cpp:40
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:134
FaceType
Definition: mesh.hpp:45
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:388
const int MAX_Q1D
Definition: forall.hpp:27
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:290
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:372
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:51
const int MAX_D1D
Definition: forall.hpp:26
const double alpha
Definition: ex15.cpp:336
Vector data type.
Definition: vector.hpp:51
double f(const Vector &p)