MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_mass_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "libceed/mass.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 // PA Mass Integrator
23 
24 // PA Mass Assemble kernel
25 
26 void MassIntegrator::AssemblePA(const FiniteElementSpace &fes)
27 {
28  // Assuming the same element type
29  fespace = &fes;
30  Mesh *mesh = fes.GetMesh();
31  if (mesh->GetNE() == 0) { return; }
32  const FiniteElement &el = *fes.GetFE(0);
34  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T);
35  if (DeviceCanUseCeed())
36  {
37  delete ceedDataPtr;
38  ceedDataPtr = new CeedData;
39  InitCeedCoeff(Q, *mesh, *ir, ceedDataPtr);
40  return CeedPAMassAssemble(fes, *ir, *ceedDataPtr);
41  }
42  dim = mesh->Dimension();
43  ne = fes.GetMesh()->GetNE();
44  nq = ir->GetNPoints();
45  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::COORDINATES |
46  GeometricFactors::JACOBIANS);
47  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
48  dofs1D = maps->ndof;
49  quad1D = maps->nqpt;
50  pa_data.SetSize(ne*nq, Device::GetDeviceMemoryType());
51  Vector coeff;
52  if (Q == nullptr)
53  {
54  coeff.SetSize(1);
55  coeff(0) = 1.0;
56  }
57  else if (ConstantCoefficient* cQ = dynamic_cast<ConstantCoefficient*>(Q))
58  {
59  coeff.SetSize(1);
60  coeff(0) = cQ->constant;
61  }
62  else if (QuadratureFunctionCoefficient* cQ =
63  dynamic_cast<QuadratureFunctionCoefficient*>(Q))
64  {
65  const QuadratureFunction &qFun = cQ->GetQuadFunction();
66  MFEM_VERIFY(qFun.Size() == nq * ne,
67  "Incompatible QuadratureFunction dimension \n");
68 
69  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
70  "IntegrationRule used within integrator and in"
71  " QuadratureFunction appear to be different");
72  qFun.Read();
73  coeff.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
74  }
75  else
76  {
77  coeff.SetSize(nq * ne);
78  auto C = Reshape(coeff.HostWrite(), nq, ne);
79  for (int e = 0; e < ne; ++e)
80  {
82  for (int q = 0; q < nq; ++q)
83  {
84  C(q,e) = Q->Eval(T, ir->IntPoint(q));
85  }
86  }
87  }
88  if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); }
89  if (dim==2)
90  {
91  const int NE = ne;
92  const int Q1D = quad1D;
93  const bool const_c = coeff.Size() == 1;
94  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D);
95  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,2,2,NE);
96  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) :
97  Reshape(coeff.Read(), Q1D,Q1D,NE);
98  auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE);
99  MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
100  {
101  MFEM_FOREACH_THREAD(qx,x,Q1D)
102  {
103  MFEM_FOREACH_THREAD(qy,y,Q1D)
104  {
105  const double J11 = J(qx,qy,0,0,e);
106  const double J12 = J(qx,qy,1,0,e);
107  const double J21 = J(qx,qy,0,1,e);
108  const double J22 = J(qx,qy,1,1,e);
109  const double detJ = (J11*J22)-(J21*J12);
110  const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
111  v(qx,qy,e) = W(qx,qy) * coeff * detJ;
112  }
113  }
114  });
115  }
116  if (dim==3)
117  {
118  const int NE = ne;
119  const int Q1D = quad1D;
120  const bool const_c = coeff.Size() == 1;
121  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D,Q1D);
122  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,Q1D,3,3,NE);
123  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1,1) :
124  Reshape(coeff.Read(), Q1D,Q1D,Q1D,NE);
125  auto v = Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
126  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
127  {
128  MFEM_FOREACH_THREAD(qx,x,Q1D)
129  {
130  MFEM_FOREACH_THREAD(qy,y,Q1D)
131  {
132  MFEM_FOREACH_THREAD(qz,z,Q1D)
133  {
134  const double J11 = J(qx,qy,qz,0,0,e);
135  const double J21 = J(qx,qy,qz,1,0,e);
136  const double J31 = J(qx,qy,qz,2,0,e);
137  const double J12 = J(qx,qy,qz,0,1,e);
138  const double J22 = J(qx,qy,qz,1,1,e);
139  const double J32 = J(qx,qy,qz,2,1,e);
140  const double J13 = J(qx,qy,qz,0,2,e);
141  const double J23 = J(qx,qy,qz,1,2,e);
142  const double J33 = J(qx,qy,qz,2,2,e);
143  const double detJ = J11 * (J22 * J33 - J32 * J23) -
144  /* */ J21 * (J12 * J33 - J32 * J13) +
145  /* */ J31 * (J12 * J23 - J22 * J13);
146  const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
147  v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * detJ;
148  }
149  }
150  }
151  });
152  }
153 }
154 
155 template<int T_D1D = 0, int T_Q1D = 0>
156 static void PAMassAssembleDiagonal2D(const int NE,
157  const Array<double> &b,
158  const Vector &d,
159  Vector &y,
160  const int d1d = 0,
161  const int q1d = 0)
162 {
163  const int D1D = T_D1D ? T_D1D : d1d;
164  const int Q1D = T_Q1D ? T_Q1D : q1d;
165  MFEM_VERIFY(D1D <= MAX_D1D, "");
166  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
167  auto B = Reshape(b.Read(), Q1D, D1D);
168  auto D = Reshape(d.Read(), Q1D, Q1D, NE);
169  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
170  MFEM_FORALL(e, NE,
171  {
172  const int D1D = T_D1D ? T_D1D : d1d;
173  const int Q1D = T_Q1D ? T_Q1D : q1d;
174  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
175  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
176  double QD[MQ1][MD1];
177  for (int qx = 0; qx < Q1D; ++qx)
178  {
179  for (int dy = 0; dy < D1D; ++dy)
180  {
181  QD[qx][dy] = 0.0;
182  for (int qy = 0; qy < Q1D; ++qy)
183  {
184  QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
185  }
186  }
187  }
188  for (int dy = 0; dy < D1D; ++dy)
189  {
190  for (int dx = 0; dx < D1D; ++dx)
191  {
192  for (int qx = 0; qx < Q1D; ++qx)
193  {
194  Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
195  }
196  }
197  }
198  });
199 }
200 
201 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
202 static void SmemPAMassAssembleDiagonal2D(const int NE,
203  const Array<double> &b_,
204  const Vector &d_,
205  Vector &y_,
206  const int d1d = 0,
207  const int q1d = 0)
208 {
209  const int D1D = T_D1D ? T_D1D : d1d;
210  const int Q1D = T_Q1D ? T_Q1D : q1d;
211  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
212  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
213  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
214  MFEM_VERIFY(D1D <= MD1, "");
215  MFEM_VERIFY(Q1D <= MQ1, "");
216  auto b = Reshape(b_.Read(), Q1D, D1D);
217  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
218  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
219  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
220  {
221  const int tidz = MFEM_THREAD_ID(z);
222  const int D1D = T_D1D ? T_D1D : d1d;
223  const int Q1D = T_Q1D ? T_Q1D : q1d;
224  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
225  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
226  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
227  MFEM_SHARED double B[MQ1][MD1];
228  MFEM_SHARED double QDZ[NBZ][MQ1][MD1];
229  double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
230  if (tidz == 0)
231  {
232  MFEM_FOREACH_THREAD(d,y,D1D)
233  {
234  MFEM_FOREACH_THREAD(q,x,Q1D)
235  {
236  B[q][d] = b(q,d);
237  }
238  }
239  }
240  MFEM_SYNC_THREAD;
241  MFEM_FOREACH_THREAD(qx,x,Q1D)
242  {
243  MFEM_FOREACH_THREAD(dy,y,D1D)
244  {
245  QD[qx][dy] = 0.0;
246  for (int qy = 0; qy < Q1D; ++qy)
247  {
248  QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
249  }
250  }
251  }
252  MFEM_SYNC_THREAD;
253  MFEM_FOREACH_THREAD(dy,y,D1D)
254  {
255  MFEM_FOREACH_THREAD(dx,x,D1D)
256  {
257  for (int qx = 0; qx < Q1D; ++qx)
258  {
259  // might need absolute values on next line
260  Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
261  }
262  }
263  }
264  });
265 }
266 
267 template<int T_D1D = 0, int T_Q1D = 0>
268 static void PAMassAssembleDiagonal3D(const int NE,
269  const Array<double> &b,
270  const Vector &d,
271  Vector &y,
272  const int d1d = 0,
273  const int q1d = 0)
274 {
275  const int D1D = T_D1D ? T_D1D : d1d;
276  const int Q1D = T_Q1D ? T_Q1D : q1d;
277  MFEM_VERIFY(D1D <= MAX_D1D, "");
278  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
279  auto B = Reshape(b.Read(), Q1D, D1D);
280  auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
281  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
282  MFEM_FORALL(e, NE,
283  {
284  const int D1D = T_D1D ? T_D1D : d1d;
285  const int Q1D = T_Q1D ? T_Q1D : q1d;
286  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
287  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
288  double QQD[MQ1][MQ1][MD1];
289  double QDD[MQ1][MD1][MD1];
290  for (int qx = 0; qx < Q1D; ++qx)
291  {
292  for (int qy = 0; qy < Q1D; ++qy)
293  {
294  for (int dz = 0; dz < D1D; ++dz)
295  {
296  QQD[qx][qy][dz] = 0.0;
297  for (int qz = 0; qz < Q1D; ++qz)
298  {
299  QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
300  }
301  }
302  }
303  }
304  for (int qx = 0; qx < Q1D; ++qx)
305  {
306  for (int dz = 0; dz < D1D; ++dz)
307  {
308  for (int dy = 0; dy < D1D; ++dy)
309  {
310  QDD[qx][dy][dz] = 0.0;
311  for (int qy = 0; qy < Q1D; ++qy)
312  {
313  QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
314  }
315  }
316  }
317  }
318  for (int dz = 0; dz < D1D; ++dz)
319  {
320  for (int dy = 0; dy < D1D; ++dy)
321  {
322  for (int dx = 0; dx < D1D; ++dx)
323  {
324  double t = 0.0;
325  for (int qx = 0; qx < Q1D; ++qx)
326  {
327  t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
328  }
329  Y(dx, dy, dz, e) += t;
330  }
331  }
332  }
333  });
334 }
335 
336 template<int T_D1D = 0, int T_Q1D = 0>
337 static void SmemPAMassAssembleDiagonal3D(const int NE,
338  const Array<double> &b_,
339  const Vector &d_,
340  Vector &y_,
341  const int d1d = 0,
342  const int q1d = 0)
343 {
344  const int D1D = T_D1D ? T_D1D : d1d;
345  const int Q1D = T_Q1D ? T_Q1D : q1d;
346  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
347  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
348  MFEM_VERIFY(D1D <= MD1, "");
349  MFEM_VERIFY(Q1D <= MQ1, "");
350  auto b = Reshape(b_.Read(), Q1D, D1D);
351  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
352  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
353  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
354  {
355  const int tidz = MFEM_THREAD_ID(z);
356  const int D1D = T_D1D ? T_D1D : d1d;
357  const int Q1D = T_Q1D ? T_Q1D : q1d;
358  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
359  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
360  MFEM_SHARED double B[MQ1][MD1];
361  MFEM_SHARED double QQD[MQ1][MQ1][MD1];
362  MFEM_SHARED double QDD[MQ1][MD1][MD1];
363  if (tidz == 0)
364  {
365  MFEM_FOREACH_THREAD(d,y,D1D)
366  {
367  MFEM_FOREACH_THREAD(q,x,Q1D)
368  {
369  B[q][d] = b(q,d);
370  }
371  }
372  }
373  MFEM_SYNC_THREAD;
374  MFEM_FOREACH_THREAD(qx,x,Q1D)
375  {
376  MFEM_FOREACH_THREAD(qy,y,Q1D)
377  {
378  MFEM_FOREACH_THREAD(dz,z,D1D)
379  {
380  QQD[qx][qy][dz] = 0.0;
381  for (int qz = 0; qz < Q1D; ++qz)
382  {
383  QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
384  }
385  }
386  }
387  }
388  MFEM_SYNC_THREAD;
389  MFEM_FOREACH_THREAD(qx,x,Q1D)
390  {
391  MFEM_FOREACH_THREAD(dz,z,D1D)
392  {
393  MFEM_FOREACH_THREAD(dy,y,D1D)
394  {
395  QDD[qx][dy][dz] = 0.0;
396  for (int qy = 0; qy < Q1D; ++qy)
397  {
398  QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
399  }
400  }
401  }
402  }
403  MFEM_SYNC_THREAD;
404  MFEM_FOREACH_THREAD(dz,z,D1D)
405  {
406  MFEM_FOREACH_THREAD(dy,y,D1D)
407  {
408  MFEM_FOREACH_THREAD(dx,x,D1D)
409  {
410  double t = 0.0;
411  for (int qx = 0; qx < Q1D; ++qx)
412  {
413  t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
414  }
415  Y(dx, dy, dz, e) += t;
416  }
417  }
418  }
419  });
420 }
421 
422 static void PAMassAssembleDiagonal(const int dim, const int D1D,
423  const int Q1D, const int NE,
424  const Array<double> &B,
425  const Vector &D,
426  Vector &Y)
427 {
428  if (dim == 2)
429  {
430  switch ((D1D << 4 ) | Q1D)
431  {
432  case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
433  case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
434  case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
435  case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
436  case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
437  case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
438  case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
439  case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
440  default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
441  }
442  }
443  else if (dim == 3)
444  {
445  switch ((D1D << 4 ) | Q1D)
446  {
447  case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
448  case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
449  case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
450  case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
451  case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
452  case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
453  case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
454  default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
455  }
456  }
457  MFEM_ABORT("Unknown kernel.");
458 }
459 
460 void MassIntegrator::AssembleDiagonalPA(Vector &diag)
461 {
462  if (DeviceCanUseCeed())
463  {
464  CeedAssembleDiagonal(ceedDataPtr, diag);
465  }
466  else
467  {
468  PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
469  }
470 }
471 
472 
473 #ifdef MFEM_USE_OCCA
474 // OCCA PA Mass Apply 2D kernel
475 static void OccaPAMassApply2D(const int D1D,
476  const int Q1D,
477  const int NE,
478  const Array<double> &B,
479  const Array<double> &Bt,
480  const Vector &D,
481  const Vector &X,
482  Vector &Y)
483 {
484  occa::properties props;
485  props["defines/D1D"] = D1D;
486  props["defines/Q1D"] = Q1D;
487  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
488  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
489  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
490  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
491  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
492  const occa_id_t id = std::make_pair(D1D,Q1D);
493  if (!Device::Allows(Backend::OCCA_CUDA))
494  {
495  static occa_kernel_t OccaMassApply2D_cpu;
496  if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end())
497  {
498  const occa::kernel MassApply2D_CPU =
499  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
500  "MassApply2D_CPU", props);
501  OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU);
502  }
503  OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
504  }
505  else
506  {
507  static occa_kernel_t OccaMassApply2D_gpu;
508  if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end())
509  {
510  const occa::kernel MassApply2D_GPU =
511  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
512  "MassApply2D_GPU", props);
513  OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU);
514  }
515  OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
516  }
517 }
518 
519 // OCCA PA Mass Apply 3D kernel
520 static void OccaPAMassApply3D(const int D1D,
521  const int Q1D,
522  const int NE,
523  const Array<double> &B,
524  const Array<double> &Bt,
525  const Vector &D,
526  const Vector &X,
527  Vector &Y)
528 {
529  occa::properties props;
530  props["defines/D1D"] = D1D;
531  props["defines/Q1D"] = Q1D;
532  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
533  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
534  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
535  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
536  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
537  const occa_id_t id = std::make_pair(D1D,Q1D);
538  if (!Device::Allows(Backend::OCCA_CUDA))
539  {
540  static occa_kernel_t OccaMassApply3D_cpu;
541  if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end())
542  {
543  const occa::kernel MassApply3D_CPU =
544  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
545  "MassApply3D_CPU", props);
546  OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU);
547  }
548  OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
549  }
550  else
551  {
552  static occa_kernel_t OccaMassApply3D_gpu;
553  if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end())
554  {
555  const occa::kernel MassApply3D_GPU =
556  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
557  "MassApply3D_GPU", props);
558  OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU);
559  }
560  OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
561  }
562 }
563 #endif // MFEM_USE_OCCA
564 
565 template<int T_D1D = 0, int T_Q1D = 0>
566 static void PAMassApply2D(const int NE,
567  const Array<double> &b_,
568  const Array<double> &bt_,
569  const Vector &d_,
570  const Vector &x_,
571  Vector &y_,
572  const int d1d = 0,
573  const int q1d = 0)
574 {
575  const int D1D = T_D1D ? T_D1D : d1d;
576  const int Q1D = T_Q1D ? T_Q1D : q1d;
577  MFEM_VERIFY(D1D <= MAX_D1D, "");
578  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
579  auto B = Reshape(b_.Read(), Q1D, D1D);
580  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
581  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
582  auto X = Reshape(x_.Read(), D1D, D1D, NE);
583  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
584  MFEM_FORALL(e, NE,
585  {
586  const int D1D = T_D1D ? T_D1D : d1d; // nvcc workaround
587  const int Q1D = T_Q1D ? T_Q1D : q1d;
588  // the following variables are evaluated at compile time
589  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
590  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
591  double sol_xy[max_Q1D][max_Q1D];
592  for (int qy = 0; qy < Q1D; ++qy)
593  {
594  for (int qx = 0; qx < Q1D; ++qx)
595  {
596  sol_xy[qy][qx] = 0.0;
597  }
598  }
599  for (int dy = 0; dy < D1D; ++dy)
600  {
601  double sol_x[max_Q1D];
602  for (int qy = 0; qy < Q1D; ++qy)
603  {
604  sol_x[qy] = 0.0;
605  }
606  for (int dx = 0; dx < D1D; ++dx)
607  {
608  const double s = X(dx,dy,e);
609  for (int qx = 0; qx < Q1D; ++qx)
610  {
611  sol_x[qx] += B(qx,dx)* s;
612  }
613  }
614  for (int qy = 0; qy < Q1D; ++qy)
615  {
616  const double d2q = B(qy,dy);
617  for (int qx = 0; qx < Q1D; ++qx)
618  {
619  sol_xy[qy][qx] += d2q * sol_x[qx];
620  }
621  }
622  }
623  for (int qy = 0; qy < Q1D; ++qy)
624  {
625  for (int qx = 0; qx < Q1D; ++qx)
626  {
627  sol_xy[qy][qx] *= D(qx,qy,e);
628  }
629  }
630  for (int qy = 0; qy < Q1D; ++qy)
631  {
632  double sol_x[max_D1D];
633  for (int dx = 0; dx < D1D; ++dx)
634  {
635  sol_x[dx] = 0.0;
636  }
637  for (int qx = 0; qx < Q1D; ++qx)
638  {
639  const double s = sol_xy[qy][qx];
640  for (int dx = 0; dx < D1D; ++dx)
641  {
642  sol_x[dx] += Bt(dx,qx) * s;
643  }
644  }
645  for (int dy = 0; dy < D1D; ++dy)
646  {
647  const double q2d = Bt(dy,qy);
648  for (int dx = 0; dx < D1D; ++dx)
649  {
650  Y(dx,dy,e) += q2d * sol_x[dx];
651  }
652  }
653  }
654  });
655 }
656 
657 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
658 static void SmemPAMassApply2D(const int NE,
659  const Array<double> &b_,
660  const Array<double> &bt_,
661  const Vector &d_,
662  const Vector &x_,
663  Vector &y_,
664  const int d1d = 0,
665  const int q1d = 0)
666 {
667  MFEM_CONTRACT_VAR(bt_);
668  const int D1D = T_D1D ? T_D1D : d1d;
669  const int Q1D = T_Q1D ? T_Q1D : q1d;
670  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
671  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
672  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
673  MFEM_VERIFY(D1D <= MD1, "");
674  MFEM_VERIFY(Q1D <= MQ1, "");
675  auto b = Reshape(b_.Read(), Q1D, D1D);
676  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
677  auto x = Reshape(x_.Read(), D1D, D1D, NE);
678  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
679  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
680  {
681  const int tidz = MFEM_THREAD_ID(z);
682  const int D1D = T_D1D ? T_D1D : d1d;
683  const int Q1D = T_Q1D ? T_Q1D : q1d;
684  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
685  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
686  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
687  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
688  MFEM_SHARED double BBt[MQ1*MD1];
689  double (*B)[MD1] = (double (*)[MD1]) BBt;
690  double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
691  MFEM_SHARED double sm0[NBZ][MDQ*MDQ];
692  MFEM_SHARED double sm1[NBZ][MDQ*MDQ];
693  double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
694  double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
695  double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
696  double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
697  MFEM_FOREACH_THREAD(dy,y,D1D)
698  {
699  MFEM_FOREACH_THREAD(dx,x,D1D)
700  {
701  X[dy][dx] = x(dx,dy,e);
702  }
703  }
704  if (tidz == 0)
705  {
706  MFEM_FOREACH_THREAD(dy,y,D1D)
707  {
708  MFEM_FOREACH_THREAD(q,x,Q1D)
709  {
710  B[q][dy] = b(q,dy);
711  }
712  }
713  }
714  MFEM_SYNC_THREAD;
715  MFEM_FOREACH_THREAD(dy,y,D1D)
716  {
717  MFEM_FOREACH_THREAD(qx,x,Q1D)
718  {
719  double dq = 0.0;
720  for (int dx = 0; dx < D1D; ++dx)
721  {
722  dq += X[dy][dx] * B[qx][dx];
723  }
724  DQ[dy][qx] = dq;
725  }
726  }
727  MFEM_SYNC_THREAD;
728  MFEM_FOREACH_THREAD(qy,y,Q1D)
729  {
730  MFEM_FOREACH_THREAD(qx,x,Q1D)
731  {
732  double qq = 0.0;
733  for (int dy = 0; dy < D1D; ++dy)
734  {
735  qq += DQ[dy][qx] * B[qy][dy];
736  }
737  QQ[qy][qx] = qq * D(qx, qy, e);
738  }
739  }
740  MFEM_SYNC_THREAD;
741  if (tidz == 0)
742  {
743  MFEM_FOREACH_THREAD(dy,y,D1D)
744  {
745  MFEM_FOREACH_THREAD(q,x,Q1D)
746  {
747  Bt[dy][q] = b(q,dy);
748  }
749  }
750  }
751  MFEM_SYNC_THREAD;
752  MFEM_FOREACH_THREAD(qy,y,Q1D)
753  {
754  MFEM_FOREACH_THREAD(dx,x,D1D)
755  {
756  double dq = 0.0;
757  for (int qx = 0; qx < Q1D; ++qx)
758  {
759  dq += QQ[qy][qx] * Bt[dx][qx];
760  }
761  QD[qy][dx] = dq;
762  }
763  }
764  MFEM_SYNC_THREAD;
765  MFEM_FOREACH_THREAD(dy,y,D1D)
766  {
767  MFEM_FOREACH_THREAD(dx,x,D1D)
768  {
769  double dd = 0.0;
770  for (int qy = 0; qy < Q1D; ++qy)
771  {
772  dd += (QD[qy][dx] * Bt[dy][qy]);
773  }
774  Y(dx, dy, e) += dd;
775  }
776  }
777  });
778 }
779 
780 template<int T_D1D = 0, int T_Q1D = 0>
781 static void PAMassApply3D(const int NE,
782  const Array<double> &b_,
783  const Array<double> &bt_,
784  const Vector &d_,
785  const Vector &x_,
786  Vector &y_,
787  const int d1d = 0,
788  const int q1d = 0)
789 {
790  const int D1D = T_D1D ? T_D1D : d1d;
791  const int Q1D = T_Q1D ? T_Q1D : q1d;
792  MFEM_VERIFY(D1D <= MAX_D1D, "");
793  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
794  auto B = Reshape(b_.Read(), Q1D, D1D);
795  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
796  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
797  auto X = Reshape(x_.Read(), D1D, D1D, D1D, NE);
798  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
799  MFEM_FORALL(e, NE,
800  {
801  const int D1D = T_D1D ? T_D1D : d1d;
802  const int Q1D = T_Q1D ? T_Q1D : q1d;
803  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
804  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
805  double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
806  for (int qz = 0; qz < Q1D; ++qz)
807  {
808  for (int qy = 0; qy < Q1D; ++qy)
809  {
810  for (int qx = 0; qx < Q1D; ++qx)
811  {
812  sol_xyz[qz][qy][qx] = 0.0;
813  }
814  }
815  }
816  for (int dz = 0; dz < D1D; ++dz)
817  {
818  double sol_xy[max_Q1D][max_Q1D];
819  for (int qy = 0; qy < Q1D; ++qy)
820  {
821  for (int qx = 0; qx < Q1D; ++qx)
822  {
823  sol_xy[qy][qx] = 0.0;
824  }
825  }
826  for (int dy = 0; dy < D1D; ++dy)
827  {
828  double sol_x[max_Q1D];
829  for (int qx = 0; qx < Q1D; ++qx)
830  {
831  sol_x[qx] = 0;
832  }
833  for (int dx = 0; dx < D1D; ++dx)
834  {
835  const double s = X(dx,dy,dz,e);
836  for (int qx = 0; qx < Q1D; ++qx)
837  {
838  sol_x[qx] += B(qx,dx) * s;
839  }
840  }
841  for (int qy = 0; qy < Q1D; ++qy)
842  {
843  const double wy = B(qy,dy);
844  for (int qx = 0; qx < Q1D; ++qx)
845  {
846  sol_xy[qy][qx] += wy * sol_x[qx];
847  }
848  }
849  }
850  for (int qz = 0; qz < Q1D; ++qz)
851  {
852  const double wz = B(qz,dz);
853  for (int qy = 0; qy < Q1D; ++qy)
854  {
855  for (int qx = 0; qx < Q1D; ++qx)
856  {
857  sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
858  }
859  }
860  }
861  }
862  for (int qz = 0; qz < Q1D; ++qz)
863  {
864  for (int qy = 0; qy < Q1D; ++qy)
865  {
866  for (int qx = 0; qx < Q1D; ++qx)
867  {
868  sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
869  }
870  }
871  }
872  for (int qz = 0; qz < Q1D; ++qz)
873  {
874  double sol_xy[max_D1D][max_D1D];
875  for (int dy = 0; dy < D1D; ++dy)
876  {
877  for (int dx = 0; dx < D1D; ++dx)
878  {
879  sol_xy[dy][dx] = 0;
880  }
881  }
882  for (int qy = 0; qy < Q1D; ++qy)
883  {
884  double sol_x[max_D1D];
885  for (int dx = 0; dx < D1D; ++dx)
886  {
887  sol_x[dx] = 0;
888  }
889  for (int qx = 0; qx < Q1D; ++qx)
890  {
891  const double s = sol_xyz[qz][qy][qx];
892  for (int dx = 0; dx < D1D; ++dx)
893  {
894  sol_x[dx] += Bt(dx,qx) * s;
895  }
896  }
897  for (int dy = 0; dy < D1D; ++dy)
898  {
899  const double wy = Bt(dy,qy);
900  for (int dx = 0; dx < D1D; ++dx)
901  {
902  sol_xy[dy][dx] += wy * sol_x[dx];
903  }
904  }
905  }
906  for (int dz = 0; dz < D1D; ++dz)
907  {
908  const double wz = Bt(dz,qz);
909  for (int dy = 0; dy < D1D; ++dy)
910  {
911  for (int dx = 0; dx < D1D; ++dx)
912  {
913  Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
914  }
915  }
916  }
917  }
918  });
919 }
920 
921 template<int T_D1D = 0, int T_Q1D = 0>
922 static void SmemPAMassApply3D(const int NE,
923  const Array<double> &b_,
924  const Array<double> &bt_,
925  const Vector &d_,
926  const Vector &x_,
927  Vector &y_,
928  const int d1d = 0,
929  const int q1d = 0)
930 {
931  MFEM_CONTRACT_VAR(bt_);
932  const int D1D = T_D1D ? T_D1D : d1d;
933  const int Q1D = T_Q1D ? T_Q1D : q1d;
934  constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D;
935  constexpr int M1D = T_D1D ? T_D1D : MAX_D1D;
936  MFEM_VERIFY(D1D <= M1D, "");
937  MFEM_VERIFY(Q1D <= M1Q, "");
938  auto b = Reshape(b_.Read(), Q1D, D1D);
939  auto d = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
940  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
941  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
942  MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
943  {
944  const int D1D = T_D1D ? T_D1D : d1d;
945  const int Q1D = T_Q1D ? T_Q1D : q1d;
946  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
947  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
948  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
949  MFEM_SHARED double sDQ[MQ1*MD1];
950  double (*B)[MD1] = (double (*)[MD1]) sDQ;
951  double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
952  MFEM_SHARED double sm0[MDQ*MDQ*MDQ];
953  MFEM_SHARED double sm1[MDQ*MDQ*MDQ];
954  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
955  double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
956  double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
957  double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
958  double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
959  double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
960  MFEM_FOREACH_THREAD(dy,y,D1D)
961  {
962  MFEM_FOREACH_THREAD(dx,x,D1D)
963  {
964  MFEM_UNROLL(MD1)
965  for (int dz = 0; dz < D1D; ++dz)
966  {
967  X[dz][dy][dx] = x(dx,dy,dz,e);
968  }
969  }
970  MFEM_FOREACH_THREAD(dx,x,Q1D)
971  {
972  B[dx][dy] = b(dx,dy);
973  }
974  }
975  MFEM_SYNC_THREAD;
976  MFEM_FOREACH_THREAD(dy,y,D1D)
977  {
978  MFEM_FOREACH_THREAD(qx,x,Q1D)
979  {
980  double u[D1D];
981  MFEM_UNROLL(MD1)
982  for (int dz = 0; dz < D1D; dz++)
983  {
984  u[dz] = 0;
985  }
986  MFEM_UNROLL(MD1)
987  for (int dx = 0; dx < D1D; ++dx)
988  {
989  MFEM_UNROLL(MD1)
990  for (int dz = 0; dz < D1D; ++dz)
991  {
992  u[dz] += X[dz][dy][dx] * B[qx][dx];
993  }
994  }
995  MFEM_UNROLL(MD1)
996  for (int dz = 0; dz < D1D; ++dz)
997  {
998  DDQ[dz][dy][qx] = u[dz];
999  }
1000  }
1001  }
1002  MFEM_SYNC_THREAD;
1003  MFEM_FOREACH_THREAD(qy,y,Q1D)
1004  {
1005  MFEM_FOREACH_THREAD(qx,x,Q1D)
1006  {
1007  double u[D1D];
1008  MFEM_UNROLL(MD1)
1009  for (int dz = 0; dz < D1D; dz++)
1010  {
1011  u[dz] = 0;
1012  }
1013  MFEM_UNROLL(MD1)
1014  for (int dy = 0; dy < D1D; ++dy)
1015  {
1016  MFEM_UNROLL(MD1)
1017  for (int dz = 0; dz < D1D; dz++)
1018  {
1019  u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
1020  }
1021  }
1022  MFEM_UNROLL(MD1)
1023  for (int dz = 0; dz < D1D; dz++)
1024  {
1025  DQQ[dz][qy][qx] = u[dz];
1026  }
1027  }
1028  }
1029  MFEM_SYNC_THREAD;
1030  MFEM_FOREACH_THREAD(qy,y,Q1D)
1031  {
1032  MFEM_FOREACH_THREAD(qx,x,Q1D)
1033  {
1034  double u[Q1D];
1035  MFEM_UNROLL(MQ1)
1036  for (int qz = 0; qz < Q1D; qz++)
1037  {
1038  u[qz] = 0;
1039  }
1040  MFEM_UNROLL(MD1)
1041  for (int dz = 0; dz < D1D; ++dz)
1042  {
1043  MFEM_UNROLL(MQ1)
1044  for (int qz = 0; qz < Q1D; qz++)
1045  {
1046  u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1047  }
1048  }
1049  MFEM_UNROLL(MQ1)
1050  for (int qz = 0; qz < Q1D; qz++)
1051  {
1052  QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1053  }
1054  }
1055  }
1056  MFEM_SYNC_THREAD;
1057  MFEM_FOREACH_THREAD(d,y,D1D)
1058  {
1059  MFEM_FOREACH_THREAD(q,x,Q1D)
1060  {
1061  Bt[d][q] = b(q,d);
1062  }
1063  }
1064  MFEM_SYNC_THREAD;
1065  MFEM_FOREACH_THREAD(qy,y,Q1D)
1066  {
1067  MFEM_FOREACH_THREAD(dx,x,D1D)
1068  {
1069  double u[Q1D];
1070  MFEM_UNROLL(MQ1)
1071  for (int qz = 0; qz < Q1D; ++qz)
1072  {
1073  u[qz] = 0;
1074  }
1075  MFEM_UNROLL(MQ1)
1076  for (int qx = 0; qx < Q1D; ++qx)
1077  {
1078  MFEM_UNROLL(MQ1)
1079  for (int qz = 0; qz < Q1D; ++qz)
1080  {
1081  u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1082  }
1083  }
1084  MFEM_UNROLL(MQ1)
1085  for (int qz = 0; qz < Q1D; ++qz)
1086  {
1087  QQD[qz][qy][dx] = u[qz];
1088  }
1089  }
1090  }
1091  MFEM_SYNC_THREAD;
1092  MFEM_FOREACH_THREAD(dy,y,D1D)
1093  {
1094  MFEM_FOREACH_THREAD(dx,x,D1D)
1095  {
1096  double u[Q1D];
1097  MFEM_UNROLL(MQ1)
1098  for (int qz = 0; qz < Q1D; ++qz)
1099  {
1100  u[qz] = 0;
1101  }
1102  MFEM_UNROLL(MQ1)
1103  for (int qy = 0; qy < Q1D; ++qy)
1104  {
1105  MFEM_UNROLL(MQ1)
1106  for (int qz = 0; qz < Q1D; ++qz)
1107  {
1108  u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1109  }
1110  }
1111  MFEM_UNROLL(MQ1)
1112  for (int qz = 0; qz < Q1D; ++qz)
1113  {
1114  QDD[qz][dy][dx] = u[qz];
1115  }
1116  }
1117  }
1118  MFEM_SYNC_THREAD;
1119  MFEM_FOREACH_THREAD(dy,y,D1D)
1120  {
1121  MFEM_FOREACH_THREAD(dx,x,D1D)
1122  {
1123  double u[D1D];
1124  MFEM_UNROLL(MD1)
1125  for (int dz = 0; dz < D1D; ++dz)
1126  {
1127  u[dz] = 0;
1128  }
1129  MFEM_UNROLL(MQ1)
1130  for (int qz = 0; qz < Q1D; ++qz)
1131  {
1132  MFEM_UNROLL(MD1)
1133  for (int dz = 0; dz < D1D; ++dz)
1134  {
1135  u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1136  }
1137  }
1138  MFEM_UNROLL(MD1)
1139  for (int dz = 0; dz < D1D; ++dz)
1140  {
1141  y(dx,dy,dz,e) += u[dz];
1142  }
1143  }
1144  }
1145  });
1146 }
1147 
1148 static void PAMassApply(const int dim,
1149  const int D1D,
1150  const int Q1D,
1151  const int NE,
1152  const Array<double> &B,
1153  const Array<double> &Bt,
1154  const Vector &D,
1155  const Vector &X,
1156  Vector &Y)
1157 {
1158 #ifdef MFEM_USE_OCCA
1159  if (DeviceCanUseOcca())
1160  {
1161  if (dim == 2)
1162  {
1163  return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1164  }
1165  if (dim == 3)
1166  {
1167  return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1168  }
1169  MFEM_ABORT("OCCA PA Mass Apply unknown kernel!");
1170  }
1171 #endif // MFEM_USE_OCCA
1172  const int id = (D1D << 4) | Q1D;
1173  if (dim == 2)
1174  {
1175  switch (id)
1176  {
1177  case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1178  case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1179  case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1180  case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1181  case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1182  case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1183  case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1184  case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1185  case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1186  case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1187  case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1188  case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1189  case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1190  default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1191  }
1192  }
1193  else if (dim == 3)
1194  {
1195  switch (id)
1196  {
1197  case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1198  case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1199  case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1200  case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1201  case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1202  case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1203  case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1204  case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1205  case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1206  case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1207  case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1208  case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1209  case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1210  default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1211  }
1212  }
1213  mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl;
1214  MFEM_ABORT("Unknown kernel.");
1215 }
1216 
1217 void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const
1218 {
1219  if (DeviceCanUseCeed())
1220  {
1221  CeedAddMult(ceedDataPtr, x, y);
1222  }
1223  else
1224  {
1225  PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
1226  }
1227 }
1228 
1229 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:245
Abstract class for all finite elements.
Definition: fe.hpp:235
int Size() const
Return the logical size of the array.
Definition: array.hpp:124
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:103
A coefficient that is constant across space and time.
Definition: coefficient.hpp:78
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
Definition: fespace.hpp:766
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:459
const Geometry::Type geom
Definition: ex1.cpp:40
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
int Size() const
Returns the size of the vector.
Definition: vector.hpp:160
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:165
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:737
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:85
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:183
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:134
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Definition: intrules.hpp:248
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:768
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:388
const int MAX_Q1D
Definition: forall.hpp:27
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:314
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
Definition: gridfunc.hpp:711
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:290
int Dimension() const
Definition: mesh.hpp:788
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:460
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe.cpp:376
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:372
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
Definition: mesh.cpp:336
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:26
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:1798
double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:384
Vector data type.
Definition: vector.hpp:51
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
Definition: vector.hpp:517
std::pair< int, int > occa_id_t
Definition: occa.hpp:78
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
Class representing a function through its values (scalar or vector) at quadrature points...
Definition: gridfunc.hpp:670