MFEM  v4.3.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_mass_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "ceed/mass.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 // PA Mass Integrator
23 
24 // PA Mass Assemble kernel
25 
26 void MassIntegrator::AssemblePA(const FiniteElementSpace &fes)
27 {
28  const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
29  Device::GetDeviceMemoryType() : pa_mt;
30 
31  // Assuming the same element type
32  fespace = &fes;
33  Mesh *mesh = fes.GetMesh();
34  if (mesh->GetNE() == 0) { return; }
35  const FiniteElement &el = *fes.GetFE(0);
37  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T);
38  if (DeviceCanUseCeed())
39  {
40  delete ceedOp;
41  ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q);
42  return;
43  }
44  dim = mesh->Dimension();
45  ne = fes.GetMesh()->GetNE();
46  nq = ir->GetNPoints();
47  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::COORDINATES |
48  GeometricFactors::JACOBIANS, mt);
49  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
50  dofs1D = maps->ndof;
51  quad1D = maps->nqpt;
52  pa_data.SetSize(ne*nq, mt);
53  Vector coeff;
54  if (Q == nullptr)
55  {
56  coeff.SetSize(1);
57  coeff(0) = 1.0;
58  }
59  else if (ConstantCoefficient* cQ = dynamic_cast<ConstantCoefficient*>(Q))
60  {
61  coeff.SetSize(1);
62  coeff(0) = cQ->constant;
63  }
64  else if (QuadratureFunctionCoefficient* cQ =
65  dynamic_cast<QuadratureFunctionCoefficient*>(Q))
66  {
67  const QuadratureFunction &qFun = cQ->GetQuadFunction();
68  MFEM_VERIFY(qFun.Size() == nq * ne,
69  "Incompatible QuadratureFunction dimension \n");
70 
71  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
72  "IntegrationRule used within integrator and in"
73  " QuadratureFunction appear to be different");
74  qFun.Read();
75  coeff.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
76  }
77  else
78  {
79  coeff.SetSize(nq * ne);
80  auto C = Reshape(coeff.HostWrite(), nq, ne);
81  for (int e = 0; e < ne; ++e)
82  {
84  for (int q = 0; q < nq; ++q)
85  {
86  C(q,e) = Q->Eval(T, ir->IntPoint(q));
87  }
88  }
89  }
90  if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); }
91  if (dim==2)
92  {
93  const int NE = ne;
94  const int Q1D = quad1D;
95  const bool const_c = coeff.Size() == 1;
96  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D);
97  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,2,2,NE);
98  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) :
99  Reshape(coeff.Read(), Q1D,Q1D,NE);
100  auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE);
101  MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
102  {
103  MFEM_FOREACH_THREAD(qx,x,Q1D)
104  {
105  MFEM_FOREACH_THREAD(qy,y,Q1D)
106  {
107  const double J11 = J(qx,qy,0,0,e);
108  const double J12 = J(qx,qy,1,0,e);
109  const double J21 = J(qx,qy,0,1,e);
110  const double J22 = J(qx,qy,1,1,e);
111  const double detJ = (J11*J22)-(J21*J12);
112  const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
113  v(qx,qy,e) = W(qx,qy) * coeff * detJ;
114  }
115  }
116  });
117  }
118  if (dim==3)
119  {
120  const int NE = ne;
121  const int Q1D = quad1D;
122  const bool const_c = coeff.Size() == 1;
123  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D,Q1D);
124  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,Q1D,3,3,NE);
125  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1,1) :
126  Reshape(coeff.Read(), Q1D,Q1D,Q1D,NE);
127  auto v = Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
128  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
129  {
130  MFEM_FOREACH_THREAD(qx,x,Q1D)
131  {
132  MFEM_FOREACH_THREAD(qy,y,Q1D)
133  {
134  MFEM_FOREACH_THREAD(qz,z,Q1D)
135  {
136  const double J11 = J(qx,qy,qz,0,0,e);
137  const double J21 = J(qx,qy,qz,1,0,e);
138  const double J31 = J(qx,qy,qz,2,0,e);
139  const double J12 = J(qx,qy,qz,0,1,e);
140  const double J22 = J(qx,qy,qz,1,1,e);
141  const double J32 = J(qx,qy,qz,2,1,e);
142  const double J13 = J(qx,qy,qz,0,2,e);
143  const double J23 = J(qx,qy,qz,1,2,e);
144  const double J33 = J(qx,qy,qz,2,2,e);
145  const double detJ = J11 * (J22 * J33 - J32 * J23) -
146  /* */ J21 * (J12 * J33 - J32 * J13) +
147  /* */ J31 * (J12 * J23 - J22 * J13);
148  const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
149  v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * detJ;
150  }
151  }
152  }
153  });
154  }
155 }
156 
157 template<int T_D1D = 0, int T_Q1D = 0>
158 static void PAMassAssembleDiagonal2D(const int NE,
159  const Array<double> &b,
160  const Vector &d,
161  Vector &y,
162  const int d1d = 0,
163  const int q1d = 0)
164 {
165  const int D1D = T_D1D ? T_D1D : d1d;
166  const int Q1D = T_Q1D ? T_Q1D : q1d;
167  MFEM_VERIFY(D1D <= MAX_D1D, "");
168  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
169  auto B = Reshape(b.Read(), Q1D, D1D);
170  auto D = Reshape(d.Read(), Q1D, Q1D, NE);
171  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
172  MFEM_FORALL(e, NE,
173  {
174  const int D1D = T_D1D ? T_D1D : d1d;
175  const int Q1D = T_Q1D ? T_Q1D : q1d;
176  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
177  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
178  double QD[MQ1][MD1];
179  for (int qx = 0; qx < Q1D; ++qx)
180  {
181  for (int dy = 0; dy < D1D; ++dy)
182  {
183  QD[qx][dy] = 0.0;
184  for (int qy = 0; qy < Q1D; ++qy)
185  {
186  QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
187  }
188  }
189  }
190  for (int dy = 0; dy < D1D; ++dy)
191  {
192  for (int dx = 0; dx < D1D; ++dx)
193  {
194  for (int qx = 0; qx < Q1D; ++qx)
195  {
196  Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
197  }
198  }
199  }
200  });
201 }
202 
203 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
204 static void SmemPAMassAssembleDiagonal2D(const int NE,
205  const Array<double> &b_,
206  const Vector &d_,
207  Vector &y_,
208  const int d1d = 0,
209  const int q1d = 0)
210 {
211  const int D1D = T_D1D ? T_D1D : d1d;
212  const int Q1D = T_Q1D ? T_Q1D : q1d;
213  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
214  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
215  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
216  MFEM_VERIFY(D1D <= MD1, "");
217  MFEM_VERIFY(Q1D <= MQ1, "");
218  auto b = Reshape(b_.Read(), Q1D, D1D);
219  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
220  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
221  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
222  {
223  const int tidz = MFEM_THREAD_ID(z);
224  const int D1D = T_D1D ? T_D1D : d1d;
225  const int Q1D = T_Q1D ? T_Q1D : q1d;
226  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
227  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
228  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
229  MFEM_SHARED double B[MQ1][MD1];
230  MFEM_SHARED double QDZ[NBZ][MQ1][MD1];
231  double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
232  if (tidz == 0)
233  {
234  MFEM_FOREACH_THREAD(d,y,D1D)
235  {
236  MFEM_FOREACH_THREAD(q,x,Q1D)
237  {
238  B[q][d] = b(q,d);
239  }
240  }
241  }
242  MFEM_SYNC_THREAD;
243  MFEM_FOREACH_THREAD(qx,x,Q1D)
244  {
245  MFEM_FOREACH_THREAD(dy,y,D1D)
246  {
247  QD[qx][dy] = 0.0;
248  for (int qy = 0; qy < Q1D; ++qy)
249  {
250  QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
251  }
252  }
253  }
254  MFEM_SYNC_THREAD;
255  MFEM_FOREACH_THREAD(dy,y,D1D)
256  {
257  MFEM_FOREACH_THREAD(dx,x,D1D)
258  {
259  for (int qx = 0; qx < Q1D; ++qx)
260  {
261  // might need absolute values on next line
262  Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
263  }
264  }
265  }
266  });
267 }
268 
269 template<int T_D1D = 0, int T_Q1D = 0>
270 static void PAMassAssembleDiagonal3D(const int NE,
271  const Array<double> &b,
272  const Vector &d,
273  Vector &y,
274  const int d1d = 0,
275  const int q1d = 0)
276 {
277  const int D1D = T_D1D ? T_D1D : d1d;
278  const int Q1D = T_Q1D ? T_Q1D : q1d;
279  MFEM_VERIFY(D1D <= MAX_D1D, "");
280  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
281  auto B = Reshape(b.Read(), Q1D, D1D);
282  auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
283  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
284  MFEM_FORALL(e, NE,
285  {
286  const int D1D = T_D1D ? T_D1D : d1d;
287  const int Q1D = T_Q1D ? T_Q1D : q1d;
288  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
289  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
290  double QQD[MQ1][MQ1][MD1];
291  double QDD[MQ1][MD1][MD1];
292  for (int qx = 0; qx < Q1D; ++qx)
293  {
294  for (int qy = 0; qy < Q1D; ++qy)
295  {
296  for (int dz = 0; dz < D1D; ++dz)
297  {
298  QQD[qx][qy][dz] = 0.0;
299  for (int qz = 0; qz < Q1D; ++qz)
300  {
301  QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
302  }
303  }
304  }
305  }
306  for (int qx = 0; qx < Q1D; ++qx)
307  {
308  for (int dz = 0; dz < D1D; ++dz)
309  {
310  for (int dy = 0; dy < D1D; ++dy)
311  {
312  QDD[qx][dy][dz] = 0.0;
313  for (int qy = 0; qy < Q1D; ++qy)
314  {
315  QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
316  }
317  }
318  }
319  }
320  for (int dz = 0; dz < D1D; ++dz)
321  {
322  for (int dy = 0; dy < D1D; ++dy)
323  {
324  for (int dx = 0; dx < D1D; ++dx)
325  {
326  double t = 0.0;
327  for (int qx = 0; qx < Q1D; ++qx)
328  {
329  t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
330  }
331  Y(dx, dy, dz, e) += t;
332  }
333  }
334  }
335  });
336 }
337 
338 template<int T_D1D = 0, int T_Q1D = 0>
339 static void SmemPAMassAssembleDiagonal3D(const int NE,
340  const Array<double> &b_,
341  const Vector &d_,
342  Vector &y_,
343  const int d1d = 0,
344  const int q1d = 0)
345 {
346  const int D1D = T_D1D ? T_D1D : d1d;
347  const int Q1D = T_Q1D ? T_Q1D : q1d;
348  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
349  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
350  MFEM_VERIFY(D1D <= MD1, "");
351  MFEM_VERIFY(Q1D <= MQ1, "");
352  auto b = Reshape(b_.Read(), Q1D, D1D);
353  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
354  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
355  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
356  {
357  const int tidz = MFEM_THREAD_ID(z);
358  const int D1D = T_D1D ? T_D1D : d1d;
359  const int Q1D = T_Q1D ? T_Q1D : q1d;
360  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
361  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
362  MFEM_SHARED double B[MQ1][MD1];
363  MFEM_SHARED double QQD[MQ1][MQ1][MD1];
364  MFEM_SHARED double QDD[MQ1][MD1][MD1];
365  if (tidz == 0)
366  {
367  MFEM_FOREACH_THREAD(d,y,D1D)
368  {
369  MFEM_FOREACH_THREAD(q,x,Q1D)
370  {
371  B[q][d] = b(q,d);
372  }
373  }
374  }
375  MFEM_SYNC_THREAD;
376  MFEM_FOREACH_THREAD(qx,x,Q1D)
377  {
378  MFEM_FOREACH_THREAD(qy,y,Q1D)
379  {
380  MFEM_FOREACH_THREAD(dz,z,D1D)
381  {
382  QQD[qx][qy][dz] = 0.0;
383  for (int qz = 0; qz < Q1D; ++qz)
384  {
385  QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
386  }
387  }
388  }
389  }
390  MFEM_SYNC_THREAD;
391  MFEM_FOREACH_THREAD(qx,x,Q1D)
392  {
393  MFEM_FOREACH_THREAD(dz,z,D1D)
394  {
395  MFEM_FOREACH_THREAD(dy,y,D1D)
396  {
397  QDD[qx][dy][dz] = 0.0;
398  for (int qy = 0; qy < Q1D; ++qy)
399  {
400  QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
401  }
402  }
403  }
404  }
405  MFEM_SYNC_THREAD;
406  MFEM_FOREACH_THREAD(dz,z,D1D)
407  {
408  MFEM_FOREACH_THREAD(dy,y,D1D)
409  {
410  MFEM_FOREACH_THREAD(dx,x,D1D)
411  {
412  double t = 0.0;
413  for (int qx = 0; qx < Q1D; ++qx)
414  {
415  t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
416  }
417  Y(dx, dy, dz, e) += t;
418  }
419  }
420  }
421  });
422 }
423 
424 static void PAMassAssembleDiagonal(const int dim, const int D1D,
425  const int Q1D, const int NE,
426  const Array<double> &B,
427  const Vector &D,
428  Vector &Y)
429 {
430  if (dim == 2)
431  {
432  switch ((D1D << 4 ) | Q1D)
433  {
434  case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
435  case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
436  case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
437  case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
438  case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
439  case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
440  case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
441  case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
442  default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
443  }
444  }
445  else if (dim == 3)
446  {
447  switch ((D1D << 4 ) | Q1D)
448  {
449  case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
450  case 0x24: return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y);
451  case 0x26: return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y);
452  case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
453  case 0x35: return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y);
454  case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
455  case 0x48: return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y);
456  case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
457  case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
458  case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
459  case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
460  default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
461  }
462  }
463  MFEM_ABORT("Unknown kernel.");
464 }
465 
466 void MassIntegrator::AssembleDiagonalPA(Vector &diag)
467 {
468  if (DeviceCanUseCeed())
469  {
470  ceedOp->GetDiagonal(diag);
471  }
472  else
473  {
474  PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
475  }
476 }
477 
478 
479 #ifdef MFEM_USE_OCCA
480 // OCCA PA Mass Apply 2D kernel
481 static void OccaPAMassApply2D(const int D1D,
482  const int Q1D,
483  const int NE,
484  const Array<double> &B,
485  const Array<double> &Bt,
486  const Vector &D,
487  const Vector &X,
488  Vector &Y)
489 {
490  occa::properties props;
491  props["defines/D1D"] = D1D;
492  props["defines/Q1D"] = Q1D;
493  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
494  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
495  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
496  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
497  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
498  const occa_id_t id = std::make_pair(D1D,Q1D);
499  if (!Device::Allows(Backend::OCCA_CUDA))
500  {
501  static occa_kernel_t OccaMassApply2D_cpu;
502  if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end())
503  {
504  const occa::kernel MassApply2D_CPU =
505  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
506  "MassApply2D_CPU", props);
507  OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU);
508  }
509  OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
510  }
511  else
512  {
513  static occa_kernel_t OccaMassApply2D_gpu;
514  if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end())
515  {
516  const occa::kernel MassApply2D_GPU =
517  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
518  "MassApply2D_GPU", props);
519  OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU);
520  }
521  OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
522  }
523 }
524 
525 // OCCA PA Mass Apply 3D kernel
526 static void OccaPAMassApply3D(const int D1D,
527  const int Q1D,
528  const int NE,
529  const Array<double> &B,
530  const Array<double> &Bt,
531  const Vector &D,
532  const Vector &X,
533  Vector &Y)
534 {
535  occa::properties props;
536  props["defines/D1D"] = D1D;
537  props["defines/Q1D"] = Q1D;
538  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
539  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
540  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
541  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
542  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
543  const occa_id_t id = std::make_pair(D1D,Q1D);
544  if (!Device::Allows(Backend::OCCA_CUDA))
545  {
546  static occa_kernel_t OccaMassApply3D_cpu;
547  if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end())
548  {
549  const occa::kernel MassApply3D_CPU =
550  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
551  "MassApply3D_CPU", props);
552  OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU);
553  }
554  OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
555  }
556  else
557  {
558  static occa_kernel_t OccaMassApply3D_gpu;
559  if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end())
560  {
561  const occa::kernel MassApply3D_GPU =
562  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
563  "MassApply3D_GPU", props);
564  OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU);
565  }
566  OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
567  }
568 }
569 #endif // MFEM_USE_OCCA
570 
571 template<int T_D1D = 0, int T_Q1D = 0>
572 static void PAMassApply2D(const int NE,
573  const Array<double> &b_,
574  const Array<double> &bt_,
575  const Vector &d_,
576  const Vector &x_,
577  Vector &y_,
578  const int d1d = 0,
579  const int q1d = 0)
580 {
581  const int D1D = T_D1D ? T_D1D : d1d;
582  const int Q1D = T_Q1D ? T_Q1D : q1d;
583  MFEM_VERIFY(D1D <= MAX_D1D, "");
584  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
585  auto B = Reshape(b_.Read(), Q1D, D1D);
586  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
587  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
588  auto X = Reshape(x_.Read(), D1D, D1D, NE);
589  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
590  MFEM_FORALL(e, NE,
591  {
592  const int D1D = T_D1D ? T_D1D : d1d; // nvcc workaround
593  const int Q1D = T_Q1D ? T_Q1D : q1d;
594  // the following variables are evaluated at compile time
595  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
596  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
597  double sol_xy[max_Q1D][max_Q1D];
598  for (int qy = 0; qy < Q1D; ++qy)
599  {
600  for (int qx = 0; qx < Q1D; ++qx)
601  {
602  sol_xy[qy][qx] = 0.0;
603  }
604  }
605  for (int dy = 0; dy < D1D; ++dy)
606  {
607  double sol_x[max_Q1D];
608  for (int qy = 0; qy < Q1D; ++qy)
609  {
610  sol_x[qy] = 0.0;
611  }
612  for (int dx = 0; dx < D1D; ++dx)
613  {
614  const double s = X(dx,dy,e);
615  for (int qx = 0; qx < Q1D; ++qx)
616  {
617  sol_x[qx] += B(qx,dx)* s;
618  }
619  }
620  for (int qy = 0; qy < Q1D; ++qy)
621  {
622  const double d2q = B(qy,dy);
623  for (int qx = 0; qx < Q1D; ++qx)
624  {
625  sol_xy[qy][qx] += d2q * sol_x[qx];
626  }
627  }
628  }
629  for (int qy = 0; qy < Q1D; ++qy)
630  {
631  for (int qx = 0; qx < Q1D; ++qx)
632  {
633  sol_xy[qy][qx] *= D(qx,qy,e);
634  }
635  }
636  for (int qy = 0; qy < Q1D; ++qy)
637  {
638  double sol_x[max_D1D];
639  for (int dx = 0; dx < D1D; ++dx)
640  {
641  sol_x[dx] = 0.0;
642  }
643  for (int qx = 0; qx < Q1D; ++qx)
644  {
645  const double s = sol_xy[qy][qx];
646  for (int dx = 0; dx < D1D; ++dx)
647  {
648  sol_x[dx] += Bt(dx,qx) * s;
649  }
650  }
651  for (int dy = 0; dy < D1D; ++dy)
652  {
653  const double q2d = Bt(dy,qy);
654  for (int dx = 0; dx < D1D; ++dx)
655  {
656  Y(dx,dy,e) += q2d * sol_x[dx];
657  }
658  }
659  }
660  });
661 }
662 
663 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
664 static void SmemPAMassApply2D(const int NE,
665  const Array<double> &b_,
666  const Array<double> &bt_,
667  const Vector &d_,
668  const Vector &x_,
669  Vector &y_,
670  const int d1d = 0,
671  const int q1d = 0)
672 {
673  MFEM_CONTRACT_VAR(bt_);
674  const int D1D = T_D1D ? T_D1D : d1d;
675  const int Q1D = T_Q1D ? T_Q1D : q1d;
676  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
677  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
678  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
679  MFEM_VERIFY(D1D <= MD1, "");
680  MFEM_VERIFY(Q1D <= MQ1, "");
681  auto b = Reshape(b_.Read(), Q1D, D1D);
682  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
683  auto x = Reshape(x_.Read(), D1D, D1D, NE);
684  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
685  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
686  {
687  const int tidz = MFEM_THREAD_ID(z);
688  const int D1D = T_D1D ? T_D1D : d1d;
689  const int Q1D = T_Q1D ? T_Q1D : q1d;
690  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
691  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
692  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
693  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
694  MFEM_SHARED double BBt[MQ1*MD1];
695  double (*B)[MD1] = (double (*)[MD1]) BBt;
696  double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
697  MFEM_SHARED double sm0[NBZ][MDQ*MDQ];
698  MFEM_SHARED double sm1[NBZ][MDQ*MDQ];
699  double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
700  double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
701  double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
702  double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
703  MFEM_FOREACH_THREAD(dy,y,D1D)
704  {
705  MFEM_FOREACH_THREAD(dx,x,D1D)
706  {
707  X[dy][dx] = x(dx,dy,e);
708  }
709  }
710  if (tidz == 0)
711  {
712  MFEM_FOREACH_THREAD(dy,y,D1D)
713  {
714  MFEM_FOREACH_THREAD(q,x,Q1D)
715  {
716  B[q][dy] = b(q,dy);
717  }
718  }
719  }
720  MFEM_SYNC_THREAD;
721  MFEM_FOREACH_THREAD(dy,y,D1D)
722  {
723  MFEM_FOREACH_THREAD(qx,x,Q1D)
724  {
725  double dq = 0.0;
726  for (int dx = 0; dx < D1D; ++dx)
727  {
728  dq += X[dy][dx] * B[qx][dx];
729  }
730  DQ[dy][qx] = dq;
731  }
732  }
733  MFEM_SYNC_THREAD;
734  MFEM_FOREACH_THREAD(qy,y,Q1D)
735  {
736  MFEM_FOREACH_THREAD(qx,x,Q1D)
737  {
738  double qq = 0.0;
739  for (int dy = 0; dy < D1D; ++dy)
740  {
741  qq += DQ[dy][qx] * B[qy][dy];
742  }
743  QQ[qy][qx] = qq * D(qx, qy, e);
744  }
745  }
746  MFEM_SYNC_THREAD;
747  if (tidz == 0)
748  {
749  MFEM_FOREACH_THREAD(dy,y,D1D)
750  {
751  MFEM_FOREACH_THREAD(q,x,Q1D)
752  {
753  Bt[dy][q] = b(q,dy);
754  }
755  }
756  }
757  MFEM_SYNC_THREAD;
758  MFEM_FOREACH_THREAD(qy,y,Q1D)
759  {
760  MFEM_FOREACH_THREAD(dx,x,D1D)
761  {
762  double dq = 0.0;
763  for (int qx = 0; qx < Q1D; ++qx)
764  {
765  dq += QQ[qy][qx] * Bt[dx][qx];
766  }
767  QD[qy][dx] = dq;
768  }
769  }
770  MFEM_SYNC_THREAD;
771  MFEM_FOREACH_THREAD(dy,y,D1D)
772  {
773  MFEM_FOREACH_THREAD(dx,x,D1D)
774  {
775  double dd = 0.0;
776  for (int qy = 0; qy < Q1D; ++qy)
777  {
778  dd += (QD[qy][dx] * Bt[dy][qy]);
779  }
780  Y(dx, dy, e) += dd;
781  }
782  }
783  });
784 }
785 
786 template<int T_D1D = 0, int T_Q1D = 0>
787 static void PAMassApply3D(const int NE,
788  const Array<double> &b_,
789  const Array<double> &bt_,
790  const Vector &d_,
791  const Vector &x_,
792  Vector &y_,
793  const int d1d = 0,
794  const int q1d = 0)
795 {
796  const int D1D = T_D1D ? T_D1D : d1d;
797  const int Q1D = T_Q1D ? T_Q1D : q1d;
798  MFEM_VERIFY(D1D <= MAX_D1D, "");
799  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
800  auto B = Reshape(b_.Read(), Q1D, D1D);
801  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
802  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
803  auto X = Reshape(x_.Read(), D1D, D1D, D1D, NE);
804  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
805  MFEM_FORALL(e, NE,
806  {
807  const int D1D = T_D1D ? T_D1D : d1d;
808  const int Q1D = T_Q1D ? T_Q1D : q1d;
809  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
810  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
811  double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
812  for (int qz = 0; qz < Q1D; ++qz)
813  {
814  for (int qy = 0; qy < Q1D; ++qy)
815  {
816  for (int qx = 0; qx < Q1D; ++qx)
817  {
818  sol_xyz[qz][qy][qx] = 0.0;
819  }
820  }
821  }
822  for (int dz = 0; dz < D1D; ++dz)
823  {
824  double sol_xy[max_Q1D][max_Q1D];
825  for (int qy = 0; qy < Q1D; ++qy)
826  {
827  for (int qx = 0; qx < Q1D; ++qx)
828  {
829  sol_xy[qy][qx] = 0.0;
830  }
831  }
832  for (int dy = 0; dy < D1D; ++dy)
833  {
834  double sol_x[max_Q1D];
835  for (int qx = 0; qx < Q1D; ++qx)
836  {
837  sol_x[qx] = 0;
838  }
839  for (int dx = 0; dx < D1D; ++dx)
840  {
841  const double s = X(dx,dy,dz,e);
842  for (int qx = 0; qx < Q1D; ++qx)
843  {
844  sol_x[qx] += B(qx,dx) * s;
845  }
846  }
847  for (int qy = 0; qy < Q1D; ++qy)
848  {
849  const double wy = B(qy,dy);
850  for (int qx = 0; qx < Q1D; ++qx)
851  {
852  sol_xy[qy][qx] += wy * sol_x[qx];
853  }
854  }
855  }
856  for (int qz = 0; qz < Q1D; ++qz)
857  {
858  const double wz = B(qz,dz);
859  for (int qy = 0; qy < Q1D; ++qy)
860  {
861  for (int qx = 0; qx < Q1D; ++qx)
862  {
863  sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
864  }
865  }
866  }
867  }
868  for (int qz = 0; qz < Q1D; ++qz)
869  {
870  for (int qy = 0; qy < Q1D; ++qy)
871  {
872  for (int qx = 0; qx < Q1D; ++qx)
873  {
874  sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
875  }
876  }
877  }
878  for (int qz = 0; qz < Q1D; ++qz)
879  {
880  double sol_xy[max_D1D][max_D1D];
881  for (int dy = 0; dy < D1D; ++dy)
882  {
883  for (int dx = 0; dx < D1D; ++dx)
884  {
885  sol_xy[dy][dx] = 0;
886  }
887  }
888  for (int qy = 0; qy < Q1D; ++qy)
889  {
890  double sol_x[max_D1D];
891  for (int dx = 0; dx < D1D; ++dx)
892  {
893  sol_x[dx] = 0;
894  }
895  for (int qx = 0; qx < Q1D; ++qx)
896  {
897  const double s = sol_xyz[qz][qy][qx];
898  for (int dx = 0; dx < D1D; ++dx)
899  {
900  sol_x[dx] += Bt(dx,qx) * s;
901  }
902  }
903  for (int dy = 0; dy < D1D; ++dy)
904  {
905  const double wy = Bt(dy,qy);
906  for (int dx = 0; dx < D1D; ++dx)
907  {
908  sol_xy[dy][dx] += wy * sol_x[dx];
909  }
910  }
911  }
912  for (int dz = 0; dz < D1D; ++dz)
913  {
914  const double wz = Bt(dz,qz);
915  for (int dy = 0; dy < D1D; ++dy)
916  {
917  for (int dx = 0; dx < D1D; ++dx)
918  {
919  Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
920  }
921  }
922  }
923  }
924  });
925 }
926 
927 template<int T_D1D = 0, int T_Q1D = 0>
928 static void SmemPAMassApply3D(const int NE,
929  const Array<double> &b_,
930  const Array<double> &bt_,
931  const Vector &d_,
932  const Vector &x_,
933  Vector &y_,
934  const int d1d = 0,
935  const int q1d = 0)
936 {
937  MFEM_CONTRACT_VAR(bt_);
938  const int D1D = T_D1D ? T_D1D : d1d;
939  const int Q1D = T_Q1D ? T_Q1D : q1d;
940  constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D;
941  constexpr int M1D = T_D1D ? T_D1D : MAX_D1D;
942  MFEM_VERIFY(D1D <= M1D, "");
943  MFEM_VERIFY(Q1D <= M1Q, "");
944  auto b = Reshape(b_.Read(), Q1D, D1D);
945  auto d = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
946  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
947  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
948  MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
949  {
950  const int D1D = T_D1D ? T_D1D : d1d;
951  const int Q1D = T_Q1D ? T_Q1D : q1d;
952  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
953  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
954  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
955  MFEM_SHARED double sDQ[MQ1*MD1];
956  double (*B)[MD1] = (double (*)[MD1]) sDQ;
957  double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
958  MFEM_SHARED double sm0[MDQ*MDQ*MDQ];
959  MFEM_SHARED double sm1[MDQ*MDQ*MDQ];
960  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
961  double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
962  double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
963  double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
964  double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
965  double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
966  MFEM_FOREACH_THREAD(dy,y,D1D)
967  {
968  MFEM_FOREACH_THREAD(dx,x,D1D)
969  {
970  MFEM_UNROLL(MD1)
971  for (int dz = 0; dz < D1D; ++dz)
972  {
973  X[dz][dy][dx] = x(dx,dy,dz,e);
974  }
975  }
976  MFEM_FOREACH_THREAD(dx,x,Q1D)
977  {
978  B[dx][dy] = b(dx,dy);
979  }
980  }
981  MFEM_SYNC_THREAD;
982  MFEM_FOREACH_THREAD(dy,y,D1D)
983  {
984  MFEM_FOREACH_THREAD(qx,x,Q1D)
985  {
986  double u[D1D];
987  MFEM_UNROLL(MD1)
988  for (int dz = 0; dz < D1D; dz++)
989  {
990  u[dz] = 0;
991  }
992  MFEM_UNROLL(MD1)
993  for (int dx = 0; dx < D1D; ++dx)
994  {
995  MFEM_UNROLL(MD1)
996  for (int dz = 0; dz < D1D; ++dz)
997  {
998  u[dz] += X[dz][dy][dx] * B[qx][dx];
999  }
1000  }
1001  MFEM_UNROLL(MD1)
1002  for (int dz = 0; dz < D1D; ++dz)
1003  {
1004  DDQ[dz][dy][qx] = u[dz];
1005  }
1006  }
1007  }
1008  MFEM_SYNC_THREAD;
1009  MFEM_FOREACH_THREAD(qy,y,Q1D)
1010  {
1011  MFEM_FOREACH_THREAD(qx,x,Q1D)
1012  {
1013  double u[D1D];
1014  MFEM_UNROLL(MD1)
1015  for (int dz = 0; dz < D1D; dz++)
1016  {
1017  u[dz] = 0;
1018  }
1019  MFEM_UNROLL(MD1)
1020  for (int dy = 0; dy < D1D; ++dy)
1021  {
1022  MFEM_UNROLL(MD1)
1023  for (int dz = 0; dz < D1D; dz++)
1024  {
1025  u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
1026  }
1027  }
1028  MFEM_UNROLL(MD1)
1029  for (int dz = 0; dz < D1D; dz++)
1030  {
1031  DQQ[dz][qy][qx] = u[dz];
1032  }
1033  }
1034  }
1035  MFEM_SYNC_THREAD;
1036  MFEM_FOREACH_THREAD(qy,y,Q1D)
1037  {
1038  MFEM_FOREACH_THREAD(qx,x,Q1D)
1039  {
1040  double u[Q1D];
1041  MFEM_UNROLL(MQ1)
1042  for (int qz = 0; qz < Q1D; qz++)
1043  {
1044  u[qz] = 0;
1045  }
1046  MFEM_UNROLL(MD1)
1047  for (int dz = 0; dz < D1D; ++dz)
1048  {
1049  MFEM_UNROLL(MQ1)
1050  for (int qz = 0; qz < Q1D; qz++)
1051  {
1052  u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1053  }
1054  }
1055  MFEM_UNROLL(MQ1)
1056  for (int qz = 0; qz < Q1D; qz++)
1057  {
1058  QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1059  }
1060  }
1061  }
1062  MFEM_SYNC_THREAD;
1063  MFEM_FOREACH_THREAD(d,y,D1D)
1064  {
1065  MFEM_FOREACH_THREAD(q,x,Q1D)
1066  {
1067  Bt[d][q] = b(q,d);
1068  }
1069  }
1070  MFEM_SYNC_THREAD;
1071  MFEM_FOREACH_THREAD(qy,y,Q1D)
1072  {
1073  MFEM_FOREACH_THREAD(dx,x,D1D)
1074  {
1075  double u[Q1D];
1076  MFEM_UNROLL(MQ1)
1077  for (int qz = 0; qz < Q1D; ++qz)
1078  {
1079  u[qz] = 0;
1080  }
1081  MFEM_UNROLL(MQ1)
1082  for (int qx = 0; qx < Q1D; ++qx)
1083  {
1084  MFEM_UNROLL(MQ1)
1085  for (int qz = 0; qz < Q1D; ++qz)
1086  {
1087  u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1088  }
1089  }
1090  MFEM_UNROLL(MQ1)
1091  for (int qz = 0; qz < Q1D; ++qz)
1092  {
1093  QQD[qz][qy][dx] = u[qz];
1094  }
1095  }
1096  }
1097  MFEM_SYNC_THREAD;
1098  MFEM_FOREACH_THREAD(dy,y,D1D)
1099  {
1100  MFEM_FOREACH_THREAD(dx,x,D1D)
1101  {
1102  double u[Q1D];
1103  MFEM_UNROLL(MQ1)
1104  for (int qz = 0; qz < Q1D; ++qz)
1105  {
1106  u[qz] = 0;
1107  }
1108  MFEM_UNROLL(MQ1)
1109  for (int qy = 0; qy < Q1D; ++qy)
1110  {
1111  MFEM_UNROLL(MQ1)
1112  for (int qz = 0; qz < Q1D; ++qz)
1113  {
1114  u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1115  }
1116  }
1117  MFEM_UNROLL(MQ1)
1118  for (int qz = 0; qz < Q1D; ++qz)
1119  {
1120  QDD[qz][dy][dx] = u[qz];
1121  }
1122  }
1123  }
1124  MFEM_SYNC_THREAD;
1125  MFEM_FOREACH_THREAD(dy,y,D1D)
1126  {
1127  MFEM_FOREACH_THREAD(dx,x,D1D)
1128  {
1129  double u[D1D];
1130  MFEM_UNROLL(MD1)
1131  for (int dz = 0; dz < D1D; ++dz)
1132  {
1133  u[dz] = 0;
1134  }
1135  MFEM_UNROLL(MQ1)
1136  for (int qz = 0; qz < Q1D; ++qz)
1137  {
1138  MFEM_UNROLL(MD1)
1139  for (int dz = 0; dz < D1D; ++dz)
1140  {
1141  u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1142  }
1143  }
1144  MFEM_UNROLL(MD1)
1145  for (int dz = 0; dz < D1D; ++dz)
1146  {
1147  y(dx,dy,dz,e) += u[dz];
1148  }
1149  }
1150  }
1151  });
1152 }
1153 
1154 static void PAMassApply(const int dim,
1155  const int D1D,
1156  const int Q1D,
1157  const int NE,
1158  const Array<double> &B,
1159  const Array<double> &Bt,
1160  const Vector &D,
1161  const Vector &X,
1162  Vector &Y)
1163 {
1164 #ifdef MFEM_USE_OCCA
1165  if (DeviceCanUseOcca())
1166  {
1167  if (dim == 2)
1168  {
1169  return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1170  }
1171  if (dim == 3)
1172  {
1173  return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1174  }
1175  MFEM_ABORT("OCCA PA Mass Apply unknown kernel!");
1176  }
1177 #endif // MFEM_USE_OCCA
1178  const int id = (D1D << 4) | Q1D;
1179  if (dim == 2)
1180  {
1181  switch (id)
1182  {
1183  case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1184  case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1185  case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1186  case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1187  case 0x35: return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y);
1188  case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1189  case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1190  case 0x46: return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y);
1191  case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1192  case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1193  case 0x57: return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y);
1194  case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1195  case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1196  case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1197  case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1198  case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1199  default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1200  }
1201  }
1202  else if (dim == 3)
1203  {
1204  switch (id)
1205  {
1206  case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1207  case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1208  case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1209  case 0x35: return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y);
1210  case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1211  case 0x37: return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y);
1212  case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1213  case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1214  case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1215  case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1216  case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1217  case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1218  case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1219  case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1220  case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1221  default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1222  }
1223  }
1224  mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl;
1225  MFEM_ABORT("Unknown kernel.");
1226 }
1227 
1228 void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const
1229 {
1230  if (DeviceCanUseCeed())
1231  {
1232  ceedOp->AddMult(x, y);
1233  }
1234  else
1235  {
1236  PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
1237  }
1238 }
1239 
1240 void MassIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
1241 {
1242  // Mass integrator is symmetric
1243  AddMultPA(x, y);
1244 }
1245 
1246 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe.hpp:243
int Size() const
Return the logical size of the array.
Definition: array.hpp:134
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:113
A coefficient that is constant across space and time.
Definition: coefficient.hpp:78
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:784
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
Definition: fespace.hpp:950
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:513
const Geometry::Type geom
Definition: ex1.cpp:40
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:438
int Size() const
Returns the size of the vector.
Definition: vector.hpp:190
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:173
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:846
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:225
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:136
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Definition: intrules.hpp:250
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
const int MAX_Q1D
Definition: forall.hpp:28
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:410
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
Definition: gridfunc.hpp:775
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:300
int Dimension() const
Definition: mesh.hpp:911
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:600
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:31
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe.cpp:367
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:442
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
Definition: mesh.cpp:347
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:27
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2388
RefCoord t[3]
Vector data type.
Definition: vector.hpp:60
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
Definition: vector.hpp:577
std::pair< int, int > occa_id_t
Definition: occa.hpp:78
RefCoord s[3]
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
Class representing a function through its values (scalar or vector) at quadrature points...
Definition: gridfunc.hpp:734
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:426