MFEM  v4.4.0
Finite element discretization library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_mass_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "ceed/mass.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 // PA Mass Integrator
23 
24 // PA Mass Assemble kernel
25 
26 void MassIntegrator::AssemblePA(const FiniteElementSpace &fes)
27 {
28  const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
29  Device::GetDeviceMemoryType() : pa_mt;
30 
31  // Assuming the same element type
32  fespace = &fes;
33  Mesh *mesh = fes.GetMesh();
34  if (mesh->GetNE() == 0) { return; }
35  const FiniteElement &el = *fes.GetFE(0);
37  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0);
38  if (DeviceCanUseCeed())
39  {
40  delete ceedOp;
41  ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q);
42  return;
43  }
44  int map_type = el.GetMapType();
45  dim = mesh->Dimension();
46  ne = fes.GetMesh()->GetNE();
47  nq = ir->GetNPoints();
48  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::COORDINATES |
49  GeometricFactors::JACOBIANS, mt);
50  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
51  dofs1D = maps->ndof;
52  quad1D = maps->nqpt;
53  pa_data.SetSize(ne*nq, mt);
54  Vector coeff;
55  if (Q == nullptr)
56  {
57  coeff.SetSize(1);
58  coeff(0) = 1.0;
59  }
60  else if (ConstantCoefficient* cQ = dynamic_cast<ConstantCoefficient*>(Q))
61  {
62  coeff.SetSize(1);
63  coeff(0) = cQ->constant;
64  }
65  else if (QuadratureFunctionCoefficient* qfQ =
66  dynamic_cast<QuadratureFunctionCoefficient*>(Q))
67  {
68  const QuadratureFunction &qFun = qfQ->GetQuadFunction();
69  MFEM_VERIFY(qFun.Size() == nq * ne,
70  "Incompatible QuadratureFunction dimension \n");
71 
72  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
73  "IntegrationRule used within integrator and in"
74  " QuadratureFunction appear to be different");
75  qFun.Read();
76  coeff.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
77  }
78  else
79  {
80  coeff.SetSize(nq * ne);
81  auto C = Reshape(coeff.HostWrite(), nq, ne);
82  for (int e = 0; e < ne; ++e)
83  {
85  for (int q = 0; q < nq; ++q)
86  {
87  C(q,e) = Q->Eval(T, ir->IntPoint(q));
88  }
89  }
90  }
91  if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); }
92  if (dim==2)
93  {
94  const int NE = ne;
95  const int Q1D = quad1D;
96  const bool const_c = coeff.Size() == 1;
97  const bool by_val = map_type == FiniteElement::VALUE;
98  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D);
99  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,2,2,NE);
100  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) :
101  Reshape(coeff.Read(), Q1D,Q1D,NE);
102  auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE);
103  MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
104  {
105  MFEM_FOREACH_THREAD(qx,x,Q1D)
106  {
107  MFEM_FOREACH_THREAD(qy,y,Q1D)
108  {
109  const double J11 = J(qx,qy,0,0,e);
110  const double J12 = J(qx,qy,1,0,e);
111  const double J21 = J(qx,qy,0,1,e);
112  const double J22 = J(qx,qy,1,1,e);
113  const double detJ = (J11*J22)-(J21*J12);
114  const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
115  v(qx,qy,e) = W(qx,qy) * coeff * (by_val ? detJ : 1.0/detJ);
116  }
117  }
118  });
119  }
120  if (dim==3)
121  {
122  const int NE = ne;
123  const int Q1D = quad1D;
124  const bool const_c = coeff.Size() == 1;
125  const bool by_val = map_type == FiniteElement::VALUE;
126  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D,Q1D);
127  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,Q1D,3,3,NE);
128  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1,1) :
129  Reshape(coeff.Read(), Q1D,Q1D,Q1D,NE);
130  auto v = Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
131  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
132  {
133  MFEM_FOREACH_THREAD(qx,x,Q1D)
134  {
135  MFEM_FOREACH_THREAD(qy,y,Q1D)
136  {
137  MFEM_FOREACH_THREAD(qz,z,Q1D)
138  {
139  const double J11 = J(qx,qy,qz,0,0,e);
140  const double J21 = J(qx,qy,qz,1,0,e);
141  const double J31 = J(qx,qy,qz,2,0,e);
142  const double J12 = J(qx,qy,qz,0,1,e);
143  const double J22 = J(qx,qy,qz,1,1,e);
144  const double J32 = J(qx,qy,qz,2,1,e);
145  const double J13 = J(qx,qy,qz,0,2,e);
146  const double J23 = J(qx,qy,qz,1,2,e);
147  const double J33 = J(qx,qy,qz,2,2,e);
148  const double detJ = J11 * (J22 * J33 - J32 * J23) -
149  /* */ J21 * (J12 * J33 - J32 * J13) +
150  /* */ J31 * (J12 * J23 - J22 * J13);
151  const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
152  v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * (by_val ? detJ : 1.0/detJ);
153  }
154  }
155  }
156  });
157  }
158 }
159 
160 template<int T_D1D = 0, int T_Q1D = 0>
161 static void PAMassAssembleDiagonal2D(const int NE,
162  const Array<double> &b,
163  const Vector &d,
164  Vector &y,
165  const int d1d = 0,
166  const int q1d = 0)
167 {
168  const int D1D = T_D1D ? T_D1D : d1d;
169  const int Q1D = T_Q1D ? T_Q1D : q1d;
170  MFEM_VERIFY(D1D <= MAX_D1D, "");
171  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
172  auto B = Reshape(b.Read(), Q1D, D1D);
173  auto D = Reshape(d.Read(), Q1D, Q1D, NE);
174  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
175  MFEM_FORALL(e, NE,
176  {
177  const int D1D = T_D1D ? T_D1D : d1d;
178  const int Q1D = T_Q1D ? T_Q1D : q1d;
179  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
180  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
181  double QD[MQ1][MD1];
182  for (int qx = 0; qx < Q1D; ++qx)
183  {
184  for (int dy = 0; dy < D1D; ++dy)
185  {
186  QD[qx][dy] = 0.0;
187  for (int qy = 0; qy < Q1D; ++qy)
188  {
189  QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
190  }
191  }
192  }
193  for (int dy = 0; dy < D1D; ++dy)
194  {
195  for (int dx = 0; dx < D1D; ++dx)
196  {
197  for (int qx = 0; qx < Q1D; ++qx)
198  {
199  Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
200  }
201  }
202  }
203  });
204 }
205 
206 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
207 static void SmemPAMassAssembleDiagonal2D(const int NE,
208  const Array<double> &b_,
209  const Vector &d_,
210  Vector &y_,
211  const int d1d = 0,
212  const int q1d = 0)
213 {
214  const int D1D = T_D1D ? T_D1D : d1d;
215  const int Q1D = T_Q1D ? T_Q1D : q1d;
216  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
217  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
218  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
219  MFEM_VERIFY(D1D <= MD1, "");
220  MFEM_VERIFY(Q1D <= MQ1, "");
221  auto b = Reshape(b_.Read(), Q1D, D1D);
222  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
223  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
224  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
225  {
226  const int tidz = MFEM_THREAD_ID(z);
227  const int D1D = T_D1D ? T_D1D : d1d;
228  const int Q1D = T_Q1D ? T_Q1D : q1d;
229  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
230  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
231  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
232  MFEM_SHARED double B[MQ1][MD1];
233  MFEM_SHARED double QDZ[NBZ][MQ1][MD1];
234  double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
235  if (tidz == 0)
236  {
237  MFEM_FOREACH_THREAD(d,y,D1D)
238  {
239  MFEM_FOREACH_THREAD(q,x,Q1D)
240  {
241  B[q][d] = b(q,d);
242  }
243  }
244  }
245  MFEM_SYNC_THREAD;
246  MFEM_FOREACH_THREAD(qx,x,Q1D)
247  {
248  MFEM_FOREACH_THREAD(dy,y,D1D)
249  {
250  QD[qx][dy] = 0.0;
251  for (int qy = 0; qy < Q1D; ++qy)
252  {
253  QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
254  }
255  }
256  }
257  MFEM_SYNC_THREAD;
258  MFEM_FOREACH_THREAD(dy,y,D1D)
259  {
260  MFEM_FOREACH_THREAD(dx,x,D1D)
261  {
262  for (int qx = 0; qx < Q1D; ++qx)
263  {
264  // might need absolute values on next line
265  Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
266  }
267  }
268  }
269  });
270 }
271 
272 template<int T_D1D = 0, int T_Q1D = 0>
273 static void PAMassAssembleDiagonal3D(const int NE,
274  const Array<double> &b,
275  const Vector &d,
276  Vector &y,
277  const int d1d = 0,
278  const int q1d = 0)
279 {
280  const int D1D = T_D1D ? T_D1D : d1d;
281  const int Q1D = T_Q1D ? T_Q1D : q1d;
282  MFEM_VERIFY(D1D <= MAX_D1D, "");
283  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
284  auto B = Reshape(b.Read(), Q1D, D1D);
285  auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
286  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
287  MFEM_FORALL(e, NE,
288  {
289  const int D1D = T_D1D ? T_D1D : d1d;
290  const int Q1D = T_Q1D ? T_Q1D : q1d;
291  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
292  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
293  double QQD[MQ1][MQ1][MD1];
294  double QDD[MQ1][MD1][MD1];
295  for (int qx = 0; qx < Q1D; ++qx)
296  {
297  for (int qy = 0; qy < Q1D; ++qy)
298  {
299  for (int dz = 0; dz < D1D; ++dz)
300  {
301  QQD[qx][qy][dz] = 0.0;
302  for (int qz = 0; qz < Q1D; ++qz)
303  {
304  QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
305  }
306  }
307  }
308  }
309  for (int qx = 0; qx < Q1D; ++qx)
310  {
311  for (int dz = 0; dz < D1D; ++dz)
312  {
313  for (int dy = 0; dy < D1D; ++dy)
314  {
315  QDD[qx][dy][dz] = 0.0;
316  for (int qy = 0; qy < Q1D; ++qy)
317  {
318  QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
319  }
320  }
321  }
322  }
323  for (int dz = 0; dz < D1D; ++dz)
324  {
325  for (int dy = 0; dy < D1D; ++dy)
326  {
327  for (int dx = 0; dx < D1D; ++dx)
328  {
329  double t = 0.0;
330  for (int qx = 0; qx < Q1D; ++qx)
331  {
332  t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
333  }
334  Y(dx, dy, dz, e) += t;
335  }
336  }
337  }
338  });
339 }
340 
341 template<int T_D1D = 0, int T_Q1D = 0>
342 static void SmemPAMassAssembleDiagonal3D(const int NE,
343  const Array<double> &b_,
344  const Vector &d_,
345  Vector &y_,
346  const int d1d = 0,
347  const int q1d = 0)
348 {
349  const int D1D = T_D1D ? T_D1D : d1d;
350  const int Q1D = T_Q1D ? T_Q1D : q1d;
351  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
352  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
353  MFEM_VERIFY(D1D <= MD1, "");
354  MFEM_VERIFY(Q1D <= MQ1, "");
355  auto b = Reshape(b_.Read(), Q1D, D1D);
356  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
357  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
358  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
359  {
360  const int tidz = MFEM_THREAD_ID(z);
361  const int D1D = T_D1D ? T_D1D : d1d;
362  const int Q1D = T_Q1D ? T_Q1D : q1d;
363  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
364  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
365  MFEM_SHARED double B[MQ1][MD1];
366  MFEM_SHARED double QQD[MQ1][MQ1][MD1];
367  MFEM_SHARED double QDD[MQ1][MD1][MD1];
368  if (tidz == 0)
369  {
370  MFEM_FOREACH_THREAD(d,y,D1D)
371  {
372  MFEM_FOREACH_THREAD(q,x,Q1D)
373  {
374  B[q][d] = b(q,d);
375  }
376  }
377  }
378  MFEM_SYNC_THREAD;
379  MFEM_FOREACH_THREAD(qx,x,Q1D)
380  {
381  MFEM_FOREACH_THREAD(qy,y,Q1D)
382  {
383  MFEM_FOREACH_THREAD(dz,z,D1D)
384  {
385  QQD[qx][qy][dz] = 0.0;
386  for (int qz = 0; qz < Q1D; ++qz)
387  {
388  QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
389  }
390  }
391  }
392  }
393  MFEM_SYNC_THREAD;
394  MFEM_FOREACH_THREAD(qx,x,Q1D)
395  {
396  MFEM_FOREACH_THREAD(dz,z,D1D)
397  {
398  MFEM_FOREACH_THREAD(dy,y,D1D)
399  {
400  QDD[qx][dy][dz] = 0.0;
401  for (int qy = 0; qy < Q1D; ++qy)
402  {
403  QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
404  }
405  }
406  }
407  }
408  MFEM_SYNC_THREAD;
409  MFEM_FOREACH_THREAD(dz,z,D1D)
410  {
411  MFEM_FOREACH_THREAD(dy,y,D1D)
412  {
413  MFEM_FOREACH_THREAD(dx,x,D1D)
414  {
415  double t = 0.0;
416  for (int qx = 0; qx < Q1D; ++qx)
417  {
418  t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
419  }
420  Y(dx, dy, dz, e) += t;
421  }
422  }
423  }
424  });
425 }
426 
427 static void PAMassAssembleDiagonal(const int dim, const int D1D,
428  const int Q1D, const int NE,
429  const Array<double> &B,
430  const Vector &D,
431  Vector &Y)
432 {
433  if (dim == 2)
434  {
435  switch ((D1D << 4 ) | Q1D)
436  {
437  case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
438  case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
439  case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
440  case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
441  case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
442  case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
443  case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
444  case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
445  default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
446  }
447  }
448  else if (dim == 3)
449  {
450  switch ((D1D << 4 ) | Q1D)
451  {
452  case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
453  case 0x24: return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y);
454  case 0x26: return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y);
455  case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
456  case 0x35: return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y);
457  case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
458  case 0x48: return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y);
459  case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
460  case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
461  case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
462  case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
463  default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
464  }
465  }
466  MFEM_ABORT("Unknown kernel.");
467 }
468 
469 void MassIntegrator::AssembleDiagonalPA(Vector &diag)
470 {
471  if (DeviceCanUseCeed())
472  {
473  ceedOp->GetDiagonal(diag);
474  }
475  else
476  {
477  PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
478  }
479 }
480 
481 
482 #ifdef MFEM_USE_OCCA
483 // OCCA PA Mass Apply 2D kernel
484 static void OccaPAMassApply2D(const int D1D,
485  const int Q1D,
486  const int NE,
487  const Array<double> &B,
488  const Array<double> &Bt,
489  const Vector &D,
490  const Vector &X,
491  Vector &Y)
492 {
493  occa::properties props;
494  props["defines/D1D"] = D1D;
495  props["defines/Q1D"] = Q1D;
496  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
497  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
498  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
499  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
500  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
501  const occa_id_t id = std::make_pair(D1D,Q1D);
502  if (!Device::Allows(Backend::OCCA_CUDA))
503  {
504  static occa_kernel_t OccaMassApply2D_cpu;
505  if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end())
506  {
507  const occa::kernel MassApply2D_CPU =
508  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
509  "MassApply2D_CPU", props);
510  OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU);
511  }
512  OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
513  }
514  else
515  {
516  static occa_kernel_t OccaMassApply2D_gpu;
517  if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end())
518  {
519  const occa::kernel MassApply2D_GPU =
520  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
521  "MassApply2D_GPU", props);
522  OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU);
523  }
524  OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
525  }
526 }
527 
528 // OCCA PA Mass Apply 3D kernel
529 static void OccaPAMassApply3D(const int D1D,
530  const int Q1D,
531  const int NE,
532  const Array<double> &B,
533  const Array<double> &Bt,
534  const Vector &D,
535  const Vector &X,
536  Vector &Y)
537 {
538  occa::properties props;
539  props["defines/D1D"] = D1D;
540  props["defines/Q1D"] = Q1D;
541  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
542  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
543  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
544  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
545  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
546  const occa_id_t id = std::make_pair(D1D,Q1D);
547  if (!Device::Allows(Backend::OCCA_CUDA))
548  {
549  static occa_kernel_t OccaMassApply3D_cpu;
550  if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end())
551  {
552  const occa::kernel MassApply3D_CPU =
553  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
554  "MassApply3D_CPU", props);
555  OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU);
556  }
557  OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
558  }
559  else
560  {
561  static occa_kernel_t OccaMassApply3D_gpu;
562  if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end())
563  {
564  const occa::kernel MassApply3D_GPU =
565  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
566  "MassApply3D_GPU", props);
567  OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU);
568  }
569  OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
570  }
571 }
572 #endif // MFEM_USE_OCCA
573 
574 template<int T_D1D = 0, int T_Q1D = 0>
575 static void PAMassApply2D(const int NE,
576  const Array<double> &b_,
577  const Array<double> &bt_,
578  const Vector &d_,
579  const Vector &x_,
580  Vector &y_,
581  const int d1d = 0,
582  const int q1d = 0)
583 {
584  const int D1D = T_D1D ? T_D1D : d1d;
585  const int Q1D = T_Q1D ? T_Q1D : q1d;
586  MFEM_VERIFY(D1D <= MAX_D1D, "");
587  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
588  auto B = Reshape(b_.Read(), Q1D, D1D);
589  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
590  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
591  auto X = Reshape(x_.Read(), D1D, D1D, NE);
592  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
593  MFEM_FORALL(e, NE,
594  {
595  const int D1D = T_D1D ? T_D1D : d1d; // nvcc workaround
596  const int Q1D = T_Q1D ? T_Q1D : q1d;
597  // the following variables are evaluated at compile time
598  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
599  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
600  double sol_xy[max_Q1D][max_Q1D];
601  for (int qy = 0; qy < Q1D; ++qy)
602  {
603  for (int qx = 0; qx < Q1D; ++qx)
604  {
605  sol_xy[qy][qx] = 0.0;
606  }
607  }
608  for (int dy = 0; dy < D1D; ++dy)
609  {
610  double sol_x[max_Q1D];
611  for (int qy = 0; qy < Q1D; ++qy)
612  {
613  sol_x[qy] = 0.0;
614  }
615  for (int dx = 0; dx < D1D; ++dx)
616  {
617  const double s = X(dx,dy,e);
618  for (int qx = 0; qx < Q1D; ++qx)
619  {
620  sol_x[qx] += B(qx,dx)* s;
621  }
622  }
623  for (int qy = 0; qy < Q1D; ++qy)
624  {
625  const double d2q = B(qy,dy);
626  for (int qx = 0; qx < Q1D; ++qx)
627  {
628  sol_xy[qy][qx] += d2q * sol_x[qx];
629  }
630  }
631  }
632  for (int qy = 0; qy < Q1D; ++qy)
633  {
634  for (int qx = 0; qx < Q1D; ++qx)
635  {
636  sol_xy[qy][qx] *= D(qx,qy,e);
637  }
638  }
639  for (int qy = 0; qy < Q1D; ++qy)
640  {
641  double sol_x[max_D1D];
642  for (int dx = 0; dx < D1D; ++dx)
643  {
644  sol_x[dx] = 0.0;
645  }
646  for (int qx = 0; qx < Q1D; ++qx)
647  {
648  const double s = sol_xy[qy][qx];
649  for (int dx = 0; dx < D1D; ++dx)
650  {
651  sol_x[dx] += Bt(dx,qx) * s;
652  }
653  }
654  for (int dy = 0; dy < D1D; ++dy)
655  {
656  const double q2d = Bt(dy,qy);
657  for (int dx = 0; dx < D1D; ++dx)
658  {
659  Y(dx,dy,e) += q2d * sol_x[dx];
660  }
661  }
662  }
663  });
664 }
665 
666 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
667 static void SmemPAMassApply2D(const int NE,
668  const Array<double> &b_,
669  const Array<double> &bt_,
670  const Vector &d_,
671  const Vector &x_,
672  Vector &y_,
673  const int d1d = 0,
674  const int q1d = 0)
675 {
676  MFEM_CONTRACT_VAR(bt_);
677  const int D1D = T_D1D ? T_D1D : d1d;
678  const int Q1D = T_Q1D ? T_Q1D : q1d;
679  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
680  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
681  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
682  MFEM_VERIFY(D1D <= MD1, "");
683  MFEM_VERIFY(Q1D <= MQ1, "");
684  auto b = Reshape(b_.Read(), Q1D, D1D);
685  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
686  auto x = Reshape(x_.Read(), D1D, D1D, NE);
687  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
688  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
689  {
690  const int tidz = MFEM_THREAD_ID(z);
691  const int D1D = T_D1D ? T_D1D : d1d;
692  const int Q1D = T_Q1D ? T_Q1D : q1d;
693  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
694  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
695  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
696  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
697  MFEM_SHARED double BBt[MQ1*MD1];
698  double (*B)[MD1] = (double (*)[MD1]) BBt;
699  double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
700  MFEM_SHARED double sm0[NBZ][MDQ*MDQ];
701  MFEM_SHARED double sm1[NBZ][MDQ*MDQ];
702  double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
703  double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
704  double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
705  double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
706  MFEM_FOREACH_THREAD(dy,y,D1D)
707  {
708  MFEM_FOREACH_THREAD(dx,x,D1D)
709  {
710  X[dy][dx] = x(dx,dy,e);
711  }
712  }
713  if (tidz == 0)
714  {
715  MFEM_FOREACH_THREAD(dy,y,D1D)
716  {
717  MFEM_FOREACH_THREAD(q,x,Q1D)
718  {
719  B[q][dy] = b(q,dy);
720  }
721  }
722  }
723  MFEM_SYNC_THREAD;
724  MFEM_FOREACH_THREAD(dy,y,D1D)
725  {
726  MFEM_FOREACH_THREAD(qx,x,Q1D)
727  {
728  double dq = 0.0;
729  for (int dx = 0; dx < D1D; ++dx)
730  {
731  dq += X[dy][dx] * B[qx][dx];
732  }
733  DQ[dy][qx] = dq;
734  }
735  }
736  MFEM_SYNC_THREAD;
737  MFEM_FOREACH_THREAD(qy,y,Q1D)
738  {
739  MFEM_FOREACH_THREAD(qx,x,Q1D)
740  {
741  double qq = 0.0;
742  for (int dy = 0; dy < D1D; ++dy)
743  {
744  qq += DQ[dy][qx] * B[qy][dy];
745  }
746  QQ[qy][qx] = qq * D(qx, qy, e);
747  }
748  }
749  MFEM_SYNC_THREAD;
750  if (tidz == 0)
751  {
752  MFEM_FOREACH_THREAD(dy,y,D1D)
753  {
754  MFEM_FOREACH_THREAD(q,x,Q1D)
755  {
756  Bt[dy][q] = b(q,dy);
757  }
758  }
759  }
760  MFEM_SYNC_THREAD;
761  MFEM_FOREACH_THREAD(qy,y,Q1D)
762  {
763  MFEM_FOREACH_THREAD(dx,x,D1D)
764  {
765  double dq = 0.0;
766  for (int qx = 0; qx < Q1D; ++qx)
767  {
768  dq += QQ[qy][qx] * Bt[dx][qx];
769  }
770  QD[qy][dx] = dq;
771  }
772  }
773  MFEM_SYNC_THREAD;
774  MFEM_FOREACH_THREAD(dy,y,D1D)
775  {
776  MFEM_FOREACH_THREAD(dx,x,D1D)
777  {
778  double dd = 0.0;
779  for (int qy = 0; qy < Q1D; ++qy)
780  {
781  dd += (QD[qy][dx] * Bt[dy][qy]);
782  }
783  Y(dx, dy, e) += dd;
784  }
785  }
786  });
787 }
788 
789 template<int T_D1D = 0, int T_Q1D = 0>
790 static void PAMassApply3D(const int NE,
791  const Array<double> &b_,
792  const Array<double> &bt_,
793  const Vector &d_,
794  const Vector &x_,
795  Vector &y_,
796  const int d1d = 0,
797  const int q1d = 0)
798 {
799  const int D1D = T_D1D ? T_D1D : d1d;
800  const int Q1D = T_Q1D ? T_Q1D : q1d;
801  MFEM_VERIFY(D1D <= MAX_D1D, "");
802  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
803  auto B = Reshape(b_.Read(), Q1D, D1D);
804  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
805  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
806  auto X = Reshape(x_.Read(), D1D, D1D, D1D, NE);
807  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
808  MFEM_FORALL(e, NE,
809  {
810  const int D1D = T_D1D ? T_D1D : d1d;
811  const int Q1D = T_Q1D ? T_Q1D : q1d;
812  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
813  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
814  double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
815  for (int qz = 0; qz < Q1D; ++qz)
816  {
817  for (int qy = 0; qy < Q1D; ++qy)
818  {
819  for (int qx = 0; qx < Q1D; ++qx)
820  {
821  sol_xyz[qz][qy][qx] = 0.0;
822  }
823  }
824  }
825  for (int dz = 0; dz < D1D; ++dz)
826  {
827  double sol_xy[max_Q1D][max_Q1D];
828  for (int qy = 0; qy < Q1D; ++qy)
829  {
830  for (int qx = 0; qx < Q1D; ++qx)
831  {
832  sol_xy[qy][qx] = 0.0;
833  }
834  }
835  for (int dy = 0; dy < D1D; ++dy)
836  {
837  double sol_x[max_Q1D];
838  for (int qx = 0; qx < Q1D; ++qx)
839  {
840  sol_x[qx] = 0;
841  }
842  for (int dx = 0; dx < D1D; ++dx)
843  {
844  const double s = X(dx,dy,dz,e);
845  for (int qx = 0; qx < Q1D; ++qx)
846  {
847  sol_x[qx] += B(qx,dx) * s;
848  }
849  }
850  for (int qy = 0; qy < Q1D; ++qy)
851  {
852  const double wy = B(qy,dy);
853  for (int qx = 0; qx < Q1D; ++qx)
854  {
855  sol_xy[qy][qx] += wy * sol_x[qx];
856  }
857  }
858  }
859  for (int qz = 0; qz < Q1D; ++qz)
860  {
861  const double wz = B(qz,dz);
862  for (int qy = 0; qy < Q1D; ++qy)
863  {
864  for (int qx = 0; qx < Q1D; ++qx)
865  {
866  sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
867  }
868  }
869  }
870  }
871  for (int qz = 0; qz < Q1D; ++qz)
872  {
873  for (int qy = 0; qy < Q1D; ++qy)
874  {
875  for (int qx = 0; qx < Q1D; ++qx)
876  {
877  sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
878  }
879  }
880  }
881  for (int qz = 0; qz < Q1D; ++qz)
882  {
883  double sol_xy[max_D1D][max_D1D];
884  for (int dy = 0; dy < D1D; ++dy)
885  {
886  for (int dx = 0; dx < D1D; ++dx)
887  {
888  sol_xy[dy][dx] = 0;
889  }
890  }
891  for (int qy = 0; qy < Q1D; ++qy)
892  {
893  double sol_x[max_D1D];
894  for (int dx = 0; dx < D1D; ++dx)
895  {
896  sol_x[dx] = 0;
897  }
898  for (int qx = 0; qx < Q1D; ++qx)
899  {
900  const double s = sol_xyz[qz][qy][qx];
901  for (int dx = 0; dx < D1D; ++dx)
902  {
903  sol_x[dx] += Bt(dx,qx) * s;
904  }
905  }
906  for (int dy = 0; dy < D1D; ++dy)
907  {
908  const double wy = Bt(dy,qy);
909  for (int dx = 0; dx < D1D; ++dx)
910  {
911  sol_xy[dy][dx] += wy * sol_x[dx];
912  }
913  }
914  }
915  for (int dz = 0; dz < D1D; ++dz)
916  {
917  const double wz = Bt(dz,qz);
918  for (int dy = 0; dy < D1D; ++dy)
919  {
920  for (int dx = 0; dx < D1D; ++dx)
921  {
922  Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
923  }
924  }
925  }
926  }
927  });
928 }
929 
930 template<int T_D1D = 0, int T_Q1D = 0>
931 static void SmemPAMassApply3D(const int NE,
932  const Array<double> &b_,
933  const Array<double> &bt_,
934  const Vector &d_,
935  const Vector &x_,
936  Vector &y_,
937  const int d1d = 0,
938  const int q1d = 0)
939 {
940  MFEM_CONTRACT_VAR(bt_);
941  const int D1D = T_D1D ? T_D1D : d1d;
942  const int Q1D = T_Q1D ? T_Q1D : q1d;
943  constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D;
944  constexpr int M1D = T_D1D ? T_D1D : MAX_D1D;
945  MFEM_VERIFY(D1D <= M1D, "");
946  MFEM_VERIFY(Q1D <= M1Q, "");
947  auto b = Reshape(b_.Read(), Q1D, D1D);
948  auto d = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
949  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
950  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
951  MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
952  {
953  const int D1D = T_D1D ? T_D1D : d1d;
954  const int Q1D = T_Q1D ? T_Q1D : q1d;
955  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
956  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
957  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
958  MFEM_SHARED double sDQ[MQ1*MD1];
959  double (*B)[MD1] = (double (*)[MD1]) sDQ;
960  double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
961  MFEM_SHARED double sm0[MDQ*MDQ*MDQ];
962  MFEM_SHARED double sm1[MDQ*MDQ*MDQ];
963  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
964  double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
965  double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
966  double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
967  double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
968  double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
969  MFEM_FOREACH_THREAD(dy,y,D1D)
970  {
971  MFEM_FOREACH_THREAD(dx,x,D1D)
972  {
973  MFEM_UNROLL(MD1)
974  for (int dz = 0; dz < D1D; ++dz)
975  {
976  X[dz][dy][dx] = x(dx,dy,dz,e);
977  }
978  }
979  MFEM_FOREACH_THREAD(dx,x,Q1D)
980  {
981  B[dx][dy] = b(dx,dy);
982  }
983  }
984  MFEM_SYNC_THREAD;
985  MFEM_FOREACH_THREAD(dy,y,D1D)
986  {
987  MFEM_FOREACH_THREAD(qx,x,Q1D)
988  {
989  double u[D1D];
990  MFEM_UNROLL(MD1)
991  for (int dz = 0; dz < D1D; dz++)
992  {
993  u[dz] = 0;
994  }
995  MFEM_UNROLL(MD1)
996  for (int dx = 0; dx < D1D; ++dx)
997  {
998  MFEM_UNROLL(MD1)
999  for (int dz = 0; dz < D1D; ++dz)
1000  {
1001  u[dz] += X[dz][dy][dx] * B[qx][dx];
1002  }
1003  }
1004  MFEM_UNROLL(MD1)
1005  for (int dz = 0; dz < D1D; ++dz)
1006  {
1007  DDQ[dz][dy][qx] = u[dz];
1008  }
1009  }
1010  }
1011  MFEM_SYNC_THREAD;
1012  MFEM_FOREACH_THREAD(qy,y,Q1D)
1013  {
1014  MFEM_FOREACH_THREAD(qx,x,Q1D)
1015  {
1016  double u[D1D];
1017  MFEM_UNROLL(MD1)
1018  for (int dz = 0; dz < D1D; dz++)
1019  {
1020  u[dz] = 0;
1021  }
1022  MFEM_UNROLL(MD1)
1023  for (int dy = 0; dy < D1D; ++dy)
1024  {
1025  MFEM_UNROLL(MD1)
1026  for (int dz = 0; dz < D1D; dz++)
1027  {
1028  u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
1029  }
1030  }
1031  MFEM_UNROLL(MD1)
1032  for (int dz = 0; dz < D1D; dz++)
1033  {
1034  DQQ[dz][qy][qx] = u[dz];
1035  }
1036  }
1037  }
1038  MFEM_SYNC_THREAD;
1039  MFEM_FOREACH_THREAD(qy,y,Q1D)
1040  {
1041  MFEM_FOREACH_THREAD(qx,x,Q1D)
1042  {
1043  double u[Q1D];
1044  MFEM_UNROLL(MQ1)
1045  for (int qz = 0; qz < Q1D; qz++)
1046  {
1047  u[qz] = 0;
1048  }
1049  MFEM_UNROLL(MD1)
1050  for (int dz = 0; dz < D1D; ++dz)
1051  {
1052  MFEM_UNROLL(MQ1)
1053  for (int qz = 0; qz < Q1D; qz++)
1054  {
1055  u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1056  }
1057  }
1058  MFEM_UNROLL(MQ1)
1059  for (int qz = 0; qz < Q1D; qz++)
1060  {
1061  QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1062  }
1063  }
1064  }
1065  MFEM_SYNC_THREAD;
1066  MFEM_FOREACH_THREAD(d,y,D1D)
1067  {
1068  MFEM_FOREACH_THREAD(q,x,Q1D)
1069  {
1070  Bt[d][q] = b(q,d);
1071  }
1072  }
1073  MFEM_SYNC_THREAD;
1074  MFEM_FOREACH_THREAD(qy,y,Q1D)
1075  {
1076  MFEM_FOREACH_THREAD(dx,x,D1D)
1077  {
1078  double u[Q1D];
1079  MFEM_UNROLL(MQ1)
1080  for (int qz = 0; qz < Q1D; ++qz)
1081  {
1082  u[qz] = 0;
1083  }
1084  MFEM_UNROLL(MQ1)
1085  for (int qx = 0; qx < Q1D; ++qx)
1086  {
1087  MFEM_UNROLL(MQ1)
1088  for (int qz = 0; qz < Q1D; ++qz)
1089  {
1090  u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1091  }
1092  }
1093  MFEM_UNROLL(MQ1)
1094  for (int qz = 0; qz < Q1D; ++qz)
1095  {
1096  QQD[qz][qy][dx] = u[qz];
1097  }
1098  }
1099  }
1100  MFEM_SYNC_THREAD;
1101  MFEM_FOREACH_THREAD(dy,y,D1D)
1102  {
1103  MFEM_FOREACH_THREAD(dx,x,D1D)
1104  {
1105  double u[Q1D];
1106  MFEM_UNROLL(MQ1)
1107  for (int qz = 0; qz < Q1D; ++qz)
1108  {
1109  u[qz] = 0;
1110  }
1111  MFEM_UNROLL(MQ1)
1112  for (int qy = 0; qy < Q1D; ++qy)
1113  {
1114  MFEM_UNROLL(MQ1)
1115  for (int qz = 0; qz < Q1D; ++qz)
1116  {
1117  u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1118  }
1119  }
1120  MFEM_UNROLL(MQ1)
1121  for (int qz = 0; qz < Q1D; ++qz)
1122  {
1123  QDD[qz][dy][dx] = u[qz];
1124  }
1125  }
1126  }
1127  MFEM_SYNC_THREAD;
1128  MFEM_FOREACH_THREAD(dy,y,D1D)
1129  {
1130  MFEM_FOREACH_THREAD(dx,x,D1D)
1131  {
1132  double u[D1D];
1133  MFEM_UNROLL(MD1)
1134  for (int dz = 0; dz < D1D; ++dz)
1135  {
1136  u[dz] = 0;
1137  }
1138  MFEM_UNROLL(MQ1)
1139  for (int qz = 0; qz < Q1D; ++qz)
1140  {
1141  MFEM_UNROLL(MD1)
1142  for (int dz = 0; dz < D1D; ++dz)
1143  {
1144  u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1145  }
1146  }
1147  MFEM_UNROLL(MD1)
1148  for (int dz = 0; dz < D1D; ++dz)
1149  {
1150  y(dx,dy,dz,e) += u[dz];
1151  }
1152  }
1153  }
1154  });
1155 }
1156 
1157 static void PAMassApply(const int dim,
1158  const int D1D,
1159  const int Q1D,
1160  const int NE,
1161  const Array<double> &B,
1162  const Array<double> &Bt,
1163  const Vector &D,
1164  const Vector &X,
1165  Vector &Y)
1166 {
1167 #ifdef MFEM_USE_OCCA
1168  if (DeviceCanUseOcca())
1169  {
1170  if (dim == 2)
1171  {
1172  return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1173  }
1174  if (dim == 3)
1175  {
1176  return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1177  }
1178  MFEM_ABORT("OCCA PA Mass Apply unknown kernel!");
1179  }
1180 #endif // MFEM_USE_OCCA
1181  const int id = (D1D << 4) | Q1D;
1182 
1183  if (dim == 2)
1184  {
1185  switch (id)
1186  {
1187  case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1188  case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1189  case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1190  case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1191  case 0x35: return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y);
1192  case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1193  case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1194  case 0x46: return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y);
1195  case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1196  case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1197  case 0x57: return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y);
1198  case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1199  case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1200  case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1201  case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1202  case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1203  default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1204  }
1205  }
1206  else if (dim == 3)
1207  {
1208  switch (id)
1209  {
1210  case 0x22: return SmemPAMassApply3D<2,2>(NE,B,Bt,D,X,Y);
1211  case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1212  case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1213  case 0x26: return SmemPAMassApply3D<2,6>(NE,B,Bt,D,X,Y);
1214  case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1215  case 0x35: return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y);
1216  case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1217  case 0x37: return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y);
1218  case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1219  case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1220  case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1221  case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1222  case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1223  case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1224  case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1225  case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1226  case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1227  default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1228  }
1229  }
1230  mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl;
1231  MFEM_ABORT("Unknown kernel.");
1232 }
1233 
1234 void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const
1235 {
1236  if (DeviceCanUseCeed())
1237  {
1238  ceedOp->AddMult(x, y);
1239  }
1240  else
1241  {
1242  PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
1243  }
1244 }
1245 
1246 void MassIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
1247 {
1248  // Mass integrator is symmetric
1249  AddMultPA(x, y);
1250 }
1251 
1252 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe_base.hpp:235
int Size() const
Return the logical size of the array.
Definition: array.hpp:138
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:117
A coefficient that is constant across space and time.
Definition: coefficient.hpp:78
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:840
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
Definition: fespace.hpp:976
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:521
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:450
int Size() const
Returns the size of the vector.
Definition: vector.hpp:199
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe_base.hpp:170
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:928
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
int GetMapType() const
Returns the FiniteElement::MapType of the element describing how reference functions are mapped to ph...
Definition: fe_base.hpp:349
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:234
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Definition: intrules.hpp:250
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
const int MAX_Q1D
Definition: forall.hpp:29
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:433
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
Definition: gridfunc.hpp:798
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
int Dimension() const
Definition: mesh.hpp:999
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:623
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:88
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:31
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:366
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:454
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
Definition: mesh.cpp:348
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:28
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2783
RefCoord t[3]
Vector data type.
Definition: vector.hpp:60
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
Definition: vector.hpp:585
std::pair< int, int > occa_id_t
Definition: occa.hpp:78
RefCoord s[3]
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
Class representing a function through its values (scalar or vector) at quadrature points...
Definition: gridfunc.hpp:757
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:438