MFEM  v4.5.2
Finite element discretization library
bilininteg_mass_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "qfunction.hpp"
17 #include "bilininteg_mass_pa.hpp"
18 
19 using namespace std;
20 
21 namespace mfem
22 {
23 
24 // PA Mass Integrator
25 
26 // PA Mass Assemble kernel
27 
28 void MassIntegrator::AssemblePA(const FiniteElementSpace &fes)
29 {
30  const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
31  Device::GetDeviceMemoryType() : pa_mt;
32 
33  // Assuming the same element type
34  fespace = &fes;
35  Mesh *mesh = fes.GetMesh();
36  if (mesh->GetNE() == 0) { return; }
37  const FiniteElement &el = *fes.GetFE(0);
39  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0);
40  if (DeviceCanUseCeed())
41  {
42  delete ceedOp;
43  const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 ||
44  fes.IsVariableOrder();
45  if (mixed)
46  {
47  ceedOp = new ceed::MixedPAMassIntegrator(*this, fes, Q);
48  }
49  else
50  {
51  ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q);
52  }
53  return;
54  }
55  int map_type = el.GetMapType();
56  dim = mesh->Dimension();
57  ne = fes.GetMesh()->GetNE();
58  nq = ir->GetNPoints();
59  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::DETERMINANTS, mt);
60  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
61  dofs1D = maps->ndof;
62  quad1D = maps->nqpt;
63  pa_data.SetSize(ne*nq, mt);
64 
65  QuadratureSpace qs(*mesh, *ir);
66  CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED);
67 
68  if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); }
69  if (dim==2)
70  {
71  const int NE = ne;
72  const int Q1D = quad1D;
73  const bool const_c = coeff.Size() == 1;
74  const bool by_val = map_type == FiniteElement::VALUE;
75  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D);
76  const auto J = Reshape(geom->detJ.Read(), Q1D,Q1D,NE);
77  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) :
78  Reshape(coeff.Read(), Q1D,Q1D,NE);
79  auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE);
80  MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
81  {
82  MFEM_FOREACH_THREAD(qx,x,Q1D)
83  {
84  MFEM_FOREACH_THREAD(qy,y,Q1D)
85  {
86  const double detJ = J(qx,qy,e);
87  const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
88  v(qx,qy,e) = W(qx,qy) * coeff * (by_val ? detJ : 1.0/detJ);
89  }
90  }
91  });
92  }
93  if (dim==3)
94  {
95  const int NE = ne;
96  const int Q1D = quad1D;
97  const bool const_c = coeff.Size() == 1;
98  const bool by_val = map_type == FiniteElement::VALUE;
99  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D,Q1D);
100  const auto J = Reshape(geom->detJ.Read(), Q1D,Q1D,Q1D,NE);
101  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1,1) :
102  Reshape(coeff.Read(), Q1D,Q1D,Q1D,NE);
103  auto v = Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
104  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
105  {
106  MFEM_FOREACH_THREAD(qx,x,Q1D)
107  {
108  MFEM_FOREACH_THREAD(qy,y,Q1D)
109  {
110  MFEM_FOREACH_THREAD(qz,z,Q1D)
111  {
112  const double detJ = J(qx,qy,qz,e);
113  const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
114  v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * (by_val ? detJ : 1.0/detJ);
115  }
116  }
117  }
118  });
119  }
120 }
121 
122 template<int T_D1D = 0, int T_Q1D = 0>
123 static void PAMassAssembleDiagonal2D(const int NE,
124  const Array<double> &b,
125  const Vector &d,
126  Vector &y,
127  const int d1d = 0,
128  const int q1d = 0)
129 {
130  const int D1D = T_D1D ? T_D1D : d1d;
131  const int Q1D = T_Q1D ? T_Q1D : q1d;
132  MFEM_VERIFY(D1D <= MAX_D1D, "");
133  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
134  auto B = Reshape(b.Read(), Q1D, D1D);
135  auto D = Reshape(d.Read(), Q1D, Q1D, NE);
136  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
137  MFEM_FORALL(e, NE,
138  {
139  const int D1D = T_D1D ? T_D1D : d1d;
140  const int Q1D = T_Q1D ? T_Q1D : q1d;
141  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
142  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
143  double QD[MQ1][MD1];
144  for (int qx = 0; qx < Q1D; ++qx)
145  {
146  for (int dy = 0; dy < D1D; ++dy)
147  {
148  QD[qx][dy] = 0.0;
149  for (int qy = 0; qy < Q1D; ++qy)
150  {
151  QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
152  }
153  }
154  }
155  for (int dy = 0; dy < D1D; ++dy)
156  {
157  for (int dx = 0; dx < D1D; ++dx)
158  {
159  for (int qx = 0; qx < Q1D; ++qx)
160  {
161  Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
162  }
163  }
164  }
165  });
166 }
167 
168 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
169 static void SmemPAMassAssembleDiagonal2D(const int NE,
170  const Array<double> &b_,
171  const Vector &d_,
172  Vector &y_,
173  const int d1d = 0,
174  const int q1d = 0)
175 {
176  const int D1D = T_D1D ? T_D1D : d1d;
177  const int Q1D = T_Q1D ? T_Q1D : q1d;
178  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
179  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
180  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
181  MFEM_VERIFY(D1D <= MD1, "");
182  MFEM_VERIFY(Q1D <= MQ1, "");
183  auto b = Reshape(b_.Read(), Q1D, D1D);
184  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
185  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
186  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
187  {
188  const int tidz = MFEM_THREAD_ID(z);
189  const int D1D = T_D1D ? T_D1D : d1d;
190  const int Q1D = T_Q1D ? T_Q1D : q1d;
191  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
192  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
193  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
194  MFEM_SHARED double B[MQ1][MD1];
195  MFEM_SHARED double QDZ[NBZ][MQ1][MD1];
196  double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
197  if (tidz == 0)
198  {
199  MFEM_FOREACH_THREAD(d,y,D1D)
200  {
201  MFEM_FOREACH_THREAD(q,x,Q1D)
202  {
203  B[q][d] = b(q,d);
204  }
205  }
206  }
207  MFEM_SYNC_THREAD;
208  MFEM_FOREACH_THREAD(qx,x,Q1D)
209  {
210  MFEM_FOREACH_THREAD(dy,y,D1D)
211  {
212  QD[qx][dy] = 0.0;
213  for (int qy = 0; qy < Q1D; ++qy)
214  {
215  QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
216  }
217  }
218  }
219  MFEM_SYNC_THREAD;
220  MFEM_FOREACH_THREAD(dy,y,D1D)
221  {
222  MFEM_FOREACH_THREAD(dx,x,D1D)
223  {
224  for (int qx = 0; qx < Q1D; ++qx)
225  {
226  // might need absolute values on next line
227  Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
228  }
229  }
230  }
231  });
232 }
233 
234 template<int T_D1D = 0, int T_Q1D = 0>
235 static void PAMassAssembleDiagonal3D(const int NE,
236  const Array<double> &b,
237  const Vector &d,
238  Vector &y,
239  const int d1d = 0,
240  const int q1d = 0)
241 {
242  const int D1D = T_D1D ? T_D1D : d1d;
243  const int Q1D = T_Q1D ? T_Q1D : q1d;
244  MFEM_VERIFY(D1D <= MAX_D1D, "");
245  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
246  auto B = Reshape(b.Read(), Q1D, D1D);
247  auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
248  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
249  MFEM_FORALL(e, NE,
250  {
251  const int D1D = T_D1D ? T_D1D : d1d;
252  const int Q1D = T_Q1D ? T_Q1D : q1d;
253  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
254  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
255  double QQD[MQ1][MQ1][MD1];
256  double QDD[MQ1][MD1][MD1];
257  for (int qx = 0; qx < Q1D; ++qx)
258  {
259  for (int qy = 0; qy < Q1D; ++qy)
260  {
261  for (int dz = 0; dz < D1D; ++dz)
262  {
263  QQD[qx][qy][dz] = 0.0;
264  for (int qz = 0; qz < Q1D; ++qz)
265  {
266  QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
267  }
268  }
269  }
270  }
271  for (int qx = 0; qx < Q1D; ++qx)
272  {
273  for (int dz = 0; dz < D1D; ++dz)
274  {
275  for (int dy = 0; dy < D1D; ++dy)
276  {
277  QDD[qx][dy][dz] = 0.0;
278  for (int qy = 0; qy < Q1D; ++qy)
279  {
280  QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
281  }
282  }
283  }
284  }
285  for (int dz = 0; dz < D1D; ++dz)
286  {
287  for (int dy = 0; dy < D1D; ++dy)
288  {
289  for (int dx = 0; dx < D1D; ++dx)
290  {
291  double t = 0.0;
292  for (int qx = 0; qx < Q1D; ++qx)
293  {
294  t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
295  }
296  Y(dx, dy, dz, e) += t;
297  }
298  }
299  }
300  });
301 }
302 
303 template<int T_D1D = 0, int T_Q1D = 0>
304 static void SmemPAMassAssembleDiagonal3D(const int NE,
305  const Array<double> &b_,
306  const Vector &d_,
307  Vector &y_,
308  const int d1d = 0,
309  const int q1d = 0)
310 {
311  const int D1D = T_D1D ? T_D1D : d1d;
312  const int Q1D = T_Q1D ? T_Q1D : q1d;
313  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
314  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
315  MFEM_VERIFY(D1D <= MD1, "");
316  MFEM_VERIFY(Q1D <= MQ1, "");
317  auto b = Reshape(b_.Read(), Q1D, D1D);
318  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
319  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
320  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
321  {
322  const int tidz = MFEM_THREAD_ID(z);
323  const int D1D = T_D1D ? T_D1D : d1d;
324  const int Q1D = T_Q1D ? T_Q1D : q1d;
325  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
326  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
327  MFEM_SHARED double B[MQ1][MD1];
328  MFEM_SHARED double QQD[MQ1][MQ1][MD1];
329  MFEM_SHARED double QDD[MQ1][MD1][MD1];
330  if (tidz == 0)
331  {
332  MFEM_FOREACH_THREAD(d,y,D1D)
333  {
334  MFEM_FOREACH_THREAD(q,x,Q1D)
335  {
336  B[q][d] = b(q,d);
337  }
338  }
339  }
340  MFEM_SYNC_THREAD;
341  MFEM_FOREACH_THREAD(qx,x,Q1D)
342  {
343  MFEM_FOREACH_THREAD(qy,y,Q1D)
344  {
345  MFEM_FOREACH_THREAD(dz,z,D1D)
346  {
347  QQD[qx][qy][dz] = 0.0;
348  for (int qz = 0; qz < Q1D; ++qz)
349  {
350  QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
351  }
352  }
353  }
354  }
355  MFEM_SYNC_THREAD;
356  MFEM_FOREACH_THREAD(qx,x,Q1D)
357  {
358  MFEM_FOREACH_THREAD(dz,z,D1D)
359  {
360  MFEM_FOREACH_THREAD(dy,y,D1D)
361  {
362  QDD[qx][dy][dz] = 0.0;
363  for (int qy = 0; qy < Q1D; ++qy)
364  {
365  QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
366  }
367  }
368  }
369  }
370  MFEM_SYNC_THREAD;
371  MFEM_FOREACH_THREAD(dz,z,D1D)
372  {
373  MFEM_FOREACH_THREAD(dy,y,D1D)
374  {
375  MFEM_FOREACH_THREAD(dx,x,D1D)
376  {
377  double t = 0.0;
378  for (int qx = 0; qx < Q1D; ++qx)
379  {
380  t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
381  }
382  Y(dx, dy, dz, e) += t;
383  }
384  }
385  }
386  });
387 }
388 
389 static void PAMassAssembleDiagonal(const int dim, const int D1D,
390  const int Q1D, const int NE,
391  const Array<double> &B,
392  const Vector &D,
393  Vector &Y)
394 {
395  if (dim == 2)
396  {
397  switch ((D1D << 4 ) | Q1D)
398  {
399  case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
400  case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
401  case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
402  case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
403  case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
404  case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
405  case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
406  case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
407  default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
408  }
409  }
410  else if (dim == 3)
411  {
412  switch ((D1D << 4 ) | Q1D)
413  {
414  case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
415  case 0x24: return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y);
416  case 0x26: return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y);
417  case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
418  case 0x35: return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y);
419  case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
420  case 0x48: return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y);
421  case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
422  case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
423  case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
424  case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
425  default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
426  }
427  }
428  MFEM_ABORT("Unknown kernel.");
429 }
430 
431 void MassIntegrator::AssembleDiagonalPA(Vector &diag)
432 {
433  if (DeviceCanUseCeed())
434  {
435  ceedOp->GetDiagonal(diag);
436  }
437  else
438  {
439  PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
440  }
441 }
442 
443 
444 #ifdef MFEM_USE_OCCA
445 // OCCA PA Mass Apply 2D kernel
446 static void OccaPAMassApply2D(const int D1D,
447  const int Q1D,
448  const int NE,
449  const Array<double> &B,
450  const Array<double> &Bt,
451  const Vector &D,
452  const Vector &X,
453  Vector &Y)
454 {
455  occa::properties props;
456  props["defines/D1D"] = D1D;
457  props["defines/Q1D"] = Q1D;
458  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
459  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
460  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
461  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
462  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
463  const occa_id_t id = std::make_pair(D1D,Q1D);
464  if (!Device::Allows(Backend::OCCA_CUDA))
465  {
466  static occa_kernel_t OccaMassApply2D_cpu;
467  if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end())
468  {
469  const occa::kernel MassApply2D_CPU =
470  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
471  "MassApply2D_CPU", props);
472  OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU);
473  }
474  OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
475  }
476  else
477  {
478  static occa_kernel_t OccaMassApply2D_gpu;
479  if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end())
480  {
481  const occa::kernel MassApply2D_GPU =
482  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
483  "MassApply2D_GPU", props);
484  OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU);
485  }
486  OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
487  }
488 }
489 
490 // OCCA PA Mass Apply 3D kernel
491 static void OccaPAMassApply3D(const int D1D,
492  const int Q1D,
493  const int NE,
494  const Array<double> &B,
495  const Array<double> &Bt,
496  const Vector &D,
497  const Vector &X,
498  Vector &Y)
499 {
500  occa::properties props;
501  props["defines/D1D"] = D1D;
502  props["defines/Q1D"] = Q1D;
503  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
504  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
505  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
506  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
507  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
508  const occa_id_t id = std::make_pair(D1D,Q1D);
509  if (!Device::Allows(Backend::OCCA_CUDA))
510  {
511  static occa_kernel_t OccaMassApply3D_cpu;
512  if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end())
513  {
514  const occa::kernel MassApply3D_CPU =
515  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
516  "MassApply3D_CPU", props);
517  OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU);
518  }
519  OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
520  }
521  else
522  {
523  static occa_kernel_t OccaMassApply3D_gpu;
524  if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end())
525  {
526  const occa::kernel MassApply3D_GPU =
527  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
528  "MassApply3D_GPU", props);
529  OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU);
530  }
531  OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
532  }
533 }
534 #endif // MFEM_USE_OCCA
535 
536 template<int T_D1D = 0, int T_Q1D = 0>
537 static void PAMassApply2D(const int NE,
538  const Array<double> &b_,
539  const Array<double> &bt_,
540  const Vector &d_,
541  const Vector &x_,
542  Vector &y_,
543  const int d1d = 0,
544  const int q1d = 0)
545 {
546  MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, "");
547  MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, "");
548 
549  const auto B = b_.Read();
550  const auto Bt = bt_.Read();
551  const auto D = d_.Read();
552  const auto X = x_.Read();
553  auto Y = y_.ReadWrite();
554 
555  MFEM_FORALL(e, NE,
556  {
557  internal::PAMassApply2D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d);
558  });
559 }
560 
561 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
562 static void SmemPAMassApply2D(const int NE,
563  const Array<double> &b_,
564  const Array<double> &bt_,
565  const Vector &d_,
566  const Vector &x_,
567  Vector &y_,
568  const int d1d = 0,
569  const int q1d = 0)
570 {
571  MFEM_CONTRACT_VAR(bt_);
572  const int D1D = T_D1D ? T_D1D : d1d;
573  const int Q1D = T_Q1D ? T_Q1D : q1d;
574  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
575  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
576  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
577  MFEM_VERIFY(D1D <= MD1, "");
578  MFEM_VERIFY(Q1D <= MQ1, "");
579  const auto b = b_.Read();
580  const auto D = d_.Read();
581  const auto x = x_.Read();
582  auto Y = y_.ReadWrite();
583  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
584  {
585  internal::SmemPAMassApply2D_Element<T_D1D,T_Q1D,T_NBZ>(e, NE, b, D, x, Y, d1d, q1d);
586  });
587 }
588 
589 template<int T_D1D = 0, int T_Q1D = 0>
590 static void PAMassApply3D(const int NE,
591  const Array<double> &b_,
592  const Array<double> &bt_,
593  const Vector &d_,
594  const Vector &x_,
595  Vector &y_,
596  const int d1d = 0,
597  const int q1d = 0)
598 {
599  MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, "");
600  MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, "");
601 
602  const auto B = b_.Read();
603  const auto Bt = bt_.Read();
604  const auto D = d_.Read();
605  const auto X = x_.Read();
606  auto Y = y_.ReadWrite();
607 
608  MFEM_FORALL(e, NE,
609  {
610  internal::PAMassApply3D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d);
611  });
612 }
613 
614 template<int T_D1D = 0, int T_Q1D = 0>
615 static void SmemPAMassApply3D(const int NE,
616  const Array<double> &b_,
617  const Array<double> &bt_,
618  const Vector &d_,
619  const Vector &x_,
620  Vector &y_,
621  const int d1d = 0,
622  const int q1d = 0)
623 {
624  MFEM_CONTRACT_VAR(bt_);
625  const int D1D = T_D1D ? T_D1D : d1d;
626  const int Q1D = T_Q1D ? T_Q1D : q1d;
627  constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D;
628  constexpr int M1D = T_D1D ? T_D1D : MAX_D1D;
629  MFEM_VERIFY(D1D <= M1D, "");
630  MFEM_VERIFY(Q1D <= M1Q, "");
631  auto b = b_.Read();
632  auto d = d_.Read();
633  auto x = x_.Read();
634  auto y = y_.ReadWrite();
635  MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
636  {
637  internal::SmemPAMassApply3D_Element<T_D1D,T_Q1D>(e, NE, b, d, x, y, d1d, q1d);
638  });
639 }
640 
641 static void PAMassApply(const int dim,
642  const int D1D,
643  const int Q1D,
644  const int NE,
645  const Array<double> &B,
646  const Array<double> &Bt,
647  const Vector &D,
648  const Vector &X,
649  Vector &Y)
650 {
651 #ifdef MFEM_USE_OCCA
652  if (DeviceCanUseOcca())
653  {
654  if (dim == 2)
655  {
656  return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
657  }
658  if (dim == 3)
659  {
660  return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
661  }
662  MFEM_ABORT("OCCA PA Mass Apply unknown kernel!");
663  }
664 #endif // MFEM_USE_OCCA
665  const int id = (D1D << 4) | Q1D;
666 
667  if (dim == 2)
668  {
669  switch (id)
670  {
671  case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
672  case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
673  case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
674  case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
675  case 0x35: return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y);
676  case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
677  case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
678  case 0x46: return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y);
679  case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
680  case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
681  case 0x57: return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y);
682  case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
683  case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
684  case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
685  case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
686  case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
687  default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
688  }
689  }
690  else if (dim == 3)
691  {
692  switch (id)
693  {
694  case 0x22: return SmemPAMassApply3D<2,2>(NE,B,Bt,D,X,Y);
695  case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
696  case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
697  case 0x26: return SmemPAMassApply3D<2,6>(NE,B,Bt,D,X,Y);
698  case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
699  case 0x35: return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y);
700  case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
701  case 0x37: return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y);
702  case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
703  case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
704  case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
705  case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
706  case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
707  case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
708  case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
709  case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
710  case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
711  default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
712  }
713  }
714  mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl;
715  MFEM_ABORT("Unknown kernel.");
716 }
717 
718 void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const
719 {
720  if (DeviceCanUseCeed())
721  {
722  ceedOp->AddMult(x, y);
723  }
724  else
725  {
726  PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
727  }
728 }
729 
730 void MassIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
731 {
732  // Mass integrator is symmetric
733  AddMultPA(x, y);
734 }
735 
736 } // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:307
Abstract class for all finite elements.
Definition: fe_base.hpp:232
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition: fespace.hpp:463
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:120
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:840
int Dimension() const
Definition: mesh.hpp:1047
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
int Size() const
Returns the size of the vector.
Definition: vector.hpp:199
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:448
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe_base.hpp:168
STL namespace.
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:230
Class to represent a coefficient evaluated at quadrature points.
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2783
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Definition: mesh.cpp:5921
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:364
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
const int MAX_Q1D
Definition: forall.hpp:29
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
Represent a MassIntegrator with AssemblyLevel::Partial using libCEED.
Definition: mass.hpp:26
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:441
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:96
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:31
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
Definition: util.cpp:33
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:936
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:464
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
Definition: mesh.cpp:348
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:28
int GetMapType() const
Returns the FiniteElement::MapType of the element describing how reference functions are mapped to ph...
Definition: fe_base.hpp:348
int Size() const
Return the logical size of the array.
Definition: array.hpp:141
RefCoord t[3]
Vector data type.
Definition: vector.hpp:60
std::pair< int, int > occa_id_t
Definition: occa.hpp:78
Class representing the storage layout of a QuadratureFunction.
Definition: qspace.hpp:92
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131