MFEM  v4.5.1
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_mass_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "qfunction.hpp"
17 #include "bilininteg_mass_pa.hpp"
18 
19 using namespace std;
20 
21 namespace mfem
22 {
23 
24 // PA Mass Integrator
25 
26 // PA Mass Assemble kernel
27 
28 void MassIntegrator::AssemblePA(const FiniteElementSpace &fes)
29 {
30  const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
31  Device::GetDeviceMemoryType() : pa_mt;
32 
33  // Assuming the same element type
34  fespace = &fes;
35  Mesh *mesh = fes.GetMesh();
36  if (mesh->GetNE() == 0) { return; }
37  const FiniteElement &el = *fes.GetFE(0);
39  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0);
40  if (DeviceCanUseCeed())
41  {
42  delete ceedOp;
43  const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 ||
44  fes.IsVariableOrder();
45  if (mixed)
46  {
47  ceedOp = new ceed::MixedPAMassIntegrator(*this, fes, Q);
48  }
49  else
50  {
51  ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q);
52  }
53  return;
54  }
55  int map_type = el.GetMapType();
56  dim = mesh->Dimension();
57  ne = fes.GetMesh()->GetNE();
58  nq = ir->GetNPoints();
59  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::COORDINATES |
60  GeometricFactors::JACOBIANS, mt);
61  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
62  dofs1D = maps->ndof;
63  quad1D = maps->nqpt;
64  pa_data.SetSize(ne*nq, mt);
65 
66  QuadratureSpace qs(*mesh, *ir);
67  CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED);
68 
69  if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); }
70  if (dim==2)
71  {
72  const int NE = ne;
73  const int Q1D = quad1D;
74  const bool const_c = coeff.Size() == 1;
75  const bool by_val = map_type == FiniteElement::VALUE;
76  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D);
77  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,2,2,NE);
78  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) :
79  Reshape(coeff.Read(), Q1D,Q1D,NE);
80  auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE);
81  MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
82  {
83  MFEM_FOREACH_THREAD(qx,x,Q1D)
84  {
85  MFEM_FOREACH_THREAD(qy,y,Q1D)
86  {
87  const double J11 = J(qx,qy,0,0,e);
88  const double J12 = J(qx,qy,1,0,e);
89  const double J21 = J(qx,qy,0,1,e);
90  const double J22 = J(qx,qy,1,1,e);
91  const double detJ = (J11*J22)-(J21*J12);
92  const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
93  v(qx,qy,e) = W(qx,qy) * coeff * (by_val ? detJ : 1.0/detJ);
94  }
95  }
96  });
97  }
98  if (dim==3)
99  {
100  const int NE = ne;
101  const int Q1D = quad1D;
102  const bool const_c = coeff.Size() == 1;
103  const bool by_val = map_type == FiniteElement::VALUE;
104  const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D,Q1D);
105  const auto J = Reshape(geom->J.Read(), Q1D,Q1D,Q1D,3,3,NE);
106  const auto C = const_c ? Reshape(coeff.Read(), 1,1,1,1) :
107  Reshape(coeff.Read(), Q1D,Q1D,Q1D,NE);
108  auto v = Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
109  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
110  {
111  MFEM_FOREACH_THREAD(qx,x,Q1D)
112  {
113  MFEM_FOREACH_THREAD(qy,y,Q1D)
114  {
115  MFEM_FOREACH_THREAD(qz,z,Q1D)
116  {
117  const double J11 = J(qx,qy,qz,0,0,e);
118  const double J21 = J(qx,qy,qz,1,0,e);
119  const double J31 = J(qx,qy,qz,2,0,e);
120  const double J12 = J(qx,qy,qz,0,1,e);
121  const double J22 = J(qx,qy,qz,1,1,e);
122  const double J32 = J(qx,qy,qz,2,1,e);
123  const double J13 = J(qx,qy,qz,0,2,e);
124  const double J23 = J(qx,qy,qz,1,2,e);
125  const double J33 = J(qx,qy,qz,2,2,e);
126  const double detJ = J11 * (J22 * J33 - J32 * J23) -
127  /* */ J21 * (J12 * J33 - J32 * J13) +
128  /* */ J31 * (J12 * J23 - J22 * J13);
129  const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
130  v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * (by_val ? detJ : 1.0/detJ);
131  }
132  }
133  }
134  });
135  }
136 }
137 
138 template<int T_D1D = 0, int T_Q1D = 0>
139 static void PAMassAssembleDiagonal2D(const int NE,
140  const Array<double> &b,
141  const Vector &d,
142  Vector &y,
143  const int d1d = 0,
144  const int q1d = 0)
145 {
146  const int D1D = T_D1D ? T_D1D : d1d;
147  const int Q1D = T_Q1D ? T_Q1D : q1d;
148  MFEM_VERIFY(D1D <= MAX_D1D, "");
149  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
150  auto B = Reshape(b.Read(), Q1D, D1D);
151  auto D = Reshape(d.Read(), Q1D, Q1D, NE);
152  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
153  MFEM_FORALL(e, NE,
154  {
155  const int D1D = T_D1D ? T_D1D : d1d;
156  const int Q1D = T_Q1D ? T_Q1D : q1d;
157  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
158  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
159  double QD[MQ1][MD1];
160  for (int qx = 0; qx < Q1D; ++qx)
161  {
162  for (int dy = 0; dy < D1D; ++dy)
163  {
164  QD[qx][dy] = 0.0;
165  for (int qy = 0; qy < Q1D; ++qy)
166  {
167  QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
168  }
169  }
170  }
171  for (int dy = 0; dy < D1D; ++dy)
172  {
173  for (int dx = 0; dx < D1D; ++dx)
174  {
175  for (int qx = 0; qx < Q1D; ++qx)
176  {
177  Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
178  }
179  }
180  }
181  });
182 }
183 
184 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
185 static void SmemPAMassAssembleDiagonal2D(const int NE,
186  const Array<double> &b_,
187  const Vector &d_,
188  Vector &y_,
189  const int d1d = 0,
190  const int q1d = 0)
191 {
192  const int D1D = T_D1D ? T_D1D : d1d;
193  const int Q1D = T_Q1D ? T_Q1D : q1d;
194  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
195  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
196  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
197  MFEM_VERIFY(D1D <= MD1, "");
198  MFEM_VERIFY(Q1D <= MQ1, "");
199  auto b = Reshape(b_.Read(), Q1D, D1D);
200  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
201  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
202  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
203  {
204  const int tidz = MFEM_THREAD_ID(z);
205  const int D1D = T_D1D ? T_D1D : d1d;
206  const int Q1D = T_Q1D ? T_Q1D : q1d;
207  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
208  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
209  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
210  MFEM_SHARED double B[MQ1][MD1];
211  MFEM_SHARED double QDZ[NBZ][MQ1][MD1];
212  double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
213  if (tidz == 0)
214  {
215  MFEM_FOREACH_THREAD(d,y,D1D)
216  {
217  MFEM_FOREACH_THREAD(q,x,Q1D)
218  {
219  B[q][d] = b(q,d);
220  }
221  }
222  }
223  MFEM_SYNC_THREAD;
224  MFEM_FOREACH_THREAD(qx,x,Q1D)
225  {
226  MFEM_FOREACH_THREAD(dy,y,D1D)
227  {
228  QD[qx][dy] = 0.0;
229  for (int qy = 0; qy < Q1D; ++qy)
230  {
231  QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
232  }
233  }
234  }
235  MFEM_SYNC_THREAD;
236  MFEM_FOREACH_THREAD(dy,y,D1D)
237  {
238  MFEM_FOREACH_THREAD(dx,x,D1D)
239  {
240  for (int qx = 0; qx < Q1D; ++qx)
241  {
242  // might need absolute values on next line
243  Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
244  }
245  }
246  }
247  });
248 }
249 
250 template<int T_D1D = 0, int T_Q1D = 0>
251 static void PAMassAssembleDiagonal3D(const int NE,
252  const Array<double> &b,
253  const Vector &d,
254  Vector &y,
255  const int d1d = 0,
256  const int q1d = 0)
257 {
258  const int D1D = T_D1D ? T_D1D : d1d;
259  const int Q1D = T_Q1D ? T_Q1D : q1d;
260  MFEM_VERIFY(D1D <= MAX_D1D, "");
261  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
262  auto B = Reshape(b.Read(), Q1D, D1D);
263  auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
264  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
265  MFEM_FORALL(e, NE,
266  {
267  const int D1D = T_D1D ? T_D1D : d1d;
268  const int Q1D = T_Q1D ? T_Q1D : q1d;
269  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
270  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
271  double QQD[MQ1][MQ1][MD1];
272  double QDD[MQ1][MD1][MD1];
273  for (int qx = 0; qx < Q1D; ++qx)
274  {
275  for (int qy = 0; qy < Q1D; ++qy)
276  {
277  for (int dz = 0; dz < D1D; ++dz)
278  {
279  QQD[qx][qy][dz] = 0.0;
280  for (int qz = 0; qz < Q1D; ++qz)
281  {
282  QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
283  }
284  }
285  }
286  }
287  for (int qx = 0; qx < Q1D; ++qx)
288  {
289  for (int dz = 0; dz < D1D; ++dz)
290  {
291  for (int dy = 0; dy < D1D; ++dy)
292  {
293  QDD[qx][dy][dz] = 0.0;
294  for (int qy = 0; qy < Q1D; ++qy)
295  {
296  QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
297  }
298  }
299  }
300  }
301  for (int dz = 0; dz < D1D; ++dz)
302  {
303  for (int dy = 0; dy < D1D; ++dy)
304  {
305  for (int dx = 0; dx < D1D; ++dx)
306  {
307  double t = 0.0;
308  for (int qx = 0; qx < Q1D; ++qx)
309  {
310  t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
311  }
312  Y(dx, dy, dz, e) += t;
313  }
314  }
315  }
316  });
317 }
318 
319 template<int T_D1D = 0, int T_Q1D = 0>
320 static void SmemPAMassAssembleDiagonal3D(const int NE,
321  const Array<double> &b_,
322  const Vector &d_,
323  Vector &y_,
324  const int d1d = 0,
325  const int q1d = 0)
326 {
327  const int D1D = T_D1D ? T_D1D : d1d;
328  const int Q1D = T_Q1D ? T_Q1D : q1d;
329  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
330  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
331  MFEM_VERIFY(D1D <= MD1, "");
332  MFEM_VERIFY(Q1D <= MQ1, "");
333  auto b = Reshape(b_.Read(), Q1D, D1D);
334  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
335  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
336  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
337  {
338  const int tidz = MFEM_THREAD_ID(z);
339  const int D1D = T_D1D ? T_D1D : d1d;
340  const int Q1D = T_Q1D ? T_Q1D : q1d;
341  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
342  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
343  MFEM_SHARED double B[MQ1][MD1];
344  MFEM_SHARED double QQD[MQ1][MQ1][MD1];
345  MFEM_SHARED double QDD[MQ1][MD1][MD1];
346  if (tidz == 0)
347  {
348  MFEM_FOREACH_THREAD(d,y,D1D)
349  {
350  MFEM_FOREACH_THREAD(q,x,Q1D)
351  {
352  B[q][d] = b(q,d);
353  }
354  }
355  }
356  MFEM_SYNC_THREAD;
357  MFEM_FOREACH_THREAD(qx,x,Q1D)
358  {
359  MFEM_FOREACH_THREAD(qy,y,Q1D)
360  {
361  MFEM_FOREACH_THREAD(dz,z,D1D)
362  {
363  QQD[qx][qy][dz] = 0.0;
364  for (int qz = 0; qz < Q1D; ++qz)
365  {
366  QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
367  }
368  }
369  }
370  }
371  MFEM_SYNC_THREAD;
372  MFEM_FOREACH_THREAD(qx,x,Q1D)
373  {
374  MFEM_FOREACH_THREAD(dz,z,D1D)
375  {
376  MFEM_FOREACH_THREAD(dy,y,D1D)
377  {
378  QDD[qx][dy][dz] = 0.0;
379  for (int qy = 0; qy < Q1D; ++qy)
380  {
381  QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
382  }
383  }
384  }
385  }
386  MFEM_SYNC_THREAD;
387  MFEM_FOREACH_THREAD(dz,z,D1D)
388  {
389  MFEM_FOREACH_THREAD(dy,y,D1D)
390  {
391  MFEM_FOREACH_THREAD(dx,x,D1D)
392  {
393  double t = 0.0;
394  for (int qx = 0; qx < Q1D; ++qx)
395  {
396  t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
397  }
398  Y(dx, dy, dz, e) += t;
399  }
400  }
401  }
402  });
403 }
404 
405 static void PAMassAssembleDiagonal(const int dim, const int D1D,
406  const int Q1D, const int NE,
407  const Array<double> &B,
408  const Vector &D,
409  Vector &Y)
410 {
411  if (dim == 2)
412  {
413  switch ((D1D << 4 ) | Q1D)
414  {
415  case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
416  case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
417  case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
418  case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
419  case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
420  case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
421  case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
422  case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
423  default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
424  }
425  }
426  else if (dim == 3)
427  {
428  switch ((D1D << 4 ) | Q1D)
429  {
430  case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
431  case 0x24: return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y);
432  case 0x26: return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y);
433  case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
434  case 0x35: return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y);
435  case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
436  case 0x48: return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y);
437  case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
438  case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
439  case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
440  case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
441  default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
442  }
443  }
444  MFEM_ABORT("Unknown kernel.");
445 }
446 
447 void MassIntegrator::AssembleDiagonalPA(Vector &diag)
448 {
449  if (DeviceCanUseCeed())
450  {
451  ceedOp->GetDiagonal(diag);
452  }
453  else
454  {
455  PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
456  }
457 }
458 
459 
460 #ifdef MFEM_USE_OCCA
461 // OCCA PA Mass Apply 2D kernel
462 static void OccaPAMassApply2D(const int D1D,
463  const int Q1D,
464  const int NE,
465  const Array<double> &B,
466  const Array<double> &Bt,
467  const Vector &D,
468  const Vector &X,
469  Vector &Y)
470 {
471  occa::properties props;
472  props["defines/D1D"] = D1D;
473  props["defines/Q1D"] = Q1D;
474  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
475  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
476  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
477  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
478  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
479  const occa_id_t id = std::make_pair(D1D,Q1D);
480  if (!Device::Allows(Backend::OCCA_CUDA))
481  {
482  static occa_kernel_t OccaMassApply2D_cpu;
483  if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end())
484  {
485  const occa::kernel MassApply2D_CPU =
486  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
487  "MassApply2D_CPU", props);
488  OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU);
489  }
490  OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
491  }
492  else
493  {
494  static occa_kernel_t OccaMassApply2D_gpu;
495  if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end())
496  {
497  const occa::kernel MassApply2D_GPU =
498  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
499  "MassApply2D_GPU", props);
500  OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU);
501  }
502  OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
503  }
504 }
505 
506 // OCCA PA Mass Apply 3D kernel
507 static void OccaPAMassApply3D(const int D1D,
508  const int Q1D,
509  const int NE,
510  const Array<double> &B,
511  const Array<double> &Bt,
512  const Vector &D,
513  const Vector &X,
514  Vector &Y)
515 {
516  occa::properties props;
517  props["defines/D1D"] = D1D;
518  props["defines/Q1D"] = Q1D;
519  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
520  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
521  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
522  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
523  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
524  const occa_id_t id = std::make_pair(D1D,Q1D);
525  if (!Device::Allows(Backend::OCCA_CUDA))
526  {
527  static occa_kernel_t OccaMassApply3D_cpu;
528  if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end())
529  {
530  const occa::kernel MassApply3D_CPU =
531  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
532  "MassApply3D_CPU", props);
533  OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU);
534  }
535  OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
536  }
537  else
538  {
539  static occa_kernel_t OccaMassApply3D_gpu;
540  if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end())
541  {
542  const occa::kernel MassApply3D_GPU =
543  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
544  "MassApply3D_GPU", props);
545  OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU);
546  }
547  OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
548  }
549 }
550 #endif // MFEM_USE_OCCA
551 
552 template<int T_D1D = 0, int T_Q1D = 0>
553 static void PAMassApply2D(const int NE,
554  const Array<double> &b_,
555  const Array<double> &bt_,
556  const Vector &d_,
557  const Vector &x_,
558  Vector &y_,
559  const int d1d = 0,
560  const int q1d = 0)
561 {
562  MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, "");
563  MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, "");
564 
565  const auto B = b_.Read();
566  const auto Bt = bt_.Read();
567  const auto D = d_.Read();
568  const auto X = x_.Read();
569  auto Y = y_.ReadWrite();
570 
571  MFEM_FORALL(e, NE,
572  {
573  internal::PAMassApply2D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d);
574  });
575 }
576 
577 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
578 static void SmemPAMassApply2D(const int NE,
579  const Array<double> &b_,
580  const Array<double> &bt_,
581  const Vector &d_,
582  const Vector &x_,
583  Vector &y_,
584  const int d1d = 0,
585  const int q1d = 0)
586 {
587  MFEM_CONTRACT_VAR(bt_);
588  const int D1D = T_D1D ? T_D1D : d1d;
589  const int Q1D = T_Q1D ? T_Q1D : q1d;
590  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
591  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
592  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
593  MFEM_VERIFY(D1D <= MD1, "");
594  MFEM_VERIFY(Q1D <= MQ1, "");
595  const auto b = b_.Read();
596  const auto D = d_.Read();
597  const auto x = x_.Read();
598  auto Y = y_.ReadWrite();
599  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
600  {
601  internal::SmemPAMassApply2D_Element<T_D1D,T_Q1D,T_NBZ>(e, NE, b, D, x, Y, d1d, q1d);
602  });
603 }
604 
605 template<int T_D1D = 0, int T_Q1D = 0>
606 static void PAMassApply3D(const int NE,
607  const Array<double> &b_,
608  const Array<double> &bt_,
609  const Vector &d_,
610  const Vector &x_,
611  Vector &y_,
612  const int d1d = 0,
613  const int q1d = 0)
614 {
615  MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, "");
616  MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, "");
617 
618  const auto B = b_.Read();
619  const auto Bt = bt_.Read();
620  const auto D = d_.Read();
621  const auto X = x_.Read();
622  auto Y = y_.ReadWrite();
623 
624  MFEM_FORALL(e, NE,
625  {
626  internal::PAMassApply3D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d);
627  });
628 }
629 
630 template<int T_D1D = 0, int T_Q1D = 0>
631 static void SmemPAMassApply3D(const int NE,
632  const Array<double> &b_,
633  const Array<double> &bt_,
634  const Vector &d_,
635  const Vector &x_,
636  Vector &y_,
637  const int d1d = 0,
638  const int q1d = 0)
639 {
640  MFEM_CONTRACT_VAR(bt_);
641  const int D1D = T_D1D ? T_D1D : d1d;
642  const int Q1D = T_Q1D ? T_Q1D : q1d;
643  constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D;
644  constexpr int M1D = T_D1D ? T_D1D : MAX_D1D;
645  MFEM_VERIFY(D1D <= M1D, "");
646  MFEM_VERIFY(Q1D <= M1Q, "");
647  auto b = b_.Read();
648  auto d = d_.Read();
649  auto x = x_.Read();
650  auto y = y_.ReadWrite();
651  MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
652  {
653  internal::SmemPAMassApply3D_Element<T_D1D,T_Q1D>(e, NE, b, d, x, y, d1d, q1d);
654  });
655 }
656 
657 static void PAMassApply(const int dim,
658  const int D1D,
659  const int Q1D,
660  const int NE,
661  const Array<double> &B,
662  const Array<double> &Bt,
663  const Vector &D,
664  const Vector &X,
665  Vector &Y)
666 {
667 #ifdef MFEM_USE_OCCA
668  if (DeviceCanUseOcca())
669  {
670  if (dim == 2)
671  {
672  return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
673  }
674  if (dim == 3)
675  {
676  return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
677  }
678  MFEM_ABORT("OCCA PA Mass Apply unknown kernel!");
679  }
680 #endif // MFEM_USE_OCCA
681  const int id = (D1D << 4) | Q1D;
682 
683  if (dim == 2)
684  {
685  switch (id)
686  {
687  case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
688  case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
689  case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
690  case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
691  case 0x35: return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y);
692  case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
693  case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
694  case 0x46: return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y);
695  case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
696  case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
697  case 0x57: return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y);
698  case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
699  case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
700  case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
701  case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
702  case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
703  default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
704  }
705  }
706  else if (dim == 3)
707  {
708  switch (id)
709  {
710  case 0x22: return SmemPAMassApply3D<2,2>(NE,B,Bt,D,X,Y);
711  case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
712  case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
713  case 0x26: return SmemPAMassApply3D<2,6>(NE,B,Bt,D,X,Y);
714  case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
715  case 0x35: return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y);
716  case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
717  case 0x37: return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y);
718  case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
719  case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
720  case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
721  case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
722  case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
723  case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
724  case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
725  case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
726  case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
727  default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
728  }
729  }
730  mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl;
731  MFEM_ABORT("Unknown kernel.");
732 }
733 
734 void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const
735 {
736  if (DeviceCanUseCeed())
737  {
738  ceedOp->AddMult(x, y);
739  }
740  else
741  {
742  PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
743  }
744 }
745 
746 void MassIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
747 {
748  // Mass integrator is symmetric
749  AddMultPA(x, y);
750 }
751 
752 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe_base.hpp:235
int Size() const
Return the logical size of the array.
Definition: array.hpp:138
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:117
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition: fespace.hpp:463
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:840
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Definition: mesh.cpp:5908
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
int Size() const
Returns the size of the vector.
Definition: vector.hpp:200
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe_base.hpp:170
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:923
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
int GetMapType() const
Returns the FiniteElement::MapType of the element describing how reference functions are mapped to ph...
Definition: fe_base.hpp:349
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:235
Class to represent a coefficient evaluated at quadrature points.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
const int MAX_Q1D
Definition: forall.hpp:29
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:441
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
Represent a MassIntegrator with AssemblyLevel::Partial using libCEED.
Definition: mass.hpp:26
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
int Dimension() const
Definition: mesh.hpp:1006
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:96
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:31
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
Definition: util.cpp:33
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:366
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:465
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
Definition: mesh.cpp:348
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:28
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2781
RefCoord t[3]
Vector data type.
Definition: vector.hpp:60
std::pair< int, int > occa_id_t
Definition: occa.hpp:78
Class representing the storage layout of a QuadratureFunction.
Definition: qspace.hpp:92
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:449
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131