MFEM  v4.1.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_mass.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "libceed/mass.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 // PA Mass Integrator
23 
24 // PA Mass Assemble kernel
25 
26 void MassIntegrator::SetupPA(const FiniteElementSpace &fes, const bool force)
27 {
28  // Assuming the same element type
29  fespace = &fes;
30  Mesh *mesh = fes.GetMesh();
31  if (mesh->GetNE() == 0) { return; }
32  const FiniteElement &el = *fes.GetFE(0);
34  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T);
35 #ifdef MFEM_USE_CEED
36  if (DeviceCanUseCeed() && !force)
37  {
38  if (ceedDataPtr) { delete ceedDataPtr; }
39  CeedData* ptr = new CeedData();
40  ceedDataPtr = ptr;
41  InitCeedCoeff(Q, ptr);
42  return CeedPAMassAssemble(fes, *ir, *ptr);
43  }
44 #endif
45  dim = mesh->Dimension();
46  ne = fes.GetMesh()->GetNE();
47  nq = ir->GetNPoints();
48  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::COORDINATES |
49  GeometricFactors::JACOBIANS);
50  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
51  dofs1D = maps->ndof;
52  quad1D = maps->nqpt;
53  pa_data.SetSize(ne*nq, Device::GetDeviceMemoryType());
54  Vector coeff;
55  if (Q == nullptr)
56  {
57  coeff.SetSize(1);
58  coeff(0) = 1.0;
59  }
60  else if (ConstantCoefficient* cQ = dynamic_cast<ConstantCoefficient*>(Q))
61  {
62  coeff.SetSize(1);
63  coeff(0) = cQ->constant;
64  }
65  else
66  {
67  coeff.SetSize(nq * ne);
68  auto C = Reshape(coeff.HostWrite(), nq, ne);
69  for (int e = 0; e < ne; ++e)
70  {
72  for (int q = 0; q < nq; ++q)
73  {
74  C(q,e) = Q->Eval(T, ir->IntPoint(q));
75  }
76  }
77  }
78  if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); }
79  if (dim==2)
80  {
81  const int NE = ne;
82  const int NQ = nq;
83  const bool const_c = coeff.Size() == 1;
84  auto w = ir->GetWeights().Read();
85  auto J = Reshape(geom->J.Read(), NQ,2,2,NE);
86  auto C =
87  const_c ? Reshape(coeff.Read(), 1,1) : Reshape(coeff.Read(), NQ,NE);
88  auto v = Reshape(pa_data.Write(), NQ, NE);
89  MFEM_FORALL(e, NE,
90  {
91  for (int q = 0; q < NQ; ++q)
92  {
93  const double J11 = J(q,0,0,e);
94  const double J12 = J(q,1,0,e);
95  const double J21 = J(q,0,1,e);
96  const double J22 = J(q,1,1,e);
97  const double detJ = (J11*J22)-(J21*J12);
98  const double coeff = const_c ? C(0,0) : C(q,e);
99  v(q,e) = w[q] * coeff * detJ;
100  }
101  });
102  }
103  if (dim==3)
104  {
105  const int NE = ne;
106  const int NQ = nq;
107  const bool const_c = coeff.Size() == 1;
108  auto W = ir->GetWeights().Read();
109  auto J = Reshape(geom->J.Read(), NQ,3,3,NE);
110  auto C =
111  const_c ? Reshape(coeff.Read(), 1,1) : Reshape(coeff.Read(), NQ,NE);
112  auto v = Reshape(pa_data.Write(), NQ,NE);
113  MFEM_FORALL(e, NE,
114  {
115  for (int q = 0; q < NQ; ++q)
116  {
117  const double J11 = J(q,0,0,e), J12 = J(q,0,1,e), J13 = J(q,0,2,e);
118  const double J21 = J(q,1,0,e), J22 = J(q,1,1,e), J23 = J(q,1,2,e);
119  const double J31 = J(q,2,0,e), J32 = J(q,2,1,e), J33 = J(q,2,2,e);
120  const double detJ = J11 * (J22 * J33 - J32 * J23) -
121  /* */ J21 * (J12 * J33 - J32 * J13) +
122  /* */ J31 * (J12 * J23 - J22 * J13);
123  const double coeff = const_c ? C(0,0) : C(q,e);
124  v(q,e) = W[q] * coeff * detJ;
125  }
126  });
127  }
128 }
129 
130 void MassIntegrator::AssemblePA(const FiniteElementSpace &fes)
131 {
132  SetupPA(fes);
133 }
134 
135 
136 template<int T_D1D = 0, int T_Q1D = 0>
137 static void PAMassAssembleDiagonal2D(const int NE,
138  const Array<double> &b,
139  const Vector &d,
140  Vector &y,
141  const int d1d = 0,
142  const int q1d = 0)
143 {
144  const int D1D = T_D1D ? T_D1D : d1d;
145  const int Q1D = T_Q1D ? T_Q1D : q1d;
146  MFEM_VERIFY(D1D <= MAX_D1D, "");
147  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
148  auto B = Reshape(b.Read(), Q1D, D1D);
149  auto D = Reshape(d.Read(), Q1D, Q1D, NE);
150  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
151  MFEM_FORALL(e, NE,
152  {
153  const int D1D = T_D1D ? T_D1D : d1d;
154  const int Q1D = T_Q1D ? T_Q1D : q1d;
155  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
156  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
157  double QD[MQ1][MD1];
158  for (int qx = 0; qx < Q1D; ++qx)
159  {
160  for (int dy = 0; dy < D1D; ++dy)
161  {
162  QD[qx][dy] = 0.0;
163  for (int qy = 0; qy < Q1D; ++qy)
164  {
165  QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
166  }
167  }
168  }
169  for (int dy = 0; dy < D1D; ++dy)
170  {
171  for (int dx = 0; dx < D1D; ++dx)
172  {
173  for (int qx = 0; qx < Q1D; ++qx)
174  {
175  Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
176  }
177  }
178  }
179  });
180 }
181 
182 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
183 static void SmemPAMassAssembleDiagonal2D(const int NE,
184  const Array<double> &b_,
185  const Vector &d_,
186  Vector &y_,
187  const int d1d = 0,
188  const int q1d = 0)
189 {
190  const int D1D = T_D1D ? T_D1D : d1d;
191  const int Q1D = T_Q1D ? T_Q1D : q1d;
192  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
193  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
194  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
195  MFEM_VERIFY(D1D <= MD1, "");
196  MFEM_VERIFY(Q1D <= MQ1, "");
197  auto b = Reshape(b_.Read(), Q1D, D1D);
198  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
199  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
200  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
201  {
202  const int tidz = MFEM_THREAD_ID(z);
203  const int D1D = T_D1D ? T_D1D : d1d;
204  const int Q1D = T_Q1D ? T_Q1D : q1d;
205  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
206  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
207  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
208  MFEM_SHARED double B[MQ1][MD1];
209  MFEM_SHARED double QDZ[NBZ][MQ1][MD1];
210  double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
211  if (tidz == 0)
212  {
213  MFEM_FOREACH_THREAD(d,y,D1D)
214  {
215  MFEM_FOREACH_THREAD(q,x,Q1D)
216  {
217  B[q][d] = b(q,d);
218  }
219  }
220  }
221  MFEM_SYNC_THREAD;
222  MFEM_FOREACH_THREAD(qx,x,Q1D)
223  {
224  MFEM_FOREACH_THREAD(dy,y,D1D)
225  {
226  QD[qx][dy] = 0.0;
227  for (int qy = 0; qy < Q1D; ++qy)
228  {
229  QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
230  }
231  }
232  }
233  MFEM_SYNC_THREAD;
234  MFEM_FOREACH_THREAD(dy,y,D1D)
235  {
236  MFEM_FOREACH_THREAD(dx,x,D1D)
237  {
238  for (int qx = 0; qx < Q1D; ++qx)
239  {
240  // might need absolute values on next line
241  Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
242  }
243  }
244  }
245  });
246 }
247 
248 template<int T_D1D = 0, int T_Q1D = 0>
249 static void PAMassAssembleDiagonal3D(const int NE,
250  const Array<double> &b,
251  const Vector &d,
252  Vector &y,
253  const int d1d = 0,
254  const int q1d = 0)
255 {
256  const int D1D = T_D1D ? T_D1D : d1d;
257  const int Q1D = T_Q1D ? T_Q1D : q1d;
258  MFEM_VERIFY(D1D <= MAX_D1D, "");
259  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
260  auto B = Reshape(b.Read(), Q1D, D1D);
261  auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
262  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
263  MFEM_FORALL(e, NE,
264  {
265  const int D1D = T_D1D ? T_D1D : d1d;
266  const int Q1D = T_Q1D ? T_Q1D : q1d;
267  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
268  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
269  double QQD[MQ1][MQ1][MD1];
270  double QDD[MQ1][MD1][MD1];
271  for (int qx = 0; qx < Q1D; ++qx)
272  {
273  for (int qy = 0; qy < Q1D; ++qy)
274  {
275  for (int dz = 0; dz < D1D; ++dz)
276  {
277  QQD[qx][qy][dz] = 0.0;
278  for (int qz = 0; qz < Q1D; ++qz)
279  {
280  QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
281  }
282  }
283  }
284  }
285  for (int qx = 0; qx < Q1D; ++qx)
286  {
287  for (int dz = 0; dz < D1D; ++dz)
288  {
289  for (int dy = 0; dy < D1D; ++dy)
290  {
291  QDD[qx][dy][dz] = 0.0;
292  for (int qy = 0; qy < Q1D; ++qy)
293  {
294  QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
295  }
296  }
297  }
298  }
299  for (int dz = 0; dz < D1D; ++dz)
300  {
301  for (int dy = 0; dy < D1D; ++dy)
302  {
303  for (int dx = 0; dx < D1D; ++dx)
304  {
305  double t = 0.0;
306  for (int qx = 0; qx < Q1D; ++qx)
307  {
308  t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
309  }
310  Y(dx, dy, dz, e) += t;
311  }
312  }
313  }
314  });
315 }
316 
317 template<int T_D1D = 0, int T_Q1D = 0>
318 static void SmemPAMassAssembleDiagonal3D(const int NE,
319  const Array<double> &b_,
320  const Vector &d_,
321  Vector &y_,
322  const int d1d = 0,
323  const int q1d = 0)
324 {
325  const int D1D = T_D1D ? T_D1D : d1d;
326  const int Q1D = T_Q1D ? T_Q1D : q1d;
327  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
328  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
329  MFEM_VERIFY(D1D <= MD1, "");
330  MFEM_VERIFY(Q1D <= MQ1, "");
331  auto b = Reshape(b_.Read(), Q1D, D1D);
332  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
333  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
334  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
335  {
336  const int tidz = MFEM_THREAD_ID(z);
337  const int D1D = T_D1D ? T_D1D : d1d;
338  const int Q1D = T_Q1D ? T_Q1D : q1d;
339  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
340  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
341  MFEM_SHARED double B[MQ1][MD1];
342  MFEM_SHARED double QQD[MQ1][MQ1][MD1];
343  MFEM_SHARED double QDD[MQ1][MD1][MD1];
344  if (tidz == 0)
345  {
346  MFEM_FOREACH_THREAD(d,y,D1D)
347  {
348  MFEM_FOREACH_THREAD(q,x,Q1D)
349  {
350  B[q][d] = b(q,d);
351  }
352  }
353  }
354  MFEM_SYNC_THREAD;
355  MFEM_FOREACH_THREAD(qx,x,Q1D)
356  {
357  MFEM_FOREACH_THREAD(qy,y,Q1D)
358  {
359  MFEM_FOREACH_THREAD(dz,z,D1D)
360  {
361  QQD[qx][qy][dz] = 0.0;
362  for (int qz = 0; qz < Q1D; ++qz)
363  {
364  QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
365  }
366  }
367  }
368  }
369  MFEM_SYNC_THREAD;
370  MFEM_FOREACH_THREAD(qx,x,Q1D)
371  {
372  MFEM_FOREACH_THREAD(dz,z,D1D)
373  {
374  MFEM_FOREACH_THREAD(dy,y,D1D)
375  {
376  QDD[qx][dy][dz] = 0.0;
377  for (int qy = 0; qy < Q1D; ++qy)
378  {
379  QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
380  }
381  }
382  }
383  }
384  MFEM_SYNC_THREAD;
385  MFEM_FOREACH_THREAD(dz,z,D1D)
386  {
387  MFEM_FOREACH_THREAD(dy,y,D1D)
388  {
389  MFEM_FOREACH_THREAD(dx,x,D1D)
390  {
391  double t = 0.0;
392  for (int qx = 0; qx < Q1D; ++qx)
393  {
394  t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
395  }
396  Y(dx, dy, dz, e) += t;
397  }
398  }
399  }
400  });
401 }
402 
403 static void PAMassAssembleDiagonal(const int dim, const int D1D,
404  const int Q1D, const int NE,
405  const Array<double> &B,
406  const Vector &D,
407  Vector &Y)
408 {
409  if (dim == 2)
410  {
411  switch ((D1D << 4 ) | Q1D)
412  {
413  case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
414  case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
415  case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
416  case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
417  case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
418  case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
419  case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
420  case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
421  default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
422  }
423  }
424  else if (dim == 3)
425  {
426  switch ((D1D << 4 ) | Q1D)
427  {
428  case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
429  case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
430  case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
431  case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
432  case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
433  case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
434  case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
435  default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
436  }
437  }
438  MFEM_ABORT("Unknown kernel.");
439 }
440 
441 void MassIntegrator::AssembleDiagonalPA(Vector &diag)
442 {
443  if (pa_data.Size()==0) { SetupPA(*fespace, true); }
444  PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
445 }
446 
447 
448 #ifdef MFEM_USE_OCCA
449 // OCCA PA Mass Apply 2D kernel
450 static void OccaPAMassApply2D(const int D1D,
451  const int Q1D,
452  const int NE,
453  const Array<double> &B,
454  const Array<double> &Bt,
455  const Vector &D,
456  const Vector &X,
457  Vector &Y)
458 {
459  occa::properties props;
460  props["defines/D1D"] = D1D;
461  props["defines/Q1D"] = Q1D;
462  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
463  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
464  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
465  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
466  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
467  const occa_id_t id = std::make_pair(D1D,Q1D);
468  if (!Device::Allows(Backend::OCCA_CUDA))
469  {
470  static occa_kernel_t OccaMassApply2D_cpu;
471  if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end())
472  {
473  const occa::kernel MassApply2D_CPU =
474  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
475  "MassApply2D_CPU", props);
476  OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU);
477  }
478  OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
479  }
480  else
481  {
482  static occa_kernel_t OccaMassApply2D_gpu;
483  if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end())
484  {
485  const occa::kernel MassApply2D_GPU =
486  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
487  "MassApply2D_GPU", props);
488  OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU);
489  }
490  OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
491  }
492 }
493 
494 // OCCA PA Mass Apply 3D kernel
495 static void OccaPAMassApply3D(const int D1D,
496  const int Q1D,
497  const int NE,
498  const Array<double> &B,
499  const Array<double> &Bt,
500  const Vector &D,
501  const Vector &X,
502  Vector &Y)
503 {
504  occa::properties props;
505  props["defines/D1D"] = D1D;
506  props["defines/Q1D"] = Q1D;
507  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
508  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
509  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
510  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
511  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
512  const occa_id_t id = std::make_pair(D1D,Q1D);
513  if (!Device::Allows(Backend::OCCA_CUDA))
514  {
515  static occa_kernel_t OccaMassApply3D_cpu;
516  if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end())
517  {
518  const occa::kernel MassApply3D_CPU =
519  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
520  "MassApply3D_CPU", props);
521  OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU);
522  }
523  OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
524  }
525  else
526  {
527  static occa_kernel_t OccaMassApply3D_gpu;
528  if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end())
529  {
530  const occa::kernel MassApply3D_GPU =
531  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
532  "MassApply3D_GPU", props);
533  OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU);
534  }
535  OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
536  }
537 }
538 #endif // MFEM_USE_OCCA
539 
540 template<int T_D1D = 0, int T_Q1D = 0>
541 static void PAMassApply2D(const int NE,
542  const Array<double> &b_,
543  const Array<double> &bt_,
544  const Vector &d_,
545  const Vector &x_,
546  Vector &y_,
547  const int d1d = 0,
548  const int q1d = 0)
549 {
550  const int D1D = T_D1D ? T_D1D : d1d;
551  const int Q1D = T_Q1D ? T_Q1D : q1d;
552  MFEM_VERIFY(D1D <= MAX_D1D, "");
553  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
554  auto B = Reshape(b_.Read(), Q1D, D1D);
555  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
556  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
557  auto X = Reshape(x_.Read(), D1D, D1D, NE);
558  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
559  MFEM_FORALL(e, NE,
560  {
561  const int D1D = T_D1D ? T_D1D : d1d; // nvcc workaround
562  const int Q1D = T_Q1D ? T_Q1D : q1d;
563  // the following variables are evaluated at compile time
564  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
565  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
566  double sol_xy[max_Q1D][max_Q1D];
567  for (int qy = 0; qy < Q1D; ++qy)
568  {
569  for (int qx = 0; qx < Q1D; ++qx)
570  {
571  sol_xy[qy][qx] = 0.0;
572  }
573  }
574  for (int dy = 0; dy < D1D; ++dy)
575  {
576  double sol_x[max_Q1D];
577  for (int qy = 0; qy < Q1D; ++qy)
578  {
579  sol_x[qy] = 0.0;
580  }
581  for (int dx = 0; dx < D1D; ++dx)
582  {
583  const double s = X(dx,dy,e);
584  for (int qx = 0; qx < Q1D; ++qx)
585  {
586  sol_x[qx] += B(qx,dx)* s;
587  }
588  }
589  for (int qy = 0; qy < Q1D; ++qy)
590  {
591  const double d2q = B(qy,dy);
592  for (int qx = 0; qx < Q1D; ++qx)
593  {
594  sol_xy[qy][qx] += d2q * sol_x[qx];
595  }
596  }
597  }
598  for (int qy = 0; qy < Q1D; ++qy)
599  {
600  for (int qx = 0; qx < Q1D; ++qx)
601  {
602  sol_xy[qy][qx] *= D(qx,qy,e);
603  }
604  }
605  for (int qy = 0; qy < Q1D; ++qy)
606  {
607  double sol_x[max_D1D];
608  for (int dx = 0; dx < D1D; ++dx)
609  {
610  sol_x[dx] = 0.0;
611  }
612  for (int qx = 0; qx < Q1D; ++qx)
613  {
614  const double s = sol_xy[qy][qx];
615  for (int dx = 0; dx < D1D; ++dx)
616  {
617  sol_x[dx] += Bt(dx,qx) * s;
618  }
619  }
620  for (int dy = 0; dy < D1D; ++dy)
621  {
622  const double q2d = Bt(dy,qy);
623  for (int dx = 0; dx < D1D; ++dx)
624  {
625  Y(dx,dy,e) += q2d * sol_x[dx];
626  }
627  }
628  }
629  });
630 }
631 
632 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
633 static void SmemPAMassApply2D(const int NE,
634  const Array<double> &b_,
635  const Array<double> &bt_,
636  const Vector &d_,
637  const Vector &x_,
638  Vector &y_,
639  const int d1d = 0,
640  const int q1d = 0)
641 {
642  const int D1D = T_D1D ? T_D1D : d1d;
643  const int Q1D = T_Q1D ? T_Q1D : q1d;
644  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
645  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
646  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
647  MFEM_VERIFY(D1D <= MD1, "");
648  MFEM_VERIFY(Q1D <= MQ1, "");
649  auto b = Reshape(b_.Read(), Q1D, D1D);
650  auto D = Reshape(d_.Read(), Q1D, Q1D, NE);
651  auto x = Reshape(x_.Read(), D1D, D1D, NE);
652  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
653  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
654  {
655  const int tidz = MFEM_THREAD_ID(z);
656  const int D1D = T_D1D ? T_D1D : d1d;
657  const int Q1D = T_Q1D ? T_Q1D : q1d;
658  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
659  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
660  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
661  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
662  MFEM_SHARED double BBt[MQ1*MD1];
663  double (*B)[MD1] = (double (*)[MD1]) BBt;
664  double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
665  MFEM_SHARED double sm0[NBZ][MDQ*MDQ];
666  MFEM_SHARED double sm1[NBZ][MDQ*MDQ];
667  double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
668  double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
669  double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
670  double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
671  MFEM_FOREACH_THREAD(dy,y,D1D)
672  {
673  MFEM_FOREACH_THREAD(dx,x,D1D)
674  {
675  X[dy][dx] = x(dx,dy,e);
676  }
677  }
678  if (tidz == 0)
679  {
680  MFEM_FOREACH_THREAD(dy,y,D1D)
681  {
682  MFEM_FOREACH_THREAD(q,x,Q1D)
683  {
684  B[q][dy] = b(q,dy);
685  }
686  }
687  }
688  MFEM_SYNC_THREAD;
689  MFEM_FOREACH_THREAD(dy,y,D1D)
690  {
691  MFEM_FOREACH_THREAD(qx,x,Q1D)
692  {
693  double dq = 0.0;
694  for (int dx = 0; dx < D1D; ++dx)
695  {
696  dq += X[dy][dx] * B[qx][dx];
697  }
698  DQ[dy][qx] = dq;
699  }
700  }
701  MFEM_SYNC_THREAD;
702  MFEM_FOREACH_THREAD(qy,y,Q1D)
703  {
704  MFEM_FOREACH_THREAD(qx,x,Q1D)
705  {
706  double qq = 0.0;
707  for (int dy = 0; dy < D1D; ++dy)
708  {
709  qq += DQ[dy][qx] * B[qy][dy];
710  }
711  QQ[qy][qx] = qq * D(qx, qy, e);
712  }
713  }
714  MFEM_SYNC_THREAD;
715  if (tidz == 0)
716  {
717  MFEM_FOREACH_THREAD(dy,y,D1D)
718  {
719  MFEM_FOREACH_THREAD(q,x,Q1D)
720  {
721  Bt[dy][q] = b(q,dy);
722  }
723  }
724  }
725  MFEM_SYNC_THREAD;
726  MFEM_FOREACH_THREAD(qy,y,Q1D)
727  {
728  MFEM_FOREACH_THREAD(dx,x,D1D)
729  {
730  double dq = 0.0;
731  for (int qx = 0; qx < Q1D; ++qx)
732  {
733  dq += QQ[qy][qx] * Bt[dx][qx];
734  }
735  QD[qy][dx] = dq;
736  }
737  }
738  MFEM_SYNC_THREAD;
739  MFEM_FOREACH_THREAD(dy,y,D1D)
740  {
741  MFEM_FOREACH_THREAD(dx,x,D1D)
742  {
743  double dd = 0.0;
744  for (int qy = 0; qy < Q1D; ++qy)
745  {
746  dd += (QD[qy][dx] * Bt[dy][qy]);
747  }
748  Y(dx, dy, e) += dd;
749  }
750  }
751  });
752 }
753 
754 template<int T_D1D = 0, int T_Q1D = 0>
755 static void PAMassApply3D(const int NE,
756  const Array<double> &b_,
757  const Array<double> &bt_,
758  const Vector &d_,
759  const Vector &x_,
760  Vector &y_,
761  const int d1d = 0,
762  const int q1d = 0)
763 {
764  const int D1D = T_D1D ? T_D1D : d1d;
765  const int Q1D = T_Q1D ? T_Q1D : q1d;
766  MFEM_VERIFY(D1D <= MAX_D1D, "");
767  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
768  auto B = Reshape(b_.Read(), Q1D, D1D);
769  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
770  auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
771  auto X = Reshape(x_.Read(), D1D, D1D, D1D, NE);
772  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
773  MFEM_FORALL(e, NE,
774  {
775  const int D1D = T_D1D ? T_D1D : d1d;
776  const int Q1D = T_Q1D ? T_Q1D : q1d;
777  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
778  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
779  double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
780  for (int qz = 0; qz < Q1D; ++qz)
781  {
782  for (int qy = 0; qy < Q1D; ++qy)
783  {
784  for (int qx = 0; qx < Q1D; ++qx)
785  {
786  sol_xyz[qz][qy][qx] = 0.0;
787  }
788  }
789  }
790  for (int dz = 0; dz < D1D; ++dz)
791  {
792  double sol_xy[max_Q1D][max_Q1D];
793  for (int qy = 0; qy < Q1D; ++qy)
794  {
795  for (int qx = 0; qx < Q1D; ++qx)
796  {
797  sol_xy[qy][qx] = 0.0;
798  }
799  }
800  for (int dy = 0; dy < D1D; ++dy)
801  {
802  double sol_x[max_Q1D];
803  for (int qx = 0; qx < Q1D; ++qx)
804  {
805  sol_x[qx] = 0;
806  }
807  for (int dx = 0; dx < D1D; ++dx)
808  {
809  const double s = X(dx,dy,dz,e);
810  for (int qx = 0; qx < Q1D; ++qx)
811  {
812  sol_x[qx] += B(qx,dx) * s;
813  }
814  }
815  for (int qy = 0; qy < Q1D; ++qy)
816  {
817  const double wy = B(qy,dy);
818  for (int qx = 0; qx < Q1D; ++qx)
819  {
820  sol_xy[qy][qx] += wy * sol_x[qx];
821  }
822  }
823  }
824  for (int qz = 0; qz < Q1D; ++qz)
825  {
826  const double wz = B(qz,dz);
827  for (int qy = 0; qy < Q1D; ++qy)
828  {
829  for (int qx = 0; qx < Q1D; ++qx)
830  {
831  sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
832  }
833  }
834  }
835  }
836  for (int qz = 0; qz < Q1D; ++qz)
837  {
838  for (int qy = 0; qy < Q1D; ++qy)
839  {
840  for (int qx = 0; qx < Q1D; ++qx)
841  {
842  sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
843  }
844  }
845  }
846  for (int qz = 0; qz < Q1D; ++qz)
847  {
848  double sol_xy[max_D1D][max_D1D];
849  for (int dy = 0; dy < D1D; ++dy)
850  {
851  for (int dx = 0; dx < D1D; ++dx)
852  {
853  sol_xy[dy][dx] = 0;
854  }
855  }
856  for (int qy = 0; qy < Q1D; ++qy)
857  {
858  double sol_x[max_D1D];
859  for (int dx = 0; dx < D1D; ++dx)
860  {
861  sol_x[dx] = 0;
862  }
863  for (int qx = 0; qx < Q1D; ++qx)
864  {
865  const double s = sol_xyz[qz][qy][qx];
866  for (int dx = 0; dx < D1D; ++dx)
867  {
868  sol_x[dx] += Bt(dx,qx) * s;
869  }
870  }
871  for (int dy = 0; dy < D1D; ++dy)
872  {
873  const double wy = Bt(dy,qy);
874  for (int dx = 0; dx < D1D; ++dx)
875  {
876  sol_xy[dy][dx] += wy * sol_x[dx];
877  }
878  }
879  }
880  for (int dz = 0; dz < D1D; ++dz)
881  {
882  const double wz = Bt(dz,qz);
883  for (int dy = 0; dy < D1D; ++dy)
884  {
885  for (int dx = 0; dx < D1D; ++dx)
886  {
887  Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
888  }
889  }
890  }
891  }
892  });
893 }
894 
895 template<int T_D1D = 0, int T_Q1D = 0>
896 static void SmemPAMassApply3D(const int NE,
897  const Array<double> &b_,
898  const Array<double> &bt_,
899  const Vector &d_,
900  const Vector &x_,
901  Vector &y_,
902  const int d1d = 0,
903  const int q1d = 0)
904 {
905  const int D1D = T_D1D ? T_D1D : d1d;
906  const int Q1D = T_Q1D ? T_Q1D : q1d;
907  constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D;
908  constexpr int M1D = T_D1D ? T_D1D : MAX_D1D;
909  MFEM_VERIFY(D1D <= M1D, "");
910  MFEM_VERIFY(Q1D <= M1Q, "");
911  auto b = Reshape(b_.Read(), Q1D, D1D);
912  auto d = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
913  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
914  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
915  MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
916  {
917  const int D1D = T_D1D ? T_D1D : d1d;
918  const int Q1D = T_Q1D ? T_Q1D : q1d;
919  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
920  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
921  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
922  MFEM_SHARED double sDQ[MQ1*MD1];
923  double (*B)[MD1] = (double (*)[MD1]) sDQ;
924  double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
925  MFEM_SHARED double sm0[MDQ*MDQ*MDQ];
926  MFEM_SHARED double sm1[MDQ*MDQ*MDQ];
927  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
928  double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
929  double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
930  double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
931  double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
932  double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
933  MFEM_FOREACH_THREAD(dy,y,D1D)
934  {
935  MFEM_FOREACH_THREAD(dx,x,D1D)
936  {
937  MFEM_UNROLL(MD1)
938  for (int dz = 0; dz < D1D; ++dz)
939  {
940  X[dz][dy][dx] = x(dx,dy,dz,e);
941  }
942  }
943  MFEM_FOREACH_THREAD(dx,x,Q1D)
944  {
945  B[dx][dy] = b(dx,dy);
946  }
947  }
948  MFEM_SYNC_THREAD;
949  MFEM_FOREACH_THREAD(dy,y,D1D)
950  {
951  MFEM_FOREACH_THREAD(qx,x,Q1D)
952  {
953  double u[D1D];
954  MFEM_UNROLL(MD1)
955  for (int dz = 0; dz < D1D; dz++)
956  {
957  u[dz] = 0;
958  }
959  MFEM_UNROLL(MD1)
960  for (int dx = 0; dx < D1D; ++dx)
961  {
962  MFEM_UNROLL(MD1)
963  for (int dz = 0; dz < D1D; ++dz)
964  {
965  u[dz] += X[dz][dy][dx] * B[qx][dx];
966  }
967  }
968  MFEM_UNROLL(MD1)
969  for (int dz = 0; dz < D1D; ++dz)
970  {
971  DDQ[dz][dy][qx] = u[dz];
972  }
973  }
974  }
975  MFEM_SYNC_THREAD;
976  MFEM_FOREACH_THREAD(qy,y,Q1D)
977  {
978  MFEM_FOREACH_THREAD(qx,x,Q1D)
979  {
980  double u[D1D];
981  MFEM_UNROLL(MD1)
982  for (int dz = 0; dz < D1D; dz++)
983  {
984  u[dz] = 0;
985  }
986  MFEM_UNROLL(MD1)
987  for (int dy = 0; dy < D1D; ++dy)
988  {
989  MFEM_UNROLL(MD1)
990  for (int dz = 0; dz < D1D; dz++)
991  {
992  u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
993  }
994  }
995  MFEM_UNROLL(MD1)
996  for (int dz = 0; dz < D1D; dz++)
997  {
998  DQQ[dz][qy][qx] = u[dz];
999  }
1000  }
1001  }
1002  MFEM_SYNC_THREAD;
1003  MFEM_FOREACH_THREAD(qy,y,Q1D)
1004  {
1005  MFEM_FOREACH_THREAD(qx,x,Q1D)
1006  {
1007  double u[Q1D];
1008  MFEM_UNROLL(MQ1)
1009  for (int qz = 0; qz < Q1D; qz++)
1010  {
1011  u[qz] = 0;
1012  }
1013  MFEM_UNROLL(MD1)
1014  for (int dz = 0; dz < D1D; ++dz)
1015  {
1016  MFEM_UNROLL(MQ1)
1017  for (int qz = 0; qz < Q1D; qz++)
1018  {
1019  u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1020  }
1021  }
1022  MFEM_UNROLL(MQ1)
1023  for (int qz = 0; qz < Q1D; qz++)
1024  {
1025  QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1026  }
1027  }
1028  }
1029  MFEM_SYNC_THREAD;
1030  MFEM_FOREACH_THREAD(d,y,D1D)
1031  {
1032  MFEM_FOREACH_THREAD(q,x,Q1D)
1033  {
1034  Bt[d][q] = b(q,d);
1035  }
1036  }
1037  MFEM_SYNC_THREAD;
1038  MFEM_FOREACH_THREAD(qy,y,Q1D)
1039  {
1040  MFEM_FOREACH_THREAD(dx,x,D1D)
1041  {
1042  double u[Q1D];
1043  MFEM_UNROLL(MQ1)
1044  for (int qz = 0; qz < Q1D; ++qz)
1045  {
1046  u[qz] = 0;
1047  }
1048  MFEM_UNROLL(MQ1)
1049  for (int qx = 0; qx < Q1D; ++qx)
1050  {
1051  MFEM_UNROLL(MQ1)
1052  for (int qz = 0; qz < Q1D; ++qz)
1053  {
1054  u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1055  }
1056  }
1057  MFEM_UNROLL(MQ1)
1058  for (int qz = 0; qz < Q1D; ++qz)
1059  {
1060  QQD[qz][qy][dx] = u[qz];
1061  }
1062  }
1063  }
1064  MFEM_SYNC_THREAD;
1065  MFEM_FOREACH_THREAD(dy,y,D1D)
1066  {
1067  MFEM_FOREACH_THREAD(dx,x,D1D)
1068  {
1069  double u[Q1D];
1070  MFEM_UNROLL(MQ1)
1071  for (int qz = 0; qz < Q1D; ++qz)
1072  {
1073  u[qz] = 0;
1074  }
1075  MFEM_UNROLL(MQ1)
1076  for (int qy = 0; qy < Q1D; ++qy)
1077  {
1078  MFEM_UNROLL(MQ1)
1079  for (int qz = 0; qz < Q1D; ++qz)
1080  {
1081  u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1082  }
1083  }
1084  MFEM_UNROLL(MQ1)
1085  for (int qz = 0; qz < Q1D; ++qz)
1086  {
1087  QDD[qz][dy][dx] = u[qz];
1088  }
1089  }
1090  }
1091  MFEM_SYNC_THREAD;
1092  MFEM_FOREACH_THREAD(dy,y,D1D)
1093  {
1094  MFEM_FOREACH_THREAD(dx,x,D1D)
1095  {
1096  double u[D1D];
1097  MFEM_UNROLL(MD1)
1098  for (int dz = 0; dz < D1D; ++dz)
1099  {
1100  u[dz] = 0;
1101  }
1102  MFEM_UNROLL(MQ1)
1103  for (int qz = 0; qz < Q1D; ++qz)
1104  {
1105  MFEM_UNROLL(MD1)
1106  for (int dz = 0; dz < D1D; ++dz)
1107  {
1108  u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1109  }
1110  }
1111  MFEM_UNROLL(MD1)
1112  for (int dz = 0; dz < D1D; ++dz)
1113  {
1114  y(dx,dy,dz,e) += u[dz];
1115  }
1116  }
1117  }
1118  });
1119 }
1120 
1121 static void PAMassApply(const int dim,
1122  const int D1D,
1123  const int Q1D,
1124  const int NE,
1125  const Array<double> &B,
1126  const Array<double> &Bt,
1127  const Vector &D,
1128  const Vector &X,
1129  Vector &Y)
1130 {
1131 #ifdef MFEM_USE_OCCA
1132  if (DeviceCanUseOcca())
1133  {
1134  if (dim == 2)
1135  {
1136  return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1137  }
1138  if (dim == 3)
1139  {
1140  return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1141  }
1142  MFEM_ABORT("OCCA PA Mass Apply unknown kernel!");
1143  }
1144 #endif // MFEM_USE_OCCA
1145  const int id = (D1D << 4) | Q1D;
1146  if (dim == 2)
1147  {
1148  switch (id)
1149  {
1150  case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1151  case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1152  case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1153  case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1154  case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1155  case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1156  case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1157  case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1158  case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1159  case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1160  case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1161  case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1162  case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1163  default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1164  }
1165  }
1166  else if (dim == 3)
1167  {
1168  switch (id)
1169  {
1170  case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1171  case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1172  case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1173  case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1174  case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1175  case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1176  case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1177  case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1178  case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1179  case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1180  case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1181  case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1182  case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1183  default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1184  }
1185  }
1186  mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl;
1187  MFEM_ABORT("Unknown kernel.");
1188 }
1189 
1190 void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const
1191 {
1192 #ifdef MFEM_USE_CEED
1193  if (DeviceCanUseCeed())
1194  {
1195  const CeedScalar *x_ptr;
1196  CeedScalar *y_ptr;
1197  CeedMemType mem;
1198  CeedGetPreferredMemType(internal::ceed, &mem);
1199  if ( Device::Allows(Backend::CUDA) && mem==CEED_MEM_DEVICE )
1200  {
1201  x_ptr = x.Read();
1202  y_ptr = y.ReadWrite();
1203  }
1204  else
1205  {
1206  x_ptr = x.HostRead();
1207  y_ptr = y.HostReadWrite();
1208  mem = CEED_MEM_HOST;
1209  }
1210  CeedVectorSetArray(ceedDataPtr->u, mem, CEED_USE_POINTER,
1211  const_cast<CeedScalar*>(x_ptr));
1212  CeedVectorSetArray(ceedDataPtr->v, mem, CEED_USE_POINTER, y_ptr);
1213 
1214  CeedOperatorApplyAdd(ceedDataPtr->oper, ceedDataPtr->u, ceedDataPtr->v,
1215  CEED_REQUEST_IMMEDIATE);
1216 
1217  CeedVectorSyncArray(ceedDataPtr->v, mem);
1218  }
1219  else
1220 #endif
1221  {
1222  PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
1223  }
1224 }
1225 
1226 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:245
Abstract class for Finite Elements.
Definition: fe.hpp:232
int Size() const
Logical size of the array.
Definition: array.hpp:124
double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:353
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:103
Subclass constant coefficient.
Definition: coefficient.hpp:67
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:407
const Geometry::Type geom
Definition: ex1.cpp:40
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
int Size() const
Returns the size of the vector.
Definition: vector.hpp:157
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:162
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:693
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:85
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:180
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:134
const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:337
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Definition: intrules.hpp:248
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:763
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:349
const int MAX_Q1D
Definition: forall.hpp:36
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:295
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:273
int Dimension() const
Definition: mesh.hpp:744
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:441
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Definition: fe.cpp:370
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:333
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
Definition: mesh.cpp:338
int dim
Definition: ex24.cpp:43
const int MAX_D1D
Definition: forall.hpp:35
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement associated with i&#39;th element.
Definition: fespace.cpp:1642
double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:345
Vector data type.
Definition: vector.hpp:48
std::pair< int, int > occa_id_t
Definition: occa.hpp:78
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66