MFEM  v4.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_mass.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-443211. All Rights
3 // reserved. See file COPYRIGHT for details.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability see http://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the GNU Lesser General Public License (as published by the Free
10 // Software Foundation) version 2.1 dated February 1999.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
21 // PA Mass Integrator
22 
23 // PA Mass Assemble kernel
24 void MassIntegrator::AssemblePA(const FiniteElementSpace &fes)
25 {
26  // Assuming the same element type
27  Mesh *mesh = fes.GetMesh();
28  if (mesh->GetNE() == 0) { return; }
29  const FiniteElement &el = *fes.GetFE(0);
31  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T);
32  dim = mesh->Dimension();
33  ne = fes.GetMesh()->GetNE();
34  nq = ir->GetNPoints();
35  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::COORDINATES |
36  GeometricFactors::JACOBIANS);
37  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
38  dofs1D = maps->ndof;
39  quad1D = maps->nqpt;
40  pa_data.SetSize(ne*nq, Device::GetMemoryType());
41  ConstantCoefficient *const_coeff = dynamic_cast<ConstantCoefficient*>(Q);
42  // TODO: other types of coefficients ...
43  if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); }
44  if (dim==2)
45  {
46  double constant = 0.0;
47  if (const_coeff)
48  {
49  constant = const_coeff->constant;
50  }
51  else
52  {
53  MFEM_ABORT("Coefficient type not supported");
54  }
55  const int NE = ne;
56  const int NQ = nq;
57  auto w = ir->GetWeights().Read();
58  auto J = Reshape(geom->J.Read(), NQ,2,2,NE);
59  auto v = Reshape(pa_data.Write(), NQ, NE);
60  MFEM_FORALL(e, NE,
61  {
62  for (int q = 0; q < NQ; ++q)
63  {
64  const double J11 = J(q,0,0,e);
65  const double J12 = J(q,1,0,e);
66  const double J21 = J(q,0,1,e);
67  const double J22 = J(q,1,1,e);
68  const double detJ = (J11*J22)-(J21*J12);
69  v(q,e) = w[q] * constant * detJ;
70  }
71  });
72  }
73  if (dim==3)
74  {
75  double constant = 0.0;
76  if (const_coeff)
77  {
78  constant = const_coeff->constant;
79  }
80  else
81  {
82  MFEM_ABORT("Coefficient type not supported");
83  }
84  const int NE = ne;
85  const int NQ = nq;
86  auto W = ir->GetWeights().Read();
87  auto J = Reshape(geom->J.Read(), NQ,3,3,NE);
88  auto v = Reshape(pa_data.Write(), NQ,NE);
89  MFEM_FORALL(e, NE,
90  {
91  for (int q = 0; q < NQ; ++q)
92  {
93  const double J11 = J(q,0,0,e), J12 = J(q,0,1,e), J13 = J(q,0,2,e);
94  const double J21 = J(q,1,0,e), J22 = J(q,1,1,e), J23 = J(q,1,2,e);
95  const double J31 = J(q,2,0,e), J32 = J(q,2,1,e), J33 = J(q,2,2,e);
96  const double detJ = J11 * (J22 * J33 - J32 * J23) -
97  /* */ J21 * (J12 * J33 - J32 * J13) +
98  /* */ J31 * (J12 * J23 - J22 * J13);
99  v(q,e) = W[q] * constant * detJ;
100  }
101  });
102  }
103 }
104 
105 #ifdef MFEM_USE_OCCA
106 // OCCA PA Mass Apply 2D kernel
107 static void OccaPAMassApply2D(const int D1D,
108  const int Q1D,
109  const int NE,
110  const Array<double> &B,
111  const Array<double> &Bt,
112  const Vector &op,
113  const Vector &x,
114  Vector &y)
115 {
116  occa::properties props;
117  props["defines/D1D"] = D1D;
118  props["defines/Q1D"] = Q1D;
119  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
120  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
121  const occa::memory o_op = OccaMemoryRead(op.GetMemory(), op.Size());
122  const occa::memory o_x = OccaMemoryRead(x.GetMemory(), x.Size());
123  occa::memory o_y = OccaMemoryReadWrite(y.GetMemory(), y.Size());
124  const occa_id_t id = std::make_pair(D1D,Q1D);
125  if (!Device::Allows(Backend::OCCA_CUDA))
126  {
127  static occa_kernel_t OccaMassApply2D_cpu;
128  if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end())
129  {
130  const occa::kernel MassApply2D_CPU =
131  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
132  "MassApply2D_CPU", props);
133  OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU);
134  }
135  OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_op, o_x, o_y);
136  }
137  else
138  {
139  static occa_kernel_t OccaMassApply2D_gpu;
140  if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end())
141  {
142  const occa::kernel MassApply2D_GPU =
143  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
144  "MassApply2D_GPU", props);
145  OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU);
146  }
147  OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_op, o_x, o_y);
148  }
149 }
150 
151 // OCCA PA Mass Apply 3D kernel
152 static void OccaPAMassApply3D(const int D1D,
153  const int Q1D,
154  const int NE,
155  const Array<double> &B,
156  const Array<double> &Bt,
157  const Vector &op,
158  const Vector &x,
159  Vector &y)
160 {
161  occa::properties props;
162  props["defines/D1D"] = D1D;
163  props["defines/Q1D"] = Q1D;
164  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
165  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
166  const occa::memory o_op = OccaMemoryRead(op.GetMemory(), op.Size());
167  const occa::memory o_x = OccaMemoryRead(x.GetMemory(), x.Size());
168  occa::memory o_y = OccaMemoryReadWrite(y.GetMemory(), y.Size());
169  const occa_id_t id = std::make_pair(D1D,Q1D);
170  if (!Device::Allows(Backend::OCCA_CUDA))
171  {
172  static occa_kernel_t OccaMassApply3D_cpu;
173  if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end())
174  {
175  const occa::kernel MassApply3D_CPU =
176  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
177  "MassApply3D_CPU", props);
178  OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU);
179  }
180  OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_op, o_x, o_y);
181  }
182  else
183  {
184  static occa_kernel_t OccaMassApply3D_gpu;
185  if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end())
186  {
187  const occa::kernel MassApply3D_GPU =
188  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
189  "MassApply3D_GPU", props);
190  OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU);
191  }
192  OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_op, o_x, o_y);
193  }
194 }
195 #endif // MFEM_USE_OCCA
196 
197 template<const int T_D1D = 0,
198  const int T_Q1D = 0>
199 static void PAMassApply2D(const int NE,
200  const Array<double> &_B,
201  const Array<double> &_Bt,
202  const Vector &_op,
203  const Vector &_x,
204  Vector &_y,
205  const int d1d = 0,
206  const int q1d = 0)
207 {
208  const int D1D = T_D1D ? T_D1D : d1d;
209  const int Q1D = T_Q1D ? T_Q1D : q1d;
210  MFEM_VERIFY(D1D <= MAX_D1D, "");
211  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
212  auto B = Reshape(_B.Read(), Q1D, D1D);
213  auto Bt = Reshape(_Bt.Read(), D1D, Q1D);
214  auto op = Reshape(_op.Read(), Q1D, Q1D, NE);
215  auto x = Reshape(_x.Read(), D1D, D1D, NE);
216  auto y = Reshape(_y.ReadWrite(), D1D, D1D, NE);
217  MFEM_FORALL(e, NE,
218  {
219  const int D1D = T_D1D ? T_D1D : d1d; // nvcc workaround
220  const int Q1D = T_Q1D ? T_Q1D : q1d;
221  // the following variables are evaluated at compile time
222  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
223  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
224  double sol_xy[max_Q1D][max_Q1D];
225  for (int qy = 0; qy < Q1D; ++qy)
226  {
227  for (int qx = 0; qx < Q1D; ++qx)
228  {
229  sol_xy[qy][qx] = 0.0;
230  }
231  }
232  for (int dy = 0; dy < D1D; ++dy)
233  {
234  double sol_x[max_Q1D];
235  for (int qy = 0; qy < Q1D; ++qy)
236  {
237  sol_x[qy] = 0.0;
238  }
239  for (int dx = 0; dx < D1D; ++dx)
240  {
241  const double s = x(dx,dy,e);
242  for (int qx = 0; qx < Q1D; ++qx)
243  {
244  sol_x[qx] += B(qx,dx)* s;
245  }
246  }
247  for (int qy = 0; qy < Q1D; ++qy)
248  {
249  const double d2q = B(qy,dy);
250  for (int qx = 0; qx < Q1D; ++qx)
251  {
252  sol_xy[qy][qx] += d2q * sol_x[qx];
253  }
254  }
255  }
256  for (int qy = 0; qy < Q1D; ++qy)
257  {
258  for (int qx = 0; qx < Q1D; ++qx)
259  {
260  sol_xy[qy][qx] *= op(qx,qy,e);
261  }
262  }
263  for (int qy = 0; qy < Q1D; ++qy)
264  {
265  double sol_x[max_D1D];
266  for (int dx = 0; dx < D1D; ++dx)
267  {
268  sol_x[dx] = 0.0;
269  }
270  for (int qx = 0; qx < Q1D; ++qx)
271  {
272  const double s = sol_xy[qy][qx];
273  for (int dx = 0; dx < D1D; ++dx)
274  {
275  sol_x[dx] += Bt(dx,qx) * s;
276  }
277  }
278  for (int dy = 0; dy < D1D; ++dy)
279  {
280  const double q2d = Bt(dy,qy);
281  for (int dx = 0; dx < D1D; ++dx)
282  {
283  y(dx,dy,e) += q2d * sol_x[dx];
284  }
285  }
286  }
287  });
288 }
289 
290 template<const int T_D1D = 0,
291  const int T_Q1D = 0,
292  const int T_NBZ = 0>
293 static void SmemPAMassApply2D(const int NE,
294  const Array<double> &_b,
295  const Array<double> &_bt,
296  const Vector &_op,
297  const Vector &_x,
298  Vector &_y,
299  const int d1d = 0,
300  const int q1d = 0)
301 {
302  const int D1D = T_D1D ? T_D1D : d1d;
303  const int Q1D = T_Q1D ? T_Q1D : q1d;
304  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
305  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
306  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
307  MFEM_VERIFY(D1D <= MD1, "");
308  MFEM_VERIFY(Q1D <= MQ1, "");
309  auto b = Reshape(_b.Read(), Q1D, D1D);
310  auto op = Reshape(_op.Read(), Q1D, Q1D, NE);
311  auto x = Reshape(_x.Read(), D1D, D1D, NE);
312  auto y = Reshape(_y.ReadWrite(), D1D, D1D, NE);
313  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
314  {
315  const int tidz = MFEM_THREAD_ID(z);
316  const int D1D = T_D1D ? T_D1D : d1d;
317  const int Q1D = T_Q1D ? T_Q1D : q1d;
318  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
319  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
320  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
321  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
322  MFEM_SHARED double BBt[MQ1*MD1];
323  double (*B)[MD1] = (double (*)[MD1]) BBt;
324  double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
325  MFEM_SHARED double sm0[NBZ][MDQ*MDQ];
326  MFEM_SHARED double sm1[NBZ][MDQ*MDQ];
327  double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
328  double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
329  double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
330  double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
331  MFEM_FOREACH_THREAD(dy,y,D1D)
332  {
333  MFEM_FOREACH_THREAD(dx,x,D1D)
334  {
335  X[dy][dx] = x(dx,dy,e);
336  }
337  }
338  if (tidz == 0)
339  {
340  MFEM_FOREACH_THREAD(d,y,D1D)
341  {
342  MFEM_FOREACH_THREAD(q,x,Q1D)
343  {
344  B[q][d] = b(q,d);
345  }
346  }
347  }
348  MFEM_SYNC_THREAD;
349  MFEM_FOREACH_THREAD(dy,y,D1D)
350  {
351  MFEM_FOREACH_THREAD(qx,x,Q1D)
352  {
353  double dq = 0.0;
354  for (int dx = 0; dx < D1D; ++dx)
355  {
356  dq += X[dy][dx] * B[qx][dx];
357  }
358  DQ[dy][qx] = dq;
359  }
360  }
361  MFEM_SYNC_THREAD;
362  MFEM_FOREACH_THREAD(qy,y,Q1D)
363  {
364  MFEM_FOREACH_THREAD(qx,x,Q1D)
365  {
366  double qq = 0.0;
367  for (int dy = 0; dy < D1D; ++dy)
368  {
369  qq += DQ[dy][qx] * B[qy][dy];
370  }
371  QQ[qy][qx] = qq * op(qx, qy, e);
372  }
373  }
374  MFEM_SYNC_THREAD;
375  if (tidz == 0)
376  {
377  MFEM_FOREACH_THREAD(d,y,D1D)
378  {
379  MFEM_FOREACH_THREAD(q,x,Q1D)
380  {
381  Bt[d][q] = b(q,d);
382  }
383  }
384  }
385  MFEM_SYNC_THREAD;
386  MFEM_FOREACH_THREAD(qy,y,Q1D)
387  {
388  MFEM_FOREACH_THREAD(dx,x,D1D)
389  {
390  double dq = 0.0;
391  for (int qx = 0; qx < Q1D; ++qx)
392  {
393  dq += QQ[qy][qx] * Bt[dx][qx];
394  }
395  QD[qy][dx] = dq;
396  }
397  }
398  MFEM_SYNC_THREAD;
399  MFEM_FOREACH_THREAD(dy,y,D1D)
400  {
401  MFEM_FOREACH_THREAD(dx,x,D1D)
402  {
403  double dd = 0.0;
404  for (int qy = 0; qy < Q1D; ++qy)
405  {
406  dd += (QD[qy][dx] * Bt[dy][qy]);
407  }
408  y(dx, dy, e) += dd;
409  }
410  }
411  });
412 }
413 
414 template<const int T_D1D = 0,
415  const int T_Q1D = 0>
416 static void PAMassApply3D(const int NE,
417  const Array<double> &_B,
418  const Array<double> &_Bt,
419  const Vector &_op,
420  const Vector &_x,
421  Vector &_y,
422  const int d1d = 0,
423  const int q1d = 0)
424 {
425  const int D1D = T_D1D ? T_D1D : d1d;
426  const int Q1D = T_Q1D ? T_Q1D : q1d;
427  MFEM_VERIFY(D1D <= MAX_D1D, "");
428  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
429  auto B = Reshape(_B.Read(), Q1D, D1D);
430  auto Bt = Reshape(_Bt.Read(), D1D, Q1D);
431  auto op = Reshape(_op.Read(), Q1D, Q1D, Q1D, NE);
432  auto x = Reshape(_x.Read(), D1D, D1D, D1D, NE);
433  auto y = Reshape(_y.ReadWrite(), D1D, D1D, D1D, NE);
434  MFEM_FORALL(e, NE,
435  {
436  const int D1D = T_D1D ? T_D1D : d1d;
437  const int Q1D = T_Q1D ? T_Q1D : q1d;
438  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
439  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
440  double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
441  for (int qz = 0; qz < Q1D; ++qz)
442  {
443  for (int qy = 0; qy < Q1D; ++qy)
444  {
445  for (int qx = 0; qx < Q1D; ++qx)
446  {
447  sol_xyz[qz][qy][qx] = 0.0;
448  }
449  }
450  }
451  for (int dz = 0; dz < D1D; ++dz)
452  {
453  double sol_xy[max_Q1D][max_Q1D];
454  for (int qy = 0; qy < Q1D; ++qy)
455  {
456  for (int qx = 0; qx < Q1D; ++qx)
457  {
458  sol_xy[qy][qx] = 0.0;
459  }
460  }
461  for (int dy = 0; dy < D1D; ++dy)
462  {
463  double sol_x[max_Q1D];
464  for (int qx = 0; qx < Q1D; ++qx)
465  {
466  sol_x[qx] = 0;
467  }
468  for (int dx = 0; dx < D1D; ++dx)
469  {
470  const double s = x(dx,dy,dz,e);
471  for (int qx = 0; qx < Q1D; ++qx)
472  {
473  sol_x[qx] += B(qx,dx) * s;
474  }
475  }
476  for (int qy = 0; qy < Q1D; ++qy)
477  {
478  const double wy = B(qy,dy);
479  for (int qx = 0; qx < Q1D; ++qx)
480  {
481  sol_xy[qy][qx] += wy * sol_x[qx];
482  }
483  }
484  }
485  for (int qz = 0; qz < Q1D; ++qz)
486  {
487  const double wz = B(qz,dz);
488  for (int qy = 0; qy < Q1D; ++qy)
489  {
490  for (int qx = 0; qx < Q1D; ++qx)
491  {
492  sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
493  }
494  }
495  }
496  }
497  for (int qz = 0; qz < Q1D; ++qz)
498  {
499  for (int qy = 0; qy < Q1D; ++qy)
500  {
501  for (int qx = 0; qx < Q1D; ++qx)
502  {
503  sol_xyz[qz][qy][qx] *= op(qx,qy,qz,e);
504  }
505  }
506  }
507  for (int qz = 0; qz < Q1D; ++qz)
508  {
509  double sol_xy[max_D1D][max_D1D];
510  for (int dy = 0; dy < D1D; ++dy)
511  {
512  for (int dx = 0; dx < D1D; ++dx)
513  {
514  sol_xy[dy][dx] = 0;
515  }
516  }
517  for (int qy = 0; qy < Q1D; ++qy)
518  {
519  double sol_x[max_D1D];
520  for (int dx = 0; dx < D1D; ++dx)
521  {
522  sol_x[dx] = 0;
523  }
524  for (int qx = 0; qx < Q1D; ++qx)
525  {
526  const double s = sol_xyz[qz][qy][qx];
527  for (int dx = 0; dx < D1D; ++dx)
528  {
529  sol_x[dx] += Bt(dx,qx) * s;
530  }
531  }
532  for (int dy = 0; dy < D1D; ++dy)
533  {
534  const double wy = Bt(dy,qy);
535  for (int dx = 0; dx < D1D; ++dx)
536  {
537  sol_xy[dy][dx] += wy * sol_x[dx];
538  }
539  }
540  }
541  for (int dz = 0; dz < D1D; ++dz)
542  {
543  const double wz = Bt(dz,qz);
544  for (int dy = 0; dy < D1D; ++dy)
545  {
546  for (int dx = 0; dx < D1D; ++dx)
547  {
548  y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
549  }
550  }
551  }
552  }
553  });
554 }
555 
556 template<const int T_D1D = 0,
557  const int T_Q1D = 0>
558 static void SmemPAMassApply3D(const int NE,
559  const Array<double> &_b,
560  const Array<double> &_bt,
561  const Vector &_op,
562  const Vector &_x,
563  Vector &_y,
564  const int d1d = 0,
565  const int q1d = 0)
566 {
567  const int D1D = T_D1D ? T_D1D : d1d;
568  const int Q1D = T_Q1D ? T_Q1D : q1d;
569  constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D;
570  constexpr int M1D = T_D1D ? T_D1D : MAX_D1D;
571  MFEM_VERIFY(D1D <= M1D, "");
572  MFEM_VERIFY(Q1D <= M1Q, "");
573  auto b = Reshape(_b.Read(), Q1D, D1D);
574  auto op = Reshape(_op.Read(), Q1D, Q1D, Q1D, NE);
575  auto x = Reshape(_x.Read(), D1D, D1D, D1D, NE);
576  auto y = Reshape(_y.ReadWrite(), D1D, D1D, D1D, NE);
577  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
578  {
579  const int tidz = MFEM_THREAD_ID(z);
580  const int D1D = T_D1D ? T_D1D : d1d;
581  const int Q1D = T_Q1D ? T_Q1D : q1d;
582  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
583  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
584  constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
585  MFEM_SHARED double sDQ[MQ1*MD1];
586  double (*B)[MD1] = (double (*)[MD1]) sDQ;
587  double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
588  MFEM_SHARED double sm0[MDQ*MDQ*MDQ];
589  MFEM_SHARED double sm1[MDQ*MDQ*MDQ];
590  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
591  double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
592  double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
593  double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
594  double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
595  double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
596  MFEM_FOREACH_THREAD(dz,z,D1D)
597  {
598  MFEM_FOREACH_THREAD(dy,y,D1D)
599  {
600  MFEM_FOREACH_THREAD(dx,x,D1D)
601  {
602  X[dz][dy][dx] = x(dx,dy,dz,e);
603  }
604  }
605  }
606  if (tidz == 0)
607  {
608  MFEM_FOREACH_THREAD(d,y,D1D)
609  {
610  MFEM_FOREACH_THREAD(q,x,Q1D)
611  {
612  B[q][d] = b(q,d);
613  }
614  }
615  }
616  MFEM_SYNC_THREAD;
617  MFEM_FOREACH_THREAD(dz,z,D1D)
618  {
619  MFEM_FOREACH_THREAD(dy,y,D1D)
620  {
621  MFEM_FOREACH_THREAD(qx,x,Q1D)
622  {
623  double u = 0.0;
624  for (int dx = 0; dx < D1D; ++dx)
625  {
626  u += X[dz][dy][dx] * B[qx][dx];
627  }
628  DDQ[dz][dy][qx] = u;
629  }
630  }
631  }
632  MFEM_SYNC_THREAD;
633  MFEM_FOREACH_THREAD(dz,z,D1D)
634  {
635  MFEM_FOREACH_THREAD(qy,y,Q1D)
636  {
637  MFEM_FOREACH_THREAD(qx,x,Q1D)
638  {
639  double u = 0.0;
640  for (int dy = 0; dy < D1D; ++dy)
641  {
642  u += DDQ[dz][dy][qx] * B[qy][dy];
643  }
644  DQQ[dz][qy][qx] = u;
645  }
646  }
647  }
648  MFEM_SYNC_THREAD;
649  MFEM_FOREACH_THREAD(qz,z,Q1D)
650  {
651  MFEM_FOREACH_THREAD(qy,y,Q1D)
652  {
653  MFEM_FOREACH_THREAD(qx,x,Q1D)
654  {
655  double u = 0.0;
656  for (int dz = 0; dz < D1D; ++dz)
657  {
658  u += DQQ[dz][qy][qx] * B[qz][dz];
659  }
660  QQQ[qz][qy][qx] = u * op(qx,qy,qz,e);
661  }
662  }
663  }
664  MFEM_SYNC_THREAD;
665  if (tidz == 0)
666  {
667  MFEM_FOREACH_THREAD(d,y,D1D)
668  {
669  MFEM_FOREACH_THREAD(q,x,Q1D)
670  {
671  Bt[d][q] = b(q,d);
672  }
673  }
674  }
675  MFEM_SYNC_THREAD;
676  MFEM_FOREACH_THREAD(qz,z,Q1D)
677  {
678  MFEM_FOREACH_THREAD(qy,y,Q1D)
679  {
680  MFEM_FOREACH_THREAD(dx,x,D1D)
681  {
682  double u = 0.0;
683  for (int qx = 0; qx < Q1D; ++qx)
684  {
685  u += QQQ[qz][qy][qx] * Bt[dx][qx];
686  }
687  QQD[qz][qy][dx] = u;
688  }
689  }
690  }
691  MFEM_SYNC_THREAD;
692  MFEM_FOREACH_THREAD(qz,z,Q1D)
693  {
694  MFEM_FOREACH_THREAD(dy,y,D1D)
695  {
696  MFEM_FOREACH_THREAD(dx,x,D1D)
697  {
698  double u = 0.0;
699  for (int qy = 0; qy < Q1D; ++qy)
700  {
701  u += QQD[qz][qy][dx] * Bt[dy][qy];
702  }
703  QDD[qz][dy][dx] = u;
704  }
705  }
706  }
707  MFEM_SYNC_THREAD;
708  MFEM_FOREACH_THREAD(dz,z,D1D)
709  {
710  MFEM_FOREACH_THREAD(dy,y,D1D)
711  {
712  MFEM_FOREACH_THREAD(dx,x,D1D)
713  {
714  double u = 0.0;
715  for (int qz = 0; qz < Q1D; ++qz)
716  {
717  u += QDD[qz][dy][dx] * Bt[dz][qz];
718  }
719  y(dx,dy,dz,e) += u;
720  }
721  }
722  }
723  });
724 }
725 
726 static void PAMassApply(const int dim,
727  const int D1D,
728  const int Q1D,
729  const int NE,
730  const Array<double> &B,
731  const Array<double> &Bt,
732  const Vector &op,
733  const Vector &x,
734  Vector &y)
735 {
736 #ifdef MFEM_USE_OCCA
737  if (DeviceCanUseOcca())
738  {
739  if (dim == 2)
740  {
741  OccaPAMassApply2D(D1D, Q1D, NE, B, Bt, op, x, y);
742  return;
743  }
744  if (dim == 3)
745  {
746  OccaPAMassApply3D(D1D, Q1D, NE, B, Bt, op, x, y);
747  return;
748  }
749  MFEM_ABORT("OCCA PA Mass Apply unknown kernel!");
750  }
751 #endif // MFEM_USE_OCCA
752 
753  if (Device::Allows(Backend::RAJA_CUDA))
754  {
755  if (dim == 2)
756  {
757  switch ((D1D << 4 ) | Q1D)
758  {
759  case 0x22: return PAMassApply2D<2,2>(NE, B, Bt, op, x, y);
760  case 0x33: return PAMassApply2D<3,3>(NE, B, Bt, op, x, y);
761  case 0x44: return PAMassApply2D<4,4>(NE, B, Bt, op, x, y);
762  case 0x55: return PAMassApply2D<5,5>(NE, B, Bt, op, x, y);
763  case 0x66: return PAMassApply2D<6,6>(NE, B, Bt, op, x, y);
764  case 0x77: return PAMassApply2D<7,7>(NE, B, Bt, op, x, y);
765  case 0x88: return PAMassApply2D<8,8>(NE, B, Bt, op, x, y);
766  case 0x99: return PAMassApply2D<9,9>(NE, B, Bt, op, x, y);
767  default: return PAMassApply2D(NE, B, Bt, op, x, y, D1D, Q1D);
768  }
769  }
770  if (dim == 3)
771  {
772  switch ((D1D << 4 ) | Q1D)
773  {
774  case 0x23: return PAMassApply3D<2,3>(NE, B, Bt, op, x, y);
775  case 0x34: return PAMassApply3D<3,4>(NE, B, Bt, op, x, y);
776  case 0x45: return PAMassApply3D<4,5>(NE, B, Bt, op, x, y);
777  case 0x56: return PAMassApply3D<5,6>(NE, B, Bt, op, x, y);
778  case 0x67: return PAMassApply3D<6,7>(NE, B, Bt, op, x, y);
779  case 0x78: return PAMassApply3D<7,8>(NE, B, Bt, op, x, y);
780  case 0x89: return PAMassApply3D<8,9>(NE, B, Bt, op, x, y);
781  default: return PAMassApply3D(NE, B, Bt, op, x, y, D1D, Q1D);
782  }
783  }
784  }
785  else if (dim == 2)
786  {
787  switch ((D1D << 4 ) | Q1D)
788  {
789  case 0x22: return SmemPAMassApply2D<2,2,16>(NE, B, Bt, op, x, y);
790  case 0x33: return SmemPAMassApply2D<3,3,16>(NE, B, Bt, op, x, y);
791  case 0x44: return SmemPAMassApply2D<4,4,8>(NE, B, Bt, op, x, y);
792  case 0x55: return SmemPAMassApply2D<5,5,8>(NE, B, Bt, op, x, y);
793  case 0x66: return SmemPAMassApply2D<6,6,4>(NE, B, Bt, op, x, y);
794  case 0x77: return SmemPAMassApply2D<7,7,4>(NE, B, Bt, op, x, y);
795  case 0x88: return SmemPAMassApply2D<8,8,2>(NE, B, Bt, op, x, y);
796  case 0x99: return SmemPAMassApply2D<9,9,2>(NE, B, Bt, op, x, y);
797  default: return PAMassApply2D(NE, B, Bt, op, x, y, D1D, Q1D);
798  }
799  }
800  else if (dim == 3)
801  {
802  switch ((D1D << 4 ) | Q1D)
803  {
804  case 0x23: return SmemPAMassApply3D<2,3>(NE, B, Bt, op, x, y);
805  case 0x34: return SmemPAMassApply3D<3,4>(NE, B, Bt, op, x, y);
806  case 0x45: return SmemPAMassApply3D<4,5>(NE, B, Bt, op, x, y);
807  case 0x56: return SmemPAMassApply3D<5,6>(NE, B, Bt, op, x, y);
808  case 0x67: return SmemPAMassApply3D<6,7>(NE, B, Bt, op, x, y);
809  case 0x78: return SmemPAMassApply3D<7,8>(NE, B, Bt, op, x, y);
810  case 0x89: return SmemPAMassApply3D<8,9>(NE, B, Bt, op, x, y);
811  default: return PAMassApply3D(NE, B, Bt, op, x, y, D1D, Q1D);
812  }
813  }
814  MFEM_ABORT("Unknown kernel.");
815 }
816 
817 void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const
818 {
819  PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
820 }
821 
822 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:237
Abstract class for Finite Elements.
Definition: fe.hpp:229
int Size() const
Logical size of the array.
Definition: array.hpp:118
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:85
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:97
Subclass constant coefficient.
Definition: coefficient.hpp:67
const Geometry::Type geom
Definition: ex1.cpp:40
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
int Size() const
Returns the size of the vector.
Definition: vector.hpp:150
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:159
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:676
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:81
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:173
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:135
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:756
int dim
Definition: ex3.cpp:48
const int MAX_Q1D
Definition: forall.hpp:35
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:272
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:261
int Dimension() const
Definition: mesh.hpp:713
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:85
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Definition: fe.cpp:206
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
Definition: mesh.cpp:336
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:23
const int MAX_D1D
Definition: forall.hpp:34
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement associated with i&#39;th element.
Definition: fespace.cpp:1541
Vector data type.
Definition: vector.hpp:48
std::pair< int, int > occa_id_t
Definition: occa.hpp:78