MFEM  v4.1.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_diffusion.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "libceed/diffusion.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 // PA Diffusion Integrator
23 
24 // OCCA 2D Assemble kernel
25 #ifdef MFEM_USE_OCCA
26 static void OccaPADiffusionSetup2D(const int D1D,
27  const int Q1D,
28  const int NE,
29  const Array<double> &W,
30  const Vector &J,
31  const Vector &C,
32  Vector &op)
33 {
34  occa::properties props;
35  props["defines/D1D"] = D1D;
36  props["defines/Q1D"] = Q1D;
37  const occa::memory o_W = OccaMemoryRead(W.GetMemory(), W.Size());
38  const occa::memory o_J = OccaMemoryRead(J.GetMemory(), J.Size());
39  const occa::memory o_C = OccaMemoryRead(C.GetMemory(), C.Size());
40  occa::memory o_op = OccaMemoryWrite(op.GetMemory(), op.Size());
41  const bool const_c = C.Size() == 1;
42  const occa_id_t id = std::make_pair(D1D,Q1D);
43  static occa_kernel_t OccaDiffSetup2D_ker;
44  if (OccaDiffSetup2D_ker.find(id) == OccaDiffSetup2D_ker.end())
45  {
46  const occa::kernel DiffusionSetup2D =
47  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
48  "DiffusionSetup2D", props);
49  OccaDiffSetup2D_ker.emplace(id, DiffusionSetup2D);
50  }
51  OccaDiffSetup2D_ker.at(id)(NE, o_W, o_J, o_C, o_op, const_c);
52 }
53 
54 static void OccaPADiffusionSetup3D(const int D1D,
55  const int Q1D,
56  const int NE,
57  const Array<double> &W,
58  const Vector &J,
59  const Vector &C,
60  Vector &op)
61 {
62  occa::properties props;
63  props["defines/D1D"] = D1D;
64  props["defines/Q1D"] = Q1D;
65  const occa::memory o_W = OccaMemoryRead(W.GetMemory(), W.Size());
66  const occa::memory o_J = OccaMemoryRead(J.GetMemory(), J.Size());
67  const occa::memory o_C = OccaMemoryRead(C.GetMemory(), C.Size());
68  occa::memory o_op = OccaMemoryWrite(op.GetMemory(), op.Size());
69  const bool const_c = C.Size() == 1;
70  const occa_id_t id = std::make_pair(D1D,Q1D);
71  static occa_kernel_t OccaDiffSetup3D_ker;
72  if (OccaDiffSetup3D_ker.find(id) == OccaDiffSetup3D_ker.end())
73  {
74  const occa::kernel DiffusionSetup3D =
75  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
76  "DiffusionSetup3D", props);
77  OccaDiffSetup3D_ker.emplace(id, DiffusionSetup3D);
78  }
79  OccaDiffSetup3D_ker.at(id)(NE, o_W, o_J, o_C, o_op, const_c);
80 }
81 #endif // MFEM_USE_OCCA
82 
83 // PA Diffusion Assemble 2D kernel
84 static void PADiffusionSetup2D(const int Q1D,
85  const int NE,
86  const Array<double> &w,
87  const Vector &j,
88  const Vector &c,
89  Vector &d)
90 {
91  const int NQ = Q1D*Q1D;
92  const bool const_c = c.Size() == 1;
93  auto W = w.Read();
94  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
95  auto C = const_c ? Reshape(c.Read(), 1, 1) : Reshape(c.Read(), NQ, NE);
96  auto D = Reshape(d.Write(), NQ, 3, NE);
97 
98  MFEM_FORALL(e, NE,
99  {
100  for (int q = 0; q < NQ; ++q)
101  {
102  const double J11 = J(q,0,0,e);
103  const double J21 = J(q,1,0,e);
104  const double J12 = J(q,0,1,e);
105  const double J22 = J(q,1,1,e);
106  const double coeff = const_c ? C(0,0) : C(q,e);
107  const double c_detJ = W[q] * coeff / ((J11*J22)-(J21*J12));
108  D(q,0,e) = c_detJ * (J12*J12 + J22*J22); // 1,1
109  D(q,1,e) = -c_detJ * (J12*J11 + J22*J21); // 1,2
110  D(q,2,e) = c_detJ * (J11*J11 + J21*J21); // 2,2
111  }
112  });
113 }
114 
115 // PA Diffusion Assemble 3D kernel
116 static void PADiffusionSetup3D(const int Q1D,
117  const int NE,
118  const Array<double> &w,
119  const Vector &j,
120  const Vector &c,
121  Vector &d)
122 {
123  const int NQ = Q1D*Q1D*Q1D;
124  const bool const_c = c.Size() == 1;
125  auto W = w.Read();
126  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
127  auto C = const_c ? Reshape(c.Read(), 1, 1) : Reshape(c.Read(), NQ, NE);
128  auto D = Reshape(d.Write(), NQ, 6, NE);
129  MFEM_FORALL(e, NE,
130  {
131  for (int q = 0; q < NQ; ++q)
132  {
133  const double J11 = J(q,0,0,e);
134  const double J21 = J(q,1,0,e);
135  const double J31 = J(q,2,0,e);
136  const double J12 = J(q,0,1,e);
137  const double J22 = J(q,1,1,e);
138  const double J32 = J(q,2,1,e);
139  const double J13 = J(q,0,2,e);
140  const double J23 = J(q,1,2,e);
141  const double J33 = J(q,2,2,e);
142  const double detJ = J11 * (J22 * J33 - J32 * J23) -
143  /* */ J21 * (J12 * J33 - J32 * J13) +
144  /* */ J31 * (J12 * J23 - J22 * J13);
145  const double coeff = const_c ? C(0,0) : C(q,e);
146  const double c_detJ = W[q] * coeff / detJ;
147  // adj(J)
148  const double A11 = (J22 * J33) - (J23 * J32);
149  const double A12 = (J32 * J13) - (J12 * J33);
150  const double A13 = (J12 * J23) - (J22 * J13);
151  const double A21 = (J31 * J23) - (J21 * J33);
152  const double A22 = (J11 * J33) - (J13 * J31);
153  const double A23 = (J21 * J13) - (J11 * J23);
154  const double A31 = (J21 * J32) - (J31 * J22);
155  const double A32 = (J31 * J12) - (J11 * J32);
156  const double A33 = (J11 * J22) - (J12 * J21);
157  // detJ J^{-1} J^{-T} = (1/detJ) adj(J) adj(J)^T
158  D(q,0,e) = c_detJ * (A11*A11 + A12*A12 + A13*A13); // 1,1
159  D(q,1,e) = c_detJ * (A11*A21 + A12*A22 + A13*A23); // 2,1
160  D(q,2,e) = c_detJ * (A11*A31 + A12*A32 + A13*A33); // 3,1
161  D(q,3,e) = c_detJ * (A21*A21 + A22*A22 + A23*A23); // 2,2
162  D(q,4,e) = c_detJ * (A21*A31 + A22*A32 + A23*A33); // 3,2
163  D(q,5,e) = c_detJ * (A31*A31 + A32*A32 + A33*A33); // 3,3
164  }
165  });
166 }
167 
168 static void PADiffusionSetup(const int dim,
169  const int D1D,
170  const int Q1D,
171  const int NE,
172  const Array<double> &W,
173  const Vector &J,
174  const Vector &C,
175  Vector &D)
176 {
177  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADiffusionSetup"); }
178  if (dim == 2)
179  {
180 #ifdef MFEM_USE_OCCA
181  if (DeviceCanUseOcca())
182  {
183  OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
184  return;
185  }
186 #endif // MFEM_USE_OCCA
187  PADiffusionSetup2D(Q1D, NE, W, J, C, D);
188  }
189  if (dim == 3)
190  {
191 #ifdef MFEM_USE_OCCA
192  if (DeviceCanUseOcca())
193  {
194  OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
195  return;
196  }
197 #endif // MFEM_USE_OCCA
198  PADiffusionSetup3D(Q1D, NE, W, J, C, D);
199  }
200 }
201 
202 void DiffusionIntegrator::SetupPA(const FiniteElementSpace &fes,
203  const bool force)
204 {
205  // Assuming the same element type
206  fespace = &fes;
207  Mesh *mesh = fes.GetMesh();
208  if (mesh->GetNE() == 0) { return; }
209  const FiniteElement &el = *fes.GetFE(0);
210  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el);
211 #ifdef MFEM_USE_CEED
212  if (DeviceCanUseCeed() && !force)
213  {
214  if (ceedDataPtr) { delete ceedDataPtr; }
215  CeedData* ptr = new CeedData();
216  ceedDataPtr = ptr;
217  InitCeedCoeff(Q, ptr);
218  return CeedPADiffusionAssemble(fes, *ir, *ptr);
219  }
220 #endif
221  const int dims = el.GetDim();
222  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
223  const int nq = ir->GetNPoints();
224  dim = mesh->Dimension();
225  ne = fes.GetNE();
226  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
227  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
228  dofs1D = maps->ndof;
229  quad1D = maps->nqpt;
230  pa_data.SetSize(symmDims * nq * ne, Device::GetDeviceMemoryType());
231  Vector coeff;
232  if (Q == nullptr)
233  {
234  coeff.SetSize(1);
235  coeff(0) = 1.0;
236  }
237  else if (ConstantCoefficient* cQ = dynamic_cast<ConstantCoefficient*>(Q))
238  {
239  coeff.SetSize(1);
240  coeff(0) = cQ->constant;
241  }
242  else
243  {
244  coeff.SetSize(nq * ne);
245  auto C = Reshape(coeff.HostWrite(), nq, ne);
246  for (int e = 0; e < ne; ++e)
247  {
249  for (int q = 0; q < nq; ++q)
250  {
251  C(q,e) = Q->Eval(T, ir->IntPoint(q));
252  }
253  }
254  }
255  PADiffusionSetup(dim, dofs1D, quad1D, ne, ir->GetWeights(), geom->J, coeff,
256  pa_data);
257 }
258 
259 void DiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes)
260 {
261  SetupPA(fes);
262 }
263 
264 
265 template<int T_D1D = 0, int T_Q1D = 0>
266 static void PADiffusionDiagonal2D(const int NE,
267  const Array<double> &b,
268  const Array<double> &g,
269  const Vector &d,
270  Vector &y,
271  const int d1d = 0,
272  const int q1d = 0)
273 {
274  const int D1D = T_D1D ? T_D1D : d1d;
275  const int Q1D = T_Q1D ? T_Q1D : q1d;
276  MFEM_VERIFY(D1D <= MAX_D1D, "");
277  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
278  auto B = Reshape(b.Read(), Q1D, D1D);
279  auto G = Reshape(g.Read(), Q1D, D1D);
280  // note the different shape for D, this is a (symmetric) matrix so we only
281  // store necessary entries
282  auto D = Reshape(d.Read(), Q1D*Q1D, 3, NE);
283  auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
284  MFEM_FORALL(e, NE,
285  {
286  const int D1D = T_D1D ? T_D1D : d1d;
287  const int Q1D = T_Q1D ? T_Q1D : q1d;
288  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
289  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
290  // gradphi \cdot Q \gradphi has four terms
291  double QD0[MQ1][MD1];
292  double QD1[MQ1][MD1];
293  double QD2[MQ1][MD1];
294  for (int qx = 0; qx < Q1D; ++qx)
295  {
296  for (int dy = 0; dy < D1D; ++dy)
297  {
298  QD0[qx][dy] = 0.0;
299  QD1[qx][dy] = 0.0;
300  QD2[qx][dy] = 0.0;
301  for (int qy = 0; qy < Q1D; ++qy)
302  {
303  const int q = qx + qy * Q1D;
304  const double D0 = D(q,0,e);
305  const double D1 = D(q,1,e);
306  const double D2 = D(q,2,e);
307  QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D0;
308  QD1[qx][dy] += B(qy, dy) * G(qy, dy) * D1;
309  QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D2;
310  }
311  }
312  }
313  for (int dy = 0; dy < D1D; ++dy)
314  {
315  for (int dx = 0; dx < D1D; ++dx)
316  {
317  for (int qx = 0; qx < Q1D; ++qx)
318  {
319  Y(dx,dy,e) += G(qx, dx) * G(qx, dx) * QD0[qx][dy];
320  Y(dx,dy,e) += G(qx, dx) * B(qx, dx) * QD1[qx][dy];
321  Y(dx,dy,e) += B(qx, dx) * G(qx, dx) * QD1[qx][dy];
322  Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD2[qx][dy];
323  }
324  }
325  }
326  });
327 }
328 
329 // Shared memory PA Diffusion Diagonal 2D kernel
330 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
331 static void SmemPADiffusionDiagonal2D(const int NE,
332  const Array<double> &b_,
333  const Array<double> &g_,
334  const Vector &d_,
335  Vector &y_,
336  const int d1d = 0,
337  const int q1d = 0)
338 {
339  const int D1D = T_D1D ? T_D1D : d1d;
340  const int Q1D = T_Q1D ? T_Q1D : q1d;
341  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
342  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
343  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
344  MFEM_VERIFY(D1D <= MD1, "");
345  MFEM_VERIFY(Q1D <= MQ1, "");
346  auto b = Reshape(b_.Read(), Q1D, D1D);
347  auto g = Reshape(g_.Read(), Q1D, D1D);
348  auto D = Reshape(d_.Read(), Q1D*Q1D, 3, NE);
349  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
350  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
351  {
352  const int tidz = MFEM_THREAD_ID(z);
353  const int D1D = T_D1D ? T_D1D : d1d;
354  const int Q1D = T_Q1D ? T_Q1D : q1d;
355  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
356  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
357  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
358  MFEM_SHARED double BG[2][MQ1*MD1];
359  double (*B)[MD1] = (double (*)[MD1]) (BG+0);
360  double (*G)[MD1] = (double (*)[MD1]) (BG+1);
361  MFEM_SHARED double QD[4][NBZ][MD1][MQ1];
362  double (*QD0)[MD1] = (double (*)[MD1])(QD[0] + tidz);
363  double (*QD1)[MD1] = (double (*)[MD1])(QD[1] + tidz);
364  double (*QD2)[MD1] = (double (*)[MD1])(QD[3] + tidz);
365  if (tidz == 0)
366  {
367  MFEM_FOREACH_THREAD(d,y,D1D)
368  {
369  MFEM_FOREACH_THREAD(q,x,Q1D)
370  {
371  B[q][d] = b(q,d);
372  G[q][d] = g(q,d);
373  }
374  }
375  }
376  MFEM_SYNC_THREAD;
377  MFEM_FOREACH_THREAD(qx,x,Q1D)
378  {
379  MFEM_FOREACH_THREAD(dy,y,D1D)
380  {
381  QD0[qx][dy] = 0.0;
382  QD1[qx][dy] = 0.0;
383  QD2[qx][dy] = 0.0;
384  for (int qy = 0; qy < Q1D; ++qy)
385  {
386  const int q = qx + qy * Q1D;
387  const double D0 = D(q,0,e);
388  const double D1 = D(q,1,e);
389  const double D2 = D(q,2,e);
390  const double By = B[qy][dy];
391  const double Gy = G[qy][dy];
392  const double BB = By * By;
393  const double BG = By * Gy;
394  const double GG = Gy * Gy;
395  QD0[qx][dy] += BB * D0;
396  QD1[qx][dy] += BG * D1;
397  QD2[qx][dy] += GG * D2;
398  }
399  }
400  }
401  MFEM_SYNC_THREAD;
402  MFEM_FOREACH_THREAD(dy,y,D1D)
403  {
404  MFEM_FOREACH_THREAD(dx,x,D1D)
405  {
406  for (int qx = 0; qx < Q1D; ++qx)
407  {
408  const double Bx = B[qx][dx];
409  const double Gx = G[qx][dx];
410  const double BB = Bx * Bx;
411  const double BG = Bx * Gx;
412  const double GG = Gx * Gx;
413  Y(dx,dy,e) += GG * QD0[qx][dy];
414  Y(dx,dy,e) += BG * QD1[qx][dy];
415  Y(dx,dy,e) += BG * QD1[qx][dy];
416  Y(dx,dy,e) += BB * QD2[qx][dy];
417  }
418  }
419  }
420  });
421 }
422 
423 template<int T_D1D = 0, int T_Q1D = 0>
424 static void PADiffusionDiagonal3D(const int NE,
425  const Array<double> &b,
426  const Array<double> &g,
427  const Vector &d,
428  Vector &y,
429  const int d1d = 0,
430  const int q1d = 0)
431 {
432  constexpr int DIM = 3;
433  const int D1D = T_D1D ? T_D1D : d1d;
434  const int Q1D = T_Q1D ? T_Q1D : q1d;
435  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
436  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
437  MFEM_VERIFY(D1D <= MD1, "");
438  MFEM_VERIFY(Q1D <= MQ1, "");
439  auto B = Reshape(b.Read(), Q1D, D1D);
440  auto G = Reshape(g.Read(), Q1D, D1D);
441  auto Q = Reshape(d.Read(), Q1D*Q1D*Q1D, 6, NE);
442  auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
443  MFEM_FORALL(e, NE,
444  {
445  const int D1D = T_D1D ? T_D1D : d1d;
446  const int Q1D = T_Q1D ? T_Q1D : q1d;
447  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
448  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
449  double QQD[MQ1][MQ1][MD1];
450  double QDD[MQ1][MD1][MD1];
451  for (int i = 0; i < DIM; ++i)
452  {
453  for (int j = 0; j < DIM; ++j)
454  {
455  // first tensor contraction, along z direction
456  for (int qx = 0; qx < Q1D; ++qx)
457  {
458  for (int qy = 0; qy < Q1D; ++qy)
459  {
460  for (int dz = 0; dz < D1D; ++dz)
461  {
462  QQD[qx][qy][dz] = 0.0;
463  for (int qz = 0; qz < Q1D; ++qz)
464  {
465  const int q = qx + (qy + qz * Q1D) * Q1D;
466  const int k = j >= i ?
467  3 - (3-i)*(2-i)/2 + j:
468  3 - (3-j)*(2-j)/2 + i;
469  const double O = Q(q,k,e);
470  const double Bz = B(qz,dz);
471  const double Gz = G(qz,dz);
472  const double L = i==2 ? Gz : Bz;
473  const double R = j==2 ? Gz : Bz;
474  QQD[qx][qy][dz] += L * O * R;
475  }
476  }
477  }
478  }
479  // second tensor contraction, along y direction
480  for (int qx = 0; qx < Q1D; ++qx)
481  {
482  for (int dz = 0; dz < D1D; ++dz)
483  {
484  for (int dy = 0; dy < D1D; ++dy)
485  {
486  QDD[qx][dy][dz] = 0.0;
487  for (int qy = 0; qy < Q1D; ++qy)
488  {
489  const double By = B(qy,dy);
490  const double Gy = G(qy,dy);
491  const double L = i==1 ? Gy : By;
492  const double R = j==1 ? Gy : By;
493  QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
494  }
495  }
496  }
497  }
498  // third tensor contraction, along x direction
499  for (int dz = 0; dz < D1D; ++dz)
500  {
501  for (int dy = 0; dy < D1D; ++dy)
502  {
503  for (int dx = 0; dx < D1D; ++dx)
504  {
505  for (int qx = 0; qx < Q1D; ++qx)
506  {
507  const double Bx = B(qx,dx);
508  const double Gx = G(qx,dx);
509  const double L = i==0 ? Gx : Bx;
510  const double R = j==0 ? Gx : Bx;
511  Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
512  }
513  }
514  }
515  }
516  }
517  }
518  });
519 }
520 
521 // Shared memory PA Diffusion Diagonal 3D kernel
522 template<int T_D1D = 0, int T_Q1D = 0>
523 static void SmemPADiffusionDiagonal3D(const int NE,
524  const Array<double> &b_,
525  const Array<double> &g_,
526  const Vector &d_,
527  Vector &y_,
528  const int d1d = 0,
529  const int q1d = 0)
530 {
531  constexpr int DIM = 3;
532  const int D1D = T_D1D ? T_D1D : d1d;
533  const int Q1D = T_Q1D ? T_Q1D : q1d;
534  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
535  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
536  MFEM_VERIFY(D1D <= MD1, "");
537  MFEM_VERIFY(Q1D <= MQ1, "");
538  auto b = Reshape(b_.Read(), Q1D, D1D);
539  auto g = Reshape(g_.Read(), Q1D, D1D);
540  auto D = Reshape(d_.Read(), Q1D*Q1D*Q1D, 6, NE);
541  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
542  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
543  {
544  const int tidz = MFEM_THREAD_ID(z);
545  const int D1D = T_D1D ? T_D1D : d1d;
546  const int Q1D = T_Q1D ? T_Q1D : q1d;
547  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
548  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
549  MFEM_SHARED double BG[2][MQ1*MD1];
550  double (*B)[MD1] = (double (*)[MD1]) (BG+0);
551  double (*G)[MD1] = (double (*)[MD1]) (BG+1);
552  MFEM_SHARED double QQD[MQ1][MQ1][MD1];
553  MFEM_SHARED double QDD[MQ1][MD1][MD1];
554  if (tidz == 0)
555  {
556  MFEM_FOREACH_THREAD(d,y,D1D)
557  {
558  MFEM_FOREACH_THREAD(q,x,Q1D)
559  {
560  B[q][d] = b(q,d);
561  G[q][d] = g(q,d);
562  }
563  }
564  }
565  MFEM_SYNC_THREAD;
566  for (int i = 0; i < DIM; ++i)
567  {
568  for (int j = 0; j < DIM; ++j)
569  {
570  // first tensor contraction, along z direction
571  MFEM_FOREACH_THREAD(qx,x,Q1D)
572  {
573  MFEM_FOREACH_THREAD(qy,y,Q1D)
574  {
575  MFEM_FOREACH_THREAD(dz,z,D1D)
576  {
577  QQD[qx][qy][dz] = 0.0;
578  for (int qz = 0; qz < Q1D; ++qz)
579  {
580  const int q = qx + (qy + qz * Q1D) * Q1D;
581  const int k = j >= i ?
582  3 - (3-i)*(2-i)/2 + j:
583  3 - (3-j)*(2-j)/2 + i;
584  const double O = D(q,k,e);
585  const double Bz = B[qz][dz];
586  const double Gz = G[qz][dz];
587  const double L = i==2 ? Gz : Bz;
588  const double R = j==2 ? Gz : Bz;
589  QQD[qx][qy][dz] += L * O * R;
590  }
591  }
592  }
593  }
594  MFEM_SYNC_THREAD;
595  // second tensor contraction, along y direction
596  MFEM_FOREACH_THREAD(qx,x,Q1D)
597  {
598  MFEM_FOREACH_THREAD(dz,z,D1D)
599  {
600  MFEM_FOREACH_THREAD(dy,y,D1D)
601  {
602  QDD[qx][dy][dz] = 0.0;
603  for (int qy = 0; qy < Q1D; ++qy)
604  {
605  const double By = B[qy][dy];
606  const double Gy = G[qy][dy];
607  const double L = i==1 ? Gy : By;
608  const double R = j==1 ? Gy : By;
609  QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
610  }
611  }
612  }
613  }
614  MFEM_SYNC_THREAD;
615  // third tensor contraction, along x direction
616  MFEM_FOREACH_THREAD(dz,z,D1D)
617  {
618  MFEM_FOREACH_THREAD(dy,y,D1D)
619  {
620  MFEM_FOREACH_THREAD(dx,x,D1D)
621  {
622  for (int qx = 0; qx < Q1D; ++qx)
623  {
624  const double Bx = B[qx][dx];
625  const double Gx = G[qx][dx];
626  const double L = i==0 ? Gx : Bx;
627  const double R = j==0 ? Gx : Bx;
628  Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
629  }
630  }
631  }
632  }
633  }
634  }
635  });
636 }
637 
638 static void PADiffusionAssembleDiagonal(const int dim,
639  const int D1D,
640  const int Q1D,
641  const int NE,
642  const Array<double> &B,
643  const Array<double> &G,
644  const Vector &D,
645  Vector &Y)
646 {
647  if (dim == 2)
648  {
649  switch ((D1D << 4 ) | Q1D)
650  {
651  case 0x22: return SmemPADiffusionDiagonal2D<2,2,8>(NE,B,G,D,Y);
652  case 0x33: return SmemPADiffusionDiagonal2D<3,3,8>(NE,B,G,D,Y);
653  case 0x44: return SmemPADiffusionDiagonal2D<4,4,4>(NE,B,G,D,Y);
654  case 0x55: return SmemPADiffusionDiagonal2D<5,5,4>(NE,B,G,D,Y);
655  case 0x66: return SmemPADiffusionDiagonal2D<6,6,2>(NE,B,G,D,Y);
656  case 0x77: return SmemPADiffusionDiagonal2D<7,7,2>(NE,B,G,D,Y);
657  case 0x88: return SmemPADiffusionDiagonal2D<8,8,1>(NE,B,G,D,Y);
658  case 0x99: return SmemPADiffusionDiagonal2D<9,9,1>(NE,B,G,D,Y);
659  default: return PADiffusionDiagonal2D(NE,B,G,D,Y,D1D,Q1D);
660  }
661  }
662  else if (dim == 3)
663  {
664  switch ((D1D << 4 ) | Q1D)
665  {
666  case 0x23: return SmemPADiffusionDiagonal3D<2,3>(NE,B,G,D,Y);
667  case 0x34: return SmemPADiffusionDiagonal3D<3,4>(NE,B,G,D,Y);
668  case 0x45: return SmemPADiffusionDiagonal3D<4,5>(NE,B,G,D,Y);
669  case 0x56: return SmemPADiffusionDiagonal3D<5,6>(NE,B,G,D,Y);
670  case 0x67: return SmemPADiffusionDiagonal3D<6,7>(NE,B,G,D,Y);
671  case 0x78: return SmemPADiffusionDiagonal3D<7,8>(NE,B,G,D,Y);
672  case 0x89: return SmemPADiffusionDiagonal3D<8,9>(NE,B,G,D,Y);
673  case 0x9A: return SmemPADiffusionDiagonal3D<9,10>(NE,B,G,D,Y);
674  default: return PADiffusionDiagonal3D(NE,B,G,D,Y,D1D,Q1D);
675  }
676  }
677  MFEM_ABORT("Unknown kernel.");
678 }
679 
680 void DiffusionIntegrator::AssembleDiagonalPA(Vector &diag)
681 {
682  if (pa_data.Size()==0) { SetupPA(*fespace, true); }
683  PADiffusionAssembleDiagonal(dim, dofs1D, quad1D, ne,
684  maps->B, maps->G, pa_data, diag);
685 }
686 
687 
688 #ifdef MFEM_USE_OCCA
689 // OCCA PA Diffusion Apply 2D kernel
690 static void OccaPADiffusionApply2D(const int D1D,
691  const int Q1D,
692  const int NE,
693  const Array<double> &B,
694  const Array<double> &G,
695  const Array<double> &Bt,
696  const Array<double> &Gt,
697  const Vector &D,
698  const Vector &X,
699  Vector &Y)
700 {
701  occa::properties props;
702  props["defines/D1D"] = D1D;
703  props["defines/Q1D"] = Q1D;
704  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
705  const occa::memory o_G = OccaMemoryRead(G.GetMemory(), G.Size());
706  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
707  const occa::memory o_Gt = OccaMemoryRead(Gt.GetMemory(), Gt.Size());
708  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
709  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
710  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
711  const occa_id_t id = std::make_pair(D1D,Q1D);
712  if (!Device::Allows(Backend::OCCA_CUDA))
713  {
714  static occa_kernel_t OccaDiffApply2D_cpu;
715  if (OccaDiffApply2D_cpu.find(id) == OccaDiffApply2D_cpu.end())
716  {
717  const occa::kernel DiffusionApply2D_CPU =
718  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
719  "DiffusionApply2D_CPU", props);
720  OccaDiffApply2D_cpu.emplace(id, DiffusionApply2D_CPU);
721  }
722  OccaDiffApply2D_cpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
723  }
724  else
725  {
726  static occa_kernel_t OccaDiffApply2D_gpu;
727  if (OccaDiffApply2D_gpu.find(id) == OccaDiffApply2D_gpu.end())
728  {
729  const occa::kernel DiffusionApply2D_GPU =
730  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
731  "DiffusionApply2D_GPU", props);
732  OccaDiffApply2D_gpu.emplace(id, DiffusionApply2D_GPU);
733  }
734  OccaDiffApply2D_gpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
735  }
736 }
737 
738 // OCCA PA Diffusion Apply 3D kernel
739 static void OccaPADiffusionApply3D(const int D1D,
740  const int Q1D,
741  const int NE,
742  const Array<double> &B,
743  const Array<double> &G,
744  const Array<double> &Bt,
745  const Array<double> &Gt,
746  const Vector &D,
747  const Vector &X,
748  Vector &Y)
749 {
750  occa::properties props;
751  props["defines/D1D"] = D1D;
752  props["defines/Q1D"] = Q1D;
753  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
754  const occa::memory o_G = OccaMemoryRead(G.GetMemory(), G.Size());
755  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
756  const occa::memory o_Gt = OccaMemoryRead(Gt.GetMemory(), Gt.Size());
757  const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
758  const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
759  occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
760  const occa_id_t id = std::make_pair(D1D,Q1D);
761  if (!Device::Allows(Backend::OCCA_CUDA))
762  {
763  static occa_kernel_t OccaDiffApply3D_cpu;
764  if (OccaDiffApply3D_cpu.find(id) == OccaDiffApply3D_cpu.end())
765  {
766  const occa::kernel DiffusionApply3D_CPU =
767  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
768  "DiffusionApply3D_CPU", props);
769  OccaDiffApply3D_cpu.emplace(id, DiffusionApply3D_CPU);
770  }
771  OccaDiffApply3D_cpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
772  }
773  else
774  {
775  static occa_kernel_t OccaDiffApply3D_gpu;
776  if (OccaDiffApply3D_gpu.find(id) == OccaDiffApply3D_gpu.end())
777  {
778  const occa::kernel DiffusionApply3D_GPU =
779  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
780  "DiffusionApply3D_GPU", props);
781  OccaDiffApply3D_gpu.emplace(id, DiffusionApply3D_GPU);
782  }
783  OccaDiffApply3D_gpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
784  }
785 }
786 #endif // MFEM_USE_OCCA
787 
788 // PA Diffusion Apply 2D kernel
789 template<int T_D1D = 0, int T_Q1D = 0>
790 static void PADiffusionApply2D(const int NE,
791  const Array<double> &b_,
792  const Array<double> &g_,
793  const Array<double> &bt_,
794  const Array<double> &gt_,
795  const Vector &d_,
796  const Vector &x_,
797  Vector &y_,
798  const int d1d = 0,
799  const int q1d = 0)
800 {
801  const int D1D = T_D1D ? T_D1D : d1d;
802  const int Q1D = T_Q1D ? T_Q1D : q1d;
803  MFEM_VERIFY(D1D <= MAX_D1D, "");
804  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
805  auto B = Reshape(b_.Read(), Q1D, D1D);
806  auto G = Reshape(g_.Read(), Q1D, D1D);
807  auto Bt = Reshape(bt_.Read(), D1D, Q1D);
808  auto Gt = Reshape(gt_.Read(), D1D, Q1D);
809  auto D = Reshape(d_.Read(), Q1D*Q1D, 3, NE);
810  auto X = Reshape(x_.Read(), D1D, D1D, NE);
811  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
812  MFEM_FORALL(e, NE,
813  {
814  const int D1D = T_D1D ? T_D1D : d1d;
815  const int Q1D = T_Q1D ? T_Q1D : q1d;
816  // the following variables are evaluated at compile time
817  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
818  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
819 
820  double grad[max_Q1D][max_Q1D][2];
821  for (int qy = 0; qy < Q1D; ++qy)
822  {
823  for (int qx = 0; qx < Q1D; ++qx)
824  {
825  grad[qy][qx][0] = 0.0;
826  grad[qy][qx][1] = 0.0;
827  }
828  }
829  for (int dy = 0; dy < D1D; ++dy)
830  {
831  double gradX[max_Q1D][2];
832  for (int qx = 0; qx < Q1D; ++qx)
833  {
834  gradX[qx][0] = 0.0;
835  gradX[qx][1] = 0.0;
836  }
837  for (int dx = 0; dx < D1D; ++dx)
838  {
839  const double s = X(dx,dy,e);
840  for (int qx = 0; qx < Q1D; ++qx)
841  {
842  gradX[qx][0] += s * B(qx,dx);
843  gradX[qx][1] += s * G(qx,dx);
844  }
845  }
846  for (int qy = 0; qy < Q1D; ++qy)
847  {
848  const double wy = B(qy,dy);
849  const double wDy = G(qy,dy);
850  for (int qx = 0; qx < Q1D; ++qx)
851  {
852  grad[qy][qx][0] += gradX[qx][1] * wy;
853  grad[qy][qx][1] += gradX[qx][0] * wDy;
854  }
855  }
856  }
857  // Calculate Dxy, xDy in plane
858  for (int qy = 0; qy < Q1D; ++qy)
859  {
860  for (int qx = 0; qx < Q1D; ++qx)
861  {
862  const int q = qx + qy * Q1D;
863 
864  const double O11 = D(q,0,e);
865  const double O12 = D(q,1,e);
866  const double O22 = D(q,2,e);
867 
868  const double gradX = grad[qy][qx][0];
869  const double gradY = grad[qy][qx][1];
870 
871  grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
872  grad[qy][qx][1] = (O12 * gradX) + (O22 * gradY);
873  }
874  }
875  for (int qy = 0; qy < Q1D; ++qy)
876  {
877  double gradX[max_D1D][2];
878  for (int dx = 0; dx < D1D; ++dx)
879  {
880  gradX[dx][0] = 0;
881  gradX[dx][1] = 0;
882  }
883  for (int qx = 0; qx < Q1D; ++qx)
884  {
885  const double gX = grad[qy][qx][0];
886  const double gY = grad[qy][qx][1];
887  for (int dx = 0; dx < D1D; ++dx)
888  {
889  const double wx = Bt(dx,qx);
890  const double wDx = Gt(dx,qx);
891  gradX[dx][0] += gX * wDx;
892  gradX[dx][1] += gY * wx;
893  }
894  }
895  for (int dy = 0; dy < D1D; ++dy)
896  {
897  const double wy = Bt(dy,qy);
898  const double wDy = Gt(dy,qy);
899  for (int dx = 0; dx < D1D; ++dx)
900  {
901  Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
902  }
903  }
904  }
905  });
906 }
907 
908 // Shared memory PA Diffusion Apply 2D kernel
909 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0>
910 static void SmemPADiffusionApply2D(const int NE,
911  const Array<double> &b_,
912  const Array<double> &g_,
913  const Array<double> &bt_,
914  const Array<double> &gt_,
915  const Vector &d_,
916  const Vector &x_,
917  Vector &y_,
918  const int d1d = 0,
919  const int q1d = 0)
920 {
921  const int D1D = T_D1D ? T_D1D : d1d;
922  const int Q1D = T_Q1D ? T_Q1D : q1d;
923  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
924  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
925  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
926  MFEM_VERIFY(D1D <= MD1, "");
927  MFEM_VERIFY(Q1D <= MQ1, "");
928  auto b = Reshape(b_.Read(), Q1D, D1D);
929  auto g = Reshape(g_.Read(), Q1D, D1D);
930  auto D = Reshape(d_.Read(), Q1D*Q1D, 3, NE);
931  auto x = Reshape(x_.Read(), D1D, D1D, NE);
932  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
933  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
934  {
935  const int tidz = MFEM_THREAD_ID(z);
936  const int D1D = T_D1D ? T_D1D : d1d;
937  const int Q1D = T_Q1D ? T_Q1D : q1d;
938  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
939  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
940  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
941  MFEM_SHARED double sBG[2][MQ1*MD1];
942  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
943  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
944  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
945  double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
946  MFEM_SHARED double Xz[NBZ][MD1][MD1];
947  MFEM_SHARED double GD[2][NBZ][MD1][MQ1];
948  MFEM_SHARED double GQ[2][NBZ][MD1][MQ1];
949  double (*X)[MD1] = (double (*)[MD1])(Xz + tidz);
950  double (*DQ0)[MD1] = (double (*)[MD1])(GD[0] + tidz);
951  double (*DQ1)[MD1] = (double (*)[MD1])(GD[1] + tidz);
952  double (*QQ0)[MD1] = (double (*)[MD1])(GQ[0] + tidz);
953  double (*QQ1)[MD1] = (double (*)[MD1])(GQ[1] + tidz);
954  MFEM_FOREACH_THREAD(dy,y,D1D)
955  {
956  MFEM_FOREACH_THREAD(dx,x,D1D)
957  {
958  X[dy][dx] = x(dx,dy,e);
959  }
960  }
961  if (tidz == 0)
962  {
963  MFEM_FOREACH_THREAD(dy,y,D1D)
964  {
965  MFEM_FOREACH_THREAD(q,x,Q1D)
966  {
967  B[q][dy] = b(q,dy);
968  G[q][dy] = g(q,dy);
969  }
970  }
971  }
972  MFEM_SYNC_THREAD;
973  MFEM_FOREACH_THREAD(dy,y,D1D)
974  {
975  MFEM_FOREACH_THREAD(qx,x,Q1D)
976  {
977  double u = 0.0;
978  double v = 0.0;
979  for (int dx = 0; dx < D1D; ++dx)
980  {
981  const double coords = X[dy][dx];
982  u += B[qx][dx] * coords;
983  v += G[qx][dx] * coords;
984  }
985  DQ0[dy][qx] = u;
986  DQ1[dy][qx] = v;
987  }
988  }
989  MFEM_SYNC_THREAD;
990  MFEM_FOREACH_THREAD(qy,y,Q1D)
991  {
992  MFEM_FOREACH_THREAD(qx,x,Q1D)
993  {
994  double u = 0.0;
995  double v = 0.0;
996  for (int dy = 0; dy < D1D; ++dy)
997  {
998  u += DQ1[dy][qx] * B[qy][dy];
999  v += DQ0[dy][qx] * G[qy][dy];
1000  }
1001  QQ0[qy][qx] = u;
1002  QQ1[qy][qx] = v;
1003  }
1004  }
1005  MFEM_SYNC_THREAD;
1006  MFEM_FOREACH_THREAD(qy,y,Q1D)
1007  {
1008  MFEM_FOREACH_THREAD(qx,x,Q1D)
1009  {
1010  const int q = (qx + ((qy) * Q1D));
1011  const double O11 = D(q,0,e);
1012  const double O12 = D(q,1,e);
1013  const double O22 = D(q,2,e);
1014  const double gX = QQ0[qy][qx];
1015  const double gY = QQ1[qy][qx];
1016  QQ0[qy][qx] = (O11 * gX) + (O12 * gY);
1017  QQ1[qy][qx] = (O12 * gX) + (O22 * gY);
1018  }
1019  }
1020  MFEM_SYNC_THREAD;
1021  if (tidz == 0)
1022  {
1023  MFEM_FOREACH_THREAD(dy,y,D1D)
1024  {
1025  MFEM_FOREACH_THREAD(q,x,Q1D)
1026  {
1027  Bt[dy][q] = b(q,dy);
1028  Gt[dy][q] = g(q,dy);
1029  }
1030  }
1031  }
1032  MFEM_SYNC_THREAD;
1033  MFEM_FOREACH_THREAD(qy,y,Q1D)
1034  {
1035  MFEM_FOREACH_THREAD(dx,x,D1D)
1036  {
1037  double u = 0.0;
1038  double v = 0.0;
1039  for (int qx = 0; qx < Q1D; ++qx)
1040  {
1041  u += Gt[dx][qx] * QQ0[qy][qx];
1042  v += Bt[dx][qx] * QQ1[qy][qx];
1043  }
1044  DQ0[qy][dx] = u;
1045  DQ1[qy][dx] = v;
1046  }
1047  }
1048  MFEM_SYNC_THREAD;
1049  MFEM_FOREACH_THREAD(dy,y,D1D)
1050  {
1051  MFEM_FOREACH_THREAD(dx,x,D1D)
1052  {
1053  double u = 0.0;
1054  double v = 0.0;
1055  for (int qy = 0; qy < Q1D; ++qy)
1056  {
1057  u += DQ0[qy][dx] * Bt[dy][qy];
1058  v += DQ1[qy][dx] * Gt[dy][qy];
1059  }
1060  Y(dx,dy,e) += (u + v);
1061  }
1062  }
1063  });
1064 }
1065 
1066 // PA Diffusion Apply 3D kernel
1067 template<int T_D1D = 0, int T_Q1D = 0>
1068 static void PADiffusionApply3D(const int NE,
1069  const Array<double> &b,
1070  const Array<double> &g,
1071  const Array<double> &bt,
1072  const Array<double> &gt,
1073  const Vector &d_,
1074  const Vector &x_,
1075  Vector &y_,
1076  int d1d = 0, int q1d = 0)
1077 {
1078  const int D1D = T_D1D ? T_D1D : d1d;
1079  const int Q1D = T_Q1D ? T_Q1D : q1d;
1080  MFEM_VERIFY(D1D <= MAX_D1D, "");
1081  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
1082  auto B = Reshape(b.Read(), Q1D, D1D);
1083  auto G = Reshape(g.Read(), Q1D, D1D);
1084  auto Bt = Reshape(bt.Read(), D1D, Q1D);
1085  auto Gt = Reshape(gt.Read(), D1D, Q1D);
1086  auto D = Reshape(d_.Read(), Q1D*Q1D*Q1D, 6, NE);
1087  auto X = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1088  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1089  MFEM_FORALL(e, NE,
1090  {
1091  const int D1D = T_D1D ? T_D1D : d1d;
1092  const int Q1D = T_Q1D ? T_Q1D : q1d;
1093  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
1094  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
1095  double grad[max_Q1D][max_Q1D][max_Q1D][3];
1096  for (int qz = 0; qz < Q1D; ++qz)
1097  {
1098  for (int qy = 0; qy < Q1D; ++qy)
1099  {
1100  for (int qx = 0; qx < Q1D; ++qx)
1101  {
1102  grad[qz][qy][qx][0] = 0.0;
1103  grad[qz][qy][qx][1] = 0.0;
1104  grad[qz][qy][qx][2] = 0.0;
1105  }
1106  }
1107  }
1108  for (int dz = 0; dz < D1D; ++dz)
1109  {
1110  double gradXY[max_Q1D][max_Q1D][3];
1111  for (int qy = 0; qy < Q1D; ++qy)
1112  {
1113  for (int qx = 0; qx < Q1D; ++qx)
1114  {
1115  gradXY[qy][qx][0] = 0.0;
1116  gradXY[qy][qx][1] = 0.0;
1117  gradXY[qy][qx][2] = 0.0;
1118  }
1119  }
1120  for (int dy = 0; dy < D1D; ++dy)
1121  {
1122  double gradX[max_Q1D][2];
1123  for (int qx = 0; qx < Q1D; ++qx)
1124  {
1125  gradX[qx][0] = 0.0;
1126  gradX[qx][1] = 0.0;
1127  }
1128  for (int dx = 0; dx < D1D; ++dx)
1129  {
1130  const double s = X(dx,dy,dz,e);
1131  for (int qx = 0; qx < Q1D; ++qx)
1132  {
1133  gradX[qx][0] += s * B(qx,dx);
1134  gradX[qx][1] += s * G(qx,dx);
1135  }
1136  }
1137  for (int qy = 0; qy < Q1D; ++qy)
1138  {
1139  const double wy = B(qy,dy);
1140  const double wDy = G(qy,dy);
1141  for (int qx = 0; qx < Q1D; ++qx)
1142  {
1143  const double wx = gradX[qx][0];
1144  const double wDx = gradX[qx][1];
1145  gradXY[qy][qx][0] += wDx * wy;
1146  gradXY[qy][qx][1] += wx * wDy;
1147  gradXY[qy][qx][2] += wx * wy;
1148  }
1149  }
1150  }
1151  for (int qz = 0; qz < Q1D; ++qz)
1152  {
1153  const double wz = B(qz,dz);
1154  const double wDz = G(qz,dz);
1155  for (int qy = 0; qy < Q1D; ++qy)
1156  {
1157  for (int qx = 0; qx < Q1D; ++qx)
1158  {
1159  grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
1160  grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
1161  grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
1162  }
1163  }
1164  }
1165  }
1166  // Calculate Dxyz, xDyz, xyDz in plane
1167  for (int qz = 0; qz < Q1D; ++qz)
1168  {
1169  for (int qy = 0; qy < Q1D; ++qy)
1170  {
1171  for (int qx = 0; qx < Q1D; ++qx)
1172  {
1173  const int q = qx + (qy + qz * Q1D) * Q1D;
1174  const double O11 = D(q,0,e);
1175  const double O12 = D(q,1,e);
1176  const double O13 = D(q,2,e);
1177  const double O22 = D(q,3,e);
1178  const double O23 = D(q,4,e);
1179  const double O33 = D(q,5,e);
1180  const double gradX = grad[qz][qy][qx][0];
1181  const double gradY = grad[qz][qy][qx][1];
1182  const double gradZ = grad[qz][qy][qx][2];
1183  grad[qz][qy][qx][0] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
1184  grad[qz][qy][qx][1] = (O12*gradX)+(O22*gradY)+(O23*gradZ);
1185  grad[qz][qy][qx][2] = (O13*gradX)+(O23*gradY)+(O33*gradZ);
1186  }
1187  }
1188  }
1189  for (int qz = 0; qz < Q1D; ++qz)
1190  {
1191  double gradXY[max_D1D][max_D1D][3];
1192  for (int dy = 0; dy < D1D; ++dy)
1193  {
1194  for (int dx = 0; dx < D1D; ++dx)
1195  {
1196  gradXY[dy][dx][0] = 0;
1197  gradXY[dy][dx][1] = 0;
1198  gradXY[dy][dx][2] = 0;
1199  }
1200  }
1201  for (int qy = 0; qy < Q1D; ++qy)
1202  {
1203  double gradX[max_D1D][3];
1204  for (int dx = 0; dx < D1D; ++dx)
1205  {
1206  gradX[dx][0] = 0;
1207  gradX[dx][1] = 0;
1208  gradX[dx][2] = 0;
1209  }
1210  for (int qx = 0; qx < Q1D; ++qx)
1211  {
1212  const double gX = grad[qz][qy][qx][0];
1213  const double gY = grad[qz][qy][qx][1];
1214  const double gZ = grad[qz][qy][qx][2];
1215  for (int dx = 0; dx < D1D; ++dx)
1216  {
1217  const double wx = Bt(dx,qx);
1218  const double wDx = Gt(dx,qx);
1219  gradX[dx][0] += gX * wDx;
1220  gradX[dx][1] += gY * wx;
1221  gradX[dx][2] += gZ * wx;
1222  }
1223  }
1224  for (int dy = 0; dy < D1D; ++dy)
1225  {
1226  const double wy = Bt(dy,qy);
1227  const double wDy = Gt(dy,qy);
1228  for (int dx = 0; dx < D1D; ++dx)
1229  {
1230  gradXY[dy][dx][0] += gradX[dx][0] * wy;
1231  gradXY[dy][dx][1] += gradX[dx][1] * wDy;
1232  gradXY[dy][dx][2] += gradX[dx][2] * wy;
1233  }
1234  }
1235  }
1236  for (int dz = 0; dz < D1D; ++dz)
1237  {
1238  const double wz = Bt(dz,qz);
1239  const double wDz = Gt(dz,qz);
1240  for (int dy = 0; dy < D1D; ++dy)
1241  {
1242  for (int dx = 0; dx < D1D; ++dx)
1243  {
1244  Y(dx,dy,dz,e) +=
1245  ((gradXY[dy][dx][0] * wz) +
1246  (gradXY[dy][dx][1] * wz) +
1247  (gradXY[dy][dx][2] * wDz));
1248  }
1249  }
1250  }
1251  }
1252  });
1253 }
1254 
1255 // Shared memory PA Diffusion Apply 3D kernel
1256 template<int T_D1D = 0, int T_Q1D = 0>
1257 static void SmemPADiffusionApply3D(const int NE,
1258  const Array<double> &b_,
1259  const Array<double> &g_,
1260  const Array<double> &bt_,
1261  const Array<double> &gt_,
1262  const Vector &d_,
1263  const Vector &x_,
1264  Vector &y_,
1265  const int d1d = 0,
1266  const int q1d = 0)
1267 {
1268  const int D1D = T_D1D ? T_D1D : d1d;
1269  const int Q1D = T_Q1D ? T_Q1D : q1d;
1270  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
1271  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
1272  MFEM_VERIFY(D1D <= MD1, "");
1273  MFEM_VERIFY(Q1D <= MQ1, "");
1274  auto b = Reshape(b_.Read(), Q1D, D1D);
1275  auto g = Reshape(g_.Read(), Q1D, D1D);
1276  auto d = Reshape(d_.Read(), Q1D*Q1D*Q1D, 6, NE);
1277  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1278  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1279  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1280  {
1281  const int tidz = MFEM_THREAD_ID(z);
1282  const int D1D = T_D1D ? T_D1D : d1d;
1283  const int Q1D = T_Q1D ? T_Q1D : q1d;
1284  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
1285  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
1286  constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
1287  MFEM_SHARED double sBG[2][MQ1*MD1];
1288  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
1289  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
1290  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
1291  double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
1292  MFEM_SHARED double sm0[3][MDQ*MDQ*MDQ];
1293  MFEM_SHARED double sm1[3][MDQ*MDQ*MDQ];
1294  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1295  double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
1296  double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
1297  double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
1298  double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
1299  double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
1300  double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
1301  double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
1302  double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
1303  double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
1304  double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
1305  double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
1306  double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
1307  double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
1308  double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1309  MFEM_FOREACH_THREAD(dz,z,D1D)
1310  {
1311  MFEM_FOREACH_THREAD(dy,y,D1D)
1312  {
1313  MFEM_FOREACH_THREAD(dx,x,D1D)
1314  {
1315  X[dz][dy][dx] = x(dx,dy,dz,e);
1316  }
1317  }
1318  }
1319  if (tidz == 0)
1320  {
1321  MFEM_FOREACH_THREAD(d,y,D1D)
1322  {
1323  MFEM_FOREACH_THREAD(q,x,Q1D)
1324  {
1325  B[q][d] = b(q,d);
1326  G[q][d] = g(q,d);
1327  }
1328  }
1329  }
1330  MFEM_SYNC_THREAD;
1331  MFEM_FOREACH_THREAD(dz,z,D1D)
1332  {
1333  MFEM_FOREACH_THREAD(dy,y,D1D)
1334  {
1335  MFEM_FOREACH_THREAD(qx,x,Q1D)
1336  {
1337  double u = 0.0;
1338  double v = 0.0;
1339  for (int dx = 0; dx < D1D; ++dx)
1340  {
1341  const double coords = X[dz][dy][dx];
1342  u += coords * B[qx][dx];
1343  v += coords * G[qx][dx];
1344  }
1345  DDQ0[dz][dy][qx] = u;
1346  DDQ1[dz][dy][qx] = v;
1347  }
1348  }
1349  }
1350  MFEM_SYNC_THREAD;
1351  MFEM_FOREACH_THREAD(dz,z,D1D)
1352  {
1353  MFEM_FOREACH_THREAD(qy,y,Q1D)
1354  {
1355  MFEM_FOREACH_THREAD(qx,x,Q1D)
1356  {
1357  double u = 0.0;
1358  double v = 0.0;
1359  double w = 0.0;
1360  for (int dy = 0; dy < D1D; ++dy)
1361  {
1362  u += DDQ1[dz][dy][qx] * B[qy][dy];
1363  v += DDQ0[dz][dy][qx] * G[qy][dy];
1364  w += DDQ0[dz][dy][qx] * B[qy][dy];
1365  }
1366  DQQ0[dz][qy][qx] = u;
1367  DQQ1[dz][qy][qx] = v;
1368  DQQ2[dz][qy][qx] = w;
1369  }
1370  }
1371  }
1372  MFEM_SYNC_THREAD;
1373  MFEM_FOREACH_THREAD(qz,z,Q1D)
1374  {
1375  MFEM_FOREACH_THREAD(qy,y,Q1D)
1376  {
1377  MFEM_FOREACH_THREAD(qx,x,Q1D)
1378  {
1379  double u = 0.0;
1380  double v = 0.0;
1381  double w = 0.0;
1382  for (int dz = 0; dz < D1D; ++dz)
1383  {
1384  u += DQQ0[dz][qy][qx] * B[qz][dz];
1385  v += DQQ1[dz][qy][qx] * B[qz][dz];
1386  w += DQQ2[dz][qy][qx] * G[qz][dz];
1387  }
1388  QQQ0[qz][qy][qx] = u;
1389  QQQ1[qz][qy][qx] = v;
1390  QQQ2[qz][qy][qx] = w;
1391  }
1392  }
1393  }
1394  MFEM_SYNC_THREAD;
1395  MFEM_FOREACH_THREAD(qz,z,Q1D)
1396  {
1397  MFEM_FOREACH_THREAD(qy,y,Q1D)
1398  {
1399  MFEM_FOREACH_THREAD(qx,x,Q1D)
1400  {
1401  const int q = qx + ((qy*Q1D) + (qz*Q1D*Q1D));
1402  const double O11 = d(q,0,e);
1403  const double O12 = d(q,1,e);
1404  const double O13 = d(q,2,e);
1405  const double O22 = d(q,3,e);
1406  const double O23 = d(q,4,e);
1407  const double O33 = d(q,5,e);
1408  const double gX = QQQ0[qz][qy][qx];
1409  const double gY = QQQ1[qz][qy][qx];
1410  const double gZ = QQQ2[qz][qy][qx];
1411  QQQ0[qz][qy][qx] = (O11*gX) + (O12*gY) + (O13*gZ);
1412  QQQ1[qz][qy][qx] = (O12*gX) + (O22*gY) + (O23*gZ);
1413  QQQ2[qz][qy][qx] = (O13*gX) + (O23*gY) + (O33*gZ);
1414  }
1415  }
1416  }
1417  MFEM_SYNC_THREAD;
1418  if (tidz == 0)
1419  {
1420  MFEM_FOREACH_THREAD(d,y,D1D)
1421  {
1422  MFEM_FOREACH_THREAD(q,x,Q1D)
1423  {
1424  Bt[d][q] = b(q,d);
1425  Gt[d][q] = g(q,d);
1426  }
1427  }
1428  }
1429  MFEM_SYNC_THREAD;
1430  MFEM_FOREACH_THREAD(qz,z,Q1D)
1431  {
1432  MFEM_FOREACH_THREAD(qy,y,Q1D)
1433  {
1434  MFEM_FOREACH_THREAD(dx,x,D1D)
1435  {
1436  double u = 0.0;
1437  double v = 0.0;
1438  double w = 0.0;
1439  for (int qx = 0; qx < Q1D; ++qx)
1440  {
1441  u += QQQ0[qz][qy][qx] * Gt[dx][qx];
1442  v += QQQ1[qz][qy][qx] * Bt[dx][qx];
1443  w += QQQ2[qz][qy][qx] * Bt[dx][qx];
1444  }
1445  QQD0[qz][qy][dx] = u;
1446  QQD1[qz][qy][dx] = v;
1447  QQD2[qz][qy][dx] = w;
1448  }
1449  }
1450  }
1451  MFEM_SYNC_THREAD;
1452  MFEM_FOREACH_THREAD(qz,z,Q1D)
1453  {
1454  MFEM_FOREACH_THREAD(dy,y,D1D)
1455  {
1456  MFEM_FOREACH_THREAD(dx,x,D1D)
1457  {
1458  double u = 0.0;
1459  double v = 0.0;
1460  double w = 0.0;
1461  for (int qy = 0; qy < Q1D; ++qy)
1462  {
1463  u += QQD0[qz][qy][dx] * Bt[dy][qy];
1464  v += QQD1[qz][qy][dx] * Gt[dy][qy];
1465  w += QQD2[qz][qy][dx] * Bt[dy][qy];
1466  }
1467  QDD0[qz][dy][dx] = u;
1468  QDD1[qz][dy][dx] = v;
1469  QDD2[qz][dy][dx] = w;
1470  }
1471  }
1472  }
1473  MFEM_SYNC_THREAD;
1474  MFEM_FOREACH_THREAD(dz,z,D1D)
1475  {
1476  MFEM_FOREACH_THREAD(dy,y,D1D)
1477  {
1478  MFEM_FOREACH_THREAD(dx,x,D1D)
1479  {
1480  double u = 0.0;
1481  double v = 0.0;
1482  double w = 0.0;
1483  for (int qz = 0; qz < Q1D; ++qz)
1484  {
1485  u += QDD0[qz][dy][dx] * Bt[dz][qz];
1486  v += QDD1[qz][dy][dx] * Bt[dz][qz];
1487  w += QDD2[qz][dy][dx] * Gt[dz][qz];
1488  }
1489  y(dx,dy,dz,e) += (u + v + w);
1490  }
1491  }
1492  }
1493  });
1494 }
1495 
1496 static void PADiffusionApply(const int dim,
1497  const int D1D,
1498  const int Q1D,
1499  const int NE,
1500  const Array<double> &B,
1501  const Array<double> &G,
1502  const Array<double> &Bt,
1503  const Array<double> &Gt,
1504  const Vector &D,
1505  const Vector &X,
1506  Vector &Y)
1507 {
1508 #ifdef MFEM_USE_OCCA
1509  if (DeviceCanUseOcca())
1510  {
1511  if (dim == 2)
1512  {
1513  OccaPADiffusionApply2D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1514  return;
1515  }
1516  if (dim == 3)
1517  {
1518  OccaPADiffusionApply3D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1519  return;
1520  }
1521  MFEM_ABORT("OCCA PADiffusionApply unknown kernel!");
1522  }
1523 #endif // MFEM_USE_OCCA
1524  if (dim == 2)
1525  {
1526  switch ((D1D << 4 ) | Q1D)
1527  {
1528  case 0x22: return SmemPADiffusionApply2D<2,2,16>(NE,B,G,Bt,Gt,D,X,Y);
1529  case 0x33: return SmemPADiffusionApply2D<3,3,16>(NE,B,G,Bt,Gt,D,X,Y);
1530  case 0x44: return SmemPADiffusionApply2D<4,4,8>(NE,B,G,Bt,Gt,D,X,Y);
1531  case 0x55: return SmemPADiffusionApply2D<5,5,8>(NE,B,G,Bt,Gt,D,X,Y);
1532  case 0x66: return SmemPADiffusionApply2D<6,6,4>(NE,B,G,Bt,Gt,D,X,Y);
1533  case 0x77: return SmemPADiffusionApply2D<7,7,4>(NE,B,G,Bt,Gt,D,X,Y);
1534  case 0x88: return SmemPADiffusionApply2D<8,8,2>(NE,B,G,Bt,Gt,D,X,Y);
1535  case 0x99: return SmemPADiffusionApply2D<9,9,2>(NE,B,G,Bt,Gt,D,X,Y);
1536  default: return PADiffusionApply2D(NE,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1537  }
1538  }
1539  else if (dim == 3)
1540  {
1541  switch ((D1D << 4 ) | Q1D)
1542  {
1543  case 0x23: return SmemPADiffusionApply3D<2,3>(NE,B,G,Bt,Gt,D,X,Y);
1544  case 0x34: return SmemPADiffusionApply3D<3,4>(NE,B,G,Bt,Gt,D,X,Y);
1545  case 0x45: return SmemPADiffusionApply3D<4,5>(NE,B,G,Bt,Gt,D,X,Y);
1546  case 0x46: return SmemPADiffusionApply3D<4,6>(NE,B,G,Bt,Gt,D,X,Y);
1547  case 0x56: return SmemPADiffusionApply3D<5,6>(NE,B,G,Bt,Gt,D,X,Y);
1548  case 0x58: return SmemPADiffusionApply3D<5,8>(NE,B,G,Bt,Gt,D,X,Y);
1549  case 0x67: return SmemPADiffusionApply3D<6,7>(NE,B,G,Bt,Gt,D,X,Y);
1550  case 0x78: return SmemPADiffusionApply3D<7,8>(NE,B,G,Bt,Gt,D,X,Y);
1551  case 0x89: return SmemPADiffusionApply3D<8,9>(NE,B,G,Bt,Gt,D,X,Y);
1552  default: return PADiffusionApply3D(NE,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1553  }
1554  }
1555  MFEM_ABORT("Unknown kernel.");
1556 }
1557 
1558 // PA Diffusion Apply kernel
1559 void DiffusionIntegrator::AddMultPA(const Vector &x, Vector &y) const
1560 {
1561 #ifdef MFEM_USE_CEED
1562  if (DeviceCanUseCeed())
1563  {
1564  const CeedScalar *x_ptr;
1565  CeedScalar *y_ptr;
1566  CeedMemType mem;
1567  CeedGetPreferredMemType(internal::ceed, &mem);
1568  if ( Device::Allows(Backend::CUDA) && mem==CEED_MEM_DEVICE )
1569  {
1570  x_ptr = x.Read();
1571  y_ptr = y.ReadWrite();
1572  }
1573  else
1574  {
1575  x_ptr = x.HostRead();
1576  y_ptr = y.HostReadWrite();
1577  mem = CEED_MEM_HOST;
1578  }
1579  CeedVectorSetArray(ceedDataPtr->u, mem, CEED_USE_POINTER,
1580  const_cast<CeedScalar*>(x_ptr));
1581  CeedVectorSetArray(ceedDataPtr->v, mem, CEED_USE_POINTER, y_ptr);
1582 
1583  CeedOperatorApplyAdd(ceedDataPtr->oper, ceedDataPtr->u, ceedDataPtr->v,
1584  CEED_REQUEST_IMMEDIATE);
1585 
1586  CeedVectorSyncArray(ceedDataPtr->v, mem);
1587  }
1588  else
1589 #endif
1590  {
1591  PADiffusionApply(dim, dofs1D, quad1D, ne,
1592  maps->B, maps->G, maps->Bt, maps->Gt,
1593  pa_data, x, y);
1594  }
1595 }
1596 
1597 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:245
Abstract class for Finite Elements.
Definition: fe.hpp:232
int Size() const
Logical size of the array.
Definition: array.hpp:124
double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:353
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe.hpp:308
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:103
Subclass constant coefficient.
Definition: coefficient.hpp:67
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:407
const Geometry::Type geom
Definition: ex1.cpp:40
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
int Size() const
Returns the size of the vector.
Definition: vector.hpp:157
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:162
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:693
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:85
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:180
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:134
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:408
const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:337
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Definition: intrules.hpp:248
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:763
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:349
const int MAX_Q1D
Definition: forall.hpp:36
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:295
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:273
int Dimension() const
Definition: mesh.hpp:744
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:441
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:48
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Definition: fe.cpp:370
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:333
int dim
Definition: ex24.cpp:43
const int MAX_D1D
Definition: forall.hpp:35
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement associated with i&#39;th element.
Definition: fespace.cpp:1642
double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:345
Vector data type.
Definition: vector.hpp:48
std::pair< int, int > occa_id_t
Definition: occa.hpp:78