MFEM  v4.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_diffusion.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-443211. All Rights
3 // reserved. See file COPYRIGHT for details.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability see http://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the GNU Lesser General Public License (as published by the Free
10 // Software Foundation) version 2.1 dated February 1999.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
21 // PA Diffusion Integrator
22 
23 // OCCA 2D Assemble kernel
24 #ifdef MFEM_USE_OCCA
25 static void OccaPADiffusionSetup2D(const int D1D,
26  const int Q1D,
27  const int NE,
28  const Array<double> &W,
29  const Vector &J,
30  const double COEFF,
31  Vector &op)
32 {
33  occa::properties props;
34  props["defines/D1D"] = D1D;
35  props["defines/Q1D"] = Q1D;
36  const occa::memory o_W = OccaMemoryRead(W.GetMemory(), W.Size());
37  const occa::memory o_J = OccaMemoryRead(J.GetMemory(), J.Size());
38  occa::memory o_op = OccaMemoryWrite(op.GetMemory(), op.Size());
39  const occa_id_t id = std::make_pair(D1D,Q1D);
40  static occa_kernel_t OccaDiffSetup2D_ker;
41  if (OccaDiffSetup2D_ker.find(id) == OccaDiffSetup2D_ker.end())
42  {
43  const occa::kernel DiffusionSetup2D =
44  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
45  "DiffusionSetup2D", props);
46  OccaDiffSetup2D_ker.emplace(id, DiffusionSetup2D);
47  }
48  OccaDiffSetup2D_ker.at(id)(NE, o_W, o_J, COEFF, o_op);
49 }
50 
51 static void OccaPADiffusionSetup3D(const int D1D,
52  const int Q1D,
53  const int NE,
54  const Array<double> &W,
55  const Vector &J,
56  const double COEFF,
57  Vector &op)
58 {
59  occa::properties props;
60  props["defines/D1D"] = D1D;
61  props["defines/Q1D"] = Q1D;
62  const occa::memory o_W = OccaMemoryRead(W.GetMemory(), W.Size());
63  const occa::memory o_J = OccaMemoryRead(J.GetMemory(), J.Size());
64  occa::memory o_op = OccaMemoryWrite(op.GetMemory(), op.Size());
65  const occa_id_t id = std::make_pair(D1D,Q1D);
66  static occa_kernel_t OccaDiffSetup3D_ker;
67  if (OccaDiffSetup3D_ker.find(id) == OccaDiffSetup3D_ker.end())
68  {
69  const occa::kernel DiffusionSetup3D =
70  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
71  "DiffusionSetup3D", props);
72  OccaDiffSetup3D_ker.emplace(id, DiffusionSetup3D);
73  }
74  OccaDiffSetup3D_ker.at(id)(NE, o_W, o_J, COEFF, o_op);
75 }
76 #endif // MFEM_USE_OCCA
77 
78 // PA Diffusion Assemble 2D kernel
79 static void PADiffusionSetup2D(const int Q1D,
80  const int NE,
81  const Array<double> &w,
82  const Vector &j,
83  const double COEFF,
84  Vector &op)
85 {
86  const int NQ = Q1D*Q1D;
87  auto W = w.Read();
88 
89  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
90  auto y = Reshape(op.Write(), NQ, 3, NE);
91 
92  MFEM_FORALL(e, NE,
93  {
94  for (int q = 0; q < NQ; ++q)
95  {
96  const double J11 = J(q,0,0,e);
97  const double J21 = J(q,1,0,e);
98  const double J12 = J(q,0,1,e);
99  const double J22 = J(q,1,1,e);
100  const double c_detJ = W[q] * COEFF / ((J11*J22)-(J21*J12));
101  y(q,0,e) = c_detJ * (J12*J12 + J22*J22); // 1,1
102  y(q,1,e) = -c_detJ * (J12*J11 + J22*J21); // 1,2
103  y(q,2,e) = c_detJ * (J11*J11 + J21*J21); // 2,2
104  }
105  });
106 }
107 
108 // PA Diffusion Assemble 3D kernel
109 static void PADiffusionSetup3D(const int Q1D,
110  const int NE,
111  const Array<double> &w,
112  const Vector &j,
113  const double COEFF,
114  Vector &op)
115 {
116  const int NQ = Q1D*Q1D*Q1D;
117  auto W = w.Read();
118  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
119  auto y = Reshape(op.Write(), NQ, 6, NE);
120  MFEM_FORALL(e, NE,
121  {
122  for (int q = 0; q < NQ; ++q)
123  {
124  const double J11 = J(q,0,0,e);
125  const double J21 = J(q,1,0,e);
126  const double J31 = J(q,2,0,e);
127  const double J12 = J(q,0,1,e);
128  const double J22 = J(q,1,1,e);
129  const double J32 = J(q,2,1,e);
130  const double J13 = J(q,0,2,e);
131  const double J23 = J(q,1,2,e);
132  const double J33 = J(q,2,2,e);
133  const double detJ = J11 * (J22 * J33 - J32 * J23) -
134  /* */ J21 * (J12 * J33 - J32 * J13) +
135  /* */ J31 * (J12 * J23 - J22 * J13);
136  const double c_detJ = W[q] * COEFF / detJ;
137  // adj(J)
138  const double A11 = (J22 * J33) - (J23 * J32);
139  const double A12 = (J32 * J13) - (J12 * J33);
140  const double A13 = (J12 * J23) - (J22 * J13);
141  const double A21 = (J31 * J23) - (J21 * J33);
142  const double A22 = (J11 * J33) - (J13 * J31);
143  const double A23 = (J21 * J13) - (J11 * J23);
144  const double A31 = (J21 * J32) - (J31 * J22);
145  const double A32 = (J31 * J12) - (J11 * J32);
146  const double A33 = (J11 * J22) - (J12 * J21);
147  // detJ J^{-1} J^{-T} = (1/detJ) adj(J) adj(J)^T
148  y(q,0,e) = c_detJ * (A11*A11 + A12*A12 + A13*A13); // 1,1
149  y(q,1,e) = c_detJ * (A11*A21 + A12*A22 + A13*A23); // 2,1
150  y(q,2,e) = c_detJ * (A11*A31 + A12*A32 + A13*A33); // 3,1
151  y(q,3,e) = c_detJ * (A21*A21 + A22*A22 + A23*A23); // 2,2
152  y(q,4,e) = c_detJ * (A21*A31 + A22*A32 + A23*A33); // 3,2
153  y(q,5,e) = c_detJ * (A31*A31 + A32*A32 + A33*A33); // 3,3
154  }
155  });
156 }
157 
158 static void PADiffusionSetup(const int dim,
159  const int D1D,
160  const int Q1D,
161  const int NE,
162  const Array<double> &W,
163  const Vector &J,
164  const double COEFF,
165  Vector &op)
166 {
167  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADiffusionSetup"); }
168  if (dim == 2)
169  {
170 #ifdef MFEM_USE_OCCA
171  if (DeviceCanUseOcca())
172  {
173  OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, COEFF, op);
174  return;
175  }
176 #endif // MFEM_USE_OCCA
177  PADiffusionSetup2D(Q1D, NE, W, J, COEFF, op);
178  }
179  if (dim == 3)
180  {
181 #ifdef MFEM_USE_OCCA
182  if (DeviceCanUseOcca())
183  {
184  OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, COEFF, op);
185  return;
186  }
187 #endif // MFEM_USE_OCCA
188  PADiffusionSetup3D(Q1D, NE, W, J, COEFF, op);
189  }
190 }
191 
192 void DiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes)
193 {
194  // Assumes tensor-product elements
195  Mesh *mesh = fes.GetMesh();
196  const FiniteElement &el = *fes.GetFE(0);
197  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el);
198  const int dims = el.GetDim();
199  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
200  const int nq = ir->GetNPoints();
201  dim = mesh->Dimension();
202  ne = fes.GetNE();
203  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
204  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
205  dofs1D = maps->ndof;
206  quad1D = maps->nqpt;
207  pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType());
208  ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient*>(Q);
209  MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
210  const double coeff = cQ->constant;
211  PADiffusionSetup(dim, dofs1D, quad1D, ne, ir->GetWeights(), geom->J,
212  coeff, pa_data);
213 }
214 
215 #ifdef MFEM_USE_OCCA
216 // OCCA PA Diffusion Apply 2D kernel
217 static void OccaPADiffusionApply2D(const int D1D,
218  const int Q1D,
219  const int NE,
220  const Array<double> &B,
221  const Array<double> &G,
222  const Array<double> &Bt,
223  const Array<double> &Gt,
224  const Vector &op,
225  const Vector &x,
226  Vector &y)
227 {
228  occa::properties props;
229  props["defines/D1D"] = D1D;
230  props["defines/Q1D"] = Q1D;
231  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
232  const occa::memory o_G = OccaMemoryRead(G.GetMemory(), G.Size());
233  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
234  const occa::memory o_Gt = OccaMemoryRead(Gt.GetMemory(), Gt.Size());
235  const occa::memory o_op = OccaMemoryRead(op.GetMemory(), op.Size());
236  const occa::memory o_x = OccaMemoryRead(x.GetMemory(), x.Size());
237  occa::memory o_y = OccaMemoryReadWrite(y.GetMemory(), y.Size());
238  const occa_id_t id = std::make_pair(D1D,Q1D);
239  if (!Device::Allows(Backend::OCCA_CUDA))
240  {
241  static occa_kernel_t OccaDiffApply2D_cpu;
242  if (OccaDiffApply2D_cpu.find(id) == OccaDiffApply2D_cpu.end())
243  {
244  const occa::kernel DiffusionApply2D_CPU =
245  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
246  "DiffusionApply2D_CPU", props);
247  OccaDiffApply2D_cpu.emplace(id, DiffusionApply2D_CPU);
248  }
249  OccaDiffApply2D_cpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_op, o_x, o_y);
250  }
251  else
252  {
253  static occa_kernel_t OccaDiffApply2D_gpu;
254  if (OccaDiffApply2D_gpu.find(id) == OccaDiffApply2D_gpu.end())
255  {
256  const occa::kernel DiffusionApply2D_GPU =
257  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
258  "DiffusionApply2D_GPU", props);
259  OccaDiffApply2D_gpu.emplace(id, DiffusionApply2D_GPU);
260  }
261  OccaDiffApply2D_gpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_op, o_x, o_y);
262  }
263 }
264 
265 // OCCA PA Diffusion Apply 3D kernel
266 static void OccaPADiffusionApply3D(const int D1D,
267  const int Q1D,
268  const int NE,
269  const Array<double> &B,
270  const Array<double> &G,
271  const Array<double> &Bt,
272  const Array<double> &Gt,
273  const Vector &op,
274  const Vector &x,
275  Vector &y)
276 {
277  occa::properties props;
278  props["defines/D1D"] = D1D;
279  props["defines/Q1D"] = Q1D;
280  const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
281  const occa::memory o_G = OccaMemoryRead(G.GetMemory(), G.Size());
282  const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
283  const occa::memory o_Gt = OccaMemoryRead(Gt.GetMemory(), Gt.Size());
284  const occa::memory o_op = OccaMemoryRead(op.GetMemory(), op.Size());
285  const occa::memory o_x = OccaMemoryRead(x.GetMemory(), x.Size());
286  occa::memory o_y = OccaMemoryReadWrite(y.GetMemory(), y.Size());
287  const occa_id_t id = std::make_pair(D1D,Q1D);
288  if (!Device::Allows(Backend::OCCA_CUDA))
289  {
290  static occa_kernel_t OccaDiffApply3D_cpu;
291  if (OccaDiffApply3D_cpu.find(id) == OccaDiffApply3D_cpu.end())
292  {
293  const occa::kernel DiffusionApply3D_CPU =
294  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
295  "DiffusionApply3D_CPU", props);
296  OccaDiffApply3D_cpu.emplace(id, DiffusionApply3D_CPU);
297  }
298  OccaDiffApply3D_cpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_op, o_x, o_y);
299  }
300  else
301  {
302  static occa_kernel_t OccaDiffApply3D_gpu;
303  if (OccaDiffApply3D_gpu.find(id) == OccaDiffApply3D_gpu.end())
304  {
305  const occa::kernel DiffusionApply3D_GPU =
306  mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
307  "DiffusionApply3D_GPU", props);
308  OccaDiffApply3D_gpu.emplace(id, DiffusionApply3D_GPU);
309  }
310  OccaDiffApply3D_gpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_op, o_x, o_y);
311  }
312 }
313 #endif // MFEM_USE_OCCA
314 
315 // PA Diffusion Apply 2D kernel
316 template<int T_D1D = 0, int T_Q1D = 0> static
317 void PADiffusionApply2D(const int NE,
318  const Array<double> &b,
319  const Array<double> &g,
320  const Array<double> &bt,
321  const Array<double> &gt,
322  const Vector &_op,
323  const Vector &_x,
324  Vector &_y,
325  const int d1d = 0,
326  const int q1d = 0)
327 {
328  const int D1D = T_D1D ? T_D1D : d1d;
329  const int Q1D = T_Q1D ? T_Q1D : q1d;
330  MFEM_VERIFY(D1D <= MAX_D1D, "");
331  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
332  auto B = Reshape(b.Read(), Q1D, D1D);
333  auto G = Reshape(g.Read(), Q1D, D1D);
334  auto Bt = Reshape(bt.Read(), D1D, Q1D);
335  auto Gt = Reshape(gt.Read(), D1D, Q1D);
336  auto op = Reshape(_op.Read(), Q1D*Q1D, 3, NE);
337  auto x = Reshape(_x.Read(), D1D, D1D, NE);
338  auto y = Reshape(_y.ReadWrite(), D1D, D1D, NE);
339  MFEM_FORALL(e, NE,
340  {
341  const int D1D = T_D1D ? T_D1D : d1d;
342  const int Q1D = T_Q1D ? T_Q1D : q1d;
343  // the following variables are evaluated at compile time
344  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
345  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
346 
347  double grad[max_Q1D][max_Q1D][2];
348  for (int qy = 0; qy < Q1D; ++qy)
349  {
350  for (int qx = 0; qx < Q1D; ++qx)
351  {
352  grad[qy][qx][0] = 0.0;
353  grad[qy][qx][1] = 0.0;
354  }
355  }
356  for (int dy = 0; dy < D1D; ++dy)
357  {
358  double gradX[max_Q1D][2];
359  for (int qx = 0; qx < Q1D; ++qx)
360  {
361  gradX[qx][0] = 0.0;
362  gradX[qx][1] = 0.0;
363  }
364  for (int dx = 0; dx < D1D; ++dx)
365  {
366  const double s = x(dx,dy,e);
367  for (int qx = 0; qx < Q1D; ++qx)
368  {
369  gradX[qx][0] += s * B(qx,dx);
370  gradX[qx][1] += s * G(qx,dx);
371  }
372  }
373  for (int qy = 0; qy < Q1D; ++qy)
374  {
375  const double wy = B(qy,dy);
376  const double wDy = G(qy,dy);
377  for (int qx = 0; qx < Q1D; ++qx)
378  {
379  grad[qy][qx][0] += gradX[qx][1] * wy;
380  grad[qy][qx][1] += gradX[qx][0] * wDy;
381  }
382  }
383  }
384  // Calculate Dxy, xDy in plane
385  for (int qy = 0; qy < Q1D; ++qy)
386  {
387  for (int qx = 0; qx < Q1D; ++qx)
388  {
389  const int q = qx + qy * Q1D;
390 
391  const double O11 = op(q,0,e);
392  const double O12 = op(q,1,e);
393  const double O22 = op(q,2,e);
394 
395  const double gradX = grad[qy][qx][0];
396  const double gradY = grad[qy][qx][1];
397 
398  grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
399  grad[qy][qx][1] = (O12 * gradX) + (O22 * gradY);
400  }
401  }
402  for (int qy = 0; qy < Q1D; ++qy)
403  {
404  double gradX[max_D1D][2];
405  for (int dx = 0; dx < D1D; ++dx)
406  {
407  gradX[dx][0] = 0;
408  gradX[dx][1] = 0;
409  }
410  for (int qx = 0; qx < Q1D; ++qx)
411  {
412  const double gX = grad[qy][qx][0];
413  const double gY = grad[qy][qx][1];
414  for (int dx = 0; dx < D1D; ++dx)
415  {
416  const double wx = Bt(dx,qx);
417  const double wDx = Gt(dx,qx);
418  gradX[dx][0] += gX * wDx;
419  gradX[dx][1] += gY * wx;
420  }
421  }
422  for (int dy = 0; dy < D1D; ++dy)
423  {
424  const double wy = Bt(dy,qy);
425  const double wDy = Gt(dy,qy);
426  for (int dx = 0; dx < D1D; ++dx)
427  {
428  y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
429  }
430  }
431  }
432  });
433 }
434 
435 // Shared memory PA Diffusion Apply 2D kernel
436 template<const int T_D1D = 0,
437  const int T_Q1D = 0,
438  const int T_NBZ = 0>
439 static void SmemPADiffusionApply2D(const int NE,
440  const Array<double> &_b,
441  const Array<double> &_g,
442  const Array<double> &_bt,
443  const Array<double> &_gt,
444  const Vector &_op,
445  const Vector &_x,
446  Vector &_y,
447  const int d1d = 0,
448  const int q1d = 0)
449 {
450  const int D1D = T_D1D ? T_D1D : d1d;
451  const int Q1D = T_Q1D ? T_Q1D : q1d;
452  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
453  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
454  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
455  MFEM_VERIFY(D1D <= MD1, "");
456  MFEM_VERIFY(Q1D <= MQ1, "");
457  auto b = Reshape(_b.Read(), Q1D, D1D);
458  auto g = Reshape(_g.Read(), Q1D, D1D);
459  auto op = Reshape(_op.Read(), Q1D*Q1D, 3, NE);
460  auto x = Reshape(_x.Read(), D1D, D1D, NE);
461  auto y = Reshape(_y.ReadWrite(), D1D, D1D, NE);
462  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
463  {
464  const int tidz = MFEM_THREAD_ID(z);
465  const int D1D = T_D1D ? T_D1D : d1d;
466  const int Q1D = T_Q1D ? T_Q1D : q1d;
467  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
468  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
469  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
470  MFEM_SHARED double sBG[2][MQ1*MD1];
471  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
472  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
473  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
474  double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
475  MFEM_SHARED double Xz[NBZ][MD1][MD1];
476  MFEM_SHARED double GD[2][NBZ][MD1][MQ1];
477  MFEM_SHARED double GQ[2][NBZ][MD1][MQ1];
478  double (*X)[MD1] = (double (*)[MD1])(Xz + tidz);
479  double (*DQ0)[MD1] = (double (*)[MD1])(GD[0] + tidz);
480  double (*DQ1)[MD1] = (double (*)[MD1])(GD[1] + tidz);
481  double (*QQ0)[MD1] = (double (*)[MD1])(GQ[0] + tidz);
482  double (*QQ1)[MD1] = (double (*)[MD1])(GQ[1] + tidz);
483  MFEM_FOREACH_THREAD(dy,y,D1D)
484  {
485  MFEM_FOREACH_THREAD(dx,x,D1D)
486  {
487  X[dy][dx] = x(dx,dy,e);
488  }
489  }
490  if (tidz == 0)
491  {
492  MFEM_FOREACH_THREAD(d,y,D1D)
493  {
494  MFEM_FOREACH_THREAD(q,x,Q1D)
495  {
496  B[q][d] = b(q,d);
497  G[q][d] = g(q,d);
498  }
499  }
500  }
501  MFEM_SYNC_THREAD;
502  MFEM_FOREACH_THREAD(dy,y,D1D)
503  {
504  MFEM_FOREACH_THREAD(qx,x,Q1D)
505  {
506  double u = 0.0;
507  double v = 0.0;
508  for (int dx = 0; dx < D1D; ++dx)
509  {
510  const double coords = X[dy][dx];
511  u += B[qx][dx] * coords;
512  v += G[qx][dx] * coords;
513  }
514  DQ0[dy][qx] = u;
515  DQ1[dy][qx] = v;
516  }
517  }
518  MFEM_SYNC_THREAD;
519  MFEM_FOREACH_THREAD(qy,y,Q1D)
520  {
521  MFEM_FOREACH_THREAD(qx,x,Q1D)
522  {
523  double u = 0.0;
524  double v = 0.0;
525  for (int dy = 0; dy < D1D; ++dy)
526  {
527  u += DQ1[dy][qx] * B[qy][dy];
528  v += DQ0[dy][qx] * G[qy][dy];
529  }
530  QQ0[qy][qx] = u;
531  QQ1[qy][qx] = v;
532  }
533  }
534  MFEM_SYNC_THREAD;
535  MFEM_FOREACH_THREAD(qy,y,Q1D)
536  {
537  MFEM_FOREACH_THREAD(qx,x,Q1D)
538  {
539  const int q = (qx + ((qy) * Q1D));
540  const double O11 = op(q,0,e);
541  const double O12 = op(q,1,e);
542  const double O22 = op(q,2,e);
543  const double gX = QQ0[qy][qx];
544  const double gY = QQ1[qy][qx];
545  QQ0[qy][qx] = (O11 * gX) + (O12 * gY);
546  QQ1[qy][qx] = (O12 * gX) + (O22 * gY);
547  }
548  }
549  MFEM_SYNC_THREAD;
550  if (tidz == 0)
551  {
552  MFEM_FOREACH_THREAD(d,y,D1D)
553  {
554  MFEM_FOREACH_THREAD(q,x,Q1D)
555  {
556  Bt[d][q] = b(q,d);
557  Gt[d][q] = g(q,d);
558  }
559  }
560  }
561  MFEM_SYNC_THREAD;
562  MFEM_FOREACH_THREAD(qy,y,Q1D)
563  {
564  MFEM_FOREACH_THREAD(dx,x,D1D)
565  {
566  double u = 0.0;
567  double v = 0.0;
568  for (int qx = 0; qx < Q1D; ++qx)
569  {
570  u += Gt[dx][qx] * QQ0[qy][qx];
571  v += Bt[dx][qx] * QQ1[qy][qx];
572  }
573  DQ0[qy][dx] = u;
574  DQ1[qy][dx] = v;
575  }
576  }
577  MFEM_SYNC_THREAD;
578  MFEM_FOREACH_THREAD(dy,y,D1D)
579  {
580  MFEM_FOREACH_THREAD(dx,x,D1D)
581  {
582  double u = 0.0;
583  double v = 0.0;
584  for (int qy = 0; qy < Q1D; ++qy)
585  {
586  u += DQ0[qy][dx] * Bt[dy][qy];
587  v += DQ1[qy][dx] * Gt[dy][qy];
588  }
589  y(dx,dy,e) += (u + v);
590  }
591  }
592  });
593 }
594 
595 
596 // PA Diffusion Apply 3D kernel
597 template<const int T_D1D = 0,
598  const int T_Q1D = 0> static
599 void PADiffusionApply3D(const int NE,
600  const Array<double> &b,
601  const Array<double> &g,
602  const Array<double> &bt,
603  const Array<double> &gt,
604  const Vector &_op,
605  const Vector &_x,
606  Vector &_y,
607  int d1d = 0, int q1d = 0)
608 {
609  const int D1D = T_D1D ? T_D1D : d1d;
610  const int Q1D = T_Q1D ? T_Q1D : q1d;
611  MFEM_VERIFY(D1D <= MAX_D1D, "");
612  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
613  auto B = Reshape(b.Read(), Q1D, D1D);
614  auto G = Reshape(g.Read(), Q1D, D1D);
615  auto Bt = Reshape(bt.Read(), D1D, Q1D);
616  auto Gt = Reshape(gt.Read(), D1D, Q1D);
617  auto op = Reshape(_op.Read(), Q1D*Q1D*Q1D, 6, NE);
618  auto x = Reshape(_x.Read(), D1D, D1D, D1D, NE);
619  auto y = Reshape(_y.ReadWrite(), D1D, D1D, D1D, NE);
620  MFEM_FORALL(e, NE,
621  {
622  const int D1D = T_D1D ? T_D1D : d1d;
623  const int Q1D = T_Q1D ? T_Q1D : q1d;
624  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
625  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
626  double grad[max_Q1D][max_Q1D][max_Q1D][4];
627  for (int qz = 0; qz < Q1D; ++qz)
628  {
629  for (int qy = 0; qy < Q1D; ++qy)
630  {
631  for (int qx = 0; qx < Q1D; ++qx)
632  {
633  grad[qz][qy][qx][0] = 0.0;
634  grad[qz][qy][qx][1] = 0.0;
635  grad[qz][qy][qx][2] = 0.0;
636  }
637  }
638  }
639  for (int dz = 0; dz < D1D; ++dz)
640  {
641  double gradXY[max_Q1D][max_Q1D][4];
642  for (int qy = 0; qy < Q1D; ++qy)
643  {
644  for (int qx = 0; qx < Q1D; ++qx)
645  {
646  gradXY[qy][qx][0] = 0.0;
647  gradXY[qy][qx][1] = 0.0;
648  gradXY[qy][qx][2] = 0.0;
649  }
650  }
651  for (int dy = 0; dy < D1D; ++dy)
652  {
653  double gradX[max_Q1D][2];
654  for (int qx = 0; qx < Q1D; ++qx)
655  {
656  gradX[qx][0] = 0.0;
657  gradX[qx][1] = 0.0;
658  }
659  for (int dx = 0; dx < D1D; ++dx)
660  {
661  const double s = x(dx,dy,dz,e);
662  for (int qx = 0; qx < Q1D; ++qx)
663  {
664  gradX[qx][0] += s * B(qx,dx);
665  gradX[qx][1] += s * G(qx,dx);
666  }
667  }
668  for (int qy = 0; qy < Q1D; ++qy)
669  {
670  const double wy = B(qy,dy);
671  const double wDy = G(qy,dy);
672  for (int qx = 0; qx < Q1D; ++qx)
673  {
674  const double wx = gradX[qx][0];
675  const double wDx = gradX[qx][1];
676  gradXY[qy][qx][0] += wDx * wy;
677  gradXY[qy][qx][1] += wx * wDy;
678  gradXY[qy][qx][2] += wx * wy;
679  }
680  }
681  }
682  for (int qz = 0; qz < Q1D; ++qz)
683  {
684  const double wz = B(qz,dz);
685  const double wDz = G(qz,dz);
686  for (int qy = 0; qy < Q1D; ++qy)
687  {
688  for (int qx = 0; qx < Q1D; ++qx)
689  {
690  grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
691  grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
692  grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
693  }
694  }
695  }
696  }
697  // Calculate Dxyz, xDyz, xyDz in plane
698  for (int qz = 0; qz < Q1D; ++qz)
699  {
700  for (int qy = 0; qy < Q1D; ++qy)
701  {
702  for (int qx = 0; qx < Q1D; ++qx)
703  {
704  const int q = qx + (qy + qz * Q1D) * Q1D;
705  const double O11 = op(q,0,e);
706  const double O12 = op(q,1,e);
707  const double O13 = op(q,2,e);
708  const double O22 = op(q,3,e);
709  const double O23 = op(q,4,e);
710  const double O33 = op(q,5,e);
711  const double gradX = grad[qz][qy][qx][0];
712  const double gradY = grad[qz][qy][qx][1];
713  const double gradZ = grad[qz][qy][qx][2];
714  grad[qz][qy][qx][0] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
715  grad[qz][qy][qx][1] = (O12*gradX)+(O22*gradY)+(O23*gradZ);
716  grad[qz][qy][qx][2] = (O13*gradX)+(O23*gradY)+(O33*gradZ);
717  }
718  }
719  }
720  for (int qz = 0; qz < Q1D; ++qz)
721  {
722  double gradXY[max_D1D][max_D1D][4];
723  for (int dy = 0; dy < D1D; ++dy)
724  {
725  for (int dx = 0; dx < D1D; ++dx)
726  {
727  gradXY[dy][dx][0] = 0;
728  gradXY[dy][dx][1] = 0;
729  gradXY[dy][dx][2] = 0;
730  }
731  }
732  for (int qy = 0; qy < Q1D; ++qy)
733  {
734  double gradX[max_D1D][4];
735  for (int dx = 0; dx < D1D; ++dx)
736  {
737  gradX[dx][0] = 0;
738  gradX[dx][1] = 0;
739  gradX[dx][2] = 0;
740  }
741  for (int qx = 0; qx < Q1D; ++qx)
742  {
743  const double gX = grad[qz][qy][qx][0];
744  const double gY = grad[qz][qy][qx][1];
745  const double gZ = grad[qz][qy][qx][2];
746  for (int dx = 0; dx < D1D; ++dx)
747  {
748  const double wx = Bt(dx,qx);
749  const double wDx = Gt(dx,qx);
750  gradX[dx][0] += gX * wDx;
751  gradX[dx][1] += gY * wx;
752  gradX[dx][2] += gZ * wx;
753  }
754  }
755  for (int dy = 0; dy < D1D; ++dy)
756  {
757  const double wy = Bt(dy,qy);
758  const double wDy = Gt(dy,qy);
759  for (int dx = 0; dx < D1D; ++dx)
760  {
761  gradXY[dy][dx][0] += gradX[dx][0] * wy;
762  gradXY[dy][dx][1] += gradX[dx][1] * wDy;
763  gradXY[dy][dx][2] += gradX[dx][2] * wy;
764  }
765  }
766  }
767  for (int dz = 0; dz < D1D; ++dz)
768  {
769  const double wz = Bt(dz,qz);
770  const double wDz = Gt(dz,qz);
771  for (int dy = 0; dy < D1D; ++dy)
772  {
773  for (int dx = 0; dx < D1D; ++dx)
774  {
775  y(dx,dy,dz,e) +=
776  ((gradXY[dy][dx][0] * wz) +
777  (gradXY[dy][dx][1] * wz) +
778  (gradXY[dy][dx][2] * wDz));
779  }
780  }
781  }
782  }
783  });
784 }
785 
786 // Shared memory PA Diffusion Apply 3D kernel
787 template<const int T_D1D = 0,
788  const int T_Q1D = 0>
789 static void SmemPADiffusionApply3D(const int NE,
790  const Array<double> &_b,
791  const Array<double> &_g,
792  const Array<double> &_bt,
793  const Array<double> &_gt,
794  const Vector &_op,
795  const Vector &_x,
796  Vector &_y,
797  const int d1d = 0,
798  const int q1d = 0)
799 {
800  const int D1D = T_D1D ? T_D1D : d1d;
801  const int Q1D = T_Q1D ? T_Q1D : q1d;
802  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
803  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
804  MFEM_VERIFY(D1D <= MD1, "");
805  MFEM_VERIFY(Q1D <= MQ1, "");
806  auto b = Reshape(_b.Read(), Q1D, D1D);
807  auto g = Reshape(_g.Read(), Q1D, D1D);
808  auto op = Reshape(_op.Read(), Q1D*Q1D*Q1D, 6, NE);
809  auto x = Reshape(_x.Read(), D1D, D1D, D1D, NE);
810  auto y = Reshape(_y.ReadWrite(), D1D, D1D, D1D, NE);
811  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
812  {
813  const int tidz = MFEM_THREAD_ID(z);
814  const int D1D = T_D1D ? T_D1D : d1d;
815  const int Q1D = T_Q1D ? T_Q1D : q1d;
816  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
817  constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
818  constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
819  MFEM_SHARED double sBG[2][MQ1*MD1];
820  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
821  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
822  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
823  double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
824  MFEM_SHARED double sm0[3][MDQ*MDQ*MDQ];
825  MFEM_SHARED double sm1[3][MDQ*MDQ*MDQ];
826  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
827  double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
828  double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
829  double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
830  double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
831  double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
832  double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
833  double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
834  double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
835  double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
836  double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
837  double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
838  double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
839  double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
840  double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
841  MFEM_FOREACH_THREAD(dz,z,D1D)
842  {
843  MFEM_FOREACH_THREAD(dy,y,D1D)
844  {
845  MFEM_FOREACH_THREAD(dx,x,D1D)
846  {
847  X[dz][dy][dx] = x(dx,dy,dz,e);
848  }
849  }
850  }
851  if (tidz == 0)
852  {
853  MFEM_FOREACH_THREAD(d,y,D1D)
854  {
855  MFEM_FOREACH_THREAD(q,x,Q1D)
856  {
857  B[q][d] = b(q,d);
858  G[q][d] = g(q,d);
859  }
860  }
861  }
862  MFEM_SYNC_THREAD;
863  MFEM_FOREACH_THREAD(dz,z,D1D)
864  {
865  MFEM_FOREACH_THREAD(dy,y,D1D)
866  {
867  MFEM_FOREACH_THREAD(qx,x,Q1D)
868  {
869  double u = 0.0;
870  double v = 0.0;
871  for (int dx = 0; dx < D1D; ++dx)
872  {
873  const double coords = X[dz][dy][dx];
874  u += coords * B[qx][dx];
875  v += coords * G[qx][dx];
876  }
877  DDQ0[dz][dy][qx] = u;
878  DDQ1[dz][dy][qx] = v;
879  }
880  }
881  }
882  MFEM_SYNC_THREAD;
883  MFEM_FOREACH_THREAD(dz,z,D1D)
884  {
885  MFEM_FOREACH_THREAD(qy,y,Q1D)
886  {
887  MFEM_FOREACH_THREAD(qx,x,Q1D)
888  {
889  double u = 0.0;
890  double v = 0.0;
891  double w = 0.0;
892  for (int dy = 0; dy < D1D; ++dy)
893  {
894  u += DDQ1[dz][dy][qx] * B[qy][dy];
895  v += DDQ0[dz][dy][qx] * G[qy][dy];
896  w += DDQ0[dz][dy][qx] * B[qy][dy];
897  }
898  DQQ0[dz][qy][qx] = u;
899  DQQ1[dz][qy][qx] = v;
900  DQQ2[dz][qy][qx] = w;
901  }
902  }
903  }
904  MFEM_SYNC_THREAD;
905  MFEM_FOREACH_THREAD(qz,z,Q1D)
906  {
907  MFEM_FOREACH_THREAD(qy,y,Q1D)
908  {
909  MFEM_FOREACH_THREAD(qx,x,Q1D)
910  {
911  double u = 0.0;
912  double v = 0.0;
913  double w = 0.0;
914  for (int dz = 0; dz < D1D; ++dz)
915  {
916  u += DQQ0[dz][qy][qx] * B[qz][dz];
917  v += DQQ1[dz][qy][qx] * B[qz][dz];
918  w += DQQ2[dz][qy][qx] * G[qz][dz];
919  }
920  QQQ0[qz][qy][qx] = u;
921  QQQ1[qz][qy][qx] = v;
922  QQQ2[qz][qy][qx] = w;
923  }
924  }
925  }
926  MFEM_SYNC_THREAD;
927  MFEM_FOREACH_THREAD(qz,z,Q1D)
928  {
929  MFEM_FOREACH_THREAD(qy,y,Q1D)
930  {
931  MFEM_FOREACH_THREAD(qx,x,Q1D)
932  {
933  const int q = qx + ((qy*Q1D) + (qz*Q1D*Q1D));
934  const double O11 = op(q,0,e);
935  const double O12 = op(q,1,e);
936  const double O13 = op(q,2,e);
937  const double O22 = op(q,3,e);
938  const double O23 = op(q,4,e);
939  const double O33 = op(q,5,e);
940  const double gX = QQQ0[qz][qy][qx];
941  const double gY = QQQ1[qz][qy][qx];
942  const double gZ = QQQ2[qz][qy][qx];
943  QQQ0[qz][qy][qx] = (O11*gX) + (O12*gY) + (O13*gZ);
944  QQQ1[qz][qy][qx] = (O12*gX) + (O22*gY) + (O23*gZ);
945  QQQ2[qz][qy][qx] = (O13*gX) + (O23*gY) + (O33*gZ);
946  }
947  }
948  }
949  MFEM_SYNC_THREAD;
950  if (tidz == 0)
951  {
952  MFEM_FOREACH_THREAD(d,y,D1D)
953  {
954  MFEM_FOREACH_THREAD(q,x,Q1D)
955  {
956  Bt[d][q] = b(q,d);
957  Gt[d][q] = g(q,d);
958  }
959  }
960  }
961  MFEM_SYNC_THREAD;
962  MFEM_FOREACH_THREAD(qz,z,Q1D)
963  {
964  MFEM_FOREACH_THREAD(qy,y,Q1D)
965  {
966  MFEM_FOREACH_THREAD(dx,x,D1D)
967  {
968  double u = 0.0;
969  double v = 0.0;
970  double w = 0.0;
971  for (int qx = 0; qx < Q1D; ++qx)
972  {
973  u += QQQ0[qz][qy][qx] * Gt[dx][qx];
974  v += QQQ1[qz][qy][qx] * Bt[dx][qx];
975  w += QQQ2[qz][qy][qx] * Bt[dx][qx];
976  }
977  QQD0[qz][qy][dx] = u;
978  QQD1[qz][qy][dx] = v;
979  QQD2[qz][qy][dx] = w;
980  }
981  }
982  }
983  MFEM_SYNC_THREAD;
984  MFEM_FOREACH_THREAD(qz,z,Q1D)
985  {
986  MFEM_FOREACH_THREAD(dy,y,D1D)
987  {
988  MFEM_FOREACH_THREAD(dx,x,D1D)
989  {
990  double u = 0.0;
991  double v = 0.0;
992  double w = 0.0;
993  for (int qy = 0; qy < Q1D; ++qy)
994  {
995  u += QQD0[qz][qy][dx] * Bt[dy][qy];
996  v += QQD1[qz][qy][dx] * Gt[dy][qy];
997  w += QQD2[qz][qy][dx] * Bt[dy][qy];
998  }
999  QDD0[qz][dy][dx] = u;
1000  QDD1[qz][dy][dx] = v;
1001  QDD2[qz][dy][dx] = w;
1002  }
1003  }
1004  }
1005  MFEM_SYNC_THREAD;
1006  MFEM_FOREACH_THREAD(dz,z,D1D)
1007  {
1008  MFEM_FOREACH_THREAD(dy,y,D1D)
1009  {
1010  MFEM_FOREACH_THREAD(dx,x,D1D)
1011  {
1012  double u = 0.0;
1013  double v = 0.0;
1014  double w = 0.0;
1015  for (int qz = 0; qz < Q1D; ++qz)
1016  {
1017  u += QDD0[qz][dy][dx] * Bt[dz][qz];
1018  v += QDD1[qz][dy][dx] * Bt[dz][qz];
1019  w += QDD2[qz][dy][dx] * Gt[dz][qz];
1020  }
1021  y(dx,dy,dz,e) += (u + v + w);
1022  }
1023  }
1024  }
1025  });
1026 }
1027 
1028 static void PADiffusionApply(const int dim,
1029  const int D1D,
1030  const int Q1D,
1031  const int NE,
1032  const Array<double> &B,
1033  const Array<double> &G,
1034  const Array<double> &Bt,
1035  const Array<double> &Gt,
1036  const Vector &op,
1037  const Vector &x,
1038  Vector &y)
1039 {
1040 #ifdef MFEM_USE_OCCA
1041  if (DeviceCanUseOcca())
1042  {
1043  if (dim == 2)
1044  {
1045  OccaPADiffusionApply2D(D1D, Q1D, NE, B, G, Bt, Gt, op, x, y);
1046  return;
1047  }
1048  if (dim == 3)
1049  {
1050  OccaPADiffusionApply3D(D1D, Q1D, NE, B, G, Bt, Gt, op, x, y);
1051  return;
1052  }
1053  MFEM_ABORT("OCCA PADiffusionApply unknown kernel!");
1054  }
1055 #endif // MFEM_USE_OCCA
1056 
1057  if (Device::Allows(Backend::RAJA_CUDA))
1058  {
1059  if (dim == 2)
1060  {
1061  switch ((D1D << 4 ) | Q1D)
1062  {
1063  case 0x22: return PADiffusionApply2D<2,2>(NE,B,G,Bt,Gt,op,x,y);
1064  case 0x33: return PADiffusionApply2D<3,3>(NE,B,G,Bt,Gt,op,x,y);
1065  case 0x44: return PADiffusionApply2D<4,4>(NE,B,G,Bt,Gt,op,x,y);
1066  case 0x55: return PADiffusionApply2D<5,5>(NE,B,G,Bt,Gt,op,x,y);
1067  case 0x66: return PADiffusionApply2D<6,6>(NE,B,G,Bt,Gt,op,x,y);
1068  case 0x77: return PADiffusionApply2D<7,7>(NE,B,G,Bt,Gt,op,x,y);
1069  case 0x88: return PADiffusionApply2D<8,8>(NE,B,G,Bt,Gt,op,x,y);
1070  case 0x99: return PADiffusionApply2D<9,9>(NE,B,G,Bt,Gt,op,x,y);
1071  default: return PADiffusionApply2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1072  }
1073  }
1074  if (dim == 3)
1075  {
1076  switch ((D1D << 4 ) | Q1D)
1077  {
1078  case 0x23: return PADiffusionApply3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1079  case 0x34: return PADiffusionApply3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1080  case 0x45: return PADiffusionApply3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1081  case 0x56: return PADiffusionApply3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1082  case 0x67: return PADiffusionApply3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1083  case 0x78: return PADiffusionApply3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1084  case 0x89: return PADiffusionApply3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1085  default: return PADiffusionApply3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1086  }
1087  }
1088  }
1089  else if (dim == 2)
1090  {
1091  switch ((D1D << 4 ) | Q1D)
1092  {
1093  case 0x22: return SmemPADiffusionApply2D<2,2,16>(NE,B,G,Bt,Gt,op,x,y);
1094  case 0x33: return SmemPADiffusionApply2D<3,3,16>(NE,B,G,Bt,Gt,op,x,y);
1095  case 0x44: return SmemPADiffusionApply2D<4,4,8>(NE,B,G,Bt,Gt,op,x,y);
1096  case 0x55: return SmemPADiffusionApply2D<5,5,8>(NE,B,G,Bt,Gt,op,x,y);
1097  case 0x66: return SmemPADiffusionApply2D<6,6,4>(NE,B,G,Bt,Gt,op,x,y);
1098  case 0x77: return SmemPADiffusionApply2D<7,7,4>(NE,B,G,Bt,Gt,op,x,y);
1099  case 0x88: return SmemPADiffusionApply2D<8,8,2>(NE,B,G,Bt,Gt,op,x,y);
1100  case 0x99: return SmemPADiffusionApply2D<9,9,2>(NE,B,G,Bt,Gt,op,x,y);
1101  default: return PADiffusionApply2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1102  }
1103  }
1104  else if (dim == 3)
1105  {
1106  switch ((D1D << 4 ) | Q1D)
1107  {
1108  case 0x23: return SmemPADiffusionApply3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1109  case 0x34: return SmemPADiffusionApply3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1110  case 0x45: return SmemPADiffusionApply3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1111  case 0x56: return SmemPADiffusionApply3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1112  case 0x67: return SmemPADiffusionApply3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1113  case 0x78: return SmemPADiffusionApply3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1114  case 0x89: return SmemPADiffusionApply3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1115  default: return PADiffusionApply3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1116  }
1117  }
1118  MFEM_ABORT("Unknown kernel.");
1119 }
1120 
1121 // PA Diffusion Apply kernel
1122 void DiffusionIntegrator::AddMultPA(const Vector &x, Vector &y) const
1123 {
1124  PADiffusionApply(dim, dofs1D, quad1D, ne,
1125  maps->B, maps->G, maps->Bt, maps->Gt,
1126  pa_data, x, y);
1127 }
1128 
1129 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:237
Abstract class for Finite Elements.
Definition: fe.hpp:229
int Size() const
Logical size of the array.
Definition: array.hpp:118
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe.hpp:305
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:85
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:97
Subclass constant coefficient.
Definition: coefficient.hpp:67
const Geometry::Type geom
Definition: ex1.cpp:40
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition: occa.cpp:27
int Size() const
Returns the size of the vector.
Definition: vector.hpp:150
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:159
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:81
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition: occa.hpp:79
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Definition: vector.hpp:173
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:59
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:135
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:374
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:756
int dim
Definition: ex3.cpp:48
const int MAX_Q1D
Definition: forall.hpp:35
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:272
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
int Dimension() const
Definition: mesh.hpp:713
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:85
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:48
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Definition: fe.cpp:206
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:23
const int MAX_D1D
Definition: forall.hpp:34
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement associated with i&#39;th element.
Definition: fespace.cpp:1541
Vector data type.
Definition: vector.hpp:48
std::pair< int, int > occa_id_t
Definition: occa.hpp:78