MFEM  v4.3.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_gradient.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
21 // PA Gradient Integrator
22 
23 /* Description of the *SetupND functions
24  Inputs are as follows
25  \b Q1D number of quadrature points in one dimension.
26  \b w quadrature weights.
27  \b j element Jacobians.
28  \b c coefficient at quadrature points.
29 
30  The function is used precompute data needed at quadrature points during
31  the action. */
32 
33 /* Description of the *ApplyND functions
34  The template parameters are
35  \b T_D1D number of degrees of freedom in one dimension,
36  \b T_Q1D number of quadrature points in one dimension,
37  and are necessary to allow for compiler optimizations inside the kernel.
38 
39  Inputs are as follows
40  \b NE number of elements.
41  \b B matrix of basis functions.
42  \b G matrix of derivatives of the basis functions.
43  \b Bt transpose of matrix of basis functions.
44  \b Gt transpose matrix of derivatives of the basis functions.
45  \b op data used during action of the element matrix in the tensor
46  product application.
47 
48  \b x input vector of degrees of freedom on the element.
49  \b y output vector of degrees of freedom on the element.
50 
51  The function computes the kernel for one dimension that is suitable for
52  tensor product action to form ND operators.
53  Most of the ND inputs are reshaped as NQ*(ND*ND)*NE data structure, i.e
54  to allow indexing such as op(qpt,i,j,el).
55 
56  The output data structure is dependent on the kernel and layout of the
57  dimension ND and element number, but in general resembles the action of the
58  element matrix in the tensor product application. */
59 
60 /* Description of the Smem*ApplyND functions
61  The shared memory (Smem) versions of the kernels differ from the regular
62  versions in the following properties.
63 
64  \b MFEM_FORALL is using only one level of parallelism.
65  \b MFEM_FORALL_ND uses an additional level of parallelism
66  \b MFEM_FOREACH_THREAD
67 
68  These macros allow automatic mapping of manually defined blocks to
69  underlying hardware threads. These threads can share memory by using
70  the \b MFEM_SHARED keyword for local arrays. */
71 
72 // PA Gradient Assemble 2D kernel
73 static void PAGradientSetup2D(const int Q1D,
74  const int NE,
75  const Array<double> &w,
76  const Vector &j,
77  const Vector &c,
78  Vector &op)
79 {
80  const int NQ = Q1D*Q1D;
81  auto W = w.Read();
82  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
83  auto y = Reshape(op.Write(), NQ, 2, 2, NE);
84 
85  const bool const_c = c.Size() == 1;
86  const auto C = const_c ? Reshape(c.Read(), 1,1) :
87  Reshape(c.Read(), NQ, NE);
88 
89  MFEM_FORALL(e, NE,
90  {
91  for (int q = 0; q < NQ; ++q)
92  {
93  const double J11 = J(q,0,0,e);
94  const double J12 = J(q,0,1,e);
95  const double J21 = J(q,1,0,e);
96  const double J22 = J(q,1,1,e);
97  // Store wq * Q * adj(J)
98  const double Co = const_c ? C(0,0) : C(q,e);
99  y(q,0,0,e) = W[q] * Co * J22; // 1,1
100  y(q,0,1,e) = W[q] * Co * -J12; // 1,2
101  y(q,1,0,e) = W[q] * Co * -J21; // 2,1
102  y(q,1,1,e) = W[q] * Co * J11; // 2,2
103  }
104  });
105 }
106 
107 // PA Gradient Assemble 3D kernel
108 static void PAGradientSetup3D(const int Q1D,
109  const int NE,
110  const Array<double> &w,
111  const Vector &j,
112  const Vector &c,
113  Vector &op)
114 {
115  const int NQ = Q1D*Q1D*Q1D;
116  auto W = w.Read();
117  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
118  auto y = Reshape(op.Write(), NQ, 3, 3, NE);
119 
120  const bool const_c = c.Size() == 1;
121  const auto C = const_c ? Reshape(c.Read(), 1,1) :
122  Reshape(c.Read(), NQ,NE);
123 
124  MFEM_FORALL(e, NE,
125  {
126  for (int q = 0; q < NQ; ++q)
127  {
128  const double Co = const_c ? C(0,0) : C(q,e);
129 
130  const double J11 = J(q,0,0,e);
131  const double J21 = J(q,1,0,e);
132  const double J31 = J(q,2,0,e);
133  const double J12 = J(q,0,1,e);
134  const double J22 = J(q,1,1,e);
135  const double J32 = J(q,2,1,e);
136  const double J13 = J(q,0,2,e);
137  const double J23 = J(q,1,2,e);
138  const double J33 = J(q,2,2,e);
139  const double cw = W[q] * Co;
140  // adj(J)
141  const double A11 = (J22 * J33) - (J23 * J32);
142  const double A12 = (J32 * J13) - (J12 * J33);
143  const double A13 = (J12 * J23) - (J22 * J13);
144  const double A21 = (J31 * J23) - (J21 * J33);
145  const double A22 = (J11 * J33) - (J13 * J31);
146  const double A23 = (J21 * J13) - (J11 * J23);
147  const double A31 = (J21 * J32) - (J31 * J22);
148  const double A32 = (J31 * J12) - (J11 * J32);
149  const double A33 = (J11 * J22) - (J12 * J21);
150  // Store wq * Q * adj(J)
151  y(q,0,0,e) = cw * A11; // 1,1
152  y(q,0,1,e) = cw * A12; // 1,2
153  y(q,0,2,e) = cw * A13; // 1,3
154  y(q,1,0,e) = cw * A21; // 2,1
155  y(q,1,1,e) = cw * A22; // 2,2
156  y(q,1,2,e) = cw * A23; // 2,3
157  y(q,2,0,e) = cw * A31; // 3,1
158  y(q,2,1,e) = cw * A32; // 3,2
159  y(q,2,2,e) = cw * A33; // 3,3
160  }
161  });
162 }
163 
164 static void PAGradientSetup(const int dim,
165  const int TR_D1D,
166  const int TE_D1D,
167  const int Q1D,
168  const int NE,
169  const Array<double> &W,
170  const Vector &J,
171  const Vector &COEFF,
172  Vector &op)
173 {
174  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAGradientSetup"); }
175  if (dim == 2)
176  {
177  PAGradientSetup2D(Q1D, NE, W, J, COEFF, op);
178  }
179  if (dim == 3)
180  {
181  PAGradientSetup3D(Q1D, NE, W, J, COEFF, op);
182  }
183 }
184 
185 void GradientIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
186  const FiniteElementSpace &test_fes)
187 {
188  // Assumes tensor-product elements ordered by nodes
189  MFEM_ASSERT(trial_fes.GetOrdering() == Ordering::byNODES,
190  "PA Only supports Ordering::byNODES!");
191  Mesh *mesh = trial_fes.GetMesh();
192  const FiniteElement &trial_fe = *trial_fes.GetFE(0);
193  const FiniteElement &test_fe = *test_fes.GetFE(0);
194  ElementTransformation *trans = mesh->GetElementTransformation(0);
195  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe,
196  *trans);
197  const int dims = trial_fe.GetDim();
198  const int dimsToStore = dims * dims;
199  const int nq = ir->GetNPoints();
200  dim = mesh->Dimension();
201  ne = trial_fes.GetNE();
202  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
203  trial_maps = &trial_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
204  trial_dofs1D = trial_maps->ndof;
205  quad1D = trial_maps->nqpt;
206  test_maps = &test_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
207  test_dofs1D = test_maps->ndof;
208  MFEM_ASSERT(quad1D == test_maps->nqpt,
209  "PA requires test and trial space to have same number of quadrature points!");
210  pa_data.SetSize(nq * dimsToStore * ne, Device::GetMemoryType());
211 
212  Vector coeff;
213 
214  if (Q == nullptr)
215  {
216  coeff.SetSize(1);
217  coeff(0) = 1.0;
218  }
219  else if (ConstantCoefficient* cQ = dynamic_cast<ConstantCoefficient*>(Q))
220  {
221  coeff.SetSize(1);
222  coeff(0) = cQ->constant;
223  }
224  else if (QuadratureFunctionCoefficient* cQ =
225  dynamic_cast<QuadratureFunctionCoefficient*>(Q))
226  {
227  const QuadratureFunction &qFun = cQ->GetQuadFunction();
228  MFEM_VERIFY(qFun.Size() == ne*nq,
229  "Incompatible QuadratureFunction dimension \n");
230 
231  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
232  "IntegrationRule used within integrator and in"
233  " QuadratureFunction appear to be different");
234  qFun.Read();
235  coeff.MakeRef(const_cast<QuadratureFunction &>(qFun),0);
236  }
237  else
238  {
239  coeff.SetSize(nq * ne);
240  auto C = Reshape(coeff.HostWrite(), nq, ne);
241  for (int e = 0; e < ne; ++e)
242  {
244  for (int q = 0; q < nq; ++q)
245  {
246  C(q,e) = Q->Eval(T, ir->IntPoint(q));
247  }
248  }
249  }
250 
251  PAGradientSetup(dim, trial_dofs1D, test_dofs1D, quad1D,
252  ne, ir->GetWeights(), geom->J, coeff, pa_data);
253 }
254 
255 // PA Gradient Apply 2D kernel
256 template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
257 static void PAGradientApply2D(const int NE,
258  const Array<double> &b,
259  const Array<double> &g,
260  const Array<double> &bt,
261  const Vector &op_,
262  const Vector &x_,
263  Vector &y_,
264  const int tr_d1d = 0,
265  const int te_d1d = 0,
266  const int q1d = 0)
267 {
268  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
269  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
270  const int Q1D = T_Q1D ? T_Q1D : q1d;
271  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
272  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
273  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
274  auto B = Reshape(b.Read(), Q1D, TR_D1D);
275  auto G = Reshape(g.Read(), Q1D, TR_D1D);
276  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
277  auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
278  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, NE);
279  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, 2, NE);
280  MFEM_FORALL(e, NE,
281  {
282  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
283  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
284  const int Q1D = T_Q1D ? T_Q1D : q1d;
285  const int VDIM = 2;
286  // the following variables are evaluated at compile time
287  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
288  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
289 
290  double grad[max_Q1D][max_Q1D][VDIM];
291  for (int qy = 0; qy < Q1D; ++qy)
292  {
293  for (int qx = 0; qx < Q1D; ++qx)
294  {
295  grad[qy][qx][0] = 0.0;
296  grad[qy][qx][1] = 0.0;
297  }
298  }
299  for (int dy = 0; dy < TR_D1D; ++dy)
300  {
301  double gradX[max_Q1D][VDIM];
302  for (int qx = 0; qx < Q1D; ++qx)
303  {
304  gradX[qx][0] = 0.0;
305  gradX[qx][1] = 0.0;
306  }
307  for (int dx = 0; dx < TR_D1D; ++dx)
308  {
309  const double s = x(dx,dy,e);
310  for (int qx = 0; qx < Q1D; ++qx)
311  {
312  gradX[qx][0] += s * G(qx,dx);
313  gradX[qx][1] += s * B(qx,dx);
314  }
315  }
316  for (int qy = 0; qy < Q1D; ++qy)
317  {
318  const double wy = B(qy,dy);
319  const double wDy = G(qy,dy);
320  for (int qx = 0; qx < Q1D; ++qx)
321  {
322  grad[qy][qx][0] += gradX[qx][0] * wy;
323  grad[qy][qx][1] += gradX[qx][1] * wDy;
324  }
325  }
326  }
327  // We've now calculated grad(p) = [Dxy, xDy] in plane
328  for (int qy = 0; qy < Q1D; ++qy)
329  {
330  for (int qx = 0; qx < Q1D; ++qx)
331  {
332  const int q = qx + qy * Q1D;
333  const double gradX = grad[qy][qx][0];
334  const double gradY = grad[qy][qx][1];
335 
336  grad[qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e);
337  grad[qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e);
338  }
339  }
340  // We've now calculated grad = grad p * op
341  for (int qy = 0; qy < Q1D; ++qy)
342  {
343  double opX[max_TE_D1D][VDIM];
344  for (int dx = 0; dx < TE_D1D; ++dx)
345  {
346  opX[dx][0] = 0.0;
347  opX[dx][1] = 0.0;
348  for (int qx = 0; qx < Q1D; ++qx)
349  {
350  opX[dx][0] += Bt(dx,qx)*grad[qy][qx][0];
351  opX[dx][1] += Bt(dx,qx)*grad[qy][qx][1];
352  }
353  }
354  for (int dy = 0; dy < TE_D1D; ++dy)
355  {
356  for (int dx = 0; dx < TE_D1D; ++dx)
357  {
358  y(dx,dy,0,e) += Bt(dy,qy)*opX[dx][0];
359  y(dx,dy,1,e) += Bt(dy,qy)*opX[dx][1];
360  }
361  }
362  }
363  // We've now calculated y = u * grad
364  });
365 
366 }
367 
368 // PA Gradient Apply 2D kernel transpose
369 template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
370 static void PAGradientApplyTranspose2D(const int NE,
371  const Array<double> &bt,
372  const Array<double> &gt,
373  const Array<double> &b,
374  const Vector &op_,
375  const Vector &x_,
376  Vector &y_,
377  const int tr_d1d = 0,
378  const int te_d1d = 0,
379  const int q1d = 0)
380 {
381  // TODO
382  MFEM_ASSERT(false, "PAGradientApplyTranspose2D not implemented.");
383 }
384 
385 // PA Gradient Apply 3D kernel
386 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
387 static void PAGradientApply3D(const int NE,
388  const Array<double> &b,
389  const Array<double> &g,
390  const Array<double> &bt,
391  const Vector &op_,
392  const Vector &x_,
393  Vector &y_,
394  int tr_d1d = 0,
395  int te_d1d = 0,
396  int q1d = 0)
397 {
398  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
399  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
400  const int Q1D = T_Q1D ? T_Q1D : q1d;
401  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
402  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
403  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
404  auto B = Reshape(b.Read(), Q1D, TR_D1D);
405  auto G = Reshape(g.Read(), Q1D, TR_D1D);
406  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
407  auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
408  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
409  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
410  MFEM_FORALL(e, NE,
411  {
412  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
413  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
414  const int Q1D = T_Q1D ? T_Q1D : q1d;
415  const int VDIM = 3;
416  // the following variables are evaluated at compile time
417  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
418  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
419 
420  double grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
421  for (int qz = 0; qz < Q1D; ++qz)
422  {
423  for (int qy = 0; qy < Q1D; ++qy)
424  {
425  for (int qx = 0; qx < Q1D; ++qx)
426  {
427  grad[qz][qy][qx][0] = 0.0;
428  grad[qz][qy][qx][1] = 0.0;
429  grad[qz][qy][qx][2] = 0.0;
430  }
431  }
432  }
433  for (int dz = 0; dz < TR_D1D; ++dz)
434  {
435  double gradXY[max_Q1D][max_Q1D][3];
436  for (int qy = 0; qy < Q1D; ++qy)
437  {
438  for (int qx = 0; qx < Q1D; ++qx)
439  {
440  gradXY[qy][qx][0] = 0.0;
441  gradXY[qy][qx][1] = 0.0;
442  gradXY[qy][qx][2] = 0.0;
443  }
444  }
445  for (int dy = 0; dy < TR_D1D; ++dy)
446  {
447  double gradX[max_Q1D][2];
448  for (int qx = 0; qx < Q1D; ++qx)
449  {
450  gradX[qx][0] = 0.0;
451  gradX[qx][1] = 0.0;
452  }
453  for (int dx = 0; dx < TR_D1D; ++dx)
454  {
455  const double s = x(dx,dy,dz,e);
456  for (int qx = 0; qx < Q1D; ++qx)
457  {
458  gradX[qx][0] += s * B(qx,dx);
459  gradX[qx][1] += s * G(qx,dx);
460  }
461  }
462  for (int qy = 0; qy < Q1D; ++qy)
463  {
464  const double wy = B(qy,dy);
465  const double wDy = G(qy,dy);
466  for (int qx = 0; qx < Q1D; ++qx)
467  {
468  const double wx = gradX[qx][0];
469  const double wDx = gradX[qx][1];
470  gradXY[qy][qx][0] += wDx * wy;
471  gradXY[qy][qx][1] += wx * wDy;
472  gradXY[qy][qx][2] += wx * wy;
473  }
474  }
475  }
476  for (int qz = 0; qz < Q1D; ++qz)
477  {
478  const double wz = B(qz,dz);
479  const double wDz = G(qz,dz);
480  for (int qy = 0; qy < Q1D; ++qy)
481  {
482  for (int qx = 0; qx < Q1D; ++qx)
483  {
484  grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
485  grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
486  grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
487  }
488  }
489  }
490  }
491  // We've now calculated grad(p) = [Dxyz, xDyz, xyDz] in plane
492  for (int qz = 0; qz < Q1D; ++qz)
493  {
494  for (int qy = 0; qy < Q1D; ++qy)
495  {
496  for (int qx = 0; qx < Q1D; ++qx)
497  {
498  const int q = qx + (qy + qz * Q1D) * Q1D;
499  const double gradX = grad[qz][qy][qx][0];
500  const double gradY = grad[qz][qy][qx][1];
501  const double gradZ = grad[qz][qy][qx][2];
502 
503  grad[qz][qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e) + gradZ*op(q,2,0,e);
504  grad[qz][qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e) + gradZ*op(q,2,1,e);
505  grad[qz][qy][qx][2] = gradX*op(q,0,2,e) + gradY*op(q,1,2,e) + gradZ*op(q,2,2,e);
506  }
507  }
508  }
509  // We've now calculated grad = grad p * op
510  for (int qz = 0; qz < Q1D; ++qz)
511  {
512  double opXY[max_TE_D1D][max_TE_D1D][VDIM];
513  for (int dy = 0; dy < TE_D1D; ++dy)
514  {
515  for (int dx = 0; dx < TE_D1D; ++dx)
516  {
517  opXY[dy][dx][0] = 0.0;
518  opXY[dy][dx][1] = 0.0;
519  opXY[dy][dx][2] = 0.0;
520  }
521  }
522  for (int qy = 0; qy < Q1D; ++qy)
523  {
524  double opX[max_TE_D1D][VDIM];
525  for (int dx = 0; dx < TE_D1D; ++dx)
526  {
527  opX[dx][0] = 0.0;
528  opX[dx][1] = 0.0;
529  opX[dx][2] = 0.0;
530  for (int qx = 0; qx < Q1D; ++qx)
531  {
532  opX[dx][0] += Bt(dx,qx)*grad[qz][qy][qx][0];
533  opX[dx][1] += Bt(dx,qx)*grad[qz][qy][qx][1];
534  opX[dx][2] += Bt(dx,qx)*grad[qz][qy][qx][2];
535  }
536  }
537  for (int dy = 0; dy < TE_D1D; ++dy)
538  {
539  for (int dx = 0; dx < TE_D1D; ++dx)
540  {
541  opXY[dy][dx][0] += Bt(dy,qy)*opX[dx][0];
542  opXY[dy][dx][1] += Bt(dy,qy)*opX[dx][1];
543  opXY[dy][dx][2] += Bt(dy,qy)*opX[dx][2];
544  }
545  }
546  }
547  for (int dz = 0; dz < TE_D1D; ++dz)
548  {
549  for (int dy = 0; dy < TE_D1D; ++dy)
550  {
551  for (int dx = 0; dx < TE_D1D; ++dx)
552  {
553  y(dx,dy,dz,0,e) += Bt(dz,qz)*opXY[dy][dx][0];
554  y(dx,dy,dz,1,e) += Bt(dz,qz)*opXY[dy][dx][1];
555  y(dx,dy,dz,2,e) += Bt(dz,qz)*opXY[dy][dx][2];
556  }
557  }
558  }
559  }
560  // We've now calculated y = u * grad
561  });
562 }
563 
564 // PA Gradient Apply 3D kernel
565 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
566 static void PAGradientApplyTranspose3D(const int NE,
567  const Array<double> &bt,
568  const Array<double> &gt,
569  const Array<double> &b,
570  const Vector &op_,
571  const Vector &x_,
572  Vector &y_,
573  int tr_d1d = 0,
574  int te_d1d = 0,
575  int q1d = 0)
576 {
577  MFEM_ASSERT(false, "Gradient PA Apply Transpose 3D not implemented.");
578 }
579 
580 // Shared memory PA Gradient Apply 3D kernel
581 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
582 static void SmemPAGradientApply3D(const int NE,
583  const Array<double> &b_,
584  const Array<double> &g_,
585  const Array<double> &bt_,
586  const Vector &d_,
587  const Vector &x_,
588  Vector &y_,
589  const int tr_d1d = 0,
590  const int te_d1d = 0,
591  const int q1d = 0)
592 {
593  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
594  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
595  const int Q1D = T_Q1D ? T_Q1D : q1d;
596 
597  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
598  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
599  MFEM_VERIFY(TR_D1D <= Q1D, "");
600  MFEM_VERIFY(TE_D1D <= Q1D, "");
601  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
602 
603  auto b = Reshape(b_.Read(), Q1D, TR_D1D);
604  auto g = Reshape(g_.Read(), Q1D, TR_D1D);
605  auto bt = Reshape(bt_.Read(), TE_D1D, Q1D);
606  auto D = Reshape(d_.Read(), Q1D*Q1D*Q1D, 3, 3, NE);
607  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
608  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
609 
610  MFEM_FORALL_3D(e, NE, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D,
611  {
612  const int tidz = MFEM_THREAD_ID(z);
613  const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
614  const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
615  const int Q1D = T_Q1D ? T_Q1D : q1d;
616  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
617  constexpr int MD1R = T_TR_D1D ? T_TR_D1D : MAX_D1D;
618  constexpr int MD1E = T_TE_D1D ? T_TE_D1D : MAX_D1D;
619  constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
620  constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
621  MFEM_SHARED double sBG[2][MQ1*MD1];
622  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
623  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
624  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
625  MFEM_SHARED double sm0[3][MDQ*MDQ*MDQ];
626  MFEM_SHARED double sm1[3][MDQ*MDQ*MDQ];
627  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
628  double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
629  double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
630 
631  double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
632  double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
633  double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
634 
635  double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
636  double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
637  double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
638 
639  double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
640  double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
641  double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
642 
643  double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
644  double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
645  double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
646  MFEM_FOREACH_THREAD(dz,z,D1DR)
647  {
648  MFEM_FOREACH_THREAD(dy,y,D1DR)
649  {
650  MFEM_FOREACH_THREAD(dx,x,D1DR)
651  {
652  X[dz][dy][dx] = x(dx,dy,dz,e);
653  }
654  }
655  }
656  if (tidz == 0)
657  {
658  MFEM_FOREACH_THREAD(d,y,D1DR)
659  {
660  MFEM_FOREACH_THREAD(q,x,Q1D)
661  {
662  B[q][d] = b(q,d);
663  G[q][d] = g(q,d);
664  }
665  }
666  }
667  MFEM_SYNC_THREAD;
668  MFEM_FOREACH_THREAD(dz,z,D1DR)
669  {
670  MFEM_FOREACH_THREAD(dy,y,D1DR)
671  {
672  MFEM_FOREACH_THREAD(qx,x,Q1D)
673  {
674  double u = 0.0;
675  double v = 0.0;
676  for (int dx = 0; dx < D1DR; ++dx)
677  {
678  const double coord = X[dz][dy][dx];
679  u += coord * B[qx][dx];
680  v += coord * G[qx][dx];
681  }
682  DDQ0[dz][dy][qx] = u;
683  DDQ1[dz][dy][qx] = v;
684  }
685  }
686  }
687  MFEM_SYNC_THREAD;
688  MFEM_FOREACH_THREAD(dz,z,D1DR)
689  {
690  MFEM_FOREACH_THREAD(qy,y,Q1D)
691  {
692  MFEM_FOREACH_THREAD(qx,x,Q1D)
693  {
694  double u = 0.0;
695  double v = 0.0;
696  double w = 0.0;
697  for (int dy = 0; dy < D1DR; ++dy)
698  {
699  u += DDQ1[dz][dy][qx] * B[qy][dy];
700  v += DDQ0[dz][dy][qx] * G[qy][dy];
701  w += DDQ0[dz][dy][qx] * B[qy][dy];
702  }
703  DQQ0[dz][qy][qx] = u;
704  DQQ1[dz][qy][qx] = v;
705  DQQ2[dz][qy][qx] = w;
706  }
707  }
708  }
709  MFEM_SYNC_THREAD;
710  MFEM_FOREACH_THREAD(qz,z,Q1D)
711  {
712  MFEM_FOREACH_THREAD(qy,y,Q1D)
713  {
714  MFEM_FOREACH_THREAD(qx,x,Q1D)
715  {
716  double u = 0.0;
717  double v = 0.0;
718  double w = 0.0;
719  for (int dz = 0; dz < D1DR; ++dz)
720  {
721  u += DQQ0[dz][qy][qx] * B[qz][dz];
722  v += DQQ1[dz][qy][qx] * B[qz][dz];
723  w += DQQ2[dz][qy][qx] * G[qz][dz];
724  }
725  QQQ0[qz][qy][qx] = u;
726  QQQ1[qz][qy][qx] = v;
727  QQQ2[qz][qy][qx] = w;
728  }
729  }
730  }
731  MFEM_SYNC_THREAD;
732  MFEM_FOREACH_THREAD(qz,z,Q1D)
733  {
734  MFEM_FOREACH_THREAD(qy,y,Q1D)
735  {
736  MFEM_FOREACH_THREAD(qx,x,Q1D)
737  {
738  const int q = qx + (qy + qz * Q1D) * Q1D;
739  const double gX = QQQ0[qz][qy][qx];
740  const double gY = QQQ1[qz][qy][qx];
741  const double gZ = QQQ2[qz][qy][qx];
742  QQQ0[qz][qy][qx] = (D(q,0,0,e)*gX) + (D(q,1,0,e)*gY) + (D(q,2,0,e)*gZ);
743  QQQ1[qz][qy][qx] = (D(q,0,1,e)*gX) + (D(q,1,1,e)*gY) + (D(q,2,1,e)*gZ);
744  QQQ2[qz][qy][qx] = (D(q,0,2,e)*gX) + (D(q,1,2,e)*gY) + (D(q,2,2,e)*gZ);
745  }
746  }
747  }
748  MFEM_SYNC_THREAD;
749  if (tidz == 0)
750  {
751  MFEM_FOREACH_THREAD(d,y,D1DE)
752  {
753  MFEM_FOREACH_THREAD(q,x,Q1D)
754  {
755  Bt[d][q] = bt(d,q);
756  }
757  }
758  }
759  MFEM_SYNC_THREAD;
760  MFEM_FOREACH_THREAD(qz,z,Q1D)
761  {
762  MFEM_FOREACH_THREAD(qy,y,Q1D)
763  {
764  MFEM_FOREACH_THREAD(dx,x,D1DE)
765  {
766  double u = 0.0;
767  double v = 0.0;
768  double w = 0.0;
769  for (int qx = 0; qx < Q1D; ++qx)
770  {
771  u += QQQ0[qz][qy][qx] * Bt[dx][qx];
772  v += QQQ1[qz][qy][qx] * Bt[dx][qx];
773  w += QQQ2[qz][qy][qx] * Bt[dx][qx];
774  }
775  QQD0[qz][qy][dx] = u;
776  QQD1[qz][qy][dx] = v;
777  QQD2[qz][qy][dx] = w;
778  }
779  }
780  }
781  MFEM_SYNC_THREAD;
782  MFEM_FOREACH_THREAD(qz,z,Q1D)
783  {
784  MFEM_FOREACH_THREAD(dy,y,D1DE)
785  {
786  MFEM_FOREACH_THREAD(dx,x,D1DE)
787  {
788  double u = 0.0;
789  double v = 0.0;
790  double w = 0.0;
791  for (int qy = 0; qy < Q1D; ++qy)
792  {
793  u += QQD0[qz][qy][dx] * Bt[dy][qy];
794  v += QQD1[qz][qy][dx] * Bt[dy][qy];
795  w += QQD2[qz][qy][dx] * Bt[dy][qy];
796  }
797  QDD0[qz][dy][dx] = u;
798  QDD1[qz][dy][dx] = v;
799  QDD2[qz][dy][dx] = w;
800  }
801  }
802  }
803  MFEM_SYNC_THREAD;
804  MFEM_FOREACH_THREAD(dz,z,D1DE)
805  {
806  MFEM_FOREACH_THREAD(dy,y,D1DE)
807  {
808  MFEM_FOREACH_THREAD(dx,x,D1DE)
809  {
810  double u = 0.0;
811  double v = 0.0;
812  double w = 0.0;
813  for (int qz = 0; qz < Q1D; ++qz)
814  {
815  u += QDD0[qz][dy][dx] * Bt[dz][qz];
816  v += QDD1[qz][dy][dx] * Bt[dz][qz];
817  w += QDD2[qz][dy][dx] * Bt[dz][qz];
818  }
819  y(dx,dy,dz,0,e) += u;
820  y(dx,dy,dz,1,e) += v;
821  y(dx,dy,dz,2,e) += w;
822  }
823  }
824  }
825  });
826 }
827 
828 static void PAGradientApply(const int dim,
829  const int TR_D1D,
830  const int TE_D1D,
831  const int Q1D,
832  const int NE,
833  const Array<double> &B,
834  const Array<double> &G,
835  const Array<double> &Bt,
836  const Vector &op,
837  const Vector &x,
838  Vector &y,
839  bool transpose=false)
840 {
841 
842  if (dim == 2)
843  {
844  return PAGradientApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
845  }
846  if (dim == 3)
847  {
848  return PAGradientApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
849  }
850  MFEM_ABORT("Unknown kernel.");
851 }
852 
853 // PA Gradient Apply kernel
854 void GradientIntegrator::AddMultPA(const Vector &x, Vector &y) const
855 {
856  PAGradientApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
857  trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y,
858  false);
859 }
860 
861 // PA Gradient Apply kernel
862 void GradientIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
863 {
864  MFEM_ABORT("PA Gradient AddMultTransposePA not implemented.");
865 }
866 
867 } // namespace mfem
868 
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe.hpp:243
Ordering::Type GetOrdering() const
Return the ordering method.
Definition: fespace.hpp:552
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe.hpp:317
void trans(const Vector &u, Vector &x)
Definition: ex27.cpp:421
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
A coefficient that is constant across space and time.
Definition: coefficient.hpp:78
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
Definition: fespace.hpp:950
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:513
const Geometry::Type geom
Definition: ex1.cpp:40
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:438
int Size() const
Returns the size of the vector.
Definition: vector.hpp:190
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:173
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:136
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:567
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Definition: intrules.hpp:250
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
const int MAX_Q1D
Definition: forall.hpp:28
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:410
double b
Definition: lissajous.cpp:42
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
Definition: gridfunc.hpp:775
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:300
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:600
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe.cpp:367
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:442
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:54
const int MAX_D1D
Definition: forall.hpp:27
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2388
Vector data type.
Definition: vector.hpp:60
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
Definition: vector.hpp:577
RefCoord s[3]
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
Class representing a function through its values (scalar or vector) at quadrature points...
Definition: gridfunc.hpp:734
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:426