MFEM  v4.5.1
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_gradient.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "qfunction.hpp"
16 
17 using namespace std;
18 
19 namespace mfem
20 {
21 
22 // PA Gradient Integrator
23 
24 /* Description of the *SetupND functions
25  Inputs are as follows
26  \b Q1D number of quadrature points in one dimension.
27  \b w quadrature weights.
28  \b j element Jacobians.
29  \b c coefficient at quadrature points.
30 
31  The function is used precompute data needed at quadrature points during
32  the action. */
33 
34 /* Description of the *ApplyND functions
35  The template parameters are
36  \b T_D1D number of degrees of freedom in one dimension,
37  \b T_Q1D number of quadrature points in one dimension,
38  and are necessary to allow for compiler optimizations inside the kernel.
39 
40  Inputs are as follows
41  \b NE number of elements.
42  \b B matrix of basis functions.
43  \b G matrix of derivatives of the basis functions.
44  \b Bt transpose of matrix of basis functions.
45  \b Gt transpose matrix of derivatives of the basis functions.
46  \b op data used during action of the element matrix in the tensor
47  product application.
48 
49  \b x input vector of degrees of freedom on the element.
50  \b y output vector of degrees of freedom on the element.
51 
52  The function computes the kernel for one dimension that is suitable for
53  tensor product action to form ND operators.
54  Most of the ND inputs are reshaped as NQ*(ND*ND)*NE data structure, i.e
55  to allow indexing such as op(qpt,i,j,el).
56 
57  The output data structure is dependent on the kernel and layout of the
58  dimension ND and element number, but in general resembles the action of the
59  element matrix in the tensor product application. */
60 
61 /* Description of the Smem*ApplyND functions
62  The shared memory (Smem) versions of the kernels differ from the regular
63  versions in the following properties.
64 
65  \b MFEM_FORALL is using only one level of parallelism.
66  \b MFEM_FORALL_ND uses an additional level of parallelism
67  \b MFEM_FOREACH_THREAD
68 
69  These macros allow automatic mapping of manually defined blocks to
70  underlying hardware threads. These threads can share memory by using
71  the \b MFEM_SHARED keyword for local arrays. */
72 
73 // PA Gradient Assemble 2D kernel
74 static void PAGradientSetup2D(const int Q1D,
75  const int NE,
76  const Array<double> &w,
77  const Vector &j,
78  const Vector &c,
79  Vector &op)
80 {
81  const int NQ = Q1D*Q1D;
82  auto W = w.Read();
83  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
84  auto y = Reshape(op.Write(), NQ, 2, 2, NE);
85 
86  const bool const_c = c.Size() == 1;
87  const auto C = const_c ? Reshape(c.Read(), 1,1) :
88  Reshape(c.Read(), NQ, NE);
89 
90  MFEM_FORALL(e, NE,
91  {
92  for (int q = 0; q < NQ; ++q)
93  {
94  const double J11 = J(q,0,0,e);
95  const double J12 = J(q,0,1,e);
96  const double J21 = J(q,1,0,e);
97  const double J22 = J(q,1,1,e);
98  // Store wq * Q * adj(J)
99  const double Co = const_c ? C(0,0) : C(q,e);
100  y(q,0,0,e) = W[q] * Co * J22; // 1,1
101  y(q,0,1,e) = W[q] * Co * -J12; // 1,2
102  y(q,1,0,e) = W[q] * Co * -J21; // 2,1
103  y(q,1,1,e) = W[q] * Co * J11; // 2,2
104  }
105  });
106 }
107 
108 // PA Gradient Assemble 3D kernel
109 static void PAGradientSetup3D(const int Q1D,
110  const int NE,
111  const Array<double> &w,
112  const Vector &j,
113  const Vector &c,
114  Vector &op)
115 {
116  const int NQ = Q1D*Q1D*Q1D;
117  auto W = w.Read();
118  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
119  auto y = Reshape(op.Write(), NQ, 3, 3, NE);
120 
121  const bool const_c = c.Size() == 1;
122  const auto C = const_c ? Reshape(c.Read(), 1,1) :
123  Reshape(c.Read(), NQ,NE);
124 
125  MFEM_FORALL(e, NE,
126  {
127  for (int q = 0; q < NQ; ++q)
128  {
129  const double Co = const_c ? C(0,0) : C(q,e);
130 
131  const double J11 = J(q,0,0,e);
132  const double J21 = J(q,1,0,e);
133  const double J31 = J(q,2,0,e);
134  const double J12 = J(q,0,1,e);
135  const double J22 = J(q,1,1,e);
136  const double J32 = J(q,2,1,e);
137  const double J13 = J(q,0,2,e);
138  const double J23 = J(q,1,2,e);
139  const double J33 = J(q,2,2,e);
140  const double cw = W[q] * Co;
141  // adj(J)
142  const double A11 = (J22 * J33) - (J23 * J32);
143  const double A12 = (J32 * J13) - (J12 * J33);
144  const double A13 = (J12 * J23) - (J22 * J13);
145  const double A21 = (J31 * J23) - (J21 * J33);
146  const double A22 = (J11 * J33) - (J13 * J31);
147  const double A23 = (J21 * J13) - (J11 * J23);
148  const double A31 = (J21 * J32) - (J31 * J22);
149  const double A32 = (J31 * J12) - (J11 * J32);
150  const double A33 = (J11 * J22) - (J12 * J21);
151  // Store wq * Q * adj(J)
152  y(q,0,0,e) = cw * A11; // 1,1
153  y(q,0,1,e) = cw * A12; // 1,2
154  y(q,0,2,e) = cw * A13; // 1,3
155  y(q,1,0,e) = cw * A21; // 2,1
156  y(q,1,1,e) = cw * A22; // 2,2
157  y(q,1,2,e) = cw * A23; // 2,3
158  y(q,2,0,e) = cw * A31; // 3,1
159  y(q,2,1,e) = cw * A32; // 3,2
160  y(q,2,2,e) = cw * A33; // 3,3
161  }
162  });
163 }
164 
165 static void PAGradientSetup(const int dim,
166  const int TR_D1D,
167  const int TE_D1D,
168  const int Q1D,
169  const int NE,
170  const Array<double> &W,
171  const Vector &J,
172  const Vector &COEFF,
173  Vector &op)
174 {
175  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAGradientSetup"); }
176  if (dim == 2)
177  {
178  PAGradientSetup2D(Q1D, NE, W, J, COEFF, op);
179  }
180  if (dim == 3)
181  {
182  PAGradientSetup3D(Q1D, NE, W, J, COEFF, op);
183  }
184 }
185 
186 void GradientIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
187  const FiniteElementSpace &test_fes)
188 {
189  // Assumes tensor-product elements ordered by nodes
190  MFEM_ASSERT(trial_fes.GetOrdering() == Ordering::byNODES,
191  "PA Only supports Ordering::byNODES!");
192  Mesh *mesh = trial_fes.GetMesh();
193  const FiniteElement &trial_fe = *trial_fes.GetFE(0);
194  const FiniteElement &test_fe = *test_fes.GetFE(0);
195  ElementTransformation *trans = mesh->GetElementTransformation(0);
196  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe,
197  *trans);
198  const int dims = trial_fe.GetDim();
199  const int dimsToStore = dims * dims;
200  nq = ir->GetNPoints();
201  dim = mesh->Dimension();
202  ne = trial_fes.GetNE();
203  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
204  trial_maps = &trial_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
205  trial_dofs1D = trial_maps->ndof;
206  quad1D = trial_maps->nqpt;
207  test_maps = &test_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
208  test_dofs1D = test_maps->ndof;
209  MFEM_ASSERT(quad1D == test_maps->nqpt,
210  "PA requires test and trial space to have same number of quadrature points!");
211  pa_data.SetSize(nq * dimsToStore * ne, Device::GetMemoryType());
212 
213  QuadratureSpace qs(*mesh, *ir);
214  CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED);
215 
216  PAGradientSetup(dim, trial_dofs1D, test_dofs1D, quad1D,
217  ne, ir->GetWeights(), geom->J, coeff, pa_data);
218 }
219 
220 // PA Gradient Apply 2D kernel
221 template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
222 static void PAGradientApply2D(const int NE,
223  const Array<double> &b,
224  const Array<double> &g,
225  const Array<double> &bt,
226  const Vector &op_,
227  const Vector &x_,
228  Vector &y_,
229  const int tr_d1d = 0,
230  const int te_d1d = 0,
231  const int q1d = 0)
232 {
233  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
234  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
235  const int Q1D = T_Q1D ? T_Q1D : q1d;
236  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
237  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
238  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
239  auto B = Reshape(b.Read(), Q1D, TR_D1D);
240  auto G = Reshape(g.Read(), Q1D, TR_D1D);
241  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
242  auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
243  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, NE);
244  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, 2, NE);
245  MFEM_FORALL(e, NE,
246  {
247  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
248  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
249  const int Q1D = T_Q1D ? T_Q1D : q1d;
250  const int VDIM = 2;
251  // the following variables are evaluated at compile time
252  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
253  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
254 
255  double grad[max_Q1D][max_Q1D][VDIM];
256  for (int qy = 0; qy < Q1D; ++qy)
257  {
258  for (int qx = 0; qx < Q1D; ++qx)
259  {
260  grad[qy][qx][0] = 0.0;
261  grad[qy][qx][1] = 0.0;
262  }
263  }
264  for (int dy = 0; dy < TR_D1D; ++dy)
265  {
266  double gradX[max_Q1D][VDIM];
267  for (int qx = 0; qx < Q1D; ++qx)
268  {
269  gradX[qx][0] = 0.0;
270  gradX[qx][1] = 0.0;
271  }
272  for (int dx = 0; dx < TR_D1D; ++dx)
273  {
274  const double s = x(dx,dy,e);
275  for (int qx = 0; qx < Q1D; ++qx)
276  {
277  gradX[qx][0] += s * G(qx,dx);
278  gradX[qx][1] += s * B(qx,dx);
279  }
280  }
281  for (int qy = 0; qy < Q1D; ++qy)
282  {
283  const double wy = B(qy,dy);
284  const double wDy = G(qy,dy);
285  for (int qx = 0; qx < Q1D; ++qx)
286  {
287  grad[qy][qx][0] += gradX[qx][0] * wy;
288  grad[qy][qx][1] += gradX[qx][1] * wDy;
289  }
290  }
291  }
292  // We've now calculated grad(p) = [Dxy, xDy] in plane
293  for (int qy = 0; qy < Q1D; ++qy)
294  {
295  for (int qx = 0; qx < Q1D; ++qx)
296  {
297  const int q = qx + qy * Q1D;
298  const double gradX = grad[qy][qx][0];
299  const double gradY = grad[qy][qx][1];
300 
301  grad[qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e);
302  grad[qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e);
303  }
304  }
305  // We've now calculated grad = grad p * op
306  for (int qy = 0; qy < Q1D; ++qy)
307  {
308  double opX[max_TE_D1D][VDIM];
309  for (int dx = 0; dx < TE_D1D; ++dx)
310  {
311  opX[dx][0] = 0.0;
312  opX[dx][1] = 0.0;
313  for (int qx = 0; qx < Q1D; ++qx)
314  {
315  opX[dx][0] += Bt(dx,qx)*grad[qy][qx][0];
316  opX[dx][1] += Bt(dx,qx)*grad[qy][qx][1];
317  }
318  }
319  for (int dy = 0; dy < TE_D1D; ++dy)
320  {
321  for (int dx = 0; dx < TE_D1D; ++dx)
322  {
323  y(dx,dy,0,e) += Bt(dy,qy)*opX[dx][0];
324  y(dx,dy,1,e) += Bt(dy,qy)*opX[dx][1];
325  }
326  }
327  }
328  // We've now calculated y = u * grad
329  });
330 
331 }
332 
333 // PA Gradient Apply 2D kernel transpose
334 template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
335 static void PAGradientApplyTranspose2D(const int NE,
336  const Array<double> &bt,
337  const Array<double> &gt,
338  const Array<double> &b,
339  const Vector &op_,
340  const Vector &x_,
341  Vector &y_,
342  const int tr_d1d = 0,
343  const int te_d1d = 0,
344  const int q1d = 0)
345 {
346  // TODO
347  MFEM_ASSERT(false, "PAGradientApplyTranspose2D not implemented.");
348 }
349 
350 // PA Gradient Apply 3D kernel
351 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
352 static void PAGradientApply3D(const int NE,
353  const Array<double> &b,
354  const Array<double> &g,
355  const Array<double> &bt,
356  const Vector &op_,
357  const Vector &x_,
358  Vector &y_,
359  int tr_d1d = 0,
360  int te_d1d = 0,
361  int q1d = 0)
362 {
363  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
364  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
365  const int Q1D = T_Q1D ? T_Q1D : q1d;
366  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
367  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
368  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
369  auto B = Reshape(b.Read(), Q1D, TR_D1D);
370  auto G = Reshape(g.Read(), Q1D, TR_D1D);
371  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
372  auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
373  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
374  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
375  MFEM_FORALL(e, NE,
376  {
377  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
378  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
379  const int Q1D = T_Q1D ? T_Q1D : q1d;
380  const int VDIM = 3;
381  // the following variables are evaluated at compile time
382  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
383  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
384 
385  double grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
386  for (int qz = 0; qz < Q1D; ++qz)
387  {
388  for (int qy = 0; qy < Q1D; ++qy)
389  {
390  for (int qx = 0; qx < Q1D; ++qx)
391  {
392  grad[qz][qy][qx][0] = 0.0;
393  grad[qz][qy][qx][1] = 0.0;
394  grad[qz][qy][qx][2] = 0.0;
395  }
396  }
397  }
398  for (int dz = 0; dz < TR_D1D; ++dz)
399  {
400  double gradXY[max_Q1D][max_Q1D][3];
401  for (int qy = 0; qy < Q1D; ++qy)
402  {
403  for (int qx = 0; qx < Q1D; ++qx)
404  {
405  gradXY[qy][qx][0] = 0.0;
406  gradXY[qy][qx][1] = 0.0;
407  gradXY[qy][qx][2] = 0.0;
408  }
409  }
410  for (int dy = 0; dy < TR_D1D; ++dy)
411  {
412  double gradX[max_Q1D][2];
413  for (int qx = 0; qx < Q1D; ++qx)
414  {
415  gradX[qx][0] = 0.0;
416  gradX[qx][1] = 0.0;
417  }
418  for (int dx = 0; dx < TR_D1D; ++dx)
419  {
420  const double s = x(dx,dy,dz,e);
421  for (int qx = 0; qx < Q1D; ++qx)
422  {
423  gradX[qx][0] += s * B(qx,dx);
424  gradX[qx][1] += s * G(qx,dx);
425  }
426  }
427  for (int qy = 0; qy < Q1D; ++qy)
428  {
429  const double wy = B(qy,dy);
430  const double wDy = G(qy,dy);
431  for (int qx = 0; qx < Q1D; ++qx)
432  {
433  const double wx = gradX[qx][0];
434  const double wDx = gradX[qx][1];
435  gradXY[qy][qx][0] += wDx * wy;
436  gradXY[qy][qx][1] += wx * wDy;
437  gradXY[qy][qx][2] += wx * wy;
438  }
439  }
440  }
441  for (int qz = 0; qz < Q1D; ++qz)
442  {
443  const double wz = B(qz,dz);
444  const double wDz = G(qz,dz);
445  for (int qy = 0; qy < Q1D; ++qy)
446  {
447  for (int qx = 0; qx < Q1D; ++qx)
448  {
449  grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
450  grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
451  grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
452  }
453  }
454  }
455  }
456  // We've now calculated grad(p) = [Dxyz, xDyz, xyDz] in plane
457  for (int qz = 0; qz < Q1D; ++qz)
458  {
459  for (int qy = 0; qy < Q1D; ++qy)
460  {
461  for (int qx = 0; qx < Q1D; ++qx)
462  {
463  const int q = qx + (qy + qz * Q1D) * Q1D;
464  const double gradX = grad[qz][qy][qx][0];
465  const double gradY = grad[qz][qy][qx][1];
466  const double gradZ = grad[qz][qy][qx][2];
467 
468  grad[qz][qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e) + gradZ*op(q,2,0,e);
469  grad[qz][qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e) + gradZ*op(q,2,1,e);
470  grad[qz][qy][qx][2] = gradX*op(q,0,2,e) + gradY*op(q,1,2,e) + gradZ*op(q,2,2,e);
471  }
472  }
473  }
474  // We've now calculated grad = grad p * op
475  for (int qz = 0; qz < Q1D; ++qz)
476  {
477  double opXY[max_TE_D1D][max_TE_D1D][VDIM];
478  for (int dy = 0; dy < TE_D1D; ++dy)
479  {
480  for (int dx = 0; dx < TE_D1D; ++dx)
481  {
482  opXY[dy][dx][0] = 0.0;
483  opXY[dy][dx][1] = 0.0;
484  opXY[dy][dx][2] = 0.0;
485  }
486  }
487  for (int qy = 0; qy < Q1D; ++qy)
488  {
489  double opX[max_TE_D1D][VDIM];
490  for (int dx = 0; dx < TE_D1D; ++dx)
491  {
492  opX[dx][0] = 0.0;
493  opX[dx][1] = 0.0;
494  opX[dx][2] = 0.0;
495  for (int qx = 0; qx < Q1D; ++qx)
496  {
497  opX[dx][0] += Bt(dx,qx)*grad[qz][qy][qx][0];
498  opX[dx][1] += Bt(dx,qx)*grad[qz][qy][qx][1];
499  opX[dx][2] += Bt(dx,qx)*grad[qz][qy][qx][2];
500  }
501  }
502  for (int dy = 0; dy < TE_D1D; ++dy)
503  {
504  for (int dx = 0; dx < TE_D1D; ++dx)
505  {
506  opXY[dy][dx][0] += Bt(dy,qy)*opX[dx][0];
507  opXY[dy][dx][1] += Bt(dy,qy)*opX[dx][1];
508  opXY[dy][dx][2] += Bt(dy,qy)*opX[dx][2];
509  }
510  }
511  }
512  for (int dz = 0; dz < TE_D1D; ++dz)
513  {
514  for (int dy = 0; dy < TE_D1D; ++dy)
515  {
516  for (int dx = 0; dx < TE_D1D; ++dx)
517  {
518  y(dx,dy,dz,0,e) += Bt(dz,qz)*opXY[dy][dx][0];
519  y(dx,dy,dz,1,e) += Bt(dz,qz)*opXY[dy][dx][1];
520  y(dx,dy,dz,2,e) += Bt(dz,qz)*opXY[dy][dx][2];
521  }
522  }
523  }
524  }
525  // We've now calculated y = u * grad
526  });
527 }
528 
529 // PA Gradient Apply 3D kernel
530 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
531 static void PAGradientApplyTranspose3D(const int NE,
532  const Array<double> &bt,
533  const Array<double> &gt,
534  const Array<double> &b,
535  const Vector &op_,
536  const Vector &x_,
537  Vector &y_,
538  int tr_d1d = 0,
539  int te_d1d = 0,
540  int q1d = 0)
541 {
542  MFEM_ASSERT(false, "Gradient PA Apply Transpose 3D not implemented.");
543 }
544 
545 // Shared memory PA Gradient Apply 3D kernel
546 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
547 static void SmemPAGradientApply3D(const int NE,
548  const Array<double> &b_,
549  const Array<double> &g_,
550  const Array<double> &bt_,
551  const Vector &d_,
552  const Vector &x_,
553  Vector &y_,
554  const int tr_d1d = 0,
555  const int te_d1d = 0,
556  const int q1d = 0)
557 {
558  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
559  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
560  const int Q1D = T_Q1D ? T_Q1D : q1d;
561 
562  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
563  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
564  MFEM_VERIFY(TR_D1D <= Q1D, "");
565  MFEM_VERIFY(TE_D1D <= Q1D, "");
566  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
567 
568  auto b = Reshape(b_.Read(), Q1D, TR_D1D);
569  auto g = Reshape(g_.Read(), Q1D, TR_D1D);
570  auto bt = Reshape(bt_.Read(), TE_D1D, Q1D);
571  auto D = Reshape(d_.Read(), Q1D*Q1D*Q1D, 3, 3, NE);
572  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
573  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
574 
575  MFEM_FORALL_3D(e, NE, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D,
576  {
577  const int tidz = MFEM_THREAD_ID(z);
578  const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
579  const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
580  const int Q1D = T_Q1D ? T_Q1D : q1d;
581  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
582  constexpr int MD1R = T_TR_D1D ? T_TR_D1D : MAX_D1D;
583  constexpr int MD1E = T_TE_D1D ? T_TE_D1D : MAX_D1D;
584  constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
585  constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
586  MFEM_SHARED double sBG[2][MQ1*MD1];
587  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
588  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
589  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
590  MFEM_SHARED double sm0[3][MDQ*MDQ*MDQ];
591  MFEM_SHARED double sm1[3][MDQ*MDQ*MDQ];
592  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
593  double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
594  double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
595 
596  double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
597  double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
598  double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
599 
600  double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
601  double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
602  double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
603 
604  double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
605  double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
606  double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
607 
608  double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
609  double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
610  double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
611  MFEM_FOREACH_THREAD(dz,z,D1DR)
612  {
613  MFEM_FOREACH_THREAD(dy,y,D1DR)
614  {
615  MFEM_FOREACH_THREAD(dx,x,D1DR)
616  {
617  X[dz][dy][dx] = x(dx,dy,dz,e);
618  }
619  }
620  }
621  if (tidz == 0)
622  {
623  MFEM_FOREACH_THREAD(d,y,D1DR)
624  {
625  MFEM_FOREACH_THREAD(q,x,Q1D)
626  {
627  B[q][d] = b(q,d);
628  G[q][d] = g(q,d);
629  }
630  }
631  }
632  MFEM_SYNC_THREAD;
633  MFEM_FOREACH_THREAD(dz,z,D1DR)
634  {
635  MFEM_FOREACH_THREAD(dy,y,D1DR)
636  {
637  MFEM_FOREACH_THREAD(qx,x,Q1D)
638  {
639  double u = 0.0;
640  double v = 0.0;
641  for (int dx = 0; dx < D1DR; ++dx)
642  {
643  const double coord = X[dz][dy][dx];
644  u += coord * B[qx][dx];
645  v += coord * G[qx][dx];
646  }
647  DDQ0[dz][dy][qx] = u;
648  DDQ1[dz][dy][qx] = v;
649  }
650  }
651  }
652  MFEM_SYNC_THREAD;
653  MFEM_FOREACH_THREAD(dz,z,D1DR)
654  {
655  MFEM_FOREACH_THREAD(qy,y,Q1D)
656  {
657  MFEM_FOREACH_THREAD(qx,x,Q1D)
658  {
659  double u = 0.0;
660  double v = 0.0;
661  double w = 0.0;
662  for (int dy = 0; dy < D1DR; ++dy)
663  {
664  u += DDQ1[dz][dy][qx] * B[qy][dy];
665  v += DDQ0[dz][dy][qx] * G[qy][dy];
666  w += DDQ0[dz][dy][qx] * B[qy][dy];
667  }
668  DQQ0[dz][qy][qx] = u;
669  DQQ1[dz][qy][qx] = v;
670  DQQ2[dz][qy][qx] = w;
671  }
672  }
673  }
674  MFEM_SYNC_THREAD;
675  MFEM_FOREACH_THREAD(qz,z,Q1D)
676  {
677  MFEM_FOREACH_THREAD(qy,y,Q1D)
678  {
679  MFEM_FOREACH_THREAD(qx,x,Q1D)
680  {
681  double u = 0.0;
682  double v = 0.0;
683  double w = 0.0;
684  for (int dz = 0; dz < D1DR; ++dz)
685  {
686  u += DQQ0[dz][qy][qx] * B[qz][dz];
687  v += DQQ1[dz][qy][qx] * B[qz][dz];
688  w += DQQ2[dz][qy][qx] * G[qz][dz];
689  }
690  QQQ0[qz][qy][qx] = u;
691  QQQ1[qz][qy][qx] = v;
692  QQQ2[qz][qy][qx] = w;
693  }
694  }
695  }
696  MFEM_SYNC_THREAD;
697  MFEM_FOREACH_THREAD(qz,z,Q1D)
698  {
699  MFEM_FOREACH_THREAD(qy,y,Q1D)
700  {
701  MFEM_FOREACH_THREAD(qx,x,Q1D)
702  {
703  const int q = qx + (qy + qz * Q1D) * Q1D;
704  const double gX = QQQ0[qz][qy][qx];
705  const double gY = QQQ1[qz][qy][qx];
706  const double gZ = QQQ2[qz][qy][qx];
707  QQQ0[qz][qy][qx] = (D(q,0,0,e)*gX) + (D(q,1,0,e)*gY) + (D(q,2,0,e)*gZ);
708  QQQ1[qz][qy][qx] = (D(q,0,1,e)*gX) + (D(q,1,1,e)*gY) + (D(q,2,1,e)*gZ);
709  QQQ2[qz][qy][qx] = (D(q,0,2,e)*gX) + (D(q,1,2,e)*gY) + (D(q,2,2,e)*gZ);
710  }
711  }
712  }
713  MFEM_SYNC_THREAD;
714  if (tidz == 0)
715  {
716  MFEM_FOREACH_THREAD(d,y,D1DE)
717  {
718  MFEM_FOREACH_THREAD(q,x,Q1D)
719  {
720  Bt[d][q] = bt(d,q);
721  }
722  }
723  }
724  MFEM_SYNC_THREAD;
725  MFEM_FOREACH_THREAD(qz,z,Q1D)
726  {
727  MFEM_FOREACH_THREAD(qy,y,Q1D)
728  {
729  MFEM_FOREACH_THREAD(dx,x,D1DE)
730  {
731  double u = 0.0;
732  double v = 0.0;
733  double w = 0.0;
734  for (int qx = 0; qx < Q1D; ++qx)
735  {
736  u += QQQ0[qz][qy][qx] * Bt[dx][qx];
737  v += QQQ1[qz][qy][qx] * Bt[dx][qx];
738  w += QQQ2[qz][qy][qx] * Bt[dx][qx];
739  }
740  QQD0[qz][qy][dx] = u;
741  QQD1[qz][qy][dx] = v;
742  QQD2[qz][qy][dx] = w;
743  }
744  }
745  }
746  MFEM_SYNC_THREAD;
747  MFEM_FOREACH_THREAD(qz,z,Q1D)
748  {
749  MFEM_FOREACH_THREAD(dy,y,D1DE)
750  {
751  MFEM_FOREACH_THREAD(dx,x,D1DE)
752  {
753  double u = 0.0;
754  double v = 0.0;
755  double w = 0.0;
756  for (int qy = 0; qy < Q1D; ++qy)
757  {
758  u += QQD0[qz][qy][dx] * Bt[dy][qy];
759  v += QQD1[qz][qy][dx] * Bt[dy][qy];
760  w += QQD2[qz][qy][dx] * Bt[dy][qy];
761  }
762  QDD0[qz][dy][dx] = u;
763  QDD1[qz][dy][dx] = v;
764  QDD2[qz][dy][dx] = w;
765  }
766  }
767  }
768  MFEM_SYNC_THREAD;
769  MFEM_FOREACH_THREAD(dz,z,D1DE)
770  {
771  MFEM_FOREACH_THREAD(dy,y,D1DE)
772  {
773  MFEM_FOREACH_THREAD(dx,x,D1DE)
774  {
775  double u = 0.0;
776  double v = 0.0;
777  double w = 0.0;
778  for (int qz = 0; qz < Q1D; ++qz)
779  {
780  u += QDD0[qz][dy][dx] * Bt[dz][qz];
781  v += QDD1[qz][dy][dx] * Bt[dz][qz];
782  w += QDD2[qz][dy][dx] * Bt[dz][qz];
783  }
784  y(dx,dy,dz,0,e) += u;
785  y(dx,dy,dz,1,e) += v;
786  y(dx,dy,dz,2,e) += w;
787  }
788  }
789  }
790  });
791 }
792 
793 static void PAGradientApply(const int dim,
794  const int TR_D1D,
795  const int TE_D1D,
796  const int Q1D,
797  const int NE,
798  const Array<double> &B,
799  const Array<double> &G,
800  const Array<double> &Bt,
801  const Vector &op,
802  const Vector &x,
803  Vector &y,
804  bool transpose=false)
805 {
806 
807  if (dim == 2)
808  {
809  return PAGradientApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
810  }
811  if (dim == 3)
812  {
813  return PAGradientApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
814  }
815  MFEM_ABORT("Unknown kernel.");
816 }
817 
818 // PA Gradient Apply kernel
819 void GradientIntegrator::AddMultPA(const Vector &x, Vector &y) const
820 {
821  PAGradientApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
822  trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y,
823  false);
824 }
825 
826 // PA Gradient Apply kernel
827 void GradientIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
828 {
829  MFEM_ABORT("PA Gradient AddMultTransposePA not implemented.");
830 }
831 
832 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe_base.hpp:235
Ordering::Type GetOrdering() const
Return the ordering method.
Definition: fespace.hpp:599
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe_base.hpp:311
void trans(const Vector &u, Vector &x)
Definition: ex27.cpp:412
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe_base.hpp:170
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
Class to represent a coefficient evaluated at quadrature points.
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:614
const int MAX_Q1D
Definition: forall.hpp:29
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:441
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:96
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:366
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:465
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:55
const int MAX_D1D
Definition: forall.hpp:28
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2781
Vector data type.
Definition: vector.hpp:60
RefCoord s[3]
Class representing the storage layout of a QuadratureFunction.
Definition: qspace.hpp:92
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:449
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131