MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_gradient.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
21 // PA Gradient Integrator
22 
23 /* Description of the *SetupND functions
24  Inputs are as follows
25  \b Q1D number of quadrature points in one dimension.
26  \b w quadrature weights.
27  \b j element Jacobians.
28  \b COEFF coefficient at quadrature points.
29 
30  The function is used precompute data needed at quadrature points during
31  the action. */
32 
33 /* Description of the *ApplyND functions
34  The template parameters are
35  \b T_D1D number of degrees of freedom in one dimension,
36  \b T_Q1D number of quadrature points in one dimension,
37  and are necessary to allow for compiler optimizations inside the kernel.
38 
39  Inputs are as follows
40  \b NE number of elements.
41  \b B matrix of basis functions.
42  \b G matrix of derivatives of the basis functions.
43  \b Bt transpose of matrix of basis functions.
44  \b Gt transpose matrix of derivatives of the basis functions.
45  \b op data used during action of the element matrix in the tensor
46  product application.
47 
48  \b x input vector of degrees of freedom on the element.
49  \b y output vector of degrees of freedom on the element.
50 
51  The function computes the kernel for one dimension that is suitable for
52  tensor product action to form ND operators.
53  Most of the ND inputs are reshaped as NQ*(ND*ND)*NE data structure, i.e
54  to allow indexing such as op(qpt,i,j,el).
55 
56  The output data structure is dependent on the kernel and layout of the
57  dimension ND and element number, but in general resembles the action of the
58  element matrix in the tensor product application. */
59 
60 /* Description of the Smem*ApplyND functions
61  The shared memory (Smem) versions of the kernels differ from the regular
62  versions in the following properties.
63 
64  \b MFEM_FORALL is using only one level of parallelism.
65  \b MFEM_FORALL_ND uses an additional level of parallelism
66  \b MFEM_FOREACH_THREAD
67 
68  These macros allow automatic mapping of manually defined blocks to
69  underlying hardware threads. These threads can share memory by using
70  the \b MFEM_SHARED keyword for local arrays. */
71 
72 // PA Gradient Assemble 2D kernel
73 static void PAGradientSetup2D(const int Q1D,
74  const int NE,
75  const Array<double> &w,
76  const Vector &j,
77  const double COEFF,
78  Vector &op)
79 {
80  const int NQ = Q1D*Q1D;
81  auto W = w.Read();
82  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
83  auto y = Reshape(op.Write(), NQ, 2, 2, NE);
84 
85  MFEM_FORALL(e, NE,
86  {
87  for (int q = 0; q < NQ; ++q)
88  {
89  const double J11 = J(q,0,0,e);
90  const double J12 = J(q,0,1,e);
91  const double J21 = J(q,1,0,e);
92  const double J22 = J(q,1,1,e);
93  // Store wq * Q * adj(J)
94  y(q,0,0,e) = W[q] * COEFF * J22; // 1,1
95  y(q,0,1,e) = W[q] * COEFF * -J12; // 1,2
96  y(q,1,0,e) = W[q] * COEFF * -J21; // 2,1
97  y(q,1,1,e) = W[q] * COEFF * J11; // 2,2
98  }
99  });
100 }
101 
102 // PA Gradient Assemble 3D kernel
103 static void PAGradientSetup3D(const int Q1D,
104  const int NE,
105  const Array<double> &w,
106  const Vector &j,
107  const double COEFF,
108  Vector &op)
109 {
110  const int NQ = Q1D*Q1D*Q1D;
111  auto W = w.Read();
112  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
113  auto y = Reshape(op.Write(), NQ, 3, 3, NE);
114  MFEM_FORALL(e, NE,
115  {
116  for (int q = 0; q < NQ; ++q)
117  {
118  const double J11 = J(q,0,0,e);
119  const double J21 = J(q,1,0,e);
120  const double J31 = J(q,2,0,e);
121  const double J12 = J(q,0,1,e);
122  const double J22 = J(q,1,1,e);
123  const double J32 = J(q,2,1,e);
124  const double J13 = J(q,0,2,e);
125  const double J23 = J(q,1,2,e);
126  const double J33 = J(q,2,2,e);
127  const double cw = W[q] * COEFF;
128  // adj(J)
129  const double A11 = (J22 * J33) - (J23 * J32);
130  const double A12 = (J32 * J13) - (J12 * J33);
131  const double A13 = (J12 * J23) - (J22 * J13);
132  const double A21 = (J31 * J23) - (J21 * J33);
133  const double A22 = (J11 * J33) - (J13 * J31);
134  const double A23 = (J21 * J13) - (J11 * J23);
135  const double A31 = (J21 * J32) - (J31 * J22);
136  const double A32 = (J31 * J12) - (J11 * J32);
137  const double A33 = (J11 * J22) - (J12 * J21);
138  // Store wq * Q * adj(J)
139  y(q,0,0,e) = cw * A11; // 1,1
140  y(q,0,1,e) = cw * A12; // 1,2
141  y(q,0,2,e) = cw * A13; // 1,3
142  y(q,1,0,e) = cw * A21; // 2,1
143  y(q,1,1,e) = cw * A22; // 2,2
144  y(q,1,2,e) = cw * A23; // 2,3
145  y(q,2,0,e) = cw * A31; // 3,1
146  y(q,2,1,e) = cw * A32; // 3,2
147  y(q,2,2,e) = cw * A33; // 3,3
148  }
149  });
150 }
151 
152 static void PAGradientSetup(const int dim,
153  const int TR_D1D,
154  const int TE_D1D,
155  const int Q1D,
156  const int NE,
157  const Array<double> &W,
158  const Vector &J,
159  const double COEFF,
160  Vector &op)
161 {
162  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAGradientSetup"); }
163  if (dim == 2)
164  {
165  PAGradientSetup2D(Q1D, NE, W, J, COEFF, op);
166  }
167  if (dim == 3)
168  {
169  PAGradientSetup3D(Q1D, NE, W, J, COEFF, op);
170  }
171 }
172 
173 void GradientIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
174  const FiniteElementSpace &test_fes)
175 {
176  // Assumes tensor-product elements ordered by nodes
177  MFEM_ASSERT(trial_fes.GetOrdering() == Ordering::byNODES,
178  "PA Only supports Ordering::byNODES!");
179  Mesh *mesh = trial_fes.GetMesh();
180  const FiniteElement &trial_fe = *trial_fes.GetFE(0);
181  const FiniteElement &test_fe = *test_fes.GetFE(0);
182  ElementTransformation *trans = mesh->GetElementTransformation(0);
183  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe,
184  *trans);
185  const int dims = trial_fe.GetDim();
186  const int dimsToStore = dims * dims;
187  const int nq = ir->GetNPoints();
188  dim = mesh->Dimension();
189  ne = trial_fes.GetNE();
190  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
191  trial_maps = &trial_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
192  trial_dofs1D = trial_maps->ndof;
193  quad1D = trial_maps->nqpt;
194  test_maps = &test_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
195  test_dofs1D = test_maps->ndof;
196  MFEM_ASSERT(quad1D == test_maps->nqpt,
197  "PA requires test and trial space to have same number of quadrature points!");
198  pa_data.SetSize(nq * dimsToStore * ne, Device::GetMemoryType());
199  double coeff = 1.0;
200  if (Q)
201  {
202  ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient*>(Q);
203  MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
204  coeff = cQ->constant;
205  }
206  PAGradientSetup(dim, trial_dofs1D, test_dofs1D, quad1D,
207  ne, ir->GetWeights(), geom->J, coeff, pa_data);
208 }
209 
210 // PA Gradient Apply 2D kernel
211 template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
212 static void PAGradientApply2D(const int NE,
213  const Array<double> &b,
214  const Array<double> &g,
215  const Array<double> &bt,
216  const Vector &_op,
217  const Vector &_x,
218  Vector &_y,
219  const int tr_d1d = 0,
220  const int te_d1d = 0,
221  const int q1d = 0)
222 {
223  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
224  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
225  const int Q1D = T_Q1D ? T_Q1D : q1d;
226  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
227  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
228  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
229  auto B = Reshape(b.Read(), Q1D, TR_D1D);
230  auto G = Reshape(g.Read(), Q1D, TR_D1D);
231  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
232  auto op = Reshape(_op.Read(), Q1D*Q1D, 2,2, NE);
233  auto x = Reshape(_x.Read(), TR_D1D, TR_D1D, NE);
234  auto y = Reshape(_y.ReadWrite(), TE_D1D, TE_D1D, 2, NE);
235  MFEM_FORALL(e, NE,
236  {
237  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
238  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
239  const int Q1D = T_Q1D ? T_Q1D : q1d;
240  const int VDIM = 2;
241  // the following variables are evaluated at compile time
242  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
243  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
244 
245  double grad[max_Q1D][max_Q1D][VDIM];
246  for (int qy = 0; qy < Q1D; ++qy)
247  {
248  for (int qx = 0; qx < Q1D; ++qx)
249  {
250  grad[qy][qx][0] = 0.0;
251  grad[qy][qx][1] = 0.0;
252  }
253  }
254  for (int dy = 0; dy < TR_D1D; ++dy)
255  {
256  double gradX[max_Q1D][VDIM];
257  for (int qx = 0; qx < Q1D; ++qx)
258  {
259  gradX[qx][0] = 0.0;
260  gradX[qx][1] = 0.0;
261  }
262  for (int dx = 0; dx < TR_D1D; ++dx)
263  {
264  const double s = x(dx,dy,e);
265  for (int qx = 0; qx < Q1D; ++qx)
266  {
267  gradX[qx][0] += s * G(qx,dx);
268  gradX[qx][1] += s * B(qx,dx);
269  }
270  }
271  for (int qy = 0; qy < Q1D; ++qy)
272  {
273  const double wy = B(qy,dy);
274  const double wDy = G(qy,dy);
275  for (int qx = 0; qx < Q1D; ++qx)
276  {
277  grad[qy][qx][0] += gradX[qx][0] * wy;
278  grad[qy][qx][1] += gradX[qx][1] * wDy;
279  }
280  }
281  }
282  // We've now calculated grad(p) = [Dxy, xDy] in plane
283  for (int qy = 0; qy < Q1D; ++qy)
284  {
285  for (int qx = 0; qx < Q1D; ++qx)
286  {
287  const int q = qx + qy * Q1D;
288  const double gradX = grad[qy][qx][0];
289  const double gradY = grad[qy][qx][1];
290 
291  grad[qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e);
292  grad[qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e);
293  }
294  }
295  // We've now calculated grad = grad p * op
296  for (int qy = 0; qy < Q1D; ++qy)
297  {
298  double opX[max_TE_D1D][VDIM];
299  for (int dx = 0; dx < TE_D1D; ++dx)
300  {
301  opX[dx][0] = 0.0;
302  opX[dx][1] = 0.0;
303  for (int qx = 0; qx < Q1D; ++qx)
304  {
305  opX[dx][0] += Bt(dx,qx)*grad[qy][qx][0];
306  opX[dx][1] += Bt(dx,qx)*grad[qy][qx][1];
307  }
308  }
309  for (int dy = 0; dy < TE_D1D; ++dy)
310  {
311  for (int dx = 0; dx < TE_D1D; ++dx)
312  {
313  y(dx,dy,0,e) += Bt(dy,qy)*opX[dx][0];
314  y(dx,dy,1,e) += Bt(dy,qy)*opX[dx][1];
315  }
316  }
317  }
318  // We've now calculated y = u * grad
319  });
320 
321 }
322 
323 // PA Gradient Apply 2D kernel transpose
324 template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
325 static void PAGradientApplyTranspose2D(const int NE,
326  const Array<double> &bt,
327  const Array<double> &gt,
328  const Array<double> &b,
329  const Vector &_op,
330  const Vector &_x,
331  Vector &_y,
332  const int tr_d1d = 0,
333  const int te_d1d = 0,
334  const int q1d = 0)
335 {
336  // TODO
337  MFEM_ASSERT(false, "GradientPAApplyTranspose 3D not implemented.");
338 }
339 
340 // PA Gradient Apply 3D kernel
341 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
342 static void PAGradientApply3D(const int NE,
343  const Array<double> &b,
344  const Array<double> &g,
345  const Array<double> &bt,
346  const Vector &_op,
347  const Vector &_x,
348  Vector &_y,
349  int tr_d1d = 0,
350  int te_d1d = 0,
351  int q1d = 0)
352 {
353  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
354  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
355  const int Q1D = T_Q1D ? T_Q1D : q1d;
356  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
357  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
358  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
359  auto B = Reshape(b.Read(), Q1D, TR_D1D);
360  auto G = Reshape(g.Read(), Q1D, TR_D1D);
361  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
362  auto op = Reshape(_op.Read(), Q1D*Q1D*Q1D, 3,3, NE);
363  auto x = Reshape(_x.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
364  auto y = Reshape(_y.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
365  MFEM_FORALL(e, NE,
366  {
367  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
368  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
369  const int Q1D = T_Q1D ? T_Q1D : q1d;
370  const int VDIM = 3;
371  // the following variables are evaluated at compile time
372  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
373  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
374 
375  double grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
376  for (int qz = 0; qz < Q1D; ++qz)
377  {
378  for (int qy = 0; qy < Q1D; ++qy)
379  {
380  for (int qx = 0; qx < Q1D; ++qx)
381  {
382  grad[qz][qy][qx][0] = 0.0;
383  grad[qz][qy][qx][1] = 0.0;
384  grad[qz][qy][qx][2] = 0.0;
385  }
386  }
387  }
388  for (int dz = 0; dz < TR_D1D; ++dz)
389  {
390  double gradXY[max_Q1D][max_Q1D][3];
391  for (int qy = 0; qy < Q1D; ++qy)
392  {
393  for (int qx = 0; qx < Q1D; ++qx)
394  {
395  gradXY[qy][qx][0] = 0.0;
396  gradXY[qy][qx][1] = 0.0;
397  gradXY[qy][qx][2] = 0.0;
398  }
399  }
400  for (int dy = 0; dy < TR_D1D; ++dy)
401  {
402  double gradX[max_Q1D][2];
403  for (int qx = 0; qx < Q1D; ++qx)
404  {
405  gradX[qx][0] = 0.0;
406  gradX[qx][1] = 0.0;
407  }
408  for (int dx = 0; dx < TR_D1D; ++dx)
409  {
410  const double s = x(dx,dy,dz,e);
411  for (int qx = 0; qx < Q1D; ++qx)
412  {
413  gradX[qx][0] += s * B(qx,dx);
414  gradX[qx][1] += s * G(qx,dx);
415  }
416  }
417  for (int qy = 0; qy < Q1D; ++qy)
418  {
419  const double wy = B(qy,dy);
420  const double wDy = G(qy,dy);
421  for (int qx = 0; qx < Q1D; ++qx)
422  {
423  const double wx = gradX[qx][0];
424  const double wDx = gradX[qx][1];
425  gradXY[qy][qx][0] += wDx * wy;
426  gradXY[qy][qx][1] += wx * wDy;
427  gradXY[qy][qx][2] += wx * wy;
428  }
429  }
430  }
431  for (int qz = 0; qz < Q1D; ++qz)
432  {
433  const double wz = B(qz,dz);
434  const double wDz = G(qz,dz);
435  for (int qy = 0; qy < Q1D; ++qy)
436  {
437  for (int qx = 0; qx < Q1D; ++qx)
438  {
439  grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
440  grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
441  grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
442  }
443  }
444  }
445  }
446  // We've now calculated grad(p) = [Dxyz, xDyz, xyDz] in plane
447  for (int qz = 0; qz < Q1D; ++qz)
448  {
449  for (int qy = 0; qy < Q1D; ++qy)
450  {
451  for (int qx = 0; qx < Q1D; ++qx)
452  {
453  const int q = qx + (qy + qz * Q1D) * Q1D;
454  const double gradX = grad[qz][qy][qx][0];
455  const double gradY = grad[qz][qy][qx][1];
456  const double gradZ = grad[qz][qy][qx][2];
457 
458  grad[qz][qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e) + gradZ*op(q,2,0,e);
459  grad[qz][qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e) + gradZ*op(q,2,1,e);
460  grad[qz][qy][qx][2] = gradX*op(q,0,2,e) + gradY*op(q,1,2,e) + gradZ*op(q,2,2,e);
461  }
462  }
463  }
464  // We've now calculated grad = grad p * op
465  for (int qz = 0; qz < Q1D; ++qz)
466  {
467  double opXY[max_TE_D1D][max_TE_D1D][VDIM];
468  for (int dy = 0; dy < TE_D1D; ++dy)
469  {
470  for (int dx = 0; dx < TE_D1D; ++dx)
471  {
472  opXY[dy][dx][0] = 0.0;
473  opXY[dy][dx][1] = 0.0;
474  opXY[dy][dx][2] = 0.0;
475  }
476  }
477  for (int qy = 0; qy < Q1D; ++qy)
478  {
479  double opX[max_TE_D1D][VDIM];
480  for (int dx = 0; dx < TE_D1D; ++dx)
481  {
482  opX[dx][0] = 0.0;
483  opX[dx][1] = 0.0;
484  opX[dx][2] = 0.0;
485  for (int qx = 0; qx < Q1D; ++qx)
486  {
487  opX[dx][0] += Bt(dx,qx)*grad[qz][qy][qx][0];
488  opX[dx][1] += Bt(dx,qx)*grad[qz][qy][qx][1];
489  opX[dx][2] += Bt(dx,qx)*grad[qz][qy][qx][2];
490  }
491  }
492  for (int dy = 0; dy < TE_D1D; ++dy)
493  {
494  for (int dx = 0; dx < TE_D1D; ++dx)
495  {
496  opXY[dy][dx][0] += Bt(dy,qy)*opX[dx][0];
497  opXY[dy][dx][1] += Bt(dy,qy)*opX[dx][1];
498  opXY[dy][dx][2] += Bt(dy,qy)*opX[dx][2];
499  }
500  }
501  }
502  for (int dz = 0; dz < TE_D1D; ++dz)
503  {
504  for (int dy = 0; dy < TE_D1D; ++dy)
505  {
506  for (int dx = 0; dx < TE_D1D; ++dx)
507  {
508  y(dx,dy,dz,0,e) += Bt(dz,qz)*opXY[dy][dx][0];
509  y(dx,dy,dz,1,e) += Bt(dz,qz)*opXY[dy][dx][1];
510  y(dx,dy,dz,2,e) += Bt(dz,qz)*opXY[dy][dx][2];
511  }
512  }
513  }
514  }
515  // We've now calculated y = u * grad
516  });
517 }
518 
519 // PA Gradient Apply 3D kernel
520 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
521 static void PAGradientApplyTranspose3D(const int NE,
522  const Array<double> &bt,
523  const Array<double> &gt,
524  const Array<double> &b,
525  const Vector &_op,
526  const Vector &_x,
527  Vector &_y,
528  int tr_d1d = 0,
529  int te_d1d = 0,
530  int q1d = 0)
531 {
532  MFEM_ASSERT(false, "Gradient PA Apply Transpose 3D not implemented.");
533 }
534 
535 // Shared memory PA Gradient Apply 3D kernel
536 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
537 static void SmemPAGradientApply3D(const int NE,
538  const Array<double> &b_,
539  const Array<double> &g_,
540  const Array<double> &bt_,
541  const Vector &d_,
542  const Vector &x_,
543  Vector &y_,
544  const int tr_d1d = 0,
545  const int te_d1d = 0,
546  const int q1d = 0)
547 {
548  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
549  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
550  const int Q1D = T_Q1D ? T_Q1D : q1d;
551 
552  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
553  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
554  MFEM_VERIFY(TR_D1D <= Q1D, "");
555  MFEM_VERIFY(TE_D1D <= Q1D, "");
556  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
557 
558  auto b = Reshape(b_.Read(), Q1D, TR_D1D);
559  auto g = Reshape(g_.Read(), Q1D, TR_D1D);
560  auto bt = Reshape(bt_.Read(), TE_D1D, Q1D);
561  auto D = Reshape(d_.Read(), Q1D*Q1D*Q1D, 3, 3, NE);
562  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
563  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
564 
565  MFEM_FORALL_3D(e, NE, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D,
566  {
567  const int tidz = MFEM_THREAD_ID(z);
568  const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
569  const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
570  const int Q1D = T_Q1D ? T_Q1D : q1d;
571  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
572  constexpr int MD1R = T_TR_D1D ? T_TR_D1D : MAX_D1D;
573  constexpr int MD1E = T_TE_D1D ? T_TE_D1D : MAX_D1D;
574  constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
575  constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
576  MFEM_SHARED double sBG[2][MQ1*MD1];
577  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
578  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
579  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
580  MFEM_SHARED double sm0[3][MDQ*MDQ*MDQ];
581  MFEM_SHARED double sm1[3][MDQ*MDQ*MDQ];
582  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
583  double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
584  double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
585 
586  double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
587  double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
588  double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
589 
590  double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
591  double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
592  double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
593 
594  double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
595  double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
596  double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
597 
598  double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
599  double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
600  double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
601  MFEM_FOREACH_THREAD(dz,z,D1DR)
602  {
603  MFEM_FOREACH_THREAD(dy,y,D1DR)
604  {
605  MFEM_FOREACH_THREAD(dx,x,D1DR)
606  {
607  X[dz][dy][dx] = x(dx,dy,dz,e);
608  }
609  }
610  }
611  if (tidz == 0)
612  {
613  MFEM_FOREACH_THREAD(d,y,D1DR)
614  {
615  MFEM_FOREACH_THREAD(q,x,Q1D)
616  {
617  B[q][d] = b(q,d);
618  G[q][d] = g(q,d);
619  }
620  }
621  }
622  MFEM_SYNC_THREAD;
623  MFEM_FOREACH_THREAD(dz,z,D1DR)
624  {
625  MFEM_FOREACH_THREAD(dy,y,D1DR)
626  {
627  MFEM_FOREACH_THREAD(qx,x,Q1D)
628  {
629  double u = 0.0;
630  double v = 0.0;
631  for (int dx = 0; dx < D1DR; ++dx)
632  {
633  const double coord = X[dz][dy][dx];
634  u += coord * B[qx][dx];
635  v += coord * G[qx][dx];
636  }
637  DDQ0[dz][dy][qx] = u;
638  DDQ1[dz][dy][qx] = v;
639  }
640  }
641  }
642  MFEM_SYNC_THREAD;
643  MFEM_FOREACH_THREAD(dz,z,D1DR)
644  {
645  MFEM_FOREACH_THREAD(qy,y,Q1D)
646  {
647  MFEM_FOREACH_THREAD(qx,x,Q1D)
648  {
649  double u = 0.0;
650  double v = 0.0;
651  double w = 0.0;
652  for (int dy = 0; dy < D1DR; ++dy)
653  {
654  u += DDQ1[dz][dy][qx] * B[qy][dy];
655  v += DDQ0[dz][dy][qx] * G[qy][dy];
656  w += DDQ0[dz][dy][qx] * B[qy][dy];
657  }
658  DQQ0[dz][qy][qx] = u;
659  DQQ1[dz][qy][qx] = v;
660  DQQ2[dz][qy][qx] = w;
661  }
662  }
663  }
664  MFEM_SYNC_THREAD;
665  MFEM_FOREACH_THREAD(qz,z,Q1D)
666  {
667  MFEM_FOREACH_THREAD(qy,y,Q1D)
668  {
669  MFEM_FOREACH_THREAD(qx,x,Q1D)
670  {
671  double u = 0.0;
672  double v = 0.0;
673  double w = 0.0;
674  for (int dz = 0; dz < D1DR; ++dz)
675  {
676  u += DQQ0[dz][qy][qx] * B[qz][dz];
677  v += DQQ1[dz][qy][qx] * B[qz][dz];
678  w += DQQ2[dz][qy][qx] * G[qz][dz];
679  }
680  QQQ0[qz][qy][qx] = u;
681  QQQ1[qz][qy][qx] = v;
682  QQQ2[qz][qy][qx] = w;
683  }
684  }
685  }
686  MFEM_SYNC_THREAD;
687  MFEM_FOREACH_THREAD(qz,z,Q1D)
688  {
689  MFEM_FOREACH_THREAD(qy,y,Q1D)
690  {
691  MFEM_FOREACH_THREAD(qx,x,Q1D)
692  {
693  const int q = qx + (qy + qz * Q1D) * Q1D;
694  const double gX = QQQ0[qz][qy][qx];
695  const double gY = QQQ1[qz][qy][qx];
696  const double gZ = QQQ2[qz][qy][qx];
697  QQQ0[qz][qy][qx] = (D(q,0,0,e)*gX) + (D(q,1,0,e)*gY) + (D(q,2,0,e)*gZ);
698  QQQ1[qz][qy][qx] = (D(q,0,1,e)*gX) + (D(q,1,1,e)*gY) + (D(q,2,1,e)*gZ);
699  QQQ2[qz][qy][qx] = (D(q,0,2,e)*gX) + (D(q,1,2,e)*gY) + (D(q,2,2,e)*gZ);
700  }
701  }
702  }
703  MFEM_SYNC_THREAD;
704  if (tidz == 0)
705  {
706  MFEM_FOREACH_THREAD(d,y,D1DE)
707  {
708  MFEM_FOREACH_THREAD(q,x,Q1D)
709  {
710  Bt[d][q] = bt(d,q);
711  }
712  }
713  }
714  MFEM_SYNC_THREAD;
715  MFEM_FOREACH_THREAD(qz,z,Q1D)
716  {
717  MFEM_FOREACH_THREAD(qy,y,Q1D)
718  {
719  MFEM_FOREACH_THREAD(dx,x,D1DE)
720  {
721  double u = 0.0;
722  double v = 0.0;
723  double w = 0.0;
724  for (int qx = 0; qx < Q1D; ++qx)
725  {
726  u += QQQ0[qz][qy][qx] * Bt[dx][qx];
727  v += QQQ1[qz][qy][qx] * Bt[dx][qx];
728  w += QQQ2[qz][qy][qx] * Bt[dx][qx];
729  }
730  QQD0[qz][qy][dx] = u;
731  QQD1[qz][qy][dx] = v;
732  QQD2[qz][qy][dx] = w;
733  }
734  }
735  }
736  MFEM_SYNC_THREAD;
737  MFEM_FOREACH_THREAD(qz,z,Q1D)
738  {
739  MFEM_FOREACH_THREAD(dy,y,D1DE)
740  {
741  MFEM_FOREACH_THREAD(dx,x,D1DE)
742  {
743  double u = 0.0;
744  double v = 0.0;
745  double w = 0.0;
746  for (int qy = 0; qy < Q1D; ++qy)
747  {
748  u += QQD0[qz][qy][dx] * Bt[dy][qy];
749  v += QQD1[qz][qy][dx] * Bt[dy][qy];
750  w += QQD2[qz][qy][dx] * Bt[dy][qy];
751  }
752  QDD0[qz][dy][dx] = u;
753  QDD1[qz][dy][dx] = v;
754  QDD2[qz][dy][dx] = w;
755  }
756  }
757  }
758  MFEM_SYNC_THREAD;
759  MFEM_FOREACH_THREAD(dz,z,D1DE)
760  {
761  MFEM_FOREACH_THREAD(dy,y,D1DE)
762  {
763  MFEM_FOREACH_THREAD(dx,x,D1DE)
764  {
765  double u = 0.0;
766  double v = 0.0;
767  double w = 0.0;
768  for (int qz = 0; qz < Q1D; ++qz)
769  {
770  u += QDD0[qz][dy][dx] * Bt[dz][qz];
771  v += QDD1[qz][dy][dx] * Bt[dz][qz];
772  w += QDD2[qz][dy][dx] * Bt[dz][qz];
773  }
774  y(dx,dy,dz,0,e) += u;
775  y(dx,dy,dz,1,e) += v;
776  y(dx,dy,dz,2,e) += w;
777  }
778  }
779  }
780  });
781 }
782 
783 static void PAGradientApply(const int dim,
784  const int TR_D1D,
785  const int TE_D1D,
786  const int Q1D,
787  const int NE,
788  const Array<double> &B,
789  const Array<double> &G,
790  const Array<double> &Bt,
791  const Vector &op,
792  const Vector &x,
793  Vector &y,
794  bool transpose=false)
795 {
796 
797  if (dim == 2)
798  {
799  return PAGradientApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
800  }
801  if (dim == 3)
802  {
803  return PAGradientApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
804  }
805  MFEM_ABORT("Unknown kernel.");
806 }
807 
808 // PA Gradient Apply kernel
809 void GradientIntegrator::AddMultPA(const Vector &x, Vector &y) const
810 {
811  PAGradientApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
812  trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y,
813  false);
814 }
815 
816 // PA Gradient Apply kernel
817 void GradientIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const
818 {
819  MFEM_ABORT("PA Gradient AddMultTransposePA not implemented.");
820 }
821 
822 } // namespace mfem
823 
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:245
Abstract class for all finite elements.
Definition: fe.hpp:235
Ordering::Type GetOrdering() const
Return the ordering method.
Definition: fespace.hpp:412
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe.hpp:309
void trans(const Vector &u, Vector &x)
Definition: ex27.cpp:421
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
A coefficient that is constant across space and time.
Definition: coefficient.hpp:78
const Geometry::Type geom
Definition: ex1.cpp:40
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe.hpp:165
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:85
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:134
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:427
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:388
const int MAX_Q1D
Definition: forall.hpp:27
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:314
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:290
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe.cpp:376
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:372
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:51
const int MAX_D1D
Definition: forall.hpp:26
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:1798
Vector data type.
Definition: vector.hpp:51