MFEM  v4.5.1
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_divergence.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
21 // PA Divergence Integrator
22 
23 // PA Divergence Assemble 2D kernel
24 static void PADivergenceSetup2D(const int Q1D,
25  const int NE,
26  const Array<double> &w,
27  const Vector &j,
28  const double COEFF,
29  Vector &op)
30 {
31  const int NQ = Q1D*Q1D;
32  auto W = w.Read();
33  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
34  auto y = Reshape(op.Write(), NQ, 2, 2, NE);
35 
36  MFEM_FORALL(e, NE,
37  {
38  for (int q = 0; q < NQ; ++q)
39  {
40  const double J11 = J(q,0,0,e);
41  const double J12 = J(q,0,1,e);
42  const double J21 = J(q,1,0,e);
43  const double J22 = J(q,1,1,e);
44  // Store wq * Q * adj(J)
45  y(q,0,0,e) = W[q] * COEFF * J22; // 1,1
46  y(q,0,1,e) = W[q] * COEFF * -J12; // 1,2
47  y(q,1,0,e) = W[q] * COEFF * -J21; // 2,1
48  y(q,1,1,e) = W[q] * COEFF * J11; // 2,2
49  }
50  });
51 }
52 
53 // PA Divergence Assemble 3D kernel
54 static void PADivergenceSetup3D(const int Q1D,
55  const int NE,
56  const Array<double> &w,
57  const Vector &j,
58  const double COEFF,
59  Vector &op)
60 {
61  const int NQ = Q1D*Q1D*Q1D;
62  auto W = w.Read();
63  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
64  auto y = Reshape(op.Write(), NQ, 3, 3, NE);
65  MFEM_FORALL(e, NE,
66  {
67  for (int q = 0; q < NQ; ++q)
68  {
69  const double J11 = J(q,0,0,e);
70  const double J21 = J(q,1,0,e);
71  const double J31 = J(q,2,0,e);
72  const double J12 = J(q,0,1,e);
73  const double J22 = J(q,1,1,e);
74  const double J32 = J(q,2,1,e);
75  const double J13 = J(q,0,2,e);
76  const double J23 = J(q,1,2,e);
77  const double J33 = J(q,2,2,e);
78  const double cw = W[q] * COEFF;
79  // adj(J)
80  const double A11 = (J22 * J33) - (J23 * J32);
81  const double A12 = (J32 * J13) - (J12 * J33);
82  const double A13 = (J12 * J23) - (J22 * J13);
83  const double A21 = (J31 * J23) - (J21 * J33);
84  const double A22 = (J11 * J33) - (J13 * J31);
85  const double A23 = (J21 * J13) - (J11 * J23);
86  const double A31 = (J21 * J32) - (J31 * J22);
87  const double A32 = (J31 * J12) - (J11 * J32);
88  const double A33 = (J11 * J22) - (J12 * J21);
89  // Store wq * Q * adj(J)
90  y(q,0,0,e) = cw * A11; // 1,1
91  y(q,0,1,e) = cw * A12; // 1,2
92  y(q,0,2,e) = cw * A13; // 1,3
93  y(q,1,0,e) = cw * A21; // 2,1
94  y(q,1,1,e) = cw * A22; // 2,2
95  y(q,1,2,e) = cw * A23; // 2,3
96  y(q,2,0,e) = cw * A31; // 3,1
97  y(q,2,1,e) = cw * A32; // 3,2
98  y(q,2,2,e) = cw * A33; // 3,3
99  }
100  });
101 }
102 
103 static void PADivergenceSetup(const int dim,
104  const int TR_D1D,
105  const int TE_D1D,
106  const int Q1D,
107  const int NE,
108  const Array<double> &W,
109  const Vector &J,
110  const double COEFF,
111  Vector &op)
112 {
113  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADivergenceSetup"); }
114  if (dim == 2)
115  {
116  PADivergenceSetup2D(Q1D, NE, W, J, COEFF, op);
117  }
118  if (dim == 3)
119  {
120  PADivergenceSetup3D(Q1D, NE, W, J, COEFF, op);
121  }
122 }
123 
124 void VectorDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
125  const FiniteElementSpace &test_fes)
126 {
127  // Assumes tensor-product elements ordered by nodes
128  MFEM_ASSERT(trial_fes.GetOrdering() == Ordering::byNODES,
129  "PA Only supports Ordering::byNODES!");
130  Mesh *mesh = trial_fes.GetMesh();
131  const FiniteElement &trial_fe = *trial_fes.GetFE(0);
132  const FiniteElement &test_fe = *test_fes.GetFE(0);
133  ElementTransformation *trans = mesh->GetElementTransformation(0);
134  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe,
135  *trans);
136  const int dims = trial_fe.GetDim();
137  const int dimsToStore = dims * dims;
138  nq = ir->GetNPoints();
139  dim = mesh->Dimension();
140  ne = trial_fes.GetNE();
141  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
142  trial_maps = &trial_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
143  trial_dofs1D = trial_maps->ndof;
144  quad1D = trial_maps->nqpt;
145  test_maps = &test_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
146  test_dofs1D = test_maps->ndof;
147  MFEM_ASSERT(quad1D == test_maps->nqpt,
148  "PA requires test and trial space to have same number of quadrature points!");
149  pa_data.SetSize(nq * dimsToStore * ne, Device::GetMemoryType());
150  double coeff = 1.0;
151  if (Q)
152  {
153  ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient*>(Q);
154  MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
155  coeff = cQ->constant;
156  }
157  PADivergenceSetup(dim, trial_dofs1D, test_dofs1D, quad1D,
158  ne, ir->GetWeights(), geom->J, coeff, pa_data);
159 }
160 
161 // PA Divergence Apply 2D kernel
162 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
163 static void PADivergenceApply2D(const int NE,
164  const Array<double> &b,
165  const Array<double> &g,
166  const Array<double> &bt,
167  const Vector &op_,
168  const Vector &x_,
169  Vector &y_,
170  const int tr_d1d = 0,
171  const int te_d1d = 0,
172  const int q1d = 0)
173 {
174  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
175  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
176  const int Q1D = T_Q1D ? T_Q1D : q1d;
177  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
178  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
179  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
180  auto B = Reshape(b.Read(), Q1D, TR_D1D);
181  auto G = Reshape(g.Read(), Q1D, TR_D1D);
182  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
183  auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
184  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, 2, NE);
185  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, NE);
186  MFEM_FORALL(e, NE,
187  {
188  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
189  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
190  const int Q1D = T_Q1D ? T_Q1D : q1d;
191  const int VDIM = 2;
192  // the following variables are evaluated at compile time
193  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
194  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
195 
196  double grad[max_Q1D][max_Q1D][VDIM];
197  double div[max_Q1D][max_Q1D];
198  for (int qy = 0; qy < Q1D; ++qy)
199  {
200  for (int qx = 0; qx < Q1D; ++qx)
201  {
202  div[qy][qx] = 0.0;
203  }
204  }
205 
206  for (int c = 0; c < VDIM; ++c)
207  {
208  for (int qy = 0; qy < Q1D; ++qy)
209  {
210  for (int qx = 0; qx < Q1D; ++qx)
211  {
212  grad[qy][qx][0] = 0.0;
213  grad[qy][qx][1] = 0.0;
214  }
215  }
216  for (int dy = 0; dy < TR_D1D; ++dy)
217  {
218  double gradX[max_Q1D][VDIM];
219  for (int qx = 0; qx < Q1D; ++qx)
220  {
221  gradX[qx][0] = 0.0;
222  gradX[qx][1] = 0.0;
223  }
224  for (int dx = 0; dx < TR_D1D; ++dx)
225  {
226  const double s = x(dx,dy,c,e);
227  for (int qx = 0; qx < Q1D; ++qx)
228  {
229  gradX[qx][0] += s * G(qx,dx);
230  gradX[qx][1] += s * B(qx,dx);
231  }
232  }
233  for (int qy = 0; qy < Q1D; ++qy)
234  {
235  const double wy = B(qy,dy);
236  const double wDy = G(qy,dy);
237  for (int qx = 0; qx < Q1D; ++qx)
238  {
239  grad[qy][qx][0] += gradX[qx][0] * wy;
240  grad[qy][qx][1] += gradX[qx][1] * wDy;
241  }
242  }
243  }
244  // We've now calculated grad(u_c) = [Dxy_1, xDy_2] in plane
245  for (int qy = 0; qy < Q1D; ++qy)
246  {
247  for (int qx = 0; qx < Q1D; ++qx)
248  {
249  const int q = qx + qy * Q1D;
250  const double gradX = grad[qy][qx][0];
251  const double gradY = grad[qy][qx][1];
252 
253  div[qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e);
254  }
255  }
256  }
257  // We've now calculated div = reshape(div phi * op) * u
258  for (int qy = 0; qy < Q1D; ++qy)
259  {
260  double opX[max_TE_D1D];
261  for (int dx = 0; dx < TE_D1D; ++dx)
262  {
263  opX[dx] = 0.0;
264  for (int qx = 0; qx < Q1D; ++qx)
265  {
266  opX[dx] += Bt(dx,qx)*div[qy][qx];
267  }
268  }
269  for (int dy = 0; dy < TE_D1D; ++dy)
270  {
271  for (int dx = 0; dx < TE_D1D; ++dx)
272  {
273  y(dx,dy,e) += Bt(dy,qy)*opX[dx];
274  }
275  }
276  }
277  // We've now calculated y = p * div
278  });
279 }
280 
281 // Shared memory PA Divergence Apply 2D kernel
282 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0,
283  const int T_NBZ = 0>
284 static void SmemPADivergenceApply2D(const int NE,
285  const Array<double> &b_,
286  const Array<double> &g_,
287  const Array<double> &bt_,
288  const Vector &op_,
289  const Vector &x_,
290  Vector &y_,
291  const int tr_d1d = 0,
292  const int te_d1d = 0,
293  const int q1d = 0)
294 {
295  // TODO
296  MFEM_ASSERT(false, "SHARED MEM NOT PROGRAMMED YET");
297 }
298 
299 // PA Divergence Apply 2D kernel transpose
300 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
301 static void PADivergenceApplyTranspose2D(const int NE,
302  const Array<double> &bt,
303  const Array<double> &gt,
304  const Array<double> &b,
305  const Vector &op_,
306  const Vector &x_,
307  Vector &y_,
308  const int tr_d1d = 0,
309  const int te_d1d = 0,
310  const int q1d = 0)
311 {
312  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
313  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
314  const int Q1D = T_Q1D ? T_Q1D : q1d;
315  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
316  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
317  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
318  auto Bt = Reshape(bt.Read(), TR_D1D, Q1D);
319  auto Gt = Reshape(gt.Read(), TR_D1D, Q1D);
320  auto B = Reshape(b.Read(), Q1D, TE_D1D);
321  auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
322  auto x = Reshape(x_.Read(), TE_D1D, TE_D1D, NE);
323  auto y = Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, 2, NE);
324  MFEM_FORALL(e, NE,
325  {
326  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
327  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
328  const int Q1D = T_Q1D ? T_Q1D : q1d;
329  const int VDIM = 2;
330  // the following variables are evaluated at compile time
331  constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : MAX_D1D;
332  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
333 
334  double quadTest[max_Q1D][max_Q1D];
335  double grad[max_Q1D][max_Q1D][VDIM];
336  for (int qy = 0; qy < Q1D; ++qy)
337  {
338  for (int qx = 0; qx < Q1D; ++qx)
339  {
340  quadTest[qy][qx] = 0.0;
341  }
342  }
343  for (int dy = 0; dy < TE_D1D; ++dy)
344  {
345  double quadTestX[max_Q1D];
346  for (int qx = 0; qx < Q1D; ++qx)
347  {
348  quadTestX[qx] = 0.0;
349  }
350  for (int dx = 0; dx < TE_D1D; ++dx)
351  {
352  const double s = x(dx,dy,e);
353  for (int qx = 0; qx < Q1D; ++qx)
354  {
355  quadTestX[qx] += s * B(qx,dx);
356  }
357  }
358  for (int qy = 0; qy < Q1D; ++qy)
359  {
360  const double wy = B(qy,dy);
361  for (int qx = 0; qx < Q1D; ++qx)
362  {
363  quadTest[qy][qx] += quadTestX[qx] * wy;
364  }
365  }
366  }
367  // We've now calculated x on the quads
368  for (int c = 0; c < VDIM; ++c)
369  {
370  for (int qy = 0; qy < Q1D; ++qy)
371  {
372  for (int qx = 0; qx < Q1D; ++qx)
373  {
374  const int q = qx + qy * Q1D;
375  grad[qy][qx][0] = quadTest[qy][qx]*op(q,0,c,e);
376  grad[qy][qx][1] = quadTest[qy][qx]*op(q,1,c,e);
377  }
378  }
379  // We've now calculated op_c^T * x
380  for (int qy = 0; qy < Q1D; ++qy)
381  {
382  double gradX[max_TR_D1D][VDIM];
383  for (int dx = 0; dx < TR_D1D; ++dx)
384  {
385  gradX[dx][0] = 0.0;
386  gradX[dx][1] = 0.0;
387  }
388  for (int qx = 0; qx < Q1D; ++qx)
389  {
390  const double gX = grad[qy][qx][0];
391  const double gY = grad[qy][qx][1];
392  for (int dx = 0; dx < TR_D1D; ++dx)
393  {
394  const double wx = Bt(dx,qx);
395  const double wDx = Gt(dx,qx);
396  gradX[dx][0] += gX * wDx;
397  gradX[dx][1] += gY * wx;
398  }
399  }
400  for (int dy = 0; dy < TR_D1D; ++dy)
401  {
402  const double wy = Bt(dy,qy);
403  const double wDy = Gt(dy,qy);
404  for (int dx = 0; dx < TR_D1D; ++dx)
405  {
406  y(dx,dy,c,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
407  }
408  }
409  }
410  }
411  // We've now calculated y = reshape(div u * op^T) * x
412  });
413 }
414 
415 // PA Vector Divergence Apply 3D kernel
416 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
417 static void PADivergenceApply3D(const int NE,
418  const Array<double> &b,
419  const Array<double> &g,
420  const Array<double> &bt,
421  const Vector &op_,
422  const Vector &x_,
423  Vector &y_,
424  int tr_d1d = 0,
425  int te_d1d = 0,
426  int q1d = 0)
427 {
428  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
429  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
430  const int Q1D = T_Q1D ? T_Q1D : q1d;
431  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
432  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
433  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
434  auto B = Reshape(b.Read(), Q1D, TR_D1D);
435  auto G = Reshape(g.Read(), Q1D, TR_D1D);
436  auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
437  auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
438  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
439  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
440  MFEM_FORALL(e, NE,
441  {
442  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
443  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
444  const int Q1D = T_Q1D ? T_Q1D : q1d;
445  const int VDIM = 3;
446  // the following variables are evaluated at compile time
447  constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : MAX_D1D;
448  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
449 
450  double grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
451  double div[max_Q1D][max_Q1D][max_Q1D];
452  for (int qz = 0; qz < Q1D; ++qz)
453  {
454  for (int qy = 0; qy < Q1D; ++qy)
455  {
456  for (int qx = 0; qx < Q1D; ++qx)
457  {
458  div[qz][qy][qx] = 0.0;
459  }
460  }
461  }
462 
463  for (int c = 0; c < VDIM; ++c)
464  {
465  for (int qz = 0; qz < Q1D; ++qz)
466  {
467  for (int qy = 0; qy < Q1D; ++qy)
468  {
469  for (int qx = 0; qx < Q1D; ++qx)
470  {
471  grad[qz][qy][qx][0] = 0.0;
472  grad[qz][qy][qx][1] = 0.0;
473  grad[qz][qy][qx][2] = 0.0;
474  }
475  }
476  }
477  for (int dz = 0; dz < TR_D1D; ++dz)
478  {
479  double gradXY[max_Q1D][max_Q1D][VDIM];
480  for (int qy = 0; qy < Q1D; ++qy)
481  {
482  for (int qx = 0; qx < Q1D; ++qx)
483  {
484  gradXY[qy][qx][0] = 0.0;
485  gradXY[qy][qx][1] = 0.0;
486  gradXY[qy][qx][2] = 0.0;
487  }
488  }
489  for (int dy = 0; dy < TR_D1D; ++dy)
490  {
491  double gradX[max_Q1D][VDIM];
492  for (int qx = 0; qx < Q1D; ++qx)
493  {
494  gradX[qx][0] = 0.0;
495  gradX[qx][1] = 0.0;
496  gradX[qx][2] = 0.0;
497  }
498  for (int dx = 0; dx < TR_D1D; ++dx)
499  {
500  const double s = x(dx,dy,dz,c,e);
501  for (int qx = 0; qx < Q1D; ++qx)
502  {
503  gradX[qx][0] += s * G(qx,dx);
504  gradX[qx][1] += s * B(qx,dx);
505  gradX[qx][2] += s * B(qx,dx);
506  }
507  }
508  for (int qy = 0; qy < Q1D; ++qy)
509  {
510  const double wy = B(qy,dy);
511  const double wDy = G(qy,dy);
512  for (int qx = 0; qx < Q1D; ++qx)
513  {
514  gradXY[qy][qx][0] += gradX[qx][0] * wy;
515  gradXY[qy][qx][1] += gradX[qx][1] * wDy;
516  gradXY[qy][qx][2] += gradX[qx][2] * wy;
517  }
518  }
519  }
520  for (int qz = 0; qz < Q1D; ++qz)
521  {
522  const double wz = B(qz,dz);
523  const double wDz = G(qz,dz);
524  for (int qy = 0; qy < Q1D; ++qy)
525  {
526  for (int qx = 0; qx < Q1D; ++qx)
527  {
528  grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
529  grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
530  grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
531  }
532  }
533  }
534  }
535  // We've now calculated grad(u_c) = [Dxyz_1, xDyz_2, xyDz_3] in plane
536  for (int qz = 0; qz < Q1D; ++qz)
537  {
538  for (int qy = 0; qy < Q1D; ++qy)
539  {
540  for (int qx = 0; qx < Q1D; ++qx)
541  {
542  const int q = qx + (qy + qz * Q1D) * Q1D;
543  const double gradX = grad[qz][qy][qx][0];
544  const double gradY = grad[qz][qy][qx][1];
545  const double gradZ = grad[qz][qy][qx][2];
546 
547  div[qz][qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e) + gradZ*op(q,2,c,e);
548 
549  }
550  }
551  }
552  }
553  // We've now calculated div = reshape(div phi * op) * u
554  for (int qz = 0; qz < Q1D; ++qz)
555  {
556  double opXY[max_TE_D1D][max_TE_D1D];
557  for (int dy = 0; dy < TE_D1D; ++dy)
558  {
559  for (int dx = 0; dx < TE_D1D; ++dx)
560  {
561  opXY[dy][dx] = 0.0;
562  }
563  }
564  for (int qy = 0; qy < Q1D; ++qy)
565  {
566  double opX[max_TE_D1D];
567  for (int dx = 0; dx < TE_D1D; ++dx)
568  {
569  opX[dx] = 0.0;
570  for (int qx = 0; qx < Q1D; ++qx)
571  {
572  opX[dx] += Bt(dx,qx)*div[qz][qy][qx];
573  }
574  }
575  for (int dy = 0; dy < TE_D1D; ++dy)
576  {
577  for (int dx = 0; dx < TE_D1D; ++dx)
578  {
579  opXY[dy][dx] += Bt(dy,qy)*opX[dx];
580  }
581  }
582  }
583  for (int dz = 0; dz < TE_D1D; ++dz)
584  {
585  for (int dy = 0; dy < TE_D1D; ++dy)
586  {
587  for (int dx = 0; dx < TE_D1D; ++dx)
588  {
589  y(dx,dy,dz,e) += Bt(dz,qz)*opXY[dy][dx];
590  }
591  }
592  }
593  }
594  // We've now calculated y = p * div
595  });
596 }
597 
598 // PA Vector Divergence Apply 3D kernel
599 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
600 static void PADivergenceApplyTranspose3D(const int NE,
601  const Array<double> &bt,
602  const Array<double> &gt,
603  const Array<double> &b,
604  const Vector &op_,
605  const Vector &x_,
606  Vector &y_,
607  int tr_d1d = 0,
608  int te_d1d = 0,
609  int q1d = 0)
610 {
611  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
612  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
613  const int Q1D = T_Q1D ? T_Q1D : q1d;
614  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
615  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
616  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
617  auto Bt = Reshape(bt.Read(), TR_D1D, Q1D);
618  auto Gt = Reshape(gt.Read(), TR_D1D, Q1D);
619  auto B = Reshape(b.Read(), Q1D, TE_D1D);
620  auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
621  auto x = Reshape(x_.Read(), TE_D1D, TE_D1D, TE_D1D, NE);
622  auto y = Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
623  MFEM_FORALL(e, NE,
624  {
625  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
626  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
627  const int Q1D = T_Q1D ? T_Q1D : q1d;
628  const int VDIM = 3;
629  // the following variables are evaluated at compile time
630  constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : MAX_D1D;
631  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
632 
633  double quadTest[max_Q1D][max_Q1D][max_Q1D];
634  double grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
635  for (int qz = 0; qz < Q1D; ++qz)
636  {
637  for (int qy = 0; qy < Q1D; ++qy)
638  {
639  for (int qx = 0; qx < Q1D; ++qx)
640  {
641  quadTest[qz][qy][qx] = 0.0;
642  }
643  }
644  }
645  for (int dz = 0; dz < TE_D1D; ++dz)
646  {
647  double quadTestXY[max_Q1D][max_Q1D];
648  for (int qy = 0; qy < Q1D; ++qy)
649  {
650  for (int qx = 0; qx < Q1D; ++qx)
651  {
652  quadTestXY[qy][qx] = 0.0;
653  }
654  }
655  for (int dy = 0; dy < TE_D1D; ++dy)
656  {
657  double quadTestX[max_Q1D];
658  for (int qx = 0; qx < Q1D; ++qx)
659  {
660  quadTestX[qx] = 0.0;
661  }
662  for (int dx = 0; dx < TE_D1D; ++dx)
663  {
664  const double s = x(dx,dy,dz,e);
665  for (int qx = 0; qx < Q1D; ++qx)
666  {
667  quadTestX[qx] += s * B(qx,dx);
668  }
669  }
670  for (int qy = 0; qy < Q1D; ++qy)
671  {
672  const double wy = B(qy,dy);
673  for (int qx = 0; qx < Q1D; ++qx)
674  {
675  quadTestXY[qy][qx] += quadTestX[qx] * wy;
676  }
677  }
678  }
679  for (int qz = 0; qz < Q1D; ++qz)
680  {
681  const double wz = B(qz,dz);
682  for (int qy = 0; qy < Q1D; ++qy)
683  {
684  for (int qx = 0; qx < Q1D; ++qx)
685  {
686  quadTest[qz][qy][qx] += quadTestXY[qy][qx] * wz;
687  }
688  }
689  }
690  }
691  // We've now calculated x on the quads
692  for (int c = 0; c < VDIM; ++c)
693  {
694  for (int qz = 0; qz < Q1D; ++qz)
695  {
696  for (int qy = 0; qy < Q1D; ++qy)
697  {
698  for (int qx = 0; qx < Q1D; ++qx)
699  {
700  const int q = qx + (qy + qz * Q1D) * Q1D;
701  grad[qz][qy][qx][0] = quadTest[qz][qy][qx]*op(q,0,c,e);
702  grad[qz][qy][qx][1] = quadTest[qz][qy][qx]*op(q,1,c,e);
703  grad[qz][qy][qx][2] = quadTest[qz][qy][qx]*op(q,2,c,e);
704  }
705  }
706  }
707  // We've now calculated op_c^T * x
708  for (int qz = 0; qz < Q1D; ++qz)
709  {
710  double gradXY[max_TR_D1D][max_TR_D1D][VDIM];
711  for (int dy = 0; dy < TR_D1D; ++dy)
712  {
713  for (int dx = 0; dx < TR_D1D; ++dx)
714  {
715  gradXY[dy][dx][0] = 0.0;
716  gradXY[dy][dx][1] = 0.0;
717  gradXY[dy][dx][2] = 0.0;
718  }
719  }
720  for (int qy = 0; qy < Q1D; ++qy)
721  {
722  double gradX[max_TR_D1D][VDIM];
723  for (int dx = 0; dx < TR_D1D; ++dx)
724  {
725  gradX[dx][0] = 0.0;
726  gradX[dx][1] = 0.0;
727  gradX[dx][2] = 0.0;
728  }
729  for (int qx = 0; qx < Q1D; ++qx)
730  {
731  const double gX = grad[qz][qy][qx][0];
732  const double gY = grad[qz][qy][qx][1];
733  const double gZ = grad[qz][qy][qx][2];
734  for (int dx = 0; dx < TR_D1D; ++dx)
735  {
736  const double wx = Bt(dx,qx);
737  const double wDx = Gt(dx,qx);
738  gradX[dx][0] += gX * wDx;
739  gradX[dx][1] += gY * wx;
740  gradX[dx][2] += gZ * wx;
741  }
742  }
743  for (int dy = 0; dy < TR_D1D; ++dy)
744  {
745  const double wy = Bt(dy,qy);
746  const double wDy = Gt(dy,qy);
747  for (int dx = 0; dx < TR_D1D; ++dx)
748  {
749  gradXY[dy][dx][0] += gradX[dx][0] * wy;
750  gradXY[dy][dx][1] += gradX[dx][1] * wDy;
751  gradXY[dy][dx][2] += gradX[dx][2] * wy;
752  }
753  }
754  }
755  for (int dz = 0; dz < TR_D1D; ++dz)
756  {
757  const double wz = Bt(dz,qz);
758  const double wDz = Gt(dz,qz);
759  for (int dy = 0; dy < TR_D1D; ++dy)
760  {
761  for (int dx = 0; dx < TR_D1D; ++dx)
762  {
763  y(dx,dy,dz,c,e) +=
764  ((gradXY[dy][dx][0] * wz) +
765  (gradXY[dy][dx][1] * wz) +
766  (gradXY[dy][dx][2] * wDz));
767  }
768  }
769  }
770  }
771  }
772  // We've now calculated y = reshape(div u * op^T) * x
773  });
774 }
775 
776 // Shared memory PA Vector Divergence Apply 3D kernel
777 template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
778 static void SmemPADivergenceApply3D(const int NE,
779  const Array<double> &b_,
780  const Array<double> &g_,
781  const Array<double> &bt_,
782  const Vector &q_,
783  const Vector &x_,
784  Vector &y_,
785  const int tr_d1d = 0,
786  const int te_d1d = 0,
787  const int q1d = 0)
788 {
789  const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
790  const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
791  const int Q1D = T_Q1D ? T_Q1D : q1d;
792 
793  MFEM_VERIFY(TR_D1D <= MAX_D1D, "");
794  MFEM_VERIFY(TE_D1D <= MAX_D1D, "");
795  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
796 
797  auto b = Reshape(b_.Read(), Q1D, TR_D1D);
798  auto g = Reshape(g_.Read(), Q1D, TR_D1D);
799  auto bt = Reshape(bt_.Read(), TE_D1D, Q1D);
800  auto Q = Reshape(q_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
801  auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
802  auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
803 
804  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
805  {
806  constexpr int VDIM = 3;
807  const int tidz = MFEM_THREAD_ID(z);
808  const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
809  const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
810  const int Q1D = T_Q1D ? T_Q1D : q1d;
811  constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
812  constexpr int MD1R = T_TR_D1D ? T_TR_D1D : MAX_D1D;
813  constexpr int MD1E = T_TE_D1D ? T_TE_D1D : MAX_D1D;
814  constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
815  constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
816  MFEM_SHARED double sBG[2][MQ1*MD1];
817  double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
818  double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
819  double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
820  MFEM_SHARED double sm0[3][MDQ*MDQ*MDQ];
821  MFEM_SHARED double sm1[3][MDQ*MDQ*MDQ];
822  double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
823  double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
824  double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
825  double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
826  double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
827  double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
828  double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
829  double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
830  double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
831  double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
832  double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
833  MFEM_SHARED double div[MQ1][MQ1][MQ1];
834 
835  if (tidz == 0)
836  {
837  MFEM_FOREACH_THREAD(d,y,D1DR)
838  {
839  MFEM_FOREACH_THREAD(q,x,Q1D)
840  {
841  B[q][d] = b(q,d);
842  G[q][d] = g(q,d);
843  }
844  }
845  }
846  MFEM_SYNC_THREAD;
847  MFEM_FOREACH_THREAD(qz,z,Q1D)
848  {
849  MFEM_FOREACH_THREAD(qy,y,Q1D)
850  {
851  MFEM_FOREACH_THREAD(qx,x,Q1D)
852  {
853  div[qz][qy][qx] = 0.0;
854  }
855  }
856  }
857  MFEM_SYNC_THREAD;
858 
859  for (int c = 0; c < VDIM; ++c)
860  {
861  MFEM_FOREACH_THREAD(qz,z,Q1D)
862  {
863  MFEM_FOREACH_THREAD(qy,y,Q1D)
864  {
865  MFEM_FOREACH_THREAD(qx,x,Q1D)
866  {
867  QQQ0[qz][qy][qx] = 0.0;
868  QQQ1[qz][qy][qx] = 0.0;
869  QQQ2[qz][qy][qx] = 0.0;
870  }
871  }
872  }
873  MFEM_SYNC_THREAD;
874  MFEM_FOREACH_THREAD(dz,z,D1DR)
875  {
876  MFEM_FOREACH_THREAD(dy,y,D1DR)
877  {
878  MFEM_FOREACH_THREAD(dx,x,D1DR)
879  {
880  X[dz][dy][dx] = x(dx,dy,dz,c,e);
881  }
882  }
883  }
884  MFEM_SYNC_THREAD;
885  MFEM_FOREACH_THREAD(dz,z,D1DR)
886  {
887  MFEM_FOREACH_THREAD(dy,y,D1DR)
888  {
889  MFEM_FOREACH_THREAD(qx,x,Q1D)
890  {
891  double u = 0.0;
892  double v = 0.0;
893  for (int dx = 0; dx < D1DR; ++dx)
894  {
895  const double coord = X[dz][dy][dx];
896  u += coord * B[qx][dx];
897  v += coord * G[qx][dx];
898  }
899  DDQ0[dz][dy][qx] = u;
900  DDQ1[dz][dy][qx] = v;
901  }
902  }
903  }
904  MFEM_SYNC_THREAD;
905  MFEM_FOREACH_THREAD(dz,z,D1DR)
906  {
907  MFEM_FOREACH_THREAD(qy,y,Q1D)
908  {
909  MFEM_FOREACH_THREAD(qx,x,Q1D)
910  {
911  double u = 0.0;
912  double v = 0.0;
913  double w = 0.0;
914  for (int dy = 0; dy < D1DR; ++dy)
915  {
916  u += DDQ1[dz][dy][qx] * B[qy][dy];
917  v += DDQ0[dz][dy][qx] * G[qy][dy];
918  w += DDQ0[dz][dy][qx] * B[qy][dy];
919  }
920  DQQ0[dz][qy][qx] = u;
921  DQQ1[dz][qy][qx] = v;
922  DQQ2[dz][qy][qx] = w;
923  }
924  }
925  }
926  MFEM_SYNC_THREAD;
927  MFEM_FOREACH_THREAD(qz,z,Q1D)
928  {
929  MFEM_FOREACH_THREAD(qy,y,Q1D)
930  {
931  MFEM_FOREACH_THREAD(qx,x,Q1D)
932  {
933  double u = 0.0;
934  double v = 0.0;
935  double w = 0.0;
936  for (int dz = 0; dz < D1DR; ++dz)
937  {
938  u += DQQ0[dz][qy][qx] * B[qz][dz];
939  v += DQQ1[dz][qy][qx] * B[qz][dz];
940  w += DQQ2[dz][qy][qx] * G[qz][dz];
941  }
942  QQQ0[qz][qy][qx] = u;
943  QQQ1[qz][qy][qx] = v;
944  QQQ2[qz][qy][qx] = w;
945  }
946  }
947  }
948  MFEM_SYNC_THREAD;
949  MFEM_FOREACH_THREAD(qz,z,Q1D)
950  {
951  MFEM_FOREACH_THREAD(qy,y,Q1D)
952  {
953  MFEM_FOREACH_THREAD(qx,x,Q1D)
954  {
955  const int q = qx + (qy + qz * Q1D) * Q1D;
956  const double gX = QQQ0[qz][qy][qx];
957  const double gY = QQQ1[qz][qy][qx];
958  const double gZ = QQQ2[qz][qy][qx];
959  div[qz][qy][qx] += gX*Q(q,0,c,e) + gY*Q(q,1,c,e) + gZ*Q(q,2,c,e);
960  }
961  }
962  }
963  MFEM_SYNC_THREAD;
964  }
965 
966  if (tidz == 0)
967  {
968  MFEM_FOREACH_THREAD(d,y,D1DE)
969  {
970  MFEM_FOREACH_THREAD(q,x,Q1D)
971  {
972  Bt[d][q] = bt(d,q);
973  }
974  }
975  }
976  MFEM_SYNC_THREAD;
977 
978  MFEM_FOREACH_THREAD(qz,z,Q1D)
979  {
980  MFEM_FOREACH_THREAD(qy,y,Q1D)
981  {
982  MFEM_FOREACH_THREAD(dx,x,D1DE)
983  {
984  double u = 0.0;
985  for (int qx = 0; qx < Q1D; ++qx)
986  {
987  u += div[qz][qy][qx] * Bt[dx][qx];
988  }
989  QQD0[qz][qy][dx] = u;
990  }
991  }
992  }
993  MFEM_SYNC_THREAD;
994  MFEM_FOREACH_THREAD(qz,z,Q1D)
995  {
996  MFEM_FOREACH_THREAD(dy,y,D1DE)
997  {
998  MFEM_FOREACH_THREAD(dx,x,D1DE)
999  {
1000  double u = 0.0;
1001  for (int qy = 0; qy < Q1D; ++qy)
1002  {
1003  u += QQD0[qz][qy][dx] * Bt[dy][qy];
1004  }
1005  QDD0[qz][dy][dx] = u;
1006  }
1007  }
1008  }
1009  MFEM_SYNC_THREAD;
1010  MFEM_FOREACH_THREAD(dz,z,D1DE)
1011  {
1012  MFEM_FOREACH_THREAD(dy,y,D1DE)
1013  {
1014  MFEM_FOREACH_THREAD(dx,x,D1DE)
1015  {
1016  double u = 0.0;
1017  for (int qz = 0; qz < Q1D; ++qz)
1018  {
1019  u += QDD0[qz][dy][dx] * Bt[dz][qz];
1020  }
1021  y(dx,dy,dz,e) += u;
1022  }
1023  }
1024  }
1025  });
1026 }
1027 
1028 static void PADivergenceApply(const int dim,
1029  const int TR_D1D,
1030  const int TE_D1D,
1031  const int Q1D,
1032  const int NE,
1033  const Array<double> &B,
1034  const Array<double> &G,
1035  const Array<double> &Bt,
1036  const Vector &op,
1037  const Vector &x,
1038  Vector &y,
1039  bool transpose=false)
1040 {
1041  if (dim == 2)
1042  {
1043  return PADivergenceApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1044  }
1045  if (dim == 3)
1046  {
1047  return PADivergenceApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1048  }
1049  MFEM_ABORT("Unknown kernel.");
1050 }
1051 
1052 // PA Divergence Apply kernel
1053 void VectorDivergenceIntegrator::AddMultPA(const Vector &x, Vector &y) const
1054 {
1055  PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1056  trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y,
1057  false);
1058 }
1059 
1060 // PA Divergence Apply kernel
1061 void VectorDivergenceIntegrator::AddMultTransposePA(const Vector &x,
1062  Vector &y) const
1063 {
1064  PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1065  trial_maps->Bt, trial_maps->Gt, test_maps->B, pa_data, x, y,
1066  true);
1067 }
1068 
1069 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe_base.hpp:235
Ordering::Type GetOrdering() const
Return the ordering method.
Definition: fespace.hpp:599
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe_base.hpp:311
void trans(const Vector &u, Vector &x)
Definition: ex27.cpp:412
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
A coefficient that is constant across space and time.
Definition: coefficient.hpp:84
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe_base.hpp:170
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:614
const int MAX_Q1D
Definition: forall.hpp:29
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:441
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:96
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:366
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:465
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:55
const int MAX_D1D
Definition: forall.hpp:28
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2781
Vector data type.
Definition: vector.hpp:60
RefCoord s[3]
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:449
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131