MFEM  v4.5.1
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
nonlininteg_vectorconvection.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "nonlininteg.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 void VectorConvectionNLFIntegrator::AssemblePA(const FiniteElementSpace &fes)
21 {
22  MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES,
23  "PA Only supports Ordering::byNODES!");
24  Mesh *mesh = fes.GetMesh();
25  const FiniteElement &el = *fes.GetFE(0);
26  ElementTransformation &T = *mesh->GetElementTransformation(0);
27  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T);
28  if (DeviceCanUseCeed())
29  {
30  delete ceedOp;
31  const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 ||
32  fes.IsVariableOrder();
33  if (mixed)
34  {
35  ceedOp = new ceed::MixedPAVectorConvectionNLIntegrator(*this, fes, Q);
36  }
37  else
38  {
39  ceedOp = new ceed::PAVectorConvectionNLFIntegrator(fes, *ir, Q);
40  }
41  return;
42  }
43  dim = mesh->Dimension();
44  ne = fes.GetMesh()->GetNE();
45  nq = ir->GetNPoints();
46  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
47  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
48  pa_data.SetSize(ne * nq * dim * dim, Device::GetMemoryType());
49  double COEFF = 1.0;
50  if (Q)
51  {
52  ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient *>(Q);
53  MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
54  COEFF = cQ->constant;
55  }
56  const int NE = ne;
57  const int NQ = nq;
58  auto W = ir->GetWeights().Read();
59  if (dim == 1)
60  {
61  MFEM_ABORT("dim==1 not supported!");
62  }
63  if (dim == 2)
64  {
65  auto J = Reshape(geom->J.Read(), NQ, 2, 2, NE);
66  auto G = Reshape(pa_data.Write(), NQ, 2, 2, NE);
67  MFEM_FORALL(e, NE,
68  {
69  for (int q = 0; q < NQ; ++q)
70  {
71  const double J11 = J(q, 0, 0, e);
72  const double J12 = J(q, 0, 1, e);
73  const double J21 = J(q, 1, 0, e);
74  const double J22 = J(q, 1, 1, e);
75  // Store wq * Q * adj(J)
76  G(q, 0, 0, e) = W[q] * COEFF * J22; // 1,1
77  G(q, 0, 1, e) = W[q] * COEFF * -J12; // 1,2
78  G(q, 1, 0, e) = W[q] * COEFF * -J21; // 2,1
79  G(q, 1, 1, e) = W[q] * COEFF * J11; // 2,2
80  }
81  });
82  }
83  if (dim == 3)
84  {
85  auto J = Reshape(geom->J.Read(), NQ, 3, 3, NE);
86  auto G = Reshape(pa_data.Write(), NQ, 3, 3, NE);
87  MFEM_FORALL(e, NE,
88  {
89  for (int q = 0; q < NQ; ++q)
90  {
91  const double J11 = J(q, 0, 0, e);
92  const double J21 = J(q, 1, 0, e);
93  const double J31 = J(q, 2, 0, e);
94  const double J12 = J(q, 0, 1, e);
95  const double J22 = J(q, 1, 1, e);
96  const double J32 = J(q, 2, 1, e);
97  const double J13 = J(q, 0, 2, e);
98  const double J23 = J(q, 1, 2, e);
99  const double J33 = J(q, 2, 2, e);
100  const double cw = W[q] * COEFF;
101  // adj(J)
102  const double A11 = (J22 * J33) - (J23 * J32);
103  const double A12 = (J32 * J13) - (J12 * J33);
104  const double A13 = (J12 * J23) - (J22 * J13);
105  const double A21 = (J31 * J23) - (J21 * J33);
106  const double A22 = (J11 * J33) - (J13 * J31);
107  const double A23 = (J21 * J13) - (J11 * J23);
108  const double A31 = (J21 * J32) - (J31 * J22);
109  const double A32 = (J31 * J12) - (J11 * J32);
110  const double A33 = (J11 * J22) - (J12 * J21);
111  // Store wq * Q * adj(J)
112  G(q, 0, 0, e) = cw * A11; // 1,1
113  G(q, 0, 1, e) = cw * A12; // 1,2
114  G(q, 0, 2, e) = cw * A13; // 1,3
115  G(q, 1, 0, e) = cw * A21; // 2,1
116  G(q, 1, 1, e) = cw * A22; // 2,2
117  G(q, 1, 2, e) = cw * A23; // 2,3
118  G(q, 2, 0, e) = cw * A31; // 3,1
119  G(q, 2, 1, e) = cw * A32; // 3,2
120  G(q, 2, 2, e) = cw * A33; // 3,3
121  }
122  });
123  }
124 }
125 
126 // PA Convection NL 2D kernel
127 template<int T_D1D = 0, int T_Q1D = 0>
128 static void PAConvectionNLApply2D(const int NE,
129  const Array<double> &b,
130  const Array<double> &g,
131  const Array<double> &bt,
132  const Vector &q_,
133  const Vector &x_,
134  Vector &y_,
135  const int d1d = 0,
136  const int q1d = 0)
137 {
138  const int D1D = T_D1D ? T_D1D : d1d;
139  const int Q1D = T_Q1D ? T_Q1D : q1d;
140  MFEM_VERIFY(D1D <= MAX_D1D, "");
141  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
142  auto B = Reshape(b.Read(), Q1D, D1D);
143  auto G = Reshape(g.Read(), Q1D, D1D);
144  auto Bt = Reshape(bt.Read(), D1D, Q1D);
145  auto Q = Reshape(q_.Read(), Q1D * Q1D, 2, 2, NE);
146  auto x = Reshape(x_.Read(), D1D, D1D, 2, NE);
147  auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NE);
148  MFEM_FORALL(e, NE,
149  {
150  const int D1D = T_D1D ? T_D1D : d1d;
151  const int Q1D = T_Q1D ? T_Q1D : q1d;
152  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
153  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
154 
155  double data[max_Q1D][max_Q1D][2];
156  double grad0[max_Q1D][max_Q1D][2];
157  double grad1[max_Q1D][max_Q1D][2];
158  double Z[max_Q1D][max_Q1D][2];
159  for (int qy = 0; qy < Q1D; ++qy)
160  {
161  for (int qx = 0; qx < Q1D; ++qx)
162  {
163  data[qy][qx][0] = 0.0;
164  data[qy][qx][1] = 0.0;
165  grad0[qy][qx][0] = 0.0;
166  grad0[qy][qx][1] = 0.0;
167  grad1[qy][qx][0] = 0.0;
168  grad1[qy][qx][1] = 0.0;
169  }
170  }
171  for (int dy = 0; dy < D1D; ++dy)
172  {
173  double dataX[max_Q1D][2];
174  double gradX0[max_Q1D][2];
175  double gradX1[max_Q1D][2];
176  for (int qx = 0; qx < Q1D; ++qx)
177  {
178  dataX[qx][0] = 0.0;
179  dataX[qx][1] = 0.0;
180  gradX0[qx][0] = 0.0;
181  gradX0[qx][1] = 0.0;
182  gradX1[qx][0] = 0.0;
183  gradX1[qx][1] = 0.0;
184  }
185  for (int dx = 0; dx < D1D; ++dx)
186  {
187  const double s0 = x(dx, dy, 0, e);
188  const double s1 = x(dx, dy, 1, e);
189  for (int qx = 0; qx < Q1D; ++qx)
190  {
191  const double Bx = B(qx, dx);
192  const double Gx = G(qx, dx);
193  dataX[qx][0] += s0 * Bx;
194  dataX[qx][1] += s1 * Bx;
195  gradX0[qx][0] += s0 * Gx;
196  gradX0[qx][1] += s0 * Bx;
197  gradX1[qx][0] += s1 * Gx;
198  gradX1[qx][1] += s1 * Bx;
199  }
200  }
201  for (int qy = 0; qy < Q1D; ++qy)
202  {
203  const double By = B(qy, dy);
204  const double Gy = G(qy, dy);
205  for (int qx = 0; qx < Q1D; ++qx)
206  {
207  data[qy][qx][0] += dataX[qx][0] * By;
208  data[qy][qx][1] += dataX[qx][1] * By;
209  grad0[qy][qx][0] += gradX0[qx][0] * By;
210  grad0[qy][qx][1] += gradX0[qx][1] * Gy;
211  grad1[qy][qx][0] += gradX1[qx][0] * By;
212  grad1[qy][qx][1] += gradX1[qx][1] * Gy;
213  }
214  }
215  }
216  for (int qy = 0; qy < Q1D; ++qy)
217  {
218  for (int qx = 0; qx < Q1D; ++qx)
219  {
220  const int q = qx + qy * Q1D;
221  const double u1 = data[qy][qx][0];
222  const double u2 = data[qy][qx][1];
223  const double grad00 = grad0[qy][qx][0];
224  const double grad01 = grad0[qy][qx][1];
225  const double grad10 = grad1[qy][qx][0];
226  const double grad11 = grad1[qy][qx][1];
227  const double Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
228  const double Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
229  const double Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
230  const double Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
231  Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
232  Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
233  }
234  }
235  for (int qy = 0; qy < Q1D; ++qy)
236  {
237  double Y[max_D1D][2];
238  for (int dx = 0; dx < D1D; ++dx)
239  {
240  Y[dx][0] = 0.0;
241  Y[dx][1] = 0.0;
242  for (int qx = 0; qx < Q1D; ++qx)
243  {
244  const double Btx = Bt(dx, qx);
245  Y[dx][0] += Btx * Z[qy][qx][0];
246  Y[dx][1] += Btx * Z[qy][qx][1];
247  }
248  }
249  for (int dy = 0; dy < D1D; ++dy)
250  {
251  for (int dx = 0; dx < D1D; ++dx)
252  {
253  const double Bty = Bt(dy, qy);
254  y(dx, dy, 0, e) += Bty * Y[dx][0];
255  y(dx, dy, 1, e) += Bty * Y[dx][1];
256  }
257  }
258  }
259  });
260 }
261 
262 // PA Convection NL 3D kernel
263 template<int T_D1D = 0, int T_Q1D = 0>
264 static void PAConvectionNLApply3D(const int NE,
265  const Array<double> &b,
266  const Array<double> &g,
267  const Array<double> &bt,
268  const Vector &q_,
269  const Vector &x_,
270  Vector &y_,
271  const int d1d = 0,
272  const int q1d = 0)
273 {
274  constexpr int VDIM = 3;
275  const int D1D = T_D1D ? T_D1D : d1d;
276  const int Q1D = T_Q1D ? T_Q1D : q1d;
277  MFEM_VERIFY(D1D <= MAX_D1D, "");
278  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
279 
280  auto B = Reshape(b.Read(), Q1D, D1D);
281  auto G = Reshape(g.Read(), Q1D, D1D);
282  auto Bt = Reshape(bt.Read(), D1D, Q1D);
283  auto Q = Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
284  auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
285  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
286 
287  MFEM_FORALL(e, NE,
288  {
289  constexpr int VDIM = 3;
290  const int D1D = T_D1D ? T_D1D : d1d;
291  const int Q1D = T_Q1D ? T_Q1D : q1d;
292  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
293  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
294 
295  double data[max_Q1D][max_Q1D][max_Q1D][VDIM];
296  double grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
297  double grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
298  double grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
299  double Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
300  for (int qz = 0; qz < Q1D; ++qz)
301  {
302  for (int qy = 0; qy < Q1D; ++qy)
303  {
304  for (int qx = 0; qx < Q1D; ++qx)
305  {
306  data[qz][qy][qx][0] = 0.0;
307  data[qz][qy][qx][1] = 0.0;
308  data[qz][qy][qx][2] = 0.0;
309 
310  grad0[qz][qy][qx][0] = 0.0;
311  grad0[qz][qy][qx][1] = 0.0;
312  grad0[qz][qy][qx][2] = 0.0;
313 
314  grad1[qz][qy][qx][0] = 0.0;
315  grad1[qz][qy][qx][1] = 0.0;
316  grad1[qz][qy][qx][2] = 0.0;
317 
318  grad2[qz][qy][qx][0] = 0.0;
319  grad2[qz][qy][qx][1] = 0.0;
320  grad2[qz][qy][qx][2] = 0.0;
321  }
322  }
323  }
324  for (int dz = 0; dz < D1D; ++dz)
325  {
326  double dataXY[max_Q1D][max_Q1D][VDIM];
327  double gradXY0[max_Q1D][max_Q1D][VDIM];
328  double gradXY1[max_Q1D][max_Q1D][VDIM];
329  double gradXY2[max_Q1D][max_Q1D][VDIM];
330  for (int qy = 0; qy < Q1D; ++qy)
331  {
332  for (int qx = 0; qx < Q1D; ++qx)
333  {
334  dataXY[qy][qx][0] = 0.0;
335  dataXY[qy][qx][1] = 0.0;
336  dataXY[qy][qx][2] = 0.0;
337 
338  gradXY0[qy][qx][0] = 0.0;
339  gradXY0[qy][qx][1] = 0.0;
340  gradXY0[qy][qx][2] = 0.0;
341 
342  gradXY1[qy][qx][0] = 0.0;
343  gradXY1[qy][qx][1] = 0.0;
344  gradXY1[qy][qx][2] = 0.0;
345 
346  gradXY2[qy][qx][0] = 0.0;
347  gradXY2[qy][qx][1] = 0.0;
348  gradXY2[qy][qx][2] = 0.0;
349  }
350  }
351  for (int dy = 0; dy < D1D; ++dy)
352  {
353  double dataX[max_Q1D][VDIM];
354  double gradX0[max_Q1D][VDIM];
355  double gradX1[max_Q1D][VDIM];
356  double gradX2[max_Q1D][VDIM];
357  for (int qx = 0; qx < Q1D; ++qx)
358  {
359  dataX[qx][0] = 0.0;
360  dataX[qx][1] = 0.0;
361  dataX[qx][2] = 0.0;
362 
363  gradX0[qx][0] = 0.0;
364  gradX0[qx][1] = 0.0;
365  gradX0[qx][2] = 0.0;
366 
367  gradX1[qx][0] = 0.0;
368  gradX1[qx][1] = 0.0;
369  gradX1[qx][2] = 0.0;
370 
371  gradX2[qx][0] = 0.0;
372  gradX2[qx][1] = 0.0;
373  gradX2[qx][2] = 0.0;
374  }
375  for (int dx = 0; dx < D1D; ++dx)
376  {
377  const double s0 = x(dx, dy, dz, 0, e);
378  const double s1 = x(dx, dy, dz, 1, e);
379  const double s2 = x(dx, dy, dz, 2, e);
380  for (int qx = 0; qx < Q1D; ++qx)
381  {
382  const double Bx = B(qx, dx);
383  const double Gx = G(qx, dx);
384 
385  dataX[qx][0] += s0 * Bx;
386  dataX[qx][1] += s1 * Bx;
387  dataX[qx][2] += s2 * Bx;
388 
389  gradX0[qx][0] += s0 * Gx;
390  gradX0[qx][1] += s0 * Bx;
391  gradX0[qx][2] += s0 * Bx;
392 
393  gradX1[qx][0] += s1 * Gx;
394  gradX1[qx][1] += s1 * Bx;
395  gradX1[qx][2] += s1 * Bx;
396 
397  gradX2[qx][0] += s2 * Gx;
398  gradX2[qx][1] += s2 * Bx;
399  gradX2[qx][2] += s2 * Bx;
400  }
401  }
402  for (int qy = 0; qy < Q1D; ++qy)
403  {
404  const double By = B(qy, dy);
405  const double Gy = G(qy, dy);
406  for (int qx = 0; qx < Q1D; ++qx)
407  {
408  dataXY[qy][qx][0] += dataX[qx][0] * By;
409  dataXY[qy][qx][1] += dataX[qx][1] * By;
410  dataXY[qy][qx][2] += dataX[qx][2] * By;
411 
412  gradXY0[qy][qx][0] += gradX0[qx][0] * By;
413  gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
414  gradXY0[qy][qx][2] += gradX0[qx][2] * By;
415 
416  gradXY1[qy][qx][0] += gradX1[qx][0] * By;
417  gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
418  gradXY1[qy][qx][2] += gradX1[qx][2] * By;
419 
420  gradXY2[qy][qx][0] += gradX2[qx][0] * By;
421  gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
422  gradXY2[qy][qx][2] += gradX2[qx][2] * By;
423  }
424  }
425  }
426  for (int qz = 0; qz < Q1D; ++qz)
427  {
428  const double Bz = B(qz, dz);
429  const double Gz = G(qz, dz);
430  for (int qy = 0; qy < Q1D; ++qy)
431  {
432  for (int qx = 0; qx < Q1D; ++qx)
433  {
434  data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
435  data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
436  data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
437 
438  grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
439  grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
440  grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
441 
442  grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
443  grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
444  grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
445 
446  grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
447  grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
448  grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
449  }
450  }
451  }
452  }
453  for (int qz = 0; qz < Q1D; ++qz)
454  {
455  for (int qy = 0; qy < Q1D; ++qy)
456  {
457  for (int qx = 0; qx < Q1D; ++qx)
458  {
459  const int q = qx + Q1D * (qy + qz * Q1D);
460 
461  const double u1 = data[qz][qy][qx][0];
462  const double u2 = data[qz][qy][qx][1];
463  const double u3 = data[qz][qy][qx][2];
464 
465  const double grad00 = grad0[qz][qy][qx][0];
466  const double grad01 = grad0[qz][qy][qx][1];
467  const double grad02 = grad0[qz][qy][qx][2];
468 
469  const double grad10 = grad1[qz][qy][qx][0];
470  const double grad11 = grad1[qz][qy][qx][1];
471  const double grad12 = grad1[qz][qy][qx][2];
472 
473  const double grad20 = grad2[qz][qy][qx][0];
474  const double grad21 = grad2[qz][qy][qx][1];
475  const double grad22 = grad2[qz][qy][qx][2];
476 
477  const double Dxu1 = grad00 * Q(q, 0, 0, e)
478  + grad01 * Q(q, 1, 0, e)
479  + grad02 * Q(q, 2, 0, e);
480  const double Dyu1 = grad00 * Q(q, 0, 1, e)
481  + grad01 * Q(q, 1, 1, e)
482  + grad02 * Q(q, 2, 1, e);
483  const double Dzu1 = grad00 * Q(q, 0, 2, e)
484  + grad01 * Q(q, 1, 2, e)
485  + grad02 * Q(q, 2, 2, e);
486 
487  const double Dxu2 = grad10 * Q(q, 0, 0, e)
488  + grad11 * Q(q, 1, 0, e)
489  + grad12 * Q(q, 2, 0, e);
490  const double Dyu2 = grad10 * Q(q, 0, 1, e)
491  + grad11 * Q(q, 1, 1, e)
492  + grad12 * Q(q, 2, 1, e);
493  const double Dzu2 = grad10 * Q(q, 0, 2, e)
494  + grad11 * Q(q, 1, 2, e)
495  + grad12 * Q(q, 2, 2, e);
496 
497  const double Dxu3 = grad20 * Q(q, 0, 0, e)
498  + grad21 * Q(q, 1, 0, e)
499  + grad22 * Q(q, 2, 0, e);
500  const double Dyu3 = grad20 * Q(q, 0, 1, e)
501  + grad21 * Q(q, 1, 1, e)
502  + grad22 * Q(q, 2, 1, e);
503  const double Dzu3 = grad20 * Q(q, 0, 2, e)
504  + grad21 * Q(q, 1, 2, e)
505  + grad22 * Q(q, 2, 2, e);
506 
507  Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
508  Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
509  Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
510  }
511  }
512  }
513  for (int qz = 0; qz < Q1D; ++qz)
514  {
515  double opXY[max_D1D][max_D1D][VDIM];
516  for (int dy = 0; dy < D1D; ++dy)
517  {
518  for (int dx = 0; dx < D1D; ++dx)
519  {
520  opXY[dy][dx][0] = 0.0;
521  opXY[dy][dx][1] = 0.0;
522  opXY[dy][dx][2] = 0.0;
523  }
524  }
525  for (int qy = 0; qy < Q1D; ++qy)
526  {
527  double opX[max_D1D][VDIM];
528  for (int dx = 0; dx < D1D; ++dx)
529  {
530  opX[dx][0] = 0.0;
531  opX[dx][1] = 0.0;
532  opX[dx][2] = 0.0;
533  for (int qx = 0; qx < Q1D; ++qx)
534  {
535  const double Btx = Bt(dx, qx);
536  opX[dx][0] += Btx * Z[qz][qy][qx][0];
537  opX[dx][1] += Btx * Z[qz][qy][qx][1];
538  opX[dx][2] += Btx * Z[qz][qy][qx][2];
539  }
540  }
541  for (int dy = 0; dy < D1D; ++dy)
542  {
543  for (int dx = 0; dx < D1D; ++dx)
544  {
545  const double Bty = Bt(dy, qy);
546  opXY[dy][dx][0] += Bty * opX[dx][0];
547  opXY[dy][dx][1] += Bty * opX[dx][1];
548  opXY[dy][dx][2] += Bty * opX[dx][2];
549  }
550  }
551  }
552  for (int dz = 0; dz < D1D; ++dz)
553  {
554  for (int dy = 0; dy < D1D; ++dy)
555  {
556  for (int dx = 0; dx < D1D; ++dx)
557  {
558  const double Btz = Bt(dz, qz);
559  y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
560  y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
561  y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
562  }
563  }
564  }
565  }
566  });
567 }
568 
569 template<int T_D1D = 0, int T_Q1D = 0, int T_MAX_D1D =0, int T_MAX_Q1D =0>
570 static void SmemPAConvectionNLApply3D(const int NE,
571  const Array<double> &b_,
572  const Array<double> &g_,
573  const Vector &d_,
574  const Vector &x_,
575  Vector &y_,
576  const int d1d = 0,
577  const int q1d = 0)
578 {
579  constexpr int VDIM = 3;
580  const int D1D = T_D1D ? T_D1D : d1d;
581  const int Q1D = T_Q1D ? T_Q1D : q1d;
582  constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
583  constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
584  MFEM_VERIFY(D1D <= MD1, "");
585  MFEM_VERIFY(Q1D <= MQ1, "");
586 
587  auto b = Reshape(b_.Read(), Q1D, D1D);
588  auto g = Reshape(g_.Read(), Q1D, D1D);
589  auto D = Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
590  auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
591  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
592 
593  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
594  {
595  const int tidz = MFEM_THREAD_ID(z);
596  const int D1D = T_D1D ? T_D1D : d1d;
597  const int Q1D = T_Q1D ? T_Q1D : q1d;
598  constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
599  constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
600  MFEM_SHARED double BG[2][MQ1 * MD1];
601  double(*B)[MD1] = (double(*)[MD1])(BG + 0);
602  double(*G)[MD1] = (double(*)[MD1])(BG + 1);
603  double(*Bt)[MQ1] = (double(*)[MQ1])(BG + 0);
604  MFEM_SHARED double U[2][MQ1][MQ1][MQ1];
605  MFEM_SHARED double sm0[3][MQ1 * MQ1 * MQ1];
606  MFEM_SHARED double sm1[3][MQ1 * MQ1 * MQ1];
607  double(*DDQ0)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 0);
608  double(*DDQ1)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 1);
609  double(*X)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 2);
610  double(*DQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 0);
611  double(*DQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 1);
612  double(*DQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 2);
613  double(*QQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 0);
614  double(*QQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 1);
615  double(*QQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 2);
616  double(*QQD0)[MQ1][MD1] = (double(*)[MQ1][MD1])(sm1 + 0);
617  double(*QDD0)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 0);
618  MFEM_SHARED double Z[MQ1][MQ1][MQ1];
619 
620  for (int cy = 0; cy < VDIM; ++cy)
621  {
622  if (tidz == 0)
623  {
624  MFEM_FOREACH_THREAD(q, x, Q1D)
625  {
626  MFEM_FOREACH_THREAD(d, y, D1D)
627  {
628  B[q][d] = b(q, d);
629  G[q][d] = g(q, d);
630  }
631  }
632  }
633  MFEM_FOREACH_THREAD(qz, z, Q1D)
634  {
635  MFEM_FOREACH_THREAD(qy, y, Q1D)
636  {
637  MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
638  }
639  }
640  MFEM_SYNC_THREAD;
641  for (int c = 0; c < VDIM; ++c)
642  {
643  MFEM_FOREACH_THREAD(dz, z, D1D)
644  {
645  MFEM_FOREACH_THREAD(dy, y, D1D)
646  {
647  MFEM_FOREACH_THREAD(dx, x, D1D)
648  {
649  X[dz][dy][dx] = x(dx, dy, dz, cy, e);
650  U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
651  }
652  }
653  }
654  MFEM_SYNC_THREAD;
655  MFEM_FOREACH_THREAD(dz, z, D1D)
656  {
657  MFEM_FOREACH_THREAD(dy, y, D1D)
658  {
659  MFEM_FOREACH_THREAD(qx, x, Q1D)
660  {
661  double u = 0.0;
662  double v = 0.0;
663  double z = 0.0;
664  for (int dx = 0; dx < D1D; ++dx)
665  {
666  const double coord = X[dz][dy][dx];
667  const double value = U[0][dz][dy][dx];
668  u += coord * B[qx][dx];
669  v += coord * G[qx][dx];
670  z += value * B[qx][dx];
671  }
672  DDQ0[dz][dy][qx] = u;
673  DDQ1[dz][dy][qx] = v;
674  U[1][dz][dy][qx] = z;
675  }
676  }
677  }
678  MFEM_SYNC_THREAD;
679  MFEM_FOREACH_THREAD(dz, z, D1D)
680  {
681  MFEM_FOREACH_THREAD(qy, y, Q1D)
682  {
683  MFEM_FOREACH_THREAD(qx, x, Q1D)
684  {
685  double u = 0.0;
686  double v = 0.0;
687  double w = 0.0;
688  double z = 0.0;
689  for (int dy = 0; dy < D1D; ++dy)
690  {
691  u += DDQ1[dz][dy][qx] * B[qy][dy];
692  v += DDQ0[dz][dy][qx] * G[qy][dy];
693  w += DDQ0[dz][dy][qx] * B[qy][dy];
694  z += U[1][dz][dy][qx] * B[qy][dy];
695  }
696  DQQ0[dz][qy][qx] = u;
697  DQQ1[dz][qy][qx] = v;
698  DQQ2[dz][qy][qx] = w;
699  U[0][dz][qy][qx] = z;
700  }
701  }
702  }
703  MFEM_SYNC_THREAD;
704  MFEM_FOREACH_THREAD(qz, z, Q1D)
705  {
706  MFEM_FOREACH_THREAD(qy, y, Q1D)
707  {
708  MFEM_FOREACH_THREAD(qx, x, Q1D)
709  {
710  double u = 0.0;
711  double v = 0.0;
712  double w = 0.0;
713  double z = 0.0;
714  for (int dz = 0; dz < D1D; ++dz)
715  {
716  u += DQQ0[dz][qy][qx] * B[qz][dz];
717  v += DQQ1[dz][qy][qx] * B[qz][dz];
718  w += DQQ2[dz][qy][qx] * G[qz][dz];
719  z += U[0][dz][qy][qx] * B[qz][dz];
720  }
721  QQQ0[qz][qy][qx] = u;
722  QQQ1[qz][qy][qx] = v;
723  QQQ2[qz][qy][qx] = w;
724  U[1][qz][qy][qx] = z;
725  }
726  }
727  }
728  MFEM_SYNC_THREAD;
729  MFEM_FOREACH_THREAD(qz, z, Q1D)
730  {
731  MFEM_FOREACH_THREAD(qy, y, Q1D)
732  {
733  MFEM_FOREACH_THREAD(qx, x, Q1D)
734  {
735  const int q = qx + (qy + qz * Q1D) * Q1D;
736  const double z = U[1][qz][qy][qx];
737  const double gX = QQQ0[qz][qy][qx];
738  const double gY = QQQ1[qz][qy][qx];
739  const double gZ = QQQ2[qz][qy][qx];
740  const double d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
741  + gZ * D(q, 2, c, e);
742  Z[qz][qy][qx] += z * d;
743  }
744  }
745  }
746  MFEM_SYNC_THREAD;
747  } // for each conv component
748  if (tidz == 0)
749  {
750  MFEM_FOREACH_THREAD(d, y, D1D)
751  {
752  MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] = b(q, d); }
753  }
754  }
755  MFEM_SYNC_THREAD;
756  MFEM_FOREACH_THREAD(qz, z, Q1D)
757  {
758  MFEM_FOREACH_THREAD(qy, y, Q1D)
759  {
760  MFEM_FOREACH_THREAD(dx, x, D1D)
761  {
762  double u = 0.0;
763  for (int qx = 0; qx < Q1D; ++qx)
764  {
765  u += Z[qz][qy][qx] * Bt[dx][qx];
766  }
767  QQD0[qz][qy][dx] = u;
768  }
769  }
770  }
771  MFEM_SYNC_THREAD;
772  MFEM_FOREACH_THREAD(qz, z, Q1D)
773  {
774  MFEM_FOREACH_THREAD(dy, y, D1D)
775  {
776  MFEM_FOREACH_THREAD(dx, x, D1D)
777  {
778  double u = 0.0;
779  for (int qy = 0; qy < Q1D; ++qy)
780  {
781  u += QQD0[qz][qy][dx] * Bt[dy][qy];
782  }
783  QDD0[qz][dy][dx] = u;
784  }
785  }
786  }
787  MFEM_SYNC_THREAD;
788  MFEM_FOREACH_THREAD(dz, z, D1D)
789  {
790  MFEM_FOREACH_THREAD(dy, y, D1D)
791  {
792  MFEM_FOREACH_THREAD(dx, x, D1D)
793  {
794  double u = 0.0;
795  for (int qz = 0; qz < Q1D; ++qz)
796  {
797  u += QDD0[qz][dy][dx] * Bt[dz][qz];
798  }
799  Y(dx, dy, dz, cy, e) += u;
800  }
801  }
802  }
803  MFEM_SYNC_THREAD;
804  }
805  });
806 }
807 
808 void VectorConvectionNLFIntegrator::AddMultPA(const Vector &x, Vector &y) const
809 {
810  if (DeviceCanUseCeed())
811  {
812  ceedOp->AddMult(x, y);
813  }
814  else
815  {
816  const int NE = ne;
817  const int D1D = maps->ndof;
818  const int Q1D = maps->nqpt;
819  const Vector &QV = pa_data;
820  const Array<double> &B = maps->B;
821  const Array<double> &G = maps->G;
822  const Array<double> &Bt = maps->Bt;
823  if (dim == 2)
824  {
825  return PAConvectionNLApply2D(NE, B, G, Bt, QV, x, y, D1D, Q1D);
826  }
827  if (dim == 3)
828  {
829  constexpr int T_MAX_D1D = 8;
830  constexpr int T_MAX_Q1D = 8;
831  MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D, "Not yet implemented!");
832  return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
833  (NE, B, G, QV, x, y, D1D, Q1D);
834  }
835  MFEM_ABORT("Not yet implemented!");
836  }
837 }
838 
839 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe_base.hpp:235
Ordering::Type GetOrdering() const
Return the ordering method.
Definition: fespace.hpp:599
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition: fespace.hpp:463
A coefficient that is constant across space and time.
Definition: coefficient.hpp:84
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:923
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
const int MAX_Q1D
Definition: forall.hpp:29
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:441
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:96
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
Definition: util.cpp:33
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:366
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:465
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:55
const int MAX_D1D
Definition: forall.hpp:28
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2781
Vector data type.
Definition: vector.hpp:60
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:449
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131