MFEM  v4.4.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
nonlininteg_vectorconvection.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "nonlininteg.hpp"
14 #include "ceed/nlconvection.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 void VectorConvectionNLFIntegrator::AssemblePA(const FiniteElementSpace &fes)
21 {
22  MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES,
23  "PA Only supports Ordering::byNODES!");
24  Mesh *mesh = fes.GetMesh();
25  const FiniteElement &el = *fes.GetFE(0);
26  ElementTransformation &T = *mesh->GetElementTransformation(0);
27  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T);
28  if (DeviceCanUseCeed())
29  {
30  delete ceedOp;
31  ceedOp = new ceed::PAVectorConvectionNLFIntegrator(fes, *ir, Q);
32  return;
33  }
34  dim = mesh->Dimension();
35  ne = fes.GetMesh()->GetNE();
36  nq = ir->GetNPoints();
37  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
38  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
39  pa_data.SetSize(ne * nq * dim * dim, Device::GetMemoryType());
40  double COEFF = 1.0;
41  if (Q)
42  {
43  ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient *>(Q);
44  MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
45  COEFF = cQ->constant;
46  }
47  const int NE = ne;
48  const int NQ = nq;
49  auto W = ir->GetWeights().Read();
50  if (dim == 1)
51  {
52  MFEM_ABORT("dim==1 not supported!");
53  }
54  if (dim == 2)
55  {
56  auto J = Reshape(geom->J.Read(), NQ, 2, 2, NE);
57  auto G = Reshape(pa_data.Write(), NQ, 2, 2, NE);
58  MFEM_FORALL(e, NE,
59  {
60  for (int q = 0; q < NQ; ++q)
61  {
62  const double J11 = J(q, 0, 0, e);
63  const double J12 = J(q, 0, 1, e);
64  const double J21 = J(q, 1, 0, e);
65  const double J22 = J(q, 1, 1, e);
66  // Store wq * Q * adj(J)
67  G(q, 0, 0, e) = W[q] * COEFF * J22; // 1,1
68  G(q, 0, 1, e) = W[q] * COEFF * -J12; // 1,2
69  G(q, 1, 0, e) = W[q] * COEFF * -J21; // 2,1
70  G(q, 1, 1, e) = W[q] * COEFF * J11; // 2,2
71  }
72  });
73  }
74  if (dim == 3)
75  {
76  auto J = Reshape(geom->J.Read(), NQ, 3, 3, NE);
77  auto G = Reshape(pa_data.Write(), NQ, 3, 3, NE);
78  MFEM_FORALL(e, NE,
79  {
80  for (int q = 0; q < NQ; ++q)
81  {
82  const double J11 = J(q, 0, 0, e);
83  const double J21 = J(q, 1, 0, e);
84  const double J31 = J(q, 2, 0, e);
85  const double J12 = J(q, 0, 1, e);
86  const double J22 = J(q, 1, 1, e);
87  const double J32 = J(q, 2, 1, e);
88  const double J13 = J(q, 0, 2, e);
89  const double J23 = J(q, 1, 2, e);
90  const double J33 = J(q, 2, 2, e);
91  const double cw = W[q] * COEFF;
92  // adj(J)
93  const double A11 = (J22 * J33) - (J23 * J32);
94  const double A12 = (J32 * J13) - (J12 * J33);
95  const double A13 = (J12 * J23) - (J22 * J13);
96  const double A21 = (J31 * J23) - (J21 * J33);
97  const double A22 = (J11 * J33) - (J13 * J31);
98  const double A23 = (J21 * J13) - (J11 * J23);
99  const double A31 = (J21 * J32) - (J31 * J22);
100  const double A32 = (J31 * J12) - (J11 * J32);
101  const double A33 = (J11 * J22) - (J12 * J21);
102  // Store wq * Q * adj(J)
103  G(q, 0, 0, e) = cw * A11; // 1,1
104  G(q, 0, 1, e) = cw * A12; // 1,2
105  G(q, 0, 2, e) = cw * A13; // 1,3
106  G(q, 1, 0, e) = cw * A21; // 2,1
107  G(q, 1, 1, e) = cw * A22; // 2,2
108  G(q, 1, 2, e) = cw * A23; // 2,3
109  G(q, 2, 0, e) = cw * A31; // 3,1
110  G(q, 2, 1, e) = cw * A32; // 3,2
111  G(q, 2, 2, e) = cw * A33; // 3,3
112  }
113  });
114  }
115 }
116 
117 // PA Convection NL 2D kernel
118 template<int T_D1D = 0, int T_Q1D = 0>
119 static void PAConvectionNLApply2D(const int NE,
120  const Array<double> &b,
121  const Array<double> &g,
122  const Array<double> &bt,
123  const Vector &q_,
124  const Vector &x_,
125  Vector &y_,
126  const int d1d = 0,
127  const int q1d = 0)
128 {
129  const int D1D = T_D1D ? T_D1D : d1d;
130  const int Q1D = T_Q1D ? T_Q1D : q1d;
131  MFEM_VERIFY(D1D <= MAX_D1D, "");
132  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
133  auto B = Reshape(b.Read(), Q1D, D1D);
134  auto G = Reshape(g.Read(), Q1D, D1D);
135  auto Bt = Reshape(bt.Read(), D1D, Q1D);
136  auto Q = Reshape(q_.Read(), Q1D * Q1D, 2, 2, NE);
137  auto x = Reshape(x_.Read(), D1D, D1D, 2, NE);
138  auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NE);
139  MFEM_FORALL(e, NE,
140  {
141  const int D1D = T_D1D ? T_D1D : d1d;
142  const int Q1D = T_Q1D ? T_Q1D : q1d;
143  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
144  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
145 
146  double data[max_Q1D][max_Q1D][2];
147  double grad0[max_Q1D][max_Q1D][2];
148  double grad1[max_Q1D][max_Q1D][2];
149  double Z[max_Q1D][max_Q1D][2];
150  for (int qy = 0; qy < Q1D; ++qy)
151  {
152  for (int qx = 0; qx < Q1D; ++qx)
153  {
154  data[qy][qx][0] = 0.0;
155  data[qy][qx][1] = 0.0;
156  grad0[qy][qx][0] = 0.0;
157  grad0[qy][qx][1] = 0.0;
158  grad1[qy][qx][0] = 0.0;
159  grad1[qy][qx][1] = 0.0;
160  }
161  }
162  for (int dy = 0; dy < D1D; ++dy)
163  {
164  double dataX[max_Q1D][2];
165  double gradX0[max_Q1D][2];
166  double gradX1[max_Q1D][2];
167  for (int qx = 0; qx < Q1D; ++qx)
168  {
169  dataX[qx][0] = 0.0;
170  dataX[qx][1] = 0.0;
171  gradX0[qx][0] = 0.0;
172  gradX0[qx][1] = 0.0;
173  gradX1[qx][0] = 0.0;
174  gradX1[qx][1] = 0.0;
175  }
176  for (int dx = 0; dx < D1D; ++dx)
177  {
178  const double s0 = x(dx, dy, 0, e);
179  const double s1 = x(dx, dy, 1, e);
180  for (int qx = 0; qx < Q1D; ++qx)
181  {
182  const double Bx = B(qx, dx);
183  const double Gx = G(qx, dx);
184  dataX[qx][0] += s0 * Bx;
185  dataX[qx][1] += s1 * Bx;
186  gradX0[qx][0] += s0 * Gx;
187  gradX0[qx][1] += s0 * Bx;
188  gradX1[qx][0] += s1 * Gx;
189  gradX1[qx][1] += s1 * Bx;
190  }
191  }
192  for (int qy = 0; qy < Q1D; ++qy)
193  {
194  const double By = B(qy, dy);
195  const double Gy = G(qy, dy);
196  for (int qx = 0; qx < Q1D; ++qx)
197  {
198  data[qy][qx][0] += dataX[qx][0] * By;
199  data[qy][qx][1] += dataX[qx][1] * By;
200  grad0[qy][qx][0] += gradX0[qx][0] * By;
201  grad0[qy][qx][1] += gradX0[qx][1] * Gy;
202  grad1[qy][qx][0] += gradX1[qx][0] * By;
203  grad1[qy][qx][1] += gradX1[qx][1] * Gy;
204  }
205  }
206  }
207  for (int qy = 0; qy < Q1D; ++qy)
208  {
209  for (int qx = 0; qx < Q1D; ++qx)
210  {
211  const int q = qx + qy * Q1D;
212  const double u1 = data[qy][qx][0];
213  const double u2 = data[qy][qx][1];
214  const double grad00 = grad0[qy][qx][0];
215  const double grad01 = grad0[qy][qx][1];
216  const double grad10 = grad1[qy][qx][0];
217  const double grad11 = grad1[qy][qx][1];
218  const double Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
219  const double Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
220  const double Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
221  const double Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
222  Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
223  Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
224  }
225  }
226  for (int qy = 0; qy < Q1D; ++qy)
227  {
228  double Y[max_D1D][2];
229  for (int dx = 0; dx < D1D; ++dx)
230  {
231  Y[dx][0] = 0.0;
232  Y[dx][1] = 0.0;
233  for (int qx = 0; qx < Q1D; ++qx)
234  {
235  const double Btx = Bt(dx, qx);
236  Y[dx][0] += Btx * Z[qy][qx][0];
237  Y[dx][1] += Btx * Z[qy][qx][1];
238  }
239  }
240  for (int dy = 0; dy < D1D; ++dy)
241  {
242  for (int dx = 0; dx < D1D; ++dx)
243  {
244  const double Bty = Bt(dy, qy);
245  y(dx, dy, 0, e) += Bty * Y[dx][0];
246  y(dx, dy, 1, e) += Bty * Y[dx][1];
247  }
248  }
249  }
250  });
251 }
252 
253 // PA Convection NL 3D kernel
254 template<int T_D1D = 0, int T_Q1D = 0>
255 static void PAConvectionNLApply3D(const int NE,
256  const Array<double> &b,
257  const Array<double> &g,
258  const Array<double> &bt,
259  const Vector &q_,
260  const Vector &x_,
261  Vector &y_,
262  const int d1d = 0,
263  const int q1d = 0)
264 {
265  constexpr int VDIM = 3;
266  const int D1D = T_D1D ? T_D1D : d1d;
267  const int Q1D = T_Q1D ? T_Q1D : q1d;
268  MFEM_VERIFY(D1D <= MAX_D1D, "");
269  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
270 
271  auto B = Reshape(b.Read(), Q1D, D1D);
272  auto G = Reshape(g.Read(), Q1D, D1D);
273  auto Bt = Reshape(bt.Read(), D1D, Q1D);
274  auto Q = Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
275  auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
276  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
277 
278  MFEM_FORALL(e, NE,
279  {
280  constexpr int VDIM = 3;
281  const int D1D = T_D1D ? T_D1D : d1d;
282  const int Q1D = T_Q1D ? T_Q1D : q1d;
283  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
284  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
285 
286  double data[max_Q1D][max_Q1D][max_Q1D][VDIM];
287  double grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
288  double grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
289  double grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
290  double Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
291  for (int qz = 0; qz < Q1D; ++qz)
292  {
293  for (int qy = 0; qy < Q1D; ++qy)
294  {
295  for (int qx = 0; qx < Q1D; ++qx)
296  {
297  data[qz][qy][qx][0] = 0.0;
298  data[qz][qy][qx][1] = 0.0;
299  data[qz][qy][qx][2] = 0.0;
300 
301  grad0[qz][qy][qx][0] = 0.0;
302  grad0[qz][qy][qx][1] = 0.0;
303  grad0[qz][qy][qx][2] = 0.0;
304 
305  grad1[qz][qy][qx][0] = 0.0;
306  grad1[qz][qy][qx][1] = 0.0;
307  grad1[qz][qy][qx][2] = 0.0;
308 
309  grad2[qz][qy][qx][0] = 0.0;
310  grad2[qz][qy][qx][1] = 0.0;
311  grad2[qz][qy][qx][2] = 0.0;
312  }
313  }
314  }
315  for (int dz = 0; dz < D1D; ++dz)
316  {
317  double dataXY[max_Q1D][max_Q1D][VDIM];
318  double gradXY0[max_Q1D][max_Q1D][VDIM];
319  double gradXY1[max_Q1D][max_Q1D][VDIM];
320  double gradXY2[max_Q1D][max_Q1D][VDIM];
321  for (int qy = 0; qy < Q1D; ++qy)
322  {
323  for (int qx = 0; qx < Q1D; ++qx)
324  {
325  dataXY[qy][qx][0] = 0.0;
326  dataXY[qy][qx][1] = 0.0;
327  dataXY[qy][qx][2] = 0.0;
328 
329  gradXY0[qy][qx][0] = 0.0;
330  gradXY0[qy][qx][1] = 0.0;
331  gradXY0[qy][qx][2] = 0.0;
332 
333  gradXY1[qy][qx][0] = 0.0;
334  gradXY1[qy][qx][1] = 0.0;
335  gradXY1[qy][qx][2] = 0.0;
336 
337  gradXY2[qy][qx][0] = 0.0;
338  gradXY2[qy][qx][1] = 0.0;
339  gradXY2[qy][qx][2] = 0.0;
340  }
341  }
342  for (int dy = 0; dy < D1D; ++dy)
343  {
344  double dataX[max_Q1D][VDIM];
345  double gradX0[max_Q1D][VDIM];
346  double gradX1[max_Q1D][VDIM];
347  double gradX2[max_Q1D][VDIM];
348  for (int qx = 0; qx < Q1D; ++qx)
349  {
350  dataX[qx][0] = 0.0;
351  dataX[qx][1] = 0.0;
352  dataX[qx][2] = 0.0;
353 
354  gradX0[qx][0] = 0.0;
355  gradX0[qx][1] = 0.0;
356  gradX0[qx][2] = 0.0;
357 
358  gradX1[qx][0] = 0.0;
359  gradX1[qx][1] = 0.0;
360  gradX1[qx][2] = 0.0;
361 
362  gradX2[qx][0] = 0.0;
363  gradX2[qx][1] = 0.0;
364  gradX2[qx][2] = 0.0;
365  }
366  for (int dx = 0; dx < D1D; ++dx)
367  {
368  const double s0 = x(dx, dy, dz, 0, e);
369  const double s1 = x(dx, dy, dz, 1, e);
370  const double s2 = x(dx, dy, dz, 2, e);
371  for (int qx = 0; qx < Q1D; ++qx)
372  {
373  const double Bx = B(qx, dx);
374  const double Gx = G(qx, dx);
375 
376  dataX[qx][0] += s0 * Bx;
377  dataX[qx][1] += s1 * Bx;
378  dataX[qx][2] += s2 * Bx;
379 
380  gradX0[qx][0] += s0 * Gx;
381  gradX0[qx][1] += s0 * Bx;
382  gradX0[qx][2] += s0 * Bx;
383 
384  gradX1[qx][0] += s1 * Gx;
385  gradX1[qx][1] += s1 * Bx;
386  gradX1[qx][2] += s1 * Bx;
387 
388  gradX2[qx][0] += s2 * Gx;
389  gradX2[qx][1] += s2 * Bx;
390  gradX2[qx][2] += s2 * Bx;
391  }
392  }
393  for (int qy = 0; qy < Q1D; ++qy)
394  {
395  const double By = B(qy, dy);
396  const double Gy = G(qy, dy);
397  for (int qx = 0; qx < Q1D; ++qx)
398  {
399  dataXY[qy][qx][0] += dataX[qx][0] * By;
400  dataXY[qy][qx][1] += dataX[qx][1] * By;
401  dataXY[qy][qx][2] += dataX[qx][2] * By;
402 
403  gradXY0[qy][qx][0] += gradX0[qx][0] * By;
404  gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
405  gradXY0[qy][qx][2] += gradX0[qx][2] * By;
406 
407  gradXY1[qy][qx][0] += gradX1[qx][0] * By;
408  gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
409  gradXY1[qy][qx][2] += gradX1[qx][2] * By;
410 
411  gradXY2[qy][qx][0] += gradX2[qx][0] * By;
412  gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
413  gradXY2[qy][qx][2] += gradX2[qx][2] * By;
414  }
415  }
416  }
417  for (int qz = 0; qz < Q1D; ++qz)
418  {
419  const double Bz = B(qz, dz);
420  const double Gz = G(qz, dz);
421  for (int qy = 0; qy < Q1D; ++qy)
422  {
423  for (int qx = 0; qx < Q1D; ++qx)
424  {
425  data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
426  data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
427  data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
428 
429  grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
430  grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
431  grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
432 
433  grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
434  grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
435  grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
436 
437  grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
438  grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
439  grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
440  }
441  }
442  }
443  }
444  for (int qz = 0; qz < Q1D; ++qz)
445  {
446  for (int qy = 0; qy < Q1D; ++qy)
447  {
448  for (int qx = 0; qx < Q1D; ++qx)
449  {
450  const int q = qx + Q1D * (qy + qz * Q1D);
451 
452  const double u1 = data[qz][qy][qx][0];
453  const double u2 = data[qz][qy][qx][1];
454  const double u3 = data[qz][qy][qx][2];
455 
456  const double grad00 = grad0[qz][qy][qx][0];
457  const double grad01 = grad0[qz][qy][qx][1];
458  const double grad02 = grad0[qz][qy][qx][2];
459 
460  const double grad10 = grad1[qz][qy][qx][0];
461  const double grad11 = grad1[qz][qy][qx][1];
462  const double grad12 = grad1[qz][qy][qx][2];
463 
464  const double grad20 = grad2[qz][qy][qx][0];
465  const double grad21 = grad2[qz][qy][qx][1];
466  const double grad22 = grad2[qz][qy][qx][2];
467 
468  const double Dxu1 = grad00 * Q(q, 0, 0, e)
469  + grad01 * Q(q, 1, 0, e)
470  + grad02 * Q(q, 2, 0, e);
471  const double Dyu1 = grad00 * Q(q, 0, 1, e)
472  + grad01 * Q(q, 1, 1, e)
473  + grad02 * Q(q, 2, 1, e);
474  const double Dzu1 = grad00 * Q(q, 0, 2, e)
475  + grad01 * Q(q, 1, 2, e)
476  + grad02 * Q(q, 2, 2, e);
477 
478  const double Dxu2 = grad10 * Q(q, 0, 0, e)
479  + grad11 * Q(q, 1, 0, e)
480  + grad12 * Q(q, 2, 0, e);
481  const double Dyu2 = grad10 * Q(q, 0, 1, e)
482  + grad11 * Q(q, 1, 1, e)
483  + grad12 * Q(q, 2, 1, e);
484  const double Dzu2 = grad10 * Q(q, 0, 2, e)
485  + grad11 * Q(q, 1, 2, e)
486  + grad12 * Q(q, 2, 2, e);
487 
488  const double Dxu3 = grad20 * Q(q, 0, 0, e)
489  + grad21 * Q(q, 1, 0, e)
490  + grad22 * Q(q, 2, 0, e);
491  const double Dyu3 = grad20 * Q(q, 0, 1, e)
492  + grad21 * Q(q, 1, 1, e)
493  + grad22 * Q(q, 2, 1, e);
494  const double Dzu3 = grad20 * Q(q, 0, 2, e)
495  + grad21 * Q(q, 1, 2, e)
496  + grad22 * Q(q, 2, 2, e);
497 
498  Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
499  Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
500  Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
501  }
502  }
503  }
504  for (int qz = 0; qz < Q1D; ++qz)
505  {
506  double opXY[max_D1D][max_D1D][VDIM];
507  for (int dy = 0; dy < D1D; ++dy)
508  {
509  for (int dx = 0; dx < D1D; ++dx)
510  {
511  opXY[dy][dx][0] = 0.0;
512  opXY[dy][dx][1] = 0.0;
513  opXY[dy][dx][2] = 0.0;
514  }
515  }
516  for (int qy = 0; qy < Q1D; ++qy)
517  {
518  double opX[max_D1D][VDIM];
519  for (int dx = 0; dx < D1D; ++dx)
520  {
521  opX[dx][0] = 0.0;
522  opX[dx][1] = 0.0;
523  opX[dx][2] = 0.0;
524  for (int qx = 0; qx < Q1D; ++qx)
525  {
526  const double Btx = Bt(dx, qx);
527  opX[dx][0] += Btx * Z[qz][qy][qx][0];
528  opX[dx][1] += Btx * Z[qz][qy][qx][1];
529  opX[dx][2] += Btx * Z[qz][qy][qx][2];
530  }
531  }
532  for (int dy = 0; dy < D1D; ++dy)
533  {
534  for (int dx = 0; dx < D1D; ++dx)
535  {
536  const double Bty = Bt(dy, qy);
537  opXY[dy][dx][0] += Bty * opX[dx][0];
538  opXY[dy][dx][1] += Bty * opX[dx][1];
539  opXY[dy][dx][2] += Bty * opX[dx][2];
540  }
541  }
542  }
543  for (int dz = 0; dz < D1D; ++dz)
544  {
545  for (int dy = 0; dy < D1D; ++dy)
546  {
547  for (int dx = 0; dx < D1D; ++dx)
548  {
549  const double Btz = Bt(dz, qz);
550  y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
551  y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
552  y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
553  }
554  }
555  }
556  }
557  });
558 }
559 
560 template<int T_D1D = 0, int T_Q1D = 0, int T_MAX_D1D =0, int T_MAX_Q1D =0>
561 static void SmemPAConvectionNLApply3D(const int NE,
562  const Array<double> &b_,
563  const Array<double> &g_,
564  const Vector &d_,
565  const Vector &x_,
566  Vector &y_,
567  const int d1d = 0,
568  const int q1d = 0)
569 {
570  constexpr int VDIM = 3;
571  const int D1D = T_D1D ? T_D1D : d1d;
572  const int Q1D = T_Q1D ? T_Q1D : q1d;
573  constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
574  constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
575  MFEM_VERIFY(D1D <= MD1, "");
576  MFEM_VERIFY(Q1D <= MQ1, "");
577 
578  auto b = Reshape(b_.Read(), Q1D, D1D);
579  auto g = Reshape(g_.Read(), Q1D, D1D);
580  auto D = Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
581  auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
582  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
583 
584  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
585  {
586  const int tidz = MFEM_THREAD_ID(z);
587  const int D1D = T_D1D ? T_D1D : d1d;
588  const int Q1D = T_Q1D ? T_Q1D : q1d;
589  constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
590  constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
591  MFEM_SHARED double BG[2][MQ1 * MD1];
592  double(*B)[MD1] = (double(*)[MD1])(BG + 0);
593  double(*G)[MD1] = (double(*)[MD1])(BG + 1);
594  double(*Bt)[MQ1] = (double(*)[MQ1])(BG + 0);
595  MFEM_SHARED double U[2][MQ1][MQ1][MQ1];
596  MFEM_SHARED double sm0[3][MQ1 * MQ1 * MQ1];
597  MFEM_SHARED double sm1[3][MQ1 * MQ1 * MQ1];
598  double(*DDQ0)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 0);
599  double(*DDQ1)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 1);
600  double(*X)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 2);
601  double(*DQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 0);
602  double(*DQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 1);
603  double(*DQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 2);
604  double(*QQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 0);
605  double(*QQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 1);
606  double(*QQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 2);
607  double(*QQD0)[MQ1][MD1] = (double(*)[MQ1][MD1])(sm1 + 0);
608  double(*QDD0)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 0);
609  MFEM_SHARED double Z[MQ1][MQ1][MQ1];
610 
611  for (int cy = 0; cy < VDIM; ++cy)
612  {
613  if (tidz == 0)
614  {
615  MFEM_FOREACH_THREAD(q, x, Q1D)
616  {
617  MFEM_FOREACH_THREAD(d, y, D1D)
618  {
619  B[q][d] = b(q, d);
620  G[q][d] = g(q, d);
621  }
622  }
623  }
624  MFEM_FOREACH_THREAD(qz, z, Q1D)
625  {
626  MFEM_FOREACH_THREAD(qy, y, Q1D)
627  {
628  MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
629  }
630  }
631  MFEM_SYNC_THREAD;
632  for (int c = 0; c < VDIM; ++c)
633  {
634  MFEM_FOREACH_THREAD(dz, z, D1D)
635  {
636  MFEM_FOREACH_THREAD(dy, y, D1D)
637  {
638  MFEM_FOREACH_THREAD(dx, x, D1D)
639  {
640  X[dz][dy][dx] = x(dx, dy, dz, cy, e);
641  U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
642  }
643  }
644  }
645  MFEM_SYNC_THREAD;
646  MFEM_FOREACH_THREAD(dz, z, D1D)
647  {
648  MFEM_FOREACH_THREAD(dy, y, D1D)
649  {
650  MFEM_FOREACH_THREAD(qx, x, Q1D)
651  {
652  double u = 0.0;
653  double v = 0.0;
654  double z = 0.0;
655  for (int dx = 0; dx < D1D; ++dx)
656  {
657  const double coord = X[dz][dy][dx];
658  const double value = U[0][dz][dy][dx];
659  u += coord * B[qx][dx];
660  v += coord * G[qx][dx];
661  z += value * B[qx][dx];
662  }
663  DDQ0[dz][dy][qx] = u;
664  DDQ1[dz][dy][qx] = v;
665  U[1][dz][dy][qx] = z;
666  }
667  }
668  }
669  MFEM_SYNC_THREAD;
670  MFEM_FOREACH_THREAD(dz, z, D1D)
671  {
672  MFEM_FOREACH_THREAD(qy, y, Q1D)
673  {
674  MFEM_FOREACH_THREAD(qx, x, Q1D)
675  {
676  double u = 0.0;
677  double v = 0.0;
678  double w = 0.0;
679  double z = 0.0;
680  for (int dy = 0; dy < D1D; ++dy)
681  {
682  u += DDQ1[dz][dy][qx] * B[qy][dy];
683  v += DDQ0[dz][dy][qx] * G[qy][dy];
684  w += DDQ0[dz][dy][qx] * B[qy][dy];
685  z += U[1][dz][dy][qx] * B[qy][dy];
686  }
687  DQQ0[dz][qy][qx] = u;
688  DQQ1[dz][qy][qx] = v;
689  DQQ2[dz][qy][qx] = w;
690  U[0][dz][qy][qx] = z;
691  }
692  }
693  }
694  MFEM_SYNC_THREAD;
695  MFEM_FOREACH_THREAD(qz, z, Q1D)
696  {
697  MFEM_FOREACH_THREAD(qy, y, Q1D)
698  {
699  MFEM_FOREACH_THREAD(qx, x, Q1D)
700  {
701  double u = 0.0;
702  double v = 0.0;
703  double w = 0.0;
704  double z = 0.0;
705  for (int dz = 0; dz < D1D; ++dz)
706  {
707  u += DQQ0[dz][qy][qx] * B[qz][dz];
708  v += DQQ1[dz][qy][qx] * B[qz][dz];
709  w += DQQ2[dz][qy][qx] * G[qz][dz];
710  z += U[0][dz][qy][qx] * B[qz][dz];
711  }
712  QQQ0[qz][qy][qx] = u;
713  QQQ1[qz][qy][qx] = v;
714  QQQ2[qz][qy][qx] = w;
715  U[1][qz][qy][qx] = z;
716  }
717  }
718  }
719  MFEM_SYNC_THREAD;
720  MFEM_FOREACH_THREAD(qz, z, Q1D)
721  {
722  MFEM_FOREACH_THREAD(qy, y, Q1D)
723  {
724  MFEM_FOREACH_THREAD(qx, x, Q1D)
725  {
726  const int q = qx + (qy + qz * Q1D) * Q1D;
727  const double z = U[1][qz][qy][qx];
728  const double gX = QQQ0[qz][qy][qx];
729  const double gY = QQQ1[qz][qy][qx];
730  const double gZ = QQQ2[qz][qy][qx];
731  const double d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
732  + gZ * D(q, 2, c, e);
733  Z[qz][qy][qx] += z * d;
734  }
735  }
736  }
737  MFEM_SYNC_THREAD;
738  } // for each conv component
739  if (tidz == 0)
740  {
741  MFEM_FOREACH_THREAD(d, y, D1D)
742  {
743  MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] = b(q, d); }
744  }
745  }
746  MFEM_SYNC_THREAD;
747  MFEM_FOREACH_THREAD(qz, z, Q1D)
748  {
749  MFEM_FOREACH_THREAD(qy, y, Q1D)
750  {
751  MFEM_FOREACH_THREAD(dx, x, D1D)
752  {
753  double u = 0.0;
754  for (int qx = 0; qx < Q1D; ++qx)
755  {
756  u += Z[qz][qy][qx] * Bt[dx][qx];
757  }
758  QQD0[qz][qy][dx] = u;
759  }
760  }
761  }
762  MFEM_SYNC_THREAD;
763  MFEM_FOREACH_THREAD(qz, z, Q1D)
764  {
765  MFEM_FOREACH_THREAD(dy, y, D1D)
766  {
767  MFEM_FOREACH_THREAD(dx, x, D1D)
768  {
769  double u = 0.0;
770  for (int qy = 0; qy < Q1D; ++qy)
771  {
772  u += QQD0[qz][qy][dx] * Bt[dy][qy];
773  }
774  QDD0[qz][dy][dx] = u;
775  }
776  }
777  }
778  MFEM_SYNC_THREAD;
779  MFEM_FOREACH_THREAD(dz, z, D1D)
780  {
781  MFEM_FOREACH_THREAD(dy, y, D1D)
782  {
783  MFEM_FOREACH_THREAD(dx, x, D1D)
784  {
785  double u = 0.0;
786  for (int qz = 0; qz < Q1D; ++qz)
787  {
788  u += QDD0[qz][dy][dx] * Bt[dz][qz];
789  }
790  Y(dx, dy, dz, cy, e) += u;
791  }
792  }
793  }
794  MFEM_SYNC_THREAD;
795  }
796  });
797 }
798 
799 void VectorConvectionNLFIntegrator::AddMultPA(const Vector &x, Vector &y) const
800 {
801  if (DeviceCanUseCeed())
802  {
803  ceedOp->AddMult(x, y);
804  }
805  else
806  {
807  const int NE = ne;
808  const int D1D = maps->ndof;
809  const int Q1D = maps->nqpt;
810  const Vector &QV = pa_data;
811  const Array<double> &B = maps->B;
812  const Array<double> &G = maps->G;
813  const Array<double> &Bt = maps->Bt;
814  if (dim == 2)
815  {
816  return PAConvectionNLApply2D(NE, B, G, Bt, QV, x, y, D1D, Q1D);
817  }
818  if (dim == 3)
819  {
820  constexpr int T_MAX_D1D = 8;
821  constexpr int T_MAX_Q1D = 8;
822  MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D, "Not yet implemented!");
823  return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
824  (NE, B, G, QV, x, y, D1D, Q1D);
825  }
826  MFEM_ABORT("Not yet implemented!");
827  }
828 }
829 
830 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe_base.hpp:235
Ordering::Type GetOrdering() const
Return the ordering method.
Definition: fespace.hpp:575
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
A coefficient that is constant across space and time.
Definition: coefficient.hpp:78
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:928
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
const int MAX_Q1D
Definition: forall.hpp:29
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:433
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:88
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:366
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:454
int dim
Definition: ex24.cpp:53
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:55
const int MAX_D1D
Definition: forall.hpp:28
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2783
Vector data type.
Definition: vector.hpp:60
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:438