MFEM  v4.1.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
nonlininteg_vectorconvection.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "nonlininteg.hpp"
14 
15 using namespace std;
16 
17 namespace mfem
18 {
19 void VectorConvectionNLFIntegrator::AssemblePA(const FiniteElementSpace &fes)
20 {
21  MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES,
22  "PA Only supports Ordering::byNODES!");
23  Mesh *mesh = fes.GetMesh();
24  const FiniteElement &el = *fes.GetFE(0);
25  ElementTransformation &T = *mesh->GetElementTransformation(0);
26  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T);
27  dim = mesh->Dimension();
28  ne = fes.GetMesh()->GetNE();
29  nq = ir->GetNPoints();
30  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
31  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
32  pa_data.SetSize(ne * nq * dim * dim, Device::GetMemoryType());
33  double COEFF = 1.0;
34  if (Q)
35  {
36  ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient *>(Q);
37  MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
38  COEFF = cQ->constant;
39  }
40  const int NE = ne;
41  const int NQ = nq;
42  auto W = ir->GetWeights().Read();
43  if (dim == 1)
44  {
45  MFEM_ABORT("dim==1 not supported!");
46  }
47  if (dim == 2)
48  {
49  auto J = Reshape(geom->J.Read(), NQ, 2, 2, NE);
50  auto G = Reshape(pa_data.Write(), NQ, 2, 2, NE);
51  MFEM_FORALL(e, NE,
52  {
53  for (int q = 0; q < NQ; ++q)
54  {
55  const double J11 = J(q, 0, 0, e);
56  const double J12 = J(q, 0, 1, e);
57  const double J21 = J(q, 1, 0, e);
58  const double J22 = J(q, 1, 1, e);
59  // Store wq * Q * adj(J)
60  G(q, 0, 0, e) = W[q] * COEFF * J22; // 1,1
61  G(q, 0, 1, e) = W[q] * COEFF * -J12; // 1,2
62  G(q, 1, 0, e) = W[q] * COEFF * -J21; // 2,1
63  G(q, 1, 1, e) = W[q] * COEFF * J11; // 2,2
64  }
65  });
66  }
67  if (dim == 3)
68  {
69  auto J = Reshape(geom->J.Read(), NQ, 3, 3, NE);
70  auto G = Reshape(pa_data.Write(), NQ, 3, 3, NE);
71  MFEM_FORALL(e, NE,
72  {
73  for (int q = 0; q < NQ; ++q)
74  {
75  const double J11 = J(q, 0, 0, e);
76  const double J21 = J(q, 1, 0, e);
77  const double J31 = J(q, 2, 0, e);
78  const double J12 = J(q, 0, 1, e);
79  const double J22 = J(q, 1, 1, e);
80  const double J32 = J(q, 2, 1, e);
81  const double J13 = J(q, 0, 2, e);
82  const double J23 = J(q, 1, 2, e);
83  const double J33 = J(q, 2, 2, e);
84  const double cw = W[q] * COEFF;
85  // adj(J)
86  const double A11 = (J22 * J33) - (J23 * J32);
87  const double A12 = (J32 * J13) - (J12 * J33);
88  const double A13 = (J12 * J23) - (J22 * J13);
89  const double A21 = (J31 * J23) - (J21 * J33);
90  const double A22 = (J11 * J33) - (J13 * J31);
91  const double A23 = (J21 * J13) - (J11 * J23);
92  const double A31 = (J21 * J32) - (J31 * J22);
93  const double A32 = (J31 * J12) - (J11 * J32);
94  const double A33 = (J11 * J22) - (J12 * J21);
95  // Store wq * Q * adj(J)
96  G(q, 0, 0, e) = cw * A11; // 1,1
97  G(q, 0, 1, e) = cw * A12; // 1,2
98  G(q, 0, 2, e) = cw * A13; // 1,3
99  G(q, 1, 0, e) = cw * A21; // 2,1
100  G(q, 1, 1, e) = cw * A22; // 2,2
101  G(q, 1, 2, e) = cw * A23; // 2,3
102  G(q, 2, 0, e) = cw * A31; // 3,1
103  G(q, 2, 1, e) = cw * A32; // 3,2
104  G(q, 2, 2, e) = cw * A33; // 3,3
105  }
106  });
107  }
108 }
109 
110 // PA Convection NL 2D kernel
111 template<int T_D1D = 0, int T_Q1D = 0>
112 static void PAConvectionNLApply2D(const int NE,
113  const Array<double> &b,
114  const Array<double> &g,
115  const Array<double> &bt,
116  const Vector &q_,
117  const Vector &x_,
118  Vector &y_,
119  const int d1d = 0,
120  const int q1d = 0)
121 {
122  const int D1D = T_D1D ? T_D1D : d1d;
123  const int Q1D = T_Q1D ? T_Q1D : q1d;
124  MFEM_VERIFY(D1D <= MAX_D1D, "");
125  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
126  auto B = Reshape(b.Read(), Q1D, D1D);
127  auto G = Reshape(g.Read(), Q1D, D1D);
128  auto Bt = Reshape(bt.Read(), D1D, Q1D);
129  auto Q = Reshape(q_.Read(), Q1D * Q1D, 2, 2, NE);
130  auto x = Reshape(x_.Read(), D1D, D1D, 2, NE);
131  auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NE);
132  MFEM_FORALL(e, NE,
133  {
134  const int D1D = T_D1D ? T_D1D : d1d;
135  const int Q1D = T_Q1D ? T_Q1D : q1d;
136  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
137  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
138 
139  double data[max_Q1D][max_Q1D][2];
140  double grad0[max_Q1D][max_Q1D][2];
141  double grad1[max_Q1D][max_Q1D][2];
142  double Z[max_Q1D][max_Q1D][2];
143  for (int qy = 0; qy < Q1D; ++qy)
144  {
145  for (int qx = 0; qx < Q1D; ++qx)
146  {
147  data[qy][qx][0] = 0.0;
148  data[qy][qx][1] = 0.0;
149  grad0[qy][qx][0] = 0.0;
150  grad0[qy][qx][1] = 0.0;
151  grad1[qy][qx][0] = 0.0;
152  grad1[qy][qx][1] = 0.0;
153  }
154  }
155  for (int dy = 0; dy < D1D; ++dy)
156  {
157  double dataX[max_Q1D][2];
158  double gradX0[max_Q1D][2];
159  double gradX1[max_Q1D][2];
160  for (int qx = 0; qx < Q1D; ++qx)
161  {
162  dataX[qx][0] = 0.0;
163  dataX[qx][1] = 0.0;
164  gradX0[qx][0] = 0.0;
165  gradX0[qx][1] = 0.0;
166  gradX1[qx][0] = 0.0;
167  gradX1[qx][1] = 0.0;
168  }
169  for (int dx = 0; dx < D1D; ++dx)
170  {
171  const double s0 = x(dx, dy, 0, e);
172  const double s1 = x(dx, dy, 1, e);
173  for (int qx = 0; qx < Q1D; ++qx)
174  {
175  const double Bx = B(qx, dx);
176  const double Gx = G(qx, dx);
177  dataX[qx][0] += s0 * Bx;
178  dataX[qx][1] += s1 * Bx;
179  gradX0[qx][0] += s0 * Gx;
180  gradX0[qx][1] += s0 * Bx;
181  gradX1[qx][0] += s1 * Gx;
182  gradX1[qx][1] += s1 * Bx;
183  }
184  }
185  for (int qy = 0; qy < Q1D; ++qy)
186  {
187  const double By = B(qy, dy);
188  const double Gy = G(qy, dy);
189  for (int qx = 0; qx < Q1D; ++qx)
190  {
191  data[qy][qx][0] += dataX[qx][0] * By;
192  data[qy][qx][1] += dataX[qx][1] * By;
193  grad0[qy][qx][0] += gradX0[qx][0] * By;
194  grad0[qy][qx][1] += gradX0[qx][1] * Gy;
195  grad1[qy][qx][0] += gradX1[qx][0] * By;
196  grad1[qy][qx][1] += gradX1[qx][1] * Gy;
197  }
198  }
199  }
200  for (int qy = 0; qy < Q1D; ++qy)
201  {
202  for (int qx = 0; qx < Q1D; ++qx)
203  {
204  const int q = qx + qy * Q1D;
205  const double u1 = data[qy][qx][0];
206  const double u2 = data[qy][qx][1];
207  const double grad00 = grad0[qy][qx][0];
208  const double grad01 = grad0[qy][qx][1];
209  const double grad10 = grad1[qy][qx][0];
210  const double grad11 = grad1[qy][qx][1];
211  const double Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
212  const double Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
213  const double Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
214  const double Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
215  Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
216  Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
217  }
218  }
219  for (int qy = 0; qy < Q1D; ++qy)
220  {
221  double Y[max_D1D][2];
222  for (int dx = 0; dx < D1D; ++dx)
223  {
224  Y[dx][0] = 0.0;
225  Y[dx][1] = 0.0;
226  for (int qx = 0; qx < Q1D; ++qx)
227  {
228  const double Btx = Bt(dx, qx);
229  Y[dx][0] += Btx * Z[qy][qx][0];
230  Y[dx][1] += Btx * Z[qy][qx][1];
231  }
232  }
233  for (int dy = 0; dy < D1D; ++dy)
234  {
235  for (int dx = 0; dx < D1D; ++dx)
236  {
237  const double Bty = Bt(dy, qy);
238  y(dx, dy, 0, e) += Bty * Y[dx][0];
239  y(dx, dy, 1, e) += Bty * Y[dx][1];
240  }
241  }
242  }
243  });
244 }
245 
246 // PA Convection NL 3D kernel
247 template<int T_D1D = 0, int T_Q1D = 0>
248 static void PAConvectionNLApply3D(const int NE,
249  const Array<double> &b,
250  const Array<double> &g,
251  const Array<double> &bt,
252  const Vector &q_,
253  const Vector &x_,
254  Vector &y_,
255  const int d1d = 0,
256  const int q1d = 0)
257 {
258  constexpr int VDIM = 3;
259  const int D1D = T_D1D ? T_D1D : d1d;
260  const int Q1D = T_Q1D ? T_Q1D : q1d;
261  MFEM_VERIFY(D1D <= MAX_D1D, "");
262  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
263 
264  auto B = Reshape(b.Read(), Q1D, D1D);
265  auto G = Reshape(g.Read(), Q1D, D1D);
266  auto Bt = Reshape(bt.Read(), D1D, Q1D);
267  auto Q = Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
268  auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
269  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
270 
271  MFEM_FORALL(e, NE,
272  {
273  constexpr int VDIM = 3;
274  const int D1D = T_D1D ? T_D1D : d1d;
275  const int Q1D = T_Q1D ? T_Q1D : q1d;
276  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
277  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
278 
279  double data[max_Q1D][max_Q1D][max_Q1D][VDIM];
280  double grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
281  double grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
282  double grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
283  double Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
284  for (int qz = 0; qz < Q1D; ++qz)
285  {
286  for (int qy = 0; qy < Q1D; ++qy)
287  {
288  for (int qx = 0; qx < Q1D; ++qx)
289  {
290  data[qz][qy][qx][0] = 0.0;
291  data[qz][qy][qx][1] = 0.0;
292  data[qz][qy][qx][2] = 0.0;
293 
294  grad0[qz][qy][qx][0] = 0.0;
295  grad0[qz][qy][qx][1] = 0.0;
296  grad0[qz][qy][qx][2] = 0.0;
297 
298  grad1[qz][qy][qx][0] = 0.0;
299  grad1[qz][qy][qx][1] = 0.0;
300  grad1[qz][qy][qx][2] = 0.0;
301 
302  grad2[qz][qy][qx][0] = 0.0;
303  grad2[qz][qy][qx][1] = 0.0;
304  grad2[qz][qy][qx][2] = 0.0;
305  }
306  }
307  }
308  for (int dz = 0; dz < D1D; ++dz)
309  {
310  double dataXY[max_Q1D][max_Q1D][VDIM];
311  double gradXY0[max_Q1D][max_Q1D][VDIM];
312  double gradXY1[max_Q1D][max_Q1D][VDIM];
313  double gradXY2[max_Q1D][max_Q1D][VDIM];
314  for (int qy = 0; qy < Q1D; ++qy)
315  {
316  for (int qx = 0; qx < Q1D; ++qx)
317  {
318  dataXY[qy][qx][0] = 0.0;
319  dataXY[qy][qx][1] = 0.0;
320  dataXY[qy][qx][2] = 0.0;
321 
322  gradXY0[qy][qx][0] = 0.0;
323  gradXY0[qy][qx][1] = 0.0;
324  gradXY0[qy][qx][2] = 0.0;
325 
326  gradXY1[qy][qx][0] = 0.0;
327  gradXY1[qy][qx][1] = 0.0;
328  gradXY1[qy][qx][2] = 0.0;
329 
330  gradXY2[qy][qx][0] = 0.0;
331  gradXY2[qy][qx][1] = 0.0;
332  gradXY2[qy][qx][2] = 0.0;
333  }
334  }
335  for (int dy = 0; dy < D1D; ++dy)
336  {
337  double dataX[max_Q1D][VDIM];
338  double gradX0[max_Q1D][VDIM];
339  double gradX1[max_Q1D][VDIM];
340  double gradX2[max_Q1D][VDIM];
341  for (int qx = 0; qx < Q1D; ++qx)
342  {
343  dataX[qx][0] = 0.0;
344  dataX[qx][1] = 0.0;
345  dataX[qx][2] = 0.0;
346 
347  gradX0[qx][0] = 0.0;
348  gradX0[qx][1] = 0.0;
349  gradX0[qx][2] = 0.0;
350 
351  gradX1[qx][0] = 0.0;
352  gradX1[qx][1] = 0.0;
353  gradX1[qx][2] = 0.0;
354 
355  gradX2[qx][0] = 0.0;
356  gradX2[qx][1] = 0.0;
357  gradX2[qx][2] = 0.0;
358  }
359  for (int dx = 0; dx < D1D; ++dx)
360  {
361  const double s0 = x(dx, dy, dz, 0, e);
362  const double s1 = x(dx, dy, dz, 1, e);
363  const double s2 = x(dx, dy, dz, 2, e);
364  for (int qx = 0; qx < Q1D; ++qx)
365  {
366  const double Bx = B(qx, dx);
367  const double Gx = G(qx, dx);
368 
369  dataX[qx][0] += s0 * Bx;
370  dataX[qx][1] += s1 * Bx;
371  dataX[qx][2] += s2 * Bx;
372 
373  gradX0[qx][0] += s0 * Gx;
374  gradX0[qx][1] += s0 * Bx;
375  gradX0[qx][2] += s0 * Bx;
376 
377  gradX1[qx][0] += s1 * Gx;
378  gradX1[qx][1] += s1 * Bx;
379  gradX1[qx][2] += s1 * Bx;
380 
381  gradX2[qx][0] += s2 * Gx;
382  gradX2[qx][1] += s2 * Bx;
383  gradX2[qx][2] += s2 * Bx;
384  }
385  }
386  for (int qy = 0; qy < Q1D; ++qy)
387  {
388  const double By = B(qy, dy);
389  const double Gy = G(qy, dy);
390  for (int qx = 0; qx < Q1D; ++qx)
391  {
392  dataXY[qy][qx][0] += dataX[qx][0] * By;
393  dataXY[qy][qx][1] += dataX[qx][1] * By;
394  dataXY[qy][qx][2] += dataX[qx][2] * By;
395 
396  gradXY0[qy][qx][0] += gradX0[qx][0] * By;
397  gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
398  gradXY0[qy][qx][2] += gradX0[qx][2] * By;
399 
400  gradXY1[qy][qx][0] += gradX1[qx][0] * By;
401  gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
402  gradXY1[qy][qx][2] += gradX1[qx][2] * By;
403 
404  gradXY2[qy][qx][0] += gradX2[qx][0] * By;
405  gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
406  gradXY2[qy][qx][2] += gradX2[qx][2] * By;
407  }
408  }
409  }
410  for (int qz = 0; qz < Q1D; ++qz)
411  {
412  const double Bz = B(qz, dz);
413  const double Gz = G(qz, dz);
414  for (int qy = 0; qy < Q1D; ++qy)
415  {
416  for (int qx = 0; qx < Q1D; ++qx)
417  {
418  data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
419  data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
420  data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
421 
422  grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
423  grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
424  grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
425 
426  grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
427  grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
428  grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
429 
430  grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
431  grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
432  grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
433  }
434  }
435  }
436  }
437  for (int qz = 0; qz < Q1D; ++qz)
438  {
439  for (int qy = 0; qy < Q1D; ++qy)
440  {
441  for (int qx = 0; qx < Q1D; ++qx)
442  {
443  const int q = qx + Q1D * (qy + qz * Q1D);
444 
445  const double u1 = data[qz][qy][qx][0];
446  const double u2 = data[qz][qy][qx][1];
447  const double u3 = data[qz][qy][qx][2];
448 
449  const double grad00 = grad0[qz][qy][qx][0];
450  const double grad01 = grad0[qz][qy][qx][1];
451  const double grad02 = grad0[qz][qy][qx][2];
452 
453  const double grad10 = grad1[qz][qy][qx][0];
454  const double grad11 = grad1[qz][qy][qx][1];
455  const double grad12 = grad1[qz][qy][qx][2];
456 
457  const double grad20 = grad2[qz][qy][qx][0];
458  const double grad21 = grad2[qz][qy][qx][1];
459  const double grad22 = grad2[qz][qy][qx][2];
460 
461  const double Dxu1 = grad00 * Q(q, 0, 0, e)
462  + grad01 * Q(q, 1, 0, e)
463  + grad02 * Q(q, 2, 0, e);
464  const double Dyu1 = grad00 * Q(q, 0, 1, e)
465  + grad01 * Q(q, 1, 1, e)
466  + grad02 * Q(q, 2, 1, e);
467  const double Dzu1 = grad00 * Q(q, 0, 2, e)
468  + grad01 * Q(q, 1, 2, e)
469  + grad02 * Q(q, 2, 2, e);
470 
471  const double Dxu2 = grad10 * Q(q, 0, 0, e)
472  + grad11 * Q(q, 1, 0, e)
473  + grad12 * Q(q, 2, 0, e);
474  const double Dyu2 = grad10 * Q(q, 0, 1, e)
475  + grad11 * Q(q, 1, 1, e)
476  + grad12 * Q(q, 2, 1, e);
477  const double Dzu2 = grad10 * Q(q, 0, 2, e)
478  + grad11 * Q(q, 1, 2, e)
479  + grad12 * Q(q, 2, 2, e);
480 
481  const double Dxu3 = grad20 * Q(q, 0, 0, e)
482  + grad21 * Q(q, 1, 0, e)
483  + grad22 * Q(q, 2, 0, e);
484  const double Dyu3 = grad20 * Q(q, 0, 1, e)
485  + grad21 * Q(q, 1, 1, e)
486  + grad22 * Q(q, 2, 1, e);
487  const double Dzu3 = grad20 * Q(q, 0, 2, e)
488  + grad21 * Q(q, 1, 2, e)
489  + grad22 * Q(q, 2, 2, e);
490 
491  Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
492  Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
493  Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
494  }
495  }
496  }
497  for (int qz = 0; qz < Q1D; ++qz)
498  {
499  double opXY[max_D1D][max_D1D][VDIM];
500  for (int dy = 0; dy < D1D; ++dy)
501  {
502  for (int dx = 0; dx < D1D; ++dx)
503  {
504  opXY[dy][dx][0] = 0.0;
505  opXY[dy][dx][1] = 0.0;
506  opXY[dy][dx][2] = 0.0;
507  }
508  }
509  for (int qy = 0; qy < Q1D; ++qy)
510  {
511  double opX[max_D1D][VDIM];
512  for (int dx = 0; dx < D1D; ++dx)
513  {
514  opX[dx][0] = 0.0;
515  opX[dx][1] = 0.0;
516  opX[dx][2] = 0.0;
517  for (int qx = 0; qx < Q1D; ++qx)
518  {
519  const double Btx = Bt(dx, qx);
520  opX[dx][0] += Btx * Z[qz][qy][qx][0];
521  opX[dx][1] += Btx * Z[qz][qy][qx][1];
522  opX[dx][2] += Btx * Z[qz][qy][qx][2];
523  }
524  }
525  for (int dy = 0; dy < D1D; ++dy)
526  {
527  for (int dx = 0; dx < D1D; ++dx)
528  {
529  const double Bty = Bt(dy, qy);
530  opXY[dy][dx][0] += Bty * opX[dx][0];
531  opXY[dy][dx][1] += Bty * opX[dx][1];
532  opXY[dy][dx][2] += Bty * opX[dx][2];
533  }
534  }
535  }
536  for (int dz = 0; dz < D1D; ++dz)
537  {
538  for (int dy = 0; dy < D1D; ++dy)
539  {
540  for (int dx = 0; dx < D1D; ++dx)
541  {
542  const double Btz = Bt(dz, qz);
543  y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
544  y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
545  y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
546  }
547  }
548  }
549  }
550  });
551 }
552 
553 template<int T_D1D = 0, int T_Q1D = 0, int T_MAX_D1D =0, int T_MAX_Q1D =0>
554 static void SmemPAConvectionNLApply3D(const int NE,
555  const Array<double> &b_,
556  const Array<double> &g_,
557  const Vector &d_,
558  const Vector &x_,
559  Vector &y_,
560  const int d1d = 0,
561  const int q1d = 0)
562 {
563  constexpr int VDIM = 3;
564  const int D1D = T_D1D ? T_D1D : d1d;
565  const int Q1D = T_Q1D ? T_Q1D : q1d;
566  constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
567  constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
568  MFEM_VERIFY(D1D <= MD1, "");
569  MFEM_VERIFY(Q1D <= MQ1, "");
570 
571  auto b = Reshape(b_.Read(), Q1D, D1D);
572  auto g = Reshape(g_.Read(), Q1D, D1D);
573  auto D = Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
574  auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
575  auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
576 
577  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
578  {
579  const int tidz = MFEM_THREAD_ID(z);
580  const int D1D = T_D1D ? T_D1D : d1d;
581  const int Q1D = T_Q1D ? T_Q1D : q1d;
582  constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
583  constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
584  MFEM_SHARED double BG[2][MQ1 * MD1];
585  double(*B)[MD1] = (double(*)[MD1])(BG + 0);
586  double(*G)[MD1] = (double(*)[MD1])(BG + 1);
587  double(*Bt)[MQ1] = (double(*)[MQ1])(BG + 0);
588  MFEM_SHARED double U[2][MQ1][MQ1][MQ1];
589  MFEM_SHARED double sm0[3][MQ1 * MQ1 * MQ1];
590  MFEM_SHARED double sm1[3][MQ1 * MQ1 * MQ1];
591  double(*DDQ0)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 0);
592  double(*DDQ1)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 1);
593  double(*X)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 2);
594  double(*DQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 0);
595  double(*DQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 1);
596  double(*DQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 2);
597  double(*QQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 0);
598  double(*QQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 1);
599  double(*QQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 2);
600  double(*QQD0)[MQ1][MD1] = (double(*)[MQ1][MD1])(sm1 + 0);
601  double(*QDD0)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 0);
602  MFEM_SHARED double Z[MQ1][MQ1][MQ1];
603 
604  for (int cy = 0; cy < VDIM; ++cy)
605  {
606  if (tidz == 0)
607  {
608  MFEM_FOREACH_THREAD(q, x, Q1D)
609  {
610  MFEM_FOREACH_THREAD(d, y, D1D)
611  {
612  B[q][d] = b(q, d);
613  G[q][d] = g(q, d);
614  }
615  }
616  }
617  MFEM_FOREACH_THREAD(qz, z, Q1D)
618  {
619  MFEM_FOREACH_THREAD(qy, y, Q1D)
620  {
621  MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
622  }
623  }
624  MFEM_SYNC_THREAD;
625  for (int c = 0; c < VDIM; ++c)
626  {
627  MFEM_FOREACH_THREAD(dz, z, D1D)
628  {
629  MFEM_FOREACH_THREAD(dy, y, D1D)
630  {
631  MFEM_FOREACH_THREAD(dx, x, D1D)
632  {
633  X[dz][dy][dx] = x(dx, dy, dz, cy, e);
634  U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
635  }
636  }
637  }
638  MFEM_SYNC_THREAD;
639  MFEM_FOREACH_THREAD(dz, z, D1D)
640  {
641  MFEM_FOREACH_THREAD(dy, y, D1D)
642  {
643  MFEM_FOREACH_THREAD(qx, x, Q1D)
644  {
645  double u = 0.0;
646  double v = 0.0;
647  double z = 0.0;
648  for (int dx = 0; dx < D1D; ++dx)
649  {
650  const double coord = X[dz][dy][dx];
651  const double value = U[0][dz][dy][dx];
652  u += coord * B[qx][dx];
653  v += coord * G[qx][dx];
654  z += value * B[qx][dx];
655  }
656  DDQ0[dz][dy][qx] = u;
657  DDQ1[dz][dy][qx] = v;
658  U[1][dz][dy][qx] = z;
659  }
660  }
661  }
662  MFEM_SYNC_THREAD;
663  MFEM_FOREACH_THREAD(dz, z, D1D)
664  {
665  MFEM_FOREACH_THREAD(qy, y, Q1D)
666  {
667  MFEM_FOREACH_THREAD(qx, x, Q1D)
668  {
669  double u = 0.0;
670  double v = 0.0;
671  double w = 0.0;
672  double z = 0.0;
673  for (int dy = 0; dy < D1D; ++dy)
674  {
675  u += DDQ1[dz][dy][qx] * B[qy][dy];
676  v += DDQ0[dz][dy][qx] * G[qy][dy];
677  w += DDQ0[dz][dy][qx] * B[qy][dy];
678  z += U[1][dz][dy][qx] * B[qy][dy];
679  }
680  DQQ0[dz][qy][qx] = u;
681  DQQ1[dz][qy][qx] = v;
682  DQQ2[dz][qy][qx] = w;
683  U[0][dz][qy][qx] = z;
684  }
685  }
686  }
687  MFEM_SYNC_THREAD;
688  MFEM_FOREACH_THREAD(qz, z, Q1D)
689  {
690  MFEM_FOREACH_THREAD(qy, y, Q1D)
691  {
692  MFEM_FOREACH_THREAD(qx, x, Q1D)
693  {
694  double u = 0.0;
695  double v = 0.0;
696  double w = 0.0;
697  double z = 0.0;
698  for (int dz = 0; dz < D1D; ++dz)
699  {
700  u += DQQ0[dz][qy][qx] * B[qz][dz];
701  v += DQQ1[dz][qy][qx] * B[qz][dz];
702  w += DQQ2[dz][qy][qx] * G[qz][dz];
703  z += U[0][dz][qy][qx] * B[qz][dz];
704  }
705  QQQ0[qz][qy][qx] = u;
706  QQQ1[qz][qy][qx] = v;
707  QQQ2[qz][qy][qx] = w;
708  U[1][qz][qy][qx] = z;
709  }
710  }
711  }
712  MFEM_SYNC_THREAD;
713  MFEM_FOREACH_THREAD(qz, z, Q1D)
714  {
715  MFEM_FOREACH_THREAD(qy, y, Q1D)
716  {
717  MFEM_FOREACH_THREAD(qx, x, Q1D)
718  {
719  const int q = qx + (qy + qz * Q1D) * Q1D;
720  const double z = U[1][qz][qy][qx];
721  const double gX = QQQ0[qz][qy][qx];
722  const double gY = QQQ1[qz][qy][qx];
723  const double gZ = QQQ2[qz][qy][qx];
724  const double d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
725  + gZ * D(q, 2, c, e);
726  Z[qz][qy][qx] += z * d;
727  }
728  }
729  }
730  MFEM_SYNC_THREAD;
731  } // for each conv component
732  if (tidz == 0)
733  {
734  MFEM_FOREACH_THREAD(d, y, D1D)
735  {
736  MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] = b(q, d); }
737  }
738  }
739  MFEM_SYNC_THREAD;
740  MFEM_FOREACH_THREAD(qz, z, Q1D)
741  {
742  MFEM_FOREACH_THREAD(qy, y, Q1D)
743  {
744  MFEM_FOREACH_THREAD(dx, x, D1D)
745  {
746  double u = 0.0;
747  for (int qx = 0; qx < Q1D; ++qx)
748  {
749  u += Z[qz][qy][qx] * Bt[dx][qx];
750  }
751  QQD0[qz][qy][dx] = u;
752  }
753  }
754  }
755  MFEM_SYNC_THREAD;
756  MFEM_FOREACH_THREAD(qz, z, Q1D)
757  {
758  MFEM_FOREACH_THREAD(dy, y, D1D)
759  {
760  MFEM_FOREACH_THREAD(dx, x, D1D)
761  {
762  double u = 0.0;
763  for (int qy = 0; qy < Q1D; ++qy)
764  {
765  u += QQD0[qz][qy][dx] * Bt[dy][qy];
766  }
767  QDD0[qz][dy][dx] = u;
768  }
769  }
770  }
771  MFEM_SYNC_THREAD;
772  MFEM_FOREACH_THREAD(dz, z, D1D)
773  {
774  MFEM_FOREACH_THREAD(dy, y, D1D)
775  {
776  MFEM_FOREACH_THREAD(dx, x, D1D)
777  {
778  double u = 0.0;
779  for (int qz = 0; qz < Q1D; ++qz)
780  {
781  u += QDD0[qz][dy][dx] * Bt[dz][qz];
782  }
783  Y(dx, dy, dz, cy, e) += u;
784  }
785  }
786  }
787  MFEM_SYNC_THREAD;
788  }
789  });
790 }
791 
792 void VectorConvectionNLFIntegrator::AddMultPA(const Vector &x, Vector &y) const
793 {
794  const int NE = ne;
795  const int D1D = maps->ndof;
796  const int Q1D = maps->nqpt;
797  const Vector &Q = pa_data;
798  const Array<double> &B = maps->B;
799  const Array<double> &G = maps->G;
800  const Array<double> &Bt = maps->Bt;
801  if (dim == 2)
802  {
803  return PAConvectionNLApply2D(NE, B, G, Bt, Q, x, y, D1D, Q1D);
804  }
805  if (dim == 3)
806  {
807  constexpr int T_MAX_D1D = 8;
808  constexpr int T_MAX_Q1D = 8;
809  MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D, "Not yet implemented!");
810  return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
811  (NE, B, G, Q, x, y, D1D, Q1D);
812  }
813  MFEM_ABORT("Not yet implemented!");
814 }
815 
816 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:245
Abstract class for Finite Elements.
Definition: fe.hpp:232
Ordering::Type GetOrdering() const
Return the ordering method.
Definition: fespace.hpp:393
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Subclass constant coefficient.
Definition: coefficient.hpp:67
const Geometry::Type geom
Definition: ex1.cpp:40
int GetNE() const
Returns number of elements.
Definition: mesh.hpp:693
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:85
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:134
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:349
const int MAX_Q1D
Definition: forall.hpp:36
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:295
double b
Definition: lissajous.cpp:42
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:273
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:87
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Definition: fe.cpp:370
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:333
int dim
Definition: ex24.cpp:43
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:49
const int MAX_D1D
Definition: forall.hpp:35
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement associated with i&#39;th element.
Definition: fespace.cpp:1642
Vector data type.
Definition: vector.hpp:48