MFEM  v4.4.0 Finite element discretization library
bilininteg_convection_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "ceed/convection.hpp"
17
18 namespace mfem
19 {
20
21 // PA Convection Integrator
22
23 // PA Convection Assemble 2D kernel
24 static void PAConvectionSetup2D(const int NQ,
25  const int NE,
26  const Array<double> &w,
27  const Vector &j,
28  const Vector &vel,
29  const double alpha,
30  Vector &op)
31 {
32  constexpr int DIM = 2;
33
34  const bool const_v = vel.Size() == DIM;
35
36  const auto W = w.Read();
37  const auto J = Reshape(j.Read(), NQ,DIM,DIM,NE);
38  const auto V = const_v ?
41  auto y = Reshape(op.Write(), NQ,DIM,NE);
42
43  MFEM_FORALL(q_global, NE*NQ,
44  {
45  const int e = q_global / NQ;
46  const int q = q_global % NQ;
47  const double J11 = J(q,0,0,e);
48  const double J21 = J(q,1,0,e);
49  const double J12 = J(q,0,1,e);
50  const double J22 = J(q,1,1,e);
51  const double w = alpha * W[q];
52  const double v0 = const_v ? V(0,0,0) : V(0,q,e);
53  const double v1 = const_v ? V(1,0,0) : V(1,q,e);
54  const double wx = w * v0;
55  const double wy = w * v1;
56  // y = alpha * W * det(J) * J^{-1} . v = adj(J) . { wx, wy }
57  y(q,0,e) = wx * J22 - wy * J12; // 1
58  y(q,1,e) = -wx * J21 + wy * J11; // 2
59  });
60 }
61
62 // PA Convection Assemble 3D kernel
63 static void PAConvectionSetup3D(const int NQ,
64  const int NE,
65  const Array<double> &w,
66  const Vector &j,
67  const Vector &vel,
68  const double alpha,
69  Vector &op)
70 {
71  constexpr int DIM = 3;
72  constexpr int SDIM = DIM;
73  const auto W = Reshape(w.Read(), NQ);
74  const auto J = Reshape(j.Read(), NQ,SDIM,DIM,NE);
75  const bool const_v = vel.Size() == DIM;
76  const auto V = const_v ?
79  auto y = Reshape(op.Write(), NQ,3,NE);
80  MFEM_FORALL(q_global, NE*NQ,
81  {
82  const int e = q_global / NQ;
83  const int q = q_global % NQ;
84  const double J11 = J(q,0,0,e);
85  const double J12 = J(q,0,1,e);
86  const double J13 = J(q,0,2,e);
87  const double J21 = J(q,1,0,e);
88  const double J22 = J(q,1,1,e);
89  const double J23 = J(q,1,2,e);
90  const double J31 = J(q,2,0,e);
91  const double J32 = J(q,2,1,e);
92  const double J33 = J(q,2,2,e);
93  const double w = alpha * W(q);
94  const double v0 = const_v ? V(0,0,0) : V(0,q,e);
95  const double v1 = const_v ? V(1,0,0) : V(1,q,e);
96  const double v2 = const_v ? V(2,0,0) : V(2,q,e);
97  const double wx = w * v0;
98  const double wy = w * v1;
99  const double wz = w * v2;
101  const double A11 = (J22 * J33) - (J23 * J32);
102  const double A12 = (J32 * J13) - (J12 * J33);
103  const double A13 = (J12 * J23) - (J22 * J13);
104  const double A21 = (J31 * J23) - (J21 * J33);
105  const double A22 = (J11 * J33) - (J13 * J31);
106  const double A23 = (J21 * J13) - (J11 * J23);
107  const double A31 = (J21 * J32) - (J31 * J22);
108  const double A32 = (J31 * J12) - (J11 * J32);
109  const double A33 = (J11 * J22) - (J12 * J21);
110  // y = alpha * W * det(J) * J^{-1} . v = adj(J) . { wx, wy, wz }
111  y(q,0,e) = wx * A11 + wy * A12 + wz * A13;
112  y(q,1,e) = wx * A21 + wy * A22 + wz * A23;
113  y(q,2,e) = wx * A31 + wy * A32 + wz * A33;
114  });
115 }
116
117 static void PAConvectionSetup(const int dim,
118  const int NQ,
119  const int NE,
120  const Array<double> &W,
121  const Vector &J,
122  const Vector &coeff,
123  const double alpha,
124  Vector &op)
125 {
126  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAConvectionSetup"); }
127  if (dim == 2)
128  {
129  PAConvectionSetup2D(NQ, NE, W, J, coeff, alpha, op);
130  }
131  if (dim == 3)
132  {
133  PAConvectionSetup3D(NQ, NE, W, J, coeff, alpha, op);
134  }
135 }
136
137 // PA Convection Apply 2D kernel
138 template<int T_D1D = 0, int T_Q1D = 0> static
139 void PAConvectionApply2D(const int ne,
140  const Array<double> &b,
141  const Array<double> &g,
142  const Array<double> &bt,
143  const Array<double> &gt,
144  const Vector &op_,
145  const Vector &x_,
146  Vector &y_,
147  const int d1d = 0,
148  const int q1d = 0)
149 {
150  const int NE = ne;
151  const int D1D = T_D1D ? T_D1D : d1d;
152  const int Q1D = T_Q1D ? T_Q1D : q1d;
153  MFEM_VERIFY(D1D <= MAX_D1D, "");
154  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
155  auto B = Reshape(b.Read(), Q1D, D1D);
156  auto G = Reshape(g.Read(), Q1D, D1D);
157  auto Bt = Reshape(bt.Read(), D1D, Q1D);
158  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
159  auto x = Reshape(x_.Read(), D1D, D1D, NE);
160  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
161  MFEM_FORALL(e, NE,
162  {
163  const int D1D = T_D1D ? T_D1D : d1d;
164  const int Q1D = T_Q1D ? T_Q1D : q1d;
165  // the following variables are evaluated at compile time
166  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
167  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
168
169  double u[max_D1D][max_D1D];
170  for (int dy = 0; dy < D1D; ++dy)
171  {
172  for (int dx = 0; dx < D1D; ++dx)
173  {
174  u[dy][dx] = x(dx,dy,e);
175  }
176  }
177  double Bu[max_D1D][max_Q1D];
178  double Gu[max_D1D][max_Q1D];
179  for (int dy = 0; dy < D1D; ++dy)
180  {
181  for (int qx = 0; qx < Q1D; ++qx)
182  {
183  Bu[dy][qx] = 0.0;
184  Gu[dy][qx] = 0.0;
185  for (int dx = 0; dx < D1D; ++dx)
186  {
187  const double bx = B(qx,dx);
188  const double gx = G(qx,dx);
189  const double x = u[dy][dx];
190  Bu[dy][qx] += bx * x;
191  Gu[dy][qx] += gx * x;
192  }
193  }
194  }
195  double GBu[max_Q1D][max_Q1D];
196  double BGu[max_Q1D][max_Q1D];
197  for (int qx = 0; qx < Q1D; ++qx)
198  {
199  for (int qy = 0; qy < Q1D; ++qy)
200  {
201  GBu[qy][qx] = 0.0;
202  BGu[qy][qx] = 0.0;
203  for (int dy = 0; dy < D1D; ++dy)
204  {
205  const double bx = B(qy,dy);
206  const double gx = G(qy,dy);
207  GBu[qy][qx] += gx * Bu[dy][qx];
208  BGu[qy][qx] += bx * Gu[dy][qx];
209  }
210  }
211  }
212  // Calculate Dxy, xDy in plane
213  double DGu[max_Q1D][max_Q1D];
214  for (int qy = 0; qy < Q1D; ++qy)
215  {
216  for (int qx = 0; qx < Q1D; ++qx)
217  {
218  const double O1 = op(qx,qy,0,e);
219  const double O2 = op(qx,qy,1,e);
220
221  const double gradX = BGu[qy][qx];
222  const double gradY = GBu[qy][qx];
223
225  }
226  }
227  double BDGu[max_D1D][max_Q1D];
228  for (int qx = 0; qx < Q1D; ++qx)
229  {
230  for (int dy = 0; dy < D1D; ++dy)
231  {
232  BDGu[dy][qx] = 0.0;
233  for (int qy = 0; qy < Q1D; ++qy)
234  {
235  const double w = Bt(dy,qy);
236  BDGu[dy][qx] += w * DGu[qy][qx];
237  }
238  }
239  }
240  for (int dx = 0; dx < D1D; ++dx)
241  {
242  for (int dy = 0; dy < D1D; ++dy)
243  {
244  double BBDGu = 0.0;
245  for (int qx = 0; qx < Q1D; ++qx)
246  {
247  const double w = Bt(dx,qx);
248  BBDGu += w * BDGu[dy][qx];
249  }
250  y(dx,dy,e) += BBDGu;
251  }
252  }
253  });
254 }
255
256 // Optimized PA Convection Apply 2D kernel
257 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
258 void SmemPAConvectionApply2D(const int ne,
259  const Array<double> &b,
260  const Array<double> &g,
261  const Array<double> &bt,
262  const Array<double> &gt,
263  const Vector &op_,
264  const Vector &x_,
265  Vector &y_,
266  const int d1d = 0,
267  const int q1d = 0)
268 {
269  const int NE = ne;
270  const int D1D = T_D1D ? T_D1D : d1d;
271  const int Q1D = T_Q1D ? T_Q1D : q1d;
272  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
273  MFEM_VERIFY(D1D <= MAX_D1D, "");
274  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
275  auto B = Reshape(b.Read(), Q1D, D1D);
276  auto G = Reshape(g.Read(), Q1D, D1D);
277  auto Bt = Reshape(bt.Read(), D1D, Q1D);
278  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
279  auto x = Reshape(x_.Read(), D1D, D1D, NE);
280  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
281  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
282  {
283  const int tidz = MFEM_THREAD_ID(z);
284  const int D1D = T_D1D ? T_D1D : d1d;
285  const int Q1D = T_Q1D ? T_Q1D : q1d;
286  // the following variables are evaluated at compile time
287  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
288  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
289  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
290  // constexpr int MDQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
291  MFEM_SHARED double u[NBZ][max_D1D][max_D1D];
293  {
295  {
296  // e is really equal to e+tidz
297  u[tidz][dy][dx] = x(dx,dy,e);
298  }
299  }
301  MFEM_SHARED double Bu[NBZ][max_D1D][max_Q1D];
302  MFEM_SHARED double Gu[NBZ][max_D1D][max_Q1D];
304  {
306  {
307  Bu[tidz][dy][qx] = 0.0;
308  Gu[tidz][dy][qx] = 0.0;
309  for (int dx = 0; dx < D1D; ++dx)
310  {
311  const double bx = B(qx,dx);
312  const double gx = G(qx,dx);
313  const double x = u[tidz][dy][dx];
314  Bu[tidz][dy][qx] += bx * x;
315  Gu[tidz][dy][qx] += gx * x;
316  }
317  }
318  }
320  MFEM_SHARED double GBu[NBZ][max_Q1D][max_Q1D];
321  MFEM_SHARED double BGu[NBZ][max_Q1D][max_Q1D];
323  {
325  {
326  GBu[tidz][qy][qx] = 0.0;
327  BGu[tidz][qy][qx] = 0.0;
328  for (int dy = 0; dy < D1D; ++dy)
329  {
330  const double bx = B(qy,dy);
331  const double gx = G(qy,dy);
332  GBu[tidz][qy][qx] += gx * Bu[tidz][dy][qx];
333  BGu[tidz][qy][qx] += bx * Gu[tidz][dy][qx];
334  }
335  }
336  }
338  // Calculate Dxy, xDy in plane
339  MFEM_SHARED double DGu[NBZ][max_Q1D][max_Q1D];
341  {
343  {
344  const double O1 = op(qx,qy,0,e);
345  const double O2 = op(qx,qy,1,e);
346
347  const double gradX = BGu[tidz][qy][qx];
348  const double gradY = GBu[tidz][qy][qx];
349
351  }
352  }
354  MFEM_SHARED double BDGu[NBZ][max_D1D][max_Q1D];
356  {
358  {
359  BDGu[tidz][dy][qx] = 0.0;
360  for (int qy = 0; qy < Q1D; ++qy)
361  {
362  const double w = Bt(dy,qy);
363  BDGu[tidz][dy][qx] += w * DGu[tidz][qy][qx];
364  }
365  }
366  }
369  {
371  {
372  double BBDGu = 0.0;
373  for (int qx = 0; qx < Q1D; ++qx)
374  {
375  const double w = Bt(dx,qx);
376  BBDGu += w * BDGu[tidz][dy][qx];
377  }
378  y(dx,dy,e) += BBDGu;
379  }
380  }
381  });
382 }
383
384 // PA Convection Apply 3D kernel
385 template<int T_D1D = 0, int T_Q1D = 0> static
386 void PAConvectionApply3D(const int ne,
387  const Array<double> &b,
388  const Array<double> &g,
389  const Array<double> &bt,
390  const Array<double> &gt,
391  const Vector &op_,
392  const Vector &x_,
393  Vector &y_,
394  const int d1d = 0,
395  const int q1d = 0)
396 {
397  const int NE = ne;
398  const int D1D = T_D1D ? T_D1D : d1d;
399  const int Q1D = T_Q1D ? T_Q1D : q1d;
400  MFEM_VERIFY(D1D <= MAX_D1D, "");
401  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
402  auto B = Reshape(b.Read(), Q1D, D1D);
403  auto G = Reshape(g.Read(), Q1D, D1D);
404  auto Bt = Reshape(bt.Read(), D1D, Q1D);
405  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
406  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
407  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
408  MFEM_FORALL(e, NE,
409  {
410  const int D1D = T_D1D ? T_D1D : d1d;
411  const int Q1D = T_Q1D ? T_Q1D : q1d;
412  // the following variables are evaluated at compile time
413  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
414  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
415
416  double u[max_D1D][max_D1D][max_D1D];
417  for (int dz = 0; dz < D1D; ++dz)
418  {
419  for (int dy = 0; dy < D1D; ++dy)
420  {
421  for (int dx = 0; dx < D1D; ++dx)
422  {
423  u[dz][dy][dx] = x(dx,dy,dz,e);
424  }
425  }
426  }
427  double Bu[max_D1D][max_D1D][max_Q1D];
428  double Gu[max_D1D][max_D1D][max_Q1D];
429  for (int dz = 0; dz < D1D; ++dz)
430  {
431  for (int dy = 0; dy < D1D; ++dy)
432  {
433  for (int qx = 0; qx < Q1D; ++qx)
434  {
435  Bu[dz][dy][qx] = 0.0;
436  Gu[dz][dy][qx] = 0.0;
437  for (int dx = 0; dx < D1D; ++dx)
438  {
439  const double bx = B(qx,dx);
440  const double gx = G(qx,dx);
441  const double x = u[dz][dy][dx];
442  Bu[dz][dy][qx] += bx * x;
443  Gu[dz][dy][qx] += gx * x;
444  }
445  }
446  }
447  }
448  double BBu[max_D1D][max_Q1D][max_Q1D];
449  double GBu[max_D1D][max_Q1D][max_Q1D];
450  double BGu[max_D1D][max_Q1D][max_Q1D];
451  for (int dz = 0; dz < D1D; ++dz)
452  {
453  for (int qx = 0; qx < Q1D; ++qx)
454  {
455  for (int qy = 0; qy < Q1D; ++qy)
456  {
457  BBu[dz][qy][qx] = 0.0;
458  GBu[dz][qy][qx] = 0.0;
459  BGu[dz][qy][qx] = 0.0;
460  for (int dy = 0; dy < D1D; ++dy)
461  {
462  const double bx = B(qy,dy);
463  const double gx = G(qy,dy);
464  BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
465  GBu[dz][qy][qx] += gx * Bu[dz][dy][qx];
466  BGu[dz][qy][qx] += bx * Gu[dz][dy][qx];
467  }
468  }
469  }
470  }
471  double GBBu[max_Q1D][max_Q1D][max_Q1D];
472  double BGBu[max_Q1D][max_Q1D][max_Q1D];
473  double BBGu[max_Q1D][max_Q1D][max_Q1D];
474  for (int qx = 0; qx < Q1D; ++qx)
475  {
476  for (int qy = 0; qy < Q1D; ++qy)
477  {
478  for (int qz = 0; qz < Q1D; ++qz)
479  {
480  GBBu[qz][qy][qx] = 0.0;
481  BGBu[qz][qy][qx] = 0.0;
482  BBGu[qz][qy][qx] = 0.0;
483  for (int dz = 0; dz < D1D; ++dz)
484  {
485  const double bx = B(qz,dz);
486  const double gx = G(qz,dz);
487  GBBu[qz][qy][qx] += gx * BBu[dz][qy][qx];
488  BGBu[qz][qy][qx] += bx * GBu[dz][qy][qx];
489  BBGu[qz][qy][qx] += bx * BGu[dz][qy][qx];
490  }
491  }
492  }
493  }
494  // Calculate Dxy, xDy in plane
495  double DGu[max_Q1D][max_Q1D][max_Q1D];
496  for (int qz = 0; qz < Q1D; ++qz)
497  {
498  for (int qy = 0; qy < Q1D; ++qy)
499  {
500  for (int qx = 0; qx < Q1D; ++qx)
501  {
502  const double O1 = op(qx,qy,qz,0,e);
503  const double O2 = op(qx,qy,qz,1,e);
504  const double O3 = op(qx,qy,qz,2,e);
505
506  const double gradX = BBGu[qz][qy][qx];
507  const double gradY = BGBu[qz][qy][qx];
508  const double gradZ = GBBu[qz][qy][qx];
509
511  }
512  }
513  }
514  double BDGu[max_D1D][max_Q1D][max_Q1D];
515  for (int qx = 0; qx < Q1D; ++qx)
516  {
517  for (int qy = 0; qy < Q1D; ++qy)
518  {
519  for (int dz = 0; dz < D1D; ++dz)
520  {
521  BDGu[dz][qy][qx] = 0.0;
522  for (int qz = 0; qz < Q1D; ++qz)
523  {
524  const double w = Bt(dz,qz);
525  BDGu[dz][qy][qx] += w * DGu[qz][qy][qx];
526  }
527  }
528  }
529  }
530  double BBDGu[max_D1D][max_D1D][max_Q1D];
531  for (int dz = 0; dz < D1D; ++dz)
532  {
533  for (int qx = 0; qx < Q1D; ++qx)
534  {
535  for (int dy = 0; dy < D1D; ++dy)
536  {
537  BBDGu[dz][dy][qx] = 0.0;
538  for (int qy = 0; qy < Q1D; ++qy)
539  {
540  const double w = Bt(dy,qy);
541  BBDGu[dz][dy][qx] += w * BDGu[dz][qy][qx];
542  }
543  }
544  }
545  }
546  for (int dz = 0; dz < D1D; ++dz)
547  {
548  for (int dy = 0; dy < D1D; ++dy)
549  {
550  for (int dx = 0; dx < D1D; ++dx)
551  {
552  double BBBDGu = 0.0;
553  for (int qx = 0; qx < Q1D; ++qx)
554  {
555  const double w = Bt(dx,qx);
556  BBBDGu += w * BBDGu[dz][dy][qx];
557  }
558  y(dx,dy,dz,e) += BBBDGu;
559  }
560  }
561  }
562  });
563 }
564
565 // Optimized PA Convection Apply 3D kernel
566 template<int T_D1D = 0, int T_Q1D = 0> static
567 void SmemPAConvectionApply3D(const int ne,
568  const Array<double> &b,
569  const Array<double> &g,
570  const Array<double> &bt,
571  const Array<double> &gt,
572  const Vector &op_,
573  const Vector &x_,
574  Vector &y_,
575  const int d1d = 0,
576  const int q1d = 0)
577 {
578  const int NE = ne;
579  const int D1D = T_D1D ? T_D1D : d1d;
580  const int Q1D = T_Q1D ? T_Q1D : q1d;
581  MFEM_VERIFY(D1D <= MAX_D1D, "");
582  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
583  auto B = Reshape(b.Read(), Q1D, D1D);
584  auto G = Reshape(g.Read(), Q1D, D1D);
585  auto Bt = Reshape(bt.Read(), D1D, Q1D);
586  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
587  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
588  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
589  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
590  {
591  const int D1D = T_D1D ? T_D1D : d1d;
592  const int Q1D = T_Q1D ? T_Q1D : q1d;
593  // the following variables are evaluated at compile time
594  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
595  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
596  constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
597  MFEM_SHARED double sm0[max_DQ*max_DQ*max_DQ];
598  MFEM_SHARED double sm1[max_DQ*max_DQ*max_DQ];
599  MFEM_SHARED double sm2[max_DQ*max_DQ*max_DQ];
600  MFEM_SHARED double sm3[max_DQ*max_DQ*max_DQ];
601  MFEM_SHARED double sm4[max_DQ*max_DQ*max_DQ];
602  MFEM_SHARED double sm5[max_DQ*max_DQ*max_DQ];
603
604  double (*u)[max_D1D][max_D1D] = (double (*)[max_D1D][max_D1D]) sm0;
606  {
608  {
610  {
611  u[dz][dy][dx] = x(dx,dy,dz,e);
612  }
613  }
614  }
616  double (*Bu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm1;
617  double (*Gu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm2;
619  {
621  {
623  {
624  double Bu_ = 0.0;
625  double Gu_ = 0.0;
626  for (int dx = 0; dx < D1D; ++dx)
627  {
628  const double bx = B(qx,dx);
629  const double gx = G(qx,dx);
630  const double x = u[dz][dy][dx];
631  Bu_ += bx * x;
632  Gu_ += gx * x;
633  }
634  Bu[dz][dy][qx] = Bu_;
635  Gu[dz][dy][qx] = Gu_;
636  }
637  }
638  }
640  double (*BBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3;
641  double (*GBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4;
642  double (*BGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm5;
644  {
646  {
648  {
649  double BBu_ = 0.0;
650  double GBu_ = 0.0;
651  double BGu_ = 0.0;
652  for (int dy = 0; dy < D1D; ++dy)
653  {
654  const double bx = B(qy,dy);
655  const double gx = G(qy,dy);
656  BBu_ += bx * Bu[dz][dy][qx];
657  GBu_ += gx * Bu[dz][dy][qx];
658  BGu_ += bx * Gu[dz][dy][qx];
659  }
660  BBu[dz][qy][qx] = BBu_;
661  GBu[dz][qy][qx] = GBu_;
662  BGu[dz][qy][qx] = BGu_;
663  }
664  }
665  }
667  double (*GBBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm0;
668  double (*BGBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm1;
669  double (*BBGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm2;
671  {
673  {
675  {
676  double GBBu_ = 0.0;
677  double BGBu_ = 0.0;
678  double BBGu_ = 0.0;
679  for (int dz = 0; dz < D1D; ++dz)
680  {
681  const double bx = B(qz,dz);
682  const double gx = G(qz,dz);
683  GBBu_ += gx * BBu[dz][qy][qx];
684  BGBu_ += bx * GBu[dz][qy][qx];
685  BBGu_ += bx * BGu[dz][qy][qx];
686  }
687  GBBu[qz][qy][qx] = GBBu_;
688  BGBu[qz][qy][qx] = BGBu_;
689  BBGu[qz][qy][qx] = BBGu_;
690  }
691  }
692  }
694  double (*DGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3;
696  {
698  {
700  {
701  const double O1 = op(qx,qy,qz,0,e);
702  const double O2 = op(qx,qy,qz,1,e);
703  const double O3 = op(qx,qy,qz,2,e);
704
705  const double gradX = BBGu[qz][qy][qx];
706  const double gradY = BGBu[qz][qy][qx];
707  const double gradZ = GBBu[qz][qy][qx];
708
710  }
711  }
712  }
714  double (*BDGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4;
716  {
718  {
720  {
721  double BDGu_ = 0.0;
722  for (int qz = 0; qz < Q1D; ++qz)
723  {
724  const double w = Bt(dz,qz);
725  BDGu_ += w * DGu[qz][qy][qx];
726  }
727  BDGu[dz][qy][qx] = BDGu_;
728  }
729  }
730  }
732  double (*BBDGu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm5;
734  {
736  {
738  {
739  double BBDGu_ = 0.0;
740  for (int qy = 0; qy < Q1D; ++qy)
741  {
742  const double w = Bt(dy,qy);
743  BBDGu_ += w * BDGu[dz][qy][qx];
744  }
745  BBDGu[dz][dy][qx] = BBDGu_;
746  }
747  }
748  }
751  {
753  {
755  {
756  double BBBDGu = 0.0;
757  for (int qx = 0; qx < Q1D; ++qx)
758  {
759  const double w = Bt(dx,qx);
760  BBBDGu += w * BBDGu[dz][dy][qx];
761  }
762  y(dx,dy,dz,e) += BBBDGu;
763  }
764  }
765  }
766  });
767 }
768
769 // PA Convection Apply 2D kernel
770 template<int T_D1D = 0, int T_Q1D = 0> static
771 void PAConvectionApplyT2D(const int ne,
772  const Array<double> &b,
773  const Array<double> &g,
774  const Array<double> &bt,
775  const Array<double> &gt,
776  const Vector &op_,
777  const Vector &x_,
778  Vector &y_,
779  const int d1d = 0,
780  const int q1d = 0)
781 {
782  const int NE = ne;
783  const int D1D = T_D1D ? T_D1D : d1d;
784  const int Q1D = T_Q1D ? T_Q1D : q1d;
785  MFEM_VERIFY(D1D <= MAX_D1D, "");
786  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
787  auto B = Reshape(b.Read(), Q1D, D1D);
788  auto Bt = Reshape(bt.Read(), D1D, Q1D);
789  auto Gt = Reshape(gt.Read(), D1D, Q1D);
790  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
791  auto x = Reshape(x_.Read(), D1D, D1D, NE);
792  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
793  MFEM_FORALL(e, NE,
794  {
795  const int D1D = T_D1D ? T_D1D : d1d;
796  const int Q1D = T_Q1D ? T_Q1D : q1d;
797  // the following variables are evaluated at compile time
798  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
799  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
800
801  double u[max_D1D][max_D1D];
802  for (int dy = 0; dy < D1D; ++dy)
803  {
804  for (int dx = 0; dx < D1D; ++dx)
805  {
806  u[dy][dx] = x(dx,dy,e);
807  }
808  }
809  double Bu[max_D1D][max_Q1D];
810  for (int dy = 0; dy < D1D; ++dy)
811  {
812  for (int qx = 0; qx < Q1D; ++qx)
813  {
814  Bu[dy][qx] = 0.0;
815  for (int dx = 0; dx < D1D; ++dx)
816  {
817  const double bx = B(qx,dx);
818  const double x = u[dy][dx];
819  Bu[dy][qx] += bx * x;
820  }
821  }
822  }
823  double BBu[max_Q1D][max_Q1D];
824  for (int qx = 0; qx < Q1D; ++qx)
825  {
826  for (int qy = 0; qy < Q1D; ++qy)
827  {
828  BBu[qy][qx] = 0.0;
829  for (int dy = 0; dy < D1D; ++dy)
830  {
831  const double bx = B(qy,dy);
832  BBu[qy][qx] += bx * Bu[dy][qx];
833  }
834  }
835  }
836  // Calculate Dxy, xDy in plane
837  double DBu[max_Q1D][max_Q1D][2];
838  for (int qy = 0; qy < Q1D; ++qy)
839  {
840  for (int qx = 0; qx < Q1D; ++qx)
841  {
842  const double O1 = op(qx,qy,0,e);
843  const double O2 = op(qx,qy,1,e);
844
845  const double X = BBu[qy][qx];
846
847  DBu[qy][qx][0] = O1 * X;
848  DBu[qy][qx][1] = O2 * X;
849  }
850  }
851  double GDBu[max_D1D][max_Q1D][2];
852  for (int qx = 0; qx < Q1D; ++qx)
853  {
854  for (int dy = 0; dy < D1D; ++dy)
855  {
856  GDBu[dy][qx][0] = 0.0;
857  GDBu[dy][qx][1] = 0.0;
858  for (int qy = 0; qy < Q1D; ++qy)
859  {
860  const double by = Bt(dy,qy);
861  const double gy = Gt(dy,qy);
862  GDBu[dy][qx][0] += by * DBu[qy][qx][0];
863  GDBu[dy][qx][1] += gy * DBu[qy][qx][1];
864  }
865  }
866  }
867  for (int dx = 0; dx < D1D; ++dx)
868  {
869  for (int dy = 0; dy < D1D; ++dy)
870  {
871  double res = 0.0;
872  for (int qx = 0; qx < Q1D; ++qx)
873  {
874  const double bx = Bt(dx,qx);
875  const double gx = Gt(dx,qx);
876  res += gx * GDBu[dy][qx][0] + bx * GDBu[dy][qx][1];
877  }
878  y(dx,dy,e) += res;
879  }
880  }
881  });
882 }
883
884 // Optimized PA Convection Apply 2D kernel
885 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
886 void SmemPAConvectionApplyT2D(const int ne,
887  const Array<double> &b,
888  const Array<double> &g,
889  const Array<double> &bt,
890  const Array<double> &gt,
891  const Vector &op_,
892  const Vector &x_,
893  Vector &y_,
894  const int d1d = 0,
895  const int q1d = 0)
896 {
897  const int NE = ne;
898  const int D1D = T_D1D ? T_D1D : d1d;
899  const int Q1D = T_Q1D ? T_Q1D : q1d;
900  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
901  MFEM_VERIFY(D1D <= MAX_D1D, "");
902  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
903  auto B = Reshape(b.Read(), Q1D, D1D);
904  auto Bt = Reshape(bt.Read(), D1D, Q1D);
905  auto Gt = Reshape(gt.Read(), D1D, Q1D);
906  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
907  auto x = Reshape(x_.Read(), D1D, D1D, NE);
908  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
909  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
910  {
911  const int tidz = MFEM_THREAD_ID(z);
912  const int D1D = T_D1D ? T_D1D : d1d;
913  const int Q1D = T_Q1D ? T_Q1D : q1d;
914  // the following variables are evaluated at compile time
915  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
916  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
917  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
918  MFEM_SHARED double u[NBZ][max_D1D][max_D1D];
920  {
922  {
923  // e is really equal to e+tidz
924  u[tidz][dy][dx] = x(dx,dy,e);
925  }
926  }
928  MFEM_SHARED double Bu[NBZ][max_D1D][max_Q1D];
930  {
932  {
933  Bu[tidz][dy][qx] = 0.0;
934  for (int dx = 0; dx < D1D; ++dx)
935  {
936  const double bx = B(qx,dx);
937  const double x = u[tidz][dy][dx];
938  Bu[tidz][dy][qx] += bx * x;
939  }
940  }
941  }
943  MFEM_SHARED double BBu[NBZ][max_Q1D][max_Q1D];
945  {
947  {
948  BBu[tidz][qy][qx] = 0.0;
949  for (int dy = 0; dy < D1D; ++dy)
950  {
951  const double bx = B(qy,dy);
952  BBu[tidz][qy][qx] += bx * Bu[tidz][dy][qx];
953  }
954  }
955  }
957  // Calculate Dxy, xDy in plane
958  MFEM_SHARED double DBu[NBZ][max_Q1D][max_Q1D][2];
960  {
962  {
963  const double O1 = op(qx,qy,0,e);
964  const double O2 = op(qx,qy,1,e);
965
966  const double X = BBu[tidz][qy][qx];
967
968  DBu[tidz][qy][qx][0] = O1 * X;
969  DBu[tidz][qy][qx][1] = O2 * X;
970  }
971  }
973  MFEM_SHARED double GDBu[NBZ][max_D1D][max_Q1D][2];
975  {
977  {
978  GDBu[tidz][dy][qx][0] = 0.0;
979  GDBu[tidz][dy][qx][1] = 0.0;
980  for (int qy = 0; qy < Q1D; ++qy)
981  {
982  const double by = Bt(dy,qy);
983  const double gy = Gt(dy,qy);
984  GDBu[tidz][dy][qx][0] += by * DBu[tidz][qy][qx][0];
985  GDBu[tidz][dy][qx][1] += gy * DBu[tidz][qy][qx][1];
986  }
987  }
988  }
991  {
993  {
994  double res = 0.0;
995  for (int qx = 0; qx < Q1D; ++qx)
996  {
997  const double bx = Bt(dx,qx);
998  const double gx = Gt(dx,qx);
999  res += gx * GDBu[tidz][dy][qx][0] + bx * GDBu[tidz][dy][qx][1];
1000  }
1001  y(dx,dy,e) += res;
1002  }
1003  }
1004  });
1005 }
1006
1007 // PA Convection Apply 3D kernel
1008 template<int T_D1D = 0, int T_Q1D = 0> static
1009 void PAConvectionApplyT3D(const int ne,
1010  const Array<double> &b,
1011  const Array<double> &g,
1012  const Array<double> &bt,
1013  const Array<double> &gt,
1014  const Vector &op_,
1015  const Vector &x_,
1016  Vector &y_,
1017  const int d1d = 0,
1018  const int q1d = 0)
1019 {
1020  const int NE = ne;
1021  const int D1D = T_D1D ? T_D1D : d1d;
1022  const int Q1D = T_Q1D ? T_Q1D : q1d;
1023  MFEM_VERIFY(D1D <= MAX_D1D, "");
1024  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
1025  auto B = Reshape(b.Read(), Q1D, D1D);
1026  auto Bt = Reshape(bt.Read(), D1D, Q1D);
1027  auto Gt = Reshape(gt.Read(), D1D, Q1D);
1028  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1029  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1030  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1031  MFEM_FORALL(e, NE,
1032  {
1033  const int D1D = T_D1D ? T_D1D : d1d;
1034  const int Q1D = T_Q1D ? T_Q1D : q1d;
1035  // the following variables are evaluated at compile time
1036  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
1037  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
1038
1039  double u[max_D1D][max_D1D][max_D1D];
1040  for (int dz = 0; dz < D1D; ++dz)
1041  {
1042  for (int dy = 0; dy < D1D; ++dy)
1043  {
1044  for (int dx = 0; dx < D1D; ++dx)
1045  {
1046  u[dz][dy][dx] = x(dx,dy,dz,e);
1047  }
1048  }
1049  }
1050  double Bu[max_D1D][max_D1D][max_Q1D];
1051  for (int dz = 0; dz < D1D; ++dz)
1052  {
1053  for (int dy = 0; dy < D1D; ++dy)
1054  {
1055  for (int qx = 0; qx < Q1D; ++qx)
1056  {
1057  Bu[dz][dy][qx] = 0.0;
1058  for (int dx = 0; dx < D1D; ++dx)
1059  {
1060  const double bx = B(qx,dx);
1061  const double x = u[dz][dy][dx];
1062  Bu[dz][dy][qx] += bx * x;
1063  }
1064  }
1065  }
1066  }
1067  double BBu[max_D1D][max_Q1D][max_Q1D];
1068  for (int dz = 0; dz < D1D; ++dz)
1069  {
1070  for (int qx = 0; qx < Q1D; ++qx)
1071  {
1072  for (int qy = 0; qy < Q1D; ++qy)
1073  {
1074  BBu[dz][qy][qx] = 0.0;
1075  for (int dy = 0; dy < D1D; ++dy)
1076  {
1077  const double bx = B(qy,dy);
1078  BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
1079  }
1080  }
1081  }
1082  }
1083  double BBBu[max_Q1D][max_Q1D][max_Q1D];
1084  for (int qx = 0; qx < Q1D; ++qx)
1085  {
1086  for (int qy = 0; qy < Q1D; ++qy)
1087  {
1088  for (int qz = 0; qz < Q1D; ++qz)
1089  {
1090  BBBu[qz][qy][qx] = 0.0;
1091  for (int dz = 0; dz < D1D; ++dz)
1092  {
1093  const double bx = B(qz,dz);
1094  BBBu[qz][qy][qx] += bx * BBu[dz][qy][qx];
1095  }
1096  }
1097  }
1098  }
1099  // Calculate Dxy, xDy in plane
1100  double DBu[max_Q1D][max_Q1D][max_Q1D][3];
1101  for (int qz = 0; qz < Q1D; ++qz)
1102  {
1103  for (int qy = 0; qy < Q1D; ++qy)
1104  {
1105  for (int qx = 0; qx < Q1D; ++qx)
1106  {
1107  const double O1 = op(qx,qy,qz,0,e);
1108  const double O2 = op(qx,qy,qz,1,e);
1109  const double O3 = op(qx,qy,qz,2,e);
1110
1111  const double X = BBBu[qz][qy][qx];
1112
1113  DBu[qz][qy][qx][0] = O1 * X;
1114  DBu[qz][qy][qx][1] = O2 * X;
1115  DBu[qz][qy][qx][2] = O3 * X;
1116  }
1117  }
1118  }
1119  double GDBu[max_D1D][max_Q1D][max_Q1D][3];
1120  for (int qx = 0; qx < Q1D; ++qx)
1121  {
1122  for (int qy = 0; qy < Q1D; ++qy)
1123  {
1124  for (int dz = 0; dz < D1D; ++dz)
1125  {
1126  GDBu[dz][qy][qx][0] = 0.0;
1127  GDBu[dz][qy][qx][1] = 0.0;
1128  GDBu[dz][qy][qx][2] = 0.0;
1129  for (int qz = 0; qz < Q1D; ++qz)
1130  {
1131  const double bz = Bt(dz,qz);
1132  const double gz = Gt(dz,qz);
1133  GDBu[dz][qy][qx][0] += bz * DBu[qz][qy][qx][0];
1134  GDBu[dz][qy][qx][1] += bz * DBu[qz][qy][qx][1];
1135  GDBu[dz][qy][qx][2] += gz * DBu[qz][qy][qx][2];
1136  }
1137  }
1138  }
1139  }
1140  double GGDBu[max_D1D][max_D1D][max_Q1D][3];
1141  for (int dz = 0; dz < D1D; ++dz)
1142  {
1143  for (int qx = 0; qx < Q1D; ++qx)
1144  {
1145  for (int dy = 0; dy < D1D; ++dy)
1146  {
1147  GGDBu[dz][dy][qx][0] = 0.0;
1148  GGDBu[dz][dy][qx][1] = 0.0;
1149  GGDBu[dz][dy][qx][2] = 0.0;
1150  for (int qy = 0; qy < Q1D; ++qy)
1151  {
1152  const double by = Bt(dy,qy);
1153  const double gy = Gt(dy,qy);
1154  GGDBu[dz][dy][qx][0] += by * GDBu[dz][qy][qx][0];
1155  GGDBu[dz][dy][qx][1] += gy * GDBu[dz][qy][qx][1];
1156  GGDBu[dz][dy][qx][2] += by * GDBu[dz][qy][qx][2];
1157  }
1158  }
1159  }
1160  }
1161  for (int dz = 0; dz < D1D; ++dz)
1162  {
1163  for (int dy = 0; dy < D1D; ++dy)
1164  {
1165  for (int dx = 0; dx < D1D; ++dx)
1166  {
1167  double res = 0.0;
1168  for (int qx = 0; qx < Q1D; ++qx)
1169  {
1170  const double bx = Bt(dx,qx);
1171  const double gx = Gt(dx,qx);
1172  res += gx * GGDBu[dz][dy][qx][0];
1173  res += bx * GGDBu[dz][dy][qx][1];
1174  res += bx * GGDBu[dz][dy][qx][2];
1175  }
1176  y(dx,dy,dz,e) += res;
1177  }
1178  }
1179  }
1180  });
1181 }
1182
1183 // Optimized PA Convection Apply 3D kernel
1184 template<int T_D1D = 0, int T_Q1D = 0> static
1185 void SmemPAConvectionApplyT3D(const int ne,
1186  const Array<double> &b,
1187  const Array<double> &g,
1188  const Array<double> &bt,
1189  const Array<double> &gt,
1190  const Vector &op_,
1191  const Vector &x_,
1192  Vector &y_,
1193  const int d1d = 0,
1194  const int q1d = 0)
1195 {
1196  const int NE = ne;
1197  const int D1D = T_D1D ? T_D1D : d1d;
1198  const int Q1D = T_Q1D ? T_Q1D : q1d;
1199  MFEM_VERIFY(D1D <= MAX_D1D, "");
1200  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
1201  auto B = Reshape(b.Read(), Q1D, D1D);
1202  auto Bt = Reshape(bt.Read(), D1D, Q1D);
1203  auto Gt = Reshape(gt.Read(), D1D, Q1D);
1204  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1205  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1206  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1207  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1208  {
1209  const int D1D = T_D1D ? T_D1D : d1d;
1210  const int Q1D = T_Q1D ? T_Q1D : q1d;
1211  // the following variables are evaluated at compile time
1212  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
1213  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
1214  constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
1215  MFEM_SHARED double sm0[3*max_DQ*max_DQ*max_DQ];
1216  MFEM_SHARED double sm1[3*max_DQ*max_DQ*max_DQ];
1217
1218  double (*u)[max_D1D][max_D1D] = (double (*)[max_D1D][max_D1D]) sm0;
1220  {
1222  {
1224  {
1225  u[dz][dy][dx] = x(dx,dy,dz,e);
1226  }
1227  }
1228  }
1230  double (*Bu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm1;
1232  {
1234  {
1236  {
1237  double Bu_ = 0.0;
1238  for (int dx = 0; dx < D1D; ++dx)
1239  {
1240  const double bx = B(qx,dx);
1241  const double x = u[dz][dy][dx];
1242  Bu_ += bx * x;
1243  }
1244  Bu[dz][dy][qx] = Bu_;
1245  }
1246  }
1247  }
1249  double (*BBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm0;
1251  {
1253  {
1255  {
1256  double BBu_ = 0.0;
1257  for (int dy = 0; dy < D1D; ++dy)
1258  {
1259  const double bx = B(qy,dy);
1260  BBu_ += bx * Bu[dz][dy][qx];
1261  }
1262  BBu[dz][qy][qx] = BBu_;
1263  }
1264  }
1265  }
1267  double (*BBBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm1;
1269  {
1271  {
1273  {
1274  double BBBu_ = 0.0;
1275  for (int dz = 0; dz < D1D; ++dz)
1276  {
1277  const double bx = B(qz,dz);
1278  BBBu_ += bx * BBu[dz][qy][qx];
1279  }
1280  BBBu[qz][qy][qx] = BBBu_;
1281  }
1282  }
1283  }
1285  double (*DBu)[max_Q1D][max_Q1D][3] = (double (*)[max_Q1D][max_Q1D][3])sm0;
1287  {
1289  {
1291  {
1292  const double O1 = op(qx,qy,qz,0,e);
1293  const double O2 = op(qx,qy,qz,1,e);
1294  const double O3 = op(qx,qy,qz,2,e);
1295
1296  const double X = BBBu[qz][qy][qx];
1297
1298  DBu[qz][qy][qx][0] = O1 * X;
1299  DBu[qz][qy][qx][1] = O2 * X;
1300  DBu[qz][qy][qx][2] = O3 * X;
1301  }
1302  }
1303  }
1305  double (*GDBu)[max_Q1D][max_Q1D][3] = (double (*)[max_Q1D][max_Q1D][3])sm1;
1307  {
1309  {
1311  {
1312  double GDBu0 = 0.0;
1313  double GDBu1 = 0.0;
1314  double GDBu2 = 0.0;
1315  for (int qz = 0; qz < Q1D; ++qz)
1316  {
1317  const double bz = Bt(dz,qz);
1318  const double gz = Gt(dz,qz);
1319  GDBu0 += bz * DBu[qz][qy][qx][0];
1320  GDBu1 += bz * DBu[qz][qy][qx][1];
1321  GDBu2 += gz * DBu[qz][qy][qx][2];
1322  }
1323  GDBu[dz][qy][qx][0] = GDBu0;
1324  GDBu[dz][qy][qx][1] = GDBu1;
1325  GDBu[dz][qy][qx][2] = GDBu2;
1326  }
1327  }
1328  }
1330  double (*GGDBu)[max_D1D][max_Q1D][3] = (double (*)[max_D1D][max_Q1D][3])sm0;
1332  {
1334  {
1336  {
1337  double GGDBu0 = 0.0;
1338  double GGDBu1 = 0.0;
1339  double GGDBu2 = 0.0;
1340  for (int qy = 0; qy < Q1D; ++qy)
1341  {
1342  const double by = Bt(dy,qy);
1343  const double gy = Gt(dy,qy);
1344  GGDBu0 += by * GDBu[dz][qy][qx][0];
1345  GGDBu1 += gy * GDBu[dz][qy][qx][1];
1346  GGDBu2 += by * GDBu[dz][qy][qx][2];
1347  }
1348  GGDBu[dz][dy][qx][0] = GGDBu0;
1349  GGDBu[dz][dy][qx][1] = GGDBu1;
1350  GGDBu[dz][dy][qx][2] = GGDBu2;
1351  }
1352  }
1353  }
1356  {
1358  {
1360  {
1361  double res = 0.0;
1362  for (int qx = 0; qx < Q1D; ++qx)
1363  {
1364  const double bx = Bt(dx,qx);
1365  const double gx = Gt(dx,qx);
1366  res += gx * GGDBu[dz][dy][qx][0];
1367  res += bx * GGDBu[dz][dy][qx][1];
1368  res += bx * GGDBu[dz][dy][qx][2];
1369  }
1370  y(dx,dy,dz,e) += res;
1371  }
1372  }
1373  }
1374  });
1375 }
1376
1378 {
1379  const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
1381  // Assumes tensor-product elements
1382  Mesh *mesh = fes.GetMesh();
1383  const FiniteElement &el = *fes.GetFE(0);
1385  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans);
1386  if (DeviceCanUseCeed())
1387  {
1388  delete ceedOp;
1389  ceedOp = new ceed::PAConvectionIntegrator(fes, *ir, Q, alpha);
1390  return;
1391  }
1392  const int dims = el.GetDim();
1393  const int symmDims = dims;
1394  nq = ir->GetNPoints();
1395  dim = mesh->Dimension();
1396  ne = fes.GetNE();
1399  dofs1D = maps->ndof;
1401  pa_data.SetSize(symmDims * nq * ne, mt);
1402  Vector vel;
1403  if (VectorConstantCoefficient *cQ =
1404  dynamic_cast<VectorConstantCoefficient*>(Q))
1405  {
1406  vel = cQ->GetVec();
1407  }
1408  else if (VectorGridFunctionCoefficient *vgfQ =
1409  dynamic_cast<VectorGridFunctionCoefficient*>(Q))
1410  {
1411  vel.SetSize(dim * nq * ne, mt);
1412
1413  const GridFunction *gf = vgfQ->GetGridFunction();
1414  const FiniteElementSpace &gf_fes = *gf->FESpace();
1416  const bool use_tensor_products = UsesTensorBasis(gf_fes);
1417  const ElementDofOrdering ordering = use_tensor_products ?
1420  const Operator *R = gf_fes.GetElementRestriction(ordering);
1421
1422  Vector xe(R->Height(), mt);
1423  xe.UseDevice(true);
1424
1425  R->Mult(*gf, xe);
1426  qi->SetOutputLayout(QVectorLayout::byVDIM);
1427  qi->DisableTensorProducts(!use_tensor_products);
1428  qi->Values(xe,vel);
1429  }
1430  else if (VectorQuadratureFunctionCoefficient* vqfQ =
1432  {
1434  MFEM_VERIFY(qFun.Size() == dim * nq * ne,
1436
1437  MFEM_VERIFY(ir == &qFun.GetSpace()->GetElementIntRule(0),
1438  "IntegrationRule used within integrator and in"
1439  " QuadratureFunction appear to be different");
1440
1443  }
1444  else
1445  {
1446  vel.SetSize(dim * nq * ne);
1447  auto C = Reshape(vel.HostWrite(), dim, nq, ne);
1448  DenseMatrix MQ_ir;
1449  for (int e = 0; e < ne; ++e)
1450  {
1452  Q->Eval(MQ_ir, T, *ir);
1453  for (int q = 0; q < nq; ++q)
1454  {
1455  for (int i = 0; i < dim; ++i)
1456  {
1457  C(i,q,e) = MQ_ir(i,q);
1458  }
1459  }
1460  }
1461  }
1462  PAConvectionSetup(dim, nq, ne, ir->GetWeights(), geom->J,
1463  vel, alpha, pa_data);
1464 }
1465
1466 static void PAConvectionApply(const int dim,
1467  const int D1D,
1468  const int Q1D,
1469  const int NE,
1470  const Array<double> &B,
1471  const Array<double> &G,
1472  const Array<double> &Bt,
1473  const Array<double> &Gt,
1474  const Vector &op,
1475  const Vector &x,
1476  Vector &y)
1477 {
1478  if (dim == 2)
1479  {
1480  switch ((D1D << 4 ) | Q1D)
1481  {
1482  case 0x22: return SmemPAConvectionApply2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1483  case 0x33: return SmemPAConvectionApply2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1484  case 0x34: return SmemPAConvectionApply2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1485  case 0x44: return SmemPAConvectionApply2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1486  case 0x46: return SmemPAConvectionApply2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1487  case 0x55: return SmemPAConvectionApply2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1488  case 0x58: return SmemPAConvectionApply2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1489  case 0x66: return SmemPAConvectionApply2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1490  case 0x77: return SmemPAConvectionApply2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1491  case 0x88: return SmemPAConvectionApply2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1492  case 0x99: return SmemPAConvectionApply2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1493  default: return PAConvectionApply2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1494  }
1495  }
1496  else if (dim == 3)
1497  {
1498  switch ((D1D << 4 ) | Q1D)
1499  {
1500  case 0x23: return SmemPAConvectionApply3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1501  case 0x24: return SmemPAConvectionApply3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1502  case 0x26: return SmemPAConvectionApply3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1503  case 0x34: return SmemPAConvectionApply3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1504  case 0x35: return SmemPAConvectionApply3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1505  case 0x45: return SmemPAConvectionApply3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1506  case 0x48: return SmemPAConvectionApply3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1507  case 0x56: return SmemPAConvectionApply3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1508  case 0x67: return SmemPAConvectionApply3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1509  case 0x78: return SmemPAConvectionApply3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1510  case 0x89: return SmemPAConvectionApply3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1511  default: return PAConvectionApply3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1512  }
1513  }
1514  MFEM_ABORT("Unknown kernel.");
1515 }
1516
1517 static void PAConvectionApplyT(const int dim,
1518  const int D1D,
1519  const int Q1D,
1520  const int NE,
1521  const Array<double> &B,
1522  const Array<double> &G,
1523  const Array<double> &Bt,
1524  const Array<double> &Gt,
1525  const Vector &op,
1526  const Vector &x,
1527  Vector &y)
1528 {
1529  if (dim == 2)
1530  {
1531  switch ((D1D << 4 ) | Q1D)
1532  {
1533  case 0x22: return SmemPAConvectionApplyT2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1534  case 0x33: return SmemPAConvectionApplyT2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1535  case 0x34: return SmemPAConvectionApplyT2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1536  case 0x44: return SmemPAConvectionApplyT2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1537  case 0x46: return SmemPAConvectionApplyT2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1538  case 0x55: return SmemPAConvectionApplyT2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1539  case 0x58: return SmemPAConvectionApplyT2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1540  case 0x66: return SmemPAConvectionApplyT2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1541  case 0x77: return SmemPAConvectionApplyT2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1542  case 0x88: return SmemPAConvectionApplyT2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1543  case 0x99: return SmemPAConvectionApplyT2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1544  default: return PAConvectionApplyT2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1545  }
1546  }
1547  else if (dim == 3)
1548  {
1549  switch ((D1D << 4 ) | Q1D)
1550  {
1551  case 0x23: return SmemPAConvectionApplyT3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1552  case 0x24: return SmemPAConvectionApplyT3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1553  case 0x26: return SmemPAConvectionApplyT3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1554  case 0x34: return SmemPAConvectionApplyT3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1555  case 0x35: return SmemPAConvectionApplyT3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1556  case 0x45: return SmemPAConvectionApplyT3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1557  case 0x48: return SmemPAConvectionApplyT3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1558  case 0x56: return SmemPAConvectionApplyT3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1559  case 0x67: return SmemPAConvectionApplyT3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1560  case 0x78: return SmemPAConvectionApplyT3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1561  case 0x89: return SmemPAConvectionApplyT3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1562  default: return PAConvectionApplyT3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1563  }
1564  }
1565  MFEM_ABORT("Unknown kernel.");
1566 }
1567
1568 // PA Convection Apply kernel
1570 {
1571  if (DeviceCanUseCeed())
1572  {
1574  }
1575  else
1576  {
1578  maps->B, maps->G, maps->Bt, maps->Gt,
1579  pa_data, x, y);
1580  }
1581 }
1582
1583 // PA Convection Apply transpose kernel
1585 {
1586  if (DeviceCanUseCeed())
1587  {
1588  MFEM_ABORT("AddMultPA not yet implemented with libCEED for"
1589  " ConvectionIntegrator.");
1590  }
1591  else
1592  {
1594  maps->B, maps->G, maps->Bt, maps->Gt,
1595  pa_data, x, y);
1596  }
1597 }
1598
1600 {
1601  if (DeviceCanUseCeed())
1602  {
1603  ceedOp->GetDiagonal(diag);
1604  }
1605  else
1606  {
1607  MFEM_ABORT("AssembleDiagonalPA not yet implemented for"
1608  " ConvectionIntegrator.");
1609  }
1610 }
1611
1612 } // namespace mfem
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Abstract class for all finite elements.
Definition: fe_base.hpp:235
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe_base.hpp:311
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
Class for grid function - Vector with associated FE space.
Definition: gridfunc.hpp:30
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition: fe_base.hpp:162
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:840
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
Definition: fespace.hpp:976
virtual void Eval(Vector &V, ElementTransformation &T, const IntegrationPoint &ip)=0
Evaluate the vector coefficient in the element described by T at the point ip, storing the result in ...
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:521
const GeometricFactors * geom
Not owned.
Vector quadrature function coefficient which requires that the quadrature rules used for this vector ...
Vector coefficient that is constant in space and time.
Data type dense matrix using column-major storage.
Definition: densemat.hpp:23
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition: fe_base.hpp:174
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:450
int Size() const
Returns the size of the vector.
Definition: vector.hpp:199
Array< double > Gt
Transpose of G.
Definition: fe_base.hpp:213
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe_base.hpp:170
virtual void Mult(const Vector &x, Vector &y) const =0
Operator application: y=A(x).
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
constexpr int DIM
bool UsesTensorBasis(const FiniteElementSpace &fes)
Definition: fespace.hpp:983
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition: vector.hpp:117
virtual void AssembleDiagonalPA(Vector &diag)
Assemble diagonal and add it to Vector diag.
const Operator * GetElementRestriction(ElementDofOrdering e_ordering) const
Return an Operator that converts L-vectors to E-vectors.
Definition: fespace.cpp:1261
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:590
Vector J
Jacobians of the element transformations at all quadrature points.
Definition: mesh.hpp:1855
Native ordering as defined by the FiniteElement.
const int MAX_Q1D
Definition: forall.hpp:29
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
Definition: operator.hpp:66
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:433
A class that performs interpolation from an E-vector to quadrature point values and/or derivatives (Q...
double b
Definition: lissajous.cpp:42
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:273
Definition: gridfunc.hpp:798
FiniteElementSpace * FESpace()
Definition: gridfunc.hpp:652
int Dimension() const
Definition: mesh.hpp:999
virtual void AddMultPA(const Vector &, Vector &) const
Method for partially assembled action.
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:623
Array< double > Bt
Transpose of B.
Definition: fe_base.hpp:191
virtual void AssemblePA(const FiniteElementSpace &)
Method defining partial assembly.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:88
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:31
static const IntegrationRule & GetRule(const FiniteElement &el, ElementTransformation &Trans)
const IntegrationRule * IntRule
Definition: nonlininteg.hpp:30
Array< double > B
Basis functions evaluated at quadrature points.
Definition: fe_base.hpp:185
virtual void AddMultTransposePA(const Vector &x, Vector &y) const
Method for partially assembled transposed action.
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:366
Return a QuadratureInterpolator that interpolates E-vectors to quadrature point values and/or derivat...
Definition: fespace.cpp:1329
ElementDofOrdering
Constants describing the possible orderings of the DOFs in one element.
Definition: fespace.hpp:66
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:28
Array< double > G
Definition: fe_base.hpp:206
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2783
constexpr int SDIM
Lexicographic ordering for tensor-product FiniteElements.
const double alpha
Definition: ex15.cpp:369
Vector data type.
Definition: vector.hpp:60
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
Definition: vector.hpp:585
Vector coefficient defined by a vector GridFunction.
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
Abstract operator.
Definition: operator.hpp:24
Class representing a function through its values (scalar or vector) at quadrature points...
Definition: gridfunc.hpp:757
virtual const double * Read(bool on_dev=true) const