MFEM  v4.5.2
Finite element discretization library
bilininteg_convection_pa.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 #include "qfunction.hpp"
17 #include "quadinterpolator.hpp"
18 
19 namespace mfem
20 {
21 
22 // PA Convection Integrator
23 
24 // PA Convection Assemble 2D kernel
25 static void PAConvectionSetup2D(const int NQ,
26  const int NE,
27  const Array<double> &w,
28  const Vector &j,
29  const Vector &vel,
30  const double alpha,
31  Vector &op)
32 {
33  constexpr int DIM = 2;
34 
35  const bool const_v = vel.Size() == DIM;
36 
37  const auto W = w.Read();
38  const auto J = Reshape(j.Read(), NQ,DIM,DIM,NE);
39  const auto V = const_v ?
40  Reshape(vel.Read(), DIM,1,1) :
41  Reshape(vel.Read(), DIM,NQ,NE);
42  auto y = Reshape(op.Write(), NQ,DIM,NE);
43 
44  MFEM_FORALL(q_global, NE*NQ,
45  {
46  const int e = q_global / NQ;
47  const int q = q_global % NQ;
48  const double J11 = J(q,0,0,e);
49  const double J21 = J(q,1,0,e);
50  const double J12 = J(q,0,1,e);
51  const double J22 = J(q,1,1,e);
52  const double w = alpha * W[q];
53  const double v0 = const_v ? V(0,0,0) : V(0,q,e);
54  const double v1 = const_v ? V(1,0,0) : V(1,q,e);
55  const double wx = w * v0;
56  const double wy = w * v1;
57  // y = alpha * W * det(J) * J^{-1} . v = adj(J) . { wx, wy }
58  y(q,0,e) = wx * J22 - wy * J12; // 1
59  y(q,1,e) = -wx * J21 + wy * J11; // 2
60  });
61 }
62 
63 // PA Convection Assemble 3D kernel
64 static void PAConvectionSetup3D(const int NQ,
65  const int NE,
66  const Array<double> &w,
67  const Vector &j,
68  const Vector &vel,
69  const double alpha,
70  Vector &op)
71 {
72  constexpr int DIM = 3;
73  constexpr int SDIM = DIM;
74  const auto W = Reshape(w.Read(), NQ);
75  const auto J = Reshape(j.Read(), NQ,SDIM,DIM,NE);
76  const bool const_v = vel.Size() == DIM;
77  const auto V = const_v ?
78  Reshape(vel.Read(), 3,1,1) :
79  Reshape(vel.Read(), 3,NQ,NE);
80  auto y = Reshape(op.Write(), NQ,3,NE);
81  MFEM_FORALL(q_global, NE*NQ,
82  {
83  const int e = q_global / NQ;
84  const int q = q_global % NQ;
85  const double J11 = J(q,0,0,e);
86  const double J12 = J(q,0,1,e);
87  const double J13 = J(q,0,2,e);
88  const double J21 = J(q,1,0,e);
89  const double J22 = J(q,1,1,e);
90  const double J23 = J(q,1,2,e);
91  const double J31 = J(q,2,0,e);
92  const double J32 = J(q,2,1,e);
93  const double J33 = J(q,2,2,e);
94  const double w = alpha * W(q);
95  const double v0 = const_v ? V(0,0,0) : V(0,q,e);
96  const double v1 = const_v ? V(1,0,0) : V(1,q,e);
97  const double v2 = const_v ? V(2,0,0) : V(2,q,e);
98  const double wx = w * v0;
99  const double wy = w * v1;
100  const double wz = w * v2;
101  // A = adj(J)
102  const double A11 = (J22 * J33) - (J23 * J32);
103  const double A12 = (J32 * J13) - (J12 * J33);
104  const double A13 = (J12 * J23) - (J22 * J13);
105  const double A21 = (J31 * J23) - (J21 * J33);
106  const double A22 = (J11 * J33) - (J13 * J31);
107  const double A23 = (J21 * J13) - (J11 * J23);
108  const double A31 = (J21 * J32) - (J31 * J22);
109  const double A32 = (J31 * J12) - (J11 * J32);
110  const double A33 = (J11 * J22) - (J12 * J21);
111  // y = alpha * W * det(J) * J^{-1} . v = adj(J) . { wx, wy, wz }
112  y(q,0,e) = wx * A11 + wy * A12 + wz * A13;
113  y(q,1,e) = wx * A21 + wy * A22 + wz * A23;
114  y(q,2,e) = wx * A31 + wy * A32 + wz * A33;
115  });
116 }
117 
118 static void PAConvectionSetup(const int dim,
119  const int NQ,
120  const int NE,
121  const Array<double> &W,
122  const Vector &J,
123  const Vector &coeff,
124  const double alpha,
125  Vector &op)
126 {
127  if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAConvectionSetup"); }
128  if (dim == 2)
129  {
130  PAConvectionSetup2D(NQ, NE, W, J, coeff, alpha, op);
131  }
132  if (dim == 3)
133  {
134  PAConvectionSetup3D(NQ, NE, W, J, coeff, alpha, op);
135  }
136 }
137 
138 // PA Convection Apply 2D kernel
139 template<int T_D1D = 0, int T_Q1D = 0> static
140 void PAConvectionApply2D(const int ne,
141  const Array<double> &b,
142  const Array<double> &g,
143  const Array<double> &bt,
144  const Array<double> &gt,
145  const Vector &op_,
146  const Vector &x_,
147  Vector &y_,
148  const int d1d = 0,
149  const int q1d = 0)
150 {
151  const int NE = ne;
152  const int D1D = T_D1D ? T_D1D : d1d;
153  const int Q1D = T_Q1D ? T_Q1D : q1d;
154  MFEM_VERIFY(D1D <= MAX_D1D, "");
155  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
156  auto B = Reshape(b.Read(), Q1D, D1D);
157  auto G = Reshape(g.Read(), Q1D, D1D);
158  auto Bt = Reshape(bt.Read(), D1D, Q1D);
159  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
160  auto x = Reshape(x_.Read(), D1D, D1D, NE);
161  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
162  MFEM_FORALL(e, NE,
163  {
164  const int D1D = T_D1D ? T_D1D : d1d;
165  const int Q1D = T_Q1D ? T_Q1D : q1d;
166  // the following variables are evaluated at compile time
167  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
168  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
169 
170  double u[max_D1D][max_D1D];
171  for (int dy = 0; dy < D1D; ++dy)
172  {
173  for (int dx = 0; dx < D1D; ++dx)
174  {
175  u[dy][dx] = x(dx,dy,e);
176  }
177  }
178  double Bu[max_D1D][max_Q1D];
179  double Gu[max_D1D][max_Q1D];
180  for (int dy = 0; dy < D1D; ++dy)
181  {
182  for (int qx = 0; qx < Q1D; ++qx)
183  {
184  Bu[dy][qx] = 0.0;
185  Gu[dy][qx] = 0.0;
186  for (int dx = 0; dx < D1D; ++dx)
187  {
188  const double bx = B(qx,dx);
189  const double gx = G(qx,dx);
190  const double x = u[dy][dx];
191  Bu[dy][qx] += bx * x;
192  Gu[dy][qx] += gx * x;
193  }
194  }
195  }
196  double GBu[max_Q1D][max_Q1D];
197  double BGu[max_Q1D][max_Q1D];
198  for (int qx = 0; qx < Q1D; ++qx)
199  {
200  for (int qy = 0; qy < Q1D; ++qy)
201  {
202  GBu[qy][qx] = 0.0;
203  BGu[qy][qx] = 0.0;
204  for (int dy = 0; dy < D1D; ++dy)
205  {
206  const double bx = B(qy,dy);
207  const double gx = G(qy,dy);
208  GBu[qy][qx] += gx * Bu[dy][qx];
209  BGu[qy][qx] += bx * Gu[dy][qx];
210  }
211  }
212  }
213  // Calculate Dxy, xDy in plane
214  double DGu[max_Q1D][max_Q1D];
215  for (int qy = 0; qy < Q1D; ++qy)
216  {
217  for (int qx = 0; qx < Q1D; ++qx)
218  {
219  const double O1 = op(qx,qy,0,e);
220  const double O2 = op(qx,qy,1,e);
221 
222  const double gradX = BGu[qy][qx];
223  const double gradY = GBu[qy][qx];
224 
225  DGu[qy][qx] = (O1 * gradX) + (O2 * gradY);
226  }
227  }
228  double BDGu[max_D1D][max_Q1D];
229  for (int qx = 0; qx < Q1D; ++qx)
230  {
231  for (int dy = 0; dy < D1D; ++dy)
232  {
233  BDGu[dy][qx] = 0.0;
234  for (int qy = 0; qy < Q1D; ++qy)
235  {
236  const double w = Bt(dy,qy);
237  BDGu[dy][qx] += w * DGu[qy][qx];
238  }
239  }
240  }
241  for (int dx = 0; dx < D1D; ++dx)
242  {
243  for (int dy = 0; dy < D1D; ++dy)
244  {
245  double BBDGu = 0.0;
246  for (int qx = 0; qx < Q1D; ++qx)
247  {
248  const double w = Bt(dx,qx);
249  BBDGu += w * BDGu[dy][qx];
250  }
251  y(dx,dy,e) += BBDGu;
252  }
253  }
254  });
255 }
256 
257 // Optimized PA Convection Apply 2D kernel
258 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
259 void SmemPAConvectionApply2D(const int ne,
260  const Array<double> &b,
261  const Array<double> &g,
262  const Array<double> &bt,
263  const Array<double> &gt,
264  const Vector &op_,
265  const Vector &x_,
266  Vector &y_,
267  const int d1d = 0,
268  const int q1d = 0)
269 {
270  const int NE = ne;
271  const int D1D = T_D1D ? T_D1D : d1d;
272  const int Q1D = T_Q1D ? T_Q1D : q1d;
273  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
274  MFEM_VERIFY(D1D <= MAX_D1D, "");
275  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
276  auto B = Reshape(b.Read(), Q1D, D1D);
277  auto G = Reshape(g.Read(), Q1D, D1D);
278  auto Bt = Reshape(bt.Read(), D1D, Q1D);
279  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
280  auto x = Reshape(x_.Read(), D1D, D1D, NE);
281  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
282  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
283  {
284  const int tidz = MFEM_THREAD_ID(z);
285  const int D1D = T_D1D ? T_D1D : d1d;
286  const int Q1D = T_Q1D ? T_Q1D : q1d;
287  // the following variables are evaluated at compile time
288  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
289  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
290  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
291  // constexpr int MDQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
292  MFEM_SHARED double u[NBZ][max_D1D][max_D1D];
293  MFEM_FOREACH_THREAD(dy,y,D1D)
294  {
295  MFEM_FOREACH_THREAD(dx,x,D1D)
296  {
297  // e is really equal to e+tidz
298  u[tidz][dy][dx] = x(dx,dy,e);
299  }
300  }
301  MFEM_SYNC_THREAD;
302  MFEM_SHARED double Bu[NBZ][max_D1D][max_Q1D];
303  MFEM_SHARED double Gu[NBZ][max_D1D][max_Q1D];
304  MFEM_FOREACH_THREAD(dy,y,D1D)
305  {
306  MFEM_FOREACH_THREAD(qx,x,Q1D)
307  {
308  Bu[tidz][dy][qx] = 0.0;
309  Gu[tidz][dy][qx] = 0.0;
310  for (int dx = 0; dx < D1D; ++dx)
311  {
312  const double bx = B(qx,dx);
313  const double gx = G(qx,dx);
314  const double x = u[tidz][dy][dx];
315  Bu[tidz][dy][qx] += bx * x;
316  Gu[tidz][dy][qx] += gx * x;
317  }
318  }
319  }
320  MFEM_SYNC_THREAD;
321  MFEM_SHARED double GBu[NBZ][max_Q1D][max_Q1D];
322  MFEM_SHARED double BGu[NBZ][max_Q1D][max_Q1D];
323  MFEM_FOREACH_THREAD(qx,x,Q1D)
324  {
325  MFEM_FOREACH_THREAD(qy,y,Q1D)
326  {
327  GBu[tidz][qy][qx] = 0.0;
328  BGu[tidz][qy][qx] = 0.0;
329  for (int dy = 0; dy < D1D; ++dy)
330  {
331  const double bx = B(qy,dy);
332  const double gx = G(qy,dy);
333  GBu[tidz][qy][qx] += gx * Bu[tidz][dy][qx];
334  BGu[tidz][qy][qx] += bx * Gu[tidz][dy][qx];
335  }
336  }
337  }
338  MFEM_SYNC_THREAD;
339  // Calculate Dxy, xDy in plane
340  MFEM_SHARED double DGu[NBZ][max_Q1D][max_Q1D];
341  MFEM_FOREACH_THREAD(qy,y,Q1D)
342  {
343  MFEM_FOREACH_THREAD(qx,x,Q1D)
344  {
345  const double O1 = op(qx,qy,0,e);
346  const double O2 = op(qx,qy,1,e);
347 
348  const double gradX = BGu[tidz][qy][qx];
349  const double gradY = GBu[tidz][qy][qx];
350 
351  DGu[tidz][qy][qx] = (O1 * gradX) + (O2 * gradY);
352  }
353  }
354  MFEM_SYNC_THREAD;
355  MFEM_SHARED double BDGu[NBZ][max_D1D][max_Q1D];
356  MFEM_FOREACH_THREAD(qx,x,Q1D)
357  {
358  MFEM_FOREACH_THREAD(dy,y,D1D)
359  {
360  BDGu[tidz][dy][qx] = 0.0;
361  for (int qy = 0; qy < Q1D; ++qy)
362  {
363  const double w = Bt(dy,qy);
364  BDGu[tidz][dy][qx] += w * DGu[tidz][qy][qx];
365  }
366  }
367  }
368  MFEM_SYNC_THREAD;
369  MFEM_FOREACH_THREAD(dx,x,D1D)
370  {
371  MFEM_FOREACH_THREAD(dy,y,D1D)
372  {
373  double BBDGu = 0.0;
374  for (int qx = 0; qx < Q1D; ++qx)
375  {
376  const double w = Bt(dx,qx);
377  BBDGu += w * BDGu[tidz][dy][qx];
378  }
379  y(dx,dy,e) += BBDGu;
380  }
381  }
382  });
383 }
384 
385 // PA Convection Apply 3D kernel
386 template<int T_D1D = 0, int T_Q1D = 0> static
387 void PAConvectionApply3D(const int ne,
388  const Array<double> &b,
389  const Array<double> &g,
390  const Array<double> &bt,
391  const Array<double> &gt,
392  const Vector &op_,
393  const Vector &x_,
394  Vector &y_,
395  const int d1d = 0,
396  const int q1d = 0)
397 {
398  const int NE = ne;
399  const int D1D = T_D1D ? T_D1D : d1d;
400  const int Q1D = T_Q1D ? T_Q1D : q1d;
401  MFEM_VERIFY(D1D <= MAX_D1D, "");
402  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
403  auto B = Reshape(b.Read(), Q1D, D1D);
404  auto G = Reshape(g.Read(), Q1D, D1D);
405  auto Bt = Reshape(bt.Read(), D1D, Q1D);
406  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
407  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
408  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
409  MFEM_FORALL(e, NE,
410  {
411  const int D1D = T_D1D ? T_D1D : d1d;
412  const int Q1D = T_Q1D ? T_Q1D : q1d;
413  // the following variables are evaluated at compile time
414  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
415  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
416 
417  double u[max_D1D][max_D1D][max_D1D];
418  for (int dz = 0; dz < D1D; ++dz)
419  {
420  for (int dy = 0; dy < D1D; ++dy)
421  {
422  for (int dx = 0; dx < D1D; ++dx)
423  {
424  u[dz][dy][dx] = x(dx,dy,dz,e);
425  }
426  }
427  }
428  double Bu[max_D1D][max_D1D][max_Q1D];
429  double Gu[max_D1D][max_D1D][max_Q1D];
430  for (int dz = 0; dz < D1D; ++dz)
431  {
432  for (int dy = 0; dy < D1D; ++dy)
433  {
434  for (int qx = 0; qx < Q1D; ++qx)
435  {
436  Bu[dz][dy][qx] = 0.0;
437  Gu[dz][dy][qx] = 0.0;
438  for (int dx = 0; dx < D1D; ++dx)
439  {
440  const double bx = B(qx,dx);
441  const double gx = G(qx,dx);
442  const double x = u[dz][dy][dx];
443  Bu[dz][dy][qx] += bx * x;
444  Gu[dz][dy][qx] += gx * x;
445  }
446  }
447  }
448  }
449  double BBu[max_D1D][max_Q1D][max_Q1D];
450  double GBu[max_D1D][max_Q1D][max_Q1D];
451  double BGu[max_D1D][max_Q1D][max_Q1D];
452  for (int dz = 0; dz < D1D; ++dz)
453  {
454  for (int qx = 0; qx < Q1D; ++qx)
455  {
456  for (int qy = 0; qy < Q1D; ++qy)
457  {
458  BBu[dz][qy][qx] = 0.0;
459  GBu[dz][qy][qx] = 0.0;
460  BGu[dz][qy][qx] = 0.0;
461  for (int dy = 0; dy < D1D; ++dy)
462  {
463  const double bx = B(qy,dy);
464  const double gx = G(qy,dy);
465  BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
466  GBu[dz][qy][qx] += gx * Bu[dz][dy][qx];
467  BGu[dz][qy][qx] += bx * Gu[dz][dy][qx];
468  }
469  }
470  }
471  }
472  double GBBu[max_Q1D][max_Q1D][max_Q1D];
473  double BGBu[max_Q1D][max_Q1D][max_Q1D];
474  double BBGu[max_Q1D][max_Q1D][max_Q1D];
475  for (int qx = 0; qx < Q1D; ++qx)
476  {
477  for (int qy = 0; qy < Q1D; ++qy)
478  {
479  for (int qz = 0; qz < Q1D; ++qz)
480  {
481  GBBu[qz][qy][qx] = 0.0;
482  BGBu[qz][qy][qx] = 0.0;
483  BBGu[qz][qy][qx] = 0.0;
484  for (int dz = 0; dz < D1D; ++dz)
485  {
486  const double bx = B(qz,dz);
487  const double gx = G(qz,dz);
488  GBBu[qz][qy][qx] += gx * BBu[dz][qy][qx];
489  BGBu[qz][qy][qx] += bx * GBu[dz][qy][qx];
490  BBGu[qz][qy][qx] += bx * BGu[dz][qy][qx];
491  }
492  }
493  }
494  }
495  // Calculate Dxy, xDy in plane
496  double DGu[max_Q1D][max_Q1D][max_Q1D];
497  for (int qz = 0; qz < Q1D; ++qz)
498  {
499  for (int qy = 0; qy < Q1D; ++qy)
500  {
501  for (int qx = 0; qx < Q1D; ++qx)
502  {
503  const double O1 = op(qx,qy,qz,0,e);
504  const double O2 = op(qx,qy,qz,1,e);
505  const double O3 = op(qx,qy,qz,2,e);
506 
507  const double gradX = BBGu[qz][qy][qx];
508  const double gradY = BGBu[qz][qy][qx];
509  const double gradZ = GBBu[qz][qy][qx];
510 
511  DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ);
512  }
513  }
514  }
515  double BDGu[max_D1D][max_Q1D][max_Q1D];
516  for (int qx = 0; qx < Q1D; ++qx)
517  {
518  for (int qy = 0; qy < Q1D; ++qy)
519  {
520  for (int dz = 0; dz < D1D; ++dz)
521  {
522  BDGu[dz][qy][qx] = 0.0;
523  for (int qz = 0; qz < Q1D; ++qz)
524  {
525  const double w = Bt(dz,qz);
526  BDGu[dz][qy][qx] += w * DGu[qz][qy][qx];
527  }
528  }
529  }
530  }
531  double BBDGu[max_D1D][max_D1D][max_Q1D];
532  for (int dz = 0; dz < D1D; ++dz)
533  {
534  for (int qx = 0; qx < Q1D; ++qx)
535  {
536  for (int dy = 0; dy < D1D; ++dy)
537  {
538  BBDGu[dz][dy][qx] = 0.0;
539  for (int qy = 0; qy < Q1D; ++qy)
540  {
541  const double w = Bt(dy,qy);
542  BBDGu[dz][dy][qx] += w * BDGu[dz][qy][qx];
543  }
544  }
545  }
546  }
547  for (int dz = 0; dz < D1D; ++dz)
548  {
549  for (int dy = 0; dy < D1D; ++dy)
550  {
551  for (int dx = 0; dx < D1D; ++dx)
552  {
553  double BBBDGu = 0.0;
554  for (int qx = 0; qx < Q1D; ++qx)
555  {
556  const double w = Bt(dx,qx);
557  BBBDGu += w * BBDGu[dz][dy][qx];
558  }
559  y(dx,dy,dz,e) += BBBDGu;
560  }
561  }
562  }
563  });
564 }
565 
566 // Optimized PA Convection Apply 3D kernel
567 template<int T_D1D = 0, int T_Q1D = 0> static
568 void SmemPAConvectionApply3D(const int ne,
569  const Array<double> &b,
570  const Array<double> &g,
571  const Array<double> &bt,
572  const Array<double> &gt,
573  const Vector &op_,
574  const Vector &x_,
575  Vector &y_,
576  const int d1d = 0,
577  const int q1d = 0)
578 {
579  const int NE = ne;
580  const int D1D = T_D1D ? T_D1D : d1d;
581  const int Q1D = T_Q1D ? T_Q1D : q1d;
582  MFEM_VERIFY(D1D <= MAX_D1D, "");
583  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
584  auto B = Reshape(b.Read(), Q1D, D1D);
585  auto G = Reshape(g.Read(), Q1D, D1D);
586  auto Bt = Reshape(bt.Read(), D1D, Q1D);
587  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
588  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
589  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
590  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
591  {
592  const int D1D = T_D1D ? T_D1D : d1d;
593  const int Q1D = T_Q1D ? T_Q1D : q1d;
594  // the following variables are evaluated at compile time
595  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
596  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
597  constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
598  MFEM_SHARED double sm0[max_DQ*max_DQ*max_DQ];
599  MFEM_SHARED double sm1[max_DQ*max_DQ*max_DQ];
600  MFEM_SHARED double sm2[max_DQ*max_DQ*max_DQ];
601  MFEM_SHARED double sm3[max_DQ*max_DQ*max_DQ];
602  MFEM_SHARED double sm4[max_DQ*max_DQ*max_DQ];
603  MFEM_SHARED double sm5[max_DQ*max_DQ*max_DQ];
604 
605  double (*u)[max_D1D][max_D1D] = (double (*)[max_D1D][max_D1D]) sm0;
606  MFEM_FOREACH_THREAD(dz,z,D1D)
607  {
608  MFEM_FOREACH_THREAD(dy,y,D1D)
609  {
610  MFEM_FOREACH_THREAD(dx,x,D1D)
611  {
612  u[dz][dy][dx] = x(dx,dy,dz,e);
613  }
614  }
615  }
616  MFEM_SYNC_THREAD;
617  double (*Bu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm1;
618  double (*Gu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm2;
619  MFEM_FOREACH_THREAD(dz,z,D1D)
620  {
621  MFEM_FOREACH_THREAD(dy,y,D1D)
622  {
623  MFEM_FOREACH_THREAD(qx,x,Q1D)
624  {
625  double Bu_ = 0.0;
626  double Gu_ = 0.0;
627  for (int dx = 0; dx < D1D; ++dx)
628  {
629  const double bx = B(qx,dx);
630  const double gx = G(qx,dx);
631  const double x = u[dz][dy][dx];
632  Bu_ += bx * x;
633  Gu_ += gx * x;
634  }
635  Bu[dz][dy][qx] = Bu_;
636  Gu[dz][dy][qx] = Gu_;
637  }
638  }
639  }
640  MFEM_SYNC_THREAD;
641  double (*BBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3;
642  double (*GBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4;
643  double (*BGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm5;
644  MFEM_FOREACH_THREAD(dz,z,D1D)
645  {
646  MFEM_FOREACH_THREAD(qx,x,Q1D)
647  {
648  MFEM_FOREACH_THREAD(qy,y,Q1D)
649  {
650  double BBu_ = 0.0;
651  double GBu_ = 0.0;
652  double BGu_ = 0.0;
653  for (int dy = 0; dy < D1D; ++dy)
654  {
655  const double bx = B(qy,dy);
656  const double gx = G(qy,dy);
657  BBu_ += bx * Bu[dz][dy][qx];
658  GBu_ += gx * Bu[dz][dy][qx];
659  BGu_ += bx * Gu[dz][dy][qx];
660  }
661  BBu[dz][qy][qx] = BBu_;
662  GBu[dz][qy][qx] = GBu_;
663  BGu[dz][qy][qx] = BGu_;
664  }
665  }
666  }
667  MFEM_SYNC_THREAD;
668  double (*GBBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm0;
669  double (*BGBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm1;
670  double (*BBGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm2;
671  MFEM_FOREACH_THREAD(qx,x,Q1D)
672  {
673  MFEM_FOREACH_THREAD(qy,y,Q1D)
674  {
675  MFEM_FOREACH_THREAD(qz,z,Q1D)
676  {
677  double GBBu_ = 0.0;
678  double BGBu_ = 0.0;
679  double BBGu_ = 0.0;
680  for (int dz = 0; dz < D1D; ++dz)
681  {
682  const double bx = B(qz,dz);
683  const double gx = G(qz,dz);
684  GBBu_ += gx * BBu[dz][qy][qx];
685  BGBu_ += bx * GBu[dz][qy][qx];
686  BBGu_ += bx * BGu[dz][qy][qx];
687  }
688  GBBu[qz][qy][qx] = GBBu_;
689  BGBu[qz][qy][qx] = BGBu_;
690  BBGu[qz][qy][qx] = BBGu_;
691  }
692  }
693  }
694  MFEM_SYNC_THREAD;
695  double (*DGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm3;
696  MFEM_FOREACH_THREAD(qz,z,Q1D)
697  {
698  MFEM_FOREACH_THREAD(qy,y,Q1D)
699  {
700  MFEM_FOREACH_THREAD(qx,x,Q1D)
701  {
702  const double O1 = op(qx,qy,qz,0,e);
703  const double O2 = op(qx,qy,qz,1,e);
704  const double O3 = op(qx,qy,qz,2,e);
705 
706  const double gradX = BBGu[qz][qy][qx];
707  const double gradY = BGBu[qz][qy][qx];
708  const double gradZ = GBBu[qz][qy][qx];
709 
710  DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ);
711  }
712  }
713  }
714  MFEM_SYNC_THREAD;
715  double (*BDGu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm4;
716  MFEM_FOREACH_THREAD(qx,x,Q1D)
717  {
718  MFEM_FOREACH_THREAD(qy,y,Q1D)
719  {
720  MFEM_FOREACH_THREAD(dz,z,D1D)
721  {
722  double BDGu_ = 0.0;
723  for (int qz = 0; qz < Q1D; ++qz)
724  {
725  const double w = Bt(dz,qz);
726  BDGu_ += w * DGu[qz][qy][qx];
727  }
728  BDGu[dz][qy][qx] = BDGu_;
729  }
730  }
731  }
732  MFEM_SYNC_THREAD;
733  double (*BBDGu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm5;
734  MFEM_FOREACH_THREAD(dz,z,D1D)
735  {
736  MFEM_FOREACH_THREAD(qx,x,Q1D)
737  {
738  MFEM_FOREACH_THREAD(dy,y,D1D)
739  {
740  double BBDGu_ = 0.0;
741  for (int qy = 0; qy < Q1D; ++qy)
742  {
743  const double w = Bt(dy,qy);
744  BBDGu_ += w * BDGu[dz][qy][qx];
745  }
746  BBDGu[dz][dy][qx] = BBDGu_;
747  }
748  }
749  }
750  MFEM_SYNC_THREAD;
751  MFEM_FOREACH_THREAD(dz,z,D1D)
752  {
753  MFEM_FOREACH_THREAD(dy,y,D1D)
754  {
755  MFEM_FOREACH_THREAD(dx,x,D1D)
756  {
757  double BBBDGu = 0.0;
758  for (int qx = 0; qx < Q1D; ++qx)
759  {
760  const double w = Bt(dx,qx);
761  BBBDGu += w * BBDGu[dz][dy][qx];
762  }
763  y(dx,dy,dz,e) += BBBDGu;
764  }
765  }
766  }
767  });
768 }
769 
770 // PA Convection Apply 2D kernel
771 template<int T_D1D = 0, int T_Q1D = 0> static
772 void PAConvectionApplyT2D(const int ne,
773  const Array<double> &b,
774  const Array<double> &g,
775  const Array<double> &bt,
776  const Array<double> &gt,
777  const Vector &op_,
778  const Vector &x_,
779  Vector &y_,
780  const int d1d = 0,
781  const int q1d = 0)
782 {
783  const int NE = ne;
784  const int D1D = T_D1D ? T_D1D : d1d;
785  const int Q1D = T_Q1D ? T_Q1D : q1d;
786  MFEM_VERIFY(D1D <= MAX_D1D, "");
787  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
788  auto B = Reshape(b.Read(), Q1D, D1D);
789  auto Bt = Reshape(bt.Read(), D1D, Q1D);
790  auto Gt = Reshape(gt.Read(), D1D, Q1D);
791  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
792  auto x = Reshape(x_.Read(), D1D, D1D, NE);
793  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
794  MFEM_FORALL(e, NE,
795  {
796  const int D1D = T_D1D ? T_D1D : d1d;
797  const int Q1D = T_Q1D ? T_Q1D : q1d;
798  // the following variables are evaluated at compile time
799  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
800  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
801 
802  double u[max_D1D][max_D1D];
803  for (int dy = 0; dy < D1D; ++dy)
804  {
805  for (int dx = 0; dx < D1D; ++dx)
806  {
807  u[dy][dx] = x(dx,dy,e);
808  }
809  }
810  double Bu[max_D1D][max_Q1D];
811  for (int dy = 0; dy < D1D; ++dy)
812  {
813  for (int qx = 0; qx < Q1D; ++qx)
814  {
815  Bu[dy][qx] = 0.0;
816  for (int dx = 0; dx < D1D; ++dx)
817  {
818  const double bx = B(qx,dx);
819  const double x = u[dy][dx];
820  Bu[dy][qx] += bx * x;
821  }
822  }
823  }
824  double BBu[max_Q1D][max_Q1D];
825  for (int qx = 0; qx < Q1D; ++qx)
826  {
827  for (int qy = 0; qy < Q1D; ++qy)
828  {
829  BBu[qy][qx] = 0.0;
830  for (int dy = 0; dy < D1D; ++dy)
831  {
832  const double bx = B(qy,dy);
833  BBu[qy][qx] += bx * Bu[dy][qx];
834  }
835  }
836  }
837  // Calculate Dxy, xDy in plane
838  double DBu[max_Q1D][max_Q1D][2];
839  for (int qy = 0; qy < Q1D; ++qy)
840  {
841  for (int qx = 0; qx < Q1D; ++qx)
842  {
843  const double O1 = op(qx,qy,0,e);
844  const double O2 = op(qx,qy,1,e);
845 
846  const double X = BBu[qy][qx];
847 
848  DBu[qy][qx][0] = O1 * X;
849  DBu[qy][qx][1] = O2 * X;
850  }
851  }
852  double GDBu[max_D1D][max_Q1D][2];
853  for (int qx = 0; qx < Q1D; ++qx)
854  {
855  for (int dy = 0; dy < D1D; ++dy)
856  {
857  GDBu[dy][qx][0] = 0.0;
858  GDBu[dy][qx][1] = 0.0;
859  for (int qy = 0; qy < Q1D; ++qy)
860  {
861  const double by = Bt(dy,qy);
862  const double gy = Gt(dy,qy);
863  GDBu[dy][qx][0] += by * DBu[qy][qx][0];
864  GDBu[dy][qx][1] += gy * DBu[qy][qx][1];
865  }
866  }
867  }
868  for (int dx = 0; dx < D1D; ++dx)
869  {
870  for (int dy = 0; dy < D1D; ++dy)
871  {
872  double res = 0.0;
873  for (int qx = 0; qx < Q1D; ++qx)
874  {
875  const double bx = Bt(dx,qx);
876  const double gx = Gt(dx,qx);
877  res += gx * GDBu[dy][qx][0] + bx * GDBu[dy][qx][1];
878  }
879  y(dx,dy,e) += res;
880  }
881  }
882  });
883 }
884 
885 // Optimized PA Convection Apply 2D kernel
886 template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
887 void SmemPAConvectionApplyT2D(const int ne,
888  const Array<double> &b,
889  const Array<double> &g,
890  const Array<double> &bt,
891  const Array<double> &gt,
892  const Vector &op_,
893  const Vector &x_,
894  Vector &y_,
895  const int d1d = 0,
896  const int q1d = 0)
897 {
898  const int NE = ne;
899  const int D1D = T_D1D ? T_D1D : d1d;
900  const int Q1D = T_Q1D ? T_Q1D : q1d;
901  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
902  MFEM_VERIFY(D1D <= MAX_D1D, "");
903  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
904  auto B = Reshape(b.Read(), Q1D, D1D);
905  auto Bt = Reshape(bt.Read(), D1D, Q1D);
906  auto Gt = Reshape(gt.Read(), D1D, Q1D);
907  auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
908  auto x = Reshape(x_.Read(), D1D, D1D, NE);
909  auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
910  MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
911  {
912  const int tidz = MFEM_THREAD_ID(z);
913  const int D1D = T_D1D ? T_D1D : d1d;
914  const int Q1D = T_Q1D ? T_Q1D : q1d;
915  // the following variables are evaluated at compile time
916  constexpr int NBZ = T_NBZ ? T_NBZ : 1;
917  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
918  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
919  MFEM_SHARED double u[NBZ][max_D1D][max_D1D];
920  MFEM_FOREACH_THREAD(dy,y,D1D)
921  {
922  MFEM_FOREACH_THREAD(dx,x,D1D)
923  {
924  // e is really equal to e+tidz
925  u[tidz][dy][dx] = x(dx,dy,e);
926  }
927  }
928  MFEM_SYNC_THREAD;
929  MFEM_SHARED double Bu[NBZ][max_D1D][max_Q1D];
930  MFEM_FOREACH_THREAD(dy,y,D1D)
931  {
932  MFEM_FOREACH_THREAD(qx,x,Q1D)
933  {
934  Bu[tidz][dy][qx] = 0.0;
935  for (int dx = 0; dx < D1D; ++dx)
936  {
937  const double bx = B(qx,dx);
938  const double x = u[tidz][dy][dx];
939  Bu[tidz][dy][qx] += bx * x;
940  }
941  }
942  }
943  MFEM_SYNC_THREAD;
944  MFEM_SHARED double BBu[NBZ][max_Q1D][max_Q1D];
945  MFEM_FOREACH_THREAD(qx,x,Q1D)
946  {
947  MFEM_FOREACH_THREAD(qy,y,Q1D)
948  {
949  BBu[tidz][qy][qx] = 0.0;
950  for (int dy = 0; dy < D1D; ++dy)
951  {
952  const double bx = B(qy,dy);
953  BBu[tidz][qy][qx] += bx * Bu[tidz][dy][qx];
954  }
955  }
956  }
957  MFEM_SYNC_THREAD;
958  // Calculate Dxy, xDy in plane
959  MFEM_SHARED double DBu[NBZ][max_Q1D][max_Q1D][2];
960  MFEM_FOREACH_THREAD(qy,y,Q1D)
961  {
962  MFEM_FOREACH_THREAD(qx,x,Q1D)
963  {
964  const double O1 = op(qx,qy,0,e);
965  const double O2 = op(qx,qy,1,e);
966 
967  const double X = BBu[tidz][qy][qx];
968 
969  DBu[tidz][qy][qx][0] = O1 * X;
970  DBu[tidz][qy][qx][1] = O2 * X;
971  }
972  }
973  MFEM_SYNC_THREAD;
974  MFEM_SHARED double GDBu[NBZ][max_D1D][max_Q1D][2];
975  MFEM_FOREACH_THREAD(qx,x,Q1D)
976  {
977  MFEM_FOREACH_THREAD(dy,y,D1D)
978  {
979  GDBu[tidz][dy][qx][0] = 0.0;
980  GDBu[tidz][dy][qx][1] = 0.0;
981  for (int qy = 0; qy < Q1D; ++qy)
982  {
983  const double by = Bt(dy,qy);
984  const double gy = Gt(dy,qy);
985  GDBu[tidz][dy][qx][0] += by * DBu[tidz][qy][qx][0];
986  GDBu[tidz][dy][qx][1] += gy * DBu[tidz][qy][qx][1];
987  }
988  }
989  }
990  MFEM_SYNC_THREAD;
991  MFEM_FOREACH_THREAD(dx,x,D1D)
992  {
993  MFEM_FOREACH_THREAD(dy,y,D1D)
994  {
995  double res = 0.0;
996  for (int qx = 0; qx < Q1D; ++qx)
997  {
998  const double bx = Bt(dx,qx);
999  const double gx = Gt(dx,qx);
1000  res += gx * GDBu[tidz][dy][qx][0] + bx * GDBu[tidz][dy][qx][1];
1001  }
1002  y(dx,dy,e) += res;
1003  }
1004  }
1005  });
1006 }
1007 
1008 // PA Convection Apply 3D kernel
1009 template<int T_D1D = 0, int T_Q1D = 0> static
1010 void PAConvectionApplyT3D(const int ne,
1011  const Array<double> &b,
1012  const Array<double> &g,
1013  const Array<double> &bt,
1014  const Array<double> &gt,
1015  const Vector &op_,
1016  const Vector &x_,
1017  Vector &y_,
1018  const int d1d = 0,
1019  const int q1d = 0)
1020 {
1021  const int NE = ne;
1022  const int D1D = T_D1D ? T_D1D : d1d;
1023  const int Q1D = T_Q1D ? T_Q1D : q1d;
1024  MFEM_VERIFY(D1D <= MAX_D1D, "");
1025  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
1026  auto B = Reshape(b.Read(), Q1D, D1D);
1027  auto Bt = Reshape(bt.Read(), D1D, Q1D);
1028  auto Gt = Reshape(gt.Read(), D1D, Q1D);
1029  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1030  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1031  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1032  MFEM_FORALL(e, NE,
1033  {
1034  const int D1D = T_D1D ? T_D1D : d1d;
1035  const int Q1D = T_Q1D ? T_Q1D : q1d;
1036  // the following variables are evaluated at compile time
1037  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
1038  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
1039 
1040  double u[max_D1D][max_D1D][max_D1D];
1041  for (int dz = 0; dz < D1D; ++dz)
1042  {
1043  for (int dy = 0; dy < D1D; ++dy)
1044  {
1045  for (int dx = 0; dx < D1D; ++dx)
1046  {
1047  u[dz][dy][dx] = x(dx,dy,dz,e);
1048  }
1049  }
1050  }
1051  double Bu[max_D1D][max_D1D][max_Q1D];
1052  for (int dz = 0; dz < D1D; ++dz)
1053  {
1054  for (int dy = 0; dy < D1D; ++dy)
1055  {
1056  for (int qx = 0; qx < Q1D; ++qx)
1057  {
1058  Bu[dz][dy][qx] = 0.0;
1059  for (int dx = 0; dx < D1D; ++dx)
1060  {
1061  const double bx = B(qx,dx);
1062  const double x = u[dz][dy][dx];
1063  Bu[dz][dy][qx] += bx * x;
1064  }
1065  }
1066  }
1067  }
1068  double BBu[max_D1D][max_Q1D][max_Q1D];
1069  for (int dz = 0; dz < D1D; ++dz)
1070  {
1071  for (int qx = 0; qx < Q1D; ++qx)
1072  {
1073  for (int qy = 0; qy < Q1D; ++qy)
1074  {
1075  BBu[dz][qy][qx] = 0.0;
1076  for (int dy = 0; dy < D1D; ++dy)
1077  {
1078  const double bx = B(qy,dy);
1079  BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
1080  }
1081  }
1082  }
1083  }
1084  double BBBu[max_Q1D][max_Q1D][max_Q1D];
1085  for (int qx = 0; qx < Q1D; ++qx)
1086  {
1087  for (int qy = 0; qy < Q1D; ++qy)
1088  {
1089  for (int qz = 0; qz < Q1D; ++qz)
1090  {
1091  BBBu[qz][qy][qx] = 0.0;
1092  for (int dz = 0; dz < D1D; ++dz)
1093  {
1094  const double bx = B(qz,dz);
1095  BBBu[qz][qy][qx] += bx * BBu[dz][qy][qx];
1096  }
1097  }
1098  }
1099  }
1100  // Calculate Dxy, xDy in plane
1101  double DBu[max_Q1D][max_Q1D][max_Q1D][3];
1102  for (int qz = 0; qz < Q1D; ++qz)
1103  {
1104  for (int qy = 0; qy < Q1D; ++qy)
1105  {
1106  for (int qx = 0; qx < Q1D; ++qx)
1107  {
1108  const double O1 = op(qx,qy,qz,0,e);
1109  const double O2 = op(qx,qy,qz,1,e);
1110  const double O3 = op(qx,qy,qz,2,e);
1111 
1112  const double X = BBBu[qz][qy][qx];
1113 
1114  DBu[qz][qy][qx][0] = O1 * X;
1115  DBu[qz][qy][qx][1] = O2 * X;
1116  DBu[qz][qy][qx][2] = O3 * X;
1117  }
1118  }
1119  }
1120  double GDBu[max_D1D][max_Q1D][max_Q1D][3];
1121  for (int qx = 0; qx < Q1D; ++qx)
1122  {
1123  for (int qy = 0; qy < Q1D; ++qy)
1124  {
1125  for (int dz = 0; dz < D1D; ++dz)
1126  {
1127  GDBu[dz][qy][qx][0] = 0.0;
1128  GDBu[dz][qy][qx][1] = 0.0;
1129  GDBu[dz][qy][qx][2] = 0.0;
1130  for (int qz = 0; qz < Q1D; ++qz)
1131  {
1132  const double bz = Bt(dz,qz);
1133  const double gz = Gt(dz,qz);
1134  GDBu[dz][qy][qx][0] += bz * DBu[qz][qy][qx][0];
1135  GDBu[dz][qy][qx][1] += bz * DBu[qz][qy][qx][1];
1136  GDBu[dz][qy][qx][2] += gz * DBu[qz][qy][qx][2];
1137  }
1138  }
1139  }
1140  }
1141  double GGDBu[max_D1D][max_D1D][max_Q1D][3];
1142  for (int dz = 0; dz < D1D; ++dz)
1143  {
1144  for (int qx = 0; qx < Q1D; ++qx)
1145  {
1146  for (int dy = 0; dy < D1D; ++dy)
1147  {
1148  GGDBu[dz][dy][qx][0] = 0.0;
1149  GGDBu[dz][dy][qx][1] = 0.0;
1150  GGDBu[dz][dy][qx][2] = 0.0;
1151  for (int qy = 0; qy < Q1D; ++qy)
1152  {
1153  const double by = Bt(dy,qy);
1154  const double gy = Gt(dy,qy);
1155  GGDBu[dz][dy][qx][0] += by * GDBu[dz][qy][qx][0];
1156  GGDBu[dz][dy][qx][1] += gy * GDBu[dz][qy][qx][1];
1157  GGDBu[dz][dy][qx][2] += by * GDBu[dz][qy][qx][2];
1158  }
1159  }
1160  }
1161  }
1162  for (int dz = 0; dz < D1D; ++dz)
1163  {
1164  for (int dy = 0; dy < D1D; ++dy)
1165  {
1166  for (int dx = 0; dx < D1D; ++dx)
1167  {
1168  double res = 0.0;
1169  for (int qx = 0; qx < Q1D; ++qx)
1170  {
1171  const double bx = Bt(dx,qx);
1172  const double gx = Gt(dx,qx);
1173  res += gx * GGDBu[dz][dy][qx][0];
1174  res += bx * GGDBu[dz][dy][qx][1];
1175  res += bx * GGDBu[dz][dy][qx][2];
1176  }
1177  y(dx,dy,dz,e) += res;
1178  }
1179  }
1180  }
1181  });
1182 }
1183 
1184 // Optimized PA Convection Apply 3D kernel
1185 template<int T_D1D = 0, int T_Q1D = 0> static
1186 void SmemPAConvectionApplyT3D(const int ne,
1187  const Array<double> &b,
1188  const Array<double> &g,
1189  const Array<double> &bt,
1190  const Array<double> &gt,
1191  const Vector &op_,
1192  const Vector &x_,
1193  Vector &y_,
1194  const int d1d = 0,
1195  const int q1d = 0)
1196 {
1197  const int NE = ne;
1198  const int D1D = T_D1D ? T_D1D : d1d;
1199  const int Q1D = T_Q1D ? T_Q1D : q1d;
1200  MFEM_VERIFY(D1D <= MAX_D1D, "");
1201  MFEM_VERIFY(Q1D <= MAX_Q1D, "");
1202  auto B = Reshape(b.Read(), Q1D, D1D);
1203  auto Bt = Reshape(bt.Read(), D1D, Q1D);
1204  auto Gt = Reshape(gt.Read(), D1D, Q1D);
1205  auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1206  auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1207  auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1208  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1209  {
1210  const int D1D = T_D1D ? T_D1D : d1d;
1211  const int Q1D = T_Q1D ? T_Q1D : q1d;
1212  // the following variables are evaluated at compile time
1213  constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D;
1214  constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D;
1215  constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
1216  MFEM_SHARED double sm0[3*max_DQ*max_DQ*max_DQ];
1217  MFEM_SHARED double sm1[3*max_DQ*max_DQ*max_DQ];
1218 
1219  double (*u)[max_D1D][max_D1D] = (double (*)[max_D1D][max_D1D]) sm0;
1220  MFEM_FOREACH_THREAD(dz,z,D1D)
1221  {
1222  MFEM_FOREACH_THREAD(dy,y,D1D)
1223  {
1224  MFEM_FOREACH_THREAD(dx,x,D1D)
1225  {
1226  u[dz][dy][dx] = x(dx,dy,dz,e);
1227  }
1228  }
1229  }
1230  MFEM_SYNC_THREAD;
1231  double (*Bu)[max_D1D][max_Q1D] = (double (*)[max_D1D][max_Q1D])sm1;
1232  MFEM_FOREACH_THREAD(dz,z,D1D)
1233  {
1234  MFEM_FOREACH_THREAD(dy,y,D1D)
1235  {
1236  MFEM_FOREACH_THREAD(qx,x,Q1D)
1237  {
1238  double Bu_ = 0.0;
1239  for (int dx = 0; dx < D1D; ++dx)
1240  {
1241  const double bx = B(qx,dx);
1242  const double x = u[dz][dy][dx];
1243  Bu_ += bx * x;
1244  }
1245  Bu[dz][dy][qx] = Bu_;
1246  }
1247  }
1248  }
1249  MFEM_SYNC_THREAD;
1250  double (*BBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm0;
1251  MFEM_FOREACH_THREAD(dz,z,D1D)
1252  {
1253  MFEM_FOREACH_THREAD(qx,x,Q1D)
1254  {
1255  MFEM_FOREACH_THREAD(qy,y,Q1D)
1256  {
1257  double BBu_ = 0.0;
1258  for (int dy = 0; dy < D1D; ++dy)
1259  {
1260  const double bx = B(qy,dy);
1261  BBu_ += bx * Bu[dz][dy][qx];
1262  }
1263  BBu[dz][qy][qx] = BBu_;
1264  }
1265  }
1266  }
1267  MFEM_SYNC_THREAD;
1268  double (*BBBu)[max_Q1D][max_Q1D] = (double (*)[max_Q1D][max_Q1D])sm1;
1269  MFEM_FOREACH_THREAD(qx,x,Q1D)
1270  {
1271  MFEM_FOREACH_THREAD(qy,y,Q1D)
1272  {
1273  MFEM_FOREACH_THREAD(qz,z,Q1D)
1274  {
1275  double BBBu_ = 0.0;
1276  for (int dz = 0; dz < D1D; ++dz)
1277  {
1278  const double bx = B(qz,dz);
1279  BBBu_ += bx * BBu[dz][qy][qx];
1280  }
1281  BBBu[qz][qy][qx] = BBBu_;
1282  }
1283  }
1284  }
1285  MFEM_SYNC_THREAD;
1286  double (*DBu)[max_Q1D][max_Q1D][3] = (double (*)[max_Q1D][max_Q1D][3])sm0;
1287  MFEM_FOREACH_THREAD(qz,z,Q1D)
1288  {
1289  MFEM_FOREACH_THREAD(qy,y,Q1D)
1290  {
1291  MFEM_FOREACH_THREAD(qx,x,Q1D)
1292  {
1293  const double O1 = op(qx,qy,qz,0,e);
1294  const double O2 = op(qx,qy,qz,1,e);
1295  const double O3 = op(qx,qy,qz,2,e);
1296 
1297  const double X = BBBu[qz][qy][qx];
1298 
1299  DBu[qz][qy][qx][0] = O1 * X;
1300  DBu[qz][qy][qx][1] = O2 * X;
1301  DBu[qz][qy][qx][2] = O3 * X;
1302  }
1303  }
1304  }
1305  MFEM_SYNC_THREAD;
1306  double (*GDBu)[max_Q1D][max_Q1D][3] = (double (*)[max_Q1D][max_Q1D][3])sm1;
1307  MFEM_FOREACH_THREAD(qx,x,Q1D)
1308  {
1309  MFEM_FOREACH_THREAD(qy,y,Q1D)
1310  {
1311  MFEM_FOREACH_THREAD(dz,z,D1D)
1312  {
1313  double GDBu0 = 0.0;
1314  double GDBu1 = 0.0;
1315  double GDBu2 = 0.0;
1316  for (int qz = 0; qz < Q1D; ++qz)
1317  {
1318  const double bz = Bt(dz,qz);
1319  const double gz = Gt(dz,qz);
1320  GDBu0 += bz * DBu[qz][qy][qx][0];
1321  GDBu1 += bz * DBu[qz][qy][qx][1];
1322  GDBu2 += gz * DBu[qz][qy][qx][2];
1323  }
1324  GDBu[dz][qy][qx][0] = GDBu0;
1325  GDBu[dz][qy][qx][1] = GDBu1;
1326  GDBu[dz][qy][qx][2] = GDBu2;
1327  }
1328  }
1329  }
1330  MFEM_SYNC_THREAD;
1331  double (*GGDBu)[max_D1D][max_Q1D][3] = (double (*)[max_D1D][max_Q1D][3])sm0;
1332  MFEM_FOREACH_THREAD(dz,z,D1D)
1333  {
1334  MFEM_FOREACH_THREAD(qx,x,Q1D)
1335  {
1336  MFEM_FOREACH_THREAD(dy,y,D1D)
1337  {
1338  double GGDBu0 = 0.0;
1339  double GGDBu1 = 0.0;
1340  double GGDBu2 = 0.0;
1341  for (int qy = 0; qy < Q1D; ++qy)
1342  {
1343  const double by = Bt(dy,qy);
1344  const double gy = Gt(dy,qy);
1345  GGDBu0 += by * GDBu[dz][qy][qx][0];
1346  GGDBu1 += gy * GDBu[dz][qy][qx][1];
1347  GGDBu2 += by * GDBu[dz][qy][qx][2];
1348  }
1349  GGDBu[dz][dy][qx][0] = GGDBu0;
1350  GGDBu[dz][dy][qx][1] = GGDBu1;
1351  GGDBu[dz][dy][qx][2] = GGDBu2;
1352  }
1353  }
1354  }
1355  MFEM_SYNC_THREAD;
1356  MFEM_FOREACH_THREAD(dz,z,D1D)
1357  {
1358  MFEM_FOREACH_THREAD(dy,y,D1D)
1359  {
1360  MFEM_FOREACH_THREAD(dx,x,D1D)
1361  {
1362  double res = 0.0;
1363  for (int qx = 0; qx < Q1D; ++qx)
1364  {
1365  const double bx = Bt(dx,qx);
1366  const double gx = Gt(dx,qx);
1367  res += gx * GGDBu[dz][dy][qx][0];
1368  res += bx * GGDBu[dz][dy][qx][1];
1369  res += bx * GGDBu[dz][dy][qx][2];
1370  }
1371  y(dx,dy,dz,e) += res;
1372  }
1373  }
1374  }
1375  });
1376 }
1377 
1379 {
1380  const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
1382  // Assumes tensor-product elements
1383  Mesh *mesh = fes.GetMesh();
1384  const FiniteElement &el = *fes.GetFE(0);
1386  const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans);
1387  if (DeviceCanUseCeed())
1388  {
1389  delete ceedOp;
1390  const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 ||
1391  fes.IsVariableOrder();
1392  if (mixed)
1393  {
1394  ceedOp = new ceed::MixedPAConvectionIntegrator(*this, fes, Q, alpha);
1395  }
1396  else
1397  {
1398  ceedOp = new ceed::PAConvectionIntegrator(fes, *ir, Q, alpha);
1399  }
1400  return;
1401  }
1402  const int dims = el.GetDim();
1403  const int symmDims = dims;
1404  nq = ir->GetNPoints();
1405  dim = mesh->Dimension();
1406  ne = fes.GetNE();
1408  maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
1409  dofs1D = maps->ndof;
1410  quad1D = maps->nqpt;
1411  pa_data.SetSize(symmDims * nq * ne, mt);
1412 
1413  QuadratureSpace qs(*mesh, *ir);
1415 
1416  PAConvectionSetup(dim, nq, ne, ir->GetWeights(), geom->J,
1417  vel, alpha, pa_data);
1418 }
1419 
1420 static void PAConvectionApply(const int dim,
1421  const int D1D,
1422  const int Q1D,
1423  const int NE,
1424  const Array<double> &B,
1425  const Array<double> &G,
1426  const Array<double> &Bt,
1427  const Array<double> &Gt,
1428  const Vector &op,
1429  const Vector &x,
1430  Vector &y)
1431 {
1432  if (dim == 2)
1433  {
1434  switch ((D1D << 4 ) | Q1D)
1435  {
1436  case 0x22: return SmemPAConvectionApply2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1437  case 0x33: return SmemPAConvectionApply2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1438  case 0x34: return SmemPAConvectionApply2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1439  case 0x44: return SmemPAConvectionApply2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1440  case 0x46: return SmemPAConvectionApply2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1441  case 0x55: return SmemPAConvectionApply2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1442  case 0x58: return SmemPAConvectionApply2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1443  case 0x66: return SmemPAConvectionApply2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1444  case 0x77: return SmemPAConvectionApply2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1445  case 0x88: return SmemPAConvectionApply2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1446  case 0x99: return SmemPAConvectionApply2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1447  default: return PAConvectionApply2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1448  }
1449  }
1450  else if (dim == 3)
1451  {
1452  switch ((D1D << 4 ) | Q1D)
1453  {
1454  case 0x22: return SmemPAConvectionApply3D<2,2>(NE,B,G,Bt,Gt,op,x,y);
1455  case 0x23: return SmemPAConvectionApply3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1456  case 0x24: return SmemPAConvectionApply3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1457  case 0x26: return SmemPAConvectionApply3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1458  case 0x34: return SmemPAConvectionApply3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1459  case 0x35: return SmemPAConvectionApply3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1460  case 0x45: return SmemPAConvectionApply3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1461  case 0x48: return SmemPAConvectionApply3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1462  case 0x56: return SmemPAConvectionApply3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1463  case 0x67: return SmemPAConvectionApply3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1464  case 0x78: return SmemPAConvectionApply3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1465  case 0x89: return SmemPAConvectionApply3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1466  default: return PAConvectionApply3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1467  }
1468  }
1469  MFEM_ABORT("Unknown kernel.");
1470 }
1471 
1472 static void PAConvectionApplyT(const int dim,
1473  const int D1D,
1474  const int Q1D,
1475  const int NE,
1476  const Array<double> &B,
1477  const Array<double> &G,
1478  const Array<double> &Bt,
1479  const Array<double> &Gt,
1480  const Vector &op,
1481  const Vector &x,
1482  Vector &y)
1483 {
1484  if (dim == 2)
1485  {
1486  switch ((D1D << 4 ) | Q1D)
1487  {
1488  case 0x22: return SmemPAConvectionApplyT2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1489  case 0x33: return SmemPAConvectionApplyT2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1490  case 0x34: return SmemPAConvectionApplyT2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1491  case 0x44: return SmemPAConvectionApplyT2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1492  case 0x46: return SmemPAConvectionApplyT2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1493  case 0x55: return SmemPAConvectionApplyT2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1494  case 0x58: return SmemPAConvectionApplyT2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1495  case 0x66: return SmemPAConvectionApplyT2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1496  case 0x77: return SmemPAConvectionApplyT2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1497  case 0x88: return SmemPAConvectionApplyT2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1498  case 0x99: return SmemPAConvectionApplyT2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1499  default: return PAConvectionApplyT2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1500  }
1501  }
1502  else if (dim == 3)
1503  {
1504  switch ((D1D << 4 ) | Q1D)
1505  {
1506  case 0x22: return SmemPAConvectionApplyT3D<2,2>(NE,B,G,Bt,Gt,op,x,y);
1507  case 0x23: return SmemPAConvectionApplyT3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1508  case 0x24: return SmemPAConvectionApplyT3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1509  case 0x26: return SmemPAConvectionApplyT3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1510  case 0x34: return SmemPAConvectionApplyT3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1511  case 0x35: return SmemPAConvectionApplyT3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1512  case 0x45: return SmemPAConvectionApplyT3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1513  case 0x48: return SmemPAConvectionApplyT3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1514  case 0x56: return SmemPAConvectionApplyT3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1515  case 0x67: return SmemPAConvectionApplyT3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1516  case 0x78: return SmemPAConvectionApplyT3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1517  case 0x89: return SmemPAConvectionApplyT3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1518  default: return PAConvectionApplyT3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1519  }
1520  }
1521  MFEM_ABORT("Unknown kernel.");
1522 }
1523 
1524 // PA Convection Apply kernel
1526 {
1527  if (DeviceCanUseCeed())
1528  {
1529  ceedOp->AddMult(x, y);
1530  }
1531  else
1532  {
1533  PAConvectionApply(dim, dofs1D, quad1D, ne,
1534  maps->B, maps->G, maps->Bt, maps->Gt,
1535  pa_data, x, y);
1536  }
1537 }
1538 
1539 // PA Convection Apply transpose kernel
1541 {
1542  if (DeviceCanUseCeed())
1543  {
1544  MFEM_ABORT("AddMultPA not yet implemented with libCEED for"
1545  " ConvectionIntegrator.");
1546  }
1547  else
1548  {
1549  PAConvectionApplyT(dim, dofs1D, quad1D, ne,
1550  maps->B, maps->G, maps->Bt, maps->Gt,
1551  pa_data, x, y);
1552  }
1553 }
1554 
1556 {
1557  if (DeviceCanUseCeed())
1558  {
1559  ceedOp->GetDiagonal(diag);
1560  }
1561  else
1562  {
1563  MFEM_ABORT("AssembleDiagonalPA not yet implemented for"
1564  " ConvectionIntegrator.");
1565  }
1566 }
1567 
1568 } // namespace mfem
Abstract class for all finite elements.
Definition: fe_base.hpp:232
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition: intrules.hpp:247
Class for an integration rule - an Array of IntegrationPoint.
Definition: intrules.hpp:90
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition: fespace.hpp:463
Represent a ConvectionIntegrator with AssemblyLevel::Partial using libCEED.
Definition: convection.hpp:26
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition: fe_base.hpp:160
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition: mesh.cpp:840
virtual void AddMultPA(const Vector &, Vector &) const
Method for partially assembled action.
int Dimension() const
Definition: mesh.hpp:1047
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:512
const GeometricFactors * geom
Not owned.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition: fe_base.hpp:172
Array< double > Gt
Transpose of G.
Definition: fe_base.hpp:211
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
Definition: fe_base.hpp:168
void AddMult(const mfem::Vector &x, mfem::Vector &y, const double a=1.0) const override
Operator application: y+=A(x) (default) or y+=a*A(x).
Definition: operator.cpp:72
constexpr int DIM
Class to represent a coefficient evaluated at quadrature points.
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i&#39;th element in t...
Definition: fespace.cpp:2783
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Definition: mesh.cpp:5921
virtual void AssembleDiagonalPA(Vector &diag)
Assemble diagonal and add it to Vector diag.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition: fe_base.cpp:364
Vector J
Jacobians of the element transformations at all quadrature points.
Definition: mesh.hpp:1956
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition: intrules.cpp:83
const int MAX_Q1D
Definition: forall.hpp:29
double b
Definition: lissajous.cpp:42
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:273
Enable all above compressions.
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:614
Array< double > Bt
Transpose of B.
Definition: fe_base.hpp:189
virtual void AssemblePA(const FiniteElementSpace &)
Method defining partial assembly.
Mesh * GetMesh() const
Returns the mesh.
Definition: fespace.hpp:441
int GetDim() const
Returns the reference space dimension for the finite element.
Definition: fe_base.hpp:310
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:96
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:31
static const IntegrationRule & GetRule(const FiniteElement &el, ElementTransformation &Trans)
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
Definition: util.cpp:33
virtual void AddMultTransposePA(const Vector &x, Vector &y) const
Method for partially assembled transposed action.
const IntegrationRule * IntRule
Definition: nonlininteg.hpp:30
Array< double > B
Basis functions evaluated at quadrature points.
Definition: fe_base.hpp:183
void GetDiagonal(mfem::Vector &diag) const
Definition: operator.cpp:104
int dim
Definition: ex24.cpp:53
const int MAX_D1D
Definition: forall.hpp:28
Array< double > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition: fe_base.hpp:204
constexpr int SDIM
const double alpha
Definition: ex15.cpp:369
Vector data type.
Definition: vector.hpp:60
Class representing the storage layout of a QuadratureFunction.
Definition: qspace.hpp:92
double u(const Vector &xvec)
Definition: lor_mms.hpp:24
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
const DofToQuad * maps
Not owned.
VectorCoefficient * Q
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Definition: fespace.hpp:647