MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_convection_pa.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../bilininteg.hpp"
14#include "../gridfunc.hpp"
15#include "../qfunction.hpp"
17
18namespace mfem
19{
20
21// PA Convection Assemble 2D kernel
22static void PAConvectionSetup2D(const int NQ,
23 const int NE,
24 const Array<real_t> &w,
25 const Vector &j,
26 const Vector &vel,
27 const real_t alpha,
28 Vector &op)
29{
30 constexpr int DIM = 2;
31
32 const bool const_v = vel.Size() == DIM;
33
34 const auto W = w.Read();
35 const auto J = Reshape(j.Read(), NQ,DIM,DIM,NE);
36 const auto V = const_v ?
37 Reshape(vel.Read(), DIM,1,1) :
38 Reshape(vel.Read(), DIM,NQ,NE);
39 auto y = Reshape(op.Write(), NQ,DIM,NE);
40
41 mfem::forall(NE*NQ, [=] MFEM_HOST_DEVICE (int q_global)
42 {
43 const int e = q_global / NQ;
44 const int q = q_global % NQ;
45 const real_t J11 = J(q,0,0,e);
46 const real_t J21 = J(q,1,0,e);
47 const real_t J12 = J(q,0,1,e);
48 const real_t J22 = J(q,1,1,e);
49 const real_t w = alpha * W[q];
50 const real_t v0 = const_v ? V(0,0,0) : V(0,q,e);
51 const real_t v1 = const_v ? V(1,0,0) : V(1,q,e);
52 const real_t wx = w * v0;
53 const real_t wy = w * v1;
54 // y = alpha * W * det(J) * J^{-1} . v = adj(J) . { wx, wy }
55 y(q,0,e) = wx * J22 - wy * J12; // 1
56 y(q,1,e) = -wx * J21 + wy * J11; // 2
57 });
58}
59
60// PA Convection Assemble 3D kernel
61static void PAConvectionSetup3D(const int NQ,
62 const int NE,
63 const Array<real_t> &w,
64 const Vector &j,
65 const Vector &vel,
66 const real_t alpha,
67 Vector &op)
68{
69 constexpr int DIM = 3;
70 constexpr int SDIM = DIM;
71 const auto W = Reshape(w.Read(), NQ);
72 const auto J = Reshape(j.Read(), NQ,SDIM,DIM,NE);
73 const bool const_v = vel.Size() == DIM;
74 const auto V = const_v ?
75 Reshape(vel.Read(), 3,1,1) :
76 Reshape(vel.Read(), 3,NQ,NE);
77 auto y = Reshape(op.Write(), NQ,3,NE);
78 mfem::forall(NE*NQ, [=] MFEM_HOST_DEVICE (int q_global)
79 {
80 const int e = q_global / NQ;
81 const int q = q_global % NQ;
82 const real_t J11 = J(q,0,0,e);
83 const real_t J12 = J(q,0,1,e);
84 const real_t J13 = J(q,0,2,e);
85 const real_t J21 = J(q,1,0,e);
86 const real_t J22 = J(q,1,1,e);
87 const real_t J23 = J(q,1,2,e);
88 const real_t J31 = J(q,2,0,e);
89 const real_t J32 = J(q,2,1,e);
90 const real_t J33 = J(q,2,2,e);
91 const real_t w = alpha * W(q);
92 const real_t v0 = const_v ? V(0,0,0) : V(0,q,e);
93 const real_t v1 = const_v ? V(1,0,0) : V(1,q,e);
94 const real_t v2 = const_v ? V(2,0,0) : V(2,q,e);
95 const real_t wx = w * v0;
96 const real_t wy = w * v1;
97 const real_t wz = w * v2;
98 // A = adj(J)
99 const real_t A11 = (J22 * J33) - (J23 * J32);
100 const real_t A12 = (J32 * J13) - (J12 * J33);
101 const real_t A13 = (J12 * J23) - (J22 * J13);
102 const real_t A21 = (J31 * J23) - (J21 * J33);
103 const real_t A22 = (J11 * J33) - (J13 * J31);
104 const real_t A23 = (J21 * J13) - (J11 * J23);
105 const real_t A31 = (J21 * J32) - (J31 * J22);
106 const real_t A32 = (J31 * J12) - (J11 * J32);
107 const real_t A33 = (J11 * J22) - (J12 * J21);
108 // y = alpha * W * det(J) * J^{-1} . v = adj(J) . { wx, wy, wz }
109 y(q,0,e) = wx * A11 + wy * A12 + wz * A13;
110 y(q,1,e) = wx * A21 + wy * A22 + wz * A23;
111 y(q,2,e) = wx * A31 + wy * A32 + wz * A33;
112 });
113}
114
115static void PAConvectionSetup(const int dim,
116 const int NQ,
117 const int NE,
118 const Array<real_t> &W,
119 const Vector &J,
120 const Vector &coeff,
121 const real_t alpha,
122 Vector &op)
123{
124 if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAConvectionSetup"); }
125 if (dim == 2)
126 {
127 PAConvectionSetup2D(NQ, NE, W, J, coeff, alpha, op);
128 }
129 if (dim == 3)
130 {
131 PAConvectionSetup3D(NQ, NE, W, J, coeff, alpha, op);
132 }
133}
134
136{
137 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
139 // Assumes tensor-product elements
140 Mesh *mesh = fes.GetMesh();
141 const FiniteElement &el = *fes.GetTypicalFE();
143 const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans);
144 if (DeviceCanUseCeed())
145 {
146 delete ceedOp;
147 const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 ||
148 fes.IsVariableOrder();
149 if (mixed)
150 {
152 }
153 else
154 {
155 ceedOp = new ceed::PAConvectionIntegrator(fes, *ir, Q, alpha);
156 }
157 return;
158 }
159 const int dims = el.GetDim();
160 const int symmDims = dims;
161 nq = ir->GetNPoints();
162 dim = mesh->Dimension();
163 ne = fes.GetNE();
166 dofs1D = maps->ndof;
167 quad1D = maps->nqpt;
168 pa_data.SetSize(symmDims * nq * ne, mt);
169
170 QuadratureSpace qs(*mesh, *ir);
172
173 PAConvectionSetup(dim, nq, ne, ir->GetWeights(), geom->J,
174 vel, alpha, pa_data);
175}
176
178{
179 if (DeviceCanUseCeed())
180 {
181 ceedOp->GetDiagonal(diag);
182 }
183 else
184 {
185 MFEM_ABORT("AssembleDiagonalPA not yet implemented for"
186 " ConvectionIntegrator.");
187 }
188}
189
190// PA Convection Apply 2D kernel
191template<int T_D1D = 0, int T_Q1D = 0> static
192void PAConvectionApply2D(const int ne,
193 const Array<real_t> &b,
194 const Array<real_t> &g,
195 const Array<real_t> &bt,
196 const Array<real_t> &gt,
197 const Vector &op_,
198 const Vector &x_,
199 Vector &y_,
200 const int d1d = 0,
201 const int q1d = 0)
202{
203 const int NE = ne;
204 const int D1D = T_D1D ? T_D1D : d1d;
205 const int Q1D = T_Q1D ? T_Q1D : q1d;
206 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
207 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
208 auto B = Reshape(b.Read(), Q1D, D1D);
209 auto G = Reshape(g.Read(), Q1D, D1D);
210 auto Bt = Reshape(bt.Read(), D1D, Q1D);
211 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
212 auto x = Reshape(x_.Read(), D1D, D1D, NE);
213 auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
214 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
215 {
216 const int D1D = T_D1D ? T_D1D : d1d;
217 const int Q1D = T_Q1D ? T_Q1D : q1d;
218 // the following variables are evaluated at compile time
219 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
220 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
221
222 real_t u[max_D1D][max_D1D];
223 for (int dy = 0; dy < D1D; ++dy)
224 {
225 for (int dx = 0; dx < D1D; ++dx)
226 {
227 u[dy][dx] = x(dx,dy,e);
228 }
229 }
230 real_t Bu[max_D1D][max_Q1D];
231 real_t Gu[max_D1D][max_Q1D];
232 for (int dy = 0; dy < D1D; ++dy)
233 {
234 for (int qx = 0; qx < Q1D; ++qx)
235 {
236 Bu[dy][qx] = 0.0;
237 Gu[dy][qx] = 0.0;
238 for (int dx = 0; dx < D1D; ++dx)
239 {
240 const real_t bx = B(qx,dx);
241 const real_t gx = G(qx,dx);
242 const real_t x = u[dy][dx];
243 Bu[dy][qx] += bx * x;
244 Gu[dy][qx] += gx * x;
245 }
246 }
247 }
248 real_t GBu[max_Q1D][max_Q1D];
249 real_t BGu[max_Q1D][max_Q1D];
250 for (int qx = 0; qx < Q1D; ++qx)
251 {
252 for (int qy = 0; qy < Q1D; ++qy)
253 {
254 GBu[qy][qx] = 0.0;
255 BGu[qy][qx] = 0.0;
256 for (int dy = 0; dy < D1D; ++dy)
257 {
258 const real_t bx = B(qy,dy);
259 const real_t gx = G(qy,dy);
260 GBu[qy][qx] += gx * Bu[dy][qx];
261 BGu[qy][qx] += bx * Gu[dy][qx];
262 }
263 }
264 }
265 // Calculate Dxy, xDy in plane
266 real_t DGu[max_Q1D][max_Q1D];
267 for (int qy = 0; qy < Q1D; ++qy)
268 {
269 for (int qx = 0; qx < Q1D; ++qx)
270 {
271 const real_t O1 = op(qx,qy,0,e);
272 const real_t O2 = op(qx,qy,1,e);
273
274 const real_t gradX = BGu[qy][qx];
275 const real_t gradY = GBu[qy][qx];
276
277 DGu[qy][qx] = (O1 * gradX) + (O2 * gradY);
278 }
279 }
280 real_t BDGu[max_D1D][max_Q1D];
281 for (int qx = 0; qx < Q1D; ++qx)
282 {
283 for (int dy = 0; dy < D1D; ++dy)
284 {
285 BDGu[dy][qx] = 0.0;
286 for (int qy = 0; qy < Q1D; ++qy)
287 {
288 const real_t w = Bt(dy,qy);
289 BDGu[dy][qx] += w * DGu[qy][qx];
290 }
291 }
292 }
293 for (int dx = 0; dx < D1D; ++dx)
294 {
295 for (int dy = 0; dy < D1D; ++dy)
296 {
297 real_t BBDGu = 0.0;
298 for (int qx = 0; qx < Q1D; ++qx)
299 {
300 const real_t w = Bt(dx,qx);
301 BBDGu += w * BDGu[dy][qx];
302 }
303 y(dx,dy,e) += BBDGu;
304 }
305 }
306 });
307}
308
309// Optimized PA Convection Apply 2D kernel
310template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
311void SmemPAConvectionApply2D(const int ne,
312 const Array<real_t> &b,
313 const Array<real_t> &g,
314 const Array<real_t> &bt,
315 const Array<real_t> &gt,
316 const Vector &op_,
317 const Vector &x_,
318 Vector &y_,
319 const int d1d = 0,
320 const int q1d = 0)
321{
322 const int NE = ne;
323 const int D1D = T_D1D ? T_D1D : d1d;
324 const int Q1D = T_Q1D ? T_Q1D : q1d;
325 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
326 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
327 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
328 auto B = Reshape(b.Read(), Q1D, D1D);
329 auto G = Reshape(g.Read(), Q1D, D1D);
330 auto Bt = Reshape(bt.Read(), D1D, Q1D);
331 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
332 auto x = Reshape(x_.Read(), D1D, D1D, NE);
333 auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
334 mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e)
335 {
336 const int tidz = MFEM_THREAD_ID(z);
337 const int D1D = T_D1D ? T_D1D : d1d;
338 const int Q1D = T_Q1D ? T_Q1D : q1d;
339 // the following variables are evaluated at compile time
340 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
341 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
342 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
343 // constexpr int MDQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
344 MFEM_SHARED real_t u[NBZ][max_D1D][max_D1D];
345 MFEM_FOREACH_THREAD(dy,y,D1D)
346 {
347 MFEM_FOREACH_THREAD(dx,x,D1D)
348 {
349 // e is really equal to e+tidz
350 u[tidz][dy][dx] = x(dx,dy,e);
351 }
352 }
353 MFEM_SYNC_THREAD;
354 MFEM_SHARED real_t Bu[NBZ][max_D1D][max_Q1D];
355 MFEM_SHARED real_t Gu[NBZ][max_D1D][max_Q1D];
356 MFEM_FOREACH_THREAD(dy,y,D1D)
357 {
358 MFEM_FOREACH_THREAD(qx,x,Q1D)
359 {
360 Bu[tidz][dy][qx] = 0.0;
361 Gu[tidz][dy][qx] = 0.0;
362 for (int dx = 0; dx < D1D; ++dx)
363 {
364 const real_t bx = B(qx,dx);
365 const real_t gx = G(qx,dx);
366 const real_t x = u[tidz][dy][dx];
367 Bu[tidz][dy][qx] += bx * x;
368 Gu[tidz][dy][qx] += gx * x;
369 }
370 }
371 }
372 MFEM_SYNC_THREAD;
373 MFEM_SHARED real_t GBu[NBZ][max_Q1D][max_Q1D];
374 MFEM_SHARED real_t BGu[NBZ][max_Q1D][max_Q1D];
375 MFEM_FOREACH_THREAD(qx,x,Q1D)
376 {
377 MFEM_FOREACH_THREAD(qy,y,Q1D)
378 {
379 GBu[tidz][qy][qx] = 0.0;
380 BGu[tidz][qy][qx] = 0.0;
381 for (int dy = 0; dy < D1D; ++dy)
382 {
383 const real_t bx = B(qy,dy);
384 const real_t gx = G(qy,dy);
385 GBu[tidz][qy][qx] += gx * Bu[tidz][dy][qx];
386 BGu[tidz][qy][qx] += bx * Gu[tidz][dy][qx];
387 }
388 }
389 }
390 MFEM_SYNC_THREAD;
391 // Calculate Dxy, xDy in plane
392 MFEM_SHARED real_t DGu[NBZ][max_Q1D][max_Q1D];
393 MFEM_FOREACH_THREAD(qy,y,Q1D)
394 {
395 MFEM_FOREACH_THREAD(qx,x,Q1D)
396 {
397 const real_t O1 = op(qx,qy,0,e);
398 const real_t O2 = op(qx,qy,1,e);
399
400 const real_t gradX = BGu[tidz][qy][qx];
401 const real_t gradY = GBu[tidz][qy][qx];
402
403 DGu[tidz][qy][qx] = (O1 * gradX) + (O2 * gradY);
404 }
405 }
406 MFEM_SYNC_THREAD;
407 MFEM_SHARED real_t BDGu[NBZ][max_D1D][max_Q1D];
408 MFEM_FOREACH_THREAD(qx,x,Q1D)
409 {
410 MFEM_FOREACH_THREAD(dy,y,D1D)
411 {
412 BDGu[tidz][dy][qx] = 0.0;
413 for (int qy = 0; qy < Q1D; ++qy)
414 {
415 const real_t w = Bt(dy,qy);
416 BDGu[tidz][dy][qx] += w * DGu[tidz][qy][qx];
417 }
418 }
419 }
420 MFEM_SYNC_THREAD;
421 MFEM_FOREACH_THREAD(dx,x,D1D)
422 {
423 MFEM_FOREACH_THREAD(dy,y,D1D)
424 {
425 real_t BBDGu = 0.0;
426 for (int qx = 0; qx < Q1D; ++qx)
427 {
428 const real_t w = Bt(dx,qx);
429 BBDGu += w * BDGu[tidz][dy][qx];
430 }
431 y(dx,dy,e) += BBDGu;
432 }
433 }
434 });
435}
436
437// PA Convection Apply 3D kernel
438template<int T_D1D = 0, int T_Q1D = 0> static
439void PAConvectionApply3D(const int ne,
440 const Array<real_t> &b,
441 const Array<real_t> &g,
442 const Array<real_t> &bt,
443 const Array<real_t> &gt,
444 const Vector &op_,
445 const Vector &x_,
446 Vector &y_,
447 const int d1d = 0,
448 const int q1d = 0)
449{
450 const int NE = ne;
451 const int D1D = T_D1D ? T_D1D : d1d;
452 const int Q1D = T_Q1D ? T_Q1D : q1d;
453 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
454 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
455 auto B = Reshape(b.Read(), Q1D, D1D);
456 auto G = Reshape(g.Read(), Q1D, D1D);
457 auto Bt = Reshape(bt.Read(), D1D, Q1D);
458 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
459 auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
460 auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
461 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
462 {
463 const int D1D = T_D1D ? T_D1D : d1d;
464 const int Q1D = T_Q1D ? T_Q1D : q1d;
465 // the following variables are evaluated at compile time
466 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
467 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
468
469 real_t u[max_D1D][max_D1D][max_D1D];
470 for (int dz = 0; dz < D1D; ++dz)
471 {
472 for (int dy = 0; dy < D1D; ++dy)
473 {
474 for (int dx = 0; dx < D1D; ++dx)
475 {
476 u[dz][dy][dx] = x(dx,dy,dz,e);
477 }
478 }
479 }
480 real_t Bu[max_D1D][max_D1D][max_Q1D];
481 real_t Gu[max_D1D][max_D1D][max_Q1D];
482 for (int dz = 0; dz < D1D; ++dz)
483 {
484 for (int dy = 0; dy < D1D; ++dy)
485 {
486 for (int qx = 0; qx < Q1D; ++qx)
487 {
488 Bu[dz][dy][qx] = 0.0;
489 Gu[dz][dy][qx] = 0.0;
490 for (int dx = 0; dx < D1D; ++dx)
491 {
492 const real_t bx = B(qx,dx);
493 const real_t gx = G(qx,dx);
494 const real_t x = u[dz][dy][dx];
495 Bu[dz][dy][qx] += bx * x;
496 Gu[dz][dy][qx] += gx * x;
497 }
498 }
499 }
500 }
501 real_t BBu[max_D1D][max_Q1D][max_Q1D];
502 real_t GBu[max_D1D][max_Q1D][max_Q1D];
503 real_t BGu[max_D1D][max_Q1D][max_Q1D];
504 for (int dz = 0; dz < D1D; ++dz)
505 {
506 for (int qx = 0; qx < Q1D; ++qx)
507 {
508 for (int qy = 0; qy < Q1D; ++qy)
509 {
510 BBu[dz][qy][qx] = 0.0;
511 GBu[dz][qy][qx] = 0.0;
512 BGu[dz][qy][qx] = 0.0;
513 for (int dy = 0; dy < D1D; ++dy)
514 {
515 const real_t bx = B(qy,dy);
516 const real_t gx = G(qy,dy);
517 BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
518 GBu[dz][qy][qx] += gx * Bu[dz][dy][qx];
519 BGu[dz][qy][qx] += bx * Gu[dz][dy][qx];
520 }
521 }
522 }
523 }
524 real_t GBBu[max_Q1D][max_Q1D][max_Q1D];
525 real_t BGBu[max_Q1D][max_Q1D][max_Q1D];
526 real_t BBGu[max_Q1D][max_Q1D][max_Q1D];
527 for (int qx = 0; qx < Q1D; ++qx)
528 {
529 for (int qy = 0; qy < Q1D; ++qy)
530 {
531 for (int qz = 0; qz < Q1D; ++qz)
532 {
533 GBBu[qz][qy][qx] = 0.0;
534 BGBu[qz][qy][qx] = 0.0;
535 BBGu[qz][qy][qx] = 0.0;
536 for (int dz = 0; dz < D1D; ++dz)
537 {
538 const real_t bx = B(qz,dz);
539 const real_t gx = G(qz,dz);
540 GBBu[qz][qy][qx] += gx * BBu[dz][qy][qx];
541 BGBu[qz][qy][qx] += bx * GBu[dz][qy][qx];
542 BBGu[qz][qy][qx] += bx * BGu[dz][qy][qx];
543 }
544 }
545 }
546 }
547 // Calculate Dxy, xDy in plane
548 real_t DGu[max_Q1D][max_Q1D][max_Q1D];
549 for (int qz = 0; qz < Q1D; ++qz)
550 {
551 for (int qy = 0; qy < Q1D; ++qy)
552 {
553 for (int qx = 0; qx < Q1D; ++qx)
554 {
555 const real_t O1 = op(qx,qy,qz,0,e);
556 const real_t O2 = op(qx,qy,qz,1,e);
557 const real_t O3 = op(qx,qy,qz,2,e);
558
559 const real_t gradX = BBGu[qz][qy][qx];
560 const real_t gradY = BGBu[qz][qy][qx];
561 const real_t gradZ = GBBu[qz][qy][qx];
562
563 DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ);
564 }
565 }
566 }
567 real_t BDGu[max_D1D][max_Q1D][max_Q1D];
568 for (int qx = 0; qx < Q1D; ++qx)
569 {
570 for (int qy = 0; qy < Q1D; ++qy)
571 {
572 for (int dz = 0; dz < D1D; ++dz)
573 {
574 BDGu[dz][qy][qx] = 0.0;
575 for (int qz = 0; qz < Q1D; ++qz)
576 {
577 const real_t w = Bt(dz,qz);
578 BDGu[dz][qy][qx] += w * DGu[qz][qy][qx];
579 }
580 }
581 }
582 }
583 real_t BBDGu[max_D1D][max_D1D][max_Q1D];
584 for (int dz = 0; dz < D1D; ++dz)
585 {
586 for (int qx = 0; qx < Q1D; ++qx)
587 {
588 for (int dy = 0; dy < D1D; ++dy)
589 {
590 BBDGu[dz][dy][qx] = 0.0;
591 for (int qy = 0; qy < Q1D; ++qy)
592 {
593 const real_t w = Bt(dy,qy);
594 BBDGu[dz][dy][qx] += w * BDGu[dz][qy][qx];
595 }
596 }
597 }
598 }
599 for (int dz = 0; dz < D1D; ++dz)
600 {
601 for (int dy = 0; dy < D1D; ++dy)
602 {
603 for (int dx = 0; dx < D1D; ++dx)
604 {
605 real_t BBBDGu = 0.0;
606 for (int qx = 0; qx < Q1D; ++qx)
607 {
608 const real_t w = Bt(dx,qx);
609 BBBDGu += w * BBDGu[dz][dy][qx];
610 }
611 y(dx,dy,dz,e) += BBBDGu;
612 }
613 }
614 }
615 });
616}
617
618// Optimized PA Convection Apply 3D kernel
619template<int T_D1D = 0, int T_Q1D = 0> static
620void SmemPAConvectionApply3D(const int ne,
621 const Array<real_t> &b,
622 const Array<real_t> &g,
623 const Array<real_t> &bt,
624 const Array<real_t> &gt,
625 const Vector &op_,
626 const Vector &x_,
627 Vector &y_,
628 const int d1d = 0,
629 const int q1d = 0)
630{
631 const int NE = ne;
632 const int D1D = T_D1D ? T_D1D : d1d;
633 const int Q1D = T_Q1D ? T_Q1D : q1d;
634 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
635 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
636 auto B = Reshape(b.Read(), Q1D, D1D);
637 auto G = Reshape(g.Read(), Q1D, D1D);
638 auto Bt = Reshape(bt.Read(), D1D, Q1D);
639 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
640 auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
641 auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
642 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
643 {
644 const int D1D = T_D1D ? T_D1D : d1d;
645 const int Q1D = T_Q1D ? T_Q1D : q1d;
646 // the following variables are evaluated at compile time
647 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
648 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
649 constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
650 MFEM_SHARED real_t sm0[max_DQ*max_DQ*max_DQ];
651 MFEM_SHARED real_t sm1[max_DQ*max_DQ*max_DQ];
652 MFEM_SHARED real_t sm2[max_DQ*max_DQ*max_DQ];
653 MFEM_SHARED real_t sm3[max_DQ*max_DQ*max_DQ];
654 MFEM_SHARED real_t sm4[max_DQ*max_DQ*max_DQ];
655 MFEM_SHARED real_t sm5[max_DQ*max_DQ*max_DQ];
656
657 real_t (*u)[max_D1D][max_D1D] = (real_t (*)[max_D1D][max_D1D]) sm0;
658 MFEM_FOREACH_THREAD(dz,z,D1D)
659 {
660 MFEM_FOREACH_THREAD(dy,y,D1D)
661 {
662 MFEM_FOREACH_THREAD(dx,x,D1D)
663 {
664 u[dz][dy][dx] = x(dx,dy,dz,e);
665 }
666 }
667 }
668 MFEM_SYNC_THREAD;
669 real_t (*Bu)[max_D1D][max_Q1D] = (real_t (*)[max_D1D][max_Q1D])sm1;
670 real_t (*Gu)[max_D1D][max_Q1D] = (real_t (*)[max_D1D][max_Q1D])sm2;
671 MFEM_FOREACH_THREAD(dz,z,D1D)
672 {
673 MFEM_FOREACH_THREAD(dy,y,D1D)
674 {
675 MFEM_FOREACH_THREAD(qx,x,Q1D)
676 {
677 real_t Bu_ = 0.0;
678 real_t Gu_ = 0.0;
679 for (int dx = 0; dx < D1D; ++dx)
680 {
681 const real_t bx = B(qx,dx);
682 const real_t gx = G(qx,dx);
683 const real_t x = u[dz][dy][dx];
684 Bu_ += bx * x;
685 Gu_ += gx * x;
686 }
687 Bu[dz][dy][qx] = Bu_;
688 Gu[dz][dy][qx] = Gu_;
689 }
690 }
691 }
692 MFEM_SYNC_THREAD;
693 real_t (*BBu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm3;
694 real_t (*GBu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm4;
695 real_t (*BGu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm5;
696 MFEM_FOREACH_THREAD(dz,z,D1D)
697 {
698 MFEM_FOREACH_THREAD(qx,x,Q1D)
699 {
700 MFEM_FOREACH_THREAD(qy,y,Q1D)
701 {
702 real_t BBu_ = 0.0;
703 real_t GBu_ = 0.0;
704 real_t BGu_ = 0.0;
705 for (int dy = 0; dy < D1D; ++dy)
706 {
707 const real_t bx = B(qy,dy);
708 const real_t gx = G(qy,dy);
709 BBu_ += bx * Bu[dz][dy][qx];
710 GBu_ += gx * Bu[dz][dy][qx];
711 BGu_ += bx * Gu[dz][dy][qx];
712 }
713 BBu[dz][qy][qx] = BBu_;
714 GBu[dz][qy][qx] = GBu_;
715 BGu[dz][qy][qx] = BGu_;
716 }
717 }
718 }
719 MFEM_SYNC_THREAD;
720 real_t (*GBBu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm0;
721 real_t (*BGBu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm1;
722 real_t (*BBGu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm2;
723 MFEM_FOREACH_THREAD(qx,x,Q1D)
724 {
725 MFEM_FOREACH_THREAD(qy,y,Q1D)
726 {
727 MFEM_FOREACH_THREAD(qz,z,Q1D)
728 {
729 real_t GBBu_ = 0.0;
730 real_t BGBu_ = 0.0;
731 real_t BBGu_ = 0.0;
732 for (int dz = 0; dz < D1D; ++dz)
733 {
734 const real_t bx = B(qz,dz);
735 const real_t gx = G(qz,dz);
736 GBBu_ += gx * BBu[dz][qy][qx];
737 BGBu_ += bx * GBu[dz][qy][qx];
738 BBGu_ += bx * BGu[dz][qy][qx];
739 }
740 GBBu[qz][qy][qx] = GBBu_;
741 BGBu[qz][qy][qx] = BGBu_;
742 BBGu[qz][qy][qx] = BBGu_;
743 }
744 }
745 }
746 MFEM_SYNC_THREAD;
747 real_t (*DGu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm3;
748 MFEM_FOREACH_THREAD(qz,z,Q1D)
749 {
750 MFEM_FOREACH_THREAD(qy,y,Q1D)
751 {
752 MFEM_FOREACH_THREAD(qx,x,Q1D)
753 {
754 const real_t O1 = op(qx,qy,qz,0,e);
755 const real_t O2 = op(qx,qy,qz,1,e);
756 const real_t O3 = op(qx,qy,qz,2,e);
757
758 const real_t gradX = BBGu[qz][qy][qx];
759 const real_t gradY = BGBu[qz][qy][qx];
760 const real_t gradZ = GBBu[qz][qy][qx];
761
762 DGu[qz][qy][qx] = (O1 * gradX) + (O2 * gradY) + (O3 * gradZ);
763 }
764 }
765 }
766 MFEM_SYNC_THREAD;
767 real_t (*BDGu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm4;
768 MFEM_FOREACH_THREAD(qx,x,Q1D)
769 {
770 MFEM_FOREACH_THREAD(qy,y,Q1D)
771 {
772 MFEM_FOREACH_THREAD(dz,z,D1D)
773 {
774 real_t BDGu_ = 0.0;
775 for (int qz = 0; qz < Q1D; ++qz)
776 {
777 const real_t w = Bt(dz,qz);
778 BDGu_ += w * DGu[qz][qy][qx];
779 }
780 BDGu[dz][qy][qx] = BDGu_;
781 }
782 }
783 }
784 MFEM_SYNC_THREAD;
785 real_t (*BBDGu)[max_D1D][max_Q1D] = (real_t (*)[max_D1D][max_Q1D])sm5;
786 MFEM_FOREACH_THREAD(dz,z,D1D)
787 {
788 MFEM_FOREACH_THREAD(qx,x,Q1D)
789 {
790 MFEM_FOREACH_THREAD(dy,y,D1D)
791 {
792 real_t BBDGu_ = 0.0;
793 for (int qy = 0; qy < Q1D; ++qy)
794 {
795 const real_t w = Bt(dy,qy);
796 BBDGu_ += w * BDGu[dz][qy][qx];
797 }
798 BBDGu[dz][dy][qx] = BBDGu_;
799 }
800 }
801 }
802 MFEM_SYNC_THREAD;
803 MFEM_FOREACH_THREAD(dz,z,D1D)
804 {
805 MFEM_FOREACH_THREAD(dy,y,D1D)
806 {
807 MFEM_FOREACH_THREAD(dx,x,D1D)
808 {
809 real_t BBBDGu = 0.0;
810 for (int qx = 0; qx < Q1D; ++qx)
811 {
812 const real_t w = Bt(dx,qx);
813 BBBDGu += w * BBDGu[dz][dy][qx];
814 }
815 y(dx,dy,dz,e) += BBBDGu;
816 }
817 }
818 }
819 });
820}
821
822// PA Convection Apply 2D kernel
823template<int T_D1D = 0, int T_Q1D = 0> static
824void PAConvectionApplyT2D(const int ne,
825 const Array<real_t> &b,
826 const Array<real_t> &g,
827 const Array<real_t> &bt,
828 const Array<real_t> &gt,
829 const Vector &op_,
830 const Vector &x_,
831 Vector &y_,
832 const int d1d = 0,
833 const int q1d = 0)
834{
835 const int NE = ne;
836 const int D1D = T_D1D ? T_D1D : d1d;
837 const int Q1D = T_Q1D ? T_Q1D : q1d;
838 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
839 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
840 auto B = Reshape(b.Read(), Q1D, D1D);
841 auto Bt = Reshape(bt.Read(), D1D, Q1D);
842 auto Gt = Reshape(gt.Read(), D1D, Q1D);
843 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
844 auto x = Reshape(x_.Read(), D1D, D1D, NE);
845 auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
846 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
847 {
848 const int D1D = T_D1D ? T_D1D : d1d;
849 const int Q1D = T_Q1D ? T_Q1D : q1d;
850 // the following variables are evaluated at compile time
851 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
852 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
853
854 real_t u[max_D1D][max_D1D];
855 for (int dy = 0; dy < D1D; ++dy)
856 {
857 for (int dx = 0; dx < D1D; ++dx)
858 {
859 u[dy][dx] = x(dx,dy,e);
860 }
861 }
862 real_t Bu[max_D1D][max_Q1D];
863 for (int dy = 0; dy < D1D; ++dy)
864 {
865 for (int qx = 0; qx < Q1D; ++qx)
866 {
867 Bu[dy][qx] = 0.0;
868 for (int dx = 0; dx < D1D; ++dx)
869 {
870 const real_t bx = B(qx,dx);
871 const real_t x = u[dy][dx];
872 Bu[dy][qx] += bx * x;
873 }
874 }
875 }
876 real_t BBu[max_Q1D][max_Q1D];
877 for (int qx = 0; qx < Q1D; ++qx)
878 {
879 for (int qy = 0; qy < Q1D; ++qy)
880 {
881 BBu[qy][qx] = 0.0;
882 for (int dy = 0; dy < D1D; ++dy)
883 {
884 const real_t bx = B(qy,dy);
885 BBu[qy][qx] += bx * Bu[dy][qx];
886 }
887 }
888 }
889 // Calculate Dxy, xDy in plane
890 real_t DBu[max_Q1D][max_Q1D][2];
891 for (int qy = 0; qy < Q1D; ++qy)
892 {
893 for (int qx = 0; qx < Q1D; ++qx)
894 {
895 const real_t O1 = op(qx,qy,0,e);
896 const real_t O2 = op(qx,qy,1,e);
897
898 const real_t X = BBu[qy][qx];
899
900 DBu[qy][qx][0] = O1 * X;
901 DBu[qy][qx][1] = O2 * X;
902 }
903 }
904 real_t GDBu[max_D1D][max_Q1D][2];
905 for (int qx = 0; qx < Q1D; ++qx)
906 {
907 for (int dy = 0; dy < D1D; ++dy)
908 {
909 GDBu[dy][qx][0] = 0.0;
910 GDBu[dy][qx][1] = 0.0;
911 for (int qy = 0; qy < Q1D; ++qy)
912 {
913 const real_t by = Bt(dy,qy);
914 const real_t gy = Gt(dy,qy);
915 GDBu[dy][qx][0] += by * DBu[qy][qx][0];
916 GDBu[dy][qx][1] += gy * DBu[qy][qx][1];
917 }
918 }
919 }
920 for (int dx = 0; dx < D1D; ++dx)
921 {
922 for (int dy = 0; dy < D1D; ++dy)
923 {
924 real_t res = 0.0;
925 for (int qx = 0; qx < Q1D; ++qx)
926 {
927 const real_t bx = Bt(dx,qx);
928 const real_t gx = Gt(dx,qx);
929 res += gx * GDBu[dy][qx][0] + bx * GDBu[dy][qx][1];
930 }
931 y(dx,dy,e) += res;
932 }
933 }
934 });
935}
936
937// Optimized PA Convection Apply 2D kernel
938template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
939void SmemPAConvectionApplyT2D(const int ne,
940 const Array<real_t> &b,
941 const Array<real_t> &g,
942 const Array<real_t> &bt,
943 const Array<real_t> &gt,
944 const Vector &op_,
945 const Vector &x_,
946 Vector &y_,
947 const int d1d = 0,
948 const int q1d = 0)
949{
950 const int NE = ne;
951 const int D1D = T_D1D ? T_D1D : d1d;
952 const int Q1D = T_Q1D ? T_Q1D : q1d;
953 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
954 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
955 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
956 auto B = Reshape(b.Read(), Q1D, D1D);
957 auto Bt = Reshape(bt.Read(), D1D, Q1D);
958 auto Gt = Reshape(gt.Read(), D1D, Q1D);
959 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, NE);
960 auto x = Reshape(x_.Read(), D1D, D1D, NE);
961 auto y = Reshape(y_.ReadWrite(), D1D, D1D, NE);
962 mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e)
963 {
964 const int tidz = MFEM_THREAD_ID(z);
965 const int D1D = T_D1D ? T_D1D : d1d;
966 const int Q1D = T_Q1D ? T_Q1D : q1d;
967 // the following variables are evaluated at compile time
968 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
969 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
970 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
971 MFEM_SHARED real_t u[NBZ][max_D1D][max_D1D];
972 MFEM_FOREACH_THREAD(dy,y,D1D)
973 {
974 MFEM_FOREACH_THREAD(dx,x,D1D)
975 {
976 // e is really equal to e+tidz
977 u[tidz][dy][dx] = x(dx,dy,e);
978 }
979 }
980 MFEM_SYNC_THREAD;
981 MFEM_SHARED real_t Bu[NBZ][max_D1D][max_Q1D];
982 MFEM_FOREACH_THREAD(dy,y,D1D)
983 {
984 MFEM_FOREACH_THREAD(qx,x,Q1D)
985 {
986 Bu[tidz][dy][qx] = 0.0;
987 for (int dx = 0; dx < D1D; ++dx)
988 {
989 const real_t bx = B(qx,dx);
990 const real_t x = u[tidz][dy][dx];
991 Bu[tidz][dy][qx] += bx * x;
992 }
993 }
994 }
995 MFEM_SYNC_THREAD;
996 MFEM_SHARED real_t BBu[NBZ][max_Q1D][max_Q1D];
997 MFEM_FOREACH_THREAD(qx,x,Q1D)
998 {
999 MFEM_FOREACH_THREAD(qy,y,Q1D)
1000 {
1001 BBu[tidz][qy][qx] = 0.0;
1002 for (int dy = 0; dy < D1D; ++dy)
1003 {
1004 const real_t bx = B(qy,dy);
1005 BBu[tidz][qy][qx] += bx * Bu[tidz][dy][qx];
1006 }
1007 }
1008 }
1009 MFEM_SYNC_THREAD;
1010 // Calculate Dxy, xDy in plane
1011 MFEM_SHARED real_t DBu[NBZ][max_Q1D][max_Q1D][2];
1012 MFEM_FOREACH_THREAD(qy,y,Q1D)
1013 {
1014 MFEM_FOREACH_THREAD(qx,x,Q1D)
1015 {
1016 const real_t O1 = op(qx,qy,0,e);
1017 const real_t O2 = op(qx,qy,1,e);
1018
1019 const real_t X = BBu[tidz][qy][qx];
1020
1021 DBu[tidz][qy][qx][0] = O1 * X;
1022 DBu[tidz][qy][qx][1] = O2 * X;
1023 }
1024 }
1025 MFEM_SYNC_THREAD;
1026 MFEM_SHARED real_t GDBu[NBZ][max_D1D][max_Q1D][2];
1027 MFEM_FOREACH_THREAD(qx,x,Q1D)
1028 {
1029 MFEM_FOREACH_THREAD(dy,y,D1D)
1030 {
1031 GDBu[tidz][dy][qx][0] = 0.0;
1032 GDBu[tidz][dy][qx][1] = 0.0;
1033 for (int qy = 0; qy < Q1D; ++qy)
1034 {
1035 const real_t by = Bt(dy,qy);
1036 const real_t gy = Gt(dy,qy);
1037 GDBu[tidz][dy][qx][0] += by * DBu[tidz][qy][qx][0];
1038 GDBu[tidz][dy][qx][1] += gy * DBu[tidz][qy][qx][1];
1039 }
1040 }
1041 }
1042 MFEM_SYNC_THREAD;
1043 MFEM_FOREACH_THREAD(dx,x,D1D)
1044 {
1045 MFEM_FOREACH_THREAD(dy,y,D1D)
1046 {
1047 real_t res = 0.0;
1048 for (int qx = 0; qx < Q1D; ++qx)
1049 {
1050 const real_t bx = Bt(dx,qx);
1051 const real_t gx = Gt(dx,qx);
1052 res += gx * GDBu[tidz][dy][qx][0] + bx * GDBu[tidz][dy][qx][1];
1053 }
1054 y(dx,dy,e) += res;
1055 }
1056 }
1057 });
1058}
1059
1060// PA Convection Apply 3D kernel
1061template<int T_D1D = 0, int T_Q1D = 0> static
1062void PAConvectionApplyT3D(const int ne,
1063 const Array<real_t> &b,
1064 const Array<real_t> &g,
1065 const Array<real_t> &bt,
1066 const Array<real_t> &gt,
1067 const Vector &op_,
1068 const Vector &x_,
1069 Vector &y_,
1070 const int d1d = 0,
1071 const int q1d = 0)
1072{
1073 const int NE = ne;
1074 const int D1D = T_D1D ? T_D1D : d1d;
1075 const int Q1D = T_Q1D ? T_Q1D : q1d;
1076 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
1077 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
1078 auto B = Reshape(b.Read(), Q1D, D1D);
1079 auto Bt = Reshape(bt.Read(), D1D, Q1D);
1080 auto Gt = Reshape(gt.Read(), D1D, Q1D);
1081 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1082 auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1083 auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1084 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
1085 {
1086 const int D1D = T_D1D ? T_D1D : d1d;
1087 const int Q1D = T_Q1D ? T_Q1D : q1d;
1088 // the following variables are evaluated at compile time
1089 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
1090 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
1091
1092 real_t u[max_D1D][max_D1D][max_D1D];
1093 for (int dz = 0; dz < D1D; ++dz)
1094 {
1095 for (int dy = 0; dy < D1D; ++dy)
1096 {
1097 for (int dx = 0; dx < D1D; ++dx)
1098 {
1099 u[dz][dy][dx] = x(dx,dy,dz,e);
1100 }
1101 }
1102 }
1103 real_t Bu[max_D1D][max_D1D][max_Q1D];
1104 for (int dz = 0; dz < D1D; ++dz)
1105 {
1106 for (int dy = 0; dy < D1D; ++dy)
1107 {
1108 for (int qx = 0; qx < Q1D; ++qx)
1109 {
1110 Bu[dz][dy][qx] = 0.0;
1111 for (int dx = 0; dx < D1D; ++dx)
1112 {
1113 const real_t bx = B(qx,dx);
1114 const real_t x = u[dz][dy][dx];
1115 Bu[dz][dy][qx] += bx * x;
1116 }
1117 }
1118 }
1119 }
1120 real_t BBu[max_D1D][max_Q1D][max_Q1D];
1121 for (int dz = 0; dz < D1D; ++dz)
1122 {
1123 for (int qx = 0; qx < Q1D; ++qx)
1124 {
1125 for (int qy = 0; qy < Q1D; ++qy)
1126 {
1127 BBu[dz][qy][qx] = 0.0;
1128 for (int dy = 0; dy < D1D; ++dy)
1129 {
1130 const real_t bx = B(qy,dy);
1131 BBu[dz][qy][qx] += bx * Bu[dz][dy][qx];
1132 }
1133 }
1134 }
1135 }
1136 real_t BBBu[max_Q1D][max_Q1D][max_Q1D];
1137 for (int qx = 0; qx < Q1D; ++qx)
1138 {
1139 for (int qy = 0; qy < Q1D; ++qy)
1140 {
1141 for (int qz = 0; qz < Q1D; ++qz)
1142 {
1143 BBBu[qz][qy][qx] = 0.0;
1144 for (int dz = 0; dz < D1D; ++dz)
1145 {
1146 const real_t bx = B(qz,dz);
1147 BBBu[qz][qy][qx] += bx * BBu[dz][qy][qx];
1148 }
1149 }
1150 }
1151 }
1152 // Calculate Dxy, xDy in plane
1153 real_t DBu[max_Q1D][max_Q1D][max_Q1D][3];
1154 for (int qz = 0; qz < Q1D; ++qz)
1155 {
1156 for (int qy = 0; qy < Q1D; ++qy)
1157 {
1158 for (int qx = 0; qx < Q1D; ++qx)
1159 {
1160 const real_t O1 = op(qx,qy,qz,0,e);
1161 const real_t O2 = op(qx,qy,qz,1,e);
1162 const real_t O3 = op(qx,qy,qz,2,e);
1163
1164 const real_t X = BBBu[qz][qy][qx];
1165
1166 DBu[qz][qy][qx][0] = O1 * X;
1167 DBu[qz][qy][qx][1] = O2 * X;
1168 DBu[qz][qy][qx][2] = O3 * X;
1169 }
1170 }
1171 }
1172 real_t GDBu[max_D1D][max_Q1D][max_Q1D][3];
1173 for (int qx = 0; qx < Q1D; ++qx)
1174 {
1175 for (int qy = 0; qy < Q1D; ++qy)
1176 {
1177 for (int dz = 0; dz < D1D; ++dz)
1178 {
1179 GDBu[dz][qy][qx][0] = 0.0;
1180 GDBu[dz][qy][qx][1] = 0.0;
1181 GDBu[dz][qy][qx][2] = 0.0;
1182 for (int qz = 0; qz < Q1D; ++qz)
1183 {
1184 const real_t bz = Bt(dz,qz);
1185 const real_t gz = Gt(dz,qz);
1186 GDBu[dz][qy][qx][0] += bz * DBu[qz][qy][qx][0];
1187 GDBu[dz][qy][qx][1] += bz * DBu[qz][qy][qx][1];
1188 GDBu[dz][qy][qx][2] += gz * DBu[qz][qy][qx][2];
1189 }
1190 }
1191 }
1192 }
1193 real_t GGDBu[max_D1D][max_D1D][max_Q1D][3];
1194 for (int dz = 0; dz < D1D; ++dz)
1195 {
1196 for (int qx = 0; qx < Q1D; ++qx)
1197 {
1198 for (int dy = 0; dy < D1D; ++dy)
1199 {
1200 GGDBu[dz][dy][qx][0] = 0.0;
1201 GGDBu[dz][dy][qx][1] = 0.0;
1202 GGDBu[dz][dy][qx][2] = 0.0;
1203 for (int qy = 0; qy < Q1D; ++qy)
1204 {
1205 const real_t by = Bt(dy,qy);
1206 const real_t gy = Gt(dy,qy);
1207 GGDBu[dz][dy][qx][0] += by * GDBu[dz][qy][qx][0];
1208 GGDBu[dz][dy][qx][1] += gy * GDBu[dz][qy][qx][1];
1209 GGDBu[dz][dy][qx][2] += by * GDBu[dz][qy][qx][2];
1210 }
1211 }
1212 }
1213 }
1214 for (int dz = 0; dz < D1D; ++dz)
1215 {
1216 for (int dy = 0; dy < D1D; ++dy)
1217 {
1218 for (int dx = 0; dx < D1D; ++dx)
1219 {
1220 real_t res = 0.0;
1221 for (int qx = 0; qx < Q1D; ++qx)
1222 {
1223 const real_t bx = Bt(dx,qx);
1224 const real_t gx = Gt(dx,qx);
1225 res += gx * GGDBu[dz][dy][qx][0];
1226 res += bx * GGDBu[dz][dy][qx][1];
1227 res += bx * GGDBu[dz][dy][qx][2];
1228 }
1229 y(dx,dy,dz,e) += res;
1230 }
1231 }
1232 }
1233 });
1234}
1235
1236// Optimized PA Convection Apply 3D kernel
1237template<int T_D1D = 0, int T_Q1D = 0> static
1238void SmemPAConvectionApplyT3D(const int ne,
1239 const Array<real_t> &b,
1240 const Array<real_t> &g,
1241 const Array<real_t> &bt,
1242 const Array<real_t> &gt,
1243 const Vector &op_,
1244 const Vector &x_,
1245 Vector &y_,
1246 const int d1d = 0,
1247 const int q1d = 0)
1248{
1249 const int NE = ne;
1250 const int D1D = T_D1D ? T_D1D : d1d;
1251 const int Q1D = T_Q1D ? T_Q1D : q1d;
1252 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
1253 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
1254 auto B = Reshape(b.Read(), Q1D, D1D);
1255 auto Bt = Reshape(bt.Read(), D1D, Q1D);
1256 auto Gt = Reshape(gt.Read(), D1D, Q1D);
1257 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, 3, NE);
1258 auto x = Reshape(x_.Read(), D1D, D1D, D1D, NE);
1259 auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1260 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
1261 {
1262 const int D1D = T_D1D ? T_D1D : d1d;
1263 const int Q1D = T_Q1D ? T_Q1D : q1d;
1264 // the following variables are evaluated at compile time
1265 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
1266 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
1267 constexpr int max_DQ = (max_Q1D > max_D1D) ? max_Q1D : max_D1D;
1268 MFEM_SHARED real_t sm0[3*max_DQ*max_DQ*max_DQ];
1269 MFEM_SHARED real_t sm1[3*max_DQ*max_DQ*max_DQ];
1270
1271 real_t (*u)[max_D1D][max_D1D] = (real_t (*)[max_D1D][max_D1D]) sm0;
1272 MFEM_FOREACH_THREAD(dz,z,D1D)
1273 {
1274 MFEM_FOREACH_THREAD(dy,y,D1D)
1275 {
1276 MFEM_FOREACH_THREAD(dx,x,D1D)
1277 {
1278 u[dz][dy][dx] = x(dx,dy,dz,e);
1279 }
1280 }
1281 }
1282 MFEM_SYNC_THREAD;
1283 real_t (*Bu)[max_D1D][max_Q1D] = (real_t (*)[max_D1D][max_Q1D])sm1;
1284 MFEM_FOREACH_THREAD(dz,z,D1D)
1285 {
1286 MFEM_FOREACH_THREAD(dy,y,D1D)
1287 {
1288 MFEM_FOREACH_THREAD(qx,x,Q1D)
1289 {
1290 real_t Bu_ = 0.0;
1291 for (int dx = 0; dx < D1D; ++dx)
1292 {
1293 const real_t bx = B(qx,dx);
1294 const real_t x = u[dz][dy][dx];
1295 Bu_ += bx * x;
1296 }
1297 Bu[dz][dy][qx] = Bu_;
1298 }
1299 }
1300 }
1301 MFEM_SYNC_THREAD;
1302 real_t (*BBu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm0;
1303 MFEM_FOREACH_THREAD(dz,z,D1D)
1304 {
1305 MFEM_FOREACH_THREAD(qx,x,Q1D)
1306 {
1307 MFEM_FOREACH_THREAD(qy,y,Q1D)
1308 {
1309 real_t BBu_ = 0.0;
1310 for (int dy = 0; dy < D1D; ++dy)
1311 {
1312 const real_t bx = B(qy,dy);
1313 BBu_ += bx * Bu[dz][dy][qx];
1314 }
1315 BBu[dz][qy][qx] = BBu_;
1316 }
1317 }
1318 }
1319 MFEM_SYNC_THREAD;
1320 real_t (*BBBu)[max_Q1D][max_Q1D] = (real_t (*)[max_Q1D][max_Q1D])sm1;
1321 MFEM_FOREACH_THREAD(qx,x,Q1D)
1322 {
1323 MFEM_FOREACH_THREAD(qy,y,Q1D)
1324 {
1325 MFEM_FOREACH_THREAD(qz,z,Q1D)
1326 {
1327 real_t BBBu_ = 0.0;
1328 for (int dz = 0; dz < D1D; ++dz)
1329 {
1330 const real_t bx = B(qz,dz);
1331 BBBu_ += bx * BBu[dz][qy][qx];
1332 }
1333 BBBu[qz][qy][qx] = BBBu_;
1334 }
1335 }
1336 }
1337 MFEM_SYNC_THREAD;
1338 real_t (*DBu)[max_Q1D][max_Q1D][3] = (real_t (*)[max_Q1D][max_Q1D][3])sm0;
1339 MFEM_FOREACH_THREAD(qz,z,Q1D)
1340 {
1341 MFEM_FOREACH_THREAD(qy,y,Q1D)
1342 {
1343 MFEM_FOREACH_THREAD(qx,x,Q1D)
1344 {
1345 const real_t O1 = op(qx,qy,qz,0,e);
1346 const real_t O2 = op(qx,qy,qz,1,e);
1347 const real_t O3 = op(qx,qy,qz,2,e);
1348
1349 const real_t X = BBBu[qz][qy][qx];
1350
1351 DBu[qz][qy][qx][0] = O1 * X;
1352 DBu[qz][qy][qx][1] = O2 * X;
1353 DBu[qz][qy][qx][2] = O3 * X;
1354 }
1355 }
1356 }
1357 MFEM_SYNC_THREAD;
1358 real_t (*GDBu)[max_Q1D][max_Q1D][3] = (real_t (*)[max_Q1D][max_Q1D][3])sm1;
1359 MFEM_FOREACH_THREAD(qx,x,Q1D)
1360 {
1361 MFEM_FOREACH_THREAD(qy,y,Q1D)
1362 {
1363 MFEM_FOREACH_THREAD(dz,z,D1D)
1364 {
1365 real_t GDBu0 = 0.0;
1366 real_t GDBu1 = 0.0;
1367 real_t GDBu2 = 0.0;
1368 for (int qz = 0; qz < Q1D; ++qz)
1369 {
1370 const real_t bz = Bt(dz,qz);
1371 const real_t gz = Gt(dz,qz);
1372 GDBu0 += bz * DBu[qz][qy][qx][0];
1373 GDBu1 += bz * DBu[qz][qy][qx][1];
1374 GDBu2 += gz * DBu[qz][qy][qx][2];
1375 }
1376 GDBu[dz][qy][qx][0] = GDBu0;
1377 GDBu[dz][qy][qx][1] = GDBu1;
1378 GDBu[dz][qy][qx][2] = GDBu2;
1379 }
1380 }
1381 }
1382 MFEM_SYNC_THREAD;
1383 real_t (*GGDBu)[max_D1D][max_Q1D][3] = (real_t (*)[max_D1D][max_Q1D][3])sm0;
1384 MFEM_FOREACH_THREAD(dz,z,D1D)
1385 {
1386 MFEM_FOREACH_THREAD(qx,x,Q1D)
1387 {
1388 MFEM_FOREACH_THREAD(dy,y,D1D)
1389 {
1390 real_t GGDBu0 = 0.0;
1391 real_t GGDBu1 = 0.0;
1392 real_t GGDBu2 = 0.0;
1393 for (int qy = 0; qy < Q1D; ++qy)
1394 {
1395 const real_t by = Bt(dy,qy);
1396 const real_t gy = Gt(dy,qy);
1397 GGDBu0 += by * GDBu[dz][qy][qx][0];
1398 GGDBu1 += gy * GDBu[dz][qy][qx][1];
1399 GGDBu2 += by * GDBu[dz][qy][qx][2];
1400 }
1401 GGDBu[dz][dy][qx][0] = GGDBu0;
1402 GGDBu[dz][dy][qx][1] = GGDBu1;
1403 GGDBu[dz][dy][qx][2] = GGDBu2;
1404 }
1405 }
1406 }
1407 MFEM_SYNC_THREAD;
1408 MFEM_FOREACH_THREAD(dz,z,D1D)
1409 {
1410 MFEM_FOREACH_THREAD(dy,y,D1D)
1411 {
1412 MFEM_FOREACH_THREAD(dx,x,D1D)
1413 {
1414 real_t res = 0.0;
1415 for (int qx = 0; qx < Q1D; ++qx)
1416 {
1417 const real_t bx = Bt(dx,qx);
1418 const real_t gx = Gt(dx,qx);
1419 res += gx * GGDBu[dz][dy][qx][0];
1420 res += bx * GGDBu[dz][dy][qx][1];
1421 res += bx * GGDBu[dz][dy][qx][2];
1422 }
1423 y(dx,dy,dz,e) += res;
1424 }
1425 }
1426 }
1427 });
1428}
1429
1430static void PAConvectionApply(const int dim,
1431 const int D1D,
1432 const int Q1D,
1433 const int NE,
1434 const Array<real_t> &B,
1435 const Array<real_t> &G,
1436 const Array<real_t> &Bt,
1437 const Array<real_t> &Gt,
1438 const Vector &op,
1439 const Vector &x,
1440 Vector &y)
1441{
1442 if (dim == 2)
1443 {
1444 switch ((D1D << 4 ) | Q1D)
1445 {
1446 case 0x22: return SmemPAConvectionApply2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1447 case 0x33: return SmemPAConvectionApply2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1448 case 0x34: return SmemPAConvectionApply2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1449 case 0x44: return SmemPAConvectionApply2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1450 case 0x46: return SmemPAConvectionApply2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1451 case 0x55: return SmemPAConvectionApply2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1452 case 0x58: return SmemPAConvectionApply2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1453 case 0x66: return SmemPAConvectionApply2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1454 case 0x77: return SmemPAConvectionApply2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1455 case 0x88: return SmemPAConvectionApply2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1456 case 0x99: return SmemPAConvectionApply2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1457 default: return PAConvectionApply2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1458 }
1459 }
1460 else if (dim == 3)
1461 {
1462 switch ((D1D << 4 ) | Q1D)
1463 {
1464 case 0x22: return SmemPAConvectionApply3D<2,2>(NE,B,G,Bt,Gt,op,x,y);
1465 case 0x23: return SmemPAConvectionApply3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1466 case 0x24: return SmemPAConvectionApply3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1467 case 0x26: return SmemPAConvectionApply3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1468 case 0x34: return SmemPAConvectionApply3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1469 case 0x35: return SmemPAConvectionApply3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1470 case 0x45: return SmemPAConvectionApply3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1471 case 0x48: return SmemPAConvectionApply3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1472 case 0x56: return SmemPAConvectionApply3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1473 case 0x67: return SmemPAConvectionApply3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1474 case 0x78: return SmemPAConvectionApply3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1475 case 0x89: return SmemPAConvectionApply3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1476 default: return PAConvectionApply3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1477 }
1478 }
1479 MFEM_ABORT("Unknown kernel.");
1480}
1481
1482static void PAConvectionApplyT(const int dim,
1483 const int D1D,
1484 const int Q1D,
1485 const int NE,
1486 const Array<real_t> &B,
1487 const Array<real_t> &G,
1488 const Array<real_t> &Bt,
1489 const Array<real_t> &Gt,
1490 const Vector &op,
1491 const Vector &x,
1492 Vector &y)
1493{
1494 if (dim == 2)
1495 {
1496 switch ((D1D << 4 ) | Q1D)
1497 {
1498 case 0x22: return SmemPAConvectionApplyT2D<2,2,8>(NE,B,G,Bt,Gt,op,x,y);
1499 case 0x33: return SmemPAConvectionApplyT2D<3,3,4>(NE,B,G,Bt,Gt,op,x,y);
1500 case 0x34: return SmemPAConvectionApplyT2D<3,4,4>(NE,B,G,Bt,Gt,op,x,y);
1501 case 0x44: return SmemPAConvectionApplyT2D<4,4,4>(NE,B,G,Bt,Gt,op,x,y);
1502 case 0x46: return SmemPAConvectionApplyT2D<4,6,4>(NE,B,G,Bt,Gt,op,x,y);
1503 case 0x55: return SmemPAConvectionApplyT2D<5,5,2>(NE,B,G,Bt,Gt,op,x,y);
1504 case 0x58: return SmemPAConvectionApplyT2D<5,8,2>(NE,B,G,Bt,Gt,op,x,y);
1505 case 0x66: return SmemPAConvectionApplyT2D<6,6,1>(NE,B,G,Bt,Gt,op,x,y);
1506 case 0x77: return SmemPAConvectionApplyT2D<7,7,1>(NE,B,G,Bt,Gt,op,x,y);
1507 case 0x88: return SmemPAConvectionApplyT2D<8,8,1>(NE,B,G,Bt,Gt,op,x,y);
1508 case 0x99: return SmemPAConvectionApplyT2D<9,9,1>(NE,B,G,Bt,Gt,op,x,y);
1509 default: return PAConvectionApplyT2D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1510 }
1511 }
1512 else if (dim == 3)
1513 {
1514 switch ((D1D << 4 ) | Q1D)
1515 {
1516 case 0x22: return SmemPAConvectionApplyT3D<2,2>(NE,B,G,Bt,Gt,op,x,y);
1517 case 0x23: return SmemPAConvectionApplyT3D<2,3>(NE,B,G,Bt,Gt,op,x,y);
1518 case 0x24: return SmemPAConvectionApplyT3D<2,4>(NE,B,G,Bt,Gt,op,x,y);
1519 case 0x26: return SmemPAConvectionApplyT3D<2,6>(NE,B,G,Bt,Gt,op,x,y);
1520 case 0x34: return SmemPAConvectionApplyT3D<3,4>(NE,B,G,Bt,Gt,op,x,y);
1521 case 0x35: return SmemPAConvectionApplyT3D<3,5>(NE,B,G,Bt,Gt,op,x,y);
1522 case 0x45: return SmemPAConvectionApplyT3D<4,5>(NE,B,G,Bt,Gt,op,x,y);
1523 case 0x48: return SmemPAConvectionApplyT3D<4,8>(NE,B,G,Bt,Gt,op,x,y);
1524 case 0x56: return SmemPAConvectionApplyT3D<5,6>(NE,B,G,Bt,Gt,op,x,y);
1525 case 0x67: return SmemPAConvectionApplyT3D<6,7>(NE,B,G,Bt,Gt,op,x,y);
1526 case 0x78: return SmemPAConvectionApplyT3D<7,8>(NE,B,G,Bt,Gt,op,x,y);
1527 case 0x89: return SmemPAConvectionApplyT3D<8,9>(NE,B,G,Bt,Gt,op,x,y);
1528 default: return PAConvectionApplyT3D(NE,B,G,Bt,Gt,op,x,y,D1D,Q1D);
1529 }
1530 }
1531 MFEM_ABORT("Unknown kernel.");
1532}
1533
1535{
1536 if (DeviceCanUseCeed())
1537 {
1538 ceedOp->AddMult(x, y);
1539 }
1540 else
1541 {
1542 PAConvectionApply(dim, dofs1D, quad1D, ne,
1543 maps->B, maps->G, maps->Bt, maps->Gt,
1544 pa_data, x, y);
1545 }
1546}
1547
1549{
1550 if (DeviceCanUseCeed())
1551 {
1552 MFEM_ABORT("AddMultPA not yet implemented with libCEED for"
1553 " ConvectionIntegrator.");
1554 }
1555 else
1556 {
1557 PAConvectionApplyT(dim, dofs1D, quad1D, ne,
1558 maps->B, maps->G, maps->Bt, maps->Gt,
1559 pa_data, x, y);
1560 }
1561}
1562
1563} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
Class to represent a coefficient evaluated at quadrature points.
const DofToQuad * maps
Not owned.
static const IntegrationRule & GetRule(const FiniteElement &el, const ElementTransformation &Trans)
void AssembleDiagonalPA(Vector &diag) override
Assemble diagonal and add it to Vector diag.
void AssemblePA(const FiniteElementSpace &) override
Method defining partial assembly.
void AddMultPA(const Vector &, Vector &) const override
Method for partially assembled action.
VectorCoefficient * Q
const GeometricFactors * geom
Not owned.
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition fe_base.hpp:165
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:178
Array< real_t > Gt
Transpose of G.
Definition fe_base.hpp:221
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:182
Array< real_t > Bt
Transpose of B.
Definition fe_base.hpp:199
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition fespace.hpp:709
int GetNE() const
Returns number of elements in the mesh.
Definition fespace.hpp:891
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:679
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Definition fespace.cpp:3871
Abstract class for all finite elements.
Definition fe_base.hpp:244
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition fe_base.cpp:365
int GetDim() const
Returns the reference space dimension for the finite element.
Definition fe_base.hpp:321
Vector J
Jacobians of the element transformations at all quadrature points.
Definition mesh.hpp:2976
Class for an integration rule - an Array of IntegrationPoint.
Definition intrules.hpp:100
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition intrules.hpp:256
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition intrules.cpp:86
const IntegrationRule * IntRule
Mesh data type.
Definition mesh.hpp:64
int Dimension() const
Dimension of the reference space used within the elements.
Definition mesh.hpp:1216
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
Definition mesh.cpp:390
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition mesh.cpp:880
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Definition mesh.cpp:7243
Class representing the storage layout of a QuadratureFunction.
Definition qspace.hpp:120
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
void GetDiagonal(mfem::Vector &diag) const
Definition operator.cpp:104
void AddMult(const mfem::Vector &x, mfem::Vector &y, const real_t a=1.0) const override
Operator application: y+=A(x) (default) or y+=a*A(x).
Definition operator.cpp:72
Represent a ConvectionIntegrator with AssemblyLevel::Partial using libCEED.
const real_t alpha
Definition ex15.cpp:369
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
constexpr int SDIM
constexpr int DIM
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:341
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
Definition forall.hpp:768
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
Definition util.cpp:33
@ COMPRESSED
Enable all above compressions.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
void forall(int N, lambda &&body)
Definition forall.hpp:753
void vel(const Vector &x, real_t t, Vector &u)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128