MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
nonlininteg_vecconvection_pa.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../nonlininteg.hpp"
15
16namespace mfem
17{
18
20{
21 MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES,
22 "PA Only supports Ordering::byNODES!");
23 Mesh *mesh = fes.GetMesh();
24 const FiniteElement &el = *fes.GetTypicalFE();
26 const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T);
27 if (DeviceCanUseCeed())
28 {
29 delete ceedOp;
30 const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 ||
31 fes.IsVariableOrder();
32 if (mixed)
33 {
35 }
36 else
37 {
39 }
40 return;
41 }
42 dim = mesh->Dimension();
43 ne = fes.GetMesh()->GetNE();
44 nq = ir->GetNPoints();
46 maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
47 pa_data.SetSize(ne * nq * dim * dim, Device::GetMemoryType());
48 real_t COEFF = 1.0;
49 if (Q)
50 {
51 ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient *>(Q);
52 MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
53 COEFF = cQ->constant;
54 }
55 const int NE = ne;
56 const int NQ = nq;
57 auto W = ir->GetWeights().Read();
58 if (dim == 1)
59 {
60 MFEM_ABORT("dim==1 not supported!");
61 }
62 if (dim == 2)
63 {
64 auto J = Reshape(geom->J.Read(), NQ, 2, 2, NE);
65 auto G = Reshape(pa_data.Write(), NQ, 2, 2, NE);
66 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
67 {
68 for (int q = 0; q < NQ; ++q)
69 {
70 const real_t J11 = J(q, 0, 0, e);
71 const real_t J12 = J(q, 0, 1, e);
72 const real_t J21 = J(q, 1, 0, e);
73 const real_t J22 = J(q, 1, 1, e);
74 // Store wq * Q * adj(J)
75 G(q, 0, 0, e) = W[q] * COEFF * J22; // 1,1
76 G(q, 0, 1, e) = W[q] * COEFF * -J12; // 1,2
77 G(q, 1, 0, e) = W[q] * COEFF * -J21; // 2,1
78 G(q, 1, 1, e) = W[q] * COEFF * J11; // 2,2
79 }
80 });
81 }
82 if (dim == 3)
83 {
84 auto J = Reshape(geom->J.Read(), NQ, 3, 3, NE);
85 auto G = Reshape(pa_data.Write(), NQ, 3, 3, NE);
86 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
87 {
88 for (int q = 0; q < NQ; ++q)
89 {
90 const real_t J11 = J(q, 0, 0, e);
91 const real_t J21 = J(q, 1, 0, e);
92 const real_t J31 = J(q, 2, 0, e);
93 const real_t J12 = J(q, 0, 1, e);
94 const real_t J22 = J(q, 1, 1, e);
95 const real_t J32 = J(q, 2, 1, e);
96 const real_t J13 = J(q, 0, 2, e);
97 const real_t J23 = J(q, 1, 2, e);
98 const real_t J33 = J(q, 2, 2, e);
99 const real_t cw = W[q] * COEFF;
100 // adj(J)
101 const real_t A11 = (J22 * J33) - (J23 * J32);
102 const real_t A12 = (J32 * J13) - (J12 * J33);
103 const real_t A13 = (J12 * J23) - (J22 * J13);
104 const real_t A21 = (J31 * J23) - (J21 * J33);
105 const real_t A22 = (J11 * J33) - (J13 * J31);
106 const real_t A23 = (J21 * J13) - (J11 * J23);
107 const real_t A31 = (J21 * J32) - (J31 * J22);
108 const real_t A32 = (J31 * J12) - (J11 * J32);
109 const real_t A33 = (J11 * J22) - (J12 * J21);
110 // Store wq * Q * adj(J)
111 G(q, 0, 0, e) = cw * A11; // 1,1
112 G(q, 0, 1, e) = cw * A12; // 1,2
113 G(q, 0, 2, e) = cw * A13; // 1,3
114 G(q, 1, 0, e) = cw * A21; // 2,1
115 G(q, 1, 1, e) = cw * A22; // 2,2
116 G(q, 1, 2, e) = cw * A23; // 2,3
117 G(q, 2, 0, e) = cw * A31; // 3,1
118 G(q, 2, 1, e) = cw * A32; // 3,2
119 G(q, 2, 2, e) = cw * A33; // 3,3
120 }
121 });
122 }
123}
124
125// PA Convection NL 2D kernel
126template<int T_D1D = 0, int T_Q1D = 0>
127static void PAConvectionNLApply2D(const int NE,
128 const Array<real_t> &b,
129 const Array<real_t> &g,
130 const Array<real_t> &bt,
131 const Vector &q_,
132 const Vector &x_,
133 Vector &y_,
134 const int d1d = 0,
135 const int q1d = 0)
136{
137 const int D1D = T_D1D ? T_D1D : d1d;
138 const int Q1D = T_Q1D ? T_Q1D : q1d;
139 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
140 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
141 auto B = Reshape(b.Read(), Q1D, D1D);
142 auto G = Reshape(g.Read(), Q1D, D1D);
143 auto Bt = Reshape(bt.Read(), D1D, Q1D);
144 auto Q = Reshape(q_.Read(), Q1D * Q1D, 2, 2, NE);
145 auto x = Reshape(x_.Read(), D1D, D1D, 2, NE);
146 auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NE);
147 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
148 {
149 const int D1D = T_D1D ? T_D1D : d1d;
150 const int Q1D = T_Q1D ? T_Q1D : q1d;
151 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
152 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
153
154 real_t data[max_Q1D][max_Q1D][2];
155 real_t grad0[max_Q1D][max_Q1D][2];
156 real_t grad1[max_Q1D][max_Q1D][2];
157 real_t Z[max_Q1D][max_Q1D][2];
158 for (int qy = 0; qy < Q1D; ++qy)
159 {
160 for (int qx = 0; qx < Q1D; ++qx)
161 {
162 data[qy][qx][0] = 0.0;
163 data[qy][qx][1] = 0.0;
164 grad0[qy][qx][0] = 0.0;
165 grad0[qy][qx][1] = 0.0;
166 grad1[qy][qx][0] = 0.0;
167 grad1[qy][qx][1] = 0.0;
168 }
169 }
170 for (int dy = 0; dy < D1D; ++dy)
171 {
172 real_t dataX[max_Q1D][2];
173 real_t gradX0[max_Q1D][2];
174 real_t gradX1[max_Q1D][2];
175 for (int qx = 0; qx < Q1D; ++qx)
176 {
177 dataX[qx][0] = 0.0;
178 dataX[qx][1] = 0.0;
179 gradX0[qx][0] = 0.0;
180 gradX0[qx][1] = 0.0;
181 gradX1[qx][0] = 0.0;
182 gradX1[qx][1] = 0.0;
183 }
184 for (int dx = 0; dx < D1D; ++dx)
185 {
186 const real_t s0 = x(dx, dy, 0, e);
187 const real_t s1 = x(dx, dy, 1, e);
188 for (int qx = 0; qx < Q1D; ++qx)
189 {
190 const real_t Bx = B(qx, dx);
191 const real_t Gx = G(qx, dx);
192 dataX[qx][0] += s0 * Bx;
193 dataX[qx][1] += s1 * Bx;
194 gradX0[qx][0] += s0 * Gx;
195 gradX0[qx][1] += s0 * Bx;
196 gradX1[qx][0] += s1 * Gx;
197 gradX1[qx][1] += s1 * Bx;
198 }
199 }
200 for (int qy = 0; qy < Q1D; ++qy)
201 {
202 const real_t By = B(qy, dy);
203 const real_t Gy = G(qy, dy);
204 for (int qx = 0; qx < Q1D; ++qx)
205 {
206 data[qy][qx][0] += dataX[qx][0] * By;
207 data[qy][qx][1] += dataX[qx][1] * By;
208 grad0[qy][qx][0] += gradX0[qx][0] * By;
209 grad0[qy][qx][1] += gradX0[qx][1] * Gy;
210 grad1[qy][qx][0] += gradX1[qx][0] * By;
211 grad1[qy][qx][1] += gradX1[qx][1] * Gy;
212 }
213 }
214 }
215 for (int qy = 0; qy < Q1D; ++qy)
216 {
217 for (int qx = 0; qx < Q1D; ++qx)
218 {
219 const int q = qx + qy * Q1D;
220 const real_t u1 = data[qy][qx][0];
221 const real_t u2 = data[qy][qx][1];
222 const real_t grad00 = grad0[qy][qx][0];
223 const real_t grad01 = grad0[qy][qx][1];
224 const real_t grad10 = grad1[qy][qx][0];
225 const real_t grad11 = grad1[qy][qx][1];
226 const real_t Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
227 const real_t Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
228 const real_t Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
229 const real_t Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
230 Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
231 Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
232 }
233 }
234 for (int qy = 0; qy < Q1D; ++qy)
235 {
236 real_t Y[max_D1D][2];
237 for (int dx = 0; dx < D1D; ++dx)
238 {
239 Y[dx][0] = 0.0;
240 Y[dx][1] = 0.0;
241 for (int qx = 0; qx < Q1D; ++qx)
242 {
243 const real_t Btx = Bt(dx, qx);
244 Y[dx][0] += Btx * Z[qy][qx][0];
245 Y[dx][1] += Btx * Z[qy][qx][1];
246 }
247 }
248 for (int dy = 0; dy < D1D; ++dy)
249 {
250 for (int dx = 0; dx < D1D; ++dx)
251 {
252 const real_t Bty = Bt(dy, qy);
253 y(dx, dy, 0, e) += Bty * Y[dx][0];
254 y(dx, dy, 1, e) += Bty * Y[dx][1];
255 }
256 }
257 }
258 });
259}
260
261// PA Convection NL 3D kernel
262template<int T_D1D = 0, int T_Q1D = 0>
263static void PAConvectionNLApply3D(const int NE,
264 const Array<real_t> &b,
265 const Array<real_t> &g,
266 const Array<real_t> &bt,
267 const Vector &q_,
268 const Vector &x_,
269 Vector &y_,
270 const int d1d = 0,
271 const int q1d = 0)
272{
273 constexpr int VDIM = 3;
274 const int D1D = T_D1D ? T_D1D : d1d;
275 const int Q1D = T_Q1D ? T_Q1D : q1d;
276 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
277 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
278
279 auto B = Reshape(b.Read(), Q1D, D1D);
280 auto G = Reshape(g.Read(), Q1D, D1D);
281 auto Bt = Reshape(bt.Read(), D1D, Q1D);
282 auto Q = Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
283 auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
284 auto y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
285
286 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
287 {
288 constexpr int VDIM = 3;
289 const int D1D = T_D1D ? T_D1D : d1d;
290 const int Q1D = T_Q1D ? T_Q1D : q1d;
291 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
292 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
293
294 real_t data[max_Q1D][max_Q1D][max_Q1D][VDIM];
295 real_t grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
296 real_t grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
297 real_t grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
298 real_t Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
299 for (int qz = 0; qz < Q1D; ++qz)
300 {
301 for (int qy = 0; qy < Q1D; ++qy)
302 {
303 for (int qx = 0; qx < Q1D; ++qx)
304 {
305 data[qz][qy][qx][0] = 0.0;
306 data[qz][qy][qx][1] = 0.0;
307 data[qz][qy][qx][2] = 0.0;
308
309 grad0[qz][qy][qx][0] = 0.0;
310 grad0[qz][qy][qx][1] = 0.0;
311 grad0[qz][qy][qx][2] = 0.0;
312
313 grad1[qz][qy][qx][0] = 0.0;
314 grad1[qz][qy][qx][1] = 0.0;
315 grad1[qz][qy][qx][2] = 0.0;
316
317 grad2[qz][qy][qx][0] = 0.0;
318 grad2[qz][qy][qx][1] = 0.0;
319 grad2[qz][qy][qx][2] = 0.0;
320 }
321 }
322 }
323 for (int dz = 0; dz < D1D; ++dz)
324 {
325 real_t dataXY[max_Q1D][max_Q1D][VDIM];
326 real_t gradXY0[max_Q1D][max_Q1D][VDIM];
327 real_t gradXY1[max_Q1D][max_Q1D][VDIM];
328 real_t gradXY2[max_Q1D][max_Q1D][VDIM];
329 for (int qy = 0; qy < Q1D; ++qy)
330 {
331 for (int qx = 0; qx < Q1D; ++qx)
332 {
333 dataXY[qy][qx][0] = 0.0;
334 dataXY[qy][qx][1] = 0.0;
335 dataXY[qy][qx][2] = 0.0;
336
337 gradXY0[qy][qx][0] = 0.0;
338 gradXY0[qy][qx][1] = 0.0;
339 gradXY0[qy][qx][2] = 0.0;
340
341 gradXY1[qy][qx][0] = 0.0;
342 gradXY1[qy][qx][1] = 0.0;
343 gradXY1[qy][qx][2] = 0.0;
344
345 gradXY2[qy][qx][0] = 0.0;
346 gradXY2[qy][qx][1] = 0.0;
347 gradXY2[qy][qx][2] = 0.0;
348 }
349 }
350 for (int dy = 0; dy < D1D; ++dy)
351 {
352 real_t dataX[max_Q1D][VDIM];
353 real_t gradX0[max_Q1D][VDIM];
354 real_t gradX1[max_Q1D][VDIM];
355 real_t gradX2[max_Q1D][VDIM];
356 for (int qx = 0; qx < Q1D; ++qx)
357 {
358 dataX[qx][0] = 0.0;
359 dataX[qx][1] = 0.0;
360 dataX[qx][2] = 0.0;
361
362 gradX0[qx][0] = 0.0;
363 gradX0[qx][1] = 0.0;
364 gradX0[qx][2] = 0.0;
365
366 gradX1[qx][0] = 0.0;
367 gradX1[qx][1] = 0.0;
368 gradX1[qx][2] = 0.0;
369
370 gradX2[qx][0] = 0.0;
371 gradX2[qx][1] = 0.0;
372 gradX2[qx][2] = 0.0;
373 }
374 for (int dx = 0; dx < D1D; ++dx)
375 {
376 const real_t s0 = x(dx, dy, dz, 0, e);
377 const real_t s1 = x(dx, dy, dz, 1, e);
378 const real_t s2 = x(dx, dy, dz, 2, e);
379 for (int qx = 0; qx < Q1D; ++qx)
380 {
381 const real_t Bx = B(qx, dx);
382 const real_t Gx = G(qx, dx);
383
384 dataX[qx][0] += s0 * Bx;
385 dataX[qx][1] += s1 * Bx;
386 dataX[qx][2] += s2 * Bx;
387
388 gradX0[qx][0] += s0 * Gx;
389 gradX0[qx][1] += s0 * Bx;
390 gradX0[qx][2] += s0 * Bx;
391
392 gradX1[qx][0] += s1 * Gx;
393 gradX1[qx][1] += s1 * Bx;
394 gradX1[qx][2] += s1 * Bx;
395
396 gradX2[qx][0] += s2 * Gx;
397 gradX2[qx][1] += s2 * Bx;
398 gradX2[qx][2] += s2 * Bx;
399 }
400 }
401 for (int qy = 0; qy < Q1D; ++qy)
402 {
403 const real_t By = B(qy, dy);
404 const real_t Gy = G(qy, dy);
405 for (int qx = 0; qx < Q1D; ++qx)
406 {
407 dataXY[qy][qx][0] += dataX[qx][0] * By;
408 dataXY[qy][qx][1] += dataX[qx][1] * By;
409 dataXY[qy][qx][2] += dataX[qx][2] * By;
410
411 gradXY0[qy][qx][0] += gradX0[qx][0] * By;
412 gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
413 gradXY0[qy][qx][2] += gradX0[qx][2] * By;
414
415 gradXY1[qy][qx][0] += gradX1[qx][0] * By;
416 gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
417 gradXY1[qy][qx][2] += gradX1[qx][2] * By;
418
419 gradXY2[qy][qx][0] += gradX2[qx][0] * By;
420 gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
421 gradXY2[qy][qx][2] += gradX2[qx][2] * By;
422 }
423 }
424 }
425 for (int qz = 0; qz < Q1D; ++qz)
426 {
427 const real_t Bz = B(qz, dz);
428 const real_t Gz = G(qz, dz);
429 for (int qy = 0; qy < Q1D; ++qy)
430 {
431 for (int qx = 0; qx < Q1D; ++qx)
432 {
433 data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
434 data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
435 data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
436
437 grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
438 grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
439 grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
440
441 grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
442 grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
443 grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
444
445 grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
446 grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
447 grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
448 }
449 }
450 }
451 }
452 for (int qz = 0; qz < Q1D; ++qz)
453 {
454 for (int qy = 0; qy < Q1D; ++qy)
455 {
456 for (int qx = 0; qx < Q1D; ++qx)
457 {
458 const int q = qx + Q1D * (qy + qz * Q1D);
459
460 const real_t u1 = data[qz][qy][qx][0];
461 const real_t u2 = data[qz][qy][qx][1];
462 const real_t u3 = data[qz][qy][qx][2];
463
464 const real_t grad00 = grad0[qz][qy][qx][0];
465 const real_t grad01 = grad0[qz][qy][qx][1];
466 const real_t grad02 = grad0[qz][qy][qx][2];
467
468 const real_t grad10 = grad1[qz][qy][qx][0];
469 const real_t grad11 = grad1[qz][qy][qx][1];
470 const real_t grad12 = grad1[qz][qy][qx][2];
471
472 const real_t grad20 = grad2[qz][qy][qx][0];
473 const real_t grad21 = grad2[qz][qy][qx][1];
474 const real_t grad22 = grad2[qz][qy][qx][2];
475
476 const real_t Dxu1 = grad00 * Q(q, 0, 0, e)
477 + grad01 * Q(q, 1, 0, e)
478 + grad02 * Q(q, 2, 0, e);
479 const real_t Dyu1 = grad00 * Q(q, 0, 1, e)
480 + grad01 * Q(q, 1, 1, e)
481 + grad02 * Q(q, 2, 1, e);
482 const real_t Dzu1 = grad00 * Q(q, 0, 2, e)
483 + grad01 * Q(q, 1, 2, e)
484 + grad02 * Q(q, 2, 2, e);
485
486 const real_t Dxu2 = grad10 * Q(q, 0, 0, e)
487 + grad11 * Q(q, 1, 0, e)
488 + grad12 * Q(q, 2, 0, e);
489 const real_t Dyu2 = grad10 * Q(q, 0, 1, e)
490 + grad11 * Q(q, 1, 1, e)
491 + grad12 * Q(q, 2, 1, e);
492 const real_t Dzu2 = grad10 * Q(q, 0, 2, e)
493 + grad11 * Q(q, 1, 2, e)
494 + grad12 * Q(q, 2, 2, e);
495
496 const real_t Dxu3 = grad20 * Q(q, 0, 0, e)
497 + grad21 * Q(q, 1, 0, e)
498 + grad22 * Q(q, 2, 0, e);
499 const real_t Dyu3 = grad20 * Q(q, 0, 1, e)
500 + grad21 * Q(q, 1, 1, e)
501 + grad22 * Q(q, 2, 1, e);
502 const real_t Dzu3 = grad20 * Q(q, 0, 2, e)
503 + grad21 * Q(q, 1, 2, e)
504 + grad22 * Q(q, 2, 2, e);
505
506 Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
507 Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
508 Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
509 }
510 }
511 }
512 for (int qz = 0; qz < Q1D; ++qz)
513 {
514 real_t opXY[max_D1D][max_D1D][VDIM];
515 for (int dy = 0; dy < D1D; ++dy)
516 {
517 for (int dx = 0; dx < D1D; ++dx)
518 {
519 opXY[dy][dx][0] = 0.0;
520 opXY[dy][dx][1] = 0.0;
521 opXY[dy][dx][2] = 0.0;
522 }
523 }
524 for (int qy = 0; qy < Q1D; ++qy)
525 {
526 real_t opX[max_D1D][VDIM];
527 for (int dx = 0; dx < D1D; ++dx)
528 {
529 opX[dx][0] = 0.0;
530 opX[dx][1] = 0.0;
531 opX[dx][2] = 0.0;
532 for (int qx = 0; qx < Q1D; ++qx)
533 {
534 const real_t Btx = Bt(dx, qx);
535 opX[dx][0] += Btx * Z[qz][qy][qx][0];
536 opX[dx][1] += Btx * Z[qz][qy][qx][1];
537 opX[dx][2] += Btx * Z[qz][qy][qx][2];
538 }
539 }
540 for (int dy = 0; dy < D1D; ++dy)
541 {
542 for (int dx = 0; dx < D1D; ++dx)
543 {
544 const real_t Bty = Bt(dy, qy);
545 opXY[dy][dx][0] += Bty * opX[dx][0];
546 opXY[dy][dx][1] += Bty * opX[dx][1];
547 opXY[dy][dx][2] += Bty * opX[dx][2];
548 }
549 }
550 }
551 for (int dz = 0; dz < D1D; ++dz)
552 {
553 for (int dy = 0; dy < D1D; ++dy)
554 {
555 for (int dx = 0; dx < D1D; ++dx)
556 {
557 const real_t Btz = Bt(dz, qz);
558 y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
559 y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
560 y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
561 }
562 }
563 }
564 }
565 });
566}
567
568template<int T_D1D = 0, int T_Q1D = 0, int T_MAX_D1D = 0, int T_MAX_Q1D = 0>
569static void SmemPAConvectionNLApply3D(const int NE,
570 const Array<real_t> &b_,
571 const Array<real_t> &g_,
572 const Vector &d_,
573 const Vector &x_,
574 Vector &y_,
575 const int d1d = 0,
576 const int q1d = 0)
577{
578 constexpr int VDIM = 3;
579 const int D1D = T_D1D ? T_D1D : d1d;
580 const int Q1D = T_Q1D ? T_Q1D : q1d;
581 constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
582 constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
583 MFEM_VERIFY(D1D <= MD1, "");
584 MFEM_VERIFY(Q1D <= MQ1, "");
585
586 auto b = Reshape(b_.Read(), Q1D, D1D);
587 auto g = Reshape(g_.Read(), Q1D, D1D);
588 auto D = Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
589 auto x = Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
590 auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
591
592 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
593 {
594 const int tidz = MFEM_THREAD_ID(z);
595 const int D1D = T_D1D ? T_D1D : d1d;
596 const int Q1D = T_Q1D ? T_Q1D : q1d;
597 constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
598 constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
599 MFEM_SHARED real_t BG[2][MQ1 * MD1];
600 real_t(*B)[MD1] = (real_t(*)[MD1])(BG + 0);
601 real_t(*G)[MD1] = (real_t(*)[MD1])(BG + 1);
602 real_t(*Bt)[MQ1] = (real_t(*)[MQ1])(BG + 0);
603 MFEM_SHARED real_t U[2][MQ1][MQ1][MQ1];
604 MFEM_SHARED real_t sm0[3][MQ1 * MQ1 * MQ1];
605 MFEM_SHARED real_t sm1[3][MQ1 * MQ1 * MQ1];
606 real_t(*DDQ0)[MD1][MQ1] = (real_t(*)[MD1][MQ1])(sm0 + 0);
607 real_t(*DDQ1)[MD1][MQ1] = (real_t(*)[MD1][MQ1])(sm0 + 1);
608 real_t(*X)[MD1][MD1] = (real_t(*)[MD1][MD1])(sm0 + 2);
609 real_t(*DQQ0)[MQ1][MQ1] = (real_t(*)[MQ1][MQ1])(sm1 + 0);
610 real_t(*DQQ1)[MQ1][MQ1] = (real_t(*)[MQ1][MQ1])(sm1 + 1);
611 real_t(*DQQ2)[MQ1][MQ1] = (real_t(*)[MQ1][MQ1])(sm1 + 2);
612 real_t(*QQQ0)[MQ1][MQ1] = (real_t(*)[MQ1][MQ1])(sm0 + 0);
613 real_t(*QQQ1)[MQ1][MQ1] = (real_t(*)[MQ1][MQ1])(sm0 + 1);
614 real_t(*QQQ2)[MQ1][MQ1] = (real_t(*)[MQ1][MQ1])(sm0 + 2);
615 real_t(*QQD0)[MQ1][MD1] = (real_t(*)[MQ1][MD1])(sm1 + 0);
616 real_t(*QDD0)[MD1][MD1] = (real_t(*)[MD1][MD1])(sm0 + 0);
617 MFEM_SHARED real_t Z[MQ1][MQ1][MQ1];
618
619 for (int cy = 0; cy < VDIM; ++cy)
620 {
621 if (tidz == 0)
622 {
623 MFEM_FOREACH_THREAD(q, x, Q1D)
624 {
625 MFEM_FOREACH_THREAD(d, y, D1D)
626 {
627 B[q][d] = b(q, d);
628 G[q][d] = g(q, d);
629 }
630 }
631 }
632 MFEM_FOREACH_THREAD(qz, z, Q1D)
633 {
634 MFEM_FOREACH_THREAD(qy, y, Q1D)
635 {
636 MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
637 }
638 }
639 MFEM_SYNC_THREAD;
640 for (int c = 0; c < VDIM; ++c)
641 {
642 MFEM_FOREACH_THREAD(dz, z, D1D)
643 {
644 MFEM_FOREACH_THREAD(dy, y, D1D)
645 {
646 MFEM_FOREACH_THREAD(dx, x, D1D)
647 {
648 X[dz][dy][dx] = x(dx, dy, dz, cy, e);
649 U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
650 }
651 }
652 }
653 MFEM_SYNC_THREAD;
654 MFEM_FOREACH_THREAD(dz, z, D1D)
655 {
656 MFEM_FOREACH_THREAD(dy, y, D1D)
657 {
658 MFEM_FOREACH_THREAD(qx, x, Q1D)
659 {
660 real_t u = 0.0;
661 real_t v = 0.0;
662 real_t z = 0.0;
663 for (int dx = 0; dx < D1D; ++dx)
664 {
665 const real_t coord = X[dz][dy][dx];
666 const real_t value = U[0][dz][dy][dx];
667 u += coord * B[qx][dx];
668 v += coord * G[qx][dx];
669 z += value * B[qx][dx];
670 }
671 DDQ0[dz][dy][qx] = u;
672 DDQ1[dz][dy][qx] = v;
673 U[1][dz][dy][qx] = z;
674 }
675 }
676 }
677 MFEM_SYNC_THREAD;
678 MFEM_FOREACH_THREAD(dz, z, D1D)
679 {
680 MFEM_FOREACH_THREAD(qy, y, Q1D)
681 {
682 MFEM_FOREACH_THREAD(qx, x, Q1D)
683 {
684 real_t u = 0.0;
685 real_t v = 0.0;
686 real_t w = 0.0;
687 real_t z = 0.0;
688 for (int dy = 0; dy < D1D; ++dy)
689 {
690 u += DDQ1[dz][dy][qx] * B[qy][dy];
691 v += DDQ0[dz][dy][qx] * G[qy][dy];
692 w += DDQ0[dz][dy][qx] * B[qy][dy];
693 z += U[1][dz][dy][qx] * B[qy][dy];
694 }
695 DQQ0[dz][qy][qx] = u;
696 DQQ1[dz][qy][qx] = v;
697 DQQ2[dz][qy][qx] = w;
698 U[0][dz][qy][qx] = z;
699 }
700 }
701 }
702 MFEM_SYNC_THREAD;
703 MFEM_FOREACH_THREAD(qz, z, Q1D)
704 {
705 MFEM_FOREACH_THREAD(qy, y, Q1D)
706 {
707 MFEM_FOREACH_THREAD(qx, x, Q1D)
708 {
709 real_t u = 0.0;
710 real_t v = 0.0;
711 real_t w = 0.0;
712 real_t z = 0.0;
713 for (int dz = 0; dz < D1D; ++dz)
714 {
715 u += DQQ0[dz][qy][qx] * B[qz][dz];
716 v += DQQ1[dz][qy][qx] * B[qz][dz];
717 w += DQQ2[dz][qy][qx] * G[qz][dz];
718 z += U[0][dz][qy][qx] * B[qz][dz];
719 }
720 QQQ0[qz][qy][qx] = u;
721 QQQ1[qz][qy][qx] = v;
722 QQQ2[qz][qy][qx] = w;
723 U[1][qz][qy][qx] = z;
724 }
725 }
726 }
727 MFEM_SYNC_THREAD;
728 MFEM_FOREACH_THREAD(qz, z, Q1D)
729 {
730 MFEM_FOREACH_THREAD(qy, y, Q1D)
731 {
732 MFEM_FOREACH_THREAD(qx, x, Q1D)
733 {
734 const int q = qx + (qy + qz * Q1D) * Q1D;
735 const real_t z = U[1][qz][qy][qx];
736 const real_t gX = QQQ0[qz][qy][qx];
737 const real_t gY = QQQ1[qz][qy][qx];
738 const real_t gZ = QQQ2[qz][qy][qx];
739 const real_t d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
740 + gZ * D(q, 2, c, e);
741 Z[qz][qy][qx] += z * d;
742 }
743 }
744 }
745 MFEM_SYNC_THREAD;
746 } // for each conv component
747 if (tidz == 0)
748 {
749 MFEM_FOREACH_THREAD(d, y, D1D)
750 {
751 MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] = b(q, d); }
752 }
753 }
754 MFEM_SYNC_THREAD;
755 MFEM_FOREACH_THREAD(qz, z, Q1D)
756 {
757 MFEM_FOREACH_THREAD(qy, y, Q1D)
758 {
759 MFEM_FOREACH_THREAD(dx, x, D1D)
760 {
761 real_t u = 0.0;
762 for (int qx = 0; qx < Q1D; ++qx)
763 {
764 u += Z[qz][qy][qx] * Bt[dx][qx];
765 }
766 QQD0[qz][qy][dx] = u;
767 }
768 }
769 }
770 MFEM_SYNC_THREAD;
771 MFEM_FOREACH_THREAD(qz, z, Q1D)
772 {
773 MFEM_FOREACH_THREAD(dy, y, D1D)
774 {
775 MFEM_FOREACH_THREAD(dx, x, D1D)
776 {
777 real_t u = 0.0;
778 for (int qy = 0; qy < Q1D; ++qy)
779 {
780 u += QQD0[qz][qy][dx] * Bt[dy][qy];
781 }
782 QDD0[qz][dy][dx] = u;
783 }
784 }
785 }
786 MFEM_SYNC_THREAD;
787 MFEM_FOREACH_THREAD(dz, z, D1D)
788 {
789 MFEM_FOREACH_THREAD(dy, y, D1D)
790 {
791 MFEM_FOREACH_THREAD(dx, x, D1D)
792 {
793 real_t u = 0.0;
794 for (int qz = 0; qz < Q1D; ++qz)
795 {
796 u += QDD0[qz][dy][dx] * Bt[dz][qz];
797 }
798 Y(dx, dy, dz, cy, e) += u;
799 }
800 }
801 }
802 MFEM_SYNC_THREAD;
803 }
804 });
805}
806
808{
809 if (DeviceCanUseCeed())
810 {
811 ceedOp->AddMult(x, y);
812 }
813 else
814 {
815 const int NE = ne;
816 const int D1D = maps->ndof;
817 const int Q1D = maps->nqpt;
818 const Vector &QV = pa_data;
819 const Array<real_t> &B = maps->B;
820 const Array<real_t> &G = maps->G;
821 const Array<real_t> &Bt = maps->Bt;
822 if (dim == 2)
823 {
824 return PAConvectionNLApply2D(NE, B, G, Bt, QV, x, y, D1D, Q1D);
825 }
826 if (dim == 3)
827 {
828 constexpr int T_MAX_D1D = 8;
829 constexpr int T_MAX_Q1D = 8;
830 MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D, "Not yet implemented!");
831 return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
832 (NE, B, G, QV, x, y, D1D, Q1D);
833 }
834 MFEM_ABORT("Not yet implemented!");
835 }
836}
837
838} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
A coefficient that is constant across space and time.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition device.hpp:278
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition fe_base.hpp:165
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:178
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:182
Array< real_t > Bt
Transpose of B.
Definition fe_base.hpp:199
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition fespace.hpp:709
Ordering::Type GetOrdering() const
Return the ordering method.
Definition fespace.hpp:876
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:679
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Definition fespace.cpp:3871
Abstract class for all finite elements.
Definition fe_base.hpp:244
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition fe_base.cpp:365
Vector J
Jacobians of the element transformations at all quadrature points.
Definition mesh.hpp:2976
Class for an integration rule - an Array of IntegrationPoint.
Definition intrules.hpp:100
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition intrules.hpp:256
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition intrules.cpp:86
const IntegrationRule * IntRule
Mesh data type.
Definition mesh.hpp:64
int GetNE() const
Returns number of elements.
Definition mesh.hpp:1282
int Dimension() const
Dimension of the reference space used within the elements.
Definition mesh.hpp:1216
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
Definition mesh.cpp:390
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition mesh.cpp:880
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Definition mesh.cpp:7243
void AddMultPA(const Vector &x, Vector &y) const override
Method for partially assembled action.
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
static const IntegrationRule & GetRule(const FiniteElement &fe, const ElementTransformation &T)
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:502
void AddMult(const mfem::Vector &x, mfem::Vector &y, const real_t a=1.0) const override
Operator application: y+=A(x) (default) or y+=a*A(x).
Definition operator.cpp:72
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
Definition util.cpp:33
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
void forall(int N, lambda &&body)
Definition forall.hpp:753
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128