12 #include "../general/forall.hpp"
14 #include "ceed/nlconvection.hpp"
23 "PA Only supports Ordering::byNODES!");
28 if (DeviceCanUseCeed())
31 ceedOp =
new ceed::PAVectorConvectionNLFIntegrator(fes, *ir, Q);
34 dim = mesh->Dimension();
37 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
44 MFEM_VERIFY(cQ != NULL,
"only ConstantCoefficient is supported!");
52 MFEM_ABORT(
"dim==1 not supported!");
56 auto J =
Reshape(geom->J.Read(), NQ, 2, 2, NE);
57 auto G =
Reshape(pa_data.Write(), NQ, 2, 2, NE);
60 for (
int q = 0; q < NQ; ++q)
62 const double J11 = J(q, 0, 0, e);
63 const double J12 = J(q, 0, 1, e);
64 const double J21 = J(q, 1, 0, e);
65 const double J22 = J(q, 1, 1, e);
67 G(q, 0, 0, e) = W[q] * COEFF * J22;
68 G(q, 0, 1, e) = W[q] * COEFF * -J12;
69 G(q, 1, 0, e) = W[q] * COEFF * -J21;
70 G(q, 1, 1, e) = W[q] * COEFF * J11;
76 auto J =
Reshape(geom->J.Read(), NQ, 3, 3, NE);
77 auto G =
Reshape(pa_data.Write(), NQ, 3, 3, NE);
80 for (
int q = 0; q < NQ; ++q)
82 const double J11 = J(q, 0, 0, e);
83 const double J21 = J(q, 1, 0, e);
84 const double J31 = J(q, 2, 0, e);
85 const double J12 = J(q, 0, 1, e);
86 const double J22 = J(q, 1, 1, e);
87 const double J32 = J(q, 2, 1, e);
88 const double J13 = J(q, 0, 2, e);
89 const double J23 = J(q, 1, 2, e);
90 const double J33 = J(q, 2, 2, e);
91 const double cw = W[q] * COEFF;
93 const double A11 = (J22 * J33) - (J23 * J32);
94 const double A12 = (J32 * J13) - (J12 * J33);
95 const double A13 = (J12 * J23) - (J22 * J13);
96 const double A21 = (J31 * J23) - (J21 * J33);
97 const double A22 = (J11 * J33) - (J13 * J31);
98 const double A23 = (J21 * J13) - (J11 * J23);
99 const double A31 = (J21 * J32) - (J31 * J22);
100 const double A32 = (J31 * J12) - (J11 * J32);
101 const double A33 = (J11 * J22) - (J12 * J21);
103 G(q, 0, 0, e) = cw * A11;
104 G(q, 0, 1, e) = cw * A12;
105 G(q, 0, 2, e) = cw * A13;
106 G(q, 1, 0, e) = cw * A21;
107 G(q, 1, 1, e) = cw * A22;
108 G(q, 1, 2, e) = cw * A23;
109 G(q, 2, 0, e) = cw * A31;
110 G(q, 2, 1, e) = cw * A32;
111 G(q, 2, 2, e) = cw * A33;
118 template<
int T_D1D = 0,
int T_Q1D = 0>
119 static void PAConvectionNLApply2D(
const int NE,
129 const int D1D = T_D1D ? T_D1D : d1d;
130 const int Q1D = T_Q1D ? T_Q1D : q1d;
131 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
132 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
141 const int D1D = T_D1D ? T_D1D : d1d;
142 const int Q1D = T_Q1D ? T_Q1D : q1d;
143 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
144 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
146 double data[max_Q1D][max_Q1D][2];
147 double grad0[max_Q1D][max_Q1D][2];
148 double grad1[max_Q1D][max_Q1D][2];
149 double Z[max_Q1D][max_Q1D][2];
150 for (
int qy = 0; qy < Q1D; ++qy)
152 for (
int qx = 0; qx < Q1D; ++qx)
154 data[qy][qx][0] = 0.0;
155 data[qy][qx][1] = 0.0;
156 grad0[qy][qx][0] = 0.0;
157 grad0[qy][qx][1] = 0.0;
158 grad1[qy][qx][0] = 0.0;
159 grad1[qy][qx][1] = 0.0;
162 for (
int dy = 0; dy < D1D; ++dy)
164 double dataX[max_Q1D][2];
165 double gradX0[max_Q1D][2];
166 double gradX1[max_Q1D][2];
167 for (
int qx = 0; qx < Q1D; ++qx)
176 for (
int dx = 0; dx < D1D; ++dx)
178 const double s0 = x(dx, dy, 0, e);
179 const double s1 = x(dx, dy, 1, e);
180 for (
int qx = 0; qx < Q1D; ++qx)
182 const double Bx = B(qx, dx);
183 const double Gx = G(qx, dx);
184 dataX[qx][0] += s0 * Bx;
185 dataX[qx][1] += s1 * Bx;
186 gradX0[qx][0] += s0 * Gx;
187 gradX0[qx][1] += s0 * Bx;
188 gradX1[qx][0] += s1 * Gx;
189 gradX1[qx][1] += s1 * Bx;
192 for (
int qy = 0; qy < Q1D; ++qy)
194 const double By = B(qy, dy);
195 const double Gy = G(qy, dy);
196 for (
int qx = 0; qx < Q1D; ++qx)
198 data[qy][qx][0] += dataX[qx][0] * By;
199 data[qy][qx][1] += dataX[qx][1] * By;
200 grad0[qy][qx][0] += gradX0[qx][0] * By;
201 grad0[qy][qx][1] += gradX0[qx][1] * Gy;
202 grad1[qy][qx][0] += gradX1[qx][0] * By;
203 grad1[qy][qx][1] += gradX1[qx][1] * Gy;
207 for (
int qy = 0; qy < Q1D; ++qy)
209 for (
int qx = 0; qx < Q1D; ++qx)
211 const int q = qx + qy * Q1D;
212 const double u1 = data[qy][qx][0];
213 const double u2 = data[qy][qx][1];
214 const double grad00 = grad0[qy][qx][0];
215 const double grad01 = grad0[qy][qx][1];
216 const double grad10 = grad1[qy][qx][0];
217 const double grad11 = grad1[qy][qx][1];
218 const double Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
219 const double Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
220 const double Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
221 const double Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
222 Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
223 Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
226 for (
int qy = 0; qy < Q1D; ++qy)
228 double Y[max_D1D][2];
229 for (
int dx = 0; dx < D1D; ++dx)
233 for (
int qx = 0; qx < Q1D; ++qx)
235 const double Btx = Bt(dx, qx);
236 Y[dx][0] += Btx * Z[qy][qx][0];
237 Y[dx][1] += Btx * Z[qy][qx][1];
240 for (
int dy = 0; dy < D1D; ++dy)
242 for (
int dx = 0; dx < D1D; ++dx)
244 const double Bty = Bt(dy, qy);
245 y(dx, dy, 0, e) += Bty * Y[dx][0];
246 y(dx, dy, 1, e) += Bty * Y[dx][1];
254 template<
int T_D1D = 0,
int T_Q1D = 0>
255 static void PAConvectionNLApply3D(
const int NE,
256 const Array<double> &b,
257 const Array<double> &g,
258 const Array<double> &bt,
265 constexpr
int VDIM = 3;
266 const int D1D = T_D1D ? T_D1D : d1d;
267 const int Q1D = T_Q1D ? T_Q1D : q1d;
268 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
269 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
271 auto B =
Reshape(b.Read(), Q1D, D1D);
272 auto G =
Reshape(g.Read(), Q1D, D1D);
273 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
274 auto Q =
Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
275 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
276 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
280 constexpr
int VDIM = 3;
281 const int D1D = T_D1D ? T_D1D : d1d;
282 const int Q1D = T_Q1D ? T_Q1D : q1d;
283 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
284 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
286 double data[max_Q1D][max_Q1D][max_Q1D][VDIM];
287 double grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
288 double grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
289 double grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
290 double Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
291 for (
int qz = 0; qz < Q1D; ++qz)
293 for (
int qy = 0; qy < Q1D; ++qy)
295 for (
int qx = 0; qx < Q1D; ++qx)
297 data[qz][qy][qx][0] = 0.0;
298 data[qz][qy][qx][1] = 0.0;
299 data[qz][qy][qx][2] = 0.0;
301 grad0[qz][qy][qx][0] = 0.0;
302 grad0[qz][qy][qx][1] = 0.0;
303 grad0[qz][qy][qx][2] = 0.0;
305 grad1[qz][qy][qx][0] = 0.0;
306 grad1[qz][qy][qx][1] = 0.0;
307 grad1[qz][qy][qx][2] = 0.0;
309 grad2[qz][qy][qx][0] = 0.0;
310 grad2[qz][qy][qx][1] = 0.0;
311 grad2[qz][qy][qx][2] = 0.0;
315 for (
int dz = 0; dz < D1D; ++dz)
317 double dataXY[max_Q1D][max_Q1D][VDIM];
318 double gradXY0[max_Q1D][max_Q1D][VDIM];
319 double gradXY1[max_Q1D][max_Q1D][VDIM];
320 double gradXY2[max_Q1D][max_Q1D][VDIM];
321 for (
int qy = 0; qy < Q1D; ++qy)
323 for (
int qx = 0; qx < Q1D; ++qx)
325 dataXY[qy][qx][0] = 0.0;
326 dataXY[qy][qx][1] = 0.0;
327 dataXY[qy][qx][2] = 0.0;
329 gradXY0[qy][qx][0] = 0.0;
330 gradXY0[qy][qx][1] = 0.0;
331 gradXY0[qy][qx][2] = 0.0;
333 gradXY1[qy][qx][0] = 0.0;
334 gradXY1[qy][qx][1] = 0.0;
335 gradXY1[qy][qx][2] = 0.0;
337 gradXY2[qy][qx][0] = 0.0;
338 gradXY2[qy][qx][1] = 0.0;
339 gradXY2[qy][qx][2] = 0.0;
342 for (
int dy = 0; dy < D1D; ++dy)
344 double dataX[max_Q1D][VDIM];
345 double gradX0[max_Q1D][VDIM];
346 double gradX1[max_Q1D][VDIM];
347 double gradX2[max_Q1D][VDIM];
348 for (
int qx = 0; qx < Q1D; ++qx)
366 for (
int dx = 0; dx < D1D; ++dx)
368 const double s0 = x(dx, dy, dz, 0, e);
369 const double s1 = x(dx, dy, dz, 1, e);
370 const double s2 = x(dx, dy, dz, 2, e);
371 for (
int qx = 0; qx < Q1D; ++qx)
373 const double Bx = B(qx, dx);
374 const double Gx = G(qx, dx);
376 dataX[qx][0] += s0 * Bx;
377 dataX[qx][1] += s1 * Bx;
378 dataX[qx][2] += s2 * Bx;
380 gradX0[qx][0] += s0 * Gx;
381 gradX0[qx][1] += s0 * Bx;
382 gradX0[qx][2] += s0 * Bx;
384 gradX1[qx][0] += s1 * Gx;
385 gradX1[qx][1] += s1 * Bx;
386 gradX1[qx][2] += s1 * Bx;
388 gradX2[qx][0] += s2 * Gx;
389 gradX2[qx][1] += s2 * Bx;
390 gradX2[qx][2] += s2 * Bx;
393 for (
int qy = 0; qy < Q1D; ++qy)
395 const double By = B(qy, dy);
396 const double Gy = G(qy, dy);
397 for (
int qx = 0; qx < Q1D; ++qx)
399 dataXY[qy][qx][0] += dataX[qx][0] * By;
400 dataXY[qy][qx][1] += dataX[qx][1] * By;
401 dataXY[qy][qx][2] += dataX[qx][2] * By;
403 gradXY0[qy][qx][0] += gradX0[qx][0] * By;
404 gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
405 gradXY0[qy][qx][2] += gradX0[qx][2] * By;
407 gradXY1[qy][qx][0] += gradX1[qx][0] * By;
408 gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
409 gradXY1[qy][qx][2] += gradX1[qx][2] * By;
411 gradXY2[qy][qx][0] += gradX2[qx][0] * By;
412 gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
413 gradXY2[qy][qx][2] += gradX2[qx][2] * By;
417 for (
int qz = 0; qz < Q1D; ++qz)
419 const double Bz = B(qz, dz);
420 const double Gz = G(qz, dz);
421 for (
int qy = 0; qy < Q1D; ++qy)
423 for (
int qx = 0; qx < Q1D; ++qx)
425 data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
426 data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
427 data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
429 grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
430 grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
431 grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
433 grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
434 grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
435 grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
437 grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
438 grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
439 grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
444 for (
int qz = 0; qz < Q1D; ++qz)
446 for (
int qy = 0; qy < Q1D; ++qy)
448 for (
int qx = 0; qx < Q1D; ++qx)
450 const int q = qx + Q1D * (qy + qz * Q1D);
452 const double u1 = data[qz][qy][qx][0];
453 const double u2 = data[qz][qy][qx][1];
454 const double u3 = data[qz][qy][qx][2];
456 const double grad00 = grad0[qz][qy][qx][0];
457 const double grad01 = grad0[qz][qy][qx][1];
458 const double grad02 = grad0[qz][qy][qx][2];
460 const double grad10 = grad1[qz][qy][qx][0];
461 const double grad11 = grad1[qz][qy][qx][1];
462 const double grad12 = grad1[qz][qy][qx][2];
464 const double grad20 = grad2[qz][qy][qx][0];
465 const double grad21 = grad2[qz][qy][qx][1];
466 const double grad22 = grad2[qz][qy][qx][2];
468 const double Dxu1 = grad00 * Q(q, 0, 0, e)
469 + grad01 * Q(q, 1, 0, e)
470 + grad02 * Q(q, 2, 0, e);
471 const double Dyu1 = grad00 * Q(q, 0, 1, e)
472 + grad01 * Q(q, 1, 1, e)
473 + grad02 * Q(q, 2, 1, e);
474 const double Dzu1 = grad00 * Q(q, 0, 2, e)
475 + grad01 * Q(q, 1, 2, e)
476 + grad02 * Q(q, 2, 2, e);
478 const double Dxu2 = grad10 * Q(q, 0, 0, e)
479 + grad11 * Q(q, 1, 0, e)
480 + grad12 * Q(q, 2, 0, e);
481 const double Dyu2 = grad10 * Q(q, 0, 1, e)
482 + grad11 * Q(q, 1, 1, e)
483 + grad12 * Q(q, 2, 1, e);
484 const double Dzu2 = grad10 * Q(q, 0, 2, e)
485 + grad11 * Q(q, 1, 2, e)
486 + grad12 * Q(q, 2, 2, e);
488 const double Dxu3 = grad20 * Q(q, 0, 0, e)
489 + grad21 * Q(q, 1, 0, e)
490 + grad22 * Q(q, 2, 0, e);
491 const double Dyu3 = grad20 * Q(q, 0, 1, e)
492 + grad21 * Q(q, 1, 1, e)
493 + grad22 * Q(q, 2, 1, e);
494 const double Dzu3 = grad20 * Q(q, 0, 2, e)
495 + grad21 * Q(q, 1, 2, e)
496 + grad22 * Q(q, 2, 2, e);
498 Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
499 Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
500 Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
504 for (
int qz = 0; qz < Q1D; ++qz)
506 double opXY[max_D1D][max_D1D][VDIM];
507 for (
int dy = 0; dy < D1D; ++dy)
509 for (
int dx = 0; dx < D1D; ++dx)
511 opXY[dy][dx][0] = 0.0;
512 opXY[dy][dx][1] = 0.0;
513 opXY[dy][dx][2] = 0.0;
516 for (
int qy = 0; qy < Q1D; ++qy)
518 double opX[max_D1D][VDIM];
519 for (
int dx = 0; dx < D1D; ++dx)
524 for (
int qx = 0; qx < Q1D; ++qx)
526 const double Btx = Bt(dx, qx);
527 opX[dx][0] += Btx * Z[qz][qy][qx][0];
528 opX[dx][1] += Btx * Z[qz][qy][qx][1];
529 opX[dx][2] += Btx * Z[qz][qy][qx][2];
532 for (
int dy = 0; dy < D1D; ++dy)
534 for (
int dx = 0; dx < D1D; ++dx)
536 const double Bty = Bt(dy, qy);
537 opXY[dy][dx][0] += Bty * opX[dx][0];
538 opXY[dy][dx][1] += Bty * opX[dx][1];
539 opXY[dy][dx][2] += Bty * opX[dx][2];
543 for (
int dz = 0; dz < D1D; ++dz)
545 for (
int dy = 0; dy < D1D; ++dy)
547 for (
int dx = 0; dx < D1D; ++dx)
549 const double Btz = Bt(dz, qz);
550 y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
551 y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
552 y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
560 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_MAX_D1D =0,
int T_MAX_Q1D =0>
561 static void SmemPAConvectionNLApply3D(
const int NE,
562 const Array<double> &b_,
563 const Array<double> &g_,
570 constexpr
int VDIM = 3;
571 const int D1D = T_D1D ? T_D1D : d1d;
572 const int Q1D = T_Q1D ? T_Q1D : q1d;
573 constexpr
int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
574 constexpr
int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
575 MFEM_VERIFY(D1D <= MD1,
"");
576 MFEM_VERIFY(Q1D <= MQ1,
"");
578 auto b =
Reshape(b_.Read(), Q1D, D1D);
579 auto g =
Reshape(g_.Read(), Q1D, D1D);
580 auto D =
Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
581 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
582 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
584 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
586 const int tidz = MFEM_THREAD_ID(z);
587 const int D1D = T_D1D ? T_D1D : d1d;
588 const int Q1D = T_Q1D ? T_Q1D : q1d;
589 constexpr
int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
590 constexpr
int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
591 MFEM_SHARED
double BG[2][MQ1 * MD1];
592 double(*B)[MD1] = (double(*)[MD1])(BG + 0);
593 double(*G)[MD1] = (double(*)[MD1])(BG + 1);
594 double(*Bt)[MQ1] = (double(*)[MQ1])(BG + 0);
595 MFEM_SHARED
double U[2][MQ1][MQ1][MQ1];
596 MFEM_SHARED
double sm0[3][MQ1 * MQ1 * MQ1];
597 MFEM_SHARED
double sm1[3][MQ1 * MQ1 * MQ1];
598 double(*DDQ0)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 0);
599 double(*DDQ1)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 1);
600 double(*X)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 2);
601 double(*DQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 0);
602 double(*DQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 1);
603 double(*DQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 2);
604 double(*QQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 0);
605 double(*QQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 1);
606 double(*QQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 2);
607 double(*QQD0)[MQ1][MD1] = (double(*)[MQ1][MD1])(sm1 + 0);
608 double(*QDD0)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 0);
609 MFEM_SHARED
double Z[MQ1][MQ1][MQ1];
611 for (
int cy = 0; cy < VDIM; ++cy)
615 MFEM_FOREACH_THREAD(q, x, Q1D)
617 MFEM_FOREACH_THREAD(d, y, D1D)
624 MFEM_FOREACH_THREAD(qz, z, Q1D)
626 MFEM_FOREACH_THREAD(qy, y, Q1D)
628 MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
632 for (
int c = 0; c < VDIM; ++c)
634 MFEM_FOREACH_THREAD(dz, z, D1D)
636 MFEM_FOREACH_THREAD(dy, y, D1D)
638 MFEM_FOREACH_THREAD(dx, x, D1D)
640 X[dz][dy][dx] = x(dx, dy, dz, cy, e);
641 U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
646 MFEM_FOREACH_THREAD(dz, z, D1D)
648 MFEM_FOREACH_THREAD(dy, y, D1D)
650 MFEM_FOREACH_THREAD(qx, x, Q1D)
655 for (
int dx = 0; dx < D1D; ++dx)
657 const double coord = X[dz][dy][dx];
658 const double value = U[0][dz][dy][dx];
659 u += coord * B[qx][dx];
660 v += coord * G[qx][dx];
661 z += value * B[qx][dx];
663 DDQ0[dz][dy][qx] =
u;
664 DDQ1[dz][dy][qx] = v;
665 U[1][dz][dy][qx] = z;
670 MFEM_FOREACH_THREAD(dz, z, D1D)
672 MFEM_FOREACH_THREAD(qy, y, Q1D)
674 MFEM_FOREACH_THREAD(qx, x, Q1D)
680 for (
int dy = 0; dy < D1D; ++dy)
682 u += DDQ1[dz][dy][qx] * B[qy][dy];
683 v += DDQ0[dz][dy][qx] * G[qy][dy];
684 w += DDQ0[dz][dy][qx] * B[qy][dy];
685 z += U[1][dz][dy][qx] * B[qy][dy];
687 DQQ0[dz][qy][qx] =
u;
688 DQQ1[dz][qy][qx] = v;
689 DQQ2[dz][qy][qx] = w;
690 U[0][dz][qy][qx] = z;
695 MFEM_FOREACH_THREAD(qz, z, Q1D)
697 MFEM_FOREACH_THREAD(qy, y, Q1D)
699 MFEM_FOREACH_THREAD(qx, x, Q1D)
705 for (
int dz = 0; dz < D1D; ++dz)
707 u += DQQ0[dz][qy][qx] * B[qz][dz];
708 v += DQQ1[dz][qy][qx] * B[qz][dz];
709 w += DQQ2[dz][qy][qx] * G[qz][dz];
710 z += U[0][dz][qy][qx] * B[qz][dz];
712 QQQ0[qz][qy][qx] =
u;
713 QQQ1[qz][qy][qx] = v;
714 QQQ2[qz][qy][qx] = w;
715 U[1][qz][qy][qx] = z;
720 MFEM_FOREACH_THREAD(qz, z, Q1D)
722 MFEM_FOREACH_THREAD(qy, y, Q1D)
724 MFEM_FOREACH_THREAD(qx, x, Q1D)
726 const int q = qx + (qy + qz * Q1D) * Q1D;
727 const double z = U[1][qz][qy][qx];
728 const double gX = QQQ0[qz][qy][qx];
729 const double gY = QQQ1[qz][qy][qx];
730 const double gZ = QQQ2[qz][qy][qx];
731 const double d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
732 + gZ * D(q, 2, c, e);
733 Z[qz][qy][qx] += z * d;
741 MFEM_FOREACH_THREAD(d, y, D1D)
743 MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] =
b(q, d); }
747 MFEM_FOREACH_THREAD(qz, z, Q1D)
749 MFEM_FOREACH_THREAD(qy, y, Q1D)
751 MFEM_FOREACH_THREAD(dx, x, D1D)
754 for (
int qx = 0; qx < Q1D; ++qx)
756 u += Z[qz][qy][qx] * Bt[dx][qx];
758 QQD0[qz][qy][dx] =
u;
763 MFEM_FOREACH_THREAD(qz, z, Q1D)
765 MFEM_FOREACH_THREAD(dy, y, D1D)
767 MFEM_FOREACH_THREAD(dx, x, D1D)
770 for (
int qy = 0; qy < Q1D; ++qy)
772 u += QQD0[qz][qy][dx] * Bt[dy][qy];
774 QDD0[qz][dy][dx] =
u;
779 MFEM_FOREACH_THREAD(dz, z, D1D)
781 MFEM_FOREACH_THREAD(dy, y, D1D)
783 MFEM_FOREACH_THREAD(dx, x, D1D)
786 for (
int qz = 0; qz < Q1D; ++qz)
788 u += QDD0[qz][dy][dx] * Bt[dz][qz];
790 Y(dx, dy, dz, cy, e) +=
u;
799 void VectorConvectionNLFIntegrator::AddMultPA(
const Vector &x,
Vector &y)
const
801 if (DeviceCanUseCeed())
803 ceedOp->AddMult(x, y);
808 const int D1D = maps->ndof;
809 const int Q1D = maps->nqpt;
810 const Vector &QV = pa_data;
816 return PAConvectionNLApply2D(NE, B, G, Bt, QV, x, y, D1D, Q1D);
820 constexpr
int T_MAX_D1D = 8;
821 constexpr
int T_MAX_Q1D = 8;
822 MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D,
"Not yet implemented!");
823 return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
824 (NE, B, G, QV, x, y, D1D, Q1D);
826 MFEM_ABORT(
"Not yet implemented!");
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
Ordering::Type GetOrdering() const
Return the ordering method.
Class for an integration rule - an Array of IntegrationPoint.
A coefficient that is constant across space and time.
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Mesh * GetMesh() const
Returns the mesh.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
double u(const Vector &xvec)
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).