22 "PA Only supports Ordering::byNODES!");
52 MFEM_VERIFY(cQ != NULL,
"only ConstantCoefficient is supported!");
60 MFEM_ABORT(
"dim==1 not supported!");
68 for (
int q = 0; q < NQ; ++q)
70 const real_t J11 = J(q, 0, 0, e);
71 const real_t J12 = J(q, 0, 1, e);
72 const real_t J21 = J(q, 1, 0, e);
73 const real_t J22 = J(q, 1, 1, e);
75 G(q, 0, 0, e) = W[q] * COEFF * J22;
76 G(q, 0, 1, e) = W[q] * COEFF * -J12;
77 G(q, 1, 0, e) = W[q] * COEFF * -J21;
78 G(q, 1, 1, e) = W[q] * COEFF * J11;
88 for (
int q = 0; q < NQ; ++q)
90 const real_t J11 = J(q, 0, 0, e);
91 const real_t J21 = J(q, 1, 0, e);
92 const real_t J31 = J(q, 2, 0, e);
93 const real_t J12 = J(q, 0, 1, e);
94 const real_t J22 = J(q, 1, 1, e);
95 const real_t J32 = J(q, 2, 1, e);
96 const real_t J13 = J(q, 0, 2, e);
97 const real_t J23 = J(q, 1, 2, e);
98 const real_t J33 = J(q, 2, 2, e);
99 const real_t cw = W[q] * COEFF;
101 const real_t A11 = (J22 * J33) - (J23 * J32);
102 const real_t A12 = (J32 * J13) - (J12 * J33);
103 const real_t A13 = (J12 * J23) - (J22 * J13);
104 const real_t A21 = (J31 * J23) - (J21 * J33);
105 const real_t A22 = (J11 * J33) - (J13 * J31);
106 const real_t A23 = (J21 * J13) - (J11 * J23);
107 const real_t A31 = (J21 * J32) - (J31 * J22);
108 const real_t A32 = (J31 * J12) - (J11 * J32);
109 const real_t A33 = (J11 * J22) - (J12 * J21);
111 G(q, 0, 0, e) = cw * A11;
112 G(q, 0, 1, e) = cw * A12;
113 G(q, 0, 2, e) = cw * A13;
114 G(q, 1, 0, e) = cw * A21;
115 G(q, 1, 1, e) = cw * A22;
116 G(q, 1, 2, e) = cw * A23;
117 G(q, 2, 0, e) = cw * A31;
118 G(q, 2, 1, e) = cw * A32;
119 G(q, 2, 2, e) = cw * A33;
126template<
int T_D1D = 0,
int T_Q1D = 0>
127static void PAConvectionNLApply2D(
const int NE,
137 const int D1D = T_D1D ? T_D1D : d1d;
138 const int Q1D = T_Q1D ? T_Q1D : q1d;
141 auto B =
Reshape(
b.Read(), Q1D, D1D);
149 const int D1D = T_D1D ? T_D1D : d1d;
150 const int Q1D = T_Q1D ? T_Q1D : q1d;
151 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
152 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
154 real_t data[max_Q1D][max_Q1D][2];
155 real_t grad0[max_Q1D][max_Q1D][2];
156 real_t grad1[max_Q1D][max_Q1D][2];
157 real_t Z[max_Q1D][max_Q1D][2];
158 for (
int qy = 0; qy < Q1D; ++qy)
160 for (
int qx = 0; qx < Q1D; ++qx)
162 data[qy][qx][0] = 0.0;
163 data[qy][qx][1] = 0.0;
164 grad0[qy][qx][0] = 0.0;
165 grad0[qy][qx][1] = 0.0;
166 grad1[qy][qx][0] = 0.0;
167 grad1[qy][qx][1] = 0.0;
170 for (
int dy = 0; dy < D1D; ++dy)
173 real_t gradX0[max_Q1D][2];
174 real_t gradX1[max_Q1D][2];
175 for (
int qx = 0; qx < Q1D; ++qx)
184 for (
int dx = 0; dx < D1D; ++dx)
186 const real_t s0 = x(dx, dy, 0, e);
187 const real_t s1 = x(dx, dy, 1, e);
188 for (
int qx = 0; qx < Q1D; ++qx)
190 const real_t Bx = B(qx, dx);
191 const real_t Gx = G(qx, dx);
192 dataX[qx][0] += s0 * Bx;
193 dataX[qx][1] += s1 * Bx;
194 gradX0[qx][0] += s0 * Gx;
195 gradX0[qx][1] += s0 * Bx;
196 gradX1[qx][0] += s1 * Gx;
197 gradX1[qx][1] += s1 * Bx;
200 for (
int qy = 0; qy < Q1D; ++qy)
202 const real_t By = B(qy, dy);
203 const real_t Gy = G(qy, dy);
204 for (
int qx = 0; qx < Q1D; ++qx)
206 data[qy][qx][0] += dataX[qx][0] * By;
207 data[qy][qx][1] += dataX[qx][1] * By;
208 grad0[qy][qx][0] += gradX0[qx][0] * By;
209 grad0[qy][qx][1] += gradX0[qx][1] * Gy;
210 grad1[qy][qx][0] += gradX1[qx][0] * By;
211 grad1[qy][qx][1] += gradX1[qx][1] * Gy;
215 for (
int qy = 0; qy < Q1D; ++qy)
217 for (
int qx = 0; qx < Q1D; ++qx)
219 const int q = qx + qy * Q1D;
220 const real_t u1 = data[qy][qx][0];
221 const real_t u2 = data[qy][qx][1];
222 const real_t grad00 = grad0[qy][qx][0];
223 const real_t grad01 = grad0[qy][qx][1];
224 const real_t grad10 = grad1[qy][qx][0];
225 const real_t grad11 = grad1[qy][qx][1];
226 const real_t Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
227 const real_t Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
228 const real_t Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
229 const real_t Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
230 Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
231 Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
234 for (
int qy = 0; qy < Q1D; ++qy)
237 for (
int dx = 0; dx < D1D; ++dx)
241 for (
int qx = 0; qx < Q1D; ++qx)
243 const real_t Btx = Bt(dx, qx);
244 Y[dx][0] += Btx * Z[qy][qx][0];
245 Y[dx][1] += Btx * Z[qy][qx][1];
248 for (
int dy = 0; dy < D1D; ++dy)
250 for (
int dx = 0; dx < D1D; ++dx)
252 const real_t Bty = Bt(dy, qy);
253 y(dx, dy, 0, e) += Bty * Y[dx][0];
254 y(dx, dy, 1, e) += Bty * Y[dx][1];
262template<
int T_D1D = 0,
int T_Q1D = 0>
263static void PAConvectionNLApply3D(
const int NE,
264 const Array<real_t> &
b,
265 const Array<real_t> &g,
266 const Array<real_t> &bt,
273 constexpr int VDIM = 3;
274 const int D1D = T_D1D ? T_D1D : d1d;
275 const int Q1D = T_Q1D ? T_Q1D : q1d;
279 auto B =
Reshape(
b.Read(), Q1D, D1D);
280 auto G =
Reshape(g.Read(), Q1D, D1D);
281 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
282 auto Q =
Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
283 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
284 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
288 constexpr int VDIM = 3;
289 const int D1D = T_D1D ? T_D1D : d1d;
290 const int Q1D = T_Q1D ? T_Q1D : q1d;
291 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
292 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
294 real_t data[max_Q1D][max_Q1D][max_Q1D][VDIM];
295 real_t grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
296 real_t grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
297 real_t grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
298 real_t Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
299 for (
int qz = 0; qz < Q1D; ++qz)
301 for (
int qy = 0; qy < Q1D; ++qy)
303 for (
int qx = 0; qx < Q1D; ++qx)
305 data[qz][qy][qx][0] = 0.0;
306 data[qz][qy][qx][1] = 0.0;
307 data[qz][qy][qx][2] = 0.0;
309 grad0[qz][qy][qx][0] = 0.0;
310 grad0[qz][qy][qx][1] = 0.0;
311 grad0[qz][qy][qx][2] = 0.0;
313 grad1[qz][qy][qx][0] = 0.0;
314 grad1[qz][qy][qx][1] = 0.0;
315 grad1[qz][qy][qx][2] = 0.0;
317 grad2[qz][qy][qx][0] = 0.0;
318 grad2[qz][qy][qx][1] = 0.0;
319 grad2[qz][qy][qx][2] = 0.0;
323 for (
int dz = 0; dz < D1D; ++dz)
325 real_t dataXY[max_Q1D][max_Q1D][VDIM];
326 real_t gradXY0[max_Q1D][max_Q1D][VDIM];
327 real_t gradXY1[max_Q1D][max_Q1D][VDIM];
328 real_t gradXY2[max_Q1D][max_Q1D][VDIM];
329 for (
int qy = 0; qy < Q1D; ++qy)
331 for (
int qx = 0; qx < Q1D; ++qx)
333 dataXY[qy][qx][0] = 0.0;
334 dataXY[qy][qx][1] = 0.0;
335 dataXY[qy][qx][2] = 0.0;
337 gradXY0[qy][qx][0] = 0.0;
338 gradXY0[qy][qx][1] = 0.0;
339 gradXY0[qy][qx][2] = 0.0;
341 gradXY1[qy][qx][0] = 0.0;
342 gradXY1[qy][qx][1] = 0.0;
343 gradXY1[qy][qx][2] = 0.0;
345 gradXY2[qy][qx][0] = 0.0;
346 gradXY2[qy][qx][1] = 0.0;
347 gradXY2[qy][qx][2] = 0.0;
350 for (
int dy = 0; dy < D1D; ++dy)
352 real_t dataX[max_Q1D][VDIM];
353 real_t gradX0[max_Q1D][VDIM];
354 real_t gradX1[max_Q1D][VDIM];
355 real_t gradX2[max_Q1D][VDIM];
356 for (
int qx = 0; qx < Q1D; ++qx)
374 for (
int dx = 0; dx < D1D; ++dx)
376 const real_t s0 = x(dx, dy, dz, 0, e);
377 const real_t s1 = x(dx, dy, dz, 1, e);
378 const real_t s2 = x(dx, dy, dz, 2, e);
379 for (
int qx = 0; qx < Q1D; ++qx)
381 const real_t Bx = B(qx, dx);
382 const real_t Gx = G(qx, dx);
384 dataX[qx][0] += s0 * Bx;
385 dataX[qx][1] += s1 * Bx;
386 dataX[qx][2] += s2 * Bx;
388 gradX0[qx][0] += s0 * Gx;
389 gradX0[qx][1] += s0 * Bx;
390 gradX0[qx][2] += s0 * Bx;
392 gradX1[qx][0] += s1 * Gx;
393 gradX1[qx][1] += s1 * Bx;
394 gradX1[qx][2] += s1 * Bx;
396 gradX2[qx][0] += s2 * Gx;
397 gradX2[qx][1] += s2 * Bx;
398 gradX2[qx][2] += s2 * Bx;
401 for (
int qy = 0; qy < Q1D; ++qy)
403 const real_t By = B(qy, dy);
404 const real_t Gy = G(qy, dy);
405 for (
int qx = 0; qx < Q1D; ++qx)
407 dataXY[qy][qx][0] += dataX[qx][0] * By;
408 dataXY[qy][qx][1] += dataX[qx][1] * By;
409 dataXY[qy][qx][2] += dataX[qx][2] * By;
411 gradXY0[qy][qx][0] += gradX0[qx][0] * By;
412 gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
413 gradXY0[qy][qx][2] += gradX0[qx][2] * By;
415 gradXY1[qy][qx][0] += gradX1[qx][0] * By;
416 gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
417 gradXY1[qy][qx][2] += gradX1[qx][2] * By;
419 gradXY2[qy][qx][0] += gradX2[qx][0] * By;
420 gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
421 gradXY2[qy][qx][2] += gradX2[qx][2] * By;
425 for (
int qz = 0; qz < Q1D; ++qz)
427 const real_t Bz = B(qz, dz);
428 const real_t Gz = G(qz, dz);
429 for (
int qy = 0; qy < Q1D; ++qy)
431 for (
int qx = 0; qx < Q1D; ++qx)
433 data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
434 data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
435 data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
437 grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
438 grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
439 grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
441 grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
442 grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
443 grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
445 grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
446 grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
447 grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
452 for (
int qz = 0; qz < Q1D; ++qz)
454 for (
int qy = 0; qy < Q1D; ++qy)
456 for (
int qx = 0; qx < Q1D; ++qx)
458 const int q = qx + Q1D * (qy + qz * Q1D);
460 const real_t u1 = data[qz][qy][qx][0];
461 const real_t u2 = data[qz][qy][qx][1];
462 const real_t u3 = data[qz][qy][qx][2];
464 const real_t grad00 = grad0[qz][qy][qx][0];
465 const real_t grad01 = grad0[qz][qy][qx][1];
466 const real_t grad02 = grad0[qz][qy][qx][2];
468 const real_t grad10 = grad1[qz][qy][qx][0];
469 const real_t grad11 = grad1[qz][qy][qx][1];
470 const real_t grad12 = grad1[qz][qy][qx][2];
472 const real_t grad20 = grad2[qz][qy][qx][0];
473 const real_t grad21 = grad2[qz][qy][qx][1];
474 const real_t grad22 = grad2[qz][qy][qx][2];
476 const real_t Dxu1 = grad00 * Q(q, 0, 0, e)
477 + grad01 * Q(q, 1, 0, e)
478 + grad02 * Q(q, 2, 0, e);
479 const real_t Dyu1 = grad00 * Q(q, 0, 1, e)
480 + grad01 * Q(q, 1, 1, e)
481 + grad02 * Q(q, 2, 1, e);
482 const real_t Dzu1 = grad00 * Q(q, 0, 2, e)
483 + grad01 * Q(q, 1, 2, e)
484 + grad02 * Q(q, 2, 2, e);
486 const real_t Dxu2 = grad10 * Q(q, 0, 0, e)
487 + grad11 * Q(q, 1, 0, e)
488 + grad12 * Q(q, 2, 0, e);
489 const real_t Dyu2 = grad10 * Q(q, 0, 1, e)
490 + grad11 * Q(q, 1, 1, e)
491 + grad12 * Q(q, 2, 1, e);
492 const real_t Dzu2 = grad10 * Q(q, 0, 2, e)
493 + grad11 * Q(q, 1, 2, e)
494 + grad12 * Q(q, 2, 2, e);
496 const real_t Dxu3 = grad20 * Q(q, 0, 0, e)
497 + grad21 * Q(q, 1, 0, e)
498 + grad22 * Q(q, 2, 0, e);
499 const real_t Dyu3 = grad20 * Q(q, 0, 1, e)
500 + grad21 * Q(q, 1, 1, e)
501 + grad22 * Q(q, 2, 1, e);
502 const real_t Dzu3 = grad20 * Q(q, 0, 2, e)
503 + grad21 * Q(q, 1, 2, e)
504 + grad22 * Q(q, 2, 2, e);
506 Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
507 Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
508 Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
512 for (
int qz = 0; qz < Q1D; ++qz)
514 real_t opXY[max_D1D][max_D1D][VDIM];
515 for (
int dy = 0; dy < D1D; ++dy)
517 for (
int dx = 0; dx < D1D; ++dx)
519 opXY[dy][dx][0] = 0.0;
520 opXY[dy][dx][1] = 0.0;
521 opXY[dy][dx][2] = 0.0;
524 for (
int qy = 0; qy < Q1D; ++qy)
526 real_t opX[max_D1D][VDIM];
527 for (
int dx = 0; dx < D1D; ++dx)
532 for (
int qx = 0; qx < Q1D; ++qx)
534 const real_t Btx = Bt(dx, qx);
535 opX[dx][0] += Btx * Z[qz][qy][qx][0];
536 opX[dx][1] += Btx * Z[qz][qy][qx][1];
537 opX[dx][2] += Btx * Z[qz][qy][qx][2];
540 for (
int dy = 0; dy < D1D; ++dy)
542 for (
int dx = 0; dx < D1D; ++dx)
544 const real_t Bty = Bt(dy, qy);
545 opXY[dy][dx][0] += Bty * opX[dx][0];
546 opXY[dy][dx][1] += Bty * opX[dx][1];
547 opXY[dy][dx][2] += Bty * opX[dx][2];
551 for (
int dz = 0; dz < D1D; ++dz)
553 for (
int dy = 0; dy < D1D; ++dy)
555 for (
int dx = 0; dx < D1D; ++dx)
557 const real_t Btz = Bt(dz, qz);
558 y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
559 y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
560 y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
568template<
int T_D1D = 0,
int T_Q1D = 0,
int T_MAX_D1D = 0,
int T_MAX_Q1D = 0>
569static void SmemPAConvectionNLApply3D(
const int NE,
570 const Array<real_t> &b_,
571 const Array<real_t> &g_,
578 constexpr int VDIM = 3;
579 const int D1D = T_D1D ? T_D1D : d1d;
580 const int Q1D = T_Q1D ? T_Q1D : q1d;
581 constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
582 constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
583 MFEM_VERIFY(D1D <= MD1,
"");
584 MFEM_VERIFY(Q1D <= MQ1,
"");
586 auto b =
Reshape(b_.Read(), Q1D, D1D);
587 auto g =
Reshape(g_.Read(), Q1D, D1D);
588 auto D =
Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
589 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
590 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
594 const int tidz = MFEM_THREAD_ID(z);
595 const int D1D = T_D1D ? T_D1D : d1d;
596 const int Q1D = T_Q1D ? T_Q1D : q1d;
597 constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
598 constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
599 MFEM_SHARED
real_t BG[2][MQ1 * MD1];
603 MFEM_SHARED
real_t U[2][MQ1][MQ1][MQ1];
604 MFEM_SHARED
real_t sm0[3][MQ1 * MQ1 * MQ1];
605 MFEM_SHARED
real_t sm1[3][MQ1 * MQ1 * MQ1];
606 real_t(*DDQ0)[MD1][MQ1] = (
real_t(*)[MD1][MQ1])(sm0 + 0);
607 real_t(*DDQ1)[MD1][MQ1] = (
real_t(*)[MD1][MQ1])(sm0 + 1);
609 real_t(*DQQ0)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm1 + 0);
610 real_t(*DQQ1)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm1 + 1);
611 real_t(*DQQ2)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm1 + 2);
612 real_t(*QQQ0)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm0 + 0);
613 real_t(*QQQ1)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm0 + 1);
614 real_t(*QQQ2)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm0 + 2);
615 real_t(*QQD0)[MQ1][MD1] = (
real_t(*)[MQ1][MD1])(sm1 + 0);
616 real_t(*QDD0)[MD1][MD1] = (
real_t(*)[MD1][MD1])(sm0 + 0);
617 MFEM_SHARED
real_t Z[MQ1][MQ1][MQ1];
619 for (
int cy = 0; cy < VDIM; ++cy)
623 MFEM_FOREACH_THREAD(q, x, Q1D)
625 MFEM_FOREACH_THREAD(d, y, D1D)
632 MFEM_FOREACH_THREAD(qz, z, Q1D)
634 MFEM_FOREACH_THREAD(qy, y, Q1D)
636 MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
640 for (
int c = 0; c < VDIM; ++c)
642 MFEM_FOREACH_THREAD(dz, z, D1D)
644 MFEM_FOREACH_THREAD(dy, y, D1D)
646 MFEM_FOREACH_THREAD(dx, x, D1D)
648 X[dz][dy][dx] = x(dx, dy, dz, cy, e);
649 U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
654 MFEM_FOREACH_THREAD(dz, z, D1D)
656 MFEM_FOREACH_THREAD(dy, y, D1D)
658 MFEM_FOREACH_THREAD(qx, x, Q1D)
663 for (
int dx = 0; dx < D1D; ++dx)
665 const real_t coord = X[dz][dy][dx];
666 const real_t value = U[0][dz][dy][dx];
667 u += coord * B[qx][dx];
668 v += coord * G[qx][dx];
669 z += value * B[qx][dx];
671 DDQ0[dz][dy][qx] =
u;
672 DDQ1[dz][dy][qx] = v;
673 U[1][dz][dy][qx] = z;
678 MFEM_FOREACH_THREAD(dz, z, D1D)
680 MFEM_FOREACH_THREAD(qy, y, Q1D)
682 MFEM_FOREACH_THREAD(qx, x, Q1D)
688 for (
int dy = 0; dy < D1D; ++dy)
690 u += DDQ1[dz][dy][qx] * B[qy][dy];
691 v += DDQ0[dz][dy][qx] * G[qy][dy];
692 w += DDQ0[dz][dy][qx] * B[qy][dy];
693 z += U[1][dz][dy][qx] * B[qy][dy];
695 DQQ0[dz][qy][qx] =
u;
696 DQQ1[dz][qy][qx] = v;
697 DQQ2[dz][qy][qx] = w;
698 U[0][dz][qy][qx] = z;
703 MFEM_FOREACH_THREAD(qz, z, Q1D)
705 MFEM_FOREACH_THREAD(qy, y, Q1D)
707 MFEM_FOREACH_THREAD(qx, x, Q1D)
713 for (
int dz = 0; dz < D1D; ++dz)
715 u += DQQ0[dz][qy][qx] * B[qz][dz];
716 v += DQQ1[dz][qy][qx] * B[qz][dz];
717 w += DQQ2[dz][qy][qx] * G[qz][dz];
718 z += U[0][dz][qy][qx] * B[qz][dz];
720 QQQ0[qz][qy][qx] =
u;
721 QQQ1[qz][qy][qx] = v;
722 QQQ2[qz][qy][qx] = w;
723 U[1][qz][qy][qx] = z;
728 MFEM_FOREACH_THREAD(qz, z, Q1D)
730 MFEM_FOREACH_THREAD(qy, y, Q1D)
732 MFEM_FOREACH_THREAD(qx, x, Q1D)
734 const int q = qx + (qy + qz * Q1D) * Q1D;
735 const real_t z = U[1][qz][qy][qx];
736 const real_t gX = QQQ0[qz][qy][qx];
737 const real_t gY = QQQ1[qz][qy][qx];
738 const real_t gZ = QQQ2[qz][qy][qx];
739 const real_t d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
740 + gZ * D(q, 2, c, e);
741 Z[qz][qy][qx] += z * d;
749 MFEM_FOREACH_THREAD(d, y, D1D)
751 MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] =
b(q, d); }
755 MFEM_FOREACH_THREAD(qz, z, Q1D)
757 MFEM_FOREACH_THREAD(qy, y, Q1D)
759 MFEM_FOREACH_THREAD(dx, x, D1D)
762 for (
int qx = 0; qx < Q1D; ++qx)
764 u += Z[qz][qy][qx] * Bt[dx][qx];
766 QQD0[qz][qy][dx] =
u;
771 MFEM_FOREACH_THREAD(qz, z, Q1D)
773 MFEM_FOREACH_THREAD(dy, y, D1D)
775 MFEM_FOREACH_THREAD(dx, x, D1D)
778 for (
int qy = 0; qy < Q1D; ++qy)
780 u += QQD0[qz][qy][dx] * Bt[dy][qy];
782 QDD0[qz][dy][dx] =
u;
787 MFEM_FOREACH_THREAD(dz, z, D1D)
789 MFEM_FOREACH_THREAD(dy, y, D1D)
791 MFEM_FOREACH_THREAD(dx, x, D1D)
794 for (
int qz = 0; qz < Q1D; ++qz)
796 u += QDD0[qz][dy][dx] * Bt[dz][qz];
798 Y(dx, dy, dz, cy, e) +=
u;
816 const int D1D = maps->
ndof;
817 const int Q1D = maps->
nqpt;
818 const Vector &QV = pa_data;
824 return PAConvectionNLApply2D(NE, B, G, Bt, QV, x, y, D1D, Q1D);
828 constexpr int T_MAX_D1D = 8;
829 constexpr int T_MAX_Q1D = 8;
830 MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D,
"Not yet implemented!");
831 return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
832 (NE, B, G, QV, x, y, D1D, Q1D);
834 MFEM_ABORT(
"Not yet implemented!");
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
A coefficient that is constant across space and time.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Array< real_t > B
Basis functions evaluated at quadrature points.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Array< real_t > Bt
Transpose of B.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Ordering::Type GetOrdering() const
Return the ordering method.
Mesh * GetMesh() const
Returns the mesh.
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Abstract class for all finite elements.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Vector J
Jacobians of the element transformations at all quadrature points.
Class for an integration rule - an Array of IntegrationPoint.
int GetNPoints() const
Returns the number of the points in the integration rule.
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
const IntegrationRule * IntRule
int GetNE() const
Returns number of elements.
int Dimension() const
Dimension of the reference space used within the elements.
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
void AddMultPA(const Vector &x, Vector &y) const override
Method for partially assembled action.
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
static const IntegrationRule & GetRule(const FiniteElement &fe, const ElementTransformation &T)
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void SetSize(int s)
Resize the vector to size s.
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
void AddMult(const mfem::Vector &x, mfem::Vector &y, const real_t a=1.0) const override
Operator application: y+=A(x) (default) or y+=a*A(x).
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.