12 #include "../general/forall.hpp"
22 "PA Only supports Ordering::byNODES!");
27 dim = mesh->Dimension();
30 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
37 MFEM_VERIFY(cQ != NULL,
"only ConstantCoefficient is supported!");
45 MFEM_ABORT(
"dim==1 not supported!");
50 auto G =
Reshape(pa_data.Write(), NQ, 2, 2, NE);
53 for (
int q = 0; q < NQ; ++q)
55 const double J11 = J(q, 0, 0, e);
56 const double J12 = J(q, 0, 1, e);
57 const double J21 = J(q, 1, 0, e);
58 const double J22 = J(q, 1, 1, e);
60 G(q, 0, 0, e) = W[q] * COEFF * J22;
61 G(q, 0, 1, e) = W[q] * COEFF * -J12;
62 G(q, 1, 0, e) = W[q] * COEFF * -J21;
63 G(q, 1, 1, e) = W[q] * COEFF * J11;
70 auto G =
Reshape(pa_data.Write(), NQ, 3, 3, NE);
73 for (
int q = 0; q < NQ; ++q)
75 const double J11 = J(q, 0, 0, e);
76 const double J21 = J(q, 1, 0, e);
77 const double J31 = J(q, 2, 0, e);
78 const double J12 = J(q, 0, 1, e);
79 const double J22 = J(q, 1, 1, e);
80 const double J32 = J(q, 2, 1, e);
81 const double J13 = J(q, 0, 2, e);
82 const double J23 = J(q, 1, 2, e);
83 const double J33 = J(q, 2, 2, e);
84 const double cw = W[q] * COEFF;
86 const double A11 = (J22 * J33) - (J23 * J32);
87 const double A12 = (J32 * J13) - (J12 * J33);
88 const double A13 = (J12 * J23) - (J22 * J13);
89 const double A21 = (J31 * J23) - (J21 * J33);
90 const double A22 = (J11 * J33) - (J13 * J31);
91 const double A23 = (J21 * J13) - (J11 * J23);
92 const double A31 = (J21 * J32) - (J31 * J22);
93 const double A32 = (J31 * J12) - (J11 * J32);
94 const double A33 = (J11 * J22) - (J12 * J21);
96 G(q, 0, 0, e) = cw * A11;
97 G(q, 0, 1, e) = cw * A12;
98 G(q, 0, 2, e) = cw * A13;
99 G(q, 1, 0, e) = cw * A21;
100 G(q, 1, 1, e) = cw * A22;
101 G(q, 1, 2, e) = cw * A23;
102 G(q, 2, 0, e) = cw * A31;
103 G(q, 2, 1, e) = cw * A32;
104 G(q, 2, 2, e) = cw * A33;
111 template<
int T_D1D = 0,
int T_Q1D = 0>
112 static void PAConvectionNLApply2D(
const int NE,
122 const int D1D = T_D1D ? T_D1D : d1d;
123 const int Q1D = T_Q1D ? T_Q1D : q1d;
124 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
125 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
134 const int D1D = T_D1D ? T_D1D : d1d;
135 const int Q1D = T_Q1D ? T_Q1D : q1d;
136 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
137 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
139 double data[max_Q1D][max_Q1D][2];
140 double grad0[max_Q1D][max_Q1D][2];
141 double grad1[max_Q1D][max_Q1D][2];
142 double Z[max_Q1D][max_Q1D][2];
143 for (
int qy = 0; qy < Q1D; ++qy)
145 for (
int qx = 0; qx < Q1D; ++qx)
147 data[qy][qx][0] = 0.0;
148 data[qy][qx][1] = 0.0;
149 grad0[qy][qx][0] = 0.0;
150 grad0[qy][qx][1] = 0.0;
151 grad1[qy][qx][0] = 0.0;
152 grad1[qy][qx][1] = 0.0;
155 for (
int dy = 0; dy < D1D; ++dy)
157 double dataX[max_Q1D][2];
158 double gradX0[max_Q1D][2];
159 double gradX1[max_Q1D][2];
160 for (
int qx = 0; qx < Q1D; ++qx)
169 for (
int dx = 0; dx < D1D; ++dx)
171 const double s0 = x(dx, dy, 0, e);
172 const double s1 = x(dx, dy, 1, e);
173 for (
int qx = 0; qx < Q1D; ++qx)
175 const double Bx = B(qx, dx);
176 const double Gx = G(qx, dx);
177 dataX[qx][0] += s0 * Bx;
178 dataX[qx][1] += s1 * Bx;
179 gradX0[qx][0] += s0 * Gx;
180 gradX0[qx][1] += s0 * Bx;
181 gradX1[qx][0] += s1 * Gx;
182 gradX1[qx][1] += s1 * Bx;
185 for (
int qy = 0; qy < Q1D; ++qy)
187 const double By = B(qy, dy);
188 const double Gy = G(qy, dy);
189 for (
int qx = 0; qx < Q1D; ++qx)
191 data[qy][qx][0] += dataX[qx][0] * By;
192 data[qy][qx][1] += dataX[qx][1] * By;
193 grad0[qy][qx][0] += gradX0[qx][0] * By;
194 grad0[qy][qx][1] += gradX0[qx][1] * Gy;
195 grad1[qy][qx][0] += gradX1[qx][0] * By;
196 grad1[qy][qx][1] += gradX1[qx][1] * Gy;
200 for (
int qy = 0; qy < Q1D; ++qy)
202 for (
int qx = 0; qx < Q1D; ++qx)
204 const int q = qx + qy * Q1D;
205 const double u1 = data[qy][qx][0];
206 const double u2 = data[qy][qx][1];
207 const double grad00 = grad0[qy][qx][0];
208 const double grad01 = grad0[qy][qx][1];
209 const double grad10 = grad1[qy][qx][0];
210 const double grad11 = grad1[qy][qx][1];
211 const double Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
212 const double Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
213 const double Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
214 const double Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
215 Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
216 Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
219 for (
int qy = 0; qy < Q1D; ++qy)
221 double Y[max_D1D][2];
222 for (
int dx = 0; dx < D1D; ++dx)
226 for (
int qx = 0; qx < Q1D; ++qx)
228 const double Btx = Bt(dx, qx);
229 Y[dx][0] += Btx * Z[qy][qx][0];
230 Y[dx][1] += Btx * Z[qy][qx][1];
233 for (
int dy = 0; dy < D1D; ++dy)
235 for (
int dx = 0; dx < D1D; ++dx)
237 const double Bty = Bt(dy, qy);
238 y(dx, dy, 0, e) += Bty * Y[dx][0];
239 y(dx, dy, 1, e) += Bty * Y[dx][1];
247 template<
int T_D1D = 0,
int T_Q1D = 0>
248 static void PAConvectionNLApply3D(
const int NE,
249 const Array<double> &b,
250 const Array<double> &g,
251 const Array<double> &bt,
258 constexpr
int VDIM = 3;
259 const int D1D = T_D1D ? T_D1D : d1d;
260 const int Q1D = T_Q1D ? T_Q1D : q1d;
261 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
262 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
264 auto B =
Reshape(b.Read(), Q1D, D1D);
265 auto G =
Reshape(g.Read(), Q1D, D1D);
266 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
267 auto Q =
Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
268 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
269 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
273 constexpr
int VDIM = 3;
274 const int D1D = T_D1D ? T_D1D : d1d;
275 const int Q1D = T_Q1D ? T_Q1D : q1d;
276 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
277 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
279 double data[max_Q1D][max_Q1D][max_Q1D][VDIM];
280 double grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
281 double grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
282 double grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
283 double Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
284 for (
int qz = 0; qz < Q1D; ++qz)
286 for (
int qy = 0; qy < Q1D; ++qy)
288 for (
int qx = 0; qx < Q1D; ++qx)
290 data[qz][qy][qx][0] = 0.0;
291 data[qz][qy][qx][1] = 0.0;
292 data[qz][qy][qx][2] = 0.0;
294 grad0[qz][qy][qx][0] = 0.0;
295 grad0[qz][qy][qx][1] = 0.0;
296 grad0[qz][qy][qx][2] = 0.0;
298 grad1[qz][qy][qx][0] = 0.0;
299 grad1[qz][qy][qx][1] = 0.0;
300 grad1[qz][qy][qx][2] = 0.0;
302 grad2[qz][qy][qx][0] = 0.0;
303 grad2[qz][qy][qx][1] = 0.0;
304 grad2[qz][qy][qx][2] = 0.0;
308 for (
int dz = 0; dz < D1D; ++dz)
310 double dataXY[max_Q1D][max_Q1D][VDIM];
311 double gradXY0[max_Q1D][max_Q1D][VDIM];
312 double gradXY1[max_Q1D][max_Q1D][VDIM];
313 double gradXY2[max_Q1D][max_Q1D][VDIM];
314 for (
int qy = 0; qy < Q1D; ++qy)
316 for (
int qx = 0; qx < Q1D; ++qx)
318 dataXY[qy][qx][0] = 0.0;
319 dataXY[qy][qx][1] = 0.0;
320 dataXY[qy][qx][2] = 0.0;
322 gradXY0[qy][qx][0] = 0.0;
323 gradXY0[qy][qx][1] = 0.0;
324 gradXY0[qy][qx][2] = 0.0;
326 gradXY1[qy][qx][0] = 0.0;
327 gradXY1[qy][qx][1] = 0.0;
328 gradXY1[qy][qx][2] = 0.0;
330 gradXY2[qy][qx][0] = 0.0;
331 gradXY2[qy][qx][1] = 0.0;
332 gradXY2[qy][qx][2] = 0.0;
335 for (
int dy = 0; dy < D1D; ++dy)
337 double dataX[max_Q1D][VDIM];
338 double gradX0[max_Q1D][VDIM];
339 double gradX1[max_Q1D][VDIM];
340 double gradX2[max_Q1D][VDIM];
341 for (
int qx = 0; qx < Q1D; ++qx)
359 for (
int dx = 0; dx < D1D; ++dx)
361 const double s0 = x(dx, dy, dz, 0, e);
362 const double s1 = x(dx, dy, dz, 1, e);
363 const double s2 = x(dx, dy, dz, 2, e);
364 for (
int qx = 0; qx < Q1D; ++qx)
366 const double Bx = B(qx, dx);
367 const double Gx = G(qx, dx);
369 dataX[qx][0] += s0 * Bx;
370 dataX[qx][1] += s1 * Bx;
371 dataX[qx][2] += s2 * Bx;
373 gradX0[qx][0] += s0 * Gx;
374 gradX0[qx][1] += s0 * Bx;
375 gradX0[qx][2] += s0 * Bx;
377 gradX1[qx][0] += s1 * Gx;
378 gradX1[qx][1] += s1 * Bx;
379 gradX1[qx][2] += s1 * Bx;
381 gradX2[qx][0] += s2 * Gx;
382 gradX2[qx][1] += s2 * Bx;
383 gradX2[qx][2] += s2 * Bx;
386 for (
int qy = 0; qy < Q1D; ++qy)
388 const double By = B(qy, dy);
389 const double Gy = G(qy, dy);
390 for (
int qx = 0; qx < Q1D; ++qx)
392 dataXY[qy][qx][0] += dataX[qx][0] * By;
393 dataXY[qy][qx][1] += dataX[qx][1] * By;
394 dataXY[qy][qx][2] += dataX[qx][2] * By;
396 gradXY0[qy][qx][0] += gradX0[qx][0] * By;
397 gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
398 gradXY0[qy][qx][2] += gradX0[qx][2] * By;
400 gradXY1[qy][qx][0] += gradX1[qx][0] * By;
401 gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
402 gradXY1[qy][qx][2] += gradX1[qx][2] * By;
404 gradXY2[qy][qx][0] += gradX2[qx][0] * By;
405 gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
406 gradXY2[qy][qx][2] += gradX2[qx][2] * By;
410 for (
int qz = 0; qz < Q1D; ++qz)
412 const double Bz = B(qz, dz);
413 const double Gz = G(qz, dz);
414 for (
int qy = 0; qy < Q1D; ++qy)
416 for (
int qx = 0; qx < Q1D; ++qx)
418 data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
419 data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
420 data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
422 grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
423 grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
424 grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
426 grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
427 grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
428 grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
430 grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
431 grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
432 grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
437 for (
int qz = 0; qz < Q1D; ++qz)
439 for (
int qy = 0; qy < Q1D; ++qy)
441 for (
int qx = 0; qx < Q1D; ++qx)
443 const int q = qx + Q1D * (qy + qz * Q1D);
445 const double u1 = data[qz][qy][qx][0];
446 const double u2 = data[qz][qy][qx][1];
447 const double u3 = data[qz][qy][qx][2];
449 const double grad00 = grad0[qz][qy][qx][0];
450 const double grad01 = grad0[qz][qy][qx][1];
451 const double grad02 = grad0[qz][qy][qx][2];
453 const double grad10 = grad1[qz][qy][qx][0];
454 const double grad11 = grad1[qz][qy][qx][1];
455 const double grad12 = grad1[qz][qy][qx][2];
457 const double grad20 = grad2[qz][qy][qx][0];
458 const double grad21 = grad2[qz][qy][qx][1];
459 const double grad22 = grad2[qz][qy][qx][2];
461 const double Dxu1 = grad00 * Q(q, 0, 0, e)
462 + grad01 * Q(q, 1, 0, e)
463 + grad02 * Q(q, 2, 0, e);
464 const double Dyu1 = grad00 * Q(q, 0, 1, e)
465 + grad01 * Q(q, 1, 1, e)
466 + grad02 * Q(q, 2, 1, e);
467 const double Dzu1 = grad00 * Q(q, 0, 2, e)
468 + grad01 * Q(q, 1, 2, e)
469 + grad02 * Q(q, 2, 2, e);
471 const double Dxu2 = grad10 * Q(q, 0, 0, e)
472 + grad11 * Q(q, 1, 0, e)
473 + grad12 * Q(q, 2, 0, e);
474 const double Dyu2 = grad10 * Q(q, 0, 1, e)
475 + grad11 * Q(q, 1, 1, e)
476 + grad12 * Q(q, 2, 1, e);
477 const double Dzu2 = grad10 * Q(q, 0, 2, e)
478 + grad11 * Q(q, 1, 2, e)
479 + grad12 * Q(q, 2, 2, e);
481 const double Dxu3 = grad20 * Q(q, 0, 0, e)
482 + grad21 * Q(q, 1, 0, e)
483 + grad22 * Q(q, 2, 0, e);
484 const double Dyu3 = grad20 * Q(q, 0, 1, e)
485 + grad21 * Q(q, 1, 1, e)
486 + grad22 * Q(q, 2, 1, e);
487 const double Dzu3 = grad20 * Q(q, 0, 2, e)
488 + grad21 * Q(q, 1, 2, e)
489 + grad22 * Q(q, 2, 2, e);
491 Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
492 Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
493 Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
497 for (
int qz = 0; qz < Q1D; ++qz)
499 double opXY[max_D1D][max_D1D][VDIM];
500 for (
int dy = 0; dy < D1D; ++dy)
502 for (
int dx = 0; dx < D1D; ++dx)
504 opXY[dy][dx][0] = 0.0;
505 opXY[dy][dx][1] = 0.0;
506 opXY[dy][dx][2] = 0.0;
509 for (
int qy = 0; qy < Q1D; ++qy)
511 double opX[max_D1D][VDIM];
512 for (
int dx = 0; dx < D1D; ++dx)
517 for (
int qx = 0; qx < Q1D; ++qx)
519 const double Btx = Bt(dx, qx);
520 opX[dx][0] += Btx * Z[qz][qy][qx][0];
521 opX[dx][1] += Btx * Z[qz][qy][qx][1];
522 opX[dx][2] += Btx * Z[qz][qy][qx][2];
525 for (
int dy = 0; dy < D1D; ++dy)
527 for (
int dx = 0; dx < D1D; ++dx)
529 const double Bty = Bt(dy, qy);
530 opXY[dy][dx][0] += Bty * opX[dx][0];
531 opXY[dy][dx][1] += Bty * opX[dx][1];
532 opXY[dy][dx][2] += Bty * opX[dx][2];
536 for (
int dz = 0; dz < D1D; ++dz)
538 for (
int dy = 0; dy < D1D; ++dy)
540 for (
int dx = 0; dx < D1D; ++dx)
542 const double Btz = Bt(dz, qz);
543 y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
544 y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
545 y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
553 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_MAX_D1D =0,
int T_MAX_Q1D =0>
554 static void SmemPAConvectionNLApply3D(
const int NE,
555 const Array<double> &b_,
556 const Array<double> &g_,
563 constexpr
int VDIM = 3;
564 const int D1D = T_D1D ? T_D1D : d1d;
565 const int Q1D = T_Q1D ? T_Q1D : q1d;
566 constexpr
int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
567 constexpr
int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
568 MFEM_VERIFY(D1D <= MD1,
"");
569 MFEM_VERIFY(Q1D <= MQ1,
"");
571 auto b =
Reshape(b_.Read(), Q1D, D1D);
572 auto g =
Reshape(g_.Read(), Q1D, D1D);
573 auto D =
Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
574 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
575 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
577 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
579 const int tidz = MFEM_THREAD_ID(z);
580 const int D1D = T_D1D ? T_D1D : d1d;
581 const int Q1D = T_Q1D ? T_Q1D : q1d;
582 constexpr
int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
583 constexpr
int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
584 MFEM_SHARED
double BG[2][MQ1 * MD1];
585 double(*B)[MD1] = (double(*)[MD1])(BG + 0);
586 double(*G)[MD1] = (double(*)[MD1])(BG + 1);
587 double(*Bt)[MQ1] = (double(*)[MQ1])(BG + 0);
588 MFEM_SHARED
double U[2][MQ1][MQ1][MQ1];
589 MFEM_SHARED
double sm0[3][MQ1 * MQ1 * MQ1];
590 MFEM_SHARED
double sm1[3][MQ1 * MQ1 * MQ1];
591 double(*DDQ0)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 0);
592 double(*DDQ1)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 1);
593 double(*X)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 2);
594 double(*DQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 0);
595 double(*DQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 1);
596 double(*DQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 2);
597 double(*QQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 0);
598 double(*QQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 1);
599 double(*QQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 2);
600 double(*QQD0)[MQ1][MD1] = (double(*)[MQ1][MD1])(sm1 + 0);
601 double(*QDD0)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 0);
602 MFEM_SHARED
double Z[MQ1][MQ1][MQ1];
604 for (
int cy = 0; cy < VDIM; ++cy)
608 MFEM_FOREACH_THREAD(q, x, Q1D)
610 MFEM_FOREACH_THREAD(d, y, D1D)
617 MFEM_FOREACH_THREAD(qz, z, Q1D)
619 MFEM_FOREACH_THREAD(qy, y, Q1D)
621 MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
625 for (
int c = 0; c < VDIM; ++c)
627 MFEM_FOREACH_THREAD(dz, z, D1D)
629 MFEM_FOREACH_THREAD(dy, y, D1D)
631 MFEM_FOREACH_THREAD(dx, x, D1D)
633 X[dz][dy][dx] = x(dx, dy, dz, cy, e);
634 U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
639 MFEM_FOREACH_THREAD(dz, z, D1D)
641 MFEM_FOREACH_THREAD(dy, y, D1D)
643 MFEM_FOREACH_THREAD(qx, x, Q1D)
648 for (
int dx = 0; dx < D1D; ++dx)
650 const double coord = X[dz][dy][dx];
651 const double value = U[0][dz][dy][dx];
652 u += coord * B[qx][dx];
653 v += coord * G[qx][dx];
654 z += value * B[qx][dx];
656 DDQ0[dz][dy][qx] = u;
657 DDQ1[dz][dy][qx] = v;
658 U[1][dz][dy][qx] = z;
663 MFEM_FOREACH_THREAD(dz, z, D1D)
665 MFEM_FOREACH_THREAD(qy, y, Q1D)
667 MFEM_FOREACH_THREAD(qx, x, Q1D)
673 for (
int dy = 0; dy < D1D; ++dy)
675 u += DDQ1[dz][dy][qx] * B[qy][dy];
676 v += DDQ0[dz][dy][qx] * G[qy][dy];
677 w += DDQ0[dz][dy][qx] * B[qy][dy];
678 z += U[1][dz][dy][qx] * B[qy][dy];
680 DQQ0[dz][qy][qx] = u;
681 DQQ1[dz][qy][qx] = v;
682 DQQ2[dz][qy][qx] = w;
683 U[0][dz][qy][qx] = z;
688 MFEM_FOREACH_THREAD(qz, z, Q1D)
690 MFEM_FOREACH_THREAD(qy, y, Q1D)
692 MFEM_FOREACH_THREAD(qx, x, Q1D)
698 for (
int dz = 0; dz < D1D; ++dz)
700 u += DQQ0[dz][qy][qx] * B[qz][dz];
701 v += DQQ1[dz][qy][qx] * B[qz][dz];
702 w += DQQ2[dz][qy][qx] * G[qz][dz];
703 z += U[0][dz][qy][qx] * B[qz][dz];
705 QQQ0[qz][qy][qx] = u;
706 QQQ1[qz][qy][qx] = v;
707 QQQ2[qz][qy][qx] = w;
708 U[1][qz][qy][qx] = z;
713 MFEM_FOREACH_THREAD(qz, z, Q1D)
715 MFEM_FOREACH_THREAD(qy, y, Q1D)
717 MFEM_FOREACH_THREAD(qx, x, Q1D)
719 const int q = qx + (qy + qz * Q1D) * Q1D;
720 const double z = U[1][qz][qy][qx];
721 const double gX = QQQ0[qz][qy][qx];
722 const double gY = QQQ1[qz][qy][qx];
723 const double gZ = QQQ2[qz][qy][qx];
724 const double d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
725 + gZ * D(q, 2, c, e);
726 Z[qz][qy][qx] += z * d;
734 MFEM_FOREACH_THREAD(d, y, D1D)
736 MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] =
b(q, d); }
740 MFEM_FOREACH_THREAD(qz, z, Q1D)
742 MFEM_FOREACH_THREAD(qy, y, Q1D)
744 MFEM_FOREACH_THREAD(dx, x, D1D)
747 for (
int qx = 0; qx < Q1D; ++qx)
749 u += Z[qz][qy][qx] * Bt[dx][qx];
751 QQD0[qz][qy][dx] = u;
756 MFEM_FOREACH_THREAD(qz, z, Q1D)
758 MFEM_FOREACH_THREAD(dy, y, D1D)
760 MFEM_FOREACH_THREAD(dx, x, D1D)
763 for (
int qy = 0; qy < Q1D; ++qy)
765 u += QQD0[qz][qy][dx] * Bt[dy][qy];
767 QDD0[qz][dy][dx] = u;
772 MFEM_FOREACH_THREAD(dz, z, D1D)
774 MFEM_FOREACH_THREAD(dy, y, D1D)
776 MFEM_FOREACH_THREAD(dx, x, D1D)
779 for (
int qz = 0; qz < Q1D; ++qz)
781 u += QDD0[qz][dy][dx] * Bt[dz][qz];
783 Y(dx, dy, dz, cy, e) += u;
792 void VectorConvectionNLFIntegrator::AddMultPA(
const Vector &x,
Vector &y)
const
795 const int D1D = maps->ndof;
796 const int Q1D = maps->nqpt;
797 const Vector &Q = pa_data;
803 return PAConvectionNLApply2D(NE, B, G, Bt, Q, x, y, D1D, Q1D);
807 constexpr
int T_MAX_D1D = 8;
808 constexpr
int T_MAX_Q1D = 8;
809 MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D,
"Not yet implemented!");
810 return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
811 (NE, B, G, Q, x, y, D1D, Q1D);
813 MFEM_ABORT(
"Not yet implemented!");
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
Ordering::Type GetOrdering() const
Return the ordering method.
Class for an integration rule - an Array of IntegrationPoint.
A coefficient that is constant across space and time.
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Mesh * GetMesh() const
Returns the mesh.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...