12 #include "../general/forall.hpp"
23 "PA Only supports Ordering::byNODES!");
31 const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 ||
43 dim = mesh->Dimension();
46 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
53 MFEM_VERIFY(cQ != NULL,
"only ConstantCoefficient is supported!");
61 MFEM_ABORT(
"dim==1 not supported!");
65 auto J =
Reshape(geom->J.Read(), NQ, 2, 2, NE);
66 auto G =
Reshape(pa_data.Write(), NQ, 2, 2, NE);
69 for (
int q = 0; q < NQ; ++q)
71 const double J11 = J(q, 0, 0, e);
72 const double J12 = J(q, 0, 1, e);
73 const double J21 = J(q, 1, 0, e);
74 const double J22 = J(q, 1, 1, e);
76 G(q, 0, 0, e) = W[q] * COEFF * J22;
77 G(q, 0, 1, e) = W[q] * COEFF * -J12;
78 G(q, 1, 0, e) = W[q] * COEFF * -J21;
79 G(q, 1, 1, e) = W[q] * COEFF * J11;
85 auto J =
Reshape(geom->J.Read(), NQ, 3, 3, NE);
86 auto G =
Reshape(pa_data.Write(), NQ, 3, 3, NE);
89 for (
int q = 0; q < NQ; ++q)
91 const double J11 = J(q, 0, 0, e);
92 const double J21 = J(q, 1, 0, e);
93 const double J31 = J(q, 2, 0, e);
94 const double J12 = J(q, 0, 1, e);
95 const double J22 = J(q, 1, 1, e);
96 const double J32 = J(q, 2, 1, e);
97 const double J13 = J(q, 0, 2, e);
98 const double J23 = J(q, 1, 2, e);
99 const double J33 = J(q, 2, 2, e);
100 const double cw = W[q] * COEFF;
102 const double A11 = (J22 * J33) - (J23 * J32);
103 const double A12 = (J32 * J13) - (J12 * J33);
104 const double A13 = (J12 * J23) - (J22 * J13);
105 const double A21 = (J31 * J23) - (J21 * J33);
106 const double A22 = (J11 * J33) - (J13 * J31);
107 const double A23 = (J21 * J13) - (J11 * J23);
108 const double A31 = (J21 * J32) - (J31 * J22);
109 const double A32 = (J31 * J12) - (J11 * J32);
110 const double A33 = (J11 * J22) - (J12 * J21);
112 G(q, 0, 0, e) = cw * A11;
113 G(q, 0, 1, e) = cw * A12;
114 G(q, 0, 2, e) = cw * A13;
115 G(q, 1, 0, e) = cw * A21;
116 G(q, 1, 1, e) = cw * A22;
117 G(q, 1, 2, e) = cw * A23;
118 G(q, 2, 0, e) = cw * A31;
119 G(q, 2, 1, e) = cw * A32;
120 G(q, 2, 2, e) = cw * A33;
127 template<
int T_D1D = 0,
int T_Q1D = 0>
128 static void PAConvectionNLApply2D(
const int NE,
138 const int D1D = T_D1D ? T_D1D : d1d;
139 const int Q1D = T_Q1D ? T_Q1D : q1d;
140 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
141 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
150 const int D1D = T_D1D ? T_D1D : d1d;
151 const int Q1D = T_Q1D ? T_Q1D : q1d;
152 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
153 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
155 double data[max_Q1D][max_Q1D][2];
156 double grad0[max_Q1D][max_Q1D][2];
157 double grad1[max_Q1D][max_Q1D][2];
158 double Z[max_Q1D][max_Q1D][2];
159 for (
int qy = 0; qy < Q1D; ++qy)
161 for (
int qx = 0; qx < Q1D; ++qx)
163 data[qy][qx][0] = 0.0;
164 data[qy][qx][1] = 0.0;
165 grad0[qy][qx][0] = 0.0;
166 grad0[qy][qx][1] = 0.0;
167 grad1[qy][qx][0] = 0.0;
168 grad1[qy][qx][1] = 0.0;
171 for (
int dy = 0; dy < D1D; ++dy)
173 double dataX[max_Q1D][2];
174 double gradX0[max_Q1D][2];
175 double gradX1[max_Q1D][2];
176 for (
int qx = 0; qx < Q1D; ++qx)
185 for (
int dx = 0; dx < D1D; ++dx)
187 const double s0 = x(dx, dy, 0, e);
188 const double s1 = x(dx, dy, 1, e);
189 for (
int qx = 0; qx < Q1D; ++qx)
191 const double Bx = B(qx, dx);
192 const double Gx = G(qx, dx);
193 dataX[qx][0] += s0 * Bx;
194 dataX[qx][1] += s1 * Bx;
195 gradX0[qx][0] += s0 * Gx;
196 gradX0[qx][1] += s0 * Bx;
197 gradX1[qx][0] += s1 * Gx;
198 gradX1[qx][1] += s1 * Bx;
201 for (
int qy = 0; qy < Q1D; ++qy)
203 const double By = B(qy, dy);
204 const double Gy = G(qy, dy);
205 for (
int qx = 0; qx < Q1D; ++qx)
207 data[qy][qx][0] += dataX[qx][0] * By;
208 data[qy][qx][1] += dataX[qx][1] * By;
209 grad0[qy][qx][0] += gradX0[qx][0] * By;
210 grad0[qy][qx][1] += gradX0[qx][1] * Gy;
211 grad1[qy][qx][0] += gradX1[qx][0] * By;
212 grad1[qy][qx][1] += gradX1[qx][1] * Gy;
216 for (
int qy = 0; qy < Q1D; ++qy)
218 for (
int qx = 0; qx < Q1D; ++qx)
220 const int q = qx + qy * Q1D;
221 const double u1 = data[qy][qx][0];
222 const double u2 = data[qy][qx][1];
223 const double grad00 = grad0[qy][qx][0];
224 const double grad01 = grad0[qy][qx][1];
225 const double grad10 = grad1[qy][qx][0];
226 const double grad11 = grad1[qy][qx][1];
227 const double Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
228 const double Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
229 const double Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
230 const double Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
231 Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
232 Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
235 for (
int qy = 0; qy < Q1D; ++qy)
237 double Y[max_D1D][2];
238 for (
int dx = 0; dx < D1D; ++dx)
242 for (
int qx = 0; qx < Q1D; ++qx)
244 const double Btx = Bt(dx, qx);
245 Y[dx][0] += Btx * Z[qy][qx][0];
246 Y[dx][1] += Btx * Z[qy][qx][1];
249 for (
int dy = 0; dy < D1D; ++dy)
251 for (
int dx = 0; dx < D1D; ++dx)
253 const double Bty = Bt(dy, qy);
254 y(dx, dy, 0, e) += Bty * Y[dx][0];
255 y(dx, dy, 1, e) += Bty * Y[dx][1];
263 template<
int T_D1D = 0,
int T_Q1D = 0>
264 static void PAConvectionNLApply3D(
const int NE,
265 const Array<double> &b,
266 const Array<double> &g,
267 const Array<double> &bt,
274 constexpr
int VDIM = 3;
275 const int D1D = T_D1D ? T_D1D : d1d;
276 const int Q1D = T_Q1D ? T_Q1D : q1d;
277 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
278 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
280 auto B =
Reshape(b.Read(), Q1D, D1D);
281 auto G =
Reshape(g.Read(), Q1D, D1D);
282 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
283 auto Q =
Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
284 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
285 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
289 constexpr
int VDIM = 3;
290 const int D1D = T_D1D ? T_D1D : d1d;
291 const int Q1D = T_Q1D ? T_Q1D : q1d;
292 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
293 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
295 double data[max_Q1D][max_Q1D][max_Q1D][VDIM];
296 double grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
297 double grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
298 double grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
299 double Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
300 for (
int qz = 0; qz < Q1D; ++qz)
302 for (
int qy = 0; qy < Q1D; ++qy)
304 for (
int qx = 0; qx < Q1D; ++qx)
306 data[qz][qy][qx][0] = 0.0;
307 data[qz][qy][qx][1] = 0.0;
308 data[qz][qy][qx][2] = 0.0;
310 grad0[qz][qy][qx][0] = 0.0;
311 grad0[qz][qy][qx][1] = 0.0;
312 grad0[qz][qy][qx][2] = 0.0;
314 grad1[qz][qy][qx][0] = 0.0;
315 grad1[qz][qy][qx][1] = 0.0;
316 grad1[qz][qy][qx][2] = 0.0;
318 grad2[qz][qy][qx][0] = 0.0;
319 grad2[qz][qy][qx][1] = 0.0;
320 grad2[qz][qy][qx][2] = 0.0;
324 for (
int dz = 0; dz < D1D; ++dz)
326 double dataXY[max_Q1D][max_Q1D][VDIM];
327 double gradXY0[max_Q1D][max_Q1D][VDIM];
328 double gradXY1[max_Q1D][max_Q1D][VDIM];
329 double gradXY2[max_Q1D][max_Q1D][VDIM];
330 for (
int qy = 0; qy < Q1D; ++qy)
332 for (
int qx = 0; qx < Q1D; ++qx)
334 dataXY[qy][qx][0] = 0.0;
335 dataXY[qy][qx][1] = 0.0;
336 dataXY[qy][qx][2] = 0.0;
338 gradXY0[qy][qx][0] = 0.0;
339 gradXY0[qy][qx][1] = 0.0;
340 gradXY0[qy][qx][2] = 0.0;
342 gradXY1[qy][qx][0] = 0.0;
343 gradXY1[qy][qx][1] = 0.0;
344 gradXY1[qy][qx][2] = 0.0;
346 gradXY2[qy][qx][0] = 0.0;
347 gradXY2[qy][qx][1] = 0.0;
348 gradXY2[qy][qx][2] = 0.0;
351 for (
int dy = 0; dy < D1D; ++dy)
353 double dataX[max_Q1D][VDIM];
354 double gradX0[max_Q1D][VDIM];
355 double gradX1[max_Q1D][VDIM];
356 double gradX2[max_Q1D][VDIM];
357 for (
int qx = 0; qx < Q1D; ++qx)
375 for (
int dx = 0; dx < D1D; ++dx)
377 const double s0 = x(dx, dy, dz, 0, e);
378 const double s1 = x(dx, dy, dz, 1, e);
379 const double s2 = x(dx, dy, dz, 2, e);
380 for (
int qx = 0; qx < Q1D; ++qx)
382 const double Bx = B(qx, dx);
383 const double Gx = G(qx, dx);
385 dataX[qx][0] += s0 * Bx;
386 dataX[qx][1] += s1 * Bx;
387 dataX[qx][2] += s2 * Bx;
389 gradX0[qx][0] += s0 * Gx;
390 gradX0[qx][1] += s0 * Bx;
391 gradX0[qx][2] += s0 * Bx;
393 gradX1[qx][0] += s1 * Gx;
394 gradX1[qx][1] += s1 * Bx;
395 gradX1[qx][2] += s1 * Bx;
397 gradX2[qx][0] += s2 * Gx;
398 gradX2[qx][1] += s2 * Bx;
399 gradX2[qx][2] += s2 * Bx;
402 for (
int qy = 0; qy < Q1D; ++qy)
404 const double By = B(qy, dy);
405 const double Gy = G(qy, dy);
406 for (
int qx = 0; qx < Q1D; ++qx)
408 dataXY[qy][qx][0] += dataX[qx][0] * By;
409 dataXY[qy][qx][1] += dataX[qx][1] * By;
410 dataXY[qy][qx][2] += dataX[qx][2] * By;
412 gradXY0[qy][qx][0] += gradX0[qx][0] * By;
413 gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
414 gradXY0[qy][qx][2] += gradX0[qx][2] * By;
416 gradXY1[qy][qx][0] += gradX1[qx][0] * By;
417 gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
418 gradXY1[qy][qx][2] += gradX1[qx][2] * By;
420 gradXY2[qy][qx][0] += gradX2[qx][0] * By;
421 gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
422 gradXY2[qy][qx][2] += gradX2[qx][2] * By;
426 for (
int qz = 0; qz < Q1D; ++qz)
428 const double Bz = B(qz, dz);
429 const double Gz = G(qz, dz);
430 for (
int qy = 0; qy < Q1D; ++qy)
432 for (
int qx = 0; qx < Q1D; ++qx)
434 data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
435 data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
436 data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
438 grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
439 grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
440 grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
442 grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
443 grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
444 grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
446 grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
447 grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
448 grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
453 for (
int qz = 0; qz < Q1D; ++qz)
455 for (
int qy = 0; qy < Q1D; ++qy)
457 for (
int qx = 0; qx < Q1D; ++qx)
459 const int q = qx + Q1D * (qy + qz * Q1D);
461 const double u1 = data[qz][qy][qx][0];
462 const double u2 = data[qz][qy][qx][1];
463 const double u3 = data[qz][qy][qx][2];
465 const double grad00 = grad0[qz][qy][qx][0];
466 const double grad01 = grad0[qz][qy][qx][1];
467 const double grad02 = grad0[qz][qy][qx][2];
469 const double grad10 = grad1[qz][qy][qx][0];
470 const double grad11 = grad1[qz][qy][qx][1];
471 const double grad12 = grad1[qz][qy][qx][2];
473 const double grad20 = grad2[qz][qy][qx][0];
474 const double grad21 = grad2[qz][qy][qx][1];
475 const double grad22 = grad2[qz][qy][qx][2];
477 const double Dxu1 = grad00 * Q(q, 0, 0, e)
478 + grad01 * Q(q, 1, 0, e)
479 + grad02 * Q(q, 2, 0, e);
480 const double Dyu1 = grad00 * Q(q, 0, 1, e)
481 + grad01 * Q(q, 1, 1, e)
482 + grad02 * Q(q, 2, 1, e);
483 const double Dzu1 = grad00 * Q(q, 0, 2, e)
484 + grad01 * Q(q, 1, 2, e)
485 + grad02 * Q(q, 2, 2, e);
487 const double Dxu2 = grad10 * Q(q, 0, 0, e)
488 + grad11 * Q(q, 1, 0, e)
489 + grad12 * Q(q, 2, 0, e);
490 const double Dyu2 = grad10 * Q(q, 0, 1, e)
491 + grad11 * Q(q, 1, 1, e)
492 + grad12 * Q(q, 2, 1, e);
493 const double Dzu2 = grad10 * Q(q, 0, 2, e)
494 + grad11 * Q(q, 1, 2, e)
495 + grad12 * Q(q, 2, 2, e);
497 const double Dxu3 = grad20 * Q(q, 0, 0, e)
498 + grad21 * Q(q, 1, 0, e)
499 + grad22 * Q(q, 2, 0, e);
500 const double Dyu3 = grad20 * Q(q, 0, 1, e)
501 + grad21 * Q(q, 1, 1, e)
502 + grad22 * Q(q, 2, 1, e);
503 const double Dzu3 = grad20 * Q(q, 0, 2, e)
504 + grad21 * Q(q, 1, 2, e)
505 + grad22 * Q(q, 2, 2, e);
507 Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
508 Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
509 Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
513 for (
int qz = 0; qz < Q1D; ++qz)
515 double opXY[max_D1D][max_D1D][VDIM];
516 for (
int dy = 0; dy < D1D; ++dy)
518 for (
int dx = 0; dx < D1D; ++dx)
520 opXY[dy][dx][0] = 0.0;
521 opXY[dy][dx][1] = 0.0;
522 opXY[dy][dx][2] = 0.0;
525 for (
int qy = 0; qy < Q1D; ++qy)
527 double opX[max_D1D][VDIM];
528 for (
int dx = 0; dx < D1D; ++dx)
533 for (
int qx = 0; qx < Q1D; ++qx)
535 const double Btx = Bt(dx, qx);
536 opX[dx][0] += Btx * Z[qz][qy][qx][0];
537 opX[dx][1] += Btx * Z[qz][qy][qx][1];
538 opX[dx][2] += Btx * Z[qz][qy][qx][2];
541 for (
int dy = 0; dy < D1D; ++dy)
543 for (
int dx = 0; dx < D1D; ++dx)
545 const double Bty = Bt(dy, qy);
546 opXY[dy][dx][0] += Bty * opX[dx][0];
547 opXY[dy][dx][1] += Bty * opX[dx][1];
548 opXY[dy][dx][2] += Bty * opX[dx][2];
552 for (
int dz = 0; dz < D1D; ++dz)
554 for (
int dy = 0; dy < D1D; ++dy)
556 for (
int dx = 0; dx < D1D; ++dx)
558 const double Btz = Bt(dz, qz);
559 y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
560 y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
561 y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
569 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_MAX_D1D =0,
int T_MAX_Q1D =0>
570 static void SmemPAConvectionNLApply3D(
const int NE,
571 const Array<double> &b_,
572 const Array<double> &g_,
579 constexpr
int VDIM = 3;
580 const int D1D = T_D1D ? T_D1D : d1d;
581 const int Q1D = T_Q1D ? T_Q1D : q1d;
582 constexpr
int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
583 constexpr
int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
584 MFEM_VERIFY(D1D <= MD1,
"");
585 MFEM_VERIFY(Q1D <= MQ1,
"");
587 auto b =
Reshape(b_.Read(), Q1D, D1D);
588 auto g =
Reshape(g_.Read(), Q1D, D1D);
589 auto D =
Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
590 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
591 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
593 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
595 const int tidz = MFEM_THREAD_ID(z);
596 const int D1D = T_D1D ? T_D1D : d1d;
597 const int Q1D = T_Q1D ? T_Q1D : q1d;
598 constexpr
int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
599 constexpr
int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
600 MFEM_SHARED
double BG[2][MQ1 * MD1];
601 double(*B)[MD1] = (double(*)[MD1])(BG + 0);
602 double(*G)[MD1] = (double(*)[MD1])(BG + 1);
603 double(*Bt)[MQ1] = (double(*)[MQ1])(BG + 0);
604 MFEM_SHARED
double U[2][MQ1][MQ1][MQ1];
605 MFEM_SHARED
double sm0[3][MQ1 * MQ1 * MQ1];
606 MFEM_SHARED
double sm1[3][MQ1 * MQ1 * MQ1];
607 double(*DDQ0)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 0);
608 double(*DDQ1)[MD1][MQ1] = (double(*)[MD1][MQ1])(sm0 + 1);
609 double(*X)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 2);
610 double(*DQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 0);
611 double(*DQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 1);
612 double(*DQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm1 + 2);
613 double(*QQQ0)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 0);
614 double(*QQQ1)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 1);
615 double(*QQQ2)[MQ1][MQ1] = (double(*)[MQ1][MQ1])(sm0 + 2);
616 double(*QQD0)[MQ1][MD1] = (double(*)[MQ1][MD1])(sm1 + 0);
617 double(*QDD0)[MD1][MD1] = (double(*)[MD1][MD1])(sm0 + 0);
618 MFEM_SHARED
double Z[MQ1][MQ1][MQ1];
620 for (
int cy = 0; cy < VDIM; ++cy)
624 MFEM_FOREACH_THREAD(q, x, Q1D)
626 MFEM_FOREACH_THREAD(d, y, D1D)
633 MFEM_FOREACH_THREAD(qz, z, Q1D)
635 MFEM_FOREACH_THREAD(qy, y, Q1D)
637 MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
641 for (
int c = 0; c < VDIM; ++c)
643 MFEM_FOREACH_THREAD(dz, z, D1D)
645 MFEM_FOREACH_THREAD(dy, y, D1D)
647 MFEM_FOREACH_THREAD(dx, x, D1D)
649 X[dz][dy][dx] = x(dx, dy, dz, cy, e);
650 U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
655 MFEM_FOREACH_THREAD(dz, z, D1D)
657 MFEM_FOREACH_THREAD(dy, y, D1D)
659 MFEM_FOREACH_THREAD(qx, x, Q1D)
664 for (
int dx = 0; dx < D1D; ++dx)
666 const double coord = X[dz][dy][dx];
667 const double value = U[0][dz][dy][dx];
668 u += coord * B[qx][dx];
669 v += coord * G[qx][dx];
670 z += value * B[qx][dx];
672 DDQ0[dz][dy][qx] =
u;
673 DDQ1[dz][dy][qx] = v;
674 U[1][dz][dy][qx] = z;
679 MFEM_FOREACH_THREAD(dz, z, D1D)
681 MFEM_FOREACH_THREAD(qy, y, Q1D)
683 MFEM_FOREACH_THREAD(qx, x, Q1D)
689 for (
int dy = 0; dy < D1D; ++dy)
691 u += DDQ1[dz][dy][qx] * B[qy][dy];
692 v += DDQ0[dz][dy][qx] * G[qy][dy];
693 w += DDQ0[dz][dy][qx] * B[qy][dy];
694 z += U[1][dz][dy][qx] * B[qy][dy];
696 DQQ0[dz][qy][qx] =
u;
697 DQQ1[dz][qy][qx] = v;
698 DQQ2[dz][qy][qx] = w;
699 U[0][dz][qy][qx] = z;
704 MFEM_FOREACH_THREAD(qz, z, Q1D)
706 MFEM_FOREACH_THREAD(qy, y, Q1D)
708 MFEM_FOREACH_THREAD(qx, x, Q1D)
714 for (
int dz = 0; dz < D1D; ++dz)
716 u += DQQ0[dz][qy][qx] * B[qz][dz];
717 v += DQQ1[dz][qy][qx] * B[qz][dz];
718 w += DQQ2[dz][qy][qx] * G[qz][dz];
719 z += U[0][dz][qy][qx] * B[qz][dz];
721 QQQ0[qz][qy][qx] =
u;
722 QQQ1[qz][qy][qx] = v;
723 QQQ2[qz][qy][qx] = w;
724 U[1][qz][qy][qx] = z;
729 MFEM_FOREACH_THREAD(qz, z, Q1D)
731 MFEM_FOREACH_THREAD(qy, y, Q1D)
733 MFEM_FOREACH_THREAD(qx, x, Q1D)
735 const int q = qx + (qy + qz * Q1D) * Q1D;
736 const double z = U[1][qz][qy][qx];
737 const double gX = QQQ0[qz][qy][qx];
738 const double gY = QQQ1[qz][qy][qx];
739 const double gZ = QQQ2[qz][qy][qx];
740 const double d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
741 + gZ * D(q, 2, c, e);
742 Z[qz][qy][qx] += z * d;
750 MFEM_FOREACH_THREAD(d, y, D1D)
752 MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] =
b(q, d); }
756 MFEM_FOREACH_THREAD(qz, z, Q1D)
758 MFEM_FOREACH_THREAD(qy, y, Q1D)
760 MFEM_FOREACH_THREAD(dx, x, D1D)
763 for (
int qx = 0; qx < Q1D; ++qx)
765 u += Z[qz][qy][qx] * Bt[dx][qx];
767 QQD0[qz][qy][dx] =
u;
772 MFEM_FOREACH_THREAD(qz, z, Q1D)
774 MFEM_FOREACH_THREAD(dy, y, D1D)
776 MFEM_FOREACH_THREAD(dx, x, D1D)
779 for (
int qy = 0; qy < Q1D; ++qy)
781 u += QQD0[qz][qy][dx] * Bt[dy][qy];
783 QDD0[qz][dy][dx] =
u;
788 MFEM_FOREACH_THREAD(dz, z, D1D)
790 MFEM_FOREACH_THREAD(dy, y, D1D)
792 MFEM_FOREACH_THREAD(dx, x, D1D)
795 for (
int qz = 0; qz < Q1D; ++qz)
797 u += QDD0[qz][dy][dx] * Bt[dz][qz];
799 Y(dx, dy, dz, cy, e) +=
u;
808 void VectorConvectionNLFIntegrator::AddMultPA(
const Vector &x,
Vector &y)
const
812 ceedOp->AddMult(x, y);
817 const int D1D = maps->ndof;
818 const int Q1D = maps->nqpt;
819 const Vector &QV = pa_data;
825 return PAConvectionNLApply2D(NE, B, G, Bt, QV, x, y, D1D, Q1D);
829 constexpr
int T_MAX_D1D = 8;
830 constexpr
int T_MAX_Q1D = 8;
831 MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D,
"Not yet implemented!");
832 return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
833 (NE, B, G, QV, x, y, D1D, Q1D);
835 MFEM_ABORT(
"Not yet implemented!");
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
Ordering::Type GetOrdering() const
Return the ordering method.
Class for an integration rule - an Array of IntegrationPoint.
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
A coefficient that is constant across space and time.
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
Mesh * GetMesh() const
Returns the mesh.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
double u(const Vector &xvec)
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.