20static void PADivergenceSetup2D(
const int Q1D,
22 const Array<real_t> &w,
27 const int NQ = Q1D*Q1D;
29 auto J =
Reshape(j.Read(), NQ, 2, 2, NE);
30 auto y =
Reshape(op.Write(), NQ, 2, 2, NE);
34 for (
int q = 0; q < NQ; ++q)
36 const real_t J11 = J(q,0,0,e);
37 const real_t J12 = J(q,0,1,e);
38 const real_t J21 = J(q,1,0,e);
39 const real_t J22 = J(q,1,1,e);
41 y(q,0,0,e) = W[q] * COEFF * J22;
42 y(q,0,1,e) = W[q] * COEFF * -J12;
43 y(q,1,0,e) = W[q] * COEFF * -J21;
44 y(q,1,1,e) = W[q] * COEFF * J11;
50static void PADivergenceSetup3D(
const int Q1D,
52 const Array<real_t> &w,
57 const int NQ = Q1D*Q1D*Q1D;
59 auto J =
Reshape(j.Read(), NQ, 3, 3, NE);
60 auto y =
Reshape(op.Write(), NQ, 3, 3, NE);
63 for (
int q = 0; q < NQ; ++q)
65 const real_t J11 = J(q,0,0,e);
66 const real_t J21 = J(q,1,0,e);
67 const real_t J31 = J(q,2,0,e);
68 const real_t J12 = J(q,0,1,e);
69 const real_t J22 = J(q,1,1,e);
70 const real_t J32 = J(q,2,1,e);
71 const real_t J13 = J(q,0,2,e);
72 const real_t J23 = J(q,1,2,e);
73 const real_t J33 = J(q,2,2,e);
74 const real_t cw = W[q] * COEFF;
76 const real_t A11 = (J22 * J33) - (J23 * J32);
77 const real_t A12 = (J32 * J13) - (J12 * J33);
78 const real_t A13 = (J12 * J23) - (J22 * J13);
79 const real_t A21 = (J31 * J23) - (J21 * J33);
80 const real_t A22 = (J11 * J33) - (J13 * J31);
81 const real_t A23 = (J21 * J13) - (J11 * J23);
82 const real_t A31 = (J21 * J32) - (J31 * J22);
83 const real_t A32 = (J31 * J12) - (J11 * J32);
84 const real_t A33 = (J11 * J22) - (J12 * J21);
86 y(q,0,0,e) = cw * A11;
87 y(q,0,1,e) = cw * A12;
88 y(q,0,2,e) = cw * A13;
89 y(q,1,0,e) = cw * A21;
90 y(q,1,1,e) = cw * A22;
91 y(q,1,2,e) = cw * A23;
92 y(q,2,0,e) = cw * A31;
93 y(q,2,1,e) = cw * A32;
94 y(q,2,2,e) = cw * A33;
99static void PADivergenceSetup(
const int dim,
104 const Array<real_t> &W,
109 if (
dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADivergenceSetup"); }
112 PADivergenceSetup2D(Q1D, NE, W, J, COEFF, op);
116 PADivergenceSetup3D(Q1D, NE, W, J, COEFF, op);
125 "PA Only supports Ordering::byNODES!");
132 const int dims = trial_fe.
GetDim();
133 const int dimsToStore = dims * dims;
136 ne = trial_fes.
GetNE();
139 trial_dofs1D = trial_maps->
ndof;
140 quad1D = trial_maps->
nqpt;
142 test_dofs1D = test_maps->
ndof;
143 MFEM_ASSERT(quad1D == test_maps->
nqpt,
144 "PA requires test and trial space to have same number of quadrature points!");
150 MFEM_VERIFY(cQ != NULL,
"only ConstantCoefficient is supported!");
153 PADivergenceSetup(
dim, trial_dofs1D, test_dofs1D, quad1D,
158template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
159static void PADivergenceApply2D(
const int NE,
166 const int tr_d1d = 0,
167 const int te_d1d = 0,
170 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
171 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
172 const int Q1D = T_Q1D ? T_Q1D : q1d;
176 auto B =
Reshape(
b.Read(), Q1D, TR_D1D);
180 auto x =
Reshape(x_.
Read(), TR_D1D, TR_D1D, 2, NE);
184 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
185 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
186 const int Q1D = T_Q1D ? T_Q1D : q1d;
189 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
190 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
192 real_t grad[max_Q1D][max_Q1D][VDIM];
193 real_t div[max_Q1D][max_Q1D];
194 for (
int qy = 0; qy < Q1D; ++qy)
196 for (
int qx = 0; qx < Q1D; ++qx)
202 for (
int c = 0; c < VDIM; ++c)
204 for (
int qy = 0; qy < Q1D; ++qy)
206 for (
int qx = 0; qx < Q1D; ++qx)
208 grad[qy][qx][0] = 0.0;
209 grad[qy][qx][1] = 0.0;
212 for (
int dy = 0; dy < TR_D1D; ++dy)
214 real_t gradX[max_Q1D][VDIM];
215 for (
int qx = 0; qx < Q1D; ++qx)
220 for (
int dx = 0; dx < TR_D1D; ++dx)
222 const real_t s = x(dx,dy,c,e);
223 for (
int qx = 0; qx < Q1D; ++qx)
225 gradX[qx][0] += s * G(qx,dx);
226 gradX[qx][1] += s * B(qx,dx);
229 for (
int qy = 0; qy < Q1D; ++qy)
231 const real_t wy = B(qy,dy);
232 const real_t wDy = G(qy,dy);
233 for (
int qx = 0; qx < Q1D; ++qx)
235 grad[qy][qx][0] += gradX[qx][0] * wy;
236 grad[qy][qx][1] += gradX[qx][1] * wDy;
241 for (
int qy = 0; qy < Q1D; ++qy)
243 for (
int qx = 0; qx < Q1D; ++qx)
245 const int q = qx + qy * Q1D;
246 const real_t gradX = grad[qy][qx][0];
247 const real_t gradY = grad[qy][qx][1];
249 div[qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e);
254 for (
int qy = 0; qy < Q1D; ++qy)
257 for (
int dx = 0; dx < TE_D1D; ++dx)
260 for (
int qx = 0; qx < Q1D; ++qx)
262 opX[dx] += Bt(dx,qx)*div[qy][qx];
265 for (
int dy = 0; dy < TE_D1D; ++dy)
267 for (
int dx = 0; dx < TE_D1D; ++dx)
269 y(dx,dy,e) += Bt(dy,qy)*opX[dx];
278template<
const int T_TR_D1D = 0,
const int T_TE_D1D = 0,
const int T_Q1D = 0,
280static void SmemPADivergenceApply2D(
const int NE,
281 const Array<real_t> &b_,
282 const Array<real_t> &g_,
283 const Array<real_t> &bt_,
287 const int tr_d1d = 0,
288 const int te_d1d = 0,
292 MFEM_ASSERT(
false,
"SHARED MEM NOT PROGRAMMED YET");
296template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
297static void PADivergenceApplyTranspose2D(
const int NE,
298 const Array<real_t> &bt,
299 const Array<real_t> >,
300 const Array<real_t> &
b,
304 const int tr_d1d = 0,
305 const int te_d1d = 0,
308 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
309 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
310 const int Q1D = T_Q1D ? T_Q1D : q1d;
314 auto Bt =
Reshape(bt.Read(), TR_D1D, Q1D);
315 auto Gt =
Reshape(gt.Read(), TR_D1D, Q1D);
316 auto B =
Reshape(
b.Read(), Q1D, TE_D1D);
317 auto op =
Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
318 auto x =
Reshape(x_.Read(), TE_D1D, TE_D1D, NE);
319 auto y =
Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, 2, NE);
322 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
323 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
324 const int Q1D = T_Q1D ? T_Q1D : q1d;
327 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
328 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
330 real_t quadTest[max_Q1D][max_Q1D];
331 real_t grad[max_Q1D][max_Q1D][VDIM];
332 for (
int qy = 0; qy < Q1D; ++qy)
334 for (
int qx = 0; qx < Q1D; ++qx)
336 quadTest[qy][qx] = 0.0;
339 for (
int dy = 0; dy < TE_D1D; ++dy)
341 real_t quadTestX[max_Q1D];
342 for (
int qx = 0; qx < Q1D; ++qx)
346 for (
int dx = 0; dx < TE_D1D; ++dx)
348 const real_t s = x(dx,dy,e);
349 for (
int qx = 0; qx < Q1D; ++qx)
351 quadTestX[qx] += s * B(qx,dx);
354 for (
int qy = 0; qy < Q1D; ++qy)
356 const real_t wy = B(qy,dy);
357 for (
int qx = 0; qx < Q1D; ++qx)
359 quadTest[qy][qx] += quadTestX[qx] * wy;
364 for (
int c = 0; c < VDIM; ++c)
366 for (
int qy = 0; qy < Q1D; ++qy)
368 for (
int qx = 0; qx < Q1D; ++qx)
370 const int q = qx + qy * Q1D;
371 grad[qy][qx][0] = quadTest[qy][qx]*op(q,0,c,e);
372 grad[qy][qx][1] = quadTest[qy][qx]*op(q,1,c,e);
376 for (
int qy = 0; qy < Q1D; ++qy)
378 real_t gradX[max_TR_D1D][VDIM];
379 for (
int dx = 0; dx < TR_D1D; ++dx)
384 for (
int qx = 0; qx < Q1D; ++qx)
386 const real_t gX = grad[qy][qx][0];
387 const real_t gY = grad[qy][qx][1];
388 for (
int dx = 0; dx < TR_D1D; ++dx)
390 const real_t wx = Bt(dx,qx);
391 const real_t wDx = Gt(dx,qx);
392 gradX[dx][0] += gX * wDx;
393 gradX[dx][1] += gY * wx;
396 for (
int dy = 0; dy < TR_D1D; ++dy)
398 const real_t wy = Bt(dy,qy);
399 const real_t wDy = Gt(dy,qy);
400 for (
int dx = 0; dx < TR_D1D; ++dx)
402 y(dx,dy,c,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
412template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
413static void PADivergenceApply3D(
const int NE,
414 const Array<real_t> &
b,
415 const Array<real_t> &g,
416 const Array<real_t> &bt,
424 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
425 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
426 const int Q1D = T_Q1D ? T_Q1D : q1d;
430 auto B =
Reshape(
b.Read(), Q1D, TR_D1D);
431 auto G =
Reshape(g.Read(), Q1D, TR_D1D);
432 auto Bt =
Reshape(bt.Read(), TE_D1D, Q1D);
433 auto op =
Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
434 auto x =
Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
435 auto y =
Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
438 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
439 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
440 const int Q1D = T_Q1D ? T_Q1D : q1d;
443 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
444 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
446 real_t grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
447 real_t div[max_Q1D][max_Q1D][max_Q1D];
448 for (
int qz = 0; qz < Q1D; ++qz)
450 for (
int qy = 0; qy < Q1D; ++qy)
452 for (
int qx = 0; qx < Q1D; ++qx)
454 div[qz][qy][qx] = 0.0;
459 for (
int c = 0; c < VDIM; ++c)
461 for (
int qz = 0; qz < Q1D; ++qz)
463 for (
int qy = 0; qy < Q1D; ++qy)
465 for (
int qx = 0; qx < Q1D; ++qx)
467 grad[qz][qy][qx][0] = 0.0;
468 grad[qz][qy][qx][1] = 0.0;
469 grad[qz][qy][qx][2] = 0.0;
473 for (
int dz = 0; dz < TR_D1D; ++dz)
475 real_t gradXY[max_Q1D][max_Q1D][VDIM];
476 for (
int qy = 0; qy < Q1D; ++qy)
478 for (
int qx = 0; qx < Q1D; ++qx)
480 gradXY[qy][qx][0] = 0.0;
481 gradXY[qy][qx][1] = 0.0;
482 gradXY[qy][qx][2] = 0.0;
485 for (
int dy = 0; dy < TR_D1D; ++dy)
487 real_t gradX[max_Q1D][VDIM];
488 for (
int qx = 0; qx < Q1D; ++qx)
494 for (
int dx = 0; dx < TR_D1D; ++dx)
496 const real_t s = x(dx,dy,dz,c,e);
497 for (
int qx = 0; qx < Q1D; ++qx)
499 gradX[qx][0] += s * G(qx,dx);
500 gradX[qx][1] += s * B(qx,dx);
501 gradX[qx][2] += s * B(qx,dx);
504 for (
int qy = 0; qy < Q1D; ++qy)
506 const real_t wy = B(qy,dy);
507 const real_t wDy = G(qy,dy);
508 for (
int qx = 0; qx < Q1D; ++qx)
510 gradXY[qy][qx][0] += gradX[qx][0] * wy;
511 gradXY[qy][qx][1] += gradX[qx][1] * wDy;
512 gradXY[qy][qx][2] += gradX[qx][2] * wy;
516 for (
int qz = 0; qz < Q1D; ++qz)
518 const real_t wz = B(qz,dz);
519 const real_t wDz = G(qz,dz);
520 for (
int qy = 0; qy < Q1D; ++qy)
522 for (
int qx = 0; qx < Q1D; ++qx)
524 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
525 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
526 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
532 for (
int qz = 0; qz < Q1D; ++qz)
534 for (
int qy = 0; qy < Q1D; ++qy)
536 for (
int qx = 0; qx < Q1D; ++qx)
538 const int q = qx + (qy + qz * Q1D) * Q1D;
539 const real_t gradX = grad[qz][qy][qx][0];
540 const real_t gradY = grad[qz][qy][qx][1];
541 const real_t gradZ = grad[qz][qy][qx][2];
543 div[qz][qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e) + gradZ*op(q,2,c,e);
550 for (
int qz = 0; qz < Q1D; ++qz)
552 real_t opXY[max_TE_D1D][max_TE_D1D];
553 for (
int dy = 0; dy < TE_D1D; ++dy)
555 for (
int dx = 0; dx < TE_D1D; ++dx)
560 for (
int qy = 0; qy < Q1D; ++qy)
563 for (
int dx = 0; dx < TE_D1D; ++dx)
566 for (
int qx = 0; qx < Q1D; ++qx)
568 opX[dx] += Bt(dx,qx)*div[qz][qy][qx];
571 for (
int dy = 0; dy < TE_D1D; ++dy)
573 for (
int dx = 0; dx < TE_D1D; ++dx)
575 opXY[dy][dx] += Bt(dy,qy)*opX[dx];
579 for (
int dz = 0; dz < TE_D1D; ++dz)
581 for (
int dy = 0; dy < TE_D1D; ++dy)
583 for (
int dx = 0; dx < TE_D1D; ++dx)
585 y(dx,dy,dz,e) += Bt(dz,qz)*opXY[dy][dx];
595template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
596static void PADivergenceApplyTranspose3D(
const int NE,
597 const Array<real_t> &bt,
598 const Array<real_t> >,
599 const Array<real_t> &
b,
607 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
608 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
609 const int Q1D = T_Q1D ? T_Q1D : q1d;
613 auto Bt =
Reshape(bt.Read(), TR_D1D, Q1D);
614 auto Gt =
Reshape(gt.Read(), TR_D1D, Q1D);
615 auto B =
Reshape(
b.Read(), Q1D, TE_D1D);
616 auto op =
Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
617 auto x =
Reshape(x_.Read(), TE_D1D, TE_D1D, TE_D1D, NE);
618 auto y =
Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
621 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
622 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
623 const int Q1D = T_Q1D ? T_Q1D : q1d;
626 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
627 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
629 real_t quadTest[max_Q1D][max_Q1D][max_Q1D];
630 real_t grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
631 for (
int qz = 0; qz < Q1D; ++qz)
633 for (
int qy = 0; qy < Q1D; ++qy)
635 for (
int qx = 0; qx < Q1D; ++qx)
637 quadTest[qz][qy][qx] = 0.0;
641 for (
int dz = 0; dz < TE_D1D; ++dz)
643 real_t quadTestXY[max_Q1D][max_Q1D];
644 for (
int qy = 0; qy < Q1D; ++qy)
646 for (
int qx = 0; qx < Q1D; ++qx)
648 quadTestXY[qy][qx] = 0.0;
651 for (
int dy = 0; dy < TE_D1D; ++dy)
653 real_t quadTestX[max_Q1D];
654 for (
int qx = 0; qx < Q1D; ++qx)
658 for (
int dx = 0; dx < TE_D1D; ++dx)
660 const real_t s = x(dx,dy,dz,e);
661 for (
int qx = 0; qx < Q1D; ++qx)
663 quadTestX[qx] += s * B(qx,dx);
666 for (
int qy = 0; qy < Q1D; ++qy)
668 const real_t wy = B(qy,dy);
669 for (
int qx = 0; qx < Q1D; ++qx)
671 quadTestXY[qy][qx] += quadTestX[qx] * wy;
675 for (
int qz = 0; qz < Q1D; ++qz)
677 const real_t wz = B(qz,dz);
678 for (
int qy = 0; qy < Q1D; ++qy)
680 for (
int qx = 0; qx < Q1D; ++qx)
682 quadTest[qz][qy][qx] += quadTestXY[qy][qx] * wz;
688 for (
int c = 0; c < VDIM; ++c)
690 for (
int qz = 0; qz < Q1D; ++qz)
692 for (
int qy = 0; qy < Q1D; ++qy)
694 for (
int qx = 0; qx < Q1D; ++qx)
696 const int q = qx + (qy + qz * Q1D) * Q1D;
697 grad[qz][qy][qx][0] = quadTest[qz][qy][qx]*op(q,0,c,e);
698 grad[qz][qy][qx][1] = quadTest[qz][qy][qx]*op(q,1,c,e);
699 grad[qz][qy][qx][2] = quadTest[qz][qy][qx]*op(q,2,c,e);
704 for (
int qz = 0; qz < Q1D; ++qz)
706 real_t gradXY[max_TR_D1D][max_TR_D1D][VDIM];
707 for (
int dy = 0; dy < TR_D1D; ++dy)
709 for (
int dx = 0; dx < TR_D1D; ++dx)
711 gradXY[dy][dx][0] = 0.0;
712 gradXY[dy][dx][1] = 0.0;
713 gradXY[dy][dx][2] = 0.0;
716 for (
int qy = 0; qy < Q1D; ++qy)
718 real_t gradX[max_TR_D1D][VDIM];
719 for (
int dx = 0; dx < TR_D1D; ++dx)
725 for (
int qx = 0; qx < Q1D; ++qx)
727 const real_t gX = grad[qz][qy][qx][0];
728 const real_t gY = grad[qz][qy][qx][1];
729 const real_t gZ = grad[qz][qy][qx][2];
730 for (
int dx = 0; dx < TR_D1D; ++dx)
732 const real_t wx = Bt(dx,qx);
733 const real_t wDx = Gt(dx,qx);
734 gradX[dx][0] += gX * wDx;
735 gradX[dx][1] += gY * wx;
736 gradX[dx][2] += gZ * wx;
739 for (
int dy = 0; dy < TR_D1D; ++dy)
741 const real_t wy = Bt(dy,qy);
742 const real_t wDy = Gt(dy,qy);
743 for (
int dx = 0; dx < TR_D1D; ++dx)
745 gradXY[dy][dx][0] += gradX[dx][0] * wy;
746 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
747 gradXY[dy][dx][2] += gradX[dx][2] * wy;
751 for (
int dz = 0; dz < TR_D1D; ++dz)
753 const real_t wz = Bt(dz,qz);
754 const real_t wDz = Gt(dz,qz);
755 for (
int dy = 0; dy < TR_D1D; ++dy)
757 for (
int dx = 0; dx < TR_D1D; ++dx)
760 ((gradXY[dy][dx][0] * wz) +
761 (gradXY[dy][dx][1] * wz) +
762 (gradXY[dy][dx][2] * wDz));
773template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
774static void SmemPADivergenceApply3D(
const int NE,
775 const Array<real_t> &b_,
776 const Array<real_t> &g_,
777 const Array<real_t> &bt_,
781 const int tr_d1d = 0,
782 const int te_d1d = 0,
785 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
786 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
787 const int Q1D = T_Q1D ? T_Q1D : q1d;
793 auto b =
Reshape(b_.Read(), Q1D, TR_D1D);
794 auto g =
Reshape(g_.Read(), Q1D, TR_D1D);
795 auto bt =
Reshape(bt_.Read(), TE_D1D, Q1D);
796 auto Q =
Reshape(q_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
797 auto x =
Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
798 auto y =
Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
802 constexpr int VDIM = 3;
803 const int tidz = MFEM_THREAD_ID(z);
804 const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
805 const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
806 const int Q1D = T_Q1D ? T_Q1D : q1d;
807 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
808 constexpr int MD1R = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
809 constexpr int MD1E = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
810 constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
811 constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
812 MFEM_SHARED
real_t sBG[2][MQ1*MD1];
816 MFEM_SHARED
real_t sm0[3][MDQ*MDQ*MDQ];
817 MFEM_SHARED
real_t sm1[3][MDQ*MDQ*MDQ];
819 real_t (*DDQ0)[MD1][MQ1] = (
real_t (*)[MD1][MQ1]) (sm0+0);
820 real_t (*DDQ1)[MD1][MQ1] = (
real_t (*)[MD1][MQ1]) (sm0+1);
821 real_t (*DQQ0)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm1+0);
822 real_t (*DQQ1)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm1+1);
823 real_t (*DQQ2)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm1+2);
824 real_t (*QQQ0)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm0+0);
825 real_t (*QQQ1)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm0+1);
826 real_t (*QQQ2)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm0+2);
827 real_t (*QQD0)[MQ1][MD1] = (
real_t (*)[MQ1][MD1]) (sm1+0);
828 real_t (*QDD0)[MD1][MD1] = (
real_t (*)[MD1][MD1]) (sm0+0);
829 MFEM_SHARED
real_t div[MQ1][MQ1][MQ1];
833 MFEM_FOREACH_THREAD(d,y,D1DR)
835 MFEM_FOREACH_THREAD(q,x,Q1D)
843 MFEM_FOREACH_THREAD(qz,z,Q1D)
845 MFEM_FOREACH_THREAD(qy,y,Q1D)
847 MFEM_FOREACH_THREAD(qx,x,Q1D)
849 div[qz][qy][qx] = 0.0;
855 for (
int c = 0; c < VDIM; ++c)
857 MFEM_FOREACH_THREAD(qz,z,Q1D)
859 MFEM_FOREACH_THREAD(qy,y,Q1D)
861 MFEM_FOREACH_THREAD(qx,x,Q1D)
863 QQQ0[qz][qy][qx] = 0.0;
864 QQQ1[qz][qy][qx] = 0.0;
865 QQQ2[qz][qy][qx] = 0.0;
870 MFEM_FOREACH_THREAD(dz,z,D1DR)
872 MFEM_FOREACH_THREAD(dy,y,D1DR)
874 MFEM_FOREACH_THREAD(dx,x,D1DR)
876 X[dz][dy][dx] = x(dx,dy,dz,c,e);
881 MFEM_FOREACH_THREAD(dz,z,D1DR)
883 MFEM_FOREACH_THREAD(dy,y,D1DR)
885 MFEM_FOREACH_THREAD(qx,x,Q1D)
889 for (
int dx = 0; dx < D1DR; ++dx)
891 const real_t coord = X[dz][dy][dx];
892 u += coord * B[qx][dx];
893 v += coord * G[qx][dx];
895 DDQ0[dz][dy][qx] =
u;
896 DDQ1[dz][dy][qx] = v;
901 MFEM_FOREACH_THREAD(dz,z,D1DR)
903 MFEM_FOREACH_THREAD(qy,y,Q1D)
905 MFEM_FOREACH_THREAD(qx,x,Q1D)
910 for (
int dy = 0; dy < D1DR; ++dy)
912 u += DDQ1[dz][dy][qx] * B[qy][dy];
913 v += DDQ0[dz][dy][qx] * G[qy][dy];
914 w += DDQ0[dz][dy][qx] * B[qy][dy];
916 DQQ0[dz][qy][qx] =
u;
917 DQQ1[dz][qy][qx] = v;
918 DQQ2[dz][qy][qx] = w;
923 MFEM_FOREACH_THREAD(qz,z,Q1D)
925 MFEM_FOREACH_THREAD(qy,y,Q1D)
927 MFEM_FOREACH_THREAD(qx,x,Q1D)
932 for (
int dz = 0; dz < D1DR; ++dz)
934 u += DQQ0[dz][qy][qx] * B[qz][dz];
935 v += DQQ1[dz][qy][qx] * B[qz][dz];
936 w += DQQ2[dz][qy][qx] * G[qz][dz];
938 QQQ0[qz][qy][qx] =
u;
939 QQQ1[qz][qy][qx] = v;
940 QQQ2[qz][qy][qx] = w;
945 MFEM_FOREACH_THREAD(qz,z,Q1D)
947 MFEM_FOREACH_THREAD(qy,y,Q1D)
949 MFEM_FOREACH_THREAD(qx,x,Q1D)
951 const int q = qx + (qy + qz * Q1D) * Q1D;
952 const real_t gX = QQQ0[qz][qy][qx];
953 const real_t gY = QQQ1[qz][qy][qx];
954 const real_t gZ = QQQ2[qz][qy][qx];
955 div[qz][qy][qx] += gX*Q(q,0,c,e) + gY*Q(q,1,c,e) + gZ*Q(q,2,c,e);
964 MFEM_FOREACH_THREAD(d,y,D1DE)
966 MFEM_FOREACH_THREAD(q,x,Q1D)
974 MFEM_FOREACH_THREAD(qz,z,Q1D)
976 MFEM_FOREACH_THREAD(qy,y,Q1D)
978 MFEM_FOREACH_THREAD(dx,x,D1DE)
981 for (
int qx = 0; qx < Q1D; ++qx)
983 u += div[qz][qy][qx] * Bt[dx][qx];
985 QQD0[qz][qy][dx] =
u;
990 MFEM_FOREACH_THREAD(qz,z,Q1D)
992 MFEM_FOREACH_THREAD(dy,y,D1DE)
994 MFEM_FOREACH_THREAD(dx,x,D1DE)
997 for (
int qy = 0; qy < Q1D; ++qy)
999 u += QQD0[qz][qy][dx] * Bt[dy][qy];
1001 QDD0[qz][dy][dx] =
u;
1006 MFEM_FOREACH_THREAD(dz,z,D1DE)
1008 MFEM_FOREACH_THREAD(dy,y,D1DE)
1010 MFEM_FOREACH_THREAD(dx,x,D1DE)
1013 for (
int qz = 0; qz < Q1D; ++qz)
1015 u += QDD0[qz][dy][dx] * Bt[dz][qz];
1024static void PADivergenceApply(
const int dim,
1029 const Array<real_t> &B,
1030 const Array<real_t> &G,
1031 const Array<real_t> &Bt,
1035 bool transpose=
false)
1041 return PADivergenceApplyTranspose2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1045 return PADivergenceApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1052 return PADivergenceApplyTranspose3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1056 return PADivergenceApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1059 MFEM_ABORT(
"Unknown kernel.");
1065 PADivergenceApply(
dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1066 trial_maps->
B, trial_maps->
G, test_maps->
Bt, pa_data, x, y,
1074 PADivergenceApply(
dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1075 trial_maps->
Bt, trial_maps->
Gt, test_maps->
B, pa_data, x, y,
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
A coefficient that is constant across space and time.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Array< real_t > B
Basis functions evaluated at quadrature points.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Array< real_t > Gt
Transpose of G.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Array< real_t > Bt
Transpose of B.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Ordering::Type GetOrdering() const
Return the ordering method.
int GetNE() const
Returns number of elements in the mesh.
Mesh * GetMesh() const
Returns the mesh.
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Abstract class for all finite elements.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
int GetDim() const
Returns the reference space dimension for the finite element.
Vector J
Jacobians of the element transformations at all quadrature points.
Class for an integration rule - an Array of IntegrationPoint.
int GetNPoints() const
Returns the number of the points in the integration rule.
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
const IntegrationRule * IntRule
int Dimension() const
Dimension of the reference space used within the elements.
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
void AssemblePA(const FiniteElementSpace &trial_fes, const FiniteElementSpace &test_fes) override
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
static const IntegrationRule & GetRule(const FiniteElement &trial_fe, const FiniteElement &test_fe, const ElementTransformation &Trans)
void AddMultPA(const Vector &x, Vector &y) const override
Method for partially assembled action.
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void SetSize(int s)
Resize the vector to size s.
void trans(const Vector &u, Vector &x)
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.