73static void PAGradientSetup2D(
const int Q1D,
76 const Array<real_t> &w,
83 const int NQ = Q1D*Q1D;
85 auto J =
Reshape(j.Read(), NQ, 2, 2, NE);
86 auto DETJ =
Reshape(detj.Read(), NQ, NE);
87 auto y =
Reshape(op.Write(), NQ, 2, 2, NE);
89 const bool const_c = c.Size() == 1;
90 const auto C = const_c ?
Reshape(c.Read(), 1,1) :
95 for (
int q = 0; q < NQ; ++q)
97 const real_t J11 = J(q,0,0,e);
98 const real_t J12 = J(q,0,1,e);
99 const real_t J21 = J(q,1,0,e);
100 const real_t J22 = J(q,1,1,e);
102 const real_t Co = const_c ? C(0,0) : C(q,e);
103 const real_t cw = W[q] * Co * (by_val ? 1.0 : 1.0/DETJ(q,e));
105 y(q,0,0,e) = cw * J22;
106 y(q,0,1,e) = cw * -J12;
107 y(q,1,0,e) = cw * -J21;
108 y(q,1,1,e) = cw * J11;
114static void PAGradientSetup3D(
const int Q1D,
117 const Array<real_t> &w,
124 const int NQ = Q1D*Q1D*Q1D;
126 auto J =
Reshape(j.Read(), NQ, 3, 3, NE);
127 auto DETJ =
Reshape(detj.Read(), NQ, NE);
128 auto y =
Reshape(op.Write(), NQ, 3, 3, NE);
130 const bool const_c = c.Size() == 1;
131 const auto C = const_c ?
Reshape(c.Read(), 1,1) :
136 for (
int q = 0; q < NQ; ++q)
138 const real_t J11 = J(q,0,0,e);
139 const real_t J21 = J(q,1,0,e);
140 const real_t J31 = J(q,2,0,e);
141 const real_t J12 = J(q,0,1,e);
142 const real_t J22 = J(q,1,1,e);
143 const real_t J32 = J(q,2,1,e);
144 const real_t J13 = J(q,0,2,e);
145 const real_t J23 = J(q,1,2,e);
146 const real_t J33 = J(q,2,2,e);
148 const real_t A11 = (J22 * J33) - (J23 * J32);
149 const real_t A12 = (J32 * J13) - (J12 * J33);
150 const real_t A13 = (J12 * J23) - (J22 * J13);
151 const real_t A21 = (J31 * J23) - (J21 * J33);
152 const real_t A22 = (J11 * J33) - (J13 * J31);
153 const real_t A23 = (J21 * J13) - (J11 * J23);
154 const real_t A31 = (J21 * J32) - (J31 * J22);
155 const real_t A32 = (J31 * J12) - (J11 * J32);
156 const real_t A33 = (J11 * J22) - (J12 * J21);
158 const real_t Co = const_c ? C(0,0) : C(q,e);
159 const real_t cw = W[q] * Co * (by_val ? 1.0 : 1.0/DETJ(q,e));
161 y(q,0,0,e) = cw * A11;
162 y(q,0,1,e) = cw * A12;
163 y(q,0,2,e) = cw * A13;
164 y(q,1,0,e) = cw * A21;
165 y(q,1,1,e) = cw * A22;
166 y(q,1,2,e) = cw * A23;
167 y(q,2,0,e) = cw * A31;
168 y(q,2,1,e) = cw * A32;
169 y(q,2,2,e) = cw * A33;
174static void PAGradientSetup(
const int dim,
180 const Array<real_t> &W,
186 if (
dim == 1) { MFEM_ABORT(
"dim==1 not supported in PAGradientSetup"); }
189 PAGradientSetup2D(Q1D, NE, MAP_TYPE, W, J, DET_J, COEFF, op);
193 PAGradientSetup3D(Q1D, NE, MAP_TYPE, W, J, DET_J, COEFF, op);
202 "PA Only supports Ordering::byNODES!");
210 const int dims = trial_fe.
GetDim();
211 const int dimsToStore = dims * dims;
214 ne = trial_fes.
GetNE();
218 trial_dofs1D = trial_maps->
ndof;
219 quad1D = trial_maps->
nqpt;
221 test_dofs1D = test_maps->
ndof;
222 MFEM_ASSERT(quad1D == test_maps->
nqpt,
223 "PA requires test and trial space to have same number of quadrature points!");
229 PAGradientSetup(
dim, trial_dofs1D, test_dofs1D, quad1D, ne,
235template<
int T_TR_D1D = 0,
int T_TE_D1D = 0,
int T_Q1D = 0>
236static void PAGradientApply2D(
const int NE,
243 const int tr_d1d = 0,
244 const int te_d1d = 0,
247 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
248 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
249 const int Q1D = T_Q1D ? T_Q1D : q1d;
253 auto B =
Reshape(
b.Read(), Q1D, TR_D1D);
261 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
262 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
263 const int Q1D = T_Q1D ? T_Q1D : q1d;
266 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
267 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
269 real_t grad[max_Q1D][max_Q1D][VDIM];
270 for (
int qy = 0; qy < Q1D; ++qy)
272 for (
int qx = 0; qx < Q1D; ++qx)
274 grad[qy][qx][0] = 0.0;
275 grad[qy][qx][1] = 0.0;
278 for (
int dy = 0; dy < TR_D1D; ++dy)
280 real_t gradX[max_Q1D][VDIM];
281 for (
int qx = 0; qx < Q1D; ++qx)
286 for (
int dx = 0; dx < TR_D1D; ++dx)
288 const real_t s = x(dx,dy,e);
289 for (
int qx = 0; qx < Q1D; ++qx)
291 gradX[qx][0] += s * G(qx,dx);
292 gradX[qx][1] += s * B(qx,dx);
295 for (
int qy = 0; qy < Q1D; ++qy)
297 const real_t wy = B(qy,dy);
298 const real_t wDy = G(qy,dy);
299 for (
int qx = 0; qx < Q1D; ++qx)
301 grad[qy][qx][0] += gradX[qx][0] * wy;
302 grad[qy][qx][1] += gradX[qx][1] * wDy;
307 for (
int qy = 0; qy < Q1D; ++qy)
309 for (
int qx = 0; qx < Q1D; ++qx)
311 const int q = qx + qy * Q1D;
312 const real_t gradX = grad[qy][qx][0];
313 const real_t gradY = grad[qy][qx][1];
315 grad[qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e);
316 grad[qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e);
320 for (
int qy = 0; qy < Q1D; ++qy)
322 real_t opX[max_TE_D1D][VDIM];
323 for (
int dx = 0; dx < TE_D1D; ++dx)
327 for (
int qx = 0; qx < Q1D; ++qx)
329 opX[dx][0] += Bt(dx,qx)*grad[qy][qx][0];
330 opX[dx][1] += Bt(dx,qx)*grad[qy][qx][1];
333 for (
int dy = 0; dy < TE_D1D; ++dy)
335 const real_t wy = Bt(dy,qy);
336 for (
int dx = 0; dx < TE_D1D; ++dx)
338 y(dx,dy,0,e) += wy*opX[dx][0];
339 y(dx,dy,1,e) += wy*opX[dx][1];
348template<
int T_TR_D1D = 0,
int T_TE_D1D = 0,
int T_Q1D = 0>
349static void PAGradientApplyTranspose2D(
const int NE,
350 const Array<real_t> &bt,
351 const Array<real_t> >,
352 const Array<real_t> &
b,
356 const int tr_d1d = 0,
357 const int te_d1d = 0,
360 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
361 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
362 const int Q1D = T_Q1D ? T_Q1D : q1d;
366 auto Bt =
Reshape(bt.Read(), TR_D1D, Q1D);
367 auto Gt =
Reshape(gt.Read(), TR_D1D, Q1D);
368 auto B =
Reshape(
b.Read(), Q1D, TE_D1D);
369 auto op =
Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
370 auto x =
Reshape(x_.Read(), TE_D1D, TE_D1D, 2, NE);
371 auto y =
Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, NE);
374 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
375 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
376 const int Q1D = T_Q1D ? T_Q1D : q1d;
379 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
380 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
383 real_t Bxy[max_Q1D][max_Q1D][VDIM];
384 for (
int qy = 0; qy < Q1D; ++qy)
386 for (
int qx = 0; qx < Q1D; ++qx)
388 Bxy[qy][qx][0] = 0.0;
389 Bxy[qy][qx][1] = 0.0;
392 for (
int dy = 0; dy < TE_D1D; ++dy)
395 for (
int qx = 0; qx < Q1D; ++qx)
400 for (
int dx = 0; dx < TE_D1D; ++dx)
402 const real_t s0 = x(dx,dy,0,e);
403 const real_t s1 = x(dx,dy,1,e);
404 for (
int qx = 0; qx < Q1D; ++qx)
406 Bx[qx][0] += s0 * B(qx,dx);
407 Bx[qx][1] += s1 * B(qx,dx);
410 for (
int qy = 0; qy < Q1D; ++qy)
412 const real_t wy = B(qy,dy);
413 for (
int qx = 0; qx < Q1D; ++qx)
415 Bxy[qy][qx][0] += Bx[qx][0] * wy;
416 Bxy[qy][qx][1] += Bx[qx][1] * wy;
421 for (
int qy = 0; qy < Q1D; ++qy)
423 for (
int qx = 0; qx < Q1D; ++qx)
425 const int q = qx + qy * Q1D;
426 const real_t Bxy0 = Bxy[qy][qx][0];
427 const real_t Bxy1 = Bxy[qy][qx][1];
429 Bxy[qy][qx][0] = Bxy0*op(q,0,0,e) + Bxy1*op(q,0,1,e);
430 Bxy[qy][qx][1] = Bxy0*op(q,1,0,e) + Bxy1*op(q,1,1,e);
434 for (
int qy = 0; qy < Q1D; ++qy)
436 real_t Btx[max_TR_D1D][VDIM];
437 for (
int dx = 0; dx < TR_D1D; ++dx)
441 for (
int qx = 0; qx < Q1D; ++qx)
443 Btx[dx][0] += Gt(dx,qx)*Bxy[qy][qx][0];
444 Btx[dx][1] += Bt(dx,qx)*Bxy[qy][qx][1];
447 for (
int dy = 0; dy < TR_D1D; ++dy)
449 const real_t wy = Bt(dy,qy);
450 const real_t wDy = Gt(dy,qy);
451 for (
int dx = 0; dx < TR_D1D; ++dx)
453 y(dx,dy,e) += wy*Btx[dx][0] + wDy*Btx[dx][1];
461template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
462static void PAGradientApply3D(
const int NE,
463 const Array<real_t> &
b,
464 const Array<real_t> &g,
465 const Array<real_t> &bt,
473 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
474 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
475 const int Q1D = T_Q1D ? T_Q1D : q1d;
479 auto B =
Reshape(
b.Read(), Q1D, TR_D1D);
480 auto G =
Reshape(g.Read(), Q1D, TR_D1D);
481 auto Bt =
Reshape(bt.Read(), TE_D1D, Q1D);
482 auto op =
Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
483 auto x =
Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
484 auto y =
Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
487 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
488 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
489 const int Q1D = T_Q1D ? T_Q1D : q1d;
492 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
493 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
495 real_t grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
496 for (
int qz = 0; qz < Q1D; ++qz)
498 for (
int qy = 0; qy < Q1D; ++qy)
500 for (
int qx = 0; qx < Q1D; ++qx)
502 grad[qz][qy][qx][0] = 0.0;
503 grad[qz][qy][qx][1] = 0.0;
504 grad[qz][qy][qx][2] = 0.0;
508 for (
int dz = 0; dz < TR_D1D; ++dz)
510 real_t gradXY[max_Q1D][max_Q1D][3];
511 for (
int qy = 0; qy < Q1D; ++qy)
513 for (
int qx = 0; qx < Q1D; ++qx)
515 gradXY[qy][qx][0] = 0.0;
516 gradXY[qy][qx][1] = 0.0;
517 gradXY[qy][qx][2] = 0.0;
520 for (
int dy = 0; dy < TR_D1D; ++dy)
523 for (
int qx = 0; qx < Q1D; ++qx)
528 for (
int dx = 0; dx < TR_D1D; ++dx)
530 const real_t s = x(dx,dy,dz,e);
531 for (
int qx = 0; qx < Q1D; ++qx)
533 gradX[qx][0] += s * B(qx,dx);
534 gradX[qx][1] += s * G(qx,dx);
537 for (
int qy = 0; qy < Q1D; ++qy)
539 const real_t wy = B(qy,dy);
540 const real_t wDy = G(qy,dy);
541 for (
int qx = 0; qx < Q1D; ++qx)
543 const real_t wx = gradX[qx][0];
544 const real_t wDx = gradX[qx][1];
545 gradXY[qy][qx][0] += wDx * wy;
546 gradXY[qy][qx][1] += wx * wDy;
547 gradXY[qy][qx][2] += wx * wy;
551 for (
int qz = 0; qz < Q1D; ++qz)
553 const real_t wz = B(qz,dz);
554 const real_t wDz = G(qz,dz);
555 for (
int qy = 0; qy < Q1D; ++qy)
557 for (
int qx = 0; qx < Q1D; ++qx)
559 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
560 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
561 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
567 for (
int qz = 0; qz < Q1D; ++qz)
569 for (
int qy = 0; qy < Q1D; ++qy)
571 for (
int qx = 0; qx < Q1D; ++qx)
573 const int q = qx + (qy + qz * Q1D) * Q1D;
574 const real_t gradX = grad[qz][qy][qx][0];
575 const real_t gradY = grad[qz][qy][qx][1];
576 const real_t gradZ = grad[qz][qy][qx][2];
578 grad[qz][qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e) + gradZ*op(q,2,0,e);
579 grad[qz][qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e) + gradZ*op(q,2,1,e);
580 grad[qz][qy][qx][2] = gradX*op(q,0,2,e) + gradY*op(q,1,2,e) + gradZ*op(q,2,2,e);
585 for (
int qz = 0; qz < Q1D; ++qz)
587 real_t opXY[max_TE_D1D][max_TE_D1D][VDIM];
588 for (
int dy = 0; dy < TE_D1D; ++dy)
590 for (
int dx = 0; dx < TE_D1D; ++dx)
592 opXY[dy][dx][0] = 0.0;
593 opXY[dy][dx][1] = 0.0;
594 opXY[dy][dx][2] = 0.0;
597 for (
int qy = 0; qy < Q1D; ++qy)
599 real_t opX[max_TE_D1D][VDIM];
600 for (
int dx = 0; dx < TE_D1D; ++dx)
605 for (
int qx = 0; qx < Q1D; ++qx)
607 opX[dx][0] += Bt(dx,qx)*grad[qz][qy][qx][0];
608 opX[dx][1] += Bt(dx,qx)*grad[qz][qy][qx][1];
609 opX[dx][2] += Bt(dx,qx)*grad[qz][qy][qx][2];
612 for (
int dy = 0; dy < TE_D1D; ++dy)
614 for (
int dx = 0; dx < TE_D1D; ++dx)
616 opXY[dy][dx][0] += Bt(dy,qy)*opX[dx][0];
617 opXY[dy][dx][1] += Bt(dy,qy)*opX[dx][1];
618 opXY[dy][dx][2] += Bt(dy,qy)*opX[dx][2];
622 for (
int dz = 0; dz < TE_D1D; ++dz)
624 for (
int dy = 0; dy < TE_D1D; ++dy)
626 for (
int dx = 0; dx < TE_D1D; ++dx)
628 y(dx,dy,dz,0,e) += Bt(dz,qz)*opXY[dy][dx][0];
629 y(dx,dy,dz,1,e) += Bt(dz,qz)*opXY[dy][dx][1];
630 y(dx,dy,dz,2,e) += Bt(dz,qz)*opXY[dy][dx][2];
640template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
641static void PAGradientApplyTranspose3D(
const int NE,
642 const Array<real_t> &bt,
643 const Array<real_t> >,
644 const Array<real_t> &
b,
652 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
653 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
654 const int Q1D = T_Q1D ? T_Q1D : q1d;
658 auto Bt =
Reshape(bt.Read(), TR_D1D, Q1D);
659 auto Gt =
Reshape(gt.Read(), TR_D1D, Q1D);
660 auto B =
Reshape(
b.Read(), Q1D, TE_D1D);
661 auto op =
Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
662 auto x =
Reshape(x_.Read(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
663 auto y =
Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, TR_D1D, NE);
666 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
667 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
668 const int Q1D = T_Q1D ? T_Q1D : q1d;
671 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
672 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
675 real_t Bxyz[max_Q1D][max_Q1D][max_Q1D][VDIM];
676 for (
int qz = 0; qz < Q1D; ++qz)
678 for (
int qy = 0; qy < Q1D; ++qy)
680 for (
int qx = 0; qx < Q1D; ++qx)
682 Bxyz[qz][qy][qx][0] = 0.0;
683 Bxyz[qz][qy][qx][1] = 0.0;
684 Bxyz[qz][qy][qx][2] = 0.0;
688 for (
int dz = 0; dz < TE_D1D; ++dz)
690 real_t Bxy[max_Q1D][max_Q1D][3];
691 for (
int qy = 0; qy < Q1D; ++qy)
693 for (
int qx = 0; qx < Q1D; ++qx)
695 Bxy[qy][qx][0] = 0.0;
696 Bxy[qy][qx][1] = 0.0;
697 Bxy[qy][qx][2] = 0.0;
700 for (
int dy = 0; dy < TE_D1D; ++dy)
703 for (
int qx = 0; qx < Q1D; ++qx)
709 for (
int dx = 0; dx < TE_D1D; ++dx)
711 const real_t s0 = x(dx,dy,dz,0,e);
712 const real_t s1 = x(dx,dy,dz,1,e);
713 const real_t s2 = x(dx,dy,dz,2,e);
714 for (
int qx = 0; qx < Q1D; ++qx)
716 Bx[qx][0] += s0 * B(qx,dx);
717 Bx[qx][1] += s1 * B(qx,dx);
718 Bx[qx][2] += s2 * B(qx,dx);
721 for (
int qy = 0; qy < Q1D; ++qy)
723 const real_t wy = B(qy,dy);
724 for (
int qx = 0; qx < Q1D; ++qx)
726 Bxy[qy][qx][0] += Bx[qx][0] * wy;
727 Bxy[qy][qx][1] += Bx[qx][1] * wy;
728 Bxy[qy][qx][2] += Bx[qx][2] * wy;
732 for (
int qz = 0; qz < Q1D; ++qz)
734 const real_t wz = B(qz,dz);
735 for (
int qy = 0; qy < Q1D; ++qy)
737 for (
int qx = 0; qx < Q1D; ++qx)
739 Bxyz[qz][qy][qx][0] += Bxy[qy][qx][0] * wz;
740 Bxyz[qz][qy][qx][1] += Bxy[qy][qx][1] * wz;
741 Bxyz[qz][qy][qx][2] += Bxy[qy][qx][2] * wz;
747 for (
int qz = 0; qz < Q1D; ++qz)
749 for (
int qy = 0; qy < Q1D; ++qy)
751 for (
int qx = 0; qx < Q1D; ++qx)
753 const int q = qx + (qy + qz * Q1D) * Q1D;
754 const real_t Bxyz0 = Bxyz[qz][qy][qx][0];
755 const real_t Bxyz1 = Bxyz[qz][qy][qx][1];
756 const real_t Bxyz2 = Bxyz[qz][qy][qx][2];
758 Bxyz[qz][qy][qx][0] = Bxyz0*op(q,0,0,e) + Bxyz1*op(q,0,1,e) + Bxyz2*op(q,0,2,e);
759 Bxyz[qz][qy][qx][1] = Bxyz0*op(q,1,0,e) + Bxyz1*op(q,1,1,e) + Bxyz2*op(q,1,2,e);
760 Bxyz[qz][qy][qx][2] = Bxyz0*op(q,2,0,e) + Bxyz1*op(q,2,1,e) + Bxyz2*op(q,2,2,e);
765 for (
int qz = 0; qz < Q1D; ++qz)
767 real_t Btxy[max_TR_D1D][max_TR_D1D][VDIM];
768 for (
int dy = 0; dy < TR_D1D; ++dy)
770 for (
int dx = 0; dx < TR_D1D; ++dx)
772 Btxy[dy][dx][0] = 0.0;
773 Btxy[dy][dx][1] = 0.0;
774 Btxy[dy][dx][2] = 0.0;
777 for (
int qy = 0; qy < Q1D; ++qy)
779 real_t Btx[max_TR_D1D][VDIM];
780 for (
int dx = 0; dx < TR_D1D; ++dx)
785 for (
int qx = 0; qx < Q1D; ++qx)
787 Btx[dx][0] += Gt(dx,qx)*Bxyz[qz][qy][qx][0];
788 Btx[dx][1] += Bt(dx,qx)*Bxyz[qz][qy][qx][1];
789 Btx[dx][2] += Bt(dx,qx)*Bxyz[qz][qy][qx][2];
792 for (
int dy = 0; dy < TR_D1D; ++dy)
794 const real_t wy = Bt(dy,qy);
795 const real_t wDy = Gt(dy,qy);
796 for (
int dx = 0; dx < TR_D1D; ++dx)
798 Btxy[dy][dx][0] += wy *Btx[dx][0];
799 Btxy[dy][dx][1] += wDy*Btx[dx][1];
800 Btxy[dy][dx][2] += wy *Btx[dx][2];
804 for (
int dz = 0; dz < TR_D1D; ++dz)
806 const real_t wz = Bt(dz,qz);
807 const real_t wDz = Gt(dz,qz);
808 for (
int dy = 0; dy < TR_D1D; ++dy)
810 for (
int dx = 0; dx < TR_D1D; ++dx)
812 y(dx,dy,dz,e) += wz *Btxy[dy][dx][0] +
813 wz *Btxy[dy][dx][1] +
823template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
824static void SmemPAGradientApply3D(
const int NE,
825 const Array<real_t> &b_,
826 const Array<real_t> &g_,
827 const Array<real_t> &bt_,
831 const int tr_d1d = 0,
832 const int te_d1d = 0,
835 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
836 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
837 const int Q1D = T_Q1D ? T_Q1D : q1d;
841 MFEM_VERIFY(TR_D1D <= Q1D,
"");
842 MFEM_VERIFY(TE_D1D <= Q1D,
"");
845 auto b =
Reshape(b_.Read(), Q1D, TR_D1D);
846 auto g =
Reshape(g_.Read(), Q1D, TR_D1D);
847 auto bt =
Reshape(bt_.Read(), TE_D1D, Q1D);
848 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, 3, 3, NE);
849 auto x =
Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
850 auto y =
Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
853 [=] MFEM_HOST_DEVICE (int e)
855 const int tidz = MFEM_THREAD_ID(z);
856 const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
857 const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
858 const int Q1D = T_Q1D ? T_Q1D : q1d;
859 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
860 constexpr int MD1R = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
861 constexpr int MD1E = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
862 constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
863 constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
864 MFEM_SHARED
real_t sBG[2][MQ1*MD1];
868 MFEM_SHARED
real_t sm0[3][MDQ*MDQ*MDQ];
869 MFEM_SHARED
real_t sm1[3][MDQ*MDQ*MDQ];
871 real_t (*DDQ0)[MD1][MQ1] = (
real_t (*)[MD1][MQ1]) (sm0+0);
872 real_t (*DDQ1)[MD1][MQ1] = (
real_t (*)[MD1][MQ1]) (sm0+1);
874 real_t (*DQQ0)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm1+0);
875 real_t (*DQQ1)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm1+1);
876 real_t (*DQQ2)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm1+2);
878 real_t (*QQQ0)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm0+0);
879 real_t (*QQQ1)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm0+1);
880 real_t (*QQQ2)[MQ1][MQ1] = (
real_t (*)[MQ1][MQ1]) (sm0+2);
882 real_t (*QQD0)[MQ1][MD1] = (
real_t (*)[MQ1][MD1]) (sm1+0);
883 real_t (*QQD1)[MQ1][MD1] = (
real_t (*)[MQ1][MD1]) (sm1+1);
884 real_t (*QQD2)[MQ1][MD1] = (
real_t (*)[MQ1][MD1]) (sm1+2);
886 real_t (*QDD0)[MD1][MD1] = (
real_t (*)[MD1][MD1]) (sm0+0);
887 real_t (*QDD1)[MD1][MD1] = (
real_t (*)[MD1][MD1]) (sm0+1);
888 real_t (*QDD2)[MD1][MD1] = (
real_t (*)[MD1][MD1]) (sm0+2);
889 MFEM_FOREACH_THREAD(dz,z,D1DR)
891 MFEM_FOREACH_THREAD(dy,y,D1DR)
893 MFEM_FOREACH_THREAD(dx,x,D1DR)
895 X[dz][dy][dx] = x(dx,dy,dz,e);
901 MFEM_FOREACH_THREAD(d,y,D1DR)
903 MFEM_FOREACH_THREAD(q,x,Q1D)
911 MFEM_FOREACH_THREAD(dz,z,D1DR)
913 MFEM_FOREACH_THREAD(dy,y,D1DR)
915 MFEM_FOREACH_THREAD(qx,x,Q1D)
919 for (
int dx = 0; dx < D1DR; ++dx)
921 const real_t coord = X[dz][dy][dx];
922 u += coord * B[qx][dx];
923 v += coord * G[qx][dx];
925 DDQ0[dz][dy][qx] =
u;
926 DDQ1[dz][dy][qx] = v;
931 MFEM_FOREACH_THREAD(dz,z,D1DR)
933 MFEM_FOREACH_THREAD(qy,y,Q1D)
935 MFEM_FOREACH_THREAD(qx,x,Q1D)
940 for (
int dy = 0; dy < D1DR; ++dy)
942 u += DDQ1[dz][dy][qx] * B[qy][dy];
943 v += DDQ0[dz][dy][qx] * G[qy][dy];
944 w += DDQ0[dz][dy][qx] * B[qy][dy];
946 DQQ0[dz][qy][qx] =
u;
947 DQQ1[dz][qy][qx] = v;
948 DQQ2[dz][qy][qx] = w;
953 MFEM_FOREACH_THREAD(qz,z,Q1D)
955 MFEM_FOREACH_THREAD(qy,y,Q1D)
957 MFEM_FOREACH_THREAD(qx,x,Q1D)
962 for (
int dz = 0; dz < D1DR; ++dz)
964 u += DQQ0[dz][qy][qx] * B[qz][dz];
965 v += DQQ1[dz][qy][qx] * B[qz][dz];
966 w += DQQ2[dz][qy][qx] * G[qz][dz];
968 QQQ0[qz][qy][qx] =
u;
969 QQQ1[qz][qy][qx] = v;
970 QQQ2[qz][qy][qx] = w;
975 MFEM_FOREACH_THREAD(qz,z,Q1D)
977 MFEM_FOREACH_THREAD(qy,y,Q1D)
979 MFEM_FOREACH_THREAD(qx,x,Q1D)
981 const int q = qx + (qy + qz * Q1D) * Q1D;
982 const real_t gX = QQQ0[qz][qy][qx];
983 const real_t gY = QQQ1[qz][qy][qx];
984 const real_t gZ = QQQ2[qz][qy][qx];
985 QQQ0[qz][qy][qx] = (D(q,0,0,e)*gX) + (D(q,1,0,e)*gY) + (D(q,2,0,e)*gZ);
986 QQQ1[qz][qy][qx] = (D(q,0,1,e)*gX) + (D(q,1,1,e)*gY) + (D(q,2,1,e)*gZ);
987 QQQ2[qz][qy][qx] = (D(q,0,2,e)*gX) + (D(q,1,2,e)*gY) + (D(q,2,2,e)*gZ);
994 MFEM_FOREACH_THREAD(d,y,D1DE)
996 MFEM_FOREACH_THREAD(q,x,Q1D)
1003 MFEM_FOREACH_THREAD(qz,z,Q1D)
1005 MFEM_FOREACH_THREAD(qy,y,Q1D)
1007 MFEM_FOREACH_THREAD(dx,x,D1DE)
1012 for (
int qx = 0; qx < Q1D; ++qx)
1014 u += QQQ0[qz][qy][qx] * Bt[dx][qx];
1015 v += QQQ1[qz][qy][qx] * Bt[dx][qx];
1016 w += QQQ2[qz][qy][qx] * Bt[dx][qx];
1018 QQD0[qz][qy][dx] =
u;
1019 QQD1[qz][qy][dx] = v;
1020 QQD2[qz][qy][dx] = w;
1025 MFEM_FOREACH_THREAD(qz,z,Q1D)
1027 MFEM_FOREACH_THREAD(dy,y,D1DE)
1029 MFEM_FOREACH_THREAD(dx,x,D1DE)
1034 for (
int qy = 0; qy < Q1D; ++qy)
1036 u += QQD0[qz][qy][dx] * Bt[dy][qy];
1037 v += QQD1[qz][qy][dx] * Bt[dy][qy];
1038 w += QQD2[qz][qy][dx] * Bt[dy][qy];
1040 QDD0[qz][dy][dx] =
u;
1041 QDD1[qz][dy][dx] = v;
1042 QDD2[qz][dy][dx] = w;
1047 MFEM_FOREACH_THREAD(dz,z,D1DE)
1049 MFEM_FOREACH_THREAD(dy,y,D1DE)
1051 MFEM_FOREACH_THREAD(dx,x,D1DE)
1056 for (
int qz = 0; qz < Q1D; ++qz)
1058 u += QDD0[qz][dy][dx] * Bt[dz][qz];
1059 v += QDD1[qz][dy][dx] * Bt[dz][qz];
1060 w += QDD2[qz][dy][dx] * Bt[dz][qz];
1062 y(dx,dy,dz,0,e) +=
u;
1063 y(dx,dy,dz,1,e) += v;
1064 y(dx,dy,dz,2,e) += w;
1071static void PAGradientApply(
const int dim,
1076 const Array<real_t> &B,
1077 const Array<real_t> &G,
1078 const Array<real_t> &Bt,
1085 return PAGradientApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1089 return PAGradientApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1091 MFEM_ABORT(
"Unknown kernel.");
1094static void PAGradientApplyTranspose(
const int dim,
1099 const Array<real_t> &Bt,
1100 const Array<real_t> &Gt,
1101 const Array<real_t> &B,
1108 return PAGradientApplyTranspose2D(NE,Bt,Gt,B,op,x,y,TR_D1D,TE_D1D,Q1D);
1112 return PAGradientApplyTranspose3D(NE,Bt,Gt,B,op,x,y,TR_D1D,TE_D1D,Q1D);
1114 MFEM_ABORT(
"Unknown kernel.");
1120 PAGradientApply(
dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1121 trial_maps->
B, trial_maps->
G, test_maps->
Bt, pa_data,
1128 PAGradientApplyTranspose(
dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1129 trial_maps->
Bt, trial_maps->
Gt, test_maps->
B, pa_data,
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Class to represent a coefficient evaluated at quadrature points.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Array< real_t > B
Basis functions evaluated at quadrature points.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Array< real_t > Gt
Transpose of G.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Array< real_t > Bt
Transpose of B.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Ordering::Type GetOrdering() const
Return the ordering method.
int GetNE() const
Returns number of elements in the mesh.
Mesh * GetMesh() const
Returns the mesh.
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Abstract class for all finite elements.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
int GetDim() const
Returns the reference space dimension for the finite element.
int GetMapType() const
Returns the FiniteElement::MapType of the element describing how reference functions are mapped to ph...
Vector detJ
Determinants of the Jacobians at all quadrature points.
Vector J
Jacobians of the element transformations at all quadrature points.
static const IntegrationRule & GetRule(const FiniteElement &trial_fe, const FiniteElement &test_fe, const ElementTransformation &Trans)
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
void AddMultPA(const Vector &x, Vector &y) const override
Method for partially assembled action.
void AssemblePA(const FiniteElementSpace &trial_fes, const FiniteElementSpace &test_fes) override
Class for an integration rule - an Array of IntegrationPoint.
int GetNPoints() const
Returns the number of the points in the integration rule.
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
const IntegrationRule * IntRule
int Dimension() const
Dimension of the reference space used within the elements.
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Class representing the storage layout of a QuadratureFunction.
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void SetSize(int s)
Resize the vector to size s.
void trans(const Vector &u, Vector &x)
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
@ COMPRESSED
Enable all above compressions.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.