12 #include "../general/forall.hpp"
35 const bool symmetric = (coeffDim != 4);
36 const int NQ = Q1D*Q1D;
45 for (
int q = 0; q < NQ; ++q)
47 const double J11 = J(q,0,0,e);
48 const double J21 = J(q,1,0,e);
49 const double J12 = J(q,0,1,e);
50 const double J22 = J(q,1,1,e);
51 const double c_detJ = W[q] / ((J11*J22)-(J21*J12));
54 if (coeffDim == 3 || coeffDim == 4)
56 const double C11 = C(0,q,e);
57 const double C12 = C(1,q,e);
58 const double C21 = symmetric ? C12 : C(2,q,e);
59 const double C22 = symmetric ? C(2,q,e) : C(3,q,e);
60 const double R11 = C11*J11 + C12*J21;
61 const double R21 = C21*J11 + C22*J21;
62 const double R12 = C11*J12 + C12*J22;
63 const double R22 = C21*J12 + C22*J22;
65 y(q,0,e) = c_detJ * (J11*R11 + J21*R21);
66 y(q,1,e) = c_detJ * (J11*R12 + J21*R22);
70 y(q,2,e) = c_detJ * (J12*R12 + J22*R22);
74 y(q,2,e) = c_detJ * (J12*R11 + J22*R21);
75 y(q,3,e) = c_detJ * (J12*R12 + J22*R22);
80 const double C1 = C(0,q,e);
81 const double C2 = (coeffDim == 2 ? C(1,q,e) : C1);
82 y(q,0,e) = c_detJ * (J11*C1*J11 + J21*C2*J21);
83 y(q,1,e) = c_detJ * (J11*C1*J12 + J21*C2*J22);
84 y(q,2,e) = c_detJ * (J12*C1*J12 + J22*C2*J22);
99 const bool symmetric = (coeffDim != 9);
100 const int NQ = Q1D*Q1D*Q1D;
104 auto y =
Reshape(op.
Write(), NQ, symmetric ? 6 : 9, NE);
108 for (
int q = 0; q < NQ; ++q)
110 const double J11 = J(q,0,0,e);
111 const double J21 = J(q,1,0,e);
112 const double J31 = J(q,2,0,e);
113 const double J12 = J(q,0,1,e);
114 const double J22 = J(q,1,1,e);
115 const double J32 = J(q,2,1,e);
116 const double J13 = J(q,0,2,e);
117 const double J23 = J(q,1,2,e);
118 const double J33 = J(q,2,2,e);
119 const double detJ = J11 * (J22 * J33 - J32 * J23) -
120 J21 * (J12 * J33 - J32 * J13) +
121 J31 * (J12 * J23 - J22 * J13);
122 const double c_detJ = W[q] / detJ;
125 if (coeffDim == 6 || coeffDim == 9)
128 M[0][0] = C(0, q, e);
129 M[0][1] = C(1, q, e);
130 M[0][2] = C(2, q, e);
131 M[1][0] = (!symmetric) ? C(3, q, e) : M[0][1];
132 M[1][1] = (!symmetric) ? C(4, q, e) : C(3, q, e);
133 M[1][2] = (!symmetric) ? C(5, q, e) : C(4, q, e);
134 M[2][0] = (!symmetric) ? C(6, q, e) : M[0][2];
135 M[2][1] = (!symmetric) ? C(7, q, e) : M[1][2];
136 M[2][2] = (!symmetric) ? C(8, q, e) : C(5, q, e);
139 for (
int i=0; i<3; ++i)
140 for (
int j = (symmetric ? i : 0); j<3; ++j)
143 for (
int k=0; k<3; ++k)
146 for (
int l=0; l<3; ++l)
148 MJ_kj += M[k][l] * J(q,l,j,e);
151 y(q,idx,e) += J(q,k,i,e) * MJ_kj;
154 y(q,idx,e) *= c_detJ;
161 for (
int i=0; i<3; ++i)
162 for (
int j=i; j<3; ++j)
165 for (
int k=0; k<3; ++k)
167 y(q,idx,e) += J(q,k,i,e) * C(coeffDim == 3 ? k : 0, q, e) * J(q,k,j,e);
170 y(q,idx,e) *= c_detJ;
181 const bool symmetric,
190 constexpr
static int VDIM = 2;
198 auto op =
Reshape(op_.
Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
206 for (
int qy = 0; qy < Q1D; ++qy)
208 for (
int qx = 0; qx < Q1D; ++qx)
210 for (
int c = 0; c < VDIM; ++c)
212 mass[qy][qx][c] = 0.0;
219 for (
int c = 0; c < VDIM; ++c)
221 const int D1Dx = (c == 1) ? D1D - 1 : D1D;
222 const int D1Dy = (c == 0) ? D1D - 1 : D1D;
224 for (
int dy = 0; dy < D1Dy; ++dy)
227 for (
int qx = 0; qx < Q1D; ++qx)
232 for (
int dx = 0; dx < D1Dx; ++dx)
234 const double t = x(dx + (dy * D1Dx) + osc, e);
235 for (
int qx = 0; qx < Q1D; ++qx)
237 massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
241 for (
int qy = 0; qy < Q1D; ++qy)
243 const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
244 for (
int qx = 0; qx < Q1D; ++qx)
246 mass[qy][qx][c] += massX[qx] * wy;
255 for (
int qy = 0; qy < Q1D; ++qy)
257 for (
int qx = 0; qx < Q1D; ++qx)
259 const double O11 = op(qx,qy,0,e);
260 const double O12 = op(qx,qy,1,e);
261 const double O21 = symmetric ? O12 : op(qx,qy,2,e);
262 const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
263 const double massX = mass[qy][qx][0];
264 const double massY = mass[qy][qx][1];
265 mass[qy][qx][0] = (O11*massX)+(O12*massY);
266 mass[qy][qx][1] = (O21*massX)+(O22*massY);
270 for (
int qy = 0; qy < Q1D; ++qy)
274 for (
int c = 0; c < VDIM; ++c)
276 const int D1Dx = (c == 1) ? D1D - 1 : D1D;
277 const int D1Dy = (c == 0) ? D1D - 1 : D1D;
280 for (
int dx = 0; dx < D1Dx; ++dx)
284 for (
int qx = 0; qx < Q1D; ++qx)
286 for (
int dx = 0; dx < D1Dx; ++dx)
288 massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bct(dx,qx) :
293 for (
int dy = 0; dy < D1Dy; ++dy)
295 const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
297 for (
int dx = 0; dx < D1Dx; ++dx)
299 y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
309 template<
int T_D1D = 0,
int T_Q1D = 0>
311 const bool symmetric,
322 MFEM_CONTRACT_VAR(Bot_);
323 MFEM_CONTRACT_VAR(Bct_);
325 static constexpr
int VDIM = 2;
327 const int D1D = T_D1D ? T_D1D : d1d;
328 const int Q1D = T_Q1D ? T_Q1D : q1d;
332 const auto D =
Reshape(op_.
Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
333 const auto x =
Reshape(x_.
Read(), D1D*(D1D-1), VDIM, NE);
336 MFEM_FORALL_3D(e, NE, Q1D, Q1D, VDIM,
338 const int tidz = MFEM_THREAD_ID(z);
340 const int D1D = T_D1D ? T_D1D : d1d;
341 const int Q1D = T_Q1D ? T_Q1D : q1d;
345 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
347 MFEM_SHARED
double smo[MQ1*(MD1-1)];
350 MFEM_SHARED
double smc[MQ1*MD1];
353 MFEM_SHARED
double sm0[VDIM*MDQ*MDQ];
354 MFEM_SHARED
double sm1[VDIM*MDQ*MDQ];
360 MFEM_FOREACH_THREAD(vd,z,VDIM)
362 MFEM_FOREACH_THREAD(dy,y,D1D)
364 MFEM_FOREACH_THREAD(qx,x,Q1D)
366 if (qx < D1D && dy < (D1D-1)) { X(qx + dy*D1D,vd) = x(qx+dy*D1D,vd,e); }
369 if (dy < (D1D-1)) { Bo(dy,qx) = bo(qx,dy); }
370 Bc(dy,qx) = bc(qx,dy);
377 MFEM_FOREACH_THREAD(vd,z,VDIM)
379 const int nx = (vd == 0) ? D1D : D1D-1;
380 const int ny = (vd == 1) ? D1D : D1D-1;
383 MFEM_FOREACH_THREAD(dy,y,ny)
385 MFEM_FOREACH_THREAD(qx,x,Q1D)
388 for (
int dx = 0; dx < nx; ++dx)
390 dq += Xxy(dx,dy,vd) * Bx(dx,qx);
397 MFEM_FOREACH_THREAD(vd,z,VDIM)
399 const int ny = (vd == 1) ? D1D : D1D-1;
401 MFEM_FOREACH_THREAD(qy,y,Q1D)
403 MFEM_FOREACH_THREAD(qx,x,Q1D)
406 for (
int dy = 0; dy < ny; ++dy)
408 qq += QD(qx,dy,vd) * By(dy,qy);
418 MFEM_FOREACH_THREAD(qy,y,Q1D)
420 MFEM_FOREACH_THREAD(qx,x,Q1D)
422 const double Qx = QQ(qx,qy,0);
423 const double Qy = QQ(qx,qy,1);
425 const double D11 = D(qx,qy,0,e);
426 const double D12 = D(qx,qy,1,e);
427 const double D21 = symmetric ? D12 : D(qx,qy,2,e);
428 const double D22 = symmetric ? D(qx,qy,2,e) : D(qx,qy,3,e);
430 QQ(qx,qy,0) = D11*Qx + D12*Qy;
431 QQ(qx,qy,1) = D21*Qx + D22*Qy;
437 MFEM_FOREACH_THREAD(vd,z,VDIM)
439 const int nx = (vd == 0) ? D1D : D1D-1;
441 MFEM_FOREACH_THREAD(qy,y,Q1D)
443 MFEM_FOREACH_THREAD(dx,x,nx)
446 for (
int qx = 0; qx < Q1D; ++qx)
448 qd += QQ(qx,qy,vd) * Btx(dx,qx);
455 MFEM_FOREACH_THREAD(vd,z,VDIM)
457 const int nx = (vd == 0) ? D1D : D1D-1;
458 const int ny = (vd == 1) ? D1D : D1D-1;
461 MFEM_FOREACH_THREAD(dy,y,ny)
463 MFEM_FOREACH_THREAD(dx,x,nx)
466 for (
int qy = 0; qy < Q1D; ++qy)
468 dd += QD(dx,qy,vd) * Bty(dy,qy);
470 Yxy(dx,dy,vd,e) += dd;
481 const bool symmetric,
487 constexpr
static int VDIM = 2;
492 auto op =
Reshape(op_.
Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
499 for (
int c = 0; c < VDIM; ++c)
501 const int D1Dx = (c == 1) ? D1D - 1 : D1D;
502 const int D1Dy = (c == 0) ? D1D - 1 : D1D;
504 for (
int dy = 0; dy < D1Dy; ++dy)
507 for (
int qx = 0; qx < Q1D; ++qx)
510 for (
int qy = 0; qy < Q1D; ++qy)
512 const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
513 mass[qx] += wy*wy*((c == 0) ? op(qx,qy,0,e) : op(qx,qy,symmetric ? 2 : 3,e));
517 for (
int dx = 0; dx < D1Dx; ++dx)
520 for (
int qx = 0; qx < Q1D; ++qx)
522 const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx);
523 val += mass[qx] * wx * wx;
525 diag(dx + (dy * D1Dx) + osc, e) += val;
537 const bool symmetric,
543 MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D >
HDIV_MAX_D1D");
544 MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D >
HDIV_MAX_Q1D");
545 constexpr static int VDIM = 3;
547 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
548 auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
549 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
550 auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
556 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
558 const int D1Dz = (c == 2) ? D1D : D1D - 1;
559 const int D1Dy = (c == 1) ? D1D : D1D - 1;
560 const int D1Dx = (c == 0) ? D1D : D1D - 1;
562 const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
563 (symmetric ? 5 : 8));
565 double mass[HDIV_MAX_Q1D];
567 for (int dz = 0; dz < D1Dz; ++dz)
569 for (int dy = 0; dy < D1Dy; ++dy)
571 for (int qx = 0; qx < Q1D; ++qx)
574 for (int qy = 0; qy < Q1D; ++qy)
576 const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
577 for (int qz = 0; qz < Q1D; ++qz)
579 const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
580 mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
585 for (int dx = 0; dx < D1Dx; ++dx)
588 for (int qx = 0; qx < Q1D; ++qx)
590 const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx);
591 val += mass[qx] * wx * wx;
593 diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val;
598 osc += D1Dx * D1Dy * D1Dz;
600 }); // end of element loop
603 void PAHdivMassApply3D(const int D1D,
606 const bool symmetric,
607 const Array<double> &Bo_,
608 const Array<double> &Bc_,
609 const Array<double> &Bot_,
610 const Array<double> &Bct_,
615 MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D >
HDIV_MAX_D1D");
616 MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D >
HDIV_MAX_Q1D");
617 constexpr static int VDIM = 3;
619 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
620 auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
621 auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
622 auto Bct = Reshape(Bct_.Read(), D1D, Q1D);
623 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
624 auto x = Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
625 auto y = Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
629 double mass[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D][VDIM];
631 for (int qz = 0; qz < Q1D; ++qz)
633 for (int qy = 0; qy < Q1D; ++qy)
635 for (int qx = 0; qx < Q1D; ++qx)
637 for (int c = 0; c < VDIM; ++c)
639 mass[qz][qy][qx][c] = 0.0;
647 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
649 const int D1Dz = (c == 2) ? D1D : D1D - 1;
650 const int D1Dy = (c == 1) ? D1D : D1D - 1;
651 const int D1Dx = (c == 0) ? D1D : D1D - 1;
653 for (int dz = 0; dz < D1Dz; ++dz)
655 double massXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
656 for (int qy = 0; qy < Q1D; ++qy)
658 for (int qx = 0; qx < Q1D; ++qx)
660 massXY[qy][qx] = 0.0;
664 for (int dy = 0; dy < D1Dy; ++dy)
666 double massX[HDIV_MAX_Q1D];
667 for (int qx = 0; qx < Q1D; ++qx)
672 for (int dx = 0; dx < D1Dx; ++dx)
674 const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
675 for (int qx = 0; qx < Q1D; ++qx)
677 massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
681 for (int qy = 0; qy < Q1D; ++qy)
683 const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
684 for (int qx = 0; qx < Q1D; ++qx)
686 const double wx = massX[qx];
687 massXY[qy][qx] += wx * wy;
692 for (int qz = 0; qz < Q1D; ++qz)
694 const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
695 for (int qy = 0; qy < Q1D; ++qy)
697 for (int qx = 0; qx < Q1D; ++qx)
699 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
705 osc += D1Dx * D1Dy * D1Dz;
706 } // loop (c) over components
709 for (int qz = 0; qz < Q1D; ++qz)
711 for (int qy = 0; qy < Q1D; ++qy)
713 for (int qx = 0; qx < Q1D; ++qx)
715 const double O11 = op(qx,qy,qz,0,e);
716 const double O12 = op(qx,qy,qz,1,e);
717 const double O13 = op(qx,qy,qz,2,e);
718 const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
719 const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
720 const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
721 const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
722 const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
723 const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
725 const double massX = mass[qz][qy][qx][0];
726 const double massY = mass[qz][qy][qx][1];
727 const double massZ = mass[qz][qy][qx][2];
728 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
729 mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
730 mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
735 for (int qz = 0; qz < Q1D; ++qz)
737 double massXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
741 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
743 const int D1Dz = (c == 2) ? D1D : D1D - 1;
744 const int D1Dy = (c == 1) ? D1D : D1D - 1;
745 const int D1Dx = (c == 0) ? D1D : D1D - 1;
747 for (int dy = 0; dy < D1Dy; ++dy)
749 for (int dx = 0; dx < D1Dx; ++dx)
754 for (int qy = 0; qy < Q1D; ++qy)
756 double massX[HDIV_MAX_D1D];
757 for (int dx = 0; dx < D1Dx; ++dx)
761 for (int qx = 0; qx < Q1D; ++qx)
763 for (int dx = 0; dx < D1Dx; ++dx)
765 massX[dx] += mass[qz][qy][qx][c] *
766 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
769 for (int dy = 0; dy < D1Dy; ++dy)
771 const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
772 for (int dx = 0; dx < D1Dx; ++dx)
774 massXY[dy][dx] += massX[dx] * wy;
779 for (int dz = 0; dz < D1Dz; ++dz)
781 const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
782 for (int dy = 0; dy < D1Dy; ++dy)
784 for (int dx = 0; dx < D1Dx; ++dx)
786 y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
792 osc += D1Dx * D1Dy * D1Dz;
795 }); // end of element loop
798 template<int T_D1D = 0, int T_Q1D = 0>
799 void SmemPAHdivMassApply3D(const int NE,
800 const bool symmetric,
801 const Array<double> &Bo_,
802 const Array<double> &Bc_,
803 const Array<double> &Bot_,
804 const Array<double> &Bct_,
811 MFEM_CONTRACT_VAR(Bot_);
812 MFEM_CONTRACT_VAR(Bct_);
814 static constexpr int VDIM = 3;
816 const int D1D = T_D1D ? T_D1D : d1d;
817 const int Q1D = T_Q1D ? T_Q1D : q1d;
819 const auto bo = Reshape(Bo_.Read(), Q1D, D1D-1);
820 const auto bc = Reshape(Bc_.Read(), Q1D, D1D);
821 const auto D = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
822 const auto x = Reshape(x_.Read(), D1D*(D1D-1)*(D1D-1), VDIM, NE);
823 auto y = y_.ReadWrite();
825 MFEM_FORALL_3D(e, NE, Q1D, Q1D, VDIM,
827 const int tidz = MFEM_THREAD_ID(z);
829 const int D1D = T_D1D ? T_D1D : d1d;
830 const int Q1D = T_Q1D ? T_Q1D : q1d;
832 constexpr int MQ1 = T_Q1D ? T_Q1D : HDIV_MAX_Q1D;
833 constexpr int MD1 = T_D1D ? T_D1D : HDIV_MAX_D1D;
834 constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
836 MFEM_SHARED double smo[MQ1*(MD1-1)];
837 DeviceMatrix Bo(smo, D1D-1, Q1D);
839 MFEM_SHARED double smc[MQ1*MD1];
840 DeviceMatrix Bc(smc, D1D, Q1D);
842 MFEM_SHARED double sm0[VDIM*MDQ*MDQ*MDQ];
843 MFEM_SHARED double sm1[VDIM*MDQ*MDQ*MDQ];
844 DeviceMatrix X(sm0, D1D*(D1D-1)*(D1D-1), VDIM);
845 DeviceTensor<4> QDD(sm1, Q1D, D1D, D1D, VDIM);
846 DeviceTensor<4> QQD(sm0, Q1D, Q1D, D1D, VDIM);
847 DeviceTensor<4> QQQ(sm1, Q1D, Q1D, Q1D, VDIM);
848 DeviceTensor<4> DQQ(sm0, D1D, Q1D, Q1D, VDIM);
849 DeviceTensor<4> DDQ(sm1, D1D, D1D, Q1D, VDIM);
851 // Load X into shared memory
852 MFEM_FOREACH_THREAD(vd,z,VDIM)
854 MFEM_FOREACH_THREAD(dz,y,D1D-1)
856 MFEM_FOREACH_THREAD(dy,x,D1D-1)
859 for (int dx = 0; dx < D1D; ++dx)
861 X(dx+(dy+dz*(D1D-1))*D1D,vd) = x(dx+(dy+dz*(D1D-1))*D1D,vd,e);
866 // Load Bo and Bc into shared memory
869 MFEM_FOREACH_THREAD(d,y,D1D-1)
871 MFEM_FOREACH_THREAD(q,x,Q1D)
876 MFEM_FOREACH_THREAD(d,y,D1D)
878 MFEM_FOREACH_THREAD(q,x,Q1D)
886 MFEM_FOREACH_THREAD(vd,z,VDIM)
888 const int nx = (vd == 0) ? D1D : D1D-1;
889 const int ny = (vd == 1) ? D1D : D1D-1;
890 const int nz = (vd == 2) ? D1D : D1D-1;
891 DeviceTensor<4> Xxyz(X, nx, ny, nz, VDIM);
892 DeviceMatrix Bx = (vd == 0) ? Bc : Bo;
893 MFEM_FOREACH_THREAD(dy,y,ny)
895 MFEM_FOREACH_THREAD(qx,x,Q1D)
899 for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
901 for (int dx = 0; dx < nx; ++dx)
904 for (int dz = 0; dz < nz; ++dz)
906 u[dz] += Xxyz(dx,dy,dz,vd) * Bx(dx,qx);
910 for (int dz = 0; dz < nz; ++dz) { QDD(qx,dy,dz,vd) = u[dz]; }
915 MFEM_FOREACH_THREAD(vd,z,VDIM)
917 const int ny = (vd == 1) ? D1D : D1D-1;
918 const int nz = (vd == 2) ? D1D : D1D-1;
919 DeviceMatrix By = (vd == 1) ? Bc : Bo;
920 MFEM_FOREACH_THREAD(qy,y,Q1D)
922 MFEM_FOREACH_THREAD(qx,x,Q1D)
926 for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
928 for (int dy = 0; dy < ny; ++dy)
931 for (int dz = 0; dz < nz; ++dz)
933 u[dz] += QDD(qx,dy,dz,vd) * By(dy,qy);
937 for (int dz = 0; dz < nz; ++dz) { QQD(qx,qy,dz,vd) = u[dz]; }
942 MFEM_FOREACH_THREAD(vd,z,VDIM)
944 const int nz = (vd == 2) ? D1D : D1D-1;
945 DeviceMatrix Bz = (vd == 2) ? Bc : Bo;
946 MFEM_FOREACH_THREAD(qy,y,Q1D)
948 MFEM_FOREACH_THREAD(qx,x,Q1D)
952 for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
954 for (int dz = 0; dz < nz; ++dz)
957 for (int qz = 0; qz < Q1D; ++qz)
959 u[qz] += QQD(qx,qy,dz,vd) * Bz(dz,qz);
963 for (int qz = 0; qz < Q1D; ++qz) { QQQ(qx,qy,qz,vd) = u[qz]; }
971 MFEM_FOREACH_THREAD(qy,y,Q1D)
973 MFEM_FOREACH_THREAD(qx,x,Q1D)
976 for (int qz = 0; qz < Q1D; ++qz)
978 const double Qx = QQQ(qx,qy,qz,0);
979 const double Qy = QQQ(qx,qy,qz,1);
980 const double Qz = QQQ(qx,qy,qz,2);
982 const double D11 = D(qx,qy,qz,0,e);
983 const double D12 = D(qx,qy,qz,1,e);
984 const double D13 = D(qx,qy,qz,2,e);
985 const double D21 = symmetric ? D12 : D(qx,qy,qz,3,e);
986 const double D22 = symmetric ? D(qx,qy,qz,3,e) : D(qx,qy,qz,4,e);
987 const double D23 = symmetric ? D(qx,qy,qz,4,e) : D(qx,qy,qz,5,e);
988 const double D31 = symmetric ? D13 : D(qx,qy,qz,6,e);
989 const double D32 = symmetric ? D23 : D(qx,qy,qz,7,e);
990 const double D33 = symmetric ? D(qx,qy,qz,5,e) : D(qx,qy,qz,8,e);
992 QQQ(qx,qy,qz,0) = D11*Qx + D12*Qy + D13*Qz;
993 QQQ(qx,qy,qz,1) = D21*Qx + D22*Qy + D23*Qz;
994 QQQ(qx,qy,qz,2) = D31*Qx + D32*Qy + D33*Qz;
1000 // Apply Bt operator
1001 MFEM_FOREACH_THREAD(vd,z,VDIM)
1003 const int nx = (vd == 0) ? D1D : D1D-1;
1004 DeviceMatrix Btx = (vd == 0) ? Bc : Bo;
1005 MFEM_FOREACH_THREAD(qy,y,Q1D)
1007 MFEM_FOREACH_THREAD(dx,x,nx)
1011 for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
1013 for (int qx = 0; qx < Q1D; ++qx)
1016 for (int qz = 0; qz < Q1D; ++qz)
1018 u[qz] += QQQ(qx,qy,qz,vd) * Btx(dx,qx);
1022 for (int qz = 0; qz < Q1D; ++qz) { DQQ(dx,qy,qz,vd) = u[qz]; }
1027 MFEM_FOREACH_THREAD(vd,z,VDIM)
1029 const int nx = (vd == 0) ? D1D : D1D-1;
1030 const int ny = (vd == 1) ? D1D : D1D-1;
1031 DeviceMatrix Bty = (vd == 1) ? Bc : Bo;
1032 MFEM_FOREACH_THREAD(dy,y,ny)
1034 MFEM_FOREACH_THREAD(dx,x,nx)
1038 for (int qz = 0; qz < Q1D; ++qz) { u[qz] = 0.0; }
1040 for (int qy = 0; qy < Q1D; ++qy)
1043 for (int qz = 0; qz < Q1D; ++qz)
1045 u[qz] += DQQ(dx,qy,qz,vd) * Bty(dy,qy);
1049 for (int qz = 0; qz < Q1D; ++qz) { DDQ(dx,dy,qz,vd) = u[qz]; }
1054 MFEM_FOREACH_THREAD(vd,z,VDIM)
1056 const int nx = (vd == 0) ? D1D : D1D-1;
1057 const int ny = (vd == 1) ? D1D : D1D-1;
1058 const int nz = (vd == 2) ? D1D : D1D-1;
1059 DeviceTensor<5> Yxyz(y, nx, ny, nz, VDIM, NE);
1060 DeviceMatrix Btz = (vd == 2) ? Bc : Bo;
1061 MFEM_FOREACH_THREAD(dy,y,ny)
1063 MFEM_FOREACH_THREAD(dx,x,nx)
1067 for (int dz = 0; dz < nz; ++dz) { u[dz] = 0.0; }
1069 for (int qz = 0; qz < Q1D; ++qz)
1072 for (int dz = 0; dz < nz; ++dz)
1074 u[dz] += DDQ(dx,dy,qz,vd) * Btz(dz,qz);
1078 for (int dz = 0; dz < nz; ++dz) { Yxyz(dx,dy,dz,vd,e) += u[dz]; }
1086 void PAHdivMassApply(const int dim,
1090 const bool symmetric,
1091 const Array<double> &Bo,
1092 const Array<double> &Bc,
1093 const Array<double> &Bot,
1094 const Array<double> &Bct,
1099 const int id = (D1D << 4) | Q1D;
1105 case 0x22: return SmemPAHdivMassApply2D<2,2>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1106 case 0x33: return SmemPAHdivMassApply2D<3,3>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1107 case 0x44: return SmemPAHdivMassApply2D<4,4>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1108 case 0x55: return SmemPAHdivMassApply2D<5,5>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1109 default: // fallback
1110 return PAHdivMassApply2D(D1D,Q1D,NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1117 case 0x23: return SmemPAHdivMassApply3D<2,3>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1118 case 0x34: return SmemPAHdivMassApply3D<3,4>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1119 case 0x45: return SmemPAHdivMassApply3D<4,5>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1120 case 0x56: return SmemPAHdivMassApply3D<5,6>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1121 case 0x67: return SmemPAHdivMassApply3D<6,7>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1122 case 0x78: return SmemPAHdivMassApply3D<7,8>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1123 default: // fallback
1124 return PAHdivMassApply3D(D1D,Q1D,NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
1129 // PA H(div) div-div assemble 2D kernel
1130 // NOTE: this is identical to PACurlCurlSetup3D
1131 static void PADivDivSetup2D(const int Q1D,
1133 const Array<double> &w,
1138 const int NQ = Q1D*Q1D;
1140 auto J = Reshape(j.Read(), NQ, 2, 2, NE);
1141 auto coeff = Reshape(coeff_.Read(), NQ, NE);
1142 auto y = Reshape(op.Write(), NQ, NE);
1145 for (int q = 0; q < NQ; ++q)
1147 const double J11 = J(q,0,0,e);
1148 const double J21 = J(q,1,0,e);
1149 const double J12 = J(q,0,1,e);
1150 const double J22 = J(q,1,1,e);
1151 const double detJ = (J11*J22)-(J21*J12);
1152 y(q,e) = W[q] * coeff(q,e) / detJ;
1157 static void PADivDivSetup3D(const int Q1D,
1159 const Array<double> &w,
1164 const int NQ = Q1D*Q1D*Q1D;
1166 auto J = Reshape(j.Read(), NQ, 3, 3, NE);
1167 auto coeff = Reshape(coeff_.Read(), NQ, NE);
1168 auto y = Reshape(op.Write(), NQ, NE);
1172 for (int q = 0; q < NQ; ++q)
1174 const double J11 = J(q,0,0,e);
1175 const double J21 = J(q,1,0,e);
1176 const double J31 = J(q,2,0,e);
1177 const double J12 = J(q,0,1,e);
1178 const double J22 = J(q,1,1,e);
1179 const double J32 = J(q,2,1,e);
1180 const double J13 = J(q,0,2,e);
1181 const double J23 = J(q,1,2,e);
1182 const double J33 = J(q,2,2,e);
1183 const double detJ = J11 * (J22 * J33 - J32 * J23) -
1184 /* */ J21 * (J12 * J33 - J32 * J13) +
1185 /* */ J31 * (J12 * J23 - J22 * J13);
1186 y(q,e) = W[q] * coeff(q, e) / detJ;
1191 static void PADivDivApply2D(const int D1D,
1194 const Array<double> &Bo_,
1195 const Array<double> &Gc_,
1196 const Array<double> &Bot_,
1197 const Array<double> &Gct_,
1202 constexpr static int VDIM = 2;
1203 constexpr static int MAX_D1D = HDIV_MAX_D1D;
1204 constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
1206 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1207 auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
1208 auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1209 auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
1210 auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
1211 auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
1212 auto y = Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
1216 double div[MAX_Q1D][MAX_Q1D];
1218 // div[qy][qx] will be computed as du_x/dx + du_y/dy
1220 for (int qy = 0; qy < Q1D; ++qy)
1222 for (int qx = 0; qx < Q1D; ++qx)
1230 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1232 const int D1Dx = (c == 1) ? D1D - 1 : D1D;
1233 const int D1Dy = (c == 0) ? D1D - 1 : D1D;
1235 for (int dy = 0; dy < D1Dy; ++dy)
1237 double gradX[MAX_Q1D];
1238 for (int qx = 0; qx < Q1D; ++qx)
1243 for (int dx = 0; dx < D1Dx; ++dx)
1245 const double t = x(dx + (dy * D1Dx) + osc, e);
1246 for (int qx = 0; qx < Q1D; ++qx)
1248 gradX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
1252 for (int qy = 0; qy < Q1D; ++qy)
1254 const double wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy);
1255 for (int qx = 0; qx < Q1D; ++qx)
1257 div[qy][qx] += gradX[qx] * wy;
1263 } // loop (c) over components
1265 // Apply D operator.
1266 for (int qy = 0; qy < Q1D; ++qy)
1268 for (int qx = 0; qx < Q1D; ++qx)
1270 div[qy][qx] *= op(qx,qy,e);
1274 for (int qy = 0; qy < Q1D; ++qy)
1278 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1280 const int D1Dx = (c == 1) ? D1D - 1 : D1D;
1281 const int D1Dy = (c == 0) ? D1D - 1 : D1D;
1283 double gradX[MAX_D1D];
1284 for (int dx = 0; dx < D1Dx; ++dx)
1288 for (int qx = 0; qx < Q1D; ++qx)
1290 for (int dx = 0; dx < D1Dx; ++dx)
1292 gradX[dx] += div[qy][qx] * (c == 0 ? Gct(dx,qx) : Bot(dx,qx));
1295 for (int dy = 0; dy < D1Dy; ++dy)
1297 const double wy = (c == 0) ? Bot(dy,qy) : Gct(dy,qy);
1298 for (int dx = 0; dx < D1Dx; ++dx)
1300 y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1307 }); // end of element loop
1310 static void PADivDivApply3D(const int D1D,
1313 const Array<double> &Bo_,
1314 const Array<double> &Gc_,
1315 const Array<double> &Bot_,
1316 const Array<double> &Gct_,
1321 MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D >
HDIV_MAX_D1D");
1322 MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D >
HDIV_MAX_Q1D");
1323 constexpr static int VDIM = 3;
1325 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1326 auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1327 auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
1328 auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
1329 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
1330 auto x = Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1331 auto y = Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1335 double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1337 for (int qz = 0; qz < Q1D; ++qz)
1339 for (int qy = 0; qy < Q1D; ++qy)
1341 for (int qx = 0; qx < Q1D; ++qx)
1343 div[qz][qy][qx] = 0.0;
1350 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1352 const int D1Dz = (c == 2) ? D1D : D1D - 1;
1353 const int D1Dy = (c == 1) ? D1D : D1D - 1;
1354 const int D1Dx = (c == 0) ? D1D : D1D - 1;
1356 for (int dz = 0; dz < D1Dz; ++dz)
1358 double aXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1359 for (int qy = 0; qy < Q1D; ++qy)
1361 for (int qx = 0; qx < Q1D; ++qx)
1367 for (int dy = 0; dy < D1Dy; ++dy)
1369 double aX[HDIV_MAX_Q1D];
1370 for (int qx = 0; qx < Q1D; ++qx)
1375 for (int dx = 0; dx < D1Dx; ++dx)
1377 const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1378 for (int qx = 0; qx < Q1D; ++qx)
1380 aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
1384 for (int qy = 0; qy < Q1D; ++qy)
1386 const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
1387 for (int qx = 0; qx < Q1D; ++qx)
1389 const double wx = aX[qx];
1390 aXY[qy][qx] += wx * wy;
1395 for (int qz = 0; qz < Q1D; ++qz)
1397 const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
1398 for (int qy = 0; qy < Q1D; ++qy)
1400 for (int qx = 0; qx < Q1D; ++qx)
1402 div[qz][qy][qx] += aXY[qy][qx] * wz;
1408 osc += D1Dx * D1Dy * D1Dz;
1409 } // loop (c) over components
1411 // Apply D operator.
1412 for (int qz = 0; qz < Q1D; ++qz)
1414 for (int qy = 0; qy < Q1D; ++qy)
1416 for (int qx = 0; qx < Q1D; ++qx)
1418 div[qz][qy][qx] *= op(qx,qy,qz,e);
1423 for (int qz = 0; qz < Q1D; ++qz)
1425 double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
1429 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1431 const int D1Dz = (c == 2) ? D1D : D1D - 1;
1432 const int D1Dy = (c == 1) ? D1D : D1D - 1;
1433 const int D1Dx = (c == 0) ? D1D : D1D - 1;
1435 for (int dy = 0; dy < D1Dy; ++dy)
1437 for (int dx = 0; dx < D1Dx; ++dx)
1442 for (int qy = 0; qy < Q1D; ++qy)
1444 double aX[HDIV_MAX_D1D];
1445 for (int dx = 0; dx < D1Dx; ++dx)
1449 for (int qx = 0; qx < Q1D; ++qx)
1451 for (int dx = 0; dx < D1Dx; ++dx)
1453 aX[dx] += div[qz][qy][qx] *
1454 (c == 0 ? Gct(dx,qx) : Bot(dx,qx));
1457 for (int dy = 0; dy < D1Dy; ++dy)
1459 const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
1460 for (int dx = 0; dx < D1Dx; ++dx)
1462 aXY[dy][dx] += aX[dx] * wy;
1467 for (int dz = 0; dz < D1Dz; ++dz)
1469 const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
1470 for (int dy = 0; dy < D1Dy; ++dy)
1472 for (int dx = 0; dx < D1Dx; ++dx)
1474 y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
1480 osc += D1Dx * D1Dy * D1Dz;
1483 }); // end of element loop
1486 void DivDivIntegrator::AssemblePA(const FiniteElementSpace &fes)
1488 // Assumes tensor-product elements
1489 Mesh *mesh = fes.GetMesh();
1490 const FiniteElement *fel = fes.GetFE(0);
1492 const VectorTensorFiniteElement *el =
1493 dynamic_cast<const VectorTensorFiniteElement*>(fel);
1496 const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule
1497 (*el, *el, *mesh->GetElementTransformation(0));
1499 const int dims = el->GetDim();
1500 MFEM_VERIFY(dims == 2 || dims == 3, "");
1502 const int nq = ir->GetNPoints();
1503 dim = mesh->Dimension();
1504 MFEM_VERIFY(dim == 2 || dim == 3, "");
1507 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
1508 mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
1509 mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
1510 dofs1D = mapsC->ndof;
1511 quad1D = mapsC->nqpt;
1513 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
1515 pa_data.SetSize(nq * ne, Device::GetMemoryType());
1517 QuadratureSpace qs(*mesh, *ir);
1518 CoefficientVector coeff(Q, qs, CoefficientStorage::FULL);
1520 if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3)
1522 PADivDivSetup3D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1524 else if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2)
1526 PADivDivSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1530 MFEM_ABORT("Unknown kernel.
");
1534 void DivDivIntegrator::AddMultPA(const Vector &x, Vector &y) const
1537 PADivDivApply3D(dofs1D, quad1D, ne, mapsO->B, mapsC->G,
1538 mapsO->Bt, mapsC->Gt, pa_data, x, y);
1540 PADivDivApply2D(dofs1D, quad1D, ne, mapsO->B, mapsC->G,
1541 mapsO->Bt, mapsC->Gt, pa_data, x, y);
1548 static void PADivDivAssembleDiagonal2D(const int D1D,
1551 const Array<double> &Bo_,
1552 const Array<double> &Gc_,
1556 constexpr static int VDIM = 2;
1557 constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
1559 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1560 auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1561 auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
1562 auto diag = Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE);
1568 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1570 const int D1Dx = (c == 1) ? D1D - 1 : D1D;
1571 const int D1Dy = (c == 0) ? D1D - 1 : D1D;
1573 double div[MAX_Q1D];
1575 for (int dy = 0; dy < D1Dy; ++dy)
1577 for (int qx = 0; qx < Q1D; ++qx)
1580 for (int qy = 0; qy < Q1D; ++qy)
1582 const double wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy);
1583 div[qx] += wy * wy * op(qx,qy,e);
1587 for (int dx = 0; dx < D1Dx; ++dx)
1590 for (int qx = 0; qx < Q1D; ++qx)
1592 const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx);
1593 val += div[qx] * wx * wx;
1595 diag(dx + (dy * D1Dx) + osc, e) += val;
1604 static void PADivDivAssembleDiagonal3D(const int D1D,
1607 const Array<double> &Bo_,
1608 const Array<double> &Gc_,
1612 MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D >
HDIV_MAX_D1D");
1613 MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D >
HDIV_MAX_Q1D");
1614 constexpr static int VDIM = 3;
1616 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1617 auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1618 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
1619 auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1625 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1627 const int D1Dz = (c == 2) ? D1D : D1D - 1;
1628 const int D1Dy = (c == 1) ? D1D : D1D - 1;
1629 const int D1Dx = (c == 0) ? D1D : D1D - 1;
1631 for (int dz = 0; dz < D1Dz; ++dz)
1633 for (int dy = 0; dy < D1Dy; ++dy)
1635 double a[HDIV_MAX_Q1D];
1637 for (int qx = 0; qx < Q1D; ++qx)
1640 for (int qy = 0; qy < Q1D; ++qy)
1642 const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
1644 for (int qz = 0; qz < Q1D; ++qz)
1646 const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
1647 a[qx] += wy * wy * wz * wz * op(qx,qy,qz,e);
1652 for (int dx = 0; dx < D1Dx; ++dx)
1655 for (int qx = 0; qx < Q1D; ++qx)
1657 const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx);
1658 val += a[qx] * wx * wx;
1660 diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val;
1665 osc += D1Dx * D1Dy * D1Dz;
1667 }); // end of element loop
1670 void DivDivIntegrator::AssembleDiagonalPA(Vector& diag)
1674 PADivDivAssembleDiagonal3D(dofs1D, quad1D, ne,
1675 mapsO->B, mapsC->G, pa_data, diag);
1679 PADivDivAssembleDiagonal2D(dofs1D, quad1D, ne,
1680 mapsO->B, mapsC->G, pa_data, diag);
1684 // PA H(div)-L2 (div u, p) assemble 2D kernel
1685 static void PADivL2Setup2D(const int Q1D,
1687 const Array<double> &w,
1691 const int NQ = Q1D*Q1D;
1693 auto coeff = Reshape(coeff_.Read(), NQ, NE);
1694 auto y = Reshape(op.Write(), NQ, NE);
1697 for (int q = 0; q < NQ; ++q)
1699 y(q,e) = W[q] * coeff(q,e);
1704 static void PADivL2Setup3D(const int Q1D,
1706 const Array<double> &w,
1710 const int NQ = Q1D*Q1D*Q1D;
1712 auto coeff = Reshape(coeff_.Read(), NQ, NE);
1713 auto y = Reshape(op.Write(), NQ, NE);
1717 for (int q = 0; q < NQ; ++q)
1719 y(q,e) = W[q] * coeff(q, e);
1725 VectorFEDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
1726 const FiniteElementSpace &test_fes)
1728 // Assumes tensor-product elements, with a vector test space and
1729 // scalar trial space.
1730 Mesh *mesh = trial_fes.GetMesh();
1731 const FiniteElement *trial_fel = trial_fes.GetFE(0);
1732 const FiniteElement *test_fel = test_fes.GetFE(0);
1734 const VectorTensorFiniteElement *trial_el =
1735 dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
1738 const NodalTensorFiniteElement *test_el =
1739 dynamic_cast<const NodalTensorFiniteElement*>(test_fel);
1742 const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule(
1743 *trial_el, *trial_el,
1744 *mesh->GetElementTransformation(0));
1746 const int dims = trial_el->GetDim();
1747 MFEM_VERIFY(dims == 2 || dims == 3, "");
1749 const int nq = ir->GetNPoints();
1750 dim = mesh->Dimension();
1751 MFEM_VERIFY(dim == 2 || dim == 3, "");
1753 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder() + 1, "");
1755 ne = trial_fes.GetNE();
1756 mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
1757 mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
1758 dofs1D = mapsC->ndof;
1759 quad1D = mapsC->nqpt;
1761 L2mapsO = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
1762 L2dofs1D = L2mapsO->ndof;
1764 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
1767 MFEM_VERIFY(nq == quad1D * quad1D, "");
1771 MFEM_VERIFY(nq == quad1D * quad1D * quad1D, "");
1774 pa_data.SetSize(nq * ne, Device::GetMemoryType());
1776 QuadratureSpace qs(*mesh, *ir);
1777 CoefficientVector coeff(Q, qs, CoefficientStorage::FULL);
1779 if (test_el->GetMapType() == FiniteElement::INTEGRAL)
1781 const GeometricFactors *geom =
1782 mesh->GetGeometricFactors(*ir, GeometricFactors::DETERMINANTS);
1783 coeff /= geom->detJ;
1786 if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3)
1788 PADivL2Setup3D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
1790 else if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2)
1792 PADivL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
1796 MFEM_ABORT("Unknown kernel.
");
1800 // Apply to x corresponding to DOFs in H(div) (trial), whose divergence is
1801 // integrated against L_2 test functions corresponding to y.
1802 static void PAHdivL2Apply3D(const int D1D,
1806 const Array<double> &Bo_,
1807 const Array<double> &Gc_,
1808 const Array<double> &L2Bot_,
1813 MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D >
HDIV_MAX_D1D");
1814 MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D >
HDIV_MAX_Q1D");
1815 constexpr static int VDIM = 3;
1817 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1818 auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1819 auto L2Bot = Reshape(L2Bot_.Read(), L2D1D, Q1D);
1820 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
1821 auto x = Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
1822 auto y = Reshape(y_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE);
1826 double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1828 for (int qz = 0; qz < Q1D; ++qz)
1830 for (int qy = 0; qy < Q1D; ++qy)
1832 for (int qx = 0; qx < Q1D; ++qx)
1834 div[qz][qy][qx] = 0.0;
1841 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1843 const int D1Dz = (c == 2) ? D1D : D1D - 1;
1844 const int D1Dy = (c == 1) ? D1D : D1D - 1;
1845 const int D1Dx = (c == 0) ? D1D : D1D - 1;
1847 for (int dz = 0; dz < D1Dz; ++dz)
1849 double aXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
1850 for (int qy = 0; qy < Q1D; ++qy)
1852 for (int qx = 0; qx < Q1D; ++qx)
1858 for (int dy = 0; dy < D1Dy; ++dy)
1860 double aX[HDIV_MAX_Q1D];
1861 for (int qx = 0; qx < Q1D; ++qx)
1866 for (int dx = 0; dx < D1Dx; ++dx)
1868 const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1869 for (int qx = 0; qx < Q1D; ++qx)
1871 aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
1875 for (int qy = 0; qy < Q1D; ++qy)
1877 const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
1878 for (int qx = 0; qx < Q1D; ++qx)
1880 aXY[qy][qx] += aX[qx] * wy;
1885 for (int qz = 0; qz < Q1D; ++qz)
1887 const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
1888 for (int qy = 0; qy < Q1D; ++qy)
1890 for (int qx = 0; qx < Q1D; ++qx)
1892 div[qz][qy][qx] += aXY[qy][qx] * wz;
1898 osc += D1Dx * D1Dy * D1Dz;
1899 } // loop (c) over components
1901 // Apply D operator.
1902 for (int qz = 0; qz < Q1D; ++qz)
1904 for (int qy = 0; qy < Q1D; ++qy)
1906 for (int qx = 0; qx < Q1D; ++qx)
1908 div[qz][qy][qx] *= op(qx,qy,qz,e);
1913 for (int qz = 0; qz < Q1D; ++qz)
1915 double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
1917 for (int dy = 0; dy < L2D1D; ++dy)
1919 for (int dx = 0; dx < L2D1D; ++dx)
1924 for (int qy = 0; qy < Q1D; ++qy)
1926 double aX[HDIV_MAX_D1D];
1927 for (int dx = 0; dx < L2D1D; ++dx)
1931 for (int qx = 0; qx < Q1D; ++qx)
1933 for (int dx = 0; dx < L2D1D; ++dx)
1935 aX[dx] += div[qz][qy][qx] * L2Bot(dx,qx);
1938 for (int dy = 0; dy < L2D1D; ++dy)
1940 const double wy = L2Bot(dy,qy);
1941 for (int dx = 0; dx < L2D1D; ++dx)
1943 aXY[dy][dx] += aX[dx] * wy;
1948 for (int dz = 0; dz < L2D1D; ++dz)
1950 const double wz = L2Bot(dz,qz);
1951 for (int dy = 0; dy < L2D1D; ++dy)
1953 for (int dx = 0; dx < L2D1D; ++dx)
1955 y(dx,dy,dz,e) += aXY[dy][dx] * wz;
1960 }); // end of element loop
1963 // Apply to x corresponding to DOFs in H(div) (trial), whose divergence is
1964 // integrated against L_2 test functions corresponding to y.
1965 static void PAHdivL2Apply2D(const int D1D,
1969 const Array<double> &Bo_,
1970 const Array<double> &Gc_,
1971 const Array<double> &L2Bot_,
1976 constexpr static int VDIM = 2;
1977 constexpr static int MAX_D1D = HDIV_MAX_D1D;
1978 constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
1980 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
1981 auto Gc = Reshape(Gc_.Read(), Q1D, D1D);
1982 auto L2Bot = Reshape(L2Bot_.Read(), L2D1D, Q1D);
1983 auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
1984 auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
1985 auto y = Reshape(y_.ReadWrite(), L2D1D, L2D1D, NE);
1989 double div[MAX_Q1D][MAX_Q1D];
1991 for (int qy = 0; qy < Q1D; ++qy)
1993 for (int qx = 0; qx < Q1D; ++qx)
2001 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2003 const int D1Dy = (c == 1) ? D1D : D1D - 1;
2004 const int D1Dx = (c == 0) ? D1D : D1D - 1;
2006 for (int dy = 0; dy < D1Dy; ++dy)
2009 for (int qx = 0; qx < Q1D; ++qx)
2014 for (int dx = 0; dx < D1Dx; ++dx)
2016 const double t = x(dx + (dy * D1Dx) + osc, e);
2017 for (int qx = 0; qx < Q1D; ++qx)
2019 aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
2023 for (int qy = 0; qy < Q1D; ++qy)
2025 const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
2026 for (int qx = 0; qx < Q1D; ++qx)
2028 div[qy][qx] += aX[qx] * wy;
2034 } // loop (c) over components
2036 // Apply D operator.
2037 for (int qy = 0; qy < Q1D; ++qy)
2039 for (int qx = 0; qx < Q1D; ++qx)
2041 div[qy][qx] *= op(qx,qy,e);
2045 for (int qy = 0; qy < Q1D; ++qy)
2048 for (int dx = 0; dx < L2D1D; ++dx)
2052 for (int qx = 0; qx < Q1D; ++qx)
2054 for (int dx = 0; dx < L2D1D; ++dx)
2056 aX[dx] += div[qy][qx] * L2Bot(dx,qx);
2059 for (int dy = 0; dy < L2D1D; ++dy)
2061 const double wy = L2Bot(dy,qy);
2062 for (int dx = 0; dx < L2D1D; ++dx)
2064 y(dx,dy,e) += aX[dx] * wy;
2068 }); // end of element loop
2071 static void PAHdivL2ApplyTranspose3D(const int D1D,
2075 const Array<double> &L2Bo_,
2076 const Array<double> &Gct_,
2077 const Array<double> &Bot_,
2082 MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D >
HDIV_MAX_D1D");
2083 MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D >
HDIV_MAX_Q1D");
2084 constexpr static int VDIM = 3;
2086 auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2087 auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2088 auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2089 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
2090 auto x = Reshape(x_.Read(), L2D1D, L2D1D, L2D1D, NE);
2091 auto y = Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
2095 double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2097 for (int qz = 0; qz < Q1D; ++qz)
2099 for (int qy = 0; qy < Q1D; ++qy)
2101 for (int qx = 0; qx < Q1D; ++qx)
2103 div[qz][qy][qx] = 0.0;
2108 for (int dz = 0; dz < L2D1D; ++dz)
2110 double aXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2111 for (int qy = 0; qy < Q1D; ++qy)
2113 for (int qx = 0; qx < Q1D; ++qx)
2119 for (int dy = 0; dy < L2D1D; ++dy)
2121 double aX[HDIV_MAX_Q1D];
2122 for (int qx = 0; qx < Q1D; ++qx)
2127 for (int dx = 0; dx < L2D1D; ++dx)
2129 const double t = x(dx,dy,dz,e);
2130 for (int qx = 0; qx < Q1D; ++qx)
2132 aX[qx] += t * L2Bo(qx,dx);
2136 for (int qy = 0; qy < Q1D; ++qy)
2138 const double wy = L2Bo(qy,dy);
2139 for (int qx = 0; qx < Q1D; ++qx)
2141 aXY[qy][qx] += aX[qx] * wy;
2146 for (int qz = 0; qz < Q1D; ++qz)
2148 const double wz = L2Bo(qz,dz);
2149 for (int qy = 0; qy < Q1D; ++qy)
2151 for (int qx = 0; qx < Q1D; ++qx)
2153 div[qz][qy][qx] += aXY[qy][qx] * wz;
2159 // Apply D operator.
2160 for (int qz = 0; qz < Q1D; ++qz)
2162 for (int qy = 0; qy < Q1D; ++qy)
2164 for (int qx = 0; qx < Q1D; ++qx)
2166 div[qz][qy][qx] *= op(qx,qy,qz,e);
2171 for (int qz = 0; qz < Q1D; ++qz)
2173 double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
2176 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2178 const int D1Dz = (c == 2) ? D1D : D1D - 1;
2179 const int D1Dy = (c == 1) ? D1D : D1D - 1;
2180 const int D1Dx = (c == 0) ? D1D : D1D - 1;
2182 for (int dy = 0; dy < D1Dy; ++dy)
2184 for (int dx = 0; dx < D1Dx; ++dx)
2189 for (int qy = 0; qy < Q1D; ++qy)
2191 double aX[HDIV_MAX_D1D];
2192 for (int dx = 0; dx < D1Dx; ++dx)
2196 for (int qx = 0; qx < Q1D; ++qx)
2198 for (int dx = 0; dx < D1Dx; ++dx)
2200 aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx) :
2204 for (int dy = 0; dy < D1Dy; ++dy)
2206 const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
2207 for (int dx = 0; dx < D1Dx; ++dx)
2209 aXY[dy][dx] += aX[dx] * wy;
2214 for (int dz = 0; dz < D1Dz; ++dz)
2216 const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
2217 for (int dy = 0; dy < D1Dy; ++dy)
2219 for (int dx = 0; dx < D1Dx; ++dx)
2221 y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
2227 osc += D1Dx * D1Dy * D1Dz;
2230 }); // end of element loop
2233 static void PAHdivL2ApplyTranspose2D(const int D1D,
2237 const Array<double> &L2Bo_,
2238 const Array<double> &Gct_,
2239 const Array<double> &Bot_,
2244 constexpr static int VDIM = 2;
2245 constexpr static int MAX_D1D = HDIV_MAX_D1D;
2246 constexpr static int MAX_Q1D = HDIV_MAX_Q1D;
2248 auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2249 auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2250 auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2251 auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
2252 auto x = Reshape(x_.Read(), L2D1D, L2D1D, NE);
2253 auto y = Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
2257 double div[MAX_Q1D][MAX_Q1D];
2259 for (int qy = 0; qy < Q1D; ++qy)
2261 for (int qx = 0; qx < Q1D; ++qx)
2267 for (int dy = 0; dy < L2D1D; ++dy)
2270 for (int qx = 0; qx < Q1D; ++qx)
2275 for (int dx = 0; dx < L2D1D; ++dx)
2277 const double t = x(dx,dy,e);
2278 for (int qx = 0; qx < Q1D; ++qx)
2280 aX[qx] += t * L2Bo(qx,dx);
2284 for (int qy = 0; qy < Q1D; ++qy)
2286 const double wy = L2Bo(qy,dy);
2287 for (int qx = 0; qx < Q1D; ++qx)
2289 div[qy][qx] += aX[qx] * wy;
2294 // Apply D operator.
2295 for (int qy = 0; qy < Q1D; ++qy)
2297 for (int qx = 0; qx < Q1D; ++qx)
2299 div[qy][qx] *= op(qx,qy,e);
2303 for (int qy = 0; qy < Q1D; ++qy)
2308 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2310 const int D1Dy = (c == 1) ? D1D : D1D - 1;
2311 const int D1Dx = (c == 0) ? D1D : D1D - 1;
2313 for (int dx = 0; dx < D1Dx; ++dx)
2317 for (int qx = 0; qx < Q1D; ++qx)
2319 for (int dx = 0; dx < D1Dx; ++dx)
2321 aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) : Bot(dx,qx));
2324 for (int dy = 0; dy < D1Dy; ++dy)
2326 const double wy = (c == 0) ? Bot(dy,qy) : Gct(dy,qy);
2327 for (int dx = 0; dx < D1Dx; ++dx)
2329 y(dx + (dy * D1Dx) + osc, e) += aX[dx] * wy;
2336 }); // end of element loop
2339 void VectorFEDivergenceIntegrator::AddMultPA(const Vector &x, Vector &y) const
2342 PAHdivL2Apply3D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G,
2343 L2mapsO->Bt, pa_data, x, y);
2345 PAHdivL2Apply2D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G,
2346 L2mapsO->Bt, pa_data, x, y);
2353 void VectorFEDivergenceIntegrator::AddMultTransposePA(const Vector &x,
2357 PAHdivL2ApplyTranspose3D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2358 mapsC->Gt, mapsO->Bt, pa_data, x, y);
2360 PAHdivL2ApplyTranspose2D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2361 mapsC->Gt, mapsO->Bt, pa_data, x, y);
2368 static void PAHdivL2AssembleDiagonal_ADAt_3D(const int D1D,
2372 const Array<double> &L2Bo_,
2373 const Array<double> &Gct_,
2374 const Array<double> &Bot_,
2379 MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D >
HDIV_MAX_D1D");
2380 MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D >
HDIV_MAX_Q1D");
2381 constexpr static int VDIM = 3;
2383 auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2384 auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2385 auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2386 auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
2387 auto D = Reshape(D_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
2388 auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE);
2392 for (int rz = 0; rz < L2D1D; ++rz)
2394 for (int ry = 0; ry < L2D1D; ++ry)
2396 for (int rx = 0; rx < L2D1D; ++rx)
2398 // Compute row (rx,ry,rz), assuming all contributions are from
2399 // a single element.
2401 double row[3*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)*(HDIV_MAX_D1D-1)];
2402 double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2404 for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i)
2409 for (int qz = 0; qz < Q1D; ++qz)
2411 for (int qy = 0; qy < Q1D; ++qy)
2413 for (int qx = 0; qx < Q1D; ++qx)
2415 div[qz][qy][qx] = op(qx,qy,qz,e) * L2Bo(qx,rx) *
2416 L2Bo(qy,ry) * L2Bo(qz,rz);
2421 for (int qz = 0; qz < Q1D; ++qz)
2423 double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D];
2426 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2428 const int D1Dz = (c == 2) ? D1D : D1D - 1;
2429 const int D1Dy = (c == 1) ? D1D : D1D - 1;
2430 const int D1Dx = (c == 0) ? D1D : D1D - 1;
2432 for (int dy = 0; dy < D1Dy; ++dy)
2434 for (int dx = 0; dx < D1Dx; ++dx)
2439 for (int qy = 0; qy < Q1D; ++qy)
2441 double aX[HDIV_MAX_D1D];
2442 for (int dx = 0; dx < D1Dx; ++dx)
2446 for (int qx = 0; qx < Q1D; ++qx)
2448 for (int dx = 0; dx < D1Dx; ++dx)
2450 aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx)
2454 for (int dy = 0; dy < D1Dy; ++dy)
2456 const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
2457 for (int dx = 0; dx < D1Dx; ++dx)
2459 aXY[dy][dx] += aX[dx] * wy;
2464 for (int dz = 0; dz < D1Dz; ++dz)
2466 const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
2467 for (int dy = 0; dy < D1Dy; ++dy)
2469 for (int dx = 0; dx < D1Dx; ++dx)
2471 row[dx + ((dy + (dz * D1Dy)) * D1Dx) + osc] +=
2477 osc += D1Dx * D1Dy * D1Dz;
2482 for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i)
2484 val += row[i] * row[i] * D(i,e);
2486 diag(rx,ry,rz,e) += val;
2490 }); // end of element loop
2493 static void PAHdivL2AssembleDiagonal_ADAt_2D(const int D1D,
2497 const Array<double> &L2Bo_,
2498 const Array<double> &Gct_,
2499 const Array<double> &Bot_,
2504 constexpr static int VDIM = 2;
2506 auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D);
2507 auto Gct = Reshape(Gct_.Read(), D1D, Q1D);
2508 auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D);
2509 auto op = Reshape(op_.Read(), Q1D, Q1D, NE);
2510 auto D = Reshape(D_.Read(), 2*(D1D-1)*D1D, NE);
2511 auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, NE);
2515 for (int ry = 0; ry < L2D1D; ++ry)
2517 for (int rx = 0; rx < L2D1D; ++rx)
2519 // Compute row (rx,ry), assuming all contributions are from
2520 // a single element.
2522 double row[2*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)];
2523 double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D];
2525 for (int i=0; i<2*D1D*(D1D - 1); ++i)
2530 for (int qy = 0; qy < Q1D; ++qy)
2532 for (int qx = 0; qx < Q1D; ++qx)
2534 div[qy][qx] = op(qx,qy,e) * L2Bo(qx,rx) * L2Bo(qy,ry);
2538 for (int qy = 0; qy < Q1D; ++qy)
2541 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2543 const int D1Dy = (c == 1) ? D1D : D1D - 1;
2544 const int D1Dx = (c == 0) ? D1D : D1D - 1;
2546 double aX[HDIV_MAX_D1D];
2547 for (int dx = 0; dx < D1Dx; ++dx)
2551 for (int qx = 0; qx < Q1D; ++qx)
2553 for (int dx = 0; dx < D1Dx; ++dx)
2555 aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) :
2560 for (int dy = 0; dy < D1Dy; ++dy)
2562 const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
2564 for (int dx = 0; dx < D1Dx; ++dx)
2566 row[dx + (dy * D1Dx) + osc] += aX[dx] * wy;
2575 for (int i=0; i<2*D1D*(D1D - 1); ++i)
2577 val += row[i] * row[i] * D(i,e);
2579 diag(rx,ry,e) += val;
2582 }); // end of element loop
2585 void VectorFEDivergenceIntegrator::AssembleDiagonalPA_ADAt(const Vector &D,
2589 PAHdivL2AssembleDiagonal_ADAt_3D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2590 mapsC->Gt, mapsO->Bt, pa_data, D, diag);
2592 PAHdivL2AssembleDiagonal_ADAt_2D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B,
2593 mapsC->Gt, mapsO->Bt, pa_data, D, diag);
constexpr int HDIV_MAX_D1D
void PAHdivMassApply2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Array< double > &Bot_, const Array< double > &Bct_, const Vector &op_, const Vector &x_, Vector &y_)
void PAHdivMassAssembleDiagonal3D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Vector &op_, Vector &diag_)
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
A basic generic Tensor class, appropriate for use on the GPU.
void PAHdivSetup2D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, Vector &coeff_, Vector &op)
void PAHdivMassAssembleDiagonal2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Vector &op_, Vector &diag_)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
constexpr int dimension
This example only works in 3D. Kernels for 2D are not implemented.
constexpr int HDIV_MAX_Q1D
void SmemPAHdivMassApply2D(const int NE, const bool symmetric, const Array< double > &Bo_, const Array< double > &Bc_, const Array< double > &Bot_, const Array< double > &Bct_, const Vector &op_, const Vector &x_, Vector &y_, const int d1d=0, const int q1d=0)
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void PAHdivSetup3D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, Vector &coeff_, Vector &op)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.