20void PAHdivMassSetup2D(
const int Q1D,
 
   23                       const Array<real_t> &w,
 
   28   const bool symmetric = (coeffDim != 4);
 
   29   const int NQ = Q1D*Q1D;
 
   32   auto J = 
Reshape(j.Read(), NQ, 2, 2, NE);
 
   33   auto C = 
Reshape(coeff_.Read(), coeffDim, NQ, NE);
 
   34   auto y = 
Reshape(op.Write(), NQ, symmetric ? 3 : 4, NE);
 
   38      for (
int q = 0; q < NQ; ++q)
 
   40         const real_t J11 = J(q,0,0,e);
 
   41         const real_t J21 = J(q,1,0,e);
 
   42         const real_t J12 = J(q,0,1,e);
 
   43         const real_t J22 = J(q,1,1,e);
 
   44         const real_t c_detJ = W[q] / ((J11*J22)-(J21*J12));
 
   47         if (coeffDim == 3 || coeffDim == 4) 
 
   49            const real_t C11 = C(0,q,e);
 
   50            const real_t C12 = C(1,q,e);
 
   51            const real_t C21 = symmetric ? C12 : C(2,q,e);
 
   52            const real_t C22 = symmetric ? C(2,q,e) : C(3,q,e);
 
   53            const real_t R11 = C11*J11 + C12*J21;
 
   54            const real_t R21 = C21*J11 + C22*J21;
 
   55            const real_t R12 = C11*J12 + C12*J22;
 
   56            const real_t R22 = C21*J12 + C22*J22;
 
   58            y(q,0,e) = c_detJ * (J11*R11 + J21*R21); 
 
   59            y(q,1,e) = c_detJ * (J11*R12 + J21*R22); 
 
   63               y(q,2,e) = c_detJ * (J12*R12 + J22*R22); 
 
   67               y(q,2,e) = c_detJ * (J12*R11 + J22*R21); 
 
   68               y(q,3,e) = c_detJ * (J12*R12 + J22*R22); 
 
   73            const real_t C1 = C(0,q,e);
 
   74            const real_t C2 = (coeffDim == 2 ? C(1,q,e) : C1);
 
   75            y(q,0,e) = c_detJ * (J11*C1*J11 + J21*C2*J21); 
 
   76            y(q,1,e) = c_detJ * (J11*C1*J12 + J21*C2*J22); 
 
   77            y(q,2,e) = c_detJ * (J12*C1*J12 + J22*C2*J22); 
 
   83void PAHdivMassSetup3D(
const int Q1D,
 
   86                       const Array<real_t> &w,
 
   91   const bool symmetric = (coeffDim != 9);
 
   92   const int NQ = Q1D*Q1D*Q1D;
 
   94   auto J = 
Reshape(j.Read(), NQ, 3, 3, NE);
 
   95   auto C = 
Reshape(coeff_.Read(), coeffDim, NQ, NE);
 
   96   auto y = 
Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
 
  100      for (
int q = 0; q < NQ; ++q)
 
  102         const real_t J11 = J(q,0,0,e);
 
  103         const real_t J21 = J(q,1,0,e);
 
  104         const real_t J31 = J(q,2,0,e);
 
  105         const real_t J12 = J(q,0,1,e);
 
  106         const real_t J22 = J(q,1,1,e);
 
  107         const real_t J32 = J(q,2,1,e);
 
  108         const real_t J13 = J(q,0,2,e);
 
  109         const real_t J23 = J(q,1,2,e);
 
  110         const real_t J33 = J(q,2,2,e);
 
  111         const real_t detJ = J11 * (J22 * J33 - J32 * J23) -
 
  112                             J21 * (J12 * J33 - J32 * J13) +
 
  113                             J31 * (J12 * J23 - J22 * J13);
 
  114         const real_t c_detJ = W[q] / detJ;
 
  117         if (coeffDim == 6 || coeffDim == 9) 
 
  120            M[0][0] = C(0, q, e);
 
  121            M[0][1] = C(1, q, e);
 
  122            M[0][2] = C(2, q, e);
 
  123            M[1][0] = (!symmetric) ? C(3, q, e) : M[0][1];
 
  124            M[1][1] = (!symmetric) ? C(4, q, e) : C(3, q, e);
 
  125            M[1][2] = (!symmetric) ? C(5, q, e) : C(4, q, e);
 
  126            M[2][0] = (!symmetric) ? C(6, q, e) : M[0][2];
 
  127            M[2][1] = (!symmetric) ? C(7, q, e) : M[1][2];
 
  128            M[2][2] = (!symmetric) ? C(8, q, e) : C(5, q, e);
 
  131            for (
int i=0; i<3; ++i)
 
  132               for (
int j = (symmetric ? i : 0); j<3; ++j)
 
  135                  for (
int k=0; k<3; ++k)
 
  138                     for (
int l=0; l<3; ++l)
 
  140                        MJ_kj += M[k][l] * J(q,l,j,e);
 
  143                     y(q,idx,e) += J(q,k,i,e) * MJ_kj;
 
  146                  y(q,idx,e) *= c_detJ;
 
  153            for (
int i=0; i<3; ++i)
 
  154               for (
int j=i; j<3; ++j)
 
  157                  for (
int k=0; k<3; ++k)
 
  159                     y(q,idx,e) += J(q,k,i,e) * C(coeffDim == 3 ? k : 0, q, e) * J(q,k,j,e);
 
  162                  y(q,idx,e) *= c_detJ;
 
  170void PAHdivMassAssembleDiagonal2D(
const int D1D,
 
  173                                  const bool symmetric,
 
  174                                  const Array<real_t> &Bo_,
 
  175                                  const Array<real_t> &Bc_,
 
  179   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  180   auto Bc = 
Reshape(Bc_.Read(), Q1D, D1D);
 
  181   auto op = 
Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
 
  182   auto diag = 
Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE);
 
  186      constexpr static int VDIM = 2;
 
  187      constexpr static int MAX_Q1D = DofQuadLimits::HDIV_MAX_Q1D;
 
  191      for (
int c = 0; c < VDIM; ++c)  
 
  193         const int D1Dx = (c == 1) ? D1D - 1 : D1D;
 
  194         const int D1Dy = (c == 0) ? D1D - 1 : D1D;
 
  196         for (
int dy = 0; dy < D1Dy; ++dy)
 
  199            for (
int qx = 0; qx < Q1D; ++qx)
 
  202               for (
int qy = 0; qy < Q1D; ++qy)
 
  204                  const real_t wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
 
  205                  mass[qx] += wy*wy*((c == 0) ? op(qx,qy,0,e) : op(qx,qy,symmetric ? 2 : 3,e));
 
  209            for (
int dx = 0; dx < D1Dx; ++dx)
 
  212               for (
int qx = 0; qx < Q1D; ++qx)
 
  214                  const real_t wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx);
 
  215                  val += mass[qx] * wx * wx;
 
  217               diag(dx + (dy * D1Dx) + osc, e) += val;
 
  226void PAHdivMassAssembleDiagonal3D(
const int D1D,
 
  229                                  const bool symmetric,
 
  230                                  const Array<real_t> &Bo_,
 
  231                                  const Array<real_t> &Bc_,
 
  236               "Error: D1D > HDIV_MAX_D1D");
 
  238               "Error: Q1D > HDIV_MAX_Q1D");
 
  239   constexpr static int VDIM = 3;
 
  241   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  242   auto Bc = 
Reshape(Bc_.Read(), Q1D, D1D);
 
  243   auto op = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
 
  244   auto diag = 
Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
  250      for (
int c = 0; c < VDIM; ++c)  
 
  252         const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
  253         const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
  254         const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
  256         const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
 
  257                                         (symmetric ? 5 : 8));
 
  259         real_t mass[DofQuadLimits::HDIV_MAX_Q1D];
 
  261         for (
int dz = 0; dz < D1Dz; ++dz)
 
  263            for (
int dy = 0; dy < D1Dy; ++dy)
 
  265               for (
int qx = 0; qx < Q1D; ++qx)
 
  268                  for (
int qy = 0; qy < Q1D; ++qy)
 
  270                     const real_t wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
 
  271                     for (
int qz = 0; qz < Q1D; ++qz)
 
  273                        const real_t wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
 
  274                        mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
 
  279               for (
int dx = 0; dx < D1Dx; ++dx)
 
  282                  for (
int qx = 0; qx < Q1D; ++qx)
 
  284                     const real_t wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx);
 
  285                     val += mass[qx] * wx * wx;
 
  287                  diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val;
 
  292         osc += D1Dx * D1Dy * D1Dz;
 
  297void PAHdivMassApply(
const int dim,
 
  301                     const bool symmetric,
 
  302                     const Array<real_t> &Bo,
 
  303                     const Array<real_t> &Bc,
 
  304                     const Array<real_t> &Bot,
 
  305                     const Array<real_t> &Bct,
 
  310   const int id = (D1D << 4) | Q1D;
 
  316         case 0x22: 
return SmemPAHdivMassApply2D<2,2>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  317         case 0x33: 
return SmemPAHdivMassApply2D<3,3>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  318         case 0x44: 
return SmemPAHdivMassApply2D<4,4>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  319         case 0x55: 
return SmemPAHdivMassApply2D<5,5>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  321            return PAHdivMassApply2D(D1D,Q1D,NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  328         case 0x23: 
return SmemPAHdivMassApply3D<2,3>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  329         case 0x34: 
return SmemPAHdivMassApply3D<3,4>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  330         case 0x45: 
return SmemPAHdivMassApply3D<4,5>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  331         case 0x56: 
return SmemPAHdivMassApply3D<5,6>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  332         case 0x67: 
return SmemPAHdivMassApply3D<6,7>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  333         case 0x78: 
return SmemPAHdivMassApply3D<7,8>(NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  335            return PAHdivMassApply3D(D1D,Q1D,NE,symmetric,Bo,Bc,Bot,Bct,op,x,y);
 
  340void PAHdivMassApply2D(
const int D1D,
 
  343                       const bool symmetric,
 
  344                       const Array<real_t> &Bo_,
 
  345                       const Array<real_t> &Bc_,
 
  346                       const Array<real_t> &Bot_,
 
  347                       const Array<real_t> &Bct_,
 
  352   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  353   auto Bc = 
Reshape(Bc_.Read(), Q1D, D1D);
 
  354   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
  355   auto Bct = 
Reshape(Bct_.Read(), D1D, Q1D);
 
  356   auto op = 
Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
 
  357   auto x = 
Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
 
  358   auto y = 
Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
 
  362      constexpr static int VDIM = 2;
 
  363      constexpr static int MAX_D1D = DofQuadLimits::HDIV_MAX_D1D;
 
  364      constexpr static int MAX_Q1D = DofQuadLimits::HDIV_MAX_Q1D;
 
  366      real_t mass[MAX_Q1D][MAX_Q1D][VDIM];
 
  368      for (
int qy = 0; qy < Q1D; ++qy)
 
  370         for (
int qx = 0; qx < Q1D; ++qx)
 
  372            for (
int c = 0; c < VDIM; ++c)
 
  374               mass[qy][qx][c] = 0.0;
 
  381      for (
int c = 0; c < VDIM; ++c)  
 
  383         const int D1Dx = (c == 1) ? D1D - 1 : D1D;
 
  384         const int D1Dy = (c == 0) ? D1D - 1 : D1D;
 
  386         for (
int dy = 0; dy < D1Dy; ++dy)
 
  389            for (
int qx = 0; qx < Q1D; ++qx)
 
  394            for (
int dx = 0; dx < D1Dx; ++dx)
 
  396               const real_t t = x(dx + (dy * D1Dx) + osc, e);
 
  397               for (
int qx = 0; qx < Q1D; ++qx)
 
  399                  massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
 
  403            for (
int qy = 0; qy < Q1D; ++qy)
 
  405               const real_t wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
 
  406               for (
int qx = 0; qx < Q1D; ++qx)
 
  408                  mass[qy][qx][c] += massX[qx] * wy;
 
  417      for (
int qy = 0; qy < Q1D; ++qy)
 
  419         for (
int qx = 0; qx < Q1D; ++qx)
 
  421            const real_t O11 = op(qx,qy,0,e);
 
  422            const real_t O12 = op(qx,qy,1,e);
 
  423            const real_t O21 = symmetric ? O12 : op(qx,qy,2,e);
 
  424            const real_t O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
 
  425            const real_t massX = mass[qy][qx][0];
 
  426            const real_t massY = mass[qy][qx][1];
 
  427            mass[qy][qx][0] = (O11*massX)+(O12*massY);
 
  428            mass[qy][qx][1] = (O21*massX)+(O22*massY);
 
  432      for (
int qy = 0; qy < Q1D; ++qy)
 
  436         for (
int c = 0; c < VDIM; ++c)  
 
  438            const int D1Dx = (c == 1) ? D1D - 1 : D1D;
 
  439            const int D1Dy = (c == 0) ? D1D - 1 : D1D;
 
  442            for (
int dx = 0; dx < D1Dx; ++dx)
 
  446            for (
int qx = 0; qx < Q1D; ++qx)
 
  448               for (
int dx = 0; dx < D1Dx; ++dx)
 
  450                  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bct(dx,qx) :
 
  455            for (
int dy = 0; dy < D1Dy; ++dy)
 
  457               const real_t wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
 
  459               for (
int dx = 0; dx < D1Dx; ++dx)
 
  461                  y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
 
  471void PAHdivMassApply3D(
const int D1D,
 
  474                       const bool symmetric,
 
  475                       const Array<real_t> &Bo_,
 
  476                       const Array<real_t> &Bc_,
 
  477                       const Array<real_t> &Bot_,
 
  478                       const Array<real_t> &Bct_,
 
  484               "Error: D1D > HDIV_MAX_D1D");
 
  486               "Error: Q1D > HDIV_MAX_Q1D");
 
  487   constexpr static int VDIM = 3;
 
  489   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  490   auto Bc = 
Reshape(Bc_.Read(), Q1D, D1D);
 
  491   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
  492   auto Bct = 
Reshape(Bct_.Read(), D1D, Q1D);
 
  493   auto op = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
 
  494   auto x = 
Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
  495   auto y = 
Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
  499      real_t mass[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D][VDIM];
 
  501      for (
int qz = 0; qz < Q1D; ++qz)
 
  503         for (
int qy = 0; qy < Q1D; ++qy)
 
  505            for (
int qx = 0; qx < Q1D; ++qx)
 
  507               for (
int c = 0; c < VDIM; ++c)
 
  509                  mass[qz][qy][qx][c] = 0.0;
 
  517      for (
int c = 0; c < VDIM; ++c)  
 
  519         const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
  520         const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
  521         const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
  523         for (
int dz = 0; dz < D1Dz; ++dz)
 
  525            real_t massXY[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
  526            for (
int qy = 0; qy < Q1D; ++qy)
 
  528               for (
int qx = 0; qx < Q1D; ++qx)
 
  530                  massXY[qy][qx] = 0.0;
 
  534            for (
int dy = 0; dy < D1Dy; ++dy)
 
  536               real_t massX[DofQuadLimits::HDIV_MAX_Q1D];
 
  537               for (
int qx = 0; qx < Q1D; ++qx)
 
  542               for (
int dx = 0; dx < D1Dx; ++dx)
 
  544                  const real_t t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
 
  545                  for (
int qx = 0; qx < Q1D; ++qx)
 
  547                     massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
 
  551               for (
int qy = 0; qy < Q1D; ++qy)
 
  553                  const real_t wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
 
  554                  for (
int qx = 0; qx < Q1D; ++qx)
 
  556                     const real_t wx = massX[qx];
 
  557                     massXY[qy][qx] += wx * wy;
 
  562            for (
int qz = 0; qz < Q1D; ++qz)
 
  564               const real_t wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
 
  565               for (
int qy = 0; qy < Q1D; ++qy)
 
  567                  for (
int qx = 0; qx < Q1D; ++qx)
 
  569                     mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
 
  575         osc += D1Dx * D1Dy * D1Dz;
 
  579      for (
int qz = 0; qz < Q1D; ++qz)
 
  581         for (
int qy = 0; qy < Q1D; ++qy)
 
  583            for (
int qx = 0; qx < Q1D; ++qx)
 
  585               const real_t O11 = op(qx,qy,qz,0,e);
 
  586               const real_t O12 = op(qx,qy,qz,1,e);
 
  587               const real_t O13 = op(qx,qy,qz,2,e);
 
  588               const real_t O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
 
  589               const real_t O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
 
  590               const real_t O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
 
  591               const real_t O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
 
  592               const real_t O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
 
  593               const real_t O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
 
  595               const real_t massX = mass[qz][qy][qx][0];
 
  596               const real_t massY = mass[qz][qy][qx][1];
 
  597               const real_t massZ = mass[qz][qy][qx][2];
 
  598               mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
 
  599               mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
 
  600               mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
 
  605      for (
int qz = 0; qz < Q1D; ++qz)
 
  607         real_t massXY[DofQuadLimits::HDIV_MAX_D1D][DofQuadLimits::HDIV_MAX_D1D];
 
  611         for (
int c = 0; c < VDIM; ++c)  
 
  613            const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
  614            const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
  615            const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
  617            for (
int dy = 0; dy < D1Dy; ++dy)
 
  619               for (
int dx = 0; dx < D1Dx; ++dx)
 
  624            for (
int qy = 0; qy < Q1D; ++qy)
 
  626               real_t massX[DofQuadLimits::HDIV_MAX_D1D];
 
  627               for (
int dx = 0; dx < D1Dx; ++dx)
 
  631               for (
int qx = 0; qx < Q1D; ++qx)
 
  633                  for (
int dx = 0; dx < D1Dx; ++dx)
 
  635                     massX[dx] += mass[qz][qy][qx][c] *
 
  636                                  ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
 
  639               for (
int dy = 0; dy < D1Dy; ++dy)
 
  641                  const real_t wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
 
  642                  for (
int dx = 0; dx < D1Dx; ++dx)
 
  644                     massXY[dy][dx] += massX[dx] * wy;
 
  649            for (
int dz = 0; dz < D1Dz; ++dz)
 
  651               const real_t wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
 
  652               for (
int dy = 0; dy < D1Dy; ++dy)
 
  654                  for (
int dx = 0; dx < D1Dx; ++dx)
 
  656                     y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
 
  662            osc += D1Dx * D1Dy * D1Dz;
 
  669void PADivDivSetup2D(
const int Q1D,
 
  671                     const Array<real_t> &w,
 
  676   const int NQ = Q1D*Q1D;
 
  678   auto J = 
Reshape(j.Read(), NQ, 2, 2, NE);
 
  679   auto coeff = 
Reshape(coeff_.Read(), NQ, NE);
 
  680   auto y = 
Reshape(op.Write(), NQ, NE);
 
  683      for (
int q = 0; q < NQ; ++q)
 
  685         const real_t J11 = J(q,0,0,e);
 
  686         const real_t J21 = J(q,1,0,e);
 
  687         const real_t J12 = J(q,0,1,e);
 
  688         const real_t J22 = J(q,1,1,e);
 
  689         const real_t detJ = (J11*J22)-(J21*J12);
 
  690         y(q,e) = W[q] * coeff(q,e) / detJ;
 
  695void PADivDivSetup3D(
const int Q1D,
 
  697                     const Array<real_t> &w,
 
  702   const int NQ = Q1D*Q1D*Q1D;
 
  704   auto J = 
Reshape(j.Read(), NQ, 3, 3, NE);
 
  705   auto coeff = 
Reshape(coeff_.Read(), NQ, NE);
 
  706   auto y = 
Reshape(op.Write(), NQ, NE);
 
  710      for (
int q = 0; q < NQ; ++q)
 
  712         const real_t J11 = J(q,0,0,e);
 
  713         const real_t J21 = J(q,1,0,e);
 
  714         const real_t J31 = J(q,2,0,e);
 
  715         const real_t J12 = J(q,0,1,e);
 
  716         const real_t J22 = J(q,1,1,e);
 
  717         const real_t J32 = J(q,2,1,e);
 
  718         const real_t J13 = J(q,0,2,e);
 
  719         const real_t J23 = J(q,1,2,e);
 
  720         const real_t J33 = J(q,2,2,e);
 
  721         const real_t detJ = J11 * (J22 * J33 - J32 * J23) -
 
  722                             J21 * (J12 * J33 - J32 * J13) +
 
  723                             J31 * (J12 * J23 - J22 * J13);
 
  724         y(q,e) = W[q] * coeff(q, e) / detJ;
 
  729void PADivDivAssembleDiagonal2D(
const int D1D,
 
  732                                const Array<real_t> &Bo_,
 
  733                                const Array<real_t> &Gc_,
 
  737   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  738   auto Gc = 
Reshape(Gc_.Read(), Q1D, D1D);
 
  739   auto op = 
Reshape(op_.Read(), Q1D, Q1D, NE);
 
  740   auto diag = 
Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE);
 
  744      constexpr static int VDIM = 2;
 
  745      constexpr static int MAX_Q1D = DofQuadLimits::HDIV_MAX_Q1D;
 
  749      for (
int c = 0; c < VDIM; ++c)  
 
  751         const int D1Dx = (c == 1) ? D1D - 1 : D1D;
 
  752         const int D1Dy = (c == 0) ? D1D - 1 : D1D;
 
  756         for (
int dy = 0; dy < D1Dy; ++dy)
 
  758            for (
int qx = 0; qx < Q1D; ++qx)
 
  761               for (
int qy = 0; qy < Q1D; ++qy)
 
  763                  const real_t wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy);
 
  764                  div[qx] += wy * wy * op(qx,qy,e);
 
  768            for (
int dx = 0; dx < D1Dx; ++dx)
 
  771               for (
int qx = 0; qx < Q1D; ++qx)
 
  773                  const real_t wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx);
 
  774                  val += div[qx] * wx * wx;
 
  776               diag(dx + (dy * D1Dx) + osc, e) += val;
 
  785void PADivDivAssembleDiagonal3D(
const int D1D,
 
  788                                const Array<real_t> &Bo_,
 
  789                                const Array<real_t> &Gc_,
 
  794               "Error: D1D > HDIV_MAX_D1D");
 
  796               "Error: Q1D > HDIV_MAX_Q1D");
 
  797   constexpr static int VDIM = 3;
 
  799   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  800   auto Gc = 
Reshape(Gc_.Read(), Q1D, D1D);
 
  801   auto op = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
 
  802   auto diag = 
Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
  808      for (
int c = 0; c < VDIM; ++c)  
 
  810         const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
  811         const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
  812         const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
  814         for (
int dz = 0; dz < D1Dz; ++dz)
 
  816            for (
int dy = 0; dy < D1Dy; ++dy)
 
  818               real_t a[DofQuadLimits::HDIV_MAX_Q1D];
 
  820               for (
int qx = 0; qx < Q1D; ++qx)
 
  823                  for (
int qy = 0; qy < Q1D; ++qy)
 
  825                     const real_t wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
 
  827                     for (
int qz = 0; qz < Q1D; ++qz)
 
  829                        const real_t wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
 
  830                        a[qx] += wy * wy * wz * wz * op(qx,qy,qz,e);
 
  835               for (
int dx = 0; dx < D1Dx; ++dx)
 
  838                  for (
int qx = 0; qx < Q1D; ++qx)
 
  840                     const real_t wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx);
 
  841                     val += 
a[qx] * wx * wx;
 
  843                  diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val;
 
  848         osc += D1Dx * D1Dy * D1Dz;
 
  853void PADivDivApply2D(
const int D1D,
 
  856                     const Array<real_t> &Bo_,
 
  857                     const Array<real_t> &Gc_,
 
  858                     const Array<real_t> &Bot_,
 
  859                     const Array<real_t> &Gct_,
 
  864   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  865   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
  866   auto Gc = 
Reshape(Gc_.Read(), Q1D, D1D);
 
  867   auto Gct = 
Reshape(Gct_.Read(), D1D, Q1D);
 
  868   auto op = 
Reshape(op_.Read(), Q1D, Q1D, NE);
 
  869   auto x = 
Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
 
  870   auto y = 
Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
 
  874      constexpr static int VDIM = 2;
 
  875      constexpr static int MAX_D1D = DofQuadLimits::HDIV_MAX_D1D;
 
  876      constexpr static int MAX_Q1D = DofQuadLimits::HDIV_MAX_Q1D;
 
  878      real_t div[MAX_Q1D][MAX_Q1D];
 
  882      for (
int qy = 0; qy < Q1D; ++qy)
 
  884         for (
int qx = 0; qx < Q1D; ++qx)
 
  892      for (
int c = 0; c < VDIM; ++c)  
 
  894         const int D1Dx = (c == 1) ? D1D - 1 : D1D;
 
  895         const int D1Dy = (c == 0) ? D1D - 1 : D1D;
 
  897         for (
int dy = 0; dy < D1Dy; ++dy)
 
  900            for (
int qx = 0; qx < Q1D; ++qx)
 
  905            for (
int dx = 0; dx < D1Dx; ++dx)
 
  907               const real_t t = x(dx + (dy * D1Dx) + osc, e);
 
  908               for (
int qx = 0; qx < Q1D; ++qx)
 
  910                  gradX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
 
  914            for (
int qy = 0; qy < Q1D; ++qy)
 
  916               const real_t wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy);
 
  917               for (
int qx = 0; qx < Q1D; ++qx)
 
  919                  div[qy][qx] += gradX[qx] * wy;
 
  928      for (
int qy = 0; qy < Q1D; ++qy)
 
  930         for (
int qx = 0; qx < Q1D; ++qx)
 
  932            div[qy][qx] *= op(qx,qy,e);
 
  936      for (
int qy = 0; qy < Q1D; ++qy)
 
  940         for (
int c = 0; c < VDIM; ++c)  
 
  942            const int D1Dx = (c == 1) ? D1D - 1 : D1D;
 
  943            const int D1Dy = (c == 0) ? D1D - 1 : D1D;
 
  946            for (
int dx = 0; dx < D1Dx; ++dx)
 
  950            for (
int qx = 0; qx < Q1D; ++qx)
 
  952               for (
int dx = 0; dx < D1Dx; ++dx)
 
  954                  gradX[dx] += div[qy][qx] * (c == 0 ? Gct(dx,qx) : Bot(dx,qx));
 
  957            for (
int dy = 0; dy < D1Dy; ++dy)
 
  959               const real_t wy = (c == 0) ? Bot(dy,qy) : Gct(dy,qy);
 
  960               for (
int dx = 0; dx < D1Dx; ++dx)
 
  962                  y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
 
  972void PADivDivApply3D(
const int D1D,
 
  975                     const Array<real_t> &Bo_,
 
  976                     const Array<real_t> &Gc_,
 
  977                     const Array<real_t> &Bot_,
 
  978                     const Array<real_t> &Gct_,
 
  984               "Error: D1D > HDIV_MAX_D1D");
 
  986               "Error: Q1D > HDIV_MAX_Q1D");
 
  987   constexpr static int VDIM = 3;
 
  989   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  990   auto Gc = 
Reshape(Gc_.Read(), Q1D, D1D);
 
  991   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
  992   auto Gct = 
Reshape(Gct_.Read(), D1D, Q1D);
 
  993   auto op = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
 
  994   auto x = 
Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
  995   auto y = 
Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
  999      real_t div[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1001      for (
int qz = 0; qz < Q1D; ++qz)
 
 1003         for (
int qy = 0; qy < Q1D; ++qy)
 
 1005            for (
int qx = 0; qx < Q1D; ++qx)
 
 1007               div[qz][qy][qx] = 0.0;
 
 1014      for (
int c = 0; c < VDIM; ++c)  
 
 1016         const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
 1017         const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1018         const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1020         for (
int dz = 0; dz < D1Dz; ++dz)
 
 1022            real_t aXY[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1023            for (
int qy = 0; qy < Q1D; ++qy)
 
 1025               for (
int qx = 0; qx < Q1D; ++qx)
 
 1031            for (
int dy = 0; dy < D1Dy; ++dy)
 
 1033               real_t aX[DofQuadLimits::HDIV_MAX_Q1D];
 
 1034               for (
int qx = 0; qx < Q1D; ++qx)
 
 1039               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1041                  const real_t t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
 
 1042                  for (
int qx = 0; qx < Q1D; ++qx)
 
 1044                     aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
 
 1048               for (
int qy = 0; qy < Q1D; ++qy)
 
 1050                  const real_t wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
 
 1051                  for (
int qx = 0; qx < Q1D; ++qx)
 
 1053                     const real_t wx = aX[qx];
 
 1054                     aXY[qy][qx] += wx * wy;
 
 1059            for (
int qz = 0; qz < Q1D; ++qz)
 
 1061               const real_t wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
 
 1062               for (
int qy = 0; qy < Q1D; ++qy)
 
 1064                  for (
int qx = 0; qx < Q1D; ++qx)
 
 1066                     div[qz][qy][qx] += aXY[qy][qx] * wz;
 
 1072         osc += D1Dx * D1Dy * D1Dz;
 
 1076      for (
int qz = 0; qz < Q1D; ++qz)
 
 1078         for (
int qy = 0; qy < Q1D; ++qy)
 
 1080            for (
int qx = 0; qx < Q1D; ++qx)
 
 1082               div[qz][qy][qx] *= op(qx,qy,qz,e);
 
 1087      for (
int qz = 0; qz < Q1D; ++qz)
 
 1089         real_t aXY[DofQuadLimits::HDIV_MAX_D1D][DofQuadLimits::HDIV_MAX_D1D];
 
 1093         for (
int c = 0; c < VDIM; ++c)  
 
 1095            const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
 1096            const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1097            const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1099            for (
int dy = 0; dy < D1Dy; ++dy)
 
 1101               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1106            for (
int qy = 0; qy < Q1D; ++qy)
 
 1108               real_t aX[DofQuadLimits::HDIV_MAX_D1D];
 
 1109               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1113               for (
int qx = 0; qx < Q1D; ++qx)
 
 1115                  for (
int dx = 0; dx < D1Dx; ++dx)
 
 1117                     aX[dx] += div[qz][qy][qx] *
 
 1118                               (c == 0 ? Gct(dx,qx) : Bot(dx,qx));
 
 1121               for (
int dy = 0; dy < D1Dy; ++dy)
 
 1123                  const real_t wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
 
 1124                  for (
int dx = 0; dx < D1Dx; ++dx)
 
 1126                     aXY[dy][dx] += aX[dx] * wy;
 
 1131            for (
int dz = 0; dz < D1Dz; ++dz)
 
 1133               const real_t wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
 
 1134               for (
int dy = 0; dy < D1Dy; ++dy)
 
 1136                  for (
int dx = 0; dx < D1Dx; ++dx)
 
 1138                     y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
 
 1144            osc += D1Dx * D1Dy * D1Dz;
 
 1150void PAHdivL2Setup2D(
const int Q1D,
 
 1152                     const Array<real_t> &w,
 
 1156   const int NQ = Q1D*Q1D;
 
 1158   auto coeff = 
Reshape(coeff_.Read(), NQ, NE);
 
 1159   auto y = 
Reshape(op.Write(), NQ, NE);
 
 1162      for (
int q = 0; q < NQ; ++q)
 
 1164         y(q,e) = W[q] * coeff(q,e);
 
 1169void PAHdivL2Setup3D(
const int Q1D,
 
 1171                     const Array<real_t> &w,
 
 1175   const int NQ = Q1D*Q1D*Q1D;
 
 1177   auto coeff = 
Reshape(coeff_.Read(), NQ, NE);
 
 1178   auto y = 
Reshape(op.Write(), NQ, NE);
 
 1182      for (
int q = 0; q < NQ; ++q)
 
 1184         y(q,e) = W[q] * coeff(q, e);
 
 1189void PAHdivL2AssembleDiagonal_ADAt_2D(
const int D1D,
 
 1193                                      const Array<real_t> &L2Bo_,
 
 1194                                      const Array<real_t> &Gct_,
 
 1195                                      const Array<real_t> &Bot_,
 
 1200   constexpr static int VDIM = 2;
 
 1202   auto L2Bo = 
Reshape(L2Bo_.Read(), Q1D, L2D1D);
 
 1203   auto Gct = 
Reshape(Gct_.Read(), D1D, Q1D);
 
 1204   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
 1205   auto op = 
Reshape(op_.Read(), Q1D, Q1D, NE);
 
 1206   auto D = 
Reshape(D_.Read(), 2*(D1D-1)*D1D, NE);
 
 1207   auto diag = 
Reshape(diag_.ReadWrite(), L2D1D, L2D1D, NE);
 
 1211      for (
int ry = 0; ry < L2D1D; ++ry)
 
 1213         for (
int rx = 0; rx < L2D1D; ++rx)
 
 1218            real_t row[2*DofQuadLimits::HDIV_MAX_D1D*(DofQuadLimits::HDIV_MAX_D1D-1)];
 
 1219            real_t div[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1221            for (
int i=0; i<2*D1D*(D1D - 1); ++i)
 
 1226            for (
int qy = 0; qy < Q1D; ++qy)
 
 1228               for (
int qx = 0; qx < Q1D; ++qx)
 
 1230                  div[qy][qx] = op(qx,qy,e) * L2Bo(qx,rx) * L2Bo(qy,ry);
 
 1234            for (
int qy = 0; qy < Q1D; ++qy)
 
 1237               for (
int c = 0; c < VDIM; ++c)  
 
 1239                  const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1240                  const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1242                  real_t aX[DofQuadLimits::HDIV_MAX_D1D];
 
 1243                  for (
int dx = 0; dx < D1Dx; ++dx)
 
 1247                  for (
int qx = 0; qx < Q1D; ++qx)
 
 1249                     for (
int dx = 0; dx < D1Dx; ++dx)
 
 1251                        aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) :
 
 1256                  for (
int dy = 0; dy < D1Dy; ++dy)
 
 1258                     const real_t wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
 
 1260                     for (
int dx = 0; dx < D1Dx; ++dx)
 
 1262                        row[dx + (dy * D1Dx) + osc] += aX[dx] * wy;
 
 1271            for (
int i=0; i<2*D1D*(D1D - 1); ++i)
 
 1273               val += row[i] * row[i] * D(i,e);
 
 1275            diag(rx,ry,e) += val;
 
 1281void PAHdivL2AssembleDiagonal_ADAt_3D(
const int D1D,
 
 1285                                      const Array<real_t> &L2Bo_,
 
 1286                                      const Array<real_t> &Gct_,
 
 1287                                      const Array<real_t> &Bot_,
 
 1293               "Error: D1D > HDIV_MAX_D1D");
 
 1295               "Error: Q1D > HDIV_MAX_Q1D");
 
 1296   constexpr static int VDIM = 3;
 
 1298   auto L2Bo = 
Reshape(L2Bo_.Read(), Q1D, L2D1D);
 
 1299   auto Gct = 
Reshape(Gct_.Read(), D1D, Q1D);
 
 1300   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
 1301   auto op = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
 
 1302   auto D = 
Reshape(D_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
 1303   auto diag = 
Reshape(diag_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE);
 
 1307      for (
int rz = 0; rz < L2D1D; ++rz)
 
 1309         for (
int ry = 0; ry < L2D1D; ++ry)
 
 1311            for (
int rx = 0; rx < L2D1D; ++rx)
 
 1316               real_t row[3*DofQuadLimits::HDIV_MAX_D1D*(DofQuadLimits::HDIV_MAX_D1D-1)*
 
 1317                          (DofQuadLimits::HDIV_MAX_D1D-1)];
 
 1318               real_t div[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1320               for (
int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i)
 
 1325               for (
int qz = 0; qz < Q1D; ++qz)
 
 1327                  for (
int qy = 0; qy < Q1D; ++qy)
 
 1329                     for (
int qx = 0; qx < Q1D; ++qx)
 
 1331                        div[qz][qy][qx] = op(qx,qy,qz,e) * L2Bo(qx,rx) *
 
 1332                                          L2Bo(qy,ry) * L2Bo(qz,rz);
 
 1337               for (
int qz = 0; qz < Q1D; ++qz)
 
 1339                  real_t aXY[DofQuadLimits::HDIV_MAX_D1D][DofQuadLimits::HDIV_MAX_D1D];
 
 1342                  for (
int c = 0; c < VDIM; ++c)  
 
 1344                     const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
 1345                     const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1346                     const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1348                     for (
int dy = 0; dy < D1Dy; ++dy)
 
 1350                        for (
int dx = 0; dx < D1Dx; ++dx)
 
 1355                     for (
int qy = 0; qy < Q1D; ++qy)
 
 1357                        real_t aX[DofQuadLimits::HDIV_MAX_D1D];
 
 1358                        for (
int dx = 0; dx < D1Dx; ++dx)
 
 1362                        for (
int qx = 0; qx < Q1D; ++qx)
 
 1364                           for (
int dx = 0; dx < D1Dx; ++dx)
 
 1366                              aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx)
 
 1370                        for (
int dy = 0; dy < D1Dy; ++dy)
 
 1372                           const real_t wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
 
 1373                           for (
int dx = 0; dx < D1Dx; ++dx)
 
 1375                              aXY[dy][dx] += aX[dx] * wy;
 
 1380                     for (
int dz = 0; dz < D1Dz; ++dz)
 
 1382                        const real_t wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
 
 1383                        for (
int dy = 0; dy < D1Dy; ++dy)
 
 1385                           for (
int dx = 0; dx < D1Dx; ++dx)
 
 1387                              row[dx + ((dy + (dz * D1Dy)) * D1Dx) + osc] +=
 
 1393                     osc += D1Dx * D1Dy * D1Dz;
 
 1398               for (
int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i)
 
 1400                  val += row[i] * row[i] * D(i,e);
 
 1402               diag(rx,ry,rz,e) += val;
 
 1411void PAHdivL2Apply2D(
const int D1D,
 
 1415                     const Array<real_t> &Bo_,
 
 1416                     const Array<real_t> &Gc_,
 
 1417                     const Array<real_t> &L2Bot_,
 
 1422   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
 1423   auto Gc = 
Reshape(Gc_.Read(), Q1D, D1D);
 
 1424   auto L2Bot = 
Reshape(L2Bot_.Read(), L2D1D, Q1D);
 
 1425   auto op = 
Reshape(op_.Read(), Q1D, Q1D, NE);
 
 1426   auto x = 
Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
 
 1427   auto y = 
Reshape(y_.ReadWrite(), L2D1D, L2D1D, NE);
 
 1431      constexpr static int VDIM = 2;
 
 1432      constexpr static int MAX_D1D = DofQuadLimits::HDIV_MAX_D1D;
 
 1433      constexpr static int MAX_Q1D = DofQuadLimits::HDIV_MAX_Q1D;
 
 1435      real_t div[MAX_Q1D][MAX_Q1D];
 
 1437      for (
int qy = 0; qy < Q1D; ++qy)
 
 1439         for (
int qx = 0; qx < Q1D; ++qx)
 
 1447      for (
int c = 0; c < VDIM; ++c)  
 
 1449         const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1450         const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1452         for (
int dy = 0; dy < D1Dy; ++dy)
 
 1455            for (
int qx = 0; qx < Q1D; ++qx)
 
 1460            for (
int dx = 0; dx < D1Dx; ++dx)
 
 1462               const real_t t = x(dx + (dy * D1Dx) + osc, e);
 
 1463               for (
int qx = 0; qx < Q1D; ++qx)
 
 1465                  aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
 
 1469            for (
int qy = 0; qy < Q1D; ++qy)
 
 1471               const real_t wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
 
 1472               for (
int qx = 0; qx < Q1D; ++qx)
 
 1474                  div[qy][qx] += aX[qx] * wy;
 
 1483      for (
int qy = 0; qy < Q1D; ++qy)
 
 1485         for (
int qx = 0; qx < Q1D; ++qx)
 
 1487            div[qy][qx] *= op(qx,qy,e);
 
 1491      for (
int qy = 0; qy < Q1D; ++qy)
 
 1494         for (
int dx = 0; dx < L2D1D; ++dx)
 
 1498         for (
int qx = 0; qx < Q1D; ++qx)
 
 1500            for (
int dx = 0; dx < L2D1D; ++dx)
 
 1502               aX[dx] += div[qy][qx] * L2Bot(dx,qx);
 
 1505         for (
int dy = 0; dy < L2D1D; ++dy)
 
 1507            const real_t wy = L2Bot(dy,qy);
 
 1508            for (
int dx = 0; dx < L2D1D; ++dx)
 
 1510               y(dx,dy,e) += aX[dx] * wy;
 
 1517void PAHdivL2ApplyTranspose2D(
const int D1D,
 
 1521                              const Array<real_t> &L2Bo_,
 
 1522                              const Array<real_t> &Gct_,
 
 1523                              const Array<real_t> &Bot_,
 
 1528   auto L2Bo = 
Reshape(L2Bo_.Read(), Q1D, L2D1D);
 
 1529   auto Gct = 
Reshape(Gct_.Read(), D1D, Q1D);
 
 1530   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
 1531   auto op = 
Reshape(op_.Read(), Q1D, Q1D, NE);
 
 1532   auto x = 
Reshape(x_.Read(), L2D1D, L2D1D, NE);
 
 1533   auto y = 
Reshape(y_.ReadWrite(), 2*(D1D-1)*D1D, NE);
 
 1537      constexpr static int VDIM = 2;
 
 1538      constexpr static int MAX_D1D = DofQuadLimits::HDIV_MAX_D1D;
 
 1539      constexpr static int MAX_Q1D = DofQuadLimits::HDIV_MAX_Q1D;
 
 1541      real_t div[MAX_Q1D][MAX_Q1D];
 
 1543      for (
int qy = 0; qy < Q1D; ++qy)
 
 1545         for (
int qx = 0; qx < Q1D; ++qx)
 
 1551      for (
int dy = 0; dy < L2D1D; ++dy)
 
 1554         for (
int qx = 0; qx < Q1D; ++qx)
 
 1559         for (
int dx = 0; dx < L2D1D; ++dx)
 
 1561            const real_t t = x(dx,dy,e);
 
 1562            for (
int qx = 0; qx < Q1D; ++qx)
 
 1564               aX[qx] += t * L2Bo(qx,dx);
 
 1568         for (
int qy = 0; qy < Q1D; ++qy)
 
 1570            const real_t wy = L2Bo(qy,dy);
 
 1571            for (
int qx = 0; qx < Q1D; ++qx)
 
 1573               div[qy][qx] += aX[qx] * wy;
 
 1579      for (
int qy = 0; qy < Q1D; ++qy)
 
 1581         for (
int qx = 0; qx < Q1D; ++qx)
 
 1583            div[qy][qx] *= op(qx,qy,e);
 
 1587      for (
int qy = 0; qy < Q1D; ++qy)
 
 1592         for (
int c = 0; c < VDIM; ++c)  
 
 1594            const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1595            const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1597            for (
int dx = 0; dx < D1Dx; ++dx)
 
 1601            for (
int qx = 0; qx < Q1D; ++qx)
 
 1603               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1605                  aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) : Bot(dx,qx));
 
 1608            for (
int dy = 0; dy < D1Dy; ++dy)
 
 1610               const real_t wy = (c == 0) ? Bot(dy,qy) : Gct(dy,qy);
 
 1611               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1613                  y(dx + (dy * D1Dx) + osc, e) += aX[dx] * wy;
 
 1625void PAHdivL2Apply3D(
const int D1D,
 
 1629                     const Array<real_t> &Bo_,
 
 1630                     const Array<real_t> &Gc_,
 
 1631                     const Array<real_t> &L2Bot_,
 
 1637               "Error: D1D > HDIV_MAX_D1D");
 
 1639               "Error: Q1D > HDIV_MAX_Q1D");
 
 1640   constexpr static int VDIM = 3;
 
 1642   auto Bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
 1643   auto Gc = 
Reshape(Gc_.Read(), Q1D, D1D);
 
 1644   auto L2Bot = 
Reshape(L2Bot_.Read(), L2D1D, Q1D);
 
 1645   auto op = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
 
 1646   auto x = 
Reshape(x_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
 1647   auto y = 
Reshape(y_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE);
 
 1651      real_t div[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1653      for (
int qz = 0; qz < Q1D; ++qz)
 
 1655         for (
int qy = 0; qy < Q1D; ++qy)
 
 1657            for (
int qx = 0; qx < Q1D; ++qx)
 
 1659               div[qz][qy][qx] = 0.0;
 
 1666      for (
int c = 0; c < VDIM; ++c)  
 
 1668         const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
 1669         const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1670         const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1672         for (
int dz = 0; dz < D1Dz; ++dz)
 
 1674            real_t aXY[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1675            for (
int qy = 0; qy < Q1D; ++qy)
 
 1677               for (
int qx = 0; qx < Q1D; ++qx)
 
 1683            for (
int dy = 0; dy < D1Dy; ++dy)
 
 1685               real_t aX[DofQuadLimits::HDIV_MAX_Q1D];
 
 1686               for (
int qx = 0; qx < Q1D; ++qx)
 
 1691               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1693                  const real_t t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
 
 1694                  for (
int qx = 0; qx < Q1D; ++qx)
 
 1696                     aX[qx] += t * ((c == 0) ? Gc(qx,dx) : Bo(qx,dx));
 
 1700               for (
int qy = 0; qy < Q1D; ++qy)
 
 1702                  const real_t wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy);
 
 1703                  for (
int qx = 0; qx < Q1D; ++qx)
 
 1705                     aXY[qy][qx] += aX[qx] * wy;
 
 1710            for (
int qz = 0; qz < Q1D; ++qz)
 
 1712               const real_t wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz);
 
 1713               for (
int qy = 0; qy < Q1D; ++qy)
 
 1715                  for (
int qx = 0; qx < Q1D; ++qx)
 
 1717                     div[qz][qy][qx] += aXY[qy][qx] * wz;
 
 1723         osc += D1Dx * D1Dy * D1Dz;
 
 1727      for (
int qz = 0; qz < Q1D; ++qz)
 
 1729         for (
int qy = 0; qy < Q1D; ++qy)
 
 1731            for (
int qx = 0; qx < Q1D; ++qx)
 
 1733               div[qz][qy][qx] *= op(qx,qy,qz,e);
 
 1738      for (
int qz = 0; qz < Q1D; ++qz)
 
 1740         real_t aXY[DofQuadLimits::HDIV_MAX_D1D][DofQuadLimits::HDIV_MAX_D1D];
 
 1742         for (
int dy = 0; dy < L2D1D; ++dy)
 
 1744            for (
int dx = 0; dx < L2D1D; ++dx)
 
 1749         for (
int qy = 0; qy < Q1D; ++qy)
 
 1751            real_t aX[DofQuadLimits::HDIV_MAX_D1D];
 
 1752            for (
int dx = 0; dx < L2D1D; ++dx)
 
 1756            for (
int qx = 0; qx < Q1D; ++qx)
 
 1758               for (
int dx = 0; dx < L2D1D; ++dx)
 
 1760                  aX[dx] += div[qz][qy][qx] * L2Bot(dx,qx);
 
 1763            for (
int dy = 0; dy < L2D1D; ++dy)
 
 1765               const real_t wy = L2Bot(dy,qy);
 
 1766               for (
int dx = 0; dx < L2D1D; ++dx)
 
 1768                  aXY[dy][dx] += aX[dx] * wy;
 
 1773         for (
int dz = 0; dz < L2D1D; ++dz)
 
 1775            const real_t wz = L2Bot(dz,qz);
 
 1776            for (
int dy = 0; dy < L2D1D; ++dy)
 
 1778               for (
int dx = 0; dx < L2D1D; ++dx)
 
 1780                  y(dx,dy,dz,e) += aXY[dy][dx] * wz;
 
 1788void PAHdivL2ApplyTranspose3D(
const int D1D,
 
 1792                              const Array<real_t> &L2Bo_,
 
 1793                              const Array<real_t> &Gct_,
 
 1794                              const Array<real_t> &Bot_,
 
 1800               "Error: D1D > HDIV_MAX_D1D");
 
 1802               "Error: Q1D > HDIV_MAX_Q1D");
 
 1803   constexpr static int VDIM = 3;
 
 1805   auto L2Bo = 
Reshape(L2Bo_.Read(), Q1D, L2D1D);
 
 1806   auto Gct = 
Reshape(Gct_.Read(), D1D, Q1D);
 
 1807   auto Bot = 
Reshape(Bot_.Read(), D1D-1, Q1D);
 
 1808   auto op = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, NE);
 
 1809   auto x = 
Reshape(x_.Read(), L2D1D, L2D1D, L2D1D, NE);
 
 1810   auto y = 
Reshape(y_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE);
 
 1814      real_t div[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1816      for (
int qz = 0; qz < Q1D; ++qz)
 
 1818         for (
int qy = 0; qy < Q1D; ++qy)
 
 1820            for (
int qx = 0; qx < Q1D; ++qx)
 
 1822               div[qz][qy][qx] = 0.0;
 
 1827      for (
int dz = 0; dz < L2D1D; ++dz)
 
 1829         real_t aXY[DofQuadLimits::HDIV_MAX_Q1D][DofQuadLimits::HDIV_MAX_Q1D];
 
 1830         for (
int qy = 0; qy < Q1D; ++qy)
 
 1832            for (
int qx = 0; qx < Q1D; ++qx)
 
 1838         for (
int dy = 0; dy < L2D1D; ++dy)
 
 1840            real_t aX[DofQuadLimits::HDIV_MAX_Q1D];
 
 1841            for (
int qx = 0; qx < Q1D; ++qx)
 
 1846            for (
int dx = 0; dx < L2D1D; ++dx)
 
 1848               const real_t t = x(dx,dy,dz,e);
 
 1849               for (
int qx = 0; qx < Q1D; ++qx)
 
 1851                  aX[qx] += t * L2Bo(qx,dx);
 
 1855            for (
int qy = 0; qy < Q1D; ++qy)
 
 1857               const real_t wy = L2Bo(qy,dy);
 
 1858               for (
int qx = 0; qx < Q1D; ++qx)
 
 1860                  aXY[qy][qx] += aX[qx] * wy;
 
 1865         for (
int qz = 0; qz < Q1D; ++qz)
 
 1867            const real_t wz = L2Bo(qz,dz);
 
 1868            for (
int qy = 0; qy < Q1D; ++qy)
 
 1870               for (
int qx = 0; qx < Q1D; ++qx)
 
 1872                  div[qz][qy][qx] += aXY[qy][qx] * wz;
 
 1879      for (
int qz = 0; qz < Q1D; ++qz)
 
 1881         for (
int qy = 0; qy < Q1D; ++qy)
 
 1883            for (
int qx = 0; qx < Q1D; ++qx)
 
 1885               div[qz][qy][qx] *= op(qx,qy,qz,e);
 
 1890      for (
int qz = 0; qz < Q1D; ++qz)
 
 1892         real_t aXY[DofQuadLimits::HDIV_MAX_D1D][DofQuadLimits::HDIV_MAX_D1D];
 
 1895         for (
int c = 0; c < VDIM; ++c)  
 
 1897            const int D1Dz = (c == 2) ? D1D : D1D - 1;
 
 1898            const int D1Dy = (c == 1) ? D1D : D1D - 1;
 
 1899            const int D1Dx = (c == 0) ? D1D : D1D - 1;
 
 1901            for (
int dy = 0; dy < D1Dy; ++dy)
 
 1903               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1908            for (
int qy = 0; qy < Q1D; ++qy)
 
 1910               real_t aX[DofQuadLimits::HDIV_MAX_D1D];
 
 1911               for (
int dx = 0; dx < D1Dx; ++dx)
 
 1915               for (
int qx = 0; qx < Q1D; ++qx)
 
 1917                  for (
int dx = 0; dx < D1Dx; ++dx)
 
 1919                     aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx) :
 
 1923               for (
int dy = 0; dy < D1Dy; ++dy)
 
 1925                  const real_t wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy);
 
 1926                  for (
int dx = 0; dx < D1Dx; ++dx)
 
 1928                     aXY[dy][dx] += aX[dx] * wy;
 
 1933            for (
int dz = 0; dz < D1Dz; ++dz)
 
 1935               const real_t wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz);
 
 1936               for (
int dy = 0; dy < D1Dy; ++dy)
 
 1938                  for (
int dx = 0; dx < D1Dx; ++dx)
 
 1940                     y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
 
 1946            osc += D1Dx * D1Dy * D1Dz;
 
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.