22               "PA Only supports Ordering::byNODES!");
 
   52      MFEM_VERIFY(cQ != NULL, 
"only ConstantCoefficient is supported!");
 
   60      MFEM_ABORT(
"dim==1 not supported!");
 
   68         for (
int q = 0; q < NQ; ++q)
 
   70            const real_t J11 = J(q, 0, 0, e);
 
   71            const real_t J12 = J(q, 0, 1, e);
 
   72            const real_t J21 = J(q, 1, 0, e);
 
   73            const real_t J22 = J(q, 1, 1, e);
 
   75            G(q, 0, 0, e) = W[q] * COEFF * J22;  
 
   76            G(q, 0, 1, e) = W[q] * COEFF * -J12; 
 
   77            G(q, 1, 0, e) = W[q] * COEFF * -J21; 
 
   78            G(q, 1, 1, e) = W[q] * COEFF * J11;  
 
   88         for (
int q = 0; q < NQ; ++q)
 
   90            const real_t J11 = J(q, 0, 0, e);
 
   91            const real_t J21 = J(q, 1, 0, e);
 
   92            const real_t J31 = J(q, 2, 0, e);
 
   93            const real_t J12 = J(q, 0, 1, e);
 
   94            const real_t J22 = J(q, 1, 1, e);
 
   95            const real_t J32 = J(q, 2, 1, e);
 
   96            const real_t J13 = J(q, 0, 2, e);
 
   97            const real_t J23 = J(q, 1, 2, e);
 
   98            const real_t J33 = J(q, 2, 2, e);
 
   99            const real_t cw = W[q] * COEFF;
 
  101            const real_t A11 = (J22 * J33) - (J23 * J32);
 
  102            const real_t A12 = (J32 * J13) - (J12 * J33);
 
  103            const real_t A13 = (J12 * J23) - (J22 * J13);
 
  104            const real_t A21 = (J31 * J23) - (J21 * J33);
 
  105            const real_t A22 = (J11 * J33) - (J13 * J31);
 
  106            const real_t A23 = (J21 * J13) - (J11 * J23);
 
  107            const real_t A31 = (J21 * J32) - (J31 * J22);
 
  108            const real_t A32 = (J31 * J12) - (J11 * J32);
 
  109            const real_t A33 = (J11 * J22) - (J12 * J21);
 
  111            G(q, 0, 0, e) = cw * A11; 
 
  112            G(q, 0, 1, e) = cw * A12; 
 
  113            G(q, 0, 2, e) = cw * A13; 
 
  114            G(q, 1, 0, e) = cw * A21; 
 
  115            G(q, 1, 1, e) = cw * A22; 
 
  116            G(q, 1, 2, e) = cw * A23; 
 
  117            G(q, 2, 0, e) = cw * A31; 
 
  118            G(q, 2, 1, e) = cw * A32; 
 
  119            G(q, 2, 2, e) = cw * A33; 
 
 
  126template<
int T_D1D = 0, 
int T_Q1D = 0>
 
  127static void PAConvectionNLApply2D(
const int NE,
 
  137   const int D1D = T_D1D ? T_D1D : d1d;
 
  138   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  141   auto B = 
Reshape(
b.Read(), Q1D, D1D);
 
  149      const int D1D = T_D1D ? T_D1D : d1d;
 
  150      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  151      constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
  152      constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
 
  154      real_t data[max_Q1D][max_Q1D][2];
 
  155      real_t grad0[max_Q1D][max_Q1D][2];
 
  156      real_t grad1[max_Q1D][max_Q1D][2];
 
  157      real_t Z[max_Q1D][max_Q1D][2];
 
  158      for (
int qy = 0; qy < Q1D; ++qy)
 
  160         for (
int qx = 0; qx < Q1D; ++qx)
 
  162            data[qy][qx][0] = 0.0;
 
  163            data[qy][qx][1] = 0.0;
 
  164            grad0[qy][qx][0] = 0.0;
 
  165            grad0[qy][qx][1] = 0.0;
 
  166            grad1[qy][qx][0] = 0.0;
 
  167            grad1[qy][qx][1] = 0.0;
 
  170      for (
int dy = 0; dy < D1D; ++dy)
 
  173         real_t gradX0[max_Q1D][2];
 
  174         real_t gradX1[max_Q1D][2];
 
  175         for (
int qx = 0; qx < Q1D; ++qx)
 
  184         for (
int dx = 0; dx < D1D; ++dx)
 
  186            const real_t s0 = x(dx, dy, 0, e);
 
  187            const real_t s1 = x(dx, dy, 1, e);
 
  188            for (
int qx = 0; qx < Q1D; ++qx)
 
  190               const real_t Bx = B(qx, dx);
 
  191               const real_t Gx = G(qx, dx);
 
  192               dataX[qx][0] += s0 * Bx;
 
  193               dataX[qx][1] += s1 * Bx;
 
  194               gradX0[qx][0] += s0 * Gx;
 
  195               gradX0[qx][1] += s0 * Bx;
 
  196               gradX1[qx][0] += s1 * Gx;
 
  197               gradX1[qx][1] += s1 * Bx;
 
  200         for (
int qy = 0; qy < Q1D; ++qy)
 
  202            const real_t By = B(qy, dy);
 
  203            const real_t Gy = G(qy, dy);
 
  204            for (
int qx = 0; qx < Q1D; ++qx)
 
  206               data[qy][qx][0] += dataX[qx][0] * By;
 
  207               data[qy][qx][1] += dataX[qx][1] * By;
 
  208               grad0[qy][qx][0] += gradX0[qx][0] * By;
 
  209               grad0[qy][qx][1] += gradX0[qx][1] * Gy;
 
  210               grad1[qy][qx][0] += gradX1[qx][0] * By;
 
  211               grad1[qy][qx][1] += gradX1[qx][1] * Gy;
 
  215      for (
int qy = 0; qy < Q1D; ++qy)
 
  217         for (
int qx = 0; qx < Q1D; ++qx)
 
  219            const int q = qx + qy * Q1D;
 
  220            const real_t u1 = data[qy][qx][0];
 
  221            const real_t u2 = data[qy][qx][1];
 
  222            const real_t grad00 = grad0[qy][qx][0];
 
  223            const real_t grad01 = grad0[qy][qx][1];
 
  224            const real_t grad10 = grad1[qy][qx][0];
 
  225            const real_t grad11 = grad1[qy][qx][1];
 
  226            const real_t Dxu1 = grad00 * Q(q, 0, 0, e) + grad01 * Q(q, 1, 0, e);
 
  227            const real_t Dyu1 = grad00 * Q(q, 0, 1, e) + grad01 * Q(q, 1, 1, e);
 
  228            const real_t Dxu2 = grad10 * Q(q, 0, 0, e) + grad11 * Q(q, 1, 0, e);
 
  229            const real_t Dyu2 = grad10 * Q(q, 0, 1, e) + grad11 * Q(q, 1, 1, e);
 
  230            Z[qy][qx][0] = u1 * Dxu1 + u2 * Dyu1;
 
  231            Z[qy][qx][1] = u1 * Dxu2 + u2 * Dyu2;
 
  234      for (
int qy = 0; qy < Q1D; ++qy)
 
  237         for (
int dx = 0; dx < D1D; ++dx)
 
  241            for (
int qx = 0; qx < Q1D; ++qx)
 
  243               const real_t Btx = Bt(dx, qx);
 
  244               Y[dx][0] += Btx * Z[qy][qx][0];
 
  245               Y[dx][1] += Btx * Z[qy][qx][1];
 
  248         for (
int dy = 0; dy < D1D; ++dy)
 
  250            for (
int dx = 0; dx < D1D; ++dx)
 
  252               const real_t Bty = Bt(dy, qy);
 
  253               y(dx, dy, 0, e) += Bty * Y[dx][0];
 
  254               y(dx, dy, 1, e) += Bty * Y[dx][1];
 
  262template<
int T_D1D = 0, 
int T_Q1D = 0>
 
  263static void PAConvectionNLApply3D(
const int NE,
 
  264                                  const Array<real_t> &
b,
 
  265                                  const Array<real_t> &g,
 
  266                                  const Array<real_t> &bt,
 
  273   constexpr int VDIM = 3;
 
  274   const int D1D = T_D1D ? T_D1D : d1d;
 
  275   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  279   auto B = 
Reshape(
b.Read(), Q1D, D1D);
 
  280   auto G = 
Reshape(g.Read(), Q1D, D1D);
 
  281   auto Bt = 
Reshape(bt.Read(), D1D, Q1D);
 
  282   auto Q = 
Reshape(q_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
 
  283   auto x = 
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
 
  284   auto y = 
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
 
  288      constexpr int VDIM = 3;
 
  289      const int D1D = T_D1D ? T_D1D : d1d;
 
  290      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  291      constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
 
  292      constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
 
  294      real_t data[max_Q1D][max_Q1D][max_Q1D][VDIM];
 
  295      real_t grad0[max_Q1D][max_Q1D][max_Q1D][VDIM];
 
  296      real_t grad1[max_Q1D][max_Q1D][max_Q1D][VDIM];
 
  297      real_t grad2[max_Q1D][max_Q1D][max_Q1D][VDIM];
 
  298      real_t Z[max_Q1D][max_Q1D][max_Q1D][VDIM];
 
  299      for (
int qz = 0; qz < Q1D; ++qz)
 
  301         for (
int qy = 0; qy < Q1D; ++qy)
 
  303            for (
int qx = 0; qx < Q1D; ++qx)
 
  305               data[qz][qy][qx][0] = 0.0;
 
  306               data[qz][qy][qx][1] = 0.0;
 
  307               data[qz][qy][qx][2] = 0.0;
 
  309               grad0[qz][qy][qx][0] = 0.0;
 
  310               grad0[qz][qy][qx][1] = 0.0;
 
  311               grad0[qz][qy][qx][2] = 0.0;
 
  313               grad1[qz][qy][qx][0] = 0.0;
 
  314               grad1[qz][qy][qx][1] = 0.0;
 
  315               grad1[qz][qy][qx][2] = 0.0;
 
  317               grad2[qz][qy][qx][0] = 0.0;
 
  318               grad2[qz][qy][qx][1] = 0.0;
 
  319               grad2[qz][qy][qx][2] = 0.0;
 
  323      for (
int dz = 0; dz < D1D; ++dz)
 
  325         real_t dataXY[max_Q1D][max_Q1D][VDIM];
 
  326         real_t gradXY0[max_Q1D][max_Q1D][VDIM];
 
  327         real_t gradXY1[max_Q1D][max_Q1D][VDIM];
 
  328         real_t gradXY2[max_Q1D][max_Q1D][VDIM];
 
  329         for (
int qy = 0; qy < Q1D; ++qy)
 
  331            for (
int qx = 0; qx < Q1D; ++qx)
 
  333               dataXY[qy][qx][0] = 0.0;
 
  334               dataXY[qy][qx][1] = 0.0;
 
  335               dataXY[qy][qx][2] = 0.0;
 
  337               gradXY0[qy][qx][0] = 0.0;
 
  338               gradXY0[qy][qx][1] = 0.0;
 
  339               gradXY0[qy][qx][2] = 0.0;
 
  341               gradXY1[qy][qx][0] = 0.0;
 
  342               gradXY1[qy][qx][1] = 0.0;
 
  343               gradXY1[qy][qx][2] = 0.0;
 
  345               gradXY2[qy][qx][0] = 0.0;
 
  346               gradXY2[qy][qx][1] = 0.0;
 
  347               gradXY2[qy][qx][2] = 0.0;
 
  350         for (
int dy = 0; dy < D1D; ++dy)
 
  352            real_t dataX[max_Q1D][VDIM];
 
  353            real_t gradX0[max_Q1D][VDIM];
 
  354            real_t gradX1[max_Q1D][VDIM];
 
  355            real_t gradX2[max_Q1D][VDIM];
 
  356            for (
int qx = 0; qx < Q1D; ++qx)
 
  374            for (
int dx = 0; dx < D1D; ++dx)
 
  376               const real_t s0 = x(dx, dy, dz, 0, e);
 
  377               const real_t s1 = x(dx, dy, dz, 1, e);
 
  378               const real_t s2 = x(dx, dy, dz, 2, e);
 
  379               for (
int qx = 0; qx < Q1D; ++qx)
 
  381                  const real_t Bx = B(qx, dx);
 
  382                  const real_t Gx = G(qx, dx);
 
  384                  dataX[qx][0] += s0 * Bx;
 
  385                  dataX[qx][1] += s1 * Bx;
 
  386                  dataX[qx][2] += s2 * Bx;
 
  388                  gradX0[qx][0] += s0 * Gx;
 
  389                  gradX0[qx][1] += s0 * Bx;
 
  390                  gradX0[qx][2] += s0 * Bx;
 
  392                  gradX1[qx][0] += s1 * Gx;
 
  393                  gradX1[qx][1] += s1 * Bx;
 
  394                  gradX1[qx][2] += s1 * Bx;
 
  396                  gradX2[qx][0] += s2 * Gx;
 
  397                  gradX2[qx][1] += s2 * Bx;
 
  398                  gradX2[qx][2] += s2 * Bx;
 
  401            for (
int qy = 0; qy < Q1D; ++qy)
 
  403               const real_t By = B(qy, dy);
 
  404               const real_t Gy = G(qy, dy);
 
  405               for (
int qx = 0; qx < Q1D; ++qx)
 
  407                  dataXY[qy][qx][0] += dataX[qx][0] * By;
 
  408                  dataXY[qy][qx][1] += dataX[qx][1] * By;
 
  409                  dataXY[qy][qx][2] += dataX[qx][2] * By;
 
  411                  gradXY0[qy][qx][0] += gradX0[qx][0] * By;
 
  412                  gradXY0[qy][qx][1] += gradX0[qx][1] * Gy;
 
  413                  gradXY0[qy][qx][2] += gradX0[qx][2] * By;
 
  415                  gradXY1[qy][qx][0] += gradX1[qx][0] * By;
 
  416                  gradXY1[qy][qx][1] += gradX1[qx][1] * Gy;
 
  417                  gradXY1[qy][qx][2] += gradX1[qx][2] * By;
 
  419                  gradXY2[qy][qx][0] += gradX2[qx][0] * By;
 
  420                  gradXY2[qy][qx][1] += gradX2[qx][1] * Gy;
 
  421                  gradXY2[qy][qx][2] += gradX2[qx][2] * By;
 
  425         for (
int qz = 0; qz < Q1D; ++qz)
 
  427            const real_t Bz = B(qz, dz);
 
  428            const real_t Gz = G(qz, dz);
 
  429            for (
int qy = 0; qy < Q1D; ++qy)
 
  431               for (
int qx = 0; qx < Q1D; ++qx)
 
  433                  data[qz][qy][qx][0] += dataXY[qy][qx][0] * Bz;
 
  434                  data[qz][qy][qx][1] += dataXY[qy][qx][1] * Bz;
 
  435                  data[qz][qy][qx][2] += dataXY[qy][qx][2] * Bz;
 
  437                  grad0[qz][qy][qx][0] += gradXY0[qy][qx][0] * Bz;
 
  438                  grad0[qz][qy][qx][1] += gradXY0[qy][qx][1] * Bz;
 
  439                  grad0[qz][qy][qx][2] += gradXY0[qy][qx][2] * Gz;
 
  441                  grad1[qz][qy][qx][0] += gradXY1[qy][qx][0] * Bz;
 
  442                  grad1[qz][qy][qx][1] += gradXY1[qy][qx][1] * Bz;
 
  443                  grad1[qz][qy][qx][2] += gradXY1[qy][qx][2] * Gz;
 
  445                  grad2[qz][qy][qx][0] += gradXY2[qy][qx][0] * Bz;
 
  446                  grad2[qz][qy][qx][1] += gradXY2[qy][qx][1] * Bz;
 
  447                  grad2[qz][qy][qx][2] += gradXY2[qy][qx][2] * Gz;
 
  452      for (
int qz = 0; qz < Q1D; ++qz)
 
  454         for (
int qy = 0; qy < Q1D; ++qy)
 
  456            for (
int qx = 0; qx < Q1D; ++qx)
 
  458               const int q = qx + Q1D * (qy + qz * Q1D);
 
  460               const real_t u1 = data[qz][qy][qx][0];
 
  461               const real_t u2 = data[qz][qy][qx][1];
 
  462               const real_t u3 = data[qz][qy][qx][2];
 
  464               const real_t grad00 = grad0[qz][qy][qx][0];
 
  465               const real_t grad01 = grad0[qz][qy][qx][1];
 
  466               const real_t grad02 = grad0[qz][qy][qx][2];
 
  468               const real_t grad10 = grad1[qz][qy][qx][0];
 
  469               const real_t grad11 = grad1[qz][qy][qx][1];
 
  470               const real_t grad12 = grad1[qz][qy][qx][2];
 
  472               const real_t grad20 = grad2[qz][qy][qx][0];
 
  473               const real_t grad21 = grad2[qz][qy][qx][1];
 
  474               const real_t grad22 = grad2[qz][qy][qx][2];
 
  476               const real_t Dxu1 = grad00 * Q(q, 0, 0, e)
 
  477                                   + grad01 * Q(q, 1, 0, e)
 
  478                                   + grad02 * Q(q, 2, 0, e);
 
  479               const real_t Dyu1 = grad00 * Q(q, 0, 1, e)
 
  480                                   + grad01 * Q(q, 1, 1, e)
 
  481                                   + grad02 * Q(q, 2, 1, e);
 
  482               const real_t Dzu1 = grad00 * Q(q, 0, 2, e)
 
  483                                   + grad01 * Q(q, 1, 2, e)
 
  484                                   + grad02 * Q(q, 2, 2, e);
 
  486               const real_t Dxu2 = grad10 * Q(q, 0, 0, e)
 
  487                                   + grad11 * Q(q, 1, 0, e)
 
  488                                   + grad12 * Q(q, 2, 0, e);
 
  489               const real_t Dyu2 = grad10 * Q(q, 0, 1, e)
 
  490                                   + grad11 * Q(q, 1, 1, e)
 
  491                                   + grad12 * Q(q, 2, 1, e);
 
  492               const real_t Dzu2 = grad10 * Q(q, 0, 2, e)
 
  493                                   + grad11 * Q(q, 1, 2, e)
 
  494                                   + grad12 * Q(q, 2, 2, e);
 
  496               const real_t Dxu3 = grad20 * Q(q, 0, 0, e)
 
  497                                   + grad21 * Q(q, 1, 0, e)
 
  498                                   + grad22 * Q(q, 2, 0, e);
 
  499               const real_t Dyu3 = grad20 * Q(q, 0, 1, e)
 
  500                                   + grad21 * Q(q, 1, 1, e)
 
  501                                   + grad22 * Q(q, 2, 1, e);
 
  502               const real_t Dzu3 = grad20 * Q(q, 0, 2, e)
 
  503                                   + grad21 * Q(q, 1, 2, e)
 
  504                                   + grad22 * Q(q, 2, 2, e);
 
  506               Z[qz][qy][qx][0] = u1 * Dxu1 + u2 * Dyu1 + u3 * Dzu1;
 
  507               Z[qz][qy][qx][1] = u1 * Dxu2 + u2 * Dyu2 + u3 * Dzu2;
 
  508               Z[qz][qy][qx][2] = u1 * Dxu3 + u2 * Dyu3 + u3 * Dzu3;
 
  512      for (
int qz = 0; qz < Q1D; ++qz)
 
  514         real_t opXY[max_D1D][max_D1D][VDIM];
 
  515         for (
int dy = 0; dy < D1D; ++dy)
 
  517            for (
int dx = 0; dx < D1D; ++dx)
 
  519               opXY[dy][dx][0] = 0.0;
 
  520               opXY[dy][dx][1] = 0.0;
 
  521               opXY[dy][dx][2] = 0.0;
 
  524         for (
int qy = 0; qy < Q1D; ++qy)
 
  526            real_t opX[max_D1D][VDIM];
 
  527            for (
int dx = 0; dx < D1D; ++dx)
 
  532               for (
int qx = 0; qx < Q1D; ++qx)
 
  534                  const real_t Btx = Bt(dx, qx);
 
  535                  opX[dx][0] += Btx * Z[qz][qy][qx][0];
 
  536                  opX[dx][1] += Btx * Z[qz][qy][qx][1];
 
  537                  opX[dx][2] += Btx * Z[qz][qy][qx][2];
 
  540            for (
int dy = 0; dy < D1D; ++dy)
 
  542               for (
int dx = 0; dx < D1D; ++dx)
 
  544                  const real_t Bty = Bt(dy, qy);
 
  545                  opXY[dy][dx][0] += Bty * opX[dx][0];
 
  546                  opXY[dy][dx][1] += Bty * opX[dx][1];
 
  547                  opXY[dy][dx][2] += Bty * opX[dx][2];
 
  551         for (
int dz = 0; dz < D1D; ++dz)
 
  553            for (
int dy = 0; dy < D1D; ++dy)
 
  555               for (
int dx = 0; dx < D1D; ++dx)
 
  557                  const real_t Btz = Bt(dz, qz);
 
  558                  y(dx, dy, dz, 0, e) += Btz * opXY[dy][dx][0];
 
  559                  y(dx, dy, dz, 1, e) += Btz * opXY[dy][dx][1];
 
  560                  y(dx, dy, dz, 2, e) += Btz * opXY[dy][dx][2];
 
  568template<
int T_D1D = 0, 
int T_Q1D = 0, 
int T_MAX_D1D = 0, 
int T_MAX_Q1D = 0>
 
  569static void SmemPAConvectionNLApply3D(
const int NE,
 
  570                                      const Array<real_t> &b_,
 
  571                                      const Array<real_t> &g_,
 
  578   constexpr int VDIM = 3;
 
  579   const int D1D = T_D1D ? T_D1D : d1d;
 
  580   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  581   constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
 
  582   constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
 
  583   MFEM_VERIFY(D1D <= MD1, 
"");
 
  584   MFEM_VERIFY(Q1D <= MQ1, 
"");
 
  586   auto b = 
Reshape(b_.Read(), Q1D, D1D);
 
  587   auto g = 
Reshape(g_.Read(), Q1D, D1D);
 
  588   auto D = 
Reshape(d_.Read(), Q1D * Q1D * Q1D, VDIM, VDIM, NE);
 
  589   auto x = 
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
 
  590   auto Y = 
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
 
  594      const int tidz = MFEM_THREAD_ID(z);
 
  595      const int D1D = T_D1D ? T_D1D : d1d;
 
  596      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  597      constexpr int MD1 = T_D1D ? T_D1D : T_MAX_D1D;
 
  598      constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX_Q1D;
 
  599      MFEM_SHARED 
real_t BG[2][MQ1 * MD1];
 
  603      MFEM_SHARED 
real_t U[2][MQ1][MQ1][MQ1];
 
  604      MFEM_SHARED 
real_t sm0[3][MQ1 * MQ1 * MQ1];
 
  605      MFEM_SHARED 
real_t sm1[3][MQ1 * MQ1 * MQ1];
 
  606      real_t(*DDQ0)[MD1][MQ1] = (
real_t(*)[MD1][MQ1])(sm0 + 0);
 
  607      real_t(*DDQ1)[MD1][MQ1] = (
real_t(*)[MD1][MQ1])(sm0 + 1);
 
  609      real_t(*DQQ0)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm1 + 0);
 
  610      real_t(*DQQ1)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm1 + 1);
 
  611      real_t(*DQQ2)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm1 + 2);
 
  612      real_t(*QQQ0)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm0 + 0);
 
  613      real_t(*QQQ1)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm0 + 1);
 
  614      real_t(*QQQ2)[MQ1][MQ1] = (
real_t(*)[MQ1][MQ1])(sm0 + 2);
 
  615      real_t(*QQD0)[MQ1][MD1] = (
real_t(*)[MQ1][MD1])(sm1 + 0);
 
  616      real_t(*QDD0)[MD1][MD1] = (
real_t(*)[MD1][MD1])(sm0 + 0);
 
  617      MFEM_SHARED 
real_t Z[MQ1][MQ1][MQ1];
 
  619      for (
int cy = 0; cy < VDIM; ++cy)
 
  623            MFEM_FOREACH_THREAD(q, x, Q1D)
 
  625               MFEM_FOREACH_THREAD(d, y, D1D)
 
  632         MFEM_FOREACH_THREAD(qz, z, Q1D)
 
  634            MFEM_FOREACH_THREAD(qy, y, Q1D)
 
  636               MFEM_FOREACH_THREAD(qx, x, Q1D) { Z[qz][qy][qx] = 0.0; }
 
  640         for (
int c = 0; c < VDIM; ++c)
 
  642            MFEM_FOREACH_THREAD(dz, z, D1D)
 
  644               MFEM_FOREACH_THREAD(dy, y, D1D)
 
  646                  MFEM_FOREACH_THREAD(dx, x, D1D)
 
  648                     X[dz][dy][dx] = x(dx, dy, dz, cy, e);
 
  649                     U[0][dz][dy][dx] = x(dx, dy, dz, c, e);
 
  654            MFEM_FOREACH_THREAD(dz, z, D1D)
 
  656               MFEM_FOREACH_THREAD(dy, y, D1D)
 
  658                  MFEM_FOREACH_THREAD(qx, x, Q1D)
 
  663                     for (
int dx = 0; dx < D1D; ++dx)
 
  665                        const real_t coord = X[dz][dy][dx];
 
  666                        const real_t value = U[0][dz][dy][dx];
 
  667                        u += coord * B[qx][dx];
 
  668                        v += coord * G[qx][dx];
 
  669                        z += value * B[qx][dx];
 
  671                     DDQ0[dz][dy][qx] = 
u;
 
  672                     DDQ1[dz][dy][qx] = v;
 
  673                     U[1][dz][dy][qx] = z;
 
  678            MFEM_FOREACH_THREAD(dz, z, D1D)
 
  680               MFEM_FOREACH_THREAD(qy, y, Q1D)
 
  682                  MFEM_FOREACH_THREAD(qx, x, Q1D)
 
  688                     for (
int dy = 0; dy < D1D; ++dy)
 
  690                        u += DDQ1[dz][dy][qx] * B[qy][dy];
 
  691                        v += DDQ0[dz][dy][qx] * G[qy][dy];
 
  692                        w += DDQ0[dz][dy][qx] * B[qy][dy];
 
  693                        z += U[1][dz][dy][qx] * B[qy][dy];
 
  695                     DQQ0[dz][qy][qx] = 
u;
 
  696                     DQQ1[dz][qy][qx] = v;
 
  697                     DQQ2[dz][qy][qx] = w;
 
  698                     U[0][dz][qy][qx] = z;
 
  703            MFEM_FOREACH_THREAD(qz, z, Q1D)
 
  705               MFEM_FOREACH_THREAD(qy, y, Q1D)
 
  707                  MFEM_FOREACH_THREAD(qx, x, Q1D)
 
  713                     for (
int dz = 0; dz < D1D; ++dz)
 
  715                        u += DQQ0[dz][qy][qx] * B[qz][dz];
 
  716                        v += DQQ1[dz][qy][qx] * B[qz][dz];
 
  717                        w += DQQ2[dz][qy][qx] * G[qz][dz];
 
  718                        z += U[0][dz][qy][qx] * B[qz][dz];
 
  720                     QQQ0[qz][qy][qx] = 
u;
 
  721                     QQQ1[qz][qy][qx] = v;
 
  722                     QQQ2[qz][qy][qx] = w;
 
  723                     U[1][qz][qy][qx] = z;
 
  728            MFEM_FOREACH_THREAD(qz, z, Q1D)
 
  730               MFEM_FOREACH_THREAD(qy, y, Q1D)
 
  732                  MFEM_FOREACH_THREAD(qx, x, Q1D)
 
  734                     const int q = qx + (qy + qz * Q1D) * Q1D;
 
  735                     const real_t z = U[1][qz][qy][qx];
 
  736                     const real_t gX = QQQ0[qz][qy][qx];
 
  737                     const real_t gY = QQQ1[qz][qy][qx];
 
  738                     const real_t gZ = QQQ2[qz][qy][qx];
 
  739                     const real_t d = gX * D(q, 0, c, e) + gY * D(q, 1, c, e)
 
  740                                      + gZ * D(q, 2, c, e);
 
  741                     Z[qz][qy][qx] += z * d;
 
  749            MFEM_FOREACH_THREAD(d, y, D1D)
 
  751               MFEM_FOREACH_THREAD(q, x, Q1D) { Bt[d][q] = 
b(q, d); }
 
  755         MFEM_FOREACH_THREAD(qz, z, Q1D)
 
  757            MFEM_FOREACH_THREAD(qy, y, Q1D)
 
  759               MFEM_FOREACH_THREAD(dx, x, D1D)
 
  762                  for (
int qx = 0; qx < Q1D; ++qx)
 
  764                     u += Z[qz][qy][qx] * Bt[dx][qx];
 
  766                  QQD0[qz][qy][dx] = 
u;
 
  771         MFEM_FOREACH_THREAD(qz, z, Q1D)
 
  773            MFEM_FOREACH_THREAD(dy, y, D1D)
 
  775               MFEM_FOREACH_THREAD(dx, x, D1D)
 
  778                  for (
int qy = 0; qy < Q1D; ++qy)
 
  780                     u += QQD0[qz][qy][dx] * Bt[dy][qy];
 
  782                  QDD0[qz][dy][dx] = 
u;
 
  787         MFEM_FOREACH_THREAD(dz, z, D1D)
 
  789            MFEM_FOREACH_THREAD(dy, y, D1D)
 
  791               MFEM_FOREACH_THREAD(dx, x, D1D)
 
  794                  for (
int qz = 0; qz < Q1D; ++qz)
 
  796                     u += QDD0[qz][dy][dx] * Bt[dz][qz];
 
  798                  Y(dx, dy, dz, cy, e) += 
u;
 
  816      const int D1D = maps->
ndof;
 
  817      const int Q1D = maps->
nqpt;
 
  818      const Vector &QV = pa_data;
 
  824         return PAConvectionNLApply2D(NE, B, G, Bt, QV, x, y, D1D, Q1D);
 
  828         constexpr int T_MAX_D1D = 8;
 
  829         constexpr int T_MAX_Q1D = 8;
 
  830         MFEM_VERIFY(D1D <= T_MAX_D1D && Q1D <= T_MAX_Q1D, 
"Not yet implemented!");
 
  831         return SmemPAConvectionNLApply3D<0, 0, T_MAX_D1D, T_MAX_Q1D>
 
  832                (NE, B, G, QV, x, y, D1D, Q1D);
 
  834      MFEM_ABORT(
"Not yet implemented!");
 
 
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
A coefficient that is constant across space and time.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Array< real_t > B
Basis functions evaluated at quadrature points.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Array< real_t > Bt
Transpose of B.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Ordering::Type GetOrdering() const
Return the ordering method.
Mesh * GetMesh() const
Returns the mesh.
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Abstract class for all finite elements.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Vector J
Jacobians of the element transformations at all quadrature points.
Class for an integration rule - an Array of IntegrationPoint.
int GetNPoints() const
Returns the number of the points in the integration rule.
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
const IntegrationRule * IntRule
int GetNE() const
Returns number of elements.
int Dimension() const
Dimension of the reference space used within the elements.
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
void AddMultPA(const Vector &x, Vector &y) const override
Method for partially assembled action.
void AssemblePA(const FiniteElementSpace &fes) override
Method defining partial assembly.
static const IntegrationRule & GetRule(const FiniteElement &fe, const ElementTransformation &T)
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void SetSize(int s)
Resize the vector to size s.
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
void AddMult(const mfem::Vector &x, mfem::Vector &y, const real_t a=1.0) const override
Operator application: y+=A(x) (default) or y+=a*A(x).
real_t u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.