12#ifndef MFEM_BILININTEG_HDIV_KERNELS_HPP 
   13#define MFEM_BILININTEG_HDIV_KERNELS_HPP 
   32void PAHdivMassSetup2D(
const int Q1D,
 
   35                       const Array<real_t> &w,
 
   41void PAHdivMassSetup3D(
const int Q1D,
 
   44                       const Array<real_t> &w,
 
   50void PAHdivMassAssembleDiagonal2D(
const int D1D,
 
   54                                  const Array<real_t> &Bo_,
 
   55                                  const Array<real_t> &Bc_,
 
   60void PAHdivMassAssembleDiagonal3D(
const int D1D,
 
   64                                  const Array<real_t> &Bo_,
 
   65                                  const Array<real_t> &Bc_,
 
   69void PAHdivMassApply(
const int dim,
 
   74                     const Array<real_t> &Bo,
 
   75                     const Array<real_t> &Bc,
 
   76                     const Array<real_t> &Bot,
 
   77                     const Array<real_t> &Bct,
 
   83void PAHdivMassApply2D(
const int D1D,
 
   87                       const Array<real_t> &Bo_,
 
   88                       const Array<real_t> &Bc_,
 
   89                       const Array<real_t> &Bot_,
 
   90                       const Array<real_t> &Bct_,
 
   96void PAHdivMassApply3D(
const int D1D,
 
  100                       const Array<real_t> &Bo_,
 
  101                       const Array<real_t> &Bc_,
 
  102                       const Array<real_t> &Bot_,
 
  103                       const Array<real_t> &Bct_,
 
  109template<
int T_D1D = 0, 
int T_Q1D = 0>
 
  110inline void SmemPAHdivMassApply2D(
const int NE,
 
  111                                  const bool symmetric,
 
  112                                  const Array<real_t> &Bo_,
 
  113                                  const Array<real_t> &Bc_,
 
  114                                  const Array<real_t> &Bot_,
 
  115                                  const Array<real_t> &Bct_,
 
  122   MFEM_CONTRACT_VAR(Bot_);
 
  123   MFEM_CONTRACT_VAR(Bct_);
 
  125   static constexpr int VDIM = 2;
 
  127   const int D1D = T_D1D ? T_D1D : d1d;
 
  128   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  130   const auto bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  131   const auto bc = 
Reshape(Bc_.Read(), Q1D, D1D);
 
  132   const auto D = 
Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
 
  133   const auto x = 
Reshape(x_.Read(), D1D*(D1D-1), VDIM, NE);
 
  134   auto y = y_.ReadWrite();
 
  138      const int tidz = MFEM_THREAD_ID(z);
 
  140      const int D1D = T_D1D ? T_D1D : d1d;
 
  141      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  143      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
 
  144      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
 
  145      constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
 
  147      MFEM_SHARED 
real_t smo[MQ1*(MD1-1)];
 
  150      MFEM_SHARED 
real_t smc[MQ1*MD1];
 
  153      MFEM_SHARED 
real_t sm0[VDIM*MDQ*MDQ];
 
  154      MFEM_SHARED 
real_t sm1[VDIM*MDQ*MDQ];
 
  161      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  163         MFEM_FOREACH_THREAD(dy,y,D1D)
 
  165            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  167               if (qx < D1D && dy < (D1D-1))
 
  169                  X(qx + dy*D1D,vd) = x(qx+dy*D1D,vd,e);
 
  173                  if (dy < (D1D-1)) { Bo(dy,qx) = bo(qx,dy); }
 
  174                  Bc(dy,qx) = bc(qx,dy);
 
  181      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  183         const int nx = (vd == 0) ? D1D : D1D-1;
 
  184         const int ny = (vd == 1) ? D1D : D1D-1;
 
  187         MFEM_FOREACH_THREAD(dy,y,ny)
 
  189            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  192               for (
int dx = 0; dx < nx; ++dx)
 
  194                  dq += Xxy(dx,dy,vd) * Bx(dx,qx);
 
  201      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  203         const int ny = (vd == 1) ? D1D : D1D-1;
 
  205         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  207            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  210               for (
int dy = 0; dy < ny; ++dy)
 
  212                  qq += QD(qx,dy,vd) * By(dy,qy);
 
  222         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  224            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  226               const real_t Qx = QQ(qx,qy,0);
 
  227               const real_t Qy = QQ(qx,qy,1);
 
  229               const real_t D11 = D(qx,qy,0,e);
 
  230               const real_t D12 = D(qx,qy,1,e);
 
  231               const real_t D21 = symmetric ? D12 : D(qx,qy,2,e);
 
  232               const real_t D22 = symmetric ? D(qx,qy,2,e) : D(qx,qy,3,e);
 
  234               QQ(qx,qy,0) = D11*Qx + D12*Qy;
 
  235               QQ(qx,qy,1) = D21*Qx + D22*Qy;
 
  241      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  243         const int nx = (vd == 0) ? D1D : D1D-1;
 
  245         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  247            MFEM_FOREACH_THREAD(dx,x,nx)
 
  250               for (
int qx = 0; qx < Q1D; ++qx)
 
  252                  qd += QQ(qx,qy,vd) * Btx(dx,qx);
 
  259      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  261         const int nx = (vd == 0) ? D1D : D1D-1;
 
  262         const int ny = (vd == 1) ? D1D : D1D-1;
 
  264         DeviceTensor<4> Yxy(y, nx, ny, VDIM, NE);
 
  265         MFEM_FOREACH_THREAD(dy,y,ny)
 
  267            MFEM_FOREACH_THREAD(dx,x,nx)
 
  270               for (
int qy = 0; qy < Q1D; ++qy)
 
  272                  dd += DQ(dx,qy,vd) * Bty(dy,qy);
 
  274               Yxy(dx,dy,vd,e) += dd;
 
  283template<
int T_D1D = 0, 
int T_Q1D = 0>
 
  284inline void SmemPAHdivMassApply3D(
const int NE,
 
  285                                  const bool symmetric,
 
  286                                  const Array<real_t> &Bo_,
 
  287                                  const Array<real_t> &Bc_,
 
  288                                  const Array<real_t> &Bot_,
 
  289                                  const Array<real_t> &Bct_,
 
  296   MFEM_CONTRACT_VAR(Bot_);
 
  297   MFEM_CONTRACT_VAR(Bct_);
 
  299   static constexpr int VDIM = 3;
 
  301   const int D1D = T_D1D ? T_D1D : d1d;
 
  302   const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  304   const auto bo = 
Reshape(Bo_.Read(), Q1D, D1D-1);
 
  305   const auto bc = 
Reshape(Bc_.Read(), Q1D, D1D);
 
  306   const auto D = 
Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
 
  307   const auto x = 
Reshape(x_.Read(), D1D*(D1D-1)*(D1D-1), VDIM, NE);
 
  308   auto y = y_.ReadWrite();
 
  312      const int tidz = MFEM_THREAD_ID(z);
 
  314      const int D1D = T_D1D ? T_D1D : d1d;
 
  315      const int Q1D = T_Q1D ? T_Q1D : q1d;
 
  317      constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
 
  318      constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
 
  319      constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
 
  321      MFEM_SHARED 
real_t smo[MQ1*(MD1-1)];
 
  324      MFEM_SHARED 
real_t smc[MQ1*MD1];
 
  327      MFEM_SHARED 
real_t sm0[VDIM*MDQ*MDQ*MDQ];
 
  328      MFEM_SHARED 
real_t sm1[VDIM*MDQ*MDQ*MDQ];
 
  330      DeviceTensor<4> QDD(sm1, Q1D, D1D, D1D, VDIM);
 
  331      DeviceTensor<4> QQD(sm0, Q1D, Q1D, D1D, VDIM);
 
  332      DeviceTensor<4> QQQ(sm1, Q1D, Q1D, Q1D, VDIM);
 
  333      DeviceTensor<4> DQQ(sm0, D1D, Q1D, Q1D, VDIM);
 
  334      DeviceTensor<4> DDQ(sm1, D1D, D1D, Q1D, VDIM);
 
  337      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  339         MFEM_FOREACH_THREAD(dz,y,D1D-1)
 
  341            MFEM_FOREACH_THREAD(dy,x,D1D-1)
 
  344               for (
int dx = 0; dx < D1D; ++dx)
 
  346                  X(dx+(dy+dz*(D1D-1))*D1D,vd) = x(dx+(dy+dz*(D1D-1))*D1D,vd,e);
 
  354         MFEM_FOREACH_THREAD(d,y,D1D-1)
 
  356            MFEM_FOREACH_THREAD(q,x,Q1D)
 
  361         MFEM_FOREACH_THREAD(d,y,D1D)
 
  363            MFEM_FOREACH_THREAD(q,x,Q1D)
 
  371      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  373         const int nx = (vd == 0) ? D1D : D1D-1;
 
  374         const int ny = (vd == 1) ? D1D : D1D-1;
 
  375         const int nz = (vd == 2) ? D1D : D1D-1;
 
  376         DeviceTensor<4> Xxyz(X, nx, ny, nz, VDIM);
 
  378         MFEM_FOREACH_THREAD(dy,y,ny)
 
  380            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  384               for (
int dz = 0; dz < nz; ++dz) { 
u[dz] = 0.0; }
 
  386               for (
int dx = 0; dx < nx; ++dx)
 
  389                  for (
int dz = 0; dz < nz; ++dz)
 
  391                     u[dz] += Xxyz(dx,dy,dz,vd) * Bx(dx,qx);
 
  395               for (
int dz = 0; dz < nz; ++dz) { QDD(qx,dy,dz,vd) = 
u[dz]; }
 
  400      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  402         const int ny = (vd == 1) ? D1D : D1D-1;
 
  403         const int nz = (vd == 2) ? D1D : D1D-1;
 
  405         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  407            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  411               for (
int dz = 0; dz < nz; ++dz) { 
u[dz] = 0.0; }
 
  413               for (
int dy = 0; dy < ny; ++dy)
 
  416                  for (
int dz = 0; dz < nz; ++dz)
 
  418                     u[dz] += QDD(qx,dy,dz,vd) * By(dy,qy);
 
  422               for (
int dz = 0; dz < nz; ++dz) { QQD(qx,qy,dz,vd) = 
u[dz]; }
 
  427      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  429         const int nz = (vd == 2) ? D1D : D1D-1;
 
  431         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  433            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  437               for (
int qz = 0; qz < Q1D; ++qz) { 
u[qz] = 0.0; }
 
  439               for (
int dz = 0; dz < nz; ++dz)
 
  442                  for (
int qz = 0; qz < Q1D; ++qz)
 
  444                     u[qz] += QQD(qx,qy,dz,vd) * Bz(dz,qz);
 
  448               for (
int qz = 0; qz < Q1D; ++qz) { QQQ(qx,qy,qz,vd) = 
u[qz]; }
 
  456         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  458            MFEM_FOREACH_THREAD(qx,x,Q1D)
 
  461               for (
int qz = 0; qz < Q1D; ++qz)
 
  463                  const real_t Qx = QQQ(qx,qy,qz,0);
 
  464                  const real_t Qy = QQQ(qx,qy,qz,1);
 
  465                  const real_t Qz = QQQ(qx,qy,qz,2);
 
  467                  const real_t D11 = D(qx,qy,qz,0,e);
 
  468                  const real_t D12 = D(qx,qy,qz,1,e);
 
  469                  const real_t D13 = D(qx,qy,qz,2,e);
 
  470                  const real_t D21 = symmetric ? D12 : D(qx,qy,qz,3,e);
 
  471                  const real_t D22 = symmetric ? D(qx,qy,qz,3,e) : D(qx,qy,qz,4,e);
 
  472                  const real_t D23 = symmetric ? D(qx,qy,qz,4,e) : D(qx,qy,qz,5,e);
 
  473                  const real_t D31 = symmetric ? D13 : D(qx,qy,qz,6,e);
 
  474                  const real_t D32 = symmetric ? D23 : D(qx,qy,qz,7,e);
 
  475                  const real_t D33 = symmetric ? D(qx,qy,qz,5,e) : D(qx,qy,qz,8,e);
 
  477                  QQQ(qx,qy,qz,0) = D11*Qx + D12*Qy + D13*Qz;
 
  478                  QQQ(qx,qy,qz,1) = D21*Qx + D22*Qy + D23*Qz;
 
  479                  QQQ(qx,qy,qz,2) = D31*Qx + D32*Qy + D33*Qz;
 
  486      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  488         const int nx = (vd == 0) ? D1D : D1D-1;
 
  490         MFEM_FOREACH_THREAD(qy,y,Q1D)
 
  492            MFEM_FOREACH_THREAD(dx,x,nx)
 
  496               for (
int qz = 0; qz < Q1D; ++qz) { 
u[qz] = 0.0; }
 
  498               for (
int qx = 0; qx < Q1D; ++qx)
 
  501                  for (
int qz = 0; qz < Q1D; ++qz)
 
  503                     u[qz] += QQQ(qx,qy,qz,vd) * Btx(dx,qx);
 
  507               for (
int qz = 0; qz < Q1D; ++qz) { DQQ(dx,qy,qz,vd) = 
u[qz]; }
 
  512      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  514         const int nx = (vd == 0) ? D1D : D1D-1;
 
  515         const int ny = (vd == 1) ? D1D : D1D-1;
 
  517         MFEM_FOREACH_THREAD(dy,y,ny)
 
  519            MFEM_FOREACH_THREAD(dx,x,nx)
 
  523               for (
int qz = 0; qz < Q1D; ++qz) { 
u[qz] = 0.0; }
 
  525               for (
int qy = 0; qy < Q1D; ++qy)
 
  528                  for (
int qz = 0; qz < Q1D; ++qz)
 
  530                     u[qz] += DQQ(dx,qy,qz,vd) * Bty(dy,qy);
 
  534               for (
int qz = 0; qz < Q1D; ++qz) { DDQ(dx,dy,qz,vd) = 
u[qz]; }
 
  539      MFEM_FOREACH_THREAD(vd,z,VDIM)
 
  541         const int nx = (vd == 0) ? D1D : D1D-1;
 
  542         const int ny = (vd == 1) ? D1D : D1D-1;
 
  543         const int nz = (vd == 2) ? D1D : D1D-1;
 
  544         DeviceTensor<5> Yxyz(y, nx, ny, nz, VDIM, NE);
 
  546         MFEM_FOREACH_THREAD(dy,y,ny)
 
  548            MFEM_FOREACH_THREAD(dx,x,nx)
 
  552               for (
int dz = 0; dz < nz; ++dz) { 
u[dz] = 0.0; }
 
  554               for (
int qz = 0; qz < Q1D; ++qz)
 
  557                  for (
int dz = 0; dz < nz; ++dz)
 
  559                     u[dz] += DDQ(dx,dy,qz,vd) * Btz(dz,qz);
 
  563               for (
int dz = 0; dz < nz; ++dz) { Yxyz(dx,dy,dz,vd,e) += 
u[dz]; }
 
  572void PADivDivSetup2D(
const int Q1D,
 
  574                     const Array<real_t> &w,
 
  580void PADivDivSetup3D(
const int Q1D,
 
  582                     const Array<real_t> &w,
 
  588void PADivDivAssembleDiagonal2D(
const int D1D,
 
  591                                const Array<real_t> &Bo_,
 
  592                                const Array<real_t> &Gc_,
 
  597void PADivDivAssembleDiagonal3D(
const int D1D,
 
  600                                const Array<real_t> &Bo_,
 
  601                                const Array<real_t> &Gc_,
 
  606void PADivDivApply2D(
const int D1D,
 
  609                     const Array<real_t> &Bo_,
 
  610                     const Array<real_t> &Gc_,
 
  611                     const Array<real_t> &Bot_,
 
  612                     const Array<real_t> &Gct_,
 
  618void PADivDivApply3D(
const int D1D,
 
  621                     const Array<real_t> &Bo_,
 
  622                     const Array<real_t> &Gc_,
 
  623                     const Array<real_t> &Bot_,
 
  624                     const Array<real_t> &Gct_,
 
  630void PAHdivL2Setup2D(
const int Q1D,
 
  632                     const Array<real_t> &w,
 
  637void PAHdivL2Setup3D(
const int Q1D,
 
  639                     const Array<real_t> &w,
 
  644void PAHdivL2AssembleDiagonal_ADAt_2D(
const int D1D,
 
  648                                      const Array<real_t> &L2Bo_,
 
  649                                      const Array<real_t> &Gct_,
 
  650                                      const Array<real_t> &Bot_,
 
  656void PAHdivL2AssembleDiagonal_ADAt_3D(
const int D1D,
 
  660                                      const Array<real_t> &L2Bo_,
 
  661                                      const Array<real_t> &Gct_,
 
  662                                      const Array<real_t> &Bot_,
 
  668void PAHdivL2Apply2D(
const int D1D,
 
  672                     const Array<real_t> &Bo_,
 
  673                     const Array<real_t> &Gc_,
 
  674                     const Array<real_t> &L2Bot_,
 
  680void PAHdivL2ApplyTranspose2D(
const int D1D,
 
  684                              const Array<real_t> &L2Bo_,
 
  685                              const Array<real_t> &Gct_,
 
  686                              const Array<real_t> &Bot_,
 
  692void PAHdivL2Apply3D(
const int D1D,
 
  696                     const Array<real_t> &Bo_,
 
  697                     const Array<real_t> &Gc_,
 
  698                     const Array<real_t> &L2Bot_,
 
  704void PAHdivL2ApplyTranspose3D(
const int D1D,
 
  708                              const Array<real_t> &L2Bo_,
 
  709                              const Array<real_t> &Gct_,
 
  710                              const Array<real_t> &Bot_,
 
real_t u(const Vector &xvec)
DeviceTensor< 2, real_t > DeviceMatrix
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
DeviceTensor< 3, real_t > DeviceCube