12#ifndef MFEM_BILININTEG_HDIV_KERNELS_HPP
13#define MFEM_BILININTEG_HDIV_KERNELS_HPP
32void PAHdivMassSetup2D(
const int Q1D,
35 const Array<real_t> &w,
41void PAHdivMassSetup3D(
const int Q1D,
44 const Array<real_t> &w,
50void PAHdivMassAssembleDiagonal2D(
const int D1D,
54 const Array<real_t> &Bo_,
55 const Array<real_t> &Bc_,
60void PAHdivMassAssembleDiagonal3D(
const int D1D,
64 const Array<real_t> &Bo_,
65 const Array<real_t> &Bc_,
69void PAHdivMassApply(
const int dim,
74 const Array<real_t> &Bo,
75 const Array<real_t> &Bc,
76 const Array<real_t> &Bot,
77 const Array<real_t> &Bct,
83void PAHdivMassApply2D(
const int D1D,
87 const Array<real_t> &Bo_,
88 const Array<real_t> &Bc_,
89 const Array<real_t> &Bot_,
90 const Array<real_t> &Bct_,
96void PAHdivMassApply3D(
const int D1D,
100 const Array<real_t> &Bo_,
101 const Array<real_t> &Bc_,
102 const Array<real_t> &Bot_,
103 const Array<real_t> &Bct_,
109template<
int T_D1D = 0,
int T_Q1D = 0>
110inline void SmemPAHdivMassApply2D(
const int NE,
111 const bool symmetric,
112 const Array<real_t> &Bo_,
113 const Array<real_t> &Bc_,
114 const Array<real_t> &Bot_,
115 const Array<real_t> &Bct_,
122 MFEM_CONTRACT_VAR(Bot_);
123 MFEM_CONTRACT_VAR(Bct_);
125 static constexpr int VDIM = 2;
127 const int D1D = T_D1D ? T_D1D : d1d;
128 const int Q1D = T_Q1D ? T_Q1D : q1d;
130 const auto bo =
Reshape(Bo_.Read(), Q1D, D1D-1);
131 const auto bc =
Reshape(Bc_.Read(), Q1D, D1D);
132 const auto D =
Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
133 const auto x =
Reshape(x_.Read(), D1D*(D1D-1), VDIM, NE);
134 auto y = y_.ReadWrite();
138 const int tidz = MFEM_THREAD_ID(z);
140 const int D1D = T_D1D ? T_D1D : d1d;
141 const int Q1D = T_Q1D ? T_Q1D : q1d;
143 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
144 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
145 constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
147 MFEM_SHARED
real_t smo[MQ1*(MD1-1)];
150 MFEM_SHARED
real_t smc[MQ1*MD1];
153 MFEM_SHARED
real_t sm0[VDIM*MDQ*MDQ];
154 MFEM_SHARED
real_t sm1[VDIM*MDQ*MDQ];
161 MFEM_FOREACH_THREAD(vd,z,VDIM)
163 MFEM_FOREACH_THREAD(dy,y,D1D)
165 MFEM_FOREACH_THREAD(qx,x,Q1D)
167 if (qx < D1D && dy < (D1D-1))
169 X(qx + dy*D1D,vd) = x(qx+dy*D1D,vd,e);
173 if (dy < (D1D-1)) { Bo(dy,qx) = bo(qx,dy); }
174 Bc(dy,qx) = bc(qx,dy);
181 MFEM_FOREACH_THREAD(vd,z,VDIM)
183 const int nx = (vd == 0) ? D1D : D1D-1;
184 const int ny = (vd == 1) ? D1D : D1D-1;
187 MFEM_FOREACH_THREAD(dy,y,ny)
189 MFEM_FOREACH_THREAD(qx,x,Q1D)
192 for (
int dx = 0; dx < nx; ++dx)
194 dq += Xxy(dx,dy,vd) * Bx(dx,qx);
201 MFEM_FOREACH_THREAD(vd,z,VDIM)
203 const int ny = (vd == 1) ? D1D : D1D-1;
205 MFEM_FOREACH_THREAD(qy,y,Q1D)
207 MFEM_FOREACH_THREAD(qx,x,Q1D)
210 for (
int dy = 0; dy < ny; ++dy)
212 qq += QD(qx,dy,vd) * By(dy,qy);
222 MFEM_FOREACH_THREAD(qy,y,Q1D)
224 MFEM_FOREACH_THREAD(qx,x,Q1D)
226 const real_t Qx = QQ(qx,qy,0);
227 const real_t Qy = QQ(qx,qy,1);
229 const real_t D11 = D(qx,qy,0,e);
230 const real_t D12 = D(qx,qy,1,e);
231 const real_t D21 = symmetric ? D12 : D(qx,qy,2,e);
232 const real_t D22 = symmetric ? D(qx,qy,2,e) : D(qx,qy,3,e);
234 QQ(qx,qy,0) = D11*Qx + D12*Qy;
235 QQ(qx,qy,1) = D21*Qx + D22*Qy;
241 MFEM_FOREACH_THREAD(vd,z,VDIM)
243 const int nx = (vd == 0) ? D1D : D1D-1;
245 MFEM_FOREACH_THREAD(qy,y,Q1D)
247 MFEM_FOREACH_THREAD(dx,x,nx)
250 for (
int qx = 0; qx < Q1D; ++qx)
252 qd += QQ(qx,qy,vd) * Btx(dx,qx);
259 MFEM_FOREACH_THREAD(vd,z,VDIM)
261 const int nx = (vd == 0) ? D1D : D1D-1;
262 const int ny = (vd == 1) ? D1D : D1D-1;
264 DeviceTensor<4> Yxy(y, nx, ny, VDIM, NE);
265 MFEM_FOREACH_THREAD(dy,y,ny)
267 MFEM_FOREACH_THREAD(dx,x,nx)
270 for (
int qy = 0; qy < Q1D; ++qy)
272 dd += DQ(dx,qy,vd) * Bty(dy,qy);
274 Yxy(dx,dy,vd,e) += dd;
283template<
int T_D1D = 0,
int T_Q1D = 0>
284inline void SmemPAHdivMassApply3D(
const int NE,
285 const bool symmetric,
286 const Array<real_t> &Bo_,
287 const Array<real_t> &Bc_,
288 const Array<real_t> &Bot_,
289 const Array<real_t> &Bct_,
296 MFEM_CONTRACT_VAR(Bot_);
297 MFEM_CONTRACT_VAR(Bct_);
299 static constexpr int VDIM = 3;
301 const int D1D = T_D1D ? T_D1D : d1d;
302 const int Q1D = T_Q1D ? T_Q1D : q1d;
304 const auto bo =
Reshape(Bo_.Read(), Q1D, D1D-1);
305 const auto bc =
Reshape(Bc_.Read(), Q1D, D1D);
306 const auto D =
Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
307 const auto x =
Reshape(x_.Read(), D1D*(D1D-1)*(D1D-1), VDIM, NE);
308 auto y = y_.ReadWrite();
312 const int tidz = MFEM_THREAD_ID(z);
314 const int D1D = T_D1D ? T_D1D : d1d;
315 const int Q1D = T_Q1D ? T_Q1D : q1d;
317 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
318 constexpr int MD1 = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
319 constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
321 MFEM_SHARED
real_t smo[MQ1*(MD1-1)];
324 MFEM_SHARED
real_t smc[MQ1*MD1];
327 MFEM_SHARED
real_t sm0[VDIM*MDQ*MDQ*MDQ];
328 MFEM_SHARED
real_t sm1[VDIM*MDQ*MDQ*MDQ];
330 DeviceTensor<4> QDD(sm1, Q1D, D1D, D1D, VDIM);
331 DeviceTensor<4> QQD(sm0, Q1D, Q1D, D1D, VDIM);
332 DeviceTensor<4> QQQ(sm1, Q1D, Q1D, Q1D, VDIM);
333 DeviceTensor<4> DQQ(sm0, D1D, Q1D, Q1D, VDIM);
334 DeviceTensor<4> DDQ(sm1, D1D, D1D, Q1D, VDIM);
337 MFEM_FOREACH_THREAD(vd,z,VDIM)
339 MFEM_FOREACH_THREAD(dz,y,D1D-1)
341 MFEM_FOREACH_THREAD(dy,x,D1D-1)
344 for (
int dx = 0; dx < D1D; ++dx)
346 X(dx+(dy+dz*(D1D-1))*D1D,vd) = x(dx+(dy+dz*(D1D-1))*D1D,vd,e);
354 MFEM_FOREACH_THREAD(d,y,D1D-1)
356 MFEM_FOREACH_THREAD(q,x,Q1D)
361 MFEM_FOREACH_THREAD(d,y,D1D)
363 MFEM_FOREACH_THREAD(q,x,Q1D)
371 MFEM_FOREACH_THREAD(vd,z,VDIM)
373 const int nx = (vd == 0) ? D1D : D1D-1;
374 const int ny = (vd == 1) ? D1D : D1D-1;
375 const int nz = (vd == 2) ? D1D : D1D-1;
376 DeviceTensor<4> Xxyz(X, nx, ny, nz, VDIM);
378 MFEM_FOREACH_THREAD(dy,y,ny)
380 MFEM_FOREACH_THREAD(qx,x,Q1D)
384 for (
int dz = 0; dz < nz; ++dz) {
u[dz] = 0.0; }
386 for (
int dx = 0; dx < nx; ++dx)
389 for (
int dz = 0; dz < nz; ++dz)
391 u[dz] += Xxyz(dx,dy,dz,vd) * Bx(dx,qx);
395 for (
int dz = 0; dz < nz; ++dz) { QDD(qx,dy,dz,vd) =
u[dz]; }
400 MFEM_FOREACH_THREAD(vd,z,VDIM)
402 const int ny = (vd == 1) ? D1D : D1D-1;
403 const int nz = (vd == 2) ? D1D : D1D-1;
405 MFEM_FOREACH_THREAD(qy,y,Q1D)
407 MFEM_FOREACH_THREAD(qx,x,Q1D)
411 for (
int dz = 0; dz < nz; ++dz) {
u[dz] = 0.0; }
413 for (
int dy = 0; dy < ny; ++dy)
416 for (
int dz = 0; dz < nz; ++dz)
418 u[dz] += QDD(qx,dy,dz,vd) * By(dy,qy);
422 for (
int dz = 0; dz < nz; ++dz) { QQD(qx,qy,dz,vd) =
u[dz]; }
427 MFEM_FOREACH_THREAD(vd,z,VDIM)
429 const int nz = (vd == 2) ? D1D : D1D-1;
431 MFEM_FOREACH_THREAD(qy,y,Q1D)
433 MFEM_FOREACH_THREAD(qx,x,Q1D)
437 for (
int qz = 0; qz < Q1D; ++qz) {
u[qz] = 0.0; }
439 for (
int dz = 0; dz < nz; ++dz)
442 for (
int qz = 0; qz < Q1D; ++qz)
444 u[qz] += QQD(qx,qy,dz,vd) * Bz(dz,qz);
448 for (
int qz = 0; qz < Q1D; ++qz) { QQQ(qx,qy,qz,vd) =
u[qz]; }
456 MFEM_FOREACH_THREAD(qy,y,Q1D)
458 MFEM_FOREACH_THREAD(qx,x,Q1D)
461 for (
int qz = 0; qz < Q1D; ++qz)
463 const real_t Qx = QQQ(qx,qy,qz,0);
464 const real_t Qy = QQQ(qx,qy,qz,1);
465 const real_t Qz = QQQ(qx,qy,qz,2);
467 const real_t D11 = D(qx,qy,qz,0,e);
468 const real_t D12 = D(qx,qy,qz,1,e);
469 const real_t D13 = D(qx,qy,qz,2,e);
470 const real_t D21 = symmetric ? D12 : D(qx,qy,qz,3,e);
471 const real_t D22 = symmetric ? D(qx,qy,qz,3,e) : D(qx,qy,qz,4,e);
472 const real_t D23 = symmetric ? D(qx,qy,qz,4,e) : D(qx,qy,qz,5,e);
473 const real_t D31 = symmetric ? D13 : D(qx,qy,qz,6,e);
474 const real_t D32 = symmetric ? D23 : D(qx,qy,qz,7,e);
475 const real_t D33 = symmetric ? D(qx,qy,qz,5,e) : D(qx,qy,qz,8,e);
477 QQQ(qx,qy,qz,0) = D11*Qx + D12*Qy + D13*Qz;
478 QQQ(qx,qy,qz,1) = D21*Qx + D22*Qy + D23*Qz;
479 QQQ(qx,qy,qz,2) = D31*Qx + D32*Qy + D33*Qz;
486 MFEM_FOREACH_THREAD(vd,z,VDIM)
488 const int nx = (vd == 0) ? D1D : D1D-1;
490 MFEM_FOREACH_THREAD(qy,y,Q1D)
492 MFEM_FOREACH_THREAD(dx,x,nx)
496 for (
int qz = 0; qz < Q1D; ++qz) {
u[qz] = 0.0; }
498 for (
int qx = 0; qx < Q1D; ++qx)
501 for (
int qz = 0; qz < Q1D; ++qz)
503 u[qz] += QQQ(qx,qy,qz,vd) * Btx(dx,qx);
507 for (
int qz = 0; qz < Q1D; ++qz) { DQQ(dx,qy,qz,vd) =
u[qz]; }
512 MFEM_FOREACH_THREAD(vd,z,VDIM)
514 const int nx = (vd == 0) ? D1D : D1D-1;
515 const int ny = (vd == 1) ? D1D : D1D-1;
517 MFEM_FOREACH_THREAD(dy,y,ny)
519 MFEM_FOREACH_THREAD(dx,x,nx)
523 for (
int qz = 0; qz < Q1D; ++qz) {
u[qz] = 0.0; }
525 for (
int qy = 0; qy < Q1D; ++qy)
528 for (
int qz = 0; qz < Q1D; ++qz)
530 u[qz] += DQQ(dx,qy,qz,vd) * Bty(dy,qy);
534 for (
int qz = 0; qz < Q1D; ++qz) { DDQ(dx,dy,qz,vd) =
u[qz]; }
539 MFEM_FOREACH_THREAD(vd,z,VDIM)
541 const int nx = (vd == 0) ? D1D : D1D-1;
542 const int ny = (vd == 1) ? D1D : D1D-1;
543 const int nz = (vd == 2) ? D1D : D1D-1;
544 DeviceTensor<5> Yxyz(y, nx, ny, nz, VDIM, NE);
546 MFEM_FOREACH_THREAD(dy,y,ny)
548 MFEM_FOREACH_THREAD(dx,x,nx)
552 for (
int dz = 0; dz < nz; ++dz) {
u[dz] = 0.0; }
554 for (
int qz = 0; qz < Q1D; ++qz)
557 for (
int dz = 0; dz < nz; ++dz)
559 u[dz] += DDQ(dx,dy,qz,vd) * Btz(dz,qz);
563 for (
int dz = 0; dz < nz; ++dz) { Yxyz(dx,dy,dz,vd,e) +=
u[dz]; }
572void PADivDivSetup2D(
const int Q1D,
574 const Array<real_t> &w,
580void PADivDivSetup3D(
const int Q1D,
582 const Array<real_t> &w,
588void PADivDivAssembleDiagonal2D(
const int D1D,
591 const Array<real_t> &Bo_,
592 const Array<real_t> &Gc_,
597void PADivDivAssembleDiagonal3D(
const int D1D,
600 const Array<real_t> &Bo_,
601 const Array<real_t> &Gc_,
606void PADivDivApply2D(
const int D1D,
609 const Array<real_t> &Bo_,
610 const Array<real_t> &Gc_,
611 const Array<real_t> &Bot_,
612 const Array<real_t> &Gct_,
618void PADivDivApply3D(
const int D1D,
621 const Array<real_t> &Bo_,
622 const Array<real_t> &Gc_,
623 const Array<real_t> &Bot_,
624 const Array<real_t> &Gct_,
630void PAHdivL2Setup2D(
const int Q1D,
632 const Array<real_t> &w,
637void PAHdivL2Setup3D(
const int Q1D,
639 const Array<real_t> &w,
644void PAHdivL2AssembleDiagonal_ADAt_2D(
const int D1D,
648 const Array<real_t> &L2Bo_,
649 const Array<real_t> &Gct_,
650 const Array<real_t> &Bot_,
656void PAHdivL2AssembleDiagonal_ADAt_3D(
const int D1D,
660 const Array<real_t> &L2Bo_,
661 const Array<real_t> &Gct_,
662 const Array<real_t> &Bot_,
668void PAHdivL2Apply2D(
const int D1D,
672 const Array<real_t> &Bo_,
673 const Array<real_t> &Gc_,
674 const Array<real_t> &L2Bot_,
680void PAHdivL2ApplyTranspose2D(
const int D1D,
684 const Array<real_t> &L2Bo_,
685 const Array<real_t> &Gct_,
686 const Array<real_t> &Bot_,
692void PAHdivL2Apply3D(
const int D1D,
696 const Array<real_t> &Bo_,
697 const Array<real_t> &Gc_,
698 const Array<real_t> &L2Bot_,
704void PAHdivL2ApplyTranspose3D(
const int D1D,
708 const Array<real_t> &L2Bo_,
709 const Array<real_t> &Gct_,
710 const Array<real_t> &Bot_,
real_t u(const Vector &xvec)
DeviceTensor< 2, real_t > DeviceMatrix
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
DeviceTensor< 3, real_t > DeviceCube