12#ifndef MFEM_FEM_KERNELS_HPP
13#define MFEM_FEM_KERNELS_HPP
30template<
int MD1,
int MQ1>
31MFEM_HOST_DEVICE
inline void LoadB(
const int D1D,
const int Q1D,
35 const int tidz = MFEM_THREAD_ID(z);
40 MFEM_FOREACH_THREAD(d,y,D1D)
42 MFEM_FOREACH_THREAD(q,x,Q1D)
52template<
int MD1,
int MQ1>
53MFEM_HOST_DEVICE
inline void LoadBt(
const int D1D,
const int Q1D,
57 const int tidz = MFEM_THREAD_ID(z);
62 MFEM_FOREACH_THREAD(d,y,D1D)
64 MFEM_FOREACH_THREAD(q,x,Q1D)
74template<
int MD1,
int MQ1>
75MFEM_HOST_DEVICE
inline void LoadBG(
const int D1D,
const int Q1D,
80 const int tidz = MFEM_THREAD_ID(z);
86 MFEM_FOREACH_THREAD(d,y,D1D)
88 MFEM_FOREACH_THREAD(q,x,Q1D)
99template<
int MD1,
int MQ1>
100MFEM_HOST_DEVICE
inline void LoadBGt(
const int D1D,
const int Q1D,
103 real_t (&sBG)[2][MQ1*MD1])
105 const int tidz = MFEM_THREAD_ID(z);
111 MFEM_FOREACH_THREAD(d,y,D1D)
113 MFEM_FOREACH_THREAD(q,x,Q1D)
124MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
125 const DeviceTensor<3, const real_t> &x,
128 MFEM_FOREACH_THREAD(dy,y,D1D)
130 MFEM_FOREACH_THREAD(dx,x,D1D)
132 DD(dx,dy) = x(dx,dy,e);
140template<
int MD1,
int NBZ>
141MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
142 const DeviceTensor<3, const real_t> &x,
143 real_t (&sX)[NBZ][MD1*MD1])
145 const int tidz = MFEM_THREAD_ID(z);
151MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
152 const DeviceTensor<4, const real_t> &x,
155 MFEM_FOREACH_THREAD(dy,y,D1D)
157 MFEM_FOREACH_THREAD(dx,x,D1D)
159 DD(dx,dy) = x(dx,dy,c,e);
165template<
int MD1,
int NBZ>
166MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
167 const DeviceTensor<4, const real_t> &x,
168 real_t (&sm)[NBZ][MD1*MD1])
170 const int tidz = MFEM_THREAD_ID(z);
176MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
181 MFEM_FOREACH_THREAD(dy,y,D1D)
183 MFEM_FOREACH_THREAD(qx,x,Q1D)
186 for (
int dx = 0; dx < D1D; ++dx)
188 u += B(dx,qx) * DD(dx,dy);
196template<
int MD1,
int MQ1,
int NBZ>
197MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
198 const real_t (&sB)[MQ1*MD1],
199 real_t (&sDD)[NBZ][MD1*MD1],
200 real_t (&sDQ)[NBZ][MD1*MQ1])
202 const int tidz = MFEM_THREAD_ID(z);
206 EvalX(D1D,Q1D,B,DD,DQ);
210MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
215 MFEM_FOREACH_THREAD(qy,y,Q1D)
217 MFEM_FOREACH_THREAD(qx,x,Q1D)
220 for (
int dy = 0; dy < D1D; ++dy)
222 u += DQ(dy,qx) * B(dy,qy);
230template<
int MD1,
int MQ1,
int NBZ>
231MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
232 const real_t (&sB)[MQ1*MD1],
233 real_t (&sDQ)[NBZ][MD1*MQ1],
234 real_t (&sQQ)[NBZ][MQ1*MQ1])
236 const int tidz = MFEM_THREAD_ID(z);
240 EvalY(D1D,Q1D,B,DQ,QQ);
244MFEM_HOST_DEVICE
inline void PullEval(
const int qx,
const int qy,
251template<
int MQ1,
int NBZ>
252MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
253 const int qx,
const int qy,
254 real_t (&sQQ)[NBZ][MQ1*MQ1],
257 const int tidz = MFEM_THREAD_ID(z);
259 PullEval(qx,qy,QQ,P);
263template<
int MD1,
int NBZ>
264MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
265 const DeviceTensor<4, const real_t> &X,
266 real_t (&sX)[2][NBZ][MD1*MD1])
268 const int tidz = MFEM_THREAD_ID(z);
272 MFEM_FOREACH_THREAD(dy,y,D1D)
274 MFEM_FOREACH_THREAD(dx,x,D1D)
276 X0(dx,dy) = X(dx,dy,0,e);
277 X1(dx,dy) = X(dx,dy,1,e);
284template<
int MD1,
int MQ1,
int NBZ>
285MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
286 const real_t (&sB)[MQ1*MD1],
287 const real_t (&sX)[2][NBZ][MD1*MD1],
288 real_t (&sDQ)[2][NBZ][MD1*MQ1])
290 const int tidz = MFEM_THREAD_ID(z);
297 MFEM_FOREACH_THREAD(dy,y,D1D)
299 MFEM_FOREACH_THREAD(qx,x,Q1D)
302 for (
int dx = 0; dx < D1D; ++dx)
304 const real_t xx = X0(dx,dy);
305 const real_t xy = X1(dx,dy);
306 u[0] += B(dx,qx) * xx;
307 u[1] += B(dx,qx) * xy;
317template<
int MD1,
int MQ1,
int NBZ>
318MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
319 const real_t (&sB)[MQ1*MD1],
320 const real_t (&sDQ)[2][NBZ][MD1*MQ1],
321 real_t (&sQQ)[2][NBZ][MQ1*MQ1])
323 const int tidz = MFEM_THREAD_ID(z);
330 MFEM_FOREACH_THREAD(qy,y,Q1D)
332 MFEM_FOREACH_THREAD(qx,x,Q1D)
335 for (
int dy = 0; dy < D1D; ++dy)
337 u[0] += DQ0(qx,dy) * B(dy,qy);
338 u[1] += DQ1(qx,dy) * B(dy,qy);
348template<
int MQ1,
int NBZ>
349MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
350 const int qx,
const int qy,
351 const real_t (&sQQ)[2][NBZ][MQ1*MQ1],
354 const int tidz = MFEM_THREAD_ID(z);
363template<
int MQ1,
int NBZ>
364MFEM_HOST_DEVICE
inline void PushEval(
const int Q1D,
365 const int qx,
const int qy,
367 real_t (&sQQ)[2][NBZ][MQ1*MQ1])
369 const int tidz = MFEM_THREAD_ID(z);
378template<
int MD1,
int MQ1,
int NBZ>
379MFEM_HOST_DEVICE
inline void EvalXt(
const int D1D,
const int Q1D,
380 const real_t (&sB)[MQ1*MD1],
381 const real_t (&sQQ)[2][NBZ][MQ1*MQ1],
382 real_t (&sDQ)[2][NBZ][MD1*MQ1])
384 const int tidz = MFEM_THREAD_ID(z);
391 MFEM_FOREACH_THREAD(qy,y,Q1D)
393 MFEM_FOREACH_THREAD(dx,x,D1D)
396 for (
int qx = 0; qx < Q1D; ++qx)
398 u[0] += QQ0(qx,qy) * Bt(qx,dx);
399 u[1] += QQ1(qx,qy) * Bt(qx,dx);
409template<
int MD1,
int MQ1,
int NBZ>
410MFEM_HOST_DEVICE
inline void EvalYt(
const int D1D,
const int Q1D,
411 const real_t (&sB)[MQ1*MD1],
412 const real_t (&sDQ)[2][NBZ][MD1*MQ1],
413 const DeviceTensor<4> &Y,
416 const int tidz = MFEM_THREAD_ID(z);
421 MFEM_FOREACH_THREAD(dy,y,D1D)
423 MFEM_FOREACH_THREAD(dx,x,D1D)
426 for (
int qy = 0; qy < Q1D; ++qy)
428 u[0] += Bt(qy,dy) * DQ0(qy,dx);
429 u[1] += Bt(qy,dy) * DQ1(qy,dx);
431 Y(dx,dy,0,e) +=
u[0];
432 Y(dx,dy,1,e) +=
u[1];
439template<
int MD1,
int MQ1,
int NBZ>
440MFEM_HOST_DEVICE
inline void GradX(
const int D1D,
const int Q1D,
441 const real_t (&sBG)[2][MQ1*MD1],
442 const real_t (&sX)[2][NBZ][MD1*MD1],
443 real_t (&sDQ)[4][NBZ][MD1*MQ1])
445 const int tidz = MFEM_THREAD_ID(z);
455 MFEM_FOREACH_THREAD(dy,y,D1D)
457 MFEM_FOREACH_THREAD(qx,x,Q1D)
461 for (
int dx = 0; dx < D1D; ++dx)
463 const real_t Bx = B(dx,qx);
464 const real_t Gx = G(dx,qx);
465 const real_t x0 = X0(dx,dy);
466 const real_t x1 = X1(dx,dy);
482template<
int MD1,
int MQ1,
int NBZ>
483MFEM_HOST_DEVICE
inline void GradY(
const int D1D,
const int Q1D,
484 const real_t (&sBG)[2][MQ1*MD1],
485 const real_t (&sDQ)[4][NBZ][MD1*MQ1],
486 real_t (&sQQ)[4][NBZ][MQ1*MQ1])
488 const int tidz = MFEM_THREAD_ID(z);
500 MFEM_FOREACH_THREAD(qy,y,Q1D)
502 MFEM_FOREACH_THREAD(qx,x,Q1D)
506 for (
int dy = 0; dy < D1D; ++dy)
508 const real_t By = B(dy,qy);
509 const real_t Gy = G(dy,qy);
510 u[0] += X0G(qx,dy) * By;
511 v[0] += X0B(qx,dy) * Gy;
512 u[1] += X1G(qx,dy) * By;
513 v[1] += X1B(qx,dy) * Gy;
525template<
int MQ1,
int NBZ>
526MFEM_HOST_DEVICE
inline void PullGrad(
const int Q1D,
527 const int qx,
const int qy,
528 const real_t (&sQQ)[4][NBZ][MQ1*MQ1],
531 const int tidz = MFEM_THREAD_ID(z);
537 Jpr[0] = X0GB(qx,qy);
538 Jpr[1] = X1GB(qx,qy);
539 Jpr[2] = X0BG(qx,qy);
540 Jpr[3] = X1BG(qx,qy);
544template<
int MQ1,
int NBZ>
545MFEM_HOST_DEVICE
inline void PushGrad(
const int Q1D,
546 const int qx,
const int qy,
548 real_t (&sQQ)[4][NBZ][MQ1*MQ1])
550 const int tidz = MFEM_THREAD_ID(z);
563template<
int MD1,
int MQ1,
int NBZ>
564MFEM_HOST_DEVICE
inline void GradYt(
const int D1D,
const int Q1D,
565 const real_t (&sBG)[2][MQ1*MD1],
566 const real_t (&GQ)[4][NBZ][MQ1*MQ1],
567 real_t (&GD)[4][NBZ][MD1*MQ1])
569 const int tidz = MFEM_THREAD_ID(z);
581 MFEM_FOREACH_THREAD(qy,y,Q1D)
583 MFEM_FOREACH_THREAD(dx,x,D1D)
587 for (
int qx = 0; qx < Q1D; ++qx)
589 u[0] += Gt(qx,dx) * QQx0(qx,qy);
590 u[1] += Gt(qx,dx) * QQy0(qx,qy);
591 v[0] += Bt(qx,dx) * QQx1(qx,qy);
592 v[1] += Bt(qx,dx) * QQy1(qx,qy);
604template<
int MD1,
int MQ1,
int NBZ>
605MFEM_HOST_DEVICE
inline void GradXt(
const int D1D,
const int Q1D,
606 const real_t (&sBG)[2][MQ1*MD1],
607 const real_t (&GD)[4][NBZ][MD1*MQ1],
608 const DeviceTensor<4> &Y,
611 const int tidz = MFEM_THREAD_ID(z);
619 MFEM_FOREACH_THREAD(dy,y,D1D)
621 MFEM_FOREACH_THREAD(dx,x,D1D)
625 for (
int qy = 0; qy < Q1D; ++qy)
627 u[0] += DQxB(qy,dx) * Bt(qy,dy);
628 u[1] += DQyB(qy,dx) * Bt(qy,dy);
629 v[0] += DQxG(qy,dx) * Gt(qy,dy);
630 v[1] += DQyG(qy,dx) * Gt(qy,dy);
632 Y(dx,dy,0,e) +=
u[0] + v[0];
633 Y(dx,dy,1,e) +=
u[1] + v[1];
640MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
641 const DeviceTensor<4, const real_t> &x,
644 MFEM_FOREACH_THREAD(dz,z,D1D)
646 MFEM_FOREACH_THREAD(dy,y,D1D)
648 MFEM_FOREACH_THREAD(dx,x,D1D)
650 X(dx,dy,dz) = x(dx,dy,dz,e);
658MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
659 const DeviceTensor<4, const real_t> &x,
660 real_t (&sm)[MD1*MD1*MD1])
667MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
668 const DeviceTensor<5, const real_t> &x,
671 MFEM_FOREACH_THREAD(dz,z,D1D)
673 MFEM_FOREACH_THREAD(dy,y,D1D)
675 MFEM_FOREACH_THREAD(dx,x,D1D)
677 X(dx,dy,dz) = x(dx,dy,dz,c,e);
686MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
687 const DeviceTensor<5, const real_t> &x,
688 real_t (&sm)[MD1*MD1*MD1])
691 return LoadX<MD1>(e,D1D,c,x,X);
695MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
700 MFEM_FOREACH_THREAD(dz,z,D1D)
702 MFEM_FOREACH_THREAD(dy,y,D1D)
704 MFEM_FOREACH_THREAD(qx,x,Q1D)
707 for (
int dx = 0; dx < D1D; ++dx)
709 const real_t Bx = B(dx,qx);
710 u += Bx * DDD(dx,dy,dz);
719template<
int MD1,
int MQ1>
720MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
721 const real_t (&sB)[MQ1*MD1],
722 const real_t (&sDDD)[MD1*MD1*MD1],
723 real_t (&sDDQ)[MD1*MD1*MQ1])
728 EvalX(D1D,Q1D,B,DDD,DDQ);
732MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
737 MFEM_FOREACH_THREAD(dz,z,D1D)
739 MFEM_FOREACH_THREAD(qy,y,Q1D)
741 MFEM_FOREACH_THREAD(qx,x,Q1D)
744 for (
int dy = 0; dy < D1D; ++dy)
746 const real_t By = B(dy,qy);
747 u += DDQ(dz,dy,qx) * By;
756template<
int MD1,
int MQ1>
757MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
758 const real_t (&sB)[MQ1*MD1],
759 const real_t (&sDDQ)[MD1*MD1*MQ1],
760 real_t (&sDQQ)[MD1*MQ1*MQ1])
765 EvalY(D1D,Q1D,B,DDQ,DQQ);
769MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
774 MFEM_FOREACH_THREAD(qz,z,Q1D)
776 MFEM_FOREACH_THREAD(qy,y,Q1D)
778 MFEM_FOREACH_THREAD(qx,x,Q1D)
781 for (
int dz = 0; dz < D1D; ++dz)
783 const real_t Bz = B(dz,qz);
784 u += DQQ(dz,qy,qx) * Bz;
793template<
int MD1,
int MQ1>
794MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
795 const real_t (&sB)[MQ1*MD1],
796 const real_t (&sDQQ)[MD1*MQ1*MQ1],
797 real_t (&sQQQ)[MQ1*MQ1*MQ1])
802 EvalZ(D1D,Q1D,B,DQQ,QQQ);
806MFEM_HOST_DEVICE
inline void PullEval(
const int x,
const int y,
const int z,
814MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
815 const int x,
const int y,
const int z,
816 const real_t (&sQQQ)[MQ1*MQ1*MQ1],
820 PullEval(x,y,z,QQQ,X);
825MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
826 const DeviceTensor<5, const real_t> &X,
827 real_t (*sm)[MD1*MD1*MD1])
833 MFEM_FOREACH_THREAD(dz,z,D1D)
835 MFEM_FOREACH_THREAD(dy,y,D1D)
837 MFEM_FOREACH_THREAD(dx,x,D1D)
839 Xx(dx,dy,dz) = X(dx,dy,dz,0,e);
840 Xy(dx,dy,dz) = X(dx,dy,dz,1,e);
841 Xz(dx,dy,dz) = X(dx,dy,dz,2,e);
849template<
int MD1,
int MQ1>
850MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
851 const real_t (&sB)[MQ1*MD1],
852 const real_t (&sDDD)[3][MD1*MD1*MD1],
853 real_t (&sDDQ)[3][MD1*MD1*MQ1])
863 MFEM_FOREACH_THREAD(dz,z,D1D)
865 MFEM_FOREACH_THREAD(dy,y,D1D)
867 MFEM_FOREACH_THREAD(qx,x,Q1D)
870 for (
int dx = 0; dx < D1D; ++dx)
872 const real_t Bx = B(dx,qx);
873 u[0] += Bx * Xx(dx,dy,dz);
874 u[1] += Bx * Xy(dx,dy,dz);
875 u[2] += Bx * Xz(dx,dy,dz);
877 XxB(qx,dy,dz) =
u[0];
878 XyB(qx,dy,dz) =
u[1];
879 XzB(qx,dy,dz) =
u[2];
887template<
int MD1,
int MQ1>
888MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
889 const real_t (&sB)[MQ1*MD1],
890 const real_t (&sDDQ)[3][MD1*MD1*MQ1],
891 real_t (&sDQQ)[3][MD1*MQ1*MQ1])
901 MFEM_FOREACH_THREAD(dz,z,D1D)
903 MFEM_FOREACH_THREAD(qy,y,Q1D)
905 MFEM_FOREACH_THREAD(qx,x,Q1D)
908 for (
int dy = 0; dy < D1D; ++dy)
910 const real_t By = B(dy,qy);
911 u[0] += XxB(qx,dy,dz) * By;
912 u[1] += XyB(qx,dy,dz) * By;
913 u[2] += XzB(qx,dy,dz) * By;
915 XxBB(qx,qy,dz) =
u[0];
916 XyBB(qx,qy,dz) =
u[1];
917 XzBB(qx,qy,dz) =
u[2];
925template<
int MD1,
int MQ1>
926MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
927 const real_t (&sB)[MQ1*MD1],
928 const real_t (&sDQQ)[3][MD1*MQ1*MQ1],
929 real_t (&sQQQ)[3][MQ1*MQ1*MQ1])
939 MFEM_FOREACH_THREAD(qz,z,Q1D)
941 MFEM_FOREACH_THREAD(qy,y,Q1D)
943 MFEM_FOREACH_THREAD(qx,x,Q1D)
946 for (
int dz = 0; dz < D1D; ++dz)
948 const real_t Bz = B(dz,qz);
949 u[0] += XxBB(qx,qy,dz) * Bz;
950 u[1] += XyBB(qx,qy,dz) * Bz;
951 u[2] += XzBB(qx,qy,dz) * Bz;
953 XxBBB(qx,qy,qz) =
u[0];
954 XyBBB(qx,qy,qz) =
u[1];
955 XzBBB(qx,qy,qz) =
u[2];
964MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
965 const int x,
const int y,
const int z,
966 const real_t (&sQQQ)[3][MQ1*MQ1*MQ1],
980MFEM_HOST_DEVICE
inline void PushEval(
const int Q1D,
981 const int x,
const int y,
const int z,
983 real_t (&sQQQ)[3][MQ1*MQ1*MQ1])
995template<
int MD1,
int MQ1>
996MFEM_HOST_DEVICE
inline void EvalXt(
const int D1D,
const int Q1D,
997 const real_t (&sB)[MQ1*MD1],
998 const real_t (&sQQQ)[3][MQ1*MQ1*MQ1],
999 real_t (&sDQQ)[3][MD1*MQ1*MQ1])
1009 MFEM_FOREACH_THREAD(qz,z,Q1D)
1011 MFEM_FOREACH_THREAD(qy,y,Q1D)
1013 MFEM_FOREACH_THREAD(dx,x,D1D)
1015 real_t u[3] = {0.0, 0.0, 0.0};
1016 for (
int qx = 0; qx < Q1D; ++qx)
1018 const real_t Btx = Bt(qx,dx);
1019 u[0] += XxBBB(qx,qy,qz) * Btx;
1020 u[1] += XyBBB(qx,qy,qz) * Btx;
1021 u[2] += XzBBB(qx,qy,qz) * Btx;
1023 XxBB(qz,qy,dx) =
u[0];
1024 XyBB(qz,qy,dx) =
u[1];
1025 XzBB(qz,qy,dx) =
u[2];
1033template<
int MD1,
int MQ1>
1034MFEM_HOST_DEVICE
inline void EvalYt(
const int D1D,
const int Q1D,
1035 const real_t (&sB)[MQ1*MD1],
1036 const real_t (&sDQQ)[3][MD1*MQ1*MQ1],
1037 real_t (&sDDQ)[3][MD1*MD1*MQ1])
1047 MFEM_FOREACH_THREAD(qz,z,Q1D)
1049 MFEM_FOREACH_THREAD(dy,y,D1D)
1051 MFEM_FOREACH_THREAD(dx,x,D1D)
1053 real_t u[3] = {0.0, 0.0, 0.0};
1054 for (
int qy = 0; qy < Q1D; ++qy)
1056 const real_t Bty = Bt(qy,dy);
1057 u[0] += XxBB(qz,qy,dx) * Bty;
1058 u[1] += XyBB(qz,qy,dx) * Bty;
1059 u[2] += XzBB(qz,qy,dx) * Bty;
1062 XxB(qz,dy,dx) =
u[0];
1063 XyB(qz,dy,dx) =
u[1];
1064 XzB(qz,dy,dx)=
u[2];
1072template<
int MD1,
int MQ1>
1073MFEM_HOST_DEVICE
inline void EvalZt(
const int D1D,
const int Q1D,
1074 const real_t (&sB)[MQ1*MD1],
1075 const real_t (&sDDQ)[3][MD1*MD1*MQ1],
1076 const DeviceTensor<5> &Y,
1084 MFEM_FOREACH_THREAD(dz,z,D1D)
1086 MFEM_FOREACH_THREAD(dy,y,D1D)
1088 MFEM_FOREACH_THREAD(dx,x,D1D)
1090 real_t u[3] = {0.0, 0.0, 0.0};
1091 for (
int qz = 0; qz < Q1D; ++qz)
1093 const real_t Btz = Bt(qz,dz);
1094 u[0] += XxB(qz,dy,dx) * Btz;
1095 u[1] += XyB(qz,dy,dx) * Btz;
1096 u[2] += XzB(qz,dy,dx) * Btz;
1098 Y(dx,dy,dz,0,e) +=
u[0];
1099 Y(dx,dy,dz,1,e) +=
u[1];
1100 Y(dx,dy,dz,2,e) +=
u[2];
1107template<
int MD1,
int MQ1>
1108MFEM_HOST_DEVICE
inline void GradX(
const int D1D,
const int Q1D,
1109 const real_t (*sBG)[MQ1*MD1],
1110 const real_t (*sDDD)[MD1*MD1*MD1],
1111 real_t (*sDDQ)[MD1*MD1*MQ1])
1125 MFEM_FOREACH_THREAD(dz,z,D1D)
1127 MFEM_FOREACH_THREAD(dy,y,D1D)
1129 MFEM_FOREACH_THREAD(qx,x,Q1D)
1131 real_t u[3] = {0.0, 0.0, 0.0};
1132 real_t v[3] = {0.0, 0.0, 0.0};
1133 for (
int dx = 0; dx < D1D; ++dx)
1135 const real_t xx = Xx(dx,dy,dz);
1136 const real_t xy = Xy(dx,dy,dz);
1137 const real_t xz = Xz(dx,dy,dz);
1138 const real_t Bx = B(dx,qx);
1139 const real_t Gx = G(dx,qx);
1148 XxB(qx,dy,dz) =
u[0];
1149 XyB(qx,dy,dz) =
u[1];
1150 XzB(qx,dy,dz) =
u[2];
1152 XxG(qx,dy,dz) = v[0];
1153 XyG(qx,dy,dz) = v[1];
1154 XzG(qx,dy,dz) = v[2];
1162template<
int MD1,
int MQ1>
1163MFEM_HOST_DEVICE
inline void GradY(
const int D1D,
const int Q1D,
1164 const real_t (*sBG)[MQ1*MD1],
1165 const real_t (*sDDQ)[MD1*MD1*MQ1],
1166 real_t (*sDQQ)[MD1*MQ1*MQ1])
1186 MFEM_FOREACH_THREAD(dz,z,D1D)
1188 MFEM_FOREACH_THREAD(qy,y,Q1D)
1190 MFEM_FOREACH_THREAD(qx,x,Q1D)
1192 real_t u[3] = {0.0, 0.0, 0.0};
1193 real_t v[3] = {0.0, 0.0, 0.0};
1194 real_t w[3] = {0.0, 0.0, 0.0};
1195 for (
int dy = 0; dy < D1D; ++dy)
1197 const real_t By = B(dy,qy);
1198 const real_t Gy = G(dy,qy);
1200 u[0] += XxB(qx,dy,dz) * By;
1201 u[1] += XyB(qx,dy,dz) * By;
1202 u[2] += XzB(qx,dy,dz) * By;
1204 v[0] += XxG(qx,dy,dz) * By;
1205 v[1] += XyG(qx,dy,dz) * By;
1206 v[2] += XzG(qx,dy,dz) * By;
1208 w[0] += XxB(qx,dy,dz) * Gy;
1209 w[1] += XyB(qx,dy,dz) * Gy;
1210 w[2] += XzB(qx,dy,dz) * Gy;
1212 XxBB(qx,qy,dz) =
u[0];
1213 XyBB(qx,qy,dz) =
u[1];
1214 XzBB(qx,qy,dz) =
u[2];
1216 XxBG(qx,qy,dz) = v[0];
1217 XyBG(qx,qy,dz) = v[1];
1218 XzBG(qx,qy,dz) = v[2];
1220 XxGB(qx,qy,dz) = w[0];
1221 XyGB(qx,qy,dz) = w[1];
1222 XzGB(qx,qy,dz) = w[2];
1230template<
int MD1,
int MQ1>
1231MFEM_HOST_DEVICE
inline void GradZ(
const int D1D,
const int Q1D,
1232 const real_t (*sBG)[MQ1*MD1],
1233 const real_t (*sDQQ)[MD1*MQ1*MQ1],
1234 real_t (*sQQQ)[MQ1*MQ1*MQ1])
1257 MFEM_FOREACH_THREAD(qz,z,Q1D)
1259 MFEM_FOREACH_THREAD(qy,y,Q1D)
1261 MFEM_FOREACH_THREAD(qx,x,Q1D)
1263 real_t u[3] = {0.0, 0.0, 0.0};
1264 real_t v[3] = {0.0, 0.0, 0.0};
1265 real_t w[3] = {0.0, 0.0, 0.0};
1266 for (
int dz = 0; dz < D1D; ++dz)
1268 const real_t Bz = B(dz,qz);
1269 const real_t Gz = G(dz,qz);
1271 u[0] += XxBG(qx,qy,dz) * Bz;
1272 u[1] += XyBG(qx,qy,dz) * Bz;
1273 u[2] += XzBG(qx,qy,dz) * Bz;
1275 v[0] += XxGB(qx,qy,dz) * Bz;
1276 v[1] += XyGB(qx,qy,dz) * Bz;
1277 v[2] += XzGB(qx,qy,dz) * Bz;
1279 w[0] += XxBB(qx,qy,dz) * Gz;
1280 w[1] += XyBB(qx,qy,dz) * Gz;
1281 w[2] += XzBB(qx,qy,dz) * Gz;
1283 XxBBG(qx,qy,qz) =
u[0];
1284 XyBBG(qx,qy,qz) =
u[1];
1285 XzBBG(qx,qy,qz) =
u[2];
1287 XxBGB(qx,qy,qz) = v[0];
1288 XyBGB(qx,qy,qz) = v[1];
1289 XzBGB(qx,qy,qz) = v[2];
1291 XxGBB(qx,qy,qz)= w[0];
1292 XyGBB(qx,qy,qz) = w[1];
1293 XzGBB(qx,qy,qz) = w[2];
1302MFEM_HOST_DEVICE
inline void PullGrad(
const int Q1D,
1303 const int x,
const int y,
const int z,
1304 const real_t (*sQQQ)[MQ1*MQ1*MQ1],
1317 Jpr[0] = XxBBG(x,y,z);
1318 Jpr[3] = XxBGB(x,y,z);
1319 Jpr[6] = XxGBB(x,y,z);
1320 Jpr[1] = XyBBG(x,y,z);
1321 Jpr[4] = XyBGB(x,y,z);
1322 Jpr[7] = XyGBB(x,y,z);
1323 Jpr[2] = XzBBG(x,y,z);
1324 Jpr[5] = XzBGB(x,y,z);
1325 Jpr[8] = XzGBB(x,y,z);
1330MFEM_HOST_DEVICE
inline void PushGrad(
const int Q1D,
1331 const int x,
const int y,
const int z,
1333 real_t (&sQQQ)[9][MQ1*MQ1*MQ1])
1345 XxBBG(x,y,z) = A[0];
1346 XxBGB(x,y,z) = A[1];
1347 XxGBB(x,y,z) = A[2];
1348 XyBBG(x,y,z) = A[3];
1349 XyBGB(x,y,z) = A[4];
1350 XyGBB(x,y,z) = A[5];
1351 XzBBG(x,y,z) = A[6];
1352 XzBGB(x,y,z) = A[7];
1353 XzGBB(x,y,z) = A[8];
1357template<
int MD1,
int MQ1>
1358MFEM_HOST_DEVICE
inline void GradZt(
const int D1D,
const int Q1D,
1359 const real_t (&sBG)[2][MQ1*MD1],
1360 const real_t (&sQQQ)[9][MQ1*MQ1*MQ1],
1361 real_t (&sDQQ)[9][MD1*MQ1*MQ1])
1385 MFEM_FOREACH_THREAD(qz,z,Q1D)
1387 MFEM_FOREACH_THREAD(qy,y,Q1D)
1389 MFEM_FOREACH_THREAD(dx,x,D1D)
1391 real_t u[3] = {0.0, 0.0, 0.0};
1392 real_t v[3] = {0.0, 0.0, 0.0};
1393 real_t w[3] = {0.0, 0.0, 0.0};
1394 for (
int qx = 0; qx < Q1D; ++qx)
1396 const real_t Btx = Bt(qx,dx);
1397 const real_t Gtx = Gt(qx,dx);
1399 u[0] += XxBBG(qx,qy,qz) * Gtx;
1400 v[0] += XxBGB(qx,qy,qz) * Btx;
1401 w[0] += XxGBB(qx,qy,qz) * Btx;
1403 u[1] += XyBBG(qx,qy,qz) * Gtx;
1404 v[1] += XyBGB(qx,qy,qz) * Btx;
1405 w[1] += XyGBB(qx,qy,qz) * Btx;
1407 u[2] += XzBBG(qx,qy,qz) * Gtx;
1408 v[2] += XzBGB(qx,qy,qz) * Btx;
1409 w[2] += XzGBB(qx,qy,qz) * Btx;
1411 XxBB(qz,qy,dx) =
u[0];
1412 XxBG(qz,qy,dx) = v[0];
1413 XxGB(qz,qy,dx) = w[0];
1415 XyBB(qz,qy,dx) =
u[1];
1416 XyBG(qz,qy,dx) = v[1];
1417 XyGB(qz,qy,dx) = w[1];
1419 XzBB(qz,qy,dx) =
u[2];
1420 XzBG(qz,qy,dx) = v[2];
1421 XzGB(qz,qy,dx) = w[2];
1429template<
int MD1,
int MQ1>
1430MFEM_HOST_DEVICE
inline void GradYt(
const int D1D,
const int Q1D,
1431 const real_t (&sBG)[2][MQ1*MD1],
1432 const real_t (&sDQQ)[9][MD1*MQ1*MQ1],
1433 real_t (&sDDQ)[9][MD1*MD1*MQ1])
1456 MFEM_FOREACH_THREAD(qz,z,Q1D)
1458 MFEM_FOREACH_THREAD(dy,y,D1D)
1460 MFEM_FOREACH_THREAD(dx,x,D1D)
1462 real_t u[3] = {0.0, 0.0, 0.0};
1463 real_t v[3] = {0.0, 0.0, 0.0};
1464 real_t w[3] = {0.0, 0.0, 0.0};
1465 for (
int qy = 0; qy < Q1D; ++qy)
1467 const real_t Bty = Bt(qy,dy);
1468 const real_t Gty = Gt(qy,dy);
1470 u[0] += XxBB(qz,qy,dx) * Bty;
1471 v[0] += XxBG(qz,qy,dx) * Gty;
1472 w[0] += XxGB(qz,qy,dx) * Bty;
1474 u[1] += XyBB(qz,qy,dx) * Bty;
1475 v[1] += XyBG(qz,qy,dx) * Gty;
1476 w[1] += XyGB(qz,qy,dx) * Bty;
1478 u[2] += XzBB(qz,qy,dx) * Bty;
1479 v[2] += XzBG(qz,qy,dx) * Gty;
1480 w[2] += XzGB(qz,qy,dx) * Bty;
1483 XxB(qz,dy,dx) =
u[0];
1484 XxC(qz,dy,dx) = v[0];
1485 XxG(qz,dy,dx) = w[0];
1487 XyB(qz,dy,dx) =
u[1];
1488 XyC(qz,dy,dx) = v[1];
1489 XyG(qz,dy,dx) = w[1];
1491 XzB(qz,dy,dx) =
u[2];
1492 XzC(qz,dy,dx) = v[2];
1493 XzG(qz,dy,dx) = w[2];
1501template<
int MD1,
int MQ1>
1502MFEM_HOST_DEVICE
inline void GradXt(
const int D1D,
const int Q1D,
1503 const real_t (&sBG)[2][MQ1*MD1],
1504 const real_t (&sDDQ)[9][MD1*MD1*MQ1],
1505 const DeviceTensor<5> &Y,
1520 MFEM_FOREACH_THREAD(dz,z,D1D)
1522 MFEM_FOREACH_THREAD(dy,y,D1D)
1524 MFEM_FOREACH_THREAD(dx,x,D1D)
1526 real_t u[3] = {0.0, 0.0, 0.0};
1527 real_t v[3] = {0.0, 0.0, 0.0};
1528 real_t w[3] = {0.0, 0.0, 0.0};
1529 for (
int qz = 0; qz < Q1D; ++qz)
1531 const real_t Btz = Bt(qz,dz);
1532 const real_t Gtz = Gt(qz,dz);
1534 u[0] += XxB(qz,dy,dx) * Btz;
1535 v[0] += XxC(qz,dy,dx) * Btz;
1536 w[0] += XxG(qz,dy,dx) * Gtz;
1538 u[1] += XyB(qz,dy,dx) * Btz;
1539 v[1] += XyC(qz,dy,dx)* Btz;
1540 w[1] += XyG(qz,dy,dx) * Gtz;
1542 u[2] += XzB(qz,dy,dx) * Btz;
1543 v[2] += XzC(qz,dy,dx) * Btz;
1544 w[2] += XzG(qz,dy,dx) * Gtz;
1546 Y(dx,dy,dz,0,e) +=
u[0] + v[0] + w[0];
1547 Y(dx,dy,dz,1,e) +=
u[1] + v[1] + w[1];
1548 Y(dx,dy,dz,2,e) +=
u[2] + v[2] + w[2];
real_t u(const Vector &xvec)
DeviceTensor< 2, real_t > DeviceMatrix
DeviceTensor< 2, const real_t > ConstDeviceMatrix
DeviceTensor< 3, const real_t > ConstDeviceCube
DeviceTensor< 3, real_t > DeviceCube