12 #ifndef MFEM_FEM_KERNELS_HPP
13 #define MFEM_FEM_KERNELS_HPP
15 #include "../config/config.hpp"
16 #include "../linalg/dtensor.hpp"
30 template<
int MD1,
int MQ1>
31 MFEM_HOST_DEVICE
inline void LoadB(
const int D1D,
const int Q1D,
33 double (&sB)[MQ1*MD1])
35 const int tidz = MFEM_THREAD_ID(z);
40 MFEM_FOREACH_THREAD(d,y,D1D)
42 MFEM_FOREACH_THREAD(q,x,Q1D)
52 template<
int MD1,
int MQ1>
53 MFEM_HOST_DEVICE
inline void LoadBt(
const int D1D,
const int Q1D,
55 double (&sB)[MQ1*MD1])
57 const int tidz = MFEM_THREAD_ID(z);
62 MFEM_FOREACH_THREAD(d,y,D1D)
64 MFEM_FOREACH_THREAD(q,x,Q1D)
74 template<
int MD1,
int MQ1>
75 MFEM_HOST_DEVICE
inline void LoadBG(
const int D1D,
const int Q1D,
78 double (&sBG)[2][MQ1*MD1])
80 const int tidz = MFEM_THREAD_ID(z);
86 MFEM_FOREACH_THREAD(d,y,D1D)
88 MFEM_FOREACH_THREAD(q,x,Q1D)
99 template<
int MD1,
int MQ1>
100 MFEM_HOST_DEVICE
inline void LoadBGt(
const int D1D,
const int Q1D,
103 double (&sBG)[2][MQ1*MD1])
105 const int tidz = MFEM_THREAD_ID(z);
111 MFEM_FOREACH_THREAD(d,y,D1D)
113 MFEM_FOREACH_THREAD(q,x,Q1D)
124 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
125 const DeviceTensor<3, const double> &x,
128 MFEM_FOREACH_THREAD(dy,y,D1D)
130 MFEM_FOREACH_THREAD(dx,x,D1D)
132 DD(dx,dy) = x(dx,dy,e);
140 template<
int MD1,
int NBZ>
141 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
142 const DeviceTensor<3, const double> &x,
143 double (&sX)[NBZ][MD1*MD1])
145 const int tidz = MFEM_THREAD_ID(z);
151 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
152 const DeviceTensor<4, const double> &x,
155 MFEM_FOREACH_THREAD(dy,y,D1D)
157 MFEM_FOREACH_THREAD(dx,x,D1D)
159 DD(dx,dy) = x(dx,dy,c,e);
165 template<
int MD1,
int NBZ>
166 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
167 const DeviceTensor<4, const double> &x,
168 double (&sm)[NBZ][MD1*MD1])
170 const int tidz = MFEM_THREAD_ID(z);
176 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
181 MFEM_FOREACH_THREAD(dy,y,D1D)
183 MFEM_FOREACH_THREAD(qx,x,Q1D)
186 for (
int dx = 0; dx < D1D; ++dx)
188 u += B(dx,qx) * DD(dx,dy);
196 template<
int MD1,
int MQ1,
int NBZ>
197 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
198 const double (&sB)[MQ1*MD1],
199 double (&sDD)[NBZ][MD1*MD1],
200 double (&sDQ)[NBZ][MD1*MQ1])
202 const int tidz = MFEM_THREAD_ID(z);
206 EvalX(D1D,Q1D,B,DD,DQ);
210 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
215 MFEM_FOREACH_THREAD(qy,y,Q1D)
217 MFEM_FOREACH_THREAD(qx,x,Q1D)
220 for (
int dy = 0; dy < D1D; ++dy)
222 u += DQ(dy,qx) * B(dy,qy);
230 template<
int MD1,
int MQ1,
int NBZ>
231 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
232 const double (&sB)[MQ1*MD1],
233 double (&sDQ)[NBZ][MD1*MQ1],
234 double (&sQQ)[NBZ][MQ1*MQ1])
236 const int tidz = MFEM_THREAD_ID(z);
240 EvalY(D1D,Q1D,B,DQ,QQ);
244 MFEM_HOST_DEVICE
inline void PullEval(
const int qx,
const int qy,
251 template<
int MQ1,
int NBZ>
252 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
253 const int qx,
const int qy,
254 double (&sQQ)[NBZ][MQ1*MQ1],
257 const int tidz = MFEM_THREAD_ID(z);
259 PullEval(qx,qy,QQ,P);
263 template<
int MD1,
int NBZ>
264 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
265 const DeviceTensor<4, const double> &X,
266 double (&sX)[2][NBZ][MD1*MD1])
268 const int tidz = MFEM_THREAD_ID(z);
272 MFEM_FOREACH_THREAD(dy,y,D1D)
274 MFEM_FOREACH_THREAD(dx,x,D1D)
276 X0(dx,dy) = X(dx,dy,0,e);
277 X1(dx,dy) = X(dx,dy,1,e);
284 template<
int MD1,
int MQ1,
int NBZ>
285 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
286 const double (&sB)[MQ1*MD1],
287 const double (&sX)[2][NBZ][MD1*MD1],
288 double (&sDQ)[2][NBZ][MD1*MQ1])
290 const int tidz = MFEM_THREAD_ID(z);
297 MFEM_FOREACH_THREAD(dy,y,D1D)
299 MFEM_FOREACH_THREAD(qx,x,Q1D)
301 double u[2] = {0.0, 0.0};
302 for (
int dx = 0; dx < D1D; ++dx)
304 const double xx = X0(dx,dy);
305 const double xy = X1(dx,dy);
306 u[0] += B(dx,qx) * xx;
307 u[1] += B(dx,qx) * xy;
317 template<
int MD1,
int MQ1,
int NBZ>
318 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
319 const double (&sB)[MQ1*MD1],
320 const double (&sDQ)[2][NBZ][MD1*MQ1],
321 double (&sQQ)[2][NBZ][MQ1*MQ1])
323 const int tidz = MFEM_THREAD_ID(z);
330 MFEM_FOREACH_THREAD(qy,y,Q1D)
332 MFEM_FOREACH_THREAD(qx,x,Q1D)
334 double u[2] = {0.0, 0.0};
335 for (
int dy = 0; dy < D1D; ++dy)
337 u[0] += DQ0(qx,dy) * B(dy,qy);
338 u[1] += DQ1(qx,dy) * B(dy,qy);
348 template<
int MQ1,
int NBZ>
349 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
350 const int qx,
const int qy,
351 const double (&sQQ)[2][NBZ][MQ1*MQ1],
354 const int tidz = MFEM_THREAD_ID(z);
363 template<
int MQ1,
int NBZ>
364 MFEM_HOST_DEVICE
inline void PushEval(
const int Q1D,
365 const int qx,
const int qy,
367 double (&sQQ)[2][NBZ][MQ1*MQ1])
369 const int tidz = MFEM_THREAD_ID(z);
378 template<
int MD1,
int MQ1,
int NBZ>
379 MFEM_HOST_DEVICE
inline void EvalXt(
const int D1D,
const int Q1D,
380 const double (&sB)[MQ1*MD1],
381 const double (&sQQ)[2][NBZ][MQ1*MQ1],
382 double (&sDQ)[2][NBZ][MD1*MQ1])
384 const int tidz = MFEM_THREAD_ID(z);
391 MFEM_FOREACH_THREAD(qy,y,Q1D)
393 MFEM_FOREACH_THREAD(dx,x,D1D)
395 double u[2] = {0.0, 0.0};
396 for (
int qx = 0; qx < Q1D; ++qx)
398 u[0] += QQ0(qx,qy) * Bt(qx,dx);
399 u[1] += QQ1(qx,qy) * Bt(qx,dx);
409 template<
int MD1,
int MQ1,
int NBZ>
410 MFEM_HOST_DEVICE
inline void EvalYt(
const int D1D,
const int Q1D,
411 const double (&sB)[MQ1*MD1],
412 const double (&sDQ)[2][NBZ][MD1*MQ1],
413 const DeviceTensor<4> &Y,
416 const int tidz = MFEM_THREAD_ID(z);
421 MFEM_FOREACH_THREAD(dy,y,D1D)
423 MFEM_FOREACH_THREAD(dx,x,D1D)
425 double u[2] = {0.0, 0.0};
426 for (
int qy = 0; qy < Q1D; ++qy)
428 u[0] += Bt(qy,dy) * DQ0(qy,dx);
429 u[1] += Bt(qy,dy) * DQ1(qy,dx);
431 Y(dx,dy,0,e) += u[0];
432 Y(dx,dy,1,e) += u[1];
439 template<
int MD1,
int MQ1,
int NBZ>
440 MFEM_HOST_DEVICE
inline void GradX(
const int D1D,
const int Q1D,
441 const double (&sBG)[2][MQ1*MD1],
442 const double (&sX)[2][NBZ][MD1*MD1],
443 double (&sDQ)[4][NBZ][MD1*MQ1])
445 const int tidz = MFEM_THREAD_ID(z);
455 MFEM_FOREACH_THREAD(dy,y,D1D)
457 MFEM_FOREACH_THREAD(qx,x,Q1D)
459 double u[2] = {0.0, 0.0};
460 double v[2] = {0.0, 0.0};
461 for (
int dx = 0; dx < D1D; ++dx)
463 const double Bx = B(dx,qx);
464 const double Gx = G(dx,qx);
465 const double x0 = X0(dx,dy);
466 const double x1 = X1(dx,dy);
482 template<
int MD1,
int MQ1,
int NBZ>
483 MFEM_HOST_DEVICE
inline void GradY(
const int D1D,
const int Q1D,
484 const double (&sBG)[2][MQ1*MD1],
485 const double (&sDQ)[4][NBZ][MD1*MQ1],
486 double (&sQQ)[4][NBZ][MQ1*MQ1])
488 const int tidz = MFEM_THREAD_ID(z);
500 MFEM_FOREACH_THREAD(qy,y,Q1D)
502 MFEM_FOREACH_THREAD(qx,x,Q1D)
504 double u[2] = {0.0, 0.0};
505 double v[2] = {0.0, 0.0};
506 for (
int dy = 0; dy < D1D; ++dy)
508 const double By = B(dy,qy);
509 const double Gy = G(dy,qy);
510 u[0] += X0G(qx,dy) * By;
511 v[0] += X0B(qx,dy) * Gy;
512 u[1] += X1G(qx,dy) * By;
513 v[1] += X1B(qx,dy) * Gy;
525 template<
int MQ1,
int NBZ>
526 MFEM_HOST_DEVICE
inline void PullGrad(
const int Q1D,
527 const int qx,
const int qy,
528 const double (&sQQ)[4][NBZ][MQ1*MQ1],
531 const int tidz = MFEM_THREAD_ID(z);
537 Jpr[0] = X0GB(qx,qy);
538 Jpr[1] = X1GB(qx,qy);
539 Jpr[2] = X0BG(qx,qy);
540 Jpr[3] = X1BG(qx,qy);
544 template<
int MQ1,
int NBZ>
545 MFEM_HOST_DEVICE
inline void PushGrad(
const int Q1D,
546 const int qx,
const int qy,
548 double (&sQQ)[4][NBZ][MQ1*MQ1])
550 const int tidz = MFEM_THREAD_ID(z);
563 template<
int MD1,
int MQ1,
int NBZ>
564 MFEM_HOST_DEVICE
inline void GradYt(
const int D1D,
const int Q1D,
565 const double (&sBG)[2][MQ1*MD1],
566 const double (&GQ)[4][NBZ][MQ1*MQ1],
567 double (&GD)[4][NBZ][MD1*MQ1])
569 const int tidz = MFEM_THREAD_ID(z);
581 MFEM_FOREACH_THREAD(qy,y,Q1D)
583 MFEM_FOREACH_THREAD(dx,x,D1D)
585 double u[2] = {0.0, 0.0};
586 double v[2] = {0.0, 0.0};
587 for (
int qx = 0; qx < Q1D; ++qx)
589 u[0] += Gt(qx,dx) * QQx0(qx,qy);
590 u[1] += Gt(qx,dx) * QQy0(qx,qy);
591 v[0] += Bt(qx,dx) * QQx1(qx,qy);
592 v[1] += Bt(qx,dx) * QQy1(qx,qy);
604 template<
int MD1,
int MQ1,
int NBZ>
605 MFEM_HOST_DEVICE
inline void GradXt(
const int D1D,
const int Q1D,
606 const double (&sBG)[2][MQ1*MD1],
607 const double (&GD)[4][NBZ][MD1*MQ1],
608 const DeviceTensor<4> &Y,
611 const int tidz = MFEM_THREAD_ID(z);
619 MFEM_FOREACH_THREAD(dy,y,D1D)
621 MFEM_FOREACH_THREAD(dx,x,D1D)
623 double u[2] = {0.0, 0.0};
624 double v[2] = {0.0, 0.0};
625 for (
int qy = 0; qy < Q1D; ++qy)
627 u[0] += DQxB(qy,dx) * Bt(qy,dy);
628 u[1] += DQyB(qy,dx) * Bt(qy,dy);
629 v[0] += DQxG(qy,dx) * Gt(qy,dy);
630 v[1] += DQyG(qy,dx) * Gt(qy,dy);
632 Y(dx,dy,0,e) += u[0] + v[0];
633 Y(dx,dy,1,e) += u[1] + v[1];
640 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
641 const DeviceTensor<4, const double> &x,
644 MFEM_FOREACH_THREAD(dz,z,D1D)
646 MFEM_FOREACH_THREAD(dy,y,D1D)
648 MFEM_FOREACH_THREAD(dx,x,D1D)
650 X(dx,dy,dz) = x(dx,dy,dz,e);
658 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
659 const DeviceTensor<4, const double> &x,
660 double (&sm)[MD1*MD1*MD1])
667 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
668 const DeviceTensor<5, const double> &x,
671 MFEM_FOREACH_THREAD(dz,z,D1D)
673 MFEM_FOREACH_THREAD(dy,y,D1D)
675 MFEM_FOREACH_THREAD(dx,x,D1D)
677 X(dx,dy,dz) = x(dx,dy,dz,c,e);
686 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
687 const DeviceTensor<5, const double> &x,
688 double (&sm)[MD1*MD1*MD1])
691 return LoadX<MD1>(e,D1D,c,x,X);
695 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
700 MFEM_FOREACH_THREAD(dz,z,D1D)
702 MFEM_FOREACH_THREAD(dy,y,D1D)
704 MFEM_FOREACH_THREAD(qx,x,Q1D)
707 for (
int dx = 0; dx < D1D; ++dx)
709 const double Bx = B(dx,qx);
710 u += Bx * DDD(dx,dy,dz);
719 template<
int MD1,
int MQ1>
720 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
721 const double (&sB)[MQ1*MD1],
722 const double (&sDDD)[MD1*MD1*MD1],
723 double (&sDDQ)[MD1*MD1*MQ1])
728 EvalX(D1D,Q1D,B,DDD,DDQ);
732 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
737 MFEM_FOREACH_THREAD(dz,z,D1D)
739 MFEM_FOREACH_THREAD(qy,y,Q1D)
741 MFEM_FOREACH_THREAD(qx,x,Q1D)
744 for (
int dy = 0; dy < D1D; ++dy)
746 const double By = B(dy,qy);
747 u += DDQ(dz,dy,qx) * By;
756 template<
int MD1,
int MQ1>
757 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
758 const double (&sB)[MQ1*MD1],
759 const double (&sDDQ)[MD1*MD1*MQ1],
760 double (&sDQQ)[MD1*MQ1*MQ1])
765 EvalY(D1D,Q1D,B,DDQ,DQQ);
769 MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
774 MFEM_FOREACH_THREAD(qz,z,Q1D)
776 MFEM_FOREACH_THREAD(qy,y,Q1D)
778 MFEM_FOREACH_THREAD(qx,x,Q1D)
781 for (
int dz = 0; dz < D1D; ++dz)
783 const double Bz = B(dz,qz);
784 u += DQQ(dz,qy,qx) * Bz;
793 template<
int MD1,
int MQ1>
794 MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
795 const double (&sB)[MQ1*MD1],
796 const double (&sDQQ)[MD1*MQ1*MQ1],
797 double (&sQQQ)[MQ1*MQ1*MQ1])
802 EvalZ(D1D,Q1D,B,DQQ,QQQ);
806 MFEM_HOST_DEVICE
inline void PullEval(
const int x,
const int y,
const int z,
814 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
815 const int x,
const int y,
const int z,
816 const double (&sQQQ)[MQ1*MQ1*MQ1],
820 PullEval(x,y,z,QQQ,X);
825 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
826 const DeviceTensor<5, const double> &X,
827 double (*sm)[MD1*MD1*MD1])
833 MFEM_FOREACH_THREAD(dz,z,D1D)
835 MFEM_FOREACH_THREAD(dy,y,D1D)
837 MFEM_FOREACH_THREAD(dx,x,D1D)
839 Xx(dx,dy,dz) = X(dx,dy,dz,0,e);
840 Xy(dx,dy,dz) = X(dx,dy,dz,1,e);
841 Xz(dx,dy,dz) = X(dx,dy,dz,2,e);
849 template<
int MD1,
int MQ1>
850 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
851 const double (&sB)[MQ1*MD1],
852 const double (&sDDD)[3][MD1*MD1*MD1],
853 double (&sDDQ)[3][MD1*MD1*MQ1])
863 MFEM_FOREACH_THREAD(dz,z,D1D)
865 MFEM_FOREACH_THREAD(dy,y,D1D)
867 MFEM_FOREACH_THREAD(qx,x,Q1D)
869 double u[3] = {0.0, 0.0, 0.0};
870 for (
int dx = 0; dx < D1D; ++dx)
872 const double Bx = B(dx,qx);
873 u[0] += Bx * Xx(dx,dy,dz);
874 u[1] += Bx * Xy(dx,dy,dz);
875 u[2] += Bx * Xz(dx,dy,dz);
877 XxB(qx,dy,dz) = u[0];
878 XyB(qx,dy,dz) = u[1];
879 XzB(qx,dy,dz) = u[2];
887 template<
int MD1,
int MQ1>
888 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
889 const double (&sB)[MQ1*MD1],
890 const double (&sDDQ)[3][MD1*MD1*MQ1],
891 double (&sDQQ)[3][MD1*MQ1*MQ1])
901 MFEM_FOREACH_THREAD(dz,z,D1D)
903 MFEM_FOREACH_THREAD(qy,y,Q1D)
905 MFEM_FOREACH_THREAD(qx,x,Q1D)
907 double u[3] = {0.0, 0.0, 0.0};
908 for (
int dy = 0; dy < D1D; ++dy)
910 const double By = B(dy,qy);
911 u[0] += XxB(qx,dy,dz) * By;
912 u[1] += XyB(qx,dy,dz) * By;
913 u[2] += XzB(qx,dy,dz) * By;
915 XxBB(qx,qy,dz) = u[0];
916 XyBB(qx,qy,dz) = u[1];
917 XzBB(qx,qy,dz) = u[2];
925 template<
int MD1,
int MQ1>
926 MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
927 const double (&sB)[MQ1*MD1],
928 const double (&sDQQ)[3][MD1*MQ1*MQ1],
929 double (&sQQQ)[3][MQ1*MQ1*MQ1])
939 MFEM_FOREACH_THREAD(qz,z,Q1D)
941 MFEM_FOREACH_THREAD(qy,y,Q1D)
943 MFEM_FOREACH_THREAD(qx,x,Q1D)
945 double u[3] = {0.0, 0.0, 0.0};
946 for (
int dz = 0; dz < D1D; ++dz)
948 const double Bz = B(dz,qz);
949 u[0] += XxBB(qx,qy,dz) * Bz;
950 u[1] += XyBB(qx,qy,dz) * Bz;
951 u[2] += XzBB(qx,qy,dz) * Bz;
953 XxBBB(qx,qy,qz) = u[0];
954 XyBBB(qx,qy,qz) = u[1];
955 XzBBB(qx,qy,qz) = u[2];
964 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
965 const int x,
const int y,
const int z,
966 const double (&sQQQ)[3][MQ1*MQ1*MQ1],
980 MFEM_HOST_DEVICE
inline void PushEval(
const int Q1D,
981 const int x,
const int y,
const int z,
982 const double (&A)[3],
983 double (&sQQQ)[3][MQ1*MQ1*MQ1])
995 template<
int MD1,
int MQ1>
996 MFEM_HOST_DEVICE
inline void EvalXt(
const int D1D,
const int Q1D,
997 const double (&sB)[MQ1*MD1],
998 const double (&sQQQ)[3][MQ1*MQ1*MQ1],
999 double (&sDQQ)[3][MD1*MQ1*MQ1])
1009 MFEM_FOREACH_THREAD(qz,z,Q1D)
1011 MFEM_FOREACH_THREAD(qy,y,Q1D)
1013 MFEM_FOREACH_THREAD(dx,x,D1D)
1015 double u[3] = {0.0, 0.0, 0.0};
1016 for (
int qx = 0; qx < Q1D; ++qx)
1018 const double Btx = Bt(qx,dx);
1019 u[0] += XxBBB(qx,qy,qz) * Btx;
1020 u[1] += XyBBB(qx,qy,qz) * Btx;
1021 u[2] += XzBBB(qx,qy,qz) * Btx;
1023 XxBB(qz,qy,dx) = u[0];
1024 XyBB(qz,qy,dx) = u[1];
1025 XzBB(qz,qy,dx) = u[2];
1033 template<
int MD1,
int MQ1>
1034 MFEM_HOST_DEVICE
inline void EvalYt(
const int D1D,
const int Q1D,
1035 const double (&sB)[MQ1*MD1],
1036 const double (&sDQQ)[3][MD1*MQ1*MQ1],
1037 double (&sDDQ)[3][MD1*MD1*MQ1])
1047 MFEM_FOREACH_THREAD(qz,z,Q1D)
1049 MFEM_FOREACH_THREAD(dy,y,D1D)
1051 MFEM_FOREACH_THREAD(dx,x,D1D)
1053 double u[3] = {0.0, 0.0, 0.0};
1054 for (
int qy = 0; qy < Q1D; ++qy)
1056 const double Bty = Bt(qy,dy);
1057 u[0] += XxBB(qz,qy,dx) * Bty;
1058 u[1] += XyBB(qz,qy,dx) * Bty;
1059 u[2] += XzBB(qz,qy,dx) * Bty;
1062 XxB(qz,dy,dx) = u[0];
1063 XyB(qz,dy,dx) = u[1];
1064 XzB(qz,dy,dx)= u[2];
1072 template<
int MD1,
int MQ1>
1073 MFEM_HOST_DEVICE
inline void EvalZt(
const int D1D,
const int Q1D,
1074 const double (&sB)[MQ1*MD1],
1075 const double (&sDDQ)[3][MD1*MD1*MQ1],
1076 const DeviceTensor<5> &Y,
1084 MFEM_FOREACH_THREAD(dz,z,D1D)
1086 MFEM_FOREACH_THREAD(dy,y,D1D)
1088 MFEM_FOREACH_THREAD(dx,x,D1D)
1090 double u[3] = {0.0, 0.0, 0.0};
1091 for (
int qz = 0; qz < Q1D; ++qz)
1093 const double Btz = Bt(qz,dz);
1094 u[0] += XxB(qz,dy,dx) * Btz;
1095 u[1] += XyB(qz,dy,dx) * Btz;
1096 u[2] += XzB(qz,dy,dx) * Btz;
1098 Y(dx,dy,dz,0,e) += u[0];
1099 Y(dx,dy,dz,1,e) += u[1];
1100 Y(dx,dy,dz,2,e) += u[2];
1107 template<
int MD1,
int MQ1>
1108 MFEM_HOST_DEVICE
inline void GradX(
const int D1D,
const int Q1D,
1109 const double (*sBG)[MQ1*MD1],
1110 const double (*sDDD)[MD1*MD1*MD1],
1111 double (*sDDQ)[MD1*MD1*MQ1])
1125 MFEM_FOREACH_THREAD(dz,z,D1D)
1127 MFEM_FOREACH_THREAD(dy,y,D1D)
1129 MFEM_FOREACH_THREAD(qx,x,Q1D)
1131 double u[3] = {0.0, 0.0, 0.0};
1132 double v[3] = {0.0, 0.0, 0.0};
1133 for (
int dx = 0; dx < D1D; ++dx)
1135 const double xx = Xx(dx,dy,dz);
1136 const double xy = Xy(dx,dy,dz);
1137 const double xz = Xz(dx,dy,dz);
1138 const double Bx = B(dx,qx);
1139 const double Gx = G(dx,qx);
1148 XxB(qx,dy,dz) = u[0];
1149 XyB(qx,dy,dz) = u[1];
1150 XzB(qx,dy,dz) = u[2];
1152 XxG(qx,dy,dz) = v[0];
1153 XyG(qx,dy,dz) = v[1];
1154 XzG(qx,dy,dz) = v[2];
1162 template<
int MD1,
int MQ1>
1163 MFEM_HOST_DEVICE
inline void GradY(
const int D1D,
const int Q1D,
1164 const double (*sBG)[MQ1*MD1],
1165 const double (*sDDQ)[MD1*MD1*MQ1],
1166 double (*sDQQ)[MD1*MQ1*MQ1])
1186 MFEM_FOREACH_THREAD(dz,z,D1D)
1188 MFEM_FOREACH_THREAD(qy,y,Q1D)
1190 MFEM_FOREACH_THREAD(qx,x,Q1D)
1192 double u[3] = {0.0, 0.0, 0.0};
1193 double v[3] = {0.0, 0.0, 0.0};
1194 double w[3] = {0.0, 0.0, 0.0};
1195 for (
int dy = 0; dy < D1D; ++dy)
1197 const double By = B(dy,qy);
1198 const double Gy = G(dy,qy);
1200 u[0] += XxB(qx,dy,dz) * By;
1201 u[1] += XyB(qx,dy,dz) * By;
1202 u[2] += XzB(qx,dy,dz) * By;
1204 v[0] += XxG(qx,dy,dz) * By;
1205 v[1] += XyG(qx,dy,dz) * By;
1206 v[2] += XzG(qx,dy,dz) * By;
1208 w[0] += XxB(qx,dy,dz) * Gy;
1209 w[1] += XyB(qx,dy,dz) * Gy;
1210 w[2] += XzB(qx,dy,dz) * Gy;
1212 XxBB(qx,qy,dz) = u[0];
1213 XyBB(qx,qy,dz) = u[1];
1214 XzBB(qx,qy,dz) = u[2];
1216 XxBG(qx,qy,dz) = v[0];
1217 XyBG(qx,qy,dz) = v[1];
1218 XzBG(qx,qy,dz) = v[2];
1220 XxGB(qx,qy,dz) = w[0];
1221 XyGB(qx,qy,dz) = w[1];
1222 XzGB(qx,qy,dz) = w[2];
1230 template<
int MD1,
int MQ1>
1231 MFEM_HOST_DEVICE
inline void GradZ(
const int D1D,
const int Q1D,
1232 const double (*sBG)[MQ1*MD1],
1233 const double (*sDQQ)[MD1*MQ1*MQ1],
1234 double (*sQQQ)[MQ1*MQ1*MQ1])
1257 MFEM_FOREACH_THREAD(qz,z,Q1D)
1259 MFEM_FOREACH_THREAD(qy,y,Q1D)
1261 MFEM_FOREACH_THREAD(qx,x,Q1D)
1263 double u[3] = {0.0, 0.0, 0.0};
1264 double v[3] = {0.0, 0.0, 0.0};
1265 double w[3] = {0.0, 0.0, 0.0};
1266 for (
int dz = 0; dz < D1D; ++dz)
1268 const double Bz = B(dz,qz);
1269 const double Gz = G(dz,qz);
1271 u[0] += XxBG(qx,qy,dz) * Bz;
1272 u[1] += XyBG(qx,qy,dz) * Bz;
1273 u[2] += XzBG(qx,qy,dz) * Bz;
1275 v[0] += XxGB(qx,qy,dz) * Bz;
1276 v[1] += XyGB(qx,qy,dz) * Bz;
1277 v[2] += XzGB(qx,qy,dz) * Bz;
1279 w[0] += XxBB(qx,qy,dz) * Gz;
1280 w[1] += XyBB(qx,qy,dz) * Gz;
1281 w[2] += XzBB(qx,qy,dz) * Gz;
1283 XxBBG(qx,qy,qz) = u[0];
1284 XyBBG(qx,qy,qz) = u[1];
1285 XzBBG(qx,qy,qz) = u[2];
1287 XxBGB(qx,qy,qz) = v[0];
1288 XyBGB(qx,qy,qz) = v[1];
1289 XzBGB(qx,qy,qz) = v[2];
1291 XxGBB(qx,qy,qz)= w[0];
1292 XyGBB(qx,qy,qz) = w[1];
1293 XzGBB(qx,qy,qz) = w[2];
1302 MFEM_HOST_DEVICE
inline void PullGrad(
const int Q1D,
1303 const int x,
const int y,
const int z,
1304 const double (*sQQQ)[MQ1*MQ1*MQ1],
1317 Jpr[0] = XxBBG(x,y,z);
1318 Jpr[3] = XxBGB(x,y,z);
1319 Jpr[6] = XxGBB(x,y,z);
1320 Jpr[1] = XyBBG(x,y,z);
1321 Jpr[4] = XyBGB(x,y,z);
1322 Jpr[7] = XyGBB(x,y,z);
1323 Jpr[2] = XzBBG(x,y,z);
1324 Jpr[5] = XzBGB(x,y,z);
1325 Jpr[8] = XzGBB(x,y,z);
1330 MFEM_HOST_DEVICE
inline void PushGrad(
const int Q1D,
1331 const int x,
const int y,
const int z,
1333 double (&sQQQ)[9][MQ1*MQ1*MQ1])
1345 XxBBG(x,y,z) = A[0];
1346 XxBGB(x,y,z) = A[1];
1347 XxGBB(x,y,z) = A[2];
1348 XyBBG(x,y,z) = A[3];
1349 XyBGB(x,y,z) = A[4];
1350 XyGBB(x,y,z) = A[5];
1351 XzBBG(x,y,z) = A[6];
1352 XzBGB(x,y,z) = A[7];
1353 XzGBB(x,y,z) = A[8];
1357 template<
int MD1,
int MQ1>
1358 MFEM_HOST_DEVICE
inline void GradZt(
const int D1D,
const int Q1D,
1359 const double (&sBG)[2][MQ1*MD1],
1360 const double (&sQQQ)[9][MQ1*MQ1*MQ1],
1361 double (&sDQQ)[9][MD1*MQ1*MQ1])
1385 MFEM_FOREACH_THREAD(qz,z,Q1D)
1387 MFEM_FOREACH_THREAD(qy,y,Q1D)
1389 MFEM_FOREACH_THREAD(dx,x,D1D)
1391 double u[3] = {0.0, 0.0, 0.0};
1392 double v[3] = {0.0, 0.0, 0.0};
1393 double w[3] = {0.0, 0.0, 0.0};
1394 for (
int qx = 0; qx < Q1D; ++qx)
1396 const double Btx = Bt(qx,dx);
1397 const double Gtx = Gt(qx,dx);
1399 u[0] += XxBBG(qx,qy,qz) * Gtx;
1400 v[0] += XxBGB(qx,qy,qz) * Btx;
1401 w[0] += XxGBB(qx,qy,qz) * Btx;
1403 u[1] += XyBBG(qx,qy,qz) * Gtx;
1404 v[1] += XyBGB(qx,qy,qz) * Btx;
1405 w[1] += XyGBB(qx,qy,qz) * Btx;
1407 u[2] += XzBBG(qx,qy,qz) * Gtx;
1408 v[2] += XzBGB(qx,qy,qz) * Btx;
1409 w[2] += XzGBB(qx,qy,qz) * Btx;
1411 XxBB(qz,qy,dx) = u[0];
1412 XxBG(qz,qy,dx) = v[0];
1413 XxGB(qz,qy,dx) = w[0];
1415 XyBB(qz,qy,dx) = u[1];
1416 XyBG(qz,qy,dx) = v[1];
1417 XyGB(qz,qy,dx) = w[1];
1419 XzBB(qz,qy,dx) = u[2];
1420 XzBG(qz,qy,dx) = v[2];
1421 XzGB(qz,qy,dx) = w[2];
1429 template<
int MD1,
int MQ1>
1430 MFEM_HOST_DEVICE
inline void GradYt(
const int D1D,
const int Q1D,
1431 const double (&sBG)[2][MQ1*MD1],
1432 const double (&sDQQ)[9][MD1*MQ1*MQ1],
1433 double (&sDDQ)[9][MD1*MD1*MQ1])
1456 MFEM_FOREACH_THREAD(qz,z,Q1D)
1458 MFEM_FOREACH_THREAD(dy,y,D1D)
1460 MFEM_FOREACH_THREAD(dx,x,D1D)
1462 double u[3] = {0.0, 0.0, 0.0};
1463 double v[3] = {0.0, 0.0, 0.0};
1464 double w[3] = {0.0, 0.0, 0.0};
1465 for (
int qy = 0; qy < Q1D; ++qy)
1467 const double Bty = Bt(qy,dy);
1468 const double Gty = Gt(qy,dy);
1470 u[0] += XxBB(qz,qy,dx) * Bty;
1471 v[0] += XxBG(qz,qy,dx) * Gty;
1472 w[0] += XxGB(qz,qy,dx) * Bty;
1474 u[1] += XyBB(qz,qy,dx) * Bty;
1475 v[1] += XyBG(qz,qy,dx) * Gty;
1476 w[1] += XyGB(qz,qy,dx) * Bty;
1478 u[2] += XzBB(qz,qy,dx) * Bty;
1479 v[2] += XzBG(qz,qy,dx) * Gty;
1480 w[2] += XzGB(qz,qy,dx) * Bty;
1483 XxB(qz,dy,dx) = u[0];
1484 XxC(qz,dy,dx) = v[0];
1485 XxG(qz,dy,dx) = w[0];
1487 XyB(qz,dy,dx) = u[1];
1488 XyC(qz,dy,dx) = v[1];
1489 XyG(qz,dy,dx) = w[1];
1491 XzB(qz,dy,dx) = u[2];
1492 XzC(qz,dy,dx) = v[2];
1493 XzG(qz,dy,dx) = w[2];
1501 template<
int MD1,
int MQ1>
1502 MFEM_HOST_DEVICE
inline void GradXt(
const int D1D,
const int Q1D,
1503 const double (&sBG)[2][MQ1*MD1],
1504 const double (&sDDQ)[9][MD1*MD1*MQ1],
1505 const DeviceTensor<5> &Y,
1520 MFEM_FOREACH_THREAD(dz,z,D1D)
1522 MFEM_FOREACH_THREAD(dy,y,D1D)
1524 MFEM_FOREACH_THREAD(dx,x,D1D)
1526 double u[3] = {0.0, 0.0, 0.0};
1527 double v[3] = {0.0, 0.0, 0.0};
1528 double w[3] = {0.0, 0.0, 0.0};
1529 for (
int qz = 0; qz < Q1D; ++qz)
1531 const double Btz = Bt(qz,dz);
1532 const double Gtz = Gt(qz,dz);
1534 u[0] += XxB(qz,dy,dx) * Btz;
1535 v[0] += XxC(qz,dy,dx) * Btz;
1536 w[0] += XxG(qz,dy,dx) * Gtz;
1538 u[1] += XyB(qz,dy,dx) * Btz;
1539 v[1] += XyC(qz,dy,dx)* Btz;
1540 w[1] += XyG(qz,dy,dx) * Gtz;
1542 u[2] += XzB(qz,dy,dx) * Btz;
1543 v[2] += XzC(qz,dy,dx) * Btz;
1544 w[2] += XzG(qz,dy,dx) * Gtz;
1546 Y(dx,dy,dz,0,e) += u[0] + v[0] + w[0];
1547 Y(dx,dy,dz,1,e) += u[1] + v[1] + w[1];
1548 Y(dx,dy,dz,2,e) += u[2] + v[2] + w[2];
1560 #endif // MFEM_FEM_KERNELS_HPP
DeviceTensor< 2, const double > ConstDeviceMatrix
DeviceTensor< 2, double > DeviceMatrix
DeviceTensor< 3, double > DeviceCube
double u(const Vector &xvec)
DeviceTensor< 3, const double > ConstDeviceCube