12 #ifndef MFEM_FEM_KERNELS_HPP
13 #define MFEM_FEM_KERNELS_HPP
15 #include "../config/config.hpp"
16 #include "../linalg/dtensor.hpp"
30 template<
int MD1,
int MQ1>
31 MFEM_HOST_DEVICE
inline void LoadB(
const int D1D,
const int Q1D,
33 double (&sB)[MQ1*MD1])
35 const int tidz = MFEM_THREAD_ID(z);
40 MFEM_FOREACH_THREAD(d,y,D1D)
42 MFEM_FOREACH_THREAD(q,x,Q1D)
52 template<
int MD1,
int MQ1>
53 MFEM_HOST_DEVICE
inline void LoadBt(
const int D1D,
const int Q1D,
55 double (&sB)[MQ1*MD1])
57 const int tidz = MFEM_THREAD_ID(z);
62 MFEM_FOREACH_THREAD(d,y,D1D)
64 MFEM_FOREACH_THREAD(q,x,Q1D)
74 template<
int MD1,
int MQ1>
75 MFEM_HOST_DEVICE
inline void LoadBG(
const int D1D,
const int Q1D,
78 double (&sBG)[2][MQ1*MD1])
80 const int tidz = MFEM_THREAD_ID(z);
86 MFEM_FOREACH_THREAD(d,y,D1D)
88 MFEM_FOREACH_THREAD(q,x,Q1D)
99 template<
int MD1,
int MQ1>
100 MFEM_HOST_DEVICE
inline void LoadBGt(
const int D1D,
const int Q1D,
103 double (&sBG)[2][MQ1*MD1])
105 const int tidz = MFEM_THREAD_ID(z);
111 MFEM_FOREACH_THREAD(d,y,D1D)
113 MFEM_FOREACH_THREAD(q,x,Q1D)
124 template<
int MD1,
int NBZ>
125 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
126 const DeviceTensor<3, const double> &x,
127 double (&sX)[NBZ][MD1*MD1])
129 const int tidz = MFEM_THREAD_ID(z);
132 MFEM_FOREACH_THREAD(dy,y,D1D)
134 MFEM_FOREACH_THREAD(dx,x,D1D)
136 X(dx,dy) = x(dx,dy,e);
143 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
144 const DeviceTensor<4, const double> &x,
147 MFEM_FOREACH_THREAD(dy,y,D1D)
149 MFEM_FOREACH_THREAD(dx,x,D1D)
151 DD(dx,dy) = x(dx,dy,c,e);
157 template<
int MD1,
int NBZ>
158 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
159 const DeviceTensor<4, const double> &x,
160 double (&sm)[NBZ][MD1*MD1])
162 const int tidz = MFEM_THREAD_ID(z);
168 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
173 MFEM_FOREACH_THREAD(dy,y,D1D)
175 MFEM_FOREACH_THREAD(qx,x,Q1D)
178 for (
int dx = 0; dx < D1D; ++dx)
180 u += B(dx,qx) * DD(dx,dy);
188 template<
int MD1,
int MQ1,
int NBZ>
189 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
190 const double (&sB)[MQ1*MD1],
191 double (&sDD)[NBZ][MD1*MD1],
192 double (&sDQ)[NBZ][MD1*MQ1])
194 const int tidz = MFEM_THREAD_ID(z);
198 EvalX(D1D,Q1D,B,DD,DQ);
202 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
207 MFEM_FOREACH_THREAD(qy,y,Q1D)
209 MFEM_FOREACH_THREAD(qx,x,Q1D)
212 for (
int dy = 0; dy < D1D; ++dy)
214 u += DQ(dy,qx) * B(dy,qy);
222 template<
int MD1,
int MQ1,
int NBZ>
223 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
224 const double (&sB)[MQ1*MD1],
225 double (&sDQ)[NBZ][MD1*MQ1],
226 double (&sQQ)[NBZ][MQ1*MQ1])
228 const int tidz = MFEM_THREAD_ID(z);
232 EvalY(D1D,Q1D,B,DQ,QQ);
236 MFEM_HOST_DEVICE
inline void PullEval(
const int qx,
const int qy,
243 template<
int MQ1,
int NBZ>
244 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
245 const int qx,
const int qy,
246 double (&sQQ)[NBZ][MQ1*MQ1],
249 const int tidz = MFEM_THREAD_ID(z);
251 PullEval(qx,qy,QQ,P);
255 template<
int MD1,
int NBZ>
256 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
257 const DeviceTensor<4, const double> &X,
258 double (&sX)[2][NBZ][MD1*MD1])
260 const int tidz = MFEM_THREAD_ID(z);
264 MFEM_FOREACH_THREAD(dy,y,D1D)
266 MFEM_FOREACH_THREAD(dx,x,D1D)
268 X0(dx,dy) = X(dx,dy,0,e);
269 X1(dx,dy) = X(dx,dy,1,e);
276 template<
int MD1,
int MQ1,
int NBZ>
277 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
278 const double (&sB)[MQ1*MD1],
279 const double (&sX)[2][NBZ][MD1*MD1],
280 double (&sDQ)[2][NBZ][MD1*MQ1])
282 const int tidz = MFEM_THREAD_ID(z);
289 MFEM_FOREACH_THREAD(dy,y,D1D)
291 MFEM_FOREACH_THREAD(qx,x,Q1D)
293 double u[2] = {0.0, 0.0};
294 for (
int dx = 0; dx < D1D; ++dx)
296 const double xx = X0(dx,dy);
297 const double xy = X1(dx,dy);
298 u[0] += B(dx,qx) * xx;
299 u[1] += B(dx,qx) * xy;
309 template<
int MD1,
int MQ1,
int NBZ>
310 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
311 const double (&sB)[MQ1*MD1],
312 const double (&sDQ)[2][NBZ][MD1*MQ1],
313 double (&sQQ)[2][NBZ][MQ1*MQ1])
315 const int tidz = MFEM_THREAD_ID(z);
322 MFEM_FOREACH_THREAD(qy,y,Q1D)
324 MFEM_FOREACH_THREAD(qx,x,Q1D)
326 double u[2] = {0.0, 0.0};
327 for (
int dy = 0; dy < D1D; ++dy)
329 u[0] += DQ0(qx,dy) * B(dy,qy);
330 u[1] += DQ1(qx,dy) * B(dy,qy);
340 template<
int MQ1,
int NBZ>
341 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
342 const int qx,
const int qy,
343 const double (&sQQ)[2][NBZ][MQ1*MQ1],
346 const int tidz = MFEM_THREAD_ID(z);
355 template<
int MQ1,
int NBZ>
356 MFEM_HOST_DEVICE
inline void PushEval(
const int Q1D,
357 const int qx,
const int qy,
359 double (&sQQ)[2][NBZ][MQ1*MQ1])
361 const int tidz = MFEM_THREAD_ID(z);
370 template<
int MD1,
int MQ1,
int NBZ>
371 MFEM_HOST_DEVICE
inline void EvalXt(
const int D1D,
const int Q1D,
372 const double (&sB)[MQ1*MD1],
373 const double (&sQQ)[2][NBZ][MQ1*MQ1],
374 double (&sDQ)[2][NBZ][MD1*MQ1])
376 const int tidz = MFEM_THREAD_ID(z);
383 MFEM_FOREACH_THREAD(qy,y,Q1D)
385 MFEM_FOREACH_THREAD(dx,x,D1D)
387 double u[2] = {0.0, 0.0};
388 for (
int qx = 0; qx < Q1D; ++qx)
390 u[0] += QQ0(qx,qy) * Bt(qx,dx);
391 u[1] += QQ1(qx,qy) * Bt(qx,dx);
401 template<
int MD1,
int MQ1,
int NBZ>
402 MFEM_HOST_DEVICE
inline void EvalYt(
const int D1D,
const int Q1D,
403 const double (&sB)[MQ1*MD1],
404 const double (&sDQ)[2][NBZ][MD1*MQ1],
405 const DeviceTensor<4> &Y,
408 const int tidz = MFEM_THREAD_ID(z);
413 MFEM_FOREACH_THREAD(dy,y,D1D)
415 MFEM_FOREACH_THREAD(dx,x,D1D)
417 double u[2] = {0.0, 0.0};
418 for (
int qy = 0; qy < Q1D; ++qy)
420 u[0] += Bt(qy,dy) * DQ0(qy,dx);
421 u[1] += Bt(qy,dy) * DQ1(qy,dx);
423 Y(dx,dy,0,e) += u[0];
424 Y(dx,dy,1,e) += u[1];
431 template<
int MD1,
int MQ1,
int NBZ>
432 MFEM_HOST_DEVICE
inline void GradX(
const int D1D,
const int Q1D,
433 const double (&sBG)[2][MQ1*MD1],
434 const double (&sX)[2][NBZ][MD1*MD1],
435 double (&sDQ)[4][NBZ][MD1*MQ1])
437 const int tidz = MFEM_THREAD_ID(z);
447 MFEM_FOREACH_THREAD(dy,y,D1D)
449 MFEM_FOREACH_THREAD(qx,x,Q1D)
451 double u[2] = {0.0, 0.0};
452 double v[2] = {0.0, 0.0};
453 for (
int dx = 0; dx < D1D; ++dx)
455 const double Bx = B(dx,qx);
456 const double Gx = G(dx,qx);
457 const double x0 = X0(dx,dy);
458 const double x1 = X1(dx,dy);
474 template<
int MD1,
int MQ1,
int NBZ>
475 MFEM_HOST_DEVICE
inline void GradY(
const int D1D,
const int Q1D,
476 const double (&sBG)[2][MQ1*MD1],
477 const double (&sDQ)[4][NBZ][MD1*MQ1],
478 double (&sQQ)[4][NBZ][MQ1*MQ1])
480 const int tidz = MFEM_THREAD_ID(z);
492 MFEM_FOREACH_THREAD(qy,y,Q1D)
494 MFEM_FOREACH_THREAD(qx,x,Q1D)
496 double u[2] = {0.0, 0.0};
497 double v[2] = {0.0, 0.0};
498 for (
int dy = 0; dy < D1D; ++dy)
500 const double By = B(dy,qy);
501 const double Gy = G(dy,qy);
502 u[0] += X0G(qx,dy) * By;
503 v[0] += X0B(qx,dy) * Gy;
504 u[1] += X1G(qx,dy) * By;
505 v[1] += X1B(qx,dy) * Gy;
517 template<
int MQ1,
int NBZ>
518 MFEM_HOST_DEVICE
inline void PullGrad(
const int Q1D,
519 const int qx,
const int qy,
520 const double (&sQQ)[4][NBZ][MQ1*MQ1],
523 const int tidz = MFEM_THREAD_ID(z);
529 Jpr[0] = X0GB(qx,qy);
530 Jpr[1] = X1GB(qx,qy);
531 Jpr[2] = X0BG(qx,qy);
532 Jpr[3] = X1BG(qx,qy);
536 template<
int MQ1,
int NBZ>
537 MFEM_HOST_DEVICE
inline void PushGrad(
const int Q1D,
538 const int qx,
const int qy,
540 double (&sQQ)[4][NBZ][MQ1*MQ1])
542 const int tidz = MFEM_THREAD_ID(z);
555 template<
int MD1,
int MQ1,
int NBZ>
556 MFEM_HOST_DEVICE
inline void GradYt(
const int D1D,
const int Q1D,
557 const double (&sBG)[2][MQ1*MD1],
558 const double (&GQ)[4][NBZ][MQ1*MQ1],
559 double (&GD)[4][NBZ][MD1*MQ1])
561 const int tidz = MFEM_THREAD_ID(z);
573 MFEM_FOREACH_THREAD(qy,y,Q1D)
575 MFEM_FOREACH_THREAD(dx,x,D1D)
577 double u[2] = {0.0, 0.0};
578 double v[2] = {0.0, 0.0};
579 for (
int qx = 0; qx < Q1D; ++qx)
581 u[0] += Gt(qx,dx) * QQx0(qx,qy);
582 u[1] += Gt(qx,dx) * QQy0(qx,qy);
583 v[0] += Bt(qx,dx) * QQx1(qx,qy);
584 v[1] += Bt(qx,dx) * QQy1(qx,qy);
596 template<
int MD1,
int MQ1,
int NBZ>
597 MFEM_HOST_DEVICE
inline void GradXt(
const int D1D,
const int Q1D,
598 const double (&sBG)[2][MQ1*MD1],
599 const double (&GD)[4][NBZ][MD1*MQ1],
600 const DeviceTensor<4> &Y,
603 const int tidz = MFEM_THREAD_ID(z);
611 MFEM_FOREACH_THREAD(dy,y,D1D)
613 MFEM_FOREACH_THREAD(dx,x,D1D)
615 double u[2] = {0.0, 0.0};
616 double v[2] = {0.0, 0.0};
617 for (
int qy = 0; qy < Q1D; ++qy)
619 u[0] += DQxB(qy,dx) * Bt(qy,dy);
620 u[1] += DQyB(qy,dx) * Bt(qy,dy);
621 v[0] += DQxG(qy,dx) * Gt(qy,dy);
622 v[1] += DQyG(qy,dx) * Gt(qy,dy);
624 Y(dx,dy,0,e) += u[0] + v[0];
625 Y(dx,dy,1,e) += u[1] + v[1];
632 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
633 const DeviceTensor<4, const double> &x,
636 MFEM_FOREACH_THREAD(dz,z,D1D)
638 MFEM_FOREACH_THREAD(dy,y,D1D)
640 MFEM_FOREACH_THREAD(dx,x,D1D)
642 X(dx,dy,dz) = x(dx,dy,dz,e);
650 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
651 const DeviceTensor<4, const double> &x,
652 double (&sm)[MD1*MD1*MD1])
659 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
660 const DeviceTensor<5, const double> &x,
663 MFEM_FOREACH_THREAD(dz,z,D1D)
665 MFEM_FOREACH_THREAD(dy,y,D1D)
667 MFEM_FOREACH_THREAD(dx,x,D1D)
669 X(dx,dy,dz) = x(dx,dy,dz,c,e);
678 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
const int c,
679 const DeviceTensor<5, const double> &x,
680 double (&sm)[MD1*MD1*MD1])
683 return LoadX<MD1>(e,D1D,c,x,X);
687 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
692 MFEM_FOREACH_THREAD(dz,z,D1D)
694 MFEM_FOREACH_THREAD(dy,y,D1D)
696 MFEM_FOREACH_THREAD(qx,x,Q1D)
699 for (
int dx = 0; dx < D1D; ++dx)
701 const double Bx = B(dx,qx);
702 u += Bx * DDD(dx,dy,dz);
711 template<
int MD1,
int MQ1>
712 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
713 const double (&sB)[MQ1*MD1],
714 const double (&sDDD)[MD1*MD1*MD1],
715 double (&sDDQ)[MD1*MD1*MQ1])
720 EvalX(D1D,Q1D,B,DDD,DDQ);
724 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
729 MFEM_FOREACH_THREAD(dz,z,D1D)
731 MFEM_FOREACH_THREAD(qy,y,Q1D)
733 MFEM_FOREACH_THREAD(qx,x,Q1D)
736 for (
int dy = 0; dy < D1D; ++dy)
738 const double By = B(dy,qy);
739 u += DDQ(dz,dy,qx) * By;
748 template<
int MD1,
int MQ1>
749 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
750 const double (&sB)[MQ1*MD1],
751 const double (&sDDQ)[MD1*MD1*MQ1],
752 double (&sDQQ)[MD1*MQ1*MQ1])
757 EvalY(D1D,Q1D,B,DDQ,DQQ);
761 MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
766 MFEM_FOREACH_THREAD(qz,z,Q1D)
768 MFEM_FOREACH_THREAD(qy,y,Q1D)
770 MFEM_FOREACH_THREAD(qx,x,Q1D)
773 for (
int dz = 0; dz < D1D; ++dz)
775 const double Bz = B(dz,qz);
776 u += DQQ(dz,qy,qx) * Bz;
785 template<
int MD1,
int MQ1>
786 MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
787 const double (&sB)[MQ1*MD1],
788 const double (&sDQQ)[MD1*MQ1*MQ1],
789 double (&sQQQ)[MQ1*MQ1*MQ1])
794 EvalZ(D1D,Q1D,B,DQQ,QQQ);
798 MFEM_HOST_DEVICE
inline void PullEval(
const int x,
const int y,
const int z,
806 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
807 const int x,
const int y,
const int z,
808 const double (&sQQQ)[MQ1*MQ1*MQ1],
812 PullEval(x,y,z,QQQ,X);
817 MFEM_HOST_DEVICE
inline void LoadX(
const int e,
const int D1D,
818 const DeviceTensor<5, const double> &X,
819 double (*sm)[MD1*MD1*MD1])
825 MFEM_FOREACH_THREAD(dz,z,D1D)
827 MFEM_FOREACH_THREAD(dy,y,D1D)
829 MFEM_FOREACH_THREAD(dx,x,D1D)
831 Xx(dx,dy,dz) = X(dx,dy,dz,0,e);
832 Xy(dx,dy,dz) = X(dx,dy,dz,1,e);
833 Xz(dx,dy,dz) = X(dx,dy,dz,2,e);
841 template<
int MD1,
int MQ1>
842 MFEM_HOST_DEVICE
inline void EvalX(
const int D1D,
const int Q1D,
843 const double (&sB)[MQ1*MD1],
844 const double (&sDDD)[3][MD1*MD1*MD1],
845 double (&sDDQ)[3][MD1*MD1*MQ1])
855 MFEM_FOREACH_THREAD(dz,z,D1D)
857 MFEM_FOREACH_THREAD(dy,y,D1D)
859 MFEM_FOREACH_THREAD(qx,x,Q1D)
861 double u[3] = {0.0, 0.0, 0.0};
862 for (
int dx = 0; dx < D1D; ++dx)
864 const double Bx = B(dx,qx);
865 u[0] += Bx * Xx(dx,dy,dz);
866 u[1] += Bx * Xy(dx,dy,dz);
867 u[2] += Bx * Xz(dx,dy,dz);
869 XxB(qx,dy,dz) = u[0];
870 XyB(qx,dy,dz) = u[1];
871 XzB(qx,dy,dz) = u[2];
879 template<
int MD1,
int MQ1>
880 MFEM_HOST_DEVICE
inline void EvalY(
const int D1D,
const int Q1D,
881 const double (&sB)[MQ1*MD1],
882 const double (&sDDQ)[3][MD1*MD1*MQ1],
883 double (&sDQQ)[3][MD1*MQ1*MQ1])
893 MFEM_FOREACH_THREAD(dz,z,D1D)
895 MFEM_FOREACH_THREAD(qy,y,Q1D)
897 MFEM_FOREACH_THREAD(qx,x,Q1D)
899 double u[3] = {0.0, 0.0, 0.0};
900 for (
int dy = 0; dy < D1D; ++dy)
902 const double By = B(dy,qy);
903 u[0] += XxB(qx,dy,dz) * By;
904 u[1] += XyB(qx,dy,dz) * By;
905 u[2] += XzB(qx,dy,dz) * By;
907 XxBB(qx,qy,dz) = u[0];
908 XyBB(qx,qy,dz) = u[1];
909 XzBB(qx,qy,dz) = u[2];
917 template<
int MD1,
int MQ1>
918 MFEM_HOST_DEVICE
inline void EvalZ(
const int D1D,
const int Q1D,
919 const double (&sB)[MQ1*MD1],
920 const double (&sDQQ)[3][MD1*MQ1*MQ1],
921 double (&sQQQ)[3][MQ1*MQ1*MQ1])
931 MFEM_FOREACH_THREAD(qz,z,Q1D)
933 MFEM_FOREACH_THREAD(qy,y,Q1D)
935 MFEM_FOREACH_THREAD(qx,x,Q1D)
937 double u[3] = {0.0, 0.0, 0.0};
938 for (
int dz = 0; dz < D1D; ++dz)
940 const double Bz = B(dz,qz);
941 u[0] += XxBB(qx,qy,dz) * Bz;
942 u[1] += XyBB(qx,qy,dz) * Bz;
943 u[2] += XzBB(qx,qy,dz) * Bz;
945 XxBBB(qx,qy,qz) = u[0];
946 XyBBB(qx,qy,qz) = u[1];
947 XzBBB(qx,qy,qz) = u[2];
956 MFEM_HOST_DEVICE
inline void PullEval(
const int Q1D,
957 const int x,
const int y,
const int z,
958 const double (&sQQQ)[3][MQ1*MQ1*MQ1],
972 MFEM_HOST_DEVICE
inline void PushEval(
const int Q1D,
973 const int x,
const int y,
const int z,
974 const double (&A)[3],
975 double (&sQQQ)[3][MQ1*MQ1*MQ1])
987 template<
int MD1,
int MQ1>
988 MFEM_HOST_DEVICE
inline void EvalXt(
const int D1D,
const int Q1D,
989 const double (&sB)[MQ1*MD1],
990 const double (&sQQQ)[3][MQ1*MQ1*MQ1],
991 double (&sDQQ)[3][MD1*MQ1*MQ1])
1001 MFEM_FOREACH_THREAD(qz,z,Q1D)
1003 MFEM_FOREACH_THREAD(qy,y,Q1D)
1005 MFEM_FOREACH_THREAD(dx,x,D1D)
1007 double u[3] = {0.0, 0.0, 0.0};
1008 for (
int qx = 0; qx < Q1D; ++qx)
1010 const double Btx = Bt(qx,dx);
1011 u[0] += XxBBB(qx,qy,qz) * Btx;
1012 u[1] += XyBBB(qx,qy,qz) * Btx;
1013 u[2] += XzBBB(qx,qy,qz) * Btx;
1015 XxBB(qz,qy,dx) = u[0];
1016 XyBB(qz,qy,dx) = u[1];
1017 XzBB(qz,qy,dx) = u[2];
1025 template<
int MD1,
int MQ1>
1026 MFEM_HOST_DEVICE
inline void EvalYt(
const int D1D,
const int Q1D,
1027 const double (&sB)[MQ1*MD1],
1028 const double (&sDQQ)[3][MD1*MQ1*MQ1],
1029 double (&sDDQ)[3][MD1*MD1*MQ1])
1039 MFEM_FOREACH_THREAD(qz,z,Q1D)
1041 MFEM_FOREACH_THREAD(dy,y,D1D)
1043 MFEM_FOREACH_THREAD(dx,x,D1D)
1045 double u[3] = {0.0, 0.0, 0.0};
1046 for (
int qy = 0; qy < Q1D; ++qy)
1048 const double Bty = Bt(qy,dy);
1049 u[0] += XxBB(qz,qy,dx) * Bty;
1050 u[1] += XyBB(qz,qy,dx) * Bty;
1051 u[2] += XzBB(qz,qy,dx) * Bty;
1054 XxB(qz,dy,dx) = u[0];
1055 XyB(qz,dy,dx) = u[1];
1056 XzB(qz,dy,dx)= u[2];
1064 template<
int MD1,
int MQ1>
1065 MFEM_HOST_DEVICE
inline void EvalZt(
const int D1D,
const int Q1D,
1066 const double (&sB)[MQ1*MD1],
1067 const double (&sDDQ)[3][MD1*MD1*MQ1],
1068 const DeviceTensor<5> &Y,
1076 MFEM_FOREACH_THREAD(dz,z,D1D)
1078 MFEM_FOREACH_THREAD(dy,y,D1D)
1080 MFEM_FOREACH_THREAD(dx,x,D1D)
1082 double u[3] = {0.0, 0.0, 0.0};
1083 for (
int qz = 0; qz < Q1D; ++qz)
1085 const double Btz = Bt(qz,dz);
1086 u[0] += XxB(qz,dy,dx) * Btz;
1087 u[1] += XyB(qz,dy,dx) * Btz;
1088 u[2] += XzB(qz,dy,dx) * Btz;
1090 Y(dx,dy,dz,0,e) += u[0];
1091 Y(dx,dy,dz,1,e) += u[1];
1092 Y(dx,dy,dz,2,e) += u[2];
1099 template<
int MD1,
int MQ1>
1100 MFEM_HOST_DEVICE
inline void GradX(
const int D1D,
const int Q1D,
1101 const double (*sBG)[MQ1*MD1],
1102 const double (*sDDD)[MD1*MD1*MD1],
1103 double (*sDDQ)[MD1*MD1*MQ1])
1117 MFEM_FOREACH_THREAD(dz,z,D1D)
1119 MFEM_FOREACH_THREAD(dy,y,D1D)
1121 MFEM_FOREACH_THREAD(qx,x,Q1D)
1123 double u[3] = {0.0, 0.0, 0.0};
1124 double v[3] = {0.0, 0.0, 0.0};
1125 for (
int dx = 0; dx < D1D; ++dx)
1127 const double xx = Xx(dx,dy,dz);
1128 const double xy = Xy(dx,dy,dz);
1129 const double xz = Xz(dx,dy,dz);
1130 const double Bx = B(dx,qx);
1131 const double Gx = G(dx,qx);
1140 XxB(qx,dy,dz) = u[0];
1141 XyB(qx,dy,dz) = u[1];
1142 XzB(qx,dy,dz) = u[2];
1144 XxG(qx,dy,dz) = v[0];
1145 XyG(qx,dy,dz) = v[1];
1146 XzG(qx,dy,dz) = v[2];
1154 template<
int MD1,
int MQ1>
1155 MFEM_HOST_DEVICE
inline void GradY(
const int D1D,
const int Q1D,
1156 const double (*sBG)[MQ1*MD1],
1157 const double (*sDDQ)[MD1*MD1*MQ1],
1158 double (*sDQQ)[MD1*MQ1*MQ1])
1178 MFEM_FOREACH_THREAD(dz,z,D1D)
1180 MFEM_FOREACH_THREAD(qy,y,Q1D)
1182 MFEM_FOREACH_THREAD(qx,x,Q1D)
1184 double u[3] = {0.0, 0.0, 0.0};
1185 double v[3] = {0.0, 0.0, 0.0};
1186 double w[3] = {0.0, 0.0, 0.0};
1187 for (
int dy = 0; dy < D1D; ++dy)
1189 const double By = B(dy,qy);
1190 const double Gy = G(dy,qy);
1192 u[0] += XxB(qx,dy,dz) * By;
1193 u[1] += XyB(qx,dy,dz) * By;
1194 u[2] += XzB(qx,dy,dz) * By;
1196 v[0] += XxG(qx,dy,dz) * By;
1197 v[1] += XyG(qx,dy,dz) * By;
1198 v[2] += XzG(qx,dy,dz) * By;
1200 w[0] += XxB(qx,dy,dz) * Gy;
1201 w[1] += XyB(qx,dy,dz) * Gy;
1202 w[2] += XzB(qx,dy,dz) * Gy;
1204 XxBB(qx,qy,dz) = u[0];
1205 XyBB(qx,qy,dz) = u[1];
1206 XzBB(qx,qy,dz) = u[2];
1208 XxBG(qx,qy,dz) = v[0];
1209 XyBG(qx,qy,dz) = v[1];
1210 XzBG(qx,qy,dz) = v[2];
1212 XxGB(qx,qy,dz) = w[0];
1213 XyGB(qx,qy,dz) = w[1];
1214 XzGB(qx,qy,dz) = w[2];
1222 template<
int MD1,
int MQ1>
1223 MFEM_HOST_DEVICE
inline void GradZ(
const int D1D,
const int Q1D,
1224 const double (*sBG)[MQ1*MD1],
1225 const double (*sDQQ)[MD1*MQ1*MQ1],
1226 double (*sQQQ)[MQ1*MQ1*MQ1])
1249 MFEM_FOREACH_THREAD(qz,z,Q1D)
1251 MFEM_FOREACH_THREAD(qy,y,Q1D)
1253 MFEM_FOREACH_THREAD(qx,x,Q1D)
1255 double u[3] = {0.0, 0.0, 0.0};
1256 double v[3] = {0.0, 0.0, 0.0};
1257 double w[3] = {0.0, 0.0, 0.0};
1258 for (
int dz = 0; dz < D1D; ++dz)
1260 const double Bz = B(dz,qz);
1261 const double Gz = G(dz,qz);
1263 u[0] += XxBG(qx,qy,dz) * Bz;
1264 u[1] += XyBG(qx,qy,dz) * Bz;
1265 u[2] += XzBG(qx,qy,dz) * Bz;
1267 v[0] += XxGB(qx,qy,dz) * Bz;
1268 v[1] += XyGB(qx,qy,dz) * Bz;
1269 v[2] += XzGB(qx,qy,dz) * Bz;
1271 w[0] += XxBB(qx,qy,dz) * Gz;
1272 w[1] += XyBB(qx,qy,dz) * Gz;
1273 w[2] += XzBB(qx,qy,dz) * Gz;
1275 XxBBG(qx,qy,qz) = u[0];
1276 XyBBG(qx,qy,qz) = u[1];
1277 XzBBG(qx,qy,qz) = u[2];
1279 XxBGB(qx,qy,qz) = v[0];
1280 XyBGB(qx,qy,qz) = v[1];
1281 XzBGB(qx,qy,qz) = v[2];
1283 XxGBB(qx,qy,qz)= w[0];
1284 XyGBB(qx,qy,qz) = w[1];
1285 XzGBB(qx,qy,qz) = w[2];
1294 MFEM_HOST_DEVICE
inline void PullGrad(
const int Q1D,
1295 const int x,
const int y,
const int z,
1296 const double (*sQQQ)[MQ1*MQ1*MQ1],
1309 Jpr[0] = XxBBG(x,y,z);
1310 Jpr[3] = XxBGB(x,y,z);
1311 Jpr[6] = XxGBB(x,y,z);
1312 Jpr[1] = XyBBG(x,y,z);
1313 Jpr[4] = XyBGB(x,y,z);
1314 Jpr[7] = XyGBB(x,y,z);
1315 Jpr[2] = XzBBG(x,y,z);
1316 Jpr[5] = XzBGB(x,y,z);
1317 Jpr[8] = XzGBB(x,y,z);
1322 MFEM_HOST_DEVICE
inline void PushGrad(
const int Q1D,
1323 const int x,
const int y,
const int z,
1325 double (&sQQQ)[9][MQ1*MQ1*MQ1])
1337 XxBBG(x,y,z) = A[0];
1338 XxBGB(x,y,z) = A[1];
1339 XxGBB(x,y,z) = A[2];
1340 XyBBG(x,y,z) = A[3];
1341 XyBGB(x,y,z) = A[4];
1342 XyGBB(x,y,z) = A[5];
1343 XzBBG(x,y,z) = A[6];
1344 XzBGB(x,y,z) = A[7];
1345 XzGBB(x,y,z) = A[8];
1349 template<
int MD1,
int MQ1>
1350 MFEM_HOST_DEVICE
inline void GradZt(
const int D1D,
const int Q1D,
1351 const double (&sBG)[2][MQ1*MD1],
1352 const double (&sQQQ)[9][MQ1*MQ1*MQ1],
1353 double (&sDQQ)[9][MD1*MQ1*MQ1])
1377 MFEM_FOREACH_THREAD(qz,z,Q1D)
1379 MFEM_FOREACH_THREAD(qy,y,Q1D)
1381 MFEM_FOREACH_THREAD(dx,x,D1D)
1383 double u[3] = {0.0, 0.0, 0.0};
1384 double v[3] = {0.0, 0.0, 0.0};
1385 double w[3] = {0.0, 0.0, 0.0};
1386 for (
int qx = 0; qx < Q1D; ++qx)
1388 const double Btx = Bt(qx,dx);
1389 const double Gtx = Gt(qx,dx);
1391 u[0] += XxBBG(qx,qy,qz) * Gtx;
1392 v[0] += XxBGB(qx,qy,qz) * Btx;
1393 w[0] += XxGBB(qx,qy,qz) * Btx;
1395 u[1] += XyBBG(qx,qy,qz) * Gtx;
1396 v[1] += XyBGB(qx,qy,qz) * Btx;
1397 w[1] += XyGBB(qx,qy,qz) * Btx;
1399 u[2] += XzBBG(qx,qy,qz) * Gtx;
1400 v[2] += XzBGB(qx,qy,qz) * Btx;
1401 w[2] += XzGBB(qx,qy,qz) * Btx;
1403 XxBB(qz,qy,dx) = u[0];
1404 XxBG(qz,qy,dx) = v[0];
1405 XxGB(qz,qy,dx) = w[0];
1407 XyBB(qz,qy,dx) = u[1];
1408 XyBG(qz,qy,dx) = v[1];
1409 XyGB(qz,qy,dx) = w[1];
1411 XzBB(qz,qy,dx) = u[2];
1412 XzBG(qz,qy,dx) = v[2];
1413 XzGB(qz,qy,dx) = w[2];
1421 template<
int MD1,
int MQ1>
1422 MFEM_HOST_DEVICE
inline void GradYt(
const int D1D,
const int Q1D,
1423 const double (&sBG)[2][MQ1*MD1],
1424 const double (&sDQQ)[9][MD1*MQ1*MQ1],
1425 double (&sDDQ)[9][MD1*MD1*MQ1])
1448 MFEM_FOREACH_THREAD(qz,z,Q1D)
1450 MFEM_FOREACH_THREAD(dy,y,D1D)
1452 MFEM_FOREACH_THREAD(dx,x,D1D)
1454 double u[3] = {0.0, 0.0, 0.0};
1455 double v[3] = {0.0, 0.0, 0.0};
1456 double w[3] = {0.0, 0.0, 0.0};
1457 for (
int qy = 0; qy < Q1D; ++qy)
1459 const double Bty = Bt(qy,dy);
1460 const double Gty = Gt(qy,dy);
1462 u[0] += XxBB(qz,qy,dx) * Bty;
1463 v[0] += XxBG(qz,qy,dx) * Gty;
1464 w[0] += XxGB(qz,qy,dx) * Bty;
1466 u[1] += XyBB(qz,qy,dx) * Bty;
1467 v[1] += XyBG(qz,qy,dx) * Gty;
1468 w[1] += XyGB(qz,qy,dx) * Bty;
1470 u[2] += XzBB(qz,qy,dx) * Bty;
1471 v[2] += XzBG(qz,qy,dx) * Gty;
1472 w[2] += XzGB(qz,qy,dx) * Bty;
1475 XxB(qz,dy,dx) = u[0];
1476 XxC(qz,dy,dx) = v[0];
1477 XxG(qz,dy,dx) = w[0];
1479 XyB(qz,dy,dx) = u[1];
1480 XyC(qz,dy,dx) = v[1];
1481 XyG(qz,dy,dx) = w[1];
1483 XzB(qz,dy,dx) = u[2];
1484 XzC(qz,dy,dx) = v[2];
1485 XzG(qz,dy,dx) = w[2];
1493 template<
int MD1,
int MQ1>
1494 MFEM_HOST_DEVICE
inline void GradXt(
const int D1D,
const int Q1D,
1495 const double (&sBG)[2][MQ1*MD1],
1496 const double (&sDDQ)[9][MD1*MD1*MQ1],
1497 const DeviceTensor<5> &Y,
1512 MFEM_FOREACH_THREAD(dz,z,D1D)
1514 MFEM_FOREACH_THREAD(dy,y,D1D)
1516 MFEM_FOREACH_THREAD(dx,x,D1D)
1518 double u[3] = {0.0, 0.0, 0.0};
1519 double v[3] = {0.0, 0.0, 0.0};
1520 double w[3] = {0.0, 0.0, 0.0};
1521 for (
int qz = 0; qz < Q1D; ++qz)
1523 const double Btz = Bt(qz,dz);
1524 const double Gtz = Gt(qz,dz);
1526 u[0] += XxB(qz,dy,dx) * Btz;
1527 v[0] += XxC(qz,dy,dx) * Btz;
1528 w[0] += XxG(qz,dy,dx) * Gtz;
1530 u[1] += XyB(qz,dy,dx) * Btz;
1531 v[1] += XyC(qz,dy,dx)* Btz;
1532 w[1] += XyG(qz,dy,dx) * Gtz;
1534 u[2] += XzB(qz,dy,dx) * Btz;
1535 v[2] += XzC(qz,dy,dx) * Btz;
1536 w[2] += XzG(qz,dy,dx) * Gtz;
1538 Y(dx,dy,dz,0,e) += u[0] + v[0] + w[0];
1539 Y(dx,dy,dz,1,e) += u[1] + v[1] + w[1];
1540 Y(dx,dy,dz,2,e) += u[2] + v[2] + w[2];
1552 #endif // MFEM_FEM_KERNELS_HPP
DeviceTensor< 2, const double > ConstDeviceMatrix
DeviceTensor< 2, double > DeviceMatrix
DeviceTensor< 3, double > DeviceCube
double u(const Vector &xvec)
DeviceTensor< 3, const double > ConstDeviceCube