// Runtime upper bounds that the MFEM_VERIFY checks below compare D1D / Q1D against.
31 int r_MAX_D1D, r_MAX_Q1D;
// Two alternative assignments of the bounds. The conditions that select between
// them are not part of this listing.
// NOTE(review): presumably the same CUDA / HIP split as the constexpr
// MAX_D1D / MAX_Q1D values declared further down - confirm.
34 r_MAX_D1D = 6; r_MAX_Q1D = 7;
38 r_MAX_D1D = 7; r_MAX_Q1D = 7;
// DIM is the trip count of every v / s / t / m / n loop in this listing.
46 constexpr int DIM = 3;
// A nonzero T_D1D / T_Q1D takes precedence over the runtime d1d / q1d.
47 const int D1D = T_D1D ? T_D1D : d1d;
48 const int Q1D = T_Q1D ? T_Q1D : q1d;
// Check each size against its bound; the message reports both the size and the bound.
49 MFEM_VERIFY(D1D <= r_MAX_D1D,
50 "D1D: " << D1D <<
", r_MAX_D1D: " << r_MAX_D1D);
51 MFEM_VERIFY(Q1D <= r_MAX_Q1D,
52 "Q1D: " << Q1D <<
", r_MAX_Q1D: " << r_MAX_Q1D);
// B: the data returned by b.Read(), reshaped with extents (Q1D, D1D).
54 const auto B =
Reshape(
b.Read(), Q1D, D1D);
// Compile-time maxima for the 1D sizes, selected by preprocessor symbol:
// 6 / 7 when __CUDA_ARCH__ is defined, 7 / 7 when __HIP_DEVICE_COMPILE__ is defined.
64#if defined(__CUDA_ARCH__)
65 constexpr int MAX_D1D = 6;
66 constexpr int MAX_Q1D = 7;
67#elif defined(__HIP_DEVICE_COMPILE__)
68 constexpr int MAX_D1D = 7;
69 constexpr int MAX_Q1D = 7;
// Third alternative: take the maxima from DofQuadLimits.
// NOTE(review): the directive introducing this branch (presumably #else) and
// the closing #endif are not shown in this listing.
71 constexpr int MAX_D1D = DofQuadLimits::MAX_D1D;
72 constexpr int MAX_Q1D = DofQuadLimits::MAX_Q1D;
// DIM, D1D and Q1D are declared a second time with the same initializers as above.
// NOTE(review): presumably a new scope opens between the two sets of
// declarations; the opening line is not visible here.
75 constexpr int DIM = 3;
76 const int D1D = T_D1D ? T_D1D : d1d;
77 const int Q1D = T_Q1D ? T_Q1D : q1d;
// Compile-time extents: the T_* value when nonzero, otherwise the maximum.
// They size the bg array below.
78 constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D;
79 constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D;
// Buffer of 2*MQ1*MD1 real_t values, declared with the MFEM_SHARED qualifier.
// NOTE(review): presumably the backing storage for the B_sm / G_sm tables used
// later; the lines that would establish this are not shown.
81 MFEM_SHARED
real_t bg[2*MQ1*MD1];
// Thread loops over q (x, extent Q1D), d (y, extent D1D) and a single dummy z
// iteration. The loop body is not part of this listing.
// NOTE(review): presumably this is where B_sm / G_sm are filled - confirm.
92 MFEM_FOREACH_THREAD(q,x,Q1D)
94 MFEM_FOREACH_THREAD(d,y,D1D)
96 MFEM_FOREACH_THREAD(dummy,z,1)
// Loop over the DIM components. v is still used by the final D(dx,dy,dz,v,e)
// update, so this loop encloses the rest of the listing.
104 for (
int v = 0; v <
DIM; ++v)
// Visit every (qx,qy,qz) triple, each index with extent Q1D.
107 MFEM_FOREACH_THREAD(qx,x,Q1D)
109 MFEM_FOREACH_THREAD(qy,y,Q1D)
111 MFEM_FOREACH_THREAD(qz,z,Q1D)
// Pointer to the J entry at (0,0) for this point and element e.
// NOTE(review): Jrt used below is not defined in this listing; presumably it
// is derived from Jtr (e.g. its inverse) on an omitted line - confirm.
113 const real_t *Jtr = &J(0,0,qx,qy,qz,e);
// Copy the (v,s,v,t) entries of H at this point into the DIM x DIM H_loc.
120 for (
int s = 0;
s <
DIM;
s++)
122 for (
int t = 0;
t <
DIM;
t++)
124 H_loc(
s,
t) = H(v,
s,v,
t,qx,qy,qz,e);
// For each (m,n): Href(m,n,qx,qy,qz) = sum over s,t of
// Jrt(m,s) * H_loc(s,t) * Jrt(n,t).
128 for (
int m = 0; m <
DIM; m++)
130 for (
int n = 0; n <
DIM; n++)
// Zero the entry before accumulating into it.
134 Href(m,n,qx,qy,qz) = 0.0;
135 for (
int s = 0;
s <
DIM;
s++)
137 for (
int t = 0;
t <
DIM;
t++)
139 Href(m,n,qx,qy,qz) +=
140 Jrt(m,
s) * H_loc(
s,
t) * Jrt(n,
t);
// First contraction: sum Href over qz into QQD(m,n,qx,qy,dz),
// for every (qx,qy) in Q1D x Q1D and every dz in D1D.
151 MFEM_FOREACH_THREAD(qx,x,Q1D)
153 MFEM_FOREACH_THREAD(qy,y,Q1D)
155 MFEM_FOREACH_THREAD(dz,z,D1D)
// Zero all DIM x DIM entries of QQD at (qx,qy,dz) before accumulating.
157 for (
int m = 0; m <
DIM; m++)
159 for (
int n = 0; n <
DIM; n++)
161 QQD(m,n,qx,qy,dz) = 0.0;
166 for (
int qz = 0; qz < Q1D; ++qz)
// Table values for this (qz,dz) pair, read once per qz.
168 const real_t Bz = B_sm(qz,dz);
169 const real_t Gz = G_sm(qz,dz);
170 for (
int m = 0; m <
DIM; m++)
172 for (
int n = 0; n <
DIM; n++)
// Index 2 selects the G_sm value; indices 0 and 1 select the B_sm value.
// L is chosen by the row index m, R by the column index n.
174 const real_t L = (m == 2 ? Gz : Bz);
175 const real_t R = (n == 2 ? Gz : Bz);
176 QQD(m,n,qx,qy,dz) += L * Href(m,n,qx,qy,qz) * R;
// Second contraction: sum QQD over qy into QDD(m,n,qx,dy,dz),
// for every qx in Q1D and every (dy,dz) in D1D x D1D.
186 MFEM_FOREACH_THREAD(qx,x,Q1D)
188 MFEM_FOREACH_THREAD(dz,z,D1D)
190 MFEM_FOREACH_THREAD(dy,y,D1D)
// Zero all DIM x DIM entries of QDD at (qx,dy,dz) before accumulating.
192 for (
int m = 0; m <
DIM; m++)
194 for (
int n = 0; n <
DIM; n++)
196 QDD(m,n,qx,dy,dz) = 0.0;
201 for (
int qy = 0; qy < Q1D; ++qy)
// Table values for this (qy,dy) pair, read once per qy.
203 const real_t By = B_sm(qy,dy);
204 const real_t Gy = G_sm(qy,dy);
205 for (
int m = 0; m <
DIM; m++)
207 for (
int n = 0; n <
DIM; n++)
// Index 1 selects the G_sm value; indices 0 and 2 select the B_sm value.
209 const real_t L = (m == 1 ? Gy : By);
210 const real_t R = (n == 1 ? Gy : By);
211 QDD(m,n,qx,dy,dz) += L * QDD(m,n,qx,qy,dz) * R;
// Final contraction: for every (dx,dy,dz) in D1D^3, sum QDD over qx and over
// all (m,n) into the scalar d, then add d to D(dx,dy,dz,v,e).
// NOTE(review): the declaration / initialization of d is not shown in this
// listing; presumably it starts at zero for each (dx,dy,dz) - confirm.
221 MFEM_FOREACH_THREAD(dz,z,D1D)
223 MFEM_FOREACH_THREAD(dy,y,D1D)
225 MFEM_FOREACH_THREAD(dx,x,D1D)
229 for (
int qx = 0; qx < Q1D; ++qx)
// Table values for this (qx,dx) pair, read once per qx.
231 const real_t Bx = B_sm(qx,dx);
232 const real_t Gx = G_sm(qx,dx);
233 for (
int m = 0; m <
DIM; m++)
235 for (
int n = 0; n <
DIM; n++)
// Index 0 selects the G_sm value; indices 1 and 2 select the B_sm value.
237 const real_t L = (m == 0 ? Gx : Bx);
238 const real_t R = (n == 0 ? Gx : Bx);
239 d += L * QDD(m,n,qx,dy,dz) * R;
// Accumulate (+=) into D, so whatever D already holds at this entry is kept.
243 D(dx,dy,dz,v,e) += d;
MFEM_REGISTER_TMOP_KERNELS(void, DatcSize, const int NE, const int ncomp, const int sizeidx, const real_t input_min_size, const DenseMatrix &w_, const Array< real_t > &b_, const Vector &x_, const Vector &nc_reduce, DenseTensor &j_, const int d1d, const int q1d)