12#ifndef MFEM_BILININTEG_VECDIFFUSION_KERNELS_HPP
13#define MFEM_BILININTEG_VECDIFFUSION_KERNELS_HPP
22namespace mfem::internal
26template <
int T_D1D = 0,
int T_Q1D = 0,
int T_VDIM = 0>
28PAVectorDiffusionApply2D(
const int NE,
const Array<real_t> &
b,
29 const Array<real_t> &g,
const Array<real_t> &bt,
30 const Array<real_t> >,
const Vector &d_,
31 const Vector &x_, Vector &y_,
const int d1d = 0,
32 const int q1d = 0,
const int vdim = 0)
34 const int D1D = T_D1D ? T_D1D : d1d;
35 const int Q1D = T_Q1D ? T_Q1D : q1d;
36 const int VDIM = T_VDIM ? T_VDIM : vdim;
39 auto B =
Reshape(
b.Read(), Q1D, D1D);
40 auto G =
Reshape(g.Read(), Q1D, D1D);
41 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
42 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
43 auto D =
Reshape(d_.Read(), Q1D * Q1D, 3, NE);
44 auto x =
Reshape(x_.Read(), D1D, D1D, VDIM, NE);
45 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, VDIM, NE);
48 const int D1D = T_D1D ? T_D1D : d1d;
49 const int Q1D = T_Q1D ? T_Q1D : q1d;
50 const int VDIM = T_VDIM ? T_VDIM : vdim;
51 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
52 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
54 real_t grad[max_Q1D][max_Q1D][2];
55 for (
int c = 0; c < VDIM; c++)
57 for (
int qy = 0; qy < Q1D; ++qy)
59 for (
int qx = 0; qx < Q1D; ++qx)
61 grad[qy][qx][0] = 0.0;
62 grad[qy][qx][1] = 0.0;
65 for (
int dy = 0; dy < D1D; ++dy)
68 for (
int qx = 0; qx < Q1D; ++qx)
73 for (
int dx = 0; dx < D1D; ++dx)
75 const real_t s = x(dx, dy, c, e);
76 for (
int qx = 0; qx < Q1D; ++qx)
78 gradX[qx][0] += s * B(qx, dx);
79 gradX[qx][1] += s * G(qx, dx);
82 for (
int qy = 0; qy < Q1D; ++qy)
84 const real_t wy = B(qy, dy);
85 const real_t wDy = G(qy, dy);
86 for (
int qx = 0; qx < Q1D; ++qx)
88 grad[qy][qx][0] += gradX[qx][1] * wy;
89 grad[qy][qx][1] += gradX[qx][0] * wDy;
94 for (
int qy = 0; qy < Q1D; ++qy)
96 for (
int qx = 0; qx < Q1D; ++qx)
98 const int q = qx + qy * Q1D;
99 const real_t O11 = D(q, 0, e);
100 const real_t O12 = D(q, 1, e);
101 const real_t O22 = D(q, 2, e);
102 const real_t gradX = grad[qy][qx][0];
103 const real_t gradY = grad[qy][qx][1];
104 grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
105 grad[qy][qx][1] = (O12 * gradX) + (O22 * gradY);
108 for (
int qy = 0; qy < Q1D; ++qy)
111 for (
int dx = 0; dx < D1D; ++dx)
116 for (
int qx = 0; qx < Q1D; ++qx)
118 const real_t gX = grad[qy][qx][0];
119 const real_t gY = grad[qy][qx][1];
120 for (
int dx = 0; dx < D1D; ++dx)
122 const real_t wx = Bt(dx, qx);
123 const real_t wDx = Gt(dx, qx);
124 gradX[dx][0] += gX * wDx;
125 gradX[dx][1] += gY * wx;
128 for (
int dy = 0; dy < D1D; ++dy)
130 const real_t wy = Bt(dy, qy);
131 const real_t wDy = Gt(dy, qy);
132 for (
int dx = 0; dx < D1D; ++dx)
135 ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
144template <const
int T_D1D = 0, const
int T_Q1D = 0>
146PAVectorDiffusionApply3D(
const int NE,
const Array<real_t> &
b,
147 const Array<real_t> &g,
const Array<real_t> &bt,
148 const Array<real_t> >,
const Vector &op_,
149 const Vector &x_, Vector &y_,
const int d1d = 0,
150 const int q1d = 0,
const int sdim = 0)
152 const int D1D = T_D1D ? T_D1D : d1d;
153 const int Q1D = T_Q1D ? T_Q1D : q1d;
154 constexpr int VDIM = 3;
157 auto B =
Reshape(
b.Read(), Q1D, D1D);
158 auto G =
Reshape(g.Read(), Q1D, D1D);
159 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
160 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
161 auto op =
Reshape(op_.Read(), Q1D * Q1D * Q1D, 6, NE);
162 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, VDIM, NE);
163 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, VDIM, NE);
166 const int D1D = T_D1D ? T_D1D : d1d;
167 const int Q1D = T_Q1D ? T_Q1D : q1d;
168 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
169 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
170 for (
int c = 0; c < VDIM; ++c)
172 real_t grad[max_Q1D][max_Q1D][max_Q1D][3];
173 for (
int qz = 0; qz < Q1D; ++qz)
175 for (
int qy = 0; qy < Q1D; ++qy)
177 for (
int qx = 0; qx < Q1D; ++qx)
179 grad[qz][qy][qx][0] = 0.0;
180 grad[qz][qy][qx][1] = 0.0;
181 grad[qz][qy][qx][2] = 0.0;
185 for (
int dz = 0; dz < D1D; ++dz)
187 real_t gradXY[max_Q1D][max_Q1D][3];
188 for (
int qy = 0; qy < Q1D; ++qy)
190 for (
int qx = 0; qx < Q1D; ++qx)
192 gradXY[qy][qx][0] = 0.0;
193 gradXY[qy][qx][1] = 0.0;
194 gradXY[qy][qx][2] = 0.0;
197 for (
int dy = 0; dy < D1D; ++dy)
200 for (
int qx = 0; qx < Q1D; ++qx)
205 for (
int dx = 0; dx < D1D; ++dx)
207 const real_t s = x(dx, dy, dz, c, e);
208 for (
int qx = 0; qx < Q1D; ++qx)
210 gradX[qx][0] += s * B(qx, dx);
211 gradX[qx][1] += s * G(qx, dx);
214 for (
int qy = 0; qy < Q1D; ++qy)
216 const real_t wy = B(qy, dy);
217 const real_t wDy = G(qy, dy);
218 for (
int qx = 0; qx < Q1D; ++qx)
220 const real_t wx = gradX[qx][0];
221 const real_t wDx = gradX[qx][1];
222 gradXY[qy][qx][0] += wDx * wy;
223 gradXY[qy][qx][1] += wx * wDy;
224 gradXY[qy][qx][2] += wx * wy;
228 for (
int qz = 0; qz < Q1D; ++qz)
230 const real_t wz = B(qz, dz);
231 const real_t wDz = G(qz, dz);
232 for (
int qy = 0; qy < Q1D; ++qy)
234 for (
int qx = 0; qx < Q1D; ++qx)
236 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
237 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
238 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
244 for (
int qz = 0; qz < Q1D; ++qz)
246 for (
int qy = 0; qy < Q1D; ++qy)
248 for (
int qx = 0; qx < Q1D; ++qx)
250 const int q = qx + (qy + qz * Q1D) * Q1D;
251 const real_t O11 = op(q, 0, e);
252 const real_t O12 = op(q, 1, e);
253 const real_t O13 = op(q, 2, e);
254 const real_t O22 = op(q, 3, e);
255 const real_t O23 = op(q, 4, e);
256 const real_t O33 = op(q, 5, e);
257 const real_t gradX = grad[qz][qy][qx][0];
258 const real_t gradY = grad[qz][qy][qx][1];
259 const real_t gradZ = grad[qz][qy][qx][2];
260 grad[qz][qy][qx][0] =
261 (O11 * gradX) + (O12 * gradY) + (O13 * gradZ);
262 grad[qz][qy][qx][1] =
263 (O12 * gradX) + (O22 * gradY) + (O23 * gradZ);
264 grad[qz][qy][qx][2] =
265 (O13 * gradX) + (O23 * gradY) + (O33 * gradZ);
269 for (
int qz = 0; qz < Q1D; ++qz)
271 real_t gradXY[max_D1D][max_D1D][3];
272 for (
int dy = 0; dy < D1D; ++dy)
274 for (
int dx = 0; dx < D1D; ++dx)
276 gradXY[dy][dx][0] = 0;
277 gradXY[dy][dx][1] = 0;
278 gradXY[dy][dx][2] = 0;
281 for (
int qy = 0; qy < Q1D; ++qy)
284 for (
int dx = 0; dx < D1D; ++dx)
290 for (
int qx = 0; qx < Q1D; ++qx)
292 const real_t gX = grad[qz][qy][qx][0];
293 const real_t gY = grad[qz][qy][qx][1];
294 const real_t gZ = grad[qz][qy][qx][2];
295 for (
int dx = 0; dx < D1D; ++dx)
297 const real_t wx = Bt(dx, qx);
298 const real_t wDx = Gt(dx, qx);
299 gradX[dx][0] += gX * wDx;
300 gradX[dx][1] += gY * wx;
301 gradX[dx][2] += gZ * wx;
304 for (
int dy = 0; dy < D1D; ++dy)
306 const real_t wy = Bt(dy, qy);
307 const real_t wDy = Gt(dy, qy);
308 for (
int dx = 0; dx < D1D; ++dx)
310 gradXY[dy][dx][0] += gradX[dx][0] * wy;
311 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
312 gradXY[dy][dx][2] += gradX[dx][2] * wy;
316 for (
int dz = 0; dz < D1D; ++dz)
318 const real_t wz = Bt(dz, qz);
319 const real_t wDz = Gt(dz, qz);
320 for (
int dy = 0; dy < D1D; ++dy)
322 for (
int dx = 0; dx < D1D; ++dx)
324 y(dx, dy, dz, c, e) +=
325 ((gradXY[dy][dx][0] * wz) + (gradXY[dy][dx][1] * wz) +
326 (gradXY[dy][dx][2] * wDz));
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.