19template<
int T_D1D = 0,
int T_Q1D = 0>
static
20void DLFGradAssemble2D(
const int vdim,
const int ne,
const int d,
const int q,
25 const auto F = coeff.Read();
26 const auto M =
Reshape(markers, ne);
28 const auto G =
Reshape(g, q, d);
29 const auto J =
Reshape(jacobians, q, q, 2,2, ne);
30 const auto W =
Reshape(weights, q, q);
31 const bool cst = coeff.Size() == vdim*2;
32 const auto C = cst ?
Reshape(F,2,vdim,1,1,1) :
Reshape(F,2,vdim,q,q,ne);
33 auto Y =
Reshape(y, d,d, vdim, ne);
37 if (M(e) == 0) {
return; }
39 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
40 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
42 MFEM_SHARED
real_t sBGt[2][Q*D];
43 MFEM_SHARED
real_t sQQ[2][Q*Q];
44 MFEM_SHARED
real_t sDQ[2][D*Q];
48 kernels::internal::LoadBGt<D,Q>(d, q, B, G, sBGt);
56 for (
int c = 0; c < vdim; ++c)
58 const real_t cst_val0 = C(0,c,0,0,0);
59 const real_t cst_val1 = C(1,c,0,0,0);
61 MFEM_FOREACH_THREAD(x,x,q)
63 MFEM_FOREACH_THREAD(y,y,q)
66 const real_t J11 = J(x,y,0,0,e);
67 const real_t J21 = J(x,y,1,0,e);
68 const real_t J12 = J(x,y,0,1,e);
69 const real_t J22 = J(x,y,1,1,e);
70 const real_t u = cst ? cst_val0 : C(0,c,x,y,e);
71 const real_t v = cst ? cst_val1 : C(1,c,x,y,e);
73 QQ0(y,x) = w * (J22*
u - J12*v);
74 QQ1(y,x) = w * (J11*v - J21*
u);
78 MFEM_FOREACH_THREAD(qx,x,q)
80 MFEM_FOREACH_THREAD(dy,y,d)
83 for (
int qy = 0; qy < q; ++qy)
85 u += QQ0(qy,qx) * Bt(qy,dy);
86 v += QQ1(qy,qx) * Gt(qy,dy);
93 MFEM_FOREACH_THREAD(dx,x,d)
95 MFEM_FOREACH_THREAD(dy,y,d)
98 for (
int qx = 0; qx < q; ++qx)
100 u += DQ0(dy,qx) * Gt(qx,dx);
101 v += DQ1(dy,qx) * Bt(qx,dx);
103 Y(dx,dy,c,e) +=
u + v;
111template<
int T_D1D = 0,
int T_Q1D = 0>
static
112void DLFGradAssemble3D(
const int vdim,
const int ne,
const int d,
const int q,
115 const real_t *weights,
const Vector &coeff,
118 const auto F = coeff.Read();
119 const auto M =
Reshape(markers, ne);
121 const auto G =
Reshape(g, q,d);
122 const auto J =
Reshape(jacobians, q,q,q, 3,3, ne);
123 const auto W =
Reshape(weights, q,q,q);
124 const bool cst = coeff.Size() == vdim*3;
125 const auto C = cst ?
Reshape(F,3,vdim,1,1,1,1) :
Reshape(F,3,vdim,q,q,q,ne);
127 auto Y =
Reshape(output, d,d,d, vdim, ne);
131 if (M(e) == 0) {
return; }
133 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
134 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
135 constexpr int MQD = (Q >= D) ? Q : D;
137 MFEM_SHARED
real_t sBGt[2][Q*D];
140 MFEM_SHARED
real_t sQQQ[MQD*MQD*MQD];
143 kernels::internal::LoadBGt<D,Q>(d,q,B,G,sBGt);
145 for (
int c = 0; c < vdim; ++c)
147 const real_t cst_val_0 = C(0,c,0,0,0,0);
148 const real_t cst_val_1 = C(1,c,0,0,0,0);
149 const real_t cst_val_2 = C(2,c,0,0,0,0);
151 for (
int k = 0; k < 3; ++k)
153 for (
int z = 0; z < q; ++z)
155 MFEM_FOREACH_THREAD(y,y,q)
157 MFEM_FOREACH_THREAD(x,x,q)
159 const real_t J11 = J(x,y,z,0,0,e);
160 const real_t J21 = J(x,y,z,1,0,e);
161 const real_t J31 = J(x,y,z,2,0,e);
162 const real_t J12 = J(x,y,z,0,1,e);
163 const real_t J22 = J(x,y,z,1,1,e);
164 const real_t J32 = J(x,y,z,2,1,e);
165 const real_t J13 = J(x,y,z,0,2,e);
166 const real_t J23 = J(x,y,z,1,2,e);
167 const real_t J33 = J(x,y,z,2,2,e);
169 const real_t u = cst ? cst_val_0 : C(0,c,x,y,z,e);
170 const real_t v = cst ? cst_val_1 : C(1,c,x,y,z,e);
171 const real_t w = cst ? cst_val_2 : C(2,c,x,y,z,e);
175 const real_t A11 = (J22 * J33) - (J23 * J32);
176 const real_t A12 = (J32 * J13) - (J12 * J33);
177 const real_t A13 = (J12 * J23) - (J22 * J13);
178 QQQ(z,y,x) = A11*
u + A12*v + A13*w;
184 const real_t A21 = (J31 * J23) - (J21 * J33);
185 const real_t A22 = (J11 * J33) - (J13 * J31);
186 const real_t A23 = (J21 * J13) - (J11 * J23);
187 QQQ(z,y,x) = A21*
u + A22*v + A23*w;
192 const real_t A31 = (J21 * J32) - (J31 * J22);
193 const real_t A32 = (J31 * J12) - (J11 * J32);
194 const real_t A33 = (J11 * J22) - (J12 * J21);
195 QQQ(z,y,x) = A31*
u + A32*v + A33*w;
198 QQQ(z,y,x) *= W(x,y,z);
203 MFEM_FOREACH_THREAD(qz,x,q)
205 MFEM_FOREACH_THREAD(qy,y,q)
208 for (
int qx = 0; qx < q; ++qx) { r_u[qx] = QQQ(qz,qy,qx); }
209 for (
int dx = 0; dx < d; ++dx)
212 for (
int qx = 0; qx < q; ++qx)
214 u += (k == 0 ? Gt(qx,dx) : Bt(qx,dx)) * r_u[qx];
221 MFEM_FOREACH_THREAD(qz,y,q)
223 MFEM_FOREACH_THREAD(dx,x,d)
226 for (
int qy = 0; qy < q; ++qy) { r_u[qy] = QQQ(qz,qy,dx); }
227 for (
int dy = 0; dy < d; ++dy)
230 for (
int qy = 0; qy < q; ++qy)
232 u += (k == 1 ? Gt(qy,dy) : Bt(qy,dy)) * r_u[qy];
239 MFEM_FOREACH_THREAD(dy,y,d)
241 MFEM_FOREACH_THREAD(dx,x,d)
244 for (
int qz = 0; qz < q; ++qz) { r_u[qz] = QQQ(qz,dy,dx); }
245 for (
int dz = 0; dz < d; ++dz)
248 for (
int qz = 0; qz < q; ++qz)
250 u += (k == 2 ? Gt(qz,dz) : Bt(qz,dz)) * r_u[qz];
252 Y(dx,dy,dz,c,e) +=
u;
262static void DLFGradAssemble(
const FiniteElementSpace &fes,
263 const IntegrationRule *ir,
264 const Array<int> &markers,
268 Mesh *mesh = fes.GetMesh();
269 const int dim = mesh->Dimension();
270 const FiniteElement &el = *fes.GetTypicalFE();
273 const int d = maps.ndof, q = maps.nqpt;
275 const GeometricFactors *geom = mesh->GetGeometricFactors(*ir, flags, mt);
276 decltype(&DLFGradAssemble2D<>) ker =
277 dim == 2 ? DLFGradAssemble2D<> : DLFGradAssemble3D<>;
281 if (d==1 && q==1) { ker=DLFGradAssemble2D<1,1>; }
282 if (d==2 && q==2) { ker=DLFGradAssemble2D<2,2>; }
283 if (d==3 && q==3) { ker=DLFGradAssemble2D<3,3>; }
284 if (d==4 && q==4) { ker=DLFGradAssemble2D<4,4>; }
285 if (d==5 && q==5) { ker=DLFGradAssemble2D<5,5>; }
286 if (d==2 && q==3) { ker=DLFGradAssemble2D<2,3>; }
287 if (d==3 && q==4) { ker=DLFGradAssemble2D<3,4>; }
288 if (d==4 && q==5) { ker=DLFGradAssemble2D<4,5>; }
289 if (d==5 && q==6) { ker=DLFGradAssemble2D<5,6>; }
294 if (d==1 && q==1) { ker=DLFGradAssemble3D<1,1>; }
295 if (d==2 && q==2) { ker=DLFGradAssemble3D<2,2>; }
296 if (d==3 && q==3) { ker=DLFGradAssemble3D<3,3>; }
297 if (d==4 && q==4) { ker=DLFGradAssemble3D<4,4>; }
298 if (d==5 && q==5) { ker=DLFGradAssemble3D<5,5>; }
299 if (d==2 && q==3) { ker=DLFGradAssemble3D<2,3>; }
300 if (d==3 && q==4) { ker=DLFGradAssemble3D<3,4>; }
301 if (d==4 && q==5) { ker=DLFGradAssemble3D<4,5>; }
302 if (d==5 && q==6) { ker=DLFGradAssemble3D<5,6>; }
305 MFEM_VERIFY(ker,
"No kernel ndof " << d <<
" nqpt " << q);
307 const int vdim = fes.GetVDim();
308 const int ne = fes.GetMesh()->GetNE();
309 const int *M = markers.Read();
310 const real_t *B = maps.B.Read();
311 const real_t *G = maps.G.Read();
312 const real_t *J = geom->J.Read();
313 const real_t *W = ir->GetWeights().Read();
314 real_t *Y = y.ReadWrite();
315 ker(vdim, ne, d, q, M, B, G, J, W, coeff, Y);
324 const int qorder = 2 * fe.
GetOrder();
330 DLFGradAssemble(fes, ir, markers, coeff,
b);
338 const int qorder = 2 * fe.
GetOrder();
344 DLFGradAssemble(fes, ir, markers, coeff,
b);
Class to represent a coefficient evaluated at quadrature points.
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
void AssembleDevice(const FiniteElementSpace &fes, const Array< int > &markers, Vector &b) override
Method defining assembly on device.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Mesh * GetMesh() const
Returns the mesh.
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Abstract class for all finite elements.
int GetOrder() const
Returns the order of the finite element. In the case of anisotropic orders, returns the maximum order...
Geometry::Type GetGeomType() const
Returns the Geometry::Type of the reference element.
Class for an integration rule - an Array of IntegrationPoint.
const IntegrationRule & Get(int GeomType, int Order)
Returns an integration rule for given GeomType and Order.
const IntegrationRule * IntRule
Class representing the storage layout of a QuadratureFunction.
void AssembleDevice(const FiniteElementSpace &fes, const Array< int > &markers, Vector &b) override
Method defining assembly on device.
real_t u(const Vector &xvec)
DeviceTensor< 2, real_t > DeviceMatrix
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
@ COMPRESSED
Enable all above compressions.
void forall_2D(int N, int X, int Y, lambda &&body)
MemoryType
Memory types supported by MFEM.
DeviceTensor< 3, real_t > DeviceCube
IntegrationRules IntRules(0, Quadrature1D::GaussLegendre)
A global object with all integration rules (defined in intrules.cpp)