19template<
int T_D1D = 0,
int T_Q1D = 0>
20static void HdivDLFAssemble2D(
21 const int ne,
const int d,
const int q,
const int *markers,
const real_t *bo,
23 const Vector &coeff,
real_t *y)
26 "Problem size too large.");
28 "Problem size too large.");
30 static constexpr int vdim = 2;
31 const auto F = coeff.Read();
32 const auto M =
Reshape(markers, ne);
33 const auto BO =
Reshape(bo, q, d-1);
34 const auto BC =
Reshape(bc, q, d);
35 const auto J =
Reshape(j, q, q, vdim, vdim, ne);
36 const auto W =
Reshape(weights, q, q);
37 const bool cst = coeff.Size() == vdim;
38 const auto C = cst ?
Reshape(F,vdim,1,1,1) :
Reshape(F,vdim,q,q,ne);
39 auto Y =
Reshape(y, 2*(d-1)*d, ne);
43 if (M(e) == 0) {
return; }
45 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
46 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
48 MFEM_SHARED
real_t sBot[Q*D];
49 MFEM_SHARED
real_t sBct[Q*D];
50 MFEM_SHARED
real_t sQQ[vdim*Q*Q];
51 MFEM_SHARED
real_t sQD[vdim*Q*D];
55 kernels::internal::LoadB<D,Q>(d-1, q, BO, sBot);
57 kernels::internal::LoadB<D,Q>(d, q, BC, sBct);
62 MFEM_FOREACH_THREAD(vd,z,vdim)
64 const real_t cst_val_0 = C(0,0,0,0);
65 const real_t cst_val_1 = C(1,0,0,0);
66 MFEM_FOREACH_THREAD(y,y,q)
68 MFEM_FOREACH_THREAD(x,x,q)
70 const real_t J0 = J(x,y,0,vd,e);
71 const real_t J1 = J(x,y,1,vd,e);
72 const real_t C0 = cst ? cst_val_0 : C(0,x,y,e);
73 const real_t C1 = cst ? cst_val_1 : C(1,x,y,e);
74 QQ(x,y,vd) = W(x,y)*(J0*C0 + J1*C1);
79 MFEM_FOREACH_THREAD(vd,z,vdim)
81 const int nx = (vd == 0) ? d : d-1;
83 MFEM_FOREACH_THREAD(qy,y,q)
85 MFEM_FOREACH_THREAD(dx,x,nx)
88 for (
int qx = 0; qx < q; ++qx)
90 qd += QQ(qx,qy,vd) * Btx(dx,qx);
97 MFEM_FOREACH_THREAD(vd,z,vdim)
99 const int nx = (vd == 0) ? d : d-1;
100 const int ny = (vd == 1) ? d : d-1;
102 DeviceTensor<4> Yxy(Y, nx, ny, vdim, ne);
103 MFEM_FOREACH_THREAD(dy,y,ny)
105 MFEM_FOREACH_THREAD(dx,x,nx)
108 for (
int qy = 0; qy < q; ++qy)
110 dd += QD(dx,qy,vd) * Bty(dy,qy);
112 Yxy(dx,dy,vd,e) += dd;
120template<
int T_D1D = 0,
int T_Q1D = 0>
121static void HdivDLFAssemble3D(
122 const int ne,
const int d,
const int q,
const int *markers,
const real_t *bo,
124 const Vector &coeff,
real_t *y)
127 "Problem size too large.");
129 "Problem size too large.");
131 static constexpr int vdim = 3;
132 const auto F = coeff.Read();
133 const auto M =
Reshape(markers, ne);
134 const auto BO =
Reshape(bo, q, d-1);
135 const auto BC =
Reshape(bc, q, d);
136 const auto J =
Reshape(j, q, q, q, vdim, vdim, ne);
137 const auto W =
Reshape(weights, q, q, q);
138 const bool cst = coeff.Size() == vdim;
139 const auto C = cst ?
Reshape(F,vdim,1,1,1,1) :
Reshape(F,vdim,q,q,q,ne);
140 auto Y =
Reshape(y, 2*(d-1)*(d-1)*d, ne);
144 if (M(e) == 0) {
return; }
146 constexpr int Q = T_Q1D ? T_Q1D : DofQuadLimits::HDIV_MAX_Q1D;
147 constexpr int D = T_D1D ? T_D1D : DofQuadLimits::HDIV_MAX_D1D;
149 MFEM_SHARED
real_t sBot[Q*D];
150 MFEM_SHARED
real_t sBct[Q*D];
154 kernels::internal::LoadB<D,Q>(d-1, q, BO, sBot);
156 kernels::internal::LoadB<D,Q>(d, q, BC, sBct);
158 MFEM_SHARED
real_t sm0[vdim*Q*Q*Q];
159 MFEM_SHARED
real_t sm1[vdim*Q*Q*Q];
160 DeviceTensor<4> QQQ(sm1, q, q, q, vdim);
161 DeviceTensor<4> DQQ(sm0, d, q, q, vdim);
162 DeviceTensor<4> DDQ(sm1, d, d, q, vdim);
164 MFEM_FOREACH_THREAD(vd,z,vdim)
166 const real_t cst_val_0 = C(0,0,0,0,0);
167 const real_t cst_val_1 = C(1,0,0,0,0);
168 const real_t cst_val_2 = C(2,0,0,0,0);
169 MFEM_FOREACH_THREAD(y,y,q)
171 MFEM_FOREACH_THREAD(x,x,q)
173 for (
int z = 0; z < q; ++z)
175 const real_t J0 = J(x,y,z,0,vd,e);
176 const real_t J1 = J(x,y,z,1,vd,e);
177 const real_t J2 = J(x,y,z,2,vd,e);
178 const real_t C0 = cst ? cst_val_0 : C(0,x,y,z,e);
179 const real_t C1 = cst ? cst_val_1 : C(1,x,y,z,e);
180 const real_t C2 = cst ? cst_val_2 : C(2,x,y,z,e);
181 QQQ(x,y,z,vd) = W(x,y,z)*(J0*C0 + J1*C1 + J2*C2);
188 MFEM_FOREACH_THREAD(vd,z,vdim)
190 const int nx = (vd == 0) ? d : d-1;
192 MFEM_FOREACH_THREAD(qy,y,q)
194 MFEM_FOREACH_THREAD(dx,x,nx)
198 for (
int qz = 0; qz < q; ++qz) {
u[qz] = 0.0; }
200 for (
int qx = 0; qx < q; ++qx)
203 for (
int qz = 0; qz < q; ++qz)
205 u[qz] += QQQ(qx,qy,qz,vd) * Btx(dx,qx);
209 for (
int qz = 0; qz < q; ++qz) { DQQ(dx,qy,qz,vd) =
u[qz]; }
214 MFEM_FOREACH_THREAD(vd,z,vdim)
216 const int nx = (vd == 0) ? d : d-1;
217 const int ny = (vd == 1) ? d : d-1;
219 MFEM_FOREACH_THREAD(dy,y,ny)
221 MFEM_FOREACH_THREAD(dx,x,nx)
225 for (
int qz = 0; qz < q; ++qz) {
u[qz] = 0.0; }
227 for (
int qy = 0; qy < q; ++qy)
230 for (
int qz = 0; qz < q; ++qz)
232 u[qz] += DQQ(dx,qy,qz,vd) * Bty(dy,qy);
236 for (
int qz = 0; qz < q; ++qz) { DDQ(dx,dy,qz,vd) =
u[qz]; }
241 MFEM_FOREACH_THREAD(vd,z,vdim)
243 const int nx = (vd == 0) ? d : d-1;
244 const int ny = (vd == 1) ? d : d-1;
245 const int nz = (vd == 2) ? d : d-1;
246 DeviceTensor<5> Yxyz(Y, nx, ny, nz, vdim, ne);
248 MFEM_FOREACH_THREAD(dy,y,ny)
250 MFEM_FOREACH_THREAD(dx,x,nx)
254 for (
int dz = 0; dz < nz; ++dz) {
u[dz] = 0.0; }
256 for (
int qz = 0; qz < q; ++qz)
259 for (
int dz = 0; dz < nz; ++dz)
261 u[dz] += DDQ(dx,dy,qz,vd) * Btz(dz,qz);
265 for (
int dz = 0; dz < nz; ++dz) { Yxyz(dx,dy,dz,vd,e) +=
u[dz]; }
273static void HdivDLFAssemble(
const FiniteElementSpace &fes,
274 const IntegrationRule *ir,
275 const Array<int> &markers,
279 Mesh &mesh = *fes.GetMesh();
280 const int dim = mesh.Dimension();
281 const FiniteElement *el = fes.GetTypicalFE();
282 const auto *
vel =
dynamic_cast<const VectorTensorFiniteElement *
>(el);
283 MFEM_VERIFY(
vel !=
nullptr,
"Must be VectorTensorFiniteElement");
287 const int d = maps_c.ndof, q = maps_c.nqpt;
289 const GeometricFactors *geom = mesh.GetGeometricFactors(*ir, flags, mt);
290 decltype(&HdivDLFAssemble2D<>) ker =
291 dim == 2 ? HdivDLFAssemble2D<> : HdivDLFAssemble3D<>;
295 if (d==1 && q==1) { ker=HdivDLFAssemble2D<1,1>; }
296 if (d==2 && q==2) { ker=HdivDLFAssemble2D<2,2>; }
297 if (d==3 && q==3) { ker=HdivDLFAssemble2D<3,3>; }
298 if (d==4 && q==4) { ker=HdivDLFAssemble2D<4,4>; }
299 if (d==5 && q==5) { ker=HdivDLFAssemble2D<5,5>; }
300 if (d==6 && q==6) { ker=HdivDLFAssemble2D<6,6>; }
301 if (d==7 && q==7) { ker=HdivDLFAssemble2D<7,7>; }
302 if (d==8 && q==8) { ker=HdivDLFAssemble2D<8,8>; }
307 if (d==2 && q==2) { ker=HdivDLFAssemble3D<2,2>; }
308 if (d==3 && q==3) { ker=HdivDLFAssemble3D<3,3>; }
309 if (d==4 && q==4) { ker=HdivDLFAssemble3D<4,4>; }
310 if (d==5 && q==5) { ker=HdivDLFAssemble3D<5,5>; }
311 if (d==6 && q==6) { ker=HdivDLFAssemble3D<6,6>; }
312 if (d==7 && q==7) { ker=HdivDLFAssemble3D<7,7>; }
313 if (d==8 && q==8) { ker=HdivDLFAssemble3D<8,8>; }
316 MFEM_VERIFY(ker,
"No kernel ndof " << d <<
" nqpt " << q);
318 const int ne = mesh.GetNE();
319 const int *M = markers.Read();
320 const real_t *Bo = maps_o.B.Read();
321 const real_t *Bc = maps_c.B.Read();
322 const real_t *J = geom->J.Read();
323 const real_t *W = ir->GetWeights().Read();
324 real_t *Y = y.ReadWrite();
325 ker(ne, d, q, M, Bo, Bc, J, W, coeff, Y);
333 const int qorder = 2 * fe.
GetOrder();
343 HdivDLFAssemble(fes, ir, markers, coeff,
b);
347 MFEM_ABORT(
"Not implemented.");
Class to represent a coefficient evaluated at quadrature points.
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Mesh * GetMesh() const
Returns the mesh.
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Abstract class for all finite elements.
int GetOrder() const
Returns the order of the finite element. In the case of anisotropic orders, returns the maximum order...
int GetDerivType() const
Returns the FiniteElement::DerivType of the element describing the spatial derivative method implemen...
Geometry::Type GetGeomType() const
Returns the Geometry::Type of the reference element.
@ DIV
Implements CalcDivShape methods.
Class for an integration rule - an Array of IntegrationPoint.
const IntegrationRule & Get(int GeomType, int Order)
Returns an integration rule for given GeomType and Order.
const IntegrationRule * IntRule
Class representing the storage layout of a QuadratureFunction.
void AssembleDevice(const FiniteElementSpace &fes, const Array< int > &markers, Vector &b) override
Method defining assembly on device.
real_t u(const Vector &xvec)
DeviceTensor< 2, real_t > DeviceMatrix
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
@ COMPRESSED
Enable all above compressions.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
MemoryType
Memory types supported by MFEM.
DeviceTensor< 3, real_t > DeviceCube
IntegrationRules IntRules(0, Quadrature1D::GaussLegendre)
A global object with all integration rules (defined in intrules.cpp)
void vel(const Vector &x, real_t t, Vector &u)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.