14 #include "../../linalg/dtensor.hpp"
15 #include "../../general/forall.hpp"
25 static constexpr
int nv = 4;
26 static constexpr
int ne = 4;
27 static constexpr
int dim = 2;
28 static constexpr
int ddm2 = (dim*(dim+1))/2;
29 static constexpr
int ngeom = ddm2 + 1;
30 static constexpr
int o = ORDER;
31 static constexpr
int op1 = ORDER + 1;
32 static constexpr
int ndof_per_el = dim*o*op1;
33 static constexpr
int nnz_per_row = 7;
34 static constexpr
int sz_local_mat = ne*ne;
36 const bool const_mq =
c1.
Size() == 1;
37 const auto MQ = const_mq
40 const bool const_dq =
c2.
Size() == 1;
41 const auto DQ = const_dq
50 MFEM_FORALL_2D(iel_ho, nel_ho, ORDER, ORDER, 1,
52 MFEM_FOREACH_THREAD(iy,y,o)
54 MFEM_FOREACH_THREAD(ix,x,op1)
56 for (
int c=0; c<2; ++c)
58 for (
int j=0; j<nnz_per_row; ++j)
60 V(j,ix+iy*op1,c,iel_ho) = 0.0;
68 MFEM_FOREACH_THREAD(ky,y,ORDER)
70 MFEM_FOREACH_THREAD(kx,x,ORDER)
74 double local_mat_[sz_local_mat];
80 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
83 LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);
85 for (
int iqx=0; iqx<2; ++iqx)
87 for (
int iqy=0; iqy<2; ++iqy)
91 const double w = 1.0/4.0;
98 const double detJ =
Det2D(J);
99 const double w_detJ = w/detJ;
101 Q(0,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0));
102 Q(1,iqy,iqx) = w_detJ * (J(0,0)*J(0,1) + J(1,0)*J(1,1));
103 Q(2,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1));
104 Q(3,iqy,iqx) = w_detJ;
107 for (
int iqx=0; iqx<2; ++iqx)
109 for (
int iqy=0; iqy<2; ++iqy)
111 const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
112 const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
114 for (
int cj=0; cj<
dim; ++cj)
116 for (
int bj=0; bj<2; ++bj)
118 const double bxj = (cj == 0 && bj == iqx) ? 1 : 0;
119 const double byj = (cj == 1 && bj == iqy) ? 1 : 0;
120 const double div_j = (bj == 0) ? -1 : 1;
122 const double jj_loc = bj + 2*cj;
124 for (
int ci=0; ci<
dim; ++ci)
126 for (
int bi=0; bi<2; ++bi)
128 const double bxi = (ci == 0 && bi == iqx) ? 1 : 0;
129 const double byi = (ci == 1 && bi == iqy) ? 1 : 0;
130 const double div_i = (bi == 0) ? -1 : 1;
132 const double ii_loc = bi + 2*ci;
136 if (jj_loc > ii_loc) {
continue; }
139 val += bxi*bxj*Q(0,iqy,iqx);
140 val += byi*bxj*Q(1,iqy,iqx);
141 val += bxi*byj*Q(1,iqy,iqx);
142 val += byi*byj*Q(2,iqy,iqx);
144 val += dq*div_j*div_i*Q(3,iqy,iqx);
146 local_mat(ii_loc, jj_loc) += val;
156 for (
int ii_loc=0; ii_loc<ne; ++ii_loc)
158 const int ci = ii_loc/2;
159 const int bi = ii_loc%2;
160 const int ix = (ci == 0) ? bi : 0;
161 const int iy = (ci == 1) ? bi : 0;
163 int ii = kx+ix + (ky+iy)*((ci == 0) ? op1 : o);
165 for (
int jj_loc=0; jj_loc<ne; ++jj_loc)
167 const int cj = jj_loc/2;
168 const int bj = jj_loc%2;
170 const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + 1-bi + 2*bj);
173 const double val = (jj_loc <= ii_loc)
174 ? local_mat(ii_loc, jj_loc)
175 : local_mat(jj_loc, ii_loc);
176 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
186 for (
int ci=0; ci<2; ++ci)
188 const int i_off = (ci == 0) ? 0 : o*op1;
190 const int id1 = (ci+1)%2;
192 const int nxi = (ci == 0) ? op1 : o;
194 for (
int i0=0; i0<op1; ++i0)
196 for (
int i1=0; i1<o; ++i1)
201 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi;
203 for (
int cj_rel=0; cj_rel<2; ++cj_rel)
205 const int cj = (ci + cj_rel) % 2;
206 const int j_off = (cj == 0) ? 0 : o*op1;
207 const int nxj = (cj == 0) ? op1 : o;
209 const int j0_begin = (i0 > 0) ? i0-1 : i0;
210 const int j0_end = (cj_rel == 0)
211 ? ((i0 < o) ? i0+1 : i0)
212 : ((i0 < o) ? i0 : i0-1);
213 const int j1_begin = i1;
214 const int j1_end = (cj_rel == 0) ? i1 : i1+1;
216 for (
int j0=j0_begin; j0<=j0_end; ++j0)
218 const int d0 = 1 + j0 - i0;
219 for (
int j1=j1_begin; j1<=j1_end; ++j1)
221 const int d1 = j1 - i1;
225 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj;
226 const int jj_off = (cj_rel == 0) ? d0 : 3 + d0 + 2*d1;
227 map(jj_off, ii_el) = jj_el;
241 static constexpr
int nv = 8;
242 static constexpr
int nf = 6;
243 static constexpr
int dim = 3;
244 static constexpr
int ddm2 = (dim*(dim+1))/2;
245 static constexpr
int ngeom = ddm2 + 1;
246 static constexpr
int o = ORDER;
247 static constexpr
int op1 = ORDER + 1;
248 static constexpr
int ndof_per_el = dim*o*o*op1;
249 static constexpr
int nnz_per_row = 11;
250 static constexpr
int sz_local_mat = nf*nf;
252 const bool const_mq =
c1.
Size() == 1;
253 const auto MQ = const_mq
256 const bool const_dq =
c2.
Size() == 1;
257 const auto DQ = const_dq
267 MFEM_FORALL_3D(iel_ho, nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
269 MFEM_FOREACH_THREAD(iz,z,o)
271 MFEM_FOREACH_THREAD(iy,y,o)
273 MFEM_FOREACH_THREAD(ix,x,op1)
275 for (
int c=0; c<
dim; ++c)
277 for (
int j=0; j<nnz_per_row; ++j)
279 V(j,ix+iy*op1+iz*o*op1,c,iel_ho) = 0.0;
288 MFEM_FOREACH_THREAD(kz,z,ORDER)
290 MFEM_FOREACH_THREAD(ky,y,ORDER)
292 MFEM_FOREACH_THREAD(kx,x,ORDER)
298 double local_mat_[sz_local_mat];
300 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
302 double vx[8], vy[8], vz[8];
303 LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
305 for (
int iqz=0; iqz<2; ++iqz)
307 for (
int iqy=0; iqy<2; ++iqy)
309 for (
int iqx=0; iqx<2; ++iqx)
311 const double x = iqx;
312 const double y = iqy;
313 const double z = iqz;
314 const double w = 1.0/8.0;
321 const double detJ =
Det3D(J);
322 const double w_detJ = w/detJ;
324 Q(0,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0));
325 Q(1,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,0)+J(1,1)*J(1,0)+J(2,1)*J(2,0));
326 Q(2,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,0)+J(1,2)*J(1,0)+J(2,2)*J(2,0));
327 Q(3,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1));
328 Q(4,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,1)+J(1,2)*J(1,1)+J(2,2)*J(2,1));
329 Q(5,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2));
330 Q(6,iqz,iqy,iqx) = w_detJ;
334 for (
int iqz=0; iqz<2; ++iqz)
336 for (
int iqy=0; iqy<2; ++iqy)
338 for (
int iqx=0; iqx<2; ++iqx)
340 const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
341 const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
343 for (
int cj=0; cj<
dim; ++cj)
345 const double jq0 = (cj == 0) ? iqx : ((cj == 1) ? iqy : iqz);
348 const int jd_1 = (cj + 1)%3;
349 const int jd_2 = (cj + 2)%3;
351 for (
int bj=0; bj<2; ++bj)
353 const double div_j = (bj == 0) ? -1 : 1;
356 basis_j[jd_0] = (bj == jq0) ? 1 : 0;
360 const int jj_loc = bj + 2*cj;
362 for (
int ci=0; ci<
dim; ++ci)
364 const double iq0 = (ci == 0) ? iqx : ((ci == 1) ? iqy : iqz);
367 const int id_1 = (ci + 1)%3;
368 const int id_2 = (ci + 2)%3;
370 for (
int bi=0; bi<2; ++bi)
372 const double div_i = (bi == 0) ? -1 : 1;
375 basis_i[id_0] = (bi == iq0) ? 1 : 0;
379 const int ii_loc = bi + 2*ci;
383 if (jj_loc > ii_loc) {
continue; }
385 const double div_div = Q(6,iqz,iqy,iqx)*div_i*div_j;
387 double basis_basis = 0.0;
388 basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
389 basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
390 basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
391 basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
392 basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
393 basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
395 const double val = dq*div_div + mq*basis_basis;
398 local_mat(ii_loc, jj_loc) += val;
417 for (
int ii_loc=0; ii_loc<nf; ++ii_loc)
419 const int ci = ii_loc/2;
420 const int bi = ii_loc%2;
423 const int id1 = (ci+1)%3;
424 const int id2 = (ci+2)%3;
435 const int nx = (ci == 0) ? op1 : o;
436 const int ny = (ci == 1) ? op1 : o;
438 const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
440 for (
int jj_loc=0; jj_loc<nf; ++jj_loc)
442 const int cj = jj_loc/2;
445 const int cj_rel = (3 + cj - ci)%3;
447 const int bj = jj_loc%2;
449 const int jd0 = cj_rel;
450 const int jd1 = (cj_rel+1)%3;
451 const int jd2 = (cj_rel+2)%3;
458 const int d0 = jj_rel[0] - i0 + 1;
459 const int d1 = jj_rel[1] - i1;
460 const int d2 = jj_rel[2] - i2;
462 if (cj_rel == 0) { jj_off = d0; }
463 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
464 else { jj_off = 7 + d0 + 2*d2; }
467 const double val = (jj_loc <= ii_loc)
468 ? local_mat(ii_loc, jj_loc)
469 : local_mat(jj_loc, ii_loc);
470 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
481 for (
int ci=0; ci<
dim; ++ci)
483 const int i_off = ci*o*o*op1;
485 const int id1 = (ci+1)%3;
486 const int id2 = (ci+2)%3;
488 const int nxi = (ci == 0) ? op1 : o;
489 const int nyi = (ci == 1) ? op1 : o;
491 for (
int i0=0; i0<op1; ++i0)
493 for (
int i1=0; i1<o; ++i1)
495 for (
int i2=0; i2<o; ++i2)
501 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
503 for (
int cj_rel=0; cj_rel<
dim; ++cj_rel)
505 const int cj = (ci + cj_rel) % 3;
506 const int j_off = cj*o*o*op1;
508 const int nxj = (cj == 0) ? op1 : o;
509 const int nyj = (cj == 1) ? op1 : o;
511 const int j0_begin = (i0 > 0) ? i0-1 : i0;
512 const int j0_end = (cj_rel == 0)
513 ? ((i0 < o) ? i0+1 : i0)
514 : ((i0 < o) ? i0 : i0-1);
515 const int j1_begin = i1;
516 const int j1_end = (cj_rel == 1) ? i1+1 : i1;
517 const int j2_begin = i2;
518 const int j2_end = (cj_rel == 2) ? i2+1 : i2;
520 for (
int j0=j0_begin; j0<=j0_end; ++j0)
522 const int d0 = 1 + j0 - i0;
523 for (
int j1=j1_begin; j1<=j1_end; ++j1)
525 const int d1 = j1 - i1;
526 for (
int j2=j2_begin; j2<=j2_end; ++j2)
528 const int d2 = j2 - i2;
533 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
535 if (cj_rel == 0) { jj_off = d0; }
536 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
537 else { jj_off = 7 + d0 + 2*d2; }
538 map(jj_off, ii_el) = jj_el;
550 template void BatchedLOR_RT::Assemble2D<1>();
551 template void BatchedLOR_RT::Assemble2D<2>();
552 template void BatchedLOR_RT::Assemble2D<3>();
553 template void BatchedLOR_RT::Assemble2D<4>();
554 template void BatchedLOR_RT::Assemble2D<5>();
555 template void BatchedLOR_RT::Assemble2D<6>();
556 template void BatchedLOR_RT::Assemble2D<7>();
557 template void BatchedLOR_RT::Assemble2D<8>();
559 template void BatchedLOR_RT::Assemble3D<1>();
560 template void BatchedLOR_RT::Assemble3D<2>();
561 template void BatchedLOR_RT::Assemble3D<3>();
562 template void BatchedLOR_RT::Assemble3D<4>();
563 template void BatchedLOR_RT::Assemble3D<5>();
564 template void BatchedLOR_RT::Assemble3D<6>();
565 template void BatchedLOR_RT::Assemble3D<7>();
566 template void BatchedLOR_RT::Assemble3D<8>();
575 ProjectLORCoefficient<VectorFEMassIntegrator>(
a,
c1);
576 ProjectLORCoefficient<DivDivIntegrator>(
a,
c2);
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Abstract base class for the batched LOR assembly kernels.
Array< int > & sparse_mapping
Local element sparsity pattern.
void SetSize(int s)
Resize the vector to size s.
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
int Size() const
Returns the size of the vector.
CoefficientVector c2
Coefficient of second integrator.
BatchedLOR_RT(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array< int > &sparse_mapping_)
int GetNE() const
Returns number of elements in the mesh.
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Vector & sparse_ij
Local element sparsity matrix data.
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
MFEM_HOST_DEVICE void Jacobian3D(const double x, const double y, const double z, const double vx[8], const double vy[8], const double vz[8], DeviceMatrix &J)
FiniteElementSpace & fes_ho
The associated high-order space.
A basic generic Tensor class, appropriate for use on the GPU.
MFEM_HOST_DEVICE void Jacobian2D(const double x, const double y, const double vx[4], const double vy[4], DeviceMatrix &J)
Vector & X_vert
Mesh coordinate vector.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
CoefficientVector c1
Coefficient of first integrator.
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.