14 #include "../../linalg/dtensor.hpp" 15 #include "../../general/forall.hpp" 25 static constexpr
int nv = 4;
26 static constexpr
int ne = 4;
27 static constexpr
int dim = 2;
28 static constexpr
int ddm2 = (
dim*(
dim+1))/2;
29 static constexpr
int ngeom = ddm2 + 1;
30 static constexpr
int o = ORDER;
31 static constexpr
int op1 = ORDER + 1;
32 static constexpr
int ndof_per_el =
dim*o*op1;
33 static constexpr
int nnz_per_row = 7;
34 static constexpr
int sz_local_mat = ne*ne;
36 const bool const_mq =
c1.
Size() == 1;
37 const auto MQ = const_mq
40 const bool const_dq =
c2.
Size() == 1;
41 const auto DQ = const_dq
50 MFEM_FORALL_2D(iel_ho, nel_ho, ORDER, ORDER, 1,
52 MFEM_FOREACH_THREAD(iy,y,o)
54 MFEM_FOREACH_THREAD(ix,x,op1)
56 for (
int c=0; c<2; ++c)
58 for (
int j=0; j<nnz_per_row; ++j)
60 V(j,ix+iy*op1,c,iel_ho) = 0.0;
68 MFEM_FOREACH_THREAD(ky,y,ORDER)
70 MFEM_FOREACH_THREAD(kx,x,ORDER)
74 double local_mat_[sz_local_mat];
80 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
83 LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);
85 for (
int iqx=0; iqx<2; ++iqx)
87 for (
int iqy=0; iqy<2; ++iqy)
91 const double w = 1.0/4.0;
98 const double detJ =
Det2D(J);
99 const double w_detJ = w/detJ;
101 Q(0,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0));
102 Q(1,iqy,iqx) = w_detJ * (J(0,0)*J(0,1) + J(1,0)*J(1,1));
103 Q(2,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1));
104 Q(3,iqy,iqx) = w_detJ;
107 for (
int iqx=0; iqx<2; ++iqx)
109 for (
int iqy=0; iqy<2; ++iqy)
111 const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
112 const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
114 for (
int cj=0; cj<
dim; ++cj)
116 for (
int bj=0; bj<2; ++bj)
118 const double bxj = (cj == 0 && bj == iqx) ? 1 : 0;
119 const double byj = (cj == 1 && bj == iqy) ? 1 : 0;
120 const double div_j = (bj == 0) ? -1 : 1;
122 const double jj_loc = bj + 2*cj;
124 for (
int ci=0; ci<
dim; ++ci)
126 for (
int bi=0; bi<2; ++bi)
128 const double bxi = (ci == 0 && bi == iqx) ? 1 : 0;
129 const double byi = (ci == 1 && bi == iqy) ? 1 : 0;
130 const double div_i = (bi == 0) ? -1 : 1;
132 const double ii_loc = bi + 2*ci;
136 if (jj_loc > ii_loc) {
continue; }
139 val += bxi*bxj*Q(0,iqy,iqx);
140 val += byi*bxj*Q(1,iqy,iqx);
141 val += bxi*byj*Q(1,iqy,iqx);
142 val += byi*byj*Q(2,iqy,iqx);
144 val += dq*div_j*div_i*Q(3,iqy,iqx);
146 local_mat(ii_loc, jj_loc) += val;
156 for (
int ii_loc=0; ii_loc<ne; ++ii_loc)
158 const int ci = ii_loc/2;
159 const int bi = ii_loc%2;
160 const int ix = (ci == 0) ? bi : 0;
161 const int iy = (ci == 1) ? bi : 0;
163 int ii = kx+ix + (ky+iy)*((ci == 0) ? op1 : o);
165 for (
int jj_loc=0; jj_loc<ne; ++jj_loc)
167 const int cj = jj_loc/2;
168 const int bj = jj_loc%2;
170 const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + 1-bi + 2*bj);
173 const double val = (jj_loc <= ii_loc)
174 ? local_mat(ii_loc, jj_loc)
175 : local_mat(jj_loc, ii_loc);
176 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
186 for (
int ci=0; ci<2; ++ci)
188 const int i_off = (ci == 0) ? 0 : o*op1;
190 const int id1 = (ci+1)%2;
192 const int nxi = (ci == 0) ? op1 : o;
194 for (
int i0=0; i0<op1; ++i0)
196 for (
int i1=0; i1<o; ++i1)
201 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi;
203 for (
int cj_rel=0; cj_rel<2; ++cj_rel)
205 const int cj = (ci + cj_rel) % 2;
206 const int j_off = (cj == 0) ? 0 : o*op1;
207 const int nxj = (cj == 0) ? op1 : o;
209 const int j0_begin = (i0 > 0) ? i0-1 : i0;
210 const int j0_end = (cj_rel == 0)
211 ? ((i0 < o) ? i0+1 : i0)
212 : ((i0 < o) ? i0 : i0-1);
213 const int j1_begin = i1;
214 const int j1_end = (cj_rel == 0) ? i1 : i1+1;
216 for (
int j0=j0_begin; j0<=j0_end; ++j0)
218 const int d0 = 1 + j0 - i0;
219 for (
int j1=j1_begin; j1<=j1_end; ++j1)
221 const int d1 = j1 - i1;
225 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj;
226 const int jj_off = (cj_rel == 0) ? d0 : 3 + d0 + 2*d1;
227 map(jj_off, ii_el) = jj_el;
241 static constexpr
int nv = 8;
242 static constexpr
int nf = 6;
243 static constexpr
int dim = 3;
244 static constexpr
int ddm2 = (
dim*(
dim+1))/2;
245 static constexpr
int ngeom = ddm2 + 1;
246 static constexpr
int o = ORDER;
247 static constexpr
int op1 = ORDER + 1;
248 static constexpr
int ndof_per_el =
dim*o*o*op1;
249 static constexpr
int nnz_per_row = 11;
250 static constexpr
int sz_local_mat = nf*nf;
252 const bool const_mq =
c1.
Size() == 1;
253 const auto MQ = const_mq
256 const bool const_dq =
c2.
Size() == 1;
257 const auto DQ = const_dq
267 MFEM_FORALL_3D(iel_ho, nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
269 MFEM_FOREACH_THREAD(iz,z,o)
271 MFEM_FOREACH_THREAD(iy,y,o)
273 MFEM_FOREACH_THREAD(ix,x,op1)
275 for (
int c=0; c<
dim; ++c)
277 for (
int j=0; j<nnz_per_row; ++j)
279 V(j,ix+iy*op1+iz*o*op1,c,iel_ho) = 0.0;
288 MFEM_FOREACH_THREAD(kz,z,ORDER)
290 MFEM_FOREACH_THREAD(ky,y,ORDER)
292 MFEM_FOREACH_THREAD(kx,x,ORDER)
298 double local_mat_[sz_local_mat];
300 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
302 double vx[8], vy[8], vz[8];
303 LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
305 for (
int iqz=0; iqz<2; ++iqz)
307 for (
int iqy=0; iqy<2; ++iqy)
309 for (
int iqx=0; iqx<2; ++iqx)
311 const double x = iqx;
312 const double y = iqy;
313 const double z = iqz;
314 const double w = 1.0/8.0;
321 const double detJ =
Det3D(J);
322 const double w_detJ = w/detJ;
324 Q(0,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0));
325 Q(1,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,0)+J(1,1)*J(1,0)+J(2,1)*J(2,0));
326 Q(2,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,0)+J(1,2)*J(1,0)+J(2,2)*J(2,0));
327 Q(3,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1));
328 Q(4,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,1)+J(1,2)*J(1,1)+J(2,2)*J(2,1));
329 Q(5,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2));
330 Q(6,iqz,iqy,iqx) = w_detJ;
334 for (
int iqz=0; iqz<2; ++iqz)
336 for (
int iqy=0; iqy<2; ++iqy)
338 for (
int iqx=0; iqx<2; ++iqx)
340 const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
341 const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
343 for (
int cj=0; cj<
dim; ++cj)
345 const double jq0 = (cj == 0) ? iqx : ((cj == 1) ? iqy : iqz);
348 const int jd_1 = (cj + 1)%3;
349 const int jd_2 = (cj + 2)%3;
351 for (
int bj=0; bj<2; ++bj)
353 const double div_j = (bj == 0) ? -1 : 1;
356 basis_j[jd_0] = (bj == jq0) ? 1 : 0;
360 const int jj_loc = bj + 2*cj;
362 for (
int ci=0; ci<
dim; ++ci)
364 const double iq0 = (ci == 0) ? iqx : ((ci == 1) ? iqy : iqz);
367 const int id_1 = (ci + 1)%3;
368 const int id_2 = (ci + 2)%3;
370 for (
int bi=0; bi<2; ++bi)
372 const double div_i = (bi == 0) ? -1 : 1;
375 basis_i[id_0] = (bi == iq0) ? 1 : 0;
379 const int ii_loc = bi + 2*ci;
383 if (jj_loc > ii_loc) {
continue; }
385 const double div_div = Q(6,iqz,iqy,iqx)*div_i*div_j;
387 double basis_basis = 0.0;
388 basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
389 basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
390 basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
391 basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
392 basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
393 basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
395 const double val = dq*div_div + mq*basis_basis;
398 local_mat(ii_loc, jj_loc) += val;
417 for (
int ii_loc=0; ii_loc<nf; ++ii_loc)
419 const int ci = ii_loc/2;
420 const int bi = ii_loc%2;
423 const int id1 = (ci+1)%3;
424 const int id2 = (ci+2)%3;
435 const int nx = (ci == 0) ? op1 : o;
436 const int ny = (ci == 1) ? op1 : o;
438 const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
440 for (
int jj_loc=0; jj_loc<nf; ++jj_loc)
442 const int cj = jj_loc/2;
445 const int cj_rel = (3 + cj - ci)%3;
447 const int bj = jj_loc%2;
449 const int jd0 = cj_rel;
450 const int jd1 = (cj_rel+1)%3;
451 const int jd2 = (cj_rel+2)%3;
458 const int d0 = jj_rel[0] - i0 + 1;
459 const int d1 = jj_rel[1] - i1;
460 const int d2 = jj_rel[2] - i2;
462 if (cj_rel == 0) { jj_off = d0; }
463 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
464 else { jj_off = 7 + d0 + 2*d2; }
467 const double val = (jj_loc <= ii_loc)
468 ? local_mat(ii_loc, jj_loc)
469 : local_mat(jj_loc, ii_loc);
470 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
481 for (
int ci=0; ci<
dim; ++ci)
483 const int i_off = ci*o*o*op1;
485 const int id1 = (ci+1)%3;
486 const int id2 = (ci+2)%3;
488 const int nxi = (ci == 0) ? op1 : o;
489 const int nyi = (ci == 1) ? op1 : o;
491 for (
int i0=0; i0<op1; ++i0)
493 for (
int i1=0; i1<o; ++i1)
495 for (
int i2=0; i2<o; ++i2)
501 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
503 for (
int cj_rel=0; cj_rel<
dim; ++cj_rel)
505 const int cj = (ci + cj_rel) % 3;
506 const int j_off = cj*o*o*op1;
508 const int nxj = (cj == 0) ? op1 : o;
509 const int nyj = (cj == 1) ? op1 : o;
511 const int j0_begin = (i0 > 0) ? i0-1 : i0;
512 const int j0_end = (cj_rel == 0)
513 ? ((i0 < o) ? i0+1 : i0)
514 : ((i0 < o) ? i0 : i0-1);
515 const int j1_begin = i1;
516 const int j1_end = (cj_rel == 1) ? i1+1 : i1;
517 const int j2_begin = i2;
518 const int j2_end = (cj_rel == 2) ? i2+1 : i2;
520 for (
int j0=j0_begin; j0<=j0_end; ++j0)
522 const int d0 = 1 + j0 - i0;
523 for (
int j1=j1_begin; j1<=j1_end; ++j1)
525 const int d1 = j1 - i1;
526 for (
int j2=j2_begin; j2<=j2_end; ++j2)
528 const int d2 = j2 - i2;
533 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
535 if (cj_rel == 0) { jj_off = d0; }
536 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
537 else { jj_off = 7 + d0 + 2*d2; }
538 map(jj_off, ii_el) = jj_el;
550 template void BatchedLOR_RT::Assemble2D<1>();
551 template void BatchedLOR_RT::Assemble2D<2>();
552 template void BatchedLOR_RT::Assemble2D<3>();
553 template void BatchedLOR_RT::Assemble2D<4>();
554 template void BatchedLOR_RT::Assemble2D<5>();
555 template void BatchedLOR_RT::Assemble2D<6>();
556 template void BatchedLOR_RT::Assemble2D<7>();
557 template void BatchedLOR_RT::Assemble2D<8>();
559 template void BatchedLOR_RT::Assemble3D<1>();
560 template void BatchedLOR_RT::Assemble3D<2>();
561 template void BatchedLOR_RT::Assemble3D<3>();
562 template void BatchedLOR_RT::Assemble3D<4>();
563 template void BatchedLOR_RT::Assemble3D<5>();
564 template void BatchedLOR_RT::Assemble3D<6>();
565 template void BatchedLOR_RT::Assemble3D<7>();
566 template void BatchedLOR_RT::Assemble3D<8>();
575 ProjectLORCoefficient<VectorFEMassIntegrator>(
a,
c1);
576 ProjectLORCoefficient<DivDivIntegrator>(
a,
c2);
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Abstract base class for the batched LOR assembly kernels.
Array< int > & sparse_mapping
Local element sparsity pattern.
void SetSize(int s)
Resize the vector to size s.
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
int Size() const
Returns the size of the vector.
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
CoefficientVector c2
Coefficient of second integrator.
BatchedLOR_RT(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array< int > &sparse_mapping_)
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Vector & sparse_ij
Local element sparsity matrix data.
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
MFEM_HOST_DEVICE void Jacobian3D(const double x, const double y, const double z, const double vx[8], const double vy[8], const double vz[8], DeviceMatrix &J)
FiniteElementSpace & fes_ho
The associated high-order space.
A basic generic Tensor class, appropriate for use on the GPU.
int GetNE() const
Returns number of elements in the mesh.
MFEM_HOST_DEVICE void Jacobian2D(const double x, const double y, const double vx[4], const double vy[4], DeviceMatrix &J)
Vector & X_vert
Mesh coordinate vector.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
CoefficientVector c1
Coefficient of first integrator.