14 #include "../../linalg/dtensor.hpp"
15 #include "../../general/forall.hpp"
25 static constexpr
int nv = 4;
26 static constexpr
int dim = 2;
27 static constexpr
int ddm2 = (dim*(dim+1))/2;
28 static constexpr
int nd1d = ORDER + 1;
29 static constexpr
int ndof_per_el = nd1d*nd1d;
30 static constexpr
int nnz_per_row = 9;
31 static constexpr
int sz_local_mat = nv*nv;
33 const bool const_mq =
c1.
Size() == 1;
34 const auto MQ = const_mq
37 const bool const_dq =
c2.
Size() == 1;
38 const auto DQ = const_dq
47 MFEM_FORALL_2D(iel_ho, nel_ho, ORDER, ORDER, 1,
53 MFEM_FOREACH_THREAD(iy,y,nd1d)
55 MFEM_FOREACH_THREAD(ix,x,nd1d)
57 for (
int j=0; j<nnz_per_row; ++j)
59 V(j,ix,iy,iel_ho) = 0.0;
66 MFEM_FOREACH_THREAD(ky,y,ORDER)
68 MFEM_FOREACH_THREAD(kx,x,ORDER)
70 double Q_[(ddm2 + 1)*nv];
71 double local_mat_[sz_local_mat];
75 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
78 LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);
80 for (
int iqy=0; iqy<2; ++iqy)
82 for (
int iqx=0; iqx<2; ++iqx)
86 const double w = 1.0/4.0;
93 const double detJ =
Det2D(J);
94 const double w_detJ = w/detJ;
96 Q(0,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1));
97 Q(1,iqy,iqx) = -w_detJ * (J(0,1)*J(0,0) + J(1,1)*J(1,0));
98 Q(2,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0));
99 Q(3,iqy,iqx) = w*detJ;
102 for (
int iqx=0; iqx<2; ++iqx)
104 for (
int iqy=0; iqy<2; ++iqy)
106 const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
107 const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
108 for (
int jy=0; jy<2; ++jy)
110 const double bjy = (jy == iqy) ? 1.0 : 0.0;
111 const double gjy = (jy == 0) ? -1.0 : 1.0;
112 for (
int jx=0; jx<2; ++jx)
114 const double bjx = (jx == iqx) ? 1.0 : 0.0;
115 const double gjx = (jx == 0) ? -1.0 : 1.0;
117 const double djx = gjx*bjy;
118 const double djy = bjx*gjy;
120 int jj_loc = jx + 2*jy;
122 for (
int iy=0; iy<2; ++iy)
124 const double biy = (iy == iqy) ? 1.0 : 0.0;
125 const double giy = (iy == 0) ? -1.0 : 1.0;
126 for (
int ix=0; ix<2; ++ix)
128 const double bix = (ix == iqx) ? 1.0 : 0.0;
129 const double gix = (ix == 0) ? -1.0 : 1.0;
131 const double dix = gix*biy;
132 const double diy = bix*giy;
134 int ii_loc = ix + 2*iy;
138 if (jj_loc > ii_loc) {
continue; }
141 val += dix*djx*Q(0,iqy,iqx);
142 val += (dix*djy + diy*djx)*Q(1,iqy,iqx);
143 val += diy*djy*Q(2,iqy,iqx);
146 val += mq*bix*biy*bjx*bjy*Q(3,iqy,iqx);
148 local_mat(ii_loc, jj_loc) += val;
158 for (
int ii_loc=0; ii_loc<nv; ++ii_loc)
160 const int ix = ii_loc%2;
161 const int iy = ii_loc/2;
162 for (
int jj_loc=0; jj_loc<nv; ++jj_loc)
164 const int jx = jj_loc%2;
165 const int jy = jj_loc/2;
166 const int jj_off = (jx-ix+1) + 3*(jy-iy+1);
169 if (jj_loc <= ii_loc)
171 AtomicAdd(V(jj_off, ix+kx, iy+ky, iel_ho), local_mat(ii_loc, jj_loc));
175 AtomicAdd(V(jj_off, ix+kx, iy+ky, iel_ho), local_mat(jj_loc, ii_loc));
186 for (
int iy=0; iy<nd1d; ++iy)
188 const int jy_begin = (iy > 0) ? iy - 1 : 0;
189 const int jy_end = (iy < ORDER) ? iy + 1 : ORDER;
190 for (
int ix=0; ix<nd1d; ++ix)
192 const int jx_begin = (ix > 0) ? ix - 1 : 0;
193 const int jx_end = (ix < ORDER) ? ix + 1 : ORDER;
194 const int ii_el = ix + nd1d*iy;
195 for (
int jy=jy_begin; jy<=jy_end; ++jy)
197 for (
int jx=jx_begin; jx<=jx_end; ++jx)
199 const int jj_off = (jx-ix+1) + 3*(jy-iy+1);
200 const int jj_el = jx + nd1d*jy;
201 map(jj_off, ii_el) = jj_el;
212 static constexpr
int nv = 8;
213 static constexpr
int dim = 3;
214 static constexpr
int ddm2 = (dim*(dim+1))/2;
215 static constexpr
int nd1d = ORDER + 1;
216 static constexpr
int ndof_per_el = nd1d*nd1d*nd1d;
217 static constexpr
int nnz_per_row = 27;
218 static constexpr
int sz_grad_A = 3*3*2*2*2*2;
219 static constexpr
int sz_grad_B = sz_grad_A*2;
220 static constexpr
int sz_mass_A = 2*2*2*2;
221 static constexpr
int sz_mass_B = sz_mass_A*2;
222 static constexpr
int sz_local_mat = nv*nv;
224 const bool const_mq =
c1.
Size() == 1;
225 const auto MQ = const_mq
228 const bool const_dq =
c2.
Size() == 1;
229 const auto DQ = const_dq
239 MFEM_FORALL_3D(iel_ho, nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
244 MFEM_FOREACH_THREAD(iz,z,nd1d)
246 MFEM_FOREACH_THREAD(iy,y,nd1d)
248 MFEM_FOREACH_THREAD(ix,x,nd1d)
250 MFEM_UNROLL(nnz_per_row)
251 for (
int j=0; j<nnz_per_row; ++j)
253 V(j,ix,iy,iz,iel_ho) = 0.0;
261 MFEM_FOREACH_THREAD(kz,z,ORDER)
263 MFEM_FOREACH_THREAD(ky,y,ORDER)
265 MFEM_FOREACH_THREAD(kx,x,ORDER)
267 double Q_[(ddm2 + 1)*nv];
268 double grad_A_[sz_grad_A];
269 double grad_B_[sz_grad_B];
270 double mass_A_[sz_mass_A];
271 double mass_B_[sz_mass_B];
272 double local_mat_[sz_local_mat];
282 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
286 for (
int i=0; i<sz_grad_A; ++i) { grad_A[i] = 0.0; }
287 for (
int i=0; i<sz_grad_B; ++i) { grad_B[i] = 0.0; }
289 for (
int i=0; i<sz_mass_A; ++i) { mass_A[i] = 0.0; }
290 for (
int i=0; i<sz_mass_B; ++i) { mass_B[i] = 0.0; }
292 double vx[8], vy[8], vz[8];
293 LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
296 for (
int iqz=0; iqz<2; ++iqz)
299 for (
int iqy=0; iqy<2; ++iqy)
302 for (
int iqx=0; iqx<2; ++iqx)
304 const double x = iqx;
305 const double y = iqy;
306 const double z = iqz;
307 const double w = 1.0/8.0;
314 const double detJ =
Det3D(J);
315 const double w_detJ = w/detJ;
322 Q(0,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(0,0)+A(0,1)*A(0,1)+A(0,2)*A(0,2));
323 Q(1,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(1,0)+A(0,1)*A(1,1)+A(0,2)*A(1,2));
324 Q(2,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(2,0)+A(0,1)*A(2,1)+A(0,2)*A(2,2));
325 Q(3,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(1,0)+A(1,1)*A(1,1)+A(1,2)*A(1,2));
326 Q(4,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(2,0)+A(1,1)*A(2,1)+A(1,2)*A(2,2));
327 Q(5,iqz,iqy,iqx) = w_detJ*(A(2,0)*A(2,0)+A(2,1)*A(2,1)+A(2,2)*A(2,2));
328 Q(6,iqz,iqy,iqx) = w*detJ;
334 for (
int iqx=0; iqx<2; ++iqx)
337 for (
int jz=0; jz<2; ++jz)
342 for (
int iz=jz; iz<2; ++iz)
345 for (
int iqy=0; iqy<2; ++iqy)
348 for (
int iqz=0; iqz<2; ++iqz)
350 const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
351 const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
353 const double biz = (iz == iqz) ? 1.0 : 0.0;
354 const double giz = (iz == 0) ? -1.0 : 1.0;
356 const double bjz = (jz == iqz) ? 1.0 : 0.0;
357 const double gjz = (jz == 0) ? -1.0 : 1.0;
359 const double J11 = Q(0,iqz,iqy,iqx);
360 const double J21 = Q(1,iqz,iqy,iqx);
361 const double J31 = Q(2,iqz,iqy,iqx);
362 const double J12 = J21;
363 const double J22 = Q(3,iqz,iqy,iqx);
364 const double J32 = Q(4,iqz,iqy,iqx);
365 const double J13 = J31;
366 const double J23 = J32;
367 const double J33 = Q(5,iqz,iqy,iqx);
369 grad_A(0,0,iqy,iz,jz,iqx) += dq*J11*biz*bjz;
370 grad_A(1,0,iqy,iz,jz,iqx) += dq*J21*biz*bjz;
371 grad_A(2,0,iqy,iz,jz,iqx) += dq*J31*giz*bjz;
372 grad_A(0,1,iqy,iz,jz,iqx) += dq*J12*biz*bjz;
373 grad_A(1,1,iqy,iz,jz,iqx) += dq*J22*biz*bjz;
374 grad_A(2,1,iqy,iz,jz,iqx) += dq*J32*giz*bjz;
375 grad_A(0,2,iqy,iz,jz,iqx) += dq*J13*biz*gjz;
376 grad_A(1,2,iqy,iz,jz,iqx) += dq*J23*biz*gjz;
377 grad_A(2,2,iqy,iz,jz,iqx) += dq*J33*giz*gjz;
379 double wdetJ = Q(6,iqz,iqy,iqx);
380 mass_A(iqy,iz,jz,iqx) += mq*wdetJ*biz*bjz;
383 for (
int jy=0; jy<2; ++jy)
386 for (
int iy=0; iy<2; ++iy)
388 const double biy = (iy == iqy) ? 1.0 : 0.0;
389 const double giy = (iy == 0) ? -1.0 : 1.0;
391 const double bjy = (jy == iqy) ? 1.0 : 0.0;
392 const double gjy = (jy == 0) ? -1.0 : 1.0;
394 grad_B(0,0,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(0,0,iqy,iz,jz,iqx);
395 grad_B(1,0,iy,jy,iz,jz,iqx) += giy*bjy*grad_A(1,0,iqy,iz,jz,iqx);
396 grad_B(2,0,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(2,0,iqy,iz,jz,iqx);
397 grad_B(0,1,iy,jy,iz,jz,iqx) += biy*gjy*grad_A(0,1,iqy,iz,jz,iqx);
398 grad_B(1,1,iy,jy,iz,jz,iqx) += giy*gjy*grad_A(1,1,iqy,iz,jz,iqx);
399 grad_B(2,1,iy,jy,iz,jz,iqx) += biy*gjy*grad_A(2,1,iqy,iz,jz,iqx);
400 grad_B(0,2,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(0,2,iqy,iz,jz,iqx);
401 grad_B(1,2,iy,jy,iz,jz,iqx) += giy*bjy*grad_A(1,2,iqy,iz,jz,iqx);
402 grad_B(2,2,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(2,2,iqy,iz,jz,iqx);
404 mass_B(iy,jy,iz,jz,iqx) += biy*bjy*mass_A(iqy,iz,jz,iqx);
409 for (
int jy=0; jy<2; ++jy)
412 for (
int jx=0; jx<2; ++jx)
415 for (
int iy=0; iy<2; ++iy)
418 for (
int ix=0; ix<2; ++ix)
420 const double bix = (ix == iqx) ? 1.0 : 0.0;
421 const double gix = (ix == 0) ? -1.0 : 1.0;
423 const double bjx = (jx == iqx) ? 1.0 : 0.0;
424 const double gjx = (jx == 0) ? -1.0 : 1.0;
426 int ii_loc = ix + 2*iy + 4*iz;
427 int jj_loc = jx + 2*jy + 4*jz;
431 if (jj_loc > ii_loc) {
continue; }
434 val += gix*gjx*grad_B(0,0,iy,jy,iz,jz,iqx);
435 val += bix*gjx*grad_B(1,0,iy,jy,iz,jz,iqx);
436 val += bix*gjx*grad_B(2,0,iy,jy,iz,jz,iqx);
437 val += gix*bjx*grad_B(0,1,iy,jy,iz,jz,iqx);
438 val += bix*bjx*grad_B(1,1,iy,jy,iz,jz,iqx);
439 val += bix*bjx*grad_B(2,1,iy,jy,iz,jz,iqx);
440 val += gix*bjx*grad_B(0,2,iy,jy,iz,jz,iqx);
441 val += bix*bjx*grad_B(2,2,iy,jy,iz,jz,iqx);
442 val += bix*bjx*grad_B(1,2,iy,jy,iz,jz,iqx);
444 val += bix*bjx*mass_B(iy,jy,iz,jz,iqx);
446 local_mat(ii_loc, jj_loc) += val;
458 for (
int ii_loc=0; ii_loc<nv; ++ii_loc)
460 const int ix = ii_loc%2;
461 const int iy = (ii_loc/2)%2;
462 const int iz = ii_loc/2/2;
464 for (
int jj_loc=0; jj_loc<nv; ++jj_loc)
466 const int jx = jj_loc%2;
467 const int jy = (jj_loc/2)%2;
468 const int jz = jj_loc/2/2;
469 const int jj_off = (jx-ix+1) + 3*(jy-iy+1) + 9*(jz-iz+1);
471 if (jj_loc <= ii_loc)
473 AtomicAdd(V(jj_off, ix+kx, iy+ky, iz+kz, iel_ho), local_mat(ii_loc, jj_loc));
477 AtomicAdd(V(jj_off, ix+kx, iy+ky, iz+kz, iel_ho), local_mat(jj_loc, ii_loc));
489 for (
int iz=0; iz<nd1d; ++iz)
491 const int jz_begin = (iz > 0) ? iz - 1 : 0;
492 const int jz_end = (iz < ORDER) ? iz + 1 : ORDER;
493 for (
int iy=0; iy<nd1d; ++iy)
495 const int jy_begin = (iy > 0) ? iy - 1 : 0;
496 const int jy_end = (iy < ORDER) ? iy + 1 : ORDER;
497 for (
int ix=0; ix<nd1d; ++ix)
499 const int jx_begin = (ix > 0) ? ix - 1 : 0;
500 const int jx_end = (ix < ORDER) ? ix + 1 : ORDER;
502 const int ii_el = ix + nd1d*(iy + nd1d*iz);
504 for (
int jz=jz_begin; jz<=jz_end; ++jz)
506 for (
int jy=jy_begin; jy<=jy_end; ++jy)
508 for (
int jx=jx_begin; jx<=jx_end; ++jx)
510 const int jj_off = (jx-ix+1) + 3*(jy-iy+1) + 9*(jz-iz+1);
511 const int jj_el = jx + nd1d*(jy + nd1d*jz);
512 map(jj_off, ii_el) = jj_el;
522 template void BatchedLOR_H1::Assemble2D<1>();
523 template void BatchedLOR_H1::Assemble2D<2>();
524 template void BatchedLOR_H1::Assemble2D<3>();
525 template void BatchedLOR_H1::Assemble2D<4>();
526 template void BatchedLOR_H1::Assemble2D<5>();
527 template void BatchedLOR_H1::Assemble2D<6>();
528 template void BatchedLOR_H1::Assemble2D<7>();
529 template void BatchedLOR_H1::Assemble2D<8>();
531 template void BatchedLOR_H1::Assemble3D<1>();
532 template void BatchedLOR_H1::Assemble3D<2>();
533 template void BatchedLOR_H1::Assemble3D<3>();
534 template void BatchedLOR_H1::Assemble3D<4>();
535 template void BatchedLOR_H1::Assemble3D<5>();
536 template void BatchedLOR_H1::Assemble3D<6>();
537 template void BatchedLOR_H1::Assemble3D<7>();
538 template void BatchedLOR_H1::Assemble3D<8>();
547 ProjectLORCoefficient<MassIntegrator>(
a,
c1);
548 ProjectLORCoefficient<DiffusionIntegrator>(
a,
c2);
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Abstract base class for the batched LOR assembly kernels.
Array< int > & sparse_mapping
Local element sparsity pattern.
void SetSize(int s)
Resize the vector to size s.
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
int Size() const
Returns the size of the vector.
CoefficientVector c2
Coefficient of second integrator.
int GetNE() const
Returns number of elements in the mesh.
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Vector & sparse_ij
Local element sparsity matrix data.
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
MFEM_HOST_DEVICE void Jacobian3D(const double x, const double y, const double z, const double vx[8], const double vy[8], const double vz[8], DeviceMatrix &J)
FiniteElementSpace & fes_ho
The associated high-order space.
A basic generic Tensor class, appropriate for use on the GPU.
MFEM_HOST_DEVICE void Jacobian2D(const double x, const double y, const double vx[4], const double vy[4], DeviceMatrix &J)
Vector & X_vert
Mesh coordinate vector.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
BatchedLOR_H1(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array< int > &sparse_mapping_)
MFEM_HOST_DEVICE void Adjugate3D(const DeviceMatrix &J, DeviceMatrix &A)
CoefficientVector c1
Coefficient of first integrator.
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.