14 #include "../../linalg/dtensor.hpp"
15 #include "../../general/forall.hpp"
25 static constexpr
int nv = 4;
26 static constexpr
int ne = 4;
27 static constexpr
int dim = 2;
28 static constexpr
int ddm2 = (dim*(dim+1))/2;
29 static constexpr
int ngeom = ddm2 + 1;
30 static constexpr
int o = ORDER;
31 static constexpr
int op1 = ORDER + 1;
32 static constexpr
int ndof_per_el = dim*o*op1;
33 static constexpr
int nnz_per_row = 7;
34 static constexpr
int sz_local_mat = ne*ne;
36 const bool const_mq =
c1.
Size() == 1;
37 const auto MQ = const_mq
40 const bool const_dq =
c2.
Size() == 1;
41 const auto DQ = const_dq
50 MFEM_FORALL_2D(iel_ho, nel_ho, ORDER, ORDER, 1,
56 MFEM_FOREACH_THREAD(iy,y,o)
58 MFEM_FOREACH_THREAD(ix,x,op1)
60 for (
int c=0; c<2; ++c)
62 for (
int j=0; j<nnz_per_row; ++j)
64 V(j,ix+iy*op1,c,iel_ho) = 0.0;
72 MFEM_FOREACH_THREAD(ky,y,ORDER)
74 MFEM_FOREACH_THREAD(kx,x,ORDER)
78 double local_mat_[sz_local_mat];
84 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
87 LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);
89 for (
int iqx=0; iqx<2; ++iqx)
91 for (
int iqy=0; iqy<2; ++iqy)
95 const double w = 1.0/4.0;
102 const double detJ =
Det2D(J);
103 const double w_detJ = w/detJ;
105 Q(0,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1));
106 Q(1,iqy,iqx) = -w_detJ * (J(0,1)*J(0,0) + J(1,1)*J(1,0));
107 Q(2,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0));
108 Q(3,iqy,iqx) = w_detJ;
111 for (
int iqx=0; iqx<2; ++iqx)
113 for (
int iqy=0; iqy<2; ++iqy)
115 const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
116 const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
118 for (
int cj=0; cj<
dim; ++cj)
120 for (
int bj=0; bj<2; ++bj)
122 const double curl_j = ((cj == 0) ? 1 : -1)*((bj == 0) ? 1 : -1);
123 const double bxj = (cj == 0) ? ((bj == iqy) ? 1 : 0) : 0;
124 const double byj = (cj == 1) ? ((bj == iqx) ? 1 : 0) : 0;
126 const double jj_loc = bj + 2*cj;
128 for (
int ci=0; ci<
dim; ++ci)
130 for (
int bi=0; bi<2; ++bi)
132 const double curl_i = ((ci == 0) ? 1 : -1)*((bi == 0) ? 1 : -1);
133 const double bxi = (ci == 0) ? ((bi == iqy) ? 1 : 0) : 0;
134 const double byi = (ci == 1) ? ((bi == iqx) ? 1 : 0) : 0;
136 const double ii_loc = bi + 2*ci;
140 if (jj_loc > ii_loc) {
continue; }
143 val += bxi*bxj*Q(0,iqy,iqx);
144 val += byi*bxj*Q(1,iqy,iqx);
145 val += bxi*byj*Q(1,iqy,iqx);
146 val += byi*byj*Q(2,iqy,iqx);
148 val += dq*curl_i*curl_j*Q(3,iqy,iqx);
150 local_mat(ii_loc, jj_loc) += val;
160 for (
int ii_loc=0; ii_loc<ne; ++ii_loc)
162 const int ci = ii_loc/2;
163 const int bi = ii_loc%2;
164 const int ix = (ci == 0) ? 0 : bi;
165 const int iy = (ci == 1) ? 0 : bi;
167 int ii = (ci == 0) ? kx+ix + (ky+iy)*o : kx+ix + (ky+iy)*op1;
169 for (
int jj_loc=0; jj_loc<ne; ++jj_loc)
171 const int cj = jj_loc/2;
172 const int bj = jj_loc%2;
174 const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + bj + (1-bi)*2);
177 const double val = (jj_loc <= ii_loc)
178 ? local_mat(ii_loc, jj_loc)
179 : local_mat(jj_loc, ii_loc);
180 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
190 for (
int ci=0; ci<2; ++ci)
192 for (
int i1=0; i1<o; ++i1)
194 for (
int i2=0; i2<op1; ++i2)
196 const int ii_el = (ci == 0) ? i1 + i2*o : i2 + i1*op1 + o*op1;
197 for (
int cj=0; cj<2; ++cj)
199 const int j1_begin = (ci == cj) ? i1 : ((i2 > 0) ? i2-1 : i2);
200 const int j1_end = (ci == cj) ? i1 : ((i2 < o) ? i2 : i2-1);
201 const int j2_begin = (ci == cj) ? ((i2 > 0) ? i2-1 : i2) : i1;
202 const int j2_end = (ci == cj) ? ((i2 < o) ? i2+1 : i2) : i1+1;
204 for (
int j1=j1_begin; j1<=j1_end; ++j1)
206 for (
int j2=j2_begin; j2<=j2_end; ++j2)
208 const int jj_el = (cj == 0) ? j1 + j2*o : j2 + j1*op1 + o*op1;
209 int jj_off = (ci == cj) ? (j2-i2+1) : 3 + (j2-i1) + 2*(j1-i2+1);
210 map(jj_off, ii_el) = jj_el;
224 static constexpr
int nv = 8;
225 static constexpr
int ne = 12;
226 static constexpr
int dim = 3;
227 static constexpr
int ddm2 = (dim*(dim+1))/2;
228 static constexpr
int ngeom = 2*ddm2;
229 static constexpr
int o = ORDER;
230 static constexpr
int op1 = ORDER + 1;
231 static constexpr
int ndof_per_el = dim*o*op1*op1;
232 static constexpr
int nnz_per_row = 33;
233 static constexpr
int sz_local_mat = ne*ne;
235 const bool const_mq =
c1.
Size() == 1;
236 const auto MQ = const_mq
239 const bool const_dq =
c2.
Size() == 1;
240 const auto DQ = const_dq
250 MFEM_FORALL_3D(iel_ho, nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
252 MFEM_FOREACH_THREAD(iz,z,o)
254 MFEM_FOREACH_THREAD(iy,y,op1)
256 MFEM_FOREACH_THREAD(ix,x,op1)
258 for (
int c=0; c<
dim; ++c)
260 for (
int j=0; j<nnz_per_row; ++j)
262 V(j,ix+iy*op1+iz*op1*op1,c,iel_ho) = 0.0;
271 MFEM_FOREACH_THREAD(kz,z,ORDER)
273 MFEM_FOREACH_THREAD(ky,y,ORDER)
275 MFEM_FOREACH_THREAD(kx,x,ORDER)
281 double local_mat_[sz_local_mat];
283 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
285 double vx[8], vy[8], vz[8];
286 LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
288 for (
int iqz=0; iqz<2; ++iqz)
290 for (
int iqy=0; iqy<2; ++iqy)
292 for (
int iqx=0; iqx<2; ++iqx)
294 const double x = iqx;
295 const double y = iqy;
296 const double z = iqz;
297 const double w = 1.0/8.0;
304 const double detJ =
Det3D(J);
305 const double w_detJ = w/detJ;
312 Q(0,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(0,0)+A(0,1)*A(0,1)+A(0,2)*A(0,2));
313 Q(1,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(1,0)+A(0,1)*A(1,1)+A(0,2)*A(1,2));
314 Q(2,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(2,0)+A(0,1)*A(2,1)+A(0,2)*A(2,2));
315 Q(3,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(1,0)+A(1,1)*A(1,1)+A(1,2)*A(1,2));
316 Q(4,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(2,0)+A(1,1)*A(2,1)+A(1,2)*A(2,2));
317 Q(5,iqz,iqy,iqx) = w_detJ*(A(2,0)*A(2,0)+A(2,1)*A(2,1)+A(2,2)*A(2,2));
320 Q(6,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0));
321 Q(7,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,1)+J(1,0)*J(1,1)+J(2,0)*J(2,1));
322 Q(8,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,2)+J(1,0)*J(1,2)+J(2,0)*J(2,2));
323 Q(9,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1));
324 Q(10,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,2)+J(1,1)*J(1,2)+J(2,1)*J(2,2));
325 Q(11,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2));
329 for (
int iqz=0; iqz<2; ++iqz)
331 for (
int iqy=0; iqy<2; ++iqy)
333 for (
int iqx=0; iqx<2; ++iqx)
335 const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
336 const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
338 for (
int cj=0; cj<
dim; ++cj)
340 const double jq1 = (cj == 0) ? iqy : ((cj == 1) ? iqz : iqx);
341 const double jq2 = (cj == 0) ? iqz : ((cj == 1) ? iqx : iqy);
344 const int jd_1 = (cj + 1)%3;
345 const int jd_2 = (cj + 2)%3;
347 for (
int bj=0; bj<4; ++bj)
349 const int bj1 = bj%2;
350 const int bj2 = bj/2;
354 curl_j[jd_1] = ((bj1 == 0) ? jq1 - 1 : -jq1)*((bj2 == 0) ? 1 : -1);
355 curl_j[jd_2] = ((bj2 == 0) ? 1 - jq2 : jq2)*((bj1 == 0) ? 1 : -1);
358 basis_j[jd_0] = ((bj1 == 0) ? 1 - jq1 : jq1)*((bj2 == 0) ? 1 - jq2 : jq2);
362 const int jj_loc = bj + 4*cj;
364 for (
int ci=0; ci<
dim; ++ci)
366 const double iq1 = (ci == 0) ? iqy : ((ci == 1) ? iqz : iqx);
367 const double iq2 = (ci == 0) ? iqz : ((ci == 1) ? iqx : iqy);
370 const int id_1 = (ci + 1)%3;
371 const int id_2 = (ci + 2)%3;
373 for (
int bi=0; bi<4; ++bi)
375 const int bi1 = bi%2;
376 const int bi2 = bi/2;
380 curl_i[id_1] = ((bi1 == 0) ? iq1 - 1 : -iq1)*((bi2 == 0) ? 1 : -1);
381 curl_i[id_2] = ((bi2 == 0) ? 1 - iq2 : iq2)*((bi1 == 0) ? 1 : -1);
384 basis_i[id_0] = ((bi1 == 0) ? 1 - iq1 : iq1)*((bi2 == 0) ? 1 - iq2 : iq2);
388 const int ii_loc = bi + 4*ci;
392 if (jj_loc > ii_loc) {
continue; }
394 double curl_curl = 0.0;
395 curl_curl += Q(6,iqz,iqy,iqx)*curl_i[0]*curl_j[0];
396 curl_curl += Q(7,iqz,iqy,iqx)*(curl_i[0]*curl_j[1] + curl_i[1]*curl_j[0]);
397 curl_curl += Q(8,iqz,iqy,iqx)*(curl_i[0]*curl_j[2] + curl_i[2]*curl_j[0]);
398 curl_curl += Q(9,iqz,iqy,iqx)*curl_i[1]*curl_j[1];
399 curl_curl += Q(10,iqz,iqy,iqx)*(curl_i[1]*curl_j[2] + curl_i[2]*curl_j[1]);
400 curl_curl += Q(11,iqz,iqy,iqx)*curl_i[2]*curl_j[2];
402 double basis_basis = 0.0;
403 basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
404 basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
405 basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
406 basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
407 basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
408 basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
410 const double val = dq*curl_curl + mq*basis_basis;
412 local_mat(ii_loc, jj_loc) += val;
431 for (
int ii_loc=0; ii_loc<ne; ++ii_loc)
433 const int ci = ii_loc/4;
434 const int bi = ii_loc%4;
437 const int id1 = (ci+1)%3;
438 const int id2 = (ci+2)%3;
449 const int nx = (ci == 0) ? o : op1;
450 const int ny = (ci == 1) ? o : op1;
452 const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
454 for (
int jj_loc=0; jj_loc<ne; ++jj_loc)
456 const int cj = jj_loc/4;
459 const int cj_rel = (3 + cj - ci)%3;
461 const int bj = jj_loc%4;
463 const int jd0 = cj_rel;
464 const int jd1 = (cj_rel+1)%3;
465 const int jd2 = (cj_rel+2)%3;
472 const int d0 = jj_rel[0] - i0;
473 const int d1 = 1 + jj_rel[1] - i1;
474 const int d2 = 1 + jj_rel[2] - i2;
476 if (cj_rel == 0) { jj_off = d1 + 3*d2; }
477 else if (cj_rel == 1) { jj_off = 9 + d0 + 2*d1 + 4*d2; }
478 else { jj_off = 21 + d0 + 2*d1 + 6*d2; }
481 const double val = (jj_loc <= ii_loc)
482 ? local_mat(ii_loc, jj_loc)
483 : local_mat(jj_loc, ii_loc);
484 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
495 for (
int ci=0; ci<
dim; ++ci)
497 const int i_off = ci*o*op1*op1;
499 const int id1 = (ci+1)%3;
500 const int id2 = (ci+2)%3;
502 const int nxi = (ci == 0) ? o : op1;
503 const int nyi = (ci == 1) ? o : op1;
505 for (
int i0=0; i0<o; ++i0)
507 for (
int i1=0; i1<op1; ++i1)
509 for (
int i2=0; i2<op1; ++i2)
515 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
517 for (
int cj_rel=0; cj_rel<
dim; ++cj_rel)
519 const int cj = (ci + cj_rel) % 3;
520 const int j_off = cj*o*op1*op1;
522 const int nxj = (cj == 0) ? o : op1;
523 const int nyj = (cj == 1) ? o : op1;
525 const int j0_begin = i0;
526 const int j0_end = (cj_rel == 0) ? i0 : i0 + 1;
527 const int j1_begin = (i1 > 0) ? i1-1 : i1;
528 const int j1_end = (cj_rel == 1)
529 ? ((i1 < o) ? i1 : i1-1)
530 : ((i1 < o) ? i1+1 : i1);
531 const int j2_begin = (i2 > 0) ? i2-1 : i2;
532 const int j2_end = (cj_rel == 2)
533 ? ((i2 < o) ? i2 : i2-1)
534 : ((i2 < o) ? i2+1 : i2);
536 for (
int j0=j0_begin; j0<=j0_end; ++j0)
538 const int d0 = j0 - i0;
539 for (
int j1=j1_begin; j1<=j1_end; ++j1)
541 const int d1 = j1 - i1 + 1;
542 for (
int j2=j2_begin; j2<=j2_end; ++j2)
544 const int d2 = j2 - i2 + 1;
549 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
551 if (cj_rel == 0) { jj_off = d1 + 3*d2; }
552 else if (cj_rel == 1) { jj_off = 9 + d0 + 2*d1 + 4*d2; }
553 else { jj_off = 21 + d0 + 2*d1 + 6*d2; }
554 map(jj_off, ii_el) = jj_el;
566 template void BatchedLOR_ND::Assemble2D<1>();
567 template void BatchedLOR_ND::Assemble2D<2>();
568 template void BatchedLOR_ND::Assemble2D<3>();
569 template void BatchedLOR_ND::Assemble2D<4>();
570 template void BatchedLOR_ND::Assemble2D<5>();
571 template void BatchedLOR_ND::Assemble2D<6>();
572 template void BatchedLOR_ND::Assemble2D<7>();
573 template void BatchedLOR_ND::Assemble2D<8>();
575 template void BatchedLOR_ND::Assemble3D<1>();
576 template void BatchedLOR_ND::Assemble3D<2>();
577 template void BatchedLOR_ND::Assemble3D<3>();
578 template void BatchedLOR_ND::Assemble3D<4>();
579 template void BatchedLOR_ND::Assemble3D<5>();
580 template void BatchedLOR_ND::Assemble3D<6>();
581 template void BatchedLOR_ND::Assemble3D<7>();
582 template void BatchedLOR_ND::Assemble3D<8>();
591 ProjectLORCoefficient<VectorFEMassIntegrator>(
a,
c1);
592 ProjectLORCoefficient<CurlCurlIntegrator>(
a,
c2);
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Abstract base class for the batched LOR assembly kernels.
BatchedLOR_ND(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array< int > &sparse_mapping_)
Array< int > & sparse_mapping
Local element sparsity pattern.
void SetSize(int s)
Resize the vector to size s.
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
int Size() const
Returns the size of the vector.
CoefficientVector c2
Coefficient of second integrator.
int GetNE() const
Returns number of elements in the mesh.
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Vector & sparse_ij
Local element sparsity matrix data.
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
MFEM_HOST_DEVICE void Jacobian3D(const double x, const double y, const double z, const double vx[8], const double vy[8], const double vz[8], DeviceMatrix &J)
FiniteElementSpace & fes_ho
The associated high-order space.
A basic generic Tensor class, appropriate for use on the GPU.
MFEM_HOST_DEVICE void Jacobian2D(const double x, const double y, const double vx[4], const double vy[4], DeviceMatrix &J)
Vector & X_vert
Mesh coordinate vector.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
MFEM_HOST_DEVICE void Adjugate3D(const DeviceMatrix &J, DeviceMatrix &A)
CoefficientVector c1
Coefficient of first integrator.
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.