24 static constexpr int nv = 4;
25 static constexpr int ne = 4;
26 static constexpr int dim = 2;
27 static constexpr int ddm2 = (
dim*(
dim+1))/2;
28 static constexpr int ngeom = ddm2 + 1;
29 static constexpr int o = ORDER;
30 static constexpr int op1 = ORDER + 1;
31 static constexpr int ndof_per_el =
dim*o*op1;
32 static constexpr int nnz_per_row = 7;
33 static constexpr int sz_local_mat = ne*ne;
35 const bool const_mq =
c1.
Size() == 1;
36 const auto MQ = const_mq
39 const bool const_dq =
c2.
Size() == 1;
40 const auto DQ = const_dq
51 MFEM_FOREACH_THREAD(iy,y,o)
53 MFEM_FOREACH_THREAD(ix,x,op1)
55 for (
int c=0; c<2; ++c)
57 for (
int j=0; j<nnz_per_row; ++j)
59 V(j,ix+iy*op1,c,iel_ho) = 0.0;
67 MFEM_FOREACH_THREAD(ky,y,ORDER)
69 MFEM_FOREACH_THREAD(kx,x,ORDER)
73 real_t local_mat_[sz_local_mat];
79 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
83 for (
int iqx=0; iqx<2; ++iqx)
85 for (
int iqy=0; iqy<2; ++iqy)
87 const real_t mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
88 const real_t dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
90 for (
int cj=0; cj<
dim; ++cj)
92 for (
int bj=0; bj<2; ++bj)
94 const real_t bxj = (cj == 0 && bj == iqx) ? 1 : 0;
95 const real_t byj = (cj == 1 && bj == iqy) ? 1 : 0;
96 const real_t div_j = (bj == 0) ? -1 : 1;
98 const real_t jj_loc = bj + 2*cj;
100 for (
int ci=0; ci<
dim; ++ci)
102 for (
int bi=0; bi<2; ++bi)
104 const real_t bxi = (ci == 0 && bi == iqx) ? 1 : 0;
105 const real_t byi = (ci == 1 && bi == iqy) ? 1 : 0;
106 const real_t div_i = (bi == 0) ? -1 : 1;
108 const real_t ii_loc = bi + 2*ci;
112 if (jj_loc > ii_loc) {
continue; }
115 val += bxi*bxj*Q(0,iqy,iqx);
116 val += byi*bxj*Q(1,iqy,iqx);
117 val += bxi*byj*Q(1,iqy,iqx);
118 val += byi*byj*Q(2,iqy,iqx);
120 val += dq*div_j*div_i*Q(3,iqy,iqx);
122 local_mat(ii_loc, jj_loc) += val;
132 for (
int ii_loc=0; ii_loc<ne; ++ii_loc)
134 const int ci = ii_loc/2;
135 const int bi = ii_loc%2;
136 const int ix = (ci == 0) ? bi : 0;
137 const int iy = (ci == 1) ? bi : 0;
139 int ii = kx+ix + (ky+iy)*((ci == 0) ? op1 : o);
141 for (
int jj_loc=0; jj_loc<ne; ++jj_loc)
143 const int cj = jj_loc/2;
144 const int bj = jj_loc%2;
146 const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + 1-bi + 2*bj);
149 const real_t val = (jj_loc <= ii_loc)
150 ? local_mat(ii_loc, jj_loc)
151 : local_mat(jj_loc, ii_loc);
152 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
162 for (
int ci=0; ci<2; ++ci)
164 const int i_off = (ci == 0) ? 0 : o*op1;
166 const int id1 = (ci+1)%2;
168 const int nxi = (ci == 0) ? op1 : o;
170 for (
int i0=0; i0<op1; ++i0)
172 for (
int i1=0; i1<o; ++i1)
177 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi;
179 for (
int cj_rel=0; cj_rel<2; ++cj_rel)
181 const int cj = (ci + cj_rel) % 2;
182 const int j_off = (cj == 0) ? 0 : o*op1;
183 const int nxj = (cj == 0) ? op1 : o;
185 const int j0_begin = (i0 > 0) ? i0-1 : i0;
186 const int j0_end = (cj_rel == 0)
187 ? ((i0 < o) ? i0+1 : i0)
188 : ((i0 < o) ? i0 : i0-1);
189 const int j1_begin = i1;
190 const int j1_end = (cj_rel == 0) ? i1 : i1+1;
192 for (
int j0=j0_begin; j0<=j0_end; ++j0)
194 const int d0 = 1 + j0 - i0;
195 for (
int j1=j1_begin; j1<=j1_end; ++j1)
197 const int d1 = j1 - i1;
201 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj;
202 const int jj_off = (cj_rel == 0) ? d0 : 3 + d0 + 2*d1;
203 map(jj_off, ii_el) = jj_el;
217 static constexpr int nv = 8;
218 static constexpr int nf = 6;
219 static constexpr int dim = 3;
220 static constexpr int ddm2 = (
dim*(
dim+1))/2;
221 static constexpr int ngeom = ddm2 + 1;
222 static constexpr int o = ORDER;
223 static constexpr int op1 = ORDER + 1;
224 static constexpr int ndof_per_el =
dim*o*o*op1;
225 static constexpr int nnz_per_row = 11;
226 static constexpr int sz_local_mat = nf*nf;
228 const bool const_mq =
c1.
Size() == 1;
229 const auto MQ = const_mq
232 const bool const_dq =
c2.
Size() == 1;
233 const auto DQ = const_dq
244 [=] MFEM_HOST_DEVICE (
int iel_ho)
246 MFEM_FOREACH_THREAD(iz,z,o)
248 MFEM_FOREACH_THREAD(iy,y,o)
250 MFEM_FOREACH_THREAD(ix,x,op1)
252 for (
int c=0; c<
dim; ++c)
254 for (
int j=0; j<nnz_per_row; ++j)
256 V(j,ix+iy*op1+iz*o*op1,c,iel_ho) = 0.0;
265 MFEM_FOREACH_THREAD(kz,z,ORDER)
267 MFEM_FOREACH_THREAD(ky,y,ORDER)
269 MFEM_FOREACH_THREAD(kx,x,ORDER)
275 real_t local_mat_[sz_local_mat];
277 for (
int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
279 real_t vx[8], vy[8], vz[8];
282 for (
int iqz=0; iqz<2; ++iqz)
284 for (
int iqy=0; iqy<2; ++iqy)
286 for (
int iqx=0; iqx<2; ++iqx)
299 const real_t w_detJ = w/detJ;
301 Q(0,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0));
302 Q(1,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,0)+J(1,1)*J(1,0)+J(2,1)*J(2,0));
303 Q(2,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,0)+J(1,2)*J(1,0)+J(2,2)*J(2,0));
304 Q(3,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1));
305 Q(4,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,1)+J(1,2)*J(1,1)+J(2,2)*J(2,1));
306 Q(5,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2));
307 Q(6,iqz,iqy,iqx) = w_detJ;
311 for (
int iqz=0; iqz<2; ++iqz)
313 for (
int iqy=0; iqy<2; ++iqy)
315 for (
int iqx=0; iqx<2; ++iqx)
317 const real_t mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
318 const real_t dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
320 for (
int cj=0; cj<
dim; ++cj)
322 const real_t jq0 = (cj == 0) ? iqx : ((cj == 1) ? iqy : iqz);
325 const int jd_1 = (cj + 1)%3;
326 const int jd_2 = (cj + 2)%3;
328 for (
int bj=0; bj<2; ++bj)
330 const real_t div_j = (bj == 0) ? -1 : 1;
333 basis_j[jd_0] = (bj == jq0) ? 1 : 0;
337 const int jj_loc = bj + 2*cj;
339 for (
int ci=0; ci<
dim; ++ci)
341 const real_t iq0 = (ci == 0) ? iqx : ((ci == 1) ? iqy : iqz);
344 const int id_1 = (ci + 1)%3;
345 const int id_2 = (ci + 2)%3;
347 for (
int bi=0; bi<2; ++bi)
349 const real_t div_i = (bi == 0) ? -1 : 1;
352 basis_i[id_0] = (bi == iq0) ? 1 : 0;
356 const int ii_loc = bi + 2*ci;
360 if (jj_loc > ii_loc) {
continue; }
362 const real_t div_div = Q(6,iqz,iqy,iqx)*div_i*div_j;
365 basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
366 basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
367 basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
368 basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
369 basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
370 basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
372 const real_t val = dq*div_div + mq*basis_basis;
375 local_mat(ii_loc, jj_loc) += val;
394 for (
int ii_loc=0; ii_loc<nf; ++ii_loc)
396 const int ci = ii_loc/2;
397 const int bi = ii_loc%2;
400 const int id1 = (ci+1)%3;
401 const int id2 = (ci+2)%3;
412 const int nx = (ci == 0) ? op1 : o;
413 const int ny = (ci == 1) ? op1 : o;
415 const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
417 for (
int jj_loc=0; jj_loc<nf; ++jj_loc)
419 const int cj = jj_loc/2;
422 const int cj_rel = (3 + cj - ci)%3;
424 const int bj = jj_loc%2;
426 const int jd0 = cj_rel;
427 const int jd1 = (cj_rel+1)%3;
428 const int jd2 = (cj_rel+2)%3;
435 const int d0 = jj_rel[0] - i0 + 1;
436 const int d1 = jj_rel[1] - i1;
437 const int d2 = jj_rel[2] - i2;
439 if (cj_rel == 0) { jj_off = d0; }
440 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
441 else { jj_off = 7 + d0 + 2*d2; }
444 const real_t val = (jj_loc <= ii_loc)
445 ? local_mat(ii_loc, jj_loc)
446 : local_mat(jj_loc, ii_loc);
447 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
458 for (
int ci=0; ci<
dim; ++ci)
460 const int i_off = ci*o*o*op1;
462 const int id1 = (ci+1)%3;
463 const int id2 = (ci+2)%3;
465 const int nxi = (ci == 0) ? op1 : o;
466 const int nyi = (ci == 1) ? op1 : o;
468 for (
int i0=0; i0<op1; ++i0)
470 for (
int i1=0; i1<o; ++i1)
472 for (
int i2=0; i2<o; ++i2)
478 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
480 for (
int cj_rel=0; cj_rel<
dim; ++cj_rel)
482 const int cj = (ci + cj_rel) % 3;
483 const int j_off = cj*o*o*op1;
485 const int nxj = (cj == 0) ? op1 : o;
486 const int nyj = (cj == 1) ? op1 : o;
488 const int j0_begin = (i0 > 0) ? i0-1 : i0;
489 const int j0_end = (cj_rel == 0)
490 ? ((i0 < o) ? i0+1 : i0)
491 : ((i0 < o) ? i0 : i0-1);
492 const int j1_begin = i1;
493 const int j1_end = (cj_rel == 1) ? i1+1 : i1;
494 const int j2_begin = i2;
495 const int j2_end = (cj_rel == 2) ? i2+1 : i2;
497 for (
int j0=j0_begin; j0<=j0_end; ++j0)
499 const int d0 = 1 + j0 - i0;
500 for (
int j1=j1_begin; j1<=j1_end; ++j1)
502 const int d1 = j1 - i1;
503 for (
int j2=j2_begin; j2<=j2_end; ++j2)
505 const int d2 = j2 - i2;
510 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
512 if (cj_rel == 0) { jj_off = d0; }
513 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
514 else { jj_off = 7 + d0 + 2*d2; }
515 map(jj_off, ii_el) = jj_el;