12 #include "../general/forall.hpp"
24 static void PADivergenceSetup2D(
const int Q1D,
26 const Array<double> &w,
31 const int NQ = Q1D*Q1D;
33 auto J =
Reshape(j.Read(), NQ, 2, 2, NE);
34 auto y =
Reshape(op.Write(), NQ, 2, 2, NE);
38 for (
int q = 0; q < NQ; ++q)
40 const double J11 = J(q,0,0,e);
41 const double J12 = J(q,0,1,e);
42 const double J21 = J(q,1,0,e);
43 const double J22 = J(q,1,1,e);
45 y(q,0,0,e) = W[q] * COEFF * J22;
46 y(q,0,1,e) = W[q] * COEFF * -J12;
47 y(q,1,0,e) = W[q] * COEFF * -J21;
48 y(q,1,1,e) = W[q] * COEFF * J11;
54 static void PADivergenceSetup3D(
const int Q1D,
56 const Array<double> &w,
61 const int NQ = Q1D*Q1D*Q1D;
63 auto J =
Reshape(j.Read(), NQ, 3, 3, NE);
64 auto y =
Reshape(op.Write(), NQ, 3, 3, NE);
67 for (
int q = 0; q < NQ; ++q)
69 const double J11 = J(q,0,0,e);
70 const double J21 = J(q,1,0,e);
71 const double J31 = J(q,2,0,e);
72 const double J12 = J(q,0,1,e);
73 const double J22 = J(q,1,1,e);
74 const double J32 = J(q,2,1,e);
75 const double J13 = J(q,0,2,e);
76 const double J23 = J(q,1,2,e);
77 const double J33 = J(q,2,2,e);
78 const double cw = W[q] * COEFF;
80 const double A11 = (J22 * J33) - (J23 * J32);
81 const double A12 = (J32 * J13) - (J12 * J33);
82 const double A13 = (J12 * J23) - (J22 * J13);
83 const double A21 = (J31 * J23) - (J21 * J33);
84 const double A22 = (J11 * J33) - (J13 * J31);
85 const double A23 = (J21 * J13) - (J11 * J23);
86 const double A31 = (J21 * J32) - (J31 * J22);
87 const double A32 = (J31 * J12) - (J11 * J32);
88 const double A33 = (J11 * J22) - (J12 * J21);
90 y(q,0,0,e) = cw * A11;
91 y(q,0,1,e) = cw * A12;
92 y(q,0,2,e) = cw * A13;
93 y(q,1,0,e) = cw * A21;
94 y(q,1,1,e) = cw * A22;
95 y(q,1,2,e) = cw * A23;
96 y(q,2,0,e) = cw * A31;
97 y(q,2,1,e) = cw * A32;
98 y(q,2,2,e) = cw * A33;
103 static void PADivergenceSetup(
const int dim,
108 const Array<double> &W,
113 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADivergenceSetup"); }
116 PADivergenceSetup2D(Q1D, NE, W, J, COEFF, op);
120 PADivergenceSetup3D(Q1D, NE, W, J, COEFF, op);
128 MFEM_ASSERT(trial_fes.
GetOrdering() == Ordering::byNODES,
129 "PA Only supports Ordering::byNODES!");
136 const int dims = trial_fe.
GetDim();
137 const int dimsToStore = dims * dims;
139 dim = mesh->Dimension();
140 ne = trial_fes.
GetNE();
141 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
142 trial_maps = &trial_fe.
GetDofToQuad(*ir, DofToQuad::TENSOR);
143 trial_dofs1D = trial_maps->
ndof;
144 quad1D = trial_maps->nqpt;
145 test_maps = &test_fe.
GetDofToQuad(*ir, DofToQuad::TENSOR);
146 test_dofs1D = test_maps->
ndof;
147 MFEM_ASSERT(quad1D == test_maps->nqpt,
148 "PA requires test and trial space to have same number of quadrature points!");
154 MFEM_VERIFY(cQ != NULL,
"only ConstantCoefficient is supported!");
157 PADivergenceSetup(dim, trial_dofs1D, test_dofs1D, quad1D,
158 ne, ir->
GetWeights(), geom->J, coeff, pa_data);
162 template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
163 static void PADivergenceApply2D(
const int NE,
170 const int tr_d1d = 0,
171 const int te_d1d = 0,
174 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
175 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
176 const int Q1D = T_Q1D ? T_Q1D : q1d;
177 MFEM_VERIFY(TR_D1D <=
MAX_D1D,
"");
178 MFEM_VERIFY(TE_D1D <=
MAX_D1D,
"");
179 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
184 auto x =
Reshape(x_.
Read(), TR_D1D, TR_D1D, 2, NE);
188 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
189 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
190 const int Q1D = T_Q1D ? T_Q1D : q1d;
193 constexpr
int max_TE_D1D = T_TE_D1D ? T_TE_D1D :
MAX_D1D;
194 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
196 double grad[max_Q1D][max_Q1D][VDIM];
197 double div[max_Q1D][max_Q1D];
198 for (
int qy = 0; qy < Q1D; ++qy)
200 for (
int qx = 0; qx < Q1D; ++qx)
206 for (
int c = 0; c < VDIM; ++c)
208 for (
int qy = 0; qy < Q1D; ++qy)
210 for (
int qx = 0; qx < Q1D; ++qx)
212 grad[qy][qx][0] = 0.0;
213 grad[qy][qx][1] = 0.0;
216 for (
int dy = 0; dy < TR_D1D; ++dy)
218 double gradX[max_Q1D][VDIM];
219 for (
int qx = 0; qx < Q1D; ++qx)
224 for (
int dx = 0; dx < TR_D1D; ++dx)
226 const double s = x(dx,dy,c,e);
227 for (
int qx = 0; qx < Q1D; ++qx)
229 gradX[qx][0] += s * G(qx,dx);
230 gradX[qx][1] += s * B(qx,dx);
233 for (
int qy = 0; qy < Q1D; ++qy)
235 const double wy = B(qy,dy);
236 const double wDy = G(qy,dy);
237 for (
int qx = 0; qx < Q1D; ++qx)
239 grad[qy][qx][0] += gradX[qx][0] * wy;
240 grad[qy][qx][1] += gradX[qx][1] * wDy;
245 for (
int qy = 0; qy < Q1D; ++qy)
247 for (
int qx = 0; qx < Q1D; ++qx)
249 const int q = qx + qy * Q1D;
250 const double gradX = grad[qy][qx][0];
251 const double gradY = grad[qy][qx][1];
253 div[qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e);
258 for (
int qy = 0; qy < Q1D; ++qy)
260 double opX[max_TE_D1D];
261 for (
int dx = 0; dx < TE_D1D; ++dx)
264 for (
int qx = 0; qx < Q1D; ++qx)
266 opX[dx] += Bt(dx,qx)*div[qy][qx];
269 for (
int dy = 0; dy < TE_D1D; ++dy)
271 for (
int dx = 0; dx < TE_D1D; ++dx)
273 y(dx,dy,e) += Bt(dy,qy)*opX[dx];
282 template<
const int T_TR_D1D = 0,
const int T_TE_D1D = 0,
const int T_Q1D = 0,
284 static void SmemPADivergenceApply2D(
const int NE,
285 const Array<double> &b_,
286 const Array<double> &g_,
287 const Array<double> &bt_,
291 const int tr_d1d = 0,
292 const int te_d1d = 0,
296 MFEM_ASSERT(
false,
"SHARED MEM NOT PROGRAMMED YET");
300 template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
301 static void PADivergenceApplyTranspose2D(
const int NE,
302 const Array<double> &bt,
303 const Array<double> >,
304 const Array<double> &b,
308 const int tr_d1d = 0,
309 const int te_d1d = 0,
312 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
313 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
314 const int Q1D = T_Q1D ? T_Q1D : q1d;
315 MFEM_VERIFY(TR_D1D <=
MAX_D1D,
"");
316 MFEM_VERIFY(TE_D1D <=
MAX_D1D,
"");
317 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
318 auto Bt =
Reshape(bt.Read(), TR_D1D, Q1D);
319 auto Gt =
Reshape(gt.Read(), TR_D1D, Q1D);
320 auto B =
Reshape(b.Read(), Q1D, TE_D1D);
321 auto op =
Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
322 auto x =
Reshape(x_.Read(), TE_D1D, TE_D1D, NE);
323 auto y =
Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, 2, NE);
326 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
327 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
328 const int Q1D = T_Q1D ? T_Q1D : q1d;
331 constexpr
int max_TR_D1D = T_TR_D1D ? T_TR_D1D :
MAX_D1D;
332 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
334 double quadTest[max_Q1D][max_Q1D];
335 double grad[max_Q1D][max_Q1D][VDIM];
336 for (
int qy = 0; qy < Q1D; ++qy)
338 for (
int qx = 0; qx < Q1D; ++qx)
340 quadTest[qy][qx] = 0.0;
343 for (
int dy = 0; dy < TE_D1D; ++dy)
345 double quadTestX[max_Q1D];
346 for (
int qx = 0; qx < Q1D; ++qx)
350 for (
int dx = 0; dx < TE_D1D; ++dx)
352 const double s = x(dx,dy,e);
353 for (
int qx = 0; qx < Q1D; ++qx)
355 quadTestX[qx] += s * B(qx,dx);
358 for (
int qy = 0; qy < Q1D; ++qy)
360 const double wy = B(qy,dy);
361 for (
int qx = 0; qx < Q1D; ++qx)
363 quadTest[qy][qx] += quadTestX[qx] * wy;
368 for (
int c = 0; c < VDIM; ++c)
370 for (
int qy = 0; qy < Q1D; ++qy)
372 for (
int qx = 0; qx < Q1D; ++qx)
374 const int q = qx + qy * Q1D;
375 grad[qy][qx][0] = quadTest[qy][qx]*op(q,0,c,e);
376 grad[qy][qx][1] = quadTest[qy][qx]*op(q,1,c,e);
380 for (
int qy = 0; qy < Q1D; ++qy)
382 double gradX[max_TR_D1D][VDIM];
383 for (
int dx = 0; dx < TR_D1D; ++dx)
388 for (
int qx = 0; qx < Q1D; ++qx)
390 const double gX = grad[qy][qx][0];
391 const double gY = grad[qy][qx][1];
392 for (
int dx = 0; dx < TR_D1D; ++dx)
394 const double wx = Bt(dx,qx);
395 const double wDx = Gt(dx,qx);
396 gradX[dx][0] += gX * wDx;
397 gradX[dx][1] += gY * wx;
400 for (
int dy = 0; dy < TR_D1D; ++dy)
402 const double wy = Bt(dy,qy);
403 const double wDy = Gt(dy,qy);
404 for (
int dx = 0; dx < TR_D1D; ++dx)
406 y(dx,dy,c,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
416 template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
417 static void PADivergenceApply3D(
const int NE,
418 const Array<double> &b,
419 const Array<double> &g,
420 const Array<double> &bt,
428 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
429 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
430 const int Q1D = T_Q1D ? T_Q1D : q1d;
431 MFEM_VERIFY(TR_D1D <=
MAX_D1D,
"");
432 MFEM_VERIFY(TE_D1D <=
MAX_D1D,
"");
433 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
434 auto B =
Reshape(b.Read(), Q1D, TR_D1D);
435 auto G =
Reshape(g.Read(), Q1D, TR_D1D);
436 auto Bt =
Reshape(bt.Read(), TE_D1D, Q1D);
437 auto op =
Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
438 auto x =
Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
439 auto y =
Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
442 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
443 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
444 const int Q1D = T_Q1D ? T_Q1D : q1d;
447 constexpr
int max_TE_D1D = T_TE_D1D ? T_TE_D1D :
MAX_D1D;
448 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
450 double grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
451 double div[max_Q1D][max_Q1D][max_Q1D];
452 for (
int qz = 0; qz < Q1D; ++qz)
454 for (
int qy = 0; qy < Q1D; ++qy)
456 for (
int qx = 0; qx < Q1D; ++qx)
458 div[qz][qy][qx] = 0.0;
463 for (
int c = 0; c < VDIM; ++c)
465 for (
int qz = 0; qz < Q1D; ++qz)
467 for (
int qy = 0; qy < Q1D; ++qy)
469 for (
int qx = 0; qx < Q1D; ++qx)
471 grad[qz][qy][qx][0] = 0.0;
472 grad[qz][qy][qx][1] = 0.0;
473 grad[qz][qy][qx][2] = 0.0;
477 for (
int dz = 0; dz < TR_D1D; ++dz)
479 double gradXY[max_Q1D][max_Q1D][VDIM];
480 for (
int qy = 0; qy < Q1D; ++qy)
482 for (
int qx = 0; qx < Q1D; ++qx)
484 gradXY[qy][qx][0] = 0.0;
485 gradXY[qy][qx][1] = 0.0;
486 gradXY[qy][qx][2] = 0.0;
489 for (
int dy = 0; dy < TR_D1D; ++dy)
491 double gradX[max_Q1D][VDIM];
492 for (
int qx = 0; qx < Q1D; ++qx)
498 for (
int dx = 0; dx < TR_D1D; ++dx)
500 const double s = x(dx,dy,dz,c,e);
501 for (
int qx = 0; qx < Q1D; ++qx)
503 gradX[qx][0] += s * G(qx,dx);
504 gradX[qx][1] += s * B(qx,dx);
505 gradX[qx][2] += s * B(qx,dx);
508 for (
int qy = 0; qy < Q1D; ++qy)
510 const double wy = B(qy,dy);
511 const double wDy = G(qy,dy);
512 for (
int qx = 0; qx < Q1D; ++qx)
514 gradXY[qy][qx][0] += gradX[qx][0] * wy;
515 gradXY[qy][qx][1] += gradX[qx][1] * wDy;
516 gradXY[qy][qx][2] += gradX[qx][2] * wy;
520 for (
int qz = 0; qz < Q1D; ++qz)
522 const double wz = B(qz,dz);
523 const double wDz = G(qz,dz);
524 for (
int qy = 0; qy < Q1D; ++qy)
526 for (
int qx = 0; qx < Q1D; ++qx)
528 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
529 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
530 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
536 for (
int qz = 0; qz < Q1D; ++qz)
538 for (
int qy = 0; qy < Q1D; ++qy)
540 for (
int qx = 0; qx < Q1D; ++qx)
542 const int q = qx + (qy + qz * Q1D) * Q1D;
543 const double gradX = grad[qz][qy][qx][0];
544 const double gradY = grad[qz][qy][qx][1];
545 const double gradZ = grad[qz][qy][qx][2];
547 div[qz][qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e) + gradZ*op(q,2,c,e);
554 for (
int qz = 0; qz < Q1D; ++qz)
556 double opXY[max_TE_D1D][max_TE_D1D];
557 for (
int dy = 0; dy < TE_D1D; ++dy)
559 for (
int dx = 0; dx < TE_D1D; ++dx)
564 for (
int qy = 0; qy < Q1D; ++qy)
566 double opX[max_TE_D1D];
567 for (
int dx = 0; dx < TE_D1D; ++dx)
570 for (
int qx = 0; qx < Q1D; ++qx)
572 opX[dx] += Bt(dx,qx)*div[qz][qy][qx];
575 for (
int dy = 0; dy < TE_D1D; ++dy)
577 for (
int dx = 0; dx < TE_D1D; ++dx)
579 opXY[dy][dx] += Bt(dy,qy)*opX[dx];
583 for (
int dz = 0; dz < TE_D1D; ++dz)
585 for (
int dy = 0; dy < TE_D1D; ++dy)
587 for (
int dx = 0; dx < TE_D1D; ++dx)
589 y(dx,dy,dz,e) += Bt(dz,qz)*opXY[dy][dx];
599 template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
600 static void PADivergenceApplyTranspose3D(
const int NE,
601 const Array<double> &bt,
602 const Array<double> >,
603 const Array<double> &b,
611 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
612 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
613 const int Q1D = T_Q1D ? T_Q1D : q1d;
614 MFEM_VERIFY(TR_D1D <=
MAX_D1D,
"");
615 MFEM_VERIFY(TE_D1D <=
MAX_D1D,
"");
616 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
617 auto Bt =
Reshape(bt.Read(), TR_D1D, Q1D);
618 auto Gt =
Reshape(gt.Read(), TR_D1D, Q1D);
619 auto B =
Reshape(b.Read(), Q1D, TE_D1D);
620 auto op =
Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
621 auto x =
Reshape(x_.Read(), TE_D1D, TE_D1D, TE_D1D, NE);
622 auto y =
Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
625 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
626 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
627 const int Q1D = T_Q1D ? T_Q1D : q1d;
630 constexpr
int max_TR_D1D = T_TR_D1D ? T_TR_D1D :
MAX_D1D;
631 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
633 double quadTest[max_Q1D][max_Q1D][max_Q1D];
634 double grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
635 for (
int qz = 0; qz < Q1D; ++qz)
637 for (
int qy = 0; qy < Q1D; ++qy)
639 for (
int qx = 0; qx < Q1D; ++qx)
641 quadTest[qz][qy][qx] = 0.0;
645 for (
int dz = 0; dz < TE_D1D; ++dz)
647 double quadTestXY[max_Q1D][max_Q1D];
648 for (
int qy = 0; qy < Q1D; ++qy)
650 for (
int qx = 0; qx < Q1D; ++qx)
652 quadTestXY[qy][qx] = 0.0;
655 for (
int dy = 0; dy < TE_D1D; ++dy)
657 double quadTestX[max_Q1D];
658 for (
int qx = 0; qx < Q1D; ++qx)
662 for (
int dx = 0; dx < TE_D1D; ++dx)
664 const double s = x(dx,dy,dz,e);
665 for (
int qx = 0; qx < Q1D; ++qx)
667 quadTestX[qx] += s * B(qx,dx);
670 for (
int qy = 0; qy < Q1D; ++qy)
672 const double wy = B(qy,dy);
673 for (
int qx = 0; qx < Q1D; ++qx)
675 quadTestXY[qy][qx] += quadTestX[qx] * wy;
679 for (
int qz = 0; qz < Q1D; ++qz)
681 const double wz = B(qz,dz);
682 for (
int qy = 0; qy < Q1D; ++qy)
684 for (
int qx = 0; qx < Q1D; ++qx)
686 quadTest[qz][qy][qx] += quadTestXY[qy][qx] * wz;
692 for (
int c = 0; c < VDIM; ++c)
694 for (
int qz = 0; qz < Q1D; ++qz)
696 for (
int qy = 0; qy < Q1D; ++qy)
698 for (
int qx = 0; qx < Q1D; ++qx)
700 const int q = qx + (qy + qz * Q1D) * Q1D;
701 grad[qz][qy][qx][0] = quadTest[qz][qy][qx]*op(q,0,c,e);
702 grad[qz][qy][qx][1] = quadTest[qz][qy][qx]*op(q,1,c,e);
703 grad[qz][qy][qx][2] = quadTest[qz][qy][qx]*op(q,2,c,e);
708 for (
int qz = 0; qz < Q1D; ++qz)
710 double gradXY[max_TR_D1D][max_TR_D1D][VDIM];
711 for (
int dy = 0; dy < TR_D1D; ++dy)
713 for (
int dx = 0; dx < TR_D1D; ++dx)
715 gradXY[dy][dx][0] = 0.0;
716 gradXY[dy][dx][1] = 0.0;
717 gradXY[dy][dx][2] = 0.0;
720 for (
int qy = 0; qy < Q1D; ++qy)
722 double gradX[max_TR_D1D][VDIM];
723 for (
int dx = 0; dx < TR_D1D; ++dx)
729 for (
int qx = 0; qx < Q1D; ++qx)
731 const double gX = grad[qz][qy][qx][0];
732 const double gY = grad[qz][qy][qx][1];
733 const double gZ = grad[qz][qy][qx][2];
734 for (
int dx = 0; dx < TR_D1D; ++dx)
736 const double wx = Bt(dx,qx);
737 const double wDx = Gt(dx,qx);
738 gradX[dx][0] += gX * wDx;
739 gradX[dx][1] += gY * wx;
740 gradX[dx][2] += gZ * wx;
743 for (
int dy = 0; dy < TR_D1D; ++dy)
745 const double wy = Bt(dy,qy);
746 const double wDy = Gt(dy,qy);
747 for (
int dx = 0; dx < TR_D1D; ++dx)
749 gradXY[dy][dx][0] += gradX[dx][0] * wy;
750 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
751 gradXY[dy][dx][2] += gradX[dx][2] * wy;
755 for (
int dz = 0; dz < TR_D1D; ++dz)
757 const double wz = Bt(dz,qz);
758 const double wDz = Gt(dz,qz);
759 for (
int dy = 0; dy < TR_D1D; ++dy)
761 for (
int dx = 0; dx < TR_D1D; ++dx)
764 ((gradXY[dy][dx][0] * wz) +
765 (gradXY[dy][dx][1] * wz) +
766 (gradXY[dy][dx][2] * wDz));
777 template<const
int T_TR_D1D = 0, const
int T_TE_D1D = 0, const
int T_Q1D = 0>
778 static void SmemPADivergenceApply3D(
const int NE,
779 const Array<double> &b_,
780 const Array<double> &g_,
781 const Array<double> &bt_,
785 const int tr_d1d = 0,
786 const int te_d1d = 0,
789 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
790 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
791 const int Q1D = T_Q1D ? T_Q1D : q1d;
793 MFEM_VERIFY(TR_D1D <=
MAX_D1D,
"");
794 MFEM_VERIFY(TE_D1D <=
MAX_D1D,
"");
795 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
797 auto b =
Reshape(b_.Read(), Q1D, TR_D1D);
798 auto g =
Reshape(g_.Read(), Q1D, TR_D1D);
799 auto bt =
Reshape(bt_.Read(), TE_D1D, Q1D);
800 auto Q =
Reshape(q_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
801 auto x =
Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
802 auto y =
Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
804 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
806 constexpr
int VDIM = 3;
807 const int tidz = MFEM_THREAD_ID(z);
808 const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
809 const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
810 const int Q1D = T_Q1D ? T_Q1D : q1d;
811 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
812 constexpr
int MD1R = T_TR_D1D ? T_TR_D1D :
MAX_D1D;
813 constexpr
int MD1E = T_TE_D1D ? T_TE_D1D :
MAX_D1D;
814 constexpr
int MD1 = MD1E > MD1R ? MD1E : MD1R;
815 constexpr
int MDQ = MQ1 > MD1 ? MQ1 : MD1;
816 MFEM_SHARED
double sBG[2][MQ1*MD1];
817 double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
818 double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
819 double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
820 MFEM_SHARED
double sm0[3][MDQ*MDQ*MDQ];
821 MFEM_SHARED
double sm1[3][MDQ*MDQ*MDQ];
822 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
823 double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
824 double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
825 double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
826 double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
827 double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
828 double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
829 double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
830 double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
831 double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
832 double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
833 MFEM_SHARED
double div[MQ1][MQ1][MQ1];
837 MFEM_FOREACH_THREAD(d,y,D1DR)
839 MFEM_FOREACH_THREAD(q,x,Q1D)
847 MFEM_FOREACH_THREAD(qz,z,Q1D)
849 MFEM_FOREACH_THREAD(qy,y,Q1D)
851 MFEM_FOREACH_THREAD(qx,x,Q1D)
853 div[qz][qy][qx] = 0.0;
859 for (
int c = 0; c < VDIM; ++c)
861 MFEM_FOREACH_THREAD(qz,z,Q1D)
863 MFEM_FOREACH_THREAD(qy,y,Q1D)
865 MFEM_FOREACH_THREAD(qx,x,Q1D)
867 QQQ0[qz][qy][qx] = 0.0;
868 QQQ1[qz][qy][qx] = 0.0;
869 QQQ2[qz][qy][qx] = 0.0;
874 MFEM_FOREACH_THREAD(dz,z,D1DR)
876 MFEM_FOREACH_THREAD(dy,y,D1DR)
878 MFEM_FOREACH_THREAD(dx,x,D1DR)
880 X[dz][dy][dx] = x(dx,dy,dz,c,e);
885 MFEM_FOREACH_THREAD(dz,z,D1DR)
887 MFEM_FOREACH_THREAD(dy,y,D1DR)
889 MFEM_FOREACH_THREAD(qx,x,Q1D)
893 for (
int dx = 0; dx < D1DR; ++dx)
895 const double coord = X[dz][dy][dx];
896 u += coord * B[qx][dx];
897 v += coord * G[qx][dx];
899 DDQ0[dz][dy][qx] =
u;
900 DDQ1[dz][dy][qx] = v;
905 MFEM_FOREACH_THREAD(dz,z,D1DR)
907 MFEM_FOREACH_THREAD(qy,y,Q1D)
909 MFEM_FOREACH_THREAD(qx,x,Q1D)
914 for (
int dy = 0; dy < D1DR; ++dy)
916 u += DDQ1[dz][dy][qx] * B[qy][dy];
917 v += DDQ0[dz][dy][qx] * G[qy][dy];
918 w += DDQ0[dz][dy][qx] * B[qy][dy];
920 DQQ0[dz][qy][qx] =
u;
921 DQQ1[dz][qy][qx] = v;
922 DQQ2[dz][qy][qx] = w;
927 MFEM_FOREACH_THREAD(qz,z,Q1D)
929 MFEM_FOREACH_THREAD(qy,y,Q1D)
931 MFEM_FOREACH_THREAD(qx,x,Q1D)
936 for (
int dz = 0; dz < D1DR; ++dz)
938 u += DQQ0[dz][qy][qx] * B[qz][dz];
939 v += DQQ1[dz][qy][qx] * B[qz][dz];
940 w += DQQ2[dz][qy][qx] * G[qz][dz];
942 QQQ0[qz][qy][qx] =
u;
943 QQQ1[qz][qy][qx] = v;
944 QQQ2[qz][qy][qx] = w;
949 MFEM_FOREACH_THREAD(qz,z,Q1D)
951 MFEM_FOREACH_THREAD(qy,y,Q1D)
953 MFEM_FOREACH_THREAD(qx,x,Q1D)
955 const int q = qx + (qy + qz * Q1D) * Q1D;
956 const double gX = QQQ0[qz][qy][qx];
957 const double gY = QQQ1[qz][qy][qx];
958 const double gZ = QQQ2[qz][qy][qx];
959 div[qz][qy][qx] += gX*Q(q,0,c,e) + gY*Q(q,1,c,e) + gZ*Q(q,2,c,e);
968 MFEM_FOREACH_THREAD(d,y,D1DE)
970 MFEM_FOREACH_THREAD(q,x,Q1D)
978 MFEM_FOREACH_THREAD(qz,z,Q1D)
980 MFEM_FOREACH_THREAD(qy,y,Q1D)
982 MFEM_FOREACH_THREAD(dx,x,D1DE)
985 for (
int qx = 0; qx < Q1D; ++qx)
987 u += div[qz][qy][qx] * Bt[dx][qx];
989 QQD0[qz][qy][dx] =
u;
994 MFEM_FOREACH_THREAD(qz,z,Q1D)
996 MFEM_FOREACH_THREAD(dy,y,D1DE)
998 MFEM_FOREACH_THREAD(dx,x,D1DE)
1001 for (
int qy = 0; qy < Q1D; ++qy)
1003 u += QQD0[qz][qy][dx] * Bt[dy][qy];
1005 QDD0[qz][dy][dx] =
u;
1010 MFEM_FOREACH_THREAD(dz,z,D1DE)
1012 MFEM_FOREACH_THREAD(dy,y,D1DE)
1014 MFEM_FOREACH_THREAD(dx,x,D1DE)
1017 for (
int qz = 0; qz < Q1D; ++qz)
1019 u += QDD0[qz][dy][dx] * Bt[dz][qz];
1028 static void PADivergenceApply(
const int dim,
1033 const Array<double> &B,
1034 const Array<double> &G,
1035 const Array<double> &Bt,
1039 bool transpose=
false)
1043 return PADivergenceApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1047 return PADivergenceApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1049 MFEM_ABORT(
"Unknown kernel.");
1053 void VectorDivergenceIntegrator::AddMultPA(
const Vector &x,
Vector &y)
const
1055 PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1056 trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y,
1061 void VectorDivergenceIntegrator::AddMultTransposePA(
const Vector &x,
1064 PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1065 trial_maps->Bt, trial_maps->Gt, test_maps->B, pa_data, x, y,
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
Ordering::Type GetOrdering() const
Return the ordering method.
int GetDim() const
Returns the reference space dimension for the finite element.
void trans(const Vector &u, Vector &x)
Class for an integration rule - an Array of IntegrationPoint.
A coefficient that is constant across space and time.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
int GetNE() const
Returns number of elements in the mesh.
Mesh * GetMesh() const
Returns the mesh.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
double u(const Vector &xvec)
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.