12#ifndef MFEM_BILININTEG_HCURLHDIV_KERNELS_HPP
13#define MFEM_BILININTEG_HCURLHDIV_KERNELS_HPP
29void PAHcurlHdivMassSetup2D(
const int Q1D,
33 const Array<real_t> &w_,
39void PAHcurlHdivMassSetup3D(
const int Q1D,
43 const Array<real_t> &w_,
49void PAHcurlHdivMassApply2D(
const int D1D,
53 const bool scalarCoeff,
54 const bool trialHcurl,
56 const Array<real_t> &Bo_,
57 const Array<real_t> &Bc_,
58 const Array<real_t> &Bot_,
59 const Array<real_t> &Bct_,
65void PAHcurlHdivMassApply3D(
const int D1D,
69 const bool scalarCoeff,
70 const bool trialHcurl,
72 const Array<real_t> &Bo_,
73 const Array<real_t> &Bc_,
74 const Array<real_t> &Bot_,
75 const Array<real_t> &Bct_,
81template<
int T_D1D = 0,
int T_D1D_TEST = 0,
int T_Q1D = 0>
82inline void PAHcurlHdivApply3D(
const int d1d,
86 const Array<real_t> &bo,
87 const Array<real_t> &bc,
88 const Array<real_t> &bot,
89 const Array<real_t> &bct,
90 const Array<real_t> &gc,
91 const Vector &pa_data,
96 "Error: d1d > HCURL_MAX_D1D");
98 "Error: d1dtest > HCURL_MAX_D1D");
100 "Error: q1d > HCURL_MAX_Q1D");
101 const int D1D = T_D1D ? T_D1D : d1d;
102 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
103 const int Q1D = T_Q1D ? T_Q1D : q1d;
105 auto Bo =
Reshape(bo.Read(), Q1D, D1D-1);
106 auto Bc =
Reshape(bc.Read(), Q1D, D1D);
107 auto Bot =
Reshape(bot.Read(), D1Dtest-1, Q1D);
108 auto Bct =
Reshape(bct.Read(), D1Dtest, Q1D);
109 auto Gc =
Reshape(gc.Read(), Q1D, D1D);
110 auto op =
Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
111 auto X =
Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
112 auto Y =
Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1Dtest, NE);
123 constexpr int VDIM = 3;
124 constexpr int MD1D = T_D1D ? T_D1D :
125 DofQuadLimits::HCURL_MAX_D1D;
126 constexpr int MQ1D = T_Q1D ? T_Q1D : DofQuadLimits::HCURL_MAX_Q1D;
127 const int D1D = T_D1D ? T_D1D : d1d;
128 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
129 const int Q1D = T_Q1D ? T_Q1D : q1d;
131 real_t curl[MQ1D][MQ1D][MQ1D][VDIM];
134 for (
int qz = 0; qz < Q1D; ++qz)
136 for (
int qy = 0; qy < Q1D; ++qy)
138 for (
int qx = 0; qx < Q1D; ++qx)
140 for (
int c = 0; c < VDIM; ++c)
142 curl[qz][qy][qx][c] = 0.0;
154 const int D1Dz = D1D;
155 const int D1Dy = D1D;
156 const int D1Dx = D1D - 1;
158 for (
int dz = 0; dz < D1Dz; ++dz)
160 real_t gradXY[MQ1D][MQ1D][2];
161 for (
int qy = 0; qy < Q1D; ++qy)
163 for (
int qx = 0; qx < Q1D; ++qx)
165 for (
int d = 0; d < 2; ++d)
167 gradXY[qy][qx][d] = 0.0;
172 for (
int dy = 0; dy < D1Dy; ++dy)
175 for (
int qx = 0; qx < Q1D; ++qx)
180 for (
int dx = 0; dx < D1Dx; ++dx)
182 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
183 for (
int qx = 0; qx < Q1D; ++qx)
185 massX[qx] += t * Bo(qx,dx);
189 for (
int qy = 0; qy < Q1D; ++qy)
191 const real_t wy = Bc(qy,dy);
192 const real_t wDy = Gc(qy,dy);
193 for (
int qx = 0; qx < Q1D; ++qx)
195 const real_t wx = massX[qx];
196 gradXY[qy][qx][0] += wx * wDy;
197 gradXY[qy][qx][1] += wx * wy;
202 for (
int qz = 0; qz < Q1D; ++qz)
204 const real_t wz = Bc(qz,dz);
205 const real_t wDz = Gc(qz,dz);
206 for (
int qy = 0; qy < Q1D; ++qy)
208 for (
int qx = 0; qx < Q1D; ++qx)
211 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz;
212 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz;
218 osc += D1Dx * D1Dy * D1Dz;
223 const int D1Dz = D1D;
224 const int D1Dy = D1D - 1;
225 const int D1Dx = D1D;
227 for (
int dz = 0; dz < D1Dz; ++dz)
229 real_t gradXY[MQ1D][MQ1D][2];
230 for (
int qy = 0; qy < Q1D; ++qy)
232 for (
int qx = 0; qx < Q1D; ++qx)
234 for (
int d = 0; d < 2; ++d)
236 gradXY[qy][qx][d] = 0.0;
241 for (
int dx = 0; dx < D1Dx; ++dx)
244 for (
int qy = 0; qy < Q1D; ++qy)
249 for (
int dy = 0; dy < D1Dy; ++dy)
251 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
252 for (
int qy = 0; qy < Q1D; ++qy)
254 massY[qy] += t * Bo(qy,dy);
258 for (
int qx = 0; qx < Q1D; ++qx)
260 const real_t wx = Bc(qx,dx);
261 const real_t wDx = Gc(qx,dx);
262 for (
int qy = 0; qy < Q1D; ++qy)
264 const real_t wy = massY[qy];
265 gradXY[qy][qx][0] += wDx * wy;
266 gradXY[qy][qx][1] += wx * wy;
271 for (
int qz = 0; qz < Q1D; ++qz)
273 const real_t wz = Bc(qz,dz);
274 const real_t wDz = Gc(qz,dz);
275 for (
int qy = 0; qy < Q1D; ++qy)
277 for (
int qx = 0; qx < Q1D; ++qx)
280 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz;
281 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz;
287 osc += D1Dx * D1Dy * D1Dz;
292 const int D1Dz = D1D - 1;
293 const int D1Dy = D1D;
294 const int D1Dx = D1D;
296 for (
int dx = 0; dx < D1Dx; ++dx)
298 real_t gradYZ[MQ1D][MQ1D][2];
299 for (
int qz = 0; qz < Q1D; ++qz)
301 for (
int qy = 0; qy < Q1D; ++qy)
303 for (
int d = 0; d < 2; ++d)
305 gradYZ[qz][qy][d] = 0.0;
310 for (
int dy = 0; dy < D1Dy; ++dy)
313 for (
int qz = 0; qz < Q1D; ++qz)
318 for (
int dz = 0; dz < D1Dz; ++dz)
320 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
321 for (
int qz = 0; qz < Q1D; ++qz)
323 massZ[qz] += t * Bo(qz,dz);
327 for (
int qy = 0; qy < Q1D; ++qy)
329 const real_t wy = Bc(qy,dy);
330 const real_t wDy = Gc(qy,dy);
331 for (
int qz = 0; qz < Q1D; ++qz)
333 const real_t wz = massZ[qz];
334 gradYZ[qz][qy][0] += wz * wy;
335 gradYZ[qz][qy][1] += wz * wDy;
340 for (
int qx = 0; qx < Q1D; ++qx)
342 const real_t wx = Bc(qx,dx);
343 const real_t wDx = Gc(qx,dx);
345 for (
int qy = 0; qy < Q1D; ++qy)
347 for (
int qz = 0; qz < Q1D; ++qz)
350 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx;
351 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx;
359 for (
int qz = 0; qz < Q1D; ++qz)
361 for (
int qy = 0; qy < Q1D; ++qy)
363 for (
int qx = 0; qx < Q1D; ++qx)
365 const real_t O11 = op(qx,qy,qz,0,e);
366 const real_t O12 = op(qx,qy,qz,1,e);
367 const real_t O13 = op(qx,qy,qz,2,e);
368 const real_t O22 = op(qx,qy,qz,3,e);
369 const real_t O23 = op(qx,qy,qz,4,e);
370 const real_t O33 = op(qx,qy,qz,5,e);
372 const real_t c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
373 (O13 * curl[qz][qy][qx][2]);
374 const real_t c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
375 (O23 * curl[qz][qy][qx][2]);
376 const real_t c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
377 (O33 * curl[qz][qy][qx][2]);
379 curl[qz][qy][qx][0] = c1;
380 curl[qz][qy][qx][1] = c2;
381 curl[qz][qy][qx][2] = c3;
386 for (
int qz = 0; qz < Q1D; ++qz)
388 real_t massXY[MD1D][MD1D];
392 for (
int c = 0; c < VDIM; ++c)
394 const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
395 const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
396 const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
398 for (
int dy = 0; dy < D1Dy; ++dy)
400 for (
int dx = 0; dx < D1Dx; ++dx)
405 for (
int qy = 0; qy < Q1D; ++qy)
408 for (
int dx = 0; dx < D1Dx; ++dx)
412 for (
int qx = 0; qx < Q1D; ++qx)
414 for (
int dx = 0; dx < D1Dx; ++dx)
416 massX[dx] += curl[qz][qy][qx][c] *
417 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
420 for (
int dy = 0; dy < D1Dy; ++dy)
422 const real_t wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
423 for (
int dx = 0; dx < D1Dx; ++dx)
425 massXY[dy][dx] += massX[dx] * wy;
430 for (
int dz = 0; dz < D1Dz; ++dz)
432 const real_t wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
433 for (
int dy = 0; dy < D1Dy; ++dy)
435 for (
int dx = 0; dx < D1Dx; ++dx)
437 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
443 osc += D1Dx * D1Dy * D1Dz;
450template<
int T_D1D = 0,
int T_D1D_TEST = 0,
int T_Q1D = 0>
451inline void PAHcurlHdivApplyTranspose3D(
const int d1d,
455 const Array<real_t> &bo,
456 const Array<real_t> &bc,
457 const Array<real_t> &bot,
458 const Array<real_t> &bct,
459 const Array<real_t> &gct,
460 const Vector &pa_data,
465 "Error: d1d > HCURL_MAX_D1D");
467 "Error: d1dtest > HCURL_MAX_D1D");
469 "Error: q1d > HCURL_MAX_Q1D");
470 const int D1D = T_D1D ? T_D1D : d1d;
471 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
472 const int Q1D = T_Q1D ? T_Q1D : q1d;
474 auto Bo =
Reshape(bo.Read(), Q1D, D1D-1);
475 auto Bc =
Reshape(bc.Read(), Q1D, D1D);
476 auto Bot =
Reshape(bot.Read(), D1Dtest-1, Q1D);
477 auto Bct =
Reshape(bct.Read(), D1Dtest, Q1D);
478 auto Gct =
Reshape(gct.Read(), D1D, Q1D);
479 auto op =
Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
480 auto X =
Reshape(x.Read(), 3*(D1Dtest-1)*(D1Dtest-1)*D1Dtest, NE);
481 auto Y =
Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
492 constexpr int VDIM = 3;
493 constexpr int MD1D = T_D1D ? T_D1D :
494 DofQuadLimits::HCURL_MAX_D1D;
495 constexpr int MQ1D = T_Q1D ? T_Q1D : DofQuadLimits::HCURL_MAX_Q1D;
496 const int D1D = T_D1D ? T_D1D : d1d;
497 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
498 const int Q1D = T_Q1D ? T_Q1D : q1d;
500 real_t mass[MQ1D][MQ1D][MQ1D][VDIM];
502 for (
int qz = 0; qz < Q1D; ++qz)
504 for (
int qy = 0; qy < Q1D; ++qy)
506 for (
int qx = 0; qx < Q1D; ++qx)
508 for (
int c = 0; c < VDIM; ++c)
510 mass[qz][qy][qx][c] = 0.0;
518 for (
int c = 0; c < VDIM; ++c)
520 const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
521 const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
522 const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
524 for (
int dz = 0; dz < D1Dz; ++dz)
526 real_t massXY[MQ1D][MQ1D];
527 for (
int qy = 0; qy < Q1D; ++qy)
529 for (
int qx = 0; qx < Q1D; ++qx)
531 massXY[qy][qx] = 0.0;
535 for (
int dy = 0; dy < D1Dy; ++dy)
538 for (
int qx = 0; qx < Q1D; ++qx)
543 for (
int dx = 0; dx < D1Dx; ++dx)
545 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
546 for (
int qx = 0; qx < Q1D; ++qx)
548 massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
552 for (
int qy = 0; qy < Q1D; ++qy)
554 const real_t wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
555 for (
int qx = 0; qx < Q1D; ++qx)
557 const real_t wx = massX[qx];
558 massXY[qy][qx] += wx * wy;
563 for (
int qz = 0; qz < Q1D; ++qz)
565 const real_t wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
566 for (
int qy = 0; qy < Q1D; ++qy)
568 for (
int qx = 0; qx < Q1D; ++qx)
570 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
576 osc += D1Dx * D1Dy * D1Dz;
580 for (
int qz = 0; qz < Q1D; ++qz)
582 for (
int qy = 0; qy < Q1D; ++qy)
584 for (
int qx = 0; qx < Q1D; ++qx)
586 const real_t O11 = op(qx,qy,qz,0,e);
587 const real_t O12 = op(qx,qy,qz,1,e);
588 const real_t O13 = op(qx,qy,qz,2,e);
589 const real_t O22 = op(qx,qy,qz,3,e);
590 const real_t O23 = op(qx,qy,qz,4,e);
591 const real_t O33 = op(qx,qy,qz,5,e);
592 const real_t massX = mass[qz][qy][qx][0];
593 const real_t massY = mass[qz][qy][qx][1];
594 const real_t massZ = mass[qz][qy][qx][2];
595 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
596 mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
597 mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
605 const int D1Dz = D1D;
606 const int D1Dy = D1D;
607 const int D1Dx = D1D - 1;
609 for (
int qz = 0; qz < Q1D; ++qz)
611 real_t gradXY12[MD1D][MD1D];
612 real_t gradXY21[MD1D][MD1D];
614 for (
int dy = 0; dy < D1Dy; ++dy)
616 for (
int dx = 0; dx < D1Dx; ++dx)
618 gradXY12[dy][dx] = 0.0;
619 gradXY21[dy][dx] = 0.0;
622 for (
int qy = 0; qy < Q1D; ++qy)
625 for (
int dx = 0; dx < D1Dx; ++dx)
627 for (
int n = 0; n < 2; ++n)
632 for (
int qx = 0; qx < Q1D; ++qx)
634 for (
int dx = 0; dx < D1Dx; ++dx)
636 const real_t wx = Bot(dx,qx);
638 massX[dx][0] += wx * mass[qz][qy][qx][1];
639 massX[dx][1] += wx * mass[qz][qy][qx][2];
642 for (
int dy = 0; dy < D1Dy; ++dy)
644 const real_t wy = Bct(dy,qy);
645 const real_t wDy = Gct(dy,qy);
647 for (
int dx = 0; dx < D1Dx; ++dx)
649 gradXY21[dy][dx] += massX[dx][0] * wy;
650 gradXY12[dy][dx] += massX[dx][1] * wDy;
655 for (
int dz = 0; dz < D1Dz; ++dz)
657 const real_t wz = Bct(dz,qz);
658 const real_t wDz = Gct(dz,qz);
659 for (
int dy = 0; dy < D1Dy; ++dy)
661 for (
int dx = 0; dx < D1Dx; ++dx)
665 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
666 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
672 osc += D1Dx * D1Dy * D1Dz;
677 const int D1Dz = D1D;
678 const int D1Dy = D1D - 1;
679 const int D1Dx = D1D;
681 for (
int qz = 0; qz < Q1D; ++qz)
683 real_t gradXY02[MD1D][MD1D];
684 real_t gradXY20[MD1D][MD1D];
686 for (
int dy = 0; dy < D1Dy; ++dy)
688 for (
int dx = 0; dx < D1Dx; ++dx)
690 gradXY02[dy][dx] = 0.0;
691 gradXY20[dy][dx] = 0.0;
694 for (
int qx = 0; qx < Q1D; ++qx)
697 for (
int dy = 0; dy < D1Dy; ++dy)
702 for (
int qy = 0; qy < Q1D; ++qy)
704 for (
int dy = 0; dy < D1Dy; ++dy)
706 const real_t wy = Bot(dy,qy);
708 massY[dy][0] += wy * mass[qz][qy][qx][2];
709 massY[dy][1] += wy * mass[qz][qy][qx][0];
712 for (
int dx = 0; dx < D1Dx; ++dx)
714 const real_t wx = Bct(dx,qx);
715 const real_t wDx = Gct(dx,qx);
717 for (
int dy = 0; dy < D1Dy; ++dy)
719 gradXY02[dy][dx] += massY[dy][0] * wDx;
720 gradXY20[dy][dx] += massY[dy][1] * wx;
725 for (
int dz = 0; dz < D1Dz; ++dz)
727 const real_t wz = Bct(dz,qz);
728 const real_t wDz = Gct(dz,qz);
729 for (
int dy = 0; dy < D1Dy; ++dy)
731 for (
int dx = 0; dx < D1Dx; ++dx)
735 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
736 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
742 osc += D1Dx * D1Dy * D1Dz;
747 const int D1Dz = D1D - 1;
748 const int D1Dy = D1D;
749 const int D1Dx = D1D;
751 for (
int qx = 0; qx < Q1D; ++qx)
753 real_t gradYZ01[MD1D][MD1D];
754 real_t gradYZ10[MD1D][MD1D];
756 for (
int dy = 0; dy < D1Dy; ++dy)
758 for (
int dz = 0; dz < D1Dz; ++dz)
760 gradYZ01[dz][dy] = 0.0;
761 gradYZ10[dz][dy] = 0.0;
764 for (
int qy = 0; qy < Q1D; ++qy)
767 for (
int dz = 0; dz < D1Dz; ++dz)
769 for (
int n = 0; n < 2; ++n)
774 for (
int qz = 0; qz < Q1D; ++qz)
776 for (
int dz = 0; dz < D1Dz; ++dz)
778 const real_t wz = Bot(dz,qz);
780 massZ[dz][0] += wz * mass[qz][qy][qx][0];
781 massZ[dz][1] += wz * mass[qz][qy][qx][1];
784 for (
int dy = 0; dy < D1Dy; ++dy)
786 const real_t wy = Bct(dy,qy);
787 const real_t wDy = Gct(dy,qy);
789 for (
int dz = 0; dz < D1Dz; ++dz)
791 gradYZ01[dz][dy] += wy * massZ[dz][1];
792 gradYZ10[dz][dy] += wDy * massZ[dz][0];
797 for (
int dx = 0; dx < D1Dx; ++dx)
799 const real_t wx = Bct(dx,qx);
800 const real_t wDx = Gct(dx,qx);
802 for (
int dy = 0; dy < D1Dy; ++dy)
804 for (
int dz = 0; dz < D1Dz; ++dz)
808 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
809 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.