12#ifndef MFEM_BILININTEG_HCURLHDIV_KERNELS_HPP
13#define MFEM_BILININTEG_HCURLHDIV_KERNELS_HPP
30void PAHcurlHdivMassSetup2D(
const int Q1D,
34 const Array<real_t> &w_,
40void PAHcurlHdivMassSetup3D(
const int Q1D,
44 const Array<real_t> &w_,
50void PAHcurlHdivMassApply2D(
const int D1D,
54 const bool scalarCoeff,
55 const bool trialHcurl,
57 const Array<real_t> &Bo_,
58 const Array<real_t> &Bc_,
59 const Array<real_t> &Bot_,
60 const Array<real_t> &Bct_,
66void PAHcurlHdivMassApply3D(
const int D1D,
70 const bool scalarCoeff,
71 const bool trialHcurl,
73 const Array<real_t> &Bo_,
74 const Array<real_t> &Bc_,
75 const Array<real_t> &Bot_,
76 const Array<real_t> &Bct_,
82template<
int T_D1D = 0,
int T_D1D_TEST = 0,
int T_Q1D = 0>
83inline void PAHcurlHdivApply3D(
const int d1d,
87 const Array<real_t> &bo,
88 const Array<real_t> &bc,
89 const Array<real_t> &bot,
90 const Array<real_t> &bct,
91 const Array<real_t> &gc,
92 const Vector &pa_data,
97 "Error: d1d > HCURL_MAX_D1D");
99 "Error: d1dtest > HCURL_MAX_D1D");
101 "Error: q1d > HCURL_MAX_Q1D");
102 const int D1D = T_D1D ? T_D1D : d1d;
103 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
104 const int Q1D = T_Q1D ? T_Q1D : q1d;
106 auto Bo =
Reshape(bo.Read(), Q1D, D1D-1);
107 auto Bc =
Reshape(bc.Read(), Q1D, D1D);
108 auto Bot =
Reshape(bot.Read(), D1Dtest-1, Q1D);
109 auto Bct =
Reshape(bct.Read(), D1Dtest, Q1D);
110 auto Gc =
Reshape(gc.Read(), Q1D, D1D);
111 auto op =
Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
112 auto X =
Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
113 auto Y =
Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1Dtest, NE);
124 constexpr int VDIM = 3;
125 constexpr int MD1D = T_D1D ? T_D1D :
126 DofQuadLimits::HCURL_MAX_D1D;
127 constexpr int MQ1D = T_Q1D ? T_Q1D : DofQuadLimits::HCURL_MAX_Q1D;
128 const int D1D = T_D1D ? T_D1D : d1d;
129 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
130 const int Q1D = T_Q1D ? T_Q1D : q1d;
132 real_t curl[MQ1D][MQ1D][MQ1D][VDIM];
135 for (
int qz = 0; qz < Q1D; ++qz)
137 for (
int qy = 0; qy < Q1D; ++qy)
139 for (
int qx = 0; qx < Q1D; ++qx)
141 for (
int c = 0; c < VDIM; ++c)
143 curl[qz][qy][qx][c] = 0.0;
155 const int D1Dz = D1D;
156 const int D1Dy = D1D;
157 const int D1Dx = D1D - 1;
159 for (
int dz = 0; dz < D1Dz; ++dz)
161 real_t gradXY[MQ1D][MQ1D][2];
162 for (
int qy = 0; qy < Q1D; ++qy)
164 for (
int qx = 0; qx < Q1D; ++qx)
166 for (
int d = 0; d < 2; ++d)
168 gradXY[qy][qx][d] = 0.0;
173 for (
int dy = 0; dy < D1Dy; ++dy)
176 for (
int qx = 0; qx < Q1D; ++qx)
181 for (
int dx = 0; dx < D1Dx; ++dx)
183 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
184 for (
int qx = 0; qx < Q1D; ++qx)
186 massX[qx] += t * Bo(qx,dx);
190 for (
int qy = 0; qy < Q1D; ++qy)
192 const real_t wy = Bc(qy,dy);
193 const real_t wDy = Gc(qy,dy);
194 for (
int qx = 0; qx < Q1D; ++qx)
196 const real_t wx = massX[qx];
197 gradXY[qy][qx][0] += wx * wDy;
198 gradXY[qy][qx][1] += wx * wy;
203 for (
int qz = 0; qz < Q1D; ++qz)
205 const real_t wz = Bc(qz,dz);
206 const real_t wDz = Gc(qz,dz);
207 for (
int qy = 0; qy < Q1D; ++qy)
209 for (
int qx = 0; qx < Q1D; ++qx)
212 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz;
213 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz;
219 osc += D1Dx * D1Dy * D1Dz;
224 const int D1Dz = D1D;
225 const int D1Dy = D1D - 1;
226 const int D1Dx = D1D;
228 for (
int dz = 0; dz < D1Dz; ++dz)
230 real_t gradXY[MQ1D][MQ1D][2];
231 for (
int qy = 0; qy < Q1D; ++qy)
233 for (
int qx = 0; qx < Q1D; ++qx)
235 for (
int d = 0; d < 2; ++d)
237 gradXY[qy][qx][d] = 0.0;
242 for (
int dx = 0; dx < D1Dx; ++dx)
245 for (
int qy = 0; qy < Q1D; ++qy)
250 for (
int dy = 0; dy < D1Dy; ++dy)
252 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
253 for (
int qy = 0; qy < Q1D; ++qy)
255 massY[qy] += t * Bo(qy,dy);
259 for (
int qx = 0; qx < Q1D; ++qx)
261 const real_t wx = Bc(qx,dx);
262 const real_t wDx = Gc(qx,dx);
263 for (
int qy = 0; qy < Q1D; ++qy)
265 const real_t wy = massY[qy];
266 gradXY[qy][qx][0] += wDx * wy;
267 gradXY[qy][qx][1] += wx * wy;
272 for (
int qz = 0; qz < Q1D; ++qz)
274 const real_t wz = Bc(qz,dz);
275 const real_t wDz = Gc(qz,dz);
276 for (
int qy = 0; qy < Q1D; ++qy)
278 for (
int qx = 0; qx < Q1D; ++qx)
281 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz;
282 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz;
288 osc += D1Dx * D1Dy * D1Dz;
293 const int D1Dz = D1D - 1;
294 const int D1Dy = D1D;
295 const int D1Dx = D1D;
297 for (
int dx = 0; dx < D1Dx; ++dx)
299 real_t gradYZ[MQ1D][MQ1D][2];
300 for (
int qz = 0; qz < Q1D; ++qz)
302 for (
int qy = 0; qy < Q1D; ++qy)
304 for (
int d = 0; d < 2; ++d)
306 gradYZ[qz][qy][d] = 0.0;
311 for (
int dy = 0; dy < D1Dy; ++dy)
314 for (
int qz = 0; qz < Q1D; ++qz)
319 for (
int dz = 0; dz < D1Dz; ++dz)
321 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
322 for (
int qz = 0; qz < Q1D; ++qz)
324 massZ[qz] += t * Bo(qz,dz);
328 for (
int qy = 0; qy < Q1D; ++qy)
330 const real_t wy = Bc(qy,dy);
331 const real_t wDy = Gc(qy,dy);
332 for (
int qz = 0; qz < Q1D; ++qz)
334 const real_t wz = massZ[qz];
335 gradYZ[qz][qy][0] += wz * wy;
336 gradYZ[qz][qy][1] += wz * wDy;
341 for (
int qx = 0; qx < Q1D; ++qx)
343 const real_t wx = Bc(qx,dx);
344 const real_t wDx = Gc(qx,dx);
346 for (
int qy = 0; qy < Q1D; ++qy)
348 for (
int qz = 0; qz < Q1D; ++qz)
351 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx;
352 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx;
360 for (
int qz = 0; qz < Q1D; ++qz)
362 for (
int qy = 0; qy < Q1D; ++qy)
364 for (
int qx = 0; qx < Q1D; ++qx)
366 const real_t O11 = op(qx,qy,qz,0,e);
367 const real_t O12 = op(qx,qy,qz,1,e);
368 const real_t O13 = op(qx,qy,qz,2,e);
369 const real_t O22 = op(qx,qy,qz,3,e);
370 const real_t O23 = op(qx,qy,qz,4,e);
371 const real_t O33 = op(qx,qy,qz,5,e);
373 const real_t c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
374 (O13 * curl[qz][qy][qx][2]);
375 const real_t c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
376 (O23 * curl[qz][qy][qx][2]);
377 const real_t c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
378 (O33 * curl[qz][qy][qx][2]);
380 curl[qz][qy][qx][0] = c1;
381 curl[qz][qy][qx][1] = c2;
382 curl[qz][qy][qx][2] = c3;
387 for (
int qz = 0; qz < Q1D; ++qz)
389 real_t massXY[MD1D][MD1D];
393 for (
int c = 0; c < VDIM; ++c)
395 const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
396 const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
397 const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
399 for (
int dy = 0; dy < D1Dy; ++dy)
401 for (
int dx = 0; dx < D1Dx; ++dx)
406 for (
int qy = 0; qy < Q1D; ++qy)
409 for (
int dx = 0; dx < D1Dx; ++dx)
413 for (
int qx = 0; qx < Q1D; ++qx)
415 for (
int dx = 0; dx < D1Dx; ++dx)
417 massX[dx] += curl[qz][qy][qx][c] *
418 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
421 for (
int dy = 0; dy < D1Dy; ++dy)
423 const real_t wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
424 for (
int dx = 0; dx < D1Dx; ++dx)
426 massXY[dy][dx] += massX[dx] * wy;
431 for (
int dz = 0; dz < D1Dz; ++dz)
433 const real_t wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
434 for (
int dy = 0; dy < D1Dy; ++dy)
436 for (
int dx = 0; dx < D1Dx; ++dx)
438 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
444 osc += D1Dx * D1Dy * D1Dz;
451template<
int T_D1D = 0,
int T_D1D_TEST = 0,
int T_Q1D = 0>
452inline void PAHcurlHdivApplyTranspose3D(
const int d1d,
456 const Array<real_t> &bo,
457 const Array<real_t> &bc,
458 const Array<real_t> &bot,
459 const Array<real_t> &bct,
460 const Array<real_t> &gct,
461 const Vector &pa_data,
466 "Error: d1d > HCURL_MAX_D1D");
468 "Error: d1dtest > HCURL_MAX_D1D");
470 "Error: q1d > HCURL_MAX_Q1D");
471 const int D1D = T_D1D ? T_D1D : d1d;
472 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
473 const int Q1D = T_Q1D ? T_Q1D : q1d;
475 auto Bo =
Reshape(bo.Read(), Q1D, D1D-1);
476 auto Bc =
Reshape(bc.Read(), Q1D, D1D);
477 auto Bot =
Reshape(bot.Read(), D1Dtest-1, Q1D);
478 auto Bct =
Reshape(bct.Read(), D1Dtest, Q1D);
479 auto Gct =
Reshape(gct.Read(), D1D, Q1D);
480 auto op =
Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
481 auto X =
Reshape(x.Read(), 3*(D1Dtest-1)*(D1Dtest-1)*D1Dtest, NE);
482 auto Y =
Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
493 constexpr int VDIM = 3;
494 constexpr int MD1D = T_D1D ? T_D1D :
495 DofQuadLimits::HCURL_MAX_D1D;
496 constexpr int MQ1D = T_Q1D ? T_Q1D : DofQuadLimits::HCURL_MAX_Q1D;
497 const int D1D = T_D1D ? T_D1D : d1d;
498 const int D1Dtest = T_D1D_TEST ? T_D1D_TEST : d1dtest;
499 const int Q1D = T_Q1D ? T_Q1D : q1d;
501 real_t mass[MQ1D][MQ1D][MQ1D][VDIM];
503 for (
int qz = 0; qz < Q1D; ++qz)
505 for (
int qy = 0; qy < Q1D; ++qy)
507 for (
int qx = 0; qx < Q1D; ++qx)
509 for (
int c = 0; c < VDIM; ++c)
511 mass[qz][qy][qx][c] = 0.0;
519 for (
int c = 0; c < VDIM; ++c)
521 const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
522 const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
523 const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
525 for (
int dz = 0; dz < D1Dz; ++dz)
527 real_t massXY[MQ1D][MQ1D];
528 for (
int qy = 0; qy < Q1D; ++qy)
530 for (
int qx = 0; qx < Q1D; ++qx)
532 massXY[qy][qx] = 0.0;
536 for (
int dy = 0; dy < D1Dy; ++dy)
539 for (
int qx = 0; qx < Q1D; ++qx)
544 for (
int dx = 0; dx < D1Dx; ++dx)
546 const real_t t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
547 for (
int qx = 0; qx < Q1D; ++qx)
549 massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx));
553 for (
int qy = 0; qy < Q1D; ++qy)
555 const real_t wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy);
556 for (
int qx = 0; qx < Q1D; ++qx)
558 const real_t wx = massX[qx];
559 massXY[qy][qx] += wx * wy;
564 for (
int qz = 0; qz < Q1D; ++qz)
566 const real_t wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz);
567 for (
int qy = 0; qy < Q1D; ++qy)
569 for (
int qx = 0; qx < Q1D; ++qx)
571 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
577 osc += D1Dx * D1Dy * D1Dz;
581 for (
int qz = 0; qz < Q1D; ++qz)
583 for (
int qy = 0; qy < Q1D; ++qy)
585 for (
int qx = 0; qx < Q1D; ++qx)
587 const real_t O11 = op(qx,qy,qz,0,e);
588 const real_t O12 = op(qx,qy,qz,1,e);
589 const real_t O13 = op(qx,qy,qz,2,e);
590 const real_t O22 = op(qx,qy,qz,3,e);
591 const real_t O23 = op(qx,qy,qz,4,e);
592 const real_t O33 = op(qx,qy,qz,5,e);
593 const real_t massX = mass[qz][qy][qx][0];
594 const real_t massY = mass[qz][qy][qx][1];
595 const real_t massZ = mass[qz][qy][qx][2];
596 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
597 mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
598 mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
606 const int D1Dz = D1D;
607 const int D1Dy = D1D;
608 const int D1Dx = D1D - 1;
610 for (
int qz = 0; qz < Q1D; ++qz)
612 real_t gradXY12[MD1D][MD1D];
613 real_t gradXY21[MD1D][MD1D];
615 for (
int dy = 0; dy < D1Dy; ++dy)
617 for (
int dx = 0; dx < D1Dx; ++dx)
619 gradXY12[dy][dx] = 0.0;
620 gradXY21[dy][dx] = 0.0;
623 for (
int qy = 0; qy < Q1D; ++qy)
626 for (
int dx = 0; dx < D1Dx; ++dx)
628 for (
int n = 0; n < 2; ++n)
633 for (
int qx = 0; qx < Q1D; ++qx)
635 for (
int dx = 0; dx < D1Dx; ++dx)
637 const real_t wx = Bot(dx,qx);
639 massX[dx][0] += wx * mass[qz][qy][qx][1];
640 massX[dx][1] += wx * mass[qz][qy][qx][2];
643 for (
int dy = 0; dy < D1Dy; ++dy)
645 const real_t wy = Bct(dy,qy);
646 const real_t wDy = Gct(dy,qy);
648 for (
int dx = 0; dx < D1Dx; ++dx)
650 gradXY21[dy][dx] += massX[dx][0] * wy;
651 gradXY12[dy][dx] += massX[dx][1] * wDy;
656 for (
int dz = 0; dz < D1Dz; ++dz)
658 const real_t wz = Bct(dz,qz);
659 const real_t wDz = Gct(dz,qz);
660 for (
int dy = 0; dy < D1Dy; ++dy)
662 for (
int dx = 0; dx < D1Dx; ++dx)
666 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
667 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
673 osc += D1Dx * D1Dy * D1Dz;
678 const int D1Dz = D1D;
679 const int D1Dy = D1D - 1;
680 const int D1Dx = D1D;
682 for (
int qz = 0; qz < Q1D; ++qz)
684 real_t gradXY02[MD1D][MD1D];
685 real_t gradXY20[MD1D][MD1D];
687 for (
int dy = 0; dy < D1Dy; ++dy)
689 for (
int dx = 0; dx < D1Dx; ++dx)
691 gradXY02[dy][dx] = 0.0;
692 gradXY20[dy][dx] = 0.0;
695 for (
int qx = 0; qx < Q1D; ++qx)
698 for (
int dy = 0; dy < D1Dy; ++dy)
703 for (
int qy = 0; qy < Q1D; ++qy)
705 for (
int dy = 0; dy < D1Dy; ++dy)
707 const real_t wy = Bot(dy,qy);
709 massY[dy][0] += wy * mass[qz][qy][qx][2];
710 massY[dy][1] += wy * mass[qz][qy][qx][0];
713 for (
int dx = 0; dx < D1Dx; ++dx)
715 const real_t wx = Bct(dx,qx);
716 const real_t wDx = Gct(dx,qx);
718 for (
int dy = 0; dy < D1Dy; ++dy)
720 gradXY02[dy][dx] += massY[dy][0] * wDx;
721 gradXY20[dy][dx] += massY[dy][1] * wx;
726 for (
int dz = 0; dz < D1Dz; ++dz)
728 const real_t wz = Bct(dz,qz);
729 const real_t wDz = Gct(dz,qz);
730 for (
int dy = 0; dy < D1Dy; ++dy)
732 for (
int dx = 0; dx < D1Dx; ++dx)
736 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
737 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
743 osc += D1Dx * D1Dy * D1Dz;
748 const int D1Dz = D1D - 1;
749 const int D1Dy = D1D;
750 const int D1Dx = D1D;
752 for (
int qx = 0; qx < Q1D; ++qx)
754 real_t gradYZ01[MD1D][MD1D];
755 real_t gradYZ10[MD1D][MD1D];
757 for (
int dy = 0; dy < D1Dy; ++dy)
759 for (
int dz = 0; dz < D1Dz; ++dz)
761 gradYZ01[dz][dy] = 0.0;
762 gradYZ10[dz][dy] = 0.0;
765 for (
int qy = 0; qy < Q1D; ++qy)
768 for (
int dz = 0; dz < D1Dz; ++dz)
770 for (
int n = 0; n < 2; ++n)
775 for (
int qz = 0; qz < Q1D; ++qz)
777 for (
int dz = 0; dz < D1Dz; ++dz)
779 const real_t wz = Bot(dz,qz);
781 massZ[dz][0] += wz * mass[qz][qy][qx][0];
782 massZ[dz][1] += wz * mass[qz][qy][qx][1];
785 for (
int dy = 0; dy < D1Dy; ++dy)
787 const real_t wy = Bct(dy,qy);
788 const real_t wDy = Gct(dy,qy);
790 for (
int dz = 0; dz < D1Dz; ++dz)
792 gradYZ01[dz][dy] += wy * massZ[dz][1];
793 gradYZ10[dz][dy] += wDy * massZ[dz][0];
798 for (
int dx = 0; dx < D1Dx; ++dx)
800 const real_t wx = Bct(dx,qx);
801 const real_t wDx = Gct(dx,qx);
803 for (
int dy = 0; dy < D1Dy; ++dy)
805 for (
int dz = 0; dz < D1Dz; ++dz)
809 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
810 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall(int N, lambda &&body)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.