12 #include "../general/forall.hpp"
25 const Array<double> &w_,
42 constexpr
static int VDIM = 2;
50 auto op =
Reshape(pa_data.
Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
58 for (
int qy = 0; qy < Q1D; ++qy)
60 for (
int qx = 0; qx < Q1D; ++qx)
62 for (
int c = 0; c < VDIM; ++c)
64 mass[qy][qx][c] = 0.0;
71 for (
int c = 0; c < VDIM; ++c)
73 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
74 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
76 for (
int dy = 0; dy < D1Dy; ++dy)
79 for (
int qx = 0; qx < Q1D; ++qx)
84 for (
int dx = 0; dx < D1Dx; ++dx)
86 const double t = X(dx + (dy * D1Dx) + osc, e);
87 for (
int qx = 0; qx < Q1D; ++qx)
89 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
93 for (
int qy = 0; qy < Q1D; ++qy)
95 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
96 for (
int qx = 0; qx < Q1D; ++qx)
98 mass[qy][qx][c] += massX[qx] * wy;
107 for (
int qy = 0; qy < Q1D; ++qy)
109 for (
int qx = 0; qx < Q1D; ++qx)
111 const double O11 = op(qx,qy,0,e);
112 const double O21 = op(qx,qy,1,e);
113 const double O12 = symmetric ? O21 : op(qx,qy,2,e);
114 const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
115 const double massX = mass[qy][qx][0];
116 const double massY = mass[qy][qx][1];
117 mass[qy][qx][0] = (O11*massX)+(O12*massY);
118 mass[qy][qx][1] = (O21*massX)+(O22*massY);
122 for (
int qy = 0; qy < Q1D; ++qy)
126 for (
int c = 0; c < VDIM; ++c)
128 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
129 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
132 for (
int dx = 0; dx < D1Dx; ++dx)
136 for (
int qx = 0; qx < Q1D; ++qx)
138 for (
int dx = 0; dx < D1Dx; ++dx)
140 massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
144 for (
int dy = 0; dy < D1Dy; ++dy)
146 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
148 for (
int dx = 0; dx < D1Dx; ++dx)
150 Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
163 const bool symmetric,
169 constexpr
static int VDIM = 2;
174 auto op =
Reshape(pa_data.
Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
181 for (
int c = 0; c < VDIM; ++c)
183 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
184 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
188 for (
int dy = 0; dy < D1Dy; ++dy)
190 for (
int qx = 0; qx < Q1D; ++qx)
193 for (
int qy = 0; qy < Q1D; ++qy)
195 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
197 mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
198 op(qx,qy,symmetric ? 2 : 3, e));
202 for (
int dx = 0; dx < D1Dx; ++dx)
204 for (
int qx = 0; qx < Q1D; ++qx)
206 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
207 D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
220 const bool symmetric,
229 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
230 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
231 constexpr static int VDIM = 3;
233 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
234 auto Bc = Reshape(bc.Read(), Q1D, D1D);
235 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
236 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
242 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
244 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
245 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
246 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
248 const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
249 (symmetric ? 5 : 8));
251 double mass[MAX_Q1D];
253 for (int dz = 0; dz < D1Dz; ++dz)
255 for (int dy = 0; dy < D1Dy; ++dy)
257 for (int qx = 0; qx < Q1D; ++qx)
260 for (int qy = 0; qy < Q1D; ++qy)
262 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
264 for (int qz = 0; qz < Q1D; ++qz)
266 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
268 mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
273 for (int dx = 0; dx < D1Dx; ++dx)
275 for (int qx = 0; qx < Q1D; ++qx)
277 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
278 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
284 osc += D1Dx * D1Dy * D1Dz;
286 }); // end of element loop
289 template<int T_D1D, int T_Q1D>
290 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
293 const bool symmetric,
294 const Array<double> &bo,
295 const Array<double> &bc,
296 const Vector &pa_data,
299 MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D
");
300 MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D
");
302 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
303 auto Bc = Reshape(bc.Read(), Q1D, D1D);
304 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
305 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
307 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
309 constexpr int VDIM = 3;
310 constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
311 constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
313 MFEM_SHARED double sBo[tQ1D][tD1D];
314 MFEM_SHARED double sBc[tQ1D][tD1D];
317 MFEM_SHARED double sop[3][tQ1D][tQ1D];
319 MFEM_FOREACH_THREAD(qx,x,Q1D)
321 MFEM_FOREACH_THREAD(qy,y,Q1D)
323 MFEM_FOREACH_THREAD(qz,z,Q1D)
325 op3[0] = op(qx,qy,qz,0,e);
326 op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
327 op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
332 const int tidx = MFEM_THREAD_ID(x);
333 const int tidy = MFEM_THREAD_ID(y);
334 const int tidz = MFEM_THREAD_ID(z);
338 MFEM_FOREACH_THREAD(d,y,D1D)
340 MFEM_FOREACH_THREAD(q,x,Q1D)
353 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
355 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
356 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
357 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
361 for (int qz=0; qz < Q1D; ++qz)
365 for (int i=0; i<3; ++i)
367 sop[i][tidx][tidy] = op3[i];
373 MFEM_FOREACH_THREAD(dz,z,D1Dz)
375 const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
377 MFEM_FOREACH_THREAD(dy,y,D1Dy)
379 MFEM_FOREACH_THREAD(dx,x,D1Dx)
381 for (int qy = 0; qy < Q1D; ++qy)
383 const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
385 for (int qx = 0; qx < Q1D; ++qx)
387 const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
388 dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
398 MFEM_FOREACH_THREAD(dz,z,D1Dz)
400 MFEM_FOREACH_THREAD(dy,y,D1Dy)
402 MFEM_FOREACH_THREAD(dx,x,D1Dx)
404 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
409 osc += D1Dx * D1Dy * D1Dz;
411 }); // end of element loop
414 void PAHcurlMassApply3D(const int D1D,
417 const bool symmetric,
418 const Array<double> &bo,
419 const Array<double> &bc,
420 const Array<double> &bot,
421 const Array<double> &bct,
422 const Vector &pa_data,
426 constexpr static int MAX_D1D = HCURL_MAX_D1D;
427 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
429 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
430 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
431 constexpr static int VDIM = 3;
433 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
434 auto Bc = Reshape(bc.Read(), Q1D, D1D);
435 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
436 auto Bct = Reshape(bct.Read(), D1D, Q1D);
437 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
438 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
439 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
443 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
445 for (int qz = 0; qz < Q1D; ++qz)
447 for (int qy = 0; qy < Q1D; ++qy)
449 for (int qx = 0; qx < Q1D; ++qx)
451 for (int c = 0; c < VDIM; ++c)
453 mass[qz][qy][qx][c] = 0.0;
461 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
463 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
464 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
465 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
467 for (int dz = 0; dz < D1Dz; ++dz)
469 double massXY[MAX_Q1D][MAX_Q1D];
470 for (int qy = 0; qy < Q1D; ++qy)
472 for (int qx = 0; qx < Q1D; ++qx)
474 massXY[qy][qx] = 0.0;
478 for (int dy = 0; dy < D1Dy; ++dy)
480 double massX[MAX_Q1D];
481 for (int qx = 0; qx < Q1D; ++qx)
486 for (int dx = 0; dx < D1Dx; ++dx)
488 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
489 for (int qx = 0; qx < Q1D; ++qx)
491 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
495 for (int qy = 0; qy < Q1D; ++qy)
497 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
498 for (int qx = 0; qx < Q1D; ++qx)
500 const double wx = massX[qx];
501 massXY[qy][qx] += wx * wy;
506 for (int qz = 0; qz < Q1D; ++qz)
508 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
509 for (int qy = 0; qy < Q1D; ++qy)
511 for (int qx = 0; qx < Q1D; ++qx)
513 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
519 osc += D1Dx * D1Dy * D1Dz;
520 } // loop (c) over components
523 for (int qz = 0; qz < Q1D; ++qz)
525 for (int qy = 0; qy < Q1D; ++qy)
527 for (int qx = 0; qx < Q1D; ++qx)
529 const double O11 = op(qx,qy,qz,0,e);
530 const double O12 = op(qx,qy,qz,1,e);
531 const double O13 = op(qx,qy,qz,2,e);
532 const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
533 const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
534 const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
535 const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
536 const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
537 const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
538 const double massX = mass[qz][qy][qx][0];
539 const double massY = mass[qz][qy][qx][1];
540 const double massZ = mass[qz][qy][qx][2];
541 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
542 mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
543 mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
548 for (int qz = 0; qz < Q1D; ++qz)
550 double massXY[MAX_D1D][MAX_D1D];
554 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
556 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
557 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
558 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
560 for (int dy = 0; dy < D1Dy; ++dy)
562 for (int dx = 0; dx < D1Dx; ++dx)
564 massXY[dy][dx] = 0.0;
567 for (int qy = 0; qy < Q1D; ++qy)
569 double massX[MAX_D1D];
570 for (int dx = 0; dx < D1Dx; ++dx)
574 for (int qx = 0; qx < Q1D; ++qx)
576 for (int dx = 0; dx < D1Dx; ++dx)
578 massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
581 for (int dy = 0; dy < D1Dy; ++dy)
583 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
584 for (int dx = 0; dx < D1Dx; ++dx)
586 massXY[dy][dx] += massX[dx] * wy;
591 for (int dz = 0; dz < D1Dz; ++dz)
593 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
594 for (int dy = 0; dy < D1Dy; ++dy)
596 for (int dx = 0; dx < D1Dx; ++dx)
598 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
603 osc += D1Dx * D1Dy * D1Dz;
606 }); // end of element loop
609 template<int T_D1D, int T_Q1D>
610 void SmemPAHcurlMassApply3D(const int D1D,
613 const bool symmetric,
614 const Array<double> &bo,
615 const Array<double> &bc,
616 const Array<double> &bot,
617 const Array<double> &bct,
618 const Vector &pa_data,
622 MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D
");
623 MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D
");
625 const int dataSize = symmetric ? 6 : 9;
627 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
628 auto Bc = Reshape(bc.Read(), Q1D, D1D);
629 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
630 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
631 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
633 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
635 constexpr int VDIM = 3;
636 constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
637 constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
639 MFEM_SHARED double sBo[tQ1D][tD1D];
640 MFEM_SHARED double sBc[tQ1D][tD1D];
643 MFEM_SHARED double sop[9*tQ1D*tQ1D];
644 MFEM_SHARED double mass[tQ1D][tQ1D][3];
646 MFEM_SHARED double sX[tD1D][tD1D][tD1D];
648 MFEM_FOREACH_THREAD(qx,x,Q1D)
650 MFEM_FOREACH_THREAD(qy,y,Q1D)
652 MFEM_FOREACH_THREAD(qz,z,Q1D)
654 for (int i=0; i<dataSize; ++i)
656 op9[i] = op(qx,qy,qz,i,e);
662 const int tidx = MFEM_THREAD_ID(x);
663 const int tidy = MFEM_THREAD_ID(y);
664 const int tidz = MFEM_THREAD_ID(z);
668 MFEM_FOREACH_THREAD(d,y,D1D)
670 MFEM_FOREACH_THREAD(q,x,Q1D)
682 for (int qz=0; qz < Q1D; ++qz)
685 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
687 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
688 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
689 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
691 MFEM_FOREACH_THREAD(dz,z,D1Dz)
693 MFEM_FOREACH_THREAD(dy,y,D1Dy)
695 MFEM_FOREACH_THREAD(dx,x,D1Dx)
697 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
705 for (int i=0; i<dataSize; ++i)
707 sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
710 MFEM_FOREACH_THREAD(qy,y,Q1D)
712 MFEM_FOREACH_THREAD(qx,x,Q1D)
716 for (int dz = 0; dz < D1Dz; ++dz)
718 const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
719 for (int dy = 0; dy < D1Dy; ++dy)
721 const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
722 for (int dx = 0; dx < D1Dx; ++dx)
724 const double t = sX[dz][dy][dx];
725 const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
726 u += t * wx * wy * wz;
736 osc += D1Dx * D1Dy * D1Dz;
740 MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
743 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
745 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
746 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
747 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
751 MFEM_FOREACH_THREAD(dz,z,D1Dz)
753 const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
755 MFEM_FOREACH_THREAD(dy,y,D1Dy)
757 MFEM_FOREACH_THREAD(dx,x,D1Dx)
759 for (int qy = 0; qy < Q1D; ++qy)
761 const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
762 for (int qx = 0; qx < Q1D; ++qx)
764 const int os = (dataSize*qx) + (dataSize*Q1D*qy);
765 const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
766 (symmetric ? 2 : 6))); // O11, O21, O31
767 const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
768 (symmetric ? 4 : 7))); // O12, O22, O32
769 const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
770 (symmetric ? 5 : 8))); // O13, O23, O33
772 const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
773 (sop[id3] * mass[qy][qx][2]);
775 const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
776 dxyz += m_c * wx * wy * wz;
785 MFEM_FOREACH_THREAD(dz,z,D1Dz)
787 MFEM_FOREACH_THREAD(dy,y,D1Dy)
789 MFEM_FOREACH_THREAD(dx,x,D1Dx)
791 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
796 osc += D1Dx * D1Dy * D1Dz;
799 }); // end of element loop
802 // PA H(curl) curl-curl assemble 2D kernel
803 static void PACurlCurlSetup2D(const int Q1D,
805 const Array<double> &w,
810 const int NQ = Q1D*Q1D;
812 auto J = Reshape(j.Read(), NQ, 2, 2, NE);
813 auto C = Reshape(coeff.Read(), NQ, NE);
814 auto y = Reshape(op.Write(), NQ, NE);
817 for (int q = 0; q < NQ; ++q)
819 const double J11 = J(q,0,0,e);
820 const double J21 = J(q,1,0,e);
821 const double J12 = J(q,0,1,e);
822 const double J22 = J(q,1,1,e);
823 const double detJ = (J11*J22)-(J21*J12);
824 y(q,e) = W[q] * C(q,e) / detJ;
829 // PA H(curl) curl-curl assemble 3D kernel
830 static void PACurlCurlSetup3D(const int Q1D,
833 const Array<double> &w,
838 const int NQ = Q1D*Q1D*Q1D;
839 const bool symmetric = (coeffDim != 9);
841 auto J = Reshape(j.Read(), NQ, 3, 3, NE);
842 auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
843 auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
847 for (int q = 0; q < NQ; ++q)
849 const double J11 = J(q,0,0,e);
850 const double J21 = J(q,1,0,e);
851 const double J31 = J(q,2,0,e);
852 const double J12 = J(q,0,1,e);
853 const double J22 = J(q,1,1,e);
854 const double J32 = J(q,2,1,e);
855 const double J13 = J(q,0,2,e);
856 const double J23 = J(q,1,2,e);
857 const double J33 = J(q,2,2,e);
858 const double detJ = J11 * (J22 * J33 - J32 * J23) -
859 /* */ J21 * (J12 * J33 - J32 * J13) +
860 /* */ J31 * (J12 * J23 - J22 * J13);
862 const double c_detJ = W[q] / detJ;
864 if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
866 // Set y to the 6 or 9 entries of J^T M J / det
867 const double M11 = C(0, q, e);
868 const double M12 = C(1, q, e);
869 const double M13 = C(2, q, e);
870 const double M21 = (!symmetric) ? C(3, q, e) : M12;
871 const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
872 const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
873 const double M31 = (!symmetric) ? C(6, q, e) : M13;
874 const double M32 = (!symmetric) ? C(7, q, e) : M23;
875 const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
877 // First compute R = MJ
878 const double R11 = M11*J11 + M12*J21 + M13*J31;
879 const double R12 = M11*J12 + M12*J22 + M13*J32;
880 const double R13 = M11*J13 + M12*J23 + M13*J33;
881 const double R21 = M21*J11 + M22*J21 + M23*J31;
882 const double R22 = M21*J12 + M22*J22 + M23*J32;
883 const double R23 = M21*J13 + M22*J23 + M23*J33;
884 const double R31 = M31*J11 + M32*J21 + M33*J31;
885 const double R32 = M31*J12 + M32*J22 + M33*J32;
886 const double R33 = M31*J13 + M32*J23 + M33*J33;
888 // Now set y to J^T R / det
889 y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
890 const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
891 y(q,1,e) = Y12; // 1,2
892 y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
894 const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
895 const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
896 const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
898 const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
900 y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
901 y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
902 y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
906 y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
907 y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
908 y(q,8,e) = Y33; // 3,3
911 else // Vector or scalar coefficient version
913 // Set y to the 6 entries of J^T D J / det^2
914 const double D1 = C(0, q, e);
915 const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
916 const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
918 y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
919 y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
920 y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
921 y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
922 y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
923 y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
929 // PA H(curl)-L2 assemble 2D kernel
930 static void PACurlL2Setup2D(const int Q1D,
932 const Array<double> &w,
936 const int NQ = Q1D*Q1D;
938 auto C = Reshape(coeff.Read(), NQ, NE);
939 auto y = Reshape(op.Write(), NQ, NE);
942 for (int q = 0; q < NQ; ++q)
944 y(q,e) = W[q] * C(q,e);
949 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
951 // Assumes tensor-product elements
952 Mesh *mesh = fes.GetMesh();
953 const FiniteElement *fel = fes.GetFE(0);
955 const VectorTensorFiniteElement *el =
956 dynamic_cast<const VectorTensorFiniteElement*>(fel);
959 const IntegrationRule *ir
960 = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
961 *mesh->GetElementTransformation(0));
963 const int dims = el->GetDim();
964 MFEM_VERIFY(dims == 2 || dims == 3, "");
966 nq = ir->GetNPoints();
967 dim = mesh->Dimension();
968 MFEM_VERIFY(dim == 2 || dim == 3, "");
970 const int dimc = (dim == 3) ? 3 : 1;
973 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
974 mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
975 mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
976 dofs1D = mapsC->ndof;
977 quad1D = mapsC->nqpt;
979 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
981 const int MQsymmDim = SMQ ? (SMQ->GetSize() * (SMQ->GetSize() + 1)) / 2 : 0;
982 const int MQfullDim = MQ ? (MQ->GetHeight() * MQ->GetWidth()) : 0;
983 const int MQdim = MQ ? MQfullDim : MQsymmDim;
984 const int coeffDim = (MQ || SMQ) ? MQdim : (DQ ? DQ->GetVDim() : 1);
986 symmetric = (MQ == NULL);
988 const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
989 const int ndata = (dim == 2) ? 1 : (symmetric ? symmDims : MQfullDim);
990 pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
992 Vector coeff(coeffDim * ne * nq);
994 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
995 if (Q || DQ || MQ || SMQ)
997 Vector DM(DQ ? coeffDim : 0);
999 DenseSymmetricMatrix SM;
1003 MFEM_VERIFY(coeffDim == dimc, "");
1008 MFEM_VERIFY(coeffDim == MQdim, "");
1009 MFEM_VERIFY(MQ->GetHeight() == dimc && MQ->GetWidth() == dimc, "");
1014 MFEM_VERIFY(SMQ->GetSize() == dimc, "");
1017 for (int e=0; e<ne; ++e)
1019 ElementTransformation *tr = mesh->GetElementTransformation(e);
1020 for (int p=0; p<nq; ++p)
1024 MQ->Eval(GM, *tr, ir->IntPoint(p));
1026 for (int i=0; i<dimc; ++i)
1027 for (int j=0; j<dimc; ++j)
1029 coeffh(j+(i*dimc), p, e) = GM(i,j);
1035 SMQ->Eval(SM, *tr, ir->IntPoint(p));
1038 for (int i=0; i<dimc; ++i)
1039 for (int j=i; j<dimc; ++j, ++cnt)
1041 coeffh(cnt, p, e) = SM(i,j);
1047 DQ->Eval(DM, *tr, ir->IntPoint(p));
1048 for (int i=0; i<coeffDim; ++i)
1050 coeffh(i, p, e) = DM[i];
1055 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
1061 if (el->GetDerivType() != mfem::FiniteElement::CURL)
1063 MFEM_ABORT("Unknown kernel.
");
1068 PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
1073 PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1077 static void PACurlCurlApply2D(const int D1D,
1080 const Array<double> &bo,
1081 const Array<double> &bot,
1082 const Array<double> &gc,
1083 const Array<double> &gct,
1084 const Vector &pa_data,
1088 constexpr static int VDIM = 2;
1089 constexpr static int MAX_D1D = HCURL_MAX_D1D;
1090 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1092 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1093 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1094 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1095 auto Gct = Reshape(gct.Read(), D1D, Q1D);
1096 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1097 auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1098 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1102 double curl[MAX_Q1D][MAX_Q1D];
1104 // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1106 for (int qy = 0; qy < Q1D; ++qy)
1108 for (int qx = 0; qx < Q1D; ++qx)
1116 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1118 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1119 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1121 for (int dy = 0; dy < D1Dy; ++dy)
1123 double gradX[MAX_Q1D];
1124 for (int qx = 0; qx < Q1D; ++qx)
1129 for (int dx = 0; dx < D1Dx; ++dx)
1131 const double t = X(dx + (dy * D1Dx) + osc, e);
1132 for (int qx = 0; qx < Q1D; ++qx)
1134 gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1138 for (int qy = 0; qy < Q1D; ++qy)
1140 const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1141 for (int qx = 0; qx < Q1D; ++qx)
1143 curl[qy][qx] += gradX[qx] * wy;
1149 } // loop (c) over components
1151 // Apply D operator.
1152 for (int qy = 0; qy < Q1D; ++qy)
1154 for (int qx = 0; qx < Q1D; ++qx)
1156 curl[qy][qx] *= op(qx,qy,e);
1160 for (int qy = 0; qy < Q1D; ++qy)
1164 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1166 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1167 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1169 double gradX[MAX_D1D];
1170 for (int dx = 0; dx < D1Dx; ++dx)
1174 for (int qx = 0; qx < Q1D; ++qx)
1176 for (int dx = 0; dx < D1Dx; ++dx)
1178 gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1181 for (int dy = 0; dy < D1Dy; ++dy)
1183 const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1185 for (int dx = 0; dx < D1Dx; ++dx)
1187 Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1194 }); // end of element loop
1197 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1198 static void PACurlCurlApply3D(const int D1D,
1200 const bool symmetric,
1202 const Array<double> &bo,
1203 const Array<double> &bc,
1204 const Array<double> &bot,
1205 const Array<double> &bct,
1206 const Array<double> &gc,
1207 const Array<double> &gct,
1208 const Vector &pa_data,
1212 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
1213 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
1214 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1215 // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1216 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1217 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1218 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1220 constexpr static int VDIM = 3;
1222 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1223 auto Bc = Reshape(bc.Read(), Q1D, D1D);
1224 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1225 auto Bct = Reshape(bct.Read(), D1D, Q1D);
1226 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1227 auto Gct = Reshape(gct.Read(), D1D, Q1D);
1228 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1229 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1230 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1234 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1235 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1237 for (int qz = 0; qz < Q1D; ++qz)
1239 for (int qy = 0; qy < Q1D; ++qy)
1241 for (int qx = 0; qx < Q1D; ++qx)
1243 for (int c = 0; c < VDIM; ++c)
1245 curl[qz][qy][qx][c] = 0.0;
1251 // We treat x, y, z components separately for optimization specific to each.
1257 const int D1Dz = D1D;
1258 const int D1Dy = D1D;
1259 const int D1Dx = D1D - 1;
1261 for (int dz = 0; dz < D1Dz; ++dz)
1263 double gradXY[MAX_Q1D][MAX_Q1D][2];
1264 for (int qy = 0; qy < Q1D; ++qy)
1266 for (int qx = 0; qx < Q1D; ++qx)
1268 for (int d = 0; d < 2; ++d)
1270 gradXY[qy][qx][d] = 0.0;
1275 for (int dy = 0; dy < D1Dy; ++dy)
1277 double massX[MAX_Q1D];
1278 for (int qx = 0; qx < Q1D; ++qx)
1283 for (int dx = 0; dx < D1Dx; ++dx)
1285 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1286 for (int qx = 0; qx < Q1D; ++qx)
1288 massX[qx] += t * Bo(qx,dx);
1292 for (int qy = 0; qy < Q1D; ++qy)
1294 const double wy = Bc(qy,dy);
1295 const double wDy = Gc(qy,dy);
1296 for (int qx = 0; qx < Q1D; ++qx)
1298 const double wx = massX[qx];
1299 gradXY[qy][qx][0] += wx * wDy;
1300 gradXY[qy][qx][1] += wx * wy;
1305 for (int qz = 0; qz < Q1D; ++qz)
1307 const double wz = Bc(qz,dz);
1308 const double wDz = Gc(qz,dz);
1309 for (int qy = 0; qy < Q1D; ++qy)
1311 for (int qx = 0; qx < Q1D; ++qx)
1313 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1314 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1315 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1321 osc += D1Dx * D1Dy * D1Dz;
1326 const int D1Dz = D1D;
1327 const int D1Dy = D1D - 1;
1328 const int D1Dx = D1D;
1330 for (int dz = 0; dz < D1Dz; ++dz)
1332 double gradXY[MAX_Q1D][MAX_Q1D][2];
1333 for (int qy = 0; qy < Q1D; ++qy)
1335 for (int qx = 0; qx < Q1D; ++qx)
1337 for (int d = 0; d < 2; ++d)
1339 gradXY[qy][qx][d] = 0.0;
1344 for (int dx = 0; dx < D1Dx; ++dx)
1346 double massY[MAX_Q1D];
1347 for (int qy = 0; qy < Q1D; ++qy)
1352 for (int dy = 0; dy < D1Dy; ++dy)
1354 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1355 for (int qy = 0; qy < Q1D; ++qy)
1357 massY[qy] += t * Bo(qy,dy);
1361 for (int qx = 0; qx < Q1D; ++qx)
1363 const double wx = Bc(qx,dx);
1364 const double wDx = Gc(qx,dx);
1365 for (int qy = 0; qy < Q1D; ++qy)
1367 const double wy = massY[qy];
1368 gradXY[qy][qx][0] += wDx * wy;
1369 gradXY[qy][qx][1] += wx * wy;
1374 for (int qz = 0; qz < Q1D; ++qz)
1376 const double wz = Bc(qz,dz);
1377 const double wDz = Gc(qz,dz);
1378 for (int qy = 0; qy < Q1D; ++qy)
1380 for (int qx = 0; qx < Q1D; ++qx)
1382 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1383 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1384 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1390 osc += D1Dx * D1Dy * D1Dz;
1395 const int D1Dz = D1D - 1;
1396 const int D1Dy = D1D;
1397 const int D1Dx = D1D;
1399 for (int dx = 0; dx < D1Dx; ++dx)
1401 double gradYZ[MAX_Q1D][MAX_Q1D][2];
1402 for (int qz = 0; qz < Q1D; ++qz)
1404 for (int qy = 0; qy < Q1D; ++qy)
1406 for (int d = 0; d < 2; ++d)
1408 gradYZ[qz][qy][d] = 0.0;
1413 for (int dy = 0; dy < D1Dy; ++dy)
1415 double massZ[MAX_Q1D];
1416 for (int qz = 0; qz < Q1D; ++qz)
1421 for (int dz = 0; dz < D1Dz; ++dz)
1423 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1424 for (int qz = 0; qz < Q1D; ++qz)
1426 massZ[qz] += t * Bo(qz,dz);
1430 for (int qy = 0; qy < Q1D; ++qy)
1432 const double wy = Bc(qy,dy);
1433 const double wDy = Gc(qy,dy);
1434 for (int qz = 0; qz < Q1D; ++qz)
1436 const double wz = massZ[qz];
1437 gradYZ[qz][qy][0] += wz * wy;
1438 gradYZ[qz][qy][1] += wz * wDy;
1443 for (int qx = 0; qx < Q1D; ++qx)
1445 const double wx = Bc(qx,dx);
1446 const double wDx = Gc(qx,dx);
1448 for (int qy = 0; qy < Q1D; ++qy)
1450 for (int qz = 0; qz < Q1D; ++qz)
1452 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1453 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1454 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1461 // Apply D operator.
1462 for (int qz = 0; qz < Q1D; ++qz)
1464 for (int qy = 0; qy < Q1D; ++qy)
1466 for (int qx = 0; qx < Q1D; ++qx)
1468 const double O11 = op(qx,qy,qz,0,e);
1469 const double O12 = op(qx,qy,qz,1,e);
1470 const double O13 = op(qx,qy,qz,2,e);
1471 const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1472 const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1473 const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1474 const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1475 const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1476 const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1478 const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1479 (O13 * curl[qz][qy][qx][2]);
1480 const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1481 (O23 * curl[qz][qy][qx][2]);
1482 const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1483 (O33 * curl[qz][qy][qx][2]);
1485 curl[qz][qy][qx][0] = c1;
1486 curl[qz][qy][qx][1] = c2;
1487 curl[qz][qy][qx][2] = c3;
1495 const int D1Dz = D1D;
1496 const int D1Dy = D1D;
1497 const int D1Dx = D1D - 1;
1499 for (int qz = 0; qz < Q1D; ++qz)
1501 double gradXY12[MAX_D1D][MAX_D1D];
1502 double gradXY21[MAX_D1D][MAX_D1D];
1504 for (int dy = 0; dy < D1Dy; ++dy)
1506 for (int dx = 0; dx < D1Dx; ++dx)
1508 gradXY12[dy][dx] = 0.0;
1509 gradXY21[dy][dx] = 0.0;
1512 for (int qy = 0; qy < Q1D; ++qy)
1514 double massX[MAX_D1D][2];
1515 for (int dx = 0; dx < D1Dx; ++dx)
1517 for (int n = 0; n < 2; ++n)
1522 for (int qx = 0; qx < Q1D; ++qx)
1524 for (int dx = 0; dx < D1Dx; ++dx)
1526 const double wx = Bot(dx,qx);
1528 massX[dx][0] += wx * curl[qz][qy][qx][1];
1529 massX[dx][1] += wx * curl[qz][qy][qx][2];
1532 for (int dy = 0; dy < D1Dy; ++dy)
1534 const double wy = Bct(dy,qy);
1535 const double wDy = Gct(dy,qy);
1537 for (int dx = 0; dx < D1Dx; ++dx)
1539 gradXY21[dy][dx] += massX[dx][0] * wy;
1540 gradXY12[dy][dx] += massX[dx][1] * wDy;
1545 for (int dz = 0; dz < D1Dz; ++dz)
1547 const double wz = Bct(dz,qz);
1548 const double wDz = Gct(dz,qz);
1549 for (int dy = 0; dy < D1Dy; ++dy)
1551 for (int dx = 0; dx < D1Dx; ++dx)
1553 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1554 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1555 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1556 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
1562 osc += D1Dx * D1Dy * D1Dz;
1567 const int D1Dz = D1D;
1568 const int D1Dy = D1D - 1;
1569 const int D1Dx = D1D;
1571 for (int qz = 0; qz < Q1D; ++qz)
1573 double gradXY02[MAX_D1D][MAX_D1D];
1574 double gradXY20[MAX_D1D][MAX_D1D];
1576 for (int dy = 0; dy < D1Dy; ++dy)
1578 for (int dx = 0; dx < D1Dx; ++dx)
1580 gradXY02[dy][dx] = 0.0;
1581 gradXY20[dy][dx] = 0.0;
1584 for (int qx = 0; qx < Q1D; ++qx)
1586 double massY[MAX_D1D][2];
1587 for (int dy = 0; dy < D1Dy; ++dy)
1592 for (int qy = 0; qy < Q1D; ++qy)
1594 for (int dy = 0; dy < D1Dy; ++dy)
1596 const double wy = Bot(dy,qy);
1598 massY[dy][0] += wy * curl[qz][qy][qx][2];
1599 massY[dy][1] += wy * curl[qz][qy][qx][0];
1602 for (int dx = 0; dx < D1Dx; ++dx)
1604 const double wx = Bct(dx,qx);
1605 const double wDx = Gct(dx,qx);
1607 for (int dy = 0; dy < D1Dy; ++dy)
1609 gradXY02[dy][dx] += massY[dy][0] * wDx;
1610 gradXY20[dy][dx] += massY[dy][1] * wx;
1615 for (int dz = 0; dz < D1Dz; ++dz)
1617 const double wz = Bct(dz,qz);
1618 const double wDz = Gct(dz,qz);
1619 for (int dy = 0; dy < D1Dy; ++dy)
1621 for (int dx = 0; dx < D1Dx; ++dx)
1623 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1624 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1625 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1626 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
1632 osc += D1Dx * D1Dy * D1Dz;
1637 const int D1Dz = D1D - 1;
1638 const int D1Dy = D1D;
1639 const int D1Dx = D1D;
1641 for (int qx = 0; qx < Q1D; ++qx)
1643 double gradYZ01[MAX_D1D][MAX_D1D];
1644 double gradYZ10[MAX_D1D][MAX_D1D];
1646 for (int dy = 0; dy < D1Dy; ++dy)
1648 for (int dz = 0; dz < D1Dz; ++dz)
1650 gradYZ01[dz][dy] = 0.0;
1651 gradYZ10[dz][dy] = 0.0;
1654 for (int qy = 0; qy < Q1D; ++qy)
1656 double massZ[MAX_D1D][2];
1657 for (int dz = 0; dz < D1Dz; ++dz)
1659 for (int n = 0; n < 2; ++n)
1664 for (int qz = 0; qz < Q1D; ++qz)
1666 for (int dz = 0; dz < D1Dz; ++dz)
1668 const double wz = Bot(dz,qz);
1670 massZ[dz][0] += wz * curl[qz][qy][qx][0];
1671 massZ[dz][1] += wz * curl[qz][qy][qx][1];
1674 for (int dy = 0; dy < D1Dy; ++dy)
1676 const double wy = Bct(dy,qy);
1677 const double wDy = Gct(dy,qy);
1679 for (int dz = 0; dz < D1Dz; ++dz)
1681 gradYZ01[dz][dy] += wy * massZ[dz][1];
1682 gradYZ10[dz][dy] += wDy * massZ[dz][0];
1687 for (int dx = 0; dx < D1Dx; ++dx)
1689 const double wx = Bct(dx,qx);
1690 const double wDx = Gct(dx,qx);
1692 for (int dy = 0; dy < D1Dy; ++dy)
1694 for (int dz = 0; dz < D1Dz; ++dz)
1696 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1697 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1698 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1699 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
1705 }); // end of element loop
1708 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1709 static void SmemPACurlCurlApply3D(const int D1D,
1711 const bool symmetric,
1713 const Array<double> &bo,
1714 const Array<double> &bc,
1715 const Array<double> &bot,
1716 const Array<double> &bct,
1717 const Array<double> &gc,
1718 const Array<double> &gct,
1719 const Vector &pa_data,
1723 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
1724 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
1725 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1726 // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1727 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1728 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1729 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1731 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1732 auto Bc = Reshape(bc.Read(), Q1D, D1D);
1733 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1734 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1735 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1736 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1738 const int s = symmetric ? 6 : 9;
1740 auto device_kernel = [=] MFEM_DEVICE (int e)
1742 constexpr int VDIM = 3;
1744 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1745 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1746 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1749 MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1750 MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1752 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1754 MFEM_FOREACH_THREAD(qx,x,Q1D)
1756 MFEM_FOREACH_THREAD(qy,y,Q1D)
1758 MFEM_FOREACH_THREAD(qz,z,Q1D)
1760 for (int i=0; i<s; ++i)
1762 ope[i] = op(qx,qy,qz,i,e);
1768 const int tidx = MFEM_THREAD_ID(x);
1769 const int tidy = MFEM_THREAD_ID(y);
1770 const int tidz = MFEM_THREAD_ID(z);
1774 MFEM_FOREACH_THREAD(d,y,D1D)
1776 MFEM_FOREACH_THREAD(q,x,Q1D)
1778 sBc[d][q] = Bc(q,d);
1779 sGc[d][q] = Gc(q,d);
1782 sBo[d][q] = Bo(q,d);
1789 for (int qz=0; qz < Q1D; ++qz)
1793 MFEM_FOREACH_THREAD(qy,y,Q1D)
1795 MFEM_FOREACH_THREAD(qx,x,Q1D)
1797 for (int i=0; i<3; ++i)
1799 curl[qy][qx][i] = 0.0;
1806 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1808 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1809 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1810 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1812 MFEM_FOREACH_THREAD(dz,z,D1Dz)
1814 MFEM_FOREACH_THREAD(dy,y,D1Dy)
1816 MFEM_FOREACH_THREAD(dx,x,D1Dx)
1818 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1828 for (int i=0; i<s; ++i)
1830 sop[i][tidx][tidy] = ope[i];
1834 MFEM_FOREACH_THREAD(qy,y,Q1D)
1836 MFEM_FOREACH_THREAD(qx,x,Q1D)
1841 // We treat x, y, z components separately for optimization specific to each.
1842 if (c == 0) // x component
1844 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1846 for (int dz = 0; dz < D1Dz; ++dz)
1848 const double wz = sBc[dz][qz];
1849 const double wDz = sGc[dz][qz];
1851 for (int dy = 0; dy < D1Dy; ++dy)
1853 const double wy = sBc[dy][qy];
1854 const double wDy = sGc[dy][qy];
1856 for (int dx = 0; dx < D1Dx; ++dx)
1858 const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1865 curl[qy][qx][1] += v; // (u_0)_{x_2}
1866 curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1868 else if (c == 1) // y component
1870 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1872 for (int dz = 0; dz < D1Dz; ++dz)
1874 const double wz = sBc[dz][qz];
1875 const double wDz = sGc[dz][qz];
1877 for (int dy = 0; dy < D1Dy; ++dy)
1879 const double wy = sBo[dy][qy];
1881 for (int dx = 0; dx < D1Dx; ++dx)
1883 const double t = sX[dz][dy][dx];
1884 const double wx = t * sBc[dx][qx];
1885 const double wDx = t * sGc[dx][qx];
1893 curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1894 curl[qy][qx][2] += u; // (u_1)_{x_0}
1898 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1900 for (int dz = 0; dz < D1Dz; ++dz)
1902 const double wz = sBo[dz][qz];
1904 for (int dy = 0; dy < D1Dy; ++dy)
1906 const double wy = sBc[dy][qy];
1907 const double wDy = sGc[dy][qy];
1909 for (int dx = 0; dx < D1Dx; ++dx)
1911 const double t = sX[dz][dy][dx];
1912 const double wx = t * sBc[dx][qx];
1913 const double wDx = t * sGc[dx][qx];
1921 curl[qy][qx][0] += v; // (u_2)_{x_1}
1922 curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1928 osc += D1Dx * D1Dy * D1Dz;
1936 MFEM_FOREACH_THREAD(dz,z,D1D)
1938 const double wcz = sBc[dz][qz];
1939 const double wcDz = sGc[dz][qz];
1940 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1942 MFEM_FOREACH_THREAD(dy,y,D1D)
1944 MFEM_FOREACH_THREAD(dx,x,D1D)
1946 for (int qy = 0; qy < Q1D; ++qy)
1948 const double wcy = sBc[dy][qy];
1949 const double wcDy = sGc[dy][qy];
1950 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1952 for (int qx = 0; qx < Q1D; ++qx)
1954 const double O11 = sop[0][qx][qy];
1955 const double O12 = sop[1][qx][qy];
1956 const double O13 = sop[2][qx][qy];
1957 const double O21 = symmetric ? O12 : sop[3][qx][qy];
1958 const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1959 const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1960 const double O31 = symmetric ? O13 : sop[6][qx][qy];
1961 const double O32 = symmetric ? O23 : sop[7][qx][qy];
1962 const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1964 const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1965 (O13 * curl[qy][qx][2]);
1966 const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1967 (O23 * curl[qy][qx][2]);
1968 const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1969 (O33 * curl[qy][qx][2]);
1971 const double wcx = sBc[dx][qx];
1972 const double wDx = sGc[dx][qx];
1976 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1977 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1978 const double wx = sBo[dx][qx];
1979 dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1982 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1983 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1984 dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1986 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1987 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1988 dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1997 MFEM_FOREACH_THREAD(dz,z,D1D)
1999 MFEM_FOREACH_THREAD(dy,y,D1D)
2001 MFEM_FOREACH_THREAD(dx,x,D1D)
2005 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
2009 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
2013 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
2019 }; // end of element loop
2021 auto host_kernel = [&] MFEM_LAMBDA (int)
2023 MFEM_ABORT_KERNEL("This kernel should only be used on GPU.
");
2026 ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
2029 static void PACurlL2Apply2D(const int D1D,
2033 const Array<double> &bo,
2034 const Array<double> &bot,
2035 const Array<double> &bt,
2036 const Array<double> &gc,
2037 const Vector &pa_data,
2038 const Vector &x, // trial = H(curl)
2039 Vector &y) // test = L2 or H1
2041 constexpr static int VDIM = 2;
2042 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2043 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2044 const int H1 = (D1Dtest == D1D);
2046 MFEM_VERIFY(y.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension
");
2048 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2049 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2050 auto Bt = Reshape(bt.Read(), D1D, Q1D);
2051 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2052 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2053 auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
2054 auto Y = Reshape(y.ReadWrite(), D1Dtest, D1Dtest, NE);
2058 double curl[MAX_Q1D][MAX_Q1D];
2060 // curl[qy][qx] will be computed as du_y/dx - du_x/dy
2062 for (int qy = 0; qy < Q1D; ++qy)
2064 for (int qx = 0; qx < Q1D; ++qx)
2072 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2074 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2075 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2077 for (int dy = 0; dy < D1Dy; ++dy)
2079 double gradX[MAX_Q1D];
2080 for (int qx = 0; qx < Q1D; ++qx)
2085 for (int dx = 0; dx < D1Dx; ++dx)
2087 const double t = X(dx + (dy * D1Dx) + osc, e);
2088 for (int qx = 0; qx < Q1D; ++qx)
2090 gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2094 for (int qy = 0; qy < Q1D; ++qy)
2096 const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
2097 for (int qx = 0; qx < Q1D; ++qx)
2099 curl[qy][qx] += gradX[qx] * wy;
2105 } // loop (c) over components
2107 // Apply D operator.
2108 for (int qy = 0; qy < Q1D; ++qy)
2110 for (int qx = 0; qx < Q1D; ++qx)
2112 curl[qy][qx] *= op(qx,qy,e);
2116 for (int qy = 0; qy < Q1D; ++qy)
2118 double sol_x[MAX_D1D];
2119 for (int dx = 0; dx < D1Dtest; ++dx)
2123 for (int qx = 0; qx < Q1D; ++qx)
2125 const double s = curl[qy][qx];
2126 for (int dx = 0; dx < D1Dtest; ++dx)
2128 sol_x[dx] += s * ((H1 == 1) ? Bt(dx,qx) : Bot(dx,qx));
2131 for (int dy = 0; dy < D1Dtest; ++dy)
2133 const double wy = (H1 == 1) ? Bt(dy,qy) : Bot(dy,qy);
2135 for (int dx = 0; dx < D1Dtest; ++dx)
2137 Y(dx,dy,e) += sol_x[dx] * wy;
2141 }); // end of element loop
2144 static void PACurlL2ApplyTranspose2D(const int D1D,
2148 const Array<double> &bo,
2149 const Array<double> &bot,
2150 const Array<double> &b,
2151 const Array<double> &gct,
2152 const Vector &pa_data,
2153 const Vector &x, // trial = H(curl)
2154 Vector &y) // test = L2 or H1
2156 constexpr static int VDIM = 2;
2157 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2158 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2159 const int H1 = (D1Dtest == D1D);
2161 MFEM_VERIFY(x.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension
");
2163 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2164 auto B = Reshape(b.Read(), Q1D, D1D);
2165 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2166 auto Gct = Reshape(gct.Read(), D1D, Q1D);
2167 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2168 auto X = Reshape(x.Read(), D1Dtest, D1Dtest, NE);
2169 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2173 double mass[MAX_Q1D][MAX_Q1D];
2175 // Zero-order term in L2 or H1 test space
2177 for (int qy = 0; qy < Q1D; ++qy)
2179 for (int qx = 0; qx < Q1D; ++qx)
2185 for (int dy = 0; dy < D1Dtest; ++dy)
2187 double sol_x[MAX_Q1D];
2188 for (int qy = 0; qy < Q1D; ++qy)
2192 for (int dx = 0; dx < D1Dtest; ++dx)
2194 const double s = X(dx,dy,e);
2195 for (int qx = 0; qx < Q1D; ++qx)
2197 sol_x[qx] += s * ((H1 == 1) ? B(qx,dx) : Bo(qx,dx));
2200 for (int qy = 0; qy < Q1D; ++qy)
2202 const double d2q = (H1 == 1) ? B(qy,dy) : Bo(qy,dy);
2203 for (int qx = 0; qx < Q1D; ++qx)
2205 mass[qy][qx] += d2q * sol_x[qx];
2210 // Apply D operator.
2211 for (int qy = 0; qy < Q1D; ++qy)
2213 for (int qx = 0; qx < Q1D; ++qx)
2215 mass[qy][qx] *= op(qx,qy,e);
2219 for (int qy = 0; qy < Q1D; ++qy)
2223 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2225 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2226 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2228 double gradX[MAX_D1D];
2229 for (int dx = 0; dx < D1Dx; ++dx)
2233 for (int qx = 0; qx < Q1D; ++qx)
2235 for (int dx = 0; dx < D1Dx; ++dx)
2237 gradX[dx] += mass[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
2240 for (int dy = 0; dy < D1Dy; ++dy)
2242 const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
2244 for (int dx = 0; dx < D1Dx; ++dx)
2246 Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
2253 }); // end of element loop
2256 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
2260 if (Device::Allows(Backend::DEVICE_MASK))
2262 const int ID = (dofs1D << 4) | quad1D;
2265 case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2266 mapsO->B, mapsC->B, mapsO->Bt,
2267 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2268 case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2269 mapsO->B, mapsC->B, mapsO->Bt,
2270 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2271 case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2273 mapsC->B, mapsO->Bt,
2274 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2275 case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2276 mapsO->B, mapsC->B, mapsO->Bt,
2277 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2278 default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2279 mapsC->B, mapsO->Bt, mapsC->Bt,
2280 mapsC->G, mapsC->Gt, pa_data, x, y);
2284 PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2285 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2289 PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2290 mapsC->G, mapsC->Gt, pa_data, x, y);
2294 MFEM_ABORT("Unsupported dimension!
");
2298 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2301 const Array<double> &bo,
2302 const Array<double> &gc,
2303 const Vector &pa_data,
2306 constexpr static int VDIM = 2;
2307 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2309 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2310 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2311 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2312 auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2318 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2320 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2321 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2325 for (int dy = 0; dy < D1Dy; ++dy)
2327 for (int qx = 0; qx < Q1D; ++qx)
2330 for (int qy = 0; qy < Q1D; ++qy)
2332 const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2333 t[qx] += wy * wy * op(qx,qy,e);
2337 for (int dx = 0; dx < D1Dx; ++dx)
2339 for (int qx = 0; qx < Q1D; ++qx)
2341 const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2342 D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2349 }); // end of element loop
2352 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2353 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2355 const bool symmetric,
2357 const Array<double> &bo,
2358 const Array<double> &bc,
2359 const Array<double> &go,
2360 const Array<double> &gc,
2361 const Vector &pa_data,
2364 constexpr static int VDIM = 3;
2365 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2366 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2368 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2369 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2370 auto Go = Reshape(go.Read(), Q1D, D1D-1);
2371 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2372 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2373 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2375 const int s = symmetric ? 6 : 9;
2379 const int i21 = symmetric ? i12 : 3;
2380 const int i22 = symmetric ? 3 : 4;
2381 const int i23 = symmetric ? 4 : 5;
2382 const int i31 = symmetric ? i13 : 6;
2383 const int i32 = symmetric ? i23 : 7;
2384 const int i33 = symmetric ? 5 : 8;
2388 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2389 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2390 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2391 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2392 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2394 // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2395 // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2399 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2401 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2402 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2403 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2405 double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2408 for (int qx = 0; qx < Q1D; ++qx)
2410 for (int qy = 0; qy < Q1D; ++qy)
2412 for (int dz = 0; dz < D1Dz; ++dz)
2414 for (int i=0; i<s; ++i)
2416 for (int d=0; d<3; ++d)
2418 zt[qx][qy][dz][i][d] = 0.0;
2422 for (int qz = 0; qz < Q1D; ++qz)
2424 const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2425 const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2427 for (int i=0; i<s; ++i)
2429 zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2430 zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2431 zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2436 } // end of z contraction
2438 double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2441 for (int qx = 0; qx < Q1D; ++qx)
2443 for (int dz = 0; dz < D1Dz; ++dz)
2445 for (int dy = 0; dy < D1Dy; ++dy)
2447 for (int i=0; i<s; ++i)
2449 for (int d=0; d<3; ++d)
2450 for (int j=0; j<3; ++j)
2452 yt[qx][dy][dz][i][d][j] = 0.0;
2456 for (int qy = 0; qy < Q1D; ++qy)
2458 const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2459 const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2461 for (int i=0; i<s; ++i)
2463 for (int d=0; d<3; ++d)
2465 yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2466 yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2467 yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2473 } // end of y contraction
2476 for (int dz = 0; dz < D1Dz; ++dz)
2478 for (int dy = 0; dy < D1Dy; ++dy)
2480 for (int dx = 0; dx < D1Dx; ++dx)
2482 for (int qx = 0; qx < Q1D; ++qx)
2484 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2485 const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2487 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2488 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2489 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2490 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2491 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2494 const double O11 = op(q,0,e);
2495 const double O12 = op(q,1,e);
2496 const double O13 = op(q,2,e);
2497 const double O22 = op(q,3,e);
2498 const double O23 = op(q,4,e);
2499 const double O33 = op(q,5,e);
2504 // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2505 const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2506 - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2508 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2512 // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2513 const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2514 - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2515 + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2517 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2521 // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2522 const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2523 - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2524 + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2526 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2531 } // end of x contraction
2533 osc += D1Dx * D1Dy * D1Dz;
2535 }); // end of element loop
2538 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2539 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2541 const bool symmetric,
2543 const Array<double> &bo,
2544 const Array<double> &bc,
2545 const Array<double> &go,
2546 const Array<double> &gc,
2547 const Vector &pa_data,
2550 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2551 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2553 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2554 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2555 auto Go = Reshape(go.Read(), Q1D, D1D-1);
2556 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2557 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2558 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2560 const int s = symmetric ? 6 : 9;
2564 const int i21 = symmetric ? i12 : 3;
2565 const int i22 = symmetric ? 3 : 4;
2566 const int i23 = symmetric ? 4 : 5;
2567 const int i31 = symmetric ? i13 : 6;
2568 const int i32 = symmetric ? i23 : 7;
2569 const int i33 = symmetric ? 5 : 8;
2571 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2573 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2574 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2575 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2576 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2577 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2579 constexpr int VDIM = 3;
2581 MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2582 MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2583 MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2584 MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2587 MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2589 MFEM_FOREACH_THREAD(qx,x,Q1D)
2591 MFEM_FOREACH_THREAD(qy,y,Q1D)
2593 MFEM_FOREACH_THREAD(qz,z,Q1D)
2595 for (int i=0; i<s; ++i)
2597 ope[i] = op(qx,qy,qz,i,e);
2603 const int tidx = MFEM_THREAD_ID(x);
2604 const int tidy = MFEM_THREAD_ID(y);
2605 const int tidz = MFEM_THREAD_ID(z);
2609 MFEM_FOREACH_THREAD(d,y,D1D)
2611 MFEM_FOREACH_THREAD(q,x,Q1D)
2613 sBc[q][d] = Bc(q,d);
2614 sGc[q][d] = Gc(q,d);
2617 sBo[q][d] = Bo(q,d);
2618 sGo[q][d] = Go(q,d);
2626 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2628 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2629 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2630 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2634 for (int qz=0; qz < Q1D; ++qz)
2638 for (int i=0; i<s; ++i)
2640 sop[i][tidx][tidy] = ope[i];
2646 MFEM_FOREACH_THREAD(dz,z,D1Dz)
2648 const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2649 const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2651 MFEM_FOREACH_THREAD(dy,y,D1Dy)
2653 MFEM_FOREACH_THREAD(dx,x,D1Dx)
2655 for (int qy = 0; qy < Q1D; ++qy)
2657 const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2658 const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2660 for (int qx = 0; qx < Q1D; ++qx)
2662 const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2663 const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2667 // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2669 // (u_0)_{x_2} O22 (u_0)_{x_2}
2670 dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2672 // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2673 dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2675 // (u_0)_{x_1} O33 (u_0)_{x_1}
2676 dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2680 // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2682 // (u_1)_{x_2} O11 (u_1)_{x_2}
2683 dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2685 // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2686 dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2688 // (u_1)_{x_0} O33 (u_1)_{x_0})
2689 dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2693 // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2695 // (u_2)_{x_1} O11 (u_2)_{x_1}
2696 dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2698 // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2699 dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2701 // (u_2)_{x_0} O22 (u_2)_{x_0}
2702 dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2713 MFEM_FOREACH_THREAD(dz,z,D1Dz)
2715 MFEM_FOREACH_THREAD(dy,y,D1Dy)
2717 MFEM_FOREACH_THREAD(dx,x,D1Dx)
2719 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2724 osc += D1Dx * D1Dy * D1Dz;
2726 }); // end of element loop
2729 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2733 if (Device::Allows(Backend::DEVICE_MASK))
2735 const int ID = (dofs1D << 4) | quad1D;
2738 case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2743 case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2748 case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2753 case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2758 default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2765 PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2772 PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2773 mapsO->B, mapsC->G, pa_data, diag);
2777 MFEM_ABORT("Unsupported dimension!
");
2781 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2782 // integrated against H(curl) test functions corresponding to y.
2783 void PAHcurlH1Apply3D(const int D1D,
2786 const Array<double> &bc,
2787 const Array<double> &gc,
2788 const Array<double> &bot,
2789 const Array<double> &bct,
2790 const Vector &pa_data,
2794 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2795 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2797 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2798 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2800 constexpr static int VDIM = 3;
2802 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2803 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2804 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2805 auto Bct = Reshape(bct.Read(), D1D, Q1D);
2806 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2807 auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2808 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2812 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2814 for (int qz = 0; qz < Q1D; ++qz)
2816 for (int qy = 0; qy < Q1D; ++qy)
2818 for (int qx = 0; qx < Q1D; ++qx)
2820 for (int c = 0; c < VDIM; ++c)
2822 mass[qz][qy][qx][c] = 0.0;
2828 for (int dz = 0; dz < D1D; ++dz)
2830 double gradXY[MAX_Q1D][MAX_Q1D][3];
2831 for (int qy = 0; qy < Q1D; ++qy)
2833 for (int qx = 0; qx < Q1D; ++qx)
2835 gradXY[qy][qx][0] = 0.0;
2836 gradXY[qy][qx][1] = 0.0;
2837 gradXY[qy][qx][2] = 0.0;
2840 for (int dy = 0; dy < D1D; ++dy)
2842 double gradX[MAX_Q1D][2];
2843 for (int qx = 0; qx < Q1D; ++qx)
2848 for (int dx = 0; dx < D1D; ++dx)
2850 const double s = X(dx,dy,dz,e);
2851 for (int qx = 0; qx < Q1D; ++qx)
2853 gradX[qx][0] += s * Bc(qx,dx);
2854 gradX[qx][1] += s * Gc(qx,dx);
2857 for (int qy = 0; qy < Q1D; ++qy)
2859 const double wy = Bc(qy,dy);
2860 const double wDy = Gc(qy,dy);
2861 for (int qx = 0; qx < Q1D; ++qx)
2863 const double wx = gradX[qx][0];
2864 const double wDx = gradX[qx][1];
2865 gradXY[qy][qx][0] += wDx * wy;
2866 gradXY[qy][qx][1] += wx * wDy;
2867 gradXY[qy][qx][2] += wx * wy;
2871 for (int qz = 0; qz < Q1D; ++qz)
2873 const double wz = Bc(qz,dz);
2874 const double wDz = Gc(qz,dz);
2875 for (int qy = 0; qy < Q1D; ++qy)
2877 for (int qx = 0; qx < Q1D; ++qx)
2879 mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2880 mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2881 mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2887 // Apply D operator.
2888 for (int qz = 0; qz < Q1D; ++qz)
2890 for (int qy = 0; qy < Q1D; ++qy)
2892 for (int qx = 0; qx < Q1D; ++qx)
2894 const double O11 = op(qx,qy,qz,0,e);
2895 const double O12 = op(qx,qy,qz,1,e);
2896 const double O13 = op(qx,qy,qz,2,e);
2897 const double O22 = op(qx,qy,qz,3,e);
2898 const double O23 = op(qx,qy,qz,4,e);
2899 const double O33 = op(qx,qy,qz,5,e);
2900 const double massX = mass[qz][qy][qx][0];
2901 const double massY = mass[qz][qy][qx][1];
2902 const double massZ = mass[qz][qy][qx][2];
2903 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2904 mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2905 mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2910 for (int qz = 0; qz < Q1D; ++qz)
2912 double massXY[MAX_D1D][MAX_D1D];
2916 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2918 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2919 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2920 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2922 for (int dy = 0; dy < D1Dy; ++dy)
2924 for (int dx = 0; dx < D1Dx; ++dx)
2926 massXY[dy][dx] = 0.0;
2929 for (int qy = 0; qy < Q1D; ++qy)
2931 double massX[MAX_D1D];
2932 for (int dx = 0; dx < D1Dx; ++dx)
2936 for (int qx = 0; qx < Q1D; ++qx)
2938 for (int dx = 0; dx < D1Dx; ++dx)
2940 massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2943 for (int dy = 0; dy < D1Dy; ++dy)
2945 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2946 for (int dx = 0; dx < D1Dx; ++dx)
2948 massXY[dy][dx] += massX[dx] * wy;
2953 for (int dz = 0; dz < D1Dz; ++dz)
2955 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2956 for (int dy = 0; dy < D1Dy; ++dy)
2958 for (int dx = 0; dx < D1Dx; ++dx)
2960 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2965 osc += D1Dx * D1Dy * D1Dz;
2968 }); // end of element loop
2971 // Apply to x corresponding to DOF's in H(curl), integrated
2972 // against gradients of H^1 functions corresponding to y.
2973 void PAHcurlH1ApplyTranspose3D(const int D1D,
2976 const Array<double> &bc,
2977 const Array<double> &bo,
2978 const Array<double> &bct,
2979 const Array<double> &gct,
2980 const Vector &pa_data,
2984 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2985 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2987 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2988 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2990 constexpr static int VDIM = 3;
2992 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2993 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2994 auto Bt = Reshape(bct.Read(), D1D, Q1D);
2995 auto Gt = Reshape(gct.Read(), D1D, Q1D);
2996 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2997 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2998 auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
3002 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3004 for (int qz = 0; qz < Q1D; ++qz)
3006 for (int qy = 0; qy < Q1D; ++qy)
3008 for (int qx = 0; qx < Q1D; ++qx)
3010 for (int c = 0; c < VDIM; ++c)
3012 mass[qz][qy][qx][c] = 0.0;
3020 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3022 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3023 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3024 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3026 for (int dz = 0; dz < D1Dz; ++dz)
3028 double massXY[MAX_Q1D][MAX_Q1D];
3029 for (int qy = 0; qy < Q1D; ++qy)
3031 for (int qx = 0; qx < Q1D; ++qx)
3033 massXY[qy][qx] = 0.0;
3037 for (int dy = 0; dy < D1Dy; ++dy)
3039 double massX[MAX_Q1D];
3040 for (int qx = 0; qx < Q1D; ++qx)
3045 for (int dx = 0; dx < D1Dx; ++dx)
3047 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3048 for (int qx = 0; qx < Q1D; ++qx)
3050 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
3054 for (int qy = 0; qy < Q1D; ++qy)
3056 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
3057 for (int qx = 0; qx < Q1D; ++qx)
3059 const double wx = massX[qx];
3060 massXY[qy][qx] += wx * wy;
3065 for (int qz = 0; qz < Q1D; ++qz)
3067 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
3068 for (int qy = 0; qy < Q1D; ++qy)
3070 for (int qx = 0; qx < Q1D; ++qx)
3072 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
3078 osc += D1Dx * D1Dy * D1Dz;
3079 } // loop (c) over components
3081 // Apply D operator.
3082 for (int qz = 0; qz < Q1D; ++qz)
3084 for (int qy = 0; qy < Q1D; ++qy)
3086 for (int qx = 0; qx < Q1D; ++qx)
3088 const double O11 = op(qx,qy,qz,0,e);
3089 const double O12 = op(qx,qy,qz,1,e);
3090 const double O13 = op(qx,qy,qz,2,e);
3091 const double O22 = op(qx,qy,qz,3,e);
3092 const double O23 = op(qx,qy,qz,4,e);
3093 const double O33 = op(qx,qy,qz,5,e);
3094 const double massX = mass[qz][qy][qx][0];
3095 const double massY = mass[qz][qy][qx][1];
3096 const double massZ = mass[qz][qy][qx][2];
3097 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
3098 mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
3099 mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
3104 for (int qz = 0; qz < Q1D; ++qz)
3106 double gradXY[MAX_D1D][MAX_D1D][3];
3107 for (int dy = 0; dy < D1D; ++dy)
3109 for (int dx = 0; dx < D1D; ++dx)
3111 gradXY[dy][dx][0] = 0;
3112 gradXY[dy][dx][1] = 0;
3113 gradXY[dy][dx][2] = 0;
3116 for (int qy = 0; qy < Q1D; ++qy)
3118 double gradX[MAX_D1D][3];
3119 for (int dx = 0; dx < D1D; ++dx)
3125 for (int qx = 0; qx < Q1D; ++qx)
3127 const double gX = mass[qz][qy][qx][0];
3128 const double gY = mass[qz][qy][qx][1];
3129 const double gZ = mass[qz][qy][qx][2];
3130 for (int dx = 0; dx < D1D; ++dx)
3132 const double wx = Bt(dx,qx);
3133 const double wDx = Gt(dx,qx);
3134 gradX[dx][0] += gX * wDx;
3135 gradX[dx][1] += gY * wx;
3136 gradX[dx][2] += gZ * wx;
3139 for (int dy = 0; dy < D1D; ++dy)
3141 const double wy = Bt(dy,qy);
3142 const double wDy = Gt(dy,qy);
3143 for (int dx = 0; dx < D1D; ++dx)
3145 gradXY[dy][dx][0] += gradX[dx][0] * wy;
3146 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
3147 gradXY[dy][dx][2] += gradX[dx][2] * wy;
3151 for (int dz = 0; dz < D1D; ++dz)
3153 const double wz = Bt(dz,qz);
3154 const double wDz = Gt(dz,qz);
3155 for (int dy = 0; dy < D1D; ++dy)
3157 for (int dx = 0; dx < D1D; ++dx)
3160 ((gradXY[dy][dx][0] * wz) +
3161 (gradXY[dy][dx][1] * wz) +
3162 (gradXY[dy][dx][2] * wDz));
3167 }); // end of element loop
3170 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
3171 // integrated against H(curl) test functions corresponding to y.
3172 void PAHcurlH1Apply2D(const int D1D,
3175 const Array<double> &bc,
3176 const Array<double> &gc,
3177 const Array<double> &bot,
3178 const Array<double> &bct,
3179 const Vector &pa_data,
3183 constexpr static int VDIM = 2;
3184 constexpr static int MAX_D1D = HCURL_MAX_D1D;
3185 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3187 auto Bc = Reshape(bc.Read(), Q1D, D1D);
3188 auto Gc = Reshape(gc.Read(), Q1D, D1D);
3189 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3190 auto Bct = Reshape(bct.Read(), D1D, Q1D);
3191 auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3192 auto X = Reshape(x.Read(), D1D, D1D, NE);
3193 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
3197 double mass[MAX_Q1D][MAX_Q1D][VDIM];
3199 for (int qy = 0; qy < Q1D; ++qy)
3201 for (int qx = 0; qx < Q1D; ++qx)
3203 for (int c = 0; c < VDIM; ++c)
3205 mass[qy][qx][c] = 0.0;
3210 for (int dy = 0; dy < D1D; ++dy)
3212 double gradX[MAX_Q1D][2];
3213 for (int qx = 0; qx < Q1D; ++qx)
3218 for (int dx = 0; dx < D1D; ++dx)
3220 const double s = X(dx,dy,e);
3221 for (int qx = 0; qx < Q1D; ++qx)
3223 gradX[qx][0] += s * Bc(qx,dx);
3224 gradX[qx][1] += s * Gc(qx,dx);
3227 for (int qy = 0; qy < Q1D; ++qy)
3229 const double wy = Bc(qy,dy);
3230 const double wDy = Gc(qy,dy);
3231 for (int qx = 0; qx < Q1D; ++qx)
3233 const double wx = gradX[qx][0];
3234 const double wDx = gradX[qx][1];
3235 mass[qy][qx][0] += wDx * wy;
3236 mass[qy][qx][1] += wx * wDy;
3241 // Apply D operator.
3242 for (int qy = 0; qy < Q1D; ++qy)
3244 for (int qx = 0; qx < Q1D; ++qx)
3246 const double O11 = op(qx,qy,0,e);
3247 const double O12 = op(qx,qy,1,e);
3248 const double O22 = op(qx,qy,2,e);
3249 const double massX = mass[qy][qx][0];
3250 const double massY = mass[qy][qx][1];
3251 mass[qy][qx][0] = (O11*massX)+(O12*massY);
3252 mass[qy][qx][1] = (O12*massX)+(O22*massY);
3256 for (int qy = 0; qy < Q1D; ++qy)
3260 for (int c = 0; c < VDIM; ++c) // loop over x, y components
3262 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3263 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3265 double massX[MAX_D1D];
3266 for (int dx = 0; dx < D1Dx; ++dx)
3270 for (int qx = 0; qx < Q1D; ++qx)
3272 for (int dx = 0; dx < D1Dx; ++dx)
3274 massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3278 for (int dy = 0; dy < D1Dy; ++dy)
3280 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3282 for (int dx = 0; dx < D1Dx; ++dx)
3284 Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
3291 }); // end of element loop
3294 // Apply to x corresponding to DOF's in H(curl), integrated
3295 // against gradients of H^1 functions corresponding to y.
3296 void PAHcurlH1ApplyTranspose2D(const int D1D,
3299 const Array<double> &bc,
3300 const Array<double> &bo,
3301 const Array<double> &bct,
3302 const Array<double> &gct,
3303 const Vector &pa_data,
3307 constexpr static int VDIM = 2;
3308 constexpr static int MAX_D1D = HCURL_MAX_D1D;
3309 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
3311 auto Bc = Reshape(bc.Read(), Q1D, D1D);
3312 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3313 auto Bt = Reshape(bct.Read(), D1D, Q1D);
3314 auto Gt = Reshape(gct.Read(), D1D, Q1D);
3315 auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
3316 auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
3317 auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE);
3321 double mass[MAX_Q1D][MAX_Q1D][VDIM];
3323 for (int qy = 0; qy < Q1D; ++qy)
3325 for (int qx = 0; qx < Q1D; ++qx)
3327 for (int c = 0; c < VDIM; ++c)
3329 mass[qy][qx][c] = 0.0;
3336 for (int c = 0; c < VDIM; ++c) // loop over x, y components
3338 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3339 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3341 for (int dy = 0; dy < D1Dy; ++dy)
3343 double massX[MAX_Q1D];
3344 for (int qx = 0; qx < Q1D; ++qx)
3349 for (int dx = 0; dx < D1Dx; ++dx)
3351 const double t = X(dx + (dy * D1Dx) + osc, e);
3352 for (int qx = 0; qx < Q1D; ++qx)
3354 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
3358 for (int qy = 0; qy < Q1D; ++qy)
3360 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
3361 for (int qx = 0; qx < Q1D; ++qx)
3363 mass[qy][qx][c] += massX[qx] * wy;
3369 } // loop (c) over components
3371 // Apply D operator.
3372 for (int qy = 0; qy < Q1D; ++qy)
3374 for (int qx = 0; qx < Q1D; ++qx)
3376 const double O11 = op(qx,qy,0,e);
3377 const double O12 = op(qx,qy,1,e);
3378 const double O22 = op(qx,qy,2,e);
3379 const double massX = mass[qy][qx][0];
3380 const double massY = mass[qy][qx][1];
3381 mass[qy][qx][0] = (O11*massX)+(O12*massY);
3382 mass[qy][qx][1] = (O12*massX)+(O22*massY);
3386 for (int qy = 0; qy < Q1D; ++qy)
3388 double gradX[MAX_D1D][2];
3389 for (int dx = 0; dx < D1D; ++dx)
3394 for (int qx = 0; qx < Q1D; ++qx)
3396 const double gX = mass[qy][qx][0];
3397 const double gY = mass[qy][qx][1];
3398 for (int dx = 0; dx < D1D; ++dx)
3400 const double wx = Bt(dx,qx);
3401 const double wDx = Gt(dx,qx);
3402 gradX[dx][0] += gX * wDx;
3403 gradX[dx][1] += gY * wx;
3406 for (int dy = 0; dy < D1D; ++dy)
3408 const double wy = Bt(dy,qy);
3409 const double wDy = Gt(dy,qy);
3410 for (int dx = 0; dx < D1D; ++dx)
3412 Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
3416 }); // end of element loop
3419 // PA H(curl) Mass Assemble 3D kernel
3420 void PAHcurlL2Setup(const int NQ,
3423 const Array<double> &w,
3428 auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
3429 auto y = Reshape(op.Write(), coeffDim, NQ, NE);
3433 for (int q = 0; q < NQ; ++q)
3435 for (int c=0; c<coeffDim; ++c)
3437 y(c,q,e) = W[q] * C(c,q,e);
3443 void MixedScalarCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3444 const FiniteElementSpace &test_fes)
3446 // Assumes tensor-product elements
3447 Mesh *mesh = trial_fes.GetMesh();
3448 const FiniteElement *fel = trial_fes.GetFE(0); // In H(curl)
3449 const FiniteElement *eltest = test_fes.GetFE(0); // In scalar space
3451 const VectorTensorFiniteElement *el =
3452 dynamic_cast<const VectorTensorFiniteElement*>(fel);
3455 if (el->GetDerivType() != mfem::FiniteElement::CURL)
3457 MFEM_ABORT("Unknown kernel.
");
3460 const IntegrationRule *ir
3461 = IntRule ? IntRule : &MassIntegrator::GetRule(*eltest, *eltest,
3462 *mesh->GetElementTransformation(0));
3464 const int dims = el->GetDim();
3465 MFEM_VERIFY(dims == 2, "");
3467 const int nq = ir->GetNPoints();
3468 dim = mesh->Dimension();
3469 MFEM_VERIFY(dim == 2, "");
3471 ne = test_fes.GetNE();
3472 mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3473 mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3474 dofs1D = mapsC->ndof;
3475 quad1D = mapsC->nqpt;
3477 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3479 if (el->GetOrder() == eltest->GetOrder())
3481 dofs1Dtest = dofs1D;
3485 dofs1Dtest = dofs1D - 1;
3488 pa_data.SetSize(nq * ne, Device::GetMemoryType());
3490 Vector coeff(ne * nq);
3492 auto coeffh = Reshape(coeff.HostWrite(), nq, ne);
3495 for (int e=0; e<ne; ++e)
3497 ElementTransformation *tr = mesh->GetElementTransformation(e);
3498 for (int p=0; p<nq; ++p)
3500 coeffh(p, e) = Q->Eval(*tr, ir->IntPoint(p));
3507 PACurlL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data);
3511 MFEM_ABORT("Unsupported dimension!
");
3515 void MixedScalarCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3519 PACurlL2Apply2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3520 mapsC->Bt, mapsC->G, pa_data, x, y);
3524 MFEM_ABORT("Unsupported dimension!
");
3528 void MixedScalarCurlIntegrator::AddMultTransposePA(const Vector &x,
3533 PACurlL2ApplyTranspose2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt,
3534 mapsC->B, mapsC->Gt, pa_data, x, y);
3538 MFEM_ABORT("Unsupported dimension!
");
3542 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
3543 const FiniteElementSpace &test_fes)
3545 // Assumes tensor-product elements, with vector test and trial spaces.
3546 Mesh *mesh = trial_fes.GetMesh();
3547 const FiniteElement *trial_fel = trial_fes.GetFE(0);
3548 const FiniteElement *test_fel = test_fes.GetFE(0);
3550 const VectorTensorFiniteElement *trial_el =
3551 dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3554 const VectorTensorFiniteElement *test_el =
3555 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
3558 const IntegrationRule *ir
3559 = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
3560 *mesh->GetElementTransformation(0));
3561 const int dims = trial_el->GetDim();
3562 MFEM_VERIFY(dims == 3, "");
3564 const int nq = ir->GetNPoints();
3565 dim = mesh->Dimension();
3566 MFEM_VERIFY(dim == 3, "");
3568 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
3570 ne = trial_fes.GetNE();
3571 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
3572 mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3573 mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3574 mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
3575 mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
3576 dofs1D = mapsC->ndof;
3577 quad1D = mapsC->nqpt;
3578 dofs1Dtest = mapsCtest->ndof;
3580 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
3582 testType = test_el->GetDerivType();
3583 trialType = trial_el->GetDerivType();
3585 const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
3586 coeffDim = (DQ ? 3 : 1);
3588 const bool curlSpaces = (testType == mfem::FiniteElement::CURL &&
3589 trialType == mfem::FiniteElement::CURL);
3591 const int ndata = curlSpaces ? (coeffDim == 1 ? 1 : 9) : symmDims;
3592 pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
3594 Vector coeff(coeffDim * nq * ne);
3596 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
3602 MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
3605 for (int e=0; e<ne; ++e)
3607 ElementTransformation *tr = mesh->GetElementTransformation(e);
3609 for (int p=0; p<nq; ++p)
3613 DQ->Eval(V, *tr, ir->IntPoint(p));
3614 for (int i=0; i<coeffDim; ++i)
3616 coeffh(i, p, e) = V[i];
3621 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
3627 if (testType == mfem::FiniteElement::CURL &&
3628 trialType == mfem::FiniteElement::CURL && dim == 3)
3632 PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
3636 PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), geom->J,
3640 else if (testType == mfem::FiniteElement::DIV &&
3641 trialType == mfem::FiniteElement::CURL && dim == 3 &&
3642 test_fel->GetOrder() == trial_fel->GetOrder())
3644 PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
3649 MFEM_ABORT("Unknown kernel.
");
3653 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3654 // integrated against H(curl) test functions corresponding to y.
3655 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3656 static void PAHcurlL2Apply3D(const int D1D,
3660 const Array<double> &bo,
3661 const Array<double> &bc,
3662 const Array<double> &bot,
3663 const Array<double> &bct,
3664 const Array<double> &gc,
3665 const Vector &pa_data,
3669 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
3670 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
3671 // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
3672 // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
3673 // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
3674 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3675 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3676 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3678 constexpr static int VDIM = 3;
3680 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3681 auto Bc = Reshape(bc.Read(), Q1D, D1D);
3682 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
3683 auto Bct = Reshape(bct.Read(), D1D, Q1D);
3684 auto Gc = Reshape(gc.Read(), Q1D, D1D);
3685 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3686 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3687 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3691 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3692 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3694 for (int qz = 0; qz < Q1D; ++qz)
3696 for (int qy = 0; qy < Q1D; ++qy)
3698 for (int qx = 0; qx < Q1D; ++qx)
3700 for (int c = 0; c < VDIM; ++c)
3702 curl[qz][qy][qx][c] = 0.0;
3708 // We treat x, y, z components separately for optimization specific to each.
3714 const int D1Dz = D1D;
3715 const int D1Dy = D1D;
3716 const int D1Dx = D1D - 1;
3718 for (int dz = 0; dz < D1Dz; ++dz)
3720 double gradXY[MAX_Q1D][MAX_Q1D][2];
3721 for (int qy = 0; qy < Q1D; ++qy)
3723 for (int qx = 0; qx < Q1D; ++qx)
3725 for (int d = 0; d < 2; ++d)
3727 gradXY[qy][qx][d] = 0.0;
3732 for (int dy = 0; dy < D1Dy; ++dy)
3734 double massX[MAX_Q1D];
3735 for (int qx = 0; qx < Q1D; ++qx)
3740 for (int dx = 0; dx < D1Dx; ++dx)
3742 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3743 for (int qx = 0; qx < Q1D; ++qx)
3745 massX[qx] += t * Bo(qx,dx);
3749 for (int qy = 0; qy < Q1D; ++qy)
3751 const double wy = Bc(qy,dy);
3752 const double wDy = Gc(qy,dy);
3753 for (int qx = 0; qx < Q1D; ++qx)
3755 const double wx = massX[qx];
3756 gradXY[qy][qx][0] += wx * wDy;
3757 gradXY[qy][qx][1] += wx * wy;
3762 for (int qz = 0; qz < Q1D; ++qz)
3764 const double wz = Bc(qz,dz);
3765 const double wDz = Gc(qz,dz);
3766 for (int qy = 0; qy < Q1D; ++qy)
3768 for (int qx = 0; qx < Q1D; ++qx)
3770 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3771 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3772 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3778 osc += D1Dx * D1Dy * D1Dz;
3783 const int D1Dz = D1D;
3784 const int D1Dy = D1D - 1;
3785 const int D1Dx = D1D;
3787 for (int dz = 0; dz < D1Dz; ++dz)
3789 double gradXY[MAX_Q1D][MAX_Q1D][2];
3790 for (int qy = 0; qy < Q1D; ++qy)
3792 for (int qx = 0; qx < Q1D; ++qx)
3794 for (int d = 0; d < 2; ++d)
3796 gradXY[qy][qx][d] = 0.0;
3801 for (int dx = 0; dx < D1Dx; ++dx)
3803 double massY[MAX_Q1D];
3804 for (int qy = 0; qy < Q1D; ++qy)
3809 for (int dy = 0; dy < D1Dy; ++dy)
3811 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3812 for (int qy = 0; qy < Q1D; ++qy)
3814 massY[qy] += t * Bo(qy,dy);
3818 for (int qx = 0; qx < Q1D; ++qx)
3820 const double wx = Bc(qx,dx);
3821 const double wDx = Gc(qx,dx);
3822 for (int qy = 0; qy < Q1D; ++qy)
3824 const double wy = massY[qy];
3825 gradXY[qy][qx][0] += wDx * wy;
3826 gradXY[qy][qx][1] += wx * wy;
3831 for (int qz = 0; qz < Q1D; ++qz)
3833 const double wz = Bc(qz,dz);
3834 const double wDz = Gc(qz,dz);
3835 for (int qy = 0; qy < Q1D; ++qy)
3837 for (int qx = 0; qx < Q1D; ++qx)
3839 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3840 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3841 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3847 osc += D1Dx * D1Dy * D1Dz;
3852 const int D1Dz = D1D - 1;
3853 const int D1Dy = D1D;
3854 const int D1Dx = D1D;
3856 for (int dx = 0; dx < D1Dx; ++dx)
3858 double gradYZ[MAX_Q1D][MAX_Q1D][2];
3859 for (int qz = 0; qz < Q1D; ++qz)
3861 for (int qy = 0; qy < Q1D; ++qy)
3863 for (int d = 0; d < 2; ++d)
3865 gradYZ[qz][qy][d] = 0.0;
3870 for (int dy = 0; dy < D1Dy; ++dy)
3872 double massZ[MAX_Q1D];
3873 for (int qz = 0; qz < Q1D; ++qz)
3878 for (int dz = 0; dz < D1Dz; ++dz)
3880 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3881 for (int qz = 0; qz < Q1D; ++qz)
3883 massZ[qz] += t * Bo(qz,dz);
3887 for (int qy = 0; qy < Q1D; ++qy)
3889 const double wy = Bc(qy,dy);
3890 const double wDy = Gc(qy,dy);
3891 for (int qz = 0; qz < Q1D; ++qz)
3893 const double wz = massZ[qz];
3894 gradYZ[qz][qy][0] += wz * wy;
3895 gradYZ[qz][qy][1] += wz * wDy;
3900 for (int qx = 0; qx < Q1D; ++qx)
3902 const double wx = Bc(qx,dx);
3903 const double wDx = Gc(qx,dx);
3905 for (int qy = 0; qy < Q1D; ++qy)
3907 for (int qz = 0; qz < Q1D; ++qz)
3909 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3910 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3911 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3918 // Apply D operator.
3919 for (int qz = 0; qz < Q1D; ++qz)
3921 for (int qy = 0; qy < Q1D; ++qy)
3923 for (int qx = 0; qx < Q1D; ++qx)
3925 const double O11 = op(0,qx,qy,qz,e);
3928 for (int c = 0; c < VDIM; ++c)
3930 curl[qz][qy][qx][c] *= O11;
3935 const double O21 = op(1,qx,qy,qz,e);
3936 const double O31 = op(2,qx,qy,qz,e);
3937 const double O12 = op(3,qx,qy,qz,e);
3938 const double O22 = op(4,qx,qy,qz,e);
3939 const double O32 = op(5,qx,qy,qz,e);
3940 const double O13 = op(6,qx,qy,qz,e);
3941 const double O23 = op(7,qx,qy,qz,e);
3942 const double O33 = op(8,qx,qy,qz,e);
3943 const double curlX = curl[qz][qy][qx][0];
3944 const double curlY = curl[qz][qy][qx][1];
3945 const double curlZ = curl[qz][qy][qx][2];
3946 curl[qz][qy][qx][0] = (O11*curlX)+(O12*curlY)+(O13*curlZ);
3947 curl[qz][qy][qx][1] = (O21*curlX)+(O22*curlY)+(O23*curlZ);
3948 curl[qz][qy][qx][2] = (O31*curlX)+(O32*curlY)+(O33*curlZ);
3954 for (int qz = 0; qz < Q1D; ++qz)
3956 double massXY[MAX_D1D][MAX_D1D];
3960 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3962 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3963 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3964 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3966 for (int dy = 0; dy < D1Dy; ++dy)
3968 for (int dx = 0; dx < D1Dx; ++dx)
3973 for (int qy = 0; qy < Q1D; ++qy)
3975 double massX[MAX_D1D];
3976 for (int dx = 0; dx < D1Dx; ++dx)
3980 for (int qx = 0; qx < Q1D; ++qx)
3982 for (int dx = 0; dx < D1Dx; ++dx)
3984 massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3988 for (int dy = 0; dy < D1Dy; ++dy)
3990 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3991 for (int dx = 0; dx < D1Dx; ++dx)
3993 massXY[dy][dx] += massX[dx] * wy;
3998 for (int dz = 0; dz < D1Dz; ++dz)
4000 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
4001 for (int dy = 0; dy < D1Dy; ++dy)
4003 for (int dx = 0; dx < D1Dx; ++dx)
4005 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
4010 osc += D1Dx * D1Dy * D1Dz;
4013 }); // end of element loop
4016 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
4017 // integrated against H(curl) test functions corresponding to y.
4018 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4019 static void SmemPAHcurlL2Apply3D(const int D1D,
4023 const Array<double> &bo,
4024 const Array<double> &bc,
4025 const Array<double> &gc,
4026 const Vector &pa_data,
4030 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
4031 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
4033 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4034 auto Bc = Reshape(bc.Read(), Q1D, D1D);
4035 auto Gc = Reshape(gc.Read(), Q1D, D1D);
4036 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4037 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4038 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4040 auto device_kernel = [=] MFEM_DEVICE (int e)
4042 constexpr int VDIM = 3;
4043 constexpr int maxCoeffDim = 9;
4045 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
4046 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
4047 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
4049 double opc[maxCoeffDim];
4050 MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
4051 MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
4053 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
4055 MFEM_FOREACH_THREAD(qx,x,Q1D)
4057 MFEM_FOREACH_THREAD(qy,y,Q1D)
4059 MFEM_FOREACH_THREAD(qz,z,Q1D)
4061 for (int i=0; i<coeffDim; ++i)
4063 opc[i] = op(i,qx,qy,qz,e);
4069 const int tidx = MFEM_THREAD_ID(x);
4070 const int tidy = MFEM_THREAD_ID(y);
4071 const int tidz = MFEM_THREAD_ID(z);
4075 MFEM_FOREACH_THREAD(d,y,D1D)
4077 MFEM_FOREACH_THREAD(q,x,Q1D)
4079 sBc[d][q] = Bc(q,d);
4080 sGc[d][q] = Gc(q,d);
4083 sBo[d][q] = Bo(q,d);
4090 for (int qz=0; qz < Q1D; ++qz)
4094 MFEM_FOREACH_THREAD(qy,y,Q1D)
4096 MFEM_FOREACH_THREAD(qx,x,Q1D)
4098 for (int i=0; i<3; ++i)
4100 curl[qy][qx][i] = 0.0;
4107 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4109 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4110 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4111 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4113 MFEM_FOREACH_THREAD(dz,z,D1Dz)
4115 MFEM_FOREACH_THREAD(dy,y,D1Dy)
4117 MFEM_FOREACH_THREAD(dx,x,D1Dx)
4119 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4129 for (int i=0; i<coeffDim; ++i)
4131 sop[i][tidx][tidy] = opc[i];
4135 MFEM_FOREACH_THREAD(qy,y,Q1D)
4137 MFEM_FOREACH_THREAD(qx,x,Q1D)
4142 // We treat x, y, z components separately for optimization specific to each.
4143 if (c == 0) // x component
4145 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4147 for (int dz = 0; dz < D1Dz; ++dz)
4149 const double wz = sBc[dz][qz];
4150 const double wDz = sGc[dz][qz];
4152 for (int dy = 0; dy < D1Dy; ++dy)
4154 const double wy = sBc[dy][qy];
4155 const double wDy = sGc[dy][qy];
4157 for (int dx = 0; dx < D1Dx; ++dx)
4159 const double wx = sX[dz][dy][dx] * sBo[dx][qx];
4166 curl[qy][qx][1] += v; // (u_0)_{x_2}
4167 curl[qy][qx][2] -= u; // -(u_0)_{x_1}
4169 else if (c == 1) // y component
4171 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4173 for (int dz = 0; dz < D1Dz; ++dz)
4175 const double wz = sBc[dz][qz];
4176 const double wDz = sGc[dz][qz];
4178 for (int dy = 0; dy < D1Dy; ++dy)
4180 const double wy = sBo[dy][qy];
4182 for (int dx = 0; dx < D1Dx; ++dx)
4184 const double t = sX[dz][dy][dx];
4185 const double wx = t * sBc[dx][qx];
4186 const double wDx = t * sGc[dx][qx];
4194 curl[qy][qx][0] -= v; // -(u_1)_{x_2}
4195 curl[qy][qx][2] += u; // (u_1)_{x_0}
4199 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4201 for (int dz = 0; dz < D1Dz; ++dz)
4203 const double wz = sBo[dz][qz];
4205 for (int dy = 0; dy < D1Dy; ++dy)
4207 const double wy = sBc[dy][qy];
4208 const double wDy = sGc[dy][qy];
4210 for (int dx = 0; dx < D1Dx; ++dx)
4212 const double t = sX[dz][dy][dx];
4213 const double wx = t * sBc[dx][qx];
4214 const double wDx = t * sGc[dx][qx];
4222 curl[qy][qx][0] += v; // (u_2)_{x_1}
4223 curl[qy][qx][1] -= u; // -(u_2)_{x_0}
4229 osc += D1Dx * D1Dy * D1Dz;
4237 MFEM_FOREACH_THREAD(dz,z,D1D)
4239 const double wcz = sBc[dz][qz];
4240 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4242 MFEM_FOREACH_THREAD(dy,y,D1D)
4244 MFEM_FOREACH_THREAD(dx,x,D1D)
4246 for (int qy = 0; qy < Q1D; ++qy)
4248 const double wcy = sBc[dy][qy];
4249 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4251 for (int qx = 0; qx < Q1D; ++qx)
4253 const double O11 = sop[0][qx][qy];
4257 c1 = O11 * curl[qy][qx][0];
4258 c2 = O11 * curl[qy][qx][1];
4259 c3 = O11 * curl[qy][qx][2];
4263 const double O21 = sop[1][qx][qy];
4264 const double O31 = sop[2][qx][qy];
4265 const double O12 = sop[3][qx][qy];
4266 const double O22 = sop[4][qx][qy];
4267 const double O32 = sop[5][qx][qy];
4268 const double O13 = sop[6][qx][qy];
4269 const double O23 = sop[7][qx][qy];
4270 const double O33 = sop[8][qx][qy];
4271 c1 = (O11*curl[qy][qx][0])+(O12*curl[qy][qx][1])+(O13*curl[qy][qx][2]);
4272 c2 = (O21*curl[qy][qx][0])+(O22*curl[qy][qx][1])+(O23*curl[qy][qx][2]);
4273 c3 = (O31*curl[qy][qx][0])+(O32*curl[qy][qx][1])+(O33*curl[qy][qx][2]);
4276 const double wcx = sBc[dx][qx];
4280 const double wx = sBo[dx][qx];
4281 dxyz1 += c1 * wx * wcy * wcz;
4284 dxyz2 += c2 * wcx * wy * wcz;
4285 dxyz3 += c3 * wcx * wcy * wz;
4294 MFEM_FOREACH_THREAD(dz,z,D1D)
4296 MFEM_FOREACH_THREAD(dy,y,D1D)
4298 MFEM_FOREACH_THREAD(dx,x,D1D)
4302 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4306 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4310 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4316 }; // end of element loop
4318 auto host_kernel = [&] MFEM_LAMBDA (int)
4320 MFEM_ABORT_KERNEL("This kernel should only be used on GPU.
");
4323 ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
4326 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
4327 // integrated against H(div) test functions corresponding to y.
4328 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4329 static void PAHcurlHdivApply3D(const int D1D,
4333 const Array<double> &bo,
4334 const Array<double> &bc,
4335 const Array<double> &bot,
4336 const Array<double> &bct,
4337 const Array<double> &gc,
4338 const Vector &pa_data,
4342 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
4343 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
4344 // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
4345 // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
4346 // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
4347 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4348 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4349 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4351 constexpr static int VDIM = 3;
4353 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4354 auto Bc = Reshape(bc.Read(), Q1D, D1D);
4355 auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
4356 auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
4357 auto Gc = Reshape(gc.Read(), Q1D, D1D);
4358 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
4359 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4360 auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
4364 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4365 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
4367 for (int qz = 0; qz < Q1D; ++qz)
4369 for (int qy = 0; qy < Q1D; ++qy)
4371 for (int qx = 0; qx < Q1D; ++qx)
4373 for (int c = 0; c < VDIM; ++c)
4375 curl[qz][qy][qx][c] = 0.0;
4381 // We treat x, y, z components separately for optimization specific to each.
4387 const int D1Dz = D1D;
4388 const int D1Dy = D1D;
4389 const int D1Dx = D1D - 1;
4391 for (int dz = 0; dz < D1Dz; ++dz)
4393 double gradXY[MAX_Q1D][MAX_Q1D][2];
4394 for (int qy = 0; qy < Q1D; ++qy)
4396 for (int qx = 0; qx < Q1D; ++qx)
4398 for (int d = 0; d < 2; ++d)
4400 gradXY[qy][qx][d] = 0.0;
4405 for (int dy = 0; dy < D1Dy; ++dy)
4407 double massX[MAX_Q1D];
4408 for (int qx = 0; qx < Q1D; ++qx)
4413 for (int dx = 0; dx < D1Dx; ++dx)
4415 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4416 for (int qx = 0; qx < Q1D; ++qx)
4418 massX[qx] += t * Bo(qx,dx);
4422 for (int qy = 0; qy < Q1D; ++qy)
4424 const double wy = Bc(qy,dy);
4425 const double wDy = Gc(qy,dy);
4426 for (int qx = 0; qx < Q1D; ++qx)
4428 const double wx = massX[qx];
4429 gradXY[qy][qx][0] += wx * wDy;
4430 gradXY[qy][qx][1] += wx * wy;
4435 for (int qz = 0; qz < Q1D; ++qz)
4437 const double wz = Bc(qz,dz);
4438 const double wDz = Gc(qz,dz);
4439 for (int qy = 0; qy < Q1D; ++qy)
4441 for (int qx = 0; qx < Q1D; ++qx)
4443 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4444 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
4445 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
4451 osc += D1Dx * D1Dy * D1Dz;
4456 const int D1Dz = D1D;
4457 const int D1Dy = D1D - 1;
4458 const int D1Dx = D1D;
4460 for (int dz = 0; dz < D1Dz; ++dz)
4462 double gradXY[MAX_Q1D][MAX_Q1D][2];
4463 for (int qy = 0; qy < Q1D; ++qy)
4465 for (int qx = 0; qx < Q1D; ++qx)
4467 for (int d = 0; d < 2; ++d)
4469 gradXY[qy][qx][d] = 0.0;
4474 for (int dx = 0; dx < D1Dx; ++dx)
4476 double massY[MAX_Q1D];
4477 for (int qy = 0; qy < Q1D; ++qy)
4482 for (int dy = 0; dy < D1Dy; ++dy)
4484 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4485 for (int qy = 0; qy < Q1D; ++qy)
4487 massY[qy] += t * Bo(qy,dy);
4491 for (int qx = 0; qx < Q1D; ++qx)
4493 const double wx = Bc(qx,dx);
4494 const double wDx = Gc(qx,dx);
4495 for (int qy = 0; qy < Q1D; ++qy)
4497 const double wy = massY[qy];
4498 gradXY[qy][qx][0] += wDx * wy;
4499 gradXY[qy][qx][1] += wx * wy;
4504 for (int qz = 0; qz < Q1D; ++qz)
4506 const double wz = Bc(qz,dz);
4507 const double wDz = Gc(qz,dz);
4508 for (int qy = 0; qy < Q1D; ++qy)
4510 for (int qx = 0; qx < Q1D; ++qx)
4512 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4513 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
4514 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
4520 osc += D1Dx * D1Dy * D1Dz;
4525 const int D1Dz = D1D - 1;
4526 const int D1Dy = D1D;
4527 const int D1Dx = D1D;
4529 for (int dx = 0; dx < D1Dx; ++dx)
4531 double gradYZ[MAX_Q1D][MAX_Q1D][2];
4532 for (int qz = 0; qz < Q1D; ++qz)
4534 for (int qy = 0; qy < Q1D; ++qy)
4536 for (int d = 0; d < 2; ++d)
4538 gradYZ[qz][qy][d] = 0.0;
4543 for (int dy = 0; dy < D1Dy; ++dy)
4545 double massZ[MAX_Q1D];
4546 for (int qz = 0; qz < Q1D; ++qz)
4551 for (int dz = 0; dz < D1Dz; ++dz)
4553 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4554 for (int qz = 0; qz < Q1D; ++qz)
4556 massZ[qz] += t * Bo(qz,dz);
4560 for (int qy = 0; qy < Q1D; ++qy)
4562 const double wy = Bc(qy,dy);
4563 const double wDy = Gc(qy,dy);
4564 for (int qz = 0; qz < Q1D; ++qz)
4566 const double wz = massZ[qz];
4567 gradYZ[qz][qy][0] += wz * wy;
4568 gradYZ[qz][qy][1] += wz * wDy;
4573 for (int qx = 0; qx < Q1D; ++qx)
4575 const double wx = Bc(qx,dx);
4576 const double wDx = Gc(qx,dx);
4578 for (int qy = 0; qy < Q1D; ++qy)
4580 for (int qz = 0; qz < Q1D; ++qz)
4582 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4583 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
4584 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
4591 // Apply D operator.
4592 for (int qz = 0; qz < Q1D; ++qz)
4594 for (int qy = 0; qy < Q1D; ++qy)
4596 for (int qx = 0; qx < Q1D; ++qx)
4598 const double O11 = op(qx,qy,qz,0,e);
4599 const double O12 = op(qx,qy,qz,1,e);
4600 const double O13 = op(qx,qy,qz,2,e);
4601 const double O22 = op(qx,qy,qz,3,e);
4602 const double O23 = op(qx,qy,qz,4,e);
4603 const double O33 = op(qx,qy,qz,5,e);
4605 const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
4606 (O13 * curl[qz][qy][qx][2]);
4607 const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
4608 (O23 * curl[qz][qy][qx][2]);
4609 const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
4610 (O33 * curl[qz][qy][qx][2]);
4612 curl[qz][qy][qx][0] = c1;
4613 curl[qz][qy][qx][1] = c2;
4614 curl[qz][qy][qx][2] = c3;
4619 for (int qz = 0; qz < Q1D; ++qz)
4621 double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
4625 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4627 const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
4628 const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
4629 const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
4631 for (int dy = 0; dy < D1Dy; ++dy)
4633 for (int dx = 0; dx < D1Dx; ++dx)
4638 for (int qy = 0; qy < Q1D; ++qy)
4640 double massX[HCURL_MAX_D1D];
4641 for (int dx = 0; dx < D1Dx; ++dx)
4645 for (int qx = 0; qx < Q1D; ++qx)
4647 for (int dx = 0; dx < D1Dx; ++dx)
4649 massX[dx] += curl[qz][qy][qx][c] *
4650 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
4653 for (int dy = 0; dy < D1Dy; ++dy)
4655 const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
4656 for (int dx = 0; dx < D1Dx; ++dx)
4658 massXY[dy][dx] += massX[dx] * wy;
4663 for (int dz = 0; dz < D1Dz; ++dz)
4665 const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
4666 for (int dy = 0; dy < D1Dy; ++dy)
4668 for (int dx = 0; dx < D1Dx; ++dx)
4670 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
4671 massXY[dy][dx] * wz;
4676 osc += D1Dx * D1Dy * D1Dz;
4679 }); // end of element loop
4682 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4684 if (testType == mfem::FiniteElement::CURL &&
4685 trialType == mfem::FiniteElement::CURL && dim == 3)
4687 const int ndata = coeffDim == 1 ? 1 : 9;
4689 if (Device::Allows(Backend::DEVICE_MASK))
4691 const int ID = (dofs1D << 4) | quad1D;
4694 case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, ndata, ne,
4696 mapsC->G, pa_data, x, y);
4697 case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, ndata, ne,
4699 mapsC->G, pa_data, x, y);
4700 case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, ndata, ne,
4702 mapsC->G, pa_data, x, y);
4703 case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, ndata, ne,
4705 mapsC->G, pa_data, x, y);
4706 default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne,
4707 mapsO->B, mapsC->B, mapsC->G,
4712 PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B,
4713 mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
4715 else if (testType == mfem::FiniteElement::DIV &&
4716 trialType == mfem::FiniteElement::CURL && dim == 3)
4717 PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
4718 mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
4722 MFEM_ABORT("Unsupported dimension or
space!
");
4726 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
4728 const FiniteElementSpace &test_fes)
4730 // Assumes tensor-product elements, with vector test and trial spaces.
4731 Mesh *mesh = trial_fes.GetMesh();
4732 const FiniteElement *trial_fel = trial_fes.GetFE(0);
4733 const FiniteElement *test_fel = test_fes.GetFE(0);
4735 const VectorTensorFiniteElement *trial_el =
4736 dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
4739 const VectorTensorFiniteElement *test_el =
4740 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
4743 const IntegrationRule *ir
4744 = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
4745 *mesh->GetElementTransformation(0));
4746 const int dims = trial_el->GetDim();
4747 MFEM_VERIFY(dims == 3, "");
4749 const int nq = ir->GetNPoints();
4750 dim = mesh->Dimension();
4751 MFEM_VERIFY(dim == 3, "");
4753 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
4755 ne = trial_fes.GetNE();
4756 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
4757 mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
4758 mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
4759 dofs1D = mapsC->ndof;
4760 quad1D = mapsC->nqpt;
4762 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
4764 coeffDim = DQ ? 3 : 1;
4765 const int ndata = DQ ? 9 : 1;
4767 pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
4769 Vector coeff(coeffDim * nq * ne);
4771 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
4777 MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
4780 for (int e=0; e<ne; ++e)
4782 ElementTransformation *tr = mesh->GetElementTransformation(e);
4784 for (int p=0; p<nq; ++p)
4788 DQ->Eval(V, *tr, ir->IntPoint(p));
4789 for (int i=0; i<coeffDim; ++i)
4791 coeffh(i, p, e) = V[i];
4796 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
4802 testType = test_el->GetDerivType();
4803 trialType = trial_el->GetDerivType();
4805 if (trialType == mfem::FiniteElement::CURL && dim == 3)
4809 PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
4813 PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), geom->J,
4819 MFEM_ABORT("Unknown kernel.
");
4823 // Apply to x corresponding to DOF's in H(curl) (trial), integrated against curl
4824 // of H(curl) test functions corresponding to y.
4825 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4826 static void PAHcurlL2Apply3DTranspose(const int D1D,
4830 const Array<double> &bo,
4831 const Array<double> &bc,
4832 const Array<double> &bot,
4833 const Array<double> &bct,
4834 const Array<double> &gct,
4835 const Vector &pa_data,
4839 // See PAHcurlL2Apply3D for comments.
4841 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
4842 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
4844 constexpr static int VDIM = 3;
4846 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4847 auto Bc = Reshape(bc.Read(), Q1D, D1D);
4848 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
4849 auto Bct = Reshape(bct.Read(), D1D, Q1D);
4850 auto Gct = Reshape(gct.Read(), D1D, Q1D);
4851 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4852 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4853 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4857 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4859 for (int qz = 0; qz < Q1D; ++qz)
4861 for (int qy = 0; qy < Q1D; ++qy)
4863 for (int qx = 0; qx < Q1D; ++qx)
4865 for (int c = 0; c < VDIM; ++c)
4867 mass[qz][qy][qx][c] = 0.0;
4875 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4877 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4878 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4879 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4881 for (int dz = 0; dz < D1Dz; ++dz)
4883 double massXY[MAX_Q1D][MAX_Q1D];
4884 for (int qy = 0; qy < Q1D; ++qy)
4886 for (int qx = 0; qx < Q1D; ++qx)
4888 massXY[qy][qx] = 0.0;
4892 for (int dy = 0; dy < D1Dy; ++dy)
4894 double massX[MAX_Q1D];
4895 for (int qx = 0; qx < Q1D; ++qx)
4900 for (int dx = 0; dx < D1Dx; ++dx)
4902 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4903 for (int qx = 0; qx < Q1D; ++qx)
4905 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
4909 for (int qy = 0; qy < Q1D; ++qy)
4911 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
4912 for (int qx = 0; qx < Q1D; ++qx)
4914 const double wx = massX[qx];
4915 massXY[qy][qx] += wx * wy;
4920 for (int qz = 0; qz < Q1D; ++qz)
4922 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
4923 for (int qy = 0; qy < Q1D; ++qy)
4925 for (int qx = 0; qx < Q1D; ++qx)
4927 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4933 osc += D1Dx * D1Dy * D1Dz;
4934 } // loop (c) over components
4936 // Apply D operator.
4937 for (int qz = 0; qz < Q1D; ++qz)
4939 for (int qy = 0; qy < Q1D; ++qy)
4941 for (int qx = 0; qx < Q1D; ++qx)
4943 const double O11 = op(0,qx,qy,qz,e);
4946 for (int c = 0; c < VDIM; ++c)
4948 mass[qz][qy][qx][c] *= O11;
4953 const double O12 = op(1,qx,qy,qz,e);
4954 const double O13 = op(2,qx,qy,qz,e);
4955 const double O21 = op(3,qx,qy,qz,e);
4956 const double O22 = op(4,qx,qy,qz,e);
4957 const double O23 = op(5,qx,qy,qz,e);
4958 const double O31 = op(6,qx,qy,qz,e);
4959 const double O32 = op(7,qx,qy,qz,e);
4960 const double O33 = op(8,qx,qy,qz,e);
4961 const double massX = mass[qz][qy][qx][0];
4962 const double massY = mass[qz][qy][qx][1];
4963 const double massZ = mass[qz][qy][qx][2];
4964 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
4965 mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
4966 mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
4975 const int D1Dz = D1D;
4976 const int D1Dy = D1D;
4977 const int D1Dx = D1D - 1;
4979 for (int qz = 0; qz < Q1D; ++qz)
4981 double gradXY12[MAX_D1D][MAX_D1D];
4982 double gradXY21[MAX_D1D][MAX_D1D];
4984 for (int dy = 0; dy < D1Dy; ++dy)
4986 for (int dx = 0; dx < D1Dx; ++dx)
4988 gradXY12[dy][dx] = 0.0;
4989 gradXY21[dy][dx] = 0.0;
4992 for (int qy = 0; qy < Q1D; ++qy)
4994 double massX[MAX_D1D][2];
4995 for (int dx = 0; dx < D1Dx; ++dx)
4997 for (int n = 0; n < 2; ++n)
5002 for (int qx = 0; qx < Q1D; ++qx)
5004 for (int dx = 0; dx < D1Dx; ++dx)
5006 const double wx = Bot(dx,qx);
5008 massX[dx][0] += wx * mass[qz][qy][qx][1];
5009 massX[dx][1] += wx * mass[qz][qy][qx][2];
5012 for (int dy = 0; dy < D1Dy; ++dy)
5014 const double wy = Bct(dy,qy);
5015 const double wDy = Gct(dy,qy);
5017 for (int dx = 0; dx < D1Dx; ++dx)
5019 gradXY21[dy][dx] += massX[dx][0] * wy;
5020 gradXY12[dy][dx] += massX[dx][1] * wDy;
5025 for (int dz = 0; dz < D1Dz; ++dz)
5027 const double wz = Bct(dz,qz);
5028 const double wDz = Gct(dz,qz);
5029 for (int dy = 0; dy < D1Dy; ++dy)
5031 for (int dx = 0; dx < D1Dx; ++dx)
5033 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
5034 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
5035 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5036 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
5042 osc += D1Dx * D1Dy * D1Dz;
5047 const int D1Dz = D1D;
5048 const int D1Dy = D1D - 1;
5049 const int D1Dx = D1D;
5051 for (int qz = 0; qz < Q1D; ++qz)
5053 double gradXY02[MAX_D1D][MAX_D1D];
5054 double gradXY20[MAX_D1D][MAX_D1D];
5056 for (int dy = 0; dy < D1Dy; ++dy)
5058 for (int dx = 0; dx < D1Dx; ++dx)
5060 gradXY02[dy][dx] = 0.0;
5061 gradXY20[dy][dx] = 0.0;
5064 for (int qx = 0; qx < Q1D; ++qx)
5066 double massY[MAX_D1D][2];
5067 for (int dy = 0; dy < D1Dy; ++dy)
5072 for (int qy = 0; qy < Q1D; ++qy)
5074 for (int dy = 0; dy < D1Dy; ++dy)
5076 const double wy = Bot(dy,qy);
5078 massY[dy][0] += wy * mass[qz][qy][qx][2];
5079 massY[dy][1] += wy * mass[qz][qy][qx][0];
5082 for (int dx = 0; dx < D1Dx; ++dx)
5084 const double wx = Bct(dx,qx);
5085 const double wDx = Gct(dx,qx);
5087 for (int dy = 0; dy < D1Dy; ++dy)
5089 gradXY02[dy][dx] += massY[dy][0] * wDx;
5090 gradXY20[dy][dx] += massY[dy][1] * wx;
5095 for (int dz = 0; dz < D1Dz; ++dz)
5097 const double wz = Bct(dz,qz);
5098 const double wDz = Gct(dz,qz);
5099 for (int dy = 0; dy < D1Dy; ++dy)
5101 for (int dx = 0; dx < D1Dx; ++dx)
5103 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
5104 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
5105 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5106 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
5112 osc += D1Dx * D1Dy * D1Dz;
5117 const int D1Dz = D1D - 1;
5118 const int D1Dy = D1D;
5119 const int D1Dx = D1D;
5121 for (int qx = 0; qx < Q1D; ++qx)
5123 double gradYZ01[MAX_D1D][MAX_D1D];
5124 double gradYZ10[MAX_D1D][MAX_D1D];
5126 for (int dy = 0; dy < D1Dy; ++dy)
5128 for (int dz = 0; dz < D1Dz; ++dz)
5130 gradYZ01[dz][dy] = 0.0;
5131 gradYZ10[dz][dy] = 0.0;
5134 for (int qy = 0; qy < Q1D; ++qy)
5136 double massZ[MAX_D1D][2];
5137 for (int dz = 0; dz < D1Dz; ++dz)
5139 for (int n = 0; n < 2; ++n)
5144 for (int qz = 0; qz < Q1D; ++qz)
5146 for (int dz = 0; dz < D1Dz; ++dz)
5148 const double wz = Bot(dz,qz);
5150 massZ[dz][0] += wz * mass[qz][qy][qx][0];
5151 massZ[dz][1] += wz * mass[qz][qy][qx][1];
5154 for (int dy = 0; dy < D1Dy; ++dy)
5156 const double wy = Bct(dy,qy);
5157 const double wDy = Gct(dy,qy);
5159 for (int dz = 0; dz < D1Dz; ++dz)
5161 gradYZ01[dz][dy] += wy * massZ[dz][1];
5162 gradYZ10[dz][dy] += wDy * massZ[dz][0];
5167 for (int dx = 0; dx < D1Dx; ++dx)
5169 const double wx = Bct(dx,qx);
5170 const double wDx = Gct(dx,qx);
5172 for (int dy = 0; dy < D1Dy; ++dy)
5174 for (int dz = 0; dz < D1Dz; ++dz)
5176 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
5177 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
5178 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
5179 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
5188 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
5189 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
5193 const Array<double> &bo,
5194 const Array<double> &bc,
5195 const Array<double> &gc,
5196 const Vector &pa_data,
5200 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
5201 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
5203 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
5204 auto Bc = Reshape(bc.Read(), Q1D, D1D);
5205 auto Gc = Reshape(gc.Read(), Q1D, D1D);
5206 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
5207 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
5208 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
5210 auto device_kernel = [=] MFEM_DEVICE (int e)
5212 constexpr int VDIM = 3;
5213 constexpr int maxCoeffDim = 9;
5215 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
5216 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
5217 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
5219 double opc[maxCoeffDim];
5220 MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
5221 MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
5223 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
5225 MFEM_FOREACH_THREAD(qx,x,Q1D)
5227 MFEM_FOREACH_THREAD(qy,y,Q1D)
5229 MFEM_FOREACH_THREAD(qz,z,Q1D)
5231 for (int i=0; i<coeffDim; ++i)
5233 opc[i] = op(i,qx,qy,qz,e);
5239 const int tidx = MFEM_THREAD_ID(x);
5240 const int tidy = MFEM_THREAD_ID(y);
5241 const int tidz = MFEM_THREAD_ID(z);
5245 MFEM_FOREACH_THREAD(d,y,D1D)
5247 MFEM_FOREACH_THREAD(q,x,Q1D)
5249 sBc[d][q] = Bc(q,d);
5250 sGc[d][q] = Gc(q,d);
5253 sBo[d][q] = Bo(q,d);
5260 for (int qz=0; qz < Q1D; ++qz)
5264 MFEM_FOREACH_THREAD(qy,y,Q1D)
5266 MFEM_FOREACH_THREAD(qx,x,Q1D)
5268 for (int i=0; i<3; ++i)
5270 mass[qy][qx][i] = 0.0;
5277 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
5279 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
5280 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
5281 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
5283 MFEM_FOREACH_THREAD(dz,z,D1Dz)
5285 MFEM_FOREACH_THREAD(dy,y,D1Dy)
5287 MFEM_FOREACH_THREAD(dx,x,D1Dx)
5289 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
5299 for (int i=0; i<coeffDim; ++i)
5301 sop[i][tidx][tidy] = opc[i];
5305 MFEM_FOREACH_THREAD(qy,y,Q1D)
5307 MFEM_FOREACH_THREAD(qx,x,Q1D)
5311 for (int dz = 0; dz < D1Dz; ++dz)
5313 const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
5315 for (int dy = 0; dy < D1Dy; ++dy)
5317 const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
5319 for (int dx = 0; dx < D1Dx; ++dx)
5321 const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
5327 mass[qy][qx][c] += u;
5332 osc += D1Dx * D1Dy * D1Dz;
5340 MFEM_FOREACH_THREAD(dz,z,D1D)
5342 const double wcz = sBc[dz][qz];
5343 const double wcDz = sGc[dz][qz];
5344 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
5346 MFEM_FOREACH_THREAD(dy,y,D1D)
5348 MFEM_FOREACH_THREAD(dx,x,D1D)
5350 for (int qy = 0; qy < Q1D; ++qy)
5352 const double wcy = sBc[dy][qy];
5353 const double wcDy = sGc[dy][qy];
5354 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
5356 for (int qx = 0; qx < Q1D; ++qx)
5358 const double O11 = sop[0][qx][qy];
5362 c1 = O11 * mass[qy][qx][0];
5363 c2 = O11 * mass[qy][qx][1];
5364 c3 = O11 * mass[qy][qx][2];
5368 const double O12 = sop[1][qx][qy];
5369 const double O13 = sop[2][qx][qy];
5370 const double O21 = sop[3][qx][qy];
5371 const double O22 = sop[4][qx][qy];
5372 const double O23 = sop[5][qx][qy];
5373 const double O31 = sop[6][qx][qy];
5374 const double O32 = sop[7][qx][qy];
5375 const double O33 = sop[8][qx][qy];
5377 c1 = (O11*mass[qy][qx][0])+(O12*mass[qy][qx][1])+(O13*mass[qy][qx][2]);
5378 c2 = (O21*mass[qy][qx][0])+(O22*mass[qy][qx][1])+(O23*mass[qy][qx][2]);
5379 c3 = (O31*mass[qy][qx][0])+(O32*mass[qy][qx][1])+(O33*mass[qy][qx][2]);
5382 const double wcx = sBc[dx][qx];
5383 const double wDx = sGc[dx][qx];
5387 const double wx = sBo[dx][qx];
5388 dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
5391 dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
5393 dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
5402 MFEM_FOREACH_THREAD(dz,z,D1D)
5404 MFEM_FOREACH_THREAD(dy,y,D1D)
5406 MFEM_FOREACH_THREAD(dx,x,D1D)
5410 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
5414 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
5418 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
5424 }; // end of element loop
5426 auto host_kernel = [&] MFEM_LAMBDA (int)
5428 MFEM_ABORT_KERNEL("This kernel should only be used on GPU.
");
5431 ForallWrap<3>(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D);
5434 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
5436 if (testType == mfem::FiniteElement::CURL &&
5437 trialType == mfem::FiniteElement::CURL && dim == 3)
5439 const int ndata = coeffDim == 1 ? 1 : 9;
5440 if (Device::Allows(Backend::DEVICE_MASK))
5442 const int ID = (dofs1D << 4) | quad1D;
5445 case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, ndata,
5446 ne, mapsO->B, mapsC->B,
5447 mapsC->G, pa_data, x, y);
5448 case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, ndata,
5449 ne, mapsO->B, mapsC->B,
5450 mapsC->G, pa_data, x, y);
5451 case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, ndata,
5452 ne, mapsO->B, mapsC->B,
5453 mapsC->G, pa_data, x, y);
5454 case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, ndata,
5455 ne, mapsO->B, mapsC->B,
5456 mapsC->G, pa_data, x, y);
5457 default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne,
5459 mapsC->G, pa_data, x, y);
5463 PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B,
5464 mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
5468 MFEM_ABORT("Unsupported dimension or
space!
");
5472 // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient
5473 // to get a dof in H(curl) (range). You can think of the range as the "test
" space
5474 // and the domain as the "trial
" space, but there's no integration.
5475 static void PAHcurlApplyGradient2D(const int c_dofs1D,
5478 const Array<double> &B_,
5479 const Array<double> &G_,
5483 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5484 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5486 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5487 auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5489 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5490 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5494 double w[MAX_D1D][MAX_D1D];
5497 for (int dx = 0; dx < c_dofs1D; ++dx)
5499 for (int ey = 0; ey < c_dofs1D; ++ey)
5502 for (int dy = 0; dy < c_dofs1D; ++dy)
5504 w[dx][ey] += B(ey, dy) * x(dx, dy, e);
5509 for (int ey = 0; ey < c_dofs1D; ++ey)
5511 for (int ex = 0; ex < o_dofs1D; ++ex)
5514 for (int dx = 0; dx < c_dofs1D; ++dx)
5516 s += G(ex, dx) * w[dx][ey];
5518 const int local_index = ey*o_dofs1D + ex;
5519 y(local_index, e) += s;
5524 for (int dx = 0; dx < c_dofs1D; ++dx)
5526 for (int ey = 0; ey < o_dofs1D; ++ey)
5529 for (int dy = 0; dy < c_dofs1D; ++dy)
5531 w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5536 for (int ey = 0; ey < o_dofs1D; ++ey)
5538 for (int ex = 0; ex < c_dofs1D; ++ex)
5541 for (int dx = 0; dx < c_dofs1D; ++dx)
5543 s += B(ex, dx) * w[dx][ey];
5545 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5546 y(local_index, e) += s;
5552 // Specialization of PAHcurlApplyGradient2D to the case where B is identity
5553 static void PAHcurlApplyGradient2DBId(const int c_dofs1D,
5556 const Array<double> &G_,
5560 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5562 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
5563 auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
5565 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5566 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5570 double w[MAX_D1D][MAX_D1D];
5573 for (int dx = 0; dx < c_dofs1D; ++dx)
5575 for (int ey = 0; ey < c_dofs1D; ++ey)
5578 w[dx][ey] = x(dx, dy, e);
5582 for (int ey = 0; ey < c_dofs1D; ++ey)
5584 for (int ex = 0; ex < o_dofs1D; ++ex)
5587 for (int dx = 0; dx < c_dofs1D; ++dx)
5589 s += G(ex, dx) * w[dx][ey];
5591 const int local_index = ey*o_dofs1D + ex;
5592 y(local_index, e) += s;
5597 for (int dx = 0; dx < c_dofs1D; ++dx)
5599 for (int ey = 0; ey < o_dofs1D; ++ey)
5602 for (int dy = 0; dy < c_dofs1D; ++dy)
5604 w[dx][ey] += G(ey, dy) * x(dx, dy, e);
5609 for (int ey = 0; ey < o_dofs1D; ++ey)
5611 for (int ex = 0; ex < c_dofs1D; ++ex)
5614 const double s = w[dx][ey];
5615 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5616 y(local_index, e) += s;
5622 static void PAHcurlApplyGradientTranspose2D(
5623 const int c_dofs1D, const int o_dofs1D, const int NE,
5624 const Array<double> &B_, const Array<double> &G_,
5625 const Vector &x_, Vector &y_)
5627 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5628 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5630 auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5631 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5633 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5634 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5638 double w[MAX_D1D][MAX_D1D];
5640 // horizontal part (open x, closed y)
5641 for (int dy = 0; dy < c_dofs1D; ++dy)
5643 for (int ex = 0; ex < o_dofs1D; ++ex)
5646 for (int ey = 0; ey < c_dofs1D; ++ey)
5648 const int local_index = ey*o_dofs1D + ex;
5649 w[dy][ex] += B(ey, dy) * x(local_index, e);
5654 for (int dy = 0; dy < c_dofs1D; ++dy)
5656 for (int dx = 0; dx < c_dofs1D; ++dx)
5659 for (int ex = 0; ex < o_dofs1D; ++ex)
5661 s += G(ex, dx) * w[dy][ex];
5667 // vertical part (open y, closed x)
5668 for (int dy = 0; dy < c_dofs1D; ++dy)
5670 for (int ex = 0; ex < c_dofs1D; ++ex)
5673 for (int ey = 0; ey < o_dofs1D; ++ey)
5675 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5676 w[dy][ex] += G(ey, dy) * x(local_index, e);
5681 for (int dy = 0; dy < c_dofs1D; ++dy)
5683 for (int dx = 0; dx < c_dofs1D; ++dx)
5686 for (int ex = 0; ex < c_dofs1D; ++ex)
5688 s += B(ex, dx) * w[dy][ex];
5696 // Specialization of PAHcurlApplyGradientTranspose2D to the case where
5698 static void PAHcurlApplyGradientTranspose2DBId(
5699 const int c_dofs1D, const int o_dofs1D, const int NE,
5700 const Array<double> &G_,
5701 const Vector &x_, Vector &y_)
5703 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5705 auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
5706 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
5708 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5709 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5713 double w[MAX_D1D][MAX_D1D];
5715 // horizontal part (open x, closed y)
5716 for (int dy = 0; dy < c_dofs1D; ++dy)
5718 for (int ex = 0; ex < o_dofs1D; ++ex)
5721 const int local_index = ey*o_dofs1D + ex;
5722 w[dy][ex] = x(local_index, e);
5726 for (int dy = 0; dy < c_dofs1D; ++dy)
5728 for (int dx = 0; dx < c_dofs1D; ++dx)
5731 for (int ex = 0; ex < o_dofs1D; ++ex)
5733 s += G(ex, dx) * w[dy][ex];
5739 // vertical part (open y, closed x)
5740 for (int dy = 0; dy < c_dofs1D; ++dy)
5742 for (int ex = 0; ex < c_dofs1D; ++ex)
5745 for (int ey = 0; ey < o_dofs1D; ++ey)
5747 const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
5748 w[dy][ex] += G(ey, dy) * x(local_index, e);
5753 for (int dy = 0; dy < c_dofs1D; ++dy)
5755 for (int dx = 0; dx < c_dofs1D; ++dx)
5758 const double s = w[dy][ex];
5765 static void PAHcurlApplyGradient3D(const int c_dofs1D,
5768 const Array<double> &B_,
5769 const Array<double> &G_,
5773 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5774 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5776 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5777 auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5779 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5780 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5784 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5785 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5788 // dofs that point parallel to x-axis (open in x, closed in y, z)
5792 for (int ez = 0; ez < c_dofs1D; ++ez)
5794 for (int dx = 0; dx < c_dofs1D; ++dx)
5796 for (int dy = 0; dy < c_dofs1D; ++dy)
5798 w1[dx][dy][ez] = 0.0;
5799 for (int dz = 0; dz < c_dofs1D; ++dz)
5801 w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5808 for (int ez = 0; ez < c_dofs1D; ++ez)
5810 for (int ey = 0; ey < c_dofs1D; ++ey)
5812 for (int dx = 0; dx < c_dofs1D; ++dx)
5814 w2[dx][ey][ez] = 0.0;
5815 for (int dy = 0; dy < c_dofs1D; ++dy)
5817 w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5824 for (int ez = 0; ez < c_dofs1D; ++ez)
5826 for (int ey = 0; ey < c_dofs1D; ++ey)
5828 for (int ex = 0; ex < o_dofs1D; ++ex)
5831 for (int dx = 0; dx < c_dofs1D; ++dx)
5833 s += G(ex, dx) * w2[dx][ey][ez];
5835 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5836 y(local_index, e) += s;
5842 // dofs that point parallel to y-axis (open in y, closed in x, z)
5846 for (int ez = 0; ez < c_dofs1D; ++ez)
5848 for (int dx = 0; dx < c_dofs1D; ++dx)
5850 for (int dy = 0; dy < c_dofs1D; ++dy)
5852 w1[dx][dy][ez] = 0.0;
5853 for (int dz = 0; dz < c_dofs1D; ++dz)
5855 w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5862 for (int ez = 0; ez < c_dofs1D; ++ez)
5864 for (int ey = 0; ey < o_dofs1D; ++ey)
5866 for (int dx = 0; dx < c_dofs1D; ++dx)
5868 w2[dx][ey][ez] = 0.0;
5869 for (int dy = 0; dy < c_dofs1D; ++dy)
5871 w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5878 for (int ez = 0; ez < c_dofs1D; ++ez)
5880 for (int ey = 0; ey < o_dofs1D; ++ey)
5882 for (int ex = 0; ex < c_dofs1D; ++ex)
5885 for (int dx = 0; dx < c_dofs1D; ++dx)
5887 s += B(ex, dx) * w2[dx][ey][ez];
5889 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5890 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5891 y(local_index, e) += s;
5897 // dofs that point parallel to z-axis (open in z, closed in x, y)
5901 for (int ez = 0; ez < o_dofs1D; ++ez)
5903 for (int dx = 0; dx < c_dofs1D; ++dx)
5905 for (int dy = 0; dy < c_dofs1D; ++dy)
5907 w1[dx][dy][ez] = 0.0;
5908 for (int dz = 0; dz < c_dofs1D; ++dz)
5910 w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5917 for (int ez = 0; ez < o_dofs1D; ++ez)
5919 for (int ey = 0; ey < c_dofs1D; ++ey)
5921 for (int dx = 0; dx < c_dofs1D; ++dx)
5923 w2[dx][ey][ez] = 0.0;
5924 for (int dy = 0; dy < c_dofs1D; ++dy)
5926 w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5933 for (int ez = 0; ez < o_dofs1D; ++ez)
5935 for (int ey = 0; ey < c_dofs1D; ++ey)
5937 for (int ex = 0; ex < c_dofs1D; ++ex)
5940 for (int dx = 0; dx < c_dofs1D; ++dx)
5942 s += B(ex, dx) * w2[dx][ey][ez];
5944 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5945 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5946 y(local_index, e) += s;
5953 // Specialization of PAHcurlApplyGradient3D to the case where
5954 static void PAHcurlApplyGradient3DBId(const int c_dofs1D,
5957 const Array<double> &G_,
5961 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5963 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5964 auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5966 constexpr static int MAX_D1D = HCURL_MAX_D1D;
5967 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5971 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5972 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5975 // dofs that point parallel to x-axis (open in x, closed in y, z)
5979 for (int ez = 0; ez < c_dofs1D; ++ez)
5981 for (int dx = 0; dx < c_dofs1D; ++dx)
5983 for (int dy = 0; dy < c_dofs1D; ++dy)
5986 w1[dx][dy][ez] = x(dx, dy, dz, e);
5992 for (int ez = 0; ez < c_dofs1D; ++ez)
5994 for (int ey = 0; ey < c_dofs1D; ++ey)
5996 for (int dx = 0; dx < c_dofs1D; ++dx)
5999 w2[dx][ey][ez] = w1[dx][dy][ez];
6005 for (int ez = 0; ez < c_dofs1D; ++ez)
6007 for (int ey = 0; ey < c_dofs1D; ++ey)
6009 for (int ex = 0; ex < o_dofs1D; ++ex)
6012 for (int dx = 0; dx < c_dofs1D; ++dx)
6014 s += G(ex, dx) * w2[dx][ey][ez];
6016 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6017 y(local_index, e) += s;
6023 // dofs that point parallel to y-axis (open in y, closed in x, z)
6027 for (int ez = 0; ez < c_dofs1D; ++ez)
6029 for (int dx = 0; dx < c_dofs1D; ++dx)
6031 for (int dy = 0; dy < c_dofs1D; ++dy)
6034 w1[dx][dy][ez] = x(dx, dy, dz, e);
6040 for (int ez = 0; ez < c_dofs1D; ++ez)
6042 for (int ey = 0; ey < o_dofs1D; ++ey)
6044 for (int dx = 0; dx < c_dofs1D; ++dx)
6046 w2[dx][ey][ez] = 0.0;
6047 for (int dy = 0; dy < c_dofs1D; ++dy)
6049 w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
6056 for (int ez = 0; ez < c_dofs1D; ++ez)
6058 for (int ey = 0; ey < o_dofs1D; ++ey)
6060 for (int ex = 0; ex < c_dofs1D; ++ex)
6063 const double s = w2[dx][ey][ez];
6064 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6065 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6066 y(local_index, e) += s;
6072 // dofs that point parallel to z-axis (open in z, closed in x, y)
6076 for (int ez = 0; ez < o_dofs1D; ++ez)
6078 for (int dx = 0; dx < c_dofs1D; ++dx)
6080 for (int dy = 0; dy < c_dofs1D; ++dy)
6082 w1[dx][dy][ez] = 0.0;
6083 for (int dz = 0; dz < c_dofs1D; ++dz)
6085 w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
6092 for (int ez = 0; ez < o_dofs1D; ++ez)
6094 for (int ey = 0; ey < c_dofs1D; ++ey)
6096 for (int dx = 0; dx < c_dofs1D; ++dx)
6099 w2[dx][ey][ez] = w1[dx][dy][ez];
6105 for (int ez = 0; ez < o_dofs1D; ++ez)
6107 for (int ey = 0; ey < c_dofs1D; ++ey)
6109 for (int ex = 0; ex < c_dofs1D; ++ex)
6112 const double s = w2[dx][ey][ez];
6113 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6114 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6115 y(local_index, e) += s;
6122 static void PAHcurlApplyGradientTranspose3D(
6123 const int c_dofs1D, const int o_dofs1D, const int NE,
6124 const Array<double> &B_, const Array<double> &G_,
6125 const Vector &x_, Vector &y_)
6127 auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
6128 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6130 auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6131 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6133 constexpr static int MAX_D1D = HCURL_MAX_D1D;
6134 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6138 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6139 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6141 // dofs that point parallel to x-axis (open in x, closed in y, z)
6145 for (int dz = 0; dz < c_dofs1D; ++dz)
6147 for (int ex = 0; ex < o_dofs1D; ++ex)
6149 for (int ey = 0; ey < c_dofs1D; ++ey)
6151 w1[ex][ey][dz] = 0.0;
6152 for (int ez = 0; ez < c_dofs1D; ++ez)
6154 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6155 w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6162 for (int dz = 0; dz < c_dofs1D; ++dz)
6164 for (int dy = 0; dy < c_dofs1D; ++dy)
6166 for (int ex = 0; ex < o_dofs1D; ++ex)
6168 w2[ex][dy][dz] = 0.0;
6169 for (int ey = 0; ey < c_dofs1D; ++ey)
6171 w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6178 for (int dz = 0; dz < c_dofs1D; ++dz)
6180 for (int dy = 0; dy < c_dofs1D; ++dy)
6182 for (int dx = 0; dx < c_dofs1D; ++dx)
6185 for (int ex = 0; ex < o_dofs1D; ++ex)
6187 s += G(ex, dx) * w2[ex][dy][dz];
6189 y(dx, dy, dz, e) += s;
6195 // dofs that point parallel to y-axis (open in y, closed in x, z)
6199 for (int dz = 0; dz < c_dofs1D; ++dz)
6201 for (int ex = 0; ex < c_dofs1D; ++ex)
6203 for (int ey = 0; ey < o_dofs1D; ++ey)
6205 w1[ex][ey][dz] = 0.0;
6206 for (int ez = 0; ez < c_dofs1D; ++ez)
6208 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6209 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6210 w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
6217 for (int dz = 0; dz < c_dofs1D; ++dz)
6219 for (int dy = 0; dy < c_dofs1D; ++dy)
6221 for (int ex = 0; ex < c_dofs1D; ++ex)
6223 w2[ex][dy][dz] = 0.0;
6224 for (int ey = 0; ey < o_dofs1D; ++ey)
6226 w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6233 for (int dz = 0; dz < c_dofs1D; ++dz)
6235 for (int dy = 0; dy < c_dofs1D; ++dy)
6237 for (int dx = 0; dx < c_dofs1D; ++dx)
6240 for (int ex = 0; ex < c_dofs1D; ++ex)
6242 s += B(ex, dx) * w2[ex][dy][dz];
6244 y(dx, dy, dz, e) += s;
6250 // dofs that point parallel to z-axis (open in z, closed in x, y)
6254 for (int dz = 0; dz < c_dofs1D; ++dz)
6256 for (int ex = 0; ex < c_dofs1D; ++ex)
6258 for (int ey = 0; ey < c_dofs1D; ++ey)
6260 w1[ex][ey][dz] = 0.0;
6261 for (int ez = 0; ez < o_dofs1D; ++ez)
6263 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6264 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6265 w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6272 for (int dz = 0; dz < c_dofs1D; ++dz)
6274 for (int dy = 0; dy < c_dofs1D; ++dy)
6276 for (int ex = 0; ex < c_dofs1D; ++ex)
6278 w2[ex][dy][dz] = 0.0;
6279 for (int ey = 0; ey < c_dofs1D; ++ey)
6281 w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
6288 for (int dz = 0; dz < c_dofs1D; ++dz)
6290 for (int dy = 0; dy < c_dofs1D; ++dy)
6292 for (int dx = 0; dx < c_dofs1D; ++dx)
6295 for (int ex = 0; ex < c_dofs1D; ++ex)
6297 s += B(ex, dx) * w2[ex][dy][dz];
6299 y(dx, dy, dz, e) += s;
6306 // Specialization of PAHcurlApplyGradientTranspose3D to the case where
6307 static void PAHcurlApplyGradientTranspose3DBId(
6308 const int c_dofs1D, const int o_dofs1D, const int NE,
6309 const Array<double> &G_,
6310 const Vector &x_, Vector &y_)
6312 auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
6314 auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6315 auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
6317 constexpr static int MAX_D1D = HCURL_MAX_D1D;
6318 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6322 double w1[MAX_D1D][MAX_D1D][MAX_D1D];
6323 double w2[MAX_D1D][MAX_D1D][MAX_D1D];
6325 // dofs that point parallel to x-axis (open in x, closed in y, z)
6329 for (int dz = 0; dz < c_dofs1D; ++dz)
6331 for (int ex = 0; ex < o_dofs1D; ++ex)
6333 for (int ey = 0; ey < c_dofs1D; ++ey)
6336 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6337 w1[ex][ey][dz] = x(local_index, e);
6343 for (int dz = 0; dz < c_dofs1D; ++dz)
6345 for (int dy = 0; dy < c_dofs1D; ++dy)
6347 for (int ex = 0; ex < o_dofs1D; ++ex)
6350 w2[ex][dy][dz] = w1[ex][ey][dz];
6356 for (int dz = 0; dz < c_dofs1D; ++dz)
6358 for (int dy = 0; dy < c_dofs1D; ++dy)
6360 for (int dx = 0; dx < c_dofs1D; ++dx)
6363 for (int ex = 0; ex < o_dofs1D; ++ex)
6365 s += G(ex, dx) * w2[ex][dy][dz];
6367 y(dx, dy, dz, e) += s;
6373 // dofs that point parallel to y-axis (open in y, closed in x, z)
6377 for (int dz = 0; dz < c_dofs1D; ++dz)
6379 for (int ex = 0; ex < c_dofs1D; ++ex)
6381 for (int ey = 0; ey < o_dofs1D; ++ey)
6384 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6385 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6386 w1[ex][ey][dz] = x(local_index, e);
6392 for (int dz = 0; dz < c_dofs1D; ++dz)
6394 for (int dy = 0; dy < c_dofs1D; ++dy)
6396 for (int ex = 0; ex < c_dofs1D; ++ex)
6398 w2[ex][dy][dz] = 0.0;
6399 for (int ey = 0; ey < o_dofs1D; ++ey)
6401 w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
6408 for (int dz = 0; dz < c_dofs1D; ++dz)
6410 for (int dy = 0; dy < c_dofs1D; ++dy)
6412 for (int dx = 0; dx < c_dofs1D; ++dx)
6415 double s = w2[ex][dy][dz];
6416 y(dx, dy, dz, e) += s;
6422 // dofs that point parallel to z-axis (open in z, closed in x, y)
6426 for (int dz = 0; dz < c_dofs1D; ++dz)
6428 for (int ex = 0; ex < c_dofs1D; ++ex)
6430 for (int ey = 0; ey < c_dofs1D; ++ey)
6432 w1[ex][ey][dz] = 0.0;
6433 for (int ez = 0; ez < o_dofs1D; ++ez)
6435 const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6436 ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6437 w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
6444 for (int dz = 0; dz < c_dofs1D; ++dz)
6446 for (int dy = 0; dy < c_dofs1D; ++dy)
6448 for (int ex = 0; ex < c_dofs1D; ++ex)
6451 w2[ex][dy][dz] = w1[ex][ey][dz];
6457 for (int dz = 0; dz < c_dofs1D; ++dz)
6459 for (int dy = 0; dy < c_dofs1D; ++dy)
6461 for (int dx = 0; dx < c_dofs1D; ++dx)
6464 double s = w2[ex][dy][dz];
6465 y(dx, dy, dz, e) += s;
6472 void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
6473 const FiniteElementSpace &test_fes)
6475 // Assumes tensor-product elements, with a vector test space and H^1 trial space.
6476 Mesh *mesh = trial_fes.GetMesh();
6477 const FiniteElement *trial_fel = trial_fes.GetFE(0);
6478 const FiniteElement *test_fel = test_fes.GetFE(0);
6480 const NodalTensorFiniteElement *trial_el =
6481 dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
6484 const VectorTensorFiniteElement *test_el =
6485 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
6488 const int dims = trial_el->GetDim();
6489 MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!
");
6490 dim = mesh->Dimension();
6491 MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!
");
6492 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(),
6493 "Orders
do not match!
");
6494 ne = trial_fes.GetNE();
6496 const int order = trial_el->GetOrder();
6497 dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType());
6498 mfem::QuadratureFunctions1D qf1d;
6499 mfem::IntegrationRule closed_ir;
6500 closed_ir.SetSize(order + 1);
6501 qf1d.GaussLobatto(order + 1, &closed_ir);
6502 mfem::IntegrationRule open_ir;
6503 open_ir.SetSize(order);
6504 qf1d.GaussLegendre(order, &open_ir);
6506 maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
6507 o_dofs1D = maps_O_C->nqpt;
6508 if (trial_el->GetBasisType() == BasisType::GaussLobatto)
6511 c_dofs1D = maps_O_C->ndof;
6516 maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
6517 c_dofs1D = maps_C_C->nqpt;
6521 void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const
6527 PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne,
6532 PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6540 PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne,
6545 PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G,
6551 mfem_error("Bad dimension!
");
6555 void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
6561 PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne,
6566 PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6574 PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne,
6579 PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6585 mfem_error("Bad dimension!
");
6589 static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D,
6592 const Array<double> &Bclosed,
6593 const Array<double> &Bopen,
6594 const Vector &pa_data,
6598 auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6599 auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6601 auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);
6602 auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6604 auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
6607 constexpr static int MAX_D1D = HCURL_MAX_D1D;
6608 MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6612 double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
6613 double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];
6615 // dofs that point parallel to x-axis (open in x, closed in y, z)
6618 for (int ez = 0; ez < c_dofs1D; ++ez)
6620 for (int dx = 0; dx < c_dofs1D; ++dx)
6622 for (int dy = 0; dy < c_dofs1D; ++dy)
6624 for (int j=0; j<3; ++j)
6626 w1[j][dx][dy][ez] = 0.0;
6627 for (int dz = 0; dz < c_dofs1D; ++dz)
6629 w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
6637 for (int ez = 0; ez < c_dofs1D; ++ez)
6639 for (int ey = 0; ey < c_dofs1D; ++ey)
6641 for (int dx = 0; dx < c_dofs1D; ++dx)
6643 for (int j=0; j<3; ++j)
6645 w2[j][dx][ey][ez] = 0.0;
6646 for (int dy = 0; dy < c_dofs1D; ++dy)
6648 w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
6656 for (int ez = 0; ez < c_dofs1D; ++ez)
6658 for (int ey = 0; ey < c_dofs1D; ++ey)
6660 for (int ex = 0; ex < o_dofs1D; ++ex)
6662 for (int j=0; j<3; ++j)
6665 for (int dx = 0; dx < c_dofs1D; ++dx)
6667 s += Bo(ex, dx) * w2[j][dx][ey][ez];
6669 const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6670 y(local_index, e) += s * vk(j, local_index, e);
6676 // dofs that point parallel to y-axis (open in y, closed in x, z)
6679 for (int ez = 0; ez < c_dofs1D; ++ez)
6681 for (int dx = 0; dx < c_dofs1D; ++dx)
6683 for (int dy = 0; dy < c_dofs1D; ++dy)
6685 for (int j=0; j<3; ++j)
6687 w1[j][dx][dy][ez] = 0.0;
6688 for (int dz = 0; dz < c_dofs1D; ++dz)
6690 w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
6698 for (int ez = 0; ez < c_dofs1D; ++ez)
6700 for (int ey = 0; ey < o_dofs1D; ++ey)
6702 for (int dx = 0; dx < c_dofs1D; ++dx)
6704 for (int j=0; j<3; ++j)
6706 w2[j][dx][ey][ez] = 0.0;
6707 for (int dy = 0; dy < c_dofs1D; ++dy)
6709 w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez];
6717 for (int ez = 0; ez < c_dofs1D; ++ez)
6719 for (int ey = 0; ey < o_dofs1D; ++ey)
6721 for (int ex = 0; ex < c_dofs1D; ++ex)
6723 for (int j=0; j<3; ++j)
6726 for (int dx = 0; dx < c_dofs1D; ++dx)
6728 s += Bc(ex, dx) * w2[j][dx][ey][ez];
6730 const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6731 ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6732 y(local_index, e) += s * vk(j, local_index, e);
6738 // dofs that point parallel to z-axis (open in z, closed in x, y)
6741 for (int ez = 0; ez < o_dofs1D; ++ez)
6743 for (int dx = 0; dx < c_dofs1D; ++dx)
6745 for (int dy = 0; dy < c_dofs1D; ++dy)
6747 for (int j=0; j<3; ++j)
6749 w1[j][dx][dy][ez] = 0.0;
6750 for (int dz = 0; dz < c_dofs1D; ++dz)
6752 w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e);
6760 for (int ez = 0; ez < o_dofs1D; ++ez)
6762 for (int ey = 0; ey < c_dofs1D; ++ey)