12 #include "../general/forall.hpp"
15 #include "libceed/mass.hpp"
34 constexpr
static int VDIM = 2;
42 auto op =
Reshape(pa_data.
Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
50 for (
int qy = 0; qy < Q1D; ++qy)
52 for (
int qx = 0; qx < Q1D; ++qx)
54 for (
int c = 0; c < VDIM; ++c)
56 mass[qy][qx][c] = 0.0;
63 for (
int c = 0; c < VDIM; ++c)
65 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
66 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
68 for (
int dy = 0; dy < D1Dy; ++dy)
71 for (
int qx = 0; qx < Q1D; ++qx)
76 for (
int dx = 0; dx < D1Dx; ++dx)
78 const double t = X(dx + (dy * D1Dx) + osc, e);
79 for (
int qx = 0; qx < Q1D; ++qx)
81 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
85 for (
int qy = 0; qy < Q1D; ++qy)
87 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
88 for (
int qx = 0; qx < Q1D; ++qx)
90 mass[qy][qx][c] += massX[qx] * wy;
99 for (
int qy = 0; qy < Q1D; ++qy)
101 for (
int qx = 0; qx < Q1D; ++qx)
103 const double O11 = op(qx,qy,0,e);
104 const double O21 = op(qx,qy,1,e);
105 const double O12 = symmetric ? O21 : op(qx,qy,2,e);
106 const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
107 const double massX = mass[qy][qx][0];
108 const double massY = mass[qy][qx][1];
109 mass[qy][qx][0] = (O11*massX)+(O12*massY);
110 mass[qy][qx][1] = (O21*massX)+(O22*massY);
114 for (
int qy = 0; qy < Q1D; ++qy)
118 for (
int c = 0; c < VDIM; ++c)
120 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
121 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
124 for (
int dx = 0; dx < D1Dx; ++dx)
128 for (
int qx = 0; qx < Q1D; ++qx)
130 for (
int dx = 0; dx < D1Dx; ++dx)
132 massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
136 for (
int dy = 0; dy < D1Dy; ++dy)
138 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
140 for (
int dx = 0; dx < D1Dx; ++dx)
142 Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
155 const bool symmetric,
161 constexpr
static int VDIM = 2;
166 auto op =
Reshape(pa_data.
Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
173 for (
int c = 0; c < VDIM; ++c)
175 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
176 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
180 for (
int dy = 0; dy < D1Dy; ++dy)
182 for (
int qx = 0; qx < Q1D; ++qx)
185 for (
int qy = 0; qy < Q1D; ++qy)
187 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
189 mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
190 op(qx,qy,symmetric ? 2 : 3, e));
194 for (
int dx = 0; dx < D1Dx; ++dx)
196 for (
int qx = 0; qx < Q1D; ++qx)
198 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
199 D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
212 const bool symmetric,
221 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
222 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
223 constexpr static int VDIM = 3;
225 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
226 auto Bc = Reshape(bc.Read(), Q1D, D1D);
227 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
228 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
234 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
236 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
237 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
238 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
240 const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
241 (symmetric ? 5 : 8));
243 double mass[MAX_Q1D];
245 for (int dz = 0; dz < D1Dz; ++dz)
247 for (int dy = 0; dy < D1Dy; ++dy)
249 for (int qx = 0; qx < Q1D; ++qx)
252 for (int qy = 0; qy < Q1D; ++qy)
254 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
256 for (int qz = 0; qz < Q1D; ++qz)
258 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
260 mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
265 for (int dx = 0; dx < D1Dx; ++dx)
267 for (int qx = 0; qx < Q1D; ++qx)
269 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
270 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
276 osc += D1Dx * D1Dy * D1Dz;
278 }); // end of element loop
281 template<int T_D1D, int T_Q1D>
282 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
285 const bool symmetric,
286 const Array<double> &bo,
287 const Array<double> &bc,
288 const Vector &pa_data,
291 MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D
");
292 MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D
");
294 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
295 auto Bc = Reshape(bc.Read(), Q1D, D1D);
296 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
297 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
299 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
301 constexpr int VDIM = 3;
302 constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
303 constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
305 MFEM_SHARED double sBo[tQ1D][tD1D];
306 MFEM_SHARED double sBc[tQ1D][tD1D];
309 MFEM_SHARED double sop[3][tQ1D][tQ1D];
311 MFEM_FOREACH_THREAD(qx,x,Q1D)
313 MFEM_FOREACH_THREAD(qy,y,Q1D)
315 MFEM_FOREACH_THREAD(qz,z,Q1D)
317 op3[0] = op(qx,qy,qz,0,e);
318 op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
319 op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
324 const int tidx = MFEM_THREAD_ID(x);
325 const int tidy = MFEM_THREAD_ID(y);
326 const int tidz = MFEM_THREAD_ID(z);
330 MFEM_FOREACH_THREAD(d,y,D1D)
332 MFEM_FOREACH_THREAD(q,x,Q1D)
345 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
347 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
348 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
349 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
353 for (int qz=0; qz < Q1D; ++qz)
357 for (int i=0; i<3; ++i)
359 sop[i][tidx][tidy] = op3[i];
365 MFEM_FOREACH_THREAD(dz,z,D1Dz)
367 const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
369 MFEM_FOREACH_THREAD(dy,y,D1Dy)
371 MFEM_FOREACH_THREAD(dx,x,D1Dx)
373 for (int qy = 0; qy < Q1D; ++qy)
375 const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
377 for (int qx = 0; qx < Q1D; ++qx)
379 const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
380 dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
390 MFEM_FOREACH_THREAD(dz,z,D1Dz)
392 MFEM_FOREACH_THREAD(dy,y,D1Dy)
394 MFEM_FOREACH_THREAD(dx,x,D1Dx)
396 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
401 osc += D1Dx * D1Dy * D1Dz;
403 }); // end of element loop
406 void PAHcurlMassApply3D(const int D1D,
409 const bool symmetric,
410 const Array<double> &bo,
411 const Array<double> &bc,
412 const Array<double> &bot,
413 const Array<double> &bct,
414 const Vector &pa_data,
418 constexpr static int MAX_D1D = HCURL_MAX_D1D;
419 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
421 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
422 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
423 constexpr static int VDIM = 3;
425 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
426 auto Bc = Reshape(bc.Read(), Q1D, D1D);
427 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
428 auto Bct = Reshape(bct.Read(), D1D, Q1D);
429 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
430 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
431 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
435 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
437 for (int qz = 0; qz < Q1D; ++qz)
439 for (int qy = 0; qy < Q1D; ++qy)
441 for (int qx = 0; qx < Q1D; ++qx)
443 for (int c = 0; c < VDIM; ++c)
445 mass[qz][qy][qx][c] = 0.0;
453 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
455 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
456 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
457 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
459 for (int dz = 0; dz < D1Dz; ++dz)
461 double massXY[MAX_Q1D][MAX_Q1D];
462 for (int qy = 0; qy < Q1D; ++qy)
464 for (int qx = 0; qx < Q1D; ++qx)
466 massXY[qy][qx] = 0.0;
470 for (int dy = 0; dy < D1Dy; ++dy)
472 double massX[MAX_Q1D];
473 for (int qx = 0; qx < Q1D; ++qx)
478 for (int dx = 0; dx < D1Dx; ++dx)
480 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
481 for (int qx = 0; qx < Q1D; ++qx)
483 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
487 for (int qy = 0; qy < Q1D; ++qy)
489 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
490 for (int qx = 0; qx < Q1D; ++qx)
492 const double wx = massX[qx];
493 massXY[qy][qx] += wx * wy;
498 for (int qz = 0; qz < Q1D; ++qz)
500 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
501 for (int qy = 0; qy < Q1D; ++qy)
503 for (int qx = 0; qx < Q1D; ++qx)
505 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
511 osc += D1Dx * D1Dy * D1Dz;
512 } // loop (c) over components
515 for (int qz = 0; qz < Q1D; ++qz)
517 for (int qy = 0; qy < Q1D; ++qy)
519 for (int qx = 0; qx < Q1D; ++qx)
521 const double O11 = op(qx,qy,qz,0,e);
522 const double O12 = op(qx,qy,qz,1,e);
523 const double O13 = op(qx,qy,qz,2,e);
524 const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
525 const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
526 const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
527 const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
528 const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
529 const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
530 const double massX = mass[qz][qy][qx][0];
531 const double massY = mass[qz][qy][qx][1];
532 const double massZ = mass[qz][qy][qx][2];
533 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
534 mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
535 mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
540 for (int qz = 0; qz < Q1D; ++qz)
542 double massXY[MAX_D1D][MAX_D1D];
546 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
548 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
549 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
550 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
552 for (int dy = 0; dy < D1Dy; ++dy)
554 for (int dx = 0; dx < D1Dx; ++dx)
556 massXY[dy][dx] = 0.0;
559 for (int qy = 0; qy < Q1D; ++qy)
561 double massX[MAX_D1D];
562 for (int dx = 0; dx < D1Dx; ++dx)
566 for (int qx = 0; qx < Q1D; ++qx)
568 for (int dx = 0; dx < D1Dx; ++dx)
570 massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
573 for (int dy = 0; dy < D1Dy; ++dy)
575 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
576 for (int dx = 0; dx < D1Dx; ++dx)
578 massXY[dy][dx] += massX[dx] * wy;
583 for (int dz = 0; dz < D1Dz; ++dz)
585 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
586 for (int dy = 0; dy < D1Dy; ++dy)
588 for (int dx = 0; dx < D1Dx; ++dx)
590 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
595 osc += D1Dx * D1Dy * D1Dz;
598 }); // end of element loop
601 template<int T_D1D, int T_Q1D>
602 void SmemPAHcurlMassApply3D(const int D1D,
605 const bool symmetric,
606 const Array<double> &bo,
607 const Array<double> &bc,
608 const Array<double> &bot,
609 const Array<double> &bct,
610 const Vector &pa_data,
614 MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D
");
615 MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D
");
617 const int dataSize = symmetric ? 6 : 9;
619 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
620 auto Bc = Reshape(bc.Read(), Q1D, D1D);
621 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
622 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
623 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
625 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
627 constexpr int VDIM = 3;
628 constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
629 constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
631 MFEM_SHARED double sBo[tQ1D][tD1D];
632 MFEM_SHARED double sBc[tQ1D][tD1D];
635 MFEM_SHARED double sop[9*tQ1D*tQ1D];
636 MFEM_SHARED double mass[tQ1D][tQ1D][3];
638 MFEM_SHARED double sX[tD1D][tD1D][tD1D];
640 MFEM_FOREACH_THREAD(qx,x,Q1D)
642 MFEM_FOREACH_THREAD(qy,y,Q1D)
644 MFEM_FOREACH_THREAD(qz,z,Q1D)
646 for (int i=0; i<dataSize; ++i)
648 op9[i] = op(qx,qy,qz,i,e);
654 const int tidx = MFEM_THREAD_ID(x);
655 const int tidy = MFEM_THREAD_ID(y);
656 const int tidz = MFEM_THREAD_ID(z);
660 MFEM_FOREACH_THREAD(d,y,D1D)
662 MFEM_FOREACH_THREAD(q,x,Q1D)
674 for (int qz=0; qz < Q1D; ++qz)
677 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
679 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
680 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
681 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
683 MFEM_FOREACH_THREAD(dz,z,D1Dz)
685 MFEM_FOREACH_THREAD(dy,y,D1Dy)
687 MFEM_FOREACH_THREAD(dx,x,D1Dx)
689 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
697 for (int i=0; i<dataSize; ++i)
699 sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
702 MFEM_FOREACH_THREAD(qy,y,Q1D)
704 MFEM_FOREACH_THREAD(qx,x,Q1D)
708 for (int dz = 0; dz < D1Dz; ++dz)
710 const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
711 for (int dy = 0; dy < D1Dy; ++dy)
713 const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
714 for (int dx = 0; dx < D1Dx; ++dx)
716 const double t = sX[dz][dy][dx];
717 const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
718 u += t * wx * wy * wz;
728 osc += D1Dx * D1Dy * D1Dz;
732 MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
735 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
737 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
738 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
739 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
743 MFEM_FOREACH_THREAD(dz,z,D1Dz)
745 const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
747 MFEM_FOREACH_THREAD(dy,y,D1Dy)
749 MFEM_FOREACH_THREAD(dx,x,D1Dx)
751 for (int qy = 0; qy < Q1D; ++qy)
753 const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
754 for (int qx = 0; qx < Q1D; ++qx)
756 const int os = (dataSize*qx) + (dataSize*Q1D*qy);
757 const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
758 (symmetric ? 2 : 6))); // O11, O21, O31
759 const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
760 (symmetric ? 4 : 7))); // O12, O22, O32
761 const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
762 (symmetric ? 5 : 8))); // O13, O23, O33
764 const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
765 (sop[id3] * mass[qy][qx][2]);
767 const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
768 dxyz += m_c * wx * wy * wz;
777 MFEM_FOREACH_THREAD(dz,z,D1Dz)
779 MFEM_FOREACH_THREAD(dy,y,D1Dy)
781 MFEM_FOREACH_THREAD(dx,x,D1Dx)
783 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
788 osc += D1Dx * D1Dy * D1Dz;
791 }); // end of element loop
794 // PA H(curl) curl-curl assemble 2D kernel
795 static void PACurlCurlSetup2D(const int Q1D,
797 const Array<double> &w,
802 const int NQ = Q1D*Q1D;
804 auto J = Reshape(j.Read(), NQ, 2, 2, NE);
805 auto C = Reshape(coeff.Read(), NQ, NE);
806 auto y = Reshape(op.Write(), NQ, NE);
809 for (int q = 0; q < NQ; ++q)
811 const double J11 = J(q,0,0,e);
812 const double J21 = J(q,1,0,e);
813 const double J12 = J(q,0,1,e);
814 const double J22 = J(q,1,1,e);
815 const double detJ = (J11*J22)-(J21*J12);
816 y(q,e) = W[q] * C(q,e) / detJ;
821 // PA H(curl) curl-curl assemble 3D kernel
822 static void PACurlCurlSetup3D(const int Q1D,
825 const Array<double> &w,
830 const int NQ = Q1D*Q1D*Q1D;
831 const bool symmetric = (coeffDim != 9);
833 auto J = Reshape(j.Read(), NQ, 3, 3, NE);
834 auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
835 auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
839 for (int q = 0; q < NQ; ++q)
841 const double J11 = J(q,0,0,e);
842 const double J21 = J(q,1,0,e);
843 const double J31 = J(q,2,0,e);
844 const double J12 = J(q,0,1,e);
845 const double J22 = J(q,1,1,e);
846 const double J32 = J(q,2,1,e);
847 const double J13 = J(q,0,2,e);
848 const double J23 = J(q,1,2,e);
849 const double J33 = J(q,2,2,e);
850 const double detJ = J11 * (J22 * J33 - J32 * J23) -
851 /* */ J21 * (J12 * J33 - J32 * J13) +
852 /* */ J31 * (J12 * J23 - J22 * J13);
854 const double c_detJ = W[q] / detJ;
856 if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
858 // Set y to the 6 or 9 entries of J^T M J / det
859 const double M11 = C(0, q, e);
860 const double M12 = C(1, q, e);
861 const double M13 = C(2, q, e);
862 const double M21 = (!symmetric) ? C(3, q, e) : M12;
863 const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
864 const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
865 const double M31 = (!symmetric) ? C(6, q, e) : M13;
866 const double M32 = (!symmetric) ? C(7, q, e) : M23;
867 const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
869 // First compute R = MJ
870 const double R11 = M11*J11 + M12*J21 + M13*J31;
871 const double R12 = M11*J12 + M12*J22 + M13*J32;
872 const double R13 = M11*J13 + M12*J23 + M13*J33;
873 const double R21 = M21*J11 + M22*J21 + M23*J31;
874 const double R22 = M21*J12 + M22*J22 + M23*J32;
875 const double R23 = M21*J13 + M22*J23 + M23*J33;
876 const double R31 = M31*J11 + M32*J21 + M33*J31;
877 const double R32 = M31*J12 + M32*J22 + M33*J32;
878 const double R33 = M31*J13 + M32*J23 + M33*J33;
880 // Now set y to J^T R / det
881 y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
882 const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
883 y(q,1,e) = Y12; // 1,2
884 y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
886 const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
887 const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
888 const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
890 const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
892 y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
893 y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
894 y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
898 y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
899 y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
900 y(q,8,e) = Y33; // 3,3
903 else // Vector or scalar coefficient version
905 // Set y to the 6 entries of J^T D J / det^2
906 const double D1 = C(0, q, e);
907 const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
908 const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
910 y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
911 y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
912 y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
913 y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
914 y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
915 y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
921 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
923 // Assumes tensor-product elements
924 Mesh *mesh = fes.GetMesh();
925 const FiniteElement *fel = fes.GetFE(0);
927 const VectorTensorFiniteElement *el =
928 dynamic_cast<const VectorTensorFiniteElement*>(fel);
931 const IntegrationRule *ir
932 = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
933 *mesh->GetElementTransformation(0));
934 const int dims = el->GetDim();
935 MFEM_VERIFY(dims == 2 || dims == 3, "");
937 const int nq = ir->GetNPoints();
938 dim = mesh->Dimension();
939 MFEM_VERIFY(dim == 2 || dim == 3, "");
941 const int dimc = (dim == 3) ? 3 : 1;
944 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
945 mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
946 mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
947 dofs1D = mapsC->ndof;
948 quad1D = mapsC->nqpt;
950 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
952 const int MQsymmDim = MQ ? (MQ->GetWidth() * (MQ->GetWidth() + 1)) / 2 : 0;
953 const int MQfullDim = MQ ? (MQ->GetHeight() * MQ->GetWidth()) : 0;
954 const int MQdim = MQ ? (MQ->IsSymmetric() ? MQsymmDim : MQfullDim) : 0;
955 const int coeffDim = MQ ? MQdim : (DQ ? DQ->GetVDim() : 1);
957 symmetric = MQ ? MQ->IsSymmetric() : true;
959 const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
960 const int ndata = (dim == 2) ? 1 : (symmetric ? symmDims : MQfullDim);
961 pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
963 Vector coeff(coeffDim * ne * nq);
965 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
968 Vector D(DQ ? coeffDim : 0);
975 Msymm.SetSize(MQsymmDim);
985 MFEM_VERIFY(coeffDim == dimc, "");
989 MFEM_VERIFY(coeffDim == MQdim, "");
990 MFEM_VERIFY(MQ->GetHeight() == dimc && MQ->GetWidth() == dimc, "");
993 for (int e=0; e<ne; ++e)
995 ElementTransformation *tr = mesh->GetElementTransformation(e);
996 for (int p=0; p<nq; ++p)
1000 if (MQ->IsSymmetric())
1002 MQ->EvalSymmetric(Msymm, *tr, ir->IntPoint(p));
1004 for (int i=0; i<MQsymmDim; ++i)
1006 coeffh(i, p, e) = Msymm[i];
1011 MQ->Eval(M, *tr, ir->IntPoint(p));
1013 for (int i=0; i<dimc; ++i)
1014 for (int j=0; j<dimc; ++j)
1016 coeffh(j+(i*dimc), p, e) = M(i,j);
1022 DQ->Eval(D, *tr, ir->IntPoint(p));
1023 for (int i=0; i<coeffDim; ++i)
1025 coeffh(i, p, e) = D[i];
1030 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
1036 if (el->GetDerivType() != mfem::FiniteElement::CURL)
1038 MFEM_ABORT("Unknown kernel.
");
1043 PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
1048 PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1052 static void PACurlCurlApply2D(const int D1D,
1055 const Array<double> &bo,
1056 const Array<double> &bot,
1057 const Array<double> &gc,
1058 const Array<double> &gct,
1059 const Vector &pa_data,
1063 constexpr static int VDIM = 2;
1064 constexpr static int MAX_D1D = HCURL_MAX_D1D;
1065 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1067 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1068 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1069 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1070 auto Gct = Reshape(gct.Read(), D1D, Q1D);
1071 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1072 auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1073 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1077 double curl[MAX_Q1D][MAX_Q1D];
1079 // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1081 for (int qy = 0; qy < Q1D; ++qy)
1083 for (int qx = 0; qx < Q1D; ++qx)
1091 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1093 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1094 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1096 for (int dy = 0; dy < D1Dy; ++dy)
1098 double gradX[MAX_Q1D];
1099 for (int qx = 0; qx < Q1D; ++qx)
1104 for (int dx = 0; dx < D1Dx; ++dx)
1106 const double t = X(dx + (dy * D1Dx) + osc, e);
1107 for (int qx = 0; qx < Q1D; ++qx)
1109 gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1113 for (int qy = 0; qy < Q1D; ++qy)
1115 const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1116 for (int qx = 0; qx < Q1D; ++qx)
1118 curl[qy][qx] += gradX[qx] * wy;
1124 } // loop (c) over components
1126 // Apply D operator.
1127 for (int qy = 0; qy < Q1D; ++qy)
1129 for (int qx = 0; qx < Q1D; ++qx)
1131 curl[qy][qx] *= op(qx,qy,e);
1135 for (int qy = 0; qy < Q1D; ++qy)
1139 for (int c = 0; c < VDIM; ++c) // loop over x, y components
1141 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1142 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1144 double gradX[MAX_D1D];
1145 for (int dx = 0; dx < D1Dx; ++dx)
1149 for (int qx = 0; qx < Q1D; ++qx)
1151 for (int dx = 0; dx < D1Dx; ++dx)
1153 gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1156 for (int dy = 0; dy < D1Dy; ++dy)
1158 const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1160 for (int dx = 0; dx < D1Dx; ++dx)
1162 Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1169 }); // end of element loop
1172 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1173 static void PACurlCurlApply3D(const int D1D,
1175 const bool symmetric,
1177 const Array<double> &bo,
1178 const Array<double> &bc,
1179 const Array<double> &bot,
1180 const Array<double> &bct,
1181 const Array<double> &gc,
1182 const Array<double> &gct,
1183 const Vector &pa_data,
1187 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
1188 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
1189 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1190 // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1191 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1192 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1193 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1195 constexpr static int VDIM = 3;
1197 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1198 auto Bc = Reshape(bc.Read(), Q1D, D1D);
1199 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1200 auto Bct = Reshape(bct.Read(), D1D, Q1D);
1201 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1202 auto Gct = Reshape(gct.Read(), D1D, Q1D);
1203 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1204 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1205 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1209 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1210 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1212 for (int qz = 0; qz < Q1D; ++qz)
1214 for (int qy = 0; qy < Q1D; ++qy)
1216 for (int qx = 0; qx < Q1D; ++qx)
1218 for (int c = 0; c < VDIM; ++c)
1220 curl[qz][qy][qx][c] = 0.0;
1226 // We treat x, y, z components separately for optimization specific to each.
1232 const int D1Dz = D1D;
1233 const int D1Dy = D1D;
1234 const int D1Dx = D1D - 1;
1236 for (int dz = 0; dz < D1Dz; ++dz)
1238 double gradXY[MAX_Q1D][MAX_Q1D][2];
1239 for (int qy = 0; qy < Q1D; ++qy)
1241 for (int qx = 0; qx < Q1D; ++qx)
1243 for (int d = 0; d < 2; ++d)
1245 gradXY[qy][qx][d] = 0.0;
1250 for (int dy = 0; dy < D1Dy; ++dy)
1252 double massX[MAX_Q1D];
1253 for (int qx = 0; qx < Q1D; ++qx)
1258 for (int dx = 0; dx < D1Dx; ++dx)
1260 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1261 for (int qx = 0; qx < Q1D; ++qx)
1263 massX[qx] += t * Bo(qx,dx);
1267 for (int qy = 0; qy < Q1D; ++qy)
1269 const double wy = Bc(qy,dy);
1270 const double wDy = Gc(qy,dy);
1271 for (int qx = 0; qx < Q1D; ++qx)
1273 const double wx = massX[qx];
1274 gradXY[qy][qx][0] += wx * wDy;
1275 gradXY[qy][qx][1] += wx * wy;
1280 for (int qz = 0; qz < Q1D; ++qz)
1282 const double wz = Bc(qz,dz);
1283 const double wDz = Gc(qz,dz);
1284 for (int qy = 0; qy < Q1D; ++qy)
1286 for (int qx = 0; qx < Q1D; ++qx)
1288 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1289 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1290 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1296 osc += D1Dx * D1Dy * D1Dz;
1301 const int D1Dz = D1D;
1302 const int D1Dy = D1D - 1;
1303 const int D1Dx = D1D;
1305 for (int dz = 0; dz < D1Dz; ++dz)
1307 double gradXY[MAX_Q1D][MAX_Q1D][2];
1308 for (int qy = 0; qy < Q1D; ++qy)
1310 for (int qx = 0; qx < Q1D; ++qx)
1312 for (int d = 0; d < 2; ++d)
1314 gradXY[qy][qx][d] = 0.0;
1319 for (int dx = 0; dx < D1Dx; ++dx)
1321 double massY[MAX_Q1D];
1322 for (int qy = 0; qy < Q1D; ++qy)
1327 for (int dy = 0; dy < D1Dy; ++dy)
1329 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1330 for (int qy = 0; qy < Q1D; ++qy)
1332 massY[qy] += t * Bo(qy,dy);
1336 for (int qx = 0; qx < Q1D; ++qx)
1338 const double wx = Bc(qx,dx);
1339 const double wDx = Gc(qx,dx);
1340 for (int qy = 0; qy < Q1D; ++qy)
1342 const double wy = massY[qy];
1343 gradXY[qy][qx][0] += wDx * wy;
1344 gradXY[qy][qx][1] += wx * wy;
1349 for (int qz = 0; qz < Q1D; ++qz)
1351 const double wz = Bc(qz,dz);
1352 const double wDz = Gc(qz,dz);
1353 for (int qy = 0; qy < Q1D; ++qy)
1355 for (int qx = 0; qx < Q1D; ++qx)
1357 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1358 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1359 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1365 osc += D1Dx * D1Dy * D1Dz;
1370 const int D1Dz = D1D - 1;
1371 const int D1Dy = D1D;
1372 const int D1Dx = D1D;
1374 for (int dx = 0; dx < D1Dx; ++dx)
1376 double gradYZ[MAX_Q1D][MAX_Q1D][2];
1377 for (int qz = 0; qz < Q1D; ++qz)
1379 for (int qy = 0; qy < Q1D; ++qy)
1381 for (int d = 0; d < 2; ++d)
1383 gradYZ[qz][qy][d] = 0.0;
1388 for (int dy = 0; dy < D1Dy; ++dy)
1390 double massZ[MAX_Q1D];
1391 for (int qz = 0; qz < Q1D; ++qz)
1396 for (int dz = 0; dz < D1Dz; ++dz)
1398 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1399 for (int qz = 0; qz < Q1D; ++qz)
1401 massZ[qz] += t * Bo(qz,dz);
1405 for (int qy = 0; qy < Q1D; ++qy)
1407 const double wy = Bc(qy,dy);
1408 const double wDy = Gc(qy,dy);
1409 for (int qz = 0; qz < Q1D; ++qz)
1411 const double wz = massZ[qz];
1412 gradYZ[qz][qy][0] += wz * wy;
1413 gradYZ[qz][qy][1] += wz * wDy;
1418 for (int qx = 0; qx < Q1D; ++qx)
1420 const double wx = Bc(qx,dx);
1421 const double wDx = Gc(qx,dx);
1423 for (int qy = 0; qy < Q1D; ++qy)
1425 for (int qz = 0; qz < Q1D; ++qz)
1427 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1428 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1429 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1436 // Apply D operator.
1437 for (int qz = 0; qz < Q1D; ++qz)
1439 for (int qy = 0; qy < Q1D; ++qy)
1441 for (int qx = 0; qx < Q1D; ++qx)
1443 const double O11 = op(qx,qy,qz,0,e);
1444 const double O12 = op(qx,qy,qz,1,e);
1445 const double O13 = op(qx,qy,qz,2,e);
1446 const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1447 const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1448 const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1449 const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1450 const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1451 const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1453 const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1454 (O13 * curl[qz][qy][qx][2]);
1455 const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1456 (O23 * curl[qz][qy][qx][2]);
1457 const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1458 (O33 * curl[qz][qy][qx][2]);
1460 curl[qz][qy][qx][0] = c1;
1461 curl[qz][qy][qx][1] = c2;
1462 curl[qz][qy][qx][2] = c3;
1470 const int D1Dz = D1D;
1471 const int D1Dy = D1D;
1472 const int D1Dx = D1D - 1;
1474 for (int qz = 0; qz < Q1D; ++qz)
1476 double gradXY12[MAX_D1D][MAX_D1D];
1477 double gradXY21[MAX_D1D][MAX_D1D];
1479 for (int dy = 0; dy < D1Dy; ++dy)
1481 for (int dx = 0; dx < D1Dx; ++dx)
1483 gradXY12[dy][dx] = 0.0;
1484 gradXY21[dy][dx] = 0.0;
1487 for (int qy = 0; qy < Q1D; ++qy)
1489 double massX[MAX_D1D][2];
1490 for (int dx = 0; dx < D1Dx; ++dx)
1492 for (int n = 0; n < 2; ++n)
1497 for (int qx = 0; qx < Q1D; ++qx)
1499 for (int dx = 0; dx < D1Dx; ++dx)
1501 const double wx = Bot(dx,qx);
1503 massX[dx][0] += wx * curl[qz][qy][qx][1];
1504 massX[dx][1] += wx * curl[qz][qy][qx][2];
1507 for (int dy = 0; dy < D1Dy; ++dy)
1509 const double wy = Bct(dy,qy);
1510 const double wDy = Gct(dy,qy);
1512 for (int dx = 0; dx < D1Dx; ++dx)
1514 gradXY21[dy][dx] += massX[dx][0] * wy;
1515 gradXY12[dy][dx] += massX[dx][1] * wDy;
1520 for (int dz = 0; dz < D1Dz; ++dz)
1522 const double wz = Bct(dz,qz);
1523 const double wDz = Gct(dz,qz);
1524 for (int dy = 0; dy < D1Dy; ++dy)
1526 for (int dx = 0; dx < D1Dx; ++dx)
1528 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1529 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1530 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1531 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
1537 osc += D1Dx * D1Dy * D1Dz;
1542 const int D1Dz = D1D;
1543 const int D1Dy = D1D - 1;
1544 const int D1Dx = D1D;
1546 for (int qz = 0; qz < Q1D; ++qz)
1548 double gradXY02[MAX_D1D][MAX_D1D];
1549 double gradXY20[MAX_D1D][MAX_D1D];
1551 for (int dy = 0; dy < D1Dy; ++dy)
1553 for (int dx = 0; dx < D1Dx; ++dx)
1555 gradXY02[dy][dx] = 0.0;
1556 gradXY20[dy][dx] = 0.0;
1559 for (int qx = 0; qx < Q1D; ++qx)
1561 double massY[MAX_D1D][2];
1562 for (int dy = 0; dy < D1Dy; ++dy)
1567 for (int qy = 0; qy < Q1D; ++qy)
1569 for (int dy = 0; dy < D1Dy; ++dy)
1571 const double wy = Bot(dy,qy);
1573 massY[dy][0] += wy * curl[qz][qy][qx][2];
1574 massY[dy][1] += wy * curl[qz][qy][qx][0];
1577 for (int dx = 0; dx < D1Dx; ++dx)
1579 const double wx = Bct(dx,qx);
1580 const double wDx = Gct(dx,qx);
1582 for (int dy = 0; dy < D1Dy; ++dy)
1584 gradXY02[dy][dx] += massY[dy][0] * wDx;
1585 gradXY20[dy][dx] += massY[dy][1] * wx;
1590 for (int dz = 0; dz < D1Dz; ++dz)
1592 const double wz = Bct(dz,qz);
1593 const double wDz = Gct(dz,qz);
1594 for (int dy = 0; dy < D1Dy; ++dy)
1596 for (int dx = 0; dx < D1Dx; ++dx)
1598 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1599 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1600 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1601 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
1607 osc += D1Dx * D1Dy * D1Dz;
1612 const int D1Dz = D1D - 1;
1613 const int D1Dy = D1D;
1614 const int D1Dx = D1D;
1616 for (int qx = 0; qx < Q1D; ++qx)
1618 double gradYZ01[MAX_D1D][MAX_D1D];
1619 double gradYZ10[MAX_D1D][MAX_D1D];
1621 for (int dy = 0; dy < D1Dy; ++dy)
1623 for (int dz = 0; dz < D1Dz; ++dz)
1625 gradYZ01[dz][dy] = 0.0;
1626 gradYZ10[dz][dy] = 0.0;
1629 for (int qy = 0; qy < Q1D; ++qy)
1631 double massZ[MAX_D1D][2];
1632 for (int dz = 0; dz < D1Dz; ++dz)
1634 for (int n = 0; n < 2; ++n)
1639 for (int qz = 0; qz < Q1D; ++qz)
1641 for (int dz = 0; dz < D1Dz; ++dz)
1643 const double wz = Bot(dz,qz);
1645 massZ[dz][0] += wz * curl[qz][qy][qx][0];
1646 massZ[dz][1] += wz * curl[qz][qy][qx][1];
1649 for (int dy = 0; dy < D1Dy; ++dy)
1651 const double wy = Bct(dy,qy);
1652 const double wDy = Gct(dy,qy);
1654 for (int dz = 0; dz < D1Dz; ++dz)
1656 gradYZ01[dz][dy] += wy * massZ[dz][1];
1657 gradYZ10[dz][dy] += wDy * massZ[dz][0];
1662 for (int dx = 0; dx < D1Dx; ++dx)
1664 const double wx = Bct(dx,qx);
1665 const double wDx = Gct(dx,qx);
1667 for (int dy = 0; dy < D1Dy; ++dy)
1669 for (int dz = 0; dz < D1Dz; ++dz)
1671 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1672 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1673 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1674 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
1680 }); // end of element loop
1683 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1684 static void SmemPACurlCurlApply3D(const int D1D,
1686 const bool symmetric,
1688 const Array<double> &bo,
1689 const Array<double> &bc,
1690 const Array<double> &bot,
1691 const Array<double> &bct,
1692 const Array<double> &gc,
1693 const Array<double> &gct,
1694 const Vector &pa_data,
1698 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
1699 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
1700 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1701 // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1702 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1703 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1704 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1706 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1707 auto Bc = Reshape(bc.Read(), Q1D, D1D);
1708 auto Gc = Reshape(gc.Read(), Q1D, D1D);
1709 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1710 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1711 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1713 const int s = symmetric ? 6 : 9;
1715 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1717 constexpr int VDIM = 3;
1719 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1720 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1721 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1724 MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1725 MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1727 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1729 MFEM_FOREACH_THREAD(qx,x,Q1D)
1731 MFEM_FOREACH_THREAD(qy,y,Q1D)
1733 MFEM_FOREACH_THREAD(qz,z,Q1D)
1735 for (int i=0; i<s; ++i)
1737 ope[i] = op(qx,qy,qz,i,e);
1743 const int tidx = MFEM_THREAD_ID(x);
1744 const int tidy = MFEM_THREAD_ID(y);
1745 const int tidz = MFEM_THREAD_ID(z);
1749 MFEM_FOREACH_THREAD(d,y,D1D)
1751 MFEM_FOREACH_THREAD(q,x,Q1D)
1753 sBc[d][q] = Bc(q,d);
1754 sGc[d][q] = Gc(q,d);
1757 sBo[d][q] = Bo(q,d);
1764 for (int qz=0; qz < Q1D; ++qz)
1768 MFEM_FOREACH_THREAD(qy,y,Q1D)
1770 MFEM_FOREACH_THREAD(qx,x,Q1D)
1772 for (int i=0; i<3; ++i)
1774 curl[qy][qx][i] = 0.0;
1781 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1783 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1784 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1785 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1787 MFEM_FOREACH_THREAD(dz,z,D1Dz)
1789 MFEM_FOREACH_THREAD(dy,y,D1Dy)
1791 MFEM_FOREACH_THREAD(dx,x,D1Dx)
1793 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1803 for (int i=0; i<s; ++i)
1805 sop[i][tidx][tidy] = ope[i];
1809 MFEM_FOREACH_THREAD(qy,y,Q1D)
1811 MFEM_FOREACH_THREAD(qx,x,Q1D)
1816 // We treat x, y, z components separately for optimization specific to each.
1817 if (c == 0) // x component
1819 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1821 for (int dz = 0; dz < D1Dz; ++dz)
1823 const double wz = sBc[dz][qz];
1824 const double wDz = sGc[dz][qz];
1826 for (int dy = 0; dy < D1Dy; ++dy)
1828 const double wy = sBc[dy][qy];
1829 const double wDy = sGc[dy][qy];
1831 for (int dx = 0; dx < D1Dx; ++dx)
1833 const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1840 curl[qy][qx][1] += v; // (u_0)_{x_2}
1841 curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1843 else if (c == 1) // y component
1845 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1847 for (int dz = 0; dz < D1Dz; ++dz)
1849 const double wz = sBc[dz][qz];
1850 const double wDz = sGc[dz][qz];
1852 for (int dy = 0; dy < D1Dy; ++dy)
1854 const double wy = sBo[dy][qy];
1856 for (int dx = 0; dx < D1Dx; ++dx)
1858 const double t = sX[dz][dy][dx];
1859 const double wx = t * sBc[dx][qx];
1860 const double wDx = t * sGc[dx][qx];
1868 curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1869 curl[qy][qx][2] += u; // (u_1)_{x_0}
1873 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1875 for (int dz = 0; dz < D1Dz; ++dz)
1877 const double wz = sBo[dz][qz];
1879 for (int dy = 0; dy < D1Dy; ++dy)
1881 const double wy = sBc[dy][qy];
1882 const double wDy = sGc[dy][qy];
1884 for (int dx = 0; dx < D1Dx; ++dx)
1886 const double t = sX[dz][dy][dx];
1887 const double wx = t * sBc[dx][qx];
1888 const double wDx = t * sGc[dx][qx];
1896 curl[qy][qx][0] += v; // (u_2)_{x_1}
1897 curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1903 osc += D1Dx * D1Dy * D1Dz;
1911 MFEM_FOREACH_THREAD(dz,z,D1D)
1913 const double wcz = sBc[dz][qz];
1914 const double wcDz = sGc[dz][qz];
1915 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1917 MFEM_FOREACH_THREAD(dy,y,D1D)
1919 MFEM_FOREACH_THREAD(dx,x,D1D)
1921 for (int qy = 0; qy < Q1D; ++qy)
1923 const double wcy = sBc[dy][qy];
1924 const double wcDy = sGc[dy][qy];
1925 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1927 for (int qx = 0; qx < Q1D; ++qx)
1929 const double O11 = sop[0][qx][qy];
1930 const double O12 = sop[1][qx][qy];
1931 const double O13 = sop[2][qx][qy];
1932 const double O21 = symmetric ? O12 : sop[3][qx][qy];
1933 const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1934 const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1935 const double O31 = symmetric ? O13 : sop[6][qx][qy];
1936 const double O32 = symmetric ? O23 : sop[7][qx][qy];
1937 const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1939 const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1940 (O13 * curl[qy][qx][2]);
1941 const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1942 (O23 * curl[qy][qx][2]);
1943 const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1944 (O33 * curl[qy][qx][2]);
1946 const double wcx = sBc[dx][qx];
1947 const double wDx = sGc[dx][qx];
1951 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1952 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1953 const double wx = sBo[dx][qx];
1954 dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1957 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1958 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1959 dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1961 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1962 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1963 dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1972 MFEM_FOREACH_THREAD(dz,z,D1D)
1974 MFEM_FOREACH_THREAD(dy,y,D1D)
1976 MFEM_FOREACH_THREAD(dx,x,D1D)
1980 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
1984 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
1988 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
1994 }); // end of element loop
1997 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
2001 if (Device::Allows(Backend::DEVICE_MASK))
2003 const int ID = (dofs1D << 4) | quad1D;
2006 case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2007 mapsO->B, mapsC->B, mapsO->Bt,
2008 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2009 case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2010 mapsO->B, mapsC->B, mapsO->Bt,
2011 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2012 case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2014 mapsC->B, mapsO->Bt,
2015 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2016 case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2017 mapsO->B, mapsC->B, mapsO->Bt,
2018 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2019 default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2020 mapsC->B, mapsO->Bt, mapsC->Bt,
2021 mapsC->G, mapsC->Gt, pa_data, x, y);
2025 PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2026 mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2030 PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2031 mapsC->G, mapsC->Gt, pa_data, x, y);
2035 MFEM_ABORT("Unsupported dimension!
");
2039 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2042 const Array<double> &bo,
2043 const Array<double> &gc,
2044 const Vector &pa_data,
2047 constexpr static int VDIM = 2;
2048 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2050 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2051 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2052 auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2053 auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2059 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2061 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2062 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2066 for (int dy = 0; dy < D1Dy; ++dy)
2068 for (int qx = 0; qx < Q1D; ++qx)
2071 for (int qy = 0; qy < Q1D; ++qy)
2073 const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2074 t[qx] += wy * wy * op(qx,qy,e);
2078 for (int dx = 0; dx < D1Dx; ++dx)
2080 for (int qx = 0; qx < Q1D; ++qx)
2082 const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2083 D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2090 }); // end of element loop
2093 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2094 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2096 const bool symmetric,
2098 const Array<double> &bo,
2099 const Array<double> &bc,
2100 const Array<double> &go,
2101 const Array<double> &gc,
2102 const Vector &pa_data,
2105 constexpr static int VDIM = 3;
2106 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2107 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2109 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2110 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2111 auto Go = Reshape(go.Read(), Q1D, D1D-1);
2112 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2113 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2114 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2116 const int s = symmetric ? 6 : 9;
2120 const int i21 = symmetric ? i12 : 3;
2121 const int i22 = symmetric ? 3 : 4;
2122 const int i23 = symmetric ? 4 : 5;
2123 const int i31 = symmetric ? i13 : 6;
2124 const int i32 = symmetric ? i23 : 7;
2125 const int i33 = symmetric ? 5 : 8;
2129 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2130 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2131 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2132 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2133 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2135 // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2136 // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2140 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2142 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2143 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2144 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2146 double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2149 for (int qx = 0; qx < Q1D; ++qx)
2151 for (int qy = 0; qy < Q1D; ++qy)
2153 for (int dz = 0; dz < D1Dz; ++dz)
2155 for (int i=0; i<s; ++i)
2157 for (int d=0; d<3; ++d)
2159 zt[qx][qy][dz][i][d] = 0.0;
2163 for (int qz = 0; qz < Q1D; ++qz)
2165 const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2166 const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2168 for (int i=0; i<s; ++i)
2170 zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2171 zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2172 zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2177 } // end of z contraction
2179 double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2182 for (int qx = 0; qx < Q1D; ++qx)
2184 for (int dz = 0; dz < D1Dz; ++dz)
2186 for (int dy = 0; dy < D1Dy; ++dy)
2188 for (int i=0; i<s; ++i)
2190 for (int d=0; d<3; ++d)
2191 for (int j=0; j<3; ++j)
2193 yt[qx][dy][dz][i][d][j] = 0.0;
2197 for (int qy = 0; qy < Q1D; ++qy)
2199 const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2200 const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2202 for (int i=0; i<s; ++i)
2204 for (int d=0; d<3; ++d)
2206 yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2207 yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2208 yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2214 } // end of y contraction
2217 for (int dz = 0; dz < D1Dz; ++dz)
2219 for (int dy = 0; dy < D1Dy; ++dy)
2221 for (int dx = 0; dx < D1Dx; ++dx)
2223 for (int qx = 0; qx < Q1D; ++qx)
2225 const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2226 const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2228 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2229 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2230 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2231 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2232 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2235 const double O11 = op(q,0,e);
2236 const double O12 = op(q,1,e);
2237 const double O13 = op(q,2,e);
2238 const double O22 = op(q,3,e);
2239 const double O23 = op(q,4,e);
2240 const double O33 = op(q,5,e);
2245 // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2246 const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2247 - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2249 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2253 // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2254 const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2255 - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2256 + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2258 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2262 // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2263 const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2264 - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2265 + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2267 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2272 } // end of x contraction
2274 osc += D1Dx * D1Dy * D1Dz;
2276 }); // end of element loop
2279 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2280 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2282 const bool symmetric,
2284 const Array<double> &bo,
2285 const Array<double> &bc,
2286 const Array<double> &go,
2287 const Array<double> &gc,
2288 const Vector &pa_data,
2291 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2292 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2294 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2295 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2296 auto Go = Reshape(go.Read(), Q1D, D1D-1);
2297 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2298 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2299 auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2301 const int s = symmetric ? 6 : 9;
2305 const int i21 = symmetric ? i12 : 3;
2306 const int i22 = symmetric ? 3 : 4;
2307 const int i23 = symmetric ? 4 : 5;
2308 const int i31 = symmetric ? i13 : 6;
2309 const int i32 = symmetric ? i23 : 7;
2310 const int i33 = symmetric ? 5 : 8;
2312 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2314 // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2315 // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2316 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2317 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2318 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2320 constexpr int VDIM = 3;
2322 MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2323 MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2324 MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2325 MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2328 MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2330 MFEM_FOREACH_THREAD(qx,x,Q1D)
2332 MFEM_FOREACH_THREAD(qy,y,Q1D)
2334 MFEM_FOREACH_THREAD(qz,z,Q1D)
2336 for (int i=0; i<s; ++i)
2338 ope[i] = op(qx,qy,qz,i,e);
2344 const int tidx = MFEM_THREAD_ID(x);
2345 const int tidy = MFEM_THREAD_ID(y);
2346 const int tidz = MFEM_THREAD_ID(z);
2350 MFEM_FOREACH_THREAD(d,y,D1D)
2352 MFEM_FOREACH_THREAD(q,x,Q1D)
2354 sBc[q][d] = Bc(q,d);
2355 sGc[q][d] = Gc(q,d);
2358 sBo[q][d] = Bo(q,d);
2359 sGo[q][d] = Go(q,d);
2367 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2369 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2370 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2371 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2375 for (int qz=0; qz < Q1D; ++qz)
2379 for (int i=0; i<s; ++i)
2381 sop[i][tidx][tidy] = ope[i];
2387 MFEM_FOREACH_THREAD(dz,z,D1Dz)
2389 const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2390 const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2392 MFEM_FOREACH_THREAD(dy,y,D1Dy)
2394 MFEM_FOREACH_THREAD(dx,x,D1Dx)
2396 for (int qy = 0; qy < Q1D; ++qy)
2398 const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2399 const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2401 for (int qx = 0; qx < Q1D; ++qx)
2403 const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2404 const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2408 // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2410 // (u_0)_{x_2} O22 (u_0)_{x_2}
2411 dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2413 // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2414 dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2416 // (u_0)_{x_1} O33 (u_0)_{x_1}
2417 dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2421 // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2423 // (u_1)_{x_2} O11 (u_1)_{x_2}
2424 dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2426 // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2427 dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2429 // (u_1)_{x_0} O33 (u_1)_{x_0})
2430 dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2434 // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2436 // (u_2)_{x_1} O11 (u_2)_{x_1}
2437 dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2439 // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2440 dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2442 // (u_2)_{x_0} O22 (u_2)_{x_0}
2443 dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2454 MFEM_FOREACH_THREAD(dz,z,D1Dz)
2456 MFEM_FOREACH_THREAD(dy,y,D1Dy)
2458 MFEM_FOREACH_THREAD(dx,x,D1Dx)
2460 D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2465 osc += D1Dx * D1Dy * D1Dz;
2467 }); // end of element loop
2470 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2474 if (Device::Allows(Backend::DEVICE_MASK))
2476 const int ID = (dofs1D << 4) | quad1D;
2479 case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2484 case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2489 case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2494 case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2499 default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2506 PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2513 PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2514 mapsO->B, mapsC->G, pa_data, diag);
2518 MFEM_ABORT("Unsupported dimension!
");
2522 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2523 // integrated against H(curl) test functions corresponding to y.
2524 void PAHcurlH1Apply3D(const int D1D,
2527 const Array<double> &bc,
2528 const Array<double> &gc,
2529 const Array<double> &bot,
2530 const Array<double> &bct,
2531 const Vector &pa_data,
2535 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2536 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2538 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2539 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2541 constexpr static int VDIM = 3;
2543 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2544 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2545 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2546 auto Bct = Reshape(bct.Read(), D1D, Q1D);
2547 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2548 auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2549 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2553 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2555 for (int qz = 0; qz < Q1D; ++qz)
2557 for (int qy = 0; qy < Q1D; ++qy)
2559 for (int qx = 0; qx < Q1D; ++qx)
2561 for (int c = 0; c < VDIM; ++c)
2563 mass[qz][qy][qx][c] = 0.0;
2569 for (int dz = 0; dz < D1D; ++dz)
2571 double gradXY[MAX_Q1D][MAX_Q1D][3];
2572 for (int qy = 0; qy < Q1D; ++qy)
2574 for (int qx = 0; qx < Q1D; ++qx)
2576 gradXY[qy][qx][0] = 0.0;
2577 gradXY[qy][qx][1] = 0.0;
2578 gradXY[qy][qx][2] = 0.0;
2581 for (int dy = 0; dy < D1D; ++dy)
2583 double gradX[MAX_Q1D][2];
2584 for (int qx = 0; qx < Q1D; ++qx)
2589 for (int dx = 0; dx < D1D; ++dx)
2591 const double s = X(dx,dy,dz,e);
2592 for (int qx = 0; qx < Q1D; ++qx)
2594 gradX[qx][0] += s * Bc(qx,dx);
2595 gradX[qx][1] += s * Gc(qx,dx);
2598 for (int qy = 0; qy < Q1D; ++qy)
2600 const double wy = Bc(qy,dy);
2601 const double wDy = Gc(qy,dy);
2602 for (int qx = 0; qx < Q1D; ++qx)
2604 const double wx = gradX[qx][0];
2605 const double wDx = gradX[qx][1];
2606 gradXY[qy][qx][0] += wDx * wy;
2607 gradXY[qy][qx][1] += wx * wDy;
2608 gradXY[qy][qx][2] += wx * wy;
2612 for (int qz = 0; qz < Q1D; ++qz)
2614 const double wz = Bc(qz,dz);
2615 const double wDz = Gc(qz,dz);
2616 for (int qy = 0; qy < Q1D; ++qy)
2618 for (int qx = 0; qx < Q1D; ++qx)
2620 mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2621 mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2622 mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2628 // Apply D operator.
2629 for (int qz = 0; qz < Q1D; ++qz)
2631 for (int qy = 0; qy < Q1D; ++qy)
2633 for (int qx = 0; qx < Q1D; ++qx)
2635 const double O11 = op(qx,qy,qz,0,e);
2636 const double O12 = op(qx,qy,qz,1,e);
2637 const double O13 = op(qx,qy,qz,2,e);
2638 const double O22 = op(qx,qy,qz,3,e);
2639 const double O23 = op(qx,qy,qz,4,e);
2640 const double O33 = op(qx,qy,qz,5,e);
2641 const double massX = mass[qz][qy][qx][0];
2642 const double massY = mass[qz][qy][qx][1];
2643 const double massZ = mass[qz][qy][qx][2];
2644 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2645 mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2646 mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2651 for (int qz = 0; qz < Q1D; ++qz)
2653 double massXY[MAX_D1D][MAX_D1D];
2657 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2659 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2660 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2661 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2663 for (int dy = 0; dy < D1Dy; ++dy)
2665 for (int dx = 0; dx < D1Dx; ++dx)
2667 massXY[dy][dx] = 0.0;
2670 for (int qy = 0; qy < Q1D; ++qy)
2672 double massX[MAX_D1D];
2673 for (int dx = 0; dx < D1Dx; ++dx)
2677 for (int qx = 0; qx < Q1D; ++qx)
2679 for (int dx = 0; dx < D1Dx; ++dx)
2681 massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2684 for (int dy = 0; dy < D1Dy; ++dy)
2686 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2687 for (int dx = 0; dx < D1Dx; ++dx)
2689 massXY[dy][dx] += massX[dx] * wy;
2694 for (int dz = 0; dz < D1Dz; ++dz)
2696 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2697 for (int dy = 0; dy < D1Dy; ++dy)
2699 for (int dx = 0; dx < D1Dx; ++dx)
2701 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2706 osc += D1Dx * D1Dy * D1Dz;
2709 }); // end of element loop
2712 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2713 // integrated against H(curl) test functions corresponding to y.
2714 void PAHcurlH1Apply2D(const int D1D,
2717 const Array<double> &bc,
2718 const Array<double> &gc,
2719 const Array<double> &bot,
2720 const Array<double> &bct,
2721 const Vector &pa_data,
2725 constexpr static int VDIM = 2;
2726 constexpr static int MAX_D1D = HCURL_MAX_D1D;
2727 constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2729 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2730 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2731 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2732 auto Bct = Reshape(bct.Read(), D1D, Q1D);
2733 auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
2734 auto X = Reshape(x.Read(), D1D, D1D, NE);
2735 auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2739 double mass[MAX_Q1D][MAX_Q1D][VDIM];
2741 for (int qy = 0; qy < Q1D; ++qy)
2743 for (int qx = 0; qx < Q1D; ++qx)
2745 for (int c = 0; c < VDIM; ++c)
2747 mass[qy][qx][c] = 0.0;
2752 for (int dy = 0; dy < D1D; ++dy)
2754 double gradX[MAX_Q1D][2];
2755 for (int qx = 0; qx < Q1D; ++qx)
2760 for (int dx = 0; dx < D1D; ++dx)
2762 const double s = X(dx,dy,e);
2763 for (int qx = 0; qx < Q1D; ++qx)
2765 gradX[qx][0] += s * Bc(qx,dx);
2766 gradX[qx][1] += s * Gc(qx,dx);
2769 for (int qy = 0; qy < Q1D; ++qy)
2771 const double wy = Bc(qy,dy);
2772 const double wDy = Gc(qy,dy);
2773 for (int qx = 0; qx < Q1D; ++qx)
2775 const double wx = gradX[qx][0];
2776 const double wDx = gradX[qx][1];
2777 mass[qy][qx][0] += wDx * wy;
2778 mass[qy][qx][1] += wx * wDy;
2783 // Apply D operator.
2784 for (int qy = 0; qy < Q1D; ++qy)
2786 for (int qx = 0; qx < Q1D; ++qx)
2788 const double O11 = op(qx,qy,0,e);
2789 const double O12 = op(qx,qy,1,e);
2790 const double O22 = op(qx,qy,2,e);
2791 const double massX = mass[qy][qx][0];
2792 const double massY = mass[qy][qx][1];
2793 mass[qy][qx][0] = (O11*massX)+(O12*massY);
2794 mass[qy][qx][1] = (O12*massX)+(O22*massY);
2798 for (int qy = 0; qy < Q1D; ++qy)
2802 for (int c = 0; c < VDIM; ++c) // loop over x, y components
2804 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2805 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2807 double massX[MAX_D1D];
2808 for (int dx = 0; dx < D1Dx; ++dx)
2812 for (int qx = 0; qx < Q1D; ++qx)
2814 for (int dx = 0; dx < D1Dx; ++dx)
2816 massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2820 for (int dy = 0; dy < D1Dy; ++dy)
2822 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2824 for (int dx = 0; dx < D1Dx; ++dx)
2826 Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
2833 }); // end of element loop
2836 // PA H(curl) assemble kernel
2837 void PAHcurlL2Setup(const int NQ,
2840 const Array<double> &w,
2845 auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
2846 auto y = Reshape(op.Write(), coeffDim, NQ, NE);
2850 for (int q = 0; q < NQ; ++q)
2852 for (int c=0; c<coeffDim; ++c)
2854 y(c,q,e) = W[q] * C(c,q,e);
2860 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
2861 const FiniteElementSpace &test_fes)
2863 // Assumes tensor-product elements, with vector test and trial spaces.
2864 Mesh *mesh = trial_fes.GetMesh();
2865 const FiniteElement *trial_fel = trial_fes.GetFE(0);
2866 const FiniteElement *test_fel = test_fes.GetFE(0);
2868 const VectorTensorFiniteElement *trial_el =
2869 dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
2872 const VectorTensorFiniteElement *test_el =
2873 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
2876 const IntegrationRule *ir
2877 = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
2878 *mesh->GetElementTransformation(0));
2879 const int dims = trial_el->GetDim();
2880 MFEM_VERIFY(dims == 3, "");
2882 const int nq = ir->GetNPoints();
2883 dim = mesh->Dimension();
2884 MFEM_VERIFY(dim == 3, "");
2886 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
2888 ne = trial_fes.GetNE();
2889 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
2890 mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2891 mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2892 mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2893 mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2894 dofs1D = mapsC->ndof;
2895 quad1D = mapsC->nqpt;
2896 dofs1Dtest = mapsCtest->ndof;
2898 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
2900 testType = test_el->GetDerivType();
2901 trialType = trial_el->GetDerivType();
2903 const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
2904 coeffDim = (DQ ? 3 : 1);
2906 pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType());
2908 Vector coeff(coeffDim * nq * ne);
2910 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
2916 MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
2919 for (int e=0; e<ne; ++e)
2921 ElementTransformation *tr = mesh->GetElementTransformation(e);
2923 for (int p=0; p<nq; ++p)
2927 DQ->Eval(V, *tr, ir->IntPoint(p));
2928 for (int i=0; i<coeffDim; ++i)
2930 coeffh(i, p, e) = V[i];
2935 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
2941 if (testType == mfem::FiniteElement::CURL &&
2942 trialType == mfem::FiniteElement::CURL && dim == 3)
2944 PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
2946 else if (testType == mfem::FiniteElement::DIV &&
2947 trialType == mfem::FiniteElement::CURL && dim == 3 &&
2948 test_fel->GetOrder() == trial_fel->GetOrder())
2950 PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
2955 MFEM_ABORT("Unknown kernel.
");
2959 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
2960 // integrated against H(curl) test functions corresponding to y.
2961 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2962 static void PAHcurlL2Apply3D(const int D1D,
2966 const Array<double> &bo,
2967 const Array<double> &bc,
2968 const Array<double> &bot,
2969 const Array<double> &bct,
2970 const Array<double> &gc,
2971 const Vector &pa_data,
2975 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
2976 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
2977 // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2978 // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
2979 // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
2980 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2981 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2982 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2984 constexpr static int VDIM = 3;
2986 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2987 auto Bc = Reshape(bc.Read(), Q1D, D1D);
2988 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2989 auto Bct = Reshape(bct.Read(), D1D, Q1D);
2990 auto Gc = Reshape(gc.Read(), Q1D, D1D);
2991 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
2992 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2993 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2997 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2998 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3000 for (int qz = 0; qz < Q1D; ++qz)
3002 for (int qy = 0; qy < Q1D; ++qy)
3004 for (int qx = 0; qx < Q1D; ++qx)
3006 for (int c = 0; c < VDIM; ++c)
3008 curl[qz][qy][qx][c] = 0.0;
3014 // We treat x, y, z components separately for optimization specific to each.
3020 const int D1Dz = D1D;
3021 const int D1Dy = D1D;
3022 const int D1Dx = D1D - 1;
3024 for (int dz = 0; dz < D1Dz; ++dz)
3026 double gradXY[MAX_Q1D][MAX_Q1D][2];
3027 for (int qy = 0; qy < Q1D; ++qy)
3029 for (int qx = 0; qx < Q1D; ++qx)
3031 for (int d = 0; d < 2; ++d)
3033 gradXY[qy][qx][d] = 0.0;
3038 for (int dy = 0; dy < D1Dy; ++dy)
3040 double massX[MAX_Q1D];
3041 for (int qx = 0; qx < Q1D; ++qx)
3046 for (int dx = 0; dx < D1Dx; ++dx)
3048 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3049 for (int qx = 0; qx < Q1D; ++qx)
3051 massX[qx] += t * Bo(qx,dx);
3055 for (int qy = 0; qy < Q1D; ++qy)
3057 const double wy = Bc(qy,dy);
3058 const double wDy = Gc(qy,dy);
3059 for (int qx = 0; qx < Q1D; ++qx)
3061 const double wx = massX[qx];
3062 gradXY[qy][qx][0] += wx * wDy;
3063 gradXY[qy][qx][1] += wx * wy;
3068 for (int qz = 0; qz < Q1D; ++qz)
3070 const double wz = Bc(qz,dz);
3071 const double wDz = Gc(qz,dz);
3072 for (int qy = 0; qy < Q1D; ++qy)
3074 for (int qx = 0; qx < Q1D; ++qx)
3076 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3077 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3078 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3084 osc += D1Dx * D1Dy * D1Dz;
3089 const int D1Dz = D1D;
3090 const int D1Dy = D1D - 1;
3091 const int D1Dx = D1D;
3093 for (int dz = 0; dz < D1Dz; ++dz)
3095 double gradXY[MAX_Q1D][MAX_Q1D][2];
3096 for (int qy = 0; qy < Q1D; ++qy)
3098 for (int qx = 0; qx < Q1D; ++qx)
3100 for (int d = 0; d < 2; ++d)
3102 gradXY[qy][qx][d] = 0.0;
3107 for (int dx = 0; dx < D1Dx; ++dx)
3109 double massY[MAX_Q1D];
3110 for (int qy = 0; qy < Q1D; ++qy)
3115 for (int dy = 0; dy < D1Dy; ++dy)
3117 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3118 for (int qy = 0; qy < Q1D; ++qy)
3120 massY[qy] += t * Bo(qy,dy);
3124 for (int qx = 0; qx < Q1D; ++qx)
3126 const double wx = Bc(qx,dx);
3127 const double wDx = Gc(qx,dx);
3128 for (int qy = 0; qy < Q1D; ++qy)
3130 const double wy = massY[qy];
3131 gradXY[qy][qx][0] += wDx * wy;
3132 gradXY[qy][qx][1] += wx * wy;
3137 for (int qz = 0; qz < Q1D; ++qz)
3139 const double wz = Bc(qz,dz);
3140 const double wDz = Gc(qz,dz);
3141 for (int qy = 0; qy < Q1D; ++qy)
3143 for (int qx = 0; qx < Q1D; ++qx)
3145 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3146 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3147 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3153 osc += D1Dx * D1Dy * D1Dz;
3158 const int D1Dz = D1D - 1;
3159 const int D1Dy = D1D;
3160 const int D1Dx = D1D;
3162 for (int dx = 0; dx < D1Dx; ++dx)
3164 double gradYZ[MAX_Q1D][MAX_Q1D][2];
3165 for (int qz = 0; qz < Q1D; ++qz)
3167 for (int qy = 0; qy < Q1D; ++qy)
3169 for (int d = 0; d < 2; ++d)
3171 gradYZ[qz][qy][d] = 0.0;
3176 for (int dy = 0; dy < D1Dy; ++dy)
3178 double massZ[MAX_Q1D];
3179 for (int qz = 0; qz < Q1D; ++qz)
3184 for (int dz = 0; dz < D1Dz; ++dz)
3186 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3187 for (int qz = 0; qz < Q1D; ++qz)
3189 massZ[qz] += t * Bo(qz,dz);
3193 for (int qy = 0; qy < Q1D; ++qy)
3195 const double wy = Bc(qy,dy);
3196 const double wDy = Gc(qy,dy);
3197 for (int qz = 0; qz < Q1D; ++qz)
3199 const double wz = massZ[qz];
3200 gradYZ[qz][qy][0] += wz * wy;
3201 gradYZ[qz][qy][1] += wz * wDy;
3206 for (int qx = 0; qx < Q1D; ++qx)
3208 const double wx = Bc(qx,dx);
3209 const double wDx = Gc(qx,dx);
3211 for (int qy = 0; qy < Q1D; ++qy)
3213 for (int qz = 0; qz < Q1D; ++qz)
3215 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3216 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3217 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3224 // Apply D operator.
3225 for (int qz = 0; qz < Q1D; ++qz)
3227 for (int qy = 0; qy < Q1D; ++qy)
3229 for (int qx = 0; qx < Q1D; ++qx)
3231 for (int c = 0; c < VDIM; ++c)
3233 curl[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
3239 for (int qz = 0; qz < Q1D; ++qz)
3241 double massXY[MAX_D1D][MAX_D1D];
3245 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3247 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3248 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3249 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3251 for (int dy = 0; dy < D1Dy; ++dy)
3253 for (int dx = 0; dx < D1Dx; ++dx)
3255 massXY[dy][dx] = 0.0;
3258 for (int qy = 0; qy < Q1D; ++qy)
3260 double massX[MAX_D1D];
3261 for (int dx = 0; dx < D1Dx; ++dx)
3265 for (int qx = 0; qx < Q1D; ++qx)
3267 for (int dx = 0; dx < D1Dx; ++dx)
3269 massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3272 for (int dy = 0; dy < D1Dy; ++dy)
3274 const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3275 for (int dx = 0; dx < D1Dx; ++dx)
3277 massXY[dy][dx] += massX[dx] * wy;
3282 for (int dz = 0; dz < D1Dz; ++dz)
3284 const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
3285 for (int dy = 0; dy < D1Dy; ++dy)
3287 for (int dx = 0; dx < D1Dx; ++dx)
3289 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
3294 osc += D1Dx * D1Dy * D1Dz;
3297 }); // end of element loop
3300 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3301 // integrated against H(curl) test functions corresponding to y.
3302 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3303 static void SmemPAHcurlL2Apply3D(const int D1D,
3307 const Array<double> &bo,
3308 const Array<double> &bc,
3309 const Array<double> &gc,
3310 const Vector &pa_data,
3314 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
3315 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
3317 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3318 auto Bc = Reshape(bc.Read(), Q1D, D1D);
3319 auto Gc = Reshape(gc.Read(), Q1D, D1D);
3320 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3321 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3322 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3324 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
3326 constexpr int VDIM = 3;
3327 constexpr int maxCoeffDim = 3;
3329 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
3330 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
3331 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
3333 double opc[maxCoeffDim];
3334 MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
3335 MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
3337 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
3339 MFEM_FOREACH_THREAD(qx,x,Q1D)
3341 MFEM_FOREACH_THREAD(qy,y,Q1D)
3343 MFEM_FOREACH_THREAD(qz,z,Q1D)
3345 for (int i=0; i<coeffDim; ++i)
3347 opc[i] = op(i,qx,qy,qz,e);
3353 const int tidx = MFEM_THREAD_ID(x);
3354 const int tidy = MFEM_THREAD_ID(y);
3355 const int tidz = MFEM_THREAD_ID(z);
3359 MFEM_FOREACH_THREAD(d,y,D1D)
3361 MFEM_FOREACH_THREAD(q,x,Q1D)
3363 sBc[d][q] = Bc(q,d);
3364 sGc[d][q] = Gc(q,d);
3367 sBo[d][q] = Bo(q,d);
3374 for (int qz=0; qz < Q1D; ++qz)
3378 MFEM_FOREACH_THREAD(qy,y,Q1D)
3380 MFEM_FOREACH_THREAD(qx,x,Q1D)
3382 for (int i=0; i<3; ++i)
3384 curl[qy][qx][i] = 0.0;
3391 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3393 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3394 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3395 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3397 MFEM_FOREACH_THREAD(dz,z,D1Dz)
3399 MFEM_FOREACH_THREAD(dy,y,D1Dy)
3401 MFEM_FOREACH_THREAD(dx,x,D1Dx)
3403 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3413 for (int i=0; i<coeffDim; ++i)
3415 sop[i][tidx][tidy] = opc[i];
3419 MFEM_FOREACH_THREAD(qy,y,Q1D)
3421 MFEM_FOREACH_THREAD(qx,x,Q1D)
3426 // We treat x, y, z components separately for optimization specific to each.
3427 if (c == 0) // x component
3429 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3431 for (int dz = 0; dz < D1Dz; ++dz)
3433 const double wz = sBc[dz][qz];
3434 const double wDz = sGc[dz][qz];
3436 for (int dy = 0; dy < D1Dy; ++dy)
3438 const double wy = sBc[dy][qy];
3439 const double wDy = sGc[dy][qy];
3441 for (int dx = 0; dx < D1Dx; ++dx)
3443 const double wx = sX[dz][dy][dx] * sBo[dx][qx];
3450 curl[qy][qx][1] += v; // (u_0)_{x_2}
3451 curl[qy][qx][2] -= u; // -(u_0)_{x_1}
3453 else if (c == 1) // y component
3455 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3457 for (int dz = 0; dz < D1Dz; ++dz)
3459 const double wz = sBc[dz][qz];
3460 const double wDz = sGc[dz][qz];
3462 for (int dy = 0; dy < D1Dy; ++dy)
3464 const double wy = sBo[dy][qy];
3466 for (int dx = 0; dx < D1Dx; ++dx)
3468 const double t = sX[dz][dy][dx];
3469 const double wx = t * sBc[dx][qx];
3470 const double wDx = t * sGc[dx][qx];
3478 curl[qy][qx][0] -= v; // -(u_1)_{x_2}
3479 curl[qy][qx][2] += u; // (u_1)_{x_0}
3483 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3485 for (int dz = 0; dz < D1Dz; ++dz)
3487 const double wz = sBo[dz][qz];
3489 for (int dy = 0; dy < D1Dy; ++dy)
3491 const double wy = sBc[dy][qy];
3492 const double wDy = sGc[dy][qy];
3494 for (int dx = 0; dx < D1Dx; ++dx)
3496 const double t = sX[dz][dy][dx];
3497 const double wx = t * sBc[dx][qx];
3498 const double wDx = t * sGc[dx][qx];
3506 curl[qy][qx][0] += v; // (u_2)_{x_1}
3507 curl[qy][qx][1] -= u; // -(u_2)_{x_0}
3513 osc += D1Dx * D1Dy * D1Dz;
3521 MFEM_FOREACH_THREAD(dz,z,D1D)
3523 const double wcz = sBc[dz][qz];
3524 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
3526 MFEM_FOREACH_THREAD(dy,y,D1D)
3528 MFEM_FOREACH_THREAD(dx,x,D1D)
3530 for (int qy = 0; qy < Q1D; ++qy)
3532 const double wcy = sBc[dy][qy];
3533 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
3535 for (int qx = 0; qx < Q1D; ++qx)
3537 const double O1 = sop[0][qx][qy];
3538 const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
3539 const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
3541 const double c1 = O1 * curl[qy][qx][0];
3542 const double c2 = O2 * curl[qy][qx][1];
3543 const double c3 = O3 * curl[qy][qx][2];
3545 const double wcx = sBc[dx][qx];
3549 const double wx = sBo[dx][qx];
3550 dxyz1 += c1 * wx * wcy * wcz;
3553 dxyz2 += c2 * wcx * wy * wcz;
3554 dxyz3 += c3 * wcx * wcy * wz;
3563 MFEM_FOREACH_THREAD(dz,z,D1D)
3565 MFEM_FOREACH_THREAD(dy,y,D1D)
3567 MFEM_FOREACH_THREAD(dx,x,D1D)
3571 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
3575 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
3579 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
3585 }); // end of element loop
3588 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3589 // integrated against H(div) test functions corresponding to y.
3590 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3591 static void PAHcurlHdivApply3D(const int D1D,
3595 const Array<double> &bo,
3596 const Array<double> &bc,
3597 const Array<double> &bot,
3598 const Array<double> &bct,
3599 const Array<double> &gc,
3600 const Vector &pa_data,
3604 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
3605 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
3606 // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
3607 // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
3608 // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
3609 // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3610 // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3611 // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3613 constexpr static int VDIM = 3;
3615 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3616 auto Bc = Reshape(bc.Read(), Q1D, D1D);
3617 auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
3618 auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
3619 auto Gc = Reshape(gc.Read(), Q1D, D1D);
3620 auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
3621 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3622 auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
3626 double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3627 // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3629 for (int qz = 0; qz < Q1D; ++qz)
3631 for (int qy = 0; qy < Q1D; ++qy)
3633 for (int qx = 0; qx < Q1D; ++qx)
3635 for (int c = 0; c < VDIM; ++c)
3637 curl[qz][qy][qx][c] = 0.0;
3643 // We treat x, y, z components separately for optimization specific to each.
3649 const int D1Dz = D1D;
3650 const int D1Dy = D1D;
3651 const int D1Dx = D1D - 1;
3653 for (int dz = 0; dz < D1Dz; ++dz)
3655 double gradXY[MAX_Q1D][MAX_Q1D][2];
3656 for (int qy = 0; qy < Q1D; ++qy)
3658 for (int qx = 0; qx < Q1D; ++qx)
3660 for (int d = 0; d < 2; ++d)
3662 gradXY[qy][qx][d] = 0.0;
3667 for (int dy = 0; dy < D1Dy; ++dy)
3669 double massX[MAX_Q1D];
3670 for (int qx = 0; qx < Q1D; ++qx)
3675 for (int dx = 0; dx < D1Dx; ++dx)
3677 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3678 for (int qx = 0; qx < Q1D; ++qx)
3680 massX[qx] += t * Bo(qx,dx);
3684 for (int qy = 0; qy < Q1D; ++qy)
3686 const double wy = Bc(qy,dy);
3687 const double wDy = Gc(qy,dy);
3688 for (int qx = 0; qx < Q1D; ++qx)
3690 const double wx = massX[qx];
3691 gradXY[qy][qx][0] += wx * wDy;
3692 gradXY[qy][qx][1] += wx * wy;
3697 for (int qz = 0; qz < Q1D; ++qz)
3699 const double wz = Bc(qz,dz);
3700 const double wDz = Gc(qz,dz);
3701 for (int qy = 0; qy < Q1D; ++qy)
3703 for (int qx = 0; qx < Q1D; ++qx)
3705 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3706 curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3707 curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3713 osc += D1Dx * D1Dy * D1Dz;
3718 const int D1Dz = D1D;
3719 const int D1Dy = D1D - 1;
3720 const int D1Dx = D1D;
3722 for (int dz = 0; dz < D1Dz; ++dz)
3724 double gradXY[MAX_Q1D][MAX_Q1D][2];
3725 for (int qy = 0; qy < Q1D; ++qy)
3727 for (int qx = 0; qx < Q1D; ++qx)
3729 for (int d = 0; d < 2; ++d)
3731 gradXY[qy][qx][d] = 0.0;
3736 for (int dx = 0; dx < D1Dx; ++dx)
3738 double massY[MAX_Q1D];
3739 for (int qy = 0; qy < Q1D; ++qy)
3744 for (int dy = 0; dy < D1Dy; ++dy)
3746 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3747 for (int qy = 0; qy < Q1D; ++qy)
3749 massY[qy] += t * Bo(qy,dy);
3753 for (int qx = 0; qx < Q1D; ++qx)
3755 const double wx = Bc(qx,dx);
3756 const double wDx = Gc(qx,dx);
3757 for (int qy = 0; qy < Q1D; ++qy)
3759 const double wy = massY[qy];
3760 gradXY[qy][qx][0] += wDx * wy;
3761 gradXY[qy][qx][1] += wx * wy;
3766 for (int qz = 0; qz < Q1D; ++qz)
3768 const double wz = Bc(qz,dz);
3769 const double wDz = Gc(qz,dz);
3770 for (int qy = 0; qy < Q1D; ++qy)
3772 for (int qx = 0; qx < Q1D; ++qx)
3774 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3775 curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3776 curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3782 osc += D1Dx * D1Dy * D1Dz;
3787 const int D1Dz = D1D - 1;
3788 const int D1Dy = D1D;
3789 const int D1Dx = D1D;
3791 for (int dx = 0; dx < D1Dx; ++dx)
3793 double gradYZ[MAX_Q1D][MAX_Q1D][2];
3794 for (int qz = 0; qz < Q1D; ++qz)
3796 for (int qy = 0; qy < Q1D; ++qy)
3798 for (int d = 0; d < 2; ++d)
3800 gradYZ[qz][qy][d] = 0.0;
3805 for (int dy = 0; dy < D1Dy; ++dy)
3807 double massZ[MAX_Q1D];
3808 for (int qz = 0; qz < Q1D; ++qz)
3813 for (int dz = 0; dz < D1Dz; ++dz)
3815 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3816 for (int qz = 0; qz < Q1D; ++qz)
3818 massZ[qz] += t * Bo(qz,dz);
3822 for (int qy = 0; qy < Q1D; ++qy)
3824 const double wy = Bc(qy,dy);
3825 const double wDy = Gc(qy,dy);
3826 for (int qz = 0; qz < Q1D; ++qz)
3828 const double wz = massZ[qz];
3829 gradYZ[qz][qy][0] += wz * wy;
3830 gradYZ[qz][qy][1] += wz * wDy;
3835 for (int qx = 0; qx < Q1D; ++qx)
3837 const double wx = Bc(qx,dx);
3838 const double wDx = Gc(qx,dx);
3840 for (int qy = 0; qy < Q1D; ++qy)
3842 for (int qz = 0; qz < Q1D; ++qz)
3844 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3845 curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3846 curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3853 // Apply D operator.
3854 for (int qz = 0; qz < Q1D; ++qz)
3856 for (int qy = 0; qy < Q1D; ++qy)
3858 for (int qx = 0; qx < Q1D; ++qx)
3860 const double O11 = op(qx,qy,qz,0,e);
3861 const double O12 = op(qx,qy,qz,1,e);
3862 const double O13 = op(qx,qy,qz,2,e);
3863 const double O22 = op(qx,qy,qz,3,e);
3864 const double O23 = op(qx,qy,qz,4,e);
3865 const double O33 = op(qx,qy,qz,5,e);
3867 const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
3868 (O13 * curl[qz][qy][qx][2]);
3869 const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
3870 (O23 * curl[qz][qy][qx][2]);
3871 const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
3872 (O33 * curl[qz][qy][qx][2]);
3874 curl[qz][qy][qx][0] = c1;
3875 curl[qz][qy][qx][1] = c2;
3876 curl[qz][qy][qx][2] = c3;
3881 for (int qz = 0; qz < Q1D; ++qz)
3883 double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
3887 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3889 const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
3890 const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
3891 const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
3893 for (int dy = 0; dy < D1Dy; ++dy)
3895 for (int dx = 0; dx < D1Dx; ++dx)
3897 massXY[dy][dx] = 0.0;
3900 for (int qy = 0; qy < Q1D; ++qy)
3902 double massX[HCURL_MAX_D1D];
3903 for (int dx = 0; dx < D1Dx; ++dx)
3907 for (int qx = 0; qx < Q1D; ++qx)
3909 for (int dx = 0; dx < D1Dx; ++dx)
3911 massX[dx] += curl[qz][qy][qx][c] *
3912 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
3915 for (int dy = 0; dy < D1Dy; ++dy)
3917 const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
3918 for (int dx = 0; dx < D1Dx; ++dx)
3920 massXY[dy][dx] += massX[dx] * wy;
3925 for (int dz = 0; dz < D1Dz; ++dz)
3927 const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
3928 for (int dy = 0; dy < D1Dy; ++dy)
3930 for (int dx = 0; dx < D1Dx; ++dx)
3932 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
3933 massXY[dy][dx] * wz;
3938 osc += D1Dx * D1Dy * D1Dz;
3941 }); // end of element loop
3944 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3946 if (testType == mfem::FiniteElement::CURL &&
3947 trialType == mfem::FiniteElement::CURL && dim == 3)
3949 if (Device::Allows(Backend::DEVICE_MASK))
3951 const int ID = (dofs1D << 4) | quad1D;
3954 case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, coeffDim, ne,
3956 mapsC->G, pa_data, x, y);
3957 case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, coeffDim, ne,
3959 mapsC->G, pa_data, x, y);
3960 case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, coeffDim, ne,
3962 mapsC->G, pa_data, x, y);
3963 case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, coeffDim, ne,
3965 mapsC->G, pa_data, x, y);
3966 default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B,
3968 mapsC->G, pa_data, x, y);
3972 PAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
3973 mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
3975 else if (testType == mfem::FiniteElement::DIV &&
3976 trialType == mfem::FiniteElement::CURL && dim == 3)
3977 PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
3978 mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
3982 MFEM_ABORT("Unsupported dimension or
space!
");
3986 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
3988 const FiniteElementSpace &test_fes)
3990 // Assumes tensor-product elements, with vector test and trial spaces.
3991 Mesh *mesh = trial_fes.GetMesh();
3992 const FiniteElement *trial_fel = trial_fes.GetFE(0);
3993 const FiniteElement *test_fel = test_fes.GetFE(0);
3995 const VectorTensorFiniteElement *trial_el =
3996 dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3999 const VectorTensorFiniteElement *test_el =
4000 dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
4003 const IntegrationRule *ir
4004 = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
4005 *mesh->GetElementTransformation(0));
4006 const int dims = trial_el->GetDim();
4007 MFEM_VERIFY(dims == 3, "");
4009 const int nq = ir->GetNPoints();
4010 dim = mesh->Dimension();
4011 MFEM_VERIFY(dim == 3, "");
4013 MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
4015 ne = trial_fes.GetNE();
4016 geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
4017 mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
4018 mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
4019 dofs1D = mapsC->ndof;
4020 quad1D = mapsC->nqpt;
4022 MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
4024 coeffDim = DQ ? 3 : 1;
4026 pa_data.SetSize(coeffDim * nq * ne, Device::GetMemoryType());
4028 Vector coeff(coeffDim * nq * ne);
4030 auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
4036 MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
4039 for (int e=0; e<ne; ++e)
4041 ElementTransformation *tr = mesh->GetElementTransformation(e);
4043 for (int p=0; p<nq; ++p)
4047 DQ->Eval(V, *tr, ir->IntPoint(p));
4048 for (int i=0; i<coeffDim; ++i)
4050 coeffh(i, p, e) = V[i];
4055 coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
4061 testType = test_el->GetDerivType();
4062 trialType = trial_el->GetDerivType();
4064 if (trialType == mfem::FiniteElement::CURL && dim == 3)
4066 PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
4070 MFEM_ABORT("Unknown kernel.
");
4074 // Apply to x corresponding to DOF's in H(curl) (trial), integrated against curl
4075 // of H(curl) test functions corresponding to y.
4076 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4077 static void PAHcurlL2Apply3DTranspose(const int D1D,
4081 const Array<double> &bo,
4082 const Array<double> &bc,
4083 const Array<double> &bot,
4084 const Array<double> &bct,
4085 const Array<double> &gct,
4086 const Vector &pa_data,
4090 // See PAHcurlL2Apply3D for comments.
4092 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
4093 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
4095 constexpr static int VDIM = 3;
4097 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4098 auto Bc = Reshape(bc.Read(), Q1D, D1D);
4099 auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
4100 auto Bct = Reshape(bct.Read(), D1D, Q1D);
4101 auto Gct = Reshape(gct.Read(), D1D, Q1D);
4102 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4103 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4104 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4108 double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4110 for (int qz = 0; qz < Q1D; ++qz)
4112 for (int qy = 0; qy < Q1D; ++qy)
4114 for (int qx = 0; qx < Q1D; ++qx)
4116 for (int c = 0; c < VDIM; ++c)
4118 mass[qz][qy][qx][c] = 0.0;
4126 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4128 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4129 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4130 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4132 for (int dz = 0; dz < D1Dz; ++dz)
4134 double massXY[MAX_Q1D][MAX_Q1D];
4135 for (int qy = 0; qy < Q1D; ++qy)
4137 for (int qx = 0; qx < Q1D; ++qx)
4139 massXY[qy][qx] = 0.0;
4143 for (int dy = 0; dy < D1Dy; ++dy)
4145 double massX[MAX_Q1D];
4146 for (int qx = 0; qx < Q1D; ++qx)
4151 for (int dx = 0; dx < D1Dx; ++dx)
4153 const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4154 for (int qx = 0; qx < Q1D; ++qx)
4156 massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
4160 for (int qy = 0; qy < Q1D; ++qy)
4162 const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
4163 for (int qx = 0; qx < Q1D; ++qx)
4165 const double wx = massX[qx];
4166 massXY[qy][qx] += wx * wy;
4171 for (int qz = 0; qz < Q1D; ++qz)
4173 const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
4174 for (int qy = 0; qy < Q1D; ++qy)
4176 for (int qx = 0; qx < Q1D; ++qx)
4178 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4184 osc += D1Dx * D1Dy * D1Dz;
4185 } // loop (c) over components
4187 // Apply D operator.
4188 for (int qz = 0; qz < Q1D; ++qz)
4190 for (int qy = 0; qy < Q1D; ++qy)
4192 for (int qx = 0; qx < Q1D; ++qx)
4194 for (int c=0; c<VDIM; ++c)
4196 mass[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
4205 const int D1Dz = D1D;
4206 const int D1Dy = D1D;
4207 const int D1Dx = D1D - 1;
4209 for (int qz = 0; qz < Q1D; ++qz)
4211 double gradXY12[MAX_D1D][MAX_D1D];
4212 double gradXY21[MAX_D1D][MAX_D1D];
4214 for (int dy = 0; dy < D1Dy; ++dy)
4216 for (int dx = 0; dx < D1Dx; ++dx)
4218 gradXY12[dy][dx] = 0.0;
4219 gradXY21[dy][dx] = 0.0;
4222 for (int qy = 0; qy < Q1D; ++qy)
4224 double massX[MAX_D1D][2];
4225 for (int dx = 0; dx < D1Dx; ++dx)
4227 for (int n = 0; n < 2; ++n)
4232 for (int qx = 0; qx < Q1D; ++qx)
4234 for (int dx = 0; dx < D1Dx; ++dx)
4236 const double wx = Bot(dx,qx);
4238 massX[dx][0] += wx * mass[qz][qy][qx][1];
4239 massX[dx][1] += wx * mass[qz][qy][qx][2];
4242 for (int dy = 0; dy < D1Dy; ++dy)
4244 const double wy = Bct(dy,qy);
4245 const double wDy = Gct(dy,qy);
4247 for (int dx = 0; dx < D1Dx; ++dx)
4249 gradXY21[dy][dx] += massX[dx][0] * wy;
4250 gradXY12[dy][dx] += massX[dx][1] * wDy;
4255 for (int dz = 0; dz < D1Dz; ++dz)
4257 const double wz = Bct(dz,qz);
4258 const double wDz = Gct(dz,qz);
4259 for (int dy = 0; dy < D1Dy; ++dy)
4261 for (int dx = 0; dx < D1Dx; ++dx)
4263 // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4264 // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
4265 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4266 e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
4272 osc += D1Dx * D1Dy * D1Dz;
4277 const int D1Dz = D1D;
4278 const int D1Dy = D1D - 1;
4279 const int D1Dx = D1D;
4281 for (int qz = 0; qz < Q1D; ++qz)
4283 double gradXY02[MAX_D1D][MAX_D1D];
4284 double gradXY20[MAX_D1D][MAX_D1D];
4286 for (int dy = 0; dy < D1Dy; ++dy)
4288 for (int dx = 0; dx < D1Dx; ++dx)
4290 gradXY02[dy][dx] = 0.0;
4291 gradXY20[dy][dx] = 0.0;
4294 for (int qx = 0; qx < Q1D; ++qx)
4296 double massY[MAX_D1D][2];
4297 for (int dy = 0; dy < D1Dy; ++dy)
4302 for (int qy = 0; qy < Q1D; ++qy)
4304 for (int dy = 0; dy < D1Dy; ++dy)
4306 const double wy = Bot(dy,qy);
4308 massY[dy][0] += wy * mass[qz][qy][qx][2];
4309 massY[dy][1] += wy * mass[qz][qy][qx][0];
4312 for (int dx = 0; dx < D1Dx; ++dx)
4314 const double wx = Bct(dx,qx);
4315 const double wDx = Gct(dx,qx);
4317 for (int dy = 0; dy < D1Dy; ++dy)
4319 gradXY02[dy][dx] += massY[dy][0] * wDx;
4320 gradXY20[dy][dx] += massY[dy][1] * wx;
4325 for (int dz = 0; dz < D1Dz; ++dz)
4327 const double wz = Bct(dz,qz);
4328 const double wDz = Gct(dz,qz);
4329 for (int dy = 0; dy < D1Dy; ++dy)
4331 for (int dx = 0; dx < D1Dx; ++dx)
4333 // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4334 // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
4335 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4336 e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
4342 osc += D1Dx * D1Dy * D1Dz;
4347 const int D1Dz = D1D - 1;
4348 const int D1Dy = D1D;
4349 const int D1Dx = D1D;
4351 for (int qx = 0; qx < Q1D; ++qx)
4353 double gradYZ01[MAX_D1D][MAX_D1D];
4354 double gradYZ10[MAX_D1D][MAX_D1D];
4356 for (int dy = 0; dy < D1Dy; ++dy)
4358 for (int dz = 0; dz < D1Dz; ++dz)
4360 gradYZ01[dz][dy] = 0.0;
4361 gradYZ10[dz][dy] = 0.0;
4364 for (int qy = 0; qy < Q1D; ++qy)
4366 double massZ[MAX_D1D][2];
4367 for (int dz = 0; dz < D1Dz; ++dz)
4369 for (int n = 0; n < 2; ++n)
4374 for (int qz = 0; qz < Q1D; ++qz)
4376 for (int dz = 0; dz < D1Dz; ++dz)
4378 const double wz = Bot(dz,qz);
4380 massZ[dz][0] += wz * mass[qz][qy][qx][0];
4381 massZ[dz][1] += wz * mass[qz][qy][qx][1];
4384 for (int dy = 0; dy < D1Dy; ++dy)
4386 const double wy = Bct(dy,qy);
4387 const double wDy = Gct(dy,qy);
4389 for (int dz = 0; dz < D1Dz; ++dz)
4391 gradYZ01[dz][dy] += wy * massZ[dz][1];
4392 gradYZ10[dz][dy] += wDy * massZ[dz][0];
4397 for (int dx = 0; dx < D1Dx; ++dx)
4399 const double wx = Bct(dx,qx);
4400 const double wDx = Gct(dx,qx);
4402 for (int dy = 0; dy < D1Dy; ++dy)
4404 for (int dz = 0; dz < D1Dz; ++dz)
4406 // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4407 // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
4408 Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4409 e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
4418 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4419 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
4423 const Array<double> &bo,
4424 const Array<double> &bc,
4425 const Array<double> &gc,
4426 const Vector &pa_data,
4430 MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D
");
4431 MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D
");
4433 auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4434 auto Bc = Reshape(bc.Read(), Q1D, D1D);
4435 auto Gc = Reshape(gc.Read(), Q1D, D1D);
4436 auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4437 auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4438 auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4440 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
4442 constexpr int VDIM = 3;
4443 constexpr int maxCoeffDim = 3;
4445 MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
4446 MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
4447 MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
4449 double opc[maxCoeffDim];
4450 MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
4451 MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
4453 MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
4455 MFEM_FOREACH_THREAD(qx,x,Q1D)
4457 MFEM_FOREACH_THREAD(qy,y,Q1D)
4459 MFEM_FOREACH_THREAD(qz,z,Q1D)
4461 for (int i=0; i<coeffDim; ++i)
4463 opc[i] = op(i,qx,qy,qz,e);
4469 const int tidx = MFEM_THREAD_ID(x);
4470 const int tidy = MFEM_THREAD_ID(y);
4471 const int tidz = MFEM_THREAD_ID(z);
4475 MFEM_FOREACH_THREAD(d,y,D1D)
4477 MFEM_FOREACH_THREAD(q,x,Q1D)
4479 sBc[d][q] = Bc(q,d);
4480 sGc[d][q] = Gc(q,d);
4483 sBo[d][q] = Bo(q,d);
4490 for (int qz=0; qz < Q1D; ++qz)
4494 MFEM_FOREACH_THREAD(qy,y,Q1D)
4496 MFEM_FOREACH_THREAD(qx,x,Q1D)
4498 for (int i=0; i<3; ++i)
4500 mass[qy][qx][i] = 0.0;
4507 for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4509 const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4510 const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4511 const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4513 MFEM_FOREACH_THREAD(dz,z,D1Dz)
4515 MFEM_FOREACH_THREAD(dy,y,D1Dy)
4517 MFEM_FOREACH_THREAD(dx,x,D1Dx)
4519 sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4529 for (int i=0; i<coeffDim; ++i)
4531 sop[i][tidx][tidy] = opc[i];
4535 MFEM_FOREACH_THREAD(qy,y,Q1D)
4537 MFEM_FOREACH_THREAD(qx,x,Q1D)
4541 for (int dz = 0; dz < D1Dz; ++dz)
4543 const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
4545 for (int dy = 0; dy < D1Dy; ++dy)
4547 const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
4549 for (int dx = 0; dx < D1Dx; ++dx)
4551 const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
4557 mass[qy][qx][c] += u;
4562 osc += D1Dx * D1Dy * D1Dz;
4570 MFEM_FOREACH_THREAD(dz,z,D1D)
4572 const double wcz = sBc[dz][qz];
4573 const double wcDz = sGc[dz][qz];
4574 const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4576 MFEM_FOREACH_THREAD(dy,y,D1D)
4578 MFEM_FOREACH_THREAD(dx,x,D1D)
4580 for (int qy = 0; qy < Q1D; ++qy)
4582 const double wcy = sBc[dy][qy];
4583 const double wcDy = sGc[dy][qy];
4584 const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4586 for (int qx = 0; qx < Q1D; ++qx)
4588 const double O1 = sop[0][qx][qy];
4589 const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
4590 const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
4592 const double c1 = O1 * mass[qy][qx][0];
4593 const double c2 = O2 * mass[qy][qx][1];
4594 const double c3 = O3 * mass[qy][qx][2];
4596 const double wcx = sBc[dx][qx];
4597 const double wDx = sGc[dx][qx];
4601 const double wx = sBo[dx][qx];
4602 dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
4605 dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
4607 dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
4616 MFEM_FOREACH_THREAD(dz,z,D1D)
4618 MFEM_FOREACH_THREAD(dy,y,D1D)
4620 MFEM_FOREACH_THREAD(dx,x,D1D)
4624 Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4628 Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4632 Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4638 }); // end of element loop
4641 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4643 if (testType == mfem::FiniteElement::CURL &&
4644 trialType == mfem::FiniteElement::CURL && dim == 3)
4646 if (Device::Allows(Backend::DEVICE_MASK))
4648 const int ID = (dofs1D << 4) | quad1D;
4651 case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, coeffDim,
4652 ne, mapsO->B, mapsC->B,
4653 mapsC->G, pa_data, x, y);
4654 case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, coeffDim,
4655 ne, mapsO->B, mapsC->B,
4656 mapsC->G, pa_data, x, y);
4657 case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, coeffDim,
4658 ne, mapsO->B, mapsC->B,
4659 mapsC->G, pa_data, x, y);
4660 case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, coeffDim,
4661 ne, mapsO->B, mapsC->B,
4662 mapsC->G, pa_data, x, y);
4663 default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne,
4665 mapsC->G, pa_data, x, y);
4669 PAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
4670 mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
4674 MFEM_ABORT("Unsupported dimension or
space!
");
4678 template void SmemPAHcurlMassAssembleDiagonal3D<0,0>(const int D1D,
4681 const bool symmetric,
4682 const Array<double> &bo,
4683 const Array<double> &bc,
4684 const Vector &pa_data,
4687 template void SmemPAHcurlMassAssembleDiagonal3D<2,3>(const int D1D,
4690 const bool symmetric,
4691 const Array<double> &bo,
4692 const Array<double> &bc,
4693 const Vector &pa_data,
4696 template void SmemPAHcurlMassAssembleDiagonal3D<3,4>(const int D1D,
4699 const bool symmetric,
4700 const Array<double> &bo,
4701 const Array<double> &bc,
4702 const Vector &pa_data,
4705 template void SmemPAHcurlMassAssembleDiagonal3D<4,5>(const int D1D,
4708 const bool symmetric,
4709 const Array<double> &bo,
4710 const Array<double> &bc,
4711 const Vector &pa_data,
4714 template void SmemPAHcurlMassAssembleDiagonal3D<5,6>(const int D1D,
4717 const bool symmetric,
4718 const Array<double> &bo,
4719 const Array<double> &bc,
4720 const Vector &pa_data,
4723 template void SmemPAHcurlMassApply3D<0,0>(const int D1D,
4726 const bool symmetric,
4727 const Array<double> &bo,
4728 const Array<double> &bc,
4729 const Array<double> &bot,
4730 const Array<double> &bct,
4731 const Vector &pa_data,
4735 template void SmemPAHcurlMassApply3D<2,3>(const int D1D,
4738 const bool symmetric,
4739 const Array<double> &bo,
4740 const Array<double> &bc,
4741 const Array<double> &bot,
4742 const Array<double> &bct,
4743 const Vector &pa_data,
4747 template void SmemPAHcurlMassApply3D<3,4>(const int D1D,
4750 const bool symmetric,
4751 const Array<double> &bo,
4752 const Array<double> &bc,
4753 const Array<double> &bot,
4754 const Array<double> &bct,
4755 const Vector &pa_data,
4759 template void SmemPAHcurlMassApply3D<4,5>(const int D1D,
4762 const bool symmetric,
4763 const Array<double> &bo,
4764 const Array<double> &bc,
4765 const Array<double> &bot,
4766 const Array<double> &bct,
4767 const Vector &pa_data,
4771 template void SmemPAHcurlMassApply3D<5,6>(const int D1D,
4774 const bool symmetric,
4775 const Array<double> &bo,
4776 const Array<double> &bc,
4777 const Array<double> &bot,
4778 const Array<double> &bct,
4779 const Vector &pa_data,
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void PAHcurlMassAssembleDiagonal2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Vector &pa_data, Vector &diag)
constexpr int HCURL_MAX_D1D
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
void PAHcurlMassAssembleDiagonal3D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Vector &pa_data, Vector &diag)
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void PAHcurlMassApply2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Array< double > &bot, const Array< double > &bct, const Vector &pa_data, const Vector &x, Vector &y)
constexpr int HCURL_MAX_Q1D