12 #include "../general/forall.hpp"
15 #include "ceed/diffusion.hpp"
26 static void OccaPADiffusionSetup2D(
const int D1D,
29 const Array<double> &W,
34 occa::properties props;
35 props[
"defines/D1D"] = D1D;
36 props[
"defines/Q1D"] = Q1D;
41 const bool const_c = C.Size() == 1;
42 const occa_id_t id = std::make_pair(D1D,Q1D);
44 if (OccaDiffSetup2D_ker.find(
id) == OccaDiffSetup2D_ker.end())
46 const occa::kernel DiffusionSetup2D =
48 "DiffusionSetup2D", props);
49 OccaDiffSetup2D_ker.emplace(
id, DiffusionSetup2D);
51 OccaDiffSetup2D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
54 static void OccaPADiffusionSetup3D(
const int D1D,
57 const Array<double> &W,
62 occa::properties props;
63 props[
"defines/D1D"] = D1D;
64 props[
"defines/Q1D"] = Q1D;
69 const bool const_c = C.Size() == 1;
70 const occa_id_t id = std::make_pair(D1D,Q1D);
72 if (OccaDiffSetup3D_ker.find(
id) == OccaDiffSetup3D_ker.end())
74 const occa::kernel DiffusionSetup3D =
76 "DiffusionSetup3D", props);
77 OccaDiffSetup3D_ker.emplace(
id, DiffusionSetup3D);
79 OccaDiffSetup3D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
81 #endif // MFEM_USE_OCCA
92 const bool symmetric = (coeffDim != 4);
93 const bool const_c = c.Size() == 1;
94 MFEM_VERIFY(coeffDim < 3 ||
95 !const_c,
"Constant matrix coefficient not supported");
96 const auto W =
Reshape(w.Read(), Q1D,Q1D);
97 const auto J =
Reshape(j.Read(), Q1D,Q1D,2,2,NE);
98 const auto C = const_c ?
Reshape(c.Read(), 1,1,1,1) :
99 Reshape(c.Read(), coeffDim,Q1D,Q1D,NE);
100 auto D =
Reshape(d.Write(), Q1D,Q1D, symmetric ? 3 : 4, NE);
101 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
103 MFEM_FOREACH_THREAD(qx,x,Q1D)
105 MFEM_FOREACH_THREAD(qy,y,Q1D)
107 const double J11 = J(qx,qy,0,0,e);
108 const double J21 = J(qx,qy,1,0,e);
109 const double J12 = J(qx,qy,0,1,e);
110 const double J22 = J(qx,qy,1,1,e);
111 const double w_detJ = W(qx,qy) / ((J11*J22)-(J21*J12));
112 if (coeffDim == 3 || coeffDim == 4)
115 const double M11 = C(0,qx,qy,e);
116 const double M12 = C(1,qx,qy,e);
117 const double M21 = symmetric ? M12 : C(2,qx,qy,e);
118 const double M22 = symmetric ? C(2,qx,qy,e) : C(3,qx,qy,e);
119 const double R11 = M11*J22 - M12*J12;
120 const double R21 = M21*J22 - M22*J12;
121 const double R12 = -M11*J21 + M12*J11;
122 const double R22 = -M21*J21 + M22*J11;
125 D(qx,qy,0,e) = w_detJ * ( J22*R11 - J12*R21);
126 D(qx,qy,1,e) = w_detJ * (-J21*R11 + J11*R21);
127 D(qx,qy,2,e) = w_detJ * (symmetric ? (-J21*R12 + J11*R22) :
128 (J22*R12 - J12*R22));
131 D(qx,qy,3,e) = w_detJ * (-J21*R12 + J11*R22);
136 const double C1 = const_c ? C(0,0,0,0) : C(0,qx,qy,e);
137 const double C2 = const_c ? C(0,0,0,0) :
138 (coeffDim == 2 ? C(1,qx,qy,e) : C(0,qx,qy,e));
140 D(qx,qy,0,e) = w_detJ * (C2*J12*J12 + C1*J22*J22);
141 D(qx,qy,1,e) = -w_detJ * (C2*J12*J11 + C1*J22*J21);
142 D(qx,qy,2,e) = w_detJ * (C2*J11*J11 + C1*J21*J21);
159 MFEM_VERIFY(coeffDim == 1,
"Matrix and vector coefficients not supported");
160 constexpr
int DIM = 2;
161 constexpr
int SDIM = 3;
162 const bool const_c = c.Size() == 1;
163 const auto W =
Reshape(w.Read(), Q1D,Q1D);
165 const auto C = const_c ?
Reshape(c.Read(), 1,1,1) :
167 auto D =
Reshape(d.Write(), Q1D,Q1D, 3, NE);
168 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
170 MFEM_FOREACH_THREAD(qx,x,Q1D)
172 MFEM_FOREACH_THREAD(qy,y,Q1D)
174 const double wq = W(qx,qy);
175 const double J11 = J(qx,qy,0,0,e);
176 const double J21 = J(qx,qy,1,0,e);
177 const double J31 = J(qx,qy,2,0,e);
178 const double J12 = J(qx,qy,0,1,e);
179 const double J22 = J(qx,qy,1,1,e);
180 const double J32 = J(qx,qy,2,1,e);
181 const double E = J11*J11 + J21*J21 + J31*J31;
182 const double G = J12*J12 + J22*J22 + J32*J32;
183 const double F = J11*J12 + J21*J22 + J31*J32;
184 const double iw = 1.0 / sqrt(E*G - F*F);
185 const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
186 const double alpha = wq * coeff * iw;
187 D(qx,qy,0,e) = alpha * G;
188 D(qx,qy,1,e) = -alpha * F;
189 D(qx,qy,2,e) = alpha * E;
204 const bool symmetric = (coeffDim != 9);
205 const bool const_c = c.
Size() == 1;
206 MFEM_VERIFY(coeffDim < 6 ||
207 !const_c,
"Constant matrix coefficient not supported");
209 const auto J =
Reshape(j.
Read(), Q1D,Q1D,Q1D,3,3,NE);
210 const auto C = const_c ?
Reshape(c.
Read(), 1,1,1,1,1) :
212 auto D =
Reshape(d.
Write(), Q1D,Q1D,Q1D, symmetric ? 6 : 9, NE);
213 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
215 MFEM_FOREACH_THREAD(qx,x,Q1D)
217 MFEM_FOREACH_THREAD(qy,y,Q1D)
219 MFEM_FOREACH_THREAD(qz,z,Q1D)
221 const double J11 = J(qx,qy,qz,0,0,e);
222 const double J21 = J(qx,qy,qz,1,0,e);
223 const double J31 = J(qx,qy,qz,2,0,e);
224 const double J12 = J(qx,qy,qz,0,1,e);
225 const double J22 = J(qx,qy,qz,1,1,e);
226 const double J32 = J(qx,qy,qz,2,1,e);
227 const double J13 = J(qx,qy,qz,0,2,e);
228 const double J23 = J(qx,qy,qz,1,2,e);
229 const double J33 = J(qx,qy,qz,2,2,e);
230 const double detJ = J11 * (J22 * J33 - J32 * J23) -
231 J21 * (J12 * J33 - J32 * J13) +
232 J31 * (J12 * J23 - J22 * J13);
233 const double w_detJ = W(qx,qy,qz) / detJ;
235 const double A11 = (J22 * J33) - (J23 * J32);
236 const double A12 = (J32 * J13) - (J12 * J33);
237 const double A13 = (J12 * J23) - (J22 * J13);
238 const double A21 = (J31 * J23) - (J21 * J33);
239 const double A22 = (J11 * J33) - (J13 * J31);
240 const double A23 = (J21 * J13) - (J11 * J23);
241 const double A31 = (J21 * J32) - (J31 * J22);
242 const double A32 = (J31 * J12) - (J11 * J32);
243 const double A33 = (J11 * J22) - (J12 * J21);
245 if (coeffDim == 6 || coeffDim == 9)
248 const double M11 = C(0, qx,qy,qz, e);
249 const double M12 = C(1, qx,qy,qz, e);
250 const double M13 = C(2, qx,qy,qz, e);
251 const double M21 = (!symmetric) ? C(3, qx,qy,qz, e) : M12;
252 const double M22 = (!symmetric) ? C(4, qx,qy,qz, e) : C(3, qx,qy,qz, e);
253 const double M23 = (!symmetric) ? C(5, qx,qy,qz, e) : C(4, qx,qy,qz, e);
254 const double M31 = (!symmetric) ? C(6, qx,qy,qz, e) : M13;
255 const double M32 = (!symmetric) ? C(7, qx,qy,qz, e) : M23;
256 const double M33 = (!symmetric) ? C(8, qx,qy,qz, e) : C(5, qx,qy,qz, e);
258 const double R11 = M11*A11 + M12*A12 + M13*A13;
259 const double R12 = M11*A21 + M12*A22 + M13*A23;
260 const double R13 = M11*A31 + M12*A32 + M13*A33;
261 const double R21 = M21*A11 + M22*A12 + M23*A13;
262 const double R22 = M21*A21 + M22*A22 + M23*A23;
263 const double R23 = M21*A31 + M22*A32 + M23*A33;
264 const double R31 = M31*A11 + M32*A12 + M33*A13;
265 const double R32 = M31*A21 + M32*A22 + M33*A23;
266 const double R33 = M31*A31 + M32*A32 + M33*A33;
269 D(qx,qy,qz,0,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31);
270 const double D12 = w_detJ * (A11*R12 + A12*R22 + A13*R32);
271 D(qx,qy,qz,1,e) = D12;
272 D(qx,qy,qz,2,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33);
274 const double D21 = w_detJ * (A21*R11 + A22*R21 + A23*R31);
275 const double D22 = w_detJ * (A21*R12 + A22*R22 + A23*R32);
276 const double D23 = w_detJ * (A21*R13 + A22*R23 + A23*R33);
278 const double D33 = w_detJ * (A31*R13 + A32*R23 + A33*R33);
280 D(qx,qy,qz,3,e) = symmetric ? D22 : D21;
281 D(qx,qy,qz,4,e) = symmetric ? D23 : D22;
282 D(qx,qy,qz,5,e) = symmetric ? D33 : D23;
286 D(qx,qy,qz,6,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31);
287 D(qx,qy,qz,7,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32);
288 D(qx,qy,qz,8,e) = D33;
293 const double C1 = const_c ? C(0,0,0,0,0) : C(0,qx,qy,qz,e);
294 const double C2 = const_c ? C(0,0,0,0,0) :
295 (coeffDim == 3 ? C(1,qx,qy,qz,e) : C(0,qx,qy,qz,e));
296 const double C3 = const_c ? C(0,0,0,0,0) :
297 (coeffDim == 3 ? C(2,qx,qy,qz,e) : C(0,qx,qy,qz,e));
300 D(qx,qy,qz,0,e) = w_detJ * (C1*A11*A11 + C2*A12*A12 + C3*A13*A13);
301 D(qx,qy,qz,1,e) = w_detJ * (C1*A11*A21 + C2*A12*A22 + C3*A13*A23);
302 D(qx,qy,qz,2,e) = w_detJ * (C1*A11*A31 + C2*A12*A32 + C3*A13*A33);
303 D(qx,qy,qz,3,e) = w_detJ * (C1*A21*A21 + C2*A22*A22 + C3*A23*A23);
304 D(qx,qy,qz,4,e) = w_detJ * (C1*A21*A31 + C2*A22*A32 + C3*A23*A33);
305 D(qx,qy,qz,5,e) = w_detJ * (C1*A31*A31 + C2*A32*A32 + C3*A33*A33);
313 static void PADiffusionSetup(
const int dim,
319 const Array<double> &W,
324 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADiffusionSetup"); }
330 OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
334 MFEM_CONTRACT_VAR(D1D);
335 #endif // MFEM_USE_OCCA
344 OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
347 #endif // MFEM_USE_OCCA
354 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
355 Device::GetDeviceMemoryType() : pa_mt;
359 if (mesh->
GetNE() == 0) {
return; }
362 if (DeviceCanUseCeed())
365 MFEM_VERIFY(!VQ && !MQ && !SMQ,
366 "Only scalar coefficient supported for DiffusionIntegrator"
368 ceedOp =
new ceed::PADiffusionIntegrator(fes, *ir, Q);
371 const int dims = el.
GetDim();
372 const int symmDims = (dims * (dims + 1)) / 2;
383 const int MQfullDim = MQ ? MQ->GetHeight() * MQ->GetWidth() : 0;
387 MFEM_VERIFY(MQ->GetHeight() == dim && MQ->GetWidth() ==
dim,
"");
389 coeffDim = MQfullDim;
391 coeff.
SetSize(MQfullDim * nq * ne);
397 for (
int e=0; e<ne; ++e)
400 for (
int p=0;
p<nq; ++
p)
403 for (
int i=0; i<
dim; ++i)
404 for (
int j=0; j<
dim; ++j)
406 C(j+(i*dim),
p, e) = M(i,j);
413 MFEM_VERIFY(SMQ->GetSize() ==
dim,
"");
415 coeff.
SetSize(symmDims * nq * ne);
422 for (
int e=0; e<ne; ++e)
425 for (
int p=0;
p<nq; ++
p)
429 for (
int i=0; i<
dim; ++i)
430 for (
int j=i; j<
dim; ++j, ++cnt)
432 C(cnt,
p, e) = M(i,j);
439 MFEM_VERIFY(VQ->GetVDim() ==
dim,
"");
440 coeffDim = VQ->GetVDim();
441 coeff.
SetSize(coeffDim * nq * ne);
444 for (
int e=0; e<ne; ++e)
447 for (
int p=0;
p<nq; ++
p)
450 for (
int i=0; i<coeffDim; ++i)
457 else if (Q ==
nullptr)
465 coeff(0) = cQ->constant;
468 dynamic_cast<QuadratureFunctionCoefficient*>(Q))
471 MFEM_VERIFY(qFun.
Size() == ne*nq,
472 "Incompatible QuadratureFunction dimension \n");
475 "IntegrationRule used within integrator and in"
476 " QuadratureFunction appear to be different");
478 coeff.
MakeRef(const_cast<QuadratureFunction &>(qFun),0);
484 for (
int e = 0; e < ne; ++e)
487 for (
int q = 0; q < nq; ++q)
489 C(q,e) = Q->Eval(T, ir->
IntPoint(q));
493 pa_data.SetSize((symmetric ? symmDims : MQfullDim) * nq * ne, mt);
494 PADiffusionSetup(dim, sdim, dofs1D, quad1D, coeffDim, ne, ir->
GetWeights(),
495 geom->J, coeff, pa_data);
498 template<
int T_D1D = 0,
int T_Q1D = 0>
499 static void PADiffusionDiagonal2D(
const int NE,
500 const bool symmetric,
508 const int D1D = T_D1D ? T_D1D : d1d;
509 const int Q1D = T_Q1D ? T_Q1D : q1d;
510 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
511 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
516 auto D =
Reshape(d.
Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
520 const int D1D = T_D1D ? T_D1D : d1d;
521 const int Q1D = T_Q1D ? T_Q1D : q1d;
522 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
523 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
525 double QD0[MQ1][MD1];
526 double QD1[MQ1][MD1];
527 double QD2[MQ1][MD1];
528 for (
int qx = 0; qx < Q1D; ++qx)
530 for (
int dy = 0; dy < D1D; ++dy)
535 for (
int qy = 0; qy < Q1D; ++qy)
537 const int q = qx + qy * Q1D;
538 const double D00 = D(q,0,e);
539 const double D10 = D(q,1,e);
540 const double D01 = symmetric ? D10 : D(q,2,e);
541 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
542 QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D00;
543 QD1[qx][dy] += B(qy, dy) * G(qy, dy) * (D01 + D10);
544 QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D11;
548 for (
int dy = 0; dy < D1D; ++dy)
550 for (
int dx = 0; dx < D1D; ++dx)
552 for (
int qx = 0; qx < Q1D; ++qx)
554 Y(dx,dy,e) += G(qx, dx) * G(qx, dx) * QD0[qx][dy];
555 Y(dx,dy,e) += G(qx, dx) * B(qx, dx) * QD1[qx][dy];
556 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD2[qx][dy];
564 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
565 static void SmemPADiffusionDiagonal2D(
const int NE,
566 const bool symmetric,
567 const Array<double> &b_,
568 const Array<double> &g_,
574 const int D1D = T_D1D ? T_D1D : d1d;
575 const int Q1D = T_Q1D ? T_Q1D : q1d;
576 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
577 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
578 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
579 MFEM_VERIFY(D1D <= MD1,
"");
580 MFEM_VERIFY(Q1D <= MQ1,
"");
581 auto b =
Reshape(b_.Read(), Q1D, D1D);
582 auto g =
Reshape(g_.Read(), Q1D, D1D);
583 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
584 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
585 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
587 const int tidz = MFEM_THREAD_ID(z);
588 const int D1D = T_D1D ? T_D1D : d1d;
589 const int Q1D = T_Q1D ? T_Q1D : q1d;
590 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
591 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
592 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
593 MFEM_SHARED
double BG[2][MQ1*MD1];
594 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
595 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
596 MFEM_SHARED
double QD[3][NBZ][MD1][MQ1];
597 double (*QD0)[MD1] = (double (*)[MD1])(QD[0] + tidz);
598 double (*QD1)[MD1] = (double (*)[MD1])(QD[1] + tidz);
599 double (*QD2)[MD1] = (double (*)[MD1])(QD[2] + tidz);
602 MFEM_FOREACH_THREAD(d,y,D1D)
604 MFEM_FOREACH_THREAD(q,x,Q1D)
612 MFEM_FOREACH_THREAD(qx,x,Q1D)
614 MFEM_FOREACH_THREAD(dy,y,D1D)
619 for (
int qy = 0; qy < Q1D; ++qy)
621 const int q = qx + qy * Q1D;
622 const double D00 = D(q,0,e);
623 const double D10 = D(q,1,e);
624 const double D01 = symmetric ? D10 : D(q,2,e);
625 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
626 const double By = B[qy][dy];
627 const double Gy = G[qy][dy];
628 const double BB = By * By;
629 const double BG = By * Gy;
630 const double GG = Gy * Gy;
631 QD0[qx][dy] += BB * D00;
632 QD1[qx][dy] += BG * (D01 + D10);
633 QD2[qx][dy] += GG * D11;
638 MFEM_FOREACH_THREAD(dy,y,D1D)
640 MFEM_FOREACH_THREAD(dx,x,D1D)
642 for (
int qx = 0; qx < Q1D; ++qx)
644 const double Bx = B[qx][dx];
645 const double Gx = G[qx][dx];
646 const double BB = Bx * Bx;
647 const double BG = Bx * Gx;
648 const double GG = Gx * Gx;
649 Y(dx,dy,e) += GG * QD0[qx][dy];
650 Y(dx,dy,e) += BG * QD1[qx][dy];
651 Y(dx,dy,e) += BB * QD2[qx][dy];
658 template<
int T_D1D = 0,
int T_Q1D = 0>
659 static void PADiffusionDiagonal3D(
const int NE,
660 const bool symmetric,
661 const Array<double> &b,
662 const Array<double> &g,
668 constexpr
int DIM = 3;
669 const int D1D = T_D1D ? T_D1D : d1d;
670 const int Q1D = T_Q1D ? T_Q1D : q1d;
671 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
672 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
673 MFEM_VERIFY(D1D <= MD1,
"");
674 MFEM_VERIFY(Q1D <= MQ1,
"");
675 auto B =
Reshape(b.Read(), Q1D, D1D);
676 auto G =
Reshape(g.Read(), Q1D, D1D);
677 auto Q =
Reshape(d.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
678 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
681 const int D1D = T_D1D ? T_D1D : d1d;
682 const int Q1D = T_Q1D ? T_Q1D : q1d;
683 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
684 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
685 double QQD[MQ1][MQ1][MD1];
686 double QDD[MQ1][MD1][MD1];
687 for (
int i = 0; i <
DIM; ++i)
689 for (
int j = 0; j <
DIM; ++j)
692 for (
int qx = 0; qx < Q1D; ++qx)
694 for (
int qy = 0; qy < Q1D; ++qy)
696 for (
int dz = 0; dz < D1D; ++dz)
698 QQD[qx][qy][dz] = 0.0;
699 for (
int qz = 0; qz < Q1D; ++qz)
701 const int q = qx + (qy + qz * Q1D) * Q1D;
702 const int ksym = j >= i ?
703 3 - (3-i)*(2-i)/2 + j:
704 3 - (3-j)*(2-j)/2 + i;
705 const int k = symmetric ? ksym : (i*
DIM) + j;
706 const double O = Q(q,k,e);
707 const double Bz = B(qz,dz);
708 const double Gz = G(qz,dz);
709 const double L = i==2 ? Gz : Bz;
710 const double R = j==2 ? Gz : Bz;
711 QQD[qx][qy][dz] += L * O * R;
717 for (
int qx = 0; qx < Q1D; ++qx)
719 for (
int dz = 0; dz < D1D; ++dz)
721 for (
int dy = 0; dy < D1D; ++dy)
723 QDD[qx][dy][dz] = 0.0;
724 for (
int qy = 0; qy < Q1D; ++qy)
726 const double By = B(qy,dy);
727 const double Gy = G(qy,dy);
728 const double L = i==1 ? Gy : By;
729 const double R = j==1 ? Gy : By;
730 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
736 for (
int dz = 0; dz < D1D; ++dz)
738 for (
int dy = 0; dy < D1D; ++dy)
740 for (
int dx = 0; dx < D1D; ++dx)
742 for (
int qx = 0; qx < Q1D; ++qx)
744 const double Bx = B(qx,dx);
745 const double Gx = G(qx,dx);
746 const double L = i==0 ? Gx : Bx;
747 const double R = j==0 ? Gx : Bx;
748 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
759 template<
int T_D1D = 0,
int T_Q1D = 0>
760 static void SmemPADiffusionDiagonal3D(
const int NE,
761 const bool symmetric,
762 const Array<double> &b_,
763 const Array<double> &g_,
769 constexpr
int DIM = 3;
770 const int D1D = T_D1D ? T_D1D : d1d;
771 const int Q1D = T_Q1D ? T_Q1D : q1d;
772 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
773 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
774 MFEM_VERIFY(D1D <= MD1,
"");
775 MFEM_VERIFY(Q1D <= MQ1,
"");
776 auto b =
Reshape(b_.Read(), Q1D, D1D);
777 auto g =
Reshape(g_.Read(), Q1D, D1D);
778 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
779 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
780 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
782 const int tidz = MFEM_THREAD_ID(z);
783 const int D1D = T_D1D ? T_D1D : d1d;
784 const int Q1D = T_Q1D ? T_Q1D : q1d;
785 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
786 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
787 MFEM_SHARED
double BG[2][MQ1*MD1];
788 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
789 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
790 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
791 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
794 MFEM_FOREACH_THREAD(d,y,D1D)
796 MFEM_FOREACH_THREAD(q,x,Q1D)
804 for (
int i = 0; i <
DIM; ++i)
806 for (
int j = 0; j <
DIM; ++j)
809 MFEM_FOREACH_THREAD(qx,x,Q1D)
811 MFEM_FOREACH_THREAD(qy,y,Q1D)
813 MFEM_FOREACH_THREAD(dz,z,D1D)
815 QQD[qx][qy][dz] = 0.0;
816 for (
int qz = 0; qz < Q1D; ++qz)
818 const int q = qx + (qy + qz * Q1D) * Q1D;
819 const int ksym = j >= i ?
820 3 - (3-i)*(2-i)/2 + j:
821 3 - (3-j)*(2-j)/2 + i;
822 const int k = symmetric ? ksym : (i*
DIM) + j;
823 const double O = D(q,k,e);
824 const double Bz = B[qz][dz];
825 const double Gz = G[qz][dz];
826 const double L = i==2 ? Gz : Bz;
827 const double R = j==2 ? Gz : Bz;
828 QQD[qx][qy][dz] += L * O * R;
835 MFEM_FOREACH_THREAD(qx,x,Q1D)
837 MFEM_FOREACH_THREAD(dz,z,D1D)
839 MFEM_FOREACH_THREAD(dy,y,D1D)
841 QDD[qx][dy][dz] = 0.0;
842 for (
int qy = 0; qy < Q1D; ++qy)
844 const double By = B[qy][dy];
845 const double Gy = G[qy][dy];
846 const double L = i==1 ? Gy : By;
847 const double R = j==1 ? Gy : By;
848 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
855 MFEM_FOREACH_THREAD(dz,z,D1D)
857 MFEM_FOREACH_THREAD(dy,y,D1D)
859 MFEM_FOREACH_THREAD(dx,x,D1D)
861 for (
int qx = 0; qx < Q1D; ++qx)
863 const double Bx = B[qx][dx];
864 const double Gx = G[qx][dx];
865 const double L = i==0 ? Gx : Bx;
866 const double R = j==0 ? Gx : Bx;
867 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
877 static void PADiffusionAssembleDiagonal(
const int dim,
882 const Array<double> &B,
883 const Array<double> &G,
889 switch ((D1D << 4 ) | Q1D)
891 case 0x22:
return SmemPADiffusionDiagonal2D<2,2,8>(NE,symm,B,G,D,Y);
892 case 0x33:
return SmemPADiffusionDiagonal2D<3,3,8>(NE,symm,B,G,D,Y);
893 case 0x44:
return SmemPADiffusionDiagonal2D<4,4,4>(NE,symm,B,G,D,Y);
894 case 0x55:
return SmemPADiffusionDiagonal2D<5,5,4>(NE,symm,B,G,D,Y);
895 case 0x66:
return SmemPADiffusionDiagonal2D<6,6,2>(NE,symm,B,G,D,Y);
896 case 0x77:
return SmemPADiffusionDiagonal2D<7,7,2>(NE,symm,B,G,D,Y);
897 case 0x88:
return SmemPADiffusionDiagonal2D<8,8,1>(NE,symm,B,G,D,Y);
898 case 0x99:
return SmemPADiffusionDiagonal2D<9,9,1>(NE,symm,B,G,D,Y);
899 default:
return PADiffusionDiagonal2D(NE,symm,B,G,D,Y,D1D,Q1D);
904 switch ((D1D << 4 ) | Q1D)
906 case 0x23:
return SmemPADiffusionDiagonal3D<2,3>(NE,symm,B,G,D,Y);
907 case 0x34:
return SmemPADiffusionDiagonal3D<3,4>(NE,symm,B,G,D,Y);
908 case 0x45:
return SmemPADiffusionDiagonal3D<4,5>(NE,symm,B,G,D,Y);
909 case 0x56:
return SmemPADiffusionDiagonal3D<5,6>(NE,symm,B,G,D,Y);
910 case 0x67:
return SmemPADiffusionDiagonal3D<6,7>(NE,symm,B,G,D,Y);
911 case 0x78:
return SmemPADiffusionDiagonal3D<7,8>(NE,symm,B,G,D,Y);
912 case 0x89:
return SmemPADiffusionDiagonal3D<8,9>(NE,symm,B,G,D,Y);
913 case 0x9A:
return SmemPADiffusionDiagonal3D<9,10>(NE,symm,B,G,D,Y);
914 default:
return PADiffusionDiagonal3D(NE,symm,B,G,D,Y,D1D,Q1D);
917 MFEM_ABORT(
"Unknown kernel.");
920 void DiffusionIntegrator::AssembleDiagonalPA(
Vector &diag)
922 if (DeviceCanUseCeed())
924 ceedOp->GetDiagonal(diag);
928 if (pa_data.Size()==0) { AssemblePA(*fespace); }
929 PADiffusionAssembleDiagonal(dim, dofs1D, quad1D, ne, symmetric,
930 maps->B, maps->G, pa_data, diag);
937 static void OccaPADiffusionApply2D(
const int D1D,
948 occa::properties props;
949 props[
"defines/D1D"] = D1D;
950 props[
"defines/Q1D"] = Q1D;
958 const occa_id_t id = std::make_pair(D1D,Q1D);
959 if (!Device::Allows(Backend::OCCA_CUDA))
962 if (OccaDiffApply2D_cpu.find(
id) == OccaDiffApply2D_cpu.end())
964 const occa::kernel DiffusionApply2D_CPU =
966 "DiffusionApply2D_CPU", props);
967 OccaDiffApply2D_cpu.emplace(
id, DiffusionApply2D_CPU);
969 OccaDiffApply2D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
974 if (OccaDiffApply2D_gpu.find(
id) == OccaDiffApply2D_gpu.end())
976 const occa::kernel DiffusionApply2D_GPU =
978 "DiffusionApply2D_GPU", props);
979 OccaDiffApply2D_gpu.emplace(
id, DiffusionApply2D_GPU);
981 OccaDiffApply2D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
986 static void OccaPADiffusionApply3D(
const int D1D,
989 const Array<double> &B,
990 const Array<double> &G,
991 const Array<double> &Bt,
992 const Array<double> &Gt,
997 occa::properties props;
998 props[
"defines/D1D"] = D1D;
999 props[
"defines/Q1D"] = Q1D;
1000 const occa::memory o_B =
OccaMemoryRead(B.GetMemory(), B.Size());
1001 const occa::memory o_G =
OccaMemoryRead(G.GetMemory(), G.Size());
1002 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
1003 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
1004 const occa::memory o_D =
OccaMemoryRead(D.GetMemory(), D.Size());
1005 const occa::memory o_X =
OccaMemoryRead(X.GetMemory(), X.Size());
1007 const occa_id_t id = std::make_pair(D1D,Q1D);
1008 if (!Device::Allows(Backend::OCCA_CUDA))
1011 if (OccaDiffApply3D_cpu.find(
id) == OccaDiffApply3D_cpu.end())
1013 const occa::kernel DiffusionApply3D_CPU =
1015 "DiffusionApply3D_CPU", props);
1016 OccaDiffApply3D_cpu.emplace(
id, DiffusionApply3D_CPU);
1018 OccaDiffApply3D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
1023 if (OccaDiffApply3D_gpu.find(
id) == OccaDiffApply3D_gpu.end())
1025 const occa::kernel DiffusionApply3D_GPU =
1027 "DiffusionApply3D_GPU", props);
1028 OccaDiffApply3D_gpu.emplace(
id, DiffusionApply3D_GPU);
1030 OccaDiffApply3D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
1033 #endif // MFEM_USE_OCCA
1036 template<
int T_D1D = 0,
int T_Q1D = 0>
1037 static void PADiffusionApply2D(
const int NE,
1038 const bool symmetric,
1039 const Array<double> &b_,
1040 const Array<double> &g_,
1041 const Array<double> &bt_,
1042 const Array<double> >_,
1049 const int D1D = T_D1D ? T_D1D : d1d;
1050 const int Q1D = T_Q1D ? T_Q1D : q1d;
1051 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
1052 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
1053 auto B =
Reshape(b_.Read(), Q1D, D1D);
1054 auto G =
Reshape(g_.Read(), Q1D, D1D);
1055 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
1056 auto Gt =
Reshape(gt_.Read(), D1D, Q1D);
1057 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
1058 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
1059 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
1062 const int D1D = T_D1D ? T_D1D : d1d;
1063 const int Q1D = T_Q1D ? T_Q1D : q1d;
1065 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
1066 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
1068 double grad[max_Q1D][max_Q1D][2];
1069 for (
int qy = 0; qy < Q1D; ++qy)
1071 for (
int qx = 0; qx < Q1D; ++qx)
1073 grad[qy][qx][0] = 0.0;
1074 grad[qy][qx][1] = 0.0;
1077 for (
int dy = 0; dy < D1D; ++dy)
1079 double gradX[max_Q1D][2];
1080 for (
int qx = 0; qx < Q1D; ++qx)
1085 for (
int dx = 0; dx < D1D; ++dx)
1087 const double s = X(dx,dy,e);
1088 for (
int qx = 0; qx < Q1D; ++qx)
1090 gradX[qx][0] += s * B(qx,dx);
1091 gradX[qx][1] += s * G(qx,dx);
1094 for (
int qy = 0; qy < Q1D; ++qy)
1096 const double wy = B(qy,dy);
1097 const double wDy = G(qy,dy);
1098 for (
int qx = 0; qx < Q1D; ++qx)
1100 grad[qy][qx][0] += gradX[qx][1] * wy;
1101 grad[qy][qx][1] += gradX[qx][0] * wDy;
1106 for (
int qy = 0; qy < Q1D; ++qy)
1108 for (
int qx = 0; qx < Q1D; ++qx)
1110 const int q = qx + qy * Q1D;
1112 const double O11 = D(q,0,e);
1113 const double O21 = D(q,1,e);
1114 const double O12 = symmetric ? O21 : D(q,2,e);
1115 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1117 const double gradX = grad[qy][qx][0];
1118 const double gradY = grad[qy][qx][1];
1120 grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
1121 grad[qy][qx][1] = (O21 * gradX) + (O22 * gradY);
1124 for (
int qy = 0; qy < Q1D; ++qy)
1126 double gradX[max_D1D][2];
1127 for (
int dx = 0; dx < D1D; ++dx)
1132 for (
int qx = 0; qx < Q1D; ++qx)
1134 const double gX = grad[qy][qx][0];
1135 const double gY = grad[qy][qx][1];
1136 for (
int dx = 0; dx < D1D; ++dx)
1138 const double wx = Bt(dx,qx);
1139 const double wDx = Gt(dx,qx);
1140 gradX[dx][0] += gX * wDx;
1141 gradX[dx][1] += gY * wx;
1144 for (
int dy = 0; dy < D1D; ++dy)
1146 const double wy = Bt(dy,qy);
1147 const double wDy = Gt(dy,qy);
1148 for (
int dx = 0; dx < D1D; ++dx)
1150 Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
1158 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
1159 static void SmemPADiffusionApply2D(
const int NE,
1160 const bool symmetric,
1161 const Array<double> &b_,
1162 const Array<double> &g_,
1169 const int D1D = T_D1D ? T_D1D : d1d;
1170 const int Q1D = T_Q1D ? T_Q1D : q1d;
1171 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1172 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1173 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1174 MFEM_VERIFY(D1D <= MD1,
"");
1175 MFEM_VERIFY(Q1D <= MQ1,
"");
1176 auto b =
Reshape(b_.Read(), Q1D, D1D);
1177 auto g =
Reshape(g_.Read(), Q1D, D1D);
1178 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
1179 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
1180 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
1181 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
1183 const int tidz = MFEM_THREAD_ID(z);
1184 const int D1D = T_D1D ? T_D1D : d1d;
1185 const int Q1D = T_Q1D ? T_Q1D : q1d;
1186 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1187 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1188 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1189 MFEM_SHARED
double sBG[2][MQ1*MD1];
1190 double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
1191 double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
1192 double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
1193 double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
1194 MFEM_SHARED
double Xz[NBZ][MD1][MD1];
1195 MFEM_SHARED
double GD[2][NBZ][MD1][MQ1];
1196 MFEM_SHARED
double GQ[2][NBZ][MD1][MQ1];
1197 double (*X)[MD1] = (double (*)[MD1])(Xz + tidz);
1198 double (*DQ0)[MD1] = (double (*)[MD1])(GD[0] + tidz);
1199 double (*DQ1)[MD1] = (double (*)[MD1])(GD[1] + tidz);
1200 double (*QQ0)[MD1] = (double (*)[MD1])(GQ[0] + tidz);
1201 double (*QQ1)[MD1] = (double (*)[MD1])(GQ[1] + tidz);
1202 MFEM_FOREACH_THREAD(dy,y,D1D)
1204 MFEM_FOREACH_THREAD(dx,x,D1D)
1206 X[dy][dx] = x(dx,dy,e);
1211 MFEM_FOREACH_THREAD(dy,y,D1D)
1213 MFEM_FOREACH_THREAD(q,x,Q1D)
1221 MFEM_FOREACH_THREAD(dy,y,D1D)
1223 MFEM_FOREACH_THREAD(qx,x,Q1D)
1227 for (
int dx = 0; dx < D1D; ++dx)
1229 const double coords = X[dy][dx];
1230 u += B[qx][dx] * coords;
1231 v += G[qx][dx] * coords;
1238 MFEM_FOREACH_THREAD(qy,y,Q1D)
1240 MFEM_FOREACH_THREAD(qx,x,Q1D)
1244 for (
int dy = 0; dy < D1D; ++dy)
1246 u += DQ1[dy][qx] * B[qy][dy];
1247 v += DQ0[dy][qx] * G[qy][dy];
1254 MFEM_FOREACH_THREAD(qy,y,Q1D)
1256 MFEM_FOREACH_THREAD(qx,x,Q1D)
1258 const int q = (qx + ((qy) * Q1D));
1259 const double O11 = D(q,0,e);
1260 const double O21 = D(q,1,e);
1261 const double O12 = symmetric ? O21 : D(q,2,e);
1262 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1263 const double gX = QQ0[qy][qx];
1264 const double gY = QQ1[qy][qx];
1265 QQ0[qy][qx] = (O11 * gX) + (O12 * gY);
1266 QQ1[qy][qx] = (O21 * gX) + (O22 * gY);
1272 MFEM_FOREACH_THREAD(dy,y,D1D)
1274 MFEM_FOREACH_THREAD(q,x,Q1D)
1276 Bt[dy][q] =
b(q,dy);
1277 Gt[dy][q] = g(q,dy);
1282 MFEM_FOREACH_THREAD(qy,y,Q1D)
1284 MFEM_FOREACH_THREAD(dx,x,D1D)
1288 for (
int qx = 0; qx < Q1D; ++qx)
1290 u += Gt[dx][qx] * QQ0[qy][qx];
1291 v += Bt[dx][qx] * QQ1[qy][qx];
1298 MFEM_FOREACH_THREAD(dy,y,D1D)
1300 MFEM_FOREACH_THREAD(dx,x,D1D)
1304 for (
int qy = 0; qy < Q1D; ++qy)
1306 u += DQ0[qy][dx] * Bt[dy][qy];
1307 v += DQ1[qy][dx] * Gt[dy][qy];
1309 Y(dx,dy,e) += (u + v);
1316 template<
int T_D1D = 0,
int T_Q1D = 0>
1317 static void PADiffusionApply3D(
const int NE,
1318 const bool symmetric,
1319 const Array<double> &b,
1320 const Array<double> &g,
1321 const Array<double> &bt,
1322 const Array<double> >,
1326 int d1d = 0,
int q1d = 0)
1328 const int D1D = T_D1D ? T_D1D : d1d;
1329 const int Q1D = T_Q1D ? T_Q1D : q1d;
1330 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
1331 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
1332 auto B =
Reshape(b.Read(), Q1D, D1D);
1333 auto G =
Reshape(g.Read(), Q1D, D1D);
1334 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
1335 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
1336 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
1337 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1338 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1341 const int D1D = T_D1D ? T_D1D : d1d;
1342 const int Q1D = T_Q1D ? T_Q1D : q1d;
1343 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
1344 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
1345 double grad[max_Q1D][max_Q1D][max_Q1D][3];
1346 for (
int qz = 0; qz < Q1D; ++qz)
1348 for (
int qy = 0; qy < Q1D; ++qy)
1350 for (
int qx = 0; qx < Q1D; ++qx)
1352 grad[qz][qy][qx][0] = 0.0;
1353 grad[qz][qy][qx][1] = 0.0;
1354 grad[qz][qy][qx][2] = 0.0;
1358 for (
int dz = 0; dz < D1D; ++dz)
1360 double gradXY[max_Q1D][max_Q1D][3];
1361 for (
int qy = 0; qy < Q1D; ++qy)
1363 for (
int qx = 0; qx < Q1D; ++qx)
1365 gradXY[qy][qx][0] = 0.0;
1366 gradXY[qy][qx][1] = 0.0;
1367 gradXY[qy][qx][2] = 0.0;
1370 for (
int dy = 0; dy < D1D; ++dy)
1372 double gradX[max_Q1D][2];
1373 for (
int qx = 0; qx < Q1D; ++qx)
1378 for (
int dx = 0; dx < D1D; ++dx)
1380 const double s = X(dx,dy,dz,e);
1381 for (
int qx = 0; qx < Q1D; ++qx)
1383 gradX[qx][0] += s * B(qx,dx);
1384 gradX[qx][1] += s * G(qx,dx);
1387 for (
int qy = 0; qy < Q1D; ++qy)
1389 const double wy = B(qy,dy);
1390 const double wDy = G(qy,dy);
1391 for (
int qx = 0; qx < Q1D; ++qx)
1393 const double wx = gradX[qx][0];
1394 const double wDx = gradX[qx][1];
1395 gradXY[qy][qx][0] += wDx * wy;
1396 gradXY[qy][qx][1] += wx * wDy;
1397 gradXY[qy][qx][2] += wx * wy;
1401 for (
int qz = 0; qz < Q1D; ++qz)
1403 const double wz = B(qz,dz);
1404 const double wDz = G(qz,dz);
1405 for (
int qy = 0; qy < Q1D; ++qy)
1407 for (
int qx = 0; qx < Q1D; ++qx)
1409 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
1410 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
1411 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
1417 for (
int qz = 0; qz < Q1D; ++qz)
1419 for (
int qy = 0; qy < Q1D; ++qy)
1421 for (
int qx = 0; qx < Q1D; ++qx)
1423 const int q = qx + (qy + qz * Q1D) * Q1D;
1424 const double O11 = D(q,0,e);
1425 const double O12 = D(q,1,e);
1426 const double O13 = D(q,2,e);
1427 const double O21 = symmetric ? O12 : D(q,3,e);
1428 const double O22 = symmetric ? D(q,3,e) : D(q,4,e);
1429 const double O23 = symmetric ? D(q,4,e) : D(q,5,e);
1430 const double O31 = symmetric ? O13 : D(q,6,e);
1431 const double O32 = symmetric ? O23 : D(q,7,e);
1432 const double O33 = symmetric ? D(q,5,e) : D(q,8,e);
1433 const double gradX = grad[qz][qy][qx][0];
1434 const double gradY = grad[qz][qy][qx][1];
1435 const double gradZ = grad[qz][qy][qx][2];
1436 grad[qz][qy][qx][0] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
1437 grad[qz][qy][qx][1] = (O21*gradX)+(O22*gradY)+(O23*gradZ);
1438 grad[qz][qy][qx][2] = (O31*gradX)+(O32*gradY)+(O33*gradZ);
1442 for (
int qz = 0; qz < Q1D; ++qz)
1444 double gradXY[max_D1D][max_D1D][3];
1445 for (
int dy = 0; dy < D1D; ++dy)
1447 for (
int dx = 0; dx < D1D; ++dx)
1449 gradXY[dy][dx][0] = 0;
1450 gradXY[dy][dx][1] = 0;
1451 gradXY[dy][dx][2] = 0;
1454 for (
int qy = 0; qy < Q1D; ++qy)
1456 double gradX[max_D1D][3];
1457 for (
int dx = 0; dx < D1D; ++dx)
1463 for (
int qx = 0; qx < Q1D; ++qx)
1465 const double gX = grad[qz][qy][qx][0];
1466 const double gY = grad[qz][qy][qx][1];
1467 const double gZ = grad[qz][qy][qx][2];
1468 for (
int dx = 0; dx < D1D; ++dx)
1470 const double wx = Bt(dx,qx);
1471 const double wDx = Gt(dx,qx);
1472 gradX[dx][0] += gX * wDx;
1473 gradX[dx][1] += gY * wx;
1474 gradX[dx][2] += gZ * wx;
1477 for (
int dy = 0; dy < D1D; ++dy)
1479 const double wy = Bt(dy,qy);
1480 const double wDy = Gt(dy,qy);
1481 for (
int dx = 0; dx < D1D; ++dx)
1483 gradXY[dy][dx][0] += gradX[dx][0] * wy;
1484 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
1485 gradXY[dy][dx][2] += gradX[dx][2] * wy;
1489 for (
int dz = 0; dz < D1D; ++dz)
1491 const double wz = Bt(dz,qz);
1492 const double wDz = Gt(dz,qz);
1493 for (
int dy = 0; dy < D1D; ++dy)
1495 for (
int dx = 0; dx < D1D; ++dx)
1498 ((gradXY[dy][dx][0] * wz) +
1499 (gradXY[dy][dx][1] * wz) +
1500 (gradXY[dy][dx][2] * wDz));
1510 static MFEM_HOST_DEVICE
inline int qi(
const int q,
const int d,
const int Q)
1512 return (q<=d) ? q : Q-1-q;
1515 static MFEM_HOST_DEVICE
inline int dj(
const int q,
const int d,
const int D)
1517 return (q<=d) ? d : D-1-d;
1520 static MFEM_HOST_DEVICE
inline int qk(
const int q,
const int d,
const int Q)
1522 return (q<=d) ? Q-1-q : q;
1525 static MFEM_HOST_DEVICE
inline int dl(
const int q,
const int d,
const int D)
1527 return (q<=d) ? D-1-d : d;
1530 static MFEM_HOST_DEVICE
inline double sign(
const int q,
const int d)
1532 return (q<=d) ? -1.0 : 1.0;
1535 template<
int T_D1D = 0,
int T_Q1D = 0>
1536 static void SmemPADiffusionApply3D(
const int NE,
1537 const bool symmetric,
1538 const Array<double> &b_,
1539 const Array<double> &g_,
1546 const int D1D = T_D1D ? T_D1D : d1d;
1547 const int Q1D = T_Q1D ? T_Q1D : q1d;
1548 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
1549 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
1550 MFEM_VERIFY(D1D <= M1D,
"");
1551 MFEM_VERIFY(Q1D <= M1Q,
"");
1552 auto b =
Reshape(b_.Read(), Q1D, D1D);
1553 auto g =
Reshape(g_.Read(), Q1D, D1D);
1554 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1555 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1556 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1557 MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
1559 const int D1D = T_D1D ? T_D1D : d1d;
1560 const int Q1D = T_Q1D ? T_Q1D : q1d;
1561 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1562 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1563 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
1564 MFEM_SHARED
double sBG[MQ1*MD1];
1565 double (*B)[MD1] = (double (*)[MD1]) sBG;
1566 double (*G)[MD1] = (double (*)[MD1]) sBG;
1567 double (*Bt)[MQ1] = (double (*)[MQ1]) sBG;
1568 double (*Gt)[MQ1] = (double (*)[MQ1]) sBG;
1569 MFEM_SHARED
double sm0[3][MDQ*MDQ*MDQ];
1570 MFEM_SHARED
double sm1[3][MDQ*MDQ*MDQ];
1571 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1572 double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
1573 double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
1574 double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
1575 double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
1576 double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
1577 double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
1578 double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
1579 double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
1580 double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
1581 double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
1582 double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
1583 double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
1584 double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
1585 double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1586 MFEM_FOREACH_THREAD(dy,y,D1D)
1588 MFEM_FOREACH_THREAD(dx,x,D1D)
1591 for (
int dz = 0; dz < D1D; ++dz)
1593 X[dz][dy][dx] = x(dx,dy,dz,e);
1596 MFEM_FOREACH_THREAD(qx,x,Q1D)
1598 const int i = qi(qx,dy,Q1D);
1599 const int j = dj(qx,dy,D1D);
1600 const int k = qk(qx,dy,Q1D);
1601 const int l = dl(qx,dy,D1D);
1603 G[k][l] = g(qx,dy) * sign(qx,dy);
1607 MFEM_FOREACH_THREAD(dy,y,D1D)
1609 MFEM_FOREACH_THREAD(qx,x,Q1D)
1611 double u[D1D], v[D1D];
1613 for (
int dz = 0; dz < D1D; dz++) { u[dz] = v[dz] = 0.0; }
1615 for (
int dx = 0; dx < D1D; ++dx)
1617 const int i = qi(qx,dx,Q1D);
1618 const int j = dj(qx,dx,D1D);
1619 const int k = qk(qx,dx,Q1D);
1620 const int l = dl(qx,dx,D1D);
1621 const double s = sign(qx,dx);
1623 for (
int dz = 0; dz < D1D; ++dz)
1625 const double coords = X[dz][dy][dx];
1626 u[dz] += coords * B[i][j];
1627 v[dz] += coords * G[k][l] *
s;
1631 for (
int dz = 0; dz < D1D; ++dz)
1633 DDQ0[dz][dy][qx] = u[dz];
1634 DDQ1[dz][dy][qx] = v[dz];
1639 MFEM_FOREACH_THREAD(qy,y,Q1D)
1641 MFEM_FOREACH_THREAD(qx,x,Q1D)
1643 double u[D1D], v[D1D], w[D1D];
1645 for (
int dz = 0; dz < D1D; dz++) { u[dz] = v[dz] = w[dz] = 0.0; }
1647 for (
int dy = 0; dy < D1D; ++dy)
1649 const int i = qi(qy,dy,Q1D);
1650 const int j = dj(qy,dy,D1D);
1651 const int k = qk(qy,dy,Q1D);
1652 const int l = dl(qy,dy,D1D);
1653 const double s = sign(qy,dy);
1655 for (
int dz = 0; dz < D1D; dz++)
1657 u[dz] += DDQ1[dz][dy][qx] * B[i][j];
1658 v[dz] += DDQ0[dz][dy][qx] * G[k][l] *
s;
1659 w[dz] += DDQ0[dz][dy][qx] * B[i][j];
1663 for (
int dz = 0; dz < D1D; dz++)
1665 DQQ0[dz][qy][qx] = u[dz];
1666 DQQ1[dz][qy][qx] = v[dz];
1667 DQQ2[dz][qy][qx] = w[dz];
1672 MFEM_FOREACH_THREAD(qy,y,Q1D)
1674 MFEM_FOREACH_THREAD(qx,x,Q1D)
1676 double u[Q1D], v[Q1D], w[Q1D];
1678 for (
int qz = 0; qz < Q1D; qz++) { u[qz] = v[qz] = w[qz] = 0.0; }
1680 for (
int dz = 0; dz < D1D; ++dz)
1683 for (
int qz = 0; qz < Q1D; qz++)
1685 const int i = qi(qz,dz,Q1D);
1686 const int j = dj(qz,dz,D1D);
1687 const int k = qk(qz,dz,Q1D);
1688 const int l = dl(qz,dz,D1D);
1689 const double s = sign(qz,dz);
1690 u[qz] += DQQ0[dz][qy][qx] * B[i][j];
1691 v[qz] += DQQ1[dz][qy][qx] * B[i][j];
1692 w[qz] += DQQ2[dz][qy][qx] * G[k][l] *
s;
1696 for (
int qz = 0; qz < Q1D; qz++)
1698 const double O11 = d(qx,qy,qz,0,e);
1699 const double O12 = d(qx,qy,qz,1,e);
1700 const double O13 = d(qx,qy,qz,2,e);
1701 const double O21 = symmetric ? O12 : d(qx,qy,qz,3,e);
1702 const double O22 = symmetric ? d(qx,qy,qz,3,e) : d(qx,qy,qz,4,e);
1703 const double O23 = symmetric ? d(qx,qy,qz,4,e) : d(qx,qy,qz,5,e);
1704 const double O31 = symmetric ? O13 : d(qx,qy,qz,6,e);
1705 const double O32 = symmetric ? O23 : d(qx,qy,qz,7,e);
1706 const double O33 = symmetric ? d(qx,qy,qz,5,e) : d(qx,qy,qz,8,e);
1707 const double gX = u[qz];
1708 const double gY = v[qz];
1709 const double gZ = w[qz];
1710 QQQ0[qz][qy][qx] = (O11*gX) + (O12*gY) + (O13*gZ);
1711 QQQ1[qz][qy][qx] = (O21*gX) + (O22*gY) + (O23*gZ);
1712 QQQ2[qz][qy][qx] = (O31*gX) + (O32*gY) + (O33*gZ);
1717 MFEM_FOREACH_THREAD(d,y,D1D)
1719 MFEM_FOREACH_THREAD(q,x,Q1D)
1721 const int i = qi(q,d,Q1D);
1722 const int j = dj(q,d,D1D);
1723 const int k = qk(q,d,Q1D);
1724 const int l = dl(q,d,D1D);
1726 Gt[l][k] = g(q,d) * sign(q,d);
1730 MFEM_FOREACH_THREAD(qy,y,Q1D)
1732 MFEM_FOREACH_THREAD(dx,x,D1D)
1734 double u[Q1D], v[Q1D], w[Q1D];
1736 for (
int qz = 0; qz < Q1D; ++qz) { u[qz] = v[qz] = w[qz] = 0.0; }
1738 for (
int qx = 0; qx < Q1D; ++qx)
1740 const int i = qi(qx,dx,Q1D);
1741 const int j = dj(qx,dx,D1D);
1742 const int k = qk(qx,dx,Q1D);
1743 const int l = dl(qx,dx,D1D);
1744 const double s = sign(qx,dx);
1746 for (
int qz = 0; qz < Q1D; ++qz)
1748 u[qz] += QQQ0[qz][qy][qx] * Gt[l][k] *
s;
1749 v[qz] += QQQ1[qz][qy][qx] * Bt[j][i];
1750 w[qz] += QQQ2[qz][qy][qx] * Bt[j][i];
1754 for (
int qz = 0; qz < Q1D; ++qz)
1756 QQD0[qz][qy][dx] = u[qz];
1757 QQD1[qz][qy][dx] = v[qz];
1758 QQD2[qz][qy][dx] = w[qz];
1763 MFEM_FOREACH_THREAD(dy,y,D1D)
1765 MFEM_FOREACH_THREAD(dx,x,D1D)
1767 double u[Q1D], v[Q1D], w[Q1D];
1769 for (
int qz = 0; qz < Q1D; ++qz) { u[qz] = v[qz] = w[qz] = 0.0; }
1771 for (
int qy = 0; qy < Q1D; ++qy)
1773 const int i = qi(qy,dy,Q1D);
1774 const int j = dj(qy,dy,D1D);
1775 const int k = qk(qy,dy,Q1D);
1776 const int l = dl(qy,dy,D1D);
1777 const double s = sign(qy,dy);
1779 for (
int qz = 0; qz < Q1D; ++qz)
1781 u[qz] += QQD0[qz][qy][dx] * Bt[j][i];
1782 v[qz] += QQD1[qz][qy][dx] * Gt[l][k] *
s;
1783 w[qz] += QQD2[qz][qy][dx] * Bt[j][i];
1787 for (
int qz = 0; qz < Q1D; ++qz)
1789 QDD0[qz][dy][dx] = u[qz];
1790 QDD1[qz][dy][dx] = v[qz];
1791 QDD2[qz][dy][dx] = w[qz];
1796 MFEM_FOREACH_THREAD(dy,y,D1D)
1798 MFEM_FOREACH_THREAD(dx,x,D1D)
1800 double u[D1D], v[D1D], w[D1D];
1802 for (
int dz = 0; dz < D1D; ++dz) { u[dz] = v[dz] = w[dz] = 0.0; }
1804 for (
int qz = 0; qz < Q1D; ++qz)
1807 for (
int dz = 0; dz < D1D; ++dz)
1809 const int i = qi(qz,dz,Q1D);
1810 const int j = dj(qz,dz,D1D);
1811 const int k = qk(qz,dz,Q1D);
1812 const int l = dl(qz,dz,D1D);
1813 const double s = sign(qz,dz);
1814 u[dz] += QDD0[qz][dy][dx] * Bt[j][i];
1815 v[dz] += QDD1[qz][dy][dx] * Bt[j][i];
1816 w[dz] += QDD2[qz][dy][dx] * Gt[l][k] *
s;
1820 for (
int dz = 0; dz < D1D; ++dz)
1822 y(dx,dy,dz,e) += (u[dz] + v[dz] + w[dz]);
1829 static void PADiffusionApply(
const int dim,
1834 const Array<double> &B,
1835 const Array<double> &G,
1836 const Array<double> &Bt,
1837 const Array<double> &Gt,
1842 #ifdef MFEM_USE_OCCA
1847 OccaPADiffusionApply2D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1852 OccaPADiffusionApply3D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1855 MFEM_ABORT(
"OCCA PADiffusionApply unknown kernel!");
1857 #endif // MFEM_USE_OCCA
1858 const int ID = (D1D << 4) | Q1D;
1864 case 0x22:
return SmemPADiffusionApply2D<2,2,16>(NE,symm,B,G,D,X,Y);
1865 case 0x33:
return SmemPADiffusionApply2D<3,3,16>(NE,symm,B,G,D,X,Y);
1866 case 0x44:
return SmemPADiffusionApply2D<4,4,8>(NE,symm,B,G,D,X,Y);
1867 case 0x55:
return SmemPADiffusionApply2D<5,5,8>(NE,symm,B,G,D,X,Y);
1868 case 0x66:
return SmemPADiffusionApply2D<6,6,4>(NE,symm,B,G,D,X,Y);
1869 case 0x77:
return SmemPADiffusionApply2D<7,7,4>(NE,symm,B,G,D,X,Y);
1870 case 0x88:
return SmemPADiffusionApply2D<8,8,2>(NE,symm,B,G,D,X,Y);
1871 case 0x99:
return SmemPADiffusionApply2D<9,9,2>(NE,symm,B,G,D,X,Y);
1872 default:
return PADiffusionApply2D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1880 case 0x23:
return SmemPADiffusionApply3D<2,3>(NE,symm,B,G,D,X,Y);
1881 case 0x34:
return SmemPADiffusionApply3D<3,4>(NE,symm,B,G,D,X,Y);
1882 case 0x45:
return SmemPADiffusionApply3D<4,5>(NE,symm,B,G,D,X,Y);
1883 case 0x46:
return SmemPADiffusionApply3D<4,6>(NE,symm,B,G,D,X,Y);
1884 case 0x56:
return SmemPADiffusionApply3D<5,6>(NE,symm,B,G,D,X,Y);
1885 case 0x58:
return SmemPADiffusionApply3D<5,8>(NE,symm,B,G,D,X,Y);
1886 case 0x67:
return SmemPADiffusionApply3D<6,7>(NE,symm,B,G,D,X,Y);
1887 case 0x78:
return SmemPADiffusionApply3D<7,8>(NE,symm,B,G,D,X,Y);
1888 case 0x89:
return SmemPADiffusionApply3D<8,9>(NE,symm,B,G,D,X,Y);
1889 default:
return PADiffusionApply3D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1892 MFEM_ABORT(
"Unknown kernel.");
1898 if (DeviceCanUseCeed())
1900 ceedOp->AddMult(x, y);
1904 PADiffusionApply(dim, dofs1D, quad1D, ne, symmetric,
1905 maps->B, maps->G, maps->Bt, maps->Gt,
1910 void DiffusionIntegrator::AddMultTransposePA(
const Vector &x,
Vector &y)
const
1918 MFEM_ABORT(
"DiffusionIntegrator::AddMultTransposePA only implemented in "
1919 "the symmetric case.")
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
int Size() const
Return the logical size of the array.
int GetDim() const
Returns the reference space dimension for the finite element.
Class for an integration rule - an Array of IntegrationPoint.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
A coefficient that is constant across space and time.
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
void SetSize(int s)
Change the size of the DenseSymmetricMatrix to s x s.
void SetSize(int s)
Resize the vector to size s.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Data type dense matrix using column-major storage.
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
int GetNE() const
Returns number of elements in the mesh.
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
Mesh * GetMesh() const
Returns the mesh.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
void PADiffusionSetup2D< 3 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
void PADiffusionSetup2D< 2 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
int SpaceDimension() const
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
double p(const Vector &x, double t)
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
MemoryType
Memory types supported by MFEM.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
void PADiffusionSetup3D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
std::pair< int, int > occa_id_t
double u(const Vector &xvec)
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Class representing a function through its values (scalar or vector) at quadrature points...
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).