12 #include "../general/forall.hpp"
15 #include "ceed/diffusion.hpp"
26 static void OccaPADiffusionSetup2D(
const int D1D,
29 const Array<double> &W,
34 occa::properties props;
35 props[
"defines/D1D"] = D1D;
36 props[
"defines/Q1D"] = Q1D;
41 const bool const_c = C.Size() == 1;
42 const occa_id_t id = std::make_pair(D1D,Q1D);
44 if (OccaDiffSetup2D_ker.find(
id) == OccaDiffSetup2D_ker.end())
46 const occa::kernel DiffusionSetup2D =
48 "DiffusionSetup2D", props);
49 OccaDiffSetup2D_ker.emplace(
id, DiffusionSetup2D);
51 OccaDiffSetup2D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
54 static void OccaPADiffusionSetup3D(
const int D1D,
57 const Array<double> &W,
62 occa::properties props;
63 props[
"defines/D1D"] = D1D;
64 props[
"defines/Q1D"] = Q1D;
69 const bool const_c = C.Size() == 1;
70 const occa_id_t id = std::make_pair(D1D,Q1D);
72 if (OccaDiffSetup3D_ker.find(
id) == OccaDiffSetup3D_ker.end())
74 const occa::kernel DiffusionSetup3D =
76 "DiffusionSetup3D", props);
77 OccaDiffSetup3D_ker.emplace(
id, DiffusionSetup3D);
79 OccaDiffSetup3D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
81 #endif // MFEM_USE_OCCA
92 const bool symmetric = (coeffDim != 4);
93 const bool const_c = c.Size() == 1;
94 MFEM_VERIFY(coeffDim < 3 ||
95 !const_c,
"Constant matrix coefficient not supported");
96 const auto W =
Reshape(w.Read(), Q1D,Q1D);
97 const auto J =
Reshape(j.Read(), Q1D,Q1D,2,2,NE);
98 const auto C = const_c ?
Reshape(c.Read(), 1,1,1,1) :
99 Reshape(c.Read(), coeffDim,Q1D,Q1D,NE);
100 auto D =
Reshape(d.Write(), Q1D,Q1D, symmetric ? 3 : 4, NE);
101 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
103 MFEM_FOREACH_THREAD(qx,x,Q1D)
105 MFEM_FOREACH_THREAD(qy,y,Q1D)
107 const double J11 = J(qx,qy,0,0,e);
108 const double J21 = J(qx,qy,1,0,e);
109 const double J12 = J(qx,qy,0,1,e);
110 const double J22 = J(qx,qy,1,1,e);
111 const double w_detJ = W(qx,qy) / ((J11*J22)-(J21*J12));
112 if (coeffDim == 3 || coeffDim == 4)
115 const double M11 = C(0,qx,qy,e);
116 const double M12 = C(1,qx,qy,e);
117 const double M21 = symmetric ? M12 : C(2,qx,qy,e);
118 const double M22 = symmetric ? C(2,qx,qy,e) : C(3,qx,qy,e);
119 const double R11 = M11*J22 - M12*J12;
120 const double R21 = M21*J22 - M22*J12;
121 const double R12 = -M11*J21 + M12*J11;
122 const double R22 = -M21*J21 + M22*J11;
125 D(qx,qy,0,e) = w_detJ * ( J22*R11 - J12*R21);
126 D(qx,qy,1,e) = w_detJ * (-J21*R11 + J11*R21);
127 D(qx,qy,2,e) = w_detJ * (symmetric ? (-J21*R12 + J11*R22) :
128 (J22*R12 - J12*R22));
131 D(qx,qy,3,e) = w_detJ * (-J21*R12 + J11*R22);
136 const double C1 = const_c ? C(0,0,0,0) : C(0,qx,qy,e);
137 const double C2 = const_c ? C(0,0,0,0) :
138 (coeffDim == 2 ? C(1,qx,qy,e) : C(0,qx,qy,e));
140 D(qx,qy,0,e) = w_detJ * (C2*J12*J12 + C1*J22*J22);
141 D(qx,qy,1,e) = -w_detJ * (C2*J12*J11 + C1*J22*J21);
142 D(qx,qy,2,e) = w_detJ * (C2*J11*J11 + C1*J21*J21);
159 MFEM_VERIFY(coeffDim == 1,
"Matrix and vector coefficients not supported");
160 constexpr
int DIM = 2;
161 constexpr
int SDIM = 3;
162 const bool const_c = c.Size() == 1;
163 const auto W =
Reshape(w.Read(), Q1D,Q1D);
165 const auto C = const_c ?
Reshape(c.Read(), 1,1,1) :
167 auto D =
Reshape(d.Write(), Q1D,Q1D, 3, NE);
168 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
170 MFEM_FOREACH_THREAD(qx,x,Q1D)
172 MFEM_FOREACH_THREAD(qy,y,Q1D)
174 const double wq = W(qx,qy);
175 const double J11 = J(qx,qy,0,0,e);
176 const double J21 = J(qx,qy,1,0,e);
177 const double J31 = J(qx,qy,2,0,e);
178 const double J12 = J(qx,qy,0,1,e);
179 const double J22 = J(qx,qy,1,1,e);
180 const double J32 = J(qx,qy,2,1,e);
181 const double E = J11*J11 + J21*J21 + J31*J31;
182 const double G = J12*J12 + J22*J22 + J32*J32;
183 const double F = J11*J12 + J21*J22 + J31*J32;
184 const double iw = 1.0 /
sqrt(E*G - F*F);
185 const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
186 const double alpha = wq * coeff * iw;
187 D(qx,qy,0,e) = alpha * G;
188 D(qx,qy,1,e) = -alpha * F;
189 D(qx,qy,2,e) = alpha * E;
204 const bool symmetric = (coeffDim != 9);
205 const bool const_c = c.
Size() == 1;
206 MFEM_VERIFY(coeffDim < 6 ||
207 !const_c,
"Constant matrix coefficient not supported");
209 const auto J =
Reshape(j.
Read(), Q1D,Q1D,Q1D,3,3,NE);
210 const auto C = const_c ?
Reshape(c.
Read(), 1,1,1,1,1) :
212 auto D =
Reshape(d.
Write(), Q1D,Q1D,Q1D, symmetric ? 6 : 9, NE);
213 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
215 MFEM_FOREACH_THREAD(qx,x,Q1D)
217 MFEM_FOREACH_THREAD(qy,y,Q1D)
219 MFEM_FOREACH_THREAD(qz,z,Q1D)
221 const double J11 = J(qx,qy,qz,0,0,e);
222 const double J21 = J(qx,qy,qz,1,0,e);
223 const double J31 = J(qx,qy,qz,2,0,e);
224 const double J12 = J(qx,qy,qz,0,1,e);
225 const double J22 = J(qx,qy,qz,1,1,e);
226 const double J32 = J(qx,qy,qz,2,1,e);
227 const double J13 = J(qx,qy,qz,0,2,e);
228 const double J23 = J(qx,qy,qz,1,2,e);
229 const double J33 = J(qx,qy,qz,2,2,e);
230 const double detJ = J11 * (J22 * J33 - J32 * J23) -
231 J21 * (J12 * J33 - J32 * J13) +
232 J31 * (J12 * J23 - J22 * J13);
233 const double w_detJ = W(qx,qy,qz) / detJ;
235 const double A11 = (J22 * J33) - (J23 * J32);
236 const double A12 = (J32 * J13) - (J12 * J33);
237 const double A13 = (J12 * J23) - (J22 * J13);
238 const double A21 = (J31 * J23) - (J21 * J33);
239 const double A22 = (J11 * J33) - (J13 * J31);
240 const double A23 = (J21 * J13) - (J11 * J23);
241 const double A31 = (J21 * J32) - (J31 * J22);
242 const double A32 = (J31 * J12) - (J11 * J32);
243 const double A33 = (J11 * J22) - (J12 * J21);
245 if (coeffDim == 6 || coeffDim == 9)
248 const double M11 = C(0, qx,qy,qz, e);
249 const double M12 = C(1, qx,qy,qz, e);
250 const double M13 = C(2, qx,qy,qz, e);
251 const double M21 = (!symmetric) ? C(3, qx,qy,qz, e) : M12;
252 const double M22 = (!symmetric) ? C(4, qx,qy,qz, e) : C(3, qx,qy,qz, e);
253 const double M23 = (!symmetric) ? C(5, qx,qy,qz, e) : C(4, qx,qy,qz, e);
254 const double M31 = (!symmetric) ? C(6, qx,qy,qz, e) : M13;
255 const double M32 = (!symmetric) ? C(7, qx,qy,qz, e) : M23;
256 const double M33 = (!symmetric) ? C(8, qx,qy,qz, e) : C(5, qx,qy,qz, e);
258 const double R11 = M11*A11 + M12*A12 + M13*A13;
259 const double R12 = M11*A21 + M12*A22 + M13*A23;
260 const double R13 = M11*A31 + M12*A32 + M13*A33;
261 const double R21 = M21*A11 + M22*A12 + M23*A13;
262 const double R22 = M21*A21 + M22*A22 + M23*A23;
263 const double R23 = M21*A31 + M22*A32 + M23*A33;
264 const double R31 = M31*A11 + M32*A12 + M33*A13;
265 const double R32 = M31*A21 + M32*A22 + M33*A23;
266 const double R33 = M31*A31 + M32*A32 + M33*A33;
269 D(qx,qy,qz,0,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31);
270 const double D12 = w_detJ * (A11*R12 + A12*R22 + A13*R32);
271 D(qx,qy,qz,1,e) = D12;
272 D(qx,qy,qz,2,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33);
274 const double D21 = w_detJ * (A21*R11 + A22*R21 + A23*R31);
275 const double D22 = w_detJ * (A21*R12 + A22*R22 + A23*R32);
276 const double D23 = w_detJ * (A21*R13 + A22*R23 + A23*R33);
278 const double D33 = w_detJ * (A31*R13 + A32*R23 + A33*R33);
280 D(qx,qy,qz,3,e) = symmetric ? D22 : D21;
281 D(qx,qy,qz,4,e) = symmetric ? D23 : D22;
282 D(qx,qy,qz,5,e) = symmetric ? D33 : D23;
286 D(qx,qy,qz,6,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31);
287 D(qx,qy,qz,7,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32);
288 D(qx,qy,qz,8,e) = D33;
293 const double C1 = const_c ? C(0,0,0,0,0) : C(0,qx,qy,qz,e);
294 const double C2 = const_c ? C(0,0,0,0,0) :
295 (coeffDim == 3 ? C(1,qx,qy,qz,e) : C(0,qx,qy,qz,e));
296 const double C3 = const_c ? C(0,0,0,0,0) :
297 (coeffDim == 3 ? C(2,qx,qy,qz,e) : C(0,qx,qy,qz,e));
300 D(qx,qy,qz,0,e) = w_detJ * (C1*A11*A11 + C2*A12*A12 + C3*A13*A13);
301 D(qx,qy,qz,1,e) = w_detJ * (C1*A11*A21 + C2*A12*A22 + C3*A13*A23);
302 D(qx,qy,qz,2,e) = w_detJ * (C1*A11*A31 + C2*A12*A32 + C3*A13*A33);
303 D(qx,qy,qz,3,e) = w_detJ * (C1*A21*A21 + C2*A22*A22 + C3*A23*A23);
304 D(qx,qy,qz,4,e) = w_detJ * (C1*A21*A31 + C2*A22*A32 + C3*A23*A33);
305 D(qx,qy,qz,5,e) = w_detJ * (C1*A31*A31 + C2*A32*A32 + C3*A33*A33);
313 static void PADiffusionSetup(
const int dim,
319 const Array<double> &W,
324 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADiffusionSetup"); }
330 OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
334 MFEM_CONTRACT_VAR(D1D);
335 #endif // MFEM_USE_OCCA
344 OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
347 #endif // MFEM_USE_OCCA
354 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
355 Device::GetDeviceMemoryType() : pa_mt;
359 if (mesh->
GetNE() == 0) {
return; }
362 if (DeviceCanUseCeed())
365 MFEM_VERIFY(!VQ && !MQ && !SMQ,
366 "Only scalar coefficient supported for DiffusionIntegrator"
368 ceedOp =
new ceed::PADiffusionIntegrator(fes, *ir, Q);
371 const int dims = el.
GetDim();
372 const int symmDims = (dims * (dims + 1)) / 2;
383 const int MQfullDim = MQ ? MQ->GetHeight() * MQ->GetWidth() : 0;
387 MFEM_VERIFY(MQ->GetHeight() == dim && MQ->GetWidth() ==
dim,
"");
389 coeffDim = MQfullDim;
391 coeff.
SetSize(MQfullDim * nq * ne);
397 for (
int e=0; e<ne; ++e)
400 for (
int p=0;
p<nq; ++
p)
403 for (
int i=0; i<
dim; ++i)
404 for (
int j=0; j<
dim; ++j)
406 C(j+(i*dim),
p, e) = GM(i,j);
413 MFEM_VERIFY(SMQ->GetSize() ==
dim,
"");
415 coeff.
SetSize(symmDims * nq * ne);
422 for (
int e=0; e<ne; ++e)
425 for (
int p=0;
p<nq; ++
p)
429 for (
int i=0; i<
dim; ++i)
430 for (
int j=i; j<
dim; ++j, ++cnt)
432 C(cnt,
p, e) = SM(i,j);
439 MFEM_VERIFY(VQ->GetVDim() ==
dim,
"");
440 coeffDim = VQ->GetVDim();
441 coeff.
SetSize(coeffDim * nq * ne);
444 for (
int e=0; e<ne; ++e)
447 for (
int p=0;
p<nq; ++
p)
450 for (
int i=0; i<coeffDim; ++i)
457 else if (Q ==
nullptr)
465 coeff(0) = cQ->constant;
468 dynamic_cast<QuadratureFunctionCoefficient*>(Q))
471 MFEM_VERIFY(qFun.
Size() == ne*nq,
472 "Incompatible QuadratureFunction dimension \n");
475 "IntegrationRule used within integrator and in"
476 " QuadratureFunction appear to be different");
478 coeff.
MakeRef(const_cast<QuadratureFunction &>(qFun),0);
484 for (
int e = 0; e < ne; ++e)
487 for (
int q = 0; q < nq; ++q)
489 C(q,e) = Q->Eval(T, ir->
IntPoint(q));
493 pa_data.SetSize((symmetric ? symmDims : MQfullDim) * nq * ne, mt);
494 PADiffusionSetup(dim, sdim, dofs1D, quad1D, coeffDim, ne, ir->
GetWeights(),
495 geom->J, coeff, pa_data);
498 template<
int T_D1D = 0,
int T_Q1D = 0>
499 static void PADiffusionDiagonal2D(
const int NE,
500 const bool symmetric,
508 const int D1D = T_D1D ? T_D1D : d1d;
509 const int Q1D = T_Q1D ? T_Q1D : q1d;
510 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
511 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
516 auto D =
Reshape(d.
Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
520 const int D1D = T_D1D ? T_D1D : d1d;
521 const int Q1D = T_Q1D ? T_Q1D : q1d;
522 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
523 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
525 double QD0[MQ1][MD1];
526 double QD1[MQ1][MD1];
527 double QD2[MQ1][MD1];
528 for (
int qx = 0; qx < Q1D; ++qx)
530 for (
int dy = 0; dy < D1D; ++dy)
535 for (
int qy = 0; qy < Q1D; ++qy)
537 const int q = qx + qy * Q1D;
538 const double D00 = D(q,0,e);
539 const double D10 = D(q,1,e);
540 const double D01 = symmetric ? D10 : D(q,2,e);
541 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
542 QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D00;
543 QD1[qx][dy] += B(qy, dy) * G(qy, dy) * (D01 + D10);
544 QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D11;
548 for (
int dy = 0; dy < D1D; ++dy)
550 for (
int dx = 0; dx < D1D; ++dx)
552 for (
int qx = 0; qx < Q1D; ++qx)
554 Y(dx,dy,e) += G(qx, dx) * G(qx, dx) * QD0[qx][dy];
555 Y(dx,dy,e) += G(qx, dx) * B(qx, dx) * QD1[qx][dy];
556 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD2[qx][dy];
564 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
565 static void SmemPADiffusionDiagonal2D(
const int NE,
566 const bool symmetric,
567 const Array<double> &b_,
568 const Array<double> &g_,
574 const int D1D = T_D1D ? T_D1D : d1d;
575 const int Q1D = T_Q1D ? T_Q1D : q1d;
576 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
577 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
578 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
579 MFEM_VERIFY(D1D <= MD1,
"");
580 MFEM_VERIFY(Q1D <= MQ1,
"");
581 auto b =
Reshape(b_.Read(), Q1D, D1D);
582 auto g =
Reshape(g_.Read(), Q1D, D1D);
583 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
584 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
585 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
587 const int tidz = MFEM_THREAD_ID(z);
588 const int D1D = T_D1D ? T_D1D : d1d;
589 const int Q1D = T_Q1D ? T_Q1D : q1d;
590 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
591 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
592 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
593 MFEM_SHARED
double BG[2][MQ1*MD1];
594 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
595 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
596 MFEM_SHARED
double QD[3][NBZ][MD1][MQ1];
597 double (*QD0)[MD1] = (double (*)[MD1])(QD[0] + tidz);
598 double (*QD1)[MD1] = (double (*)[MD1])(QD[1] + tidz);
599 double (*QD2)[MD1] = (double (*)[MD1])(QD[2] + tidz);
602 MFEM_FOREACH_THREAD(d,y,D1D)
604 MFEM_FOREACH_THREAD(q,x,Q1D)
612 MFEM_FOREACH_THREAD(qx,x,Q1D)
614 MFEM_FOREACH_THREAD(dy,y,D1D)
619 for (
int qy = 0; qy < Q1D; ++qy)
621 const int q = qx + qy * Q1D;
622 const double D00 = D(q,0,e);
623 const double D10 = D(q,1,e);
624 const double D01 = symmetric ? D10 : D(q,2,e);
625 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
626 const double By = B[qy][dy];
627 const double Gy = G[qy][dy];
628 const double BBy = By * By;
629 const double BGy = By * Gy;
630 const double GGy = Gy * Gy;
631 QD0[qx][dy] += BBy * D00;
632 QD1[qx][dy] += BGy * (D01 + D10);
633 QD2[qx][dy] += GGy * D11;
638 MFEM_FOREACH_THREAD(dy,y,D1D)
640 MFEM_FOREACH_THREAD(dx,x,D1D)
642 for (
int qx = 0; qx < Q1D; ++qx)
644 const double Bx = B[qx][dx];
645 const double Gx = G[qx][dx];
646 const double BBx = Bx * Bx;
647 const double BGx = Bx * Gx;
648 const double GGx = Gx * Gx;
649 Y(dx,dy,e) += GGx * QD0[qx][dy];
650 Y(dx,dy,e) += BGx * QD1[qx][dy];
651 Y(dx,dy,e) += BBx * QD2[qx][dy];
658 template<
int T_D1D = 0,
int T_Q1D = 0>
659 static void PADiffusionDiagonal3D(
const int NE,
660 const bool symmetric,
661 const Array<double> &b,
662 const Array<double> &g,
668 constexpr
int DIM = 3;
669 const int D1D = T_D1D ? T_D1D : d1d;
670 const int Q1D = T_Q1D ? T_Q1D : q1d;
671 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
672 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
673 MFEM_VERIFY(D1D <= MD1,
"");
674 MFEM_VERIFY(Q1D <= MQ1,
"");
675 auto B =
Reshape(b.Read(), Q1D, D1D);
676 auto G =
Reshape(g.Read(), Q1D, D1D);
677 auto Q =
Reshape(d.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
678 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
681 const int D1D = T_D1D ? T_D1D : d1d;
682 const int Q1D = T_Q1D ? T_Q1D : q1d;
683 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
684 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
685 double QQD[MQ1][MQ1][MD1];
686 double QDD[MQ1][MD1][MD1];
687 for (
int i = 0; i <
DIM; ++i)
689 for (
int j = 0; j <
DIM; ++j)
692 for (
int qx = 0; qx < Q1D; ++qx)
694 for (
int qy = 0; qy < Q1D; ++qy)
696 for (
int dz = 0; dz < D1D; ++dz)
698 QQD[qx][qy][dz] = 0.0;
699 for (
int qz = 0; qz < Q1D; ++qz)
701 const int q = qx + (qy + qz * Q1D) * Q1D;
702 const int ksym = j >= i ?
703 3 - (3-i)*(2-i)/2 + j:
704 3 - (3-j)*(2-j)/2 + i;
705 const int k = symmetric ? ksym : (i*
DIM) + j;
706 const double O = Q(q,k,e);
707 const double Bz = B(qz,dz);
708 const double Gz = G(qz,dz);
709 const double L = i==2 ? Gz : Bz;
710 const double R = j==2 ? Gz : Bz;
711 QQD[qx][qy][dz] += L * O * R;
717 for (
int qx = 0; qx < Q1D; ++qx)
719 for (
int dz = 0; dz < D1D; ++dz)
721 for (
int dy = 0; dy < D1D; ++dy)
723 QDD[qx][dy][dz] = 0.0;
724 for (
int qy = 0; qy < Q1D; ++qy)
726 const double By = B(qy,dy);
727 const double Gy = G(qy,dy);
728 const double L = i==1 ? Gy : By;
729 const double R = j==1 ? Gy : By;
730 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
736 for (
int dz = 0; dz < D1D; ++dz)
738 for (
int dy = 0; dy < D1D; ++dy)
740 for (
int dx = 0; dx < D1D; ++dx)
742 for (
int qx = 0; qx < Q1D; ++qx)
744 const double Bx = B(qx,dx);
745 const double Gx = G(qx,dx);
746 const double L = i==0 ? Gx : Bx;
747 const double R = j==0 ? Gx : Bx;
748 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
759 template<
int T_D1D = 0,
int T_Q1D = 0>
760 static void SmemPADiffusionDiagonal3D(
const int NE,
761 const bool symmetric,
762 const Array<double> &b_,
763 const Array<double> &g_,
769 constexpr
int DIM = 3;
770 const int D1D = T_D1D ? T_D1D : d1d;
771 const int Q1D = T_Q1D ? T_Q1D : q1d;
772 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
773 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
774 MFEM_VERIFY(D1D <= MD1,
"");
775 MFEM_VERIFY(Q1D <= MQ1,
"");
776 auto b =
Reshape(b_.Read(), Q1D, D1D);
777 auto g =
Reshape(g_.Read(), Q1D, D1D);
778 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
779 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
780 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
782 const int tidz = MFEM_THREAD_ID(z);
783 const int D1D = T_D1D ? T_D1D : d1d;
784 const int Q1D = T_Q1D ? T_Q1D : q1d;
785 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
786 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
787 MFEM_SHARED
double BG[2][MQ1*MD1];
788 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
789 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
790 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
791 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
794 MFEM_FOREACH_THREAD(d,y,D1D)
796 MFEM_FOREACH_THREAD(q,x,Q1D)
804 for (
int i = 0; i <
DIM; ++i)
806 for (
int j = 0; j <
DIM; ++j)
809 MFEM_FOREACH_THREAD(qx,x,Q1D)
811 MFEM_FOREACH_THREAD(qy,y,Q1D)
813 MFEM_FOREACH_THREAD(dz,z,D1D)
815 QQD[qx][qy][dz] = 0.0;
816 for (
int qz = 0; qz < Q1D; ++qz)
818 const int q = qx + (qy + qz * Q1D) * Q1D;
819 const int ksym = j >= i ?
820 3 - (3-i)*(2-i)/2 + j:
821 3 - (3-j)*(2-j)/2 + i;
822 const int k = symmetric ? ksym : (i*
DIM) + j;
823 const double O = D(q,k,e);
824 const double Bz = B[qz][dz];
825 const double Gz = G[qz][dz];
826 const double L = i==2 ? Gz : Bz;
827 const double R = j==2 ? Gz : Bz;
828 QQD[qx][qy][dz] += L * O * R;
835 MFEM_FOREACH_THREAD(qx,x,Q1D)
837 MFEM_FOREACH_THREAD(dz,z,D1D)
839 MFEM_FOREACH_THREAD(dy,y,D1D)
841 QDD[qx][dy][dz] = 0.0;
842 for (
int qy = 0; qy < Q1D; ++qy)
844 const double By = B[qy][dy];
845 const double Gy = G[qy][dy];
846 const double L = i==1 ? Gy : By;
847 const double R = j==1 ? Gy : By;
848 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
855 MFEM_FOREACH_THREAD(dz,z,D1D)
857 MFEM_FOREACH_THREAD(dy,y,D1D)
859 MFEM_FOREACH_THREAD(dx,x,D1D)
861 for (
int qx = 0; qx < Q1D; ++qx)
863 const double Bx = B[qx][dx];
864 const double Gx = G[qx][dx];
865 const double L = i==0 ? Gx : Bx;
866 const double R = j==0 ? Gx : Bx;
867 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
877 static void PADiffusionAssembleDiagonal(
const int dim,
882 const Array<double> &B,
883 const Array<double> &G,
889 switch ((D1D << 4 ) | Q1D)
891 case 0x22:
return SmemPADiffusionDiagonal2D<2,2,8>(NE,symm,B,G,D,Y);
892 case 0x33:
return SmemPADiffusionDiagonal2D<3,3,8>(NE,symm,B,G,D,Y);
893 case 0x44:
return SmemPADiffusionDiagonal2D<4,4,4>(NE,symm,B,G,D,Y);
894 case 0x55:
return SmemPADiffusionDiagonal2D<5,5,4>(NE,symm,B,G,D,Y);
895 case 0x66:
return SmemPADiffusionDiagonal2D<6,6,2>(NE,symm,B,G,D,Y);
896 case 0x77:
return SmemPADiffusionDiagonal2D<7,7,2>(NE,symm,B,G,D,Y);
897 case 0x88:
return SmemPADiffusionDiagonal2D<8,8,1>(NE,symm,B,G,D,Y);
898 case 0x99:
return SmemPADiffusionDiagonal2D<9,9,1>(NE,symm,B,G,D,Y);
899 default:
return PADiffusionDiagonal2D(NE,symm,B,G,D,Y,D1D,Q1D);
904 switch ((D1D << 4 ) | Q1D)
906 case 0x22:
return SmemPADiffusionDiagonal3D<2,2>(NE,symm,B,G,D,Y);
907 case 0x23:
return SmemPADiffusionDiagonal3D<2,3>(NE,symm,B,G,D,Y);
908 case 0x34:
return SmemPADiffusionDiagonal3D<3,4>(NE,symm,B,G,D,Y);
909 case 0x45:
return SmemPADiffusionDiagonal3D<4,5>(NE,symm,B,G,D,Y);
910 case 0x46:
return SmemPADiffusionDiagonal3D<4,6>(NE,symm,B,G,D,Y);
911 case 0x56:
return SmemPADiffusionDiagonal3D<5,6>(NE,symm,B,G,D,Y);
912 case 0x67:
return SmemPADiffusionDiagonal3D<6,7>(NE,symm,B,G,D,Y);
913 case 0x78:
return SmemPADiffusionDiagonal3D<7,8>(NE,symm,B,G,D,Y);
914 case 0x89:
return SmemPADiffusionDiagonal3D<8,9>(NE,symm,B,G,D,Y);
915 case 0x9A:
return SmemPADiffusionDiagonal3D<9,10>(NE,symm,B,G,D,Y);
916 default:
return PADiffusionDiagonal3D(NE,symm,B,G,D,Y,D1D,Q1D);
919 MFEM_ABORT(
"Unknown kernel.");
922 void DiffusionIntegrator::AssembleDiagonalPA(
Vector &diag)
924 if (DeviceCanUseCeed())
926 ceedOp->GetDiagonal(diag);
930 if (pa_data.Size()==0) { AssemblePA(*fespace); }
931 PADiffusionAssembleDiagonal(dim, dofs1D, quad1D, ne, symmetric,
932 maps->B, maps->G, pa_data, diag);
939 static void OccaPADiffusionApply2D(
const int D1D,
950 occa::properties props;
951 props[
"defines/D1D"] = D1D;
952 props[
"defines/Q1D"] = Q1D;
960 const occa_id_t id = std::make_pair(D1D,Q1D);
961 if (!Device::Allows(Backend::OCCA_CUDA))
964 if (OccaDiffApply2D_cpu.find(
id) == OccaDiffApply2D_cpu.end())
966 const occa::kernel DiffusionApply2D_CPU =
968 "DiffusionApply2D_CPU", props);
969 OccaDiffApply2D_cpu.emplace(
id, DiffusionApply2D_CPU);
971 OccaDiffApply2D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
976 if (OccaDiffApply2D_gpu.find(
id) == OccaDiffApply2D_gpu.end())
978 const occa::kernel DiffusionApply2D_GPU =
980 "DiffusionApply2D_GPU", props);
981 OccaDiffApply2D_gpu.emplace(
id, DiffusionApply2D_GPU);
983 OccaDiffApply2D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
988 static void OccaPADiffusionApply3D(
const int D1D,
991 const Array<double> &B,
992 const Array<double> &G,
993 const Array<double> &Bt,
994 const Array<double> &Gt,
999 occa::properties props;
1000 props[
"defines/D1D"] = D1D;
1001 props[
"defines/Q1D"] = Q1D;
1002 const occa::memory o_B =
OccaMemoryRead(B.GetMemory(), B.Size());
1003 const occa::memory o_G =
OccaMemoryRead(G.GetMemory(), G.Size());
1004 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
1005 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
1006 const occa::memory o_D =
OccaMemoryRead(D.GetMemory(), D.Size());
1007 const occa::memory o_X =
OccaMemoryRead(X.GetMemory(), X.Size());
1009 const occa_id_t id = std::make_pair(D1D,Q1D);
1010 if (!Device::Allows(Backend::OCCA_CUDA))
1013 if (OccaDiffApply3D_cpu.find(
id) == OccaDiffApply3D_cpu.end())
1015 const occa::kernel DiffusionApply3D_CPU =
1017 "DiffusionApply3D_CPU", props);
1018 OccaDiffApply3D_cpu.emplace(
id, DiffusionApply3D_CPU);
1020 OccaDiffApply3D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
1025 if (OccaDiffApply3D_gpu.find(
id) == OccaDiffApply3D_gpu.end())
1027 const occa::kernel DiffusionApply3D_GPU =
1029 "DiffusionApply3D_GPU", props);
1030 OccaDiffApply3D_gpu.emplace(
id, DiffusionApply3D_GPU);
1032 OccaDiffApply3D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
1035 #endif // MFEM_USE_OCCA
1038 template<
int T_D1D = 0,
int T_Q1D = 0>
1039 static void PADiffusionApply2D(
const int NE,
1040 const bool symmetric,
1041 const Array<double> &b_,
1042 const Array<double> &g_,
1043 const Array<double> &bt_,
1044 const Array<double> >_,
1051 const int D1D = T_D1D ? T_D1D : d1d;
1052 const int Q1D = T_Q1D ? T_Q1D : q1d;
1053 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
1054 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
1055 auto B =
Reshape(b_.Read(), Q1D, D1D);
1056 auto G =
Reshape(g_.Read(), Q1D, D1D);
1057 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
1058 auto Gt =
Reshape(gt_.Read(), D1D, Q1D);
1059 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
1060 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
1061 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
1064 const int D1D = T_D1D ? T_D1D : d1d;
1065 const int Q1D = T_Q1D ? T_Q1D : q1d;
1067 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
1068 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
1070 double grad[max_Q1D][max_Q1D][2];
1071 for (
int qy = 0; qy < Q1D; ++qy)
1073 for (
int qx = 0; qx < Q1D; ++qx)
1075 grad[qy][qx][0] = 0.0;
1076 grad[qy][qx][1] = 0.0;
1079 for (
int dy = 0; dy < D1D; ++dy)
1081 double gradX[max_Q1D][2];
1082 for (
int qx = 0; qx < Q1D; ++qx)
1087 for (
int dx = 0; dx < D1D; ++dx)
1089 const double s = X(dx,dy,e);
1090 for (
int qx = 0; qx < Q1D; ++qx)
1092 gradX[qx][0] += s * B(qx,dx);
1093 gradX[qx][1] += s * G(qx,dx);
1096 for (
int qy = 0; qy < Q1D; ++qy)
1098 const double wy = B(qy,dy);
1099 const double wDy = G(qy,dy);
1100 for (
int qx = 0; qx < Q1D; ++qx)
1102 grad[qy][qx][0] += gradX[qx][1] * wy;
1103 grad[qy][qx][1] += gradX[qx][0] * wDy;
1108 for (
int qy = 0; qy < Q1D; ++qy)
1110 for (
int qx = 0; qx < Q1D; ++qx)
1112 const int q = qx + qy * Q1D;
1114 const double O11 = D(q,0,e);
1115 const double O21 = D(q,1,e);
1116 const double O12 = symmetric ? O21 : D(q,2,e);
1117 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1119 const double gradX = grad[qy][qx][0];
1120 const double gradY = grad[qy][qx][1];
1122 grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
1123 grad[qy][qx][1] = (O21 * gradX) + (O22 * gradY);
1126 for (
int qy = 0; qy < Q1D; ++qy)
1128 double gradX[max_D1D][2];
1129 for (
int dx = 0; dx < D1D; ++dx)
1134 for (
int qx = 0; qx < Q1D; ++qx)
1136 const double gX = grad[qy][qx][0];
1137 const double gY = grad[qy][qx][1];
1138 for (
int dx = 0; dx < D1D; ++dx)
1140 const double wx = Bt(dx,qx);
1141 const double wDx = Gt(dx,qx);
1142 gradX[dx][0] += gX * wDx;
1143 gradX[dx][1] += gY * wx;
1146 for (
int dy = 0; dy < D1D; ++dy)
1148 const double wy = Bt(dy,qy);
1149 const double wDy = Gt(dy,qy);
1150 for (
int dx = 0; dx < D1D; ++dx)
1152 Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
1160 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
1161 static void SmemPADiffusionApply2D(
const int NE,
1162 const bool symmetric,
1163 const Array<double> &b_,
1164 const Array<double> &g_,
1171 const int D1D = T_D1D ? T_D1D : d1d;
1172 const int Q1D = T_Q1D ? T_Q1D : q1d;
1173 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1174 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1175 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1176 MFEM_VERIFY(D1D <= MD1,
"");
1177 MFEM_VERIFY(Q1D <= MQ1,
"");
1178 auto b =
Reshape(b_.Read(), Q1D, D1D);
1179 auto g =
Reshape(g_.Read(), Q1D, D1D);
1180 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
1181 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
1182 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
1183 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
1185 const int tidz = MFEM_THREAD_ID(z);
1186 const int D1D = T_D1D ? T_D1D : d1d;
1187 const int Q1D = T_Q1D ? T_Q1D : q1d;
1188 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1189 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1190 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1191 MFEM_SHARED
double sBG[2][MQ1*MD1];
1192 double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
1193 double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
1194 double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
1195 double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
1196 MFEM_SHARED
double Xz[NBZ][MD1][MD1];
1197 MFEM_SHARED
double GD[2][NBZ][MD1][MQ1];
1198 MFEM_SHARED
double GQ[2][NBZ][MD1][MQ1];
1199 double (*X)[MD1] = (double (*)[MD1])(Xz + tidz);
1200 double (*DQ0)[MD1] = (double (*)[MD1])(GD[0] + tidz);
1201 double (*DQ1)[MD1] = (double (*)[MD1])(GD[1] + tidz);
1202 double (*QQ0)[MD1] = (double (*)[MD1])(GQ[0] + tidz);
1203 double (*QQ1)[MD1] = (double (*)[MD1])(GQ[1] + tidz);
1204 MFEM_FOREACH_THREAD(dy,y,D1D)
1206 MFEM_FOREACH_THREAD(dx,x,D1D)
1208 X[dy][dx] = x(dx,dy,e);
1213 MFEM_FOREACH_THREAD(dy,y,D1D)
1215 MFEM_FOREACH_THREAD(q,x,Q1D)
1223 MFEM_FOREACH_THREAD(dy,y,D1D)
1225 MFEM_FOREACH_THREAD(qx,x,Q1D)
1229 for (
int dx = 0; dx < D1D; ++dx)
1231 const double coords = X[dy][dx];
1232 u += B[qx][dx] * coords;
1233 v += G[qx][dx] * coords;
1240 MFEM_FOREACH_THREAD(qy,y,Q1D)
1242 MFEM_FOREACH_THREAD(qx,x,Q1D)
1246 for (
int dy = 0; dy < D1D; ++dy)
1248 u += DQ1[dy][qx] * B[qy][dy];
1249 v += DQ0[dy][qx] * G[qy][dy];
1256 MFEM_FOREACH_THREAD(qy,y,Q1D)
1258 MFEM_FOREACH_THREAD(qx,x,Q1D)
1260 const int q = (qx + ((qy) * Q1D));
1261 const double O11 = D(q,0,e);
1262 const double O21 = D(q,1,e);
1263 const double O12 = symmetric ? O21 : D(q,2,e);
1264 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1265 const double gX = QQ0[qy][qx];
1266 const double gY = QQ1[qy][qx];
1267 QQ0[qy][qx] = (O11 * gX) + (O12 * gY);
1268 QQ1[qy][qx] = (O21 * gX) + (O22 * gY);
1274 MFEM_FOREACH_THREAD(dy,y,D1D)
1276 MFEM_FOREACH_THREAD(q,x,Q1D)
1278 Bt[dy][q] =
b(q,dy);
1279 Gt[dy][q] = g(q,dy);
1284 MFEM_FOREACH_THREAD(qy,y,Q1D)
1286 MFEM_FOREACH_THREAD(dx,x,D1D)
1290 for (
int qx = 0; qx < Q1D; ++qx)
1292 u += Gt[dx][qx] * QQ0[qy][qx];
1293 v += Bt[dx][qx] * QQ1[qy][qx];
1300 MFEM_FOREACH_THREAD(dy,y,D1D)
1302 MFEM_FOREACH_THREAD(dx,x,D1D)
1306 for (
int qy = 0; qy < Q1D; ++qy)
1308 u += DQ0[qy][dx] * Bt[dy][qy];
1309 v += DQ1[qy][dx] * Gt[dy][qy];
1311 Y(dx,dy,e) += (u + v);
1318 template<
int T_D1D = 0,
int T_Q1D = 0>
1319 static void PADiffusionApply3D(
const int NE,
1320 const bool symmetric,
1321 const Array<double> &b,
1322 const Array<double> &g,
1323 const Array<double> &bt,
1324 const Array<double> >,
1328 int d1d = 0,
int q1d = 0)
1330 const int D1D = T_D1D ? T_D1D : d1d;
1331 const int Q1D = T_Q1D ? T_Q1D : q1d;
1332 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
1333 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
1334 auto B =
Reshape(b.Read(), Q1D, D1D);
1335 auto G =
Reshape(g.Read(), Q1D, D1D);
1336 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
1337 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
1338 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
1339 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1340 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1343 const int D1D = T_D1D ? T_D1D : d1d;
1344 const int Q1D = T_Q1D ? T_Q1D : q1d;
1345 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
1346 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
1347 double grad[max_Q1D][max_Q1D][max_Q1D][3];
1348 for (
int qz = 0; qz < Q1D; ++qz)
1350 for (
int qy = 0; qy < Q1D; ++qy)
1352 for (
int qx = 0; qx < Q1D; ++qx)
1354 grad[qz][qy][qx][0] = 0.0;
1355 grad[qz][qy][qx][1] = 0.0;
1356 grad[qz][qy][qx][2] = 0.0;
1360 for (
int dz = 0; dz < D1D; ++dz)
1362 double gradXY[max_Q1D][max_Q1D][3];
1363 for (
int qy = 0; qy < Q1D; ++qy)
1365 for (
int qx = 0; qx < Q1D; ++qx)
1367 gradXY[qy][qx][0] = 0.0;
1368 gradXY[qy][qx][1] = 0.0;
1369 gradXY[qy][qx][2] = 0.0;
1372 for (
int dy = 0; dy < D1D; ++dy)
1374 double gradX[max_Q1D][2];
1375 for (
int qx = 0; qx < Q1D; ++qx)
1380 for (
int dx = 0; dx < D1D; ++dx)
1382 const double s = X(dx,dy,dz,e);
1383 for (
int qx = 0; qx < Q1D; ++qx)
1385 gradX[qx][0] += s * B(qx,dx);
1386 gradX[qx][1] += s * G(qx,dx);
1389 for (
int qy = 0; qy < Q1D; ++qy)
1391 const double wy = B(qy,dy);
1392 const double wDy = G(qy,dy);
1393 for (
int qx = 0; qx < Q1D; ++qx)
1395 const double wx = gradX[qx][0];
1396 const double wDx = gradX[qx][1];
1397 gradXY[qy][qx][0] += wDx * wy;
1398 gradXY[qy][qx][1] += wx * wDy;
1399 gradXY[qy][qx][2] += wx * wy;
1403 for (
int qz = 0; qz < Q1D; ++qz)
1405 const double wz = B(qz,dz);
1406 const double wDz = G(qz,dz);
1407 for (
int qy = 0; qy < Q1D; ++qy)
1409 for (
int qx = 0; qx < Q1D; ++qx)
1411 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
1412 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
1413 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
1419 for (
int qz = 0; qz < Q1D; ++qz)
1421 for (
int qy = 0; qy < Q1D; ++qy)
1423 for (
int qx = 0; qx < Q1D; ++qx)
1425 const int q = qx + (qy + qz * Q1D) * Q1D;
1426 const double O11 = D(q,0,e);
1427 const double O12 = D(q,1,e);
1428 const double O13 = D(q,2,e);
1429 const double O21 = symmetric ? O12 : D(q,3,e);
1430 const double O22 = symmetric ? D(q,3,e) : D(q,4,e);
1431 const double O23 = symmetric ? D(q,4,e) : D(q,5,e);
1432 const double O31 = symmetric ? O13 : D(q,6,e);
1433 const double O32 = symmetric ? O23 : D(q,7,e);
1434 const double O33 = symmetric ? D(q,5,e) : D(q,8,e);
1435 const double gradX = grad[qz][qy][qx][0];
1436 const double gradY = grad[qz][qy][qx][1];
1437 const double gradZ = grad[qz][qy][qx][2];
1438 grad[qz][qy][qx][0] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
1439 grad[qz][qy][qx][1] = (O21*gradX)+(O22*gradY)+(O23*gradZ);
1440 grad[qz][qy][qx][2] = (O31*gradX)+(O32*gradY)+(O33*gradZ);
1444 for (
int qz = 0; qz < Q1D; ++qz)
1446 double gradXY[max_D1D][max_D1D][3];
1447 for (
int dy = 0; dy < D1D; ++dy)
1449 for (
int dx = 0; dx < D1D; ++dx)
1451 gradXY[dy][dx][0] = 0;
1452 gradXY[dy][dx][1] = 0;
1453 gradXY[dy][dx][2] = 0;
1456 for (
int qy = 0; qy < Q1D; ++qy)
1458 double gradX[max_D1D][3];
1459 for (
int dx = 0; dx < D1D; ++dx)
1465 for (
int qx = 0; qx < Q1D; ++qx)
1467 const double gX = grad[qz][qy][qx][0];
1468 const double gY = grad[qz][qy][qx][1];
1469 const double gZ = grad[qz][qy][qx][2];
1470 for (
int dx = 0; dx < D1D; ++dx)
1472 const double wx = Bt(dx,qx);
1473 const double wDx = Gt(dx,qx);
1474 gradX[dx][0] += gX * wDx;
1475 gradX[dx][1] += gY * wx;
1476 gradX[dx][2] += gZ * wx;
1479 for (
int dy = 0; dy < D1D; ++dy)
1481 const double wy = Bt(dy,qy);
1482 const double wDy = Gt(dy,qy);
1483 for (
int dx = 0; dx < D1D; ++dx)
1485 gradXY[dy][dx][0] += gradX[dx][0] * wy;
1486 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
1487 gradXY[dy][dx][2] += gradX[dx][2] * wy;
1491 for (
int dz = 0; dz < D1D; ++dz)
1493 const double wz = Bt(dz,qz);
1494 const double wDz = Gt(dz,qz);
1495 for (
int dy = 0; dy < D1D; ++dy)
1497 for (
int dx = 0; dx < D1D; ++dx)
1500 ((gradXY[dy][dx][0] * wz) +
1501 (gradXY[dy][dx][1] * wz) +
1502 (gradXY[dy][dx][2] * wDz));
1510 template<
int T_D1D = 0,
int T_Q1D = 0>
1511 static void SmemPADiffusionApply3D(
const int NE,
1512 const bool symmetric,
1513 const Array<double> &b_,
1514 const Array<double> &g_,
1521 const int D1D = T_D1D ? T_D1D : d1d;
1522 const int Q1D = T_Q1D ? T_Q1D : q1d;
1523 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
1524 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
1525 MFEM_VERIFY(D1D <= M1D,
"");
1526 MFEM_VERIFY(Q1D <= M1Q,
"");
1527 auto b =
Reshape(b_.Read(), Q1D, D1D);
1528 auto g =
Reshape(g_.Read(), Q1D, D1D);
1529 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1530 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1531 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1532 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1534 const int D1D = T_D1D ? T_D1D : d1d;
1535 const int Q1D = T_Q1D ? T_Q1D : q1d;
1536 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1537 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1538 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
1539 MFEM_SHARED
double sBG[2][MQ1*MD1];
1540 double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
1541 double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
1542 double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
1543 double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
1544 MFEM_SHARED
double sm0[3][MDQ*MDQ*MDQ];
1545 MFEM_SHARED
double sm1[3][MDQ*MDQ*MDQ];
1546 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1547 double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
1548 double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
1549 double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
1550 double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
1551 double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
1552 double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
1553 double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
1554 double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
1555 double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
1556 double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
1557 double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
1558 double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
1559 double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
1560 double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1561 MFEM_FOREACH_THREAD(dz,z,D1D)
1563 MFEM_FOREACH_THREAD(dy,y,D1D)
1565 MFEM_FOREACH_THREAD(dx,x,D1D)
1567 X[dz][dy][dx] = x(dx,dy,dz,e);
1571 if (MFEM_THREAD_ID(z) == 0)
1573 MFEM_FOREACH_THREAD(dy,y,D1D)
1575 MFEM_FOREACH_THREAD(qx,x,Q1D)
1577 B[qx][dy] =
b(qx,dy);
1578 G[qx][dy] = g(qx,dy);
1583 MFEM_FOREACH_THREAD(dz,z,D1D)
1585 MFEM_FOREACH_THREAD(dy,y,D1D)
1587 MFEM_FOREACH_THREAD(qx,x,Q1D)
1589 double u = 0.0, v = 0.0;
1591 for (
int dx = 0; dx < D1D; ++dx)
1593 const double coords = X[dz][dy][dx];
1594 u += coords * B[qx][dx];
1595 v += coords * G[qx][dx];
1597 DDQ0[dz][dy][qx] =
u;
1598 DDQ1[dz][dy][qx] = v;
1603 MFEM_FOREACH_THREAD(dz,z,D1D)
1605 MFEM_FOREACH_THREAD(qy,y,Q1D)
1607 MFEM_FOREACH_THREAD(qx,x,Q1D)
1609 double u = 0.0, v = 0.0, w = 0.0;
1611 for (
int dy = 0; dy < D1D; ++dy)
1613 u += DDQ1[dz][dy][qx] * B[qy][dy];
1614 v += DDQ0[dz][dy][qx] * G[qy][dy];
1615 w += DDQ0[dz][dy][qx] * B[qy][dy];
1617 DQQ0[dz][qy][qx] =
u;
1618 DQQ1[dz][qy][qx] = v;
1619 DQQ2[dz][qy][qx] = w;
1624 MFEM_FOREACH_THREAD(qz,z,Q1D)
1626 MFEM_FOREACH_THREAD(qy,y,Q1D)
1628 MFEM_FOREACH_THREAD(qx,x,Q1D)
1630 double u = 0.0, v = 0.0, w = 0.0;
1632 for (
int dz = 0; dz < D1D; ++dz)
1634 u += DQQ0[dz][qy][qx] * B[qz][dz];
1635 v += DQQ1[dz][qy][qx] * B[qz][dz];
1636 w += DQQ2[dz][qy][qx] * G[qz][dz];
1638 const double O11 = d(qx,qy,qz,0,e);
1639 const double O12 = d(qx,qy,qz,1,e);
1640 const double O13 = d(qx,qy,qz,2,e);
1641 const double O21 = symmetric ? O12 : d(qx,qy,qz,3,e);
1642 const double O22 = symmetric ? d(qx,qy,qz,3,e) : d(qx,qy,qz,4,e);
1643 const double O23 = symmetric ? d(qx,qy,qz,4,e) : d(qx,qy,qz,5,e);
1644 const double O31 = symmetric ? O13 : d(qx,qy,qz,6,e);
1645 const double O32 = symmetric ? O23 : d(qx,qy,qz,7,e);
1646 const double O33 = symmetric ? d(qx,qy,qz,5,e) : d(qx,qy,qz,8,e);
1647 const double gX =
u;
1648 const double gY = v;
1649 const double gZ = w;
1650 QQQ0[qz][qy][qx] = (O11*gX) + (O12*gY) + (O13*gZ);
1651 QQQ1[qz][qy][qx] = (O21*gX) + (O22*gY) + (O23*gZ);
1652 QQQ2[qz][qy][qx] = (O31*gX) + (O32*gY) + (O33*gZ);
1657 if (MFEM_THREAD_ID(z) == 0)
1659 MFEM_FOREACH_THREAD(dy,y,D1D)
1661 MFEM_FOREACH_THREAD(qx,x,Q1D)
1663 Bt[dy][qx] =
b(qx,dy);
1664 Gt[dy][qx] = g(qx,dy);
1669 MFEM_FOREACH_THREAD(qz,z,Q1D)
1671 MFEM_FOREACH_THREAD(qy,y,Q1D)
1673 MFEM_FOREACH_THREAD(dx,x,D1D)
1675 double u = 0.0, v = 0.0, w = 0.0;
1677 for (
int qx = 0; qx < Q1D; ++qx)
1679 u += QQQ0[qz][qy][qx] * Gt[dx][qx];
1680 v += QQQ1[qz][qy][qx] * Bt[dx][qx];
1681 w += QQQ2[qz][qy][qx] * Bt[dx][qx];
1683 QQD0[qz][qy][dx] =
u;
1684 QQD1[qz][qy][dx] = v;
1685 QQD2[qz][qy][dx] = w;
1690 MFEM_FOREACH_THREAD(qz,z,Q1D)
1692 MFEM_FOREACH_THREAD(dy,y,D1D)
1694 MFEM_FOREACH_THREAD(dx,x,D1D)
1696 double u = 0.0, v = 0.0, w = 0.0;
1698 for (
int qy = 0; qy < Q1D; ++qy)
1700 u += QQD0[qz][qy][dx] * Bt[dy][qy];
1701 v += QQD1[qz][qy][dx] * Gt[dy][qy];
1702 w += QQD2[qz][qy][dx] * Bt[dy][qy];
1704 QDD0[qz][dy][dx] =
u;
1705 QDD1[qz][dy][dx] = v;
1706 QDD2[qz][dy][dx] = w;
1711 MFEM_FOREACH_THREAD(dz,z,D1D)
1713 MFEM_FOREACH_THREAD(dy,y,D1D)
1715 MFEM_FOREACH_THREAD(dx,x,D1D)
1717 double u = 0.0, v = 0.0, w = 0.0;
1719 for (
int qz = 0; qz < Q1D; ++qz)
1721 u += QDD0[qz][dy][dx] * Bt[dz][qz];
1722 v += QDD1[qz][dy][dx] * Bt[dz][qz];
1723 w += QDD2[qz][dy][dx] * Gt[dz][qz];
1725 y(dx,dy,dz,e) += (u + v + w);
1732 static void PADiffusionApply(
const int dim,
1737 const Array<double> &B,
1738 const Array<double> &G,
1739 const Array<double> &Bt,
1740 const Array<double> &Gt,
1745 #ifdef MFEM_USE_OCCA
1750 OccaPADiffusionApply2D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1755 OccaPADiffusionApply3D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1758 MFEM_ABORT(
"OCCA PADiffusionApply unknown kernel!");
1760 #endif // MFEM_USE_OCCA
1761 const int id = (D1D << 4) | Q1D;
1767 case 0x22:
return SmemPADiffusionApply2D<2,2,16>(NE,symm,B,G,D,X,Y);
1768 case 0x33:
return SmemPADiffusionApply2D<3,3,16>(NE,symm,B,G,D,X,Y);
1769 case 0x44:
return SmemPADiffusionApply2D<4,4,8>(NE,symm,B,G,D,X,Y);
1770 case 0x55:
return SmemPADiffusionApply2D<5,5,8>(NE,symm,B,G,D,X,Y);
1771 case 0x66:
return SmemPADiffusionApply2D<6,6,4>(NE,symm,B,G,D,X,Y);
1772 case 0x77:
return SmemPADiffusionApply2D<7,7,4>(NE,symm,B,G,D,X,Y);
1773 case 0x88:
return SmemPADiffusionApply2D<8,8,2>(NE,symm,B,G,D,X,Y);
1774 case 0x99:
return SmemPADiffusionApply2D<9,9,2>(NE,symm,B,G,D,X,Y);
1775 default:
return PADiffusionApply2D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1783 case 0x22:
return SmemPADiffusionApply3D<2,2>(NE,symm,B,G,D,X,Y);
1784 case 0x23:
return SmemPADiffusionApply3D<2,3>(NE,symm,B,G,D,X,Y);
1785 case 0x34:
return SmemPADiffusionApply3D<3,4>(NE,symm,B,G,D,X,Y);
1786 case 0x45:
return SmemPADiffusionApply3D<4,5>(NE,symm,B,G,D,X,Y);
1787 case 0x46:
return SmemPADiffusionApply3D<4,6>(NE,symm,B,G,D,X,Y);
1788 case 0x56:
return SmemPADiffusionApply3D<5,6>(NE,symm,B,G,D,X,Y);
1789 case 0x58:
return SmemPADiffusionApply3D<5,8>(NE,symm,B,G,D,X,Y);
1790 case 0x67:
return SmemPADiffusionApply3D<6,7>(NE,symm,B,G,D,X,Y);
1791 case 0x78:
return SmemPADiffusionApply3D<7,8>(NE,symm,B,G,D,X,Y);
1792 case 0x89:
return SmemPADiffusionApply3D<8,9>(NE,symm,B,G,D,X,Y);
1793 default:
return PADiffusionApply3D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1796 MFEM_ABORT(
"Unknown kernel: 0x"<<std::hex <<
id << std::dec);
1802 if (DeviceCanUseCeed())
1804 ceedOp->AddMult(x, y);
1808 PADiffusionApply(dim, dofs1D, quad1D, ne, symmetric,
1809 maps->B, maps->G, maps->Bt, maps->Gt,
1814 void DiffusionIntegrator::AddMultTransposePA(
const Vector &x,
Vector &y)
const
1822 MFEM_ABORT(
"DiffusionIntegrator::AddMultTransposePA only implemented in "
1823 "the symmetric case.")
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
int Size() const
Return the logical size of the array.
int GetDim() const
Returns the reference space dimension for the finite element.
Class for an integration rule - an Array of IntegrationPoint.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
A coefficient that is constant across space and time.
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
void SetSize(int s)
Change the size of the DenseSymmetricMatrix to s x s.
void SetSize(int s)
Resize the vector to size s.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Data type dense matrix using column-major storage.
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
int GetNE() const
Returns number of elements in the mesh.
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
Mesh * GetMesh() const
Returns the mesh.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
void PADiffusionSetup2D< 3 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
void PADiffusionSetup2D< 2 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
int SpaceDimension() const
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
double p(const Vector &x, double t)
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
MemoryType
Memory types supported by MFEM.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
void PADiffusionSetup3D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
FDualNumber< tbase > sqrt(const FDualNumber< tbase > &f)
sqrt([dual number])
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
std::pair< int, int > occa_id_t
double u(const Vector &xvec)
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Class representing a function through its values (scalar or vector) at quadrature points...
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).