12 #include "../general/forall.hpp"
15 #include "libceed/diffusion.hpp"
26 static void OccaPADiffusionSetup2D(
const int D1D,
29 const Array<double> &W,
34 occa::properties props;
35 props[
"defines/D1D"] = D1D;
36 props[
"defines/Q1D"] = Q1D;
41 const bool const_c = C.Size() == 1;
42 const occa_id_t id = std::make_pair(D1D,Q1D);
44 if (OccaDiffSetup2D_ker.find(
id) == OccaDiffSetup2D_ker.end())
46 const occa::kernel DiffusionSetup2D =
48 "DiffusionSetup2D", props);
49 OccaDiffSetup2D_ker.emplace(
id, DiffusionSetup2D);
51 OccaDiffSetup2D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
54 static void OccaPADiffusionSetup3D(
const int D1D,
57 const Array<double> &W,
62 occa::properties props;
63 props[
"defines/D1D"] = D1D;
64 props[
"defines/Q1D"] = Q1D;
69 const bool const_c = C.Size() == 1;
70 const occa_id_t id = std::make_pair(D1D,Q1D);
72 if (OccaDiffSetup3D_ker.find(
id) == OccaDiffSetup3D_ker.end())
74 const occa::kernel DiffusionSetup3D =
76 "DiffusionSetup3D", props);
77 OccaDiffSetup3D_ker.emplace(
id, DiffusionSetup3D);
79 OccaDiffSetup3D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
81 #endif // MFEM_USE_OCCA
92 const bool symmetric = (coeffDim != 4);
93 const bool const_c = c.Size() == 1;
94 MFEM_VERIFY(coeffDim < 3 ||
95 !const_c,
"Constant matrix coefficient not supported");
96 const auto W =
Reshape(w.Read(), Q1D,Q1D);
97 const auto J =
Reshape(j.Read(), Q1D,Q1D,2,2,NE);
98 const auto C = const_c ?
Reshape(c.Read(), 1,1,1,1) :
99 Reshape(c.Read(), coeffDim,Q1D,Q1D,NE);
100 auto D =
Reshape(d.Write(), Q1D,Q1D, symmetric ? 3 : 4, NE);
101 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
103 MFEM_FOREACH_THREAD(qx,x,Q1D)
105 MFEM_FOREACH_THREAD(qy,y,Q1D)
107 const double J11 = J(qx,qy,0,0,e);
108 const double J21 = J(qx,qy,1,0,e);
109 const double J12 = J(qx,qy,0,1,e);
110 const double J22 = J(qx,qy,1,1,e);
111 const double w_detJ = W(qx,qy) / ((J11*J22)-(J21*J12));
112 if (coeffDim == 3 || coeffDim == 4)
115 const double M11 = C(0,qx,qy,e);
116 const double M12 = C(1,qx,qy,e);
117 const double M21 = symmetric ? M12 : C(2,qx,qy,e);
118 const double M22 = symmetric ? C(2,qx,qy,e) : C(3,qx,qy,e);
119 const double R11 = M11*J22 - M12*J12;
120 const double R21 = M21*J22 - M22*J12;
121 const double R12 = -M11*J21 + M12*J11;
122 const double R22 = -M21*J21 + M22*J11;
125 D(qx,qy,0,e) = w_detJ * ( J22*R11 - J12*R21);
126 D(qx,qy,1,e) = w_detJ * (-J21*R11 + J11*R21);
127 D(qx,qy,2,e) = w_detJ * (symmetric ? (-J21*R12 + J11*R22) :
128 (J22*R12 - J12*R22));
131 D(qx,qy,3,e) = w_detJ * (-J21*R12 + J11*R22);
136 const double C1 = const_c ? C(0,0,0,0) : C(0,qx,qy,e);
137 const double C2 = const_c ? C(0,0,0,0) :
138 (coeffDim == 2 ? C(1,qx,qy,e) : C(0,qx,qy,e));
140 D(qx,qy,0,e) = w_detJ * (C2*J12*J12 + C1*J22*J22);
141 D(qx,qy,1,e) = -w_detJ * (C2*J12*J11 + C1*J22*J21);
142 D(qx,qy,2,e) = w_detJ * (C2*J11*J11 + C1*J21*J21);
159 MFEM_VERIFY(coeffDim == 1,
"Matrix and vector coefficients not supported");
160 constexpr
int DIM = 2;
161 constexpr
int SDIM = 3;
162 const bool const_c = c.Size() == 1;
163 const auto W =
Reshape(w.Read(), Q1D,Q1D);
165 const auto C = const_c ?
Reshape(c.Read(), 1,1,1) :
167 auto D =
Reshape(d.Write(), Q1D,Q1D, 3, NE);
168 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
170 MFEM_FOREACH_THREAD(qx,x,Q1D)
172 MFEM_FOREACH_THREAD(qy,y,Q1D)
174 const double wq = W(qx,qy);
175 const double J11 = J(qx,qy,0,0,e);
176 const double J21 = J(qx,qy,1,0,e);
177 const double J31 = J(qx,qy,2,0,e);
178 const double J12 = J(qx,qy,0,1,e);
179 const double J22 = J(qx,qy,1,1,e);
180 const double J32 = J(qx,qy,2,1,e);
181 const double E = J11*J11 + J21*J21 + J31*J31;
182 const double G = J12*J12 + J22*J22 + J32*J32;
183 const double F = J11*J12 + J21*J22 + J31*J32;
184 const double iw = 1.0 / sqrt(E*G - F*F);
185 const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
186 const double alpha = wq * coeff * iw;
187 D(qx,qy,0,e) = alpha * G;
188 D(qx,qy,1,e) = -alpha * F;
189 D(qx,qy,2,e) = alpha * E;
204 const bool symmetric = (coeffDim != 9);
205 const bool const_c = c.
Size() == 1;
206 MFEM_VERIFY(coeffDim < 6 ||
207 !const_c,
"Constant matrix coefficient not supported");
209 const auto J =
Reshape(j.
Read(), Q1D,Q1D,Q1D,3,3,NE);
210 const auto C = const_c ?
Reshape(c.
Read(), 1,1,1,1,1) :
212 auto D =
Reshape(d.
Write(), Q1D,Q1D,Q1D, symmetric ? 6 : 9, NE);
213 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
215 MFEM_FOREACH_THREAD(qx,x,Q1D)
217 MFEM_FOREACH_THREAD(qy,y,Q1D)
219 MFEM_FOREACH_THREAD(qz,z,Q1D)
221 const double J11 = J(qx,qy,qz,0,0,e);
222 const double J21 = J(qx,qy,qz,1,0,e);
223 const double J31 = J(qx,qy,qz,2,0,e);
224 const double J12 = J(qx,qy,qz,0,1,e);
225 const double J22 = J(qx,qy,qz,1,1,e);
226 const double J32 = J(qx,qy,qz,2,1,e);
227 const double J13 = J(qx,qy,qz,0,2,e);
228 const double J23 = J(qx,qy,qz,1,2,e);
229 const double J33 = J(qx,qy,qz,2,2,e);
230 const double detJ = J11 * (J22 * J33 - J32 * J23) -
231 J21 * (J12 * J33 - J32 * J13) +
232 J31 * (J12 * J23 - J22 * J13);
233 const double w_detJ = W(qx,qy,qz) / detJ;
235 const double A11 = (J22 * J33) - (J23 * J32);
236 const double A12 = (J32 * J13) - (J12 * J33);
237 const double A13 = (J12 * J23) - (J22 * J13);
238 const double A21 = (J31 * J23) - (J21 * J33);
239 const double A22 = (J11 * J33) - (J13 * J31);
240 const double A23 = (J21 * J13) - (J11 * J23);
241 const double A31 = (J21 * J32) - (J31 * J22);
242 const double A32 = (J31 * J12) - (J11 * J32);
243 const double A33 = (J11 * J22) - (J12 * J21);
245 if (coeffDim == 6 || coeffDim == 9)
248 const double M11 = C(0, qx,qy,qz, e);
249 const double M12 = C(1, qx,qy,qz, e);
250 const double M13 = C(2, qx,qy,qz, e);
251 const double M21 = (!symmetric) ? C(3, qx,qy,qz, e) : M12;
252 const double M22 = (!symmetric) ? C(4, qx,qy,qz, e) : C(3, qx,qy,qz, e);
253 const double M23 = (!symmetric) ? C(5, qx,qy,qz, e) : C(4, qx,qy,qz, e);
254 const double M31 = (!symmetric) ? C(6, qx,qy,qz, e) : M13;
255 const double M32 = (!symmetric) ? C(7, qx,qy,qz, e) : M23;
256 const double M33 = (!symmetric) ? C(8, qx,qy,qz, e) : C(5, qx,qy,qz, e);
258 const double R11 = M11*A11 + M12*A12 + M13*A13;
259 const double R12 = M11*A21 + M12*A22 + M13*A23;
260 const double R13 = M11*A31 + M12*A32 + M13*A33;
261 const double R21 = M21*A11 + M22*A12 + M23*A13;
262 const double R22 = M21*A21 + M22*A22 + M23*A23;
263 const double R23 = M21*A31 + M22*A32 + M23*A33;
264 const double R31 = M31*A11 + M32*A12 + M33*A13;
265 const double R32 = M31*A21 + M32*A22 + M33*A23;
266 const double R33 = M31*A31 + M32*A32 + M33*A33;
269 D(qx,qy,qz,0,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31);
270 const double D12 = w_detJ * (A11*R12 + A12*R22 + A13*R32);
271 D(qx,qy,qz,1,e) = D12;
272 D(qx,qy,qz,2,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33);
274 const double D21 = w_detJ * (A21*R11 + A22*R21 + A23*R31);
275 const double D22 = w_detJ * (A21*R12 + A22*R22 + A23*R32);
276 const double D23 = w_detJ * (A21*R13 + A22*R23 + A23*R33);
278 const double D33 = w_detJ * (A31*R13 + A32*R23 + A33*R33);
280 D(qx,qy,qz,3,e) = symmetric ? D22 : D21;
281 D(qx,qy,qz,4,e) = symmetric ? D23 : D22;
282 D(qx,qy,qz,5,e) = symmetric ? D33 : D23;
286 D(qx,qy,qz,6,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31);
287 D(qx,qy,qz,7,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32);
288 D(qx,qy,qz,8,e) = D33;
293 const double C1 = const_c ? C(0,0,0,0,0) : C(0,qx,qy,qz,e);
294 const double C2 = const_c ? C(0,0,0,0,0) :
295 (coeffDim == 3 ? C(1,qx,qy,qz,e) : C(0,qx,qy,qz,e));
296 const double C3 = const_c ? C(0,0,0,0,0) :
297 (coeffDim == 3 ? C(2,qx,qy,qz,e) : C(0,qx,qy,qz,e));
300 D(qx,qy,qz,0,e) = w_detJ * (C1*A11*A11 + C2*A12*A12 + C3*A13*A13);
301 D(qx,qy,qz,1,e) = w_detJ * (C1*A11*A21 + C2*A12*A22 + C3*A13*A23);
302 D(qx,qy,qz,2,e) = w_detJ * (C1*A11*A31 + C2*A12*A32 + C3*A13*A33);
303 D(qx,qy,qz,3,e) = w_detJ * (C1*A21*A21 + C2*A22*A22 + C3*A23*A23);
304 D(qx,qy,qz,4,e) = w_detJ * (C1*A21*A31 + C2*A22*A32 + C3*A23*A33);
305 D(qx,qy,qz,5,e) = w_detJ * (C1*A31*A31 + C2*A32*A32 + C3*A33*A33);
313 static void PADiffusionSetup(
const int dim,
319 const Array<double> &W,
324 if (dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADiffusionSetup"); }
330 OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
334 MFEM_CONTRACT_VAR(D1D);
335 #endif // MFEM_USE_OCCA
344 OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
347 #endif // MFEM_USE_OCCA
357 if (mesh->
GetNE() == 0) {
return; }
360 if (DeviceCanUseCeed())
363 ceedDataPtr =
new CeedData;
364 InitCeedCoeff(Q, *mesh, *ir, ceedDataPtr);
365 return CeedPADiffusionAssemble(fes, *ir, *ceedDataPtr);
367 const int dims = el.
GetDim();
368 const int symmDims = (dims * (dims + 1)) / 2;
379 const int MQfullDim = MQ ? MQ->GetHeight() * MQ->GetWidth() : 0;
382 MFEM_VERIFY(MQ->GetHeight() == dim && MQ->GetWidth() ==
dim,
"");
383 const int MQsymmDim = MQ->GetWidth() * (MQ->GetWidth() + 1) / 2;
385 const int MQdim = MQ->IsSymmetric() ? MQsymmDim : MQfullDim;
388 coeff.
SetSize(MQdim * nq * ne);
389 symmetric = MQ ? MQ->IsSymmetric() :
true;
403 for (
int e=0; e<ne; ++e)
406 for (
int p=0;
p<nq; ++
p)
408 if (MQ->IsSymmetric())
410 MQ->EvalSymmetric(Msymm, *tr, ir->
IntPoint(
p));
412 for (
int i=0; i<MQsymmDim; ++i)
414 C(i,
p, e) = Msymm[i];
421 for (
int i=0; i<
dim; ++i)
422 for (
int j=0; j<
dim; ++j)
424 C(j+(i*dim),
p, e) = M(i,j);
432 MFEM_VERIFY(VQ->GetVDim() ==
dim,
"");
433 coeffDim = VQ->GetVDim();
434 coeff.
SetSize(coeffDim * nq * ne);
437 for (
int e=0; e<ne; ++e)
440 for (
int p=0;
p<nq; ++
p)
443 for (
int i=0; i<coeffDim; ++i)
450 else if (Q ==
nullptr)
458 coeff(0) = cQ->constant;
461 dynamic_cast<QuadratureFunctionCoefficient*>(Q))
464 MFEM_VERIFY(qFun.
Size() == ne*nq,
465 "Incompatible QuadratureFunction dimension \n");
468 "IntegrationRule used within integrator and in"
469 " QuadratureFunction appear to be different");
471 coeff.
MakeRef(const_cast<QuadratureFunction &>(qFun),0);
477 for (
int e = 0; e < ne; ++e)
480 for (
int q = 0; q < nq; ++q)
482 C(q,e) = Q->Eval(T, ir->
IntPoint(q));
486 pa_data.SetSize((symmetric ? symmDims : MQfullDim) * nq * ne,
487 Device::GetDeviceMemoryType());
488 PADiffusionSetup(dim, sdim, dofs1D, quad1D, coeffDim, ne, ir->
GetWeights(),
489 geom->J, coeff, pa_data);
492 template<
int T_D1D = 0,
int T_Q1D = 0>
493 static void PADiffusionDiagonal2D(
const int NE,
494 const bool symmetric,
502 const int D1D = T_D1D ? T_D1D : d1d;
503 const int Q1D = T_Q1D ? T_Q1D : q1d;
504 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
505 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
510 auto D =
Reshape(d.
Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
514 const int D1D = T_D1D ? T_D1D : d1d;
515 const int Q1D = T_Q1D ? T_Q1D : q1d;
516 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
517 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
519 double QD0[MQ1][MD1];
520 double QD1[MQ1][MD1];
521 double QD2[MQ1][MD1];
522 for (
int qx = 0; qx < Q1D; ++qx)
524 for (
int dy = 0; dy < D1D; ++dy)
529 for (
int qy = 0; qy < Q1D; ++qy)
531 const int q = qx + qy * Q1D;
532 const double D00 = D(q,0,e);
533 const double D10 = D(q,1,e);
534 const double D01 = symmetric ? D10 : D(q,2,e);
535 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
536 QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D00;
537 QD1[qx][dy] += B(qy, dy) * G(qy, dy) * (D01 + D10);
538 QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D11;
542 for (
int dy = 0; dy < D1D; ++dy)
544 for (
int dx = 0; dx < D1D; ++dx)
546 for (
int qx = 0; qx < Q1D; ++qx)
548 Y(dx,dy,e) += G(qx, dx) * G(qx, dx) * QD0[qx][dy];
549 Y(dx,dy,e) += G(qx, dx) * B(qx, dx) * QD1[qx][dy];
550 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD2[qx][dy];
558 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
559 static void SmemPADiffusionDiagonal2D(
const int NE,
560 const bool symmetric,
561 const Array<double> &b_,
562 const Array<double> &g_,
568 const int D1D = T_D1D ? T_D1D : d1d;
569 const int Q1D = T_Q1D ? T_Q1D : q1d;
570 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
571 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
572 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
573 MFEM_VERIFY(D1D <= MD1,
"");
574 MFEM_VERIFY(Q1D <= MQ1,
"");
575 auto b =
Reshape(b_.Read(), Q1D, D1D);
576 auto g =
Reshape(g_.Read(), Q1D, D1D);
577 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
578 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
579 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
581 const int tidz = MFEM_THREAD_ID(z);
582 const int D1D = T_D1D ? T_D1D : d1d;
583 const int Q1D = T_Q1D ? T_Q1D : q1d;
584 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
585 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
586 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
587 MFEM_SHARED
double BG[2][MQ1*MD1];
588 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
589 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
590 MFEM_SHARED
double QD[3][NBZ][MD1][MQ1];
591 double (*QD0)[MD1] = (double (*)[MD1])(QD[0] + tidz);
592 double (*QD1)[MD1] = (double (*)[MD1])(QD[1] + tidz);
593 double (*QD2)[MD1] = (double (*)[MD1])(QD[2] + tidz);
596 MFEM_FOREACH_THREAD(d,y,D1D)
598 MFEM_FOREACH_THREAD(q,x,Q1D)
606 MFEM_FOREACH_THREAD(qx,x,Q1D)
608 MFEM_FOREACH_THREAD(dy,y,D1D)
613 for (
int qy = 0; qy < Q1D; ++qy)
615 const int q = qx + qy * Q1D;
616 const double D00 = D(q,0,e);
617 const double D10 = D(q,1,e);
618 const double D01 = symmetric ? D10 : D(q,2,e);
619 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
620 const double By = B[qy][dy];
621 const double Gy = G[qy][dy];
622 const double BB = By * By;
623 const double BG = By * Gy;
624 const double GG = Gy * Gy;
625 QD0[qx][dy] += BB * D00;
626 QD1[qx][dy] += BG * (D01 + D10);
627 QD2[qx][dy] += GG * D11;
632 MFEM_FOREACH_THREAD(dy,y,D1D)
634 MFEM_FOREACH_THREAD(dx,x,D1D)
636 for (
int qx = 0; qx < Q1D; ++qx)
638 const double Bx = B[qx][dx];
639 const double Gx = G[qx][dx];
640 const double BB = Bx * Bx;
641 const double BG = Bx * Gx;
642 const double GG = Gx * Gx;
643 Y(dx,dy,e) += GG * QD0[qx][dy];
644 Y(dx,dy,e) += BG * QD1[qx][dy];
645 Y(dx,dy,e) += BB * QD2[qx][dy];
652 template<
int T_D1D = 0,
int T_Q1D = 0>
653 static void PADiffusionDiagonal3D(
const int NE,
654 const bool symmetric,
655 const Array<double> &b,
656 const Array<double> &g,
662 constexpr
int DIM = 3;
663 const int D1D = T_D1D ? T_D1D : d1d;
664 const int Q1D = T_Q1D ? T_Q1D : q1d;
665 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
666 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
667 MFEM_VERIFY(D1D <= MD1,
"");
668 MFEM_VERIFY(Q1D <= MQ1,
"");
669 auto B =
Reshape(b.Read(), Q1D, D1D);
670 auto G =
Reshape(g.Read(), Q1D, D1D);
671 auto Q =
Reshape(d.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
672 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
675 const int D1D = T_D1D ? T_D1D : d1d;
676 const int Q1D = T_Q1D ? T_Q1D : q1d;
677 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
678 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
679 double QQD[MQ1][MQ1][MD1];
680 double QDD[MQ1][MD1][MD1];
681 for (
int i = 0; i <
DIM; ++i)
683 for (
int j = 0; j <
DIM; ++j)
686 for (
int qx = 0; qx < Q1D; ++qx)
688 for (
int qy = 0; qy < Q1D; ++qy)
690 for (
int dz = 0; dz < D1D; ++dz)
692 QQD[qx][qy][dz] = 0.0;
693 for (
int qz = 0; qz < Q1D; ++qz)
695 const int q = qx + (qy + qz * Q1D) * Q1D;
696 const int ksym = j >= i ?
697 3 - (3-i)*(2-i)/2 + j:
698 3 - (3-j)*(2-j)/2 + i;
699 const int k = symmetric ? ksym : (i*
DIM) + j;
700 const double O = Q(q,k,e);
701 const double Bz = B(qz,dz);
702 const double Gz = G(qz,dz);
703 const double L = i==2 ? Gz : Bz;
704 const double R = j==2 ? Gz : Bz;
705 QQD[qx][qy][dz] += L * O * R;
711 for (
int qx = 0; qx < Q1D; ++qx)
713 for (
int dz = 0; dz < D1D; ++dz)
715 for (
int dy = 0; dy < D1D; ++dy)
717 QDD[qx][dy][dz] = 0.0;
718 for (
int qy = 0; qy < Q1D; ++qy)
720 const double By = B(qy,dy);
721 const double Gy = G(qy,dy);
722 const double L = i==1 ? Gy : By;
723 const double R = j==1 ? Gy : By;
724 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
730 for (
int dz = 0; dz < D1D; ++dz)
732 for (
int dy = 0; dy < D1D; ++dy)
734 for (
int dx = 0; dx < D1D; ++dx)
736 for (
int qx = 0; qx < Q1D; ++qx)
738 const double Bx = B(qx,dx);
739 const double Gx = G(qx,dx);
740 const double L = i==0 ? Gx : Bx;
741 const double R = j==0 ? Gx : Bx;
742 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
753 template<
int T_D1D = 0,
int T_Q1D = 0>
754 static void SmemPADiffusionDiagonal3D(
const int NE,
755 const bool symmetric,
756 const Array<double> &b_,
757 const Array<double> &g_,
763 constexpr
int DIM = 3;
764 const int D1D = T_D1D ? T_D1D : d1d;
765 const int Q1D = T_Q1D ? T_Q1D : q1d;
766 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
767 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
768 MFEM_VERIFY(D1D <= MD1,
"");
769 MFEM_VERIFY(Q1D <= MQ1,
"");
770 auto b =
Reshape(b_.Read(), Q1D, D1D);
771 auto g =
Reshape(g_.Read(), Q1D, D1D);
772 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
773 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
774 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
776 const int tidz = MFEM_THREAD_ID(z);
777 const int D1D = T_D1D ? T_D1D : d1d;
778 const int Q1D = T_Q1D ? T_Q1D : q1d;
779 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
780 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
781 MFEM_SHARED
double BG[2][MQ1*MD1];
782 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
783 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
784 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
785 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
788 MFEM_FOREACH_THREAD(d,y,D1D)
790 MFEM_FOREACH_THREAD(q,x,Q1D)
798 for (
int i = 0; i <
DIM; ++i)
800 for (
int j = 0; j <
DIM; ++j)
803 MFEM_FOREACH_THREAD(qx,x,Q1D)
805 MFEM_FOREACH_THREAD(qy,y,Q1D)
807 MFEM_FOREACH_THREAD(dz,z,D1D)
809 QQD[qx][qy][dz] = 0.0;
810 for (
int qz = 0; qz < Q1D; ++qz)
812 const int q = qx + (qy + qz * Q1D) * Q1D;
813 const int ksym = j >= i ?
814 3 - (3-i)*(2-i)/2 + j:
815 3 - (3-j)*(2-j)/2 + i;
816 const int k = symmetric ? ksym : (i*
DIM) + j;
817 const double O = D(q,k,e);
818 const double Bz = B[qz][dz];
819 const double Gz = G[qz][dz];
820 const double L = i==2 ? Gz : Bz;
821 const double R = j==2 ? Gz : Bz;
822 QQD[qx][qy][dz] += L * O * R;
829 MFEM_FOREACH_THREAD(qx,x,Q1D)
831 MFEM_FOREACH_THREAD(dz,z,D1D)
833 MFEM_FOREACH_THREAD(dy,y,D1D)
835 QDD[qx][dy][dz] = 0.0;
836 for (
int qy = 0; qy < Q1D; ++qy)
838 const double By = B[qy][dy];
839 const double Gy = G[qy][dy];
840 const double L = i==1 ? Gy : By;
841 const double R = j==1 ? Gy : By;
842 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
849 MFEM_FOREACH_THREAD(dz,z,D1D)
851 MFEM_FOREACH_THREAD(dy,y,D1D)
853 MFEM_FOREACH_THREAD(dx,x,D1D)
855 for (
int qx = 0; qx < Q1D; ++qx)
857 const double Bx = B[qx][dx];
858 const double Gx = G[qx][dx];
859 const double L = i==0 ? Gx : Bx;
860 const double R = j==0 ? Gx : Bx;
861 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
871 static void PADiffusionAssembleDiagonal(
const int dim,
876 const Array<double> &B,
877 const Array<double> &G,
883 switch ((D1D << 4 ) | Q1D)
885 case 0x22:
return SmemPADiffusionDiagonal2D<2,2,8>(NE,symm,B,G,D,Y);
886 case 0x33:
return SmemPADiffusionDiagonal2D<3,3,8>(NE,symm,B,G,D,Y);
887 case 0x44:
return SmemPADiffusionDiagonal2D<4,4,4>(NE,symm,B,G,D,Y);
888 case 0x55:
return SmemPADiffusionDiagonal2D<5,5,4>(NE,symm,B,G,D,Y);
889 case 0x66:
return SmemPADiffusionDiagonal2D<6,6,2>(NE,symm,B,G,D,Y);
890 case 0x77:
return SmemPADiffusionDiagonal2D<7,7,2>(NE,symm,B,G,D,Y);
891 case 0x88:
return SmemPADiffusionDiagonal2D<8,8,1>(NE,symm,B,G,D,Y);
892 case 0x99:
return SmemPADiffusionDiagonal2D<9,9,1>(NE,symm,B,G,D,Y);
893 default:
return PADiffusionDiagonal2D(NE,symm,B,G,D,Y,D1D,Q1D);
898 switch ((D1D << 4 ) | Q1D)
900 case 0x23:
return SmemPADiffusionDiagonal3D<2,3>(NE,symm,B,G,D,Y);
901 case 0x34:
return SmemPADiffusionDiagonal3D<3,4>(NE,symm,B,G,D,Y);
902 case 0x45:
return SmemPADiffusionDiagonal3D<4,5>(NE,symm,B,G,D,Y);
903 case 0x56:
return SmemPADiffusionDiagonal3D<5,6>(NE,symm,B,G,D,Y);
904 case 0x67:
return SmemPADiffusionDiagonal3D<6,7>(NE,symm,B,G,D,Y);
905 case 0x78:
return SmemPADiffusionDiagonal3D<7,8>(NE,symm,B,G,D,Y);
906 case 0x89:
return SmemPADiffusionDiagonal3D<8,9>(NE,symm,B,G,D,Y);
907 case 0x9A:
return SmemPADiffusionDiagonal3D<9,10>(NE,symm,B,G,D,Y);
908 default:
return PADiffusionDiagonal3D(NE,symm,B,G,D,Y,D1D,Q1D);
911 MFEM_ABORT(
"Unknown kernel.");
914 void DiffusionIntegrator::AssembleDiagonalPA(
Vector &diag)
916 if (DeviceCanUseCeed())
918 CeedAssembleDiagonal(ceedDataPtr, diag);
922 if (pa_data.Size()==0) { AssemblePA(*fespace); }
923 PADiffusionAssembleDiagonal(dim, dofs1D, quad1D, ne, symmetric,
924 maps->B, maps->G, pa_data, diag);
931 static void OccaPADiffusionApply2D(
const int D1D,
942 occa::properties props;
943 props[
"defines/D1D"] = D1D;
944 props[
"defines/Q1D"] = Q1D;
952 const occa_id_t id = std::make_pair(D1D,Q1D);
953 if (!Device::Allows(Backend::OCCA_CUDA))
956 if (OccaDiffApply2D_cpu.find(
id) == OccaDiffApply2D_cpu.end())
958 const occa::kernel DiffusionApply2D_CPU =
960 "DiffusionApply2D_CPU", props);
961 OccaDiffApply2D_cpu.emplace(
id, DiffusionApply2D_CPU);
963 OccaDiffApply2D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
968 if (OccaDiffApply2D_gpu.find(
id) == OccaDiffApply2D_gpu.end())
970 const occa::kernel DiffusionApply2D_GPU =
972 "DiffusionApply2D_GPU", props);
973 OccaDiffApply2D_gpu.emplace(
id, DiffusionApply2D_GPU);
975 OccaDiffApply2D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
980 static void OccaPADiffusionApply3D(
const int D1D,
983 const Array<double> &B,
984 const Array<double> &G,
985 const Array<double> &Bt,
986 const Array<double> &Gt,
991 occa::properties props;
992 props[
"defines/D1D"] = D1D;
993 props[
"defines/Q1D"] = Q1D;
996 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
997 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
1001 const occa_id_t id = std::make_pair(D1D,Q1D);
1002 if (!Device::Allows(Backend::OCCA_CUDA))
1005 if (OccaDiffApply3D_cpu.find(
id) == OccaDiffApply3D_cpu.end())
1007 const occa::kernel DiffusionApply3D_CPU =
1009 "DiffusionApply3D_CPU", props);
1010 OccaDiffApply3D_cpu.emplace(
id, DiffusionApply3D_CPU);
1012 OccaDiffApply3D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
1017 if (OccaDiffApply3D_gpu.find(
id) == OccaDiffApply3D_gpu.end())
1019 const occa::kernel DiffusionApply3D_GPU =
1021 "DiffusionApply3D_GPU", props);
1022 OccaDiffApply3D_gpu.emplace(
id, DiffusionApply3D_GPU);
1024 OccaDiffApply3D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
1027 #endif // MFEM_USE_OCCA
1030 template<
int T_D1D = 0,
int T_Q1D = 0>
1031 static void PADiffusionApply2D(
const int NE,
1032 const bool symmetric,
1033 const Array<double> &b_,
1034 const Array<double> &g_,
1035 const Array<double> &bt_,
1036 const Array<double> >_,
1043 const int D1D = T_D1D ? T_D1D : d1d;
1044 const int Q1D = T_Q1D ? T_Q1D : q1d;
1045 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
1046 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
1047 auto B =
Reshape(b_.Read(), Q1D, D1D);
1048 auto G =
Reshape(g_.Read(), Q1D, D1D);
1049 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
1050 auto Gt =
Reshape(gt_.Read(), D1D, Q1D);
1051 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
1052 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
1053 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
1056 const int D1D = T_D1D ? T_D1D : d1d;
1057 const int Q1D = T_Q1D ? T_Q1D : q1d;
1059 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
1060 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
1062 double grad[max_Q1D][max_Q1D][2];
1063 for (
int qy = 0; qy < Q1D; ++qy)
1065 for (
int qx = 0; qx < Q1D; ++qx)
1067 grad[qy][qx][0] = 0.0;
1068 grad[qy][qx][1] = 0.0;
1071 for (
int dy = 0; dy < D1D; ++dy)
1073 double gradX[max_Q1D][2];
1074 for (
int qx = 0; qx < Q1D; ++qx)
1079 for (
int dx = 0; dx < D1D; ++dx)
1081 const double s = X(dx,dy,e);
1082 for (
int qx = 0; qx < Q1D; ++qx)
1084 gradX[qx][0] += s * B(qx,dx);
1085 gradX[qx][1] += s * G(qx,dx);
1088 for (
int qy = 0; qy < Q1D; ++qy)
1090 const double wy = B(qy,dy);
1091 const double wDy = G(qy,dy);
1092 for (
int qx = 0; qx < Q1D; ++qx)
1094 grad[qy][qx][0] += gradX[qx][1] * wy;
1095 grad[qy][qx][1] += gradX[qx][0] * wDy;
1100 for (
int qy = 0; qy < Q1D; ++qy)
1102 for (
int qx = 0; qx < Q1D; ++qx)
1104 const int q = qx + qy * Q1D;
1106 const double O11 = D(q,0,e);
1107 const double O21 = D(q,1,e);
1108 const double O12 = symmetric ? O21 : D(q,2,e);
1109 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1111 const double gradX = grad[qy][qx][0];
1112 const double gradY = grad[qy][qx][1];
1114 grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
1115 grad[qy][qx][1] = (O21 * gradX) + (O22 * gradY);
1118 for (
int qy = 0; qy < Q1D; ++qy)
1120 double gradX[max_D1D][2];
1121 for (
int dx = 0; dx < D1D; ++dx)
1126 for (
int qx = 0; qx < Q1D; ++qx)
1128 const double gX = grad[qy][qx][0];
1129 const double gY = grad[qy][qx][1];
1130 for (
int dx = 0; dx < D1D; ++dx)
1132 const double wx = Bt(dx,qx);
1133 const double wDx = Gt(dx,qx);
1134 gradX[dx][0] += gX * wDx;
1135 gradX[dx][1] += gY * wx;
1138 for (
int dy = 0; dy < D1D; ++dy)
1140 const double wy = Bt(dy,qy);
1141 const double wDy = Gt(dy,qy);
1142 for (
int dx = 0; dx < D1D; ++dx)
1144 Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
1152 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
1153 static void SmemPADiffusionApply2D(
const int NE,
1154 const bool symmetric,
1155 const Array<double> &b_,
1156 const Array<double> &g_,
1163 const int D1D = T_D1D ? T_D1D : d1d;
1164 const int Q1D = T_Q1D ? T_Q1D : q1d;
1165 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1166 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1167 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1168 MFEM_VERIFY(D1D <= MD1,
"");
1169 MFEM_VERIFY(Q1D <= MQ1,
"");
1170 auto b =
Reshape(b_.Read(), Q1D, D1D);
1171 auto g =
Reshape(g_.Read(), Q1D, D1D);
1172 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
1173 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
1174 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
1175 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
1177 const int tidz = MFEM_THREAD_ID(z);
1178 const int D1D = T_D1D ? T_D1D : d1d;
1179 const int Q1D = T_Q1D ? T_Q1D : q1d;
1180 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1181 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1182 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1183 MFEM_SHARED
double sBG[2][MQ1*MD1];
1184 double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
1185 double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
1186 double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
1187 double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
1188 MFEM_SHARED
double Xz[NBZ][MD1][MD1];
1189 MFEM_SHARED
double GD[2][NBZ][MD1][MQ1];
1190 MFEM_SHARED
double GQ[2][NBZ][MD1][MQ1];
1191 double (*X)[MD1] = (double (*)[MD1])(Xz + tidz);
1192 double (*DQ0)[MD1] = (double (*)[MD1])(GD[0] + tidz);
1193 double (*DQ1)[MD1] = (double (*)[MD1])(GD[1] + tidz);
1194 double (*QQ0)[MD1] = (double (*)[MD1])(GQ[0] + tidz);
1195 double (*QQ1)[MD1] = (double (*)[MD1])(GQ[1] + tidz);
1196 MFEM_FOREACH_THREAD(dy,y,D1D)
1198 MFEM_FOREACH_THREAD(dx,x,D1D)
1200 X[dy][dx] = x(dx,dy,e);
1205 MFEM_FOREACH_THREAD(dy,y,D1D)
1207 MFEM_FOREACH_THREAD(q,x,Q1D)
1215 MFEM_FOREACH_THREAD(dy,y,D1D)
1217 MFEM_FOREACH_THREAD(qx,x,Q1D)
1221 for (
int dx = 0; dx < D1D; ++dx)
1223 const double coords = X[dy][dx];
1224 u += B[qx][dx] * coords;
1225 v += G[qx][dx] * coords;
1232 MFEM_FOREACH_THREAD(qy,y,Q1D)
1234 MFEM_FOREACH_THREAD(qx,x,Q1D)
1238 for (
int dy = 0; dy < D1D; ++dy)
1240 u += DQ1[dy][qx] * B[qy][dy];
1241 v += DQ0[dy][qx] * G[qy][dy];
1248 MFEM_FOREACH_THREAD(qy,y,Q1D)
1250 MFEM_FOREACH_THREAD(qx,x,Q1D)
1252 const int q = (qx + ((qy) * Q1D));
1253 const double O11 = D(q,0,e);
1254 const double O21 = D(q,1,e);
1255 const double O12 = symmetric ? O21 : D(q,2,e);
1256 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1257 const double gX = QQ0[qy][qx];
1258 const double gY = QQ1[qy][qx];
1259 QQ0[qy][qx] = (O11 * gX) + (O12 * gY);
1260 QQ1[qy][qx] = (O21 * gX) + (O22 * gY);
1266 MFEM_FOREACH_THREAD(dy,y,D1D)
1268 MFEM_FOREACH_THREAD(q,x,Q1D)
1270 Bt[dy][q] =
b(q,dy);
1271 Gt[dy][q] = g(q,dy);
1276 MFEM_FOREACH_THREAD(qy,y,Q1D)
1278 MFEM_FOREACH_THREAD(dx,x,D1D)
1282 for (
int qx = 0; qx < Q1D; ++qx)
1284 u += Gt[dx][qx] * QQ0[qy][qx];
1285 v += Bt[dx][qx] * QQ1[qy][qx];
1292 MFEM_FOREACH_THREAD(dy,y,D1D)
1294 MFEM_FOREACH_THREAD(dx,x,D1D)
1298 for (
int qy = 0; qy < Q1D; ++qy)
1300 u += DQ0[qy][dx] * Bt[dy][qy];
1301 v += DQ1[qy][dx] * Gt[dy][qy];
1303 Y(dx,dy,e) += (u + v);
1310 template<
int T_D1D = 0,
int T_Q1D = 0>
1311 static void PADiffusionApply3D(
const int NE,
1312 const bool symmetric,
1313 const Array<double> &b,
1314 const Array<double> &g,
1315 const Array<double> &bt,
1316 const Array<double> >,
1320 int d1d = 0,
int q1d = 0)
1322 const int D1D = T_D1D ? T_D1D : d1d;
1323 const int Q1D = T_Q1D ? T_Q1D : q1d;
1324 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
1325 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
1326 auto B =
Reshape(b.Read(), Q1D, D1D);
1327 auto G =
Reshape(g.Read(), Q1D, D1D);
1328 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
1329 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
1330 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
1331 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1332 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1335 const int D1D = T_D1D ? T_D1D : d1d;
1336 const int Q1D = T_Q1D ? T_Q1D : q1d;
1337 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
1338 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
1339 double grad[max_Q1D][max_Q1D][max_Q1D][3];
1340 for (
int qz = 0; qz < Q1D; ++qz)
1342 for (
int qy = 0; qy < Q1D; ++qy)
1344 for (
int qx = 0; qx < Q1D; ++qx)
1346 grad[qz][qy][qx][0] = 0.0;
1347 grad[qz][qy][qx][1] = 0.0;
1348 grad[qz][qy][qx][2] = 0.0;
1352 for (
int dz = 0; dz < D1D; ++dz)
1354 double gradXY[max_Q1D][max_Q1D][3];
1355 for (
int qy = 0; qy < Q1D; ++qy)
1357 for (
int qx = 0; qx < Q1D; ++qx)
1359 gradXY[qy][qx][0] = 0.0;
1360 gradXY[qy][qx][1] = 0.0;
1361 gradXY[qy][qx][2] = 0.0;
1364 for (
int dy = 0; dy < D1D; ++dy)
1366 double gradX[max_Q1D][2];
1367 for (
int qx = 0; qx < Q1D; ++qx)
1372 for (
int dx = 0; dx < D1D; ++dx)
1374 const double s = X(dx,dy,dz,e);
1375 for (
int qx = 0; qx < Q1D; ++qx)
1377 gradX[qx][0] += s * B(qx,dx);
1378 gradX[qx][1] += s * G(qx,dx);
1381 for (
int qy = 0; qy < Q1D; ++qy)
1383 const double wy = B(qy,dy);
1384 const double wDy = G(qy,dy);
1385 for (
int qx = 0; qx < Q1D; ++qx)
1387 const double wx = gradX[qx][0];
1388 const double wDx = gradX[qx][1];
1389 gradXY[qy][qx][0] += wDx * wy;
1390 gradXY[qy][qx][1] += wx * wDy;
1391 gradXY[qy][qx][2] += wx * wy;
1395 for (
int qz = 0; qz < Q1D; ++qz)
1397 const double wz = B(qz,dz);
1398 const double wDz = G(qz,dz);
1399 for (
int qy = 0; qy < Q1D; ++qy)
1401 for (
int qx = 0; qx < Q1D; ++qx)
1403 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
1404 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
1405 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
1411 for (
int qz = 0; qz < Q1D; ++qz)
1413 for (
int qy = 0; qy < Q1D; ++qy)
1415 for (
int qx = 0; qx < Q1D; ++qx)
1417 const int q = qx + (qy + qz * Q1D) * Q1D;
1418 const double O11 = D(q,0,e);
1419 const double O12 = D(q,1,e);
1420 const double O13 = D(q,2,e);
1421 const double O21 = symmetric ? O12 : D(q,3,e);
1422 const double O22 = symmetric ? D(q,3,e) : D(q,4,e);
1423 const double O23 = symmetric ? D(q,4,e) : D(q,5,e);
1424 const double O31 = symmetric ? O13 : D(q,6,e);
1425 const double O32 = symmetric ? O23 : D(q,7,e);
1426 const double O33 = symmetric ? D(q,5,e) : D(q,8,e);
1427 const double gradX = grad[qz][qy][qx][0];
1428 const double gradY = grad[qz][qy][qx][1];
1429 const double gradZ = grad[qz][qy][qx][2];
1430 grad[qz][qy][qx][0] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
1431 grad[qz][qy][qx][1] = (O21*gradX)+(O22*gradY)+(O23*gradZ);
1432 grad[qz][qy][qx][2] = (O31*gradX)+(O32*gradY)+(O33*gradZ);
1436 for (
int qz = 0; qz < Q1D; ++qz)
1438 double gradXY[max_D1D][max_D1D][3];
1439 for (
int dy = 0; dy < D1D; ++dy)
1441 for (
int dx = 0; dx < D1D; ++dx)
1443 gradXY[dy][dx][0] = 0;
1444 gradXY[dy][dx][1] = 0;
1445 gradXY[dy][dx][2] = 0;
1448 for (
int qy = 0; qy < Q1D; ++qy)
1450 double gradX[max_D1D][3];
1451 for (
int dx = 0; dx < D1D; ++dx)
1457 for (
int qx = 0; qx < Q1D; ++qx)
1459 const double gX = grad[qz][qy][qx][0];
1460 const double gY = grad[qz][qy][qx][1];
1461 const double gZ = grad[qz][qy][qx][2];
1462 for (
int dx = 0; dx < D1D; ++dx)
1464 const double wx = Bt(dx,qx);
1465 const double wDx = Gt(dx,qx);
1466 gradX[dx][0] += gX * wDx;
1467 gradX[dx][1] += gY * wx;
1468 gradX[dx][2] += gZ * wx;
1471 for (
int dy = 0; dy < D1D; ++dy)
1473 const double wy = Bt(dy,qy);
1474 const double wDy = Gt(dy,qy);
1475 for (
int dx = 0; dx < D1D; ++dx)
1477 gradXY[dy][dx][0] += gradX[dx][0] * wy;
1478 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
1479 gradXY[dy][dx][2] += gradX[dx][2] * wy;
1483 for (
int dz = 0; dz < D1D; ++dz)
1485 const double wz = Bt(dz,qz);
1486 const double wDz = Gt(dz,qz);
1487 for (
int dy = 0; dy < D1D; ++dy)
1489 for (
int dx = 0; dx < D1D; ++dx)
1492 ((gradXY[dy][dx][0] * wz) +
1493 (gradXY[dy][dx][1] * wz) +
1494 (gradXY[dy][dx][2] * wDz));
1504 static MFEM_HOST_DEVICE
inline int qi(
const int q,
const int d,
const int Q)
1506 return (q<=d) ? q : Q-1-q;
1509 static MFEM_HOST_DEVICE
inline int dj(
const int q,
const int d,
const int D)
1511 return (q<=d) ? d : D-1-d;
1514 static MFEM_HOST_DEVICE
inline int qk(
const int q,
const int d,
const int Q)
1516 return (q<=d) ? Q-1-q : q;
1519 static MFEM_HOST_DEVICE
inline int dl(
const int q,
const int d,
const int D)
1521 return (q<=d) ? D-1-d : d;
1524 static MFEM_HOST_DEVICE
inline double sign(
const int q,
const int d)
1526 return (q<=d) ? -1.0 : 1.0;
1529 template<
int T_D1D = 0,
int T_Q1D = 0>
1530 static void SmemPADiffusionApply3D(
const int NE,
1531 const bool symmetric,
1532 const Array<double> &b_,
1533 const Array<double> &g_,
1540 const int D1D = T_D1D ? T_D1D : d1d;
1541 const int Q1D = T_Q1D ? T_Q1D : q1d;
1542 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
1543 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
1544 MFEM_VERIFY(D1D <= M1D,
"");
1545 MFEM_VERIFY(Q1D <= M1Q,
"");
1546 auto b =
Reshape(b_.Read(), Q1D, D1D);
1547 auto g =
Reshape(g_.Read(), Q1D, D1D);
1548 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1549 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1550 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1551 MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
1553 const int D1D = T_D1D ? T_D1D : d1d;
1554 const int Q1D = T_Q1D ? T_Q1D : q1d;
1555 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1556 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1557 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
1558 MFEM_SHARED
double sBG[MQ1*MD1];
1559 double (*B)[MD1] = (double (*)[MD1]) sBG;
1560 double (*G)[MD1] = (double (*)[MD1]) sBG;
1561 double (*Bt)[MQ1] = (double (*)[MQ1]) sBG;
1562 double (*Gt)[MQ1] = (double (*)[MQ1]) sBG;
1563 MFEM_SHARED
double sm0[3][MDQ*MDQ*MDQ];
1564 MFEM_SHARED
double sm1[3][MDQ*MDQ*MDQ];
1565 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1566 double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
1567 double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
1568 double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
1569 double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
1570 double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
1571 double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
1572 double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
1573 double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
1574 double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
1575 double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
1576 double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
1577 double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
1578 double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
1579 double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1580 MFEM_FOREACH_THREAD(dy,y,D1D)
1582 MFEM_FOREACH_THREAD(dx,x,D1D)
1585 for (
int dz = 0; dz < D1D; ++dz)
1587 X[dz][dy][dx] = x(dx,dy,dz,e);
1590 MFEM_FOREACH_THREAD(qx,x,Q1D)
1592 const int i = qi(qx,dy,Q1D);
1593 const int j = dj(qx,dy,D1D);
1594 const int k = qk(qx,dy,Q1D);
1595 const int l = dl(qx,dy,D1D);
1597 G[k][l] = g(qx,dy) * sign(qx,dy);
1601 MFEM_FOREACH_THREAD(dy,y,D1D)
1603 MFEM_FOREACH_THREAD(qx,x,Q1D)
1605 double u[D1D], v[D1D];
1607 for (
int dz = 0; dz < D1D; dz++) { u[dz] = v[dz] = 0.0; }
1609 for (
int dx = 0; dx < D1D; ++dx)
1611 const int i = qi(qx,dx,Q1D);
1612 const int j = dj(qx,dx,D1D);
1613 const int k = qk(qx,dx,Q1D);
1614 const int l = dl(qx,dx,D1D);
1615 const double s = sign(qx,dx);
1617 for (
int dz = 0; dz < D1D; ++dz)
1619 const double coords = X[dz][dy][dx];
1620 u[dz] += coords * B[i][j];
1621 v[dz] += coords * G[k][l] * s;
1625 for (
int dz = 0; dz < D1D; ++dz)
1627 DDQ0[dz][dy][qx] = u[dz];
1628 DDQ1[dz][dy][qx] = v[dz];
1633 MFEM_FOREACH_THREAD(qy,y,Q1D)
1635 MFEM_FOREACH_THREAD(qx,x,Q1D)
1637 double u[D1D], v[D1D], w[D1D];
1639 for (
int dz = 0; dz < D1D; dz++) { u[dz] = v[dz] = w[dz] = 0.0; }
1641 for (
int dy = 0; dy < D1D; ++dy)
1643 const int i = qi(qy,dy,Q1D);
1644 const int j = dj(qy,dy,D1D);
1645 const int k = qk(qy,dy,Q1D);
1646 const int l = dl(qy,dy,D1D);
1647 const double s = sign(qy,dy);
1649 for (
int dz = 0; dz < D1D; dz++)
1651 u[dz] += DDQ1[dz][dy][qx] * B[i][j];
1652 v[dz] += DDQ0[dz][dy][qx] * G[k][l] * s;
1653 w[dz] += DDQ0[dz][dy][qx] * B[i][j];
1657 for (
int dz = 0; dz < D1D; dz++)
1659 DQQ0[dz][qy][qx] = u[dz];
1660 DQQ1[dz][qy][qx] = v[dz];
1661 DQQ2[dz][qy][qx] = w[dz];
1666 MFEM_FOREACH_THREAD(qy,y,Q1D)
1668 MFEM_FOREACH_THREAD(qx,x,Q1D)
1670 double u[Q1D], v[Q1D], w[Q1D];
1672 for (
int qz = 0; qz < Q1D; qz++) { u[qz] = v[qz] = w[qz] = 0.0; }
1674 for (
int dz = 0; dz < D1D; ++dz)
1677 for (
int qz = 0; qz < Q1D; qz++)
1679 const int i = qi(qz,dz,Q1D);
1680 const int j = dj(qz,dz,D1D);
1681 const int k = qk(qz,dz,Q1D);
1682 const int l = dl(qz,dz,D1D);
1683 const double s = sign(qz,dz);
1684 u[qz] += DQQ0[dz][qy][qx] * B[i][j];
1685 v[qz] += DQQ1[dz][qy][qx] * B[i][j];
1686 w[qz] += DQQ2[dz][qy][qx] * G[k][l] * s;
1690 for (
int qz = 0; qz < Q1D; qz++)
1692 const double O11 = d(qx,qy,qz,0,e);
1693 const double O12 = d(qx,qy,qz,1,e);
1694 const double O13 = d(qx,qy,qz,2,e);
1695 const double O21 = symmetric ? O12 : d(qx,qy,qz,3,e);
1696 const double O22 = symmetric ? d(qx,qy,qz,3,e) : d(qx,qy,qz,4,e);
1697 const double O23 = symmetric ? d(qx,qy,qz,4,e) : d(qx,qy,qz,5,e);
1698 const double O31 = symmetric ? O13 : d(qx,qy,qz,6,e);
1699 const double O32 = symmetric ? O23 : d(qx,qy,qz,7,e);
1700 const double O33 = symmetric ? d(qx,qy,qz,5,e) : d(qx,qy,qz,8,e);
1701 const double gX = u[qz];
1702 const double gY = v[qz];
1703 const double gZ = w[qz];
1704 QQQ0[qz][qy][qx] = (O11*gX) + (O12*gY) + (O13*gZ);
1705 QQQ1[qz][qy][qx] = (O21*gX) + (O22*gY) + (O23*gZ);
1706 QQQ2[qz][qy][qx] = (O31*gX) + (O32*gY) + (O33*gZ);
1711 MFEM_FOREACH_THREAD(d,y,D1D)
1713 MFEM_FOREACH_THREAD(q,x,Q1D)
1715 const int i = qi(q,d,Q1D);
1716 const int j = dj(q,d,D1D);
1717 const int k = qk(q,d,Q1D);
1718 const int l = dl(q,d,D1D);
1720 Gt[l][k] = g(q,d) * sign(q,d);
1724 MFEM_FOREACH_THREAD(qy,y,Q1D)
1726 MFEM_FOREACH_THREAD(dx,x,D1D)
1728 double u[Q1D], v[Q1D], w[Q1D];
1730 for (
int qz = 0; qz < Q1D; ++qz) { u[qz] = v[qz] = w[qz] = 0.0; }
1732 for (
int qx = 0; qx < Q1D; ++qx)
1734 const int i = qi(qx,dx,Q1D);
1735 const int j = dj(qx,dx,D1D);
1736 const int k = qk(qx,dx,Q1D);
1737 const int l = dl(qx,dx,D1D);
1738 const double s = sign(qx,dx);
1740 for (
int qz = 0; qz < Q1D; ++qz)
1742 u[qz] += QQQ0[qz][qy][qx] * Gt[l][k] * s;
1743 v[qz] += QQQ1[qz][qy][qx] * Bt[j][i];
1744 w[qz] += QQQ2[qz][qy][qx] * Bt[j][i];
1748 for (
int qz = 0; qz < Q1D; ++qz)
1750 QQD0[qz][qy][dx] = u[qz];
1751 QQD1[qz][qy][dx] = v[qz];
1752 QQD2[qz][qy][dx] = w[qz];
1757 MFEM_FOREACH_THREAD(dy,y,D1D)
1759 MFEM_FOREACH_THREAD(dx,x,D1D)
1761 double u[Q1D], v[Q1D], w[Q1D];
1763 for (
int qz = 0; qz < Q1D; ++qz) { u[qz] = v[qz] = w[qz] = 0.0; }
1765 for (
int qy = 0; qy < Q1D; ++qy)
1767 const int i = qi(qy,dy,Q1D);
1768 const int j = dj(qy,dy,D1D);
1769 const int k = qk(qy,dy,Q1D);
1770 const int l = dl(qy,dy,D1D);
1771 const double s = sign(qy,dy);
1773 for (
int qz = 0; qz < Q1D; ++qz)
1775 u[qz] += QQD0[qz][qy][dx] * Bt[j][i];
1776 v[qz] += QQD1[qz][qy][dx] * Gt[l][k] * s;
1777 w[qz] += QQD2[qz][qy][dx] * Bt[j][i];
1781 for (
int qz = 0; qz < Q1D; ++qz)
1783 QDD0[qz][dy][dx] = u[qz];
1784 QDD1[qz][dy][dx] = v[qz];
1785 QDD2[qz][dy][dx] = w[qz];
1790 MFEM_FOREACH_THREAD(dy,y,D1D)
1792 MFEM_FOREACH_THREAD(dx,x,D1D)
1794 double u[D1D], v[D1D], w[D1D];
1796 for (
int dz = 0; dz < D1D; ++dz) { u[dz] = v[dz] = w[dz] = 0.0; }
1798 for (
int qz = 0; qz < Q1D; ++qz)
1801 for (
int dz = 0; dz < D1D; ++dz)
1803 const int i = qi(qz,dz,Q1D);
1804 const int j = dj(qz,dz,D1D);
1805 const int k = qk(qz,dz,Q1D);
1806 const int l = dl(qz,dz,D1D);
1807 const double s = sign(qz,dz);
1808 u[dz] += QDD0[qz][dy][dx] * Bt[j][i];
1809 v[dz] += QDD1[qz][dy][dx] * Bt[j][i];
1810 w[dz] += QDD2[qz][dy][dx] * Gt[l][k] * s;
1814 for (
int dz = 0; dz < D1D; ++dz)
1816 y(dx,dy,dz,e) += (u[dz] + v[dz] + w[dz]);
1823 static void PADiffusionApply(
const int dim,
1828 const Array<double> &B,
1829 const Array<double> &G,
1830 const Array<double> &Bt,
1831 const Array<double> &Gt,
1836 #ifdef MFEM_USE_OCCA
1841 OccaPADiffusionApply2D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1846 OccaPADiffusionApply3D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1849 MFEM_ABORT(
"OCCA PADiffusionApply unknown kernel!");
1851 #endif // MFEM_USE_OCCA
1852 const int ID = (D1D << 4) | Q1D;
1858 case 0x22:
return SmemPADiffusionApply2D<2,2,16>(NE,symm,B,G,D,X,Y);
1859 case 0x33:
return SmemPADiffusionApply2D<3,3,16>(NE,symm,B,G,D,X,Y);
1860 case 0x44:
return SmemPADiffusionApply2D<4,4,8>(NE,symm,B,G,D,X,Y);
1861 case 0x55:
return SmemPADiffusionApply2D<5,5,8>(NE,symm,B,G,D,X,Y);
1862 case 0x66:
return SmemPADiffusionApply2D<6,6,4>(NE,symm,B,G,D,X,Y);
1863 case 0x77:
return SmemPADiffusionApply2D<7,7,4>(NE,symm,B,G,D,X,Y);
1864 case 0x88:
return SmemPADiffusionApply2D<8,8,2>(NE,symm,B,G,D,X,Y);
1865 case 0x99:
return SmemPADiffusionApply2D<9,9,2>(NE,symm,B,G,D,X,Y);
1866 default:
return PADiffusionApply2D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1874 case 0x23:
return SmemPADiffusionApply3D<2,3>(NE,symm,B,G,D,X,Y);
1875 case 0x34:
return SmemPADiffusionApply3D<3,4>(NE,symm,B,G,D,X,Y);
1876 case 0x45:
return SmemPADiffusionApply3D<4,5>(NE,symm,B,G,D,X,Y);
1877 case 0x46:
return SmemPADiffusionApply3D<4,6>(NE,symm,B,G,D,X,Y);
1878 case 0x56:
return SmemPADiffusionApply3D<5,6>(NE,symm,B,G,D,X,Y);
1879 case 0x58:
return SmemPADiffusionApply3D<5,8>(NE,symm,B,G,D,X,Y);
1880 case 0x67:
return SmemPADiffusionApply3D<6,7>(NE,symm,B,G,D,X,Y);
1881 case 0x78:
return SmemPADiffusionApply3D<7,8>(NE,symm,B,G,D,X,Y);
1882 case 0x89:
return SmemPADiffusionApply3D<8,9>(NE,symm,B,G,D,X,Y);
1883 default:
return PADiffusionApply3D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1886 MFEM_ABORT(
"Unknown kernel.");
1892 if (DeviceCanUseCeed())
1894 CeedAddMult(ceedDataPtr, x, y);
1898 PADiffusionApply(dim, dofs1D, quad1D, ne, symmetric,
1899 maps->B, maps->G, maps->Bt, maps->Gt,
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
int Size() const
Return the logical size of the array.
int GetDim() const
Returns the reference space dimension for the finite element.
Class for an integration rule - an Array of IntegrationPoint.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
A coefficient that is constant across space and time.
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
void SetSize(int s)
Resize the vector to size s.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Data type dense matrix using column-major storage.
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
int GetNE() const
Returns number of elements in the mesh.
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Mesh * GetMesh() const
Returns the mesh.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
void PADiffusionSetup2D< 3 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
void PADiffusionSetup2D< 2 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
int SpaceDimension() const
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
double p(const Vector &x, double t)
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
void PADiffusionSetup3D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
std::pair< int, int > occa_id_t
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Class representing a function through its values (scalar or vector) at quadrature points...