12 #include "../general/forall.hpp" 27 static void OccaPADiffusionSetup2D(
const int D1D,
30 const Array<double> &W,
35 occa::properties props;
36 props[
"defines/D1D"] = D1D;
37 props[
"defines/Q1D"] = Q1D;
42 const bool const_c = C.Size() == 1;
43 const occa_id_t id = std::make_pair(D1D,Q1D);
45 if (OccaDiffSetup2D_ker.find(
id) == OccaDiffSetup2D_ker.end())
47 const occa::kernel DiffusionSetup2D =
49 "DiffusionSetup2D", props);
50 OccaDiffSetup2D_ker.emplace(
id, DiffusionSetup2D);
52 OccaDiffSetup2D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
55 static void OccaPADiffusionSetup3D(
const int D1D,
58 const Array<double> &W,
63 occa::properties props;
64 props[
"defines/D1D"] = D1D;
65 props[
"defines/Q1D"] = Q1D;
70 const bool const_c = C.Size() == 1;
71 const occa_id_t id = std::make_pair(D1D,Q1D);
73 if (OccaDiffSetup3D_ker.find(
id) == OccaDiffSetup3D_ker.end())
75 const occa::kernel DiffusionSetup3D =
77 "DiffusionSetup3D", props);
78 OccaDiffSetup3D_ker.emplace(
id, DiffusionSetup3D);
80 OccaDiffSetup3D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
82 #endif // MFEM_USE_OCCA 93 const bool symmetric = (coeffDim != 4);
94 const bool const_c = c.Size() == 1;
95 MFEM_VERIFY(coeffDim < 3 ||
96 !const_c,
"Constant matrix coefficient not supported");
97 const auto W =
Reshape(w.Read(), Q1D,Q1D);
98 const auto J =
Reshape(j.Read(), Q1D,Q1D,2,2,NE);
99 const auto C = const_c ?
Reshape(c.Read(), 1,1,1,1) :
100 Reshape(c.Read(), coeffDim,Q1D,Q1D,NE);
101 auto D =
Reshape(d.Write(), Q1D,Q1D, symmetric ? 3 : 4, NE);
102 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
104 MFEM_FOREACH_THREAD(qx,x,Q1D)
106 MFEM_FOREACH_THREAD(qy,y,Q1D)
108 const double J11 = J(qx,qy,0,0,e);
109 const double J21 = J(qx,qy,1,0,e);
110 const double J12 = J(qx,qy,0,1,e);
111 const double J22 = J(qx,qy,1,1,e);
112 const double w_detJ = W(qx,qy) / ((J11*J22)-(J21*J12));
113 if (coeffDim == 3 || coeffDim == 4)
116 const double M11 = C(0,qx,qy,e);
117 const double M12 = C(1,qx,qy,e);
118 const double M21 = symmetric ? M12 : C(2,qx,qy,e);
119 const double M22 = symmetric ? C(2,qx,qy,e) : C(3,qx,qy,e);
120 const double R11 = M11*J22 - M12*J12;
121 const double R21 = M21*J22 - M22*J12;
122 const double R12 = -M11*J21 + M12*J11;
123 const double R22 = -M21*J21 + M22*J11;
126 D(qx,qy,0,e) = w_detJ * ( J22*R11 - J12*R21);
127 D(qx,qy,1,e) = w_detJ * (-J21*R11 + J11*R21);
128 D(qx,qy,2,e) = w_detJ * (symmetric ? (-J21*R12 + J11*R22) :
129 (J22*R12 - J12*R22));
132 D(qx,qy,3,e) = w_detJ * (-J21*R12 + J11*R22);
137 const double C1 = const_c ? C(0,0,0,0) : C(0,qx,qy,e);
138 const double C2 = const_c ? C(0,0,0,0) :
139 (coeffDim == 2 ? C(1,qx,qy,e) : C(0,qx,qy,e));
141 D(qx,qy,0,e) = w_detJ * (C2*J12*J12 + C1*J22*J22);
142 D(qx,qy,1,e) = -w_detJ * (C2*J12*J11 + C1*J22*J21);
143 D(qx,qy,2,e) = w_detJ * (C2*J11*J11 + C1*J21*J21);
160 MFEM_VERIFY(coeffDim == 1,
"Matrix and vector coefficients not supported");
161 constexpr
int DIM = 2;
162 constexpr
int SDIM = 3;
163 const bool const_c = c.Size() == 1;
164 const auto W =
Reshape(w.Read(), Q1D,Q1D);
166 const auto C = const_c ?
Reshape(c.Read(), 1,1,1) :
168 auto D =
Reshape(d.Write(), Q1D,Q1D, 3, NE);
169 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
171 MFEM_FOREACH_THREAD(qx,x,Q1D)
173 MFEM_FOREACH_THREAD(qy,y,Q1D)
175 const double wq = W(qx,qy);
176 const double J11 = J(qx,qy,0,0,e);
177 const double J21 = J(qx,qy,1,0,e);
178 const double J31 = J(qx,qy,2,0,e);
179 const double J12 = J(qx,qy,0,1,e);
180 const double J22 = J(qx,qy,1,1,e);
181 const double J32 = J(qx,qy,2,1,e);
182 const double E = J11*J11 + J21*J21 + J31*J31;
183 const double G = J12*J12 + J22*J22 + J32*J32;
184 const double F = J11*J12 + J21*J22 + J31*J32;
185 const double iw = 1.0 / sqrt(E*G - F*F);
186 const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
187 const double alpha = wq * coeff * iw;
188 D(qx,qy,0,e) =
alpha * G;
189 D(qx,qy,1,e) = -
alpha * F;
190 D(qx,qy,2,e) =
alpha * E;
205 const bool symmetric = (coeffDim != 9);
206 const bool const_c = c.
Size() == 1;
207 MFEM_VERIFY(coeffDim < 6 ||
208 !const_c,
"Constant matrix coefficient not supported");
210 const auto J =
Reshape(j.
Read(), Q1D,Q1D,Q1D,3,3,NE);
211 const auto C = const_c ?
Reshape(c.
Read(), 1,1,1,1,1) :
213 auto D =
Reshape(d.
Write(), Q1D,Q1D,Q1D, symmetric ? 6 : 9, NE);
214 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
216 MFEM_FOREACH_THREAD(qx,x,Q1D)
218 MFEM_FOREACH_THREAD(qy,y,Q1D)
220 MFEM_FOREACH_THREAD(qz,z,Q1D)
222 const double J11 = J(qx,qy,qz,0,0,e);
223 const double J21 = J(qx,qy,qz,1,0,e);
224 const double J31 = J(qx,qy,qz,2,0,e);
225 const double J12 = J(qx,qy,qz,0,1,e);
226 const double J22 = J(qx,qy,qz,1,1,e);
227 const double J32 = J(qx,qy,qz,2,1,e);
228 const double J13 = J(qx,qy,qz,0,2,e);
229 const double J23 = J(qx,qy,qz,1,2,e);
230 const double J33 = J(qx,qy,qz,2,2,e);
231 const double detJ = J11 * (J22 * J33 - J32 * J23) -
232 J21 * (J12 * J33 - J32 * J13) +
233 J31 * (J12 * J23 - J22 * J13);
234 const double w_detJ = W(qx,qy,qz) / detJ;
236 const double A11 = (J22 * J33) - (J23 * J32);
237 const double A12 = (J32 * J13) - (J12 * J33);
238 const double A13 = (J12 * J23) - (J22 * J13);
239 const double A21 = (J31 * J23) - (J21 * J33);
240 const double A22 = (J11 * J33) - (J13 * J31);
241 const double A23 = (J21 * J13) - (J11 * J23);
242 const double A31 = (J21 * J32) - (J31 * J22);
243 const double A32 = (J31 * J12) - (J11 * J32);
244 const double A33 = (J11 * J22) - (J12 * J21);
246 if (coeffDim == 6 || coeffDim == 9)
249 const double M11 = C(0, qx,qy,qz, e);
250 const double M12 = C(1, qx,qy,qz, e);
251 const double M13 = C(2, qx,qy,qz, e);
252 const double M21 = (!symmetric) ? C(3, qx,qy,qz, e) : M12;
253 const double M22 = (!symmetric) ? C(4, qx,qy,qz, e) : C(3, qx,qy,qz, e);
254 const double M23 = (!symmetric) ? C(5, qx,qy,qz, e) : C(4, qx,qy,qz, e);
255 const double M31 = (!symmetric) ? C(6, qx,qy,qz, e) : M13;
256 const double M32 = (!symmetric) ? C(7, qx,qy,qz, e) : M23;
257 const double M33 = (!symmetric) ? C(8, qx,qy,qz, e) : C(5, qx,qy,qz, e);
259 const double R11 = M11*A11 + M12*A12 + M13*A13;
260 const double R12 = M11*A21 + M12*A22 + M13*A23;
261 const double R13 = M11*A31 + M12*A32 + M13*A33;
262 const double R21 = M21*A11 + M22*A12 + M23*A13;
263 const double R22 = M21*A21 + M22*A22 + M23*A23;
264 const double R23 = M21*A31 + M22*A32 + M23*A33;
265 const double R31 = M31*A11 + M32*A12 + M33*A13;
266 const double R32 = M31*A21 + M32*A22 + M33*A23;
267 const double R33 = M31*A31 + M32*A32 + M33*A33;
270 D(qx,qy,qz,0,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31);
271 const double D12 = w_detJ * (A11*R12 + A12*R22 + A13*R32);
272 D(qx,qy,qz,1,e) = D12;
273 D(qx,qy,qz,2,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33);
275 const double D22 = w_detJ * (A21*R12 + A22*R22 + A23*R32);
276 const double D23 = w_detJ * (A21*R13 + A22*R23 + A23*R33);
278 const double D33 = w_detJ * (A31*R13 + A32*R23 + A33*R33);
280 D(qx,qy,qz,4,e) = symmetric ? D23 : D22;
281 D(qx,qy,qz,5,e) = symmetric ? D33 : D23;
285 D(qx,qy,qz,3,e) = D22;
289 D(qx,qy,qz,3,e) = w_detJ * (A21*R11 + A22*R21 + A23*R31);
290 D(qx,qy,qz,6,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31);
291 D(qx,qy,qz,7,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32);
292 D(qx,qy,qz,8,e) = D33;
297 const double C1 = const_c ? C(0,0,0,0,0) : C(0,qx,qy,qz,e);
298 const double C2 = const_c ? C(0,0,0,0,0) :
299 (coeffDim == 3 ? C(1,qx,qy,qz,e) : C(0,qx,qy,qz,e));
300 const double C3 = const_c ? C(0,0,0,0,0) :
301 (coeffDim == 3 ? C(2,qx,qy,qz,e) : C(0,qx,qy,qz,e));
304 D(qx,qy,qz,0,e) = w_detJ * (C1*A11*A11 + C2*A12*A12 + C3*A13*A13);
305 D(qx,qy,qz,1,e) = w_detJ * (C1*A11*A21 + C2*A12*A22 + C3*A13*A23);
306 D(qx,qy,qz,2,e) = w_detJ * (C1*A11*A31 + C2*A12*A32 + C3*A13*A33);
307 D(qx,qy,qz,3,e) = w_detJ * (C1*A21*A21 + C2*A22*A22 + C3*A23*A23);
308 D(qx,qy,qz,4,e) = w_detJ * (C1*A21*A31 + C2*A22*A32 + C3*A23*A33);
309 D(qx,qy,qz,5,e) = w_detJ * (C1*A31*A31 + C2*A32*A32 + C3*A33*A33);
317 static void PADiffusionSetup(
const int dim,
323 const Array<double> &W,
328 if (
dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADiffusionSetup"); }
334 OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
338 MFEM_CONTRACT_VAR(D1D);
339 #endif // MFEM_USE_OCCA 348 OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
351 #endif // MFEM_USE_OCCA 358 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
359 Device::GetDeviceMemoryType() : pa_mt;
363 if (mesh->
GetNE() == 0) {
return; }
369 MFEM_VERIFY(!VQ && !MQ,
370 "Only scalar coefficient supported for DiffusionIntegrator" 384 const int dims = el.
GetDim();
385 const int symmDims = (dims * (dims + 1)) / 2;
399 else if (VQ) { coeff.
Project(*VQ); }
400 else if (Q) { coeff.
Project(*Q); }
403 const int coeff_dim = coeff.
GetVDim();
404 symmetric = (coeff_dim != dims*dims);
405 const int pa_size = symmetric ? symmDims : dims*dims;
407 pa_data.SetSize(pa_size * nq * ne, mt);
408 PADiffusionSetup(
dim, sdim, dofs1D, quad1D, coeff_dim, ne, ir->
GetWeights(),
409 geom->J, coeff, pa_data);
412 template<
int T_D1D = 0,
int T_Q1D = 0>
413 static void PADiffusionDiagonal2D(
const int NE,
414 const bool symmetric,
422 const int D1D = T_D1D ? T_D1D : d1d;
423 const int Q1D = T_Q1D ? T_Q1D : q1d;
424 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
425 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
426 auto B =
Reshape(
b.Read(), Q1D, D1D);
430 auto D =
Reshape(d.
Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
434 const int D1D = T_D1D ? T_D1D : d1d;
435 const int Q1D = T_Q1D ? T_Q1D : q1d;
436 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
437 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
439 double QD0[MQ1][MD1];
440 double QD1[MQ1][MD1];
441 double QD2[MQ1][MD1];
442 for (
int qx = 0; qx < Q1D; ++qx)
444 for (
int dy = 0; dy < D1D; ++dy)
449 for (
int qy = 0; qy < Q1D; ++qy)
451 const int q = qx + qy * Q1D;
452 const double D00 = D(q,0,e);
453 const double D10 = D(q,1,e);
454 const double D01 = symmetric ? D10 : D(q,2,e);
455 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
456 QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D00;
457 QD1[qx][dy] += B(qy, dy) * G(qy, dy) * (D01 + D10);
458 QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D11;
462 for (
int dy = 0; dy < D1D; ++dy)
464 for (
int dx = 0; dx < D1D; ++dx)
466 for (
int qx = 0; qx < Q1D; ++qx)
468 Y(dx,dy,e) += G(qx, dx) * G(qx, dx) * QD0[qx][dy];
469 Y(dx,dy,e) += G(qx, dx) * B(qx, dx) * QD1[qx][dy];
470 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD2[qx][dy];
478 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
479 static void SmemPADiffusionDiagonal2D(
const int NE,
480 const bool symmetric,
481 const Array<double> &b_,
482 const Array<double> &g_,
488 const int D1D = T_D1D ? T_D1D : d1d;
489 const int Q1D = T_Q1D ? T_Q1D : q1d;
490 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
491 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
492 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
493 MFEM_VERIFY(D1D <= MD1,
"");
494 MFEM_VERIFY(Q1D <= MQ1,
"");
495 auto b =
Reshape(b_.Read(), Q1D, D1D);
496 auto g =
Reshape(g_.Read(), Q1D, D1D);
497 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
498 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
499 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
501 const int tidz = MFEM_THREAD_ID(z);
502 const int D1D = T_D1D ? T_D1D : d1d;
503 const int Q1D = T_Q1D ? T_Q1D : q1d;
504 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
505 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
506 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
507 MFEM_SHARED
double BG[2][MQ1*MD1];
508 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
509 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
510 MFEM_SHARED
double QD[3][NBZ][MD1][MQ1];
511 double (*QD0)[MD1] = (double (*)[MD1])(QD[0] + tidz);
512 double (*QD1)[MD1] = (double (*)[MD1])(QD[1] + tidz);
513 double (*QD2)[MD1] = (double (*)[MD1])(QD[2] + tidz);
516 MFEM_FOREACH_THREAD(d,y,D1D)
518 MFEM_FOREACH_THREAD(q,x,Q1D)
526 MFEM_FOREACH_THREAD(qx,x,Q1D)
528 MFEM_FOREACH_THREAD(dy,y,D1D)
533 for (
int qy = 0; qy < Q1D; ++qy)
535 const int q = qx + qy * Q1D;
536 const double D00 = D(q,0,e);
537 const double D10 = D(q,1,e);
538 const double D01 = symmetric ? D10 : D(q,2,e);
539 const double D11 = symmetric ? D(q,2,e) : D(q,3,e);
540 const double By = B[qy][dy];
541 const double Gy = G[qy][dy];
542 const double BBy = By * By;
543 const double BGy = By * Gy;
544 const double GGy = Gy * Gy;
545 QD0[qx][dy] += BBy * D00;
546 QD1[qx][dy] += BGy * (D01 + D10);
547 QD2[qx][dy] += GGy * D11;
552 MFEM_FOREACH_THREAD(dy,y,D1D)
554 MFEM_FOREACH_THREAD(dx,x,D1D)
556 for (
int qx = 0; qx < Q1D; ++qx)
558 const double Bx = B[qx][dx];
559 const double Gx = G[qx][dx];
560 const double BBx = Bx * Bx;
561 const double BGx = Bx * Gx;
562 const double GGx = Gx * Gx;
563 Y(dx,dy,e) += GGx * QD0[qx][dy];
564 Y(dx,dy,e) += BGx * QD1[qx][dy];
565 Y(dx,dy,e) += BBx * QD2[qx][dy];
572 template<
int T_D1D = 0,
int T_Q1D = 0>
573 static void PADiffusionDiagonal3D(
const int NE,
574 const bool symmetric,
575 const Array<double> &
b,
576 const Array<double> &g,
582 constexpr
int DIM = 3;
583 const int D1D = T_D1D ? T_D1D : d1d;
584 const int Q1D = T_Q1D ? T_Q1D : q1d;
585 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
586 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
587 MFEM_VERIFY(D1D <= MD1,
"");
588 MFEM_VERIFY(Q1D <= MQ1,
"");
589 auto B =
Reshape(
b.Read(), Q1D, D1D);
590 auto G =
Reshape(g.Read(), Q1D, D1D);
591 auto Q =
Reshape(d.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
592 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
595 const int D1D = T_D1D ? T_D1D : d1d;
596 const int Q1D = T_Q1D ? T_Q1D : q1d;
597 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
598 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
599 double QQD[MQ1][MQ1][MD1];
600 double QDD[MQ1][MD1][MD1];
601 for (
int i = 0; i <
DIM; ++i)
603 for (
int j = 0; j <
DIM; ++j)
606 for (
int qx = 0; qx < Q1D; ++qx)
608 for (
int qy = 0; qy < Q1D; ++qy)
610 for (
int dz = 0; dz < D1D; ++dz)
612 QQD[qx][qy][dz] = 0.0;
613 for (
int qz = 0; qz < Q1D; ++qz)
615 const int q = qx + (qy + qz * Q1D) * Q1D;
616 const int ksym = j >= i ?
617 3 - (3-i)*(2-i)/2 + j:
618 3 - (3-j)*(2-j)/2 + i;
619 const int k = symmetric ? ksym : (i*
DIM) + j;
620 const double O = Q(q,k,e);
621 const double Bz = B(qz,dz);
622 const double Gz = G(qz,dz);
623 const double L = i==2 ? Gz : Bz;
624 const double R = j==2 ? Gz : Bz;
625 QQD[qx][qy][dz] += L * O * R;
631 for (
int qx = 0; qx < Q1D; ++qx)
633 for (
int dz = 0; dz < D1D; ++dz)
635 for (
int dy = 0; dy < D1D; ++dy)
637 QDD[qx][dy][dz] = 0.0;
638 for (
int qy = 0; qy < Q1D; ++qy)
640 const double By = B(qy,dy);
641 const double Gy = G(qy,dy);
642 const double L = i==1 ? Gy : By;
643 const double R = j==1 ? Gy : By;
644 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
650 for (
int dz = 0; dz < D1D; ++dz)
652 for (
int dy = 0; dy < D1D; ++dy)
654 for (
int dx = 0; dx < D1D; ++dx)
656 for (
int qx = 0; qx < Q1D; ++qx)
658 const double Bx = B(qx,dx);
659 const double Gx = G(qx,dx);
660 const double L = i==0 ? Gx : Bx;
661 const double R = j==0 ? Gx : Bx;
662 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
673 template<
int T_D1D = 0,
int T_Q1D = 0>
674 static void SmemPADiffusionDiagonal3D(
const int NE,
675 const bool symmetric,
676 const Array<double> &b_,
677 const Array<double> &g_,
683 constexpr
int DIM = 3;
684 const int D1D = T_D1D ? T_D1D : d1d;
685 const int Q1D = T_Q1D ? T_Q1D : q1d;
686 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
687 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
688 MFEM_VERIFY(D1D <= MD1,
"");
689 MFEM_VERIFY(Q1D <= MQ1,
"");
690 auto b =
Reshape(b_.Read(), Q1D, D1D);
691 auto g =
Reshape(g_.Read(), Q1D, D1D);
692 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
693 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
694 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
696 const int tidz = MFEM_THREAD_ID(z);
697 const int D1D = T_D1D ? T_D1D : d1d;
698 const int Q1D = T_Q1D ? T_Q1D : q1d;
699 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
700 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
701 MFEM_SHARED
double BG[2][MQ1*MD1];
702 double (*B)[MD1] = (double (*)[MD1]) (BG+0);
703 double (*G)[MD1] = (double (*)[MD1]) (BG+1);
704 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
705 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
708 MFEM_FOREACH_THREAD(d,y,D1D)
710 MFEM_FOREACH_THREAD(q,x,Q1D)
718 for (
int i = 0; i <
DIM; ++i)
720 for (
int j = 0; j <
DIM; ++j)
723 MFEM_FOREACH_THREAD(qx,x,Q1D)
725 MFEM_FOREACH_THREAD(qy,y,Q1D)
727 MFEM_FOREACH_THREAD(dz,z,D1D)
729 QQD[qx][qy][dz] = 0.0;
730 for (
int qz = 0; qz < Q1D; ++qz)
732 const int q = qx + (qy + qz * Q1D) * Q1D;
733 const int ksym = j >= i ?
734 3 - (3-i)*(2-i)/2 + j:
735 3 - (3-j)*(2-j)/2 + i;
736 const int k = symmetric ? ksym : (i*
DIM) + j;
737 const double O = D(q,k,e);
738 const double Bz = B[qz][dz];
739 const double Gz = G[qz][dz];
740 const double L = i==2 ? Gz : Bz;
741 const double R = j==2 ? Gz : Bz;
742 QQD[qx][qy][dz] += L * O * R;
749 MFEM_FOREACH_THREAD(qx,x,Q1D)
751 MFEM_FOREACH_THREAD(dz,z,D1D)
753 MFEM_FOREACH_THREAD(dy,y,D1D)
755 QDD[qx][dy][dz] = 0.0;
756 for (
int qy = 0; qy < Q1D; ++qy)
758 const double By = B[qy][dy];
759 const double Gy = G[qy][dy];
760 const double L = i==1 ? Gy : By;
761 const double R = j==1 ? Gy : By;
762 QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R;
769 MFEM_FOREACH_THREAD(dz,z,D1D)
771 MFEM_FOREACH_THREAD(dy,y,D1D)
773 MFEM_FOREACH_THREAD(dx,x,D1D)
775 for (
int qx = 0; qx < Q1D; ++qx)
777 const double Bx = B[qx][dx];
778 const double Gx = G[qx][dx];
779 const double L = i==0 ? Gx : Bx;
780 const double R = j==0 ? Gx : Bx;
781 Y(dx, dy, dz, e) += L * QDD[qx][dy][dz] * R;
791 static void PADiffusionAssembleDiagonal(
const int dim,
796 const Array<double> &B,
797 const Array<double> &G,
803 switch ((D1D << 4 ) | Q1D)
805 case 0x22:
return SmemPADiffusionDiagonal2D<2,2,8>(NE,symm,B,G,D,Y);
806 case 0x33:
return SmemPADiffusionDiagonal2D<3,3,8>(NE,symm,B,G,D,Y);
807 case 0x44:
return SmemPADiffusionDiagonal2D<4,4,4>(NE,symm,B,G,D,Y);
808 case 0x55:
return SmemPADiffusionDiagonal2D<5,5,4>(NE,symm,B,G,D,Y);
809 case 0x66:
return SmemPADiffusionDiagonal2D<6,6,2>(NE,symm,B,G,D,Y);
810 case 0x77:
return SmemPADiffusionDiagonal2D<7,7,2>(NE,symm,B,G,D,Y);
811 case 0x88:
return SmemPADiffusionDiagonal2D<8,8,1>(NE,symm,B,G,D,Y);
812 case 0x99:
return SmemPADiffusionDiagonal2D<9,9,1>(NE,symm,B,G,D,Y);
813 default:
return PADiffusionDiagonal2D(NE,symm,B,G,D,Y,D1D,Q1D);
818 switch ((D1D << 4 ) | Q1D)
820 case 0x22:
return SmemPADiffusionDiagonal3D<2,2>(NE,symm,B,G,D,Y);
821 case 0x23:
return SmemPADiffusionDiagonal3D<2,3>(NE,symm,B,G,D,Y);
822 case 0x34:
return SmemPADiffusionDiagonal3D<3,4>(NE,symm,B,G,D,Y);
823 case 0x45:
return SmemPADiffusionDiagonal3D<4,5>(NE,symm,B,G,D,Y);
824 case 0x46:
return SmemPADiffusionDiagonal3D<4,6>(NE,symm,B,G,D,Y);
825 case 0x56:
return SmemPADiffusionDiagonal3D<5,6>(NE,symm,B,G,D,Y);
826 case 0x67:
return SmemPADiffusionDiagonal3D<6,7>(NE,symm,B,G,D,Y);
827 case 0x78:
return SmemPADiffusionDiagonal3D<7,8>(NE,symm,B,G,D,Y);
828 case 0x89:
return SmemPADiffusionDiagonal3D<8,9>(NE,symm,B,G,D,Y);
829 case 0x9A:
return SmemPADiffusionDiagonal3D<9,10>(NE,symm,B,G,D,Y);
830 default:
return PADiffusionDiagonal3D(NE,symm,B,G,D,Y,D1D,Q1D);
833 MFEM_ABORT(
"Unknown kernel.");
836 void DiffusionIntegrator::AssembleDiagonalPA(
Vector &diag)
840 ceedOp->GetDiagonal(diag);
844 if (pa_data.Size()==0) { AssemblePA(*fespace); }
845 PADiffusionAssembleDiagonal(
dim, dofs1D, quad1D, ne, symmetric,
846 maps->B, maps->G, pa_data, diag);
853 static void OccaPADiffusionApply2D(
const int D1D,
864 occa::properties props;
865 props[
"defines/D1D"] = D1D;
866 props[
"defines/Q1D"] = Q1D;
874 const occa_id_t id = std::make_pair(D1D,Q1D);
875 if (!Device::Allows(Backend::OCCA_CUDA))
878 if (OccaDiffApply2D_cpu.find(
id) == OccaDiffApply2D_cpu.end())
880 const occa::kernel DiffusionApply2D_CPU =
882 "DiffusionApply2D_CPU", props);
883 OccaDiffApply2D_cpu.emplace(
id, DiffusionApply2D_CPU);
885 OccaDiffApply2D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
890 if (OccaDiffApply2D_gpu.find(
id) == OccaDiffApply2D_gpu.end())
892 const occa::kernel DiffusionApply2D_GPU =
894 "DiffusionApply2D_GPU", props);
895 OccaDiffApply2D_gpu.emplace(
id, DiffusionApply2D_GPU);
897 OccaDiffApply2D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
902 static void OccaPADiffusionApply3D(
const int D1D,
905 const Array<double> &B,
906 const Array<double> &G,
907 const Array<double> &Bt,
908 const Array<double> &Gt,
913 occa::properties props;
914 props[
"defines/D1D"] = D1D;
915 props[
"defines/Q1D"] = Q1D;
918 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
919 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
923 const occa_id_t id = std::make_pair(D1D,Q1D);
924 if (!Device::Allows(Backend::OCCA_CUDA))
927 if (OccaDiffApply3D_cpu.find(
id) == OccaDiffApply3D_cpu.end())
929 const occa::kernel DiffusionApply3D_CPU =
931 "DiffusionApply3D_CPU", props);
932 OccaDiffApply3D_cpu.emplace(
id, DiffusionApply3D_CPU);
934 OccaDiffApply3D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
939 if (OccaDiffApply3D_gpu.find(
id) == OccaDiffApply3D_gpu.end())
941 const occa::kernel DiffusionApply3D_GPU =
943 "DiffusionApply3D_GPU", props);
944 OccaDiffApply3D_gpu.emplace(
id, DiffusionApply3D_GPU);
946 OccaDiffApply3D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
949 #endif // MFEM_USE_OCCA 952 template<
int T_D1D = 0,
int T_Q1D = 0>
953 static void PADiffusionApply2D(
const int NE,
954 const bool symmetric,
955 const Array<double> &b_,
956 const Array<double> &g_,
957 const Array<double> &bt_,
958 const Array<double> >_,
965 const int D1D = T_D1D ? T_D1D : d1d;
966 const int Q1D = T_Q1D ? T_Q1D : q1d;
967 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
968 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
969 auto B =
Reshape(b_.Read(), Q1D, D1D);
970 auto G =
Reshape(g_.Read(), Q1D, D1D);
971 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
972 auto Gt =
Reshape(gt_.Read(), D1D, Q1D);
973 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
974 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
975 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
978 const int D1D = T_D1D ? T_D1D : d1d;
979 const int Q1D = T_Q1D ? T_Q1D : q1d;
981 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
982 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
984 double grad[max_Q1D][max_Q1D][2];
985 for (
int qy = 0; qy < Q1D; ++qy)
987 for (
int qx = 0; qx < Q1D; ++qx)
989 grad[qy][qx][0] = 0.0;
990 grad[qy][qx][1] = 0.0;
993 for (
int dy = 0; dy < D1D; ++dy)
995 double gradX[max_Q1D][2];
996 for (
int qx = 0; qx < Q1D; ++qx)
1001 for (
int dx = 0; dx < D1D; ++dx)
1003 const double s = X(dx,dy,e);
1004 for (
int qx = 0; qx < Q1D; ++qx)
1006 gradX[qx][0] +=
s * B(qx,dx);
1007 gradX[qx][1] +=
s * G(qx,dx);
1010 for (
int qy = 0; qy < Q1D; ++qy)
1012 const double wy = B(qy,dy);
1013 const double wDy = G(qy,dy);
1014 for (
int qx = 0; qx < Q1D; ++qx)
1016 grad[qy][qx][0] += gradX[qx][1] * wy;
1017 grad[qy][qx][1] += gradX[qx][0] * wDy;
1022 for (
int qy = 0; qy < Q1D; ++qy)
1024 for (
int qx = 0; qx < Q1D; ++qx)
1026 const int q = qx + qy * Q1D;
1028 const double O11 = D(q,0,e);
1029 const double O21 = D(q,1,e);
1030 const double O12 = symmetric ? O21 : D(q,2,e);
1031 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1033 const double gradX = grad[qy][qx][0];
1034 const double gradY = grad[qy][qx][1];
1036 grad[qy][qx][0] = (O11 * gradX) + (O12 * gradY);
1037 grad[qy][qx][1] = (O21 * gradX) + (O22 * gradY);
1040 for (
int qy = 0; qy < Q1D; ++qy)
1042 double gradX[max_D1D][2];
1043 for (
int dx = 0; dx < D1D; ++dx)
1048 for (
int qx = 0; qx < Q1D; ++qx)
1050 const double gX = grad[qy][qx][0];
1051 const double gY = grad[qy][qx][1];
1052 for (
int dx = 0; dx < D1D; ++dx)
1054 const double wx = Bt(dx,qx);
1055 const double wDx = Gt(dx,qx);
1056 gradX[dx][0] += gX * wDx;
1057 gradX[dx][1] += gY * wx;
1060 for (
int dy = 0; dy < D1D; ++dy)
1062 const double wy = Bt(dy,qy);
1063 const double wDy = Gt(dy,qy);
1064 for (
int dx = 0; dx < D1D; ++dx)
1066 Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
1074 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
1075 static void SmemPADiffusionApply2D(
const int NE,
1076 const bool symmetric,
1077 const Array<double> &b_,
1078 const Array<double> &g_,
1085 const int D1D = T_D1D ? T_D1D : d1d;
1086 const int Q1D = T_Q1D ? T_Q1D : q1d;
1087 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1088 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1089 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1090 MFEM_VERIFY(D1D <= MD1,
"");
1091 MFEM_VERIFY(Q1D <= MQ1,
"");
1092 auto b =
Reshape(b_.Read(), Q1D, D1D);
1093 auto g =
Reshape(g_.Read(), Q1D, D1D);
1094 auto D =
Reshape(d_.Read(), Q1D*Q1D, symmetric ? 3 : 4, NE);
1095 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
1096 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
1097 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
1099 const int tidz = MFEM_THREAD_ID(z);
1100 const int D1D = T_D1D ? T_D1D : d1d;
1101 const int Q1D = T_Q1D ? T_Q1D : q1d;
1102 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
1103 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1104 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1105 MFEM_SHARED
double sBG[2][MQ1*MD1];
1106 double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
1107 double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
1108 double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
1109 double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
1110 MFEM_SHARED
double Xz[NBZ][MD1][MD1];
1111 MFEM_SHARED
double GD[2][NBZ][MD1][MQ1];
1112 MFEM_SHARED
double GQ[2][NBZ][MD1][MQ1];
1113 double (*X)[MD1] = (double (*)[MD1])(Xz + tidz);
1114 double (*DQ0)[MD1] = (double (*)[MD1])(GD[0] + tidz);
1115 double (*DQ1)[MD1] = (double (*)[MD1])(GD[1] + tidz);
1116 double (*QQ0)[MD1] = (double (*)[MD1])(GQ[0] + tidz);
1117 double (*QQ1)[MD1] = (double (*)[MD1])(GQ[1] + tidz);
1118 MFEM_FOREACH_THREAD(dy,y,D1D)
1120 MFEM_FOREACH_THREAD(dx,x,D1D)
1122 X[dy][dx] = x(dx,dy,e);
1127 MFEM_FOREACH_THREAD(dy,y,D1D)
1129 MFEM_FOREACH_THREAD(q,x,Q1D)
1137 MFEM_FOREACH_THREAD(dy,y,D1D)
1139 MFEM_FOREACH_THREAD(qx,x,Q1D)
1143 for (
int dx = 0; dx < D1D; ++dx)
1145 const double coords = X[dy][dx];
1146 u += B[qx][dx] * coords;
1147 v += G[qx][dx] * coords;
1154 MFEM_FOREACH_THREAD(qy,y,Q1D)
1156 MFEM_FOREACH_THREAD(qx,x,Q1D)
1160 for (
int dy = 0; dy < D1D; ++dy)
1162 u += DQ1[dy][qx] * B[qy][dy];
1163 v += DQ0[dy][qx] * G[qy][dy];
1170 MFEM_FOREACH_THREAD(qy,y,Q1D)
1172 MFEM_FOREACH_THREAD(qx,x,Q1D)
1174 const int q = (qx + ((qy) * Q1D));
1175 const double O11 = D(q,0,e);
1176 const double O21 = D(q,1,e);
1177 const double O12 = symmetric ? O21 : D(q,2,e);
1178 const double O22 = symmetric ? D(q,2,e) : D(q,3,e);
1179 const double gX = QQ0[qy][qx];
1180 const double gY = QQ1[qy][qx];
1181 QQ0[qy][qx] = (O11 * gX) + (O12 * gY);
1182 QQ1[qy][qx] = (O21 * gX) + (O22 * gY);
1188 MFEM_FOREACH_THREAD(dy,y,D1D)
1190 MFEM_FOREACH_THREAD(q,x,Q1D)
1192 Bt[dy][q] =
b(q,dy);
1193 Gt[dy][q] = g(q,dy);
1198 MFEM_FOREACH_THREAD(qy,y,Q1D)
1200 MFEM_FOREACH_THREAD(dx,x,D1D)
1204 for (
int qx = 0; qx < Q1D; ++qx)
1206 u += Gt[dx][qx] * QQ0[qy][qx];
1207 v += Bt[dx][qx] * QQ1[qy][qx];
1214 MFEM_FOREACH_THREAD(dy,y,D1D)
1216 MFEM_FOREACH_THREAD(dx,x,D1D)
1220 for (
int qy = 0; qy < Q1D; ++qy)
1222 u += DQ0[qy][dx] * Bt[dy][qy];
1223 v += DQ1[qy][dx] * Gt[dy][qy];
1225 Y(dx,dy,e) += (
u + v);
1232 template<
int T_D1D = 0,
int T_Q1D = 0>
1233 static void PADiffusionApply3D(
const int NE,
1234 const bool symmetric,
1235 const Array<double> &
b,
1236 const Array<double> &g,
1237 const Array<double> &bt,
1238 const Array<double> >,
1242 int d1d = 0,
int q1d = 0)
1244 const int D1D = T_D1D ? T_D1D : d1d;
1245 const int Q1D = T_Q1D ? T_Q1D : q1d;
1246 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
1247 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
1248 auto B =
Reshape(
b.Read(), Q1D, D1D);
1249 auto G =
Reshape(g.Read(), Q1D, D1D);
1250 auto Bt =
Reshape(bt.Read(), D1D, Q1D);
1251 auto Gt =
Reshape(gt.Read(), D1D, Q1D);
1252 auto D =
Reshape(d_.Read(), Q1D*Q1D*Q1D, symmetric ? 6 : 9, NE);
1253 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1254 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1257 const int D1D = T_D1D ? T_D1D : d1d;
1258 const int Q1D = T_Q1D ? T_Q1D : q1d;
1259 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
1260 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
1261 double grad[max_Q1D][max_Q1D][max_Q1D][3];
1262 for (
int qz = 0; qz < Q1D; ++qz)
1264 for (
int qy = 0; qy < Q1D; ++qy)
1266 for (
int qx = 0; qx < Q1D; ++qx)
1268 grad[qz][qy][qx][0] = 0.0;
1269 grad[qz][qy][qx][1] = 0.0;
1270 grad[qz][qy][qx][2] = 0.0;
1274 for (
int dz = 0; dz < D1D; ++dz)
1276 double gradXY[max_Q1D][max_Q1D][3];
1277 for (
int qy = 0; qy < Q1D; ++qy)
1279 for (
int qx = 0; qx < Q1D; ++qx)
1281 gradXY[qy][qx][0] = 0.0;
1282 gradXY[qy][qx][1] = 0.0;
1283 gradXY[qy][qx][2] = 0.0;
1286 for (
int dy = 0; dy < D1D; ++dy)
1288 double gradX[max_Q1D][2];
1289 for (
int qx = 0; qx < Q1D; ++qx)
1294 for (
int dx = 0; dx < D1D; ++dx)
1296 const double s = X(dx,dy,dz,e);
1297 for (
int qx = 0; qx < Q1D; ++qx)
1299 gradX[qx][0] +=
s * B(qx,dx);
1300 gradX[qx][1] +=
s * G(qx,dx);
1303 for (
int qy = 0; qy < Q1D; ++qy)
1305 const double wy = B(qy,dy);
1306 const double wDy = G(qy,dy);
1307 for (
int qx = 0; qx < Q1D; ++qx)
1309 const double wx = gradX[qx][0];
1310 const double wDx = gradX[qx][1];
1311 gradXY[qy][qx][0] += wDx * wy;
1312 gradXY[qy][qx][1] += wx * wDy;
1313 gradXY[qy][qx][2] += wx * wy;
1317 for (
int qz = 0; qz < Q1D; ++qz)
1319 const double wz = B(qz,dz);
1320 const double wDz = G(qz,dz);
1321 for (
int qy = 0; qy < Q1D; ++qy)
1323 for (
int qx = 0; qx < Q1D; ++qx)
1325 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
1326 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
1327 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
1333 for (
int qz = 0; qz < Q1D; ++qz)
1335 for (
int qy = 0; qy < Q1D; ++qy)
1337 for (
int qx = 0; qx < Q1D; ++qx)
1339 const int q = qx + (qy + qz * Q1D) * Q1D;
1340 const double O11 = D(q,0,e);
1341 const double O12 = D(q,1,e);
1342 const double O13 = D(q,2,e);
1343 const double O21 = symmetric ? O12 : D(q,3,e);
1344 const double O22 = symmetric ? D(q,3,e) : D(q,4,e);
1345 const double O23 = symmetric ? D(q,4,e) : D(q,5,e);
1346 const double O31 = symmetric ? O13 : D(q,6,e);
1347 const double O32 = symmetric ? O23 : D(q,7,e);
1348 const double O33 = symmetric ? D(q,5,e) : D(q,8,e);
1349 const double gradX = grad[qz][qy][qx][0];
1350 const double gradY = grad[qz][qy][qx][1];
1351 const double gradZ = grad[qz][qy][qx][2];
1352 grad[qz][qy][qx][0] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
1353 grad[qz][qy][qx][1] = (O21*gradX)+(O22*gradY)+(O23*gradZ);
1354 grad[qz][qy][qx][2] = (O31*gradX)+(O32*gradY)+(O33*gradZ);
1358 for (
int qz = 0; qz < Q1D; ++qz)
1360 double gradXY[max_D1D][max_D1D][3];
1361 for (
int dy = 0; dy < D1D; ++dy)
1363 for (
int dx = 0; dx < D1D; ++dx)
1365 gradXY[dy][dx][0] = 0;
1366 gradXY[dy][dx][1] = 0;
1367 gradXY[dy][dx][2] = 0;
1370 for (
int qy = 0; qy < Q1D; ++qy)
1372 double gradX[max_D1D][3];
1373 for (
int dx = 0; dx < D1D; ++dx)
1379 for (
int qx = 0; qx < Q1D; ++qx)
1381 const double gX = grad[qz][qy][qx][0];
1382 const double gY = grad[qz][qy][qx][1];
1383 const double gZ = grad[qz][qy][qx][2];
1384 for (
int dx = 0; dx < D1D; ++dx)
1386 const double wx = Bt(dx,qx);
1387 const double wDx = Gt(dx,qx);
1388 gradX[dx][0] += gX * wDx;
1389 gradX[dx][1] += gY * wx;
1390 gradX[dx][2] += gZ * wx;
1393 for (
int dy = 0; dy < D1D; ++dy)
1395 const double wy = Bt(dy,qy);
1396 const double wDy = Gt(dy,qy);
1397 for (
int dx = 0; dx < D1D; ++dx)
1399 gradXY[dy][dx][0] += gradX[dx][0] * wy;
1400 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
1401 gradXY[dy][dx][2] += gradX[dx][2] * wy;
1405 for (
int dz = 0; dz < D1D; ++dz)
1407 const double wz = Bt(dz,qz);
1408 const double wDz = Gt(dz,qz);
1409 for (
int dy = 0; dy < D1D; ++dy)
1411 for (
int dx = 0; dx < D1D; ++dx)
1414 ((gradXY[dy][dx][0] * wz) +
1415 (gradXY[dy][dx][1] * wz) +
1416 (gradXY[dy][dx][2] * wDz));
1424 template<
int T_D1D = 0,
int T_Q1D = 0>
1425 static void SmemPADiffusionApply3D(
const int NE,
1426 const bool symmetric,
1427 const Array<double> &b_,
1428 const Array<double> &g_,
1435 const int D1D = T_D1D ? T_D1D : d1d;
1436 const int Q1D = T_Q1D ? T_Q1D : q1d;
1437 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
1438 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
1439 MFEM_VERIFY(D1D <= M1D,
"");
1440 MFEM_VERIFY(Q1D <= M1Q,
"");
1441 auto b =
Reshape(b_.Read(), Q1D, D1D);
1442 auto g =
Reshape(g_.Read(), Q1D, D1D);
1443 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1444 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
1445 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
1446 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1448 const int D1D = T_D1D ? T_D1D : d1d;
1449 const int Q1D = T_Q1D ? T_Q1D : q1d;
1450 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
1451 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
1452 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
1453 MFEM_SHARED
double sBG[2][MQ1*MD1];
1454 double (*B)[MD1] = (double (*)[MD1]) (sBG+0);
1455 double (*G)[MD1] = (double (*)[MD1]) (sBG+1);
1456 double (*Bt)[MQ1] = (double (*)[MQ1]) (sBG+0);
1457 double (*Gt)[MQ1] = (double (*)[MQ1]) (sBG+1);
1458 MFEM_SHARED
double sm0[3][MDQ*MDQ*MDQ];
1459 MFEM_SHARED
double sm1[3][MDQ*MDQ*MDQ];
1460 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1461 double (*DDQ0)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+0);
1462 double (*DDQ1)[MD1][MQ1] = (double (*)[MD1][MQ1]) (sm0+1);
1463 double (*DQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+0);
1464 double (*DQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+1);
1465 double (*DQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm1+2);
1466 double (*QQQ0)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+0);
1467 double (*QQQ1)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+1);
1468 double (*QQQ2)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) (sm0+2);
1469 double (*QQD0)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+0);
1470 double (*QQD1)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+1);
1471 double (*QQD2)[MQ1][MD1] = (double (*)[MQ1][MD1]) (sm1+2);
1472 double (*QDD0)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+0);
1473 double (*QDD1)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+1);
1474 double (*QDD2)[MD1][MD1] = (double (*)[MD1][MD1]) (sm0+2);
1475 MFEM_FOREACH_THREAD(dz,z,D1D)
1477 MFEM_FOREACH_THREAD(dy,y,D1D)
1479 MFEM_FOREACH_THREAD(dx,x,D1D)
1481 X[dz][dy][dx] = x(dx,dy,dz,e);
1485 if (MFEM_THREAD_ID(z) == 0)
1487 MFEM_FOREACH_THREAD(dy,y,D1D)
1489 MFEM_FOREACH_THREAD(qx,x,Q1D)
1491 B[qx][dy] =
b(qx,dy);
1492 G[qx][dy] = g(qx,dy);
1497 MFEM_FOREACH_THREAD(dz,z,D1D)
1499 MFEM_FOREACH_THREAD(dy,y,D1D)
1501 MFEM_FOREACH_THREAD(qx,x,Q1D)
1503 double u = 0.0, v = 0.0;
1505 for (
int dx = 0; dx < D1D; ++dx)
1507 const double coords = X[dz][dy][dx];
1508 u += coords * B[qx][dx];
1509 v += coords * G[qx][dx];
1511 DDQ0[dz][dy][qx] =
u;
1512 DDQ1[dz][dy][qx] = v;
1517 MFEM_FOREACH_THREAD(dz,z,D1D)
1519 MFEM_FOREACH_THREAD(qy,y,Q1D)
1521 MFEM_FOREACH_THREAD(qx,x,Q1D)
1523 double u = 0.0, v = 0.0, w = 0.0;
1525 for (
int dy = 0; dy < D1D; ++dy)
1527 u += DDQ1[dz][dy][qx] * B[qy][dy];
1528 v += DDQ0[dz][dy][qx] * G[qy][dy];
1529 w += DDQ0[dz][dy][qx] * B[qy][dy];
1531 DQQ0[dz][qy][qx] =
u;
1532 DQQ1[dz][qy][qx] = v;
1533 DQQ2[dz][qy][qx] = w;
1538 MFEM_FOREACH_THREAD(qz,z,Q1D)
1540 MFEM_FOREACH_THREAD(qy,y,Q1D)
1542 MFEM_FOREACH_THREAD(qx,x,Q1D)
1544 double u = 0.0, v = 0.0, w = 0.0;
1546 for (
int dz = 0; dz < D1D; ++dz)
1548 u += DQQ0[dz][qy][qx] * B[qz][dz];
1549 v += DQQ1[dz][qy][qx] * B[qz][dz];
1550 w += DQQ2[dz][qy][qx] * G[qz][dz];
1552 const double O11 = d(qx,qy,qz,0,e);
1553 const double O12 = d(qx,qy,qz,1,e);
1554 const double O13 = d(qx,qy,qz,2,e);
1555 const double O21 = symmetric ? O12 : d(qx,qy,qz,3,e);
1556 const double O22 = symmetric ? d(qx,qy,qz,3,e) : d(qx,qy,qz,4,e);
1557 const double O23 = symmetric ? d(qx,qy,qz,4,e) : d(qx,qy,qz,5,e);
1558 const double O31 = symmetric ? O13 : d(qx,qy,qz,6,e);
1559 const double O32 = symmetric ? O23 : d(qx,qy,qz,7,e);
1560 const double O33 = symmetric ? d(qx,qy,qz,5,e) : d(qx,qy,qz,8,e);
1561 const double gX =
u;
1562 const double gY = v;
1563 const double gZ = w;
1564 QQQ0[qz][qy][qx] = (O11*gX) + (O12*gY) + (O13*gZ);
1565 QQQ1[qz][qy][qx] = (O21*gX) + (O22*gY) + (O23*gZ);
1566 QQQ2[qz][qy][qx] = (O31*gX) + (O32*gY) + (O33*gZ);
1571 if (MFEM_THREAD_ID(z) == 0)
1573 MFEM_FOREACH_THREAD(dy,y,D1D)
1575 MFEM_FOREACH_THREAD(qx,x,Q1D)
1577 Bt[dy][qx] =
b(qx,dy);
1578 Gt[dy][qx] = g(qx,dy);
1583 MFEM_FOREACH_THREAD(qz,z,Q1D)
1585 MFEM_FOREACH_THREAD(qy,y,Q1D)
1587 MFEM_FOREACH_THREAD(dx,x,D1D)
1589 double u = 0.0, v = 0.0, w = 0.0;
1591 for (
int qx = 0; qx < Q1D; ++qx)
1593 u += QQQ0[qz][qy][qx] * Gt[dx][qx];
1594 v += QQQ1[qz][qy][qx] * Bt[dx][qx];
1595 w += QQQ2[qz][qy][qx] * Bt[dx][qx];
1597 QQD0[qz][qy][dx] =
u;
1598 QQD1[qz][qy][dx] = v;
1599 QQD2[qz][qy][dx] = w;
1604 MFEM_FOREACH_THREAD(qz,z,Q1D)
1606 MFEM_FOREACH_THREAD(dy,y,D1D)
1608 MFEM_FOREACH_THREAD(dx,x,D1D)
1610 double u = 0.0, v = 0.0, w = 0.0;
1612 for (
int qy = 0; qy < Q1D; ++qy)
1614 u += QQD0[qz][qy][dx] * Bt[dy][qy];
1615 v += QQD1[qz][qy][dx] * Gt[dy][qy];
1616 w += QQD2[qz][qy][dx] * Bt[dy][qy];
1618 QDD0[qz][dy][dx] =
u;
1619 QDD1[qz][dy][dx] = v;
1620 QDD2[qz][dy][dx] = w;
1625 MFEM_FOREACH_THREAD(dz,z,D1D)
1627 MFEM_FOREACH_THREAD(dy,y,D1D)
1629 MFEM_FOREACH_THREAD(dx,x,D1D)
1631 double u = 0.0, v = 0.0, w = 0.0;
1633 for (
int qz = 0; qz < Q1D; ++qz)
1635 u += QDD0[qz][dy][dx] * Bt[dz][qz];
1636 v += QDD1[qz][dy][dx] * Bt[dz][qz];
1637 w += QDD2[qz][dy][dx] * Gt[dz][qz];
1639 y(dx,dy,dz,e) += (
u + v + w);
1646 static void PADiffusionApply(
const int dim,
1651 const Array<double> &B,
1652 const Array<double> &G,
1653 const Array<double> &Bt,
1654 const Array<double> &Gt,
1659 #ifdef MFEM_USE_OCCA 1664 OccaPADiffusionApply2D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1669 OccaPADiffusionApply3D(D1D,Q1D,NE,B,G,Bt,Gt,D,X,Y);
1672 MFEM_ABORT(
"OCCA PADiffusionApply unknown kernel!");
1674 #endif // MFEM_USE_OCCA 1675 const int id = (D1D << 4) | Q1D;
1681 case 0x22:
return SmemPADiffusionApply2D<2,2,16>(NE,symm,B,G,D,X,Y);
1682 case 0x33:
return SmemPADiffusionApply2D<3,3,16>(NE,symm,B,G,D,X,Y);
1683 case 0x44:
return SmemPADiffusionApply2D<4,4,8>(NE,symm,B,G,D,X,Y);
1684 case 0x55:
return SmemPADiffusionApply2D<5,5,8>(NE,symm,B,G,D,X,Y);
1685 case 0x66:
return SmemPADiffusionApply2D<6,6,4>(NE,symm,B,G,D,X,Y);
1686 case 0x77:
return SmemPADiffusionApply2D<7,7,4>(NE,symm,B,G,D,X,Y);
1687 case 0x88:
return SmemPADiffusionApply2D<8,8,2>(NE,symm,B,G,D,X,Y);
1688 case 0x99:
return SmemPADiffusionApply2D<9,9,2>(NE,symm,B,G,D,X,Y);
1689 default:
return PADiffusionApply2D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1697 case 0x22:
return SmemPADiffusionApply3D<2,2>(NE,symm,B,G,D,X,Y);
1698 case 0x23:
return SmemPADiffusionApply3D<2,3>(NE,symm,B,G,D,X,Y);
1699 case 0x34:
return SmemPADiffusionApply3D<3,4>(NE,symm,B,G,D,X,Y);
1700 case 0x45:
return SmemPADiffusionApply3D<4,5>(NE,symm,B,G,D,X,Y);
1701 case 0x46:
return SmemPADiffusionApply3D<4,6>(NE,symm,B,G,D,X,Y);
1702 case 0x56:
return SmemPADiffusionApply3D<5,6>(NE,symm,B,G,D,X,Y);
1703 case 0x58:
return SmemPADiffusionApply3D<5,8>(NE,symm,B,G,D,X,Y);
1704 case 0x67:
return SmemPADiffusionApply3D<6,7>(NE,symm,B,G,D,X,Y);
1705 case 0x78:
return SmemPADiffusionApply3D<7,8>(NE,symm,B,G,D,X,Y);
1706 case 0x89:
return SmemPADiffusionApply3D<8,9>(NE,symm,B,G,D,X,Y);
1707 default:
return PADiffusionApply3D(NE,symm,B,G,Bt,Gt,D,X,Y,D1D,Q1D);
1710 MFEM_ABORT(
"Unknown kernel: 0x"<<std::hex <<
id << std::dec);
1718 ceedOp->AddMult(x, y);
1722 PADiffusionApply(
dim, dofs1D, quad1D, ne, symmetric,
1723 maps->B, maps->G, maps->Bt, maps->Gt,
1728 void DiffusionIntegrator::AddMultTransposePA(
const Vector &x,
Vector &y)
const 1736 MFEM_ABORT(
"DiffusionIntegrator::AddMultTransposePA only implemented in " 1737 "the symmetric case.")
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Abstract class for all finite elements.
int GetNPoints() const
Returns the number of the points in the integration rule.
Class for an integration rule - an Array of IntegrationPoint.
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void SetConstant(double constant)
Set this vector to the given constant.
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
Class to represent a coefficient evaluated at quadrature points.
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
void Project(Coefficient &coeff)
Evaluate the given Coefficient at the quadrature points defined by qs.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
int GetNE() const
Returns number of elements in the mesh.
void PADiffusionSetup2D< 3 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
void PADiffusionSetup2D< 2 >(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
int GetVDim() const
Return the number of values per quadrature point.
Mesh * GetMesh() const
Returns the mesh.
Represent a DiffusionIntegrator with AssemblyLevel::Partial using libCEED.
int GetDim() const
Returns the reference space dimension for the finite element.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
MemoryType
Memory types supported by MFEM.
bool DeviceCanUseCeed()
Function that determines if a CEED kernel should be used, based on the current mfem::Device configura...
int SpaceDimension() const
int GetNE() const
Returns number of elements.
const IntegrationRule & GetRule(const Integrator &integ, const FiniteElement &trial_fe, const FiniteElement &test_fe, ElementTransformation &Trans)
void PADiffusionSetup3D(const int Q1D, const int coeffDim, const int NE, const Array< double > &w, const Vector &j, const Vector &c, Vector &d)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void ProjectTranspose(MatrixCoefficient &coeff)
Project the transpose of coeff.
int Size() const
Return the logical size of the array.
std::pair< int, int > occa_id_t
Class representing the storage layout of a QuadratureFunction.
double u(const Vector &xvec)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.