12 #include "../general/forall.hpp"
15 #include "ceed/mass.hpp"
28 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
29 Device::GetDeviceMemoryType() : pa_mt;
34 if (mesh->
GetNE() == 0) {
return; }
38 if (DeviceCanUseCeed())
41 ceedOp =
new ceed::PAMassIntegrator(fes, *ir, Q);
48 GeometricFactors::JACOBIANS, mt);
52 pa_data.SetSize(ne*nq, mt);
62 coeff(0) = cQ->constant;
65 dynamic_cast<QuadratureFunctionCoefficient*>(Q))
68 MFEM_VERIFY(qFun.
Size() == nq * ne,
69 "Incompatible QuadratureFunction dimension \n");
72 "IntegrationRule used within integrator and in"
73 " QuadratureFunction appear to be different");
75 coeff.
MakeRef(const_cast<QuadratureFunction &>(qFun),0);
81 for (
int e = 0; e < ne; ++e)
84 for (
int q = 0; q < nq; ++q)
86 C(q,e) = Q->Eval(T, ir->
IntPoint(q));
90 if (
dim==1) { MFEM_ABORT(
"Not supported yet... stay tuned!"); }
94 const int Q1D = quad1D;
95 const bool const_c = coeff.
Size() == 1;
97 const auto J =
Reshape(
geom->J.Read(), Q1D,Q1D,2,2,NE);
98 const auto C = const_c ?
Reshape(coeff.
Read(), 1,1,1) :
100 auto v =
Reshape(pa_data.Write(), Q1D,Q1D, NE);
101 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
103 MFEM_FOREACH_THREAD(qx,x,Q1D)
105 MFEM_FOREACH_THREAD(qy,y,Q1D)
107 const double J11 = J(qx,qy,0,0,e);
108 const double J12 = J(qx,qy,1,0,e);
109 const double J21 = J(qx,qy,0,1,e);
110 const double J22 = J(qx,qy,1,1,e);
111 const double detJ = (J11*J22)-(J21*J12);
112 const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
113 v(qx,qy,e) = W(qx,qy) * coeff * detJ;
121 const int Q1D = quad1D;
122 const bool const_c = coeff.
Size() == 1;
124 const auto J =
Reshape(
geom->J.Read(), Q1D,Q1D,Q1D,3,3,NE);
125 const auto C = const_c ?
Reshape(coeff.
Read(), 1,1,1,1) :
127 auto v =
Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
128 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
130 MFEM_FOREACH_THREAD(qx,x,Q1D)
132 MFEM_FOREACH_THREAD(qy,y,Q1D)
134 MFEM_FOREACH_THREAD(qz,z,Q1D)
136 const double J11 = J(qx,qy,qz,0,0,e);
137 const double J21 = J(qx,qy,qz,1,0,e);
138 const double J31 = J(qx,qy,qz,2,0,e);
139 const double J12 = J(qx,qy,qz,0,1,e);
140 const double J22 = J(qx,qy,qz,1,1,e);
141 const double J32 = J(qx,qy,qz,2,1,e);
142 const double J13 = J(qx,qy,qz,0,2,e);
143 const double J23 = J(qx,qy,qz,1,2,e);
144 const double J33 = J(qx,qy,qz,2,2,e);
145 const double detJ = J11 * (J22 * J33 - J32 * J23) -
146 J21 * (J12 * J33 - J32 * J13) +
147 J31 * (J12 * J23 - J22 * J13);
148 const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
149 v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * detJ;
157 template<
int T_D1D = 0,
int T_Q1D = 0>
158 static void PAMassAssembleDiagonal2D(
const int NE,
165 const int D1D = T_D1D ? T_D1D : d1d;
166 const int Q1D = T_Q1D ? T_Q1D : q1d;
167 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
168 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
174 const int D1D = T_D1D ? T_D1D : d1d;
175 const int Q1D = T_Q1D ? T_Q1D : q1d;
176 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
177 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
179 for (
int qx = 0; qx < Q1D; ++qx)
181 for (
int dy = 0; dy < D1D; ++dy)
184 for (
int qy = 0; qy < Q1D; ++qy)
186 QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
190 for (
int dy = 0; dy < D1D; ++dy)
192 for (
int dx = 0; dx < D1D; ++dx)
194 for (
int qx = 0; qx < Q1D; ++qx)
196 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
203 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
204 static void SmemPAMassAssembleDiagonal2D(
const int NE,
205 const Array<double> &b_,
211 const int D1D = T_D1D ? T_D1D : d1d;
212 const int Q1D = T_Q1D ? T_Q1D : q1d;
213 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
214 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
215 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
216 MFEM_VERIFY(D1D <= MD1,
"");
217 MFEM_VERIFY(Q1D <= MQ1,
"");
218 auto b =
Reshape(b_.Read(), Q1D, D1D);
219 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
220 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
221 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
223 const int tidz = MFEM_THREAD_ID(z);
224 const int D1D = T_D1D ? T_D1D : d1d;
225 const int Q1D = T_Q1D ? T_Q1D : q1d;
226 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
227 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
228 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
229 MFEM_SHARED
double B[MQ1][MD1];
230 MFEM_SHARED
double QDZ[NBZ][MQ1][MD1];
231 double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
234 MFEM_FOREACH_THREAD(d,y,D1D)
236 MFEM_FOREACH_THREAD(q,x,Q1D)
243 MFEM_FOREACH_THREAD(qx,x,Q1D)
245 MFEM_FOREACH_THREAD(dy,y,D1D)
248 for (
int qy = 0; qy < Q1D; ++qy)
250 QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
255 MFEM_FOREACH_THREAD(dy,y,D1D)
257 MFEM_FOREACH_THREAD(dx,x,D1D)
259 for (
int qx = 0; qx < Q1D; ++qx)
262 Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
269 template<
int T_D1D = 0,
int T_Q1D = 0>
270 static void PAMassAssembleDiagonal3D(
const int NE,
271 const Array<double> &b,
277 const int D1D = T_D1D ? T_D1D : d1d;
278 const int Q1D = T_Q1D ? T_Q1D : q1d;
279 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
280 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
281 auto B =
Reshape(b.Read(), Q1D, D1D);
282 auto D =
Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
283 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
286 const int D1D = T_D1D ? T_D1D : d1d;
287 const int Q1D = T_Q1D ? T_Q1D : q1d;
288 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
289 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
290 double QQD[MQ1][MQ1][MD1];
291 double QDD[MQ1][MD1][MD1];
292 for (
int qx = 0; qx < Q1D; ++qx)
294 for (
int qy = 0; qy < Q1D; ++qy)
296 for (
int dz = 0; dz < D1D; ++dz)
298 QQD[qx][qy][dz] = 0.0;
299 for (
int qz = 0; qz < Q1D; ++qz)
301 QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
306 for (
int qx = 0; qx < Q1D; ++qx)
308 for (
int dz = 0; dz < D1D; ++dz)
310 for (
int dy = 0; dy < D1D; ++dy)
312 QDD[qx][dy][dz] = 0.0;
313 for (
int qy = 0; qy < Q1D; ++qy)
315 QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
320 for (
int dz = 0; dz < D1D; ++dz)
322 for (
int dy = 0; dy < D1D; ++dy)
324 for (
int dx = 0; dx < D1D; ++dx)
327 for (
int qx = 0; qx < Q1D; ++qx)
329 t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
331 Y(dx, dy, dz, e) +=
t;
338 template<
int T_D1D = 0,
int T_Q1D = 0>
339 static void SmemPAMassAssembleDiagonal3D(
const int NE,
340 const Array<double> &b_,
346 const int D1D = T_D1D ? T_D1D : d1d;
347 const int Q1D = T_Q1D ? T_Q1D : q1d;
348 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
349 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
350 MFEM_VERIFY(D1D <= MD1,
"");
351 MFEM_VERIFY(Q1D <= MQ1,
"");
352 auto b =
Reshape(b_.Read(), Q1D, D1D);
353 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
354 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
355 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
357 const int tidz = MFEM_THREAD_ID(z);
358 const int D1D = T_D1D ? T_D1D : d1d;
359 const int Q1D = T_Q1D ? T_Q1D : q1d;
360 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
361 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
362 MFEM_SHARED
double B[MQ1][MD1];
363 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
364 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
367 MFEM_FOREACH_THREAD(d,y,D1D)
369 MFEM_FOREACH_THREAD(q,x,Q1D)
376 MFEM_FOREACH_THREAD(qx,x,Q1D)
378 MFEM_FOREACH_THREAD(qy,y,Q1D)
380 MFEM_FOREACH_THREAD(dz,z,D1D)
382 QQD[qx][qy][dz] = 0.0;
383 for (
int qz = 0; qz < Q1D; ++qz)
385 QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
391 MFEM_FOREACH_THREAD(qx,x,Q1D)
393 MFEM_FOREACH_THREAD(dz,z,D1D)
395 MFEM_FOREACH_THREAD(dy,y,D1D)
397 QDD[qx][dy][dz] = 0.0;
398 for (
int qy = 0; qy < Q1D; ++qy)
400 QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
406 MFEM_FOREACH_THREAD(dz,z,D1D)
408 MFEM_FOREACH_THREAD(dy,y,D1D)
410 MFEM_FOREACH_THREAD(dx,x,D1D)
413 for (
int qx = 0; qx < Q1D; ++qx)
415 t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
417 Y(dx, dy, dz, e) +=
t;
424 static void PAMassAssembleDiagonal(
const int dim,
const int D1D,
425 const int Q1D,
const int NE,
426 const Array<double> &B,
432 switch ((D1D << 4 ) | Q1D)
434 case 0x22:
return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
435 case 0x33:
return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
436 case 0x44:
return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
437 case 0x55:
return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
438 case 0x66:
return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
439 case 0x77:
return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
440 case 0x88:
return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
441 case 0x99:
return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
442 default:
return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
447 switch ((D1D << 4 ) | Q1D)
449 case 0x23:
return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
450 case 0x24:
return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y);
451 case 0x26:
return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y);
452 case 0x34:
return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
453 case 0x35:
return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y);
454 case 0x45:
return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
455 case 0x48:
return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y);
456 case 0x56:
return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
457 case 0x67:
return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
458 case 0x78:
return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
459 case 0x89:
return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
460 default:
return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
463 MFEM_ABORT(
"Unknown kernel.");
466 void MassIntegrator::AssembleDiagonalPA(
Vector &diag)
468 if (DeviceCanUseCeed())
470 ceedOp->GetDiagonal(diag);
474 PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
481 static void OccaPAMassApply2D(
const int D1D,
490 occa::properties props;
491 props[
"defines/D1D"] = D1D;
492 props[
"defines/Q1D"] = Q1D;
498 const occa_id_t id = std::make_pair(D1D,Q1D);
499 if (!Device::Allows(Backend::OCCA_CUDA))
502 if (OccaMassApply2D_cpu.find(
id) == OccaMassApply2D_cpu.end())
504 const occa::kernel MassApply2D_CPU =
506 "MassApply2D_CPU", props);
507 OccaMassApply2D_cpu.emplace(
id, MassApply2D_CPU);
509 OccaMassApply2D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
514 if (OccaMassApply2D_gpu.find(
id) == OccaMassApply2D_gpu.end())
516 const occa::kernel MassApply2D_GPU =
518 "MassApply2D_GPU", props);
519 OccaMassApply2D_gpu.emplace(
id, MassApply2D_GPU);
521 OccaMassApply2D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
526 static void OccaPAMassApply3D(
const int D1D,
529 const Array<double> &B,
530 const Array<double> &Bt,
535 occa::properties props;
536 props[
"defines/D1D"] = D1D;
537 props[
"defines/Q1D"] = Q1D;
539 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
543 const occa_id_t id = std::make_pair(D1D,Q1D);
544 if (!Device::Allows(Backend::OCCA_CUDA))
547 if (OccaMassApply3D_cpu.find(
id) == OccaMassApply3D_cpu.end())
549 const occa::kernel MassApply3D_CPU =
551 "MassApply3D_CPU", props);
552 OccaMassApply3D_cpu.emplace(
id, MassApply3D_CPU);
554 OccaMassApply3D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
559 if (OccaMassApply3D_gpu.find(
id) == OccaMassApply3D_gpu.end())
561 const occa::kernel MassApply3D_GPU =
563 "MassApply3D_GPU", props);
564 OccaMassApply3D_gpu.emplace(
id, MassApply3D_GPU);
566 OccaMassApply3D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
569 #endif // MFEM_USE_OCCA
571 template<
int T_D1D = 0,
int T_Q1D = 0>
572 static void PAMassApply2D(
const int NE,
573 const Array<double> &b_,
574 const Array<double> &bt_,
581 const int D1D = T_D1D ? T_D1D : d1d;
582 const int Q1D = T_Q1D ? T_Q1D : q1d;
583 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
584 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
585 auto B =
Reshape(b_.Read(), Q1D, D1D);
586 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
587 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
588 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
589 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
592 const int D1D = T_D1D ? T_D1D : d1d;
593 const int Q1D = T_Q1D ? T_Q1D : q1d;
595 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
596 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
597 double sol_xy[max_Q1D][max_Q1D];
598 for (
int qy = 0; qy < Q1D; ++qy)
600 for (
int qx = 0; qx < Q1D; ++qx)
602 sol_xy[qy][qx] = 0.0;
605 for (
int dy = 0; dy < D1D; ++dy)
607 double sol_x[max_Q1D];
608 for (
int qy = 0; qy < Q1D; ++qy)
612 for (
int dx = 0; dx < D1D; ++dx)
614 const double s = X(dx,dy,e);
615 for (
int qx = 0; qx < Q1D; ++qx)
617 sol_x[qx] += B(qx,dx)*
s;
620 for (
int qy = 0; qy < Q1D; ++qy)
622 const double d2q = B(qy,dy);
623 for (
int qx = 0; qx < Q1D; ++qx)
625 sol_xy[qy][qx] += d2q * sol_x[qx];
629 for (
int qy = 0; qy < Q1D; ++qy)
631 for (
int qx = 0; qx < Q1D; ++qx)
633 sol_xy[qy][qx] *= D(qx,qy,e);
636 for (
int qy = 0; qy < Q1D; ++qy)
638 double sol_x[max_D1D];
639 for (
int dx = 0; dx < D1D; ++dx)
643 for (
int qx = 0; qx < Q1D; ++qx)
645 const double s = sol_xy[qy][qx];
646 for (
int dx = 0; dx < D1D; ++dx)
648 sol_x[dx] += Bt(dx,qx) *
s;
651 for (
int dy = 0; dy < D1D; ++dy)
653 const double q2d = Bt(dy,qy);
654 for (
int dx = 0; dx < D1D; ++dx)
656 Y(dx,dy,e) += q2d * sol_x[dx];
663 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
664 static void SmemPAMassApply2D(
const int NE,
665 const Array<double> &b_,
666 const Array<double> &bt_,
673 MFEM_CONTRACT_VAR(bt_);
674 const int D1D = T_D1D ? T_D1D : d1d;
675 const int Q1D = T_Q1D ? T_Q1D : q1d;
676 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
677 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
678 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
679 MFEM_VERIFY(D1D <= MD1,
"");
680 MFEM_VERIFY(Q1D <= MQ1,
"");
681 auto b =
Reshape(b_.Read(), Q1D, D1D);
682 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
683 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
684 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
685 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
687 const int tidz = MFEM_THREAD_ID(z);
688 const int D1D = T_D1D ? T_D1D : d1d;
689 const int Q1D = T_Q1D ? T_Q1D : q1d;
690 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
691 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
692 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
693 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
694 MFEM_SHARED
double BBt[MQ1*MD1];
695 double (*B)[MD1] = (double (*)[MD1]) BBt;
696 double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
697 MFEM_SHARED
double sm0[NBZ][MDQ*MDQ];
698 MFEM_SHARED
double sm1[NBZ][MDQ*MDQ];
699 double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
700 double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
701 double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
702 double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
703 MFEM_FOREACH_THREAD(dy,y,D1D)
705 MFEM_FOREACH_THREAD(dx,x,D1D)
707 X[dy][dx] = x(dx,dy,e);
712 MFEM_FOREACH_THREAD(dy,y,D1D)
714 MFEM_FOREACH_THREAD(q,x,Q1D)
721 MFEM_FOREACH_THREAD(dy,y,D1D)
723 MFEM_FOREACH_THREAD(qx,x,Q1D)
726 for (
int dx = 0; dx < D1D; ++dx)
728 dq += X[dy][dx] * B[qx][dx];
734 MFEM_FOREACH_THREAD(qy,y,Q1D)
736 MFEM_FOREACH_THREAD(qx,x,Q1D)
739 for (
int dy = 0; dy < D1D; ++dy)
741 qq += DQ[dy][qx] * B[qy][dy];
743 QQ[qy][qx] = qq * D(qx, qy, e);
749 MFEM_FOREACH_THREAD(dy,y,D1D)
751 MFEM_FOREACH_THREAD(q,x,Q1D)
758 MFEM_FOREACH_THREAD(qy,y,Q1D)
760 MFEM_FOREACH_THREAD(dx,x,D1D)
763 for (
int qx = 0; qx < Q1D; ++qx)
765 dq += QQ[qy][qx] * Bt[dx][qx];
771 MFEM_FOREACH_THREAD(dy,y,D1D)
773 MFEM_FOREACH_THREAD(dx,x,D1D)
776 for (
int qy = 0; qy < Q1D; ++qy)
778 dd += (QD[qy][dx] * Bt[dy][qy]);
786 template<
int T_D1D = 0,
int T_Q1D = 0>
787 static void PAMassApply3D(
const int NE,
788 const Array<double> &b_,
789 const Array<double> &bt_,
796 const int D1D = T_D1D ? T_D1D : d1d;
797 const int Q1D = T_Q1D ? T_Q1D : q1d;
798 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
799 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
800 auto B =
Reshape(b_.Read(), Q1D, D1D);
801 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
802 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
803 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
804 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
807 const int D1D = T_D1D ? T_D1D : d1d;
808 const int Q1D = T_Q1D ? T_Q1D : q1d;
809 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
810 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
811 double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
812 for (
int qz = 0; qz < Q1D; ++qz)
814 for (
int qy = 0; qy < Q1D; ++qy)
816 for (
int qx = 0; qx < Q1D; ++qx)
818 sol_xyz[qz][qy][qx] = 0.0;
822 for (
int dz = 0; dz < D1D; ++dz)
824 double sol_xy[max_Q1D][max_Q1D];
825 for (
int qy = 0; qy < Q1D; ++qy)
827 for (
int qx = 0; qx < Q1D; ++qx)
829 sol_xy[qy][qx] = 0.0;
832 for (
int dy = 0; dy < D1D; ++dy)
834 double sol_x[max_Q1D];
835 for (
int qx = 0; qx < Q1D; ++qx)
839 for (
int dx = 0; dx < D1D; ++dx)
841 const double s = X(dx,dy,dz,e);
842 for (
int qx = 0; qx < Q1D; ++qx)
844 sol_x[qx] += B(qx,dx) *
s;
847 for (
int qy = 0; qy < Q1D; ++qy)
849 const double wy = B(qy,dy);
850 for (
int qx = 0; qx < Q1D; ++qx)
852 sol_xy[qy][qx] += wy * sol_x[qx];
856 for (
int qz = 0; qz < Q1D; ++qz)
858 const double wz = B(qz,dz);
859 for (
int qy = 0; qy < Q1D; ++qy)
861 for (
int qx = 0; qx < Q1D; ++qx)
863 sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
868 for (
int qz = 0; qz < Q1D; ++qz)
870 for (
int qy = 0; qy < Q1D; ++qy)
872 for (
int qx = 0; qx < Q1D; ++qx)
874 sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
878 for (
int qz = 0; qz < Q1D; ++qz)
880 double sol_xy[max_D1D][max_D1D];
881 for (
int dy = 0; dy < D1D; ++dy)
883 for (
int dx = 0; dx < D1D; ++dx)
888 for (
int qy = 0; qy < Q1D; ++qy)
890 double sol_x[max_D1D];
891 for (
int dx = 0; dx < D1D; ++dx)
895 for (
int qx = 0; qx < Q1D; ++qx)
897 const double s = sol_xyz[qz][qy][qx];
898 for (
int dx = 0; dx < D1D; ++dx)
900 sol_x[dx] += Bt(dx,qx) *
s;
903 for (
int dy = 0; dy < D1D; ++dy)
905 const double wy = Bt(dy,qy);
906 for (
int dx = 0; dx < D1D; ++dx)
908 sol_xy[dy][dx] += wy * sol_x[dx];
912 for (
int dz = 0; dz < D1D; ++dz)
914 const double wz = Bt(dz,qz);
915 for (
int dy = 0; dy < D1D; ++dy)
917 for (
int dx = 0; dx < D1D; ++dx)
919 Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
927 template<
int T_D1D = 0,
int T_Q1D = 0>
928 static void SmemPAMassApply3D(
const int NE,
929 const Array<double> &b_,
930 const Array<double> &bt_,
937 MFEM_CONTRACT_VAR(bt_);
938 const int D1D = T_D1D ? T_D1D : d1d;
939 const int Q1D = T_Q1D ? T_Q1D : q1d;
940 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
941 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
942 MFEM_VERIFY(D1D <= M1D,
"");
943 MFEM_VERIFY(Q1D <= M1Q,
"");
944 auto b =
Reshape(b_.Read(), Q1D, D1D);
945 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
946 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
947 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
948 MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
950 const int D1D = T_D1D ? T_D1D : d1d;
951 const int Q1D = T_Q1D ? T_Q1D : q1d;
952 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
953 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
954 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
955 MFEM_SHARED
double sDQ[MQ1*MD1];
956 double (*B)[MD1] = (double (*)[MD1]) sDQ;
957 double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
958 MFEM_SHARED
double sm0[MDQ*MDQ*MDQ];
959 MFEM_SHARED
double sm1[MDQ*MDQ*MDQ];
960 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
961 double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
962 double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
963 double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
964 double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
965 double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
966 MFEM_FOREACH_THREAD(dy,y,D1D)
968 MFEM_FOREACH_THREAD(dx,x,D1D)
971 for (
int dz = 0; dz < D1D; ++dz)
973 X[dz][dy][dx] = x(dx,dy,dz,e);
976 MFEM_FOREACH_THREAD(dx,x,Q1D)
978 B[dx][dy] =
b(dx,dy);
982 MFEM_FOREACH_THREAD(dy,y,D1D)
984 MFEM_FOREACH_THREAD(qx,x,Q1D)
988 for (
int dz = 0; dz < D1D; dz++)
993 for (
int dx = 0; dx < D1D; ++dx)
996 for (
int dz = 0; dz < D1D; ++dz)
998 u[dz] += X[dz][dy][dx] * B[qx][dx];
1002 for (
int dz = 0; dz < D1D; ++dz)
1004 DDQ[dz][dy][qx] = u[dz];
1009 MFEM_FOREACH_THREAD(qy,y,Q1D)
1011 MFEM_FOREACH_THREAD(qx,x,Q1D)
1015 for (
int dz = 0; dz < D1D; dz++)
1020 for (
int dy = 0; dy < D1D; ++dy)
1023 for (
int dz = 0; dz < D1D; dz++)
1025 u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
1029 for (
int dz = 0; dz < D1D; dz++)
1031 DQQ[dz][qy][qx] = u[dz];
1036 MFEM_FOREACH_THREAD(qy,y,Q1D)
1038 MFEM_FOREACH_THREAD(qx,x,Q1D)
1042 for (
int qz = 0; qz < Q1D; qz++)
1047 for (
int dz = 0; dz < D1D; ++dz)
1050 for (
int qz = 0; qz < Q1D; qz++)
1052 u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1056 for (
int qz = 0; qz < Q1D; qz++)
1058 QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1063 MFEM_FOREACH_THREAD(d,y,D1D)
1065 MFEM_FOREACH_THREAD(q,x,Q1D)
1071 MFEM_FOREACH_THREAD(qy,y,Q1D)
1073 MFEM_FOREACH_THREAD(dx,x,D1D)
1077 for (
int qz = 0; qz < Q1D; ++qz)
1082 for (
int qx = 0; qx < Q1D; ++qx)
1085 for (
int qz = 0; qz < Q1D; ++qz)
1087 u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1091 for (
int qz = 0; qz < Q1D; ++qz)
1093 QQD[qz][qy][dx] = u[qz];
1098 MFEM_FOREACH_THREAD(dy,y,D1D)
1100 MFEM_FOREACH_THREAD(dx,x,D1D)
1104 for (
int qz = 0; qz < Q1D; ++qz)
1109 for (
int qy = 0; qy < Q1D; ++qy)
1112 for (
int qz = 0; qz < Q1D; ++qz)
1114 u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1118 for (
int qz = 0; qz < Q1D; ++qz)
1120 QDD[qz][dy][dx] = u[qz];
1125 MFEM_FOREACH_THREAD(dy,y,D1D)
1127 MFEM_FOREACH_THREAD(dx,x,D1D)
1131 for (
int dz = 0; dz < D1D; ++dz)
1136 for (
int qz = 0; qz < Q1D; ++qz)
1139 for (
int dz = 0; dz < D1D; ++dz)
1141 u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1145 for (
int dz = 0; dz < D1D; ++dz)
1147 y(dx,dy,dz,e) += u[dz];
1154 static void PAMassApply(
const int dim,
1158 const Array<double> &B,
1159 const Array<double> &Bt,
1164 #ifdef MFEM_USE_OCCA
1169 return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1173 return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1175 MFEM_ABORT(
"OCCA PA Mass Apply unknown kernel!");
1177 #endif // MFEM_USE_OCCA
1178 const int id = (D1D << 4) | Q1D;
1183 case 0x22:
return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1184 case 0x24:
return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1185 case 0x33:
return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1186 case 0x34:
return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1187 case 0x35:
return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y);
1188 case 0x36:
return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1189 case 0x44:
return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1190 case 0x46:
return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y);
1191 case 0x48:
return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1192 case 0x55:
return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1193 case 0x57:
return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y);
1194 case 0x58:
return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1195 case 0x66:
return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1196 case 0x77:
return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1197 case 0x88:
return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1198 case 0x99:
return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1199 default:
return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1206 case 0x23:
return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1207 case 0x24:
return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1208 case 0x34:
return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1209 case 0x35:
return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y);
1210 case 0x36:
return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1211 case 0x37:
return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y);
1212 case 0x45:
return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1213 case 0x46:
return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1214 case 0x48:
return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1215 case 0x56:
return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1216 case 0x58:
return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1217 case 0x67:
return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1218 case 0x78:
return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1219 case 0x89:
return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1220 case 0x9A:
return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1221 default:
return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1224 mfem::out <<
"Unknown kernel 0x" << std::hex <<
id << std::endl;
1225 MFEM_ABORT(
"Unknown kernel.");
1230 if (DeviceCanUseCeed())
1232 ceedOp->AddMult(x, y);
1236 PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
int Size() const
Return the logical size of the array.
Class for an integration rule - an Array of IntegrationPoint.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
A coefficient that is constant across space and time.
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
void SetSize(int s)
Resize the vector to size s.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
Mesh * GetMesh() const
Returns the mesh.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
MemoryType
Memory types supported by MFEM.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
std::pair< int, int > occa_id_t
double u(const Vector &xvec)
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Class representing a function through its values (scalar or vector) at quadrature points...
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).