12 #include "../general/forall.hpp"
15 #include "libceed/mass.hpp"
31 if (mesh->
GetNE() == 0) {
return; }
35 if (DeviceCanUseCeed())
38 ceedDataPtr =
new CeedData;
39 InitCeedCoeff(Q, *mesh, *ir, ceedDataPtr);
40 return CeedPAMassAssemble(fes, *ir, *ceedDataPtr);
46 GeometricFactors::JACOBIANS);
50 pa_data.SetSize(ne*nq, Device::GetDeviceMemoryType());
60 coeff(0) = cQ->constant;
63 dynamic_cast<QuadratureFunctionCoefficient*>(Q))
66 MFEM_VERIFY(qFun.
Size() == nq * ne,
67 "Incompatible QuadratureFunction dimension \n");
70 "IntegrationRule used within integrator and in"
71 " QuadratureFunction appear to be different");
73 coeff.
MakeRef(const_cast<QuadratureFunction &>(qFun),0);
79 for (
int e = 0; e < ne; ++e)
82 for (
int q = 0; q < nq; ++q)
84 C(q,e) = Q->Eval(T, ir->
IntPoint(q));
88 if (
dim==1) { MFEM_ABORT(
"Not supported yet... stay tuned!"); }
92 const int Q1D = quad1D;
93 const bool const_c = coeff.
Size() == 1;
95 const auto J =
Reshape(
geom->J.Read(), Q1D,Q1D,2,2,NE);
96 const auto C = const_c ?
Reshape(coeff.
Read(), 1,1,1) :
98 auto v =
Reshape(pa_data.Write(), Q1D,Q1D, NE);
99 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
101 MFEM_FOREACH_THREAD(qx,x,Q1D)
103 MFEM_FOREACH_THREAD(qy,y,Q1D)
105 const double J11 = J(qx,qy,0,0,e);
106 const double J12 = J(qx,qy,1,0,e);
107 const double J21 = J(qx,qy,0,1,e);
108 const double J22 = J(qx,qy,1,1,e);
109 const double detJ = (J11*J22)-(J21*J12);
110 const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
111 v(qx,qy,e) = W(qx,qy) * coeff * detJ;
119 const int Q1D = quad1D;
120 const bool const_c = coeff.
Size() == 1;
122 const auto J =
Reshape(
geom->J.Read(), Q1D,Q1D,Q1D,3,3,NE);
123 const auto C = const_c ?
Reshape(coeff.
Read(), 1,1,1,1) :
125 auto v =
Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
126 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
128 MFEM_FOREACH_THREAD(qx,x,Q1D)
130 MFEM_FOREACH_THREAD(qy,y,Q1D)
132 MFEM_FOREACH_THREAD(qz,z,Q1D)
134 const double J11 = J(qx,qy,qz,0,0,e);
135 const double J21 = J(qx,qy,qz,1,0,e);
136 const double J31 = J(qx,qy,qz,2,0,e);
137 const double J12 = J(qx,qy,qz,0,1,e);
138 const double J22 = J(qx,qy,qz,1,1,e);
139 const double J32 = J(qx,qy,qz,2,1,e);
140 const double J13 = J(qx,qy,qz,0,2,e);
141 const double J23 = J(qx,qy,qz,1,2,e);
142 const double J33 = J(qx,qy,qz,2,2,e);
143 const double detJ = J11 * (J22 * J33 - J32 * J23) -
144 J21 * (J12 * J33 - J32 * J13) +
145 J31 * (J12 * J23 - J22 * J13);
146 const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
147 v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * detJ;
155 template<
int T_D1D = 0,
int T_Q1D = 0>
156 static void PAMassAssembleDiagonal2D(
const int NE,
163 const int D1D = T_D1D ? T_D1D : d1d;
164 const int Q1D = T_Q1D ? T_Q1D : q1d;
165 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
166 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
172 const int D1D = T_D1D ? T_D1D : d1d;
173 const int Q1D = T_Q1D ? T_Q1D : q1d;
174 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
175 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
177 for (
int qx = 0; qx < Q1D; ++qx)
179 for (
int dy = 0; dy < D1D; ++dy)
182 for (
int qy = 0; qy < Q1D; ++qy)
184 QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
188 for (
int dy = 0; dy < D1D; ++dy)
190 for (
int dx = 0; dx < D1D; ++dx)
192 for (
int qx = 0; qx < Q1D; ++qx)
194 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
201 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
202 static void SmemPAMassAssembleDiagonal2D(
const int NE,
203 const Array<double> &b_,
209 const int D1D = T_D1D ? T_D1D : d1d;
210 const int Q1D = T_Q1D ? T_Q1D : q1d;
211 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
212 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
213 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
214 MFEM_VERIFY(D1D <= MD1,
"");
215 MFEM_VERIFY(Q1D <= MQ1,
"");
216 auto b =
Reshape(b_.Read(), Q1D, D1D);
217 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
218 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
219 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
221 const int tidz = MFEM_THREAD_ID(z);
222 const int D1D = T_D1D ? T_D1D : d1d;
223 const int Q1D = T_Q1D ? T_Q1D : q1d;
224 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
225 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
226 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
227 MFEM_SHARED
double B[MQ1][MD1];
228 MFEM_SHARED
double QDZ[NBZ][MQ1][MD1];
229 double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
232 MFEM_FOREACH_THREAD(d,y,D1D)
234 MFEM_FOREACH_THREAD(q,x,Q1D)
241 MFEM_FOREACH_THREAD(qx,x,Q1D)
243 MFEM_FOREACH_THREAD(dy,y,D1D)
246 for (
int qy = 0; qy < Q1D; ++qy)
248 QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
253 MFEM_FOREACH_THREAD(dy,y,D1D)
255 MFEM_FOREACH_THREAD(dx,x,D1D)
257 for (
int qx = 0; qx < Q1D; ++qx)
260 Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
267 template<
int T_D1D = 0,
int T_Q1D = 0>
268 static void PAMassAssembleDiagonal3D(
const int NE,
269 const Array<double> &b,
275 const int D1D = T_D1D ? T_D1D : d1d;
276 const int Q1D = T_Q1D ? T_Q1D : q1d;
277 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
278 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
279 auto B =
Reshape(b.Read(), Q1D, D1D);
280 auto D =
Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
281 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
284 const int D1D = T_D1D ? T_D1D : d1d;
285 const int Q1D = T_Q1D ? T_Q1D : q1d;
286 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
287 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
288 double QQD[MQ1][MQ1][MD1];
289 double QDD[MQ1][MD1][MD1];
290 for (
int qx = 0; qx < Q1D; ++qx)
292 for (
int qy = 0; qy < Q1D; ++qy)
294 for (
int dz = 0; dz < D1D; ++dz)
296 QQD[qx][qy][dz] = 0.0;
297 for (
int qz = 0; qz < Q1D; ++qz)
299 QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
304 for (
int qx = 0; qx < Q1D; ++qx)
306 for (
int dz = 0; dz < D1D; ++dz)
308 for (
int dy = 0; dy < D1D; ++dy)
310 QDD[qx][dy][dz] = 0.0;
311 for (
int qy = 0; qy < Q1D; ++qy)
313 QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
318 for (
int dz = 0; dz < D1D; ++dz)
320 for (
int dy = 0; dy < D1D; ++dy)
322 for (
int dx = 0; dx < D1D; ++dx)
325 for (
int qx = 0; qx < Q1D; ++qx)
327 t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
329 Y(dx, dy, dz, e) += t;
336 template<
int T_D1D = 0,
int T_Q1D = 0>
337 static void SmemPAMassAssembleDiagonal3D(
const int NE,
338 const Array<double> &b_,
344 const int D1D = T_D1D ? T_D1D : d1d;
345 const int Q1D = T_Q1D ? T_Q1D : q1d;
346 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
347 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
348 MFEM_VERIFY(D1D <= MD1,
"");
349 MFEM_VERIFY(Q1D <= MQ1,
"");
350 auto b =
Reshape(b_.Read(), Q1D, D1D);
351 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
352 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
353 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
355 const int tidz = MFEM_THREAD_ID(z);
356 const int D1D = T_D1D ? T_D1D : d1d;
357 const int Q1D = T_Q1D ? T_Q1D : q1d;
358 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
359 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
360 MFEM_SHARED
double B[MQ1][MD1];
361 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
362 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
365 MFEM_FOREACH_THREAD(d,y,D1D)
367 MFEM_FOREACH_THREAD(q,x,Q1D)
374 MFEM_FOREACH_THREAD(qx,x,Q1D)
376 MFEM_FOREACH_THREAD(qy,y,Q1D)
378 MFEM_FOREACH_THREAD(dz,z,D1D)
380 QQD[qx][qy][dz] = 0.0;
381 for (
int qz = 0; qz < Q1D; ++qz)
383 QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
389 MFEM_FOREACH_THREAD(qx,x,Q1D)
391 MFEM_FOREACH_THREAD(dz,z,D1D)
393 MFEM_FOREACH_THREAD(dy,y,D1D)
395 QDD[qx][dy][dz] = 0.0;
396 for (
int qy = 0; qy < Q1D; ++qy)
398 QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
404 MFEM_FOREACH_THREAD(dz,z,D1D)
406 MFEM_FOREACH_THREAD(dy,y,D1D)
408 MFEM_FOREACH_THREAD(dx,x,D1D)
411 for (
int qx = 0; qx < Q1D; ++qx)
413 t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
415 Y(dx, dy, dz, e) += t;
422 static void PAMassAssembleDiagonal(
const int dim,
const int D1D,
423 const int Q1D,
const int NE,
424 const Array<double> &B,
430 switch ((D1D << 4 ) | Q1D)
432 case 0x22:
return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
433 case 0x33:
return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
434 case 0x44:
return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
435 case 0x55:
return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
436 case 0x66:
return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
437 case 0x77:
return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
438 case 0x88:
return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
439 case 0x99:
return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
440 default:
return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
445 switch ((D1D << 4 ) | Q1D)
447 case 0x23:
return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
448 case 0x34:
return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
449 case 0x45:
return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
450 case 0x56:
return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
451 case 0x67:
return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
452 case 0x78:
return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
453 case 0x89:
return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
454 default:
return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
457 MFEM_ABORT(
"Unknown kernel.");
460 void MassIntegrator::AssembleDiagonalPA(
Vector &diag)
462 if (DeviceCanUseCeed())
464 CeedAssembleDiagonal(ceedDataPtr, diag);
468 PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
475 static void OccaPAMassApply2D(
const int D1D,
484 occa::properties props;
485 props[
"defines/D1D"] = D1D;
486 props[
"defines/Q1D"] = Q1D;
492 const occa_id_t id = std::make_pair(D1D,Q1D);
493 if (!Device::Allows(Backend::OCCA_CUDA))
496 if (OccaMassApply2D_cpu.find(
id) == OccaMassApply2D_cpu.end())
498 const occa::kernel MassApply2D_CPU =
500 "MassApply2D_CPU", props);
501 OccaMassApply2D_cpu.emplace(
id, MassApply2D_CPU);
503 OccaMassApply2D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
508 if (OccaMassApply2D_gpu.find(
id) == OccaMassApply2D_gpu.end())
510 const occa::kernel MassApply2D_GPU =
512 "MassApply2D_GPU", props);
513 OccaMassApply2D_gpu.emplace(
id, MassApply2D_GPU);
515 OccaMassApply2D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
520 static void OccaPAMassApply3D(
const int D1D,
523 const Array<double> &B,
524 const Array<double> &Bt,
529 occa::properties props;
530 props[
"defines/D1D"] = D1D;
531 props[
"defines/Q1D"] = Q1D;
533 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
537 const occa_id_t id = std::make_pair(D1D,Q1D);
538 if (!Device::Allows(Backend::OCCA_CUDA))
541 if (OccaMassApply3D_cpu.find(
id) == OccaMassApply3D_cpu.end())
543 const occa::kernel MassApply3D_CPU =
545 "MassApply3D_CPU", props);
546 OccaMassApply3D_cpu.emplace(
id, MassApply3D_CPU);
548 OccaMassApply3D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
553 if (OccaMassApply3D_gpu.find(
id) == OccaMassApply3D_gpu.end())
555 const occa::kernel MassApply3D_GPU =
557 "MassApply3D_GPU", props);
558 OccaMassApply3D_gpu.emplace(
id, MassApply3D_GPU);
560 OccaMassApply3D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
563 #endif // MFEM_USE_OCCA
565 template<
int T_D1D = 0,
int T_Q1D = 0>
566 static void PAMassApply2D(
const int NE,
567 const Array<double> &b_,
568 const Array<double> &bt_,
575 const int D1D = T_D1D ? T_D1D : d1d;
576 const int Q1D = T_Q1D ? T_Q1D : q1d;
577 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
578 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
579 auto B =
Reshape(b_.Read(), Q1D, D1D);
580 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
581 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
582 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
583 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
586 const int D1D = T_D1D ? T_D1D : d1d;
587 const int Q1D = T_Q1D ? T_Q1D : q1d;
589 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
590 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
591 double sol_xy[max_Q1D][max_Q1D];
592 for (
int qy = 0; qy < Q1D; ++qy)
594 for (
int qx = 0; qx < Q1D; ++qx)
596 sol_xy[qy][qx] = 0.0;
599 for (
int dy = 0; dy < D1D; ++dy)
601 double sol_x[max_Q1D];
602 for (
int qy = 0; qy < Q1D; ++qy)
606 for (
int dx = 0; dx < D1D; ++dx)
608 const double s = X(dx,dy,e);
609 for (
int qx = 0; qx < Q1D; ++qx)
611 sol_x[qx] += B(qx,dx)* s;
614 for (
int qy = 0; qy < Q1D; ++qy)
616 const double d2q = B(qy,dy);
617 for (
int qx = 0; qx < Q1D; ++qx)
619 sol_xy[qy][qx] += d2q * sol_x[qx];
623 for (
int qy = 0; qy < Q1D; ++qy)
625 for (
int qx = 0; qx < Q1D; ++qx)
627 sol_xy[qy][qx] *= D(qx,qy,e);
630 for (
int qy = 0; qy < Q1D; ++qy)
632 double sol_x[max_D1D];
633 for (
int dx = 0; dx < D1D; ++dx)
637 for (
int qx = 0; qx < Q1D; ++qx)
639 const double s = sol_xy[qy][qx];
640 for (
int dx = 0; dx < D1D; ++dx)
642 sol_x[dx] += Bt(dx,qx) * s;
645 for (
int dy = 0; dy < D1D; ++dy)
647 const double q2d = Bt(dy,qy);
648 for (
int dx = 0; dx < D1D; ++dx)
650 Y(dx,dy,e) += q2d * sol_x[dx];
657 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
658 static void SmemPAMassApply2D(
const int NE,
659 const Array<double> &b_,
660 const Array<double> &bt_,
667 MFEM_CONTRACT_VAR(bt_);
668 const int D1D = T_D1D ? T_D1D : d1d;
669 const int Q1D = T_Q1D ? T_Q1D : q1d;
670 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
671 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
672 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
673 MFEM_VERIFY(D1D <= MD1,
"");
674 MFEM_VERIFY(Q1D <= MQ1,
"");
675 auto b =
Reshape(b_.Read(), Q1D, D1D);
676 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
677 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
678 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
679 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
681 const int tidz = MFEM_THREAD_ID(z);
682 const int D1D = T_D1D ? T_D1D : d1d;
683 const int Q1D = T_Q1D ? T_Q1D : q1d;
684 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
685 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
686 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
687 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
688 MFEM_SHARED
double BBt[MQ1*MD1];
689 double (*B)[MD1] = (double (*)[MD1]) BBt;
690 double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
691 MFEM_SHARED
double sm0[NBZ][MDQ*MDQ];
692 MFEM_SHARED
double sm1[NBZ][MDQ*MDQ];
693 double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
694 double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
695 double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
696 double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
697 MFEM_FOREACH_THREAD(dy,y,D1D)
699 MFEM_FOREACH_THREAD(dx,x,D1D)
701 X[dy][dx] = x(dx,dy,e);
706 MFEM_FOREACH_THREAD(dy,y,D1D)
708 MFEM_FOREACH_THREAD(q,x,Q1D)
715 MFEM_FOREACH_THREAD(dy,y,D1D)
717 MFEM_FOREACH_THREAD(qx,x,Q1D)
720 for (
int dx = 0; dx < D1D; ++dx)
722 dq += X[dy][dx] * B[qx][dx];
728 MFEM_FOREACH_THREAD(qy,y,Q1D)
730 MFEM_FOREACH_THREAD(qx,x,Q1D)
733 for (
int dy = 0; dy < D1D; ++dy)
735 qq += DQ[dy][qx] * B[qy][dy];
737 QQ[qy][qx] = qq * D(qx, qy, e);
743 MFEM_FOREACH_THREAD(dy,y,D1D)
745 MFEM_FOREACH_THREAD(q,x,Q1D)
752 MFEM_FOREACH_THREAD(qy,y,Q1D)
754 MFEM_FOREACH_THREAD(dx,x,D1D)
757 for (
int qx = 0; qx < Q1D; ++qx)
759 dq += QQ[qy][qx] * Bt[dx][qx];
765 MFEM_FOREACH_THREAD(dy,y,D1D)
767 MFEM_FOREACH_THREAD(dx,x,D1D)
770 for (
int qy = 0; qy < Q1D; ++qy)
772 dd += (QD[qy][dx] * Bt[dy][qy]);
780 template<
int T_D1D = 0,
int T_Q1D = 0>
781 static void PAMassApply3D(
const int NE,
782 const Array<double> &b_,
783 const Array<double> &bt_,
790 const int D1D = T_D1D ? T_D1D : d1d;
791 const int Q1D = T_Q1D ? T_Q1D : q1d;
792 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
793 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
794 auto B =
Reshape(b_.Read(), Q1D, D1D);
795 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
796 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
797 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
798 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
801 const int D1D = T_D1D ? T_D1D : d1d;
802 const int Q1D = T_Q1D ? T_Q1D : q1d;
803 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
804 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
805 double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
806 for (
int qz = 0; qz < Q1D; ++qz)
808 for (
int qy = 0; qy < Q1D; ++qy)
810 for (
int qx = 0; qx < Q1D; ++qx)
812 sol_xyz[qz][qy][qx] = 0.0;
816 for (
int dz = 0; dz < D1D; ++dz)
818 double sol_xy[max_Q1D][max_Q1D];
819 for (
int qy = 0; qy < Q1D; ++qy)
821 for (
int qx = 0; qx < Q1D; ++qx)
823 sol_xy[qy][qx] = 0.0;
826 for (
int dy = 0; dy < D1D; ++dy)
828 double sol_x[max_Q1D];
829 for (
int qx = 0; qx < Q1D; ++qx)
833 for (
int dx = 0; dx < D1D; ++dx)
835 const double s = X(dx,dy,dz,e);
836 for (
int qx = 0; qx < Q1D; ++qx)
838 sol_x[qx] += B(qx,dx) * s;
841 for (
int qy = 0; qy < Q1D; ++qy)
843 const double wy = B(qy,dy);
844 for (
int qx = 0; qx < Q1D; ++qx)
846 sol_xy[qy][qx] += wy * sol_x[qx];
850 for (
int qz = 0; qz < Q1D; ++qz)
852 const double wz = B(qz,dz);
853 for (
int qy = 0; qy < Q1D; ++qy)
855 for (
int qx = 0; qx < Q1D; ++qx)
857 sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
862 for (
int qz = 0; qz < Q1D; ++qz)
864 for (
int qy = 0; qy < Q1D; ++qy)
866 for (
int qx = 0; qx < Q1D; ++qx)
868 sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
872 for (
int qz = 0; qz < Q1D; ++qz)
874 double sol_xy[max_D1D][max_D1D];
875 for (
int dy = 0; dy < D1D; ++dy)
877 for (
int dx = 0; dx < D1D; ++dx)
882 for (
int qy = 0; qy < Q1D; ++qy)
884 double sol_x[max_D1D];
885 for (
int dx = 0; dx < D1D; ++dx)
889 for (
int qx = 0; qx < Q1D; ++qx)
891 const double s = sol_xyz[qz][qy][qx];
892 for (
int dx = 0; dx < D1D; ++dx)
894 sol_x[dx] += Bt(dx,qx) * s;
897 for (
int dy = 0; dy < D1D; ++dy)
899 const double wy = Bt(dy,qy);
900 for (
int dx = 0; dx < D1D; ++dx)
902 sol_xy[dy][dx] += wy * sol_x[dx];
906 for (
int dz = 0; dz < D1D; ++dz)
908 const double wz = Bt(dz,qz);
909 for (
int dy = 0; dy < D1D; ++dy)
911 for (
int dx = 0; dx < D1D; ++dx)
913 Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
921 template<
int T_D1D = 0,
int T_Q1D = 0>
922 static void SmemPAMassApply3D(
const int NE,
923 const Array<double> &b_,
924 const Array<double> &bt_,
931 MFEM_CONTRACT_VAR(bt_);
932 const int D1D = T_D1D ? T_D1D : d1d;
933 const int Q1D = T_Q1D ? T_Q1D : q1d;
934 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
935 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
936 MFEM_VERIFY(D1D <= M1D,
"");
937 MFEM_VERIFY(Q1D <= M1Q,
"");
938 auto b =
Reshape(b_.Read(), Q1D, D1D);
939 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
940 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
941 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
942 MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
944 const int D1D = T_D1D ? T_D1D : d1d;
945 const int Q1D = T_Q1D ? T_Q1D : q1d;
946 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
947 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
948 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
949 MFEM_SHARED
double sDQ[MQ1*MD1];
950 double (*B)[MD1] = (double (*)[MD1]) sDQ;
951 double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
952 MFEM_SHARED
double sm0[MDQ*MDQ*MDQ];
953 MFEM_SHARED
double sm1[MDQ*MDQ*MDQ];
954 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
955 double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
956 double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
957 double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
958 double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
959 double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
960 MFEM_FOREACH_THREAD(dy,y,D1D)
962 MFEM_FOREACH_THREAD(dx,x,D1D)
965 for (
int dz = 0; dz < D1D; ++dz)
967 X[dz][dy][dx] = x(dx,dy,dz,e);
970 MFEM_FOREACH_THREAD(dx,x,Q1D)
972 B[dx][dy] =
b(dx,dy);
976 MFEM_FOREACH_THREAD(dy,y,D1D)
978 MFEM_FOREACH_THREAD(qx,x,Q1D)
982 for (
int dz = 0; dz < D1D; dz++)
987 for (
int dx = 0; dx < D1D; ++dx)
990 for (
int dz = 0; dz < D1D; ++dz)
992 u[dz] += X[dz][dy][dx] * B[qx][dx];
996 for (
int dz = 0; dz < D1D; ++dz)
998 DDQ[dz][dy][qx] = u[dz];
1003 MFEM_FOREACH_THREAD(qy,y,Q1D)
1005 MFEM_FOREACH_THREAD(qx,x,Q1D)
1009 for (
int dz = 0; dz < D1D; dz++)
1014 for (
int dy = 0; dy < D1D; ++dy)
1017 for (
int dz = 0; dz < D1D; dz++)
1019 u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
1023 for (
int dz = 0; dz < D1D; dz++)
1025 DQQ[dz][qy][qx] = u[dz];
1030 MFEM_FOREACH_THREAD(qy,y,Q1D)
1032 MFEM_FOREACH_THREAD(qx,x,Q1D)
1036 for (
int qz = 0; qz < Q1D; qz++)
1041 for (
int dz = 0; dz < D1D; ++dz)
1044 for (
int qz = 0; qz < Q1D; qz++)
1046 u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1050 for (
int qz = 0; qz < Q1D; qz++)
1052 QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1057 MFEM_FOREACH_THREAD(d,y,D1D)
1059 MFEM_FOREACH_THREAD(q,x,Q1D)
1065 MFEM_FOREACH_THREAD(qy,y,Q1D)
1067 MFEM_FOREACH_THREAD(dx,x,D1D)
1071 for (
int qz = 0; qz < Q1D; ++qz)
1076 for (
int qx = 0; qx < Q1D; ++qx)
1079 for (
int qz = 0; qz < Q1D; ++qz)
1081 u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1085 for (
int qz = 0; qz < Q1D; ++qz)
1087 QQD[qz][qy][dx] = u[qz];
1092 MFEM_FOREACH_THREAD(dy,y,D1D)
1094 MFEM_FOREACH_THREAD(dx,x,D1D)
1098 for (
int qz = 0; qz < Q1D; ++qz)
1103 for (
int qy = 0; qy < Q1D; ++qy)
1106 for (
int qz = 0; qz < Q1D; ++qz)
1108 u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1112 for (
int qz = 0; qz < Q1D; ++qz)
1114 QDD[qz][dy][dx] = u[qz];
1119 MFEM_FOREACH_THREAD(dy,y,D1D)
1121 MFEM_FOREACH_THREAD(dx,x,D1D)
1125 for (
int dz = 0; dz < D1D; ++dz)
1130 for (
int qz = 0; qz < Q1D; ++qz)
1133 for (
int dz = 0; dz < D1D; ++dz)
1135 u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1139 for (
int dz = 0; dz < D1D; ++dz)
1141 y(dx,dy,dz,e) += u[dz];
1148 static void PAMassApply(
const int dim,
1152 const Array<double> &B,
1153 const Array<double> &Bt,
1158 #ifdef MFEM_USE_OCCA
1163 return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1167 return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1169 MFEM_ABORT(
"OCCA PA Mass Apply unknown kernel!");
1171 #endif // MFEM_USE_OCCA
1172 const int id = (D1D << 4) | Q1D;
1177 case 0x22:
return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1178 case 0x24:
return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1179 case 0x33:
return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1180 case 0x34:
return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1181 case 0x36:
return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1182 case 0x44:
return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1183 case 0x48:
return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1184 case 0x55:
return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1185 case 0x58:
return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1186 case 0x66:
return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1187 case 0x77:
return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1188 case 0x88:
return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1189 case 0x99:
return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1190 default:
return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1197 case 0x23:
return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1198 case 0x24:
return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1199 case 0x34:
return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1200 case 0x36:
return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1201 case 0x45:
return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1202 case 0x46:
return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1203 case 0x48:
return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1204 case 0x56:
return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1205 case 0x58:
return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1206 case 0x67:
return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1207 case 0x78:
return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1208 case 0x89:
return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1209 case 0x9A:
return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1210 default:
return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1213 mfem::out <<
"Unknown kernel 0x" << std::hex <<
id << std::endl;
1214 MFEM_ABORT(
"Unknown kernel.");
1219 if (DeviceCanUseCeed())
1221 CeedAddMult(ceedDataPtr, x, y);
1225 PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
int Size() const
Return the logical size of the array.
Class for an integration rule - an Array of IntegrationPoint.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
A coefficient that is constant across space and time.
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
void SetSize(int s)
Resize the vector to size s.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Mesh * GetMesh() const
Returns the mesh.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
std::pair< int, int > occa_id_t
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Class representing a function through its values (scalar or vector) at quadrature points...