12 #include "../general/forall.hpp"
15 #include "libceed/mass.hpp"
31 if (mesh->
GetNE() == 0) {
return; }
36 if (DeviceCanUseCeed() && !force)
38 if (ceedDataPtr) {
delete ceedDataPtr; }
39 CeedData* ptr =
new CeedData();
41 InitCeedCoeff(Q, ptr);
42 return CeedPAMassAssemble(fes, *ir, *ptr);
49 GeometricFactors::JACOBIANS);
53 pa_data.SetSize(ne*nq, Device::GetDeviceMemoryType());
63 coeff(0) = cQ->constant;
69 for (
int e = 0; e < ne; ++e)
72 for (
int q = 0; q < nq; ++q)
74 C(q,e) = Q->Eval(T, ir->
IntPoint(q));
78 if (
dim==1) { MFEM_ABORT(
"Not supported yet... stay tuned!"); }
83 const bool const_c = coeff.
Size() == 1;
88 auto v =
Reshape(pa_data.Write(), NQ, NE);
91 for (
int q = 0; q < NQ; ++q)
93 const double J11 = J(q,0,0,e);
94 const double J12 = J(q,1,0,e);
95 const double J21 = J(q,0,1,e);
96 const double J22 = J(q,1,1,e);
97 const double detJ = (J11*J22)-(J21*J12);
98 const double coeff = const_c ? C(0,0) : C(q,e);
99 v(q,e) = w[q] * coeff * detJ;
107 const bool const_c = coeff.
Size() == 1;
112 auto v =
Reshape(pa_data.Write(), NQ,NE);
115 for (
int q = 0; q < NQ; ++q)
117 const double J11 = J(q,0,0,e), J12 = J(q,0,1,e), J13 = J(q,0,2,e);
118 const double J21 = J(q,1,0,e), J22 = J(q,1,1,e), J23 = J(q,1,2,e);
119 const double J31 = J(q,2,0,e), J32 = J(q,2,1,e), J33 = J(q,2,2,e);
120 const double detJ = J11 * (J22 * J33 - J32 * J23) -
121 J21 * (J12 * J33 - J32 * J13) +
122 J31 * (J12 * J23 - J22 * J13);
123 const double coeff = const_c ? C(0,0) : C(q,e);
124 v(q,e) = W[q] * coeff * detJ;
136 template<
int T_D1D = 0,
int T_Q1D = 0>
137 static void PAMassAssembleDiagonal2D(
const int NE,
144 const int D1D = T_D1D ? T_D1D : d1d;
145 const int Q1D = T_Q1D ? T_Q1D : q1d;
146 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
147 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
153 const int D1D = T_D1D ? T_D1D : d1d;
154 const int Q1D = T_Q1D ? T_Q1D : q1d;
155 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
156 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
158 for (
int qx = 0; qx < Q1D; ++qx)
160 for (
int dy = 0; dy < D1D; ++dy)
163 for (
int qy = 0; qy < Q1D; ++qy)
165 QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
169 for (
int dy = 0; dy < D1D; ++dy)
171 for (
int dx = 0; dx < D1D; ++dx)
173 for (
int qx = 0; qx < Q1D; ++qx)
175 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
182 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
183 static void SmemPAMassAssembleDiagonal2D(
const int NE,
184 const Array<double> &b_,
190 const int D1D = T_D1D ? T_D1D : d1d;
191 const int Q1D = T_Q1D ? T_Q1D : q1d;
192 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
193 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
194 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
195 MFEM_VERIFY(D1D <= MD1,
"");
196 MFEM_VERIFY(Q1D <= MQ1,
"");
197 auto b =
Reshape(b_.Read(), Q1D, D1D);
198 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
199 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
200 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
202 const int tidz = MFEM_THREAD_ID(z);
203 const int D1D = T_D1D ? T_D1D : d1d;
204 const int Q1D = T_Q1D ? T_Q1D : q1d;
205 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
206 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
207 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
208 MFEM_SHARED
double B[MQ1][MD1];
209 MFEM_SHARED
double QDZ[NBZ][MQ1][MD1];
210 double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
213 MFEM_FOREACH_THREAD(d,y,D1D)
215 MFEM_FOREACH_THREAD(q,x,Q1D)
222 MFEM_FOREACH_THREAD(qx,x,Q1D)
224 MFEM_FOREACH_THREAD(dy,y,D1D)
227 for (
int qy = 0; qy < Q1D; ++qy)
229 QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
234 MFEM_FOREACH_THREAD(dy,y,D1D)
236 MFEM_FOREACH_THREAD(dx,x,D1D)
238 for (
int qx = 0; qx < Q1D; ++qx)
241 Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
248 template<
int T_D1D = 0,
int T_Q1D = 0>
249 static void PAMassAssembleDiagonal3D(
const int NE,
250 const Array<double> &b,
256 const int D1D = T_D1D ? T_D1D : d1d;
257 const int Q1D = T_Q1D ? T_Q1D : q1d;
258 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
259 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
260 auto B =
Reshape(b.Read(), Q1D, D1D);
261 auto D =
Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
262 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
265 const int D1D = T_D1D ? T_D1D : d1d;
266 const int Q1D = T_Q1D ? T_Q1D : q1d;
267 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
268 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
269 double QQD[MQ1][MQ1][MD1];
270 double QDD[MQ1][MD1][MD1];
271 for (
int qx = 0; qx < Q1D; ++qx)
273 for (
int qy = 0; qy < Q1D; ++qy)
275 for (
int dz = 0; dz < D1D; ++dz)
277 QQD[qx][qy][dz] = 0.0;
278 for (
int qz = 0; qz < Q1D; ++qz)
280 QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
285 for (
int qx = 0; qx < Q1D; ++qx)
287 for (
int dz = 0; dz < D1D; ++dz)
289 for (
int dy = 0; dy < D1D; ++dy)
291 QDD[qx][dy][dz] = 0.0;
292 for (
int qy = 0; qy < Q1D; ++qy)
294 QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
299 for (
int dz = 0; dz < D1D; ++dz)
301 for (
int dy = 0; dy < D1D; ++dy)
303 for (
int dx = 0; dx < D1D; ++dx)
306 for (
int qx = 0; qx < Q1D; ++qx)
308 t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
310 Y(dx, dy, dz, e) += t;
317 template<
int T_D1D = 0,
int T_Q1D = 0>
318 static void SmemPAMassAssembleDiagonal3D(
const int NE,
319 const Array<double> &b_,
325 const int D1D = T_D1D ? T_D1D : d1d;
326 const int Q1D = T_Q1D ? T_Q1D : q1d;
327 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
328 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
329 MFEM_VERIFY(D1D <= MD1,
"");
330 MFEM_VERIFY(Q1D <= MQ1,
"");
331 auto b =
Reshape(b_.Read(), Q1D, D1D);
332 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
333 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
334 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
336 const int tidz = MFEM_THREAD_ID(z);
337 const int D1D = T_D1D ? T_D1D : d1d;
338 const int Q1D = T_Q1D ? T_Q1D : q1d;
339 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
340 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
341 MFEM_SHARED
double B[MQ1][MD1];
342 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
343 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
346 MFEM_FOREACH_THREAD(d,y,D1D)
348 MFEM_FOREACH_THREAD(q,x,Q1D)
355 MFEM_FOREACH_THREAD(qx,x,Q1D)
357 MFEM_FOREACH_THREAD(qy,y,Q1D)
359 MFEM_FOREACH_THREAD(dz,z,D1D)
361 QQD[qx][qy][dz] = 0.0;
362 for (
int qz = 0; qz < Q1D; ++qz)
364 QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
370 MFEM_FOREACH_THREAD(qx,x,Q1D)
372 MFEM_FOREACH_THREAD(dz,z,D1D)
374 MFEM_FOREACH_THREAD(dy,y,D1D)
376 QDD[qx][dy][dz] = 0.0;
377 for (
int qy = 0; qy < Q1D; ++qy)
379 QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
385 MFEM_FOREACH_THREAD(dz,z,D1D)
387 MFEM_FOREACH_THREAD(dy,y,D1D)
389 MFEM_FOREACH_THREAD(dx,x,D1D)
392 for (
int qx = 0; qx < Q1D; ++qx)
394 t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
396 Y(dx, dy, dz, e) += t;
403 static void PAMassAssembleDiagonal(
const int dim,
const int D1D,
404 const int Q1D,
const int NE,
405 const Array<double> &B,
411 switch ((D1D << 4 ) | Q1D)
413 case 0x22:
return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
414 case 0x33:
return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
415 case 0x44:
return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
416 case 0x55:
return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
417 case 0x66:
return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
418 case 0x77:
return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
419 case 0x88:
return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
420 case 0x99:
return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
421 default:
return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
426 switch ((D1D << 4 ) | Q1D)
428 case 0x23:
return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
429 case 0x34:
return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
430 case 0x45:
return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
431 case 0x56:
return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
432 case 0x67:
return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
433 case 0x78:
return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
434 case 0x89:
return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
435 default:
return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
438 MFEM_ABORT(
"Unknown kernel.");
441 void MassIntegrator::AssembleDiagonalPA(
Vector &diag)
443 if (pa_data.Size()==0) { SetupPA(*fespace,
true); }
444 PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
450 static void OccaPAMassApply2D(
const int D1D,
459 occa::properties props;
460 props[
"defines/D1D"] = D1D;
461 props[
"defines/Q1D"] = Q1D;
467 const occa_id_t id = std::make_pair(D1D,Q1D);
468 if (!Device::Allows(Backend::OCCA_CUDA))
471 if (OccaMassApply2D_cpu.find(
id) == OccaMassApply2D_cpu.end())
473 const occa::kernel MassApply2D_CPU =
475 "MassApply2D_CPU", props);
476 OccaMassApply2D_cpu.emplace(
id, MassApply2D_CPU);
478 OccaMassApply2D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
483 if (OccaMassApply2D_gpu.find(
id) == OccaMassApply2D_gpu.end())
485 const occa::kernel MassApply2D_GPU =
487 "MassApply2D_GPU", props);
488 OccaMassApply2D_gpu.emplace(
id, MassApply2D_GPU);
490 OccaMassApply2D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
495 static void OccaPAMassApply3D(
const int D1D,
498 const Array<double> &B,
499 const Array<double> &Bt,
504 occa::properties props;
505 props[
"defines/D1D"] = D1D;
506 props[
"defines/Q1D"] = Q1D;
508 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
512 const occa_id_t id = std::make_pair(D1D,Q1D);
513 if (!Device::Allows(Backend::OCCA_CUDA))
516 if (OccaMassApply3D_cpu.find(
id) == OccaMassApply3D_cpu.end())
518 const occa::kernel MassApply3D_CPU =
520 "MassApply3D_CPU", props);
521 OccaMassApply3D_cpu.emplace(
id, MassApply3D_CPU);
523 OccaMassApply3D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
528 if (OccaMassApply3D_gpu.find(
id) == OccaMassApply3D_gpu.end())
530 const occa::kernel MassApply3D_GPU =
532 "MassApply3D_GPU", props);
533 OccaMassApply3D_gpu.emplace(
id, MassApply3D_GPU);
535 OccaMassApply3D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
538 #endif // MFEM_USE_OCCA
540 template<
int T_D1D = 0,
int T_Q1D = 0>
541 static void PAMassApply2D(
const int NE,
542 const Array<double> &b_,
543 const Array<double> &bt_,
550 const int D1D = T_D1D ? T_D1D : d1d;
551 const int Q1D = T_Q1D ? T_Q1D : q1d;
552 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
553 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
554 auto B =
Reshape(b_.Read(), Q1D, D1D);
555 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
556 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
557 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
558 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
561 const int D1D = T_D1D ? T_D1D : d1d;
562 const int Q1D = T_Q1D ? T_Q1D : q1d;
564 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
565 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
566 double sol_xy[max_Q1D][max_Q1D];
567 for (
int qy = 0; qy < Q1D; ++qy)
569 for (
int qx = 0; qx < Q1D; ++qx)
571 sol_xy[qy][qx] = 0.0;
574 for (
int dy = 0; dy < D1D; ++dy)
576 double sol_x[max_Q1D];
577 for (
int qy = 0; qy < Q1D; ++qy)
581 for (
int dx = 0; dx < D1D; ++dx)
583 const double s = X(dx,dy,e);
584 for (
int qx = 0; qx < Q1D; ++qx)
586 sol_x[qx] += B(qx,dx)* s;
589 for (
int qy = 0; qy < Q1D; ++qy)
591 const double d2q = B(qy,dy);
592 for (
int qx = 0; qx < Q1D; ++qx)
594 sol_xy[qy][qx] += d2q * sol_x[qx];
598 for (
int qy = 0; qy < Q1D; ++qy)
600 for (
int qx = 0; qx < Q1D; ++qx)
602 sol_xy[qy][qx] *= D(qx,qy,e);
605 for (
int qy = 0; qy < Q1D; ++qy)
607 double sol_x[max_D1D];
608 for (
int dx = 0; dx < D1D; ++dx)
612 for (
int qx = 0; qx < Q1D; ++qx)
614 const double s = sol_xy[qy][qx];
615 for (
int dx = 0; dx < D1D; ++dx)
617 sol_x[dx] += Bt(dx,qx) * s;
620 for (
int dy = 0; dy < D1D; ++dy)
622 const double q2d = Bt(dy,qy);
623 for (
int dx = 0; dx < D1D; ++dx)
625 Y(dx,dy,e) += q2d * sol_x[dx];
632 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
633 static void SmemPAMassApply2D(
const int NE,
634 const Array<double> &b_,
635 const Array<double> &bt_,
642 const int D1D = T_D1D ? T_D1D : d1d;
643 const int Q1D = T_Q1D ? T_Q1D : q1d;
644 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
645 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
646 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
647 MFEM_VERIFY(D1D <= MD1,
"");
648 MFEM_VERIFY(Q1D <= MQ1,
"");
649 auto b =
Reshape(b_.Read(), Q1D, D1D);
650 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
651 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
652 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
653 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
655 const int tidz = MFEM_THREAD_ID(z);
656 const int D1D = T_D1D ? T_D1D : d1d;
657 const int Q1D = T_Q1D ? T_Q1D : q1d;
658 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
659 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
660 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
661 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
662 MFEM_SHARED
double BBt[MQ1*MD1];
663 double (*B)[MD1] = (double (*)[MD1]) BBt;
664 double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
665 MFEM_SHARED
double sm0[NBZ][MDQ*MDQ];
666 MFEM_SHARED
double sm1[NBZ][MDQ*MDQ];
667 double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
668 double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
669 double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
670 double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
671 MFEM_FOREACH_THREAD(dy,y,D1D)
673 MFEM_FOREACH_THREAD(dx,x,D1D)
675 X[dy][dx] = x(dx,dy,e);
680 MFEM_FOREACH_THREAD(dy,y,D1D)
682 MFEM_FOREACH_THREAD(q,x,Q1D)
689 MFEM_FOREACH_THREAD(dy,y,D1D)
691 MFEM_FOREACH_THREAD(qx,x,Q1D)
694 for (
int dx = 0; dx < D1D; ++dx)
696 dq += X[dy][dx] * B[qx][dx];
702 MFEM_FOREACH_THREAD(qy,y,Q1D)
704 MFEM_FOREACH_THREAD(qx,x,Q1D)
707 for (
int dy = 0; dy < D1D; ++dy)
709 qq += DQ[dy][qx] * B[qy][dy];
711 QQ[qy][qx] = qq * D(qx, qy, e);
717 MFEM_FOREACH_THREAD(dy,y,D1D)
719 MFEM_FOREACH_THREAD(q,x,Q1D)
726 MFEM_FOREACH_THREAD(qy,y,Q1D)
728 MFEM_FOREACH_THREAD(dx,x,D1D)
731 for (
int qx = 0; qx < Q1D; ++qx)
733 dq += QQ[qy][qx] * Bt[dx][qx];
739 MFEM_FOREACH_THREAD(dy,y,D1D)
741 MFEM_FOREACH_THREAD(dx,x,D1D)
744 for (
int qy = 0; qy < Q1D; ++qy)
746 dd += (QD[qy][dx] * Bt[dy][qy]);
754 template<
int T_D1D = 0,
int T_Q1D = 0>
755 static void PAMassApply3D(
const int NE,
756 const Array<double> &b_,
757 const Array<double> &bt_,
764 const int D1D = T_D1D ? T_D1D : d1d;
765 const int Q1D = T_Q1D ? T_Q1D : q1d;
766 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
767 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
768 auto B =
Reshape(b_.Read(), Q1D, D1D);
769 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
770 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
771 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
772 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
775 const int D1D = T_D1D ? T_D1D : d1d;
776 const int Q1D = T_Q1D ? T_Q1D : q1d;
777 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
778 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
779 double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
780 for (
int qz = 0; qz < Q1D; ++qz)
782 for (
int qy = 0; qy < Q1D; ++qy)
784 for (
int qx = 0; qx < Q1D; ++qx)
786 sol_xyz[qz][qy][qx] = 0.0;
790 for (
int dz = 0; dz < D1D; ++dz)
792 double sol_xy[max_Q1D][max_Q1D];
793 for (
int qy = 0; qy < Q1D; ++qy)
795 for (
int qx = 0; qx < Q1D; ++qx)
797 sol_xy[qy][qx] = 0.0;
800 for (
int dy = 0; dy < D1D; ++dy)
802 double sol_x[max_Q1D];
803 for (
int qx = 0; qx < Q1D; ++qx)
807 for (
int dx = 0; dx < D1D; ++dx)
809 const double s = X(dx,dy,dz,e);
810 for (
int qx = 0; qx < Q1D; ++qx)
812 sol_x[qx] += B(qx,dx) * s;
815 for (
int qy = 0; qy < Q1D; ++qy)
817 const double wy = B(qy,dy);
818 for (
int qx = 0; qx < Q1D; ++qx)
820 sol_xy[qy][qx] += wy * sol_x[qx];
824 for (
int qz = 0; qz < Q1D; ++qz)
826 const double wz = B(qz,dz);
827 for (
int qy = 0; qy < Q1D; ++qy)
829 for (
int qx = 0; qx < Q1D; ++qx)
831 sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
836 for (
int qz = 0; qz < Q1D; ++qz)
838 for (
int qy = 0; qy < Q1D; ++qy)
840 for (
int qx = 0; qx < Q1D; ++qx)
842 sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
846 for (
int qz = 0; qz < Q1D; ++qz)
848 double sol_xy[max_D1D][max_D1D];
849 for (
int dy = 0; dy < D1D; ++dy)
851 for (
int dx = 0; dx < D1D; ++dx)
856 for (
int qy = 0; qy < Q1D; ++qy)
858 double sol_x[max_D1D];
859 for (
int dx = 0; dx < D1D; ++dx)
863 for (
int qx = 0; qx < Q1D; ++qx)
865 const double s = sol_xyz[qz][qy][qx];
866 for (
int dx = 0; dx < D1D; ++dx)
868 sol_x[dx] += Bt(dx,qx) * s;
871 for (
int dy = 0; dy < D1D; ++dy)
873 const double wy = Bt(dy,qy);
874 for (
int dx = 0; dx < D1D; ++dx)
876 sol_xy[dy][dx] += wy * sol_x[dx];
880 for (
int dz = 0; dz < D1D; ++dz)
882 const double wz = Bt(dz,qz);
883 for (
int dy = 0; dy < D1D; ++dy)
885 for (
int dx = 0; dx < D1D; ++dx)
887 Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
895 template<
int T_D1D = 0,
int T_Q1D = 0>
896 static void SmemPAMassApply3D(
const int NE,
897 const Array<double> &b_,
898 const Array<double> &bt_,
905 const int D1D = T_D1D ? T_D1D : d1d;
906 const int Q1D = T_Q1D ? T_Q1D : q1d;
907 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
908 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
909 MFEM_VERIFY(D1D <= M1D,
"");
910 MFEM_VERIFY(Q1D <= M1Q,
"");
911 auto b =
Reshape(b_.Read(), Q1D, D1D);
912 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
913 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
914 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
915 MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
917 const int D1D = T_D1D ? T_D1D : d1d;
918 const int Q1D = T_Q1D ? T_Q1D : q1d;
919 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
920 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
921 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
922 MFEM_SHARED
double sDQ[MQ1*MD1];
923 double (*B)[MD1] = (double (*)[MD1]) sDQ;
924 double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
925 MFEM_SHARED
double sm0[MDQ*MDQ*MDQ];
926 MFEM_SHARED
double sm1[MDQ*MDQ*MDQ];
927 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
928 double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
929 double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
930 double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
931 double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
932 double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
933 MFEM_FOREACH_THREAD(dy,y,D1D)
935 MFEM_FOREACH_THREAD(dx,x,D1D)
938 for (
int dz = 0; dz < D1D; ++dz)
940 X[dz][dy][dx] = x(dx,dy,dz,e);
943 MFEM_FOREACH_THREAD(dx,x,Q1D)
945 B[dx][dy] =
b(dx,dy);
949 MFEM_FOREACH_THREAD(dy,y,D1D)
951 MFEM_FOREACH_THREAD(qx,x,Q1D)
955 for (
int dz = 0; dz < D1D; dz++)
960 for (
int dx = 0; dx < D1D; ++dx)
963 for (
int dz = 0; dz < D1D; ++dz)
965 u[dz] += X[dz][dy][dx] * B[qx][dx];
969 for (
int dz = 0; dz < D1D; ++dz)
971 DDQ[dz][dy][qx] = u[dz];
976 MFEM_FOREACH_THREAD(qy,y,Q1D)
978 MFEM_FOREACH_THREAD(qx,x,Q1D)
982 for (
int dz = 0; dz < D1D; dz++)
987 for (
int dy = 0; dy < D1D; ++dy)
990 for (
int dz = 0; dz < D1D; dz++)
992 u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
996 for (
int dz = 0; dz < D1D; dz++)
998 DQQ[dz][qy][qx] = u[dz];
1003 MFEM_FOREACH_THREAD(qy,y,Q1D)
1005 MFEM_FOREACH_THREAD(qx,x,Q1D)
1009 for (
int qz = 0; qz < Q1D; qz++)
1014 for (
int dz = 0; dz < D1D; ++dz)
1017 for (
int qz = 0; qz < Q1D; qz++)
1019 u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1023 for (
int qz = 0; qz < Q1D; qz++)
1025 QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1030 MFEM_FOREACH_THREAD(d,y,D1D)
1032 MFEM_FOREACH_THREAD(q,x,Q1D)
1038 MFEM_FOREACH_THREAD(qy,y,Q1D)
1040 MFEM_FOREACH_THREAD(dx,x,D1D)
1044 for (
int qz = 0; qz < Q1D; ++qz)
1049 for (
int qx = 0; qx < Q1D; ++qx)
1052 for (
int qz = 0; qz < Q1D; ++qz)
1054 u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1058 for (
int qz = 0; qz < Q1D; ++qz)
1060 QQD[qz][qy][dx] = u[qz];
1065 MFEM_FOREACH_THREAD(dy,y,D1D)
1067 MFEM_FOREACH_THREAD(dx,x,D1D)
1071 for (
int qz = 0; qz < Q1D; ++qz)
1076 for (
int qy = 0; qy < Q1D; ++qy)
1079 for (
int qz = 0; qz < Q1D; ++qz)
1081 u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1085 for (
int qz = 0; qz < Q1D; ++qz)
1087 QDD[qz][dy][dx] = u[qz];
1092 MFEM_FOREACH_THREAD(dy,y,D1D)
1094 MFEM_FOREACH_THREAD(dx,x,D1D)
1098 for (
int dz = 0; dz < D1D; ++dz)
1103 for (
int qz = 0; qz < Q1D; ++qz)
1106 for (
int dz = 0; dz < D1D; ++dz)
1108 u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1112 for (
int dz = 0; dz < D1D; ++dz)
1114 y(dx,dy,dz,e) += u[dz];
1121 static void PAMassApply(
const int dim,
1125 const Array<double> &B,
1126 const Array<double> &Bt,
1131 #ifdef MFEM_USE_OCCA
1136 return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1140 return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1142 MFEM_ABORT(
"OCCA PA Mass Apply unknown kernel!");
1144 #endif // MFEM_USE_OCCA
1145 const int id = (D1D << 4) | Q1D;
1150 case 0x22:
return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1151 case 0x24:
return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1152 case 0x33:
return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1153 case 0x34:
return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1154 case 0x36:
return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1155 case 0x44:
return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1156 case 0x48:
return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1157 case 0x55:
return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1158 case 0x58:
return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1159 case 0x66:
return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1160 case 0x77:
return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1161 case 0x88:
return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1162 case 0x99:
return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1163 default:
return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1170 case 0x23:
return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1171 case 0x24:
return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1172 case 0x34:
return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1173 case 0x36:
return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1174 case 0x45:
return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1175 case 0x46:
return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1176 case 0x48:
return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1177 case 0x56:
return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1178 case 0x58:
return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1179 case 0x67:
return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1180 case 0x78:
return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1181 case 0x89:
return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1182 case 0x9A:
return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1183 default:
return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1186 mfem::out <<
"Unknown kernel 0x" << std::hex <<
id << std::endl;
1187 MFEM_ABORT(
"Unknown kernel.");
1192 #ifdef MFEM_USE_CEED
1193 if (DeviceCanUseCeed())
1195 const CeedScalar *x_ptr;
1198 CeedGetPreferredMemType(internal::ceed, &mem);
1199 if ( Device::Allows(Backend::CUDA) && mem==CEED_MEM_DEVICE )
1208 mem = CEED_MEM_HOST;
1210 CeedVectorSetArray(ceedDataPtr->u, mem, CEED_USE_POINTER,
1211 const_cast<CeedScalar*>(x_ptr));
1212 CeedVectorSetArray(ceedDataPtr->v, mem, CEED_USE_POINTER, y_ptr);
1214 CeedOperatorApplyAdd(ceedDataPtr->oper, ceedDataPtr->u, ceedDataPtr->v,
1215 CEED_REQUEST_IMMEDIATE);
1217 CeedVectorSyncArray(ceedDataPtr->v, mem);
1222 PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for Finite Elements.
int Size() const
Logical size of the array.
double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Class for an integration rule - an Array of IntegrationPoint.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Subclass constant coefficient.
void SetSize(int s)
Resize the vector to size s.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags)
Return the mesh geometric factors corresponding to the given integration rule.
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Mesh * GetMesh() const
Returns the mesh.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement associated with i'th element.
double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
std::pair< int, int > occa_id_t
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...