12 #include "../general/forall.hpp"
15 #include "ceed/mass.hpp"
28 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
29 Device::GetDeviceMemoryType() : pa_mt;
34 if (mesh->
GetNE() == 0) {
return; }
38 if (DeviceCanUseCeed())
41 ceedOp =
new ceed::PAMassIntegrator(fes, *ir, Q);
49 GeometricFactors::JACOBIANS, mt);
53 pa_data.SetSize(ne*nq, mt);
63 coeff(0) = cQ->constant;
66 dynamic_cast<QuadratureFunctionCoefficient*>(Q))
69 MFEM_VERIFY(qFun.
Size() == nq * ne,
70 "Incompatible QuadratureFunction dimension \n");
73 "IntegrationRule used within integrator and in"
74 " QuadratureFunction appear to be different");
76 coeff.
MakeRef(const_cast<QuadratureFunction &>(qFun),0);
82 for (
int e = 0; e < ne; ++e)
85 for (
int q = 0; q < nq; ++q)
87 C(q,e) = Q->Eval(T, ir->
IntPoint(q));
91 if (
dim==1) { MFEM_ABORT(
"Not supported yet... stay tuned!"); }
95 const int Q1D = quad1D;
96 const bool const_c = coeff.
Size() == 1;
97 const bool by_val = map_type == FiniteElement::VALUE;
99 const auto J =
Reshape(geom->J.Read(), Q1D,Q1D,2,2,NE);
100 const auto C = const_c ?
Reshape(coeff.
Read(), 1,1,1) :
102 auto v =
Reshape(pa_data.Write(), Q1D,Q1D, NE);
103 MFEM_FORALL_2D(e, NE, Q1D,Q1D,1,
105 MFEM_FOREACH_THREAD(qx,x,Q1D)
107 MFEM_FOREACH_THREAD(qy,y,Q1D)
109 const double J11 = J(qx,qy,0,0,e);
110 const double J12 = J(qx,qy,1,0,e);
111 const double J21 = J(qx,qy,0,1,e);
112 const double J22 = J(qx,qy,1,1,e);
113 const double detJ = (J11*J22)-(J21*J12);
114 const double coeff = const_c ? C(0,0,0) : C(qx,qy,e);
115 v(qx,qy,e) = W(qx,qy) * coeff * (by_val ? detJ : 1.0/detJ);
123 const int Q1D = quad1D;
124 const bool const_c = coeff.
Size() == 1;
125 const bool by_val = map_type == FiniteElement::VALUE;
127 const auto J =
Reshape(geom->J.Read(), Q1D,Q1D,Q1D,3,3,NE);
128 const auto C = const_c ?
Reshape(coeff.
Read(), 1,1,1,1) :
130 auto v =
Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE);
131 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
133 MFEM_FOREACH_THREAD(qx,x,Q1D)
135 MFEM_FOREACH_THREAD(qy,y,Q1D)
137 MFEM_FOREACH_THREAD(qz,z,Q1D)
139 const double J11 = J(qx,qy,qz,0,0,e);
140 const double J21 = J(qx,qy,qz,1,0,e);
141 const double J31 = J(qx,qy,qz,2,0,e);
142 const double J12 = J(qx,qy,qz,0,1,e);
143 const double J22 = J(qx,qy,qz,1,1,e);
144 const double J32 = J(qx,qy,qz,2,1,e);
145 const double J13 = J(qx,qy,qz,0,2,e);
146 const double J23 = J(qx,qy,qz,1,2,e);
147 const double J33 = J(qx,qy,qz,2,2,e);
148 const double detJ = J11 * (J22 * J33 - J32 * J23) -
149 J21 * (J12 * J33 - J32 * J13) +
150 J31 * (J12 * J23 - J22 * J13);
151 const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e);
152 v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * (by_val ? detJ : 1.0/detJ);
160 template<
int T_D1D = 0,
int T_Q1D = 0>
161 static void PAMassAssembleDiagonal2D(
const int NE,
168 const int D1D = T_D1D ? T_D1D : d1d;
169 const int Q1D = T_Q1D ? T_Q1D : q1d;
170 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
171 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
177 const int D1D = T_D1D ? T_D1D : d1d;
178 const int Q1D = T_Q1D ? T_Q1D : q1d;
179 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
180 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
182 for (
int qx = 0; qx < Q1D; ++qx)
184 for (
int dy = 0; dy < D1D; ++dy)
187 for (
int qy = 0; qy < Q1D; ++qy)
189 QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e);
193 for (
int dy = 0; dy < D1D; ++dy)
195 for (
int dx = 0; dx < D1D; ++dx)
197 for (
int qx = 0; qx < Q1D; ++qx)
199 Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy];
206 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
207 static void SmemPAMassAssembleDiagonal2D(
const int NE,
208 const Array<double> &b_,
214 const int D1D = T_D1D ? T_D1D : d1d;
215 const int Q1D = T_Q1D ? T_Q1D : q1d;
216 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
217 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
218 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
219 MFEM_VERIFY(D1D <= MD1,
"");
220 MFEM_VERIFY(Q1D <= MQ1,
"");
221 auto b =
Reshape(b_.Read(), Q1D, D1D);
222 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
223 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
224 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
226 const int tidz = MFEM_THREAD_ID(z);
227 const int D1D = T_D1D ? T_D1D : d1d;
228 const int Q1D = T_Q1D ? T_Q1D : q1d;
229 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
230 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
231 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
232 MFEM_SHARED
double B[MQ1][MD1];
233 MFEM_SHARED
double QDZ[NBZ][MQ1][MD1];
234 double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz);
237 MFEM_FOREACH_THREAD(d,y,D1D)
239 MFEM_FOREACH_THREAD(q,x,Q1D)
246 MFEM_FOREACH_THREAD(qx,x,Q1D)
248 MFEM_FOREACH_THREAD(dy,y,D1D)
251 for (
int qy = 0; qy < Q1D; ++qy)
253 QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e);
258 MFEM_FOREACH_THREAD(dy,y,D1D)
260 MFEM_FOREACH_THREAD(dx,x,D1D)
262 for (
int qx = 0; qx < Q1D; ++qx)
265 Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy];
272 template<
int T_D1D = 0,
int T_Q1D = 0>
273 static void PAMassAssembleDiagonal3D(
const int NE,
274 const Array<double> &b,
280 const int D1D = T_D1D ? T_D1D : d1d;
281 const int Q1D = T_Q1D ? T_Q1D : q1d;
282 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
283 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
284 auto B =
Reshape(b.Read(), Q1D, D1D);
285 auto D =
Reshape(d.Read(), Q1D, Q1D, Q1D, NE);
286 auto Y =
Reshape(y.ReadWrite(), D1D, D1D, D1D, NE);
289 const int D1D = T_D1D ? T_D1D : d1d;
290 const int Q1D = T_Q1D ? T_Q1D : q1d;
291 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
292 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
293 double QQD[MQ1][MQ1][MD1];
294 double QDD[MQ1][MD1][MD1];
295 for (
int qx = 0; qx < Q1D; ++qx)
297 for (
int qy = 0; qy < Q1D; ++qy)
299 for (
int dz = 0; dz < D1D; ++dz)
301 QQD[qx][qy][dz] = 0.0;
302 for (
int qz = 0; qz < Q1D; ++qz)
304 QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e);
309 for (
int qx = 0; qx < Q1D; ++qx)
311 for (
int dz = 0; dz < D1D; ++dz)
313 for (
int dy = 0; dy < D1D; ++dy)
315 QDD[qx][dy][dz] = 0.0;
316 for (
int qy = 0; qy < Q1D; ++qy)
318 QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz];
323 for (
int dz = 0; dz < D1D; ++dz)
325 for (
int dy = 0; dy < D1D; ++dy)
327 for (
int dx = 0; dx < D1D; ++dx)
330 for (
int qx = 0; qx < Q1D; ++qx)
332 t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz];
334 Y(dx, dy, dz, e) +=
t;
341 template<
int T_D1D = 0,
int T_Q1D = 0>
342 static void SmemPAMassAssembleDiagonal3D(
const int NE,
343 const Array<double> &b_,
349 const int D1D = T_D1D ? T_D1D : d1d;
350 const int Q1D = T_Q1D ? T_Q1D : q1d;
351 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
352 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
353 MFEM_VERIFY(D1D <= MD1,
"");
354 MFEM_VERIFY(Q1D <= MQ1,
"");
355 auto b =
Reshape(b_.Read(), Q1D, D1D);
356 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
357 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
358 MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
360 const int tidz = MFEM_THREAD_ID(z);
361 const int D1D = T_D1D ? T_D1D : d1d;
362 const int Q1D = T_Q1D ? T_Q1D : q1d;
363 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
364 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
365 MFEM_SHARED
double B[MQ1][MD1];
366 MFEM_SHARED
double QQD[MQ1][MQ1][MD1];
367 MFEM_SHARED
double QDD[MQ1][MD1][MD1];
370 MFEM_FOREACH_THREAD(d,y,D1D)
372 MFEM_FOREACH_THREAD(q,x,Q1D)
379 MFEM_FOREACH_THREAD(qx,x,Q1D)
381 MFEM_FOREACH_THREAD(qy,y,Q1D)
383 MFEM_FOREACH_THREAD(dz,z,D1D)
385 QQD[qx][qy][dz] = 0.0;
386 for (
int qz = 0; qz < Q1D; ++qz)
388 QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e);
394 MFEM_FOREACH_THREAD(qx,x,Q1D)
396 MFEM_FOREACH_THREAD(dz,z,D1D)
398 MFEM_FOREACH_THREAD(dy,y,D1D)
400 QDD[qx][dy][dz] = 0.0;
401 for (
int qy = 0; qy < Q1D; ++qy)
403 QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz];
409 MFEM_FOREACH_THREAD(dz,z,D1D)
411 MFEM_FOREACH_THREAD(dy,y,D1D)
413 MFEM_FOREACH_THREAD(dx,x,D1D)
416 for (
int qx = 0; qx < Q1D; ++qx)
418 t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz];
420 Y(dx, dy, dz, e) +=
t;
427 static void PAMassAssembleDiagonal(
const int dim,
const int D1D,
428 const int Q1D,
const int NE,
429 const Array<double> &B,
435 switch ((D1D << 4 ) | Q1D)
437 case 0x22:
return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y);
438 case 0x33:
return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y);
439 case 0x44:
return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y);
440 case 0x55:
return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y);
441 case 0x66:
return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y);
442 case 0x77:
return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y);
443 case 0x88:
return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y);
444 case 0x99:
return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y);
445 default:
return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D);
450 switch ((D1D << 4 ) | Q1D)
452 case 0x23:
return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y);
453 case 0x24:
return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y);
454 case 0x26:
return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y);
455 case 0x34:
return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y);
456 case 0x35:
return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y);
457 case 0x45:
return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y);
458 case 0x48:
return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y);
459 case 0x56:
return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y);
460 case 0x67:
return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y);
461 case 0x78:
return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y);
462 case 0x89:
return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y);
463 default:
return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D);
466 MFEM_ABORT(
"Unknown kernel.");
469 void MassIntegrator::AssembleDiagonalPA(
Vector &diag)
471 if (DeviceCanUseCeed())
473 ceedOp->GetDiagonal(diag);
477 PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag);
484 static void OccaPAMassApply2D(
const int D1D,
493 occa::properties props;
494 props[
"defines/D1D"] = D1D;
495 props[
"defines/Q1D"] = Q1D;
501 const occa_id_t id = std::make_pair(D1D,Q1D);
502 if (!Device::Allows(Backend::OCCA_CUDA))
505 if (OccaMassApply2D_cpu.find(
id) == OccaMassApply2D_cpu.end())
507 const occa::kernel MassApply2D_CPU =
509 "MassApply2D_CPU", props);
510 OccaMassApply2D_cpu.emplace(
id, MassApply2D_CPU);
512 OccaMassApply2D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
517 if (OccaMassApply2D_gpu.find(
id) == OccaMassApply2D_gpu.end())
519 const occa::kernel MassApply2D_GPU =
521 "MassApply2D_GPU", props);
522 OccaMassApply2D_gpu.emplace(
id, MassApply2D_GPU);
524 OccaMassApply2D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
529 static void OccaPAMassApply3D(
const int D1D,
532 const Array<double> &B,
533 const Array<double> &Bt,
538 occa::properties props;
539 props[
"defines/D1D"] = D1D;
540 props[
"defines/Q1D"] = Q1D;
542 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
546 const occa_id_t id = std::make_pair(D1D,Q1D);
547 if (!Device::Allows(Backend::OCCA_CUDA))
550 if (OccaMassApply3D_cpu.find(
id) == OccaMassApply3D_cpu.end())
552 const occa::kernel MassApply3D_CPU =
554 "MassApply3D_CPU", props);
555 OccaMassApply3D_cpu.emplace(
id, MassApply3D_CPU);
557 OccaMassApply3D_cpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
562 if (OccaMassApply3D_gpu.find(
id) == OccaMassApply3D_gpu.end())
564 const occa::kernel MassApply3D_GPU =
566 "MassApply3D_GPU", props);
567 OccaMassApply3D_gpu.emplace(
id, MassApply3D_GPU);
569 OccaMassApply3D_gpu.at(
id)(NE, o_B, o_Bt, o_D, o_X, o_Y);
572 #endif // MFEM_USE_OCCA
574 template<
int T_D1D = 0,
int T_Q1D = 0>
575 static void PAMassApply2D(
const int NE,
576 const Array<double> &b_,
577 const Array<double> &bt_,
584 const int D1D = T_D1D ? T_D1D : d1d;
585 const int Q1D = T_Q1D ? T_Q1D : q1d;
586 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
587 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
588 auto B =
Reshape(b_.Read(), Q1D, D1D);
589 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
590 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
591 auto X =
Reshape(x_.Read(), D1D, D1D, NE);
592 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
595 const int D1D = T_D1D ? T_D1D : d1d;
596 const int Q1D = T_Q1D ? T_Q1D : q1d;
598 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
599 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
600 double sol_xy[max_Q1D][max_Q1D];
601 for (
int qy = 0; qy < Q1D; ++qy)
603 for (
int qx = 0; qx < Q1D; ++qx)
605 sol_xy[qy][qx] = 0.0;
608 for (
int dy = 0; dy < D1D; ++dy)
610 double sol_x[max_Q1D];
611 for (
int qy = 0; qy < Q1D; ++qy)
615 for (
int dx = 0; dx < D1D; ++dx)
617 const double s = X(dx,dy,e);
618 for (
int qx = 0; qx < Q1D; ++qx)
620 sol_x[qx] += B(qx,dx)*
s;
623 for (
int qy = 0; qy < Q1D; ++qy)
625 const double d2q = B(qy,dy);
626 for (
int qx = 0; qx < Q1D; ++qx)
628 sol_xy[qy][qx] += d2q * sol_x[qx];
632 for (
int qy = 0; qy < Q1D; ++qy)
634 for (
int qx = 0; qx < Q1D; ++qx)
636 sol_xy[qy][qx] *= D(qx,qy,e);
639 for (
int qy = 0; qy < Q1D; ++qy)
641 double sol_x[max_D1D];
642 for (
int dx = 0; dx < D1D; ++dx)
646 for (
int qx = 0; qx < Q1D; ++qx)
648 const double s = sol_xy[qy][qx];
649 for (
int dx = 0; dx < D1D; ++dx)
651 sol_x[dx] += Bt(dx,qx) *
s;
654 for (
int dy = 0; dy < D1D; ++dy)
656 const double q2d = Bt(dy,qy);
657 for (
int dx = 0; dx < D1D; ++dx)
659 Y(dx,dy,e) += q2d * sol_x[dx];
666 template<
int T_D1D = 0,
int T_Q1D = 0,
int T_NBZ = 0>
667 static void SmemPAMassApply2D(
const int NE,
668 const Array<double> &b_,
669 const Array<double> &bt_,
676 MFEM_CONTRACT_VAR(bt_);
677 const int D1D = T_D1D ? T_D1D : d1d;
678 const int Q1D = T_Q1D ? T_Q1D : q1d;
679 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
680 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
681 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
682 MFEM_VERIFY(D1D <= MD1,
"");
683 MFEM_VERIFY(Q1D <= MQ1,
"");
684 auto b =
Reshape(b_.Read(), Q1D, D1D);
685 auto D =
Reshape(d_.Read(), Q1D, Q1D, NE);
686 auto x =
Reshape(x_.Read(), D1D, D1D, NE);
687 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, NE);
688 MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
690 const int tidz = MFEM_THREAD_ID(z);
691 const int D1D = T_D1D ? T_D1D : d1d;
692 const int Q1D = T_Q1D ? T_Q1D : q1d;
693 constexpr
int NBZ = T_NBZ ? T_NBZ : 1;
694 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
695 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
696 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
697 MFEM_SHARED
double BBt[MQ1*MD1];
698 double (*B)[MD1] = (double (*)[MD1]) BBt;
699 double (*Bt)[MQ1] = (double (*)[MQ1]) BBt;
700 MFEM_SHARED
double sm0[NBZ][MDQ*MDQ];
701 MFEM_SHARED
double sm1[NBZ][MDQ*MDQ];
702 double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz);
703 double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz);
704 double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz);
705 double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz);
706 MFEM_FOREACH_THREAD(dy,y,D1D)
708 MFEM_FOREACH_THREAD(dx,x,D1D)
710 X[dy][dx] = x(dx,dy,e);
715 MFEM_FOREACH_THREAD(dy,y,D1D)
717 MFEM_FOREACH_THREAD(q,x,Q1D)
724 MFEM_FOREACH_THREAD(dy,y,D1D)
726 MFEM_FOREACH_THREAD(qx,x,Q1D)
729 for (
int dx = 0; dx < D1D; ++dx)
731 dq += X[dy][dx] * B[qx][dx];
737 MFEM_FOREACH_THREAD(qy,y,Q1D)
739 MFEM_FOREACH_THREAD(qx,x,Q1D)
742 for (
int dy = 0; dy < D1D; ++dy)
744 qq += DQ[dy][qx] * B[qy][dy];
746 QQ[qy][qx] = qq * D(qx, qy, e);
752 MFEM_FOREACH_THREAD(dy,y,D1D)
754 MFEM_FOREACH_THREAD(q,x,Q1D)
761 MFEM_FOREACH_THREAD(qy,y,Q1D)
763 MFEM_FOREACH_THREAD(dx,x,D1D)
766 for (
int qx = 0; qx < Q1D; ++qx)
768 dq += QQ[qy][qx] * Bt[dx][qx];
774 MFEM_FOREACH_THREAD(dy,y,D1D)
776 MFEM_FOREACH_THREAD(dx,x,D1D)
779 for (
int qy = 0; qy < Q1D; ++qy)
781 dd += (QD[qy][dx] * Bt[dy][qy]);
789 template<
int T_D1D = 0,
int T_Q1D = 0>
790 static void PAMassApply3D(
const int NE,
791 const Array<double> &b_,
792 const Array<double> &bt_,
799 const int D1D = T_D1D ? T_D1D : d1d;
800 const int Q1D = T_Q1D ? T_Q1D : q1d;
801 MFEM_VERIFY(D1D <=
MAX_D1D,
"");
802 MFEM_VERIFY(Q1D <=
MAX_Q1D,
"");
803 auto B =
Reshape(b_.Read(), Q1D, D1D);
804 auto Bt =
Reshape(bt_.Read(), D1D, Q1D);
805 auto D =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
806 auto X =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
807 auto Y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
810 const int D1D = T_D1D ? T_D1D : d1d;
811 const int Q1D = T_Q1D ? T_Q1D : q1d;
812 constexpr
int max_D1D = T_D1D ? T_D1D :
MAX_D1D;
813 constexpr
int max_Q1D = T_Q1D ? T_Q1D :
MAX_Q1D;
814 double sol_xyz[max_Q1D][max_Q1D][max_Q1D];
815 for (
int qz = 0; qz < Q1D; ++qz)
817 for (
int qy = 0; qy < Q1D; ++qy)
819 for (
int qx = 0; qx < Q1D; ++qx)
821 sol_xyz[qz][qy][qx] = 0.0;
825 for (
int dz = 0; dz < D1D; ++dz)
827 double sol_xy[max_Q1D][max_Q1D];
828 for (
int qy = 0; qy < Q1D; ++qy)
830 for (
int qx = 0; qx < Q1D; ++qx)
832 sol_xy[qy][qx] = 0.0;
835 for (
int dy = 0; dy < D1D; ++dy)
837 double sol_x[max_Q1D];
838 for (
int qx = 0; qx < Q1D; ++qx)
842 for (
int dx = 0; dx < D1D; ++dx)
844 const double s = X(dx,dy,dz,e);
845 for (
int qx = 0; qx < Q1D; ++qx)
847 sol_x[qx] += B(qx,dx) *
s;
850 for (
int qy = 0; qy < Q1D; ++qy)
852 const double wy = B(qy,dy);
853 for (
int qx = 0; qx < Q1D; ++qx)
855 sol_xy[qy][qx] += wy * sol_x[qx];
859 for (
int qz = 0; qz < Q1D; ++qz)
861 const double wz = B(qz,dz);
862 for (
int qy = 0; qy < Q1D; ++qy)
864 for (
int qx = 0; qx < Q1D; ++qx)
866 sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx];
871 for (
int qz = 0; qz < Q1D; ++qz)
873 for (
int qy = 0; qy < Q1D; ++qy)
875 for (
int qx = 0; qx < Q1D; ++qx)
877 sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e);
881 for (
int qz = 0; qz < Q1D; ++qz)
883 double sol_xy[max_D1D][max_D1D];
884 for (
int dy = 0; dy < D1D; ++dy)
886 for (
int dx = 0; dx < D1D; ++dx)
891 for (
int qy = 0; qy < Q1D; ++qy)
893 double sol_x[max_D1D];
894 for (
int dx = 0; dx < D1D; ++dx)
898 for (
int qx = 0; qx < Q1D; ++qx)
900 const double s = sol_xyz[qz][qy][qx];
901 for (
int dx = 0; dx < D1D; ++dx)
903 sol_x[dx] += Bt(dx,qx) *
s;
906 for (
int dy = 0; dy < D1D; ++dy)
908 const double wy = Bt(dy,qy);
909 for (
int dx = 0; dx < D1D; ++dx)
911 sol_xy[dy][dx] += wy * sol_x[dx];
915 for (
int dz = 0; dz < D1D; ++dz)
917 const double wz = Bt(dz,qz);
918 for (
int dy = 0; dy < D1D; ++dy)
920 for (
int dx = 0; dx < D1D; ++dx)
922 Y(dx,dy,dz,e) += wz * sol_xy[dy][dx];
930 template<
int T_D1D = 0,
int T_Q1D = 0>
931 static void SmemPAMassApply3D(
const int NE,
932 const Array<double> &b_,
933 const Array<double> &bt_,
940 MFEM_CONTRACT_VAR(bt_);
941 const int D1D = T_D1D ? T_D1D : d1d;
942 const int Q1D = T_Q1D ? T_Q1D : q1d;
943 constexpr
int M1Q = T_Q1D ? T_Q1D :
MAX_Q1D;
944 constexpr
int M1D = T_D1D ? T_D1D :
MAX_D1D;
945 MFEM_VERIFY(D1D <= M1D,
"");
946 MFEM_VERIFY(Q1D <= M1Q,
"");
947 auto b =
Reshape(b_.Read(), Q1D, D1D);
948 auto d =
Reshape(d_.Read(), Q1D, Q1D, Q1D, NE);
949 auto x =
Reshape(x_.Read(), D1D, D1D, D1D, NE);
950 auto y =
Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE);
951 MFEM_FORALL_3D(e, NE, Q1D, Q1D, 1,
953 const int D1D = T_D1D ? T_D1D : d1d;
954 const int Q1D = T_Q1D ? T_Q1D : q1d;
955 constexpr
int MQ1 = T_Q1D ? T_Q1D :
MAX_Q1D;
956 constexpr
int MD1 = T_D1D ? T_D1D :
MAX_D1D;
957 constexpr
int MDQ = (MQ1 > MD1) ? MQ1 : MD1;
958 MFEM_SHARED
double sDQ[MQ1*MD1];
959 double (*B)[MD1] = (double (*)[MD1]) sDQ;
960 double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ;
961 MFEM_SHARED
double sm0[MDQ*MDQ*MDQ];
962 MFEM_SHARED
double sm1[MDQ*MDQ*MDQ];
963 double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0;
964 double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1;
965 double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0;
966 double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1;
967 double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0;
968 double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1;
969 MFEM_FOREACH_THREAD(dy,y,D1D)
971 MFEM_FOREACH_THREAD(dx,x,D1D)
974 for (
int dz = 0; dz < D1D; ++dz)
976 X[dz][dy][dx] = x(dx,dy,dz,e);
979 MFEM_FOREACH_THREAD(dx,x,Q1D)
981 B[dx][dy] =
b(dx,dy);
985 MFEM_FOREACH_THREAD(dy,y,D1D)
987 MFEM_FOREACH_THREAD(qx,x,Q1D)
991 for (
int dz = 0; dz < D1D; dz++)
996 for (
int dx = 0; dx < D1D; ++dx)
999 for (
int dz = 0; dz < D1D; ++dz)
1001 u[dz] += X[dz][dy][dx] * B[qx][dx];
1005 for (
int dz = 0; dz < D1D; ++dz)
1007 DDQ[dz][dy][qx] = u[dz];
1012 MFEM_FOREACH_THREAD(qy,y,Q1D)
1014 MFEM_FOREACH_THREAD(qx,x,Q1D)
1018 for (
int dz = 0; dz < D1D; dz++)
1023 for (
int dy = 0; dy < D1D; ++dy)
1026 for (
int dz = 0; dz < D1D; dz++)
1028 u[dz] += DDQ[dz][dy][qx] * B[qy][dy];
1032 for (
int dz = 0; dz < D1D; dz++)
1034 DQQ[dz][qy][qx] = u[dz];
1039 MFEM_FOREACH_THREAD(qy,y,Q1D)
1041 MFEM_FOREACH_THREAD(qx,x,Q1D)
1045 for (
int qz = 0; qz < Q1D; qz++)
1050 for (
int dz = 0; dz < D1D; ++dz)
1053 for (
int qz = 0; qz < Q1D; qz++)
1055 u[qz] += DQQ[dz][qy][qx] * B[qz][dz];
1059 for (
int qz = 0; qz < Q1D; qz++)
1061 QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e);
1066 MFEM_FOREACH_THREAD(d,y,D1D)
1068 MFEM_FOREACH_THREAD(q,x,Q1D)
1074 MFEM_FOREACH_THREAD(qy,y,Q1D)
1076 MFEM_FOREACH_THREAD(dx,x,D1D)
1080 for (
int qz = 0; qz < Q1D; ++qz)
1085 for (
int qx = 0; qx < Q1D; ++qx)
1088 for (
int qz = 0; qz < Q1D; ++qz)
1090 u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx];
1094 for (
int qz = 0; qz < Q1D; ++qz)
1096 QQD[qz][qy][dx] = u[qz];
1101 MFEM_FOREACH_THREAD(dy,y,D1D)
1103 MFEM_FOREACH_THREAD(dx,x,D1D)
1107 for (
int qz = 0; qz < Q1D; ++qz)
1112 for (
int qy = 0; qy < Q1D; ++qy)
1115 for (
int qz = 0; qz < Q1D; ++qz)
1117 u[qz] += QQD[qz][qy][dx] * Bt[dy][qy];
1121 for (
int qz = 0; qz < Q1D; ++qz)
1123 QDD[qz][dy][dx] = u[qz];
1128 MFEM_FOREACH_THREAD(dy,y,D1D)
1130 MFEM_FOREACH_THREAD(dx,x,D1D)
1134 for (
int dz = 0; dz < D1D; ++dz)
1139 for (
int qz = 0; qz < Q1D; ++qz)
1142 for (
int dz = 0; dz < D1D; ++dz)
1144 u[dz] += QDD[qz][dy][dx] * Bt[dz][qz];
1148 for (
int dz = 0; dz < D1D; ++dz)
1150 y(dx,dy,dz,e) += u[dz];
1157 static void PAMassApply(
const int dim,
1161 const Array<double> &B,
1162 const Array<double> &Bt,
1167 #ifdef MFEM_USE_OCCA
1172 return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y);
1176 return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y);
1178 MFEM_ABORT(
"OCCA PA Mass Apply unknown kernel!");
1180 #endif // MFEM_USE_OCCA
1181 const int id = (D1D << 4) | Q1D;
1187 case 0x22:
return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y);
1188 case 0x24:
return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y);
1189 case 0x33:
return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y);
1190 case 0x34:
return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y);
1191 case 0x35:
return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y);
1192 case 0x36:
return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y);
1193 case 0x44:
return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y);
1194 case 0x46:
return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y);
1195 case 0x48:
return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y);
1196 case 0x55:
return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y);
1197 case 0x57:
return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y);
1198 case 0x58:
return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y);
1199 case 0x66:
return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y);
1200 case 0x77:
return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y);
1201 case 0x88:
return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y);
1202 case 0x99:
return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y);
1203 default:
return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D);
1210 case 0x22:
return SmemPAMassApply3D<2,2>(NE,B,Bt,D,X,Y);
1211 case 0x23:
return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y);
1212 case 0x24:
return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y);
1213 case 0x26:
return SmemPAMassApply3D<2,6>(NE,B,Bt,D,X,Y);
1214 case 0x34:
return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y);
1215 case 0x35:
return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y);
1216 case 0x36:
return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y);
1217 case 0x37:
return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y);
1218 case 0x45:
return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y);
1219 case 0x46:
return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y);
1220 case 0x48:
return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y);
1221 case 0x56:
return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y);
1222 case 0x58:
return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y);
1223 case 0x67:
return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y);
1224 case 0x78:
return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y);
1225 case 0x89:
return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y);
1226 case 0x9A:
return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y);
1227 default:
return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D);
1230 mfem::out <<
"Unknown kernel 0x" << std::hex <<
id << std::endl;
1231 MFEM_ABORT(
"Unknown kernel.");
1236 if (DeviceCanUseCeed())
1238 ceedOp->AddMult(x, y);
1242 PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y);
int GetNPoints() const
Returns the number of the points in the integration rule.
Abstract class for all finite elements.
int Size() const
Return the logical size of the array.
Class for an integration rule - an Array of IntegrationPoint.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
A coefficient that is constant across space and time.
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
const IntegrationRule & GetElementIntRule(int idx) const
Get the IntegrationRule associated with mesh element idx.
void SetSize(int s)
Resize the vector to size s.
occa::device & OccaDev()
Return the default occa::device used by MFEM.
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
int Size() const
Returns the size of the vector.
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number...
int GetNE() const
Returns number of elements.
const Array< double > & GetWeights() const
Return the quadrature weights in a contiguous array.
int GetMapType() const
Returns the FiniteElement::MapType of the element describing how reference functions are mapped to ph...
std::map< occa_id_t, occa::kernel > occa_kernel_t
Memory< double > & GetMemory()
Return a reference to the Memory object used by the Vector.
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
IntegrationPoint & IntPoint(int i)
Returns a reference to the i-th integration point.
Quadrature function coefficient which requires that the quadrature rules used for this coefficient be...
Mesh * GetMesh() const
Returns the mesh.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
QuadratureSpace * GetSpace() const
Get the associated QuadratureSpace.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
ElementTransformation * GetElementTransformation(int i) const
Returns ElementTransformation for the i-th element.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
MemoryType
Memory types supported by MFEM.
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
void GetElementTransformation(int i, IsoparametricTransformation *ElTr)
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
void MakeRef(Vector &base, int offset, int size)
Reset the Vector to be a reference to a sub-vector of base.
std::pair< int, int > occa_id_t
double u(const Vector &xvec)
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Class representing a function through its values (scalar or vector) at quadrature points...
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).