47void PADiffusionSetup2D<2>(
const int Q1D,
56void PADiffusionSetup2D<3>(
const int Q1D,
64void PADiffusionSetup(
const int dim,
75 if (
dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADiffusionSetup"); }
81 OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
85 MFEM_CONTRACT_VAR(D1D);
87 if (sdim == 2) { PADiffusionSetup2D<2>(Q1D, coeffDim, NE, W, J, C, D); }
88 if (sdim == 3) { PADiffusionSetup2D<3>(Q1D, coeffDim, NE, W, J, C, D); }
95 OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
99 PADiffusionSetup3D(Q1D, coeffDim, NE, W, J, C, D);
104void PADiffusionSetup2D<2>(
const int Q1D,
107 const Array<real_t> &w,
112 const bool symmetric = (coeffDim != 4);
113 const bool const_c = c.Size() == 1;
114 MFEM_VERIFY(coeffDim < 3 ||
115 !const_c,
"Constant matrix coefficient not supported");
116 const auto W =
Reshape(w.Read(), Q1D,Q1D);
117 const auto J =
Reshape(j.Read(), Q1D,Q1D,2,2,NE);
118 const auto C = const_c ?
Reshape(c.Read(), 1,1,1,1) :
120 auto D =
Reshape(d.Write(), Q1D,Q1D, symmetric ? 3 : 4, NE);
123 MFEM_FOREACH_THREAD(qx,x,Q1D)
125 MFEM_FOREACH_THREAD(qy,y,Q1D)
127 const real_t J11 = J(qx,qy,0,0,e);
128 const real_t J21 = J(qx,qy,1,0,e);
129 const real_t J12 = J(qx,qy,0,1,e);
130 const real_t J22 = J(qx,qy,1,1,e);
131 const real_t w_detJ = W(qx,qy) / ((J11*J22)-(J21*J12));
132 if (coeffDim == 3 || coeffDim == 4)
135 const real_t M11 = C(0,qx,qy,e);
136 const real_t M12 = C(1,qx,qy,e);
137 const real_t M21 = symmetric ? M12 : C(2,qx,qy,e);
138 const real_t M22 = symmetric ? C(2,qx,qy,e) : C(3,qx,qy,e);
139 const real_t R11 = M11*J22 - M12*J12;
140 const real_t R21 = M21*J22 - M22*J12;
141 const real_t R12 = -M11*J21 + M12*J11;
142 const real_t R22 = -M21*J21 + M22*J11;
145 D(qx,qy,0,e) = w_detJ * ( J22*R11 - J12*R21);
146 D(qx,qy,1,e) = w_detJ * (-J21*R11 + J11*R21);
147 D(qx,qy,2,e) = w_detJ * (symmetric ? (-J21*R12 + J11*R22) :
148 (J22*R12 - J12*R22));
151 D(qx,qy,3,e) = w_detJ * (-J21*R12 + J11*R22);
156 const real_t C1 = const_c ? C(0,0,0,0) : C(0,qx,qy,e);
157 const real_t C2 = const_c ? C(0,0,0,0) :
158 (coeffDim == 2 ? C(1,qx,qy,e) : C(0,qx,qy,e));
160 D(qx,qy,0,e) = w_detJ * (C2*J12*J12 + C1*J22*J22);
161 D(qx,qy,1,e) = -w_detJ * (C2*J12*J11 + C1*J22*J21);
162 D(qx,qy,2,e) = w_detJ * (C2*J11*J11 + C1*J21*J21);
170void PADiffusionSetup2D<3>(
const int Q1D,
173 const Array<real_t> &w,
178 MFEM_VERIFY(coeffDim == 1,
"Matrix and vector coefficients not supported");
179 constexpr int DIM = 2;
180 constexpr int SDIM = 3;
181 const bool const_c = c.Size() == 1;
182 const auto W =
Reshape(w.Read(), Q1D,Q1D);
184 const auto C = const_c ?
Reshape(c.Read(), 1,1,1) :
186 auto D =
Reshape(d.Write(), Q1D,Q1D, 3, NE);
189 MFEM_FOREACH_THREAD(qx,x,Q1D)
191 MFEM_FOREACH_THREAD(qy,y,Q1D)
193 const real_t wq = W(qx,qy);
194 const real_t J11 = J(qx,qy,0,0,e);
195 const real_t J21 = J(qx,qy,1,0,e);
196 const real_t J31 = J(qx,qy,2,0,e);
197 const real_t J12 = J(qx,qy,0,1,e);
198 const real_t J22 = J(qx,qy,1,1,e);
199 const real_t J32 = J(qx,qy,2,1,e);
200 const real_t E = J11*J11 + J21*J21 + J31*J31;
201 const real_t G = J12*J12 + J22*J22 + J32*J32;
202 const real_t F = J11*J12 + J21*J22 + J31*J32;
203 const real_t iw = 1.0 / sqrt(E*G - F*F);
204 const real_t coeff = const_c ? C(0,0,0) : C(qx,qy,e);
206 D(qx,qy,0,e) =
alpha * G;
207 D(qx,qy,1,e) = -
alpha * F;
208 D(qx,qy,2,e) =
alpha * E;
214void PADiffusionSetup3D(
const int Q1D,
217 const Array<real_t> &w,
222 const bool symmetric = (coeffDim != 9);
223 const bool const_c = c.Size() == 1;
224 MFEM_VERIFY(coeffDim < 6 ||
225 !const_c,
"Constant matrix coefficient not supported");
226 const auto W =
Reshape(w.Read(), Q1D,Q1D,Q1D);
227 const auto J =
Reshape(j.Read(), Q1D,Q1D,Q1D,3,3,NE);
228 const auto C = const_c ?
Reshape(c.Read(), 1,1,1,1,1) :
230 auto D =
Reshape(d.Write(), Q1D,Q1D,Q1D, symmetric ? 6 : 9, NE);
233 MFEM_FOREACH_THREAD(qx,x,Q1D)
235 MFEM_FOREACH_THREAD(qy,y,Q1D)
237 MFEM_FOREACH_THREAD(qz,z,Q1D)
239 const real_t J11 = J(qx,qy,qz,0,0,e);
240 const real_t J21 = J(qx,qy,qz,1,0,e);
241 const real_t J31 = J(qx,qy,qz,2,0,e);
242 const real_t J12 = J(qx,qy,qz,0,1,e);
243 const real_t J22 = J(qx,qy,qz,1,1,e);
244 const real_t J32 = J(qx,qy,qz,2,1,e);
245 const real_t J13 = J(qx,qy,qz,0,2,e);
246 const real_t J23 = J(qx,qy,qz,1,2,e);
247 const real_t J33 = J(qx,qy,qz,2,2,e);
248 const real_t detJ = J11 * (J22 * J33 - J32 * J23) -
249 J21 * (J12 * J33 - J32 * J13) +
250 J31 * (J12 * J23 - J22 * J13);
251 const real_t w_detJ = W(qx,qy,qz) / detJ;
253 const real_t A11 = (J22 * J33) - (J23 * J32);
254 const real_t A12 = (J32 * J13) - (J12 * J33);
255 const real_t A13 = (J12 * J23) - (J22 * J13);
256 const real_t A21 = (J31 * J23) - (J21 * J33);
257 const real_t A22 = (J11 * J33) - (J13 * J31);
258 const real_t A23 = (J21 * J13) - (J11 * J23);
259 const real_t A31 = (J21 * J32) - (J31 * J22);
260 const real_t A32 = (J31 * J12) - (J11 * J32);
261 const real_t A33 = (J11 * J22) - (J12 * J21);
263 if (coeffDim == 6 || coeffDim == 9)
266 const real_t M11 = C(0, qx,qy,qz, e);
267 const real_t M12 = C(1, qx,qy,qz, e);
268 const real_t M13 = C(2, qx,qy,qz, e);
269 const real_t M21 = (!symmetric) ? C(3, qx,qy,qz, e) : M12;
270 const real_t M22 = (!symmetric) ? C(4, qx,qy,qz, e) : C(3, qx,qy,qz, e);
271 const real_t M23 = (!symmetric) ? C(5, qx,qy,qz, e) : C(4, qx,qy,qz, e);
272 const real_t M31 = (!symmetric) ? C(6, qx,qy,qz, e) : M13;
273 const real_t M32 = (!symmetric) ? C(7, qx,qy,qz, e) : M23;
274 const real_t M33 = (!symmetric) ? C(8, qx,qy,qz, e) : C(5, qx,qy,qz, e);
276 const real_t R11 = M11*A11 + M12*A12 + M13*A13;
277 const real_t R12 = M11*A21 + M12*A22 + M13*A23;
278 const real_t R13 = M11*A31 + M12*A32 + M13*A33;
279 const real_t R21 = M21*A11 + M22*A12 + M23*A13;
280 const real_t R22 = M21*A21 + M22*A22 + M23*A23;
281 const real_t R23 = M21*A31 + M22*A32 + M23*A33;
282 const real_t R31 = M31*A11 + M32*A12 + M33*A13;
283 const real_t R32 = M31*A21 + M32*A22 + M33*A23;
284 const real_t R33 = M31*A31 + M32*A32 + M33*A33;
287 D(qx,qy,qz,0,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31);
288 const real_t D12 = w_detJ * (A11*R12 + A12*R22 + A13*R32);
289 D(qx,qy,qz,1,e) = D12;
290 D(qx,qy,qz,2,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33);
292 const real_t D22 = w_detJ * (A21*R12 + A22*R22 + A23*R32);
293 const real_t D23 = w_detJ * (A21*R13 + A22*R23 + A23*R33);
295 const real_t D33 = w_detJ * (A31*R13 + A32*R23 + A33*R33);
297 D(qx,qy,qz,4,e) = symmetric ? D23 : D22;
298 D(qx,qy,qz,5,e) = symmetric ? D33 : D23;
302 D(qx,qy,qz,3,e) = D22;
306 D(qx,qy,qz,3,e) = w_detJ * (A21*R11 + A22*R21 + A23*R31);
307 D(qx,qy,qz,6,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31);
308 D(qx,qy,qz,7,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32);
309 D(qx,qy,qz,8,e) = D33;
314 const real_t C1 = const_c ? C(0,0,0,0,0) : C(0,qx,qy,qz,e);
315 const real_t C2 = const_c ? C(0,0,0,0,0) :
316 (coeffDim == 3 ? C(1,qx,qy,qz,e) : C(0,qx,qy,qz,e));
317 const real_t C3 = const_c ? C(0,0,0,0,0) :
318 (coeffDim == 3 ? C(2,qx,qy,qz,e) : C(0,qx,qy,qz,e));
321 D(qx,qy,qz,0,e) = w_detJ * (C1*A11*A11 + C2*A12*A12 + C3*A13*A13);
322 D(qx,qy,qz,1,e) = w_detJ * (C1*A11*A21 + C2*A12*A22 + C3*A13*A23);
323 D(qx,qy,qz,2,e) = w_detJ * (C1*A11*A31 + C2*A12*A32 + C3*A13*A33);
324 D(qx,qy,qz,3,e) = w_detJ * (C1*A21*A21 + C2*A22*A22 + C3*A23*A23);
325 D(qx,qy,qz,4,e) = w_detJ * (C1*A21*A31 + C2*A22*A32 + C3*A23*A33);
326 D(qx,qy,qz,5,e) = w_detJ * (C1*A31*A31 + C2*A32*A32 + C3*A33*A33);
335void OccaPADiffusionSetup2D(
const int D1D,
338 const Array<real_t> &W,
343 occa::properties props;
344 props[
"defines/D1D"] = D1D;
345 props[
"defines/Q1D"] = Q1D;
350 const bool const_c = C.Size() == 1;
351 const occa_id_t id = std::make_pair(D1D,Q1D);
353 if (OccaDiffSetup2D_ker.find(
id) == OccaDiffSetup2D_ker.end())
355 const occa::kernel DiffusionSetup2D =
357 "DiffusionSetup2D", props);
358 OccaDiffSetup2D_ker.emplace(
id, DiffusionSetup2D);
360 OccaDiffSetup2D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
363void OccaPADiffusionSetup3D(
const int D1D,
366 const Array<real_t> &W,
371 occa::properties props;
372 props[
"defines/D1D"] = D1D;
373 props[
"defines/Q1D"] = Q1D;
378 const bool const_c = C.Size() == 1;
379 const occa_id_t id = std::make_pair(D1D,Q1D);
381 if (OccaDiffSetup3D_ker.find(
id) == OccaDiffSetup3D_ker.end())
383 const occa::kernel DiffusionSetup3D =
385 "DiffusionSetup3D", props);
386 OccaDiffSetup3D_ker.emplace(
id, DiffusionSetup3D);
388 OccaDiffSetup3D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
391void OccaPADiffusionApply2D(
const int D1D,
394 const Array<real_t> &B,
395 const Array<real_t> &G,
396 const Array<real_t> &Bt,
397 const Array<real_t> &Gt,
402 occa::properties props;
403 props[
"defines/D1D"] = D1D;
404 props[
"defines/Q1D"] = Q1D;
407 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
408 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
412 const occa_id_t id = std::make_pair(D1D,Q1D);
413 if (!Device::Allows(Backend::OCCA_CUDA))
416 if (OccaDiffApply2D_cpu.find(
id) == OccaDiffApply2D_cpu.end())
418 const occa::kernel DiffusionApply2D_CPU =
420 "DiffusionApply2D_CPU", props);
421 OccaDiffApply2D_cpu.emplace(
id, DiffusionApply2D_CPU);
423 OccaDiffApply2D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
428 if (OccaDiffApply2D_gpu.find(
id) == OccaDiffApply2D_gpu.end())
430 const occa::kernel DiffusionApply2D_GPU =
432 "DiffusionApply2D_GPU", props);
433 OccaDiffApply2D_gpu.emplace(
id, DiffusionApply2D_GPU);
435 OccaDiffApply2D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
439void OccaPADiffusionApply3D(
const int D1D,
442 const Array<real_t> &B,
443 const Array<real_t> &G,
444 const Array<real_t> &Bt,
445 const Array<real_t> &Gt,
450 occa::properties props;
451 props[
"defines/D1D"] = D1D;
452 props[
"defines/Q1D"] = Q1D;
455 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
456 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
460 const occa_id_t id = std::make_pair(D1D,Q1D);
461 if (!Device::Allows(Backend::OCCA_CUDA))
464 if (OccaDiffApply3D_cpu.find(
id) == OccaDiffApply3D_cpu.end())
466 const occa::kernel DiffusionApply3D_CPU =
468 "DiffusionApply3D_CPU", props);
469 OccaDiffApply3D_cpu.emplace(
id, DiffusionApply3D_CPU);
471 OccaDiffApply3D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
476 if (OccaDiffApply3D_gpu.find(
id) == OccaDiffApply3D_gpu.end())
478 const occa::kernel DiffusionApply3D_GPU =
480 "DiffusionApply3D_GPU", props);
481 OccaDiffApply3D_gpu.emplace(
id, DiffusionApply3D_GPU);
483 OccaDiffApply3D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
static void AddSpecialization()
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass....
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass....
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D(int N, int X, int Y, lambda &&body)
std::map< occa_id_t, occa::kernel > occa_kernel_t
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass....
occa::device & OccaDev()
Return the default occa::device used by MFEM.
std::pair< int, int > occa_id_t