19DiffusionIntegrator::Kernels::Kernels()
71void PADiffusionSetup2D<2>(
const int Q1D,
80void PADiffusionSetup2D<3>(
const int Q1D,
88void PADiffusionSetup(
const int dim,
99 if (
dim == 1) { MFEM_ABORT(
"dim==1 not supported in PADiffusionSetup"); }
105 OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
109 MFEM_CONTRACT_VAR(D1D);
111 if (sdim == 2) { PADiffusionSetup2D<2>(Q1D, coeffDim, NE, W, J, C, D); }
112 if (sdim == 3) { PADiffusionSetup2D<3>(Q1D, coeffDim, NE, W, J, C, D); }
119 OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
123 PADiffusionSetup3D(Q1D, coeffDim, NE, W, J, C, D);
128void PADiffusionSetup2D<2>(
const int Q1D,
131 const Array<real_t> &w,
136 const bool symmetric = (coeffDim != 4);
137 const bool const_c = c.Size() == coeffDim;
138 const auto W =
Reshape(w.Read(), Q1D,Q1D);
139 const auto J =
Reshape(j.Read(), Q1D,Q1D,2,2,NE);
140 const auto C = const_c ?
Reshape(c.Read(), coeffDim,1,1,1) :
142 auto D =
Reshape(d.Write(), Q1D,Q1D, symmetric ? 3 : 4, NE);
144 auto get_coeff = [const_c] MFEM_HOST_DEVICE
145 (
const decltype(C) &C,
int i,
int qx,
int qy,
int e)
147 return const_c ? C(i,0,0,0) : C(i,qx,qy,e);
152 MFEM_FOREACH_THREAD(qx,x,Q1D)
154 MFEM_FOREACH_THREAD(qy,y,Q1D)
156 const real_t J11 = J(qx,qy,0,0,e);
157 const real_t J21 = J(qx,qy,1,0,e);
158 const real_t J12 = J(qx,qy,0,1,e);
159 const real_t J22 = J(qx,qy,1,1,e);
160 const real_t w_detJ = W(qx,qy) / ((J11*J22)-(J21*J12));
161 if (coeffDim == 3 || coeffDim == 4)
164 const real_t M11 = get_coeff(C,0,qx,qy,e);
165 const real_t M12 = get_coeff(C,1,qx,qy,e);
166 const real_t M21 = symmetric ? M12 : get_coeff(C,2,qx,qy,e);
167 const real_t M22 = symmetric ? get_coeff(C,2,qx,qy,e)
168 : get_coeff(C,3,qx,qy,e);
169 const real_t R11 = M11*J22 - M12*J12;
170 const real_t R21 = M21*J22 - M22*J12;
171 const real_t R12 = -M11*J21 + M12*J11;
172 const real_t R22 = -M21*J21 + M22*J11;
175 D(qx,qy,0,e) = w_detJ * ( J22*R11 - J12*R21);
176 D(qx,qy,1,e) = w_detJ * (-J21*R11 + J11*R21);
177 D(qx,qy,2,e) = w_detJ * (symmetric ? (-J21*R12 + J11*R22) :
178 (J22*R12 - J12*R22));
181 D(qx,qy,3,e) = w_detJ * (-J21*R12 + J11*R22);
186 const real_t C1 = get_coeff(C,0,qx,qy,e);
187 const real_t C2 = get_coeff(C,coeffDim==2?1:0,qx,qy,e);
189 D(qx,qy,0,e) = w_detJ * (C2*J12*J12 + C1*J22*J22);
190 D(qx,qy,1,e) = -w_detJ * (C2*J12*J11 + C1*J22*J21);
191 D(qx,qy,2,e) = w_detJ * (C2*J11*J11 + C1*J21*J21);
199void PADiffusionSetup2D<3>(
const int Q1D,
202 const Array<real_t> &w,
207 MFEM_VERIFY(coeffDim == 1,
"Matrix and vector coefficients not supported");
208 constexpr int DIM = 2;
209 constexpr int SDIM = 3;
210 const bool const_c = c.Size() == 1;
211 const auto W =
Reshape(w.Read(), Q1D,Q1D);
213 const auto C = const_c ?
Reshape(c.Read(), 1,1,1) :
215 auto D =
Reshape(d.Write(), Q1D,Q1D, 3, NE);
218 MFEM_FOREACH_THREAD(qx,x,Q1D)
220 MFEM_FOREACH_THREAD(qy,y,Q1D)
222 const real_t wq = W(qx,qy);
223 const real_t J11 = J(qx,qy,0,0,e);
224 const real_t J21 = J(qx,qy,1,0,e);
225 const real_t J31 = J(qx,qy,2,0,e);
226 const real_t J12 = J(qx,qy,0,1,e);
227 const real_t J22 = J(qx,qy,1,1,e);
228 const real_t J32 = J(qx,qy,2,1,e);
229 const real_t E = J11*J11 + J21*J21 + J31*J31;
230 const real_t G = J12*J12 + J22*J22 + J32*J32;
231 const real_t F = J11*J12 + J21*J22 + J31*J32;
232 const real_t iw = 1.0 / std::sqrt(E*G - F*F);
233 const real_t coeff = const_c ? C(0,0,0) : C(qx,qy,e);
235 D(qx,qy,0,e) =
alpha * G;
236 D(qx,qy,1,e) = -
alpha * F;
237 D(qx,qy,2,e) =
alpha * E;
243void PADiffusionSetup3D(
const int Q1D,
246 const Array<real_t> &w,
251 const bool symmetric = (coeffDim != 9);
252 const bool const_c = c.Size() == coeffDim;
253 const auto W =
Reshape(w.Read(), Q1D,Q1D,Q1D);
254 const auto J =
Reshape(j.Read(), Q1D,Q1D,Q1D,3,3,NE);
255 const auto C = const_c ?
Reshape(c.Read(), coeffDim,1,1,1,1) :
257 auto D =
Reshape(d.Write(), Q1D,Q1D,Q1D, symmetric ? 6 : 9, NE);
259 auto get_coeff = [const_c] MFEM_HOST_DEVICE
260 (
const decltype(C) &C,
int i,
int qx,
int qy,
int qz,
int e)
262 return const_c ? C(i,0,0,0,0) : C(i,qx,qy,qz,e);
267 MFEM_FOREACH_THREAD(qx,x,Q1D)
269 MFEM_FOREACH_THREAD(qy,y,Q1D)
271 MFEM_FOREACH_THREAD(qz,z,Q1D)
273 const real_t J11 = J(qx,qy,qz,0,0,e);
274 const real_t J21 = J(qx,qy,qz,1,0,e);
275 const real_t J31 = J(qx,qy,qz,2,0,e);
276 const real_t J12 = J(qx,qy,qz,0,1,e);
277 const real_t J22 = J(qx,qy,qz,1,1,e);
278 const real_t J32 = J(qx,qy,qz,2,1,e);
279 const real_t J13 = J(qx,qy,qz,0,2,e);
280 const real_t J23 = J(qx,qy,qz,1,2,e);
281 const real_t J33 = J(qx,qy,qz,2,2,e);
282 const real_t detJ = J11 * (J22 * J33 - J32 * J23) -
283 J21 * (J12 * J33 - J32 * J13) +
284 J31 * (J12 * J23 - J22 * J13);
285 const real_t w_detJ = W(qx,qy,qz) / detJ;
287 const real_t A11 = (J22 * J33) - (J23 * J32);
288 const real_t A12 = (J32 * J13) - (J12 * J33);
289 const real_t A13 = (J12 * J23) - (J22 * J13);
290 const real_t A21 = (J31 * J23) - (J21 * J33);
291 const real_t A22 = (J11 * J33) - (J13 * J31);
292 const real_t A23 = (J21 * J13) - (J11 * J23);
293 const real_t A31 = (J21 * J32) - (J31 * J22);
294 const real_t A32 = (J31 * J12) - (J11 * J32);
295 const real_t A33 = (J11 * J22) - (J12 * J21);
297 if (coeffDim == 6 || coeffDim == 9)
300 const real_t M11 = get_coeff(C, 0, qx,qy,qz, e);
301 const real_t M12 = get_coeff(C, 1, qx,qy,qz, e);
302 const real_t M13 = get_coeff(C, 2, qx,qy,qz, e);
303 const real_t M21 = (!symmetric) ? get_coeff(C, 3, qx,qy,qz, e) : M12;
304 const real_t M22 = (!symmetric) ? get_coeff(C, 4, qx,qy,qz, e)
305 : get_coeff(C, 3, qx,qy,qz, e);
306 const real_t M23 = (!symmetric) ? get_coeff(C, 5, qx,qy,qz, e)
307 : get_coeff(C, 4, qx,qy,qz, e);
308 const real_t M31 = (!symmetric) ? get_coeff(C, 6, qx,qy,qz, e) : M13;
309 const real_t M32 = (!symmetric) ? get_coeff(C, 7, qx,qy,qz, e) : M23;
310 const real_t M33 = (!symmetric) ? get_coeff(C, 8, qx,qy,qz, e)
311 : get_coeff(C, 5, qx,qy,qz, e);
313 const real_t R11 = M11*A11 + M12*A12 + M13*A13;
314 const real_t R12 = M11*A21 + M12*A22 + M13*A23;
315 const real_t R13 = M11*A31 + M12*A32 + M13*A33;
316 const real_t R21 = M21*A11 + M22*A12 + M23*A13;
317 const real_t R22 = M21*A21 + M22*A22 + M23*A23;
318 const real_t R23 = M21*A31 + M22*A32 + M23*A33;
319 const real_t R31 = M31*A11 + M32*A12 + M33*A13;
320 const real_t R32 = M31*A21 + M32*A22 + M33*A23;
321 const real_t R33 = M31*A31 + M32*A32 + M33*A33;
324 D(qx,qy,qz,0,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31);
325 const real_t D12 = w_detJ * (A11*R12 + A12*R22 + A13*R32);
326 D(qx,qy,qz,1,e) = D12;
327 D(qx,qy,qz,2,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33);
329 const real_t D22 = w_detJ * (A21*R12 + A22*R22 + A23*R32);
330 const real_t D23 = w_detJ * (A21*R13 + A22*R23 + A23*R33);
332 const real_t D33 = w_detJ * (A31*R13 + A32*R23 + A33*R33);
334 D(qx,qy,qz,4,e) = symmetric ? D23 : D22;
335 D(qx,qy,qz,5,e) = symmetric ? D33 : D23;
339 D(qx,qy,qz,3,e) = D22;
343 D(qx,qy,qz,3,e) = w_detJ * (A21*R11 + A22*R21 + A23*R31);
344 D(qx,qy,qz,6,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31);
345 D(qx,qy,qz,7,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32);
346 D(qx,qy,qz,8,e) = D33;
351 const real_t C1 = get_coeff(C,0,qx,qy,qz,e);
352 const real_t C2 = get_coeff(C,coeffDim==3?1:0,qx,qy,qz,e);
353 const real_t C3 = get_coeff(C,coeffDim==3?2:0,qx,qy,qz,e);
356 D(qx,qy,qz,0,e) = w_detJ * (C1*A11*A11 + C2*A12*A12 + C3*A13*A13);
357 D(qx,qy,qz,1,e) = w_detJ * (C1*A11*A21 + C2*A12*A22 + C3*A13*A23);
358 D(qx,qy,qz,2,e) = w_detJ * (C1*A11*A31 + C2*A12*A32 + C3*A13*A33);
359 D(qx,qy,qz,3,e) = w_detJ * (C1*A21*A21 + C2*A22*A22 + C3*A23*A23);
360 D(qx,qy,qz,4,e) = w_detJ * (C1*A21*A31 + C2*A22*A32 + C3*A23*A33);
361 D(qx,qy,qz,5,e) = w_detJ * (C1*A31*A31 + C2*A32*A32 + C3*A33*A33);
370void OccaPADiffusionSetup2D(
const int D1D,
373 const Array<real_t> &W,
378 occa::properties props;
379 props[
"defines/D1D"] = D1D;
380 props[
"defines/Q1D"] = Q1D;
385 const bool const_c = C.Size() == 1;
386 const occa_id_t id = std::make_pair(D1D,Q1D);
388 if (OccaDiffSetup2D_ker.find(
id) == OccaDiffSetup2D_ker.end())
390 const occa::kernel DiffusionSetup2D =
392 "DiffusionSetup2D", props);
393 OccaDiffSetup2D_ker.emplace(
id, DiffusionSetup2D);
395 OccaDiffSetup2D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
398void OccaPADiffusionSetup3D(
const int D1D,
401 const Array<real_t> &W,
406 occa::properties props;
407 props[
"defines/D1D"] = D1D;
408 props[
"defines/Q1D"] = Q1D;
413 const bool const_c = C.Size() == 1;
414 const occa_id_t id = std::make_pair(D1D,Q1D);
416 if (OccaDiffSetup3D_ker.find(
id) == OccaDiffSetup3D_ker.end())
418 const occa::kernel DiffusionSetup3D =
420 "DiffusionSetup3D", props);
421 OccaDiffSetup3D_ker.emplace(
id, DiffusionSetup3D);
423 OccaDiffSetup3D_ker.at(
id)(NE, o_W, o_J, o_C, o_op, const_c);
426void OccaPADiffusionApply2D(
const int D1D,
429 const Array<real_t> &B,
430 const Array<real_t> &G,
431 const Array<real_t> &Bt,
432 const Array<real_t> &Gt,
437 occa::properties props;
438 props[
"defines/D1D"] = D1D;
439 props[
"defines/Q1D"] = Q1D;
442 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
443 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
447 const occa_id_t id = std::make_pair(D1D,Q1D);
448 if (!Device::Allows(Backend::OCCA_CUDA))
451 if (OccaDiffApply2D_cpu.find(
id) == OccaDiffApply2D_cpu.end())
453 const occa::kernel DiffusionApply2D_CPU =
455 "DiffusionApply2D_CPU", props);
456 OccaDiffApply2D_cpu.emplace(
id, DiffusionApply2D_CPU);
458 OccaDiffApply2D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
463 if (OccaDiffApply2D_gpu.find(
id) == OccaDiffApply2D_gpu.end())
465 const occa::kernel DiffusionApply2D_GPU =
467 "DiffusionApply2D_GPU", props);
468 OccaDiffApply2D_gpu.emplace(
id, DiffusionApply2D_GPU);
470 OccaDiffApply2D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
474void OccaPADiffusionApply3D(
const int D1D,
477 const Array<real_t> &B,
478 const Array<real_t> &G,
479 const Array<real_t> &Bt,
480 const Array<real_t> &Gt,
485 occa::properties props;
486 props[
"defines/D1D"] = D1D;
487 props[
"defines/Q1D"] = Q1D;
490 const occa::memory o_Bt =
OccaMemoryRead(Bt.GetMemory(), Bt.Size());
491 const occa::memory o_Gt =
OccaMemoryRead(Gt.GetMemory(), Gt.Size());
495 const occa_id_t id = std::make_pair(D1D,Q1D);
496 if (!Device::Allows(Backend::OCCA_CUDA))
499 if (OccaDiffApply3D_cpu.find(
id) == OccaDiffApply3D_cpu.end())
501 const occa::kernel DiffusionApply3D_CPU =
503 "DiffusionApply3D_CPU", props);
504 OccaDiffApply3D_cpu.emplace(
id, DiffusionApply3D_CPU);
506 OccaDiffApply3D_cpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
511 if (OccaDiffApply3D_gpu.find(
id) == OccaDiffApply3D_gpu.end())
513 const occa::kernel DiffusionApply3D_GPU =
515 "DiffusionApply3D_GPU", props);
516 OccaDiffApply3D_gpu.emplace(
id, DiffusionApply3D_GPU);
518 OccaDiffApply3D_gpu.at(
id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
static void AddSpecialization()
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass....
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass....
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
void forall_2D(int N, int X, int Y, lambda &&body)
std::map< occa_id_t, occa::kernel > occa_kernel_t
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass....
occa::device & OccaDev()
Return the default occa::device used by MFEM.
std::pair< int, int > occa_id_t