MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_diffusion_kernels.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13
14namespace mfem
15{
16
17// PA Diffusion Integrator
18
// Constructor of DiffusionIntegrator::Kernels.
// NOTE(review): in this listing the body holds only the section headings
// below, and the embedded listing numbers skip ranges (22 -> 32 -> 42,
// 44 -> 53 -> 62 -> 65), so the statements that belong under each heading are
// presumably missing from this view -- confirm against the original file.
19DiffusionIntegrator::Kernels::Kernels()
20{
21 // 2D
22 // Q = P+1
32 // Q = P+2
42 // others
43 // 3D
44 // Q = P+1
53 // Q = P+2
62 // others
65}
66
67namespace internal
68{
69
70template<>
71void PADiffusionSetup2D<2>(const int Q1D,
72 const int coeffDim,
73 const int NE,
74 const Array<real_t> &w,
75 const Vector &j,
76 const Vector &c,
77 Vector &d);
78
79template<>
80void PADiffusionSetup2D<3>(const int Q1D,
81 const int coeffDim,
82 const int NE,
83 const Array<real_t> &w,
84 const Vector &j,
85 const Vector &c,
86 Vector &d);
87
88void PADiffusionSetup(const int dim,
89 const int sdim,
90 const int D1D,
91 const int Q1D,
92 const int coeffDim,
93 const int NE,
94 const Array<real_t> &W,
95 const Vector &J,
96 const Vector &C,
97 Vector &D)
98{
99 if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADiffusionSetup"); }
100 if (dim == 2)
101 {
102#ifdef MFEM_USE_OCCA
103 if (DeviceCanUseOcca())
104 {
105 OccaPADiffusionSetup2D(D1D, Q1D, NE, W, J, C, D);
106 return;
107 }
108#else
109 MFEM_CONTRACT_VAR(D1D);
110#endif // MFEM_USE_OCCA
111 if (sdim == 2) { PADiffusionSetup2D<2>(Q1D, coeffDim, NE, W, J, C, D); }
112 if (sdim == 3) { PADiffusionSetup2D<3>(Q1D, coeffDim, NE, W, J, C, D); }
113 }
114 if (dim == 3)
115 {
116#ifdef MFEM_USE_OCCA
117 if (DeviceCanUseOcca())
118 {
119 OccaPADiffusionSetup3D(D1D, Q1D, NE, W, J, C, D);
120 return;
121 }
122#endif // MFEM_USE_OCCA
123 PADiffusionSetup3D(Q1D, coeffDim, NE, W, J, C, D);
124 }
125}
126
127template<>
128void PADiffusionSetup2D<2>(const int Q1D,
129 const int coeffDim,
130 const int NE,
131 const Array<real_t> &w,
132 const Vector &j,
133 const Vector &c,
134 Vector &d)
135{
136 const bool symmetric = (coeffDim != 4);
137 const bool const_c = c.Size() == coeffDim;
138 const auto W = Reshape(w.Read(), Q1D,Q1D);
139 const auto J = Reshape(j.Read(), Q1D,Q1D,2,2,NE);
140 const auto C = const_c ? Reshape(c.Read(), coeffDim,1,1,1) :
141 Reshape(c.Read(), coeffDim,Q1D,Q1D,NE);
142 auto D = Reshape(d.Write(), Q1D,Q1D, symmetric ? 3 : 4, NE);
143
144 auto get_coeff = [const_c] MFEM_HOST_DEVICE
145 (const decltype(C) &C, int i, int qx, int qy, int e)
146 {
147 return const_c ? C(i,0,0,0) : C(i,qx,qy,e);
148 };
149
150 mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
151 {
152 MFEM_FOREACH_THREAD(qx,x,Q1D)
153 {
154 MFEM_FOREACH_THREAD(qy,y,Q1D)
155 {
156 const real_t J11 = J(qx,qy,0,0,e);
157 const real_t J21 = J(qx,qy,1,0,e);
158 const real_t J12 = J(qx,qy,0,1,e);
159 const real_t J22 = J(qx,qy,1,1,e);
160 const real_t w_detJ = W(qx,qy) / ((J11*J22)-(J21*J12));
161 if (coeffDim == 3 || coeffDim == 4) // Matrix coefficient
162 {
163 // First compute entries of R = MJ^{-T}, without det J factor.
164 const real_t M11 = get_coeff(C,0,qx,qy,e);
165 const real_t M12 = get_coeff(C,1,qx,qy,e);
166 const real_t M21 = symmetric ? M12 : get_coeff(C,2,qx,qy,e);
167 const real_t M22 = symmetric ? get_coeff(C,2,qx,qy,e)
168 : get_coeff(C,3,qx,qy,e);
169 const real_t R11 = M11*J22 - M12*J12;
170 const real_t R21 = M21*J22 - M22*J12;
171 const real_t R12 = -M11*J21 + M12*J11;
172 const real_t R22 = -M21*J21 + M22*J11;
173
174 // Now set y to J^{-1}R.
175 D(qx,qy,0,e) = w_detJ * ( J22*R11 - J12*R21); // 1,1
176 D(qx,qy,1,e) = w_detJ * (-J21*R11 + J11*R21); // 2,1
177 D(qx,qy,2,e) = w_detJ * (symmetric ? (-J21*R12 + J11*R22) :
178 (J22*R12 - J12*R22)); // 2,2 or 1,2
179 if (!symmetric)
180 {
181 D(qx,qy,3,e) = w_detJ * (-J21*R12 + J11*R22); // 2,2
182 }
183 }
184 else // Vector or scalar coefficient
185 {
186 const real_t C1 = get_coeff(C,0,qx,qy,e);
187 const real_t C2 = get_coeff(C,coeffDim==2?1:0,qx,qy,e);
188
189 D(qx,qy,0,e) = w_detJ * (C2*J12*J12 + C1*J22*J22); // 1,1
190 D(qx,qy,1,e) = -w_detJ * (C2*J12*J11 + C1*J22*J21); // 1,2
191 D(qx,qy,2,e) = w_detJ * (C2*J11*J11 + C1*J21*J21); // 2,2
192 }
193 }
194 }
195 });
196}
197
198template<>
199void PADiffusionSetup2D<3>(const int Q1D,
200 const int coeffDim,
201 const int NE,
202 const Array<real_t> &w,
203 const Vector &j,
204 const Vector &c,
205 Vector &d)
206{
207 MFEM_VERIFY(coeffDim == 1, "Matrix and vector coefficients not supported");
208 constexpr int DIM = 2;
209 constexpr int SDIM = 3;
210 const bool const_c = c.Size() == 1;
211 const auto W = Reshape(w.Read(), Q1D,Q1D);
212 const auto J = Reshape(j.Read(), Q1D,Q1D,SDIM,DIM,NE);
213 const auto C = const_c ? Reshape(c.Read(), 1,1,1) :
214 Reshape(c.Read(), Q1D,Q1D,NE);
215 auto D = Reshape(d.Write(), Q1D,Q1D, 3, NE);
216 mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
217 {
218 MFEM_FOREACH_THREAD(qx,x,Q1D)
219 {
220 MFEM_FOREACH_THREAD(qy,y,Q1D)
221 {
222 const real_t wq = W(qx,qy);
223 const real_t J11 = J(qx,qy,0,0,e);
224 const real_t J21 = J(qx,qy,1,0,e);
225 const real_t J31 = J(qx,qy,2,0,e);
226 const real_t J12 = J(qx,qy,0,1,e);
227 const real_t J22 = J(qx,qy,1,1,e);
228 const real_t J32 = J(qx,qy,2,1,e);
229 const real_t E = J11*J11 + J21*J21 + J31*J31;
230 const real_t G = J12*J12 + J22*J22 + J32*J32;
231 const real_t F = J11*J12 + J21*J22 + J31*J32;
232 const real_t iw = 1.0 / std::sqrt(E*G - F*F);
233 const real_t coeff = const_c ? C(0,0,0) : C(qx,qy,e);
234 const real_t alpha = wq * coeff * iw;
235 D(qx,qy,0,e) = alpha * G; // 1,1
236 D(qx,qy,1,e) = -alpha * F; // 1,2
237 D(qx,qy,2,e) = alpha * E; // 2,2
238 }
239 }
240 });
241}
242
243void PADiffusionSetup3D(const int Q1D,
244 const int coeffDim,
245 const int NE,
246 const Array<real_t> &w,
247 const Vector &j,
248 const Vector &c,
249 Vector &d)
250{
251 const bool symmetric = (coeffDim != 9);
252 const bool const_c = c.Size() == coeffDim;
253 const auto W = Reshape(w.Read(), Q1D,Q1D,Q1D);
254 const auto J = Reshape(j.Read(), Q1D,Q1D,Q1D,3,3,NE);
255 const auto C = const_c ? Reshape(c.Read(), coeffDim,1,1,1,1) :
256 Reshape(c.Read(), coeffDim,Q1D,Q1D,Q1D,NE);
257 auto D = Reshape(d.Write(), Q1D,Q1D,Q1D, symmetric ? 6 : 9, NE);
258
259 auto get_coeff = [const_c] MFEM_HOST_DEVICE
260 (const decltype(C) &C, int i, int qx, int qy, int qz, int e)
261 {
262 return const_c ? C(i,0,0,0,0) : C(i,qx,qy,qz,e);
263 };
264
265 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
266 {
267 MFEM_FOREACH_THREAD(qx,x,Q1D)
268 {
269 MFEM_FOREACH_THREAD(qy,y,Q1D)
270 {
271 MFEM_FOREACH_THREAD(qz,z,Q1D)
272 {
273 const real_t J11 = J(qx,qy,qz,0,0,e);
274 const real_t J21 = J(qx,qy,qz,1,0,e);
275 const real_t J31 = J(qx,qy,qz,2,0,e);
276 const real_t J12 = J(qx,qy,qz,0,1,e);
277 const real_t J22 = J(qx,qy,qz,1,1,e);
278 const real_t J32 = J(qx,qy,qz,2,1,e);
279 const real_t J13 = J(qx,qy,qz,0,2,e);
280 const real_t J23 = J(qx,qy,qz,1,2,e);
281 const real_t J33 = J(qx,qy,qz,2,2,e);
282 const real_t detJ = J11 * (J22 * J33 - J32 * J23) -
283 J21 * (J12 * J33 - J32 * J13) +
284 J31 * (J12 * J23 - J22 * J13);
285 const real_t w_detJ = W(qx,qy,qz) / detJ;
286 // adj(J)
287 const real_t A11 = (J22 * J33) - (J23 * J32);
288 const real_t A12 = (J32 * J13) - (J12 * J33);
289 const real_t A13 = (J12 * J23) - (J22 * J13);
290 const real_t A21 = (J31 * J23) - (J21 * J33);
291 const real_t A22 = (J11 * J33) - (J13 * J31);
292 const real_t A23 = (J21 * J13) - (J11 * J23);
293 const real_t A31 = (J21 * J32) - (J31 * J22);
294 const real_t A32 = (J31 * J12) - (J11 * J32);
295 const real_t A33 = (J11 * J22) - (J12 * J21);
296
297 if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
298 {
299 // Compute entries of R = MJ^{-T} = M adj(J)^T, without det J.
300 const real_t M11 = get_coeff(C, 0, qx,qy,qz, e);
301 const real_t M12 = get_coeff(C, 1, qx,qy,qz, e);
302 const real_t M13 = get_coeff(C, 2, qx,qy,qz, e);
303 const real_t M21 = (!symmetric) ? get_coeff(C, 3, qx,qy,qz, e) : M12;
304 const real_t M22 = (!symmetric) ? get_coeff(C, 4, qx,qy,qz, e)
305 : get_coeff(C, 3, qx,qy,qz, e);
306 const real_t M23 = (!symmetric) ? get_coeff(C, 5, qx,qy,qz, e)
307 : get_coeff(C, 4, qx,qy,qz, e);
308 const real_t M31 = (!symmetric) ? get_coeff(C, 6, qx,qy,qz, e) : M13;
309 const real_t M32 = (!symmetric) ? get_coeff(C, 7, qx,qy,qz, e) : M23;
310 const real_t M33 = (!symmetric) ? get_coeff(C, 8, qx,qy,qz, e)
311 : get_coeff(C, 5, qx,qy,qz, e);
312
313 const real_t R11 = M11*A11 + M12*A12 + M13*A13;
314 const real_t R12 = M11*A21 + M12*A22 + M13*A23;
315 const real_t R13 = M11*A31 + M12*A32 + M13*A33;
316 const real_t R21 = M21*A11 + M22*A12 + M23*A13;
317 const real_t R22 = M21*A21 + M22*A22 + M23*A23;
318 const real_t R23 = M21*A31 + M22*A32 + M23*A33;
319 const real_t R31 = M31*A11 + M32*A12 + M33*A13;
320 const real_t R32 = M31*A21 + M32*A22 + M33*A23;
321 const real_t R33 = M31*A31 + M32*A32 + M33*A33;
322
323 // Now set D to J^{-1} R = adj(J) R
324 D(qx,qy,qz,0,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31); // 1,1
325 const real_t D12 = w_detJ * (A11*R12 + A12*R22 + A13*R32);
326 D(qx,qy,qz,1,e) = D12; // 1,2
327 D(qx,qy,qz,2,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33); // 1,3
328
329 const real_t D22 = w_detJ * (A21*R12 + A22*R22 + A23*R32);
330 const real_t D23 = w_detJ * (A21*R13 + A22*R23 + A23*R33);
331
332 const real_t D33 = w_detJ * (A31*R13 + A32*R23 + A33*R33);
333
334 D(qx,qy,qz,4,e) = symmetric ? D23 : D22; // 2,3 or 2,2
335 D(qx,qy,qz,5,e) = symmetric ? D33 : D23; // 3,3 or 2,3
336
337 if (symmetric)
338 {
339 D(qx,qy,qz,3,e) = D22; // 2,2
340 }
341 else
342 {
343 D(qx,qy,qz,3,e) = w_detJ * (A21*R11 + A22*R21 + A23*R31); // 2,1
344 D(qx,qy,qz,6,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31); // 3,1
345 D(qx,qy,qz,7,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32); // 3,2
346 D(qx,qy,qz,8,e) = D33; // 3,3
347 }
348 }
349 else // Vector or scalar coefficient version
350 {
351 const real_t C1 = get_coeff(C,0,qx,qy,qz,e);
352 const real_t C2 = get_coeff(C,coeffDim==3?1:0,qx,qy,qz,e);
353 const real_t C3 = get_coeff(C,coeffDim==3?2:0,qx,qy,qz,e);
354
355 // detJ J^{-1} J^{-T} = (1/detJ) adj(J) adj(J)^T
356 D(qx,qy,qz,0,e) = w_detJ * (C1*A11*A11 + C2*A12*A12 + C3*A13*A13); // 1,1
357 D(qx,qy,qz,1,e) = w_detJ * (C1*A11*A21 + C2*A12*A22 + C3*A13*A23); // 2,1
358 D(qx,qy,qz,2,e) = w_detJ * (C1*A11*A31 + C2*A12*A32 + C3*A13*A33); // 3,1
359 D(qx,qy,qz,3,e) = w_detJ * (C1*A21*A21 + C2*A22*A22 + C3*A23*A23); // 2,2
360 D(qx,qy,qz,4,e) = w_detJ * (C1*A21*A31 + C2*A22*A32 + C3*A23*A33); // 3,2
361 D(qx,qy,qz,5,e) = w_detJ * (C1*A31*A31 + C2*A32*A32 + C3*A33*A33); // 3,3
362 }
363 }
364 }
365 }
366 });
367}
368
369#ifdef MFEM_USE_OCCA
370void OccaPADiffusionSetup2D(const int D1D,
371 const int Q1D,
372 const int NE,
373 const Array<real_t> &W,
374 const Vector &J,
375 const Vector &C,
376 Vector &op)
377{
378 occa::properties props;
379 props["defines/D1D"] = D1D;
380 props["defines/Q1D"] = Q1D;
381 const occa::memory o_W = OccaMemoryRead(W.GetMemory(), W.Size());
382 const occa::memory o_J = OccaMemoryRead(J.GetMemory(), J.Size());
383 const occa::memory o_C = OccaMemoryRead(C.GetMemory(), C.Size());
384 occa::memory o_op = OccaMemoryWrite(op.GetMemory(), op.Size());
385 const bool const_c = C.Size() == 1;
386 const occa_id_t id = std::make_pair(D1D,Q1D);
387 static occa_kernel_t OccaDiffSetup2D_ker;
388 if (OccaDiffSetup2D_ker.find(id) == OccaDiffSetup2D_ker.end())
389 {
390 const occa::kernel DiffusionSetup2D =
391 mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
392 "DiffusionSetup2D", props);
393 OccaDiffSetup2D_ker.emplace(id, DiffusionSetup2D);
394 }
395 OccaDiffSetup2D_ker.at(id)(NE, o_W, o_J, o_C, o_op, const_c);
396}
397
398void OccaPADiffusionSetup3D(const int D1D,
399 const int Q1D,
400 const int NE,
401 const Array<real_t> &W,
402 const Vector &J,
403 const Vector &C,
404 Vector &op)
405{
406 occa::properties props;
407 props["defines/D1D"] = D1D;
408 props["defines/Q1D"] = Q1D;
409 const occa::memory o_W = OccaMemoryRead(W.GetMemory(), W.Size());
410 const occa::memory o_J = OccaMemoryRead(J.GetMemory(), J.Size());
411 const occa::memory o_C = OccaMemoryRead(C.GetMemory(), C.Size());
412 occa::memory o_op = OccaMemoryWrite(op.GetMemory(), op.Size());
413 const bool const_c = C.Size() == 1;
414 const occa_id_t id = std::make_pair(D1D,Q1D);
415 static occa_kernel_t OccaDiffSetup3D_ker;
416 if (OccaDiffSetup3D_ker.find(id) == OccaDiffSetup3D_ker.end())
417 {
418 const occa::kernel DiffusionSetup3D =
419 mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
420 "DiffusionSetup3D", props);
421 OccaDiffSetup3D_ker.emplace(id, DiffusionSetup3D);
422 }
423 OccaDiffSetup3D_ker.at(id)(NE, o_W, o_J, o_C, o_op, const_c);
424}
425
426void OccaPADiffusionApply2D(const int D1D,
427 const int Q1D,
428 const int NE,
429 const Array<real_t> &B,
430 const Array<real_t> &G,
431 const Array<real_t> &Bt,
432 const Array<real_t> &Gt,
433 const Vector &D,
434 const Vector &X,
435 Vector &Y)
436{
437 occa::properties props;
438 props["defines/D1D"] = D1D;
439 props["defines/Q1D"] = Q1D;
440 const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
441 const occa::memory o_G = OccaMemoryRead(G.GetMemory(), G.Size());
442 const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
443 const occa::memory o_Gt = OccaMemoryRead(Gt.GetMemory(), Gt.Size());
444 const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
445 const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
446 occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
447 const occa_id_t id = std::make_pair(D1D,Q1D);
448 if (!Device::Allows(Backend::OCCA_CUDA))
449 {
450 static occa_kernel_t OccaDiffApply2D_cpu;
451 if (OccaDiffApply2D_cpu.find(id) == OccaDiffApply2D_cpu.end())
452 {
453 const occa::kernel DiffusionApply2D_CPU =
454 mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
455 "DiffusionApply2D_CPU", props);
456 OccaDiffApply2D_cpu.emplace(id, DiffusionApply2D_CPU);
457 }
458 OccaDiffApply2D_cpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
459 }
460 else
461 {
462 static occa_kernel_t OccaDiffApply2D_gpu;
463 if (OccaDiffApply2D_gpu.find(id) == OccaDiffApply2D_gpu.end())
464 {
465 const occa::kernel DiffusionApply2D_GPU =
466 mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
467 "DiffusionApply2D_GPU", props);
468 OccaDiffApply2D_gpu.emplace(id, DiffusionApply2D_GPU);
469 }
470 OccaDiffApply2D_gpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
471 }
472}
473
474void OccaPADiffusionApply3D(const int D1D,
475 const int Q1D,
476 const int NE,
477 const Array<real_t> &B,
478 const Array<real_t> &G,
479 const Array<real_t> &Bt,
480 const Array<real_t> &Gt,
481 const Vector &D,
482 const Vector &X,
483 Vector &Y)
484{
485 occa::properties props;
486 props["defines/D1D"] = D1D;
487 props["defines/Q1D"] = Q1D;
488 const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size());
489 const occa::memory o_G = OccaMemoryRead(G.GetMemory(), G.Size());
490 const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size());
491 const occa::memory o_Gt = OccaMemoryRead(Gt.GetMemory(), Gt.Size());
492 const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size());
493 const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size());
494 occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size());
495 const occa_id_t id = std::make_pair(D1D,Q1D);
496 if (!Device::Allows(Backend::OCCA_CUDA))
497 {
498 static occa_kernel_t OccaDiffApply3D_cpu;
499 if (OccaDiffApply3D_cpu.find(id) == OccaDiffApply3D_cpu.end())
500 {
501 const occa::kernel DiffusionApply3D_CPU =
502 mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
503 "DiffusionApply3D_CPU", props);
504 OccaDiffApply3D_cpu.emplace(id, DiffusionApply3D_CPU);
505 }
506 OccaDiffApply3D_cpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
507 }
508 else
509 {
510 static occa_kernel_t OccaDiffApply3D_gpu;
511 if (OccaDiffApply3D_gpu.find(id) == OccaDiffApply3D_gpu.end())
512 {
513 const occa::kernel DiffusionApply3D_GPU =
514 mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl",
515 "DiffusionApply3D_GPU", props);
516 OccaDiffApply3D_gpu.emplace(id, DiffusionApply3D_GPU);
517 }
518 OccaDiffApply3D_gpu.at(id)(NE, o_B, o_G, o_Bt, o_Gt, o_D, o_X, o_Y);
519 }
520}
521#endif // MFEM_USE_OCCA
522
523} // namespace internal
524
525} // namespace mfem
static void AddSpecialization()
Vector data type.
Definition vector.hpp:82
const real_t alpha
Definition ex15.cpp:369
int dim
Definition ex24.cpp:53
constexpr int SDIM
constexpr int DIM
mfem::real_t real_t
occa::memory OccaMemoryReadWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-write access with the mfem::Device MemoryClass....
Definition occa.hpp:59
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:348
occa::memory OccaMemoryWrite(Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for write only access with the mfem::Device MemoryClass....
Definition occa.hpp:48
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:925
std::map< occa_id_t, occa::kernel > occa_kernel_t
Definition occa.hpp:79
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:937
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition occa.hpp:69
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass....
Definition occa.hpp:37
occa::device & OccaDev()
Return the default occa::device used by MFEM.
Definition occa.cpp:27
std::pair< int, int > occa_id_t
Definition occa.hpp:78