qfunction_apply.hpp
// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.
#pragma once

#include "util.hpp"

namespace mfem::future
{

/// @brief Call a qfunction with the given parameters.
///
/// @param qfunc the qfunction to call.
/// @param input_shmem the input shared memory.
/// @param residual_shmem the residual shared memory.
/// @param rs_qp the size of the residual at a single quadrature point.
/// @param num_qp the number of quadrature points.
/// @param q1d the number of quadrature points in 1D.
/// @param dimension the spatial dimension.
/// @param use_sum_factorization whether to use sum factorization.
/// @tparam qf_param_ts the tuple type of the qfunction parameters.
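///
/// Schematically, for every quadrature point @c q this evaluates
/// @f[
///    f(q) = \hat{q}\big(u_1(q), \dots, u_n(q)\big),
/// @f]
/// where the arguments @f$ u_i(q) @f$ are unpacked from @a input_shmem into a
/// @c qf_param_ts argument tuple and the first element of the tuple returned by
/// @a qfunc, of size @a rs_qp, is written to column @c q of @a residual_shmem
/// (see apply_kernel).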
template <
   typename qf_param_ts,
   typename qfunc_t,
   std::size_t num_fields>
MFEM_HOST_DEVICE inline
void call_qfunction(
   qfunc_t &qfunc,
   const std::array<DeviceTensor<2>, num_fields> &input_shmem,
   DeviceTensor<2> &residual_shmem,
   const int &rs_qp,
   const int &num_qp,
   const int &q1d,
   const int &dimension,
   const bool &use_sum_factorization)
{
   if (use_sum_factorization)
   {
      if (dimension == 1)
      {
         MFEM_FOREACH_THREAD_DIRECT(q, x, q1d)
         {
            auto qf_args = decay_tuple<qf_param_ts> {};
            auto r = Reshape(&residual_shmem(0, q), rs_qp);
            apply_kernel(r, qfunc, qf_args, input_shmem, q);
         }
      }
      else if (dimension == 2)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               const int q = qx + q1d * qy;
               auto qf_args = decay_tuple<qf_param_ts> {};
               auto r = Reshape(&residual_shmem(0, q), rs_qp);
               apply_kernel(r, qfunc, qf_args, input_shmem, q);
            }
         }
      }
      else if (dimension == 3)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               MFEM_FOREACH_THREAD_DIRECT(qz, z, q1d)
               {
                  const int q = qx + q1d * (qy + q1d * qz);
                  auto qf_args = decay_tuple<qf_param_ts> {};
                  auto r = Reshape(&residual_shmem(0, q), rs_qp);
                  apply_kernel(r, qfunc, qf_args, input_shmem, q);
               }
            }
         }
      }
      else
      {
#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
         MFEM_ABORT("unsupported dimension for sum factorization");
#endif
      }
      MFEM_SYNC_THREAD;
   }
   else
   {
      MFEM_FOREACH_THREAD_DIRECT(q, x, num_qp)
      {
         auto qf_args = decay_tuple<qf_param_ts> {};
         auto r = Reshape(&residual_shmem(0, q), rs_qp);
         apply_kernel(r, qfunc, qf_args, input_shmem, q);
      }
   }
}

/// @brief Call a qfunction with the given parameters and
/// compute its derivative action.
///
/// @param qfunc the qfunction to call.
/// @param input_shmem the input shared memory.
/// @param shadow_shmem the shadow shared memory.
/// @param residual_shmem the residual shared memory.
/// @param das_qp the size of the derivative action at a single quadrature point.
/// @param num_qp the number of quadrature points.
/// @param q1d the number of quadrature points in 1D.
/// @param dimension the spatial dimension.
/// @param use_sum_factorization whether to use sum factorization.
/// @tparam qf_param_ts the tuple type of the qfunction parameters.
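///
/// Schematically, for every quadrature point @c q this evaluates the
/// directional derivative
/// @f[
///    f(q) = \frac{\partial \hat{q}}{\partial u}\big(u(q)\big)\, \delta u(q),
/// @f]
/// where @f$ u(q) @f$ is unpacked from @a input_shmem and the direction
/// @f$ \delta u(q) @f$ from @a shadow_shmem; the result of size @a das_qp is
/// written to column @c q of @a residual_shmem. The derivative is evaluated
/// with Enzyme forward mode when MFEM_USE_ENZYME is defined and with native
/// dual numbers otherwise.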
template <
   typename qf_param_ts,
   typename qfunc_t,
   std::size_t num_fields>
MFEM_HOST_DEVICE inline
void call_qfunction_derivative_action(
   qfunc_t &qfunc,
   const std::array<DeviceTensor<2>, num_fields> &input_shmem,
   const std::array<DeviceTensor<2>, num_fields> &shadow_shmem,
   DeviceTensor<2> &residual_shmem,
   const int &das_qp,
   const int &num_qp,
   const int &q1d,
   const int &dimension,
   const bool &use_sum_factorization)
{
   if (use_sum_factorization)
   {
      if (dimension == 1)
      {
         MFEM_FOREACH_THREAD_DIRECT(q, x, q1d)
         {
            auto r = Reshape(&residual_shmem(0, q), das_qp);
            auto qf_args = decay_tuple<qf_param_ts> {};
#ifdef MFEM_USE_ENZYME
            auto qf_shadow_args = decay_tuple<qf_param_ts> {};
            apply_kernel_fwddiff_enzyme(r, qfunc, qf_args, qf_shadow_args, input_shmem,
                                        shadow_shmem, q);
#else
            apply_kernel_native_dual(r, qfunc, qf_args, input_shmem, shadow_shmem, q);
#endif
         }
      }
      else if (dimension == 2)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               const int q = qx + q1d * qy;
               auto r = Reshape(&residual_shmem(0, q), das_qp);
               auto qf_args = decay_tuple<qf_param_ts> {};
#ifdef MFEM_USE_ENZYME
               auto qf_shadow_args = decay_tuple<qf_param_ts> {};
               apply_kernel_fwddiff_enzyme(r, qfunc, qf_args, qf_shadow_args, input_shmem,
                                           shadow_shmem, q);
#else
               apply_kernel_native_dual(r, qfunc, qf_args, input_shmem, shadow_shmem, q);
#endif
            }
         }
      }
      else if (dimension == 3)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               MFEM_FOREACH_THREAD_DIRECT(qz, z, q1d)
               {
                  const int q = qx + q1d * (qy + q1d * qz);
                  auto r = Reshape(&residual_shmem(0, q), das_qp);
                  auto qf_args = decay_tuple<qf_param_ts> {};
#ifdef MFEM_USE_ENZYME
                  auto qf_shadow_args = decay_tuple<qf_param_ts> {};
                  apply_kernel_fwddiff_enzyme(r, qfunc, qf_args, qf_shadow_args, input_shmem,
                                              shadow_shmem, q);
#else
                  apply_kernel_native_dual(r, qfunc, qf_args, input_shmem, shadow_shmem, q);
#endif
               }
            }
         }
      }
      else
      {
         MFEM_ABORT_KERNEL("unsupported dimension");
      }
   }
   else
   {
      MFEM_FOREACH_THREAD_DIRECT(q, x, num_qp)
      {
         auto r = Reshape(&residual_shmem(0, q), das_qp);
         auto qf_args = decay_tuple<qf_param_ts> {};
#ifdef MFEM_USE_ENZYME
         auto qf_shadow_args = decay_tuple<qf_param_ts> {};
         apply_kernel_fwddiff_enzyme(r, qfunc, qf_args, qf_shadow_args, input_shmem,
                                     shadow_shmem, q);
#else
         apply_kernel_native_dual(r, qfunc, qf_args, input_shmem, shadow_shmem, q);
#endif
      }
   }
   MFEM_SYNC_THREAD;
}

namespace detail
{
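/// @brief Assemble the qfunction Jacobian at a single quadrature point @a q
/// into the quadrature point data cache @a qpdc.
///
/// For every dependent input @c s (i.e. @c itod(s) != 0) and every trial
/// component pair (j, m), a unit direction is seeded in @a shadow_shmem, one
/// forward-mode derivative evaluation is performed, and the resulting test
/// components are stored in qpdc(i, k, j, m + m_offset, q).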
template <
   typename qf_param_ts,
   typename qfunc_t,
   std::size_t num_fields>
MFEM_HOST_DEVICE inline
void call_qfunction_derivative(
   qfunc_t &qfunc,
   const std::array<DeviceTensor<2>, num_fields> &input_shmem,
   const std::array<DeviceTensor<2>, num_fields> &shadow_shmem,
   DeviceTensor<2> &residual_shmem,
   DeviceTensor<5> &qpdc,
   const DeviceTensor<1, const real_t> &itod,
   const int &das_qp,
   const int &q)
{
   const int test_vdim = qpdc.GetShape()[0];
   const int test_op_dim = qpdc.GetShape()[1];
   const int trial_vdim = qpdc.GetShape()[2];
   const int num_qp = qpdc.GetShape()[4];
   const size_t num_inputs = itod.GetShape()[0];

   for (int j = 0; j < trial_vdim; j++)
   {
      int m_offset = 0;
      for (size_t s = 0; s < num_inputs; s++)
      {
         const int trial_op_dim = static_cast<int>(itod(s));
         if (trial_op_dim == 0)
         {
            continue;
         }

         auto d_qp = Reshape(&(shadow_shmem[s])[0], trial_vdim, trial_op_dim, num_qp);
         for (int m = 0; m < trial_op_dim; m++)
         {
            d_qp(j, m, q) = 1.0;

            auto r = Reshape(&residual_shmem(0, q), das_qp);
            auto qf_args = decay_tuple<qf_param_ts> {};
#ifdef MFEM_USE_ENZYME
            auto qf_shadow_args = decay_tuple<qf_param_ts> {};
            apply_kernel_fwddiff_enzyme(r, qfunc, qf_args, qf_shadow_args, input_shmem,
                                        shadow_shmem, q);
#else
            apply_kernel_native_dual(r, qfunc, qf_args, input_shmem, shadow_shmem, q);
#endif
            d_qp(j, m, q) = 0.0;

            auto f = Reshape(&r(0), test_vdim, test_op_dim);
            for (int i = 0; i < test_vdim; i++)
            {
               for (int k = 0; k < test_op_dim; k++)
               {
                  qpdc(i, k, j, m + m_offset, q) = f(i, k);
               }
            }
         }
         m_offset += trial_op_dim;
      }
   }
}
}

/// @brief Call a qfunction with the given parameters and
/// compute its derivative, represented by the Jacobian at
/// each quadrature point.
///
/// @param qfunc the qfunction to call.
/// @param input_shmem the input shared memory.
/// @param shadow_shmem the shadow shared memory.
/// @param residual_shmem the residual shared memory.
/// @param qpdc the quadrature point data cache holding the resulting
/// Jacobians at each quadrature point.
/// @param itod the trial operator dimension of each input.
/// If an input is dependent, the value corresponds to the spatial dimension;
/// a zero indicates non-dependence on that input.
/// @param das_qp the size of the derivative action.
/// @param q1d the number of quadrature points in 1D.
/// @param dimension the spatial dimension.
/// @param use_sum_factorization whether to use sum factorization.
/// @tparam qf_param_ts the tuple type of the qfunction parameters.
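///
/// Schematically, the cache entries produced at each quadrature point @c q are
/// @f[
///    \texttt{qpdc}(i, k, j, m, q) =
///    \frac{\partial \hat{q}_{ik}}{\partial u_{jm}}\big(u(q)\big),
/// @f]
/// i.e. one Jacobian column per seeded trial component (j, m), obtained by the
/// unit-direction sweeps in detail::call_qfunction_derivative.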
template <
   typename qf_param_ts,
   typename qfunc_t,
   std::size_t num_fields>
MFEM_HOST_DEVICE inline
void call_qfunction_derivative(
   qfunc_t &qfunc,
   const std::array<DeviceTensor<2>, num_fields> &input_shmem,
   const std::array<DeviceTensor<2>, num_fields> &shadow_shmem,
   DeviceTensor<2> &residual_shmem,
   DeviceTensor<5> &qpdc,
   const DeviceTensor<1, const real_t> &itod,
   const int &das_qp,
   const int &q1d,
   const int &dimension,
   const bool &use_sum_factorization)
{
   if (use_sum_factorization)
   {
      if (dimension == 1)
      {
         MFEM_FOREACH_THREAD_DIRECT(q, x, q1d)
         {
            detail::call_qfunction_derivative<qf_param_ts>(
               qfunc, input_shmem, shadow_shmem, residual_shmem, qpdc, itod, das_qp, q);
         }
      }
      else if (dimension == 2)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               const int q = qx + q1d * qy;
               detail::call_qfunction_derivative<qf_param_ts>(
                  qfunc, input_shmem, shadow_shmem, residual_shmem, qpdc, itod, das_qp, q);
            }
         }
      }
      else if (dimension == 3)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               MFEM_FOREACH_THREAD_DIRECT(qz, z, q1d)
               {
                  const int q = qx + q1d * (qy + q1d * qz);
                  detail::call_qfunction_derivative<qf_param_ts>(
                     qfunc, input_shmem, shadow_shmem, residual_shmem, qpdc, itod, das_qp, q);
               }
            }
         }
      }
      else
      {
         MFEM_ABORT_KERNEL("unsupported dimension");
      }
   }
   else
   {
      const int num_qp = qpdc.GetShape()[4];
      MFEM_FOREACH_THREAD_DIRECT(q, x, num_qp)
      {
         detail::call_qfunction_derivative<qf_param_ts>(
            qfunc, input_shmem, shadow_shmem, residual_shmem, qpdc, itod, das_qp, q);
      }
   }
   MFEM_SYNC_THREAD;
}

namespace detail
{

/// @brief Apply the quadrature point data cache (qpdc) to a vector
/// (usually a direction) at quadrature point q.
///
/// The qpdc consists of compatible data to be used for integration with a test
/// operator, e.g. Jacobians of a linearization from an FE operation with a trial
/// function, including integration weights and necessary transformations.
///
/// @param fhat the qpdc applied to a vector in shadow_shmem.
/// @param shadow_shmem the shadow shared memory.
/// @param qpdc the quadrature point data cache holding the resulting
/// Jacobians at each quadrature point.
/// @param itod the trial operator dimension of each input.
/// If an input is dependent, the value corresponds to the spatial dimension;
/// a zero indicates non-dependence on that input.
/// @param q the current quadrature point index.
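///
/// Concretely, this computes the contraction
/// @f[
///    \hat{f}(i, k, q) = \sum_{s} \sum_{j, m}
///    \texttt{qpdc}(i, k, j, m + m_s, q)\, d_s(j, m, q),
/// @f]
/// where @f$ d_s @f$ is the direction stored in @a shadow_shmem for input @c s
/// and @f$ m_s @f$ is the running offset over the trial operator dimensions of
/// the preceding dependent inputs.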
template <size_t num_fields>
MFEM_HOST_DEVICE inline
void apply_qpdc(
   DeviceTensor<3> &fhat,
   const std::array<DeviceTensor<2>, num_fields> &shadow_shmem,
   const DeviceTensor<5, const real_t> &qpdc,
   const DeviceTensor<1, const real_t> &itod,
   const int &q)
{
   const int test_vdim = qpdc.GetShape()[0];
   const int test_op_dim = qpdc.GetShape()[1];
   const int trial_vdim = qpdc.GetShape()[2];
   const int num_qp = qpdc.GetShape()[4];
   const size_t num_inputs = itod.GetShape()[0];

   for (int i = 0; i < test_vdim; i++)
   {
      for (int k = 0; k < test_op_dim; k++)
      {
         real_t sum = 0.0;
         int m_offset = 0;
         for (size_t s = 0; s < num_inputs; s++)
         {
            const int trial_op_dim = static_cast<int>(itod(s));
            if (trial_op_dim == 0)
            {
               continue;
            }
            const auto d_qp =
               Reshape(&(shadow_shmem[s])[0], trial_vdim, trial_op_dim, num_qp);
            for (int j = 0; j < trial_vdim; j++)
            {
               for (int m = 0; m < trial_op_dim; m++)
               {
                  sum += qpdc(i, k, j, m + m_offset, q) * d_qp(j, m, q);
               }
            }
            m_offset += trial_op_dim;
         }
         fhat(i, k, q) = sum;
      }
   }
}
}

/// @brief Apply the quadrature point data cache (qpdc) to a vector
/// (usually a direction).
///
/// The qpdc consists of compatible data to be used for integration with a test
/// operator, e.g. Jacobians of a linearization from an FE operation with a trial
/// function, including integration weights and necessary transformations.
///
/// @param fhat the qpdc applied to a vector in shadow_shmem.
/// @param shadow_shmem the shadow shared memory.
/// @param qpdc the quadrature point data cache holding the resulting
/// Jacobians at each quadrature point.
/// @param itod the trial operator dimension of each input.
/// If an input is dependent, the value corresponds to the spatial dimension;
/// a zero indicates non-dependence on that input.
/// @param q1d the number of quadrature points in 1D.
/// @param dimension the spatial dimension.
/// @param use_sum_factorization whether to use sum factorization.
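///
/// @par Example
/// A minimal sketch of the intended pairing with call_qfunction_derivative;
/// the kernel setup, the shared memory allocation, and the read-only view
/// @c qpdc_const (a DeviceTensor<5, const real_t> wrapping the same cache) are
/// assumptions and not shown here:
/// @code
/// // 1) Cache the per-point Jacobians.
/// call_qfunction_derivative<qf_param_ts>(
///    qfunc, input_shmem, shadow_shmem, residual_shmem, qpdc, itod,
///    das_qp, q1d, dimension, use_sum_factorization);
/// // 2) Contract the cached Jacobians with a direction held in shadow_shmem.
/// apply_qpdc(fhat, shadow_shmem, qpdc_const, itod,
///            q1d, dimension, use_sum_factorization);
/// @endcode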
template <size_t num_fields>
MFEM_HOST_DEVICE inline
void apply_qpdc(
   DeviceTensor<3> &fhat,
   const std::array<DeviceTensor<2>, num_fields> &shadow_shmem,
   const DeviceTensor<5, const real_t> &qpdc,
   const DeviceTensor<1, const real_t> &itod,
   const int &q1d,
   const int &dimension,
   const bool &use_sum_factorization)
{
   if (use_sum_factorization)
   {
      if (dimension == 1)
      {
         MFEM_FOREACH_THREAD_DIRECT(q, x, q1d)
         {
            detail::apply_qpdc(fhat, shadow_shmem, qpdc, itod, q);
         }
      }
      else if (dimension == 2)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               const int q = qx + q1d * qy;
               detail::apply_qpdc(fhat, shadow_shmem, qpdc, itod, q);
            }
         }
      }
      else if (dimension == 3)
      {
         MFEM_FOREACH_THREAD_DIRECT(qx, x, q1d)
         {
            MFEM_FOREACH_THREAD_DIRECT(qy, y, q1d)
            {
               MFEM_FOREACH_THREAD_DIRECT(qz, z, q1d)
               {
                  const int q = qx + q1d * (qy + q1d * qz);
                  detail::apply_qpdc(fhat, shadow_shmem, qpdc, itod, q);
               }
            }
         }
      }
      else
      {
         MFEM_ABORT_KERNEL("unsupported dimension");
      }
   }
   else
   {
      const int num_qp = qpdc.GetShape()[4];
      MFEM_FOREACH_THREAD_DIRECT(q, x, num_qp)
      {
         detail::apply_qpdc(fhat, shadow_shmem, qpdc, itod, q);
      }
   }
}

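/// @brief Evaluate the qfunction at quadrature point @a qp and store the result
/// in @a f_qp.
///
/// The inputs @a u are unpacked into the argument tuple @a args via
/// process_qf_args; the first element of the tuple returned by @a qfunc is
/// written out via process_qf_result.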
template <typename qfunc_t, typename args_ts, size_t num_args>
MFEM_HOST_DEVICE inline
void apply_kernel(
   DeviceTensor<1, real_t> &f_qp,
   const qfunc_t &qfunc,
   args_ts &args,
   const std::array<DeviceTensor<2>, num_args> &u,
   int qp)
{
   process_qf_args(u, args, qp);
   process_qf_result(f_qp, get<0>(apply(qfunc, args)));
}

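/// @brief Evaluate the qfunction at quadrature point @a qp_idx with dual-number
/// arguments and store the derivative part of the result in @a f_qp.
///
/// The primal values are taken from @a u and the directional seeds from @a v;
/// the derivative is extracted via process_derivative_from_native_dual.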
template <typename qfunc_t, typename arg_ts, size_t num_args>
MFEM_HOST_DEVICE inline
void apply_kernel_native_dual(
   DeviceTensor<1, real_t> &f_qp,
   const qfunc_t &qfunc,
   arg_ts &args,
   const std::array<DeviceTensor<2>, num_args> &u,
   const std::array<DeviceTensor<2>, num_args> &v,
   const int &qp_idx)
{
   process_qf_args(u, v, args, qp_idx);
   auto r = get<0>(apply(qfunc, args));
   process_derivative_from_native_dual(f_qp, r);
}

#ifdef MFEM_USE_ENZYME

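// Forwards its arguments to the qfunction f; used as the primal callable that
// is passed to __enzyme_fwddiff below.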
template <typename func_t, typename... arg_ts>
MFEM_HOST_DEVICE inline
auto qfunction_wrapper(const func_t &f, arg_ts &&...args)
{
   return f(args...);
}

// Version for active function arguments only.
//
// This overload exists due to an Enzyme regression and can be removed with
// later Enzyme versions.
template <typename qfunc_t, typename arg_ts, std::size_t... Is,
          typename inactive_arg_ts>
MFEM_HOST_DEVICE inline
auto fwddiff_apply_enzyme_indexed(qfunc_t &qfunc, arg_ts &&args,
                                  arg_ts &&shadow_args,
                                  std::index_sequence<Is...>,
                                  inactive_arg_ts &&inactive_args,
                                  std::index_sequence<>)
{
   using qf_return_t = typename create_function_signature<
                       decltype(&qfunc_t::operator())>::type::return_t;
   return __enzyme_fwddiff<qf_return_t>(
             qfunction_wrapper<qfunc_t, decltype(get<Is>(args))...>, enzyme_const,
             (void *)&qfunc, enzyme_dup, &get<Is>(args)..., enzyme_interleave,
             &get<Is>(shadow_args)...);
}

// Interleave function arguments for Enzyme.
template <typename qfunc_t, typename arg_ts, std::size_t... Is,
          typename inactive_arg_ts, std::size_t... Js>
MFEM_HOST_DEVICE inline
auto fwddiff_apply_enzyme_indexed(qfunc_t &qfunc, arg_ts &&args,
                                  arg_ts &&shadow_args,
                                  std::index_sequence<Is...>,
                                  inactive_arg_ts &&inactive_args,
                                  std::index_sequence<Js...>)
{
   using qf_return_t = typename create_function_signature<
                       decltype(&qfunc_t::operator())>::type::return_t;
   return __enzyme_fwddiff<qf_return_t>(
             qfunction_wrapper<qfunc_t, decltype(get<Is>(args))...,
                               decltype(get<Js>(inactive_args))...>,
             enzyme_const, (void *)&qfunc, enzyme_dup, &get<Is>(args)...,
             enzyme_const, &get<Js>(inactive_args)..., enzyme_interleave,
             &get<Is>(shadow_args)...);
}

template <typename qfunc_t, typename arg_ts, typename inactive_arg_ts>
MFEM_HOST_DEVICE inline
auto fwddiff_apply_enzyme(qfunc_t &qfunc, arg_ts &&args,
                          arg_ts &&shadow_args,
                          inactive_arg_ts &&inactive_args)
{
   auto arg_indices = std::make_index_sequence<
                      tuple_size<std::remove_reference_t<arg_ts>>::value> {};

   auto inactive_arg_indices = std::make_index_sequence<
                               tuple_size<std::remove_reference_t<inactive_arg_ts>>::value> {};

   return fwddiff_apply_enzyme_indexed(qfunc, args, shadow_args, arg_indices,
                                       inactive_args, inactive_arg_indices);
}

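/// @brief Evaluate the forward-mode (Enzyme) derivative of the qfunction at
/// quadrature point @a qp_idx and store the result in @a f_qp.
///
/// The primal arguments are filled from @a u and the shadow (seed) arguments
/// from @a v; the derivative is computed via fwddiff_apply_enzyme.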
template <typename qfunc_t, typename arg_ts, size_t num_args>
MFEM_HOST_DEVICE inline
void apply_kernel_fwddiff_enzyme(
   DeviceTensor<1, real_t> &f_qp,
   qfunc_t &qfunc,
   arg_ts &args,
   arg_ts &shadow_args,
   const std::array<DeviceTensor<2>, num_args> &u,
   const std::array<DeviceTensor<2>, num_args> &v,
   int qp_idx)
{
   process_qf_args(u, args, qp_idx);
   process_qf_args(v, shadow_args, qp_idx);
   process_qf_result(f_qp,
                     get<0>(fwddiff_apply_enzyme(qfunc, args, shadow_args, tuple<> {})));
}
#endif // MFEM_USE_ENZYME

} // namespace mfem::future