4.5.2/tensor_8hpp_source.html

 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
 // LICENSE and NOTICE for details. LLNL-CODE-806117.
 //
 // This file is part of the MFEM library. For more information and source code
 // availability visit https://mfem.org.
 //
 // MFEM is free software; you can redistribute it and/or modify it under the
 // terms of the BSD-3 license. We welcome feedback and contributions, see file
 // CONTRIBUTING.md for details.

 /**
  * @file tensor.hpp
  *
  * @brief Implementation of the tensor class
  */

 #ifndef MFEM_INTERNAL_TENSOR_HPP
 #define MFEM_INTERNAL_TENSOR_HPP

 #include "dual.hpp"
 #include <type_traits> // for std::false_type

 namespace mfem
 {
 namespace internal
 {

 // Defines MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING, the marker placed in front of
 // the make_tensor() templates in this file. When __CUDACC__ is not defined the
 // macro expands to nothing.
 // NOTE(review): __CUDAVER__ is not defined in this file; if no other header
 // defines it, it evaluates to 0 in the #if below and the #else branch is
 // always selected -- confirm which version macro was intended.
 // NOTE(review): a '#pragma' appearing in a macro replacement list is not
 // processed as a directive by a conforming preprocessor (the standard spelling
 // is _Pragma("...")) -- confirm that nvcc treats this form as intended.
 #if defined(__CUDACC__)
 #if __CUDAVER__ >= 75000
 #define MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma nv_exec_check_disable
 #else
 #define MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma hd_warning_disable
 #endif
 #else  //__CUDACC__
 #define MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 #endif

 /// Primary template of the statically sized tensor: @a T is the element type
 /// and the pack @a n lists one extent per dimension. It is only declared
 /// here; the partial specializations that follow provide the definitions.
 template <typename T, int... n>
 struct tensor;

 /// The implementation can be drastically generalized by using concepts of the
 /// c++17 standard.

 /**
  * @brief Specialization for an empty dimension pack: stores exactly one @a T
  *
  * Every subscript / call operator ignores its index argument and returns a
  * reference to the single stored value. The object is also implicitly
  * convertible to @a T.
  */
 template < typename T >
 struct tensor<T>
 {
    using type = T;
    static constexpr int ndim      = 1;
    static constexpr int first_dim = 0;

    /// the single stored value
    T values;

    MFEM_HOST_DEVICE T& operator[](int /*ignored*/) { return values; }
    MFEM_HOST_DEVICE const T& operator[](int /*ignored*/) const { return values; }
    MFEM_HOST_DEVICE T& operator()(int idx) { return (*this)[idx]; }
    MFEM_HOST_DEVICE const T& operator()(int idx) const { return (*this)[idx]; }

    /// implicit conversion returning a copy of the stored value
    MFEM_HOST_DEVICE operator T() const { return values; }
 };

 /**
  * @brief One-dimensional specialization: @a n0 values of type @a T held in a
  * built-in array
  *
  * `operator[]` and `operator()` are interchangeable; neither performs any
  * bounds checking.
  */
 template < typename T, int n0 >
 struct tensor<T, n0>
 {
    using type = T;
    static constexpr int ndim      = 1;
    static constexpr int first_dim = n0;

    /// the stored entries
    T values[n0];

    MFEM_HOST_DEVICE T& operator[](int i) { return values[i]; }
    MFEM_HOST_DEVICE const T& operator[](int i) const { return values[i]; }
    MFEM_HOST_DEVICE T& operator()(int i) { return (*this)[i]; }
    MFEM_HOST_DEVICE const T& operator()(int i) const { return (*this)[i]; }
 };

 /**
  * @brief Two-dimensional specialization, stored as @a n0 rows of
  * `tensor<T, n1>`
  *
  * A single index selects a row; two indices select a scalar entry. No bounds
  * checking is performed.
  */
 template < typename T, int n0, int n1 >
 struct tensor<T, n0, n1>
 {
    using type = T;
    static constexpr int ndim      = 2;
    static constexpr int first_dim = n0;

    /// the rows of the tensor
    tensor < T, n1 > values[n0];

    // row access
    MFEM_HOST_DEVICE tensor< T, n1 >& operator[](int i) { return values[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1 >& operator[](int i) const { return values[i]; }
    MFEM_HOST_DEVICE tensor< T, n1 >& operator()(int i) { return (*this)[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1 >& operator()(int i) const { return (*this)[i]; }

    // entry access
    MFEM_HOST_DEVICE T& operator()(int i, int j) { return (*this)[i][j]; }
    MFEM_HOST_DEVICE const T& operator()(int i, int j) const { return (*this)[i][j]; }
 };

 /**
  * @brief Three-dimensional specialization, stored as @a n0 slices of
  * `tensor<T, n1, n2>`
  *
  * Supplying k indices (k = 1, 2, 3) peels off the k leading dimensions. No
  * bounds checking is performed.
  */
 template < typename T, int n0, int n1, int n2 >
 struct tensor<T, n0, n1, n2>
 {
    using type = T;
    static constexpr int ndim      = 3;
    static constexpr int first_dim = n0;

    /// the leading-dimension slices
    tensor < T, n1, n2 > values[n0];

    // one index: a 2D slice
    MFEM_HOST_DEVICE tensor< T, n1, n2 >& operator[](int i) { return values[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1, n2 >& operator[](int i) const { return values[i]; }
    MFEM_HOST_DEVICE tensor< T, n1, n2 >& operator()(int i) { return (*this)[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1, n2 >& operator()(int i) const { return (*this)[i]; }

    // two indices: a 1D slice
    MFEM_HOST_DEVICE tensor< T, n2 >& operator()(int i, int j) { return (*this)[i][j]; }
    MFEM_HOST_DEVICE const tensor< T, n2 >& operator()(int i, int j) const { return (*this)[i][j]; }

    // three indices: a scalar entry
    MFEM_HOST_DEVICE T& operator()(int i, int j, int k) { return (*this)[i][j][k]; }
    MFEM_HOST_DEVICE const T& operator()(int i, int j, int k) const { return (*this)[i][j][k]; }
 };

 /**
  * @brief Four-dimensional specialization, stored as @a n0 slices of
  * `tensor<T, n1, n2, n3>`
  *
  * Supplying k indices (k = 1..4) peels off the k leading dimensions. No bounds
  * checking is performed.
  */
 template < typename T, int n0, int n1, int n2, int n3 >
 struct tensor<T, n0, n1, n2, n3>
 {
    using type = T;
    static constexpr int ndim      = 4;
    static constexpr int first_dim = n0;

    /// the leading-dimension slices
    tensor < T, n1, n2, n3 > values[n0];

    // one index: a 3D slice
    MFEM_HOST_DEVICE tensor< T, n1, n2, n3 >& operator[](int i) { return values[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1, n2, n3 >& operator[](int i) const { return values[i]; }
    MFEM_HOST_DEVICE tensor< T, n1, n2, n3 >& operator()(int i) { return (*this)[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1, n2, n3 >& operator()(int i) const { return (*this)[i]; }

    // two indices: a 2D slice
    MFEM_HOST_DEVICE tensor< T, n2, n3 >& operator()(int i, int j) { return (*this)[i][j]; }
    MFEM_HOST_DEVICE const tensor< T, n2, n3 >& operator()(int i, int j) const { return (*this)[i][j]; }

    // three indices: a 1D slice
    MFEM_HOST_DEVICE tensor< T, n3 >& operator()(int i, int j, int k) { return (*this)[i][j][k]; }
    MFEM_HOST_DEVICE const tensor< T, n3 >& operator()(int i, int j, int k) const { return (*this)[i][j][k]; }

    // four indices: a scalar entry
    MFEM_HOST_DEVICE T& operator()(int i, int j, int k, int l) { return (*this)[i][j][k][l]; }
    MFEM_HOST_DEVICE const T& operator()(int i, int j, int k, int l) const { return (*this)[i][j][k][l]; }
 };

 /**
  * @brief Five-dimensional specialization, stored as @a n0 slices of
  * `tensor<T, n1, n2, n3, n4>`
  *
  * Supplying k indices (k = 1..5) peels off the k leading dimensions. No bounds
  * checking is performed.
  */
 template < typename T, int n0, int n1, int n2, int n3, int n4 >
 struct tensor<T, n0, n1, n2, n3, n4>
 {
    using type = T;
    static constexpr int ndim      = 5;
    static constexpr int first_dim = n0;

    /// the leading-dimension slices
    tensor < T, n1, n2, n3, n4 > values[n0];

    // one index: a 4D slice
    MFEM_HOST_DEVICE tensor< T, n1, n2, n3, n4 >& operator[](int i) { return values[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1, n2, n3, n4 >& operator[](int i) const { return values[i]; }
    MFEM_HOST_DEVICE tensor< T, n1, n2, n3, n4 >& operator()(int i) { return (*this)[i]; }
    MFEM_HOST_DEVICE const tensor< T, n1, n2, n3, n4 >& operator()(int i) const { return (*this)[i]; }

    // two indices: a 3D slice
    MFEM_HOST_DEVICE tensor< T, n2, n3, n4 >& operator()(int i, int j) { return (*this)[i][j]; }
    MFEM_HOST_DEVICE const tensor< T, n2, n3, n4 >& operator()(int i, int j) const
    { return (*this)[i][j]; }

    // three indices: a 2D slice
    MFEM_HOST_DEVICE tensor< T, n3, n4>& operator()(int i, int j, int k) { return (*this)[i][j][k]; }
    MFEM_HOST_DEVICE const tensor< T, n3, n4>& operator()(int i, int j, int k) const
    { return (*this)[i][j][k]; }

    // four indices: a 1D slice
    MFEM_HOST_DEVICE tensor< T, n4 >& operator()(int i, int j, int k, int l)
    { return (*this)[i][j][k][l]; }
    MFEM_HOST_DEVICE const tensor< T, n4 >& operator()(int i, int j, int k, int l) const
    { return (*this)[i][j][k][l]; }

    // five indices: a scalar entry
    MFEM_HOST_DEVICE T& operator()(int i, int j, int k, int l, int m)
    { return (*this)[i][j][k][l][m]; }
    MFEM_HOST_DEVICE const T& operator()(int i, int j, int k, int l, int m) const
    { return (*this)[i][j][k][l][m]; }
 };

 /**
  * @brief A sentinel struct for eliding no-op tensor operations
  *
  * The type carries no data. Its conversions and its call operator are
  * const-qualified so that they can also be used on `const zero` objects.
  */
 struct zero
 {
    /** @brief `zero` is implicitly convertible to double with value 0.0 */
    MFEM_HOST_DEVICE operator double() const { return 0.0; }

    /** @brief `zero` is implicitly convertible to a tensor of any shape; the
        result is a value-initialized tensor */
    template <typename T, int... n>
    MFEM_HOST_DEVICE operator tensor<T, n...>() const
    {
       return tensor<T, n...> {};
    }

    /** @brief `zero` can be accessed like a multidimensional array; any
        number of arguments of any type is accepted and ignored */
    template <typename... T>
    MFEM_HOST_DEVICE zero operator()(T...) const
    {
       return zero{};
    }

    /** @brief anything assigned to `zero` does not change its value and returns `zero`
        @note the result is returned by value, not as a reference to `*this` */
    template <typename T>
    MFEM_HOST_DEVICE zero operator=(T)
    {
       return zero{};
    }
 };

 /** @brief type trait whose `value` is `true` only for the type `zero` */
 template <typename T>
 struct is_zero : std::integral_constant<bool, false> {};

 /** @brief specialization selected when the queried type is exactly `zero` */
 template <>
 struct is_zero<zero> : std::integral_constant<bool, true> {};

 /** @brief `zero` + `zero` is `zero` */
 MFEM_HOST_DEVICE constexpr zero operator+(zero /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /** @brief `zero` + x returns x unchanged */
 template <typename T>
 MFEM_HOST_DEVICE constexpr T operator+(zero /*lhs*/, T other)
 {
    return other;
 }

 /** @brief x + `zero` returns x unchanged */
 template <typename T>
 MFEM_HOST_DEVICE constexpr T operator+(T other, zero /*rhs*/)
 {
    return other;
 }

 /////////////////////////////////////////////////

 /** @brief negating `zero` gives `zero` */
 MFEM_HOST_DEVICE constexpr zero operator-(zero /*arg*/)
 {
    return zero();
 }

 /** @brief `zero` - `zero` is `zero` */
 MFEM_HOST_DEVICE constexpr zero operator-(zero /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /** @brief `zero` - x is the unary negation of x */
 template <typename T>
 MFEM_HOST_DEVICE constexpr T operator-(zero /*lhs*/, T other)
 {
    return -other;
 }

 /** @brief x - `zero` returns x unchanged */
 template <typename T>
 MFEM_HOST_DEVICE constexpr T operator-(T other, zero /*rhs*/)
 {
    return other;
 }

 /////////////////////////////////////////////////

 /** @brief `zero` * `zero` is `zero` */
 MFEM_HOST_DEVICE constexpr zero operator*(zero /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /** @brief `zero` * x is `zero`; the other factor is ignored */
 template <typename T>
 MFEM_HOST_DEVICE constexpr zero operator*(zero /*lhs*/, T /*rhs*/)
 {
    return zero();
 }

 /** @brief x * `zero` is `zero`; the other factor is ignored */
 template <typename T>
 MFEM_HOST_DEVICE constexpr zero operator*(T /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /** @brief `zero` / x is `zero`; the denominator is ignored */
 template <typename T>
 MFEM_HOST_DEVICE constexpr zero operator/(zero /*numerator*/, T /*denominator*/)
 {
    return zero();
 }

 /** @brief `zero` += `zero` yields `zero` (both operands are taken by value) */
 MFEM_HOST_DEVICE constexpr zero operator+=(zero /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /** @brief `zero` -= `zero` yields `zero` (both operands are taken by value) */
 MFEM_HOST_DEVICE constexpr zero operator-=(zero /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /**
  * @brief tuple-style access for `zero`: whatever index @a i is requested, the
  * argument itself is returned
  */
 template <int i>
 MFEM_HOST_DEVICE zero& get(zero& x) { return x; }

 /** @brief dot(x, `zero`) is `zero`; the left operand is ignored */
 template <typename T>
 MFEM_HOST_DEVICE zero dot(const T& /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /** @brief dot(`zero`, x) is `zero`; the right operand is ignored */
 template <typename T>
 MFEM_HOST_DEVICE zero dot(zero /*lhs*/, const T& /*rhs*/)
 {
    return zero();
 }

 /**
  * @brief Drops dimensions of extent 1 from a (at most two-dimensional) shape
  *
  * - n1 == 1 and n2 == 1 : plain @a T
  * - only n1 == 1        : tensor<T, n2>
  * - only n2 == 1        : tensor<T, n1>
  * - otherwise           : tensor<T, n1, n2>
  *
  * @tparam T The scalar type of the tensor
  * @tparam n1 The first dimension
  * @tparam n2 The second dimension (defaults to 1)
  */
 template <typename T, int n1, int n2 = 1>
 using reduced_tensor = typename std::conditional<
                        n1 == 1,
                        // leading dimension is trivial: scalar or vector of n2
                        typename std::conditional<n2 == 1, T, tensor<T, n2> >::type,
                        // leading dimension is kept: vector of n1 or full matrix
                        typename std::conditional<n2 == 1, tensor<T, n1>, tensor<T, n1, n2> >::type
                        >::type;

 /**
  * @brief Builds a `tensor` with an empty dimension pack from a single call to
  * a functor
  *
  * @tparam lambda_type The type of the functor
  * @param[in] f The functor; it is invoked once, with no arguments, and its
  *              result becomes the stored value
  *
  * @note the different cases of 0D, 1D, 2D, 3D, and 4D are implemented separately
  *       to work around a limitation in nvcc involving __host__ __device__ lambdas with `auto` parameters.
  */
 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <typename lambda_type>
 MFEM_HOST_DEVICE constexpr auto make_tensor(lambda_type f) ->
 tensor<decltype(f())>
 {
    return tensor<decltype(f())> {f()};
 }

 /**
  * @brief Builds a one-dimensional tensor whose i-th entry is `f(i)`
  *
  * @tparam n1 The dimension of the tensor
  * @tparam lambda_type The type of the functor
  * @param[in] f The functor to generate the tensor values from
  * @pre @a f must be callable with one argument of type @p int
  *
  * @note the different cases of 0D, 1D, 2D, 3D, and 4D are implemented separately
  *       to work around a limitation in nvcc involving __host__ __device__ lambdas with `auto` parameters.
  */
 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, typename lambda_type>
 MFEM_HOST_DEVICE auto make_tensor(lambda_type f) ->
 tensor<decltype(f(n1)), n1>
 {
    tensor<decltype(f(n1)), n1> result{};
    for (int idx = 0; idx < n1; ++idx)
    {
       result[idx] = f(idx);
    }
    return result;
 }

 /**
  * @brief Builds a two-dimensional tensor whose (i, j) entry is `f(i, j)`
  *
  * @tparam n1 The first dimension of the tensor
  * @tparam n2 The second dimension of the tensor
  * @tparam lambda_type The type of the functor
  * @param[in] f The functor to generate the tensor values from
  * @pre @a f must be callable with two arguments of type @p int
  *
  * @note the different cases of 0D, 1D, 2D, 3D, and 4D are implemented separately
  *       to work around a limitation in nvcc involving __host__ __device__ lambdas with `auto` parameters.
  */
 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, typename lambda_type>
 MFEM_HOST_DEVICE auto make_tensor(lambda_type f) ->
 tensor<decltype(f(n1, n2)), n1, n2>
 {
    tensor<decltype(f(n1, n2)), n1, n2> result{};
    for (int row = 0; row < n1; ++row)
    {
       for (int col = 0; col < n2; ++col)
       {
          result[row][col] = f(row, col);
       }
    }
    return result;
 }

 /**
  * @brief Builds a three-dimensional tensor whose (i, j, k) entry is
  * `f(i, j, k)`
  *
  * @tparam n1 The first dimension of the tensor
  * @tparam n2 The second dimension of the tensor
  * @tparam n3 The third dimension of the tensor
  * @tparam lambda_type The type of the functor
  * @param[in] f The functor to generate the tensor values from
  * @pre @a f must be callable with three arguments of type @p int
  *
  * @note the different cases of 0D, 1D, 2D, 3D, and 4D are implemented separately
  *       to work around a limitation in nvcc involving __host__ __device__ lambdas with `auto` parameters.
  */
 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, int n3, typename lambda_type>
 MFEM_HOST_DEVICE auto make_tensor(lambda_type f) ->
 tensor<decltype(f(n1, n2, n3)), n1, n2, n3>
 {
    tensor<decltype(f(n1, n2, n3)), n1, n2, n3> result{};
    for (int i0 = 0; i0 < n1; ++i0)
    {
       for (int i1 = 0; i1 < n2; ++i1)
       {
          for (int i2 = 0; i2 < n3; ++i2)
          {
             result[i0][i1][i2] = f(i0, i1, i2);
          }
       }
    }
    return result;
 }

 /**
  * @brief Builds a four-dimensional tensor whose (i, j, k, l) entry is
  * `f(i, j, k, l)`
  *
  * @tparam n1 The first dimension of the tensor
  * @tparam n2 The second dimension of the tensor
  * @tparam n3 The third dimension of the tensor
  * @tparam n4 The fourth dimension of the tensor
  * @tparam lambda_type The type of the functor
  * @param[in] f The functor to generate the tensor values from
  * @pre @a f must be callable with four arguments of type @p int
  *
  * @note the different cases of 0D, 1D, 2D, 3D, and 4D are implemented separately
  *       to work around a limitation in nvcc involving __host__ __device__ lambdas with `auto` parameters.
  */
 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
 template <int n1, int n2, int n3, int n4, typename lambda_type>
 MFEM_HOST_DEVICE auto make_tensor(lambda_type f) ->
 tensor<decltype(f(n1, n2, n3, n4)), n1, n2, n3, n4>
 {
    tensor<decltype(f(n1, n2, n3, n4)), n1, n2, n3, n4> result{};
    for (int i0 = 0; i0 < n1; ++i0)
    {
       for (int i1 = 0; i1 < n2; ++i1)
       {
          for (int i2 = 0; i2 < n3; ++i2)
          {
             for (int i3 = 0; i3 < n4; ++i3)
             {
                result[i0][i1][i2][i3] = f(i0, i1, i2, i3);
             }
          }
       }
    }
    return result;
 }

 /**
  * @brief entry-wise sum of two tensors of identical shape
  *
  * The sum is formed slice by slice along the leading dimension.
  *
  * @tparam S the underlying type of the lefthand argument
  * @tparam T the underlying type of the righthand argument
  * @tparam n integers describing the tensor shape
  * @param[in] A The lefthand operand
  * @param[in] B The righthand operand
  * @return a tensor whose element type is that of `S{} + T{}`
  */
 template <typename S, typename T, int... n>
 MFEM_HOST_DEVICE auto operator+(const tensor<S, n...>& A,
                                 const tensor<T, n...>& B) ->
 tensor<decltype(S {} + T{}), n...>
 {
    using sum_type = decltype(S{} + T{});
    constexpr int extent = tensor<T, n...>::first_dim;
    tensor<sum_type, n...> total{};
    for (int idx = 0; idx < extent; ++idx)
    {
       total[idx] = A[idx] + B[idx];
    }
    return total;
 }

 /**
  * @brief entry-wise unary negation of a tensor
  *
  * The negation is applied slice by slice along the leading dimension.
  *
  * @tparam T the underlying type of the argument
  * @tparam n integers describing the tensor shape
  * @param[in] A The tensor to negate
  */
 template <typename T, int... n>
 MFEM_HOST_DEVICE tensor<T, n...> operator-(const tensor<T, n...>& A)
 {
    constexpr int extent = tensor<T, n...>::first_dim;
    tensor<T, n...> negated{};
    for (int idx = 0; idx < extent; ++idx)
    {
       negated[idx] = -A[idx];
    }
    return negated;
 }

 /**
  * @brief return the difference of two tensors
  * @tparam S the underlying type of the lefthand argument
  * @tparam T the underlying type of the righthand argument
  * @tparam n integers describing the tensor shape
  * @param[in] A The lefthand operand
  * @param[in] B The righthand operand
  */
 template <typename S, typename T, int... n>
 MFEM_HOST_DEVICE auto operator-(const tensor<S, n...>& A,
                                 const tensor<T, n...>& B) ->
 tensor<decltype(S {} + T{}), n...>
 {
    tensor<decltype(S{} + T{}), n...> C{};
    for (int i = 0; i < tensor<T, n...>::first_dim; i++)
    {
       C[i] = A[i] - B[i];
    }
    return C;
 }

 /**
  * @brief scale a tensor by a scalar placed on the left
  *
  * The scaling is applied slice by slice along the leading dimension.
  *
  * @tparam S the scalar value type. Must be arithmetic (e.g. float, double, int) or a dual number
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in] scale The scaling factor
  * @param[in] A The tensor to be scaled
  * @return a tensor whose element type is that of `S{} * T{}`
  */
 template <typename S, typename T, int... n,
           typename = typename std::enable_if<std::is_arithmetic<S>::value ||
                                              is_dual_number<S>::value>::type>
 MFEM_HOST_DEVICE auto operator*(S scale, const tensor<T, n...>& A) ->
 tensor<decltype(S {} * T{}), n...>
 {
    using product_type = decltype(S{} * T{});
    constexpr int extent = tensor<T, n...>::first_dim;
    tensor<product_type, n...> scaled{};
    for (int idx = 0; idx < extent; ++idx)
    {
       scaled[idx] = scale * A[idx];
    }
    return scaled;
 }

 /**
  * @brief scale a tensor by a scalar placed on the right
  *
  * The scaling is applied slice by slice along the leading dimension.
  *
  * @tparam S the scalar value type. Must be arithmetic (e.g. float, double, int) or a dual number
  * @tparam T the underlying type of the tensor (lefthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in] A The tensor to be scaled
  * @param[in] scale The scaling factor
  * @return a tensor whose element type is that of `T{} * S{}`
  */
 template <typename S, typename T, int... n,
           typename = typename std::enable_if<std::is_arithmetic<S>::value ||
                                              is_dual_number<S>::value>::type>
 MFEM_HOST_DEVICE auto operator*(const tensor<T, n...>& A, S scale) ->
 tensor<decltype(T {} * S{}), n...>
 {
    using product_type = decltype(T{} * S{});
    constexpr int extent = tensor<T, n...>::first_dim;
    tensor<product_type, n...> scaled{};
    for (int idx = 0; idx < extent; ++idx)
    {
       scaled[idx] = A[idx] * scale;
    }
    return scaled;
 }

 /**
  * @brief divide a scalar by each element in a tensor
  * @tparam S the scalar value type. Must be arithmetic (e.g. float, double, int) or a dual number
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in] scale The numerator
  * @param[in] A The tensor of denominators
  */
 template <typename S, typename T, int... n,
           typename = typename std::enable_if<std::is_arithmetic<S>::value ||
                                              is_dual_number<S>::value>::type>
 MFEM_HOST_DEVICE auto operator/(S scale, const tensor<T, n...>& A) ->
 tensor<decltype(S {} * T{}), n...>
 {
    tensor<decltype(S{} * T{}), n...> C{};
    for (int i = 0; i < tensor<T, n...>::first_dim; i++)
    {
       C[i] = scale / A[i];
    }
    return C;
 }

 /**
  * @brief divide a tensor by a scalar
  * @tparam S the scalar value type. Must be arithmetic (e.g. float, double, int) or a dual number
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in] A The tensor of numerators
  * @param[in] scale The denominator
  */
 template <typename S, typename T, int... n,
           typename = typename std::enable_if<std::is_arithmetic<S>::value ||
                                              is_dual_number<S>::value>::type>
 MFEM_HOST_DEVICE auto operator/(const tensor<T, n...>& A, S scale) ->
 tensor<decltype(T {} * S{}), n...>
 {
    tensor<decltype(T{} * S{}), n...> C{};
    for (int i = 0; i < tensor<T, n...>::first_dim; i++)
    {
       C[i] = A[i] / scale;
    }
    return C;
 }

 /**
  * @brief in-place entry-wise addition of one tensor to another
  *
  * The update is applied slice by slice along the leading dimension.
  *
  * @tparam S the underlying type of the tensor (lefthand) argument
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in,out] A The lefthand tensor, modified in place
  * @param[in] B The righthand tensor
  * @return a reference to @a A
  */
 template <typename S, typename T, int... n> MFEM_HOST_DEVICE
 tensor<S, n...>& operator+=(tensor<S, n...>& A,
                             const tensor<T, n...>& B)
 {
    constexpr int extent = tensor<S, n...>::first_dim;
    for (int idx = 0; idx < extent; ++idx)
    {
       A[idx] += B[idx];
    }
    return A;
 }

 /**
  * @brief compound assignment (+) of a scalar to a tensor with an empty
  * dimension pack
  * @tparam T the underlying type of the tensor argument
  * @param[in,out] A The lefthand tensor; its single stored value is incremented
  * @param[in] B The value to add
  * @return a reference to @a A
  */
 template <typename T> MFEM_HOST_DEVICE
 tensor<T>& operator+=(tensor<T>& A, const T& B)
 {
    // `A.values += B` yields a T&, which cannot bind to the declared
    // tensor<T>& return type; update the value first, then return the tensor.
    A.values += B;
    return A;
 }

 /**
  * @brief compound assignment (+) of a scalar to a tensor with a single entry
  * @tparam T the underlying type of the tensor argument
  * @param[in,out] A The lefthand tensor; its only entry is incremented
  * @param[in] B The value to add
  * @return a reference to @a A
  */
 template <typename T> MFEM_HOST_DEVICE
 tensor<T, 1>& operator+=(tensor<T, 1>& A, const T& B)
 {
    // `values` is a one-element array here, so the increment has to be
    // applied to its entry rather than to the array itself.
    A.values[0] += B;
    return A;
 }

 /**
  * @brief compound assignment (+) of a scalar to a 1x1 tensor
  * @tparam T the underlying type of the tensor argument
  * @param[in,out] A The lefthand tensor; its only entry is incremented
  * @param[in] B The value to add
  * @return a reference to @a A
  */
 template <typename T> MFEM_HOST_DEVICE
 tensor<T, 1, 1>& operator+=(tensor<T, 1, 1>& A, const T& B)
 {
    // `values` is a one-element array of tensor<T, 1>; reach the scalar entry
    // through both levels before incrementing it.
    A.values[0].values[0] += B;
    return A;
 }

 /**
  * @brief adding `zero` to a tensor in place leaves the tensor untouched
  * @tparam T the underlying type of the tensor (lefthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in] A The lefthand tensor
  * @return a reference to @a A
  */
 template <typename T, int... n> MFEM_HOST_DEVICE
 tensor<T, n...>& operator+=(tensor<T, n...>& A, zero /*unused*/)
 {
    // nothing to accumulate
    return A;
 }

 /**
  * @brief in-place entry-wise subtraction of one tensor from another
  *
  * The update is applied slice by slice along the leading dimension.
  *
  * @tparam S the underlying type of the tensor (lefthand) argument
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in,out] A The lefthand tensor, modified in place
  * @param[in] B The righthand tensor
  * @return a reference to @a A
  */
 template <typename S, typename T, int... n> MFEM_HOST_DEVICE
 tensor<S, n...>& operator-=(tensor<S, n...>& A, const tensor<T, n...>& B)
 {
    constexpr int extent = tensor<S, n...>::first_dim;
    for (int idx = 0; idx < extent; ++idx)
    {
       A[idx] -= B[idx];
    }
    return A;
 }

 /**
  * @brief subtracting `zero` from a tensor in place leaves the tensor untouched
  * @tparam T the underlying type of the tensor (lefthand) argument
  * @tparam n integers describing the tensor shape
  * @param[in] A The lefthand tensor
  * @return a reference to @a A
  */
 template <typename T, int... n> MFEM_HOST_DEVICE
 constexpr tensor<T, n...>& operator-=(tensor<T, n...>& A, zero /*unused*/)
 {
    return A;
 }

 /**
  * @brief outer product of two scalars, i.e. their ordinary product
  * @tparam S the type of the lefthand argument
  * @tparam T the type of the righthand argument
  * @param[in] A The lefthand argument
  * @param[in] B The righthand argument
  *
  * @note both types must be arithmetic; this is enforced at compile time
  */
 template <typename S, typename T> MFEM_HOST_DEVICE
 auto outer(S A, T B) -> decltype(A * B)
 {
    static_assert(std::is_arithmetic<S>::value && std::is_arithmetic<T>::value,
                  "outer product types must be tensor or arithmetic_type");
    using product_type = decltype(A * B);
    return product_type(A * B);
 }

 /**
  * @overload
  * @note scalar (left) times first order tensor (right): every entry of @a B
  * is multiplied by @a A from the left. @a S must be arithmetic.
  */
 template <typename S, typename T, int n> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), n> outer(S A, tensor<T, n> B)
 {
    static_assert(std::is_arithmetic<S>::value,
                  "outer product types must be tensor or arithmetic_type");
    using product_type = decltype(S{} * T{});
    tensor<product_type, n> result{};
    for (int idx = 0; idx < n; ++idx)
    {
       result[idx] = A * B[idx];
    }
    return result;
 }

 /**
  * @overload
  * @note first order tensor (left) times scalar (right): every entry of @a A
  * is multiplied by @a B from the right. @a T must be arithmetic.
  */
 template <typename S, typename T, int m> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), m> outer(const tensor<S, m>& A, T B)
 {
    static_assert(std::is_arithmetic<T>::value,
                  "outer product types must be tensor or arithmetic_type");
    using product_type = decltype(S{} * T{});
    tensor<product_type, m> result{};
    for (int idx = 0; idx < m; ++idx)
    {
       result[idx] = A[idx] * B;
    }
    return result;
 }

 /**
  * @overload
  * @note `zero` (left) with a first order tensor (right): the result is
  * `zero` and the tensor is not inspected
  */
 template <typename T, int n> MFEM_HOST_DEVICE
 zero outer(zero /*lhs*/, const tensor<T, n>& /*rhs*/)
 {
    return zero();
 }

 /**
  * @overload
  * @note first order tensor (left) with `zero` (right): the result is `zero`
  * and the tensor is not inspected
  */
 template <typename T, int n> MFEM_HOST_DEVICE
 zero outer(const tensor<T, n>& /*lhs*/, zero /*rhs*/)
 {
    return zero();
 }

 /**
  * @overload
  * @note scalar (left) times 2nd order tensor (right): every entry of @a B is
  * multiplied by @a A from the left. @a S must be arithmetic.
  */
 template <typename S, typename T, int m, int n> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), m, n> outer(S A, const tensor<T, m, n>& B)
 {
    static_assert(std::is_arithmetic<S>::value,
                  "outer product types must be tensor or arithmetic_type");
    using product_type = decltype(S{} * T{});
    tensor<product_type, m, n> result{};
    for (int row = 0; row < m; ++row)
    {
       for (int col = 0; col < n; ++col)
       {
          result[row][col] = A * B[row][col];
       }
    }
    return result;
 }

 /**
  * @overload
  * @note both arguments are first order tensors: result(i, j) = A(i) * B(j)
  */
 template <typename S, typename T, int m, int n> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), m, n> outer(const tensor<S, m>& A,
                                         const tensor<T, n>& B)
 {
    using product_type = decltype(S{} * T{});
    tensor<product_type, m, n> result{};
    for (int row = 0; row < m; ++row)
    {
       for (int col = 0; col < n; ++col)
       {
          result[row][col] = A[row] * B[col];
       }
    }
    return result;
 }

 /**
  * @overload
  * @note 2nd order tensor (left) times scalar (right): every entry of @a A is
  * multiplied by @a B from the right. @a T must be arithmetic.
  */
 template <typename S, typename T, int m, int n> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), m, n> outer(const tensor<S, m, n>& A, T B)
 {
    static_assert(std::is_arithmetic<T>::value,
                  "outer product types must be tensor or arithmetic_type");
    using product_type = decltype(S{} * T{});
    tensor<product_type, m, n> result{};
    for (int row = 0; row < m; ++row)
    {
       for (int col = 0; col < n; ++col)
       {
          result[row][col] = A[row][col] * B;
       }
    }
    return result;
 }

 /**
  * @overload
  * @note 2nd order tensor (left) with first order tensor (right):
  * result(i, j, k) = A(i, j) * B(k)
  */
 template <typename S, typename T, int m, int n, int p> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), m, n, p> outer(const tensor<S, m, n>& A,
                                            const tensor<T, p>& B)
 {
    using product_type = decltype(S{} * T{});
    tensor<product_type, m, n, p> result{};
    for (int i0 = 0; i0 < m; ++i0)
    {
       for (int i1 = 0; i1 < n; ++i1)
       {
          for (int i2 = 0; i2 < p; ++i2)
          {
             result[i0][i1][i2] = A[i0][i1] * B[i2];
          }
       }
    }
    return result;
 }

 /**
  * @overload
  * @note first order tensor (left) with 2nd order tensor (right):
  * result(i, j, k) = A(i) * B(j, k)
  */
 template <typename S, typename T, int m, int n, int p> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), m, n, p> outer(const tensor<S, m>& A,
                                            const tensor<T, n, p>& B)
 {
    using product_type = decltype(S{} * T{});
    tensor<product_type, m, n, p> result{};
    for (int i0 = 0; i0 < m; ++i0)
    {
       for (int i1 = 0; i1 < n; ++i1)
       {
          for (int i2 = 0; i2 < p; ++i2)
          {
             result[i0][i1][i2] = A[i0] * B[i1][i2];
          }
       }
    }
    return result;
 }

 /**
  * @overload
  * @note both arguments are 2nd order tensors:
  * result(i, j, k, l) = A(i, j) * B(k, l)
  */
 template <typename S, typename T, int m, int n, int p, int q> MFEM_HOST_DEVICE
 tensor<decltype(S{} * T{}), m, n, p, q> outer(const tensor<S, m, n>& A,
                                               const tensor<T, p, q>& B)
 {
    using product_type = decltype(S{} * T{});
    tensor<product_type, m, n, p, q> result{};
    for (int i0 = 0; i0 < m; ++i0)
    {
       for (int i1 = 0; i1 < n; ++i1)
       {
          for (int i2 = 0; i2 < p; ++i2)
          {
             for (int i3 = 0; i3 < q; ++i3)
             {
                result[i0][i1][i2][i3] = A[i0][i1] * B[i2][i3];
             }
          }
       }
    }
    return result;
 }

 /**
  * @brief full contraction of two 2nd order tensors of equal shape:
  * returns sum_ij A(i, j) * B(i, j)
  * @tparam S the underlying type of the tensor (lefthand) argument
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam m the number of rows
  * @tparam n the number of columns
  * @param[in] A The lefthand tensor
  * @param[in] B The righthand tensor
  */
 template <typename S, typename T, int m, int n> MFEM_HOST_DEVICE
 auto inner(const tensor<S, m, n>& A, const tensor<T, m, n>& B) ->
 decltype(S {} * T{})
 {
    using product_type = decltype(S{} * T{});
    product_type total{};
    for (int row = 0; row < m; ++row)
    {
       for (int col = 0; col < n; ++col)
       {
          total += A[row][col] * B[row][col];
       }
    }
    return total;
 }

 /**
  * @brief matrix . matrix product: contracts the last index of @a A with the
  * first index of @a B, i.e. returns C such that C_ij = sum_k A_ik B_kj.
  * @tparam S the underlying type of the tensor (lefthand) argument
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam m the number of rows of @a A
  * @tparam n the number of columns of @a A and of rows of @a B
  * @tparam p the number of columns of @a B
  * @param[in] A The lefthand tensor
  * @param[in] B The righthand tensor
  */
 template <typename S, typename T, int m, int n, int p> MFEM_HOST_DEVICE
 auto dot(const tensor<S, m, n>& A,
          const tensor<T, n, p>& B) ->
 tensor<decltype(S {} * T{}), m, p>
 {
    using entry_type = decltype(S{} * T{});
    tensor<entry_type, m, p> product{};
    for (int row = 0; row < m; ++row)
    {
       for (int col = 0; col < p; ++col)
       {
          entry_type& entry = product[row][col];
          for (int k = 0; k < n; ++k)
          {
             entry = entry + A[row][k] * B[k][col];
          }
       }
    }
    return product;
 }

 /**
  * @overload
  * @note vector . matrix: result(i) = sum_j A(j) * B(j, i)
  */
 template <typename S, typename T, int m, int n> MFEM_HOST_DEVICE
 auto dot(const tensor<S, m>& A, const tensor<T, m, n>& B) ->
 tensor<decltype(S {} * T{}), n>
 {
    using entry_type = decltype(S{} * T{});
    tensor<entry_type, n> product{};
    for (int col = 0; col < n; ++col)
    {
       entry_type& entry = product[col];
       for (int k = 0; k < m; ++k)
       {
          entry = entry + A[k] * B[k][col];
       }
    }
    return product;
 }

 /**
  * @overload
  * @note matrix . vector: result(i) = sum_j A(i, j) * B(j)
  */
 template <typename S, typename T, int m, int n> MFEM_HOST_DEVICE
 auto dot(const tensor<S, m, n>& A, const tensor<T, n>& B) ->
 tensor<decltype(S {} * T{}), m>
 {
    using entry_type = decltype(S{} * T{});
    tensor<entry_type, m> product{};
    for (int row = 0; row < m; ++row)
    {
       entry_type& entry = product[row];
       for (int k = 0; k < n; ++k)
       {
          entry = entry + A[row][k] * B[k];
       }
    }
    return product;
 }

 /**
  * @overload
  * @note 3rd-order-tensor . vector
  */
 template <typename S, typename T, int m, int n, int p> MFEM_HOST_DEVICE
 auto dot(const tensor<S, m, n, p>& A, const tensor<T, p>& B) ->
 tensor<decltype(S {} * T{}), m, n>
 {
    // 3rd-order-tensor . vector: result(a, b) = sum_c A(a, b, c) * B(c)
    using scalar_t = decltype(S{} * T{});
    tensor<scalar_t, m, n> product{};
    for (int a = 0; a < m; ++a)
    {
       for (int b = 0; b < n; ++b)
       {
          for (int c = 0; c < p; ++c)
          {
             product[a][b] += A[a][b][c] * B[c];
          }
       }
    }
    return product;
 }

 // /**
 //  * @brief Dot product of a vector . vector and vector . tensor
 //  *
 //  * @tparam S the underlying type of the tensor (lefthand) argument
 //  * @tparam T the underlying type of the tensor (righthand) argument
 //  * @tparam m the dimension of the first tensor
 //  * @tparam n the parameter pack of dimensions of the second tensor
 //  * @param A The lefthand tensor
 //  * @param B The righthand tensor
 //  * @return The computed dot product
 //  */
 // template <typename S, typename T, int m, int... n>
 // auto dot(const tensor<S, m>& A, const tensor<T, m, n...>& B)
 // {
 //    // this dot product function includes the vector * vector implementation and
 //    // the vector * tensor one, since clang emits an error about ambiguous
 //    // overloads if they are separate functions. The `if constexpr` expression avoids
 //    // using an `else` because that confuses nvcc (11.2) into thinking there's not
 //    // a return statement
 //    if constexpr (sizeof...(n) == 0)
 //    {
 //       decltype(S{} * T{}) AB{};
 //       for (int i = 0; i < m; i++)
 //       {
 //          AB += A[i] * B[i];
 //       }
 //       return AB;
 //    }

 //    if constexpr (sizeof...(n) > 0)
 //    {
 //       constexpr int                     dimensions[] = {n...};
 //       tensor<decltype(S{} * T{}), n...> AB{};
 //       for (int i = 0; i < dimensions[0]; i++)
 //       {
 //          for (int j = 0; j < m; j++)
 //          {
 //             AB[i] = AB[i] + A[j] * B[j][i];
 //          }
 //       }
 //       return AB;
 //    }
 // }

 template <typename S, typename T, int m> MFEM_HOST_DEVICE
 auto dot(const tensor<S, m>& A, const tensor<T, m>& B) ->
 decltype(S {} * T{})
 {
    // vector . vector: scalar sum_k A(k) * B(k)
    using scalar_t = decltype(S{} * T{});
    scalar_t acc{};
    for (int k = 0; k < m; ++k)
    {
       acc += A[k] * B[k];
    }
    return acc;
 }

 template <typename S, typename T, int m, int... n> MFEM_HOST_DEVICE
 auto dot(const tensor<S, m>& A, const tensor<T, m, n...>& B) ->
 tensor<decltype(S {} * T{}), n...>
 {
    // vector . tensor: contracts A with the leading index of B. Only the
    // first remaining extent is looped over explicitly; the trailing indices
    // are handled by the arithmetic on the sub-tensors B[k][lead].
    using scalar_t = decltype(S{} * T{});
    constexpr int shape[] = {n...};
    tensor<scalar_t, n...> product{};
    for (int lead = 0; lead < shape[0]; ++lead)
    {
       for (int k = 0; k < m; ++k)
       {
          product[lead] = product[lead] + A[k] * B[k][lead];
       }
    }
    return product;
 }

 /**
  * @overload
  * @note vector . matrix . vector
  */
 template <typename S, typename T, typename U, int m, int n> MFEM_HOST_DEVICE
 auto dot(const tensor<S, m>& u, const tensor<T, m, n>& A,
          const tensor<U, n>& v) ->
 decltype(S {} * T{} * U{})
 {
    // Bilinear form u^T A v = sum_rc u(r) * A(r, c) * v(c)
    using scalar_t = decltype(S{} * T{} * U{});
    scalar_t acc{};
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < n; ++c)
       {
          acc += u[r] * A[r][c] * v[c];
       }
    }
    return acc;
 }

 /**
  * @brief double dot product, contracting over the two "middle" indices
  * @tparam S the underlying type of the tensor (lefthand) argument
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam m first dimension of A
  * @tparam n second dimension of A
  * @tparam p third dimension of A, first dimensions of B
  * @tparam q fourth dimension of A, second dimensions of B
  * @param[in] A The lefthand tensor
  * @param[in] B The righthand tensor
  */
 template <typename S, typename T, int m, int n, int p, int q> MFEM_HOST_DEVICE
 auto ddot(const tensor<S, m, n, p, q>& A, const tensor<T, p, q>& B) ->
 tensor<decltype(S {} * T{}), m, n>
 {
    // result(a, b) = sum_cd A(a, b, c, d) * B(c, d)
    using scalar_t = decltype(S{} * T{});
    tensor<scalar_t, m, n> contracted{};
    for (int a = 0; a < m; ++a)
    {
       for (int b = 0; b < n; ++b)
       {
          for (int c = 0; c < p; ++c)
          {
             for (int d = 0; d < q; ++d)
             {
                contracted[a][b] += A[a][b][c][d] * B[c][d];
             }
          }
       }
    }
    return contracted;
 }

 /**
  * @overload
  * @note 3rd-order-tensor : 2nd-order-tensor. Returns vector C, such that C_i =
  * sum_jk A_ijk B_jk.
  */
 template <typename S, typename T, int m, int n, int p> MFEM_HOST_DEVICE
 auto ddot(const tensor<S, m, n, p>& A, const tensor<T, n, p>& B) ->
 tensor<decltype(S {} * T{}), m>
 {
    // result(a) = sum_bc A(a, b, c) * B(b, c)
    using scalar_t = decltype(S{} * T{});
    tensor<scalar_t, m> contracted{};
    for (int a = 0; a < m; ++a)
    {
       for (int b = 0; b < n; ++b)
       {
          for (int c = 0; c < p; ++c)
          {
             contracted[a] += A[a][b][c] * B[b][c];
          }
       }
    }
    return contracted;
 }

 /**
  * @overload
  * @note 2nd-order-tensor : 2nd-order-tensor, like inner()
  */
 template <typename S, typename T, int m, int n> MFEM_HOST_DEVICE
 auto ddot(const tensor<S, m, n>& A, const tensor<T, m, n>& B) ->
 decltype(S {} * T{})
 {
    // Scalar sum_rc A(r, c) * B(r, c)
    using scalar_t = decltype(S{} * T{});
    scalar_t acc{};
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < n; ++c)
       {
          acc += A[r][c] * B[r][c];
       }
    }
    return acc;
 }

 /**
  * @brief this is a shorthand for dot(A, B)
  * @tparam S the underlying type of the tensor (lefthand) argument
  * @tparam T the underlying type of the tensor (righthand) argument
  * @tparam m integers describing the shape of A
  * @tparam n integers describing the shape of B
  * @param[in] A The lefthand tensor
  * @param[in] B The righthand tensor
  * @return the value of dot(A, B)
  * @note the return type is spelled decltype(dot(A, B)), so the result type is
  * whatever the selected dot() overload returns
  */
 template <typename S, typename T, int... m, int... n> MFEM_HOST_DEVICE
 auto operator*(const tensor<S, m...>& A, const tensor<T, n...>& B) ->
 decltype(dot(A, B))
 {
    return dot(A, B);
 }

 /**
  * @brief Returns the squared Frobenius norm of the tensor
  * @param[in] A The tensor to obtain the squared norm from
  */
 template <typename T, int m> MFEM_HOST_DEVICE
 T sqnorm(const tensor<T, m>& A)
 {
    // Sum of squared entries of a rank-1 tensor.
    T acc{};
    for (int k = 0; k < m; ++k)
    {
       acc += A[k] * A[k];
    }
    return acc;
 }

 /**
  * @overload
  * @brief Returns the squared Frobenius norm of the tensor
  */
 template <typename T, int m, int n> MFEM_HOST_DEVICE
 T sqnorm(const tensor<T, m, n>& A)
 {
    // Sum of squared entries of a rank-2 tensor, visited row by row.
    T acc{};
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < n; ++c)
       {
          acc += A[r][c] * A[r][c];
       }
    }
    return acc;
 }

 /**
  * @brief Returns the Frobenius norm of the tensor
  * @tparam T the underlying type of the tensor
  * @tparam n integers describing the tensor shape
  * @param[in] A The tensor to obtain the norm from
  * @return std::sqrt(sqnorm(A))
  * @note this forwards to sqnorm(); the sqnorm() overloads defined just above
  * cover rank-1 and rank-2 tensors
  */
 template <typename T, int... n> MFEM_HOST_DEVICE
 T norm(const tensor<T, n...>& A)
 {
    return std::sqrt(sqnorm(A));
 }

 /**
  * @brief Normalizes the tensor
  * Each element is divided by the Frobenius norm of the tensor, @see norm
  * @tparam T the underlying type of the tensor
  * @tparam n integers describing the tensor shape
  * @param[in] A The tensor to normalize
  * @return A / norm(A)
  * @note no check is made for norm(A) == 0
  */
 template <typename T, int... n> MFEM_HOST_DEVICE
 auto normalize(const tensor<T, n...>& A) ->
 decltype(A / norm(A))
 {
    return A / norm(A);
 }

 /**
  * @brief Returns the trace of a square matrix
  * @param[in] A The matrix to compute the trace of
  * @return The sum of the elements on the main diagonal
  */
 template <typename T, int n> MFEM_HOST_DEVICE
 T tr(const tensor<T, n, n>& A)
 {
    // Accumulate the main diagonal, starting from a value-initialized T.
    T diagonal_sum{};
    for (int d = 0; d < n; ++d)
    {
       diagonal_sum = diagonal_sum + A[d][d];
    }
    return diagonal_sum;
 }

 /**
  * @brief Returns the symmetric part of a square matrix
  * @param[in] A The matrix to obtain the symmetric part of
  * @return (1/2) * (A + A^T)
  */
 template <typename T, int n> MFEM_HOST_DEVICE
 tensor<T, n, n> sym(const tensor<T, n, n>& A)
 {
    // Each entry is the average of A(r, c) and its mirror image A(c, r).
    tensor<T, n, n> result{};
    for (int r = 0; r < n; ++r)
    {
       for (int c = 0; c < n; ++c)
       {
          result[r][c] = 0.5 * (A[r][c] + A[c][r]);
       }
    }
    return result;
 }

 /**
  * @brief Calculates the deviator of a matrix (rank-2 tensor)
  * @param[in] A The matrix to calculate the deviator of
  * In the context of stress tensors, the deviator is obtained by
  * subtracting the mean stress (average of main diagonal elements)
  * from each element on the main diagonal
  */
 template <typename T, int n> MFEM_HOST_DEVICE
 tensor<T, n, n> dev(const tensor<T, n, n>& A)
 {
    // Start from a copy of A and remove tr(A) / n from every diagonal entry.
    tensor<T, n, n> deviator = A;
    const T trace = tr(A);
    for (int d = 0; d < n; ++d)
    {
       deviator[d][d] -= trace / n;
    }
    return deviator;
 }

 /**
  * @brief Obtains the identity matrix of the specified dimension
  * @return I_dim
  */
 template <int dim>
 MFEM_HOST_DEVICE tensor<double, dim, dim> Identity()
 {
    // Ones on the main diagonal, zeros everywhere else.
    tensor<double, dim, dim> eye{};
    for (int r = 0; r < dim; ++r)
    {
       for (int c = 0; c < dim; ++c)
       {
          eye[r][c] = (r == c) ? 1.0 : 0.0;
       }
    }
    return eye;
 }

 /**
  * @brief Returns the transpose of the matrix
  * @param[in] A The matrix to obtain the transpose of
  */
 template <typename T, int m, int n> MFEM_HOST_DEVICE
 tensor<T, n, m> transpose(const tensor<T, m, n>& A)
 {
    // Walk the source in its own (m x n) layout and scatter into the result.
    tensor<T, n, m> flipped{};
    for (int src_row = 0; src_row < m; ++src_row)
    {
       for (int src_col = 0; src_col < n; ++src_col)
       {
          flipped[src_col][src_row] = A[src_row][src_col];
       }
    }
    return flipped;
 }

 /**
  * @brief Returns the determinant of a matrix
  * @tparam T the underlying type of the matrix entries
  * @param[in] A The matrix to obtain the determinant of
  * @return A_00 A_11 - A_01 A_10 (closed form for the 2-by-2 case)
  */
 template <typename T> MFEM_HOST_DEVICE
 T det(const tensor<T, 2, 2>& A)
 {
    return A[0][0] * A[1][1] - A[0][1] * A[1][0];
 }
 /// @overload
 /// @note 3-by-3 case: the three "forward diagonal" triple products are added
 /// and the three "backward diagonal" triple products are subtracted
 /// (rule of Sarrus).
 template <typename T> MFEM_HOST_DEVICE
 T det(const tensor<T, 3, 3>& A)
 {
    return A[0][0] * A[1][1] * A[2][2] + A[0][1] * A[1][2] * A[2][0] + A[0][2] *
           A[1][0] * A[2][1] -
           A[0][0] * A[1][2] * A[2][1] - A[0][1] * A[1][0] * A[2][2] - A[0][2] * A[1][1] *
           A[2][0];
 }

 /**
  * @brief Return whether a square rank 2 tensor is symmetric
  *
  * @tparam n The height of the tensor
  * @param A The square rank 2 tensor
  * @param abs_tolerance The absolute tolerance to check for symmetry
  * @return Whether the square rank 2 tensor (matrix) is symmetric
  */
 template <int n> MFEM_HOST_DEVICE
 bool is_symmetric(tensor<double, n, n> A, double abs_tolerance = 1.0e-8)
 {
    // Only the strict upper triangle is visited; each entry is compared with
    // its mirror image below the diagonal.
    for (int row = 0; row < n; ++row)
    {
       for (int col = row + 1; col < n; ++col)
       {
          const double mismatch = A(row, col) - A(col, row);
          if (std::abs(mismatch) > abs_tolerance)
          {
             return false;
          }
       }
    }
    return true;
 }

 /**
  * @brief Return whether a matrix is symmetric and positive definite
  * This check uses Sylvester's criterion, checking that each upper left subtensor has a
  * determinant greater than zero.
  *
  * @param A The matrix to test for positive definiteness
  * @return Whether the matrix is positive definite
  * @note symmetry is tested with the default tolerance of is_symmetric().
  * A leading minor that is exactly zero (or NaN) makes the result false, so
  * singular / merely semi-definite matrices are rejected.
  */
 inline MFEM_HOST_DEVICE
 bool is_symmetric_and_positive_definite(tensor<double, 2, 2> A)
 {
    if (!is_symmetric(A))
    {
       return false;
    }
    // 1x1 leading minor must be strictly positive
    if (!(A(0, 0) > 0.0))
    {
       return false;
    }
    // 2x2 leading minor (the full determinant) must be strictly positive
    if (!(det(A) > 0.0))
    {
       return false;
    }
    return true;
 }
 /// @overload
 /// @note 3-by-3 case: the full determinant must be strictly positive (zero or
 /// NaN is rejected), and the upper left 2-by-2 block is then checked with the
 /// 2-by-2 overload, which covers the two smaller leading minors.
 inline MFEM_HOST_DEVICE
 bool is_symmetric_and_positive_definite(tensor<double, 3, 3> A)
 {
    if (!is_symmetric(A))
    {
       return false;
    }
    // 3x3 leading minor (the full determinant) must be strictly positive
    if (!(det(A) > 0.0))
    {
       return false;
    }
    // upper left 2x2 block of A
    auto subtensor = make_tensor<2, 2>([A](int i, int j) { return A(i, j); });
    if (!is_symmetric_and_positive_definite(subtensor))
    {
       return false;
    }
    return true;
 }

 /**
  * @brief Solves Ax = b for x using Gaussian elimination with partial pivoting
  * @tparam T the underlying type of the matrix and vector entries
  * @tparam n the number of equations / unknowns
  * @param[in] A The coefficient matrix A
  * @param[in] b The righthand side vector b
  * @return The solution vector x
  * @note @a A and @a b are by-value as they are mutated as part of the elimination;
  * the caller's objects are not modified
  * @note pivots are not checked against zero, so a singular @a A leads to a
  * division by zero
  * @note pivot magnitudes and elimination factors are held in double
  */
 template <typename T, int n> MFEM_HOST_DEVICE
 tensor<T, n> linear_solve(tensor<T, n, n> A, tensor<T, n> b)
 {
    auto abs  = [](double x) { return (x < 0) ? -x : x; };
    auto swap_vector = [](tensor<T, n>& x, tensor<T, n>& y)
    {
       auto tmp = x;
       x        = y;
       y        = tmp;
    };
    auto swap_scalar = [](T& x, T& y)
    {
       auto tmp = x;
       x        = y;
       y        = tmp;
    };

    // same entry type as the declared return type
    tensor<T, n> x{};

    for (int i = 0; i < n; i++)
    {
       // Search for maximum in this column
       double max_val = abs(A[i][i]);

       int max_row = i;
       for (int j = i + 1; j < n; j++)
       {
          if (abs(A[j][i]) > max_val)
          {
             max_val = abs(A[j][i]);
             max_row = j;
          }
       }

       // bring the pivot row into position i (no-op when max_row == i)
       swap_scalar(b[max_row], b[i]);
       swap_vector(A[max_row], A[i]);

       // zero entries below in this column
       for (int j = i + 1; j < n; j++)
       {
          double c = -A[j][i] / A[i][i];
          A[j] += c * A[i];
          b[j] += c * b[i];
          A[j][i] = 0;
       }
    }

    // Solve equation Ax=b for an upper triangular matrix A
    for (int i = n - 1; i >= 0; i--)
    {
       x[i] = b[i] / A[i][i];
       for (int j = i - 1; j >= 0; j--)
       {
          b[j] -= A[j][i] * x[i];
       }
    }

    return x;
 }

 /**
  * @brief Inverts a matrix
  * @param[in] A The matrix to invert
  * @note Uses a shortcut for inverting a 2-by-2 matrix
  */
 inline MFEM_HOST_DEVICE tensor<double, 2, 2> inv(const tensor<double, 2, 2>& A)
 {
    // Closed form: swap the diagonal, negate the off-diagonal, divide by det.
    // det(A) is not checked against zero.
    const double scale = 1.0 / det(A);

    tensor<double, 2, 2> result{};
    result[0][0] = A[1][1] * scale;
    result[0][1] = -A[0][1] * scale;
    result[1][0] = -A[1][0] * scale;
    result[1][1] = A[0][0] * scale;
    return result;
 }

 /**
  * @overload
  * @note Uses a shortcut for inverting a 3-by-3 matrix
  */
 inline MFEM_HOST_DEVICE tensor<double, 3, 3> inv(const tensor<double, 3, 3>& A)
 {
    // Closed form: every entry is a 2x2 cofactor of A (transposed), scaled by
    // 1 / det(A). det(A) is not checked against zero.
    const double scale = 1.0 / det(A);

    tensor<double, 3, 3> result{};

    // first row
    result[0][0] = (A[1][1] * A[2][2] - A[1][2] * A[2][1]) * scale;
    result[0][1] = (A[0][2] * A[2][1] - A[0][1] * A[2][2]) * scale;
    result[0][2] = (A[0][1] * A[1][2] - A[0][2] * A[1][1]) * scale;
    // second row
    result[1][0] = (A[1][2] * A[2][0] - A[1][0] * A[2][2]) * scale;
    result[1][1] = (A[0][0] * A[2][2] - A[0][2] * A[2][0]) * scale;
    result[1][2] = (A[0][2] * A[1][0] - A[0][0] * A[1][2]) * scale;
    // third row
    result[2][0] = (A[1][0] * A[2][1] - A[1][1] * A[2][0]) * scale;
    result[2][1] = (A[0][1] * A[2][0] - A[0][0] * A[2][1]) * scale;
    result[2][2] = (A[0][0] * A[1][1] - A[0][1] * A[1][0]) * scale;

    return result;
 }
 /**
  * @overload
  * @note For N-by-N matrices with N > 3, requires Gaussian elimination
  * with partial pivoting
  * @tparam T the underlying type of the matrix entries
  * @tparam n the matrix dimension
  * @param[in] A The matrix to invert; it is not modified, the elimination
  * runs on a local copy
  * @return The inverse of A
  * @note pivots are not checked against zero, so a singular @a A leads to a
  * division by zero
  * @note the accumulated inverse is stored as tensor<double, n, n>
  * (NOTE(review): this presumably restricts practical use to T = double)
  */
 template <typename T, int n> MFEM_HOST_DEVICE
 tensor<T, n, n> inv(const tensor<T, n, n>& A)
 {
    auto abs  = [](double x) { return (x < 0) ? -x : x; };
    auto swap = [](tensor<T, n>& x, tensor<T, n>& y)
    {
       auto tmp = x;
       x        = y;
       y        = tmp;
    };

    // A is a reference to const, so the in-place elimination works on a copy
    tensor<T, n, n> U = A;

    // B starts as the identity and receives the same row operations as U
    tensor<double, n, n> B = Identity<n>();

    for (int i = 0; i < n; i++)
    {
       // Search for maximum in this column
       double max_val = abs(U[i][i]);

       int max_row = i;
       for (int j = i + 1; j < n; j++)
       {
          if (abs(U[j][i]) > max_val)
          {
             max_val = abs(U[j][i]);
             max_row = j;
          }
       }

       swap(B[max_row], B[i]);
       swap(U[max_row], U[i]);

       // zero entries below in this column
       for (int j = i + 1; j < n; j++)
       {
          if (U[j][i] != 0.0)
          {
             double c = -U[j][i] / U[i][i];
             U[j] += c * U[i];
             B[j] += c * B[i];
             U[j][i] = 0;
          }
       }
    }

    // upper triangular solve
    for (int i = n - 1; i >= 0; i--)
    {
       B[i] = B[i] / U[i][i];
       for (int j = i - 1; j >= 0; j--)
       {
          if (U[j][i] != 0.0)
          {
             B[j] -= U[j][i] * B[i];
          }
       }
    }

    return B;
 }

 /**
  * @overload
  * @note when inverting a tensor of dual numbers,
  * hardcode the analytic derivative of the
  * inverse of a square matrix, rather than
  * apply Gauss elimination directly on the dual number types
  *
  * The value part is inv() of the value part of @a A; the gradient part of
  * entry (i, j) is -sum_kl invA_ik * dA_kl * invA_lj, where dA_kl is the
  * gradient stored in A[k][l].
  *
  * @param[in] A The matrix of dual numbers to invert
  * @return The n-by-n tensor of dual numbers holding the inverse and its gradient
  *
  * TODO: compare performance of this hardcoded implementation to just using inv() directly
  */
 template <typename value_type, typename gradient_type, int n> MFEM_HOST_DEVICE
 tensor<dual<value_type, gradient_type>, n, n> inv(
    tensor<dual<value_type, gradient_type>, n, n> A)
 {
    auto invA = inv(get_value(A));
    return make_tensor<n, n>([&](int i, int j)
    {
       auto          value = invA[i][j];
       gradient_type gradient{};
       for (int k = 0; k < n; k++)
       {
          for (int l = 0; l < n; l++)
          {
             gradient -= invA[i][k] * A[k][l].gradient * invA[l][j];
          }
       }
       return dual<value_type, gradient_type> {value, gradient};
    });
 }

 /**
  * @brief recursively serialize the entries in a tensor to an output stream.
  * Output format uses braces and comma separators to mimic C syntax for multidimensional array
  * initialization.
  *
  * @param[in] os The stream to work with standard output streams
  * @param[in] A The tensor to write out
  */
 template <typename T, int... n>
 std::ostream& operator<<(std::ostream& os, const tensor<T, n...>& A)
 {
    // The leading entry is always written; each later entry is preceded by a
    // ", " separator. Nested tensors recurse through this same operator.
    constexpr int count = tensor<T, n...>::first_dim;
    os << '{';
    os << A[0];
    for (int idx = 1; idx < count; ++idx)
    {
       os << ", ";
       os << A[idx];
    }
    os << '}';
    return os;
 }

 /**
  * @brief replace all entries in a tensor satisfying |x| < 1.0e-10 by literal zero
  * @param[in] A The tensor to "chop"
  */
 template <int n> MFEM_HOST_DEVICE
 tensor<double, n> chop(const tensor<double, n>& A)
 {
    // The magnitude test is done on the square: x * x < 1.0e-20.
    tensor<double, n> cleaned = A;
    for (int idx = 0; idx < n; ++idx)
    {
       const double entry = cleaned[idx];
       if (entry * entry < 1.0e-20)
       {
          cleaned[idx] = 0.0;
       }
    }
    return cleaned;
 }

 /// @overload
 template <int m, int n> MFEM_HOST_DEVICE
 tensor<double, m, n> chop(const tensor<double, m, n>& A)
 {
    // The magnitude test is done on the square: x * x < 1.0e-20.
    tensor<double, m, n> cleaned = A;
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < n; ++c)
       {
          const double entry = cleaned[r][c];
          if (entry * entry < 1.0e-20)
          {
             cleaned[r][c] = 0.0;
          }
       }
    }
    return cleaned;
 }

 /// @cond
 namespace detail
 {
 // Type function mapping the two operand types of an outer product to the
 // type of the result. Only the specializations below are defined; any other
 // combination of operand types is an incomplete type.
 template <typename T1, typename T2>
 struct outer_prod;

 // tensor (x) tensor: the shapes are concatenated, lefthand extents first
 template <int... m, int... n>
 struct outer_prod<tensor<double, m...>, tensor<double, n...>>
 {
    using type = tensor<double, m..., n...>;
 };

 // scalar (x) tensor: the shape of the tensor is kept
 template <int... n>
 struct outer_prod<double, tensor<double, n...>>
 {
    using type = tensor<double, n...>;
 };

 // tensor (x) scalar: the shape of the tensor is kept
 template <int... n>
 struct outer_prod<tensor<double, n...>, double>
 {
    using type = tensor<double, n...>;
 };

 // scalar (x) scalar: a tensor with an empty list of extents
 template <>
 struct outer_prod<double, double>
 {
    using type = tensor<double>;
 };

 // zero (x) anything is zero
 template <typename T>
 struct outer_prod<zero, T>
 {
    using type = zero;
 };

 // anything (x) zero is zero
 template <typename T>
 struct outer_prod<T, zero>
 {
    using type = zero;
 };

 }  // namespace detail
 /// @endcond

 /**
  * @brief a type function that returns the tensor type of an outer product of two tensors
  * @tparam T1 the first argument to the outer product
  * @tparam T2 the second argument to the outer product
  * @note the mapping is provided by the specializations of detail::outer_prod:
  * operands may be double, tensor<double, ...> or zero
  */
 template <typename T1, typename T2>
 using outer_product_t = typename detail::outer_prod<T1, T2>::type;

 /**
  * @brief Retrieves the gradient component of a double (which is nothing)
  * @param arg unused; a plain double carries no derivative information
  * @return The sentinel, @see zero
  */
 inline MFEM_HOST_DEVICE zero get_gradient(double /* arg */) { return zero{}; }

 /**
  * @brief get the gradient of type `tensor` (note: since its stored type is not a dual
  * number, the derivative term is identically zero)
  * @tparam n integers describing the tensor shape
  * @param arg unused; a tensor of doubles carries no derivative information
  * @return The sentinel, @see zero
  */
 template <int... n>
 MFEM_HOST_DEVICE zero get_gradient(const tensor<double, n...>& /* arg */)
 {
    return zero{};
 }

 /**
  * @brief evaluate the change (to first order) in a function, f, given a small change in the input argument, dx.
  * @note this overload covers the case where both the derivative and the
  * change in the input are the zero sentinel; the result is the zero sentinel
  * @return The sentinel, @see zero
  */
 inline MFEM_HOST_DEVICE zero chain_rule(const zero /* df_dx */,
                                         const zero /* dx */) { return zero{}; }

 /**
  * @overload
  * @note this overload implements a no-op for the case where the gradient w.r.t. an input argument is identically zero
  * @tparam T the type of the change in the input argument (ignored)
  * @return The sentinel, @see zero
  */
 template <typename T>
 MFEM_HOST_DEVICE zero chain_rule(const zero /* df_dx */,
                                  const T /* dx */)
 {
    return zero{};
 }

 /**
  * @overload
  * @note this overload implements a no-op for the case where the small change is identically zero
  * @tparam T the type of the derivative (ignored)
  * @return The sentinel, @see zero
  */
 template <typename T>
 MFEM_HOST_DEVICE zero chain_rule(const T /* df_dx */,
                                  const zero /* dx */)
 {
    return zero{};
 }

 /**
  * @overload
  * @note for a scalar-valued function of a scalar, the chain rule is just multiplication
  * @param[in] df_dx the derivative of f with respect to x
  * @param[in] dx the change in x
  * @return df_dx * dx
  */
 inline MFEM_HOST_DEVICE double chain_rule(const double df_dx,
                                           const double dx) { return df_dx * dx; }

 /**
  * @overload
  * @note for a tensor-valued function of a scalar, the chain rule is just scalar multiplication
  * @tparam n integers describing the shape of the derivative tensor
  * @param[in] df_dx the derivative of the tensor-valued f with respect to x
  * @param[in] dx the change in x
  * @return df_dx * dx
  */
 template <int... n>
 MFEM_HOST_DEVICE auto chain_rule(const tensor<double, n...>& df_dx,
                                  const double dx) ->
 decltype(df_dx * dx)
 {
    return df_dx * dx;
 }

 /// A false_type that depends on a template parameter, so that a
 /// static_assert written in terms of it is only evaluated when the
 /// enclosing template is instantiated.
 template <int n> struct always_false : std::false_type { };

 /// Primary template for isotropic tensors; only the specializations that
 /// follow are defined.
 template <typename T, int... n> struct isotropic_tensor;

 /// Rank-1 specialization: exists only to turn any instantiation into a
 /// compile-time error with a readable message.
 template <typename T, int n>
 struct isotropic_tensor<T, n>
 {
    static_assert(always_false<n> {},
                  "error: there is no such thing as a rank-1 isotropic tensor!");
 };

 // rank-2 isotropic tensors are just identity matrices
 // (scaled by the single stored coefficient `value`)
 template <typename T, int m>
 struct isotropic_tensor<T, m, m>
 {
    /// Entry (i, j): `value` on the diagonal, zero times `value` elsewhere.
    MFEM_HOST_DEVICE constexpr T operator()(int i, int j) const
    {
       return (i == j) * value;
    }
    T value; ///< the coefficient multiplying the identity
 };

 /// Returns the rank-2 isotropic tensor with coefficient 1.0, i.e. the
 /// m-by-m identity in isotropic form.
 template <int m>
 MFEM_HOST_DEVICE constexpr isotropic_tensor<double, m, m> IsotropicIdentity()
 {
    return isotropic_tensor<double, m, m> {1.0};
 }

 /// Scalar * rank-2 isotropic tensor: scales the stored coefficient. The
 /// coefficient type of the result is the type of S{} * T{}.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator*(S scale,
                const isotropic_tensor<T, m, m> & I)
 -> isotropic_tensor<decltype(S {} * T{}), m, m>
 {
    return {I.value * scale};
 }

 /// Rank-2 isotropic tensor * scalar: scales the stored coefficient. The
 /// coefficient type of the result is the type of the product S{} * T{},
 /// matching the scalar-on-the-left overload.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator*(const isotropic_tensor<T, m, m> & I,
                const S scale)
 -> isotropic_tensor<decltype(S {} * T{}), m, m>
 {
    return {I.value * scale};
 }

 /// Sum of two rank-2 isotropic tensors: the coefficients are added and the
 /// result stays in isotropic form.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator+(const isotropic_tensor<S, m, m>& I1,
                const isotropic_tensor<T, m, m>& I2)
 -> isotropic_tensor<decltype(S {} + T{}), m, m>
 {
    return {I1.value + I2.value};
 }

 /// Difference of two rank-2 isotropic tensors: the coefficients are
 /// subtracted and the result stays in isotropic form.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator-(const isotropic_tensor<S, m, m>& I1,
                const isotropic_tensor<T, m, m>& I2)
 -> isotropic_tensor<decltype(S {} - T{}), m, m>
 {
    return {I1.value - I2.value};
 }

 template <typename S, typename T, int m> MFEM_HOST_DEVICE //constexpr
 auto operator+(const isotropic_tensor<S, m, m>& I,
                const tensor<T, m, m>& A)
 -> tensor<decltype(S {} + T{}), m, m>
 {
    // isotropic + dense: the isotropic coefficient only contributes on the
    // diagonal, where (r == c) evaluates to 1.
    using scalar_t = decltype(S{} + T{});
    tensor<scalar_t, m, m> result{};
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < m; ++c)
       {
          result[r][c] = I.value * (r == c) + A[r][c];
       }
    }
    return result;
 }

 template <typename S, typename T, int m> MFEM_HOST_DEVICE //constexpr
 auto operator+(const tensor<S, m, m>& A,
                const isotropic_tensor<T, m, m>& I)
 -> tensor<decltype(S {} + T{}), m, m>
 {
    // dense + isotropic: the isotropic coefficient only contributes on the
    // diagonal, where (r == c) evaluates to 1.
    using scalar_t = decltype(S{} + T{});
    tensor<scalar_t, m, m> result{};
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < m; ++c)
       {
          result[r][c] = A[r][c] + I.value * (r == c);
       }
    }
    return result;
 }

 template <typename S, typename T, int m> MFEM_HOST_DEVICE //constexpr
 auto operator-(const isotropic_tensor<S, m, m>& I,
                const tensor<T, m, m>& A)
 -> tensor<decltype(S {} - T{}), m, m>
 {
    // isotropic - dense: the isotropic coefficient only contributes on the
    // diagonal, where (r == c) evaluates to 1.
    using scalar_t = decltype(S{} - T{});
    tensor<scalar_t, m, m> result{};
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < m; ++c)
       {
          result[r][c] = I.value * (r == c) - A[r][c];
       }
    }
    return result;
 }

 template <typename S, typename T, int m> MFEM_HOST_DEVICE // constexpr
 auto operator-(const tensor<S, m, m>& A,
                const isotropic_tensor<T, m, m>& I)
 -> tensor<decltype(S {} - T{}), m, m>
 {
    // dense - isotropic: the isotropic coefficient only contributes on the
    // diagonal, where (r == c) evaluates to 1.
    using scalar_t = decltype(S{} - T{});
    tensor<scalar_t, m, m> result{};
    for (int r = 0; r < m; ++r)
    {
       for (int c = 0; c < m; ++c)
       {
          result[r][c] = A[r][c] - I.value * (r == c);
       }
    }
    return result;
 }

 /// Rank-2 isotropic tensor . tensor: contracting with a scaled identity
 /// leaves the shape of A unchanged and scales every entry by I.value.
 template <typename S, typename T, int m, int... n> MFEM_HOST_DEVICE constexpr
 auto dot(const isotropic_tensor<S, m, m>& I,
          const tensor<T, m, n...>& A)
 -> tensor<decltype(S {} * T{}), m, n...>
 {
    return I.value * A;
 }

 /// Tensor . rank-2 isotropic tensor: contracting with a scaled identity
 /// leaves the shape of A unchanged and scales every entry by I.value.
 /// The last extent of A must equal m; this is enforced at compile time.
 template <typename S, typename T, int m, int... n> MFEM_HOST_DEVICE //constexpr
 auto dot(const tensor<S, n...>& A,
          const isotropic_tensor<T, m, m> & I)
 -> tensor<decltype(S {} * T{}), n...>
 {
    // collect the extents of A so the last one can be compared against m
    constexpr int dimensions[sizeof...(n)] = {n...};
    static_assert(dimensions[sizeof...(n) - 1] == m, "n-1 != m");
    return A * I.value;
 }

 /// Double contraction of a rank-2 isotropic tensor with a square matrix:
 /// sum_ij (I.value * delta_ij) * A_ij = I.value * tr(A).
 /// The parameter pack `n` is not used by this overload.
 template <typename S, typename T, int m, int... n> MFEM_HOST_DEVICE constexpr
 auto ddot(const isotropic_tensor<S, m, m>& I,
           const tensor<T, m, m>& A)
 -> decltype(S {} * T{})
 {
    return I.value * tr(A);
 }

 /// Symmetric part of a rank-2 isotropic tensor: the tensor itself.
 template <typename T, int m> MFEM_HOST_DEVICE constexpr
 auto sym(const isotropic_tensor<T, m, m>& I) -> isotropic_tensor<T, m, m>
 {
    return I;
 }

 /// Antisymmetric part of a rank-2 isotropic tensor: always the zero sentinel.
 template <typename T, int m> MFEM_HOST_DEVICE constexpr
 auto antisym(const isotropic_tensor<T, m, m>&) -> zero
 {
    return zero{};
 }

 /// Trace of a rank-2 isotropic tensor: m diagonal entries, each equal to
 /// I.value.
 template <typename T, int m> MFEM_HOST_DEVICE constexpr
 auto tr(const isotropic_tensor<T, m, m>& I) -> decltype(T {} * m)
 {
    return I.value * m;
 }

 /// Transpose of a rank-2 isotropic tensor: the tensor itself.
 template <typename T, int m> MFEM_HOST_DEVICE constexpr
 auto transpose(const isotropic_tensor<T, m, m>& I) -> isotropic_tensor<T, m, m>
 {
    return I;
 }

 /// Determinant of a rank-2 isotropic tensor: I.value raised to the power m.
 template <typename T, int m> MFEM_HOST_DEVICE constexpr
 auto det(const isotropic_tensor<T, m, m>& I) -> T
 {
    return std::pow(I.value, m);
 }

 /// Frobenius norm of a rank-2 isotropic tensor: the square root of
 /// m * I.value^2 (m diagonal entries, each equal to I.value).
 /// Note that sqrt is called unqualified here.
 template <typename T, int m> MFEM_HOST_DEVICE constexpr
 auto norm(const isotropic_tensor<T, m, m>& I) -> T
 {
    return sqrt(I.value * I.value * m);
 }

 /// Squared Frobenius norm of a rank-2 isotropic tensor: m * I.value^2.
 template <typename T, int m> MFEM_HOST_DEVICE constexpr
 auto sqnorm(const isotropic_tensor<T, m, m>& I) -> T
 {
    return I.value * I.value * m;
 }

 // rank-3 isotropic tensors are just the alternating symbol
 // (scaled by the single stored coefficient `value`)
 template <typename T>
 struct isotropic_tensor<T, 3, 3, 3>
 {
    /// Entry (i, j, k). For indices in {0, 1, 2} the factor
    /// 0.5 * (i - j) * (j - k) * (k - i) is +1 for cyclic orderings of
    /// (0, 1, 2), -1 for the reversed orderings and 0 when two indices agree.
    MFEM_HOST_DEVICE constexpr T operator()(int i, int j, int k) const
    {
       return 0.5 * (i - j) * (j - k) * (k - i) * value;
    }
    T value; ///< the coefficient multiplying the alternating symbol
 };

 // there are 3 linearly-independent rank-4 isotropic tensors,
 // so the general one will be some linear combination of them
 template <typename T, int m>
 struct isotropic_tensor<T, m, m, m, m>
 {
    // c1 weights delta_ij delta_kl, c2 weights the symmetric combination
    // (delta_ik delta_jl + delta_il delta_jk) / 2 and c3 weights the
    // antisymmetric combination (delta_ik delta_jl - delta_il delta_jk) / 2
    T c1, c2, c3;

    /// Entry (i, j, k, l) of the linear combination described above.
    MFEM_HOST_DEVICE constexpr T operator()(int i, int j, int k, int l) const
    {
       return c1 * (i == j) * (k == l)
              + c2 * ((i == k) * (j == l) + (i == l) * (j == k)) * 0.5
              + c3 * ((i == k) * (j == l) - (i == l) * (j == k)) * 0.5;
    }
 };

 /// Rank-4 isotropic tensor with coefficients (c1, c2, c3) = (0, 1, 0), i.e.
 /// only the symmetric combination is present.
 template <int m> MFEM_HOST_DEVICE constexpr
 auto SymmetricIdentity() -> isotropic_tensor<double, m, m, m, m>
 {
    return {0.0, 1.0, 0.0};
 }

 /// Rank-4 isotropic tensor with coefficients (c1, c2, c3) = (0, 0, 1), i.e.
 /// only the antisymmetric combination is present.
 template <int m>MFEM_HOST_DEVICE constexpr
 auto AntisymmetricIdentity() -> isotropic_tensor<double, m, m, m, m>
 {
    return {0.0, 0.0, 1.0};
 }

 /// Scalar * rank-4 isotropic tensor: each of the three coefficients is
 /// multiplied by the scalar.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator*(S scale,
                isotropic_tensor<T, m, m, m, m> I)
 -> isotropic_tensor<decltype(S {} * T{}), m, m, m, m>
 {
    return {I.c1 * scale, I.c2 * scale, I.c3 * scale};
 }

 /// Rank-4 isotropic tensor * scalar: each of the three coefficients is
 /// multiplied by the scalar.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator*(isotropic_tensor<S, m, m, m, m> I,
                T scale)
 -> isotropic_tensor<decltype(S {} * T{}), m, m, m, m>
 {
    return {I.c1 * scale, I.c2 * scale, I.c3 * scale};
 }

 /// Sum of two rank-4 isotropic tensors: the coefficients are added
 /// component by component.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator+(isotropic_tensor<S, m, m, m, m> I1,
                isotropic_tensor<T, m, m, m, m> I2)
 -> isotropic_tensor<decltype(S {} + T{}), m, m, m, m>
 {
    return {I1.c1 + I2.c1, I1.c2 + I2.c2, I1.c3 + I2.c3};
 }

 /// Difference of two rank-4 isotropic tensors: the coefficients are
 /// subtracted component by component.
 template <typename S, typename T, int m> MFEM_HOST_DEVICE constexpr
 auto operator-(isotropic_tensor<S, m, m, m, m> I1,
                isotropic_tensor<T, m, m, m, m> I2)
 -> isotropic_tensor<decltype(S {} - T{}), m, m, m, m>
 {
    return {I1.c1 - I2.c1, I1.c2 - I2.c2, I1.c3 - I2.c3};
 }

 /// Double contraction of a rank-4 isotropic tensor with a square matrix.
 /// The c1 term yields tr(A) times the identity, the c2 term yields sym(A)
 /// and the c3 term yields antisym(A).
 /// The parameter pack `n` is not used by this overload.
 template <typename S, typename T, int m, int... n> MFEM_HOST_DEVICE constexpr
 auto ddot(const isotropic_tensor<S, m, m, m, m>& I,
           const tensor<T, m, m>& A)
 -> tensor<decltype(S {} * T{}), m, m>
 {
    return I.c1 * tr(A) * Identity<m>() + I.c2 * sym(A) + I.c3 * antisym(A);
 }

 } // namespace internal

 } // namespace mfem

 #endif
mfem::operator/
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator/(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
Definition: auto.hpp:271

mfem::operator+
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator+(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
Definition: auto.hpp:238

mfem::f
double f(const Vector &xvec)
Definition: lor_mms.hpp:32

mfem
Definition: CodeDocumentation.dox:1

b
double b
Definition: lissajous.cpp:42

p
double p(const Vector &x, double t)
Definition: navier_mms.cpp:53

dim
int dim
Definition: ex24.cpp:53

dual.hpp
This file contains the declaration of a dual number class.

mfem::u
double u(const Vector &xvec)
Definition: lor_mms.hpp:24

mfem::operator*
MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
Return a suitable MemoryClass from a pair of MemoryClasses.
Definition: mem_manager.cpp:125

mfem::operator-
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator-(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
Definition: auto.hpp:249