18 #ifndef MFEM_INTERNAL_TENSOR_HPP
19 #define MFEM_INTERNAL_TENSOR_HPP
22 #include <type_traits>
29 #if defined(__CUDACC__)
30 #if __CUDAVER__ >= 75000
31 #define MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma nv_exec_check_disable
33 #define MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING #pragma hd_warning_disable
36 #define MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
39 template <
typename T,
int... n>
45 template <
typename T >
49 static constexpr
int ndim = 1;
50 static constexpr
int first_dim = 0;
51 MFEM_HOST_DEVICE T& operator[](
int ) {
return values; }
52 MFEM_HOST_DEVICE
const T& operator[](
int )
const {
return values; }
53 MFEM_HOST_DEVICE T& operator()(
int ) {
return values; }
54 MFEM_HOST_DEVICE
const T& operator()(
int )
const {
return values; }
55 MFEM_HOST_DEVICE
operator T()
const {
return values; }
59 template <
typename T,
int n0 >
63 static constexpr
int ndim = 1;
64 static constexpr
int first_dim = n0;
65 MFEM_HOST_DEVICE T& operator[](
int i) {
return values[i]; }
66 MFEM_HOST_DEVICE
const T& operator[](
int i)
const {
return values[i]; }
67 MFEM_HOST_DEVICE T& operator()(
int i) {
return values[i]; }
68 MFEM_HOST_DEVICE
const T& operator()(
int i)
const {
return values[i]; }
72 template <
typename T,
int n0,
int n1 >
73 struct tensor<T, n0, n1>
76 static constexpr
int ndim = 2;
77 static constexpr
int first_dim = n0;
78 MFEM_HOST_DEVICE tensor< T, n1 >& operator[](
int i) {
return values[i]; }
79 MFEM_HOST_DEVICE
const tensor< T, n1 >& operator[](
int i)
const {
return values[i]; }
80 MFEM_HOST_DEVICE tensor< T, n1 >& operator()(
int i) {
return values[i]; }
81 MFEM_HOST_DEVICE
const tensor< T, n1 >& operator()(
int i)
const {
return values[i]; }
82 MFEM_HOST_DEVICE T& operator()(
int i,
int j) {
return values[i][j]; }
83 MFEM_HOST_DEVICE
const T& operator()(
int i,
int j)
const {
return values[i][j]; }
84 tensor < T, n1 > values[n0];
87 template <
typename T,
int n0,
int n1,
int n2 >
88 struct tensor<T, n0, n1, n2>
91 static constexpr
int ndim = 3;
92 static constexpr
int first_dim = n0;
93 MFEM_HOST_DEVICE tensor< T, n1, n2 >& operator[](
int i) {
return values[i]; }
94 MFEM_HOST_DEVICE
const tensor< T, n1, n2 >& operator[](
int i)
const {
return values[i]; }
95 MFEM_HOST_DEVICE tensor< T, n1, n2 >& operator()(
int i) {
return values[i]; }
96 MFEM_HOST_DEVICE
const tensor< T, n1, n2 >& operator()(
int i)
const {
return values[i]; }
97 MFEM_HOST_DEVICE tensor< T, n2 >& operator()(
int i,
int j) {
return values[i][j]; }
98 MFEM_HOST_DEVICE
const tensor< T, n2 >& operator()(
int i,
int j)
const {
return values[i][j]; }
99 MFEM_HOST_DEVICE T& operator()(
int i,
int j,
int k) {
return values[i][j][k]; }
100 MFEM_HOST_DEVICE
const T& operator()(
int i,
int j,
int k)
const {
return values[i][j][k]; }
101 tensor < T, n1, n2 > values[n0];
104 template <
typename T,
int n0,
int n1,
int n2,
int n3 >
105 struct tensor<T, n0, n1, n2, n3>
108 static constexpr
int ndim = 4;
109 static constexpr
int first_dim = n0;
110 MFEM_HOST_DEVICE tensor< T, n1, n2, n3 >& operator[](
int i) {
return values[i]; }
111 MFEM_HOST_DEVICE
const tensor< T, n1, n2, n3 >& operator[](
int i)
const {
return values[i]; }
112 MFEM_HOST_DEVICE tensor< T, n1, n2, n3 >& operator()(
int i) {
return values[i]; }
113 MFEM_HOST_DEVICE
const tensor< T, n1, n2, n3 >& operator()(
int i)
const {
return values[i]; }
114 MFEM_HOST_DEVICE tensor< T, n2, n3 >& operator()(
int i,
int j) {
return values[i][j]; }
115 MFEM_HOST_DEVICE
const tensor< T, n2, n3 >& operator()(
int i,
int j)
const {
return values[i][j]; }
116 MFEM_HOST_DEVICE tensor< T, n3 >& operator()(
int i,
int j,
int k) {
return values[i][j][k]; }
117 MFEM_HOST_DEVICE
const tensor< T, n3 >& operator()(
int i,
int j,
int k)
const {
return values[i][j][k]; }
118 MFEM_HOST_DEVICE T& operator()(
int i,
int j,
int k,
int l) {
return values[i][j][k][l]; }
119 MFEM_HOST_DEVICE
const T& operator()(
int i,
int j,
int k,
int l)
const {
return values[i][j][k][l]; }
120 tensor < T, n1, n2, n3 > values[n0];
123 template <
typename T,
int n0,
int n1,
int n2,
int n3,
int n4 >
124 struct tensor<T, n0, n1, n2, n3, n4>
127 static constexpr
int ndim = 5;
128 static constexpr
int first_dim = n0;
129 MFEM_HOST_DEVICE tensor< T, n1, n2, n3, n4 >& operator[](
int i) {
return values[i]; }
130 MFEM_HOST_DEVICE
const tensor< T, n1, n2, n3, n4 >& operator[](
int i)
const {
return values[i]; }
131 MFEM_HOST_DEVICE tensor< T, n1, n2, n3, n4 >& operator()(
int i) {
return values[i]; }
132 MFEM_HOST_DEVICE
const tensor< T, n1, n2, n3, n4 >& operator()(
int i)
const {
return values[i]; }
133 MFEM_HOST_DEVICE tensor< T, n2, n3, n4 >& operator()(
int i,
int j) {
return values[i][j]; }
134 MFEM_HOST_DEVICE
const tensor< T, n2, n3, n4 >& operator()(
int i,
135 int j)
const {
return values[i][j]; }
136 MFEM_HOST_DEVICE tensor< T, n3, n4>& operator()(
int i,
int j,
int k) {
return values[i][j][k]; }
137 MFEM_HOST_DEVICE
const tensor< T, n3, n4>& operator()(
int i,
int j,
138 int k)
const {
return values[i][j][k]; }
139 MFEM_HOST_DEVICE tensor< T, n4 >& operator()(
int i,
int j,
int k,
int l) {
return values[i][j][k][l]; }
140 MFEM_HOST_DEVICE
const tensor< T, n4 >& operator()(
int i,
int j,
int k,
141 int l)
const {
return values[i][j][k][l]; }
142 MFEM_HOST_DEVICE T& operator()(
int i,
int j,
int k,
int l,
int m) {
return values[i][j][k][l][m]; }
143 MFEM_HOST_DEVICE
const T& operator()(
int i,
int j,
int k,
int l,
int m)
const {
return values[i][j][k][l][m]; }
144 tensor < T, n1, n2, n3, n4 > values[n0];
153 MFEM_HOST_DEVICE
operator double() {
return 0.0; }
156 template <
typename T,
int... n>
157 MFEM_HOST_DEVICE
operator tensor<T, n...>()
159 return tensor<T, n...> {};
163 template <
typename... T>
164 MFEM_HOST_DEVICE zero operator()(T...)
170 template <
typename T>
171 MFEM_HOST_DEVICE zero operator=(T)
178 template <
typename T>
179 struct is_zero : std::false_type
185 struct is_zero<zero> : std::true_type
190 MFEM_HOST_DEVICE constexpr zero
operator+(zero, zero) {
return zero{}; }
193 template <
typename T>
194 MFEM_HOST_DEVICE constexpr T
operator+(zero, T other)
200 template <
typename T>
201 MFEM_HOST_DEVICE constexpr T
operator+(T other, zero)
209 MFEM_HOST_DEVICE constexpr zero
operator-(zero) {
return zero{}; }
212 MFEM_HOST_DEVICE constexpr zero
operator-(zero, zero) {
return zero{}; }
215 template <
typename T>
216 MFEM_HOST_DEVICE constexpr T
operator-(zero, T other)
222 template <
typename T>
223 MFEM_HOST_DEVICE constexpr T
operator-(T other, zero)
231 MFEM_HOST_DEVICE constexpr zero
operator*(zero, zero) {
return zero{}; }
234 template <
typename T>
235 MFEM_HOST_DEVICE constexpr zero
operator*(zero, T )
241 template <
typename T>
242 MFEM_HOST_DEVICE constexpr zero
operator*(T , zero)
248 template <
typename T>
249 MFEM_HOST_DEVICE constexpr zero
operator/(zero, T )
255 MFEM_HOST_DEVICE constexpr zero operator+=(zero, zero) {
return zero{}; }
258 MFEM_HOST_DEVICE constexpr zero operator-=(zero, zero) {
return zero{}; }
262 MFEM_HOST_DEVICE zero&
get(zero& x)
268 template <
typename T>
269 MFEM_HOST_DEVICE zero dot(
const T&, zero)
275 template <
typename T>
276 MFEM_HOST_DEVICE zero dot(zero,
const T&)
288 template <
typename T,
int n1,
int n2 = 1>
289 using reduced_tensor =
typename std::conditional<
290 (n1 == 1 && n2 == 1), T,
291 typename std::conditional<n1 == 1, tensor<T, n2>,
292 typename std::conditional<n2 == 1, tensor<T, n1>, tensor<T, n1, n2>
307 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
308 template <
typename lambda_type>
309 MFEM_HOST_DEVICE constexpr
auto make_tensor(lambda_type
f) ->
310 tensor<decltype(
f())>
326 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
327 template <
int n1,
typename lambda_type>
328 MFEM_HOST_DEVICE
auto make_tensor(lambda_type
f) ->
329 tensor<decltype(
f(n1)), n1>
331 using T = decltype(
f(n1));
333 for (
int i = 0; i < n1; i++)
352 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
353 template <
int n1,
int n2,
typename lambda_type>
354 MFEM_HOST_DEVICE
auto make_tensor(lambda_type
f) ->
355 tensor<decltype(
f(n1, n2)), n1, n2>
357 using T = decltype(
f(n1, n2));
358 tensor<T, n1, n2> A{};
359 for (
int i = 0; i < n1; i++)
361 for (
int j = 0; j < n2; j++)
382 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
383 template <
int n1,
int n2,
int n3,
typename lambda_type>
384 MFEM_HOST_DEVICE
auto make_tensor(lambda_type
f) ->
385 tensor<decltype(
f(n1, n2, n3)), n1, n2, n3>
387 using T = decltype(
f(n1, n2, n3));
388 tensor<T, n1, n2, n3> A{};
389 for (
int i = 0; i < n1; i++)
391 for (
int j = 0; j < n2; j++)
393 for (
int k = 0; k < n3; k++)
395 A(i, j, k) =
f(i, j, k);
416 MFEM_SUPPRESS_NVCC_HOSTDEVICE_WARNING
417 template <
int n1,
int n2,
int n3,
int n4,
typename lambda_type>
418 MFEM_HOST_DEVICE
auto make_tensor(lambda_type
f) ->
419 tensor<decltype(
f(n1, n2, n3, n4)), n1, n2, n3, n4>
421 using T = decltype(
f(n1, n2, n3, n4));
422 tensor<T, n1, n2, n3, n4> A{};
423 for (
int i = 0; i < n1; i++)
425 for (
int j = 0; j < n2; j++)
427 for (
int k = 0; k < n3; k++)
429 for (
int l = 0; l < n4; l++)
431 A(i, j, k, l) =
f(i, j, k, l);
447 template <
typename S,
typename T,
int... n>
448 MFEM_HOST_DEVICE
auto operator+(
const tensor<S, n...>& A,
449 const tensor<T, n...>& B) ->
450 tensor<decltype(S {} + T{}), n...>
452 tensor<decltype(S{} + T{}), n...> C{};
453 for (
int i = 0; i < tensor<T, n...>::first_dim; i++)
466 template <
typename T,
int... n>
467 MFEM_HOST_DEVICE tensor<T, n...> operator-(
const tensor<T, n...>& A)
470 for (
int i = 0; i < tensor<T, n...>::first_dim; i++)
485 template <
typename S,
typename T,
int... n>
486 MFEM_HOST_DEVICE
auto operator-(
const tensor<S, n...>& A,
487 const tensor<T, n...>& B) ->
488 tensor<decltype(S {} + T{}), n...>
490 tensor<decltype(S{} + T{}), n...> C{};
491 for (
int i = 0; i < tensor<T, n...>::first_dim; i++)
506 template <
typename S,
typename T,
int... n,
507 typename =
typename std::enable_if<std::is_arithmetic<S>::value ||
508 is_dual_number<S>::value>::type>
509 MFEM_HOST_DEVICE
auto operator*(S scale,
const tensor<T, n...>& A) ->
510 tensor<decltype(S {} * T{}), n...>
512 tensor<decltype(S{} * T{}), n...> C{};
513 for (
int i = 0; i < tensor<T, n...>::first_dim; i++)
528 template <
typename S,
typename T,
int... n,
529 typename =
typename std::enable_if<std::is_arithmetic<S>::value ||
530 is_dual_number<S>::value>::type>
531 MFEM_HOST_DEVICE
auto operator*(
const tensor<T, n...>& A, S scale) ->
532 tensor<decltype(T {} * S{}), n...>
534 tensor<decltype(T{} * S{}), n...> C{};
535 for (
int i = 0; i < tensor<T, n...>::first_dim; i++)
550 template <
typename S,
typename T,
int... n,
551 typename =
typename std::enable_if<std::is_arithmetic<S>::value ||
552 is_dual_number<S>::value>::type>
553 MFEM_HOST_DEVICE
auto operator/(S scale,
const tensor<T, n...>& A) ->
554 tensor<decltype(S {} * T{}), n...>
556 tensor<decltype(S{} * T{}), n...> C{};
557 for (
int i = 0; i < tensor<T, n...>::first_dim; i++)
572 template <
typename S,
typename T,
int... n,
573 typename =
typename std::enable_if<std::is_arithmetic<S>::value ||
574 is_dual_number<S>::value>::type>
575 MFEM_HOST_DEVICE
auto operator/(
const tensor<T, n...>& A, S scale) ->
576 tensor<decltype(T {} * S{}), n...>
578 tensor<decltype(T{} * S{}), n...> C{};
579 for (
int i = 0; i < tensor<T, n...>::first_dim; i++)
594 template <
typename S,
typename T,
int... n> MFEM_HOST_DEVICE
595 tensor<S, n...>& operator+=(tensor<S, n...>& A,
596 const tensor<T, n...>& B)
598 for (
int i = 0; i < tensor<S, n...>::first_dim; i++)
611 template <
typename T> MFEM_HOST_DEVICE
612 tensor<T>& operator+=(tensor<T>& A,
const T& B)
614 return A.values += B;
623 template <
typename T> MFEM_HOST_DEVICE
624 tensor<T, 1>& operator+=(tensor<T, 1>& A,
const T& B)
626 return A.values += B;
635 template <
typename T> MFEM_HOST_DEVICE
636 tensor<T, 1, 1>& operator+=(tensor<T, 1, 1>& A,
const T& B)
638 return A.values += B;
647 template <
typename T,
int... n> MFEM_HOST_DEVICE
648 tensor<T, n...>& operator+=(tensor<T, n...>& A, zero)
661 template <
typename S,
typename T,
int... n> MFEM_HOST_DEVICE
662 tensor<S, n...>& operator-=(tensor<S, n...>& A,
const tensor<T, n...>& B)
664 for (
int i = 0; i < tensor<S, n...>::first_dim; i++)
677 template <
typename T,
int... n> MFEM_HOST_DEVICE
678 constexpr tensor<T, n...>& operator-=(tensor<T, n...>& A, zero)
692 template <
typename S,
typename T> MFEM_HOST_DEVICE
693 auto outer(S A, T B) -> decltype(A * B)
695 static_assert(std::is_arithmetic<S>::value && std::is_arithmetic<T>::value,
696 "outer product types must be tensor or arithmetic_type");
704 template <
typename S,
typename T,
int n> MFEM_HOST_DEVICE
705 tensor<decltype(S{} * T{}), n> outer(S A, tensor<T, n> B)
707 static_assert(std::is_arithmetic<S>::value,
708 "outer product types must be tensor or arithmetic_type");
709 tensor<decltype(S{} * T{}), n> AB{};
710 for (
int i = 0; i < n; i++)
721 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE
722 tensor<decltype(S{} * T{}), m> outer(
const tensor<S, m>& A, T B)
724 static_assert(std::is_arithmetic<T>::value,
725 "outer product types must be tensor or arithmetic_type");
726 tensor<decltype(S{} * T{}), m> AB{};
727 for (
int i = 0; i < m; i++)
738 template <
typename T,
int n> MFEM_HOST_DEVICE
739 zero outer(zero,
const tensor<T, n>&)
748 template <
typename T,
int n> MFEM_HOST_DEVICE
749 zero outer(
const tensor<T, n>&, zero)
759 template <
typename S,
typename T,
int m,
int n> MFEM_HOST_DEVICE
760 tensor<decltype(S{} * T{}), m, n> outer(S A,
const tensor<T, m, n>& B)
762 static_assert(std::is_arithmetic<S>::value,
763 "outer product types must be tensor or arithmetic_type");
764 tensor<decltype(S{} * T{}), m, n> AB{};
765 for (
int i = 0; i < m; i++)
767 for (
int j = 0; j < n; j++)
769 AB[i][j] = A * B[i][j];
779 template <
typename S,
typename T,
int m,
int n> MFEM_HOST_DEVICE
780 tensor<decltype(S{} * T{}), m, n> outer(
const tensor<S, m>& A,
781 const tensor<T, n>& B)
783 tensor<decltype(S{} * T{}), m, n> AB{};
784 for (
int i = 0; i < m; i++)
786 for (
int j = 0; j < n; j++)
788 AB[i][j] = A[i] * B[j];
799 template <
typename S,
typename T,
int m,
int n> MFEM_HOST_DEVICE
800 tensor<decltype(S{} * T{}), m, n> outer(
const tensor<S, m, n>& A, T B)
802 static_assert(std::is_arithmetic<T>::value,
803 "outer product types must be tensor or arithmetic_type");
804 tensor<decltype(S{} * T{}), m, n> AB{};
805 for (
int i = 0; i < m; i++)
807 for (
int j = 0; j < n; j++)
809 AB[i][j] = A[i][j] * B;
820 template <
typename S,
typename T,
int m,
int n,
int p> MFEM_HOST_DEVICE
821 tensor<decltype(S{} * T{}), m, n,
p> outer(
const tensor<S, m, n>& A,
822 const tensor<T, p>& B)
824 tensor<decltype(S{} * T{}), m, n,
p> AB{};
825 for (
int i = 0; i < m; i++)
827 for (
int j = 0; j < n; j++)
829 for (
int k = 0; k <
p; k++)
831 AB[i][j][k] = A[i][j] * B[k];
843 template <
typename S,
typename T,
int m,
int n,
int p> MFEM_HOST_DEVICE
844 tensor<decltype(S{} * T{}), m, n,
p> outer(
const tensor<S, m>& A,
845 const tensor<T, n, p>& B)
847 tensor<decltype(S{} * T{}), m, n,
p> AB{};
848 for (
int i = 0; i < m; i++)
850 for (
int j = 0; j < n; j++)
852 for (
int k = 0; k <
p; k++)
854 AB[i][j][k] = A[i] * B[j][k];
865 template <
typename S,
typename T,
int m,
int n,
int p,
int q> MFEM_HOST_DEVICE
866 tensor<decltype(S{} * T{}), m, n,
p, q> outer(
const tensor<S, m, n>& A,
867 const tensor<T, p, q>& B)
869 tensor<decltype(S{} * T{}), m, n,
p, q> AB{};
870 for (
int i = 0; i < m; i++)
872 for (
int j = 0; j < n; j++)
874 for (
int k = 0; k <
p; k++)
876 for (
int l = 0; l < q; l++)
878 AB[i][j][k][l] = A[i][j] * B[k][l];
895 template <
typename S,
typename T,
int m,
int n> MFEM_HOST_DEVICE
896 auto inner(
const tensor<S, m, n>& A,
const tensor<T, m, n>& B) ->
899 decltype(S{} * T{}) sum{};
900 for (
int i = 0; i < m; i++)
902 for (
int j = 0; j < n; j++)
904 sum += A[i][j] * B[i][j];
919 template <
typename S,
typename T,
int m,
int n,
int p> MFEM_HOST_DEVICE
920 auto dot(
const tensor<S, m, n>& A,
921 const tensor<T, n, p>& B) ->
922 tensor<decltype(S {} * T{}), m,
p>
924 tensor<decltype(S{} * T{}), m,
p> AB{};
925 for (
int i = 0; i < m; i++)
927 for (
int j = 0; j <
p; j++)
929 for (
int k = 0; k < n; k++)
931 AB[i][j] = AB[i][j] + A[i][k] * B[k][j];
942 template <
typename S,
typename T,
int m,
int n> MFEM_HOST_DEVICE
943 auto dot(
const tensor<S, m>& A,
const tensor<T, m, n>& B) ->
944 tensor<decltype(S {} * T{}), n>
946 tensor<decltype(S{} * T{}), n> AB{};
947 for (
int i = 0; i < n; i++)
949 for (
int j = 0; j < m; j++)
951 AB[i] = AB[i] + A[j] * B[j][i];
961 template <
typename S,
typename T,
int m,
int n> MFEM_HOST_DEVICE
962 auto dot(
const tensor<S, m, n>& A,
const tensor<T, n>& B) ->
963 tensor<decltype(S {} * T{}), m>
965 tensor<decltype(S{} * T{}), m> AB{};
966 for (
int i = 0; i < m; i++)
968 for (
int j = 0; j < n; j++)
970 AB[i] = AB[i] + A[i][j] * B[j];
980 template <
typename S,
typename T,
int m,
int n,
int p> MFEM_HOST_DEVICE
981 auto dot(
const tensor<S, m, n, p>& A,
const tensor<T, p>& B) ->
982 tensor<decltype(S {} * T{}), m, n>
984 tensor<decltype(S{} * T{}), m, n> AB{};
985 for (
int i = 0; i < m; i++)
987 for (
int j = 0; j < n; j++)
989 for (
int k = 0; k <
p; k++)
991 AB[i][j] += A[i][j][k] * B[k];
1042 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE
1043 auto dot(
const tensor<S, m>& A,
const tensor<T, m>& B) ->
1044 decltype(S {} * T{})
1046 decltype(S{} * T{}) AB{};
1047 for (
int i = 0; i < m; i++)
1054 template <
typename S,
typename T,
int m,
int... n> MFEM_HOST_DEVICE
1055 auto dot(
const tensor<S, m>& A,
const tensor<T, m, n...>& B) ->
1056 tensor<decltype(S {} * T{}), n...>
1058 constexpr
int dimensions[] = {n...};
1059 tensor<decltype(S{} * T{}), n...> AB{};
1060 for (
int i = 0; i < dimensions[0]; i++)
1062 for (
int j = 0; j < m; j++)
1064 AB[i] = AB[i] + A[j] * B[j][i];
1074 template <
typename S,
typename T,
typename U,
int m,
int n> MFEM_HOST_DEVICE
1075 auto dot(
const tensor<S, m>&
u,
const tensor<T, m, n>& A,
1076 const tensor<U, n>& v) ->
1077 decltype(S {} * T{} * U{})
1079 decltype(S{} * T{} * U{}) uAv{};
1080 for (
int i = 0; i < m; i++)
1082 for (
int j = 0; j < n; j++)
1084 uAv +=
u[i] * A[i][j] * v[j];
1101 template <
typename S,
typename T,
int m,
int n,
int p,
int q> MFEM_HOST_DEVICE
1102 auto ddot(
const tensor<S, m, n, p, q>& A,
const tensor<T, p, q>& B) ->
1103 tensor<decltype(S {} * T{}), m, n>
1105 tensor<decltype(S{} * T{}), m, n> AB{};
1106 for (
int i = 0; i < m; i++)
1108 for (
int j = 0; j < n; j++)
1110 for (
int k = 0; k <
p; k++)
1112 for (
int l = 0; l < q; l++)
1114 AB[i][j] += A[i][j][k][l] * B[k][l];
1127 template <
typename S,
typename T,
int m,
int n,
int p> MFEM_HOST_DEVICE
1128 auto ddot(
const tensor<S, m, n, p>& A,
const tensor<T, n, p>& B) ->
1129 tensor<decltype(S {} * T{}), m>
1131 tensor<decltype(S{} * T{}), m> AB{};
1132 for (
int i = 0; i < m; i++)
1134 for (
int j = 0; j < n; j++)
1136 for (
int k = 0; k <
p; k++)
1138 AB[i] += A[i][j][k] * B[j][k];
1149 template <
typename S,
typename T,
int m,
int n> MFEM_HOST_DEVICE
1150 auto ddot(
const tensor<S, m, n>& A,
const tensor<T, m, n>& B) ->
1151 decltype(S {} * T{})
1153 decltype(S{} * T{}) AB{};
1154 for (
int i = 0; i < m; i++)
1156 for (
int j = 0; j < n; j++)
1158 AB += A[i][j] * B[i][j];
1167 template <
typename S,
typename T,
int... m,
int... n> MFEM_HOST_DEVICE
1168 auto operator*(
const tensor<S, m...>& A,
const tensor<T, n...>& B) ->
1178 template <
typename T,
int m> MFEM_HOST_DEVICE
1179 T sqnorm(
const tensor<T, m>& A)
1182 for (
int i = 0; i < m; i++)
1184 total += A[i] * A[i];
1193 template <
typename T,
int m,
int n> MFEM_HOST_DEVICE
1194 T sqnorm(
const tensor<T, m, n>& A)
1197 for (
int i = 0; i < m; i++)
1199 for (
int j = 0; j < n; j++)
1201 total += A[i][j] * A[i][j];
1211 template <
typename T,
int... n> MFEM_HOST_DEVICE
1212 T norm(
const tensor<T, n...>& A)
1214 return std::sqrt(sqnorm(A));
1222 template <
typename T,
int... n> MFEM_HOST_DEVICE
1223 auto normalize(
const tensor<T, n...>& A) ->
1224 decltype(A / norm(A))
1234 template <
typename T,
int n> MFEM_HOST_DEVICE
1235 T tr(
const tensor<T, n, n>& A)
1238 for (
int i = 0; i < n; i++)
1240 trA = trA + A[i][i];
1250 template <
typename T,
int n> MFEM_HOST_DEVICE
1251 tensor<T, n, n> sym(
const tensor<T, n, n>& A)
1253 tensor<T, n, n> symA{};
1254 for (
int i = 0; i < n; i++)
1256 for (
int j = 0; j < n; j++)
1258 symA[i][j] = 0.5 * (A[i][j] + A[j][i]);
1271 template <
typename T,
int n> MFEM_HOST_DEVICE
1272 tensor<T, n, n> dev(
const tensor<T, n, n>& A)
1276 for (
int i = 0; i < n; i++)
1278 devA[i][i] -= trA / n;
1288 MFEM_HOST_DEVICE tensor<double, dim, dim> Identity()
1290 tensor<double, dim, dim> I{};
1291 for (
int i = 0; i <
dim; i++)
1293 for (
int j = 0; j <
dim; j++)
1305 template <
typename T,
int m,
int n> MFEM_HOST_DEVICE
1306 tensor<T, n, m> transpose(
const tensor<T, m, n>& A)
1308 tensor<T, n, m> AT{};
1309 for (
int i = 0; i < n; i++)
1311 for (
int j = 0; j < m; j++)
1323 template <
typename T> MFEM_HOST_DEVICE
1324 T det(
const tensor<T, 2, 2>& A)
1326 return A[0][0] * A[1][1] - A[0][1] * A[1][0];
1329 template <
typename T> MFEM_HOST_DEVICE
1330 T det(
const tensor<T, 3, 3>& A)
1332 return A[0][0] * A[1][1] * A[2][2] + A[0][1] * A[1][2] * A[2][0] + A[0][2] *
1334 A[0][0] * A[1][2] * A[2][1] - A[0][1] * A[1][0] * A[2][2] - A[0][2] * A[1][1] *
1346 template <
int n> MFEM_HOST_DEVICE
1347 bool is_symmetric(tensor<double, n, n> A,
double abs_tolerance = 1.0e-8)
1349 for (
int i = 0; i < n; ++i)
1351 for (
int j = i + 1; j < n; ++j)
1353 if (std::abs(A(i, j) - A(j, i)) > abs_tolerance)
1370 inline MFEM_HOST_DEVICE
1371 bool is_symmetric_and_positive_definite(tensor<double, 2, 2> A)
1373 if (!is_symmetric(A))
1388 inline MFEM_HOST_DEVICE
1389 bool is_symmetric_and_positive_definite(tensor<double, 3, 3> A)
1391 if (!is_symmetric(A))
1399 auto subtensor = make_tensor<2, 2>([A](
int i,
int j) {
return A(i, j); });
1400 if (!is_symmetric_and_positive_definite(subtensor))
1413 template <
typename T,
int n> MFEM_HOST_DEVICE
1414 tensor<T, n> linear_solve(tensor<T, n, n> A,
const tensor<T, n>
b)
1416 auto abs = [](
double x) {
return (x < 0) ? -x : x; };
1417 auto swap_vector = [](tensor<T, n>& x, tensor<T, n>& y)
1423 auto swap_scalar = [](T& x, T& y)
1431 tensor<double, n> x{};
1433 for (
int i = 0; i < n; i++)
1436 double max_val = abs(A[i][i]);
1439 for (
int j = i + 1; j < n; j++)
1441 if (abs(A[j][i]) > max_val)
1443 max_val = abs(A[j][i]);
1448 swap_scalar(b[max_row], b[i]);
1449 swap_vector(A[max_row], A[i]);
1452 for (
int j = i + 1; j < n; j++)
1454 double c = -A[j][i] / A[i][i];
1462 for (
int i = n - 1; i >= 0; i--)
1464 x[i] = b[i] / A[i][i];
1465 for (
int j = i - 1; j >= 0; j--)
1467 b[j] -= A[j][i] * x[i];
1479 inline MFEM_HOST_DEVICE tensor<double, 2, 2> inv(
const tensor<double, 2, 2>& A)
1481 double inv_detA(1.0 / det(A));
1483 tensor<double, 2, 2> invA{};
1485 invA[0][0] = A[1][1] * inv_detA;
1486 invA[0][1] = -A[0][1] * inv_detA;
1487 invA[1][0] = -A[1][0] * inv_detA;
1488 invA[1][1] = A[0][0] * inv_detA;
1497 inline MFEM_HOST_DEVICE tensor<double, 3, 3> inv(
const tensor<double, 3, 3>& A)
1499 double inv_detA(1.0 / det(A));
1501 tensor<double, 3, 3> invA{};
1503 invA[0][0] = (A[1][1] * A[2][2] - A[1][2] * A[2][1]) * inv_detA;
1504 invA[0][1] = (A[0][2] * A[2][1] - A[0][1] * A[2][2]) * inv_detA;
1505 invA[0][2] = (A[0][1] * A[1][2] - A[0][2] * A[1][1]) * inv_detA;
1506 invA[1][0] = (A[1][2] * A[2][0] - A[1][0] * A[2][2]) * inv_detA;
1507 invA[1][1] = (A[0][0] * A[2][2] - A[0][2] * A[2][0]) * inv_detA;
1508 invA[1][2] = (A[0][2] * A[1][0] - A[0][0] * A[1][2]) * inv_detA;
1509 invA[2][0] = (A[1][0] * A[2][1] - A[1][1] * A[2][0]) * inv_detA;
1510 invA[2][1] = (A[0][1] * A[2][0] - A[0][0] * A[2][1]) * inv_detA;
1511 invA[2][2] = (A[0][0] * A[1][1] - A[0][1] * A[1][0]) * inv_detA;
1520 template <
typename T,
int n> MFEM_HOST_DEVICE
1521 tensor<T, n, n> inv(
const tensor<T, n, n>& A)
1523 auto abs = [](
double x) {
return (x < 0) ? -x : x; };
1524 auto swap = [](tensor<T, n>& x, tensor<T, n>& y)
1531 tensor<double, n, n> B = Identity<n>();
1533 for (
int i = 0; i < n; i++)
1536 double max_val = abs(A[i][i]);
1539 for (
int j = i + 1; j < n; j++)
1541 if (abs(A[j][i]) > max_val)
1543 max_val = abs(A[j][i]);
1548 swap(B[max_row], B[i]);
1549 swap(A[max_row], A[i]);
1552 for (
int j = i + 1; j < n; j++)
1556 double c = -A[j][i] / A[i][i];
1565 for (
int i = n - 1; i >= 0; i--)
1567 B[i] = B[i] / A[i][i];
1568 for (
int j = i - 1; j >= 0; j--)
1572 B[j] -= A[j][i] * B[i];
1589 template <
typename value_type,
typename gradient_type,
int n> MFEM_HOST_DEVICE
1590 dual<value_type, gradient_type> inv(
1591 tensor<dual<value_type, gradient_type>, n, n> A)
1593 auto invA = inv(get_value(A));
1594 return make_tensor<n, n>([&](
int i,
int j)
1596 auto value = invA[i][j];
1597 gradient_type gradient{};
1598 for (
int k = 0; k < n; k++)
1600 for (
int l = 0; l < n; l++)
1602 gradient -= invA[i][k] * A[k][l].gradient * invA[l][j];
1605 return dual<value_type, gradient_type> {value, gradient};
1617 template <
typename T,
int... n>
1618 std::ostream& operator<<(std::ostream& os,
const tensor<T, n...>& A)
1621 for (
int i = 1; i < tensor<T, n...>::first_dim; i++)
1633 template <
int n> MFEM_HOST_DEVICE
1634 tensor<double, n> chop(
const tensor<double, n>& A)
1637 for (
int i = 0; i < n; i++)
1639 if (copy[i] * copy[i] < 1.0e-20)
1648 template <
int m,
int n> MFEM_HOST_DEVICE
1649 tensor<double, m, n> chop(
const tensor<double, m, n>& A)
1652 for (
int i = 0; i < m; i++)
1654 for (
int j = 0; j < n; j++)
1656 if (copy[i][j] * copy[i][j] < 1.0e-20)
1668 template <
typename T1,
typename T2>
1671 template <
int... m,
int... n>
1672 struct outer_prod<tensor<double, m...>, tensor<double, n...>>
1674 using type = tensor<double, m..., n...>;
1678 struct outer_prod<double, tensor<double, n...>>
1680 using type = tensor<double, n...>;
1684 struct outer_prod<tensor<double, n...>, double>
1686 using type = tensor<double, n...>;
1690 struct outer_prod<double, double>
1692 using type = tensor<double>;
1695 template <
typename T>
1696 struct outer_prod<zero, T>
1701 template <
typename T>
1702 struct outer_prod<T, zero>
1715 template <
typename T1,
typename T2>
1716 using outer_product_t =
typename detail::outer_prod<T1, T2>::type;
1722 inline MFEM_HOST_DEVICE zero get_gradient(
double ) {
return zero{}; }
1730 MFEM_HOST_DEVICE zero get_gradient(
const tensor<double, n...>& )
1738 inline MFEM_HOST_DEVICE zero chain_rule(
const zero ,
1739 const zero ) {
return zero{}; }
1745 template <
typename T>
1746 MFEM_HOST_DEVICE zero chain_rule(
const zero ,
1756 template <
typename T>
1757 MFEM_HOST_DEVICE zero chain_rule(
const T ,
1767 inline MFEM_HOST_DEVICE
double chain_rule(
const double df_dx,
1768 const double dx) {
return df_dx * dx; }
1775 MFEM_HOST_DEVICE
auto chain_rule(
const tensor<double, n...>& df_dx,
1777 decltype(df_dx * dx)
1782 template <
int n>
struct always_false : std::false_type { };
1784 template <
typename T,
int... n>
struct isotropic_tensor;
1786 template <
typename T,
int n>
1787 struct isotropic_tensor<T, n>
1789 static_assert(always_false<n> {},
1790 "error: there is no such thing as a rank-1 isotropic tensor!");
1794 template <
typename T,
int m>
1795 struct isotropic_tensor<T, m, m>
1797 MFEM_HOST_DEVICE constexpr T operator()(
int i,
int j)
const
1799 return (i == j) * value;
1805 MFEM_HOST_DEVICE constexpr isotropic_tensor<double, m, m> IsotropicIdentity()
1807 return isotropic_tensor<double, m, m> {1.0};
1810 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
1812 const isotropic_tensor<T, m, m> & I)
1813 -> isotropic_tensor<decltype(S {} * T{}), m, m>
1815 return {I.value * scale};
1818 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
1819 auto operator*(
const isotropic_tensor<T, m, m> & I,
1821 -> isotropic_tensor<decltype(S {}, T{}), m, m>
1823 return {I.value * scale};
1826 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
1827 auto operator+(
const isotropic_tensor<S, m, m>& I1,
1828 const isotropic_tensor<T, m, m>& I2)
1829 -> isotropic_tensor<decltype(S {} + T{}), m, m>
1831 return {I1.value + I2.value};
1834 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
1835 auto operator-(
const isotropic_tensor<S, m, m>& I1,
1836 const isotropic_tensor<T, m, m>& I2)
1837 -> isotropic_tensor<decltype(S {} - T{}), m, m>
1839 return {I1.value - I2.value};
1842 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE
1843 auto operator+(
const isotropic_tensor<S, m, m>& I,
1844 const tensor<T, m, m>& A)
1845 -> tensor<decltype(S {} + T{}), m, m>
1847 tensor<decltype(S{} + T{}), m, m> output{};
1848 for (
int i = 0; i < m; i++)
1850 for (
int j = 0; j < m; j++)
1852 output[i][j] = I.value * (i == j) + A[i][j];
1858 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE
1859 auto operator+(
const tensor<S, m, m>& A,
1860 const isotropic_tensor<T, m, m>& I)
1861 -> tensor<decltype(S {} + T{}), m, m>
1863 tensor<decltype(S{} + T{}), m, m> output{};
1864 for (
int i = 0; i < m; i++)
1866 for (
int j = 0; j < m; j++)
1868 output[i][j] = A[i][j] + I.value * (i == j);
1874 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE
1875 auto operator-(
const isotropic_tensor<S, m, m>& I,
1876 const tensor<T, m, m>& A)
1877 -> tensor<decltype(S {} - T{}), m, m>
1879 tensor<decltype(S{} - T{}), m, m> output{};
1880 for (
int i = 0; i < m; i++)
1882 for (
int j = 0; j < m; j++)
1884 output[i][j] = I.value * (i == j) - A[i][j];
1890 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE
1891 auto operator-(
const tensor<S, m, m>& A,
1892 const isotropic_tensor<T, m, m>& I)
1893 -> tensor<decltype(S {} - T{}), m, m>
1895 tensor<decltype(S{} - T{}), m, m> output{};
1896 for (
int i = 0; i < m; i++)
1898 for (
int j = 0; j < m; j++)
1900 output[i][j] = A[i][j] - I.value * (i == j);
1906 template <
typename S,
typename T,
int m,
int... n> MFEM_HOST_DEVICE constexpr
1907 auto dot(
const isotropic_tensor<S, m, m>& I,
1908 const tensor<T, m, n...>& A)
1909 -> tensor<decltype(S {} * T{}), m, n...>
1914 template <
typename S,
typename T,
int m,
int... n> MFEM_HOST_DEVICE
1915 auto dot(
const tensor<S, n...>& A,
1916 const isotropic_tensor<T, m, m> & I)
1917 -> tensor<decltype(S {} * T{}), n...>
1919 constexpr
int dimensions[
sizeof...(n)] = {n...};
1920 static_assert(dimensions[
sizeof...(n) - 1] == m,
"n-1 != m");
1924 template <
typename S,
typename T,
int m,
int... n> MFEM_HOST_DEVICE constexpr
1925 auto ddot(
const isotropic_tensor<S, m, m>& I,
1926 const tensor<T, m, m>& A)
1927 -> decltype(S {} * T{})
1929 return I.value * tr(A);
1932 template <
typename T,
int m> MFEM_HOST_DEVICE constexpr
1933 auto sym(
const isotropic_tensor<T, m, m>& I) -> isotropic_tensor<T, m, m>
1938 template <
typename T,
int m> MFEM_HOST_DEVICE constexpr
1939 auto antisym(
const isotropic_tensor<T, m, m>&) -> zero
1944 template <
typename T,
int m> MFEM_HOST_DEVICE constexpr
1945 auto tr(
const isotropic_tensor<T, m, m>& I) -> decltype(T {} * m)
1950 template <
typename T,
int m> MFEM_HOST_DEVICE constexpr
1951 auto transpose(
const isotropic_tensor<T, m, m>& I) -> isotropic_tensor<T, m, m>
1956 template <
typename T,
int m> MFEM_HOST_DEVICE constexpr
1957 auto det(
const isotropic_tensor<T, m, m>& I) -> T
1959 return std::pow(I.value, m);
1962 template <
typename T,
int m> MFEM_HOST_DEVICE constexpr
1963 auto norm(
const isotropic_tensor<T, m, m>& I) -> T
1965 return sqrt(I.value * I.value * m);
1968 template <
typename T,
int m> MFEM_HOST_DEVICE constexpr
1969 auto sqnorm(
const isotropic_tensor<T, m, m>& I) -> T
1971 return I.value * I.value * m;
1975 template <
typename T>
1976 struct isotropic_tensor<T, 3, 3, 3>
1978 MFEM_HOST_DEVICE constexpr T operator()(
int i,
int j,
int k)
const
1980 return 0.5 * (i - j) * (j - k) * (k - i) * value;
1987 template <
typename T,
int m>
1988 struct isotropic_tensor<T, m, m, m, m>
1992 MFEM_HOST_DEVICE constexpr T operator()(
int i,
int j,
int k,
int l)
const
1994 return c1 * (i == j) * (k == l)
1995 + c2 * ((i == k) * (j == l) + (i == l) * (j == k)) * 0.5
1996 + c3 * ((i == k) * (j == l) - (i == l) * (j == k)) * 0.5;
2000 template <
int m> MFEM_HOST_DEVICE constexpr
2001 auto SymmetricIdentity() -> isotropic_tensor<double, m, m, m, m>
2003 return {0.0, 1.0, 0.0};
2006 template <
int m>MFEM_HOST_DEVICE constexpr
2007 auto AntisymmetricIdentity() -> isotropic_tensor<double, m, m, m, m>
2009 return {0.0, 0.0, 1.0};
2012 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
2014 isotropic_tensor<T, m, m, m, m> I)
2015 -> isotropic_tensor<decltype(S {} * T{}), m, m, m, m>
2017 return {I.c1 * scale, I.c2 * scale, I.c3 * scale};
2020 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
2021 auto operator*(isotropic_tensor<S, m, m, m, m> I,
2023 -> isotropic_tensor<decltype(S {} * T{}), m, m, m, m>
2025 return {I.c1 * scale, I.c2 * scale, I.c3 * scale};
2028 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
2029 auto operator+(isotropic_tensor<S, m, m, m, m> I1,
2030 isotropic_tensor<T, m, m, m, m> I2)
2031 -> isotropic_tensor<decltype(S {} + T{}), m, m, m, m>
2033 return {I1.c1 + I2.c1, I1.c2 + I2.c2, I1.c3 + I2.c3};
2036 template <
typename S,
typename T,
int m> MFEM_HOST_DEVICE constexpr
2037 auto operator-(isotropic_tensor<S, m, m, m, m> I1,
2038 isotropic_tensor<T, m, m, m, m> I2)
2039 -> isotropic_tensor<decltype(S {} - T{}), m, m, m, m>
2041 return {I1.c1 - I2.c1, I1.c2 - I2.c2, I1.c3 - I2.c3};
2044 template <
typename S,
typename T,
int m,
int... n> MFEM_HOST_DEVICE constexpr
2045 auto ddot(
const isotropic_tensor<S, m, m, m, m>& I,
2046 const tensor<T, m, m>& A)
2047 -> tensor<decltype(S {} * T{}), m, m>
2049 return I.c1 * tr(A) * Identity<m>() + I.c2 * sym(A) + I.c3 * antisym(A);
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator/(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator+(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
double f(const Vector &xvec)
double p(const Vector &x, double t)
This file contains the declaration of a dual number class.
double u(const Vector &xvec)
MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
Return a suitable MemoryClass from a pair of MemoryClasses.
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator-(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)