12#ifndef MFEM_TEMPLATE_TENSOR
13#define MFEM_TEMPLATE_TENSOR
38 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
40 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
43 MFEM_STATIC_ASSERT(A_layout_t::rank == 1,
"invalid rank");
44 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
51 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
54 static void AssignHD(
const A_layout_t &A_layout, A_data_t &A_data,
57 MFEM_STATIC_ASSERT(A_layout_t::rank == 1,
"invalid rank");
58 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
66 typename A_layout_t,
typename A_data_t,
67 typename B_layout_t,
typename B_data_t>
68 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
69 const B_layout_t &B_layout,
const B_data_t &B_data)
71 MFEM_STATIC_ASSERT(A_layout_t::rank == 1 && B_layout_t::rank == 1,
73 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1,
74 "invalid dimensions");
75 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
86 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
88 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
91 MFEM_STATIC_ASSERT(A_layout_t::rank == 2,
"invalid rank");
92 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
94 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
102 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
105 static void AssignHD(
const A_layout_t &A_layout, A_data_t &A_data,
108 MFEM_STATIC_ASSERT(A_layout_t::rank == 2,
"invalid rank");
109 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
111 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
120 typename A_layout_t,
typename A_data_t,
121 typename B_layout_t,
typename B_data_t>
122 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
123 const B_layout_t &B_layout,
const B_data_t &B_data)
125 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
127 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
128 A_layout_t::dim_2 == B_layout_t::dim_2,
129 "invalid dimensions");
130 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
132 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
135 B_data[B_layout.ind(i1,i2)]);
145 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
147 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
150 MFEM_STATIC_ASSERT(A_layout_t::rank == 3,
"invalid rank");
151 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
153 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
155 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
165 typename A_layout_t,
typename A_data_t,
166 typename B_layout_t,
typename B_data_t>
167 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
168 const B_layout_t &B_layout,
const B_data_t &B_data)
170 MFEM_STATIC_ASSERT(A_layout_t::rank == 3 && B_layout_t::rank == 3,
172 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
173 A_layout_t::dim_2 == B_layout_t::dim_2 &&
174 A_layout_t::dim_3 == B_layout_t::dim_3,
175 "invalid dimensions");
176 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
178 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
180 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
183 B_data[B_layout.ind(i1,i2,i3)]);
194 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
196 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
199 MFEM_STATIC_ASSERT(A_layout_t::rank == 4,
"invalid rank");
200 for (
int i4 = 0; i4 < A_layout_t::dim_4; i4++)
202 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
204 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
206 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
217 typename A_layout_t,
typename A_data_t,
218 typename B_layout_t,
typename B_data_t>
219 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
220 const B_layout_t &B_layout,
const B_data_t &B_data)
222 MFEM_STATIC_ASSERT(A_layout_t::rank == 4 && B_layout_t::rank == 4,
224 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
225 A_layout_t::dim_2 == B_layout_t::dim_2 &&
226 A_layout_t::dim_3 == B_layout_t::dim_3 &&
227 A_layout_t::dim_4 == B_layout_t::dim_4,
228 "invalid dimensions");
229 for (
int i4 = 0; i4 < A_layout_t::dim_4; i4++)
231 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
233 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
235 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
238 B_data[B_layout.ind(i1,i2,i3,i4)]);
249template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
251inline void TAssign(
const A_layout_t &A_layout, A_data_t &A_data,
252 const scalar_t value)
254 internal::TensorOps<A_layout_t::rank>::
260template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
263inline void TAssignHD(
const A_layout_t &A_layout, A_data_t &A_data,
264 const scalar_t value)
266 internal::TensorOps<A_layout_t::rank>::
274 typename A_layout_t,
typename A_data_t,
275 typename B_layout_t,
typename B_data_t>
276inline void TAssign(
const A_layout_t &A_layout, A_data_t &A_data,
277 const B_layout_t &B_layout,
const B_data_t &B_data)
279 internal::TensorOps<A_layout_t::rank>::
280 template
Assign<Op>(A_layout, A_data, B_layout, B_data);
285template <
int S,
typename data_t =
double,
bool align = false>
300 template <AssignOp::Type Op>
306 template <AssignOp::Type Op,
typename src_data_t>
312 template <AssignOp::Type Op,
typename dest_data_t>
323 template <
typename src_data_t>
324 void Set(
const src_data_t &src)
329 template <
typename dest_data_t>
341template <
int S,
typename data_t,
bool align>
346template <
int N1,
int N2,
typename data_t =
double,
bool align = false>
377template <
int N1,
int N2,
typename data_t,
bool align>
382template <
int N1,
int N2,
int N3,
typename data_t =
double,
bool align = false>
391 static inline int ind(
int i1,
int i2,
int i3)
399template <
int N1,
int N2,
int N3,
typename data_t,
bool align>
403template <
int N1,
int N2,
int N3,
int N4,
typename data_t = double,
413 static inline int ind(
int i1,
int i2,
int i3,
int i4)
422template <
int N1,
int N2,
int N3,
int N4,
typename data_t,
bool align>
431 typename A_layout_t,
typename A_data_t,
432 typename B_layout_t,
typename B_data_t,
433 typename C_layout_t,
typename C_data_t>
434MFEM_ALWAYS_INLINE
inline
435void Mult_1_2(
const A_layout_t &A_layout,
const A_data_t &A_data,
436 const B_layout_t &B_layout,
const B_data_t &B_data,
437 const C_layout_t &C_layout, C_data_t &C_data)
439 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
440 C_layout_t::rank == 3,
"invalid ranks");
441 const int B3 = B_layout_t::dim_3;
442 const int C3 = C_layout_t::dim_3;
443 MFEM_STATIC_ASSERT(B3 == C3,
"invalid dimensions");
444 for (
int k = 0; k < B3; k++)
448 C_layout.ind3(k), C_data);
454 typename A_layout_t,
typename A_data_t,
455 typename B_layout_t,
typename B_data_t,
456 typename C_layout_t,
typename C_data_t>
457MFEM_ALWAYS_INLINE
inline
458void Mult_2_1(
const A_layout_t &A_layout,
const A_data_t &A_data,
459 const B_layout_t &B_layout,
const B_data_t &B_data,
460 const C_layout_t &C_layout, C_data_t &C_data)
462 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
463 C_layout_t::rank == 3,
"invalid ranks");
465 B_layout.merge_23(), B_data,
466 C_layout.merge_23(), C_data);
471 typename A_layout_t,
typename A_data_t,
472 typename B_layout_t,
typename B_data_t,
473 typename C_layout_t,
typename C_data_t>
474MFEM_ALWAYS_INLINE
inline
476 const B_layout_t &B_layout,
const B_data_t &B_data,
477 const C_layout_t &C_layout, C_data_t &C_data)
479 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
480 C_layout_t::rank == 4,
"invalid ranks");
481 const int A1 = A_layout_t::dim_1;
482 const int A2 = A_layout_t::dim_2;
483 const int B1 = B_layout_t::dim_1;
484 const int B2 = B_layout_t::dim_2;
485 const int B3 = B_layout_t::dim_3;
486 const int C1 = C_layout_t::dim_1;
487 const int C2 = C_layout_t::dim_2;
488 const int C3 = C_layout_t::dim_3;
489 const int C4 = C_layout_t::dim_4;
490 MFEM_STATIC_ASSERT(A1 == B2 && A2 == C1 && A2 == C3 && B1 == C2 && B3 == C4,
491 "invalid dimensions");
495 MFEM_FLOPS_ADD(3*A1*A2*A2*B1*B3);
497 for (
int j = 0; j < A2; j++)
499 for (
int i = 0; i < A2; i++)
501 for (
int l = 0; l < B3; l++)
503 for (
int k = 0; k < B1; k++)
505 for (
int s = 0; s < A1; s++)
508 C_data[C_layout.ind(i,k,j,l)] +=
509 A_data[A_layout.ind(s,i)] *
510 A_data[A_layout.ind(s,j)] *
511 B_data[B_layout.ind(k,s,l)];
520 for (
int s = 0; s < A1; s++)
522 for (
int i = 0; i < A2; i++)
524 for (
int k = 0; k < B1; k++)
526 for (
int j = 0; j < A2; j++)
528 for (
int l = 0; l < B3; l++)
531 C_data[C_layout.ind(i,k,j,l)] +=
532 A_data[A_layout.ind(s,i)] *
533 A_data[A_layout.ind(s,j)] *
534 B_data[B_layout.ind(k,s,l)];
545 typename A_layout_t,
typename A_data_t,
546 typename B_layout_t,
typename B_data_t,
547 typename C_layout_t,
typename C_data_t,
548 typename D_layout_t,
typename D_data_t>
549MFEM_ALWAYS_INLINE
inline
551 const B_layout_t &B_layout,
const B_data_t &B_data,
552 const C_layout_t &C_layout,
const C_data_t &C_data,
553 const D_layout_t &D_layout, D_data_t &D_data)
555 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
556 C_layout_t::rank == 3 && D_layout_t::rank == 4,
558 const int A1 = A_layout_t::dim_1;
559 const int A2 = A_layout_t::dim_2;
560 const int B1 = B_layout_t::dim_1;
561 const int B2 = B_layout_t::dim_2;
562 const int C1 = C_layout_t::dim_1;
563 const int C2 = C_layout_t::dim_2;
564 const int C3 = C_layout_t::dim_3;
565 const int D1 = D_layout_t::dim_1;
566 const int D2 = D_layout_t::dim_2;
567 const int D3 = D_layout_t::dim_3;
568 const int D4 = D_layout_t::dim_4;
569 MFEM_STATIC_ASSERT(A2 == B1 && A2 == C2 && A1 == D1 && B2 == D3 &&
570 C1 == D2 && C3 == D4,
"invalid dimensions");
575 for (
int l = 0; l < C3; l++)
577 for (
int s = 0; s < B1; s++)
579 for (
int k = 0; k < C1; k++)
581 for (
int i = 0; i < A1; i++)
583 H(i,k,s,l) = A_data[A_layout.ind(i,s)]*
584 C_data[C_layout.ind(k,s,l)];
591 D_layout.merge_12(), D_data);
593 MFEM_FLOPS_ADD(A1*B1*C1*C3);
594 for (
int l = 0; l < C3; l++)
598 for (
int s = 0; s < B1; s++)
600 for (
int k = 0; k < C1; k++)
602 for (
int i = 0; i < A1; i++)
604 H(i,k,s) = A_data[A_layout.ind(i,s)]*
605 C_data[C_layout.ind(k,s,l)];
611 D_layout.merge_12().ind3(l), D_data);
615 for (
int l = 0; l < C3; l++)
617 for (
int j = 0; j < B2; j++)
619 for (
int k = 0; k < C1; k++)
621 for (
int s = 0; s < B1; s++)
623 F(s,k,j,l) = B_data[B_layout.ind(s,j)]*
624 C_data[C_layout.ind(k,s,l)];
630 D_layout.merge_34().merge_23(), D_data);
637 typename A_layout_t,
typename A_data_t,
638 typename B_layout_t,
typename B_data_t,
639 typename C_layout_t,
typename C_data_t>
640MFEM_ALWAYS_INLINE
inline
642 const B_layout_t &
b,
const B_data_t &B,
643 const C_layout_t &c, C_data_t &C)
645 const int A1 = A_layout_t::dim_1;
646 const int A2 = A_layout_t::dim_2;
647 const int A3 = A_layout_t::dim_3;
648 const int B1 = B_layout_t::dim_1;
649 const int B2 = B_layout_t::dim_2;
650 const int C1 = C_layout_t::dim_1;
651 const int C2 = C_layout_t::dim_2;
652 const int C3 = C_layout_t::dim_3;
653 const int C4 = C_layout_t::dim_4;
654 MFEM_STATIC_ASSERT(A1 == C1 && A2 == B1 && A2 == C2 && A3 == C3 && B2 == C4,
655 "invalid dimensions");
657 MFEM_FLOPS_ADD(A1*A2*A3*B2);
658 for (
int l = 0; l < B2; l++)
660 for (
int k = 0; k < A3; k++)
662 for (
int j = 0; j < A2; j++)
664 for (
int i = 0; i < A1; i++)
667 A[
a.ind(i,j,k)]*B[
b.ind(j,l)]);
MFEM_ALWAYS_INLINE void Mult_1_2(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
MFEM_HOST_DEVICE void TAssignHD(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
lvalue_t & Assign(lvalue_t &a, const rvalue_t &b)
scalar_t TDet(const layout_t &a, const data_t &A)
MFEM_ALWAYS_INLINE void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
MFEM_ALWAYS_INLINE void Mult_2_1(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
MFEM_ALWAYS_INLINE void TensorProduct(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, const B_data_t &B, const C_layout_t &c, C_data_t &C)
void TAdjugate(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
void TAssign(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
MFEM_ALWAYS_INLINE void TensorAssemble(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
MFEM_HOST_DEVICE lvalue_t & AssignHD(lvalue_t &a, const rvalue_t &b)
static MFEM_HOST_DEVICE int ind(int i1, int i2)
static StridedLayout2D< N1 *N2, S1, N3, S3 > merge_12()
static int ind(int i1, int i2, int i3)
static int ind(int i1, int i2, int i3, int i4)
static StridedLayout3D< N1, S1, N2, S2, N3 *N4, S3 > merge_34()
static StridedLayout3D< N1 *N2, S1, N3, S3, N4, S4 > merge_12()
static const layout_type layout
data_t & operator()(int i, int j)
data_t data[aligned_size >0?aligned_size:1]
const data_t & operator()(int i, int j) const
static int ind(int i1, int i2)
ColumnMajorLayout2D< N1, N2 > layout_type
TVector< N1 *N2, data_t, align > base_class
void Adjugate(TMatrix< N1, N2, data_t > &adj) const
data_t AdjDet(TMatrix< N2, N1, data_t > &adj) const
ColumnMajorLayout3D< N1, N2, N3 > layout_type
data_t data[aligned_size >0?aligned_size:1]
static const layout_type layout
data_t & operator()(int i, int j, int k)
const data_t & operator()(int i, int j, int k) const
TVector< N1 *N2 *N3, data_t, align > base_class
static int ind(int i1, int i2, int i3)
const data_t & operator()(int i, int j, int k, int l) const
data_t data[aligned_size >0?aligned_size:1]
data_t & operator()(int i, int j, int k, int l)
TVector< N1 *N2 *N3 *N4, data_t, align > base_class
ColumnMajorLayout4D< N1, N2, N3, N4 > layout_type
static int ind(int i1, int i2, int i3, int i4)
static const layout_type layout
static const int aligned_size
void Scale(const data_t scale)
static const layout_type layout
const data_t & operator[](int i) const
data_t data[aligned_size >0?aligned_size:1]
data_t & operator[](int i)
void Assign(const src_data_t &src)
void AssignTo(dest_data_t &dest)
void Assemble(dest_data_t &dest) const
StridedLayout1D< S, 1 > layout_type
void Assign(const data_t d)
void Set(const src_data_t &src)