12 #ifndef MFEM_TEMPLATE_TENSOR
13 #define MFEM_TEMPLATE_TENSOR
15 #include "../config/tconfig.hpp"
16 #include "../linalg/simd.hpp"
17 #include "../general/tassign.hpp"
18 #include "../general/backends.hpp"
39 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
41 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
44 MFEM_STATIC_ASSERT(A_layout_t::rank == 1,
"invalid rank");
45 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
47 mfem::Assign<Op>(A_data[A_layout.ind(i1)], value);
52 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
55 static void AssignHD(
const A_layout_t &A_layout, A_data_t &A_data,
58 MFEM_STATIC_ASSERT(A_layout_t::rank == 1,
"invalid rank");
59 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
61 mfem::AssignHD<Op>(A_data[A_layout.ind(i1)], value);
67 typename A_layout_t,
typename A_data_t,
68 typename B_layout_t,
typename B_data_t>
69 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
70 const B_layout_t &B_layout,
const B_data_t &B_data)
72 MFEM_STATIC_ASSERT(A_layout_t::rank == 1 && B_layout_t::rank == 1,
74 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1,
75 "invalid dimensions");
76 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
78 mfem::Assign<Op>(A_data[A_layout.ind(i1)], B_data[B_layout.ind(i1)]);
87 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
89 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
92 MFEM_STATIC_ASSERT(A_layout_t::rank == 2,
"invalid rank");
93 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
95 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
97 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2)], value);
103 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
106 static void AssignHD(
const A_layout_t &A_layout, A_data_t &A_data,
109 MFEM_STATIC_ASSERT(A_layout_t::rank == 2,
"invalid rank");
110 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
112 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
114 mfem::AssignHD<Op>(A_data[A_layout.ind(i1,i2)], value);
121 typename A_layout_t,
typename A_data_t,
122 typename B_layout_t,
typename B_data_t>
123 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
124 const B_layout_t &B_layout,
const B_data_t &B_data)
126 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
128 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
129 A_layout_t::dim_2 == B_layout_t::dim_2,
130 "invalid dimensions");
131 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
133 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
135 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2)],
136 B_data[B_layout.ind(i1,i2)]);
146 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
148 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
151 MFEM_STATIC_ASSERT(A_layout_t::rank == 3,
"invalid rank");
152 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
154 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
156 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
158 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3)], value);
166 typename A_layout_t,
typename A_data_t,
167 typename B_layout_t,
typename B_data_t>
168 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
169 const B_layout_t &B_layout,
const B_data_t &B_data)
171 MFEM_STATIC_ASSERT(A_layout_t::rank == 3 && B_layout_t::rank == 3,
173 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
174 A_layout_t::dim_2 == B_layout_t::dim_2 &&
175 A_layout_t::dim_3 == B_layout_t::dim_3,
176 "invalid dimensions");
177 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
179 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
181 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
183 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3)],
184 B_data[B_layout.ind(i1,i2,i3)]);
195 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
197 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
200 MFEM_STATIC_ASSERT(A_layout_t::rank == 4,
"invalid rank");
201 for (
int i4 = 0; i4 < A_layout_t::dim_4; i4++)
203 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
205 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
207 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
209 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3,i4)], value);
218 typename A_layout_t,
typename A_data_t,
219 typename B_layout_t,
typename B_data_t>
220 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
221 const B_layout_t &B_layout,
const B_data_t &B_data)
223 MFEM_STATIC_ASSERT(A_layout_t::rank == 4 && B_layout_t::rank == 4,
225 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
226 A_layout_t::dim_2 == B_layout_t::dim_2 &&
227 A_layout_t::dim_3 == B_layout_t::dim_3 &&
228 A_layout_t::dim_4 == B_layout_t::dim_4,
229 "invalid dimensions");
230 for (
int i4 = 0; i4 < A_layout_t::dim_4; i4++)
232 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
234 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
236 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
238 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3,i4)],
239 B_data[B_layout.ind(i1,i2,i3,i4)]);
250 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
252 inline void TAssign(
const A_layout_t &A_layout, A_data_t &A_data,
253 const scalar_t value)
255 internal::TensorOps<A_layout_t::rank>::
256 template Assign<Op>(A_layout, A_data, value);
261 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
264 inline void TAssignHD(
const A_layout_t &A_layout, A_data_t &A_data,
265 const scalar_t value)
267 internal::TensorOps<A_layout_t::rank>::
268 template AssignHD<Op>(A_layout, A_data, value);
275 typename A_layout_t,
typename A_data_t,
276 typename B_layout_t,
typename B_data_t>
277 inline void TAssign(
const A_layout_t &A_layout, A_data_t &A_data,
278 const B_layout_t &B_layout,
const B_data_t &B_data)
280 internal::TensorOps<A_layout_t::rank>::
281 template Assign<Op>(A_layout, A_data, B_layout, B_data);
287 template <
int S,
typename data_t =
double,
bool align = false>
302 template <AssignOp::Type Op>
308 template <AssignOp::Type Op,
typename src_data_t>
314 template <AssignOp::Type Op,
typename dest_data_t>
322 Assign<AssignOp::Set>(d);
325 template <
typename src_data_t>
326 void Set(
const src_data_t &src)
328 Assign<AssignOp::Set>(src);
331 template <
typename dest_data_t>
334 AssignTo<AssignOp::Add>(dest);
339 Assign<AssignOp::Mult>(scale);
343 template <
int S,
typename data_t,
bool align>
344 const typename TVector<S,data_t,align>::layout_type
345 TVector<S,data_t,align>::layout = layout_type();
348 template <
int N1,
int N2,
typename data_t =
double,
bool align = false>
379 template <
int N1,
int N2,
typename data_t,
bool align>
380 const typename TMatrix<N1,N2,data_t,align>::layout_type
381 TMatrix<N1,N2,data_t,align>::layout = layout_type();
384 template <
int N1,
int N2,
int N3,
typename data_t =
double,
bool align = false>
393 static inline int ind(
int i1,
int i2,
int i3)
401 template <
int N1,
int N2,
int N3,
typename data_t,
bool align>
402 const typename TTensor3<N1,N2,N3,data_t,align>::layout_type
403 TTensor3<N1,N2,N3,data_t,align>::layout = layout_type();
405 template <
int N1,
int N2,
int N3,
int N4,
typename data_t = double,
415 static inline int ind(
int i1,
int i2,
int i3,
int i4)
424 template <
int N1,
int N2,
int N3,
int N4,
typename data_t,
bool align>
425 const typename TTensor4<N1,N2,N3,N4,data_t,align>::layout_type
426 TTensor4<N1,N2,N3,N4,data_t,align>::layout = layout_type();
433 typename A_layout_t,
typename A_data_t,
434 typename B_layout_t,
typename B_data_t,
435 typename C_layout_t,
typename C_data_t>
436 MFEM_ALWAYS_INLINE
inline
437 void Mult_1_2(
const A_layout_t &A_layout,
const A_data_t &A_data,
438 const B_layout_t &B_layout,
const B_data_t &B_data,
439 const C_layout_t &C_layout, C_data_t &C_data)
441 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
442 C_layout_t::rank == 3,
"invalid ranks");
443 const int B3 = B_layout_t::dim_3;
444 const int C3 = C_layout_t::dim_3;
445 MFEM_STATIC_ASSERT(B3 == C3,
"invalid dimensions");
446 for (
int k = 0; k < B3; k++)
448 Mult_AB<Add>(B_layout.ind3(k), B_data,
450 C_layout.ind3(k), C_data);
456 typename A_layout_t,
typename A_data_t,
457 typename B_layout_t,
typename B_data_t,
458 typename C_layout_t,
typename C_data_t>
459 MFEM_ALWAYS_INLINE
inline
460 void Mult_2_1(
const A_layout_t &A_layout,
const A_data_t &A_data,
461 const B_layout_t &B_layout,
const B_data_t &B_data,
462 const C_layout_t &C_layout, C_data_t &C_data)
464 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
465 C_layout_t::rank == 3,
"invalid ranks");
466 Mult_AB<Add>(A_layout, A_data,
467 B_layout.merge_23(), B_data,
468 C_layout.merge_23(), C_data);
473 typename A_layout_t,
typename A_data_t,
474 typename B_layout_t,
typename B_data_t,
475 typename C_layout_t,
typename C_data_t>
476 MFEM_ALWAYS_INLINE
inline
478 const B_layout_t &B_layout,
const B_data_t &B_data,
479 const C_layout_t &C_layout, C_data_t &C_data)
481 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
482 C_layout_t::rank == 4,
"invalid ranks");
483 const int A1 = A_layout_t::dim_1;
484 const int A2 = A_layout_t::dim_2;
485 const int B1 = B_layout_t::dim_1;
486 const int B2 = B_layout_t::dim_2;
487 const int B3 = B_layout_t::dim_3;
488 const int C1 = C_layout_t::dim_1;
489 const int C2 = C_layout_t::dim_2;
490 const int C3 = C_layout_t::dim_3;
491 const int C4 = C_layout_t::dim_4;
492 MFEM_STATIC_ASSERT(A1 == B2 && A2 == C1 && A2 == C3 && B1 == C2 && B3 == C4,
493 "invalid dimensions");
497 MFEM_FLOPS_ADD(3*A1*A2*A2*B1*B3);
498 if (!
Add) { TAssign<AssignOp::Set>(C_layout, C_data, 0.0); }
499 for (
int j = 0; j < A2; j++)
501 for (
int i = 0; i < A2; i++)
503 for (
int l = 0; l < B3; l++)
505 for (
int k = 0; k < B1; k++)
507 for (
int s = 0;
s < A1;
s++)
510 C_data[C_layout.ind(i,k,j,l)] +=
511 A_data[A_layout.ind(
s,i)] *
512 A_data[A_layout.ind(
s,j)] *
513 B_data[B_layout.ind(k,
s,l)];
521 if (!
Add) { TAssign<AssignOp::Set>(C_layout, C_data, 0.0); }
522 for (
int s = 0;
s < A1;
s++)
524 for (
int i = 0; i < A2; i++)
526 for (
int k = 0; k < B1; k++)
528 for (
int j = 0; j < A2; j++)
530 for (
int l = 0; l < B3; l++)
533 C_data[C_layout.ind(i,k,j,l)] +=
534 A_data[A_layout.ind(
s,i)] *
535 A_data[A_layout.ind(
s,j)] *
536 B_data[B_layout.ind(k,
s,l)];
547 typename A_layout_t,
typename A_data_t,
548 typename B_layout_t,
typename B_data_t,
549 typename C_layout_t,
typename C_data_t,
550 typename D_layout_t,
typename D_data_t>
551 MFEM_ALWAYS_INLINE
inline
553 const B_layout_t &B_layout,
const B_data_t &B_data,
554 const C_layout_t &C_layout,
const C_data_t &C_data,
555 const D_layout_t &D_layout, D_data_t &D_data)
557 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
558 C_layout_t::rank == 3 && D_layout_t::rank == 4,
560 const int A1 = A_layout_t::dim_1;
561 const int A2 = A_layout_t::dim_2;
562 const int B1 = B_layout_t::dim_1;
563 const int B2 = B_layout_t::dim_2;
564 const int C1 = C_layout_t::dim_1;
565 const int C2 = C_layout_t::dim_2;
566 const int C3 = C_layout_t::dim_3;
567 const int D1 = D_layout_t::dim_1;
568 const int D2 = D_layout_t::dim_2;
569 const int D3 = D_layout_t::dim_3;
570 const int D4 = D_layout_t::dim_4;
571 MFEM_STATIC_ASSERT(A2 == B1 && A2 == C2 && A1 == D1 && B2 == D3 &&
572 C1 == D2 && C3 == D4,
"invalid dimensions");
577 for (
int l = 0; l < C3; l++)
579 for (
int s = 0;
s < B1;
s++)
581 for (
int k = 0; k < C1; k++)
583 for (
int i = 0; i < A1; i++)
585 H(i,k,
s,l) = A_data[A_layout.ind(i,
s)]*
586 C_data[C_layout.ind(k,
s,l)];
592 Mult_1_2<Add>(B_layout, B_data, H.
layout.merge_12(), H,
593 D_layout.merge_12(), D_data);
595 MFEM_FLOPS_ADD(A1*B1*C1*C3);
596 for (
int l = 0; l < C3; l++)
600 for (
int s = 0;
s < B1;
s++)
602 for (
int k = 0; k < C1; k++)
604 for (
int i = 0; i < A1; i++)
606 H(i,k,
s) = A_data[A_layout.ind(i,
s)]*
607 C_data[C_layout.ind(k,
s,l)];
612 Mult_AB<Add>(H.
layout.merge_12(), H, B_layout, B_data,
613 D_layout.merge_12().ind3(l), D_data);
617 for (
int l = 0; l < C3; l++)
619 for (
int j = 0; j < B2; j++)
621 for (
int k = 0; k < C1; k++)
623 for (
int s = 0;
s < B1;
s++)
625 F(
s,k,j,l) = B_data[B_layout.ind(
s,j)]*
626 C_data[C_layout.ind(k,
s,l)];
631 Mult_AB<Add>(A_layout, A_data, F.
layout.merge_34().merge_23(), F,
632 D_layout.merge_34().merge_23(), D_data);
639 typename A_layout_t,
typename A_data_t,
640 typename B_layout_t,
typename B_data_t,
641 typename C_layout_t,
typename C_data_t>
642 MFEM_ALWAYS_INLINE
inline
644 const B_layout_t &
b,
const B_data_t &B,
645 const C_layout_t &c, C_data_t &C)
647 const int A1 = A_layout_t::dim_1;
648 const int A2 = A_layout_t::dim_2;
649 const int A3 = A_layout_t::dim_3;
650 const int B1 = B_layout_t::dim_1;
651 const int B2 = B_layout_t::dim_2;
652 const int C1 = C_layout_t::dim_1;
653 const int C2 = C_layout_t::dim_2;
654 const int C3 = C_layout_t::dim_3;
655 const int C4 = C_layout_t::dim_4;
656 MFEM_STATIC_ASSERT(A1 == C1 && A2 == B1 && A2 == C2 && A3 == C3 && B2 == C4,
657 "invalid dimensions");
659 MFEM_FLOPS_ADD(A1*A2*A3*B2);
660 for (
int l = 0; l < B2; l++)
662 for (
int k = 0; k < A3; k++)
664 for (
int j = 0; j < A2; j++)
666 for (
int i = 0; i < A1; i++)
668 mfem::Assign<Op>(C[c.ind(i,j,k,l)],
669 A[a.ind(i,j,k)]*B[b.ind(j,l)]);
678 #endif // MFEM_TEMPLATE_TENSOR
void Assemble(dest_data_t &dest) const
ColumnMajorLayout3D< N1, N2, N3 > layout_type
ColumnMajorLayout4D< N1, N2, N3, N4 > layout_type
static const layout_type layout
void Adjugate(TMatrix< N1, N2, data_t > &adj) const
void Assign(const src_data_t &src)
void AssignTo(dest_data_t &dest)
const data_t & operator()(int i, int j, int k) const
void Assign(const data_t d)
static const layout_type layout
static int ind(int i1, int i2, int i3)
const data_t & operator()(int i, int j) const
data_t & operator()(int i, int j)
MFEM_ALWAYS_INLINE void TensorAssemble(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
void Add(const DenseMatrix &A, const DenseMatrix &B, double alpha, DenseMatrix &C)
C = A + alpha*B.
static const layout_type layout
static int ind(int i1, int i2, int i3, int i4)
static int ind(int i1, int i2, int i3, int i4)
MFEM_HOST_DEVICE lvalue_t & AssignHD(lvalue_t &a, const rvalue_t &b)
void TAssign(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
data_t & operator[](int i)
const data_t & operator()(int i, int j, int k, int l) const
static int ind(int i1, int i2, int i3)
TVector< N1 *N2, data_t, align > base_class
MFEM_ALWAYS_INLINE void Mult_2_1(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
ColumnMajorLayout2D< N1, N2 > layout_type
MFEM_ALWAYS_INLINE void TensorProduct(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, const B_data_t &B, const C_layout_t &c, C_data_t &C)
static const layout_type layout
lvalue_t & Assign(lvalue_t &a, const rvalue_t &b)
MFEM_HOST_DEVICE void TAssignHD(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
TVector< N1 *N2 *N3 *N4, data_t, align > base_class
static int ind(int i1, int i2)
data_t AdjDet(TMatrix< N2, N1, data_t > &adj) const
MFEM_ALWAYS_INLINE void Mult_1_2(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
TVector< N1 *N2 *N3, data_t, align > base_class
StridedLayout1D< S, 1 > layout_type
static MFEM_HOST_DEVICE int ind(int i1, int i2)
data_t data[aligned_size >0?aligned_size:1]
void Set(const src_data_t &src)
const data_t & operator[](int i) const
static const int aligned_size
void Scale(const data_t scale)
data_t & operator()(int i, int j, int k, int l)
data_t & operator()(int i, int j, int k)