12 #ifndef MFEM_TEMPLATE_TENSOR
13 #define MFEM_TEMPLATE_TENSOR
15 #include "../config/tconfig.hpp"
16 #include "../general/tassign.hpp"
19 #include "../general/cuda.hpp"
20 #include "../general/hip.hpp"
39 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
42 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
45 MFEM_STATIC_ASSERT(A_layout_t::rank == 1,
"invalid rank");
46 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
48 mfem::Assign<Op>(A_data[A_layout.ind(i1)], value);
54 typename A_layout_t,
typename A_data_t,
55 typename B_layout_t,
typename B_data_t>
56 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
57 const B_layout_t &B_layout,
const B_data_t &B_data)
59 MFEM_STATIC_ASSERT(A_layout_t::rank == 1 && B_layout_t::rank == 1,
61 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1,
62 "invalid dimensions");
63 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
65 mfem::Assign<Op>(A_data[A_layout.ind(i1)], B_data[B_layout.ind(i1)]);
74 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
77 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
80 MFEM_STATIC_ASSERT(A_layout_t::rank == 2,
"invalid rank");
81 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
83 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
85 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2)], value);
92 typename A_layout_t,
typename A_data_t,
93 typename B_layout_t,
typename B_data_t>
94 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
95 const B_layout_t &B_layout,
const B_data_t &B_data)
97 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
99 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
100 A_layout_t::dim_2 == B_layout_t::dim_2,
101 "invalid dimensions");
102 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
104 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
106 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2)],
107 B_data[B_layout.ind(i1,i2)]);
117 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
119 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
122 MFEM_STATIC_ASSERT(A_layout_t::rank == 3,
"invalid rank");
123 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
125 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
127 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
129 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3)], value);
137 typename A_layout_t,
typename A_data_t,
138 typename B_layout_t,
typename B_data_t>
139 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
140 const B_layout_t &B_layout,
const B_data_t &B_data)
142 MFEM_STATIC_ASSERT(A_layout_t::rank == 3 && B_layout_t::rank == 3,
144 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
145 A_layout_t::dim_2 == B_layout_t::dim_2 &&
146 A_layout_t::dim_3 == B_layout_t::dim_3,
147 "invalid dimensions");
148 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
150 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
152 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
154 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3)],
155 B_data[B_layout.ind(i1,i2,i3)]);
166 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
168 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
171 MFEM_STATIC_ASSERT(A_layout_t::rank == 4,
"invalid rank");
172 for (
int i4 = 0; i4 < A_layout_t::dim_4; i4++)
174 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
176 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
178 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
180 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3,i4)], value);
189 typename A_layout_t,
typename A_data_t,
190 typename B_layout_t,
typename B_data_t>
191 static void Assign(
const A_layout_t &A_layout, A_data_t &A_data,
192 const B_layout_t &B_layout,
const B_data_t &B_data)
194 MFEM_STATIC_ASSERT(A_layout_t::rank == 4 && B_layout_t::rank == 4,
196 MFEM_STATIC_ASSERT(A_layout_t::dim_1 == B_layout_t::dim_1 &&
197 A_layout_t::dim_2 == B_layout_t::dim_2 &&
198 A_layout_t::dim_3 == B_layout_t::dim_3 &&
199 A_layout_t::dim_4 == B_layout_t::dim_4,
200 "invalid dimensions");
201 for (
int i4 = 0; i4 < A_layout_t::dim_4; i4++)
203 for (
int i3 = 0; i3 < A_layout_t::dim_3; i3++)
205 for (
int i2 = 0; i2 < A_layout_t::dim_2; i2++)
207 for (
int i1 = 0; i1 < A_layout_t::dim_1; i1++)
209 mfem::Assign<Op>(A_data[A_layout.ind(i1,i2,i3,i4)],
210 B_data[B_layout.ind(i1,i2,i3,i4)]);
221 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
224 inline void TAssign(
const A_layout_t &A_layout, A_data_t &A_data,
227 internal::TensorOps<A_layout_t::rank>::
228 template Assign<Op>(A_layout, A_data, value);
235 typename A_layout_t,
typename A_data_t,
236 typename B_layout_t,
typename B_data_t>
237 inline void TAssign(
const A_layout_t &A_layout, A_data_t &A_data,
238 const B_layout_t &B_layout,
const B_data_t &B_data)
240 internal::TensorOps<A_layout_t::rank>::
241 template Assign<Op>(A_layout, A_data, B_layout, B_data);
247 template <
int S,
typename data_t =
double,
bool align = false>
262 template <AssignOp::Type Op>
268 template <AssignOp::Type Op,
typename src_data_t>
274 template <AssignOp::Type Op,
typename dest_data_t>
282 Assign<AssignOp::Set>(d);
285 template <
typename src_data_t>
286 void Set(
const src_data_t &src)
288 Assign<AssignOp::Set>(src);
291 template <
typename dest_data_t>
294 AssignTo<AssignOp::Add>(dest);
299 Assign<AssignOp::Mult>(scale);
303 template <
int S,
typename data_t,
bool align>
304 const typename TVector<S,data_t,align>::layout_type
305 TVector<S,data_t,align>::layout = layout_type();
308 template <
int N1,
int N2,
typename data_t =
double,
bool align = false>
339 template <
int N1,
int N2,
typename data_t,
bool align>
340 const typename TMatrix<N1,N2,data_t,align>::layout_type
341 TMatrix<N1,N2,data_t,align>::layout = layout_type();
344 template <
int N1,
int N2,
int N3,
typename data_t =
double,
bool align = false>
353 static inline int ind(
int i1,
int i2,
int i3)
361 template <
int N1,
int N2,
int N3,
typename data_t,
bool align>
362 const typename TTensor3<N1,N2,N3,data_t,align>::layout_type
363 TTensor3<N1,N2,N3,data_t,align>::layout = layout_type();
365 template <
int N1,
int N2,
int N3,
int N4,
typename data_t = double,
375 static inline int ind(
int i1,
int i2,
int i3,
int i4)
384 template <
int N1,
int N2,
int N3,
int N4,
typename data_t,
bool align>
385 const typename TTensor4<N1,N2,N3,N4,data_t,align>::layout_type
386 TTensor4<N1,N2,N3,N4,data_t,align>::layout = layout_type();
393 typename A_layout_t,
typename A_data_t,
394 typename B_layout_t,
typename B_data_t,
395 typename C_layout_t,
typename C_data_t>
396 MFEM_ALWAYS_INLINE
inline
397 void Mult_1_2(
const A_layout_t &A_layout,
const A_data_t &A_data,
398 const B_layout_t &B_layout,
const B_data_t &B_data,
399 const C_layout_t &C_layout, C_data_t &C_data)
401 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
402 C_layout_t::rank == 3,
"invalid ranks");
403 const int B3 = B_layout_t::dim_3;
404 const int C3 = C_layout_t::dim_3;
405 MFEM_STATIC_ASSERT(B3 == C3,
"invalid dimentions");
406 for (
int k = 0; k < B3; k++)
408 Mult_AB<Add>(B_layout.ind3(k), B_data,
410 C_layout.ind3(k), C_data);
416 typename A_layout_t,
typename A_data_t,
417 typename B_layout_t,
typename B_data_t,
418 typename C_layout_t,
typename C_data_t>
419 MFEM_ALWAYS_INLINE
inline
420 void Mult_2_1(
const A_layout_t &A_layout,
const A_data_t &A_data,
421 const B_layout_t &B_layout,
const B_data_t &B_data,
422 const C_layout_t &C_layout, C_data_t &C_data)
424 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
425 C_layout_t::rank == 3,
"invalid ranks");
426 Mult_AB<Add>(A_layout, A_data,
427 B_layout.merge_23(), B_data,
428 C_layout.merge_23(), C_data);
433 typename A_layout_t,
typename A_data_t,
434 typename B_layout_t,
typename B_data_t,
435 typename C_layout_t,
typename C_data_t>
436 MFEM_ALWAYS_INLINE
inline
438 const B_layout_t &B_layout,
const B_data_t &B_data,
439 const C_layout_t &C_layout, C_data_t &C_data)
441 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 3 &&
442 C_layout_t::rank == 4,
"invalid ranks");
443 const int A1 = A_layout_t::dim_1;
444 const int A2 = A_layout_t::dim_2;
445 const int B1 = B_layout_t::dim_1;
446 const int B2 = B_layout_t::dim_2;
447 const int B3 = B_layout_t::dim_3;
448 const int C1 = C_layout_t::dim_1;
449 const int C2 = C_layout_t::dim_2;
450 const int C3 = C_layout_t::dim_3;
451 const int C4 = C_layout_t::dim_4;
452 MFEM_STATIC_ASSERT(A1 == B2 && A2 == C1 && A2 == C3 && B1 == C2 && B3 == C4,
453 "invalid dimensions");
457 MFEM_FLOPS_ADD(3*A1*A2*A2*B1*B3);
458 if (!
Add) { TAssign<AssignOp::Set>(C_layout, C_data, 0.0); }
459 for (
int j = 0; j < A2; j++)
461 for (
int i = 0; i < A2; i++)
463 for (
int l = 0; l < B3; l++)
465 for (
int k = 0; k < B1; k++)
467 for (
int s = 0; s < A1; s++)
470 C_data[C_layout.ind(i,k,j,l)] +=
471 A_data[A_layout.ind(s,i)] *
472 A_data[A_layout.ind(s,j)] *
473 B_data[B_layout.ind(k,s,l)];
481 if (!
Add) { TAssign<AssignOp::Set>(C_layout, C_data, 0.0); }
482 for (
int s = 0; s < A1; s++)
484 for (
int i = 0; i < A2; i++)
486 for (
int k = 0; k < B1; k++)
488 for (
int j = 0; j < A2; j++)
490 for (
int l = 0; l < B3; l++)
493 C_data[C_layout.ind(i,k,j,l)] +=
494 A_data[A_layout.ind(s,i)] *
495 A_data[A_layout.ind(s,j)] *
496 B_data[B_layout.ind(k,s,l)];
507 typename A_layout_t,
typename A_data_t,
508 typename B_layout_t,
typename B_data_t,
509 typename C_layout_t,
typename C_data_t,
510 typename D_layout_t,
typename D_data_t>
511 MFEM_ALWAYS_INLINE
inline
513 const B_layout_t &B_layout,
const B_data_t &B_data,
514 const C_layout_t &C_layout,
const C_data_t &C_data,
515 const D_layout_t &D_layout, D_data_t &D_data)
517 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
518 C_layout_t::rank == 3 && D_layout_t::rank == 4,
520 const int A1 = A_layout_t::dim_1;
521 const int A2 = A_layout_t::dim_2;
522 const int B1 = B_layout_t::dim_1;
523 const int B2 = B_layout_t::dim_2;
524 const int C1 = C_layout_t::dim_1;
525 const int C2 = C_layout_t::dim_2;
526 const int C3 = C_layout_t::dim_3;
527 const int D1 = D_layout_t::dim_1;
528 const int D2 = D_layout_t::dim_2;
529 const int D3 = D_layout_t::dim_3;
530 const int D4 = D_layout_t::dim_4;
531 MFEM_STATIC_ASSERT(A2 == B1 && A2 == C2 && A1 == D1 && B2 == D3 &&
532 C1 == D2 && C3 == D4,
"invalid dimensions");
537 for (
int l = 0; l < C3; l++)
539 for (
int s = 0; s < B1; s++)
541 for (
int k = 0; k < C1; k++)
543 for (
int i = 0; i < A1; i++)
545 H(i,k,s,l) = A_data[A_layout.ind(i,s)]*
546 C_data[C_layout.ind(k,s,l)];
552 Mult_1_2<Add>(B_layout, B_data, H.
layout.merge_12(), H,
553 D_layout.merge_12(), D_data);
555 MFEM_FLOPS_ADD(A1*B1*C1*C3);
556 for (
int l = 0; l < C3; l++)
560 for (
int s = 0; s < B1; s++)
562 for (
int k = 0; k < C1; k++)
564 for (
int i = 0; i < A1; i++)
566 H(i,k,s) = A_data[A_layout.ind(i,s)]*
567 C_data[C_layout.ind(k,s,l)];
572 Mult_AB<Add>(H.
layout.merge_12(), H, B_layout, B_data,
573 D_layout.merge_12().ind3(l), D_data);
577 for (
int l = 0; l < C3; l++)
579 for (
int j = 0; j < B2; j++)
581 for (
int k = 0; k < C1; k++)
583 for (
int s = 0; s < B1; s++)
585 F(s,k,j,l) = B_data[B_layout.ind(s,j)]*
586 C_data[C_layout.ind(k,s,l)];
591 Mult_AB<Add>(A_layout, A_data, F.
layout.merge_34().merge_23(), F,
592 D_layout.merge_34().merge_23(), D_data);
599 typename A_layout_t,
typename A_data_t,
600 typename B_layout_t,
typename B_data_t,
601 typename C_layout_t,
typename C_data_t>
602 MFEM_ALWAYS_INLINE
inline
604 const B_layout_t &
b,
const B_data_t &B,
605 const C_layout_t &c, C_data_t &C)
607 const int A1 = A_layout_t::dim_1;
608 const int A2 = A_layout_t::dim_2;
609 const int A3 = A_layout_t::dim_3;
610 const int B1 = B_layout_t::dim_1;
611 const int B2 = B_layout_t::dim_2;
612 const int C1 = C_layout_t::dim_1;
613 const int C2 = C_layout_t::dim_2;
614 const int C3 = C_layout_t::dim_3;
615 const int C4 = C_layout_t::dim_4;
616 MFEM_STATIC_ASSERT(A1 == C1 && A2 == B1 && A2 == C2 && A3 == C3 && B2 == C4,
617 "invalid dimensions");
619 MFEM_FLOPS_ADD(A1*A2*A3*B2);
620 for (
int l = 0; l < B2; l++)
622 for (
int k = 0; k < A3; k++)
624 for (
int j = 0; j < A2; j++)
626 for (
int i = 0; i < A1; i++)
628 mfem::Assign<Op>(C[c.ind(i,j,k,l)],
629 A[a.ind(i,j,k)]*B[b.ind(j,l)]);
638 #endif // MFEM_TEMPLATE_TENSOR
void Assemble(dest_data_t &dest) const
ColumnMajorLayout3D< N1, N2, N3 > layout_type
ColumnMajorLayout4D< N1, N2, N3, N4 > layout_type
static const layout_type layout
void Adjugate(TMatrix< N1, N2, data_t > &adj) const
void Assign(const src_data_t &src)
void AssignTo(dest_data_t &dest)
const data_t & operator()(int i, int j, int k) const
MFEM_HOST_DEVICE void TAssign(const A_layout_t &A_layout, A_data_t &A_data, scalar_t value)
void Assign(const data_t d)
static const layout_type layout
static int ind(int i1, int i2, int i3)
const data_t & operator()(int i, int j) const
data_t & operator()(int i, int j)
MFEM_ALWAYS_INLINE void TensorAssemble(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
void Add(const DenseMatrix &A, const DenseMatrix &B, double alpha, DenseMatrix &C)
C = A + alpha*B.
static const layout_type layout
static int ind(int i1, int i2, int i3, int i4)
static int ind(int i1, int i2, int i3, int i4)
MFEM_HOST_DEVICE data_t & operator[](int i)
const data_t & operator()(int i, int j, int k, int l) const
static int ind(int i1, int i2, int i3)
TVector< N1 *N2, data_t, align > base_class
MFEM_ALWAYS_INLINE void Mult_2_1(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
ColumnMajorLayout2D< N1, N2 > layout_type
MFEM_ALWAYS_INLINE void TensorProduct(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, const B_data_t &B, const C_layout_t &c, C_data_t &C)
static const layout_type layout
MFEM_HOST_DEVICE lvalue_t & Assign(lvalue_t &a, const rvalue_t &b)
TVector< N1 *N2 *N3 *N4, data_t, align > base_class
static int ind(int i1, int i2)
data_t AdjDet(TMatrix< N2, N1, data_t > &adj) const
MFEM_ALWAYS_INLINE void Mult_1_2(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
TVector< N1 *N2 *N3, data_t, align > base_class
StridedLayout1D< S, 1 > layout_type
static MFEM_HOST_DEVICE int ind(int i1, int i2)
data_t data[aligned_size >0?aligned_size:1]
void Set(const src_data_t &src)
static const int aligned_size
MFEM_HOST_DEVICE const data_t & operator[](int i) const
void Scale(const data_t scale)
data_t & operator()(int i, int j, int k, int l)
data_t & operator()(int i, int j, int k)