12 #ifndef MFEM_TEMPLATE_MATRIX
13 #define MFEM_TEMPLATE_MATRIX
15 #include "../config/tconfig.hpp"
16 #include "../general/tassign.hpp"
17 #include "../general/cuda.hpp"
18 #include "../general/hip.hpp"
27 typename A_layout_t,
typename A_data_t,
28 typename B_layout_t,
typename B_data_t,
29 typename C_layout_t,
typename C_data_t>
30 MFEM_ALWAYS_INLINE
inline
31 void sMult_AB(
const A_layout_t &A_layout,
const A_data_t &A_data,
32 const B_layout_t &B_layout,
const B_data_t &B_data,
33 const C_layout_t &C_layout, C_data_t &C_data)
35 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
36 C_layout_t::rank == 2,
"invalid ranks");
37 const int A1 = A_layout_t::dim_1;
38 const int A2 = A_layout_t::dim_2;
39 const int B1 = B_layout_t::dim_1;
40 const int B2 = B_layout_t::dim_2;
41 const int C1 = C_layout_t::dim_1;
42 const int C2 = C_layout_t::dim_2;
43 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
44 "invalid dimensions");
46 MFEM_FLOPS_ADD(
Add ? 2*A1*A2*B2 : 2*A1*A2*B2-A1*B2);
47 for (
int b2 = 0; b2 < B2; b2++)
49 for (
int s = 0; s < A2; s++)
51 for (
int a1 = 0; a1 < A1; a1++)
56 C_data[C_layout.ind(a1,b2)] =
57 A_data[A_layout.ind(a1,s)] * B_data[B_layout.ind(s,b2)];
62 C_data[C_layout.ind(a1,b2)] +=
63 A_data[A_layout.ind(a1,s)] * B_data[B_layout.ind(s,b2)];
71 template <
int bA1,
int bA2,
int bB2,
73 typename A_layout_t,
typename A_data_t,
74 typename B_layout_t,
typename B_data_t,
75 typename C_layout_t,
typename C_data_t>
76 MFEM_ALWAYS_INLINE
inline
77 void bMult_AB(
const A_layout_t &A_layout,
const A_data_t &A_data,
78 const B_layout_t &B_layout,
const B_data_t &B_data,
79 const C_layout_t &C_layout, C_data_t &C_data)
81 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
82 C_layout_t::rank == 2,
"invalid ranks");
83 const int A1 = A_layout_t::dim_1;
84 const int A2 = A_layout_t::dim_2;
85 const int B1 = B_layout_t::dim_1;
86 const int B2 = B_layout_t::dim_2;
87 const int C1 = C_layout_t::dim_1;
88 const int C2 = C_layout_t::dim_2;
89 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
90 "invalid dimensions");
92 const int rA1 = A1%bA1;
93 const int rA2 = A2%bA2;
94 const int rB2 = B2%bB2;
96 for (
int b2_b = 0; b2_b < B2/bB2; b2_b++)
101 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
104 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
105 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
106 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
111 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
112 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
113 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
115 for (
int s_b = 1; s_b < A2/bA2; s_b++)
117 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
120 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
121 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
122 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
127 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
128 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
129 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
135 const bool rAdd =
Add || (A2/bA2 > 0);
136 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
139 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
140 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
141 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
146 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
147 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
148 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
157 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
160 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
161 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
162 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
167 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
168 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
169 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
174 for (
int s_b = 1; s_b < A2/bA2; s_b++)
176 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
179 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
180 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
181 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
186 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
187 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
188 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
194 const bool rAdd =
Add || (A2/bA2 > 0);
195 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
198 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
199 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
200 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
205 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
206 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
207 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
214 typename A_layout_t,
typename A_data_t,
215 typename B_layout_t,
typename B_data_t,
216 typename C_layout_t,
typename C_data_t>
217 MFEM_ALWAYS_INLINE
inline
218 void Mult_AB(
const A_layout_t &A_layout,
const A_data_t &A_data,
219 const B_layout_t &B_layout,
const B_data_t &B_data,
220 const C_layout_t &C_layout, C_data_t &C_data)
222 const int b = MFEM_TEMPLATE_BLOCK_SIZE;
223 bMult_AB<b,b,b,Add>(A_layout, A_data, B_layout, B_data, C_layout, C_data);
232 template <
int N1,
int N2>
233 struct MatrixOps { };
236 struct MatrixOps<1,1>
239 template <
typename scalar_t,
typename layout_t,
typename data_t>
240 static inline scalar_t
Det(
const layout_t &
a,
const data_t &A)
242 return A[a.ind(0,0)];
246 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
248 static inline void Det(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
250 const int M = A_layout_t::dim_1;
251 for (
int i = 0; i < M; i++)
253 Assign<Op>(D[i], A[a.ind(i,0,0)]);
258 template <
typename scalar_t,
259 typename A_layout_t,
typename A_data_t,
260 typename B_layout_t,
typename B_data_t>
261 static inline void Adjugate(
const A_layout_t &a,
const A_data_t &A,
262 const B_layout_t &
b, B_data_t &B)
264 B[b.ind(0,0)] = scalar_t(1);
268 template <
typename scalar_t,
269 typename A_layout_t,
typename A_data_t,
270 typename B_layout_t,
typename B_data_t>
272 static inline scalar_t AdjDet(
const A_layout_t &a,
const A_data_t &A,
273 const B_layout_t &b, B_data_t &B)
275 Adjugate<scalar_t>(
a, A,
b, B);
276 return Det<scalar_t>(
a, A);
281 struct MatrixOps<2,2>
284 template <
typename scalar_t,
typename layout_t,
typename data_t>
286 static inline scalar_t
Det(
const layout_t &a,
const data_t &A)
289 return (A[a.ind(0,0)]*A[a.ind(1,1)] -
290 A[a.ind(1,0)]*A[a.ind(0,1)]);
294 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
296 static inline void Det(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
298 const int M = A_layout_t::dim_1;
300 for (
int i = 0; i < M; i++)
302 Assign<Op>(D[i], (A[a.ind(i,0,0)]*A[a.ind(i,1,1)] -
303 A[a.ind(i,1,0)]*A[a.ind(i,0,1)]));
308 template <
typename scalar_t,
309 typename A_layout_t,
typename A_data_t,
310 typename B_layout_t,
typename B_data_t>
312 static inline void Adjugate(
const A_layout_t &a,
const A_data_t &A,
313 const B_layout_t &
b, B_data_t &B)
315 B[b.ind(0,0)] = A[a.ind(1,1)];
316 B[b.ind(0,1)] = -A[a.ind(0,1)];
317 B[b.ind(1,0)] = -A[a.ind(1,0)];
318 B[b.ind(1,1)] = A[a.ind(0,0)];
322 template <
typename scalar_t,
323 typename A_layout_t,
typename A_data_t,
324 typename B_layout_t,
typename B_data_t>
326 static inline scalar_t AdjDet(
const A_layout_t &a,
const A_data_t &A,
327 const B_layout_t &b, B_data_t &B)
329 Adjugate<scalar_t>(
a, A,
b, B);
330 return Det<scalar_t>(
a, A);
333 template <
bool symm>
struct Symm;
337 struct MatrixOps<2,2>::Symm<true>
339 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
340 static inline MFEM_ALWAYS_INLINE
341 void Set(
const A_layout_t &a, A_data_t &A,
342 const scalar_t a11,
const scalar_t a21,
const scalar_t a22)
351 struct MatrixOps<2,2>::Symm<false>
353 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
354 static inline MFEM_ALWAYS_INLINE
355 void Set(
const A_layout_t &a, A_data_t &A,
356 const scalar_t a11,
const scalar_t a21,
const scalar_t a22)
366 struct MatrixOps<3,3>
369 template <
typename scalar_t,
typename layout_t,
typename data_t>
371 static inline scalar_t
Det(
const layout_t &a,
const data_t &A)
374 return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
375 A[a.ind(2,1)]*A[a.ind(1,2)]) -
376 A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
377 A[a.ind(2,1)]*A[a.ind(0,2)]) +
378 A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
379 A[a.ind(1,1)]*A[a.ind(0,2)]));
383 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
385 static inline void Det(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
387 const int M = A_layout_t::dim_1;
388 MFEM_FLOPS_ADD(14*M);
389 for (
int i = 0; i < M; i++)
393 A[a.ind(i,0,0)]*(A[a.ind(i,1,1)]*A[a.ind(i,2,2)] -
394 A[a.ind(i,2,1)]*A[a.ind(i,1,2)]) -
395 A[a.ind(i,1,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,2,2)] -
396 A[a.ind(i,2,1)]*A[a.ind(i,0,2)]) +
397 A[a.ind(i,2,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,1,2)] -
398 A[a.ind(i,1,1)]*A[a.ind(i,0,2)]));
403 template <
typename scalar_t,
404 typename A_layout_t,
typename A_data_t,
405 typename B_layout_t,
typename B_data_t>
407 static inline void Adjugate(
const A_layout_t &a,
const A_data_t &A,
408 const B_layout_t &b, B_data_t &B)
411 B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
412 B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
413 B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
414 B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
415 B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
416 B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
417 B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
418 B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
419 B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
423 template <
typename scalar_t,
424 typename A_layout_t,
typename A_data_t,
425 typename B_layout_t,
typename B_data_t>
427 static inline scalar_t AdjDet(
const A_layout_t &a,
const A_data_t &A,
428 const B_layout_t &b, B_data_t &B)
431 Adjugate<scalar_t>(
a, A,
b, B);
432 return (A[a.ind(0,0)]*B[b.ind(0,0)] +
433 A[a.ind(1,0)]*B[b.ind(0,1)] +
434 A[a.ind(2,0)]*B[b.ind(0,2)]);
437 template <
bool symm>
struct Symm;
441 struct MatrixOps<3,3>::Symm<true>
443 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
444 static inline MFEM_ALWAYS_INLINE
445 void Set(
const A_layout_t &a, A_data_t &A,
446 const scalar_t a11,
const scalar_t a21,
const scalar_t a31,
447 const scalar_t a22,
const scalar_t a32,
const scalar_t a33)
459 struct MatrixOps<3,3>::Symm<false>
461 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
462 static inline MFEM_ALWAYS_INLINE
463 void Set(
const A_layout_t &a, A_data_t &A,
464 const scalar_t a11,
const scalar_t a21,
const scalar_t a31,
465 const scalar_t a22,
const scalar_t a32,
const scalar_t a33)
482 template <
typename scalar_t,
typename layout_t,
typename data_t>
484 inline scalar_t
TDet(
const layout_t &a,
const data_t &A)
486 MFEM_STATIC_ASSERT(layout_t::rank == 2,
"invalid rank");
487 #if !defined(__xlC__) || (__xlC__ >= 0x0d00)
488 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
489 template Det<scalar_t>(
a, A);
491 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
498 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
501 inline void TDet(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
503 MFEM_STATIC_ASSERT(A_layout_t::rank == 3,
"invalid rank");
504 #if !defined(__xlC__) || (__xlC__ >= 0x0d00)
505 internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
506 template Det<Op>(
a, A, D);
508 internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
514 template <
typename scalar_t,
515 typename A_layout_t,
typename A_data_t,
516 typename B_layout_t,
typename B_data_t>
517 inline void TAdjugate(
const A_layout_t &a,
const A_data_t &A,
518 const B_layout_t &b, B_data_t &B)
520 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
522 internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
523 template Adjugate<scalar_t>(
a, A,
b, B);
528 template <
typename scalar_t,
529 typename A_layout_t,
typename A_data_t,
530 typename B_layout_t,
typename B_data_t>
532 inline scalar_t
TAdjDet(
const A_layout_t &a,
const A_data_t &A,
533 const B_layout_t &b, B_data_t &B)
535 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
537 return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
538 template AdjDet<scalar_t>(
a, A,
b, B);
543 #endif // MFEM_TEMPLATE_MATRIX
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
MFEM_HOST_DEVICE scalar_t TDet(const layout_t &a, const data_t &A)
MFEM_HOST_DEVICE scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
MFEM_ALWAYS_INLINE void bMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
void Add(const DenseMatrix &A, const DenseMatrix &B, double alpha, DenseMatrix &C)
C = A + alpha*B.
void TAdjugate(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
MFEM_ALWAYS_INLINE void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
MFEM_ALWAYS_INLINE void sMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)