12 #ifndef MFEM_TEMPLATE_MATRIX
13 #define MFEM_TEMPLATE_MATRIX
15 #include "../config/tconfig.hpp"
16 #include "../general/tassign.hpp"
25 typename A_layout_t,
typename A_data_t,
26 typename B_layout_t,
typename B_data_t,
27 typename C_layout_t,
typename C_data_t>
28 MFEM_ALWAYS_INLINE
inline
29 void sMult_AB(
const A_layout_t &A_layout,
const A_data_t &A_data,
30 const B_layout_t &B_layout,
const B_data_t &B_data,
31 const C_layout_t &C_layout, C_data_t &C_data)
33 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
34 C_layout_t::rank == 2,
"invalid ranks");
35 const int A1 = A_layout_t::dim_1;
36 const int A2 = A_layout_t::dim_2;
37 const int B1 = B_layout_t::dim_1;
38 const int B2 = B_layout_t::dim_2;
39 const int C1 = C_layout_t::dim_1;
40 const int C2 = C_layout_t::dim_2;
41 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
42 "invalid dimensions");
44 MFEM_FLOPS_ADD(
Add ? 2*A1*A2*B2 : 2*A1*A2*B2-A1*B2);
45 for (
int b2 = 0; b2 < B2; b2++)
47 for (
int s = 0; s < A2; s++)
49 for (
int a1 = 0; a1 < A1; a1++)
54 C_data[C_layout.ind(a1,b2)] =
55 A_data[A_layout.ind(a1,s)] * B_data[B_layout.ind(s,b2)];
60 C_data[C_layout.ind(a1,b2)] +=
61 A_data[A_layout.ind(a1,s)] * B_data[B_layout.ind(s,b2)];
69 template <
int bA1,
int bA2,
int bB2,
71 typename A_layout_t,
typename A_data_t,
72 typename B_layout_t,
typename B_data_t,
73 typename C_layout_t,
typename C_data_t>
74 MFEM_ALWAYS_INLINE
inline
75 void bMult_AB(
const A_layout_t &A_layout,
const A_data_t &A_data,
76 const B_layout_t &B_layout,
const B_data_t &B_data,
77 const C_layout_t &C_layout, C_data_t &C_data)
79 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
80 C_layout_t::rank == 2,
"invalid ranks");
81 const int A1 = A_layout_t::dim_1;
82 const int A2 = A_layout_t::dim_2;
83 const int B1 = B_layout_t::dim_1;
84 const int B2 = B_layout_t::dim_2;
85 const int C1 = C_layout_t::dim_1;
86 const int C2 = C_layout_t::dim_2;
87 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
88 "invalid dimensions");
90 const int rA1 = A1%bA1;
91 const int rA2 = A2%bA2;
92 const int rB2 = B2%bB2;
94 for (
int b2_b = 0; b2_b < B2/bB2; b2_b++)
99 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
102 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
103 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
104 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
109 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
110 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
111 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
113 for (
int s_b = 1; s_b < A2/bA2; s_b++)
115 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
118 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
119 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
120 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
125 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
126 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
127 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
133 const bool rAdd =
Add || (A2/bA2 > 0);
134 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
137 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
138 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
139 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
144 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
145 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
146 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
155 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
158 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
159 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
160 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
165 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
166 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
167 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
172 for (
int s_b = 1; s_b < A2/bA2; s_b++)
174 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
177 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
178 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
179 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
184 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
185 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
186 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
192 const bool rAdd =
Add || (A2/bA2 > 0);
193 for (
int a1_b = 0; a1_b < A1/bA1; a1_b++)
196 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
197 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
198 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
203 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
204 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
205 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
212 typename A_layout_t,
typename A_data_t,
213 typename B_layout_t,
typename B_data_t,
214 typename C_layout_t,
typename C_data_t>
215 MFEM_ALWAYS_INLINE
inline
216 void Mult_AB(
const A_layout_t &A_layout,
const A_data_t &A_data,
217 const B_layout_t &B_layout,
const B_data_t &B_data,
218 const C_layout_t &C_layout, C_data_t &C_data)
220 const int b = MFEM_TEMPLATE_BLOCK_SIZE;
221 bMult_AB<b,b,b,Add>(A_layout, A_data, B_layout, B_data, C_layout, C_data);
230 template <
int N1,
int N2>
231 struct MatrixOps { };
234 struct MatrixOps<1,1>
237 template <
typename scalar_t,
typename layout_t,
typename data_t>
238 static inline scalar_t Det(
const layout_t &a,
const data_t &A)
240 return A[a.ind(0,0)];
244 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
246 static inline void Det(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
248 const int M = A_layout_t::dim_1;
249 for (
int i = 0; i < M; i++)
251 Assign<Op>(D[i], A[a.ind(i,0,0)]);
256 template <
typename scalar_t,
257 typename A_layout_t,
typename A_data_t,
258 typename B_layout_t,
typename B_data_t>
259 static inline void Adjugate(
const A_layout_t &a,
const A_data_t &A,
260 const B_layout_t &b, B_data_t &B)
262 B[b.ind(0,0)] = scalar_t(1);
266 template <
typename scalar_t,
267 typename A_layout_t,
typename A_data_t,
268 typename B_layout_t,
typename B_data_t>
269 static inline scalar_t AdjDet(
const A_layout_t &a,
const A_data_t &A,
270 const B_layout_t &b, B_data_t &B)
272 Adjugate<scalar_t>(a, A, b, B);
273 return Det<scalar_t>(a, A);
278 struct MatrixOps<2,2>
281 template <
typename scalar_t,
typename layout_t,
typename data_t>
282 static inline scalar_t Det(
const layout_t &a,
const data_t &A)
285 return (A[a.ind(0,0)]*A[a.ind(1,1)] -
286 A[a.ind(1,0)]*A[a.ind(0,1)]);
290 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
292 static inline void Det(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
294 const int M = A_layout_t::dim_1;
296 for (
int i = 0; i < M; i++)
298 Assign<Op>(D[i], (A[a.ind(i,0,0)]*A[a.ind(i,1,1)] -
299 A[a.ind(i,1,0)]*A[a.ind(i,0,1)]));
304 template <
typename scalar_t,
305 typename A_layout_t,
typename A_data_t,
306 typename B_layout_t,
typename B_data_t>
307 static inline void Adjugate(
const A_layout_t &a,
const A_data_t &A,
308 const B_layout_t &b, B_data_t &B)
310 B[b.ind(0,0)] = A[a.ind(1,1)];
311 B[b.ind(0,1)] = -A[a.ind(0,1)];
312 B[b.ind(1,0)] = -A[a.ind(1,0)];
313 B[b.ind(1,1)] = A[a.ind(0,0)];
317 template <
typename scalar_t,
318 typename A_layout_t,
typename A_data_t,
319 typename B_layout_t,
typename B_data_t>
320 static inline scalar_t AdjDet(
const A_layout_t &a,
const A_data_t &A,
321 const B_layout_t &b, B_data_t &B)
323 Adjugate<scalar_t>(a, A, b, B);
324 return Det<scalar_t>(a, A);
327 template <
bool symm>
struct Symm;
331 struct MatrixOps<2,2>::Symm<true>
333 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
334 static inline MFEM_ALWAYS_INLINE
335 void Set(
const A_layout_t &a, A_data_t &A,
336 const scalar_t a11,
const scalar_t a21,
const scalar_t a22)
345 struct MatrixOps<2,2>::Symm<false>
347 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
348 static inline MFEM_ALWAYS_INLINE
349 void Set(
const A_layout_t &a, A_data_t &A,
350 const scalar_t a11,
const scalar_t a21,
const scalar_t a22)
360 struct MatrixOps<3,3>
363 template <
typename scalar_t,
typename layout_t,
typename data_t>
364 static inline scalar_t Det(
const layout_t &a,
const data_t &A)
367 return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
368 A[a.ind(2,1)]*A[a.ind(1,2)]) -
369 A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
370 A[a.ind(2,1)]*A[a.ind(0,2)]) +
371 A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
372 A[a.ind(1,1)]*A[a.ind(0,2)]));
376 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
378 static inline void Det(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
380 const int M = A_layout_t::dim_1;
381 MFEM_FLOPS_ADD(14*M);
382 for (
int i = 0; i < M; i++)
386 A[a.ind(i,0,0)]*(A[a.ind(i,1,1)]*A[a.ind(i,2,2)] -
387 A[a.ind(i,2,1)]*A[a.ind(i,1,2)]) -
388 A[a.ind(i,1,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,2,2)] -
389 A[a.ind(i,2,1)]*A[a.ind(i,0,2)]) +
390 A[a.ind(i,2,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,1,2)] -
391 A[a.ind(i,1,1)]*A[a.ind(i,0,2)]));
396 template <
typename scalar_t,
397 typename A_layout_t,
typename A_data_t,
398 typename B_layout_t,
typename B_data_t>
399 static inline void Adjugate(
const A_layout_t &a,
const A_data_t &A,
400 const B_layout_t &b, B_data_t &B)
403 B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
404 B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
405 B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
406 B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
407 B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
408 B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
409 B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
410 B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
411 B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
415 template <
typename scalar_t,
416 typename A_layout_t,
typename A_data_t,
417 typename B_layout_t,
typename B_data_t>
418 static inline scalar_t AdjDet(
const A_layout_t &a,
const A_data_t &A,
419 const B_layout_t &b, B_data_t &B)
422 Adjugate<scalar_t>(a, A, b, B);
423 return (A[a.ind(0,0)]*B[b.ind(0,0)] +
424 A[a.ind(1,0)]*B[b.ind(0,1)] +
425 A[a.ind(2,0)]*B[b.ind(0,2)]);
428 template <
bool symm>
struct Symm;
432 struct MatrixOps<3,3>::Symm<true>
434 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
435 static inline MFEM_ALWAYS_INLINE
436 void Set(
const A_layout_t &a, A_data_t &A,
437 const scalar_t a11,
const scalar_t a21,
const scalar_t a31,
438 const scalar_t a22,
const scalar_t a32,
const scalar_t a33)
450 struct MatrixOps<3,3>::Symm<false>
452 template <
typename A_layout_t,
typename A_data_t,
typename scalar_t>
453 static inline MFEM_ALWAYS_INLINE
454 void Set(
const A_layout_t &a, A_data_t &A,
455 const scalar_t a11,
const scalar_t a21,
const scalar_t a31,
456 const scalar_t a22,
const scalar_t a32,
const scalar_t a33)
473 template <
typename scalar_t,
typename layout_t,
typename data_t>
474 inline scalar_t
TDet(
const layout_t &a,
const data_t &A)
476 MFEM_STATIC_ASSERT(layout_t::rank == 2,
"invalid rank");
477 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
478 template Det<scalar_t>(a, A);
483 template <
AssignOp::Type Op,
typename A_layout_t,
typename A_data_t,
485 inline void TDet(
const A_layout_t &a,
const A_data_t &A, D_data_t &D)
487 MFEM_STATIC_ASSERT(A_layout_t::rank == 3,
"invalid rank");
488 internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
489 template Det<Op>(a, A, D);
493 template <
typename scalar_t,
494 typename A_layout_t,
typename A_data_t,
495 typename B_layout_t,
typename B_data_t>
496 inline void TAdjugate(
const A_layout_t &a,
const A_data_t &A,
497 const B_layout_t &b, B_data_t &B)
499 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
501 internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
502 template Adjugate<scalar_t>(a, A, b, B);
507 template <
typename scalar_t,
508 typename A_layout_t,
typename A_data_t,
509 typename B_layout_t,
typename B_data_t>
510 inline scalar_t
TAdjDet(
const A_layout_t &a,
const A_data_t &A,
511 const B_layout_t &b, B_data_t &B)
513 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
515 return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
516 template AdjDet<scalar_t>(a, A, b, B);
521 #endif // MFEM_TEMPLATE_MATRIX
scalar_t TDet(const layout_t &a, const data_t &A)
MFEM_ALWAYS_INLINE void bMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
void Add(const DenseMatrix &A, const DenseMatrix &B, double alpha, DenseMatrix &C)
C = A + alpha*B.
void TAdjugate(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
MFEM_ALWAYS_INLINE void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
MFEM_ALWAYS_INLINE void sMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)