MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
tmatrix.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_TEMPLATE_MATRIX
13#define MFEM_TEMPLATE_MATRIX
14
#include "../config/tconfig.hpp"
#include "../general/tassign.hpp"
18namespace mfem
19{
20
21// Matrix-matrix products
22
namespace internal
{

// Trait mapping a data container type to the type of its entries.
// By default, expose the container's nested 'data_type'.
template <typename T> struct entry_type { typedef typename T::data_type type; };

// Specialization for raw pointers: the entry type is the pointee type.
template <typename T> struct entry_type<T*> { typedef T type; };

} // namespace mfem::internal
31
32
33// C {=|+=} A.B -- simple version (no blocks)
34template <bool Add,
35 typename A_layout_t, typename A_data_t,
36 typename B_layout_t, typename B_data_t,
37 typename C_layout_t, typename C_data_t>
38MFEM_ALWAYS_INLINE inline
39void sMult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
40 const B_layout_t &B_layout, const B_data_t &B_data,
41 const C_layout_t &C_layout, C_data_t &C_data)
42{
43 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
44 C_layout_t::rank == 2, "invalid ranks");
45 const int A1 = A_layout_t::dim_1;
46 const int A2 = A_layout_t::dim_2;
47 const int B1 = B_layout_t::dim_1;
48 const int B2 = B_layout_t::dim_2;
49 const int C1 = C_layout_t::dim_1;
50 const int C2 = C_layout_t::dim_2;
51 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
52 "invalid dimensions");
53
54 MFEM_FLOPS_ADD(Add ? 2*A1*A2*B2 : 2*A1*A2*B2-A1*B2);
55 for (int b2 = 0; b2 < B2; b2++)
56 {
57 for (int a1 = 0; a1 < A1; a1++)
58 {
59 typename internal::entry_type<C_data_t>::type c_a1_b2;
60 if (Add)
61 {
62 // C(a1,b2) += A(a1,0) * B(0,b2);
63 c_a1_b2 = C_data[C_layout.ind(a1,b2)];
64 c_a1_b2.fma(A_data[A_layout.ind(a1,0)], B_data[B_layout.ind(0,b2)]);
65 }
66 else
67 {
68 // C(a1,b2) = A(a1,0) * B(0,b2);
69 c_a1_b2.mul(A_data[A_layout.ind(a1,0)], B_data[B_layout.ind(0,b2)]);
70 }
71 for (int s = 1; s < A2; s++)
72 {
73 // C(a1,b2) += A(a1,s) * B(s,b2);
74 c_a1_b2.fma(A_data[A_layout.ind(a1,s)], B_data[B_layout.ind(s,b2)]);
75 }
76 C_data[C_layout.ind(a1,b2)] = c_a1_b2;
77 }
78 }
79}
80
81// C {=|+=} A.B -- block version
82template <int bA1, int bA2, int bB2, // block sizes
83 bool Add,
84 typename A_layout_t, typename A_data_t,
85 typename B_layout_t, typename B_data_t,
86 typename C_layout_t, typename C_data_t>
87MFEM_ALWAYS_INLINE inline
88void bMult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
89 const B_layout_t &B_layout, const B_data_t &B_data,
90 const C_layout_t &C_layout, C_data_t &C_data)
91{
92 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
93 C_layout_t::rank == 2, "invalid ranks");
94 const int A1 = A_layout_t::dim_1;
95 const int A2 = A_layout_t::dim_2;
96 const int B1 = B_layout_t::dim_1;
97 const int B2 = B_layout_t::dim_2;
98 const int C1 = C_layout_t::dim_1;
99 const int C2 = C_layout_t::dim_2;
100 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
101 "invalid dimensions");
102
103 const int rA1 = A1%bA1;
104 const int rA2 = A2%bA2;
105 const int rB2 = B2%bB2;
106
107 for (int b2_b = 0; b2_b < B2/bB2; b2_b++)
108 {
109 if (A2/bA2 > 0)
110 {
111 // s_b == 0
112 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
113 {
115 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
116 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
117 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
118 }
119 if (rA1)
120 {
122 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
123 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
124 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
125 }
126 for (int s_b = 1; s_b < A2/bA2; s_b++)
127 {
128 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
129 {
131 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
132 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
133 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
134 }
135 if (rA1)
136 {
138 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
139 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
140 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
141 }
142 }
143 }
144 if (rA2)
145 {
146 const bool rAdd = Add || (A2/bA2 > 0);
147 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
148 {
150 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
151 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
152 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
153 }
154 if (rA1)
155 {
157 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
158 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
159 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
160 }
161 }
162 }
163 if (rB2)
164 {
165 if (A2/bA2 > 0)
166 {
167 // s_b == 0
168 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
169 {
171 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
172 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
173 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
174 }
175 if (rA1)
176 {
178 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
179 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
180 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
181 }
182 }
183 if (A2/bA2 > 1)
184 {
185 for (int s_b = 1; s_b < A2/bA2; s_b++)
186 {
187 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
188 {
190 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
191 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
192 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
193 }
194 if (rA1)
195 {
197 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
198 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
199 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
200 }
201 }
202 }
203 if (rA2)
204 {
205 const bool rAdd = Add || (A2/bA2 > 0);
206 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
207 {
209 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
210 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
211 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
212 }
213 if (rA1)
214 {
216 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
217 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
218 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
219 }
220 }
221 }
222}
223
224template <bool Add,
225 typename A_layout_t, typename A_data_t,
226 typename B_layout_t, typename B_data_t,
227 typename C_layout_t, typename C_data_t>
228MFEM_ALWAYS_INLINE inline
229void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
230 const B_layout_t &B_layout, const B_data_t &B_data,
231 const C_layout_t &C_layout, C_data_t &C_data)
232{
233 const int b = MFEM_TEMPLATE_BLOCK_SIZE;
234 bMult_AB<b,b,b,Add>(A_layout, A_data, B_layout, B_data, C_layout, C_data);
235}
236
237
238// Small matrix operations (determinant, adjugate,...) defined by specialization
239
240namespace internal
241{
242
// Primary template: intentionally empty. Operations (Det, Adjugate, ...)
// are provided only by the explicit specializations below (1x1, 2x2, 3x3).
template <int N1, int N2>
struct MatrixOps { };
245
246template <>
247struct MatrixOps<1,1>
248{
249 // Compute det(A).
250 template <typename scalar_t, typename layout_t, typename data_t>
251 static inline scalar_t Det(const layout_t &a, const data_t &A)
252 {
253 return A[a.ind(0,0)];
254 }
255
256 // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
257 template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
258 typename D_data_t>
259 static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
260 {
261 const int M = A_layout_t::dim_1;
262 for (int i = 0; i < M; i++)
263 {
264 Assign<Op>(D[i], A[a.ind(i,0,0)]);
265 }
266 }
267
268 // Compute B = adj(A).
269 template <typename scalar_t,
270 typename A_layout_t, typename A_data_t,
271 typename B_layout_t, typename B_data_t>
272 static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
273 const B_layout_t &b, B_data_t &B)
274 {
275 B[b.ind(0,0)] = scalar_t(1);
276 }
277
278 // Compute adj(A) and det(A).
279 template <typename scalar_t,
280 typename A_layout_t, typename A_data_t,
281 typename B_layout_t, typename B_data_t>
282 static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
283 const B_layout_t &b, B_data_t &B)
284 {
285 Adjugate<scalar_t>(a, A, b, B);
286 return Det<scalar_t>(a, A);
287 }
288};
289
290template <>
291struct MatrixOps<2,2>
292{
293 // Compute det(A).
294 template <typename scalar_t, typename layout_t, typename data_t>
295 static inline scalar_t Det(const layout_t &a, const data_t &A)
296 {
297 MFEM_FLOPS_ADD(3);
298 return (A[a.ind(0,0)]*A[a.ind(1,1)] -
299 A[a.ind(1,0)]*A[a.ind(0,1)]);
300 }
301
302 // Compute det(A), host+device version.
303 template <typename scalar_t, typename layout_t, typename data_t>
304 MFEM_HOST_DEVICE
305 static inline scalar_t DetHD(const layout_t &a, const data_t &A)
306 {
307 MFEM_FLOPS_ADD(3);
308 return (A[a.ind(0,0)]*A[a.ind(1,1)] -
309 A[a.ind(1,0)]*A[a.ind(0,1)]);
310 }
311
312 // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
313 template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
314 typename D_data_t>
315 static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
316 {
317 const int M = A_layout_t::dim_1;
318 MFEM_FLOPS_ADD(3*M);
319 for (int i = 0; i < M; i++)
320 {
321 Assign<Op>(D[i], (A[a.ind(i,0,0)]*A[a.ind(i,1,1)] -
322 A[a.ind(i,1,0)]*A[a.ind(i,0,1)]));
323 }
324 }
325
326 // Compute B = adj(A).
327 template <typename scalar_t,
328 typename A_layout_t, typename A_data_t,
329 typename B_layout_t, typename B_data_t>
330 static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
331 const B_layout_t &b, B_data_t &B)
332 {
333 B[b.ind(0,0)] = A[a.ind(1,1)];
334 B[b.ind(0,1)] = -A[a.ind(0,1)];
335 B[b.ind(1,0)] = -A[a.ind(1,0)];
336 B[b.ind(1,1)] = A[a.ind(0,0)];
337 }
338
339 // Compute B = adj(A), host+device version.
340 template <typename scalar_t,
341 typename A_layout_t, typename A_data_t,
342 typename B_layout_t, typename B_data_t>
343 MFEM_HOST_DEVICE
344 static inline void AdjugateHD(const A_layout_t &a, const A_data_t &A,
345 const B_layout_t &b, B_data_t &B)
346 {
347 B[b.ind(0,0)] = A[a.ind(1,1)];
348 B[b.ind(0,1)] = -A[a.ind(0,1)];
349 B[b.ind(1,0)] = -A[a.ind(1,0)];
350 B[b.ind(1,1)] = A[a.ind(0,0)];
351 }
352
353 // Compute adj(A) and det(A).
354 template <typename scalar_t,
355 typename A_layout_t, typename A_data_t,
356 typename B_layout_t, typename B_data_t>
357 static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
358 const B_layout_t &b, B_data_t &B)
359 {
360 Adjugate<scalar_t>(a, A, b, B);
361 return Det<scalar_t>(a, A);
362 }
363
364 // Compute adj(A) and det(A), host+device version.
365 template <typename scalar_t,
366 typename A_layout_t, typename A_data_t,
367 typename B_layout_t, typename B_data_t>
368 MFEM_HOST_DEVICE
369 static inline scalar_t AdjDetHD(const A_layout_t &a, const A_data_t &A,
370 const B_layout_t &b, B_data_t &B)
371 {
372 AdjugateHD<scalar_t>(a, A, b, B);
373 return DetHD<scalar_t>(a, A);
374 }
375
376 template <bool symm> struct Symm;
377};
378
379template <>
380struct MatrixOps<2,2>::Symm<true>
381{
382 template <typename A_layout_t, typename A_data_t, typename scalar_t>
383 static inline MFEM_ALWAYS_INLINE
384 void Set(const A_layout_t &a, A_data_t &A,
385 const scalar_t a11, const scalar_t a21, const scalar_t a22)
386 {
387 A[a.ind(0)] = a11;
388 A[a.ind(1)] = a21;
389 A[a.ind(2)] = a22;
390 }
391};
392
393template <>
394struct MatrixOps<2,2>::Symm<false>
395{
396 template <typename A_layout_t, typename A_data_t, typename scalar_t>
397 static inline MFEM_ALWAYS_INLINE
398 void Set(const A_layout_t &a, A_data_t &A,
399 const scalar_t a11, const scalar_t a21, const scalar_t a22)
400 {
401 A[a.ind(0,0)] = a11;
402 A[a.ind(1,0)] = a21;
403 A[a.ind(0,1)] = a21;
404 A[a.ind(1,1)] = a22;
405 }
406};
407
408template <>
409struct MatrixOps<3,3>
410{
411 // Compute det(A).
412 template <typename scalar_t, typename layout_t, typename data_t>
413 static inline scalar_t Det(const layout_t &a, const data_t &A)
414 {
415 MFEM_FLOPS_ADD(14);
416 return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
417 A[a.ind(2,1)]*A[a.ind(1,2)]) -
418 A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
419 A[a.ind(2,1)]*A[a.ind(0,2)]) +
420 A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
421 A[a.ind(1,1)]*A[a.ind(0,2)]));
422 }
423
424 // Compute det(A), host+device version.
425 template <typename scalar_t, typename layout_t, typename data_t>
426 MFEM_HOST_DEVICE
427 static inline scalar_t DetHD(const layout_t &a, const data_t &A)
428 {
429 MFEM_FLOPS_ADD(14);
430 return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
431 A[a.ind(2,1)]*A[a.ind(1,2)]) -
432 A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
433 A[a.ind(2,1)]*A[a.ind(0,2)]) +
434 A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
435 A[a.ind(1,1)]*A[a.ind(0,2)]));
436 }
437
438 // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
439 template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
440 typename D_data_t>
441 static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
442 {
443 const int M = A_layout_t::dim_1;
444 MFEM_FLOPS_ADD(14*M);
445 for (int i = 0; i < M; i++)
446 {
448 D[i],
449 A[a.ind(i,0,0)]*(A[a.ind(i,1,1)]*A[a.ind(i,2,2)] -
450 A[a.ind(i,2,1)]*A[a.ind(i,1,2)]) -
451 A[a.ind(i,1,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,2,2)] -
452 A[a.ind(i,2,1)]*A[a.ind(i,0,2)]) +
453 A[a.ind(i,2,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,1,2)] -
454 A[a.ind(i,1,1)]*A[a.ind(i,0,2)]));
455 }
456 }
457
458 // Compute B = adj(A).
459 template <typename scalar_t,
460 typename A_layout_t, typename A_data_t,
461 typename B_layout_t, typename B_data_t>
462 static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
463 const B_layout_t &b, B_data_t &B)
464 {
465 MFEM_FLOPS_ADD(27);
466 B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
467 B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
468 B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
469 B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
470 B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
471 B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
472 B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
473 B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
474 B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
475 }
476
477 // Compute B = adj(A), host+device version.
478 template <typename scalar_t,
479 typename A_layout_t, typename A_data_t,
480 typename B_layout_t, typename B_data_t>
481 MFEM_HOST_DEVICE
482 static inline void AdjugateHD(const A_layout_t &a, const A_data_t &A,
483 const B_layout_t &b, B_data_t &B)
484 {
485 MFEM_FLOPS_ADD(27);
486 B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
487 B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
488 B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
489 B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
490 B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
491 B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
492 B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
493 B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
494 B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
495 }
496
497 // Compute adj(A) and det(A).
498 template <typename scalar_t,
499 typename A_layout_t, typename A_data_t,
500 typename B_layout_t, typename B_data_t>
501 static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
502 const B_layout_t &b, B_data_t &B)
503 {
504 MFEM_FLOPS_ADD(5);
505 Adjugate<scalar_t>(a, A, b, B);
506 return (A[a.ind(0,0)]*B[b.ind(0,0)] +
507 A[a.ind(1,0)]*B[b.ind(0,1)] +
508 A[a.ind(2,0)]*B[b.ind(0,2)]);
509 }
510
511 // Compute adj(A) and det(A), host+device version.
512 template <typename scalar_t,
513 typename A_layout_t, typename A_data_t,
514 typename B_layout_t, typename B_data_t>
515 MFEM_HOST_DEVICE
516 static inline scalar_t AdjDetHD(const A_layout_t &a, const A_data_t &A,
517 const B_layout_t &b, B_data_t &B)
518 {
519 MFEM_FLOPS_ADD(5);
520 AdjugateHD<scalar_t>(a, A, b, B);
521 return (A[a.ind(0,0)]*B[b.ind(0,0)] +
522 A[a.ind(1,0)]*B[b.ind(0,1)] +
523 A[a.ind(2,0)]*B[b.ind(0,2)]);
524 }
525
526 template <bool symm> struct Symm;
527};
528
529template <>
530struct MatrixOps<3,3>::Symm<true>
531{
532 template <typename A_layout_t, typename A_data_t, typename scalar_t>
533 static inline MFEM_ALWAYS_INLINE
534 void Set(const A_layout_t &a, A_data_t &A,
535 const scalar_t a11, const scalar_t a21, const scalar_t a31,
536 const scalar_t a22, const scalar_t a32, const scalar_t a33)
537 {
538 A[a.ind(0)] = a11;
539 A[a.ind(1)] = a21;
540 A[a.ind(2)] = a31;
541 A[a.ind(3)] = a22;
542 A[a.ind(4)] = a32;
543 A[a.ind(5)] = a33;
544 }
545};
546
547template <>
548struct MatrixOps<3,3>::Symm<false>
549{
550 template <typename A_layout_t, typename A_data_t, typename scalar_t>
551 static inline MFEM_ALWAYS_INLINE
552 void Set(const A_layout_t &a, A_data_t &A,
553 const scalar_t a11, const scalar_t a21, const scalar_t a31,
554 const scalar_t a22, const scalar_t a32, const scalar_t a33)
555 {
556 A[a.ind(0,0)] = a11;
557 A[a.ind(1,0)] = a21;
558 A[a.ind(2,0)] = a31;
559 A[a.ind(0,1)] = a21;
560 A[a.ind(1,1)] = a22;
561 A[a.ind(2,1)] = a32;
562 A[a.ind(0,2)] = a31;
563 A[a.ind(1,2)] = a32;
564 A[a.ind(2,2)] = a33;
565 }
566};
567
568} // namespace mfem::internal
569
570// Compute the determinant of a (small) matrix: det(A).
571template <typename scalar_t, typename layout_t, typename data_t>
572inline scalar_t TDet(const layout_t &a, const data_t &A)
573{
574 MFEM_STATIC_ASSERT(layout_t::rank == 2, "invalid rank");
575#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
576 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
577 template Det<scalar_t>(a, A);
578#else
579 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
580 Det<scalar_t>(a, A);
581#endif
582}
583
584// Compute the determinant of a (small) matrix: det(A). Host+device version.
585template <typename scalar_t, typename layout_t, typename data_t>
586MFEM_HOST_DEVICE
587inline scalar_t TDetHD(const layout_t &a, const data_t &A)
588{
589 MFEM_STATIC_ASSERT(layout_t::rank == 2, "invalid rank");
590#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
591 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
592 template DetHD<scalar_t>(a, A);
593#else
594 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
595 DetHD<scalar_t>(a, A);
596#endif
597}
598
599// Compute the determinants of a set of (small) matrices: D[i] = det(A[i,*,*]).
600// The layout of A is (M x N1 x N2) and the size of D is M.
601template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
602 typename D_data_t>
603inline void TDet(const A_layout_t &a, const A_data_t &A, D_data_t &D)
604{
605 MFEM_STATIC_ASSERT(A_layout_t::rank == 3, "invalid rank");
606#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
607 internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
608 template Det<Op>(a, A, D);
609#else
610 internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
611 Det<Op>(a, A, D);
612#endif
613}
614
615// Compute the adjugate matrix of a (small) matrix: B = adj(A).
616template <typename scalar_t,
617 typename A_layout_t, typename A_data_t,
618 typename B_layout_t, typename B_data_t>
619inline void TAdjugate(const A_layout_t &a, const A_data_t &A,
620 const B_layout_t &b, B_data_t &B)
621{
622 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
623 "invalid ranks");
624 internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
625 template Adjugate<scalar_t>(a, A, b, B);
626}
627
628// Compute the adjugate matrix of a (small) matrix: B = adj(A).
629// Host+device version.
630template <typename scalar_t,
631 typename A_layout_t, typename A_data_t,
632 typename B_layout_t, typename B_data_t>
633MFEM_HOST_DEVICE
634inline void TAdjugateHD(const A_layout_t &a, const A_data_t &A,
635 const B_layout_t &b, B_data_t &B)
636{
637 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
638 "invalid ranks");
639 internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
640 template AdjugateHD<scalar_t>(a, A, b, B);
641}
642
643// Compute the adjugate and the determinant of a (small) matrix: B = adj(A),
644// return det(A).
645template <typename scalar_t,
646 typename A_layout_t, typename A_data_t,
647 typename B_layout_t, typename B_data_t>
648inline scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A,
649 const B_layout_t &b, B_data_t &B)
650{
651 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
652 "invalid ranks");
653 return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
654 template AdjDet<scalar_t>(a, A, b, B);
655}
656
657// Compute the adjugate and the determinant of a (small) matrix: B = adj(A),
658// return det(A). Host+device version.
659template <typename scalar_t,
660 typename A_layout_t, typename A_data_t,
661 typename B_layout_t, typename B_data_t>
662MFEM_HOST_DEVICE
663inline scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A,
664 const B_layout_t &b, B_data_t &B)
665{
666 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
667 "invalid ranks");
668 return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
669 template AdjDetHD<scalar_t>(a, A, b, B);
670}
671
672} // namespace mfem
673
674#endif // MFEM_TEMPLATE_MATRIX
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
MFEM_HOST_DEVICE constexpr auto type(const tuple< T... > &values)
a function intended to be used for extracting the ith type from a tuple.
Definition tuple.hpp:347
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
Definition kernels.hpp:386
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition kernels.hpp:297
MFEM_HOST_DEVICE scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:663
MFEM_ALWAYS_INLINE void sMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
Definition tmatrix.hpp:39
lvalue_t & Assign(lvalue_t &a, const rvalue_t &b)
Definition tassign.hpp:137
scalar_t TDet(const layout_t &a, const data_t &A)
Definition tmatrix.hpp:572
MFEM_ALWAYS_INLINE void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
Definition tmatrix.hpp:229
MFEM_HOST_DEVICE scalar_t TDetHD(const layout_t &a, const data_t &A)
Definition tmatrix.hpp:587
void TAdjugate(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:619
scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:648
MFEM_ALWAYS_INLINE void bMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
Definition tmatrix.hpp:88
MFEM_HOST_DEVICE void TAdjugateHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:634
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.