MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
tmatrix.hpp
Go to the documentation of this file.
// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.
11
12#ifndef MFEM_TEMPLATE_MATRIX
13#define MFEM_TEMPLATE_MATRIX
14
#include "../config/tconfig.hpp"
#include "../general/tassign.hpp"
#include "../general/backends.hpp"
18
19namespace mfem
20{
21
// Matrix-matrix products
23
24namespace internal
25{
26
27template <typename T> struct entry_type { typedef typename T::data_type type; };
28
29template <typename T> struct entry_type<T*> { typedef T type; };
30
31} // namespace mfem::internal
32
33
34// C {=|+=} A.B -- simple version (no blocks)
35template <bool Add,
36 typename A_layout_t, typename A_data_t,
37 typename B_layout_t, typename B_data_t,
38 typename C_layout_t, typename C_data_t>
39MFEM_ALWAYS_INLINE inline
40void sMult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
41 const B_layout_t &B_layout, const B_data_t &B_data,
42 const C_layout_t &C_layout, C_data_t &C_data)
43{
44 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
45 C_layout_t::rank == 2, "invalid ranks");
46 const int A1 = A_layout_t::dim_1;
47 const int A2 = A_layout_t::dim_2;
48 const int B1 = B_layout_t::dim_1;
49 const int B2 = B_layout_t::dim_2;
50 const int C1 = C_layout_t::dim_1;
51 const int C2 = C_layout_t::dim_2;
52 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
53 "invalid dimensions");
54
55 MFEM_FLOPS_ADD(Add ? 2*A1*A2*B2 : 2*A1*A2*B2-A1*B2);
56 for (int b2 = 0; b2 < B2; b2++)
57 {
58 for (int a1 = 0; a1 < A1; a1++)
59 {
60 typename internal::entry_type<C_data_t>::type c_a1_b2;
61 if (Add)
62 {
63 // C(a1,b2) += A(a1,0) * B(0,b2);
64 c_a1_b2 = C_data[C_layout.ind(a1,b2)];
65 c_a1_b2.fma(A_data[A_layout.ind(a1,0)], B_data[B_layout.ind(0,b2)]);
66 }
67 else
68 {
69 // C(a1,b2) = A(a1,0) * B(0,b2);
70 c_a1_b2.mul(A_data[A_layout.ind(a1,0)], B_data[B_layout.ind(0,b2)]);
71 }
72 for (int s = 1; s < A2; s++)
73 {
74 // C(a1,b2) += A(a1,s) * B(s,b2);
75 c_a1_b2.fma(A_data[A_layout.ind(a1,s)], B_data[B_layout.ind(s,b2)]);
76 }
77 C_data[C_layout.ind(a1,b2)] = c_a1_b2;
78 }
79 }
80}
81
82// C {=|+=} A.B -- block version
83template <int bA1, int bA2, int bB2, // block sizes
84 bool Add,
85 typename A_layout_t, typename A_data_t,
86 typename B_layout_t, typename B_data_t,
87 typename C_layout_t, typename C_data_t>
88MFEM_ALWAYS_INLINE inline
89void bMult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
90 const B_layout_t &B_layout, const B_data_t &B_data,
91 const C_layout_t &C_layout, C_data_t &C_data)
92{
93 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
94 C_layout_t::rank == 2, "invalid ranks");
95 const int A1 = A_layout_t::dim_1;
96 const int A2 = A_layout_t::dim_2;
97 const int B1 = B_layout_t::dim_1;
98 const int B2 = B_layout_t::dim_2;
99 const int C1 = C_layout_t::dim_1;
100 const int C2 = C_layout_t::dim_2;
101 MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
102 "invalid dimensions");
103
104 const int rA1 = A1%bA1;
105 const int rA2 = A2%bA2;
106 const int rB2 = B2%bB2;
107
108 for (int b2_b = 0; b2_b < B2/bB2; b2_b++)
109 {
110 if (A2/bA2 > 0)
111 {
112 // s_b == 0
113 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
114 {
116 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
117 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
118 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
119 }
120 if (rA1)
121 {
123 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
124 B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
125 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
126 }
127 for (int s_b = 1; s_b < A2/bA2; s_b++)
128 {
129 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
130 {
132 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
133 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
134 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
135 }
136 if (rA1)
137 {
139 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
140 B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
141 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
142 }
143 }
144 }
145 if (rA2)
146 {
147 const bool rAdd = Add || (A2/bA2 > 0);
148 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
149 {
151 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
152 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
153 C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
154 }
155 if (rA1)
156 {
158 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
159 B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
160 C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
161 }
162 }
163 }
164 if (rB2)
165 {
166 if (A2/bA2 > 0)
167 {
168 // s_b == 0
169 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
170 {
172 A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
173 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
174 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
175 }
176 if (rA1)
177 {
179 A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
180 B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
181 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
182 }
183 }
184 if (A2/bA2 > 1)
185 {
186 for (int s_b = 1; s_b < A2/bA2; s_b++)
187 {
188 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
189 {
191 A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
192 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
193 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
194 }
195 if (rA1)
196 {
198 A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
199 B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
200 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
201 }
202 }
203 }
204 if (rA2)
205 {
206 const bool rAdd = Add || (A2/bA2 > 0);
207 for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
208 {
210 A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
211 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
212 C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
213 }
214 if (rA1)
215 {
217 A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
218 B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
219 C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
220 }
221 }
222 }
223}
224
225template <bool Add,
226 typename A_layout_t, typename A_data_t,
227 typename B_layout_t, typename B_data_t,
228 typename C_layout_t, typename C_data_t>
229MFEM_ALWAYS_INLINE inline
230void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
231 const B_layout_t &B_layout, const B_data_t &B_data,
232 const C_layout_t &C_layout, C_data_t &C_data)
233{
234 const int b = MFEM_TEMPLATE_BLOCK_SIZE;
235 bMult_AB<b,b,b,Add>(A_layout, A_data, B_layout, B_data, C_layout, C_data);
236}
237
238
// Small matrix operations (determinant, adjugate, ...) defined by specialization
240
241namespace internal
242{
243
244template <int N1, int N2>
245struct MatrixOps { };
246
247template <>
248struct MatrixOps<1,1>
249{
250 // Compute det(A).
251 template <typename scalar_t, typename layout_t, typename data_t>
252 static inline scalar_t Det(const layout_t &a, const data_t &A)
253 {
254 return A[a.ind(0,0)];
255 }
256
257 // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
258 template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
259 typename D_data_t>
260 static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
261 {
262 const int M = A_layout_t::dim_1;
263 for (int i = 0; i < M; i++)
264 {
265 Assign<Op>(D[i], A[a.ind(i,0,0)]);
266 }
267 }
268
269 // Compute B = adj(A).
270 template <typename scalar_t,
271 typename A_layout_t, typename A_data_t,
272 typename B_layout_t, typename B_data_t>
273 static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
274 const B_layout_t &b, B_data_t &B)
275 {
276 B[b.ind(0,0)] = scalar_t(1);
277 }
278
279 // Compute adj(A) and det(A).
280 template <typename scalar_t,
281 typename A_layout_t, typename A_data_t,
282 typename B_layout_t, typename B_data_t>
283 static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
284 const B_layout_t &b, B_data_t &B)
285 {
286 Adjugate<scalar_t>(a, A, b, B);
287 return Det<scalar_t>(a, A);
288 }
289};
290
291template <>
292struct MatrixOps<2,2>
293{
294 // Compute det(A).
295 template <typename scalar_t, typename layout_t, typename data_t>
296 static inline scalar_t Det(const layout_t &a, const data_t &A)
297 {
298 MFEM_FLOPS_ADD(3);
299 return (A[a.ind(0,0)]*A[a.ind(1,1)] -
300 A[a.ind(1,0)]*A[a.ind(0,1)]);
301 }
302
303 // Compute det(A), host+device version.
304 template <typename scalar_t, typename layout_t, typename data_t>
305 MFEM_HOST_DEVICE
306 static inline scalar_t DetHD(const layout_t &a, const data_t &A)
307 {
308 MFEM_FLOPS_ADD(3);
309 return (A[a.ind(0,0)]*A[a.ind(1,1)] -
310 A[a.ind(1,0)]*A[a.ind(0,1)]);
311 }
312
313 // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
314 template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
315 typename D_data_t>
316 static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
317 {
318 const int M = A_layout_t::dim_1;
319 MFEM_FLOPS_ADD(3*M);
320 for (int i = 0; i < M; i++)
321 {
322 Assign<Op>(D[i], (A[a.ind(i,0,0)]*A[a.ind(i,1,1)] -
323 A[a.ind(i,1,0)]*A[a.ind(i,0,1)]));
324 }
325 }
326
327 // Compute B = adj(A).
328 template <typename scalar_t,
329 typename A_layout_t, typename A_data_t,
330 typename B_layout_t, typename B_data_t>
331 static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
332 const B_layout_t &b, B_data_t &B)
333 {
334 B[b.ind(0,0)] = A[a.ind(1,1)];
335 B[b.ind(0,1)] = -A[a.ind(0,1)];
336 B[b.ind(1,0)] = -A[a.ind(1,0)];
337 B[b.ind(1,1)] = A[a.ind(0,0)];
338 }
339
340 // Compute B = adj(A), host+device version.
341 template <typename scalar_t,
342 typename A_layout_t, typename A_data_t,
343 typename B_layout_t, typename B_data_t>
344 MFEM_HOST_DEVICE
345 static inline void AdjugateHD(const A_layout_t &a, const A_data_t &A,
346 const B_layout_t &b, B_data_t &B)
347 {
348 B[b.ind(0,0)] = A[a.ind(1,1)];
349 B[b.ind(0,1)] = -A[a.ind(0,1)];
350 B[b.ind(1,0)] = -A[a.ind(1,0)];
351 B[b.ind(1,1)] = A[a.ind(0,0)];
352 }
353
354 // Compute adj(A) and det(A).
355 template <typename scalar_t,
356 typename A_layout_t, typename A_data_t,
357 typename B_layout_t, typename B_data_t>
358 static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
359 const B_layout_t &b, B_data_t &B)
360 {
361 Adjugate<scalar_t>(a, A, b, B);
362 return Det<scalar_t>(a, A);
363 }
364
365 // Compute adj(A) and det(A), host+device version.
366 template <typename scalar_t,
367 typename A_layout_t, typename A_data_t,
368 typename B_layout_t, typename B_data_t>
369 MFEM_HOST_DEVICE
370 static inline scalar_t AdjDetHD(const A_layout_t &a, const A_data_t &A,
371 const B_layout_t &b, B_data_t &B)
372 {
373 AdjugateHD<scalar_t>(a, A, b, B);
374 return DetHD<scalar_t>(a, A);
375 }
376
377 template <bool symm> struct Symm;
378};
379
380template <>
381struct MatrixOps<2,2>::Symm<true>
382{
383 template <typename A_layout_t, typename A_data_t, typename scalar_t>
384 static inline MFEM_ALWAYS_INLINE
385 void Set(const A_layout_t &a, A_data_t &A,
386 const scalar_t a11, const scalar_t a21, const scalar_t a22)
387 {
388 A[a.ind(0)] = a11;
389 A[a.ind(1)] = a21;
390 A[a.ind(2)] = a22;
391 }
392};
393
394template <>
395struct MatrixOps<2,2>::Symm<false>
396{
397 template <typename A_layout_t, typename A_data_t, typename scalar_t>
398 static inline MFEM_ALWAYS_INLINE
399 void Set(const A_layout_t &a, A_data_t &A,
400 const scalar_t a11, const scalar_t a21, const scalar_t a22)
401 {
402 A[a.ind(0,0)] = a11;
403 A[a.ind(1,0)] = a21;
404 A[a.ind(0,1)] = a21;
405 A[a.ind(1,1)] = a22;
406 }
407};
408
409template <>
410struct MatrixOps<3,3>
411{
412 // Compute det(A).
413 template <typename scalar_t, typename layout_t, typename data_t>
414 static inline scalar_t Det(const layout_t &a, const data_t &A)
415 {
416 MFEM_FLOPS_ADD(14);
417 return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
418 A[a.ind(2,1)]*A[a.ind(1,2)]) -
419 A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
420 A[a.ind(2,1)]*A[a.ind(0,2)]) +
421 A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
422 A[a.ind(1,1)]*A[a.ind(0,2)]));
423 }
424
425 // Compute det(A), host+device version.
426 template <typename scalar_t, typename layout_t, typename data_t>
427 MFEM_HOST_DEVICE
428 static inline scalar_t DetHD(const layout_t &a, const data_t &A)
429 {
430 MFEM_FLOPS_ADD(14);
431 return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
432 A[a.ind(2,1)]*A[a.ind(1,2)]) -
433 A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
434 A[a.ind(2,1)]*A[a.ind(0,2)]) +
435 A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
436 A[a.ind(1,1)]*A[a.ind(0,2)]));
437 }
438
439 // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
440 template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
441 typename D_data_t>
442 static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
443 {
444 const int M = A_layout_t::dim_1;
445 MFEM_FLOPS_ADD(14*M);
446 for (int i = 0; i < M; i++)
447 {
449 D[i],
450 A[a.ind(i,0,0)]*(A[a.ind(i,1,1)]*A[a.ind(i,2,2)] -
451 A[a.ind(i,2,1)]*A[a.ind(i,1,2)]) -
452 A[a.ind(i,1,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,2,2)] -
453 A[a.ind(i,2,1)]*A[a.ind(i,0,2)]) +
454 A[a.ind(i,2,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,1,2)] -
455 A[a.ind(i,1,1)]*A[a.ind(i,0,2)]));
456 }
457 }
458
459 // Compute B = adj(A).
460 template <typename scalar_t,
461 typename A_layout_t, typename A_data_t,
462 typename B_layout_t, typename B_data_t>
463 static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
464 const B_layout_t &b, B_data_t &B)
465 {
466 MFEM_FLOPS_ADD(27);
467 B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
468 B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
469 B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
470 B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
471 B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
472 B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
473 B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
474 B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
475 B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
476 }
477
478 // Compute B = adj(A), host+device version.
479 template <typename scalar_t,
480 typename A_layout_t, typename A_data_t,
481 typename B_layout_t, typename B_data_t>
482 MFEM_HOST_DEVICE
483 static inline void AdjugateHD(const A_layout_t &a, const A_data_t &A,
484 const B_layout_t &b, B_data_t &B)
485 {
486 MFEM_FLOPS_ADD(27);
487 B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
488 B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
489 B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
490 B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
491 B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
492 B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
493 B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
494 B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
495 B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
496 }
497
498 // Compute adj(A) and det(A).
499 template <typename scalar_t,
500 typename A_layout_t, typename A_data_t,
501 typename B_layout_t, typename B_data_t>
502 static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
503 const B_layout_t &b, B_data_t &B)
504 {
505 MFEM_FLOPS_ADD(5);
506 Adjugate<scalar_t>(a, A, b, B);
507 return (A[a.ind(0,0)]*B[b.ind(0,0)] +
508 A[a.ind(1,0)]*B[b.ind(0,1)] +
509 A[a.ind(2,0)]*B[b.ind(0,2)]);
510 }
511
512 // Compute adj(A) and det(A), host+device version.
513 template <typename scalar_t,
514 typename A_layout_t, typename A_data_t,
515 typename B_layout_t, typename B_data_t>
516 MFEM_HOST_DEVICE
517 static inline scalar_t AdjDetHD(const A_layout_t &a, const A_data_t &A,
518 const B_layout_t &b, B_data_t &B)
519 {
520 MFEM_FLOPS_ADD(5);
521 AdjugateHD<scalar_t>(a, A, b, B);
522 return (A[a.ind(0,0)]*B[b.ind(0,0)] +
523 A[a.ind(1,0)]*B[b.ind(0,1)] +
524 A[a.ind(2,0)]*B[b.ind(0,2)]);
525 }
526
527 template <bool symm> struct Symm;
528};
529
530template <>
531struct MatrixOps<3,3>::Symm<true>
532{
533 template <typename A_layout_t, typename A_data_t, typename scalar_t>
534 static inline MFEM_ALWAYS_INLINE
535 void Set(const A_layout_t &a, A_data_t &A,
536 const scalar_t a11, const scalar_t a21, const scalar_t a31,
537 const scalar_t a22, const scalar_t a32, const scalar_t a33)
538 {
539 A[a.ind(0)] = a11;
540 A[a.ind(1)] = a21;
541 A[a.ind(2)] = a31;
542 A[a.ind(3)] = a22;
543 A[a.ind(4)] = a32;
544 A[a.ind(5)] = a33;
545 }
546};
547
548template <>
549struct MatrixOps<3,3>::Symm<false>
550{
551 template <typename A_layout_t, typename A_data_t, typename scalar_t>
552 static inline MFEM_ALWAYS_INLINE
553 void Set(const A_layout_t &a, A_data_t &A,
554 const scalar_t a11, const scalar_t a21, const scalar_t a31,
555 const scalar_t a22, const scalar_t a32, const scalar_t a33)
556 {
557 A[a.ind(0,0)] = a11;
558 A[a.ind(1,0)] = a21;
559 A[a.ind(2,0)] = a31;
560 A[a.ind(0,1)] = a21;
561 A[a.ind(1,1)] = a22;
562 A[a.ind(2,1)] = a32;
563 A[a.ind(0,2)] = a31;
564 A[a.ind(1,2)] = a32;
565 A[a.ind(2,2)] = a33;
566 }
567};
568
569} // namespace mfem::internal
570
571// Compute the determinant of a (small) matrix: det(A).
572template <typename scalar_t, typename layout_t, typename data_t>
573inline scalar_t TDet(const layout_t &a, const data_t &A)
574{
575 MFEM_STATIC_ASSERT(layout_t::rank == 2, "invalid rank");
576#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
577 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
578 template Det<scalar_t>(a, A);
579#else
580 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
581 Det<scalar_t>(a, A);
582#endif
583}
584
585// Compute the determinant of a (small) matrix: det(A). Host+device version.
586template <typename scalar_t, typename layout_t, typename data_t>
587MFEM_HOST_DEVICE
588inline scalar_t TDetHD(const layout_t &a, const data_t &A)
589{
590 MFEM_STATIC_ASSERT(layout_t::rank == 2, "invalid rank");
591#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
592 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
593 template DetHD<scalar_t>(a, A);
594#else
595 return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
596 DetHD<scalar_t>(a, A);
597#endif
598}
599
600// Compute the determinants of a set of (small) matrices: D[i] = det(A[i,*,*]).
601// The layout of A is (M x N1 x N2) and the size of D is M.
602template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
603 typename D_data_t>
604inline void TDet(const A_layout_t &a, const A_data_t &A, D_data_t &D)
605{
606 MFEM_STATIC_ASSERT(A_layout_t::rank == 3, "invalid rank");
607#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
608 internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
609 template Det<Op>(a, A, D);
610#else
611 internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
612 Det<Op>(a, A, D);
613#endif
614}
615
616// Compute the adjugate matrix of a (small) matrix: B = adj(A).
617template <typename scalar_t,
618 typename A_layout_t, typename A_data_t,
619 typename B_layout_t, typename B_data_t>
620inline void TAdjugate(const A_layout_t &a, const A_data_t &A,
621 const B_layout_t &b, B_data_t &B)
622{
623 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
624 "invalid ranks");
625 internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
626 template Adjugate<scalar_t>(a, A, b, B);
627}
628
629// Compute the adjugate matrix of a (small) matrix: B = adj(A).
630// Host+device version.
631template <typename scalar_t,
632 typename A_layout_t, typename A_data_t,
633 typename B_layout_t, typename B_data_t>
634MFEM_HOST_DEVICE
635inline void TAdjugateHD(const A_layout_t &a, const A_data_t &A,
636 const B_layout_t &b, B_data_t &B)
637{
638 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
639 "invalid ranks");
640 internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
641 template AdjugateHD<scalar_t>(a, A, b, B);
642}
643
644// Compute the adjugate and the determinant of a (small) matrix: B = adj(A),
645// return det(A).
646template <typename scalar_t,
647 typename A_layout_t, typename A_data_t,
648 typename B_layout_t, typename B_data_t>
649inline scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A,
650 const B_layout_t &b, B_data_t &B)
651{
652 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
653 "invalid ranks");
654 return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
655 template AdjDet<scalar_t>(a, A, b, B);
656}
657
658// Compute the adjugate and the determinant of a (small) matrix: B = adj(A),
659// return det(A). Host+device version.
660template <typename scalar_t,
661 typename A_layout_t, typename A_data_t,
662 typename B_layout_t, typename B_data_t>
663MFEM_HOST_DEVICE
664inline scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A,
665 const B_layout_t &b, B_data_t &B)
666{
667 MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
668 "invalid ranks");
669 return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
670 template AdjDetHD<scalar_t>(a, A, b, B);
671}
672
673} // namespace mfem
674
675#endif // MFEM_TEMPLATE_MATRIX
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
Definition kernels.hpp:326
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition kernels.hpp:237
MFEM_HOST_DEVICE scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:664
MFEM_ALWAYS_INLINE void sMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
Definition tmatrix.hpp:40
lvalue_t & Assign(lvalue_t &a, const rvalue_t &b)
Definition tassign.hpp:137
scalar_t TDet(const layout_t &a, const data_t &A)
Definition tmatrix.hpp:573
MFEM_ALWAYS_INLINE void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
Definition tmatrix.hpp:230
MFEM_HOST_DEVICE scalar_t TDetHD(const layout_t &a, const data_t &A)
Definition tmatrix.hpp:588
void TAdjugate(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:620
scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:649
MFEM_ALWAYS_INLINE void bMult_AB(const A_layout_t &A_layout, const A_data_t &A_data, const B_layout_t &B_layout, const B_data_t &B_data, const C_layout_t &C_layout, C_data_t &C_data)
Definition tmatrix.hpp:89
MFEM_HOST_DEVICE void TAdjugateHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:635
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
RefCoord s[3]