MFEM v4.2.0
Finite element discretization library

tmatrix.hpp
// Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.

#ifndef MFEM_TEMPLATE_MATRIX
#define MFEM_TEMPLATE_MATRIX

#include "../config/tconfig.hpp"
#include "../general/tassign.hpp"
#include "../general/backends.hpp"

namespace mfem
{

// Matrix-matrix products

namespace internal
{

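// Type trait: deduce the type of a single matrix entry from the data
// container type. For a container class this is its nested data_type; for a
// raw pointer T* it is the pointee type T. sMult_AB() below relies on the
// entry type providing mul() and fma() members.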
template <typename T> struct entry_type { typedef typename T::data_type type; };

template <typename T> struct entry_type<T*> { typedef T type; };

} // namespace mfem::internal


// C {=|+=} A.B -- simple version (no blocks)
template <bool Add,
          typename A_layout_t, typename A_data_t,
          typename B_layout_t, typename B_data_t,
          typename C_layout_t, typename C_data_t>
MFEM_ALWAYS_INLINE inline
void sMult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
              const B_layout_t &B_layout, const B_data_t &B_data,
              const C_layout_t &C_layout, C_data_t &C_data)
{
   MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
                      C_layout_t::rank == 2, "invalid ranks");
   const int A1 = A_layout_t::dim_1;
   const int A2 = A_layout_t::dim_2;
   const int B1 = B_layout_t::dim_1;
   const int B2 = B_layout_t::dim_2;
   const int C1 = C_layout_t::dim_1;
   const int C2 = C_layout_t::dim_2;
   MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
                      "invalid dimensions");

   MFEM_FLOPS_ADD(Add ? 2*A1*A2*B2 : 2*A1*A2*B2-A1*B2);
   for (int b2 = 0; b2 < B2; b2++)
   {
      for (int a1 = 0; a1 < A1; a1++)
      {
         typename internal::entry_type<C_data_t>::type c_a1_b2;
         if (Add)
         {
            // C(a1,b2) += A(a1,0) * B(0,b2);
            c_a1_b2 = C_data[C_layout.ind(a1,b2)];
            c_a1_b2.fma(A_data[A_layout.ind(a1,0)], B_data[B_layout.ind(0,b2)]);
         }
         else
         {
            // C(a1,b2) = A(a1,0) * B(0,b2);
            c_a1_b2.mul(A_data[A_layout.ind(a1,0)], B_data[B_layout.ind(0,b2)]);
         }
         for (int s = 1; s < A2; s++)
         {
            // C(a1,b2) += A(a1,s) * B(s,b2);
            c_a1_b2.fma(A_data[A_layout.ind(a1,s)], B_data[B_layout.ind(s,b2)]);
         }
         C_data[C_layout.ind(a1,b2)] = c_a1_b2;
      }
   }
}
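
// Usage sketch (an illustration, not part of the library API): multiply two
// small fixed-size matrices, assuming the ColumnMajorLayout2D class from
// tlayout.hpp and some entry type entry_t (a hypothetical placeholder) that
// provides mul()/fma() members, e.g. one of the SIMD wrapper types; plain
// built-in scalars do not have these members.
//
//    ColumnMajorLayout2D<4,3> a_layout;  // A is 4x3
//    ColumnMajorLayout2D<3,2> b_layout;  // B is 3x2
//    ColumnMajorLayout2D<4,2> c_layout;  // C is 4x2
//    entry_t A[4*3], B[3*2], C[4*2];
//    sMult_AB<false>(a_layout, A, b_layout, B, c_layout, C);  // C  = A.B
//    sMult_AB<true >(a_layout, A, b_layout, B, c_layout, C);  // C += A.B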

// C {=|+=} A.B -- block version
template <int bA1, int bA2, int bB2, // block sizes
          bool Add,
          typename A_layout_t, typename A_data_t,
          typename B_layout_t, typename B_data_t,
          typename C_layout_t, typename C_data_t>
MFEM_ALWAYS_INLINE inline
void bMult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
              const B_layout_t &B_layout, const B_data_t &B_data,
              const C_layout_t &C_layout, C_data_t &C_data)
{
   MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2 &&
                      C_layout_t::rank == 2, "invalid ranks");
   const int A1 = A_layout_t::dim_1;
   const int A2 = A_layout_t::dim_2;
   const int B1 = B_layout_t::dim_1;
   const int B2 = B_layout_t::dim_2;
   const int C1 = C_layout_t::dim_1;
   const int C2 = C_layout_t::dim_2;
   MFEM_STATIC_ASSERT(A2 == B1 && A1 == C1 && B2 == C2,
                      "invalid dimensions");

   const int rA1 = A1%bA1;
   const int rA2 = A2%bA2;
   const int rB2 = B2%bB2;

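   // Loop structure: the columns of B (and C) are processed in blocks of
   // width bB2, with a remainder block of width rB2 handled at the end. For
   // each column block, the contraction dimension A2 is split into blocks of
   // size bA2 (plus a remainder rA2) and the rows of A into blocks of size
   // bA1 (plus a remainder rA1). The first s-block writes or accumulates
   // into C according to 'Add'; all later s-blocks accumulate with Add=true.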
   for (int b2_b = 0; b2_b < B2/bB2; b2_b++)
   {
      if (A2/bA2 > 0)
      {
         // s_b == 0
         for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
         {
            sMult_AB<Add>(
               A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
               B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
               C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
         }
         if (rA1)
         {
            sMult_AB<Add>(
               A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
               B_layout.template sub<bA2,bB2>(0,b2_b*bB2), B_data,
               C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
         }
         for (int s_b = 1; s_b < A2/bA2; s_b++)
         {
            for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
            {
               sMult_AB<true>(
                  A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
                  B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
                  C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
            }
            if (rA1)
            {
               sMult_AB<true>(
                  A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
                  B_layout.template sub<bA2,bB2>(s_b*bA2,b2_b*bB2), B_data,
                  C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
            }
         }
      }
      if (rA2)
      {
         const bool rAdd = Add || (A2/bA2 > 0);
         for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
         {
            sMult_AB<rAdd>(
               A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
               B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
               C_layout.template sub<bA1,bB2>(a1_b*bA1,b2_b*bB2), C_data);
         }
         if (rA1)
         {
            sMult_AB<rAdd>(
               A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
               B_layout.template sub<rA2,bB2>(A2-rA2,b2_b*bB2), B_data,
               C_layout.template sub<rA1,bB2>(A1-rA1,b2_b*bB2), C_data);
         }
      }
   }
   if (rB2)
   {
      if (A2/bA2 > 0)
      {
         // s_b == 0
         for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
         {
            sMult_AB<Add>(
               A_layout.template sub<bA1,bA2>(a1_b*bA1,0), A_data,
               B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
               C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
         }
         if (rA1)
         {
            sMult_AB<Add>(
               A_layout.template sub<rA1,bA2>(A1-rA1,0), A_data,
               B_layout.template sub<bA2,rB2>(0,B2-rB2), B_data,
               C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
         }
      }
      if (A2/bA2 > 1)
      {
         for (int s_b = 1; s_b < A2/bA2; s_b++)
         {
            for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
            {
               sMult_AB<true>(
                  A_layout.template sub<bA1,bA2>(a1_b*bA1,s_b*bA2), A_data,
                  B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
                  C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
            }
            if (rA1)
            {
               sMult_AB<true>(
                  A_layout.template sub<rA1,bA2>(A1-rA1,s_b*bA2), A_data,
                  B_layout.template sub<bA2,rB2>(s_b*bA2,B2-rB2), B_data,
                  C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
            }
         }
      }
      if (rA2)
      {
         const bool rAdd = Add || (A2/bA2 > 0);
         for (int a1_b = 0; a1_b < A1/bA1; a1_b++)
         {
            sMult_AB<rAdd>(
               A_layout.template sub<bA1,rA2>(a1_b*bA1,A2-rA2), A_data,
               B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
               C_layout.template sub<bA1,rB2>(a1_b*bA1,B2-rB2), C_data);
         }
         if (rA1)
         {
            sMult_AB<rAdd>(
               A_layout.template sub<rA1,rA2>(A1-rA1,A2-rA2), A_data,
               B_layout.template sub<rA2,rB2>(A2-rA2,B2-rB2), B_data,
               C_layout.template sub<rA1,rB2>(A1-rA1,B2-rB2), C_data);
         }
      }
   }
}

template <bool Add,
          typename A_layout_t, typename A_data_t,
          typename B_layout_t, typename B_data_t,
          typename C_layout_t, typename C_data_t>
MFEM_ALWAYS_INLINE inline
void Mult_AB(const A_layout_t &A_layout, const A_data_t &A_data,
             const B_layout_t &B_layout, const B_data_t &B_data,
             const C_layout_t &C_layout, C_data_t &C_data)
{
   const int b = MFEM_TEMPLATE_BLOCK_SIZE;
   bMult_AB<b,b,b,Add>(A_layout, A_data, B_layout, B_data, C_layout, C_data);
}
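
// Note: MFEM_TEMPLATE_BLOCK_SIZE is a compile-time tuning knob provided by
// the configuration headers (tconfig.hpp is included above); Mult_AB() simply
// uses it as a uniform block size in all three directions of bMult_AB().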


// Small matrix operations (determinant, adjugate,...) defined by specialization

namespace internal
{

template <int N1, int N2>
struct MatrixOps { };

template <>
struct MatrixOps<1,1>
{
   // Compute det(A).
   template <typename scalar_t, typename layout_t, typename data_t>
   static inline scalar_t Det(const layout_t &a, const data_t &A)
   {
      return A[a.ind(0,0)];
   }

   // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
   template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
             typename D_data_t>
   static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
   {
      const int M = A_layout_t::dim_1;
      for (int i = 0; i < M; i++)
      {
         Assign<Op>(D[i], A[a.ind(i,0,0)]);
      }
   }

   // Compute B = adj(A).
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
                               const B_layout_t &b, B_data_t &B)
   {
      B[b.ind(0,0)] = scalar_t(1);
   }

   // Compute adj(A) and det(A).
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
                                 const B_layout_t &b, B_data_t &B)
   {
      Adjugate<scalar_t>(a, A, b, B);
      return Det<scalar_t>(a, A);
   }
};

template <>
struct MatrixOps<2,2>
{
   // Compute det(A).
   template <typename scalar_t, typename layout_t, typename data_t>
   static inline scalar_t Det(const layout_t &a, const data_t &A)
   {
      MFEM_FLOPS_ADD(3);
      return (A[a.ind(0,0)]*A[a.ind(1,1)] -
              A[a.ind(1,0)]*A[a.ind(0,1)]);
   }

   // Compute det(A), host+device version.
   template <typename scalar_t, typename layout_t, typename data_t>
   MFEM_HOST_DEVICE
   static inline scalar_t DetHD(const layout_t &a, const data_t &A)
   {
      MFEM_FLOPS_ADD(3);
      return (A[a.ind(0,0)]*A[a.ind(1,1)] -
              A[a.ind(1,0)]*A[a.ind(0,1)]);
   }

   // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
   template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
             typename D_data_t>
   static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
   {
      const int M = A_layout_t::dim_1;
      MFEM_FLOPS_ADD(3*M);
      for (int i = 0; i < M; i++)
      {
         Assign<Op>(D[i], (A[a.ind(i,0,0)]*A[a.ind(i,1,1)] -
                           A[a.ind(i,1,0)]*A[a.ind(i,0,1)]));
      }
   }

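   // For a 2x2 matrix the adjugate is obtained by swapping the diagonal
   // entries and negating the off-diagonal ones:
   //    adj([a11 a12; a21 a22]) = [a22 -a12; -a21 a11],
   // so that A*adj(A) = det(A)*I.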
   // Compute B = adj(A).
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
                               const B_layout_t &b, B_data_t &B)
   {
      B[b.ind(0,0)] =  A[a.ind(1,1)];
      B[b.ind(0,1)] = -A[a.ind(0,1)];
      B[b.ind(1,0)] = -A[a.ind(1,0)];
      B[b.ind(1,1)] =  A[a.ind(0,0)];
   }

   // Compute B = adj(A), host+device version.
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   MFEM_HOST_DEVICE
   static inline void AdjugateHD(const A_layout_t &a, const A_data_t &A,
                                 const B_layout_t &b, B_data_t &B)
   {
      B[b.ind(0,0)] =  A[a.ind(1,1)];
      B[b.ind(0,1)] = -A[a.ind(0,1)];
      B[b.ind(1,0)] = -A[a.ind(1,0)];
      B[b.ind(1,1)] =  A[a.ind(0,0)];
   }

   // Compute adj(A) and det(A).
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
                                 const B_layout_t &b, B_data_t &B)
   {
      Adjugate<scalar_t>(a, A, b, B);
      return Det<scalar_t>(a, A);
   }

   // Compute adj(A) and det(A), host+device version.
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   MFEM_HOST_DEVICE
   static inline scalar_t AdjDetHD(const A_layout_t &a, const A_data_t &A,
                                   const B_layout_t &b, B_data_t &B)
   {
      AdjugateHD<scalar_t>(a, A, b, B);
      return DetHD<scalar_t>(a, A);
   }

   template <bool symm> struct Symm;
};

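// Symm<true> assumes packed symmetric storage addressed by a single index
// (3 entries for 2x2: a11, a21, a22); Symm<false> writes a full 2x2 matrix,
// mirroring the off-diagonal entry.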
template <>
struct MatrixOps<2,2>::Symm<true>
{
   template <typename A_layout_t, typename A_data_t, typename scalar_t>
   static inline MFEM_ALWAYS_INLINE
   void Set(const A_layout_t &a, A_data_t &A,
            const scalar_t a11, const scalar_t a21, const scalar_t a22)
   {
      A[a.ind(0)] = a11;
      A[a.ind(1)] = a21;
      A[a.ind(2)] = a22;
   }
};

template <>
struct MatrixOps<2,2>::Symm<false>
{
   template <typename A_layout_t, typename A_data_t, typename scalar_t>
   static inline MFEM_ALWAYS_INLINE
   void Set(const A_layout_t &a, A_data_t &A,
            const scalar_t a11, const scalar_t a21, const scalar_t a22)
   {
      A[a.ind(0,0)] = a11;
      A[a.ind(1,0)] = a21;
      A[a.ind(0,1)] = a21;
      A[a.ind(1,1)] = a22;
   }
};

template <>
struct MatrixOps<3,3>
{
   // Compute det(A).
   template <typename scalar_t, typename layout_t, typename data_t>
   static inline scalar_t Det(const layout_t &a, const data_t &A)
   {
      MFEM_FLOPS_ADD(14);
      return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
                             A[a.ind(2,1)]*A[a.ind(1,2)]) -
              A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
                             A[a.ind(2,1)]*A[a.ind(0,2)]) +
              A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
                             A[a.ind(1,1)]*A[a.ind(0,2)]));
   }

   // Compute det(A), host+device version.
   template <typename scalar_t, typename layout_t, typename data_t>
   MFEM_HOST_DEVICE
   static inline scalar_t DetHD(const layout_t &a, const data_t &A)
   {
      MFEM_FLOPS_ADD(14);
      return (A[a.ind(0,0)]*(A[a.ind(1,1)]*A[a.ind(2,2)] -
                             A[a.ind(2,1)]*A[a.ind(1,2)]) -
              A[a.ind(1,0)]*(A[a.ind(0,1)]*A[a.ind(2,2)] -
                             A[a.ind(2,1)]*A[a.ind(0,2)]) +
              A[a.ind(2,0)]*(A[a.ind(0,1)]*A[a.ind(1,2)] -
                             A[a.ind(1,1)]*A[a.ind(0,2)]));
   }

   // Compute det(A). Batched version: D[i] {=,+=,*=} det(A[i,*,*])
   template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
             typename D_data_t>
   static inline void Det(const A_layout_t &a, const A_data_t &A, D_data_t &D)
   {
      const int M = A_layout_t::dim_1;
      MFEM_FLOPS_ADD(14*M);
      for (int i = 0; i < M; i++)
      {
         Assign<Op>(
            D[i],
            A[a.ind(i,0,0)]*(A[a.ind(i,1,1)]*A[a.ind(i,2,2)] -
                             A[a.ind(i,2,1)]*A[a.ind(i,1,2)]) -
            A[a.ind(i,1,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,2,2)] -
                             A[a.ind(i,2,1)]*A[a.ind(i,0,2)]) +
            A[a.ind(i,2,0)]*(A[a.ind(i,0,1)]*A[a.ind(i,1,2)] -
                             A[a.ind(i,1,1)]*A[a.ind(i,0,2)]));
      }
   }

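   // The 3x3 adjugate below is the transpose of the cofactor matrix: each
   // entry B(i,j) is the 2x2 minor of A obtained by deleting row j and
   // column i, with the sign (-1)^(i+j) folded into the order of the two
   // products.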
   // Compute B = adj(A).
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   static inline void Adjugate(const A_layout_t &a, const A_data_t &A,
                               const B_layout_t &b, B_data_t &B)
   {
      MFEM_FLOPS_ADD(27);
      B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
      B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
      B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
      B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
      B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
      B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
      B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
      B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
      B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
   }

   // Compute B = adj(A), host+device version.
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   MFEM_HOST_DEVICE
   static inline void AdjugateHD(const A_layout_t &a, const A_data_t &A,
                                 const B_layout_t &b, B_data_t &B)
   {
      MFEM_FLOPS_ADD(27);
      B[b.ind(0,0)] = A[a.ind(1,1)]*A[a.ind(2,2)] - A[a.ind(1,2)]*A[a.ind(2,1)];
      B[b.ind(0,1)] = A[a.ind(0,2)]*A[a.ind(2,1)] - A[a.ind(0,1)]*A[a.ind(2,2)];
      B[b.ind(0,2)] = A[a.ind(0,1)]*A[a.ind(1,2)] - A[a.ind(0,2)]*A[a.ind(1,1)];
      B[b.ind(1,0)] = A[a.ind(1,2)]*A[a.ind(2,0)] - A[a.ind(1,0)]*A[a.ind(2,2)];
      B[b.ind(1,1)] = A[a.ind(0,0)]*A[a.ind(2,2)] - A[a.ind(0,2)]*A[a.ind(2,0)];
      B[b.ind(1,2)] = A[a.ind(0,2)]*A[a.ind(1,0)] - A[a.ind(0,0)]*A[a.ind(1,2)];
      B[b.ind(2,0)] = A[a.ind(1,0)]*A[a.ind(2,1)] - A[a.ind(1,1)]*A[a.ind(2,0)];
      B[b.ind(2,1)] = A[a.ind(0,1)]*A[a.ind(2,0)] - A[a.ind(0,0)]*A[a.ind(2,1)];
      B[b.ind(2,2)] = A[a.ind(0,0)]*A[a.ind(1,1)] - A[a.ind(0,1)]*A[a.ind(1,0)];
   }

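   // In AdjDet below, det(A) is recovered from the identity
   // adj(A)*A = det(A)*I: its (0,0) entry is
   // B(0,0)*A(0,0) + B(0,1)*A(1,0) + B(0,2)*A(2,0), which costs only 5 extra
   // flops once the adjugate is available.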
   // Compute adj(A) and det(A).
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   static inline scalar_t AdjDet(const A_layout_t &a, const A_data_t &A,
                                 const B_layout_t &b, B_data_t &B)
   {
      MFEM_FLOPS_ADD(5);
      Adjugate<scalar_t>(a, A, b, B);
      return (A[a.ind(0,0)]*B[b.ind(0,0)] +
              A[a.ind(1,0)]*B[b.ind(0,1)] +
              A[a.ind(2,0)]*B[b.ind(0,2)]);
   }

   // Compute adj(A) and det(A), host+device version.
   template <typename scalar_t,
             typename A_layout_t, typename A_data_t,
             typename B_layout_t, typename B_data_t>
   MFEM_HOST_DEVICE
   static inline scalar_t AdjDetHD(const A_layout_t &a, const A_data_t &A,
                                   const B_layout_t &b, B_data_t &B)
   {
      MFEM_FLOPS_ADD(5);
      AdjugateHD<scalar_t>(a, A, b, B);
      return (A[a.ind(0,0)]*B[b.ind(0,0)] +
              A[a.ind(1,0)]*B[b.ind(0,1)] +
              A[a.ind(2,0)]*B[b.ind(0,2)]);
   }

   template <bool symm> struct Symm;
};

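// Same convention as in the 2x2 case: Symm<true> stores the 6 packed entries
// of the lower triangle (a11, a21, a31, a22, a32, a33); Symm<false> fills the
// full symmetric matrix.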
template <>
struct MatrixOps<3,3>::Symm<true>
{
   template <typename A_layout_t, typename A_data_t, typename scalar_t>
   static inline MFEM_ALWAYS_INLINE
   void Set(const A_layout_t &a, A_data_t &A,
            const scalar_t a11, const scalar_t a21, const scalar_t a31,
            const scalar_t a22, const scalar_t a32, const scalar_t a33)
   {
      A[a.ind(0)] = a11;
      A[a.ind(1)] = a21;
      A[a.ind(2)] = a31;
      A[a.ind(3)] = a22;
      A[a.ind(4)] = a32;
      A[a.ind(5)] = a33;
   }
};

template <>
struct MatrixOps<3,3>::Symm<false>
{
   template <typename A_layout_t, typename A_data_t, typename scalar_t>
   static inline MFEM_ALWAYS_INLINE
   void Set(const A_layout_t &a, A_data_t &A,
            const scalar_t a11, const scalar_t a21, const scalar_t a31,
            const scalar_t a22, const scalar_t a32, const scalar_t a33)
   {
      A[a.ind(0,0)] = a11;
      A[a.ind(1,0)] = a21;
      A[a.ind(2,0)] = a31;
      A[a.ind(0,1)] = a21;
      A[a.ind(1,1)] = a22;
      A[a.ind(2,1)] = a32;
      A[a.ind(0,2)] = a31;
      A[a.ind(1,2)] = a32;
      A[a.ind(2,2)] = a33;
   }
};

} // namespace mfem::internal

// Compute the determinant of a (small) matrix: det(A).
template <typename scalar_t, typename layout_t, typename data_t>
inline scalar_t TDet(const layout_t &a, const data_t &A)
{
   MFEM_STATIC_ASSERT(layout_t::rank == 2, "invalid rank");
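   // Here and in the variants below, the 'template' disambiguator before the
   // dependent member template is the standard-conforming form; the #else
   // branch works around older IBM XL C++ compilers (__xlC__ < 0x0d00, i.e.
   // versions before 13.0) that do not accept it here.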
#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
   return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
          template Det<scalar_t>(a, A);
#else
   return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
          Det<scalar_t>(a, A);
#endif
}
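
// Usage sketch (an illustration, not part of the library API): with a 3x3
// column-major layout and plain scalar data, TDet() above and TAdjugate()
// below dispatch to the MatrixOps<3,3> specialization, which uses only
// ordinary arithmetic operators and therefore works with built-in doubles.
// ColumnMajorLayout2D is assumed from tlayout.hpp.
//
//    double J[9] = { /* 3x3 Jacobian, column-major */ };
//    ColumnMajorLayout2D<3,3> layout;
//    double adjJ[9];
//    double detJ = TDet<double>(layout, J);
//    TAdjugate<double>(layout, J, layout, adjJ);   // adjJ = adj(J)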

// Compute the determinant of a (small) matrix: det(A). Host+device version.
template <typename scalar_t, typename layout_t, typename data_t>
MFEM_HOST_DEVICE
inline scalar_t TDetHD(const layout_t &a, const data_t &A)
{
   MFEM_STATIC_ASSERT(layout_t::rank == 2, "invalid rank");
#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
   return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
          template DetHD<scalar_t>(a, A);
#else
   return internal::MatrixOps<layout_t::dim_1,layout_t::dim_2>::
          DetHD<scalar_t>(a, A);
#endif
}

// Compute the determinants of a set of (small) matrices: D[i] = det(A[i,*,*]).
// The layout of A is (M x N1 x N2) and the size of D is M.
template <AssignOp::Type Op, typename A_layout_t, typename A_data_t,
          typename D_data_t>
inline void TDet(const A_layout_t &a, const A_data_t &A, D_data_t &D)
{
   MFEM_STATIC_ASSERT(A_layout_t::rank == 3, "invalid rank");
#if !defined(__xlC__) || (__xlC__ >= 0x0d00)
   internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
   template Det<Op>(a, A, D);
#else
   internal::MatrixOps<A_layout_t::dim_2,A_layout_t::dim_3>::
   Det<Op>(a, A, D);
#endif
}

// Compute the adjugate matrix of a (small) matrix: B = adj(A).
template <typename scalar_t,
          typename A_layout_t, typename A_data_t,
          typename B_layout_t, typename B_data_t>
inline void TAdjugate(const A_layout_t &a, const A_data_t &A,
                      const B_layout_t &b, B_data_t &B)
{
   MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
                      "invalid ranks");
   internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
   template Adjugate<scalar_t>(a, A, b, B);
}

// Compute the adjugate and the determinant of a (small) matrix: B = adj(A),
// return det(A).
template <typename scalar_t,
          typename A_layout_t, typename A_data_t,
          typename B_layout_t, typename B_data_t>
inline scalar_t TAdjDet(const A_layout_t &a, const A_data_t &A,
                        const B_layout_t &b, B_data_t &B)
{
   MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
                      "invalid ranks");
   return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
          template AdjDet<scalar_t>(a, A, b, B);
}

// Compute the adjugate and the determinant of a (small) matrix: B = adj(A),
// return det(A). Host+device version.
template <typename scalar_t,
          typename A_layout_t, typename A_data_t,
          typename B_layout_t, typename B_data_t>
MFEM_HOST_DEVICE
inline scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A,
                          const B_layout_t &b, B_data_t &B)
{
   MFEM_STATIC_ASSERT(A_layout_t::rank == 2 && B_layout_t::rank == 2,
                      "invalid ranks");
   return internal::MatrixOps<A_layout_t::dim_1,A_layout_t::dim_2>::
          template AdjDetHD<scalar_t>(a, A, b, B);
}

} // namespace mfem

#endif // MFEM_TEMPLATE_MATRIX