MFEM v4.8.0
Finite element discretization library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
kernels.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_LINALG_KERNELS_HPP
13#define MFEM_LINALG_KERNELS_HPP
14
15#include "../config/config.hpp"
18
19#include "matrix.hpp"
20#include "tmatrix.hpp"
21#include "tlayout.hpp"
22#include "ttensor.hpp"
23
24// This header contains stand-alone functions for "small" dense linear algebra
25// (at quadrature point or element-level) designed to be inlined directly into
26// device kernels.
27
28// Many methods of the DenseMatrix class and some of the Vector class call these
29// kernels directly on the host, see the implementations in linalg/densemat.cpp
30// and linalg/vector.cpp.
31
32namespace mfem
33{
34
35namespace kernels
36{
37
38/// Compute the square of the Euclidean distance to another vector
39template<int dim>
40MFEM_HOST_DEVICE inline real_t DistanceSquared(const real_t *x, const real_t *y)
41{
42 real_t d = 0.0;
43 for (int i = 0; i < dim; i++) { d += (x[i]-y[i])*(x[i]-y[i]); }
44 return d;
45}
46
47/// Creates n x n diagonal matrix with diagonal elements c
48template<int dim>
49MFEM_HOST_DEVICE inline void Diag(const real_t c, real_t *data)
50{
51 const int N = dim*dim;
52 for (int i = 0; i < N; i++) { data[i] = 0.0; }
53 for (int i = 0; i < dim; i++) { data[i*(dim+1)] = c; }
54}
55
56/// Vector subtraction operation: z = a * (x - y)
57template<int dim>
58MFEM_HOST_DEVICE inline void Subtract(const real_t a,
59 const real_t *x, const real_t *y,
60 real_t *z)
61{
62 for (int i = 0; i < dim; i++) { z[i] = a * (x[i] - y[i]); }
63}
64
65/// Dense matrix operation: VWt += v w^t
66template<int dim>
67MFEM_HOST_DEVICE inline void AddMultVWt(const real_t *v, const real_t *w,
68 real_t *VWt)
69{
70 for (int i = 0; i < dim; i++)
71 {
72 const real_t vi = v[i];
73 for (int j = 0; j < dim; j++) { VWt[i*dim+j] += vi * w[j]; }
74 }
75}
76
77template<int H, int W, typename T>
78MFEM_HOST_DEVICE inline
79void FNorm(real_t &scale_factor, real_t &scaled_fnorm2, const T *data)
80{
81 int i, hw = H * W;
82 T max_norm = 0.0, entry, fnorm2;
83
84 for (i = 0; i < hw; i++)
85 {
86 entry = fabs(data[i]);
87 if (entry > max_norm)
88 {
89 max_norm = entry;
90 }
91 }
92
93 if (max_norm == 0.0)
94 {
95 scale_factor = scaled_fnorm2 = 0.0;
96 return;
97 }
98
99 fnorm2 = 0.0;
100 for (i = 0; i < hw; i++)
101 {
102 entry = data[i] / max_norm;
103 fnorm2 += entry * entry;
104 }
105
106 scale_factor = max_norm;
107 scaled_fnorm2 = fnorm2;
108}
109
110/// Compute the Frobenius norm of the matrix
111template<int H, int W, typename T>
112MFEM_HOST_DEVICE inline
113real_t FNorm(const T *data)
114{
115 real_t s, n2;
116 kernels::FNorm<H,W>(s, n2, data);
117 return s*sqrt(n2);
118}
119
120/// Compute the square of the Frobenius norm of the matrix
121template<int H, int W, typename T>
122MFEM_HOST_DEVICE inline
123real_t FNorm2(const T *data)
124{
125 real_t s, n2;
126 kernels::FNorm<H,W>(s, n2, data);
127 return s*s*n2;
128}
129
130/// Returns the l2 norm of the Vector with given @a size and @a data.
131template<typename T>
132MFEM_HOST_DEVICE inline
133real_t Norml2(const int size, const T *data)
134{
135 if (0 == size) { return 0.0; }
136 if (1 == size) { return std::abs(data[0]); }
137 T scale = 0.0;
138 T sum = 0.0;
139 for (int i = 0; i < size; i++)
140 {
141 if (data[i] != 0.0)
142 {
143 const T absdata = fabs(data[i]);
144 if (scale <= absdata)
145 {
146 const T sqr_arg = scale / absdata;
147 sum = 1.0 + sum * (sqr_arg * sqr_arg);
148 scale = absdata;
149 continue;
150 } // end if scale <= absdata
151 const T sqr_arg = absdata / scale;
152 sum += (sqr_arg * sqr_arg); // else scale > absdata
153 } // end if data[i] != 0
154 }
155 return scale * sqrt(sum);
156}
157
158/** @brief Matrix vector multiplication: y = A x, where the matrix A is of size
159 @a height x @a width with given @a data, while @a x and @a y specify the
160 data of the input and output vectors. */
161template<typename TA, typename TX, typename TY>
162MFEM_HOST_DEVICE inline
163void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
164{
165 if (width == 0)
166 {
167 for (int row = 0; row < height; row++)
168 {
169 y[row] = 0.0;
170 }
171 return;
172 }
173 const TA *d_col = data;
174 TX x_col = x[0];
175 for (int row = 0; row < height; row++)
176 {
177 y[row] = x_col*d_col[row];
178 }
179 d_col += height;
180 for (int col = 1; col < width; col++)
181 {
182 x_col = x[col];
183 for (int row = 0; row < height; row++)
184 {
185 y[row] += x_col*d_col[row];
186 }
187 d_col += height;
188 }
189}
190
191/** @brief Matrix transpose vector multiplication: y = At x, where the matrix A
192 is of size @a height x @a width with given @a data, while @a x and @a y
193 specify the data of the input and output vectors. */
194template<typename TA, typename TX, typename TY>
195MFEM_HOST_DEVICE inline
196void MultTranspose(const int height, const int width, const TA *data,
197 const TX *x, TY *y)
198{
199 if (height == 0)
200 {
201 for (int row = 0; row < width; row++)
202 {
203 y[row] = 0.0;
204 }
205 return;
206 }
207 TY *y_off = y;
208 for (int i = 0; i < width; ++i)
209 {
210 TY val = 0.0;
211 for (int j = 0; j < height; ++j)
212 {
213 val += x[j] * data[i * height + j];
214 }
215 *y_off = val;
216 y_off++;
217 }
218}
219
220/// Symmetrize a square matrix with given @a size and @a data: A -> (A+A^T)/2.
221template<typename T>
222MFEM_HOST_DEVICE inline
223void Symmetrize(const int size, T *data)
224{
225 for (int i = 0; i < size; i++)
226 {
227 for (int j = 0; j < i; j++)
228 {
229 const T a = 0.5 * (data[i*size+j] + data[j*size+i]);
230 data[j*size+i] = data[i*size+j] = a;
231 }
232 }
233}
234
235/// Compute the determinant of a square matrix of size dim with given @a data.
236template<int dim, typename T>
237MFEM_HOST_DEVICE inline T Det(const T *data)
238{
240}
241
242/** @brief Return the inverse of a matrix with given @a size and @a data into
243 the matrix with data @a inv_data. */
244template<int dim, typename T>
245MFEM_HOST_DEVICE inline
246void CalcInverse(const T *data, T *inv_data)
247{
248 typedef ColumnMajorLayout2D<dim,dim> layout_t;
249 const T det = TAdjDetHD<T>(layout_t(), data, layout_t(), inv_data);
250 TAssignHD<AssignOp::Mult>(layout_t(), inv_data, static_cast<T>(1.0)/det);
251}
252
253/** @brief Return the adjugate of a matrix */
254template<int dim, typename T>
255MFEM_HOST_DEVICE inline
256void CalcAdjugate(const T *data, T *adj_data)
257{
258 typedef ColumnMajorLayout2D<dim,dim> layout_t;
259 TAdjugateHD<T>(layout_t(), data, layout_t(), adj_data);
260}
261
262/** @brief Compute C = A + alpha*B, where the matrices A, B and C are of size @a
263 height x @a width with data @a Adata, @a Bdata and @a Cdata. */
264template<typename TALPHA, typename TA, typename TB, typename TC>
265MFEM_HOST_DEVICE inline
266void Add(const int height, const int width, const TALPHA alpha,
267 const TA *Adata, const TB *Bdata, TC *Cdata)
268{
269 for (int j = 0; j < width; j++)
270 {
271 for (int i = 0; i < height; i++)
272 {
273 const int n = i*width+j;
274 Cdata[n] = Adata[n] + alpha * Bdata[n];
275 }
276 }
277}
278
279/** @brief Compute C = alpha*A + beta*B, where the matrices A, B and C are of
280 size @a height x @a width with data @a Adata, @a Bdata and @a Cdata. */
281template<typename TALPHA, typename TBETA, typename TA, typename TB, typename TC>
282MFEM_HOST_DEVICE inline
283void Add(const int height, const int width,
284 const TALPHA alpha, const TA *Adata,
285 const TBETA beta, const TB *Bdata,
286 TC *Cdata)
287{
288 const int m = height * width;
289 for (int i = 0; i < m; i++)
290 {
291 Cdata[i] = alpha * Adata[i] + beta * Bdata[i];
292 }
293}
294
295/** @brief Compute B += A, where the matrices A and B are of size
296 @a height x @a width with data @a Adata and @a Bdata. */
297template<typename TA, typename TB>
298MFEM_HOST_DEVICE inline
299void Add(const int height, const int width, const TA *Adata, TB *Bdata)
300{
301 const int m = height * width;
302 for (int i = 0; i < m; i++)
303 {
304 Bdata[i] += Adata[i];
305 }
306}
307
308/** @brief Compute B +=alpha*A, where the matrices A and B are of size
309 @a height x @a width with data @a Adata and @a Bdata. */
310template<typename TA, typename TB>
311MFEM_HOST_DEVICE inline
312void Add(const int height, const int width,
313 const real_t alpha, const TA *Adata, TB *Bdata)
314{
315 const int m = height * width;
316 for (int i = 0; i < m; i++)
317 {
318 Bdata[i] += alpha * Adata[i];
319 }
320}
321
322/** @brief Compute B = alpha*A, where the matrices A and B are of size
323 @a height x @a width with data @a Adata and @a Bdata. */
324template<typename TA, typename TB>
325MFEM_HOST_DEVICE inline
326void Set(const int height, const int width,
327 const real_t alpha, const TA *Adata, TB *Bdata)
328{
329 const int m = height * width;
330 for (int i = 0; i < m; i++)
331 {
332 Bdata[i] = alpha * Adata[i];
333 }
334}
335
336/** @brief Matrix-matrix multiplication: A = alpha * B * C + beta * A, where the
337 matrices A, B and C are of sizes @a Aheight x @a Awidth, @a Aheight x @a
338 Bwidth and @a Bwidth x @a Awidth, respectively. */
339template<typename TA, typename TB, typename TC>
340MFEM_HOST_DEVICE inline
341void AddMult(const int Aheight, const int Awidth, const int Bwidth,
342 const TB *Bdata, const TC *Cdata, TA *Adata, const TB alpha,
343 const TA beta)
344{
345 const int ah_x_aw = Aheight * Awidth;
346 if (beta == 0.0)
347 {
348 for (int i = 0; i < ah_x_aw; i++) { Adata[i] = 0.0; }
349 }
350 else if (beta != 1.0)
351 {
352 for (int i = 0; i < ah_x_aw; i++) { Adata[i] *= beta; }
353 }
354 for (int j = 0; j < Awidth; j++)
355 {
356 for (int k = 0; k < Bwidth; k++)
357 {
358 const real_t val = alpha * Cdata[k+j*Bwidth];
359 for (int i = 0; i < Aheight; i++)
360 {
361 Adata[i+j*Aheight] += val * Bdata[i+k*Aheight];
362 }
363 }
364 }
365}
366
367/** @brief Matrix-matrix multiplication: A = B * C, where the matrices A, B and
368 C are of sizes @a Aheight x @a Awidth, @a Aheight x @a Bwidth and @a Bwidth
369 x @a Awidth, respectively. */
370template<typename TA, typename TB, typename TC>
371MFEM_HOST_DEVICE inline
372void Mult(const int Aheight, const int Awidth, const int Bwidth,
373 const TB *Bdata, const TC *Cdata, TA *Adata)
374{
375 AddMult(Aheight, Awidth, Bwidth, Bdata, Cdata, Adata, TB(1.0), TA(0.0));
376}
377
378/** @brief Multiply a matrix of size @a Aheight x @a Awidth and data @a Adata
379 with the transpose of a matrix of size @a Bheight x @a Awidth and data @a
380 Bdata: A * Bt. Return the result in a matrix with data @a ABtdata. */
381template<typename TA, typename TB, typename TC>
382MFEM_HOST_DEVICE inline
383void MultABt(const int Aheight, const int Awidth, const int Bheight,
384 const TA *Adata, const TB *Bdata, TC *ABtdata)
385{
386 const int ah_x_bh = Aheight * Bheight;
387 for (int i = 0; i < ah_x_bh; i++) { ABtdata[i] = 0.0; }
388 for (int k = 0; k < Awidth; k++)
389 {
390 TC *c = ABtdata;
391 for (int j = 0; j < Bheight; j++)
392 {
393 const real_t bjk = Bdata[j];
394 for (int i = 0; i < Aheight; i++)
395 {
396 c[i] += Adata[i] * bjk;
397 }
398 c += Aheight;
399 }
400 Adata += Aheight;
401 Bdata += Bheight;
402 }
403}
404
405/** @brief Compute C = alpha*At*B + beta*C.
406
407 Multiply the transpose of a matrix of size @a Aheight x @a Awidth and data
408 @a Adata with a matrix of size @a Aheight x @a Bwidth and data @a Bdata. */
409template<typename TA, typename TB, typename TC>
410MFEM_HOST_DEVICE inline
411void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth,
412 const TA *Adata, const TB *Bdata, TC *Cdata, const TB alpha,
413 const TA beta)
414{
415 const int aw_x_bw = Awidth * Bwidth;
416
417 if (beta == 0.0)
418 {
419 for (int i = 0; i < aw_x_bw; i++) { Cdata[i] = 0.0; }
420 }
421 else if (beta != 1.0)
422 {
423 for (int i = 0; i < aw_x_bw; i++) { Cdata[i] *= beta; }
424 }
425
426 TC *c = Cdata;
427 for (int i = 0; i < Bwidth; ++i)
428 {
429 for (int j = 0; j < Awidth; ++j)
430 {
431 TC val = 0.0;
432 for (int k = 0; k < Aheight; ++k)
433 {
434 val += alpha * Adata[j * Aheight + k] * Bdata[i * Aheight + k];
435 }
436 *c += val;
437 c++;
438 }
439 }
440}
441
442/** @brief Multiply the transpose of a matrix of size @a Aheight x @a Awidth
443 and data @a Adata with a matrix of size @a Aheight x @a Bwidth and data @a
444 Bdata: At * B. Return the result in a matrix with data @a AtBdata. */
445template<typename TA, typename TB, typename TC>
446MFEM_HOST_DEVICE inline
447void MultAtB(const int Aheight, const int Awidth, const int Bwidth,
448 const TA *Adata, const TB *Bdata, TC *AtBdata)
449{
450 AddMultAtB(Aheight, Awidth, Bwidth, Adata, Bdata, AtBdata, TB(1.0), TA(0.0));
451}
452
453/** @brief Multiply the transpose of a matrix of size @a Aheight x @a Awidth
454 and data @a Adata with a matrix of size @a Aheight x @a Bwidth and data @a
455 Bdata: At * B. Add the result to the matrix with data @a AtBdata. */
456template<typename TA, typename TB, typename TC>
457MFEM_HOST_DEVICE inline
458void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth,
459 const TA *Adata, const TB *Bdata, TC *AtBdata)
460{
461 TC *c = AtBdata;
462 for (int i = 0; i < Bwidth; ++i)
463 {
464 for (int j = 0; j < Awidth; ++j)
465 {
466 TC val = 0.0;
467 for (int k = 0; k < Aheight; ++k)
468 {
469 val += Adata[j * Aheight + k] * Bdata[i * Aheight + k];
470 }
471 *c += val;
472 c++;
473 }
474 }
475}
476
477/// Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
478template<int HEIGHT, int WIDTH> MFEM_HOST_DEVICE
479void CalcLeftInverse(const real_t *data, real_t *left_inv);
480
481/// Compute the spectrum of the matrix of size dim with given @a data, returning
482/// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
483/// vec (listed consecutively).
484template<int dim> MFEM_HOST_DEVICE
485void CalcEigenvalues(const real_t *data, real_t *lambda, real_t *vec);
486
487/// Return the i'th singular value of the matrix of size dim with given @a data.
488template<int dim> MFEM_HOST_DEVICE
489real_t CalcSingularvalue(const real_t *data, const int i);
490
491
492// Utility functions for CalcEigenvalues and CalcSingularvalue
493namespace internal
494{
495
496/// Utility function to swap the values of @a a and @a b.
497template<typename T>
498MFEM_HOST_DEVICE static inline
499void Swap(T &a, T &b)
500{
501 T tmp = a;
502 a = b;
503 b = tmp;
504}
505
506const real_t Epsilon = std::numeric_limits<real_t>::epsilon();
507
508/// Utility function used in CalcSingularvalue<3>.
509MFEM_HOST_DEVICE static inline
510void Eigenvalues2S(const real_t &d12, real_t &d1, real_t &d2)
511{
512 const real_t sqrt_1_eps = sqrt(1./Epsilon);
513 if (d12 != 0.)
514 {
515 // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
516 real_t t;
517 const real_t zeta = (d2 - d1)/(2*d12); // inf/inf from overflows?
518 if (fabs(zeta) < sqrt_1_eps)
519 {
520 t = d12*copysign(1./(fabs(zeta) + sqrt(1. + zeta*zeta)), zeta);
521 }
522 else
523 {
524 t = d12*copysign(0.5/fabs(zeta), zeta);
525 }
526 d1 -= t;
527 d2 += t;
528 }
529}
530
531/// Utility function used in CalcEigenvalues().
532MFEM_HOST_DEVICE static inline
533void Eigensystem2S(const real_t &d12, real_t &d1, real_t &d2,
534 real_t &c, real_t &s)
535{
536 const real_t sqrt_1_eps = sqrt(1./Epsilon);
537 if (d12 == 0.0)
538 {
539 c = 1.;
540 s = 0.;
541 }
542 else
543 {
544 // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
545 real_t t;
546 const real_t zeta = (d2 - d1)/(2*d12);
547 const real_t azeta = fabs(zeta);
548 if (azeta < sqrt_1_eps)
549 {
550 t = copysign(1./(azeta + sqrt(1. + zeta*zeta)), zeta);
551 }
552 else
553 {
554 t = copysign(0.5/azeta, zeta);
555 }
556 c = sqrt(1./(1. + t*t));
557 s = c*t;
558 t *= d12;
559 d1 -= t;
560 d2 += t;
561 }
562}
563
564
565/// Utility function used in CalcEigenvalues<3>.
566MFEM_HOST_DEVICE static inline
567void GetScalingFactor(const real_t &d_max, real_t &mult)
568{
569 int d_exp;
570 if (d_max > 0.)
571 {
572 mult = frexp(d_max, &d_exp);
573 if (d_exp == std::numeric_limits<real_t>::max_exponent)
574 {
575 mult *= std::numeric_limits<real_t>::radix;
576 }
577 mult = d_max/mult;
578 }
579 else
580 {
581 mult = 1.;
582 }
583 // mult = 2^d_exp is such that d_max/mult is in [0.5,1) or in other words
584 // d_max is in the interval [0.5,1)*mult
585}
586
587/// Utility function used in CalcEigenvalues<3>.
588MFEM_HOST_DEVICE static inline
589bool KernelVector2G(const int &mode,
590 real_t &d1, real_t &d12, real_t &d21, real_t &d2)
591{
592 // Find a vector (z1,z2) in the "near"-kernel of the matrix
593 // | d1 d12 |
594 // | d21 d2 |
595 // using QR factorization.
596 // The vector (z1,z2) is returned in (d1,d2). Return 'true' if the matrix
597 // is zero without setting (d1,d2).
598 // Note: in the current implementation |z1| + |z2| = 1.
599
600 // l1-norms of the columns
601 real_t n1 = fabs(d1) + fabs(d21);
602 real_t n2 = fabs(d2) + fabs(d12);
603
604 bool swap_columns = (n2 > n1);
605 real_t mu;
606
607 if (!swap_columns)
608 {
609 if (n1 == 0.)
610 {
611 return true;
612 }
613
614 if (mode == 0) // eliminate the larger entry in the column
615 {
616 if (fabs(d1) > fabs(d21))
617 {
618 Swap(d1, d21);
619 Swap(d12, d2);
620 }
621 }
622 else // eliminate the smaller entry in the column
623 {
624 if (fabs(d1) < fabs(d21))
625 {
626 Swap(d1, d21);
627 Swap(d12, d2);
628 }
629 }
630 }
631 else
632 {
633 // n2 > n1, swap columns 1 and 2
634 if (mode == 0) // eliminate the larger entry in the column
635 {
636 if (fabs(d12) > fabs(d2))
637 {
638 Swap(d1, d2);
639 Swap(d12, d21);
640 }
641 else
642 {
643 Swap(d1, d12);
644 Swap(d21, d2);
645 }
646 }
647 else // eliminate the smaller entry in the column
648 {
649 if (fabs(d12) < fabs(d2))
650 {
651 Swap(d1, d2);
652 Swap(d12, d21);
653 }
654 else
655 {
656 Swap(d1, d12);
657 Swap(d21, d2);
658 }
659 }
660 }
661
662 n1 = hypot(d1, d21);
663
664 if (d21 != 0.)
665 {
666 // v = (n1, n2)^t, |v| = 1
667 // Q = I - 2 v v^t, Q (d1, d21)^t = (mu, 0)^t
668 mu = copysign(n1, d1);
669 n1 = -d21*(d21/(d1 + mu)); // = d1 - mu
670 d1 = mu;
671 // normalize (n1,d21) to avoid overflow/underflow
672 // normalize (n1,d21) by the max-norm to avoid the sqrt call
673 if (fabs(n1) <= fabs(d21))
674 {
675 // (n1,n2) <-- (n1/d21,1)
676 n1 = n1/d21;
677 mu = (2./(1. + n1*n1))*(n1*d12 + d2);
678 d2 = d2 - mu;
679 d12 = d12 - mu*n1;
680 }
681 else
682 {
683 // (n1,n2) <-- (1,d21/n1)
684 n2 = d21/n1;
685 mu = (2./(1. + n2*n2))*(d12 + n2*d2);
686 d2 = d2 - mu*n2;
687 d12 = d12 - mu;
688 }
689 }
690
691 // Solve:
692 // | d1 d12 | | z1 | = | 0 |
693 // | 0 d2 | | z2 | | 0 |
694
695 // choose (z1,z2) to minimize |d1*z1 + d12*z2| + |d2*z2|
696 // under the condition |z1| + |z2| = 1, z2 >= 0 (for uniqueness)
697 // set t = z1, z2 = 1 - |t|, -1 <= t <= 1
698 // objective function is:
699 // |d1*t + d12*(1 - |t|)| + |d2|*(1 - |t|) -- piecewise linear with
700 // possible minima are -1,0,1,t1 where t1: d1*t1 + d12*(1 - |t1|) = 0
701 // values: @t=+/-1 -> |d1|, @t=0 -> |n1| + |d2|, @t=t1 -> |d2|*(1 - |t1|)
702
703 // evaluate z2 @t=t1
704 mu = -d12/d1;
705 // note: |mu| <= 1, if using l2-norm for column pivoting
706 // |mu| <= sqrt(2), if using l1-norm
707 n2 = 1./(1. + fabs(mu));
708 // check if |d1|<=|d2|*z2
709 if (fabs(d1) <= n2*fabs(d2))
710 {
711 d2 = 0.;
712 d1 = 1.;
713 }
714 else
715 {
716 d2 = n2;
717 // d1 = (n2 < 0.5) ? copysign(1. - n2, mu) : mu*n2;
718 d1 = mu*n2;
719 }
720
721 if (swap_columns)
722 {
723 Swap(d1, d2);
724 }
725
726 return false;
727}
728
729/// Utility function used in CalcEigenvalues<3>.
730MFEM_HOST_DEVICE static inline
731void Vec_normalize3_aux(const real_t &x1, const real_t &x2,
732 const real_t &x3,
733 real_t &n1, real_t &n2, real_t &n3)
734{
735 real_t t, r;
736
737 const real_t m = fabs(x1);
738 r = x2/m;
739 t = 1. + r*r;
740 r = x3/m;
741 t = sqrt(1./(t + r*r));
742 n1 = copysign(t, x1);
743 t /= m;
744 n2 = x2*t;
745 n3 = x3*t;
746}
747
748/// Utility function used in CalcEigenvalues<3>.
749MFEM_HOST_DEVICE static inline
750void Vec_normalize3(const real_t &x1, const real_t &x2, const real_t &x3,
751 real_t &n1, real_t &n2, real_t &n3)
752{
753 // should work ok when xk is the same as nk for some or all k
754 if (fabs(x1) >= fabs(x2))
755 {
756 if (fabs(x1) >= fabs(x3))
757 {
758 if (x1 != 0.)
759 {
760 Vec_normalize3_aux(x1, x2, x3, n1, n2, n3);
761 }
762 else
763 {
764 n1 = n2 = n3 = 0.;
765 }
766 return;
767 }
768 }
769 else if (fabs(x2) >= fabs(x3))
770 {
771 Vec_normalize3_aux(x2, x1, x3, n2, n1, n3);
772 return;
773 }
774 Vec_normalize3_aux(x3, x1, x2, n3, n1, n2);
775}
776
777/// Utility function used in CalcEigenvalues<3>.
778MFEM_HOST_DEVICE static inline
779int KernelVector3G_aux(const int &mode,
780 real_t &d1, real_t &d2, real_t &d3,
781 real_t &c12, real_t &c13, real_t &c23,
782 real_t &c21, real_t &c31, real_t &c32)
783{
784 int kdim;
785 real_t mu, n1, n2, n3, s1, s2, s3;
786
787 s1 = hypot(c21, c31);
788 n1 = hypot(d1, s1);
789
790 if (s1 != 0.)
791 {
792 // v = (s1, s2, s3)^t, |v| = 1
793 // Q = I - 2 v v^t, Q (d1, c12, c13)^t = (mu, 0, 0)^t
794 mu = copysign(n1, d1);
795 n1 = -s1*(s1/(d1 + mu)); // = d1 - mu
796 d1 = mu;
797
798 // normalize (n1,c21,c31) to avoid overflow/underflow
799 // normalize (n1,c21,c31) by the max-norm to avoid the sqrt call
800 if (fabs(n1) >= fabs(c21))
801 {
802 if (fabs(n1) >= fabs(c31))
803 {
804 // n1 is max, (s1,s2,s3) <-- (1,c21/n1,c31/n1)
805 s2 = c21/n1;
806 s3 = c31/n1;
807 mu = 2./(1. + s2*s2 + s3*s3);
808 n2 = mu*(c12 + s2*d2 + s3*c32);
809 n3 = mu*(c13 + s2*c23 + s3*d3);
810 c12 = c12 - n2;
811 d2 = d2 - s2*n2;
812 c32 = c32 - s3*n2;
813 c13 = c13 - n3;
814 c23 = c23 - s2*n3;
815 d3 = d3 - s3*n3;
816 goto done_column_1;
817 }
818 }
819 else if (fabs(c21) >= fabs(c31))
820 {
821 // c21 is max, (s1,s2,s3) <-- (n1/c21,1,c31/c21)
822 s1 = n1/c21;
823 s3 = c31/c21;
824 mu = 2./(1. + s1*s1 + s3*s3);
825 n2 = mu*(s1*c12 + d2 + s3*c32);
826 n3 = mu*(s1*c13 + c23 + s3*d3);
827 c12 = c12 - s1*n2;
828 d2 = d2 - n2;
829 c32 = c32 - s3*n2;
830 c13 = c13 - s1*n3;
831 c23 = c23 - n3;
832 d3 = d3 - s3*n3;
833 goto done_column_1;
834 }
835 // c31 is max, (s1,s2,s3) <-- (n1/c31,c21/c31,1)
836 s1 = n1/c31;
837 s2 = c21/c31;
838 mu = 2./(1. + s1*s1 + s2*s2);
839 n2 = mu*(s1*c12 + s2*d2 + c32);
840 n3 = mu*(s1*c13 + s2*c23 + d3);
841 c12 = c12 - s1*n2;
842 d2 = d2 - s2*n2;
843 c32 = c32 - n2;
844 c13 = c13 - s1*n3;
845 c23 = c23 - s2*n3;
846 d3 = d3 - n3;
847 }
848
849done_column_1:
850
851 // Solve:
852 // | d2 c23 | | z2 | = | 0 |
853 // | c32 d3 | | z3 | | 0 |
854 if (KernelVector2G(mode, d2, c23, c32, d3))
855 {
856 // Have two solutions:
857 // two vectors in the kernel are P (-c12/d1, 1, 0)^t and
858 // P (-c13/d1, 0, 1)^t where P is the permutation matrix swapping
859 // entries 1 and col.
860
861 // A vector orthogonal to both these vectors is P (1, c12/d1, c13/d1)^t
862 d2 = c12/d1;
863 d3 = c13/d1;
864 d1 = 1.;
865 kdim = 2;
866 }
867 else
868 {
869 // solve for z1:
870 // note: |z1| <= a since |z2| + |z3| = 1, and
871 // max{|c12|,|c13|} <= max{norm(col. 2),norm(col. 3)}
872 // <= norm(col. 1) <= a |d1|
873 // a = 1, if using l2-norm for column pivoting
874 // a = sqrt(3), if using l1-norm
875 d1 = -(c12*d2 + c13*d3)/d1;
876 kdim = 1;
877 }
878
879 Vec_normalize3(d1, d2, d3, d1, d2, d3);
880
881 return kdim;
882}
883
884/// Utility function used in CalcEigenvalues<3>.
885MFEM_HOST_DEVICE static inline
886int KernelVector3S(const int &mode, const real_t &d12,
887 const real_t &d13, const real_t &d23,
888 real_t &d1, real_t &d2, real_t &d3)
889{
890 // Find a unit vector (z1,z2,z3) in the "near"-kernel of the matrix
891 // | d1 d12 d13 |
892 // | d12 d2 d23 |
893 // | d13 d23 d3 |
894 // using QR factorization.
895 // The vector (z1,z2,z3) is returned in (d1,d2,d3).
896 // Returns the dimension of the kernel, kdim, but never zero.
897 // - if kdim == 3, then (d1,d2,d3) is not defined,
898 // - if kdim == 2, then (d1,d2,d3) is a vector orthogonal to the kernel,
899 // - otherwise kdim == 1 and (d1,d2,d3) is a vector in the "near"-kernel.
900
901 real_t c12 = d12, c13 = d13, c23 = d23;
902 real_t c21, c31, c32;
903 int col, row;
904
905 // l1-norms of the columns:
906 c32 = fabs(d1) + fabs(c12) + fabs(c13);
907 c31 = fabs(d2) + fabs(c12) + fabs(c23);
908 c21 = fabs(d3) + fabs(c13) + fabs(c23);
909
910 // column pivoting: choose the column with the largest norm
911 if (c32 >= c21)
912 {
913 col = (c32 >= c31) ? 1 : 2;
914 }
915 else
916 {
917 col = (c31 >= c21) ? 2 : 3;
918 }
919 switch (col)
920 {
921 case 1:
922 if (c32 == 0.) // zero matrix
923 {
924 return 3;
925 }
926 break;
927
928 case 2:
929 if (c31 == 0.) // zero matrix
930 {
931 return 3;
932 }
933 Swap(c13, c23);
934 Swap(d1, d2);
935 break;
936
937 case 3:
938 if (c21 == 0.) // zero matrix
939 {
940 return 3;
941 }
942 Swap(c12, c23);
943 Swap(d1, d3);
944 }
945
946 // row pivoting depending on 'mode'
947 if (mode == 0)
948 {
949 if (fabs(d1) <= fabs(c13))
950 {
951 row = (fabs(d1) <= fabs(c12)) ? 1 : 2;
952 }
953 else
954 {
955 row = (fabs(c12) <= fabs(c13)) ? 2 : 3;
956 }
957 }
958 else
959 {
960 if (fabs(d1) >= fabs(c13))
961 {
962 row = (fabs(d1) >= fabs(c12)) ? 1 : 2;
963 }
964 else
965 {
966 row = (fabs(c12) >= fabs(c13)) ? 2 : 3;
967 }
968 }
969 switch (row)
970 {
971 case 1:
972 c21 = c12;
973 c31 = c13;
974 c32 = c23;
975 break;
976
977 case 2:
978 c21 = d1;
979 c31 = c13;
980 c32 = c23;
981 d1 = c12;
982 c12 = d2;
983 d2 = d1;
984 c13 = c23;
985 c23 = c31;
986 break;
987
988 case 3:
989 c21 = c12;
990 c31 = d1;
991 c32 = c12;
992 d1 = c13;
993 c12 = c23;
994 c13 = d3;
995 d3 = d1;
996 }
997 row = KernelVector3G_aux(mode, d1, d2, d3, c12, c13, c23, c21, c31, c32);
998 // row is kdim
999
1000 switch (col)
1001 {
1002 case 2:
1003 Swap(d1, d2);
1004 break;
1005
1006 case 3:
1007 Swap(d1, d3);
1008 }
1009 return row;
1010}
1011
1012/// Utility function used in CalcEigenvalues<3>.
1013MFEM_HOST_DEVICE static inline
1014int Reduce3S(const int &mode,
1015 real_t &d1, real_t &d2, real_t &d3,
1016 real_t &d12, real_t &d13, real_t &d23,
1017 real_t &z1, real_t &z2, real_t &z3,
1018 real_t &v1, real_t &v2, real_t &v3,
1019 real_t &g)
1020{
1021 // Given the matrix
1022 // | d1 d12 d13 |
1023 // A = | d12 d2 d23 |
1024 // | d13 d23 d3 |
1025 // and a unit eigenvector z=(z1,z2,z3), transform the matrix A into the
1026 // matrix B = Q P A P Q that has the form
1027 // | b1 0 0 |
1028 // B = Q P A P Q = | 0 b2 b23 |
1029 // | 0 b23 b3 |
1030 // where P is the permutation matrix switching entries 1 and k, and
1031 // Q is the reflection matrix Q = I - g v v^t, defined by: set y = P z and
1032 // v = c(y - e_1); if y = e_1, then v = 0 and Q = I.
1033 // Note: Q y = e_1, Q e_1 = y ==> Q P A P Q e_1 = ... = lambda e_1.
1034 // The entries (b1,b2,b3,b23) are returned in (d1,d2,d3,d23), and the
1035 // return value of the function is k. The variable g = 2/(v1^2+v2^2+v3^3).
1036
1037 int k;
1038 real_t s, w1, w2, w3;
1039
1040 if (mode == 0)
1041 {
1042 // choose k such that z^t e_k = zk has the smallest absolute value, i.e.
1043 // the angle between z and e_k is closest to pi/2
1044 if (fabs(z1) <= fabs(z3))
1045 {
1046 k = (fabs(z1) <= fabs(z2)) ? 1 : 2;
1047 }
1048 else
1049 {
1050 k = (fabs(z2) <= fabs(z3)) ? 2 : 3;
1051 }
1052 }
1053 else
1054 {
1055 // choose k such that zk is the largest by absolute value
1056 if (fabs(z1) >= fabs(z3))
1057 {
1058 k = (fabs(z1) >= fabs(z2)) ? 1 : 2;
1059 }
1060 else
1061 {
1062 k = (fabs(z2) >= fabs(z3)) ? 2 : 3;
1063 }
1064 }
1065 switch (k)
1066 {
1067 case 2:
1068 Swap(d13, d23);
1069 Swap(d1, d2);
1070 Swap(z1, z2);
1071 break;
1072
1073 case 3:
1074 Swap(d12, d23);
1075 Swap(d1, d3);
1076 Swap(z1, z3);
1077 }
1078
1079 s = hypot(z2, z3);
1080
1081 if (s == 0.)
1082 {
1083 // s can not be zero, if zk is the smallest (mode == 0)
1084 v1 = v2 = v3 = 0.;
1085 g = 1.;
1086 }
1087 else
1088 {
1089 g = copysign(1., z1);
1090 v1 = -s*(s/(z1 + g)); // = z1 - g
1091 // normalize (v1,z2,z3) by its max-norm, avoiding the sqrt call
1092 g = fabs(v1);
1093 if (fabs(z2) > g) { g = fabs(z2); }
1094 if (fabs(z3) > g) { g = fabs(z3); }
1095 v1 = v1/g;
1096 v2 = z2/g;
1097 v3 = z3/g;
1098 g = 2./(v1*v1 + v2*v2 + v3*v3);
1099
1100 // Compute Q A Q = A - v w^t - w v^t, where
1101 // w = u - (g/2)(v^t u) v, and u = g A v
1102 // set w = g A v
1103 w1 = g*( d1*v1 + d12*v2 + d13*v3);
1104 w2 = g*(d12*v1 + d2*v2 + d23*v3);
1105 w3 = g*(d13*v1 + d23*v2 + d3*v3);
1106 // w := w - (g/2)(v^t w) v
1107 s = (g/2)*(v1*w1 + v2*w2 + v3*w3);
1108 w1 -= s*v1;
1109 w2 -= s*v2;
1110 w3 -= s*v3;
1111 // dij -= vi*wj + wi*vj
1112 d1 -= 2*v1*w1;
1113 d2 -= 2*v2*w2;
1114 d23 -= v2*w3 + v3*w2;
1115 d3 -= 2*v3*w3;
1116 // compute the off-diagonal entries on the first row/column of B which
1117 // should be zero (for debugging):
1118#if 0
1119 s = d12 - v1*w2 - v2*w1; // b12 = 0
1120 s = d13 - v1*w3 - v3*w1; // b13 = 0
1121#endif
1122 }
1123
1124 switch (k)
1125 {
1126 case 2:
1127 Swap(z1, z2);
1128 break;
1129 case 3:
1130 Swap(z1, z3);
1131 }
1132 return k;
1133}
1134
1135} // namespace kernels::internal
1136
1137// Implementations of CalcLeftInverse for dim = 1, 2.
1138
1139template<> MFEM_HOST_DEVICE inline
1140void CalcLeftInverse<2,1>(const real_t *d, real_t *left_inv)
1141{
1142 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1]);
1143 left_inv[0] = d[0] * t;
1144 left_inv[1] = d[1] * t;
1145}
1146
1147template<> MFEM_HOST_DEVICE inline
1148void CalcLeftInverse<3,1>(const real_t *d, real_t *left_inv)
1149{
1150 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1] + d[2]*d[2]);
1151 left_inv[0] = d[0] * t;
1152 left_inv[1] = d[1] * t;
1153 left_inv[2] = d[2] * t;
1154}
1155
1156template<> MFEM_HOST_DEVICE inline
1157void CalcLeftInverse<3,2>(const real_t *d, real_t *left_inv)
1158{
1159 real_t e = d[0]*d[0] + d[1]*d[1] + d[2]*d[2];
1160 real_t g = d[3]*d[3] + d[4]*d[4] + d[5]*d[5];
1161 real_t f = d[0]*d[3] + d[1]*d[4] + d[2]*d[5];
1162 const real_t t = 1.0 / (e*g - f*f);
1163 e *= t; g *= t; f *= t;
1164
1165 left_inv[0] = d[0]*g - d[3]*f;
1166 left_inv[1] = d[3]*e - d[0]*f;
1167 left_inv[2] = d[1]*g - d[4]*f;
1168 left_inv[3] = d[4]*e - d[1]*f;
1169 left_inv[4] = d[2]*g - d[5]*f;
1170 left_inv[5] = d[5]*e - d[2]*f;
1171}
1172
1173// Implementations of CalcEigenvalues and CalcSingularvalue for dim = 2, 3.
1174
1175/// Compute the spectrum of the matrix of size 2 with given @a data, returning
1176/// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1177/// vec (listed consecutively).
1178template<> MFEM_HOST_DEVICE inline
1179void CalcEigenvalues<2>(const real_t *data, real_t *lambda, real_t *vec)
1180{
1181 real_t d0 = data[0];
1182 real_t d2 = data[2]; // use the upper triangular entry
1183 real_t d3 = data[3];
1184 real_t c, s;
1185 internal::Eigensystem2S(d2, d0, d3, c, s);
1186 if (d0 <= d3)
1187 {
1188 lambda[0] = d0;
1189 lambda[1] = d3;
1190 vec[0] = c;
1191 vec[1] = -s;
1192 vec[2] = s;
1193 vec[3] = c;
1194 }
1195 else
1196 {
1197 lambda[0] = d3;
1198 lambda[1] = d0;
1199 vec[0] = s;
1200 vec[1] = c;
1201 vec[2] = c;
1202 vec[3] = -s;
1203 }
1204}
1205
1206/// Compute the spectrum of the matrix of size 3 with given @a data, returning
1207/// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1208/// vec (listed consecutively).
1209template<> MFEM_HOST_DEVICE inline
1210void CalcEigenvalues<3>(const real_t *data, real_t *lambda, real_t *vec)
1211{
1212 real_t d11 = data[0];
1213 real_t d12 = data[3]; // use the upper triangular entries
1214 real_t d22 = data[4];
1215 real_t d13 = data[6];
1216 real_t d23 = data[7];
1217 real_t d33 = data[8];
1218
1219 real_t mult;
1220 {
1221 real_t d_max = fabs(d11);
1222 if (d_max < fabs(d22)) { d_max = fabs(d22); }
1223 if (d_max < fabs(d33)) { d_max = fabs(d33); }
1224 if (d_max < fabs(d12)) { d_max = fabs(d12); }
1225 if (d_max < fabs(d13)) { d_max = fabs(d13); }
1226 if (d_max < fabs(d23)) { d_max = fabs(d23); }
1227
1228 internal::GetScalingFactor(d_max, mult);
1229 }
1230
1231 d11 /= mult; d22 /= mult; d33 /= mult;
1232 d12 /= mult; d13 /= mult; d23 /= mult;
1233
1234 real_t aa = (d11 + d22 + d33)/3; // aa = tr(A)/3
1235 real_t c1 = d11 - aa;
1236 real_t c2 = d22 - aa;
1237 real_t c3 = d33 - aa;
1238
1239 real_t Q, R;
1240
1241 Q = (2*(d12*d12 + d13*d13 + d23*d23) + c1*c1 + c2*c2 + c3*c3)/6;
1242 R = (c1*(d23*d23 - c2*c3)+ d12*(d12*c3 - 2*d13*d23) + d13*d13*c2)/2;
1243
1244 if (Q <= 0.)
1245 {
1246 lambda[0] = lambda[1] = lambda[2] = aa;
1247 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1248 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1249 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1250 }
1251 else
1252 {
1253 real_t sqrtQ = sqrt(Q);
1254 real_t sqrtQ3 = Q*sqrtQ;
1255 // real_t sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1256 // real_t sqrtQ3 = pow(Q, 1.5);
1257 real_t r;
1258 if (fabs(R) >= sqrtQ3)
1259 {
1260 if (R < 0.)
1261 {
1262 // R = -1.;
1263 r = 2*sqrtQ;
1264 }
1265 else
1266 {
1267 // R = 1.;
1268 r = -2*sqrtQ;
1269 }
1270 }
1271 else
1272 {
1273 R = R/sqrtQ3;
1274
1275 if (R < 0.)
1276 {
1277 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1278 }
1279 else
1280 {
1281 r = -2*sqrtQ*cos(acos(R)/3); // min
1282 }
1283 }
1284
1285 aa += r;
1286 c1 = d11 - aa;
1287 c2 = d22 - aa;
1288 c3 = d33 - aa;
1289
1290 // Type of Householder reflections: z --> mu ek, where k is the index
1291 // of the entry in z with:
1292 // mode == 0: smallest absolute value --> angle closest to pi/2
1293 // mode == 1: largest absolute value --> angle farthest from pi/2
1294 // Observations:
1295 // mode == 0 produces better eigenvectors, less accurate eigenvalues?
1296 // mode == 1 produces better eigenvalues, less accurate eigenvectors?
1297 const int mode = 0;
1298
1299 // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1300 // | c1 d12 d13 |
1301 // | d12 c2 d23 | = A - aa*I
1302 // | d13 d23 c3 |
1303 // This vector is also an eigenvector for A corresponding to aa.
1304 // The vector z overwrites (c1,c2,c3).
1305 switch (internal::KernelVector3S(mode, d12, d13, d23, c1, c2, c3))
1306 {
1307 case 3:
1308 // 'aa' is a triple eigenvalue
1309 lambda[0] = lambda[1] = lambda[2] = aa;
1310 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1311 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1312 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1313 goto done_3d;
1314
1315 case 2:
1316 // ok, continue with the returned vector orthogonal to the kernel
1317 case 1:
1318 // ok, continue with the returned vector in the "near"-kernel
1319 ;
1320 }
1321
1322 // Using the eigenvector c=(c1,c2,c3) transform A into
1323 // | d11 0 0 |
1324 // A <-- Q P A P Q = | 0 d22 d23 |
1325 // | 0 d23 d33 |
1326 real_t v1, v2, v3, g;
1327 int k = internal::Reduce3S(mode, d11, d22, d33, d12, d13, d23,
1328 c1, c2, c3, v1, v2, v3, g);
1329 // Q = I - 2 v v^t
1330 // P - permutation matrix switching entries 1 and k
1331
1332 // find the eigenvalues and eigenvectors for
1333 // | d22 d23 |
1334 // | d23 d33 |
1335 real_t c, s;
1336 internal::Eigensystem2S(d23, d22, d33, c, s);
1337 // d22 <-> P Q (0, c, -s), d33 <-> P Q (0, s, c)
1338
1339 real_t *vec_1, *vec_2, *vec_3;
1340 if (d11 <= d22)
1341 {
1342 if (d22 <= d33)
1343 {
1344 lambda[0] = d11; vec_1 = vec;
1345 lambda[1] = d22; vec_2 = vec + 3;
1346 lambda[2] = d33; vec_3 = vec + 6;
1347 }
1348 else if (d11 <= d33)
1349 {
1350 lambda[0] = d11; vec_1 = vec;
1351 lambda[1] = d33; vec_3 = vec + 3;
1352 lambda[2] = d22; vec_2 = vec + 6;
1353 }
1354 else
1355 {
1356 lambda[0] = d33; vec_3 = vec;
1357 lambda[1] = d11; vec_1 = vec + 3;
1358 lambda[2] = d22; vec_2 = vec + 6;
1359 }
1360 }
1361 else
1362 {
1363 if (d11 <= d33)
1364 {
1365 lambda[0] = d22; vec_2 = vec;
1366 lambda[1] = d11; vec_1 = vec + 3;
1367 lambda[2] = d33; vec_3 = vec + 6;
1368 }
1369 else if (d22 <= d33)
1370 {
1371 lambda[0] = d22; vec_2 = vec;
1372 lambda[1] = d33; vec_3 = vec + 3;
1373 lambda[2] = d11; vec_1 = vec + 6;
1374 }
1375 else
1376 {
1377 lambda[0] = d33; vec_3 = vec;
1378 lambda[1] = d22; vec_2 = vec + 3;
1379 lambda[2] = d11; vec_1 = vec + 6;
1380 }
1381 }
1382
1383 vec_1[0] = c1;
1384 vec_1[1] = c2;
1385 vec_1[2] = c3;
1386 d22 = g*(v2*c - v3*s);
1387 d33 = g*(v2*s + v3*c);
1388 vec_2[0] = - v1*d22; vec_3[0] = - v1*d33;
1389 vec_2[1] = c - v2*d22; vec_3[1] = s - v2*d33;
1390 vec_2[2] = -s - v3*d22; vec_3[2] = c - v3*d33;
1391 switch (k)
1392 {
1393 case 2:
1394 internal::Swap(vec_2[0], vec_2[1]);
1395 internal::Swap(vec_3[0], vec_3[1]);
1396 break;
1397
1398 case 3:
1399 internal::Swap(vec_2[0], vec_2[2]);
1400 internal::Swap(vec_3[0], vec_3[2]);
1401 }
1402 }
1403
1404done_3d:
1405 lambda[0] *= mult;
1406 lambda[1] *= mult;
1407 lambda[2] *= mult;
1408}
1409
1410/// Return the i'th singular value of the matrix of size 2 with given @a data.
1411template<> MFEM_HOST_DEVICE inline
1412real_t CalcSingularvalue<2>(const real_t *data, const int i)
1413{
1414 real_t d0, d1, d2, d3;
1415 d0 = data[0];
1416 d1 = data[1];
1417 d2 = data[2];
1418 d3 = data[3];
1419 real_t mult;
1420
1421 {
1422 real_t d_max = fabs(d0);
1423 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1424 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1425 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1426 internal::GetScalingFactor(d_max, mult);
1427 }
1428
1429 d0 /= mult;
1430 d1 /= mult;
1431 d2 /= mult;
1432 d3 /= mult;
1433
1434 real_t t = 0.5*((d0+d2)*(d0-d2)+(d1-d3)*(d1+d3));
1435 real_t s = d0*d2 + d1*d3;
1436 s = sqrt(0.5*(d0*d0 + d1*d1 + d2*d2 + d3*d3) + sqrt(t*t + s*s));
1437
1438 if (s == 0.0)
1439 {
1440 return 0.0;
1441 }
1442 t = fabs(d0*d3 - d1*d2) / s;
1443 if (t > s)
1444 {
1445 if (i == 0)
1446 {
1447 return t*mult;
1448 }
1449 return s*mult;
1450 }
1451 if (i == 0)
1452 {
1453 return s*mult;
1454 }
1455 return t*mult;
1456}
1457
1458/// Return the i'th singular value of the matrix of size 3 with given @a data.
1459template<> MFEM_HOST_DEVICE inline
1460real_t CalcSingularvalue<3>(const real_t *data, const int i)
1461{
1462 real_t d0, d1, d2, d3, d4, d5, d6, d7, d8;
1463 d0 = data[0]; d3 = data[3]; d6 = data[6];
1464 d1 = data[1]; d4 = data[4]; d7 = data[7];
1465 d2 = data[2]; d5 = data[5]; d8 = data[8];
1466 real_t mult;
1467 {
1468 real_t d_max = fabs(d0);
1469 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1470 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1471 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1472 if (d_max < fabs(d4)) { d_max = fabs(d4); }
1473 if (d_max < fabs(d5)) { d_max = fabs(d5); }
1474 if (d_max < fabs(d6)) { d_max = fabs(d6); }
1475 if (d_max < fabs(d7)) { d_max = fabs(d7); }
1476 if (d_max < fabs(d8)) { d_max = fabs(d8); }
1477 internal::GetScalingFactor(d_max, mult);
1478 }
1479
1480 d0 /= mult; d1 /= mult; d2 /= mult;
1481 d3 /= mult; d4 /= mult; d5 /= mult;
1482 d6 /= mult; d7 /= mult; d8 /= mult;
1483
1484 real_t b11 = d0*d0 + d1*d1 + d2*d2;
1485 real_t b12 = d0*d3 + d1*d4 + d2*d5;
1486 real_t b13 = d0*d6 + d1*d7 + d2*d8;
1487 real_t b22 = d3*d3 + d4*d4 + d5*d5;
1488 real_t b23 = d3*d6 + d4*d7 + d5*d8;
1489 real_t b33 = d6*d6 + d7*d7 + d8*d8;
1490
1491 // double a, b, c;
1492 // a = -(b11 + b22 + b33);
1493 // b = b11*(b22 + b33) + b22*b33 - b12*b12 - b13*b13 - b23*b23;
1494 // c = b11*(b23*b23 - b22*b33) + b12*(b12*b33 - 2*b13*b23) + b13*b13*b22;
1495
1496 // double Q = (a * a - 3 * b) / 9;
1497 // double Q = (b12*b12 + b13*b13 + b23*b23 +
1498 // ((b11 - b22)*(b11 - b22) +
1499 // (b11 - b33)*(b11 - b33) +
1500 // (b22 - b33)*(b22 - b33))/6)/3;
1501 // Q = (3*(b12^2 + b13^2 + b23^2) +
1502 // ((b11 - b22)^2 + (b11 - b33)^2 + (b22 - b33)^2)/2)/9
1503 // or
1504 // Q = (1/6)*|B-tr(B)/3|_F^2
1505 // Q >= 0 and
1506 // Q = 0 <==> B = scalar * I
1507 // double R = (2 * a * a * a - 9 * a * b + 27 * c) / 54;
1508 real_t aa = (b11 + b22 + b33)/3; // aa = tr(B)/3
1509 real_t c1, c2, c3;
1510 // c1 = b11 - aa; // ((b11 - b22) + (b11 - b33))/3
1511 // c2 = b22 - aa; // ((b22 - b11) + (b22 - b33))/3
1512 // c3 = b33 - aa; // ((b33 - b11) + (b33 - b22))/3
1513 {
1514 real_t b11_b22 = ((d0-d3)*(d0+d3)+(d1-d4)*(d1+d4)+(d2-d5)*(d2+d5));
1515 real_t b22_b33 = ((d3-d6)*(d3+d6)+(d4-d7)*(d4+d7)+(d5-d8)*(d5+d8));
1516 real_t b33_b11 = ((d6-d0)*(d6+d0)+(d7-d1)*(d7+d1)+(d8-d2)*(d8+d2));
1517 c1 = (b11_b22 - b33_b11)/3;
1518 c2 = (b22_b33 - b11_b22)/3;
1519 c3 = (b33_b11 - b22_b33)/3;
1520 }
1521 real_t Q, R;
1522 Q = (2*(b12*b12 + b13*b13 + b23*b23) + c1*c1 + c2*c2 + c3*c3)/6;
1523 R = (c1*(b23*b23 - c2*c3)+ b12*(b12*c3 - 2*b13*b23) +b13*b13*c2)/2;
1524 // R = (-1/2)*det(B-(tr(B)/3)*I)
1525 // Note: 54*(det(S))^2 <= |S|_F^6, when S^t=S and tr(S)=0, S is 3x3
1526 // Therefore: R^2 <= Q^3
1527
1528 if (Q <= 0.) { ; }
1529
1530 // else if (fabs(R) >= sqrtQ3)
1531 // {
1532 // double det = (d[0] * (d[4] * d[8] - d[5] * d[7]) +
1533 // d[3] * (d[2] * d[7] - d[1] * d[8]) +
1534 // d[6] * (d[1] * d[5] - d[2] * d[4]));
1535 //
1536 // if (R > 0.)
1537 // {
1538 // if (i == 2)
1539 // // aa -= 2*sqrtQ;
1540 // return fabs(det)/(aa + sqrtQ);
1541 // else
1542 // aa += sqrtQ;
1543 // }
1544 // else
1545 // {
1546 // if (i != 0)
1547 // aa -= sqrtQ;
1548 // // aa = fabs(det)/sqrt(aa + 2*sqrtQ);
1549 // else
1550 // aa += 2*sqrtQ;
1551 // }
1552 // }
1553
1554 else
1555 {
1556 real_t sqrtQ = sqrt(Q);
1557 real_t sqrtQ3 = Q*sqrtQ;
1558 // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1559 // double sqrtQ3 = pow(Q, 1.5);
1560 real_t r;
1561
1562 if (fabs(R) >= sqrtQ3)
1563 {
1564 if (R < 0.)
1565 {
1566 // R = -1.;
1567 r = 2*sqrtQ;
1568 }
1569 else
1570 {
1571 // R = 1.;
1572 r = -2*sqrtQ;
1573 }
1574 }
1575 else
1576 {
1577 R = R/sqrtQ3;
1578
1579 // if (fabs(R) <= 0.95)
1580 if (fabs(R) <= 0.9)
1581 {
1582 if (i == 2)
1583 {
1584 aa -= 2*sqrtQ*cos(acos(R)/3); // min
1585 }
1586 else if (i == 0)
1587 {
1588 aa -= 2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1589 }
1590 else
1591 {
1592 aa -= 2*sqrtQ*cos((acos(R) - 2.0*M_PI)/3); // mid
1593 }
1594 goto have_aa;
1595 }
1596
1597 if (R < 0.)
1598 {
1599 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1600 if (i == 0)
1601 {
1602 aa += r;
1603 goto have_aa;
1604 }
1605 }
1606 else
1607 {
1608 r = -2*sqrtQ*cos(acos(R)/3); // min
1609 if (i == 2)
1610 {
1611 aa += r;
1612 goto have_aa;
1613 }
1614 }
1615 }
1616
1617 // (tr(B)/3 + r) is the root which is separated from the other
1618 // two roots which are close to each other when |R| is close to 1
1619
1620 c1 -= r;
1621 c2 -= r;
1622 c3 -= r;
1623 // aa += r;
1624
1625 // Type of Householder reflections: z --> mu ek, where k is the index
1626 // of the entry in z with:
1627 // mode == 0: smallest absolute value --> angle closest to pi/2
1628 // (eliminate large entries)
1629 // mode == 1: largest absolute value --> angle farthest from pi/2
1630 // (eliminate small entries)
1631 const int mode = 1;
1632
1633 // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1634 // | c1 b12 b13 |
1635 // | b12 c2 b23 | = B - aa*I
1636 // | b13 b23 c3 |
1637 // This vector is also an eigenvector for B corresponding to aa
1638 // The vector z overwrites (c1,c2,c3).
1639 switch (internal::KernelVector3S(mode, b12, b13, b23, c1, c2, c3))
1640 {
1641 case 3:
1642 aa += r;
1643 goto have_aa;
1644 case 2:
1645 // ok, continue with the returned vector orthogonal to the kernel
1646 case 1:
1647 // ok, continue with the returned vector in the "near"-kernel
1648 ;
1649 }
1650
1651 // Using the eigenvector c = (c1,c2,c3) to transform B into
1652 // | b11 0 0 |
1653 // B <-- Q P B P Q = | 0 b22 b23 |
1654 // | 0 b23 b33 |
1655 real_t v1, v2, v3, g;
1656 internal::Reduce3S(mode, b11, b22, b33, b12, b13, b23,
1657 c1, c2, c3, v1, v2, v3, g);
1658 // Q = I - g v v^t
1659 // P - permutation matrix switching rows and columns 1 and k
1660
1661 // find the eigenvalues of
1662 // | b22 b23 |
1663 // | b23 b33 |
1664 internal::Eigenvalues2S(b23, b22, b33);
1665
1666 if (i == 2)
1667 {
1668 aa = fmin(fmin(b11, b22), b33);
1669 }
1670 else if (i == 1)
1671 {
1672 if (b11 <= b22)
1673 {
1674 aa = (b22 <= b33) ? b22 : fmax(b11, b33);
1675 }
1676 else
1677 {
1678 aa = (b11 <= b33) ? b11 : fmax(b33, b22);
1679 }
1680 }
1681 else
1682 {
1683 aa = fmax(fmax(b11, b22), b33);
1684 }
1685 }
1686
1687have_aa:
1688
1689 return sqrt(fabs(aa))*mult; // take abs before we sort?
1690}
1691
1692/// @brief Assuming L.U = P.A factored matrix of size (m x m), compute
1693/// X <- L^{-1} P X, for a vector X of length m.
1694//
1695// @param [in] data LU factorization of A
1696// @param [in] m square matrix height
1697// @param [in] ipiv array storing pivots
1698// @param [in, out] x vector storing right-hand side and then solution
1699MFEM_HOST_DEVICE
1700inline void LSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
1701{
1702 // X <- P X
1703 for (int i = 0; i < m; i++)
1704 {
1705 internal::Swap<real_t>(x[i], x[ipiv[i]]);
1706 }
1707 // X <- L^{-1} X
1708 for (int j = 0; j < m; j++)
1709 {
1710 const real_t x_j = x[j];
1711 for (int i = j + 1; i < m; i++)
1712 {
1713 x[i] -= data[i + j * m] * x_j;
1714 }
1715 }
1716}
1717
1718/// @brief Assuming L.U = P.A factored matrix of size (m x m), compute
1719/// X <- U^{-1} X, for a vector X of length m.
1720//
1721// @param [in] data LU factorization of A
1722// @param [in] m square matrix height
1723// @param [in, out] x vector storing right-hand side and then solution
1724MFEM_HOST_DEVICE
1725inline void USolve(const real_t *data, const int m, real_t *x)
1726{
1727 for (int j = m - 1; j >= 0; j--)
1728 {
1729 const real_t x_j = (x[j] /= data[j + j * m]);
1730 for (int i = 0; i < j; i++)
1731 {
1732 x[i] -= data[i + j * m] * x_j;
1733 }
1734 }
1735}
1736
1737/// @brief Assuming L.U = P.A for a factored matrix (m x m),
1738// compute x <- A x
1739//
1740// @param [in] data LU factorization of A
1741// @param [in] m square matrix height
1742// @param [in] ipiv array storing pivot information
1743// @param [in, out] x vector storing right-hand side and then solution
1744MFEM_HOST_DEVICE
1745inline void LUSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
1746{
1747 LSolve(data, m, ipiv, x);
1748 USolve(data, m, x);
1749}
1750
1751
1752/// @brief Given an (n x m) matrix A21, compute X2 <- X2 - A21 X1, for matrices
1753/// X1, and X2 of size (m x r) and (n x r), respectively.
1754MFEM_HOST_DEVICE
1755inline void SubMult(const int m, const int n, const int r, const real_t *A21,
1756 const real_t *X1, real_t *X2)
1757{
1758 // X2 <- X2 - A21 X1
1759 for (int k = 0; k < r; k++)
1760 {
1761 for (int j = 0; j < m; j++)
1762 {
1763 const real_t x1_jk = X1[j+k*m];
1764 for (int i = 0; i < n; i++)
1765 {
1766 X2[i+k*n] -= A21[i+j*n] * x1_jk;
1767 }
1768 }
1769 }
1770}
1771
1772/// Assuming P.A = L.U factored data of size (m x m), compute the 2x2 block
1773/// decomposition:
1774/// | P 0 | | A A12 | = | L 0 | | U U12 |
1775/// | 0 I | | A21 A22 | | L21 I | | 0 S22 |
1776/// where A12, A21, and A22 are matrices of size (m x n), (n x m), and
1777/// (n x n), respectively. The blocks are overwritten as follows:
1778/// A12 <- U12 = L^{-1} P A12
1779/// A21 <- L21 = A21 U^{-1}
1780/// A22 <- S22 = A22 - L21 U12.
1781/// The block S22 is the Schur complement.
1782MFEM_HOST_DEVICE
1783inline void BlockFactor(const real_t *data, int m, const int *ipiv,
1784 int n, real_t *A12, real_t *A21, real_t *A22)
1785{
1786 // A12 <- L^{-1} P A12
1787 for (int i = 0; i < n; ++i)
1788 {
1789 LSolve(data, m, ipiv, A12 + i*m);
1790 }
1791 // A21 <- A21 U^{-1}
1792 for (int j = 0; j < m; j++)
1793 {
1794 const real_t u_jj_inv = 1.0/data[j+j*m];
1795 for (int i = 0; i < n; i++)
1796 {
1797 A21[i+j*n] *= u_jj_inv;
1798 }
1799 for (int k = j+1; k < m; k++)
1800 {
1801 const real_t u_jk = data[j+k*m];
1802 for (int i = 0; i < n; i++)
1803 {
1804 A21[i+k*n] -= A21[i+j*n] * u_jk;
1805 }
1806 }
1807 }
1808 // A22 <- A22 - A21 A12
1809 SubMult(m, n, n, A21, A12, A22);
1810}
1811
1812/// @brief Compute the LU factorization of the m x m matrix @a A.
1813///
1814/// Factorize the matrix of size (m x m) overwriting it with the LU factors. The
1815/// factorization is such that L.U = P.A, where A is the original matrix and P
1816/// is a permutation matrix represented by ipiv.
1817///
1818/// @param [in, out] A matrix
1819/// @param [in] m size of the square matrix
1820/// @param [out] ipiv array of pivots (length m)
1821/// @param [in] tol optional fuzzy comparison tolerance. Defaults to 0.0.
1822///
1823/// @return true if the factorization succeeds, false otherwise (zero pivot).
1824MFEM_HOST_DEVICE
1825inline bool LUFactor(real_t *A, const int m, int *ipiv, const real_t tol=0.0)
1826{
1827 bool pivot_flag = true;
1828
1829 for (int i = 0; i < m; i++)
1830 {
1831 // pivoting
1832 {
1833 int piv = i;
1834 real_t a = fabs(A[piv + m*i]);
1835 for (int j = i+1; j < m; j++)
1836 {
1837 const real_t b = fabs(A[j + m*i]);
1838 if (b > a)
1839 {
1840 a = b;
1841 piv = j;
1842 }
1843 }
1844 ipiv[i] = piv;
1845 if (piv != i)
1846 {
1847 // swap rows i and piv in both L and U parts
1848 for (int j = 0; j < m; j++)
1849 {
1850 internal::Swap<real_t>(A[i + m*j], A[piv + m*j]);
1851 }
1852 }
1853 } // pivot end
1854
1855 if (fabs(A[i + m*i]) <= tol)
1856 {
1857 pivot_flag = false;
1858 }
1859
1860 const real_t a_ii_inv = 1.0 / A[i + m*i];
1861 for (int j = i+1; j < m; j++)
1862 {
1863 A[j + m*i] *= a_ii_inv;
1864 }
1865
1866 for (int k = i+1; k < m; k++)
1867 {
1868 const real_t a_ik = A[i + m*k];
1869 for (int j = i+1; j < m; j++)
1870 {
1871 A[j + m*k] -= a_ik * A[j + m*i];
1872 }
1873 }
1874 }
1875
1876 return pivot_flag;
1877}
1878
1879} // namespace kernels
1880
1881} // namespace mfem
1882
1883#endif // MFEM_LINALG_KERNELS_HPP
Vector beta
const real_t alpha
Definition ex15.cpp:369
int dim
Definition ex24.cpp:53
real_t mu
Definition ex25.cpp:140
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data.
Definition kernels.hpp:246
MFEM_HOST_DEVICE real_t DistanceSquared(const real_t *x, const real_t *y)
Compute the square of the Euclidean distance to another vector.
Definition kernels.hpp:40
MFEM_HOST_DEVICE real_t CalcSingularvalue< 2 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 2 with given data.
Definition kernels.hpp:1412
MFEM_HOST_DEVICE void MultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *AtBdata)
Multiply the transpose of a matrix of size Aheight x Awidth and data Adata with a matrix of size Ahei...
Definition kernels.hpp:447
MFEM_HOST_DEVICE void CalcEigenvalues< 2 >(const real_t *data, real_t *lambda, real_t *vec)
Definition kernels.hpp:1179
MFEM_HOST_DEVICE void CalcLeftInverse(const real_t *data, real_t *left_inv)
Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
MFEM_HOST_DEVICE void CalcAdjugate(const T *data, T *adj_data)
Return the adjugate of a matrix.
Definition kernels.hpp:256
MFEM_HOST_DEVICE void Add(const int height, const int width, const TALPHA alpha, const TA *Adata, const TB *Bdata, TC *Cdata)
Compute C = A + alpha*B, where the matrices A, B and C are of size height x width with data Adata,...
Definition kernels.hpp:266
MFEM_HOST_DEVICE void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *Cdata, const TB alpha, const TA beta)
Compute C = alpha*At*B + beta*C.
Definition kernels.hpp:411
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data,...
Definition kernels.hpp:163
MFEM_HOST_DEVICE real_t Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition kernels.hpp:133
MFEM_HOST_DEVICE void FNorm(real_t &scale_factor, real_t &scaled_fnorm2, const T *data)
Definition kernels.hpp:79
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const real_t *d, real_t *left_inv)
Definition kernels.hpp:1140
MFEM_HOST_DEVICE void LSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- L^{-1} P X, for a vector X of length...
Definition kernels.hpp:1700
MFEM_HOST_DEVICE void MultABt(const int Aheight, const int Awidth, const int Bheight, const TA *Adata, const TB *Bdata, TC *ABtdata)
Multiply a matrix of size Aheight x Awidth and data Adata with the transpose of a matrix of size Bhei...
Definition kernels.hpp:383
MFEM_HOST_DEVICE void MultTranspose(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix transpose vector multiplication: y = At x, where the matrix A is of size height x width with g...
Definition kernels.hpp:196
MFEM_HOST_DEVICE void USolve(const real_t *data, const int m, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- U^{-1} X, for a vector X of length m...
Definition kernels.hpp:1725
MFEM_HOST_DEVICE void AddMult(const int Aheight, const int Awidth, const int Bwidth, const TB *Bdata, const TC *Cdata, TA *Adata, const TB alpha, const TA beta)
Matrix-matrix multiplication: A = alpha * B * C + beta * A, where the matrices A, B and C are of size...
Definition kernels.hpp:341
MFEM_HOST_DEVICE real_t FNorm2(const T *data)
Compute the square of the Frobenius norm of the matrix.
Definition kernels.hpp:123
MFEM_HOST_DEVICE real_t CalcSingularvalue(const real_t *data, const int i)
Return the i'th singular value of the matrix of size dim with given data.
MFEM_HOST_DEVICE void Symmetrize(const int size, T *data)
Symmetrize a square matrix with given size and data: A -> (A+A^T)/2.
Definition kernels.hpp:223
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const real_t *d, real_t *left_inv)
Definition kernels.hpp:1157
MFEM_HOST_DEVICE void BlockFactor(const real_t *data, int m, const int *ipiv, int n, real_t *A12, real_t *A21, real_t *A22)
Definition kernels.hpp:1783
MFEM_HOST_DEVICE void AddMultVWt(const real_t *v, const real_t *w, real_t *VWt)
Dense matrix operation: VWt += v w^t.
Definition kernels.hpp:67
MFEM_HOST_DEVICE void Diag(const real_t c, real_t *data)
Creates n x n diagonal matrix with diagonal elements c.
Definition kernels.hpp:49
MFEM_HOST_DEVICE bool LUFactor(real_t *A, const int m, int *ipiv, const real_t tol=0.0)
Compute the LU factorization of the m x m matrix A.
Definition kernels.hpp:1825
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
Definition kernels.hpp:326
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const real_t *d, real_t *left_inv)
Definition kernels.hpp:1148
MFEM_HOST_DEVICE void CalcEigenvalues(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition kernels.hpp:237
MFEM_HOST_DEVICE real_t CalcSingularvalue< 3 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 3 with given data.
Definition kernels.hpp:1460
MFEM_HOST_DEVICE void CalcEigenvalues< 3 >(const real_t *data, real_t *lambda, real_t *vec)
Definition kernels.hpp:1210
MFEM_HOST_DEVICE void SubMult(const int m, const int n, const int r, const real_t *A21, const real_t *X1, real_t *X2)
Given an (n x m) matrix A21, compute X2 <- X2 - A21 X1, for matrices X1, and X2 of size (m x r) and (...
Definition kernels.hpp:1755
MFEM_HOST_DEVICE void Subtract(const real_t a, const real_t *x, const real_t *y, real_t *z)
Vector subtraction operation: z = a * (x - y)
Definition kernels.hpp:58
MFEM_HOST_DEVICE void LUSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A for a factored matrix (m x m),.
Definition kernels.hpp:1745
void Swap(Array< T > &, Array< T > &)
Definition array.hpp:672
MFEM_HOST_DEVICE void TAssignHD(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
Definition ttensor.hpp:264
MFEM_HOST_DEVICE scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:664
MFEM_HOST_DEVICE scalar_t TDetHD(const layout_t &a, const data_t &A)
Definition tmatrix.hpp:588
float real_t
Definition config.hpp:43
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
MFEM_HOST_DEVICE void TAdjugateHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:635