MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
kernels.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_LINALG_KERNELS_HPP
13#define MFEM_LINALG_KERNELS_HPP
14
15#include "../config/config.hpp"
16
17#include "tmatrix.hpp"
18#include "tlayout.hpp"
19#include "ttensor.hpp"
20
21// This header contains stand-alone functions for "small" dense linear algebra
22// (at quadrature point or element-level) designed to be inlined directly into
23// device kernels.
24
25// Many methods of the DenseMatrix class and some of the Vector class call these
26// kernels directly on the host, see the implementations in linalg/densemat.cpp
27// and linalg/vector.cpp.
28
29namespace mfem
30{
31
32namespace kernels
33{
34
35/// Compute the square of the Euclidean distance to another vector
36template<int dim>
37MFEM_HOST_DEVICE inline real_t DistanceSquared(const real_t *x, const real_t *y)
38{
39 real_t d = 0.0;
40 for (int i = 0; i < dim; i++) { d += (x[i]-y[i])*(x[i]-y[i]); }
41 return d;
42}
43
44/// Creates n x n diagonal matrix with diagonal elements c
45template<int dim>
46MFEM_HOST_DEVICE inline void Diag(const real_t c, real_t *data)
47{
48 const int N = dim*dim;
49 for (int i = 0; i < N; i++) { data[i] = 0.0; }
50 for (int i = 0; i < dim; i++) { data[i*(dim+1)] = c; }
51}
52
53/// Vector subtraction operation: z = a * (x - y)
54template<int dim>
55MFEM_HOST_DEVICE inline void Subtract(const real_t a,
56 const real_t *x, const real_t *y,
57 real_t *z)
58{
59 for (int i = 0; i < dim; i++) { z[i] = a * (x[i] - y[i]); }
60}
61
62/// Dense matrix operation: VWt += v w^t
63template<int dim>
64MFEM_HOST_DEVICE inline void AddMultVWt(const real_t *v, const real_t *w,
65 real_t *VWt)
66{
67 for (int i = 0; i < dim; i++)
68 {
69 const real_t vi = v[i];
70 for (int j = 0; j < dim; j++) { VWt[i*dim+j] += vi * w[j]; }
71 }
72}
73
74template<int H, int W, typename T>
75MFEM_HOST_DEVICE inline
76void FNorm(real_t &scale_factor, real_t &scaled_fnorm2, const T *data)
77{
78 int i, hw = H * W;
79 T max_norm = 0.0, entry, fnorm2;
80
81 for (i = 0; i < hw; i++)
82 {
83 entry = fabs(data[i]);
84 if (entry > max_norm)
85 {
86 max_norm = entry;
87 }
88 }
89
90 if (max_norm == 0.0)
91 {
92 scale_factor = scaled_fnorm2 = 0.0;
93 return;
94 }
95
96 fnorm2 = 0.0;
97 for (i = 0; i < hw; i++)
98 {
99 entry = data[i] / max_norm;
100 fnorm2 += entry * entry;
101 }
102
103 scale_factor = max_norm;
104 scaled_fnorm2 = fnorm2;
105}
106
107/// Compute the Frobenius norm of the matrix
108template<int H, int W, typename T>
109MFEM_HOST_DEVICE inline
110real_t FNorm(const T *data)
111{
112 real_t s, n2;
113 kernels::FNorm<H,W>(s, n2, data);
114 return s*sqrt(n2);
115}
116
117/// Compute the square of the Frobenius norm of the matrix
118template<int H, int W, typename T>
119MFEM_HOST_DEVICE inline
120real_t FNorm2(const T *data)
121{
122 real_t s, n2;
123 kernels::FNorm<H,W>(s, n2, data);
124 return s*s*n2;
125}
126
127/// Returns the l2 norm of the Vector with given @a size and @a data.
128template<typename T>
129MFEM_HOST_DEVICE inline
130real_t Norml2(const int size, const T *data)
131{
132 if (0 == size) { return 0.0; }
133 if (1 == size) { return std::abs(data[0]); }
134 T scale = 0.0;
135 T sum = 0.0;
136 for (int i = 0; i < size; i++)
137 {
138 if (data[i] != 0.0)
139 {
140 const T absdata = fabs(data[i]);
141 if (scale <= absdata)
142 {
143 const T sqr_arg = scale / absdata;
144 sum = 1.0 + sum * (sqr_arg * sqr_arg);
145 scale = absdata;
146 continue;
147 } // end if scale <= absdata
148 const T sqr_arg = absdata / scale;
149 sum += (sqr_arg * sqr_arg); // else scale > absdata
150 } // end if data[i] != 0
151 }
152 return scale * sqrt(sum);
153}
154
155/** @brief Matrix vector multiplication: y = A x, where the matrix A is of size
156 @a height x @a width with given @a data, while @a x and @a y specify the
157 data of the input and output vectors. */
158template<typename TA, typename TX, typename TY>
159MFEM_HOST_DEVICE inline
160void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
161{
162 if (width == 0)
163 {
164 for (int row = 0; row < height; row++)
165 {
166 y[row] = 0.0;
167 }
168 return;
169 }
170 const TA *d_col = data;
171 TX x_col = x[0];
172 for (int row = 0; row < height; row++)
173 {
174 y[row] = x_col*d_col[row];
175 }
176 d_col += height;
177 for (int col = 1; col < width; col++)
178 {
179 x_col = x[col];
180 for (int row = 0; row < height; row++)
181 {
182 y[row] += x_col*d_col[row];
183 }
184 d_col += height;
185 }
186}
187
188/** @brief Absolute-value matrix vector multiplication: y = |A| x, where the
189 matrix A is of size @a height x @a width with given @a data, while @a x and
190 @a y specify the data of the input and output vectors. */
191template<typename TA, typename TX, typename TY>
192MFEM_HOST_DEVICE inline
193void AbsMult(const int height, const int width, const TA *data,
194 const TX *x, TY *y)
195{
196 if (width == 0)
197 {
198 for (int row = 0; row < height; row++)
199 {
200 y[row] = 0.0;
201 }
202 return;
203 }
204 const TA *d_col = data;
205 TX x_col = x[0];
206 for (int row = 0; row < height; row++)
207 {
208 y[row] = x_col*std::fabs(d_col[row]);
209 }
210 d_col += height;
211 for (int col = 1; col < width; col++)
212 {
213 x_col = x[col];
214 for (int row = 0; row < height; row++)
215 {
216 y[row] += x_col*std::fabs(d_col[row]);
217 }
218 d_col += height;
219 }
220}
221
222/** @brief Matrix transpose vector multiplication: y = At x, where the matrix A
223 is of size @a height x @a width with given @a data, while @a x and @a y
224 specify the data of the input and output vectors. */
225template<typename TA, typename TX, typename TY>
226MFEM_HOST_DEVICE inline
227void MultTranspose(const int height, const int width, const TA *data,
228 const TX *x, TY *y)
229{
230 if (height == 0)
231 {
232 for (int row = 0; row < width; row++)
233 {
234 y[row] = 0.0;
235 }
236 return;
237 }
238 TY *y_off = y;
239 for (int i = 0; i < width; ++i)
240 {
241 TY val = 0.0;
242 for (int j = 0; j < height; ++j)
243 {
244 val += x[j] * data[i * height + j];
245 }
246 *y_off = val;
247 y_off++;
248 }
249}
250
251/** @brief Absolute-value matrix transpose vector multiplication: y = |At| x,
252 where the matrix A is of size @a height x @a width with given @a data, while
253 @a x and @a y specify the data of the input and output vectors. */
254template<typename TA, typename TX, typename TY>
255MFEM_HOST_DEVICE inline
256void AbsMultTranspose(const int height, const int width, const TA *data,
257 const TX *x, TY *y)
258{
259 if (height == 0)
260 {
261 for (int row = 0; row < width; row++)
262 {
263 y[row] = 0.0;
264 }
265 return;
266 }
267 TY *y_off = y;
268 for (int i = 0; i < width; ++i)
269 {
270 TY val = 0.0;
271 for (int j = 0; j < height; ++j)
272 {
273 val += x[j] * std::fabs(data[i * height + j]);
274 }
275 *y_off = val;
276 y_off++;
277 }
278}
279
280/// Symmetrize a square matrix with given @a size and @a data: A -> (A+A^T)/2.
281template<typename T>
282MFEM_HOST_DEVICE inline
283void Symmetrize(const int size, T *data)
284{
285 for (int i = 0; i < size; i++)
286 {
287 for (int j = 0; j < i; j++)
288 {
289 const T a = 0.5 * (data[i*size+j] + data[j*size+i]);
290 data[j*size+i] = data[i*size+j] = a;
291 }
292 }
293}
294
295/// Compute the determinant of a square matrix of size dim with given @a data.
296template<int dim, typename T>
297MFEM_HOST_DEVICE inline T Det(const T *data)
298{
300}
301
302/** @brief Return the inverse of a matrix with given @a size and @a data into
303 the matrix with data @a inv_data. */
304template<int dim, typename T>
305MFEM_HOST_DEVICE inline
306void CalcInverse(const T *data, T *inv_data)
307{
308 typedef ColumnMajorLayout2D<dim,dim> layout_t;
309 const T det = TAdjDetHD<T>(layout_t(), data, layout_t(), inv_data);
310 TAssignHD<AssignOp::Mult>(layout_t(), inv_data, static_cast<T>(1.0)/det);
311}
312
313/** @brief Return the adjugate of a matrix */
314template<int dim, typename T>
315MFEM_HOST_DEVICE inline
316void CalcAdjugate(const T *data, T *adj_data)
317{
318 typedef ColumnMajorLayout2D<dim,dim> layout_t;
319 TAdjugateHD<T>(layout_t(), data, layout_t(), adj_data);
320}
321
322/** @brief Compute C = A + alpha*B, where the matrices A, B and C are of size @a
323 height x @a width with data @a Adata, @a Bdata and @a Cdata. */
324template<typename TALPHA, typename TA, typename TB, typename TC>
325MFEM_HOST_DEVICE inline
326void Add(const int height, const int width, const TALPHA alpha,
327 const TA *Adata, const TB *Bdata, TC *Cdata)
328{
329 for (int j = 0; j < width; j++)
330 {
331 for (int i = 0; i < height; i++)
332 {
333 const int n = i*width+j;
334 Cdata[n] = Adata[n] + alpha * Bdata[n];
335 }
336 }
337}
338
339/** @brief Compute C = alpha*A + beta*B, where the matrices A, B and C are of
340 size @a height x @a width with data @a Adata, @a Bdata and @a Cdata. */
341template<typename TALPHA, typename TBETA, typename TA, typename TB, typename TC>
342MFEM_HOST_DEVICE inline
343void Add(const int height, const int width,
344 const TALPHA alpha, const TA *Adata,
345 const TBETA beta, const TB *Bdata,
346 TC *Cdata)
347{
348 const int m = height * width;
349 for (int i = 0; i < m; i++)
350 {
351 Cdata[i] = alpha * Adata[i] + beta * Bdata[i];
352 }
353}
354
355/** @brief Compute B += A, where the matrices A and B are of size
356 @a height x @a width with data @a Adata and @a Bdata. */
357template<typename TA, typename TB>
358MFEM_HOST_DEVICE inline
359void Add(const int height, const int width, const TA *Adata, TB *Bdata)
360{
361 const int m = height * width;
362 for (int i = 0; i < m; i++)
363 {
364 Bdata[i] += Adata[i];
365 }
366}
367
368/** @brief Compute B +=alpha*A, where the matrices A and B are of size
369 @a height x @a width with data @a Adata and @a Bdata. */
370template<typename TA, typename TB>
371MFEM_HOST_DEVICE inline
372void Add(const int height, const int width,
373 const real_t alpha, const TA *Adata, TB *Bdata)
374{
375 const int m = height * width;
376 for (int i = 0; i < m; i++)
377 {
378 Bdata[i] += alpha * Adata[i];
379 }
380}
381
382/** @brief Compute B = alpha*A, where the matrices A and B are of size
383 @a height x @a width with data @a Adata and @a Bdata. */
384template<typename TA, typename TB>
385MFEM_HOST_DEVICE inline
386void Set(const int height, const int width,
387 const real_t alpha, const TA *Adata, TB *Bdata)
388{
389 const int m = height * width;
390 for (int i = 0; i < m; i++)
391 {
392 Bdata[i] = alpha * Adata[i];
393 }
394}
395
396/** @brief Matrix-matrix multiplication: A = alpha * B * C + beta * A, where the
397 matrices A, B and C are of sizes @a Aheight x @a Awidth, @a Aheight x @a
398 Bwidth and @a Bwidth x @a Awidth, respectively. */
399template<typename TA, typename TB, typename TC>
400MFEM_HOST_DEVICE inline
401void AddMult(const int Aheight, const int Awidth, const int Bwidth,
402 const TB *Bdata, const TC *Cdata, TA *Adata, const TB alpha,
403 const TA beta)
404{
405 const int ah_x_aw = Aheight * Awidth;
406 if (beta == 0.0)
407 {
408 for (int i = 0; i < ah_x_aw; i++) { Adata[i] = 0.0; }
409 }
410 else if (beta != 1.0)
411 {
412 for (int i = 0; i < ah_x_aw; i++) { Adata[i] *= beta; }
413 }
414 for (int j = 0; j < Awidth; j++)
415 {
416 for (int k = 0; k < Bwidth; k++)
417 {
418 const real_t val = alpha * Cdata[k+j*Bwidth];
419 for (int i = 0; i < Aheight; i++)
420 {
421 Adata[i+j*Aheight] += val * Bdata[i+k*Aheight];
422 }
423 }
424 }
425}
426
427/** @brief Matrix-matrix multiplication: A = B * C, where the matrices A, B and
428 C are of sizes @a Aheight x @a Awidth, @a Aheight x @a Bwidth and @a Bwidth
429 x @a Awidth, respectively. */
430template<typename TA, typename TB, typename TC>
431MFEM_HOST_DEVICE inline
432void Mult(const int Aheight, const int Awidth, const int Bwidth,
433 const TB *Bdata, const TC *Cdata, TA *Adata)
434{
435 AddMult(Aheight, Awidth, Bwidth, Bdata, Cdata, Adata, TB(1.0), TA(0.0));
436}
437
438/** @brief Multiply a matrix of size @a Aheight x @a Awidth and data @a Adata
439 with the transpose of a matrix of size @a Bheight x @a Awidth and data @a
440 Bdata: A * Bt. Return the result in a matrix with data @a ABtdata. */
441template<typename TA, typename TB, typename TC>
442MFEM_HOST_DEVICE inline
443void MultABt(const int Aheight, const int Awidth, const int Bheight,
444 const TA *Adata, const TB *Bdata, TC *ABtdata)
445{
446 const int ah_x_bh = Aheight * Bheight;
447 for (int i = 0; i < ah_x_bh; i++) { ABtdata[i] = 0.0; }
448 for (int k = 0; k < Awidth; k++)
449 {
450 TC *c = ABtdata;
451 for (int j = 0; j < Bheight; j++)
452 {
453 const real_t bjk = Bdata[j];
454 for (int i = 0; i < Aheight; i++)
455 {
456 c[i] += Adata[i] * bjk;
457 }
458 c += Aheight;
459 }
460 Adata += Aheight;
461 Bdata += Bheight;
462 }
463}
464
465/** @brief Compute C = alpha*At*B + beta*C.
466
467 Multiply the transpose of a matrix of size @a Aheight x @a Awidth and data
468 @a Adata with a matrix of size @a Aheight x @a Bwidth and data @a Bdata. */
469template<typename TA, typename TB, typename TC>
470MFEM_HOST_DEVICE inline
471void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth,
472 const TA *Adata, const TB *Bdata, TC *Cdata, const TB alpha,
473 const TA beta)
474{
475 const int aw_x_bw = Awidth * Bwidth;
476
477 if (beta == 0.0)
478 {
479 for (int i = 0; i < aw_x_bw; i++) { Cdata[i] = 0.0; }
480 }
481 else if (beta != 1.0)
482 {
483 for (int i = 0; i < aw_x_bw; i++) { Cdata[i] *= beta; }
484 }
485
486 TC *c = Cdata;
487 for (int i = 0; i < Bwidth; ++i)
488 {
489 for (int j = 0; j < Awidth; ++j)
490 {
491 TC val = 0.0;
492 for (int k = 0; k < Aheight; ++k)
493 {
494 val += alpha * Adata[j * Aheight + k] * Bdata[i * Aheight + k];
495 }
496 *c += val;
497 c++;
498 }
499 }
500}
501
502/** @brief Multiply the transpose of a matrix of size @a Aheight x @a Awidth
503 and data @a Adata with a matrix of size @a Aheight x @a Bwidth and data @a
504 Bdata: At * B. Return the result in a matrix with data @a AtBdata. */
505template<typename TA, typename TB, typename TC>
506MFEM_HOST_DEVICE inline
507void MultAtB(const int Aheight, const int Awidth, const int Bwidth,
508 const TA *Adata, const TB *Bdata, TC *AtBdata)
509{
510 AddMultAtB(Aheight, Awidth, Bwidth, Adata, Bdata, AtBdata, TB(1.0), TA(0.0));
511}
512
513/** @brief Multiply the transpose of a matrix of size @a Aheight x @a Awidth
514 and data @a Adata with a matrix of size @a Aheight x @a Bwidth and data @a
515 Bdata: At * B. Add the result to the matrix with data @a AtBdata. */
516template<typename TA, typename TB, typename TC>
517MFEM_HOST_DEVICE inline
518void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth,
519 const TA *Adata, const TB *Bdata, TC *AtBdata)
520{
521 TC *c = AtBdata;
522 for (int i = 0; i < Bwidth; ++i)
523 {
524 for (int j = 0; j < Awidth; ++j)
525 {
526 TC val = 0.0;
527 for (int k = 0; k < Aheight; ++k)
528 {
529 val += Adata[j * Aheight + k] * Bdata[i * Aheight + k];
530 }
531 *c += val;
532 c++;
533 }
534 }
535}
536
537/// Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
538template<int HEIGHT, int WIDTH> MFEM_HOST_DEVICE
539void CalcLeftInverse(const real_t *data, real_t *left_inv);
540
541/// Compute the spectrum of the matrix of size dim with given @a data, returning
542/// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
543/// vec (listed consecutively).
544template<int dim> MFEM_HOST_DEVICE
545void CalcEigenvalues(const real_t *data, real_t *lambda, real_t *vec);
546
547/// Return the i'th singular value of the matrix of size dim with given @a data.
548template<int dim> MFEM_HOST_DEVICE
549real_t CalcSingularvalue(const real_t *data, const int i);
550
551
552// Utility functions for CalcEigenvalues and CalcSingularvalue
553namespace internal
554{
555
556/// Utility function to swap the values of @a a and @a b.
557template<typename T>
558MFEM_HOST_DEVICE static inline
559void Swap(T &a, T &b)
560{
561 T tmp = a;
562 a = b;
563 b = tmp;
564}
565
566const real_t Epsilon = std::numeric_limits<real_t>::epsilon();
567
568/// Utility function used in CalcSingularvalue<3>.
569MFEM_HOST_DEVICE static inline
570void Eigenvalues2S(const real_t &d12, real_t &d1, real_t &d2)
571{
572 const real_t sqrt_1_eps = sqrt(1./Epsilon);
573 if (d12 != 0.)
574 {
575 // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
576 real_t t;
577 const real_t zeta = (d2 - d1)/(2*d12); // inf/inf from overflows?
578 if (fabs(zeta) < sqrt_1_eps)
579 {
580 t = d12*copysign(1./(fabs(zeta) + sqrt(1. + zeta*zeta)), zeta);
581 }
582 else
583 {
584 t = d12*copysign(0.5/fabs(zeta), zeta);
585 }
586 d1 -= t;
587 d2 += t;
588 }
589}
590
591/// Utility function used in CalcEigenvalues().
592MFEM_HOST_DEVICE static inline
593void Eigensystem2S(const real_t &d12, real_t &d1, real_t &d2,
594 real_t &c, real_t &s)
595{
596 const real_t sqrt_1_eps = sqrt(1./Epsilon);
597 if (d12 == 0.0)
598 {
599 c = 1.;
600 s = 0.;
601 }
602 else
603 {
604 // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
605 real_t t;
606 const real_t zeta = (d2 - d1)/(2*d12);
607 const real_t azeta = fabs(zeta);
608 if (azeta < sqrt_1_eps)
609 {
610 t = copysign(1./(azeta + sqrt(1. + zeta*zeta)), zeta);
611 }
612 else
613 {
614 t = copysign(0.5/azeta, zeta);
615 }
616 c = sqrt(1./(1. + t*t));
617 s = c*t;
618 t *= d12;
619 d1 -= t;
620 d2 += t;
621 }
622}
623
624
625/// Utility function used in CalcEigenvalues<3>.
626MFEM_HOST_DEVICE static inline
627void GetScalingFactor(const real_t &d_max, real_t &mult)
628{
629 int d_exp;
630 if (d_max > 0.)
631 {
632 mult = frexp(d_max, &d_exp);
633 if (d_exp == std::numeric_limits<real_t>::max_exponent)
634 {
635 mult *= std::numeric_limits<real_t>::radix;
636 }
637 mult = d_max/mult;
638 }
639 else
640 {
641 mult = 1.;
642 }
643 // mult = 2^d_exp is such that d_max/mult is in [0.5,1) or in other words
644 // d_max is in the interval [0.5,1)*mult
645}
646
647/// Utility function used in CalcEigenvalues<3>.
648MFEM_HOST_DEVICE static inline
649bool KernelVector2G(const int &mode,
650 real_t &d1, real_t &d12, real_t &d21, real_t &d2)
651{
652 // Find a vector (z1,z2) in the "near"-kernel of the matrix
653 // | d1 d12 |
654 // | d21 d2 |
655 // using QR factorization.
656 // The vector (z1,z2) is returned in (d1,d2). Return 'true' if the matrix
657 // is zero without setting (d1,d2).
658 // Note: in the current implementation |z1| + |z2| = 1.
659
660 // l1-norms of the columns
661 real_t n1 = fabs(d1) + fabs(d21);
662 real_t n2 = fabs(d2) + fabs(d12);
663
664 bool swap_columns = (n2 > n1);
665 real_t mu;
666
667 if (!swap_columns)
668 {
669 if (n1 == 0.)
670 {
671 return true;
672 }
673
674 if (mode == 0) // eliminate the larger entry in the column
675 {
676 if (fabs(d1) > fabs(d21))
677 {
678 Swap(d1, d21);
679 Swap(d12, d2);
680 }
681 }
682 else // eliminate the smaller entry in the column
683 {
684 if (fabs(d1) < fabs(d21))
685 {
686 Swap(d1, d21);
687 Swap(d12, d2);
688 }
689 }
690 }
691 else
692 {
693 // n2 > n1, swap columns 1 and 2
694 if (mode == 0) // eliminate the larger entry in the column
695 {
696 if (fabs(d12) > fabs(d2))
697 {
698 Swap(d1, d2);
699 Swap(d12, d21);
700 }
701 else
702 {
703 Swap(d1, d12);
704 Swap(d21, d2);
705 }
706 }
707 else // eliminate the smaller entry in the column
708 {
709 if (fabs(d12) < fabs(d2))
710 {
711 Swap(d1, d2);
712 Swap(d12, d21);
713 }
714 else
715 {
716 Swap(d1, d12);
717 Swap(d21, d2);
718 }
719 }
720 }
721
722 n1 = hypot(d1, d21);
723
724 if (d21 != 0.)
725 {
726 // v = (n1, n2)^t, |v| = 1
727 // Q = I - 2 v v^t, Q (d1, d21)^t = (mu, 0)^t
728 mu = copysign(n1, d1);
729 n1 = -d21*(d21/(d1 + mu)); // = d1 - mu
730 d1 = mu;
731 // normalize (n1,d21) to avoid overflow/underflow
732 // normalize (n1,d21) by the max-norm to avoid the sqrt call
733 if (fabs(n1) <= fabs(d21))
734 {
735 // (n1,n2) <-- (n1/d21,1)
736 n1 = n1/d21;
737 mu = (2./(1. + n1*n1))*(n1*d12 + d2);
738 d2 = d2 - mu;
739 d12 = d12 - mu*n1;
740 }
741 else
742 {
743 // (n1,n2) <-- (1,d21/n1)
744 n2 = d21/n1;
745 mu = (2./(1. + n2*n2))*(d12 + n2*d2);
746 d2 = d2 - mu*n2;
747 d12 = d12 - mu;
748 }
749 }
750
751 // Solve:
752 // | d1 d12 | | z1 | = | 0 |
753 // | 0 d2 | | z2 | | 0 |
754
755 // choose (z1,z2) to minimize |d1*z1 + d12*z2| + |d2*z2|
756 // under the condition |z1| + |z2| = 1, z2 >= 0 (for uniqueness)
757 // set t = z1, z2 = 1 - |t|, -1 <= t <= 1
758 // objective function is:
759 // |d1*t + d12*(1 - |t|)| + |d2|*(1 - |t|) -- piecewise linear with
760 // possible minima are -1,0,1,t1 where t1: d1*t1 + d12*(1 - |t1|) = 0
761 // values: @t=+/-1 -> |d1|, @t=0 -> |n1| + |d2|, @t=t1 -> |d2|*(1 - |t1|)
762
763 // evaluate z2 @t=t1
764 mu = -d12/d1;
765 // note: |mu| <= 1, if using l2-norm for column pivoting
766 // |mu| <= sqrt(2), if using l1-norm
767 n2 = 1./(1. + fabs(mu));
768 // check if |d1|<=|d2|*z2
769 if (fabs(d1) <= n2*fabs(d2))
770 {
771 d2 = 0.;
772 d1 = 1.;
773 }
774 else
775 {
776 d2 = n2;
777 // d1 = (n2 < 0.5) ? copysign(1. - n2, mu) : mu*n2;
778 d1 = mu*n2;
779 }
780
781 if (swap_columns)
782 {
783 Swap(d1, d2);
784 }
785
786 return false;
787}
788
789/// Utility function used in CalcEigenvalues<3>.
790MFEM_HOST_DEVICE static inline
791void Vec_normalize3_aux(const real_t &x1, const real_t &x2,
792 const real_t &x3,
793 real_t &n1, real_t &n2, real_t &n3)
794{
795 real_t t, r;
796
797 const real_t m = fabs(x1);
798 r = x2/m;
799 t = 1. + r*r;
800 r = x3/m;
801 t = sqrt(1./(t + r*r));
802 n1 = copysign(t, x1);
803 t /= m;
804 n2 = x2*t;
805 n3 = x3*t;
806}
807
808/// Utility function used in CalcEigenvalues<3>.
809MFEM_HOST_DEVICE static inline
810void Vec_normalize3(const real_t &x1, const real_t &x2, const real_t &x3,
811 real_t &n1, real_t &n2, real_t &n3)
812{
813 // should work ok when xk is the same as nk for some or all k
814 if (fabs(x1) >= fabs(x2))
815 {
816 if (fabs(x1) >= fabs(x3))
817 {
818 if (x1 != 0.)
819 {
820 Vec_normalize3_aux(x1, x2, x3, n1, n2, n3);
821 }
822 else
823 {
824 n1 = n2 = n3 = 0.;
825 }
826 return;
827 }
828 }
829 else if (fabs(x2) >= fabs(x3))
830 {
831 Vec_normalize3_aux(x2, x1, x3, n2, n1, n3);
832 return;
833 }
834 Vec_normalize3_aux(x3, x1, x2, n3, n1, n2);
835}
836
837/// Utility function used in CalcEigenvalues<3>.
838MFEM_HOST_DEVICE static inline
839int KernelVector3G_aux(const int &mode,
840 real_t &d1, real_t &d2, real_t &d3,
841 real_t &c12, real_t &c13, real_t &c23,
842 real_t &c21, real_t &c31, real_t &c32)
843{
844 int kdim;
845 real_t mu, n1, n2, n3, s1, s2, s3;
846
847 s1 = hypot(c21, c31);
848 n1 = hypot(d1, s1);
849
850 if (s1 != 0.)
851 {
852 // v = (s1, s2, s3)^t, |v| = 1
853 // Q = I - 2 v v^t, Q (d1, c12, c13)^t = (mu, 0, 0)^t
854 mu = copysign(n1, d1);
855 n1 = -s1*(s1/(d1 + mu)); // = d1 - mu
856 d1 = mu;
857
858 // normalize (n1,c21,c31) to avoid overflow/underflow
859 // normalize (n1,c21,c31) by the max-norm to avoid the sqrt call
860 if (fabs(n1) >= fabs(c21))
861 {
862 if (fabs(n1) >= fabs(c31))
863 {
864 // n1 is max, (s1,s2,s3) <-- (1,c21/n1,c31/n1)
865 s2 = c21/n1;
866 s3 = c31/n1;
867 mu = 2./(1. + s2*s2 + s3*s3);
868 n2 = mu*(c12 + s2*d2 + s3*c32);
869 n3 = mu*(c13 + s2*c23 + s3*d3);
870 c12 = c12 - n2;
871 d2 = d2 - s2*n2;
872 c32 = c32 - s3*n2;
873 c13 = c13 - n3;
874 c23 = c23 - s2*n3;
875 d3 = d3 - s3*n3;
876 goto done_column_1;
877 }
878 }
879 else if (fabs(c21) >= fabs(c31))
880 {
881 // c21 is max, (s1,s2,s3) <-- (n1/c21,1,c31/c21)
882 s1 = n1/c21;
883 s3 = c31/c21;
884 mu = 2./(1. + s1*s1 + s3*s3);
885 n2 = mu*(s1*c12 + d2 + s3*c32);
886 n3 = mu*(s1*c13 + c23 + s3*d3);
887 c12 = c12 - s1*n2;
888 d2 = d2 - n2;
889 c32 = c32 - s3*n2;
890 c13 = c13 - s1*n3;
891 c23 = c23 - n3;
892 d3 = d3 - s3*n3;
893 goto done_column_1;
894 }
895 // c31 is max, (s1,s2,s3) <-- (n1/c31,c21/c31,1)
896 s1 = n1/c31;
897 s2 = c21/c31;
898 mu = 2./(1. + s1*s1 + s2*s2);
899 n2 = mu*(s1*c12 + s2*d2 + c32);
900 n3 = mu*(s1*c13 + s2*c23 + d3);
901 c12 = c12 - s1*n2;
902 d2 = d2 - s2*n2;
903 c32 = c32 - n2;
904 c13 = c13 - s1*n3;
905 c23 = c23 - s2*n3;
906 d3 = d3 - n3;
907 }
908
909done_column_1:
910
911 // Solve:
912 // | d2 c23 | | z2 | = | 0 |
913 // | c32 d3 | | z3 | | 0 |
914 if (KernelVector2G(mode, d2, c23, c32, d3))
915 {
916 // Have two solutions:
917 // two vectors in the kernel are P (-c12/d1, 1, 0)^t and
918 // P (-c13/d1, 0, 1)^t where P is the permutation matrix swapping
919 // entries 1 and col.
920
921 // A vector orthogonal to both these vectors is P (1, c12/d1, c13/d1)^t
922 d2 = c12/d1;
923 d3 = c13/d1;
924 d1 = 1.;
925 kdim = 2;
926 }
927 else
928 {
929 // solve for z1:
930 // note: |z1| <= a since |z2| + |z3| = 1, and
931 // max{|c12|,|c13|} <= max{norm(col. 2),norm(col. 3)}
932 // <= norm(col. 1) <= a |d1|
933 // a = 1, if using l2-norm for column pivoting
934 // a = sqrt(3), if using l1-norm
935 d1 = -(c12*d2 + c13*d3)/d1;
936 kdim = 1;
937 }
938
939 Vec_normalize3(d1, d2, d3, d1, d2, d3);
940
941 return kdim;
942}
943
944/// Utility function used in CalcEigenvalues<3>.
945MFEM_HOST_DEVICE static inline
946int KernelVector3S(const int &mode, const real_t &d12,
947 const real_t &d13, const real_t &d23,
948 real_t &d1, real_t &d2, real_t &d3)
949{
950 // Find a unit vector (z1,z2,z3) in the "near"-kernel of the matrix
951 // | d1 d12 d13 |
952 // | d12 d2 d23 |
953 // | d13 d23 d3 |
954 // using QR factorization.
955 // The vector (z1,z2,z3) is returned in (d1,d2,d3).
956 // Returns the dimension of the kernel, kdim, but never zero.
957 // - if kdim == 3, then (d1,d2,d3) is not defined,
958 // - if kdim == 2, then (d1,d2,d3) is a vector orthogonal to the kernel,
959 // - otherwise kdim == 1 and (d1,d2,d3) is a vector in the "near"-kernel.
960
961 real_t c12 = d12, c13 = d13, c23 = d23;
962 real_t c21, c31, c32;
963 int col, row;
964
965 // l1-norms of the columns:
966 c32 = fabs(d1) + fabs(c12) + fabs(c13);
967 c31 = fabs(d2) + fabs(c12) + fabs(c23);
968 c21 = fabs(d3) + fabs(c13) + fabs(c23);
969
970 // column pivoting: choose the column with the largest norm
971 if (c32 >= c21)
972 {
973 col = (c32 >= c31) ? 1 : 2;
974 }
975 else
976 {
977 col = (c31 >= c21) ? 2 : 3;
978 }
979 switch (col)
980 {
981 case 1:
982 if (c32 == 0.) // zero matrix
983 {
984 return 3;
985 }
986 break;
987
988 case 2:
989 if (c31 == 0.) // zero matrix
990 {
991 return 3;
992 }
993 Swap(c13, c23);
994 Swap(d1, d2);
995 break;
996
997 case 3:
998 if (c21 == 0.) // zero matrix
999 {
1000 return 3;
1001 }
1002 Swap(c12, c23);
1003 Swap(d1, d3);
1004 }
1005
1006 // row pivoting depending on 'mode'
1007 if (mode == 0)
1008 {
1009 if (fabs(d1) <= fabs(c13))
1010 {
1011 row = (fabs(d1) <= fabs(c12)) ? 1 : 2;
1012 }
1013 else
1014 {
1015 row = (fabs(c12) <= fabs(c13)) ? 2 : 3;
1016 }
1017 }
1018 else
1019 {
1020 if (fabs(d1) >= fabs(c13))
1021 {
1022 row = (fabs(d1) >= fabs(c12)) ? 1 : 2;
1023 }
1024 else
1025 {
1026 row = (fabs(c12) >= fabs(c13)) ? 2 : 3;
1027 }
1028 }
1029 switch (row)
1030 {
1031 case 1:
1032 c21 = c12;
1033 c31 = c13;
1034 c32 = c23;
1035 break;
1036
1037 case 2:
1038 c21 = d1;
1039 c31 = c13;
1040 c32 = c23;
1041 d1 = c12;
1042 c12 = d2;
1043 d2 = d1;
1044 c13 = c23;
1045 c23 = c31;
1046 break;
1047
1048 case 3:
1049 c21 = c12;
1050 c31 = d1;
1051 c32 = c12;
1052 d1 = c13;
1053 c12 = c23;
1054 c13 = d3;
1055 d3 = d1;
1056 }
1057 row = KernelVector3G_aux(mode, d1, d2, d3, c12, c13, c23, c21, c31, c32);
1058 // row is kdim
1059
1060 switch (col)
1061 {
1062 case 2:
1063 Swap(d1, d2);
1064 break;
1065
1066 case 3:
1067 Swap(d1, d3);
1068 }
1069 return row;
1070}
1071
1072/// Utility function used in CalcEigenvalues<3>.
1073MFEM_HOST_DEVICE static inline
1074int Reduce3S(const int &mode,
1075 real_t &d1, real_t &d2, real_t &d3,
1076 real_t &d12, real_t &d13, real_t &d23,
1077 real_t &z1, real_t &z2, real_t &z3,
1078 real_t &v1, real_t &v2, real_t &v3,
1079 real_t &g)
1080{
1081 // Given the matrix
1082 // | d1 d12 d13 |
1083 // A = | d12 d2 d23 |
1084 // | d13 d23 d3 |
1085 // and a unit eigenvector z=(z1,z2,z3), transform the matrix A into the
1086 // matrix B = Q P A P Q that has the form
1087 // | b1 0 0 |
1088 // B = Q P A P Q = | 0 b2 b23 |
1089 // | 0 b23 b3 |
1090 // where P is the permutation matrix switching entries 1 and k, and
1091 // Q is the reflection matrix Q = I - g v v^t, defined by: set y = P z and
1092 // v = c(y - e_1); if y = e_1, then v = 0 and Q = I.
1093 // Note: Q y = e_1, Q e_1 = y ==> Q P A P Q e_1 = ... = lambda e_1.
1094 // The entries (b1,b2,b3,b23) are returned in (d1,d2,d3,d23), and the
1095 // return value of the function is k. The variable g = 2/(v1^2+v2^2+v3^3).
1096
1097 int k;
1098 real_t s, w1, w2, w3;
1099
1100 if (mode == 0)
1101 {
1102 // choose k such that z^t e_k = zk has the smallest absolute value, i.e.
1103 // the angle between z and e_k is closest to pi/2
1104 if (fabs(z1) <= fabs(z3))
1105 {
1106 k = (fabs(z1) <= fabs(z2)) ? 1 : 2;
1107 }
1108 else
1109 {
1110 k = (fabs(z2) <= fabs(z3)) ? 2 : 3;
1111 }
1112 }
1113 else
1114 {
1115 // choose k such that zk is the largest by absolute value
1116 if (fabs(z1) >= fabs(z3))
1117 {
1118 k = (fabs(z1) >= fabs(z2)) ? 1 : 2;
1119 }
1120 else
1121 {
1122 k = (fabs(z2) >= fabs(z3)) ? 2 : 3;
1123 }
1124 }
1125 switch (k)
1126 {
1127 case 2:
1128 Swap(d13, d23);
1129 Swap(d1, d2);
1130 Swap(z1, z2);
1131 break;
1132
1133 case 3:
1134 Swap(d12, d23);
1135 Swap(d1, d3);
1136 Swap(z1, z3);
1137 }
1138
1139 s = hypot(z2, z3);
1140
1141 if (s == 0.)
1142 {
1143 // s can not be zero, if zk is the smallest (mode == 0)
1144 v1 = v2 = v3 = 0.;
1145 g = 1.;
1146 }
1147 else
1148 {
1149 g = copysign(1., z1);
1150 v1 = -s*(s/(z1 + g)); // = z1 - g
1151 // normalize (v1,z2,z3) by its max-norm, avoiding the sqrt call
1152 g = fabs(v1);
1153 if (fabs(z2) > g) { g = fabs(z2); }
1154 if (fabs(z3) > g) { g = fabs(z3); }
1155 v1 = v1/g;
1156 v2 = z2/g;
1157 v3 = z3/g;
1158 g = 2./(v1*v1 + v2*v2 + v3*v3);
1159
1160 // Compute Q A Q = A - v w^t - w v^t, where
1161 // w = u - (g/2)(v^t u) v, and u = g A v
1162 // set w = g A v
1163 w1 = g*( d1*v1 + d12*v2 + d13*v3);
1164 w2 = g*(d12*v1 + d2*v2 + d23*v3);
1165 w3 = g*(d13*v1 + d23*v2 + d3*v3);
1166 // w := w - (g/2)(v^t w) v
1167 s = (g/2)*(v1*w1 + v2*w2 + v3*w3);
1168 w1 -= s*v1;
1169 w2 -= s*v2;
1170 w3 -= s*v3;
1171 // dij -= vi*wj + wi*vj
1172 d1 -= 2*v1*w1;
1173 d2 -= 2*v2*w2;
1174 d23 -= v2*w3 + v3*w2;
1175 d3 -= 2*v3*w3;
1176 // compute the off-diagonal entries on the first row/column of B which
1177 // should be zero (for debugging):
1178#if 0
1179 s = d12 - v1*w2 - v2*w1; // b12 = 0
1180 s = d13 - v1*w3 - v3*w1; // b13 = 0
1181#endif
1182 }
1183
1184 switch (k)
1185 {
1186 case 2:
1187 Swap(z1, z2);
1188 break;
1189 case 3:
1190 Swap(z1, z3);
1191 }
1192 return k;
1193}
1194
1195} // namespace kernels::internal
1196
1197// Implementations of CalcLeftInverse for dim = 1, 2.
1198
1199template<> MFEM_HOST_DEVICE inline
1200void CalcLeftInverse<2,1>(const real_t *d, real_t *left_inv)
1201{
1202 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1]);
1203 left_inv[0] = d[0] * t;
1204 left_inv[1] = d[1] * t;
1205}
1206
1207template<> MFEM_HOST_DEVICE inline
1208void CalcLeftInverse<3,1>(const real_t *d, real_t *left_inv)
1209{
1210 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1] + d[2]*d[2]);
1211 left_inv[0] = d[0] * t;
1212 left_inv[1] = d[1] * t;
1213 left_inv[2] = d[2] * t;
1214}
1215
1216template<> MFEM_HOST_DEVICE inline
1217void CalcLeftInverse<3,2>(const real_t *d, real_t *left_inv)
1218{
1219 real_t e = d[0]*d[0] + d[1]*d[1] + d[2]*d[2];
1220 real_t g = d[3]*d[3] + d[4]*d[4] + d[5]*d[5];
1221 real_t f = d[0]*d[3] + d[1]*d[4] + d[2]*d[5];
1222 const real_t t = 1.0 / (e*g - f*f);
1223 e *= t; g *= t; f *= t;
1224
1225 left_inv[0] = d[0]*g - d[3]*f;
1226 left_inv[1] = d[3]*e - d[0]*f;
1227 left_inv[2] = d[1]*g - d[4]*f;
1228 left_inv[3] = d[4]*e - d[1]*f;
1229 left_inv[4] = d[2]*g - d[5]*f;
1230 left_inv[5] = d[5]*e - d[2]*f;
1231}
1232
1233// Implementations of CalcEigenvalues and CalcSingularvalue for dim = 2, 3.
1234
1235/// Compute the spectrum of the matrix of size 2 with given @a data, returning
1236/// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1237/// vec (listed consecutively).
1238template<> MFEM_HOST_DEVICE inline
1239void CalcEigenvalues<2>(const real_t *data, real_t *lambda, real_t *vec)
1240{
1241 real_t d0 = data[0];
1242 real_t d2 = data[2]; // use the upper triangular entry
1243 real_t d3 = data[3];
1244 real_t c, s;
1245 internal::Eigensystem2S(d2, d0, d3, c, s);
1246 if (d0 <= d3)
1247 {
1248 lambda[0] = d0;
1249 lambda[1] = d3;
1250 vec[0] = c;
1251 vec[1] = -s;
1252 vec[2] = s;
1253 vec[3] = c;
1254 }
1255 else
1256 {
1257 lambda[0] = d3;
1258 lambda[1] = d0;
1259 vec[0] = s;
1260 vec[1] = c;
1261 vec[2] = c;
1262 vec[3] = -s;
1263 }
1264}
1265
1266/// Compute the spectrum of the matrix of size 3 with given @a data, returning
1267/// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1268/// vec (listed consecutively).
1269template<> MFEM_HOST_DEVICE inline
1270void CalcEigenvalues<3>(const real_t *data, real_t *lambda, real_t *vec)
1271{
1272 real_t d11 = data[0];
1273 real_t d12 = data[3]; // use the upper triangular entries
1274 real_t d22 = data[4];
1275 real_t d13 = data[6];
1276 real_t d23 = data[7];
1277 real_t d33 = data[8];
1278
1279 real_t mult;
1280 {
1281 real_t d_max = fabs(d11);
1282 if (d_max < fabs(d22)) { d_max = fabs(d22); }
1283 if (d_max < fabs(d33)) { d_max = fabs(d33); }
1284 if (d_max < fabs(d12)) { d_max = fabs(d12); }
1285 if (d_max < fabs(d13)) { d_max = fabs(d13); }
1286 if (d_max < fabs(d23)) { d_max = fabs(d23); }
1287
1288 internal::GetScalingFactor(d_max, mult);
1289 }
1290
1291 d11 /= mult; d22 /= mult; d33 /= mult;
1292 d12 /= mult; d13 /= mult; d23 /= mult;
1293
1294 real_t aa = (d11 + d22 + d33)/3; // aa = tr(A)/3
1295 real_t c1 = d11 - aa;
1296 real_t c2 = d22 - aa;
1297 real_t c3 = d33 - aa;
1298
1299 real_t Q, R;
1300
1301 Q = (2*(d12*d12 + d13*d13 + d23*d23) + c1*c1 + c2*c2 + c3*c3)/6;
1302 R = (c1*(d23*d23 - c2*c3)+ d12*(d12*c3 - 2*d13*d23) + d13*d13*c2)/2;
1303
1304 if (Q <= 0.)
1305 {
1306 lambda[0] = lambda[1] = lambda[2] = aa;
1307 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1308 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1309 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1310 }
1311 else
1312 {
1313 real_t sqrtQ = sqrt(Q);
1314 real_t sqrtQ3 = Q*sqrtQ;
1315 // real_t sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1316 // real_t sqrtQ3 = pow(Q, 1.5);
1317 real_t r;
1318 if (fabs(R) >= sqrtQ3)
1319 {
1320 if (R < 0.)
1321 {
1322 // R = -1.;
1323 r = 2*sqrtQ;
1324 }
1325 else
1326 {
1327 // R = 1.;
1328 r = -2*sqrtQ;
1329 }
1330 }
1331 else
1332 {
1333 R = R/sqrtQ3;
1334
1335 if (R < 0.)
1336 {
1337 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1338 }
1339 else
1340 {
1341 r = -2*sqrtQ*cos(acos(R)/3); // min
1342 }
1343 }
1344
1345 aa += r;
1346 c1 = d11 - aa;
1347 c2 = d22 - aa;
1348 c3 = d33 - aa;
1349
1350 // Type of Householder reflections: z --> mu ek, where k is the index
1351 // of the entry in z with:
1352 // mode == 0: smallest absolute value --> angle closest to pi/2
1353 // mode == 1: largest absolute value --> angle farthest from pi/2
1354 // Observations:
1355 // mode == 0 produces better eigenvectors, less accurate eigenvalues?
1356 // mode == 1 produces better eigenvalues, less accurate eigenvectors?
1357 const int mode = 0;
1358
1359 // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1360 // | c1 d12 d13 |
1361 // | d12 c2 d23 | = A - aa*I
1362 // | d13 d23 c3 |
1363 // This vector is also an eigenvector for A corresponding to aa.
1364 // The vector z overwrites (c1,c2,c3).
1365 switch (internal::KernelVector3S(mode, d12, d13, d23, c1, c2, c3))
1366 {
1367 case 3:
1368 // 'aa' is a triple eigenvalue
1369 lambda[0] = lambda[1] = lambda[2] = aa;
1370 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1371 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1372 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1373 goto done_3d;
1374
1375 case 2:
1376 // ok, continue with the returned vector orthogonal to the kernel
1377 case 1:
1378 // ok, continue with the returned vector in the "near"-kernel
1379 ;
1380 }
1381
1382 // Using the eigenvector c=(c1,c2,c3) transform A into
1383 // | d11 0 0 |
1384 // A <-- Q P A P Q = | 0 d22 d23 |
1385 // | 0 d23 d33 |
1386 real_t v1, v2, v3, g;
1387 int k = internal::Reduce3S(mode, d11, d22, d33, d12, d13, d23,
1388 c1, c2, c3, v1, v2, v3, g);
1389 // Q = I - 2 v v^t
1390 // P - permutation matrix switching entries 1 and k
1391
1392 // find the eigenvalues and eigenvectors for
1393 // | d22 d23 |
1394 // | d23 d33 |
1395 real_t c, s;
1396 internal::Eigensystem2S(d23, d22, d33, c, s);
1397 // d22 <-> P Q (0, c, -s), d33 <-> P Q (0, s, c)
1398
1399 real_t *vec_1, *vec_2, *vec_3;
1400 if (d11 <= d22)
1401 {
1402 if (d22 <= d33)
1403 {
1404 lambda[0] = d11; vec_1 = vec;
1405 lambda[1] = d22; vec_2 = vec + 3;
1406 lambda[2] = d33; vec_3 = vec + 6;
1407 }
1408 else if (d11 <= d33)
1409 {
1410 lambda[0] = d11; vec_1 = vec;
1411 lambda[1] = d33; vec_3 = vec + 3;
1412 lambda[2] = d22; vec_2 = vec + 6;
1413 }
1414 else
1415 {
1416 lambda[0] = d33; vec_3 = vec;
1417 lambda[1] = d11; vec_1 = vec + 3;
1418 lambda[2] = d22; vec_2 = vec + 6;
1419 }
1420 }
1421 else
1422 {
1423 if (d11 <= d33)
1424 {
1425 lambda[0] = d22; vec_2 = vec;
1426 lambda[1] = d11; vec_1 = vec + 3;
1427 lambda[2] = d33; vec_3 = vec + 6;
1428 }
1429 else if (d22 <= d33)
1430 {
1431 lambda[0] = d22; vec_2 = vec;
1432 lambda[1] = d33; vec_3 = vec + 3;
1433 lambda[2] = d11; vec_1 = vec + 6;
1434 }
1435 else
1436 {
1437 lambda[0] = d33; vec_3 = vec;
1438 lambda[1] = d22; vec_2 = vec + 3;
1439 lambda[2] = d11; vec_1 = vec + 6;
1440 }
1441 }
1442
1443 vec_1[0] = c1;
1444 vec_1[1] = c2;
1445 vec_1[2] = c3;
1446 d22 = g*(v2*c - v3*s);
1447 d33 = g*(v2*s + v3*c);
1448 vec_2[0] = - v1*d22; vec_3[0] = - v1*d33;
1449 vec_2[1] = c - v2*d22; vec_3[1] = s - v2*d33;
1450 vec_2[2] = -s - v3*d22; vec_3[2] = c - v3*d33;
1451 switch (k)
1452 {
1453 case 2:
1454 internal::Swap(vec_2[0], vec_2[1]);
1455 internal::Swap(vec_3[0], vec_3[1]);
1456 break;
1457
1458 case 3:
1459 internal::Swap(vec_2[0], vec_2[2]);
1460 internal::Swap(vec_3[0], vec_3[2]);
1461 }
1462 }
1463
1464done_3d:
1465 lambda[0] *= mult;
1466 lambda[1] *= mult;
1467 lambda[2] *= mult;
1468}
1469
1470/// Return the i'th singular value of the matrix of size 2 with given @a data.
1471template<> MFEM_HOST_DEVICE inline
1472real_t CalcSingularvalue<2>(const real_t *data, const int i)
1473{
1474 real_t d0, d1, d2, d3;
1475 d0 = data[0];
1476 d1 = data[1];
1477 d2 = data[2];
1478 d3 = data[3];
1479 real_t mult;
1480
1481 {
1482 real_t d_max = fabs(d0);
1483 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1484 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1485 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1486 internal::GetScalingFactor(d_max, mult);
1487 }
1488
1489 d0 /= mult;
1490 d1 /= mult;
1491 d2 /= mult;
1492 d3 /= mult;
1493
1494 real_t t = 0.5*((d0+d2)*(d0-d2)+(d1-d3)*(d1+d3));
1495 real_t s = d0*d2 + d1*d3;
1496 s = sqrt(0.5*(d0*d0 + d1*d1 + d2*d2 + d3*d3) + sqrt(t*t + s*s));
1497
1498 if (s == 0.0)
1499 {
1500 return 0.0;
1501 }
1502 t = fabs(d0*d3 - d1*d2) / s;
1503 if (t > s)
1504 {
1505 if (i == 0)
1506 {
1507 return t*mult;
1508 }
1509 return s*mult;
1510 }
1511 if (i == 0)
1512 {
1513 return s*mult;
1514 }
1515 return t*mult;
1516}
1517
1518/// Return the i'th singular value of the matrix of size 3 with given @a data.
1519template<> MFEM_HOST_DEVICE inline
1520real_t CalcSingularvalue<3>(const real_t *data, const int i)
1521{
1522 real_t d0, d1, d2, d3, d4, d5, d6, d7, d8;
1523 d0 = data[0]; d3 = data[3]; d6 = data[6];
1524 d1 = data[1]; d4 = data[4]; d7 = data[7];
1525 d2 = data[2]; d5 = data[5]; d8 = data[8];
1526 real_t mult;
1527 {
1528 real_t d_max = fabs(d0);
1529 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1530 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1531 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1532 if (d_max < fabs(d4)) { d_max = fabs(d4); }
1533 if (d_max < fabs(d5)) { d_max = fabs(d5); }
1534 if (d_max < fabs(d6)) { d_max = fabs(d6); }
1535 if (d_max < fabs(d7)) { d_max = fabs(d7); }
1536 if (d_max < fabs(d8)) { d_max = fabs(d8); }
1537 internal::GetScalingFactor(d_max, mult);
1538 }
1539
1540 d0 /= mult; d1 /= mult; d2 /= mult;
1541 d3 /= mult; d4 /= mult; d5 /= mult;
1542 d6 /= mult; d7 /= mult; d8 /= mult;
1543
1544 real_t b11 = d0*d0 + d1*d1 + d2*d2;
1545 real_t b12 = d0*d3 + d1*d4 + d2*d5;
1546 real_t b13 = d0*d6 + d1*d7 + d2*d8;
1547 real_t b22 = d3*d3 + d4*d4 + d5*d5;
1548 real_t b23 = d3*d6 + d4*d7 + d5*d8;
1549 real_t b33 = d6*d6 + d7*d7 + d8*d8;
1550
1551 // double a, b, c;
1552 // a = -(b11 + b22 + b33);
1553 // b = b11*(b22 + b33) + b22*b33 - b12*b12 - b13*b13 - b23*b23;
1554 // c = b11*(b23*b23 - b22*b33) + b12*(b12*b33 - 2*b13*b23) + b13*b13*b22;
1555
1556 // double Q = (a * a - 3 * b) / 9;
1557 // double Q = (b12*b12 + b13*b13 + b23*b23 +
1558 // ((b11 - b22)*(b11 - b22) +
1559 // (b11 - b33)*(b11 - b33) +
1560 // (b22 - b33)*(b22 - b33))/6)/3;
1561 // Q = (3*(b12^2 + b13^2 + b23^2) +
1562 // ((b11 - b22)^2 + (b11 - b33)^2 + (b22 - b33)^2)/2)/9
1563 // or
1564 // Q = (1/6)*|B-tr(B)/3|_F^2
1565 // Q >= 0 and
1566 // Q = 0 <==> B = scalar * I
1567 // double R = (2 * a * a * a - 9 * a * b + 27 * c) / 54;
1568 real_t aa = (b11 + b22 + b33)/3; // aa = tr(B)/3
1569 real_t c1, c2, c3;
1570 // c1 = b11 - aa; // ((b11 - b22) + (b11 - b33))/3
1571 // c2 = b22 - aa; // ((b22 - b11) + (b22 - b33))/3
1572 // c3 = b33 - aa; // ((b33 - b11) + (b33 - b22))/3
1573 {
1574 real_t b11_b22 = ((d0-d3)*(d0+d3)+(d1-d4)*(d1+d4)+(d2-d5)*(d2+d5));
1575 real_t b22_b33 = ((d3-d6)*(d3+d6)+(d4-d7)*(d4+d7)+(d5-d8)*(d5+d8));
1576 real_t b33_b11 = ((d6-d0)*(d6+d0)+(d7-d1)*(d7+d1)+(d8-d2)*(d8+d2));
1577 c1 = (b11_b22 - b33_b11)/3;
1578 c2 = (b22_b33 - b11_b22)/3;
1579 c3 = (b33_b11 - b22_b33)/3;
1580 }
1581 real_t Q, R;
1582 Q = (2*(b12*b12 + b13*b13 + b23*b23) + c1*c1 + c2*c2 + c3*c3)/6;
1583 R = (c1*(b23*b23 - c2*c3)+ b12*(b12*c3 - 2*b13*b23) +b13*b13*c2)/2;
1584 // R = (-1/2)*det(B-(tr(B)/3)*I)
1585 // Note: 54*(det(S))^2 <= |S|_F^6, when S^t=S and tr(S)=0, S is 3x3
1586 // Therefore: R^2 <= Q^3
1587
1588 if (Q <= 0.) { ; }
1589
1590 // else if (fabs(R) >= sqrtQ3)
1591 // {
1592 // double det = (d[0] * (d[4] * d[8] - d[5] * d[7]) +
1593 // d[3] * (d[2] * d[7] - d[1] * d[8]) +
1594 // d[6] * (d[1] * d[5] - d[2] * d[4]));
1595 //
1596 // if (R > 0.)
1597 // {
1598 // if (i == 2)
1599 // // aa -= 2*sqrtQ;
1600 // return fabs(det)/(aa + sqrtQ);
1601 // else
1602 // aa += sqrtQ;
1603 // }
1604 // else
1605 // {
1606 // if (i != 0)
1607 // aa -= sqrtQ;
1608 // // aa = fabs(det)/sqrt(aa + 2*sqrtQ);
1609 // else
1610 // aa += 2*sqrtQ;
1611 // }
1612 // }
1613
1614 else
1615 {
1616 real_t sqrtQ = sqrt(Q);
1617 real_t sqrtQ3 = Q*sqrtQ;
1618 // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1619 // double sqrtQ3 = pow(Q, 1.5);
1620 real_t r;
1621
1622 if (fabs(R) >= sqrtQ3)
1623 {
1624 if (R < 0.)
1625 {
1626 // R = -1.;
1627 r = 2*sqrtQ;
1628 }
1629 else
1630 {
1631 // R = 1.;
1632 r = -2*sqrtQ;
1633 }
1634 }
1635 else
1636 {
1637 R = R/sqrtQ3;
1638
1639 // if (fabs(R) <= 0.95)
1640 if (fabs(R) <= 0.9)
1641 {
1642 if (i == 2)
1643 {
1644 aa -= 2*sqrtQ*cos(acos(R)/3); // min
1645 }
1646 else if (i == 0)
1647 {
1648 aa -= 2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1649 }
1650 else
1651 {
1652 aa -= 2*sqrtQ*cos((acos(R) - 2.0*M_PI)/3); // mid
1653 }
1654 goto have_aa;
1655 }
1656
1657 if (R < 0.)
1658 {
1659 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1660 if (i == 0)
1661 {
1662 aa += r;
1663 goto have_aa;
1664 }
1665 }
1666 else
1667 {
1668 r = -2*sqrtQ*cos(acos(R)/3); // min
1669 if (i == 2)
1670 {
1671 aa += r;
1672 goto have_aa;
1673 }
1674 }
1675 }
1676
1677 // (tr(B)/3 + r) is the root which is separated from the other
1678 // two roots which are close to each other when |R| is close to 1
1679
1680 c1 -= r;
1681 c2 -= r;
1682 c3 -= r;
1683 // aa += r;
1684
1685 // Type of Householder reflections: z --> mu ek, where k is the index
1686 // of the entry in z with:
1687 // mode == 0: smallest absolute value --> angle closest to pi/2
1688 // (eliminate large entries)
1689 // mode == 1: largest absolute value --> angle farthest from pi/2
1690 // (eliminate small entries)
1691 const int mode = 1;
1692
1693 // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1694 // | c1 b12 b13 |
1695 // | b12 c2 b23 | = B - aa*I
1696 // | b13 b23 c3 |
1697 // This vector is also an eigenvector for B corresponding to aa
1698 // The vector z overwrites (c1,c2,c3).
1699 switch (internal::KernelVector3S(mode, b12, b13, b23, c1, c2, c3))
1700 {
1701 case 3:
1702 aa += r;
1703 goto have_aa;
1704 case 2:
1705 // ok, continue with the returned vector orthogonal to the kernel
1706 case 1:
1707 // ok, continue with the returned vector in the "near"-kernel
1708 ;
1709 }
1710
1711 // Using the eigenvector c = (c1,c2,c3) to transform B into
1712 // | b11 0 0 |
1713 // B <-- Q P B P Q = | 0 b22 b23 |
1714 // | 0 b23 b33 |
1715 real_t v1, v2, v3, g;
1716 internal::Reduce3S(mode, b11, b22, b33, b12, b13, b23,
1717 c1, c2, c3, v1, v2, v3, g);
1718 // Q = I - g v v^t
1719 // P - permutation matrix switching rows and columns 1 and k
1720
1721 // find the eigenvalues of
1722 // | b22 b23 |
1723 // | b23 b33 |
1724 internal::Eigenvalues2S(b23, b22, b33);
1725
1726 if (i == 2)
1727 {
1728 aa = fmin(fmin(b11, b22), b33);
1729 }
1730 else if (i == 1)
1731 {
1732 if (b11 <= b22)
1733 {
1734 aa = (b22 <= b33) ? b22 : fmax(b11, b33);
1735 }
1736 else
1737 {
1738 aa = (b11 <= b33) ? b11 : fmax(b33, b22);
1739 }
1740 }
1741 else
1742 {
1743 aa = fmax(fmax(b11, b22), b33);
1744 }
1745 }
1746
1747have_aa:
1748
1749 return sqrt(fabs(aa))*mult; // take abs before we sort?
1750}
1751
1752/// @brief Assuming L.U = P.A factored matrix of size (m x m), compute
1753/// X <- L^{-1} P X, for a vector X of length m.
1754//
1755// @param [in] data LU factorization of A
1756// @param [in] m square matrix height
1757// @param [in] ipiv array storing pivots
1758// @param [in, out] x vector storing right-hand side and then solution
1759MFEM_HOST_DEVICE
1760inline void LSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
1761{
1762 // X <- P X
1763 for (int i = 0; i < m; i++)
1764 {
1765 internal::Swap<real_t>(x[i], x[ipiv[i] - 1]);
1766 }
1767 // X <- L^{-1} X
1768 for (int j = 0; j < m; j++)
1769 {
1770 const real_t x_j = x[j];
1771 for (int i = j + 1; i < m; i++)
1772 {
1773 x[i] -= data[i + j * m] * x_j;
1774 }
1775 }
1776}
1777
1778/// @brief Assuming L.U = P.A factored matrix of size (m x m), compute
1779/// X <- U^{-1} X, for a vector X of length m.
1780//
1781// @param [in] data LU factorization of A
1782// @param [in] m square matrix height
1783// @param [in, out] x vector storing right-hand side and then solution
1784MFEM_HOST_DEVICE
1785inline void USolve(const real_t *data, const int m, real_t *x)
1786{
1787 for (int j = m - 1; j >= 0; j--)
1788 {
1789 const real_t x_j = (x[j] /= data[j + j * m]);
1790 for (int i = 0; i < j; i++)
1791 {
1792 x[i] -= data[i + j * m] * x_j;
1793 }
1794 }
1795}
1796
1797/// @brief Assuming L.U = P.A for a factored matrix (m x m),
1798// compute x <- A x
1799//
1800// @param [in] data LU factorization of A
1801// @param [in] m square matrix height
1802// @param [in] ipiv array storing pivot information
1803// @param [in, out] x vector storing right-hand side and then solution
1804MFEM_HOST_DEVICE
1805inline void LUSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
1806{
1807 LSolve(data, m, ipiv, x);
1808 USolve(data, m, x);
1809}
1810
1811
1812/// @brief Given an (n x m) matrix A21, compute X2 <- X2 - A21 X1, for matrices
1813/// X1, and X2 of size (m x r) and (n x r), respectively.
1814MFEM_HOST_DEVICE
1815inline void SubMult(const int m, const int n, const int r, const real_t *A21,
1816 const real_t *X1, real_t *X2)
1817{
1818 // X2 <- X2 - A21 X1
1819 for (int k = 0; k < r; k++)
1820 {
1821 for (int j = 0; j < m; j++)
1822 {
1823 const real_t x1_jk = X1[j+k*m];
1824 for (int i = 0; i < n; i++)
1825 {
1826 X2[i+k*n] -= A21[i+j*n] * x1_jk;
1827 }
1828 }
1829 }
1830}
1831
1832/// Assuming P.A = L.U factored data of size (m x m), compute the 2x2 block
1833/// decomposition:
1834/// | P 0 | | A A12 | = | L 0 | | U U12 |
1835/// | 0 I | | A21 A22 | | L21 I | | 0 S22 |
1836/// where A12, A21, and A22 are matrices of size (m x n), (n x m), and
1837/// (n x n), respectively. The blocks are overwritten as follows:
1838/// A12 <- U12 = L^{-1} P A12
1839/// A21 <- L21 = A21 U^{-1}
1840/// A22 <- S22 = A22 - L21 U12.
1841/// The block S22 is the Schur complement.
1842MFEM_HOST_DEVICE
1843inline void BlockFactor(const real_t *data, int m, const int *ipiv,
1844 int n, real_t *A12, real_t *A21, real_t *A22)
1845{
1846 // A12 <- L^{-1} P A12
1847 for (int i = 0; i < n; ++i)
1848 {
1849 LSolve(data, m, ipiv, A12 + i*m);
1850 }
1851 // A21 <- A21 U^{-1}
1852 for (int j = 0; j < m; j++)
1853 {
1854 const real_t u_jj_inv = 1.0/data[j+j*m];
1855 for (int i = 0; i < n; i++)
1856 {
1857 A21[i+j*n] *= u_jj_inv;
1858 }
1859 for (int k = j+1; k < m; k++)
1860 {
1861 const real_t u_jk = data[j+k*m];
1862 for (int i = 0; i < n; i++)
1863 {
1864 A21[i+k*n] -= A21[i+j*n] * u_jk;
1865 }
1866 }
1867 }
1868 // A22 <- A22 - A21 A12
1869 SubMult(m, n, n, A21, A12, A22);
1870}
1871
1872/// @brief Compute the LU factorization of the m x m matrix @a A.
1873///
1874/// Factorize the matrix of size (m x m) overwriting it with the LU factors. The
1875/// factorization is such that L.U = P.A, where A is the original matrix and P
1876/// is a permutation matrix represented by ipiv.
1877///
1878/// @param [in, out] A matrix
1879/// @param [in] m size of the square matrix
1880/// @param [out] ipiv array of pivots (length m)
1881/// @param [in] tol optional fuzzy comparison tolerance. Defaults to 0.0.
1882///
1883/// @return true if the factorization succeeds, false otherwise (zero pivot).
1884MFEM_HOST_DEVICE
1885inline bool LUFactor(real_t *A, const int m, int *ipiv, const real_t tol=0.0)
1886{
1887 bool pivot_flag = true;
1888
1889 for (int i = 0; i < m; i++)
1890 {
1891 // pivoting
1892 {
1893 int piv = i;
1894 real_t a = fabs(A[piv + m*i]);
1895 for (int j = i+1; j < m; j++)
1896 {
1897 const real_t b = fabs(A[j + m*i]);
1898 if (b > a)
1899 {
1900 a = b;
1901 piv = j;
1902 }
1903 }
1904 ipiv[i] = piv + 1;
1905 if (piv != i)
1906 {
1907 // swap rows i and piv in both L and U parts
1908 for (int j = 0; j < m; j++)
1909 {
1910 internal::Swap<real_t>(A[i + m*j], A[piv + m*j]);
1911 }
1912 }
1913 } // pivot end
1914
1915 if (fabs(A[i + m*i]) <= tol)
1916 {
1917 pivot_flag = false;
1918 }
1919
1920 const real_t a_ii_inv = 1.0 / A[i + m*i];
1921 for (int j = i+1; j < m; j++)
1922 {
1923 A[j + m*i] *= a_ii_inv;
1924 }
1925
1926 for (int k = i+1; k < m; k++)
1927 {
1928 const real_t a_ik = A[i + m*k];
1929 for (int j = i+1; j < m; j++)
1930 {
1931 A[j + m*k] -= a_ik * A[j + m*i];
1932 }
1933 }
1934 }
1935
1936 return pivot_flag;
1937}
1938
1939} // namespace kernels
1940
1941} // namespace mfem
1942
1943#endif // MFEM_LINALG_KERNELS_HPP
const real_t alpha
Definition ex15.cpp:369
int dim
Definition ex24.cpp:53
real_t mu
Definition ex25.cpp:140
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
mfem::real_t real_t
MFEM_HOST_DEVICE dual< value_type, gradient_type > sqrt(dual< value_type, gradient_type > x)
implementation of square root for dual numbers
Definition dual.hpp:288
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data.
Definition kernels.hpp:306
MFEM_HOST_DEVICE real_t DistanceSquared(const real_t *x, const real_t *y)
Compute the square of the Euclidean distance to another vector.
Definition kernels.hpp:37
MFEM_HOST_DEVICE real_t CalcSingularvalue< 2 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 2 with given data.
Definition kernels.hpp:1472
MFEM_HOST_DEVICE void MultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *AtBdata)
Multiply the transpose of a matrix of size Aheight x Awidth and data Adata with a matrix of size Ahei...
Definition kernels.hpp:507
MFEM_HOST_DEVICE void CalcEigenvalues< 2 >(const real_t *data, real_t *lambda, real_t *vec)
Definition kernels.hpp:1239
MFEM_HOST_DEVICE void CalcLeftInverse(const real_t *data, real_t *left_inv)
Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
MFEM_HOST_DEVICE void CalcAdjugate(const T *data, T *adj_data)
Return the adjugate of a matrix.
Definition kernels.hpp:316
MFEM_HOST_DEVICE void Add(const int height, const int width, const TALPHA alpha, const TA *Adata, const TB *Bdata, TC *Cdata)
Compute C = A + alpha*B, where the matrices A, B and C are of size height x width with data Adata,...
Definition kernels.hpp:326
MFEM_HOST_DEVICE void AbsMult(const int height, const int width, const TA *data, const TX *x, TY *y)
Absolute-value matrix vector multiplication: y = |A| x, where the matrix A is of size height x width ...
Definition kernels.hpp:193
MFEM_HOST_DEVICE void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *Cdata, const TB alpha, const TA beta)
Compute C = alpha*At*B + beta*C.
Definition kernels.hpp:471
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data,...
Definition kernels.hpp:160
MFEM_HOST_DEVICE real_t Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition kernels.hpp:130
MFEM_HOST_DEVICE void FNorm(real_t &scale_factor, real_t &scaled_fnorm2, const T *data)
Definition kernels.hpp:76
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const real_t *d, real_t *left_inv)
Definition kernels.hpp:1200
MFEM_HOST_DEVICE void LSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- L^{-1} P X, for a vector X of length...
Definition kernels.hpp:1760
MFEM_HOST_DEVICE void MultABt(const int Aheight, const int Awidth, const int Bheight, const TA *Adata, const TB *Bdata, TC *ABtdata)
Multiply a matrix of size Aheight x Awidth and data Adata with the transpose of a matrix of size Bhei...
Definition kernels.hpp:443
MFEM_HOST_DEVICE void MultTranspose(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix transpose vector multiplication: y = At x, where the matrix A is of size height x width with g...
Definition kernels.hpp:227
MFEM_HOST_DEVICE void USolve(const real_t *data, const int m, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- U^{-1} X, for a vector X of length m...
Definition kernels.hpp:1785
MFEM_HOST_DEVICE void AbsMultTranspose(const int height, const int width, const TA *data, const TX *x, TY *y)
Absolute-value matrix transpose vector multiplication: y = |At| x, where the matrix A is of size heig...
Definition kernels.hpp:256
MFEM_HOST_DEVICE void AddMult(const int Aheight, const int Awidth, const int Bwidth, const TB *Bdata, const TC *Cdata, TA *Adata, const TB alpha, const TA beta)
Matrix-matrix multiplication: A = alpha * B * C + beta * A, where the matrices A, B and C are of size...
Definition kernels.hpp:401
MFEM_HOST_DEVICE real_t FNorm2(const T *data)
Compute the square of the Frobenius norm of the matrix.
Definition kernels.hpp:120
MFEM_HOST_DEVICE real_t CalcSingularvalue(const real_t *data, const int i)
Return the i'th singular value of the matrix of size dim with given data.
MFEM_HOST_DEVICE void Symmetrize(const int size, T *data)
Symmetrize a square matrix with given size and data: A -> (A+A^T)/2.
Definition kernels.hpp:283
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const real_t *d, real_t *left_inv)
Definition kernels.hpp:1217
MFEM_HOST_DEVICE void BlockFactor(const real_t *data, int m, const int *ipiv, int n, real_t *A12, real_t *A21, real_t *A22)
Definition kernels.hpp:1843
MFEM_HOST_DEVICE void AddMultVWt(const real_t *v, const real_t *w, real_t *VWt)
Dense matrix operation: VWt += v w^t.
Definition kernels.hpp:64
MFEM_HOST_DEVICE void Diag(const real_t c, real_t *data)
Creates n x n diagonal matrix with diagonal elements c.
Definition kernels.hpp:46
MFEM_HOST_DEVICE bool LUFactor(real_t *A, const int m, int *ipiv, const real_t tol=0.0)
Compute the LU factorization of the m x m matrix A.
Definition kernels.hpp:1885
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
Definition kernels.hpp:386
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const real_t *d, real_t *left_inv)
Definition kernels.hpp:1208
MFEM_HOST_DEVICE void CalcEigenvalues(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition kernels.hpp:297
MFEM_HOST_DEVICE real_t CalcSingularvalue< 3 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 3 with given data.
Definition kernels.hpp:1520
MFEM_HOST_DEVICE void CalcEigenvalues< 3 >(const real_t *data, real_t *lambda, real_t *vec)
Definition kernels.hpp:1270
MFEM_HOST_DEVICE void SubMult(const int m, const int n, const int r, const real_t *A21, const real_t *X1, real_t *X2)
Given an (n x m) matrix A21, compute X2 <- X2 - A21 X1, for matrices X1, and X2 of size (m x r) and (...
Definition kernels.hpp:1815
MFEM_HOST_DEVICE void Subtract(const real_t a, const real_t *x, const real_t *y, real_t *z)
Vector subtraction operation: z = a * (x - y)
Definition kernels.hpp:55
MFEM_HOST_DEVICE void LUSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A for a factored matrix (m x m),.
Definition kernels.hpp:1805
MFEM_HOST_DEVICE void TAssignHD(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
Definition ttensor.hpp:263
MFEM_HOST_DEVICE scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:663
void Swap(T &a, T &b)
Swap objects of type T. The operation is performed using the most specialized swap function from the ...
Definition array.hpp:738
MFEM_HOST_DEVICE scalar_t TDetHD(const layout_t &a, const data_t &A)
Definition tmatrix.hpp:587
float real_t
Definition config.hpp:46
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
MFEM_HOST_DEVICE void TAdjugateHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
Definition tmatrix.hpp:634