MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
kernels.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_KERNELS_HPP
13 #define MFEM_KERNELS_HPP
14 
15 #ifdef _WIN32
16 #define _USE_MATH_DEFINES
17 #include <cmath>
18 #endif
19 
20 #include "../config/config.hpp"
21 #include "../general/backends.hpp"
22 #include "../general/globals.hpp"
23 
24 #include "matrix.hpp"
25 #include "tmatrix.hpp"
26 #include "tlayout.hpp"
27 #include "ttensor.hpp"
28 
29 // This header contains stand-alone functions for "small" dense linear algebra
30 // (at quadrature point or element-level) designed to be inlined directly into
31 // device kernels.
32 
33 // Many methods of the DenseMatrix class and some of the Vector class call these
34 // kernels directly on the host, see the implementations in linalg/densemat.cpp
35 // and linalag.vector.cpp.
36 
37 namespace mfem
38 {
39 
40 namespace kernels
41 {
42 
43 /// Returns the l2 norm of the Vector with given @a size and @a data.
44 template<typename T>
45 MFEM_HOST_DEVICE inline
46 double Norml2(const int size, const T *data)
47 {
48  if (0 == size) { return 0.0; }
49  if (1 == size) { return std::abs(data[0]); }
50  T scale = 0.0;
51  T sum = 0.0;
52  for (int i = 0; i < size; i++)
53  {
54  if (data[i] != 0.0)
55  {
56  const T absdata = fabs(data[i]);
57  if (scale <= absdata)
58  {
59  const T sqr_arg = scale / absdata;
60  sum = 1.0 + sum * (sqr_arg * sqr_arg);
61  scale = absdata;
62  continue;
63  } // end if scale <= absdata
64  const T sqr_arg = absdata / scale;
65  sum += (sqr_arg * sqr_arg); // else scale > absdata
66  } // end if data[i] != 0
67  }
68  return scale * sqrt(sum);
69 }
70 
71 /** @brief Matrix vector multiplication: y = A x, where the matrix A is of size
72  @a height x @a width with given @a data, while @a x and @a y specify the
73  data of the input and output vectors. */
74 template<typename TA, typename TX, typename TY>
75 MFEM_HOST_DEVICE inline
76 void Mult(const int height, const int width, TA *data, const TX *x, TY *y)
77 {
78  if (width == 0)
79  {
80  for (int row = 0; row < height; row++)
81  {
82  y[row] = 0.0;
83  }
84  return;
85  }
86  TA *d_col = data;
87  TX x_col = x[0];
88  for (int row = 0; row < height; row++)
89  {
90  y[row] = x_col*d_col[row];
91  }
92  d_col += height;
93  for (int col = 1; col < width; col++)
94  {
95  x_col = x[col];
96  for (int row = 0; row < height; row++)
97  {
98  y[row] += x_col*d_col[row];
99  }
100  d_col += height;
101  }
102 }
103 
104 /// Symmetrize a square matrix with given @a size and @a data: A -> (A+A^T)/2.
105 template<typename T>
106 MFEM_HOST_DEVICE inline
107 void Symmetrize(const int size, T *data)
108 {
109  for (int i = 0; i < size; i++)
110  {
111  for (int j = 0; j < i; j++)
112  {
113  const T a = 0.5 * (data[i*size+j] + data[j*size+i]);
114  data[j*size+i] = data[i*size+j] = a;
115  }
116  }
117 }
118 
119 /// Compute the determinant of a square matrix of size dim with given @a data.
120 template<int dim, typename T>
121 MFEM_HOST_DEVICE inline T Det(const T *data)
122 {
123  return TDetHD<T>(ColumnMajorLayout2D<dim,dim>(), data);
124 }
125 
126 /** @brief Return the inverse a matrix with given @a size and @a data into the
127  matrix with data @a inv_data. */
128 template<int dim, typename T>
129 MFEM_HOST_DEVICE inline
130 void CalcInverse(const T *data, T *inv_data)
131 {
132  typedef ColumnMajorLayout2D<dim,dim> layout_t;
133  const T det = TAdjDetHD<T>(layout_t(), data, layout_t(), inv_data);
134  TAssignHD<AssignOp::Mult>(layout_t(), inv_data, static_cast<T>(1.0)/det);
135 }
136 
137 /** @brief Compute C = A + alpha*B, where the matrices A, B and C are of size @a
138  height x @a width with data @a Adata, @a Bdata and @a Cdata. */
139 template<typename TALPHA, typename TA, typename TB, typename TC>
140 MFEM_HOST_DEVICE inline
141 void Add(const int height, const int width, const TALPHA alpha,
142  const TA *Adata, const TB *Bdata, TC *Cdata)
143 {
144  for (int j = 0; j < width; j++)
145  {
146  for (int i = 0; i < height; i++)
147  {
148  const int n = i*width+j;
149  Cdata[n] = Adata[n] + alpha * Bdata[n];
150  }
151  }
152 }
153 
154 /** @brief Matrix-matrix multiplication: A = B * C, where the matrices A, B and
155  C are of sizes @a Aheight x @a Awidth, @a Aheight x @a Bwidth and @a Bwidth
156  x @a Awidth, respectively. */
157 template<typename TA, typename TB, typename TC>
158 MFEM_HOST_DEVICE inline
159 void Mult(const int Aheight, const int Awidth, const int Bwidth,
160  const TB *Bdata, const TC *Cdata, TA *Adata)
161 {
162  const int ah_x_aw = Aheight * Awidth;
163  for (int i = 0; i < ah_x_aw; i++) { Adata[i] = 0.0; }
164  for (int j = 0; j < Awidth; j++)
165  {
166  for (int k = 0; k < Bwidth; k++)
167  {
168  for (int i = 0; i < Aheight; i++)
169  {
170  Adata[i+j*Aheight] += Bdata[i+k*Aheight] * Cdata[k+j*Bwidth];
171  }
172  }
173  }
174 }
175 
176 /** @brief Multiply a matrix of size @a Aheight x @a Awidth and data @a Adata
177  with the transpose of a matrix of size @a Bheight x @a Awidth and data @a
178  Bdata: A * Bt. Return the result in a matrix with data @a ABtdata. */
179 template<typename TA, typename TB, typename TC>
180 MFEM_HOST_DEVICE inline
181 void MultABt(const int Aheight, const int Awidth, const int Bheight,
182  const TA *Adata, const TB *Bdata, TC *ABtdata)
183 {
184  const int ah_x_bh = Aheight * Bheight;
185  for (int i = 0; i < ah_x_bh; i++) { ABtdata[i] = 0.0; }
186  for (int k = 0; k < Awidth; k++)
187  {
188  TC *c = ABtdata;
189  for (int j = 0; j < Bheight; j++)
190  {
191  const double bjk = Bdata[j];
192  for (int i = 0; i < Aheight; i++)
193  {
194  c[i] += Adata[i] * bjk;
195  }
196  c += Aheight;
197  }
198  Adata += Aheight;
199  Bdata += Bheight;
200  }
201 }
202 
203 /// Compute the spectrum of the matrix of size dim with given @a data, returning
204 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
205 /// vec (listed consecutively).
206 template<int dim> MFEM_HOST_DEVICE
207 void CalcEigenvalues(const double *data, double *lambda, double *vec);
208 
209 /// Return the i'th singular value of the matrix of size dim with given @a data.
210 template<int dim> MFEM_HOST_DEVICE
211 double CalcSingularvalue(const double *data, const int i);
212 
213 
214 // Utility functions for CalcEigenvalues and CalcSingularvalue
215 namespace internal
216 {
217 
218 /// Utility function to swap the values of @a a and @a b.
219 template<typename T>
220 MFEM_HOST_DEVICE static inline
221 void Swap(T &a, T &b)
222 {
223  T tmp = a;
224  a = b;
225  b = tmp;
226 }
227 
228 const double Epsilon = std::numeric_limits<double>::epsilon();
229 
230 /// Utility function used in CalcSingularvalue<3>.
231 MFEM_HOST_DEVICE static inline
232 void Eigenvalues2S(const double &d12, double &d1, double &d2)
233 {
234  const double sqrt_1_eps = sqrt(1./Epsilon);
235  if (d12 != 0.)
236  {
237  // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
238  double t;
239  const double zeta = (d2 - d1)/(2*d12); // inf/inf from overflows?
240  if (fabs(zeta) < sqrt_1_eps)
241  {
242  t = d12*copysign(1./(fabs(zeta) + sqrt(1. + zeta*zeta)), zeta);
243  }
244  else
245  {
246  t = d12*copysign(0.5/fabs(zeta), zeta);
247  }
248  d1 -= t;
249  d2 += t;
250  }
251 }
252 
253 /// Utility function used in CalcEigenvalues().
254 MFEM_HOST_DEVICE static inline
255 void Eigensystem2S(const double &d12, double &d1, double &d2,
256  double &c, double &s)
257 {
258  const double sqrt_1_eps = sqrt(1./Epsilon);
259  if (d12 == 0.0)
260  {
261  c = 1.;
262  s = 0.;
263  }
264  else
265  {
266  // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
267  double t;
268  const double zeta = (d2 - d1)/(2*d12);
269  const double azeta = fabs(zeta);
270  if (azeta < sqrt_1_eps)
271  {
272  t = copysign(1./(azeta + sqrt(1. + zeta*zeta)), zeta);
273  }
274  else
275  {
276  t = copysign(0.5/azeta, zeta);
277  }
278  c = sqrt(1./(1. + t*t));
279  s = c*t;
280  t *= d12;
281  d1 -= t;
282  d2 += t;
283  }
284 }
285 
286 
287 /// Utility function used in CalcEigenvalues<3>.
288 MFEM_HOST_DEVICE static inline
289 void GetScalingFactor(const double &d_max, double &mult)
290 {
291  int d_exp;
292  if (d_max > 0.)
293  {
294  mult = frexp(d_max, &d_exp);
295  if (d_exp == std::numeric_limits<double>::max_exponent)
296  {
297  mult *= std::numeric_limits<double>::radix;
298  }
299  mult = d_max/mult;
300  }
301  else
302  {
303  mult = 1.;
304  }
305  // mult = 2^d_exp is such that d_max/mult is in [0.5,1) or in other words
306  // d_max is in the interval [0.5,1)*mult
307 }
308 
309 /// Utility function used in CalcEigenvalues<3>.
310 MFEM_HOST_DEVICE static inline
311 bool KernelVector2G(const int &mode,
312  double &d1, double &d12, double &d21, double &d2)
313 {
314  // Find a vector (z1,z2) in the "near"-kernel of the matrix
315  // | d1 d12 |
316  // | d21 d2 |
317  // using QR factorization.
318  // The vector (z1,z2) is returned in (d1,d2). Return 'true' if the matrix
319  // is zero without setting (d1,d2).
320  // Note: in the current implementation |z1| + |z2| = 1.
321 
322  // l1-norms of the columns
323  double n1 = fabs(d1) + fabs(d21);
324  double n2 = fabs(d2) + fabs(d12);
325 
326  bool swap_columns = (n2 > n1);
327  double mu;
328 
329  if (!swap_columns)
330  {
331  if (n1 == 0.)
332  {
333  return true;
334  }
335 
336  if (mode == 0) // eliminate the larger entry in the column
337  {
338  if (fabs(d1) > fabs(d21))
339  {
340  Swap(d1, d21);
341  Swap(d12, d2);
342  }
343  }
344  else // eliminate the smaller entry in the column
345  {
346  if (fabs(d1) < fabs(d21))
347  {
348  Swap(d1, d21);
349  Swap(d12, d2);
350  }
351  }
352  }
353  else
354  {
355  // n2 > n1, swap columns 1 and 2
356  if (mode == 0) // eliminate the larger entry in the column
357  {
358  if (fabs(d12) > fabs(d2))
359  {
360  Swap(d1, d2);
361  Swap(d12, d21);
362  }
363  else
364  {
365  Swap(d1, d12);
366  Swap(d21, d2);
367  }
368  }
369  else // eliminate the smaller entry in the column
370  {
371  if (fabs(d12) < fabs(d2))
372  {
373  Swap(d1, d2);
374  Swap(d12, d21);
375  }
376  else
377  {
378  Swap(d1, d12);
379  Swap(d21, d2);
380  }
381  }
382  }
383 
384  n1 = hypot(d1, d21);
385 
386  if (d21 != 0.)
387  {
388  // v = (n1, n2)^t, |v| = 1
389  // Q = I - 2 v v^t, Q (d1, d21)^t = (mu, 0)^t
390  mu = copysign(n1, d1);
391  n1 = -d21*(d21/(d1 + mu)); // = d1 - mu
392  d1 = mu;
393  // normalize (n1,d21) to avoid overflow/underflow
394  // normalize (n1,d21) by the max-norm to avoid the sqrt call
395  if (fabs(n1) <= fabs(d21))
396  {
397  // (n1,n2) <-- (n1/d21,1)
398  n1 = n1/d21;
399  mu = (2./(1. + n1*n1))*(n1*d12 + d2);
400  d2 = d2 - mu;
401  d12 = d12 - mu*n1;
402  }
403  else
404  {
405  // (n1,n2) <-- (1,d21/n1)
406  n2 = d21/n1;
407  mu = (2./(1. + n2*n2))*(d12 + n2*d2);
408  d2 = d2 - mu*n2;
409  d12 = d12 - mu;
410  }
411  }
412 
413  // Solve:
414  // | d1 d12 | | z1 | = | 0 |
415  // | 0 d2 | | z2 | | 0 |
416 
417  // choose (z1,z2) to minimize |d1*z1 + d12*z2| + |d2*z2|
418  // under the condition |z1| + |z2| = 1, z2 >= 0 (for uniqueness)
419  // set t = z1, z2 = 1 - |t|, -1 <= t <= 1
420  // objective function is:
421  // |d1*t + d12*(1 - |t|)| + |d2|*(1 - |t|) -- piecewise linear with
422  // possible minima are -1,0,1,t1 where t1: d1*t1 + d12*(1 - |t1|) = 0
423  // values: @t=+/-1 -> |d1|, @t=0 -> |n1| + |d2|, @t=t1 -> |d2|*(1 - |t1|)
424 
425  // evaluate z2 @t=t1
426  mu = -d12/d1;
427  // note: |mu| <= 1, if using l2-norm for column pivoting
428  // |mu| <= sqrt(2), if using l1-norm
429  n2 = 1./(1. + fabs(mu));
430  // check if |d1|<=|d2|*z2
431  if (fabs(d1) <= n2*fabs(d2))
432  {
433  d2 = 0.;
434  d1 = 1.;
435  }
436  else
437  {
438  d2 = n2;
439  // d1 = (n2 < 0.5) ? copysign(1. - n2, mu) : mu*n2;
440  d1 = mu*n2;
441  }
442 
443  if (swap_columns)
444  {
445  Swap(d1, d2);
446  }
447 
448  return false;
449 }
450 
451 /// Utility function used in CalcEigenvalues<3>.
452 MFEM_HOST_DEVICE static inline
453 void Vec_normalize3_aux(const double &x1, const double &x2,
454  const double &x3,
455  double &n1, double &n2, double &n3)
456 {
457  double t, r;
458 
459  const double m = fabs(x1);
460  r = x2/m;
461  t = 1. + r*r;
462  r = x3/m;
463  t = sqrt(1./(t + r*r));
464  n1 = copysign(t, x1);
465  t /= m;
466  n2 = x2*t;
467  n3 = x3*t;
468 }
469 
470 /// Utility function used in CalcEigenvalues<3>.
471 MFEM_HOST_DEVICE static inline
472 void Vec_normalize3(const double &x1, const double &x2, const double &x3,
473  double &n1, double &n2, double &n3)
474 {
475  // should work ok when xk is the same as nk for some or all k
476  if (fabs(x1) >= fabs(x2))
477  {
478  if (fabs(x1) >= fabs(x3))
479  {
480  if (x1 != 0.)
481  {
482  Vec_normalize3_aux(x1, x2, x3, n1, n2, n3);
483  }
484  else
485  {
486  n1 = n2 = n3 = 0.;
487  }
488  return;
489  }
490  }
491  else if (fabs(x2) >= fabs(x3))
492  {
493  Vec_normalize3_aux(x2, x1, x3, n2, n1, n3);
494  return;
495  }
496  Vec_normalize3_aux(x3, x1, x2, n3, n1, n2);
497 }
498 
499 /// Utility function used in CalcEigenvalues<3>.
500 MFEM_HOST_DEVICE static inline
501 int KernelVector3G_aux(const int &mode,
502  double &d1, double &d2, double &d3,
503  double &c12, double &c13, double &c23,
504  double &c21, double &c31, double &c32)
505 {
506  int kdim;
507  double mu, n1, n2, n3, s1, s2, s3;
508 
509  s1 = hypot(c21, c31);
510  n1 = hypot(d1, s1);
511 
512  if (s1 != 0.)
513  {
514  // v = (s1, s2, s3)^t, |v| = 1
515  // Q = I - 2 v v^t, Q (d1, c12, c13)^t = (mu, 0, 0)^t
516  mu = copysign(n1, d1);
517  n1 = -s1*(s1/(d1 + mu)); // = d1 - mu
518  d1 = mu;
519 
520  // normalize (n1,c21,c31) to avoid overflow/underflow
521  // normalize (n1,c21,c31) by the max-norm to avoid the sqrt call
522  if (fabs(n1) >= fabs(c21))
523  {
524  if (fabs(n1) >= fabs(c31))
525  {
526  // n1 is max, (s1,s2,s3) <-- (1,c21/n1,c31/n1)
527  s2 = c21/n1;
528  s3 = c31/n1;
529  mu = 2./(1. + s2*s2 + s3*s3);
530  n2 = mu*(c12 + s2*d2 + s3*c32);
531  n3 = mu*(c13 + s2*c23 + s3*d3);
532  c12 = c12 - n2;
533  d2 = d2 - s2*n2;
534  c32 = c32 - s3*n2;
535  c13 = c13 - n3;
536  c23 = c23 - s2*n3;
537  d3 = d3 - s3*n3;
538  goto done_column_1;
539  }
540  }
541  else if (fabs(c21) >= fabs(c31))
542  {
543  // c21 is max, (s1,s2,s3) <-- (n1/c21,1,c31/c21)
544  s1 = n1/c21;
545  s3 = c31/c21;
546  mu = 2./(1. + s1*s1 + s3*s3);
547  n2 = mu*(s1*c12 + d2 + s3*c32);
548  n3 = mu*(s1*c13 + c23 + s3*d3);
549  c12 = c12 - s1*n2;
550  d2 = d2 - n2;
551  c32 = c32 - s3*n2;
552  c13 = c13 - s1*n3;
553  c23 = c23 - n3;
554  d3 = d3 - s3*n3;
555  goto done_column_1;
556  }
557  // c31 is max, (s1,s2,s3) <-- (n1/c31,c21/c31,1)
558  s1 = n1/c31;
559  s2 = c21/c31;
560  mu = 2./(1. + s1*s1 + s2*s2);
561  n2 = mu*(s1*c12 + s2*d2 + c32);
562  n3 = mu*(s1*c13 + s2*c23 + d3);
563  c12 = c12 - s1*n2;
564  d2 = d2 - s2*n2;
565  c32 = c32 - n2;
566  c13 = c13 - s1*n3;
567  c23 = c23 - s2*n3;
568  d3 = d3 - n3;
569  }
570 
571 done_column_1:
572 
573  // Solve:
574  // | d2 c23 | | z2 | = | 0 |
575  // | c32 d3 | | z3 | | 0 |
576  if (KernelVector2G(mode, d2, c23, c32, d3))
577  {
578  // Have two solutions:
579  // two vectors in the kernel are P (-c12/d1, 1, 0)^t and
580  // P (-c13/d1, 0, 1)^t where P is the permutation matrix swapping
581  // entries 1 and col.
582 
583  // A vector orthogonal to both these vectors is P (1, c12/d1, c13/d1)^t
584  d2 = c12/d1;
585  d3 = c13/d1;
586  d1 = 1.;
587  kdim = 2;
588  }
589  else
590  {
591  // solve for z1:
592  // note: |z1| <= a since |z2| + |z3| = 1, and
593  // max{|c12|,|c13|} <= max{norm(col. 2),norm(col. 3)}
594  // <= norm(col. 1) <= a |d1|
595  // a = 1, if using l2-norm for column pivoting
596  // a = sqrt(3), if using l1-norm
597  d1 = -(c12*d2 + c13*d3)/d1;
598  kdim = 1;
599  }
600 
601  Vec_normalize3(d1, d2, d3, d1, d2, d3);
602 
603  return kdim;
604 }
605 
606 /// Utility function used in CalcEigenvalues<3>.
607 MFEM_HOST_DEVICE static inline
608 int KernelVector3S(const int &mode, const double &d12,
609  const double &d13, const double &d23,
610  double &d1, double &d2, double &d3)
611 {
612  // Find a unit vector (z1,z2,z3) in the "near"-kernel of the matrix
613  // | d1 d12 d13 |
614  // | d12 d2 d23 |
615  // | d13 d23 d3 |
616  // using QR factorization.
617  // The vector (z1,z2,z3) is returned in (d1,d2,d3).
618  // Returns the dimension of the kernel, kdim, but never zero.
619  // - if kdim == 3, then (d1,d2,d3) is not defined,
620  // - if kdim == 2, then (d1,d2,d3) is a vector orthogonal to the kernel,
621  // - otherwise kdim == 1 and (d1,d2,d3) is a vector in the "near"-kernel.
622 
623  double c12 = d12, c13 = d13, c23 = d23;
624  double c21, c31, c32;
625  int col, row;
626 
627  // l1-norms of the columns:
628  c32 = fabs(d1) + fabs(c12) + fabs(c13);
629  c31 = fabs(d2) + fabs(c12) + fabs(c23);
630  c21 = fabs(d3) + fabs(c13) + fabs(c23);
631 
632  // column pivoting: choose the column with the largest norm
633  if (c32 >= c21)
634  {
635  col = (c32 >= c31) ? 1 : 2;
636  }
637  else
638  {
639  col = (c31 >= c21) ? 2 : 3;
640  }
641  switch (col)
642  {
643  case 1:
644  if (c32 == 0.) // zero matrix
645  {
646  return 3;
647  }
648  break;
649 
650  case 2:
651  if (c31 == 0.) // zero matrix
652  {
653  return 3;
654  }
655  Swap(c13, c23);
656  Swap(d1, d2);
657  break;
658 
659  case 3:
660  if (c21 == 0.) // zero matrix
661  {
662  return 3;
663  }
664  Swap(c12, c23);
665  Swap(d1, d3);
666  }
667 
668  // row pivoting depending on 'mode'
669  if (mode == 0)
670  {
671  if (fabs(d1) <= fabs(c13))
672  {
673  row = (fabs(d1) <= fabs(c12)) ? 1 : 2;
674  }
675  else
676  {
677  row = (fabs(c12) <= fabs(c13)) ? 2 : 3;
678  }
679  }
680  else
681  {
682  if (fabs(d1) >= fabs(c13))
683  {
684  row = (fabs(d1) >= fabs(c12)) ? 1 : 2;
685  }
686  else
687  {
688  row = (fabs(c12) >= fabs(c13)) ? 2 : 3;
689  }
690  }
691  switch (row)
692  {
693  case 1:
694  c21 = c12;
695  c31 = c13;
696  c32 = c23;
697  break;
698 
699  case 2:
700  c21 = d1;
701  c31 = c13;
702  c32 = c23;
703  d1 = c12;
704  c12 = d2;
705  d2 = d1;
706  c13 = c23;
707  c23 = c31;
708  break;
709 
710  case 3:
711  c21 = c12;
712  c31 = d1;
713  c32 = c12;
714  d1 = c13;
715  c12 = c23;
716  c13 = d3;
717  d3 = d1;
718  }
719  row = KernelVector3G_aux(mode, d1, d2, d3, c12, c13, c23, c21, c31, c32);
720  // row is kdim
721 
722  switch (col)
723  {
724  case 2:
725  Swap(d1, d2);
726  break;
727 
728  case 3:
729  Swap(d1, d3);
730  }
731  return row;
732 }
733 
734 /// Utility function used in CalcEigenvalues<3>.
735 MFEM_HOST_DEVICE static inline
736 int Reduce3S(const int &mode,
737  double &d1, double &d2, double &d3,
738  double &d12, double &d13, double &d23,
739  double &z1, double &z2, double &z3,
740  double &v1, double &v2, double &v3,
741  double &g)
742 {
743  // Given the matrix
744  // | d1 d12 d13 |
745  // A = | d12 d2 d23 |
746  // | d13 d23 d3 |
747  // and a unit eigenvector z=(z1,z2,z3), transform the matrix A into the
748  // matrix B = Q P A P Q that has the form
749  // | b1 0 0 |
750  // B = Q P A P Q = | 0 b2 b23 |
751  // | 0 b23 b3 |
752  // where P is the permutation matrix switching entries 1 and k, and
753  // Q is the reflection matrix Q = I - g v v^t, defined by: set y = P z and
754  // v = c(y - e_1); if y = e_1, then v = 0 and Q = I.
755  // Note: Q y = e_1, Q e_1 = y ==> Q P A P Q e_1 = ... = lambda e_1.
756  // The entries (b1,b2,b3,b23) are returned in (d1,d2,d3,d23), and the
757  // return value of the function is k. The variable g = 2/(v1^2+v2^2+v3^3).
758 
759  int k;
760  double s, w1, w2, w3;
761 
762  if (mode == 0)
763  {
764  // choose k such that z^t e_k = zk has the smallest absolute value, i.e.
765  // the angle between z and e_k is closest to pi/2
766  if (fabs(z1) <= fabs(z3))
767  {
768  k = (fabs(z1) <= fabs(z2)) ? 1 : 2;
769  }
770  else
771  {
772  k = (fabs(z2) <= fabs(z3)) ? 2 : 3;
773  }
774  }
775  else
776  {
777  // choose k such that zk is the largest by absolute value
778  if (fabs(z1) >= fabs(z3))
779  {
780  k = (fabs(z1) >= fabs(z2)) ? 1 : 2;
781  }
782  else
783  {
784  k = (fabs(z2) >= fabs(z3)) ? 2 : 3;
785  }
786  }
787  switch (k)
788  {
789  case 2:
790  Swap(d13, d23);
791  Swap(d1, d2);
792  Swap(z1, z2);
793  break;
794 
795  case 3:
796  Swap(d12, d23);
797  Swap(d1, d3);
798  Swap(z1, z3);
799  }
800 
801  s = hypot(z2, z3);
802 
803  if (s == 0.)
804  {
805  // s can not be zero, if zk is the smallest (mode == 0)
806  v1 = v2 = v3 = 0.;
807  g = 1.;
808  }
809  else
810  {
811  g = copysign(1., z1);
812  v1 = -s*(s/(z1 + g)); // = z1 - g
813  // normalize (v1,z2,z3) by its max-norm, avoiding the sqrt call
814  g = fabs(v1);
815  if (fabs(z2) > g) { g = fabs(z2); }
816  if (fabs(z3) > g) { g = fabs(z3); }
817  v1 = v1/g;
818  v2 = z2/g;
819  v3 = z3/g;
820  g = 2./(v1*v1 + v2*v2 + v3*v3);
821 
822  // Compute Q A Q = A - v w^t - w v^t, where
823  // w = u - (g/2)(v^t u) v, and u = g A v
824  // set w = g A v
825  w1 = g*( d1*v1 + d12*v2 + d13*v3);
826  w2 = g*(d12*v1 + d2*v2 + d23*v3);
827  w3 = g*(d13*v1 + d23*v2 + d3*v3);
828  // w := w - (g/2)(v^t w) v
829  s = (g/2)*(v1*w1 + v2*w2 + v3*w3);
830  w1 -= s*v1;
831  w2 -= s*v2;
832  w3 -= s*v3;
833  // dij -= vi*wj + wi*vj
834  d1 -= 2*v1*w1;
835  d2 -= 2*v2*w2;
836  d23 -= v2*w3 + v3*w2;
837  d3 -= 2*v3*w3;
838  // compute the off-diagonal entries on the first row/column of B which
839  // should be zero (for debugging):
840 #if 0
841  s = d12 - v1*w2 - v2*w1; // b12 = 0
842  s = d13 - v1*w3 - v3*w1; // b13 = 0
843 #endif
844  }
845 
846  switch (k)
847  {
848  case 2:
849  Swap(z1, z2);
850  break;
851  case 3:
852  Swap(z1, z3);
853  }
854  return k;
855 }
856 
857 } // namespace kernels::internal
858 
859 
860 // Implementations of CalcEigenvalues and CalcSingularvalue for dim = 2, 3.
861 
862 /// Compute the spectrum of the matrix of size 2 with given @a data, returning
863 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
864 /// vec (listed consecutively).
865 template<> MFEM_HOST_DEVICE inline
866 void CalcEigenvalues<2>(const double *data, double *lambda, double *vec)
867 {
868  double d0 = data[0];
869  double d2 = data[2]; // use the upper triangular entry
870  double d3 = data[3];
871  double c, s;
872  internal::Eigensystem2S(d2, d0, d3, c, s);
873  if (d0 <= d3)
874  {
875  lambda[0] = d0;
876  lambda[1] = d3;
877  vec[0] = c;
878  vec[1] = -s;
879  vec[2] = s;
880  vec[3] = c;
881  }
882  else
883  {
884  lambda[0] = d3;
885  lambda[1] = d0;
886  vec[0] = s;
887  vec[1] = c;
888  vec[2] = c;
889  vec[3] = -s;
890  }
891 }
892 
893 /// Compute the spectrum of the matrix of size 3 with given @a data, returning
894 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
895 /// vec (listed consecutively).
896 template<> MFEM_HOST_DEVICE inline
897 void CalcEigenvalues<3>(const double *data, double *lambda, double *vec)
898 {
899  double d11 = data[0];
900  double d12 = data[3]; // use the upper triangular entries
901  double d22 = data[4];
902  double d13 = data[6];
903  double d23 = data[7];
904  double d33 = data[8];
905 
906  double mult;
907  {
908  double d_max = fabs(d11);
909  if (d_max < fabs(d22)) { d_max = fabs(d22); }
910  if (d_max < fabs(d33)) { d_max = fabs(d33); }
911  if (d_max < fabs(d12)) { d_max = fabs(d12); }
912  if (d_max < fabs(d13)) { d_max = fabs(d13); }
913  if (d_max < fabs(d23)) { d_max = fabs(d23); }
914 
915  internal::GetScalingFactor(d_max, mult);
916  }
917 
918  d11 /= mult; d22 /= mult; d33 /= mult;
919  d12 /= mult; d13 /= mult; d23 /= mult;
920 
921  double aa = (d11 + d22 + d33)/3; // aa = tr(A)/3
922  double c1 = d11 - aa;
923  double c2 = d22 - aa;
924  double c3 = d33 - aa;
925 
926  double Q, R;
927 
928  Q = (2*(d12*d12 + d13*d13 + d23*d23) + c1*c1 + c2*c2 + c3*c3)/6;
929  R = (c1*(d23*d23 - c2*c3)+ d12*(d12*c3 - 2*d13*d23) + d13*d13*c2)/2;
930 
931  if (Q <= 0.)
932  {
933  lambda[0] = lambda[1] = lambda[2] = aa;
934  vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
935  vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
936  vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
937  }
938  else
939  {
940  double sqrtQ = sqrt(Q);
941  double sqrtQ3 = Q*sqrtQ;
942  // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
943  // double sqrtQ3 = pow(Q, 1.5);
944  double r;
945  if (fabs(R) >= sqrtQ3)
946  {
947  if (R < 0.)
948  {
949  // R = -1.;
950  r = 2*sqrtQ;
951  }
952  else
953  {
954  // R = 1.;
955  r = -2*sqrtQ;
956  }
957  }
958  else
959  {
960  R = R/sqrtQ3;
961 
962  if (R < 0.)
963  {
964  r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
965  }
966  else
967  {
968  r = -2*sqrtQ*cos(acos(R)/3); // min
969  }
970  }
971 
972  aa += r;
973  c1 = d11 - aa;
974  c2 = d22 - aa;
975  c3 = d33 - aa;
976 
977  // Type of Householder reflections: z --> mu ek, where k is the index
978  // of the entry in z with:
979  // mode == 0: smallest absolute value --> angle closest to pi/2
980  // mode == 1: largest absolute value --> angle farthest from pi/2
981  // Observations:
982  // mode == 0 produces better eigenvectors, less accurate eigenvalues?
983  // mode == 1 produces better eigenvalues, less accurate eigenvectors?
984  const int mode = 0;
985 
986  // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
987  // | c1 d12 d13 |
988  // | d12 c2 d23 | = A - aa*I
989  // | d13 d23 c3 |
990  // This vector is also an eigenvector for A corresponding to aa.
991  // The vector z overwrites (c1,c2,c3).
992  switch (internal::KernelVector3S(mode, d12, d13, d23, c1, c2, c3))
993  {
994  case 3:
995  // 'aa' is a triple eigenvalue
996  lambda[0] = lambda[1] = lambda[2] = aa;
997  vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
998  vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
999  vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1000  goto done_3d;
1001 
1002  case 2:
1003  // ok, continue with the returned vector orthogonal to the kernel
1004  case 1:
1005  // ok, continue with the returned vector in the "near"-kernel
1006  ;
1007  }
1008 
1009  // Using the eigenvector c=(c1,c2,c3) transform A into
1010  // | d11 0 0 |
1011  // A <-- Q P A P Q = | 0 d22 d23 |
1012  // | 0 d23 d33 |
1013  double v1, v2, v3, g;
1014  int k = internal::Reduce3S(mode, d11, d22, d33, d12, d13, d23,
1015  c1, c2, c3, v1, v2, v3, g);
1016  // Q = I - 2 v v^t
1017  // P - permutation matrix switching entries 1 and k
1018 
1019  // find the eigenvalues and eigenvectors for
1020  // | d22 d23 |
1021  // | d23 d33 |
1022  double c, s;
1023  internal::Eigensystem2S(d23, d22, d33, c, s);
1024  // d22 <-> P Q (0, c, -s), d33 <-> P Q (0, s, c)
1025 
1026  double *vec_1, *vec_2, *vec_3;
1027  if (d11 <= d22)
1028  {
1029  if (d22 <= d33)
1030  {
1031  lambda[0] = d11; vec_1 = vec;
1032  lambda[1] = d22; vec_2 = vec + 3;
1033  lambda[2] = d33; vec_3 = vec + 6;
1034  }
1035  else if (d11 <= d33)
1036  {
1037  lambda[0] = d11; vec_1 = vec;
1038  lambda[1] = d33; vec_3 = vec + 3;
1039  lambda[2] = d22; vec_2 = vec + 6;
1040  }
1041  else
1042  {
1043  lambda[0] = d33; vec_3 = vec;
1044  lambda[1] = d11; vec_1 = vec + 3;
1045  lambda[2] = d22; vec_2 = vec + 6;
1046  }
1047  }
1048  else
1049  {
1050  if (d11 <= d33)
1051  {
1052  lambda[0] = d22; vec_2 = vec;
1053  lambda[1] = d11; vec_1 = vec + 3;
1054  lambda[2] = d33; vec_3 = vec + 6;
1055  }
1056  else if (d22 <= d33)
1057  {
1058  lambda[0] = d22; vec_2 = vec;
1059  lambda[1] = d33; vec_3 = vec + 3;
1060  lambda[2] = d11; vec_1 = vec + 6;
1061  }
1062  else
1063  {
1064  lambda[0] = d33; vec_3 = vec;
1065  lambda[1] = d22; vec_2 = vec + 3;
1066  lambda[2] = d11; vec_1 = vec + 6;
1067  }
1068  }
1069 
1070  vec_1[0] = c1;
1071  vec_1[1] = c2;
1072  vec_1[2] = c3;
1073  d22 = g*(v2*c - v3*s);
1074  d33 = g*(v2*s + v3*c);
1075  vec_2[0] = - v1*d22; vec_3[0] = - v1*d33;
1076  vec_2[1] = c - v2*d22; vec_3[1] = s - v2*d33;
1077  vec_2[2] = -s - v3*d22; vec_3[2] = c - v3*d33;
1078  switch (k)
1079  {
1080  case 2:
1081  internal::Swap(vec_2[0], vec_2[1]);
1082  internal::Swap(vec_3[0], vec_3[1]);
1083  break;
1084 
1085  case 3:
1086  internal::Swap(vec_2[0], vec_2[2]);
1087  internal::Swap(vec_3[0], vec_3[2]);
1088  }
1089  }
1090 
1091 done_3d:
1092  lambda[0] *= mult;
1093  lambda[1] *= mult;
1094  lambda[2] *= mult;
1095 }
1096 
1097 /// Return the i'th singular value of the matrix of size 2 with given @a data.
1098 template<> MFEM_HOST_DEVICE inline
1099 double CalcSingularvalue<2>(const double *data, const int i)
1100 {
1101  double d0, d1, d2, d3;
1102  d0 = data[0];
1103  d1 = data[1];
1104  d2 = data[2];
1105  d3 = data[3];
1106  double mult;
1107 
1108  {
1109  double d_max = fabs(d0);
1110  if (d_max < fabs(d1)) { d_max = fabs(d1); }
1111  if (d_max < fabs(d2)) { d_max = fabs(d2); }
1112  if (d_max < fabs(d3)) { d_max = fabs(d3); }
1113  internal::GetScalingFactor(d_max, mult);
1114  }
1115 
1116  d0 /= mult;
1117  d1 /= mult;
1118  d2 /= mult;
1119  d3 /= mult;
1120 
1121  double t = 0.5*((d0+d2)*(d0-d2)+(d1-d3)*(d1+d3));
1122  double s = d0*d2 + d1*d3;
1123  s = sqrt(0.5*(d0*d0 + d1*d1 + d2*d2 + d3*d3) + sqrt(t*t + s*s));
1124 
1125  if (s == 0.0)
1126  {
1127  return 0.0;
1128  }
1129  t = fabs(d0*d3 - d1*d2) / s;
1130  if (t > s)
1131  {
1132  if (i == 0)
1133  {
1134  return t*mult;
1135  }
1136  return s*mult;
1137  }
1138  if (i == 0)
1139  {
1140  return s*mult;
1141  }
1142  return t*mult;
1143 }
1144 
1145 /// Return the i'th singular value of the matrix of size 3 with given @a data.
1146 template<> MFEM_HOST_DEVICE inline
1147 double CalcSingularvalue<3>(const double *data, const int i)
1148 {
1149  double d0, d1, d2, d3, d4, d5, d6, d7, d8;
1150  d0 = data[0]; d3 = data[3]; d6 = data[6];
1151  d1 = data[1]; d4 = data[4]; d7 = data[7];
1152  d2 = data[2]; d5 = data[5]; d8 = data[8];
1153  double mult;
1154  {
1155  double d_max = fabs(d0);
1156  if (d_max < fabs(d1)) { d_max = fabs(d1); }
1157  if (d_max < fabs(d2)) { d_max = fabs(d2); }
1158  if (d_max < fabs(d3)) { d_max = fabs(d3); }
1159  if (d_max < fabs(d4)) { d_max = fabs(d4); }
1160  if (d_max < fabs(d5)) { d_max = fabs(d5); }
1161  if (d_max < fabs(d6)) { d_max = fabs(d6); }
1162  if (d_max < fabs(d7)) { d_max = fabs(d7); }
1163  if (d_max < fabs(d8)) { d_max = fabs(d8); }
1164  internal::GetScalingFactor(d_max, mult);
1165  }
1166 
1167  d0 /= mult; d1 /= mult; d2 /= mult;
1168  d3 /= mult; d4 /= mult; d5 /= mult;
1169  d6 /= mult; d7 /= mult; d8 /= mult;
1170 
1171  double b11 = d0*d0 + d1*d1 + d2*d2;
1172  double b12 = d0*d3 + d1*d4 + d2*d5;
1173  double b13 = d0*d6 + d1*d7 + d2*d8;
1174  double b22 = d3*d3 + d4*d4 + d5*d5;
1175  double b23 = d3*d6 + d4*d7 + d5*d8;
1176  double b33 = d6*d6 + d7*d7 + d8*d8;
1177 
1178  // double a, b, c;
1179  // a = -(b11 + b22 + b33);
1180  // b = b11*(b22 + b33) + b22*b33 - b12*b12 - b13*b13 - b23*b23;
1181  // c = b11*(b23*b23 - b22*b33) + b12*(b12*b33 - 2*b13*b23) + b13*b13*b22;
1182 
1183  // double Q = (a * a - 3 * b) / 9;
1184  // double Q = (b12*b12 + b13*b13 + b23*b23 +
1185  // ((b11 - b22)*(b11 - b22) +
1186  // (b11 - b33)*(b11 - b33) +
1187  // (b22 - b33)*(b22 - b33))/6)/3;
1188  // Q = (3*(b12^2 + b13^2 + b23^2) +
1189  // ((b11 - b22)^2 + (b11 - b33)^2 + (b22 - b33)^2)/2)/9
1190  // or
1191  // Q = (1/6)*|B-tr(B)/3|_F^2
1192  // Q >= 0 and
1193  // Q = 0 <==> B = scalar * I
1194  // double R = (2 * a * a * a - 9 * a * b + 27 * c) / 54;
1195  double aa = (b11 + b22 + b33)/3; // aa = tr(B)/3
1196  double c1, c2, c3;
1197  // c1 = b11 - aa; // ((b11 - b22) + (b11 - b33))/3
1198  // c2 = b22 - aa; // ((b22 - b11) + (b22 - b33))/3
1199  // c3 = b33 - aa; // ((b33 - b11) + (b33 - b22))/3
1200  {
1201  double b11_b22 = ((d0-d3)*(d0+d3)+(d1-d4)*(d1+d4)+(d2-d5)*(d2+d5));
1202  double b22_b33 = ((d3-d6)*(d3+d6)+(d4-d7)*(d4+d7)+(d5-d8)*(d5+d8));
1203  double b33_b11 = ((d6-d0)*(d6+d0)+(d7-d1)*(d7+d1)+(d8-d2)*(d8+d2));
1204  c1 = (b11_b22 - b33_b11)/3;
1205  c2 = (b22_b33 - b11_b22)/3;
1206  c3 = (b33_b11 - b22_b33)/3;
1207  }
1208  double Q, R;
1209  Q = (2*(b12*b12 + b13*b13 + b23*b23) + c1*c1 + c2*c2 + c3*c3)/6;
1210  R = (c1*(b23*b23 - c2*c3)+ b12*(b12*c3 - 2*b13*b23) +b13*b13*c2)/2;
1211  // R = (-1/2)*det(B-(tr(B)/3)*I)
1212  // Note: 54*(det(S))^2 <= |S|_F^6, when S^t=S and tr(S)=0, S is 3x3
1213  // Therefore: R^2 <= Q^3
1214 
1215  if (Q <= 0.) { ; }
1216 
1217  // else if (fabs(R) >= sqrtQ3)
1218  // {
1219  // double det = (d[0] * (d[4] * d[8] - d[5] * d[7]) +
1220  // d[3] * (d[2] * d[7] - d[1] * d[8]) +
1221  // d[6] * (d[1] * d[5] - d[2] * d[4]));
1222  //
1223  // if (R > 0.)
1224  // {
1225  // if (i == 2)
1226  // // aa -= 2*sqrtQ;
1227  // return fabs(det)/(aa + sqrtQ);
1228  // else
1229  // aa += sqrtQ;
1230  // }
1231  // else
1232  // {
1233  // if (i != 0)
1234  // aa -= sqrtQ;
1235  // // aa = fabs(det)/sqrt(aa + 2*sqrtQ);
1236  // else
1237  // aa += 2*sqrtQ;
1238  // }
1239  // }
1240 
1241  else
1242  {
1243  double sqrtQ = sqrt(Q);
1244  double sqrtQ3 = Q*sqrtQ;
1245  // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1246  // double sqrtQ3 = pow(Q, 1.5);
1247  double r;
1248 
1249  if (fabs(R) >= sqrtQ3)
1250  {
1251  if (R < 0.)
1252  {
1253  // R = -1.;
1254  r = 2*sqrtQ;
1255  }
1256  else
1257  {
1258  // R = 1.;
1259  r = -2*sqrtQ;
1260  }
1261  }
1262  else
1263  {
1264  R = R/sqrtQ3;
1265 
1266  // if (fabs(R) <= 0.95)
1267  if (fabs(R) <= 0.9)
1268  {
1269  if (i == 2)
1270  {
1271  aa -= 2*sqrtQ*cos(acos(R)/3); // min
1272  }
1273  else if (i == 0)
1274  {
1275  aa -= 2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1276  }
1277  else
1278  {
1279  aa -= 2*sqrtQ*cos((acos(R) - 2.0*M_PI)/3); // mid
1280  }
1281  goto have_aa;
1282  }
1283 
1284  if (R < 0.)
1285  {
1286  r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1287  if (i == 0)
1288  {
1289  aa += r;
1290  goto have_aa;
1291  }
1292  }
1293  else
1294  {
1295  r = -2*sqrtQ*cos(acos(R)/3); // min
1296  if (i == 2)
1297  {
1298  aa += r;
1299  goto have_aa;
1300  }
1301  }
1302  }
1303 
1304  // (tr(B)/3 + r) is the root which is separated from the other
1305  // two roots which are close to each other when |R| is close to 1
1306 
1307  c1 -= r;
1308  c2 -= r;
1309  c3 -= r;
1310  // aa += r;
1311 
1312  // Type of Householder reflections: z --> mu ek, where k is the index
1313  // of the entry in z with:
1314  // mode == 0: smallest absolute value --> angle closest to pi/2
1315  // (eliminate large entries)
1316  // mode == 1: largest absolute value --> angle farthest from pi/2
1317  // (eliminate small entries)
1318  const int mode = 1;
1319 
1320  // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1321  // | c1 b12 b13 |
1322  // | b12 c2 b23 | = B - aa*I
1323  // | b13 b23 c3 |
1324  // This vector is also an eigenvector for B corresponding to aa
1325  // The vector z overwrites (c1,c2,c3).
1326  switch (internal::KernelVector3S(mode, b12, b13, b23, c1, c2, c3))
1327  {
1328  case 3:
1329  aa += r;
1330  goto have_aa;
1331  case 2:
1332  // ok, continue with the returned vector orthogonal to the kernel
1333  case 1:
1334  // ok, continue with the returned vector in the "near"-kernel
1335  ;
1336  }
1337 
1338  // Using the eigenvector c = (c1,c2,c3) to transform B into
1339  // | b11 0 0 |
1340  // B <-- Q P B P Q = | 0 b22 b23 |
1341  // | 0 b23 b33 |
1342  double v1, v2, v3, g;
1343  internal::Reduce3S(mode, b11, b22, b33, b12, b13, b23,
1344  c1, c2, c3, v1, v2, v3, g);
1345  // Q = I - g v v^t
1346  // P - permutation matrix switching rows and columns 1 and k
1347 
1348  // find the eigenvalues of
1349  // | b22 b23 |
1350  // | b23 b33 |
1351  internal::Eigenvalues2S(b23, b22, b33);
1352 
1353  if (i == 2)
1354  {
1355  aa = fmin(fmin(b11, b22), b33);
1356  }
1357  else if (i == 1)
1358  {
1359  if (b11 <= b22)
1360  {
1361  aa = (b22 <= b33) ? b22 : fmax(b11, b33);
1362  }
1363  else
1364  {
1365  aa = (b11 <= b33) ? b11 : fmax(b33, b22);
1366  }
1367  }
1368  else
1369  {
1370  aa = fmax(fmax(b11, b22), b33);
1371  }
1372  }
1373 
1374 have_aa:
1375 
1376  return sqrt(fabs(aa))*mult; // take abs before we sort?
1377 }
1378 
1379 
1380 /// Assuming L.U = P.A for a factored matrix (m x m),
1381 // compute x <- A x
1382 //
1383 // @param [in] data LU factorization of A
1384 // @param [in] m square matrix height
1385 // @param [in] ipiv array storing pivot information
1386 // @param [in, out] x vector storing right-hand side and then solution
1387 MFEM_HOST_DEVICE
1388 inline void LUSolve(const double *data, const int m, const int *ipiv,
1389  double *x)
1390 {
1391  // X <- P X
1392  for (int i = 0; i < m; i++)
1393  {
1394  internal::Swap<double>(x[i], x[ipiv[i]]);
1395  }
1396 
1397  // X <- L^{-1} X
1398  for (int j = 0; j < m; j++)
1399  {
1400  const double x_j = x[j];
1401  for (int i = j + 1; i < m; i++)
1402  {
1403  x[i] -= data[i + j * m] * x_j;
1404  }
1405  }
1406 
1407  // X <- U^{-1} X
1408  for (int j = m - 1; j >= 0; j--)
1409  {
1410  const double x_j = (x[j] /= data[j + j * m]);
1411  for (int i = 0; i < j; i++)
1412  {
1413  x[i] -= data[i + j * m] * x_j;
1414  }
1415  }
1416 }
1417 
1418 } // namespace kernels
1419 
1420 } // namespace mfem
1421 
1422 #endif // MFEM_KERNELS_HPP
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition: kernels.hpp:121
MFEM_HOST_DEVICE void MultABt(const int Aheight, const int Awidth, const int Bheight, const TA *Adata, const TB *Bdata, TC *ABtdata)
Multiply a matrix of size Aheight x Awidth and data Adata with the transpose of a matrix of size Bhei...
Definition: kernels.hpp:181
double epsilon
Definition: ex25.cpp:136
MFEM_HOST_DEVICE void Add(const int height, const int width, const TALPHA alpha, const TA *Adata, const TB *Bdata, TC *Cdata)
Compute C = A + alpha*B, where the matrices A, B and C are of size height x width with data Adata...
Definition: kernels.hpp:141
MFEM_HOST_DEVICE double CalcSingularvalue< 3 >(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size 3 with given data.
Definition: kernels.hpp:1147
MFEM_HOST_DEVICE void CalcEigenvalues(const double *data, double *lambda, double *vec)
MFEM_HOST_DEVICE void CalcEigenvalues< 2 >(const double *data, double *lambda, double *vec)
Definition: kernels.hpp:866
double b
Definition: lissajous.cpp:42
MFEM_HOST_DEVICE void LUSolve(const double *data, const int m, const int *ipiv, double *x)
Assuming L.U = P.A for a factored matrix (m x m),.
Definition: kernels.hpp:1388
MFEM_HOST_DEVICE double Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition: kernels.hpp:46
void Swap(Array< T > &, Array< T > &)
Definition: array.hpp:611
MFEM_HOST_DEVICE double CalcSingularvalue(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size dim with given data.
MFEM_HOST_DEVICE double CalcSingularvalue< 2 >(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size 2 with given data.
Definition: kernels.hpp:1099
double a
Definition: lissajous.cpp:41
MFEM_HOST_DEVICE void CalcEigenvalues< 3 >(const double *data, double *lambda, double *vec)
Definition: kernels.hpp:897
double mu
Definition: ex25.cpp:135
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse a matrix with given size and data into the matrix with data inv_data.
Definition: kernels.hpp:130
const double alpha
Definition: ex15.cpp:336
MFEM_HOST_DEVICE void Mult(const int height, const int width, TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data...
Definition: kernels.hpp:76
MFEM_HOST_DEVICE void Symmetrize(const int size, T *data)
Symmetrize a square matrix with given size and data: A -&gt; (A+A^T)/2.
Definition: kernels.hpp:107