MFEM  v4.3.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
kernels.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_LINALG_KERNELS_HPP
13 #define MFEM_LINALG_KERNELS_HPP
14 
15 #include "../config/config.hpp"
16 #include "../general/backends.hpp"
17 #include "../general/globals.hpp"
18 
19 #include "matrix.hpp"
20 #include "tmatrix.hpp"
21 #include "tlayout.hpp"
22 #include "ttensor.hpp"
23 
24 // This header contains stand-alone functions for "small" dense linear algebra
25 // (at quadrature point or element-level) designed to be inlined directly into
26 // device kernels.
27 
28 // Many methods of the DenseMatrix class and some of the Vector class call these
29 // kernels directly on the host, see the implementations in linalg/densemat.cpp
30 // and linalg/vector.cpp.
31 
32 namespace mfem
33 {
34 
35 namespace kernels
36 {
37 
38 /// Compute the square of the Euclidean distance to another vector
39 template<int dim>
40 MFEM_HOST_DEVICE inline double DistanceSquared(const double *x, const double *y)
41 {
42  double d = 0.0;
43  for (int i = 0; i < dim; i++) { d += (x[i]-y[i])*(x[i]-y[i]); }
44  return d;
45 }
46 
47 /// Creates n x n diagonal matrix with diagonal elements c
48 template<int dim>
49 MFEM_HOST_DEVICE inline void Diag(const double c, double *data)
50 {
51  const int N = dim*dim;
52  for (int i = 0; i < N; i++) { data[i] = 0.0; }
53  for (int i = 0; i < dim; i++) { data[i*(dim+1)] = c; }
54 }
55 
56 /// Vector subtraction operation: z = a * (x - y)
57 template<int dim>
58 MFEM_HOST_DEVICE inline void Subtract(const double a,
59  const double *x, const double *y,
60  double *z)
61 {
62  for (int i = 0; i < dim; i++) { z[i] = a * (x[i] - y[i]); }
63 }
64 
65 /// Dense matrix operation: VWt += v w^t
66 template<int dim>
67 MFEM_HOST_DEVICE inline void AddMultVWt(const double *v, const double *w,
68  double *VWt)
69 {
70  for (int i = 0; i < dim; i++)
71  {
72  const double vi = v[i];
73  for (int j = 0; j < dim; j++) { VWt[i*dim+j] += vi * w[j]; }
74  }
75 }
76 
77 template<int H, int W, typename T>
78 MFEM_HOST_DEVICE inline
79 void FNorm(double &scale_factor, double &scaled_fnorm2, const T *data)
80 {
81  int i, hw = H * W;
82  T max_norm = 0.0, entry, fnorm2;
83 
84  for (i = 0; i < hw; i++)
85  {
86  entry = fabs(data[i]);
87  if (entry > max_norm)
88  {
89  max_norm = entry;
90  }
91  }
92 
93  if (max_norm == 0.0)
94  {
95  scale_factor = scaled_fnorm2 = 0.0;
96  return;
97  }
98 
99  fnorm2 = 0.0;
100  for (i = 0; i < hw; i++)
101  {
102  entry = data[i] / max_norm;
103  fnorm2 += entry * entry;
104  }
105 
106  scale_factor = max_norm;
107  scaled_fnorm2 = fnorm2;
108 }
109 
110 /// Compute the Frobenius norm of the matrix
111 template<int H, int W, typename T>
112 MFEM_HOST_DEVICE inline
113 double FNorm(const T *data)
114 {
115  double s, n2;
116  kernels::FNorm<H,W>(s, n2, data);
117  return s*sqrt(n2);
118 }
119 
120 /// Compute the square of the Frobenius norm of the matrix
121 template<int H, int W, typename T>
122 MFEM_HOST_DEVICE inline
123 double FNorm2(const T *data)
124 {
125  double s, n2;
126  kernels::FNorm<H,W>(s, n2, data);
127  return s*s*n2;
128 }
129 
130 /// Returns the l2 norm of the Vector with given @a size and @a data.
131 template<typename T>
132 MFEM_HOST_DEVICE inline
133 double Norml2(const int size, const T *data)
134 {
135  if (0 == size) { return 0.0; }
136  if (1 == size) { return std::abs(data[0]); }
137  T scale = 0.0;
138  T sum = 0.0;
139  for (int i = 0; i < size; i++)
140  {
141  if (data[i] != 0.0)
142  {
143  const T absdata = fabs(data[i]);
144  if (scale <= absdata)
145  {
146  const T sqr_arg = scale / absdata;
147  sum = 1.0 + sum * (sqr_arg * sqr_arg);
148  scale = absdata;
149  continue;
150  } // end if scale <= absdata
151  const T sqr_arg = absdata / scale;
152  sum += (sqr_arg * sqr_arg); // else scale > absdata
153  } // end if data[i] != 0
154  }
155  return scale * sqrt(sum);
156 }
157 
158 /** @brief Matrix vector multiplication: y = A x, where the matrix A is of size
159  @a height x @a width with given @a data, while @a x and @a y specify the
160  data of the input and output vectors. */
161 template<typename TA, typename TX, typename TY>
162 MFEM_HOST_DEVICE inline
163 void Mult(const int height, const int width, TA *data, const TX *x, TY *y)
164 {
165  if (width == 0)
166  {
167  for (int row = 0; row < height; row++)
168  {
169  y[row] = 0.0;
170  }
171  return;
172  }
173  TA *d_col = data;
174  TX x_col = x[0];
175  for (int row = 0; row < height; row++)
176  {
177  y[row] = x_col*d_col[row];
178  }
179  d_col += height;
180  for (int col = 1; col < width; col++)
181  {
182  x_col = x[col];
183  for (int row = 0; row < height; row++)
184  {
185  y[row] += x_col*d_col[row];
186  }
187  d_col += height;
188  }
189 }
190 
191 /// Symmetrize a square matrix with given @a size and @a data: A -> (A+A^T)/2.
192 template<typename T>
193 MFEM_HOST_DEVICE inline
194 void Symmetrize(const int size, T *data)
195 {
196  for (int i = 0; i < size; i++)
197  {
198  for (int j = 0; j < i; j++)
199  {
200  const T a = 0.5 * (data[i*size+j] + data[j*size+i]);
201  data[j*size+i] = data[i*size+j] = a;
202  }
203  }
204 }
205 
206 /// Compute the determinant of a square matrix of size dim with given @a data.
207 template<int dim, typename T>
208 MFEM_HOST_DEVICE inline T Det(const T *data)
209 {
210  return TDetHD<T>(ColumnMajorLayout2D<dim,dim>(), data);
211 }
212 
213 /** @brief Return the inverse of a matrix with given @a size and @a data into
214  the matrix with data @a inv_data. */
215 template<int dim, typename T>
216 MFEM_HOST_DEVICE inline
217 void CalcInverse(const T *data, T *inv_data)
218 {
219  typedef ColumnMajorLayout2D<dim,dim> layout_t;
220  const T det = TAdjDetHD<T>(layout_t(), data, layout_t(), inv_data);
221  TAssignHD<AssignOp::Mult>(layout_t(), inv_data, static_cast<T>(1.0)/det);
222 }
223 
224 /** @brief Return the adjugate of a matrix */
225 template<int dim, typename T>
226 MFEM_HOST_DEVICE inline
227 void CalcAdjugate(const T *data, T *adj_data)
228 {
229  typedef ColumnMajorLayout2D<dim,dim> layout_t;
230  TAdjugateHD<T>(layout_t(), data, layout_t(), adj_data);
231 }
232 
233 /** @brief Compute C = A + alpha*B, where the matrices A, B and C are of size @a
234  height x @a width with data @a Adata, @a Bdata and @a Cdata. */
235 template<typename TALPHA, typename TA, typename TB, typename TC>
236 MFEM_HOST_DEVICE inline
237 void Add(const int height, const int width, const TALPHA alpha,
238  const TA *Adata, const TB *Bdata, TC *Cdata)
239 {
240  for (int j = 0; j < width; j++)
241  {
242  for (int i = 0; i < height; i++)
243  {
244  const int n = i*width+j;
245  Cdata[n] = Adata[n] + alpha * Bdata[n];
246  }
247  }
248 }
249 
250 /** @brief Compute C = alpha*A + beta*B, where the matrices A, B and C are of
251  size @a height x @a width with data @a Adata, @a Bdata and @a Cdata. */
252 template<typename TALPHA, typename TBETA, typename TA, typename TB, typename TC>
253 MFEM_HOST_DEVICE inline
254 void Add(const int height, const int width,
255  const TALPHA alpha, const TA *Adata,
256  const TBETA beta, const TB *Bdata,
257  TC *Cdata)
258 {
259  const int m = height * width;
260  for (int i = 0; i < m; i++)
261  {
262  Cdata[i] = alpha * Adata[i] + beta * Bdata[i];
263  }
264 }
265 
266 /** @brief Compute B += A, where the matrices A and B are of size
267  @a height x @a width with data @a Adata and @a Bdata. */
268 template<typename TA, typename TB>
269 MFEM_HOST_DEVICE inline
270 void Add(const int height, const int width, const TA *Adata, TB *Bdata)
271 {
272  const int m = height * width;
273  for (int i = 0; i < m; i++)
274  {
275  Bdata[i] += Adata[i];
276  }
277 }
278 
279 /** @brief Compute B +=alpha*A, where the matrices A and B are of size
280  @a height x @a width with data @a Adata and @a Bdata. */
281 template<typename TA, typename TB>
282 MFEM_HOST_DEVICE inline
283 void Add(const int height, const int width,
284  const double alpha, const TA *Adata, TB *Bdata)
285 {
286  const int m = height * width;
287  for (int i = 0; i < m; i++)
288  {
289  Bdata[i] += alpha * Adata[i];
290  }
291 }
292 
293 /** @brief Compute B = alpha*A, where the matrices A and B are of size
294  @a height x @a width with data @a Adata and @a Bdata. */
295 template<typename TA, typename TB>
296 MFEM_HOST_DEVICE inline
297 void Set(const int height, const int width,
298  const double alpha, const TA *Adata, TB *Bdata)
299 {
300  const int m = height * width;
301  for (int i = 0; i < m; i++)
302  {
303  Bdata[i] = alpha * Adata[i];
304  }
305 }
306 
307 /** @brief Matrix-matrix multiplication: A = B * C, where the matrices A, B and
308  C are of sizes @a Aheight x @a Awidth, @a Aheight x @a Bwidth and @a Bwidth
309  x @a Awidth, respectively. */
310 template<typename TA, typename TB, typename TC>
311 MFEM_HOST_DEVICE inline
312 void Mult(const int Aheight, const int Awidth, const int Bwidth,
313  const TB *Bdata, const TC *Cdata, TA *Adata)
314 {
315  const int ah_x_aw = Aheight * Awidth;
316  for (int i = 0; i < ah_x_aw; i++) { Adata[i] = 0.0; }
317  for (int j = 0; j < Awidth; j++)
318  {
319  for (int k = 0; k < Bwidth; k++)
320  {
321  for (int i = 0; i < Aheight; i++)
322  {
323  Adata[i+j*Aheight] += Bdata[i+k*Aheight] * Cdata[k+j*Bwidth];
324  }
325  }
326  }
327 }
328 
329 /** @brief Multiply a matrix of size @a Aheight x @a Awidth and data @a Adata
330  with the transpose of a matrix of size @a Bheight x @a Awidth and data @a
331  Bdata: A * Bt. Return the result in a matrix with data @a ABtdata. */
332 template<typename TA, typename TB, typename TC>
333 MFEM_HOST_DEVICE inline
334 void MultABt(const int Aheight, const int Awidth, const int Bheight,
335  const TA *Adata, const TB *Bdata, TC *ABtdata)
336 {
337  const int ah_x_bh = Aheight * Bheight;
338  for (int i = 0; i < ah_x_bh; i++) { ABtdata[i] = 0.0; }
339  for (int k = 0; k < Awidth; k++)
340  {
341  TC *c = ABtdata;
342  for (int j = 0; j < Bheight; j++)
343  {
344  const double bjk = Bdata[j];
345  for (int i = 0; i < Aheight; i++)
346  {
347  c[i] += Adata[i] * bjk;
348  }
349  c += Aheight;
350  }
351  Adata += Aheight;
352  Bdata += Bheight;
353  }
354 }
355 
356 /// Compute the spectrum of the matrix of size dim with given @a data, returning
357 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
358 /// vec (listed consecutively).
359 template<int dim> MFEM_HOST_DEVICE
360 void CalcEigenvalues(const double *data, double *lambda, double *vec);
361 
362 /// Return the i'th singular value of the matrix of size dim with given @a data.
363 template<int dim> MFEM_HOST_DEVICE
364 double CalcSingularvalue(const double *data, const int i);
365 
366 
367 // Utility functions for CalcEigenvalues and CalcSingularvalue
368 namespace internal
369 {
370 
371 /// Utility function to swap the values of @a a and @a b.
372 template<typename T>
373 MFEM_HOST_DEVICE static inline
374 void Swap(T &a, T &b)
375 {
376  T tmp = a;
377  a = b;
378  b = tmp;
379 }
380 
381 const double Epsilon = std::numeric_limits<double>::epsilon();
382 
383 /// Utility function used in CalcSingularvalue<3>.
384 MFEM_HOST_DEVICE static inline
385 void Eigenvalues2S(const double &d12, double &d1, double &d2)
386 {
387  const double sqrt_1_eps = sqrt(1./Epsilon);
388  if (d12 != 0.)
389  {
390  // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
391  double t;
392  const double zeta = (d2 - d1)/(2*d12); // inf/inf from overflows?
393  if (fabs(zeta) < sqrt_1_eps)
394  {
395  t = d12*copysign(1./(fabs(zeta) + sqrt(1. + zeta*zeta)), zeta);
396  }
397  else
398  {
399  t = d12*copysign(0.5/fabs(zeta), zeta);
400  }
401  d1 -= t;
402  d2 += t;
403  }
404 }
405 
406 /// Utility function used in CalcEigenvalues().
407 MFEM_HOST_DEVICE static inline
408 void Eigensystem2S(const double &d12, double &d1, double &d2,
409  double &c, double &s)
410 {
411  const double sqrt_1_eps = sqrt(1./Epsilon);
412  if (d12 == 0.0)
413  {
414  c = 1.;
415  s = 0.;
416  }
417  else
418  {
419  // "The Symmetric Eigenvalue Problem", B. N. Parlett, pp.189-190
420  double t;
421  const double zeta = (d2 - d1)/(2*d12);
422  const double azeta = fabs(zeta);
423  if (azeta < sqrt_1_eps)
424  {
425  t = copysign(1./(azeta + sqrt(1. + zeta*zeta)), zeta);
426  }
427  else
428  {
429  t = copysign(0.5/azeta, zeta);
430  }
431  c = sqrt(1./(1. + t*t));
432  s = c*t;
433  t *= d12;
434  d1 -= t;
435  d2 += t;
436  }
437 }
438 
439 
440 /// Utility function used in CalcEigenvalues<3>.
441 MFEM_HOST_DEVICE static inline
442 void GetScalingFactor(const double &d_max, double &mult)
443 {
444  int d_exp;
445  if (d_max > 0.)
446  {
447  mult = frexp(d_max, &d_exp);
448  if (d_exp == std::numeric_limits<double>::max_exponent)
449  {
450  mult *= std::numeric_limits<double>::radix;
451  }
452  mult = d_max/mult;
453  }
454  else
455  {
456  mult = 1.;
457  }
458  // mult = 2^d_exp is such that d_max/mult is in [0.5,1) or in other words
459  // d_max is in the interval [0.5,1)*mult
460 }
461 
462 /// Utility function used in CalcEigenvalues<3>.
463 MFEM_HOST_DEVICE static inline
464 bool KernelVector2G(const int &mode,
465  double &d1, double &d12, double &d21, double &d2)
466 {
467  // Find a vector (z1,z2) in the "near"-kernel of the matrix
468  // | d1 d12 |
469  // | d21 d2 |
470  // using QR factorization.
471  // The vector (z1,z2) is returned in (d1,d2). Return 'true' if the matrix
472  // is zero without setting (d1,d2).
473  // Note: in the current implementation |z1| + |z2| = 1.
474 
475  // l1-norms of the columns
476  double n1 = fabs(d1) + fabs(d21);
477  double n2 = fabs(d2) + fabs(d12);
478 
479  bool swap_columns = (n2 > n1);
480  double mu;
481 
482  if (!swap_columns)
483  {
484  if (n1 == 0.)
485  {
486  return true;
487  }
488 
489  if (mode == 0) // eliminate the larger entry in the column
490  {
491  if (fabs(d1) > fabs(d21))
492  {
493  Swap(d1, d21);
494  Swap(d12, d2);
495  }
496  }
497  else // eliminate the smaller entry in the column
498  {
499  if (fabs(d1) < fabs(d21))
500  {
501  Swap(d1, d21);
502  Swap(d12, d2);
503  }
504  }
505  }
506  else
507  {
508  // n2 > n1, swap columns 1 and 2
509  if (mode == 0) // eliminate the larger entry in the column
510  {
511  if (fabs(d12) > fabs(d2))
512  {
513  Swap(d1, d2);
514  Swap(d12, d21);
515  }
516  else
517  {
518  Swap(d1, d12);
519  Swap(d21, d2);
520  }
521  }
522  else // eliminate the smaller entry in the column
523  {
524  if (fabs(d12) < fabs(d2))
525  {
526  Swap(d1, d2);
527  Swap(d12, d21);
528  }
529  else
530  {
531  Swap(d1, d12);
532  Swap(d21, d2);
533  }
534  }
535  }
536 
537  n1 = hypot(d1, d21);
538 
539  if (d21 != 0.)
540  {
541  // v = (n1, n2)^t, |v| = 1
542  // Q = I - 2 v v^t, Q (d1, d21)^t = (mu, 0)^t
543  mu = copysign(n1, d1);
544  n1 = -d21*(d21/(d1 + mu)); // = d1 - mu
545  d1 = mu;
546  // normalize (n1,d21) to avoid overflow/underflow
547  // normalize (n1,d21) by the max-norm to avoid the sqrt call
548  if (fabs(n1) <= fabs(d21))
549  {
550  // (n1,n2) <-- (n1/d21,1)
551  n1 = n1/d21;
552  mu = (2./(1. + n1*n1))*(n1*d12 + d2);
553  d2 = d2 - mu;
554  d12 = d12 - mu*n1;
555  }
556  else
557  {
558  // (n1,n2) <-- (1,d21/n1)
559  n2 = d21/n1;
560  mu = (2./(1. + n2*n2))*(d12 + n2*d2);
561  d2 = d2 - mu*n2;
562  d12 = d12 - mu;
563  }
564  }
565 
566  // Solve:
567  // | d1 d12 | | z1 | = | 0 |
568  // | 0 d2 | | z2 | | 0 |
569 
570  // choose (z1,z2) to minimize |d1*z1 + d12*z2| + |d2*z2|
571  // under the condition |z1| + |z2| = 1, z2 >= 0 (for uniqueness)
572  // set t = z1, z2 = 1 - |t|, -1 <= t <= 1
573  // objective function is:
574  // |d1*t + d12*(1 - |t|)| + |d2|*(1 - |t|) -- piecewise linear with
575  // possible minima are -1,0,1,t1 where t1: d1*t1 + d12*(1 - |t1|) = 0
576  // values: @t=+/-1 -> |d1|, @t=0 -> |n1| + |d2|, @t=t1 -> |d2|*(1 - |t1|)
577 
578  // evaluate z2 @t=t1
579  mu = -d12/d1;
580  // note: |mu| <= 1, if using l2-norm for column pivoting
581  // |mu| <= sqrt(2), if using l1-norm
582  n2 = 1./(1. + fabs(mu));
583  // check if |d1|<=|d2|*z2
584  if (fabs(d1) <= n2*fabs(d2))
585  {
586  d2 = 0.;
587  d1 = 1.;
588  }
589  else
590  {
591  d2 = n2;
592  // d1 = (n2 < 0.5) ? copysign(1. - n2, mu) : mu*n2;
593  d1 = mu*n2;
594  }
595 
596  if (swap_columns)
597  {
598  Swap(d1, d2);
599  }
600 
601  return false;
602 }
603 
604 /// Utility function used in CalcEigenvalues<3>.
605 MFEM_HOST_DEVICE static inline
606 void Vec_normalize3_aux(const double &x1, const double &x2,
607  const double &x3,
608  double &n1, double &n2, double &n3)
609 {
610  double t, r;
611 
612  const double m = fabs(x1);
613  r = x2/m;
614  t = 1. + r*r;
615  r = x3/m;
616  t = sqrt(1./(t + r*r));
617  n1 = copysign(t, x1);
618  t /= m;
619  n2 = x2*t;
620  n3 = x3*t;
621 }
622 
623 /// Utility function used in CalcEigenvalues<3>.
624 MFEM_HOST_DEVICE static inline
625 void Vec_normalize3(const double &x1, const double &x2, const double &x3,
626  double &n1, double &n2, double &n3)
627 {
628  // should work ok when xk is the same as nk for some or all k
629  if (fabs(x1) >= fabs(x2))
630  {
631  if (fabs(x1) >= fabs(x3))
632  {
633  if (x1 != 0.)
634  {
635  Vec_normalize3_aux(x1, x2, x3, n1, n2, n3);
636  }
637  else
638  {
639  n1 = n2 = n3 = 0.;
640  }
641  return;
642  }
643  }
644  else if (fabs(x2) >= fabs(x3))
645  {
646  Vec_normalize3_aux(x2, x1, x3, n2, n1, n3);
647  return;
648  }
649  Vec_normalize3_aux(x3, x1, x2, n3, n1, n2);
650 }
651 
652 /// Utility function used in CalcEigenvalues<3>.
653 MFEM_HOST_DEVICE static inline
654 int KernelVector3G_aux(const int &mode,
655  double &d1, double &d2, double &d3,
656  double &c12, double &c13, double &c23,
657  double &c21, double &c31, double &c32)
658 {
659  int kdim;
660  double mu, n1, n2, n3, s1, s2, s3;
661 
662  s1 = hypot(c21, c31);
663  n1 = hypot(d1, s1);
664 
665  if (s1 != 0.)
666  {
667  // v = (s1, s2, s3)^t, |v| = 1
668  // Q = I - 2 v v^t, Q (d1, c12, c13)^t = (mu, 0, 0)^t
669  mu = copysign(n1, d1);
670  n1 = -s1*(s1/(d1 + mu)); // = d1 - mu
671  d1 = mu;
672 
673  // normalize (n1,c21,c31) to avoid overflow/underflow
674  // normalize (n1,c21,c31) by the max-norm to avoid the sqrt call
675  if (fabs(n1) >= fabs(c21))
676  {
677  if (fabs(n1) >= fabs(c31))
678  {
679  // n1 is max, (s1,s2,s3) <-- (1,c21/n1,c31/n1)
680  s2 = c21/n1;
681  s3 = c31/n1;
682  mu = 2./(1. + s2*s2 + s3*s3);
683  n2 = mu*(c12 + s2*d2 + s3*c32);
684  n3 = mu*(c13 + s2*c23 + s3*d3);
685  c12 = c12 - n2;
686  d2 = d2 - s2*n2;
687  c32 = c32 - s3*n2;
688  c13 = c13 - n3;
689  c23 = c23 - s2*n3;
690  d3 = d3 - s3*n3;
691  goto done_column_1;
692  }
693  }
694  else if (fabs(c21) >= fabs(c31))
695  {
696  // c21 is max, (s1,s2,s3) <-- (n1/c21,1,c31/c21)
697  s1 = n1/c21;
698  s3 = c31/c21;
699  mu = 2./(1. + s1*s1 + s3*s3);
700  n2 = mu*(s1*c12 + d2 + s3*c32);
701  n3 = mu*(s1*c13 + c23 + s3*d3);
702  c12 = c12 - s1*n2;
703  d2 = d2 - n2;
704  c32 = c32 - s3*n2;
705  c13 = c13 - s1*n3;
706  c23 = c23 - n3;
707  d3 = d3 - s3*n3;
708  goto done_column_1;
709  }
710  // c31 is max, (s1,s2,s3) <-- (n1/c31,c21/c31,1)
711  s1 = n1/c31;
712  s2 = c21/c31;
713  mu = 2./(1. + s1*s1 + s2*s2);
714  n2 = mu*(s1*c12 + s2*d2 + c32);
715  n3 = mu*(s1*c13 + s2*c23 + d3);
716  c12 = c12 - s1*n2;
717  d2 = d2 - s2*n2;
718  c32 = c32 - n2;
719  c13 = c13 - s1*n3;
720  c23 = c23 - s2*n3;
721  d3 = d3 - n3;
722  }
723 
724 done_column_1:
725 
726  // Solve:
727  // | d2 c23 | | z2 | = | 0 |
728  // | c32 d3 | | z3 | | 0 |
729  if (KernelVector2G(mode, d2, c23, c32, d3))
730  {
731  // Have two solutions:
732  // two vectors in the kernel are P (-c12/d1, 1, 0)^t and
733  // P (-c13/d1, 0, 1)^t where P is the permutation matrix swapping
734  // entries 1 and col.
735 
736  // A vector orthogonal to both these vectors is P (1, c12/d1, c13/d1)^t
737  d2 = c12/d1;
738  d3 = c13/d1;
739  d1 = 1.;
740  kdim = 2;
741  }
742  else
743  {
744  // solve for z1:
745  // note: |z1| <= a since |z2| + |z3| = 1, and
746  // max{|c12|,|c13|} <= max{norm(col. 2),norm(col. 3)}
747  // <= norm(col. 1) <= a |d1|
748  // a = 1, if using l2-norm for column pivoting
749  // a = sqrt(3), if using l1-norm
750  d1 = -(c12*d2 + c13*d3)/d1;
751  kdim = 1;
752  }
753 
754  Vec_normalize3(d1, d2, d3, d1, d2, d3);
755 
756  return kdim;
757 }
758 
759 /// Utility function used in CalcEigenvalues<3>.
760 MFEM_HOST_DEVICE static inline
761 int KernelVector3S(const int &mode, const double &d12,
762  const double &d13, const double &d23,
763  double &d1, double &d2, double &d3)
764 {
765  // Find a unit vector (z1,z2,z3) in the "near"-kernel of the matrix
766  // | d1 d12 d13 |
767  // | d12 d2 d23 |
768  // | d13 d23 d3 |
769  // using QR factorization.
770  // The vector (z1,z2,z3) is returned in (d1,d2,d3).
771  // Returns the dimension of the kernel, kdim, but never zero.
772  // - if kdim == 3, then (d1,d2,d3) is not defined,
773  // - if kdim == 2, then (d1,d2,d3) is a vector orthogonal to the kernel,
774  // - otherwise kdim == 1 and (d1,d2,d3) is a vector in the "near"-kernel.
775 
776  double c12 = d12, c13 = d13, c23 = d23;
777  double c21, c31, c32;
778  int col, row;
779 
780  // l1-norms of the columns:
781  c32 = fabs(d1) + fabs(c12) + fabs(c13);
782  c31 = fabs(d2) + fabs(c12) + fabs(c23);
783  c21 = fabs(d3) + fabs(c13) + fabs(c23);
784 
785  // column pivoting: choose the column with the largest norm
786  if (c32 >= c21)
787  {
788  col = (c32 >= c31) ? 1 : 2;
789  }
790  else
791  {
792  col = (c31 >= c21) ? 2 : 3;
793  }
794  switch (col)
795  {
796  case 1:
797  if (c32 == 0.) // zero matrix
798  {
799  return 3;
800  }
801  break;
802 
803  case 2:
804  if (c31 == 0.) // zero matrix
805  {
806  return 3;
807  }
808  Swap(c13, c23);
809  Swap(d1, d2);
810  break;
811 
812  case 3:
813  if (c21 == 0.) // zero matrix
814  {
815  return 3;
816  }
817  Swap(c12, c23);
818  Swap(d1, d3);
819  }
820 
821  // row pivoting depending on 'mode'
822  if (mode == 0)
823  {
824  if (fabs(d1) <= fabs(c13))
825  {
826  row = (fabs(d1) <= fabs(c12)) ? 1 : 2;
827  }
828  else
829  {
830  row = (fabs(c12) <= fabs(c13)) ? 2 : 3;
831  }
832  }
833  else
834  {
835  if (fabs(d1) >= fabs(c13))
836  {
837  row = (fabs(d1) >= fabs(c12)) ? 1 : 2;
838  }
839  else
840  {
841  row = (fabs(c12) >= fabs(c13)) ? 2 : 3;
842  }
843  }
844  switch (row)
845  {
846  case 1:
847  c21 = c12;
848  c31 = c13;
849  c32 = c23;
850  break;
851 
852  case 2:
853  c21 = d1;
854  c31 = c13;
855  c32 = c23;
856  d1 = c12;
857  c12 = d2;
858  d2 = d1;
859  c13 = c23;
860  c23 = c31;
861  break;
862 
863  case 3:
864  c21 = c12;
865  c31 = d1;
866  c32 = c12;
867  d1 = c13;
868  c12 = c23;
869  c13 = d3;
870  d3 = d1;
871  }
872  row = KernelVector3G_aux(mode, d1, d2, d3, c12, c13, c23, c21, c31, c32);
873  // row is kdim
874 
875  switch (col)
876  {
877  case 2:
878  Swap(d1, d2);
879  break;
880 
881  case 3:
882  Swap(d1, d3);
883  }
884  return row;
885 }
886 
887 /// Utility function used in CalcEigenvalues<3>.
888 MFEM_HOST_DEVICE static inline
889 int Reduce3S(const int &mode,
890  double &d1, double &d2, double &d3,
891  double &d12, double &d13, double &d23,
892  double &z1, double &z2, double &z3,
893  double &v1, double &v2, double &v3,
894  double &g)
895 {
896  // Given the matrix
897  // | d1 d12 d13 |
898  // A = | d12 d2 d23 |
899  // | d13 d23 d3 |
900  // and a unit eigenvector z=(z1,z2,z3), transform the matrix A into the
901  // matrix B = Q P A P Q that has the form
902  // | b1 0 0 |
903  // B = Q P A P Q = | 0 b2 b23 |
904  // | 0 b23 b3 |
905  // where P is the permutation matrix switching entries 1 and k, and
906  // Q is the reflection matrix Q = I - g v v^t, defined by: set y = P z and
907  // v = c(y - e_1); if y = e_1, then v = 0 and Q = I.
908  // Note: Q y = e_1, Q e_1 = y ==> Q P A P Q e_1 = ... = lambda e_1.
909  // The entries (b1,b2,b3,b23) are returned in (d1,d2,d3,d23), and the
910  // return value of the function is k. The variable g = 2/(v1^2+v2^2+v3^3).
911 
912  int k;
913  double s, w1, w2, w3;
914 
915  if (mode == 0)
916  {
917  // choose k such that z^t e_k = zk has the smallest absolute value, i.e.
918  // the angle between z and e_k is closest to pi/2
919  if (fabs(z1) <= fabs(z3))
920  {
921  k = (fabs(z1) <= fabs(z2)) ? 1 : 2;
922  }
923  else
924  {
925  k = (fabs(z2) <= fabs(z3)) ? 2 : 3;
926  }
927  }
928  else
929  {
930  // choose k such that zk is the largest by absolute value
931  if (fabs(z1) >= fabs(z3))
932  {
933  k = (fabs(z1) >= fabs(z2)) ? 1 : 2;
934  }
935  else
936  {
937  k = (fabs(z2) >= fabs(z3)) ? 2 : 3;
938  }
939  }
940  switch (k)
941  {
942  case 2:
943  Swap(d13, d23);
944  Swap(d1, d2);
945  Swap(z1, z2);
946  break;
947 
948  case 3:
949  Swap(d12, d23);
950  Swap(d1, d3);
951  Swap(z1, z3);
952  }
953 
954  s = hypot(z2, z3);
955 
956  if (s == 0.)
957  {
958  // s can not be zero, if zk is the smallest (mode == 0)
959  v1 = v2 = v3 = 0.;
960  g = 1.;
961  }
962  else
963  {
964  g = copysign(1., z1);
965  v1 = -s*(s/(z1 + g)); // = z1 - g
966  // normalize (v1,z2,z3) by its max-norm, avoiding the sqrt call
967  g = fabs(v1);
968  if (fabs(z2) > g) { g = fabs(z2); }
969  if (fabs(z3) > g) { g = fabs(z3); }
970  v1 = v1/g;
971  v2 = z2/g;
972  v3 = z3/g;
973  g = 2./(v1*v1 + v2*v2 + v3*v3);
974 
975  // Compute Q A Q = A - v w^t - w v^t, where
976  // w = u - (g/2)(v^t u) v, and u = g A v
977  // set w = g A v
978  w1 = g*( d1*v1 + d12*v2 + d13*v3);
979  w2 = g*(d12*v1 + d2*v2 + d23*v3);
980  w3 = g*(d13*v1 + d23*v2 + d3*v3);
981  // w := w - (g/2)(v^t w) v
982  s = (g/2)*(v1*w1 + v2*w2 + v3*w3);
983  w1 -= s*v1;
984  w2 -= s*v2;
985  w3 -= s*v3;
986  // dij -= vi*wj + wi*vj
987  d1 -= 2*v1*w1;
988  d2 -= 2*v2*w2;
989  d23 -= v2*w3 + v3*w2;
990  d3 -= 2*v3*w3;
991  // compute the off-diagonal entries on the first row/column of B which
992  // should be zero (for debugging):
993 #if 0
994  s = d12 - v1*w2 - v2*w1; // b12 = 0
995  s = d13 - v1*w3 - v3*w1; // b13 = 0
996 #endif
997  }
998 
999  switch (k)
1000  {
1001  case 2:
1002  Swap(z1, z2);
1003  break;
1004  case 3:
1005  Swap(z1, z3);
1006  }
1007  return k;
1008 }
1009 
1010 } // namespace kernels::internal
1011 
1012 
1013 // Implementations of CalcEigenvalues and CalcSingularvalue for dim = 2, 3.
1014 
1015 /// Compute the spectrum of the matrix of size 2 with given @a data, returning
1016 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1017 /// vec (listed consecutively).
1018 template<> MFEM_HOST_DEVICE inline
1019 void CalcEigenvalues<2>(const double *data, double *lambda, double *vec)
1020 {
1021  double d0 = data[0];
1022  double d2 = data[2]; // use the upper triangular entry
1023  double d3 = data[3];
1024  double c, s;
1025  internal::Eigensystem2S(d2, d0, d3, c, s);
1026  if (d0 <= d3)
1027  {
1028  lambda[0] = d0;
1029  lambda[1] = d3;
1030  vec[0] = c;
1031  vec[1] = -s;
1032  vec[2] = s;
1033  vec[3] = c;
1034  }
1035  else
1036  {
1037  lambda[0] = d3;
1038  lambda[1] = d0;
1039  vec[0] = s;
1040  vec[1] = c;
1041  vec[2] = c;
1042  vec[3] = -s;
1043  }
1044 }
1045 
1046 /// Compute the spectrum of the matrix of size 3 with given @a data, returning
1047 /// the eigenvalues in the array @a lambda and the eigenvectors in the array @a
1048 /// vec (listed consecutively).
1049 template<> MFEM_HOST_DEVICE inline
1050 void CalcEigenvalues<3>(const double *data, double *lambda, double *vec)
1051 {
1052  double d11 = data[0];
1053  double d12 = data[3]; // use the upper triangular entries
1054  double d22 = data[4];
1055  double d13 = data[6];
1056  double d23 = data[7];
1057  double d33 = data[8];
1058 
1059  double mult;
1060  {
1061  double d_max = fabs(d11);
1062  if (d_max < fabs(d22)) { d_max = fabs(d22); }
1063  if (d_max < fabs(d33)) { d_max = fabs(d33); }
1064  if (d_max < fabs(d12)) { d_max = fabs(d12); }
1065  if (d_max < fabs(d13)) { d_max = fabs(d13); }
1066  if (d_max < fabs(d23)) { d_max = fabs(d23); }
1067 
1068  internal::GetScalingFactor(d_max, mult);
1069  }
1070 
1071  d11 /= mult; d22 /= mult; d33 /= mult;
1072  d12 /= mult; d13 /= mult; d23 /= mult;
1073 
1074  double aa = (d11 + d22 + d33)/3; // aa = tr(A)/3
1075  double c1 = d11 - aa;
1076  double c2 = d22 - aa;
1077  double c3 = d33 - aa;
1078 
1079  double Q, R;
1080 
1081  Q = (2*(d12*d12 + d13*d13 + d23*d23) + c1*c1 + c2*c2 + c3*c3)/6;
1082  R = (c1*(d23*d23 - c2*c3)+ d12*(d12*c3 - 2*d13*d23) + d13*d13*c2)/2;
1083 
1084  if (Q <= 0.)
1085  {
1086  lambda[0] = lambda[1] = lambda[2] = aa;
1087  vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1088  vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1089  vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1090  }
1091  else
1092  {
1093  double sqrtQ = sqrt(Q);
1094  double sqrtQ3 = Q*sqrtQ;
1095  // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1096  // double sqrtQ3 = pow(Q, 1.5);
1097  double r;
1098  if (fabs(R) >= sqrtQ3)
1099  {
1100  if (R < 0.)
1101  {
1102  // R = -1.;
1103  r = 2*sqrtQ;
1104  }
1105  else
1106  {
1107  // R = 1.;
1108  r = -2*sqrtQ;
1109  }
1110  }
1111  else
1112  {
1113  R = R/sqrtQ3;
1114 
1115  if (R < 0.)
1116  {
1117  r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1118  }
1119  else
1120  {
1121  r = -2*sqrtQ*cos(acos(R)/3); // min
1122  }
1123  }
1124 
1125  aa += r;
1126  c1 = d11 - aa;
1127  c2 = d22 - aa;
1128  c3 = d33 - aa;
1129 
1130  // Type of Householder reflections: z --> mu ek, where k is the index
1131  // of the entry in z with:
1132  // mode == 0: smallest absolute value --> angle closest to pi/2
1133  // mode == 1: largest absolute value --> angle farthest from pi/2
1134  // Observations:
1135  // mode == 0 produces better eigenvectors, less accurate eigenvalues?
1136  // mode == 1 produces better eigenvalues, less accurate eigenvectors?
1137  const int mode = 0;
1138 
1139  // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1140  // | c1 d12 d13 |
1141  // | d12 c2 d23 | = A - aa*I
1142  // | d13 d23 c3 |
1143  // This vector is also an eigenvector for A corresponding to aa.
1144  // The vector z overwrites (c1,c2,c3).
1145  switch (internal::KernelVector3S(mode, d12, d13, d23, c1, c2, c3))
1146  {
1147  case 3:
1148  // 'aa' is a triple eigenvalue
1149  lambda[0] = lambda[1] = lambda[2] = aa;
1150  vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1151  vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1152  vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1153  goto done_3d;
1154 
1155  case 2:
1156  // ok, continue with the returned vector orthogonal to the kernel
1157  case 1:
1158  // ok, continue with the returned vector in the "near"-kernel
1159  ;
1160  }
1161 
1162  // Using the eigenvector c=(c1,c2,c3) transform A into
1163  // | d11 0 0 |
1164  // A <-- Q P A P Q = | 0 d22 d23 |
1165  // | 0 d23 d33 |
1166  double v1, v2, v3, g;
1167  int k = internal::Reduce3S(mode, d11, d22, d33, d12, d13, d23,
1168  c1, c2, c3, v1, v2, v3, g);
1169  // Q = I - 2 v v^t
1170  // P - permutation matrix switching entries 1 and k
1171 
1172  // find the eigenvalues and eigenvectors for
1173  // | d22 d23 |
1174  // | d23 d33 |
1175  double c, s;
1176  internal::Eigensystem2S(d23, d22, d33, c, s);
1177  // d22 <-> P Q (0, c, -s), d33 <-> P Q (0, s, c)
1178 
1179  double *vec_1, *vec_2, *vec_3;
1180  if (d11 <= d22)
1181  {
1182  if (d22 <= d33)
1183  {
1184  lambda[0] = d11; vec_1 = vec;
1185  lambda[1] = d22; vec_2 = vec + 3;
1186  lambda[2] = d33; vec_3 = vec + 6;
1187  }
1188  else if (d11 <= d33)
1189  {
1190  lambda[0] = d11; vec_1 = vec;
1191  lambda[1] = d33; vec_3 = vec + 3;
1192  lambda[2] = d22; vec_2 = vec + 6;
1193  }
1194  else
1195  {
1196  lambda[0] = d33; vec_3 = vec;
1197  lambda[1] = d11; vec_1 = vec + 3;
1198  lambda[2] = d22; vec_2 = vec + 6;
1199  }
1200  }
1201  else
1202  {
1203  if (d11 <= d33)
1204  {
1205  lambda[0] = d22; vec_2 = vec;
1206  lambda[1] = d11; vec_1 = vec + 3;
1207  lambda[2] = d33; vec_3 = vec + 6;
1208  }
1209  else if (d22 <= d33)
1210  {
1211  lambda[0] = d22; vec_2 = vec;
1212  lambda[1] = d33; vec_3 = vec + 3;
1213  lambda[2] = d11; vec_1 = vec + 6;
1214  }
1215  else
1216  {
1217  lambda[0] = d33; vec_3 = vec;
1218  lambda[1] = d22; vec_2 = vec + 3;
1219  lambda[2] = d11; vec_1 = vec + 6;
1220  }
1221  }
1222 
1223  vec_1[0] = c1;
1224  vec_1[1] = c2;
1225  vec_1[2] = c3;
1226  d22 = g*(v2*c - v3*s);
1227  d33 = g*(v2*s + v3*c);
1228  vec_2[0] = - v1*d22; vec_3[0] = - v1*d33;
1229  vec_2[1] = c - v2*d22; vec_3[1] = s - v2*d33;
1230  vec_2[2] = -s - v3*d22; vec_3[2] = c - v3*d33;
1231  switch (k)
1232  {
1233  case 2:
1234  internal::Swap(vec_2[0], vec_2[1]);
1235  internal::Swap(vec_3[0], vec_3[1]);
1236  break;
1237 
1238  case 3:
1239  internal::Swap(vec_2[0], vec_2[2]);
1240  internal::Swap(vec_3[0], vec_3[2]);
1241  }
1242  }
1243 
1244 done_3d:
1245  lambda[0] *= mult;
1246  lambda[1] *= mult;
1247  lambda[2] *= mult;
1248 }
1249 
1250 /// Return the i'th singular value of the matrix of size 2 with given @a data.
1251 template<> MFEM_HOST_DEVICE inline
1252 double CalcSingularvalue<2>(const double *data, const int i)
1253 {
1254  double d0, d1, d2, d3;
1255  d0 = data[0];
1256  d1 = data[1];
1257  d2 = data[2];
1258  d3 = data[3];
1259  double mult;
1260 
1261  {
1262  double d_max = fabs(d0);
1263  if (d_max < fabs(d1)) { d_max = fabs(d1); }
1264  if (d_max < fabs(d2)) { d_max = fabs(d2); }
1265  if (d_max < fabs(d3)) { d_max = fabs(d3); }
1266  internal::GetScalingFactor(d_max, mult);
1267  }
1268 
1269  d0 /= mult;
1270  d1 /= mult;
1271  d2 /= mult;
1272  d3 /= mult;
1273 
1274  double t = 0.5*((d0+d2)*(d0-d2)+(d1-d3)*(d1+d3));
1275  double s = d0*d2 + d1*d3;
1276  s = sqrt(0.5*(d0*d0 + d1*d1 + d2*d2 + d3*d3) + sqrt(t*t + s*s));
1277 
1278  if (s == 0.0)
1279  {
1280  return 0.0;
1281  }
1282  t = fabs(d0*d3 - d1*d2) / s;
1283  if (t > s)
1284  {
1285  if (i == 0)
1286  {
1287  return t*mult;
1288  }
1289  return s*mult;
1290  }
1291  if (i == 0)
1292  {
1293  return s*mult;
1294  }
1295  return t*mult;
1296 }
1297 
1298 /// Return the i'th singular value of the matrix of size 3 with given @a data.
1299 template<> MFEM_HOST_DEVICE inline
1300 double CalcSingularvalue<3>(const double *data, const int i)
1301 {
1302  double d0, d1, d2, d3, d4, d5, d6, d7, d8;
1303  d0 = data[0]; d3 = data[3]; d6 = data[6];
1304  d1 = data[1]; d4 = data[4]; d7 = data[7];
1305  d2 = data[2]; d5 = data[5]; d8 = data[8];
1306  double mult;
1307  {
1308  double d_max = fabs(d0);
1309  if (d_max < fabs(d1)) { d_max = fabs(d1); }
1310  if (d_max < fabs(d2)) { d_max = fabs(d2); }
1311  if (d_max < fabs(d3)) { d_max = fabs(d3); }
1312  if (d_max < fabs(d4)) { d_max = fabs(d4); }
1313  if (d_max < fabs(d5)) { d_max = fabs(d5); }
1314  if (d_max < fabs(d6)) { d_max = fabs(d6); }
1315  if (d_max < fabs(d7)) { d_max = fabs(d7); }
1316  if (d_max < fabs(d8)) { d_max = fabs(d8); }
1317  internal::GetScalingFactor(d_max, mult);
1318  }
1319 
1320  d0 /= mult; d1 /= mult; d2 /= mult;
1321  d3 /= mult; d4 /= mult; d5 /= mult;
1322  d6 /= mult; d7 /= mult; d8 /= mult;
1323 
1324  double b11 = d0*d0 + d1*d1 + d2*d2;
1325  double b12 = d0*d3 + d1*d4 + d2*d5;
1326  double b13 = d0*d6 + d1*d7 + d2*d8;
1327  double b22 = d3*d3 + d4*d4 + d5*d5;
1328  double b23 = d3*d6 + d4*d7 + d5*d8;
1329  double b33 = d6*d6 + d7*d7 + d8*d8;
1330 
1331  // double a, b, c;
1332  // a = -(b11 + b22 + b33);
1333  // b = b11*(b22 + b33) + b22*b33 - b12*b12 - b13*b13 - b23*b23;
1334  // c = b11*(b23*b23 - b22*b33) + b12*(b12*b33 - 2*b13*b23) + b13*b13*b22;
1335 
1336  // double Q = (a * a - 3 * b) / 9;
1337  // double Q = (b12*b12 + b13*b13 + b23*b23 +
1338  // ((b11 - b22)*(b11 - b22) +
1339  // (b11 - b33)*(b11 - b33) +
1340  // (b22 - b33)*(b22 - b33))/6)/3;
1341  // Q = (3*(b12^2 + b13^2 + b23^2) +
1342  // ((b11 - b22)^2 + (b11 - b33)^2 + (b22 - b33)^2)/2)/9
1343  // or
1344  // Q = (1/6)*|B-tr(B)/3|_F^2
1345  // Q >= 0 and
1346  // Q = 0 <==> B = scalar * I
1347  // double R = (2 * a * a * a - 9 * a * b + 27 * c) / 54;
1348  double aa = (b11 + b22 + b33)/3; // aa = tr(B)/3
1349  double c1, c2, c3;
1350  // c1 = b11 - aa; // ((b11 - b22) + (b11 - b33))/3
1351  // c2 = b22 - aa; // ((b22 - b11) + (b22 - b33))/3
1352  // c3 = b33 - aa; // ((b33 - b11) + (b33 - b22))/3
1353  {
1354  double b11_b22 = ((d0-d3)*(d0+d3)+(d1-d4)*(d1+d4)+(d2-d5)*(d2+d5));
1355  double b22_b33 = ((d3-d6)*(d3+d6)+(d4-d7)*(d4+d7)+(d5-d8)*(d5+d8));
1356  double b33_b11 = ((d6-d0)*(d6+d0)+(d7-d1)*(d7+d1)+(d8-d2)*(d8+d2));
1357  c1 = (b11_b22 - b33_b11)/3;
1358  c2 = (b22_b33 - b11_b22)/3;
1359  c3 = (b33_b11 - b22_b33)/3;
1360  }
1361  double Q, R;
1362  Q = (2*(b12*b12 + b13*b13 + b23*b23) + c1*c1 + c2*c2 + c3*c3)/6;
1363  R = (c1*(b23*b23 - c2*c3)+ b12*(b12*c3 - 2*b13*b23) +b13*b13*c2)/2;
1364  // R = (-1/2)*det(B-(tr(B)/3)*I)
1365  // Note: 54*(det(S))^2 <= |S|_F^6, when S^t=S and tr(S)=0, S is 3x3
1366  // Therefore: R^2 <= Q^3
1367 
1368  if (Q <= 0.) { ; }
1369 
1370  // else if (fabs(R) >= sqrtQ3)
1371  // {
1372  // double det = (d[0] * (d[4] * d[8] - d[5] * d[7]) +
1373  // d[3] * (d[2] * d[7] - d[1] * d[8]) +
1374  // d[6] * (d[1] * d[5] - d[2] * d[4]));
1375  //
1376  // if (R > 0.)
1377  // {
1378  // if (i == 2)
1379  // // aa -= 2*sqrtQ;
1380  // return fabs(det)/(aa + sqrtQ);
1381  // else
1382  // aa += sqrtQ;
1383  // }
1384  // else
1385  // {
1386  // if (i != 0)
1387  // aa -= sqrtQ;
1388  // // aa = fabs(det)/sqrt(aa + 2*sqrtQ);
1389  // else
1390  // aa += 2*sqrtQ;
1391  // }
1392  // }
1393 
1394  else
1395  {
1396  double sqrtQ = sqrt(Q);
1397  double sqrtQ3 = Q*sqrtQ;
1398  // double sqrtQ3 = sqrtQ*sqrtQ*sqrtQ;
1399  // double sqrtQ3 = pow(Q, 1.5);
1400  double r;
1401 
1402  if (fabs(R) >= sqrtQ3)
1403  {
1404  if (R < 0.)
1405  {
1406  // R = -1.;
1407  r = 2*sqrtQ;
1408  }
1409  else
1410  {
1411  // R = 1.;
1412  r = -2*sqrtQ;
1413  }
1414  }
1415  else
1416  {
1417  R = R/sqrtQ3;
1418 
1419  // if (fabs(R) <= 0.95)
1420  if (fabs(R) <= 0.9)
1421  {
1422  if (i == 2)
1423  {
1424  aa -= 2*sqrtQ*cos(acos(R)/3); // min
1425  }
1426  else if (i == 0)
1427  {
1428  aa -= 2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1429  }
1430  else
1431  {
1432  aa -= 2*sqrtQ*cos((acos(R) - 2.0*M_PI)/3); // mid
1433  }
1434  goto have_aa;
1435  }
1436 
1437  if (R < 0.)
1438  {
1439  r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3); // max
1440  if (i == 0)
1441  {
1442  aa += r;
1443  goto have_aa;
1444  }
1445  }
1446  else
1447  {
1448  r = -2*sqrtQ*cos(acos(R)/3); // min
1449  if (i == 2)
1450  {
1451  aa += r;
1452  goto have_aa;
1453  }
1454  }
1455  }
1456 
1457  // (tr(B)/3 + r) is the root which is separated from the other
1458  // two roots which are close to each other when |R| is close to 1
1459 
1460  c1 -= r;
1461  c2 -= r;
1462  c3 -= r;
1463  // aa += r;
1464 
1465  // Type of Householder reflections: z --> mu ek, where k is the index
1466  // of the entry in z with:
1467  // mode == 0: smallest absolute value --> angle closest to pi/2
1468  // (eliminate large entries)
1469  // mode == 1: largest absolute value --> angle farthest from pi/2
1470  // (eliminate small entries)
1471  const int mode = 1;
1472 
1473  // Find a unit vector z = (z1,z2,z3) in the "near"-kernel of
1474  // | c1 b12 b13 |
1475  // | b12 c2 b23 | = B - aa*I
1476  // | b13 b23 c3 |
1477  // This vector is also an eigenvector for B corresponding to aa
1478  // The vector z overwrites (c1,c2,c3).
1479  switch (internal::KernelVector3S(mode, b12, b13, b23, c1, c2, c3))
1480  {
1481  case 3:
1482  aa += r;
1483  goto have_aa;
1484  case 2:
1485  // ok, continue with the returned vector orthogonal to the kernel
1486  case 1:
1487  // ok, continue with the returned vector in the "near"-kernel
1488  ;
1489  }
1490 
1491  // Using the eigenvector c = (c1,c2,c3) to transform B into
1492  // | b11 0 0 |
1493  // B <-- Q P B P Q = | 0 b22 b23 |
1494  // | 0 b23 b33 |
1495  double v1, v2, v3, g;
1496  internal::Reduce3S(mode, b11, b22, b33, b12, b13, b23,
1497  c1, c2, c3, v1, v2, v3, g);
1498  // Q = I - g v v^t
1499  // P - permutation matrix switching rows and columns 1 and k
1500 
1501  // find the eigenvalues of
1502  // | b22 b23 |
1503  // | b23 b33 |
1504  internal::Eigenvalues2S(b23, b22, b33);
1505 
1506  if (i == 2)
1507  {
1508  aa = fmin(fmin(b11, b22), b33);
1509  }
1510  else if (i == 1)
1511  {
1512  if (b11 <= b22)
1513  {
1514  aa = (b22 <= b33) ? b22 : fmax(b11, b33);
1515  }
1516  else
1517  {
1518  aa = (b11 <= b33) ? b11 : fmax(b33, b22);
1519  }
1520  }
1521  else
1522  {
1523  aa = fmax(fmax(b11, b22), b33);
1524  }
1525  }
1526 
1527 have_aa:
1528 
1529  return sqrt(fabs(aa))*mult; // take abs before we sort?
1530 }
1531 
1532 
1533 /// Assuming L.U = P.A for a factored matrix (m x m),
1534 // compute x <- A x
1535 //
1536 // @param [in] data LU factorization of A
1537 // @param [in] m square matrix height
1538 // @param [in] ipiv array storing pivot information
1539 // @param [in, out] x vector storing right-hand side and then solution
1540 MFEM_HOST_DEVICE
1541 inline void LUSolve(const double *data, const int m, const int *ipiv,
1542  double *x)
1543 {
1544  // X <- P X
1545  for (int i = 0; i < m; i++)
1546  {
1547  internal::Swap<double>(x[i], x[ipiv[i]]);
1548  }
1549 
1550  // X <- L^{-1} X
1551  for (int j = 0; j < m; j++)
1552  {
1553  const double x_j = x[j];
1554  for (int i = j + 1; i < m; i++)
1555  {
1556  x[i] -= data[i + j * m] * x_j;
1557  }
1558  }
1559 
1560  // X <- U^{-1} X
1561  for (int j = m - 1; j >= 0; j--)
1562  {
1563  const double x_j = (x[j] /= data[j + j * m]);
1564  for (int i = 0; i < j; i++)
1565  {
1566  x[i] -= data[i + j * m] * x_j;
1567  }
1568  }
1569 }
1570 
1571 } // namespace kernels
1572 
1573 } // namespace mfem
1574 
1575 #endif // MFEM_LINALG_KERNELS_HPP
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition: kernels.hpp:208
MFEM_HOST_DEVICE double DistanceSquared(const double *x, const double *y)
Compute the square of the Euclidean distance to another vector.
Definition: kernels.hpp:40
MFEM_HOST_DEVICE void FNorm(double &scale_factor, double &scaled_fnorm2, const T *data)
Definition: kernels.hpp:79
MFEM_HOST_DEVICE void MultABt(const int Aheight, const int Awidth, const int Bheight, const TA *Adata, const TB *Bdata, TC *ABtdata)
Multiply a matrix of size Aheight x Awidth and data Adata with the transpose of a matrix of size Bhei...
Definition: kernels.hpp:334
double epsilon
Definition: ex25.cpp:140
MFEM_HOST_DEVICE void Diag(const double c, double *data)
Creates n x n diagonal matrix with diagonal elements c.
Definition: kernels.hpp:49
MFEM_HOST_DEVICE void Add(const int height, const int width, const TALPHA alpha, const TA *Adata, const TB *Bdata, TC *Cdata)
Compute C = A + alpha*B, where the matrices A, B and C are of size height x width with data Adata...
Definition: kernels.hpp:237
MFEM_HOST_DEVICE double CalcSingularvalue< 3 >(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size 3 with given data.
Definition: kernels.hpp:1300
MFEM_HOST_DEVICE void CalcEigenvalues(const double *data, double *lambda, double *vec)
MFEM_HOST_DEVICE void CalcEigenvalues< 2 >(const double *data, double *lambda, double *vec)
Definition: kernels.hpp:1019
double b
Definition: lissajous.cpp:42
MFEM_HOST_DEVICE void LUSolve(const double *data, const int m, const int *ipiv, double *x)
Assuming L.U = P.A for a factored matrix (m x m),.
Definition: kernels.hpp:1541
MFEM_HOST_DEVICE double Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition: kernels.hpp:133
MFEM_HOST_DEVICE void CalcAdjugate(const T *data, T *adj_data)
Return the adjugate of a matrix.
Definition: kernels.hpp:227
void Swap(Array< T > &, Array< T > &)
Definition: array.hpp:625
MFEM_HOST_DEVICE double CalcSingularvalue(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size dim with given data.
MFEM_HOST_DEVICE double CalcSingularvalue< 2 >(const double *data, const int i)
Return the i&#39;th singular value of the matrix of size 2 with given data.
Definition: kernels.hpp:1252
double a
Definition: lissajous.cpp:41
MFEM_HOST_DEVICE void CalcEigenvalues< 3 >(const double *data, double *lambda, double *vec)
Definition: kernels.hpp:1050
double mu
Definition: ex25.cpp:139
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data...
Definition: kernels.hpp:217
int dim
Definition: ex24.cpp:53
MFEM_HOST_DEVICE void Subtract(const double a, const double *x, const double *y, double *z)
Vector subtraction operation: z = a * (x - y)
Definition: kernels.hpp:58
RefCoord t[3]
MFEM_HOST_DEVICE void Set(const int height, const int width, const double alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata...
Definition: kernels.hpp:297
const double alpha
Definition: ex15.cpp:369
MFEM_HOST_DEVICE void Mult(const int height, const int width, TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data...
Definition: kernels.hpp:163
RefCoord s[3]
MFEM_HOST_DEVICE void AddMultVWt(const double *v, const double *w, double *VWt)
Dense matrix operation: VWt += v w^t.
Definition: kernels.hpp:67
MFEM_HOST_DEVICE void Symmetrize(const int size, T *data)
Symmetrize a square matrix with given size and data: A -&gt; (A+A^T)/2.
Definition: kernels.hpp:194
MFEM_HOST_DEVICE double FNorm2(const T *data)
Compute the square of the Frobenius norm of the matrix.
Definition: kernels.hpp:123