12#ifndef MFEM_LINALG_KERNELS_HPP
13#define MFEM_LINALG_KERNELS_HPP
40 for (
int i = 0; i <
dim; i++) { d += (x[i]-y[i])*(x[i]-y[i]); }
49 for (
int i = 0; i < N; i++) { data[i] = 0.0; }
50 for (
int i = 0; i <
dim; i++) { data[i*(
dim+1)] = c; }
59 for (
int i = 0; i <
dim; i++) { z[i] =
a * (x[i] - y[i]); }
67 for (
int i = 0; i <
dim; i++)
70 for (
int j = 0; j <
dim; j++) { VWt[i*
dim+j] += vi * w[j]; }
74template<
int H,
int W,
typename T>
75MFEM_HOST_DEVICE
inline
79 T max_norm = 0.0, entry, fnorm2;
81 for (i = 0; i < hw; i++)
83 entry = fabs(data[i]);
92 scale_factor = scaled_fnorm2 = 0.0;
97 for (i = 0; i < hw; i++)
99 entry = data[i] / max_norm;
100 fnorm2 += entry * entry;
103 scale_factor = max_norm;
104 scaled_fnorm2 = fnorm2;
108template<
int H,
int W,
typename T>
109MFEM_HOST_DEVICE
inline
118template<
int H,
int W,
typename T>
119MFEM_HOST_DEVICE
inline
129MFEM_HOST_DEVICE
inline
132 if (0 == size) {
return 0.0; }
133 if (1 == size) {
return std::abs(data[0]); }
136 for (
int i = 0; i < size; i++)
140 const T absdata = fabs(data[i]);
141 if (scale <= absdata)
143 const T sqr_arg = scale / absdata;
144 sum = 1.0 + sum * (sqr_arg * sqr_arg);
148 const T sqr_arg = absdata / scale;
149 sum += (sqr_arg * sqr_arg);
152 return scale * sqrt(sum);
158template<
typename TA,
typename TX,
typename TY>
159MFEM_HOST_DEVICE
inline
160void Mult(
const int height,
const int width,
const TA *data,
const TX *x, TY *y)
164 for (
int row = 0; row < height; row++)
170 const TA *d_col = data;
172 for (
int row = 0; row < height; row++)
174 y[row] = x_col*d_col[row];
177 for (
int col = 1; col < width; col++)
180 for (
int row = 0; row < height; row++)
182 y[row] += x_col*d_col[row];
191template<
typename TA,
typename TX,
typename TY>
192MFEM_HOST_DEVICE
inline
193void AbsMult(
const int height,
const int width,
const TA *data,
198 for (
int row = 0; row < height; row++)
204 const TA *d_col = data;
206 for (
int row = 0; row < height; row++)
208 y[row] = x_col*std::fabs(d_col[row]);
211 for (
int col = 1; col < width; col++)
214 for (
int row = 0; row < height; row++)
216 y[row] += x_col*std::fabs(d_col[row]);
225template<
typename TA,
typename TX,
typename TY>
226MFEM_HOST_DEVICE
inline
232 for (
int row = 0; row < width; row++)
239 for (
int i = 0; i < width; ++i)
242 for (
int j = 0; j < height; ++j)
244 val += x[j] * data[i * height + j];
254template<
typename TA,
typename TX,
typename TY>
255MFEM_HOST_DEVICE
inline
261 for (
int row = 0; row < width; row++)
268 for (
int i = 0; i < width; ++i)
271 for (
int j = 0; j < height; ++j)
273 val += x[j] * std::fabs(data[i * height + j]);
282MFEM_HOST_DEVICE
inline
285 for (
int i = 0; i < size; i++)
287 for (
int j = 0; j < i; j++)
289 const T
a = 0.5 * (data[i*size+j] + data[j*size+i]);
290 data[j*size+i] = data[i*size+j] =
a;
296template<
int dim,
typename T>
297MFEM_HOST_DEVICE
inline T
Det(
const T *data)
304template<
int dim,
typename T>
305MFEM_HOST_DEVICE
inline
309 const T det =
TAdjDetHD<T>(layout_t(), data, layout_t(), inv_data);
314template<
int dim,
typename T>
315MFEM_HOST_DEVICE
inline
324template<
typename TALPHA,
typename TA,
typename TB,
typename TC>
325MFEM_HOST_DEVICE
inline
326void Add(
const int height,
const int width,
const TALPHA
alpha,
327 const TA *Adata,
const TB *Bdata, TC *Cdata)
329 for (
int j = 0; j < width; j++)
331 for (
int i = 0; i < height; i++)
333 const int n = i*width+j;
334 Cdata[n] = Adata[n] +
alpha * Bdata[n];
341template<
typename TALPHA,
typename TBETA,
typename TA,
typename TB,
typename TC>
342MFEM_HOST_DEVICE
inline
343void Add(
const int height,
const int width,
344 const TALPHA
alpha,
const TA *Adata,
345 const TBETA beta,
const TB *Bdata,
348 const int m = height * width;
349 for (
int i = 0; i < m; i++)
351 Cdata[i] =
alpha * Adata[i] + beta * Bdata[i];
357template<
typename TA,
typename TB>
358MFEM_HOST_DEVICE
inline
359void Add(
const int height,
const int width,
const TA *Adata, TB *Bdata)
361 const int m = height * width;
362 for (
int i = 0; i < m; i++)
364 Bdata[i] += Adata[i];
370template<
typename TA,
typename TB>
371MFEM_HOST_DEVICE
inline
372void Add(
const int height,
const int width,
375 const int m = height * width;
376 for (
int i = 0; i < m; i++)
378 Bdata[i] +=
alpha * Adata[i];
384template<
typename TA,
typename TB>
385MFEM_HOST_DEVICE
inline
386void Set(
const int height,
const int width,
389 const int m = height * width;
390 for (
int i = 0; i < m; i++)
392 Bdata[i] =
alpha * Adata[i];
399template<
typename TA,
typename TB,
typename TC>
400MFEM_HOST_DEVICE
inline
401void AddMult(
const int Aheight,
const int Awidth,
const int Bwidth,
402 const TB *Bdata,
const TC *Cdata, TA *Adata,
const TB
alpha,
405 const int ah_x_aw = Aheight * Awidth;
408 for (
int i = 0; i < ah_x_aw; i++) { Adata[i] = 0.0; }
410 else if (beta != 1.0)
412 for (
int i = 0; i < ah_x_aw; i++) { Adata[i] *= beta; }
414 for (
int j = 0; j < Awidth; j++)
416 for (
int k = 0; k < Bwidth; k++)
419 for (
int i = 0; i < Aheight; i++)
421 Adata[i+j*Aheight] += val * Bdata[i+k*Aheight];
430template<
typename TA,
typename TB,
typename TC>
431MFEM_HOST_DEVICE
inline
432void Mult(
const int Aheight,
const int Awidth,
const int Bwidth,
433 const TB *Bdata,
const TC *Cdata, TA *Adata)
435 AddMult(Aheight, Awidth, Bwidth, Bdata, Cdata, Adata, TB(1.0), TA(0.0));
441template<
typename TA,
typename TB,
typename TC>
442MFEM_HOST_DEVICE
inline
443void MultABt(
const int Aheight,
const int Awidth,
const int Bheight,
444 const TA *Adata,
const TB *Bdata, TC *ABtdata)
446 const int ah_x_bh = Aheight * Bheight;
447 for (
int i = 0; i < ah_x_bh; i++) { ABtdata[i] = 0.0; }
448 for (
int k = 0; k < Awidth; k++)
451 for (
int j = 0; j < Bheight; j++)
453 const real_t bjk = Bdata[j];
454 for (
int i = 0; i < Aheight; i++)
456 c[i] += Adata[i] * bjk;
469template<
typename TA,
typename TB,
typename TC>
470MFEM_HOST_DEVICE
inline
471void AddMultAtB(
const int Aheight,
const int Awidth,
const int Bwidth,
472 const TA *Adata,
const TB *Bdata, TC *Cdata,
const TB
alpha,
475 const int aw_x_bw = Awidth * Bwidth;
479 for (
int i = 0; i < aw_x_bw; i++) { Cdata[i] = 0.0; }
481 else if (beta != 1.0)
483 for (
int i = 0; i < aw_x_bw; i++) { Cdata[i] *= beta; }
487 for (
int i = 0; i < Bwidth; ++i)
489 for (
int j = 0; j < Awidth; ++j)
492 for (
int k = 0; k < Aheight; ++k)
494 val +=
alpha * Adata[j * Aheight + k] * Bdata[i * Aheight + k];
505template<
typename TA,
typename TB,
typename TC>
506MFEM_HOST_DEVICE
inline
507void MultAtB(
const int Aheight,
const int Awidth,
const int Bwidth,
508 const TA *Adata,
const TB *Bdata, TC *AtBdata)
510 AddMultAtB(Aheight, Awidth, Bwidth, Adata, Bdata, AtBdata, TB(1.0), TA(0.0));
516template<
typename TA,
typename TB,
typename TC>
517MFEM_HOST_DEVICE
inline
518void AddMultAtB(
const int Aheight,
const int Awidth,
const int Bwidth,
519 const TA *Adata,
const TB *Bdata, TC *AtBdata)
522 for (
int i = 0; i < Bwidth; ++i)
524 for (
int j = 0; j < Awidth; ++j)
527 for (
int k = 0; k < Aheight; ++k)
529 val += Adata[j * Aheight + k] * Bdata[i * Aheight + k];
538template<
int HEIGHT,
int WIDTH> MFEM_HOST_DEVICE
544template<
int dim> MFEM_HOST_DEVICE
548template<
int dim> MFEM_HOST_DEVICE
558MFEM_HOST_DEVICE
static inline
566const real_t Epsilon = std::numeric_limits<real_t>::epsilon();
569MFEM_HOST_DEVICE
static inline
572 const real_t sqrt_1_eps = sqrt(1./Epsilon);
577 const real_t zeta = (d2 - d1)/(2*d12);
578 if (fabs(zeta) < sqrt_1_eps)
580 t = d12*copysign(1./(fabs(zeta) + sqrt(1. + zeta*zeta)), zeta);
584 t = d12*copysign(0.5/fabs(zeta), zeta);
592MFEM_HOST_DEVICE
static inline
606 const real_t zeta = (d2 - d1)/(2*d12);
607 const real_t azeta = fabs(zeta);
608 if (azeta < sqrt_1_eps)
610 t = copysign(1./(azeta +
sqrt(1. + zeta*zeta)), zeta);
614 t = copysign(0.5/azeta, zeta);
616 c =
sqrt(1./(1. + t*t));
626MFEM_HOST_DEVICE
static inline
632 mult = frexp(d_max, &d_exp);
633 if (d_exp == std::numeric_limits<real_t>::max_exponent)
635 mult *= std::numeric_limits<real_t>::radix;
648MFEM_HOST_DEVICE
static inline
649bool KernelVector2G(
const int &mode,
661 real_t n1 = fabs(d1) + fabs(d21);
662 real_t n2 = fabs(d2) + fabs(d12);
664 bool swap_columns = (n2 > n1);
676 if (fabs(d1) > fabs(d21))
684 if (fabs(d1) < fabs(d21))
696 if (fabs(d12) > fabs(d2))
709 if (fabs(d12) < fabs(d2))
728 mu = copysign(n1, d1);
729 n1 = -d21*(d21/(d1 +
mu));
733 if (fabs(n1) <= fabs(d21))
737 mu = (2./(1. + n1*n1))*(n1*d12 + d2);
745 mu = (2./(1. + n2*n2))*(d12 + n2*d2);
767 n2 = 1./(1. + fabs(
mu));
769 if (fabs(d1) <= n2*fabs(d2))
790MFEM_HOST_DEVICE
static inline
791void Vec_normalize3_aux(
const real_t &x1,
const real_t &x2,
797 const real_t m = fabs(x1);
801 t =
sqrt(1./(t + r*r));
802 n1 = copysign(t, x1);
809MFEM_HOST_DEVICE
static inline
814 if (fabs(x1) >= fabs(x2))
816 if (fabs(x1) >= fabs(x3))
820 Vec_normalize3_aux(x1, x2, x3, n1, n2, n3);
829 else if (fabs(x2) >= fabs(x3))
831 Vec_normalize3_aux(x2, x1, x3, n2, n1, n3);
834 Vec_normalize3_aux(x3, x1, x2, n3, n1, n2);
838MFEM_HOST_DEVICE
static inline
839int KernelVector3G_aux(
const int &mode,
847 s1 = hypot(c21, c31);
854 mu = copysign(n1, d1);
855 n1 = -s1*(s1/(d1 +
mu));
860 if (fabs(n1) >= fabs(c21))
862 if (fabs(n1) >= fabs(c31))
867 mu = 2./(1. + s2*s2 + s3*s3);
868 n2 =
mu*(c12 + s2*d2 + s3*c32);
869 n3 =
mu*(c13 + s2*c23 + s3*d3);
879 else if (fabs(c21) >= fabs(c31))
884 mu = 2./(1. + s1*s1 + s3*s3);
885 n2 =
mu*(s1*c12 + d2 + s3*c32);
886 n3 =
mu*(s1*c13 + c23 + s3*d3);
898 mu = 2./(1. + s1*s1 + s2*s2);
899 n2 =
mu*(s1*c12 + s2*d2 + c32);
900 n3 =
mu*(s1*c13 + s2*c23 + d3);
914 if (KernelVector2G(mode, d2, c23, c32, d3))
935 d1 = -(c12*d2 + c13*d3)/d1;
939 Vec_normalize3(d1, d2, d3, d1, d2, d3);
945MFEM_HOST_DEVICE
static inline
946int KernelVector3S(
const int &mode,
const real_t &d12,
961 real_t c12 = d12, c13 = d13, c23 = d23;
966 c32 = fabs(d1) + fabs(c12) + fabs(c13);
967 c31 = fabs(d2) + fabs(c12) + fabs(c23);
968 c21 = fabs(d3) + fabs(c13) + fabs(c23);
973 col = (c32 >= c31) ? 1 : 2;
977 col = (c31 >= c21) ? 2 : 3;
1009 if (fabs(d1) <= fabs(c13))
1011 row = (fabs(d1) <= fabs(c12)) ? 1 : 2;
1015 row = (fabs(c12) <= fabs(c13)) ? 2 : 3;
1020 if (fabs(d1) >= fabs(c13))
1022 row = (fabs(d1) >= fabs(c12)) ? 1 : 2;
1026 row = (fabs(c12) >= fabs(c13)) ? 2 : 3;
1057 row = KernelVector3G_aux(mode, d1, d2, d3, c12, c13, c23, c21, c31, c32);
1073MFEM_HOST_DEVICE
static inline
1074int Reduce3S(
const int &mode,
1104 if (fabs(z1) <= fabs(z3))
1106 k = (fabs(z1) <= fabs(z2)) ? 1 : 2;
1110 k = (fabs(z2) <= fabs(z3)) ? 2 : 3;
1116 if (fabs(z1) >= fabs(z3))
1118 k = (fabs(z1) >= fabs(z2)) ? 1 : 2;
1122 k = (fabs(z2) >= fabs(z3)) ? 2 : 3;
1149 g = copysign(1., z1);
1150 v1 = -s*(s/(z1 + g));
1153 if (fabs(z2) > g) { g = fabs(z2); }
1154 if (fabs(z3) > g) { g = fabs(z3); }
1158 g = 2./(v1*v1 + v2*v2 + v3*v3);
1163 w1 = g*( d1*v1 + d12*v2 + d13*v3);
1164 w2 = g*(d12*v1 + d2*v2 + d23*v3);
1165 w3 = g*(d13*v1 + d23*v2 + d3*v3);
1167 s = (g/2)*(v1*w1 + v2*w2 + v3*w3);
1174 d23 -= v2*w3 + v3*w2;
1179 s = d12 - v1*w2 - v2*w1;
1180 s = d13 - v1*w3 - v3*w1;
1199template<> MFEM_HOST_DEVICE
inline
1202 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1]);
1203 left_inv[0] = d[0] * t;
1204 left_inv[1] = d[1] * t;
1207template<> MFEM_HOST_DEVICE
inline
1210 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1] + d[2]*d[2]);
1211 left_inv[0] = d[0] * t;
1212 left_inv[1] = d[1] * t;
1213 left_inv[2] = d[2] * t;
1216template<> MFEM_HOST_DEVICE
inline
1219 real_t e = d[0]*d[0] + d[1]*d[1] + d[2]*d[2];
1220 real_t g = d[3]*d[3] + d[4]*d[4] + d[5]*d[5];
1221 real_t f = d[0]*d[3] + d[1]*d[4] + d[2]*d[5];
1222 const real_t t = 1.0 / (e*g -
f*
f);
1223 e *= t; g *= t;
f *= t;
1225 left_inv[0] = d[0]*g - d[3]*
f;
1226 left_inv[1] = d[3]*e - d[0]*
f;
1227 left_inv[2] = d[1]*g - d[4]*
f;
1228 left_inv[3] = d[4]*e - d[1]*
f;
1229 left_inv[4] = d[2]*g - d[5]*
f;
1230 left_inv[5] = d[5]*e - d[2]*
f;
1238template<> MFEM_HOST_DEVICE
inline
1245 internal::Eigensystem2S(d2, d0, d3, c, s);
1269template<> MFEM_HOST_DEVICE
inline
1281 real_t d_max = fabs(d11);
1282 if (d_max < fabs(d22)) { d_max = fabs(d22); }
1283 if (d_max < fabs(d33)) { d_max = fabs(d33); }
1284 if (d_max < fabs(d12)) { d_max = fabs(d12); }
1285 if (d_max < fabs(d13)) { d_max = fabs(d13); }
1286 if (d_max < fabs(d23)) { d_max = fabs(d23); }
1288 internal::GetScalingFactor(d_max, mult);
1291 d11 /= mult; d22 /= mult; d33 /= mult;
1292 d12 /= mult; d13 /= mult; d23 /= mult;
1294 real_t aa = (d11 + d22 + d33)/3;
1301 Q = (2*(d12*d12 + d13*d13 + d23*d23) + c1*c1 + c2*c2 + c3*c3)/6;
1302 R = (c1*(d23*d23 - c2*c3)+ d12*(d12*c3 - 2*d13*d23) + d13*d13*c2)/2;
1306 lambda[0] = lambda[1] = lambda[2] = aa;
1307 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1308 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1309 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1318 if (fabs(R) >= sqrtQ3)
1337 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3);
1341 r = -2*sqrtQ*cos(acos(R)/3);
1365 switch (internal::KernelVector3S(mode, d12, d13, d23, c1, c2, c3))
1369 lambda[0] = lambda[1] = lambda[2] = aa;
1370 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1371 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1372 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1387 int k = internal::Reduce3S(mode, d11, d22, d33, d12, d13, d23,
1388 c1, c2, c3, v1, v2, v3, g);
1396 internal::Eigensystem2S(d23, d22, d33, c, s);
1399 real_t *vec_1, *vec_2, *vec_3;
1404 lambda[0] = d11; vec_1 = vec;
1405 lambda[1] = d22; vec_2 = vec + 3;
1406 lambda[2] = d33; vec_3 = vec + 6;
1408 else if (d11 <= d33)
1410 lambda[0] = d11; vec_1 = vec;
1411 lambda[1] = d33; vec_3 = vec + 3;
1412 lambda[2] = d22; vec_2 = vec + 6;
1416 lambda[0] = d33; vec_3 = vec;
1417 lambda[1] = d11; vec_1 = vec + 3;
1418 lambda[2] = d22; vec_2 = vec + 6;
1425 lambda[0] = d22; vec_2 = vec;
1426 lambda[1] = d11; vec_1 = vec + 3;
1427 lambda[2] = d33; vec_3 = vec + 6;
1429 else if (d22 <= d33)
1431 lambda[0] = d22; vec_2 = vec;
1432 lambda[1] = d33; vec_3 = vec + 3;
1433 lambda[2] = d11; vec_1 = vec + 6;
1437 lambda[0] = d33; vec_3 = vec;
1438 lambda[1] = d22; vec_2 = vec + 3;
1439 lambda[2] = d11; vec_1 = vec + 6;
1446 d22 = g*(v2*c - v3*s);
1447 d33 = g*(v2*s + v3*c);
1448 vec_2[0] = - v1*d22; vec_3[0] = - v1*d33;
1449 vec_2[1] = c - v2*d22; vec_3[1] = s - v2*d33;
1450 vec_2[2] = -s - v3*d22; vec_3[2] = c - v3*d33;
1454 internal::Swap(vec_2[0], vec_2[1]);
1455 internal::Swap(vec_3[0], vec_3[1]);
1459 internal::Swap(vec_2[0], vec_2[2]);
1460 internal::Swap(vec_3[0], vec_3[2]);
1471template<> MFEM_HOST_DEVICE
inline
1483 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1484 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1485 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1486 internal::GetScalingFactor(d_max, mult);
1494 real_t t = 0.5*((d0+d2)*(d0-d2)+(d1-d3)*(d1+d3));
1495 real_t s = d0*d2 + d1*d3;
1496 s = sqrt(0.5*(d0*d0 + d1*d1 + d2*d2 + d3*d3) + sqrt(t*t + s*s));
1502 t = fabs(d0*d3 - d1*d2) / s;
1519template<> MFEM_HOST_DEVICE
inline
1522 real_t d0, d1, d2, d3, d4, d5, d6, d7, d8;
1523 d0 = data[0]; d3 = data[3]; d6 = data[6];
1524 d1 = data[1]; d4 = data[4]; d7 = data[7];
1525 d2 = data[2]; d5 = data[5]; d8 = data[8];
1529 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1530 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1531 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1532 if (d_max < fabs(d4)) { d_max = fabs(d4); }
1533 if (d_max < fabs(d5)) { d_max = fabs(d5); }
1534 if (d_max < fabs(d6)) { d_max = fabs(d6); }
1535 if (d_max < fabs(d7)) { d_max = fabs(d7); }
1536 if (d_max < fabs(d8)) { d_max = fabs(d8); }
1537 internal::GetScalingFactor(d_max, mult);
1540 d0 /= mult; d1 /= mult; d2 /= mult;
1541 d3 /= mult; d4 /= mult; d5 /= mult;
1542 d6 /= mult; d7 /= mult; d8 /= mult;
1544 real_t b11 = d0*d0 + d1*d1 + d2*d2;
1545 real_t b12 = d0*d3 + d1*d4 + d2*d5;
1546 real_t b13 = d0*d6 + d1*d7 + d2*d8;
1547 real_t b22 = d3*d3 + d4*d4 + d5*d5;
1548 real_t b23 = d3*d6 + d4*d7 + d5*d8;
1549 real_t b33 = d6*d6 + d7*d7 + d8*d8;
1568 real_t aa = (b11 + b22 + b33)/3;
1574 real_t b11_b22 = ((d0-d3)*(d0+d3)+(d1-d4)*(d1+d4)+(d2-d5)*(d2+d5));
1575 real_t b22_b33 = ((d3-d6)*(d3+d6)+(d4-d7)*(d4+d7)+(d5-d8)*(d5+d8));
1576 real_t b33_b11 = ((d6-d0)*(d6+d0)+(d7-d1)*(d7+d1)+(d8-d2)*(d8+d2));
1577 c1 = (b11_b22 - b33_b11)/3;
1578 c2 = (b22_b33 - b11_b22)/3;
1579 c3 = (b33_b11 - b22_b33)/3;
1582 Q = (2*(b12*b12 + b13*b13 + b23*b23) + c1*c1 + c2*c2 + c3*c3)/6;
1583 R = (c1*(b23*b23 - c2*c3)+ b12*(b12*c3 - 2*b13*b23) +b13*b13*c2)/2;
1622 if (fabs(R) >= sqrtQ3)
1644 aa -= 2*sqrtQ*cos(acos(R)/3);
1648 aa -= 2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3);
1652 aa -= 2*sqrtQ*cos((acos(R) - 2.0*M_PI)/3);
1659 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3);
1668 r = -2*sqrtQ*cos(acos(R)/3);
1699 switch (internal::KernelVector3S(mode, b12, b13, b23, c1, c2, c3))
1716 internal::Reduce3S(mode, b11, b22, b33, b12, b13, b23,
1717 c1, c2, c3, v1, v2, v3, g);
1724 internal::Eigenvalues2S(b23, b22, b33);
1728 aa = fmin(fmin(b11, b22), b33);
1734 aa = (b22 <= b33) ? b22 : fmax(b11, b33);
1738 aa = (b11 <= b33) ? b11 : fmax(b33, b22);
1743 aa = fmax(fmax(b11, b22), b33);
1749 return sqrt(fabs(aa))*mult;
1763 for (
int i = 0; i < m; i++)
1765 internal::Swap<real_t>(x[i], x[ipiv[i] - 1]);
1768 for (
int j = 0; j < m; j++)
1771 for (
int i = j + 1; i < m; i++)
1773 x[i] -= data[i + j * m] * x_j;
1787 for (
int j = m - 1; j >= 0; j--)
1789 const real_t x_j = (x[j] /= data[j + j * m]);
1790 for (
int i = 0; i < j; i++)
1792 x[i] -= data[i + j * m] * x_j;
1807 LSolve(data, m, ipiv, x);
1819 for (
int k = 0; k < r; k++)
1821 for (
int j = 0; j < m; j++)
1823 const real_t x1_jk = X1[j+k*m];
1824 for (
int i = 0; i < n; i++)
1826 X2[i+k*n] -= A21[i+j*n] * x1_jk;
1847 for (
int i = 0; i < n; ++i)
1849 LSolve(data, m, ipiv, A12 + i*m);
1852 for (
int j = 0; j < m; j++)
1854 const real_t u_jj_inv = 1.0/data[j+j*m];
1855 for (
int i = 0; i < n; i++)
1857 A21[i+j*n] *= u_jj_inv;
1859 for (
int k = j+1; k < m; k++)
1861 const real_t u_jk = data[j+k*m];
1862 for (
int i = 0; i < n; i++)
1864 A21[i+k*n] -= A21[i+j*n] * u_jk;
1869 SubMult(m, n, n, A21, A12, A22);
1887 bool pivot_flag =
true;
1889 for (
int i = 0; i < m; i++)
1894 real_t a = fabs(A[piv + m*i]);
1895 for (
int j = i+1; j < m; j++)
1897 const real_t b = fabs(A[j + m*i]);
1908 for (
int j = 0; j < m; j++)
1910 internal::Swap<real_t>(A[i + m*j], A[piv + m*j]);
1915 if (fabs(A[i + m*i]) <= tol)
1920 const real_t a_ii_inv = 1.0 / A[i + m*i];
1921 for (
int j = i+1; j < m; j++)
1923 A[j + m*i] *= a_ii_inv;
1926 for (
int k = i+1; k < m; k++)
1928 const real_t a_ik = A[i + m*k];
1929 for (
int j = i+1; j < m; j++)
1931 A[j + m*k] -= a_ik * A[j + m*i];
MFEM_HOST_DEVICE dual< value_type, gradient_type > sqrt(dual< value_type, gradient_type > x)
implementation of square root for dual numbers
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data.
MFEM_HOST_DEVICE real_t DistanceSquared(const real_t *x, const real_t *y)
Compute the square of the Euclidean distance to another vector.
MFEM_HOST_DEVICE real_t CalcSingularvalue< 2 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 2 with given data.
MFEM_HOST_DEVICE void MultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *AtBdata)
Multiply the transpose of a matrix of size Aheight x Awidth and data Adata with a matrix of size Ahei...
MFEM_HOST_DEVICE void CalcEigenvalues< 2 >(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE void CalcLeftInverse(const real_t *data, real_t *left_inv)
Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
MFEM_HOST_DEVICE void CalcAdjugate(const T *data, T *adj_data)
Return the adjugate of a matrix.
MFEM_HOST_DEVICE void Add(const int height, const int width, const TALPHA alpha, const TA *Adata, const TB *Bdata, TC *Cdata)
Compute C = A + alpha*B, where the matrices A, B and C are of size height x width with data Adata,...
MFEM_HOST_DEVICE void AbsMult(const int height, const int width, const TA *data, const TX *x, TY *y)
Absolute-value matrix vector multiplication: y = |A| x, where the matrix A is of size height x width ...
MFEM_HOST_DEVICE void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *Cdata, const TB alpha, const TA beta)
Compute C = alpha*At*B + beta*C.
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data,...
MFEM_HOST_DEVICE real_t Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
MFEM_HOST_DEVICE void FNorm(real_t &scale_factor, real_t &scaled_fnorm2, const T *data)
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void LSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- L^{-1} P X, for a vector X of length...
MFEM_HOST_DEVICE void MultABt(const int Aheight, const int Awidth, const int Bheight, const TA *Adata, const TB *Bdata, TC *ABtdata)
Multiply a matrix of size Aheight x Awidth and data Adata with the transpose of a matrix of size Bhei...
MFEM_HOST_DEVICE void MultTranspose(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix transpose vector multiplication: y = At x, where the matrix A is of size height x width with g...
MFEM_HOST_DEVICE void USolve(const real_t *data, const int m, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- U^{-1} X, for a vector X of length m...
MFEM_HOST_DEVICE void AbsMultTranspose(const int height, const int width, const TA *data, const TX *x, TY *y)
Absolute-value matrix transpose vector multiplication: y = |At| x, where the matrix A is of size heig...
MFEM_HOST_DEVICE void AddMult(const int Aheight, const int Awidth, const int Bwidth, const TB *Bdata, const TC *Cdata, TA *Adata, const TB alpha, const TA beta)
Matrix-matrix multiplication: A = alpha * B * C + beta * A, where the matrices A, B and C are of size...
MFEM_HOST_DEVICE real_t FNorm2(const T *data)
Compute the square of the Frobenius norm of the matrix.
MFEM_HOST_DEVICE real_t CalcSingularvalue(const real_t *data, const int i)
Return the i'th singular value of the matrix of size dim with given data.
MFEM_HOST_DEVICE void Symmetrize(const int size, T *data)
Symmetrize a square matrix with given size and data: A -> (A+A^T)/2.
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void BlockFactor(const real_t *data, int m, const int *ipiv, int n, real_t *A12, real_t *A21, real_t *A22)
MFEM_HOST_DEVICE void AddMultVWt(const real_t *v, const real_t *w, real_t *VWt)
Dense matrix operation: VWt += v w^t.
MFEM_HOST_DEVICE void Diag(const real_t c, real_t *data)
Creates n x n diagonal matrix with diagonal elements c.
MFEM_HOST_DEVICE bool LUFactor(real_t *A, const int m, int *ipiv, const real_t tol=0.0)
Compute the LU factorization of the m x m matrix A.
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void CalcEigenvalues(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
MFEM_HOST_DEVICE real_t CalcSingularvalue< 3 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 3 with given data.
MFEM_HOST_DEVICE void CalcEigenvalues< 3 >(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE void SubMult(const int m, const int n, const int r, const real_t *A21, const real_t *X1, real_t *X2)
Given an (n x m) matrix A21, compute X2 <- X2 - A21 X1, for matrices X1, and X2 of size (m x r) and (...
MFEM_HOST_DEVICE void Subtract(const real_t a, const real_t *x, const real_t *y, real_t *z)
Vector subtraction operation: z = a * (x - y)
MFEM_HOST_DEVICE void LUSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A for a factored matrix (m x m),.
MFEM_HOST_DEVICE void TAssignHD(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
MFEM_HOST_DEVICE scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
void Swap(T &a, T &b)
Swap objects of type T. The operation is performed using the most specialized swap function from the ...
MFEM_HOST_DEVICE scalar_t TDetHD(const layout_t &a, const data_t &A)
std::function< real_t(const Vector &)> f(real_t mass_coeff)
MFEM_HOST_DEVICE void TAdjugateHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)