12#ifndef MFEM_LINALG_KERNELS_HPP
13#define MFEM_LINALG_KERNELS_HPP
43 for (
int i = 0; i <
dim; i++) { d += (x[i]-y[i])*(x[i]-y[i]); }
52 for (
int i = 0; i < N; i++) { data[i] = 0.0; }
53 for (
int i = 0; i <
dim; i++) { data[i*(
dim+1)] = c; }
62 for (
int i = 0; i <
dim; i++) { z[i] =
a * (x[i] - y[i]); }
70 for (
int i = 0; i <
dim; i++)
73 for (
int j = 0; j <
dim; j++) { VWt[i*
dim+j] += vi * w[j]; }
77template<
int H,
int W,
typename T>
78MFEM_HOST_DEVICE
inline
82 T max_norm = 0.0, entry, fnorm2;
84 for (i = 0; i < hw; i++)
86 entry = fabs(data[i]);
95 scale_factor = scaled_fnorm2 = 0.0;
100 for (i = 0; i < hw; i++)
102 entry = data[i] / max_norm;
103 fnorm2 += entry * entry;
106 scale_factor = max_norm;
107 scaled_fnorm2 = fnorm2;
111template<
int H,
int W,
typename T>
112MFEM_HOST_DEVICE
inline
121template<
int H,
int W,
typename T>
122MFEM_HOST_DEVICE
inline
132MFEM_HOST_DEVICE
inline
135 if (0 == size) {
return 0.0; }
136 if (1 == size) {
return std::abs(data[0]); }
139 for (
int i = 0; i < size; i++)
143 const T absdata = fabs(data[i]);
144 if (scale <= absdata)
146 const T sqr_arg = scale / absdata;
147 sum = 1.0 + sum * (sqr_arg * sqr_arg);
151 const T sqr_arg = absdata / scale;
152 sum += (sqr_arg * sqr_arg);
155 return scale * sqrt(sum);
161template<
typename TA,
typename TX,
typename TY>
162MFEM_HOST_DEVICE
inline
163void Mult(
const int height,
const int width,
const TA *data,
const TX *x, TY *y)
167 for (
int row = 0; row < height; row++)
173 const TA *d_col = data;
175 for (
int row = 0; row < height; row++)
177 y[row] = x_col*d_col[row];
180 for (
int col = 1; col < width; col++)
183 for (
int row = 0; row < height; row++)
185 y[row] += x_col*d_col[row];
163void Mult(
const int height,
const int width,
const TA *data,
const TX *x, TY *y) {
…}
194template<
typename TA,
typename TX,
typename TY>
195MFEM_HOST_DEVICE
inline
201 for (
int row = 0; row < width; row++)
208 for (
int i = 0; i < width; ++i)
211 for (
int j = 0; j < height; ++j)
213 val += x[j] * data[i * height + j];
222MFEM_HOST_DEVICE
inline
225 for (
int i = 0; i < size; i++)
227 for (
int j = 0; j < i; j++)
229 const T
a = 0.5 * (data[i*size+j] + data[j*size+i]);
230 data[j*size+i] = data[i*size+j] =
a;
236template<
int dim,
typename T>
237MFEM_HOST_DEVICE
inline T
Det(
const T *data)
237MFEM_HOST_DEVICE
inline T
Det(
const T *data) {
…}
244template<
int dim,
typename T>
245MFEM_HOST_DEVICE
inline
249 const T det =
TAdjDetHD<T>(layout_t(), data, layout_t(), inv_data);
254template<
int dim,
typename T>
255MFEM_HOST_DEVICE
inline
264template<
typename TALPHA,
typename TA,
typename TB,
typename TC>
265MFEM_HOST_DEVICE
inline
266void Add(
const int height,
const int width,
const TALPHA
alpha,
267 const TA *Adata,
const TB *Bdata, TC *Cdata)
269 for (
int j = 0; j < width; j++)
271 for (
int i = 0; i < height; i++)
273 const int n = i*width+j;
274 Cdata[n] = Adata[n] +
alpha * Bdata[n];
266void Add(
const int height,
const int width,
const TALPHA
alpha, {
…}
281template<
typename TALPHA,
typename TBETA,
typename TA,
typename TB,
typename TC>
282MFEM_HOST_DEVICE
inline
283void Add(
const int height,
const int width,
284 const TALPHA
alpha,
const TA *Adata,
285 const TBETA
beta,
const TB *Bdata,
288 const int m = height * width;
289 for (
int i = 0; i < m; i++)
291 Cdata[i] =
alpha * Adata[i] +
beta * Bdata[i];
283void Add(
const int height,
const int width, {
…}
297template<
typename TA,
typename TB>
298MFEM_HOST_DEVICE
inline
299void Add(
const int height,
const int width,
const TA *Adata, TB *Bdata)
301 const int m = height * width;
302 for (
int i = 0; i < m; i++)
304 Bdata[i] += Adata[i];
299void Add(
const int height,
const int width,
const TA *Adata, TB *Bdata) {
…}
310template<
typename TA,
typename TB>
311MFEM_HOST_DEVICE
inline
312void Add(
const int height,
const int width,
315 const int m = height * width;
316 for (
int i = 0; i < m; i++)
318 Bdata[i] +=
alpha * Adata[i];
312void Add(
const int height,
const int width, {
…}
324template<
typename TA,
typename TB>
325MFEM_HOST_DEVICE
inline
326void Set(
const int height,
const int width,
329 const int m = height * width;
330 for (
int i = 0; i < m; i++)
332 Bdata[i] =
alpha * Adata[i];
326void Set(
const int height,
const int width, {
…}
339template<
typename TA,
typename TB,
typename TC>
340MFEM_HOST_DEVICE
inline
341void AddMult(
const int Aheight,
const int Awidth,
const int Bwidth,
342 const TB *Bdata,
const TC *Cdata, TA *Adata,
const TB
alpha,
345 const int ah_x_aw = Aheight * Awidth;
348 for (
int i = 0; i < ah_x_aw; i++) { Adata[i] = 0.0; }
350 else if (
beta != 1.0)
352 for (
int i = 0; i < ah_x_aw; i++) { Adata[i] *=
beta; }
354 for (
int j = 0; j < Awidth; j++)
356 for (
int k = 0; k < Bwidth; k++)
359 for (
int i = 0; i < Aheight; i++)
361 Adata[i+j*Aheight] += val * Bdata[i+k*Aheight];
341void AddMult(
const int Aheight,
const int Awidth,
const int Bwidth, {
…}
370template<
typename TA,
typename TB,
typename TC>
371MFEM_HOST_DEVICE
inline
372void Mult(
const int Aheight,
const int Awidth,
const int Bwidth,
373 const TB *Bdata,
const TC *Cdata, TA *Adata)
375 AddMult(Aheight, Awidth, Bwidth, Bdata, Cdata, Adata, TB(1.0), TA(0.0));
372void Mult(
const int Aheight,
const int Awidth,
const int Bwidth, {
…}
381template<
typename TA,
typename TB,
typename TC>
382MFEM_HOST_DEVICE
inline
383void MultABt(
const int Aheight,
const int Awidth,
const int Bheight,
384 const TA *Adata,
const TB *Bdata, TC *ABtdata)
386 const int ah_x_bh = Aheight * Bheight;
387 for (
int i = 0; i < ah_x_bh; i++) { ABtdata[i] = 0.0; }
388 for (
int k = 0; k < Awidth; k++)
391 for (
int j = 0; j < Bheight; j++)
393 const real_t bjk = Bdata[j];
394 for (
int i = 0; i < Aheight; i++)
396 c[i] += Adata[i] * bjk;
383void MultABt(
const int Aheight,
const int Awidth,
const int Bheight, {
…}
409template<
typename TA,
typename TB,
typename TC>
410MFEM_HOST_DEVICE
inline
411void AddMultAtB(
const int Aheight,
const int Awidth,
const int Bwidth,
412 const TA *Adata,
const TB *Bdata, TC *Cdata,
const TB
alpha,
415 const int aw_x_bw = Awidth * Bwidth;
419 for (
int i = 0; i < aw_x_bw; i++) { Cdata[i] = 0.0; }
421 else if (
beta != 1.0)
423 for (
int i = 0; i < aw_x_bw; i++) { Cdata[i] *=
beta; }
427 for (
int i = 0; i < Bwidth; ++i)
429 for (
int j = 0; j < Awidth; ++j)
432 for (
int k = 0; k < Aheight; ++k)
434 val +=
alpha * Adata[j * Aheight + k] * Bdata[i * Aheight + k];
411void AddMultAtB(
const int Aheight,
const int Awidth,
const int Bwidth, {
…}
445template<
typename TA,
typename TB,
typename TC>
446MFEM_HOST_DEVICE
inline
447void MultAtB(
const int Aheight,
const int Awidth,
const int Bwidth,
448 const TA *Adata,
const TB *Bdata, TC *AtBdata)
450 AddMultAtB(Aheight, Awidth, Bwidth, Adata, Bdata, AtBdata, TB(1.0), TA(0.0));
447void MultAtB(
const int Aheight,
const int Awidth,
const int Bwidth, {
…}
456template<
typename TA,
typename TB,
typename TC>
457MFEM_HOST_DEVICE
inline
458void AddMultAtB(
const int Aheight,
const int Awidth,
const int Bwidth,
459 const TA *Adata,
const TB *Bdata, TC *AtBdata)
462 for (
int i = 0; i < Bwidth; ++i)
464 for (
int j = 0; j < Awidth; ++j)
467 for (
int k = 0; k < Aheight; ++k)
469 val += Adata[j * Aheight + k] * Bdata[i * Aheight + k];
458void AddMultAtB(
const int Aheight,
const int Awidth,
const int Bwidth, {
…}
478template<
int HEIGHT,
int WIDTH> MFEM_HOST_DEVICE
484template<
int dim> MFEM_HOST_DEVICE
488template<
int dim> MFEM_HOST_DEVICE
498MFEM_HOST_DEVICE
static inline
506const real_t Epsilon = std::numeric_limits<real_t>::epsilon();
509MFEM_HOST_DEVICE
static inline
512 const real_t sqrt_1_eps = sqrt(1./Epsilon);
517 const real_t zeta = (d2 - d1)/(2*d12);
518 if (fabs(zeta) < sqrt_1_eps)
520 t = d12*copysign(1./(fabs(zeta) + sqrt(1. + zeta*zeta)), zeta);
524 t = d12*copysign(0.5/fabs(zeta), zeta);
532MFEM_HOST_DEVICE
static inline
536 const real_t sqrt_1_eps = sqrt(1./Epsilon);
546 const real_t zeta = (d2 - d1)/(2*d12);
547 const real_t azeta = fabs(zeta);
548 if (azeta < sqrt_1_eps)
550 t = copysign(1./(azeta + sqrt(1. + zeta*zeta)), zeta);
554 t = copysign(0.5/azeta, zeta);
556 c = sqrt(1./(1. + t*t));
566MFEM_HOST_DEVICE
static inline
572 mult = frexp(d_max, &d_exp);
573 if (d_exp == std::numeric_limits<real_t>::max_exponent)
575 mult *= std::numeric_limits<real_t>::radix;
588MFEM_HOST_DEVICE
static inline
589bool KernelVector2G(
const int &mode,
601 real_t n1 = fabs(d1) + fabs(d21);
602 real_t n2 = fabs(d2) + fabs(d12);
604 bool swap_columns = (n2 > n1);
616 if (fabs(d1) > fabs(d21))
624 if (fabs(d1) < fabs(d21))
636 if (fabs(d12) > fabs(d2))
649 if (fabs(d12) < fabs(d2))
668 mu = copysign(n1, d1);
669 n1 = -d21*(d21/(d1 +
mu));
673 if (fabs(n1) <= fabs(d21))
677 mu = (2./(1. + n1*n1))*(n1*d12 + d2);
685 mu = (2./(1. + n2*n2))*(d12 + n2*d2);
707 n2 = 1./(1. + fabs(
mu));
709 if (fabs(d1) <= n2*fabs(d2))
730MFEM_HOST_DEVICE
static inline
731void Vec_normalize3_aux(
const real_t &x1,
const real_t &x2,
737 const real_t m = fabs(x1);
741 t = sqrt(1./(t + r*r));
742 n1 = copysign(t, x1);
749MFEM_HOST_DEVICE
static inline
754 if (fabs(x1) >= fabs(x2))
756 if (fabs(x1) >= fabs(x3))
760 Vec_normalize3_aux(x1, x2, x3, n1, n2, n3);
769 else if (fabs(x2) >= fabs(x3))
771 Vec_normalize3_aux(x2, x1, x3, n2, n1, n3);
774 Vec_normalize3_aux(x3, x1, x2, n3, n1, n2);
778MFEM_HOST_DEVICE
static inline
779int KernelVector3G_aux(
const int &mode,
787 s1 = hypot(c21, c31);
794 mu = copysign(n1, d1);
795 n1 = -s1*(s1/(d1 +
mu));
800 if (fabs(n1) >= fabs(c21))
802 if (fabs(n1) >= fabs(c31))
807 mu = 2./(1. + s2*s2 + s3*s3);
808 n2 =
mu*(c12 + s2*d2 + s3*c32);
809 n3 =
mu*(c13 + s2*c23 + s3*d3);
819 else if (fabs(c21) >= fabs(c31))
824 mu = 2./(1. + s1*s1 + s3*s3);
825 n2 =
mu*(s1*c12 + d2 + s3*c32);
826 n3 =
mu*(s1*c13 + c23 + s3*d3);
838 mu = 2./(1. + s1*s1 + s2*s2);
839 n2 =
mu*(s1*c12 + s2*d2 + c32);
840 n3 =
mu*(s1*c13 + s2*c23 + d3);
854 if (KernelVector2G(mode, d2, c23, c32, d3))
875 d1 = -(c12*d2 + c13*d3)/d1;
879 Vec_normalize3(d1, d2, d3, d1, d2, d3);
885MFEM_HOST_DEVICE
static inline
886int KernelVector3S(
const int &mode,
const real_t &d12,
901 real_t c12 = d12, c13 = d13, c23 = d23;
906 c32 = fabs(d1) + fabs(c12) + fabs(c13);
907 c31 = fabs(d2) + fabs(c12) + fabs(c23);
908 c21 = fabs(d3) + fabs(c13) + fabs(c23);
913 col = (c32 >= c31) ? 1 : 2;
917 col = (c31 >= c21) ? 2 : 3;
949 if (fabs(d1) <= fabs(c13))
951 row = (fabs(d1) <= fabs(c12)) ? 1 : 2;
955 row = (fabs(c12) <= fabs(c13)) ? 2 : 3;
960 if (fabs(d1) >= fabs(c13))
962 row = (fabs(d1) >= fabs(c12)) ? 1 : 2;
966 row = (fabs(c12) >= fabs(c13)) ? 2 : 3;
997 row = KernelVector3G_aux(mode, d1, d2, d3, c12, c13, c23, c21, c31, c32);
1013MFEM_HOST_DEVICE
static inline
1014int Reduce3S(
const int &mode,
1044 if (fabs(z1) <= fabs(z3))
1046 k = (fabs(z1) <= fabs(z2)) ? 1 : 2;
1050 k = (fabs(z2) <= fabs(z3)) ? 2 : 3;
1056 if (fabs(z1) >= fabs(z3))
1058 k = (fabs(z1) >= fabs(z2)) ? 1 : 2;
1062 k = (fabs(z2) >= fabs(z3)) ? 2 : 3;
1089 g = copysign(1., z1);
1090 v1 = -s*(s/(z1 + g));
1093 if (fabs(z2) > g) { g = fabs(z2); }
1094 if (fabs(z3) > g) { g = fabs(z3); }
1098 g = 2./(v1*v1 + v2*v2 + v3*v3);
1103 w1 = g*( d1*v1 + d12*v2 + d13*v3);
1104 w2 = g*(d12*v1 + d2*v2 + d23*v3);
1105 w3 = g*(d13*v1 + d23*v2 + d3*v3);
1107 s = (g/2)*(v1*w1 + v2*w2 + v3*w3);
1114 d23 -= v2*w3 + v3*w2;
1119 s = d12 - v1*w2 - v2*w1;
1120 s = d13 - v1*w3 - v3*w1;
1139template<> MFEM_HOST_DEVICE
inline
1142 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1]);
1143 left_inv[0] = d[0] * t;
1144 left_inv[1] = d[1] * t;
1147template<> MFEM_HOST_DEVICE
inline
1150 const real_t t = 1.0 / (d[0]*d[0] + d[1]*d[1] + d[2]*d[2]);
1151 left_inv[0] = d[0] * t;
1152 left_inv[1] = d[1] * t;
1153 left_inv[2] = d[2] * t;
1156template<> MFEM_HOST_DEVICE
inline
1159 real_t e = d[0]*d[0] + d[1]*d[1] + d[2]*d[2];
1160 real_t g = d[3]*d[3] + d[4]*d[4] + d[5]*d[5];
1161 real_t f = d[0]*d[3] + d[1]*d[4] + d[2]*d[5];
1162 const real_t t = 1.0 / (e*g -
f*
f);
1163 e *= t; g *= t;
f *= t;
1165 left_inv[0] = d[0]*g - d[3]*
f;
1166 left_inv[1] = d[3]*e - d[0]*
f;
1167 left_inv[2] = d[1]*g - d[4]*
f;
1168 left_inv[3] = d[4]*e - d[1]*
f;
1169 left_inv[4] = d[2]*g - d[5]*
f;
1170 left_inv[5] = d[5]*e - d[2]*
f;
1178template<> MFEM_HOST_DEVICE
inline
1185 internal::Eigensystem2S(d2, d0, d3, c, s);
1209template<> MFEM_HOST_DEVICE
inline
1221 real_t d_max = fabs(d11);
1222 if (d_max < fabs(d22)) { d_max = fabs(d22); }
1223 if (d_max < fabs(d33)) { d_max = fabs(d33); }
1224 if (d_max < fabs(d12)) { d_max = fabs(d12); }
1225 if (d_max < fabs(d13)) { d_max = fabs(d13); }
1226 if (d_max < fabs(d23)) { d_max = fabs(d23); }
1228 internal::GetScalingFactor(d_max, mult);
1231 d11 /= mult; d22 /= mult; d33 /= mult;
1232 d12 /= mult; d13 /= mult; d23 /= mult;
1234 real_t aa = (d11 + d22 + d33)/3;
1241 Q = (2*(d12*d12 + d13*d13 + d23*d23) + c1*c1 + c2*c2 + c3*c3)/6;
1242 R = (c1*(d23*d23 - c2*c3)+ d12*(d12*c3 - 2*d13*d23) + d13*d13*c2)/2;
1246 lambda[0] = lambda[1] = lambda[2] = aa;
1247 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1248 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1249 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1258 if (fabs(R) >= sqrtQ3)
1277 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3);
1281 r = -2*sqrtQ*cos(acos(R)/3);
1305 switch (internal::KernelVector3S(mode, d12, d13, d23, c1, c2, c3))
1309 lambda[0] = lambda[1] = lambda[2] = aa;
1310 vec[0] = 1.; vec[3] = 0.; vec[6] = 0.;
1311 vec[1] = 0.; vec[4] = 1.; vec[7] = 0.;
1312 vec[2] = 0.; vec[5] = 0.; vec[8] = 1.;
1327 int k = internal::Reduce3S(mode, d11, d22, d33, d12, d13, d23,
1328 c1, c2, c3, v1, v2, v3, g);
1336 internal::Eigensystem2S(d23, d22, d33, c, s);
1339 real_t *vec_1, *vec_2, *vec_3;
1344 lambda[0] = d11; vec_1 = vec;
1345 lambda[1] = d22; vec_2 = vec + 3;
1346 lambda[2] = d33; vec_3 = vec + 6;
1348 else if (d11 <= d33)
1350 lambda[0] = d11; vec_1 = vec;
1351 lambda[1] = d33; vec_3 = vec + 3;
1352 lambda[2] = d22; vec_2 = vec + 6;
1356 lambda[0] = d33; vec_3 = vec;
1357 lambda[1] = d11; vec_1 = vec + 3;
1358 lambda[2] = d22; vec_2 = vec + 6;
1365 lambda[0] = d22; vec_2 = vec;
1366 lambda[1] = d11; vec_1 = vec + 3;
1367 lambda[2] = d33; vec_3 = vec + 6;
1369 else if (d22 <= d33)
1371 lambda[0] = d22; vec_2 = vec;
1372 lambda[1] = d33; vec_3 = vec + 3;
1373 lambda[2] = d11; vec_1 = vec + 6;
1377 lambda[0] = d33; vec_3 = vec;
1378 lambda[1] = d22; vec_2 = vec + 3;
1379 lambda[2] = d11; vec_1 = vec + 6;
1386 d22 = g*(v2*c - v3*s);
1387 d33 = g*(v2*s + v3*c);
1388 vec_2[0] = - v1*d22; vec_3[0] = - v1*d33;
1389 vec_2[1] = c - v2*d22; vec_3[1] = s - v2*d33;
1390 vec_2[2] = -s - v3*d22; vec_3[2] = c - v3*d33;
1394 internal::Swap(vec_2[0], vec_2[1]);
1395 internal::Swap(vec_3[0], vec_3[1]);
1399 internal::Swap(vec_2[0], vec_2[2]);
1400 internal::Swap(vec_3[0], vec_3[2]);
1411template<> MFEM_HOST_DEVICE
inline
1423 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1424 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1425 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1426 internal::GetScalingFactor(d_max, mult);
1434 real_t t = 0.5*((d0+d2)*(d0-d2)+(d1-d3)*(d1+d3));
1435 real_t s = d0*d2 + d1*d3;
1436 s = sqrt(0.5*(d0*d0 + d1*d1 + d2*d2 + d3*d3) + sqrt(t*t + s*s));
1442 t = fabs(d0*d3 - d1*d2) / s;
1459template<> MFEM_HOST_DEVICE
inline
1462 real_t d0, d1, d2, d3, d4, d5, d6, d7, d8;
1463 d0 = data[0]; d3 = data[3]; d6 = data[6];
1464 d1 = data[1]; d4 = data[4]; d7 = data[7];
1465 d2 = data[2]; d5 = data[5]; d8 = data[8];
1469 if (d_max < fabs(d1)) { d_max = fabs(d1); }
1470 if (d_max < fabs(d2)) { d_max = fabs(d2); }
1471 if (d_max < fabs(d3)) { d_max = fabs(d3); }
1472 if (d_max < fabs(d4)) { d_max = fabs(d4); }
1473 if (d_max < fabs(d5)) { d_max = fabs(d5); }
1474 if (d_max < fabs(d6)) { d_max = fabs(d6); }
1475 if (d_max < fabs(d7)) { d_max = fabs(d7); }
1476 if (d_max < fabs(d8)) { d_max = fabs(d8); }
1477 internal::GetScalingFactor(d_max, mult);
1480 d0 /= mult; d1 /= mult; d2 /= mult;
1481 d3 /= mult; d4 /= mult; d5 /= mult;
1482 d6 /= mult; d7 /= mult; d8 /= mult;
1484 real_t b11 = d0*d0 + d1*d1 + d2*d2;
1485 real_t b12 = d0*d3 + d1*d4 + d2*d5;
1486 real_t b13 = d0*d6 + d1*d7 + d2*d8;
1487 real_t b22 = d3*d3 + d4*d4 + d5*d5;
1488 real_t b23 = d3*d6 + d4*d7 + d5*d8;
1489 real_t b33 = d6*d6 + d7*d7 + d8*d8;
1508 real_t aa = (b11 + b22 + b33)/3;
1514 real_t b11_b22 = ((d0-d3)*(d0+d3)+(d1-d4)*(d1+d4)+(d2-d5)*(d2+d5));
1515 real_t b22_b33 = ((d3-d6)*(d3+d6)+(d4-d7)*(d4+d7)+(d5-d8)*(d5+d8));
1516 real_t b33_b11 = ((d6-d0)*(d6+d0)+(d7-d1)*(d7+d1)+(d8-d2)*(d8+d2));
1517 c1 = (b11_b22 - b33_b11)/3;
1518 c2 = (b22_b33 - b11_b22)/3;
1519 c3 = (b33_b11 - b22_b33)/3;
1522 Q = (2*(b12*b12 + b13*b13 + b23*b23) + c1*c1 + c2*c2 + c3*c3)/6;
1523 R = (c1*(b23*b23 - c2*c3)+ b12*(b12*c3 - 2*b13*b23) +b13*b13*c2)/2;
1562 if (fabs(R) >= sqrtQ3)
1584 aa -= 2*sqrtQ*cos(acos(R)/3);
1588 aa -= 2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3);
1592 aa -= 2*sqrtQ*cos((acos(R) - 2.0*M_PI)/3);
1599 r = -2*sqrtQ*cos((acos(R) + 2.0*M_PI)/3);
1608 r = -2*sqrtQ*cos(acos(R)/3);
1639 switch (internal::KernelVector3S(mode, b12, b13, b23, c1, c2, c3))
1656 internal::Reduce3S(mode, b11, b22, b33, b12, b13, b23,
1657 c1, c2, c3, v1, v2, v3, g);
1664 internal::Eigenvalues2S(b23, b22, b33);
1668 aa = fmin(fmin(b11, b22), b33);
1674 aa = (b22 <= b33) ? b22 : fmax(b11, b33);
1678 aa = (b11 <= b33) ? b11 : fmax(b33, b22);
1683 aa = fmax(fmax(b11, b22), b33);
1689 return sqrt(fabs(aa))*mult;
1703 for (
int i = 0; i < m; i++)
1705 internal::Swap<real_t>(x[i], x[ipiv[i]]);
1708 for (
int j = 0; j < m; j++)
1711 for (
int i = j + 1; i < m; i++)
1713 x[i] -= data[i + j * m] * x_j;
1727 for (
int j = m - 1; j >= 0; j--)
1729 const real_t x_j = (x[j] /= data[j + j * m]);
1730 for (
int i = 0; i < j; i++)
1732 x[i] -= data[i + j * m] * x_j;
1747 LSolve(data, m, ipiv, x);
1759 for (
int k = 0; k < r; k++)
1761 for (
int j = 0; j < m; j++)
1763 const real_t x1_jk = X1[j+k*m];
1764 for (
int i = 0; i < n; i++)
1766 X2[i+k*n] -= A21[i+j*n] * x1_jk;
1787 for (
int i = 0; i < n; ++i)
1789 LSolve(data, m, ipiv, A12 + i*m);
1792 for (
int j = 0; j < m; j++)
1794 const real_t u_jj_inv = 1.0/data[j+j*m];
1795 for (
int i = 0; i < n; i++)
1797 A21[i+j*n] *= u_jj_inv;
1799 for (
int k = j+1; k < m; k++)
1801 const real_t u_jk = data[j+k*m];
1802 for (
int i = 0; i < n; i++)
1804 A21[i+k*n] -= A21[i+j*n] * u_jk;
1809 SubMult(m, n, n, A21, A12, A22);
1827 bool pivot_flag =
true;
1829 for (
int i = 0; i < m; i++)
1834 real_t a = fabs(A[piv + m*i]);
1835 for (
int j = i+1; j < m; j++)
1837 const real_t b = fabs(A[j + m*i]);
1848 for (
int j = 0; j < m; j++)
1850 internal::Swap<real_t>(A[i + m*j], A[piv + m*j]);
1855 if (fabs(A[i + m*i]) <= tol)
1860 const real_t a_ii_inv = 1.0 / A[i + m*i];
1861 for (
int j = i+1; j < m; j++)
1863 A[j + m*i] *= a_ii_inv;
1866 for (
int k = i+1; k < m; k++)
1868 const real_t a_ik = A[i + m*k];
1869 for (
int j = i+1; j < m; j++)
1871 A[j + m*k] -= a_ik * A[j + m*i];
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data.
MFEM_HOST_DEVICE real_t DistanceSquared(const real_t *x, const real_t *y)
Compute the square of the Euclidean distance to another vector.
MFEM_HOST_DEVICE real_t CalcSingularvalue< 2 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 2 with given data.
MFEM_HOST_DEVICE void MultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *AtBdata)
Multiply the transpose of a matrix of size Aheight x Awidth and data Adata with a matrix of size Ahei...
MFEM_HOST_DEVICE void CalcEigenvalues< 2 >(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE void CalcLeftInverse(const real_t *data, real_t *left_inv)
Given a matrix of size 2x1, 3x1, or 3x2, compute the left inverse.
MFEM_HOST_DEVICE void CalcAdjugate(const T *data, T *adj_data)
Return the adjugate of a matrix.
MFEM_HOST_DEVICE void Add(const int height, const int width, const TALPHA alpha, const TA *Adata, const TB *Bdata, TC *Cdata)
Compute C = A + alpha*B, where the matrices A, B and C are of size height x width with data Adata,...
MFEM_HOST_DEVICE void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *Cdata, const TB alpha, const TA beta)
Compute C = alpha*At*B + beta*C.
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data,...
MFEM_HOST_DEVICE real_t Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
MFEM_HOST_DEVICE void FNorm(real_t &scale_factor, real_t &scaled_fnorm2, const T *data)
MFEM_HOST_DEVICE void CalcLeftInverse< 2, 1 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void LSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- L^{-1} P X, for a vector X of length...
MFEM_HOST_DEVICE void MultABt(const int Aheight, const int Awidth, const int Bheight, const TA *Adata, const TB *Bdata, TC *ABtdata)
Multiply a matrix of size Aheight x Awidth and data Adata with the transpose of a matrix of size Bhei...
MFEM_HOST_DEVICE void MultTranspose(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix transpose vector multiplication: y = At x, where the matrix A is of size height x width with g...
MFEM_HOST_DEVICE void USolve(const real_t *data, const int m, real_t *x)
Assuming L.U = P.A factored matrix of size (m x m), compute X <- U^{-1} X, for a vector X of length m...
MFEM_HOST_DEVICE void AddMult(const int Aheight, const int Awidth, const int Bwidth, const TB *Bdata, const TC *Cdata, TA *Adata, const TB alpha, const TA beta)
Matrix-matrix multiplication: A = alpha * B * C + beta * A, where the matrices A, B and C are of size...
MFEM_HOST_DEVICE real_t FNorm2(const T *data)
Compute the square of the Frobenius norm of the matrix.
MFEM_HOST_DEVICE real_t CalcSingularvalue(const real_t *data, const int i)
Return the i'th singular value of the matrix of size dim with given data.
MFEM_HOST_DEVICE void Symmetrize(const int size, T *data)
Symmetrize a square matrix with given size and data: A -> (A+A^T)/2.
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 2 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void BlockFactor(const real_t *data, int m, const int *ipiv, int n, real_t *A12, real_t *A21, real_t *A22)
MFEM_HOST_DEVICE void AddMultVWt(const real_t *v, const real_t *w, real_t *VWt)
Dense matrix operation: VWt += v w^t.
MFEM_HOST_DEVICE void Diag(const real_t c, real_t *data)
Creates n x n diagonal matrix with diagonal elements c.
MFEM_HOST_DEVICE bool LUFactor(real_t *A, const int m, int *ipiv, const real_t tol=0.0)
Compute the LU factorization of the m x m matrix A.
MFEM_HOST_DEVICE void Set(const int height, const int width, const real_t alpha, const TA *Adata, TB *Bdata)
Compute B = alpha*A, where the matrices A and B are of size height x width with data Adata and Bdata.
MFEM_HOST_DEVICE void CalcLeftInverse< 3, 1 >(const real_t *d, real_t *left_inv)
MFEM_HOST_DEVICE void CalcEigenvalues(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
MFEM_HOST_DEVICE real_t CalcSingularvalue< 3 >(const real_t *data, const int i)
Return the i'th singular value of the matrix of size 3 with given data.
MFEM_HOST_DEVICE void CalcEigenvalues< 3 >(const real_t *data, real_t *lambda, real_t *vec)
MFEM_HOST_DEVICE void SubMult(const int m, const int n, const int r, const real_t *A21, const real_t *X1, real_t *X2)
Given an (n x m) matrix A21, compute X2 <- X2 - A21 X1, for matrices X1, and X2 of size (m x r) and (...
MFEM_HOST_DEVICE void Subtract(const real_t a, const real_t *x, const real_t *y, real_t *z)
Vector subtraction operation: z = a * (x - y)
MFEM_HOST_DEVICE void LUSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A for a factored matrix (m x m),.
void Swap(Array< T > &, Array< T > &)
MFEM_HOST_DEVICE void TAssignHD(const A_layout_t &A_layout, A_data_t &A_data, const scalar_t value)
MFEM_HOST_DEVICE scalar_t TAdjDetHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)
MFEM_HOST_DEVICE scalar_t TDetHD(const layout_t &a, const data_t &A)
std::function< real_t(const Vector &)> f(real_t mass_coeff)
MFEM_HOST_DEVICE void TAdjugateHD(const A_layout_t &a, const A_data_t &A, const B_layout_t &b, B_data_t &B)