MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
native.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "../kernels.hpp"
13#include "native.hpp"
14#include "../dtensor.hpp"
16
17namespace mfem
18{
19
22 Op op) const
23{
24 const bool tr = (op == Op::T);
25
26 const int m = A.SizeI();
27 const int n = A.SizeJ();
28 const int n_mat = A.SizeK();
29 const int k = x.Size() / (tr ? m : n) / n_mat;
30
31 auto d_A = Reshape(A.Read(), m, n, n_mat);
32 auto d_x = Reshape(x.Read(), (tr ? m : n), k, n_mat);
33 auto d_y = Reshape(beta == 0.0 ? y.Write() : y.ReadWrite(),
34 (tr ? n : m), k, n_mat);
35
36 if (tr)
37 {
38 mfem::forall(n_mat, [=] MFEM_HOST_DEVICE (int i)
39 {
40 kernels::AddMultAtB(m, n, k, &d_A(0,0,i), &d_x(0,0,i), &d_y(0,0,i),
41 alpha, beta);
42 });
43 }
44 else
45 {
46 mfem::forall(n_mat, [=] MFEM_HOST_DEVICE (int i)
47 {
48 kernels::AddMult(m, k, n, &d_A(0,0,i), &d_x(0,0,i), &d_y(0,0,i),
49 alpha, beta);
50 });
51 }
52
53 // Alternative approach, threading also over the second index. Which one is
54 // better?
55
56 // mfem::forall(n_mat * k, [=] MFEM_HOST_DEVICE (int idx)
57 // {
58 // const int i = idx % k;
59 // const int j = idx / k;
60 // kernels::Mult(m, n, &d_A(0,0,j), &d_x(0,i,j), &d_y(0,i,j));
61 // });
62}
63
65{
66 const int m = A.SizeI();
67 const int NE = A.SizeK();
68 DenseTensor LU = A;
69 Array<int> P(m*NE);
70
71 LUFactor(LU, P);
72
73 auto data_all = Reshape(LU.Read(), m, m, NE);
74 auto piv_all = Reshape(P.Read(), m, NE);
75 auto inv_all = Reshape(A.Write(), m, m, NE);
76
77 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
78 {
79 // A^{-1} = U^{-1} L^{-1} P
80 // X <- U^{-1} (set only the upper triangular part of X)
81 real_t *X = &inv_all(0, 0, e);
82 real_t *x = X;
83 const real_t *data = &data_all(0, 0, e);
84 const int *ipiv = &piv_all(0, e);
85
86 for (int k = 0; k < m; k++)
87 {
88 const real_t minus_x_k = -(x[k] = 1.0 / data[k + k * m]);
89 for (int i = 0; i < k; i++)
90 {
91 x[i] = data[i + k * m] * minus_x_k;
92 }
93 for (int j = k - 1; j >= 0; j--)
94 {
95 const real_t x_j = (x[j] /= data[j + j * m]);
96 for (int i = 0; i < j; i++)
97 {
98 x[i] -= data[i + j * m] * x_j;
99 }
100 }
101 x += m;
102 }
103
104 // X <- X L^{-1} (use input only from the upper triangular part of X)
105 {
106 int k = m - 1;
107 for (int j = 0; j < k; j++)
108 {
109 const real_t minus_L_kj = -data[k + j * m];
110 for (int i = 0; i <= j; i++)
111 {
112 X[i + j * m] += X[i + k * m] * minus_L_kj;
113 }
114 for (int i = j + 1; i < m; i++)
115 {
116 X[i + j * m] = X[i + k * m] * minus_L_kj;
117 }
118 }
119 }
120 for (int k = m - 2; k >= 0; k--)
121 {
122 for (int j = 0; j < k; j++)
123 {
124 const real_t L_kj = data[k + j * m];
125 for (int i = 0; i < m; i++)
126 {
127 X[i + j * m] -= X[i + k * m] * L_kj;
128 }
129 }
130 }
131
132 // X <- X P
133 for (int k = m - 1; k >= 0; k--)
134 {
135 const int piv_k = ipiv[k];
136 if (k != piv_k)
137 {
138 for (int i = 0; i < m; i++)
139 {
140 kernels::internal::Swap(X[i + k * m], X[i + piv_k * m]);
141 }
142 }
143 }
144 });
145}
146
148{
149 const int m = A.SizeI();
150 const int NE = A.SizeK();
151 P.SetSize(m*NE);
152
153 auto data_all = Reshape(A.ReadWrite(), m, m, NE);
154 auto ipiv_all = Reshape(P.Write(), m, NE);
155 Array<bool> pivot_flag(1);
156 pivot_flag[0] = true;
157 bool *d_pivot_flag = pivot_flag.ReadWrite();
158
159 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
160 {
161 const bool flag = kernels::LUFactor(&data_all(0,0,e), m, &ipiv_all(0,e));
162 if (!flag) { d_pivot_flag[0] = false; }
163 });
164
165 MFEM_VERIFY(pivot_flag.HostRead()[0], "Batch LU factorization failed");
166}
167
169 Vector &x) const
170{
171 const int m = LU.SizeI();
172 const int n_mat = LU.SizeK();
173 const int n_rhs = x.Size() / m / n_mat;
174
175 auto d_LU = Reshape(LU.Read(), m, m, n_mat);
176 auto d_P = Reshape(P.Read(), m, n_mat);
177 auto d_x = Reshape(x.Write(), m, n_rhs, n_mat);
178
179 mfem::forall(n_mat * n_rhs, [=] MFEM_HOST_DEVICE (int idx)
180 {
181 const int i_rhs = idx % n_rhs;
182 const int i_mat = idx / n_rhs;
183
184 kernels::LUSolve(&d_LU(0,0,i_mat), m, &d_P(0,i_mat), &d_x(0,i_rhs,i_mat));
185 });
186}
187
188}
const T * HostRead() const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), false).
Definition array.hpp:341
T * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:353
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:758
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:345
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
Op
Operation type (transposed or not transposed)
Definition batched.hpp:54
Rank 3 tensor (array of matrices)
int SizeJ() const
const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read( GetMemory(), TotalSize(), on_dev).
real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(GetMemory(), TotalSize(), on_dev).
real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(GetMemory(), TotalSize(), on_dev).
int SizeI() const
int SizeK() const
void LUSolve(const DenseTensor &LU, const Array< int > &P, Vector &x) const override
See BatchedLinAlg::LUSolve.
Definition native.cpp:168
void AddMult(const DenseTensor &A, const Vector &x, Vector &y, real_t alpha, real_t beta, Op op) const override
See BatchedLinAlg::AddMult.
Definition native.cpp:20
void Invert(DenseTensor &A) const override
See BatchedLinAlg::Invert.
Definition native.cpp:64
void LUFactor(DenseTensor &A, Array< int > &P) const override
See BatchedLinAlg::LUFactor.
Definition native.cpp:147
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
int Size() const
Returns the size of the vector.
Definition vector.hpp:226
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:502
Vector beta
const real_t alpha
Definition ex15.cpp:369
MFEM_HOST_DEVICE void AddMultAtB(const int Aheight, const int Awidth, const int Bwidth, const TA *Adata, const TB *Bdata, TC *Cdata, const TB alpha, const TA beta)
Compute C = alpha*At*B + beta*C.
Definition kernels.hpp:411
MFEM_HOST_DEVICE void AddMult(const int Aheight, const int Awidth, const int Bwidth, const TB *Bdata, const TC *Cdata, TA *Adata, const TB alpha, const TA beta)
Matrix-matrix multiplication: A = alpha * B * C + beta * A, where the matrices A, B and C are of size...
Definition kernels.hpp:341
MFEM_HOST_DEVICE bool LUFactor(real_t *A, const int m, int *ipiv, const real_t tol=0.0)
Compute the LU factorization of the m x m matrix A.
Definition kernels.hpp:1825
MFEM_HOST_DEVICE void LUSolve(const real_t *data, const int m, const int *ipiv, real_t *x)
Assuming L.U = P.A for a factored matrix (m x m),.
Definition kernels.hpp:1745
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
float real_t
Definition config.hpp:43
void forall(int N, lambda &&body)
Definition forall.hpp:753