35#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
36 gko::version_info gko_version = gko::version_info::get();
37 bool gko_with_omp_support = (strcmp(gko_version.omp_version.tag,
38 "not compiled") != 0);
44 executor = gko::ReferenceExecutor::create();
49 executor = gko::OmpExecutor::create();
54 if (gko::CudaExecutor::get_num_devices() > 0)
57 int current_device = 0;
58 MFEM_GPU_CHECK(cudaGetDevice(¤t_device));
59 if (gko_with_omp_support)
61 executor = gko::CudaExecutor::create(current_device,
62 gko::OmpExecutor::create());
66 executor = gko::CudaExecutor::create(current_device,
67 gko::ReferenceExecutor::create());
73 MFEM_ABORT(
"gko::CudaExecutor::get_num_devices() did not report "
74 "any valid devices.");
80 if (gko::HipExecutor::get_num_devices() > 0)
83 int current_device = 0;
84 MFEM_GPU_CHECK(hipGetDevice(¤t_device));
85 if (gko_with_omp_support)
87 executor = gko::HipExecutor::create(current_device,
88 gko::OmpExecutor::create());
92 executor = gko::HipExecutor::create(current_device,
93 gko::ReferenceExecutor::create());
99 MFEM_ABORT(
"gko::HipExecutor::get_num_devices() did not report "
100 "any valid devices.");
105 MFEM_ABORT(
"Invalid ExecType specified");
117 MFEM_WARNING(
"Parameter host_exec_type ignored for CPU GinkgoExecutor.");
118 executor = gko::ReferenceExecutor::create();
123 MFEM_WARNING(
"Parameter host_exec_type ignored for CPU GinkgoExecutor.");
124 executor = gko::OmpExecutor::create();
129 if (gko::CudaExecutor::get_num_devices() > 0)
132 int current_device = 0;
133 MFEM_GPU_CHECK(cudaGetDevice(¤t_device));
136 executor = gko::CudaExecutor::create(current_device,
137 gko::OmpExecutor::create());
141 executor = gko::CudaExecutor::create(current_device,
142 gko::ReferenceExecutor::create());
148 MFEM_ABORT(
"gko::CudaExecutor::get_num_devices() did not report "
149 "any valid devices.");
155 if (gko::HipExecutor::get_num_devices() > 0)
158 int current_device = 0;
159 MFEM_GPU_CHECK(hipGetDevice(¤t_device));
162 executor = gko::HipExecutor::create(current_device,
163 gko::OmpExecutor::create());
167 executor = gko::HipExecutor::create(current_device,
168 gko::ReferenceExecutor::create());
174 MFEM_ABORT(
"gko::HipExecutor::get_num_devices() did not report "
175 "any valid devices.");
180 MFEM_ABORT(
"Invalid ExecType specified");
187 gko::version_info gko_version = gko::version_info::get();
188 bool gko_with_omp_support = (strcmp(gko_version.omp_version.tag,
189 "not compiled") != 0);
192 if (gko::CudaExecutor::get_num_devices() > 0)
195 int current_device = 0;
196 MFEM_GPU_CHECK(cudaGetDevice(¤t_device));
197 if (gko_with_omp_support)
199 executor = gko::CudaExecutor::create(current_device,
200 gko::OmpExecutor::create());
204 executor = gko::CudaExecutor::create(current_device,
205 gko::ReferenceExecutor::create());
211 MFEM_ABORT(
"gko::CudaExecutor::get_num_devices() did not report "
212 "any valid devices.");
217 if (gko::HipExecutor::get_num_devices() > 0)
220 int current_device = 0;
221 MFEM_GPU_CHECK(hipGetDevice(¤t_device));
222 if (gko_with_omp_support)
224 executor = gko::HipExecutor::create(current_device,
225 gko::OmpExecutor::create());
229 executor = gko::HipExecutor::create(current_device,
230 gko::ReferenceExecutor::create());
236 MFEM_ABORT(
"gko::HipExecutor::get_num_devices() did not report "
237 "any valid devices.");
245 if (gko_with_omp_support)
247 executor = gko::OmpExecutor::create();
251 executor = gko::ReferenceExecutor::create();
256 executor = gko::ReferenceExecutor::create();
269 if (gko::CudaExecutor::get_num_devices() > 0)
272 int current_device = 0;
273 MFEM_GPU_CHECK(cudaGetDevice(¤t_device));
276 executor = gko::CudaExecutor::create(current_device,
277 gko::OmpExecutor::create());
281 executor = gko::CudaExecutor::create(current_device,
282 gko::ReferenceExecutor::create());
288 MFEM_ABORT(
"gko::CudaExecutor::get_num_devices() did not report "
289 "any valid devices.");
294 if (gko::HipExecutor::get_num_devices() > 0)
297 int current_device = 0;
298 MFEM_GPU_CHECK(hipGetDevice(¤t_device));
301 executor = gko::HipExecutor::create(current_device,
302 gko::OmpExecutor::create());
306 executor = gko::HipExecutor::create(current_device,
307 gko::ReferenceExecutor::create());
313 MFEM_ABORT(
"gko::HipExecutor::get_num_devices() did not report "
314 "any valid devices.");
319 MFEM_WARNING(
"Parameter host_exec_type ignored for CPU GinkgoExecutor.");
323 gko::version_info gko_version = gko::version_info::get();
324 bool gko_with_omp_support = (strcmp(gko_version.omp_version.tag,
325 "not compiled") != 0);
326 if (gko_with_omp_support)
328 executor = gko::OmpExecutor::create();
332 executor = gko::ReferenceExecutor::create();
337 executor = gko::ReferenceExecutor::create();
343 bool use_implicit_res_norm)
345 use_implicit_res_norm(use_implicit_res_norm)
362 using ResidualCriterionFactory = gko::stop::ResidualNorm<>;
363 using ImplicitResidualCriterionFactory = gko::stop::ImplicitResidualNorm<>;
368 .with_reduction_factor(sqrt(
rel_tol))
369 .with_baseline(gko::stop::mode::initial_resnorm)
372 .with_reduction_factor(sqrt(
abs_tol))
373 .with_baseline(gko::stop::mode::absolute)
376 gko::stop::Combined::build()
379 gko::stop::Iteration::build()
380 .with_max_iters(
static_cast<unsigned long>(
max_iter))
387 .with_reduction_factor(
rel_tol)
388 .with_baseline(gko::stop::mode::initial_resnorm)
391 .with_reduction_factor(
abs_tol)
392 .with_baseline(gko::stop::mode::absolute)
395 gko::stop::Combined::build()
398 gko::stop::Iteration::build()
399 .with_max_iters(
static_cast<unsigned long>(
max_iter))
406GinkgoIterativeSolver::initialize_ginkgo_log(gko::matrix::Dense<real_t>*
b)
412 gko::log::Logger::criterion_check_completed_mask);
414 system_oper.get(),
b);
430 const gko::LinOp *
beta,
439 if (
alpha->get_size()[0] > 1 ||
alpha->get_size()[1] > 1)
441 throw gko::BadDimension(
442 __FILE__, __LINE__, __func__,
"alpha",
alpha->get_size()[0],
443 alpha->get_size()[1],
444 "Expected an object of size [1 x 1] for scaling "
445 " in this operator's apply_impl");
447 if (
beta->get_size()[0] > 1 ||
beta->get_size()[1] > 1)
449 throw gko::BadDimension(
450 __FILE__, __LINE__, __func__,
"beta",
beta->get_size()[0],
452 "Expected an object of size [1 x 1] for scaling "
453 " in this operator's apply_impl");
458 if (
alpha->get_executor() ==
alpha->get_executor()->get_master())
461 alpha_f = gko::as<gko::matrix::Dense<real_t>>(
alpha)->at(0, 0);
466 this->get_executor()->get_master().get()->copy_from(
467 this->get_executor().get(),
468 1, gko::as<gko::matrix::Dense<real_t>>(
alpha)->get_const_values(),
471 if (
beta->get_executor() ==
beta->get_executor()->get_master())
474 beta_f = gko::as<gko::matrix::Dense<real_t>>(
beta)->at(0, 0);
479 this->get_executor()->get_master().get()->copy_from(
480 this->get_executor().get(),
481 1, gko::as<gko::matrix::Dense<real_t>>(
beta)->get_const_values(),
506 MFEM_VERIFY(system_oper,
"System matrix or operator not initialized");
507 MFEM_VERIFY(
executor,
"executor is not initialized");
509 "Mismatching sizes for rhs and solution");
511 using vec = gko::matrix::Dense<real_t>;
519 bool on_device =
false;
524 std::unique_ptr<vec> gko_x;
525 std::unique_ptr<vec> gko_y;
535 x.
Read(on_device))), 1);
543 gko_x = std::unique_ptr<vec>(
545 const_cast<Vector *
>(&x),
false));
546 gko_y = std::unique_ptr<vec>(
553 initialize_ginkgo_log(gko_x.get());
569 solver->apply(gko_x, gko_y);
575 real_t final_res_norm = 0.0;
588 auto imp_residual_norm_d =
589 gko::as<gko::matrix::Dense<real_t>>(imp_residual_norm);
590 auto imp_residual_norm_d_master =
591 gko::matrix::Dense<real_t>::create(
executor->get_master(),
593 imp_residual_norm_d_master->copy_from(imp_residual_norm_d);
595 final_res_norm = imp_residual_norm_d_master->at(0,0);
599 auto residual_norm_d =
600 gko::as<gko::matrix::Dense<real_t>>(residual_norm);
601 auto residual_norm_d_master =
602 gko::matrix::Dense<real_t>::create(
executor->get_master(),
604 residual_norm_d_master->copy_from(residual_norm_d);
606 final_res_norm = residual_norm_d_master->at(0,0);
626 " iterations with final residual norm "
627 << final_res_norm <<
'\n';
657 "System matrix is not square");
659 bool on_device =
false;
665 using mtx = gko::matrix::Csr<real_t, int>;
667 system_oper = mtx::create(
682 system_oper = std::shared_ptr<OperatorWrapper>(
698 using cg = gko::solver::Cg<real_t>;
707 using cg = gko::solver::Cg<real_t>;
713 .with_generated_preconditioner(
717 GetGeneratedPreconditioner().get()))
727 .with_preconditioner(preconditioner.
GetFactory())
737 using bicgstab = gko::solver::Bicgstab<real_t>;
747 using bicgstab = gko::solver::Bicgstab<real_t>;
752 .with_generated_preconditioner(
756 GetGeneratedPreconditioner().get()))
766 .with_preconditioner(preconditioner.
GetFactory())
776 using cgs = gko::solver::Cgs<real_t>;
785 using cgs = gko::solver::Cgs<real_t>;
790 .with_generated_preconditioner(
794 GetGeneratedPreconditioner().get()))
804 .with_preconditioner(preconditioner.
GetFactory())
814 using fcg = gko::solver::Fcg<real_t>;
823 using fcg = gko::solver::Fcg<real_t>;
828 .with_generated_preconditioner(
832 GetGeneratedPreconditioner().get()))
842 .with_preconditioner(preconditioner.
GetFactory())
853 using gmres = gko::solver::Gmres<real_t>;
863 .with_krylov_dim(
static_cast<unsigned long>(
m))
874 using gmres = gko::solver::Gmres<real_t>;
882 .with_generated_preconditioner(
886 GetGeneratedPreconditioner().get()))
896 .with_preconditioner(preconditioner.
GetFactory())
905 .with_krylov_dim(
static_cast<unsigned long>(
m))
907 .with_generated_preconditioner(
911 GetGeneratedPreconditioner().get()))
920 .with_krylov_dim(
static_cast<unsigned long>(
m))
922 .with_preconditioner(preconditioner.
GetFactory())
931 using gmres = gko::solver::Gmres<real_t>;
933 auto current_params = gko::as<gmres::Factory>(
solver_gen)->get_parameters();
934 this->
solver_gen = current_params.with_krylov_dim(
static_cast<unsigned long>(
m))
938 gko::as<gmres>(
solver)->set_krylov_dim(
static_cast<unsigned long>(
m));
944 storage_precision prec)
948 using gmres = gko::solver::CbGmres<real_t>;
953 .with_storage_precision(prec)
959 .with_krylov_dim(
static_cast<unsigned long>(
m))
961 .with_storage_precision(prec)
968 int dim, storage_precision prec)
972 using gmres = gko::solver::CbGmres<real_t>;
980 .with_storage_precision(prec)
981 .with_generated_preconditioner(
985 GetGeneratedPreconditioner().get()))
995 .with_storage_precision(prec)
996 .with_preconditioner(preconditioner.
GetFactory())
1005 .with_krylov_dim(
static_cast<unsigned long>(
m))
1007 .with_storage_precision(prec)
1008 .with_generated_preconditioner(
1010 .on(this->executor);
1012 GetGeneratedPreconditioner().get()))
1021 .with_krylov_dim(
static_cast<unsigned long>(
m))
1023 .with_storage_precision(prec)
1024 .with_preconditioner(preconditioner.
GetFactory())
1025 .on(this->executor);
1033 using gmres = gko::solver::CbGmres<real_t>;
1035 auto current_params = gko::as<gmres::Factory>(
solver_gen)->get_parameters();
1036 this->
solver_gen = current_params.with_krylov_dim(
static_cast<unsigned long>(
m))
1037 .on(this->executor);
1040 gko::as<gmres>(
solver)->set_krylov_dim(
static_cast<unsigned long>(
m));
1048 using ir = gko::solver::Ir<real_t>;
1057 using ir = gko::solver::Ir<real_t>;
1061 .on(this->executor);
1084 MFEM_VERIFY(
executor,
"executor is not initialized");
1086 using vec = gko::matrix::Dense<real_t>;
1094 bool on_device =
false;
1099 auto gko_x = vec::create(
executor, gko::dim<2>(x.
Size(), 1),
1102 x.
Read(on_device))), 1);
1103 auto gko_y = vec::create(
executor, gko::dim<2>(y.
Size(), 1),
1122 MFEM_VERIFY(op_mat != NULL,
1123 "GinkgoPreconditioner::SetOperator : not a SparseMatrix!");
1125 bool on_device =
false;
1131 using mtx = gko::matrix::Csr<real_t, int>;
1133 auto gko_matrix = mtx::create(
1156 const std::string &storage_opt,
1158 const int max_block_size
1163 if (storage_opt ==
"auto")
1165 precond_gen = gko::preconditioner::Jacobi<real_t, int>::build()
1166 .with_storage_optimization(
1167 gko::precision_reduction::autodetect())
1168 .with_accuracy(accuracy)
1169 .with_max_block_size(
static_cast<unsigned int>(max_block_size))
1174 precond_gen = gko::preconditioner::Jacobi<real_t, int>::build()
1175 .with_storage_optimization(
1176 gko::precision_reduction(0, 0))
1177 .with_accuracy(accuracy)
1178 .with_max_block_size(
static_cast<unsigned int>(max_block_size))
1187 const std::string &factorization_type,
1189 const bool skip_sort
1193 if (factorization_type ==
"exact")
1195 using ilu_fact_type = gko::factorization::Ilu<real_t, int>;
1196 std::shared_ptr<ilu_fact_type::Factory> fact_factory =
1197 ilu_fact_type::build()
1198 .with_skip_sorting(skip_sort)
1201 .with_factorization(fact_factory)
1206 using ilu_fact_type = gko::factorization::ParIlu<real_t, int>;
1207 std::shared_ptr<ilu_fact_type::Factory> fact_factory =
1208 ilu_fact_type::build()
1209 .with_iterations(
static_cast<unsigned long>(sweeps))
1210 .with_skip_sorting(skip_sort)
1213 .with_factorization(fact_factory)
1221 const std::string &factorization_type,
1223 const int sparsity_power,
1224 const bool skip_sort
1228 using l_solver_type = gko::preconditioner::LowerIsai<>;
1229 using u_solver_type = gko::preconditioner::UpperIsai<>;
1231 std::shared_ptr<l_solver_type::Factory> l_solver_factory =
1232 l_solver_type::build()
1233 .with_sparsity_power(sparsity_power)
1235 std::shared_ptr<u_solver_type::Factory> u_solver_factory =
1236 u_solver_type::build()
1237 .with_sparsity_power(sparsity_power)
1242 if (factorization_type ==
"exact")
1244 using ilu_fact_type = gko::factorization::Ilu<real_t, int>;
1245 std::shared_ptr<ilu_fact_type::Factory> fact_factory =
1246 ilu_fact_type::build()
1247 .with_skip_sorting(skip_sort)
1249 precond_gen = gko::preconditioner::Ilu<l_solver_type,
1250 u_solver_type>::build()
1251 .with_factorization(fact_factory)
1252 .with_l_solver(l_solver_factory)
1253 .with_u_solver(u_solver_factory)
1259 using ilu_fact_type = gko::factorization::ParIlu<real_t, int>;
1260 std::shared_ptr<ilu_fact_type::Factory> fact_factory =
1261 ilu_fact_type::build()
1262 .with_iterations(
static_cast<unsigned long>(sweeps))
1263 .with_skip_sorting(skip_sort)
1265 precond_gen = gko::preconditioner::Ilu<l_solver_type,
1266 u_solver_type>::build()
1267 .with_factorization(fact_factory)
1268 .with_l_solver(l_solver_factory)
1269 .with_u_solver(u_solver_factory)
1278 const std::string &factorization_type,
1280 const bool skip_sort
1285 if (factorization_type ==
"exact")
1287 using ic_fact_type = gko::factorization::Ic<real_t, int>;
1288 std::shared_ptr<ic_fact_type::Factory> fact_factory =
1289 ic_fact_type::build()
1290 .with_both_factors(
false)
1291 .with_skip_sorting(skip_sort)
1294 .with_factorization(fact_factory)
1299 using ic_fact_type = gko::factorization::ParIc<real_t, int>;
1300 std::shared_ptr<ic_fact_type::Factory> fact_factory =
1301 ic_fact_type::build()
1302 .with_both_factors(
false)
1303 .with_iterations(
static_cast<unsigned long>(sweeps))
1304 .with_skip_sorting(skip_sort)
1307 .with_factorization(fact_factory)
1314 const std::string &factorization_type,
1316 const int sparsity_power,
1317 const bool skip_sort
1322 using l_solver_type = gko::preconditioner::LowerIsai<>;
1323 std::shared_ptr<l_solver_type::Factory> l_solver_factory =
1324 l_solver_type::build()
1325 .with_sparsity_power(sparsity_power)
1327 if (factorization_type ==
"exact")
1329 using ic_fact_type = gko::factorization::Ic<real_t, int>;
1330 std::shared_ptr<ic_fact_type::Factory> fact_factory =
1331 ic_fact_type::build()
1332 .with_both_factors(
false)
1333 .with_skip_sorting(skip_sort)
1335 precond_gen = gko::preconditioner::Ic<l_solver_type>::build()
1336 .with_factorization(fact_factory)
1337 .with_l_solver(l_solver_factory)
1342 using ic_fact_type = gko::factorization::ParIc<real_t, int>;
1343 std::shared_ptr<ic_fact_type::Factory> fact_factory =
1344 ic_fact_type::build()
1345 .with_both_factors(
false)
1346 .with_iterations(
static_cast<unsigned long>(sweeps))
1347 .with_skip_sorting(skip_sort)
1349 precond_gen = gko::preconditioner::Ic<l_solver_type>::build()
1350 .with_factorization(fact_factory)
1351 .with_l_solver(l_solver_factory)
1359 const Solver &mfem_precond
1365 mfem_precond.
Height(), &mfem_precond));
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
BICGSTABSolver(GinkgoExecutor &exec)
CBGMRESSolver(GinkgoExecutor &exec, int dim=0, storage_precision prec=storage_precision::reduce1)
CGSSolver(GinkgoExecutor &exec)
FCGSolver(GinkgoExecutor &exec)
std::shared_ptr< gko::Executor > GetExecutor() const
GinkgoExecutor(ExecType exec_type)
@ REFERENCE
Reference CPU Executor.
@ OMP
OpenMP CPU Executor.
void SetOperator(const Operator &op) override
bool sub_op_needs_wrapped_vecs
bool UsesVectorWrappers() const
std::shared_ptr< gko::stop::ImplicitResidualNorm<>::Factory > imp_abs_criterion
void Mult(const Vector &x, Vector &y) const override
std::shared_ptr< ResidualLogger<> > residual_logger
void update_stop_factory()
bool use_implicit_res_norm
std::shared_ptr< gko::stop::ResidualNorm<>::Factory > abs_criterion
std::shared_ptr< gko::stop::Combined::Factory > combined_factory
const std::shared_ptr< gko::LinOpFactory > GetFactory() const
std::shared_ptr< gko::log::Convergence<> > convergence_logger
GinkgoIterativeSolver(GinkgoExecutor &exec, bool use_implicit_res_norm)
std::shared_ptr< gko::LinOpFactory > solver_gen
std::shared_ptr< gko::stop::ResidualNorm<>::Factory > rel_criterion
std::shared_ptr< gko::stop::ImplicitResidualNorm<>::Factory > imp_rel_criterion
std::shared_ptr< gko::Executor > executor
std::shared_ptr< gko::LinOp > solver
void SetOperator(const Operator &op) override
std::shared_ptr< gko::Executor > executor
const std::shared_ptr< gko::LinOpFactory > GetFactory() const
void Mult(const Vector &x, Vector &y) const override
std::shared_ptr< gko::LinOp > generated_precond
bool HasGeneratedPreconditioner() const
bool has_generated_precond
GinkgoPreconditioner(GinkgoExecutor &exec)
std::shared_ptr< gko::LinOpFactory > precond_gen
const std::shared_ptr< gko::LinOp > GetGeneratedPreconditioner() const
IRSolver(GinkgoExecutor &exec)
IcIsaiPreconditioner(GinkgoExecutor &exec, const std::string &factorization_type="exact", const int sweeps=0, const int sparsity_power=1, const bool skip_sort=false)
IcPreconditioner(GinkgoExecutor &exec, const std::string &factorization_type="exact", const int sweeps=0, const bool skip_sort=false)
IluIsaiPreconditioner(GinkgoExecutor &exec, const std::string &factorization_type="exact", const int sweeps=0, const int sparsity_power=1, const bool skip_sort=false)
IluPreconditioner(GinkgoExecutor &exec, const std::string &factorization_type="exact", const int sweeps=0, const bool skip_sort=false)
JacobiPreconditioner(GinkgoExecutor &exec, const std::string &storage_opt="none", const real_t accuracy=1.e-1, const int max_block_size=32)
MFEMPreconditioner(GinkgoExecutor &exec, const Solver &mfem_precond)
void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override
Vector & get_mfem_vec_ref()
const Vector & get_mfem_vec_const_ref() const
int Capacity() const
Return the size of the allocated memory.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
int width
Dimension of the input / number of columns in the matrix.
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
int height
Dimension of the output / number of rows in the matrix.
virtual void Mult(const Vector &x, Vector &y) const =0
Operator application: y=A(x).
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
bool iterative_mode
If true, use the second argument of Mult() as an initial guess.
int * ReadWriteI(bool on_dev=true)
real_t * ReadWriteData(bool on_dev=true)
int * ReadWriteJ(bool on_dev=true)
Memory< real_t > & GetMemoryData()
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Memory< real_t > & GetMemory()
Return a reference to the Memory object used by the Vector.
void Destroy()
Destroy a vector.
int Size() const
Returns the size of the vector.
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Vector & Add(const real_t a, const Vector &Va)
(*this) += a * Va
gko::array< T > gko_array
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
@ HIP_MASK
Biwise-OR of all HIP backends.
@ CUDA_MASK
Biwise-OR of all CUDA backends.
@ OMP_MASK
Biwise-OR of all OpenMP backends.