MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
solvers.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "linalg.hpp"
13#include "lapack.hpp"
15#include "../general/forall.hpp"
18#include <iostream>
19#include <iomanip>
20#include <algorithm>
21#include <cmath>
22#include <set>
23
24namespace mfem
25{
26
27using namespace std;
28
30 : Solver(0, true)
31{
32 oper = NULL;
33 prec = NULL;
34 max_iter = 10;
35 rel_tol = abs_tol = 0.0;
36#ifdef MFEM_USE_MPI
37 dot_prod_type = 0;
38#endif
39}
40
41#ifdef MFEM_USE_MPI
42
44 : Solver(0, true)
45{
46 oper = NULL;
47 prec = NULL;
48 max_iter = 10;
49 rel_tol = abs_tol = 0.0;
50 dot_prod_type = 1;
51 comm = comm_;
52}
53
54#endif // MFEM_USE_MPI
55
56real_t IterativeSolver::Dot(const Vector &x, const Vector &y) const
57{
58#ifndef MFEM_USE_MPI
59 return (x * y);
60#else
61 if (dot_prod_type == 0)
62 {
63 return (x * y);
64 }
65 else
66 {
67 return InnerProduct(comm, x, y);
68 }
69#endif
70}
71
73{
75 int print_level_ = print_lvl;
76
77#ifdef MFEM_USE_MPI
78 if (dot_prod_type != 0)
79 {
80 int rank;
81 MPI_Comm_rank(comm, &rank);
82 if (rank != 0) // Suppress output.
83 {
84 print_level_ = -1;
86 }
87 }
88#endif
89
90 print_level = print_level_;
91}
92
94{
95 print_options = options;
96
97 int derived_print_level = GuessLegacyPrintLevel(options);
98
99#ifdef MFEM_USE_MPI
100 if (dot_prod_type != 0)
101 {
102 int rank;
103 MPI_Comm_rank(comm, &rank);
104 if (rank != 0)
105 {
106 derived_print_level = -1;
108 }
109 }
110#endif
111
112 print_level = derived_print_level;
113}
114
116 int print_level_)
117{
118#ifdef MFEM_USE_MPI
119 int rank = 0;
120 if (comm != MPI_COMM_NULL)
121 {
122 MPI_Comm_rank(comm, &rank);
123 }
124#endif
125
126 switch (print_level_)
127 {
128 case -1:
129 return PrintLevel();
130 case 0:
131 return PrintLevel().Errors().Warnings();
132 case 1:
133 return PrintLevel().Errors().Warnings().Iterations();
134 case 2:
135 return PrintLevel().Errors().Warnings().Summary();
136 case 3:
137 return PrintLevel().Errors().Warnings().FirstAndLast();
138 default:
139#ifdef MFEM_USE_MPI
140 if (rank == 0)
141#endif
142 {
143 MFEM_WARNING("Unknown print level " << print_level_ <<
144 ". Defaulting to level 0.");
145 }
146 return PrintLevel().Errors().Warnings();
147 }
148}
149
151{
152 if (print_options_.iterations)
153 {
154 return 1;
155 }
156 else if (print_options_.first_and_last)
157 {
158 return 3;
159 }
160 else if (print_options_.summary)
161 {
162 return 2;
163 }
164 else if (print_options_.errors && print_options_.warnings)
165 {
166 return 0;
167 }
168 else
169 {
170 return -1;
171 }
172}
173
175{
176 prec = &pr;
177 prec->iterative_mode = false;
178}
179
181{
182 oper = &op;
183 height = op.Height();
184 width = op.Width();
185 if (prec)
186 {
188 }
189}
190
191bool IterativeSolver::Monitor(int it, real_t norm, const Vector& r,
192 const Vector& x, bool final) const
193{
194 if (monitor != nullptr)
195 {
196 if (it == 0 && !final)
197 {
198 monitor->Reset();
199 }
200 monitor->MonitorResidual(it, norm, r, final);
201 monitor->MonitorSolution(it, norm, x, final);
202 return monitor->HasConverged();
203 }
204 return false;
205}
206
208 : damping(dmpng),
209 ess_tdof_list(nullptr),
210 oper(nullptr),
211 allow_updates(true)
212{ }
213
215 const Array<int> &ess_tdofs,
216 const real_t dmpng)
217 :
218 Solver(a.FESpace()->GetTrueVSize()),
219 dinv(height),
220 damping(dmpng),
221 ess_tdof_list(&ess_tdofs),
222 residual(height),
223 allow_updates(false)
224{
225 Vector &diag(residual);
226 a.AssembleDiagonal(diag);
227 // 'a' cannot be used for iterative_mode == true because its size may be
228 // different.
229 oper = nullptr;
230 Setup(diag);
231}
232
234 const Array<int> &ess_tdofs,
235 const real_t dmpng)
236 :
237 Solver(d.Size()),
238 dinv(height),
239 damping(dmpng),
240 ess_tdof_list(&ess_tdofs),
241 residual(height),
242 oper(NULL),
243 allow_updates(false)
244{
245 Setup(d);
246}
247
249{
250 if (!allow_updates)
251 {
252 // original behavior of this method
253 oper = &op; return;
254 }
255
256 // Treat (Par)BilinearForm objects as a special case since their
257 // AssembleDiagonal method returns the true-dof diagonal whereas the form
258 // itself may act as an ldof operator. This is for compatibility with the
259 // constructor that takes a BilinearForm parameter.
260 const BilinearForm *blf = dynamic_cast<const BilinearForm *>(&op);
261 if (blf)
262 {
263 // 'a' cannot be used for iterative_mode == true because its size may be
264 // different.
265 oper = nullptr;
266 height = width = blf->FESpace()->GetTrueVSize();
267 }
268 else
269 {
270 oper = &op;
271 height = op.Height();
272 width = op.Width();
273 MFEM_VERIFY(height == width, "not a square matrix!");
274 // ess_tdof_list is only used with BilinearForm
275 ess_tdof_list = nullptr;
276 }
277 dinv.SetSize(height);
278 residual.SetSize(height);
279 Vector &diag(residual);
280 op.AssembleDiagonal(diag);
281 Setup(diag);
282}
283
285{
286 residual.UseDevice(true);
287 const real_t delta = damping;
288 auto D = diag.Read();
289 auto DI = dinv.Write();
290 const bool use_abs_diag_ = use_abs_diag;
291 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
292 {
293 if (D[i] == 0.0)
294 {
295 MFEM_ABORT_KERNEL("Zero diagonal entry in OperatorJacobiSmoother");
296 }
297 if (!use_abs_diag_) { DI[i] = delta / D[i]; }
298 else { DI[i] = delta / std::abs(D[i]); }
299 });
300 if (ess_tdof_list && ess_tdof_list->Size() > 0)
301 {
302 auto I = ess_tdof_list->Read();
303 mfem::forall(ess_tdof_list->Size(), [=] MFEM_HOST_DEVICE (int i)
304 {
305 DI[I[i]] = delta;
306 });
307 }
308}
309
311{
312 // For empty MPI ranks, height may be 0:
313 // MFEM_VERIFY(Height() > 0, "The diagonal hasn't been computed.");
314 MFEM_VERIFY(x.Size() == Width(), "invalid input vector");
315 MFEM_VERIFY(y.Size() == Height(), "invalid output vector");
316
317 if (iterative_mode)
318 {
319 MFEM_VERIFY(oper, "iterative_mode == true requires the forward operator");
320 oper->Mult(y, residual); // r = A y
321 subtract(x, residual, residual); // r = x - A y
322 }
323 else
324 {
325 residual = x;
326 y.UseDevice(true);
327 y = 0.0;
328 }
329 auto DI = dinv.Read();
330 auto R = residual.Read();
331 auto Y = y.ReadWrite();
332 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
333 {
334 Y[i] += DI[i] * R[i];
335 });
336}
337
339 const Vector &d,
340 const Array<int>& ess_tdofs,
341 int order_, real_t max_eig_estimate_)
342 :
343 Solver(d.Size()),
344 order(order_),
345 max_eig_estimate(max_eig_estimate_),
346 N(d.Size()),
347 dinv(N),
348 diag(d),
349 coeffs(order),
350 ess_tdof_list(ess_tdofs),
351 residual(N),
352 oper(&oper_) { Setup(); }
353
354#ifdef MFEM_USE_MPI
356 const Vector &d,
357 const Array<int>& ess_tdofs,
358 int order_, MPI_Comm comm,
359 int power_iterations,
360 real_t power_tolerance,
361 int power_seed)
362#else
364 const Vector &d,
365 const Array<int>& ess_tdofs,
366 int order_,
367 int power_iterations,
368 real_t power_tolerance,
369 int power_seed)
370#endif
371 : Solver(d.Size()),
372 order(order_),
373 N(d.Size()),
374 dinv(N),
375 diag(d),
376 coeffs(order),
377 ess_tdof_list(ess_tdofs),
378 residual(N),
379 oper(&oper_)
380{
381 OperatorJacobiSmoother invDiagOperator(diag, ess_tdofs, 1.0);
382 ProductOperator diagPrecond(&invDiagOperator, oper, false, false);
383
384#ifdef MFEM_USE_MPI
385 PowerMethod powerMethod(comm);
386#else
387 PowerMethod powerMethod;
388#endif
389 Vector ev(oper->Width());
390 MFEM_VERIFY(power_seed != 0, "invalid seed!");
391 max_eig_estimate = powerMethod.EstimateLargestEigenvalue(diagPrecond, ev,
392 power_iterations,
393 power_tolerance,
394 power_seed);
395
396 Setup();
397}
398
400 const Vector &d,
401 const Array<int>& ess_tdofs,
402 int order_, real_t max_eig_estimate_)
403 : OperatorChebyshevSmoother(*oper_, d, ess_tdofs, order_, max_eig_estimate_) { }
404
405#ifdef MFEM_USE_MPI
407 const Vector &d,
408 const Array<int>& ess_tdofs,
409 int order_, MPI_Comm comm, int power_iterations, real_t power_tolerance)
410 : OperatorChebyshevSmoother(*oper_, d, ess_tdofs, order_, comm,
411 power_iterations, power_tolerance) { }
412#else
414 const Vector &d,
415 const Array<int>& ess_tdofs,
416 int order_, int power_iterations, real_t power_tolerance)
417 : OperatorChebyshevSmoother(*oper_, d, ess_tdofs, order_, power_iterations,
418 power_tolerance) { }
419#endif
420
422{
423 // Invert diagonal
424 residual.UseDevice(true);
425 helperVector.UseDevice(true);
426 auto D = diag.Read();
427 auto X = dinv.Write();
428 mfem::forall(N, [=] MFEM_HOST_DEVICE (int i) { X[i] = 1.0 / D[i]; });
429 auto I = ess_tdof_list.Read();
430 mfem::forall(ess_tdof_list.Size(), [=] MFEM_HOST_DEVICE (int i)
431 {
432 X[I[i]] = 1.0;
433 });
434
435 // Set up Chebyshev coefficients
436 // For reference, see e.g., Parallel multigrid smoothing: polynomial versus
437 // Gauss-Seidel by Adams et al.
438 real_t upper_bound = 1.2 * max_eig_estimate;
439 real_t lower_bound = 0.3 * max_eig_estimate;
440 real_t theta = 0.5 * (upper_bound + lower_bound);
441 real_t delta = 0.5 * (upper_bound - lower_bound);
442
443 switch (order-1)
444 {
445 case 0:
446 {
447 coeffs[0] = 1.0 / theta;
448 break;
449 }
450 case 1:
451 {
452 real_t tmp_0 = 1.0/(pow(delta, 2) - 2*pow(theta, 2));
453 coeffs[0] = -4*theta*tmp_0;
454 coeffs[1] = 2*tmp_0;
455 break;
456 }
457 case 2:
458 {
459 real_t tmp_0 = 3*pow(delta, 2);
460 real_t tmp_1 = pow(theta, 2);
461 real_t tmp_2 = 1.0/(-4*pow(theta, 3) + theta*tmp_0);
462 coeffs[0] = tmp_2*(tmp_0 - 12*tmp_1);
463 coeffs[1] = 12/(tmp_0 - 4*tmp_1);
464 coeffs[2] = -4*tmp_2;
465 break;
466 }
467 case 3:
468 {
469 real_t tmp_0 = pow(delta, 2);
470 real_t tmp_1 = pow(theta, 2);
471 real_t tmp_2 = 8*tmp_0;
472 real_t tmp_3 = 1.0/(pow(delta, 4) + 8*pow(theta, 4) - tmp_1*tmp_2);
473 coeffs[0] = tmp_3*(32*pow(theta, 3) - 16*theta*tmp_0);
474 coeffs[1] = tmp_3*(-48*tmp_1 + tmp_2);
475 coeffs[2] = 32*theta*tmp_3;
476 coeffs[3] = -8*tmp_3;
477 break;
478 }
479 case 4:
480 {
481 real_t tmp_0 = 5*pow(delta, 4);
482 real_t tmp_1 = pow(theta, 4);
483 real_t tmp_2 = pow(theta, 2);
484 real_t tmp_3 = pow(delta, 2);
485 real_t tmp_4 = 60*tmp_3;
486 real_t tmp_5 = 20*tmp_3;
487 real_t tmp_6 = 1.0/(16*pow(theta, 5) - pow(theta, 3)*tmp_5 + theta*tmp_0);
488 real_t tmp_7 = 160*tmp_2;
489 real_t tmp_8 = 1.0/(tmp_0 + 16*tmp_1 - tmp_2*tmp_5);
490 coeffs[0] = tmp_6*(tmp_0 + 80*tmp_1 - tmp_2*tmp_4);
491 coeffs[1] = tmp_8*(tmp_4 - tmp_7);
492 coeffs[2] = tmp_6*(-tmp_5 + tmp_7);
493 coeffs[3] = -80*tmp_8;
494 coeffs[4] = 16*tmp_6;
495 break;
496 }
497 default:
498 MFEM_ABORT("Chebyshev smoother not implemented for order = " << order);
499 }
500}
501
503{
504 if (iterative_mode)
505 {
506 MFEM_ABORT("Chebyshev smoother not implemented for iterative mode");
507 }
508
509 if (!oper)
510 {
511 MFEM_ABORT("Chebyshev smoother requires operator");
512 }
513
514 residual = x;
515 helperVector.SetSize(x.Size());
516 helperVector.UseDevice(true);
517
518 y.UseDevice(true);
519 y = 0.0;
520
521 for (int k = 0; k < order; ++k)
522 {
523 // Apply
524 if (k > 0)
525 {
526 oper->Mult(residual, helperVector);
527 residual = helperVector;
528 }
529
530 // Scale residual by inverse diagonal
531 const int n = N;
532 auto Dinv = dinv.Read();
533 auto R = residual.ReadWrite();
534 mfem::forall(n, [=] MFEM_HOST_DEVICE (int i) { R[i] *= Dinv[i]; });
535
536 // Add weighted contribution to y
537 auto Y = y.ReadWrite();
538 auto C = coeffs.Read();
539 mfem::forall(n, [=] MFEM_HOST_DEVICE (int i) { Y[i] += C[k] * R[i]; });
540 }
541}
542
544{
545 r.SetSize(width);
546 r.UseDevice(true);
547
548 z.SetSize(width);
549 z.UseDevice(true);
550}
551
552void SLISolver::Mult(const Vector &b, Vector &x) const
553{
554 const bool zero_b = (b.Size() == 0);
555 int i;
556
557 MFEM_VERIFY(!zero_b || iterative_mode || width == 0,
558 "empty 'b' can be used only in iterative mode!");
559
560 // Optimized preconditioned SLI with fixed number of iterations and given
561 // initial guess
562 if (rel_tol == 0.0 && iterative_mode && prec)
563 {
564 for (i = 0; i < max_iter; i++)
565 {
566 oper->Mult(x, r); // r = A x
567 if (!zero_b) { subtract(b, r, r); } // r = b - A x
568 prec->Mult(r, z); // z = B r
569 if (!zero_b) { x += z; } // x = x + B (b - A x)
570 else { x -= z; } // x = x - B (A x)
571 }
572 converged = true;
573 final_iter = i;
574 return;
575 }
576
577 // Optimized preconditioned SLI with fixed number of iterations and zero
578 // initial guess
579 if (rel_tol == 0.0 && !iterative_mode && prec)
580 {
581 prec->Mult(b, x); // x = B b (initial guess 0)
582 for (i = 1; i < max_iter; i++)
583 {
584 oper->Mult(x, r); // r = A x
585 subtract(b, r, r); // r = b - A x
586 prec->Mult(r, z); // z = B r
587 x += z; // x = x + B (b - A x)
588 }
589 converged = true;
590 final_iter = i;
591 return;
592 }
593
594 // General version of SLI with a relative tolerance, optional preconditioner
595 // and optional initial guess
596 real_t r0, nom, nom0, nomold = 1, cf;
597
598 if (iterative_mode)
599 {
600 oper->Mult(x, r);
601 if (!zero_b) { subtract(b, r, r); } // r = b - A x
602 }
603 else
604 {
605 r = b;
606 x = 0.0;
607 }
608
609 if (prec)
610 {
611 prec->Mult(r, z); // z = B r
612 nom0 = nom = sqrt(Dot(z, z));
613 }
614 else
615 {
616 nom0 = nom = sqrt(Dot(r, r));
617 }
618 initial_norm = nom0;
619
621 {
622 mfem::out << " Iteration : " << setw(3) << right << 0 << " ||Br|| = "
623 << nom << (print_options.first_and_last ? " ..." : "") << '\n';
624 }
625
626 r0 = std::max(nom*rel_tol, abs_tol);
627 if (nom <= r0)
628 {
629 converged = true;
630 final_iter = 0;
631 final_norm = nom;
632 return;
633 }
634
635 // start iteration
636 converged = false;
638 for (i = 1; true; )
639 {
640 if (prec)
641 {
642 if (!zero_b) { x += z; } // x = x + B (b - A x)
643 else { x -= z; } // x = x - B (A x)
644 }
645 else
646 {
647 if (!zero_b) { x += r; } // x = x + (b - A x)
648 else { x -= r; } // x = x - (A x)
649 }
650
651 oper->Mult(x, r);
652 if (!zero_b) { subtract(b, r, r); } // r = b - A x
653
654 if (prec)
655 {
656 prec->Mult(r, z); // z = B r
657 nom = sqrt(Dot(z, z));
658 }
659 else
660 {
661 nom = sqrt(Dot(r, r));
662 }
663
664 cf = nom/nomold;
665 nomold = nom;
666
667 bool done = false;
668 if (nom < r0)
669 {
670 converged = true;
671 final_iter = i;
672 done = true;
673 }
674
675 if (++i > max_iter)
676 {
677 done = true;
678 }
679
681 {
682 mfem::out << " Iteration : " << setw(3) << right << (i-1)
683 << " ||Br|| = " << setw(11) << left << nom
684 << "\tConv. rate: " << cf << '\n';
685 }
686
687 if (done) { break; }
688 }
689
691 {
692 const auto rf = pow (nom/nom0, 1.0/final_iter);
693 mfem::out << "SLI: Number of iterations: " << final_iter << '\n'
694 << "Conv. rate: " << cf << '\n'
695 << "Average reduction factor: "<< rf << '\n';
696 }
698 {
699 mfem::out << "SLI: No convergence!" << '\n';
700 }
701
702 final_norm = nom;
703}
704
705void SLI(const Operator &A, const Vector &b, Vector &x,
706 int print_iter, int max_num_iter,
707 real_t RTOLERANCE, real_t ATOLERANCE)
708{
709 MFEM_PERF_FUNCTION;
710
711 SLISolver sli;
712 sli.SetPrintLevel(print_iter);
713 sli.SetMaxIter(max_num_iter);
714 sli.SetRelTol(sqrt(RTOLERANCE));
715 sli.SetAbsTol(sqrt(ATOLERANCE));
716 sli.SetOperator(A);
717 sli.Mult(b, x);
718}
719
720void SLI(const Operator &A, Solver &B, const Vector &b, Vector &x,
721 int print_iter, int max_num_iter,
722 real_t RTOLERANCE, real_t ATOLERANCE)
723{
724 MFEM_PERF_FUNCTION;
725
726 SLISolver sli;
727 sli.SetPrintLevel(print_iter);
728 sli.SetMaxIter(max_num_iter);
729 sli.SetRelTol(sqrt(RTOLERANCE));
730 sli.SetAbsTol(sqrt(ATOLERANCE));
731 sli.SetOperator(A);
732 sli.SetPreconditioner(B);
733 sli.Mult(b, x);
734}
735
736
738{
740
741 r.SetSize(width, mt);
742 r.UseDevice(true);
743
744 d.SetSize(width, mt);
745 d.UseDevice(true);
746
747 z.SetSize(width, mt);
748 z.UseDevice(true);
749}
750
751void CGSolver::Mult(const Vector &b, Vector &x) const
752{
753 int i;
754 real_t r0, den, nom, nom0, betanom, alpha, beta;
755
756 x.UseDevice(true);
757 if (iterative_mode)
758 {
759 oper->Mult(x, r);
760 subtract(b, r, r); // r = b - A x
761 }
762 else
763 {
764 r = b;
765 x = 0.0;
766 }
767
768 if (prec)
769 {
770 prec->Mult(r, z); // z = B r
771 d = z;
772 }
773 else
774 {
775 d = r;
776 }
777 nom0 = nom = Dot(d, r);
778 if (nom0 >= 0.0) { initial_norm = sqrt(nom0); }
779 MFEM_VERIFY(IsFinite(nom), "nom = " << nom);
781 {
782 mfem::out << " Iteration : " << setw(3) << 0 << " (B r, r) = "
783 << nom << (print_options.first_and_last ? " ...\n" : "\n");
784 }
785
786 if (nom < 0.0)
787 {
789 {
790 mfem::out << "PCG: The preconditioner is not positive definite. (Br, r) = "
791 << nom << '\n';
792 }
793 converged = false;
794 final_iter = 0;
795 initial_norm = nom;
796 final_norm = nom;
797
798 Monitor(0, nom, r, x, true);
799 return;
800 }
801 r0 = std::max(nom*rel_tol*rel_tol, abs_tol*abs_tol);
802 if (Monitor(0, nom, r, x) || nom <= r0)
803 {
804 converged = true;
805 final_iter = 0;
806 final_norm = sqrt(nom);
807
808 Monitor(0, nom, r, x, true);
809 return;
810 }
811
812 oper->Mult(d, z); // z = A d
813 den = Dot(z, d);
814 MFEM_VERIFY(IsFinite(den), "den = " << den);
815 if (den <= 0.0)
816 {
817 if (Dot(d, d) > 0.0 && print_options.warnings)
818 {
819 mfem::out << "PCG: The operator is not positive definite. (Ad, d) = "
820 << den << '\n';
821 }
822 if (den == 0.0)
823 {
824 converged = false;
825 final_iter = 0;
826 final_norm = sqrt(nom);
827
828 Monitor(0, nom, r, x, true);
829 return;
830 }
831 }
832
833 // start iteration
834 converged = false;
836 for (i = 1; true; )
837 {
838 alpha = nom/den;
839 add(x, alpha, d, x); // x = x + alpha d
840 add(r, -alpha, z, r); // r = r - alpha A d
841
842 if (prec)
843 {
844 prec->Mult(r, z); // z = B r
845 betanom = Dot(r, z);
846 }
847 else
848 {
849 betanom = Dot(r, r);
850 }
851 MFEM_VERIFY(IsFinite(betanom), "betanom = " << betanom);
852 if (betanom < 0.0)
853 {
855 {
856 mfem::out << "PCG: The preconditioner is not positive definite. (Br, r) = "
857 << betanom << '\n';
858 }
859 converged = false;
860 final_iter = i;
861 break;
862 }
863
865 {
866 mfem::out << " Iteration : " << setw(3) << i << " (B r, r) = "
867 << betanom << std::endl;
868 }
869
870 if (Monitor(i, betanom, r, x) || betanom <= r0)
871 {
872 converged = true;
873 final_iter = i;
874 break;
875 }
876
877 if (++i > max_iter)
878 {
879 break;
880 }
881
882 beta = betanom/nom;
883 if (prec)
884 {
885 add(z, beta, d, d); // d = z + beta d
886 }
887 else
888 {
889 add(r, beta, d, d);
890 }
891 oper->Mult(d, z); // z = A d
892 den = Dot(d, z);
893 MFEM_VERIFY(IsFinite(den), "den = " << den);
894 if (den <= 0.0)
895 {
896 if (Dot(d, d) > 0.0 && print_options.warnings)
897 {
898 mfem::out << "PCG: The operator is not positive definite. (Ad, d) = "
899 << den << '\n';
900 }
901 if (den == 0.0)
902 {
903 final_iter = i;
904 break;
905 }
906 }
907 nom = betanom;
908 }
910 {
911 mfem::out << " Iteration : " << setw(3) << final_iter << " (B r, r) = "
912 << betanom << '\n';
913 }
915 {
916 mfem::out << "PCG: Number of iterations: " << final_iter << '\n';
917 }
920 {
921 const auto arf = pow (betanom/nom0, 0.5/final_iter);
922 mfem::out << "Average reduction factor = " << arf << '\n';
923 }
925 {
926 mfem::out << "PCG: No convergence!" << '\n';
927 }
928
929 final_norm = sqrt(betanom);
930
931 Monitor(final_iter, final_norm, r, x, true);
932}
933
934void CG(const Operator &A, const Vector &b, Vector &x,
935 int print_iter, int max_num_iter,
936 real_t RTOLERANCE, real_t ATOLERANCE)
937{
938 MFEM_PERF_FUNCTION;
939
940 CGSolver cg;
941 cg.SetPrintLevel(print_iter);
942 cg.SetMaxIter(max_num_iter);
943 cg.SetRelTol(sqrt(RTOLERANCE));
944 cg.SetAbsTol(sqrt(ATOLERANCE));
945 cg.SetOperator(A);
946 cg.Mult(b, x);
947}
948
949void PCG(const Operator &A, Solver &B, const Vector &b, Vector &x,
950 int print_iter, int max_num_iter,
951 real_t RTOLERANCE, real_t ATOLERANCE)
952{
953 MFEM_PERF_FUNCTION;
954
955 CGSolver pcg;
956 pcg.SetPrintLevel(print_iter);
957 pcg.SetMaxIter(max_num_iter);
958 pcg.SetRelTol(sqrt(RTOLERANCE));
959 pcg.SetAbsTol(sqrt(ATOLERANCE));
960 pcg.SetOperator(A);
961 pcg.SetPreconditioner(B);
962 pcg.Mult(b, x);
963}
964
965
967 real_t &cs, real_t &sn)
968{
969 if (dy == 0.0)
970 {
971 cs = 1.0;
972 sn = 0.0;
973 }
974 else if (fabs(dy) > fabs(dx))
975 {
976 real_t temp = dx / dy;
977 sn = 1.0 / sqrt( 1.0 + temp*temp );
978 cs = temp * sn;
979 }
980 else
981 {
982 real_t temp = dy / dx;
983 cs = 1.0 / sqrt( 1.0 + temp*temp );
984 sn = temp * cs;
985 }
986}
987
988inline void ApplyPlaneRotation(real_t &dx, real_t &dy, real_t &cs, real_t &sn)
989{
990 real_t temp = cs * dx + sn * dy;
991 dy = -sn * dx + cs * dy;
992 dx = temp;
993}
994
995inline void Update(Vector &x, int k, DenseMatrix &h, Vector &s,
997{
998 Vector y(s);
999
1000 // Backsolve:
1001 for (int i = k; i >= 0; i--)
1002 {
1003 y(i) /= h(i,i);
1004 for (int j = i - 1; j >= 0; j--)
1005 {
1006 y(j) -= h(j,i) * y(i);
1007 }
1008 }
1009
1010 for (int j = 0; j <= k; j++)
1011 {
1012 x.Add(y(j), *v[j]);
1013 }
1014}
1015
1016void GMRESSolver::Mult(const Vector &b, Vector &x) const
1017{
1018 // Generalized Minimum Residual method following the algorithm
1019 // on p. 20 of the SIAM Templates book.
1020
1021 int n = width;
1022
1023 DenseMatrix H(m+1, m);
1024 Vector s(m+1), cs(m+1), sn(m+1);
1025 Vector r(n), w(n);
1027
1028 b.UseDevice(true);
1029 x.UseDevice(true);
1030 r.UseDevice(true);
1031 w.UseDevice(true);
1032
1033 int i, j, k;
1034
1035 if (iterative_mode)
1036 {
1037 oper->Mult(x, r);
1038 }
1039 else
1040 {
1041 x = 0.0;
1042 }
1043
1044 if (prec)
1045 {
1046 if (iterative_mode)
1047 {
1048 subtract(b, r, w);
1049 prec->Mult(w, r); // r = M (b - A x)
1050 }
1051 else
1052 {
1053 prec->Mult(b, r);
1054 }
1055 }
1056 else
1057 {
1058 if (iterative_mode)
1059 {
1060 subtract(b, r, r);
1061 }
1062 else
1063 {
1064 r = b;
1065 }
1066 }
1067 real_t beta = initial_norm = Norm(r); // beta = ||r||
1068 MFEM_VERIFY(IsFinite(beta), "beta = " << beta);
1069
1070 final_norm = std::max(rel_tol*beta, abs_tol);
1071
1072 if (Monitor(0, beta, r, x) || beta <= final_norm)
1073 {
1074 final_norm = beta;
1075 final_iter = 0;
1076 converged = true;
1077 j = 0;
1078 goto finish;
1079 }
1080
1082 {
1083 mfem::out << " Pass : " << setw(2) << 1
1084 << " Iteration : " << setw(3) << 0
1085 << " ||B r|| = " << beta
1086 << (print_options.first_and_last ? " ...\n" : "\n");
1087 }
1088
1089 v.SetSize(m+1, NULL);
1090
1091 for (j = 1; j <= max_iter; )
1092 {
1093 if (v[0] == NULL)
1094 {
1095 v[0] = new Vector(n);
1096 v[0]->UseDevice(true);
1097 }
1098 v[0]->Set(1.0/beta, r);
1099 s = 0.0; s(0) = beta;
1100
1101 for (i = 0; i < m && j <= max_iter; i++, j++)
1102 {
1103 if (prec)
1104 {
1105 oper->Mult(*v[i], r);
1106 prec->Mult(r, w); // w = M A v[i]
1107 }
1108 else
1109 {
1110 oper->Mult(*v[i], w);
1111 }
1112
1113 for (k = 0; k <= i; k++)
1114 {
1115 H(k,i) = Dot(w, *v[k]); // H(k,i) = w * v[k]
1116 w.Add(-H(k,i), *v[k]); // w -= H(k,i) * v[k]
1117 }
1118
1119 H(i+1,i) = Norm(w); // H(i+1,i) = ||w||
1120 MFEM_VERIFY(IsFinite(H(i+1,i)), "Norm(w) = " << H(i+1,i));
1121 if (v[i+1] == NULL) { v[i+1] = new Vector(n); }
1122 v[i+1]->Set(1.0/H(i+1,i), w); // v[i+1] = w / H(i+1,i)
1123
1124 for (k = 0; k < i; k++)
1125 {
1126 ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
1127 }
1128
1129 GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1130 ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1131 ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
1132
1133 const real_t resid = fabs(s(i+1));
1134 MFEM_VERIFY(IsFinite(resid), "resid = " << resid);
1135
1136 if (Monitor(j, resid, r, x) || resid <= final_norm)
1137 {
1138 Update(x, i, H, s, v);
1139 final_norm = resid;
1140 final_iter = j;
1141 converged = true;
1142 goto finish;
1143 }
1144
1146 {
1147 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1148 << " Iteration : " << setw(3) << j
1149 << " ||B r|| = " << resid << '\n';
1150 }
1151 }
1152
1153 if (print_options.iterations && j <= max_iter)
1154 {
1155 mfem::out << "Restarting..." << '\n';
1156 }
1157
1158 Update(x, i-1, H, s, v);
1159
1160 oper->Mult(x, r);
1161 if (prec)
1162 {
1163 subtract(b, r, w);
1164 prec->Mult(w, r); // r = M (b - A x)
1165 }
1166 else
1167 {
1168 subtract(b, r, r);
1169 }
1170 beta = Norm(r); // beta = ||r||
1171 MFEM_VERIFY(IsFinite(beta), "beta = " << beta);
1172 if (beta <= final_norm)
1173 {
1174 final_norm = beta;
1175 final_iter = j;
1176 converged = true;
1177 goto finish;
1178 }
1179 }
1180
1181 final_norm = beta;
1183 converged = false;
1184
1185finish:
1187 {
1188 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1189 << " Iteration : " << setw(3) << final_iter
1190 << " ||B r|| = " << final_norm << '\n';
1191 }
1193 {
1194 mfem::out << "GMRES: Number of iterations: " << final_iter << '\n';
1195 }
1197 {
1198 mfem::out << "GMRES: No convergence!\n";
1199 }
1200
1201 Monitor(final_iter, final_norm, r, x, true);
1202
1203 for (i = 0; i < v.Size(); i++)
1204 {
1205 delete v[i];
1206 }
1207}
1208
1209void FGMRESSolver::Mult(const Vector &b, Vector &x) const
1210{
1211 DenseMatrix H(m+1,m);
1212 Vector s(m+1), cs(m+1), sn(m+1);
1213 Vector r(b.Size());
1214
1215 b.UseDevice(true);
1216 x.UseDevice(true);
1217 r.UseDevice(true);
1218
1219 int i, j, k;
1220
1221 if (iterative_mode)
1222 {
1223 oper->Mult(x, r);
1224 subtract(b,r,r);
1225 }
1226 else
1227 {
1228 x = 0.;
1229 r = b;
1230 }
1231 real_t beta = initial_norm = Norm(r); // beta = ||r||
1232 MFEM_VERIFY(IsFinite(beta), "beta = " << beta);
1233
1234 final_norm = std::max(rel_tol*beta, abs_tol);
1235
1236 if (Monitor(0, beta, r, x) || beta <= final_norm)
1237 {
1238 converged = true;
1239 final_norm = beta;
1240 final_iter = 0;
1241
1242 Monitor(0, beta, r, x, true);
1243 return;
1244 }
1245
1246 // initialize the first pass
1247 converged = false;
1248
1250 {
1251 mfem::out << " Pass : " << setw(2) << 1
1252 << " Iteration : " << setw(3) << 0
1253 << " || r || = " << beta
1254 << (print_options.first_and_last ? " ...\n" : "\n");
1255 }
1256
1257 Array<Vector*> v(m+1);
1258 Array<Vector*> z(m+1);
1259 for (i= 0; i<=m; i++)
1260 {
1261 v[i] = NULL;
1262 z[i] = NULL;
1263 }
1264
1265 j = 1;
1266 while (j <= max_iter)
1267 {
1268 if (v[0] == NULL)
1269 {
1270 v[0] = new Vector(b.Size());
1271 v[0]->UseDevice(true);
1272 }
1273 (*v[0]) = 0.0;
1274 v[0] -> Add (1.0/beta, r); // v[0] = r / ||r||
1275 s = 0.0; s(0) = beta;
1276
1277 for (i = 0; i < m && j <= max_iter; i++, j++)
1278 {
1279
1280 if (z[i] == NULL)
1281 {
1282 z[i] = new Vector(b.Size());
1283 z[i]->UseDevice(true);
1284 }
1285 (*z[i]) = 0.0;
1286
1287 if (prec)
1288 {
1289 prec->Mult(*v[i], *z[i]);
1290 }
1291 else
1292 {
1293 (*z[i]) = (*v[i]);
1294 }
1295 oper->Mult(*z[i], r);
1296
1297 for (k = 0; k <= i; k++)
1298 {
1299 H(k,i) = Dot( r, *v[k]); // H(k,i) = r * v[k]
1300 r.Add(-H(k,i), (*v[k])); // r -= H(k,i) * v[k]
1301 }
1302
1303 H(i+1,i) = Norm(r); // H(i+1,i) = ||r||
1304 if (v[i+1] == NULL)
1305 {
1306 v[i+1] = new Vector(b.Size());
1307 v[i+1]->UseDevice(true);
1308 }
1309 (*v[i+1]) = 0.0;
1310 v[i+1] -> Add (1.0/H(i+1,i), r); // v[i+1] = r / H(i+1,i)
1311
1312 for (k = 0; k < i; k++)
1313 {
1314 ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
1315 }
1316
1317 GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1318 ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1319 ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
1320
1321 const real_t resid = fabs(s(i+1));
1322 MFEM_VERIFY(IsFinite(resid), "resid = " << resid);
1324 resid <= final_norm))
1325 {
1326 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1327 << " Iteration : " << setw(3) << j
1328 << " || r || = " << resid << endl;
1329 }
1330
1331 if (Monitor(j, resid, r, x, resid <= final_norm) || resid <= final_norm)
1332 {
1333 Update(x, i, H, s, z);
1334 final_norm = resid;
1335 final_iter = j;
1336 converged = true;
1337
1339 {
1340 mfem::out << "FGMRES: Number of iterations: " << final_iter << '\n';
1341 }
1342
1343 for (i= 0; i<=m; i++)
1344 {
1345 if (v[i]) { delete v[i]; }
1346 if (z[i]) { delete z[i]; }
1347 }
1348
1349 Monitor(j, resid, r, x, true);
1350 return;
1351 }
1352 }
1353
1355 {
1356 mfem::out << "Restarting..." << endl;
1357 }
1358
1359 Update(x, i-1, H, s, z);
1360
1361 oper->Mult(x, r);
1362 subtract(b,r,r);
1363 beta = Norm(r);
1364 MFEM_VERIFY(IsFinite(beta), "beta = " << beta);
1365 if (beta <= final_norm)
1366 {
1367 converged = true;
1368
1369 break;
1370 }
1371 }
1372
1373 // Clean buffers up
1374 for (i = 0; i <= m; i++)
1375 {
1376 if (v[i]) { delete v[i]; }
1377 if (z[i]) { delete z[i]; }
1378 }
1379
1380 final_norm = beta;
1382
1383 // Note: j is off by one when we arrive here
1385 {
1386 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1387 << " Iteration : " << setw(3) << j-1
1388 << " || r || = " << final_norm << endl;
1389 }
1391 {
1392 mfem::out << "FGMRES: Number of iterations: " << final_iter << '\n';
1393 }
1395 {
1396 mfem::out << "FGMRES: No convergence!\n";
1397 }
1398
1399 Monitor(final_iter, final_norm, r, x, true);
1400}
1401
1402
1403int GMRES(const Operator &A, Vector &x, const Vector &b, Solver &M,
1404 int &max_iter, int m, real_t &tol, real_t atol, int printit)
1405{
1406 MFEM_PERF_FUNCTION;
1407
1408 GMRESSolver gmres;
1409 gmres.SetPrintLevel(printit);
1410 gmres.SetMaxIter(max_iter);
1411 gmres.SetKDim(m);
1412 gmres.SetRelTol(sqrt(tol));
1413 gmres.SetAbsTol(sqrt(atol));
1414 gmres.SetOperator(A);
1415 gmres.SetPreconditioner(M);
1416 gmres.Mult(b, x);
1417 max_iter = gmres.GetNumIterations();
1418 tol = gmres.GetFinalNorm()*gmres.GetFinalNorm();
1419 return gmres.GetConverged();
1420}
1421
1422void GMRES(const Operator &A, Solver &B, const Vector &b, Vector &x,
1423 int print_iter, int max_num_iter, int m, real_t rtol, real_t atol)
1424{
1425 GMRES(A, x, b, B, max_num_iter, m, rtol, atol, print_iter);
1426}
1427
1428
1430{
1431 p.SetSize(width);
1432 p.UseDevice(true);
1433
1435 phat.UseDevice(true);
1436
1437 s.SetSize(width);
1438 s.UseDevice(true);
1439
1441 shat.UseDevice(true);
1442
1443 t.SetSize(width);
1444 t.UseDevice(true);
1445
1446 v.SetSize(width);
1447 v.UseDevice(true);
1448
1449 r.SetSize(width);
1450 r.UseDevice(true);
1451
1453 rtilde.UseDevice(true);
1454}
1455
1456void BiCGSTABSolver::Mult(const Vector &b, Vector &x) const
1457{
1458 // BiConjugate Gradient Stabilized method following the algorithm
1459 // on p. 27 of the SIAM Templates book.
1460
1461 int i;
1462 real_t resid, tol_goal;
1463 real_t rho_1, rho_2=1.0, alpha=1.0, beta, omega=1.0;
1464
1465 b.UseDevice(true);
1466 x.UseDevice(true);
1467
1468 if (iterative_mode)
1469 {
1470 oper->Mult(x, r);
1471 subtract(b, r, r); // r = b - A x
1472 }
1473 else
1474 {
1475 x = 0.0;
1476 r = b;
1477 }
1478 rtilde = r;
1479
1480 resid = initial_norm = Norm(r);
1481 MFEM_VERIFY(IsFinite(resid), "resid = " << resid);
1483 {
1484 mfem::out << " Iteration : " << setw(3) << 0
1485 << " ||r|| = " << resid << (print_options.first_and_last ? " ...\n" : "\n");
1486 }
1487
1488 tol_goal = std::max(resid*rel_tol, abs_tol);
1489
1490 if (Monitor(0, resid, r, x) || resid <= tol_goal)
1491 {
1492 final_norm = resid;
1493 final_iter = 0;
1494 converged = true;
1495
1496 Monitor(0, resid, r, x, true);
1497 return;
1498 }
1499
1500 for (i = 1; i <= max_iter; i++)
1501 {
1502 rho_1 = Dot(rtilde, r);
1503 if (rho_1 == 0)
1504 {
1506 {
1507 mfem::out << " Iteration : " << setw(3) << i
1508 << " ||r|| = " << resid << '\n';
1509 }
1510
1511 final_norm = resid;
1512 final_iter = i;
1513 converged = false;
1515 {
1516 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1517 }
1519 {
1520 mfem::out << "BiCGStab: No convergence!\n";
1521 }
1522
1523 Monitor(i, resid, r, x, true);
1524 return;
1525 }
1526 if (i == 1)
1527 {
1528 p = r;
1529 }
1530 else
1531 {
1532 beta = (rho_1/rho_2) * (alpha/omega);
1533 add(p, -omega, v, p); // p = p - omega * v
1534 add(r, beta, p, p); // p = r + beta * p
1535 }
1536 if (prec)
1537 {
1538 prec->Mult(p, phat); // phat = M^{-1} * p
1539 }
1540 else
1541 {
1542 phat = p;
1543 }
1544 oper->Mult(phat, v); // v = A * phat
1545 alpha = rho_1 / Dot(rtilde, v);
1546 add(r, -alpha, v, s); // s = r - alpha * v
1547 resid = Norm(s);
1548 MFEM_VERIFY(IsFinite(resid), "resid = " << resid);
1549 if (Monitor(i, resid, r, x) || resid < tol_goal)
1550 {
1551 x.Add(alpha, phat); // x = x + alpha * phat
1553 {
1554 mfem::out << " Iteration : " << setw(3) << i
1555 << " ||s|| = " << resid << '\n';
1556 }
1557 final_norm = resid;
1558 final_iter = i;
1559 converged = true;
1561 {
1562 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1563 }
1564
1565 Monitor(i, resid, r, x, true);
1566 return;
1567 }
1569 {
1570 mfem::out << " Iteration : " << setw(3) << i
1571 << " ||s|| = " << resid;
1572 }
1573 if (prec)
1574 {
1575 prec->Mult(s, shat); // shat = M^{-1} * s
1576 }
1577 else
1578 {
1579 shat = s;
1580 }
1581 oper->Mult(shat, t); // t = A * shat
1582 omega = Dot(t, s) / Dot(t, t);
1583 x.Add(alpha, phat); // x += alpha * phat
1584 x.Add(omega, shat); // x += omega * shat
1585 add(s, -omega, t, r); // r = s - omega * t
1586
1587 rho_2 = rho_1;
1588 resid = Norm(r);
1589 MFEM_VERIFY(IsFinite(resid), "resid = " << resid);
1591 {
1592 mfem::out << " ||r|| = " << resid << '\n';
1593 }
1594 if (Monitor(i, resid, r, x) || resid < tol_goal)
1595 {
1596 final_norm = resid;
1597 final_iter = i;
1598 converged = true;
1600 {
1601 mfem::out << " Iteration : " << setw(3) << i
1602 << " ||r|| = " << resid << '\n';
1603 }
1605 {
1606 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1607 }
1608
1609 Monitor(i, resid, r, x, true);
1610 return;
1611 }
1612 if (omega == 0)
1613 {
1614 final_norm = resid;
1615 final_iter = i;
1616 converged = false;
1618 {
1619 mfem::out << " Iteration : " << setw(3) << i
1620 << " ||r|| = " << resid << '\n';
1621 }
1623 {
1624 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1625 }
1627 {
1628 mfem::out << "BiCGStab: No convergence!\n";
1629 }
1630
1631 Monitor(i, resid, r, x, true);
1632 return;
1633 }
1634 }
1635
1636 final_norm = resid;
1638 converged = false;
1639
1641 {
1642 mfem::out << " Iteration : " << setw(3) << final_iter
1643 << " ||r|| = " << resid << '\n';
1644 }
1646 {
1647 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1648 }
1650 {
1651 mfem::out << "BiCGStab: No convergence!\n";
1652 }
1653
1654 Monitor(final_iter, final_norm, r, x, true);
1655}
1656
1657int BiCGSTAB(const Operator &A, Vector &x, const Vector &b, Solver &M,
1658 int &max_iter, real_t &tol, real_t atol, int printit)
1659{
1660 BiCGSTABSolver bicgstab;
1661 bicgstab.SetPrintLevel(printit);
1662 bicgstab.SetMaxIter(max_iter);
1663 bicgstab.SetRelTol(sqrt(tol));
1664 bicgstab.SetAbsTol(sqrt(atol));
1665 bicgstab.SetOperator(A);
1666 bicgstab.SetPreconditioner(M);
1667 bicgstab.Mult(b, x);
1668 max_iter = bicgstab.GetNumIterations();
1669 tol = bicgstab.GetFinalNorm()*bicgstab.GetFinalNorm();
1670 return bicgstab.GetConverged();
1671}
1672
1673void BiCGSTAB(const Operator &A, Solver &B, const Vector &b, Vector &x,
1674 int print_iter, int max_num_iter, real_t rtol, real_t atol)
1675{
1676 BiCGSTAB(A, x, b, B, max_num_iter, rtol, atol, print_iter);
1677}
1678
1679
1681{
1683 v0.SetSize(width);
1684 v0.UseDevice(true);
1685
1686 v1.SetSize(width);
1687 v1.UseDevice(true);
1688
1689 w0.SetSize(width);
1690 w0.UseDevice(true);
1691
1692 w1.SetSize(width);
1693 w1.UseDevice(true);
1694
1695 q.SetSize(width);
1696 q.UseDevice(true);
1697
1698 if (prec)
1699 {
1700 u1.SetSize(width);
1701 u1.UseDevice(true);
1702 }
1703}
1704
1705void MINRESSolver::Mult(const Vector &b, Vector &x) const
1706{
1707 // Based on the MINRES algorithm on p. 86, Fig. 6.9 in
1708 // "Iterative Krylov Methods for Large Linear Systems",
1709 // by Henk A. van der Vorst, 2003.
1710 // Extended to support an SPD preconditioner.
1711
1712 b.UseDevice(true);
1713 x.UseDevice(true);
1714
1715 int it;
1716 real_t beta, eta, gamma0, gamma1, sigma0, sigma1;
1717 real_t alpha, delta, rho1, rho2, rho3, norm_goal;
1718 Vector *z = (prec) ? &u1 : &v1;
1719
1720 converged = true;
1721
1722 if (!iterative_mode)
1723 {
1724 v1 = b;
1725 x = 0.;
1726 }
1727 else
1728 {
1729 oper->Mult(x, v1);
1730 subtract(b, v1, v1);
1731 }
1732
1733 if (prec)
1734 {
1735 prec->Mult(v1, u1);
1736 }
1737 eta = beta = initial_norm = sqrt(Dot(*z, v1));
1738 MFEM_VERIFY(IsFinite(eta), "eta = " << eta);
1739 gamma0 = gamma1 = 1.;
1740 sigma0 = sigma1 = 0.;
1741
1742 norm_goal = std::max(rel_tol*eta, abs_tol);
1743
1744 if (Monitor(0, eta, *z, x) || eta <= norm_goal)
1745 {
1746 it = 0;
1747 goto loop_end;
1748 }
1749
1751 {
1752 mfem::out << "MINRES: iteration " << setw(3) << 0 << ": ||r||_B = "
1753 << eta << (print_options.first_and_last ? " ..." : "") << '\n';
1754 }
1755
1756 for (it = 1; it <= max_iter; it++)
1757 {
1758 v1 /= beta;
1759 if (prec)
1760 {
1761 u1 /= beta;
1762 }
1763 oper->Mult(*z, q);
1764 alpha = Dot(*z, q);
1765 MFEM_VERIFY(IsFinite(alpha), "alpha = " << alpha);
1766 if (it > 1) // (v0 == 0) for (it == 1)
1767 {
1768 q.Add(-beta, v0);
1769 }
1770 add(q, -alpha, v1, v0);
1771
1772 delta = gamma1*alpha - gamma0*sigma1*beta;
1773 rho3 = sigma0*beta;
1774 rho2 = sigma1*alpha + gamma0*gamma1*beta;
1775 if (!prec)
1776 {
1777 beta = Norm(v0);
1778 }
1779 else
1780 {
1781 prec->Mult(v0, q);
1782 beta = sqrt(Dot(v0, q));
1783 }
1784 MFEM_VERIFY(IsFinite(beta), "beta = " << beta);
1785 rho1 = std::hypot(delta, beta);
1786
1787 if (it == 1)
1788 {
1789 w0.Set(1./rho1, *z); // (w0 == 0) and (w1 == 0)
1790 }
1791 else if (it == 2)
1792 {
1793 add(1./rho1, *z, -rho2/rho1, w1, w0); // (w0 == 0)
1794 }
1795 else
1796 {
1797 add(-rho3/rho1, w0, -rho2/rho1, w1, w0);
1798 w0.Add(1./rho1, *z);
1799 }
1800
1801 gamma0 = gamma1;
1802 gamma1 = delta/rho1;
1803
1804 x.Add(gamma1*eta, w0);
1805
1806 sigma0 = sigma1;
1807 sigma1 = beta/rho1;
1808
1809 eta = -sigma1*eta;
1810 MFEM_VERIFY(IsFinite(eta), "eta = " << eta);
1811
1812 if (fabs(eta) <= norm_goal)
1813 {
1814 goto loop_end;
1815 }
1816
1818 {
1819 mfem::out << "MINRES: iteration " << setw(3) << it << ": ||r||_B = "
1820 << fabs(eta) << '\n';
1821 }
1822 if (Monitor(it, fabs(eta), *z, x)) { goto loop_end; }
1823
1824 if (prec)
1825 {
1826 Swap(u1, q);
1827 }
1828 Swap(v0, v1);
1829 Swap(w0, w1);
1830 }
1831 converged = false;
1832 it--;
1833
1834loop_end:
1835 final_iter = it;
1836 final_norm = fabs(eta);
1837
1839 {
1840 mfem::out << "MINRES: iteration " << setw(3) << it << ": ||r||_B = "
1841 << fabs(eta) << '\n';
1842 }
1843
1845 {
1846 mfem::out << "MINRES: Number of iterations: " << setw(3) << final_iter << '\n';
1847 }
1848
1849 converged = Monitor(final_iter, final_norm, *z, x, true) || converged;
1850
1851 // if (print_options.iteration_details || (!converged && print_options.errors))
1852 // {
1853 // oper->Mult(x, v1);
1854 // subtract(b, v1, v1);
1855 // if (prec)
1856 // {
1857 // prec->Mult(v1, u1);
1858 // }
1859 // eta = sqrt(Dot(*z, v1));
1860 // mfem::out << "MINRES: iteration " << setw(3) << it << '\n'
1861 // << " ||r||_B = " << eta << " (re-computed)" << '\n';
1862 // }
1863
1865 {
1866 mfem::out << "MINRES: No convergence!\n";
1867 }
1868}
1869
1870void MINRES(const Operator &A, const Vector &b, Vector &x, int print_it,
1871 int max_it, real_t rtol, real_t atol)
1872{
1873 MFEM_PERF_FUNCTION;
1874
1875 MINRESSolver minres;
1876 minres.SetPrintLevel(print_it);
1877 minres.SetMaxIter(max_it);
1878 minres.SetRelTol(sqrt(rtol));
1879 minres.SetAbsTol(sqrt(atol));
1880 minres.SetOperator(A);
1881 minres.Mult(b, x);
1882}
1883
1884void MINRES(const Operator &A, Solver &B, const Vector &b, Vector &x,
1885 int print_it, int max_it, real_t rtol, real_t atol)
1886{
1887 MINRESSolver minres;
1888 minres.SetPrintLevel(print_it);
1889 minres.SetMaxIter(max_it);
1890 minres.SetRelTol(sqrt(rtol));
1891 minres.SetAbsTol(sqrt(atol));
1892 minres.SetOperator(A);
1893 minres.SetPreconditioner(B);
1894 minres.Mult(b, x);
1895}
1896
1897
1899{
1900 oper = &op;
1901 height = op.Height();
1902 width = op.Width();
1903 MFEM_VERIFY(height == width, "square Operator is required.");
1904
1905 r.SetSize(width);
1906 r.UseDevice(true);
1907
1908 c.SetSize(width);
1909 c.UseDevice(true);
1910}
1911
1912void NewtonSolver::Mult(const Vector &b, Vector &x) const
1913{
1914 MFEM_VERIFY(oper != NULL, "the Operator is not set (use SetOperator).");
1915 MFEM_VERIFY(prec != NULL, "the Solver is not set (use SetSolver).");
1916
1917 int it;
1918 real_t norm0, norm, norm_goal;
1919 const bool have_b = (b.Size() == Height());
1920
1921 if (!iterative_mode)
1922 {
1923 x = 0.0;
1924 }
1925
1926 ProcessNewState(x);
1927
1928 oper->Mult(x, r);
1929 if (have_b)
1930 {
1931 r -= b;
1932 }
1933
1934 norm0 = norm = initial_norm = Norm(r);
1936 {
1937 mfem::out << "Newton iteration " << setw(2) << 0
1938 << " : ||r|| = " << norm << "...\n";
1939 }
1940 norm_goal = std::max(rel_tol*norm, abs_tol);
1941
1942 prec->iterative_mode = false;
1943
1944 // x_{i+1} = x_i - [DF(x_i)]^{-1} [F(x_i)-b]
1945 for (it = 0; true; it++)
1946 {
1947 MFEM_VERIFY(IsFinite(norm), "norm = " << norm);
1949 {
1950 mfem::out << "Newton iteration " << setw(2) << it
1951 << " : ||r|| = " << norm;
1952 if (it > 0)
1953 {
1954 mfem::out << ", ||r||/||r_0|| = " << norm/norm0;
1955 }
1956 mfem::out << '\n';
1957 }
1958
1959 if (Monitor(it, norm, r, x) || norm <= norm_goal)
1960 {
1961 converged = true;
1962 break;
1963 }
1964
1965 if (it >= max_iter)
1966 {
1967 converged = false;
1968 break;
1969 }
1970
1971 grad = &oper->GetGradient(x);
1973
1974 if (lin_rtol_type)
1975 {
1976 AdaptiveLinRtolPreSolve(x, it, norm);
1977 }
1978
1979 prec->Mult(r, c); // c = [DF(x_i)]^{-1} [F(x_i)-b]
1980
1981 if (lin_rtol_type)
1982 {
1983 AdaptiveLinRtolPostSolve(c, r, it, norm);
1984 }
1985
1986 const real_t c_scale = ComputeScalingFactor(x, b);
1987 if (c_scale == 0.0)
1988 {
1989 converged = false;
1990 break;
1991 }
1992 add(x, -c_scale, c, x);
1993
1994 ProcessNewState(x);
1995
1996 oper->Mult(x, r);
1997 if (have_b)
1998 {
1999 r -= b;
2000 }
2001 norm = Norm(r);
2002 }
2003
2004 final_iter = it;
2005 final_norm = norm;
2006
2009 {
2010 mfem::out << "Newton: Number of iterations: " << final_iter << '\n'
2011 << " ||r|| = " << final_norm
2012 << ", ||r||/||r_0|| = " << final_norm/norm0 << '\n';
2013 }
2015 {
2016 mfem::out << "Newton: No convergence!\n";
2017 }
2018
2019 Monitor(final_iter, final_norm, r, x, true);
2020}
2021
2023 const real_t rtol0,
2024 const real_t rtol_max,
2025 const real_t alpha_,
2026 const real_t gamma_)
2027{
2028 lin_rtol_type = type;
2029 lin_rtol0 = rtol0;
2030 lin_rtol_max = rtol_max;
2031 this->alpha = alpha_;
2032 this->gamma = gamma_;
2033}
2034
2036 const int it,
2037 const real_t fnorm) const
2038{
2039 // Assume that when adaptive linear solver relative tolerance is activated,
2040 // we are working with an iterative solver.
2041 auto iterative_solver = static_cast<IterativeSolver *>(prec);
2042 // Adaptive linear solver relative tolerance
2043 real_t eta;
2044 // Safeguard threshold
2045 real_t sg_threshold = 0.1;
2046
2047 if (it == 0)
2048 {
2049 eta = lin_rtol0;
2050 }
2051 else
2052 {
2053 if (lin_rtol_type == 1)
2054 {
2055 // eta = gamma * abs(||F(x1)|| - ||F(x0) + DF(x0) s0||) / ||F(x0)||
2056 eta = gamma * abs(fnorm - lnorm_last) / fnorm_last;
2057 }
2058 else if (lin_rtol_type == 2)
2059 {
2060 // eta = gamma * (||F(x1)|| / ||F(x0)||)^alpha
2061 eta = gamma * pow(fnorm / fnorm_last, alpha);
2062 }
2063 else
2064 {
2065 MFEM_ABORT("Unknown adaptive linear solver rtol version");
2066 }
2067
2068 // Safeguard rtol from "oversolving" ?!
2069 const real_t sg_eta = gamma * pow(eta_last, alpha);
2070 if (sg_eta > sg_threshold) { eta = std::max(eta, sg_eta); }
2071 }
2072
2073 eta = std::min(eta, lin_rtol_max);
2074 iterative_solver->SetRelTol(eta);
2075 eta_last = eta;
2077 {
2078 mfem::out << "Eisenstat-Walker rtol = " << eta << "\n";
2079 }
2080}
2081
2083 const Vector &b,
2084 const int it,
2085 const real_t fnorm) const
2086{
2087 fnorm_last = fnorm;
2088
2089 // If version 1 is chosen, the true linear residual norm has to be computed
2090 // and in most cases we can only retrieve the preconditioned linear residual
2091 // norm.
2092 if (lin_rtol_type == 1)
2093 {
2094 // lnorm_last = ||F(x0) + DF(x0) s0||
2095 Vector linres(x.Size());
2096 linres.UseDevice(true);
2097 grad->Mult(x, linres);
2098 linres -= b;
2099 lnorm_last = Norm(linres);
2100 }
2101}
2102
2103void LBFGSSolver::Mult(const Vector &b, Vector &x) const
2104{
2105 MFEM_VERIFY(oper != NULL, "the Operator is not set (use SetOperator).");
2106
2107 b.UseDevice(true);
2108 x.UseDevice(true);
2109
2110 // Quadrature points that are checked for negative Jacobians etc.
2111 Vector sk, rk, yk, rho, alpha;
2112
2113 // r - r_{k+1}, c - descent direction
2114 sk.SetSize(width); // x_{k+1}-x_k
2115 sk.UseDevice(true);
2116
2117 rk.SetSize(width); // nabla(f(x_{k}))
2118 rk.UseDevice(true);
2119
2120 yk.SetSize(width); // r_{k+1}-r_{k}
2121 yk.UseDevice(true);
2122
2123 rho.SetSize(m); // 1/(dot(yk,sk)
2124 rho.UseDevice(true);
2125
2126 alpha.SetSize(m); // rhok*sk'*c
2127 alpha.UseDevice(true);
2128
2129 int last_saved_id = -1;
2130
2131 int it;
2132 real_t norm0, norm, norm_goal;
2133 const bool have_b = (b.Size() == Height());
2134
2135 if (!iterative_mode)
2136 {
2137 x = 0.0;
2138 }
2139
2140 ProcessNewState(x);
2141
2142 // r = F(x)-b
2143 oper->Mult(x, r);
2144 if (have_b) { r -= b; }
2145
2146 c = r; // initial descent direction
2147
2148 norm0 = norm = initial_norm = Norm(r);
2150 {
2151 mfem::out << "LBFGS iteration " << setw(2) << 0
2152 << " : ||r|| = " << norm << "...\n";
2153 }
2154 norm_goal = std::max(rel_tol*norm, abs_tol);
2155 for (it = 0; true; it++)
2156 {
2157 MFEM_VERIFY(IsFinite(norm), "norm = " << norm);
2159 {
2160 mfem::out << "LBFGS iteration " << it
2161 << " : ||r|| = " << norm;
2162 if (it > 0)
2163 {
2164 mfem::out << ", ||r||/||r_0|| = " << norm/norm0;
2165 }
2166 mfem::out << '\n';
2167 }
2168
2169 if (norm <= norm_goal)
2170 {
2171 converged = true;
2172 break;
2173 }
2174
2175 if (it >= max_iter)
2176 {
2177 converged = false;
2178 break;
2179 }
2180
2181 rk = r;
2182 const real_t c_scale = ComputeScalingFactor(x, b);
2183 if (c_scale == 0.0)
2184 {
2185 converged = false;
2186 break;
2187 }
2188 add(x, -c_scale, c, x); // x_{k+1} = x_k - c_scale*c
2189
2190 ProcessNewState(x);
2191
2192 oper->Mult(x, r);
2193 if (have_b)
2194 {
2195 r -= b;
2196 }
2197
2198 // LBFGS - construct descent direction
2199 subtract(r, rk, yk); // yk = r_{k+1} - r_{k}
2200 sk = c; sk *= -c_scale; //sk = x_{k+1} - x_{k} = -c_scale*c
2201 const real_t gamma = Dot(sk, yk)/Dot(yk, yk);
2202
2203 // Save last m vectors
2204 last_saved_id = (last_saved_id == m-1) ? 0 : last_saved_id+1;
2205 *skArray[last_saved_id] = sk;
2206 *ykArray[last_saved_id] = yk;
2207
2208 c = r;
2209 for (int i = last_saved_id; i > -1; i--)
2210 {
2211 rho(i) = 1.0/Dot((*skArray[i]),(*ykArray[i]));
2212 alpha(i) = rho(i)*Dot((*skArray[i]),c);
2213 add(c, -alpha(i), (*ykArray[i]), c);
2214 }
2215 if (it > m-1)
2216 {
2217 for (int i = m-1; i > last_saved_id; i--)
2218 {
2219 rho(i) = 1./Dot((*skArray[i]), (*ykArray[i]));
2220 alpha(i) = rho(i)*Dot((*skArray[i]),c);
2221 add(c, -alpha(i), (*ykArray[i]), c);
2222 }
2223 }
2224
2225 c *= gamma; // scale search direction
2226 if (it > m-1)
2227 {
2228 for (int i = last_saved_id+1; i < m ; i++)
2229 {
2230 real_t betai = rho(i)*Dot((*ykArray[i]), c);
2231 add(c, alpha(i)-betai, (*skArray[i]), c);
2232 }
2233 }
2234 for (int i = 0; i < last_saved_id+1 ; i++)
2235 {
2236 real_t betai = rho(i)*Dot((*ykArray[i]), c);
2237 add(c, alpha(i)-betai, (*skArray[i]), c);
2238 }
2239
2240 norm = Norm(r);
2241 }
2242
2243 final_iter = it;
2244 final_norm = norm;
2245
2248 {
2249 mfem::out << "LBFGS: Number of iterations: " << final_iter << '\n'
2250 << " ||r|| = " << final_norm
2251 << ", ||r||/||r_0|| = " << final_norm/norm0 << '\n';
2252 }
2254 {
2255 mfem::out << "LBFGS: No convergence!\n";
2256 }
2257}
2258
2259int aGMRES(const Operator &A, Vector &x, const Vector &b,
2260 const Operator &M, int &max_iter,
2261 int m_max, int m_min, int m_step, real_t cf,
2262 real_t &tol, real_t &atol, int printit)
2263{
2264 int n = A.Width();
2265
2266 int m = m_max;
2267
2268 DenseMatrix H(m+1,m);
2269 Vector s(m+1), cs(m+1), sn(m+1);
2270 Vector w(n), av(n);
2271
2272 b.UseDevice(true);
2273 x.UseDevice(true);
2274 w.UseDevice(true);
2275 av.UseDevice(true);
2276
2277 real_t r1, resid;
2278 int i, j, k;
2279
2280 M.Mult(b,w);
2281 real_t normb = w.Norml2(); // normb = ||M b||
2282 if (normb == 0.0)
2283 {
2284 normb = 1;
2285 }
2286
2287 Vector r(n);
2288 r.UseDevice(true);
2289 A.Mult(x, r);
2290 subtract(b,r,w);
2291 M.Mult(w, r); // r = M (b - A x)
2292 real_t beta = r.Norml2(); // beta = ||r||
2293
2294 resid = beta / normb;
2295
2296 if (resid * resid <= tol)
2297 {
2298 tol = resid * resid;
2299 max_iter = 0;
2300 return 0;
2301 }
2302
2303 if (printit)
2304 {
2305 mfem::out << " Pass : " << setw(2) << 1
2306 << " Iteration : " << setw(3) << 0
2307 << " (r, r) = " << beta*beta << '\n';
2308 }
2309
2310 tol *= (normb*normb);
2311 tol = (atol > tol) ? atol : tol;
2312
2313 m = m_max;
2314 Array<Vector *> v(m+1);
2315 for (i= 0; i<=m; i++)
2316 {
2317 v[i] = new Vector(n);
2318 v[i]->UseDevice(true);
2319 (*v[i]) = 0.0;
2320 }
2321
2322 j = 1;
2323 while (j <= max_iter)
2324 {
2325 (*v[0]) = 0.0;
2326 v[0] -> Add (1.0/beta, r); // v[0] = r / ||r||
2327 s = 0.0; s(0) = beta;
2328
2329 r1 = beta;
2330
2331 for (i = 0; i < m && j <= max_iter; i++)
2332 {
2333 A.Mult((*v[i]),av);
2334 M.Mult(av,w); // w = M A v[i]
2335
2336 for (k = 0; k <= i; k++)
2337 {
2338 H(k,i) = w * (*v[k]); // H(k,i) = w * v[k]
2339 w.Add(-H(k,i), (*v[k])); // w -= H(k,i) * v[k]
2340 }
2341
2342 H(i+1,i) = w.Norml2(); // H(i+1,i) = ||w||
2343 (*v[i+1]) = 0.0;
2344 v[i+1] -> Add (1.0/H(i+1,i), w); // v[i+1] = w / H(i+1,i)
2345
2346 for (k = 0; k < i; k++)
2347 {
2348 ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
2349 }
2350
2351 GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
2352 ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
2353 ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
2354
2355 resid = fabs(s(i+1));
2356 if (printit)
2357 {
2358 mfem::out << " Pass : " << setw(2) << j
2359 << " Iteration : " << setw(3) << i+1
2360 << " (r, r) = " << resid*resid << '\n';
2361 }
2362
2363 if ( resid*resid < tol)
2364 {
2365 Update(x, i, H, s, v);
2366 tol = resid * resid;
2367 max_iter = j;
2368 for (i= 0; i<=m; i++)
2369 {
2370 delete v[i];
2371 }
2372 return 0;
2373 }
2374 }
2375
2376 if (printit)
2377 {
2378 mfem::out << "Restarting..." << '\n';
2379 }
2380
2381 Update(x, i-1, H, s, v);
2382
2383 A.Mult(x, r);
2384 subtract(b,r,w);
2385 M.Mult(w, r); // r = M (b - A x)
2386 beta = r.Norml2(); // beta = ||r||
2387 if ( resid*resid < tol)
2388 {
2389 tol = resid * resid;
2390 max_iter = j;
2391 for (i= 0; i<=m; i++)
2392 {
2393 delete v[i];
2394 }
2395 return 0;
2396 }
2397
2398 if (beta/r1 > cf)
2399 {
2400 if (m - m_step >= m_min)
2401 {
2402 m -= m_step;
2403 }
2404 else
2405 {
2406 m = m_max;
2407 }
2408 }
2409
2410 j++;
2411 }
2412
2413 tol = resid * resid;
2414 for (i= 0; i<=m; i++)
2415 {
2416 delete v[i];
2417 }
2418 return 1;
2419}
2420
2422 const Operator *C_,
2423 const Operator *D_)
2424 : C(C_), D(D_), c_e(NULL), d_lo(NULL), d_hi(NULL), x_lo(NULL), x_hi(NULL),
2425 input_size(insize)
2426{
2427 if (C) { MFEM_VERIFY(C->Width() == input_size, "Wrong width of C."); }
2428 if (D) { MFEM_VERIFY(D->Width() == input_size, "Wrong width of D."); }
2429}
2430
2432{
2433 MFEM_VERIFY(C, "The C operator is unspecified -- can't set constraints.");
2434 MFEM_VERIFY(c.Size() == C->Height(), "Wrong size of the constraint.");
2435
2436 c_e = &c;
2437}
2438
2440 const Vector &dh)
2441{
2442 MFEM_VERIFY(D, "The D operator is unspecified -- can't set constraints.");
2443 MFEM_VERIFY(dl.Size() == D->Height() && dh.Size() == D->Height(),
2444 "Wrong size of the constraint.");
2445
2446 d_lo = &dl; d_hi = &dh;
2447}
2448
2450{
2451 MFEM_VERIFY(xl.Size() == input_size && xh.Size() == input_size,
2452 "Wrong size of the constraint.");
2453
2454 x_lo = &xl; x_hi = &xh;
2455}
2456
2458{
2459 int m = 0;
2460 if (C) { m += C->Height(); }
2461 if (D) { m += D->Height(); }
2462 return m;
2463}
2464
2466{
2468 {
2469 MFEM_WARNING("Objective functional is ignored as SLBQP always minimizes"
2470 "the l2 norm of (x - x_target).");
2471 }
2472 MFEM_VERIFY(prob.GetC(), "Linear constraint is not set.");
2473 MFEM_VERIFY(prob.GetC()->Height() == 1, "Solver expects scalar constraint.");
2474
2475 problem = &prob;
2476}
2477
2478void SLBQPOptimizer::SetBounds(const Vector &lo_, const Vector &hi_)
2479{
2480 lo.SetDataAndSize(lo_.GetData(), lo_.Size());
2481 hi.SetDataAndSize(hi_.GetData(), hi_.Size());
2482}
2483
2485{
2486 w.SetDataAndSize(w_.GetData(), w_.Size());
2487 a = a_;
2488}
2489
2490inline void SLBQPOptimizer::print_iteration(int it, real_t r, real_t l) const
2491{
2493 {
2494 mfem::out << "SLBQP iteration " << it << ": residual = " << r
2495 << ", lambda = " << l << '\n';
2496 }
2497}
2498
2499void SLBQPOptimizer::Mult(const Vector& xt, Vector& x) const
2500{
2501 // Based on code provided by Denis Ridzal, dridzal@sandia.gov.
2502 // Algorithm adapted from Dai and Fletcher, "New Algorithms for
2503 // Singly Linearly Constrained Quadratic Programs Subject to Lower
2504 // and Upper Bounds", Numerical Analysis Report NA/216, 2003.
2505
2506 // Set some algorithm-specific constants and temporaries.
2507 int nclip = 0;
2508 real_t l = 0;
2509 real_t llow = 0;
2510 real_t lupp = 0;
2511 real_t lnew = 0;
2512 real_t dl = 2;
2513 real_t r = 0;
2514 real_t rlow = 0;
2515 real_t rupp = 0;
2516 real_t s = 0;
2517
2518 const real_t smin = 0.1;
2519
2520 const real_t tol = max(abs_tol, rel_tol*a);
2521
2522 // *** Start bracketing phase of SLBQP ***
2524 {
2525 mfem::out << "SLBQP bracketing phase" << '\n';
2526 }
2527
2528 // Solve QP with fixed Lagrange multiplier
2529 r = initial_norm = solve(l,xt,x,nclip);
2530 print_iteration(nclip, r, l);
2531
2532
2533 // If x=xt was already within bounds and satisfies the linear
2534 // constraint, then we already have the solution.
2535 if (fabs(r) <= tol)
2536 {
2537 converged = true;
2538 goto slbqp_done;
2539 }
2540
2541 if (r < 0)
2542 {
2543 llow = l; rlow = r; l = l + dl;
2544
2545 // Solve QP with fixed Lagrange multiplier
2546 r = solve(l,xt,x,nclip);
2547 print_iteration(nclip, r, l);
2548
2549 while ((r < 0) && (nclip < max_iter))
2550 {
2551 llow = l;
2552 s = rlow/r - 1.0;
2553 if (s < smin) { s = smin; }
2554 dl = dl + dl/s;
2555 l = l + dl;
2556
2557 // Solve QP with fixed Lagrange multiplier
2558 r = solve(l,xt,x,nclip);
2559 print_iteration(nclip, r, l);
2560 }
2561
2562 lupp = l; rupp = r;
2563 }
2564 else
2565 {
2566 lupp = l; rupp = r; l = l - dl;
2567
2568 // Solve QP with fixed Lagrange multiplier
2569 r = solve(l,xt,x,nclip);
2570 print_iteration(nclip, r, l);
2571
2572 while ((r > 0) && (nclip < max_iter))
2573 {
2574 lupp = l;
2575 s = rupp/r - 1.0;
2576 if (s < smin) { s = smin; }
2577 dl = dl + dl/s;
2578 l = l - dl;
2579
2580 // Solve QP with fixed Lagrange multiplier
2581 r = solve(l,xt,x,nclip);
2582 print_iteration(nclip, r, l);
2583 }
2584
2585 llow = l; rlow = r;
2586 }
2587
2588 // *** Stop bracketing phase of SLBQP ***
2589
2590
2591 // *** Start secant phase of SLBQP ***
2593 {
2594 mfem::out << "SLBQP secant phase" << '\n';
2595 }
2596
2597 s = 1.0 - rlow/rupp; dl = dl/s; l = lupp - dl;
2598
2599 // Solve QP with fixed Lagrange multiplier
2600 r = solve(l,xt,x,nclip);
2601 print_iteration(nclip, r, l);
2602
2603 while ( (fabs(r) > tol) && (nclip < max_iter) )
2604 {
2605 if (r > 0)
2606 {
2607 if (s <= 2.0)
2608 {
2609 lupp = l; rupp = r; s = 1.0 - rlow/rupp;
2610 dl = (lupp - llow)/s; l = lupp - dl;
2611 }
2612 else
2613 {
2614 s = rupp/r - 1.0;
2615 if (s < smin) { s = smin; }
2616 dl = (lupp - l)/s;
2617 lnew = 0.75*llow + 0.25*l;
2618 if (lnew < l-dl) { lnew = l-dl; }
2619 lupp = l; rupp = r; l = lnew;
2620 s = (lupp - llow)/(lupp - l);
2621 }
2622
2623 }
2624 else
2625 {
2626 if (s >= 2.0)
2627 {
2628 llow = l; rlow = r; s = 1.0 - rlow/rupp;
2629 dl = (lupp - llow)/s; l = lupp - dl;
2630 }
2631 else
2632 {
2633 s = rlow/r - 1.0;
2634 if (s < smin) { s = smin; }
2635 dl = (l - llow)/s;
2636 lnew = 0.75*lupp + 0.25*l;
2637 if (lnew < l+dl) { lnew = l+dl; }
2638 llow = l; rlow = r; l = lnew;
2639 s = (lupp - llow)/(lupp - l);
2640 }
2641 }
2642
2643 // Solve QP with fixed Lagrange multiplier
2644 r = solve(l,xt,x,nclip);
2645 print_iteration(nclip, r, l);
2646 }
2647
2648 // *** Stop secant phase of SLBQP ***
2649 converged = (fabs(r) <= tol);
2650
2651slbqp_done:
2652
2653 final_iter = nclip;
2654 final_norm = r;
2655
2658 {
2659 mfem::out << "SLBQP: Number of iterations: " << final_iter << '\n'
2660 << " lambda = " << l << '\n'
2661 << " ||r|| = " << final_norm << '\n';
2662 }
2664 {
2665 mfem::out << "SLBQP: No convergence!" << '\n';
2666 }
2667}
2668
2669struct WeightMinHeap
2670{
2671 const std::vector<real_t> &w;
2672 std::vector<size_t> c;
2673 std::vector<int> loc;
2674
2675 WeightMinHeap(const std::vector<real_t> &w_) : w(w_)
2676 {
2677 c.reserve(w.size());
2678 loc.resize(w.size());
2679 for (size_t i=0; i<w.size(); ++i) { push(i); }
2680 }
2681
2682 size_t percolate_up(size_t pos, real_t val)
2683 {
2684 for (; pos > 0 && w[c[(pos-1)/2]] > val; pos = (pos-1)/2)
2685 {
2686 c[pos] = c[(pos-1)/2];
2687 loc[c[(pos-1)/2]] = static_cast<int>(pos);
2688 }
2689 return pos;
2690 }
2691
2692 size_t percolate_down(size_t pos, real_t val)
2693 {
2694 while (2*pos+1 < c.size())
2695 {
2696 size_t left = 2*pos+1;
2697 size_t right = left+1;
2698 size_t tgt;
2699 if (right < c.size() && w[c[right]] < w[c[left]]) { tgt = right; }
2700 else { tgt = left; }
2701 if (w[c[tgt]] < val)
2702 {
2703 c[pos] = c[tgt];
2704 loc[c[tgt]] = static_cast<int>(pos);
2705 pos = tgt;
2706 }
2707 else
2708 {
2709 break;
2710 }
2711 }
2712 return pos;
2713 }
2714
2715 void push(size_t i)
2716 {
2717 real_t val = w[i];
2718 c.push_back(0);
2719 size_t pos = c.size()-1;
2720 pos = percolate_up(pos, val);
2721 c[pos] = i;
2722 loc[i] = static_cast<int>(pos);
2723 }
2724
2725 int pop()
2726 {
2727 size_t i = c[0];
2728 size_t j = c.back();
2729 c.pop_back();
2730 // Mark as removed
2731 loc[i] = -1;
2732 if (c.empty()) { return static_cast<int>(i); }
2733 real_t val = w[j];
2734 size_t pos = 0;
2735 pos = percolate_down(pos, val);
2736 c[pos] = j;
2737 loc[j] = static_cast<int>(pos);
2738 return static_cast<int>(i);
2739 }
2740
2741 void update(size_t i)
2742 {
2743 size_t pos = loc[i];
2744 real_t val = w[i];
2745 pos = percolate_up(pos, val);
2746 pos = percolate_down(pos, val);
2747 c[pos] = i;
2748 loc[i] = static_cast<int>(pos);
2749 }
2750
2751 bool picked(size_t i)
2752 {
2753 return loc[i] < 0;
2754 }
2755};
2756
2758{
2759 int n = C.Width();
2760 // Scale rows by reciprocal of diagonal and take absolute value
2761 Vector D;
2762 C.GetDiag(D);
2763 D.HostRead();
2764
2765 const int *I = C.HostReadI(), *J = C.HostReadJ();
2766 real_t *V = C.HostReadWriteData();
2767 for (int i=0; i<n; ++i)
2768 {
2769 for (int j=I[i]; j<I[i+1]; ++j)
2770 {
2771 V[j] = abs(V[j]/D[i]);
2772 }
2773 }
2774
2775 std::vector<real_t> w(n, 0.0);
2776 for (int k=0; k<n; ++k)
2777 {
2778 // Find all neighbors i of k
2779 for (int ii=I[k]; ii<I[k+1]; ++ii)
2780 {
2781 int i = J[ii];
2782 // Find value of (i,k)
2783 real_t C_ik = 0.0;
2784 for (int kk=I[i]; kk<I[i+1]; ++kk)
2785 {
2786 if (J[kk] == k)
2787 {
2788 C_ik = V[kk];
2789 break;
2790 }
2791 }
2792 for (int jj=I[k]; jj<I[k+1]; ++jj)
2793 {
2794 int j = J[jj];
2795 if (j == k) { continue; }
2796 real_t C_kj = V[jj];
2797 bool ij_exists = false;
2798 for (int jj2=I[i]; jj2<I[i+1]; ++jj2)
2799 {
2800 if (J[jj2] == j)
2801 {
2802 ij_exists = true;
2803 break;
2804 }
2805 }
2806 if (!ij_exists) { w[k] += pow(C_ik*C_kj,2); }
2807 }
2808 }
2809 w[k] = sqrt(w[k]);
2810 }
2811
2812 WeightMinHeap w_heap(w);
2813
2814 // Compute ordering
2815 p.SetSize(n);
2816 for (int ii=0; ii<n; ++ii)
2817 {
2818 int pi = w_heap.pop();
2819 p[ii] = pi;
2820 w[pi] = -1;
2821 for (int kk=I[pi]; kk<I[pi+1]; ++kk)
2822 {
2823 int k = J[kk];
2824 if (w_heap.picked(k)) { continue; }
2825 // Recompute weight
2826 w[k] = 0.0;
2827 // Find all neighbors i of k
2828 for (int ii2=I[k]; ii2<I[k+1]; ++ii2)
2829 {
2830 int i = J[ii2];
2831 if (w_heap.picked(i)) { continue; }
2832 // Find value of (i,k)
2833 real_t C_ik = 0.0;
2834 for (int kk2=I[i]; kk2<I[i+1]; ++kk2)
2835 {
2836 if (J[kk2] == k)
2837 {
2838 C_ik = V[kk2];
2839 break;
2840 }
2841 }
2842 for (int jj=I[k]; jj<I[k+1]; ++jj)
2843 {
2844 int j = J[jj];
2845 if (j == k || w_heap.picked(j)) { continue; }
2846 real_t C_kj = V[jj];
2847 bool ij_exists = false;
2848 for (int jj2=I[i]; jj2<I[i+1]; ++jj2)
2849 {
2850 if (J[jj2] == j)
2851 {
2852 ij_exists = true;
2853 break;
2854 }
2855 }
2856 if (!ij_exists) { w[k] += pow(C_ik*C_kj,2); }
2857 }
2858 }
2859 w[k] = sqrt(w[k]);
2860 w_heap.update(k);
2861 }
2862 }
2863}
2864
2865BlockILU::BlockILU(int block_size_,
2866 Reordering reordering_,
2867 int k_fill_)
2868 : Solver(0),
2869 block_size(block_size_),
2870 k_fill(k_fill_),
2871 reordering(reordering_)
2872{ }
2873
2875 int block_size_,
2876 Reordering reordering_,
2877 int k_fill_)
2878 : BlockILU(block_size_, reordering_, k_fill_)
2879{
2880 SetOperator(op);
2881}
2882
2884{
2885 const SparseMatrix *A = NULL;
2886#ifdef MFEM_USE_MPI
2887 const HypreParMatrix *A_par = dynamic_cast<const HypreParMatrix *>(&op);
2888 SparseMatrix A_par_diag;
2889 if (A_par != NULL)
2890 {
2891 A_par->GetDiag(A_par_diag);
2892 A = &A_par_diag;
2893 }
2894#endif
2895 if (A == NULL)
2896 {
2897 A = dynamic_cast<const SparseMatrix *>(&op);
2898 if (A == NULL)
2899 {
2900 MFEM_ABORT("BlockILU must be created with a SparseMatrix or HypreParMatrix");
2901 }
2902 }
2903 height = op.Height();
2904 width = op.Width();
2905 MFEM_VERIFY(A->Finalized(), "Matrix must be finalized.");
2906 CreateBlockPattern(*A);
2907 Factorize();
2908}
2909
2910void BlockILU::CreateBlockPattern(const SparseMatrix &A)
2911{
2912 MFEM_VERIFY(k_fill == 0, "Only block ILU(0) is currently supported.");
2913 if (A.Height() % block_size != 0)
2914 {
2915 MFEM_ABORT("BlockILU: block size must evenly divide the matrix size");
2916 }
2917
2918 int nrows = A.Height();
2919 const int *I = A.HostReadI();
2920 const int *J = A.HostReadJ();
2921 const real_t *V = A.HostReadData();
2922 int nnz = 0;
2923 int nblockrows = nrows / block_size;
2924
2925 std::vector<std::set<int>> unique_block_cols(nblockrows);
2926
2927 for (int iblock = 0; iblock < nblockrows; ++iblock)
2928 {
2929 for (int bi = 0; bi < block_size; ++bi)
2930 {
2931 int i = iblock * block_size + bi;
2932 for (int k = I[i]; k < I[i + 1]; ++k)
2933 {
2934 unique_block_cols[iblock].insert(J[k] / block_size);
2935 }
2936 }
2937 nnz += static_cast<int>(unique_block_cols[iblock].size());
2938 }
2939
2940 if (reordering != Reordering::NONE)
2941 {
2942 SparseMatrix C(nblockrows, nblockrows);
2943 for (int iblock = 0; iblock < nblockrows; ++iblock)
2944 {
2945 for (int jblock : unique_block_cols[iblock])
2946 {
2947 for (int bi = 0; bi < block_size; ++bi)
2948 {
2949 int i = iblock * block_size + bi;
2950 for (int k = I[i]; k < I[i + 1]; ++k)
2951 {
2952 int j = J[k];
2953 if (j >= jblock * block_size && j < (jblock + 1) * block_size)
2954 {
2955 C.Add(iblock, jblock, V[k]*V[k]);
2956 }
2957 }
2958 }
2959 }
2960 }
2961 C.Finalize(false);
2962 real_t *CV = C.GetData();
2963 for (int i=0; i<C.NumNonZeroElems(); ++i)
2964 {
2965 CV[i] = sqrt(CV[i]);
2966 }
2967
2968 switch (reordering)
2969 {
2972 break;
2973 default:
2974 MFEM_ABORT("BlockILU: unknown reordering")
2975 }
2976 }
2977 else
2978 {
2979 // No reordering: permutation is identity
2980 P.SetSize(nblockrows);
2981 for (int i=0; i<nblockrows; ++i)
2982 {
2983 P[i] = i;
2984 }
2985 }
2986
2987 // Compute inverse permutation
2988 Pinv.SetSize(nblockrows);
2989 for (int i=0; i<nblockrows; ++i)
2990 {
2991 Pinv[P[i]] = i;
2992 }
2993
2994 // Permute columns
2995 std::vector<std::vector<int>> unique_block_cols_perminv(nblockrows);
2996 for (int i=0; i<nblockrows; ++i)
2997 {
2998 std::vector<int> &cols = unique_block_cols_perminv[i];
2999 for (int j : unique_block_cols[P[i]])
3000 {
3001 cols.push_back(Pinv[j]);
3002 }
3003 std::sort(cols.begin(), cols.end());
3004 }
3005
3006 ID.SetSize(nblockrows);
3007 IB.SetSize(nblockrows + 1);
3008 IB[0] = 0;
3009 JB.SetSize(nnz);
3010 AB.SetSize(block_size, block_size, nnz);
3011 DB.SetSize(block_size, block_size, nblockrows);
3012 AB = 0.0;
3013 DB = 0.0;
3014 ipiv.SetSize(block_size*nblockrows);
3015 int counter = 0;
3016
3017 for (int iblock = 0; iblock < nblockrows; ++iblock)
3018 {
3019 int iblock_perm = P[iblock];
3020 for (int jblock : unique_block_cols_perminv[iblock])
3021 {
3022 int jblock_perm = P[jblock];
3023 if (iblock == jblock)
3024 {
3025 ID[iblock] = counter;
3026 }
3027 JB[counter] = jblock;
3028 for (int bi = 0; bi < block_size; ++bi)
3029 {
3030 int i = iblock_perm*block_size + bi;
3031 for (int k = I[i]; k < I[i + 1]; ++k)
3032 {
3033 int j = J[k];
3034 if (j >= jblock_perm*block_size && j < (jblock_perm + 1)*block_size)
3035 {
3036 int bj = j - jblock_perm*block_size;
3037 real_t val = V[k];
3038 AB(bi, bj, counter) = val;
3039 // Extract the diagonal
3040 if (iblock == jblock)
3041 {
3042 DB(bi, bj, iblock) = val;
3043 }
3044 }
3045 }
3046 }
3047 ++counter;
3048 }
3049 IB[iblock + 1] = counter;
3050 }
3051}
3052
3053void BlockILU::Factorize()
3054{
3055 int nblockrows = Height()/block_size;
3056
3057 // Precompute LU factorization of diagonal blocks
3058 for (int i=0; i<nblockrows; ++i)
3059 {
3060 LUFactors factorization(DB.GetData(i), &ipiv[i*block_size]);
3061 factorization.Factor(block_size);
3062 }
3063
3064 // Note: we use UseExternalData to extract submatrices from the tensor AB
3065 // instead of the DenseTensor call operator, because the call operator does
3066 // not allow for two simultaneous submatrix views into the same tensor
3067 DenseMatrix A_ik, A_ij, A_kj;
3068 // Loop over block rows (starting with second block row)
3069 for (int i=1; i<nblockrows; ++i)
3070 {
3071 // Find all nonzeros to the left of the diagonal in row i
3072 for (int kk=IB[i]; kk<IB[i+1]; ++kk)
3073 {
3074 int k = JB[kk];
3075 // Make sure we're still to the left of the diagonal
3076 if (k == i) { break; }
3077 if (k > i)
3078 {
3079 MFEM_ABORT("Matrix must be sorted with nonzero diagonal");
3080 }
3081 LUFactors A_kk_inv(DB.GetData(k), &ipiv[k*block_size]);
3082 A_ik.UseExternalData(&AB(0,0,kk), block_size, block_size);
3083 // A_ik = A_ik * A_kk^{-1}
3084 A_kk_inv.RightSolve(block_size, block_size, A_ik.GetData());
3085 // Modify everything to the right of k in row i
3086 for (int jj=kk+1; jj<IB[i+1]; ++jj)
3087 {
3088 int j = JB[jj];
3089 if (j <= k) { continue; } // Superfluous because JB is sorted?
3090 A_ij.UseExternalData(&AB(0,0,jj), block_size, block_size);
3091 for (int ll=IB[k]; ll<IB[k+1]; ++ll)
3092 {
3093 int l = JB[ll];
3094 if (l == j)
3095 {
3096 A_kj.UseExternalData(&AB(0,0,ll), block_size, block_size);
3097 // A_ij = A_ij - A_ik*A_kj;
3098 AddMult_a(-1.0, A_ik, A_kj, A_ij);
3099 // If we need to, update diagonal factorization
3100 if (j == i)
3101 {
3102 DB(i) = A_ij;
3103 LUFactors factorization(DB.GetData(i), &ipiv[i*block_size]);
3104 factorization.Factor(block_size);
3105 }
3106 break;
3107 }
3108 }
3109 }
3110 }
3111 }
3112}
3113
3114void BlockILU::Mult(const Vector &b, Vector &x) const
3115{
3116 MFEM_VERIFY(height > 0, "BlockILU(0) preconditioner is not constructed");
3117 int nblockrows = Height()/block_size;
3118 y.SetSize(Height());
3119
3120 DenseMatrix B;
3121 Vector yi, yj, xi, xj;
3122 Vector tmp(block_size);
3123 // Forward substitute to solve Ly = b
3124 // Implicitly, L has identity on the diagonal
3125 y = 0.0;
3126 for (int i=0; i<nblockrows; ++i)
3127 {
3128 yi.SetDataAndSize(&y[i*block_size], block_size);
3129 for (int ib=0; ib<block_size; ++ib)
3130 {
3131 yi[ib] = b[ib + P[i]*block_size];
3132 }
3133 for (int k=IB[i]; k<ID[i]; ++k)
3134 {
3135 int j = JB[k];
3136 const DenseMatrix &L_ij = AB(k);
3137 yj.SetDataAndSize(&y[j*block_size], block_size);
3138 // y_i = y_i - L_ij*y_j
3139 L_ij.AddMult_a(-1.0, yj, yi);
3140 }
3141 }
3142 // Backward substitution to solve Ux = y
3143 for (int i=nblockrows-1; i >= 0; --i)
3144 {
3145 xi.SetDataAndSize(&x[P[i]*block_size], block_size);
3146 for (int ib=0; ib<block_size; ++ib)
3147 {
3148 xi[ib] = y[ib + i*block_size];
3149 }
3150 for (int k=ID[i]+1; k<IB[i+1]; ++k)
3151 {
3152 int j = JB[k];
3153 const DenseMatrix &U_ij = AB(k);
3154 xj.SetDataAndSize(&x[P[j]*block_size], block_size);
3155 // x_i = x_i - U_ij*x_j
3156 U_ij.AddMult_a(-1.0, xj, xi);
3157 }
3158 LUFactors A_ii_inv(&DB(0,0,i), &ipiv[i*block_size]);
3159 // x_i = D_ii^{-1} x_i
3160 A_ii_inv.Solve(block_size, 1, xi.GetData());
3161 }
3162}
3163
3164
3166 int it, real_t norm, const Vector &r, bool final)
3167{
3168 if (!ess_dofs_list) { return; }
3169
3170 real_t bc_norm_squared = 0.0;
3171 r.HostRead();
3173 for (int i = 0; i < ess_dofs_list->Size(); i++)
3174 {
3175 const real_t r_entry = r((*ess_dofs_list)[i]);
3176 bc_norm_squared += r_entry*r_entry;
3177 }
3178 bool print = true;
3179#ifdef MFEM_USE_MPI
3180 MPI_Comm comm = iter_solver->GetComm();
3181 if (comm != MPI_COMM_NULL)
3182 {
3183 double glob_bc_norm_squared = 0.0;
3184 MPI_Reduce(&bc_norm_squared, &glob_bc_norm_squared, 1,
3186 MPI_SUM, 0, comm);
3187 bc_norm_squared = glob_bc_norm_squared;
3188 int rank;
3189 MPI_Comm_rank(comm, &rank);
3190 print = (rank == 0);
3191 }
3192#endif
3193 if ((it == 0 || final || bc_norm_squared > 0.0) && print)
3194 {
3195 mfem::out << " ResidualBCMonitor : b.c. residual norm = "
3196 << sqrt(bc_norm_squared) << endl;
3197 }
3198}
3199
3200
3201#ifdef MFEM_USE_SUITESPARSE
3202
3204{
3205 mat = NULL;
3206 Numeric = NULL;
3207 AI = AJ = NULL;
3208 if (!use_long_ints)
3209 {
3210 umfpack_di_defaults(Control);
3211 }
3212 else
3213 {
3214 umfpack_dl_defaults(Control);
3215 }
3216}
3217
3219{
3220 void *Symbolic;
3221
3222 if (Numeric)
3223 {
3224 if (!use_long_ints)
3225 {
3226 umfpack_di_free_numeric(&Numeric);
3227 }
3228 else
3229 {
3230 umfpack_dl_free_numeric(&Numeric);
3231 }
3232 }
3233
3234 mat = const_cast<SparseMatrix *>(dynamic_cast<const SparseMatrix *>(&op));
3235 MFEM_VERIFY(mat, "not a SparseMatrix");
3236
3237 // UMFPack requires that the column-indices in mat corresponding to each
3238 // row be sorted.
3239 // Generally, this will modify the ordering of the entries of mat.
3241
3242 height = mat->Height();
3243 width = mat->Width();
3244 MFEM_VERIFY(width == height, "not a square matrix");
3245
3246 const int * Ap = mat->HostReadI();
3247 const int * Ai = mat->HostReadJ();
3248 const real_t * Ax = mat->HostReadData();
3249
3250 if (!use_long_ints)
3251 {
3252 int status = umfpack_di_symbolic(width, width, Ap, Ai, Ax, &Symbolic,
3253 Control, Info);
3254 if (status < 0)
3255 {
3256 umfpack_di_report_info(Control, Info);
3257 umfpack_di_report_status(Control, status);
3258 mfem_error("UMFPackSolver::SetOperator :"
3259 " umfpack_di_symbolic() failed!");
3260 }
3261
3262 status = umfpack_di_numeric(Ap, Ai, Ax, Symbolic, &Numeric,
3263 Control, Info);
3264 if (status < 0)
3265 {
3266 umfpack_di_report_info(Control, Info);
3267 umfpack_di_report_status(Control, status);
3268 mfem_error("UMFPackSolver::SetOperator :"
3269 " umfpack_di_numeric() failed!");
3270 }
3271 umfpack_di_free_symbolic(&Symbolic);
3272 }
3273 else
3274 {
3275 SuiteSparse_long status;
3276
3277 delete [] AJ;
3278 delete [] AI;
3279 AI = new SuiteSparse_long[width + 1];
3280 AJ = new SuiteSparse_long[Ap[width]];
3281 for (int i = 0; i <= width; i++)
3282 {
3283 AI[i] = (SuiteSparse_long)(Ap[i]);
3284 }
3285 for (int i = 0; i < Ap[width]; i++)
3286 {
3287 AJ[i] = (SuiteSparse_long)(Ai[i]);
3288 }
3289
3290 status = umfpack_dl_symbolic(width, width, AI, AJ, Ax, &Symbolic,
3291 Control, Info);
3292 if (status < 0)
3293 {
3294 umfpack_dl_report_info(Control, Info);
3295 umfpack_dl_report_status(Control, status);
3296 mfem_error("UMFPackSolver::SetOperator :"
3297 " umfpack_dl_symbolic() failed!");
3298 }
3299
3300 status = umfpack_dl_numeric(AI, AJ, Ax, Symbolic, &Numeric,
3301 Control, Info);
3302 if (status < 0)
3303 {
3304 umfpack_dl_report_info(Control, Info);
3305 umfpack_dl_report_status(Control, status);
3306 mfem_error("UMFPackSolver::SetOperator :"
3307 " umfpack_dl_numeric() failed!");
3308 }
3309 umfpack_dl_free_symbolic(&Symbolic);
3310 }
3311}
3312
3313void UMFPackSolver::Mult(const Vector &b, Vector &x) const
3314{
3315 if (mat == NULL)
3316 mfem_error("UMFPackSolver::Mult : matrix is not set!"
3317 " Call SetOperator first!");
3318 b.HostRead();
3319 x.HostReadWrite();
3320 if (!use_long_ints)
3321 {
3322 int status =
3323 umfpack_di_solve(UMFPACK_At, mat->HostReadI(), mat->HostReadJ(),
3324 mat->HostReadData(), x.HostWrite(), b.HostRead(),
3325 Numeric, Control, Info);
3326 umfpack_di_report_info(Control, Info);
3327 if (status < 0)
3328 {
3329 umfpack_di_report_status(Control, status);
3330 mfem_error("UMFPackSolver::Mult : umfpack_di_solve() failed!");
3331 }
3332 }
3333 else
3334 {
3335 SuiteSparse_long status =
3336 umfpack_dl_solve(UMFPACK_At, AI, AJ, mat->HostReadData(),
3337 x.HostWrite(), b.HostRead(), Numeric, Control,
3338 Info);
3339 umfpack_dl_report_info(Control, Info);
3340 if (status < 0)
3341 {
3342 umfpack_dl_report_status(Control, status);
3343 mfem_error("UMFPackSolver::Mult : umfpack_dl_solve() failed!");
3344 }
3345 }
3346}
3347
3349{
3350 if (mat == NULL)
3351 mfem_error("UMFPackSolver::MultTranspose : matrix is not set!"
3352 " Call SetOperator first!");
3353 b.HostRead();
3354 x.HostReadWrite();
3355 if (!use_long_ints)
3356 {
3357 int status =
3358 umfpack_di_solve(UMFPACK_A, mat->HostReadI(), mat->HostReadJ(),
3359 mat->HostReadData(), x.HostWrite(), b.HostRead(),
3360 Numeric, Control, Info);
3361 umfpack_di_report_info(Control, Info);
3362 if (status < 0)
3363 {
3364 umfpack_di_report_status(Control, status);
3365 mfem_error("UMFPackSolver::MultTranspose :"
3366 " umfpack_di_solve() failed!");
3367 }
3368 }
3369 else
3370 {
3371 SuiteSparse_long status =
3372 umfpack_dl_solve(UMFPACK_A, AI, AJ, mat->HostReadData(),
3373 x.HostWrite(), b.HostRead(), Numeric, Control,
3374 Info);
3375 umfpack_dl_report_info(Control, Info);
3376 if (status < 0)
3377 {
3378 umfpack_dl_report_status(Control, status);
3379 mfem_error("UMFPackSolver::MultTranspose :"
3380 " umfpack_dl_solve() failed!");
3381 }
3382 }
3383}
3384
3386{
3387 delete [] AJ;
3388 delete [] AI;
3389 if (Numeric)
3390 {
3391 if (!use_long_ints)
3392 {
3393 umfpack_di_free_numeric(&Numeric);
3394 }
3395 else
3396 {
3397 umfpack_dl_free_numeric(&Numeric);
3398 }
3399 }
3400}
3401
3403{
3404 klu_defaults(&Common);
3405}
3406
3408{
3409 if (Numeric)
3410 {
3411 MFEM_VERIFY(Symbolic != 0,
3412 "Had Numeric pointer in KLU, but not Symbolic");
3413 klu_free_symbolic(&Symbolic, &Common);
3414 Symbolic = 0;
3415 klu_free_numeric(&Numeric, &Common);
3416 Numeric = 0;
3417 }
3418
3419 mat = const_cast<SparseMatrix *>(dynamic_cast<const SparseMatrix *>(&op));
3420 MFEM_VERIFY(mat != NULL, "not a SparseMatrix");
3421
3422 // KLU requires that the column-indices in mat corresponding to each row be
3423 // sorted. Generally, this will modify the ordering of the entries of mat.
3425
3426 height = mat->Height();
3427 width = mat->Width();
3428 MFEM_VERIFY(width == height, "not a square matrix");
3429
3430 int * Ap = mat->GetI();
3431 int * Ai = mat->GetJ();
3432 real_t * Ax = mat->GetData();
3433
3434 Symbolic = klu_analyze( height, Ap, Ai, &Common);
3435 Numeric = klu_factor(Ap, Ai, Ax, Symbolic, &Common);
3436}
3437
3438void KLUSolver::Mult(const Vector &b, Vector &x) const
3439{
3440 MFEM_VERIFY(mat != NULL,
3441 "KLUSolver::Mult : matrix is not set! Call SetOperator first!");
3442
3443 int n = mat->Height();
3444 int numRhs = 1;
3445 // Copy B into X, so we can pass it in and overwrite it.
3446 x = b;
3447 // Solve the transpose, since KLU thinks the matrix is compressed column
3448 // format.
3449 klu_tsolve( Symbolic, Numeric, n, numRhs, x.GetData(), &Common);
3450}
3451
3453{
3454 MFEM_VERIFY(mat != NULL,
3455 "KLUSolver::Mult : matrix is not set! Call SetOperator first!");
3456
3457 int n = mat->Height();
3458 int numRhs = 1;
3459 // Copy B into X, so we can pass it in and overwrite it.
3460 x = b;
3461 // Solve the regular matrix, not the transpose, since KLU thinks the matrix
3462 // is compressed column format.
3463 klu_solve( Symbolic, Numeric, n, numRhs, x.GetData(), &Common);
3464}
3465
3467{
3468 klu_free_symbolic (&Symbolic, &Common) ;
3469 klu_free_numeric (&Numeric, &Common) ;
3470 Symbolic = 0;
3471 Numeric = 0;
3472}
3473
3474#endif // MFEM_USE_SUITESPARSE
3475
3477 const SparseMatrix &block_dof_)
3478 : Solver(A.NumRows()), block_dof(const_cast<SparseMatrix&>(block_dof_)),
3479 block_solvers(new DenseMatrixInverse[block_dof.NumRows()])
3480{
3481 DenseMatrix sub_A;
3482 for (int i = 0; i < block_dof.NumRows(); ++i)
3483 {
3484 local_dofs.MakeRef(block_dof.GetRowColumns(i), block_dof.RowSize(i));
3485 sub_A.SetSize(local_dofs.Size());
3486 A.GetSubMatrix(local_dofs, local_dofs, sub_A);
3487 block_solvers[i].SetOperator(sub_A);
3488 }
3489}
3490
3492{
3493 y.SetSize(x.Size());
3494 y = 0.0;
3495
3496 for (int i = 0; i < block_dof.NumRows(); ++i)
3497 {
3498 local_dofs.MakeRef(block_dof.GetRowColumns(i), block_dof.RowSize(i));
3499 x.GetSubVector(local_dofs, sub_rhs);
3500 sub_sol.SetSize(local_dofs.Size());
3501 block_solvers[i].Mult(sub_rhs, sub_sol);
3502 y.AddElementVector(local_dofs, sub_sol);
3503 }
3504}
3505
3506void ProductSolver::Mult(const Vector & x, Vector & y) const
3507{
3508 y.SetSize(x.Size());
3509 y = 0.0;
3510 S0->Mult(x, y);
3511
3512 Vector z(x.Size());
3513 z = 0.0;
3514 A->Mult(y, z);
3515 add(-1.0, z, 1.0, x, z); // z = (I - A * S0) x
3516
3517 Vector S1z(x.Size());
3518 S1z = 0.0;
3519 S1->Mult(z, S1z);
3520 y += S1z;
3521}
3522
3524{
3525 y.SetSize(x.Size());
3526 y = 0.0;
3527 S1->MultTranspose(x, y);
3528
3529 Vector z(x.Size());
3530 z = 0.0;
3531 A->MultTranspose(y, z);
3532 add(-1.0, z, 1.0, x, z); // z = (I - A^T * S1^T) x
3533
3534 Vector S0Tz(x.Size());
3535 S0Tz = 0.0;
3536 S0->MultTranspose(z, S0Tz);
3537 y += S0Tz;
3538}
3539
3541 : Solver(0, false), global_size(-1)
3542#ifdef MFEM_USE_MPI
3543 , parallel(false)
3544#endif
3545{ }
3546
3547#ifdef MFEM_USE_MPI
3548OrthoSolver::OrthoSolver(MPI_Comm mycomm_)
3549 : Solver(0, false), mycomm(mycomm_), global_size(-1), parallel(true) { }
3550#endif
3551
3553{
3554 solver = &s;
3555 height = s.Height();
3556 width = s.Width();
3557 MFEM_VERIFY(height == width, "Solver must be a square Operator!");
3558 global_size = -1; // lazy evaluated
3559}
3560
3562{
3563 MFEM_VERIFY(solver, "Solver hasn't been set, call SetSolver() first.");
3564 solver->SetOperator(op);
3565 height = solver->Height();
3566 width = solver->Width();
3567 MFEM_VERIFY(height == width, "Solver must be a square Operator!");
3568 global_size = -1; // lazy evaluated
3569}
3570
3571void OrthoSolver::Mult(const Vector &b, Vector &x) const
3572{
3573 MFEM_VERIFY(solver, "Solver hasn't been set, call SetSolver() first.");
3574 MFEM_VERIFY(height == solver->Height(),
3575 "solver was modified externally! call SetSolver() again!");
3576 MFEM_VERIFY(height == b.Size(), "incompatible input Vector size!");
3577 MFEM_VERIFY(height == x.Size(), "incompatible output Vector size!");
3578
3579 // Orthogonalize input
3580 Orthogonalize(b, b_ortho);
3581
3582 // Propagate iterative_mode to the solver:
3584
3585 // Apply the Solver
3586 solver->Mult(b_ortho, x);
3587
3588 // Orthogonalize output
3589 Orthogonalize(x, x);
3590}
3591
3592void OrthoSolver::Orthogonalize(const Vector &v, Vector &v_ortho) const
3593{
3594 if (global_size == -1)
3595 {
3596 global_size = height;
3597#ifdef MFEM_USE_MPI
3598 if (parallel)
3599 {
3600 MPI_Allreduce(MPI_IN_PLACE, &global_size, 1, HYPRE_MPI_BIG_INT,
3601 MPI_SUM, mycomm);
3602 }
3603#endif
3604 }
3605
3606 // TODO: GPU/device implementation
3607
3608 real_t global_sum = v.Sum();
3609
3610#ifdef MFEM_USE_MPI
3611 if (parallel)
3612 {
3613 MPI_Allreduce(MPI_IN_PLACE, &global_sum, 1, MPITypeMap<real_t>::mpi_type,
3614 MPI_SUM, mycomm);
3615 }
3616#endif
3617
3618 real_t ratio = global_sum / static_cast<real_t>(global_size);
3619 v_ortho.SetSize(v.Size());
3620 v_ortho.UseDevice(true);
3621 v.HostRead();
3622 v_ortho.HostWrite();
3623 for (int i = 0; i < v_ortho.Size(); ++i)
3624 {
3625 v_ortho(i) = v(i) - ratio;
3626 }
3627}
3628
3629#ifdef MFEM_USE_MPI
3631 HypreParMatrix *aux_map,
3632 bool op_is_symmetric,
3633 bool own_aux_map)
3634 : Solver(op.NumRows()), aux_map_(aux_map, own_aux_map)
3635{
3636 aux_system_.Reset(RAP(&op, aux_map));
3637 aux_system_.As<HypreParMatrix>()->EliminateZeroRows();
3638 aux_smoother_.Reset(new HypreSmoother(*aux_system_.As<HypreParMatrix>()));
3639 aux_smoother_.As<HypreSmoother>()->SetOperatorSymmetry(op_is_symmetric);
3640}
3641
3642void AuxSpaceSmoother::Mult(const Vector &x, Vector &y, bool transpose) const
3643{
3644 Vector aux_rhs(aux_map_->NumCols());
3645 aux_map_->MultTranspose(x, aux_rhs);
3646
3647 Vector aux_sol(aux_rhs.Size());
3648 if (transpose)
3649 {
3650 aux_smoother_->MultTranspose(aux_rhs, aux_sol);
3651 }
3652 else
3653 {
3654 aux_smoother_->Mult(aux_rhs, aux_sol);
3655 }
3656
3657 y.SetSize(aux_map_->NumRows());
3658 aux_map_->Mult(aux_sol, y);
3659}
3660#endif // MFEM_USE_MPI
3661
3662#ifdef MFEM_USE_LAPACK
3663
3665 : Solver(0), mat(nullptr), const_tol_(1.0e-14), min_nnz_(0),
3666 max_nnz_(0), verbosity_(0), res_change_termination_tol_(1.0e-4),
3667 zero_tol_(1.0e-14), rhs_delta_(1.0e-11), n_outer_(100000),
3668 n_inner_(100000), nStallCheck_(100), normalize_(true),
3669 NNLS_qrres_on_(false), qr_residual_mode_(QRresidualMode::hybrid)
3670{}
3671
3673{
3674 mat = dynamic_cast<const DenseMatrix*>(&op);
3675 MFEM_VERIFY(mat, "NNLSSolver operator must be of type DenseMatrix");
3676
3677 // The size of this operator is that of the transpose of op.
3678 height = op.Width();
3679 width = op.Height();
3680
3681 row_scaling_.SetSize(mat->NumRows());
3682 row_scaling_.UseDevice(true);
3683 row_scaling_ = 1.0;
3684}
3685
3687{
3688 qr_residual_mode_ = qr_residual_mode;
3689 if (qr_residual_mode_ == QRresidualMode::on)
3690 {
3691 NNLS_qrres_on_ = true;
3692 }
3693}
3694
3696{
3697 // Scale everything so that rescaled half gap is the same for all constraints
3698 const int m = mat->NumRows();
3699
3700 MFEM_VERIFY(rhs_lb.Size() == m && rhs_ub.Size() == m, "");
3701
3702 Vector rhs_avg = rhs_ub;
3703 rhs_avg += rhs_lb;
3704 rhs_avg *= 0.5;
3705
3706 Vector rhs_halfgap = rhs_ub;
3707 rhs_halfgap -= rhs_lb;
3708 rhs_halfgap *= 0.5;
3709
3710 Vector rhs_avg_glob = rhs_avg;
3711 Vector rhs_halfgap_glob = rhs_halfgap;
3712 Vector halfgap_target(m);
3713 halfgap_target = 1.0e3 * const_tol_;
3714
3715 row_scaling_.SetSize(m);
3716 row_scaling_.UseDevice(true);
3717
3718 for (int i=0; i<m; ++i)
3719 {
3720 const real_t s = halfgap_target(i) / rhs_halfgap_glob(i);
3721 row_scaling_[i] = s;
3722
3723 rhs_lb(i) = (rhs_avg(i) * s) - halfgap_target(i);
3724 rhs_ub(i) = (rhs_avg(i) * s) + halfgap_target(i);
3725 }
3726}
3727
3728void NNLSSolver::Mult(const Vector &w, Vector &sol) const
3729{
3730 MFEM_VERIFY(mat, "NNLSSolver operator must be of type DenseMatrix");
3731 Vector rhs_ub(mat->NumRows());
3732 mat->Mult(w, rhs_ub);
3733 rhs_ub *= row_scaling_;
3734
3735 Vector rhs_lb(rhs_ub);
3736 Vector rhs_Gw(rhs_ub);
3737
3738 for (int i=0; i<rhs_ub.Size(); ++i)
3739 {
3740 rhs_lb(i) -= rhs_delta_;
3741 rhs_ub(i) += rhs_delta_;
3742 }
3743
3744 if (normalize_) { NormalizeConstraints(rhs_lb, rhs_ub); }
3745 Solve(rhs_lb, rhs_ub, sol);
3746
3747 if (verbosity_ > 1)
3748 {
3749 int nnz = 0;
3750 for (int i=0; i<sol.Size(); ++i)
3751 {
3752 if (sol(i) != 0.0)
3753 {
3754 nnz++;
3755 }
3756 }
3757
3758 mfem::out << "Number of nonzeros in NNLSSolver solution: " << nnz
3759 << ", out of " << sol.Size() << endl;
3760
3761 // Check residual of NNLS solution
3762 Vector res(mat->NumRows());
3763 mat->Mult(sol, res);
3764 res *= row_scaling_;
3765
3766 const real_t normGsol = res.Norml2();
3767 const real_t normRHS = rhs_Gw.Norml2();
3768
3769 res -= rhs_Gw;
3770 const real_t relNorm = res.Norml2() / std::max(normGsol, normRHS);
3771 mfem::out << "Relative residual norm for NNLSSolver solution of Gs = Gw: "
3772 << relNorm << endl;
3773 }
3774}
3775
3776void NNLSSolver::Solve(const Vector& rhs_lb, const Vector& rhs_ub,
3777 Vector& soln) const
3778{
3779 int m = mat->NumRows();
3780 int n = mat->NumCols();
3781
3782 MFEM_VERIFY(rhs_lb.Size() == m && rhs_lb.Size() == m && soln.Size() == n, "");
3783 MFEM_VERIFY(n >= m, "NNLSSolver system cannot be over-determined.");
3784
3785 if (max_nnz_ == 0)
3786 {
3787 max_nnz_ = mat->NumCols();
3788 }
3789
3790 // Prepare right hand side
3791 Vector rhs_avg(rhs_ub);
3792 rhs_avg += rhs_lb;
3793 rhs_avg *= 0.5;
3794
3795 Vector rhs_halfgap(rhs_ub);
3796 rhs_halfgap -= rhs_lb;
3797 rhs_halfgap *= 0.5;
3798
3799 Vector rhs_avg_glob(rhs_avg);
3800 Vector rhs_halfgap_glob(rhs_halfgap);
3801
3802 int ione = 1;
3803 real_t fone = 1.0;
3804
3805 char lside = 'L';
3806 char trans = 'T';
3807 char notrans = 'N';
3808
3809 std::vector<unsigned int> nz_ind(m);
3810 Vector res_glob(m);
3811 Vector mu(n);
3812 Vector mu2(n);
3813 int n_nz_ind = 0;
3814 int n_glob = 0;
3815 int m_update;
3816 int min_nnz_cap = std::min(static_cast<int>(min_nnz_), std::min(m,n));
3817 int info;
3818 std::vector<real_t> l2_res_hist;
3819 std::vector<unsigned int> stalled_indices;
3820 int stalledFlag = 0;
3821 int num_stalled = 0;
3822 int nz_ind_zero = 0;
3823
3824 Vector soln_nz_glob(m);
3825 Vector soln_nz_glob_up(m);
3826
3827 // The following matrices are stored in column-major format as Vectors
3828 Vector mat_0_data(m * n);
3829 Vector mat_qr_data(m * n);
3830 Vector submat_data(m * n);
3831
3832 Vector tau(n);
3833 Vector sub_tau = tau;
3834 Vector vec1(m);
3835
3836 // Temporary work arrays
3837 int lwork;
3838 std::vector<real_t> work;
3839 int n_outer_iter = 0;
3840 int n_total_inner_iter = 0;
3841 int i_qr_start;
3842 int n_update;
3843 // 0 = converged; 1 = maximum iterations reached;
3844 // 2 = NNLS stalled (no change in residual for many iterations)
3845 int exit_flag = 1;
3846
3847 res_glob = rhs_avg_glob;
3848 Vector qt_rhs_glob = rhs_avg_glob;
3849 Vector qqt_rhs_glob = qt_rhs_glob;
3850 Vector sub_qt = rhs_avg_glob;
3851
3852 // Compute threshold tolerance for the Lagrange multiplier mu
3853 real_t mu_tol = 0.0;
3854
3855 {
3856 Vector rhs_scaled(rhs_halfgap_glob);
3857 Vector tmp(n);
3858 rhs_scaled *= row_scaling_;
3859 mat->MultTranspose(rhs_scaled, tmp);
3860
3861 mu_tol = 1.0e-15 * tmp.Max();
3862 }
3863
3864 real_t rmax = 0.0;
3865 real_t mumax = 0.0;
3866
3867 for (int oiter = 0; oiter < n_outer_; ++oiter)
3868 {
3869 stalledFlag = 0;
3870
3871 rmax = fabs(res_glob(0)) - rhs_halfgap_glob(0);
3872 for (int i=1; i<m; ++i)
3873 {
3874 rmax = std::max(rmax, fabs(res_glob(i)) - rhs_halfgap_glob(i));
3875 }
3876
3877 l2_res_hist.push_back(res_glob.Norml2());
3878
3879 if (verbosity_ > 1)
3880 {
3881 mfem::out << "NNLS " << oiter << " " << n_total_inner_iter << " " << m
3882 << " " << n << " " << n_glob << " " << rmax << " "
3883 << l2_res_hist[oiter] << endl;
3884 }
3885 if (rmax <= const_tol_ && n_glob >= min_nnz_cap)
3886 {
3887 if (verbosity_ > 1)
3888 {
3889 mfem::out << "NNLS target tolerance met" << endl;
3890 }
3891 exit_flag = 0;
3892 break;
3893 }
3894
3895 if (n_glob >= max_nnz_)
3896 {
3897 if (verbosity_ > 1)
3898 {
3899 mfem::out << "NNLS target nnz met" << endl;
3900 }
3901 exit_flag = 0;
3902 break;
3903 }
3904
3905 if (n_glob >= m)
3906 {
3907 if (verbosity_ > 1)
3908 {
3909 mfem::out << "NNLS system is square... exiting" << endl;
3910 }
3911 exit_flag = 3;
3912 break;
3913 }
3914
3915 // Check for stall after the first nStallCheck iterations
3916 if (oiter > nStallCheck_)
3917 {
3918 real_t mean0 = 0.0;
3919 real_t mean1 = 0.0;
3920 for (int i=0; i<nStallCheck_/2; ++i)
3921 {
3922 mean0 += l2_res_hist[oiter - i];
3923 mean1 += l2_res_hist[oiter - (nStallCheck_) - i];
3924 }
3925
3926 real_t mean_res_change = (mean1 / mean0) - 1.0;
3927 if (std::abs(mean_res_change) < res_change_termination_tol_)
3928 {
3929 if (verbosity_ > 1)
3930 {
3931 mfem::out << "NNLSSolver stall detected... exiting" << endl;
3932 }
3933 exit_flag = 2;
3934 break;
3935 }
3936 }
3937
3938 // Find the next index
3939 res_glob *= row_scaling_;
3940 mat->MultTranspose(res_glob, mu);
3941
3942 for (int i = 0; i < n_nz_ind; ++i)
3943 {
3944 mu(nz_ind[i]) = 0.0;
3945 }
3946 for (unsigned int i = 0; i < stalled_indices.size(); ++i)
3947 {
3948 mu(stalled_indices[i]) = 0.0;
3949 }
3950
3951 mumax = mu.Max();
3952
3953 if (mumax < mu_tol)
3954 {
3955 num_stalled = stalled_indices.size();
3956 if (num_stalled > 0)
3957 {
3958 if (verbosity_ > 0)
3959 {
3960 mfem::out << "NNLS Lagrange multiplier is below the minimum "
3961 << "threshold: mumax = " << mumax << ", mutol = "
3962 << mu_tol << "\n" << " Resetting stalled indices "
3963 << "vector of size " << num_stalled << "\n";
3964 }
3965 stalled_indices.resize(0);
3966
3967 mat->MultTranspose(res_glob, mu);
3968
3969 for (int i = 0; i < n_nz_ind; ++i)
3970 {
3971 mu(nz_ind[i]) = 0.0;
3972 }
3973
3974 mumax = mu.Max();
3975 }
3976 }
3977
3978 int imax = 0;
3979 {
3980 real_t tmax = mu(0);
3981 for (int i=1; i<n; ++i)
3982 {
3983 if (mu(i) > tmax)
3984 {
3985 tmax = mu(i);
3986 imax = i;
3987 }
3988 }
3989 }
3990
3991 // Record the local value of the next index
3992 nz_ind[n_nz_ind] = imax;
3993 ++n_nz_ind;
3994
3995 if (verbosity_ > 2)
3996 {
3997 mfem::out << "Found next index: " << imax << " " << mumax << endl;
3998 }
3999
4000 for (int i=0; i<m; ++i)
4001 {
4002 mat_0_data(i + (n_glob*m)) = (*mat)(i,imax) * row_scaling_[i];
4003 mat_qr_data(i + (n_glob*m)) = mat_0_data(i + (n_glob*m));
4004 }
4005
4006 i_qr_start = n_glob;
4007 ++n_glob; // Increment the size of the global matrix
4008
4009 if (verbosity_ > 2)
4010 {
4011 mfem::out << "Updated matrix with new index" << endl;
4012 }
4013
4014 for (int iiter = 0; iiter < n_inner_; ++iiter)
4015 {
4016 ++n_total_inner_iter;
4017
4018 // Initialize
4019 const bool incremental_update = true;
4020 n_update = n_glob - i_qr_start;
4021 m_update = m - i_qr_start;
4022 if (incremental_update)
4023 {
4024 // Apply Householder reflectors to compute Q^T new_cols
4025 lwork = -1;
4026 work.resize(10);
4027
4028 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &trans, &m, &n_update,
4029 &i_qr_start, mat_qr_data.GetData(), &m,
4030 tau.GetData(),
4031 mat_qr_data.GetData() + (i_qr_start * m),
4032 &m, work.data(), &lwork, &info);
4033 MFEM_VERIFY(info == 0, ""); // Q^T A update work calculation failed
4034 lwork = static_cast<int>(work[0]);
4035 work.resize(lwork);
4036 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &trans, &m, &n_update,
4037 &i_qr_start, mat_qr_data.GetData(), &m,
4038 tau.GetData(),
4039 mat_qr_data.GetData() + (i_qr_start * m),
4040 &m, work.data(), &lwork, &info);
4041 MFEM_VERIFY(info == 0, ""); // Q^T A update failed
4042 // Compute QR factorization of the submatrix
4043 lwork = -1;
4044 work.resize(10);
4045
4046 // Copy m_update-by-n_update submatrix of mat_qr_data,
4047 // starting at (i_qr_start, i_qr_start)
4048 for (int i=0; i<m_update; ++i)
4049 for (int j=0; j<n_update; ++j)
4050 {
4051 submat_data[i + (j * m_update)] =
4052 mat_qr_data[i + i_qr_start + ((j + i_qr_start) * m)];
4053 }
4054
4055 // Copy tau subvector of length n_update, starting at i_qr_start
4056 for (int j=0; j<n_update; ++j)
4057 {
4058 sub_tau[j] = tau[i_qr_start + j];
4059 }
4060
4061 MFEM_LAPACK_PREFIX(geqrf_)(&m_update, &n_update, submat_data.GetData(),
4062 &m_update, sub_tau.GetData(), work.data(),
4063 &lwork, &info);
4064 MFEM_VERIFY(info == 0, ""); // QR update factorization work calc
4065 lwork = static_cast<int>(work[0]);
4066 if (lwork == 0) { lwork = 1; }
4067 work.resize(lwork);
4068 MFEM_LAPACK_PREFIX(geqrf_)(&m_update, &n_update, submat_data.GetData(),
4069 &m_update, sub_tau.GetData(), work.data(),
4070 &lwork, &info);
4071 MFEM_VERIFY(info == 0, ""); // QR update factorization failed
4072
4073 // Copy result back
4074 for (int i=0; i<m_update; ++i)
4075 for (int j=0; j<n_update; ++j)
4076 {
4077 mat_qr_data[i + i_qr_start + ((j + i_qr_start)* m)] =
4078 submat_data[i + (j * m_update)];
4079 }
4080
4081 for (int j=0; j<n_update; ++j)
4082 {
4083 tau[i_qr_start + j] = sub_tau[j];
4084 }
4085 }
4086 else
4087 {
4088 // Copy everything to mat_qr then do full QR
4089 for (int i=0; i<m; ++i)
4090 for (int j=0; j<n_glob; ++j)
4091 {
4092 mat_qr_data(i + (j*m)) = mat_0_data(i + (j*m));
4093 }
4094
4095 // Compute qr factorization (first find the size of work and then
4096 // perform qr)
4097 lwork = -1;
4098 work.resize(10);
4099 MFEM_LAPACK_PREFIX(geqrf_)(&m, &n_glob, mat_qr_data.GetData(), &m,
4100 tau.GetData(), work.data(), &lwork, &info);
4101 MFEM_VERIFY(info == 0, ""); // QR factorization work calculation
4102 lwork = static_cast<int>(work[0]);
4103 work.resize(lwork);
4104 MFEM_LAPACK_PREFIX(geqrf_)(&m, &n_glob, mat_qr_data.GetData(), &m,
4105 tau.GetData(), work.data(), &lwork, &info);
4106 MFEM_VERIFY(info == 0, ""); // QR factorization failed
4107 }
4108
4109 if (verbosity_ > 2)
4110 {
4111 mfem::out << "Updated QR " << iiter << endl;
4112 }
4113
4114 // Apply Householder reflectors to compute Q^T b
4115 if (incremental_update && iiter == 0)
4116 {
4117 lwork = -1;
4118 work.resize(10);
4119
4120 // Copy submatrix of mat_qr_data starting at
4121 // (i_qr_start, i_qr_start), of size m_update-by-1
4122 // Copy submatrix of qt_rhs_glob starting at (i_qr_start, 0),
4123 // of size m_update-by-1
4124
4125 for (int i=0; i<m_update; ++i)
4126 {
4127 submat_data[i] = mat_qr_data[i + i_qr_start + (i_qr_start * m)];
4128 sub_qt[i] = qt_rhs_glob[i + i_qr_start];
4129 }
4130
4131 sub_tau[0] = tau[i_qr_start];
4132
4133 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &trans, &m_update, &ione, &ione,
4134 submat_data.GetData(), &m_update,
4135 sub_tau.GetData(), sub_qt.GetData(),
4136 &m_update, work.data(), &lwork, &info);
4137 MFEM_VERIFY(info == 0, ""); // H_last y work calculation failed
4138 lwork = static_cast<int>(work[0]);
4139 work.resize(lwork);
4140 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &trans, &m_update, &ione, &ione,
4141 submat_data.GetData(), &m_update,
4142 sub_tau.GetData(), sub_qt.GetData(),
4143 &m_update, work.data(), &lwork, &info);
4144 MFEM_VERIFY(info == 0, ""); // H_last y failed
4145 // Copy result back
4146 for (int i=0; i<m_update; ++i)
4147 {
4148 qt_rhs_glob[i + i_qr_start] = sub_qt[i];
4149 }
4150 }
4151 else
4152 {
4153 // Compute Q^T b from scratch
4154 qt_rhs_glob = rhs_avg_glob;
4155 lwork = -1;
4156 work.resize(10);
4157 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &trans, &m, &ione, &n_glob,
4158 mat_qr_data.GetData(), &m, tau.GetData(),
4159 qt_rhs_glob.GetData(), &m,
4160 work.data(), &lwork, &info);
4161 MFEM_VERIFY(info == 0, ""); // Q^T b work calculation failed
4162 lwork = static_cast<int>(work[0]);
4163 work.resize(lwork);
4164 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &trans, &m, &ione, &n_glob,
4165 mat_qr_data.GetData(), &m, tau.GetData(),
4166 qt_rhs_glob.GetData(), &m,
4167 work.data(), &lwork, &info);
4168 MFEM_VERIFY(info == 0, ""); // Q^T b failed
4169 }
4170
4171 if (verbosity_ > 2)
4172 {
4173 mfem::out << "Updated rhs " << iiter << endl;
4174 }
4175
4176 // Apply R^{-1}; first n_glob entries of vec1 are overwritten
4177 char upper = 'U';
4178 char nounit = 'N';
4179 vec1 = qt_rhs_glob;
4180 MFEM_LAPACK_PREFIX(trsm_)(&lside, &upper, &notrans, &nounit,
4181 &n_glob, &ione, &fone,
4182 mat_qr_data.GetData(), &m,
4183 vec1.GetData(), &n_glob);
4184
4185 if (verbosity_ > 2)
4186 {
4187 mfem::out << "Solved triangular system " << iiter << endl;
4188 }
4189
4190 // Check if all entries are positive
4191 int pos_ibool = 0;
4192 real_t smin = n_glob > 0 ? vec1(0) : 0.0;
4193 for (int i=0; i<n_glob; ++i)
4194 {
4195 soln_nz_glob_up(i) = vec1(i);
4196 smin = std::min(smin, soln_nz_glob_up(i));
4197 }
4198
4199 if (smin > zero_tol_)
4200 {
4201 pos_ibool = 1;
4202 for (int i=0; i<n_glob; ++i)
4203 {
4204 soln_nz_glob(i) = soln_nz_glob_up(i);
4205 }
4206 }
4207
4208 if (pos_ibool == 1)
4209 {
4210 break;
4211 }
4212
4213 if (verbosity_ > 2)
4214 {
4215 mfem::out << "Start pruning " << iiter << endl;
4216 for (int i = 0; i < n_glob; ++i)
4217 {
4218 if (soln_nz_glob_up(i) <= zero_tol_)
4219 {
4220 mfem::out << i << " " << n_glob << " " << soln_nz_glob_up(i) << endl;
4221 }
4222 }
4223 }
4224
4225 if (soln_nz_glob_up(n_glob - 1) <= zero_tol_)
4226 {
4227 stalledFlag = 1;
4228 if (verbosity_ > 2)
4229 {
4230 if (qr_residual_mode_ == QRresidualMode::hybrid)
4231 {
4232 mfem::out << "Detected stall due to adding and removing same "
4233 << "column. Switching to QR residual calculation "
4234 << "method." << endl;
4235 }
4236 else
4237 {
4238 mfem::out << "Detected stall due to adding and removing same"
4239 << " column. Exiting now." << endl;
4240 }
4241 }
4242 }
4243
4244 if (stalledFlag == 1 && qr_residual_mode_ == QRresidualMode::hybrid)
4245 {
4246 NNLS_qrres_on_ = true;
4247 break;
4248 }
4249
4250 real_t alpha = numeric_limits<real_t>::max();
4251
4252 // Find maximum permissible step
4253 for (int i = 0; i < n_glob; ++i)
4254 {
4255 if (soln_nz_glob_up(i) <= zero_tol_)
4256 {
4257 alpha = std::min(alpha, soln_nz_glob(i)/(soln_nz_glob(i) - soln_nz_glob_up(i)));
4258 }
4259 }
4260 // Update solution
4261 smin = 0.0;
4262 for (int i = 0; i < n_glob; ++i)
4263 {
4264 soln_nz_glob(i) += alpha*(soln_nz_glob_up(i) - soln_nz_glob(i));
4265 if (i == 0 || soln_nz_glob(i) < smin)
4266 {
4267 smin = soln_nz_glob(i);
4268 }
4269 }
4270
4271 while (smin > zero_tol_)
4272 {
4273 // This means there was a rounding error, as we should have
4274 // a zero element by definition. Recalculate alpha based on
4275 // the index that corresponds to the element that should be
4276 // zero.
4277
4278 int index_min = 0;
4279 smin = soln_nz_glob(0);
4280 for (int i = 1; i < n_glob; ++i)
4281 {
4282 if (soln_nz_glob(i) < smin)
4283 {
4284 smin = soln_nz_glob(i);
4285 index_min = i;
4286 }
4287 }
4288
4289 alpha = soln_nz_glob(index_min)/(soln_nz_glob(index_min)
4290 - soln_nz_glob_up(index_min));
4291
4292 // Reupdate solution
4293 for (int i = 0; i < n_glob; ++i)
4294 {
4295 soln_nz_glob(i) += alpha*(soln_nz_glob_up(i) - soln_nz_glob(i));
4296 }
4297 }
4298
4299 // Clean up zeroed entry
4300 i_qr_start = n_glob+1;
4301 while (true)
4302 {
4303 // Check if there is a zero entry
4304 int zero_ibool;
4305
4306 smin = n_glob > 0 ? soln_nz_glob(0) : 0.0;
4307 for (int i=1; i<n_glob; ++i)
4308 {
4309 smin = std::min(smin, soln_nz_glob(i));
4310 }
4311
4312 if (smin < zero_tol_)
4313 {
4314 zero_ibool = 1;
4315 }
4316 else
4317 {
4318 zero_ibool = 0;
4319 }
4320
4321 if (zero_ibool == 0) // Break if there is no more zero entry
4322 {
4323 break;
4324 }
4325
4326 int ind_zero = -1; // Index where the first zero is encountered
4327 nz_ind_zero = 0;
4328
4329 // Identify global index of the zeroed element
4330 for (int i = 0; i < n_glob; ++i)
4331 {
4332 if (soln_nz_glob(i) < zero_tol_)
4333 {
4334 ind_zero = i;
4335 break;
4336 }
4337 }
4338 MFEM_VERIFY(ind_zero != -1, "");
4339 // Identify the local index for nz_ind to which the zeroed entry
4340 // belongs
4341 for (int i = 0; i < ind_zero; ++i)
4342 {
4343 ++nz_ind_zero;
4344 }
4345
4346 {
4347 // Copy mat_0.cols[ind_zero+1,n_glob) to mat_qr.cols[ind_zero,n_glob-1)
4348 for (int i=0; i<m; ++i)
4349 for (int j=ind_zero; j<n_glob-1; ++j)
4350 {
4351 mat_qr_data(i + (j*m)) = mat_0_data(i + ((j+1)*m));
4352 }
4353
4354 // Copy mat_qr.cols[ind_zero,n_glob-1) to
4355 // mat_0.cols[ind_zero,n_glob-1)
4356 for (int i=0; i<m; ++i)
4357 for (int j=ind_zero; j<n_glob-1; ++j)
4358 {
4359 mat_0_data(i + (j*m)) = mat_qr_data(i + (j*m));
4360 }
4361 }
4362
4363 // Remove the zeroed entry from the local matrix index
4364 for (int i = nz_ind_zero; i < n_nz_ind-1; ++i)
4365 {
4366 nz_ind[i] = nz_ind[i+1];
4367 }
4368 --n_nz_ind;
4369
4370 // Shift soln_nz_glob and proc_index
4371 for (int i = ind_zero; i < n_glob-1; ++i)
4372 {
4373 soln_nz_glob(i) = soln_nz_glob(i+1);
4374 }
4375
4376 i_qr_start = std::min(i_qr_start, ind_zero);
4377 --n_glob;
4378 } // End of pruning loop
4379
4380 if (verbosity_ > 2)
4381 {
4382 mfem::out << "Finished pruning " << iiter << endl;
4383 }
4384 } // End of inner loop
4385
4386 // Check if we have stalled
4387 if (stalledFlag == 1)
4388 {
4389 --n_glob;
4390 --n_nz_ind;
4391 num_stalled = stalled_indices.size();
4392 stalled_indices.resize(num_stalled + 1);
4393 stalled_indices[num_stalled] = imax;
4394 if (verbosity_ > 2)
4395 {
4396 mfem::out << "Adding index " << imax << " to stalled index list "
4397 << "of size " << num_stalled << endl;
4398 }
4399 }
4400
4401 // Compute residual
4402 if (!NNLS_qrres_on_)
4403 {
4404 res_glob = rhs_avg_glob;
4405 real_t fmone = -1.0;
4406 MFEM_LAPACK_PREFIX(gemv_)(&notrans, &m, &n_glob, &fmone,
4407 mat_0_data.GetData(), &m,
4408 soln_nz_glob.GetData(), &ione, &fone,
4409 res_glob.GetData(), &ione);
4410 }
4411 else
4412 {
4413 // Compute residual using res = b - Q*Q^T*b, where Q is from an
4414 // economical QR decomposition
4415 lwork = -1;
4416 work.resize(10);
4417 qqt_rhs_glob = 0.0;
4418 for (int i=0; i<n_glob; ++i)
4419 {
4420 qqt_rhs_glob(i) = qt_rhs_glob(i);
4421 }
4422
4423 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &notrans, &m, &ione, &n_glob,
4424 mat_qr_data.GetData(), &m,
4425 tau.GetData(), qqt_rhs_glob.GetData(), &m,
4426 work.data(), &lwork, &info);
4427
4428 MFEM_VERIFY(info == 0, ""); // Q Q^T b work calculation failed.
4429 lwork = static_cast<int>(work[0]);
4430 work.resize(lwork);
4431 MFEM_LAPACK_PREFIX(ormqr_)(&lside, &notrans, &m, &ione, &n_glob,
4432 mat_qr_data.GetData(), &m,
4433 tau.GetData(), qqt_rhs_glob.GetData(), &m,
4434 work.data(), &lwork, &info);
4435 MFEM_VERIFY(info == 0, ""); // Q Q^T b calculation failed.
4436 res_glob = rhs_avg_glob;
4437 res_glob -= qqt_rhs_glob;
4438 }
4439
4440 if (verbosity_ > 2)
4441 {
4442 mfem::out << "Computed residual" << endl;
4443 }
4444
4445 ++n_outer_iter;
4446 } // End of outer loop
4447
4448 // Insert the solutions
4449 MFEM_VERIFY(n_glob == n_nz_ind, "");
4450 soln = 0.0;
4451 for (int i = 0; i < n_glob; ++i)
4452 {
4453 soln(nz_ind[i]) = soln_nz_glob(i);
4454 }
4455
4456 if (verbosity_ > 0)
4457 {
4458 mfem::out << "NNLS solver: m = " << m << ", n = " << n
4459 << ", outer_iter = " << n_outer_iter << ", inner_iter = "
4460 << n_total_inner_iter;
4461
4462 if (exit_flag == 0)
4463 {
4464 mfem::out << ": converged" << endl;
4465 }
4466 else
4467 {
4468 mfem::out << endl << "Warning, NNLS convergence stalled: "
4469 << (exit_flag == 2) << endl;
4470 mfem::out << "resErr = " << rmax << " vs tol = " << const_tol_
4471 << "; mumax = " << mumax << " vs tol = " << mu_tol << endl;
4472 }
4473 }
4474}
4475#endif // MFEM_USE_LAPACK
4476
4477}
const T * HostRead() const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), false).
Definition array.hpp:341
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:758
int Size() const
Return the logical size of the array.
Definition array.hpp:147
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
Definition array.hpp:132
void MakeRef(T *data_, int size_, bool own_data=false)
Make this Array a reference to a pointer.
Definition array.hpp:943
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
AuxSpaceSmoother(const HypreParMatrix &op, HypreParMatrix *aux_map, bool op_is_symmetric=true, bool own_aux_map=false)
Definition solvers.cpp:3630
BiCGSTAB method.
Definition solvers.hpp:621
void Mult(const Vector &b, Vector &x) const override
Iterative solution of the linear system using the BiCGSTAB method.
Definition solvers.cpp:1456
void SetOperator(const Operator &op) override
Set/update the solver for the given operator.
Definition solvers.hpp:634
A "square matrix" operator for the associated FE space and BLFIntegrators The sum of all the BLFInteg...
FiniteElementSpace * FESpace()
Return the FE space associated with the BilinearForm.
Reordering
The reordering method used by the BlockILU factorization.
Definition solvers.hpp:1019
BlockILU(int block_size_, Reordering reordering_=Reordering::MINIMUM_DISCARDED_FILL, int k_fill_=0)
Definition solvers.cpp:2865
void Mult(const Vector &b, Vector &x) const
Solve the system LUx = b, where L and U are the block ILU factors.
Definition solvers.cpp:3114
void SetOperator(const Operator &op)
Definition solvers.cpp:2883
Conjugate gradient method.
Definition solvers.hpp:538
void Mult(const Vector &b, Vector &x) const override
Iterative solution of the linear system using the Conjugate Gradient method.
Definition solvers.cpp:751
void SetOperator(const Operator &op) override
Set/update the solver for the given operator.
Definition solvers.hpp:551
void UpdateVectors()
Definition solvers.cpp:737
Data type dense matrix using column-major storage.
Definition densemat.hpp:24
void AddMult_a(real_t a, const Vector &x, Vector &y) const
y += a * A.x
Definition densemat.cpp:261
void Mult(const real_t *x, real_t *y) const
Matrix vector multiplication.
Definition densemat.cpp:125
void MultTranspose(const real_t *x, real_t *y) const
Multiply a vector with the transpose matrix.
Definition densemat.cpp:172
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Definition densemat.hpp:108
void Add(const real_t c, const DenseMatrix &A)
Adds the matrix A multiplied by the number c to the matrix.
Definition densemat.cpp:609
void SetSize(int i, int j, int k, MemoryType mt_=MemoryType::PRESERVE)
real_t * GetData(int k)
void Mult(const Vector &x, Vector &y) const override
Direct solution of the block diagonal linear system.
Definition solvers.cpp:3491
DirectSubBlockSolver(const SparseMatrix &A, const SparseMatrix &block_dof)
Definition solvers.cpp:3476
void Mult(const Vector &b, Vector &x) const override
Iterative solution of the linear system using the FGMRES method.
Definition solvers.cpp:1209
virtual int GetTrueVSize() const
Return the number of vector true (conforming) dofs.
Definition fespace.hpp:851
GMRES method.
Definition solvers.hpp:572
void SetKDim(int dim)
Set the number of iteration to perform between restarts, default is 50.
Definition solvers.hpp:584
void Mult(const Vector &b, Vector &x) const override
Iterative solution of the linear system using the GMRES method.
Definition solvers.cpp:1016
Wrapper for hypre's ParCSR matrix class.
Definition hypre.hpp:408
void GetDiag(Vector &diag) const
Get the local diagonal of the matrix.
Definition hypre.cpp:1605
Parallel smoothers in hypre.
Definition hypre.hpp:1046
virtual void MonitorSolution(int it, real_t norm, const Vector &x, bool final)
Monitor the solution vector x.
Definition solvers.hpp:70
const class IterativeSolver * iter_solver
The last IterativeSolver to which this controller was attached.
Definition solvers.hpp:40
virtual void MonitorResidual(int it, real_t norm, const Vector &r, bool final)
Monitor the solution vector r.
Definition solvers.hpp:64
Abstract base class for iterative solver.
Definition solvers.hpp:86
real_t abs_tol
Absolute tolerance.
Definition solvers.hpp:177
real_t rel_tol
Relative tolerance.
Definition solvers.hpp:174
PrintLevel print_options
Output behavior for the iterative solver.
Definition solvers.hpp:157
real_t GetFinalNorm() const
Returns the final residual norm after termination of the solver during the last call to Mult().
Definition solvers.hpp:295
void SetOperator(const Operator &op) override
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.cpp:180
virtual real_t Dot(const Vector &x, const Vector &y) const
Return the standard (l2, i.e., Euclidean) inner product of x and y.
Definition solvers.cpp:56
const Operator * oper
Definition solvers.hpp:140
void SetRelTol(real_t rtol)
Definition solvers.hpp:229
virtual void SetPreconditioner(Solver &pr)
This should be called before SetOperator.
Definition solvers.cpp:174
int print_level
(DEPRECATED) Legacy print level definition, which is left for compatibility with custom iterative sol...
Definition solvers.hpp:150
int GetNumIterations() const
Returns the number of iterations taken during the last call to Mult()
Definition solvers.hpp:280
virtual void SetPrintLevel(int print_lvl)
Legacy method to set the level of verbosity of the solver output.
Definition solvers.cpp:72
int max_iter
Limit for the number of iterations the solver is allowed to do.
Definition solvers.hpp:171
void SetMaxIter(int max_it)
Definition solvers.hpp:231
bool Monitor(int it, real_t norm, const Vector &r, const Vector &x, bool final=false) const
Monitor both the residual r and the solution x.
Definition solvers.cpp:191
MPI_Comm GetComm() const
Return the associated MPI communicator, or MPI_COMM_NULL if no communicator is set.
Definition solvers.hpp:322
static int GuessLegacyPrintLevel(PrintLevel)
Use some heuristics to guess a legacy print level corresponding to the given PrintLevel.
Definition solvers.cpp:150
IterativeSolverMonitor * monitor
Definition solvers.hpp:142
bool GetConverged() const
Returns true if the last call to Mult() converged successfully.
Definition solvers.hpp:282
PrintLevel FromLegacyPrintLevel(int)
Convert a legacy print level integer to a PrintLevel object.
Definition solvers.cpp:115
void SetAbsTol(real_t atol)
Definition solvers.hpp:230
real_t Norm(const Vector &x) const
Return the inner product norm of x, using the inner product defined by Dot()
Definition solvers.hpp:200
virtual ~KLUSolver()
Definition solvers.cpp:3466
void MultTranspose(const Vector &b, Vector &x) const override
Direct solution of the transposed linear system using KLU.
Definition solvers.cpp:3452
klu_symbolic * Symbolic
Definition solvers.hpp:1168
SparseMatrix * mat
Definition solvers.hpp:1167
klu_common Common
Definition solvers.hpp:1192
klu_numeric * Numeric
Definition solvers.hpp:1169
void Mult(const Vector &b, Vector &x) const override
Direct solution of the linear system using KLU.
Definition solvers.cpp:3438
void SetOperator(const Operator &op) override
Set/update the solver for the given operator.
Definition solvers.cpp:3407
Array< Vector * > skArray
Definition solvers.hpp:780
void Mult(const Vector &b, Vector &x) const override
Solve the nonlinear system with right-hand side b.
Definition solvers.cpp:2103
Array< Vector * > ykArray
Definition solvers.hpp:780
void Solve(int m, int n, real_t *X) const override
MINRES method.
Definition solvers.hpp:653
void Mult(const Vector &b, Vector &x) const override
Iterative solution of the linear system using the MINRES method.
Definition solvers.cpp:1705
void SetOperator(const Operator &op) override
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.cpp:1680
void SetPreconditioner(Solver &pr) override
This should be called before SetOperator.
Definition solvers.hpp:665
void Solve(const Vector &rhs_lb, const Vector &rhs_ub, Vector &soln) const
Solve the NNLS problem. Specifically, we find a vector soln, such that rhs_lb < mat*soln < rhs_ub is ...
Definition solvers.cpp:3776
QRresidualMode
Enumerated types of QRresidual mode. Options are 'off': the residual is calculated normally,...
Definition solvers.hpp:1378
void Mult(const Vector &w, Vector &sol) const override
Compute the non-negative least squares solution to the underdetermined system.
Definition solvers.cpp:3728
void SetQRResidualMode(const QRresidualMode qr_residual_mode)
Set the residual calculation mode for the NNLS solver. See QRresidualMode enum above for details.
Definition solvers.cpp:3686
void SetOperator(const Operator &op) override
The operator must be a DenseMatrix.
Definition solvers.cpp:3672
void NormalizeConstraints(Vector &rhs_lb, Vector &rhs_ub) const
Normalize the constraints such that the tolerances for each constraint (i.e. (UB - LB)/2) are equal....
Definition solvers.cpp:3695
void SetAdaptiveLinRtol(const int type=2, const real_t rtol0=0.5, const real_t rtol_max=0.9, const real_t alpha=0.5 *(1.0+sqrt(5.0)), const real_t gamma=1.0)
Enable adaptive linear solver relative tolerance algorithm.
Definition solvers.cpp:2022
void AdaptiveLinRtolPreSolve(const Vector &x, const int it, const real_t fnorm) const
Method for the adaptive linear solver rtol invoked before the linear solve.
Definition solvers.cpp:2035
Operator * grad
Definition solvers.hpp:695
virtual real_t ComputeScalingFactor(const Vector &x, const Vector &b) const
This method can be overloaded in derived classes to implement line search algorithms.
Definition solvers.hpp:749
void Mult(const Vector &b, Vector &x) const override
Solve the nonlinear system with right-hand side b.
Definition solvers.cpp:1912
void AdaptiveLinRtolPostSolve(const Vector &x, const Vector &b, const int it, const real_t fnorm) const
Method for the adaptive linear solver rtol invoked after the linear solve.
Definition solvers.cpp:2082
virtual void ProcessNewState(const Vector &x) const
This method can be overloaded in derived classes to perform computations that need knowledge of the n...
Definition solvers.hpp:754
void SetOperator(const Operator &op) override
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.cpp:1898
Chebyshev accelerated smoothing with given vector, no matrix necessary.
Definition solvers.hpp:414
void Mult(const Vector &x, Vector &y) const
Approach the solution of the linear system by applying Chebyshev smoothing.
Definition solvers.cpp:502
OperatorChebyshevSmoother(const Operator &oper_, const Vector &d, const Array< int > &ess_tdof_list, int order, real_t max_eig_estimate)
Definition solvers.cpp:338
OpType * As() const
Return the Operator pointer statically cast to a specified OpType. Similar to the method Get().
Definition handle.hpp:104
void Reset(OpType *A, bool own_A=true)
Reset the OperatorHandle to the given OpType pointer, A.
Definition handle.hpp:145
Jacobi smoothing for a given bilinear form (no matrix necessary).
Definition solvers.hpp:333
void Setup(const Vector &diag)
Definition solvers.cpp:284
OperatorJacobiSmoother(const real_t damping=1.0)
Default constructor: the diagonal will be computed by subsequent calls to SetOperator() using the Ope...
Definition solvers.cpp:207
void SetOperator(const Operator &op)
Recompute the diagonal using the method AssembleDiagonal of the given new Operator,...
Definition solvers.cpp:248
void Mult(const Vector &x, Vector &y) const
Approach the solution of the linear system by applying Jacobi smoothing.
Definition solvers.cpp:310
Abstract operator.
Definition operator.hpp:25
virtual MemoryClass GetMemoryClass() const
Return the MemoryClass preferred by the Operator.
Definition operator.hpp:86
int width
Dimension of the input / number of columns in the matrix.
Definition operator.hpp:28
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
Definition operator.hpp:66
int height
Dimension of the output / number of rows in the matrix.
Definition operator.hpp:27
virtual void Mult(const Vector &x, Vector &y) const =0
Operator application: y=A(x).
int NumCols() const
Get the number of columns (size of input) of the Operator. Synonym with Width().
Definition operator.hpp:75
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
Definition operator.hpp:72
virtual void AssembleDiagonal(Vector &diag) const
Computes the diagonal entries into diag. Typically, this operation only makes sense for linear Operat...
Definition operator.hpp:131
int NumRows() const
Get the number of rows (size of output) of the Operator. Synonym with Height().
Definition operator.hpp:69
virtual void MultTranspose(const Vector &x, Vector &y) const
Action of the transpose operator: y=A^t(x). The default behavior in class Operator is to generate an ...
Definition operator.hpp:93
virtual Operator & GetGradient(const Vector &x) const
Evaluate the gradient operator at the point x. The default behavior in class Operator is to generate ...
Definition operator.hpp:122
const Operator * C
Not owned, some can remain unused (NULL).
Definition solvers.hpp:878
void SetSolutionBounds(const Vector &xl, const Vector &xh)
Definition solvers.cpp:2449
void SetEqualityConstraint(const Vector &c)
Definition solvers.cpp:2431
int GetNumConstraints() const
Definition solvers.cpp:2457
const Operator * D
Definition solvers.hpp:878
OptimizationProblem(int insize, const Operator *C_, const Operator *D_)
In parallel, insize is the number of the local true dofs.
Definition solvers.cpp:2421
void SetInequalityConstraint(const Vector &dl, const Vector &dh)
Definition solvers.cpp:2439
const OptimizationProblem * problem
Definition solvers.hpp:918
void SetOperator(const Operator &op) override
Set the Operator that is the OrthoSolver is to invert (approximately).
Definition solvers.cpp:3561
void SetSolver(Solver &s)
Set the solver used by the OrthoSolver.
Definition solvers.cpp:3552
void Mult(const Vector &b, Vector &x) const override
Perform the action of the OrthoSolver: P * solver * P where P is the projection to the subspace of ve...
Definition solvers.cpp:3571
PowerMethod helper class to estimate the largest eigenvalue of an operator using the iterative power ...
real_t EstimateLargestEigenvalue(Operator &opr, Vector &v0, int numSteps=10, real_t tolerance=1e-8, int seed=12345)
Returns an estimate of the largest eigenvalue of the operator opr using the iterative power method.
Definition operator.cpp:781
General product operator: x -> (A*B)(x) = A(B(x)).
Definition operator.hpp:894
void Mult(const Vector &x, Vector &y) const override
Solution of the linear system using a product of subsolvers.
Definition solvers.cpp:3506
void MultTranspose(const Vector &x, Vector &y) const override
Solution of the transposed linear system using a product of subsolvers.
Definition solvers.cpp:3523
void MonitorResidual(int it, real_t norm, const Vector &r, bool final) override
Monitor the solution vector r.
Definition solvers.cpp:3165
const Array< int > * ess_dofs_list
Not owned.
Definition solvers.hpp:1106
void Mult(const Vector &xt, Vector &x) const override
Definition solvers.cpp:2499
void SetBounds(const Vector &lo_, const Vector &hi_)
Definition solvers.cpp:2478
void SetLinearConstraint(const Vector &w_, real_t a_)
Definition solvers.cpp:2484
void print_iteration(int it, real_t r, real_t l) const
Definition solvers.cpp:2490
void SetOptimizationProblem(const OptimizationProblem &prob) override
Definition solvers.cpp:2465
real_t solve(real_t l, const Vector &xt, Vector &x, int &nclip) const
Solve QP at fixed lambda.
Definition solvers.hpp:954
Stationary linear iteration: x <- x + B (b - A x)
Definition solvers.hpp:502
void Mult(const Vector &b, Vector &x) const override
Iterative solution of the linear system using Stationary Linear Iteration.
Definition solvers.cpp:552
void SetOperator(const Operator &op) override
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.hpp:515
void UpdateVectors()
Definition solvers.cpp:543
Base class for solvers.
Definition operator.hpp:780
bool iterative_mode
If true, use the second argument of Mult() as an initial guess.
Definition operator.hpp:783
virtual void SetOperator(const Operator &op)=0
Set/update the solver for the given operator.
Data type sparse matrix.
Definition sparsemat.hpp:51
const int * HostReadJ() const
void GetDiag(Vector &d) const
Returns the Diagonal of A.
void MultTranspose(const Vector &x, Vector &y) const override
Multiply a vector with the transposed matrix. y = At * x.
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
bool Finalized() const
Returns whether or not CSR format has been finalized.
const real_t * HostReadData() const
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
int RowSize(const int i) const
Returns the number of elements in row i.
real_t * HostReadWriteData()
void Mult(const Vector &x, Vector &y) const override
Matrix vector multiplication.
real_t * GetData()
Return the element data, i.e. the array A.
void SortColumnIndices()
Sort the column indices corresponding to each row.
int * GetJ()
Return the array J.
int * GetI()
Return the array I.
const int * HostReadI() const
SuiteSparse_long * AJ
Definition solvers.hpp:1126
real_t Control[UMFPACK_CONTROL]
Definition solvers.hpp:1131
SparseMatrix * mat
Definition solvers.hpp:1124
void SetOperator(const Operator &op) override
Factorize the given Operator op which must be a SparseMatrix.
Definition solvers.cpp:3218
SuiteSparse_long * AI
Definition solvers.hpp:1126
void MultTranspose(const Vector &b, Vector &x) const override
Direct solution of the transposed linear system using UMFPACK.
Definition solvers.cpp:3348
real_t Info[UMFPACK_INFO]
Definition solvers.hpp:1132
void Mult(const Vector &b, Vector &x) const override
Direct solution of the linear system using UMFPACK.
Definition solvers.cpp:3313
virtual ~UMFPackSolver()
Definition solvers.cpp:3385
Vector data type.
Definition vector.hpp:82
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:498
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetDataAndSize(real_t *d, int s)
Set the Vector data and size.
Definition vector.hpp:183
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Definition vector.cpp:745
real_t Norml2() const
Returns the l2 norm of the vector.
Definition vector.cpp:931
Vector & Set(const real_t a, const Vector &x)
(*this) = a * x
Definition vector.cpp:337
real_t Max() const
Returns the maximal element of the vector.
Definition vector.cpp:1164
int Size() const
Returns the size of the vector.
Definition vector.hpp:226
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition vector.hpp:144
real_t Sum() const
Return the sum of the vector entries.
Definition vector.cpp:1204
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
virtual real_t * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:506
real_t * GetData() const
Return a pointer to the beginning of the Vector data.
Definition vector.hpp:235
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:514
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
Definition vector.cpp:653
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:502
Vector & Add(const real_t a, const Vector &Va)
(*this) += a * Va
Definition vector.cpp:322
Vector beta
const real_t alpha
Definition ex15.cpp:369
real_t omega
Definition ex25.cpp:142
real_t mu
Definition ex25.cpp:140
prob_type prob
Definition ex25.cpp:156
void trans(const Vector &u, Vector &x)
Definition ex27.cpp:412
real_t b
Definition lissajous.cpp:42
real_t delta
Definition lissajous.cpp:43
real_t a
Definition lissajous.cpp:41
int BiCGSTAB(const Operator &A, Vector &x, const Vector &b, Solver &M, int &max_iter, real_t &tol, real_t atol, int printit)
BiCGSTAB method. (tolerances are squared)
Definition solvers.cpp:1657
void Swap(Array< T > &, Array< T > &)
Definition array.hpp:672
void mfem_error(const char *msg)
Definition error.cpp:154
void AddMult_a(real_t alpha, const DenseMatrix &b, const DenseMatrix &c, DenseMatrix &a)
Matrix matrix multiplication. A += alpha * B * C.
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition globals.hpp:66
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:391
bool IsFinite(const real_t &val)
Definition vector.hpp:527
real_t InnerProduct(HypreParVector *x, HypreParVector *y)
Definition hypre.cpp:468
int GMRES(const Operator &A, Vector &x, const Vector &b, Solver &M, int &max_iter, int m, real_t &tol, real_t atol, int printit)
GMRES method. (tolerances are squared)
Definition solvers.cpp:1403
void ApplyPlaneRotation(real_t &dx, real_t &dy, real_t &cs, real_t &sn)
Definition solvers.cpp:988
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
int aGMRES(const Operator &A, Vector &x, const Vector &b, const Operator &M, int &max_iter, int m_max, int m_min, int m_step, real_t cf, real_t &tol, real_t &atol, int printit)
Definition solvers.cpp:2259
void SLI(const Operator &A, const Vector &b, Vector &x, int print_iter, int max_num_iter, real_t RTOLERANCE, real_t ATOLERANCE)
Stationary linear iteration. (tolerances are squared)
Definition solvers.cpp:705
void PCG(const Operator &A, Solver &B, const Vector &b, Vector &x, int print_iter, int max_num_iter, real_t RTOLERANCE, real_t ATOLERANCE)
Preconditioned conjugate gradient method. (tolerances are squared)
Definition solvers.cpp:949
void CG(const Operator &A, const Vector &b, Vector &x, int print_iter, int max_num_iter, real_t RTOLERANCE, real_t ATOLERANCE)
Conjugate gradient method. (tolerances are squared)
Definition solvers.cpp:934
void GeneratePlaneRotation(real_t &dx, real_t &dy, real_t &cs, real_t &sn)
Definition solvers.cpp:966
void MINRES(const Operator &A, const Vector &b, Vector &x, int print_it, int max_it, real_t rtol, real_t atol)
MINRES method without preconditioner. (tolerances are squared)
Definition solvers.cpp:1870
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition vector.cpp:547
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
void MinimumDiscardedFillOrdering(SparseMatrix &C, Array< int > &p)
Definition solvers.cpp:2757
void forall(int N, lambda &&body)
Definition forall.hpp:753
void Update(Vector &x, int k, DenseMatrix &h, Vector &s, Array< Vector * > &v)
Definition solvers.cpp:995
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
real_t p(const Vector &x, real_t t)
Settings for the output behavior of the IterativeSolver.
Definition solvers.hpp:98
bool errors
If a fatal problem has been detected the failure will be reported to mfem::err.
Definition solvers.hpp:101
bool iterations
Detailed information about each iteration will be reported to mfem::out.
Definition solvers.hpp:107
bool warnings
If a non-fatal problem has been detected some context-specific information will be reported to mfem::...
Definition solvers.hpp:104
bool first_and_last
Information about the first and last iteration will be printed to mfem::out.
Definition solvers.hpp:113
bool summary
A summary of the solver process will be reported after the last iteration to mfem::out.
Definition solvers.hpp:110
Helper struct to convert a C++ type to an MPI type.