MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
solvers.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "linalg.hpp"
14#include "../general/forall.hpp"
17#include <iostream>
18#include <iomanip>
19#include <algorithm>
20#include <cmath>
21#include <set>
22
23namespace mfem
24{
25
26using namespace std;
27
29 : Solver(0, true)
30{
31 oper = NULL;
32 prec = NULL;
33 max_iter = 10;
34 rel_tol = abs_tol = 0.0;
35#ifdef MFEM_USE_MPI
36 dot_prod_type = 0;
37#endif
38}
39
40#ifdef MFEM_USE_MPI
41
43 : Solver(0, true)
44{
45 oper = NULL;
46 prec = NULL;
47 max_iter = 10;
48 rel_tol = abs_tol = 0.0;
49 dot_prod_type = 1;
50 comm = comm_;
51}
52
53#endif // MFEM_USE_MPI
54
55real_t IterativeSolver::Dot(const Vector &x, const Vector &y) const
56{
57#ifndef MFEM_USE_MPI
58 return (x * y);
59#else
60 if (dot_prod_type == 0)
61 {
62 return (x * y);
63 }
64 else
65 {
66 return InnerProduct(comm, x, y);
67 }
68#endif
69}
70
72{
74 int print_level_ = print_lvl;
75
76#ifdef MFEM_USE_MPI
77 if (dot_prod_type != 0)
78 {
79 int rank;
80 MPI_Comm_rank(comm, &rank);
81 if (rank != 0) // Suppress output.
82 {
83 print_level_ = -1;
85 }
86 }
87#endif
88
89 print_level = print_level_;
90}
91
93{
94 print_options = options;
95
96 int derived_print_level = GuessLegacyPrintLevel(options);
97
98#ifdef MFEM_USE_MPI
99 if (dot_prod_type != 0)
100 {
101 int rank;
102 MPI_Comm_rank(comm, &rank);
103 if (rank != 0)
104 {
105 derived_print_level = -1;
107 }
108 }
109#endif
110
111 print_level = derived_print_level;
112}
113
115 int print_level_)
116{
117#ifdef MFEM_USE_MPI
118 int rank = 0;
119 if (comm != MPI_COMM_NULL)
120 {
121 MPI_Comm_rank(comm, &rank);
122 }
123#endif
124
125 switch (print_level_)
126 {
127 case -1:
128 return PrintLevel();
129 case 0:
130 return PrintLevel().Errors().Warnings();
131 case 1:
132 return PrintLevel().Errors().Warnings().Iterations();
133 case 2:
134 return PrintLevel().Errors().Warnings().Summary();
135 case 3:
136 return PrintLevel().Errors().Warnings().FirstAndLast();
137 default:
138#ifdef MFEM_USE_MPI
139 if (rank == 0)
140#endif
141 {
142 MFEM_WARNING("Unknown print level " << print_level_ <<
143 ". Defaulting to level 0.");
144 }
145 return PrintLevel().Errors().Warnings();
146 }
147}
148
150{
151 if (print_options_.iterations)
152 {
153 return 1;
154 }
155 else if (print_options_.first_and_last)
156 {
157 return 3;
158 }
159 else if (print_options_.summary)
160 {
161 return 2;
162 }
163 else if (print_options_.errors && print_options_.warnings)
164 {
165 return 0;
166 }
167 else
168 {
169 return -1;
170 }
171}
172
174{
175 prec = &pr;
176 prec->iterative_mode = false;
177}
178
180{
181 oper = &op;
182 height = op.Height();
183 width = op.Width();
184 if (prec)
185 {
187 }
188}
189
190void IterativeSolver::Monitor(int it, real_t norm, const Vector& r,
191 const Vector& x, bool final) const
192{
193 if (monitor != nullptr)
194 {
195 monitor->MonitorResidual(it, norm, r, final);
196 monitor->MonitorSolution(it, norm, x, final);
197 }
198}
199
201 : damping(dmpng),
202 ess_tdof_list(nullptr),
203 oper(nullptr),
204 allow_updates(true)
205{ }
206
208 const Array<int> &ess_tdofs,
209 const real_t dmpng)
210 :
211 Solver(a.FESpace()->GetTrueVSize()),
212 dinv(height),
213 damping(dmpng),
214 ess_tdof_list(&ess_tdofs),
215 residual(height),
216 allow_updates(false)
217{
218 Vector &diag(residual);
219 a.AssembleDiagonal(diag);
220 // 'a' cannot be used for iterative_mode == true because its size may be
221 // different.
222 oper = nullptr;
223 Setup(diag);
224}
225
227 const Array<int> &ess_tdofs,
228 const real_t dmpng)
229 :
230 Solver(d.Size()),
231 dinv(height),
232 damping(dmpng),
233 ess_tdof_list(&ess_tdofs),
234 residual(height),
235 oper(NULL),
236 allow_updates(false)
237{
238 Setup(d);
239}
240
242{
243 if (!allow_updates)
244 {
245 // original behavior of this method
246 oper = &op; return;
247 }
248
249 // Treat (Par)BilinearForm objects as a special case since their
250 // AssembleDiagonal method returns the true-dof diagonal whereas the form
251 // itself may act as an ldof operator. This is for compatibility with the
252 // constructor that takes a BilinearForm parameter.
253 const BilinearForm *blf = dynamic_cast<const BilinearForm *>(&op);
254 if (blf)
255 {
256 // 'a' cannot be used for iterative_mode == true because its size may be
257 // different.
258 oper = nullptr;
259 height = width = blf->FESpace()->GetTrueVSize();
260 }
261 else
262 {
263 oper = &op;
264 height = op.Height();
265 width = op.Width();
266 MFEM_ASSERT(height == width, "not a square matrix!");
267 // ess_tdof_list is only used with BilinearForm
268 ess_tdof_list = nullptr;
269 }
270 dinv.SetSize(height);
271 residual.SetSize(height);
272 Vector &diag(residual);
273 op.AssembleDiagonal(diag);
274 Setup(diag);
275}
276
278{
279 residual.UseDevice(true);
280 const real_t delta = damping;
281 auto D = diag.Read();
282 auto DI = dinv.Write();
283 const bool use_abs_diag_ = use_abs_diag;
284 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
285 {
286 if (D[i] == 0.0)
287 {
288 MFEM_ABORT_KERNEL("Zero diagonal entry in OperatorJacobiSmoother");
289 }
290 if (!use_abs_diag_) { DI[i] = delta / D[i]; }
291 else { DI[i] = delta / std::abs(D[i]); }
292 });
293 if (ess_tdof_list && ess_tdof_list->Size() > 0)
294 {
295 auto I = ess_tdof_list->Read();
296 mfem::forall(ess_tdof_list->Size(), [=] MFEM_HOST_DEVICE (int i)
297 {
298 DI[I[i]] = delta;
299 });
300 }
301}
302
304{
305 // For empty MPI ranks, height may be 0:
306 // MFEM_VERIFY(Height() > 0, "The diagonal hasn't been computed.");
307 MFEM_ASSERT(x.Size() == Width(), "invalid input vector");
308 MFEM_ASSERT(y.Size() == Height(), "invalid output vector");
309
310 if (iterative_mode)
311 {
312 MFEM_VERIFY(oper, "iterative_mode == true requires the forward operator");
313 oper->Mult(y, residual); // r = A y
314 subtract(x, residual, residual); // r = x - A y
315 }
316 else
317 {
318 residual = x;
319 y.UseDevice(true);
320 y = 0.0;
321 }
322 auto DI = dinv.Read();
323 auto R = residual.Read();
324 auto Y = y.ReadWrite();
325 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
326 {
327 Y[i] += DI[i] * R[i];
328 });
329}
330
332 const Vector &d,
333 const Array<int>& ess_tdofs,
334 int order_, real_t max_eig_estimate_)
335 :
336 Solver(d.Size()),
337 order(order_),
338 max_eig_estimate(max_eig_estimate_),
339 N(d.Size()),
340 dinv(N),
341 diag(d),
342 coeffs(order),
343 ess_tdof_list(ess_tdofs),
344 residual(N),
345 oper(&oper_) { Setup(); }
346
347#ifdef MFEM_USE_MPI
349 const Vector &d,
350 const Array<int>& ess_tdofs,
351 int order_, MPI_Comm comm, int power_iterations, real_t power_tolerance)
352#else
354 const Vector &d,
355 const Array<int>& ess_tdofs,
356 int order_, int power_iterations, real_t power_tolerance)
357#endif
358 : Solver(d.Size()),
359 order(order_),
360 N(d.Size()),
361 dinv(N),
362 diag(d),
363 coeffs(order),
364 ess_tdof_list(ess_tdofs),
365 residual(N),
366 oper(&oper_)
367{
368 OperatorJacobiSmoother invDiagOperator(diag, ess_tdofs, 1.0);
369 ProductOperator diagPrecond(&invDiagOperator, oper, false, false);
370
371#ifdef MFEM_USE_MPI
372 PowerMethod powerMethod(comm);
373#else
374 PowerMethod powerMethod;
375#endif
376 Vector ev(oper->Width());
377 max_eig_estimate = powerMethod.EstimateLargestEigenvalue(diagPrecond, ev,
378 power_iterations, power_tolerance);
379
380 Setup();
381}
382
384 const Vector &d,
385 const Array<int>& ess_tdofs,
386 int order_, real_t max_eig_estimate_)
387 : OperatorChebyshevSmoother(*oper_, d, ess_tdofs, order_, max_eig_estimate_) { }
388
389#ifdef MFEM_USE_MPI
391 const Vector &d,
392 const Array<int>& ess_tdofs,
393 int order_, MPI_Comm comm, int power_iterations, real_t power_tolerance)
394 : OperatorChebyshevSmoother(*oper_, d, ess_tdofs, order_, comm,
395 power_iterations, power_tolerance) { }
396#else
398 const Vector &d,
399 const Array<int>& ess_tdofs,
400 int order_, int power_iterations, real_t power_tolerance)
401 : OperatorChebyshevSmoother(*oper_, d, ess_tdofs, order_, power_iterations,
402 power_tolerance) { }
403#endif
404
406{
407 // Invert diagonal
408 residual.UseDevice(true);
409 auto D = diag.Read();
410 auto X = dinv.Write();
411 mfem::forall(N, [=] MFEM_HOST_DEVICE (int i) { X[i] = 1.0 / D[i]; });
412 auto I = ess_tdof_list.Read();
413 mfem::forall(ess_tdof_list.Size(), [=] MFEM_HOST_DEVICE (int i)
414 {
415 X[I[i]] = 1.0;
416 });
417
418 // Set up Chebyshev coefficients
419 // For reference, see e.g., Parallel multigrid smoothing: polynomial versus
420 // Gauss-Seidel by Adams et al.
421 real_t upper_bound = 1.2 * max_eig_estimate;
422 real_t lower_bound = 0.3 * max_eig_estimate;
423 real_t theta = 0.5 * (upper_bound + lower_bound);
424 real_t delta = 0.5 * (upper_bound - lower_bound);
425
426 switch (order-1)
427 {
428 case 0:
429 {
430 coeffs[0] = 1.0 / theta;
431 break;
432 }
433 case 1:
434 {
435 real_t tmp_0 = 1.0/(pow(delta, 2) - 2*pow(theta, 2));
436 coeffs[0] = -4*theta*tmp_0;
437 coeffs[1] = 2*tmp_0;
438 break;
439 }
440 case 2:
441 {
442 real_t tmp_0 = 3*pow(delta, 2);
443 real_t tmp_1 = pow(theta, 2);
444 real_t tmp_2 = 1.0/(-4*pow(theta, 3) + theta*tmp_0);
445 coeffs[0] = tmp_2*(tmp_0 - 12*tmp_1);
446 coeffs[1] = 12/(tmp_0 - 4*tmp_1);
447 coeffs[2] = -4*tmp_2;
448 break;
449 }
450 case 3:
451 {
452 real_t tmp_0 = pow(delta, 2);
453 real_t tmp_1 = pow(theta, 2);
454 real_t tmp_2 = 8*tmp_0;
455 real_t tmp_3 = 1.0/(pow(delta, 4) + 8*pow(theta, 4) - tmp_1*tmp_2);
456 coeffs[0] = tmp_3*(32*pow(theta, 3) - 16*theta*tmp_0);
457 coeffs[1] = tmp_3*(-48*tmp_1 + tmp_2);
458 coeffs[2] = 32*theta*tmp_3;
459 coeffs[3] = -8*tmp_3;
460 break;
461 }
462 case 4:
463 {
464 real_t tmp_0 = 5*pow(delta, 4);
465 real_t tmp_1 = pow(theta, 4);
466 real_t tmp_2 = pow(theta, 2);
467 real_t tmp_3 = pow(delta, 2);
468 real_t tmp_4 = 60*tmp_3;
469 real_t tmp_5 = 20*tmp_3;
470 real_t tmp_6 = 1.0/(16*pow(theta, 5) - pow(theta, 3)*tmp_5 + theta*tmp_0);
471 real_t tmp_7 = 160*tmp_2;
472 real_t tmp_8 = 1.0/(tmp_0 + 16*tmp_1 - tmp_2*tmp_5);
473 coeffs[0] = tmp_6*(tmp_0 + 80*tmp_1 - tmp_2*tmp_4);
474 coeffs[1] = tmp_8*(tmp_4 - tmp_7);
475 coeffs[2] = tmp_6*(-tmp_5 + tmp_7);
476 coeffs[3] = -80*tmp_8;
477 coeffs[4] = 16*tmp_6;
478 break;
479 }
480 default:
481 MFEM_ABORT("Chebyshev smoother not implemented for order = " << order);
482 }
483}
484
486{
487 if (iterative_mode)
488 {
489 MFEM_ABORT("Chebyshev smoother not implemented for iterative mode");
490 }
491
492 if (!oper)
493 {
494 MFEM_ABORT("Chebyshev smoother requires operator");
495 }
496
497 residual = x;
498 helperVector.SetSize(x.Size());
499
500 y.UseDevice(true);
501 y = 0.0;
502
503 for (int k = 0; k < order; ++k)
504 {
505 // Apply
506 if (k > 0)
507 {
508 oper->Mult(residual, helperVector);
509 residual = helperVector;
510 }
511
512 // Scale residual by inverse diagonal
513 const int n = N;
514 auto Dinv = dinv.Read();
515 auto R = residual.ReadWrite();
516 mfem::forall(n, [=] MFEM_HOST_DEVICE (int i) { R[i] *= Dinv[i]; });
517
518 // Add weighted contribution to y
519 auto Y = y.ReadWrite();
520 auto C = coeffs.Read();
521 mfem::forall(n, [=] MFEM_HOST_DEVICE (int i) { Y[i] += C[k] * R[i]; });
522 }
523}
524
526{
527 r.SetSize(width);
528 z.SetSize(width);
529}
530
531void SLISolver::Mult(const Vector &b, Vector &x) const
532{
533 int i;
534
535 // Optimized preconditioned SLI with fixed number of iterations and given
536 // initial guess
537 if (rel_tol == 0.0 && iterative_mode && prec)
538 {
539 for (i = 0; i < max_iter; i++)
540 {
541 oper->Mult(x, r); // r = A x
542 subtract(b, r, r); // r = b - A x
543 prec->Mult(r, z); // z = B r
544 add(x, 1.0, z, x); // x = x + B (b - A x)
545 }
546 converged = true;
547 final_iter = i;
548 return;
549 }
550
551 // Optimized preconditioned SLI with fixed number of iterations and zero
552 // initial guess
553 if (rel_tol == 0.0 && !iterative_mode && prec)
554 {
555 prec->Mult(b, x); // x = B b (initial guess 0)
556 for (i = 1; i < max_iter; i++)
557 {
558 oper->Mult(x, r); // r = A x
559 subtract(b, r, r); // r = b - A x
560 prec->Mult(r, z); // z = B r
561 add(x, 1.0, z, x); // x = x + B (b - A x)
562 }
563 converged = true;
564 final_iter = i;
565 return;
566 }
567
568 // General version of SLI with a relative tolerance, optional preconditioner
569 // and optional initial guess
570 real_t r0, nom, nom0, nomold = 1, cf;
571
572 if (iterative_mode)
573 {
574 oper->Mult(x, r);
575 subtract(b, r, r); // r = b - A x
576 }
577 else
578 {
579 r = b;
580 x = 0.0;
581 }
582
583 if (prec)
584 {
585 prec->Mult(r, z); // z = B r
586 nom0 = nom = sqrt(Dot(z, z));
587 }
588 else
589 {
590 nom0 = nom = sqrt(Dot(r, r));
591 }
592 initial_norm = nom0;
593
595 {
596 mfem::out << " Iteration : " << setw(3) << right << 0 << " ||Br|| = "
597 << nom << (print_options.first_and_last ? " ..." : "") << '\n';
598 }
599
600 r0 = std::max(nom*rel_tol, abs_tol);
601 if (nom <= r0)
602 {
603 converged = true;
604 final_iter = 0;
605 final_norm = nom;
606 return;
607 }
608
609 // start iteration
610 converged = false;
612 for (i = 1; true; )
613 {
614 if (prec) // x = x + B (b - A x)
615 {
616 add(x, 1.0, z, x);
617 }
618 else
619 {
620 add(x, 1.0, r, x);
621 }
622
623 oper->Mult(x, r);
624 subtract(b, r, r); // r = b - A x
625
626 if (prec)
627 {
628 prec->Mult(r, z); // z = B r
629 nom = sqrt(Dot(z, z));
630 }
631 else
632 {
633 nom = sqrt(Dot(r, r));
634 }
635
636 cf = nom/nomold;
637 nomold = nom;
638
639 bool done = false;
640 if (nom < r0)
641 {
642 converged = true;
643 final_iter = i;
644 done = true;
645 }
646
647 if (++i > max_iter)
648 {
649 done = true;
650 }
651
653 {
654 mfem::out << " Iteration : " << setw(3) << right << (i-1)
655 << " ||Br|| = " << setw(11) << left << nom
656 << "\tConv. rate: " << cf << '\n';
657 }
658
659 if (done) { break; }
660 }
661
663 {
664 const auto rf = pow (nom/nom0, 1.0/final_iter);
665 mfem::out << "SLI: Number of iterations: " << final_iter << '\n'
666 << "Conv. rate: " << cf << '\n'
667 << "Average reduction factor: "<< rf << '\n';
668 }
670 {
671 mfem::out << "SLI: No convergence!" << '\n';
672 }
673
674 final_norm = nom;
675}
676
677void SLI(const Operator &A, const Vector &b, Vector &x,
678 int print_iter, int max_num_iter,
679 real_t RTOLERANCE, real_t ATOLERANCE)
680{
681 MFEM_PERF_FUNCTION;
682
683 SLISolver sli;
684 sli.SetPrintLevel(print_iter);
685 sli.SetMaxIter(max_num_iter);
686 sli.SetRelTol(sqrt(RTOLERANCE));
687 sli.SetAbsTol(sqrt(ATOLERANCE));
688 sli.SetOperator(A);
689 sli.Mult(b, x);
690}
691
692void SLI(const Operator &A, Solver &B, const Vector &b, Vector &x,
693 int print_iter, int max_num_iter,
694 real_t RTOLERANCE, real_t ATOLERANCE)
695{
696 MFEM_PERF_FUNCTION;
697
698 SLISolver sli;
699 sli.SetPrintLevel(print_iter);
700 sli.SetMaxIter(max_num_iter);
701 sli.SetRelTol(sqrt(RTOLERANCE));
702 sli.SetAbsTol(sqrt(ATOLERANCE));
703 sli.SetOperator(A);
704 sli.SetPreconditioner(B);
705 sli.Mult(b, x);
706}
707
708
710{
712
713 r.SetSize(width, mt); r.UseDevice(true);
714 d.SetSize(width, mt); d.UseDevice(true);
715 z.SetSize(width, mt); z.UseDevice(true);
716}
717
718void CGSolver::Mult(const Vector &b, Vector &x) const
719{
720 int i;
721 real_t r0, den, nom, nom0, betanom, alpha, beta;
722
723 x.UseDevice(true);
724 if (iterative_mode)
725 {
726 oper->Mult(x, r);
727 subtract(b, r, r); // r = b - A x
728 }
729 else
730 {
731 r = b;
732 x = 0.0;
733 }
734
735 if (prec)
736 {
737 prec->Mult(r, z); // z = B r
738 d = z;
739 }
740 else
741 {
742 d = r;
743 }
744 nom0 = nom = Dot(d, r);
745 if (nom0 >= 0.0) { initial_norm = sqrt(nom0); }
746 MFEM_ASSERT(IsFinite(nom), "nom = " << nom);
748 {
749 mfem::out << " Iteration : " << setw(3) << 0 << " (B r, r) = "
750 << nom << (print_options.first_and_last ? " ...\n" : "\n");
751 }
752 Monitor(0, nom, r, x);
753
754 if (nom < 0.0)
755 {
757 {
758 mfem::out << "PCG: The preconditioner is not positive definite. (Br, r) = "
759 << nom << '\n';
760 }
761 converged = false;
762 final_iter = 0;
763 initial_norm = nom;
764 final_norm = nom;
765 return;
766 }
767 r0 = std::max(nom*rel_tol*rel_tol, abs_tol*abs_tol);
768 if (nom <= r0)
769 {
770 converged = true;
771 final_iter = 0;
772 final_norm = sqrt(nom);
773 return;
774 }
775
776 oper->Mult(d, z); // z = A d
777 den = Dot(z, d);
778 MFEM_ASSERT(IsFinite(den), "den = " << den);
779 if (den <= 0.0)
780 {
781 if (Dot(d, d) > 0.0 && print_options.warnings)
782 {
783 mfem::out << "PCG: The operator is not positive definite. (Ad, d) = "
784 << den << '\n';
785 }
786 if (den == 0.0)
787 {
788 converged = false;
789 final_iter = 0;
790 final_norm = sqrt(nom);
791 return;
792 }
793 }
794
795 // start iteration
796 converged = false;
798 for (i = 1; true; )
799 {
800 alpha = nom/den;
801 add(x, alpha, d, x); // x = x + alpha d
802 add(r, -alpha, z, r); // r = r - alpha A d
803
804 if (prec)
805 {
806 prec->Mult(r, z); // z = B r
807 betanom = Dot(r, z);
808 }
809 else
810 {
811 betanom = Dot(r, r);
812 }
813 MFEM_ASSERT(IsFinite(betanom), "betanom = " << betanom);
814 if (betanom < 0.0)
815 {
817 {
818 mfem::out << "PCG: The preconditioner is not positive definite. (Br, r) = "
819 << betanom << '\n';
820 }
821 converged = false;
822 final_iter = i;
823 break;
824 }
825
827 {
828 mfem::out << " Iteration : " << setw(3) << i << " (B r, r) = "
829 << betanom << std::endl;
830 }
831
832 Monitor(i, betanom, r, x);
833
834 if (betanom <= r0)
835 {
836 converged = true;
837 final_iter = i;
838 break;
839 }
840
841 if (++i > max_iter)
842 {
843 break;
844 }
845
846 beta = betanom/nom;
847 if (prec)
848 {
849 add(z, beta, d, d); // d = z + beta d
850 }
851 else
852 {
853 add(r, beta, d, d);
854 }
855 oper->Mult(d, z); // z = A d
856 den = Dot(d, z);
857 MFEM_ASSERT(IsFinite(den), "den = " << den);
858 if (den <= 0.0)
859 {
860 if (Dot(d, d) > 0.0 && print_options.warnings)
861 {
862 mfem::out << "PCG: The operator is not positive definite. (Ad, d) = "
863 << den << '\n';
864 }
865 if (den == 0.0)
866 {
867 final_iter = i;
868 break;
869 }
870 }
871 nom = betanom;
872 }
874 {
875 mfem::out << " Iteration : " << setw(3) << final_iter << " (B r, r) = "
876 << betanom << '\n';
877 }
879 {
880 mfem::out << "PCG: Number of iterations: " << final_iter << '\n';
881 }
884 {
885 const auto arf = pow (betanom/nom0, 0.5/final_iter);
886 mfem::out << "Average reduction factor = " << arf << '\n';
887 }
889 {
890 mfem::out << "PCG: No convergence!" << '\n';
891 }
892
893 final_norm = sqrt(betanom);
894
895 Monitor(final_iter, final_norm, r, x, true);
896}
897
898void CG(const Operator &A, const Vector &b, Vector &x,
899 int print_iter, int max_num_iter,
900 real_t RTOLERANCE, real_t ATOLERANCE)
901{
902 MFEM_PERF_FUNCTION;
903
904 CGSolver cg;
905 cg.SetPrintLevel(print_iter);
906 cg.SetMaxIter(max_num_iter);
907 cg.SetRelTol(sqrt(RTOLERANCE));
908 cg.SetAbsTol(sqrt(ATOLERANCE));
909 cg.SetOperator(A);
910 cg.Mult(b, x);
911}
912
913void PCG(const Operator &A, Solver &B, const Vector &b, Vector &x,
914 int print_iter, int max_num_iter,
915 real_t RTOLERANCE, real_t ATOLERANCE)
916{
917 MFEM_PERF_FUNCTION;
918
919 CGSolver pcg;
920 pcg.SetPrintLevel(print_iter);
921 pcg.SetMaxIter(max_num_iter);
922 pcg.SetRelTol(sqrt(RTOLERANCE));
923 pcg.SetAbsTol(sqrt(ATOLERANCE));
924 pcg.SetOperator(A);
925 pcg.SetPreconditioner(B);
926 pcg.Mult(b, x);
927}
928
929
931 real_t &cs, real_t &sn)
932{
933 if (dy == 0.0)
934 {
935 cs = 1.0;
936 sn = 0.0;
937 }
938 else if (fabs(dy) > fabs(dx))
939 {
940 real_t temp = dx / dy;
941 sn = 1.0 / sqrt( 1.0 + temp*temp );
942 cs = temp * sn;
943 }
944 else
945 {
946 real_t temp = dy / dx;
947 cs = 1.0 / sqrt( 1.0 + temp*temp );
948 sn = temp * cs;
949 }
950}
951
952inline void ApplyPlaneRotation(real_t &dx, real_t &dy, real_t &cs, real_t &sn)
953{
954 real_t temp = cs * dx + sn * dy;
955 dy = -sn * dx + cs * dy;
956 dx = temp;
957}
958
959inline void Update(Vector &x, int k, DenseMatrix &h, Vector &s,
961{
962 Vector y(s);
963
964 // Backsolve:
965 for (int i = k; i >= 0; i--)
966 {
967 y(i) /= h(i,i);
968 for (int j = i - 1; j >= 0; j--)
969 {
970 y(j) -= h(j,i) * y(i);
971 }
972 }
973
974 for (int j = 0; j <= k; j++)
975 {
976 x.Add(y(j), *v[j]);
977 }
978}
979
980void GMRESSolver::Mult(const Vector &b, Vector &x) const
981{
982 // Generalized Minimum Residual method following the algorithm
983 // on p. 20 of the SIAM Templates book.
984
985 int n = width;
986
987 DenseMatrix H(m+1, m);
988 Vector s(m+1), cs(m+1), sn(m+1);
989 Vector r(n), w(n);
991
992 int i, j, k;
993
994 if (iterative_mode)
995 {
996 oper->Mult(x, r);
997 }
998 else
999 {
1000 x = 0.0;
1001 }
1002
1003 if (prec)
1004 {
1005 if (iterative_mode)
1006 {
1007 subtract(b, r, w);
1008 prec->Mult(w, r); // r = M (b - A x)
1009 }
1010 else
1011 {
1012 prec->Mult(b, r);
1013 }
1014 }
1015 else
1016 {
1017 if (iterative_mode)
1018 {
1019 subtract(b, r, r);
1020 }
1021 else
1022 {
1023 r = b;
1024 }
1025 }
1026 real_t beta = initial_norm = Norm(r); // beta = ||r||
1027 MFEM_ASSERT(IsFinite(beta), "beta = " << beta);
1028
1029 final_norm = std::max(rel_tol*beta, abs_tol);
1030
1031 if (beta <= final_norm)
1032 {
1033 final_norm = beta;
1034 final_iter = 0;
1035 converged = true;
1036 j = 0;
1037 goto finish;
1038 }
1039
1041 {
1042 mfem::out << " Pass : " << setw(2) << 1
1043 << " Iteration : " << setw(3) << 0
1044 << " ||B r|| = " << beta
1045 << (print_options.first_and_last ? " ...\n" : "\n");
1046 }
1047
1048 Monitor(0, beta, r, x);
1049
1050 v.SetSize(m+1, NULL);
1051
1052 for (j = 1; j <= max_iter; )
1053 {
1054 if (v[0] == NULL) { v[0] = new Vector(n); }
1055 v[0]->Set(1.0/beta, r);
1056 s = 0.0; s(0) = beta;
1057
1058 for (i = 0; i < m && j <= max_iter; i++, j++)
1059 {
1060 if (prec)
1061 {
1062 oper->Mult(*v[i], r);
1063 prec->Mult(r, w); // w = M A v[i]
1064 }
1065 else
1066 {
1067 oper->Mult(*v[i], w);
1068 }
1069
1070 for (k = 0; k <= i; k++)
1071 {
1072 H(k,i) = Dot(w, *v[k]); // H(k,i) = w * v[k]
1073 w.Add(-H(k,i), *v[k]); // w -= H(k,i) * v[k]
1074 }
1075
1076 H(i+1,i) = Norm(w); // H(i+1,i) = ||w||
1077 MFEM_ASSERT(IsFinite(H(i+1,i)), "Norm(w) = " << H(i+1,i));
1078 if (v[i+1] == NULL) { v[i+1] = new Vector(n); }
1079 v[i+1]->Set(1.0/H(i+1,i), w); // v[i+1] = w / H(i+1,i)
1080
1081 for (k = 0; k < i; k++)
1082 {
1083 ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
1084 }
1085
1086 GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1087 ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1088 ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
1089
1090 const real_t resid = fabs(s(i+1));
1091 MFEM_ASSERT(IsFinite(resid), "resid = " << resid);
1092
1093 if (resid <= final_norm)
1094 {
1095 Update(x, i, H, s, v);
1096 final_norm = resid;
1097 final_iter = j;
1098 converged = true;
1099 goto finish;
1100 }
1101
1103 {
1104 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1105 << " Iteration : " << setw(3) << j
1106 << " ||B r|| = " << resid << '\n';
1107 }
1108
1109 Monitor(j, resid, r, x);
1110 }
1111
1112 if (print_options.iterations && j <= max_iter)
1113 {
1114 mfem::out << "Restarting..." << '\n';
1115 }
1116
1117 Update(x, i-1, H, s, v);
1118
1119 oper->Mult(x, r);
1120 if (prec)
1121 {
1122 subtract(b, r, w);
1123 prec->Mult(w, r); // r = M (b - A x)
1124 }
1125 else
1126 {
1127 subtract(b, r, r);
1128 }
1129 beta = Norm(r); // beta = ||r||
1130 MFEM_ASSERT(IsFinite(beta), "beta = " << beta);
1131 if (beta <= final_norm)
1132 {
1133 final_norm = beta;
1134 final_iter = j;
1135 converged = true;
1136 goto finish;
1137 }
1138 }
1139
1140 final_norm = beta;
1142 converged = false;
1143
1144finish:
1146 {
1147 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1148 << " Iteration : " << setw(3) << final_iter
1149 << " ||B r|| = " << final_norm << '\n';
1150 }
1152 {
1153 mfem::out << "GMRES: Number of iterations: " << final_iter << '\n';
1154 }
1156 {
1157 mfem::out << "GMRES: No convergence!\n";
1158 }
1159
1160 Monitor(final_iter, final_norm, r, x, true);
1161
1162 for (i = 0; i < v.Size(); i++)
1163 {
1164 delete v[i];
1165 }
1166}
1167
1168void FGMRESSolver::Mult(const Vector &b, Vector &x) const
1169{
1170 DenseMatrix H(m+1,m);
1171 Vector s(m+1), cs(m+1), sn(m+1);
1172 Vector r(b.Size());
1173
1174 int i, j, k;
1175
1176 if (iterative_mode)
1177 {
1178 oper->Mult(x, r);
1179 subtract(b,r,r);
1180 }
1181 else
1182 {
1183 x = 0.;
1184 r = b;
1185 }
1186 real_t beta = initial_norm = Norm(r); // beta = ||r||
1187 MFEM_ASSERT(IsFinite(beta), "beta = " << beta);
1188
1189 final_norm = std::max(rel_tol*beta, abs_tol);
1190
1191 converged = false;
1192
1193 if (beta <= final_norm)
1194 {
1195 final_norm = beta;
1196 final_iter = 0;
1197 converged = true;
1198 return;
1199 }
1200
1202 {
1203 mfem::out << " Pass : " << setw(2) << 1
1204 << " Iteration : " << setw(3) << 0
1205 << " || r || = " << beta
1206 << (print_options.first_and_last ? " ...\n" : "\n");
1207 }
1208
1209 Monitor(0, beta, r, x);
1210
1211 Array<Vector*> v(m+1);
1212 Array<Vector*> z(m+1);
1213 for (i= 0; i<=m; i++)
1214 {
1215 v[i] = NULL;
1216 z[i] = NULL;
1217 }
1218
1219 j = 1;
1220 while (j <= max_iter)
1221 {
1222 if (v[0] == NULL) { v[0] = new Vector(b.Size()); }
1223 (*v[0]) = 0.0;
1224 v[0] -> Add (1.0/beta, r); // v[0] = r / ||r||
1225 s = 0.0; s(0) = beta;
1226
1227 for (i = 0; i < m && j <= max_iter; i++, j++)
1228 {
1229
1230 if (z[i] == NULL) { z[i] = new Vector(b.Size()); }
1231 (*z[i]) = 0.0;
1232
1233 if (prec)
1234 {
1235 prec->Mult(*v[i], *z[i]);
1236 }
1237 else
1238 {
1239 (*z[i]) = (*v[i]);
1240 }
1241 oper->Mult(*z[i], r);
1242
1243 for (k = 0; k <= i; k++)
1244 {
1245 H(k,i) = Dot( r, *v[k]); // H(k,i) = r * v[k]
1246 r.Add(-H(k,i), (*v[k])); // r -= H(k,i) * v[k]
1247 }
1248
1249 H(i+1,i) = Norm(r); // H(i+1,i) = ||r||
1250 if (v[i+1] == NULL) { v[i+1] = new Vector(b.Size()); }
1251 (*v[i+1]) = 0.0;
1252 v[i+1] -> Add (1.0/H(i+1,i), r); // v[i+1] = r / H(i+1,i)
1253
1254 for (k = 0; k < i; k++)
1255 {
1256 ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
1257 }
1258
1259 GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1260 ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
1261 ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
1262
1263 const real_t resid = fabs(s(i+1));
1264 MFEM_ASSERT(IsFinite(resid), "resid = " << resid);
1266 resid <= final_norm))
1267 {
1268 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1269 << " Iteration : " << setw(3) << j
1270 << " || r || = " << resid << endl;
1271 }
1272 Monitor(j, resid, r, x, resid <= final_norm);
1273
1274 if (resid <= final_norm)
1275 {
1276 Update(x, i, H, s, z);
1277 final_norm = resid;
1278 final_iter = j;
1279 converged = true;
1280
1282 {
1283 mfem::out << "FGMRES: Number of iterations: " << final_iter << '\n';
1284 }
1285
1286 for (i= 0; i<=m; i++)
1287 {
1288 if (v[i]) { delete v[i]; }
1289 if (z[i]) { delete z[i]; }
1290 }
1291 return;
1292 }
1293 }
1294
1296 {
1297 mfem::out << "Restarting..." << endl;
1298 }
1299
1300 Update(x, i-1, H, s, z);
1301
1302 oper->Mult(x, r);
1303 subtract(b,r,r);
1304 beta = Norm(r);
1305 MFEM_ASSERT(IsFinite(beta), "beta = " << beta);
1306 if (beta <= final_norm)
1307 {
1308 converged = true;
1309
1310 break;
1311 }
1312 }
1313
1314 // Clean buffers up
1315 for (i = 0; i <= m; i++)
1316 {
1317 if (v[i]) { delete v[i]; }
1318 if (z[i]) { delete z[i]; }
1319 }
1320
1321 final_norm = beta;
1323
1324 // Note: j is off by one when we arrive here
1326 {
1327 mfem::out << " Pass : " << setw(2) << (j-1)/m+1
1328 << " Iteration : " << setw(3) << j-1
1329 << " || r || = " << final_norm << endl;
1330 }
1332 {
1333 mfem::out << "FGMRES: Number of iterations: " << final_iter << '\n';
1334 }
1336 {
1337 mfem::out << "FGMRES: No convergence!\n";
1338 }
1339}
1340
1341
1342int GMRES(const Operator &A, Vector &x, const Vector &b, Solver &M,
1343 int &max_iter, int m, real_t &tol, real_t atol, int printit)
1344{
1345 MFEM_PERF_FUNCTION;
1346
1347 GMRESSolver gmres;
1348 gmres.SetPrintLevel(printit);
1349 gmres.SetMaxIter(max_iter);
1350 gmres.SetKDim(m);
1351 gmres.SetRelTol(sqrt(tol));
1352 gmres.SetAbsTol(sqrt(atol));
1353 gmres.SetOperator(A);
1354 gmres.SetPreconditioner(M);
1355 gmres.Mult(b, x);
1356 max_iter = gmres.GetNumIterations();
1357 tol = gmres.GetFinalNorm()*gmres.GetFinalNorm();
1358 return gmres.GetConverged();
1359}
1360
1361void GMRES(const Operator &A, Solver &B, const Vector &b, Vector &x,
1362 int print_iter, int max_num_iter, int m, real_t rtol, real_t atol)
1363{
1364 GMRES(A, x, b, B, max_num_iter, m, rtol, atol, print_iter);
1365}
1366
1367
1369{
1370 p.SetSize(width);
1372 s.SetSize(width);
1374 t.SetSize(width);
1375 v.SetSize(width);
1376 r.SetSize(width);
1378}
1379
1380void BiCGSTABSolver::Mult(const Vector &b, Vector &x) const
1381{
1382 // BiConjugate Gradient Stabilized method following the algorithm
1383 // on p. 27 of the SIAM Templates book.
1384
1385 int i;
1386 real_t resid, tol_goal;
1387 real_t rho_1, rho_2=1.0, alpha=1.0, beta, omega=1.0;
1388
1389 if (iterative_mode)
1390 {
1391 oper->Mult(x, r);
1392 subtract(b, r, r); // r = b - A x
1393 }
1394 else
1395 {
1396 x = 0.0;
1397 r = b;
1398 }
1399 rtilde = r;
1400
1401 resid = initial_norm = Norm(r);
1402 MFEM_ASSERT(IsFinite(resid), "resid = " << resid);
1404 {
1405 mfem::out << " Iteration : " << setw(3) << 0
1406 << " ||r|| = " << resid << (print_options.first_and_last ? " ...\n" : "\n");
1407 }
1408
1409 Monitor(0, resid, r, x);
1410
1411 tol_goal = std::max(resid*rel_tol, abs_tol);
1412
1413 if (resid <= tol_goal)
1414 {
1415 final_norm = resid;
1416 final_iter = 0;
1417 converged = true;
1418 return;
1419 }
1420
1421 for (i = 1; i <= max_iter; i++)
1422 {
1423 rho_1 = Dot(rtilde, r);
1424 if (rho_1 == 0)
1425 {
1427 {
1428 mfem::out << " Iteration : " << setw(3) << i
1429 << " ||r|| = " << resid << '\n';
1430 }
1431
1432 Monitor(i, resid, r, x);
1433
1434 final_norm = resid;
1435 final_iter = i;
1436 converged = false;
1438 {
1439 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1440 }
1442 {
1443 mfem::out << "BiCGStab: No convergence!\n";
1444 }
1445 return;
1446 }
1447 if (i == 1)
1448 {
1449 p = r;
1450 }
1451 else
1452 {
1453 beta = (rho_1/rho_2) * (alpha/omega);
1454 add(p, -omega, v, p); // p = p - omega * v
1455 add(r, beta, p, p); // p = r + beta * p
1456 }
1457 if (prec)
1458 {
1459 prec->Mult(p, phat); // phat = M^{-1} * p
1460 }
1461 else
1462 {
1463 phat = p;
1464 }
1465 oper->Mult(phat, v); // v = A * phat
1466 alpha = rho_1 / Dot(rtilde, v);
1467 add(r, -alpha, v, s); // s = r - alpha * v
1468 resid = Norm(s);
1469 MFEM_ASSERT(IsFinite(resid), "resid = " << resid);
1470 if (resid < tol_goal)
1471 {
1472 x.Add(alpha, phat); // x = x + alpha * phat
1474 {
1475 mfem::out << " Iteration : " << setw(3) << i
1476 << " ||s|| = " << resid << '\n';
1477 }
1478 final_norm = resid;
1479 final_iter = i;
1480 converged = true;
1482 {
1483 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1484 }
1485 return;
1486 }
1488 {
1489 mfem::out << " Iteration : " << setw(3) << i
1490 << " ||s|| = " << resid;
1491 }
1492 Monitor(i, resid, r, x);
1493 if (prec)
1494 {
1495 prec->Mult(s, shat); // shat = M^{-1} * s
1496 }
1497 else
1498 {
1499 shat = s;
1500 }
1501 oper->Mult(shat, t); // t = A * shat
1502 omega = Dot(t, s) / Dot(t, t);
1503 x.Add(alpha, phat); // x += alpha * phat
1504 x.Add(omega, shat); // x += omega * shat
1505 add(s, -omega, t, r); // r = s - omega * t
1506
1507 rho_2 = rho_1;
1508 resid = Norm(r);
1509 MFEM_ASSERT(IsFinite(resid), "resid = " << resid);
1511 {
1512 mfem::out << " ||r|| = " << resid << '\n';
1513 }
1514 Monitor(i, resid, r, x);
1515 if (resid < tol_goal)
1516 {
1517 final_norm = resid;
1518 final_iter = i;
1519 converged = true;
1521 {
1522 mfem::out << " Iteration : " << setw(3) << i
1523 << " ||r|| = " << resid << '\n';
1524 }
1526 {
1527 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1528 }
1529 return;
1530 }
1531 if (omega == 0)
1532 {
1533 final_norm = resid;
1534 final_iter = i;
1535 converged = false;
1537 {
1538 mfem::out << " Iteration : " << setw(3) << i
1539 << " ||r|| = " << resid << '\n';
1540 }
1542 {
1543 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1544 }
1546 {
1547 mfem::out << "BiCGStab: No convergence!\n";
1548 }
1549 return;
1550 }
1551 }
1552
1553 final_norm = resid;
1555 converged = false;
1556
1558 {
1559 mfem::out << " Iteration : " << setw(3) << final_iter
1560 << " ||r|| = " << resid << '\n';
1561 }
1563 {
1564 mfem::out << "BiCGStab: Number of iterations: " << final_iter << '\n';
1565 }
1567 {
1568 mfem::out << "BiCGStab: No convergence!\n";
1569 }
1570}
1571
1572int BiCGSTAB(const Operator &A, Vector &x, const Vector &b, Solver &M,
1573 int &max_iter, real_t &tol, real_t atol, int printit)
1574{
1575 BiCGSTABSolver bicgstab;
1576 bicgstab.SetPrintLevel(printit);
1577 bicgstab.SetMaxIter(max_iter);
1578 bicgstab.SetRelTol(sqrt(tol));
1579 bicgstab.SetAbsTol(sqrt(atol));
1580 bicgstab.SetOperator(A);
1581 bicgstab.SetPreconditioner(M);
1582 bicgstab.Mult(b, x);
1583 max_iter = bicgstab.GetNumIterations();
1584 tol = bicgstab.GetFinalNorm()*bicgstab.GetFinalNorm();
1585 return bicgstab.GetConverged();
1586}
1587
1588void BiCGSTAB(const Operator &A, Solver &B, const Vector &b, Vector &x,
1589 int print_iter, int max_num_iter, real_t rtol, real_t atol)
1590{
1591 BiCGSTAB(A, x, b, B, max_num_iter, rtol, atol, print_iter);
1592}
1593
1594
1596{
1598 v0.SetSize(width);
1599 v1.SetSize(width);
1600 w0.SetSize(width);
1601 w1.SetSize(width);
1602 q.SetSize(width);
1603 if (prec)
1604 {
1605 u1.SetSize(width);
1606 }
1607
1608 v0.UseDevice(true);
1609 v1.UseDevice(true);
1610 w0.UseDevice(true);
1611 w1.UseDevice(true);
1612 q.UseDevice(true);
1613 u1.UseDevice(true);
1614}
1615
1616void MINRESSolver::Mult(const Vector &b, Vector &x) const
1617{
1618 // Based on the MINRES algorithm on p. 86, Fig. 6.9 in
1619 // "Iterative Krylov Methods for Large Linear Systems",
1620 // by Henk A. van der Vorst, 2003.
1621 // Extended to support an SPD preconditioner.
1622
1623 b.UseDevice(true);
1624 x.UseDevice(true);
1625
1626 int it;
1627 real_t beta, eta, gamma0, gamma1, sigma0, sigma1;
1628 real_t alpha, delta, rho1, rho2, rho3, norm_goal;
1629 Vector *z = (prec) ? &u1 : &v1;
1630
1631 converged = true;
1632
1633 if (!iterative_mode)
1634 {
1635 v1 = b;
1636 x = 0.;
1637 }
1638 else
1639 {
1640 oper->Mult(x, v1);
1641 subtract(b, v1, v1);
1642 }
1643
1644 if (prec)
1645 {
1646 prec->Mult(v1, u1);
1647 }
1648 eta = beta = initial_norm = sqrt(Dot(*z, v1));
1649 MFEM_ASSERT(IsFinite(eta), "eta = " << eta);
1650 gamma0 = gamma1 = 1.;
1651 sigma0 = sigma1 = 0.;
1652
1653 norm_goal = std::max(rel_tol*eta, abs_tol);
1654
1655 if (eta <= norm_goal)
1656 {
1657 it = 0;
1658 goto loop_end;
1659 }
1660
1662 {
1663 mfem::out << "MINRES: iteration " << setw(3) << 0 << ": ||r||_B = "
1664 << eta << (print_options.first_and_last ? " ..." : "") << '\n';
1665 }
1666 Monitor(0, eta, *z, x);
1667
1668 for (it = 1; it <= max_iter; it++)
1669 {
1670 v1 /= beta;
1671 if (prec)
1672 {
1673 u1 /= beta;
1674 }
1675 oper->Mult(*z, q);
1676 alpha = Dot(*z, q);
1677 MFEM_ASSERT(IsFinite(alpha), "alpha = " << alpha);
1678 if (it > 1) // (v0 == 0) for (it == 1)
1679 {
1680 q.Add(-beta, v0);
1681 }
1682 add(q, -alpha, v1, v0);
1683
1684 delta = gamma1*alpha - gamma0*sigma1*beta;
1685 rho3 = sigma0*beta;
1686 rho2 = sigma1*alpha + gamma0*gamma1*beta;
1687 if (!prec)
1688 {
1689 beta = Norm(v0);
1690 }
1691 else
1692 {
1693 prec->Mult(v0, q);
1694 beta = sqrt(Dot(v0, q));
1695 }
1696 MFEM_ASSERT(IsFinite(beta), "beta = " << beta);
1697 rho1 = std::hypot(delta, beta);
1698
1699 if (it == 1)
1700 {
1701 w0.Set(1./rho1, *z); // (w0 == 0) and (w1 == 0)
1702 }
1703 else if (it == 2)
1704 {
1705 add(1./rho1, *z, -rho2/rho1, w1, w0); // (w0 == 0)
1706 }
1707 else
1708 {
1709 add(-rho3/rho1, w0, -rho2/rho1, w1, w0);
1710 w0.Add(1./rho1, *z);
1711 }
1712
1713 gamma0 = gamma1;
1714 gamma1 = delta/rho1;
1715
1716 x.Add(gamma1*eta, w0);
1717
1718 sigma0 = sigma1;
1719 sigma1 = beta/rho1;
1720
1721 eta = -sigma1*eta;
1722 MFEM_ASSERT(IsFinite(eta), "eta = " << eta);
1723
1724 if (fabs(eta) <= norm_goal)
1725 {
1726 goto loop_end;
1727 }
1728
1730 {
1731 mfem::out << "MINRES: iteration " << setw(3) << it << ": ||r||_B = "
1732 << fabs(eta) << '\n';
1733 }
1734 Monitor(it, fabs(eta), *z, x);
1735
1736 if (prec)
1737 {
1738 Swap(u1, q);
1739 }
1740 Swap(v0, v1);
1741 Swap(w0, w1);
1742 }
1743 converged = false;
1744 it--;
1745
1746loop_end:
1747 final_iter = it;
1748 final_norm = fabs(eta);
1749
1751 {
1752 mfem::out << "MINRES: iteration " << setw(3) << it << ": ||r||_B = "
1753 << fabs(eta) << '\n';
1754 }
1755
1757 {
1758 mfem::out << "MINRES: Number of iterations: " << setw(3) << final_iter << '\n';
1759 }
1760
1761 Monitor(final_iter, final_norm, *z, x, true);
1762
1763 // if (print_options.iteration_details || (!converged && print_options.errors))
1764 // {
1765 // oper->Mult(x, v1);
1766 // subtract(b, v1, v1);
1767 // if (prec)
1768 // {
1769 // prec->Mult(v1, u1);
1770 // }
1771 // eta = sqrt(Dot(*z, v1));
1772 // mfem::out << "MINRES: iteration " << setw(3) << it << '\n'
1773 // << " ||r||_B = " << eta << " (re-computed)" << '\n';
1774 // }
1775
1777 {
1778 mfem::out << "MINRES: No convergence!\n";
1779 }
1780}
1781
1782void MINRES(const Operator &A, const Vector &b, Vector &x, int print_it,
1783 int max_it, real_t rtol, real_t atol)
1784{
1785 MFEM_PERF_FUNCTION;
1786
1787 MINRESSolver minres;
1788 minres.SetPrintLevel(print_it);
1789 minres.SetMaxIter(max_it);
1790 minres.SetRelTol(sqrt(rtol));
1791 minres.SetAbsTol(sqrt(atol));
1792 minres.SetOperator(A);
1793 minres.Mult(b, x);
1794}
1795
1796void MINRES(const Operator &A, Solver &B, const Vector &b, Vector &x,
1797 int print_it, int max_it, real_t rtol, real_t atol)
1798{
1799 MINRESSolver minres;
1800 minres.SetPrintLevel(print_it);
1801 minres.SetMaxIter(max_it);
1802 minres.SetRelTol(sqrt(rtol));
1803 minres.SetAbsTol(sqrt(atol));
1804 minres.SetOperator(A);
1805 minres.SetPreconditioner(B);
1806 minres.Mult(b, x);
1807}
1808
1809
1811{
1812 oper = &op;
1813 height = op.Height();
1814 width = op.Width();
1815 MFEM_ASSERT(height == width, "square Operator is required.");
1816
1817 r.SetSize(width);
1818 c.SetSize(width);
1819}
1820
1821void NewtonSolver::Mult(const Vector &b, Vector &x) const
1822{
1823 MFEM_ASSERT(oper != NULL, "the Operator is not set (use SetOperator).");
1824 MFEM_ASSERT(prec != NULL, "the Solver is not set (use SetSolver).");
1825
1826 int it;
1827 real_t norm0, norm, norm_goal;
1828 const bool have_b = (b.Size() == Height());
1829
1830 if (!iterative_mode)
1831 {
1832 x = 0.0;
1833 }
1834
1835 ProcessNewState(x);
1836
1837 oper->Mult(x, r);
1838 if (have_b)
1839 {
1840 r -= b;
1841 }
1842
1843 norm0 = norm = initial_norm = Norm(r);
1845 {
1846 mfem::out << "Newton iteration " << setw(2) << 0
1847 << " : ||r|| = " << norm << "...\n";
1848 }
1849 norm_goal = std::max(rel_tol*norm, abs_tol);
1850
1851 prec->iterative_mode = false;
1852
1853 // x_{i+1} = x_i - [DF(x_i)]^{-1} [F(x_i)-b]
1854 for (it = 0; true; it++)
1855 {
1856 MFEM_ASSERT(IsFinite(norm), "norm = " << norm);
1858 {
1859 mfem::out << "Newton iteration " << setw(2) << it
1860 << " : ||r|| = " << norm;
1861 if (it > 0)
1862 {
1863 mfem::out << ", ||r||/||r_0|| = " << norm/norm0;
1864 }
1865 mfem::out << '\n';
1866 }
1867 Monitor(it, norm, r, x);
1868
1869 if (norm <= norm_goal)
1870 {
1871 converged = true;
1872 break;
1873 }
1874
1875 if (it >= max_iter)
1876 {
1877 converged = false;
1878 break;
1879 }
1880
1881 grad = &oper->GetGradient(x);
1883
1884 if (lin_rtol_type)
1885 {
1886 AdaptiveLinRtolPreSolve(x, it, norm);
1887 }
1888
1889 prec->Mult(r, c); // c = [DF(x_i)]^{-1} [F(x_i)-b]
1890
1891 if (lin_rtol_type)
1892 {
1893 AdaptiveLinRtolPostSolve(c, r, it, norm);
1894 }
1895
1896 const real_t c_scale = ComputeScalingFactor(x, b);
1897 if (c_scale == 0.0)
1898 {
1899 converged = false;
1900 break;
1901 }
1902 add(x, -c_scale, c, x);
1903
1904 ProcessNewState(x);
1905
1906 oper->Mult(x, r);
1907 if (have_b)
1908 {
1909 r -= b;
1910 }
1911 norm = Norm(r);
1912 }
1913
1914 final_iter = it;
1915 final_norm = norm;
1916
1919 {
1920 mfem::out << "Newton: Number of iterations: " << final_iter << '\n'
1921 << " ||r|| = " << final_norm << '\n';
1922 }
1924 {
1925 mfem::out << "Newton: No convergence!\n";
1926 }
1927}
1928
1930 const real_t rtol0,
1931 const real_t rtol_max,
1932 const real_t alpha_,
1933 const real_t gamma_)
1934{
1935 lin_rtol_type = type;
1936 lin_rtol0 = rtol0;
1937 lin_rtol_max = rtol_max;
1938 this->alpha = alpha_;
1939 this->gamma = gamma_;
1940}
1941
1943 const int it,
1944 const real_t fnorm) const
1945{
1946 // Assume that when adaptive linear solver relative tolerance is activated,
1947 // we are working with an iterative solver.
1948 auto iterative_solver = static_cast<IterativeSolver *>(prec);
1949 // Adaptive linear solver relative tolerance
1950 real_t eta;
1951 // Safeguard threshold
1952 real_t sg_threshold = 0.1;
1953
1954 if (it == 0)
1955 {
1956 eta = lin_rtol0;
1957 }
1958 else
1959 {
1960 if (lin_rtol_type == 1)
1961 {
1962 // eta = gamma * abs(||F(x1)|| - ||F(x0) + DF(x0) s0||) / ||F(x0)||
1963 eta = gamma * abs(fnorm - lnorm_last) / fnorm_last;
1964 }
1965 else if (lin_rtol_type == 2)
1966 {
1967 // eta = gamma * (||F(x1)|| / ||F(x0)||)^alpha
1968 eta = gamma * pow(fnorm / fnorm_last, alpha);
1969 }
1970 else
1971 {
1972 MFEM_ABORT("Unknown adaptive linear solver rtol version");
1973 }
1974
1975 // Safeguard rtol from "oversolving" ?!
1976 const real_t sg_eta = gamma * pow(eta_last, alpha);
1977 if (sg_eta > sg_threshold) { eta = std::max(eta, sg_eta); }
1978 }
1979
1980 eta = std::min(eta, lin_rtol_max);
1981 iterative_solver->SetRelTol(eta);
1982 eta_last = eta;
1984 {
1985 mfem::out << "Eisenstat-Walker rtol = " << eta << "\n";
1986 }
1987}
1988
1990 const Vector &b,
1991 const int it,
1992 const real_t fnorm) const
1993{
1994 fnorm_last = fnorm;
1995
1996 // If version 1 is chosen, the true linear residual norm has to be computed
1997 // and in most cases we can only retrieve the preconditioned linear residual
1998 // norm.
1999 if (lin_rtol_type == 1)
2000 {
2001 // lnorm_last = ||F(x0) + DF(x0) s0||
2002 Vector linres(x.Size());
2003 grad->Mult(x, linres);
2004 linres -= b;
2005 lnorm_last = Norm(linres);
2006 }
2007}
2008
2009void LBFGSSolver::Mult(const Vector &b, Vector &x) const
2010{
2011 MFEM_VERIFY(oper != NULL, "the Operator is not set (use SetOperator).");
2012
2013 // Quadrature points that are checked for negative Jacobians etc.
2014 Vector sk, rk, yk, rho, alpha;
2015
2016 // r - r_{k+1}, c - descent direction
2017 sk.SetSize(width); // x_{k+1}-x_k
2018 rk.SetSize(width); // nabla(f(x_{k}))
2019 yk.SetSize(width); // r_{k+1}-r_{k}
2020 rho.SetSize(m); // 1/(dot(yk,sk)
2021 alpha.SetSize(m); // rhok*sk'*c
2022 int last_saved_id = -1;
2023
2024 int it;
2025 real_t norm0, norm, norm_goal;
2026 const bool have_b = (b.Size() == Height());
2027
2028 if (!iterative_mode)
2029 {
2030 x = 0.0;
2031 }
2032
2033 ProcessNewState(x);
2034
2035 // r = F(x)-b
2036 oper->Mult(x, r);
2037 if (have_b) { r -= b; }
2038
2039 c = r; // initial descent direction
2040
2041 norm0 = norm = initial_norm = Norm(r);
2043 {
2044 mfem::out << "LBFGS iteration " << setw(2) << 0
2045 << " : ||r|| = " << norm << "...\n";
2046 }
2047 norm_goal = std::max(rel_tol*norm, abs_tol);
2048 for (it = 0; true; it++)
2049 {
2050 MFEM_ASSERT(IsFinite(norm), "norm = " << norm);
2052 {
2053 mfem::out << "LBFGS iteration " << it
2054 << " : ||r|| = " << norm;
2055 if (it > 0)
2056 {
2057 mfem::out << ", ||r||/||r_0|| = " << norm/norm0;
2058 }
2059 mfem::out << '\n';
2060 }
2061
2062 if (norm <= norm_goal)
2063 {
2064 converged = true;
2065 break;
2066 }
2067
2068 if (it >= max_iter)
2069 {
2070 converged = false;
2071 break;
2072 }
2073
2074 rk = r;
2075 const real_t c_scale = ComputeScalingFactor(x, b);
2076 if (c_scale == 0.0)
2077 {
2078 converged = false;
2079 break;
2080 }
2081 add(x, -c_scale, c, x); // x_{k+1} = x_k - c_scale*c
2082
2083 ProcessNewState(x);
2084
2085 oper->Mult(x, r);
2086 if (have_b)
2087 {
2088 r -= b;
2089 }
2090
2091 // LBFGS - construct descent direction
2092 subtract(r, rk, yk); // yk = r_{k+1} - r_{k}
2093 sk = c; sk *= -c_scale; //sk = x_{k+1} - x_{k} = -c_scale*c
2094 const real_t gamma = Dot(sk, yk)/Dot(yk, yk);
2095
2096 // Save last m vectors
2097 last_saved_id = (last_saved_id == m-1) ? 0 : last_saved_id+1;
2098 *skArray[last_saved_id] = sk;
2099 *ykArray[last_saved_id] = yk;
2100
2101 c = r;
2102 for (int i = last_saved_id; i > -1; i--)
2103 {
2104 rho(i) = 1.0/Dot((*skArray[i]),(*ykArray[i]));
2105 alpha(i) = rho(i)*Dot((*skArray[i]),c);
2106 add(c, -alpha(i), (*ykArray[i]), c);
2107 }
2108 if (it > m-1)
2109 {
2110 for (int i = m-1; i > last_saved_id; i--)
2111 {
2112 rho(i) = 1./Dot((*skArray[i]), (*ykArray[i]));
2113 alpha(i) = rho(i)*Dot((*skArray[i]),c);
2114 add(c, -alpha(i), (*ykArray[i]), c);
2115 }
2116 }
2117
2118 c *= gamma; // scale search direction
2119 if (it > m-1)
2120 {
2121 for (int i = last_saved_id+1; i < m ; i++)
2122 {
2123 real_t betai = rho(i)*Dot((*ykArray[i]), c);
2124 add(c, alpha(i)-betai, (*skArray[i]), c);
2125 }
2126 }
2127 for (int i = 0; i < last_saved_id+1 ; i++)
2128 {
2129 real_t betai = rho(i)*Dot((*ykArray[i]), c);
2130 add(c, alpha(i)-betai, (*skArray[i]), c);
2131 }
2132
2133 norm = Norm(r);
2134 }
2135
2136 final_iter = it;
2137 final_norm = norm;
2138
2141 {
2142 mfem::out << "LBFGS: Number of iterations: " << final_iter << '\n'
2143 << " ||r|| = " << final_norm << '\n';
2144 }
2146 {
2147 mfem::out << "LBFGS: No convergence!\n";
2148 }
2149}
2150
2151int aGMRES(const Operator &A, Vector &x, const Vector &b,
2152 const Operator &M, int &max_iter,
2153 int m_max, int m_min, int m_step, real_t cf,
2154 real_t &tol, real_t &atol, int printit)
2155{
2156 int n = A.Width();
2157
2158 int m = m_max;
2159
2160 DenseMatrix H(m+1,m);
2161 Vector s(m+1), cs(m+1), sn(m+1);
2162 Vector w(n), av(n);
2163
2164 real_t r1, resid;
2165 int i, j, k;
2166
2167 M.Mult(b,w);
2168 real_t normb = w.Norml2(); // normb = ||M b||
2169 if (normb == 0.0)
2170 {
2171 normb = 1;
2172 }
2173
2174 Vector r(n);
2175 A.Mult(x, r);
2176 subtract(b,r,w);
2177 M.Mult(w, r); // r = M (b - A x)
2178 real_t beta = r.Norml2(); // beta = ||r||
2179
2180 resid = beta / normb;
2181
2182 if (resid * resid <= tol)
2183 {
2184 tol = resid * resid;
2185 max_iter = 0;
2186 return 0;
2187 }
2188
2189 if (printit)
2190 {
2191 mfem::out << " Pass : " << setw(2) << 1
2192 << " Iteration : " << setw(3) << 0
2193 << " (r, r) = " << beta*beta << '\n';
2194 }
2195
2196 tol *= (normb*normb);
2197 tol = (atol > tol) ? atol : tol;
2198
2199 m = m_max;
2200 Array<Vector *> v(m+1);
2201 for (i= 0; i<=m; i++)
2202 {
2203 v[i] = new Vector(n);
2204 (*v[i]) = 0.0;
2205 }
2206
2207 j = 1;
2208 while (j <= max_iter)
2209 {
2210 (*v[0]) = 0.0;
2211 v[0] -> Add (1.0/beta, r); // v[0] = r / ||r||
2212 s = 0.0; s(0) = beta;
2213
2214 r1 = beta;
2215
2216 for (i = 0; i < m && j <= max_iter; i++)
2217 {
2218 A.Mult((*v[i]),av);
2219 M.Mult(av,w); // w = M A v[i]
2220
2221 for (k = 0; k <= i; k++)
2222 {
2223 H(k,i) = w * (*v[k]); // H(k,i) = w * v[k]
2224 w.Add(-H(k,i), (*v[k])); // w -= H(k,i) * v[k]
2225 }
2226
2227 H(i+1,i) = w.Norml2(); // H(i+1,i) = ||w||
2228 (*v[i+1]) = 0.0;
2229 v[i+1] -> Add (1.0/H(i+1,i), w); // v[i+1] = w / H(i+1,i)
2230
2231 for (k = 0; k < i; k++)
2232 {
2233 ApplyPlaneRotation(H(k,i), H(k+1,i), cs(k), sn(k));
2234 }
2235
2236 GeneratePlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
2237 ApplyPlaneRotation(H(i,i), H(i+1,i), cs(i), sn(i));
2238 ApplyPlaneRotation(s(i), s(i+1), cs(i), sn(i));
2239
2240 resid = fabs(s(i+1));
2241 if (printit)
2242 {
2243 mfem::out << " Pass : " << setw(2) << j
2244 << " Iteration : " << setw(3) << i+1
2245 << " (r, r) = " << resid*resid << '\n';
2246 }
2247
2248 if ( resid*resid < tol)
2249 {
2250 Update(x, i, H, s, v);
2251 tol = resid * resid;
2252 max_iter = j;
2253 for (i= 0; i<=m; i++)
2254 {
2255 delete v[i];
2256 }
2257 return 0;
2258 }
2259 }
2260
2261 if (printit)
2262 {
2263 mfem::out << "Restarting..." << '\n';
2264 }
2265
2266 Update(x, i-1, H, s, v);
2267
2268 A.Mult(x, r);
2269 subtract(b,r,w);
2270 M.Mult(w, r); // r = M (b - A x)
2271 beta = r.Norml2(); // beta = ||r||
2272 if ( resid*resid < tol)
2273 {
2274 tol = resid * resid;
2275 max_iter = j;
2276 for (i= 0; i<=m; i++)
2277 {
2278 delete v[i];
2279 }
2280 return 0;
2281 }
2282
2283 if (beta/r1 > cf)
2284 {
2285 if (m - m_step >= m_min)
2286 {
2287 m -= m_step;
2288 }
2289 else
2290 {
2291 m = m_max;
2292 }
2293 }
2294
2295 j++;
2296 }
2297
2298 tol = resid * resid;
2299 for (i= 0; i<=m; i++)
2300 {
2301 delete v[i];
2302 }
2303 return 1;
2304}
2305
2307 const Operator *C_,
2308 const Operator *D_)
2309 : C(C_), D(D_), c_e(NULL), d_lo(NULL), d_hi(NULL), x_lo(NULL), x_hi(NULL),
2310 input_size(insize)
2311{
2312 if (C) { MFEM_ASSERT(C->Width() == input_size, "Wrong width of C."); }
2313 if (D) { MFEM_ASSERT(D->Width() == input_size, "Wrong width of D."); }
2314}
2315
2317{
2318 MFEM_ASSERT(C, "The C operator is unspecified -- can't set constraints.");
2319 MFEM_ASSERT(c.Size() == C->Height(), "Wrong size of the constraint.");
2320
2321 c_e = &c;
2322}
2323
2325 const Vector &dh)
2326{
2327 MFEM_ASSERT(D, "The D operator is unspecified -- can't set constraints.");
2328 MFEM_ASSERT(dl.Size() == D->Height() && dh.Size() == D->Height(),
2329 "Wrong size of the constraint.");
2330
2331 d_lo = &dl; d_hi = &dh;
2332}
2333
2335{
2336 MFEM_ASSERT(xl.Size() == input_size && xh.Size() == input_size,
2337 "Wrong size of the constraint.");
2338
2339 x_lo = &xl; x_hi = &xh;
2340}
2341
2343{
2344 int m = 0;
2345 if (C) { m += C->Height(); }
2346 if (D) { m += D->Height(); }
2347 return m;
2348}
2349
2351{
2353 {
2354 MFEM_WARNING("Objective functional is ignored as SLBQP always minimizes"
2355 "the l2 norm of (x - x_target).");
2356 }
2357 MFEM_ASSERT(prob.GetC(), "Linear constraint is not set.");
2358 MFEM_ASSERT(prob.GetC()->Height() == 1, "Solver expects scalar constraint.");
2359
2360 problem = &prob;
2361}
2362
2363void SLBQPOptimizer::SetBounds(const Vector &lo_, const Vector &hi_)
2364{
2365 lo.SetDataAndSize(lo_.GetData(), lo_.Size());
2366 hi.SetDataAndSize(hi_.GetData(), hi_.Size());
2367}
2368
2370{
2371 w.SetDataAndSize(w_.GetData(), w_.Size());
2372 a = a_;
2373}
2374
2375inline void SLBQPOptimizer::print_iteration(int it, real_t r, real_t l) const
2376{
2378 {
2379 mfem::out << "SLBQP iteration " << it << ": residual = " << r
2380 << ", lambda = " << l << '\n';
2381 }
2382}
2383
2384void SLBQPOptimizer::Mult(const Vector& xt, Vector& x) const
2385{
2386 // Based on code provided by Denis Ridzal, dridzal@sandia.gov.
2387 // Algorithm adapted from Dai and Fletcher, "New Algorithms for
2388 // Singly Linearly Constrained Quadratic Programs Subject to Lower
2389 // and Upper Bounds", Numerical Analysis Report NA/216, 2003.
2390
2391 // Set some algorithm-specific constants and temporaries.
2392 int nclip = 0;
2393 real_t l = 0;
2394 real_t llow = 0;
2395 real_t lupp = 0;
2396 real_t lnew = 0;
2397 real_t dl = 2;
2398 real_t r = 0;
2399 real_t rlow = 0;
2400 real_t rupp = 0;
2401 real_t s = 0;
2402
2403 const real_t smin = 0.1;
2404
2405 const real_t tol = max(abs_tol, rel_tol*a);
2406
2407 // *** Start bracketing phase of SLBQP ***
2409 {
2410 mfem::out << "SLBQP bracketing phase" << '\n';
2411 }
2412
2413 // Solve QP with fixed Lagrange multiplier
2414 r = initial_norm = solve(l,xt,x,nclip);
2415 print_iteration(nclip, r, l);
2416
2417
2418 // If x=xt was already within bounds and satisfies the linear
2419 // constraint, then we already have the solution.
2420 if (fabs(r) <= tol)
2421 {
2422 converged = true;
2423 goto slbqp_done;
2424 }
2425
2426 if (r < 0)
2427 {
2428 llow = l; rlow = r; l = l + dl;
2429
2430 // Solve QP with fixed Lagrange multiplier
2431 r = solve(l,xt,x,nclip);
2432 print_iteration(nclip, r, l);
2433
2434 while ((r < 0) && (nclip < max_iter))
2435 {
2436 llow = l;
2437 s = rlow/r - 1.0;
2438 if (s < smin) { s = smin; }
2439 dl = dl + dl/s;
2440 l = l + dl;
2441
2442 // Solve QP with fixed Lagrange multiplier
2443 r = solve(l,xt,x,nclip);
2444 print_iteration(nclip, r, l);
2445 }
2446
2447 lupp = l; rupp = r;
2448 }
2449 else
2450 {
2451 lupp = l; rupp = r; l = l - dl;
2452
2453 // Solve QP with fixed Lagrange multiplier
2454 r = solve(l,xt,x,nclip);
2455 print_iteration(nclip, r, l);
2456
2457 while ((r > 0) && (nclip < max_iter))
2458 {
2459 lupp = l;
2460 s = rupp/r - 1.0;
2461 if (s < smin) { s = smin; }
2462 dl = dl + dl/s;
2463 l = l - dl;
2464
2465 // Solve QP with fixed Lagrange multiplier
2466 r = solve(l,xt,x,nclip);
2467 print_iteration(nclip, r, l);
2468 }
2469
2470 llow = l; rlow = r;
2471 }
2472
2473 // *** Stop bracketing phase of SLBQP ***
2474
2475
2476 // *** Start secant phase of SLBQP ***
2478 {
2479 mfem::out << "SLBQP secant phase" << '\n';
2480 }
2481
2482 s = 1.0 - rlow/rupp; dl = dl/s; l = lupp - dl;
2483
2484 // Solve QP with fixed Lagrange multiplier
2485 r = solve(l,xt,x,nclip);
2486 print_iteration(nclip, r, l);
2487
2488 while ( (fabs(r) > tol) && (nclip < max_iter) )
2489 {
2490 if (r > 0)
2491 {
2492 if (s <= 2.0)
2493 {
2494 lupp = l; rupp = r; s = 1.0 - rlow/rupp;
2495 dl = (lupp - llow)/s; l = lupp - dl;
2496 }
2497 else
2498 {
2499 s = rupp/r - 1.0;
2500 if (s < smin) { s = smin; }
2501 dl = (lupp - l)/s;
2502 lnew = 0.75*llow + 0.25*l;
2503 if (lnew < l-dl) { lnew = l-dl; }
2504 lupp = l; rupp = r; l = lnew;
2505 s = (lupp - llow)/(lupp - l);
2506 }
2507
2508 }
2509 else
2510 {
2511 if (s >= 2.0)
2512 {
2513 llow = l; rlow = r; s = 1.0 - rlow/rupp;
2514 dl = (lupp - llow)/s; l = lupp - dl;
2515 }
2516 else
2517 {
2518 s = rlow/r - 1.0;
2519 if (s < smin) { s = smin; }
2520 dl = (l - llow)/s;
2521 lnew = 0.75*lupp + 0.25*l;
2522 if (lnew < l+dl) { lnew = l+dl; }
2523 llow = l; rlow = r; l = lnew;
2524 s = (lupp - llow)/(lupp - l);
2525 }
2526 }
2527
2528 // Solve QP with fixed Lagrange multiplier
2529 r = solve(l,xt,x,nclip);
2530 print_iteration(nclip, r, l);
2531 }
2532
2533 // *** Stop secant phase of SLBQP ***
2534 converged = (fabs(r) <= tol);
2535
2536slbqp_done:
2537
2538 final_iter = nclip;
2539 final_norm = r;
2540
2543 {
2544 mfem::out << "SLBQP: Number of iterations: " << final_iter << '\n'
2545 << " lambda = " << l << '\n'
2546 << " ||r|| = " << final_norm << '\n';
2547 }
2549 {
2550 mfem::out << "SLBQP: No convergence!" << '\n';
2551 }
2552}
2553
2554struct WeightMinHeap
2555{
2556 const std::vector<real_t> &w;
2557 std::vector<size_t> c;
2558 std::vector<int> loc;
2559
2560 WeightMinHeap(const std::vector<real_t> &w_) : w(w_)
2561 {
2562 c.reserve(w.size());
2563 loc.resize(w.size());
2564 for (size_t i=0; i<w.size(); ++i) { push(i); }
2565 }
2566
2567 size_t percolate_up(size_t pos, real_t val)
2568 {
2569 for (; pos > 0 && w[c[(pos-1)/2]] > val; pos = (pos-1)/2)
2570 {
2571 c[pos] = c[(pos-1)/2];
2572 loc[c[(pos-1)/2]] = pos;
2573 }
2574 return pos;
2575 }
2576
2577 size_t percolate_down(size_t pos, real_t val)
2578 {
2579 while (2*pos+1 < c.size())
2580 {
2581 size_t left = 2*pos+1;
2582 size_t right = left+1;
2583 size_t tgt;
2584 if (right < c.size() && w[c[right]] < w[c[left]]) { tgt = right; }
2585 else { tgt = left; }
2586 if (w[c[tgt]] < val)
2587 {
2588 c[pos] = c[tgt];
2589 loc[c[tgt]] = pos;
2590 pos = tgt;
2591 }
2592 else
2593 {
2594 break;
2595 }
2596 }
2597 return pos;
2598 }
2599
2600 void push(size_t i)
2601 {
2602 real_t val = w[i];
2603 c.push_back(0);
2604 size_t pos = c.size()-1;
2605 pos = percolate_up(pos, val);
2606 c[pos] = i;
2607 loc[i] = pos;
2608 }
2609
2610 int pop()
2611 {
2612 size_t i = c[0];
2613 size_t j = c.back();
2614 c.pop_back();
2615 // Mark as removed
2616 loc[i] = -1;
2617 if (c.empty()) { return i; }
2618 real_t val = w[j];
2619 size_t pos = 0;
2620 pos = percolate_down(pos, val);
2621 c[pos] = j;
2622 loc[j] = pos;
2623 return i;
2624 }
2625
2626 void update(size_t i)
2627 {
2628 size_t pos = loc[i];
2629 real_t val = w[i];
2630 pos = percolate_up(pos, val);
2631 pos = percolate_down(pos, val);
2632 c[pos] = i;
2633 loc[i] = pos;
2634 }
2635
2636 bool picked(size_t i)
2637 {
2638 return loc[i] < 0;
2639 }
2640};
2641
2643{
2644 int n = C.Width();
2645 // Scale rows by reciprocal of diagonal and take absolute value
2646 Vector D;
2647 C.GetDiag(D);
2648 int *I = C.GetI();
2649 int *J = C.GetJ();
2650 real_t *V = C.GetData();
2651 for (int i=0; i<n; ++i)
2652 {
2653 for (int j=I[i]; j<I[i+1]; ++j)
2654 {
2655 V[j] = abs(V[j]/D[i]);
2656 }
2657 }
2658
2659 std::vector<real_t> w(n, 0.0);
2660 for (int k=0; k<n; ++k)
2661 {
2662 // Find all neighbors i of k
2663 for (int ii=I[k]; ii<I[k+1]; ++ii)
2664 {
2665 int i = J[ii];
2666 // Find value of (i,k)
2667 real_t C_ik = 0.0;
2668 for (int kk=I[i]; kk<I[i+1]; ++kk)
2669 {
2670 if (J[kk] == k)
2671 {
2672 C_ik = V[kk];
2673 break;
2674 }
2675 }
2676 for (int jj=I[k]; jj<I[k+1]; ++jj)
2677 {
2678 int j = J[jj];
2679 if (j == k) { continue; }
2680 real_t C_kj = V[jj];
2681 bool ij_exists = false;
2682 for (int jj2=I[i]; jj2<I[i+1]; ++jj2)
2683 {
2684 if (J[jj2] == j)
2685 {
2686 ij_exists = true;
2687 break;
2688 }
2689 }
2690 if (!ij_exists) { w[k] += pow(C_ik*C_kj,2); }
2691 }
2692 }
2693 w[k] = sqrt(w[k]);
2694 }
2695
2696 WeightMinHeap w_heap(w);
2697
2698 // Compute ordering
2699 p.SetSize(n);
2700 for (int ii=0; ii<n; ++ii)
2701 {
2702 int pi = w_heap.pop();
2703 p[ii] = pi;
2704 w[pi] = -1;
2705 for (int kk=I[pi]; kk<I[pi+1]; ++kk)
2706 {
2707 int k = J[kk];
2708 if (w_heap.picked(k)) { continue; }
2709 // Recompute weight
2710 w[k] = 0.0;
2711 // Find all neighbors i of k
2712 for (int ii2=I[k]; ii2<I[k+1]; ++ii2)
2713 {
2714 int i = J[ii2];
2715 if (w_heap.picked(i)) { continue; }
2716 // Find value of (i,k)
2717 real_t C_ik = 0.0;
2718 for (int kk2=I[i]; kk2<I[i+1]; ++kk2)
2719 {
2720 if (J[kk2] == k)
2721 {
2722 C_ik = V[kk2];
2723 break;
2724 }
2725 }
2726 for (int jj=I[k]; jj<I[k+1]; ++jj)
2727 {
2728 int j = J[jj];
2729 if (j == k || w_heap.picked(j)) { continue; }
2730 real_t C_kj = V[jj];
2731 bool ij_exists = false;
2732 for (int jj2=I[i]; jj2<I[i+1]; ++jj2)
2733 {
2734 if (J[jj2] == j)
2735 {
2736 ij_exists = true;
2737 break;
2738 }
2739 }
2740 if (!ij_exists) { w[k] += pow(C_ik*C_kj,2); }
2741 }
2742 }
2743 w[k] = sqrt(w[k]);
2744 w_heap.update(k);
2745 }
2746 }
2747}
2748
2749BlockILU::BlockILU(int block_size_,
2750 Reordering reordering_,
2751 int k_fill_)
2752 : Solver(0),
2753 block_size(block_size_),
2754 k_fill(k_fill_),
2755 reordering(reordering_)
2756{ }
2757
2759 int block_size_,
2760 Reordering reordering_,
2761 int k_fill_)
2762 : BlockILU(block_size_, reordering_, k_fill_)
2763{
2764 SetOperator(op);
2765}
2766
2768{
2769 const SparseMatrix *A = NULL;
2770#ifdef MFEM_USE_MPI
2771 const HypreParMatrix *A_par = dynamic_cast<const HypreParMatrix *>(&op);
2772 SparseMatrix A_par_diag;
2773 if (A_par != NULL)
2774 {
2775 A_par->GetDiag(A_par_diag);
2776 A = &A_par_diag;
2777 }
2778#endif
2779 if (A == NULL)
2780 {
2781 A = dynamic_cast<const SparseMatrix *>(&op);
2782 if (A == NULL)
2783 {
2784 MFEM_ABORT("BlockILU must be created with a SparseMatrix or HypreParMatrix");
2785 }
2786 }
2787 height = op.Height();
2788 width = op.Width();
2789 MFEM_ASSERT(A->Finalized(), "Matrix must be finalized.");
2790 CreateBlockPattern(*A);
2791 Factorize();
2792}
2793
2794void BlockILU::CreateBlockPattern(const SparseMatrix &A)
2795{
2796 MFEM_VERIFY(k_fill == 0, "Only block ILU(0) is currently supported.");
2797 if (A.Height() % block_size != 0)
2798 {
2799 MFEM_ABORT("BlockILU: block size must evenly divide the matrix size");
2800 }
2801
2802 int nrows = A.Height();
2803 const int *I = A.GetI();
2804 const int *J = A.GetJ();
2805 const real_t *V = A.GetData();
2806 int nnz = 0;
2807 int nblockrows = nrows / block_size;
2808
2809 std::vector<std::set<int>> unique_block_cols(nblockrows);
2810
2811 for (int iblock = 0; iblock < nblockrows; ++iblock)
2812 {
2813 for (int bi = 0; bi < block_size; ++bi)
2814 {
2815 int i = iblock * block_size + bi;
2816 for (int k = I[i]; k < I[i + 1]; ++k)
2817 {
2818 unique_block_cols[iblock].insert(J[k] / block_size);
2819 }
2820 }
2821 nnz += unique_block_cols[iblock].size();
2822 }
2823
2824 if (reordering != Reordering::NONE)
2825 {
2826 SparseMatrix C(nblockrows, nblockrows);
2827 for (int iblock = 0; iblock < nblockrows; ++iblock)
2828 {
2829 for (int jblock : unique_block_cols[iblock])
2830 {
2831 for (int bi = 0; bi < block_size; ++bi)
2832 {
2833 int i = iblock * block_size + bi;
2834 for (int k = I[i]; k < I[i + 1]; ++k)
2835 {
2836 int j = J[k];
2837 if (j >= jblock * block_size && j < (jblock + 1) * block_size)
2838 {
2839 C.Add(iblock, jblock, V[k]*V[k]);
2840 }
2841 }
2842 }
2843 }
2844 }
2845 C.Finalize(false);
2846 real_t *CV = C.GetData();
2847 for (int i=0; i<C.NumNonZeroElems(); ++i)
2848 {
2849 CV[i] = sqrt(CV[i]);
2850 }
2851
2852 switch (reordering)
2853 {
2856 break;
2857 default:
2858 MFEM_ABORT("BlockILU: unknown reordering")
2859 }
2860 }
2861 else
2862 {
2863 // No reordering: permutation is identity
2864 P.SetSize(nblockrows);
2865 for (int i=0; i<nblockrows; ++i)
2866 {
2867 P[i] = i;
2868 }
2869 }
2870
2871 // Compute inverse permutation
2872 Pinv.SetSize(nblockrows);
2873 for (int i=0; i<nblockrows; ++i)
2874 {
2875 Pinv[P[i]] = i;
2876 }
2877
2878 // Permute columns
2879 std::vector<std::vector<int>> unique_block_cols_perminv(nblockrows);
2880 for (int i=0; i<nblockrows; ++i)
2881 {
2882 std::vector<int> &cols = unique_block_cols_perminv[i];
2883 for (int j : unique_block_cols[P[i]])
2884 {
2885 cols.push_back(Pinv[j]);
2886 }
2887 std::sort(cols.begin(), cols.end());
2888 }
2889
2890 ID.SetSize(nblockrows);
2891 IB.SetSize(nblockrows + 1);
2892 IB[0] = 0;
2893 JB.SetSize(nnz);
2894 AB.SetSize(block_size, block_size, nnz);
2895 DB.SetSize(block_size, block_size, nblockrows);
2896 AB = 0.0;
2897 DB = 0.0;
2898 ipiv.SetSize(block_size*nblockrows);
2899 int counter = 0;
2900
2901 for (int iblock = 0; iblock < nblockrows; ++iblock)
2902 {
2903 int iblock_perm = P[iblock];
2904 for (int jblock : unique_block_cols_perminv[iblock])
2905 {
2906 int jblock_perm = P[jblock];
2907 if (iblock == jblock)
2908 {
2909 ID[iblock] = counter;
2910 }
2911 JB[counter] = jblock;
2912 for (int bi = 0; bi < block_size; ++bi)
2913 {
2914 int i = iblock_perm*block_size + bi;
2915 for (int k = I[i]; k < I[i + 1]; ++k)
2916 {
2917 int j = J[k];
2918 if (j >= jblock_perm*block_size && j < (jblock_perm + 1)*block_size)
2919 {
2920 int bj = j - jblock_perm*block_size;
2921 real_t val = V[k];
2922 AB(bi, bj, counter) = val;
2923 // Extract the diagonal
2924 if (iblock == jblock)
2925 {
2926 DB(bi, bj, iblock) = val;
2927 }
2928 }
2929 }
2930 }
2931 ++counter;
2932 }
2933 IB[iblock + 1] = counter;
2934 }
2935}
2936
2937void BlockILU::Factorize()
2938{
2939 int nblockrows = Height()/block_size;
2940
2941 // Precompute LU factorization of diagonal blocks
2942 for (int i=0; i<nblockrows; ++i)
2943 {
2944 LUFactors factorization(DB.GetData(i), &ipiv[i*block_size]);
2945 factorization.Factor(block_size);
2946 }
2947
2948 // Note: we use UseExternalData to extract submatrices from the tensor AB
2949 // instead of the DenseTensor call operator, because the call operator does
2950 // not allow for two simultaneous submatrix views into the same tensor
2951 DenseMatrix A_ik, A_ij, A_kj;
2952 // Loop over block rows (starting with second block row)
2953 for (int i=1; i<nblockrows; ++i)
2954 {
2955 // Find all nonzeros to the left of the diagonal in row i
2956 for (int kk=IB[i]; kk<IB[i+1]; ++kk)
2957 {
2958 int k = JB[kk];
2959 // Make sure we're still to the left of the diagonal
2960 if (k == i) { break; }
2961 if (k > i)
2962 {
2963 MFEM_ABORT("Matrix must be sorted with nonzero diagonal");
2964 }
2965 LUFactors A_kk_inv(DB.GetData(k), &ipiv[k*block_size]);
2966 A_ik.UseExternalData(&AB(0,0,kk), block_size, block_size);
2967 // A_ik = A_ik * A_kk^{-1}
2968 A_kk_inv.RightSolve(block_size, block_size, A_ik.GetData());
2969 // Modify everything to the right of k in row i
2970 for (int jj=kk+1; jj<IB[i+1]; ++jj)
2971 {
2972 int j = JB[jj];
2973 if (j <= k) { continue; } // Superfluous because JB is sorted?
2974 A_ij.UseExternalData(&AB(0,0,jj), block_size, block_size);
2975 for (int ll=IB[k]; ll<IB[k+1]; ++ll)
2976 {
2977 int l = JB[ll];
2978 if (l == j)
2979 {
2980 A_kj.UseExternalData(&AB(0,0,ll), block_size, block_size);
2981 // A_ij = A_ij - A_ik*A_kj;
2982 AddMult_a(-1.0, A_ik, A_kj, A_ij);
2983 // If we need to, update diagonal factorization
2984 if (j == i)
2985 {
2986 DB(i) = A_ij;
2987 LUFactors factorization(DB.GetData(i), &ipiv[i*block_size]);
2988 factorization.Factor(block_size);
2989 }
2990 break;
2991 }
2992 }
2993 }
2994 }
2995 }
2996}
2997
2998void BlockILU::Mult(const Vector &b, Vector &x) const
2999{
3000 MFEM_ASSERT(height > 0, "BlockILU(0) preconditioner is not constructed");
3001 int nblockrows = Height()/block_size;
3002 y.SetSize(Height());
3003
3004 DenseMatrix B;
3005 Vector yi, yj, xi, xj;
3006 Vector tmp(block_size);
3007 // Forward substitute to solve Ly = b
3008 // Implicitly, L has identity on the diagonal
3009 y = 0.0;
3010 for (int i=0; i<nblockrows; ++i)
3011 {
3012 yi.SetDataAndSize(&y[i*block_size], block_size);
3013 for (int ib=0; ib<block_size; ++ib)
3014 {
3015 yi[ib] = b[ib + P[i]*block_size];
3016 }
3017 for (int k=IB[i]; k<ID[i]; ++k)
3018 {
3019 int j = JB[k];
3020 const DenseMatrix &L_ij = AB(k);
3021 yj.SetDataAndSize(&y[j*block_size], block_size);
3022 // y_i = y_i - L_ij*y_j
3023 L_ij.AddMult_a(-1.0, yj, yi);
3024 }
3025 }
3026 // Backward substitution to solve Ux = y
3027 for (int i=nblockrows-1; i >= 0; --i)
3028 {
3029 xi.SetDataAndSize(&x[P[i]*block_size], block_size);
3030 for (int ib=0; ib<block_size; ++ib)
3031 {
3032 xi[ib] = y[ib + i*block_size];
3033 }
3034 for (int k=ID[i]+1; k<IB[i+1]; ++k)
3035 {
3036 int j = JB[k];
3037 const DenseMatrix &U_ij = AB(k);
3038 xj.SetDataAndSize(&x[P[j]*block_size], block_size);
3039 // x_i = x_i - U_ij*x_j
3040 U_ij.AddMult_a(-1.0, xj, xi);
3041 }
3042 LUFactors A_ii_inv(&DB(0,0,i), &ipiv[i*block_size]);
3043 // x_i = D_ii^{-1} x_i
3044 A_ii_inv.Solve(block_size, 1, xi.GetData());
3045 }
3046}
3047
3048
3050 int it, real_t norm, const Vector &r, bool final)
3051{
3052 if (!ess_dofs_list) { return; }
3053
3054 real_t bc_norm_squared = 0.0;
3055 r.HostRead();
3057 for (int i = 0; i < ess_dofs_list->Size(); i++)
3058 {
3059 const real_t r_entry = r((*ess_dofs_list)[i]);
3060 bc_norm_squared += r_entry*r_entry;
3061 }
3062 bool print = true;
3063#ifdef MFEM_USE_MPI
3064 MPI_Comm comm = iter_solver->GetComm();
3065 if (comm != MPI_COMM_NULL)
3066 {
3067 double glob_bc_norm_squared = 0.0;
3068 MPI_Reduce(&bc_norm_squared, &glob_bc_norm_squared, 1,
3070 MPI_SUM, 0, comm);
3071 bc_norm_squared = glob_bc_norm_squared;
3072 int rank;
3073 MPI_Comm_rank(comm, &rank);
3074 print = (rank == 0);
3075 }
3076#endif
3077 if ((it == 0 || final || bc_norm_squared > 0.0) && print)
3078 {
3079 mfem::out << " ResidualBCMonitor : b.c. residual norm = "
3080 << sqrt(bc_norm_squared) << endl;
3081 }
3082}
3083
3084
3085#ifdef MFEM_USE_SUITESPARSE
3086
3088{
3089 mat = NULL;
3090 Numeric = NULL;
3091 AI = AJ = NULL;
3092 if (!use_long_ints)
3093 {
3094 umfpack_di_defaults(Control);
3095 }
3096 else
3097 {
3098 umfpack_dl_defaults(Control);
3099 }
3100}
3101
3103{
3104 void *Symbolic;
3105
3106 if (Numeric)
3107 {
3108 if (!use_long_ints)
3109 {
3110 umfpack_di_free_numeric(&Numeric);
3111 }
3112 else
3113 {
3114 umfpack_dl_free_numeric(&Numeric);
3115 }
3116 }
3117
3118 mat = const_cast<SparseMatrix *>(dynamic_cast<const SparseMatrix *>(&op));
3119 MFEM_VERIFY(mat, "not a SparseMatrix");
3120
3121 // UMFPack requires that the column-indices in mat corresponding to each
3122 // row be sorted.
3123 // Generally, this will modify the ordering of the entries of mat.
3125
3126 height = mat->Height();
3127 width = mat->Width();
3128 MFEM_VERIFY(width == height, "not a square matrix");
3129
3130 const int * Ap = mat->HostReadI();
3131 const int * Ai = mat->HostReadJ();
3132 const real_t * Ax = mat->HostReadData();
3133
3134 if (!use_long_ints)
3135 {
3136 int status = umfpack_di_symbolic(width, width, Ap, Ai, Ax, &Symbolic,
3137 Control, Info);
3138 if (status < 0)
3139 {
3140 umfpack_di_report_info(Control, Info);
3141 umfpack_di_report_status(Control, status);
3142 mfem_error("UMFPackSolver::SetOperator :"
3143 " umfpack_di_symbolic() failed!");
3144 }
3145
3146 status = umfpack_di_numeric(Ap, Ai, Ax, Symbolic, &Numeric,
3147 Control, Info);
3148 if (status < 0)
3149 {
3150 umfpack_di_report_info(Control, Info);
3151 umfpack_di_report_status(Control, status);
3152 mfem_error("UMFPackSolver::SetOperator :"
3153 " umfpack_di_numeric() failed!");
3154 }
3155 umfpack_di_free_symbolic(&Symbolic);
3156 }
3157 else
3158 {
3159 SuiteSparse_long status;
3160
3161 delete [] AJ;
3162 delete [] AI;
3163 AI = new SuiteSparse_long[width + 1];
3164 AJ = new SuiteSparse_long[Ap[width]];
3165 for (int i = 0; i <= width; i++)
3166 {
3167 AI[i] = (SuiteSparse_long)(Ap[i]);
3168 }
3169 for (int i = 0; i < Ap[width]; i++)
3170 {
3171 AJ[i] = (SuiteSparse_long)(Ai[i]);
3172 }
3173
3174 status = umfpack_dl_symbolic(width, width, AI, AJ, Ax, &Symbolic,
3175 Control, Info);
3176 if (status < 0)
3177 {
3178 umfpack_dl_report_info(Control, Info);
3179 umfpack_dl_report_status(Control, status);
3180 mfem_error("UMFPackSolver::SetOperator :"
3181 " umfpack_dl_symbolic() failed!");
3182 }
3183
3184 status = umfpack_dl_numeric(AI, AJ, Ax, Symbolic, &Numeric,
3185 Control, Info);
3186 if (status < 0)
3187 {
3188 umfpack_dl_report_info(Control, Info);
3189 umfpack_dl_report_status(Control, status);
3190 mfem_error("UMFPackSolver::SetOperator :"
3191 " umfpack_dl_numeric() failed!");
3192 }
3193 umfpack_dl_free_symbolic(&Symbolic);
3194 }
3195}
3196
3197void UMFPackSolver::Mult(const Vector &b, Vector &x) const
3198{
3199 if (mat == NULL)
3200 mfem_error("UMFPackSolver::Mult : matrix is not set!"
3201 " Call SetOperator first!");
3202 b.HostRead();
3203 x.HostReadWrite();
3204 if (!use_long_ints)
3205 {
3206 int status =
3207 umfpack_di_solve(UMFPACK_At, mat->HostReadI(), mat->HostReadJ(),
3208 mat->HostReadData(), x.HostWrite(), b.HostRead(),
3209 Numeric, Control, Info);
3210 umfpack_di_report_info(Control, Info);
3211 if (status < 0)
3212 {
3213 umfpack_di_report_status(Control, status);
3214 mfem_error("UMFPackSolver::Mult : umfpack_di_solve() failed!");
3215 }
3216 }
3217 else
3218 {
3219 SuiteSparse_long status =
3220 umfpack_dl_solve(UMFPACK_At, AI, AJ, mat->HostReadData(),
3221 x.HostWrite(), b.HostRead(), Numeric, Control,
3222 Info);
3223 umfpack_dl_report_info(Control, Info);
3224 if (status < 0)
3225 {
3226 umfpack_dl_report_status(Control, status);
3227 mfem_error("UMFPackSolver::Mult : umfpack_dl_solve() failed!");
3228 }
3229 }
3230}
3231
3233{
3234 if (mat == NULL)
3235 mfem_error("UMFPackSolver::MultTranspose : matrix is not set!"
3236 " Call SetOperator first!");
3237 b.HostRead();
3238 x.HostReadWrite();
3239 if (!use_long_ints)
3240 {
3241 int status =
3242 umfpack_di_solve(UMFPACK_A, mat->HostReadI(), mat->HostReadJ(),
3243 mat->HostReadData(), x.HostWrite(), b.HostRead(),
3244 Numeric, Control, Info);
3245 umfpack_di_report_info(Control, Info);
3246 if (status < 0)
3247 {
3248 umfpack_di_report_status(Control, status);
3249 mfem_error("UMFPackSolver::MultTranspose :"
3250 " umfpack_di_solve() failed!");
3251 }
3252 }
3253 else
3254 {
3255 SuiteSparse_long status =
3256 umfpack_dl_solve(UMFPACK_A, AI, AJ, mat->HostReadData(),
3257 x.HostWrite(), b.HostRead(), Numeric, Control,
3258 Info);
3259 umfpack_dl_report_info(Control, Info);
3260 if (status < 0)
3261 {
3262 umfpack_dl_report_status(Control, status);
3263 mfem_error("UMFPackSolver::MultTranspose :"
3264 " umfpack_dl_solve() failed!");
3265 }
3266 }
3267}
3268
3270{
3271 delete [] AJ;
3272 delete [] AI;
3273 if (Numeric)
3274 {
3275 if (!use_long_ints)
3276 {
3277 umfpack_di_free_numeric(&Numeric);
3278 }
3279 else
3280 {
3281 umfpack_dl_free_numeric(&Numeric);
3282 }
3283 }
3284}
3285
3287{
3288 klu_defaults(&Common);
3289}
3290
3292{
3293 if (Numeric)
3294 {
3295 MFEM_ASSERT(Symbolic != 0,
3296 "Had Numeric pointer in KLU, but not Symbolic");
3297 klu_free_symbolic(&Symbolic, &Common);
3298 Symbolic = 0;
3299 klu_free_numeric(&Numeric, &Common);
3300 Numeric = 0;
3301 }
3302
3303 mat = const_cast<SparseMatrix *>(dynamic_cast<const SparseMatrix *>(&op));
3304 MFEM_VERIFY(mat != NULL, "not a SparseMatrix");
3305
3306 // KLU requires that the column-indices in mat corresponding to each row be
3307 // sorted. Generally, this will modify the ordering of the entries of mat.
3309
3310 height = mat->Height();
3311 width = mat->Width();
3312 MFEM_VERIFY(width == height, "not a square matrix");
3313
3314 int * Ap = mat->GetI();
3315 int * Ai = mat->GetJ();
3316 real_t * Ax = mat->GetData();
3317
3318 Symbolic = klu_analyze( height, Ap, Ai, &Common);
3319 Numeric = klu_factor(Ap, Ai, Ax, Symbolic, &Common);
3320}
3321
3322void KLUSolver::Mult(const Vector &b, Vector &x) const
3323{
3324 MFEM_VERIFY(mat != NULL,
3325 "KLUSolver::Mult : matrix is not set! Call SetOperator first!");
3326
3327 int n = mat->Height();
3328 int numRhs = 1;
3329 // Copy B into X, so we can pass it in and overwrite it.
3330 x = b;
3331 // Solve the transpose, since KLU thinks the matrix is compressed column
3332 // format.
3333 klu_tsolve( Symbolic, Numeric, n, numRhs, x.GetData(), &Common);
3334}
3335
3337{
3338 MFEM_VERIFY(mat != NULL,
3339 "KLUSolver::Mult : matrix is not set! Call SetOperator first!");
3340
3341 int n = mat->Height();
3342 int numRhs = 1;
3343 // Copy B into X, so we can pass it in and overwrite it.
3344 x = b;
3345 // Solve the regular matrix, not the transpose, since KLU thinks the matrix
3346 // is compressed column format.
3347 klu_solve( Symbolic, Numeric, n, numRhs, x.GetData(), &Common);
3348}
3349
3351{
3352 klu_free_symbolic (&Symbolic, &Common) ;
3353 klu_free_numeric (&Numeric, &Common) ;
3354 Symbolic = 0;
3355 Numeric = 0;
3356}
3357
3358#endif // MFEM_USE_SUITESPARSE
3359
3361 const SparseMatrix &block_dof_)
3362 : Solver(A.NumRows()), block_dof(const_cast<SparseMatrix&>(block_dof_)),
3363 block_solvers(new DenseMatrixInverse[block_dof.NumRows()])
3364{
3365 DenseMatrix sub_A;
3366 for (int i = 0; i < block_dof.NumRows(); ++i)
3367 {
3368 local_dofs.MakeRef(block_dof.GetRowColumns(i), block_dof.RowSize(i));
3369 sub_A.SetSize(local_dofs.Size());
3370 A.GetSubMatrix(local_dofs, local_dofs, sub_A);
3371 block_solvers[i].SetOperator(sub_A);
3372 }
3373}
3374
3376{
3377 y.SetSize(x.Size());
3378 y = 0.0;
3379
3380 for (int i = 0; i < block_dof.NumRows(); ++i)
3381 {
3382 local_dofs.MakeRef(block_dof.GetRowColumns(i), block_dof.RowSize(i));
3383 x.GetSubVector(local_dofs, sub_rhs);
3384 sub_sol.SetSize(local_dofs.Size());
3385 block_solvers[i].Mult(sub_rhs, sub_sol);
3386 y.AddElementVector(local_dofs, sub_sol);
3387 }
3388}
3389
3390void ProductSolver::Mult(const Vector & x, Vector & y) const
3391{
3392 y.SetSize(x.Size());
3393 y = 0.0;
3394 S0->Mult(x, y);
3395
3396 Vector z(x.Size());
3397 z = 0.0;
3398 A->Mult(y, z);
3399 add(-1.0, z, 1.0, x, z); // z = (I - A * S0) x
3400
3401 Vector S1z(x.Size());
3402 S1z = 0.0;
3403 S1->Mult(z, S1z);
3404 y += S1z;
3405}
3406
3408{
3409 y.SetSize(x.Size());
3410 y = 0.0;
3411 S1->MultTranspose(x, y);
3412
3413 Vector z(x.Size());
3414 z = 0.0;
3415 A->MultTranspose(y, z);
3416 add(-1.0, z, 1.0, x, z); // z = (I - A^T * S1^T) x
3417
3418 Vector S0Tz(x.Size());
3419 S0Tz = 0.0;
3420 S0->MultTranspose(z, S0Tz);
3421 y += S0Tz;
3422}
3423
3425 : Solver(0, false), global_size(-1)
3426#ifdef MFEM_USE_MPI
3427 , parallel(false)
3428#endif
3429{ }
3430
3431#ifdef MFEM_USE_MPI
3432OrthoSolver::OrthoSolver(MPI_Comm mycomm_)
3433 : Solver(0, false), mycomm(mycomm_), global_size(-1), parallel(true) { }
3434#endif
3435
3437{
3438 solver = &s;
3439 height = s.Height();
3440 width = s.Width();
3441 MFEM_VERIFY(height == width, "Solver must be a square Operator!");
3442 global_size = -1; // lazy evaluated
3443}
3444
3446{
3447 MFEM_VERIFY(solver, "Solver hasn't been set, call SetSolver() first.");
3448 solver->SetOperator(op);
3449 height = solver->Height();
3450 width = solver->Width();
3451 MFEM_VERIFY(height == width, "Solver must be a square Operator!");
3452 global_size = -1; // lazy evaluated
3453}
3454
3455void OrthoSolver::Mult(const Vector &b, Vector &x) const
3456{
3457 MFEM_VERIFY(solver, "Solver hasn't been set, call SetSolver() first.");
3458 MFEM_VERIFY(height == solver->Height(),
3459 "solver was modified externally! call SetSolver() again!");
3460 MFEM_VERIFY(height == b.Size(), "incompatible input Vector size!");
3461 MFEM_VERIFY(height == x.Size(), "incompatible output Vector size!");
3462
3463 // Orthogonalize input
3464 Orthogonalize(b, b_ortho);
3465
3466 // Propagate iterative_mode to the solver:
3468
3469 // Apply the Solver
3470 solver->Mult(b_ortho, x);
3471
3472 // Orthogonalize output
3473 Orthogonalize(x, x);
3474}
3475
3476void OrthoSolver::Orthogonalize(const Vector &v, Vector &v_ortho) const
3477{
3478 if (global_size == -1)
3479 {
3480 global_size = height;
3481#ifdef MFEM_USE_MPI
3482 if (parallel)
3483 {
3484 MPI_Allreduce(MPI_IN_PLACE, &global_size, 1, HYPRE_MPI_BIG_INT,
3485 MPI_SUM, mycomm);
3486 }
3487#endif
3488 }
3489
3490 // TODO: GPU/device implementation
3491
3492 real_t global_sum = v.Sum();
3493
3494#ifdef MFEM_USE_MPI
3495 if (parallel)
3496 {
3497 MPI_Allreduce(MPI_IN_PLACE, &global_sum, 1, MPITypeMap<real_t>::mpi_type,
3498 MPI_SUM, mycomm);
3499 }
3500#endif
3501
3502 real_t ratio = global_sum / static_cast<real_t>(global_size);
3503 v_ortho.SetSize(v.Size());
3504 v.HostRead();
3505 v_ortho.HostWrite();
3506 for (int i = 0; i < v_ortho.Size(); ++i)
3507 {
3508 v_ortho(i) = v(i) - ratio;
3509 }
3510}
3511
3512#ifdef MFEM_USE_MPI
3514 HypreParMatrix *aux_map,
3515 bool op_is_symmetric,
3516 bool own_aux_map)
3517 : Solver(op.NumRows()), aux_map_(aux_map, own_aux_map)
3518{
3519 aux_system_.Reset(RAP(&op, aux_map));
3520 aux_system_.As<HypreParMatrix>()->EliminateZeroRows();
3521 aux_smoother_.Reset(new HypreSmoother(*aux_system_.As<HypreParMatrix>()));
3522 aux_smoother_.As<HypreSmoother>()->SetOperatorSymmetry(op_is_symmetric);
3523}
3524
3525void AuxSpaceSmoother::Mult(const Vector &x, Vector &y, bool transpose) const
3526{
3527 Vector aux_rhs(aux_map_->NumCols());
3528 aux_map_->MultTranspose(x, aux_rhs);
3529
3530 Vector aux_sol(aux_rhs.Size());
3531 if (transpose)
3532 {
3533 aux_smoother_->MultTranspose(aux_rhs, aux_sol);
3534 }
3535 else
3536 {
3537 aux_smoother_->Mult(aux_rhs, aux_sol);
3538 }
3539
3540 y.SetSize(aux_map_->NumRows());
3541 aux_map_->Mult(aux_sol, y);
3542}
3543#endif // MFEM_USE_MPI
3544
3545#ifdef MFEM_USE_LAPACK
3546// LAPACK routines for NNLSSolver
3547#ifdef MFEM_USE_SINGLE
3548extern "C" void
3549sormqr_(char *, char *, int *, int *, int *, float *, int*, float *,
3550 float *, int *, float *, int*, int*);
3551
3552extern "C" void
3553sgeqrf_(int *, int *, float *, int *, float *, float *, int *, int *);
3554
3555extern "C" void
3556sgemv_(char *, int *, int *, float *, float *, int *, float *, int *,
3557 float *, float *, int *);
3558
3559extern "C" void
3560strsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n,
3561 float *alpha, float *a, int *lda, float *b, int *ldb);
3562#elif defined MFEM_USE_DOUBLE
3563extern "C" void
3564dormqr_(char *, char *, int *, int *, int *, double *, int*, double *,
3565 double *, int *, double *, int*, int*);
3566
3567extern "C" void
3568dgeqrf_(int *, int *, double *, int *, double *, double *, int *, int *);
3569
3570extern "C" void
3571dgemv_(char *, int *, int *, double *, double *, int *, double *, int *,
3572 double *, double *, int *);
3573
3574extern "C" void
3575dtrsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n,
3576 double *alpha, double *a, int *lda, double *b, int *ldb);
3577#endif
3578
3580 : Solver(0), mat(nullptr), const_tol_(1.0e-14), min_nnz_(0),
3581 max_nnz_(0), verbosity_(0), res_change_termination_tol_(1.0e-4),
3582 zero_tol_(1.0e-14), rhs_delta_(1.0e-11), n_outer_(100000),
3583 n_inner_(100000), nStallCheck_(100), normalize_(true),
3584 NNLS_qrres_on_(false), qr_residual_mode_(QRresidualMode::hybrid)
3585{}
3586
3588{
3589 mat = dynamic_cast<const DenseMatrix*>(&op);
3590 MFEM_VERIFY(mat, "NNLSSolver operator must be of type DenseMatrix");
3591
3592 // The size of this operator is that of the transpose of op.
3593 height = op.Width();
3594 width = op.Height();
3595
3596 row_scaling_.SetSize(mat->NumRows());
3597 row_scaling_ = 1.0;
3598}
3599
3601{
3602 qr_residual_mode_ = qr_residual_mode;
3603 if (qr_residual_mode_ == QRresidualMode::on)
3604 {
3605 NNLS_qrres_on_ = true;
3606 }
3607}
3608
3610{
3611 // Scale everything so that rescaled half gap is the same for all constraints
3612 const int m = mat->NumRows();
3613
3614 MFEM_VERIFY(rhs_lb.Size() == m && rhs_ub.Size() == m, "");
3615
3616 Vector rhs_avg = rhs_ub;
3617 rhs_avg += rhs_lb;
3618 rhs_avg *= 0.5;
3619
3620 Vector rhs_halfgap = rhs_ub;
3621 rhs_halfgap -= rhs_lb;
3622 rhs_halfgap *= 0.5;
3623
3624 Vector rhs_avg_glob = rhs_avg;
3625 Vector rhs_halfgap_glob = rhs_halfgap;
3626 Vector halfgap_target(m);
3627 halfgap_target = 1.0e3 * const_tol_;
3628
3629 row_scaling_.SetSize(m);
3630
3631 for (int i=0; i<m; ++i)
3632 {
3633 const real_t s = halfgap_target(i) / rhs_halfgap_glob(i);
3634 row_scaling_[i] = s;
3635
3636 rhs_lb(i) = (rhs_avg(i) * s) - halfgap_target(i);
3637 rhs_ub(i) = (rhs_avg(i) * s) + halfgap_target(i);
3638 }
3639}
3640
3641void NNLSSolver::Mult(const Vector &w, Vector &sol) const
3642{
3643 MFEM_VERIFY(mat, "NNLSSolver operator must be of type DenseMatrix");
3644 Vector rhs_ub(mat->NumRows());
3645 mat->Mult(w, rhs_ub);
3646 rhs_ub *= row_scaling_;
3647
3648 Vector rhs_lb(rhs_ub);
3649 Vector rhs_Gw(rhs_ub);
3650
3651 for (int i=0; i<rhs_ub.Size(); ++i)
3652 {
3653 rhs_lb(i) -= rhs_delta_;
3654 rhs_ub(i) += rhs_delta_;
3655 }
3656
3657 if (normalize_) { NormalizeConstraints(rhs_lb, rhs_ub); }
3658 Solve(rhs_lb, rhs_ub, sol);
3659
3660 if (verbosity_ > 1)
3661 {
3662 int nnz = 0;
3663 for (int i=0; i<sol.Size(); ++i)
3664 {
3665 if (sol(i) != 0.0)
3666 {
3667 nnz++;
3668 }
3669 }
3670
3671 mfem::out << "Number of nonzeros in NNLSSolver solution: " << nnz
3672 << ", out of " << sol.Size() << endl;
3673
3674 // Check residual of NNLS solution
3675 Vector res(mat->NumRows());
3676 mat->Mult(sol, res);
3677 res *= row_scaling_;
3678
3679 const real_t normGsol = res.Norml2();
3680 const real_t normRHS = rhs_Gw.Norml2();
3681
3682 res -= rhs_Gw;
3683 const real_t relNorm = res.Norml2() / std::max(normGsol, normRHS);
3684 mfem::out << "Relative residual norm for NNLSSolver solution of Gs = Gw: "
3685 << relNorm << endl;
3686 }
3687}
3688
3689void NNLSSolver::Solve(const Vector& rhs_lb, const Vector& rhs_ub,
3690 Vector& soln) const
3691{
3692 int m = mat->NumRows();
3693 int n = mat->NumCols();
3694
3695 MFEM_VERIFY(rhs_lb.Size() == m && rhs_lb.Size() == m && soln.Size() == n, "");
3696 MFEM_VERIFY(n >= m, "NNLSSolver system cannot be over-determined.");
3697
3698 if (max_nnz_ == 0)
3699 {
3700 max_nnz_ = mat->NumCols();
3701 }
3702
3703 // Prepare right hand side
3704 Vector rhs_avg(rhs_ub);
3705 rhs_avg += rhs_lb;
3706 rhs_avg *= 0.5;
3707
3708 Vector rhs_halfgap(rhs_ub);
3709 rhs_halfgap -= rhs_lb;
3710 rhs_halfgap *= 0.5;
3711
3712 Vector rhs_avg_glob(rhs_avg);
3713 Vector rhs_halfgap_glob(rhs_halfgap);
3714
3715 int ione = 1;
3716 real_t fone = 1.0;
3717
3718 char lside = 'L';
3719 char trans = 'T';
3720 char notrans = 'N';
3721
3722 std::vector<unsigned int> nz_ind(m);
3723 Vector res_glob(m);
3724 Vector mu(n);
3725 Vector mu2(n);
3726 int n_nz_ind = 0;
3727 int n_glob = 0;
3728 int m_update;
3729 int min_nnz_cap = std::min(static_cast<int>(min_nnz_), std::min(m,n));
3730 int info;
3731 std::vector<real_t> l2_res_hist;
3732 std::vector<unsigned int> stalled_indices;
3733 int stalledFlag = 0;
3734 int num_stalled = 0;
3735 int nz_ind_zero = 0;
3736
3737 Vector soln_nz_glob(m);
3738 Vector soln_nz_glob_up(m);
3739
3740 // The following matrices are stored in column-major format as Vectors
3741 Vector mat_0_data(m * n);
3742 Vector mat_qr_data(m * n);
3743 Vector submat_data(m * n);
3744
3745 Vector tau(n);
3746 Vector sub_tau = tau;
3747 Vector vec1(m);
3748
3749 // Temporary work arrays
3750 int lwork;
3751 std::vector<real_t> work;
3752 int n_outer_iter = 0;
3753 int n_total_inner_iter = 0;
3754 int i_qr_start;
3755 int n_update;
3756 // 0 = converged; 1 = maximum iterations reached;
3757 // 2 = NNLS stalled (no change in residual for many iterations)
3758 int exit_flag = 1;
3759
3760 res_glob = rhs_avg_glob;
3761 Vector qt_rhs_glob = rhs_avg_glob;
3762 Vector qqt_rhs_glob = qt_rhs_glob;
3763 Vector sub_qt = rhs_avg_glob;
3764
3765 // Compute threshold tolerance for the Lagrange multiplier mu
3766 real_t mu_tol = 0.0;
3767
3768 {
3769 Vector rhs_scaled(rhs_halfgap_glob);
3770 Vector tmp(n);
3771 rhs_scaled *= row_scaling_;
3772 mat->MultTranspose(rhs_scaled, tmp);
3773
3774 mu_tol = 1.0e-15 * tmp.Max();
3775 }
3776
3777 real_t rmax = 0.0;
3778 real_t mumax = 0.0;
3779
3780 for (int oiter = 0; oiter < n_outer_; ++oiter)
3781 {
3782 stalledFlag = 0;
3783
3784 rmax = fabs(res_glob(0)) - rhs_halfgap_glob(0);
3785 for (int i=1; i<m; ++i)
3786 {
3787 rmax = std::max(rmax, fabs(res_glob(i)) - rhs_halfgap_glob(i));
3788 }
3789
3790 l2_res_hist.push_back(res_glob.Norml2());
3791
3792 if (verbosity_ > 1)
3793 {
3794 mfem::out << "NNLS " << oiter << " " << n_total_inner_iter << " " << m
3795 << " " << n << " " << n_glob << " " << rmax << " "
3796 << l2_res_hist[oiter] << endl;
3797 }
3798 if (rmax <= const_tol_ && n_glob >= min_nnz_cap)
3799 {
3800 if (verbosity_ > 1)
3801 {
3802 mfem::out << "NNLS target tolerance met" << endl;
3803 }
3804 exit_flag = 0;
3805 break;
3806 }
3807
3808 if (n_glob >= max_nnz_)
3809 {
3810 if (verbosity_ > 1)
3811 {
3812 mfem::out << "NNLS target nnz met" << endl;
3813 }
3814 exit_flag = 0;
3815 break;
3816 }
3817
3818 if (n_glob >= m)
3819 {
3820 if (verbosity_ > 1)
3821 {
3822 mfem::out << "NNLS system is square... exiting" << endl;
3823 }
3824 exit_flag = 3;
3825 break;
3826 }
3827
3828 // Check for stall after the first nStallCheck iterations
3829 if (oiter > nStallCheck_)
3830 {
3831 real_t mean0 = 0.0;
3832 real_t mean1 = 0.0;
3833 for (int i=0; i<nStallCheck_/2; ++i)
3834 {
3835 mean0 += l2_res_hist[oiter - i];
3836 mean1 += l2_res_hist[oiter - (nStallCheck_) - i];
3837 }
3838
3839 real_t mean_res_change = (mean1 / mean0) - 1.0;
3840 if (std::abs(mean_res_change) < res_change_termination_tol_)
3841 {
3842 if (verbosity_ > 1)
3843 {
3844 mfem::out << "NNLSSolver stall detected... exiting" << endl;
3845 }
3846 exit_flag = 2;
3847 break;
3848 }
3849 }
3850
3851 // Find the next index
3852 res_glob *= row_scaling_;
3853 mat->MultTranspose(res_glob, mu);
3854
3855 for (int i = 0; i < n_nz_ind; ++i)
3856 {
3857 mu(nz_ind[i]) = 0.0;
3858 }
3859 for (unsigned int i = 0; i < stalled_indices.size(); ++i)
3860 {
3861 mu(stalled_indices[i]) = 0.0;
3862 }
3863
3864 mumax = mu.Max();
3865
3866 if (mumax < mu_tol)
3867 {
3868 num_stalled = stalled_indices.size();
3869 if (num_stalled > 0)
3870 {
3871 if (verbosity_ > 0)
3872 {
3873 mfem::out << "NNLS Lagrange multiplier is below the minimum "
3874 << "threshold: mumax = " << mumax << ", mutol = "
3875 << mu_tol << "\n" << " Resetting stalled indices "
3876 << "vector of size " << num_stalled << "\n";
3877 }
3878 stalled_indices.resize(0);
3879
3880 mat->MultTranspose(res_glob, mu);
3881
3882 for (int i = 0; i < n_nz_ind; ++i)
3883 {
3884 mu(nz_ind[i]) = 0.0;
3885 }
3886
3887 mumax = mu.Max();
3888 }
3889 }
3890
3891 int imax = 0;
3892 {
3893 real_t tmax = mu(0);
3894 for (int i=1; i<n; ++i)
3895 {
3896 if (mu(i) > tmax)
3897 {
3898 tmax = mu(i);
3899 imax = i;
3900 }
3901 }
3902 }
3903
3904 // Record the local value of the next index
3905 nz_ind[n_nz_ind] = imax;
3906 ++n_nz_ind;
3907
3908 if (verbosity_ > 2)
3909 {
3910 mfem::out << "Found next index: " << imax << " " << mumax << endl;
3911 }
3912
3913 for (int i=0; i<m; ++i)
3914 {
3915 mat_0_data(i + (n_glob*m)) = (*mat)(i,imax) * row_scaling_[i];
3916 mat_qr_data(i + (n_glob*m)) = mat_0_data(i + (n_glob*m));
3917 }
3918
3919 i_qr_start = n_glob;
3920 ++n_glob; // Increment the size of the global matrix
3921
3922 if (verbosity_ > 2)
3923 {
3924 mfem::out << "Updated matrix with new index" << endl;
3925 }
3926
3927 for (int iiter = 0; iiter < n_inner_; ++iiter)
3928 {
3929 ++n_total_inner_iter;
3930
3931 // Initialize
3932 const bool incremental_update = true;
3933 n_update = n_glob - i_qr_start;
3934 m_update = m - i_qr_start;
3935 if (incremental_update)
3936 {
3937 // Apply Householder reflectors to compute Q^T new_cols
3938 lwork = -1;
3939 work.resize(10);
3940
3941#ifdef MFEM_USE_SINGLE
3942 sormqr_(&lside, &trans, &m, &n_update, &i_qr_start,
3943#elif defined MFEM_USE_DOUBLE
3944 dormqr_(&lside, &trans, &m, &n_update, &i_qr_start,
3945#endif
3946 mat_qr_data.GetData(), &m, tau.GetData(),
3947 mat_qr_data.GetData() + (i_qr_start * m), &m,
3948 work.data(), &lwork, &info);
3949 MFEM_VERIFY(info == 0, ""); // Q^T A update work calculation failed
3950 lwork = static_cast<int>(work[0]);
3951 work.resize(lwork);
3952#ifdef MFEM_USE_SINGLE
3953 sormqr_(&lside, &trans, &m, &n_update, &i_qr_start,
3954#elif defined MFEM_USE_DOUBLE
3955 dormqr_(&lside, &trans, &m, &n_update, &i_qr_start,
3956#endif
3957 mat_qr_data.GetData(), &m, tau.GetData(),
3958 mat_qr_data.GetData() + (i_qr_start * m), &m,
3959 work.data(), &lwork, &info);
3960 MFEM_VERIFY(info == 0, ""); // Q^T A update failed
3961 // Compute QR factorization of the submatrix
3962 lwork = -1;
3963 work.resize(10);
3964
3965 // Copy m_update-by-n_update submatrix of mat_qr_data,
3966 // starting at (i_qr_start, i_qr_start)
3967 for (int i=0; i<m_update; ++i)
3968 for (int j=0; j<n_update; ++j)
3969 {
3970 submat_data[i + (j * m_update)] =
3971 mat_qr_data[i + i_qr_start + ((j + i_qr_start) * m)];
3972 }
3973
3974 // Copy tau subvector of length n_update, starting at i_qr_start
3975 for (int j=0; j<n_update; ++j)
3976 {
3977 sub_tau[j] = tau[i_qr_start + j];
3978 }
3979
3980#ifdef MFEM_USE_SINGLE
3981 sgeqrf_(&m_update, &n_update,
3982#elif defined MFEM_USE_DOUBLE
3983 dgeqrf_(&m_update, &n_update,
3984#endif
3985 submat_data.GetData(), &m_update, sub_tau.GetData(),
3986 work.data(), &lwork, &info);
3987 MFEM_VERIFY(info == 0, ""); // QR update factorization work calc
3988 lwork = static_cast<int>(work[0]);
3989 if (lwork == 0) { lwork = 1; }
3990 work.resize(lwork);
3991#ifdef MFEM_USE_SINGLE
3992 sgeqrf_(&m_update, &n_update,
3993#elif defined MFEM_USE_DOUBLE
3994 dgeqrf_(&m_update, &n_update,
3995#endif
3996 submat_data.GetData(), &m_update, sub_tau.GetData(),
3997 work.data(), &lwork, &info);
3998 MFEM_VERIFY(info == 0, ""); // QR update factorization failed
3999
4000 // Copy result back
4001 for (int i=0; i<m_update; ++i)
4002 for (int j=0; j<n_update; ++j)
4003 {
4004 mat_qr_data[i + i_qr_start + ((j + i_qr_start)* m)] =
4005 submat_data[i + (j * m_update)];
4006 }
4007
4008 for (int j=0; j<n_update; ++j)
4009 {
4010 tau[i_qr_start + j] = sub_tau[j];
4011 }
4012 }
4013 else
4014 {
4015 // Copy everything to mat_qr then do full QR
4016 for (int i=0; i<m; ++i)
4017 for (int j=0; j<n_glob; ++j)
4018 {
4019 mat_qr_data(i + (j*m)) = mat_0_data(i + (j*m));
4020 }
4021
4022 // Compute qr factorization (first find the size of work and then
4023 // perform qr)
4024 lwork = -1;
4025 work.resize(10);
4026#ifdef MFEM_USE_SINGLE
4027 sgeqrf_(&m, &n_glob,
4028#elif defined MFEM_USE_DOUBLE
4029 dgeqrf_(&m, &n_glob,
4030#endif
4031 mat_qr_data.GetData(), &m, tau.GetData(),
4032 work.data(), &lwork, &info);
4033 MFEM_VERIFY(info == 0, ""); // QR factorization work calculation
4034 lwork = static_cast<int>(work[0]);
4035 work.resize(lwork);
4036#ifdef MFEM_USE_SINGLE
4037 sgeqrf_(&m, &n_glob,
4038#elif defined MFEM_USE_DOUBLE
4039 dgeqrf_(&m, &n_glob,
4040#endif
4041 mat_qr_data.GetData(), &m, tau.GetData(),
4042 work.data(), &lwork, &info);
4043 MFEM_VERIFY(info == 0, ""); // QR factorization failed
4044 }
4045
4046 if (verbosity_ > 2)
4047 {
4048 mfem::out << "Updated QR " << iiter << endl;
4049 }
4050
4051 // Apply Householder reflectors to compute Q^T b
4052 if (incremental_update && iiter == 0)
4053 {
4054 lwork = -1;
4055 work.resize(10);
4056
4057 // Copy submatrix of mat_qr_data starting at
4058 // (i_qr_start, i_qr_start), of size m_update-by-1
4059 // Copy submatrix of qt_rhs_glob starting at (i_qr_start, 0),
4060 // of size m_update-by-1
4061
4062 for (int i=0; i<m_update; ++i)
4063 {
4064 submat_data[i] = mat_qr_data[i + i_qr_start + (i_qr_start * m)];
4065 sub_qt[i] = qt_rhs_glob[i + i_qr_start];
4066 }
4067
4068 sub_tau[0] = tau[i_qr_start];
4069
4070#ifdef MFEM_USE_SINGLE
4071 sormqr_(&lside, &trans, &m_update, &ione, &ione,
4072#elif defined MFEM_USE_DOUBLE
4073 dormqr_(&lside, &trans, &m_update, &ione, &ione,
4074#endif
4075 submat_data.GetData(), &m_update, sub_tau.GetData(),
4076 sub_qt.GetData(), &m_update,
4077 work.data(), &lwork, &info);
4078 MFEM_VERIFY(info == 0, ""); // H_last y work calculation failed
4079 lwork = static_cast<int>(work[0]);
4080 work.resize(lwork);
4081#ifdef MFEM_USE_SINGLE
4082 sormqr_(&lside, &trans, &m_update, &ione, &ione,
4083#elif defined MFEM_USE_DOUBLE
4084 dormqr_(&lside, &trans, &m_update, &ione, &ione,
4085#endif
4086 submat_data.GetData(), &m_update, sub_tau.GetData(),
4087 sub_qt.GetData(), &m_update,
4088 work.data(), &lwork, &info);
4089 MFEM_VERIFY(info == 0, ""); // H_last y failed
4090 // Copy result back
4091 for (int i=0; i<m_update; ++i)
4092 {
4093 qt_rhs_glob[i + i_qr_start] = sub_qt[i];
4094 }
4095 }
4096 else
4097 {
4098 // Compute Q^T b from scratch
4099 qt_rhs_glob = rhs_avg_glob;
4100 lwork = -1;
4101 work.resize(10);
4102#ifdef MFEM_USE_SINGLE
4103 sormqr_(&lside, &trans, &m, &ione, &n_glob,
4104#elif defined MFEM_USE_DOUBLE
4105 dormqr_(&lside, &trans, &m, &ione, &n_glob,
4106#endif
4107 mat_qr_data.GetData(), &m, tau.GetData(),
4108 qt_rhs_glob.GetData(), &m,
4109 work.data(), &lwork, &info);
4110 MFEM_VERIFY(info == 0, ""); // Q^T b work calculation failed
4111 lwork = static_cast<int>(work[0]);
4112 work.resize(lwork);
4113#ifdef MFEM_USE_SINGLE
4114 sormqr_(&lside, &trans, &m, &ione, &n_glob,
4115#elif defined MFEM_USE_DOUBLE
4116 dormqr_(&lside, &trans, &m, &ione, &n_glob,
4117#endif
4118 mat_qr_data.GetData(), &m, tau.GetData(),
4119 qt_rhs_glob.GetData(), &m,
4120 work.data(), &lwork, &info);
4121 MFEM_VERIFY(info == 0, ""); // Q^T b failed
4122 }
4123
4124 if (verbosity_ > 2)
4125 {
4126 mfem::out << "Updated rhs " << iiter << endl;
4127 }
4128
4129 // Apply R^{-1}; first n_glob entries of vec1 are overwritten
4130 char upper = 'U';
4131 char nounit = 'N';
4132 vec1 = qt_rhs_glob;
4133#ifdef MFEM_USE_SINGLE
4134 strsm_(&lside, &upper, &notrans, &nounit,
4135#elif defined MFEM_USE_DOUBLE
4136 dtrsm_(&lside, &upper, &notrans, &nounit,
4137#endif
4138 &n_glob, &ione, &fone,
4139 mat_qr_data.GetData(), &m,
4140 vec1.GetData(), &n_glob);
4141
4142 if (verbosity_ > 2)
4143 {
4144 mfem::out << "Solved triangular system " << iiter << endl;
4145 }
4146
4147 // Check if all entries are positive
4148 int pos_ibool = 0;
4149 real_t smin = n_glob > 0 ? vec1(0) : 0.0;
4150 for (int i=0; i<n_glob; ++i)
4151 {
4152 soln_nz_glob_up(i) = vec1(i);
4153 smin = std::min(smin, soln_nz_glob_up(i));
4154 }
4155
4156 if (smin > zero_tol_)
4157 {
4158 pos_ibool = 1;
4159 for (int i=0; i<n_glob; ++i)
4160 {
4161 soln_nz_glob(i) = soln_nz_glob_up(i);
4162 }
4163 }
4164
4165 if (pos_ibool == 1)
4166 {
4167 break;
4168 }
4169
4170 if (verbosity_ > 2)
4171 {
4172 mfem::out << "Start pruning " << iiter << endl;
4173 for (int i = 0; i < n_glob; ++i)
4174 {
4175 if (soln_nz_glob_up(i) <= zero_tol_)
4176 {
4177 mfem::out << i << " " << n_glob << " " << soln_nz_glob_up(i) << endl;
4178 }
4179 }
4180 }
4181
4182 if (soln_nz_glob_up(n_glob - 1) <= zero_tol_)
4183 {
4184 stalledFlag = 1;
4185 if (verbosity_ > 2)
4186 {
4187 if (qr_residual_mode_ == QRresidualMode::hybrid)
4188 {
4189 mfem::out << "Detected stall due to adding and removing same "
4190 << "column. Switching to QR residual calculation "
4191 << "method." << endl;
4192 }
4193 else
4194 {
4195 mfem::out << "Detected stall due to adding and removing same"
4196 << " column. Exiting now." << endl;
4197 }
4198 }
4199 }
4200
4201 if (stalledFlag == 1 && qr_residual_mode_ == QRresidualMode::hybrid)
4202 {
4203 NNLS_qrres_on_ = true;
4204 break;
4205 }
4206
4207 real_t alpha = numeric_limits<real_t>::max();
4208
4209 // Find maximum permissible step
4210 for (int i = 0; i < n_glob; ++i)
4211 {
4212 if (soln_nz_glob_up(i) <= zero_tol_)
4213 {
4214 alpha = std::min(alpha, soln_nz_glob(i)/(soln_nz_glob(i) - soln_nz_glob_up(i)));
4215 }
4216 }
4217 // Update solution
4218 smin = 0.0;
4219 for (int i = 0; i < n_glob; ++i)
4220 {
4221 soln_nz_glob(i) += alpha*(soln_nz_glob_up(i) - soln_nz_glob(i));
4222 if (i == 0 || soln_nz_glob(i) < smin)
4223 {
4224 smin = soln_nz_glob(i);
4225 }
4226 }
4227
4228 while (smin > zero_tol_)
4229 {
4230 // This means there was a rounding error, as we should have
4231 // a zero element by definition. Recalculate alpha based on
4232 // the index that corresponds to the element that should be
4233 // zero.
4234
4235 int index_min = 0;
4236 smin = soln_nz_glob(0);
4237 for (int i = 1; i < n_glob; ++i)
4238 {
4239 if (soln_nz_glob(i) < smin)
4240 {
4241 smin = soln_nz_glob(i);
4242 index_min = i;
4243 }
4244 }
4245
4246 alpha = soln_nz_glob(index_min)/(soln_nz_glob(index_min)
4247 - soln_nz_glob_up(index_min));
4248
4249 // Reupdate solution
4250 for (int i = 0; i < n_glob; ++i)
4251 {
4252 soln_nz_glob(i) += alpha*(soln_nz_glob_up(i) - soln_nz_glob(i));
4253 }
4254 }
4255
4256 // Clean up zeroed entry
4257 i_qr_start = n_glob+1;
4258 while (true)
4259 {
4260 // Check if there is a zero entry
4261 int zero_ibool;
4262
4263 smin = n_glob > 0 ? soln_nz_glob(0) : 0.0;
4264 for (int i=1; i<n_glob; ++i)
4265 {
4266 smin = std::min(smin, soln_nz_glob(i));
4267 }
4268
4269 if (smin < zero_tol_)
4270 {
4271 zero_ibool = 1;
4272 }
4273 else
4274 {
4275 zero_ibool = 0;
4276 }
4277
4278 if (zero_ibool == 0) // Break if there is no more zero entry
4279 {
4280 break;
4281 }
4282
4283 int ind_zero = -1; // Index where the first zero is encountered
4284 nz_ind_zero = 0;
4285
4286 // Identify global index of the zeroed element
4287 for (int i = 0; i < n_glob; ++i)
4288 {
4289 if (soln_nz_glob(i) < zero_tol_)
4290 {
4291 ind_zero = i;
4292 break;
4293 }
4294 }
4295 MFEM_VERIFY(ind_zero != -1, "");
4296 // Identify the local index for nz_ind to which the zeroed entry
4297 // belongs
4298 for (int i = 0; i < ind_zero; ++i)
4299 {
4300 ++nz_ind_zero;
4301 }
4302
4303 {
4304 // Copy mat_0.cols[ind_zero+1,n_glob) to mat_qr.cols[ind_zero,n_glob-1)
4305 for (int i=0; i<m; ++i)
4306 for (int j=ind_zero; j<n_glob-1; ++j)
4307 {
4308 mat_qr_data(i + (j*m)) = mat_0_data(i + ((j+1)*m));
4309 }
4310
4311 // Copy mat_qr.cols[ind_zero,n_glob-1) to
4312 // mat_0.cols[ind_zero,n_glob-1)
4313 for (int i=0; i<m; ++i)
4314 for (int j=ind_zero; j<n_glob-1; ++j)
4315 {
4316 mat_0_data(i + (j*m)) = mat_qr_data(i + (j*m));
4317 }
4318 }
4319
4320 // Remove the zeroed entry from the local matrix index
4321 for (int i = nz_ind_zero; i < n_nz_ind-1; ++i)
4322 {
4323 nz_ind[i] = nz_ind[i+1];
4324 }
4325 --n_nz_ind;
4326
4327 // Shift soln_nz_glob and proc_index
4328 for (int i = ind_zero; i < n_glob-1; ++i)
4329 {
4330 soln_nz_glob(i) = soln_nz_glob(i+1);
4331 }
4332
4333 i_qr_start = std::min(i_qr_start, ind_zero);
4334 --n_glob;
4335 } // End of pruning loop
4336
4337 if (verbosity_ > 2)
4338 {
4339 mfem::out << "Finished pruning " << iiter << endl;
4340 }
4341 } // End of inner loop
4342
4343 // Check if we have stalled
4344 if (stalledFlag == 1)
4345 {
4346 --n_glob;
4347 --n_nz_ind;
4348 num_stalled = stalled_indices.size();
4349 stalled_indices.resize(num_stalled + 1);
4350 stalled_indices[num_stalled] = imax;
4351 if (verbosity_ > 2)
4352 {
4353 mfem::out << "Adding index " << imax << " to stalled index list "
4354 << "of size " << num_stalled << endl;
4355 }
4356 }
4357
4358 // Compute residual
4359 if (!NNLS_qrres_on_)
4360 {
4361 res_glob = rhs_avg_glob;
4362 real_t fmone = -1.0;
4363#ifdef MFEM_USE_SINGLE
4364 sgemv_(&notrans, &m, &n_glob, &fmone,
4365#elif defined MFEM_USE_DOUBLE
4366 dgemv_(&notrans, &m, &n_glob, &fmone,
4367#endif
4368 mat_0_data.GetData(), &m,
4369 soln_nz_glob.GetData(), &ione, &fone,
4370 res_glob.GetData(), &ione);
4371 }
4372 else
4373 {
4374 // Compute residual using res = b - Q*Q^T*b, where Q is from an
4375 // economical QR decomposition
4376 lwork = -1;
4377 work.resize(10);
4378 qqt_rhs_glob = 0.0;
4379 for (int i=0; i<n_glob; ++i)
4380 {
4381 qqt_rhs_glob(i) = qt_rhs_glob(i);
4382 }
4383
4384#ifdef MFEM_USE_SINGLE
4385 sormqr_(&lside, &notrans, &m, &ione, &n_glob, mat_qr_data.GetData(), &m,
4386#elif defined MFEM_USE_DOUBLE
4387 dormqr_(&lside, &notrans, &m, &ione, &n_glob, mat_qr_data.GetData(), &m,
4388#endif
4389 tau.GetData(), qqt_rhs_glob.GetData(), &m,
4390 work.data(), &lwork, &info);
4391
4392 MFEM_VERIFY(info == 0, ""); // Q Q^T b work calculation failed.
4393 lwork = static_cast<int>(work[0]);
4394 work.resize(lwork);
4395#ifdef MFEM_USE_SINGLE
4396 sormqr_(&lside, &notrans, &m, &ione, &n_glob, mat_qr_data.GetData(), &m,
4397#elif defined MFEM_USE_DOUBLE
4398 dormqr_(&lside, &notrans, &m, &ione, &n_glob, mat_qr_data.GetData(), &m,
4399#endif
4400 tau.GetData(), qqt_rhs_glob.GetData(), &m,
4401 work.data(), &lwork, &info);
4402 MFEM_VERIFY(info == 0, ""); // Q Q^T b calculation failed.
4403 res_glob = rhs_avg_glob;
4404 res_glob -= qqt_rhs_glob;
4405 }
4406
4407 if (verbosity_ > 2)
4408 {
4409 mfem::out << "Computed residual" << endl;
4410 }
4411
4412 ++n_outer_iter;
4413 } // End of outer loop
4414
4415 // Insert the solutions
4416 MFEM_VERIFY(n_glob == n_nz_ind, "");
4417 soln = 0.0;
4418 for (int i = 0; i < n_glob; ++i)
4419 {
4420 soln(nz_ind[i]) = soln_nz_glob(i);
4421 }
4422
4423 if (verbosity_ > 0)
4424 {
4425 mfem::out << "NNLS solver: m = " << m << ", n = " << n
4426 << ", outer_iter = " << n_outer_iter << ", inner_iter = "
4427 << n_total_inner_iter;
4428
4429 if (exit_flag == 0)
4430 {
4431 mfem::out << ": converged" << endl;
4432 }
4433 else
4434 {
4435 mfem::out << endl << "Warning, NNLS convergence stalled: "
4436 << (exit_flag == 2) << endl;
4437 mfem::out << "resErr = " << rmax << " vs tol = " << const_tol_
4438 << "; mumax = " << mumax << " vs tol = " << mu_tol << endl;
4439 }
4440 }
4441}
4442#endif // MFEM_USE_LAPACK
4443
4444}
const T * HostRead() const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), false).
Definition array.hpp:321
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:697
int Size() const
Return the logical size of the array.
Definition array.hpp:144
void MakeRef(T *data_, int size_, bool own_data=false)
Make this Array a reference to a pointer.
Definition array.hpp:882
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:317
AuxSpaceSmoother(const HypreParMatrix &op, HypreParMatrix *aux_map, bool op_is_symmetric=true, bool own_aux_map=false)
Definition solvers.cpp:3513
BiCGSTAB method.
Definition solvers.hpp:596
virtual void Mult(const Vector &b, Vector &x) const
Iterative solution of the linear system using the BiCGSTAB method.
Definition solvers.cpp:1380
virtual void SetOperator(const Operator &op)
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.hpp:609
A "square matrix" operator for the associated FE space and BLFIntegrators The sum of all the BLFInteg...
FiniteElementSpace * FESpace()
Return the FE space associated with the BilinearForm.
Reordering
The reordering method used by the BlockILU factorization.
Definition solvers.hpp:994
BlockILU(int block_size_, Reordering reordering_=Reordering::MINIMUM_DISCARDED_FILL, int k_fill_=0)
Definition solvers.cpp:2749
void Mult(const Vector &b, Vector &x) const
Solve the system LUx = b, where L and U are the block ILU factors.
Definition solvers.cpp:2998
void SetOperator(const Operator &op)
Definition solvers.cpp:2767
Conjugate gradient method.
Definition solvers.hpp:513
virtual void SetOperator(const Operator &op)
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.hpp:526
virtual void Mult(const Vector &b, Vector &x) const
Iterative solution of the linear system using the Conjugate Gradient method.
Definition solvers.cpp:718
void UpdateVectors()
Definition solvers.cpp:709
Data type dense matrix using column-major storage.
Definition densemat.hpp:24
void AddMult_a(real_t a, const Vector &x, Vector &y) const
y += a * A.x
Definition densemat.cpp:346
void Mult(const real_t *x, real_t *y) const
Matrix vector multiplication.
Definition densemat.cpp:220
void MultTranspose(const real_t *x, real_t *y) const
Multiply a vector with the transpose matrix.
Definition densemat.cpp:262
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Definition densemat.hpp:105
void Add(const real_t c, const DenseMatrix &A)
Adds the matrix A multiplied by the number c to the matrix.
Definition densemat.cpp:628
void SetSize(int i, int j, int k, MemoryType mt_=MemoryType::PRESERVE)
real_t * GetData(int k)
DirectSubBlockSolver(const SparseMatrix &A, const SparseMatrix &block_dof)
Definition solvers.cpp:3360
virtual void Mult(const Vector &x, Vector &y) const
Direct solution of the block diagonal linear system.
Definition solvers.cpp:3375
virtual void Mult(const Vector &b, Vector &x) const
Iterative solution of the linear system using the FGMRES method.
Definition solvers.cpp:1168
virtual int GetTrueVSize() const
Return the number of vector true (conforming) dofs.
Definition fespace.hpp:716
GMRES method.
Definition solvers.hpp:547
void SetKDim(int dim)
Set the number of iteration to perform between restarts, default is 50.
Definition solvers.hpp:559
virtual void Mult(const Vector &b, Vector &x) const
Iterative solution of the linear system using the GMRES method.
Definition solvers.cpp:980
Wrapper for hypre's ParCSR matrix class.
Definition hypre.hpp:388
void GetDiag(Vector &diag) const
Get the local diagonal of the matrix.
Definition hypre.cpp:1557
Parallel smoothers in hypre.
Definition hypre.hpp:1020
const class IterativeSolver * iter_solver
The last IterativeSolver to which this monitor was attached.
Definition solvers.hpp:40
virtual void MonitorSolution(int it, real_t norm, const Vector &x, bool final)
Monitor the solution vector x.
Definition solvers.hpp:54
virtual void MonitorResidual(int it, real_t norm, const Vector &r, bool final)
Monitor the residual vector r.
Definition solvers.hpp:48
Abstract base class for iterative solver.
Definition solvers.hpp:67
real_t abs_tol
Absolute tolerance.
Definition solvers.hpp:158
void Monitor(int it, real_t norm, const Vector &r, const Vector &x, bool final=false) const
Monitor both the residual r and the solution x.
Definition solvers.cpp:190
real_t rel_tol
Relative tolerance.
Definition solvers.hpp:155
PrintLevel print_options
Output behavior for the iterative solver.
Definition solvers.hpp:138
real_t GetFinalNorm() const
Returns the final residual norm after termination of the solver during the last call to Mult().
Definition solvers.hpp:275
virtual void SetOperator(const Operator &op) override
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.cpp:179
virtual real_t Dot(const Vector &x, const Vector &y) const
Return the standard (l2, i.e., Euclidean) inner product of x and y.
Definition solvers.cpp:55
const Operator * oper
Definition solvers.hpp:121
void SetRelTol(real_t rtol)
Definition solvers.hpp:209
virtual void SetPreconditioner(Solver &pr)
This should be called before SetOperator.
Definition solvers.cpp:173
int print_level
(DEPRECATED) Legacy print level definition, which is left for compatibility with custom iterative sol...
Definition solvers.hpp:131
int GetNumIterations() const
Returns the number of iterations taken during the last call to Mult()
Definition solvers.hpp:260
virtual void SetPrintLevel(int print_lvl)
Legacy method to set the level of verbosity of the solver output.
Definition solvers.cpp:71
int max_iter
Limit for the number of iterations the solver is allowed to do.
Definition solvers.hpp:152
void SetMaxIter(int max_it)
Definition solvers.hpp:211
MPI_Comm GetComm() const
Return the associated MPI communicator, or MPI_COMM_NULL if no communicator is set.
Definition solvers.hpp:302
static int GuessLegacyPrintLevel(PrintLevel)
Use some heuristics to guess a legacy print level corresponding to the given PrintLevel.
Definition solvers.cpp:149
IterativeSolverMonitor * monitor
Definition solvers.hpp:123
bool GetConverged() const
Returns true if the last call to Mult() converged successfully.
Definition solvers.hpp:262
PrintLevel FromLegacyPrintLevel(int)
Convert a legacy print level integer to a PrintLevel object.
Definition solvers.cpp:114
void SetAbsTol(real_t atol)
Definition solvers.hpp:210
real_t Norm(const Vector &x) const
Return the inner product norm of x, using the inner product defined by Dot()
Definition solvers.hpp:180
virtual void MultTranspose(const Vector &b, Vector &x) const
Direct solution of the transposed linear system using KLU.
Definition solvers.cpp:3336
virtual ~KLUSolver()
Definition solvers.cpp:3350
klu_symbolic * Symbolic
Definition solvers.hpp:1143
virtual void Mult(const Vector &b, Vector &x) const
Direct solution of the linear system using KLU.
Definition solvers.cpp:3322
SparseMatrix * mat
Definition solvers.hpp:1142
virtual void SetOperator(const Operator &op)
Set/update the solver for the given operator.
Definition solvers.cpp:3291
klu_common Common
Definition solvers.hpp:1167
klu_numeric * Numeric
Definition solvers.hpp:1144
Array< Vector * > skArray
Definition solvers.hpp:755
Array< Vector * > ykArray
Definition solvers.hpp:755
virtual void Mult(const Vector &b, Vector &x) const
Solve the nonlinear system with right-hand side b.
Definition solvers.cpp:2009
virtual void Solve(int m, int n, real_t *X) const
MINRES method.
Definition solvers.hpp:628
virtual void Mult(const Vector &b, Vector &x) const
Iterative solution of the linear system using the MINRES method.
Definition solvers.cpp:1616
virtual void SetOperator(const Operator &op)
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.cpp:1595
virtual void SetPreconditioner(Solver &pr)
This should be called before SetOperator.
Definition solvers.hpp:640
void Solve(const Vector &rhs_lb, const Vector &rhs_ub, Vector &soln) const
Solve the NNLS problem. Specifically, we find a vector soln, such that rhs_lb < mat*soln < rhs_ub is ...
Definition solvers.cpp:3689
QRresidualMode
Enumerated types of QRresidual mode. Options are 'off': the residual is calculated normally,...
Definition solvers.hpp:1353
void Mult(const Vector &w, Vector &sol) const override
Compute the non-negative least squares solution to the underdetermined system.
Definition solvers.cpp:3641
void SetQRResidualMode(const QRresidualMode qr_residual_mode)
Set the residual calculation mode for the NNLS solver. See QRresidualMode enum above for details.
Definition solvers.cpp:3600
void SetOperator(const Operator &op) override
The operator must be a DenseMatrix.
Definition solvers.cpp:3587
void NormalizeConstraints(Vector &rhs_lb, Vector &rhs_ub) const
Normalize the constraints such that the tolerances for each constraint (i.e. (UB - LB)/2) are equal....
Definition solvers.cpp:3609
void SetAdaptiveLinRtol(const int type=2, const real_t rtol0=0.5, const real_t rtol_max=0.9, const real_t alpha=0.5 *(1.0+sqrt(5.0)), const real_t gamma=1.0)
Enable adaptive linear solver relative tolerance algorithm.
Definition solvers.cpp:1929
void AdaptiveLinRtolPreSolve(const Vector &x, const int it, const real_t fnorm) const
Method for the adaptive linear solver rtol invoked before the linear solve.
Definition solvers.cpp:1942
Operator * grad
Definition solvers.hpp:670
virtual void SetOperator(const Operator &op)
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.cpp:1810
virtual real_t ComputeScalingFactor(const Vector &x, const Vector &b) const
This method can be overloaded in derived classes to implement line search algorithms.
Definition solvers.hpp:724
void AdaptiveLinRtolPostSolve(const Vector &x, const Vector &b, const int it, const real_t fnorm) const
Method for the adaptive linear solver rtol invoked after the linear solve.
Definition solvers.cpp:1989
virtual void ProcessNewState(const Vector &x) const
This method can be overloaded in derived classes to perform computations that need knowledge of the n...
Definition solvers.hpp:729
virtual void Mult(const Vector &b, Vector &x) const
Solve the nonlinear system with right-hand side b.
Definition solvers.cpp:1821
Chebyshev accelerated smoothing with given vector, no matrix necessary.
Definition solvers.hpp:394
void Mult(const Vector &x, Vector &y) const
Approach the solution of the linear system by applying Chebyshev smoothing.
Definition solvers.cpp:485
OperatorChebyshevSmoother(const Operator &oper_, const Vector &d, const Array< int > &ess_tdof_list, int order, real_t max_eig_estimate)
Definition solvers.cpp:331
OpType * As() const
Return the Operator pointer statically cast to a specified OpType. Similar to the method Get().
Definition handle.hpp:104
void Reset(OpType *A, bool own_A=true)
Reset the OperatorHandle to the given OpType pointer, A.
Definition handle.hpp:145
Jacobi smoothing for a given bilinear form (no matrix necessary).
Definition solvers.hpp:313
void Setup(const Vector &diag)
Definition solvers.cpp:277
OperatorJacobiSmoother(const real_t damping=1.0)
Default constructor: the diagonal will be computed by subsequent calls to SetOperator() using the Ope...
Definition solvers.cpp:200
void SetOperator(const Operator &op)
Recompute the diagonal using the method AssembleDiagonal of the given new Operator,...
Definition solvers.cpp:241
void Mult(const Vector &x, Vector &y) const
Approach the solution of the linear system by applying Jacobi smoothing.
Definition solvers.cpp:303
Abstract operator.
Definition operator.hpp:25
virtual MemoryClass GetMemoryClass() const
Return the MemoryClass preferred by the Operator.
Definition operator.hpp:86
int width
Dimension of the input / number of columns in the matrix.
Definition operator.hpp:28
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
Definition operator.hpp:66
int height
Dimension of the output / number of rows in the matrix.
Definition operator.hpp:27
virtual void Mult(const Vector &x, Vector &y) const =0
Operator application: y=A(x).
int NumCols() const
Get the number of columns (size of input) of the Operator. Synonym with Width().
Definition operator.hpp:75
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
Definition operator.hpp:72
virtual void AssembleDiagonal(Vector &diag) const
Computes the diagonal entries into diag. Typically, this operation only makes sense for linear Operat...
Definition operator.hpp:131
int NumRows() const
Get the number of rows (size of output) of the Operator. Synonym with Height().
Definition operator.hpp:69
virtual void MultTranspose(const Vector &x, Vector &y) const
Action of the transpose operator: y=A^t(x). The default behavior in class Operator is to generate an ...
Definition operator.hpp:93
virtual Operator & GetGradient(const Vector &x) const
Evaluate the gradient operator at the point x. The default behavior in class Operator is to generate ...
Definition operator.hpp:122
const Operator * C
Not owned, some can remain unused (NULL).
Definition solvers.hpp:853
void SetSolutionBounds(const Vector &xl, const Vector &xh)
Definition solvers.cpp:2334
void SetEqualityConstraint(const Vector &c)
Definition solvers.cpp:2316
int GetNumConstraints() const
Definition solvers.cpp:2342
const Operator * D
Definition solvers.hpp:853
OptimizationProblem(int insize, const Operator *C_, const Operator *D_)
In parallel, insize is the number of the local true dofs.
Definition solvers.cpp:2306
void SetInequalityConstraint(const Vector &dl, const Vector &dh)
Definition solvers.cpp:2324
const OptimizationProblem * problem
Definition solvers.hpp:893
void Mult(const Vector &b, Vector &x) const
Perform the action of the OrthoSolver: P * solver * P where P is the projection to the subspace of ve...
Definition solvers.cpp:3455
void SetSolver(Solver &s)
Set the solver used by the OrthoSolver.
Definition solvers.cpp:3436
virtual void SetOperator(const Operator &op)
Set the Operator that is the OrthoSolver is to invert (approximately).
Definition solvers.cpp:3445
PowerMethod helper class to estimate the largest eigenvalue of an operator using the iterative power ...
real_t EstimateLargestEigenvalue(Operator &opr, Vector &v0, int numSteps=10, real_t tolerance=1e-8, int seed=12345)
Returns an estimate of the largest eigenvalue of the operator opr using the iterative power method.
Definition operator.cpp:781
General product operator: x -> (A*B)(x) = A(B(x)).
Definition operator.hpp:797
virtual void MultTranspose(const Vector &x, Vector &y) const
Solution of the transposed linear system using a product of subsolvers.
Definition solvers.cpp:3407
virtual void Mult(const Vector &x, Vector &y) const
Solution of the linear system using a product of subsolvers.
Definition solvers.cpp:3390
void MonitorResidual(int it, real_t norm, const Vector &r, bool final) override
Monitor the residual vector r.
Definition solvers.cpp:3049
const Array< int > * ess_dofs_list
Not owned.
Definition solvers.hpp:1081
virtual void Mult(const Vector &xt, Vector &x) const
Definition solvers.cpp:2384
virtual void SetOptimizationProblem(const OptimizationProblem &prob)
Definition solvers.cpp:2350
void SetBounds(const Vector &lo_, const Vector &hi_)
Definition solvers.cpp:2363
void SetLinearConstraint(const Vector &w_, real_t a_)
Definition solvers.cpp:2369
void print_iteration(int it, real_t r, real_t l) const
Definition solvers.cpp:2375
real_t solve(real_t l, const Vector &xt, Vector &x, int &nclip) const
Solve QP at fixed lambda.
Definition solvers.hpp:929
Stationary linear iteration: x <- x + B (b - A x)
Definition solvers.hpp:480
virtual void Mult(const Vector &b, Vector &x) const
Iterative solution of the linear system using Stationary Linear Iteration.
Definition solvers.cpp:531
virtual void SetOperator(const Operator &op)
Also calls SetOperator for the preconditioner if there is one.
Definition solvers.hpp:493
void UpdateVectors()
Definition solvers.cpp:525
Base class for solvers.
Definition operator.hpp:683
bool iterative_mode
If true, use the second argument of Mult() as an initial guess.
Definition operator.hpp:686
virtual void SetOperator(const Operator &op)=0
Set/update the solver for the given operator.
Data type sparse matrix.
Definition sparsemat.hpp:51
const int * HostReadJ() const
void GetDiag(Vector &d) const
Returns the Diagonal of A.
virtual void MultTranspose(const Vector &x, Vector &y) const
Multiply a vector with the transposed matrix. y = At * x.
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
bool Finalized() const
Returns whether or not CSR format has been finalized.
const real_t * HostReadData() const
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
int RowSize(const int i) const
Returns the number of elements in row i.
virtual void Mult(const Vector &x, Vector &y) const
Matrix vector multiplication.
real_t * GetData()
Return the element data, i.e. the array A.
void SortColumnIndices()
Sort the column indices corresponding to each row.
int * GetJ()
Return the array J.
int * GetI()
Return the array I.
const int * HostReadI() const
SuiteSparse_long * AJ
Definition solvers.hpp:1101
virtual void SetOperator(const Operator &op)
Factorize the given Operator op which must be a SparseMatrix.
Definition solvers.cpp:3102
real_t Control[UMFPACK_CONTROL]
Definition solvers.hpp:1106
virtual void Mult(const Vector &b, Vector &x) const
Direct solution of the linear system using UMFPACK.
Definition solvers.cpp:3197
SparseMatrix * mat
Definition solvers.hpp:1099
virtual void MultTranspose(const Vector &b, Vector &x) const
Direct solution of the transposed linear system using UMFPACK.
Definition solvers.cpp:3232
SuiteSparse_long * AI
Definition solvers.hpp:1101
real_t Info[UMFPACK_INFO]
Definition solvers.hpp:1107
virtual ~UMFPackSolver()
Definition solvers.cpp:3269
Vector data type.
Definition vector.hpp:80
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:478
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:474
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:490
void SetDataAndSize(real_t *d, int s)
Set the Vector data and size.
Definition vector.hpp:175
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Definition vector.cpp:670
real_t Norml2() const
Returns the l2 norm of the vector.
Definition vector.cpp:832
Vector & Set(const real_t a, const Vector &x)
(*this) = a * x
Definition vector.cpp:262
real_t Max() const
Returns the maximal element of the vector.
Definition vector.cpp:922
int Size() const
Returns the size of the vector.
Definition vector.hpp:218
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition vector.hpp:136
real_t Sum() const
Return the sum of the vector entries.
Definition vector.cpp:1283
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:538
virtual real_t * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:486
real_t * GetData() const
Return a pointer to the beginning of the Vector data.
Definition vector.hpp:227
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:494
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
Definition vector.cpp:578
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:482
Vector & Add(const real_t a, const Vector &Va)
(*this) += a * Va
Definition vector.cpp:247
Vector beta
const real_t alpha
Definition ex15.cpp:369
real_t omega
Definition ex25.cpp:142
real_t mu
Definition ex25.cpp:140
prob_type prob
Definition ex25.cpp:156
void trans(const Vector &u, Vector &x)
Definition ex27.cpp:412
real_t b
Definition lissajous.cpp:42
real_t delta
Definition lissajous.cpp:43
real_t a
Definition lissajous.cpp:41
int BiCGSTAB(const Operator &A, Vector &x, const Vector &b, Solver &M, int &max_iter, real_t &tol, real_t atol, int printit)
BiCGSTAB method. (tolerances are squared)
Definition solvers.cpp:1572
void Swap(Array< T > &, Array< T > &)
Definition array.hpp:648
void dtrsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *a, int *lda, double *b, int *ldb)
void mfem_error(const char *msg)
Definition error.cpp:154
void AddMult_a(real_t alpha, const DenseMatrix &b, const DenseMatrix &c, DenseMatrix &a)
Matrix matrix multiplication. A += alpha * B * C.
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition globals.hpp:66
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:316
bool IsFinite(const real_t &val)
Definition vector.hpp:507
real_t InnerProduct(HypreParVector *x, HypreParVector *y)
Definition hypre.cpp:439
int GMRES(const Operator &A, Vector &x, const Vector &b, Solver &M, int &max_iter, int m, real_t &tol, real_t atol, int printit)
GMRES method. (tolerances are squared)
Definition solvers.cpp:1342
void sgemv_(char *, int *, int *, float *, float *, int *, float *, int *, float *, float *, int *)
void ApplyPlaneRotation(real_t &dx, real_t &dy, real_t &cs, real_t &sn)
Definition solvers.cpp:952
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
int aGMRES(const Operator &A, Vector &x, const Vector &b, const Operator &M, int &max_iter, int m_max, int m_min, int m_step, real_t cf, real_t &tol, real_t &atol, int printit)
Definition solvers.cpp:2151
void SLI(const Operator &A, const Vector &b, Vector &x, int print_iter, int max_num_iter, real_t RTOLERANCE, real_t ATOLERANCE)
Stationary linear iteration. (tolerances are squared)
Definition solvers.cpp:677
void PCG(const Operator &A, Solver &B, const Vector &b, Vector &x, int print_iter, int max_num_iter, real_t RTOLERANCE, real_t ATOLERANCE)
Preconditioned conjugate gradient method. (tolerances are squared)
Definition solvers.cpp:913
void strsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb)
void CG(const Operator &A, const Vector &b, Vector &x, int print_iter, int max_num_iter, real_t RTOLERANCE, real_t ATOLERANCE)
Conjugate gradient method. (tolerances are squared)
Definition solvers.cpp:898
void dgemv_(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *)
void GeneratePlaneRotation(real_t &dx, real_t &dy, real_t &cs, real_t &sn)
Definition solvers.cpp:930
void MINRES(const Operator &A, const Vector &b, Vector &x, int print_it, int max_it, real_t rtol, real_t atol)
MINRES method without preconditioner. (tolerances are squared)
Definition solvers.cpp:1782
void sgeqrf_(int *, int *, float *, int *, float *, float *, int *, int *)
void dormqr_(char *, char *, int *, int *, int *, double *, int *, double *, double *, int *, double *, int *, int *)
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition vector.cpp:472
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
void MinimumDiscardedFillOrdering(SparseMatrix &C, Array< int > &p)
Definition solvers.cpp:2642
void dgeqrf_(int *, int *, double *, int *, double *, double *, int *, int *)
void sormqr_(char *, char *, int *, int *, int *, float *, int *, float *, float *, int *, float *, int *, int *)
void forall(int N, lambda &&body)
Definition forall.hpp:754
void Update(Vector &x, int k, DenseMatrix &h, Vector &s, Array< Vector * > &v)
Definition solvers.cpp:959
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
real_t p(const Vector &x, real_t t)
RefCoord s[3]
Settings for the output behavior of the IterativeSolver.
Definition solvers.hpp:79
bool errors
If a fatal problem has been detected the failure will be reported to mfem::err.
Definition solvers.hpp:82
bool iterations
Detailed information about each iteration will be reported to mfem::out.
Definition solvers.hpp:88
bool warnings
If a non-fatal problem has been detected some context-specific information will be reported to mfem::...
Definition solvers.hpp:85
bool first_and_last
Information about the first and last iteration will be printed to mfem::out.
Definition solvers.hpp:94
bool summary
A summary of the solver process will be reported after the last iteration to mfem::out.
Definition solvers.hpp:91
Helper struct to convert a C++ type to an MPI type.