mumps.cpp
// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.

#include "../config/config.hpp"

#ifdef MFEM_USE_MUMPS
#ifdef MFEM_USE_MPI

#include "mumps.hpp"

#include <algorithm>

#if MFEM_MUMPS_VERSION >= 530
#ifdef MUMPS_INTSIZE64
#error "Full 64-bit MUMPS is not yet supported"
#endif
#else
#ifdef INTSIZE64
#error "Full 64-bit MUMPS is not yet supported"
#endif
#endif

// Macros so that the indices match the MUMPS documentation
#define MUMPS_ICNTL(I) icntl[(I)-1]
#define MUMPS_CNTL(I) cntl[(I)-1]
#define MUMPS_INFO(I) info[(I)-1]
#define MUMPS_INFOG(I) infog[(I)-1]
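// For example, id->MUMPS_ICNTL(14) accesses id->icntl[13], i.e. the parameter
// documented as ICNTL(14) in the MUMPS users' guide (1-based, Fortran-style).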

namespace mfem
{

MUMPSSolver::MUMPSSolver(MPI_Comm comm_)
{
   Init(comm_);
}

MUMPSSolver::MUMPSSolver(const Operator &op)
{
   auto APtr = dynamic_cast<const HypreParMatrix *>(&op);
   MFEM_VERIFY(APtr, "Not a compatible matrix type");
   Init(APtr->GetComm());
   SetOperator(op);
}

void MUMPSSolver::Init(MPI_Comm comm_)
{
   id = nullptr;
   comm = comm_;
   MPI_Comm_size(comm, &numProcs);
   MPI_Comm_rank(comm, &myid);

   mat_type = MatType::UNSYMMETRIC;
   print_level = 0;
   reorder_method = ReorderingStrategy::AUTOMATIC;
   reorder_reuse = false;
   blr_tol = 0.0;

#if MFEM_MUMPS_VERSION >= 530
   irhs_loc = nullptr;
   rhs_loc = nullptr;
   isol_loc = nullptr;
   sol_loc = nullptr;
#else
   recv_counts = nullptr;
   displs = nullptr;
   rhs_glob = nullptr;
#endif
}

MUMPSSolver::~MUMPSSolver()
{
#if MFEM_MUMPS_VERSION >= 530
   delete [] irhs_loc;
   delete [] rhs_loc;
   delete [] isol_loc;
   delete [] sol_loc;
#else
   delete [] recv_counts;
   delete [] displs;
   delete [] rhs_glob;
#endif
   if (id)
   {
      id->job = -2;
      dmumps_c(id);
      delete id;
   }
}

void MUMPSSolver::SetOperator(const Operator &op)
{
   auto APtr = dynamic_cast<const HypreParMatrix *>(&op);
   MFEM_VERIFY(APtr, "Not a compatible matrix type");

   height = op.Height();
   width = op.Width();

   auto parcsr_op = (hypre_ParCSRMatrix *)const_cast<HypreParMatrix &>(*APtr);
   APtr->HostRead();
   hypre_CSRMatrix *csr_op = hypre_MergeDiagAndOffd(parcsr_op);
   APtr->HypreRead();
   HYPRE_Int *Iptr = csr_op->i;
#if MFEM_HYPRE_VERSION >= 21600
   HYPRE_BigInt *Jptr = csr_op->big_j;
#else
   HYPRE_Int *Jptr = csr_op->j;
#endif

   int n_loc = internal::to_int(csr_op->num_rows);
   row_start = internal::to_int(parcsr_op->first_row_index);

   MUMPS_INT8 nnz = 0, k = 0;
   if (mat_type)
   {
      // Count the nonzeros in the lower triangle for symmetric mode
      for (int i = 0; i < n_loc; i++)
      {
         for (HYPRE_Int j = Iptr[i]; j < Iptr[i + 1]; j++)
         {
            int ii = row_start + i + 1;
#if MFEM_HYPRE_VERSION >= 21600
            HYPRE_BigInt jj = Jptr[k] + 1;
#else
            HYPRE_Int jj = Jptr[k] + 1;
#endif
            if (ii >= jj) { nnz++; }
            k++;
         }
      }
   }
   else
   {
      nnz = csr_op->num_nonzeros;
   }
   int *I = new int[nnz];
   int *J = new int[nnz];

   // Fill in the I and J arrays for COO format in 1-based indexing
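   // (For example, in unsymmetric mode a dense 2x2 diagonal block [a b; c d]
   // whose first global row and column is r, 0-based, becomes the triplets
   // I = {r+1, r+1, r+2, r+2}, J = {r+1, r+2, r+1, r+2}, data = {a, b, c, d}.)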
   k = 0;
   double *data;
   if (mat_type)
   {
      MUMPS_INT8 l = 0;
      data = new double[nnz];
      for (int i = 0; i < n_loc; i++)
      {
         for (HYPRE_Int j = Iptr[i]; j < Iptr[i + 1]; j++)
         {
            int ii = row_start + i + 1;
#if MFEM_HYPRE_VERSION >= 21600
            HYPRE_BigInt jj = Jptr[k] + 1;
#else
            HYPRE_Int jj = Jptr[k] + 1;
#endif
            if (ii >= jj)
            {
               I[l] = ii;
               J[l] = internal::to_int(jj);
               data[l++] = csr_op->data[k];
            }
            k++;
         }
      }
   }
   else
   {
      for (int i = 0; i < n_loc; i++)
      {
         for (HYPRE_Int j = Iptr[i]; j < Iptr[i + 1]; j++)
         {
            I[k] = row_start + i + 1;
            J[k] = internal::to_int(Jptr[k] + 1);
            k++;
         }
      }
      data = csr_op->data;
   }

   // New MUMPS object, or reuse the one from a previous matrix
   if (!id || !reorder_reuse)
   {
      if (id)
      {
         id->job = -2;
         dmumps_c(id);
         delete id;
      }
      id = new DMUMPS_STRUC_C;
      id->sym = mat_type;

      // C to Fortran communicator
      id->comm_fortran = (MUMPS_INT)MPI_Comm_c2f(comm);

      // Host is involved in computation
      id->par = 1;

      // MUMPS init
      id->job = -1;
      dmumps_c(id);

      // Set MUMPS default parameters
      SetParameters();

      id->n = internal::to_int(parcsr_op->global_num_rows);
      id->nnz_loc = nnz;
      id->irn_loc = I;
      id->jcn_loc = J;
      id->a_loc = data;

      // MUMPS analysis
      id->job = 1;
      dmumps_c(id);
   }
   else
   {
      id->irn_loc = I;
      id->jcn_loc = J;
      id->a_loc = data;
   }

   // MUMPS factorization
   id->job = 2;
   {
      const int mem_relax_lim = 200;
      while (true)
      {
         dmumps_c(id);
         if (id->MUMPS_INFOG(1) < 0)
         {
            if (id->MUMPS_INFOG(1) == -8 || id->MUMPS_INFOG(1) == -9)
            {
               id->MUMPS_ICNTL(14) += 20;
               MFEM_VERIFY(id->MUMPS_ICNTL(14) <= mem_relax_lim,
                           "Memory relaxation limit reached for MUMPS factorization");
               if (myid == 0 && print_level > 0)
               {
                  mfem::out << "Re-running MUMPS factorization with memory relaxation "
                            << id->MUMPS_ICNTL(14) << '\n';
               }
            }
            else
            {
               MFEM_ABORT("Error during MUMPS numerical factorization");
            }
         }
         else { break; }
      }
   }
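   // Note: ICNTL(14) is the workspace relaxation in percent and starts at 20
   // (set in SetParameters), so the retries above attempt 40%, 60%, ..., and
   // MFEM_VERIFY aborts once the requested relaxation would exceed 200%.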

   hypre_CSRMatrixDestroy(csr_op);
   delete [] I;
   delete [] J;
   if (mat_type) { delete [] data; }

   id->nrhs = -1; // Set up solution storage on first call to Mult
#if MFEM_MUMPS_VERSION >= 530
   delete [] irhs_loc;
   delete [] isol_loc;
   id->nloc_rhs = n_loc;
   id->lrhs_loc = n_loc;
   id->lsol_loc = id->MUMPS_INFO(23);
   irhs_loc = new int[id->lrhs_loc];
   isol_loc = new int[id->lsol_loc];
   for (int i = 0; i < n_loc; i++)
   {
      irhs_loc[i] = row_start + i + 1;
   }
   id->irhs_loc = irhs_loc;
   id->isol_loc = isol_loc;

   row_starts.SetSize(numProcs);
   MPI_Allgather(&row_start, 1, MPI_INT, row_starts, 1, MPI_INT, comm);
#else
   id->lrhs = id->n;
   if (myid == 0)
   {
      delete [] recv_counts;
      delete [] displs;
      recv_counts = new int[numProcs];
      displs = new int[numProcs];
   }
   MPI_Gather(&n_loc, 1, MPI_INT, recv_counts, 1, MPI_INT, 0, comm);
   if (myid == 0)
   {
      displs[0] = 0;
      int s = 0;
      for (int k = 0; k < numProcs-1; k++)
      {
         s += recv_counts[k];
         displs[k+1] = s;
      }
   }
#endif
}
304 
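// Allocate (or reuse) the RHS/solution buffers for the given number of
// right-hand sides; id->nrhs caches the last value, so repeated solves with
// the same nrhs skip the reallocation below.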
void MUMPSSolver::InitRhsSol(int nrhs) const
{
   if (id->nrhs != nrhs)
   {
#if MFEM_MUMPS_VERSION >= 530
      delete [] rhs_loc;
      delete [] sol_loc;
      rhs_loc = (nrhs > 1) ? new double[nrhs * id->lrhs_loc] : nullptr;
      sol_loc = new double[nrhs * id->lsol_loc];
      id->rhs_loc = rhs_loc;
      id->sol_loc = sol_loc;
#else
      if (myid == 0)
      {
         delete [] rhs_glob;
         rhs_glob = new double[nrhs * id->lrhs];
         id->rhs = rhs_glob;
      }
#endif
   }
   id->nrhs = nrhs;
}

void MUMPSSolver::Mult(const Vector &x, Vector &y) const
{
   Array<const Vector *> X(1);
   Array<Vector *> Y(1);
   X[0] = &x;
   Y[0] = &y;
   ArrayMult(X, Y);
}

void MUMPSSolver::ArrayMult(const Array<const Vector *> &X,
                            Array<Vector *> &Y) const
{
   MFEM_ASSERT(X.Size() == Y.Size(),
               "Number of columns mismatch in MUMPSSolver::Mult!");
   InitRhsSol(X.Size());
#if MFEM_MUMPS_VERSION >= 530
   if (id->nrhs == 1)
   {
      MFEM_ASSERT(X.Size() == 1 && X[0], "Missing Vector in MUMPSSolver::Mult!");
      X[0]->HostRead();
      id->rhs_loc = X[0]->GetData();
   }
   else
   {
      for (int i = 0; i < id->nrhs; i++)
      {
         MFEM_ASSERT(X[i], "Missing Vector in MUMPSSolver::Mult!");
         X[i]->HostRead();
         std::copy(X[i]->GetData(), X[i]->GetData() + X[i]->Size(),
                   id->rhs_loc + i * id->lrhs_loc);
      }
   }

   // MUMPS solve
   id->job = 3;
   dmumps_c(id);

   RedistributeSol(id->isol_loc, id->sol_loc, id->lsol_loc, Y);
#else
   for (int i = 0; i < id->nrhs; i++)
   {
      MFEM_ASSERT(X[i], "Missing Vector in MUMPSSolver::Mult!");
      X[i]->HostRead();
      MPI_Gatherv(X[i]->GetData(), X[i]->Size(), MPI_DOUBLE,
                  id->rhs + i * id->lrhs, recv_counts, displs, MPI_DOUBLE, 0, comm);
   }

   // MUMPS solve
   id->job = 3;
   dmumps_c(id);

   for (int i = 0; i < id->nrhs; i++)
   {
      MFEM_ASSERT(Y[i], "Missing Vector in MUMPSSolver::Mult!");
      Y[i]->HostWrite();
      MPI_Scatterv(id->rhs + i * id->lrhs, recv_counts, displs, MPI_DOUBLE,
                   Y[i]->GetData(), Y[i]->Size(), MPI_DOUBLE, 0, comm);
   }
#endif
}

void MUMPSSolver::MultTranspose(const Vector &x, Vector &y) const
{
   // Set the flag for a transpose solve
   id->MUMPS_ICNTL(9) = 0;
   Mult(x, y);

   // Reset the flag
   id->MUMPS_ICNTL(9) = 1;
}

void MUMPSSolver::ArrayMultTranspose(const Array<const Vector *> &X,
                                     Array<Vector *> &Y) const
{
   // Set the flag for a transpose solve
   id->MUMPS_ICNTL(9) = 0;
   ArrayMult(X, Y);

   // Reset the flag
   id->MUMPS_ICNTL(9) = 1;
}

void MUMPSSolver::SetPrintLevel(int print_lvl)
{
   print_level = print_lvl;
}

void MUMPSSolver::SetMatrixSymType(MatType mtype)
{
   mat_type = mtype;
}

void MUMPSSolver::SetReorderingStrategy(ReorderingStrategy method)
{
   reorder_method = method;
}

void MUMPSSolver::SetReorderingReuse(bool reuse)
{
   reorder_reuse = reuse;
}

#if MFEM_MUMPS_VERSION >= 510
void MUMPSSolver::SetBLRTol(double tol)
{
   blr_tol = tol;
}
#endif

void MUMPSSolver::SetParameters()
{
   // Output stream for error messages
   id->MUMPS_ICNTL(1) = 6;
   // Output stream for diagnostic printing local to each proc
   id->MUMPS_ICNTL(2) = 0;
   // Output stream for global info
   id->MUMPS_ICNTL(3) = 6;
   // Level of error printing
   id->MUMPS_ICNTL(4) = print_level;
   // Input matrix format (assembled)
   id->MUMPS_ICNTL(5) = 0;
   // Use A or A^T
   id->MUMPS_ICNTL(9) = 1;
   // Iterative refinement (disabled)
   id->MUMPS_ICNTL(10) = 0;
   // Error analysis-statistics (disabled)
   id->MUMPS_ICNTL(11) = 0;
   // Use of ScaLAPACK (parallel factorization on root)
   id->MUMPS_ICNTL(13) = 0;
   // Percentage increase of estimated workspace (default = 20%)
   id->MUMPS_ICNTL(14) = 20;
   // Number of OpenMP threads (default)
   id->MUMPS_ICNTL(16) = 0;
   // Matrix input format (distributed)
   id->MUMPS_ICNTL(18) = 3;
   // Schur complement (no Schur complement matrix returned)
   id->MUMPS_ICNTL(19) = 0;
#if MFEM_MUMPS_VERSION >= 530
   // Distributed RHS
   id->MUMPS_ICNTL(20) = 10;
   // Distributed Sol
   id->MUMPS_ICNTL(21) = 1;
#else
   // Centralized RHS
   id->MUMPS_ICNTL(20) = 0;
   // Centralized Sol
   id->MUMPS_ICNTL(21) = 0;
#endif
   // Out-of-core factorization and solve (disabled)
   id->MUMPS_ICNTL(22) = 0;
   // Max size of working memory (default = based on estimates)
   id->MUMPS_ICNTL(23) = 0;
   // Configure reordering
   switch (reorder_method)
   {
      case ReorderingStrategy::AUTOMATIC:
         id->MUMPS_ICNTL(28) = 0;
         id->MUMPS_ICNTL(7) = 7;
         id->MUMPS_ICNTL(29) = 0;
         break;
      case ReorderingStrategy::AMD:
         id->MUMPS_ICNTL(28) = 1;
         id->MUMPS_ICNTL(7) = 0;
         break;
      case ReorderingStrategy::AMF:
         id->MUMPS_ICNTL(28) = 1;
         id->MUMPS_ICNTL(7) = 2;
         break;
      case ReorderingStrategy::PORD:
         id->MUMPS_ICNTL(28) = 1;
         id->MUMPS_ICNTL(7) = 4;
         break;
      case ReorderingStrategy::METIS:
         id->MUMPS_ICNTL(28) = 1;
         id->MUMPS_ICNTL(7) = 5;
         break;
      case ReorderingStrategy::PARMETIS:
         id->MUMPS_ICNTL(28) = 2;
         id->MUMPS_ICNTL(29) = 2;
         break;
      case ReorderingStrategy::SCOTCH:
         id->MUMPS_ICNTL(28) = 1;
         id->MUMPS_ICNTL(7) = 3;
         break;
      case ReorderingStrategy::PTSCOTCH:
         id->MUMPS_ICNTL(28) = 2;
         id->MUMPS_ICNTL(29) = 1;
         break;
      default:
         break; // This should be unreachable
   }
   // Option to activate BLR factorization
#if MFEM_MUMPS_VERSION >= 510
   if (blr_tol > 0.0)
   {
      id->MUMPS_ICNTL(35) = 1;
      id->MUMPS_CNTL(7) = blr_tol;
   }
#endif
}

#if MFEM_MUMPS_VERSION >= 530
int MUMPSSolver::GetRowRank(int i, const Array<int> &row_starts_) const
{
   if (row_starts_.Size() == 1)
   {
      return 0;
   }
   auto up = std::upper_bound(row_starts_.begin(), row_starts_.end(), i);
   return std::distance(row_starts_.begin(), up) - 1;
}
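// For example, with 3 ranks and row_starts_ = {0, 100, 250}, the global row
// i = 137 satisfies 100 <= 137 < 250, so GetRowRank returns 1.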

void MUMPSSolver::RedistributeSol(const int *rmap, const double *x,
                                  const int lx_loc, Array<Vector *> &Y) const
{
   int *send_count = new int[numProcs]();
   for (int i = 0; i < lx_loc; i++)
   {
      int j = rmap[i] - 1;
      int row_rank = GetRowRank(j, row_starts);
      if (myid == row_rank) { continue; }
      send_count[row_rank]++;
   }

   int *recv_count = new int[numProcs];
   MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm);

   int *send_displ = new int[numProcs]; send_displ[0] = 0;
   int *recv_displ = new int[numProcs]; recv_displ[0] = 0;
   int sbuff_size = send_count[numProcs-1];
   int rbuff_size = recv_count[numProcs-1];
   for (int k = 0; k < numProcs - 1; k++)
   {
      send_displ[k + 1] = send_displ[k] + send_count[k];
      recv_displ[k + 1] = recv_displ[k] + recv_count[k];
      sbuff_size += send_count[k];
      rbuff_size += recv_count[k];
   }
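   // After this loop, send_displ/recv_displ hold exclusive prefix sums of the
   // counts, and sbuff_size/rbuff_size are the full totals (the last count is
   // taken before the loop, the remaining numProcs-1 are added inside it).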

   int *sendbuf_index = new int[sbuff_size];
   double *sendbuf_values = new double[sbuff_size];
   int *recvbuf_index = new int[rbuff_size];
   double *recvbuf_values = new double[rbuff_size];
   int *soffs = new int[numProcs]();

   for (int i = 0; i < lx_loc; i++)
   {
      int j = rmap[i] - 1;
      int row_rank = GetRowRank(j, row_starts);
      if (myid != row_rank)
      {
         int k = send_displ[row_rank] + soffs[row_rank];
         sendbuf_index[k] = j;
         soffs[row_rank]++;
      }
   }

   MPI_Alltoallv(sendbuf_index, send_count, send_displ, MPI_INT,
                 recvbuf_index, recv_count, recv_displ, MPI_INT, comm);

   for (int rhs = 0; rhs < Y.Size(); rhs++)
   {
      MFEM_ASSERT(Y[rhs], "Missing Vector in MUMPSSolver::Mult!");
      Y[rhs]->HostWrite();

      std::fill(soffs, soffs + numProcs, 0);
      for (int i = 0; i < lx_loc; i++)
      {
         int j = rmap[i] - 1;
         int row_rank = GetRowRank(j, row_starts);
         if (myid == row_rank)
         {
            int local_index = j - row_start;
            (*Y[rhs])(local_index) = x[rhs * lx_loc + i];
         }
         else
         {
            int k = send_displ[row_rank] + soffs[row_rank];
            sendbuf_values[k] = x[rhs * lx_loc + i];
            soffs[row_rank]++;
         }
      }

      MPI_Alltoallv(sendbuf_values, send_count, send_displ, MPI_DOUBLE,
                    recvbuf_values, recv_count, recv_displ, MPI_DOUBLE, comm);

      // Unpack the recv buffer
      for (int i = 0; i < rbuff_size; i++)
      {
         int local_index = recvbuf_index[i] - row_start;
         (*Y[rhs])(local_index) = recvbuf_values[i];
      }
   }

   delete [] recvbuf_values;
   delete [] recvbuf_index;
   delete [] soffs;
   delete [] sendbuf_values;
   delete [] sendbuf_index;
   delete [] recv_displ;
   delete [] send_displ;
   delete [] recv_count;
   delete [] send_count;
}
#endif

} // namespace mfem

#endif // MFEM_USE_MPI
#endif // MFEM_USE_MUMPS
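
As a usage illustration (not part of the file above): a minimal sketch of solving a linear system with this class, assuming MFEM was built with MFEM_USE_MUMPS and MFEM_USE_MPI, and that a HypreParMatrix A and conforming parallel vectors B and X have already been assembled elsewhere. The helper name SolveWithMUMPS is a placeholder.

#include "mfem.hpp"

// Hypothetical helper: A, B, X are assumed to be set up by the caller.
void SolveWithMUMPS(mfem::HypreParMatrix &A, mfem::Vector &B, mfem::Vector &X)
{
   mfem::MUMPSSolver mumps(A.GetComm());
   mumps.SetMatrixSymType(mfem::MUMPSSolver::MatType::UNSYMMETRIC);
   mumps.SetOperator(A); // triggers MUMPS analysis and numerical factorization
   mumps.Mult(B, X);     // forward/backward substitution: X = A^{-1} B
}

SetMatrixSymType is called before SetOperator because the symmetry flag (id->sym) is fixed when the MUMPS instance is created inside SetOperator.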