1 // MFEM Example 1 - Parallel Version
2 // AmgX Modification
3 //
4 // Compile with: make ex1p
5 //
6 // AmgX sample runs:
7 // mpirun -np 4 ex1p
8 // mpirun -np 4 ex1p -d cuda
9 // mpirun -np 10 ex1p --amgx-file amg_pcg.json --amgx-mpi-teams
10 // mpirun -np 4 ex1p --amgx-file amg_pcg.json
11 //
12 // Description: This example code demonstrates the use of MFEM to define a
13 // simple finite element discretization of the Laplace problem
14 // -Delta u = 1 with homogeneous Dirichlet boundary conditions.
15 // Specifically, we discretize using a FE space of the specified
16 // order, or if order < 1 using an isoparametric/isogeometric
17 // space (i.e. quadratic for quadratic curvilinear mesh, NURBS for
18 // NURBS mesh, etc.)
19 //
20 // The example highlights the use of mesh refinement, finite
21 // element grid functions, as well as linear and bilinear forms
22 // corresponding to the left-hand side and right-hand side of the
23 // discrete linear system. We also cover the explicit elimination
24 // of essential boundary conditions, static condensation, and the
25 // optional connection to the GLVis tool for visualization.
26
27 #include "mfem.hpp"
28 #include <fstream>
29 #include <iostream>
30
31 using namespace std;
32 using namespace mfem;
33
34 #ifndef MFEM_USE_AMGX
35 #error This example requires that MFEM is built with MFEM_USE_AMGX=YES
36 #endif
37
38 int main(int argc, char *argv[])
39 {
40  // 1. Initialize MPI.
41  int num_procs, myid;
42  MPI_Init(&argc, &argv);
43  MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
44  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
45
46  // 2. Parse command-line options.
47  const char *mesh_file = "../../data/star.mesh";
48  int order = 1;
49  bool static_cond = false;
50  bool pa = false;
51  const char *device_config = "cpu";
52  bool visualization = true;
53  bool amgx_lib = true;
54  bool amgx_mpi_teams = false;
55  const char* amgx_json_file = ""; // JSON file for AmgX
56  int ndevices = 1;
57
58  OptionsParser args(argc, argv);
60  "Mesh file to use.");
62  "Finite element order (polynomial degree) or -1 for"
63  " isoparametric space.");
65  "--no-static-condensation", "Enable static condensation.");
67  "--no-partial-assembly", "Enable Partial Assembly.");
69  "--no-amgx-lib", "Use AmgX in example.");
71  "AMGX solver config file (overrides --amgx-solver, --amgx-verbose)");
73  "--amgx-mpi-gpu-exclusive", "--amgx-mpi-gpu-exclusive",
74  "Create MPI teams when using AmgX to load balance between ranks and GPUs.");
76  "Device configuration string, see Device::Configure().");
78  "--no-visualization",
79  "Enable or disable GLVis visualization.");
81  "Number of GPU devices per node (Only used if amgx_mpi_teams is true).");
82
83  args.Parse();
84  if (!args.Good())
85  {
86  if (myid == 0)
87  {
88  args.PrintUsage(cout);
89  }
90  MPI_Finalize();
91  return 1;
92  }
93  if (myid == 0)
94  {
95  args.PrintOptions(cout);
96  }
97
98  // 3. Enable hardware devices such as GPUs, and programming models such as
99  // CUDA, OCCA, RAJA and OpenMP based on command line options.
100  Device device(device_config);
101  if (myid == 0) { device.Print(); }
102
103  // 4. Read the (serial) mesh from the given mesh file on all processors. We
104  // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface
105  // and volume meshes with the same code.
106  Mesh mesh(mesh_file, 1, 1);
107  int dim = mesh.Dimension();
108
109  // 5. Refine the serial mesh on all processors to increase the resolution. In
110  // this example we do 'ref_levels' of uniform refinement. We choose
111  // 'ref_levels' to be the largest number that gives a final mesh with no
112  // more than 10,000 elements.
113  {
114  int ref_levels =
115  (int)floor(log(10000./mesh.GetNE())/log(2.)/dim);
116  for (int l = 0; l < ref_levels; l++)
117  {
118  mesh.UniformRefinement();
119  }
120  }
121
122  // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine
123  // this mesh further in parallel to increase the resolution. Once the
124  // parallel mesh is defined, the serial mesh can be deleted.
125  ParMesh pmesh(MPI_COMM_WORLD, mesh);
126  mesh.Clear();
127  {
128  int par_ref_levels = 2;
129  for (int l = 0; l < par_ref_levels; l++)
130  {
131  pmesh.UniformRefinement();
132  }
133  }
134
135  // 7. Define a parallel finite element space on the parallel mesh. Here we
136  // use continuous Lagrange finite elements of the specified order. If
137  // order < 1, we instead use an isoparametric/isogeometric space.
139  bool delete_fec;
140  if (order > 0)
141  {
142  fec = new H1_FECollection(order, dim);
143  delete_fec = true;
144  }
145  else if (pmesh.GetNodes())
146  {
147  fec = pmesh.GetNodes()->OwnFEC();
148  delete_fec = false;
149  if (myid == 0)
150  {
151  cout << "Using isoparametric FEs: " << fec->Name() << endl;
152  }
153  }
154  else
155  {
156  fec = new H1_FECollection(order = 1, dim);
157  delete_fec = true;
158  }
159  ParFiniteElementSpace fespace(&pmesh, fec);
160  HYPRE_BigInt size = fespace.GlobalTrueVSize();
161  if (myid == 0)
162  {
163  cout << "Number of finite element unknowns: " << size << endl;
164  }
165
166  // 8. Determine the list of true (i.e. parallel conforming) essential
167  // boundary dofs. In this example, the boundary conditions are defined
168  // by marking all the boundary attributes from the mesh as essential
169  // (Dirichlet) and converting them to a list of true dofs.
170  Array<int> ess_tdof_list;
171  if (pmesh.bdr_attributes.Size())
172  {
173  Array<int> ess_bdr(pmesh.bdr_attributes.Max());
174  ess_bdr = 1;
175  fespace.GetEssentialTrueDofs(ess_bdr, ess_tdof_list);
176  }
177
178  // 9. Set up the parallel linear form b(.) which corresponds to the
179  // right-hand side of the FEM linear system, which in this case is
180  // (1,phi_i) where phi_i are the basis functions in fespace.
181  ParLinearForm b(&fespace);
182  ConstantCoefficient one(1.0);
184  b.Assemble();
185
186  // 10. Define the solution vector x as a parallel finite element grid function
187  // corresponding to fespace. Initialize x with initial guess of zero,
188  // which satisfies the boundary conditions.
189  ParGridFunction x(&fespace);
190  x = 0.0;
191
192  // 11. Set up the parallel bilinear form a(.,.) on the finite element space
193  // corresponding to the Laplacian operator -Delta, by adding the Diffusion
194  // domain integrator.
195  ParBilinearForm a(&fespace);
196  if (pa) { a.SetAssemblyLevel(AssemblyLevel::PARTIAL); }
198
199  // 12. Assemble the parallel bilinear form and the corresponding linear
200  // system, applying any necessary transformations such as: parallel
201  // assembly, eliminating boundary conditions, applying conforming
202  // constraints for non-conforming AMR, static condensation, etc.
203  if (static_cond) { a.EnableStaticCondensation(); }
204  a.Assemble();
205
206  OperatorPtr A;
207  Vector B, X;
208  a.FormLinearSystem(ess_tdof_list, x, b, A, X, B);
209
210  // 13. Solve the linear system A X = B.
211  // * With full assembly, use the BoomerAMG preconditioner from hypre.
212  // * If AmgX is available solve using amg preconditioner.
213  // * With partial assembly, use Jacobi smoothing, for now.
214  Solver *prec = NULL;
215  if (pa)
216  {
217  if (UsesTensorBasis(fespace))
218  {
219  prec = new OperatorJacobiSmoother(a, ess_tdof_list);
220  }
221
222  CGSolver cg(MPI_COMM_WORLD);
223  cg.SetRelTol(1e-12);
224  cg.SetMaxIter(2000);
225  cg.SetPrintLevel(1);
226  if (prec) { cg.SetPreconditioner(*prec); }
227  cg.SetOperator(*A);
228  cg.Mult(B, X);
229  delete prec;
230  }
231  else if (amgx_lib && strcmp(amgx_json_file,"") == 0)
232  {
233  MFEM_VERIFY(!amgx_mpi_teams,
234  "Please add JSON file to try AmgX with MPI teams mode");
235
236  bool amgx_verbose = false;
237  prec = new AmgXSolver(MPI_COMM_WORLD, AmgXSolver::PRECONDITIONER,
238  amgx_verbose);
239
240  CGSolver cg(MPI_COMM_WORLD);
241  cg.SetRelTol(1e-12);
242  cg.SetMaxIter(2000);
243  cg.SetPrintLevel(1);
244  if (prec) { cg.SetPreconditioner(*prec); }
245  cg.SetOperator(*A);
246  cg.Mult(B, X);
247  delete prec;
248
249  }
250  else if (amgx_lib && strcmp(amgx_json_file,"") != 0)
251  {
252  AmgXSolver amgx;
254
255  if (amgx_mpi_teams)
256  {
257  // Forms MPI teams to load balance between MPI ranks and GPUs
258  amgx.InitMPITeams(MPI_COMM_WORLD, ndevices);
259  }
260  else
261  {
262  // Assumes each MPI rank is paired with a GPU
263  amgx.InitExclusiveGPU(MPI_COMM_WORLD);
264  }
265
266  amgx.SetOperator(*A.As<HypreParMatrix>());
267  amgx.SetConvergenceCheck(true);
268  amgx.Mult(B, X);
269
270  // Release MPI communicators and resources created by AmgX
271  amgx.Finalize();
272  }
273  else
274  {
275  prec = new HypreBoomerAMG;
276
277  CGSolver cg(MPI_COMM_WORLD);
278  cg.SetRelTol(1e-12);
279  cg.SetMaxIter(2000);
280  cg.SetPrintLevel(1);
281  if (prec) { cg.SetPreconditioner(*prec); }
282  cg.SetOperator(*A);
283  cg.Mult(B, X);
284  delete prec;
285  }
286
287  // 14. Recover the parallel grid function corresponding to X. This is the
288  // local finite element solution on each processor.
289  a.RecoverFEMSolution(X, b, x);
290
291  // 15. Save the refined mesh and the solution in parallel. This output can
292  // be viewed later using GLVis: "glvis -np <np> -m mesh -g sol".
293  {
294  ostringstream mesh_name, sol_name;
295  mesh_name << "mesh." << setfill('0') << setw(6) << myid;
296  sol_name << "sol." << setfill('0') << setw(6) << myid;
297
298  ofstream mesh_ofs(mesh_name.str().c_str());
299  mesh_ofs.precision(8);
300  pmesh.Print(mesh_ofs);
301
302  ofstream sol_ofs(sol_name.str().c_str());
303  sol_ofs.precision(8);
304  x.Save(sol_ofs);
305  }
306
307  // 16. Send the solution by socket to a GLVis server.
308  if (visualization)
309  {
310  char vishost[] = "localhost";
311  int visport = 19916;
312  socketstream sol_sock(vishost, visport);
313  sol_sock << "parallel " << num_procs << " " << myid << "\n";
314  sol_sock.precision(8);
315  sol_sock << "solution\n" << pmesh << x << flush;
316  }
317
318  // 17. Free the used memory.
319  if (delete_fec)
320  {
321  delete fec;
322  }
323  MPI_Finalize();
324
325  return 0;
326 }
