communication.hpp (MFEM v4.1.0)
// Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.

#ifndef MFEM_COMMUNICATION
#define MFEM_COMMUNICATION

#include "../config/config.hpp"

#ifdef MFEM_USE_MPI

#include "array.hpp"
#include "table.hpp"
#include "sets.hpp"
#include "globals.hpp"
#include <mpi.h>


namespace mfem
{

/** @brief A simple convenience class that calls MPI_Init() at construction and
    MPI_Finalize() at destruction. It also provides easy access to
    MPI_COMM_WORLD's rank and size. */
class MPI_Session
{
protected:
   int world_rank, world_size;
   void GetRankAndSize();
public:
   MPI_Session() { MPI_Init(NULL, NULL); GetRankAndSize(); }
   MPI_Session(int &argc, char **&argv)
   { MPI_Init(&argc, &argv); GetRankAndSize(); }
   ~MPI_Session() { MPI_Finalize(); }
   /// Return MPI_COMM_WORLD's rank.
   int WorldRank() const { return world_rank; }
   /// Return MPI_COMM_WORLD's size.
   int WorldSize() const { return world_size; }
   /// Return true if WorldRank() == 0.
   bool Root() const { return world_rank == 0; }
};
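
/* Example (illustrative sketch, not part of this header): a minimal parallel
   driver using MPI_Session. The program body is a placeholder.

      int main(int argc, char *argv[])
      {
         mfem::MPI_Session mpi(argc, argv);  // MPI_Init is called here ...
         if (mpi.Root())
         {
            mfem::out << "Running on " << mpi.WorldSize() << " MPI ranks.\n";
         }
         // ... parallel setup, assembly and solve ...
         return 0;                           // ... MPI_Finalize at destruction
      }
*/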

class GroupTopology
{
private:
   MPI_Comm MyComm;

   /* The shared entities (e.g. vertices, faces and edges) are split into
      groups, each group determined by the set of participating processors.
      They are numbered locally in lproc. Assumptions:
      - group 0 is the 'local' group
      - groupmaster_lproc[0] = 0
      - lproc_proc[0] = MyRank */

   // Neighbor ids (lproc) in each group.
   Table group_lproc;
   // Master neighbor id for each group.
   Array<int> groupmaster_lproc;
   // MPI rank of each neighbor.
   Array<int> lproc_proc;
   // Group --> Group number in the master.
   Array<int> group_mgroup;

   void ProcToLProc();

public:
   GroupTopology() : MyComm(0) {}
   GroupTopology(MPI_Comm comm) { MyComm = comm; }

   /// Copy constructor
   GroupTopology(const GroupTopology &gt);
   void SetComm(MPI_Comm comm) { MyComm = comm; }

   MPI_Comm GetComm() const { return MyComm; }
   int MyRank() const { int r; MPI_Comm_rank(MyComm, &r); return r; }
   int NRanks() const { int s; MPI_Comm_size(MyComm, &s); return s; }

   void Create(ListOfIntegerSets &groups, int mpitag);

   int NGroups() const { return group_lproc.Size(); }
   // return the number of neighbors including the local processor
   int GetNumNeighbors() const { return lproc_proc.Size(); }
   int GetNeighborRank(int i) const { return lproc_proc[i]; }
   // am I master for group 'g'?
   bool IAmMaster(int g) const { return (groupmaster_lproc[g] == 0); }
   // return the neighbor index of the group master for a given group.
   // neighbor 0 is the local processor
   int GetGroupMaster(int g) const { return groupmaster_lproc[g]; }
   // return the rank of the group master for a given group
   int GetGroupMasterRank(int g) const
   { return lproc_proc[groupmaster_lproc[g]]; }
   // for a given group return the group number in the master
   int GetGroupMasterGroup(int g) const { return group_mgroup[g]; }
   // get the number of processors in a group
   int GetGroupSize(int g) const { return group_lproc.RowSize(g); }
   // return a pointer to a list of neighbors for a given group.
   // neighbor 0 is the local processor
   const int *GetGroup(int g) const { return group_lproc.GetRow(g); }

   /// Save the data in a stream.
   void Save(std::ostream &out) const;
   /// Load the data from a stream.
   void Load(std::istream &in);

   /// Copy
   void Copy(GroupTopology & copy) const;

   virtual ~GroupTopology() {}
};
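
/* Example (illustrative sketch, not part of this header): inspecting an
   already-constructed GroupTopology, e.g. one built with Create() from a
   ListOfIntegerSets of shared-entity groups.

      void PrintGroupInfo(const mfem::GroupTopology &gt)
      {
         for (int g = 0; g < gt.NGroups(); g++)
         {
            mfem::out << "group " << g
                      << ": size = " << gt.GetGroupSize(g)
                      << ", master rank = " << gt.GetGroupMasterRank(g)
                      << (gt.IAmMaster(g) ? " (this rank)" : "") << '\n';
         }
      }
*/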

/** @brief Communicator performing operations within groups defined by a
    GroupTopology with arbitrary-size data associated with each group. */
class GroupCommunicator
{
public:
   /// Communication mode.
   enum Mode
   {
      byGroup,    ///< Communications are performed one group at a time.
      byNeighbor  /**< Communications are performed one neighbor at a time,
                       aggregating over groups. */
   };

protected:
   GroupTopology &gtopo;
   Mode mode;
   Table group_ldof;
   Table group_ltdof; // only for groups for which this processor is master.
   MPI_Request *requests;
   // MPI_Status *statuses;
   // comm_lock: 0 - no lock, 1 - locked for Bcast, 2 - locked for Reduce
   mutable int comm_lock;
   mutable int num_requests;
   int *buf_offsets; // size = max(number of groups, number of neighbors)

public:
   /// Construct a GroupCommunicator object.
   /** The object must be initialized before it can be used to perform any
       operations. To initialize the object, either
       - call Create() or
       - initialize the Table reference returned by GroupLDofTable() and then
       call Finalize().
   */
   GroupCommunicator(GroupTopology &gt, Mode m=byNeighbor);

   /** @brief Initialize the communicator from a local-dof to group map.
       Finalize() is called internally. */
   void Create(const Array<int> &ldof_group);

   /** @brief Fill-in the returned Table reference to initialize the
       GroupCommunicator then call Finalize(). */
   Table &GroupLDofTable() { return group_ldof; }

   /// Read-only access to group-ldof Table.
   const Table &GroupLDofTable() const { return group_ldof; }

   /// Allocate internal buffers after the GroupLDofTable is defined
   void Finalize();

   /// Initialize the internal group_ltdof Table.
   /** This method must be called before performing operations that use local
       data layout 2, see CopyGroupToBuffer() for layout descriptions. */
   void SetLTDofTable(const Array<int> &ldof_ltdof);

   /// Get a reference to the associated GroupTopology object
   GroupTopology &GetGroupTopology() { return gtopo; }

   /// Get a const reference to the associated GroupTopology object
   const GroupTopology &GetGroupTopology() const { return gtopo; }

   /// Dofs to be sent to communication neighbors
   void GetNeighborLTDofTable(Table &nbr_ltdof) const;

   /// Dofs to be received from communication neighbors
   void GetNeighborLDofTable(Table &nbr_ldof) const;

   /** @brief Data structure on which we define reduce operations.

       The data is associated with (and the operation is performed on) one group
       at a time. */
   template <class T> struct OpData
   {
      int nldofs, nb;
      const int *ldofs;
      T *ldata, *buf;
   };

   /** @brief Copy the entries corresponding to the group @a group from the
       local array @a ldata to the buffer @a buf. */
   /** The @a layout of the local array can be:
       - 0 - @a ldata is an array on all ldofs: copied indices:
         `{ J[j] : I[group] <= j < I[group+1] }` where `I,J=group_ldof.{I,J}`
       - 1 - @a ldata is an array on the shared ldofs: copied indices:
         `{ j : I[group] <= j < I[group+1] }` where `I,J=group_ldof.{I,J}`
       - 2 - @a ldata is an array on the true ldofs, ltdofs: copied indices:
         `{ J[j] : I[group] <= j < I[group+1] }` where `I,J=group_ltdof.{I,J}`.
       @returns The pointer @a buf plus the number of elements in the group. */
   template <class T>
   T *CopyGroupToBuffer(const T *ldata, T *buf, int group, int layout) const;

   /** @brief Copy the entries corresponding to the group @a group from the
       buffer @a buf to the local array @a ldata. */
   /** For a description of @a layout, see CopyGroupToBuffer().
       @returns The pointer @a buf plus the number of elements in the group. */
   template <class T>
   const T *CopyGroupFromBuffer(const T *buf, T *ldata, int group,
                                int layout) const;

   /** @brief Perform the reduction operation @a Op to the entries of group
       @a group using the values from the buffer @a buf and the values from the
       local array @a ldata, saving the result in the latter. */
   /** For a description of @a layout, see CopyGroupToBuffer().
       @returns The pointer @a buf plus the number of elements in the group. */
   template <class T>
   const T *ReduceGroupFromBuffer(const T *buf, T *ldata, int group,
                                  int layout, void (*Op)(OpData<T>)) const;

   /// Begin a broadcast within each group where the master is the root.
   /** For a description of @a layout, see CopyGroupToBuffer(). */
   template <class T> void BcastBegin(T *ldata, int layout) const;

   /** @brief Finalize a broadcast started with BcastBegin().

       The output data @a layout can be:
       - 0 - @a ldata is an array on all ldofs; the input layout should be
         either 0 or 2
       - 1 - @a ldata is the same array as given to BcastBegin(); the input
         layout should be 1.

       For more details about @a layout, see CopyGroupToBuffer(). */
   template <class T> void BcastEnd(T *ldata, int layout) const;

   /** @brief Broadcast within each group where the master is the root.

       The data @a layout can be either 0 or 1.

       For a description of @a layout, see CopyGroupToBuffer(). */
   template <class T> void Bcast(T *ldata, int layout) const
   {
      BcastBegin(ldata, layout);
      BcastEnd(ldata, layout);
   }

   /// Broadcast within each group where the master is the root.
   template <class T> void Bcast(T *ldata) const { Bcast<T>(ldata, 0); }
   /// Broadcast within each group where the master is the root.
   template <class T> void Bcast(Array<T> &ldata) const
   { Bcast<T>((T *)ldata); }

   /** @brief Begin reduction operation within each group where the master is
       the root. */
   /** The input data layout is an array on all ldofs, i.e. layout 0, see
       CopyGroupToBuffer().

       The reduce operation will be specified when calling ReduceEnd(). This
       method is instantiated for int and double. */
   template <class T> void ReduceBegin(const T *ldata) const;

   /** @brief Finalize reduction operation started with ReduceBegin().

       The output data @a layout can be either 0 or 2, see CopyGroupToBuffer().

       The reduce operation is given by the third argument (see below for a
       list of the supported operations.) This method is instantiated for int
       and double.

       @note If the output data layout is 2, then the data from the @a ldata
       array passed to this call is used in the reduction operation, instead of
       the data from the @a ldata array passed to ReduceBegin(). Therefore, the
       data for master-groups has to be identical in both arrays.
   */
   template <class T> void ReduceEnd(T *ldata, int layout,
                                     void (*Op)(OpData<T>)) const;

   /** @brief Reduce within each group where the master is the root.

       The reduce operation is given by the second argument (see below for a
       list of the supported operations.) */
   template <class T> void Reduce(T *ldata, void (*Op)(OpData<T>)) const
   {
      ReduceBegin(ldata);
      ReduceEnd(ldata, 0, Op);
   }

   /// Reduce within each group where the master is the root.
   template <class T> void Reduce(Array<T> &ldata, void (*Op)(OpData<T>)) const
   { Reduce<T>((T *)ldata, Op); }

   /// Reduce operation Sum, instantiated for int and double
   template <class T> static void Sum(OpData<T>);
   /// Reduce operation Min, instantiated for int and double
   template <class T> static void Min(OpData<T>);
   /// Reduce operation Max, instantiated for int and double
   template <class T> static void Max(OpData<T>);
   /// Reduce operation bitwise OR, instantiated for int only
   template <class T> static void BitOR(OpData<T>);

   /// Print information about the GroupCommunicator from all MPI ranks.
   void PrintInfo(std::ostream &out = mfem::out) const;

   /** @brief Destroy a GroupCommunicator object, deallocating internal data
       structures and buffers. */
   ~GroupCommunicator();
};
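
/* Example (illustrative sketch, not part of this header): building a
   GroupCommunicator from a local-dof -> group map and using it to make shared
   data consistent. 'gt' and 'ldof_group' are assumed to be provided by the
   caller (e.g. by a parallel mesh/space that owns the GroupTopology).

      void Synchronize(mfem::GroupTopology &gt,
                       const mfem::Array<int> &ldof_group,
                       mfem::Array<double> &ldata)
      {
         mfem::GroupCommunicator gcomm(gt);
         gcomm.Create(ldof_group);   // fills group_ldof and calls Finalize()

         // Sum the contributions from all ranks sharing each dof ...
         gcomm.Reduce<double>(ldata, mfem::GroupCommunicator::Sum);
         // ... then broadcast the result from each group's master.
         gcomm.Bcast(ldata);
      }
*/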


/// \brief Variable-length MPI message containing unspecific binary data.
template<int Tag>
struct VarMessage
{
   std::string data;
   MPI_Request send_request;

   /** Non-blocking send to processor 'rank'. Returns immediately. Completion
       (as tested by MPI_Wait/Test) does not mean the message was received --
       it may be on its way or just buffered locally. */
   void Isend(int rank, MPI_Comm comm)
   {
      Encode(rank);
      MPI_Isend((void*) data.data(), data.length(), MPI_BYTE, rank, Tag, comm,
                &send_request);
   }

   /** Non-blocking synchronous send to processor 'rank'. Returns immediately.
       Completion (MPI_Wait/Test) means that the message was received. */
   void Issend(int rank, MPI_Comm comm)
   {
      Encode(rank);
      MPI_Issend((void*) data.data(), data.length(), MPI_BYTE, rank, Tag, comm,
                 &send_request);
   }

   /// Helper to send all messages in a rank-to-message map container.
   template<typename MapT>
   static void IsendAll(MapT& rank_msg, MPI_Comm comm)
   {
      for (auto it = rank_msg.begin(); it != rank_msg.end(); ++it)
      {
         it->second.Isend(it->first, comm);
      }
   }

   /// Helper to wait for all messages in a map container to be sent.
   template<typename MapT>
   static void WaitAllSent(MapT& rank_msg)
   {
      for (auto it = rank_msg.begin(); it != rank_msg.end(); ++it)
      {
         MPI_Wait(&it->second.send_request, MPI_STATUS_IGNORE);
         it->second.Clear();
      }
   }

   /** Return true if all messages in the map container were sent, otherwise
       return false, without waiting. */
   template<typename MapT>
   static bool TestAllSent(MapT& rank_msg)
   {
      for (auto it = rank_msg.begin(); it != rank_msg.end(); ++it)
      {
         VarMessage &msg = it->second;
         if (msg.send_request != MPI_REQUEST_NULL)
         {
            int sent;
            MPI_Test(&msg.send_request, &sent, MPI_STATUS_IGNORE);
            if (!sent) { return false; }
            msg.Clear();
         }
      }
      return true;
   }

   /** Blocking probe for incoming message of this type from any rank.
       Returns the rank and message size. */
   static void Probe(int &rank, int &size, MPI_Comm comm)
   {
      MPI_Status status;
      MPI_Probe(MPI_ANY_SOURCE, Tag, comm, &status);
      rank = status.MPI_SOURCE;
      MPI_Get_count(&status, MPI_BYTE, &size);
   }

   /** Non-blocking probe for incoming message of this type from any rank.
       If there is an incoming message, returns true and sets 'rank' and 'size'.
       Otherwise returns false. */
   static bool IProbe(int &rank, int &size, MPI_Comm comm)
   {
      int flag;
      MPI_Status status;
      MPI_Iprobe(MPI_ANY_SOURCE, Tag, comm, &flag, &status);
      if (!flag) { return false; }

      rank = status.MPI_SOURCE;
      MPI_Get_count(&status, MPI_BYTE, &size);
      return true;
   }

   /// Post-probe receive from processor 'rank' of message size 'size'.
   void Recv(int rank, int size, MPI_Comm comm)
   {
      MFEM_ASSERT(size >= 0, "");
      data.resize(size);
      MPI_Status status;
      MPI_Recv((void*) data.data(), size, MPI_BYTE, rank, Tag, comm, &status);
#ifdef MFEM_DEBUG
      int count;
      MPI_Get_count(&status, MPI_BYTE, &count);
      MFEM_VERIFY(count == size, "");
#endif
      Decode(rank);
   }

   /// Like Recv(), but throw away the message.
   void RecvDrop(int rank, int size, MPI_Comm comm)
   {
      data.resize(size);
      MPI_Status status;
      MPI_Recv((void*) data.data(), size, MPI_BYTE, rank, Tag, comm, &status);
      data.resize(0); // don't decode
   }

   /// Helper to receive all messages in a rank-to-message map container.
   template<typename MapT>
   static void RecvAll(MapT& rank_msg, MPI_Comm comm)
   {
      int recv_left = rank_msg.size();
      while (recv_left > 0)
      {
         int rank, size;
         Probe(rank, size, comm);
         MFEM_ASSERT(rank_msg.find(rank) != rank_msg.end(), "Unexpected message"
                     " (tag " << Tag << ") from rank " << rank);
         // NOTE: no guard against receiving two messages from the same rank
         rank_msg[rank].Recv(rank, size, comm);
         --recv_left;
      }
   }

   VarMessage() : send_request(MPI_REQUEST_NULL) {}
   void Clear() { data.clear(); send_request = MPI_REQUEST_NULL; }

   virtual ~VarMessage()
   {
      MFEM_ASSERT(send_request == MPI_REQUEST_NULL,
                  "WaitAllSent was not called after Isend");
   }

   VarMessage(const VarMessage &other)
      : data(other.data), send_request(other.send_request)
   {
      MFEM_ASSERT(send_request == MPI_REQUEST_NULL,
                  "Cannot copy message with a pending send.");
   }

protected:
   virtual void Encode(int rank) {}
   virtual void Decode(int rank) {}
};
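
/* Example (illustrative sketch, not part of this header): a concrete message
   type derived from VarMessage and the typical exchange pattern. The tag value
   (289), the payload layout, and all names are arbitrary choices made for this
   sketch; it assumes <map>, <sstream> and <vector> are included.

      struct IntListMessage : public mfem::VarMessage<289>
      {
         std::vector<int> values; // payload, filled before Encode()

      protected:
         virtual void Encode(int rank)
         {
            std::ostringstream os;
            int n = values.size();
            os.write((const char *) &n, sizeof(n));
            os.write((const char *) values.data(), n*sizeof(int));
            data = os.str();
         }
         virtual void Decode(int rank)
         {
            std::istringstream is(data);
            int n;
            is.read((char *) &n, sizeof(n));
            values.resize(n);
            is.read((char *) values.data(), n*sizeof(int));
         }
      };

      // Typical exchange, with one entry per peer rank in each map:
      //    std::map<int, IntListMessage> send_msgs, recv_msgs;
      //    ... fill send_msgs[peer].values and create empty recv_msgs[peer] ...
      //    IntListMessage::IsendAll(send_msgs, comm);
      //    IntListMessage::RecvAll(recv_msgs, comm);
      //    IntListMessage::WaitAllSent(send_msgs);
*/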


/// Helper struct to convert a C++ type to an MPI type
template <typename Type> struct MPITypeMap;

// Specializations of MPITypeMap; mpi_type initialized in communication.cpp:
template<> struct MPITypeMap<int>    { static const MPI_Datatype mpi_type; };
template<> struct MPITypeMap<double> { static const MPI_Datatype mpi_type; };
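
/* Example (illustrative sketch, not part of this header): using MPITypeMap in
   generic code to select the MPI datatype matching a template parameter. The
   function name GlobalSum is a placeholder.

      template <typename T>
      void GlobalSum(T *buf, int count, MPI_Comm comm)
      {
         MPI_Allreduce(MPI_IN_PLACE, buf, count,
                       mfem::MPITypeMap<T>::mpi_type, MPI_SUM, comm);
      }
*/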


/** Reorder MPI ranks to follow the Z-curve within the physical machine topology
    (provided that functions to query physical node coordinates are available).
    Returns a new communicator with reordered ranks. */
MPI_Comm ReorderRanksZCurve(MPI_Comm comm);
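
/* Example (illustrative sketch, not part of this header): obtaining a
   reordered communicator and using it in place of MPI_COMM_WORLD when
   constructing parallel objects.

      MPI_Comm zcomm = mfem::ReorderRanksZCurve(MPI_COMM_WORLD);
      // ... build the parallel mesh/solvers on 'zcomm' ...
      // Since 'zcomm' is a new communicator, free it when no longer needed:
      MPI_Comm_free(&zcomm);
*/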


} // namespace mfem

#endif // MFEM_USE_MPI

#endif // MFEM_COMMUNICATION