MFEM v3.3.2
Finite element discretization library
communication.hpp
// Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-443211. All Rights
// reserved. See file COPYRIGHT for details.
//
// This file is part of the MFEM library. For more information and source code
// availability see http://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License (as published by the Free
// Software Foundation) version 2.1 dated February 1999.

#ifndef MFEM_COMMUNICATION
#define MFEM_COMMUNICATION

#include "../config/config.hpp"

#ifdef MFEM_USE_MPI

#include "array.hpp"
#include "table.hpp"
#include "sets.hpp"
#include "globals.hpp"
#include <mpi.h>


namespace mfem
{

/** @brief A simple convenience class that calls MPI_Init() at construction and
    MPI_Finalize() at destruction. It also provides easy access to
    MPI_COMM_WORLD's rank and size. */
class MPI_Session
{
protected:
   int world_rank, world_size;
   void GetRankAndSize();
public:
   MPI_Session() { MPI_Init(NULL, NULL); GetRankAndSize(); }
   MPI_Session(int &argc, char **&argv)
   { MPI_Init(&argc, &argv); GetRankAndSize(); }
   ~MPI_Session() { MPI_Finalize(); }
   /// Return MPI_COMM_WORLD's rank.
   int WorldRank() const { return world_rank; }
   /// Return MPI_COMM_WORLD's size.
   int WorldSize() const { return world_size; }
   /// Return true if WorldRank() == 0.
   bool Root() const { return world_rank == 0; }
};
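
/* Example (editor's sketch, not part of the original header): a minimal
   parallel driver built around MPI_Session. The main() signature and the use
   of mfem::out are the only assumptions.

      #include "mfem.hpp"
      using namespace mfem;

      int main(int argc, char *argv[])
      {
         MPI_Session mpi(argc, argv);   // calls MPI_Init()
         if (mpi.Root())
         {
            mfem::out << "Running on " << mpi.WorldSize() << " ranks.\n";
         }
         // ... rank-dependent work using mpi.WorldRank() ...
         return 0;                      // MPI_Finalize() runs in ~MPI_Session()
      }
*/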

class GroupTopology
{
private:
   MPI_Comm   MyComm;

   /* The shared entities (e.g. vertices, faces and edges) are split into
      groups, each group determined by the set of participating processors.
      They are numbered locally in lproc. Assumptions:
      - group 0 is the 'local' group
      - groupmaster_lproc[0] = 0
      - lproc_proc[0] = MyRank */

   // Neighbor ids (lproc) in each group.
   Table      group_lproc;
   // Master neighbor id for each group.
   Array<int> groupmaster_lproc;
   // MPI rank of each neighbor.
   Array<int> lproc_proc;
   // Group --> Group number in the master.
   Array<int> group_mgroup;

   void ProcToLProc();

public:
   GroupTopology() : MyComm(0) {}
   GroupTopology(MPI_Comm comm) { MyComm = comm; }

   /// Copy constructor
   GroupTopology(const GroupTopology &gt);
   void SetComm(MPI_Comm comm) { MyComm = comm; }

   MPI_Comm GetComm() { return MyComm; }
   int MyRank() { int r; MPI_Comm_rank(MyComm, &r); return r; }
   int NRanks() { int s; MPI_Comm_size(MyComm, &s); return s; }

   void Create(ListOfIntegerSets &groups, int mpitag);

   int NGroups() const { return group_lproc.Size(); }
   // return the number of neighbors including the local processor
   int GetNumNeighbors() const { return lproc_proc.Size(); }
   int GetNeighborRank(int i) const { return lproc_proc[i]; }
   // am I master for group 'g'?
   bool IAmMaster(int g) const { return (groupmaster_lproc[g] == 0); }
   // return the neighbor index of the group master for a given group.
   // neighbor 0 is the local processor
   int GetGroupMaster(int g) const { return groupmaster_lproc[g]; }
   // return the rank of the group master for a given group
   int GetGroupMasterRank(int g) const
   { return lproc_proc[groupmaster_lproc[g]]; }
   // for a given group return the group number in the master
   int GetGroupMasterGroup(int g) const { return group_mgroup[g]; }
   // get the number of processors in a group
   int GetGroupSize(int g) const { return group_lproc.RowSize(g); }
   // return a pointer to a list of neighbors for a given group.
   // neighbor 0 is the local processor
   const int *GetGroup(int g) const { return group_lproc.GetRow(g); }

   /// Save the data in a stream.
   void Save(std::ostream &out) const;
   /// Load the data from a stream.
   void Load(std::istream &in);

   virtual ~GroupTopology() {}
};
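
/* Example (editor's sketch): inspecting an existing GroupTopology, e.g. the
   one owned by a ParMesh (the public member name 'gtopo' is an assumption
   based on the library sources, not something defined in this header).

      GroupTopology &gt = pmesh.gtopo;
      for (int g = 1; g < gt.NGroups(); g++)   // group 0 is the local group
      {
         mfem::out << "group " << g << ": size " << gt.GetGroupSize(g)
                   << ", master rank " << gt.GetGroupMasterRank(g)
                   << (gt.IAmMaster(g) ? " (this rank)" : "") << '\n';
      }
*/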

/** @brief Communicator performing operations within groups defined by a
    GroupTopology with arbitrary-size data associated with each group. */
class GroupCommunicator
{
public:
   /// Communication mode.
   enum Mode
   {
      byGroup,    ///< Communications are performed one group at a time.
      byNeighbor  /**< Communications are performed one neighbor at a time,
                       aggregating over groups. */
   };

private:
   GroupTopology &gtopo;
   Mode mode;
   Table group_ldof;
   Table group_ltdof; // only for groups for which this processor is master.
   int group_buf_size;
   Array<char> group_buf;
   MPI_Request *requests;
   // MPI_Status  *statuses;
   int comm_lock; // 0 - no lock, 1 - locked for Bcast, 2 - locked for Reduce
   int num_requests;
   int *request_marker;
   int *buf_offsets; // size = max(number of groups, number of neighbors)
   Table nbr_send_groups, nbr_recv_groups; // nbr 0 = me

public:
   /// Construct a GroupCommunicator object.
   /** The object must be initialized before it can be used to perform any
       operations. To initialize the object, either
       - call Create() or
       - initialize the Table reference returned by GroupLDofTable() and then
         call Finalize().
   */
   GroupCommunicator(GroupTopology &gt, Mode m = byNeighbor);

   /** @brief Initialize the communicator from a local-dof to group map.
       Finalize() is called internally. */
   void Create(Array<int> &ldof_group);

   /** @brief Fill-in the returned Table reference to initialize the
       GroupCommunicator then call Finalize(). */
   Table &GroupLDofTable() { return group_ldof; }

   /// Allocate internal buffers after the GroupLDofTable is defined
   void Finalize();

   /// Initialize the internal group_ltdof Table.
   /** This method must be called before performing operations that use local
       data layout 2, see CopyGroupToBuffer() for layout descriptions. */
   void SetLTDofTable(Array<int> &ldof_ltdof);

   /// Get a reference to the associated GroupTopology object
   GroupTopology &GetGroupTopology() { return gtopo; }

   /** @brief Data structure on which we define reduce operations.

       The data is associated with (and the operation is performed on) one
       group at a time. */
   template <class T> struct OpData
   {
      int nldofs, nb, *ldofs;
      T *ldata, *buf;
   };

   /** @brief Copy the entries corresponding to the group @a group from the
       local array @a ldata to the buffer @a buf. */
   /** The @a layout of the local array can be:
       - 0 - @a ldata is an array on all ldofs: copied indices:
         `{ J[j] : I[group] <= j < I[group+1] }` where `I,J=group_ldof.{I,J}`
       - 1 - @a ldata is an array on the shared ldofs: copied indices:
         `{ j : I[group] <= j < I[group+1] }` where `I,J=group_ldof.{I,J}`
       - 2 - @a ldata is an array on the true ldofs, ltdofs: copied indices:
         `{ J[j] : I[group] <= j < I[group+1] }` where `I,J=group_ltdof.{I,J}`.
       @returns The pointer @a buf plus the number of elements in the group. */
   template <class T>
   T *CopyGroupToBuffer(const T *ldata, T *buf, int group, int layout) const;

   /** @brief Copy the entries corresponding to the group @a group from the
       buffer @a buf to the local array @a ldata. */
   /** For a description of @a layout, see CopyGroupToBuffer().
       @returns The pointer @a buf plus the number of elements in the group. */
   template <class T>
   const T *CopyGroupFromBuffer(const T *buf, T *ldata, int group,
                                int layout) const;

   /** @brief Perform the reduction operation @a Op to the entries of group
       @a group using the values from the buffer @a buf and the values from
       the local array @a ldata, saving the result in the latter. */
   /** For a description of @a layout, see CopyGroupToBuffer().
       @returns The pointer @a buf plus the number of elements in the group. */
   template <class T>
   const T *ReduceGroupFromBuffer(const T *buf, T *ldata, int group,
                                  int layout, void (*Op)(OpData<T>)) const;

   /// Begin a broadcast within each group where the master is the root.
   /** For a description of @a layout, see CopyGroupToBuffer(). */
   template <class T> void BcastBegin(T *ldata, int layout);

   /** @brief Finalize a broadcast started with BcastBegin().

       The output data @a layout can be:
       - 0 - @a ldata is an array on all ldofs; the input layout should be
         either 0 or 2
       - 1 - @a ldata is the same array as given to BcastBegin(); the input
         layout should be 1.

       For more details about @a layout, see CopyGroupToBuffer(). */
   template <class T> void BcastEnd(T *ldata, int layout);

   /** @brief Broadcast within each group where the master is the root.

       The data @a layout can be either 0 or 1.

       For a description of @a layout, see CopyGroupToBuffer(). */
   template <class T> void Bcast(T *ldata, int layout)
   {
      BcastBegin(ldata, layout);
      BcastEnd(ldata, layout);
   }

   /// Broadcast within each group where the master is the root.
   template <class T> void Bcast(T *ldata) { Bcast<T>(ldata, 0); }
   /// Broadcast within each group where the master is the root.
   template <class T> void Bcast(Array<T> &ldata) { Bcast<T>((T *)ldata); }

   /** @brief Begin reduction operation within each group where the master is
       the root. */
   /** The input data layout is an array on all ldofs, i.e. layout 0, see
       CopyGroupToBuffer().

       The reduce operation will be specified when calling ReduceEnd(). This
       method is instantiated for int and double. */
   template <class T> void ReduceBegin(const T *ldata);

   /** @brief Finalize reduction operation started with ReduceBegin().

       The output data @a layout can be either 0 or 2, see CopyGroupToBuffer().

       The reduce operation is given by the third argument (see below for a
       list of the supported operations). This method is instantiated for int
       and double.

       @note If the output data layout is 2, then the data from the @a ldata
       array passed to this call is used in the reduction operation, instead
       of the data from the @a ldata array passed to ReduceBegin(). Therefore,
       the data for master-groups has to be identical in both arrays.
   */
   template <class T> void ReduceEnd(T *ldata, int layout,
                                     void (*Op)(OpData<T>));

   /** @brief Reduce within each group where the master is the root.

       The reduce operation is given by the second argument (see below for a
       list of the supported operations). */
   template <class T> void Reduce(T *ldata, void (*Op)(OpData<T>))
   {
      ReduceBegin(ldata);
      ReduceEnd(ldata, 0, Op);
   }

   /// Reduce within each group where the master is the root.
   template <class T> void Reduce(Array<T> &ldata, void (*Op)(OpData<T>))
   { Reduce<T>((T *)ldata, Op); }

   /// Reduce operation Sum, instantiated for int and double
   template <class T> static void Sum(OpData<T>);
   /// Reduce operation Min, instantiated for int and double
   template <class T> static void Min(OpData<T>);
   /// Reduce operation Max, instantiated for int and double
   template <class T> static void Max(OpData<T>);
   /// Reduce operation bitwise OR, instantiated for int only
   template <class T> static void BitOR(OpData<T>);

   /// Print information about the GroupCommunicator from all MPI ranks.
   void PrintInfo(std::ostream &out = mfem::out) const;

   /** @brief Destroy a GroupCommunicator object, deallocating internal data
       structures and buffers. */
   ~GroupCommunicator();
};
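
/* Example (editor's sketch): synchronizing shared data with a
   GroupCommunicator. Here 'gt' is an existing GroupTopology and 'ldof_group'
   is an Array<int> mapping each local dof to its group (0 for purely local
   dofs); both are assumed to be available already.

      GroupCommunicator gcomm(gt);
      gcomm.Create(ldof_group);               // calls Finalize() internally

      Array<double> data(ldof_group.Size());
      // ... fill 'data' with this rank's local contributions ...

      // Sum the contributions within each group on the group master, then
      // broadcast the result back so all ranks agree (layout 0).
      gcomm.Reduce<double>(data, GroupCommunicator::Sum);
      gcomm.Bcast(data);
*/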


/// \brief Variable-length MPI message containing unspecific binary data.
template<int Tag>
struct VarMessage
{
   std::string data;
   MPI_Request send_request;

   /// Non-blocking send to processor 'rank'.
   void Isend(int rank, MPI_Comm comm)
   {
      Encode();
      MPI_Isend((void*) data.data(), data.length(), MPI_BYTE, rank, Tag, comm,
                &send_request);
   }

   /// Helper to send all messages in a rank-to-message map container.
   template<typename MapT>
   static void IsendAll(MapT& rank_msg, MPI_Comm comm)
   {
      typename MapT::iterator it;
      for (it = rank_msg.begin(); it != rank_msg.end(); ++it)
      {
         it->second.Isend(it->first, comm);
      }
   }

   /// Helper to wait for all messages in a map container to be sent.
   template<typename MapT>
   static void WaitAllSent(MapT& rank_msg)
   {
      typename MapT::iterator it;
      for (it = rank_msg.begin(); it != rank_msg.end(); ++it)
      {
         MPI_Wait(&it->second.send_request, MPI_STATUS_IGNORE);
         it->second.Clear();
      }
   }

   /** Blocking probe for incoming message of this type from any rank.
       Returns the rank and message size. */
   static void Probe(int &rank, int &size, MPI_Comm comm)
   {
      MPI_Status status;
      MPI_Probe(MPI_ANY_SOURCE, Tag, comm, &status);
      rank = status.MPI_SOURCE;
      MPI_Get_count(&status, MPI_BYTE, &size);
   }

   /** Non-blocking probe for incoming message of this type from any rank.
       If there is an incoming message, returns true and sets 'rank' and
       'size'. Otherwise returns false. */
   static bool IProbe(int &rank, int &size, MPI_Comm comm)
   {
      int flag;
      MPI_Status status;
      MPI_Iprobe(MPI_ANY_SOURCE, Tag, comm, &flag, &status);
      if (!flag) { return false; }

      rank = status.MPI_SOURCE;
      MPI_Get_count(&status, MPI_BYTE, &size);
      return true;
   }

   /// Post-probe receive from processor 'rank' of message size 'size'.
   void Recv(int rank, int size, MPI_Comm comm)
   {
      MFEM_ASSERT(size >= 0, "");
      data.resize(size);
      MPI_Status status;
      MPI_Recv((void*) data.data(), size, MPI_BYTE, rank, Tag, comm, &status);
#ifdef MFEM_DEBUG
      int count;
      MPI_Get_count(&status, MPI_BYTE, &count);
      MFEM_VERIFY(count == size, "");
#endif
      Decode();
   }

   /// Helper to receive all messages in a rank-to-message map container.
   template<typename MapT>
   static void RecvAll(MapT& rank_msg, MPI_Comm comm)
   {
      int recv_left = rank_msg.size();
      while (recv_left > 0)
      {
         int rank, size;
         Probe(rank, size, comm);
         MFEM_ASSERT(rank_msg.find(rank) != rank_msg.end(), "Unexpected message"
                     " (tag " << Tag << ") from rank " << rank);
         // NOTE: no guard against receiving two messages from the same rank
         rank_msg[rank].Recv(rank, size, comm);
         --recv_left;
      }
   }

   VarMessage() : send_request(MPI_REQUEST_NULL) {}
   void Clear() { data.clear(); send_request = MPI_REQUEST_NULL; }

   virtual ~VarMessage()
   {
      MFEM_ASSERT(send_request == MPI_REQUEST_NULL,
                  "WaitAllSent was not called after Isend");
   }

   VarMessage(const VarMessage &other)
      : data(other.data), send_request(other.send_request)
   {
      MFEM_ASSERT(send_request == MPI_REQUEST_NULL,
                  "Cannot copy message with a pending send.");
   }

protected:
   virtual void Encode() {}
   virtual void Decode() {}
};
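
/* Example (editor's sketch): a concrete message type and the typical exchange
   pattern. The tag value 291, the derived type name and the use of std::map
   are illustrative assumptions, not part of the library.

      struct NeighborMessage : public VarMessage<291>
      {
         // Fill 'data' before sending and parse it after receiving, e.g. by
         // overriding Encode()/Decode() or by writing into 'data' directly.
      };

      std::map<int, NeighborMessage> send_msg, recv_msg;
      // ... fill send_msg[rank].data; create an empty recv_msg[rank] entry
      //     for every rank a message is expected from ...

      NeighborMessage::IsendAll(send_msg, MyComm);
      NeighborMessage::RecvAll(recv_msg, MyComm);   // blocks until all arrive
      NeighborMessage::WaitAllSent(send_msg);       // completes the sends
*/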


/// Helper struct to convert a C++ type to an MPI type
template <typename Type>
struct MPITypeMap { static const MPI_Datatype mpi_type; };
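
/* Example (editor's sketch): MPITypeMap lets template code select the
   matching MPI datatype; the helper function below is hypothetical. The
   library defines mpi_type at least for int and double.

      template <typename T>
      void AllReduceSum(T *buf, int n, MPI_Comm comm)
      {
         MPI_Allreduce(MPI_IN_PLACE, buf, n, MPITypeMap<T>::mpi_type,
                       MPI_SUM, comm);
      }
*/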


/** Reorder MPI ranks to follow the Z-curve within the physical machine
    topology (provided that functions to query physical node coordinates are
    available). Returns a new communicator with reordered ranks. */
MPI_Comm ReorderRanksZCurve(MPI_Comm comm);
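
/* Example (editor's sketch): requesting a reordered communicator before
   constructing parallel objects, so that ranks that communicate often are
   more likely to be physically close; 'serial_mesh' is assumed to exist.

      MPI_Comm comm = ReorderRanksZCurve(MPI_COMM_WORLD);
      ParMesh pmesh(comm, *serial_mesh);
*/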


} // namespace mfem

#endif

#endif