MFEM  v3.3.2
Finite element discretization library
communication.cpp
1 // Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-443211. All Rights
3 // reserved. See file COPYRIGHT for details.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability see http://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the GNU Lesser General Public License (as published by the Free
10 // Software Foundation) version 2.1 dated February 1999.
11 
12 #include "../config/config.hpp"
13 
14 #ifdef MFEM_USE_MPI
15 
16 #include <mpi.h>
17 #ifdef __bgq__
18 #include <mpix.h>
19 #endif
20 
21 #include "array.hpp"
22 #include "table.hpp"
23 #include "sets.hpp"
24 #include "communication.hpp"
25 #include "text.hpp"
26 #include "sort_pairs.hpp"
27 #include "globals.hpp"
28 
29 #include <iostream>
30 #include <map>
31 
32 using namespace std;
33 
34 namespace mfem
35 {
36 
37 void MPI_Session::GetRankAndSize()
38 {
39  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
40  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
41 }
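// MPI_Session caches the rank and size of MPI_COMM_WORLD set here; they are
// exposed through accessors declared in communication.hpp. A minimal usage
// sketch, assuming the usual WorldRank()/WorldSize()/Root() accessors:
//
//    MPI_Session mpi(argc, argv);   // calls MPI_Init() and GetRankAndSize()
//    if (mpi.Root())
//    {
//       mfem::out << "Running on " << mpi.WorldSize() << " ranks.\n";
//    }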
42 
43 
44 GroupTopology::GroupTopology(const GroupTopology &gt)
45  : MyComm(gt.MyComm),
46  group_lproc(gt.group_lproc)
47 {
48  gt.groupmaster_lproc.Copy(groupmaster_lproc);
49  gt.lproc_proc.Copy(lproc_proc);
50  gt.group_mgroup.Copy(group_mgroup);
51 }
52 
53 void GroupTopology::ProcToLProc()
54 {
55  int NRanks;
56  MPI_Comm_size(MyComm, &NRanks);
57 
58  map<int, int> proc_lproc;
59 
60  int lproc_counter = 0;
61  for (int i = 0; i < group_lproc.Size_of_connections(); i++)
62  {
63  const pair<const int, int> p(group_lproc.GetJ()[i], lproc_counter);
64  if (proc_lproc.insert(p).second)
65  {
66  lproc_counter++;
67  }
68  }
69  // Note: group_lproc.GetJ()[0] == MyRank --> proc_lproc[MyRank] == 0
70 
71  lproc_proc.SetSize(lproc_counter);
72  for (map<int, int>::iterator it = proc_lproc.begin();
73  it != proc_lproc.end(); ++it)
74  {
75  lproc_proc[it->second] = it->first;
76  }
77 
78  for (int i = 0; i < group_lproc.Size_of_connections(); i++)
79  {
80  group_lproc.GetJ()[i] = proc_lproc[group_lproc.GetJ()[i]];
81  }
82 
83  for (int i = 0; i < NGroups(); i++)
84  {
85  groupmaster_lproc[i] = proc_lproc[groupmaster_lproc[i]];
86  }
87 }
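// ProcToLProc compresses the global MPI ranks stored in group_lproc into
// consecutive local ("lproc") indices: each distinct rank gets the next free
// index as it is encountered, lproc_proc keeps the inverse map back to global
// ranks, and because the first entry of group_lproc is MyRank, the calling
// rank always becomes lproc 0.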
88 
89 void GroupTopology::Create(ListOfIntegerSets &groups, int mpitag)
90 {
91  groups.AsTable(group_lproc); // group_lproc = group_proc
92 
93  Table group_mgroupandproc;
94  group_mgroupandproc.SetDims(NGroups(),
95  group_lproc.Size_of_connections() + NGroups());
96  for (int i = 0; i < NGroups(); i++)
97  {
98  int j = group_mgroupandproc.GetI()[i];
99  group_mgroupandproc.GetI()[i+1] = j + group_lproc.RowSize(i) + 1;
100  group_mgroupandproc.GetJ()[j] = i;
101  j++;
102  for (int k = group_lproc.GetI()[i];
103  j < group_mgroupandproc.GetI()[i+1]; j++, k++)
104  {
105  group_mgroupandproc.GetJ()[j] = group_lproc.GetJ()[k];
106  }
107  }
108 
109  // build groupmaster_lproc with lproc = proc
110  groupmaster_lproc.SetSize(NGroups());
111 
112  // simplest choice of the group owner
113  for (int i = 0; i < NGroups(); i++)
114  {
115  groupmaster_lproc[i] = groups.PickElementInSet(i);
116  }
117 
118  // load-balanced choice of the group owner, which however can lead to
119  // isolated dofs
120  // for (i = 0; i < NGroups(); i++)
121  // groupmaster_lproc[i] = groups.PickRandomElementInSet(i);
122 
123  ProcToLProc();
124 
125  // build group_mgroup
126  group_mgroup.SetSize(NGroups());
127  group_mgroup[0] = 0; // the local group
128 
129  int send_counter = 0;
130  int recv_counter = 0;
131  for (int i = 1; i < NGroups(); i++)
132  if (groupmaster_lproc[i] != 0) // we are not the master
133  {
134  recv_counter++;
135  }
136  else
137  {
138  send_counter += group_lproc.RowSize(i)-1;
139  }
140 
141  MPI_Request *requests = new MPI_Request[send_counter];
142  MPI_Status *statuses = new MPI_Status[send_counter];
143 
144  int max_recv_size = 0;
145  send_counter = 0;
146  for (int i = 1; i < NGroups(); i++)
147  {
148  if (groupmaster_lproc[i] == 0) // we are the master
149  {
150  group_mgroup[i] = i;
151 
152  for (int j = group_lproc.GetI()[i];
153  j < group_lproc.GetI()[i+1]; j++)
154  {
155  if (group_lproc.GetJ()[j] != 0)
156  {
157  MPI_Isend(group_mgroupandproc.GetRow(i),
158  group_mgroupandproc.RowSize(i),
159  MPI_INT,
160  lproc_proc[group_lproc.GetJ()[j]],
161  mpitag,
162  MyComm,
163  &requests[send_counter]);
164  send_counter++;
165  }
166  }
167  }
168  else // we are not the master
169  if (max_recv_size < group_lproc.RowSize(i))
170  {
171  max_recv_size = group_lproc.RowSize(i);
172  }
173  }
174  max_recv_size++;
175 
176  IntegerSet group;
177  if (recv_counter > 0)
178  {
179  int count;
180  MPI_Status status;
181  int *recv_buf = new int[max_recv_size];
182  for ( ; recv_counter > 0; recv_counter--)
183  {
184  MPI_Recv(recv_buf, max_recv_size, MPI_INT,
185  MPI_ANY_SOURCE, mpitag, MyComm, &status);
186 
187  MPI_Get_count(&status, MPI_INT, &count);
188 
189  group.Recreate(count-1, recv_buf+1);
190  int g = groups.Lookup(group);
191  group_mgroup[g] = recv_buf[0];
192 
193  if (lproc_proc[groupmaster_lproc[g]] != status.MPI_SOURCE)
194  {
195  mfem::err << "\n\n\nGroupTopology::Create: "
196  << MyRank() << ": ERROR\n\n\n" << endl;
197  mfem_error();
198  }
199  }
200  delete [] recv_buf;
201  }
202 
203  MPI_Waitall(send_counter, requests, statuses);
204 
205  delete [] statuses;
206  delete [] requests;
207 }
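// Summary of the exchange in Create(): every group master uses its own local
// group number as the global ("mgroup") id and Isends {mgroup id, member
// ranks} to the other members of the group; the non-masters receive these
// messages, look the member set up in their own ListOfIntegerSets, and store
// the master's id in group_mgroup. The check above verifies that each message
// actually came from the rank recorded as the group's master.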
208 
209 void GroupTopology::Save(ostream &out) const
210 {
211  out << "\ncommunication_groups\n";
212  out << "number_of_groups " << NGroups() << "\n\n";
213 
214  out << "# number of entities in each group, followed by group ids in group\n";
215  for (int group_id = 0; group_id < NGroups(); ++group_id)
216  {
217  int group_size = GetGroupSize(group_id);
218  const int * group_ptr = GetGroup(group_id);
219  out << group_size;
220  for ( int group_member_index = 0; group_member_index < group_size;
221  ++group_member_index)
222  {
223  out << " " << GetNeighborRank( group_ptr[group_member_index] );
224  }
225  out << "\n";
226  }
227 
228  // For future use, optional ownership strategy.
229  // out << "# ownership";
230 }
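// Illustrative output of Save() for a rank belonging to three groups (the
// actual ranks depend on the partitioning; group 0 is always the purely
// local group):
//
//    communication_groups
//    number_of_groups 3
//
//    # number of entities in each group, followed by group ids in group
//    1 5
//    2 3 5
//    3 3 5 7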
231 
232 void GroupTopology::Load(istream &in)
233 {
234  // Load in group topology and create list of integer sets. Use constructor
235  // that uses list of integer sets.
236  std::string ident;
237 
238  // Read in number of groups
239  int number_of_groups = -1;
240  in >> ident;
241  MFEM_VERIFY(ident == "number_of_groups",
242  "GroupTopology::Load - expected 'number_of_groups' entry.");
243  in >> number_of_groups;
244 
245  // Skip number of entries in each group comment.
246  skip_comment_lines(in, '#');
247 
248  ListOfIntegerSets integer_sets;
249  for (int group_id = 0; group_id < number_of_groups; ++group_id)
250  {
251  IntegerSet integer_set;
252  Array<int>& array = integer_set;
253  int group_size;
254  in >> group_size;
255  array.Reserve(group_size);
256  for ( int index = 0; index < group_size; ++index )
257  {
258  int value;
259  in >> value;
260  array.Append(value);
261  }
262  integer_sets.Insert(integer_set);
263  }
264 
265  Create(integer_sets, 823);
266 }
267 
268 
269 // specializations of the MPITypeMap static member
270 template<> const MPI_Datatype MPITypeMap<int>::mpi_type = MPI_INT;
271 template<> const MPI_Datatype MPITypeMap<double>::mpi_type = MPI_DOUBLE;
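// MPITypeMap lets the templated communication routines below select the
// matching MPI datatype at compile time: the MPI_Isend/MPI_Irecv calls pass
// MPITypeMap<T>::mpi_type, which resolves to MPI_INT for T = int and to
// MPI_DOUBLE for T = double via the specializations above.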
272 
273 
274 GroupCommunicator::GroupCommunicator(GroupTopology &gt, Mode m)
275  : gtopo(gt), mode(m)
276 {
277  group_buf_size = 0;
278  requests = NULL;
279  // statuses = NULL;
280  comm_lock = 0;
281  num_requests = 0;
282  request_marker = NULL;
283  buf_offsets = NULL;
284 }
285 
286 void GroupCommunicator::Create(Array<int> &ldof_group)
287 {
288  group_ldof.MakeI(gtopo.NGroups());
289  for (int i = 0; i < ldof_group.Size(); i++)
290  {
291  int group = ldof_group[i];
292  if (group != 0)
293  {
294  group_ldof.AddAColumnInRow(group);
295  }
296  }
297  group_ldof.MakeJ();
298 
299  for (int i = 0; i < ldof_group.Size(); i++)
300  {
301  int group = ldof_group[i];
302  if (group != 0)
303  {
304  group_ldof.AddConnection(group, i);
305  }
306  }
307  group_ldof.ShiftUpI();
308 
309  Finalize();
310 }
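// The two loops above follow MFEM's standard two-pass Table construction:
// MakeI() plus AddAColumnInRow() count the entries per row, MakeJ() allocates
// the column array, AddConnection() fills it in, and ShiftUpI() restores the
// row offsets. Group 0 (purely local dofs) is intentionally left empty.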
311 
312 void GroupCommunicator::Finalize()
313 {
314  int request_counter = 0;
315 
316  // size buf_offsets = max(number of groups, number of neighbors)
317  buf_offsets = new int[max(group_ldof.Size(), gtopo.GetNumNeighbors())];
318  buf_offsets[0] = 0;
319  for (int gr = 1; gr < group_ldof.Size(); gr++)
320  {
321  if (group_ldof.RowSize(gr) != 0)
322  {
323  int gr_requests;
324  if (!gtopo.IAmMaster(gr)) // we are not the master
325  {
326  gr_requests = 1;
327  }
328  else
329  {
330  gr_requests = gtopo.GetGroupSize(gr)-1;
331  }
332 
333  request_counter += gr_requests;
334  group_buf_size += gr_requests * group_ldof.RowSize(gr);
335  }
336  }
337 
338  requests = new MPI_Request[request_counter];
339  // statuses = new MPI_Status[request_counter];
340  request_marker = new int[request_counter];
341 
342  // Construct nbr_send_groups and nbr_recv_groups: (nbr 0 = me)
343  nbr_send_groups.MakeI(gtopo.GetNumNeighbors());
344  nbr_recv_groups.MakeI(gtopo.GetNumNeighbors());
345  for (int gr = 1; gr < group_ldof.Size(); gr++)
346  {
347  const int nldofs = group_ldof.RowSize(gr);
348  if (nldofs == 0) { continue; }
349 
350  if (!gtopo.IAmMaster(gr)) // we are not the master
351  {
352  nbr_recv_groups.AddAColumnInRow(gtopo.GetGroupMaster(gr));
353  }
354  else // we are the master
355  {
356  const int grp_size = gtopo.GetGroupSize(gr);
357  const int *grp_nbr_list = gtopo.GetGroup(gr);
358  for (int i = 0; i < grp_size; i++)
359  {
360  if (grp_nbr_list[i] != 0)
361  {
362  nbr_send_groups.AddAColumnInRow(grp_nbr_list[i]);
363  }
364  }
365  }
366  }
367  nbr_send_groups.MakeJ();
368  nbr_recv_groups.MakeJ();
369  for (int gr = 1; gr < group_ldof.Size(); gr++)
370  {
371  const int nldofs = group_ldof.RowSize(gr);
372  if (nldofs == 0) { continue; }
373 
374  if (!gtopo.IAmMaster(gr)) // we are not the master
375  {
376  nbr_recv_groups.AddConnection(gtopo.GetGroupMaster(gr), gr);
377  }
378  else // we are the master
379  {
380  const int grp_size = gtopo.GetGroupSize(gr);
381  const int *grp_nbr_list = gtopo.GetGroup(gr);
382  for (int i = 0; i < grp_size; i++)
383  {
384  if (grp_nbr_list[i] != 0)
385  {
386  nbr_send_groups.AddConnection(grp_nbr_list[i], gr);
387  }
388  }
389  }
390  }
391  nbr_send_groups.ShiftUpI();
392  nbr_recv_groups.ShiftUpI();
393  // The above construction creates the Tables with the column indices
394  // sorted, i.e. the group lists are sorted. To coordinate this order between
395  // processors, we will sort the group lists in the nbr_recv_groups Table
396  // according to their indices in the master. This does not require any
397  // communication because we have access to the group indices in the master
398  // by calling: master_group_id = gtopo.GetGroupMasterGroup(my_group_id).
399  Array<Pair<int,int> > group_ids;
400  for (int nbr = 1; nbr < nbr_recv_groups.Size(); nbr++)
401  {
402  const int num_recv_groups = nbr_recv_groups.RowSize(nbr);
403  if (num_recv_groups > 0)
404  {
405  int *grp_list = nbr_recv_groups.GetRow(nbr);
406  group_ids.SetSize(num_recv_groups);
407  for (int i = 0; i < num_recv_groups; i++)
408  {
409  group_ids[i].one = gtopo.GetGroupMasterGroup(grp_list[i]);
410  group_ids[i].two = grp_list[i]; // my_group_id
411  }
412  group_ids.Sort();
413  for (int i = 0; i < num_recv_groups; i++)
414  {
415  grp_list[i] = group_ids[i].two;
416  }
417  }
418  }
419 }
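// After Finalize(), nbr_send_groups lists, per neighbor, the groups this rank
// masters and must send to that neighbor, while nbr_recv_groups lists the
// groups mastered by that neighbor. Sorting each receive list by the master's
// group id (see the comment above) makes both sides traverse a neighbor's
// groups in the same order, so one message per neighbor can carry all of its
// groups back to back.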
420 
421 void GroupCommunicator::SetLTDofTable(Array<int> &ldof_ltdof)
422 {
423  if (group_ltdof.Size() == group_ldof.Size()) { return; }
424 
425  group_ltdof.MakeI(group_ldof.Size());
426  for (int gr = 1; gr < group_ldof.Size(); gr++)
427  {
428  if (gtopo.IAmMaster(gr))
429  {
430  group_ltdof.AddColumnsInRow(gr, group_ldof.RowSize(gr));
431  }
432  }
433  group_ltdof.MakeJ();
434  for (int gr = 1; gr < group_ldof.Size(); gr++)
435  {
436  if (gtopo.IAmMaster(gr))
437  {
438  const int *ldofs = group_ldof.GetRow(gr);
439  const int nldofs = group_ldof.RowSize(gr);
440  for (int i = 0; i < nldofs; i++)
441  {
442  group_ltdof.AddConnection(gr, ldof_ltdof[ldofs[i]]);
443  }
444  }
445  }
446  group_ltdof.ShiftUpI();
447 }
448 
449 template <class T>
450 T *GroupCommunicator::CopyGroupToBuffer(const T *ldata, T *buf, int group,
451  int layout) const
452 {
453  switch (layout)
454  {
455  case 1:
456  {
457  return std::copy(ldata + group_ldof.GetI()[group],
458  ldata + group_ldof.GetI()[group+1],
459  buf);
460  }
461  case 2:
462  {
463  const int nltdofs = group_ltdof.RowSize(group);
464  const int *ltdofs = group_ltdof.GetRow(group);
465  for (int j = 0; j < nltdofs; j++)
466  {
467  buf[j] = ldata[ltdofs[j]];
468  }
469  return buf + nltdofs;
470  }
471  default:
472  {
473  const int nldofs = group_ldof.RowSize(group);
474  const int *ldofs = group_ldof.GetRow(group);
475  for (int j = 0; j < nldofs; j++)
476  {
477  buf[j] = ldata[ldofs[j]];
478  }
479  return buf + nldofs;
480  }
481  }
482 }
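// Layout convention used by the copy/reduce helpers: layout 0 means ldata is
// indexed by local dofs (group_ldof), layout 2 means it is indexed by local
// true dofs (group_ltdof, which requires SetLTDofTable()), and layout 1 means
// ldata is already a contiguous per-group buffer, so whole group blocks can
// be copied with std::copy.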
483 
484 template <class T>
485 const T *GroupCommunicator::CopyGroupFromBuffer(const T *buf, T *ldata,
486  int group, int layout) const
487 {
488  const int nldofs = group_ldof.RowSize(group);
489  switch (layout)
490  {
491  case 1:
492  {
493  std::copy(buf, buf + nldofs, ldata + group_ldof.GetI()[group]);
494  break;
495  }
496  case 2:
497  {
498  const int *ltdofs = group_ltdof.GetRow(group);
499  for (int j = 0; j < nldofs; j++)
500  {
501  ldata[ltdofs[j]] = buf[j];
502  }
503  break;
504  }
505  default:
506  {
507  const int *ldofs = group_ldof.GetRow(group);
508  for (int j = 0; j < nldofs; j++)
509  {
510  ldata[ldofs[j]] = buf[j];
511  }
512  break;
513  }
514  }
515  return buf + nldofs;
516 }
517 
518 template <class T>
519 const T *GroupCommunicator::ReduceGroupFromBuffer(const T *buf, T *ldata,
520  int group, int layout,
521  void (*Op)(OpData<T>)) const
522 {
523  OpData<T> opd;
524  opd.ldata = ldata;
525  opd.nldofs = group_ldof.RowSize(group);
526  opd.nb = 1;
527  opd.buf = const_cast<T*>(buf);
528 
529  switch (layout)
530  {
531  case 1:
532  {
533  MFEM_ABORT("layout 1 is not supported");
534  T *dest = ldata + group_ldof.GetI()[group];
535  for (int j = 0; j < opd.nldofs; j++)
536  {
537  dest[j] += buf[j];
538  }
539  break;
540  }
541  case 2:
542  {
543  opd.ldofs = const_cast<int*>(group_ltdof.GetRow(group));
544  Op(opd);
545  break;
546  }
547  default:
548  {
549  opd.ldofs = const_cast<int*>(group_ldof.GetRow(group));
550  Op(opd);
551  break;
552  }
553  }
554  return buf + opd.nldofs;
555 }
556 
557 template <class T>
558 void GroupCommunicator::BcastBegin(T *ldata, int layout)
559 {
560  MFEM_VERIFY(comm_lock == 0, "object is already in use");
561 
562  if (group_buf_size == 0) { return; }
563 
564  int request_counter = 0;
565  switch (mode)
566  {
567  case byGroup: // ***** Communication by groups *****
568  {
569  T *buf;
570  if (layout != 1)
571  {
572  group_buf.SetSize(group_buf_size*sizeof(T));
573  buf = (T *)group_buf.GetData();
574  MFEM_VERIFY(layout != 2 || group_ltdof.Size() == group_ldof.Size(),
575  "'group_ltdof' is not set, use SetLTDofTable()");
576  }
577  else
578  {
579  buf = ldata;
580  }
581 
582  for (int gr = 1; gr < group_ldof.Size(); gr++)
583  {
584  const int nldofs = group_ldof.RowSize(gr);
585 
586  // ignore groups without dofs
587  if (nldofs == 0) { continue; }
588 
589  if (!gtopo.IAmMaster(gr)) // we are not the master
590  {
591  MPI_Irecv(buf,
592  nldofs,
593  MPITypeMap<T>::mpi_type,
594  gtopo.GetGroupMasterRank(gr),
595  40822 + gtopo.GetGroupMasterGroup(gr),
596  gtopo.GetComm(),
597  &requests[request_counter]);
598  request_marker[request_counter] = gr;
599  request_counter++;
600  }
601  else // we are the master
602  {
603  if (layout != 1)
604  {
605  CopyGroupToBuffer(ldata, buf, gr, layout);
606  }
607  const int gs = gtopo.GetGroupSize(gr);
608  const int *nbs = gtopo.GetGroup(gr);
609  for (int i = 0; i < gs; i++)
610  {
611  if (nbs[i] != 0)
612  {
613  MPI_Isend(buf,
614  nldofs,
615  MPITypeMap<T>::mpi_type,
616  gtopo.GetNeighborRank(nbs[i]),
617  40822 + gtopo.GetGroupMasterGroup(gr),
618  gtopo.GetComm(),
619  &requests[request_counter]);
620  request_marker[request_counter] = -1; // mark as send req.
621  request_counter++;
622  }
623  }
624  }
625  buf += nldofs;
626  }
627  break;
628  }
629 
630  case byNeighbor: // ***** Communication by neighbors *****
631  {
632  group_buf.SetSize(group_buf_size*sizeof(T));
633  T *buf = (T *)group_buf.GetData();
634  for (int nbr = 1; nbr < nbr_send_groups.Size(); nbr++)
635  {
636  const int num_send_groups = nbr_send_groups.RowSize(nbr);
637  if (num_send_groups > 0)
638  {
639  // Possible optimization:
640  // if (num_send_groups == 1) and (layout == 1) then we do not
641  // need to copy the data in order to send it.
642  T *buf_start = buf;
643  const int *grp_list = nbr_send_groups.GetRow(nbr);
644  for (int i = 0; i < num_send_groups; i++)
645  {
646  buf = CopyGroupToBuffer(ldata, buf, grp_list[i], layout);
647  }
648  MPI_Isend(buf_start,
649  buf - buf_start,
650  MPITypeMap<T>::mpi_type,
651  gtopo.GetNeighborRank(nbr),
652  40822,
653  gtopo.GetComm(),
654  &requests[request_counter]);
655  request_marker[request_counter] = -1; // mark as send request
656  request_counter++;
657  }
658 
659  const int num_recv_groups = nbr_recv_groups.RowSize(nbr);
660  if (num_recv_groups > 0)
661  {
662  // Possible optimization (requires interface change):
663  // if (num_recv_groups == 1) and the (output layout == 1) then
664  // we can receive directly in the output buffer; however, at
665  // this point we do not have that information.
666  const int *grp_list = nbr_recv_groups.GetRow(nbr);
667  int recv_size = 0;
668  for (int i = 0; i < num_recv_groups; i++)
669  {
670  recv_size += group_ldof.RowSize(grp_list[i]);
671  }
672  MPI_Irecv(buf,
673  recv_size,
674  MPITypeMap<T>::mpi_type,
675  gtopo.GetNeighborRank(nbr),
676  40822,
677  gtopo.GetComm(),
678  &requests[request_counter]);
679  request_marker[request_counter] = nbr;
680  request_counter++;
681  buf_offsets[nbr] = buf - (T*)group_buf.GetData();
682  buf += recv_size;
683  }
684  }
685  MFEM_ASSERT(buf - (T*)group_buf.GetData() == group_buf_size, "");
686  break;
687  }
688  }
689 
690  comm_lock = 1; // 1 - locked for Bcast
691  num_requests = request_counter;
692 }
693 
694 template <class T>
695 void GroupCommunicator::BcastEnd(T *ldata, int layout)
696 {
697  if (comm_lock == 0) { return; }
698  // The above also handles the case (group_buf_size == 0).
699  MFEM_VERIFY(comm_lock == 1, "object is NOT locked for Bcast");
700 
701  switch (mode)
702  {
703  case byGroup: // ***** Communication by groups *****
704  {
705  if (layout == 1)
706  {
707  MPI_Waitall(num_requests, requests, MPI_STATUSES_IGNORE);
708  }
709  else if (layout == 0)
710  {
711  // copy the received data from the buffer to ldata, as it arrives
712  int idx;
713  while (MPI_Waitany(num_requests, requests, &idx, MPI_STATUS_IGNORE),
714  idx != MPI_UNDEFINED)
715  {
716  int gr = request_marker[idx];
717  if (gr == -1) { continue; } // skip send requests
718 
719  // groups without dofs are skipped, so here nldofs > 0.
720  T *buf = (T *)group_buf.GetData() + group_ldof.GetI()[gr];
721  CopyGroupFromBuffer(buf, ldata, gr, layout);
722  }
723  }
724  break;
725  }
726 
727  case byNeighbor: // ***** Communication by neighbors *****
728  {
729  // copy the received data from the buffer to ldata, as it arrives
730  int idx;
731  while (MPI_Waitany(num_requests, requests, &idx, MPI_STATUS_IGNORE),
732  idx != MPI_UNDEFINED)
733  {
734  int nbr = request_marker[idx];
735  if (nbr == -1) { continue; } // skip send requests
736 
737  const int num_recv_groups = nbr_recv_groups.RowSize(nbr);
738  if (num_recv_groups > 0)
739  {
740  const int *grp_list = nbr_recv_groups.GetRow(nbr);
741  const T *buf = (T*)group_buf.GetData() + buf_offsets[nbr];
742  for (int i = 0; i < num_recv_groups; i++)
743  {
744  buf = CopyGroupFromBuffer(buf, ldata, grp_list[i], layout);
745  }
746  }
747  }
748  break;
749  }
750  }
751 
752  comm_lock = 0; // 0 - no lock
753  num_requests = 0;
754 }
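// A minimal sketch of the split-phase broadcast, assuming 'gc' is a fully
// initialized GroupCommunicator and 'ldata' is an array over all ldofs
// (layout 0); the group masters send their values and all other group
// members receive them:
//
//    gc.BcastBegin(ldata, 0);   // post the sends and receives
//    // ... independent local work can overlap with communication here ...
//    gc.BcastEnd(ldata, 0);     // wait for completion and unpack into ldata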
755 
756 template <class T>
757 void GroupCommunicator::ReduceBegin(const T *ldata)
758 {
759  MFEM_VERIFY(comm_lock == 0, "object is already in use");
760 
761  if (group_buf_size == 0) { return; }
762 
763  int request_counter = 0;
764  group_buf.SetSize(group_buf_size*sizeof(T));
765  T *buf = (T *)group_buf.GetData();
766  switch (mode)
767  {
768  case byGroup: // ***** Communication by groups *****
769  {
770  for (int gr = 1; gr < group_ldof.Size(); gr++)
771  {
772  const int nldofs = group_ldof.RowSize(gr);
773  // ignore groups without dofs
774  if (nldofs == 0) { continue; }
775 
776  if (!gtopo.IAmMaster(gr)) // we are not the master
777  {
778  const int layout = 0;
779  CopyGroupToBuffer(ldata, buf, gr, layout);
780  MPI_Isend(buf,
781  nldofs,
782  MPITypeMap<T>::mpi_type,
783  gtopo.GetGroupMasterRank(gr),
784  43822 + gtopo.GetGroupMasterGroup(gr),
785  gtopo.GetComm(),
786  &requests[request_counter]);
787  request_marker[request_counter] = -1; // mark as send request
788  request_counter++;
789  buf += nldofs;
790  }
791  else // we are the master
792  {
793  const int gs = gtopo.GetGroupSize(gr);
794  const int *nbs = gtopo.GetGroup(gr);
795  buf_offsets[gr] = buf - (T *)group_buf.GetData();
796  for (int i = 0; i < gs; i++)
797  {
798  if (nbs[i] != 0)
799  {
800  MPI_Irecv(buf,
801  nldofs,
802  MPITypeMap<T>::mpi_type,
803  gtopo.GetNeighborRank(nbs[i]),
804  43822 + gtopo.GetGroupMasterGroup(gr),
805  gtopo.GetComm(),
806  &requests[request_counter]);
807  request_marker[request_counter] = gr;
808  request_counter++;
809  buf += nldofs;
810  }
811  }
812  }
813  }
814  break;
815  }
816 
817  case byNeighbor: // ***** Communication by neighbors *****
818  {
819  for (int nbr = 1; nbr < nbr_send_groups.Size(); nbr++)
820  {
821  // In Reduce operation: send_groups <--> recv_groups
822  const int num_send_groups = nbr_recv_groups.RowSize(nbr);
823  if (num_send_groups > 0)
824  {
825  T *buf_start = buf;
826  const int *grp_list = nbr_recv_groups.GetRow(nbr);
827  for (int i = 0; i < num_send_groups; i++)
828  {
829  const int layout = 0; // ldata is an array on all ldofs
830  buf = CopyGroupToBuffer(ldata, buf, grp_list[i], layout);
831  }
832  MPI_Isend(buf_start,
833  buf - buf_start,
834  MPITypeMap<T>::mpi_type,
835  gtopo.GetNeighborRank(nbr),
836  43822,
837  gtopo.GetComm(),
838  &requests[request_counter]);
839  request_marker[request_counter] = -1; // mark as send request
840  request_counter++;
841  }
842 
843  // In Reduce operation: send_groups <--> recv_groups
844  const int num_recv_groups = nbr_send_groups.RowSize(nbr);
845  if (num_recv_groups > 0)
846  {
847  const int *grp_list = nbr_send_groups.GetRow(nbr);
848  int recv_size = 0;
849  for (int i = 0; i < num_recv_groups; i++)
850  {
851  recv_size += group_ldof.RowSize(grp_list[i]);
852  }
853  MPI_Irecv(buf,
854  recv_size,
855  MPITypeMap<T>::mpi_type,
856  gtopo.GetNeighborRank(nbr),
857  43822,
858  gtopo.GetComm(),
859  &requests[request_counter]);
860  request_marker[request_counter] = nbr;
861  request_counter++;
862  buf_offsets[nbr] = buf - (T*)group_buf.GetData();
863  buf += recv_size;
864  }
865  }
866  MFEM_ASSERT(buf - (T*)group_buf.GetData() == group_buf_size, "");
867  break;
868  }
869  }
870 
871  comm_lock = 2;
872  num_requests = request_counter;
873 }
874 
875 template <class T>
876 void GroupCommunicator::ReduceEnd(T *ldata, int layout, void (*Op)(OpData<T>))
877 {
878  if (comm_lock == 0) { return; }
879  // The above also handles the case (group_buf_size == 0).
880  MFEM_VERIFY(comm_lock == 2, "object is NOT locked for Reduce");
881 
882  switch (mode)
883  {
884  case byGroup: // ***** Communication by groups *****
885  {
886  OpData<T> opd;
887  opd.ldata = ldata;
888  Array<int> group_num_req(group_ldof.Size());
889  for (int gr = 1; gr < group_ldof.Size(); gr++)
890  {
891  group_num_req[gr] =
892  gtopo.IAmMaster(gr) ? gtopo.GetGroupSize(gr)-1 : 0;
893  }
894  int idx;
895  while (MPI_Waitany(num_requests, requests, &idx, MPI_STATUS_IGNORE),
896  idx != MPI_UNDEFINED)
897  {
898  int gr = request_marker[idx];
899  if (gr == -1) { continue; } // skip send requests
900 
901  // Delay the processing of a group until all receive requests, for
902  // that group, are done:
903  if ((--group_num_req[gr]) != 0) { continue; }
904 
905  opd.nldofs = group_ldof.RowSize(gr);
906  // groups without dofs are skipped, so here nldofs > 0.
907 
908  opd.buf = (T *)group_buf.GetData() + buf_offsets[gr];
909  opd.ldofs = (layout == 0) ?
910  group_ldof.GetRow(gr) : group_ltdof.GetRow(gr);
911  opd.nb = gtopo.GetGroupSize(gr)-1;
912  Op(opd);
913  }
914  break;
915  }
916 
917  case byNeighbor: // ***** Communication by neighbors *****
918  {
919  MPI_Waitall(num_requests, requests, MPI_STATUSES_IGNORE);
920 
921  for (int nbr = 1; nbr < nbr_send_groups.Size(); nbr++)
922  {
923  // In Reduce operation: send_groups <--> recv_groups
924  const int num_recv_groups = nbr_send_groups.RowSize(nbr);
925  if (num_recv_groups > 0)
926  {
927  const int *grp_list = nbr_send_groups.GetRow(nbr);
928  const T *buf = (T*)group_buf.GetData() + buf_offsets[nbr];
929  for (int i = 0; i < num_recv_groups; i++)
930  {
931  buf = ReduceGroupFromBuffer(buf, ldata, grp_list[i],
932  layout, Op);
933  }
934  }
935  }
936  break;
937  }
938  }
939 
940  comm_lock = 0; // 0 - no lock
941  num_requests = 0;
942 }
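// The matching reduction, sketched under the same assumptions as the Bcast
// example above: every non-master sends its group values and the master
// folds them into its own data with the supplied operation (here Sum):
//
//    gc.ReduceBegin(ldata);     // post the sends and receives
//    // ... overlap with local work ...
//    gc.ReduceEnd(ldata, 0, GroupCommunicator::Sum<double>);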
943 
944 template <class T>
945 void GroupCommunicator::Sum(OpData<T> opd)
946 {
947  if (opd.nb == 1)
948  {
949  for (int i = 0; i < opd.nldofs; i++)
950  {
951  opd.ldata[opd.ldofs[i]] += opd.buf[i];
952  }
953  }
954  else
955  {
956  for (int i = 0; i < opd.nldofs; i++)
957  {
958  T data = opd.ldata[opd.ldofs[i]];
959  for (int j = 0; j < opd.nb; j++)
960  {
961  data += opd.buf[j*opd.nldofs+i];
962  }
963  opd.ldata[opd.ldofs[i]] = data;
964  }
965  }
966 }
967 
968 template <class T>
969 void GroupCommunicator::Min(OpData<T> opd)
970 {
971  for (int i = 0; i < opd.nldofs; i++)
972  {
973  T data = opd.ldata[opd.ldofs[i]];
974  for (int j = 0; j < opd.nb; j++)
975  {
976  T b = opd.buf[j*opd.nldofs+i];
977  if (data > b)
978  {
979  data = b;
980  }
981  }
982  opd.ldata[opd.ldofs[i]] = data;
983  }
984 }
985 
986 template <class T>
987 void GroupCommunicator::Max(OpData<T> opd)
988 {
989  for (int i = 0; i < opd.nldofs; i++)
990  {
991  T data = opd.ldata[opd.ldofs[i]];
992  for (int j = 0; j < opd.nb; j++)
993  {
994  T b = opd.buf[j*opd.nldofs+i];
995  if (data < b)
996  {
997  data = b;
998  }
999  }
1000  opd.ldata[opd.ldofs[i]] = data;
1001  }
1002 }
1003 
1004 template <class T>
1005 void GroupCommunicator::BitOR(OpData<T> opd)
1006 {
1007  for (int i = 0; i < opd.nldofs; i++)
1008  {
1009  T data = opd.ldata[opd.ldofs[i]];
1010  for (int j = 0; j < opd.nb; j++)
1011  {
1012  data |= opd.buf[j*opd.nldofs+i];
1013  }
1014  opd.ldata[opd.ldofs[i]] = data;
1015  }
1016 }
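// In all four reduce operations the receive buffer holds opd.nb complete
// copies of the group's data, one per contributing rank, stored back to back:
// entry i of copy j lives at opd.buf[j*opd.nldofs + i] and is folded into
// opd.ldata[opd.ldofs[i]].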
1017 
1018 void GroupCommunicator::PrintInfo(std::ostream &out) const
1019 {
1020  char c = '\0';
1021  const int tag = 46800;
1022  const int myid = gtopo.MyRank();
1023 
1024  int num_sends = 0, num_recvs = 0;
1025  size_t mem_sends = 0, mem_recvs = 0;
1026  int num_master_groups = 0, num_empty_groups = 0;
1027  int num_active_neighbors = 0; // for mode == byNeighbor
1028  switch (mode)
1029  {
1030  case byGroup:
1031  for (int gr = 1; gr < group_ldof.Size(); gr++)
1032  {
1033  const int nldofs = group_ldof.RowSize(gr);
1034  if (nldofs == 0)
1035  {
1036  num_empty_groups++;
1037  continue;
1038  }
1039  if (gtopo.IAmMaster(gr))
1040  {
1041  num_sends += (gtopo.GetGroupSize(gr)-1);
1042  mem_sends += sizeof(double)*nldofs*(gtopo.GetGroupSize(gr)-1);
1043  num_master_groups++;
1044  }
1045  else
1046  {
1047  num_recvs++;
1048  mem_recvs += sizeof(double)*nldofs;
1049  }
1050  }
1051  break;
1052 
1053  case byNeighbor:
1054  for (int gr = 1; gr < group_ldof.Size(); gr++)
1055  {
1056  const int nldofs = group_ldof.RowSize(gr);
1057  if (nldofs == 0)
1058  {
1059  num_empty_groups++;
1060  continue;
1061  }
1062  if (gtopo.IAmMaster(gr))
1063  {
1064  num_master_groups++;
1065  }
1066  }
1067  for (int nbr = 1; nbr < nbr_send_groups.Size(); nbr++)
1068  {
1069  const int num_send_groups = nbr_send_groups.RowSize(nbr);
1070  if (num_send_groups > 0)
1071  {
1072  const int *grp_list = nbr_send_groups.GetRow(nbr);
1073  for (int i = 0; i < num_send_groups; i++)
1074  {
1075  mem_sends += sizeof(double)*group_ldof.RowSize(grp_list[i]);
1076  }
1077  num_sends++;
1078  }
1079 
1080  const int num_recv_groups = nbr_recv_groups.RowSize(nbr);
1081  if (num_recv_groups > 0)
1082  {
1083  const int *grp_list = nbr_recv_groups.GetRow(nbr);
1084  for (int i = 0; i < num_recv_groups; i++)
1085  {
1086  mem_recvs += sizeof(double)*group_ldof.RowSize(grp_list[i]);
1087  }
1088  num_recvs++;
1089  }
1090  if (num_send_groups > 0 || num_recv_groups > 0)
1091  {
1092  num_active_neighbors++;
1093  }
1094  }
1095  break;
1096  }
1097  if (myid != 0)
1098  {
1099  MPI_Recv(&c, 1, MPI_CHAR, myid-1, tag, gtopo.GetComm(),
1100  MPI_STATUS_IGNORE);
1101  }
1102  else
1103  {
1104  out << "\nGroupCommunicator:\n";
1105  }
1106  out << "Rank " << myid << ":\n"
1107  " mode = " <<
1108  (mode == byGroup ? "byGroup" : "byNeighbor") << "\n"
1109  " number of sends = " << num_sends <<
1110  " (" << mem_sends << " bytes)\n"
1111  " number of recvs = " << num_recvs <<
1112  " (" << mem_recvs << " bytes)\n";
1113  out <<
1114  " num groups = " << group_ldof.Size() << " = " <<
1115  num_master_groups << " + " <<
1116  group_ldof.Size()-num_master_groups-num_empty_groups << " + " <<
1117  num_empty_groups << " (master + slave + empty)\n";
1118  if (mode == byNeighbor)
1119  {
1120  out <<
1121  " num neighbors = " << nbr_send_groups.Size() << " = " <<
1122  num_active_neighbors << " + " <<
1123  nbr_send_groups.Size()-num_active_neighbors <<
1124  " (active + inactive)\n";
1125  }
1126  if (myid != gtopo.NRanks()-1)
1127  {
1128  out << std::flush;
1129  MPI_Send(&c, 1, MPI_CHAR, myid+1, tag, gtopo.GetComm());
1130  }
1131  else
1132  {
1133  out << std::endl;
1134  }
1135  MPI_Barrier(gtopo.GetComm());
1136 }
1137 
1138 GroupCommunicator::~GroupCommunicator()
1139 {
1140  delete [] buf_offsets;
1141  delete [] request_marker;
1142  // delete [] statuses;
1143  delete [] requests;
1144 }
1145 
1146 // @cond DOXYGEN_SKIP
1147 
1148 // instantiate GroupCommunicator::Bcast and Reduce for int and double
1149 template void GroupCommunicator::BcastBegin<int>(int *, int);
1150 template void GroupCommunicator::BcastEnd<int>(int *, int);
1151 template void GroupCommunicator::ReduceBegin<int>(const int *);
1152 template void GroupCommunicator::ReduceEnd<int>(
1153  int *, int, void (*)(OpData<int>));
1154 
1155 template void GroupCommunicator::BcastBegin<double>(double *, int);
1156 template void GroupCommunicator::BcastEnd<double>(double *, int);
1157 template void GroupCommunicator::ReduceBegin<double>(const double *);
1158 template void GroupCommunicator::ReduceEnd<double>(
1159  double *, int, void (*)(OpData<double>));
1160 
1161 // @endcond
1162 
1163 // instantiate reduce operators for int and double
1164 template void GroupCommunicator::Sum<int>(OpData<int>);
1165 template void GroupCommunicator::Min<int>(OpData<int>);
1166 template void GroupCommunicator::Max<int>(OpData<int>);
1167 template void GroupCommunicator::BitOR<int>(OpData<int>);
1168 
1169 template void GroupCommunicator::Sum<double>(OpData<double>);
1170 template void GroupCommunicator::Min<double>(OpData<double>);
1171 template void GroupCommunicator::Max<double>(OpData<double>);
1172 
1173 
1174 #ifdef __bgq__
1175 static void DebugRankCoords(int** coords, int dim, int size)
1176 {
1177  for (int i = 0; i < size; i++)
1178  {
1179  mfem::out << "Rank " << i << " coords: ";
1180  for (int j = 0; j < dim; j++)
1181  {
1182  mfem::out << coords[i][j] << " ";
1183  }
1184  mfem::out << endl;
1185  }
1186 }
1187 
1188 struct CompareCoords
1189 {
1190  CompareCoords(int coord) : coord(coord) {}
1191  int coord;
1192 
1193  bool operator()(int* const &a, int* const &b) const
1194  { return a[coord] < b[coord]; }
1195 };
1196 
1197 void KdTreeSort(int** coords, int d, int dim, int size)
1198 {
1199  if (size > 1)
1200  {
1201  bool all_same = true;
1202  for (int i = 1; i < size && all_same; i++)
1203  {
1204  for (int j = 0; j < dim; j++)
1205  {
1206  if (coords[i][j] != coords[0][j]) { all_same = false; break; }
1207  }
1208  }
1209  if (all_same) { return; }
1210 
1211  // sort by coordinate 'd'
1212  std::sort(coords, coords + size, CompareCoords(d));
1213  int next = (d + 1) % dim;
1214 
1215  if (coords[0][d] < coords[size-1][d])
1216  {
1217  KdTreeSort(coords, next, dim, size/2);
1218  KdTreeSort(coords + size/2, next, dim, size - size/2);
1219  }
1220  else
1221  {
1222  // skip constant dimension
1223  KdTreeSort(coords, next, dim, size);
1224  }
1225  }
1226 }
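// KdTreeSort orders the ranks like a kd-tree built over their torus
// coordinates: sort by the current coordinate, split the list in half, and
// recurse on each half with the next coordinate, skipping coordinates that
// are constant over the current sub-list. The resulting order approximates a
// space-filling curve through the torus.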
1227 
1228 MPI_Comm ReorderRanksZCurve(MPI_Comm comm)
1229 {
1230  MPI_Status status;
1231 
1232  int rank, size;
1233  MPI_Comm_rank(comm, &rank);
1234  MPI_Comm_size(comm, &size);
1235 
1236  int dim;
1237  MPIX_Torus_ndims(&dim);
1238 
1239  int* mycoords = new int[dim + 1];
1240  MPIX_Rank2torus(rank, mycoords);
1241 
1242  MPI_Send(mycoords, dim, MPI_INT, 0, 111, comm);
1243  delete [] mycoords;
1244 
1245  if (rank == 0)
1246  {
1247  int** coords = new int*[size];
1248  for (int i = 0; i < size; i++)
1249  {
1250  coords[i] = new int[dim + 1];
1251  coords[i][dim] = i;
1252  MPI_Recv(coords[i], dim, MPI_INT, i, 111, comm, &status);
1253  }
1254 
1255  KdTreeSort(coords, 0, dim, size);
1256 
1257  //DebugRankCoords(coords, dim, size);
1258 
1259  for (int i = 0; i < size; i++)
1260  {
1261  MPI_Send(&coords[i][dim], 1, MPI_INT, i, 112, comm);
1262  delete [] coords[i];
1263  }
1264  delete [] coords;
1265  }
1266 
1267  int new_rank;
1268  MPI_Recv(&new_rank, 1, MPI_INT, 0, 112, comm, &status);
1269 
1270  MPI_Comm new_comm;
1271  MPI_Comm_split(comm, 0, new_rank, &new_comm);
1272  return new_comm;
1273 }
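// On Blue Gene/Q, rank 0 gathers every rank's torus coordinates
// (MPIX_Rank2torus), orders them with KdTreeSort(), and sends each rank its
// position in that ordering; MPI_Comm_split() with a single color then yields
// a communicator whose ranks follow the space-filling curve, which tends to
// keep ranks that are close in the new numbering physically close on the
// torus network.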
1274 
1275 #else // __bgq__
1276 
1277 MPI_Comm ReorderRanksZCurve(MPI_Comm comm)
1278 {
1279  // pass
1280  return comm;
1281 }
1282 #endif // __bgq__
1283 
1284 } // namespace mfem
1285 
1286 #endif