MFEM v4.2.0
Finite element discretization library
device.cpp
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "forall.hpp"
13 #include "occa.hpp"
14 #ifdef MFEM_USE_CEED
15 #include "../fem/libceed/ceed.hpp"
16 #endif
17 
18 #include <unordered_map>
19 #include <string>
20 #include <map>
21 
22 namespace mfem
23 {
24 
25 // Place the following variables in the mfem::internal namespace, so that they
26 // will not be included in the doxygen documentation.
27 namespace internal
28 {
29 
30 #ifdef MFEM_USE_OCCA
31 // Default occa::device used by MFEM.
32 occa::device occaDevice;
33 #endif
34 
35 #ifdef MFEM_USE_CEED
36 Ceed ceed = NULL;
37 
38 CeedBasisMap ceed_basis_map;
39 CeedRestrMap ceed_restr_map;
40 #endif
41 
42 // Backends listed by priority, high to low:
43 static const Backend::Id backend_list[Backend::NUM_BACKENDS] =
44 {
45  Backend::CEED_CUDA, Backend::OCCA_CUDA, Backend::RAJA_CUDA, Backend::CUDA,
46  Backend::CEED_HIP, Backend::HIP, Backend::DEBUG_DEVICE,
47  Backend::OCCA_OMP, Backend::RAJA_OMP, Backend::OMP,
48  Backend::CEED_CPU, Backend::OCCA_CPU, Backend::RAJA_CPU, Backend::CPU
49 };
50 
51 // Backend names listed by priority, high to low:
52 static const char *backend_name[Backend::NUM_BACKENDS] =
53 {
54  "ceed-cuda", "occa-cuda", "raja-cuda", "cuda",
55  "ceed-hip", "hip", "debug",
56  "occa-omp", "raja-omp", "omp",
57  "ceed-cpu", "occa-cpu", "raja-cpu", "cpu"
58 };
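// Illustrative usage sketch: the names above are the tokens accepted in the
// device-configuration string passed to Device::Configure() (or the Device
// constructor), e.g.
//
//   mfem::Device device("cuda");                       // single backend
//   mfem::Device device("raja-omp,occa-cpu");          // comma-separated list
//   mfem::Device device("ceed-cuda:/gpu/cuda/shared"); // "backend:option" form
//
// Only one Device should be configured per program run; the libCEED resource
// "/gpu/cuda/shared" is only an assumed example of the ':' option syntax.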
59 
60 } // namespace mfem::internal
61 
62 
63 // Initialize the unique global Device variable.
64 Device Device::device_singleton;
65 bool Device::device_env = false;
66 bool Device::mem_host_env = false;
67 bool Device::mem_device_env = false;
68 
69 Device::Device() : mode(Device::SEQUENTIAL),
70  backends(Backend::CPU),
71  destroy_mm(false),
72  mpi_gpu_aware(false),
73  host_mem_type(MemoryType::HOST),
74  host_mem_class(MemoryClass::HOST),
75  device_mem_type(MemoryType::HOST),
76  device_mem_class(MemoryClass::HOST)
77 {
78  if (getenv("MFEM_MEMORY") && !mem_host_env && !mem_device_env)
79  {
80  std::string mem_backend(getenv("MFEM_MEMORY"));
81  if (mem_backend == "host")
82  {
83  mem_host_env = true;
84  host_mem_type = MemoryType::HOST;
85  device_mem_type = MemoryType::HOST;
86  }
87  else if (mem_backend == "host32")
88  {
89  mem_host_env = true;
90  host_mem_type = MemoryType::HOST_32;
91  device_mem_type = MemoryType::HOST_32;
92  }
93  else if (mem_backend == "host64")
94  {
95  mem_host_env = true;
96  host_mem_type = MemoryType::HOST_64;
97  device_mem_type = MemoryType::HOST_64;
98  }
99  else if (mem_backend == "umpire")
100  {
101  mem_host_env = true;
102  host_mem_type = MemoryType::HOST_UMPIRE;
103  // Note: device_mem_type will be set to MemoryType::DEVICE_UMPIRE only
104  // when an actual device is configured -- this is done later in
105  // Device::UpdateMemoryTypeAndClass().
106  device_mem_type = MemoryType::HOST_UMPIRE;
107  }
108  else if (mem_backend == "debug")
109  {
110  mem_host_env = true;
111  host_mem_type = MemoryType::HOST_DEBUG;
112  // Note: device_mem_type will be set to MemoryType::DEVICE_DEBUG only
113  // when an actual device is configured -- this is done later in
114  // Device::UpdateMemoryTypeAndClass().
115  device_mem_type = MemoryType::HOST_DEBUG;
116  }
117  else if (false
118 #ifdef MFEM_USE_CUDA
119  || mem_backend == "cuda"
120 #endif
121 #ifdef MFEM_USE_HIP
122  || mem_backend == "hip"
123 #endif
124  )
125  {
126  mem_host_env = true;
127  host_mem_type = MemoryType::HOST;
128  mem_device_env = true;
129  device_mem_type = MemoryType::DEVICE;
130  }
131  else if (mem_backend == "uvm")
132  {
133  mem_host_env = true;
134  mem_device_env = true;
135  host_mem_type = MemoryType::MANAGED;
136  device_mem_type = MemoryType::MANAGED;
137  }
138  else
139  {
140  MFEM_ABORT("Unknown memory backend!");
141  }
142  mm.Configure(host_mem_type, device_mem_type);
143  }
144 
145  if (getenv("MFEM_DEVICE"))
146  {
147  std::string device(getenv("MFEM_DEVICE"));
148  Configure(device);
149  device_env = true;
150  }
151 }
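// Illustrative usage sketch: the MFEM_MEMORY and MFEM_DEVICE environment
// variables handled above take precedence over any in-code configuration, e.g.
//
//   MFEM_MEMORY=debug MFEM_DEVICE=cuda ./app
//
// where './app' stands for a hypothetical MFEM-based executable.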
152 
153 
154 Device::~Device()
155 {
156  if ( device_env && !destroy_mm) { return; }
157  if (!device_env && destroy_mm && !mem_host_env)
158  {
159  free(device_option);
160 #ifdef MFEM_USE_CEED
161  // Destroy FES -> CeedBasis, CeedElemRestriction hash table contents
162  for (auto entry : internal::ceed_basis_map)
163  {
164  CeedBasisDestroy(&entry.second);
165  }
166  internal::ceed_basis_map.clear();
167  for (auto entry : internal::ceed_restr_map)
168  {
169  CeedElemRestrictionDestroy(&entry.second);
170  }
171  internal::ceed_restr_map.clear();
172  // Destroy Ceed context
173  CeedDestroy(&internal::ceed);
174 #endif
175  mm.Destroy();
176  }
177  Get().ngpu = -1;
178  Get().mode = SEQUENTIAL;
179  Get().backends = Backend::CPU;
180  Get().host_mem_type = MemoryType::HOST;
181  Get().host_mem_class = MemoryClass::HOST;
182  Get().device_mem_type = MemoryType::HOST;
183  Get().device_mem_class = MemoryClass::HOST;
184 }
185 
186 void Device::Configure(const std::string &device, const int dev)
187 {
188  // If a device was configured via the environment, skip the configuration,
189  // and prevent 'device_singleton' from destroying the memory manager.
190  if (device_env)
191  {
192  std::memcpy(this, &Get(), sizeof(Device));
193  Get().destroy_mm = false;
194  return;
195  }
196 
197  std::map<std::string, Backend::Id> bmap;
198  for (int i = 0; i < Backend::NUM_BACKENDS; i++)
199  {
200  bmap[internal::backend_name[i]] = internal::backend_list[i];
201  }
202  std::string::size_type beg = 0, end, option;
203  while (1)
204  {
205  end = device.find(',', beg);
206  end = (end != std::string::npos) ? end : device.size();
207  const std::string bname = device.substr(beg, end - beg);
208  option = bname.find(':');
209  if (option==std::string::npos) // No option
210  {
211  const std::string backend = bname;
212  std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
213  MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
214  Get().MarkBackend(it->second);
215  }
216  else
217  {
218  const std::string backend = bname.substr(0, option);
219  const std::string boption = bname.substr(option+1);
220  Get().device_option = strdup(boption.c_str());
221  std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
222  MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
223  Get().MarkBackend(it->second);
224  }
225  if (end == device.size()) { break; }
226  beg = end + 1;
227  }
228 
229  // OCCA_CUDA and CEED_CUDA need CUDA or RAJA_CUDA:
230  if (Allows(Backend::OCCA_CUDA|Backend::CEED_CUDA) &&
231  !Allows(Backend::CUDA|Backend::RAJA_CUDA))
232  {
233  Get().MarkBackend(Backend::CUDA);
234  }
235  // CEED_HIP needs HIP:
236  if (Allows(Backend::CEED_HIP) && !Allows(Backend::HIP))
237  {
238  Get().MarkBackend(Backend::HIP);
239  }
240  // OCCA_OMP will use OMP or RAJA_OMP unless MFEM_USE_OPENMP=NO:
241 #ifdef MFEM_USE_OPENMP
242  if (Allows(Backend::OCCA_OMP) && !Allows(Backend::RAJA_OMP))
243  {
244  Get().MarkBackend(Backend::OMP);
245  }
246 #endif
247 
248  // Perform setup.
249  Get().Setup(dev);
250 
251  // Enable the device
252  Enable();
253 
254  // Copy all data members from the global 'device_singleton' into '*this'.
255  if (this != &Get()) { std::memcpy(this, &Get(), sizeof(Device)); }
256 
257  // Only '*this' will call the MemoryManager::Destroy() method.
258  destroy_mm = true;
259 }
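// Illustrative usage sketch: a common pattern is to build the configuration
// string from a command-line option and, under MPI, select the GPU by rank;
// 'device_config', 'my_rank' and 'gpus_per_node' are hypothetical application
// variables:
//
//   mfem::Device device(device_config, my_rank % gpus_per_node);
//   device.Print();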
260 
261 void Device::Print(std::ostream &out)
262 {
263  out << "Device configuration: ";
264  bool add_comma = false;
265  for (int i = 0; i < Backend::NUM_BACKENDS; i++)
266  {
267  if (backends & internal::backend_list[i])
268  {
269  if (add_comma) { out << ','; }
270  add_comma = true;
271  out << internal::backend_name[i];
272  }
273  }
274  out << '\n';
275 #ifdef MFEM_USE_CEED
276  if (Allows(Backend::CEED_MASK))
277  {
278  const char *ceed_backend;
279  CeedGetResource(internal::ceed, &ceed_backend);
280  out << "libCEED backend: " << ceed_backend << '\n';
281  }
282 #endif
283  out << "Memory configuration: "
284  << MemoryTypeName[static_cast<int>(host_mem_type)];
285  if (Device::Allows(Backend::DEVICE_MASK))
286  {
287  out << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
288  }
289  out << std::endl;
290 }
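// Illustrative output sketch for a "cuda" configuration (the exact memory
// type names come from MemoryTypeName in the memory manager):
//
//   Device configuration: cuda,cpu
//   Memory configuration: <host_mem_type>,<device_mem_type>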
291 
292 void Device::UpdateMemoryTypeAndClass()
293 {
294  const bool debug = Device::Allows(Backend::DEBUG_DEVICE);
295 
296  const bool device = Device::Allows(Backend::DEVICE_MASK);
297 
298 #ifdef MFEM_USE_UMPIRE
299  // If MFEM has been compiled with Umpire support, use it as the default
300  if (!mem_host_env) { host_mem_type = MemoryType::HOST_UMPIRE; }
301 #endif
302 
303  // Enable the device memory type
304  if (device)
305  {
306  if (!mem_device_env)
307  {
308  if (mem_host_env)
309  {
310  switch (host_mem_type)
311  {
312  case MemoryType::HOST_UMPIRE:
313  device_mem_type = MemoryType::DEVICE_UMPIRE;
314  break;
315  case MemoryType::HOST_DEBUG:
316  device_mem_type = MemoryType::DEVICE_DEBUG;
317  break;
318  default:
319  device_mem_type = MemoryType::DEVICE;
320  }
321  }
322  else
323  {
324 #ifndef MFEM_USE_UMPIRE
325  device_mem_type = MemoryType::DEVICE;
326 #else
327  device_mem_type = MemoryType::DEVICE_UMPIRE;
328 #endif
329  }
330  }
331  device_mem_class = MemoryClass::DEVICE;
332  }
333 
334  // Enable the UVM shortcut when requested
335  if (device && device_option && !strcmp(device_option, "uvm"))
336  {
337  host_mem_type = MemoryType::MANAGED;
338  device_mem_type = MemoryType::MANAGED;
339  }
340 
341  // Enable the DEBUG mode when requested
342  if (debug)
343  {
344  host_mem_type = MemoryType::HOST_DEBUG;
345  device_mem_type = MemoryType::DEVICE_DEBUG;
346  }
347 
348  // Update the memory manager with the new settings
349  mm.Configure(host_mem_type, device_mem_type);
350 }
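// Simplified summary of the pairing selected above when a device backend is
// active:
//   HOST / HOST_32 / HOST_64  -> DEVICE (or DEVICE_UMPIRE with MFEM_USE_UMPIRE)
//   HOST_UMPIRE               -> DEVICE_UMPIRE
//   HOST_DEBUG                -> DEVICE_DEBUG
//   MANAGED ("uvm" option)    -> MANAGED (both host and device)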
351 
352 void Device::Enable()
353 {
354  const bool accelerated = Get().backends & ~(Backend::CPU);
355  if (accelerated) { Get().mode = Device::ACCELERATED;}
356  Get().UpdateMemoryTypeAndClass();
357 }
358 
359 #ifdef MFEM_USE_CUDA
360 static void DeviceSetup(const int dev, int &ngpu)
361 {
362  ngpu = CuGetDeviceCount();
363  MFEM_VERIFY(ngpu > 0, "No CUDA device found!");
364  MFEM_GPU_CHECK(cudaSetDevice(dev));
365 }
366 #endif
367 
368 static void CudaDeviceSetup(const int dev, int &ngpu)
369 {
370 #ifdef MFEM_USE_CUDA
371  DeviceSetup(dev, ngpu);
372 #else
373  MFEM_CONTRACT_VAR(dev);
374  MFEM_CONTRACT_VAR(ngpu);
375 #endif
376 }
377 
378 static void HipDeviceSetup(const int dev, int &ngpu)
379 {
380 #ifdef MFEM_USE_HIP
381  int deviceId;
382  MFEM_GPU_CHECK(hipGetDevice(&deviceId));
383  hipDeviceProp_t props;
384  MFEM_GPU_CHECK(hipGetDeviceProperties(&props, deviceId));
385  MFEM_VERIFY(dev==deviceId,"");
386  ngpu = 1;
387 #else
388  MFEM_CONTRACT_VAR(dev);
389  MFEM_CONTRACT_VAR(ngpu);
390 #endif
391 }
392 
393 static void RajaDeviceSetup(const int dev, int &ngpu)
394 {
395 #ifdef MFEM_USE_CUDA
396  if (ngpu <= 0) { DeviceSetup(dev, ngpu); }
397 #else
398  MFEM_CONTRACT_VAR(dev);
399  MFEM_CONTRACT_VAR(ngpu);
400 #endif
401 }
402 
403 static void OccaDeviceSetup(const int dev)
404 {
405 #ifdef MFEM_USE_OCCA
406  const int cpu = Device::Allows(Backend::OCCA_CPU);
407  const int omp = Device::Allows(Backend::OCCA_OMP);
408  const int cuda = Device::Allows(Backend::OCCA_CUDA);
409  if (cpu + omp + cuda > 1)
410  {
411  MFEM_ABORT("Only one OCCA backend can be configured at a time!");
412  }
413  if (cuda)
414  {
415 #if OCCA_CUDA_ENABLED
416  std::string mode("mode: 'CUDA', device_id : ");
417  internal::occaDevice.setup(mode.append(1,'0'+dev));
418 #else
419  MFEM_ABORT("the OCCA CUDA backend requires OCCA built with CUDA!");
420 #endif
421  }
422  else if (omp)
423  {
424 #if OCCA_OPENMP_ENABLED
425  internal::occaDevice.setup("mode: 'OpenMP'");
426 #else
427  MFEM_ABORT("the OCCA OpenMP backend requires OCCA built with OpenMP!");
428 #endif
429  }
430  else
431  {
432  internal::occaDevice.setup("mode: 'Serial'");
433  }
434 
435  std::string mfemDir;
436  if (occa::io::exists(MFEM_INSTALL_DIR "/include/mfem/"))
437  {
438  mfemDir = MFEM_INSTALL_DIR "/include/mfem/";
439  }
440  else if (occa::io::exists(MFEM_SOURCE_DIR))
441  {
442  mfemDir = MFEM_SOURCE_DIR;
443  }
444  else
445  {
446  MFEM_ABORT("Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
447  }
448 
449  occa::io::addLibraryPath("mfem", mfemDir);
450  occa::loadKernels("mfem");
451 #else
452  MFEM_CONTRACT_VAR(dev);
453  MFEM_ABORT("the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
454 #endif
455 }
456 
457 static void CeedDeviceSetup(const char* ceed_spec)
458 {
459 #ifdef MFEM_USE_CEED
460  CeedInit(ceed_spec, &internal::ceed);
461  const char *ceed_backend;
462  CeedGetResource(internal::ceed, &ceed_backend);
463  if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec, "/cpu/self") &&
464  strcmp(ceed_spec, "/gpu/hip"))
465  {
466  mfem::out << std::endl << "WARNING!!!\n"
467  "libCEED is not using the requested backend!!!\n"
468  "WARNING!!!\n" << std::endl;
469  }
470 #else
471  MFEM_CONTRACT_VAR(ceed_spec);
472 #endif
473 }
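// Illustrative ceed_spec values (assumed examples of common libCEED resource
// strings): "/cpu/self/ref/serial", "/gpu/cuda/shared". A custom spec reaches
// this function through the ':' option of the device string, e.g.
// "ceed-cpu:/cpu/self/ref/serial".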
474 
475 void Device::Setup(const int device)
476 {
477  MFEM_VERIFY(ngpu == -1, "the mfem::Device is already configured!");
478 
479  ngpu = 0;
480  dev = device;
481 #ifndef MFEM_USE_CUDA
482  MFEM_VERIFY(!Allows(Backend::CUDA_MASK),
483  "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
484 #endif
485 #ifndef MFEM_USE_HIP
486  MFEM_VERIFY(!Allows(Backend::HIP_MASK),
487  "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
488 #endif
489 #ifndef MFEM_USE_RAJA
490  MFEM_VERIFY(!Allows(Backend::RAJA_MASK),
491  "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
492 #endif
493 #ifndef MFEM_USE_OPENMP
494  MFEM_VERIFY(!Allows(Backend::OMP|Backend::RAJA_OMP),
495  "the OpenMP and RAJA OpenMP backends require MFEM built with"
496  " MFEM_USE_OPENMP=YES");
497 #endif
498 #ifndef MFEM_USE_CEED
499  MFEM_VERIFY(!Allows(Backend::CEED_MASK),
500  "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
501 #else
502  int ceed_cpu = Allows(Backend::CEED_CPU);
503  int ceed_cuda = Allows(Backend::CEED_CUDA);
504  int ceed_hip = Allows(Backend::CEED_HIP);
505  MFEM_VERIFY(ceed_cpu + ceed_cuda + ceed_hip <= 1,
506  "Only one CEED backend can be enabled at a time!");
507 #endif
508  if (Allows(Backend::CUDA)) { CudaDeviceSetup(dev, ngpu); }
509  if (Allows(Backend::HIP)) { HipDeviceSetup(dev, ngpu); }
510  if (Allows(Backend::RAJA_CUDA)) { RajaDeviceSetup(dev, ngpu); }
511  // The check for MFEM_USE_OCCA is in the function OccaDeviceSetup().
512  if (Allows(Backend::OCCA_MASK)) { OccaDeviceSetup(dev); }
513  if (Allows(Backend::CEED_CPU))
514  {
515  if (!device_option)
516  {
517  CeedDeviceSetup("/cpu/self");
518  }
519  else
520  {
521  CeedDeviceSetup(device_option);
522  }
523  }
524  if (Allows(Backend::CEED_CUDA))
525  {
526  if (!device_option)
527  {
528  // NOTE: libCEED's /gpu/cuda/gen backend is non-deterministic!
529  CeedDeviceSetup("/gpu/cuda/gen");
530  }
531  else
532  {
533  CeedDeviceSetup(device_option);
534  }
535  }
536  if (Allows(Backend::CEED_HIP))
537  {
538  if (!device_option)
539  {
540  CeedDeviceSetup("/gpu/hip");
541  }
542  else
543  {
544  CeedDeviceSetup(device_option);
545  }
546  }
547  if (Allows(Backend::DEBUG_DEVICE)) { ngpu = 1; }
548 }
549 
550 } // mfem