Loading [MathJax]/extensions/TeX/AMSsymbols.js
MFEM  v4.1.0
Finite element discretization library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
device.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "forall.hpp"
13 #include "occa.hpp"
14 #ifdef MFEM_USE_CEED
15 #include <ceed.h>
16 #endif
17 
18 #include <string>
19 #include <map>
20 
21 namespace mfem
22 {
23 
24 // Place the following variables in the mfem::internal namespace, so that they
25 // will not be included in the doxygen documentation.
26 namespace internal
27 {
28 
29 #ifdef MFEM_USE_OCCA
30 // Default occa::device used by MFEM.
// It is configured by OccaDeviceSetup(), which calls occaDevice.setup() with
// the OCCA mode string matching the enabled OCCA backend.
31 occa::device occaDevice;
32 #endif
33 
34 #ifdef MFEM_USE_CEED
// libCEED handle shared by the library. It starts out NULL, is initialized by
// CeedInit() in CeedDeviceSetup(), and is released by CeedDestroy() in the
// Device destructor.
35 Ceed ceed = NULL;
36 #endif
37 
38 // Backends listed by priority, high to low:
// Entry i of this array is paired with internal::backend_name[i]:
// Device::Configure() builds its name -> Backend::Id map from the two arrays
// index by index, and Device::Print() walks them together.
// NOTE(review): the initializer entries (listing lines 41-44) are missing from
// this extract; they must appear in the same order as backend_name below --
// restore them from the upstream file rather than by hand.
39 static const Backend::Id backend_list[Backend::NUM_BACKENDS] =
40 {
45 };
46 
47 // Backend names listed by priority, high to low:
48 static const char *backend_name[Backend::NUM_BACKENDS] =
49 {
50  "ceed-cuda", "occa-cuda", "raja-cuda", "cuda",
51  "hip", "debug",
52  "occa-omp", "raja-omp", "omp",
53  "ceed-cpu", "occa-cpu", "raja-cpu", "cpu"
54 };
55 
56 } // namespace mfem::internal
57 
58 
59 // Initialize the unique global Device variable.
60 Device Device::device_singleton;
// Set to true by the Device constructor once it has configured the device from
// the MFEM_DEVICE environment variable; Configure() then skips its own setup.
61 bool Device::device_env = false;
// Set to true by the Device constructor when the MFEM_MEMORY environment
// variable selected the host memory type.
62 bool Device::mem_host_env = false;
// Set to true by the Device constructor when MFEM_MEMORY also fixed the device
// memory type (the "cuda", "hip" and "uvm" values).
63 bool Device::mem_device_env = false;
64 
65 Device::Device() : mode(Device::SEQUENTIAL),
66  backends(Backend::CPU),
67  destroy_mm(false),
68  mpi_gpu_aware(false),
69  host_mem_type(MemoryType::HOST),
70  host_mem_class(MemoryClass::HOST),
71  device_mem_type(MemoryType::HOST),
72  device_mem_class(MemoryClass::HOST)
73 {
74  if (getenv("MFEM_MEMORY") && !mem_host_env && !mem_device_env)
75  {
76  std::string mem_backend(getenv("MFEM_MEMORY"));
77  if (mem_backend == "host")
78  {
79  mem_host_env = true;
80  host_mem_type = MemoryType::HOST;
81  device_mem_type = MemoryType::HOST;
82  }
83  else if (mem_backend == "host32")
84  {
85  mem_host_env = true;
86  host_mem_type = MemoryType::HOST_32;
87  device_mem_type = MemoryType::HOST_32;
88  }
89  else if (mem_backend == "host64")
90  {
91  mem_host_env = true;
92  host_mem_type = MemoryType::HOST_64;
93  device_mem_type = MemoryType::HOST_64;
94  }
95  else if (mem_backend == "umpire")
96  {
97  mem_host_env = true;
98  host_mem_type = MemoryType::HOST_UMPIRE;
99  // Note: device_mem_type will be set to MemoryType::DEVICE_UMPIRE only
100  // when an actual device is configured -- this is done later in
101  // Device::UpdateMemoryTypeAndClass().
102  device_mem_type = MemoryType::HOST_UMPIRE;
103  }
104  else if (mem_backend == "debug")
105  {
106  mem_host_env = true;
107  host_mem_type = MemoryType::HOST_DEBUG;
108  // Note: device_mem_type will be set to MemoryType::DEVICE_DEBUG only
109  // when an actual device is configured -- this is done later in
110  // Device::UpdateMemoryTypeAndClass().
111  device_mem_type = MemoryType::HOST_DEBUG;
112  }
113  else if (false
114 #ifdef MFEM_USE_CUDA
115  || mem_backend == "cuda"
116 #endif
117 #ifdef MFEM_USE_HIP
118  || mem_backend == "hip"
119 #endif
120  )
121  {
122  mem_host_env = true;
123  host_mem_type = MemoryType::HOST;
124  mem_device_env = true;
125  device_mem_type = MemoryType::DEVICE;
126  }
127  else if (mem_backend == "uvm")
128  {
129  mem_host_env = true;
130  mem_device_env = true;
131  host_mem_type = MemoryType::MANAGED;
132  device_mem_type = MemoryType::MANAGED;
133  }
134  else
135  {
136  MFEM_ABORT("Unknown memory backend!");
137  }
138  mm.Configure(host_mem_type, device_mem_type);
139  }
140 
141  if (getenv("MFEM_DEVICE"))
142  {
143  std::string device(getenv("MFEM_DEVICE"));
144  Configure(device);
145  device_env = true;
146  }
147 }
148 
149 
// NOTE(review): the signature line of this definition (listing line 150) is
// missing from this extract; the page's own index lists "~Device()" as defined
// at device.cpp:150, so this is the body of the Device destructor.
//
// Releases the resources owned through the device (option string, libCEED
// handle, memory manager) when this object is responsible for them, and then
// resets the global singleton to the sequential host-only state.
151 {
// Configured from the environment and not the owner of the memory manager:
// leave everything untouched.
152  if ( device_env && !destroy_mm) { return; }
// Full teardown only for the object that Configure() marked as the owner
// (destroy_mm) and only when MFEM_MEMORY did not fix the host memory type.
153  if (!device_env && destroy_mm && !mem_host_env)
154  {
// device_option is allocated with strdup() in Configure().
155  free(device_option);
156 #ifdef MFEM_USE_CEED
157  CeedDestroy(&internal::ceed);
158 #endif
159  mm.Destroy();
160  }
// Reset the singleton; ngpu == -1 is the "not configured" value that Setup()
// verifies before configuring.
161  Get().ngpu = -1;
162  Get().mode = SEQUENTIAL;
163  Get().backends = Backend::CPU;
164  Get().host_mem_type = MemoryType::HOST;
165  Get().host_mem_class = MemoryClass::HOST;
166  Get().device_mem_type = MemoryType::HOST;
167  Get().device_mem_class = MemoryClass::HOST;
168 }
169 
// Configure the Device backends from a comma-separated list of backend names.
//
// device: backend names as listed in internal::backend_name, separated by ','.
//         A name may carry an option after ':' ("name:option"); the option is
//         stored in the singleton's device_option.
// dev:    device id forwarded to Setup().
//
// The parsed backends are marked on the global singleton (Get()), which is
// then set up and enabled; finally the singleton's data members are copied
// into '*this'. An unknown backend name fails the MFEM_VERIFY check.
170 void Device::Configure(const std::string &device, const int dev)
171 {
172  // If a device was configured via the environment, skip the configuration,
173  // and avoid the 'singleton_device' to destroy the mm.
174  if (device_env)
175  {
176  std::memcpy(this, &Get(), sizeof(Device));
177  Get().destroy_mm = false;
178  return;
179  }
180 
// Build the name -> id lookup from the two parallel tables.
181  std::map<std::string, Backend::Id> bmap;
182  for (int i = 0; i < Backend::NUM_BACKENDS; i++)
183  {
184  bmap[internal::backend_name[i]] = internal::backend_list[i];
185  }
// Walk the comma-separated entries: [beg, end) delimits the current entry and
// 'option' is the position of ':' inside it, if any.
186  std::string::size_type beg = 0, end, option;
187  while (1)
188  {
189  end = device.find(',', beg);
190  end = (end != std::string::npos) ? end : device.size();
191  const std::string bname = device.substr(beg, end - beg);
192  option = bname.find(':');
193  if (option==std::string::npos) // No option
194  {
195  const std::string backend = bname;
196  std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
197  MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
198  Get().MarkBackend(it->second);
199  }
200  else
201  {
202  const std::string backend = bname.substr(0, option);
203  const std::string boption = bname.substr(option+1);
// NOTE(review): device_option is overwritten without freeing a previous
// value, so a list with several "name:option" entries keeps only the last
// option and leaks the earlier strdup() copies -- confirm whether that is
// intended.
204  Get().device_option = strdup(boption.c_str());
205  std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
206  MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
207  Get().MarkBackend(it->second);
208  }
209  if (end == device.size()) { break; }
210  beg = end + 1;
211  }
212 
213  // OCCA_CUDA needs CUDA or RAJA_CUDA:
// NOTE(review): the 'if' conditions guarding the next two blocks (listing
// lines 214 and 218) are missing from this extract; both blocks additionally
// mark Backend::CUDA.
215  {
216  Get().MarkBackend(Backend::CUDA);
217  }
219  {
220  Get().MarkBackend(Backend::CUDA);
221  }
222 
223  // Perform setup.
224  Get().Setup(dev);
225 
226  // Enable the device
227  Enable();
228 
229  // Copy all data members from the global 'singleton_device' into '*this'.
230  if (this != &Get()) { std::memcpy(this, &Get(), sizeof(Device)); }
231 
232  // Only '*this' will call the MemoryManager::Destroy() method.
233  destroy_mm = true;
234 }
235 
// Print the configuration of the device to 'out': the enabled backends as a
// comma-separated list (in the priority order of internal::backend_list), the
// libCEED resource in MFEM_USE_CEED builds, and the memory type names.
236 void Device::Print(std::ostream &out)
237 {
238  out << "Device configuration: ";
239  bool add_comma = false;
240  for (int i = 0; i < Backend::NUM_BACKENDS; i++)
241  {
242  if (backends & internal::backend_list[i])
243  {
// Separator goes before every name except the first one printed.
244  if (add_comma) { out << ','; }
245  add_comma = true;
246  out << internal::backend_name[i];
247  }
248  }
249  out << '\n';
250 #ifdef MFEM_USE_CEED
// NOTE(review): the 'if' condition guarding this block (listing line 251) is
// missing from this extract.
252  {
253  const char *ceed_backend;
254  CeedGetResource(internal::ceed, &ceed_backend);
255  out << "libCEED backend: " << ceed_backend << '\n';
256  }
257 #endif
258  out << "Memory configuration: "
259  << MemoryTypeName[static_cast<int>(host_mem_type)];
// NOTE(review): the 'if' condition guarding this block (listing line 260) is
// missing from this extract; the block appends the device memory type name.
261  {
262  out << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
263  }
264  out << std::endl;
265 }
266 
// Derive the host/device memory types and the device memory class from the
// enabled backends, the MFEM_MEMORY flags (mem_host_env, mem_device_env) and
// the device option, then hand the result to the memory manager.
267 void Device::UpdateMemoryTypeAndClass()
268 {
269  const bool debug = Device::Allows(Backend::DEBUG);
270 
271  const bool device = Device::Allows(Backend::DEVICE_MASK);
272 
273 #ifdef MFEM_USE_UMPIRE
274  // If MFEM has been compiled with Umpire support, use it as the default
275  if (!mem_host_env) { host_mem_type = MemoryType::HOST_UMPIRE; }
276 #endif
277 
278  // Enable the device memory type
279  if (device)
280  {
// A device memory type fixed through MFEM_MEMORY is left untouched.
281  if (!mem_device_env)
282  {
283  if (mem_host_env)
284  {
// Pick the device type that matches the host type chosen via MFEM_MEMORY.
// NOTE(review): the two 'case' labels (listing lines 287 and 290) are missing
// from this extract; presumably they are the Umpire and debug host types that
// the Device constructor's comments refer to -- confirm against upstream.
285  switch (host_mem_type)
286  {
288  device_mem_type = MemoryType::DEVICE_UMPIRE;
289  break;
291  device_mem_type = MemoryType::DEVICE_DEBUG;
292  break;
293  default:
294  device_mem_type = MemoryType::DEVICE;
295  }
296  }
297  else
298  {
299 #ifndef MFEM_USE_UMPIRE
300  device_mem_type = MemoryType::DEVICE;
301 #else
302  device_mem_type = MemoryType::DEVICE_UMPIRE;
303 #endif
304  }
305  }
306  device_mem_class = MemoryClass::DEVICE;
307  }
308 
309  // Enable the UVM shortcut when requested
// i.e. when a device backend is enabled and the backend option is "uvm".
310  if (device && device_option && !strcmp(device_option, "uvm"))
311  {
312  host_mem_type = MemoryType::MANAGED;
313  device_mem_type = MemoryType::MANAGED;
314  }
315 
316  // Enable the DEBUG mode when requested
// This runs last, so it overrides every choice made above.
317  if (debug)
318  {
319  host_mem_type = MemoryType::HOST_DEBUG;
320  device_mem_type = MemoryType::DEVICE_DEBUG;
321  }
322 
323  // Update the memory manager with the new settings
324  mm.Configure(host_mem_type, device_mem_type);
325 }
326 
327 void Device::Enable()
328 {
329  const bool accelerated = Get().backends & ~(Backend::CPU);
330  if (accelerated) { Get().mode = Device::ACCELERATED;}
331  Get().UpdateMemoryTypeAndClass();
332 }
333 
#ifdef MFEM_USE_CUDA
// CUDA device selection shared by the CUDA and RAJA-CUDA setup paths.
//
// dev:  id of the CUDA device to make current.
// ngpu: receives the number of CUDA devices reported by CuGetDeviceCount().
//
// Fails the MFEM_VERIFY check when no CUDA device is found.
static void DeviceSetup(const int dev, int &ngpu)
{
   ngpu = CuGetDeviceCount();
   MFEM_VERIFY(ngpu > 0, "No CUDA device found!");

   MFEM_GPU_CHECK(cudaSetDevice(dev));
}
#endif
342 
343 static void CudaDeviceSetup(const int dev, int &ngpu)
344 {
345 #ifdef MFEM_USE_CUDA
346  DeviceSetup(dev, ngpu);
347 #else
348  MFEM_CONTRACT_VAR(dev);
349  MFEM_CONTRACT_VAR(ngpu);
350 #endif
351 }
352 
353 static void HipDeviceSetup(const int dev, int &ngpu)
354 {
355 #ifdef MFEM_USE_HIP
356  int deviceId;
357  MFEM_GPU_CHECK(hipGetDevice(&deviceId));
358  hipDeviceProp_t props;
359  MFEM_GPU_CHECK(hipGetDeviceProperties(&props, deviceId));
360  MFEM_VERIFY(dev==deviceId,"");
361  ngpu = 1;
362 #else
363  MFEM_CONTRACT_VAR(dev);
364  MFEM_CONTRACT_VAR(ngpu);
365 #endif
366 }
367 
368 static void RajaDeviceSetup(const int dev, int &ngpu)
369 {
370 #ifdef MFEM_USE_CUDA
371  if (ngpu <= 0) { DeviceSetup(dev, ngpu); }
372 #else
373  MFEM_CONTRACT_VAR(dev);
374  MFEM_CONTRACT_VAR(ngpu);
375 #endif
376 }
377 
378 static void OccaDeviceSetup(const int dev)
379 {
380 #ifdef MFEM_USE_OCCA
381  const int cpu = Device::Allows(Backend::OCCA_CPU);
382  const int omp = Device::Allows(Backend::OCCA_OMP);
383  const int cuda = Device::Allows(Backend::OCCA_CUDA);
384  if (cpu + omp + cuda > 1)
385  {
386  MFEM_ABORT("Only one OCCA backend can be configured at a time!");
387  }
388  if (cuda)
389  {
390 #if OCCA_CUDA_ENABLED
391  std::string mode("mode: 'CUDA', device_id : ");
392  internal::occaDevice.setup(mode.append(1,'0'+dev));
393 #else
394  MFEM_ABORT("the OCCA CUDA backend requires OCCA built with CUDA!");
395 #endif
396  }
397  else if (omp)
398  {
399 #if OCCA_OPENMP_ENABLED
400  internal::occaDevice.setup("mode: 'OpenMP'");
401 #else
402  MFEM_ABORT("the OCCA OpenMP backend requires OCCA built with OpenMP!");
403 #endif
404  }
405  else
406  {
407  internal::occaDevice.setup("mode: 'Serial'");
408  }
409 
410  std::string mfemDir;
411  if (occa::io::exists(MFEM_INSTALL_DIR "/include/mfem/"))
412  {
413  mfemDir = MFEM_INSTALL_DIR "/include/mfem/";
414  }
415  else if (occa::io::exists(MFEM_SOURCE_DIR))
416  {
417  mfemDir = MFEM_SOURCE_DIR;
418  }
419  else
420  {
421  MFEM_ABORT("Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
422  }
423 
424  occa::io::addLibraryPath("mfem", mfemDir);
425  occa::loadKernels("mfem");
426 #else
427  MFEM_CONTRACT_VAR(dev);
428  MFEM_ABORT("the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
429 #endif
430 }
431 
432 static void CeedDeviceSetup(const char* ceed_spec)
433 {
434 #ifdef MFEM_USE_CEED
435  CeedInit(ceed_spec, &internal::ceed);
436  const char *ceed_backend;
437  CeedGetResource(internal::ceed, &ceed_backend);
438  if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec, "/cpu/self"))
439  {
440  mfem::out << std::endl << "WARNING!!!\n"
441  "libCEED is not using the requested backend!!!\n"
442  "WARNING!!!\n" << std::endl;
443  }
444 #else
445  MFEM_CONTRACT_VAR(ceed_spec);
446 #endif
447 }
448 
// Set up the backends that have been marked on this Device.
//
// device: id of the device to use; stored in 'dev' and forwarded to the
//         CUDA, HIP, RAJA and OCCA setup helpers.
//
// The function first verifies that the Device has not been configured yet
// (ngpu == -1) and that every requested backend family is available in this
// build, then runs the per-backend setup helpers.
449 void Device::Setup(const int device)
450 {
451  MFEM_VERIFY(ngpu == -1, "the mfem::Device is already configured!");
452 
453  ngpu = 0;
454  dev = device;
// Build-time availability checks: each one is compiled in only when the
// corresponding MFEM_USE_* option is off.
455 #ifndef MFEM_USE_CUDA
456  MFEM_VERIFY(!Allows(Backend::CUDA_MASK),
457  "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
458 #endif
459 #ifndef MFEM_USE_HIP
460  MFEM_VERIFY(!Allows(Backend::HIP_MASK),
461  "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
462 #endif
463 #ifndef MFEM_USE_RAJA
464  MFEM_VERIFY(!Allows(Backend::RAJA_MASK),
465  "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
466 #endif
467 #ifndef MFEM_USE_OPENMP
468  MFEM_VERIFY(!Allows(Backend::OMP|Backend::RAJA_OMP),
469  "the OpenMP and RAJA OpenMP backends require MFEM built with"
470  " MFEM_USE_OPENMP=YES");
471 #endif
472 #ifndef MFEM_USE_CEED
473  MFEM_VERIFY(!Allows(Backend::CEED_MASK),
474  "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
475 #else
// NOTE(review): the first line of this statement (listing line 476) is missing
// from this extract; only its message string remains.
477  "Only one CEED backend can be enabled at a time!");
478 #endif
479  if (Allows(Backend::CUDA)) { CudaDeviceSetup(dev, ngpu); }
480  if (Allows(Backend::HIP)) { HipDeviceSetup(dev, ngpu); }
481  if (Allows(Backend::RAJA_CUDA)) { RajaDeviceSetup(dev, ngpu); }
482  // The check for MFEM_USE_OCCA is in the function OccaDeviceSetup().
483  if (Allows(Backend::OCCA_MASK)) { OccaDeviceSetup(dev); }
// NOTE(review): the 'if' conditions guarding the next two blocks (listing
// lines 484 and 495) are missing from this extract. Each block initializes
// libCEED with device_option when one was given, and otherwise with a default
// resource: "/cpu/self" in the first block, "/gpu/cuda/gen" in the second.
485  {
486  if (!device_option)
487  {
488  CeedDeviceSetup("/cpu/self");
489  }
490  else
491  {
492  CeedDeviceSetup(device_option);
493  }
494  }
496  {
497  if (!device_option)
498  {
499  // NOTE: libCEED's /gpu/cuda/gen backend is non-deterministic!
500  CeedDeviceSetup("/gpu/cuda/gen");
501  }
502  else
503  {
504  CeedDeviceSetup(device_option);
505  }
506  }
// The debug backend reports one device.
507  if (Allows(Backend::DEBUG)) { ngpu = 1; }
508 }
509 
510 } // mfem
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:56
Host memory; aligned at 64 bytes.
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
Definition: device.hpp:53
~Device()
Destructor.
Definition: device.cpp:150
Device memory; using CUDA or HIP *Malloc and *Free.
Device memory; using Umpire.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
Definition: device.hpp:63
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES...
Definition: device.hpp:45
Bitwise-OR of all HIP backends.
Definition: device.hpp:83
Host memory; allocated from a "host-debug" pool.
void Configure(const MemoryType h_mt, const MemoryType d_mt)
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
Definition: device.cpp:236
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:48
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition: cuda.cpp:155
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
Definition: device.cpp:170
Host memory; aligned at 32 bytes.
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used...
Definition: device.cpp:65
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
Definition: device.hpp:30
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES...
Definition: device.hpp:51
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
Definition: device.hpp:76
MFEM backends.
Definition: device.hpp:26
void Destroy()
Free all the device memories.
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES...
Definition: device.hpp:42
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. It allows testing the "device" code-path (using separate host/device memory pools and host <-> device transfers) without any GPU hardware.
Definition: device.hpp:68
[host] Default CPU backend: sequential execution on each MPI rank.
Definition: device.hpp:33
Bitwise-OR of all CUDA backends.
Definition: device.hpp:81
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:27
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers...
Definition: device.hpp:59
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
Definition: device.hpp:35
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:234
MemoryManager mm
The (single) global memory manager object.
Host memory; using new[] and delete[].
Bitwise-OR of all OCCA backends.
Definition: device.hpp:94
Bitwise-OR of all RAJA backends.
Definition: device.hpp:92
Host memory; using Umpire.
Bitwise-OR of all device backends.
Definition: device.hpp:89
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
Definition: device.hpp:114
Bitwise-OR of all CEED backends.
Definition: device.hpp:87
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition: device.hpp:39
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition: device.hpp:37
MemoryClass
Memory classes identify sets of memory types.
Definition: mem_manager.hpp:57