MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
device.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_DEVICE_HPP
13 #define MFEM_DEVICE_HPP
14 
15 #include "globals.hpp"
16 #include "mem_manager.hpp"
17 
18 namespace mfem
19 {
20 
/// MFEM backends.
/** Individual backends will generally implement only a subset of the kernels
    implemented by the default CPU backend. The goal of the backends is to
    accelerate data-parallel portions of the code and they can use a device
    memory space (e.g. GPUs) or share the memory space of the host (OpenMP). */
struct Backend
{
   /** @brief In the documentation below, we use square brackets to indicate the
       type of the backend: host or device. */
   enum Id: unsigned long
   {
      /// [host] Default CPU backend: sequential execution on each MPI rank.
      CPU = 1 << 0,
      /// [host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
      OMP = 1 << 1,
      /// [device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
      CUDA = 1 << 2,
      /// [device] HIP backend. Enabled when MFEM_USE_HIP = YES.
      HIP = 1 << 3,
      /** @brief [host] RAJA CPU backend: sequential execution on each MPI rank.
          Enabled when MFEM_USE_RAJA = YES. */
      RAJA_CPU = 1 << 4,
      /** @brief [host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES
          and MFEM_USE_OPENMP = YES. */
      RAJA_OMP = 1 << 5,
      /** @brief [device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES
          and MFEM_USE_CUDA = YES. */
      RAJA_CUDA = 1 << 6,
      /** @brief [host] OCCA CPU backend: sequential execution on each MPI rank.
          Enabled when MFEM_USE_OCCA = YES. */
      OCCA_CPU = 1 << 7,
      /// [host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
      OCCA_OMP = 1 << 8,
      /** @brief [device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES
          and MFEM_USE_CUDA = YES. */
      OCCA_CUDA = 1 << 9,
      /** @brief [host] CEED CPU backend. GPU backends can still be used, but
          with expensive memory transfers. Enabled when MFEM_USE_CEED = YES. */
      CEED_CPU = 1 << 10,
      /** @brief [device] CEED CUDA backend working together with the CUDA
          backend. Enabled when MFEM_USE_CEED = YES and MFEM_USE_CUDA = YES.
          NOTE: The current default libCEED CUDA backend is non-deterministic! */
      CEED_CUDA = 1 << 11,
      /** @brief [device] CEED HIP backend working together with the HIP
          backend. Enabled when MFEM_USE_CEED = YES and MFEM_USE_HIP = YES. */
      CEED_HIP = 1 << 12,
      /** @brief [device] Debug backend: host memory is READ/WRITE protected
          while a device is in use. It allows to test the "device" code-path
          (using separate host/device memory pools and host <-> device
          transfers) without any GPU hardware. As 'DEBUG' is sometimes used
          as a macro, `_DEVICE` has been added to avoid conflicts. */
      DEBUG_DEVICE = 1 << 13
   };

   /** @brief Additional useful constants. For example, the *_MASK constants can
       be used with Device::Allows(). */
   enum
   {
      /// Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
      NUM_BACKENDS = 14,

      /// Bitwise-OR of all CPU backends
      CPU_MASK = CPU | RAJA_CPU | OCCA_CPU | CEED_CPU,
      /// Bitwise-OR of all CUDA backends
      CUDA_MASK = CUDA | RAJA_CUDA | OCCA_CUDA | CEED_CUDA,
      /// Bitwise-OR of all HIP backends
      HIP_MASK = HIP | CEED_HIP,
      /// Bitwise-OR of all OpenMP backends
      OMP_MASK = OMP | RAJA_OMP | OCCA_OMP,
      /// Bitwise-OR of all CEED backends
      CEED_MASK = CEED_CPU | CEED_CUDA | CEED_HIP,
      /// Bitwise-OR of all device backends
      DEVICE_MASK = CUDA_MASK | HIP_MASK | DEBUG_DEVICE,

      /// Bitwise-OR of all RAJA backends
      RAJA_MASK = RAJA_CPU | RAJA_OMP | RAJA_CUDA,
      /// Bitwise-OR of all OCCA backends
      OCCA_MASK = OCCA_CPU | OCCA_OMP | OCCA_CUDA
   };
};
101 
102 
/** @brief The MFEM Device class abstracts hardware devices such as GPUs, as
    well as programming models such as CUDA, OCCA, RAJA and OpenMP. */
/** This class represents a "virtual device" with the following properties:
    - At most one object of this class can be constructed and that object is
      controlled by its static methods.
    - If no Device object is constructed, the static methods will use a default
      global object which is never configured and always uses Backend::CPU.
    - Once configured, the object cannot be re-configured during the program
      lifetime.
    - MFEM classes use this object to determine where (host or device) to
      perform an operation and which backend implementation to use.
    - Multiple backends can be configured at the same time; currently, a fixed
      priority order is used to select a specific backend from the list of
      configured backends. See the Backend class and the Configure() method in
      this class for details. */
class Device
{
private:
   friend class MemoryManager;
   /// Execution mode: SEQUENTIAL when only Backend::CPU is enabled,
   /// ACCELERATED when any other backend is enabled (see IsEnabled()).
   enum MODES {SEQUENTIAL, ACCELERATED};

   // Flags presumably set from environment-variable overrides for the device
   // and the host/device memory types — TODO confirm against device.cpp.
   static bool device_env, mem_host_env, mem_device_env;
   /// The default global object used by the static methods when no Device
   /// object has been constructed by the user.
   static Device device_singleton;

   MODES mode;
   int dev = 0; ///< Device ID of the configured device.
   int ngpu = -1; ///< Number of detected devices; -1: not initialized.
   unsigned long backends; ///< Bitwise-OR of all configured backends.
   /// Set to true during configuration, except in 'device_singleton'.
   bool destroy_mm;
   bool mpi_gpu_aware; ///< True when the MPI library is assumed GPU-aware.

   MemoryType host_mem_type; ///< Current Host MemoryType
   MemoryClass host_mem_class; ///< Current Host MemoryClass

   MemoryType device_mem_type; ///< Current Device MemoryType
   MemoryClass device_mem_class; ///< Current Device MemoryClass

   /// Extra backend-selection option string; owned by this object.
   char *device_option = NULL;
   // Copying is disabled: at most one Device object may exist.
   Device(Device const&);
   void operator=(Device const&);
   /// Access the singleton used by all static methods.
   static Device& Get() { return device_singleton; }

   /// Setup switcher based on configuration settings
   void Setup(const int dev = 0);

   /// Add backend @a b to the set of configured backends.
   void MarkBackend(Backend::Id b) { backends |= b; }

   /// Recompute the host/device MemoryType and MemoryClass members from the
   /// currently configured backends.
   void UpdateMemoryTypeAndClass();

   /// Enable the use of the configured device in the code that follows.
   /** After this call MFEM classes will use the backend kernels whenever
       possible, transferring data automatically to the device, if necessary.

       If the only configured backend is the default host CPU one, the device
       will remain disabled.

       If the device is actually enabled, this method will also update the
       current host/device MemoryType and MemoryClass. */
   static void Enable();

public:
   /** @brief Default constructor. Unless Configure() is called later, the
       default Backend::CPU will be used. */
   /** @note At most one Device object can be constructed during the lifetime of
       a program.
       @note This object should be destroyed after all other MFEM objects that
       use the Device are destroyed. */
   Device();

   /** @brief Construct a Device and configure it based on the @a device string.
       See Configure() for more details. */
   /** @note At most one Device object can be constructed during the lifetime of
       a program.
       @note This object should be destroyed after all other MFEM objects that
       use the Device are destroyed. */
   Device(const std::string &device, const int dev = 0)
      : mode(Device::SEQUENTIAL),
        backends(Backend::CPU),
        destroy_mm(false),
        mpi_gpu_aware(false),
        host_mem_type(MemoryType::HOST),
        host_mem_class(MemoryClass::HOST),
        device_mem_type(MemoryType::HOST),
        device_mem_class(MemoryClass::HOST)
   { Configure(device, dev); }

   /// Destructor.
   ~Device();

   /// Configure the Device backends.
   /** The string parameter @a device must be a comma-separated list of backend
       string names (see below). The @a dev argument specifies the ID of the
       actual devices (e.g. GPU) to use.
       * The available backends are described by the Backend class.
       * The string name of a backend is the lowercase version of the
         Backend::Id enumeration constant with '_' replaced by '-', e.g. the
         string name of 'RAJA_CPU' is 'raja-cpu'. The string name of the debug
         backend (Backend::Id 'DEBUG_DEVICE') is exceptionally set to 'debug'.
       * The 'cpu' backend is always enabled with lowest priority.
       * The current backend priority from highest to lowest is:
         'ceed-cuda', 'occa-cuda', 'raja-cuda', 'cuda',
         'ceed-hip', 'hip', 'debug',
         'occa-omp', 'raja-omp', 'omp',
         'ceed-cpu', 'occa-cpu', 'raja-cpu', 'cpu'.
       * Multiple backends can be configured at the same time.
       * Only one 'occa-*' backend can be configured at a time.
       * The backend 'occa-cuda' enables the 'cuda' backend unless 'raja-cuda'
         is already enabled.
       * The backend 'occa-omp' enables the 'omp' backend (if MFEM was built
         with MFEM_USE_OPENMP=YES) unless 'raja-omp' is already enabled.
       * Only one 'ceed-*' backend can be configured at a time.
       * The backend 'ceed-cpu' delegates to a libCEED CPU backend the setup and
         evaluation of the operator.
       * The backend 'ceed-cuda' delegates to a libCEED CUDA backend the setup
         and evaluation of operators and enables the 'cuda' backend to avoid
         transfers between host and device.
       * The backend 'ceed-hip' delegates to a libCEED HIP backend the setup
         and evaluation of operators and enables the 'hip' backend to avoid
         transfers between host and device.
       * The 'debug' backend should not be combined with other device backends.
   */
   void Configure(const std::string &device, const int dev = 0);

   /// Print the configuration of the MFEM virtual device object.
   void Print(std::ostream &out = mfem::out);

   /// Return true if Configure() has been called previously.
   static inline bool IsConfigured() { return Get().ngpu >= 0; }

   /// Return true if an actual device (e.g. GPU) has been configured.
   static inline bool IsAvailable() { return Get().ngpu > 0; }

   /// Return true if any backend other than Backend::CPU is enabled.
   static inline bool IsEnabled() { return Get().mode == ACCELERATED; }

   /// The opposite of IsEnabled().
   static inline bool IsDisabled() { return !IsEnabled(); }

   /** @brief Return true if any of the backends in the backend mask, @a b_mask,
       are allowed. */
   /** This method can be used with any of the Backend::Id constants, the
       Backend::*_MASK, or combinations of those. */
   static inline bool Allows(unsigned long b_mask)
   { return Get().backends & b_mask; }

   /** @brief Get the current Host MemoryType. This is the MemoryType used by
       most MFEM classes when allocating memory used on the host.
   */
   static inline MemoryType GetHostMemoryType() { return Get().host_mem_type; }

   /** @brief Get the current Host MemoryClass. This is the MemoryClass used
       by most MFEM host Memory objects. */
   static inline MemoryClass GetHostMemoryClass() { return Get().host_mem_class; }

   /** @brief Get the current Device MemoryType. This is the MemoryType used by
       most MFEM classes when allocating memory to be used with device kernels.
   */
   static inline MemoryType GetDeviceMemoryType() { return Get().device_mem_type; }

   /// (DEPRECATED) Equivalent to GetDeviceMemoryType().
   /** @deprecated Use GetDeviceMemoryType() instead. */
   static inline MemoryType GetMemoryType() { return Get().device_mem_type; }

   /** @brief Get the current Device MemoryClass. This is the MemoryClass used
       by most MFEM device kernels to access Memory objects. */
   static inline MemoryClass GetDeviceMemoryClass() { return Get().device_mem_class; }

   /// (DEPRECATED) Equivalent to GetDeviceMemoryClass().
   /** @deprecated Use GetDeviceMemoryClass() instead. */
   static inline MemoryClass GetMemoryClass() { return Get().device_mem_class; }

   /// Declare whether the MPI library in use is (or is not) GPU aware.
   /** Only sets the internal flag; how the flag affects MPI communication is
       implemented elsewhere — see the MPI-related MFEM sources. */
   static void SetGPUAwareMPI(const bool force = true)
   { Get().mpi_gpu_aware = force; }

   /// Return the flag set by SetGPUAwareMPI() (false by default).
   static bool GetGPUAwareMPI() { return Get().mpi_gpu_aware; }
};
280 
281 
282 // Inline Memory access functions using the mfem::Device DeviceMemoryClass or
283 // the mfem::Device HostMemoryClass.
284 
285 /** @brief Return the memory class to be used by the functions Read(), Write(),
286  and ReadWrite(), while setting the device use flag in @a mem, if @a on_dev
287  is true. */
288 template <typename T>
289 MemoryClass GetMemoryClass(const Memory<T> &mem, bool on_dev)
290 {
291  if (!on_dev)
292  {
294  }
295  else
296  {
297  mem.UseDevice(true);
299  }
300 }
301 
302 /** @brief Get a pointer for read access to @a mem with the mfem::Device's
303  DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
304  HostMemoryClass, otherwise. */
305 /** Also, if @a on_dev = true, the device flag of @a mem will be set. */
306 template <typename T>
307 inline const T *Read(const Memory<T> &mem, int size, bool on_dev = true)
308 {
309  return mem.Read(GetMemoryClass(mem, on_dev), size);
310 }
311 
312 /** @brief Shortcut to Read(const Memory<T> &mem, int size, false) */
313 template <typename T>
314 inline const T *HostRead(const Memory<T> &mem, int size)
315 {
316  return mfem::Read(mem, size, false);
317 }
318 
319 /** @brief Get a pointer for write access to @a mem with the mfem::Device's
320  DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
321  HostMemoryClass, otherwise. */
322 /** Also, if @a on_dev = true, the device flag of @a mem will be set. */
323 template <typename T>
324 inline T *Write(Memory<T> &mem, int size, bool on_dev = true)
325 {
326  return mem.Write(GetMemoryClass(mem, on_dev), size);
327 }
328 
329 /** @brief Shortcut to Write(const Memory<T> &mem, int size, false) */
330 template <typename T>
331 inline T *HostWrite(Memory<T> &mem, int size)
332 {
333  return mfem::Write(mem, size, false);
334 }
335 
336 /** @brief Get a pointer for read+write access to @a mem with the mfem::Device's
337  DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
338  HostMemoryClass, otherwise. */
339 /** Also, if @a on_dev = true, the device flag of @a mem will be set. */
340 template <typename T>
341 inline T *ReadWrite(Memory<T> &mem, int size, bool on_dev = true)
342 {
343  return mem.ReadWrite(GetMemoryClass(mem, on_dev), size);
344 }
345 
346 /** @brief Shortcut to ReadWrite(Memory<T> &mem, int size, false) */
347 template <typename T>
348 inline T *HostReadWrite(Memory<T> &mem, int size)
349 {
350  return mfem::ReadWrite(mem, size, false);
351 }
352 
353 } // mfem
354 
355 #endif // MFEM_DEVICE_HPP
static MemoryClass GetMemoryClass()
(DEPRECATED) Equivalent to GetDeviceMemoryClass().
Definition: device.hpp:273
static bool IsAvailable()
Return true if an actual device (e.g. GPU) has been configured.
Definition: device.hpp:234
static bool IsConfigured()
Return true if Configure() has been called previously.
Definition: device.hpp:231
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:56
static MemoryClass GetHostMemoryClass()
Get the current Host MemoryClass. This is the MemoryClass used by most MFEM host Memory objects...
Definition: device.hpp:256
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
Definition: device.hpp:53
~Device()
Destructor.
Definition: device.cpp:154
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
Definition: device.hpp:63
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES...
Definition: device.hpp:45
Bitwise-OR of all HIP backends.
Definition: device.hpp:87
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device&#39;s DeviceMemoryClass, if on_dev = true...
Definition: device.hpp:324
Device(const std::string &device, const int dev=0)
Construct a Device and configure it based on the device string. See Configure() for more details...
Definition: device.hpp:179
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
Definition: device.cpp:261
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:48
static bool IsEnabled()
Return true if any backend other than Backend::CPU is enabled.
Definition: device.hpp:237
static bool IsDisabled()
The opposite of IsEnabled().
Definition: device.hpp:240
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
Definition: device.cpp:186
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used...
Definition: device.cpp:69
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
Definition: device.hpp:30
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES...
Definition: device.hpp:51
Number of backends: from (1 &lt;&lt; 0) to (1 &lt;&lt; (NUM_BACKENDS-1)).
Definition: device.hpp:80
static MemoryClass GetDeviceMemoryClass()
Get the current Device MemoryClass. This is the MemoryClass used by most MFEM device kernels to acces...
Definition: device.hpp:269
double b
Definition: lissajous.cpp:42
MFEM backends.
Definition: device.hpp:26
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:261
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition: device.hpp:265
Bitwise-OR of all OpenMP backends.
Definition: device.hpp:89
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES...
Definition: device.hpp:42
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device&#39;s DeviceMemoryClass, if on_dev = true...
Definition: device.hpp:307
[host] Default CPU backend: sequential execution on each MPI rank.
Definition: device.hpp:33
Bitwise-OR of all CUDA backends.
Definition: device.hpp:85
Bitwise-OR of all CPU backends.
Definition: device.hpp:83
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory&lt;T&gt; &amp;mem, int size, false)
Definition: device.hpp:331
static void SetGPUAwareMPI(const bool force=true)
Definition: device.hpp:275
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition: device.hpp:252
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:28
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers...
Definition: device.hpp:59
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
Definition: device.hpp:35
static bool GetGPUAwareMPI()
Definition: device.hpp:278
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:246
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory&lt;T&gt; &amp;mem, int size, false)
Definition: device.hpp:314
MemoryClass GetMemoryClass(const Memory< T > &mem, bool on_dev)
Return the memory class to be used by the functions Read(), Write(), and ReadWrite(), while setting the device use flag in mem, if on_dev is true.
Definition: device.hpp:289
T * ReadWrite(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read+write access to mem with the mfem::Device&#39;s DeviceMemoryClass, if on_dev = true, or the mfem::Device&#39;s HostMemoryClass, otherwise.
Definition: device.hpp:341
Host memory; using new[] and delete[].
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
[device] CEED HIP backend working together with the HIP backend. Enabled when MFEM_USE_CEED = YES and...
Definition: device.hpp:66
Bitwise-OR of all OCCA backends.
Definition: device.hpp:98
Class used by MFEM to store pointers to host and/or device memory.
Bitwise-OR of all RAJA backends.
Definition: device.hpp:96
bool UseDevice() const
Read the internal device flag.
Bitwise-OR of all device backends.
Definition: device.hpp:93
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
Definition: device.hpp:118
Bitwise-OR of all CEED backends.
Definition: device.hpp:91
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition: device.hpp:39
T * HostReadWrite(Memory< T > &mem, int size)
Shortcut to ReadWrite(Memory&lt;T&gt; &amp;mem, int size, false)
Definition: device.hpp:348
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition: device.hpp:37
MemoryClass
Memory classes identify sets of memory types.
Definition: mem_manager.hpp:58
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. It allows to test the &quot;device&quot; code-path (using separate host/device memory pools and host &lt;-&gt; device transfers) without any GPU hardware. As &#39;DEBUG&#39; is sometimes used as a macro, _DEVICE has been added to avoid conflicts.
Definition: device.hpp:72