MFEM  v4.1.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
device.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #ifndef MFEM_DEVICE_HPP
13 #define MFEM_DEVICE_HPP
14 
15 #include "globals.hpp"
16 #include "mem_manager.hpp"
17 
18 namespace mfem
19 {
20 
21 /// MFEM backends.
22 /** Individual backends will generally implement only a subset of the kernels
23  implemented by the default CPU backend. The goal of the backends is to
24  accelerate data-parallel portions of the code and they can use a device
25  memory space (e.g. GPUs) or share the memory space of the host (OpenMP). */
26 struct Backend
27 {
28  /** @brief In the documentation below, we use square brackets to indicate the
29  type of the backend: host or device. */
30  enum Id: unsigned long
31  {
32  /// [host] Default CPU backend: sequential execution on each MPI rank.
33  CPU = 1 << 0,
34  /// [host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
35  OMP = 1 << 1,
36  /// [device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
37  CUDA = 1 << 2,
38  /// [device] HIP backend. Enabled when MFEM_USE_HIP = YES.
39  HIP = 1 << 3,
40  /** @brief [host] RAJA CPU backend: sequential execution on each MPI rank.
41  Enabled when MFEM_USE_RAJA = YES. */
42  RAJA_CPU = 1 << 4,
43  /** @brief [host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES
44  and MFEM_USE_OPENMP = YES. */
45  RAJA_OMP = 1 << 5,
46  /** @brief [device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES
47  and MFEM_USE_CUDA = YES. */
48  RAJA_CUDA = 1 << 6,
49  /** @brief [host] OCCA CPU backend: sequential execution on each MPI rank.
50  Enabled when MFEM_USE_OCCA = YES. */
51  OCCA_CPU = 1 << 7,
52  /// [host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
53  OCCA_OMP = 1 << 8,
54  /** @brief [device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES
55  and MFEM_USE_CUDA = YES. */
56  OCCA_CUDA = 1 << 9,
57  /** @brief [host] CEED CPU backend. GPU backends can still be used, but
58  with expensive memory transfers. Enabled when MFEM_USE_CEED = YES. */
59  CEED_CPU = 1 << 10,
60  /** @brief [device] CEED CUDA backend working together with the CUDA
61  backend. Enabled when MFEM_USE_CEED = YES and MFEM_USE_CUDA = YES.
62  NOTE: The current default libCEED GPU backend is non-deterministic! */
63  CEED_CUDA = 1 << 11,
64  /** @brief [device] Debug backend: host memory is READ/WRITE protected
65  while a device is in use. It allows testing the "device" code-path
66  (using separate host/device memory pools and host <-> device
67  transfers) without any GPU hardware. */
68  DEBUG = 1 << 12
69  };
70 
71  /** @brief Additional useful constants. For example, the *_MASK constants can
72  be used with Device::Allows(). */
73  enum
74  {
75  /// Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
77 
78  /// Bitwise-OR of all CPU backends
80  /// Bitwise-OR of all CUDA backends
82  /// Bitwise-OR of all HIP backends
84  /// Bitwise-OR of all OpenMP backends
86  /// Bitwise-OR of all CEED backends
88  /// Bitwise-OR of all device backends
90 
91  /// Bitwise-OR of all RAJA backends
93  /// Bitwise-OR of all OCCA backends
95  };
96 };
97 
98 
99 /** @brief The MFEM Device class abstracts hardware devices such as GPUs, as
100  well as programming models such as CUDA, OCCA, RAJA and OpenMP. */
101 /** This class represents a "virtual device" with the following properties:
102  - At most one object of this class can be constructed and that object is
103  controlled by its static methods.
104  - If no Device object is constructed, the static methods will use a default
105  global object which is never configured and always uses Backend::CPU.
106  - Once configured, the object cannot be re-configured during the program
107  lifetime.
108  - MFEM classes use this object to determine where (host or device) to
109  perform an operation and which backend implementation to use.
110  - Multiple backends can be configured at the same time; currently, a fixed
111  priority order is used to select a specific backend from the list of
112  configured backends. See the Backend class and the Configure() method in
113  this class for details. */
114 class Device
115 {
116 private:
117  friend class MemoryManager;
118  enum MODES {SEQUENTIAL, ACCELERATED};
119 
120  static bool device_env, mem_host_env, mem_device_env;
121  static Device device_singleton;
122 
123  MODES mode;
124  int dev = 0; ///< Device ID of the configured device.
125  int ngpu = -1; ///< Number of detected devices; -1: not initialized.
126  unsigned long backends; ///< Bitwise-OR of all configured backends.
127  /// Set to true during configuration, except in 'device_singleton'.
128  bool destroy_mm;
129  bool mpi_gpu_aware;
130 
131  MemoryType host_mem_type; ///< Current Host MemoryType
132  MemoryClass host_mem_class; ///< Current Host MemoryClass
133 
134  MemoryType device_mem_type; ///< Current Device MemoryType
135  MemoryClass device_mem_class; ///< Current Device MemoryClass
136 
137  char *device_option = NULL;
138  Device(Device const&);
139  void operator=(Device const&);
140  static Device& Get() { return device_singleton; }
141 
142  /// Setup switcher based on configuration settings
143  void Setup(const int dev = 0);
144 
145  void MarkBackend(Backend::Id b) { backends |= b; }
146 
147  void UpdateMemoryTypeAndClass();
148 
149  /// Enable the use of the configured device in the code that follows.
150  /** After this call MFEM classes will use the backend kernels whenever
151  possible, transferring data automatically to the device, if necessary.
152 
153  If the only configured backend is the default host CPU one, the device
154  will remain disabled.
155 
156  If the device is actually enabled, this method will also update the
157  current host/device MemoryType and MemoryClass. */
158  static void Enable();
159 
160 public:
161  /** @brief Default constructor. Unless Configure() is called later, the
162  default Backend::CPU will be used. */
163  /** @note At most one Device object can be constructed during the lifetime of
164  a program.
165  @note This object should be destroyed after all other MFEM objects that
166  use the Device are destroyed. */
167  Device();
168 
169  /** @brief Construct a Device and configure it based on the @a device string.
170  See Configure() for more details. */
171  /** @note At most one Device object can be constructed during the lifetime of
172  a program.
173  @note This object should be destroyed after all other MFEM objects that
174  use the Device are destroyed. */
175  Device(const std::string &device, const int dev = 0)
176  : mode(Device::SEQUENTIAL),
177  backends(Backend::CPU),
178  destroy_mm(false),
179  mpi_gpu_aware(false),
180  host_mem_type(MemoryType::HOST),
181  host_mem_class(MemoryClass::HOST),
182  device_mem_type(MemoryType::HOST),
183  device_mem_class(MemoryClass::HOST)
184  { Configure(device, dev); }
185 
186  /// Destructor.
187  ~Device();
188 
189  /// Configure the Device backends.
190  /** The string parameter @a device must be a comma-separated list of backend
191  string names (see below). The @a dev argument specifies the ID of the
192  actual devices (e.g. GPU) to use.
193  * The available backends are described by the Backend class.
194  * The string name of a backend is the lowercase version of the
195  Backend::Id enumeration constant with '_' replaced by '-', e.g. the
196  string name of 'RAJA_CPU' is 'raja-cpu'.
197  * The 'cpu' backend is always enabled with lowest priority.
198  * The current backend priority from highest to lowest is:
199  'ceed-cuda', 'occa-cuda', 'raja-cuda', 'cuda', 'hip', 'debug',
200  'occa-omp', 'raja-omp', 'omp',
201  'ceed-cpu', 'occa-cpu', 'raja-cpu', 'cpu'.
202  * Multiple backends can be configured at the same time.
203  * Only one 'occa-*' backend can be configured at a time.
204  * The backend 'occa-cuda' enables the 'cuda' backend unless 'raja-cuda'
205  is already enabled.
206  * The backend 'ceed-cpu' delegates to a libCEED CPU backend the setup and
207  evaluation of the operator.
208  * The backend 'ceed-cuda' delegates to a libCEED CUDA backend the setup
209  and evaluation of the operator and enables the 'cuda' backend to avoid
210  transfer between host and device.
211  * The 'debug' backend should not be combined with other device backends.
212  */
213  void Configure(const std::string &device, const int dev = 0);
214 
215  /// Print the configuration of the MFEM virtual device object.
216  void Print(std::ostream &out = mfem::out);
217 
218  /// Return true if Configure() has been called previously.
219  static inline bool IsConfigured() { return Get().ngpu >= 0; }
220 
221  /// Return true if an actual device (e.g. GPU) has been configured.
222  static inline bool IsAvailable() { return Get().ngpu > 0; }
223 
224  /// Return true if any backend other than Backend::CPU is enabled.
225  static inline bool IsEnabled() { return Get().mode == ACCELERATED; }
226 
227  /// The opposite of IsEnabled().
228  static inline bool IsDisabled() { return !IsEnabled(); }
229 
230  /** @brief Return true if any of the backends in the backend mask, @a b_mask,
231  are allowed. */
232  /** This method can be used with any of the Backend::Id constants, the
233  Backend::*_MASK, or combinations of those. */
234  static inline bool Allows(unsigned long b_mask)
235  { return Get().backends & b_mask; }
236 
237  /** @brief Get the current Host MemoryType. This is the MemoryType used by
238  most MFEM classes when allocating memory used on the host.
239  */
240  static inline MemoryType GetHostMemoryType() { return Get().host_mem_type; }
241 
242  /** @brief Get the current Host MemoryClass. This is the MemoryClass used
243  by most MFEM host Memory objects. */
244  static inline MemoryClass GetHostMemoryClass() { return Get().host_mem_class; }
245 
246  /** @brief Get the current Device MemoryType. This is the MemoryType used by
247  most MFEM classes when allocating memory to be used with device kernels.
248  */
249  static inline MemoryType GetDeviceMemoryType() { return Get().device_mem_type; }
250 
251  /// (DEPRECATED) Equivalent to GetDeviceMemoryType().
252  /** @deprecated Use GetDeviceMemoryType() instead. */
253  static inline MemoryType GetMemoryType() { return Get().device_mem_type; }
254 
255  /** @brief Get the current Device MemoryClass. This is the MemoryClass used
256  by most MFEM device kernels to access Memory objects. */
257  static inline MemoryClass GetDeviceMemoryClass() { return Get().device_mem_class; }
258 
259  /// (DEPRECATED) Equivalent to GetDeviceMemoryClass().
260  /** @deprecated Use GetDeviceMemoryClass() instead. */
261  static inline MemoryClass GetMemoryClass() { return Get().device_mem_class; }
262 
263  static void SetGPUAwareMPI(const bool force = true)
264  { Get().mpi_gpu_aware = force; }
265 
266  static bool GetGPUAwareMPI() { return Get().mpi_gpu_aware; }
267 };
268 
269 
270 // Inline Memory access functions using the mfem::Device DeviceMemoryClass or
271 // the mfem::Device HostMemoryClass.
272 
273 /** @brief Return the memory class to be used by the functions Read(), Write(),
274  and ReadWrite(), while setting the device use flag in @a mem, if @a on_dev
275  is true. */
276 template <typename T>
277 MemoryClass GetMemoryClass(const Memory<T> &mem, bool on_dev)
278 {
279  if (!on_dev)
280  {
281  return Device::GetHostMemoryClass(); // host access: device flag of mem is left untouched
282  }
283  else
284  {
285  mem.UseDevice(true); // mark mem as used on the device before handing out the class
286  return Device::GetDeviceMemoryClass();
287  }
288 }
289 
290 /** @brief Get a pointer for read access to @a mem with the mfem::Device's
291  DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
292  HostMemoryClass, otherwise. */
293 /** Also, if @a on_dev = true, the device flag of @a mem will be set. */
294 template <typename T>
295 inline const T *Read(const Memory<T> &mem, int size, bool on_dev = true)
296 {
297  return mem.Read(GetMemoryClass(mem, on_dev), size);
298 }
299 
300 /** @brief Shortcut to Read(const Memory<T> &mem, int size, false) */
301 template <typename T>
302 inline const T *HostRead(const Memory<T> &mem, int size)
303 {
304  return mfem::Read(mem, size, false);
305 }
306 
307 /** @brief Get a pointer for write access to @a mem with the mfem::Device's
308  DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
309  HostMemoryClass, otherwise. */
310 /** Also, if @a on_dev = true, the device flag of @a mem will be set. */
311 template <typename T>
312 inline T *Write(Memory<T> &mem, int size, bool on_dev = true)
313 {
314  return mem.Write(GetMemoryClass(mem, on_dev), size);
315 }
316 
317 /** @brief Shortcut to Write(Memory<T> &mem, int size, false) */
318 template <typename T>
319 inline T *HostWrite(Memory<T> &mem, int size)
320 {
321  return mfem::Write(mem, size, false);
322 }
323 
324 /** @brief Get a pointer for read+write access to @a mem with the mfem::Device's
325  DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
326  HostMemoryClass, otherwise. */
327 /** Also, if @a on_dev = true, the device flag of @a mem will be set. */
328 template <typename T>
329 inline T *ReadWrite(Memory<T> &mem, int size, bool on_dev = true)
330 {
331  return mem.ReadWrite(GetMemoryClass(mem, on_dev), size);
332 }
333 
334 /** @brief Shortcut to ReadWrite(Memory<T> &mem, int size, false) */
335 template <typename T>
336 inline T *HostReadWrite(Memory<T> &mem, int size)
337 {
338  return mfem::ReadWrite(mem, size, false);
339 }
340 
341 } // mfem
342 
343 #endif // MFEM_DEVICE_HPP
static MemoryClass GetMemoryClass()
(DEPRECATED) Equivalent to GetDeviceMemoryClass().
Definition: device.hpp:261
static bool IsAvailable()
Return true if an actual device (e.g. GPU) has been configured.
Definition: device.hpp:222
static bool IsConfigured()
Return true if Configure() has been called previously.
Definition: device.hpp:219
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:56
static MemoryClass GetHostMemoryClass()
Get the current Host MemoryClass. This is the MemoryClass used by most MFEM host Memory objects...
Definition: device.hpp:244
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
Definition: device.hpp:53
~Device()
Destructor.
Definition: device.cpp:150
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
Definition: device.hpp:63
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES...
Definition: device.hpp:45
Bitwise-OR of all HIP backends.
Definition: device.hpp:83
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true...
Definition: device.hpp:312
Device(const std::string &device, const int dev=0)
Construct a Device and configure it based on the device string. See Configure() for more details...
Definition: device.hpp:175
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
Definition: device.cpp:236
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:48
static bool IsEnabled()
Return true if any backend other than Backend::CPU is enabled.
Definition: device.hpp:225
static bool IsDisabled()
The opposite of IsEnabled().
Definition: device.hpp:228
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
Definition: device.cpp:170
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used...
Definition: device.cpp:65
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
Definition: device.hpp:30
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES...
Definition: device.hpp:51
The memory manager class.
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
Definition: device.hpp:76
static MemoryClass GetDeviceMemoryClass()
Get the current Device MemoryClass. This is the MemoryClass used by most MFEM device kernels to acces...
Definition: device.hpp:257
double b
Definition: lissajous.cpp:42
MFEM backends.
Definition: device.hpp:26
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:249
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition: device.hpp:253
Bitwise-OR of all OpenMP backends.
Definition: device.hpp:85
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES...
Definition: device.hpp:42
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. It allows testing the "device" code-path (using separate host/device memory pools and host <-> device transfers) without any GPU hardware.
Definition: device.hpp:68
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true...
Definition: device.hpp:295
[host] Default CPU backend: sequential execution on each MPI rank.
Definition: device.hpp:33
Bitwise-OR of all CUDA backends.
Definition: device.hpp:81
Bitwise-OR of all CPU backends.
Definition: device.hpp:79
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(Memory<T> &mem, int size, false)
Definition: device.hpp:319
static void SetGPUAwareMPI(const bool force=true)
Definition: device.hpp:263
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition: device.hpp:240
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:27
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers...
Definition: device.hpp:59
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
Definition: device.hpp:35
static bool GetGPUAwareMPI()
Definition: device.hpp:266
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:234
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
Definition: device.hpp:302
MemoryClass GetMemoryClass(const Memory< T > &mem, bool on_dev)
Return the memory class to be used by the functions Read(), Write(), and ReadWrite(), while setting the device use flag in mem, if on_dev is true.
Definition: device.hpp:277
T * ReadWrite(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read+write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true, or the mfem::Device's HostMemoryClass, otherwise.
Definition: device.hpp:329
Host memory; using new[] and delete[].
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
Bitwise-OR of all OCCA backends.
Definition: device.hpp:94
Class used by MFEM to store pointers to host and/or device memory.
Bitwise-OR of all RAJA backends.
Definition: device.hpp:92
bool UseDevice() const
Read the internal device flag.
Bitwise-OR of all device backends.
Definition: device.hpp:89
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
Definition: device.hpp:114
Bitwise-OR of all CEED backends.
Definition: device.hpp:87
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition: device.hpp:39
T * HostReadWrite(Memory< T > &mem, int size)
Shortcut to ReadWrite(Memory<T> &mem, int size, false)
Definition: device.hpp:336
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition: device.hpp:37
MemoryClass
Memory classes identify sets of memory types.
Definition: mem_manager.hpp:57