MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
device.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_DEVICE_HPP
13#define MFEM_DEVICE_HPP
14
15#include "enzyme.hpp"
16#include "globals.hpp"
17#include "mem_manager.hpp"
18
19namespace mfem
20{
21
/// MFEM backends.
/** Individual backends will generally implement only a subset of the kernels
    implemented by the default CPU backend. The goal of the backends is to
    accelerate data-parallel portions of the code and they can use a device
    memory space (e.g. GPUs) or share the memory space of the host (OpenMP). */
struct Backend
{
   /** @brief In the documentation below, we use square brackets to indicate the
       type of the backend: host or device. */
   /** Each enumerator occupies a distinct bit so that several backends can be
       combined with bitwise-OR into a single mask. */
   enum Id: unsigned long
   {
      /// [host] Default CPU backend: sequential execution on each MPI rank.
      CPU = 1 << 0,
      /// [host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
      OMP = 1 << 1,
      /// [device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
      CUDA = 1 << 2,
      /// [device] HIP backend. Enabled when MFEM_USE_HIP = YES.
      HIP = 1 << 3,
      /** @brief [host] RAJA CPU backend: sequential execution on each MPI rank.
          Enabled when MFEM_USE_RAJA = YES. */
      RAJA_CPU = 1 << 4,
      /** @brief [host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES
          and MFEM_USE_OPENMP = YES. */
      RAJA_OMP = 1 << 5,
      /** @brief [device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES
          and MFEM_USE_CUDA = YES. */
      RAJA_CUDA = 1 << 6,
      /** @brief [device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES
          and MFEM_USE_HIP = YES. */
      RAJA_HIP = 1 << 7,
      /** @brief [host] OCCA CPU backend: sequential execution on each MPI rank.
          Enabled when MFEM_USE_OCCA = YES. */
      OCCA_CPU = 1 << 8,
      /// [host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
      OCCA_OMP = 1 << 9,
      /** @brief [device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES
          and MFEM_USE_CUDA = YES. */
      OCCA_CUDA = 1 << 10,
      /** @brief [host] CEED CPU backend. GPU backends can still be used, but
          with expensive memory transfers. Enabled when MFEM_USE_CEED = YES. */
      CEED_CPU = 1 << 11,
      /** @brief [device] CEED CUDA backend working together with the CUDA
          backend. Enabled when MFEM_USE_CEED = YES and MFEM_USE_CUDA = YES.
          NOTE: The current default libCEED CUDA backend is non-deterministic! */
      CEED_CUDA = 1 << 12,
      /** @brief [device] CEED HIP backend working together with the HIP
          backend. Enabled when MFEM_USE_CEED = YES and MFEM_USE_HIP = YES. */
      CEED_HIP = 1 << 13,
      /** @brief [device] Debug backend: host memory is READ/WRITE protected
          while a device is in use. It allows testing the "device" code-path
          (using separate host/device memory pools and host <-> device
          transfers) without any GPU hardware. As 'DEBUG' is sometimes used
          as a macro, `_DEVICE` has been added to avoid conflicts. */
      DEBUG_DEVICE = 1 << 14
   };

   /** @brief Additional useful constants. For example, the *_MASK constants can
       be used with Device::Allows(). */
   enum
   {
      /// Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
      NUM_BACKENDS = 15,

      /// Bitwise-OR of all CPU backends
      CPU_MASK = CPU | RAJA_CPU | OCCA_CPU | CEED_CPU,
      /// Bitwise-OR of all CUDA backends
      CUDA_MASK = CUDA | RAJA_CUDA | OCCA_CUDA | CEED_CUDA,
      /// Bitwise-OR of all HIP backends
      HIP_MASK = HIP | RAJA_HIP | CEED_HIP,
      /// Bitwise-OR of all OpenMP backends
      OMP_MASK = OMP | RAJA_OMP | OCCA_OMP,
      /// Bitwise-OR of all CEED backends
      CEED_MASK = CEED_CPU | CEED_CUDA | CEED_HIP,
      /// Bitwise-OR of all device backends
      DEVICE_MASK = CUDA_MASK | HIP_MASK | DEBUG_DEVICE,

      /// Bitwise-OR of all RAJA backends
      RAJA_MASK = RAJA_CPU | RAJA_OMP | RAJA_CUDA | RAJA_HIP,
      /// Bitwise-OR of all OCCA backends
      OCCA_MASK = OCCA_CPU | OCCA_OMP | OCCA_CUDA
   };
};
105
106
107/** @brief The MFEM Device class abstracts hardware devices such as GPUs, as
108 well as programming models such as CUDA, OCCA, RAJA and OpenMP. */
109/** This class represents a "virtual device" with the following properties:
110 - At most one object of this class can be constructed and that object is
111 controlled by its static methods.
112 - If no Device object is constructed, the static methods will use a default
113 global object which is never configured and always uses Backend::CPU.
114 - Once configured, the object cannot be re-configured during the program
115 lifetime.
116 - MFEM classes use this object to determine where (host or device) to
117 perform an operation and which backend implementation to use.
118 - Multiple backends can be configured at the same time; currently, a fixed
119 priority order is used to select a specific backend from the list of
120 configured backends. See the Backend class and the Configure() method in
121 this class for details. */
123{
124private:
125 friend class MemoryManager;
126 enum MODES {SEQUENTIAL, ACCELERATED};
127
128 static bool device_env, mem_host_env, mem_device_env, mem_types_set;
129 MFEM_ENZYME_INACTIVE static MFEM_EXPORT Device device_singleton;
130
131 MODES mode = Device::SEQUENTIAL;
132 int dev = 0; ///< Device ID of the configured device.
133 int ngpu = -1; ///< Number of detected devices; -1: not initialized.
134 /// Bitwise-OR of all configured backends.
135 unsigned long backends = Backend::CPU;
136 /// Set to true during configuration, except in 'device_singleton'.
137 bool destroy_mm = false;
138 bool mpi_gpu_aware = false;
139
140 MemoryType host_mem_type = MemoryType::HOST; ///< Current Host MemoryType
141 MemoryClass host_mem_class = MemoryClass::HOST; ///< Current Host MemoryClass
142
143 /// Current Device MemoryType
144 MemoryType device_mem_type = MemoryType::HOST;
145 /// Current Device MemoryClass
146 MemoryClass device_mem_class = MemoryClass::HOST;
147
148 char *device_option = NULL;
149 Device(Device const&);
150 void operator=(Device const&);
151 static Device& Get() { return device_singleton; }
152
153 /// Setup switcher based on configuration settings
154 void Setup(const int device_id = 0);
155
156 void MarkBackend(Backend::Id b) { backends |= b; }
157
158 void UpdateMemoryTypeAndClass();
159
160 /// Enable the use of the configured device in the code that follows.
161 /** After this call MFEM classes will use the backend kernels whenever
162 possible, transferring data automatically to the device, if necessary.
163
164 If the only configured backend is the default host CPU one, the device
165 will remain disabled.
166
167 If the device is actually enabled, this method will also update the
168 current host/device MemoryType and MemoryClass. */
169 static void Enable();
170
171public:
172 /** @brief Default constructor. Unless Configure() is called later, the
173 default Backend::CPU will be used. */
174 /** @note At most one Device object can be constructed during the lifetime of
175 a program.
176 @note This object should be destroyed after all other MFEM objects that
177 use the Device are destroyed. */
178 Device();
179
180 /** @brief Construct a Device and configure it based on the @a device string.
181 See Configure() for more details. */
182 /** @note At most one Device object can be constructed during the lifetime of
183 a program.
184 @note This object should be destroyed after all other MFEM objects that
185 use the Device are destroyed. */
186 Device(const std::string &device, const int dev = 0)
187 { Configure(device, dev); }
188
189 /// Destructor.
190 ~Device();
191
192 /// Configure the Device backends.
193 /** The string parameter @a device must be a comma-separated list of backend
194 string names (see below). The @a dev argument specifies the ID of the
195 actual devices (e.g. GPU) to use.
196 * The available backends are described by the Backend class.
197 * The string name of a backend is the lowercase version of the
198 Backend::Id enumeration constant with '_' replaced by '-', e.g. the
199 string name of 'RAJA_CPU' is 'raja-cpu'. The string name of the debug
200 backend (Backend::Id 'DEBUG_DEVICE') is exceptionally set to 'debug'.
201 * The 'cpu' backend is always enabled with lowest priority.
202 * The current backend priority from highest to lowest is:
203 'ceed-cuda', 'occa-cuda', 'raja-cuda', 'cuda',
204 'ceed-hip', 'hip', 'debug',
205 'occa-omp', 'raja-omp', 'omp',
206 'ceed-cpu', 'occa-cpu', 'raja-cpu', 'cpu'.
207 * Multiple backends can be configured at the same time.
208 * Only one 'occa-*' backend can be configured at a time.
209 * The backend 'occa-cuda' enables the 'cuda' backend unless 'raja-cuda'
210 is already enabled.
211 * The backend 'occa-omp' enables the 'omp' backend (if MFEM was built
212 with MFEM_USE_OPENMP=YES) unless 'raja-omp' is already enabled.
213 * Only one 'ceed-*' backend can be configured at a time.
214 * The backend 'ceed-cpu' delegates to a libCEED CPU backend the setup and
215 evaluation of the operator.
216 * The backend 'ceed-cuda' delegates to a libCEED CUDA backend the setup
217 and evaluation of operators and enables the 'cuda' backend to avoid
218 transfers between host and device.
219 * The backend 'ceed-hip' delegates to a libCEED HIP backend the setup
220 and evaluation of operators and enables the 'hip' backend to avoid
221 transfers between host and device.
222 * The 'debug' backend should not be combined with other device backends.
223 */
224 void Configure(const std::string &device, const int dev = 0);
225
226 /// Set the default host and device MemoryTypes, @a h_mt and @a d_mt.
227 /** The host and device MemoryTypes are also set to be dual to each other.
228
229 These two MemoryType%s are used by most MFEM classes when allocating
230 memory used on host and device, respectively.
231
232 This method can only be called before Device construction and
233 configuration, and the specified memory types must be compatible with
234 the subsequent Device configuration. */
235 static void SetMemoryTypes(MemoryType h_mt, MemoryType d_mt);
236
237 /// Print the configuration of the MFEM virtual device object.
238 void Print(std::ostream &out = mfem::out);
239
240 /// Return true if Configure() has been called previously.
241 static inline bool IsConfigured() { return Get().ngpu >= 0; }
242
243 /// Return true if an actual device (e.g. GPU) has been configured.
244 static inline bool IsAvailable() { return Get().ngpu > 0; }
245
246 /// Return true if any backend other than Backend::CPU is enabled.
247 static inline bool IsEnabled() { return Get().mode == ACCELERATED; }
248
249 /// The opposite of IsEnabled().
250 static inline bool IsDisabled() { return !IsEnabled(); }
251
252 /// Get the device id of the configured device.
253 static inline int GetId() { return Get().dev; }
254
255 /** @brief Return true if any of the backends in the backend mask, @a b_mask,
256 are allowed. */
257 /** This method can be used with any of the Backend::Id constants, the
258 Backend::*_MASK, or combinations of those. */
259 static inline bool Allows(unsigned long b_mask)
260 { return Get().backends & b_mask; }
261
262 /** @brief Get the current Host MemoryType. This is the MemoryType used by
263 most MFEM classes when allocating memory used on the host.
264 */
265 static inline MemoryType GetHostMemoryType() { return Get().host_mem_type; }
266
267 /** @brief Get the current Host MemoryClass. This is the MemoryClass used
268 by most MFEM host Memory objects. */
269 static inline MemoryClass GetHostMemoryClass() { return Get().host_mem_class; }
270
271 /** @brief Get the current Device MemoryType. This is the MemoryType used by
272 most MFEM classes when allocating memory to be used with device kernels.
273 */
274 static inline MemoryType GetDeviceMemoryType() { return Get().device_mem_type; }
275
276 /// (DEPRECATED) Equivalent to GetDeviceMemoryType().
277 /** @deprecated Use GetDeviceMemoryType() instead. */
278 static inline MemoryType GetMemoryType() { return Get().device_mem_type; }
279
280 /** @brief Get the current Device MemoryClass. This is the MemoryClass used
281 by most MFEM device kernels to access Memory objects. */
282 static inline MemoryClass GetDeviceMemoryClass() { return Get().device_mem_class; }
283
284 /// (DEPRECATED) Equivalent to GetDeviceMemoryClass().
285 /** @deprecated Use GetDeviceMemoryClass() instead. */
286 static inline MemoryClass GetMemoryClass() { return Get().device_mem_class; }
287
288 static void SetGPUAwareMPI(const bool force = true)
289 { Get().mpi_gpu_aware = force; }
290
291 static bool GetGPUAwareMPI() { return Get().mpi_gpu_aware; }
292};
293
294
295// Inline Memory access functions using the mfem::Device DeviceMemoryClass or
296// the mfem::Device HostMemoryClass.
297
298/** @brief Return the memory class to be used by the functions Read(), Write(),
299 and ReadWrite(), while setting the device use flag in @a mem, if @a on_dev
300 is true. */
301template <typename T>
302MemoryClass GetMemoryClass(const Memory<T> &mem, bool on_dev)
303{
304 if (!on_dev)
305 {
307 }
308 else
309 {
310 mem.UseDevice(true);
312 }
313}
314
315/** @brief Get a pointer for read access to @a mem with the mfem::Device's
316 DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
317 HostMemoryClass, otherwise. */
318/** Also, if @a on_dev = true, the device flag of @a mem will be set. */
319template <typename T>
320inline const T *Read(const Memory<T> &mem, int size, bool on_dev = true)
321{
322 return mem.Read(GetMemoryClass(mem, on_dev), size);
323}
324
325/** @brief Shortcut to Read(const Memory<T> &mem, int size, false) */
326template <typename T>
327inline const T *HostRead(const Memory<T> &mem, int size)
328{
329 return mfem::Read(mem, size, false);
330}
331
332/** @brief Get a pointer for write access to @a mem with the mfem::Device's
333 DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
334 HostMemoryClass, otherwise. */
335/** Also, if @a on_dev = true, the device flag of @a mem will be set. */
336template <typename T>
337inline T *Write(Memory<T> &mem, int size, bool on_dev = true)
338{
339 return mem.Write(GetMemoryClass(mem, on_dev), size);
340}
341
342/** @brief Shortcut to Write(const Memory<T> &mem, int size, false) */
343template <typename T>
344inline T *HostWrite(Memory<T> &mem, int size)
345{
346 return mfem::Write(mem, size, false);
347}
348
349/** @brief Get a pointer for read+write access to @a mem with the mfem::Device's
350 DeviceMemoryClass, if @a on_dev = true, or the mfem::Device's
351 HostMemoryClass, otherwise. */
352/** Also, if @a on_dev = true, the device flag of @a mem will be set. */
353template <typename T>
354inline T *ReadWrite(Memory<T> &mem, int size, bool on_dev = true)
355{
356 return mem.ReadWrite(GetMemoryClass(mem, on_dev), size);
357}
358
359/** @brief Shortcut to ReadWrite(Memory<T> &mem, int size, false) */
360template <typename T>
361inline T *HostReadWrite(Memory<T> &mem, int size)
362{
363 return mfem::ReadWrite(mem, size, false);
364}
365
366} // mfem
367
368#endif // MFEM_DEVICE_HPP
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
Definition device.hpp:123
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition device.hpp:265
~Device()
Destructor.
Definition device.cpp:151
static MemoryClass GetMemoryClass()
(DEPRECATED) Equivalent to GetDeviceMemoryClass().
Definition device.hpp:286
static MemoryClass GetHostMemoryClass()
Get the current Host MemoryClass. This is the MemoryClass used by most MFEM host Memory objects.
Definition device.hpp:269
static bool IsAvailable()
Return true if an actual device (e.g. GPU) has been configured.
Definition device.hpp:244
static void SetGPUAwareMPI(const bool force=true)
Definition device.hpp:288
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition device.hpp:278
static bool IsConfigured()
Return true if Configure() has been called previously.
Definition device.hpp:241
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
Definition device.cpp:183
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
Definition device.cpp:286
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259
Device(const std::string &device, const int dev=0)
Construct a Device and configure it based on the device string. See Configure() for more details.
Definition device.hpp:186
static void SetMemoryTypes(MemoryType h_mt, MemoryType d_mt)
Set the default host and device MemoryTypes, h_mt and d_mt.
Definition device.cpp:263
static bool GetGPUAwareMPI()
Definition device.hpp:291
static MemoryClass GetDeviceMemoryClass()
Get the current Device MemoryClass. This is the MemoryClass used by most MFEM device kernels to acces...
Definition device.hpp:282
static int GetId()
Get the device id of the configured device.
Definition device.hpp:253
static bool IsEnabled()
Return true if any backend other than Backend::CPU is enabled.
Definition device.hpp:247
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used.
Definition device.cpp:73
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
static bool IsDisabled()
The opposite of IsEnabled().
Definition device.hpp:250
Class used by MFEM to store pointers to host and/or device memory.
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
bool UseDevice() const
Read the internal device flag.
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
real_t b
Definition lissajous.cpp:42
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:320
MemoryClass GetMemoryClass(const Memory< T > &mem, bool on_dev)
Return the memory class to be used by the functions Read(), Write(), and ReadWrite(),...
Definition device.hpp:302
T * HostReadWrite(Memory< T > &mem, int size)
Shortcut to ReadWrite(Memory<T> &mem, int size, false)
Definition device.hpp:361
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
Definition device.hpp:327
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:337
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition globals.hpp:66
MemoryClass
Memory classes identify sets of memory types.
T * ReadWrite(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read+write access to mem with the mfem::Device's DeviceMemoryClass,...
Definition device.hpp:354
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory<T> &mem, int size, false)
Definition device.hpp:344
MemoryType
Memory types supported by MFEM.
@ HOST
Host memory; using new[] and delete[].
MFEM backends.
Definition device.hpp:28
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
Definition device.hpp:32
@ RAJA_OMP
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES.
Definition device.hpp:46
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition device.hpp:49
@ DEBUG_DEVICE
[device] Debug backend: host memory is READ/WRITE protected while a device is in use....
Definition device.hpp:76
@ RAJA_CPU
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES.
Definition device.hpp:43
@ OMP
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
Definition device.hpp:36
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition device.hpp:40
@ OCCA_OMP
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
Definition device.hpp:57
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
Definition device.hpp:52
@ OCCA_CUDA
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
Definition device.hpp:60
@ CEED_CPU
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers....
Definition device.hpp:63
@ OCCA_CPU
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES.
Definition device.hpp:55
@ CEED_CUDA
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
Definition device.hpp:67
@ CPU
[host] Default CPU backend: sequential execution on each MPI rank.
Definition device.hpp:34
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition device.hpp:38
@ CEED_HIP
[device] CEED HIP backend working together with the HIP backend. Enabled when MFEM_USE_CEED = YES and...
Definition device.hpp:70
@ RAJA_MASK
Bitwise-OR of all RAJA backends.
Definition device.hpp:100
@ DEVICE_MASK
Bitwise-OR of all device backends.
Definition device.hpp:97
@ CEED_MASK
Bitwise-OR of all CEED backends.
Definition device.hpp:95
@ OCCA_MASK
Bitwise-OR of all OCCA backends.
Definition device.hpp:102
@ HIP_MASK
Bitwise-OR of all HIP backends.
Definition device.hpp:91
@ CPU_MASK
Bitwise-OR of all CPU backends.
Definition device.hpp:87
@ NUM_BACKENDS
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
Definition device.hpp:84
@ CUDA_MASK
Bitwise-OR of all CUDA backends.
Definition device.hpp:89
@ OMP_MASK
Bitwise-OR of all OpenMP backends.
Definition device.hpp:93