#include <unordered_map>

// Default occa::device used by MFEM (defined in the mfem::internal namespace).
occa::device occaDevice;

// Backends listed by priority, from high to low:
static const Backend::Id backend_list[Backend::NUM_BACKENDS] =
{
   Backend::CEED_CUDA, Backend::OCCA_CUDA, Backend::RAJA_CUDA, Backend::CUDA,
   Backend::CEED_HIP, Backend::RAJA_HIP, Backend::HIP, Backend::DEBUG_DEVICE,
   Backend::OCCA_OMP, Backend::RAJA_OMP, Backend::OMP,
   Backend::CEED_CPU, Backend::OCCA_CPU, Backend::RAJA_CPU, Backend::CPU
};

// The corresponding backend names; these are the tokens accepted in the
// device string passed to Device::Configure():
static const char *backend_name[Backend::NUM_BACKENDS] =
{
   "ceed-cuda", "occa-cuda", "raja-cuda", "cuda",
   "ceed-hip", "raja-hip", "hip", "debug",
   "occa-omp", "raja-omp", "omp",
   "ceed-cpu", "occa-cpu", "raja-cpu", "cpu"
};
// The (single) global Device object and the flags recording whether the
// configuration came from the environment:
Device Device::device_singleton;
bool Device::device_env = false;
bool Device::mem_host_env = false;
bool Device::mem_device_env = false;
bool Device::mem_types_set = false;
// Device constructor: honor the MFEM_MEMORY and MFEM_DEVICE environment
// variables, which can override the defaults before Configure() is called.
Device::Device()
{
   if (getenv("MFEM_MEMORY") && !mem_host_env && !mem_device_env)
   {
      std::string mem_backend(getenv("MFEM_MEMORY"));
      if (mem_backend == "host")        { /* select MemoryType::HOST */ }
      else if (mem_backend == "host32") { /* select MemoryType::HOST_32 */ }
      else if (mem_backend == "host64") { /* select MemoryType::HOST_64 */ }
      else if (mem_backend == "umpire") { /* select the Umpire host pool */ }
      else if (mem_backend == "debug")  { /* select MemoryType::HOST_DEBUG */ }
      // (the "cuda"/"hip" cases are guarded by the matching build options)
      else if (mem_backend == "cuda" || mem_backend == "hip")
      {
         /* ... select MemoryType::DEVICE ... */
         mem_device_env = true;
      }
      else if (mem_backend == "uvm")
      {
         /* ... select managed (unified) memory for host and device ... */
         mem_device_env = true;
      }
      else
      {
         MFEM_ABORT("Unknown memory backend!");
      }
   }

   if (getenv("MFEM_DEVICE"))
   {
      std::string device(getenv("MFEM_DEVICE"));
      Configure(device);
      device_env = true;
   }
}
// Device destructor: release the libCEED objects and the memory manager, but
// only when this Device instance owns them.
Device::~Device()
{
   if ( device_env && !destroy_mm) { return; }
   if (!device_env && destroy_mm && !mem_host_env)
   {
#ifdef MFEM_USE_CEED
      // Destroy the cached libCEED bases and element restrictions.
      for (auto entry : internal::ceed_basis_map) { CeedBasisDestroy(&entry.second); }
      internal::ceed_basis_map.clear();
      for (auto entry : internal::ceed_restr_map) { CeedElemRestrictionDestroy(&entry.second); }
      internal::ceed_restr_map.clear();
      // Destroy the libCEED context.
      CeedDestroy(&internal::ceed);
#endif
      mm.Destroy();
   }
   // Reset the singleton to its default, sequential state.
   Get().mode = SEQUENTIAL;
}
// Device::Configure: parse the device string (a comma-separated list of
// backend names, each optionally followed by ":option") and set everything up.
void Device::Configure(const std::string &device, const int device_id)
{
   // If the device was already configured through MFEM_DEVICE, copy the
   // singleton state into *this and keep the singleton as the owner.
   if (device_env)
   {
      std::memcpy(this, &Get(), sizeof(Device));
      Get().destroy_mm = false;
      return;
   }

   // Map the backend names to their Backend::Id values.
   std::map<std::string, Backend::Id> bmap;
   for (int i = 0; i < Backend::NUM_BACKENDS; i++)
   {
      bmap[internal::backend_name[i]] = internal::backend_list[i];
   }

   // Parse the comma-separated backend list.
   std::string::size_type beg = 0, end, option;
   while (true)
   {
      end = device.find(',', beg);
      end = (end != std::string::npos) ? end : device.size();
      const std::string bname = device.substr(beg, end - beg);
      option = bname.find(':');
      if (option == std::string::npos) // plain backend name
      {
         const std::string backend = bname;
         std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
         MFEM_VERIFY(it != bmap.end(),
                     "invalid backend name: '" << backend << '\'');
         Get().MarkBackend(it->second);
      }
      else // "backend:option" -- store the option for later use
      {
         const std::string backend = bname.substr(0, option);
         const std::string boption = bname.substr(option+1);
         Get().device_option = strdup(boption.c_str());
         std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
         MFEM_VERIFY(it != bmap.end(),
                     "invalid backend name: '" << backend << '\'');
         Get().MarkBackend(it->second);
      }
      if (end == device.size()) { break; }
      beg = end + 1;
   }

#ifdef MFEM_USE_OPENMP
   // ... (OpenMP-related backend adjustments elided) ...
#endif

   // Perform the setup of the selected backends.
   Get().Setup(device_id);

   // Copy all data members from the configured singleton into '*this'.
   if (this != &Get()) { std::memcpy(this, &Get(), sizeof(Device)); }

   // Only '*this' will call MemoryManager::Destroy() in the destructor.
   destroy_mm = true;
}
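// Usage illustration (not part of this file): the device string accepts a
// comma-separated list of backend names, optionally with a ':' option, e.g.
//   Device device("raja-cuda,omp");        // two backends, no option
//   Device device("ceed-cpu:/cpu/self");   // option stored in device_option
// The option is forwarded to CeedDeviceSetup() for the ceed-* backends.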
// Device::SetMemoryTypes: choose the default host/device MemoryTypes. Ignored
// when the types were already fixed through the environment.
void Device::SetMemoryTypes(MemoryType h_mt, MemoryType d_mt)
{
   if (mem_host_env || mem_device_env || device_env) { return; }

   MFEM_VERIFY(!IsConfigured(), "the default MemoryTypes can only be set before"
               " Device construction and configuration");
   MFEM_VERIFY(IsHostMemory(h_mt),
               "invalid host MemoryType, h_mt = " << (int)h_mt);
   MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == h_mt,
               "invalid device MemoryType, d_mt = " << (int)d_mt
               << " (h_mt = " << (int)h_mt << ')');

   Get().host_mem_type = h_mt;
   Get().device_mem_type = d_mt;
   mem_types_set = true;
}
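// Usage illustration (not part of this file): the defaults can be chosen
// before the Device is constructed, e.g.
//   Device::SetMemoryTypes(MemoryType::HOST_64, MemoryType::DEVICE);
//   Device device("cuda");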
// Device::Print: report the active backends and the memory configuration.
void Device::Print(std::ostream &os)
{
   os << "Device configuration: ";
   bool add_comma = false;
   for (int i = 0; i < Backend::NUM_BACKENDS; i++)
   {
      if (backends & internal::backend_list[i])
      {
         if (add_comma) { os << ','; }
         add_comma = true;
         os << internal::backend_name[i];
      }
   }
   os << '\n';
   // When a libCEED backend is enabled, also report the resolved resource.
   if (Allows(Backend::CEED_MASK))
   {
      const char *ceed_backend;
      CeedGetResource(internal::ceed, &ceed_backend);
      os << "libCEED backend: " << ceed_backend << '\n';
   }
   os << "Memory configuration: "
      << MemoryTypeName[static_cast<int>(host_mem_type)];
   if (Device::Allows(Backend::DEVICE_MASK))
   {
      os << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
   }
   os << std::endl;
}
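// Illustrative output (the exact memory-type names depend on the build and
// the selected backends):
//   Device configuration: cuda
//   Memory configuration: host-std,cuda
// The "libCEED backend:" line appears only when a ceed-* backend is active.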
// Device::UpdateMemoryTypeAndClass: pick host/device MemoryTypes matching the
// configured backends and pass them to the memory manager.
void Device::UpdateMemoryTypeAndClass()
{
   const bool device = Device::Allows(Backend::DEVICE_MASK);
#ifdef MFEM_USE_UMPIRE
   // Default to the Umpire memory types unless they were chosen already.
   if (!mem_host_env && !mem_types_set) { /* ... use the Umpire types ... */ }
#endif
   // With a device backend, select a device type matching the host type:
   if (mem_host_env) { switch (host_mem_type) { /* ... */ } }
   else if (!mem_types_set)
   {
#ifndef MFEM_USE_UMPIRE
      /* ... default to MemoryType::DEVICE ... */
#endif
   }
   // The "uvm" device option switches both types to managed memory.
   if (device && device_option && !strcmp(device_option, "uvm")) { /* ... */ }
   MFEM_VERIFY(!device || IsDeviceMemory(device_mem_type),
               "invalid device memory configuration!");
   mm.Configure(host_mem_type, device_mem_type);
}

// Device::Enable: switch to ACCELERATED mode when a non-CPU backend is set.
void Device::Enable()
{
   const bool accelerated = Get().backends & ~(Backend::CPU);
   if (accelerated) { Get().mode = Device::ACCELERATED; }
   Get().UpdateMemoryTypeAndClass();
}
// Set the CUDA device 'dev' and count the available GPUs.
#ifdef MFEM_USE_CUDA
static void DeviceSetup(const int dev, int &ngpu)
{
   ngpu = CuGetDeviceCount();
   MFEM_VERIFY(ngpu > 0, "No CUDA device found!");
   MFEM_GPU_CHECK(cudaSetDevice(dev));
}
#endif

static void CudaDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_CUDA
   DeviceSetup(dev, ngpu);
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}
static void HipDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_HIP
   MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
   MFEM_VERIFY(ngpu > 0, "No HIP device found!");
   MFEM_GPU_CHECK(hipSetDevice(dev));
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}
static void RajaDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_CUDA
   if (ngpu <= 0) { DeviceSetup(dev, ngpu); }
#elif defined(MFEM_USE_HIP)
   HipDeviceSetup(dev, ngpu);
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}
// OccaDeviceSetup: initialize the OCCA device and load the MFEM OCCA kernels.
static void OccaDeviceSetup(const int dev)
{
#ifdef MFEM_USE_OCCA
   const int cpu  = Device::Allows(Backend::OCCA_CPU);
   const int omp  = Device::Allows(Backend::OCCA_OMP);
   const int cuda = Device::Allows(Backend::OCCA_CUDA);
   if (cpu + omp + cuda > 1)
   {
      MFEM_ABORT("Only one OCCA backend can be configured at a time!");
   }
   if (cuda)
   {
#if OCCA_CUDA_ENABLED
      std::string mode("mode: 'CUDA', device_id : ");
      internal::occaDevice.setup(mode.append(1, '0'+dev));
#else
      MFEM_ABORT("the OCCA CUDA backend requires OCCA built with CUDA!");
#endif
   }
   else if (omp)
   {
#if OCCA_OPENMP_ENABLED
      internal::occaDevice.setup("mode: 'OpenMP'");
#else
      MFEM_ABORT("the OCCA OpenMP backend requires OCCA built with OpenMP!");
#endif
   }
   else
   {
      internal::occaDevice.setup("mode: 'Serial'");
   }

   // Locate the MFEM OCCA kernels in the install or source tree.
   std::string mfemDir;
   if (occa::io::exists(MFEM_INSTALL_DIR "/include/mfem/"))
   {
      mfemDir = MFEM_INSTALL_DIR "/include/mfem/";
   }
   else if (occa::io::exists(MFEM_SOURCE_DIR))
   {
      mfemDir = MFEM_SOURCE_DIR;
   }
   else
   {
      MFEM_ABORT("Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
   }
   occa::io::addLibraryPath("mfem", mfemDir);
   occa::loadKernels("mfem");
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_ABORT("the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
#endif
}
static void CeedDeviceSetup(const char* ceed_spec)
{
#ifdef MFEM_USE_CEED
   CeedInit(ceed_spec, &internal::ceed);
   const char *ceed_backend;
   CeedGetResource(internal::ceed, &ceed_backend);
   // Warn if libCEED resolved the requested spec to a different backend.
   if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec, "/cpu/self") &&
       strcmp(ceed_spec, "/gpu/hip"))
   {
      mfem::out << std::endl << "WARNING!!!\n"
                "libCEED is not using the requested backend!!!\n"
                "WARNING!!!\n" << std::endl;
   }
   CeedSetErrorHandler(internal::ceed, CeedErrorStore);
#else
   MFEM_CONTRACT_VAR(ceed_spec);
#endif
}
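// Usage illustration (not part of this file): a ':' option in the device
// string becomes the libCEED resource passed to CeedInit() here, e.g.
//   Device device("ceed-cuda:/gpu/cuda/shared");
// requests the /gpu/cuda/shared libCEED backend instead of the default
// /gpu/cuda/gen.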
// Device::Setup: check build-time support for the selected backends and
// initialize the corresponding runtimes (CUDA, HIP, RAJA, OCCA, libCEED).
void Device::Setup(const int device_id)
{
   MFEM_VERIFY(ngpu == -1, "the mfem::Device is already configured!");

#ifndef MFEM_USE_CUDA
   MFEM_VERIFY(!Allows(Backend::CUDA_MASK),
               "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
#endif
#ifndef MFEM_USE_HIP
   MFEM_VERIFY(!Allows(Backend::HIP_MASK),
               "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
#endif
#ifndef MFEM_USE_RAJA
   MFEM_VERIFY(!Allows(Backend::RAJA_MASK),
               "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
#endif
#ifndef MFEM_USE_OPENMP
   MFEM_VERIFY(!Allows(Backend::OMP|Backend::RAJA_OMP),
               "the OpenMP and RAJA OpenMP backends require MFEM built with"
               " MFEM_USE_OPENMP=YES");
#endif
#ifndef MFEM_USE_CEED
   MFEM_VERIFY(!Allows(Backend::CEED_MASK),
               "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
#else
   const int ceed_cpu  = Allows(Backend::CEED_CPU);
   const int ceed_cuda = Allows(Backend::CEED_CUDA);
   const int ceed_hip  = Allows(Backend::CEED_HIP);
   MFEM_VERIFY(ceed_cpu + ceed_cuda + ceed_hip <= 1,
               "Only one CEED backend can be enabled at a time!");
#endif

   // Initialize the device runtimes (the CUDA/HIP/OCCA setup calls are elided
   // here); RAJA reuses the CUDA/HIP device selection:
   if (Allows(Backend::RAJA_CUDA) || Allows(Backend::RAJA_HIP))
   { RajaDeviceSetup(dev, ngpu); }

   // libCEED: use a default resource for the selected CEED backend unless a
   // "backend:spec" option was given in the device string.
   if (Allows(Backend::CEED_CPU))
   { CeedDeviceSetup(device_option ? device_option : "/cpu/self"); }
   if (Allows(Backend::CEED_CUDA))
   { CeedDeviceSetup(device_option ? device_option : "/gpu/cuda/gen"); }
   if (Allows(Backend::CEED_HIP))
   { CeedDeviceSetup(device_option ? device_option : "/gpu/hip"); }
}
Referenced symbols (documentation excerpts):
class mfem::Device
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as CUDA, OCCA, RAJA and OpenMP.
static bool IsConfigured()
Return true if Configure() has been called previously.
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
static void SetMemoryTypes(MemoryType h_mt, MemoryType d_mt)
Set the default host and device MemoryTypes, h_mt and d_mt.
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used.
static void InitDevice()
Configure HYPRE's compute and memory policy.
void Configure(const MemoryType h_mt, const MemoryType d_mt)
Configure the Memory manager with given default host and device types. This method will be called whe...
void Destroy()
Free all the device memories.
std::unordered_map< const BasisKey, CeedBasis, BasisHash > BasisMap
std::unordered_map< const RestrKey, CeedElemRestriction, RestrHash > RestrMap
bool IsDeviceMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::DEVICE.
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as std::cout.
MemoryManager mm
The (single) global memory manager object.
int CuGetDeviceCount()
Get the number of CUDA devices.
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
MemoryType
Memory types supported by MFEM.
@ HOST_32
Host memory; aligned at 32 bytes.
@ HOST_64
Host memory; aligned at 64 bytes.
@ HOST
Host memory; using new[] and delete[].
@ HOST_DEBUG
Host memory; allocated from a "host-debug" pool.
@ DEVICE
Device memory; using CUDA or HIP *Malloc and *Free.
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or device.
@ RAJA_OMP
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES.
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
@ DEBUG_DEVICE
[device] Debug backend: host memory is READ/WRITE protected while a device is in use....
@ RAJA_CPU
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES.
@ OMP
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
@ OCCA_OMP
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
@ OCCA_CUDA
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
@ CEED_CPU
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers....
@ OCCA_CPU
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES.
@ CEED_CUDA
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES and MFEM_USE_CUDA = YES.
@ CPU
[host] Default CPU backend: sequential execution on each MPI rank.
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
@ CEED_HIP
[device] CEED HIP backend working together with the HIP backend. Enabled when MFEM_USE_CEED = YES and MFEM_USE_HIP = YES.
@ RAJA_MASK
Bitwise-OR of all RAJA backends.
@ DEVICE_MASK
Bitwise-OR of all device backends.
@ CEED_MASK
Bitwise-OR of all CEED backends.
@ OCCA_MASK
Bitwise-OR of all OCCA backends.
@ HIP_MASK
Bitwise-OR of all HIP backends.
@ NUM_BACKENDS
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
@ CUDA_MASK
Bitwise-OR of all CUDA backends.
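A minimal usage sketch of the API documented above (assuming MFEM was built
with the corresponding backends; the string "cuda,omp" is only an example):

#include "mfem.hpp"
using namespace mfem;

int main()
{
   // Select the backends; Device::Setup() verifies build-time support.
   Device device("cuda,omp");
   device.Print();   // report the active backends and memory configuration

   if (Device::Allows(Backend::DEVICE_MASK))
   {
      // Device backends are active: Vector/Array data may live in device
      // memory, managed through the global MemoryManager 'mm'.
   }
   return 0;
}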