23#include <unordered_map>
36occa::device occaDevice;
58 "ceed-cuda",
"occa-cuda",
"raja-cuda",
"cuda",
59 "ceed-hip",
"raja-hip",
"hip",
"debug",
60 "occa-omp",
"raja-omp",
"omp",
61 "ceed-cpu",
"occa-cpu",
"raja-cpu",
"cpu"
68Device Device::device_singleton;
69bool Device::device_env =
false;
70bool Device::mem_host_env =
false;
71bool Device::mem_device_env =
false;
72bool Device::mem_types_set =
false;
76 if (
GetEnv(
"MFEM_MEMORY") && !mem_host_env && !mem_device_env)
78 std::string mem_backend(
GetEnv(
"MFEM_MEMORY"));
79 if (mem_backend ==
"host")
85 else if (mem_backend ==
"host32")
91 else if (mem_backend ==
"host64")
97 else if (mem_backend ==
"umpire")
106 else if (mem_backend ==
"debug")
117 || mem_backend ==
"cuda"
120 || mem_backend ==
"hip"
126 mem_device_env =
true;
129 else if (mem_backend ==
"uvm")
132 mem_device_env =
true;
138 MFEM_ABORT(
"Unknown memory backend!");
143 if (
GetEnv(
"MFEM_DEVICE"))
145 std::string device(
GetEnv(
"MFEM_DEVICE"));
150 if (
GetEnv(
"MFEM_GPU_AWARE_MPI"))
161 if ( device_env && !destroy_mm) {
return; }
162 if (!device_env && destroy_mm && !mem_host_env)
166 for (
auto entry : internal::ceed_basis_map)
168 CeedBasisDestroy(&entry.second);
170 internal::ceed_basis_map.clear();
171 for (
auto entry : internal::ceed_restr_map)
173 CeedElemRestrictionDestroy(&entry.second);
175 internal::ceed_restr_map.clear();
177 CeedDestroy(&internal::ceed);
195 std::memcpy((
void*)
this, &Get(),
sizeof(
Device));
196 Get().destroy_mm =
false;
200 std::map<std::string, Backend::Id> bmap;
203 bmap[internal::backend_name[i]] = internal::backend_list[i];
216#elif defined(MFEM_USE_CUDA)
228 std::string device_option;
229 std::string::size_type beg = 0, end;
232 end = device.find(
',', beg);
233 end = (end != std::string::npos) ? end : device.size();
234 const std::string bname = device.substr(beg, end - beg);
235 const auto option = bname.find(
':');
236 const std::string backend = (option != std::string::npos) ?
237 bname.substr(0, option) : bname;
238 const auto it = bmap.find(backend);
239 MFEM_VERIFY(it != bmap.end(),
"Invalid backend name: '" << backend <<
'\'');
240 Get().MarkBackend(it->second);
241 if (option != std::string::npos)
243 device_option += bname.substr(option);
245 if (end == device.size()) {
break; }
261#ifdef MFEM_USE_OPENMP
269 Get().Setup(device_option, device_id);
272 Get().UpdateMemoryTypeAndClass(device_option);
275 if (
this != &Get()) { std::memcpy((
void*)
this, &Get(),
sizeof(
Device)); }
281#if defined(HYPRE_USING_GPU) && (MFEM_HYPRE_VERSION >= 23100)
285 if (HYPRE_Initialized())
299 if (mem_host_env || mem_device_env || device_env) {
return; }
301 MFEM_VERIFY(!
IsConfigured(),
"the default MemoryTypes can only be set before"
302 " Device construction and configuration");
304 "invalid host MemoryType, h_mt = " << (
int)h_mt);
306 "invalid device MemoryType, d_mt = " << (
int)d_mt
307 <<
" (h_mt = " << (
int)h_mt <<
')');
309 Get().host_mem_type = h_mt;
310 Get().device_mem_type = d_mt;
311 mem_types_set =
true;
319 os <<
"Device configuration: ";
320 bool add_comma =
false;
323 if (backends & internal::backend_list[i])
325 if (add_comma) { os <<
','; }
327 os << internal::backend_name[i];
334 const char *ceed_backend;
335 CeedGetResource(internal::ceed, &ceed_backend);
336 os <<
"libCEED backend: " << ceed_backend <<
'\n';
339 os <<
"Memory configuration: "
343 os << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
349 os <<
"\nUse GPU-aware MPI: " << (
GetGPUAwareMPI() ?
"yes" :
"no");
355void Device::UpdateMemoryTypeAndClass(
const std::string &device_option)
360#ifdef MFEM_USE_UMPIRE
362 if (!mem_host_env && !mem_types_set)
379 switch (host_mem_type)
391 else if (!mem_types_set)
393#ifndef MFEM_USE_UMPIRE
404 if (device && device_option.find(
":uvm") != std::string::npos)
418 "invalid device memory configuration!");
427 if (Get().ngpu >= 0) {
return Get().ngpu; }
428#if defined(MFEM_USE_CUDA)
430#elif defined(MFEM_USE_HIP)
432 MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
435 MFEM_ABORT(
"Unable to query number of available devices without"
436 " MFEM_USE_CUDA or MFEM_USE_HIP!");
441static void CudaDeviceSetup(
const int dev,
int &ngpu)
445 MFEM_VERIFY(ngpu > 0,
"No CUDA device found!");
446 MFEM_GPU_CHECK(cudaSetDevice(dev));
448 MFEM_CONTRACT_VAR(dev);
449 MFEM_CONTRACT_VAR(ngpu);
453static void HipDeviceSetup(
const int dev,
int &ngpu)
456 MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
457 MFEM_VERIFY(ngpu > 0,
"No HIP device found!");
458 MFEM_GPU_CHECK(hipSetDevice(dev));
460 MFEM_CONTRACT_VAR(dev);
461 MFEM_CONTRACT_VAR(ngpu);
465static void RajaDeviceSetup(
const int dev,
int &ngpu)
468 CudaDeviceSetup(dev, ngpu);
469#elif defined(MFEM_USE_HIP)
470 HipDeviceSetup(dev, ngpu);
472 MFEM_CONTRACT_VAR(dev);
473 MFEM_CONTRACT_VAR(ngpu);
477static void OccaDeviceSetup(
const int dev)
483 if (cpu + omp + cuda > 1)
485 MFEM_ABORT(
"Only one OCCA backend can be configured at a time!");
490 std::string mode(
"mode: 'CUDA', device_id : ");
491 internal::occaDevice.setup(mode.append(1,
'0'+dev));
493 MFEM_ABORT(
"the OCCA CUDA backend requires OCCA built with CUDA!");
498#if OCCA_OPENMP_ENABLED
499 internal::occaDevice.setup(
"mode: 'OpenMP'");
501 MFEM_ABORT(
"the OCCA OpenMP backend requires OCCA built with OpenMP!");
506 internal::occaDevice.setup(
"mode: 'Serial'");
510 if (occa::io::exists(MFEM_INSTALL_DIR
"/include/mfem/"))
512 mfemDir = MFEM_INSTALL_DIR
"/include/mfem/";
514 else if (occa::io::exists(MFEM_SOURCE_DIR))
516 mfemDir = MFEM_SOURCE_DIR;
520 MFEM_ABORT(
"Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
523 occa::io::addLibraryPath(
"mfem", mfemDir);
524 occa::loadKernels(
"mfem");
526 MFEM_CONTRACT_VAR(dev);
527 MFEM_ABORT(
"the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
531static void CeedDeviceSetup(
const char* ceed_spec)
534 CeedInit(ceed_spec, &internal::ceed);
535 const char *ceed_backend;
536 CeedGetResource(internal::ceed, &ceed_backend);
537 if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec,
"/cpu/self") &&
538 strcmp(ceed_spec,
"/gpu/hip"))
541 "libCEED is not using the requested backend!!!\n"
542 "WARNING!!!\n" << std::endl;
545 CeedSetErrorHandler(internal::ceed, CeedErrorStore);
548 MFEM_CONTRACT_VAR(ceed_spec);
552void Device::Setup(
const std::string &device_option,
const int device_id)
554 MFEM_VERIFY(ngpu == -1,
"the mfem::Device is already configured!");
560 "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
564 "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
568 "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
570#ifndef MFEM_USE_OPENMP
572 "the OpenMP and RAJA OpenMP backends require MFEM built with"
573 " MFEM_USE_OPENMP=YES");
577 "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
582 { RajaDeviceSetup(dev, ngpu); }
590 MFEM_VERIFY(ceed_cpu + ceed_cuda + ceed_hip == 1,
591 "Only one CEED backend can be enabled at a time!");
595 const char *ceed_spec_search =
599 const char *ceed_spec_default =
603 std::string::size_type beg = device_option.find(ceed_spec_search), end;
604 if (beg == std::string::npos)
606 CeedDeviceSetup(ceed_spec_default);
610 end = device_option.find(
':', beg + 1);
611 end = (end != std::string::npos) ? end : device_option.size();
612 CeedDeviceSetup(device_option.substr(beg + 1, end - beg - 1).c_str());
622#if defined(MFEM_USE_CUDA)
623 struct cudaPointerAttributes attr;
625#if (CUDART_VERSION >= 11000)
626 MFEM_GPU_CHECK(cudaPointerGetAttributes(&attr, ptr));
628 cudaPointerGetAttributes(&attr, ptr);
629 if (
err != cudaSuccess)
637 case cudaMemoryTypeUnregistered:
640 case cudaMemoryTypeHost:
643 case cudaMemoryTypeDevice:
646 case cudaMemoryTypeManaged:
651#elif defined(MFEM_USE_HIP)
652 struct hipPointerAttribute_t attr;
654 hipError_t
err = hipPointerGetAttributes(&attr, ptr);
655 if (
err != hipSuccess)
657 if (
err == hipErrorInvalidValue)
668 else if (attr.isManaged)
672#if (HIP_VERSION_MAJOR >= 6)
673 else if (attr.type == hipMemoryTypeDevice)
675 else if (attr.memoryType == hipMemoryTypeDevice)
680#if (HIP_VERSION_MAJOR >= 6)
681 else if (attr.type == hipMemoryTypeHost)
683 else if (attr.memoryType == hipMemoryTypeHost)
688#if (HIP_VERSION_MAJOR >= 6)
689 else if (attr.type == hipMemoryTypeUnregistered)
695 MFEM_CONTRACT_VAR(ptr);
702#if defined(MFEM_USE_CUDA)
703 cudaMemGetInfo(free, total);
704#elif defined(MFEM_USE_HIP)
705 hipMemGetInfo(free, total);
721#if defined(MFEM_USE_CUDA)
723 cudaDeviceGetAttribute(&res, cudaDevAttrMultiProcessorCount, dev);
725#elif defined(MFEM_USE_HIP)
727 hipDeviceGetAttribute(&res, hipDeviceAttributeMultiprocessorCount, dev);
731 MFEM_CONTRACT_VAR(dev);
739#if defined(MFEM_USE_CUDA)
741#elif defined(MFEM_USE_HIP)
749#if defined(MFEM_USE_CUDA)
751 cudaDeviceGetAttribute(&res, cudaDevAttrWarpSize, dev);
753#elif defined(MFEM_USE_HIP)
755 hipDeviceGetAttribute(&res, hipDeviceAttributeWarpSize, dev);
759 MFEM_CONTRACT_VAR(dev);
767#if defined(MFEM_USE_CUDA)
769#elif defined(MFEM_USE_HIP)
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
static void DeviceMem(size_t *free, size_t *total)
Gets the free and total memory on the device.
void Configure(const std::string &device, const int device_id=0)
Configure the Device backends.
static void SetGPUAwareMPI(const bool force=true)
Manually set the status of GPU-aware MPI flag for use in MPI communication routines which have optimi...
static int NumMultiprocessors()
Same as NumMultiprocessors(int), for the currently active device.
static bool IsConfigured()
Return true if Configure() has been called previously.
static MemoryType QueryMemoryType(const void *ptr)
void Print(std::ostream &os=mfem::out)
Print the configuration of the MFEM virtual device object.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
static void SetMemoryTypes(MemoryType h_mt, MemoryType d_mt)
Set the default host and device MemoryTypes, h_mt and d_mt.
static bool GetGPUAwareMPI()
Get the status of GPU-aware MPI flag.
static int WarpSize()
Same as WarpSize(int), for the currently active device.
static int GetDeviceCount()
Get the number of available devices (may be called before configuration).
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used.
static void InitDevice()
Configure HYPRE's compute and memory policy.
static void Finalize()
Finalize hypre (called automatically at program exit if Hypre::Init() has been called).
void Configure(const MemoryType h_mt, const MemoryType d_mt)
Configure the Memory manager with given default host and device types. This method will be called whe...
void Destroy()
Free all the device memories.
static bool IsFinalized()
Return true if MPI has been finalized.
static bool IsInitialized()
Return true if MPI has been initialized.
std::unordered_map< const BasisKey, CeedBasis, BasisHash > BasisMap
std::unordered_map< const RestrKey, CeedElemRestriction, RestrHash > RestrMap
MFEM_HOST_DEVICE tensor< T, n, n > dev(const tensor< T, n, n > &A)
Calculates the deviator of a matrix (rank-2 tensor)
bool IsDeviceMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::DEVICE.
const char * GetEnv(const char *name)
Wrapper for std::getenv.
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
MemoryManager mm
The (single) global memory manager object.
int CuGetDeviceCount()
Get the number of CUDA devices.
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device configuration.
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
MemoryType
Memory types supported by MFEM.
@ HOST_32
Host memory; aligned at 32 bytes.
@ HOST_64
Host memory; aligned at 64 bytes.
@ HOST
Host memory; using new[] and delete[].
@ HOST_PINNED
Host memory: pinned (page-locked)
@ HOST_DEBUG
Host memory; allocated from a "host-debug" pool.
@ DEVICE
Device memory; using CUDA or HIP *Malloc and *Free.
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
@ RAJA_OMP
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES.
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
@ DEBUG_DEVICE
[device] Debug backend: host memory is READ/WRITE protected while a device is in use....
@ RAJA_CPU
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES.
@ OMP
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
@ OCCA_OMP
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
@ OCCA_CUDA
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
@ CEED_CPU
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers....
@ OCCA_CPU
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES.
@ CEED_CUDA
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
@ CPU
[host] Default CPU backend: sequential execution on each MPI rank.
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
@ CEED_HIP
[device] CEED HIP backend working together with the HIP backend. Enabled when MFEM_USE_CEED = YES and...
@ RAJA_MASK
Bitwise-OR of all RAJA backends.
@ DEVICE_MASK
Bitwise-OR of all device backends.
@ CEED_MASK
Bitwise-OR of all CEED backends.
@ OCCA_MASK
Bitwise-OR of all OCCA backends.
@ HIP_MASK
Bitwise-OR of all HIP backends.
@ NUM_BACKENDS
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
@ CUDA_MASK
Bitwise-OR of all CUDA backends.