#include <unordered_map>

occa::device occaDevice;
57 "ceed-cuda",
"occa-cuda",
"raja-cuda",
"cuda",
58 "ceed-hip",
"raja-hip",
"hip",
"debug",
59 "occa-omp",
"raja-omp",
"omp",
60 "ceed-cpu",
"occa-cpu",
"raja-cpu",
"cpu"
Device Device::device_singleton;
bool Device::device_env = false;
bool Device::mem_host_env = false;
bool Device::mem_device_env = false;
bool Device::mem_types_set = false;
// Fragment of the Device constructor: parse the MFEM_MEMORY environment
// variable to pick the default host/device memory backends (lines elided by
// the listing are marked with /* ... */).
if (GetEnv("MFEM_MEMORY") && !mem_host_env && !mem_device_env)
{
   std::string mem_backend(GetEnv("MFEM_MEMORY"));
   if (mem_backend == "host")        { /* ... */ }
   else if (mem_backend == "host32") { /* ... */ }
   else if (mem_backend == "host64") { /* ... */ }
   else if (mem_backend == "umpire") { /* ... */ }
   else if (mem_backend == "debug")  { /* ... */ }
   else if (mem_backend == "cuda" || mem_backend == "hip")
   {
      /* ... */
      mem_device_env = true;
   }
   else if (mem_backend == "uvm")
   {
      /* ... */
      mem_device_env = true;
   }
   else
   {
      MFEM_ABORT("Unknown memory backend!");
   }
}
// Fragment of the Device constructor: the MFEM_DEVICE environment variable,
// when set, selects the device configuration string.
if (GetEnv("MFEM_DEVICE"))
{
   std::string device(GetEnv("MFEM_DEVICE"));
   /* ... */
}
// Fragment of the Device destructor: when the device was configured from the
// environment, cleanup is skipped; otherwise the libCEED objects cached in
// the internal maps are released.
if ( device_env && !destroy_mm) { return; }
if (!device_env &&  destroy_mm && !mem_host_env)
{
   /* ... */
   for (auto entry : internal::ceed_basis_map)
   {
      CeedBasisDestroy(&entry.second);
   }
   internal::ceed_basis_map.clear();
   for (auto entry : internal::ceed_restr_map)
   {
      CeedElemRestrictionDestroy(&entry.second);
   }
   internal::ceed_restr_map.clear();
   CeedDestroy(&internal::ceed);
   /* ... */
}
Get().mode = SEQUENTIAL;
// Fragment of Device::Configure(): copy the singleton's configuration into
// this object and keep the singleton from destroying the memory manager.
std::memcpy(this, &Get(), sizeof(Device));
Get().destroy_mm = false;
// Fragment of Device::Configure(): build a name -> Backend::Id map and parse
// the comma-separated device string; a "backend:option" entry also stores the
// option string. Elided lines are marked /* ... */; the loop headers are
// restored from context.
std::map<std::string, Backend::Id> bmap;
for (int i = 0; i < Backend::NUM_BACKENDS; i++)
{
   bmap[internal::backend_name[i]] = internal::backend_list[i];
}
std::string::size_type beg = 0, end, option;
/* ... */
end = device.find(',', beg);
end = (end != std::string::npos) ? end : device.size();
const std::string bname = device.substr(beg, end - beg);
option = bname.find(':');
if (option == std::string::npos) // no ':' option in this backend name
{
   const std::string backend = bname;
   std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
   MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
   Get().MarkBackend(it->second);
}
else // the backend name carries an option after ':'
{
   const std::string backend = bname.substr(0, option);
   const std::string boption = bname.substr(option+1);
   Get().device_option = strdup(boption.c_str());
   std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
   MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
   Get().MarkBackend(it->second);
}
if (end == device.size()) { break; }
#ifdef MFEM_USE_OPENMP
   /* ... */
#endif
   Get().Setup(device_id);
   // copy the resulting configuration of the singleton into this object
   if (this != &Get()) { std::memcpy(this, &Get(), sizeof(Device)); }
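// Illustrative sketch (not part of the original file): strings accepted by
// the parsing loop in Configure() are comma-separated names from
// backend_name[], optionally followed by one ':'-separated option that ends
// up in device_option. The helper name ExampleDeviceStrings is hypothetical.
static void ExampleDeviceStrings()
{
   // A single backend with no option:
   mfem::Device device("cpu");
   // Also valid (given the corresponding build options):
   //   "raja-omp,occa-cpu"     -- two host backends
   //   "ceed-cpu:/cpu/self"    -- CEED backend with an explicit libCEED spec
   // An unknown name such as "gpu" fails the MFEM_VERIFY above with the
   // message: invalid backend name: 'gpu'
   device.Print();
}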
// HYPRE compute/memory policy setup (cf. InitDevice() documented below):
#if defined(HYPRE_USING_GPU) && (MFEM_HYPRE_VERSION >= 23100)
   if (HYPRE_Initialized())
// Fragment of Device::SetMemoryTypes(h_mt, d_mt): ignore the call when the
// types were already fixed through the environment, verify the arguments
// (the elided conditions use IsHostMemory()/IsDeviceMemory()), and store the
// defaults.
if (mem_host_env || mem_device_env || device_env) { return; }
MFEM_VERIFY(!IsConfigured(), "the default MemoryTypes can only be set before"
            " Device construction and configuration");
MFEM_VERIFY(IsHostMemory(h_mt),
            "invalid host MemoryType, h_mt = " << (int)h_mt);
MFEM_VERIFY(IsDeviceMemory(d_mt),
            "invalid device MemoryType, d_mt = " << (int)d_mt
            << " (h_mt = " << (int)h_mt << ')');
Get().host_mem_type = h_mt;
Get().device_mem_type = d_mt;
mem_types_set = true;
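// Illustrative sketch (not part of the original file): SetMemoryTypes() must
// run before the Device is configured, as enforced by the MFEM_VERIFY above.
// The helper name ExampleSetMemoryTypes is hypothetical.
static void ExampleSetMemoryTypes()
{
   // Use 32-byte aligned host allocations with plain device memory; this has
   // no effect if MFEM_MEMORY/MFEM_DEVICE already fixed the types.
   mfem::Device::SetMemoryTypes(mfem::MemoryType::HOST_32,
                                mfem::MemoryType::DEVICE);
   mfem::Device device("cuda"); // construction/configuration happens after
}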
// Fragment of Device::Print(os): list the enabled backends by name, the
// libCEED resource (when a CEED backend is active), and the configured memory
// types. The loop header and a couple of elided lines are restored from
// context.
os << "Device configuration: ";
bool add_comma = false;
for (int i = 0; i < Backend::NUM_BACKENDS; i++)
{
   if (backends & internal::backend_list[i])
   {
      if (add_comma) { os << ','; }
      add_comma = true;
      os << internal::backend_name[i];
   }
}
/* ... */
const char *ceed_backend;
CeedGetResource(internal::ceed, &ceed_backend);
os << "libCEED backend: " << ceed_backend << '\n';
/* ... */
os << "Memory configuration: "
   << MemoryTypeName[static_cast<int>(host_mem_type)];
os << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
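// Illustrative sketch (not part of the original file): Print() writes the
// backend list and the memory configuration produced by the code above to the
// given stream (mfem::out by default). The helper name ExamplePrint is
// hypothetical.
static void ExamplePrint()
{
   mfem::Device device("omp"); // requires MFEM built with MFEM_USE_OPENMP=YES
   device.Print();             // e.g. "Device configuration: omp" followed by
                               // the "Memory configuration: ..." line
}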
// Fragment of Device::UpdateMemoryTypeAndClass(): pick the default host and
// device MemoryType/MemoryClass from the enabled backends (heavily elided).
void Device::UpdateMemoryTypeAndClass()
{
#ifdef MFEM_USE_UMPIRE
   if (!mem_host_env && !mem_types_set) { /* ... */ }
#endif
   if (/* ... */) { switch (host_mem_type) { /* ... */ } }
   else if (!mem_types_set)
   {
#ifndef MFEM_USE_UMPIRE
      /* ... */
#endif
   }
   if (device && device_option && !strcmp(device_option, "uvm")) { /* ... */ }
   MFEM_VERIFY(/* ... */, "invalid device memory configuration!");
}
// Fragment of Device::Enable(): any backend other than CPU switches the
// device to ACCELERATED mode and updates the default memory types.
const bool accelerated = Get().backends & ~(Backend::CPU);
if (accelerated) { Get().mode = Device::ACCELERATED; }
Get().UpdateMemoryTypeAndClass();
// Internal helpers that initialize the GPU for the CUDA, HIP, and RAJA
// backends; the preprocessor guards around the build-specific branches are
// restored for readability, and one elided line is restored from context.
static void DeviceSetup(const int dev, int &ngpu)
{
   ngpu = CuGetDeviceCount(); // restored: count the available CUDA devices
   MFEM_VERIFY(ngpu > 0, "No CUDA device found!");
   MFEM_GPU_CHECK(cudaSetDevice(dev));
}

static void CudaDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_CUDA
   DeviceSetup(dev, ngpu);
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}

static void HipDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_HIP
   MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
   MFEM_VERIFY(ngpu > 0, "No HIP device found!");
   MFEM_GPU_CHECK(hipSetDevice(dev));
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}

static void RajaDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_CUDA
   if (ngpu <= 0) { DeviceSetup(dev, ngpu); }
#elif defined(MFEM_USE_HIP)
   HipDeviceSetup(dev, ngpu);
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}
// Fragment of OccaDeviceSetup(dev); the branch and preprocessor structure
// around the elided lines (marked /* ... */) is restored for readability, and
// the OCCA_CUDA_ENABLED guard is inferred by analogy with OCCA_OPENMP_ENABLED.
static void OccaDeviceSetup(const int dev)
{
#ifdef MFEM_USE_OCCA
   /* ... */
   if (cpu + omp + cuda > 1)
   {
      MFEM_ABORT("Only one OCCA backend can be configured at a time!");
   }
   if (cuda)
   {
#if OCCA_CUDA_ENABLED
      std::string mode("mode: 'CUDA', device_id : ");
      internal::occaDevice.setup(mode.append(1, '0' + dev));
#else
      MFEM_ABORT("the OCCA CUDA backend requires OCCA built with CUDA!");
#endif
   }
   else if (omp)
   {
#if OCCA_OPENMP_ENABLED
      internal::occaDevice.setup("mode: 'OpenMP'");
#else
      MFEM_ABORT("the OCCA OpenMP backend requires OCCA built with OpenMP!");
#endif
   }
   else
   {
      internal::occaDevice.setup("mode: 'Serial'");
   }
   // locate the MFEM headers so that the OCCA kernels can be loaded
   std::string mfemDir;
   if (occa::io::exists(MFEM_INSTALL_DIR "/include/mfem/"))
   {
      mfemDir = MFEM_INSTALL_DIR "/include/mfem/";
   }
   else if (occa::io::exists(MFEM_SOURCE_DIR))
   {
      mfemDir = MFEM_SOURCE_DIR;
   }
   else
   {
      MFEM_ABORT("Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
   }
   occa::io::addLibraryPath("mfem", mfemDir);
   occa::loadKernels("mfem");
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_ABORT("the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
#endif
}
// Fragment of CeedDeviceSetup(ceed_spec); the preprocessor structure around
// the elided lines is restored for readability.
static void CeedDeviceSetup(const char* ceed_spec)
{
#ifdef MFEM_USE_CEED
   CeedInit(ceed_spec, &internal::ceed);
   const char *ceed_backend;
   CeedGetResource(internal::ceed, &ceed_backend);
   // warn when libCEED resolved the requested spec to a different backend
   if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec, "/cpu/self") &&
       strcmp(ceed_spec, "/gpu/hip"))
   {
      /* ... */
         "libCEED is not using the requested backend!!!\n"
         "WARNING!!!\n" << std::endl;
   }
   /* ... */
   CeedSetErrorHandler(internal::ceed, CeedErrorStore);
#else
   MFEM_CONTRACT_VAR(ceed_spec);
#endif
}
// Fragment of Device::Setup(device_id): verify that the requested backends
// are supported by the build configuration, then initialize the selected
// device backends (the elided MFEM_VERIFY conditions test the corresponding
// backend masks).
void Device::Setup(const int device_id)
{
   MFEM_VERIFY(ngpu == -1, "the mfem::Device is already configured!");
   /* ... */
   MFEM_VERIFY(/* ... */,
               "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
   MFEM_VERIFY(/* ... */,
               "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
   MFEM_VERIFY(/* ... */,
               "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
#ifndef MFEM_USE_OPENMP
   MFEM_VERIFY(/* ... */,
               "the OpenMP and RAJA OpenMP backends require MFEM built with"
               " MFEM_USE_OPENMP=YES");
#endif
   MFEM_VERIFY(/* ... */,
               "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
   /* ... */
   MFEM_VERIFY(ceed_cpu + ceed_cuda + ceed_hip <= 1,
               "Only one CEED backend can be enabled at a time!");
   /* ... */
   { RajaDeviceSetup(dev, ngpu); }
   // For each enabled CEED backend (elided Allows() guards), use a default
   // libCEED spec unless one was given after ':' in the device string:
   if (!device_option) { CeedDeviceSetup("/cpu/self"); }      // ceed-cpu
   else                { CeedDeviceSetup(device_option); }
   if (!device_option) { CeedDeviceSetup("/gpu/cuda/gen"); }  // ceed-cuda
   else                { CeedDeviceSetup(device_option); }
   if (!device_option) { CeedDeviceSetup("/gpu/hip"); }       // ceed-hip
   else                { CeedDeviceSetup(device_option); }
   /* ... */
}
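// Illustrative sketch (not part of the original file): once Setup() has run,
// user code typically branches on Device::Allows() with a backend mask, as in
// the pattern below. The helper name ExampleDispatch is hypothetical.
static void ExampleDispatch()
{
   if (mfem::Device::Allows(mfem::Backend::DEVICE_MASK))
   {
      // launch the device (CUDA/HIP/debug) code path
   }
   else
   {
      // fall back to the sequential/OpenMP host code path
   }
}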
// Fragment of Device::QueryMemoryType(ptr): ask the GPU runtime for the
// pointer attributes and translate them into an mfem::MemoryType. The CUDA
// and HIP paths differ, and HIP 6+ renamed attr.memoryType to attr.type
// (elided lines are marked /* ... */).
#if defined(MFEM_USE_CUDA)
   struct cudaPointerAttributes attr;
#if (CUDART_VERSION >= 11000)
   MFEM_GPU_CHECK(cudaPointerGetAttributes(&attr, ptr));
#else
   cudaError_t err = cudaPointerGetAttributes(&attr, ptr);
   if (err != cudaSuccess)
   { /* ... */ }
#endif
   switch (attr.type)
   {
      case cudaMemoryTypeUnregistered: /* ... */
      case cudaMemoryTypeHost:         /* ... */
      case cudaMemoryTypeDevice:       /* ... */
      case cudaMemoryTypeManaged:      /* ... */
   }
#elif defined(MFEM_USE_HIP)
   struct hipPointerAttribute_t attr;
   hipError_t err = hipPointerGetAttributes(&attr, ptr);
   if (err != hipSuccess)
   {
      if (err == hipErrorInvalidValue) { /* ... */ }
      /* ... */
   }
   else if (attr.isManaged) { /* ... */ }
#if (HIP_VERSION_MAJOR >= 6)
   else if (attr.type == hipMemoryTypeDevice)
#else
   else if (attr.memoryType == hipMemoryTypeDevice)
#endif
   { /* ... */ }
#if (HIP_VERSION_MAJOR >= 6)
   else if (attr.type == hipMemoryTypeHost)
#else
   else if (attr.memoryType == hipMemoryTypeHost)
#endif
   { /* ... */ }
#if (HIP_VERSION_MAJOR >= 6)
   else if (attr.type == hipMemoryTypeUnregistered)
   { /* ... */ }
#endif
#endif
// Fragment of DeviceMem(free, total): query the GPU runtime for the free and
// total device memory.
#if defined(MFEM_USE_CUDA)
   cudaMemGetInfo(free, total);
#elif defined(MFEM_USE_HIP)
   hipMemGetInfo(free, total);
#endif
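// Illustrative sketch (not part of the original file), assuming DeviceMem()
// is callable from user code: report the free and total device memory queried
// above. The helper name ExampleDeviceMem is hypothetical.
static void ExampleDeviceMem()
{
   size_t free_bytes = 0, total_bytes = 0;
   mfem::Device::DeviceMem(&free_bytes, &total_bytes);
   mfem::out << free_bytes << " bytes free of " << total_bytes << " total\n";
}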
// Device-attribute queries backing NumMultiprocessors() and WarpSize(); the
// bodies of the intervening #if/#elif blocks were elided by the listing.
#if defined(MFEM_USE_CUDA)
   cudaDeviceGetAttribute(&res, cudaDevAttrMultiProcessorCount, dev);
#elif defined(MFEM_USE_HIP)
   hipDeviceGetAttribute(&res, hipDeviceAttributeMultiprocessorCount, dev);
#endif
#if defined(MFEM_USE_CUDA)
#elif defined(MFEM_USE_HIP)
#endif
#if defined(MFEM_USE_CUDA)
   cudaDeviceGetAttribute(&res, cudaDevAttrWarpSize, dev);
#elif defined(MFEM_USE_HIP)
   hipDeviceGetAttribute(&res, hipDeviceAttributeWarpSize, dev);
#endif
#if defined(MFEM_USE_CUDA)
#elif defined(MFEM_USE_HIP)
#endif
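// Illustrative sketch (not part of the original file): the attribute queries
// above back Device::NumMultiprocessors() and Device::WarpSize() for the
// currently active device. The helper name ExampleDeviceQueries is
// hypothetical.
static void ExampleDeviceQueries()
{
   const int num_sm = mfem::Device::NumMultiprocessors();
   const int warp   = mfem::Device::WarpSize();
   mfem::out << "multiprocessors: " << num_sm << ", warp size: " << warp << '\n';
}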
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
static void DeviceMem(size_t *free, size_t *total)
Gets the free and total memory on the device.
static MemoryType QueryMemoryType(void *ptr)
static int NumMultiprocessors()
Same as NumMultiprocessors(int), for the currently active device.
static bool IsConfigured()
Return true if Configure() has been called previously.
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
static void SetMemoryTypes(MemoryType h_mt, MemoryType d_mt)
Set the default host and device MemoryTypes, h_mt and d_mt.
static int WarpSize()
Same as WarpSize(int), for the currently active device.
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used.
static void InitDevice()
Configure HYPRE's compute and memory policy.
static void Finalize()
Finalize hypre (called automatically at program exit if Hypre::Init() has been called).
void Configure(const MemoryType h_mt, const MemoryType d_mt)
Configure the Memory manager with given default host and device types. This method will be called whe...
void Destroy()
Free all the device memories.
std::unordered_map< const BasisKey, CeedBasis, BasisHash > BasisMap
std::unordered_map< const RestrKey, CeedElemRestriction, RestrHash > RestrMap
bool IsDeviceMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::DEVICE.
const char * GetEnv(const char *name)
Wrapper for std::getenv.
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
MemoryManager mm
The (single) global memory manager object.
int CuGetDeviceCount()
Get the number of CUDA devices.
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
MemoryType
Memory types supported by MFEM.
@ HOST_32
Host memory; aligned at 32 bytes.
@ HOST_64
Host memory; aligned at 64 bytes.
@ HOST
Host memory; using new[] and delete[].
@ HOST_PINNED
Host memory: pinned (page-locked)
@ HOST_DEBUG
Host memory; allocated from a "host-debug" pool.
@ DEVICE
Device memory; using CUDA or HIP *Malloc and *Free.
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
@ RAJA_OMP
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES.
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
@ DEBUG_DEVICE
[device] Debug backend: host memory is READ/WRITE protected while a device is in use....
@ RAJA_CPU
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES.
@ OMP
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
@ OCCA_OMP
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
@ OCCA_CUDA
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
@ CEED_CPU
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers....
@ OCCA_CPU
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES.
@ CEED_CUDA
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
@ CPU
[host] Default CPU backend: sequential execution on each MPI rank.
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
@ CEED_HIP
[device] CEED HIP backend working together with the HIP backend. Enabled when MFEM_USE_CEED = YES and...
@ RAJA_MASK
Bitwise-OR of all RAJA backends.
@ DEVICE_MASK
Bitwise-OR of all device backends.
@ CEED_MASK
Bitwise-OR of all CEED backends.
@ OCCA_MASK
Bitwise-OR of all OCCA backends.
@ HIP_MASK
Bitwise-OR of all HIP backends.
@ NUM_BACKENDS
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
@ CUDA_MASK
Bitwise-OR of all CUDA backends.
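The mask values above are plain bitwise-ORs of the individual Backend::Id flags, so user code can combine flags the same way when calling Device::Allows(); a minimal sketch (the name MY_GPU_MASK is hypothetical):

   constexpr unsigned long MY_GPU_MASK = mfem::Backend::CUDA | mfem::Backend::HIP;
   const bool using_gpu = mfem::Device::Allows(MY_GPU_MASK);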