4.1/device_8cpp_source.html

 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced

 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files

 // LICENSE and NOTICE for details. LLNL-CODE-806117.

 //

 // This file is part of the MFEM library. For more information and source code

 // availability visit https://mfem.org.

 //

 // MFEM is free software; you can redistribute it and/or modify it under the

 // terms of the BSD-3 license. We welcome feedback and contributions, see file

 // CONTRIBUTING.md for details.


 #include "forall.hpp"

 #include "occa.hpp"

 #ifdef MFEM_USE_CEED

 #include <ceed.h>

 #endif


 #include <string>

 #include <map>


 namespace mfem

 {


 // Place the following variables in the mfem::internal namespace, so that they

 // will not be included in the doxygen documentation.

 namespace internal

 {


 #ifdef MFEM_USE_OCCA

 // Default occa::device used by MFEM.

 occa::device occaDevice;

 #endif


 #ifdef MFEM_USE_CEED

 // libCEED handle used by the CEED backends. It stays NULL until
 // CeedDeviceSetup() passes it to CeedInit(); Device::~Device() releases it
 // with CeedDestroy().
 Ceed ceed = NULL;

 #endif


 // Backends listed by priority, high to low:

 // NOTE: backend_list[] and backend_name[] are parallel arrays: entry i of both
 // arrays describes the same backend. Device::Configure() and Device::Print()
 // index the two arrays with the same i, so any change must be applied to both.
 static const Backend::Id backend_list[Backend::NUM_BACKENDS] =

 {

    Backend::CEED_CUDA, Backend::OCCA_CUDA, Backend::RAJA_CUDA, Backend::CUDA,

    Backend::HIP, Backend::DEBUG,

    Backend::OCCA_OMP, Backend::RAJA_OMP, Backend::OMP,

    Backend::CEED_CPU, Backend::OCCA_CPU, Backend::RAJA_CPU, Backend::CPU

 };


 // Backend names listed by priority, high to low:

 // These are the strings accepted in the device specification parsed by
 // Device::Configure(), in the same order as backend_list[] above.
 static const char *backend_name[Backend::NUM_BACKENDS] =

 {

    "ceed-cuda", "occa-cuda", "raja-cuda", "cuda",

    "hip", "debug",

    "occa-omp", "raja-omp", "omp",

    "ceed-cpu", "occa-cpu", "raja-cpu", "cpu"

 };


 } // namespace mfem::internal


 // Initialize the unique global Device variable.

 Device Device::device_singleton;

 // Set to true by the Device constructor once a device has been configured
 // from the MFEM_DEVICE environment variable.
 bool Device::device_env = false;

 // Set to true by the Device constructor when the MFEM_MEMORY environment
 // variable selected the host memory type.
 bool Device::mem_host_env = false;

 // Set to true by the Device constructor when the MFEM_MEMORY environment
 // variable also selected the device memory type ("cuda", "hip" or "uvm").
 bool Device::mem_device_env = false;


 // Default constructor: start from the sequential, host-only configuration and
 // then apply the optional MFEM_MEMORY and MFEM_DEVICE environment overrides.
 Device::Device() : mode(Device::SEQUENTIAL),
    backends(Backend::CPU),
    destroy_mm(false),
    mpi_gpu_aware(false),
    host_mem_type(MemoryType::HOST),
    host_mem_class(MemoryClass::HOST),
    device_mem_type(MemoryType::HOST),
    device_mem_class(MemoryClass::HOST)
 {
    // MFEM_MEMORY is honored only once: after one of the static *_env flags
    // has been set, later Device objects skip this section.
    const char *memory_env = getenv("MFEM_MEMORY");
    if (memory_env && !mem_host_env && !mem_device_env)
    {
       const std::string mem_backend(memory_env);

       // Record a selection made through MFEM_MEMORY. Every recognized value
       // fixes the host memory type; 'fix_device' tells whether the device
       // memory type is fixed by the environment as well.
       auto select = [this](const MemoryType h_mt, const MemoryType d_mt,
                            const bool fix_device)
       {
          mem_host_env = true;
          if (fix_device) { mem_device_env = true; }
          host_mem_type = h_mt;
          device_mem_type = d_mt;
       };

       if (mem_backend == "host")
       {
          select(MemoryType::HOST, MemoryType::HOST, false);
       }
       else if (mem_backend == "host32")
       {
          select(MemoryType::HOST_32, MemoryType::HOST_32, false);
       }
       else if (mem_backend == "host64")
       {
          select(MemoryType::HOST_64, MemoryType::HOST_64, false);
       }
       else if (mem_backend == "umpire")
       {
          // The device memory type is switched to MemoryType::DEVICE_UMPIRE
          // later, in Device::UpdateMemoryTypeAndClass(), and only when an
          // actual device is configured.
          select(MemoryType::HOST_UMPIRE, MemoryType::HOST_UMPIRE, false);
       }
       else if (mem_backend == "debug")
       {
          // The device memory type is switched to MemoryType::DEVICE_DEBUG
          // later, in Device::UpdateMemoryTypeAndClass(), and only when an
          // actual device is configured.
          select(MemoryType::HOST_DEBUG, MemoryType::HOST_DEBUG, false);
       }
       // "cuda" and "hip" are recognized only in builds with the matching
       // MFEM_USE_* option; otherwise they fall through to the abort below.
       else if (false
 #ifdef MFEM_USE_CUDA
                || mem_backend == "cuda"
 #endif
 #ifdef MFEM_USE_HIP
                || mem_backend == "hip"
 #endif
               )
       {
          select(MemoryType::HOST, MemoryType::DEVICE, true);
       }
       else if (mem_backend == "uvm")
       {
          select(MemoryType::MANAGED, MemoryType::MANAGED, true);
       }
       else
       {
          MFEM_ABORT("Unknown memory backend!");
       }
       mm.Configure(host_mem_type, device_mem_type);
    }

    // MFEM_DEVICE holds a device specification that is handed to Configure().
    if (const char *device_spec = getenv("MFEM_DEVICE"))
    {
       Configure(std::string(device_spec));
       device_env = true;
    }
 }


 // Destructor. The Device object that owns the configuration (destroy_mm is
 // true and nothing was configured through the environment) releases the
 // backend option string, the libCEED handle and the memory manager; in all
 // cases except the early return below, the global singleton is reset to the
 // default host-only state so that a new Device can be configured afterwards.
 Device::~Device()
 {
    // Configured through MFEM_DEVICE and not the owner: nothing to tear down.
    if ( device_env && !destroy_mm) { return; }
    if (!device_env &&  destroy_mm && !mem_host_env)
    {
       // Configure() memcpy's the singleton into '*this', so both objects hold
       // the same 'device_option' pointer. Clear the singleton's copy before
       // freeing; otherwise a later Configure() without an option would leave
       // it dangling and UpdateMemoryTypeAndClass() would strcmp() freed
       // memory.
       if (Get().device_option == device_option)
       {
          Get().device_option = NULL;
       }
       free(device_option);
       device_option = NULL;
 #ifdef MFEM_USE_CEED
       CeedDestroy(&internal::ceed);
 #endif
       mm.Destroy();
    }
    // Reset the singleton; ngpu == -1 is what Setup() checks for to accept a
    // new configuration.
    Get().ngpu = -1;
    Get().mode = SEQUENTIAL;
    Get().backends = Backend::CPU;
    Get().host_mem_type = MemoryType::HOST;
    Get().host_mem_class = MemoryClass::HOST;
    Get().device_mem_type = MemoryType::HOST;
    Get().device_mem_class = MemoryClass::HOST;
 }


 void Device::Configure(const std::string &device, const int dev)

 {

    // If a device was configured via the environment, skip the configuration,

    // and avoid the 'singleton_device' to destroy the mm.

    if (device_env)

    {

       std::memcpy(this, &Get(), sizeof(Device));

       Get().destroy_mm = false;

       return;

    }


    std::map<std::string, Backend::Id> bmap;

    for (int i = 0; i < Backend::NUM_BACKENDS; i++)

    {

       bmap[internal::backend_name[i]] = internal::backend_list[i];

    }

    std::string::size_type beg = 0, end, option;

    while (1)

    {

       end = device.find(',', beg);

       end = (end != std::string::npos) ? end : device.size();

       const std::string bname = device.substr(beg, end - beg);

       option = bname.find(':');

       if (option==std::string::npos) // No option

       {

          const std::string backend = bname;

          std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);

          MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');

          Get().MarkBackend(it->second);

       }

       else

       {

          const std::string backend = bname.substr(0, option);

          const std::string boption = bname.substr(option+1);

          Get().device_option = strdup(boption.c_str());

          std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);

          MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');

          Get().MarkBackend(it->second);

       }

       if (end == device.size()) { break; }

       beg = end + 1;

    }


    // OCCA_CUDA needs CUDA or RAJA_CUDA:

    if (Allows(Backend::OCCA_CUDA) && !Allows(Backend::RAJA_CUDA))

    {

       Get().MarkBackend(Backend::CUDA);

    }

    if (Allows(Backend::CEED_CUDA))

    {

       Get().MarkBackend(Backend::CUDA);

    }


    // Perform setup.

    Get().Setup(dev);


    // Enable the device

    Enable();


    // Copy all data members from the global 'singleton_device' into '*this'.

    if (this != &Get()) { std::memcpy(this, &Get(), sizeof(Device)); }


    // Only '*this' will call the MemoryManager::Destroy() method.

    destroy_mm = true;

 }


 // Print the configuration of this Device to 'out': the enabled backends (in
 // priority order), the libCEED resource when a CEED backend is enabled, and
 // the host (and, with a device backend, device) memory types.
 void Device::Print(std::ostream &out)
 {
    out << "Device configuration: ";
    bool first_backend = true;
    for (int i = 0; i < Backend::NUM_BACKENDS; i++)
    {
       // Skip the backends that are not enabled in this object.
       if (!(backends & internal::backend_list[i])) { continue; }
       if (!first_backend) { out << ','; }
       first_backend = false;
       out << internal::backend_name[i];
    }
    out << '\n';

 #ifdef MFEM_USE_CEED
    if (Allows(Backend::CEED_MASK))
    {
       const char *ceed_backend;
       CeedGetResource(internal::ceed, &ceed_backend);
       out << "libCEED backend: " << ceed_backend << '\n';
    }
 #endif

    out << "Memory configuration: "
        << MemoryTypeName[static_cast<int>(host_mem_type)];
    // The device memory type is reported only when a device backend is on.
    if (Device::Allows(Backend::DEVICE_MASK))
    {
       out << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
    }
    out << std::endl;
 }


 // Derive the host/device memory types and the device memory class from the
 // enabled backends, the MFEM_MEMORY selection (mem_host_env, mem_device_env)
 // and the backend option, then pass the result to the memory manager.
 // Precedence, lowest to highest: defaults, "uvm" option, DEBUG backend.
 void Device::UpdateMemoryTypeAndClass()
 {
    const bool debug_backend = Device::Allows(Backend::DEBUG);
    const bool device_backend = Device::Allows(Backend::DEVICE_MASK);

 #ifdef MFEM_USE_UMPIRE
    // If MFEM has been compiled with Umpire support, use it as the default
    if (!mem_host_env) { host_mem_type = MemoryType::HOST_UMPIRE; }
 #endif

    // Enable the device memory type
    if (device_backend)
    {
       // A device memory type chosen through the environment is left alone.
       if (!mem_device_env)
       {
          if (!mem_host_env)
          {
             // Nothing was requested: use the build's default device memory.
 #ifdef MFEM_USE_UMPIRE
             device_mem_type = MemoryType::DEVICE_UMPIRE;
 #else
             device_mem_type = MemoryType::DEVICE;
 #endif
          }
          // Otherwise pair the device memory type with the requested host one.
          else if (host_mem_type == MemoryType::HOST_UMPIRE)
          {
             device_mem_type = MemoryType::DEVICE_UMPIRE;
          }
          else if (host_mem_type == MemoryType::HOST_DEBUG)
          {
             device_mem_type = MemoryType::DEVICE_DEBUG;
          }
          else
          {
             device_mem_type = MemoryType::DEVICE;
          }
       }
       device_mem_class = MemoryClass::DEVICE;
    }

    // Enable the UVM shortcut when requested
    const bool uvm_requested =
       device_backend && device_option && strcmp(device_option, "uvm") == 0;
    if (uvm_requested)
    {
       host_mem_type = MemoryType::MANAGED;
       device_mem_type = MemoryType::MANAGED;
    }

    // Enable the DEBUG mode when requested
    if (debug_backend)
    {
       host_mem_type = MemoryType::HOST_DEBUG;
       device_mem_type = MemoryType::DEVICE_DEBUG;
    }

    // Update the memory manager with the new settings
    mm.Configure(host_mem_type, device_mem_type);
 }


 // Switch the singleton to ACCELERATED mode when any backend other than the
 // plain CPU one is enabled, then refresh its memory types and classes.
 void Device::Enable()
 {
    if ((Get().backends & ~(Backend::CPU)) != 0)
    {
       Get().mode = Device::ACCELERATED;
    }
    Get().UpdateMemoryTypeAndClass();
 }


 #ifdef MFEM_USE_CUDA

 // CUDA-only helper (compiled only with MFEM_USE_CUDA): store the number of
 // CUDA devices in 'ngpu', abort if there is none, and make device 'dev' the
 // current CUDA device.
 static void DeviceSetup(const int dev, int &ngpu)

 {

    ngpu = CuGetDeviceCount();

    MFEM_VERIFY(ngpu > 0, "No CUDA device found!");

    // An error returned by cudaSetDevice() is reported through MFEM_GPU_CHECK.
    MFEM_GPU_CHECK(cudaSetDevice(dev));

 }

 #endif


 // Setup step for the CUDA backend: forwards to DeviceSetup() in CUDA builds.
 // Without MFEM_USE_CUDA the arguments are only passed to MFEM_CONTRACT_VAR
 // (presumably to mark them as used) and 'ngpu' is left unchanged.
 static void CudaDeviceSetup(const int dev, int &ngpu)

 {

 #ifdef MFEM_USE_CUDA

    DeviceSetup(dev, ngpu);

 #else

    MFEM_CONTRACT_VAR(dev);

    MFEM_CONTRACT_VAR(ngpu);

 #endif

 }


 // Setup step for the HIP backend: store the number of HIP devices in 'ngpu',
 // abort if there is none, and make device 'dev' the current HIP device. This
 // mirrors the CUDA path in DeviceSetup(). Without MFEM_USE_HIP the arguments
 // are only passed to MFEM_CONTRACT_VAR and 'ngpu' is left unchanged.
 static void HipDeviceSetup(const int dev, int &ngpu)
 {
 #ifdef MFEM_USE_HIP
    MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
    MFEM_VERIFY(ngpu > 0, "No HIP device found!");
    // Select the requested device instead of requiring it to already be the
    // current one; an invalid 'dev' is reported through MFEM_GPU_CHECK.
    MFEM_GPU_CHECK(hipSetDevice(dev));
 #else
    MFEM_CONTRACT_VAR(dev);
    MFEM_CONTRACT_VAR(ngpu);
 #endif
 }


 // Setup step for the RAJA CUDA backend. In CUDA builds it runs DeviceSetup()
 // only while 'ngpu' is still <= 0, i.e. when no earlier step (Device::Setup()
 // calls CudaDeviceSetup() first) has already found and selected a device.
 // Without MFEM_USE_CUDA the arguments are only passed to MFEM_CONTRACT_VAR.
 static void RajaDeviceSetup(const int dev, int &ngpu)

 {

 #ifdef MFEM_USE_CUDA

    if (ngpu <= 0) { DeviceSetup(dev, ngpu); }

 #else

    MFEM_CONTRACT_VAR(dev);

    MFEM_CONTRACT_VAR(ngpu);

 #endif

 }


 // Setup step for the OCCA backends: configure internal::occaDevice for the
 // single enabled OCCA backend (CUDA on device 'dev', OpenMP, or Serial), then
 // register the MFEM kernel directory with OCCA and load the kernels.
 // Aborts when more than one OCCA backend is enabled, when OCCA lacks the
 // requested mode, when the kernel directory cannot be found, or when MFEM
 // was built without MFEM_USE_OCCA.
 static void OccaDeviceSetup(const int dev)
 {
 #ifdef MFEM_USE_OCCA
    const int cpu  = Device::Allows(Backend::OCCA_CPU);
    const int omp  = Device::Allows(Backend::OCCA_OMP);
    const int cuda = Device::Allows(Backend::OCCA_CUDA);
    if (cpu + omp + cuda > 1)
    {
       MFEM_ABORT("Only one OCCA backend can be configured at a time!");
    }
    if (cuda)
    {
 #if OCCA_CUDA_ENABLED
       // Format the device id as a decimal number: appending the single
       // character '0'+dev is only correct for ids 0 through 9.
       const std::string mode =
          "mode: 'CUDA', device_id : " + std::to_string(dev);
       internal::occaDevice.setup(mode);
 #else
       MFEM_ABORT("the OCCA CUDA backend requires OCCA built with CUDA!");
 #endif
    }
    else if (omp)
    {
 #if OCCA_OPENMP_ENABLED
       internal::occaDevice.setup("mode: 'OpenMP'");
 #else
       MFEM_ABORT("the OCCA OpenMP backend requires OCCA built with OpenMP!");
 #endif
    }
    else
    {
       internal::occaDevice.setup("mode: 'Serial'");
    }

    // Locate the kernels: prefer the installed headers, then the source tree.
    std::string mfemDir;
    if (occa::io::exists(MFEM_INSTALL_DIR "/include/mfem/"))
    {
       mfemDir = MFEM_INSTALL_DIR "/include/mfem/";
    }
    else if (occa::io::exists(MFEM_SOURCE_DIR))
    {
       mfemDir = MFEM_SOURCE_DIR;
    }
    else
    {
       MFEM_ABORT("Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
    }

    occa::io::addLibraryPath("mfem", mfemDir);
    occa::loadKernels("mfem");
 #else
    MFEM_CONTRACT_VAR(dev);
    MFEM_ABORT("the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
 #endif
 }


 // Setup step for the CEED backends: initialize internal::ceed from the
 // resource string 'ceed_spec' and print a warning when libCEED reports a
 // different resource, unless the request was the default "/cpu/self".
 // Without MFEM_USE_CEED the argument is only passed to MFEM_CONTRACT_VAR.
 static void CeedDeviceSetup(const char* ceed_spec)
 {
 #ifdef MFEM_USE_CEED
    CeedInit(ceed_spec, &internal::ceed);

    // Ask libCEED which resource it actually selected.
    const char *ceed_backend;
    CeedGetResource(internal::ceed, &ceed_backend);

    const bool resource_differs = strcmp(ceed_spec, ceed_backend) != 0;
    const bool default_requested = strcmp(ceed_spec, "/cpu/self") == 0;
    if (resource_differs && !default_requested)
    {
       mfem::out << std::endl << "WARNING!!!\n"
                 "libCEED is not using the requested backend!!!\n"
                 "WARNING!!!\n" << std::endl;
    }
 #else
    MFEM_CONTRACT_VAR(ceed_spec);
 #endif
 }


 // Validate the enabled backends against the build options and run the setup
 // step of each enabled backend, using 'device' as the device id.
 // Aborts if the Device is already configured (ngpu != -1) or if a backend
 // requires an MFEM feature that was not compiled in.
 void Device::Setup(const int device)
 {
    MFEM_VERIFY(ngpu == -1, "the mfem::Device is already configured!");

    ngpu = 0;
    dev = device;

    // Reject the backends that this build of MFEM cannot provide.
 #ifndef MFEM_USE_CUDA
    MFEM_VERIFY(!Allows(Backend::CUDA_MASK),
                "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
 #endif
 #ifndef MFEM_USE_HIP
    MFEM_VERIFY(!Allows(Backend::HIP_MASK),
                "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
 #endif
 #ifndef MFEM_USE_RAJA
    MFEM_VERIFY(!Allows(Backend::RAJA_MASK),
                "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
 #endif
 #ifndef MFEM_USE_OPENMP
    MFEM_VERIFY(!Allows(Backend::OMP|Backend::RAJA_OMP),
                "the OpenMP and RAJA OpenMP backends require MFEM built with"
                " MFEM_USE_OPENMP=YES");
 #endif
 #ifndef MFEM_USE_CEED
    MFEM_VERIFY(!Allows(Backend::CEED_MASK),
                "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
 #else
    MFEM_VERIFY(!Allows(Backend::CEED_CPU) || !Allows(Backend::CEED_CUDA),
                "Only one CEED backend can be enabled at a time!");
 #endif

    // Per-backend setup; CUDA runs before RAJA so that RajaDeviceSetup() can
    // see a device count already stored in 'ngpu'.
    if (Allows(Backend::CUDA)) { CudaDeviceSetup(dev, ngpu); }
    if (Allows(Backend::HIP)) { HipDeviceSetup(dev, ngpu); }
    if (Allows(Backend::RAJA_CUDA)) { RajaDeviceSetup(dev, ngpu); }
    // The check for MFEM_USE_OCCA is in the function OccaDeviceSetup().
    if (Allows(Backend::OCCA_MASK)) { OccaDeviceSetup(dev); }

    // For CEED, a backend option (if given) replaces the default resource.
    if (Allows(Backend::CEED_CPU))
    {
       CeedDeviceSetup(device_option ? device_option : "/cpu/self");
    }
    if (Allows(Backend::CEED_CUDA))
    {
       // NOTE: libCEED's /gpu/cuda/gen backend is non-deterministic!
       CeedDeviceSetup(device_option ? device_option : "/gpu/cuda/gen");
    }

    // The debug backend sets the device count to one.
    if (Allows(Backend::DEBUG)) { ngpu = 1; }
 }


 } // mfem

mfem::Backend::OCCA_CUDA
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:56

mfem::MemoryType::HOST_64
Host memory; aligned at 64 bytes.

mfem::Backend::OCCA_OMP
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
Definition: device.hpp:53

mfem::Device::~Device
~Device()
Destructor.
Definition: device.cpp:150

mfem::MemoryType::DEVICE
Device memory; using CUDA or HIP *Malloc and *Free.

mfem::MemoryType::DEVICE_UMPIRE
Device memory; using Umpire.

mfem::MemoryTypeName
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
Definition: mem_manager.cpp:1388

mfem::Backend::CEED_CUDA
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
Definition: device.hpp:63

mfem::Backend::RAJA_OMP
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES...
Definition: device.hpp:45

mfem::Backend::HIP_MASK
Bitwise-OR of all HIP backends.
Definition: device.hpp:83

mfem::MemoryType::HOST_DEBUG
Host memory; allocated from a "host-debug" pool.

mfem::MemoryType::MANAGED

mfem::MemoryManager::Configure
void Configure(const MemoryType h_mt, const MemoryType d_mt)
Definition: mem_manager.cpp:1261

mfem::Device::Print
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
Definition: device.cpp:236

mfem::Backend::RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition: device.hpp:48

mfem::CuGetDeviceCount
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition: cuda.cpp:155

mfem::Device::Configure
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
Definition: device.cpp:170

mfem::MemoryType::HOST_32
Host memory; aligned at 32 bytes.

mfem::Device::Device
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used...
Definition: device.cpp:65

mfem::Backend::Id
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
Definition: device.hpp:30

mfem::Backend::OCCA_CPU
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES...
Definition: device.hpp:51

occa.hpp

mfem::Backend::NUM_BACKENDS
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
Definition: device.hpp:76

mfem::Backend
MFEM backends.
Definition: device.hpp:26

mfem::MemoryManager::Destroy
void Destroy()
Free all the device memories.
Definition: mem_manager.cpp:1278

mfem::Backend::RAJA_CPU
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES...
Definition: device.hpp:42

mfem::Backend::DEBUG
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. It allows testing the "device" code-path (using separate host/device memory pools and host <-> device transfers) without any GPU hardware.
Definition: device.hpp:68

mfem::Backend::CPU
[host] Default CPU backend: sequential execution on each MPI rank.
Definition: device.hpp:33

mfem::Backend::CUDA_MASK
Bitwise-OR of all CUDA backends.
Definition: device.hpp:81

mfem::MemoryType
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:27

mfem::Backend::CEED_CPU
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers...
Definition: device.hpp:59

mfem::Backend::OMP
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
Definition: device.hpp:35

mfem::Device::Allows
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:234

mfem::mm
MemoryManager mm
The (single) global memory manager object.
Definition: mem_manager.cpp:1376

mfem::MemoryType::HOST
Host memory; using new[] and delete[].

mfem::MemoryType::DEVICE_DEBUG

mfem::Backend::OCCA_MASK
Bitwise-OR of all OCCA backends.
Definition: device.hpp:94

mfem::Backend::RAJA_MASK
Bitwise-OR of all RAJA backends.
Definition: device.hpp:92

mfem::MemoryType::HOST_UMPIRE
Host memory; using Umpire.

mfem::Backend::DEVICE_MASK
Bitwise-OR of all device backends.
Definition: device.hpp:89

mfem::out
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66

mfem::Device
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
Definition: device.hpp:114

mfem::Backend::CEED_MASK
Bitwise-OR of all CEED backends.
Definition: device.hpp:87

mfem::Backend::HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition: device.hpp:39

forall.hpp

mfem::Backend::CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition: device.hpp:37

mfem::MemoryClass
MemoryClass
Memory classes identify sets of memory types.
Definition: mem_manager.hpp:57