html/cuda_8hpp_source.html

// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced

// at the Lawrence Livermore National Laboratory. All Rights reserved. See files

// LICENSE and NOTICE for details. LLNL-CODE-806117.

//

// This file is part of the MFEM library. For more information and source code

// availability visit https://mfem.org.

//

// MFEM is free software; you can redistribute it and/or modify it under the

// terms of the BSD-3 license. We welcome feedback and contributions, see file

// CONTRIBUTING.md for details.


#ifndef MFEM_CUDA_HPP

#define MFEM_CUDA_HPP


#include "../config/config.hpp"

#include "error.hpp"


// CUDA block size used by MFEM.
// Defined unconditionally, i.e. also when MFEM_USE_CUDA is not set.
// NOTE(review): presumably the number of threads per block used for MFEM's
// kernel launches; the launch sites are not in this header -- confirm there.

#define MFEM_CUDA_BLOCKS 256


#if defined(MFEM_USE_CUDA) && defined(__CUDACC__)

// Marker macro (no value). It is only defined when the enclosing condition
// holds: MFEM_USE_CUDA is set and the translation unit is compiled by a CUDA
// compiler (__CUDACC__).
// NOTE(review): the name suggests a HIP counterpart header defines the same
// marker -- confirm there.

#define MFEM_USE_CUDA_OR_HIP

// Function execution-space qualifiers spelled through MFEM macros.

#define MFEM_DEVICE __device__

#define MFEM_HOST __host__

// Note that MFEM_LAMBDA expands to __host__ only (there is no __device__ here).

#define MFEM_LAMBDA __host__

// MFEM_HOST_DEVICE is intentionally not defined in this header; per the note
// below it is defined in config/config.hpp.
// #define MFEM_HOST_DEVICE __host__ __device__ // defined in config/config.hpp

// Checked synchronization helpers. Both expand to MFEM_GPU_CHECK(...), which is
// defined further down in this header; that works because a macro body is only
// expanded at the point of use. Neither macro ends in a semicolon, so the call
// site supplies it.
// MFEM_DEVICE_SYNC: checked call to cudaDeviceSynchronize().

#define MFEM_DEVICE_SYNC MFEM_GPU_CHECK(cudaDeviceSynchronize())

// MFEM_STREAM_SYNC: checked call to cudaStreamSynchronize() on stream 0 (the
// default stream).

#define MFEM_STREAM_SYNC MFEM_GPU_CHECK(cudaStreamSynchronize(0))

// Define a CUDA error check macro, MFEM_GPU_CHECK(x), where x returns/is of
// type 'cudaError_t'. This macro evaluates 'x' exactly once and, if the result
// is not cudaSuccess, calls ::mfem::mfem_cuda_error() with:
//   - the error code,
//   - the stringified expression (#x),
//   - _MFEM_FUNC_NAME (not defined in this header; presumably the name of the
//     enclosing function, provided via error.hpp -- confirm there),
//   - __FILE__ and __LINE__ of the call site.
//
// The body is wrapped in do { ... } while (0) so that the macro behaves like a
// single statement (e.g. as the body of an unbraced if/else); the caller
// supplies the trailing semicolon.
//
// Every line of the definition except the last must end with a backslash that
// is immediately followed by the next line of the macro: a blank line after a
// continuation terminates the #define early and leaves the rest of the body as
// stray tokens at file scope.
#define MFEM_GPU_CHECK(x)                                                      \
  do {                                                                         \
    cudaError_t mfem_err_internal_var_name = (x);                              \
    if (mfem_err_internal_var_name != cudaSuccess) {                           \
      ::mfem::mfem_cuda_error(mfem_err_internal_var_name, #x, _MFEM_FUNC_NAME, \
                              __FILE__, __LINE__);                             \
    }                                                                          \
  } while (0)


// Define the MFEM inner threading macros.
//
// They are only defined while __CUDA_ARCH__ is defined, i.e. during the device
// compilation pass; this header provides no host-side definitions.
//
//   MFEM_SHARED            expands to the __shared__ qualifier.
//   MFEM_SYNC_THREAD       expands to __syncthreads(). As with any block
//                          barrier, it must be reached by all threads of the
//                          block, so do not place it where only some threads
//                          of the block execute (e.g. inside the body of
//                          MFEM_FOREACH_THREAD_DIRECT).
//   MFEM_BLOCK_ID(k)       blockIdx.k,  with k one of x, y, z.
//   MFEM_THREAD_ID(k)      threadIdx.k, with k one of x, y, z.
//   MFEM_THREAD_SIZE(k)    blockDim.k,  with k one of x, y, z.
//   MFEM_FOREACH_THREAD(i,k,N)
//                          loop header: declares 'int i', starts at
//                          threadIdx.k, advances by blockDim.k, and runs while
//                          i < (N). The statement following the macro is the
//                          loop body.
//   MFEM_FOREACH_THREAD_DIRECT(i,k,N)
//                          'if' header with an init-statement (requires
//                          C++17): declares 'const int i = threadIdx.k' and
//                          executes the following statement once when
//                          i < (N). There is no loop, so indices at or above
//                          blockDim.k are never visited. Because it expands to
//                          a bare 'if', a following 'else' binds to it.
//
// The bound N is parenthesized in both expansions so that arguments containing
// operators of lower precedence than '<' (e.g. 'a ? b : c' or 'm & mask') are
// compared as a whole instead of being re-associated with 'i<'.
#if defined(__CUDA_ARCH__)
#define MFEM_SHARED __shared__
#define MFEM_SYNC_THREAD __syncthreads()
#define MFEM_BLOCK_ID(k) blockIdx.k
#define MFEM_THREAD_ID(k) threadIdx.k
#define MFEM_THREAD_SIZE(k) blockDim.k
#define MFEM_FOREACH_THREAD(i,k,N) for(int i=threadIdx.k; i<(N); i+=blockDim.k)
#define MFEM_FOREACH_THREAD_DIRECT(i,k,N) if(const int i=threadIdx.k; i<(N))
#endif // defined(__CUDA_ARCH__)

#endif // defined(MFEM_USE_CUDA) && defined(__CUDACC__)


namespace mfem

{


#if defined(MFEM_USE_CUDA) && defined(__CUDACC__)

// Function used by the macro MFEM_GPU_CHECK.
// The macro passes, in order: the failing error code (err), the stringified
// checked expression (expr), _MFEM_FUNC_NAME (func), __FILE__ (file) and
// __LINE__ (line). Only declared under the CUDA guard above, since the
// signature needs cudaError_t.

void mfem_cuda_error(cudaError_t err, const char *expr, const char *func,

                     const char *file, int line);

#endif

// The functions below are declared unconditionally (outside the CUDA guard).
// Only their declarations are visible in this header; notes marked
// NOTE(review) are assumptions to be confirmed against the implementation.


/// Allocates device memory and returns destination ptr.
/// NOTE(review): presumably @a bytes bytes are allocated and the resulting
/// address is stored in @a *d_ptr as well as returned -- confirm.

void* CuMemAlloc(void **d_ptr, size_t bytes);


/// Allocates managed device memory.
/// NOTE(review): the meaning of the returned pointer is not documented here;
/// presumably the same convention as CuMemAlloc() -- confirm.

void* CuMallocManaged(void **d_ptr, size_t bytes);


/// Allocates page-locked (pinned) host memory.
/// NOTE(review): the meaning of the returned pointer is not documented here;
/// presumably the address also stored in @a *ptr -- confirm.

void* CuMemAllocHostPinned(void **ptr, size_t bytes);


/// Frees device memory and returns destination ptr.
/// NOTE(review): "destination ptr" is unclear for a free; presumably @a d_ptr
/// itself is returned -- confirm.

void* CuMemFree(void *d_ptr);


/// Frees page-locked (pinned) host memory and returns destination ptr.
/// NOTE(review): "destination ptr" is unclear for a free; presumably @a ptr
/// itself is returned -- confirm.

void* CuMemFreeHostPinned(void *ptr);


/// Copies memory from Host to Device and returns destination ptr.
/// @a d_dst is the device destination, @a h_src the host source, and @a bytes
/// the size of the copy in bytes (per the parameter names).

void* CuMemcpyHtoD(void *d_dst, const void *h_src, size_t bytes);


/// Copies memory from Host to Device and returns destination ptr.
/// NOTE(review): the "Async" suffix suggests the copy may still be in flight
/// when this returns, leaving synchronization to the caller; the stream used
/// is not visible in this header -- confirm.

void* CuMemcpyHtoDAsync(void *d_dst, const void *h_src, size_t bytes);


/// Copies memory from Device to Device.
/// NOTE(review): the return value is not documented here; presumably the
/// destination pointer @a d_dst, as for CuMemcpyHtoD() -- confirm.

void* CuMemcpyDtoD(void *d_dst, const void *d_src, size_t bytes);


/// Copies memory from Device to Device.
/// NOTE(review): the "Async" suffix suggests the copy may still be in flight
/// when this returns; stream and return value are not documented here
/// (presumably @a d_dst is returned) -- confirm.

void* CuMemcpyDtoDAsync(void *d_dst, const void *d_src, size_t bytes);


/// Copies memory from Device to Host.
/// NOTE(review): the return value is not documented here; presumably the
/// destination pointer @a h_dst -- confirm.

void* CuMemcpyDtoH(void *h_dst, const void *d_src, size_t bytes);


/// Copies memory from Device to Host.
/// NOTE(review): the "Async" suffix suggests the copy may still be in flight
/// when this returns, so @a h_dst should not be read before synchronizing;
/// stream and return value are not documented here -- confirm.

void* CuMemcpyDtoHAsync(void *h_dst, const void *d_src, size_t bytes);


/// Check the error code returned by cudaGetLastError(), aborting on error.

void CuCheckLastError();


/// Get the number of CUDA devices.
/// NOTE(review): the result when MFEM is built without CUDA is not visible
/// from this header -- confirm.

int CuGetDeviceCount();


} // namespace mfem


#endif // MFEM_CUDA_HPP

config.hpp

error.hpp

mfem
Definition CodeDocumentation.dox:1

mfem::CuMemAlloc
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition cuda.cpp:34

mfem::CuMemFree
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition cuda.cpp:79

mfem::CuMemcpyDtoHAsync
void * CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition cuda.cpp:170

mfem::CuMallocManaged
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
Definition cuda.cpp:49

mfem::CuMemcpyDtoH
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition cuda.cpp:155

mfem::CuMemAllocHostPinned
void * CuMemAllocHostPinned(void **ptr, size_t bytes)
Allocates page-locked (pinned) host memory.
Definition cuda.cpp:64

mfem::CuMemFreeHostPinned
void * CuMemFreeHostPinned(void *ptr)
Frees page-locked (pinned) host memory and returns destination ptr.
Definition cuda.cpp:94

mfem::mfem_cuda_error
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func, const char *file, int line)
Definition cuda.cpp:23

mfem::CuMemcpyHtoD
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition cuda.cpp:109

mfem::CuGetDeviceCount
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition cuda.cpp:185

mfem::err
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
Definition globals.hpp:71

mfem::CuMemcpyHtoDAsync
void * CuMemcpyHtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition cuda.cpp:124

mfem::CuCheckLastError
void CuCheckLastError()
Check the error code returned by cudaGetLastError(), aborting on error.
Definition cuda.cpp:178

mfem::CuMemcpyDtoDAsync
void * CuMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition cuda.cpp:147

mfem::CuMemcpyDtoD
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition cuda.cpp:132