// CUDA block size constant.
// NOTE(review): presumably the number of threads per block used for kernel
// launches -- confirm against the launch sites, which are not visible here.
#define MFEM_CUDA_BLOCKS 256

// Valueless marker macro; intended to be tested with #ifdef / defined().
#define MFEM_USE_CUDA_OR_HIP

// Function execution-space qualifiers, spelled through macros so that callers
// do not use the CUDA keywords directly.
#define MFEM_DEVICE __device__
#define MFEM_HOST __host__
#define MFEM_LAMBDA __host__

// Error-checked synchronization helpers: the first waits on the whole device,
// the second on stream 0. Both route the returned cudaError_t through
// MFEM_GPU_CHECK, which only needs to be defined before these are expanded.
#define MFEM_DEVICE_SYNC MFEM_GPU_CHECK(cudaDeviceSynchronize())
#define MFEM_STREAM_SYNC MFEM_GPU_CHECK(cudaStreamSynchronize(0))
   // Evaluates the CUDA runtime expression 'x' exactly once and, when the result
   // is not cudaSuccess, reports it through ::mfem::mfem_cuda_error together
   // with the stringified expression, the enclosing function name
   // (_MFEM_FUNC_NAME), the file and the line.
   //
   // The do { ... } while (0) wrapper makes the expansion a single statement, so
   // 'MFEM_GPU_CHECK(call);' stays correct inside an unbraced if/else.
   // NOTE(review): the wrapper lines were missing from this excerpt and have
   // been reconstructed -- confirm against the upstream header.
   #define MFEM_GPU_CHECK(x)                                                      \
      do {                                                                        \
         cudaError_t mfem_err_internal_var_name = (x);                            \
         if (mfem_err_internal_var_name != cudaSuccess) {                         \
            ::mfem::mfem_cuda_error(mfem_err_internal_var_name, #x,               \
                                    _MFEM_FUNC_NAME, __FILE__, __LINE__);         \
         }                                                                        \
      } while (0)
   // Kernel-side helper macros, defined only when MFEM is built with CUDA and the
   // current compilation pass targets the device (__CUDA_ARCH__ is defined).
   // In each macro taking 'k', 'k' is the member of the CUDA built-in index
   // variable to access (it is pasted after the '.').
   // NOTE(review): no #else branch is visible in this excerpt; any host-side
   // fallback definitions must come from elsewhere -- confirm.
   #if defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)
   #define MFEM_SHARED __shared__
   #define MFEM_SYNC_THREAD __syncthreads()
   #define MFEM_BLOCK_ID(k) blockIdx.k
   #define MFEM_THREAD_ID(k) threadIdx.k
   #define MFEM_THREAD_SIZE(k) blockDim.k
   // Loop in which each thread starts at its own index along dimension k and
   // advances by the block extent along that dimension until reaching N.
   // N is parenthesised so that an expression argument (e.g. 'c ? a : b') is
   // compared as a whole instead of being re-associated with '<'.
   #define MFEM_FOREACH_THREAD(i,k,N) for(int i=threadIdx.k; i<(N); i+=blockDim.k)
   #endif
   58                     const char *file, 
int line);
 
   /// Copies memory from Host to Device and returns destination ptr.
   /** @param d_dst  destination buffer (the d_ prefix suggests a device address
                     -- confirm with the implementation)
       @param h_src  source buffer (the h_ prefix suggests a host address)
       @param bytes  number of bytes to copy */
   void* CuMemcpyHtoD(void *d_dst, const void *h_src, size_t bytes);
 
   /// Copies memory from Device to Device.
   /** @param d_dst  destination buffer (the d_ prefix suggests a device address
                     -- confirm with the implementation)
       @param d_src  source buffer (the d_ prefix suggests a device address)
       @param bytes  number of bytes to copy
       @return a pointer; presumably the destination, as for the Host-to-Device
               variant -- confirm with the implementation */
   void* CuMemcpyDtoD(void *d_dst, const void *d_src, size_t bytes);
 
   /// Copies memory from Device to Host.
   /** @param h_dst  destination buffer (the h_ prefix suggests a host address
                     -- confirm with the implementation)
       @param d_src  source buffer (the d_ prefix suggests a device address)
       @param bytes  number of bytes to copy
       @return a pointer; presumably the destination, as for the Host-to-Device
               variant -- confirm with the implementation */
   void* CuMemcpyDtoH(void *h_dst, const void *d_src, size_t bytes);
 
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
void * CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
void * CuMemAllocHostPinned(void **ptr, size_t bytes)
Allocates page-locked (pinned) host memory.
void * CuMemFreeHostPinned(void *ptr)
Frees page-locked (pinned) host memory and returns destination ptr.
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func, const char *file, int line)
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
int CuGetDeviceCount()
Get the number of CUDA devices.
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambuf as std::cerr.
void * CuMemcpyHtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
void CuCheckLastError()
Check the error code returned by cudaGetLastError(), aborting on error.
void * CuMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.