15 #include "../config/config.hpp"
19 #include <cuda_runtime.h>
// NOTE(review): presumably the default number of threads per block used when
// launching CUDA kernels; the use site is not visible here, confirm.
24 #define MFEM_CUDA_BLOCKS 256
// Function qualifier: expands to the CUDA __device__ attribute.
27 #define MFEM_DEVICE __device__
// Function qualifier: expands to both the CUDA __host__ and __device__
// attributes.
28 #define MFEM_HOST_DEVICE __host__ __device__
// MFEM_GPU_CHECK(x): stores the value of the expression x in a cudaError_t
// and, when it differs from cudaSuccess, passes it to mfem_cuda_error()
// together with the stringified expression (#x), _MFEM_FUNC_NAME, __FILE__
// and __LINE__.
// NOTE(review): the numbering gaps (33-34, 37, 39-41) suggest that the
// braces / statement wrapper around the macro body are missing from this
// listing; confirm against the original header before relying on it.
// MFEM_DEVICE_SYNC and MFEM_STREAM_SYNC, defined right after it, apply the
// same check to cudaDeviceSynchronize() and cudaStreamSynchronize(0).
32 #define MFEM_GPU_CHECK(x) \
35 cudaError_t err = (x); \
36 if (err != cudaSuccess) \
38 mfem_cuda_error(err, #x, _MFEM_FUNC_NAME, __FILE__, __LINE__); \
42 #define MFEM_DEVICE_SYNC MFEM_GPU_CHECK(cudaDeviceSynchronize())
43 #define MFEM_STREAM_SYNC MFEM_GPU_CHECK(cudaStreamSynchronize(0))
44 #endif // MFEM_USE_CUDA
// Definitions used when MFEM_USE_CUDA is defined and __CUDA_ARCH__ is defined
// as well. Each macro maps directly onto a CUDA built-in.
47 #if defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)
// Storage qualifier: expands to __shared__.
48 #define MFEM_SHARED __shared__
// Expands to a call of __syncthreads().
49 #define MFEM_SYNC_THREAD __syncthreads()
// Component k (x, y or z) of threadIdx.
50 #define MFEM_THREAD_ID(k) threadIdx.k
// Component k (x, y or z) of blockDim.
51 #define MFEM_THREAD_SIZE(k) blockDim.k
// Loop header: i starts at threadIdx.k and advances by blockDim.k while i<N.
// NOTE(review): N is expanded without parentheses, so an argument containing
// a lower-precedence operator (e.g. ?:) would change the loop condition.
52 #define MFEM_FOREACH_THREAD(i,k,N) for(int i=threadIdx.k; i<N; i+=blockDim.k)
// Fallback when neither MFEM_USE_CUDA nor MFEM_USE_HIP is defined: the
// qualifier and synchronization macros below expand to nothing.
55 #if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
57 #define MFEM_HOST_DEVICE
58 #define MFEM_DEVICE_SYNC
59 #define MFEM_STREAM_SYNC
// Fallback used unless (MFEM_USE_CUDA and __CUDA_ARCH__) or (MFEM_USE_HIP and
// __ROCM_ARCH__) are defined: the thread macros below describe a single
// thread with index 0 and the per-thread loop becomes a plain sequential
// loop over 0..N-1.
62 #if !((defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)) || \
63 (defined(MFEM_USE_HIP) && defined(__ROCM_ARCH__)))
// Expands to nothing.
65 #define MFEM_SYNC_THREAD
// Always 0, whatever the component k.
66 #define MFEM_THREAD_ID(k) 0
// Always 1, whatever the component k.
67 #define MFEM_THREAD_SIZE(k) 1
// Loop header: i runs from 0 while i<N, in steps of 1; k is ignored.
// NOTE(review): N is expanded without parentheses.
68 #define MFEM_FOREACH_THREAD(i,k,N) for(int i=0; i<N; i++)
77 const char *file,
int line);
/// Copies memory from Host to Device.
/// NOTE(review): judging by the parameter names, d_dst is the device
/// destination, h_src the host source and bytes the size of the copy in
/// bytes; the meaning of the returned pointer is not visible from this
/// declaration, confirm against the implementation.
90 void*
CuMemcpyHtoD(
void *d_dst,
const void *h_src,
size_t bytes);
/// Copies memory from Device to Device.
/// NOTE(review): judging by the parameter names, d_dst is the device
/// destination, d_src the device source and bytes the size of the copy in
/// bytes; the meaning of the returned pointer is not visible from this
/// declaration, confirm against the implementation.
96 void*
CuMemcpyDtoD(
void *d_dst,
const void *d_src,
size_t bytes);
/// Copies memory from Device to Host.
/// NOTE(review): judging by the parameter names, h_dst is the host
/// destination, d_src the device source and bytes the size of the copy in
/// bytes; the meaning of the returned pointer is not visible from this
/// declaration, confirm against the implementation.
102 void*
CuMemcpyDtoH(
void *h_dst,
const void *d_src,
size_t bytes);
115 #endif // MFEM_CUDA_HPP
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device.
void * CuMemFree(void *dptr)
Frees device memory.
void CuCheckLastError()
Check the error code returned by cudaGetLastError(), aborting on error.
int CuGetDeviceCount()
Get the number of CUDA devices.
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func, const char *file, int line)
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
void * CuMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambuf as std::cerr, however that can be changed.
void * CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
void * CuMemcpyHtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device.
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.