MFEM v4.8.0
Finite element discretization library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
cuda.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_CUDA_HPP
13#define MFEM_CUDA_HPP
14
15#include "../config/config.hpp"
16#include "error.hpp"
17
18// CUDA block size used by MFEM.
19#define MFEM_CUDA_BLOCKS 256
20
21#ifdef MFEM_USE_CUDA
22#define MFEM_USE_CUDA_OR_HIP
23#define MFEM_DEVICE __device__
24#define MFEM_HOST __host__
25#define MFEM_LAMBDA __host__
26// #define MFEM_HOST_DEVICE __host__ __device__ // defined in config/config.hpp
27#define MFEM_DEVICE_SYNC MFEM_GPU_CHECK(cudaDeviceSynchronize())
28#define MFEM_STREAM_SYNC MFEM_GPU_CHECK(cudaStreamSynchronize(0))
29// Define a CUDA error check macro, MFEM_GPU_CHECK(x), where x returns/is of
30// type 'cudaError_t'. This macro evaluates 'x' and raises an error if the
31// result is not cudaSuccess.
32#define MFEM_GPU_CHECK(x) \
33 do { \
34 cudaError_t mfem_err_internal_var_name = (x); \
35 if (mfem_err_internal_var_name != cudaSuccess) { \
36 ::mfem::mfem_cuda_error(mfem_err_internal_var_name, #x, _MFEM_FUNC_NAME, \
37 __FILE__, __LINE__); \
38 } \
39 } while (0)
40#endif // MFEM_USE_CUDA
41
42// Define the MFEM inner threading macros
43#if defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)
44#define MFEM_SHARED __shared__
45#define MFEM_SYNC_THREAD __syncthreads()
46#define MFEM_BLOCK_ID(k) blockIdx.k
47#define MFEM_THREAD_ID(k) threadIdx.k
48#define MFEM_THREAD_SIZE(k) blockDim.k
49#define MFEM_FOREACH_THREAD(i,k,N) for(int i=threadIdx.k; i<N; i+=blockDim.k)
50#endif
51
52namespace mfem
53{
54
55#ifdef MFEM_USE_CUDA
56// Function used by the macro MFEM_GPU_CHECK.
57void mfem_cuda_error(cudaError_t err, const char *expr, const char *func,
58 const char *file, int line);
59#endif
60
61/// Allocates device memory and returns destination ptr.
62void* CuMemAlloc(void **d_ptr, size_t bytes);
63
64/// Allocates managed device memory
65void* CuMallocManaged(void **d_ptr, size_t bytes);
66
67/// Allocates page-locked (pinned) host memory
68void* CuMemAllocHostPinned(void **ptr, size_t bytes);
69
70/// Frees device memory and returns destination ptr.
71void* CuMemFree(void *d_ptr);
72
73/// Frees page-locked (pinned) host memory and returns destination ptr.
74void* CuMemFreeHostPinned(void *ptr);
75
76/// Copies memory from Host to Device and returns destination ptr.
77void* CuMemcpyHtoD(void *d_dst, const void *h_src, size_t bytes);
78
79/// Copies memory from Host to Device and returns destination ptr.
80void* CuMemcpyHtoDAsync(void *d_dst, const void *h_src, size_t bytes);
81
82/// Copies memory from Device to Device
83void* CuMemcpyDtoD(void *d_dst, const void *d_src, size_t bytes);
84
85/// Copies memory from Device to Device
86void* CuMemcpyDtoDAsync(void *d_dst, const void *d_src, size_t bytes);
87
88/// Copies memory from Device to Host
89void* CuMemcpyDtoH(void *h_dst, const void *d_src, size_t bytes);
90
91/// Copies memory from Device to Host
92void* CuMemcpyDtoHAsync(void *h_dst, const void *d_src, size_t bytes);
93
94/// Check the error code returned by cudaGetLastError(), aborting on error.
95void CuCheckLastError();
96
97/// Get the number of CUDA devices
98int CuGetDeviceCount();
99
100} // namespace mfem
101
102#endif // MFEM_CUDA_HPP
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition cuda.cpp:34
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition cuda.cpp:79
void * CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition cuda.cpp:170
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
Definition cuda.cpp:49
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition cuda.cpp:155
void * CuMemAllocHostPinned(void **ptr, size_t bytes)
Allocates page-locked (pinned) host memory.
Definition cuda.cpp:64
void * CuMemFreeHostPinned(void *ptr)
Frees page-locked (pinned) host memory and returns destination ptr.
Definition cuda.cpp:94
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func, const char *file, int line)
Definition cuda.cpp:23
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition cuda.cpp:109
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition cuda.cpp:185
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
Definition globals.hpp:71
void * CuMemcpyHtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition cuda.cpp:124
void CuCheckLastError()
Check the error code returned by cudaGetLastError(), aborting on error.
Definition cuda.cpp:178
void * CuMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition cuda.cpp:147
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition cuda.cpp:132