MFEM  v4.6.0
Finite element discretization library
cuda.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "backends.hpp"
13 #include "globals.hpp"
14 
15 namespace mfem
16 {
17 
18 // Internal debug option, useful for tracking CUDA allocations, deallocations
19 // and transfers.
20 // #define MFEM_TRACK_CUDA_MEM
21 
#ifdef MFEM_USE_CUDA
/// Error handler invoked by the MFEM_GPU_CHECK macro when a CUDA runtime
/// call fails. Prints the failing expression, the decoded CUDA error
/// string, and the call site (function, file, line) to mfem::err, then
/// aborts through mfem_error(). Does not return.
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func,
                     const char *file, int line)
{
   mfem::err << "\n\nCUDA error: (" << expr << ") failed with error:\n --> "
             << cudaGetErrorString(err)
             << "\n ... in function: " << func
             << "\n ... in file: " << file << ':' << line << '\n';
   mfem_error();
}
#endif
33 
/// Allocate @a bytes of device memory with cudaMalloc, storing the new
/// address in *@a dptr, and return *@a dptr. When MFEM is built without
/// CUDA support this is a no-op that returns the unmodified value of
/// *@a dptr.
void* CuMemAlloc(void** dptr, size_t bytes)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   // Internal tracking output; enabled by MFEM_TRACK_CUDA_MEM above.
   mfem::out << "CuMemAlloc(): allocating " << bytes << " bytes ... "
             << std::flush;
#endif
   MFEM_GPU_CHECK(cudaMalloc(dptr, bytes));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done: " << *dptr << std::endl;
#endif
#endif
   return *dptr;
}
48 
/// Allocate @a bytes of managed (unified) memory with cudaMallocManaged,
/// storing the new address in *@a dptr, and return *@a dptr. When MFEM is
/// built without CUDA support this is a no-op that returns the unmodified
/// value of *@a dptr.
void* CuMallocManaged(void** dptr, size_t bytes)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "CuMallocManaged(): allocating " << bytes << " bytes ... "
             << std::flush;
#endif
   MFEM_GPU_CHECK(cudaMallocManaged(dptr, bytes));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done: " << *dptr << std::endl;
#endif
#endif
   return *dptr;
}
63 
/// Allocate @a bytes of page-locked (pinned) host memory with
/// cudaMallocHost, storing the new address in *@a ptr, and return *@a ptr.
/// When MFEM is built without CUDA support this is a no-op that returns
/// the unmodified value of *@a ptr.
void* CuMemAllocHostPinned(void** ptr, size_t bytes)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "CuMemAllocHostPinned(): allocating " << bytes << " bytes ... "
             << std::flush;
#endif
   MFEM_GPU_CHECK(cudaMallocHost(ptr, bytes));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done: " << *ptr << std::endl;
#endif
#endif
   return *ptr;
}
78 
/// Free device memory previously allocated with CuMemAlloc /
/// CuMallocManaged via cudaFree, and return @a dptr (now dangling).
/// When MFEM is built without CUDA support this is a no-op that simply
/// returns @a dptr.
void* CuMemFree(void *dptr)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "CuMemFree(): deallocating memory @ " << dptr << " ... "
             << std::flush;
#endif
   MFEM_GPU_CHECK(cudaFree(dptr));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done." << std::endl;
#endif
#endif
   return dptr;
}
93 
/// Free page-locked (pinned) host memory previously allocated with
/// CuMemAllocHostPinned via cudaFreeHost, and return @a ptr (now
/// dangling). When MFEM is built without CUDA support this is a no-op
/// that simply returns @a ptr.
void* CuMemFreeHostPinned(void *ptr)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "CuMemFreeHostPinned(): deallocating memory @ " << ptr << " ... "
             << std::flush;
#endif
   MFEM_GPU_CHECK(cudaFreeHost(ptr));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done." << std::endl;
#endif
#endif
   return ptr;
}
108 
/// Synchronously copy @a bytes from host address @a src to device address
/// @a dst (cudaMemcpyHostToDevice) and return @a dst. When MFEM is built
/// without CUDA support this is a no-op that returns @a dst without
/// copying anything.
void* CuMemcpyHtoD(void* dst, const void* src, size_t bytes)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "CuMemcpyHtoD(): copying " << bytes << " bytes from "
             << src << " to " << dst << " ... " << std::flush;
#endif
   MFEM_GPU_CHECK(cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done." << std::endl;
#endif
#endif
   return dst;
}
123 
/// Asynchronously copy @a bytes from host address @a src to device address
/// @a dst (cudaMemcpyAsync on the default stream) and return @a dst.
/// @a src must remain valid until the copy completes; pinned host memory
/// is required for the copy to actually be asynchronous. When MFEM is
/// built without CUDA support this is a no-op that returns @a dst.
void* CuMemcpyHtoDAsync(void* dst, const void* src, size_t bytes)
{
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyHostToDevice));
#endif
   return dst;
}
131 
/// Synchronously copy @a bytes between two device addresses
/// (cudaMemcpyDeviceToDevice) and return @a dst. When MFEM is built
/// without CUDA support this is a no-op that returns @a dst without
/// copying anything.
void* CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "CuMemcpyDtoD(): copying " << bytes << " bytes from "
             << src << " to " << dst << " ... " << std::flush;
#endif
   MFEM_GPU_CHECK(cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToDevice));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done." << std::endl;
#endif
#endif
   return dst;
}
146 
/// Asynchronously copy @a bytes between two device addresses
/// (cudaMemcpyAsync on the default stream) and return @a dst. When MFEM
/// is built without CUDA support this is a no-op that returns @a dst.
void* CuMemcpyDtoDAsync(void* dst, const void *src, size_t bytes)
{
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToDevice));
#endif
   return dst;
}
154 
/// Synchronously copy @a bytes from device address @a src to host address
/// @a dst (cudaMemcpyDeviceToHost) and return @a dst. When MFEM is built
/// without CUDA support this is a no-op that returns @a dst without
/// copying anything.
void* CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
{
#ifdef MFEM_USE_CUDA
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "CuMemcpyDtoH(): copying " << bytes << " bytes from "
             << src << " to " << dst << " ... " << std::flush;
#endif
   MFEM_GPU_CHECK(cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost));
#ifdef MFEM_TRACK_CUDA_MEM
   mfem::out << "done." << std::endl;
#endif
#endif
   return dst;
}
169 
/// Asynchronously copy @a bytes from device address @a src to host address
/// @a dst (cudaMemcpyAsync on the default stream) and return @a dst.
/// @a dst must not be read until the copy completes; pinned host memory is
/// required for the copy to actually be asynchronous. When MFEM is built
/// without CUDA support this is a no-op that returns @a dst.
void* CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
{
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToHost));
#endif
   return dst;
}
177 
/// Check the error code returned by cudaGetLastError(), aborting on error.
/// No-op when MFEM is built without CUDA support.
/// NOTE(review): the signature line was lost in the Doxygen extraction;
/// restored here from the documented declaration (void CuCheckLastError()).
void CuCheckLastError()
{
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaGetLastError());
#endif
}
184 
/// Return the number of CUDA devices reported by cudaGetDeviceCount(),
/// or -1 when MFEM is built without CUDA support.
/// NOTE(review): the signature line was lost in the Doxygen extraction;
/// restored here from the documented declaration (int CuGetDeviceCount()).
int CuGetDeviceCount()
{
   int num_gpus = -1;
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaGetDeviceCount(&num_gpus));
#endif
   return num_gpus;
}
193 
194 } // namespace mfem
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition: cuda.cpp:109
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition: cuda.cpp:79
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
Definition: globals.hpp:71
void * CuMemFreeHostPinned(void *ptr)
Frees page-locked (pinned) host memory and returns destination ptr.
Definition: cuda.cpp:94
void CuCheckLastError()
Check the error code returned by cudaGetLastError(), aborting on error.
Definition: cuda.cpp:178
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition: cuda.cpp:185
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
Definition: cuda.cpp:49
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func, const char *file, int line)
Definition: cuda.cpp:23
void mfem_error(const char *msg)
Function called when an error is encountered. Used by the macros MFEM_ABORT, MFEM_ASSERT, MFEM_VERIFY.
Definition: error.cpp:154
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device and returns destination ptr.
Definition: cuda.cpp:132
void * CuMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition: cuda.cpp:147
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
void * CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host and returns destination ptr.
Definition: cuda.cpp:170
void * CuMemcpyHtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition: cuda.cpp:124
void * CuMemAllocHostPinned(void **ptr, size_t bytes)
Allocates page-locked (pinned) host memory.
Definition: cuda.cpp:64
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition: cuda.cpp:34
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition: cuda.cpp:155