MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
cuda.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "backends.hpp"
13#include "globals.hpp"
14
15namespace mfem
16{
17
18// Internal debug option, useful for tracking CUDA allocations, deallocations
19// and transfers.
20// #define MFEM_TRACK_CUDA_MEM
21
22#ifdef MFEM_USE_CUDA
23void mfem_cuda_error(cudaError_t err, const char *expr, const char *func,
24 const char *file, int line)
25{
26 mfem::err << "\n\nCUDA error: (" << expr << ") failed with error:\n --> "
27 << cudaGetErrorString(err)
28 << "\n ... in function: " << func
29 << "\n ... in file: " << file << ':' << line << '\n';
30 mfem_error();
31}
32#endif
33
// Allocate 'bytes' bytes of device memory, storing the device address in
// *dptr, and return that address. When built without CUDA this simply
// returns *dptr unchanged.
void* CuMemAlloc(void** dptr, size_t bytes)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMemAlloc(): allocating " << bytes << " bytes ... "
             << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMalloc(dptr, bytes));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done: " << *dptr << std::endl;
#endif
   return *dptr;
}
48
// Allocate 'bytes' bytes of CUDA managed (unified) memory, storing the
// address in *dptr, and return that address. When built without CUDA this
// simply returns *dptr unchanged.
void* CuMallocManaged(void** dptr, size_t bytes)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMallocManaged(): allocating " << bytes << " bytes ... "
             << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMallocManaged(dptr, bytes));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done: " << *dptr << std::endl;
#endif
   return *dptr;
}
63
// Allocate 'bytes' bytes of page-locked (pinned) host memory via
// cudaMallocHost, storing the address in *ptr, and return that address.
// When built without CUDA this simply returns *ptr unchanged.
void* CuMemAllocHostPinned(void** ptr, size_t bytes)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMemAllocHostPinned(): allocating " << bytes << " bytes ... "
             << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMallocHost(ptr, bytes));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done: " << *ptr << std::endl;
#endif
   return *ptr;
}
78
// Free device memory previously obtained from CuMemAlloc/CuMallocManaged
// and return the (now dangling) pointer. When built without CUDA this is a
// no-op that returns 'dptr'.
void* CuMemFree(void *dptr)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMemFree(): deallocating memory @ " << dptr << " ... "
             << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaFree(dptr));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done." << std::endl;
#endif
   return dptr;
}
93
// Free pinned host memory previously obtained from CuMemAllocHostPinned and
// return the (now dangling) pointer. When built without CUDA this is a
// no-op that returns 'ptr'.
void* CuMemFreeHostPinned(void *ptr)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMemFreeHostPinned(): deallocating memory @ " << ptr << " ... "
             << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaFreeHost(ptr));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done." << std::endl;
#endif
   return ptr;
}
108
// Copy 'bytes' bytes from host memory 'src' to device memory 'dst' and
// return 'dst'. Uses the blocking cudaMemcpy. When built without CUDA no
// copy is performed and 'dst' is returned unchanged.
void* CuMemcpyHtoD(void* dst, const void* src, size_t bytes)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMemcpyHtoD(): copying " << bytes << " bytes from "
             << src << " to " << dst << " ... " << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done." << std::endl;
#endif
   return dst;
}
123
// Start an asynchronous host-to-device copy of 'bytes' bytes from 'src' to
// 'dst' (the copy may still be in flight on return) and return 'dst'.
// When built without CUDA no copy is performed.
void* CuMemcpyHtoDAsync(void* dst, const void* src, size_t bytes)
{
   void *result = dst;
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyHostToDevice));
#endif
   return result;
}
131
// Copy 'bytes' bytes between two device buffers ('src' -> 'dst') and return
// 'dst'. Uses the blocking cudaMemcpy. When built without CUDA no copy is
// performed and 'dst' is returned unchanged.
void* CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMemcpyDtoD(): copying " << bytes << " bytes from "
             << src << " to " << dst << " ... " << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToDevice));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done." << std::endl;
#endif
   return dst;
}
146
// Start an asynchronous device-to-device copy of 'bytes' bytes from 'src'
// to 'dst' (the copy may still be in flight on return) and return 'dst'.
// When built without CUDA no copy is performed.
void* CuMemcpyDtoDAsync(void* dst, const void *src, size_t bytes)
{
   void *result = dst;
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToDevice));
#endif
   return result;
}
154
// Copy 'bytes' bytes from device memory 'src' to host memory 'dst' and
// return 'dst'. Uses the blocking cudaMemcpy. When built without CUDA no
// copy is performed and 'dst' is returned unchanged.
void* CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
{
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "CuMemcpyDtoH(): copying " << bytes << " bytes from "
             << src << " to " << dst << " ... " << std::flush;
#endif
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpy(dst, src, bytes, cudaMemcpyDeviceToHost));
#endif
#if defined(MFEM_USE_CUDA) && defined(MFEM_TRACK_CUDA_MEM)
   mfem::out << "done." << std::endl;
#endif
   return dst;
}
169
// Start an asynchronous device-to-host copy of 'bytes' bytes from 'src' to
// 'dst' (the copy may still be in flight on return) and return 'dst'.
// When built without CUDA no copy is performed.
void* CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
{
   void *result = dst;
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToHost));
#endif
   return result;
}

// Check the error code returned by cudaGetLastError(), aborting on error.
// No-op when MFEM is built without CUDA support.
// NOTE(review): the signature line was lost in extraction; reconstructed
// from the documented declaration 'void CuCheckLastError()'.
void CuCheckLastError()
{
#ifdef MFEM_USE_CUDA
   // cudaGetLastError() also clears the sticky error state; MFEM_GPU_CHECK
   // aborts via mfem_cuda_error() if an error was pending.
   MFEM_GPU_CHECK(cudaGetLastError());
#endif
}

// Get the number of CUDA devices. Returns -1 when MFEM is built without
// CUDA support; otherwise returns the count reported by
// cudaGetDeviceCount(), aborting on a CUDA error.
// NOTE(review): the signature line was lost in extraction; reconstructed
// from the documented declaration 'int CuGetDeviceCount()'.
int CuGetDeviceCount()
{
   int num_gpus = -1; // sentinel for "CUDA not available"
#ifdef MFEM_USE_CUDA
   MFEM_GPU_CHECK(cudaGetDeviceCount(&num_gpus));
#endif
   return num_gpus;
}
193
194} // namespace mfem
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition cuda.cpp:34
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition cuda.cpp:79
void * CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition cuda.cpp:170
void mfem_error(const char *msg)
Definition error.cpp:154
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
Definition cuda.cpp:49
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition cuda.cpp:155
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as std::cout.
Definition globals.hpp:66
void * CuMemAllocHostPinned(void **ptr, size_t bytes)
Allocates page-locked (pinned) host memory.
Definition cuda.cpp:64
void * CuMemFreeHostPinned(void *ptr)
Frees page-locked (pinned) host memory and returns destination ptr.
Definition cuda.cpp:94
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func, const char *file, int line)
Definition cuda.cpp:23
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition cuda.cpp:109
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition cuda.cpp:185
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambuf as std::cerr.
Definition globals.hpp:71
void * CuMemcpyHtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition cuda.cpp:124
void CuCheckLastError()
Check the error code returned by cudaGetLastError(), aborting on error.
Definition cuda.cpp:178
void * CuMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition cuda.cpp:147
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition cuda.cpp:132