MFEM  v4.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
cuda.hpp
Go to the documentation of this file.
1 // Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-443211. All Rights
3 // reserved. See file COPYRIGHT for details.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability see http://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the GNU Lesser General Public License (as published by the Free
10 // Software Foundation) version 2.1 dated February 1999.
11 
12 #ifndef MFEM_CUDA_HPP
13 #define MFEM_CUDA_HPP
14 
15 #include "../config/config.hpp"
16 #include "error.hpp"
17 
18 #ifdef MFEM_USE_CUDA
19 #include <cuda_runtime.h>
20 #include <cuda.h>
21 #endif
22 
// CUDA block size used by MFEM.
// NOTE(review): presumably the number of threads per block, despite the plural
// name -- confirm at the kernel launch sites.
#define MFEM_CUDA_BLOCKS 256

#ifdef MFEM_USE_CUDA
// Function qualifiers for CUDA builds.
#define MFEM_DEVICE __device__
#define MFEM_HOST_DEVICE __host__ __device__
// MFEM_CUDA_CHECK(x): CUDA error check macro, where 'x' returns/is of type
// 'cudaError_t'. 'x' is evaluated exactly once; if the result is not
// cudaSuccess, it is passed to mfem::mfem_cuda_error together with the text of
// 'x', the enclosing function name, the file and the line.
//
// Macro hygiene:
//  - the temporary has a deliberately unusual name, so that an expression 'x'
//    that mentions an identifier 'err' (a caller's local, or the mfem::err
//    stream) is not captured by the macro's own variable;
//  - the handler is called with a fully qualified name, so that the macro can
//    also be expanded outside of namespace mfem.
// The do { ... } while (0) wrapper makes the expansion a single statement.
#define MFEM_CUDA_CHECK(x) \
   do \
   { \
      cudaError_t mfem_cuda_check_err_ = (x); \
      if (mfem_cuda_check_err_ != cudaSuccess) \
      { \
         ::mfem::mfem_cuda_error(mfem_cuda_check_err_, #x, _MFEM_FUNC_NAME, \
                                 __FILE__, __LINE__); \
      } \
   } \
   while (0)
#else
// Without CUDA the qualifiers expand to nothing; MFEM_CUDA_CHECK is not
// defined in this configuration.
#define MFEM_DEVICE
#define MFEM_HOST_DEVICE
#endif // MFEM_USE_CUDA
46 
// MFEM inner threading macros.
//
// When compiling device code of a CUDA build (MFEM_USE_CUDA and __CUDA_ARCH__
// both defined) the macros map onto the CUDA built-ins:
//   MFEM_SHARED               -> __shared__
//   MFEM_SYNC_THREAD          -> __syncthreads()
//   MFEM_THREAD_ID(k)         -> threadIdx.k
//   MFEM_THREAD_SIZE(k)       -> blockDim.k
//   MFEM_FOREACH_THREAD(i,k,N): loop with 'int i' starting at threadIdx.k and
//                               advancing by blockDim.k while i < N.
// Otherwise they are serial fallbacks: MFEM_SHARED and MFEM_SYNC_THREAD expand
// to nothing, the thread id is 0, the thread size is 1, and
// MFEM_FOREACH_THREAD is a plain loop over i = 0, ..., N-1.
//
// The bound N is parenthesized in the expansion so that an argument containing
// an operator of lower precedence than '<' (e.g. 'c ? a : b') is compared as a
// whole instead of being re-associated with 'i<'.
#if defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)
#define MFEM_SHARED __shared__
#define MFEM_SYNC_THREAD __syncthreads()
#define MFEM_THREAD_ID(k) threadIdx.k
#define MFEM_THREAD_SIZE(k) blockDim.k
#define MFEM_FOREACH_THREAD(i,k,N) for(int i=threadIdx.k; i<(N); i+=blockDim.k)
#else
#define MFEM_SHARED
#define MFEM_SYNC_THREAD
#define MFEM_THREAD_ID(k) 0
#define MFEM_THREAD_SIZE(k) 1
#define MFEM_FOREACH_THREAD(i,k,N) for(int i=0; i<(N); i++)
#endif
61 
62 
namespace mfem
{

#ifdef MFEM_USE_CUDA
/// Error handler called by the MFEM_CUDA_CHECK macro.
/** The macro passes the failing status as @a err, the text of the checked
    expression as @a expr, and the call site as @a func, @a file and @a line. */
void mfem_cuda_error(cudaError_t err,
                     const char *expr,
                     const char *func,
                     const char *file,
                     int line);
#endif

// The memory routines below are declared regardless of MFEM_USE_CUDA; only
// their declarations are visible in this header.
// NOTE(review): the meaning of the returned void pointer is not stated here --
// check the definitions in cuda.cpp before relying on it.

/// Allocates @a bytes of device memory; takes the address of the device
/// pointer, @a d_ptr.
void* CuMemAlloc(void **d_ptr, size_t bytes);

/// Frees the device memory referenced by @a d_ptr.
void* CuMemFree(void *d_ptr);

/// Copies @a bytes from Host (@a h_src) to Device (@a d_dst).
void* CuMemcpyHtoD(void *d_dst, const void *h_src, size_t bytes);

/// Copies @a bytes from Host (@a h_src) to Device (@a d_dst); Async variant
/// of CuMemcpyHtoD.
void* CuMemcpyHtoDAsync(void *d_dst, const void *h_src, size_t bytes);

/// Copies @a bytes from Device (@a d_src) to Device (@a d_dst).
void* CuMemcpyDtoD(void *d_dst, const void *d_src, size_t bytes);

/// Copies @a bytes from Device (@a d_src) to Device (@a d_dst); Async variant
/// of CuMemcpyDtoD.
void* CuMemcpyDtoDAsync(void *d_dst, const void *d_src, size_t bytes);

/// Copies @a bytes from Device (@a d_src) to Host (@a h_dst).
void* CuMemcpyDtoH(void *h_dst, const void *d_src, size_t bytes);

/// Copies @a bytes from Device (@a d_src) to Host (@a h_dst); Async variant
/// of CuMemcpyDtoH.
void* CuMemcpyDtoHAsync(void *h_dst, const void *d_src, size_t bytes);

} // namespace mfem
97 
98 #endif // MFEM_CUDA_HPP
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device.
Definition: cuda.cpp:64
void * CuMemFree(void *dptr)
Frees device memory.
Definition: cuda.cpp:49
void mfem_cuda_error(cudaError_t err, const char *expr, const char *func, const char *file, int line)
Definition: cuda.cpp:23
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition: cuda.cpp:87
void * CuMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device (Async variant of CuMemcpyDtoD).
Definition: cuda.cpp:102
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambuf as std::cerr, however that can be changed.
Definition: globals.hpp:69
void * CuMemcpyDtoHAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host (Async variant of CuMemcpyDtoH).
Definition: cuda.cpp:125
void * CuMemcpyHtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device (Async variant of CuMemcpyHtoD).
Definition: cuda.cpp:79
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
Definition: cuda.cpp:34
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition: cuda.cpp:110