MFEM v4.6.0
Finite element discretization library
backends.hpp
// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.

#ifndef MFEM_BACKENDS_HPP
#define MFEM_BACKENDS_HPP

#include "../config/config.hpp"

#ifdef MFEM_USE_CUDA
#include <cusparse.h>
#include <library_types.h>
#include <cuda_runtime.h>
#include <cuda.h>
#endif
#include "cuda.hpp"

#ifdef MFEM_USE_HIP
#include <hip/hip_runtime.h>
#endif
#include "hip.hpp"

#ifdef MFEM_USE_OCCA
#include "occa.hpp"
#endif

#ifdef MFEM_USE_RAJA
// The following two definitions suppress CUB and THRUST deprecation warnings
// about requiring C++14 (C++11 is deprecated but still supported, to be
// removed in a future release).
#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
#define THRUST_IGNORE_DEPRECATED_CPP_DIALECT
#include "RAJA/RAJA.hpp"
#if defined(RAJA_ENABLE_CUDA) && !defined(MFEM_USE_CUDA)
#error When RAJA is built with CUDA, MFEM_USE_CUDA=YES is required
#endif
#endif

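// When neither CUDA nor HIP is enabled, the device-related annotations below
// are defined as no-ops so that code written with them still compiles and
// runs serially on the host.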
#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
#define MFEM_DEVICE
#define MFEM_LAMBDA
#define MFEM_HOST_DEVICE
// MFEM_DEVICE_SYNC is made available for debugging purposes
#define MFEM_DEVICE_SYNC
// MFEM_STREAM_SYNC is used for UVM and MPI GPU-Aware kernels
#define MFEM_STREAM_SYNC
#endif

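// When not compiling actual device code, the block/thread macros reduce to
// trivial host equivalents: a single block and thread, and a serial loop for
// MFEM_FOREACH_THREAD.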
#if !((defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)) || \
      (defined(MFEM_USE_HIP) && defined(__HIP_DEVICE_COMPILE__)))
#define MFEM_SHARED
#define MFEM_SYNC_THREAD
#define MFEM_BLOCK_ID(k) 0
#define MFEM_THREAD_ID(k) 0
#define MFEM_THREAD_SIZE(k) 1
#define MFEM_FOREACH_THREAD(i,k,N) for(int i=0; i<N; i++)
#endif

// 'double' atomicAdd implementation for previous versions of CUDA
#if defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
MFEM_DEVICE inline double atomicAdd(double *add, double val)
{
   unsigned long long int *ptr = (unsigned long long int *) add;
   unsigned long long int old = *ptr, reg;
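   // Retry with atomicCAS on the 64-bit representation of the double until
   // no other thread has modified *ptr between the read and the swap.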
   do
   {
      reg = old;
      old = atomicCAS(ptr, reg,
                      __double_as_longlong(val + __longlong_as_double(reg)));
   }
   while (reg != old);
   return __longlong_as_double(old);
}
#endif

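// Portable fetch-and-add: on device it forwards to atomicAdd(); on host it
// updates 'add' directly (with '#pragma omp atomic' when OpenMP is enabled)
// and returns the value read before the update.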
template <typename T>
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
{
#if ((defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)) || \
     (defined(MFEM_USE_HIP) && defined(__HIP_DEVICE_COMPILE__)))
   return atomicAdd(&add, val);
#else
   T old = add;
#ifdef MFEM_USE_OPENMP
   #pragma omp atomic
#endif
   add += val;
   return old;
#endif
}

#endif // MFEM_BACKENDS_HPP
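A minimal usage sketch (not part of the header), assuming only the macros and
AtomicAdd defined above; the function name SumSketch is hypothetical. With
neither CUDA nor HIP enabled, the annotations vanish and the loop runs
serially on the host; in device code the same body maps one logical thread to
each entry.

#include "backends.hpp"

// Hypothetical helper: accumulate the entries of x into *sum.
MFEM_HOST_DEVICE inline void SumSketch(const int N, const double *x, double *sum)
{
   // One logical thread per entry in the x-dimension; a plain serial loop on
   // the host.
   MFEM_FOREACH_THREAD(i, x, N)
   {
      AtomicAdd(*sum, x[i]); // fetch-and-add, atomic in device code
   }
   MFEM_SYNC_THREAD;
}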