MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
backends.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#ifndef MFEM_BACKENDS_HPP
13#define MFEM_BACKENDS_HPP
14
15#include "../config/config.hpp"
16
17#ifdef MFEM_USE_CUDA
18#include <cusparse.h>
19#include <library_types.h>
20#include <cuda_runtime.h>
21#include <cuda.h>
22#endif
23#include "cuda.hpp"
24
25#ifdef MFEM_USE_HIP
26#include <hip/hip_runtime.h>
27#endif
28#include "hip.hpp"
29
30#ifdef MFEM_USE_OCCA
31#include "occa.hpp"
32#endif
33
34#ifdef MFEM_USE_RAJA
35// The following two definitions suppress CUB and THRUST deprecation warnings
36// about requiring c++14 with c++11 deprecated but still supported (to be
37// removed in a future release).
38#define CUB_IGNORE_DEPRECATED_CPP_DIALECT
39#define THRUST_IGNORE_DEPRECATED_CPP_DIALECT
40#include "RAJA/RAJA.hpp"
41#if defined(RAJA_ENABLE_CUDA) && !defined(MFEM_USE_CUDA)
42#error When RAJA is built with CUDA, MFEM_USE_CUDA=YES is required
43#endif
44#endif
45
// Builds with neither CUDA nor HIP enabled: the device qualifiers and the
// synchronization hooks below all expand to nothing.
#if !defined(MFEM_USE_CUDA) && !defined(MFEM_USE_HIP)
#define MFEM_DEVICE
#define MFEM_LAMBDA
// MFEM_HOST_DEVICE is deliberately not defined here: it is provided by
// config/config.hpp.
// MFEM_DEVICE_SYNC is made available for debugging purposes.
#define MFEM_DEVICE_SYNC
// MFEM_STREAM_SYNC is used for UVM and MPI GPU-aware kernels.
#define MFEM_STREAM_SYNC
#endif
55
// Fallback definitions used whenever the current compilation pass is not a
// CUDA or HIP device pass (i.e. host code, or a build without GPU support).
// In that case there is a single "thread": the block and thread ids are 0, the
// thread count is 1, and MFEM_FOREACH_THREAD becomes a plain serial loop over
// [0, N).
#if !((defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)) || \
      (defined(MFEM_USE_HIP) && defined(__HIP_DEVICE_COMPILE__)))
#define MFEM_SHARED
#define MFEM_SYNC_THREAD
#define MFEM_BLOCK_ID(k) 0
#define MFEM_THREAD_ID(k) 0
#define MFEM_THREAD_SIZE(k) 1
// N is parenthesized so that an expression argument (e.g. 'c ? a : b') is
// compared against i as a whole instead of being re-associated with 'i<'.
#define MFEM_FOREACH_THREAD(i,k,N) for(int i=0; i<(N); i++)
#endif
65
// Software atomicAdd on real_t for CUDA device code targeting compute
// capability below 6.0. Implemented as a compare-and-swap retry loop on the
// raw bit pattern of the stored value.
#if defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
/// Atomically perform `*add += val` and return the value `*add` held
/// immediately before the update.
MFEM_DEVICE inline real_t atomicAdd(real_t *add, real_t val)
{
#ifdef MFEM_USE_SINGLE
   // With MFEM_USE_SINGLE the value is handled as a 32-bit float, so the CAS
   // must operate on a 32-bit word. Using a 64-bit word here would also
   // compare and overwrite the 4 bytes that follow *add, and may be
   // misaligned.
   // NOTE(review): CUDA is believed to provide a native
   // atomicAdd(float*, float) on these architectures -- confirm whether this
   // overload is needed (or even allowed) in single-precision builds.
   typedef unsigned int word_t;
#else
   typedef unsigned long long int word_t;
#endif
   word_t *ptr = reinterpret_cast<word_t *>(add);
   word_t old = *ptr, reg;
   do
   {
      // 'reg' is the bit pattern the new value is computed from; the CAS only
      // stores the sum if *ptr still holds 'reg', and otherwise returns the
      // current content so the loop can retry with it.
      reg = old;
#ifdef MFEM_USE_SINGLE
      old = atomicCAS(ptr, reg,
                      __float_as_uint(val + __uint_as_float(reg)));
#else
      old = atomicCAS(ptr, reg,
                      __double_as_longlong(val + __longlong_as_double(reg)));
#endif
   }
   while (reg != old);
#ifdef MFEM_USE_SINGLE
   return __uint_as_float(old);
#else
   return __longlong_as_double(old);
#endif
}
#endif
90
91template <typename T>
92MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
93{
94#if ((defined(MFEM_USE_CUDA) && defined(__CUDA_ARCH__)) || \
95 (defined(MFEM_USE_HIP) && defined(__HIP_DEVICE_COMPILE__)))
96 return atomicAdd(&add,val);
97#else
98 T old = add;
99#ifdef MFEM_USE_OPENMP
100 #pragma omp atomic
101#endif
102 add += val;
103 return old;
104#endif
105}
106
107#endif // MFEM_BACKENDS_HPP
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition backends.hpp:92
MFEM_DEVICE real_t atomicAdd(real_t *add, real_t val)
Definition backends.hpp:68