MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
mem_manager.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "forall.hpp"
13 #include "mem_manager.hpp"
14 
15 #include <list>
16 #include <cstring> // std::memcpy, std::memcmp
17 #include <unordered_map>
18 #include <algorithm> // std::max
19 
20 // Uncomment to try _WIN32 platform
21 //#define _WIN32
22 //#define _aligned_malloc(s,a) malloc(s)
23 
24 #ifndef _WIN32
25 #include <unistd.h>
26 #include <signal.h>
27 #include <sys/mman.h>
28 #define mfem_memalign(p,a,s) posix_memalign(p,a,s)
29 #define mfem_aligned_free free
30 #else
31 #define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
32 #define mfem_aligned_free _aligned_free
33 #endif
34 
35 #ifdef MFEM_USE_UMPIRE
36 #include "umpire/Umpire.hpp"
37 
// Make sure Umpire is built with CUDA support if MFEM is built with it.
39 #if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
40 #error "CUDA is not enabled in Umpire!"
41 #endif
// Make sure Umpire is built with HIP support if MFEM is built with it.
43 #if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
44 #error "HIP is not enabled in Umpire!"
45 #endif
46 #endif // MFEM_USE_UMPIRE
47 
48 namespace mfem
49 {
50 
52 {
53  switch (mc)
54  {
55  case MemoryClass::HOST: return mm.GetHostMemoryType();
60  }
61  MFEM_VERIFY(false,"");
62  return MemoryType::HOST;
63 }
64 
// We want to keep these pairs, as they are checked in MFEM_VERIFY_TYPES
66 MemoryType MemoryManager::GetDualMemoryType_(MemoryType mt)
67 {
68  switch (mt)
69  {
79  default: mfem_error("Unknown memory type!");
80  }
81  MFEM_VERIFY(false,"");
82  return MemoryType::HOST;
83 }
84 
85 static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
86 {
87  MFEM_ASSERT(IsHostMemory(h_mt),"");
88  MFEM_ASSERT(IsDeviceMemory(d_mt),"");
89  const bool sync =
91  (h_mt == MemoryType::HOST_DEBUG && d_mt == MemoryType::DEVICE_DEBUG) ||
92  (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
93  (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
94  (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
95  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE);
96  MFEM_VERIFY(sync, "");
97 }
98 
100 {
101  // | HOST HOST_32 HOST_64 DEVICE MANAGED
102  // ---------+---------------------------------------------
103  // HOST | HOST HOST_32 HOST_64 DEVICE MANAGED
104  // HOST_32 | HOST_32 HOST_32 HOST_64 DEVICE MANAGED
105  // HOST_64 | HOST_64 HOST_64 HOST_64 DEVICE MANAGED
106  // DEVICE | DEVICE DEVICE DEVICE DEVICE MANAGED
107  // MANAGED | MANAGED MANAGED MANAGED MANAGED MANAGED
108 
109  // Using the enumeration ordering:
110  // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
111  // the above table is simply: a*b = max(a,b).
112 
113  return std::max(mc1, mc2);
114 }
115 
116 
// Instantiate Memory<T>::PrintFlags for T = int and T = double.
// (Explicit instantiation definitions: they emit the symbols for these
// template members in this translation unit.)
template void Memory<int>::PrintFlags() const;
template void Memory<double>::PrintFlags() const;

// Instantiate Memory<T>::CompareHostAndDevice for T = int and T = double.
template int Memory<int>::CompareHostAndDevice(int size) const;
template int Memory<double>::CompareHostAndDevice(int size) const;
124 
125 
126 namespace internal
127 {
128 
/// Memory class that holds:
/// - the host and the device pointer
/// - the size in bytes of this memory region
/// - the host and device type of this memory region
struct Memory
{
   void *const h_ptr;   ///< Host pointer; fixed for the lifetime of the record
   void *d_ptr;         ///< Device pointer; starts null, set by a device Alloc
   const size_t bytes;  ///< Size of the region in bytes
   const MemoryType h_mt, d_mt; ///< Host and device memory types
   // Cached read/write state, used by the MMU (DEBUG) memory spaces to skip
   // redundant mprotect calls; mutable so const records can be (un)protected.
   mutable bool h_rw, d_rw;
   Memory(void *p, size_t b, MemoryType h, MemoryType d):
      h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
      h_rw(true), d_rw(true) { }
};
144 
/// Alias class that holds the base memory region and the offset
struct Alias
{
   Memory *const mem;          ///< The base memory region this alias points into
   const size_t offset, bytes; ///< Offset into the base and size of the alias, in bytes
   // Usage counter — presumably incremented/decremented by InsertAlias and
   // EraseAlias (their definitions are not visible in this chunk).
   size_t counter;
   const MemoryType h_mt;      ///< Host memory type of the alias
};
153 
/// Maps for the Memory and the Alias classes
typedef std::unordered_map<const void*, Memory> MemoryMap;
typedef std::unordered_map<const void*, Alias> AliasMap;

/// The pair of maps tracking everything the memory manager knows about:
/// all registered base regions and all registered aliases, both keyed by
/// their host pointer.
struct Maps
{
   MemoryMap memories; ///< Base memory regions, keyed by host pointer
   AliasMap aliases;   ///< Aliases, keyed by their (offset) host pointer
};
163 
164 } // namespace mfem::internal
165 
// File-global registry of all memory regions and aliases known to the
// memory manager (see internal::Maps above).
static internal::Maps *maps;
167 
168 namespace internal
169 {
170 
/// The host memory space base abstract class
class HostMemorySpace
{
public:
   virtual ~HostMemorySpace() { }
   /// Allocate @a bytes on the host; the default uses std::malloc
   virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
   /// Free a pointer previously returned by Alloc; the default uses std::free
   virtual void Dealloc(void *ptr) { std::free(ptr); }
   /// Remove host read/write access (no-op except in the MMU/DEBUG space)
   virtual void Protect(const Memory&, size_t) { }
   /// Restore host read/write access (no-op by default)
   virtual void Unprotect(const Memory&, size_t) { }
   /// Protect the pages of an alias (no-op by default)
   virtual void AliasProtect(const void*, size_t) { }
   /// Un-protect the pages of an alias (no-op by default)
   virtual void AliasUnprotect(const void*, size_t) { }
};
183 
/// The device memory space base abstract class
class DeviceMemorySpace
{
public:
   virtual ~DeviceMemorySpace() { }
   /// Allocate base.bytes and store the result in base.d_ptr;
   /// the default (no real device) falls back to std::malloc
   virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
   /// Free base.d_ptr; the default uses std::free
   virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
   /// Remove device read/write access (no-op by default)
   virtual void Protect(const Memory&) { }
   /// Restore device read/write access (no-op by default)
   virtual void Unprotect(const Memory&) { }
   /// Protect the pages of an alias (no-op by default)
   virtual void AliasProtect(const void*, size_t) { }
   /// Un-protect the pages of an alias (no-op by default)
   virtual void AliasUnprotect(const void*, size_t) { }
   /// Host-to-device copy; the default is a plain std::memcpy
   virtual void *HtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   /// Device-to-device copy; the default is a plain std::memcpy
   virtual void *DtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   /// Device-to-host copy; the default is a plain std::memcpy
   virtual void *DtoH(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
};
202 
/// The default std:: host memory space: inherits the std::malloc/std::free
/// behavior of HostMemorySpace unchanged
class StdHostMemorySpace : public HostMemorySpace { };
205 
206 /// The No host memory space
207 struct NoHostMemorySpace : public HostMemorySpace
208 {
209  void Alloc(void**, const size_t) { mfem_error("! Host Alloc error"); }
210 };
211 
212 /// The aligned 32 host memory space
213 class Aligned32HostMemorySpace : public HostMemorySpace
214 {
215 public:
216  Aligned32HostMemorySpace(): HostMemorySpace() { }
217  void Alloc(void **ptr, size_t bytes)
218  { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
219  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
220 };
221 
222 /// The aligned 64 host memory space
223 class Aligned64HostMemorySpace : public HostMemorySpace
224 {
225 public:
226  Aligned64HostMemorySpace(): HostMemorySpace() { }
227  void Alloc(void **ptr, size_t bytes)
228  { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
229  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
230 };
231 
232 #ifndef _WIN32
233 static uintptr_t pagesize = 0;
234 static uintptr_t pagemask = 0;
235 
236 /// Returns the restricted base address of the DEBUG segment
237 inline const void *MmuAddrR(const void *ptr)
238 {
239  const uintptr_t addr = (uintptr_t) ptr;
240  return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
241 }
242 
243 /// Returns the prolongated base address of the MMU segment
244 inline const void *MmuAddrP(const void *ptr)
245 {
246  const uintptr_t addr = (uintptr_t) ptr;
247  return (void*) (addr & ~pagemask);
248 }
249 
/// Compute the restricted length for the MMU segment: the total size of the
/// whole pages fully contained inside [ptr, ptr+bytes) — only these pages
/// can be protected without touching neighboring data on shared pages.
inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
{
   // a ---->A:| |:B<---- b
   const uintptr_t a = (uintptr_t) ptr;
   const uintptr_t A = (uintptr_t) MmuAddrR(ptr); // first page boundary >= a
   MFEM_ASSERT(a <= A, "");
   const uintptr_t b = a + bytes;                 // one past the end
   const uintptr_t B = b & ~pagemask;             // last page boundary <= b
   MFEM_ASSERT(B <= b, "");
   // If no whole page fits inside [a, b), the restricted length is zero
   const uintptr_t length = B > A ? B - A : 0;
   MFEM_ASSERT(length % pagesize == 0,"");
   return length;
}
264 
/// Compute the prolongated length for the MMU segment: the total size of the
/// whole pages that together cover all of [ptr, ptr+bytes).
inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
{
   // |:A<----a | | b---->B:|
   const uintptr_t a = (uintptr_t) ptr;
   const uintptr_t A = (uintptr_t) MmuAddrP(ptr); // page boundary <= a
   MFEM_ASSERT(A <= a, "");
   const uintptr_t b = a + bytes;
   // Round b up to the next page boundary, unless it already is one
   const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
   MFEM_ASSERT(b <= B, "");
   MFEM_ASSERT(B >= A,"");
   const uintptr_t length = B - A;
   MFEM_ASSERT(length % pagesize == 0,"");
   return length;
}
280 
281 /// The protected access error, used for the host
282 static void MmuError(int, siginfo_t *si, void*)
283 {
284  fflush(0);
285  char str[64];
286  const void *ptr = si->si_addr;
287  sprintf(str, "Error while accessing address %p!", ptr);
288  mfem::out << std::endl << "An illegal memory access was made!";
289  MFEM_ABORT(str);
290 }
291 
/// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
static void MmuInit()
{
   // Idempotent: pagesize > 0 means the handlers are already installed
   if (pagesize > 0) { return; }
   struct sigaction sa;
   sa.sa_flags = SA_SIGINFO; // deliver a siginfo_t (faulting address) to MmuError
   sigemptyset(&sa.sa_mask);
   sa.sa_sigaction = MmuError;
   if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
   if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
   pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
   MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
   // The mask math in MmuAddr*/MmuLength* assumes pagesize is a power of two
   pagemask = pagesize - 1;
}
306 
/// MMU allocation, through ::mmap: anonymous, private, read/write pages.
/// Throws std::bad_alloc when the mapping fails.
inline void MmuAlloc(void **ptr, const size_t bytes)
{
   // Zero-byte requests still map a minimal 8-byte region
   const size_t length = (bytes > 0) ? bytes : 8;
   *ptr = ::mmap(NULL, length, PROT_READ | PROT_WRITE,
                 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
   if (MAP_FAILED == *ptr) { throw ::std::bad_alloc(); }
}
316 
317 /// MMU deallocation, through ::munmap
318 inline void MmuDealloc(void *ptr, const size_t bytes)
319 {
320  const size_t length = bytes == 0 ? 8 : bytes;
321  if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
322 }
323 
324 /// MMU protection, through ::mprotect with no read/write accesses
325 inline void MmuProtect(const void *ptr, const size_t bytes)
326 {
327  if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
328  mfem_error("MMU protection (NONE) error");
329 }
330 
331 /// MMU un-protection, through ::mprotect with read/write accesses
332 inline void MmuAllow(const void *ptr, const size_t bytes)
333 {
334  const int RW = PROT_READ | PROT_WRITE;
335  if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
336  mfem_error("MMU protection (R/W) error");
337 }
338 #else
// Fallbacks for platforms without POSIX mmap/mprotect (_WIN32): allocation
// reduces to std::malloc/std::free and all protection operations are no-ops.
inline void MmuInit() { }
inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
inline void MmuProtect(const void*, const size_t) { }
inline void MmuAllow(const void*, const size_t) { }
inline const void *MmuAddrR(const void *a) { return a; }
inline const void *MmuAddrP(const void *a) { return a; }
inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
348 #endif
349 
/// The MMU host memory space
class MmuHostMemorySpace : public HostMemorySpace
{
public:
   /// Installs the SIGBUS/SIGSEGV handlers on construction (MmuInit is idempotent)
   MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
   void Alloc(void **ptr, size_t bytes) { MmuAlloc(ptr, bytes); }
   /// The allocation size is recovered from the registered Memory record
   void Dealloc(void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
   /// Remove access; h_rw caches the state so redundant mprotect calls are skipped
   void Protect(const Memory& mem, size_t bytes)
   { if (mem.h_rw) { mem.h_rw = false; MmuProtect(mem.h_ptr, bytes); } }
   /// Restore access; no-op if the region is already read/write
   void Unprotect(const Memory &mem, size_t bytes)
   { if (!mem.h_rw) { mem.h_rw = true; MmuAllow(mem.h_ptr, bytes); } }
   /// Aliases need to be restricted during protection
   void AliasProtect(const void *ptr, size_t bytes)
   { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
   /// Aliases need to be prolongated for un-protection
   void AliasUnprotect(const void *ptr, size_t bytes)
   { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
};
368 
/// The UVM host memory space
class UvmHostMemorySpace : public HostMemorySpace
{
public:
   UvmHostMemorySpace(): HostMemorySpace() { }
   /// Allocate CUDA managed (unified) memory; zero-byte requests allocate 8 bytes
   void Alloc(void **ptr, size_t bytes) { CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
   void Dealloc(void *ptr) { CuMemFree(ptr); }
};
377 
378 /// The 'No' device memory space
379 class NoDeviceMemorySpace: public DeviceMemorySpace
380 {
381 public:
382  void Alloc(internal::Memory&) { mfem_error("! Device Alloc"); }
383  void Dealloc(Memory&) { mfem_error("! Device Dealloc"); }
384  void *HtoD(void*, const void*, size_t) { mfem_error("!HtoD"); return nullptr; }
385  void *DtoD(void*, const void*, size_t) { mfem_error("!DtoD"); return nullptr; }
386  void *DtoH(void*, const void*, size_t) { mfem_error("!DtoH"); return nullptr; }
387 };
388 
/// The std:: device memory space, used with the 'debug' device: inherits the
/// std::malloc/std::free/std::memcpy defaults of DeviceMemorySpace unchanged
class StdDeviceMemorySpace : public DeviceMemorySpace { };
391 
/// The CUDA device memory space: thin wrappers over the Cu* runtime helpers
class CudaDeviceMemorySpace: public DeviceMemorySpace
{
public:
   CudaDeviceMemorySpace(): DeviceMemorySpace() { }
   void Alloc(Memory &base) { CuMemAlloc(&base.d_ptr, base.bytes); }
   void Dealloc(Memory &base) { CuMemFree(base.d_ptr); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   { return CuMemcpyHtoD(dst, src, bytes); }
   void *DtoD(void* dst, const void* src, size_t bytes)
   { return CuMemcpyDtoD(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   { return CuMemcpyDtoH(dst, src, bytes); }
};
406 
/// The HIP device memory space: thin wrappers over the Hip* runtime helpers
class HipDeviceMemorySpace: public DeviceMemorySpace
{
public:
   HipDeviceMemorySpace(): DeviceMemorySpace() { }
   void Alloc(Memory &base) { HipMemAlloc(&base.d_ptr, base.bytes); }
   void Dealloc(Memory &base) { HipMemFree(base.d_ptr); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   { return HipMemcpyHtoD(dst, src, bytes); }
   void *DtoD(void* dst, const void* src, size_t bytes)
   { return HipMemcpyDtoD(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   { return HipMemcpyDtoH(dst, src, bytes); }
};
421 
/// The UVM device memory space.
class UvmCudaMemorySpace : public DeviceMemorySpace
{
public:
   /// With unified memory, the device pointer is the host pointer itself
   void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
   /// Nothing to free here: the managed allocation is owned by the host side
   void Dealloc(Memory&) { }
   void *HtoD(void *dst, const void *src, size_t bytes)
   {
      // Same managed pointer on both sides: just synchronize the stream
      if (dst == src) { MFEM_STREAM_SYNC; return dst; }
      return CuMemcpyHtoD(dst, src, bytes);
   }
   void *DtoD(void* dst, const void* src, size_t bytes)
   { return CuMemcpyDtoD(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   {
      // Same managed pointer on both sides: just synchronize the stream
      if (dst == src) { MFEM_STREAM_SYNC; return dst; }
      return CuMemcpyDtoH(dst, src, bytes);
   }
};
441 
/// The MMU device memory space: the 'device' is mmap-ed host memory, so the
/// transfers are plain std::memcpy and protection uses mprotect.
class MmuDeviceMemorySpace : public DeviceMemorySpace
{
public:
   MmuDeviceMemorySpace(): DeviceMemorySpace() { }
   void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
   void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
   /// Remove access; d_rw caches the state so redundant mprotect calls are skipped
   void Protect(const Memory &m)
   { if (m.d_rw) { m.d_rw = false; MmuProtect(m.d_ptr, m.bytes); } }
   /// Restore access; no-op if the region is already read/write
   void Unprotect(const Memory &m)
   { if (!m.d_rw) { m.d_rw = true; MmuAllow(m.d_ptr, m.bytes); } }
   /// Aliases need to be restricted during protection
   void AliasProtect(const void *ptr, size_t bytes)
   { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
   /// Aliases need to be prolongated for un-protection
   void AliasUnprotect(const void *ptr, size_t bytes)
   { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   void *DtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
};
466 
467 #ifndef MFEM_USE_UMPIRE
// Without Umpire support, the Umpire memory spaces reduce to the
// error-raising 'No' spaces: any attempt to use them aborts.
class UmpireHostMemorySpace : public NoHostMemorySpace { };
class UmpireDeviceMemorySpace : public NoDeviceMemorySpace { };
470 #else
/// The Umpire host memory space
class UmpireHostMemorySpace : public HostMemorySpace
{
private:
   const char *name;           // Umpire allocator name, queried from mm
   umpire::ResourceManager &rm;
   umpire::Allocator h_allocator;
   umpire::strategy::AllocationStrategy *strat;
public:
   ~UmpireHostMemorySpace() { h_allocator.release(); }
   /// Creates (or reuses, if it already exists) the named Umpire allocator,
   /// backed by a DynamicPool on top of the "HOST" resource
   UmpireHostMemorySpace():
      HostMemorySpace(),
      name(mm.GetUmpireAllocatorHostName()),
      rm(umpire::ResourceManager::getInstance()),
      h_allocator(rm.isAllocator(name)? rm.getAllocator(name):
                  rm.makeAllocator<umpire::strategy::DynamicPool>
                  (name, rm.getAllocator("HOST"))),
      strat(h_allocator.getAllocationStrategy()) { }
   void Alloc(void **ptr, size_t bytes) { *ptr = h_allocator.allocate(bytes); }
   void Dealloc(void *ptr) { h_allocator.deallocate(ptr); }
   /// Register an externally allocated pointer with Umpire's resource manager
   void Insert(void *ptr, size_t bytes)
   { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
};
494 
495 /// The Umpire device memory space
496 #ifdef MFEM_USE_CUDA
/// The Umpire device memory space: a DynamicPool on top of the "DEVICE"
/// Umpire resource; transfers go through the raw CUDA/HIP memcpy wrappers.
class UmpireDeviceMemorySpace : public DeviceMemorySpace
{
private:
   const char *name;           // Umpire allocator name, queried from mm
   umpire::ResourceManager &rm;
   umpire::Allocator d_allocator;
public:
   ~UmpireDeviceMemorySpace() { d_allocator.release(); }
   /// Creates (or reuses, if it already exists) the named Umpire allocator,
   /// backed by a DynamicPool on top of the "DEVICE" resource
   UmpireDeviceMemorySpace():
      DeviceMemorySpace(),
      name(mm.GetUmpireAllocatorDeviceName()),
      rm(umpire::ResourceManager::getInstance()),
      d_allocator(rm.isAllocator(name)? rm.getAllocator(name):
                  rm.makeAllocator<umpire::strategy::DynamicPool>
                  (name, rm.getAllocator("DEVICE"))) { }
   void Alloc(Memory &base) { base.d_ptr = d_allocator.allocate(base.bytes); }
   void Dealloc(Memory &base) { d_allocator.deallocate(base.d_ptr); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   {
      // NOTE(review): this class is only compiled when MFEM_USE_CUDA is
      // defined (see the enclosing #ifdef), so the CUDA branch always
      // returns and the HIP branch below is unreachable here — confirm
      // whether a HIP build was also intended to reach this class.
#ifdef MFEM_USE_CUDA
      return CuMemcpyHtoD(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyHtoD(dst, src, bytes);
#endif
      //rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
   void *DtoD(void* dst, const void* src, size_t bytes)
   {
      // See the note in HtoD about the CUDA/HIP branches
#ifdef MFEM_USE_CUDA
      return CuMemcpyDtoD(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyDtoD(dst, src, bytes);
#endif
      //rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
   void *DtoH(void *dst, const void *src, size_t bytes)
   {
      // See the note in HtoD about the CUDA/HIP branches
#ifdef MFEM_USE_CUDA
      return CuMemcpyDtoH(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyDtoH(dst, src, bytes);
#endif
      //rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
};
545 #else
546 class UmpireDeviceMemorySpace : public NoDeviceMemorySpace { };
547 #endif // MFEM_USE_CUDA
548 #endif // MFEM_USE_UMPIRE
549 
/// Memory space controller class: owns one backend (memory space) object per
/// MemoryType, creating the host and device arrays of controllers.
class Ctrl
{
   typedef MemoryType MT;

public:
   // One controller per host MemoryType, indexed by the enum value
   HostMemorySpace *host[HostMemoryTypeSize];
   // One controller per device MemoryType, indexed by the enum value shifted
   // down by DeviceMemoryType
   DeviceMemorySpace *device[DeviceMemoryTypeSize];

public:
   Ctrl(): host{nullptr}, device{nullptr} { }

   /// Create the always-available backends; the remaining ones stay null and
   /// are created on demand by Host()/Device().
   void Configure()
   {
      if (host[HostMemoryType])
      {
         mfem_error("Memory backends have already been configured!");
      }

      // Filling the host memory backends
      // HOST, HOST_32 & HOST_64 are always ready
      // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
      host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
      host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
      host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
      // HOST_DEBUG is delayed, as it reroutes signals
      host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
      host[static_cast<int>(MT::HOST_UMPIRE)] = new UmpireHostMemorySpace();
      host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();

      // Filling the device memory backends, shifting with the device size
      constexpr int shift = DeviceMemoryType;
      device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
      // All other devices controllers are delayed
      device[static_cast<int>(MemoryType::DEVICE)-shift] = nullptr;
      device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
      device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
   }

   /// Return the controller for host type @a mt, creating it on first use
   HostMemorySpace* Host(const MemoryType mt)
   {
      const int mt_i = static_cast<int>(mt);
      // Delayed host controllers initialization
      if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
      MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
      return host[mt_i];
   }

   /// Return the controller for device type @a mt, creating it on first use
   DeviceMemorySpace* Device(const MemoryType mt)
   {
      const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
      MFEM_ASSERT(mt_i >= 0,"");
      // Lazy device controller initializations
      if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
      MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
      return device[mt_i];
   }

   /// Delete all controllers (deleting a still-null entry is a no-op)
   ~Ctrl()
   {
      constexpr int mt_h = HostMemoryType;
      constexpr int mt_d = DeviceMemoryType;
      for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
      for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
   }

private:
   /// Create a delayed host controller; only HOST_DEBUG is created lazily
   HostMemorySpace* NewHostCtrl(const MemoryType mt)
   {
      if (mt == MT::HOST_DEBUG) { return new MmuHostMemorySpace(); }
      MFEM_ABORT("Unknown host memory controller!");
      return nullptr;
   }

   /// Create a delayed device controller for the given device MemoryType
   DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
   {
      switch (mt)
      {
         case MT::DEVICE_UMPIRE: return new UmpireDeviceMemorySpace();
         case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
         case MT::DEVICE:
         {
#if defined(MFEM_USE_CUDA)
            return new CudaDeviceMemorySpace();
#elif defined(MFEM_USE_HIP)
            return new HipDeviceMemorySpace();
#else
            MFEM_ABORT("No device memory controller!");
            break;
#endif
         }
         default: MFEM_ABORT("Unknown device memory controller!");
      }
      return nullptr;
   }
};
646 
647 } // namespace mfem::internal
648 
// File-global controller instance holding the host/device memory backends
static internal::Ctrl *ctrl;
650 
651 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
652  unsigned &flags)
653 {
654  MFEM_ASSERT(exists, "Internal error!");
655  MFEM_ASSERT(mt != MemoryType::HOST, "Internal error!");
656  const bool is_host_mem = IsHostMemory(mt);
657  const MemType dual_mt = GetDualMemoryType_(mt);
658  const MemType h_mt = is_host_mem ? mt : dual_mt;
659  const MemType d_mt = is_host_mem ? dual_mt : mt;
660  MFEM_VERIFY_TYPES(h_mt, d_mt);
661  void *h_ptr = h_tmp;
662  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
663  flags = Mem::REGISTERED;
665  flags |= is_host_mem ? Mem::VALID_HOST : Mem::VALID_DEVICE;
666  if (is_host_mem) { mm.Insert(h_ptr, bytes, h_mt, d_mt); }
667  else { mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); }
668  CheckHostMemoryType_(h_mt, h_ptr);
669  return h_ptr;
670 }
671 
672 void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
673  MemoryType mt,
674  bool own, bool alias, unsigned &flags)
675 {
676  MFEM_CONTRACT_VAR(alias);
677  MFEM_ASSERT(exists, "Internal error!");
678  MFEM_ASSERT(!alias, "Cannot register an alias!");
679  const bool is_host_mem = IsHostMemory(mt);
680  const MemType dual_mt = GetDualMemoryType_(mt);
681  const MemType h_mt = is_host_mem ? mt : dual_mt;
682  const MemType d_mt = is_host_mem ? dual_mt : mt;
683  MFEM_VERIFY_TYPES(h_mt, d_mt);
684 
685  if (ptr == nullptr && h_tmp == nullptr)
686  {
687  MFEM_VERIFY(bytes == 0, "internal error");
688  return nullptr;
689  }
690 
692  void *h_ptr;
693 
694  if (is_host_mem) // HOST TYPES + MANAGED
695  {
696  h_ptr = ptr;
697  mm.Insert(h_ptr, bytes, h_mt, d_mt);
698  flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
700  }
701  else // DEVICE TYPES
702  {
703  h_ptr = h_tmp;
704  if (own && h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
705  mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
706  flags = own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE;
707  flags = own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST;
708  flags |= Mem::VALID_DEVICE;
709  }
710  CheckHostMemoryType_(h_mt, h_ptr);
711  return h_ptr;
712 }
713 
714 void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
715  unsigned base_flags, unsigned &flags)
716 {
717  mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
718  base_flags & Mem::ALIAS);
719  flags = (base_flags | Mem::ALIAS | Mem::OWNS_INTERNAL) &
721 }
722 
/// Delete the memory behind @a h_ptr according to the ownership encoded in
/// @a flags; returns the host MemoryType of the deleted block.
MemoryType MemoryManager::Delete_(void *h_ptr, MemoryType mt, unsigned flags)
{
   const bool alias = flags & Mem::ALIAS;
   const bool registered = flags & Mem::REGISTERED;
   const bool owns_host = flags & Mem::OWNS_HOST;
   const bool owns_device = flags & Mem::OWNS_DEVICE;
   const bool owns_internal = flags & Mem::OWNS_INTERNAL;
   MFEM_ASSERT(registered || IsHostMemory(mt),"");
   MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
   // Nothing to do if the manager is already destroyed (shutdown order) or
   // the block was never registered with it
   if (!mm.exists || !registered) { return mt; }
   if (alias)
   {
      if (owns_internal)
      {
         const MemoryType h_mt = maps->aliases.at(h_ptr).h_mt;
         MFEM_ASSERT(mt == h_mt,"");
         mm.EraseAlias(h_ptr);
         return h_mt;
      }
   }
   else // Known
   {
      const MemoryType h_mt = mt;
      MFEM_ASSERT(!owns_internal ||
                  mt == maps->memories.at(h_ptr).h_mt,"");
      // Plain HOST memory is not deallocated through a controller here —
      // presumably it is freed by the caller; other host types go through
      // their host memory space. TODO confirm against Memory<T>::Delete.
      if (owns_host && (h_mt != MemoryType::HOST))
      { ctrl->Host(h_mt)->Dealloc(h_ptr); }
      // Erase the registration; owns_device presumably triggers the device
      // deallocation inside Erase (its definition is not visible here)
      if (owns_internal) { mm.Erase(h_ptr, owns_device); }
      return h_mt;
   }
   return mt;
}
755 
756 bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
757  MemoryType h_mt, size_t bytes,
758  unsigned flags)
759 {
760  if (!h_ptr)
761  {
762  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
763  return true;
764  }
765 
766  const bool known = mm.IsKnown(h_ptr);
767  const bool alias = mm.IsAlias(h_ptr);
768  const bool check = known || ((flags & Mem::ALIAS) && alias);
769  MFEM_VERIFY(check,"");
770  const internal::Memory &mem =
771  (flags & Mem::ALIAS) ?
772  *maps->aliases.at(h_ptr).mem : maps->memories.at(h_ptr);
773  const MemoryType &d_mt = mem.d_mt;
774  switch (mc)
775  {
777  {
778  MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
779  h_mt == MemoryType::HOST_64,"");
780  return true;
781  }
783  {
784  MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
785  return true;
786  }
787  case MemoryClass::DEVICE:
788  {
789  MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
790  d_mt == MemoryType::DEVICE_DEBUG ||
791  d_mt == MemoryType::DEVICE_UMPIRE ||
792  d_mt == MemoryType::MANAGED,"");
793  return true;
794  }
796  {
797  MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
798  d_mt == MemoryType::MANAGED),"");
799  return true;
800  }
801  default: break;
802  }
803  return true;
804 }
805 
806 void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
807  size_t bytes, unsigned &flags)
808 {
809  MemoryManager::CheckHostMemoryType_(h_mt, h_ptr);
810  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
811  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
813  {
814  const bool copy = !(flags & Mem::VALID_HOST);
815  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
816  if (flags & Mem::ALIAS)
817  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
818  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
819  }
820  else
821  {
822  const bool copy = !(flags & Mem::VALID_DEVICE);
823  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
824  if (flags & Mem::ALIAS)
825  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
826  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
827  }
828 }
829 
830 const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
831  size_t bytes, unsigned &flags)
832 {
833  CheckHostMemoryType_(h_mt, h_ptr);
834  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
835  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
837  {
838  const bool copy = !(flags & Mem::VALID_HOST);
839  flags |= Mem::VALID_HOST;
840  if (flags & Mem::ALIAS)
841  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
842  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
843  }
844  else
845  {
846  const bool copy = !(flags & Mem::VALID_DEVICE);
847  flags |= Mem::VALID_DEVICE;
848  if (flags & Mem::ALIAS)
849  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
850  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
851  }
852 }
853 
854 void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
855  size_t bytes, unsigned &flags)
856 {
857  CheckHostMemoryType_(h_mt, h_ptr);
858  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
859  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
861  {
862  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
863  if (flags & Mem::ALIAS)
864  { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
865  else { return mm.GetHostPtr(h_ptr, bytes, false); }
866  }
867  else
868  {
869  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
870  if (flags & Mem::ALIAS)
871  { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
872  else { return mm.GetDevicePtr(h_ptr, bytes, false); }
873 
874  }
875 }
876 
/// Make the alias valid wherever its base Memory is valid, copying data
/// through the Get*Ptr helpers (third argument true requests a copy, cf.
/// the `copy` locals in ReadWrite_/Read_), and update the alias flags.
void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
                               size_t alias_bytes, unsigned base_flags,
                               unsigned &alias_flags)
{
   // This is called only when (base_flags & Mem::REGISTERED) is true.
   // Note that (alias_flags & REGISTERED) may not be true.
   MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
   if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
   {
      // Base is valid on host but the alias is not: bring the alias range up
      // to date on the host
      mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
   }
   if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
   {
      if (!(alias_flags & Mem::REGISTERED))
      {
         // Lazily register the alias before requesting its device pointer
         mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
         alias_flags = (alias_flags | Mem::REGISTERED | Mem::OWNS_INTERNAL) &
                       ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
      }
      mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
   }
   // The alias inherits the base's host/device validity flags
   alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
                 (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
}
901 
902 MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr)
903 {
904  if (mm.exists)
905  {
906  const bool known = mm.IsKnown(h_ptr);
907  if (known)
908  {
909  internal::Memory &mem = maps->memories.at(h_ptr);
910  return mem.d_mt;
911  }
912  const bool alias = mm.IsAlias(h_ptr);
913  if (alias)
914  {
915  internal::Memory *mem = maps->aliases.at(h_ptr).mem;
916  return mem->d_mt;
917  }
918  }
919  MFEM_ABORT("internal error");
920  return MemoryManager::host_mem_type;
921 }
922 
923 MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
924 {
925  if (!mm.exists) { return MemoryManager::host_mem_type; }
926  if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
927  if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).mem->h_mt; }
928  return MemoryManager::host_mem_type;
929 }
930 
931 void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
932  size_t bytes, unsigned src_flags,
933  unsigned &dst_flags)
934 {
935  // Type of copy to use based on the src and dest validity flags:
936  // | src
937  // | h | d | hd
938  // -----------+-----+-----+------
939  // h | h2h d2h h2h
940  // dest d | h2d d2d d2d
941  // hd | h2h d2d d2d
942 
943  const bool dst_on_host =
944  (dst_flags & Mem::VALID_HOST) &&
945  (!(dst_flags & Mem::VALID_DEVICE) ||
946  ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));
947 
948  dst_flags = dst_flags &
949  ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
950 
951  const bool src_on_host =
952  (src_flags & Mem::VALID_HOST) &&
953  (!(src_flags & Mem::VALID_DEVICE) ||
954  ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));
955 
956  const void *src_d_ptr =
957  src_on_host ? NULL :
958  ((src_flags & Mem::ALIAS) ?
959  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
960  mm.GetDevicePtr(src_h_ptr, bytes, false));
961 
962  if (dst_on_host)
963  {
964  if (src_on_host)
965  {
966  if (dst_h_ptr != src_h_ptr && bytes != 0)
967  {
968  MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
969  (const char*)src_h_ptr + bytes <= dst_h_ptr,
970  "data overlaps!");
971  std::memcpy(dst_h_ptr, src_h_ptr, bytes);
972  }
973  }
974  else
975  {
976  if (dst_h_ptr != src_d_ptr && bytes != 0)
977  {
978  internal::Memory &src_d_base = maps->memories.at(src_d_ptr);
979  MemoryType src_d_mt = src_d_base.d_mt;
980  ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
981  }
982  }
983  }
984  else
985  {
986  void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
987  mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
988  mm.GetDevicePtr(dst_h_ptr, bytes, false);
989  if (src_on_host)
990  {
991  const bool known = mm.IsKnown(dst_h_ptr);
992  const bool alias = dst_flags & Mem::ALIAS;
993  MFEM_VERIFY(alias||known,"");
994  const MemoryType d_mt = known ?
995  maps->memories.at(dst_h_ptr).d_mt :
996  maps->aliases.at(dst_h_ptr).mem->d_mt;
997  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
998  }
999  else
1000  {
1001  if (dest_d_ptr != src_d_ptr && bytes != 0)
1002  {
1003  const bool known = mm.IsKnown(dst_h_ptr);
1004  const bool alias = dst_flags & Mem::ALIAS;
1005  MFEM_VERIFY(alias||known,"");
1006  const MemoryType d_mt = known ?
1007  maps->memories.at(dst_h_ptr).d_mt :
1008  maps->aliases.at(dst_h_ptr).mem->d_mt;
1009  ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
1010  }
1011  }
1012  }
1013 }
1014 
1015 void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
1016  size_t bytes, unsigned src_flags)
1017 {
1018  const bool src_on_host = src_flags & Mem::VALID_HOST;
1019  if (src_on_host)
1020  {
1021  if (dest_h_ptr != src_h_ptr && bytes != 0)
1022  {
1023  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1024  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1025  "data overlaps!");
1026  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1027  }
1028  }
1029  else
1030  {
1031  MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
1032  const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
1033  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1034  mm.GetDevicePtr(src_h_ptr, bytes, false);
1035  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1036  const MemoryType d_mt = base.d_mt;
1037  ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
1038  }
1039 }
1040 
1041 void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
1042  size_t bytes, unsigned &dest_flags)
1043 {
1044  const bool dest_on_host = dest_flags & Mem::VALID_HOST;
1045  if (dest_on_host)
1046  {
1047  if (dest_h_ptr != src_h_ptr && bytes != 0)
1048  {
1049  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1050  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1051  "data overlaps!");
1052  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1053  }
1054  }
1055  else
1056  {
1057  void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
1058  mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
1059  mm.GetDevicePtr(dest_h_ptr, bytes, false);
1060  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1061  const MemoryType d_mt = base.d_mt;
1062  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1063  }
1064  dest_flags = dest_flags &
1065  ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1066 }
1067 
1068 bool MemoryManager::IsKnown_(const void *h_ptr)
1069 {
1070  return maps->memories.find(h_ptr) != maps->memories.end();
1071 }
1072 
1073 bool MemoryManager::IsAlias_(const void *h_ptr)
1074 {
1075  return maps->aliases.find(h_ptr) != maps->aliases.end();
1076 }
1077 
// Register a host allocation of 'bytes' bytes with host/device memory types
// (h_mt, d_mt). Inserting NULL is allowed only with size 0 and is a no-op.
void MemoryManager::Insert(void *h_ptr, size_t bytes,
                           MemoryType h_mt, MemoryType d_mt)
{
   if (h_ptr == NULL)
   {
      MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
      return;
   }
   MFEM_VERIFY_TYPES(h_mt, d_mt);
#ifdef MFEM_DEBUG
   auto res =
#endif
      maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
#ifdef MFEM_DEBUG
   // Debug builds tolerate re-inserting the same address only when the
   // existing record is compatible: at least as large and same memory types.
   // (In non-debug builds a duplicate insert is silently ignored by emplace.)
   if (res.second == false)
   {
      auto &m = res.first->second;
      MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt && m.d_mt == d_mt,
                  "Address already present with different attributes!");
   }
#endif
}
1100 
1101 void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
1102  MemoryType h_mt, MemoryType d_mt)
1103 {
1104  MFEM_VERIFY_TYPES(h_mt, d_mt);
1105  MFEM_ASSERT(h_ptr != NULL, "internal error");
1106  Insert(h_ptr, bytes, h_mt, d_mt);
1107  internal::Memory &mem = maps->memories.at(h_ptr);
1108  if (d_ptr == NULL) { ctrl->Device(d_mt)->Alloc(mem); }
1109  else { mem.d_ptr = d_ptr; }
1110 }
1111 
// Register 'alias_ptr' as an alias of 'bytes' bytes into the allocation at
// 'base_ptr'. When base_is_alias is true, 'base_ptr' is itself an alias and
// is first resolved to its base allocation. Alias records are reference
// counted: registering the same alias again bumps its counter.
void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
                                const size_t bytes, const bool base_is_alias)
{
   // Byte offset of the alias inside the base allocation.
   size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
                                       static_cast<const char*>(base_ptr));
   if (!base_ptr)
   {
      // A NULL base is tolerated only for the degenerate zero-offset case.
      MFEM_VERIFY(offset == 0,
                  "Trying to add alias to NULL at offset " << offset);
      return;
   }
   if (base_is_alias)
   {
      // Chase the alias to its base Memory so that every Alias record points
      // directly at a real allocation (offsets compose additively).
      const internal::Alias &alias = maps->aliases.at(base_ptr);
      MFEM_ASSERT(alias.mem,"");
      base_ptr = alias.mem->h_ptr;
      offset += alias.offset;
   }
   internal::Memory &mem = maps->memories.at(base_ptr);
   auto res =
      maps->aliases.emplace(alias_ptr,
                            internal::Alias{&mem, offset, bytes, 1, mem.h_mt});
   if (res.second == false) // alias_ptr was already in the map
   {
      if (res.first->second.mem != &mem || res.first->second.offset != offset)
      {
         mfem_error("alias already exists with different base/offset!");
      }
      else
      {
         // Same alias registered again: just increase its reference count.
         res.first->second.counter++;
      }
   }
}
1146 
1147 void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
1148 {
1149  if (!h_ptr) { return; }
1150  auto mem_map_iter = maps->memories.find(h_ptr);
1151  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1152  internal::Memory &mem = mem_map_iter->second;
1153  if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1154  maps->memories.erase(mem_map_iter);
1155 }
1156 
1157 void MemoryManager::EraseAlias(void *alias_ptr)
1158 {
1159  if (!alias_ptr) { return; }
1160  auto alias_map_iter = maps->aliases.find(alias_ptr);
1161  if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
1162  internal::Alias &alias = alias_map_iter->second;
1163  if (--alias.counter) { return; }
1164  maps->aliases.erase(alias_map_iter);
1165 }
1166 
// Return the device pointer of a registered (non-alias) host pointer,
// allocating the device buffer on first use and, when copy_data is true,
// copying 'bytes' bytes of host data to it.
void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
                                  bool copy_data)
{
   if (!h_ptr)
   {
      MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
      return NULL;
   }
   internal::Memory &mem = maps->memories.at(h_ptr);
   const MemoryType &h_mt = mem.h_mt;
   const MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   // Lazy device allocation.
   if (!mem.d_ptr) { ctrl->Device(d_mt)->Alloc(mem); }
   // Aliases might have done some protections
   ctrl->Device(d_mt)->Unprotect(mem);
   if (copy_data)
   {
      MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
      ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes);
   }
   // Protect the host side now that the device pointer is handed out
   // (presumably a no-op except for the debug/protected host controllers --
   // verify against the HostMemorySpace implementations).
   ctrl->Host(h_mt)->Protect(mem, bytes);
   return mem.d_ptr;
}
1190 
// Return the device pointer corresponding to a registered alias, allocating
// the base's device buffer on first use and, when 'copy' is true, copying
// 'bytes' bytes of the alias's host data to the device.
void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
                                       bool copy)
{
   if (!alias_ptr)
   {
      MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
      return NULL;
   }
   auto &alias_map = maps->aliases;
   auto alias_map_iter = alias_map.find(alias_ptr);
   if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
   const internal::Alias &alias = alias_map_iter->second;
   const size_t offset = alias.offset;
   internal::Memory &mem = *alias.mem;
   const MemoryType &h_mt = mem.h_mt;
   const MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   // Lazily allocate the *base* device buffer; the alias points into it.
   if (!mem.d_ptr) { ctrl->Device(d_mt)->Alloc(mem); }
   void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
   void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
   MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
   MFEM_ASSERT(bytes <= alias.bytes, "internal error");
   // Mark the base's device data as not read-write clean; presumably consumed
   // by the protection controllers -- verify against internal::Memory usage.
   mem.d_rw = false;
   // Lift any range protections before touching the alias range, copy if
   // requested, then re-protect the host range.
   ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes);
   ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
   if (copy) { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
   ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
   return alias_d_ptr;
}
1220 
1221 void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
1222 {
1223  const internal::Memory &mem = maps->memories.at(ptr);
1224  MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
1225  MFEM_ASSERT(bytes <= mem.bytes, "internal error")
1226  const MemoryType &h_mt = mem.h_mt;
1227  const MemoryType &d_mt = mem.d_mt;
1228  MFEM_VERIFY_TYPES(h_mt, d_mt);
1229  // Aliases might have done some protections
1230  ctrl->Host(h_mt)->Unprotect(mem, bytes);
1231  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1232  if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
1233  if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
1234  return mem.h_ptr;
1235 }
1236 
// Return the host pointer of a registered alias, optionally copying the
// corresponding device range back to the host first.
void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
                                     bool copy_data)
{
   const internal::Alias &alias = maps->aliases.at(ptr);
   const internal::Memory *const mem = alias.mem;
   const MemoryType &h_mt = mem->h_mt;
   const MemoryType &d_mt = mem->d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
   // NOTE(review): alias_d_ptr is computed even when mem->d_ptr is NULL (all
   // uses below are guarded by mem->d_ptr) -- confirm this cannot be
   // dereferenced before a device allocation exists.
   void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
   MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
   // Mark the base's host data as not read-write clean; presumably consumed
   // by the protection controllers -- verify against internal::Memory usage.
   mem->h_rw = false;
   // Lift protections on the alias range, copy device -> host if requested,
   // then re-protect the device range.
   ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
   if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
   if (copy_data && mem->d_ptr)
   { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
   if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
   return alias_h_ptr;
}
1256 
1258 {
1259  if (exists) { return; }
1260  maps = new internal::Maps();
1261  ctrl = new internal::Ctrl();
1262  ctrl->Configure();
1263  exists = true;
1264 }
1265 
1267 
// Tear down the manager on destruction if it is still alive.
MemoryManager::~MemoryManager() { if (exists) { Destroy(); } }
1269 
1271  const MemoryType device_mt)
1272 {
1273  Init();
1274  host_mem_type = host_mt;
1275  device_mem_type = device_mt;
1276 }
1277 
1278 #ifdef MFEM_USE_UMPIRE
1280  const char *d_name)
1281 {
1282  h_umpire_name = h_name;
1283  d_umpire_name = d_name;
1284 }
1285 #endif
1286 
1288 {
1289  MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
1290  for (auto& n : maps->memories)
1291  {
1292  internal::Memory &mem = n.second;
1293  bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
1294  if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1295  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1296  }
1297  delete maps; maps = nullptr;
1298  delete ctrl; ctrl = nullptr;
1299  host_mem_type = MemoryType::HOST;
1300  device_mem_type = MemoryType::HOST;
1301  exists = false;
1302 }
1303 
1305 {
1306  if (ptr != NULL)
1307  {
1308  if (!IsKnown(ptr))
1309  {
1310  mfem_error("Pointer is not registered!");
1311  }
1312  }
1313 }
1314 
1315 int MemoryManager::PrintPtrs(std::ostream &out)
1316 {
1317  int n_out = 0;
1318  for (const auto& n : maps->memories)
1319  {
1320  const internal::Memory &mem = n.second;
1321  out << "\nkey " << n.first << ", "
1322  << "h_ptr " << mem.h_ptr << ", "
1323  << "d_ptr " << mem.d_ptr;
1324  n_out++;
1325  }
1326  if (maps->memories.size() > 0) { out << std::endl; }
1327  return n_out;
1328 }
1329 
1331 {
1332  int n_out = 0;
1333  for (const auto& n : maps->aliases)
1334  {
1335  const internal::Alias &alias = n.second;
1336  out << "\nalias: key " << n.first << ", "
1337  << "h_ptr " << alias.mem->h_ptr << ", "
1338  << "offset " << alias.offset << ", "
1339  << "bytes " << alias.bytes << ", "
1340  << "counter " << alias.counter;
1341  n_out++;
1342  }
1343  if (maps->aliases.size() > 0) { out << std::endl; }
1344  return n_out;
1345 }
1346 
1347 int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
1348  unsigned flags)
1349 {
1350  void *d_ptr = (flags & Mem::ALIAS) ?
1351  mm.GetAliasDevicePtr(h_ptr, size, false) :
1352  mm.GetDevicePtr(h_ptr, size, false);
1353  char *h_buf = new char[size];
1354  CuMemcpyDtoH(h_buf, d_ptr, size);
1355  int res = std::memcmp(h_ptr, h_buf, size);
1356  delete [] h_buf;
1357  return res;
1358 }
1359 
1360 
// Print the state of a Memory object based on its internal flags. Useful in
// a debugger; see also Memory<T>::PrintFlags().
void MemoryPrintFlags(unsigned flags)
{
   typedef Memory<int> Mem;
   // Small helper: is the given flag bit set?
   const auto bit = [flags](unsigned mask) { return bool(flags & mask); };
   mfem::out
         << "\n registered = " << bit(Mem::REGISTERED)
         << "\n owns host = " << bit(Mem::OWNS_HOST)
         << "\n owns device = " << bit(Mem::OWNS_DEVICE)
         << "\n owns internal = " << bit(Mem::OWNS_INTERNAL)
         << "\n valid host = " << bit(Mem::VALID_HOST)
         << "\n valid device = " << bit(Mem::VALID_DEVICE)
         << "\n device flag = " << bit(Mem::USE_DEVICE)
         << "\n alias = " << bit(Mem::ALIAS)
         << std::endl;
}
1375 
1376 void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr)
1377 {
1378  if (!mm.exists) {return;}
1379  const bool known = mm.IsKnown(h_ptr);
1380  const bool alias = mm.IsAlias(h_ptr);
1381  if (known) { MFEM_VERIFY(h_mt == maps->memories.at(h_ptr).h_mt,""); }
1382  if (alias) { MFEM_VERIFY(h_mt == maps->aliases.at(h_ptr).mem->h_mt,""); }
1383 }
1384 
1386 
// True while the manager is initialized (set by Init(), cleared by Destroy()).
bool MemoryManager::exists = false;

#ifdef MFEM_USE_UMPIRE
// Default Umpire allocator names; can be overridden through
// SetUmpireAllocatorNames().
const char* MemoryManager::h_umpire_name = "HOST";
const char* MemoryManager::d_umpire_name = "DEVICE";
#endif

// Default host/device memory types; set by Configure() and reset to HOST by
// Destroy().
MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
MemoryType MemoryManager::device_mem_type = MemoryType::HOST;
1396 
1398 {
1399  "host-std", "host-32", "host-64", "host-debug", "host-umpire",
1400 #if defined(MFEM_USE_CUDA)
1401  "cuda-uvm",
1402  "cuda",
1403 #elif defined(MFEM_USE_HIP)
1404  "hip-uvm",
1405  "hip",
1406 #else
1407  "managed",
1408  "device",
1409 #endif
1410  "device-debug",
1411 #if defined(MFEM_USE_CUDA)
1412  "cuda-umpire"
1413 #elif defined(MFEM_USE_HIP)
1414  "hip-umpire"
1415 #else
1416  "device-umpire"
1417 #endif
1418 };
1419 
1420 } // namespace mfem
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition: cuda.cpp:79
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition: cuda.cpp:64
Host memory; aligned at 64 bytes.
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
Definition: mem_manager.hpp:69
Device memory; using CUDA or HIP *Malloc and *Free.
void PrintFlags() const
Print the internal flags.
Device memory; using Umpire.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
static MemoryType GetHostMemoryType()
Host pointer is valid.
Host memory; allocated from a &quot;host-debug&quot; pool.
void Configure(const MemoryType h_mt, const MemoryType d_mt)
int PrintAliases(std::ostream &out=mfem::out)
int CompareHostAndDevice(int size) const
If both the host and the device data are valid, compare their contents.
bool IsAlias(const void *h_ptr)
Return true if the pointer is known by the memory manager as an alias.
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
Definition: cuda.cpp:49
Host memory; aligned at 32 bytes.
void SetUmpireAllocatorNames(const char *h_name, const char *d_name)
Set the host and device Umpire allocator names.
constexpr int DeviceMemoryType
Definition: mem_manager.hpp:48
constexpr int HostMemoryType
Definition: mem_manager.hpp:46
void mfem_error(const char *msg)
Function called when an error is encountered. Used by the macros MFEM_ABORT, MFEM_ASSERT, MFEM_VERIFY.
Definition: error.cpp:153
double b
Definition: lissajous.cpp:42
static MemoryType GetDeviceMemoryType()
void * HipMemFree(void *dptr)
Frees device memory.
Definition: hip.cpp:64
Ownership flag for internal Memory data.
Device pointer is valid
void Destroy()
Free all the device memories.
int PrintPtrs(std::ostream &out=mfem::out)
The host pointer will be deleted by Delete()
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition: cuda.cpp:102
void RegisterCheck(void *h_ptr)
Check if the host pointer has been registered in the memory manager.
constexpr int MemoryTypeSize
Static casts to &#39;int&#39; and sizes of some useful memory types.
Definition: mem_manager.hpp:45
void * HipMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition: hip.cpp:125
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
Definition: hip.cpp:34
void Init()
Initialize the memory manager.
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:28
bool IsKnown(const void *h_ptr)
Return true if the pointer is known by the memory manager.
constexpr int HostMemoryTypeSize
Definition: mem_manager.hpp:47
bool IsDeviceMemory(MemoryType mt)
Definition: mem_manager.hpp:70
Pointer is an alias.
MemoryManager mm
The (single) global memory manager object.
double a
Definition: lissajous.cpp:41
Host memory; using new[] and delete[].
void * HipMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device.
Definition: hip.cpp:79
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
Definition: mem_manager.cpp:51
void * HipMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition: hip.cpp:102
constexpr int DeviceMemoryTypeSize
Definition: mem_manager.hpp:49
Host memory; using Umpire.
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
Return a suitable MemoryClass from a pair of MemoryClasses.
Definition: mem_manager.cpp:99
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition: cuda.cpp:34
MemoryClass
Memory classes identify sets of memory types.
Definition: mem_manager.hpp:58
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition: cuda.cpp:125
void MemoryPrintFlags(unsigned flags)
Print the state of a Memory object based on its internal flags. Useful in a debugger. See also Memory&lt;T&gt;::PrintFlags().