MFEM  v4.1.0
Finite element discretization library
mem_manager.cpp
// Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
// LICENSE and NOTICE for details. LLNL-CODE-806117.
//
// This file is part of the MFEM library. For more information and source code
// availability visit https://mfem.org.
//
// MFEM is free software; you can redistribute it and/or modify it under the
// terms of the BSD-3 license. We welcome feedback and contributions, see file
// CONTRIBUTING.md for details.

#include "forall.hpp"
#include "mem_manager.hpp"

#include <list>
#include <cstring> // std::memcpy, std::memcmp
#include <unordered_map>
#include <algorithm> // std::max

// Uncomment to try the _WIN32 platform
//#define _WIN32
//#define _aligned_malloc(s,a) malloc(s)

#ifndef _WIN32
#include <unistd.h>
#include <signal.h>
#include <sys/mman.h>
#define mfem_memalign(p,a,s) posix_memalign(p,a,s)
#else
#define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
#endif

#ifdef MFEM_USE_UMPIRE
#include "umpire/Umpire.hpp"

// Make sure Umpire is built with CUDA support if MFEM is built with it.
#if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
#error "CUDA is not enabled in Umpire!"
#endif
// Make sure Umpire is built with HIP support if MFEM is built with it.
#if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
#error "HIP is not enabled in Umpire!"
#endif
#endif // MFEM_USE_UMPIRE

namespace mfem
{

MemoryType GetMemoryType(MemoryClass mc)
{
   switch (mc)
   {
      case MemoryClass::HOST:    return mm.GetHostMemoryType();
      case MemoryClass::HOST_32: return MemoryType::HOST_32;
      case MemoryClass::HOST_64: return MemoryType::HOST_64;
      case MemoryClass::DEVICE:  return mm.GetDeviceMemoryType();
      case MemoryClass::MANAGED: return MemoryType::MANAGED;
   }
   MFEM_VERIFY(false,"");
   return MemoryType::HOST;
}

// We want to keep these pairs, as they are checked in MFEM_VERIFY_TYPES
MemoryType MemoryManager::GetDualMemoryType_(MemoryType mt)
{
   switch (mt)
   {
      case MemoryType::HOST:          return MemoryType::DEVICE;
      case MemoryType::HOST_32:       return MemoryType::DEVICE;
      case MemoryType::HOST_64:       return MemoryType::DEVICE;
      case MemoryType::HOST_DEBUG:    return MemoryType::DEVICE_DEBUG;
      case MemoryType::HOST_UMPIRE:   return MemoryType::DEVICE_UMPIRE;
      case MemoryType::MANAGED:       return MemoryType::MANAGED;
      case MemoryType::DEVICE:        return MemoryType::HOST;
      case MemoryType::DEVICE_DEBUG:  return MemoryType::HOST_DEBUG;
      case MemoryType::DEVICE_UMPIRE: return MemoryType::HOST_UMPIRE;
      default: mfem_error("Unknown memory type!");
   }
   MFEM_VERIFY(false,"");
   return MemoryType::HOST;
}
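
// Example (illustrative, not part of the original source): the pairs above
// are symmetric for the debug and Umpire types, e.g.
//
//    GetDualMemoryType_(MemoryType::HOST_DEBUG);   // -> DEVICE_DEBUG
//    GetDualMemoryType_(MemoryType::DEVICE_DEBUG); // -> HOST_DEBUG
//
// while HOST, HOST_32 and HOST_64 all share DEVICE as their dual, so the
// reverse mapping of DEVICE can only pick one of them (plain HOST).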

static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
{
   MFEM_ASSERT(IsHostMemory(h_mt),"");
   MFEM_ASSERT(IsDeviceMemory(d_mt),"");
   const bool sync =
      (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE) ||
      (h_mt == MemoryType::HOST_DEBUG && d_mt == MemoryType::DEVICE_DEBUG) ||
      (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
      (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
      (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
      (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE);
   MFEM_VERIFY(sync, "");
}

MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
{
   //          |   HOST     HOST_32   HOST_64   DEVICE    MANAGED
   // ---------+---------------------------------------------------
   //  HOST    |   HOST     HOST_32   HOST_64   DEVICE    MANAGED
   //  HOST_32 |   HOST_32  HOST_32   HOST_64   DEVICE    MANAGED
   //  HOST_64 |   HOST_64  HOST_64   HOST_64   DEVICE    MANAGED
   //  DEVICE  |   DEVICE   DEVICE    DEVICE    DEVICE    MANAGED
   //  MANAGED |   MANAGED  MANAGED   MANAGED   MANAGED   MANAGED

   // Using the enumeration ordering:
   //    HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
   // the above table is simply: a*b = max(a,b).

   return std::max(mc1, mc2);
}
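
// Example (illustrative): combining two classes picks the larger one under
// the ordering above, e.g.
//
//    MemoryClass mc = MemoryClass::HOST_32 * MemoryClass::DEVICE;
//    // mc == MemoryClass::DEVICE, since HOST_32 < DEVICE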


// Instantiate Memory<T>::PrintFlags for T = int and T = double.
template void Memory<int>::PrintFlags() const;
template void Memory<double>::PrintFlags() const;

// Instantiate Memory<T>::CompareHostAndDevice for T = int and T = double.
template int Memory<int>::CompareHostAndDevice(int size) const;
template int Memory<double>::CompareHostAndDevice(int size) const;


namespace internal
{

/// Memory class that holds:
///   - the host and the device pointer
///   - the size in bytes of this memory region
///   - the host and device type of this memory region
struct Memory
{
   void *const h_ptr;
   void *d_ptr;
   const size_t bytes;
   const MemoryType h_mt, d_mt;
   Memory(void *p, size_t b, MemoryType h, MemoryType d):
      h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d) { }
};

/// Alias class that holds the base memory region and the offset
struct Alias
{
   Memory *const mem;
   const size_t offset, bytes;
   size_t counter;
   const MemoryType h_mt;
};

/// Maps for the Memory and the Alias classes
typedef std::unordered_map<const void*, Memory> MemoryMap;
typedef std::unordered_map<const void*, Alias> AliasMap;

struct Maps
{
   MemoryMap memories;
   AliasMap aliases;
};
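
// Sketch (illustrative, hypothetical addresses): a 100-byte base region at
// h_ptr == 0x1000 with a 20-byte alias at offset 40 is stored as
//
//    maps->memories[0x1000] = Memory{h_ptr=0x1000, bytes=100, ...}
//    maps->aliases [0x1028] = Alias {mem=&memories[0x1000], offset=40,
//                                    bytes=20, counter=1, ...}
//
// so an alias lookup resolves to the base h_ptr/d_ptr plus the stored offset.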

} // namespace mfem::internal

static internal::Maps *maps;

namespace internal
{

/// The host memory space base abstract class
class HostMemorySpace
{
public:
   virtual ~HostMemorySpace() { }
   virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
   virtual void Dealloc(void *ptr) { std::free(ptr); }
   virtual void Protect(const void*, size_t) { }
   virtual void Unprotect(const void*, size_t) { }
   virtual void AliasProtect(const void*, size_t) { }
   virtual void AliasUnprotect(const void*, size_t) { }
};

/// The device memory space base abstract class
class DeviceMemorySpace
{
public:
   virtual ~DeviceMemorySpace() { }
   virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
   virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
   virtual void Protect(const Memory&) { }
   virtual void Unprotect(const Memory&) { }
   virtual void AliasProtect(const void*, size_t) { }
   virtual void AliasUnprotect(const void*, size_t) { }
   virtual void *HtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   virtual void *DtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   virtual void *DtoH(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
};

/// The default std:: host memory space
class StdHostMemorySpace : public HostMemorySpace { };

/// The 'No' host memory space
struct NoHostMemorySpace : public HostMemorySpace
{
   void Alloc(void**, const size_t) { mfem_error("! Host Alloc error"); }
};

/// The aligned 32 host memory space
class Aligned32HostMemorySpace : public HostMemorySpace
{
public:
   Aligned32HostMemorySpace(): HostMemorySpace() { }
   void Alloc(void **ptr, size_t bytes)
   { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
   void Dealloc(void *ptr) { std::free(ptr); }
};

/// The aligned 64 host memory space
class Aligned64HostMemorySpace : public HostMemorySpace
{
public:
   Aligned64HostMemorySpace(): HostMemorySpace() { }
   void Alloc(void **ptr, size_t bytes)
   { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
};

#ifndef _WIN32
static uintptr_t pagesize = 0;
static uintptr_t pagemask = 0;

/// Returns the restricted base address of the DEBUG segment
inline const void *MmuAddrR(const void *ptr)
{
   const uintptr_t addr = (uintptr_t) ptr;
   return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
}

/// Returns the prolongated base address of the MMU segment
inline const void *MmuAddrP(const void *ptr)
{
   const uintptr_t addr = (uintptr_t) ptr;
   return (void*) (addr & ~pagemask);
}

/// Compute the restricted length for the MMU segment
inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
{
   // a ---->A:|    |:B<---- b
   const uintptr_t a = (uintptr_t) ptr;
   const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
   MFEM_ASSERT(a <= A, "");
   const uintptr_t b = a + bytes;
   const uintptr_t B = b & ~pagemask;
   MFEM_ASSERT(B <= b, "");
   const uintptr_t length = B > A ? B - A : 0;
   MFEM_ASSERT(length % pagesize == 0,"");
   return length;
}

/// Compute the prolongated length for the MMU segment
inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
{
   // |:A<----a |    | b---->B:|
   const uintptr_t a = (uintptr_t) ptr;
   const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
   MFEM_ASSERT(A <= a, "");
   const uintptr_t b = a + bytes;
   const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
   MFEM_ASSERT(b <= B, "");
   MFEM_ASSERT(B >= A,"");
   const uintptr_t length = B - A;
   MFEM_ASSERT(length % pagesize == 0,"");
   return length;
}
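
// Worked example (illustrative, assuming pagesize == 0x1000): for an alias
// at ptr == 0x12345 with bytes == 0x3000, i.e. the range [0x12345, 0x15345):
//
//    MmuAddrR(ptr) == 0x13000    MmuLengthR(ptr, bytes) == 0x2000
//    MmuAddrP(ptr) == 0x12000    MmuLengthP(ptr, bytes) == 0x4000
//
// Protection is thus restricted to the pages lying fully inside the alias,
// while un-protection is prolongated to every page the alias touches.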

/// The protected access error, used for the host
static void MmuError(int, siginfo_t *si, void*)
{
   fflush(0);
   char str[64];
   const void *ptr = si->si_addr;
   sprintf(str, "Error while accessing address %p!", ptr);
   mfem::out << std::endl << "An illegal memory access was made!";
   MFEM_ABORT(str);
}

/// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
static void MmuInit()
{
   if (pagesize > 0) { return; }
   struct sigaction sa;
   sa.sa_flags = SA_SIGINFO;
   sigemptyset(&sa.sa_mask);
   sa.sa_sigaction = MmuError;
   if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
   if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
   pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
   MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
   pagemask = pagesize - 1;
}

/// MMU allocation, through ::mmap
inline void MmuAlloc(void **ptr, const size_t bytes)
{
   const size_t length = bytes == 0 ? 8 : bytes;
   const int prot = PROT_READ | PROT_WRITE;
   const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
   *ptr = ::mmap(NULL, length, prot, flags, -1, 0);
   if (*ptr == MAP_FAILED) { throw ::std::bad_alloc(); }
}

/// MMU deallocation, through ::munmap
inline void MmuDealloc(void *ptr, const size_t bytes)
{
   const size_t length = bytes == 0 ? 8 : bytes;
   if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
}

/// MMU protection, through ::mprotect with no read/write accesses
inline void MmuProtect(const void *ptr, const size_t bytes)
{
   if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
   mfem_error("MMU protection (NONE) error");
}

/// MMU un-protection, through ::mprotect with read/write accesses
inline void MmuAllow(const void *ptr, const size_t bytes)
{
   const int RW = PROT_READ | PROT_WRITE;
   if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
   mfem_error("MMU protection (R/W) error");
}
#else
inline void MmuInit() { }
inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
inline void MmuProtect(const void*, const size_t) { }
inline void MmuAllow(const void*, const size_t) { }
inline const void *MmuAddrR(const void *a) { return a; }
inline const void *MmuAddrP(const void *a) { return a; }
inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
#endif

/// The MMU host memory space
class MmuHostMemorySpace : public HostMemorySpace
{
public:
   MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
   void Alloc(void **ptr, size_t bytes) { MmuAlloc(ptr, bytes); }
   void Dealloc(void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
   void Protect(const void *ptr, size_t bytes) { MmuProtect(ptr, bytes); }
   void Unprotect(const void *ptr, size_t bytes) { MmuAllow(ptr, bytes); }
   /// Aliases need to be restricted during protection
   void AliasProtect(const void *ptr, size_t bytes)
   { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
   /// Aliases need to be prolongated for un-protection
   void AliasUnprotect(const void *ptr, size_t bytes)
   { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
};

/// The UVM host memory space
class UvmHostMemorySpace : public HostMemorySpace
{
public:
   UvmHostMemorySpace(): HostMemorySpace() { }
   void Alloc(void **ptr, size_t bytes) { CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
   void Dealloc(void *ptr) { CuMemFree(ptr); }
};

/// The 'No' device memory space
class NoDeviceMemorySpace: public DeviceMemorySpace
{
public:
   void Alloc(internal::Memory&) { mfem_error("! Device Alloc"); }
   void Dealloc(Memory&) { mfem_error("! Device Dealloc"); }
   void *HtoD(void*, const void*, size_t) { mfem_error("!HtoD"); return nullptr; }
   void *DtoD(void*, const void*, size_t) { mfem_error("!DtoD"); return nullptr; }
   void *DtoH(void*, const void*, size_t) { mfem_error("!DtoH"); return nullptr; }
};

/// The std:: device memory space, used with the 'debug' device
class StdDeviceMemorySpace : public DeviceMemorySpace { };

/// The CUDA device memory space
class CudaDeviceMemorySpace: public DeviceMemorySpace
{
public:
   CudaDeviceMemorySpace(): DeviceMemorySpace() { }
   void Alloc(Memory &base) { CuMemAlloc(&base.d_ptr, base.bytes); }
   void Dealloc(Memory &base) { CuMemFree(base.d_ptr); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   { return CuMemcpyHtoD(dst, src, bytes); }
   void *DtoD(void* dst, const void* src, size_t bytes)
   { return CuMemcpyDtoD(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   { return CuMemcpyDtoH(dst, src, bytes); }
};

/// The HIP device memory space
class HipDeviceMemorySpace: public DeviceMemorySpace
{
public:
   HipDeviceMemorySpace(): DeviceMemorySpace() { }
   void Alloc(Memory &base) { HipMemAlloc(&base.d_ptr, base.bytes); }
   void Dealloc(Memory &base) { HipMemFree(base.d_ptr); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   { return HipMemcpyHtoD(dst, src, bytes); }
   void *DtoD(void* dst, const void* src, size_t bytes)
   { return HipMemcpyDtoD(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   { return HipMemcpyDtoH(dst, src, bytes); }
};

/// The UVM device memory space.
class UvmCudaMemorySpace : public DeviceMemorySpace
{
public:
   void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
   void Dealloc(Memory&) { }
   void *HtoD(void *dst, const void *src, size_t bytes)
   {
      if (dst == src) { MFEM_STREAM_SYNC; return dst; }
      return CuMemcpyHtoD(dst, src, bytes);
   }
   void *DtoD(void* dst, const void* src, size_t bytes)
   { return CuMemcpyDtoD(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   {
      if (dst == src) { MFEM_STREAM_SYNC; return dst; }
      return CuMemcpyDtoH(dst, src, bytes);
   }
};

/// The MMU device memory space
class MmuDeviceMemorySpace : public DeviceMemorySpace
{
public:
   MmuDeviceMemorySpace(): DeviceMemorySpace() { }
   void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
   void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
   void Protect(const Memory &m) { MmuProtect(m.d_ptr, m.bytes); }
   void Unprotect(const Memory &m) { MmuAllow(m.d_ptr, m.bytes); }
   /// Aliases need to be restricted during protection
   void AliasProtect(const void *ptr, size_t bytes)
   { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
   /// Aliases need to be prolongated for un-protection
   void AliasUnprotect(const void *ptr, size_t bytes)
   { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   void *DtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   void *DtoH(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
};
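
// Usage note (illustrative, hypothetical scenario): with the 'debug' device
// both sides are mmap-ed host memory, so stale accesses are trapped by the
// MMU. For a HOST_DEBUG array double *h whose host pages were mprotect-ed by
// GetDevicePtr(),
//
//    h[0] = 1.0; // host store while only the device copy is valid
//
// raises SIGSEGV, which MmuError() reports before aborting.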

#ifndef MFEM_USE_UMPIRE
class UmpireHostMemorySpace : public NoHostMemorySpace { };
class UmpireDeviceMemorySpace : public NoDeviceMemorySpace { };
#else
/// The Umpire host memory space
class UmpireHostMemorySpace : public HostMemorySpace
{
private:
   const char *name;
   umpire::ResourceManager &rm;
   umpire::Allocator h_allocator;
   umpire::strategy::AllocationStrategy *strat;
public:
   ~UmpireHostMemorySpace() { h_allocator.release(); }
   UmpireHostMemorySpace():
      HostMemorySpace(),
      name(mm.GetUmpireAllocatorHostName()),
      rm(umpire::ResourceManager::getInstance()),
      h_allocator(rm.isAllocator(name) ? rm.getAllocator(name) :
                  rm.makeAllocator<umpire::strategy::DynamicPool>
                  (name, rm.getAllocator("HOST"))),
      strat(h_allocator.getAllocationStrategy()) { }
   void Alloc(void **ptr, size_t bytes) { *ptr = h_allocator.allocate(bytes); }
   void Dealloc(void *ptr) { h_allocator.deallocate(ptr); }
   void Insert(void *ptr, size_t bytes)
   { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
};

/// The Umpire device memory space
#ifdef MFEM_USE_CUDA
class UmpireDeviceMemorySpace : public DeviceMemorySpace
{
private:
   const char *name;
   umpire::ResourceManager &rm;
   umpire::Allocator d_allocator;
public:
   ~UmpireDeviceMemorySpace() { d_allocator.release(); }
   UmpireDeviceMemorySpace():
      DeviceMemorySpace(),
      name(mm.GetUmpireAllocatorDeviceName()),
      rm(umpire::ResourceManager::getInstance()),
      d_allocator(rm.isAllocator(name) ? rm.getAllocator(name) :
                  rm.makeAllocator<umpire::strategy::DynamicPool>
                  (name, rm.getAllocator("DEVICE"))) { }
   void Alloc(Memory &base) { base.d_ptr = d_allocator.allocate(base.bytes); }
   void Dealloc(Memory &base) { d_allocator.deallocate(base.d_ptr); }
   void *HtoD(void *dst, const void *src, size_t bytes)
   {
#ifdef MFEM_USE_CUDA
      return CuMemcpyHtoD(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyHtoD(dst, src, bytes);
#endif
      //rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
   void *DtoD(void* dst, const void* src, size_t bytes)
   {
#ifdef MFEM_USE_CUDA
      return CuMemcpyDtoD(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyDtoD(dst, src, bytes);
#endif
      //rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
   void *DtoH(void *dst, const void *src, size_t bytes)
   {
#ifdef MFEM_USE_CUDA
      return CuMemcpyDtoH(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyDtoH(dst, src, bytes);
#endif
      //rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
};
#else
class UmpireDeviceMemorySpace : public NoDeviceMemorySpace { };
#endif // MFEM_USE_CUDA
#endif // MFEM_USE_UMPIRE

/// Memory space controller class
class Ctrl
{
   typedef MemoryType MT;

public:
   HostMemorySpace *host[HostMemoryTypeSize];
   DeviceMemorySpace *device[DeviceMemoryTypeSize];

public:
   Ctrl(): host{nullptr}, device{nullptr} { }

   void Configure()
   {
      if (host[HostMemoryType])
      {
         mfem_error("Memory backends have already been configured!");
      }

      // Filling the host memory backends
      // HOST, HOST_32 & HOST_64 are always ready
      // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
      host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
      host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
      host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
      // HOST_DEBUG is delayed, as it reroutes signals
      host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
      host[static_cast<int>(MT::HOST_UMPIRE)] = new UmpireHostMemorySpace();
      host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();

      // Filling the device memory backends; indices are shifted by the
      // first device memory type
      constexpr int shift = DeviceMemoryType;
      device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
      // All other device controllers are delayed
      device[static_cast<int>(MT::DEVICE)-shift] = nullptr;
      device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
      device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
   }

   HostMemorySpace* Host(const MemoryType mt)
   {
      const int mt_i = static_cast<int>(mt);
      // Delayed host controller initialization
      if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
      MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
      return host[mt_i];
   }

   DeviceMemorySpace* Device(const MemoryType mt)
   {
      const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
      MFEM_ASSERT(mt_i >= 0,"");
      // Lazy device controller initialization
      if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
      MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
      return device[mt_i];
   }

   ~Ctrl()
   {
      constexpr int mt_h = HostMemoryType;
      constexpr int mt_d = DeviceMemoryType;
      for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
      for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
   }

private:
   HostMemorySpace* NewHostCtrl(const MemoryType mt)
   {
      if (mt == MT::HOST_DEBUG) { return new MmuHostMemorySpace(); }
      MFEM_ABORT("Unknown host memory controller!");
      return nullptr;
   }

   DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
   {
      switch (mt)
      {
         case MT::DEVICE_UMPIRE: return new UmpireDeviceMemorySpace();
         case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
         case MT::DEVICE:
         {
#if defined(MFEM_USE_CUDA)
            return new CudaDeviceMemorySpace();
#elif defined(MFEM_USE_HIP)
            return new HipDeviceMemorySpace();
#else
            MFEM_ABORT("No device memory controller!");
            break;
#endif
         }
         default: MFEM_ABORT("Unknown device memory controller!");
      }
      return nullptr;
   }
};

} // namespace mfem::internal

static internal::Ctrl *ctrl;

void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
                          unsigned &flags)
{
   MFEM_ASSERT(exists, "Internal error!");
   MFEM_ASSERT(mt != MemoryType::HOST, "Internal error!");
   const bool is_host_mem = IsHostMemory(mt);
   const MemType dual_mt = GetDualMemoryType_(mt);
   const MemType h_mt = is_host_mem ? mt : dual_mt;
   const MemType d_mt = is_host_mem ? dual_mt : mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   void *h_ptr = h_tmp;
   if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
   flags = Mem::REGISTERED;
   flags |= Mem::OWNS_INTERNAL | Mem::OWNS_HOST | Mem::OWNS_DEVICE;
   flags |= is_host_mem ? Mem::VALID_HOST : Mem::VALID_DEVICE;
   if (is_host_mem) { mm.Insert(h_ptr, bytes, h_mt, d_mt); }
   else { mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); }
   CheckHostMemoryType_(h_mt, h_ptr);
   return h_ptr;
}

void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
                               MemoryType mt,
                               bool own, bool alias, unsigned &flags)
{
   MFEM_CONTRACT_VAR(alias);
   MFEM_ASSERT(exists, "Internal error!");
   MFEM_ASSERT(IsHostMemory(mt), "Internal error!");
   MFEM_ASSERT(!alias, "Cannot register an alias!");
   const bool is_host_mem = IsHostMemory(mt);
   const MemType dual_mt = GetDualMemoryType_(mt);
   const MemType h_mt = mt;
   const MemType d_mt = dual_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);

   if (ptr == nullptr && h_tmp == nullptr)
   {
      MFEM_VERIFY(bytes == 0, "internal error");
      return nullptr;
   }

   flags |= Mem::REGISTERED | Mem::OWNS_INTERNAL;
   void *h_ptr;

   if (is_host_mem) // HOST TYPES + MANAGED
   {
      h_ptr = ptr;
      mm.Insert(h_ptr, bytes, h_mt, d_mt);
      flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
              Mem::OWNS_INTERNAL | Mem::VALID_HOST;
   }
   else // DEVICE TYPES
   {
      h_ptr = h_tmp;
      if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
      mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
      flags = (own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE) |
              Mem::OWNS_INTERNAL | Mem::VALID_DEVICE;
   }
   CheckHostMemoryType_(h_mt, h_ptr);
   return h_ptr;
}

void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
                           unsigned base_flags, unsigned &flags)
{
   mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
                  base_flags & Mem::ALIAS);
   flags = (base_flags | Mem::ALIAS | Mem::OWNS_INTERNAL) &
           ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
}

MemoryType MemoryManager::Delete_(void *h_ptr, MemoryType mt, unsigned flags)
{
   const bool alias = flags & Mem::ALIAS;
   const bool registered = flags & Mem::REGISTERED;
   const bool owns_host = flags & Mem::OWNS_HOST;
   const bool owns_device = flags & Mem::OWNS_DEVICE;
   const bool owns_internal = flags & Mem::OWNS_INTERNAL;
   MFEM_ASSERT(registered || IsHostMemory(mt),"");
   MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
   if (!mm.exists || !registered) { return mt; }
   if (alias)
   {
      if (owns_internal)
      {
         const MemoryType h_mt = maps->aliases.at(h_ptr).h_mt;
         MFEM_ASSERT(mt == h_mt,"");
         mm.EraseAlias(h_ptr);
         return h_mt;
      }
   }
   else // Known
   {
      const MemoryType h_mt = mt;
      MFEM_ASSERT(!owns_internal ||
                  mt == maps->memories.at(h_ptr).h_mt,"");
      if (owns_host && (h_mt != MemoryType::HOST))
      { ctrl->Host(h_mt)->Dealloc(h_ptr); }
      if (owns_internal) { mm.Erase(h_ptr, owns_device); }
      return h_mt;
   }
   return mt;
}

bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
                                      MemoryType h_mt, size_t bytes,
                                      unsigned flags)
{
   if (!h_ptr)
   {
      MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
      return true;
   }

   const bool known = mm.IsKnown(h_ptr);
   const bool alias = mm.IsAlias(h_ptr);
   const bool check = known || ((flags & Mem::ALIAS) && alias);
   MFEM_VERIFY(check,"");
   const internal::Memory &mem =
      (flags & Mem::ALIAS) ?
      *maps->aliases.at(h_ptr).mem : maps->memories.at(h_ptr);
   const MemoryType &d_mt = mem.d_mt;
   switch (mc)
   {
      case MemoryClass::HOST_32:
      {
         MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
                     h_mt == MemoryType::HOST_64,"");
         return true;
      }
      case MemoryClass::HOST_64:
      {
         MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
         return true;
      }
      case MemoryClass::DEVICE:
      {
         MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
                     d_mt == MemoryType::DEVICE_DEBUG ||
                     d_mt == MemoryType::DEVICE_UMPIRE ||
                     d_mt == MemoryType::MANAGED,"");
         return true;
      }
      case MemoryClass::MANAGED:
      {
         MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
                      d_mt == MemoryType::MANAGED),"");
         return true;
      }
      default: break;
   }
   return true;
}

void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
                                size_t bytes, unsigned &flags)
{
   MemoryManager::CheckHostMemoryType_(h_mt, h_ptr);
   if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
   MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
   if (mc == MemoryClass::HOST)
   {
      const bool copy = !(flags & Mem::VALID_HOST);
      flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
      if (flags & Mem::ALIAS)
      { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
      else { return mm.GetHostPtr(h_ptr, bytes, copy); }
   }
   else
   {
      const bool copy = !(flags & Mem::VALID_DEVICE);
      flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
      if (flags & Mem::ALIAS)
      { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
      else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
   }
}

const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
                                 size_t bytes, unsigned &flags)
{
   CheckHostMemoryType_(h_mt, h_ptr);
   if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
   MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
   if (mc == MemoryClass::HOST)
   {
      const bool copy = !(flags & Mem::VALID_HOST);
      flags |= Mem::VALID_HOST;
      if (flags & Mem::ALIAS)
      { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
      else { return mm.GetHostPtr(h_ptr, bytes, copy); }
   }
   else
   {
      const bool copy = !(flags & Mem::VALID_DEVICE);
      flags |= Mem::VALID_DEVICE;
      if (flags & Mem::ALIAS)
      { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
      else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
   }
}

void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
                            size_t bytes, unsigned &flags)
{
   CheckHostMemoryType_(h_mt, h_ptr);
   if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
   MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
   if (mc == MemoryClass::HOST)
   {
      flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
      if (flags & Mem::ALIAS)
      { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
      else { return mm.GetHostPtr(h_ptr, bytes, false); }
   }
   else
   {
      flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
      if (flags & Mem::ALIAS)
      { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
      else { return mm.GetDevicePtr(h_ptr, bytes, false); }
   }
}

void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
                               size_t alias_bytes, unsigned base_flags,
                               unsigned &alias_flags)
{
   // This is called only when (base_flags & Mem::REGISTERED) is true.
   // Note that (alias_flags & REGISTERED) may not be true.
   MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
   if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
   {
      mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
   }
   if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
   {
      if (!(alias_flags & Mem::REGISTERED))
      {
         mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
         alias_flags = (alias_flags | Mem::REGISTERED | Mem::OWNS_INTERNAL) &
                       ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
      }
      mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
   }
   alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
                 (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
}

MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr)
{
   if (mm.exists)
   {
      const bool known = mm.IsKnown(h_ptr);
      if (known)
      {
         internal::Memory &mem = maps->memories.at(h_ptr);
         return mem.d_mt;
      }
      const bool alias = mm.IsAlias(h_ptr);
      if (alias)
      {
         internal::Memory *mem = maps->aliases.at(h_ptr).mem;
         return mem->d_mt;
      }
   }
   MFEM_ABORT("internal error");
   return MemoryManager::host_mem_type;
}

MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
{
   if (!mm.exists) { return MemoryManager::host_mem_type; }
   if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
   if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).mem->h_mt; }
   return MemoryManager::host_mem_type;
}

void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
                          size_t bytes, unsigned src_flags,
                          unsigned &dst_flags)
{
   // Type of copy to use based on the src and dest validity flags:
   //            |        src
   //            |  h   |  d   |  hd
   // -----------+------+------+------
   //       h    |  h2h    d2h    h2h
   // dest  d    |  h2d    d2d    d2d
   //       hd   |  h2h    d2d    d2d

   const bool dst_on_host =
      (dst_flags & Mem::VALID_HOST) &&
      (!(dst_flags & Mem::VALID_DEVICE) ||
       ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));

   dst_flags = dst_flags &
               ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);

   const bool src_on_host =
      (src_flags & Mem::VALID_HOST) &&
      (!(src_flags & Mem::VALID_DEVICE) ||
       ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));

   const void *src_d_ptr =
      src_on_host ? NULL :
      ((src_flags & Mem::ALIAS) ?
       mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
       mm.GetDevicePtr(src_h_ptr, bytes, false));

   if (dst_on_host)
   {
      if (src_on_host)
      {
         if (dst_h_ptr != src_h_ptr && bytes != 0)
         {
            MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
                        (const char*)src_h_ptr + bytes <= dst_h_ptr,
                        "data overlaps!");
            std::memcpy(dst_h_ptr, src_h_ptr, bytes);
         }
      }
      else
      {
         if (dst_h_ptr != src_d_ptr && bytes != 0)
         {
            internal::Memory &dst_h_base = maps->memories.at(dst_h_ptr);
            internal::Memory &src_d_base = maps->memories.at(src_h_ptr);
            MemoryType dst_h_mt = dst_h_base.h_mt;
            MemoryType src_d_mt = src_d_base.d_mt;
            ctrl->Host(dst_h_mt)->Unprotect(dst_h_ptr, bytes);
            ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
         }
      }
   }
   else
   {
      void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
                         mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
                         mm.GetDevicePtr(dst_h_ptr, bytes, false);
      if (src_on_host)
      {
         const bool known = mm.IsKnown(dst_h_ptr);
         const bool alias = dst_flags & Mem::ALIAS;
         MFEM_VERIFY(alias||known,"");
         const MemoryType d_mt = known ?
                                 maps->memories.at(dst_h_ptr).d_mt :
                                 maps->aliases.at(dst_h_ptr).mem->d_mt;
         ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
      }
      else
      {
         if (dest_d_ptr != src_d_ptr && bytes != 0)
         {
            const bool known = mm.IsKnown(dst_h_ptr);
            const bool alias = dst_flags & Mem::ALIAS;
            MFEM_VERIFY(alias||known,"");
            const MemoryType d_mt = known ?
                                    maps->memories.at(dst_h_ptr).d_mt :
                                    maps->aliases.at(dst_h_ptr).mem->d_mt;
            ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
         }
      }
   }
}
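
// Example (illustrative): if src is valid only on device (column 'd') and
// dest is valid on both host and device (row 'hd'), then dst_on_host and
// src_on_host both evaluate to false, the table selects d2d, and
//
//    ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
//
// performs the copy; dst_flags keeps only VALID_DEVICE afterwards.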

void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
                                size_t bytes, unsigned src_flags)
{
   const bool src_on_host = src_flags & Mem::VALID_HOST;
   if (src_on_host)
   {
      if (dest_h_ptr != src_h_ptr && bytes != 0)
      {
         MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
                     (const char*)src_h_ptr + bytes <= dest_h_ptr,
                     "data overlaps!");
         std::memcpy(dest_h_ptr, src_h_ptr, bytes);
      }
   }
   else
   {
      MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
      const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
                              mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
                              mm.GetDevicePtr(src_h_ptr, bytes, false);
      const internal::Memory &base = maps->memories.at(src_h_ptr);
      const MemoryType d_mt = base.d_mt;
      ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
   }
}

void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
                                  size_t bytes, unsigned &dest_flags)
{
   const bool dest_on_host = dest_flags & Mem::VALID_HOST;
   if (dest_on_host)
   {
      if (dest_h_ptr != src_h_ptr && bytes != 0)
      {
         MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
                     (const char*)src_h_ptr + bytes <= dest_h_ptr,
                     "data overlaps!");
         std::memcpy(dest_h_ptr, src_h_ptr, bytes);
      }
   }
   else
   {
      void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
                         mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
                         mm.GetDevicePtr(dest_h_ptr, bytes, false);
      const internal::Memory &base = maps->memories.at(dest_h_ptr);
      const MemoryType d_mt = base.d_mt;
      ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
   }
   dest_flags = dest_flags &
                ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
}

bool MemoryManager::IsKnown_(const void *h_ptr)
{
   return maps->memories.find(h_ptr) != maps->memories.end();
}

bool MemoryManager::IsAlias_(const void *h_ptr)
{
   return maps->aliases.find(h_ptr) != maps->aliases.end();
}

void MemoryManager::Insert(void *h_ptr, size_t bytes,
                           MemoryType h_mt, MemoryType d_mt)
{
   if (h_ptr == NULL)
   {
      MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
      return;
   }
   MFEM_VERIFY_TYPES(h_mt, d_mt);
#ifdef MFEM_DEBUG
   auto res =
#endif
      maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
#ifdef MFEM_DEBUG
   if (res.second == false)
   {
      auto &m = res.first->second;
      MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt && m.d_mt == d_mt,
                  "Address already present with different attributes!");
   }
#endif
}

void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
                                 MemoryType h_mt, MemoryType d_mt)
{
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   MFEM_ASSERT(h_ptr != NULL, "internal error");
   Insert(h_ptr, bytes, h_mt, d_mt);
   internal::Memory &mem = maps->memories.at(h_ptr);
   if (d_ptr == NULL) { ctrl->Device(d_mt)->Alloc(mem); }
   else { mem.d_ptr = d_ptr; }
}

void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
                                const size_t bytes, const bool base_is_alias)
{
   size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
                                       static_cast<const char*>(base_ptr));
   if (!base_ptr)
   {
      MFEM_VERIFY(offset == 0,
                  "Trying to add alias to NULL at offset " << offset);
      return;
   }
   if (base_is_alias)
   {
      const internal::Alias &alias = maps->aliases.at(base_ptr);
      MFEM_ASSERT(alias.mem,"");
      base_ptr = alias.mem->h_ptr;
      offset += alias.offset;
   }
   internal::Memory &mem = maps->memories.at(base_ptr);
   auto res =
      maps->aliases.emplace(alias_ptr,
                            internal::Alias{&mem, offset, bytes, 1, mem.h_mt});
   if (res.second == false) // alias_ptr was already in the map
   {
      if (res.first->second.mem != &mem || res.first->second.offset != offset)
      {
         mfem_error("alias already exists with different base/offset!");
      }
      else
      {
         res.first->second.counter++;
      }
   }
}

void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
{
   if (!h_ptr) { return; }
   auto mem_map_iter = maps->memories.find(h_ptr);
   if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
   internal::Memory &mem = mem_map_iter->second;
   if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
   maps->memories.erase(mem_map_iter);
}

void MemoryManager::EraseAlias(void *alias_ptr)
{
   if (!alias_ptr) { return; }
   auto alias_map_iter = maps->aliases.find(alias_ptr);
   if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
   internal::Alias &alias = alias_map_iter->second;
   if (--alias.counter) { return; }
   maps->aliases.erase(alias_map_iter);
}

void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
                                  bool copy_data)
{
   if (!h_ptr)
   {
      MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
      return NULL;
   }
   internal::Memory &mem = maps->memories.at(h_ptr);
   const MemoryType &h_mt = mem.h_mt;
   const MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   if (!mem.d_ptr) { ctrl->Device(d_mt)->Alloc(mem); }
   ctrl->Device(d_mt)->Unprotect(mem);
   if (copy_data)
   {
      MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
      ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes);
   }
   ctrl->Host(h_mt)->Protect(h_ptr, bytes);
   return mem.d_ptr;
}

void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
                                       bool copy)
{
   if (!alias_ptr)
   {
      MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
      return NULL;
   }
   auto &alias_map = maps->aliases;
   auto alias_map_iter = alias_map.find(alias_ptr);
   if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
   const internal::Alias &alias = alias_map_iter->second;
   const size_t offset = alias.offset;
   internal::Memory &mem = *alias.mem;
   const MemoryType &h_mt = mem.h_mt;
   const MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   if (!mem.d_ptr) { ctrl->Device(d_mt)->Alloc(mem); }
   void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
   void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
   MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
   MFEM_ASSERT(bytes <= alias.bytes, "internal error");
   ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes);
   ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
   if (copy) { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
   ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
   return alias_d_ptr;
}

void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
{
   const internal::Memory &mem = maps->memories.at(ptr);
   MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
   MFEM_ASSERT(bytes <= mem.bytes, "internal error");
   const MemoryType &h_mt = mem.h_mt;
   const MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   ctrl->Host(h_mt)->Unprotect(mem.h_ptr, bytes);
   // Aliases might have done some protections
   if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
   if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
   if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
   return mem.h_ptr;
}

void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
                                     bool copy_data)
{
   const internal::Alias &alias = maps->aliases.at(ptr);
   const internal::Memory *const mem = alias.mem;
   const MemoryType &h_mt = mem->h_mt;
   const MemoryType &d_mt = mem->d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
   void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
   MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
   ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
   if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
   if (copy_data && mem->d_ptr)
   { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
   if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
   return alias_h_ptr;
}

void MemoryManager::Init()
{
   if (exists) { return; }
   maps = new internal::Maps();
   ctrl = new internal::Ctrl();
   ctrl->Configure();
   exists = true;
}

MemoryManager::MemoryManager() { Init(); }

MemoryManager::~MemoryManager() { if (exists) { Destroy(); } }

void MemoryManager::Configure(const MemoryType host_mt,
                              const MemoryType device_mt)
{
   Init();
   host_mem_type = host_mt;
   device_mem_type = device_mt;
}

#ifdef MFEM_USE_UMPIRE
void MemoryManager::SetUmpireAllocatorNames(const char *h_name,
                                            const char *d_name)
{
   h_umpire_name = h_name;
   d_umpire_name = d_name;
}
#endif

void MemoryManager::Destroy()
{
   MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
   for (auto& n : maps->memories)
   {
      internal::Memory &mem = n.second;
      bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
      if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
      if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
   }
   delete maps; maps = nullptr;
   delete ctrl; ctrl = nullptr;
   host_mem_type = MemoryType::HOST;
   device_mem_type = MemoryType::HOST;
   exists = false;
}

void MemoryManager::RegisterCheck(void *ptr)
{
   if (ptr != NULL)
   {
      if (!IsKnown(ptr))
      {
         mfem_error("Pointer is not registered!");
      }
   }
}

int MemoryManager::PrintPtrs(std::ostream &out)
{
   int n_out = 0;
   for (const auto& n : maps->memories)
   {
      const internal::Memory &mem = n.second;
      out << "\nkey " << n.first << ", "
          << "h_ptr " << mem.h_ptr << ", "
          << "d_ptr " << mem.d_ptr;
      n_out++;
   }
   if (maps->memories.size() > 0) { out << std::endl; }
   return n_out;
}

int MemoryManager::PrintAliases(std::ostream &out)
{
   int n_out = 0;
   for (const auto& n : maps->aliases)
   {
      const internal::Alias &alias = n.second;
      out << "\nalias: key " << n.first << ", "
          << "h_ptr " << alias.mem->h_ptr << ", "
          << "offset " << alias.offset << ", "
          << "bytes " << alias.bytes << ", "
          << "counter " << alias.counter;
      n_out++;
   }
   if (maps->aliases.size() > 0) { out << std::endl; }
   return n_out;
}

int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
                                         unsigned flags)
{
   void *d_ptr = (flags & Mem::ALIAS) ?
                 mm.GetAliasDevicePtr(h_ptr, size, false) :
                 mm.GetDevicePtr(h_ptr, size, false);
   char *h_buf = new char[size];
   CuMemcpyDtoH(h_buf, d_ptr, size);
   int res = std::memcmp(h_ptr, h_buf, size);
   delete [] h_buf;
   return res;
}

void MemoryPrintFlags(unsigned flags)
{
   typedef Memory<int> Mem;
   mfem::out
         << "\n   registered    = " << bool(flags & Mem::REGISTERED)
         << "\n   owns host     = " << bool(flags & Mem::OWNS_HOST)
         << "\n   owns device   = " << bool(flags & Mem::OWNS_DEVICE)
         << "\n   owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
         << "\n   valid host    = " << bool(flags & Mem::VALID_HOST)
         << "\n   valid device  = " << bool(flags & Mem::VALID_DEVICE)
         << "\n   device flag   = " << bool(flags & Mem::USE_DEVICE)
         << "\n   alias         = " << bool(flags & Mem::ALIAS)
         << std::endl;
}

void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr)
{
   if (!mm.exists) { return; }
   const bool known = mm.IsKnown(h_ptr);
   const bool alias = mm.IsAlias(h_ptr);
   if (known) { MFEM_VERIFY(h_mt == maps->memories.at(h_ptr).h_mt,""); }
   if (alias) { MFEM_VERIFY(h_mt == maps->aliases.at(h_ptr).mem->h_mt,""); }
}

MemoryManager mm;

bool MemoryManager::exists = false;

#ifdef MFEM_USE_UMPIRE
const char* MemoryManager::h_umpire_name = "HOST";
const char* MemoryManager::d_umpire_name = "DEVICE";
#endif

MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
MemoryType MemoryManager::device_mem_type = MemoryType::HOST;

const char *MemoryTypeName[MemoryTypeSize] =
{
   "host-std", "host-32", "host-64", "host-debug", "host-umpire",
#if defined(MFEM_USE_CUDA)
   "cuda-uvm",
   "cuda",
#elif defined(MFEM_USE_HIP)
   "hip-uvm",
   "hip",
#else
   "managed",
   "device",
#endif
   "device-debug",
#if defined(MFEM_USE_CUDA)
   "cuda-umpire"
#elif defined(MFEM_USE_HIP)
   "hip-umpire"
#else
   "device-umpire"
#endif
};

} // namespace mfem