MFEM v4.6.0
Finite element discretization library
mem_manager.cpp
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "forall.hpp"
13 #include "mem_manager.hpp"
14 
15 #include <list>
16 #include <cstring> // std::memcpy, std::memcmp
17 #include <unordered_map>
18 #include <algorithm> // std::max
19 #include <cstdint>
20 
21 // Uncomment to try _WIN32 platform
22 //#define _WIN32
23 //#define _aligned_malloc(s,a) malloc(s)
24 
25 #ifndef _WIN32
26 #include <unistd.h>
27 #include <signal.h>
28 #include <sys/mman.h>
29 #define mfem_memalign(p,a,s) posix_memalign(p,a,s)
30 #define mfem_aligned_free free
31 #else
32 #define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
33 #define mfem_aligned_free _aligned_free
34 #endif
35 
36 #ifdef MFEM_USE_UMPIRE
37 #include <umpire/Umpire.hpp>
38 #include <umpire/strategy/QuickPool.hpp>
39 
40 // Make sure Umpire is built with CUDA support if MFEM is built with it.
41 #if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
42 #error "CUDA is not enabled in Umpire!"
43 #endif
44 // Make sure Umpire is built with HIP support if MFEM is built with it.
45 #if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
46 #error "HIP is not enabled in Umpire!"
47 #endif
48 #endif // MFEM_USE_UMPIRE
49 
50 #ifndef MAP_ANONYMOUS
51 #define MAP_ANONYMOUS MAP_ANON
52 #endif
53 
54 // Internal debug option, useful for tracking some memory manager operations.
55 // #define MFEM_TRACK_MEM_MANAGER
56 
57 namespace mfem
58 {
59 
60 MemoryType GetMemoryType(MemoryClass mc)
61 {
62  switch (mc)
63  {
64  case MemoryClass::HOST: return mm.GetHostMemoryType();
65  case MemoryClass::HOST_32: return MemoryType::HOST_32;
66  case MemoryClass::HOST_64: return MemoryType::HOST_64;
67  case MemoryClass::DEVICE: return mm.GetDeviceMemoryType();
68  case MemoryClass::MANAGED: return MemoryType::MANAGED;
69  }
70  MFEM_VERIFY(false,"");
71  return MemoryType::HOST;
72 }
73 
74 
75 bool MemoryClassContainsType(MemoryClass mc, MemoryType mt)
76 {
77  switch (mc)
78  {
79  case MemoryClass::HOST: return IsHostMemory(mt);
80  case MemoryClass::HOST_32:
81  return (mt == MemoryType::HOST_32 ||
82  mt == MemoryType::HOST_64 ||
83  mt == MemoryType::HOST_DEBUG);
84  case MemoryClass::HOST_64:
85  return (mt == MemoryType::HOST_64 ||
86  mt == MemoryType::HOST_DEBUG);
87  case MemoryClass::DEVICE: return IsDeviceMemory(mt);
88  case MemoryClass::MANAGED:
89  return (mt == MemoryType::MANAGED);
90  }
91  MFEM_ABORT("invalid MemoryClass");
92  return false;
93 }
94 
95 
96 static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
97 {
98  MFEM_VERIFY(IsHostMemory(h_mt), "h_mt = " << (int)h_mt);
99  MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == MemoryType::DEFAULT,
100  "d_mt = " << (int)d_mt);
101  // If h_mt == MemoryType::HOST_DEBUG, then d_mt == MemoryType::DEVICE_DEBUG
102  // or d_mt == MemoryType::DEFAULT
103  MFEM_VERIFY(h_mt != MemoryType::HOST_DEBUG ||
104  d_mt == MemoryType::DEVICE_DEBUG ||
105  d_mt == MemoryType::DEFAULT,
106  "d_mt = " << MemoryTypeName[(int)d_mt]);
107  // If d_mt == MemoryType::DEVICE_DEBUG, then h_mt != MemoryType::MANAGED
108  MFEM_VERIFY(d_mt != MemoryType::DEVICE_DEBUG ||
109  h_mt != MemoryType::MANAGED,
110  "h_mt = " << MemoryTypeName[(int)h_mt]);
111 #if 0
112  const bool sync =
113  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE) ||
114  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE) ||
115  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
116  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE) ||
117  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE) ||
118  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
119  (h_mt == MemoryType::HOST_DEBUG && d_mt == MemoryType::DEVICE_DEBUG) ||
120  (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
121  (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
122  (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
123  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE) ||
124  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE) ||
125  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE_2);
126  MFEM_VERIFY(sync, "");
127 #endif
128 }
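// Example (sketch, not part of the original source): pairs accepted and
// rejected by the checks above.
//
//   MFEM_VERIFY_TYPES(MemoryType::HOST, MemoryType::DEFAULT);            // ok
//   MFEM_VERIFY_TYPES(MemoryType::HOST_DEBUG, MemoryType::DEVICE_DEBUG); // ok
//   MFEM_VERIFY_TYPES(MemoryType::MANAGED, MemoryType::DEVICE_DEBUG);    // aborts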
129 
130 MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
131 {
132  //          |  HOST     HOST_32  HOST_64  DEVICE   MANAGED
133  // ---------+---------------------------------------------
134  //  HOST    |  HOST     HOST_32  HOST_64  DEVICE   MANAGED
135  //  HOST_32 |  HOST_32  HOST_32  HOST_64  DEVICE   MANAGED
136  //  HOST_64 |  HOST_64  HOST_64  HOST_64  DEVICE   MANAGED
137  //  DEVICE  |  DEVICE   DEVICE   DEVICE   DEVICE   MANAGED
138  //  MANAGED |  MANAGED  MANAGED  MANAGED  MANAGED  MANAGED
139 
140  // Using the enumeration ordering:
141  // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
142  // the above table is simply: a*b = max(a,b).
143 
144  return std::max(mc1, mc2);
145 }
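// Example (sketch, not part of this file): operator* yields the more
// restrictive MemoryClass, i.e. the max in the enumeration order above:
//
//   MemoryClass a = MemoryClass::HOST_32 * MemoryClass::HOST;    // HOST_32
//   MemoryClass b = MemoryClass::HOST_64 * MemoryClass::DEVICE;  // DEVICE
//   MemoryClass c = MemoryClass::DEVICE * MemoryClass::MANAGED;  // MANAGED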
146 
147 
148 // Instantiate Memory<T>::PrintFlags for T = int and T = double.
149 template void Memory<int>::PrintFlags() const;
150 template void Memory<double>::PrintFlags() const;
151 
152 // Instantiate Memory<T>::CompareHostAndDevice for T = int and T = double.
153 template int Memory<int>::CompareHostAndDevice(int size) const;
154 template int Memory<double>::CompareHostAndDevice(int size) const;
155 
156 
157 namespace internal
158 {
159 
160 /// Memory class that holds:
161 /// - the host and the device pointer
162 /// - the size in bytes of this memory region
163 /// - the host and device type of this memory region
164 struct Memory
165 {
166  void *const h_ptr;
167  void *d_ptr;
168  const size_t bytes;
169  const MemoryType h_mt;
170  MemoryType d_mt;
171  mutable bool h_rw, d_rw;
172  Memory(void *p, size_t b, MemoryType h, MemoryType d):
173  h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
174  h_rw(true), d_rw(true) { }
175 };
176 
177 /// Alias class that holds the base memory region and the offset
178 struct Alias
179 {
180  Memory *mem;
181  size_t offset;
182  size_t counter;
183  // 'h_mt' is already stored in 'mem', however, we use this field for type
184  // checking since the alias may be dangling, i.e. 'mem' may be invalid.
185  MemoryType h_mt;
186 };
187 
188 /// Maps for the Memory and the Alias classes
189 typedef std::unordered_map<const void*, Memory> MemoryMap;
190 typedef std::unordered_map<const void*, Alias> AliasMap;
191 
192 struct Maps
193 {
194  MemoryMap memories;
195  AliasMap aliases;
196 };
197 
198 } // namespace mfem::internal
199 
200 static internal::Maps *maps;
201 
202 namespace internal
203 {
204 
205 /// The host memory space base abstract class
206 class HostMemorySpace
207 {
208 public:
209  virtual ~HostMemorySpace() { }
210  virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
211  virtual void Dealloc(void *ptr) { std::free(ptr); }
212  virtual void Protect(const Memory&, size_t) { }
213  virtual void Unprotect(const Memory&, size_t) { }
214  virtual void AliasProtect(const void*, size_t) { }
215  virtual void AliasUnprotect(const void*, size_t) { }
216 };
217 
218 /// The device memory space base abstract class
219 class DeviceMemorySpace
220 {
221 public:
222  virtual ~DeviceMemorySpace() { }
223  virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
224  virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
225  virtual void Protect(const Memory&) { }
226  virtual void Unprotect(const Memory&) { }
227  virtual void AliasProtect(const void*, size_t) { }
228  virtual void AliasUnprotect(const void*, size_t) { }
229  virtual void *HtoD(void *dst, const void *src, size_t bytes)
230  { return std::memcpy(dst, src, bytes); }
231  virtual void *DtoD(void *dst, const void *src, size_t bytes)
232  { return std::memcpy(dst, src, bytes); }
233  virtual void *DtoH(void *dst, const void *src, size_t bytes)
234  { return std::memcpy(dst, src, bytes); }
235 };
236 
237 /// The default std:: host memory space
238 class StdHostMemorySpace : public HostMemorySpace { };
239 
240 /// The 'No' host memory space
241 struct NoHostMemorySpace : public HostMemorySpace
242 {
243  void Alloc(void**, const size_t) { mfem_error("! Host Alloc error"); }
244 };
245 
246 /// The aligned 32 host memory space
247 class Aligned32HostMemorySpace : public HostMemorySpace
248 {
249 public:
250  Aligned32HostMemorySpace(): HostMemorySpace() { }
251  void Alloc(void **ptr, size_t bytes)
252  { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
253  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
254 };
255 
256 /// The aligned 64 host memory space
257 class Aligned64HostMemorySpace : public HostMemorySpace
258 {
259 public:
260  Aligned64HostMemorySpace(): HostMemorySpace() { }
261  void Alloc(void **ptr, size_t bytes)
262  { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
263  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
264 };
265 
266 #ifndef _WIN32
267 static uintptr_t pagesize = 0;
268 static uintptr_t pagemask = 0;
269 
270 /// Returns the restricted base address of the DEBUG segment
271 inline const void *MmuAddrR(const void *ptr)
272 {
273  const uintptr_t addr = (uintptr_t) ptr;
274  return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
275 }
276 
277 /// Returns the prolongated base address of the MMU segment
278 inline const void *MmuAddrP(const void *ptr)
279 {
280  const uintptr_t addr = (uintptr_t) ptr;
281  return (void*) (addr & ~pagemask);
282 }
283 
284 /// Compute the restricted length for the MMU segment
285 inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
286 {
287  // a ---->A:| |:B<---- b
288  const uintptr_t a = (uintptr_t) ptr;
289  const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
290  MFEM_ASSERT(a <= A, "");
291  const uintptr_t b = a + bytes;
292  const uintptr_t B = b & ~pagemask;
293  MFEM_ASSERT(B <= b, "");
294  const uintptr_t length = B > A ? B - A : 0;
295  MFEM_ASSERT(length % pagesize == 0,"");
296  return length;
297 }
298 
299 /// Compute the prolongated length for the MMU segment
300 inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
301 {
302  // |:A<----a | | b---->B:|
303  const uintptr_t a = (uintptr_t) ptr;
304  const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
305  MFEM_ASSERT(A <= a, "");
306  const uintptr_t b = a + bytes;
307  const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
308  MFEM_ASSERT(b <= B, "");
309  MFEM_ASSERT(B >= A,"");
310  const uintptr_t length = B - A;
311  MFEM_ASSERT(length % pagesize == 0,"");
312  return length;
313 }
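// Worked example (sketch, assuming pagesize = 0x1000): for ptr = 0x10010 and
// bytes = 0x3000, the restricted segment keeps only the whole pages strictly
// inside the range, while the prolongated segment covers every touched page:
//
//   MmuAddrR(ptr) == (void*)0x11000;  MmuLengthR(ptr, 0x3000) == 0x2000
//   MmuAddrP(ptr) == (void*)0x10000;  MmuLengthP(ptr, 0x3000) == 0x4000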
314 
315 /// The protected access error, used for the host
316 static void MmuError(int, siginfo_t *si, void*)
317 {
318  constexpr size_t buf_size = 64;
319  fflush(0);
320  char str[buf_size];
321  const void *ptr = si->si_addr;
322  snprintf(str, buf_size, "Error while accessing address %p!", ptr);
323  mfem::out << std::endl << "An illegal memory access was made!";
324  MFEM_ABORT(str);
325 }
326 
327 /// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
328 static void MmuInit()
329 {
330  if (pagesize > 0) { return; }
331  struct sigaction sa;
332  sa.sa_flags = SA_SIGINFO;
333  sigemptyset(&sa.sa_mask);
334  sa.sa_sigaction = MmuError;
335  if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
336  if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
337  pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
338  MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
339  pagemask = pagesize - 1;
340 }
341 
342 /// MMU allocation, through ::mmap
343 inline void MmuAlloc(void **ptr, const size_t bytes)
344 {
345  const size_t length = bytes == 0 ? 8 : bytes;
346  const int prot = PROT_READ | PROT_WRITE;
347  const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
348  *ptr = ::mmap(NULL, length, prot, flags, -1, 0);
349  if (*ptr == MAP_FAILED) { throw ::std::bad_alloc(); }
350 }
351 
352 /// MMU deallocation, through ::munmap
353 inline void MmuDealloc(void *ptr, const size_t bytes)
354 {
355  const size_t length = bytes == 0 ? 8 : bytes;
356  if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
357 }
358 
359 /// MMU protection, through ::mprotect with no read/write accesses
360 inline void MmuProtect(const void *ptr, const size_t bytes)
361 {
362  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
363  if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
364  if (mmu_protect_error) { mfem_error("MMU protection (NONE) error"); }
365 }
366 
367 /// MMU un-protection, through ::mprotect with read/write accesses
368 inline void MmuAllow(const void *ptr, const size_t bytes)
369 {
370  const int RW = PROT_READ | PROT_WRITE;
371  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
372  if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
373  if (mmu_protect_error) { mfem_error("MMU protection (R/W) error"); }
374 }
375 #else
376 inline void MmuInit() { }
377 inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
378 inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
379 inline void MmuProtect(const void*, const size_t) { }
380 inline void MmuAllow(const void*, const size_t) { }
381 inline const void *MmuAddrR(const void *a) { return a; }
382 inline const void *MmuAddrP(const void *a) { return a; }
383 inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
384 inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
385 #endif
386 
387 /// The MMU host memory space
388 class MmuHostMemorySpace : public HostMemorySpace
389 {
390 public:
391  MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
392  void Alloc(void **ptr, size_t bytes) { MmuAlloc(ptr, bytes); }
393  void Dealloc(void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
394  void Protect(const Memory& mem, size_t bytes)
395  { if (mem.h_rw) { mem.h_rw = false; MmuProtect(mem.h_ptr, bytes); } }
396  void Unprotect(const Memory &mem, size_t bytes)
397  { if (!mem.h_rw) { mem.h_rw = true; MmuAllow(mem.h_ptr, bytes); } }
398  /// Aliases need to be restricted during protection
399  void AliasProtect(const void *ptr, size_t bytes)
400  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
401  /// Aliases need to be prolongated for un-protection
402  void AliasUnprotect(const void *ptr, size_t bytes)
403  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
404 };
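// Behavior sketch (hypothetical user code, assuming the 'debug' device is
// active): once Protect() marks a registered host buffer PROT_NONE, a stray
// host access raises SIGSEGV and MmuError() above aborts with the faulting
// address:
//
//   mfem::Vector v(8);
//   v.UseDevice(true);
//   double *d_v = v.Write();  // host pages of v are now protected
//   // dereferencing v's raw host pointer here would trigger MmuError()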
405 
406 /// The UVM host memory space
407 class UvmHostMemorySpace : public HostMemorySpace
408 {
409 public:
410  UvmHostMemorySpace(): HostMemorySpace() { }
411  void Alloc(void **ptr, size_t bytes) { CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
412  void Dealloc(void *ptr) { CuMemFree(ptr); }
413 };
414 
415 /// The 'No' device memory space
416 class NoDeviceMemorySpace: public DeviceMemorySpace
417 {
418 public:
419  void Alloc(internal::Memory&) { mfem_error("! Device Alloc"); }
420  void Dealloc(Memory&) { mfem_error("! Device Dealloc"); }
421  void *HtoD(void*, const void*, size_t) { mfem_error("!HtoD"); return nullptr; }
422  void *DtoD(void*, const void*, size_t) { mfem_error("!DtoD"); return nullptr; }
423  void *DtoH(void*, const void*, size_t) { mfem_error("!DtoH"); return nullptr; }
424 };
425 
426 /// The std:: device memory space, used with the 'debug' device
427 class StdDeviceMemorySpace : public DeviceMemorySpace { };
428 
429 /// The CUDA device memory space
430 class CudaDeviceMemorySpace: public DeviceMemorySpace
431 {
432 public:
433  CudaDeviceMemorySpace(): DeviceMemorySpace() { }
434  void Alloc(Memory &base) { CuMemAlloc(&base.d_ptr, base.bytes); }
435  void Dealloc(Memory &base) { CuMemFree(base.d_ptr); }
436  void *HtoD(void *dst, const void *src, size_t bytes)
437  { return CuMemcpyHtoD(dst, src, bytes); }
438  void *DtoD(void* dst, const void* src, size_t bytes)
439  { return CuMemcpyDtoD(dst, src, bytes); }
440  void *DtoH(void *dst, const void *src, size_t bytes)
441  { return CuMemcpyDtoH(dst, src, bytes); }
442 };
443 
444 /// The CUDA/HIP page-locked host memory space
445 class HostPinnedMemorySpace: public HostMemorySpace
446 {
447 public:
448  HostPinnedMemorySpace(): HostMemorySpace() { }
449  void Alloc(void ** ptr, size_t bytes) override
450  {
451 #ifdef MFEM_USE_CUDA
452  CuMemAllocHostPinned(ptr, bytes);
453 #endif
454 #ifdef MFEM_USE_HIP
455  HipMemAllocHostPinned(ptr, bytes);
456 #endif
457  }
458  void Dealloc(void *ptr) override
459  {
460 #ifdef MFEM_USE_CUDA
461  CuMemFreeHostPinned(ptr);
462 #endif
463 #ifdef MFEM_USE_HIP
464  HipMemFreeHostPinned(ptr);
465 #endif
466  }
467 };
468 
469 /// The HIP device memory space
470 class HipDeviceMemorySpace: public DeviceMemorySpace
471 {
472 public:
473  HipDeviceMemorySpace(): DeviceMemorySpace() { }
474  void Alloc(Memory &base) { HipMemAlloc(&base.d_ptr, base.bytes); }
475  void Dealloc(Memory &base) { HipMemFree(base.d_ptr); }
476  void *HtoD(void *dst, const void *src, size_t bytes)
477  { return HipMemcpyHtoD(dst, src, bytes); }
478  void *DtoD(void* dst, const void* src, size_t bytes)
479  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side
480  // synchronization, so we use hipMemcpyAsync instead to get similar behavior.
481  // For more info, see: https://github.com/mfem/mfem/pull/2780
482  { return HipMemcpyDtoDAsync(dst, src, bytes); }
483  void *DtoH(void *dst, const void *src, size_t bytes)
484  { return HipMemcpyDtoH(dst, src, bytes); }
485 };
486 
487 /// The UVM device memory space.
488 class UvmCudaMemorySpace : public DeviceMemorySpace
489 {
490 public:
491  void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
492  void Dealloc(Memory&) { }
493  void *HtoD(void *dst, const void *src, size_t bytes)
494  {
495  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
496  return CuMemcpyHtoD(dst, src, bytes);
497  }
498  void *DtoD(void* dst, const void* src, size_t bytes)
499  { return CuMemcpyDtoD(dst, src, bytes); }
500  void *DtoH(void *dst, const void *src, size_t bytes)
501  {
502  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
503  return CuMemcpyDtoH(dst, src, bytes);
504  }
505 };
506 
507 /// The MMU device memory space
508 class MmuDeviceMemorySpace : public DeviceMemorySpace
509 {
510 public:
511  MmuDeviceMemorySpace(): DeviceMemorySpace() { }
512  void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
513  void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
514  void Protect(const Memory &m)
515  { if (m.d_rw) { m.d_rw = false; MmuProtect(m.d_ptr, m.bytes); } }
516  void Unprotect(const Memory &m)
517  { if (!m.d_rw) { m.d_rw = true; MmuAllow(m.d_ptr, m.bytes); } }
518  /// Aliases need to be restricted during protection
519  void AliasProtect(const void *ptr, size_t bytes)
520  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
521  /// Aliases need to be prolongated for un-protection
522  void AliasUnprotect(const void *ptr, size_t bytes)
523  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
524  void *HtoD(void *dst, const void *src, size_t bytes)
525  { return std::memcpy(dst, src, bytes); }
526  void *DtoD(void *dst, const void *src, size_t bytes)
527  { return std::memcpy(dst, src, bytes); }
528  void *DtoH(void *dst, const void *src, size_t bytes)
529  { return std::memcpy(dst, src, bytes); }
530 };
531 
532 #ifdef MFEM_USE_UMPIRE
533 class UmpireMemorySpace
534 {
535 protected:
536  umpire::ResourceManager &rm;
537  umpire::Allocator allocator;
538  bool owns_allocator{false};
539 
540 public:
541  // TODO: this only releases unused memory
542  virtual ~UmpireMemorySpace() { if (owns_allocator) { allocator.release(); } }
543  UmpireMemorySpace(const char * name, const char * space)
544  : rm(umpire::ResourceManager::getInstance())
545  {
546  if (!rm.isAllocator(name))
547  {
548  allocator = rm.makeAllocator<umpire::strategy::QuickPool>(
549  name, rm.getAllocator(space));
550  owns_allocator = true;
551  }
552  else
553  {
554  allocator = rm.getAllocator(name);
555  owns_allocator = false;
556  }
557  }
558 };
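// Design note: the constructor above reuses an existing Umpire allocator of
// the given name and only builds a new QuickPool (wrapping the "HOST" or
// "DEVICE" resource) when the name is unknown, so two spaces constructed with
// the same name share one pool and only the first owns it.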
559 
560 /// The Umpire host memory space
561 class UmpireHostMemorySpace : public HostMemorySpace, public UmpireMemorySpace
562 {
563 private:
564  umpire::strategy::AllocationStrategy *strat;
565 public:
566  UmpireHostMemorySpace(const char * name)
567  : HostMemorySpace(),
568  UmpireMemorySpace(name, "HOST"),
569  strat(allocator.getAllocationStrategy()) {}
570  void Alloc(void **ptr, size_t bytes) override
571  { *ptr = allocator.allocate(bytes); }
572  void Dealloc(void *ptr) override { allocator.deallocate(ptr); }
573  void Insert(void *ptr, size_t bytes)
574  { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
575 };
576 
577 /// The Umpire device memory space
578 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
579 class UmpireDeviceMemorySpace : public DeviceMemorySpace,
580  public UmpireMemorySpace
581 {
582 public:
583  UmpireDeviceMemorySpace(const char * name)
584  : DeviceMemorySpace(),
585  UmpireMemorySpace(name, "DEVICE") {}
586  void Alloc(Memory &base) override
587  { base.d_ptr = allocator.allocate(base.bytes); }
588  void Dealloc(Memory &base) override { rm.deallocate(base.d_ptr); }
589  void *HtoD(void *dst, const void *src, size_t bytes) override
590  {
591 #ifdef MFEM_USE_CUDA
592  return CuMemcpyHtoD(dst, src, bytes);
593 #endif
594 #ifdef MFEM_USE_HIP
595  return HipMemcpyHtoD(dst, src, bytes);
596 #endif
597  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
598  }
599  void *DtoD(void* dst, const void* src, size_t bytes) override
600  {
601 #ifdef MFEM_USE_CUDA
602  return CuMemcpyDtoD(dst, src, bytes);
603 #endif
604 #ifdef MFEM_USE_HIP
605  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side
606  // synchronization, so we use hipMemcpyAsync instead to get similar behavior.
607  // For more info, see: https://github.com/mfem/mfem/pull/2780
608  return HipMemcpyDtoDAsync(dst, src, bytes);
609 #endif
610  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
611  }
612  void *DtoH(void *dst, const void *src, size_t bytes) override
613  {
614 #ifdef MFEM_USE_CUDA
615  return CuMemcpyDtoH(dst, src, bytes);
616 #endif
617 #ifdef MFEM_USE_HIP
618  return HipMemcpyDtoH(dst, src, bytes);
619 #endif
620  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
621  }
622 };
623 #else
624 class UmpireDeviceMemorySpace : public NoDeviceMemorySpace
625 {
626 public:
627  UmpireDeviceMemorySpace(const char * /*unused*/) {}
628 };
629 #endif // MFEM_USE_CUDA || MFEM_USE_HIP
630 #endif // MFEM_USE_UMPIRE
631 
632 /// Memory space controller class
633 class Ctrl
634 {
635  typedef MemoryType MT;
636 
637 public:
638  HostMemorySpace *host[HostMemoryTypeSize];
639  DeviceMemorySpace *device[DeviceMemoryTypeSize];
640 
641 public:
642  Ctrl(): host{nullptr}, device{nullptr} { }
643 
644  void Configure()
645  {
646  if (host[HostMemoryType])
647  {
648  mfem_error("Memory backends have already been configured!");
649  }
650 
651  // Filling the host memory backends
652  // HOST, HOST_32 & HOST_64 are always ready
653  // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
654  host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
655  host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
656  host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
657  // HOST_DEBUG is delayed, as it reroutes signals
658  host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
659  host[static_cast<int>(MT::HOST_UMPIRE)] = nullptr;
660  host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();
661 
662  // Filling the device memory backends, shifting with the device size
663  constexpr int shift = DeviceMemoryType;
664  device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
665  // All other device controllers are delayed
666  device[static_cast<int>(MemoryType::DEVICE)-shift] = nullptr;
667  device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
668  device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
669  device[static_cast<int>(MT::DEVICE_UMPIRE_2)-shift] = nullptr;
670  }
671 
672  HostMemorySpace* Host(const MemoryType mt)
673  {
674  const int mt_i = static_cast<int>(mt);
675  // Delayed host controllers initialization
676  if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
677  MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
678  return host[mt_i];
679  }
680 
681  DeviceMemorySpace* Device(const MemoryType mt)
682  {
683  const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
684  MFEM_ASSERT(mt_i >= 0,"");
685  // Lazy device controller initializations
686  if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
687  MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
688  return device[mt_i];
689  }
690 
691  ~Ctrl()
692  {
693  constexpr int mt_h = HostMemoryType;
694  constexpr int mt_d = DeviceMemoryType;
695  for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
696  for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
697  }
698 
699 private:
700  HostMemorySpace* NewHostCtrl(const MemoryType mt)
701  {
702  switch (mt)
703  {
704  case MT::HOST_DEBUG: return new MmuHostMemorySpace();
705 #ifdef MFEM_USE_UMPIRE
706  case MT::HOST_UMPIRE:
707  return new UmpireHostMemorySpace(
708  MemoryManager::GetUmpireHostAllocatorName());
709 #else
710  case MT::HOST_UMPIRE: return new NoHostMemorySpace();
711 #endif
712  case MT::HOST_PINNED: return new HostPinnedMemorySpace();
713  default: MFEM_ABORT("Unknown host memory controller!");
714  }
715  return nullptr;
716  }
717 
718  DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
719  {
720  switch (mt)
721  {
722 #ifdef MFEM_USE_UMPIRE
723  case MT::DEVICE_UMPIRE:
724  return new UmpireDeviceMemorySpace(
725  MemoryManager::GetUmpireDeviceAllocatorName());
726  case MT::DEVICE_UMPIRE_2:
727  return new UmpireDeviceMemorySpace(
728  MemoryManager::GetUmpireDevice2AllocatorName());
729 #else
730  case MT::DEVICE_UMPIRE: return new NoDeviceMemorySpace();
731  case MT::DEVICE_UMPIRE_2: return new NoDeviceMemorySpace();
732 #endif
733  case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
734  case MT::DEVICE:
735  {
736 #if defined(MFEM_USE_CUDA)
737  return new CudaDeviceMemorySpace();
738 #elif defined(MFEM_USE_HIP)
739  return new HipDeviceMemorySpace();
740 #else
741  MFEM_ABORT("No device memory controller!");
742  break;
743 #endif
744  }
745  default: MFEM_ABORT("Unknown device memory controller!");
746  }
747  return nullptr;
748  }
749 };
750 
751 } // namespace mfem::internal
752 
753 static internal::Ctrl *ctrl;
754 
755 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
756  unsigned &flags)
757 {
758  MFEM_ASSERT(exists, "Internal error!");
759  if (IsHostMemory(mt))
760  {
761  MFEM_ASSERT(mt != MemoryType::HOST && h_tmp == nullptr,
762  "Internal error!");
763  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
764  // evaluated at the time when the device pointer is allocated, see
765  // GetDevicePtr() and GetAliasDevicePtr()
766  const MemoryType d_mt = MemoryType::DEFAULT;
767  // We rely on the next call using lazy dev alloc
768  return New_(h_tmp, bytes, mt, d_mt, Mem::VALID_HOST, flags);
769  }
770  else
771  {
772  const MemoryType h_mt = GetDualMemoryType(mt);
773  return New_(h_tmp, bytes, h_mt, mt, Mem::VALID_DEVICE, flags);
774  }
775 }
776 
777 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType h_mt,
778  MemoryType d_mt, unsigned valid_flags,
779  unsigned &flags)
780 {
781  MFEM_ASSERT(exists, "Internal error!");
782  MFEM_ASSERT(IsHostMemory(h_mt), "h_mt must be host type");
783  MFEM_ASSERT(IsDeviceMemory(d_mt) || d_mt == h_mt ||
784  d_mt == MemoryType::DEFAULT,
785  "d_mt must be a device type, the same as h_mt, or DEFAULT");
786  MFEM_ASSERT((h_mt != MemoryType::HOST || h_tmp != nullptr) &&
787  (h_mt == MemoryType::HOST || h_tmp == nullptr),
788  "Internal error");
789  MFEM_ASSERT((valid_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) == 0,
790  "Internal error");
791  void *h_ptr;
792  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
793  else { h_ptr = h_tmp; }
794  flags = Mem::Registered | Mem::OWNS_INTERNAL | Mem::OWNS_HOST |
795  Mem::OWNS_DEVICE | valid_flags;
796  // The other New_() method relies on this lazy allocation behavior.
797  mm.Insert(h_ptr, bytes, h_mt, d_mt); // lazy dev alloc
798  // mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); // non-lazy dev alloc
799 
800  // MFEM_VERIFY_TYPES(h_mt, mt); // done by mm.Insert() above
801  CheckHostMemoryType_(h_mt, h_ptr, false);
802 
803  return h_ptr;
804 }
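// Usage sketch (hypothetical user code, not part of this file): with a device
// configured, e.g. mfem::Device device("cuda"), the lazy device allocation
// noted above is triggered by the first device access:
//
//   mfem::Vector v(100);      // host side only
//   v.UseDevice(true);
//   double *d_v = v.Write();  // device buffer allocated and marked valid here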
805 
806 void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
807  MemoryType mt,
808  bool own, bool alias, unsigned &flags)
809 {
810  MFEM_ASSERT(exists, "Internal error!");
811  const bool is_host_mem = IsHostMemory(mt);
812  const MemType h_mt = is_host_mem ? mt : GetDualMemoryType(mt);
813  const MemType d_mt = is_host_mem ? MemoryType::DEFAULT : mt;
814  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
815  // evaluated at the time when the device pointer is allocated, see
816  // GetDevicePtr() and GetAliasDevicePtr()
817 
818  MFEM_VERIFY_TYPES(h_mt, d_mt);
819 
820  if (ptr == nullptr && h_tmp == nullptr)
821  {
822  MFEM_VERIFY(bytes == 0, "internal error");
823  return nullptr;
824  }
825 
826  MFEM_VERIFY(!alias, "Cannot register an alias!");
827 
828  flags |= Mem::Registered | Mem::OWNS_INTERNAL;
829  void *h_ptr;
830 
831  if (is_host_mem) // HOST TYPES + MANAGED
832  {
833  h_ptr = ptr;
834  mm.Insert(h_ptr, bytes, h_mt, d_mt);
835  flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
836  Mem::VALID_HOST;
837  }
838  else // DEVICE TYPES
839  {
840  MFEM_VERIFY(ptr || bytes == 0,
841  "cannot register NULL device pointer with bytes = " << bytes);
842  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
843  else { h_ptr = h_tmp; }
844  mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
845  flags = own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE;
846  flags |= (Mem::OWNS_HOST | Mem::VALID_DEVICE);
847  }
848  CheckHostMemoryType_(h_mt, h_ptr, alias);
849  return h_ptr;
850 }
851 
852 void MemoryManager::Register2_(void *h_ptr, void *d_ptr, size_t bytes,
853  MemoryType h_mt, MemoryType d_mt,
854  bool own, bool alias, unsigned &flags,
855  unsigned valid_flags)
856 {
857  MFEM_CONTRACT_VAR(alias);
858  MFEM_ASSERT(exists, "Internal error!");
859  MFEM_ASSERT(!alias, "Cannot register an alias!");
860  MFEM_VERIFY_TYPES(h_mt, d_mt);
861 
862  if (h_ptr == nullptr && d_ptr == nullptr)
863  {
864  MFEM_VERIFY(bytes == 0, "internal error");
865  return;
866  }
867 
868  flags |= Mem::Registered | Mem::OWNS_INTERNAL;
869 
870  MFEM_VERIFY(d_ptr || bytes == 0,
871  "cannot register NULL device pointer with bytes = " << bytes);
872  mm.InsertDevice(d_ptr, h_ptr, bytes, h_mt, d_mt);
873  flags = (own ? flags | (Mem::OWNS_HOST | Mem::OWNS_DEVICE) :
874  flags & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE)) |
875  valid_flags;
876 
877  CheckHostMemoryType_(h_mt, h_ptr, alias);
878 }
879 
880 void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
881  unsigned base_flags, unsigned &flags)
882 {
883  mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
884  base_flags & Mem::ALIAS);
885  flags = (base_flags | Mem::ALIAS) & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
886  if (base_h_ptr) { flags |= Mem::OWNS_INTERNAL; }
887 }
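// Sketch (hypothetical sizes, not part of this file): Alias_ backs
// Memory<T>::MakeAlias, e.g. a Vector referencing a sub-range of another:
//
//   mfem::Vector x(10);
//   mfem::Vector w;
//   w.MakeRef(x, 2, 5);  // w aliases x[2..6]; w gains ALIAS, owns nothing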
888 
889 void MemoryManager::SetDeviceMemoryType_(void *h_ptr, unsigned flags,
890  MemoryType d_mt)
891 {
892  MFEM_VERIFY(h_ptr, "cannot set the device memory type: Memory is empty!");
893  if (!(flags & Mem::ALIAS))
894  {
895  auto mem_iter = maps->memories.find(h_ptr);
896  MFEM_VERIFY(mem_iter != maps->memories.end(), "internal error");
897  internal::Memory &mem = mem_iter->second;
898  if (mem.d_mt == d_mt) { return; }
899  MFEM_VERIFY(mem.d_ptr == nullptr, "cannot set the device memory type:"
900  " device memory is allocated!");
901  mem.d_mt = d_mt;
902  }
903  else
904  {
905  auto alias_iter = maps->aliases.find(h_ptr);
906  MFEM_VERIFY(alias_iter != maps->aliases.end(), "internal error");
907  internal::Alias &alias = alias_iter->second;
908  internal::Memory &base_mem = *alias.mem;
909  if (base_mem.d_mt == d_mt) { return; }
910  MFEM_VERIFY(base_mem.d_ptr == nullptr,
911  "cannot set the device memory type:"
912  " alias' base device memory is allocated!");
913  base_mem.d_mt = d_mt;
914  }
915 }
916 
917 void MemoryManager::Delete_(void *h_ptr, MemoryType h_mt, unsigned flags)
918 {
919  const bool alias = flags & Mem::ALIAS;
920  const bool registered = flags & Mem::Registered;
921  const bool owns_host = flags & Mem::OWNS_HOST;
922  const bool owns_device = flags & Mem::OWNS_DEVICE;
923  const bool owns_internal = flags & Mem::OWNS_INTERNAL;
924  MFEM_ASSERT(IsHostMemory(h_mt), "invalid h_mt = " << (int)h_mt);
925  // MFEM_ASSERT(registered || IsHostMemory(h_mt),"");
926  MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
927  // If at least one of the 'own_*' flags is true then 'registered' must be
928  // true too. An acceptable exception is the special case when 'h_ptr' is
929  // NULL, and both 'own_device' and 'own_internal' are false -- this case is
930  // an exception only when 'own_host' is true and 'registered' is false.
931  MFEM_ASSERT(registered || !(owns_host || owns_device || owns_internal) ||
932  (!(owns_device || owns_internal) && h_ptr == nullptr),
933  "invalid Memory state");
934  if (!mm.exists || !registered) { return; }
935  if (alias)
936  {
937  if (owns_internal)
938  {
939  MFEM_ASSERT(mm.IsAlias(h_ptr), "");
940  MFEM_ASSERT(h_mt == maps->aliases.at(h_ptr).h_mt, "");
941  mm.EraseAlias(h_ptr);
942  }
943  }
944  else // Known
945  {
946  if (owns_host && (h_mt != MemoryType::HOST))
947  { ctrl->Host(h_mt)->Dealloc(h_ptr); }
948  if (owns_internal)
949  {
950  MFEM_ASSERT(mm.IsKnown(h_ptr), "");
951  MFEM_ASSERT(h_mt == maps->memories.at(h_ptr).h_mt, "");
952  mm.Erase(h_ptr, owns_device);
953  }
954  }
955 }
956 
957 void MemoryManager::DeleteDevice_(void *h_ptr, unsigned & flags)
958 {
959  const bool owns_device = flags & Mem::OWNS_DEVICE;
960  if (owns_device)
961  {
962  mm.EraseDevice(h_ptr);
963  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
964  }
965 }
966 
967 bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
968  MemoryType h_mt, size_t bytes,
969  unsigned flags)
970 {
971  if (!h_ptr)
972  {
973  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
974  return true;
975  }
976  MemoryType d_mt;
977  if (!(flags & Mem::ALIAS))
978  {
979  auto iter = maps->memories.find(h_ptr);
980  MFEM_VERIFY(iter != maps->memories.end(), "internal error");
981  d_mt = iter->second.d_mt;
982  }
983  else
984  {
985  auto iter = maps->aliases.find(h_ptr);
986  MFEM_VERIFY(iter != maps->aliases.end(), "internal error");
987  d_mt = iter->second.mem->d_mt;
988  }
989  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
990  switch (mc)
991  {
992  case MemoryClass::HOST_32:
993  {
994  MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
995  h_mt == MemoryType::HOST_64,"");
996  return true;
997  }
998  case MemoryClass::HOST_64:
999  {
1000  MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
1001  return true;
1002  }
1003  case MemoryClass::DEVICE:
1004  {
1005  MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
1006  d_mt == MemoryType::DEVICE_DEBUG ||
1007  d_mt == MemoryType::DEVICE_UMPIRE ||
1008  d_mt == MemoryType::DEVICE_UMPIRE_2 ||
1009  d_mt == MemoryType::MANAGED,"");
1010  return true;
1011  }
1012  case MemoryClass::MANAGED:
1013  {
1014  MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
1015  d_mt == MemoryType::MANAGED),"");
1016  return true;
1017  }
1018  default: break;
1019  }
1020  return true;
1021 }
1022 
1023 void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1024  size_t bytes, unsigned &flags)
1025 {
1026  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1027  if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1028  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1029  if (IsHostMemory(GetMemoryType(mc)))
1030  {
1031  const bool copy = !(flags & Mem::VALID_HOST);
1032  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1033  if (flags & Mem::ALIAS)
1034  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1035  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1036  }
1037  else
1038  {
1039  const bool copy = !(flags & Mem::VALID_DEVICE);
1040  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1041  if (flags & Mem::ALIAS)
1042  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1043  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1044  }
1045 }
1046 
1047 const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1048  size_t bytes, unsigned &flags)
1049 {
1050  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1051  if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1052  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1053  if (IsHostMemory(GetMemoryType(mc)))
1054  {
1055  const bool copy = !(flags & Mem::VALID_HOST);
1056  flags |= Mem::VALID_HOST;
1057  if (flags & Mem::ALIAS)
1058  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1059  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1060  }
1061  else
1062  {
1063  const bool copy = !(flags & Mem::VALID_DEVICE);
1064  flags |= Mem::VALID_DEVICE;
1065  if (flags & Mem::ALIAS)
1066  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1067  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1068  }
1069 }
1070 
1071 void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1072  size_t bytes, unsigned &flags)
1073 {
1074  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1075  if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1076  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1077  if (IsHostMemory(GetMemoryType(mc)))
1078  {
1079  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1080  if (flags & Mem::ALIAS)
1081  { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
1082  else { return mm.GetHostPtr(h_ptr, bytes, false); }
1083  }
1084  else
1085  {
1086  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1087  if (flags & Mem::ALIAS)
1088  { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
1089  else { return mm.GetDevicePtr(h_ptr, bytes, false); }
1090  }
1091 }
1092 
1093 void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
1094  size_t alias_bytes, unsigned base_flags,
1095  unsigned &alias_flags)
1096 {
1097  // This is called only when (base_flags & Mem::Registered) is true.
1098  // Note that (alias_flags & Registered) may not be true.
1099  MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
1100  if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
1101  {
1102  mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
1103  }
1104  if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
1105  {
1106  if (!(alias_flags & Mem::Registered))
1107  {
1108  mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
1109  alias_flags = (alias_flags | Mem::Registered | Mem::OWNS_INTERNAL) &
1110  ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
1111  }
1112  mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
1113  }
1114  alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
1115  (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
1116 }
1117 
1118 MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr, bool alias)
1119 {
1120  if (mm.exists)
1121  {
1122  if (!alias)
1123  {
1124  auto iter = maps->memories.find(h_ptr);
1125  MFEM_ASSERT(iter != maps->memories.end(), "internal error");
1126  return iter->second.d_mt;
1127  }
1128  // alias == true
1129  auto iter = maps->aliases.find(h_ptr);
1130  MFEM_ASSERT(iter != maps->aliases.end(), "internal error");
1131  return iter->second.mem->d_mt;
1132  }
1133  MFEM_ABORT("internal error");
1134  return MemoryManager::host_mem_type;
1135 }
1136 
1137 MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
1138 {
1139  if (!mm.exists) { return MemoryManager::host_mem_type; }
1140  if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
1141  if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).h_mt; }
1142  return MemoryManager::host_mem_type;
1143 }
1144 
1145 void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
1146  size_t bytes, unsigned src_flags,
1147  unsigned &dst_flags)
1148 {
1149  // Type of copy to use based on the src and dest validity flags:
1150  //            |        src
1151  //            |  h  |  d  |  hd
1152  // -----------+-----+-----+------
1153  //         h  | h2h   d2h   h2h
1154  //  dest   d  | h2d   d2d   d2d
1155  //        hd  | h2h   d2d   d2d
1156 
1157  const bool dst_on_host =
1158  (dst_flags & Mem::VALID_HOST) &&
1159  (!(dst_flags & Mem::VALID_DEVICE) ||
1160  ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));
1161 
1162  dst_flags = dst_flags &
1163  ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1164 
1165  const bool src_on_host =
1166  (src_flags & Mem::VALID_HOST) &&
1167  (!(src_flags & Mem::VALID_DEVICE) ||
1168  ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));
1169 
1170  const void *src_d_ptr =
1171  src_on_host ? NULL :
1172  ((src_flags & Mem::ALIAS) ?
1173  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1174  mm.GetDevicePtr(src_h_ptr, bytes, false));
1175 
1176  if (dst_on_host)
1177  {
1178  if (src_on_host)
1179  {
1180  if (dst_h_ptr != src_h_ptr && bytes != 0)
1181  {
1182  MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
1183  (const char*)src_h_ptr + bytes <= dst_h_ptr,
1184  "data overlaps!");
1185  std::memcpy(dst_h_ptr, src_h_ptr, bytes);
1186  }
1187  }
1188  else
1189  {
1190  if (dst_h_ptr != src_d_ptr && bytes != 0)
1191  {
1192  internal::Memory &src_d_base = maps->memories.at(src_h_ptr);
1193  MemoryType src_d_mt = src_d_base.d_mt;
1194  ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
1195  }
1196  }
1197  }
1198  else
1199  {
1200  void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
1201  mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
1202  mm.GetDevicePtr(dst_h_ptr, bytes, false);
1203  if (src_on_host)
1204  {
1205  const bool known = mm.IsKnown(dst_h_ptr);
1206  const bool alias = dst_flags & Mem::ALIAS;
1207  MFEM_VERIFY(alias||known,"");
1208  const MemoryType d_mt = known ?
1209  maps->memories.at(dst_h_ptr).d_mt :
1210  maps->aliases.at(dst_h_ptr).mem->d_mt;
1211  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1212  }
1213  else
1214  {
1215  if (dest_d_ptr != src_d_ptr && bytes != 0)
1216  {
1217  const bool known = mm.IsKnown(dst_h_ptr);
1218  const bool alias = dst_flags & Mem::ALIAS;
1219  MFEM_VERIFY(alias||known,"");
1220  const MemoryType d_mt = known ?
1221  maps->memories.at(dst_h_ptr).d_mt :
1222  maps->aliases.at(dst_h_ptr).mem->d_mt;
1223  ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
1224  }
1225  }
1226  }
1227 }
1228 
1229 void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
1230  size_t bytes, unsigned src_flags)
1231 {
1232  const bool src_on_host = src_flags & Mem::VALID_HOST;
1233  if (src_on_host)
1234  {
1235  if (dest_h_ptr != src_h_ptr && bytes != 0)
1236  {
1237  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1238  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1239  "data overlaps!");
1240  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1241  }
1242  }
1243  else
1244  {
1245  MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
1246  const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
1247  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1248  mm.GetDevicePtr(src_h_ptr, bytes, false);
1249  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1250  const MemoryType d_mt = base.d_mt;
1251  ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
1252  }
1253 }
1254 
1255 void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
1256  size_t bytes, unsigned &dest_flags)
1257 {
1258  const bool dest_on_host = dest_flags & Mem::VALID_HOST;
1259  if (dest_on_host)
1260  {
1261  if (dest_h_ptr != src_h_ptr && bytes != 0)
1262  {
1263  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1264  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1265  "data overlaps!");
1266  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1267  }
1268  }
1269  else
1270  {
1271  void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
1272  mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
1273  mm.GetDevicePtr(dest_h_ptr, bytes, false);
1274  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1275  const MemoryType d_mt = base.d_mt;
1276  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1277  }
1278  dest_flags = dest_flags &
1279  ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1280 }
1281 
1282 bool MemoryManager::IsKnown_(const void *h_ptr)
1283 {
1284  return maps->memories.find(h_ptr) != maps->memories.end();
1285 }
1286 
1287 bool MemoryManager::IsAlias_(const void *h_ptr)
1288 {
1289  return maps->aliases.find(h_ptr) != maps->aliases.end();
1290 }
1291 
1292 void MemoryManager::Insert(void *h_ptr, size_t bytes,
1293  MemoryType h_mt, MemoryType d_mt)
1294 {
1295 #ifdef MFEM_TRACK_MEM_MANAGER
1296  mfem::out << "[mfem memory manager]: registering h_ptr: " << h_ptr
1297  << ", bytes: " << bytes << std::endl;
1298 #endif
1299  if (h_ptr == NULL)
1300  {
1301  MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
1302  return;
1303  }
1304  MFEM_VERIFY_TYPES(h_mt, d_mt);
1305 #ifdef MFEM_DEBUG
1306  auto res =
1307 #endif
1308  maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
1309 #ifdef MFEM_DEBUG
1310  if (res.second == false)
1311  {
1312  auto &m = res.first->second;
1313  MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt &&
1314  (m.d_mt == d_mt || (d_mt == MemoryType::DEFAULT &&
1315  m.d_mt == GetDualMemoryType(h_mt))),
1316  "Address already present with different attributes!");
1317 #ifdef MFEM_TRACK_MEM_MANAGER
1318  mfem::out << "[mfem memory manager]: repeated registration of h_ptr: "
1319  << h_ptr << std::endl;
1320 #endif
1321  }
1322 #endif
1323 }
1324 
1325 void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
1326  MemoryType h_mt, MemoryType d_mt)
1327 {
1328  // MFEM_VERIFY_TYPES(h_mt, d_mt); // done by Insert() below
1329  MFEM_ASSERT(h_ptr != NULL, "internal error");
1330  Insert(h_ptr, bytes, h_mt, d_mt);
1331  internal::Memory &mem = maps->memories.at(h_ptr);
1332  if (d_ptr == NULL && bytes != 0) { ctrl->Device(d_mt)->Alloc(mem); }
1333  else { mem.d_ptr = d_ptr; }
1334 }
1335 
1336 void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
1337  const size_t bytes, const bool base_is_alias)
1338 {
1339  size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
1340  static_cast<const char*>(base_ptr));
1341 #ifdef MFEM_TRACK_MEM_MANAGER
1342  mfem::out << "[mfem memory manager]: registering alias of base_ptr: "
1343  << base_ptr << ", offset: " << offset << ", bytes: " << bytes
1344  << ", base is alias: " << base_is_alias << std::endl;
1345 #endif
1346  if (!base_ptr)
1347  {
1348  MFEM_VERIFY(offset == 0,
1349  "Trying to add alias to NULL at offset " << offset);
1350  return;
1351  }
1352  if (base_is_alias)
1353  {
1354  const internal::Alias &alias = maps->aliases.at(base_ptr);
1355  MFEM_ASSERT(alias.mem,"");
1356  base_ptr = alias.mem->h_ptr;
1357  offset += alias.offset;
1358 #ifdef MFEM_TRACK_MEM_MANAGER
1359  mfem::out << "[mfem memory manager]: real base_ptr: " << base_ptr
1360  << std::endl;
1361 #endif
1362  }
1363  internal::Memory &mem = maps->memories.at(base_ptr);
1364  MFEM_VERIFY(offset + bytes <= mem.bytes, "invalid alias");
1365  auto res =
1366  maps->aliases.emplace(alias_ptr,
1367  internal::Alias{&mem, offset, 1, mem.h_mt});
1368  if (res.second == false) // alias_ptr was already in the map
1369  {
1370  internal::Alias &alias = res.first->second;
1371  // Update the alias data in case the existing alias is dangling
1372  alias.mem = &mem;
1373  alias.offset = offset;
1374  alias.h_mt = mem.h_mt;
1375  alias.counter++;
1376  }
1377 }
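// Worked example (sketch): for a base registered at p with bytes = 800,
// InsertAlias(p, (char*)p + 240, 80, false) stores {&mem, offset = 240,
// counter = 1}; inserting the same alias pointer again only bumps the counter,
// and an alias-of-alias folds its offset into the real base as shown above.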
1378 
1379 void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
1380 {
1381 #ifdef MFEM_TRACK_MEM_MANAGER
1382  mfem::out << "[mfem memory manager]: un-registering h_ptr: " << h_ptr
1383  << std::endl;
1384 #endif
1385  if (!h_ptr) { return; }
1386  auto mem_map_iter = maps->memories.find(h_ptr);
1387  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1388  internal::Memory &mem = mem_map_iter->second;
1389  if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1390  maps->memories.erase(mem_map_iter);
1391 }
1392 
1393 void MemoryManager::EraseDevice(void *h_ptr)
1394 {
1395  if (!h_ptr) { return; }
1396  auto mem_map_iter = maps->memories.find(h_ptr);
1397  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1398  internal::Memory &mem = mem_map_iter->second;
1399  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1400  mem.d_ptr = nullptr;
1401 }
1402 
1403 void MemoryManager::EraseAlias(void *alias_ptr)
1404 {
1405 #ifdef MFEM_TRACK_MEM_MANAGER
1406  mfem::out << "[mfem memory manager]: un-registering alias_ptr: " << alias_ptr
1407  << std::endl;
1408 #endif
1409  if (!alias_ptr) { return; }
1410  auto alias_map_iter = maps->aliases.find(alias_ptr);
1411  if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
1412  internal::Alias &alias = alias_map_iter->second;
1413  if (--alias.counter) { return; }
1414  maps->aliases.erase(alias_map_iter);
1415 }
1416 
1417 void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
1418  bool copy_data)
1419 {
1420  if (!h_ptr)
1421  {
1422  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1423  return NULL;
1424  }
1425  internal::Memory &mem = maps->memories.at(h_ptr);
1426  const MemoryType &h_mt = mem.h_mt;
1427  MemoryType &d_mt = mem.d_mt;
1428  MFEM_VERIFY_TYPES(h_mt, d_mt);
1429  if (!mem.d_ptr)
1430  {
1431  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1432  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1433  }
1434  // Aliases might have done some protections
1435  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1436  if (copy_data)
1437  {
1438  MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
1439  if (bytes) { ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes); }
1440  }
1441  ctrl->Host(h_mt)->Protect(mem, bytes);
1442  return mem.d_ptr;
1443 }
1444 
1445 void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
1446  bool copy)
1447 {
1448  if (!alias_ptr)
1449  {
1450  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1451  return NULL;
1452  }
1453  auto &alias_map = maps->aliases;
1454  auto alias_map_iter = alias_map.find(alias_ptr);
1455  if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
1456  const internal::Alias &alias = alias_map_iter->second;
1457  const size_t offset = alias.offset;
1458  internal::Memory &mem = *alias.mem;
1459  const MemoryType &h_mt = mem.h_mt;
1460  MemoryType &d_mt = mem.d_mt;
1461  MFEM_VERIFY_TYPES(h_mt, d_mt);
1462  if (!mem.d_ptr)
1463  {
1464  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1465  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1466  }
1467  void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
1468  void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
1469  MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
1470  MFEM_ASSERT(offset + bytes <= mem.bytes, "internal error");
1471  mem.d_rw = mem.h_rw = false;
1472  if (mem.d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1473  ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
1474  if (copy && mem.d_ptr)
1475  { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
1476  ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
1477  return alias_d_ptr;
1478 }
1479 
1480 void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
1481 {
1482  const internal::Memory &mem = maps->memories.at(ptr);
1483  MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
1484  MFEM_ASSERT(bytes <= mem.bytes, "internal error");
1485  const MemoryType &h_mt = mem.h_mt;
1486  const MemoryType &d_mt = mem.d_mt;
1487  MFEM_VERIFY_TYPES(h_mt, d_mt);
1488  // Aliases might have done some protections
1489  ctrl->Host(h_mt)->Unprotect(mem, bytes);
1490  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1491  if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
1492  if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
1493  return mem.h_ptr;
1494 }
1495 
1496 void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
1497  bool copy_data)
1498 {
1499  const internal::Alias &alias = maps->aliases.at(ptr);
1500  const internal::Memory *const mem = alias.mem;
1501  const MemoryType &h_mt = mem->h_mt;
1502  const MemoryType &d_mt = mem->d_mt;
1503  MFEM_VERIFY_TYPES(h_mt, d_mt);
1504  void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
1505  void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
1506  MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
1507  mem->h_rw = false;
1508  ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
1509  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1510  if (copy_data && mem->d_ptr)
1511  { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
1512  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
1513  return alias_h_ptr;
1514 }
1515 
1516 void MemoryManager::Init()
1517 {
1518  if (exists) { return; }
1519  maps = new internal::Maps();
1520  ctrl = new internal::Ctrl();
1521  ctrl->Configure();
1522  exists = true;
1523 }
1524 
1525 MemoryManager::MemoryManager() { Init(); }
1526 
1527 MemoryManager::~MemoryManager() { if (exists) { Destroy(); } }
1528 
1529 void MemoryManager::SetDualMemoryType(MemoryType mt, MemoryType dual_mt)
1530 {
1531  MFEM_VERIFY(!configured, "changing the dual MemoryTypes is not allowed after"
1532  " MemoryManager configuration!");
1533  UpdateDualMemoryType(mt, dual_mt);
1534 }
1535 
1536 void MemoryManager::UpdateDualMemoryType(MemoryType mt, MemoryType dual_mt)
1537 {
1538  MFEM_VERIFY((int)mt < MemoryTypeSize,
1539  "invalid MemoryType, mt = " << (int)mt);
1540  MFEM_VERIFY((int)dual_mt < MemoryTypeSize,
1541  "invalid dual MemoryType, dual_mt = " << (int)dual_mt);
1542 
1543  if ((IsHostMemory(mt) && IsDeviceMemory(dual_mt)) ||
1544  (IsDeviceMemory(mt) && IsHostMemory(dual_mt)))
1545  {
1546  dual_map[(int)mt] = dual_mt;
1547  }
1548  else
1549  {
1550  // mt + dual_mt is not a pair of host + device types: this is only allowed
1551  // when mt == dual_mt and mt is a host type; in this case we do not
1552  // actually update the dual
1553  MFEM_VERIFY(mt == dual_mt && IsHostMemory(mt),
1554  "invalid (mt, dual_mt) pair: ("
1555  << MemoryTypeName[(int)mt] << ", "
1556  << MemoryTypeName[(int)dual_mt] << ')');
1557  }
1558 }
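// Example (sketch): with the default dual_map defined near the end of this
// file, GetDualMemoryType(MemoryType::HOST_DEBUG) yields DEVICE_DEBUG and
// GetDualMemoryType(MemoryType::DEVICE_UMPIRE) yields HOST_UMPIRE; e.g.
// SetDualMemoryType(MemoryType::HOST, MemoryType::MANAGED) would override the
// HOST dual, but only before Configure() runs.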
1559 
1560 void MemoryManager::Configure(const MemoryType host_mt,
1561  const MemoryType device_mt)
1562 {
1563  MemoryManager::UpdateDualMemoryType(host_mt, device_mt);
1564  MemoryManager::UpdateDualMemoryType(device_mt, host_mt);
1565  if (device_mt == MemoryType::DEVICE_DEBUG)
1566  {
1567  for (int mt = (int)MemoryType::HOST; mt < (int)MemoryType::MANAGED; mt++)
1568  {
1569  MemoryManager::UpdateDualMemoryType(
1570  (MemoryType)mt, MemoryType::DEVICE_DEBUG);
1571  }
1572  }
1573  Init();
1574  host_mem_type = host_mt;
1575  device_mem_type = device_mt;
1576  configured = true;
1577 }
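// Usage sketch (not part of this file): Configure() is normally reached
// through the Device class rather than called directly, e.g.
//
//   mfem::Device device("debug");  // selects the HOST_DEBUG/DEVICE_DEBUG pair
//                                  // and re-points every host dual at
//                                  // DEVICE_DEBUG via the loop above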
1578 
1579 void MemoryManager::Destroy()
1580 {
1581  MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
1582 #ifdef MFEM_TRACK_MEM_MANAGER
1583  size_t num_memories = maps->memories.size();
1584  size_t num_aliases = maps->aliases.size();
1585  if (num_memories != 0 || num_aliases != 0)
1586  {
1587  MFEM_WARNING("...\n\t number of registered pointers: " << num_memories
1588  << "\n\t number of registered aliases : " << num_aliases);
1589  }
1590 #endif
1591  // Keep for debugging purposes:
1592 #if 0
1593  mfem::out << "Destroying the MemoryManager ...\n"
1594  << "remaining registered pointers : "
1595  << maps->memories.size() << '\n'
1596  << "remaining registered aliases : "
1597  << maps->aliases.size() << '\n';
1598 #endif
1599  for (auto& n : maps->memories)
1600  {
1601  internal::Memory &mem = n.second;
1602  bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
1603  if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1604  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1605  }
1606  delete maps; maps = nullptr;
1607  delete ctrl; ctrl = nullptr;
1608  host_mem_type = MemoryType::HOST;
1609  device_mem_type = MemoryType::HOST;
1610  exists = false;
1611  configured = false;
1612 }
1613 
1614 void MemoryManager::RegisterCheck(void *ptr)
1615 {
1616  if (ptr != NULL)
1617  {
1618  if (!IsKnown(ptr))
1619  {
1620  mfem_error("Pointer is not registered!");
1621  }
1622  }
1623 }
1624 
1625 int MemoryManager::PrintPtrs(std::ostream &os)
1626 {
1627  int n_out = 0;
1628  for (const auto& n : maps->memories)
1629  {
1630  const internal::Memory &mem = n.second;
1631  os << "\nkey " << n.first << ", "
1632  << "h_ptr " << mem.h_ptr << ", "
1633  << "d_ptr " << mem.d_ptr;
1634  n_out++;
1635  }
1636  if (maps->memories.size() > 0) { os << std::endl; }
1637  return n_out;
1638 }
1639 
1640 int MemoryManager::PrintAliases(std::ostream &os)
1641 {
1642  int n_out = 0;
1643  for (const auto& n : maps->aliases)
1644  {
1645  const internal::Alias &alias = n.second;
1646  os << "\nalias: key " << n.first << ", "
1647  << "h_ptr " << alias.mem->h_ptr << ", "
1648  << "offset " << alias.offset << ", "
1649  << "counter " << alias.counter;
1650  n_out++;
1651  }
1652  if (maps->aliases.size() > 0) { os << std::endl; }
1653  return n_out;
1654 }
1655 
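// [Editor's example] A minimal sketch of the two Print* methods above, which
// dump the registration maps (defaulting to mfem::out) and return the number
// of entries printed:
//
//    int np = mfem::mm.PrintPtrs();
//    int na = mfem::mm.PrintAliases();
//    mfem::out << np << " pointers, " << na << " aliases" << std::endl;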
1656 int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
1657  unsigned flags)
1658 {
1659  void *d_ptr = (flags & Mem::ALIAS) ?
1660  mm.GetAliasDevicePtr(h_ptr, size, false) :
1661  mm.GetDevicePtr(h_ptr, size, false);
1662  char *h_buf = new char[size];
1663 #if defined(MFEM_USE_CUDA)
1664  CuMemcpyDtoH(h_buf, d_ptr, size);
1665 #elif defined(MFEM_USE_HIP)
1666  HipMemcpyDtoH(h_buf, d_ptr, size);
1667 #else
1668  std::memcpy(h_buf, d_ptr, size);
1669 #endif
1670  int res = std::memcmp(h_ptr, h_buf, size);
1671  delete [] h_buf;
1672  return res;
1673 }
1674 
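// [Editor's example] A minimal sketch: user code reaches this helper through
// Memory<T>::CompareHostAndDevice(), e.g. to check that the host and device
// images of a Vector agree once both are valid:
//
//    mfem::Vector v(1024);
//    v.UseDevice(true);
//    v = 0.0;                        // runs on the device when enabled
//    v.HostRead();                   // make the host copy valid as well
//    MFEM_VERIFY(v.GetMemory().CompareHostAndDevice(v.Size()) == 0,
//                "host/device mismatch");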
1675 
1676 void MemoryPrintFlags(unsigned flags)
1677 {
1678  typedef Memory<int> Mem;
1679  mfem::out
1680  << "\n registered = " << bool(flags & Mem::Registered)
1681  << "\n owns host = " << bool(flags & Mem::OWNS_HOST)
1682  << "\n owns device = " << bool(flags & Mem::OWNS_DEVICE)
1683  << "\n owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
1684  << "\n valid host = " << bool(flags & Mem::VALID_HOST)
1685  << "\n valid device = " << bool(flags & Mem::VALID_DEVICE)
1686  << "\n device flag = " << bool(flags & Mem::USE_DEVICE)
1687  << "\n alias = " << bool(flags & Mem::ALIAS)
1688  << std::endl;
1689 }
1690 
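// [Editor's example] A sketch of debugger usage: this free function and the
// Memory<T>::PrintFlags() wrapper can both be called interactively (here from
// gdb, assuming a Memory<double> reachable as v.GetMemory()):
//
//    (gdb) call v.GetMemory().PrintFlags()
//    (gdb) call mfem::MemoryPrintFlags(v.GetMemory().flags)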
1691 void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr,
1692  bool alias)
1693 {
1694  if (!mm.exists) {return;}
1695  if (!alias)
1696  {
1697  auto it = maps->memories.find(h_ptr);
1698  MFEM_VERIFY(it != maps->memories.end(),
1699  "host pointer is not registered: h_ptr = " << h_ptr);
1700  MFEM_VERIFY(h_mt == it->second.h_mt, "host pointer MemoryType mismatch");
1701  }
1702  else
1703  {
1704  auto it = maps->aliases.find(h_ptr);
1705  MFEM_VERIFY(it != maps->aliases.end(),
1706  "alias pointer is not registered: h_ptr = " << h_ptr);
1707  MFEM_VERIFY(h_mt == it->second.h_mt, "alias pointer MemoryType mismatch");
1708  }
1709 }
1710 
1711 MemoryManager mm;
1712 
1713 bool MemoryManager::exists = false;
1714 bool MemoryManager::configured = false;
1715 
1716 MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
1717 MemoryType MemoryManager::device_mem_type = MemoryType::HOST;
1718 
1719 MemoryType MemoryManager::dual_map[MemoryTypeSize] =
1720 {
1721  /* HOST */ MemoryType::DEVICE,
1722  /* HOST_32 */ MemoryType::DEVICE,
1723  /* HOST_64 */ MemoryType::DEVICE,
1724  /* HOST_DEBUG */ MemoryType::DEVICE_DEBUG,
1725  /* HOST_UMPIRE */ MemoryType::DEVICE_UMPIRE,
1726  /* HOST_PINNED */ MemoryType::DEVICE,
1727  /* MANAGED */ MemoryType::MANAGED,
1728  /* DEVICE */ MemoryType::HOST,
1729  /* DEVICE_DEBUG */ MemoryType::HOST_DEBUG,
1730  /* DEVICE_UMPIRE */ MemoryType::HOST_UMPIRE,
1731  /* DEVICE_UMPIRE_2 */ MemoryType::HOST_UMPIRE
1732 };
1733 
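// [Editor's example] A minimal sketch: GetDualMemoryType() is a direct lookup
// into dual_map, so with the defaults above:
//
//    using mfem::MemoryType;
//    MemoryType d = mfem::MemoryManager::GetDualMemoryType(MemoryType::HOST_DEBUG);
//    // d == MemoryType::DEVICE_DEBUG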
1734 #ifdef MFEM_USE_UMPIRE
1735 const char * MemoryManager::h_umpire_name = "MFEM_HOST";
1736 const char * MemoryManager::d_umpire_name = "MFEM_DEVICE";
1737 const char * MemoryManager::d_umpire_2_name = "MFEM_DEVICE_2";
1738 #endif
1739 
1740 
1741 const char *MemoryTypeName[MemoryTypeSize] =
1742 {
1743  "host-std", "host-32", "host-64", "host-debug", "host-umpire", "host-pinned",
1744 #if defined(MFEM_USE_CUDA)
1745  "cuda-uvm",
1746  "cuda",
1747 #elif defined(MFEM_USE_HIP)
1748  "hip-uvm",
1749  "hip",
1750 #else
1751  "managed",
1752  "device",
1753 #endif
1754  "device-debug",
1755 #if defined(MFEM_USE_CUDA)
1756  "cuda-umpire",
1757  "cuda-umpire-2",
1758 #elif defined(MFEM_USE_HIP)
1759  "hip-umpire",
1760  "hip-umpire-2",
1761 #else
1762  "device-umpire",
1763  "device-umpire-2",
1764 #endif
1765 };
1766 
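// [Editor's example] A minimal sketch: MemoryTypeName[] is indexed by the
// MemoryType enumerators, e.g. to log the configured host/device pair:
//
//    mfem::out << "host: "
//              << mfem::MemoryTypeName[(int)mfem::MemoryManager::GetHostMemoryType()]
//              << ", device: "
//              << mfem::MemoryTypeName[(int)mfem::MemoryManager::GetDeviceMemoryType()]
//              << std::endl;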
1767 } // namespace mfem