MFEM  v4.5.1
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
mem_manager.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "forall.hpp"
13 #include "mem_manager.hpp"
14 
15 #include <list>
16 #include <cstring> // std::memcpy, std::memcmp
17 #include <unordered_map>
18 #include <algorithm> // std::max
19 
20 // Uncomment to try _WIN32 platform
21 //#define _WIN32
22 //#define _aligned_malloc(s,a) malloc(s)
23 
24 #ifndef _WIN32
25 #include <unistd.h>
26 #include <signal.h>
27 #include <sys/mman.h>
28 #define mfem_memalign(p,a,s) posix_memalign(p,a,s)
29 #define mfem_aligned_free free
30 #else
31 #define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
32 #define mfem_aligned_free _aligned_free
33 #endif
34 
35 #ifdef MFEM_USE_UMPIRE
36 #include <umpire/Umpire.hpp>
37 #include <umpire/strategy/QuickPool.hpp>
38 
39 // Make sure Umpire is build with CUDA support if MFEM is built with it.
40 #if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
41 #error "CUDA is not enabled in Umpire!"
42 #endif
43 // Make sure Umpire is build with HIP support if MFEM is built with it.
44 #if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
45 #error "HIP is not enabled in Umpire!"
46 #endif
47 #endif // MFEM_USE_UMPIRE
48 
49 // Internal debug option, useful for tracking some memory manager operations.
50 // #define MFEM_TRACK_MEM_MANAGER
51 
52 namespace mfem
53 {
54 
56 {
57  switch (mc)
58  {
59  case MemoryClass::HOST: return mm.GetHostMemoryType();
64  }
65  MFEM_VERIFY(false,"");
66  return MemoryType::HOST;
67 }
68 
69 
71 {
72  switch (mc)
73  {
74  case MemoryClass::HOST: return IsHostMemory(mt);
76  return (mt == MemoryType::HOST_32 ||
77  mt == MemoryType::HOST_64 ||
80  return (mt == MemoryType::HOST_64 ||
82  case MemoryClass::DEVICE: return IsDeviceMemory(mt);
84  return (mt == MemoryType::MANAGED);
85  }
86  MFEM_ABORT("invalid MemoryClass");
87  return false;
88 }
89 
90 
91 static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
92 {
93  MFEM_VERIFY(IsHostMemory(h_mt), "h_mt = " << (int)h_mt);
94  MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == MemoryType::DEFAULT,
95  "d_mt = " << (int)d_mt);
96  // If h_mt == MemoryType::HOST_DEBUG, then d_mt == MemoryType::DEVICE_DEBUG
97  // or d_mt == MemoryType::DEFAULT
98  MFEM_VERIFY(h_mt != MemoryType::HOST_DEBUG ||
99  d_mt == MemoryType::DEVICE_DEBUG ||
100  d_mt == MemoryType::DEFAULT,
101  "d_mt = " << MemoryTypeName[(int)d_mt]);
102  // If d_mt == MemoryType::DEVICE_DEBUG, then h_mt != MemoryType::MANAGED
103  MFEM_VERIFY(d_mt != MemoryType::DEVICE_DEBUG ||
104  h_mt != MemoryType::MANAGED,
105  "h_mt = " << MemoryTypeName[(int)h_mt]);
106 #if 0
107  const bool sync =
108  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE) ||
109  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE) ||
111  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE) ||
112  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE) ||
114  (h_mt == MemoryType::HOST_DEBUG && d_mt == MemoryType::DEVICE_DEBUG) ||
115  (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
116  (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
117  (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
118  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE) ||
119  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE) ||
120  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE_2);
121  MFEM_VERIFY(sync, "");
122 #endif
123 }
124 
126 {
127  // | HOST HOST_32 HOST_64 DEVICE MANAGED
128  // ---------+---------------------------------------------
129  // HOST | HOST HOST_32 HOST_64 DEVICE MANAGED
130  // HOST_32 | HOST_32 HOST_32 HOST_64 DEVICE MANAGED
131  // HOST_64 | HOST_64 HOST_64 HOST_64 DEVICE MANAGED
132  // DEVICE | DEVICE DEVICE DEVICE DEVICE MANAGED
133  // MANAGED | MANAGED MANAGED MANAGED MANAGED MANAGED
134 
135  // Using the enumeration ordering:
136  // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
137  // the above table is simply: a*b = max(a,b).
138 
139  return std::max(mc1, mc2);
140 }
141 
142 
143 // Instantiate Memory<T>::PrintFlags for T = int and T = double.
144 template void Memory<int>::PrintFlags() const;
145 template void Memory<double>::PrintFlags() const;
146 
147 // Instantiate Memory<T>::CompareHostAndDevice for T = int and T = double.
148 template int Memory<int>::CompareHostAndDevice(int size) const;
149 template int Memory<double>::CompareHostAndDevice(int size) const;
150 
151 
152 namespace internal
153 {
154 
155 /// Memory class that holds:
156 /// - the host and the device pointer
157 /// - the size in bytes of this memory region
158 /// - the host and device type of this memory region
159 struct Memory
160 {
161  void *const h_ptr;
162  void *d_ptr;
163  const size_t bytes;
164  const MemoryType h_mt;
165  MemoryType d_mt;
166  mutable bool h_rw, d_rw;
167  Memory(void *p, size_t b, MemoryType h, MemoryType d):
168  h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
169  h_rw(true), d_rw(true) { }
170 };
171 
172 /// Alias class that holds the base memory region and the offset
173 struct Alias
174 {
175  Memory *mem;
176  size_t offset;
177  size_t counter;
178  // 'h_mt' is already stored in 'mem', however, we use this field for type
179  // checking since the alias may be dangling, i.e. 'mem' may be invalid.
180  MemoryType h_mt;
181 };
182 
183 /// Maps for the Memory and the Alias classes
184 typedef std::unordered_map<const void*, Memory> MemoryMap;
185 typedef std::unordered_map<const void*, Alias> AliasMap;
186 
187 struct Maps
188 {
189  MemoryMap memories;
190  AliasMap aliases;
191 };
192 
193 } // namespace mfem::internal
194 
195 static internal::Maps *maps;
196 
197 namespace internal
198 {
199 
200 /// The host memory space base abstract class
201 class HostMemorySpace
202 {
203 public:
204  virtual ~HostMemorySpace() { }
205  virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
206  virtual void Dealloc(void *ptr) { std::free(ptr); }
207  virtual void Protect(const Memory&, size_t) { }
208  virtual void Unprotect(const Memory&, size_t) { }
209  virtual void AliasProtect(const void*, size_t) { }
210  virtual void AliasUnprotect(const void*, size_t) { }
211 };
212 
213 /// The device memory space base abstract class
214 class DeviceMemorySpace
215 {
216 public:
217  virtual ~DeviceMemorySpace() { }
218  virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
219  virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
220  virtual void Protect(const Memory&) { }
221  virtual void Unprotect(const Memory&) { }
222  virtual void AliasProtect(const void*, size_t) { }
223  virtual void AliasUnprotect(const void*, size_t) { }
224  virtual void *HtoD(void *dst, const void *src, size_t bytes)
225  { return std::memcpy(dst, src, bytes); }
226  virtual void *DtoD(void *dst, const void *src, size_t bytes)
227  { return std::memcpy(dst, src, bytes); }
228  virtual void *DtoH(void *dst, const void *src, size_t bytes)
229  { return std::memcpy(dst, src, bytes); }
230 };
231 
232 /// The default std:: host memory space
233 class StdHostMemorySpace : public HostMemorySpace { };
234 
235 /// The No host memory space
236 struct NoHostMemorySpace : public HostMemorySpace
237 {
238  void Alloc(void**, const size_t) { mfem_error("! Host Alloc error"); }
239 };
240 
241 /// The aligned 32 host memory space
242 class Aligned32HostMemorySpace : public HostMemorySpace
243 {
244 public:
245  Aligned32HostMemorySpace(): HostMemorySpace() { }
246  void Alloc(void **ptr, size_t bytes)
247  { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
248  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
249 };
250 
251 /// The aligned 64 host memory space
252 class Aligned64HostMemorySpace : public HostMemorySpace
253 {
254 public:
255  Aligned64HostMemorySpace(): HostMemorySpace() { }
256  void Alloc(void **ptr, size_t bytes)
257  { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
258  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
259 };
260 
261 #ifndef _WIN32
262 static uintptr_t pagesize = 0;
263 static uintptr_t pagemask = 0;
264 
265 /// Returns the restricted base address of the DEBUG segment
266 inline const void *MmuAddrR(const void *ptr)
267 {
268  const uintptr_t addr = (uintptr_t) ptr;
269  return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
270 }
271 
272 /// Returns the prolongated base address of the MMU segment
273 inline const void *MmuAddrP(const void *ptr)
274 {
275  const uintptr_t addr = (uintptr_t) ptr;
276  return (void*) (addr & ~pagemask);
277 }
278 
279 /// Compute the restricted length for the MMU segment
280 inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
281 {
282  // a ---->A:| |:B<---- b
283  const uintptr_t a = (uintptr_t) ptr;
284  const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
285  MFEM_ASSERT(a <= A, "");
286  const uintptr_t b = a + bytes;
287  const uintptr_t B = b & ~pagemask;
288  MFEM_ASSERT(B <= b, "");
289  const uintptr_t length = B > A ? B - A : 0;
290  MFEM_ASSERT(length % pagesize == 0,"");
291  return length;
292 }
293 
294 /// Compute the prolongated length for the MMU segment
295 inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
296 {
297  // |:A<----a | | b---->B:|
298  const uintptr_t a = (uintptr_t) ptr;
299  const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
300  MFEM_ASSERT(A <= a, "");
301  const uintptr_t b = a + bytes;
302  const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
303  MFEM_ASSERT(b <= B, "");
304  MFEM_ASSERT(B >= A,"");
305  const uintptr_t length = B - A;
306  MFEM_ASSERT(length % pagesize == 0,"");
307  return length;
308 }
309 
310 /// The protected access error, used for the host
311 static void MmuError(int, siginfo_t *si, void*)
312 {
313  constexpr size_t buf_size = 64;
314  fflush(0);
315  char str[buf_size];
316  const void *ptr = si->si_addr;
317  snprintf(str, buf_size, "Error while accessing address %p!", ptr);
318  mfem::out << std::endl << "An illegal memory access was made!";
319  MFEM_ABORT(str);
320 }
321 
322 /// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
323 static void MmuInit()
324 {
325  if (pagesize > 0) { return; }
326  struct sigaction sa;
327  sa.sa_flags = SA_SIGINFO;
328  sigemptyset(&sa.sa_mask);
329  sa.sa_sigaction = MmuError;
330  if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
331  if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
332  pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
333  MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
334  pagemask = pagesize - 1;
335 }
336 
337 /// MMU allocation, through ::mmap
338 inline void MmuAlloc(void **ptr, const size_t bytes)
339 {
340  const size_t length = bytes == 0 ? 8 : bytes;
341  const int prot = PROT_READ | PROT_WRITE;
342  const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
343  *ptr = ::mmap(NULL, length, prot, flags, -1, 0);
344  if (*ptr == MAP_FAILED) { throw ::std::bad_alloc(); }
345 }
346 
347 /// MMU deallocation, through ::munmap
348 inline void MmuDealloc(void *ptr, const size_t bytes)
349 {
350  const size_t length = bytes == 0 ? 8 : bytes;
351  if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
352 }
353 
354 /// MMU protection, through ::mprotect with no read/write accesses
355 inline void MmuProtect(const void *ptr, const size_t bytes)
356 {
357  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
358  if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
359  if (mmu_protect_error) { mfem_error("MMU protection (NONE) error"); }
360 }
361 
362 /// MMU un-protection, through ::mprotect with read/write accesses
363 inline void MmuAllow(const void *ptr, const size_t bytes)
364 {
365  const int RW = PROT_READ | PROT_WRITE;
366  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
367  if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
368  if (mmu_protect_error) { mfem_error("MMU protection (R/W) error"); }
369 }
370 #else
371 inline void MmuInit() { }
372 inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
373 inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
374 inline void MmuProtect(const void*, const size_t) { }
375 inline void MmuAllow(const void*, const size_t) { }
376 inline const void *MmuAddrR(const void *a) { return a; }
377 inline const void *MmuAddrP(const void *a) { return a; }
378 inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
379 inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
380 #endif
381 
382 /// The MMU host memory space
383 class MmuHostMemorySpace : public HostMemorySpace
384 {
385 public:
386  MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
387  void Alloc(void **ptr, size_t bytes) { MmuAlloc(ptr, bytes); }
388  void Dealloc(void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
389  void Protect(const Memory& mem, size_t bytes)
390  { if (mem.h_rw) { mem.h_rw = false; MmuProtect(mem.h_ptr, bytes); } }
391  void Unprotect(const Memory &mem, size_t bytes)
392  { if (!mem.h_rw) { mem.h_rw = true; MmuAllow(mem.h_ptr, bytes); } }
393  /// Aliases need to be restricted during protection
394  void AliasProtect(const void *ptr, size_t bytes)
395  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
396  /// Aliases need to be prolongated for un-protection
397  void AliasUnprotect(const void *ptr, size_t bytes)
398  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
399 };
400 
401 /// The UVM host memory space
402 class UvmHostMemorySpace : public HostMemorySpace
403 {
404 public:
405  UvmHostMemorySpace(): HostMemorySpace() { }
406  void Alloc(void **ptr, size_t bytes) { CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
407  void Dealloc(void *ptr) { CuMemFree(ptr); }
408 };
409 
410 /// The 'No' device memory space
411 class NoDeviceMemorySpace: public DeviceMemorySpace
412 {
413 public:
414  void Alloc(internal::Memory&) { mfem_error("! Device Alloc"); }
415  void Dealloc(Memory&) { mfem_error("! Device Dealloc"); }
416  void *HtoD(void*, const void*, size_t) { mfem_error("!HtoD"); return nullptr; }
417  void *DtoD(void*, const void*, size_t) { mfem_error("!DtoD"); return nullptr; }
418  void *DtoH(void*, const void*, size_t) { mfem_error("!DtoH"); return nullptr; }
419 };
420 
421 /// The std:: device memory space, used with the 'debug' device
422 class StdDeviceMemorySpace : public DeviceMemorySpace { };
423 
424 /// The CUDA device memory space
425 class CudaDeviceMemorySpace: public DeviceMemorySpace
426 {
427 public:
428  CudaDeviceMemorySpace(): DeviceMemorySpace() { }
429  void Alloc(Memory &base) { CuMemAlloc(&base.d_ptr, base.bytes); }
430  void Dealloc(Memory &base) { CuMemFree(base.d_ptr); }
431  void *HtoD(void *dst, const void *src, size_t bytes)
432  { return CuMemcpyHtoD(dst, src, bytes); }
433  void *DtoD(void* dst, const void* src, size_t bytes)
434  { return CuMemcpyDtoD(dst, src, bytes); }
435  void *DtoH(void *dst, const void *src, size_t bytes)
436  { return CuMemcpyDtoH(dst, src, bytes); }
437 };
438 
439 /// The CUDA/HIP page-locked host memory space
440 class HostPinnedMemorySpace: public HostMemorySpace
441 {
442 public:
443  HostPinnedMemorySpace(): HostMemorySpace() { }
444  void Alloc(void ** ptr, size_t bytes) override
445  {
446 #ifdef MFEM_USE_CUDA
447  CuMemAllocHostPinned(ptr, bytes);
448 #endif
449 #ifdef MFEM_USE_HIP
450  HipMemAllocHostPinned(ptr, bytes);
451 #endif
452  }
453  void Dealloc(void *ptr) override
454  {
455 #ifdef MFEM_USE_CUDA
456  CuMemFreeHostPinned(ptr);
457 #endif
458 #ifdef MFEM_USE_HIP
460 #endif
461  }
462 };
463 
464 /// The HIP device memory space
465 class HipDeviceMemorySpace: public DeviceMemorySpace
466 {
467 public:
468  HipDeviceMemorySpace(): DeviceMemorySpace() { }
469  void Alloc(Memory &base) { HipMemAlloc(&base.d_ptr, base.bytes); }
470  void Dealloc(Memory &base) { HipMemFree(base.d_ptr); }
471  void *HtoD(void *dst, const void *src, size_t bytes)
472  { return HipMemcpyHtoD(dst, src, bytes); }
473  void *DtoD(void* dst, const void* src, size_t bytes)
474  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side synchronization so
475  // instead we use hipMemcpyAsync to get similar behavior.
476  // for more info see: https://github.com/mfem/mfem/pull/2780
477  { return HipMemcpyDtoDAsync(dst, src, bytes); }
478  void *DtoH(void *dst, const void *src, size_t bytes)
479  { return HipMemcpyDtoH(dst, src, bytes); }
480 };
481 
482 /// The UVM device memory space.
483 class UvmCudaMemorySpace : public DeviceMemorySpace
484 {
485 public:
486  void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
487  void Dealloc(Memory&) { }
488  void *HtoD(void *dst, const void *src, size_t bytes)
489  {
490  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
491  return CuMemcpyHtoD(dst, src, bytes);
492  }
493  void *DtoD(void* dst, const void* src, size_t bytes)
494  { return CuMemcpyDtoD(dst, src, bytes); }
495  void *DtoH(void *dst, const void *src, size_t bytes)
496  {
497  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
498  return CuMemcpyDtoH(dst, src, bytes);
499  }
500 };
501 
502 /// The MMU device memory space
503 class MmuDeviceMemorySpace : public DeviceMemorySpace
504 {
505 public:
506  MmuDeviceMemorySpace(): DeviceMemorySpace() { }
507  void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
508  void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
509  void Protect(const Memory &m)
510  { if (m.d_rw) { m.d_rw = false; MmuProtect(m.d_ptr, m.bytes); } }
511  void Unprotect(const Memory &m)
512  { if (!m.d_rw) { m.d_rw = true; MmuAllow(m.d_ptr, m.bytes); } }
513  /// Aliases need to be restricted during protection
514  void AliasProtect(const void *ptr, size_t bytes)
515  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
516  /// Aliases need to be prolongated for un-protection
517  void AliasUnprotect(const void *ptr, size_t bytes)
518  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
519  void *HtoD(void *dst, const void *src, size_t bytes)
520  { return std::memcpy(dst, src, bytes); }
521  void *DtoD(void *dst, const void *src, size_t bytes)
522  { return std::memcpy(dst, src, bytes); }
523  void *DtoH(void *dst, const void *src, size_t bytes)
524  { return std::memcpy(dst, src, bytes); }
525 };
526 
527 #ifdef MFEM_USE_UMPIRE
528 class UmpireMemorySpace
529 {
530 protected:
531  umpire::ResourceManager &rm;
532  umpire::Allocator allocator;
533  bool owns_allocator{false};
534 
535 public:
536  // TODO: this only releases unused memory
537  virtual ~UmpireMemorySpace() { if (owns_allocator) { allocator.release(); } }
538  UmpireMemorySpace(const char * name, const char * space)
539  : rm(umpire::ResourceManager::getInstance())
540  {
541  if (!rm.isAllocator(name))
542  {
543  allocator = rm.makeAllocator<umpire::strategy::QuickPool>(
544  name, rm.getAllocator(space));
545  owns_allocator = true;
546  }
547  else
548  {
549  allocator = rm.getAllocator(name);
550  owns_allocator = false;
551  }
552  }
553 };
554 
555 /// The Umpire host memory space
556 class UmpireHostMemorySpace : public HostMemorySpace, public UmpireMemorySpace
557 {
558 private:
559  umpire::strategy::AllocationStrategy *strat;
560 public:
561  UmpireHostMemorySpace(const char * name)
562  : HostMemorySpace(),
563  UmpireMemorySpace(name, "HOST"),
564  strat(allocator.getAllocationStrategy()) {}
565  void Alloc(void **ptr, size_t bytes) override
566  { *ptr = allocator.allocate(bytes); }
567  void Dealloc(void *ptr) override { allocator.deallocate(ptr); }
568  void Insert(void *ptr, size_t bytes)
569  { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
570 };
571 
572 /// The Umpire device memory space
573 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
574 class UmpireDeviceMemorySpace : public DeviceMemorySpace,
575  public UmpireMemorySpace
576 {
577 public:
578  UmpireDeviceMemorySpace(const char * name)
579  : DeviceMemorySpace(),
580  UmpireMemorySpace(name, "DEVICE") {}
581  void Alloc(Memory &base) override
582  { base.d_ptr = allocator.allocate(base.bytes); }
583  void Dealloc(Memory &base) override { rm.deallocate(base.d_ptr); }
584  void *HtoD(void *dst, const void *src, size_t bytes) override
585  {
586 #ifdef MFEM_USE_CUDA
587  return CuMemcpyHtoD(dst, src, bytes);
588 #endif
589 #ifdef MFEM_USE_HIP
590  return HipMemcpyHtoD(dst, src, bytes);
591 #endif
592  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
593  }
594  void *DtoD(void* dst, const void* src, size_t bytes) override
595  {
596 #ifdef MFEM_USE_CUDA
597  return CuMemcpyDtoD(dst, src, bytes);
598 #endif
599 #ifdef MFEM_USE_HIP
600  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side synchronization so
601  // instead we use hipMemcpyAsync to get similar behavior.
602  // for more info see: https://github.com/mfem/mfem/pull/2780
603  return HipMemcpyDtoDAsync(dst, src, bytes);
604 #endif
605  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
606  }
607  void *DtoH(void *dst, const void *src, size_t bytes) override
608  {
609 #ifdef MFEM_USE_CUDA
610  return CuMemcpyDtoH(dst, src, bytes);
611 #endif
612 #ifdef MFEM_USE_HIP
613  return HipMemcpyDtoH(dst, src, bytes);
614 #endif
615  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
616  }
617 };
618 #else
619 class UmpireDeviceMemorySpace : public NoDeviceMemorySpace
620 {
621 public:
622  UmpireDeviceMemorySpace(const char * /*unused*/) {}
623 };
624 #endif // MFEM_USE_CUDA || MFEM_USE_HIP
625 #endif // MFEM_USE_UMPIRE
626 
627 /// Memory space controller class
628 class Ctrl
629 {
630  typedef MemoryType MT;
631 
632 public:
633  HostMemorySpace *host[HostMemoryTypeSize];
634  DeviceMemorySpace *device[DeviceMemoryTypeSize];
635 
636 public:
637  Ctrl(): host{nullptr}, device{nullptr} { }
638 
639  void Configure()
640  {
641  if (host[HostMemoryType])
642  {
643  mfem_error("Memory backends have already been configured!");
644  }
645 
646  // Filling the host memory backends
647  // HOST, HOST_32 & HOST_64 are always ready
648  // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
649  host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
650  host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
651  host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
652  // HOST_DEBUG is delayed, as it reroutes signals
653  host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
654  host[static_cast<int>(MT::HOST_UMPIRE)] = nullptr;
655  host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();
656 
657  // Filling the device memory backends, shifting with the device size
658  constexpr int shift = DeviceMemoryType;
659  device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
660  // All other devices controllers are delayed
661  device[static_cast<int>(MemoryType::DEVICE)-shift] = nullptr;
662  device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
663  device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
664  device[static_cast<int>(MT::DEVICE_UMPIRE_2)-shift] = nullptr;
665  }
666 
667  HostMemorySpace* Host(const MemoryType mt)
668  {
669  const int mt_i = static_cast<int>(mt);
670  // Delayed host controllers initialization
671  if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
672  MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
673  return host[mt_i];
674  }
675 
676  DeviceMemorySpace* Device(const MemoryType mt)
677  {
678  const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
679  MFEM_ASSERT(mt_i >= 0,"");
680  // Lazy device controller initializations
681  if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
682  MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
683  return device[mt_i];
684  }
685 
686  ~Ctrl()
687  {
688  constexpr int mt_h = HostMemoryType;
689  constexpr int mt_d = DeviceMemoryType;
690  for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
691  for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
692  }
693 
694 private:
695  HostMemorySpace* NewHostCtrl(const MemoryType mt)
696  {
697  switch (mt)
698  {
699  case MT::HOST_DEBUG: return new MmuHostMemorySpace();
700 #ifdef MFEM_USE_UMPIRE
701  case MT::HOST_UMPIRE:
702  return new UmpireHostMemorySpace(
704 #else
705  case MT::HOST_UMPIRE: return new NoHostMemorySpace();
706 #endif
707  case MT::HOST_PINNED: return new HostPinnedMemorySpace();
708  default: MFEM_ABORT("Unknown host memory controller!");
709  }
710  return nullptr;
711  }
712 
713  DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
714  {
715  switch (mt)
716  {
717 #ifdef MFEM_USE_UMPIRE
718  case MT::DEVICE_UMPIRE:
719  return new UmpireDeviceMemorySpace(
721  case MT::DEVICE_UMPIRE_2:
722  return new UmpireDeviceMemorySpace(
724 #else
725  case MT::DEVICE_UMPIRE: return new NoDeviceMemorySpace();
726  case MT::DEVICE_UMPIRE_2: return new NoDeviceMemorySpace();
727 #endif
728  case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
729  case MT::DEVICE:
730  {
731 #if defined(MFEM_USE_CUDA)
732  return new CudaDeviceMemorySpace();
733 #elif defined(MFEM_USE_HIP)
734  return new HipDeviceMemorySpace();
735 #else
736  MFEM_ABORT("No device memory controller!");
737  break;
738 #endif
739  }
740  default: MFEM_ABORT("Unknown device memory controller!");
741  }
742  return nullptr;
743  }
744 };
745 
746 } // namespace mfem::internal
747 
748 static internal::Ctrl *ctrl;
749 
750 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
751  unsigned &flags)
752 {
753  MFEM_ASSERT(exists, "Internal error!");
754  if (IsHostMemory(mt))
755  {
756  MFEM_ASSERT(mt != MemoryType::HOST && h_tmp == nullptr,
757  "Internal error!");
758  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
759  // evaluated at the time when the device pointer is allocated, see
760  // GetDevicePtr() and GetAliasDevicePtr()
761  const MemoryType d_mt = MemoryType::DEFAULT;
762  // We rely on the next call using lazy dev alloc
763  return New_(h_tmp, bytes, mt, d_mt, Mem::VALID_HOST, flags);
764  }
765  else
766  {
767  const MemoryType h_mt = GetDualMemoryType(mt);
768  return New_(h_tmp, bytes, h_mt, mt, Mem::VALID_DEVICE, flags);
769  }
770 }
771 
772 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType h_mt,
773  MemoryType d_mt, unsigned valid_flags,
774  unsigned &flags)
775 {
776  MFEM_ASSERT(exists, "Internal error!");
777  MFEM_ASSERT(IsHostMemory(h_mt), "h_mt must be host type");
778  MFEM_ASSERT(IsDeviceMemory(d_mt) || d_mt == h_mt ||
779  d_mt == MemoryType::DEFAULT,
780  "d_mt must be device type, the same is h_mt, or DEFAULT");
781  MFEM_ASSERT((h_mt != MemoryType::HOST || h_tmp != nullptr) &&
782  (h_mt == MemoryType::HOST || h_tmp == nullptr),
783  "Internal error");
784  MFEM_ASSERT((valid_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) == 0,
785  "Internal error");
786  void *h_ptr;
787  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
788  else { h_ptr = h_tmp; }
790  Mem::OWNS_DEVICE | valid_flags;
791  // The other New_() method relies on this lazy allocation behavior.
792  mm.Insert(h_ptr, bytes, h_mt, d_mt); // lazy dev alloc
793  // mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); // non-lazy dev alloc
794 
795  // MFEM_VERIFY_TYPES(h_mt, mt); // done by mm.Insert() above
796  CheckHostMemoryType_(h_mt, h_ptr, false);
797 
798  return h_ptr;
799 }
800 
801 void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
802  MemoryType mt,
803  bool own, bool alias, unsigned &flags)
804 {
805  MFEM_CONTRACT_VAR(alias);
806  MFEM_ASSERT(exists, "Internal error!");
807  MFEM_VERIFY(!alias, "Cannot register an alias!");
808  const bool is_host_mem = IsHostMemory(mt);
809  const MemType h_mt = is_host_mem ? mt : GetDualMemoryType(mt);
810  const MemType d_mt = is_host_mem ? MemoryType::DEFAULT : mt;
811  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
812  // evaluated at the time when the device pointer is allocated, see
813  // GetDevicePtr() and GetAliasDevicePtr()
814 
815  MFEM_VERIFY_TYPES(h_mt, d_mt);
816 
817  if (ptr == nullptr && h_tmp == nullptr)
818  {
819  MFEM_VERIFY(bytes == 0, "internal error");
820  return nullptr;
821  }
822 
824  void *h_ptr;
825 
826  if (is_host_mem) // HOST TYPES + MANAGED
827  {
828  h_ptr = ptr;
829  mm.Insert(h_ptr, bytes, h_mt, d_mt);
830  flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
832  }
833  else // DEVICE TYPES
834  {
835  MFEM_VERIFY(ptr || bytes == 0,
836  "cannot register NULL device pointer with bytes = " << bytes);
837  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
838  else { h_ptr = h_tmp; }
839  mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
840  flags = own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE;
841  flags |= (Mem::OWNS_HOST | Mem::VALID_DEVICE);
842  }
843  CheckHostMemoryType_(h_mt, h_ptr, alias);
844  return h_ptr;
845 }
846 
847 void MemoryManager::Register2_(void *h_ptr, void *d_ptr, size_t bytes,
848  MemoryType h_mt, MemoryType d_mt,
849  bool own, bool alias, unsigned &flags)
850 {
851  MFEM_CONTRACT_VAR(alias);
852  MFEM_ASSERT(exists, "Internal error!");
853  MFEM_ASSERT(!alias, "Cannot register an alias!");
854  MFEM_VERIFY_TYPES(h_mt, d_mt);
855 
856  if (h_ptr == nullptr && d_ptr == nullptr)
857  {
858  MFEM_VERIFY(bytes == 0, "internal error");
859  return;
860  }
861 
863 
864  MFEM_VERIFY(d_ptr || bytes == 0,
865  "cannot register NULL device pointer with bytes = " << bytes);
866  mm.InsertDevice(d_ptr, h_ptr, bytes, h_mt, d_mt);
867  flags = (own ? flags | (Mem::OWNS_HOST | Mem::OWNS_DEVICE) :
868  flags & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE)) |
870 
871  CheckHostMemoryType_(h_mt, h_ptr, alias);
872 }
873 
874 void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
875  unsigned base_flags, unsigned &flags)
876 {
877  mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
878  base_flags & Mem::ALIAS);
879  flags = (base_flags | Mem::ALIAS | Mem::OWNS_INTERNAL) &
881 }
882 
883 void MemoryManager::SetDeviceMemoryType_(void *h_ptr, unsigned flags,
884  MemoryType d_mt)
885 {
886  MFEM_VERIFY(h_ptr, "cannot set the device memory type: Memory is empty!");
887  if (!(flags & Mem::ALIAS))
888  {
889  auto mem_iter = maps->memories.find(h_ptr);
890  MFEM_VERIFY(mem_iter != maps->memories.end(), "internal error");
891  internal::Memory &mem = mem_iter->second;
892  if (mem.d_mt == d_mt) { return; }
893  MFEM_VERIFY(mem.d_ptr == nullptr, "cannot set the device memory type:"
894  " device memory is allocated!");
895  mem.d_mt = d_mt;
896  }
897  else
898  {
899  auto alias_iter = maps->aliases.find(h_ptr);
900  MFEM_VERIFY(alias_iter != maps->aliases.end(), "internal error");
901  internal::Alias &alias = alias_iter->second;
902  internal::Memory &base_mem = *alias.mem;
903  if (base_mem.d_mt == d_mt) { return; }
904  MFEM_VERIFY(base_mem.d_ptr == nullptr,
905  "cannot set the device memory type:"
906  " alias' base device memory is allocated!");
907  base_mem.d_mt = d_mt;
908  }
909 }
910 
911 MemoryType MemoryManager::Delete_(void *h_ptr, MemoryType h_mt, unsigned flags)
912 {
913  const bool alias = flags & Mem::ALIAS;
914  const bool registered = flags & Mem::REGISTERED;
915  const bool owns_host = flags & Mem::OWNS_HOST;
916  const bool owns_device = flags & Mem::OWNS_DEVICE;
917  const bool owns_internal = flags & Mem::OWNS_INTERNAL;
918  MFEM_ASSERT(IsHostMemory(h_mt), "invalid h_mt = " << (int)h_mt);
919  // MFEM_ASSERT(registered || IsHostMemory(h_mt),"");
920  MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
921  // If at least one of the 'own_*' flags is true then 'registered' must be
922  // true too. An acceptable exception is the special case when 'h_ptr' is
923  // NULL, and both 'own_device' and 'own_internal' are false -- this case is
924  // an exception only when 'own_host' is true and 'registered' is false.
925  MFEM_ASSERT(registered || !(owns_host || owns_device || owns_internal) ||
926  (!(owns_device || owns_internal) && h_ptr == nullptr),
927  "invalid Memory state");
928  if (!mm.exists || !registered) { return h_mt; }
929  if (alias)
930  {
931  if (owns_internal)
932  {
933  MFEM_ASSERT(mm.IsAlias(h_ptr), "");
934  MFEM_ASSERT(h_mt == maps->aliases.at(h_ptr).h_mt, "");
935  mm.EraseAlias(h_ptr);
936  }
937  }
938  else // Known
939  {
940  if (owns_host && (h_mt != MemoryType::HOST))
941  { ctrl->Host(h_mt)->Dealloc(h_ptr); }
942  if (owns_internal)
943  {
944  MFEM_ASSERT(mm.IsKnown(h_ptr), "");
945  MFEM_ASSERT(h_mt == maps->memories.at(h_ptr).h_mt, "");
946  mm.Erase(h_ptr, owns_device);
947  }
948  }
949  return h_mt;
950 }
951 
952 void MemoryManager::DeleteDevice_(void *h_ptr, unsigned & flags)
953 {
954  const bool owns_device = flags & Mem::OWNS_DEVICE;
955  if (owns_device)
956  {
957  mm.EraseDevice(h_ptr);
958  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
959  }
960 }
961 
962 bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
963  MemoryType h_mt, size_t bytes,
964  unsigned flags)
965 {
966  if (!h_ptr)
967  {
968  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
969  return true;
970  }
971  MemoryType d_mt;
972  if (!(flags & Mem::ALIAS))
973  {
974  auto iter = maps->memories.find(h_ptr);
975  MFEM_VERIFY(iter != maps->memories.end(), "internal error");
976  d_mt = iter->second.d_mt;
977  }
978  else
979  {
980  auto iter = maps->aliases.find(h_ptr);
981  MFEM_VERIFY(iter != maps->aliases.end(), "internal error");
982  d_mt = iter->second.mem->d_mt;
983  }
984  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
985  switch (mc)
986  {
988  {
989  MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
990  h_mt == MemoryType::HOST_64,"");
991  return true;
992  }
994  {
995  MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
996  return true;
997  }
998  case MemoryClass::DEVICE:
999  {
1000  MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
1001  d_mt == MemoryType::DEVICE_DEBUG ||
1002  d_mt == MemoryType::DEVICE_UMPIRE ||
1003  d_mt == MemoryType::DEVICE_UMPIRE_2 ||
1004  d_mt == MemoryType::MANAGED,"");
1005  return true;
1006  }
1007  case MemoryClass::MANAGED:
1008  {
1009  MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
1010  d_mt == MemoryType::MANAGED),"");
1011  return true;
1012  }
1013  default: break;
1014  }
1015  return true;
1016 }
1017 
1018 void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1019  size_t bytes, unsigned &flags)
1020 {
1021  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1022  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
1023  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1025  {
1026  const bool copy = !(flags & Mem::VALID_HOST);
1027  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1028  if (flags & Mem::ALIAS)
1029  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1030  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1031  }
1032  else
1033  {
1034  const bool copy = !(flags & Mem::VALID_DEVICE);
1035  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1036  if (flags & Mem::ALIAS)
1037  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1038  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1039  }
1040 }
1041 
1042 const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1043  size_t bytes, unsigned &flags)
1044 {
1045  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1046  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
1047  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1049  {
1050  const bool copy = !(flags & Mem::VALID_HOST);
1051  flags |= Mem::VALID_HOST;
1052  if (flags & Mem::ALIAS)
1053  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1054  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1055  }
1056  else
1057  {
1058  const bool copy = !(flags & Mem::VALID_DEVICE);
1059  flags |= Mem::VALID_DEVICE;
1060  if (flags & Mem::ALIAS)
1061  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1062  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1063  }
1064 }
1065 
1066 void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1067  size_t bytes, unsigned &flags)
1068 {
1069  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1070  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
1071  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1073  {
1074  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1075  if (flags & Mem::ALIAS)
1076  { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
1077  else { return mm.GetHostPtr(h_ptr, bytes, false); }
1078  }
1079  else
1080  {
1081  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1082  if (flags & Mem::ALIAS)
1083  { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
1084  else { return mm.GetDevicePtr(h_ptr, bytes, false); }
1085  }
1086 }
1087 
1088 void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
1089  size_t alias_bytes, unsigned base_flags,
1090  unsigned &alias_flags)
1091 {
1092  // This is called only when (base_flags & Mem::REGISTERED) is true.
1093  // Note that (alias_flags & REGISTERED) may not be true.
1094  MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
1095  if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
1096  {
1097  mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
1098  }
1099  if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
1100  {
1101  if (!(alias_flags & Mem::REGISTERED))
1102  {
1103  mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
1104  alias_flags = (alias_flags | Mem::REGISTERED | Mem::OWNS_INTERNAL) &
1105  ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
1106  }
1107  mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
1108  }
1109  alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
1110  (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
1111 }
1112 
1113 MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr, bool alias)
1114 {
1115  if (mm.exists)
1116  {
1117  if (!alias)
1118  {
1119  auto iter = maps->memories.find(h_ptr);
1120  MFEM_ASSERT(iter != maps->memories.end(), "internal error");
1121  return iter->second.d_mt;
1122  }
1123  // alias == true
1124  auto iter = maps->aliases.find(h_ptr);
1125  MFEM_ASSERT(iter != maps->aliases.end(), "internal error");
1126  return iter->second.mem->d_mt;
1127  }
1128  MFEM_ABORT("internal error");
1129  return MemoryManager::host_mem_type;
1130 }
1131 
1132 MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
1133 {
1134  if (!mm.exists) { return MemoryManager::host_mem_type; }
1135  if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
1136  if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).h_mt; }
1137  return MemoryManager::host_mem_type;
1138 }
1139 
1140 void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
1141  size_t bytes, unsigned src_flags,
1142  unsigned &dst_flags)
1143 {
1144  // Type of copy to use based on the src and dest validity flags:
1145  // | src
1146  // | h | d | hd
1147  // -----------+-----+-----+------
1148  // h | h2h d2h h2h
1149  // dest d | h2d d2d d2d
1150  // hd | h2h d2d d2d
1151 
1152  const bool dst_on_host =
1153  (dst_flags & Mem::VALID_HOST) &&
1154  (!(dst_flags & Mem::VALID_DEVICE) ||
1155  ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));
1156 
1157  dst_flags = dst_flags &
1158  ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1159 
1160  const bool src_on_host =
1161  (src_flags & Mem::VALID_HOST) &&
1162  (!(src_flags & Mem::VALID_DEVICE) ||
1163  ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));
1164 
1165  const void *src_d_ptr =
1166  src_on_host ? NULL :
1167  ((src_flags & Mem::ALIAS) ?
1168  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1169  mm.GetDevicePtr(src_h_ptr, bytes, false));
1170 
1171  if (dst_on_host)
1172  {
1173  if (src_on_host)
1174  {
1175  if (dst_h_ptr != src_h_ptr && bytes != 0)
1176  {
1177  MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
1178  (const char*)src_h_ptr + bytes <= dst_h_ptr,
1179  "data overlaps!");
1180  std::memcpy(dst_h_ptr, src_h_ptr, bytes);
1181  }
1182  }
1183  else
1184  {
1185  if (dst_h_ptr != src_d_ptr && bytes != 0)
1186  {
1187  internal::Memory &src_d_base = maps->memories.at(src_h_ptr);
1188  MemoryType src_d_mt = src_d_base.d_mt;
1189  ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
1190  }
1191  }
1192  }
1193  else
1194  {
1195  void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
1196  mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
1197  mm.GetDevicePtr(dst_h_ptr, bytes, false);
1198  if (src_on_host)
1199  {
1200  const bool known = mm.IsKnown(dst_h_ptr);
1201  const bool alias = dst_flags & Mem::ALIAS;
1202  MFEM_VERIFY(alias||known,"");
1203  const MemoryType d_mt = known ?
1204  maps->memories.at(dst_h_ptr).d_mt :
1205  maps->aliases.at(dst_h_ptr).mem->d_mt;
1206  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1207  }
1208  else
1209  {
1210  if (dest_d_ptr != src_d_ptr && bytes != 0)
1211  {
1212  const bool known = mm.IsKnown(dst_h_ptr);
1213  const bool alias = dst_flags & Mem::ALIAS;
1214  MFEM_VERIFY(alias||known,"");
1215  const MemoryType d_mt = known ?
1216  maps->memories.at(dst_h_ptr).d_mt :
1217  maps->aliases.at(dst_h_ptr).mem->d_mt;
1218  ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
1219  }
1220  }
1221  }
1222 }
1223 
1224 void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
1225  size_t bytes, unsigned src_flags)
1226 {
1227  const bool src_on_host = src_flags & Mem::VALID_HOST;
1228  if (src_on_host)
1229  {
1230  if (dest_h_ptr != src_h_ptr && bytes != 0)
1231  {
1232  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1233  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1234  "data overlaps!");
1235  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1236  }
1237  }
1238  else
1239  {
1240  MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
1241  const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
1242  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1243  mm.GetDevicePtr(src_h_ptr, bytes, false);
1244  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1245  const MemoryType d_mt = base.d_mt;
1246  ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
1247  }
1248 }
1249 
1250 void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
1251  size_t bytes, unsigned &dest_flags)
1252 {
1253  const bool dest_on_host = dest_flags & Mem::VALID_HOST;
1254  if (dest_on_host)
1255  {
1256  if (dest_h_ptr != src_h_ptr && bytes != 0)
1257  {
1258  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1259  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1260  "data overlaps!");
1261  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1262  }
1263  }
1264  else
1265  {
1266  void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
1267  mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
1268  mm.GetDevicePtr(dest_h_ptr, bytes, false);
1269  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1270  const MemoryType d_mt = base.d_mt;
1271  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1272  }
1273  dest_flags = dest_flags &
1274  ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1275 }
1276 
1277 bool MemoryManager::IsKnown_(const void *h_ptr)
1278 {
1279  return maps->memories.find(h_ptr) != maps->memories.end();
1280 }
1281 
1282 bool MemoryManager::IsAlias_(const void *h_ptr)
1283 {
1284  return maps->aliases.find(h_ptr) != maps->aliases.end();
1285 }
1286 
1287 void MemoryManager::Insert(void *h_ptr, size_t bytes,
1288  MemoryType h_mt, MemoryType d_mt)
1289 {
1290 #ifdef MFEM_TRACK_MEM_MANAGER
1291  mfem::out << "[mfem memory manager]: registering h_ptr: " << h_ptr
1292  << ", bytes: " << bytes << std::endl;
1293 #endif
1294  if (h_ptr == NULL)
1295  {
1296  MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
1297  return;
1298  }
1299  MFEM_VERIFY_TYPES(h_mt, d_mt);
1300 #ifdef MFEM_DEBUG
1301  auto res =
1302 #endif
1303  maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
1304 #ifdef MFEM_DEBUG
1305  if (res.second == false)
1306  {
1307  auto &m = res.first->second;
1308  MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt &&
1309  (m.d_mt == d_mt || (d_mt == MemoryType::DEFAULT &&
1310  m.d_mt == GetDualMemoryType(h_mt))),
1311  "Address already present with different attributes!");
1312 #ifdef MFEM_TRACK_MEM_MANAGER
1313  mfem::out << "[mfem memory manager]: repeated registration of h_ptr: "
1314  << h_ptr << std::endl;
1315 #endif
1316  }
1317 #endif
1318 }
1319 
1320 void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
1321  MemoryType h_mt, MemoryType d_mt)
1322 {
1323  // MFEM_VERIFY_TYPES(h_mt, d_mt); // done by Insert() below
1324  MFEM_ASSERT(h_ptr != NULL, "internal error");
1325  Insert(h_ptr, bytes, h_mt, d_mt);
1326  internal::Memory &mem = maps->memories.at(h_ptr);
1327  if (d_ptr == NULL && bytes != 0) { ctrl->Device(d_mt)->Alloc(mem); }
1328  else { mem.d_ptr = d_ptr; }
1329 }
1330 
1331 void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
1332  const size_t bytes, const bool base_is_alias)
1333 {
1334  size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
1335  static_cast<const char*>(base_ptr));
1336 #ifdef MFEM_TRACK_MEM_MANAGER
1337  mfem::out << "[mfem memory manager]: registering alias of base_ptr: "
1338  << base_ptr << ", offset: " << offset << ", bytes: " << bytes
1339  << ", base is alias: " << base_is_alias << std::endl;
1340 #endif
1341  if (!base_ptr)
1342  {
1343  MFEM_VERIFY(offset == 0,
1344  "Trying to add alias to NULL at offset " << offset);
1345  return;
1346  }
1347  if (base_is_alias)
1348  {
1349  const internal::Alias &alias = maps->aliases.at(base_ptr);
1350  MFEM_ASSERT(alias.mem,"");
1351  base_ptr = alias.mem->h_ptr;
1352  offset += alias.offset;
1353 #ifdef MFEM_TRACK_MEM_MANAGER
1354  mfem::out << "[mfem memory manager]: real base_ptr: " << base_ptr
1355  << std::endl;
1356 #endif
1357  }
1358  internal::Memory &mem = maps->memories.at(base_ptr);
1359  MFEM_VERIFY(offset + bytes <= mem.bytes, "invalid alias");
1360  auto res =
1361  maps->aliases.emplace(alias_ptr,
1362  internal::Alias{&mem, offset, 1, mem.h_mt});
1363  if (res.second == false) // alias_ptr was already in the map
1364  {
1365  internal::Alias &alias = res.first->second;
1366  // Update the alias data in case the existing alias is dangling
1367  alias.mem = &mem;
1368  alias.offset = offset;
1369  alias.h_mt = mem.h_mt;
1370  alias.counter++;
1371  }
1372 }
1373 
1374 void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
1375 {
1376 #ifdef MFEM_TRACK_MEM_MANAGER
1377  mfem::out << "[mfem memory manager]: un-registering h_ptr: " << h_ptr
1378  << std::endl;
1379 #endif
1380  if (!h_ptr) { return; }
1381  auto mem_map_iter = maps->memories.find(h_ptr);
1382  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1383  internal::Memory &mem = mem_map_iter->second;
1384  if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1385  maps->memories.erase(mem_map_iter);
1386 }
1387 
1388 void MemoryManager::EraseDevice(void *h_ptr)
1389 {
1390  if (!h_ptr) { return; }
1391  auto mem_map_iter = maps->memories.find(h_ptr);
1392  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1393  internal::Memory &mem = mem_map_iter->second;
1394  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1395  mem.d_ptr = nullptr;
1396 }
1397 
1398 void MemoryManager::EraseAlias(void *alias_ptr)
1399 {
1400 #ifdef MFEM_TRACK_MEM_MANAGER
1401  mfem::out << "[mfem memory manager]: un-registering alias_ptr: " << alias_ptr
1402  << std::endl;
1403 #endif
1404  if (!alias_ptr) { return; }
1405  auto alias_map_iter = maps->aliases.find(alias_ptr);
1406  if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
1407  internal::Alias &alias = alias_map_iter->second;
1408  if (--alias.counter) { return; }
1409  maps->aliases.erase(alias_map_iter);
1410 }
1411 
1412 void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
1413  bool copy_data)
1414 {
1415  if (!h_ptr)
1416  {
1417  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1418  return NULL;
1419  }
1420  internal::Memory &mem = maps->memories.at(h_ptr);
1421  const MemoryType &h_mt = mem.h_mt;
1422  MemoryType &d_mt = mem.d_mt;
1423  MFEM_VERIFY_TYPES(h_mt, d_mt);
1424  if (!mem.d_ptr)
1425  {
1426  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1427  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1428  }
1429  // Aliases might have done some protections
1430  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1431  if (copy_data)
1432  {
1433  MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
1434  if (bytes) { ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes); }
1435  }
1436  ctrl->Host(h_mt)->Protect(mem, bytes);
1437  return mem.d_ptr;
1438 }
1439 
1440 void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
1441  bool copy)
1442 {
1443  if (!alias_ptr)
1444  {
1445  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1446  return NULL;
1447  }
1448  auto &alias_map = maps->aliases;
1449  auto alias_map_iter = alias_map.find(alias_ptr);
1450  if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
1451  const internal::Alias &alias = alias_map_iter->second;
1452  const size_t offset = alias.offset;
1453  internal::Memory &mem = *alias.mem;
1454  const MemoryType &h_mt = mem.h_mt;
1455  MemoryType &d_mt = mem.d_mt;
1456  MFEM_VERIFY_TYPES(h_mt, d_mt);
1457  if (!mem.d_ptr)
1458  {
1459  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1460  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1461  }
1462  void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
1463  void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
1464  MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
1465  MFEM_ASSERT(offset + bytes <= mem.bytes, "internal error");
1466  mem.d_rw = mem.h_rw = false;
1467  if (mem.d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1468  ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
1469  if (copy && mem.d_ptr)
1470  { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
1471  ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
1472  return alias_d_ptr;
1473 }
1474 
1475 void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
1476 {
1477  const internal::Memory &mem = maps->memories.at(ptr);
1478  MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
1479  MFEM_ASSERT(bytes <= mem.bytes, "internal error")
1480  const MemoryType &h_mt = mem.h_mt;
1481  const MemoryType &d_mt = mem.d_mt;
1482  MFEM_VERIFY_TYPES(h_mt, d_mt);
1483  // Aliases might have done some protections
1484  ctrl->Host(h_mt)->Unprotect(mem, bytes);
1485  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1486  if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
1487  if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
1488  return mem.h_ptr;
1489 }
1490 
1491 void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
1492  bool copy_data)
1493 {
1494  const internal::Alias &alias = maps->aliases.at(ptr);
1495  const internal::Memory *const mem = alias.mem;
1496  const MemoryType &h_mt = mem->h_mt;
1497  const MemoryType &d_mt = mem->d_mt;
1498  MFEM_VERIFY_TYPES(h_mt, d_mt);
1499  void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
1500  void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
1501  MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
1502  mem->h_rw = false;
1503  ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
1504  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1505  if (copy_data && mem->d_ptr)
1506  { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
1507  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
1508  return alias_h_ptr;
1509 }
1510 
1512 {
1513  if (exists) { return; }
1514  maps = new internal::Maps();
1515  ctrl = new internal::Ctrl();
1516  ctrl->Configure();
1517  exists = true;
1518 }
1519 
1521 
1522 MemoryManager::~MemoryManager() { if (exists) { Destroy(); } }
1523 
1525 {
1526  MFEM_VERIFY(!configured, "changing the dual MemoryTypes is not allowed after"
1527  " MemoryManager configuration!");
1528  UpdateDualMemoryType(mt, dual_mt);
1529 }
1530 
1531 void MemoryManager::UpdateDualMemoryType(MemoryType mt, MemoryType dual_mt)
1532 {
1533  MFEM_VERIFY((int)mt < MemoryTypeSize,
1534  "invalid MemoryType, mt = " << (int)mt);
1535  MFEM_VERIFY((int)dual_mt < MemoryTypeSize,
1536  "invalid dual MemoryType, dual_mt = " << (int)dual_mt);
1537 
1538  if ((IsHostMemory(mt) && IsDeviceMemory(dual_mt)) ||
1539  (IsDeviceMemory(mt) && IsHostMemory(dual_mt)))
1540  {
1541  dual_map[(int)mt] = dual_mt;
1542  }
1543  else
1544  {
1545  // mt + dual_mt is not a pair of host + device types: this is only allowed
1546  // when mt == dual_mt and mt is a host type; in this case we do not
1547  // actually update the dual
1548  MFEM_VERIFY(mt == dual_mt && IsHostMemory(mt),
1549  "invalid (mt, dual_mt) pair: ("
1550  << MemoryTypeName[(int)mt] << ", "
1551  << MemoryTypeName[(int)dual_mt] << ')');
1552  }
1553 }
1554 
1556  const MemoryType device_mt)
1557 {
1558  MemoryManager::UpdateDualMemoryType(host_mt, device_mt);
1559  MemoryManager::UpdateDualMemoryType(device_mt, host_mt);
1560  if (device_mt == MemoryType::DEVICE_DEBUG)
1561  {
1562  for (int mt = (int)MemoryType::HOST; mt < (int)MemoryType::MANAGED; mt++)
1563  {
1564  MemoryManager::UpdateDualMemoryType(
1566  }
1567  }
1568  Init();
1569  host_mem_type = host_mt;
1570  device_mem_type = device_mt;
1571  configured = true;
1572 }
1573 
1575 {
1576  MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
1577 #ifdef MFEM_TRACK_MEM_MANAGER
1578  size_t num_memories = maps->memories.size();
1579  size_t num_aliases = maps->aliases.size();
1580  if (num_memories != 0 || num_aliases != 0)
1581  {
1582  MFEM_WARNING("...\n\t number of registered pointers: " << num_memories
1583  << "\n\t number of registered aliases : " << num_aliases);
1584  }
1585 #endif
1586  // Keep for debugging purposes:
1587 #if 0
1588  mfem::out << "Destroying the MemoryManager ...\n"
1589  << "remaining registered pointers : "
1590  << maps->memories.size() << '\n'
1591  << "remaining registered aliases : "
1592  << maps->aliases.size() << '\n';
1593 #endif
1594  for (auto& n : maps->memories)
1595  {
1596  internal::Memory &mem = n.second;
1597  bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
1598  if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1599  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1600  }
1601  delete maps; maps = nullptr;
1602  delete ctrl; ctrl = nullptr;
1603  host_mem_type = MemoryType::HOST;
1604  device_mem_type = MemoryType::HOST;
1605  exists = false;
1606  configured = false;
1607 }
1608 
1610 {
1611  if (ptr != NULL)
1612  {
1613  if (!IsKnown(ptr))
1614  {
1615  mfem_error("Pointer is not registered!");
1616  }
1617  }
1618 }
1619 
1620 int MemoryManager::PrintPtrs(std::ostream &os)
1621 {
1622  int n_out = 0;
1623  for (const auto& n : maps->memories)
1624  {
1625  const internal::Memory &mem = n.second;
1626  os << "\nkey " << n.first << ", "
1627  << "h_ptr " << mem.h_ptr << ", "
1628  << "d_ptr " << mem.d_ptr;
1629  n_out++;
1630  }
1631  if (maps->memories.size() > 0) { os << std::endl; }
1632  return n_out;
1633 }
1634 
1635 int MemoryManager::PrintAliases(std::ostream &os)
1636 {
1637  int n_out = 0;
1638  for (const auto& n : maps->aliases)
1639  {
1640  const internal::Alias &alias = n.second;
1641  os << "\nalias: key " << n.first << ", "
1642  << "h_ptr " << alias.mem->h_ptr << ", "
1643  << "offset " << alias.offset << ", "
1644  << "counter " << alias.counter;
1645  n_out++;
1646  }
1647  if (maps->aliases.size() > 0) { os << std::endl; }
1648  return n_out;
1649 }
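// Both printers are intended for interactive debugging; a minimal usage
// sketch, assuming the global manager mfem::mm and the default mfem::out
// stream (each call returns the number of entries printed):
//
//   int n_mem   = mfem::mm.PrintPtrs();              // registered memories
//   int n_alias = mfem::mm.PrintAliases(std::cerr);  // registered aliases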
1650 
1651 int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
1652  unsigned flags)
1653 {
1654  void *d_ptr = (flags & Mem::ALIAS) ?
1655  mm.GetAliasDevicePtr(h_ptr, size, false) :
1656  mm.GetDevicePtr(h_ptr, size, false);
1657  char *h_buf = new char[size];
1658 #if defined(MFEM_USE_CUDA)
1659  CuMemcpyDtoH(h_buf, d_ptr, size);
1660 #elif defined(MFEM_USE_HIP)
1661  HipMemcpyDtoH(h_buf, d_ptr, size);
1662 #else
1663  std::memcpy(h_buf, d_ptr, size);
1664 #endif
1665  int res = std::memcmp(h_ptr, h_buf, size);
1666  delete [] h_buf;
1667  return res;
1668 }
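// The public entry point for this check is Memory<T>::CompareHostAndDevice()
// declared in mem_manager.hpp; a minimal sketch, assuming a Vector whose
// data is valid on both host and device (zero means the bytes match):
//
//   mfem::Vector v(64);
//   v = 1.0;      // write on the host
//   v.Read();     // make the device copy valid as well
//   // int diff = v.GetMemory().CompareHostAndDevice(v.Size());
//   // MFEM_VERIFY(diff == 0, "host/device contents differ");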
1669 
1670 
1671 void MemoryPrintFlags(unsigned flags)
1672 {
1673  typedef Memory<int> Mem;
1674  mfem::out
1675  << "\n registered = " << bool(flags & Mem::REGISTERED)
1676  << "\n owns host = " << bool(flags & Mem::OWNS_HOST)
1677  << "\n owns device = " << bool(flags & Mem::OWNS_DEVICE)
1678  << "\n owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
1679  << "\n valid host = " << bool(flags & Mem::VALID_HOST)
1680  << "\n valid device = " << bool(flags & Mem::VALID_DEVICE)
1681  << "\n device flag = " << bool(flags & Mem::USE_DEVICE)
1682  << "\n alias = " << bool(flags & Mem::ALIAS)
1683  << std::endl;
1684 }
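// MemoryPrintFlags() is designed to be called on a raw flags word, e.g. from
// a debugger; from user code the equivalent is the public method
// Memory<T>::PrintFlags(). A minimal sketch, assuming a Vector v:
//
//   v.GetMemory().PrintFlags(); // prints "registered = 1", "owns host = 1", ...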
1685 
1686 void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr,
1687  bool alias)
1688 {
1689  if (!mm.exists) {return;}
1690  if (!alias)
1691  {
1692  auto it = maps->memories.find(h_ptr);
1693  MFEM_VERIFY(it != maps->memories.end(),
1694  "host pointer is not registered: h_ptr = " << h_ptr);
1695  MFEM_VERIFY(h_mt == it->second.h_mt, "host pointer MemoryType mismatch");
1696  }
1697  else
1698  {
1699  auto it = maps->aliases.find(h_ptr);
1700  MFEM_VERIFY(it != maps->aliases.end(),
1701  "alias pointer is not registered: h_ptr = " << h_ptr);
1702  MFEM_VERIFY(h_mt == it->second.h_mt, "alias pointer MemoryType mismatch");
1703  }
1704 }
1705 
1706 MemoryManager mm;
1707 
1708 bool MemoryManager::exists = false;
1709 bool MemoryManager::configured = false;
1710 
1711 MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
1712 MemoryType MemoryManager::device_mem_type = MemoryType::HOST;
1713 
1714 MemoryType MemoryManager::dual_map[MemoryTypeSize] =
1715 {
1716  /* HOST */ MemoryType::DEVICE,
1717  /* HOST_32 */ MemoryType::DEVICE,
1718  /* HOST_64 */ MemoryType::DEVICE,
1719  /* HOST_DEBUG */ MemoryType::DEVICE_DEBUG,
1720  /* HOST_UMPIRE */ MemoryType::DEVICE_UMPIRE,
1721  /* HOST_PINNED */ MemoryType::DEVICE,
1722  /* MANAGED */ MemoryType::MANAGED,
1723  /* DEVICE */ MemoryType::HOST,
1724  /* DEVICE_DEBUG */ MemoryType::HOST_DEBUG,
1725  /* DEVICE_UMPIRE */ MemoryType::HOST_UMPIRE,
1726  /* DEVICE_UMPIRE_2 */ MemoryType::HOST_UMPIRE
1727 };
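// The table above is what GetDualMemoryType() consults; a minimal sketch of
// the default mapping (illustrative, before any SetDualMemoryType() calls):
//
//   using mfem::MemoryType;
//   // MemoryManager::GetDualMemoryType(MemoryType::HOST)    == DEVICE
//   // MemoryManager::GetDualMemoryType(MemoryType::DEVICE)  == HOST
//   // MemoryManager::GetDualMemoryType(MemoryType::MANAGED) == MANAGED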
1728 
1729 #ifdef MFEM_USE_UMPIRE
1730 const char * MemoryManager::h_umpire_name = "MFEM_HOST";
1731 const char * MemoryManager::d_umpire_name = "MFEM_DEVICE";
1732 const char * MemoryManager::d_umpire_2_name = "MFEM_DEVICE_2";
1733 #endif
1734 
1735 
1736 const char *MemoryTypeName[MemoryTypeSize] =
1737 {
1738  "host-std", "host-32", "host-64", "host-debug", "host-umpire", "host-pinned",
1739 #if defined(MFEM_USE_CUDA)
1740  "cuda-uvm",
1741  "cuda",
1742 #elif defined(MFEM_USE_HIP)
1743  "hip-uvm",
1744  "hip",
1745 #else
1746  "managed",
1747  "device",
1748 #endif
1749  "device-debug",
1750 #if defined(MFEM_USE_CUDA)
1751  "cuda-umpire",
1752  "cuda-umpire-2",
1753 #elif defined(MFEM_USE_HIP)
1754  "hip-umpire",
1755  "hip-umpire-2",
1756 #else
1757  "device-umpire",
1758  "device-umpire-2",
1759 #endif
1760 };
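// The names are indexed by the integer value of the corresponding MemoryType
// and are used during Device configuration; a minimal sketch (output shown
// for a build without CUDA or HIP):
//
//   mfem::out << mfem::MemoryTypeName[(int)mfem::MemoryType::HOST_64]
//             << std::endl; // prints "host-64"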
1761 
1762 } // namespace mfem