mem_manager.cpp (MFEM v4.4.0, finite element discretization library)
1 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "forall.hpp"
13 #include "mem_manager.hpp"
14 
15 #include <list>
16 #include <cstring> // std::memcpy, std::memcmp
17 #include <unordered_map>
18 #include <algorithm> // std::max
19 
20 // Uncomment to try _WIN32 platform
21 //#define _WIN32
22 //#define _aligned_malloc(s,a) malloc(s)
23 
24 #ifndef _WIN32
25 #include <unistd.h>
26 #include <signal.h>
27 #include <sys/mman.h>
28 #define mfem_memalign(p,a,s) posix_memalign(p,a,s)
29 #define mfem_aligned_free free
30 #else
31 #define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
32 #define mfem_aligned_free _aligned_free
33 #endif
34 
35 #ifdef MFEM_USE_UMPIRE
36 #include <umpire/Umpire.hpp>
37 #include <umpire/strategy/QuickPool.hpp>
38 
39 // Make sure Umpire is built with CUDA support if MFEM is built with it.
40 #if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
41 #error "CUDA is not enabled in Umpire!"
42 #endif
43 // Make sure Umpire is built with HIP support if MFEM is built with it.
44 #if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
45 #error "HIP is not enabled in Umpire!"
46 #endif
47 #endif // MFEM_USE_UMPIRE
48 
49 // Internal debug option, useful for tracking some memory manager operations.
50 // #define MFEM_TRACK_MEM_MANAGER
51 
52 namespace mfem
53 {
54 
55 MemoryType GetMemoryType(MemoryClass mc)
56 {
57  switch (mc)
58  {
59  case MemoryClass::HOST: return mm.GetHostMemoryType();
60  case MemoryClass::HOST_32: return MemoryType::HOST_32;
61  case MemoryClass::HOST_64: return MemoryType::HOST_64;
62  case MemoryClass::DEVICE: return mm.GetDeviceMemoryType();
63  case MemoryClass::MANAGED: return MemoryType::MANAGED;
64  }
65  MFEM_VERIFY(false,"");
66  return MemoryType::HOST;
67 }
68 
69 
70 bool MemoryClassContainsType(MemoryClass mc, MemoryType mt)
71 {
72  switch (mc)
73  {
74  case MemoryClass::HOST: return IsHostMemory(mt);
75  case MemoryClass::HOST_32:
76  return (mt == MemoryType::HOST_32 ||
77  mt == MemoryType::HOST_64 ||
78  mt == MemoryType::HOST_DEBUG);
79  case MemoryClass::HOST_64:
80  return (mt == MemoryType::HOST_64 ||
81  mt == MemoryType::HOST_DEBUG);
82  case MemoryClass::DEVICE: return IsDeviceMemory(mt);
83  case MemoryClass::MANAGED:
84  return (mt == MemoryType::MANAGED);
85  }
86  MFEM_ABORT("invalid MemoryClass");
87  return false;
88 }
89 
90 
91 static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
92 {
93  MFEM_VERIFY(IsHostMemory(h_mt), "h_mt = " << (int)h_mt);
94  MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == MemoryType::DEFAULT,
95  "d_mt = " << (int)d_mt);
96  // If h_mt == MemoryType::HOST_DEBUG, then d_mt == MemoryType::DEVICE_DEBUG
97  // or d_mt == MemoryType::DEFAULT
98  MFEM_VERIFY(h_mt != MemoryType::HOST_DEBUG ||
99  d_mt == MemoryType::DEVICE_DEBUG ||
100  d_mt == MemoryType::DEFAULT,
101  "d_mt = " << MemoryTypeName[(int)d_mt]);
102  // If d_mt == MemoryType::DEVICE_DEBUG, then h_mt != MemoryType::MANAGED
103  MFEM_VERIFY(d_mt != MemoryType::DEVICE_DEBUG ||
104  h_mt != MemoryType::MANAGED,
105  "h_mt = " << MemoryTypeName[(int)h_mt]);
106 #if 0
107  const bool sync =
108  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE) ||
109  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE) ||
110  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
111  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE) ||
112  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE) ||
113  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
114  (h_mt == MemoryType::HOST_DEBUG && d_mt == MemoryType::DEVICE_DEBUG) ||
115  (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
116  (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
117  (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
118  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE) ||
119  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE) ||
120  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE_2);
121  MFEM_VERIFY(sync, "");
122 #endif
123 }
124 
125 MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
126 {
127  //          |  HOST     HOST_32   HOST_64   DEVICE    MANAGED
128  // ---------+---------------------------------------------
129  //  HOST    |  HOST     HOST_32   HOST_64   DEVICE    MANAGED
130  //  HOST_32 |  HOST_32  HOST_32   HOST_64   DEVICE    MANAGED
131  //  HOST_64 |  HOST_64  HOST_64   HOST_64   DEVICE    MANAGED
132  //  DEVICE  |  DEVICE   DEVICE    DEVICE    DEVICE    MANAGED
133  //  MANAGED |  MANAGED  MANAGED   MANAGED   MANAGED   MANAGED
134 
135  // Using the enumeration ordering:
136  // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
137  // the above table is simply: a*b = max(a,b).
138 
139  return std::max(mc1, mc2);
140 }
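// A minimal illustration of the product rule above (a sketch, not part of the
// original file; the names 'mc_a' and 'mc_b' are hypothetical):
//
//    MemoryClass mc_a = MemoryClass::HOST_32;
//    MemoryClass mc_b = MemoryClass::DEVICE;
//    MemoryClass mc   = mc_a * mc_b; // == MemoryClass::DEVICE, i.e. max(a,b)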
141 
142 
143 // Instantiate Memory<T>::PrintFlags for T = int and T = double.
144 template void Memory<int>::PrintFlags() const;
145 template void Memory<double>::PrintFlags() const;
146 
147 // Instantiate Memory<T>::CompareHostAndDevice for T = int and T = double.
148 template int Memory<int>::CompareHostAndDevice(int size) const;
149 template int Memory<double>::CompareHostAndDevice(int size) const;
150 
151 
152 namespace internal
153 {
154 
155 /// Memory class that holds:
156 /// - the host and the device pointer
157 /// - the size in bytes of this memory region
158 /// - the host and device type of this memory region
159 struct Memory
160 {
161  void *const h_ptr;
162  void *d_ptr;
163  const size_t bytes;
164  const MemoryType h_mt;
165  MemoryType d_mt;
166  mutable bool h_rw, d_rw;
167  Memory(void *p, size_t b, MemoryType h, MemoryType d):
168  h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
169  h_rw(true), d_rw(true) { }
170 };
171 
172 /// Alias class that holds the base memory region and the offset
173 struct Alias
174 {
175  Memory *mem;
176  size_t offset;
177  size_t counter;
178  // 'h_mt' is already stored in 'mem', however, we use this field for type
179  // checking since the alias may be dangling, i.e. 'mem' may be invalid.
180  MemoryType h_mt;
181 };
182 
183 /// Maps for the Memory and the Alias classes
184 typedef std::unordered_map<const void*, Memory> MemoryMap;
185 typedef std::unordered_map<const void*, Alias> AliasMap;
186 
187 struct Maps
188 {
189  MemoryMap memories;
190  AliasMap aliases;
191 };
192 
193 } // namespace mfem::internal
194 
195 static internal::Maps *maps;
196 
197 namespace internal
198 {
199 
200 /// The host memory space base abstract class
201 class HostMemorySpace
202 {
203 public:
204  virtual ~HostMemorySpace() { }
205  virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
206  virtual void Dealloc(void *ptr) { std::free(ptr); }
207  virtual void Protect(const Memory&, size_t) { }
208  virtual void Unprotect(const Memory&, size_t) { }
209  virtual void AliasProtect(const void*, size_t) { }
210  virtual void AliasUnprotect(const void*, size_t) { }
211 };
212 
213 /// The device memory space base abstract class
214 class DeviceMemorySpace
215 {
216 public:
217  virtual ~DeviceMemorySpace() { }
218  virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
219  virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
220  virtual void Protect(const Memory&) { }
221  virtual void Unprotect(const Memory&) { }
222  virtual void AliasProtect(const void*, size_t) { }
223  virtual void AliasUnprotect(const void*, size_t) { }
224  virtual void *HtoD(void *dst, const void *src, size_t bytes)
225  { return std::memcpy(dst, src, bytes); }
226  virtual void *DtoD(void *dst, const void *src, size_t bytes)
227  { return std::memcpy(dst, src, bytes); }
228  virtual void *DtoH(void *dst, const void *src, size_t bytes)
229  { return std::memcpy(dst, src, bytes); }
230 };
231 
232 /// The default std:: host memory space
233 class StdHostMemorySpace : public HostMemorySpace { };
234 
235 /// The 'No' host memory space
236 struct NoHostMemorySpace : public HostMemorySpace
237 {
238  void Alloc(void**, const size_t) { mfem_error("! Host Alloc error"); }
239 };
240 
241 /// The aligned 32 host memory space
242 class Aligned32HostMemorySpace : public HostMemorySpace
243 {
244 public:
245  Aligned32HostMemorySpace(): HostMemorySpace() { }
246  void Alloc(void **ptr, size_t bytes)
247  { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
248  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
249 };
250 
251 /// The aligned 64 host memory space
252 class Aligned64HostMemorySpace : public HostMemorySpace
253 {
254 public:
255  Aligned64HostMemorySpace(): HostMemorySpace() { }
256  void Alloc(void **ptr, size_t bytes)
257  { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
258  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
259 };
260 
261 #ifndef _WIN32
262 static uintptr_t pagesize = 0;
263 static uintptr_t pagemask = 0;
264 
265 /// Returns the restricted base address of the DEBUG segment
266 inline const void *MmuAddrR(const void *ptr)
267 {
268  const uintptr_t addr = (uintptr_t) ptr;
269  return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
270 }
271 
272 /// Returns the prolongated base address of the MMU segment
273 inline const void *MmuAddrP(const void *ptr)
274 {
275  const uintptr_t addr = (uintptr_t) ptr;
276  return (void*) (addr & ~pagemask);
277 }
278 
279 /// Compute the restricted length for the MMU segment
280 inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
281 {
282  // a ---->A:| |:B<---- b
283  const uintptr_t a = (uintptr_t) ptr;
284  const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
285  MFEM_ASSERT(a <= A, "");
286  const uintptr_t b = a + bytes;
287  const uintptr_t B = b & ~pagemask;
288  MFEM_ASSERT(B <= b, "");
289  const uintptr_t length = B > A ? B - A : 0;
290  MFEM_ASSERT(length % pagesize == 0,"");
291  return length;
292 }
293 
294 /// Compute the prolongated length for the MMU segment
295 inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
296 {
297  // |:A<----a | | b---->B:|
298  const uintptr_t a = (uintptr_t) ptr;
299  const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
300  MFEM_ASSERT(A <= a, "");
301  const uintptr_t b = a + bytes;
302  const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
303  MFEM_ASSERT(b <= B, "");
304  MFEM_ASSERT(B >= A,"");
305  const uintptr_t length = B - A;
306  MFEM_ASSERT(length % pagesize == 0,"");
307  return length;
308 }
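// Worked example for the two length computations above (a sketch assuming
// pagesize == 0x1000 and pagemask == 0xFFF; the values are hypothetical):
//
//    ptr = 0x1234, bytes = 0x3000 (byte range 0x1234 .. 0x4234)
//    MmuAddrR(ptr)          == 0x2000  // first page boundary at or above ptr
//    MmuLengthR(ptr, bytes) == 0x2000  // pages lying fully inside the range
//    MmuAddrP(ptr)          == 0x1000  // start of the page containing ptr
//    MmuLengthP(ptr, bytes) == 0x4000  // pages fully covering 0x1000 .. 0x5000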
309 
310 /// The protected access error, used for the host
311 static void MmuError(int, siginfo_t *si, void*)
312 {
313  fflush(0);
314  char str[64];
315  const void *ptr = si->si_addr;
316  sprintf(str, "Error while accessing address %p!", ptr);
317  mfem::out << std::endl << "An illegal memory access was made!";
318  MFEM_ABORT(str);
319 }
320 
321 /// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
322 static void MmuInit()
323 {
324  if (pagesize > 0) { return; }
325  struct sigaction sa;
326  sa.sa_flags = SA_SIGINFO;
327  sigemptyset(&sa.sa_mask);
328  sa.sa_sigaction = MmuError;
329  if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
330  if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
331  pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
332  MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
333  pagemask = pagesize - 1;
334 }
335 
336 /// MMU allocation, through ::mmap
337 inline void MmuAlloc(void **ptr, const size_t bytes)
338 {
339  const size_t length = bytes == 0 ? 8 : bytes;
340  const int prot = PROT_READ | PROT_WRITE;
341  const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
342  *ptr = ::mmap(NULL, length, prot, flags, -1, 0);
343  if (*ptr == MAP_FAILED) { throw ::std::bad_alloc(); }
344 }
345 
346 /// MMU deallocation, through ::munmap
347 inline void MmuDealloc(void *ptr, const size_t bytes)
348 {
349  const size_t length = bytes == 0 ? 8 : bytes;
350  if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
351 }
352 
353 /// MMU protection, through ::mprotect with no read/write accesses
354 inline void MmuProtect(const void *ptr, const size_t bytes)
355 {
356  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
357  if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
358  if (mmu_protect_error) { mfem_error("MMU protection (NONE) error"); }
359 }
360 
361 /// MMU un-protection, through ::mprotect with read/write accesses
362 inline void MmuAllow(const void *ptr, const size_t bytes)
363 {
364  const int RW = PROT_READ | PROT_WRITE;
365  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
366  if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
367  if (mmu_protect_error) { mfem_error("MMU protection (R/W) error"); }
368 }
369 #else
370 inline void MmuInit() { }
371 inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
372 inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
373 inline void MmuProtect(const void*, const size_t) { }
374 inline void MmuAllow(const void*, const size_t) { }
375 inline const void *MmuAddrR(const void *a) { return a; }
376 inline const void *MmuAddrP(const void *a) { return a; }
377 inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
378 inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
379 #endif
380 
381 /// The MMU host memory space
382 class MmuHostMemorySpace : public HostMemorySpace
383 {
384 public:
385  MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
386  void Alloc(void **ptr, size_t bytes) { MmuAlloc(ptr, bytes); }
387  void Dealloc(void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
388  void Protect(const Memory& mem, size_t bytes)
389  { if (mem.h_rw) { mem.h_rw = false; MmuProtect(mem.h_ptr, bytes); } }
390  void Unprotect(const Memory &mem, size_t bytes)
391  { if (!mem.h_rw) { mem.h_rw = true; MmuAllow(mem.h_ptr, bytes); } }
392  /// Aliases need to be restricted during protection
393  void AliasProtect(const void *ptr, size_t bytes)
394  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
395  /// Aliases need to be prolongated for un-protection
396  void AliasUnprotect(const void *ptr, size_t bytes)
397  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
398 };
399 
400 /// The UVM host memory space
401 class UvmHostMemorySpace : public HostMemorySpace
402 {
403 public:
404  UvmHostMemorySpace(): HostMemorySpace() { }
405  void Alloc(void **ptr, size_t bytes) { CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
406  void Dealloc(void *ptr) { CuMemFree(ptr); }
407 };
408 
409 /// The 'No' device memory space
410 class NoDeviceMemorySpace: public DeviceMemorySpace
411 {
412 public:
413  void Alloc(internal::Memory&) { mfem_error("! Device Alloc"); }
414  void Dealloc(Memory&) { mfem_error("! Device Dealloc"); }
415  void *HtoD(void*, const void*, size_t) { mfem_error("!HtoD"); return nullptr; }
416  void *DtoD(void*, const void*, size_t) { mfem_error("!DtoD"); return nullptr; }
417  void *DtoH(void*, const void*, size_t) { mfem_error("!DtoH"); return nullptr; }
418 };
419 
420 /// The std:: device memory space, used with the 'debug' device
421 class StdDeviceMemorySpace : public DeviceMemorySpace { };
422 
423 /// The CUDA device memory space
424 class CudaDeviceMemorySpace: public DeviceMemorySpace
425 {
426 public:
427  CudaDeviceMemorySpace(): DeviceMemorySpace() { }
428  void Alloc(Memory &base) { CuMemAlloc(&base.d_ptr, base.bytes); }
429  void Dealloc(Memory &base) { CuMemFree(base.d_ptr); }
430  void *HtoD(void *dst, const void *src, size_t bytes)
431  { return CuMemcpyHtoD(dst, src, bytes); }
432  void *DtoD(void* dst, const void* src, size_t bytes)
433  { return CuMemcpyDtoD(dst, src, bytes); }
434  void *DtoH(void *dst, const void *src, size_t bytes)
435  { return CuMemcpyDtoH(dst, src, bytes); }
436 };
437 
438 /// The CUDA/HIP page-locked host memory space
439 class HostPinnedMemorySpace: public HostMemorySpace
440 {
441 public:
442  HostPinnedMemorySpace(): HostMemorySpace() { }
443  void Alloc(void ** ptr, size_t bytes) override
444  {
445 #ifdef MFEM_USE_CUDA
446  CuMemAllocHostPinned(ptr, bytes);
447 #endif
448 #ifdef MFEM_USE_HIP
449  HipMemAllocHostPinned(ptr, bytes);
450 #endif
451  }
452  void Dealloc(void *ptr) override
453  {
454 #ifdef MFEM_USE_CUDA
455  CuMemFreeHostPinned(ptr);
456 #endif
457 #ifdef MFEM_USE_HIP
458  HipMemFreeHostPinned(ptr);
459 #endif
460  }
461 };
462 
463 /// The HIP device memory space
464 class HipDeviceMemorySpace: public DeviceMemorySpace
465 {
466 public:
467  HipDeviceMemorySpace(): DeviceMemorySpace() { }
468  void Alloc(Memory &base) { HipMemAlloc(&base.d_ptr, base.bytes); }
469  void Dealloc(Memory &base) { HipMemFree(base.d_ptr); }
470  void *HtoD(void *dst, const void *src, size_t bytes)
471  { return HipMemcpyHtoD(dst, src, bytes); }
472  void *DtoD(void* dst, const void* src, size_t bytes)
473  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side
474  // synchronization, so we use hipMemcpyAsync instead to get similar behavior.
475  // For more info, see: https://github.com/mfem/mfem/pull/2780
476  { return HipMemcpyDtoDAsync(dst, src, bytes); }
477  void *DtoH(void *dst, const void *src, size_t bytes)
478  { return HipMemcpyDtoH(dst, src, bytes); }
479 };
480 
481 /// The UVM device memory space.
482 class UvmCudaMemorySpace : public DeviceMemorySpace
483 {
484 public:
485  void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
486  void Dealloc(Memory&) { }
487  void *HtoD(void *dst, const void *src, size_t bytes)
488  {
489  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
490  return CuMemcpyHtoD(dst, src, bytes);
491  }
492  void *DtoD(void* dst, const void* src, size_t bytes)
493  { return CuMemcpyDtoD(dst, src, bytes); }
494  void *DtoH(void *dst, const void *src, size_t bytes)
495  {
496  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
497  return CuMemcpyDtoH(dst, src, bytes);
498  }
499 };
500 
501 /// The MMU device memory space
502 class MmuDeviceMemorySpace : public DeviceMemorySpace
503 {
504 public:
505  MmuDeviceMemorySpace(): DeviceMemorySpace() { }
506  void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
507  void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
508  void Protect(const Memory &m)
509  { if (m.d_rw) { m.d_rw = false; MmuProtect(m.d_ptr, m.bytes); } }
510  void Unprotect(const Memory &m)
511  { if (!m.d_rw) { m.d_rw = true; MmuAllow(m.d_ptr, m.bytes); } }
512  /// Aliases need to be restricted during protection
513  void AliasProtect(const void *ptr, size_t bytes)
514  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
515  /// Aliases need to be prolongated for un-protection
516  void AliasUnprotect(const void *ptr, size_t bytes)
517  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
518  void *HtoD(void *dst, const void *src, size_t bytes)
519  { return std::memcpy(dst, src, bytes); }
520  void *DtoD(void *dst, const void *src, size_t bytes)
521  { return std::memcpy(dst, src, bytes); }
522  void *DtoH(void *dst, const void *src, size_t bytes)
523  { return std::memcpy(dst, src, bytes); }
524 };
525 
526 #ifdef MFEM_USE_UMPIRE
527 class UmpireMemorySpace
528 {
529 protected:
530  umpire::ResourceManager &rm;
531  umpire::Allocator allocator;
532  bool owns_allocator{false};
533 
534 public:
535  // TODO: this only releases unused memory
536  virtual ~UmpireMemorySpace() { if (owns_allocator) { allocator.release(); } }
537  UmpireMemorySpace(const char * name, const char * space)
538  : rm(umpire::ResourceManager::getInstance())
539  {
540  if (!rm.isAllocator(name))
541  {
542  allocator = rm.makeAllocator<umpire::strategy::QuickPool>(
543  name, rm.getAllocator(space));
544  owns_allocator = true;
545  }
546  else
547  {
548  allocator = rm.getAllocator(name);
549  owns_allocator = false;
550  }
551  }
552 };
553 
554 /// The Umpire host memory space
555 class UmpireHostMemorySpace : public HostMemorySpace, public UmpireMemorySpace
556 {
557 private:
558  umpire::strategy::AllocationStrategy *strat;
559 public:
560  UmpireHostMemorySpace(const char * name)
561  : HostMemorySpace(),
562  UmpireMemorySpace(name, "HOST"),
563  strat(allocator.getAllocationStrategy()) {}
564  void Alloc(void **ptr, size_t bytes) override
565  { *ptr = allocator.allocate(bytes); }
566  void Dealloc(void *ptr) override { allocator.deallocate(ptr); }
567  void Insert(void *ptr, size_t bytes)
568  { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
569 };
570 
571 /// The Umpire device memory space
572 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
573 class UmpireDeviceMemorySpace : public DeviceMemorySpace,
574  public UmpireMemorySpace
575 {
576 public:
577  UmpireDeviceMemorySpace(const char * name)
578  : DeviceMemorySpace(),
579  UmpireMemorySpace(name, "DEVICE") {}
580  void Alloc(Memory &base) override
581  { base.d_ptr = allocator.allocate(base.bytes); }
582  void Dealloc(Memory &base) override { rm.deallocate(base.d_ptr); }
583  void *HtoD(void *dst, const void *src, size_t bytes) override
584  {
585 #ifdef MFEM_USE_CUDA
586  return CuMemcpyHtoD(dst, src, bytes);
587 #endif
588 #ifdef MFEM_USE_HIP
589  return HipMemcpyHtoD(dst, src, bytes);
590 #endif
591  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
592  }
593  void *DtoD(void* dst, const void* src, size_t bytes) override
594  {
595 #ifdef MFEM_USE_CUDA
596  return CuMemcpyDtoD(dst, src, bytes);
597 #endif
598 #ifdef MFEM_USE_HIP
599  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side
600  // synchronization, so we use hipMemcpyAsync instead to get similar behavior.
601  // For more info, see: https://github.com/mfem/mfem/pull/2780
602  return HipMemcpyDtoDAsync(dst, src, bytes);
603 #endif
604  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
605  }
606  void *DtoH(void *dst, const void *src, size_t bytes) override
607  {
608 #ifdef MFEM_USE_CUDA
609  return CuMemcpyDtoH(dst, src, bytes);
610 #endif
611 #ifdef MFEM_USE_HIP
612  return HipMemcpyDtoH(dst, src, bytes);
613 #endif
614  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
615  }
616 };
617 #else
618 class UmpireDeviceMemorySpace : public NoDeviceMemorySpace
619 {
620 public:
621  UmpireDeviceMemorySpace(const char * /*unused*/) {}
622 };
623 #endif // MFEM_USE_CUDA || MFEM_USE_HIP
624 #endif // MFEM_USE_UMPIRE
625 
626 /// Memory space controller class
627 class Ctrl
628 {
629  typedef MemoryType MT;
630 
631 public:
632  HostMemorySpace *host[HostMemoryTypeSize];
633  DeviceMemorySpace *device[DeviceMemoryTypeSize];
634 
635 public:
636  Ctrl(): host{nullptr}, device{nullptr} { }
637 
638  void Configure()
639  {
640  if (host[HostMemoryType])
641  {
642  mfem_error("Memory backends have already been configured!");
643  }
644 
645  // Filling the host memory backends
646  // HOST, HOST_32 & HOST_64 are always ready
647  // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
648  host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
649  host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
650  host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
651  // HOST_DEBUG is delayed, as it reroutes signals
652  host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
653  host[static_cast<int>(MT::HOST_UMPIRE)] = nullptr;
654  host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();
655 
656  // Filling the device memory backends, shifting with the device size
657  constexpr int shift = DeviceMemoryType;
658  device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
659  // All other devices controllers are delayed
660  device[static_cast<int>(MemoryType::DEVICE)-shift] = nullptr;
661  device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
662  device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
663  device[static_cast<int>(MT::DEVICE_UMPIRE_2)-shift] = nullptr;
664  }
665 
666  HostMemorySpace* Host(const MemoryType mt)
667  {
668  const int mt_i = static_cast<int>(mt);
669  // Delayed host controllers initialization
670  if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
671  MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
672  return host[mt_i];
673  }
674 
675  DeviceMemorySpace* Device(const MemoryType mt)
676  {
677  const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
678  MFEM_ASSERT(mt_i >= 0,"");
679  // Lazy device controller initializations
680  if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
681  MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
682  return device[mt_i];
683  }
684 
685  ~Ctrl()
686  {
687  constexpr int mt_h = HostMemoryType;
688  constexpr int mt_d = DeviceMemoryType;
689  for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
690  for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
691  }
692 
693 private:
694  HostMemorySpace* NewHostCtrl(const MemoryType mt)
695  {
696  switch (mt)
697  {
698  case MT::HOST_DEBUG: return new MmuHostMemorySpace();
699 #ifdef MFEM_USE_UMPIRE
700  case MT::HOST_UMPIRE:
701  return new UmpireHostMemorySpace(
702  MemoryManager::GetUmpireHostAllocatorName());
703 #else
704  case MT::HOST_UMPIRE: return new NoHostMemorySpace();
705 #endif
706  case MT::HOST_PINNED: return new HostPinnedMemorySpace();
707  default: MFEM_ABORT("Unknown host memory controller!");
708  }
709  return nullptr;
710  }
711 
712  DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
713  {
714  switch (mt)
715  {
716 #ifdef MFEM_USE_UMPIRE
717  case MT::DEVICE_UMPIRE:
718  return new UmpireDeviceMemorySpace(
719  MemoryManager::GetUmpireDeviceAllocatorName());
720  case MT::DEVICE_UMPIRE_2:
721  return new UmpireDeviceMemorySpace(
722  MemoryManager::GetUmpireDevice2AllocatorName());
723 #else
724  case MT::DEVICE_UMPIRE: return new NoDeviceMemorySpace();
725  case MT::DEVICE_UMPIRE_2: return new NoDeviceMemorySpace();
726 #endif
727  case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
728  case MT::DEVICE:
729  {
730 #if defined(MFEM_USE_CUDA)
731  return new CudaDeviceMemorySpace();
732 #elif defined(MFEM_USE_HIP)
733  return new HipDeviceMemorySpace();
734 #else
735  MFEM_ABORT("No device memory controller!");
736  break;
737 #endif
738  }
739  default: MFEM_ABORT("Unknown device memory controller!");
740  }
741  return nullptr;
742  }
743 };
744 
745 } // namespace mfem::internal
746 
747 static internal::Ctrl *ctrl;
748 
749 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
750  unsigned &flags)
751 {
752  MFEM_ASSERT(exists, "Internal error!");
753  if (IsHostMemory(mt))
754  {
755  MFEM_ASSERT(mt != MemoryType::HOST && h_tmp == nullptr,
756  "Internal error!");
757  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
758  // evaluated at the time when the device pointer is allocated, see
759  // GetDevicePtr() and GetAliasDevicePtr()
760  const MemoryType d_mt = MemoryType::DEFAULT;
761  // We rely on the next call using lazy dev alloc
762  return New_(h_tmp, bytes, mt, d_mt, Mem::VALID_HOST, flags);
763  }
764  else
765  {
766  const MemoryType h_mt = GetDualMemoryType(mt);
767  return New_(h_tmp, bytes, h_mt, mt, Mem::VALID_DEVICE, flags);
768  }
769 }
770 
771 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType h_mt,
772  MemoryType d_mt, unsigned valid_flags,
773  unsigned &flags)
774 {
775  MFEM_ASSERT(exists, "Internal error!");
776  MFEM_ASSERT(IsHostMemory(h_mt), "h_mt must be host type");
777  MFEM_ASSERT(IsDeviceMemory(d_mt) || d_mt == h_mt ||
778  d_mt == MemoryType::DEFAULT,
779  "d_mt must be device type, the same is h_mt, or DEFAULT");
780  MFEM_ASSERT((h_mt != MemoryType::HOST || h_tmp != nullptr) &&
781  (h_mt == MemoryType::HOST || h_tmp == nullptr),
782  "Internal error");
783  MFEM_ASSERT((valid_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) == 0,
784  "Internal error");
785  void *h_ptr;
786  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
787  else { h_ptr = h_tmp; }
788  flags = Mem::REGISTERED | Mem::OWNS_INTERNAL | Mem::OWNS_HOST |
789  Mem::OWNS_DEVICE | valid_flags;
790  // The other New_() method relies on this lazy allocation behavior.
791  mm.Insert(h_ptr, bytes, h_mt, d_mt); // lazy dev alloc
792  // mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); // non-lazy dev alloc
793 
794  // MFEM_VERIFY_TYPES(h_mt, mt); // done by mm.Insert() above
795  CheckHostMemoryType_(h_mt, h_ptr, false);
796 
797  return h_ptr;
798 }
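// Usage sketch of the lazy device allocation described above (hedged; this
// relies only on the public Memory<T> API from mem_manager.hpp):
//
//    Memory<double> m(100, MemoryType::HOST_64);   // host buffer allocated now
//    double *d = m.Write(MemoryClass::DEVICE, 100); // device buffer allocated
//                                                   // here, on first access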
799 
800 void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
801  MemoryType mt,
802  bool own, bool alias, unsigned &flags)
803 {
804  MFEM_CONTRACT_VAR(alias);
805  MFEM_ASSERT(exists, "Internal error!");
806  MFEM_VERIFY(!alias, "Cannot register an alias!");
807  const bool is_host_mem = IsHostMemory(mt);
808  const MemType h_mt = is_host_mem ? mt : GetDualMemoryType(mt);
809  const MemType d_mt = is_host_mem ? MemoryType::DEFAULT : mt;
810  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
811  // evaluated at the time when the device pointer is allocated, see
812  // GetDevicePtr() and GetAliasDevicePtr()
813 
814  MFEM_VERIFY_TYPES(h_mt, d_mt);
815 
816  if (ptr == nullptr && h_tmp == nullptr)
817  {
818  MFEM_VERIFY(bytes == 0, "internal error");
819  return nullptr;
820  }
821 
822  flags |= Mem::REGISTERED | Mem::OWNS_INTERNAL;
823  void *h_ptr;
824 
825  if (is_host_mem) // HOST TYPES + MANAGED
826  {
827  h_ptr = ptr;
828  mm.Insert(h_ptr, bytes, h_mt, d_mt);
829  flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
830  Mem::OWNS_DEVICE | Mem::VALID_HOST;
831  }
832  else // DEVICE TYPES
833  {
834  MFEM_VERIFY(ptr || bytes == 0,
835  "cannot register NULL device pointer with bytes = " << bytes);
836  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
837  else { h_ptr = h_tmp; }
838  mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
839  flags = own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE;
840  flags |= (Mem::OWNS_HOST | Mem::VALID_DEVICE);
841  }
842  CheckHostMemoryType_(h_mt, h_ptr, alias);
843  return h_ptr;
844 }
845 
846 void MemoryManager::Register2_(void *h_ptr, void *d_ptr, size_t bytes,
847  MemoryType h_mt, MemoryType d_mt,
848  bool own, bool alias, unsigned &flags)
849 {
850  MFEM_CONTRACT_VAR(alias);
851  MFEM_ASSERT(exists, "Internal error!");
852  MFEM_ASSERT(!alias, "Cannot register an alias!");
853  MFEM_VERIFY_TYPES(h_mt, d_mt);
854 
855  if (h_ptr == nullptr && d_ptr == nullptr)
856  {
857  MFEM_VERIFY(bytes == 0, "internal error");
858  return;
859  }
860 
861  flags |= Mem::REGISTERED | Mem::OWNS_INTERNAL;
862 
863  MFEM_VERIFY(d_ptr || bytes == 0,
864  "cannot register NULL device pointer with bytes = " << bytes);
865  mm.InsertDevice(d_ptr, h_ptr, bytes, h_mt, d_mt);
866  flags = (own ? flags | (Mem::OWNS_HOST | Mem::OWNS_DEVICE) :
867  flags & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE)) |
868  (Mem::VALID_HOST | Mem::VALID_DEVICE);
869 
870  CheckHostMemoryType_(h_mt, h_ptr, alias);
871 }
872 
873 void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
874  unsigned base_flags, unsigned &flags)
875 {
876  mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
877  base_flags & Mem::ALIAS);
878  flags = (base_flags | Mem::ALIAS | Mem::OWNS_INTERNAL) &
879  ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
880 }
881 
882 void MemoryManager::SetDeviceMemoryType_(void *h_ptr, unsigned flags,
883  MemoryType d_mt)
884 {
885  MFEM_VERIFY(h_ptr, "cannot set the device memory type: Memory is empty!");
886  if (!(flags & Mem::ALIAS))
887  {
888  auto mem_iter = maps->memories.find(h_ptr);
889  MFEM_VERIFY(mem_iter != maps->memories.end(), "internal error");
890  internal::Memory &mem = mem_iter->second;
891  if (mem.d_mt == d_mt) { return; }
892  MFEM_VERIFY(mem.d_ptr == nullptr, "cannot set the device memory type:"
893  " device memory is allocated!");
894  mem.d_mt = d_mt;
895  }
896  else
897  {
898  auto alias_iter = maps->aliases.find(h_ptr);
899  MFEM_VERIFY(alias_iter != maps->aliases.end(), "internal error");
900  internal::Alias &alias = alias_iter->second;
901  internal::Memory &base_mem = *alias.mem;
902  if (base_mem.d_mt == d_mt) { return; }
903  MFEM_VERIFY(base_mem.d_ptr == nullptr,
904  "cannot set the device memory type:"
905  " alias' base device memory is allocated!");
906  base_mem.d_mt = d_mt;
907  }
908 }
909 
910 MemoryType MemoryManager::Delete_(void *h_ptr, MemoryType h_mt, unsigned flags)
911 {
912  const bool alias = flags & Mem::ALIAS;
913  const bool registered = flags & Mem::REGISTERED;
914  const bool owns_host = flags & Mem::OWNS_HOST;
915  const bool owns_device = flags & Mem::OWNS_DEVICE;
916  const bool owns_internal = flags & Mem::OWNS_INTERNAL;
917  MFEM_ASSERT(IsHostMemory(h_mt), "invalid h_mt = " << (int)h_mt);
918  // MFEM_ASSERT(registered || IsHostMemory(h_mt),"");
919  MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
920  // If at least one of the 'own_*' flags is true then 'registered' must be
921  // true too. An acceptable exception is the special case when 'h_ptr' is
922  // NULL, and both 'own_device' and 'own_internal' are false -- this case is
923  // an exception only when 'own_host' is true and 'registered' is false.
924  MFEM_ASSERT(registered || !(owns_host || owns_device || owns_internal) ||
925  (!(owns_device || owns_internal) && h_ptr == nullptr),
926  "invalid Memory state");
927  if (!mm.exists || !registered) { return h_mt; }
928  if (alias)
929  {
930  if (owns_internal)
931  {
932  MFEM_ASSERT(mm.IsAlias(h_ptr), "");
933  MFEM_ASSERT(h_mt == maps->aliases.at(h_ptr).h_mt, "");
934  mm.EraseAlias(h_ptr);
935  }
936  }
937  else // Known
938  {
939  if (owns_host && (h_mt != MemoryType::HOST))
940  { ctrl->Host(h_mt)->Dealloc(h_ptr); }
941  if (owns_internal)
942  {
943  MFEM_ASSERT(mm.IsKnown(h_ptr), "");
944  MFEM_ASSERT(h_mt == maps->memories.at(h_ptr).h_mt, "");
945  mm.Erase(h_ptr, owns_device);
946  }
947  }
948  return h_mt;
949 }
950 
951 void MemoryManager::DeleteDevice_(void *h_ptr, unsigned & flags)
952 {
953  const bool owns_device = flags & Mem::OWNS_DEVICE;
954  if (owns_device)
955  {
956  mm.EraseDevice(h_ptr);
957  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
958  }
959 }
960 
961 bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
962  MemoryType h_mt, size_t bytes,
963  unsigned flags)
964 {
965  if (!h_ptr)
966  {
967  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
968  return true;
969  }
970  MemoryType d_mt;
971  if (!(flags & Mem::ALIAS))
972  {
973  auto iter = maps->memories.find(h_ptr);
974  MFEM_VERIFY(iter != maps->memories.end(), "internal error");
975  d_mt = iter->second.d_mt;
976  }
977  else
978  {
979  auto iter = maps->aliases.find(h_ptr);
980  MFEM_VERIFY(iter != maps->aliases.end(), "internal error");
981  d_mt = iter->second.mem->d_mt;
982  }
983  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
984  switch (mc)
985  {
986  case MemoryClass::HOST_32:
987  {
988  MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
989  h_mt == MemoryType::HOST_64,"");
990  return true;
991  }
992  case MemoryClass::HOST_64:
993  {
994  MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
995  return true;
996  }
997  case MemoryClass::DEVICE:
998  {
999  MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
1000  d_mt == MemoryType::DEVICE_DEBUG ||
1001  d_mt == MemoryType::DEVICE_UMPIRE ||
1002  d_mt == MemoryType::DEVICE_UMPIRE_2 ||
1003  d_mt == MemoryType::MANAGED,"");
1004  return true;
1005  }
1006  case MemoryClass::MANAGED:
1007  {
1008  MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
1009  d_mt == MemoryType::MANAGED),"");
1010  return true;
1011  }
1012  default: break;
1013  }
1014  return true;
1015 }
1016 
1017 void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1018  size_t bytes, unsigned &flags)
1019 {
1020  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1021  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
1022  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1023  if (IsHostMemory(GetMemoryType(mc)))
1024  {
1025  const bool copy = !(flags & Mem::VALID_HOST);
1026  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1027  if (flags & Mem::ALIAS)
1028  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1029  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1030  }
1031  else
1032  {
1033  const bool copy = !(flags & Mem::VALID_DEVICE);
1034  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1035  if (flags & Mem::ALIAS)
1036  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1037  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1038  }
1039 }
1040 
1041 const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1042  size_t bytes, unsigned &flags)
1043 {
1044  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1045  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
1046  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1047  if (IsHostMemory(GetMemoryType(mc)))
1048  {
1049  const bool copy = !(flags & Mem::VALID_HOST);
1050  flags |= Mem::VALID_HOST;
1051  if (flags & Mem::ALIAS)
1052  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1053  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1054  }
1055  else
1056  {
1057  const bool copy = !(flags & Mem::VALID_DEVICE);
1058  flags |= Mem::VALID_DEVICE;
1059  if (flags & Mem::ALIAS)
1060  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1061  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1062  }
1063 }
1064 
1065 void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1066  size_t bytes, unsigned &flags)
1067 {
1068  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1069  if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,""); }
1070  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1071  if (IsHostMemory(GetMemoryType(mc)))
1072  {
1073  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1074  if (flags & Mem::ALIAS)
1075  { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
1076  else { return mm.GetHostPtr(h_ptr, bytes, false); }
1077  }
1078  else
1079  {
1080  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1081  if (flags & Mem::ALIAS)
1082  { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
1083  else { return mm.GetDevicePtr(h_ptr, bytes, false); }
1084  }
1085 }
1086 
1087 void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
1088  size_t alias_bytes, unsigned base_flags,
1089  unsigned &alias_flags)
1090 {
1091  // This is called only when (base_flags & Mem::REGISTERED) is true.
1092  // Note that (alias_flags & REGISTERED) may not be true.
1093  MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
1094  if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
1095  {
1096  mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
1097  }
1098  if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
1099  {
1100  if (!(alias_flags & Mem::REGISTERED))
1101  {
1102  mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
1103  alias_flags = (alias_flags | Mem::REGISTERED | Mem::OWNS_INTERNAL) &
1104  ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
1105  }
1106  mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
1107  }
1108  alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
1109  (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
1110 }
1111 
1112 MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr, bool alias)
1113 {
1114  if (mm.exists)
1115  {
1116  if (!alias)
1117  {
1118  auto iter = maps->memories.find(h_ptr);
1119  MFEM_ASSERT(iter != maps->memories.end(), "internal error");
1120  return iter->second.d_mt;
1121  }
1122  // alias == true
1123  auto iter = maps->aliases.find(h_ptr);
1124  MFEM_ASSERT(iter != maps->aliases.end(), "internal error");
1125  return iter->second.mem->d_mt;
1126  }
1127  MFEM_ABORT("internal error");
1128  return MemoryManager::host_mem_type;
1129 }
1130 
1131 MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
1132 {
1133  if (!mm.exists) { return MemoryManager::host_mem_type; }
1134  if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
1135  if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).h_mt; }
1136  return MemoryManager::host_mem_type;
1137 }
1138 
1139 void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
1140  size_t bytes, unsigned src_flags,
1141  unsigned &dst_flags)
1142 {
1143  // Type of copy to use based on the src and dest validity flags:
1144  //            |       src
1145  //            |  h  |  d  |  hd
1146  // -----------+-----+-----+------
1147  //         h  | h2h   d2h   h2h
1148  //  dest   d  | h2d   d2d   d2d
1149  //        hd  | h2h   d2d   d2d
1150 
1151  const bool dst_on_host =
1152  (dst_flags & Mem::VALID_HOST) &&
1153  (!(dst_flags & Mem::VALID_DEVICE) ||
1154  ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));
1155 
1156  dst_flags = dst_flags &
1157  ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1158 
1159  const bool src_on_host =
1160  (src_flags & Mem::VALID_HOST) &&
1161  (!(src_flags & Mem::VALID_DEVICE) ||
1162  ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));
1163 
1164  const void *src_d_ptr =
1165  src_on_host ? NULL :
1166  ((src_flags & Mem::ALIAS) ?
1167  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1168  mm.GetDevicePtr(src_h_ptr, bytes, false));
1169 
1170  if (dst_on_host)
1171  {
1172  if (src_on_host)
1173  {
1174  if (dst_h_ptr != src_h_ptr && bytes != 0)
1175  {
1176  MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
1177  (const char*)src_h_ptr + bytes <= dst_h_ptr,
1178  "data overlaps!");
1179  std::memcpy(dst_h_ptr, src_h_ptr, bytes);
1180  }
1181  }
1182  else
1183  {
1184  if (dst_h_ptr != src_d_ptr && bytes != 0)
1185  {
1186  internal::Memory &src_d_base = maps->memories.at(src_h_ptr);
1187  MemoryType src_d_mt = src_d_base.d_mt;
1188  ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
1189  }
1190  }
1191  }
1192  else
1193  {
1194  void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
1195  mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
1196  mm.GetDevicePtr(dst_h_ptr, bytes, false);
1197  if (src_on_host)
1198  {
1199  const bool known = mm.IsKnown(dst_h_ptr);
1200  const bool alias = dst_flags & Mem::ALIAS;
1201  MFEM_VERIFY(alias||known,"");
1202  const MemoryType d_mt = known ?
1203  maps->memories.at(dst_h_ptr).d_mt :
1204  maps->aliases.at(dst_h_ptr).mem->d_mt;
1205  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1206  }
1207  else
1208  {
1209  if (dest_d_ptr != src_d_ptr && bytes != 0)
1210  {
1211  const bool known = mm.IsKnown(dst_h_ptr);
1212  const bool alias = dst_flags & Mem::ALIAS;
1213  MFEM_VERIFY(alias||known,"");
1214  const MemoryType d_mt = known ?
1215  maps->memories.at(dst_h_ptr).d_mt :
1216  maps->aliases.at(dst_h_ptr).mem->d_mt;
1217  ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
1218  }
1219  }
1220  }
1221 }
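// Example reading of the validity table above (a sketch, not original code):
// with the destination valid on host and device and the source valid on
// device only,
//
//    unsigned src_flags = Mem::VALID_DEVICE;
//    unsigned dst_flags = Mem::VALID_HOST | Mem::VALID_DEVICE;
//
// dst_on_host evaluates to false, so Copy_ performs a DtoD copy and clears
// Mem::VALID_HOST in dst_flags (column 'd', row 'hd' of the table).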
1222 
1223 void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
1224  size_t bytes, unsigned src_flags)
1225 {
1226  const bool src_on_host = src_flags & Mem::VALID_HOST;
1227  if (src_on_host)
1228  {
1229  if (dest_h_ptr != src_h_ptr && bytes != 0)
1230  {
1231  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1232  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1233  "data overlaps!");
1234  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1235  }
1236  }
1237  else
1238  {
1239  MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
1240  const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
1241  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1242  mm.GetDevicePtr(src_h_ptr, bytes, false);
1243  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1244  const MemoryType d_mt = base.d_mt;
1245  ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
1246  }
1247 }
1248 
1249 void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
1250  size_t bytes, unsigned &dest_flags)
1251 {
1252  const bool dest_on_host = dest_flags & Mem::VALID_HOST;
1253  if (dest_on_host)
1254  {
1255  if (dest_h_ptr != src_h_ptr && bytes != 0)
1256  {
1257  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1258  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1259  "data overlaps!");
1260  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1261  }
1262  }
1263  else
1264  {
1265  void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
1266  mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
1267  mm.GetDevicePtr(dest_h_ptr, bytes, false);
1268  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1269  const MemoryType d_mt = base.d_mt;
1270  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1271  }
1272  dest_flags = dest_flags &
1273  ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1274 }
1275 
1276 bool MemoryManager::IsKnown_(const void *h_ptr)
1277 {
1278  return maps->memories.find(h_ptr) != maps->memories.end();
1279 }
1280 
1281 bool MemoryManager::IsAlias_(const void *h_ptr)
1282 {
1283  return maps->aliases.find(h_ptr) != maps->aliases.end();
1284 }
1285 
1286 void MemoryManager::Insert(void *h_ptr, size_t bytes,
1287  MemoryType h_mt, MemoryType d_mt)
1288 {
1289 #ifdef MFEM_TRACK_MEM_MANAGER
1290  mfem::out << "[mfem memory manager]: registering h_ptr: " << h_ptr
1291  << ", bytes: " << bytes << std::endl;
1292 #endif
1293  if (h_ptr == NULL)
1294  {
1295  MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
1296  return;
1297  }
1298  MFEM_VERIFY_TYPES(h_mt, d_mt);
1299 #ifdef MFEM_DEBUG
1300  auto res =
1301 #endif
1302  maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
1303 #ifdef MFEM_DEBUG
1304  if (res.second == false)
1305  {
1306  auto &m = res.first->second;
1307  MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt &&
1308  (m.d_mt == d_mt || (d_mt == MemoryType::DEFAULT &&
1309  m.d_mt == GetDualMemoryType(h_mt))),
1310  "Address already present with different attributes!");
1311 #ifdef MFEM_TRACK_MEM_MANAGER
1312  mfem::out << "[mfem memory manager]: repeated registration of h_ptr: "
1313  << h_ptr << std::endl;
1314 #endif
1315  }
1316 #endif
1317 }
1318 
1319 void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
1320  MemoryType h_mt, MemoryType d_mt)
1321 {
1322  // MFEM_VERIFY_TYPES(h_mt, d_mt); // done by Insert() below
1323  MFEM_ASSERT(h_ptr != NULL, "internal error");
1324  Insert(h_ptr, bytes, h_mt, d_mt);
1325  internal::Memory &mem = maps->memories.at(h_ptr);
1326  if (d_ptr == NULL && bytes != 0) { ctrl->Device(d_mt)->Alloc(mem); }
1327  else { mem.d_ptr = d_ptr; }
1328 }
1329 
1330 void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
1331  const size_t bytes, const bool base_is_alias)
1332 {
1333  size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
1334  static_cast<const char*>(base_ptr));
1335 #ifdef MFEM_TRACK_MEM_MANAGER
1336  mfem::out << "[mfem memory manager]: registering alias of base_ptr: "
1337  << base_ptr << ", offset: " << offset << ", bytes: " << bytes
1338  << ", base is alias: " << base_is_alias << std::endl;
1339 #endif
1340  if (!base_ptr)
1341  {
1342  MFEM_VERIFY(offset == 0,
1343  "Trying to add alias to NULL at offset " << offset);
1344  return;
1345  }
1346  if (base_is_alias)
1347  {
1348  const internal::Alias &alias = maps->aliases.at(base_ptr);
1349  MFEM_ASSERT(alias.mem,"");
1350  base_ptr = alias.mem->h_ptr;
1351  offset += alias.offset;
1352 #ifdef MFEM_TRACK_MEM_MANAGER
1353  mfem::out << "[mfem memory manager]: real base_ptr: " << base_ptr
1354  << std::endl;
1355 #endif
1356  }
1357  internal::Memory &mem = maps->memories.at(base_ptr);
1358  MFEM_VERIFY(offset + bytes <= mem.bytes, "invalid alias");
1359  auto res =
1360  maps->aliases.emplace(alias_ptr,
1361  internal::Alias{&mem, offset, 1, mem.h_mt});
1362  if (res.second == false) // alias_ptr was already in the map
1363  {
1364  internal::Alias &alias = res.first->second;
1365  // Update the alias data in case the existing alias is dangling
1366  alias.mem = &mem;
1367  alias.offset = offset;
1368  alias.h_mt = mem.h_mt;
1369  alias.counter++;
1370  }
1371 }
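// Usage sketch (hedged; MakeAlias is part of the public Memory<T> API): an
// alias registered through Alias_/InsertAlias views a sub-range of its base:
//
//    Memory<double> base(100, MemoryType::HOST);
//    Memory<double> sub;
//    sub.MakeAlias(base, 25, 50); // offset of 25*sizeof(double) bytes,
//                                 // 50 entries long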
1372 
1373 void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
1374 {
1375 #ifdef MFEM_TRACK_MEM_MANAGER
1376  mfem::out << "[mfem memory manager]: un-registering h_ptr: " << h_ptr
1377  << std::endl;
1378 #endif
1379  if (!h_ptr) { return; }
1380  auto mem_map_iter = maps->memories.find(h_ptr);
1381  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1382  internal::Memory &mem = mem_map_iter->second;
1383  if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1384  maps->memories.erase(mem_map_iter);
1385 }
1386 
1387 void MemoryManager::EraseDevice(void *h_ptr)
1388 {
1389  if (!h_ptr) { return; }
1390  auto mem_map_iter = maps->memories.find(h_ptr);
1391  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1392  internal::Memory &mem = mem_map_iter->second;
1393  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1394  mem.d_ptr = nullptr;
1395 }
1396 
1397 void MemoryManager::EraseAlias(void *alias_ptr)
1398 {
1399 #ifdef MFEM_TRACK_MEM_MANAGER
1400  mfem::out << "[mfem memory manager]: un-registering alias_ptr: " << alias_ptr
1401  << std::endl;
1402 #endif
1403  if (!alias_ptr) { return; }
1404  auto alias_map_iter = maps->aliases.find(alias_ptr);
1405  if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
1406  internal::Alias &alias = alias_map_iter->second;
1407  if (--alias.counter) { return; }
1408  maps->aliases.erase(alias_map_iter);
1409 }
1410 
1411 void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
1412  bool copy_data)
1413 {
1414  if (!h_ptr)
1415  {
1416  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1417  return NULL;
1418  }
1419  internal::Memory &mem = maps->memories.at(h_ptr);
1420  const MemoryType &h_mt = mem.h_mt;
1421  MemoryType &d_mt = mem.d_mt;
1422  MFEM_VERIFY_TYPES(h_mt, d_mt);
1423  if (!mem.d_ptr)
1424  {
1425  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1426  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1427  }
1428  // Aliases might have done some protections
1429  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1430  if (copy_data)
1431  {
1432  MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
1433  if (bytes) { ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes); }
1434  }
1435  ctrl->Host(h_mt)->Protect(mem, bytes);
1436  return mem.d_ptr;
1437 }
1438 
1439 void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
1440  bool copy)
1441 {
1442  if (!alias_ptr)
1443  {
1444  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1445  return NULL;
1446  }
1447  auto &alias_map = maps->aliases;
1448  auto alias_map_iter = alias_map.find(alias_ptr);
1449  if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
1450  const internal::Alias &alias = alias_map_iter->second;
1451  const size_t offset = alias.offset;
1452  internal::Memory &mem = *alias.mem;
1453  const MemoryType &h_mt = mem.h_mt;
1454  MemoryType &d_mt = mem.d_mt;
1455  MFEM_VERIFY_TYPES(h_mt, d_mt);
1456  if (!mem.d_ptr)
1457  {
1458  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1459  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1460  }
1461  void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
1462  void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
1463  MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
1464  MFEM_ASSERT(offset + bytes <= mem.bytes, "internal error");
1465  mem.d_rw = mem.h_rw = false;
1466  if (mem.d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1467  ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
1468  if (copy && mem.d_ptr)
1469  { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
1470  ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
1471  return alias_d_ptr;
1472 }
1473 
1474 void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
1475 {
1476  const internal::Memory &mem = maps->memories.at(ptr);
1477  MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
1478  MFEM_ASSERT(bytes <= mem.bytes, "internal error");
1479  const MemoryType &h_mt = mem.h_mt;
1480  const MemoryType &d_mt = mem.d_mt;
1481  MFEM_VERIFY_TYPES(h_mt, d_mt);
1482  // Aliases might have done some protections
1483  ctrl->Host(h_mt)->Unprotect(mem, bytes);
1484  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1485  if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
1486  if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
1487  return mem.h_ptr;
1488 }
1489 
1490 void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
1491  bool copy_data)
1492 {
1493  const internal::Alias &alias = maps->aliases.at(ptr);
1494  const internal::Memory *const mem = alias.mem;
1495  const MemoryType &h_mt = mem->h_mt;
1496  const MemoryType &d_mt = mem->d_mt;
1497  MFEM_VERIFY_TYPES(h_mt, d_mt);
1498  void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
1499  void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
1500  MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
1501  mem->h_rw = false;
1502  ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
1503  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1504  if (copy_data && mem->d_ptr)
1505  { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
1506  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
1507  return alias_h_ptr;
1508 }
1509 
1510 void MemoryManager::Init()
1511 {
1512  if (exists) { return; }
1513  maps = new internal::Maps();
1514  ctrl = new internal::Ctrl();
1515  ctrl->Configure();
1516  exists = true;
1517 }
1518 
1519 MemoryManager::MemoryManager() { Init(); }
1520 
1521 MemoryManager::~MemoryManager() { if (exists) { Destroy(); } }
1522 
1523 void MemoryManager::SetDualMemoryType(MemoryType mt, MemoryType dual_mt)
1524 {
1525  MFEM_VERIFY(!configured, "changing the dual MemoryTypes is not allowed after"
1526  " MemoryManager configuration!");
1527  UpdateDualMemoryType(mt, dual_mt);
1528 }
1529 
1530 void MemoryManager::UpdateDualMemoryType(MemoryType mt, MemoryType dual_mt)
1531 {
1532  MFEM_VERIFY((int)mt < MemoryTypeSize,
1533  "invalid MemoryType, mt = " << (int)mt);
1534  MFEM_VERIFY((int)dual_mt < MemoryTypeSize,
1535  "invalid dual MemoryType, dual_mt = " << (int)dual_mt);
1536 
1537  if ((IsHostMemory(mt) && IsDeviceMemory(dual_mt)) ||
1538  (IsDeviceMemory(mt) && IsHostMemory(dual_mt)))
1539  {
1540  dual_map[(int)mt] = dual_mt;
1541  }
1542  else
1543  {
1544  // mt + dual_mt is not a pair of host + device types: this is only allowed
1545  // when mt == dual_mt and mt is a host type; in this case we do not
1546  // actually update the dual
1547  MFEM_VERIFY(mt == dual_mt && IsHostMemory(mt),
1548  "invalid (mt, dual_mt) pair: ("
1549  << MemoryTypeName[(int)mt] << ", "
1550  << MemoryTypeName[(int)dual_mt] << ')');
1551  }
1552 }
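// Usage sketch (hedged): the dual of a MemoryType can be overridden, but only
// before the memory manager is configured, e.g.
//
//    MemoryManager::SetDualMemoryType(MemoryType::HOST_PINNED,
//                                     MemoryType::DEVICE_UMPIRE);
//
// After configuration, SetDualMemoryType() aborts; UpdateDualMemoryType()
// remains the internal path (see Configure() below).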
1553 
1554 void MemoryManager::Configure(const MemoryType host_mt,
1555  const MemoryType device_mt)
1556 {
1557  MemoryManager::UpdateDualMemoryType(host_mt, device_mt);
1558  MemoryManager::UpdateDualMemoryType(device_mt, host_mt);
1559  if (device_mt == MemoryType::DEVICE_DEBUG)
1560  {
1561  for (int mt = (int)MemoryType::HOST; mt < (int)MemoryType::MANAGED; mt++)
1562  {
1563  MemoryManager::UpdateDualMemoryType(
1564  (MemoryType)mt, MemoryType::DEVICE_DEBUG);
1565  }
1566  }
1567  Init();
1568  host_mem_type = host_mt;
1569  device_mem_type = device_mt;
1570  configured = true;
1571 }
1572 
1573 void MemoryManager::Destroy()
1574 {
1575  MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
1576 #ifdef MFEM_TRACK_MEM_MANAGER
1577  size_t num_memories = maps->memories.size();
1578  size_t num_aliases = maps->aliases.size();
1579  if (num_memories != 0 || num_aliases != 0)
1580  {
1581  MFEM_WARNING("...\n\t number of registered pointers: " << num_memories
1582  << "\n\t number of registered aliases : " << num_aliases);
1583  }
1584 #endif
1585  // Keep for debugging purposes:
1586 #if 0
1587  mfem::out << "Destroying the MemoryManager ...\n"
1588  << "remaining registered pointers : "
1589  << maps->memories.size() << '\n'
1590  << "remaining registered aliases : "
1591  << maps->aliases.size() << '\n';
1592 #endif
1593  for (auto& n : maps->memories)
1594  {
1595  internal::Memory &mem = n.second;
1596  bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
1597  if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1598  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1599  }
1600  delete maps; maps = nullptr;
1601  delete ctrl; ctrl = nullptr;
1602  host_mem_type = MemoryType::HOST;
1603  device_mem_type = MemoryType::HOST;
1604  exists = false;
1605  configured = false;
1606 }
1607 
1608 void MemoryManager::RegisterCheck(void *ptr)
1609 {
1610  if (ptr != NULL)
1611  {
1612  if (!IsKnown(ptr))
1613  {
1614  mfem_error("Pointer is not registered!");
1615  }
1616  }
1617 }
1618 
1619 int MemoryManager::PrintPtrs(std::ostream &os)
1620 {
1621  int n_out = 0;
1622  for (const auto& n : maps->memories)
1623  {
1624  const internal::Memory &mem = n.second;
1625  os << "\nkey " << n.first << ", "
1626  << "h_ptr " << mem.h_ptr << ", "
1627  << "d_ptr " << mem.d_ptr;
1628  n_out++;
1629  }
1630  if (maps->memories.size() > 0) { os << std::endl; }
1631  return n_out;
1632 }
1633 
1634 int MemoryManager::PrintAliases(std::ostream &os)
1635 {
1636  int n_out = 0;
1637  for (const auto& n : maps->aliases)
1638  {
1639  const internal::Alias &alias = n.second;
1640  os << "\nalias: key " << n.first << ", "
1641  << "h_ptr " << alias.mem->h_ptr << ", "
1642  << "offset " << alias.offset << ", "
1643  << "counter " << alias.counter;
1644  n_out++;
1645  }
1646  if (maps->aliases.size() > 0) { os << std::endl; }
1647  return n_out;
1648 }
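// An illustrative sketch (not part of the library source): both printers are
// debugging aids; each writes one record per entry and returns the count:
//
//    int np = mm.PrintPtrs(mfem::out);    // registered (h_ptr, d_ptr) pairs
//    int na = mm.PrintAliases(mfem::out); // registered aliases
//    mfem::out << np << " pointers, " << na << " aliases\n";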
1649 
1650 int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
1651  unsigned flags)
1652 {
1653  void *d_ptr = (flags & Mem::ALIAS) ?
1654  mm.GetAliasDevicePtr(h_ptr, size, false) :
1655  mm.GetDevicePtr(h_ptr, size, false);
1656  char *h_buf = new char[size];
1657 #ifdef MFEM_USE_CUDA
1658  CuMemcpyDtoH(h_buf, d_ptr, size);
1659 #elif defined(MFEM_USE_HIP)
1660  HipMemcpyDtoH(h_buf, d_ptr, size);
1661 #else
1662  std::memcpy(h_buf, d_ptr, size);
1663 #endif
1664  int res = std::memcmp(h_ptr, h_buf, size);
1665  delete [] h_buf;
1666  return res;
1667 }
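// An illustrative sketch (not part of the library source): this helper backs
// the public Memory<T>::CompareHostAndDevice(), which converts its element
// count to bytes before calling here. For a Vector valid on host and device:
//
//    Vector v(100);
//    v = 1.0;   // write on the host
//    v.Read();  // copy to (and validate) the device
//    MFEM_VERIFY(v.GetMemory().CompareHostAndDevice(v.Size()) == 0,
//                "host and device copies differ");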
1668 
1669 
1670 void MemoryPrintFlags(unsigned flags)
1671 {
1672  typedef Memory<int> Mem;
1673  mfem::out
1674  << "\n registered = " << bool(flags & Mem::REGISTERED)
1675  << "\n owns host = " << bool(flags & Mem::OWNS_HOST)
1676  << "\n owns device = " << bool(flags & Mem::OWNS_DEVICE)
1677  << "\n owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
1678  << "\n valid host = " << bool(flags & Mem::VALID_HOST)
1679  << "\n valid device = " << bool(flags & Mem::VALID_DEVICE)
1680  << "\n device flag = " << bool(flags & Mem::USE_DEVICE)
1681  << "\n alias = " << bool(flags & Mem::ALIAS)
1682  << std::endl;
1683 }
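// An illustrative sketch (not part of the library source): useful from a
// debugger, either directly on a flags word or through the Memory<T> wrapper:
//
//    Vector v(10);
//    v.GetMemory().PrintFlags(); // forwards to MemoryPrintFlags()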
1684 
1685 void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr,
1686  bool alias)
1687 {
1688  if (!mm.exists) {return;}
1689  if (!alias)
1690  {
1691  auto it = maps->memories.find(h_ptr);
1692  MFEM_VERIFY(it != maps->memories.end(),
1693  "host pointer is not registered: h_ptr = " << h_ptr);
1694  MFEM_VERIFY(h_mt == it->second.h_mt, "host pointer MemoryType mismatch");
1695  }
1696  else
1697  {
1698  auto it = maps->aliases.find(h_ptr);
1699  MFEM_VERIFY(it != maps->aliases.end(),
1700  "alias pointer is not registered: h_ptr = " << h_ptr);
1701  MFEM_VERIFY(h_mt == it->second.h_mt, "alias pointer MemoryType mismatch");
1702  }
1703 }
1704 
1706 MemoryManager mm;
1707 bool MemoryManager::exists = false;
1708 bool MemoryManager::configured = false;
1709 
1710 MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
1711 MemoryType MemoryManager::device_mem_type = MemoryType::HOST;
1712 
1713 MemoryType MemoryManager::dual_map[MemoryTypeSize] =
1714 {
1715  /* HOST */ MemoryType::DEVICE,
1716  /* HOST_32 */ MemoryType::DEVICE,
1717  /* HOST_64 */ MemoryType::DEVICE,
1718  /* HOST_DEBUG */ MemoryType::DEVICE_DEBUG,
1719  /* HOST_UMPIRE */ MemoryType::DEVICE_UMPIRE,
1720  /* HOST_PINNED */ MemoryType::DEVICE,
1721  /* MANAGED */ MemoryType::MANAGED,
1722  /* DEVICE */ MemoryType::HOST,
1723  /* DEVICE_DEBUG */ MemoryType::HOST_DEBUG,
1724  /* DEVICE_UMPIRE */ MemoryType::HOST_UMPIRE,
1725  /* DEVICE_UMPIRE_2 */ MemoryType::HOST_UMPIRE
1726 };
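// An illustrative sketch (not part of the library source): the table above is
// what GetDualMemoryType() returns from, unless SetDualMemoryType() has
// overridden an entry:
//
//    MemoryType d_mt = MemoryManager::GetDualMemoryType(MemoryType::HOST_DEBUG);
//    // d_mt == MemoryType::DEVICE_DEBUG with the defaults above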
1727 
1728 #ifdef MFEM_USE_UMPIRE
1729 const char * MemoryManager::h_umpire_name = "MFEM_HOST";
1730 const char * MemoryManager::d_umpire_name = "MFEM_DEVICE";
1731 const char * MemoryManager::d_umpire_2_name = "MFEM_DEVICE_2";
1732 #endif
1733 
1734 
1735 const char *MemoryTypeName[MemoryTypeSize] =
1736 {
1737  "host-std", "host-32", "host-64", "host-debug", "host-umpire", "host-pinned",
1738 #if defined(MFEM_USE_CUDA)
1739  "cuda-uvm",
1740  "cuda",
1741 #elif defined(MFEM_USE_HIP)
1742  "hip-uvm",
1743  "hip",
1744 #else
1745  "managed",
1746  "device",
1747 #endif
1748  "device-debug",
1749 #if defined(MFEM_USE_CUDA)
1750  "cuda-umpire",
1751  "cuda-umpire-2",
1752 #elif defined(MFEM_USE_HIP)
1753  "hip-umpire",
1754  "hip-umpire-2",
1755 #else
1756  "device-umpire",
1757  "device-umpire-2",
1758 #endif
1759 };
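// An illustrative sketch (not part of the library source): MemoryTypeName
// maps each MemoryType enumerator to the string shown during Device
// configuration; indexing it with a cast enumerator retrieves the name:
//
//    mfem::out << MemoryTypeName[(int)MemoryType::HOST_DEBUG] << '\n';
//    // prints "host-debug"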
1760 
1761 } // namespace mfem