MFEM  v4.5.2
Finite element discretization library
mem_manager.cpp
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "forall.hpp"
13 #include "mem_manager.hpp"
14 
15 #include <list>
16 #include <cstring> // std::memcpy, std::memcmp
17 #include <unordered_map>
18 #include <algorithm> // std::max
19 
20 // Uncomment to try _WIN32 platform
21 //#define _WIN32
22 //#define _aligned_malloc(s,a) malloc(s)
23 
24 #ifndef _WIN32
25 #include <unistd.h>
26 #include <signal.h>
27 #include <sys/mman.h>
28 #define mfem_memalign(p,a,s) posix_memalign(p,a,s)
29 #define mfem_aligned_free free
30 #else
31 #define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
32 #define mfem_aligned_free _aligned_free
33 #endif
34 
35 #ifdef MFEM_USE_UMPIRE
36 #include <umpire/Umpire.hpp>
37 #include <umpire/strategy/QuickPool.hpp>
38 
39 // Make sure Umpire is built with CUDA support if MFEM is built with it.
40 #if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
41 #error "CUDA is not enabled in Umpire!"
42 #endif
43 // Make sure Umpire is built with HIP support if MFEM is built with it.
44 #if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
45 #error "HIP is not enabled in Umpire!"
46 #endif
47 #endif // MFEM_USE_UMPIRE
48 
49 // Internal debug option, useful for tracking some memory manager operations.
50 // #define MFEM_TRACK_MEM_MANAGER
51 
52 namespace mfem
53 {
54 
55 MemoryType GetMemoryType(MemoryClass mc)
56 {
57  switch (mc)
58  {
59  case MemoryClass::HOST: return mm.GetHostMemoryType();
60  case MemoryClass::HOST_32: return MemoryType::HOST_32;
61  case MemoryClass::HOST_64: return MemoryType::HOST_64;
62  case MemoryClass::DEVICE: return mm.GetDeviceMemoryType();
63  case MemoryClass::MANAGED: return MemoryType::MANAGED;
64  }
65  MFEM_VERIFY(false,"");
66  return MemoryType::HOST;
67 }
68 
69 
70 bool MemoryClassContainsType(MemoryClass mc, MemoryType mt)
71 {
72  switch (mc)
73  {
74  case MemoryClass::HOST: return IsHostMemory(mt);
75  case MemoryClass::HOST_32:
76  return (mt == MemoryType::HOST_32 ||
77  mt == MemoryType::HOST_64 ||
78  mt == MemoryType::HOST_DEBUG);
79  case MemoryClass::HOST_64:
80  return (mt == MemoryType::HOST_64 ||
81  mt == MemoryType::HOST_DEBUG);
82  case MemoryClass::DEVICE: return IsDeviceMemory(mt);
83  case MemoryClass::MANAGED:
84  return (mt == MemoryType::MANAGED);
85  }
86  MFEM_ABORT("invalid MemoryClass");
87  return false;
88 }
89 
90 
91 static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
92 {
93  MFEM_VERIFY(IsHostMemory(h_mt), "h_mt = " << (int)h_mt);
94  MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == MemoryType::DEFAULT,
95  "d_mt = " << (int)d_mt);
96  // If h_mt == MemoryType::HOST_DEBUG, then d_mt == MemoryType::DEVICE_DEBUG
97  // or d_mt == MemoryType::DEFAULT
98  MFEM_VERIFY(h_mt != MemoryType::HOST_DEBUG ||
99  d_mt == MemoryType::DEVICE_DEBUG ||
100  d_mt == MemoryType::DEFAULT,
101  "d_mt = " << MemoryTypeName[(int)d_mt]);
102  // If d_mt == MemoryType::DEVICE_DEBUG, then h_mt != MemoryType::MANAGED
103  MFEM_VERIFY(d_mt != MemoryType::DEVICE_DEBUG ||
104  h_mt != MemoryType::MANAGED,
105  "h_mt = " << MemoryTypeName[(int)h_mt]);
106 #if 0
107  const bool sync =
108  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE) ||
109  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE) ||
110  (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
111  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE) ||
112  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE) ||
113  (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
114  (h_mt == MemoryType::HOST_DEBUG && d_mt == MemoryType::DEVICE_DEBUG) ||
115  (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
116  (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
117  (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
118  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE) ||
119  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE) ||
120  (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE_2);
121  MFEM_VERIFY(sync, "");
122 #endif
123 }
124 
125 MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
126 {
127  // | HOST HOST_32 HOST_64 DEVICE MANAGED
128  // ---------+---------------------------------------------
129  // HOST | HOST HOST_32 HOST_64 DEVICE MANAGED
130  // HOST_32 | HOST_32 HOST_32 HOST_64 DEVICE MANAGED
131  // HOST_64 | HOST_64 HOST_64 HOST_64 DEVICE MANAGED
132  // DEVICE | DEVICE DEVICE DEVICE DEVICE MANAGED
133  // MANAGED | MANAGED MANAGED MANAGED MANAGED MANAGED
134 
135  // Using the enumeration ordering:
136  // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
137  // the above table is simply: a*b = max(a,b).
138 
139  return std::max(mc1, mc2);
140 }
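
A quick illustration of the max rule above; a minimal sketch, not part of mem_manager.cpp, assuming only the public MemoryClass enum and this operator* as declared in mem_manager.hpp:

   #include "mfem.hpp"
   using namespace mfem;

   void MemoryClassDemo()
   {
      // The combination follows the enum ordering
      // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
      // so each product is simply the larger operand.
      MFEM_VERIFY(MemoryClass::HOST_32 * MemoryClass::HOST_64 ==
                  MemoryClass::HOST_64, "");
      MFEM_VERIFY(MemoryClass::HOST * MemoryClass::DEVICE ==
                  MemoryClass::DEVICE, "");
      MFEM_VERIFY(MemoryClass::DEVICE * MemoryClass::MANAGED ==
                  MemoryClass::MANAGED, "");
   }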
141 
142 
143 // Instantiate Memory<T>::PrintFlags for T = int and T = double.
144 template void Memory<int>::PrintFlags() const;
145 template void Memory<double>::PrintFlags() const;
146 
147 // Instantiate Memory<T>::CompareHostAndDevice for T = int and T = double.
148 template int Memory<int>::CompareHostAndDevice(int size) const;
149 template int Memory<double>::CompareHostAndDevice(int size) const;
150 
151 
152 namespace internal
153 {
154 
155 /// Memory class that holds:
156 /// - the host and the device pointer
157 /// - the size in bytes of this memory region
158 /// - the host and device type of this memory region
159 struct Memory
160 {
161  void *const h_ptr;
162  void *d_ptr;
163  const size_t bytes;
164  const MemoryType h_mt;
165  MemoryType d_mt;
166  mutable bool h_rw, d_rw;
167  Memory(void *p, size_t b, MemoryType h, MemoryType d):
168  h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
169  h_rw(true), d_rw(true) { }
170 };
171 
172 /// Alias class that holds the base memory region and the offset
173 struct Alias
174 {
175  Memory *mem;
176  size_t offset;
177  size_t counter;
178  // 'h_mt' is already stored in 'mem', however, we use this field for type
179  // checking since the alias may be dangling, i.e. 'mem' may be invalid.
180  MemoryType h_mt;
181 };
182 
183 /// Maps for the Memory and the Alias classes
184 typedef std::unordered_map<const void*, Memory> MemoryMap;
185 typedef std::unordered_map<const void*, Alias> AliasMap;
186 
187 struct Maps
188 {
189  MemoryMap memories;
190  AliasMap aliases;
191 };
192 
193 } // namespace mfem::internal
194 
195 static internal::Maps *maps;
196 
197 namespace internal
198 {
199 
200 /// The host memory space base abstract class
201 class HostMemorySpace
202 {
203 public:
204  virtual ~HostMemorySpace() { }
205  virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
206  virtual void Dealloc(void *ptr) { std::free(ptr); }
207  virtual void Protect(const Memory&, size_t) { }
208  virtual void Unprotect(const Memory&, size_t) { }
209  virtual void AliasProtect(const void*, size_t) { }
210  virtual void AliasUnprotect(const void*, size_t) { }
211 };
212 
213 /// The device memory space base abstract class
214 class DeviceMemorySpace
215 {
216 public:
217  virtual ~DeviceMemorySpace() { }
218  virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
219  virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
220  virtual void Protect(const Memory&) { }
221  virtual void Unprotect(const Memory&) { }
222  virtual void AliasProtect(const void*, size_t) { }
223  virtual void AliasUnprotect(const void*, size_t) { }
224  virtual void *HtoD(void *dst, const void *src, size_t bytes)
225  { return std::memcpy(dst, src, bytes); }
226  virtual void *DtoD(void *dst, const void *src, size_t bytes)
227  { return std::memcpy(dst, src, bytes); }
228  virtual void *DtoH(void *dst, const void *src, size_t bytes)
229  { return std::memcpy(dst, src, bytes); }
230 };
231 
232 /// The default std:: host memory space
233 class StdHostMemorySpace : public HostMemorySpace { };
234 
235 /// The No host memory space
236 struct NoHostMemorySpace : public HostMemorySpace
237 {
238  void Alloc(void**, const size_t) { mfem_error("! Host Alloc error"); }
239 };
240 
241 /// The aligned 32 host memory space
242 class Aligned32HostMemorySpace : public HostMemorySpace
243 {
244 public:
245  Aligned32HostMemorySpace(): HostMemorySpace() { }
246  void Alloc(void **ptr, size_t bytes)
247  { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
248  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
249 };
250 
251 /// The aligned 64 host memory space
252 class Aligned64HostMemorySpace : public HostMemorySpace
253 {
254 public:
255  Aligned64HostMemorySpace(): HostMemorySpace() { }
256  void Alloc(void **ptr, size_t bytes)
257  { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
258  void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
259 };
260 
261 #ifndef _WIN32
262 static uintptr_t pagesize = 0;
263 static uintptr_t pagemask = 0;
264 
265 /// Returns the restricted base address of the DEBUG segment
266 inline const void *MmuAddrR(const void *ptr)
267 {
268  const uintptr_t addr = (uintptr_t) ptr;
269  return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
270 }
271 
272 /// Returns the prolongated base address of the MMU segment
273 inline const void *MmuAddrP(const void *ptr)
274 {
275  const uintptr_t addr = (uintptr_t) ptr;
276  return (void*) (addr & ~pagemask);
277 }
278 
279 /// Compute the restricted length for the MMU segment
280 inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
281 {
282  // a ---->A:| |:B<---- b
283  const uintptr_t a = (uintptr_t) ptr;
284  const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
285  MFEM_ASSERT(a <= A, "");
286  const uintptr_t b = a + bytes;
287  const uintptr_t B = b & ~pagemask;
288  MFEM_ASSERT(B <= b, "");
289  const uintptr_t length = B > A ? B - A : 0;
290  MFEM_ASSERT(length % pagesize == 0,"");
291  return length;
292 }
293 
294 /// Compute the prolongated length for the MMU segment
295 inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
296 {
297  // |:A<----a | | b---->B:|
298  const uintptr_t a = (uintptr_t) ptr;
299  const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
300  MFEM_ASSERT(A <= a, "");
301  const uintptr_t b = a + bytes;
302  const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
303  MFEM_ASSERT(b <= B, "");
304  MFEM_ASSERT(B >= A,"");
305  const uintptr_t length = B - A;
306  MFEM_ASSERT(length % pagesize == 0,"");
307  return length;
308 }
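
To make the page arithmetic concrete, here is a small standalone self-check with hypothetical values (pagesize = 0x1000): the restricted (R) range keeps only whole pages inside [ptr, ptr+bytes), while the prolongated (P) range covers every page that overlaps it:

   #include <cassert>
   #include <cstdint>

   int main()
   {
      const uintptr_t pagesize = 0x1000, pagemask = pagesize - 1;
      const uintptr_t a = 0x10010, bytes = 0x2FF0, b = a + bytes; // b = 0x13000
      // Restricted: round the begin UP and the end DOWN to page boundaries.
      const uintptr_t A = (a & pagemask) ? (a + pagesize) & ~pagemask : a;
      const uintptr_t B = b & ~pagemask;
      assert(A == 0x11000 && B - A == 0x2000); // two whole pages inside
      // Prolongated: round the begin DOWN and the end UP to page boundaries.
      const uintptr_t P = a & ~pagemask;
      const uintptr_t Q = (b & pagemask) ? (b + pagesize) & ~pagemask : b;
      assert(P == 0x10000 && Q - P == 0x3000); // three overlapping pages
      return 0;
   }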
309 
310 /// The protected access error, used for the host
311 static void MmuError(int, siginfo_t *si, void*)
312 {
313  constexpr size_t buf_size = 64;
314  fflush(0);
315  char str[buf_size];
316  const void *ptr = si->si_addr;
317  snprintf(str, buf_size, "Error while accessing address %p!", ptr);
318  mfem::out << std::endl << "An illegal memory access was made!";
319  MFEM_ABORT(str);
320 }
321 
322 /// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
323 static void MmuInit()
324 {
325  if (pagesize > 0) { return; }
326  struct sigaction sa;
327  sa.sa_flags = SA_SIGINFO;
328  sigemptyset(&sa.sa_mask);
329  sa.sa_sigaction = MmuError;
330  if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
331  if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
332  pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
333  MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
334  pagemask = pagesize - 1;
335 }
336 
337 /// MMU allocation, through ::mmap
338 inline void MmuAlloc(void **ptr, const size_t bytes)
339 {
340  const size_t length = bytes == 0 ? 8 : bytes;
341  const int prot = PROT_READ | PROT_WRITE;
342  const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
343  *ptr = ::mmap(NULL, length, prot, flags, -1, 0);
344  if (*ptr == MAP_FAILED) { throw ::std::bad_alloc(); }
345 }
346 
347 /// MMU deallocation, through ::munmap
348 inline void MmuDealloc(void *ptr, const size_t bytes)
349 {
350  const size_t length = bytes == 0 ? 8 : bytes;
351  if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
352 }
353 
354 /// MMU protection, through ::mprotect with no read/write accesses
355 inline void MmuProtect(const void *ptr, const size_t bytes)
356 {
357  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
358  if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
359  if (mmu_protect_error) { mfem_error("MMU protection (NONE) error"); }
360 }
361 
362 /// MMU un-protection, through ::mprotect with read/write accesses
363 inline void MmuAllow(const void *ptr, const size_t bytes)
364 {
365  const int RW = PROT_READ | PROT_WRITE;
366  static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
367  if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
368  if (mmu_protect_error) { mfem_error("MMU protection (R/W) error"); }
369 }
370 #else
371 inline void MmuInit() { }
372 inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
373 inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
374 inline void MmuProtect(const void*, const size_t) { }
375 inline void MmuAllow(const void*, const size_t) { }
376 inline const void *MmuAddrR(const void *a) { return a; }
377 inline const void *MmuAddrP(const void *a) { return a; }
378 inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
379 inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
380 #endif
381 
382 /// The MMU host memory space
383 class MmuHostMemorySpace : public HostMemorySpace
384 {
385 public:
386  MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
387  void Alloc(void **ptr, size_t bytes) { MmuAlloc(ptr, bytes); }
388  void Dealloc(void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
389  void Protect(const Memory& mem, size_t bytes)
390  { if (mem.h_rw) { mem.h_rw = false; MmuProtect(mem.h_ptr, bytes); } }
391  void Unprotect(const Memory &mem, size_t bytes)
392  { if (!mem.h_rw) { mem.h_rw = true; MmuAllow(mem.h_ptr, bytes); } }
393  /// Aliases need to be restricted during protection
394  void AliasProtect(const void *ptr, size_t bytes)
395  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
396  /// Aliases need to be prolongated for un-protection
397  void AliasUnprotect(const void *ptr, size_t bytes)
398  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
399 };
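
For context: this MMU space backs MemoryType::HOST_DEBUG, i.e. the 'debug' device. Host pages are mprotect'ed while the data is valid only on the (emulated) device, so a stale host access faults straight into MmuError() above. A minimal sketch of tripping it, assuming the standard mfem::Device and Vector API:

   #include "mfem.hpp"
   using namespace mfem;

   int main()
   {
      Device device("debug"); // HOST_DEBUG / DEVICE_DEBUG backends
      Vector v(16);
      v.UseDevice(true);
      v.Write();       // data now valid on "device" only; the host pages
                       // are protected by MmuHostMemorySpace::Protect()
      // v(0) = 1.0;   // uncommenting this would SIGSEGV into MmuError()
      v.HostWrite();   // un-protects the host pages again
      return 0;
   }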
400 
401 /// The UVM host memory space
402 class UvmHostMemorySpace : public HostMemorySpace
403 {
404 public:
405  UvmHostMemorySpace(): HostMemorySpace() { }
406  void Alloc(void **ptr, size_t bytes) { CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
407  void Dealloc(void *ptr) { CuMemFree(ptr); }
408 };
409 
410 /// The 'No' device memory space
411 class NoDeviceMemorySpace: public DeviceMemorySpace
412 {
413 public:
414  void Alloc(internal::Memory&) { mfem_error("! Device Alloc"); }
415  void Dealloc(Memory&) { mfem_error("! Device Dealloc"); }
416  void *HtoD(void*, const void*, size_t) { mfem_error("!HtoD"); return nullptr; }
417  void *DtoD(void*, const void*, size_t) { mfem_error("!DtoD"); return nullptr; }
418  void *DtoH(void*, const void*, size_t) { mfem_error("!DtoH"); return nullptr; }
419 };
420 
421 /// The std:: device memory space, used with the 'debug' device
422 class StdDeviceMemorySpace : public DeviceMemorySpace { };
423 
424 /// The CUDA device memory space
425 class CudaDeviceMemorySpace: public DeviceMemorySpace
426 {
427 public:
428  CudaDeviceMemorySpace(): DeviceMemorySpace() { }
429  void Alloc(Memory &base) { CuMemAlloc(&base.d_ptr, base.bytes); }
430  void Dealloc(Memory &base) { CuMemFree(base.d_ptr); }
431  void *HtoD(void *dst, const void *src, size_t bytes)
432  { return CuMemcpyHtoD(dst, src, bytes); }
433  void *DtoD(void* dst, const void* src, size_t bytes)
434  { return CuMemcpyDtoD(dst, src, bytes); }
435  void *DtoH(void *dst, const void *src, size_t bytes)
436  { return CuMemcpyDtoH(dst, src, bytes); }
437 };
438 
439 /// The CUDA/HIP page-locked host memory space
440 class HostPinnedMemorySpace: public HostMemorySpace
441 {
442 public:
443  HostPinnedMemorySpace(): HostMemorySpace() { }
444  void Alloc(void ** ptr, size_t bytes) override
445  {
446 #ifdef MFEM_USE_CUDA
447  CuMemAllocHostPinned(ptr, bytes);
448 #endif
449 #ifdef MFEM_USE_HIP
450  HipMemAllocHostPinned(ptr, bytes);
451 #endif
452  }
453  void Dealloc(void *ptr) override
454  {
455 #ifdef MFEM_USE_CUDA
456  CuMemFreeHostPinned(ptr);
457 #endif
458 #ifdef MFEM_USE_HIP
459  HipMemFreeHostPinned(ptr);
460 #endif
461  }
462 };
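
A sketch of selecting this page-locked space from user code, assuming the public Device::SetMemoryTypes/Configure API; pinned host pages let the CUDA/HIP runtime use faster, asynchronous transfers:

   #include "mfem.hpp"
   using namespace mfem;

   int main()
   {
      Device device;
      // Pair pinned host allocations with the plain device space; this must
      // happen before the device/memory backends are configured.
      device.SetMemoryTypes(MemoryType::HOST_PINNED, MemoryType::DEVICE);
      device.Configure("cuda"); // or "hip"
      Vector x(1024); // host side now comes from HostPinnedMemorySpace
      x.Write();      // device side from the Cuda/Hip device space
      return 0;
   }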
463 
464 /// The HIP device memory space
465 class HipDeviceMemorySpace: public DeviceMemorySpace
466 {
467 public:
468  HipDeviceMemorySpace(): DeviceMemorySpace() { }
469  void Alloc(Memory &base) { HipMemAlloc(&base.d_ptr, base.bytes); }
470  void Dealloc(Memory &base) { HipMemFree(base.d_ptr); }
471  void *HtoD(void *dst, const void *src, size_t bytes)
472  { return HipMemcpyHtoD(dst, src, bytes); }
473  void *DtoD(void* dst, const void* src, size_t bytes)
474  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side synchronization so
475  // instead we use hipMemcpyAsync to get similar behavior.
476  // for more info see: https://github.com/mfem/mfem/pull/2780
477  { return HipMemcpyDtoDAsync(dst, src, bytes); }
478  void *DtoH(void *dst, const void *src, size_t bytes)
479  { return HipMemcpyDtoH(dst, src, bytes); }
480 };
481 
482 /// The UVM device memory space.
483 class UvmCudaMemorySpace : public DeviceMemorySpace
484 {
485 public:
486  void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
487  void Dealloc(Memory&) { }
488  void *HtoD(void *dst, const void *src, size_t bytes)
489  {
490  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
491  return CuMemcpyHtoD(dst, src, bytes);
492  }
493  void *DtoD(void* dst, const void* src, size_t bytes)
494  { return CuMemcpyDtoD(dst, src, bytes); }
495  void *DtoH(void *dst, const void *src, size_t bytes)
496  {
497  if (dst == src) { MFEM_STREAM_SYNC; return dst; }
498  return CuMemcpyDtoH(dst, src, bytes);
499  }
500 };
501 
502 /// The MMU device memory space
503 class MmuDeviceMemorySpace : public DeviceMemorySpace
504 {
505 public:
506  MmuDeviceMemorySpace(): DeviceMemorySpace() { }
507  void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
508  void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
509  void Protect(const Memory &m)
510  { if (m.d_rw) { m.d_rw = false; MmuProtect(m.d_ptr, m.bytes); } }
511  void Unprotect(const Memory &m)
512  { if (!m.d_rw) { m.d_rw = true; MmuAllow(m.d_ptr, m.bytes); } }
513  /// Aliases need to be restricted during protection
514  void AliasProtect(const void *ptr, size_t bytes)
515  { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
516  /// Aliases need to be prolongated for un-protection
517  void AliasUnprotect(const void *ptr, size_t bytes)
518  { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
519  void *HtoD(void *dst, const void *src, size_t bytes)
520  { return std::memcpy(dst, src, bytes); }
521  void *DtoD(void *dst, const void *src, size_t bytes)
522  { return std::memcpy(dst, src, bytes); }
523  void *DtoH(void *dst, const void *src, size_t bytes)
524  { return std::memcpy(dst, src, bytes); }
525 };
526 
527 #ifdef MFEM_USE_UMPIRE
528 class UmpireMemorySpace
529 {
530 protected:
531  umpire::ResourceManager &rm;
532  umpire::Allocator allocator;
533  bool owns_allocator{false};
534 
535 public:
536  // TODO: this only releases unused memory
537  virtual ~UmpireMemorySpace() { if (owns_allocator) { allocator.release(); } }
538  UmpireMemorySpace(const char * name, const char * space)
539  : rm(umpire::ResourceManager::getInstance())
540  {
541  if (!rm.isAllocator(name))
542  {
543  allocator = rm.makeAllocator<umpire::strategy::QuickPool>(
544  name, rm.getAllocator(space));
545  owns_allocator = true;
546  }
547  else
548  {
549  allocator = rm.getAllocator(name);
550  owns_allocator = false;
551  }
552  }
553 };
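
The 'name' passed in here is one of the MemoryManager statics defined near the end of this file ("MFEM_HOST", "MFEM_DEVICE", "MFEM_DEVICE_2"). A sketch of pointing MFEM at a pre-built Umpire pool instead, assuming an Umpire-enabled build and the MemoryManager::SetUmpireHostAllocatorName API:

   #include "mfem.hpp"
   #include <umpire/Umpire.hpp>
   #include <umpire/strategy/QuickPool.hpp>
   using namespace mfem;

   int main()
   {
      auto &rm = umpire::ResourceManager::getInstance();
      // Build the pool up front; since rm.isAllocator("APP_POOL") is then
      // true, UmpireMemorySpace adopts it (owns_allocator == false).
      rm.makeAllocator<umpire::strategy::QuickPool>(
         "APP_POOL", rm.getAllocator("HOST"));
      MemoryManager::SetUmpireHostAllocatorName("APP_POOL");
      Device device;
      device.SetMemoryTypes(MemoryType::HOST_UMPIRE,
                            MemoryType::DEVICE_UMPIRE);
      device.Configure("cuda");
      return 0;
   }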
554 
555 /// The Umpire host memory space
556 class UmpireHostMemorySpace : public HostMemorySpace, public UmpireMemorySpace
557 {
558 private:
559  umpire::strategy::AllocationStrategy *strat;
560 public:
561  UmpireHostMemorySpace(const char * name)
562  : HostMemorySpace(),
563  UmpireMemorySpace(name, "HOST"),
564  strat(allocator.getAllocationStrategy()) {}
565  void Alloc(void **ptr, size_t bytes) override
566  { *ptr = allocator.allocate(bytes); }
567  void Dealloc(void *ptr) override { allocator.deallocate(ptr); }
568  void Insert(void *ptr, size_t bytes)
569  { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
570 };
571 
572 /// The Umpire device memory space
573 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
574 class UmpireDeviceMemorySpace : public DeviceMemorySpace,
575  public UmpireMemorySpace
576 {
577 public:
578  UmpireDeviceMemorySpace(const char * name)
579  : DeviceMemorySpace(),
580  UmpireMemorySpace(name, "DEVICE") {}
581  void Alloc(Memory &base) override
582  { base.d_ptr = allocator.allocate(base.bytes); }
583  void Dealloc(Memory &base) override { rm.deallocate(base.d_ptr); }
584  void *HtoD(void *dst, const void *src, size_t bytes) override
585  {
586 #ifdef MFEM_USE_CUDA
587  return CuMemcpyHtoD(dst, src, bytes);
588 #endif
589 #ifdef MFEM_USE_HIP
590  return HipMemcpyHtoD(dst, src, bytes);
591 #endif
592  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
593  }
594  void *DtoD(void* dst, const void* src, size_t bytes) override
595  {
596 #ifdef MFEM_USE_CUDA
597  return CuMemcpyDtoD(dst, src, bytes);
598 #endif
599 #ifdef MFEM_USE_HIP
600  // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side synchronization so
601  // instead we use hipMemcpyAsync to get similar behavior.
602  // for more info see: https://github.com/mfem/mfem/pull/2780
603  return HipMemcpyDtoDAsync(dst, src, bytes);
604 #endif
605  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
606  }
607  void *DtoH(void *dst, const void *src, size_t bytes) override
608  {
609 #ifdef MFEM_USE_CUDA
610  return CuMemcpyDtoH(dst, src, bytes);
611 #endif
612 #ifdef MFEM_USE_HIP
613  return HipMemcpyDtoH(dst, src, bytes);
614 #endif
615  // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
616  }
617 };
618 #else
619 class UmpireDeviceMemorySpace : public NoDeviceMemorySpace
620 {
621 public:
622  UmpireDeviceMemorySpace(const char * /*unused*/) {}
623 };
624 #endif // MFEM_USE_CUDA || MFEM_USE_HIP
625 #endif // MFEM_USE_UMPIRE
626 
627 /// Memory space controller class
628 class Ctrl
629 {
630  typedef MemoryType MT;
631 
632 public:
633  HostMemorySpace *host[HostMemoryTypeSize];
634  DeviceMemorySpace *device[DeviceMemoryTypeSize];
635 
636 public:
637  Ctrl(): host{nullptr}, device{nullptr} { }
638 
639  void Configure()
640  {
641  if (host[HostMemoryType])
642  {
643  mfem_error("Memory backends have already been configured!");
644  }
645 
646  // Filling the host memory backends
647  // HOST, HOST_32 & HOST_64 are always ready
648  // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
649  host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
650  host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
651  host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
652  // HOST_DEBUG is delayed, as it reroutes signals
653  host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
654  host[static_cast<int>(MT::HOST_UMPIRE)] = nullptr;
655  host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();
656 
657  // Filling the device memory backends, shifted by the first device type
658  constexpr int shift = DeviceMemoryType;
659  device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
660  // All other device controllers are delayed
661  device[static_cast<int>(MemoryType::DEVICE)-shift] = nullptr;
662  device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
663  device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
664  device[static_cast<int>(MT::DEVICE_UMPIRE_2)-shift] = nullptr;
665  }
666 
667  HostMemorySpace* Host(const MemoryType mt)
668  {
669  const int mt_i = static_cast<int>(mt);
670  // Delayed host controller initialization
671  if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
672  MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
673  return host[mt_i];
674  }
675 
676  DeviceMemorySpace* Device(const MemoryType mt)
677  {
678  const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
679  MFEM_ASSERT(mt_i >= 0,"");
680  // Lazy device controller initialization
681  if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
682  MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
683  return device[mt_i];
684  }
685 
686  ~Ctrl()
687  {
688  constexpr int mt_h = HostMemoryType;
689  constexpr int mt_d = DeviceMemoryType;
690  for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
691  for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
692  }
693 
694 private:
695  HostMemorySpace* NewHostCtrl(const MemoryType mt)
696  {
697  switch (mt)
698  {
699  case MT::HOST_DEBUG: return new MmuHostMemorySpace();
700 #ifdef MFEM_USE_UMPIRE
701  case MT::HOST_UMPIRE:
702  return new UmpireHostMemorySpace(
703  MemoryManager::GetUmpireHostAllocatorName());
704 #else
705  case MT::HOST_UMPIRE: return new NoHostMemorySpace();
706 #endif
707  case MT::HOST_PINNED: return new HostPinnedMemorySpace();
708  default: MFEM_ABORT("Unknown host memory controller!");
709  }
710  return nullptr;
711  }
712 
713  DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
714  {
715  switch (mt)
716  {
717 #ifdef MFEM_USE_UMPIRE
718  case MT::DEVICE_UMPIRE:
719  return new UmpireDeviceMemorySpace(
720  MemoryManager::GetUmpireDeviceAllocatorName());
721  case MT::DEVICE_UMPIRE_2:
722  return new UmpireDeviceMemorySpace(
723  MemoryManager::GetUmpireDevice2AllocatorName());
724 #else
725  case MT::DEVICE_UMPIRE: return new NoDeviceMemorySpace();
726  case MT::DEVICE_UMPIRE_2: return new NoDeviceMemorySpace();
727 #endif
728  case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
729  case MT::DEVICE:
730  {
731 #if defined(MFEM_USE_CUDA)
732  return new CudaDeviceMemorySpace();
733 #elif defined(MFEM_USE_HIP)
734  return new HipDeviceMemorySpace();
735 #else
736  MFEM_ABORT("No device memory controller!");
737  break;
738 #endif
739  }
740  default: MFEM_ABORT("Unknown device memory controller!");
741  }
742  return nullptr;
743  }
744 };
745 
746 } // namespace mfem::internal
747 
748 static internal::Ctrl *ctrl;
749 
750 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
751  unsigned &flags)
752 {
753  MFEM_ASSERT(exists, "Internal error!");
754  if (IsHostMemory(mt))
755  {
756  MFEM_ASSERT(mt != MemoryType::HOST && h_tmp == nullptr,
757  "Internal error!");
758  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
759  // evaluated at the time when the device pointer is allocated, see
760  // GetDevicePtr() and GetAliasDevicePtr()
761  const MemoryType d_mt = MemoryType::DEFAULT;
762  // We rely on the next call using lazy dev alloc
763  return New_(h_tmp, bytes, mt, d_mt, Mem::VALID_HOST, flags);
764  }
765  else
766  {
767  const MemoryType h_mt = GetDualMemoryType(mt);
768  return New_(h_tmp, bytes, h_mt, mt, Mem::VALID_DEVICE, flags);
769  }
770 }
771 
772 void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType h_mt,
773  MemoryType d_mt, unsigned valid_flags,
774  unsigned &flags)
775 {
776  MFEM_ASSERT(exists, "Internal error!");
777  MFEM_ASSERT(IsHostMemory(h_mt), "h_mt must be host type");
778  MFEM_ASSERT(IsDeviceMemory(d_mt) || d_mt == h_mt ||
779  d_mt == MemoryType::DEFAULT,
780  "d_mt must be device type, the same as h_mt, or DEFAULT");
781  MFEM_ASSERT((h_mt != MemoryType::HOST || h_tmp != nullptr) &&
782  (h_mt == MemoryType::HOST || h_tmp == nullptr),
783  "Internal error");
784  MFEM_ASSERT((valid_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) == 0,
785  "Internal error");
786  void *h_ptr;
787  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
788  else { h_ptr = h_tmp; }
789  flags = Mem::Registered | Mem::OWNS_INTERNAL | Mem::OWNS_HOST |
790  Mem::OWNS_DEVICE | valid_flags;
791  // The other New_() method relies on this lazy allocation behavior.
792  mm.Insert(h_ptr, bytes, h_mt, d_mt); // lazy dev alloc
793  // mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); // non-lazy dev alloc
794 
795  // MFEM_VERIFY_TYPES(h_mt, mt); // done by mm.Insert() above
796  CheckHostMemoryType_(h_mt, h_ptr, false);
797 
798  return h_ptr;
799 }
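
From the user side these two paths are reached through the Memory<T> constructors; a short sketch, assuming the public Memory<T> API from mem_manager.hpp:

   #include "mfem.hpp"
   using namespace mfem;

   void NewDemo()
   {
      // Host-type request (other than plain HOST): host buffer allocated
      // now, device buffer deferred -- d_mt stays DEFAULT, i.e. the dual
      // type, until the first device access (see GetDevicePtr()).
      Memory<double> a(64, MemoryType::HOST_64);

      // Device-type request: h_mt becomes the dual of DEVICE and the
      // initial validity flag is VALID_DEVICE, per the second New_ above.
      Memory<double> b(64, MemoryType::DEVICE);

      a.Delete();
      b.Delete();
   }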
800 
801 void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
802  MemoryType mt,
803  bool own, bool alias, unsigned &flags)
804 {
805  MFEM_CONTRACT_VAR(alias);
806  MFEM_ASSERT(exists, "Internal error!");
807  MFEM_VERIFY(!alias, "Cannot register an alias!");
808  const bool is_host_mem = IsHostMemory(mt);
809  const MemType h_mt = is_host_mem ? mt : GetDualMemoryType(mt);
810  const MemType d_mt = is_host_mem ? MemoryType::DEFAULT : mt;
811  // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
812  // evaluated at the time when the device pointer is allocated, see
813  // GetDevicePtr() and GetAliasDevicePtr()
814 
815  MFEM_VERIFY_TYPES(h_mt, d_mt);
816 
817  if (ptr == nullptr && h_tmp == nullptr)
818  {
819  MFEM_VERIFY(bytes == 0, "internal error");
820  return nullptr;
821  }
822 
823  flags |= Mem::Registered | Mem::OWNS_INTERNAL;
824  void *h_ptr;
825 
826  if (is_host_mem) // HOST TYPES + MANAGED
827  {
828  h_ptr = ptr;
829  mm.Insert(h_ptr, bytes, h_mt, d_mt);
830  flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
831  Mem::OWNS_INTERNAL | Mem::VALID_HOST;
832  }
833  else // DEVICE TYPES
834  {
835  MFEM_VERIFY(ptr || bytes == 0,
836  "cannot register NULL device pointer with bytes = " << bytes);
837  if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
838  else { h_ptr = h_tmp; }
839  mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
840  flags = own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE;
841  flags |= (Mem::OWNS_HOST | Mem::VALID_DEVICE);
842  }
843  CheckHostMemoryType_(h_mt, h_ptr, alias);
844  return h_ptr;
845 }
846 
847 void MemoryManager::Register2_(void *h_ptr, void *d_ptr, size_t bytes,
848  MemoryType h_mt, MemoryType d_mt,
849  bool own, bool alias, unsigned &flags)
850 {
851  MFEM_CONTRACT_VAR(alias);
852  MFEM_ASSERT(exists, "Internal error!");
853  MFEM_ASSERT(!alias, "Cannot register an alias!");
854  MFEM_VERIFY_TYPES(h_mt, d_mt);
855 
856  if (h_ptr == nullptr && d_ptr == nullptr)
857  {
858  MFEM_VERIFY(bytes == 0, "internal error");
859  return;
860  }
861 
861 
862  flags |= Mem::Registered | Mem::OWNS_INTERNAL;
863 
864  MFEM_VERIFY(d_ptr || bytes == 0,
865  "cannot register NULL device pointer with bytes = " << bytes);
866  mm.InsertDevice(d_ptr, h_ptr, bytes, h_mt, d_mt);
867  flags = (own ? flags | (Mem::OWNS_HOST | Mem::OWNS_DEVICE) :
868  flags & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE)) |
869  Mem::VALID_HOST | Mem::VALID_DEVICE;
870 
871  CheckHostMemoryType_(h_mt, h_ptr, alias);
872 }
873 
874 void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
875  unsigned base_flags, unsigned &flags)
876 {
877  mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
878  base_flags & Mem::ALIAS);
879  flags = (base_flags | Mem::ALIAS | Mem::OWNS_INTERNAL) &
880  ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
881 }
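
The usual entry point for Alias_ is Memory<T>::MakeAlias; a minimal sketch using the public API (when the base is registered with the memory manager, as it is for any non-HOST host type, this lands in Alias_/InsertAlias):

   #include "mfem.hpp"
   using namespace mfem;

   void AliasDemo()
   {
      Memory<double> base(100, MemoryType::HOST_64); // registered base
      Memory<double> window;
      // Alias of 50 entries starting at element 25: the offset is converted
      // to bytes, the ALIAS | OWNS_INTERNAL flags are set, and host/device
      // ownership is cleared, as in Alias_ above.
      window.MakeAlias(base, 25, 50);
      window.Delete(); // drops the alias record only, not base's data
      base.Delete();
   }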
882 
883 void MemoryManager::SetDeviceMemoryType_(void *h_ptr, unsigned flags,
884  MemoryType d_mt)
885 {
886  MFEM_VERIFY(h_ptr, "cannot set the device memory type: Memory is empty!");
887  if (!(flags & Mem::ALIAS))
888  {
889  auto mem_iter = maps->memories.find(h_ptr);
890  MFEM_VERIFY(mem_iter != maps->memories.end(), "internal error");
891  internal::Memory &mem = mem_iter->second;
892  if (mem.d_mt == d_mt) { return; }
893  MFEM_VERIFY(mem.d_ptr == nullptr, "cannot set the device memory type:"
894  " device memory is allocated!");
895  mem.d_mt = d_mt;
896  }
897  else
898  {
899  auto alias_iter = maps->aliases.find(h_ptr);
900  MFEM_VERIFY(alias_iter != maps->aliases.end(), "internal error");
901  internal::Alias &alias = alias_iter->second;
902  internal::Memory &base_mem = *alias.mem;
903  if (base_mem.d_mt == d_mt) { return; }
904  MFEM_VERIFY(base_mem.d_ptr == nullptr,
905  "cannot set the device memory type:"
906  " alias' base device memory is allocated!");
907  base_mem.d_mt = d_mt;
908  }
909 }
910 
911 void MemoryManager::Delete_(void *h_ptr, MemoryType h_mt, unsigned flags)
912 {
913  const bool alias = flags & Mem::ALIAS;
914  const bool registered = flags & Mem::Registered;
915  const bool owns_host = flags & Mem::OWNS_HOST;
916  const bool owns_device = flags & Mem::OWNS_DEVICE;
917  const bool owns_internal = flags & Mem::OWNS_INTERNAL;
918  MFEM_ASSERT(IsHostMemory(h_mt), "invalid h_mt = " << (int)h_mt);
919  // MFEM_ASSERT(registered || IsHostMemory(h_mt),"");
920  MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
921  // If at least one of the 'own_*' flags is true then 'registered' must be
922  // true too. An acceptable exception is the special case when 'h_ptr' is
923  // NULL, and both 'own_device' and 'own_internal' are false -- this case is
924  // an exception only when 'own_host' is true and 'registered' is false.
925  MFEM_ASSERT(registered || !(owns_host || owns_device || owns_internal) ||
926  (!(owns_device || owns_internal) && h_ptr == nullptr),
927  "invalid Memory state");
928  if (!mm.exists || !registered) { return; }
929  if (alias)
930  {
931  if (owns_internal)
932  {
933  MFEM_ASSERT(mm.IsAlias(h_ptr), "");
934  MFEM_ASSERT(h_mt == maps->aliases.at(h_ptr).h_mt, "");
935  mm.EraseAlias(h_ptr);
936  }
937  }
938  else // Known
939  {
940  if (owns_host && (h_mt != MemoryType::HOST))
941  { ctrl->Host(h_mt)->Dealloc(h_ptr); }
942  if (owns_internal)
943  {
944  MFEM_ASSERT(mm.IsKnown(h_ptr), "");
945  MFEM_ASSERT(h_mt == maps->memories.at(h_ptr).h_mt, "");
946  mm.Erase(h_ptr, owns_device);
947  }
948  }
949 }
950 
951 void MemoryManager::DeleteDevice_(void *h_ptr, unsigned & flags)
952 {
953  const bool owns_device = flags & Mem::OWNS_DEVICE;
954  if (owns_device)
955  {
956  mm.EraseDevice(h_ptr);
957  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
958  }
959 }
960 
961 bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
962  MemoryType h_mt, size_t bytes,
963  unsigned flags)
964 {
965  if (!h_ptr)
966  {
967  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
968  return true;
969  }
970  MemoryType d_mt;
971  if (!(flags & Mem::ALIAS))
972  {
973  auto iter = maps->memories.find(h_ptr);
974  MFEM_VERIFY(iter != maps->memories.end(), "internal error");
975  d_mt = iter->second.d_mt;
976  }
977  else
978  {
979  auto iter = maps->aliases.find(h_ptr);
980  MFEM_VERIFY(iter != maps->aliases.end(), "internal error");
981  d_mt = iter->second.mem->d_mt;
982  }
983  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
984  switch (mc)
985  {
986  case MemoryClass::HOST_32:
987  {
988  MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
989  h_mt == MemoryType::HOST_64,"");
990  return true;
991  }
992  case MemoryClass::HOST_64:
993  {
994  MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
995  return true;
996  }
997  case MemoryClass::DEVICE:
998  {
999  MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
1000  d_mt == MemoryType::DEVICE_DEBUG ||
1001  d_mt == MemoryType::DEVICE_UMPIRE ||
1002  d_mt == MemoryType::DEVICE_UMPIRE_2 ||
1003  d_mt == MemoryType::MANAGED,"");
1004  return true;
1005  }
1006  case MemoryClass::MANAGED:
1007  {
1008  MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
1009  d_mt == MemoryType::MANAGED),"");
1010  return true;
1011  }
1012  default: break;
1013  }
1014  return true;
1015 }
1016 
1017 void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1018  size_t bytes, unsigned &flags)
1019 {
1020  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1021  if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1022  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1023  if (mc < MemoryClass::DEVICE)
1024  {
1025  const bool copy = !(flags & Mem::VALID_HOST);
1026  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1027  if (flags & Mem::ALIAS)
1028  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1029  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1030  }
1031  else
1032  {
1033  const bool copy = !(flags & Mem::VALID_DEVICE);
1034  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1035  if (flags & Mem::ALIAS)
1036  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1037  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1038  }
1039 }
1040 
1041 const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1042  size_t bytes, unsigned &flags)
1043 {
1044  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1045  if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1046  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1047  if (mc < MemoryClass::DEVICE)
1048  {
1049  const bool copy = !(flags & Mem::VALID_HOST);
1050  flags |= Mem::VALID_HOST;
1051  if (flags & Mem::ALIAS)
1052  { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1053  else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1054  }
1055  else
1056  {
1057  const bool copy = !(flags & Mem::VALID_DEVICE);
1058  flags |= Mem::VALID_DEVICE;
1059  if (flags & Mem::ALIAS)
1060  { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1061  else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1062  }
1063 }
1064 
1065 void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1066  size_t bytes, unsigned &flags)
1067 {
1068  if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1069  if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1070  MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1071  if (mc < MemoryClass::DEVICE)
1072  {
1073  flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1074  if (flags & Mem::ALIAS)
1075  { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
1076  else { return mm.GetHostPtr(h_ptr, bytes, false); }
1077  }
1078  else
1079  {
1080  flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1081  if (flags & Mem::ALIAS)
1082  { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
1083  else { return mm.GetDevicePtr(h_ptr, bytes, false); }
1084  }
1085 }
1086 
1087 void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
1088  size_t alias_bytes, unsigned base_flags,
1089  unsigned &alias_flags)
1090 {
1091  // This is called only when (base_flags & Mem::Registered) is true.
1092  // Note that (alias_flags & Registered) may not be true.
1093  MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
1094  if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
1095  {
1096  mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
1097  }
1098  if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
1099  {
1100  if (!(alias_flags & Mem::Registered))
1101  {
1102  mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
1103  alias_flags = (alias_flags | Mem::Registered | Mem::OWNS_INTERNAL) &
1104  ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
1105  }
1106  mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
1107  }
1108  alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
1109  (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
1110 }
1111 
1112 MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr, bool alias)
1113 {
1114  if (mm.exists)
1115  {
1116  if (!alias)
1117  {
1118  auto iter = maps->memories.find(h_ptr);
1119  MFEM_ASSERT(iter != maps->memories.end(), "internal error");
1120  return iter->second.d_mt;
1121  }
1122  // alias == true
1123  auto iter = maps->aliases.find(h_ptr);
1124  MFEM_ASSERT(iter != maps->aliases.end(), "internal error");
1125  return iter->second.mem->d_mt;
1126  }
1127  MFEM_ABORT("internal error");
1128  return MemoryManager::host_mem_type;
1129 }
1130 
1131 MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
1132 {
1133  if (!mm.exists) { return MemoryManager::host_mem_type; }
1134  if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
1135  if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).h_mt; }
1136  return MemoryManager::host_mem_type;
1137 }
1138 
1139 void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
1140  size_t bytes, unsigned src_flags,
1141  unsigned &dst_flags)
1142 {
1143  // Type of copy to use based on the src and dest validity flags:
1144  //            |       src
1145  //            |  h  |  d  |  hd
1146  // -----------+-----+-----+------
1147  //        h   | h2h   d2h   h2h
1148  //  dest  d   | h2d   d2d   d2d
1149  //        hd  | h2h   d2d   d2d
1150 
1151  const bool dst_on_host =
1152  (dst_flags & Mem::VALID_HOST) &&
1153  (!(dst_flags & Mem::VALID_DEVICE) ||
1154  ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));
1155 
1156  dst_flags = dst_flags &
1157  ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1158 
1159  const bool src_on_host =
1160  (src_flags & Mem::VALID_HOST) &&
1161  (!(src_flags & Mem::VALID_DEVICE) ||
1162  ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));
1163 
1164  const void *src_d_ptr =
1165  src_on_host ? NULL :
1166  ((src_flags & Mem::ALIAS) ?
1167  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1168  mm.GetDevicePtr(src_h_ptr, bytes, false));
1169 
1170  if (dst_on_host)
1171  {
1172  if (src_on_host)
1173  {
1174  if (dst_h_ptr != src_h_ptr && bytes != 0)
1175  {
1176  MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
1177  (const char*)src_h_ptr + bytes <= dst_h_ptr,
1178  "data overlaps!");
1179  std::memcpy(dst_h_ptr, src_h_ptr, bytes);
1180  }
1181  }
1182  else
1183  {
1184  if (dst_h_ptr != src_d_ptr && bytes != 0)
1185  {
1186  internal::Memory &src_d_base = maps->memories.at(src_h_ptr);
1187  MemoryType src_d_mt = src_d_base.d_mt;
1188  ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
1189  }
1190  }
1191  }
1192  else
1193  {
1194  void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
1195  mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
1196  mm.GetDevicePtr(dst_h_ptr, bytes, false);
1197  if (src_on_host)
1198  {
1199  const bool known = mm.IsKnown(dst_h_ptr);
1200  const bool alias = dst_flags & Mem::ALIAS;
1201  MFEM_VERIFY(alias||known,"");
1202  const MemoryType d_mt = known ?
1203  maps->memories.at(dst_h_ptr).d_mt :
1204  maps->aliases.at(dst_h_ptr).mem->d_mt;
1205  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1206  }
1207  else
1208  {
1209  if (dest_d_ptr != src_d_ptr && bytes != 0)
1210  {
1211  const bool known = mm.IsKnown(dst_h_ptr);
1212  const bool alias = dst_flags & Mem::ALIAS;
1213  MFEM_VERIFY(alias||known,"");
1214  const MemoryType d_mt = known ?
1215  maps->memories.at(dst_h_ptr).d_mt :
1216  maps->aliases.at(dst_h_ptr).mem->d_mt;
1217  ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
1218  }
1219  }
1220  }
1221 }
1222 
1223 void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
1224  size_t bytes, unsigned src_flags)
1225 {
1226  const bool src_on_host = src_flags & Mem::VALID_HOST;
1227  if (src_on_host)
1228  {
1229  if (dest_h_ptr != src_h_ptr && bytes != 0)
1230  {
1231  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1232  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1233  "data overlaps!");
1234  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1235  }
1236  }
1237  else
1238  {
1239  MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
1240  const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
1241  mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1242  mm.GetDevicePtr(src_h_ptr, bytes, false);
1243  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1244  const MemoryType d_mt = base.d_mt;
1245  ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
1246  }
1247 }
1248 
1249 void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
1250  size_t bytes, unsigned &dest_flags)
1251 {
1252  const bool dest_on_host = dest_flags & Mem::VALID_HOST;
1253  if (dest_on_host)
1254  {
1255  if (dest_h_ptr != src_h_ptr && bytes != 0)
1256  {
1257  MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1258  (const char*)src_h_ptr + bytes <= dest_h_ptr,
1259  "data overlaps!");
1260  std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1261  }
1262  }
1263  else
1264  {
1265  void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
1266  mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
1267  mm.GetDevicePtr(dest_h_ptr, bytes, false);
1268  const internal::Memory &base = maps->memories.at(dest_h_ptr);
1269  const MemoryType d_mt = base.d_mt;
1270  ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1271  }
1272  dest_flags = dest_flags &
1273  ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1274 }
1275 
1276 bool MemoryManager::IsKnown_(const void *h_ptr)
1277 {
1278  return maps->memories.find(h_ptr) != maps->memories.end();
1279 }
1280 
1281 bool MemoryManager::IsAlias_(const void *h_ptr)
1282 {
1283  return maps->aliases.find(h_ptr) != maps->aliases.end();
1284 }
1285 
1286 void MemoryManager::Insert(void *h_ptr, size_t bytes,
1287  MemoryType h_mt, MemoryType d_mt)
1288 {
1289 #ifdef MFEM_TRACK_MEM_MANAGER
1290  mfem::out << "[mfem memory manager]: registering h_ptr: " << h_ptr
1291  << ", bytes: " << bytes << std::endl;
1292 #endif
1293  if (h_ptr == NULL)
1294  {
1295  MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
1296  return;
1297  }
1298  MFEM_VERIFY_TYPES(h_mt, d_mt);
1299 #ifdef MFEM_DEBUG
1300  auto res =
1301 #endif
1302  maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
1303 #ifdef MFEM_DEBUG
1304  if (res.second == false)
1305  {
1306  auto &m = res.first->second;
1307  MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt &&
1308  (m.d_mt == d_mt || (d_mt == MemoryType::DEFAULT &&
1309  m.d_mt == GetDualMemoryType(h_mt))),
1310  "Address already present with different attributes!");
1311 #ifdef MFEM_TRACK_MEM_MANAGER
1312  mfem::out << "[mfem memory manager]: repeated registration of h_ptr: "
1313  << h_ptr << std::endl;
1314 #endif
1315  }
1316 #endif
1317 }
1318 
1319 void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
1320  MemoryType h_mt, MemoryType d_mt)
1321 {
1322  // MFEM_VERIFY_TYPES(h_mt, d_mt); // done by Insert() below
1323  MFEM_ASSERT(h_ptr != NULL, "internal error");
1324  Insert(h_ptr, bytes, h_mt, d_mt);
1325  internal::Memory &mem = maps->memories.at(h_ptr);
1326  if (d_ptr == NULL && bytes != 0) { ctrl->Device(d_mt)->Alloc(mem); }
1327  else { mem.d_ptr = d_ptr; }
1328 }
1329 
1330 void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
1331  const size_t bytes, const bool base_is_alias)
1332 {
1333  size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
1334  static_cast<const char*>(base_ptr));
1335 #ifdef MFEM_TRACK_MEM_MANAGER
1336  mfem::out << "[mfem memory manager]: registering alias of base_ptr: "
1337  << base_ptr << ", offset: " << offset << ", bytes: " << bytes
1338  << ", base is alias: " << base_is_alias << std::endl;
1339 #endif
1340  if (!base_ptr)
1341  {
1342  MFEM_VERIFY(offset == 0,
1343  "Trying to add alias to NULL at offset " << offset);
1344  return;
1345  }
1346  if (base_is_alias)
1347  {
1348  const internal::Alias &alias = maps->aliases.at(base_ptr);
1349  MFEM_ASSERT(alias.mem,"");
1350  base_ptr = alias.mem->h_ptr;
1351  offset += alias.offset;
1352 #ifdef MFEM_TRACK_MEM_MANAGER
1353  mfem::out << "[mfem memory manager]: real base_ptr: " << base_ptr
1354  << std::endl;
1355 #endif
1356  }
1357  internal::Memory &mem = maps->memories.at(base_ptr);
1358  MFEM_VERIFY(offset + bytes <= mem.bytes, "invalid alias");
1359  auto res =
1360  maps->aliases.emplace(alias_ptr,
1361  internal::Alias{&mem, offset, 1, mem.h_mt});
1362  if (res.second == false) // alias_ptr was already in the map
1363  {
1364  internal::Alias &alias = res.first->second;
1365  // Update the alias data in case the existing alias is dangling
1366  alias.mem = &mem;
1367  alias.offset = offset;
1368  alias.h_mt = mem.h_mt;
1369  alias.counter++;
1370  }
1371 }
1372 
1373 void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
1374 {
1375 #ifdef MFEM_TRACK_MEM_MANAGER
1376  mfem::out << "[mfem memory manager]: un-registering h_ptr: " << h_ptr
1377  << std::endl;
1378 #endif
1379  if (!h_ptr) { return; }
1380  auto mem_map_iter = maps->memories.find(h_ptr);
1381  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1382  internal::Memory &mem = mem_map_iter->second;
1383  if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1384  maps->memories.erase(mem_map_iter);
1385 }
1386 
1387 void MemoryManager::EraseDevice(void *h_ptr)
1388 {
1389  if (!h_ptr) { return; }
1390  auto mem_map_iter = maps->memories.find(h_ptr);
1391  if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1392  internal::Memory &mem = mem_map_iter->second;
1393  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1394  mem.d_ptr = nullptr;
1395 }
1396 
1397 void MemoryManager::EraseAlias(void *alias_ptr)
1398 {
1399 #ifdef MFEM_TRACK_MEM_MANAGER
1400  mfem::out << "[mfem memory manager]: un-registering alias_ptr: " << alias_ptr
1401  << std::endl;
1402 #endif
1403  if (!alias_ptr) { return; }
1404  auto alias_map_iter = maps->aliases.find(alias_ptr);
1405  if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
1406  internal::Alias &alias = alias_map_iter->second;
1407  if (--alias.counter) { return; }
1408  maps->aliases.erase(alias_map_iter);
1409 }
1410 
1411 void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
1412  bool copy_data)
1413 {
1414  if (!h_ptr)
1415  {
1416  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1417  return NULL;
1418  }
1419  internal::Memory &mem = maps->memories.at(h_ptr);
1420  const MemoryType &h_mt = mem.h_mt;
1421  MemoryType &d_mt = mem.d_mt;
1422  MFEM_VERIFY_TYPES(h_mt, d_mt);
1423  if (!mem.d_ptr)
1424  {
1425  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1426  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1427  }
1428  // Aliases might have done some protections
1429  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1430  if (copy_data)
1431  {
1432  MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
1433  if (bytes) { ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes); }
1434  }
1435  ctrl->Host(h_mt)->Protect(mem, bytes);
1436  return mem.d_ptr;
1437 }
1438 
1439 void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
1440  bool copy)
1441 {
1442  if (!alias_ptr)
1443  {
1444  MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1445  return NULL;
1446  }
1447  auto &alias_map = maps->aliases;
1448  auto alias_map_iter = alias_map.find(alias_ptr);
1449  if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
1450  const internal::Alias &alias = alias_map_iter->second;
1451  const size_t offset = alias.offset;
1452  internal::Memory &mem = *alias.mem;
1453  const MemoryType &h_mt = mem.h_mt;
1454  MemoryType &d_mt = mem.d_mt;
1455  MFEM_VERIFY_TYPES(h_mt, d_mt);
1456  if (!mem.d_ptr)
1457  {
1458  if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1459  if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1460  }
1461  void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
1462  void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
1463  MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
1464  MFEM_ASSERT(offset + bytes <= mem.bytes, "internal error");
1465  mem.d_rw = mem.h_rw = false;
1466  if (mem.d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1467  ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
1468  if (copy && mem.d_ptr)
1469  { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
1470  ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
1471  return alias_d_ptr;
1472 }
1473 
1474 void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
1475 {
1476  const internal::Memory &mem = maps->memories.at(ptr);
1477  MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
1478  MFEM_ASSERT(bytes <= mem.bytes, "internal error")
1479  const MemoryType &h_mt = mem.h_mt;
1480  const MemoryType &d_mt = mem.d_mt;
1481  MFEM_VERIFY_TYPES(h_mt, d_mt);
1482  // Aliases might have done some protections
1483  ctrl->Host(h_mt)->Unprotect(mem, bytes);
1484  if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1485  if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
1486  if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
1487  return mem.h_ptr;
1488 }
1489 
1490 void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
1491  bool copy_data)
1492 {
1493  const internal::Alias &alias = maps->aliases.at(ptr);
1494  const internal::Memory *const mem = alias.mem;
1495  const MemoryType &h_mt = mem->h_mt;
1496  const MemoryType &d_mt = mem->d_mt;
1497  MFEM_VERIFY_TYPES(h_mt, d_mt);
1498  void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
1499  void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
1500  MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
1501  mem->h_rw = false;
1502  ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
1503  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1504  if (copy_data && mem->d_ptr)
1505  { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
1506  if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
1507  return alias_h_ptr;
1508 }
1509 
1510 void MemoryManager::Init()
1511 {
1512  if (exists) { return; }
1513  maps = new internal::Maps();
1514  ctrl = new internal::Ctrl();
1515  ctrl->Configure();
1516  exists = true;
1517 }
1518 
1519 MemoryManager::MemoryManager() { Init(); }
1520 
1521 MemoryManager::~MemoryManager() { if (exists) { Destroy(); } }
1522 
1523 void MemoryManager::SetDualMemoryType(MemoryType mt, MemoryType dual_mt)
1524 {
1525  MFEM_VERIFY(!configured, "changing the dual MemoryTypes is not allowed after"
1526  " MemoryManager configuration!");
1527  UpdateDualMemoryType(mt, dual_mt);
1528 }
1529 
1530 void MemoryManager::UpdateDualMemoryType(MemoryType mt, MemoryType dual_mt)
1531 {
1532  MFEM_VERIFY((int)mt < MemoryTypeSize,
1533  "invalid MemoryType, mt = " << (int)mt);
1534  MFEM_VERIFY((int)dual_mt < MemoryTypeSize,
1535  "invalid dual MemoryType, dual_mt = " << (int)dual_mt);
1536 
1537  if ((IsHostMemory(mt) && IsDeviceMemory(dual_mt)) ||
1538  (IsDeviceMemory(mt) && IsHostMemory(dual_mt)))
1539  {
1540  dual_map[(int)mt] = dual_mt;
1541  }
1542  else
1543  {
1544  // mt + dual_mt is not a pair of host + device types: this is only allowed
1545  // when mt == dual_mt and mt is a host type; in this case we do not
1546  // actually update the dual
1547  MFEM_VERIFY(mt == dual_mt && IsHostMemory(mt),
1548  "invalid (mt, dual_mt) pair: ("
1549  << MemoryTypeName[(int)mt] << ", "
1550  << MemoryTypeName[(int)dual_mt] << ')');
1551  }
1552 }
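
A sketch of adjusting a dual pair before the device is configured, assuming the public (static) MemoryManager::SetDualMemoryType API and an Umpire build; afterwards, lazy device allocations for HOST_PINNED buffers go to the Umpire device pool instead of plain DEVICE:

   #include "mfem.hpp"
   using namespace mfem;

   int main()
   {
      // Allowed only before MemoryManager configuration, per the
      // MFEM_VERIFY in SetDualMemoryType above.
      MemoryManager::SetDualMemoryType(MemoryType::HOST_PINNED,
                                       MemoryType::DEVICE_UMPIRE);
      Device device("cuda");
      return 0;
   }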
1553 
1554 void MemoryManager::Configure(const MemoryType host_mt,
1555  const MemoryType device_mt)
1556 {
1557  MemoryManager::UpdateDualMemoryType(host_mt, device_mt);
1558  MemoryManager::UpdateDualMemoryType(device_mt, host_mt);
1559  if (device_mt == MemoryType::DEVICE_DEBUG)
1560  {
1561  for (int mt = (int)MemoryType::HOST; mt < (int)MemoryType::MANAGED; mt++)
1562  {
1563  MemoryManager::UpdateDualMemoryType(
1564  (MemoryType)mt, MemoryType::DEVICE_DEBUG);
1565  }
1566  }
1567  Init();
1568  host_mem_type = host_mt;
1569  device_mem_type = device_mt;
1570  configured = true;
1571 }
1572 
1573 void MemoryManager::Destroy()
1574 {
1575  MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
1576 #ifdef MFEM_TRACK_MEM_MANAGER
1577  size_t num_memories = maps->memories.size();
1578  size_t num_aliases = maps->aliases.size();
1579  if (num_memories != 0 || num_aliases != 0)
1580  {
1581  MFEM_WARNING("...\n\t number of registered pointers: " << num_memories
1582  << "\n\t number of registered aliases : " << num_aliases);
1583  }
1584 #endif
1585  // Keep for debugging purposes:
1586 #if 0
1587  mfem::out << "Destroying the MemoryManager ...\n"
1588  << "remaining registered pointers : "
1589  << maps->memories.size() << '\n'
1590  << "remaining registered aliases : "
1591  << maps->aliases.size() << '\n';
1592 #endif
1593  for (auto& n : maps->memories)
1594  {
1595  internal::Memory &mem = n.second;
1596  bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
1597  if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1598  if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1599  }
1600  delete maps; maps = nullptr;
1601  delete ctrl; ctrl = nullptr;
1602  host_mem_type = MemoryType::HOST;
1603  device_mem_type = MemoryType::HOST;
1604  exists = false;
1605  configured = false;
1606 }
1607 
1608 void MemoryManager::RegisterCheck(void *ptr)
1609 {
1610  if (ptr != NULL)
1611  {
1612  if (!IsKnown(ptr))
1613  {
1614  mfem_error("Pointer is not registered!");
1615  }
1616  }
1617 }
1618 
1619 int MemoryManager::PrintPtrs(std::ostream &os)
1620 {
1621  int n_out = 0;
1622  for (const auto& n : maps->memories)
1623  {
1624  const internal::Memory &mem = n.second;
1625  os << "\nkey " << n.first << ", "
1626  << "h_ptr " << mem.h_ptr << ", "
1627  << "d_ptr " << mem.d_ptr;
1628  n_out++;
1629  }
1630  if (maps->memories.size() > 0) { os << std::endl; }
1631  return n_out;
1632 }
1633 
1634 int MemoryManager::PrintAliases(std::ostream &os)
1635 {
1636  int n_out = 0;
1637  for (const auto& n : maps->aliases)
1638  {
1639  const internal::Alias &alias = n.second;
1640  os << "\nalias: key " << n.first << ", "
1641  << "h_ptr " << alias.mem->h_ptr << ", "
1642  << "offset " << alias.offset << ", "
1643  << "counter " << alias.counter;
1644  n_out++;
1645  }
1646  if (maps->aliases.size() > 0) { os << std::endl; }
1647  return n_out;
1648 }
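// [Editor's sketch, not part of mem_manager.cpp] Both printers default to
// mfem::out and return the number of entries written, so a quick report of
// what is still registered could be:
//
//   int np = mfem::mm.PrintPtrs();
//   int na = mfem::mm.PrintAliases();
//   mfem::out << "registered: " << np << " pointers, "
//             << na << " aliases" << std::endl;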
1649 
1650 int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
1651  unsigned flags)
1652 {
1653  void *d_ptr = (flags & Mem::ALIAS) ?
1654  mm.GetAliasDevicePtr(h_ptr, size, false) :
1655  mm.GetDevicePtr(h_ptr, size, false);
1656  char *h_buf = new char[size];
1657 #if defined(MFEM_USE_CUDA)
1658  CuMemcpyDtoH(h_buf, d_ptr, size);
1659 #elif defined(MFEM_USE_HIP)
1660  HipMemcpyDtoH(h_buf, d_ptr, size);
1661 #else
1662  std::memcpy(h_buf, d_ptr, size);
1663 #endif
1664  int res = std::memcmp(h_ptr, h_buf, size);
1665  delete [] h_buf;
1666  return res;
1667 }
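// [Editor's sketch, not part of mem_manager.cpp] The public entry point is
// assumed to be Memory<T>::CompareHostAndDevice(), which forwards here with
// the element count scaled to bytes. A debugging assertion might read:
//
//   mfem::Vector v(n);
//   // ... host writes and device kernels on v ...
//   MFEM_VERIFY(v.GetMemory().CompareHostAndDevice(v.Size()) == 0,
//               "host and device copies of v differ!");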
1668 
1669 
1670 void MemoryPrintFlags(unsigned flags)
1671 {
1672  typedef Memory<int> Mem;
1673  mfem::out
1674  << "\n registered = " << bool(flags & Mem::Registered)
1675  << "\n owns host = " << bool(flags & Mem::OWNS_HOST)
1676  << "\n owns device = " << bool(flags & Mem::OWNS_DEVICE)
1677  << "\n owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
1678  << "\n valid host = " << bool(flags & Mem::VALID_HOST)
1679  << "\n valid device = " << bool(flags & Mem::VALID_DEVICE)
1680  << "\n device flag = " << bool(flags & Mem::USE_DEVICE)
1681  << "\n alias = " << bool(flags & Mem::ALIAS)
1682  << std::endl;
1683 }
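// [Editor's sketch, not part of mem_manager.cpp] MemoryPrintFlags() is mainly
// a debugger convenience; from user code the same bits are reachable through
// the public wrapper:
//
//   mfem::Vector v(8);
//   v.GetMemory().PrintFlags();   // prints registered/owns/valid/... bits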
1684 
1685 void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr,
1686  bool alias)
1687 {
1688  if (!mm.exists) {return;}
1689  if (!alias)
1690  {
1691  auto it = maps->memories.find(h_ptr);
1692  MFEM_VERIFY(it != maps->memories.end(),
1693  "host pointer is not registered: h_ptr = " << h_ptr);
1694  MFEM_VERIFY(h_mt == it->second.h_mt, "host pointer MemoryType mismatch");
1695  }
1696  else
1697  {
1698  auto it = maps->aliases.find(h_ptr);
1699  MFEM_VERIFY(it != maps->aliases.end(),
1700  "alias pointer is not registered: h_ptr = " << h_ptr);
1701  MFEM_VERIFY(h_mt == it->second.h_mt, "alias pointer MemoryType mismatch");
1702  }
1703 }
1704 
1705 MemoryManager mm;
1706 
1707 bool MemoryManager::exists = false;
1708 bool MemoryManager::configured = false;
1709 
1710 MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
1711 MemoryType MemoryManager::device_mem_type = MemoryType::HOST;
1712 
1713 MemoryType MemoryManager::dual_map[MemoryTypeSize] =
1714 {
1715  /* HOST */ MemoryType::DEVICE,
1716  /* HOST_32 */ MemoryType::DEVICE,
1717  /* HOST_64 */ MemoryType::DEVICE,
1718  /* HOST_DEBUG */ MemoryType::DEVICE_DEBUG,
1719  /* HOST_UMPIRE */ MemoryType::DEVICE_UMPIRE,
1720  /* HOST_PINNED */ MemoryType::DEVICE,
1721  /* MANAGED */ MemoryType::MANAGED,
1722  /* DEVICE */ MemoryType::HOST,
1723  /* DEVICE_DEBUG */ MemoryType::HOST_DEBUG,
1724  /* DEVICE_UMPIRE */ MemoryType::HOST_UMPIRE,
1725  /* DEVICE_UMPIRE_2 */ MemoryType::HOST_UMPIRE
1726 };
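// [Editor's sketch, not part of mem_manager.cpp] Sample lookups against the
// defaults above, before any SetDualMemoryType()/Configure() overrides:
//
//   using mfem::MemoryType; using mfem::MemoryManager;
//   MemoryManager::GetDualMemoryType(MemoryType::HOST_DEBUG);
//                                      // -> MemoryType::DEVICE_DEBUG
//   MemoryManager::GetDualMemoryType(MemoryType::MANAGED);
//                                      // -> MemoryType::MANAGED (own dual)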
1727 
1728 #ifdef MFEM_USE_UMPIRE
1729 const char * MemoryManager::h_umpire_name = "MFEM_HOST";
1730 const char * MemoryManager::d_umpire_name = "MFEM_DEVICE";
1731 const char * MemoryManager::d_umpire_2_name = "MFEM_DEVICE_2";
1732 #endif
1733 
1734 
1735 const char *MemoryTypeName[MemoryTypeSize] =
1736 {
1737  "host-std", "host-32", "host-64", "host-debug", "host-umpire", "host-pinned",
1738 #if defined(MFEM_USE_CUDA)
1739  "cuda-uvm",
1740  "cuda",
1741 #elif defined(MFEM_USE_HIP)
1742  "hip-uvm",
1743  "hip",
1744 #else
1745  "managed",
1746  "device",
1747 #endif
1748  "device-debug",
1749 #if defined(MFEM_USE_CUDA)
1750  "cuda-umpire",
1751  "cuda-umpire-2",
1752 #elif defined(MFEM_USE_HIP)
1753  "hip-umpire",
1754  "hip-umpire-2",
1755 #else
1756  "device-umpire",
1757  "device-umpire-2",
1758 #endif
1759 };
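// [Editor's sketch, not part of mem_manager.cpp] These strings show up in
// diagnostics such as the MFEM_VERIFY messages earlier in this file; a
// direct lookup is simply:
//
//   mfem::out << mfem::MemoryTypeName[(int)mfem::MemoryType::HOST_64]
//             << std::endl;   // prints "host-64"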
1760 
1761 } // namespace mfem