MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
mem_manager.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "forall.hpp"
13#include "mem_manager.hpp"
14
15#include <list>
16#include <cstring> // std::memcpy, std::memcmp
17#include <unordered_map>
18#include <algorithm> // std::max
19#include <cstdint>
20
21// Uncomment to try _WIN32 platform
22//#define _WIN32
23//#define _aligned_malloc(s,a) malloc(s)
24
25#ifndef _WIN32
26#include <unistd.h>
27#include <signal.h>
28#include <sys/mman.h>
29#define mfem_memalign(p,a,s) posix_memalign(p,a,s)
30#define mfem_aligned_free free
31#else
32#define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
33#define mfem_aligned_free _aligned_free
34#endif
35
36#ifdef MFEM_USE_UMPIRE
37#include <umpire/Umpire.hpp>
38#include <umpire/strategy/QuickPool.hpp>
39
40// Make sure Umpire is build with CUDA support if MFEM is built with it.
41#if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
42#error "CUDA is not enabled in Umpire!"
43#endif
44// Make sure Umpire is build with HIP support if MFEM is built with it.
45#if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
46#error "HIP is not enabled in Umpire!"
47#endif
48#endif // MFEM_USE_UMPIRE
49
50#ifndef MAP_ANONYMOUS
51#define MAP_ANONYMOUS MAP_ANON
52#endif
53
54// Internal debug option, useful for tracking some memory manager operations.
55// #define MFEM_TRACK_MEM_MANAGER
56
57namespace mfem
58{
59
// NOTE(review): the function signature and all the switch cases are missing
// from this listing (lines dropped by the extraction); only the invalid-class
// fallback tail is visible. Restore the full function from the upstream
// source before editing -- do not rely on this fragment.
{
   switch (mc)
   {
   }
   // Unreachable when the (missing) cases cover every MemoryClass value.
   MFEM_VERIFY(false,"");
   return MemoryType::HOST;
}
73
74
// NOTE(review): the function signature and several 'case MemoryClass::...'
// labels are missing from this listing (lines dropped by the extraction);
// the bare 'return (...' statements below are the tails of those cases.
// Restore the complete switch from the upstream source before editing.
{
   switch (mc)
   {
      case MemoryClass::HOST: return IsHostMemory(mt);
         return (mt == MemoryType::HOST_32 ||
                 mt == MemoryType::HOST_64 ||
         return (mt == MemoryType::HOST_64 ||
      case MemoryClass::DEVICE: return IsDeviceMemory(mt);
         return (mt == MemoryType::MANAGED);
   }
   // Reached only for an out-of-range MemoryClass value.
   MFEM_ABORT("invalid MemoryClass");
   return false;
}
94
95
// Verify that (h_mt, d_mt) is an admissible host/device memory-type pair:
// h_mt must be a host type, d_mt a device type or DEFAULT, and the DEBUG
// types may only be paired as documented below.
static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
{
   MFEM_VERIFY(IsHostMemory(h_mt), "h_mt = " << (int)h_mt);
   MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == MemoryType::DEFAULT,
               "d_mt = " << (int)d_mt);
   // If h_mt == MemoryType::HOST_DEBUG, then d_mt == MemoryType::DEVICE_DEBUG
   // or d_mt == MemoryType::DEFAULT
   MFEM_VERIFY(h_mt != MemoryType::HOST_DEBUG ||
               d_mt == MemoryType::DEVICE_DEBUG ||
               d_mt == MemoryType::DEFAULT,
               "d_mt = " << MemoryTypeName[(int)d_mt]);
   // If d_mt == MemoryType::DEVICE_DEBUG, then h_mt != MemoryType::MANAGED
   MFEM_VERIFY(d_mt != MemoryType::DEVICE_DEBUG ||
               h_mt != MemoryType::MANAGED,
               "h_mt = " << MemoryTypeName[(int)h_mt]);
   // Disabled exhaustive pair check; NOTE(review): this '#if 0' section also
   // appears truncated by the extraction (some '||' operands are missing),
   // which is harmless since the preprocessor skips it.
#if 0
   const bool sync =
      (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE) ||
      (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE) ||
      (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
      (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
      (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
      (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE) ||
      (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE) ||
   MFEM_VERIFY(sync, "");
#endif
}
129
// NOTE(review): the signature line is missing from this listing (dropped by
// the extraction). The body computes the "product" of two MemoryClass values
// -- the smallest class that satisfies both -- per the table below.
{
   // | HOST HOST_32 HOST_64 DEVICE MANAGED
   // ---------+---------------------------------------------
   // HOST | HOST HOST_32 HOST_64 DEVICE MANAGED
   // HOST_32 | HOST_32 HOST_32 HOST_64 DEVICE MANAGED
   // HOST_64 | HOST_64 HOST_64 HOST_64 DEVICE MANAGED
   // DEVICE | DEVICE DEVICE DEVICE DEVICE MANAGED
   // MANAGED | MANAGED MANAGED MANAGED MANAGED MANAGED

   // Using the enumeration ordering:
   // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
   // the above table is simply: a*b = max(a,b).

   return std::max(mc1, mc2);
}
146
147
// Explicit template instantiations: compile these out-of-line Memory<T>
// member definitions once, here, for the two element types used publicly.

// Instantiate Memory<T>::PrintFlags for T = int and T = real_t.
template void Memory<int>::PrintFlags() const;
template void Memory<real_t>::PrintFlags() const;

// Instantiate Memory<T>::CompareHostAndDevice for T = int and T = real_t.
template int Memory<int>::CompareHostAndDevice(int size) const;
template int Memory<real_t>::CompareHostAndDevice(int size) const;
155
156
157namespace internal
158{
159
/// Memory class that holds:
/// - the host and the device pointer
/// - the size in bytes of this memory region
/// - the host and device type of this memory region
struct Memory
{
   void *const h_ptr;       // host pointer; fixed for the entry's lifetime
   void *d_ptr;             // device pointer; starts null (allocated lazily)
   const size_t bytes;      // size of the region in bytes
   const MemoryType h_mt;   // host memory type (immutable)
   MemoryType d_mt;         // device memory type; may be set after creation
   // Host/device read-write (un-protected) state; mutable so the const
   // Protect/Unprotect hooks of the MMU/debug spaces can toggle it.
   mutable bool h_rw, d_rw;
   Memory(void *p, size_t b, MemoryType h, MemoryType d):
      h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
      h_rw(true), d_rw(true) { }
};
176
/// Alias class that holds the base memory region and the offset
struct Alias
{
   Memory *mem;     // base Memory entry this alias points into
   size_t offset;   // byte offset of the alias within the base region
   size_t counter;  // reference count -- presumably the number of live
                    // Memory objects sharing this alias entry; managed by
                    // InsertAlias/EraseAlias (not visible in this chunk)
   // 'h_mt' is already stored in 'mem', however, we use this field for type
   // checking since the alias may be dangling, i.e. 'mem' may be invalid.
   MemoryType h_mt;
};
187
/// Maps for the Memory and the Alias classes
// Keys are the raw host pointers (base pointer for Memory entries, aliased
// pointer for Alias entries).
typedef std::unordered_map<const void*, Memory> MemoryMap;
typedef std::unordered_map<const void*, Alias> AliasMap;

/// Bundle of both maps, owned through the file-scope 'maps' pointer below.
struct Maps
{
   MemoryMap memories;
   AliasMap aliases;
};
197
198} // namespace mfem::internal
199
// File-scope registry of all known Memory regions and Aliases; presumably
// allocated/freed by the MemoryManager ctor/dtor (not visible in this chunk).
static internal::Maps *maps;
201
202namespace internal
203{
204
/// The host memory space base abstract class
class HostMemorySpace
{
public:
   virtual ~HostMemorySpace() { }
   /// Allocate 'bytes' bytes of host memory; the default uses std::malloc.
   virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
   /// Deallocate a pointer obtained from Alloc(); the default uses std::free.
   virtual void Dealloc(void *ptr) { std::free(ptr); }
   /// Remove host access to a region; no-op here, overridden by the MMU space.
   virtual void Protect(const Memory&, size_t) { }
   /// Restore host access to a region; no-op here, overridden by the MMU space.
   virtual void Unprotect(const Memory&, size_t) { }
   /// Protect an aliased sub-region; no-op by default.
   virtual void AliasProtect(const void*, size_t) { }
   /// Un-protect an aliased sub-region; no-op by default.
   virtual void AliasUnprotect(const void*, size_t) { }
};
217
/// The device memory space base abstract class
// The defaults emulate a "device" on the host: malloc/free for storage and
// memcpy for all three transfer directions.
class DeviceMemorySpace
{
public:
   virtual ~DeviceMemorySpace() { }
   /// Allocate base.bytes for base.d_ptr; the default uses std::malloc.
   virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
   /// Free base.d_ptr; the default uses std::free.
   virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
   /// Remove device access; no-op here, overridden by the MMU device space.
   virtual void Protect(const Memory&) { }
   /// Restore device access; no-op here, overridden by the MMU device space.
   virtual void Unprotect(const Memory&) { }
   virtual void AliasProtect(const void*, size_t) { }
   virtual void AliasUnprotect(const void*, size_t) { }
   /// Host-to-device copy of 'bytes' bytes; returns 'dst'.
   virtual void *HtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   /// Device-to-device copy of 'bytes' bytes; returns 'dst'.
   virtual void *DtoD(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
   /// Device-to-host copy of 'bytes' bytes; returns 'dst'.
   virtual void *DtoH(void *dst, const void *src, size_t bytes)
   { return std::memcpy(dst, src, bytes); }
};
236
/// The default std:: host memory space
/// (inherits the std::malloc/std::free behavior of HostMemorySpace unchanged).
class StdHostMemorySpace : public HostMemorySpace { };
239
240/// The No host memory space
241struct NoHostMemorySpace : public HostMemorySpace
242{
243 void Alloc(void**, const size_t) { mfem_error("! Host Alloc error"); }
244};
245
246/// The aligned 32 host memory space
247class Aligned32HostMemorySpace : public HostMemorySpace
248{
249public:
250 Aligned32HostMemorySpace(): HostMemorySpace() { }
251 void Alloc(void **ptr, size_t bytes)
252 { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
253 void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
254};
255
256/// The aligned 64 host memory space
257class Aligned64HostMemorySpace : public HostMemorySpace
258{
259public:
260 Aligned64HostMemorySpace(): HostMemorySpace() { }
261 void Alloc(void **ptr, size_t bytes)
262 { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
263 void Dealloc(void *ptr) { mfem_aligned_free(ptr); }
264};
265
266#ifndef _WIN32
// Host page size and its mask (pagesize - 1); both are set exactly once by
// MmuInit() below (pagesize > 0 doubles as the "initialized" flag).
static uintptr_t pagesize = 0;
static uintptr_t pagemask = 0;
269
270/// Returns the restricted base address of the DEBUG segment
271inline const void *MmuAddrR(const void *ptr)
272{
273 const uintptr_t addr = (uintptr_t) ptr;
274 return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
275}
276
277/// Returns the prolongated base address of the MMU segment
278inline const void *MmuAddrP(const void *ptr)
279{
280 const uintptr_t addr = (uintptr_t) ptr;
281 return (void*) (addr & ~pagemask);
282}
283
284/// Compute the restricted length for the MMU segment
285inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
286{
287 // a ---->A:| |:B<---- b
288 const uintptr_t a = (uintptr_t) ptr;
289 const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
290 MFEM_ASSERT(a <= A, "");
291 const uintptr_t b = a + bytes;
292 const uintptr_t B = b & ~pagemask;
293 MFEM_ASSERT(B <= b, "");
294 const uintptr_t length = B > A ? B - A : 0;
295 MFEM_ASSERT(length % pagesize == 0,"");
296 return length;
297}
298
299/// Compute the prolongated length for the MMU segment
300inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
301{
302 // |:A<----a | | b---->B:|
303 const uintptr_t a = (uintptr_t) ptr;
304 const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
305 MFEM_ASSERT(A <= a, "");
306 const uintptr_t b = a + bytes;
307 const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
308 MFEM_ASSERT(b <= B, "");
309 MFEM_ASSERT(B >= A,"");
310 const uintptr_t length = B - A;
311 MFEM_ASSERT(length % pagesize == 0,"");
312 return length;
313}
314
/// The protected access error, used for the host
// Signal handler installed by MmuInit() for SIGBUS/SIGSEGV: reports the
// faulting address and aborts.
static void MmuError(int, siginfo_t *si, void*)
{
   constexpr size_t buf_size = 64;
   fflush(0);  // flush every open output stream before aborting
   char str[buf_size];
   const void *ptr = si->si_addr;  // address that triggered the fault
   snprintf(str, buf_size, "Error while accessing address %p!", ptr);
   mfem::out << std::endl << "An illegal memory access was made!";
   MFEM_ABORT(str);
}
326
/// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
// Idempotent: pagesize > 0 marks the MMU machinery as already initialized.
static void MmuInit()
{
   if (pagesize > 0) { return; }
   struct sigaction sa;
   sa.sa_flags = SA_SIGINFO;  // deliver siginfo_t (fault address) to handler
   sigemptyset(&sa.sa_mask);
   sa.sa_sigaction = MmuError;
   if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
   if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
   pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
   MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
   // Mask of the in-page offset bits; assumes the page size is a power of
   // two (true on POSIX systems -- TODO confirm for exotic targets).
   pagemask = pagesize - 1;
}
341
/// MMU allocation, through ::mmap; throws std::bad_alloc on failure.
inline void MmuAlloc(void **ptr, const size_t bytes)
{
   // mmap rejects zero-length mappings, so allocate a minimal 8-byte one.
   const size_t length = (bytes > 0) ? bytes : 8;
   void *mem = ::mmap(NULL, length, PROT_READ | PROT_WRITE,
                      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
   if (mem == MAP_FAILED) { throw ::std::bad_alloc(); }
   *ptr = mem;
}
351
352/// MMU deallocation, through ::munmap
353inline void MmuDealloc(void *ptr, const size_t bytes)
354{
355 const size_t length = bytes == 0 ? 8 : bytes;
356 if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
357}
358
359/// MMU protection, through ::mprotect with no read/write accesses
360inline void MmuProtect(const void *ptr, const size_t bytes)
361{
362 static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
363 if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
364 if (mmu_protect_error) { mfem_error("MMU protection (NONE) error"); }
365}
366
367/// MMU un-protection, through ::mprotect with read/write accesses
368inline void MmuAllow(const void *ptr, const size_t bytes)
369{
370 const int RW = PROT_READ | PROT_WRITE;
371 static const bool mmu_protect_error = getenv("MFEM_MMU_PROTECT_ERROR");
372 if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
373 if (mmu_protect_error) { mfem_error("MMU protection (R/W) error"); }
374}
375#else
// _WIN32 stubs: without POSIX mmap/mprotect the MMU machinery degrades to
// plain malloc/free with no page protection and zero-length segments.
inline void MmuInit() { }
inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
inline void MmuProtect(const void*, const size_t) { }
inline void MmuAllow(const void*, const size_t) { }
inline const void *MmuAddrR(const void *a) { return a; }
inline const void *MmuAddrP(const void *a) { return a; }
inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
385#endif
386
387/// The MMU host memory space
388class MmuHostMemorySpace : public HostMemorySpace
389{
390public:
391 MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
392 void Alloc(void **ptr, size_t bytes) { MmuAlloc(ptr, bytes); }
393 void Dealloc(void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
394 void Protect(const Memory& mem, size_t bytes)
395 { if (mem.h_rw) { mem.h_rw = false; MmuProtect(mem.h_ptr, bytes); } }
396 void Unprotect(const Memory &mem, size_t bytes)
397 { if (!mem.h_rw) { mem.h_rw = true; MmuAllow(mem.h_ptr, bytes); } }
398 /// Aliases need to be restricted during protection
399 void AliasProtect(const void *ptr, size_t bytes)
400 { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
401 /// Aliases need to be prolongated for un-protection
402 void AliasUnprotect(const void *ptr, size_t bytes)
403 { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
404};
405
406/// The UVM host memory space
407class UvmHostMemorySpace : public HostMemorySpace
408{
409public:
410 UvmHostMemorySpace(): HostMemorySpace() { }
411 void Alloc(void **ptr, size_t bytes) { CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
412 void Dealloc(void *ptr) { CuMemFree(ptr); }
413};
414
415/// The 'No' device memory space
416class NoDeviceMemorySpace: public DeviceMemorySpace
417{
418public:
419 void Alloc(internal::Memory&) { mfem_error("! Device Alloc"); }
420 void Dealloc(Memory&) { mfem_error("! Device Dealloc"); }
421 void *HtoD(void*, const void*, size_t) { mfem_error("!HtoD"); return nullptr; }
422 void *DtoD(void*, const void*, size_t) { mfem_error("!DtoD"); return nullptr; }
423 void *DtoH(void*, const void*, size_t) { mfem_error("!DtoH"); return nullptr; }
424};
425
/// The std:: device memory space, used with the 'debug' device
/// (inherits the malloc/free/memcpy host-side defaults of DeviceMemorySpace).
class StdDeviceMemorySpace : public DeviceMemorySpace { };
428
429/// The CUDA device memory space
430class CudaDeviceMemorySpace: public DeviceMemorySpace
431{
432public:
433 CudaDeviceMemorySpace(): DeviceMemorySpace() { }
434 void Alloc(Memory &base) { CuMemAlloc(&base.d_ptr, base.bytes); }
435 void Dealloc(Memory &base) { CuMemFree(base.d_ptr); }
436 void *HtoD(void *dst, const void *src, size_t bytes)
437 { return CuMemcpyHtoD(dst, src, bytes); }
438 void *DtoD(void* dst, const void* src, size_t bytes)
439 { return CuMemcpyDtoD(dst, src, bytes); }
440 void *DtoH(void *dst, const void *src, size_t bytes)
441 { return CuMemcpyDtoH(dst, src, bytes); }
442};
443
/// The CUDA/HIP page-locked host memory space
class HostPinnedMemorySpace: public HostMemorySpace
{
public:
   HostPinnedMemorySpace(): HostMemorySpace() { }
   /// Allocate page-locked (pinned) host memory via the active GPU runtime.
   void Alloc(void ** ptr, size_t bytes) override
   {
#ifdef MFEM_USE_CUDA
      CuMemAllocHostPinned(ptr, bytes);
#endif
#ifdef MFEM_USE_HIP
      HipMemAllocHostPinned(ptr, bytes);
#endif
   }
   void Dealloc(void *ptr) override
   {
      // NOTE(review): the Cu/Hip pinned-memory free calls inside the two
      // #ifdef sections are missing from this listing (lines dropped by the
      // extraction); as shown, Dealloc would leak. Restore from upstream.
#ifdef MFEM_USE_CUDA
#endif
#ifdef MFEM_USE_HIP
#endif
   }
};
468
469/// The HIP device memory space
470class HipDeviceMemorySpace: public DeviceMemorySpace
471{
472public:
473 HipDeviceMemorySpace(): DeviceMemorySpace() { }
474 void Alloc(Memory &base) { HipMemAlloc(&base.d_ptr, base.bytes); }
475 void Dealloc(Memory &base) { HipMemFree(base.d_ptr); }
476 void *HtoD(void *dst, const void *src, size_t bytes)
477 { return HipMemcpyHtoD(dst, src, bytes); }
478 void *DtoD(void* dst, const void* src, size_t bytes)
479 // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side synchronization so
480 // instead we use hipMemcpyAsync to get similar behavior.
481 // for more info see: https://github.com/mfem/mfem/pull/2780
482 { return HipMemcpyDtoDAsync(dst, src, bytes); }
483 void *DtoH(void *dst, const void *src, size_t bytes)
484 { return HipMemcpyDtoH(dst, src, bytes); }
485};
486
487/// The UVM device memory space.
488class UvmCudaMemorySpace : public DeviceMemorySpace
489{
490public:
491 void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
492 void Dealloc(Memory&) { }
493 void *HtoD(void *dst, const void *src, size_t bytes)
494 {
495 if (dst == src) { MFEM_STREAM_SYNC; return dst; }
496 return CuMemcpyHtoD(dst, src, bytes);
497 }
498 void *DtoD(void* dst, const void* src, size_t bytes)
499 { return CuMemcpyDtoD(dst, src, bytes); }
500 void *DtoH(void *dst, const void *src, size_t bytes)
501 {
502 if (dst == src) { MFEM_STREAM_SYNC; return dst; }
503 return CuMemcpyDtoH(dst, src, bytes);
504 }
505};
506
507/// The MMU device memory space
508class MmuDeviceMemorySpace : public DeviceMemorySpace
509{
510public:
511 MmuDeviceMemorySpace(): DeviceMemorySpace() { }
512 void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
513 void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
514 void Protect(const Memory &m)
515 { if (m.d_rw) { m.d_rw = false; MmuProtect(m.d_ptr, m.bytes); } }
516 void Unprotect(const Memory &m)
517 { if (!m.d_rw) { m.d_rw = true; MmuAllow(m.d_ptr, m.bytes); } }
518 /// Aliases need to be restricted during protection
519 void AliasProtect(const void *ptr, size_t bytes)
520 { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
521 /// Aliases need to be prolongated for un-protection
522 void AliasUnprotect(const void *ptr, size_t bytes)
523 { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
524 void *HtoD(void *dst, const void *src, size_t bytes)
525 { return std::memcpy(dst, src, bytes); }
526 void *DtoD(void *dst, const void *src, size_t bytes)
527 { return std::memcpy(dst, src, bytes); }
528 void *DtoH(void *dst, const void *src, size_t bytes)
529 { return std::memcpy(dst, src, bytes); }
530};
531
532#ifdef MFEM_USE_UMPIRE
// Common base for the Umpire-backed spaces: wraps a named Umpire allocator,
// creating a QuickPool over the given resource when the name is new.
class UmpireMemorySpace
{
protected:
   umpire::ResourceManager &rm;   // the singleton Umpire resource manager
   umpire::Allocator allocator;
   bool owns_allocator{false};    // true when the pool was created here

public:
   // TODO: this only releases unused memory
   virtual ~UmpireMemorySpace() { if (owns_allocator) { allocator.release(); } }
   // 'space' is the base resource name, e.g. "HOST" or "DEVICE".
   UmpireMemorySpace(const char * name, const char * space)
      : rm(umpire::ResourceManager::getInstance())
   {
      if (!rm.isAllocator(name))
      {
         allocator = rm.makeAllocator<umpire::strategy::QuickPool>(
                        name, rm.getAllocator(space));
         owns_allocator = true;
      }
      else
      {
         // Reuse a pre-existing allocator registered under the same name.
         allocator = rm.getAllocator(name);
         owns_allocator = false;
      }
   }
};
559
/// The Umpire host memory space
class UmpireHostMemorySpace : public HostMemorySpace, public UmpireMemorySpace
{
private:
   umpire::strategy::AllocationStrategy *strat; // used by Insert() below
public:
   UmpireHostMemorySpace(const char * name)
      : HostMemorySpace(),
        UmpireMemorySpace(name, "HOST"),
        strat(allocator.getAllocationStrategy()) {}
   void Alloc(void **ptr, size_t bytes) override
   { *ptr = allocator.allocate(bytes); }
   void Dealloc(void *ptr) override { allocator.deallocate(ptr); }
   // Register an externally allocated block [ptr, ptr+bytes) with Umpire.
   void Insert(void *ptr, size_t bytes)
   { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
};
576
577/// The Umpire device memory space
578#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
// Umpire-pooled device memory; copies go through the native Cu*/Hip*
// wrappers. MFEM is built with at most one of CUDA/HIP (enforced at
// configure time -- see the UMPIRE_ENABLE_* checks at the top of this file),
// so exactly one branch of each #ifdef pair below is active.
class UmpireDeviceMemorySpace : public DeviceMemorySpace,
   public UmpireMemorySpace
{
public:
   UmpireDeviceMemorySpace(const char * name)
      : DeviceMemorySpace(),
        UmpireMemorySpace(name, "DEVICE") {}
   void Alloc(Memory &base) override
   { base.d_ptr = allocator.allocate(base.bytes); }
   void Dealloc(Memory &base) override { rm.deallocate(base.d_ptr); }
   void *HtoD(void *dst, const void *src, size_t bytes) override
   {
#ifdef MFEM_USE_CUDA
      return CuMemcpyHtoD(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyHtoD(dst, src, bytes);
#endif
      // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
   void *DtoD(void* dst, const void* src, size_t bytes) override
   {
#ifdef MFEM_USE_CUDA
      return CuMemcpyDtoD(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side synchronization so
      // instead we use hipMemcpyAsync to get similar behavior.
      // for more info see: https://github.com/mfem/mfem/pull/2780
      return HipMemcpyDtoDAsync(dst, src, bytes);
#endif
      // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
   void *DtoH(void *dst, const void *src, size_t bytes) override
   {
#ifdef MFEM_USE_CUDA
      return CuMemcpyDtoH(dst, src, bytes);
#endif
#ifdef MFEM_USE_HIP
      return HipMemcpyDtoH(dst, src, bytes);
#endif
      // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
   }
};
623#else
// Fallback when neither CUDA nor HIP is enabled: inherits the aborting
// operations of NoDeviceMemorySpace; the name argument is ignored.
class UmpireDeviceMemorySpace : public NoDeviceMemorySpace
{
public:
   UmpireDeviceMemorySpace(const char * /*unused*/) {}
};
629#endif // MFEM_USE_CUDA || MFEM_USE_HIP
630#endif // MFEM_USE_UMPIRE
631
/// Memory space controller class
// Owns one HostMemorySpace per host MemoryType and one DeviceMemorySpace per
// device MemoryType. Cheap backends are created eagerly in Configure();
// backends with side effects (signal rerouting, GPU/Umpire setup) are
// created lazily on first use in Host()/Device().
class Ctrl
{
   typedef MemoryType MT;

public:
   HostMemorySpace *host[HostMemoryTypeSize];
   DeviceMemorySpace *device[DeviceMemoryTypeSize];

public:
   Ctrl(): host{nullptr}, device{nullptr} { }

   void Configure()
   {
      // Guard against double configuration using the first host slot.
      if (host[HostMemoryType])
      {
         mfem_error("Memory backends have already been configured!");
      }

      // Filling the host memory backends
      // HOST, HOST_32 & HOST_64 are always ready
      // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
      host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
      host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
      host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
      // HOST_DEBUG is delayed, as it reroutes signals
      host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
      host[static_cast<int>(MT::HOST_UMPIRE)] = nullptr;
      host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();

      // Filling the device memory backends, shifting with the device size
      constexpr int shift = DeviceMemoryType;
      device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
      // All other devices controllers are delayed
      device[static_cast<int>(MemoryType::DEVICE)-shift] = nullptr;
      device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
      device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
      device[static_cast<int>(MT::DEVICE_UMPIRE_2)-shift] = nullptr;
   }

   // Return the host controller for 'mt', creating it on first use.
   HostMemorySpace* Host(const MemoryType mt)
   {
      const int mt_i = static_cast<int>(mt);
      // Delayed host controllers initialization
      if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
      MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
      return host[mt_i];
   }

   // Return the device controller for 'mt', creating it on first use.
   DeviceMemorySpace* Device(const MemoryType mt)
   {
      // Device slots are indexed relative to the first device MemoryType.
      const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
      MFEM_ASSERT(mt_i >= 0,"");
      // Lazy device controller initializations
      if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
      MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
      return device[mt_i];
   }

   ~Ctrl()
   {
      constexpr int mt_h = HostMemoryType;
      constexpr int mt_d = DeviceMemoryType;
      // Deleting a nullptr slot (never-used lazy backend) is a no-op.
      for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
      for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
   }

private:
   // Create the lazily-initialized host backends; aborts on types that
   // should have been created eagerly in Configure().
   HostMemorySpace* NewHostCtrl(const MemoryType mt)
   {
      switch (mt)
      {
         case MT::HOST_DEBUG: return new MmuHostMemorySpace();
#ifdef MFEM_USE_UMPIRE
         case MT::HOST_UMPIRE:
            return new UmpireHostMemorySpace(
                      MemoryManager::GetUmpireHostAllocatorName());
#else
         case MT::HOST_UMPIRE: return new NoHostMemorySpace();
#endif
         case MT::HOST_PINNED: return new HostPinnedMemorySpace();
         default: MFEM_ABORT("Unknown host memory controller!");
      }
      return nullptr;
   }

   // Create the lazily-initialized device backends.
   DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
   {
      switch (mt)
      {
#ifdef MFEM_USE_UMPIRE
         case MT::DEVICE_UMPIRE:
            return new UmpireDeviceMemorySpace(
                      MemoryManager::GetUmpireDeviceAllocatorName());
         case MT::DEVICE_UMPIRE_2:
            return new UmpireDeviceMemorySpace(
                      MemoryManager::GetUmpireDevice2AllocatorName());
#else
         case MT::DEVICE_UMPIRE: return new NoDeviceMemorySpace();
         case MT::DEVICE_UMPIRE_2: return new NoDeviceMemorySpace();
#endif
         case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
         case MT::DEVICE:
         {
#if defined(MFEM_USE_CUDA)
            return new CudaDeviceMemorySpace();
#elif defined(MFEM_USE_HIP)
            return new HipDeviceMemorySpace();
#else
            MFEM_ABORT("No device memory controller!");
            break;
#endif
         }
         default: MFEM_ABORT("Unknown device memory controller!");
      }
      return nullptr;
   }
};
750
751} // namespace mfem::internal
752
// File-scope memory-space controller; presumably allocated/freed alongside
// 'maps' by the MemoryManager setup (not visible in this chunk).
static internal::Ctrl *ctrl;
754
755void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
756 unsigned &flags)
757{
758 MFEM_ASSERT(exists, "Internal error!");
759 if (IsHostMemory(mt))
760 {
761 MFEM_ASSERT(mt != MemoryType::HOST && h_tmp == nullptr,
762 "Internal error!");
763 // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
764 // evaluated at the time when the device pointer is allocated, see
765 // GetDevicePtr() and GetAliasDevicePtr()
766 const MemoryType d_mt = MemoryType::DEFAULT;
767 // We rely on the next call using lazy dev alloc
768 return New_(h_tmp, bytes, mt, d_mt, Mem::VALID_HOST, flags);
769 }
770 else
771 {
772 const MemoryType h_mt = GetDualMemoryType(mt);
773 return New_(h_tmp, bytes, h_mt, mt, Mem::VALID_DEVICE, flags);
774 }
775}
776
// Allocate and register a new (h_mt, d_mt) Memory entry; the device side is
// allocated lazily by GetDevicePtr()/GetAliasDevicePtr().
void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType h_mt,
                          MemoryType d_mt, unsigned valid_flags,
                          unsigned &flags)
{
   MFEM_ASSERT(exists, "Internal error!");
   MFEM_ASSERT(IsHostMemory(h_mt), "h_mt must be host type");
   MFEM_ASSERT(IsDeviceMemory(d_mt) || d_mt == h_mt ||
               d_mt == MemoryType::DEFAULT,
               "d_mt must be device type, the same is h_mt, or DEFAULT");
   MFEM_ASSERT((h_mt != MemoryType::HOST || h_tmp != nullptr) &&
               (h_mt == MemoryType::HOST || h_tmp == nullptr),
               "Internal error");
   MFEM_ASSERT((valid_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) == 0,
               "Internal error");
   void *h_ptr;
   if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
   else { h_ptr = h_tmp; }
   // NOTE(review): the line that assigns 'flags' is missing from this
   // listing (dropped by the extraction); only its continuation is visible
   // below. Restore from upstream before editing.
      Mem::OWNS_DEVICE | valid_flags;
   // The other New_() method relies on this lazy allocation behavior.
   mm.Insert(h_ptr, bytes, h_mt, d_mt); // lazy dev alloc
   // mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); // non-lazy dev alloc

   // MFEM_VERIFY_TYPES(h_mt, mt); // done by mm.Insert() above
   CheckHostMemoryType_(h_mt, h_ptr, false);

   return h_ptr;
}
805
// Register an externally allocated pointer 'ptr' of type 'mt' with the
// memory manager and return the host pointer of the new entry.
// NOTE(review): several lines are missing from this listing (the tails of
// the 'flags' assignments); restore from upstream before editing.
void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
                               MemoryType mt,
                               bool own, bool alias, unsigned &flags)
{
   MFEM_ASSERT(exists, "Internal error!");
   const bool is_host_mem = IsHostMemory(mt);
   // MemType: presumably an alias of MemoryType (declared elsewhere).
   const MemType h_mt = is_host_mem ? mt : GetDualMemoryType(mt);
   const MemType d_mt = is_host_mem ? MemoryType::DEFAULT : mt;
   // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
   // evaluated at the time when the device pointer is allocated, see
   // GetDevicePtr() and GetAliasDevicePtr()

   MFEM_VERIFY_TYPES(h_mt, d_mt);

   // Registering nothing is allowed only for empty (zero-byte) memories.
   if (ptr == nullptr && h_tmp == nullptr)
   {
      MFEM_VERIFY(bytes == 0, "internal error");
      return nullptr;
   }

   MFEM_VERIFY(!alias, "Cannot register an alias!");

   void *h_ptr;

   if (is_host_mem) // HOST TYPES + MANAGED
   {
      h_ptr = ptr;
      mm.Insert(h_ptr, bytes, h_mt, d_mt);
      // NOTE(review): the continuation of this assignment is missing from
      // this listing (dropped line).
      flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
   }
   else // DEVICE TYPES
   {
      MFEM_VERIFY(ptr || bytes == 0,
                  "cannot register NULL device pointer with bytes = " << bytes);
      // Allocate the host mirror unless a temporary buffer was supplied.
      if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
      else { h_ptr = h_tmp; }
      mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
      flags = own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE;
   }
   CheckHostMemoryType_(h_mt, h_ptr, alias);
   return h_ptr;
}
851
// Register an externally allocated (host, device) pointer pair with the
// memory manager. NOTE(review): at least one line is missing from this
// listing (dropped by the extraction) in the gap below; restore from
// upstream before editing.
void MemoryManager::Register2_(void *h_ptr, void *d_ptr, size_t bytes,
                               MemoryType h_mt, MemoryType d_mt,
                               bool own, bool alias, unsigned &flags,
                               unsigned valid_flags)
{
   MFEM_CONTRACT_VAR(alias);  // 'alias' is only used in assertions
   MFEM_ASSERT(exists, "Internal error!");
   MFEM_ASSERT(!alias, "Cannot register an alias!");
   MFEM_VERIFY_TYPES(h_mt, d_mt);

   // Registering nothing is allowed only for empty (zero-byte) memories.
   if (h_ptr == nullptr && d_ptr == nullptr)
   {
      MFEM_VERIFY(bytes == 0, "internal error");
      return;
   }


   MFEM_VERIFY(d_ptr || bytes == 0,
               "cannot register NULL device pointer with bytes = " << bytes);
   mm.InsertDevice(d_ptr, h_ptr, bytes, h_mt, d_mt);
   // Ownership of both sides follows 'own'; validity comes from the caller.
   flags = (own ? flags | (Mem::OWNS_HOST | Mem::OWNS_DEVICE) :
            flags & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE)) |
           valid_flags;

   CheckHostMemoryType_(h_mt, h_ptr, alias);
}
879
880void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
881 unsigned base_flags, unsigned &flags)
882{
883 mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
884 base_flags & Mem::ALIAS);
885 flags = (base_flags | Mem::ALIAS) & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
886 if (base_h_ptr) { flags |= Mem::OWNS_INTERNAL; }
887}
888
889void MemoryManager::SetDeviceMemoryType_(void *h_ptr, unsigned flags,
890 MemoryType d_mt)
891{
892 MFEM_VERIFY(h_ptr, "cannot set the device memory type: Memory is empty!");
893 if (!(flags & Mem::ALIAS))
894 {
895 auto mem_iter = maps->memories.find(h_ptr);
896 MFEM_VERIFY(mem_iter != maps->memories.end(), "internal error");
897 internal::Memory &mem = mem_iter->second;
898 if (mem.d_mt == d_mt) { return; }
899 MFEM_VERIFY(mem.d_ptr == nullptr, "cannot set the device memory type:"
900 " device memory is allocated!");
901 mem.d_mt = d_mt;
902 }
903 else
904 {
905 auto alias_iter = maps->aliases.find(h_ptr);
906 MFEM_VERIFY(alias_iter != maps->aliases.end(), "internal error");
907 internal::Alias &alias = alias_iter->second;
908 internal::Memory &base_mem = *alias.mem;
909 if (base_mem.d_mt == d_mt) { return; }
910 MFEM_VERIFY(base_mem.d_ptr == nullptr,
911 "cannot set the device memory type:"
912 " alias' base device memory is allocated!");
913 base_mem.d_mt = d_mt;
914 }
915}
916
// Release whatever this Memory owns according to its flags: the host
// allocation (for owned, non-HOST types), the alias map entry, or the full
// map entry (which also releases an owned device allocation via Erase).
void MemoryManager::Delete_(void *h_ptr, MemoryType h_mt, unsigned flags)
{
   const bool alias = flags & Mem::ALIAS;
   const bool registered = flags & Mem::Registered;
   const bool owns_host = flags & Mem::OWNS_HOST;
   const bool owns_device = flags & Mem::OWNS_DEVICE;
   const bool owns_internal = flags & Mem::OWNS_INTERNAL;
   MFEM_ASSERT(IsHostMemory(h_mt), "invalid h_mt = " << (int)h_mt);
   // MFEM_ASSERT(registered || IsHostMemory(h_mt),"");
   MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
   // If at least one of the 'own_*' flags is true then 'registered' must be
   // true too. An acceptable exception is the special case when 'h_ptr' is
   // NULL, and both 'own_device' and 'own_internal' are false -- this case is
   // an exception only when 'own_host' is true and 'registered' is false.
   MFEM_ASSERT(registered || !(owns_host || owns_device || owns_internal) ||
               (!(owns_device || owns_internal) && h_ptr == nullptr),
               "invalid Memory state");
   // Nothing to do when the manager is already destroyed (e.g. during static
   // destruction) or when this Memory was never registered with it.
   if (!mm.exists || !registered) { return; }
   if (alias)
   {
      if (owns_internal)
      {
         MFEM_ASSERT(mm.IsAlias(h_ptr), "");
         MFEM_ASSERT(h_mt == maps->aliases.at(h_ptr).h_mt, "");
         mm.EraseAlias(h_ptr);
      }
   }
   else // Known
   {
      // Plain MemoryType::HOST storage is not freed here -- presumably the
      // caller releases it (TODO confirm against Memory<T>'s Delete path).
      if (owns_host && (h_mt != MemoryType::HOST))
      { ctrl->Host(h_mt)->Dealloc(h_ptr); }
      if (owns_internal)
      {
         MFEM_ASSERT(mm.IsKnown(h_ptr), "");
         MFEM_ASSERT(h_mt == maps->memories.at(h_ptr).h_mt, "");
         mm.Erase(h_ptr, owns_device);
      }
   }
}
956
957void MemoryManager::DeleteDevice_(void *h_ptr, unsigned & flags)
958{
959 const bool owns_device = flags & Mem::OWNS_DEVICE;
960 if (owns_device)
961 {
962 mm.EraseDevice(h_ptr);
963 flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
964 }
965}
966
967bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
968 MemoryType h_mt, size_t bytes,
969 unsigned flags)
970{
971 if (!h_ptr)
972 {
973 MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
974 return true;
975 }
976 MemoryType d_mt;
977 if (!(flags & Mem::ALIAS))
978 {
979 auto iter = maps->memories.find(h_ptr);
980 MFEM_VERIFY(iter != maps->memories.end(), "internal error");
981 d_mt = iter->second.d_mt;
982 }
983 else
984 {
985 auto iter = maps->aliases.find(h_ptr);
986 MFEM_VERIFY(iter != maps->aliases.end(), "internal error");
987 d_mt = iter->second.mem->d_mt;
988 }
989 if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
990 switch (mc)
991 {
993 {
994 MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
995 h_mt == MemoryType::HOST_64,"");
996 return true;
997 }
999 {
1000 MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
1001 return true;
1002 }
1004 {
1005 MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
1006 d_mt == MemoryType::DEVICE_DEBUG ||
1007 d_mt == MemoryType::DEVICE_UMPIRE ||
1009 d_mt == MemoryType::MANAGED,"");
1010 return true;
1011 }
1013 {
1014 MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
1015 d_mt == MemoryType::MANAGED),"");
1016 return true;
1017 }
1018 default: break;
1019 }
1020 return true;
1021}
1022
1023void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1024 size_t bytes, unsigned &flags)
1025{
1026 if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1027 if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1028 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1030 {
1031 const bool copy = !(flags & Mem::VALID_HOST);
1032 flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1033 if (flags & Mem::ALIAS)
1034 { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1035 else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1036 }
1037 else
1038 {
1039 const bool copy = !(flags & Mem::VALID_DEVICE);
1040 flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1041 if (flags & Mem::ALIAS)
1042 { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1043 else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1044 }
1045}
1046
1047const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1048 size_t bytes, unsigned &flags)
1049{
1050 if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1051 if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1052 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1054 {
1055 const bool copy = !(flags & Mem::VALID_HOST);
1056 flags |= Mem::VALID_HOST;
1057 if (flags & Mem::ALIAS)
1058 { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1059 else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1060 }
1061 else
1062 {
1063 const bool copy = !(flags & Mem::VALID_DEVICE);
1064 flags |= Mem::VALID_DEVICE;
1065 if (flags & Mem::ALIAS)
1066 { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1067 else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1068 }
1069}
1070
1071void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1072 size_t bytes, unsigned &flags)
1073{
1074 if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1075 if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1076 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1078 {
1079 flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1080 if (flags & Mem::ALIAS)
1081 { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
1082 else { return mm.GetHostPtr(h_ptr, bytes, false); }
1083 }
1084 else
1085 {
1086 flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1087 if (flags & Mem::ALIAS)
1088 { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
1089 else { return mm.GetDevicePtr(h_ptr, bytes, false); }
1090 }
1091}
1092
1093void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
1094 size_t alias_bytes, unsigned base_flags,
1095 unsigned &alias_flags)
1096{
1097 // This is called only when (base_flags & Mem::Registered) is true.
1098 // Note that (alias_flags & Registered) may not be true.
1099 MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
1100 if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
1101 {
1102 mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
1103 }
1104 if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
1105 {
1106 if (!(alias_flags & Mem::Registered))
1107 {
1108 mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
1109 alias_flags = (alias_flags | Mem::Registered | Mem::OWNS_INTERNAL) &
1111 }
1112 mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
1113 }
1114 alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
1115 (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
1116}
1117
1118MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr, bool alias)
1119{
1120 if (mm.exists)
1121 {
1122 if (!alias)
1123 {
1124 auto iter = maps->memories.find(h_ptr);
1125 MFEM_ASSERT(iter != maps->memories.end(), "internal error");
1126 return iter->second.d_mt;
1127 }
1128 // alias == true
1129 auto iter = maps->aliases.find(h_ptr);
1130 MFEM_ASSERT(iter != maps->aliases.end(), "internal error");
1131 return iter->second.mem->d_mt;
1132 }
1133 MFEM_ABORT("internal error");
1134 return MemoryManager::host_mem_type;
1135}
1136
1137MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
1138{
1139 if (!mm.exists) { return MemoryManager::host_mem_type; }
1140 if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
1141 if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).h_mt; }
1142 return MemoryManager::host_mem_type;
1143}
1144
void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
                          size_t bytes, unsigned src_flags,
                          unsigned &dst_flags)
{
   // Copy 'bytes' bytes between two Memory objects, choosing host or device
   // on each side from the validity flags of both operands.
   //
   // Type of copy to use based on the src and dest validity flags:
   //            |       src
   //            |  h  |  d  |  hd
   // -----------+-----+-----+------
   //         h  | h2h   d2h   h2h
   //  dest   d  | h2d   d2d   d2d
   //        hd  | h2h   d2d   d2d

   MFEM_ASSERT(bytes != 0, "this method should not be called with bytes = 0");
   MFEM_ASSERT(dst_h_ptr != nullptr, "invalid dst_h_ptr = nullptr");
   MFEM_ASSERT(src_h_ptr != nullptr, "invalid src_h_ptr = nullptr");

   // Destination is written on the host when its host copy is valid and
   // either its device copy is not, or the source is valid only on the host.
   const bool dst_on_host =
      (dst_flags & Mem::VALID_HOST) &&
      (!(dst_flags & Mem::VALID_DEVICE) ||
       ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));

   // Only the side being written to remains valid.
   dst_flags = dst_flags &
               ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);

   // NOTE: this test intentionally uses the already-updated 'dst_flags'.
   const bool src_on_host =
      (src_flags & Mem::VALID_HOST) &&
      (!(src_flags & Mem::VALID_DEVICE) ||
       ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));

   // Device-side source pointer (NULL when reading from the host side).
   const void *src_d_ptr =
      src_on_host ? NULL :
      ((src_flags & Mem::ALIAS) ?
       mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
       mm.GetDevicePtr(src_h_ptr, bytes, false));

   if (dst_on_host)
   {
      if (src_on_host)
      {
         // h2h: plain memcpy; overlapping ranges are not supported.
         if (dst_h_ptr != src_h_ptr && bytes != 0)
         {
            MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
                        (const char*)src_h_ptr + bytes <= dst_h_ptr,
                        "data overlaps!");
            std::memcpy(dst_h_ptr, src_h_ptr, bytes);
         }
      }
      else
      {
         // d2h: device type is taken from the source's registered Memory.
         if (dst_h_ptr != src_d_ptr && bytes != 0)
         {
            internal::Memory &src_d_base = maps->memories.at(src_h_ptr);
            MemoryType src_d_mt = src_d_base.d_mt;
            ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
         }
      }
   }
   else
   {
      void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
                         mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
                         mm.GetDevicePtr(dst_h_ptr, bytes, false);
      if (src_on_host)
      {
         // h2d: device type is taken from the destination (base or alias).
         const bool known = mm.IsKnown(dst_h_ptr);
         const bool alias = dst_flags & Mem::ALIAS;
         MFEM_VERIFY(alias||known,"");
         const MemoryType d_mt = known ?
                                 maps->memories.at(dst_h_ptr).d_mt :
                                 maps->aliases.at(dst_h_ptr).mem->d_mt;
         ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
      }
      else
      {
         // d2d: both sides live on the device.
         if (dest_d_ptr != src_d_ptr && bytes != 0)
         {
            const bool known = mm.IsKnown(dst_h_ptr);
            const bool alias = dst_flags & Mem::ALIAS;
            MFEM_VERIFY(alias||known,"");
            const MemoryType d_mt = known ?
                                    maps->memories.at(dst_h_ptr).d_mt :
                                    maps->aliases.at(dst_h_ptr).mem->d_mt;
            ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
         }
      }
   }
}
1232
void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
                                size_t bytes, unsigned src_flags)
{
   // Copy 'bytes' bytes from the Memory (src_h_ptr, src_flags) into the
   // plain host buffer 'dest_h_ptr'.
   MFEM_ASSERT(bytes != 0, "this method should not be called with bytes = 0");
   MFEM_ASSERT(dest_h_ptr != nullptr, "invalid dest_h_ptr = nullptr");
   MFEM_ASSERT(src_h_ptr != nullptr, "invalid src_h_ptr = nullptr");

   const bool src_on_host = src_flags & Mem::VALID_HOST;
   if (src_on_host)
   {
      // h2h: plain memcpy; overlapping ranges are not supported.
      if (dest_h_ptr != src_h_ptr && bytes != 0)
      {
         MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
                     (const char*)src_h_ptr + bytes <= dest_h_ptr,
                     "data overlaps!");
         std::memcpy(dest_h_ptr, src_h_ptr, bytes);
      }
   }
   else
   {
      MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
      const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
                              mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
                              mm.GetDevicePtr(src_h_ptr, bytes, false);
      // NOTE(review): the device MemoryType is looked up through
      // 'dest_h_ptr', not 'src_h_ptr', so the destination host pointer must
      // also be registered — confirm this is the intended contract.
      const internal::Memory &base = maps->memories.at(dest_h_ptr);
      const MemoryType d_mt = base.d_mt;
      ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
   }
}
1262
void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
                                  size_t bytes, unsigned &dest_flags)
{
   // Copy 'bytes' bytes from the plain host buffer 'src_h_ptr' into the
   // Memory (dest_h_ptr, dest_flags). Only the side that was written to
   // remains valid on return.
   MFEM_ASSERT(bytes != 0, "this method should not be called with bytes = 0");
   MFEM_ASSERT(dest_h_ptr != nullptr, "invalid dest_h_ptr = nullptr");
   MFEM_ASSERT(src_h_ptr != nullptr, "invalid src_h_ptr = nullptr");

   const bool dest_on_host = dest_flags & Mem::VALID_HOST;
   if (dest_on_host)
   {
      // h2h: plain memcpy; overlapping ranges are not supported.
      if (dest_h_ptr != src_h_ptr && bytes != 0)
      {
         MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
                     (const char*)src_h_ptr + bytes <= dest_h_ptr,
                     "data overlaps!");
         std::memcpy(dest_h_ptr, src_h_ptr, bytes);
      }
   }
   else
   {
      void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
                         mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
                         mm.GetDevicePtr(dest_h_ptr, bytes, false);
      // NOTE(review): this lookup assumes 'dest_h_ptr' is a registered base
      // pointer; an alias would not be found by memories.at() — confirm
      // aliases cannot reach this branch.
      const internal::Memory &base = maps->memories.at(dest_h_ptr);
      const MemoryType d_mt = base.d_mt;
      ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
   }
   dest_flags = dest_flags &
                ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
}
1293
1294bool MemoryManager::IsKnown_(const void *h_ptr)
1295{
1296 return maps->memories.find(h_ptr) != maps->memories.end();
1297}
1298
1299bool MemoryManager::IsAlias_(const void *h_ptr)
1300{
1301 return maps->aliases.find(h_ptr) != maps->aliases.end();
1302}
1303
void MemoryManager::Insert(void *h_ptr, size_t bytes,
                           MemoryType h_mt, MemoryType d_mt)
{
   // Register the host pointer 'h_ptr' (size 'bytes', host type 'h_mt',
   // device type 'd_mt') in the memory map. Re-registration of the same
   // address is tolerated; in MFEM_DEBUG builds the attributes must match.
#ifdef MFEM_TRACK_MEM_MANAGER
   mfem::out << "[mfem memory manager]: registering h_ptr: " << h_ptr
             << ", bytes: " << bytes << std::endl;
#endif
   if (h_ptr == NULL)
   {
      MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
      return;
   }
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   // The emplace() result is needed only for the debug checks below, hence
   // the preprocessor-split statement.
#ifdef MFEM_DEBUG
   auto res =
#endif
      maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
#ifdef MFEM_DEBUG
   if (res.second == false)
   {
      // Address was already registered: verify the attributes are compatible
      // (d_mt == DEFAULT is accepted when the stored type is the host dual).
      auto &m = res.first->second;
      MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt &&
                  (m.d_mt == d_mt || (d_mt == MemoryType::DEFAULT &&
                                      m.d_mt == GetDualMemoryType(h_mt))),
                  "Address already present with different attributes!");
#ifdef MFEM_TRACK_MEM_MANAGER
      mfem::out << "[mfem memory manager]: repeated registration of h_ptr: "
                << h_ptr << std::endl;
#endif
   }
#endif
}
1336
1337void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
1338 MemoryType h_mt, MemoryType d_mt)
1339{
1340 // MFEM_VERIFY_TYPES(h_mt, d_mt); // done by Insert() below
1341 MFEM_ASSERT(h_ptr != NULL, "internal error");
1342 Insert(h_ptr, bytes, h_mt, d_mt);
1343 internal::Memory &mem = maps->memories.at(h_ptr);
1344 if (d_ptr == NULL && bytes != 0) { ctrl->Device(d_mt)->Alloc(mem); }
1345 else { mem.d_ptr = d_ptr; }
1346}
1347
void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
                                const size_t bytes, const bool base_is_alias)
{
   // Register 'alias_ptr' as an alias covering 'bytes' bytes inside the
   // Memory registered at 'base_ptr'. When 'base_is_alias' is true,
   // 'base_ptr' is itself an alias and the new alias is re-based onto that
   // alias' underlying Memory.
   size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
                                       static_cast<const char*>(base_ptr));
#ifdef MFEM_TRACK_MEM_MANAGER
   mfem::out << "[mfem memory manager]: registering alias of base_ptr: "
             << base_ptr << ", offset: " << offset << ", bytes: " << bytes
             << ", base is alias: " << base_is_alias << std::endl;
#endif
   if (!base_ptr)
   {
      MFEM_VERIFY(offset == 0,
                  "Trying to add alias to NULL at offset " << offset);
      return;
   }
   if (base_is_alias)
   {
      // Resolve to the real base Memory and accumulate the offsets.
      const internal::Alias &alias = maps->aliases.at(base_ptr);
      MFEM_ASSERT(alias.mem,"");
      base_ptr = alias.mem->h_ptr;
      offset += alias.offset;
#ifdef MFEM_TRACK_MEM_MANAGER
      mfem::out << "[mfem memory manager]: real base_ptr: " << base_ptr
                << std::endl;
#endif
   }
   internal::Memory &mem = maps->memories.at(base_ptr);
   MFEM_VERIFY(offset + bytes <= mem.bytes, "invalid alias");
   // Insert a new Alias record with reference count 1, or bump the count of
   // an existing record for the same address.
   auto res =
      maps->aliases.emplace(alias_ptr,
                            internal::Alias{&mem, offset, 1, mem.h_mt});
   if (res.second == false) // alias_ptr was already in the map
   {
      internal::Alias &alias = res.first->second;
      // Update the alias data in case the existing alias is dangling
      alias.mem = &mem;
      alias.offset = offset;
      alias.h_mt = mem.h_mt;
      alias.counter++;
   }
}
1390
1391void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
1392{
1393#ifdef MFEM_TRACK_MEM_MANAGER
1394 mfem::out << "[mfem memory manager]: un-registering h_ptr: " << h_ptr
1395 << std::endl;
1396#endif
1397 if (!h_ptr) { return; }
1398 auto mem_map_iter = maps->memories.find(h_ptr);
1399 if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1400 internal::Memory &mem = mem_map_iter->second;
1401 if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1402 maps->memories.erase(mem_map_iter);
1403}
1404
1405void MemoryManager::EraseDevice(void *h_ptr)
1406{
1407 if (!h_ptr) { return; }
1408 auto mem_map_iter = maps->memories.find(h_ptr);
1409 if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1410 internal::Memory &mem = mem_map_iter->second;
1411 if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1412 mem.d_ptr = nullptr;
1413}
1414
1415void MemoryManager::EraseAlias(void *alias_ptr)
1416{
1417#ifdef MFEM_TRACK_MEM_MANAGER
1418 mfem::out << "[mfem memory manager]: un-registering alias_ptr: " << alias_ptr
1419 << std::endl;
1420#endif
1421 if (!alias_ptr) { return; }
1422 auto alias_map_iter = maps->aliases.find(alias_ptr);
1423 if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
1424 internal::Alias &alias = alias_map_iter->second;
1425 if (--alias.counter) { return; }
1426 maps->aliases.erase(alias_map_iter);
1427}
1428
void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
                                  bool copy_data)
{
   // Return the device pointer of the registered Memory at 'h_ptr',
   // allocating the device block on first use; when 'copy_data' is set,
   // copy 'bytes' bytes host -> device first.
   if (!h_ptr)
   {
      MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
      return NULL;
   }
   internal::Memory &mem = maps->memories.at(h_ptr);
   const MemoryType &h_mt = mem.h_mt;
   MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   if (!mem.d_ptr)
   {
      // Lazy device allocation; DEFAULT resolves to the host type's dual.
      if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
      if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
   }
   // Aliases might have done some protections
   if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
   if (copy_data)
   {
      MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
      if (bytes) { ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes); }
   }
   // Protect the host side (presumably a no-op for non-debug host types —
   // confirm against the host controller implementations).
   ctrl->Host(h_mt)->Protect(mem, bytes);
   return mem.d_ptr;
}
1456
void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
                                       bool copy)
{
   // Return the device pointer of the alias 'alias_ptr', allocating the base
   // Memory's device block on first use; when 'copy' is set, copy the alias'
   // 'bytes'-byte range host -> device first.
   if (!alias_ptr)
   {
      MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
      return NULL;
   }
   auto &alias_map = maps->aliases;
   auto alias_map_iter = alias_map.find(alias_ptr);
   if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
   const internal::Alias &alias = alias_map_iter->second;
   const size_t offset = alias.offset;
   internal::Memory &mem = *alias.mem;
   const MemoryType &h_mt = mem.h_mt;
   MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   if (!mem.d_ptr)
   {
      // Lazy device allocation for the whole base Memory.
      if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
      if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
   }
   void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
   void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
   MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
   MFEM_ASSERT(offset + bytes <= mem.bytes, "internal error");
   // Clear the base Memory's read-write markers (used by the protection
   // logic — confirm exact semantics in internal::Memory).
   mem.d_rw = mem.h_rw = false;
   if (mem.d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
   ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
   if (copy && mem.d_ptr)
   { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
   ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
   return alias_d_ptr;
}
1491
void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
{
   // Return the host pointer of the registered Memory at 'ptr'; when 'copy'
   // is set, copy 'bytes' bytes device -> host first (if a device copy
   // exists).
   const internal::Memory &mem = maps->memories.at(ptr);
   MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
   MFEM_ASSERT(bytes <= mem.bytes, "internal error")
   const MemoryType &h_mt = mem.h_mt;
   const MemoryType &d_mt = mem.d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   // Aliases might have done some protections
   ctrl->Host(h_mt)->Unprotect(mem, bytes);
   if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
   if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
   // Re-protect the device side after the host becomes the active side.
   if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
   return mem.h_ptr;
}
1507
void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
                                     bool copy_data)
{
   // Return the host pointer of the alias 'ptr'; when 'copy_data' is set,
   // copy the alias' 'bytes'-byte range device -> host first (if the base
   // has a device copy).
   const internal::Alias &alias = maps->aliases.at(ptr);
   const internal::Memory *const mem = alias.mem;
   const MemoryType &h_mt = mem->h_mt;
   const MemoryType &d_mt = mem->d_mt;
   MFEM_VERIFY_TYPES(h_mt, d_mt);
   void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
   // NOTE: computed even when mem->d_ptr is NULL; only dereferenced below
   // under 'mem->d_ptr' checks.
   void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
   MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
   mem->h_rw = false;
   ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
   if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
   if (copy_data && mem->d_ptr)
   { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
   if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
   return alias_h_ptr;
}
1527
1529{
1530 if (exists) { return; }
1531 maps = new internal::Maps();
1532 ctrl = new internal::Ctrl();
1533 ctrl->Configure();
1534 exists = true;
1535}
1536
1538
1540
1542{
1543 MFEM_VERIFY(!configured, "changing the dual MemoryTypes is not allowed after"
1544 " MemoryManager configuration!");
1545 UpdateDualMemoryType(mt, dual_mt);
1546}
1547
1548void MemoryManager::UpdateDualMemoryType(MemoryType mt, MemoryType dual_mt)
1549{
1550 MFEM_VERIFY((int)mt < MemoryTypeSize,
1551 "invalid MemoryType, mt = " << (int)mt);
1552 MFEM_VERIFY((int)dual_mt < MemoryTypeSize,
1553 "invalid dual MemoryType, dual_mt = " << (int)dual_mt);
1554
1555 if ((IsHostMemory(mt) && IsDeviceMemory(dual_mt)) ||
1556 (IsDeviceMemory(mt) && IsHostMemory(dual_mt)))
1557 {
1558 dual_map[(int)mt] = dual_mt;
1559 }
1560 else
1561 {
1562 // mt + dual_mt is not a pair of host + device types: this is only allowed
1563 // when mt == dual_mt and mt is a host type; in this case we do not
1564 // actually update the dual
1565 MFEM_VERIFY(mt == dual_mt && IsHostMemory(mt),
1566 "invalid (mt, dual_mt) pair: ("
1567 << MemoryTypeName[(int)mt] << ", "
1568 << MemoryTypeName[(int)dual_mt] << ')');
1569 }
1570}
1571
1573 const MemoryType device_mt)
1574{
1575 MemoryManager::UpdateDualMemoryType(host_mt, device_mt);
1576 MemoryManager::UpdateDualMemoryType(device_mt, host_mt);
1577 if (device_mt == MemoryType::DEVICE_DEBUG)
1578 {
1579 for (int mt = (int)MemoryType::HOST; mt < (int)MemoryType::MANAGED; mt++)
1580 {
1581 MemoryManager::UpdateDualMemoryType(
1583 }
1584 }
1585 Init();
1586 host_mem_type = host_mt;
1587 device_mem_type = device_mt;
1588 configured = true;
1589}
1590
1592{
1593 MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
1594#ifdef MFEM_TRACK_MEM_MANAGER
1595 size_t num_memories = maps->memories.size();
1596 size_t num_aliases = maps->aliases.size();
1597 if (num_memories != 0 || num_aliases != 0)
1598 {
1599 MFEM_WARNING("...\n\t number of registered pointers: " << num_memories
1600 << "\n\t number of registered aliases : " << num_aliases);
1601 }
1602#endif
1603 // Keep for debugging purposes:
1604#if 0
1605 mfem::out << "Destroying the MemoryManager ...\n"
1606 << "remaining registered pointers : "
1607 << maps->memories.size() << '\n'
1608 << "remaining registered aliases : "
1609 << maps->aliases.size() << '\n';
1610#endif
1611 for (auto& n : maps->memories)
1612 {
1613 internal::Memory &mem = n.second;
1614 bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
1615 if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1616 if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1617 }
1618 delete maps; maps = nullptr;
1619 delete ctrl; ctrl = nullptr;
1620 host_mem_type = MemoryType::HOST;
1621 device_mem_type = MemoryType::HOST;
1622 exists = false;
1623 configured = false;
1624}
1625
1627{
1628 if (ptr != NULL)
1629 {
1630 if (!IsKnown(ptr))
1631 {
1632 mfem_error("Pointer is not registered!");
1633 }
1634 }
1635}
1636
1637int MemoryManager::PrintPtrs(std::ostream &os)
1638{
1639 int n_out = 0;
1640 for (const auto& n : maps->memories)
1641 {
1642 const internal::Memory &mem = n.second;
1643 os << "\nkey " << n.first << ", "
1644 << "h_ptr " << mem.h_ptr << ", "
1645 << "d_ptr " << mem.d_ptr;
1646 n_out++;
1647 }
1648 if (maps->memories.size() > 0) { os << std::endl; }
1649 return n_out;
1650}
1651
1652int MemoryManager::PrintAliases(std::ostream &os)
1653{
1654 int n_out = 0;
1655 for (const auto& n : maps->aliases)
1656 {
1657 const internal::Alias &alias = n.second;
1658 os << "\nalias: key " << n.first << ", "
1659 << "h_ptr " << alias.mem->h_ptr << ", "
1660 << "offset " << alias.offset << ", "
1661 << "counter " << alias.counter;
1662 n_out++;
1663 }
1664 if (maps->aliases.size() > 0) { os << std::endl; }
1665 return n_out;
1666}
1667
1668int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
1669 unsigned flags)
1670{
1671 void *d_ptr = (flags & Mem::ALIAS) ?
1672 mm.GetAliasDevicePtr(h_ptr, size, false) :
1673 mm.GetDevicePtr(h_ptr, size, false);
1674 char *h_buf = new char[size];
1675#if defined(MFEM_USE_CUDA)
1676 CuMemcpyDtoH(h_buf, d_ptr, size);
1677#elif defined(MFEM_USE_HIP)
1678 HipMemcpyDtoH(h_buf, d_ptr, size);
1679#else
1680 std::memcpy(h_buf, d_ptr, size);
1681#endif
1682 int res = std::memcmp(h_ptr, h_buf, size);
1683 delete [] h_buf;
1684 return res;
1685}
1686
1687
void MemoryPrintFlags(unsigned flags)
{
   // Decode and print each bit of a Memory object's internal 'flags' word to
   // mfem::out; intended to be called from a debugger.
   typedef Memory<int> Mem;
   mfem::out
   << "\n registered = " << bool(flags & Mem::Registered)
   << "\n owns host = " << bool(flags & Mem::OWNS_HOST)
   << "\n owns device = " << bool(flags & Mem::OWNS_DEVICE)
   << "\n owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
   << "\n valid host = " << bool(flags & Mem::VALID_HOST)
   << "\n valid device = " << bool(flags & Mem::VALID_DEVICE)
   << "\n device flag = " << bool(flags & Mem::USE_DEVICE)
   << "\n alias = " << bool(flags & Mem::ALIAS)
   << std::endl;
}
1702
1703void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr,
1704 bool alias)
1705{
1706 if (!mm.exists) {return;}
1707 if (!alias)
1708 {
1709 auto it = maps->memories.find(h_ptr);
1710 MFEM_VERIFY(it != maps->memories.end(),
1711 "host pointer is not registered: h_ptr = " << h_ptr);
1712 MFEM_VERIFY(h_mt == it->second.h_mt, "host pointer MemoryType mismatch");
1713 }
1714 else
1715 {
1716 auto it = maps->aliases.find(h_ptr);
1717 MFEM_VERIFY(it != maps->aliases.end(),
1718 "alias pointer is not registered: h_ptr = " << h_ptr);
1719 MFEM_VERIFY(h_mt == it->second.h_mt, "alias pointer MemoryType mismatch");
1720 }
1721}
1722
1724
1725bool MemoryManager::exists = false;
1726bool MemoryManager::configured = false;
1727
1728MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
1729MemoryType MemoryManager::device_mem_type = MemoryType::HOST;
1730
// Default host <-> device dual memory types, indexed by (int)MemoryType.
MemoryType MemoryManager::dual_map[MemoryTypeSize] =
{
   /* HOST */ MemoryType::DEVICE,
   /* HOST_32 */ MemoryType::DEVICE,
   /* HOST_64 */ MemoryType::DEVICE,
   /* HOST_DEBUG */ MemoryType::DEVICE_DEBUG,
   /* HOST_UMPIRE */ MemoryType::DEVICE_UMPIRE,
   /* HOST_PINNED */ MemoryType::DEVICE,
   /* MANAGED */ MemoryType::MANAGED,
   /* DEVICE */ MemoryType::HOST,
   /* DEVICE_DEBUG */ MemoryType::HOST_DEBUG,
   /* DEVICE_UMPIRE */ MemoryType::HOST_UMPIRE,
   /* DEVICE_UMPIRE_2 */ MemoryType::HOST_UMPIRE
};

// Names of the Umpire allocators used for the Umpire memory types.
#ifdef MFEM_USE_UMPIRE
const char * MemoryManager::h_umpire_name = "MFEM_HOST";
const char * MemoryManager::d_umpire_name = "MFEM_DEVICE";
const char * MemoryManager::d_umpire_2_name = "MFEM_DEVICE_2";
#endif
1751
1752
1754{
1755 "host-std", "host-32", "host-64", "host-debug", "host-umpire", "host-pinned",
1756#if defined(MFEM_USE_CUDA)
1757 "cuda-uvm",
1758 "cuda",
1759#elif defined(MFEM_USE_HIP)
1760 "hip-uvm",
1761 "hip",
1762#else
1763 "managed",
1764 "device",
1765#endif
1766 "device-debug",
1767#if defined(MFEM_USE_CUDA)
1768 "cuda-umpire",
1769 "cuda-umpire-2",
1770#elif defined(MFEM_USE_HIP)
1771 "hip-umpire",
1772 "hip-umpire-2",
1773#else
1774 "device-umpire",
1775 "device-umpire-2",
1776#endif
1777};
1778
1779} // namespace mfem
bool IsKnown(const void *h_ptr)
Return true if the pointer is known by the memory manager.
int PrintPtrs(std::ostream &out=mfem::out)
void RegisterCheck(void *h_ptr)
Check if the host pointer has been registered in the memory manager.
static void SetDualMemoryType(MemoryType mt, MemoryType dual_mt)
Set the dual memory type of mt to be dual_mt.
int PrintAliases(std::ostream &out=mfem::out)
void Init()
Initialize the memory manager.
static MemoryType GetHostMemoryType()
bool IsAlias(const void *h_ptr)
Return true if the pointer is known by the memory manager as an alias.
void Configure(const MemoryType h_mt, const MemoryType d_mt)
Configure the Memory manager with given default host and device types. This method will be called whe...
static MemoryType GetDeviceMemoryType()
static MemoryType GetDualMemoryType(MemoryType mt)
Return the dual MemoryType of the given one, mt.
void Destroy()
Free all the device memories.
Class used by MFEM to store pointers to host and/or device memory.
void PrintFlags() const
Print the internal flags.
int CompareHostAndDevice(int size) const
If both the host and the device data are valid, compare their contents.
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
string space
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition cuda.cpp:34
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition cuda.cpp:79
void * HipMemcpyDtoDAsync(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition hip.cpp:147
bool IsDeviceMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::DEVICE.
constexpr int DeviceMemoryType
void * HipMemAllocHostPinned(void **ptr, size_t bytes)
Allocates page-locked (pinned) host memory.
Definition hip.cpp:64
MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
Return a suitable MemoryClass from a pair of MemoryClasses.
void mfem_error(const char *msg)
Definition error.cpp:154
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
Definition cuda.cpp:49
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition cuda.cpp:155
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition globals.hpp:66
void * CuMemAllocHostPinned(void **ptr, size_t bytes)
Allocates page-locked (pinned) host memory.
Definition cuda.cpp:64
MemoryClass
Memory classes identify sets of memory types.
@ HOST_32
Memory types: { HOST_32, HOST_64, HOST_DEBUG }.
@ HOST_64
Memory types: { HOST_64, HOST_DEBUG }.
@ MANAGED
Memory types: { MANAGED }.
constexpr int MemoryTypeSize
Static casts to 'int' and sizes of some useful memory types.
void * CuMemFreeHostPinned(void *ptr)
Frees page-locked (pinned) host memory and returns destination ptr.
Definition cuda.cpp:94
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
Definition cuda.cpp:109
MemoryManager mm
The (single) global memory manager object.
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
constexpr int HostMemoryTypeSize
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
void * HipMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device.
Definition hip.cpp:109
void * HipMemFree(void *dptr)
Frees device memory.
Definition hip.cpp:79
constexpr int HostMemoryType
void * HipMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
Definition hip.cpp:155
void * HipMemFreeHostPinned(void *ptr)
Frees page-locked (pinned) host memory and returns destination ptr.
Definition hip.cpp:94
constexpr int DeviceMemoryTypeSize
MemoryType
Memory types supported by MFEM.
@ HOST_32
Host memory; aligned at 32 bytes.
@ HOST_64
Host memory; aligned at 64 bytes.
@ HOST
Host memory; using new[] and delete[].
@ HOST_PINNED
Host memory: pinned (page-locked)
@ HOST_DEBUG
Host memory; allocated from a "host-debug" pool.
@ DEVICE
Device memory; using CUDA or HIP *Malloc and *Free.
void MemoryPrintFlags(unsigned flags)
Print the state of a Memory object based on its internal flags. Useful in a debugger....
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
Definition cuda.cpp:132
bool MemoryClassContainsType(MemoryClass mc, MemoryType mt)
Return true iff the MemoryType mt is contained in the MemoryClass mc.
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
Definition hip.cpp:34
real_t p(const Vector &x, real_t t)