17 #include <unordered_map>
28 #define mfem_memalign(p,a,s) posix_memalign(p,a,s)
29 #define mfem_aligned_free free
31 #define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
32 #define mfem_aligned_free _aligned_free
35 #ifdef MFEM_USE_UMPIRE
36 #include "umpire/Umpire.hpp"
39 #if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
40 #error "CUDA is not enabled in Umpire!"
43 #if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
44 #error "HIP is not enabled in Umpire!"
46 #endif // MFEM_USE_UMPIRE
61 MFEM_VERIFY(
false,
"");
81 MFEM_VERIFY(
false,
"");
96 MFEM_VERIFY(sync,
"");
113 return std::max(mc1, mc2);
139 mutable bool h_rw, d_rw;
141 h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
142 h_rw(true), d_rw(true) { }
149 const size_t offset, bytes;
155 typedef std::unordered_map<const void*, Memory> MemoryMap;
156 typedef std::unordered_map<const void*, Alias> AliasMap;
166 static internal::Maps *maps;
// Abstract host memory-space interface: allocation plus page-protection
// hooks. The base-class defaults use std::malloc/std::free and make all
// (Alias)Protect/Unprotect calls no-ops; subclasses override as needed.
// NOTE(review): Doxygen source-listing fragment — the class braces and
// "public:" specifier are not visible in this extraction.
172 class HostMemorySpace
175 virtual ~HostMemorySpace() { }
// Default host allocation: plain std::malloc (no alignment, no tracking).
176 virtual void Alloc(
void **ptr,
size_t bytes) { *ptr = std::malloc(bytes); }
177 virtual void Dealloc(
void *ptr) { std::free(ptr); }
// Protection hooks are no-ops here; the MMU-backed space overrides them.
178 virtual void Protect(
const Memory&,
size_t) { }
179 virtual void Unprotect(
const Memory&,
size_t) { }
// Alias variants operate on raw pointer ranges rather than Memory records.
180 virtual void AliasProtect(
const void*,
size_t) { }
181 virtual void AliasUnprotect(
const void*,
size_t) { }
// Abstract device memory-space interface. The defaults emulate a "device"
// on the host: std::malloc/std::free for storage and std::memcpy for all
// three transfer directions (HtoD, DtoD, DtoH).
// NOTE(review): extraction fragment — class braces/"public:" not visible.
185 class DeviceMemorySpace
188 virtual ~DeviceMemorySpace() { }
189 virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
190 virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
// Protection hooks default to no-ops (see MmuDeviceMemorySpace).
191 virtual void Protect(
const Memory&) { }
192 virtual void Unprotect(
const Memory&) { }
193 virtual void AliasProtect(
const void*,
size_t) { }
194 virtual void AliasUnprotect(
const void*,
size_t) { }
// Host-to-device copy; the host-emulated default is a memcpy.
195 virtual void *HtoD(
void *dst,
const void *src,
size_t bytes)
196 {
return std::memcpy(dst, src, bytes); }
// Device-to-device copy.
197 virtual void *DtoD(
void *dst,
const void *src,
size_t bytes)
198 {
return std::memcpy(dst, src, bytes); }
// Device-to-host copy.
199 virtual void *DtoH(
void *dst,
const void *src,
size_t bytes)
200 {
return std::memcpy(dst, src, bytes); }
// Concrete host spaces.
// StdHostMemorySpace: inherits the malloc/free defaults unchanged.
204 class StdHostMemorySpace :
public HostMemorySpace { };
// NoHostMemorySpace: any host allocation is a hard error (used where no
// host backend should ever be exercised).
207 struct NoHostMemorySpace :
public HostMemorySpace
209 void Alloc(
void**,
const size_t) {
mfem_error(
"! Host Alloc error"); }
// Aligned32HostMemorySpace: 32-byte-aligned host allocations via
// mfem_memalign (posix_memalign or _aligned_malloc, per the macros above).
213 class Aligned32HostMemorySpace :
public HostMemorySpace
216 Aligned32HostMemorySpace(): HostMemorySpace() { }
217 void Alloc(
void **ptr,
size_t bytes)
218 {
if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
219 void Dealloc(
void *ptr) { mfem_aligned_free(ptr); }
// Aligned64HostMemorySpace: identical, with 64-byte alignment.
223 class Aligned64HostMemorySpace :
public HostMemorySpace
226 Aligned64HostMemorySpace(): HostMemorySpace() { }
227 void Alloc(
void **ptr,
size_t bytes)
228 {
if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
229 void Dealloc(
void *ptr) { mfem_aligned_free(ptr); }
// Page size/mask for the MMU-protected (debug) memory spaces; initialized
// lazily by MmuInit().
233 static uintptr_t pagesize = 0;
234 static uintptr_t pagemask = 0;
// Round ptr up to the next page boundary (identity if already aligned).
237 inline const void *MmuAddrR(
const void *ptr)
239 const uintptr_t addr = (uintptr_t) ptr;
240 return (addr & pagemask) ? (
void*) ((addr + pagesize) & ~pagemask) : ptr;
// Round ptr down to the start of its page.
244 inline const void *MmuAddrP(
const void *ptr)
246 const uintptr_t addr = (uintptr_t) ptr;
247 return (
void*) (addr & ~pagemask);
// Byte length of the whole pages fully contained in [ptr, ptr+bytes):
// rounded-up start (A) to rounded-down end (B), 0 if no full page fits.
// NOTE(review): the trailing "return length;" of this function is among
// the lines dropped from this listing.
251 inline uintptr_t MmuLengthR(
const void *ptr,
const size_t bytes)
254 const uintptr_t
a = (uintptr_t) ptr;
255 const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
256 MFEM_ASSERT(a <= A,
"");
257 const uintptr_t
b = a + bytes;
258 const uintptr_t B = b & ~pagemask;
259 MFEM_ASSERT(B <= b,
"");
260 const uintptr_t length = B > A ? B - A : 0;
261 MFEM_ASSERT(length % pagesize == 0,
"");
// Byte length of the pages that entirely cover [ptr, ptr+bytes):
// rounded-down start to rounded-up end. (Return statement also dropped.)
266 inline uintptr_t MmuLengthP(
const void *ptr,
const size_t bytes)
269 const uintptr_t a = (uintptr_t) ptr;
270 const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
271 MFEM_ASSERT(A <= a,
"");
272 const uintptr_t
b = a + bytes;
273 const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
274 MFEM_ASSERT(b <= B,
"");
275 MFEM_ASSERT(B >= A,
"");
276 const uintptr_t length = B - A;
277 MFEM_ASSERT(length % pagesize == 0,
"");
// SIGBUS/SIGSEGV handler for the MMU-protected spaces: formats the
// faulting address and reports the illegal access. (The buffer declaration
// for `str` and the final abort are not visible in this extraction.)
282 static void MmuError(
int, siginfo_t *si,
void*)
286 const void *ptr = si->si_addr;
287 sprintf(str,
"Error while accessing address %p!", ptr);
288 mfem::out << std::endl <<
"An illegal memory access was made!";
// One-time MMU setup: install MmuError for SIGBUS and SIGSEGV and cache
// the system page size/mask. Idempotent via the pagesize > 0 guard.
293 static void MmuInit()
295 if (pagesize > 0) {
return; }
297 sa.sa_flags = SA_SIGINFO;
298 sigemptyset(&sa.sa_mask);
299 sa.sa_sigaction = MmuError;
300 if (sigaction(SIGBUS, &sa, NULL) == -1) {
mfem_error(
"SIGBUS"); }
301 if (sigaction(SIGSEGV, &sa, NULL) == -1) {
mfem_error(
"SIGSEGV"); }
302 pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
303 MFEM_ASSERT(pagesize > 0,
"pagesize must not be less than 1");
304 pagemask = pagesize - 1;
// MMU-based allocation: anonymous private mmap so the block occupies its
// own pages and can later be mprotect-ed; zero-byte requests pad to 8.
308 inline void MmuAlloc(
void **ptr,
const size_t bytes)
310 const size_t length = bytes == 0 ? 8 : bytes;
311 const int prot = PROT_READ | PROT_WRITE;
312 const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
313 *ptr = ::mmap(NULL, length, prot, flags, -1, 0);
314 if (*ptr == MAP_FAILED) { throw ::std::bad_alloc(); }
// Unmap an MMU allocation (same zero-byte padding as MmuAlloc).
318 inline void MmuDealloc(
void *ptr,
const size_t bytes)
320 const size_t length = bytes == 0 ? 8 : bytes;
321 if (::munmap(ptr, length) == -1) {
mfem_error(
"Dealloc error!"); }
// Revoke all access (PROT_NONE) so stray reads/writes fault into MmuError.
// NOTE(review): the error path taken when mprotect fails is among the
// lines dropped from this listing.
325 inline void MmuProtect(
const void *ptr,
const size_t bytes)
327 if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) {
return; }
// Restore read/write access. (Failure path likewise not visible.)
332 inline void MmuAllow(
const void *ptr,
const size_t bytes)
334 const int RW = PROT_READ | PROT_WRITE;
335 if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) {
return; }
// Fallback stubs for platforms without the mmap/mprotect/sigaction path:
// plain malloc/free, protection calls are no-ops, address helpers are the
// identity, and page-length helpers report 0 (nothing to protect).
339 inline void MmuInit() { }
340 inline void MmuAlloc(
void **ptr,
const size_t bytes) { *ptr = std::malloc(bytes); }
341 inline void MmuDealloc(
void *ptr,
const size_t) { std::free(ptr); }
342 inline void MmuProtect(
const void*,
const size_t) { }
343 inline void MmuAllow(
const void*,
const size_t) { }
344 inline const void *MmuAddrR(
const void *a) {
return a; }
345 inline const void *MmuAddrP(
const void *a) {
return a; }
346 inline uintptr_t MmuLengthR(
const void*,
const size_t) {
return 0; }
347 inline uintptr_t MmuLengthP(
const void*,
const size_t) {
return 0; }
// Debug host space: allocations live in their own pages (MmuAlloc) and are
// protected/unprotected around accesses so invalid host touches fault.
// NOTE(review): extraction fragment — class braces/"public:" not visible.
351 class MmuHostMemorySpace :
public HostMemorySpace
354 MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
355 void Alloc(
void **ptr,
size_t bytes) { MmuAlloc(ptr, bytes); }
// Dealloc recovers the tracked size from the global memory map.
356 void Dealloc(
void *ptr) { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
// h_rw caches the current protection state to skip redundant mprotects.
357 void Protect(
const Memory& mem,
size_t bytes)
358 {
if (mem.h_rw) { mem.h_rw =
false; MmuProtect(mem.h_ptr, bytes); } }
359 void Unprotect(
const Memory &mem,
size_t bytes)
360 {
if (!mem.h_rw) { mem.h_rw =
true; MmuAllow(mem.h_ptr, bytes); } }
// Alias ranges: protect only pages fully inside the range (R variants),
// but re-allow every page the range touches (P variants).
362 void AliasProtect(
const void *ptr,
size_t bytes)
363 { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
365 void AliasUnprotect(
const void *ptr,
size_t bytes)
366 { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
// UVM host space: host pointers come from CUDA managed memory
// (CuMallocManaged), so the same address is usable on host and device.
// Zero-byte requests are padded to 8 bytes.
370 class UvmHostMemorySpace :
public HostMemorySpace
373 UvmHostMemorySpace(): HostMemorySpace() { }
374 void Alloc(
void **ptr,
size_t bytes) {
CuMallocManaged(ptr, bytes == 0 ? 8 : bytes); }
375 void Dealloc(
void *ptr) {
CuMemFree(ptr); }
// Placeholder device space: every operation is a hard error. Selected when
// no device backend is configured but a device call is attempted anyway.
379 class NoDeviceMemorySpace:
public DeviceMemorySpace
382 void Alloc(internal::Memory&) {
mfem_error(
"! Device Alloc"); }
383 void Dealloc(Memory&) {
mfem_error(
"! Device Dealloc"); }
384 void *HtoD(
void*,
const void*,
size_t) {
mfem_error(
"!HtoD");
return nullptr; }
385 void *DtoD(
void*,
const void*,
size_t) {
mfem_error(
"!DtoD");
return nullptr; }
386 void *DtoH(
void*,
const void*,
size_t) {
mfem_error(
"!DtoH");
return nullptr; }
// StdDeviceMemorySpace: keeps the base-class host-emulated behavior.
390 class StdDeviceMemorySpace :
public DeviceMemorySpace { };
// CUDA-backed device space: CuMemAlloc/CuMemFree for storage. The
// HtoD/DtoD/DtoH bodies (original lines 400/402/404, presumably CuMemcpy*
// calls — TODO confirm against full source) were dropped from this listing.
393 class CudaDeviceMemorySpace:
public DeviceMemorySpace
396 CudaDeviceMemorySpace(): DeviceMemorySpace() { }
397 void Alloc(Memory &base) {
CuMemAlloc(&base.d_ptr, base.bytes); }
398 void Dealloc(Memory &base) {
CuMemFree(base.d_ptr); }
399 void *HtoD(
void *dst,
const void *src,
size_t bytes)
401 void *DtoD(
void* dst,
const void* src,
size_t bytes)
403 void *DtoH(
void *dst,
const void *src,
size_t bytes)
// HIP-backed device space, mirroring CudaDeviceMemorySpace with the
// HipMem* API. Transfer bodies (original lines 415/417/419, presumably
// HipMemcpy* calls — TODO confirm) are not visible in this extraction.
408 class HipDeviceMemorySpace:
public DeviceMemorySpace
411 HipDeviceMemorySpace(): DeviceMemorySpace() { }
412 void Alloc(Memory &base) {
HipMemAlloc(&base.d_ptr, base.bytes); }
413 void Dealloc(Memory &base) {
HipMemFree(base.d_ptr); }
414 void *HtoD(
void *dst,
const void *src,
size_t bytes)
416 void *DtoD(
void* dst,
const void* src,
size_t bytes)
418 void *DtoH(
void *dst,
const void *src,
size_t bytes)
// UVM device space: the device pointer aliases the managed host pointer,
// so Alloc/Dealloc are trivial and a same-pointer HtoD/DtoH reduces to a
// stream synchronization. The distinct-pointer copy paths were dropped
// from this listing.
423 class UvmCudaMemorySpace :
public DeviceMemorySpace
426 void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
427 void Dealloc(Memory&) { }
428 void *HtoD(
void *dst,
const void *src,
size_t bytes)
430 if (dst == src) { MFEM_STREAM_SYNC;
return dst; }
433 void *DtoD(
void* dst,
const void* src,
size_t bytes)
435 void *DtoH(
void *dst,
const void *src,
size_t bytes)
437 if (dst == src) { MFEM_STREAM_SYNC;
return dst; }
// Debug device space: the "device" buffer is MMU-protected host memory,
// with the same d_rw protection-state caching as the host debug space;
// all transfers are plain memcpy.
443 class MmuDeviceMemorySpace :
public DeviceMemorySpace
446 MmuDeviceMemorySpace(): DeviceMemorySpace() { }
447 void Alloc(Memory &m) { MmuAlloc(&m.d_ptr, m.bytes); }
448 void Dealloc(Memory &m) { MmuDealloc(m.d_ptr, m.bytes); }
449 void Protect(
const Memory &m)
450 {
if (m.d_rw) { m.d_rw =
false; MmuProtect(m.d_ptr, m.bytes); } }
451 void Unprotect(
const Memory &m)
452 {
if (!m.d_rw) { m.d_rw =
true; MmuAllow(m.d_ptr, m.bytes); } }
// Alias ranges: round inward to protect, outward to unprotect.
454 void AliasProtect(
const void *ptr,
size_t bytes)
455 { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
457 void AliasUnprotect(
const void *ptr,
size_t bytes)
458 { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
459 void *HtoD(
void *dst,
const void *src,
size_t bytes)
460 {
return std::memcpy(dst, src, bytes); }
461 void *DtoD(
void *dst,
const void *src,
size_t bytes)
462 {
return std::memcpy(dst, src, bytes); }
463 void *DtoH(
void *dst,
const void *src,
size_t bytes)
464 {
return std::memcpy(dst, src, bytes); }
// Umpire-backed spaces. Without MFEM_USE_UMPIRE they degenerate to the
// error-only No*MemorySpace classes; with it, allocators are fetched by
// the configured name from the Umpire ResourceManager, or created as
// DynamicPool allocators over the "HOST"/"DEVICE" resources.
467 #ifndef MFEM_USE_UMPIRE
468 class UmpireHostMemorySpace :
public NoHostMemorySpace { };
469 class UmpireDeviceMemorySpace :
public NoDeviceMemorySpace { };
472 class UmpireHostMemorySpace :
public HostMemorySpace
476 umpire::ResourceManager &rm;
477 umpire::Allocator h_allocator;
478 umpire::strategy::AllocationStrategy *strat;
480 ~UmpireHostMemorySpace() { h_allocator.release(); }
// Reuse an existing allocator of the configured name if present, else
// build a DynamicPool on top of Umpire's "HOST" resource.
481 UmpireHostMemorySpace():
483 name(
mm.GetUmpireAllocatorHostName()),
484 rm(umpire::ResourceManager::getInstance()),
485 h_allocator(rm.isAllocator(name)? rm.getAllocator(name):
486 rm.makeAllocator<umpire::strategy::DynamicPool>
487 (name, rm.getAllocator(
"HOST"))),
488 strat(h_allocator.getAllocationStrategy()) { }
489 void Alloc(
void **ptr,
size_t bytes) { *ptr = h_allocator.allocate(bytes); }
490 void Dealloc(
void *ptr) { h_allocator.deallocate(ptr); }
// Register an externally-allocated block with Umpire's bookkeeping.
491 void Insert(
void *ptr,
size_t bytes)
492 { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
// Device-side analog; the HtoD/DtoD/DtoH bodies (original lines 515-545)
// were dropped from this listing.
497 class UmpireDeviceMemorySpace :
public DeviceMemorySpace
501 umpire::ResourceManager &rm;
502 umpire::Allocator d_allocator;
504 ~UmpireDeviceMemorySpace() { d_allocator.release(); }
505 UmpireDeviceMemorySpace():
507 name(
mm.GetUmpireAllocatorDeviceName()),
508 rm(umpire::ResourceManager::getInstance()),
509 d_allocator(rm.isAllocator(name)? rm.getAllocator(name):
510 rm.makeAllocator<umpire::strategy::DynamicPool>
511 (name, rm.getAllocator(
"DEVICE"))) { }
512 void Alloc(Memory &base) { base.d_ptr = d_allocator.allocate(base.bytes); }
513 void Dealloc(Memory &base) { d_allocator.deallocate(base.d_ptr); }
514 void *HtoD(
void *dst,
const void *src,
size_t bytes)
524 void *DtoD(
void* dst,
const void* src,
size_t bytes)
534 void *DtoH(
void *dst,
const void *src,
size_t bytes)
// Fallback when Umpire is present but no CUDA/HIP device is enabled.
546 class UmpireDeviceMemorySpace :
public NoDeviceMemorySpace { };
547 #endif // MFEM_USE_CUDA
548 #endif // MFEM_USE_UMPIRE
560 Ctrl(): host{
nullptr}, device{
nullptr} { }
566 mfem_error(
"Memory backends have already been configured!");
572 host[
static_cast<int>(
MT::HOST)] =
new StdHostMemorySpace();
573 host[
static_cast<int>(
MT::HOST_32)] =
new Aligned32HostMemorySpace();
574 host[
static_cast<int>(
MT::HOST_64)] =
new Aligned64HostMemorySpace();
578 host[
static_cast<int>(
MT::MANAGED)] =
new UvmHostMemorySpace();
582 device[
static_cast<int>(
MT::MANAGED)-shift] =
new UvmCudaMemorySpace();
591 const int mt_i =
static_cast<int>(mt);
593 if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
594 MFEM_ASSERT(host[mt_i],
"Host memory controller is not configured!");
598 DeviceMemorySpace* Device(
const MemoryType mt)
601 MFEM_ASSERT(mt_i >= 0,
"");
603 if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
604 MFEM_ASSERT(device[mt_i],
"Memory manager has not been configured!");
613 for (
int mt = mt_d; mt <
MemoryTypeSize; mt++) {
delete device[mt-mt_d]; }
617 HostMemorySpace* NewHostCtrl(
const MemoryType mt)
620 MFEM_ABORT(
"Unknown host memory controller!");
624 DeviceMemorySpace* NewDeviceCtrl(
const MemoryType mt)
632 #if defined(MFEM_USE_CUDA)
633 return new CudaDeviceMemorySpace();
634 #elif defined(MFEM_USE_HIP)
635 return new HipDeviceMemorySpace();
637 MFEM_ABORT(
"No device memory controller!");
641 default: MFEM_ABORT(
"Unknown device memory controller!");
649 static internal::Ctrl *ctrl;
651 void *MemoryManager::New_(
void *h_tmp,
size_t bytes,
MemoryType mt,
654 MFEM_ASSERT(exists,
"Internal error!");
657 const MemType dual_mt = GetDualMemoryType_(mt);
658 const MemType h_mt = is_host_mem ? mt : dual_mt;
659 const MemType d_mt = is_host_mem ? dual_mt : mt;
660 MFEM_VERIFY_TYPES(h_mt, d_mt);
662 if (h_tmp ==
nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
666 if (is_host_mem) {
mm.Insert(h_ptr, bytes, h_mt, d_mt); }
667 else {
mm.InsertDevice(
nullptr, h_ptr, bytes, h_mt, d_mt); }
668 CheckHostMemoryType_(h_mt, h_ptr);
672 void *MemoryManager::Register_(
void *ptr,
void *h_tmp,
size_t bytes,
674 bool own,
bool alias,
unsigned &flags)
676 MFEM_CONTRACT_VAR(alias);
677 MFEM_ASSERT(exists,
"Internal error!");
678 MFEM_ASSERT(!alias,
"Cannot register an alias!");
680 const MemType dual_mt = GetDualMemoryType_(mt);
681 const MemType h_mt = is_host_mem ? mt : dual_mt;
682 const MemType d_mt = is_host_mem ? dual_mt : mt;
683 MFEM_VERIFY_TYPES(h_mt, d_mt);
685 if (ptr ==
nullptr && h_tmp ==
nullptr)
687 MFEM_VERIFY(bytes == 0,
"internal error");
697 mm.Insert(h_ptr, bytes, h_mt, d_mt);
698 flags = (own ? flags |
Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
704 if (own && h_tmp ==
nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
705 mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
710 CheckHostMemoryType_(h_mt, h_ptr);
714 void MemoryManager::Alias_(
void *base_h_ptr,
size_t offset,
size_t bytes,
715 unsigned base_flags,
unsigned &flags)
717 mm.InsertAlias(base_h_ptr, (
char*)base_h_ptr + offset, bytes,
731 MFEM_ASSERT(!owns_device || owns_internal,
"invalid Memory state");
732 if (!
mm.exists || !registered) {
return mt; }
737 const MemoryType h_mt = maps->aliases.at(h_ptr).h_mt;
738 MFEM_ASSERT(mt == h_mt,
"");
739 mm.EraseAlias(h_ptr);
746 MFEM_ASSERT(!owns_internal ||
747 mt == maps->memories.at(h_ptr).h_mt,
"");
749 { ctrl->Host(h_mt)->Dealloc(h_ptr); }
750 if (owns_internal) {
mm.Erase(h_ptr, owns_device); }
756 bool MemoryManager::MemoryClassCheck_(
MemoryClass mc,
void *h_ptr,
762 MFEM_VERIFY(bytes == 0,
"Trying to access NULL with size " << bytes);
768 const bool check = known || ((flags &
Mem::ALIAS) && alias);
769 MFEM_VERIFY(check,
"");
770 const internal::Memory &mem =
772 *maps->aliases.at(h_ptr).mem : maps->memories.at(h_ptr);
807 size_t bytes,
unsigned &flags)
809 MemoryManager::CheckHostMemoryType_(h_mt, h_ptr);
810 if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,
""); }
811 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),
"");
816 if (flags & Mem::ALIAS)
817 {
return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
818 else {
return mm.GetHostPtr(h_ptr, bytes, copy); }
824 if (flags & Mem::ALIAS)
825 {
return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
826 else {
return mm.GetDevicePtr(h_ptr, bytes, copy); }
831 size_t bytes,
unsigned &flags)
833 CheckHostMemoryType_(h_mt, h_ptr);
834 if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,
""); }
835 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),
"");
840 if (flags & Mem::ALIAS)
841 {
return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
842 else {
return mm.GetHostPtr(h_ptr, bytes, copy); }
848 if (flags & Mem::ALIAS)
849 {
return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
850 else {
return mm.GetDevicePtr(h_ptr, bytes, copy); }
855 size_t bytes,
unsigned &flags)
857 CheckHostMemoryType_(h_mt, h_ptr);
858 if (bytes > 0) { MFEM_VERIFY(flags & Mem::REGISTERED,
""); }
859 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),
"");
863 if (flags & Mem::ALIAS)
864 {
return mm.GetAliasHostPtr(h_ptr, bytes,
false); }
865 else {
return mm.GetHostPtr(h_ptr, bytes,
false); }
870 if (flags & Mem::ALIAS)
871 {
return mm.GetAliasDevicePtr(h_ptr, bytes,
false); }
872 else {
return mm.GetDevicePtr(h_ptr, bytes,
false); }
877 void MemoryManager::SyncAlias_(
const void *base_h_ptr,
void *alias_h_ptr,
878 size_t alias_bytes,
unsigned base_flags,
879 unsigned &alias_flags)
883 MFEM_ASSERT(alias_flags & Mem::ALIAS,
"not an alias");
884 if ((base_flags &
Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
886 mm.GetAliasHostPtr(alias_h_ptr, alias_bytes,
true);
890 if (!(alias_flags & Mem::REGISTERED))
892 mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
894 ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
896 mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes,
true);
902 MemoryType MemoryManager::GetDeviceMemoryType_(
void *h_ptr)
909 internal::Memory &mem = maps->memories.at(h_ptr);
915 internal::Memory *mem = maps->aliases.at(h_ptr).mem;
919 MFEM_ABORT(
"internal error");
920 return MemoryManager::host_mem_type;
923 MemoryType MemoryManager::GetHostMemoryType_(
void *h_ptr)
925 if (!
mm.exists) {
return MemoryManager::host_mem_type; }
926 if (
mm.
IsKnown(h_ptr)) {
return maps->memories.at(h_ptr).h_mt; }
927 if (
mm.
IsAlias(h_ptr)) {
return maps->aliases.at(h_ptr).mem->h_mt; }
928 return MemoryManager::host_mem_type;
931 void MemoryManager::Copy_(
void *dst_h_ptr,
const void *src_h_ptr,
932 size_t bytes,
unsigned src_flags,
943 const bool dst_on_host =
945 (!(dst_flags & Mem::VALID_DEVICE) ||
948 dst_flags = dst_flags &
951 const bool src_on_host =
953 (!(src_flags & Mem::VALID_DEVICE) ||
956 const void *src_d_ptr =
959 mm.GetAliasDevicePtr(src_h_ptr, bytes,
false) :
960 mm.GetDevicePtr(src_h_ptr, bytes,
false));
966 if (dst_h_ptr != src_h_ptr && bytes != 0)
968 MFEM_ASSERT((
const char*)dst_h_ptr + bytes <= src_h_ptr ||
969 (
const char*)src_h_ptr + bytes <= dst_h_ptr,
971 std::memcpy(dst_h_ptr, src_h_ptr, bytes);
976 if (dst_h_ptr != src_d_ptr && bytes != 0)
978 internal::Memory &src_d_base = maps->memories.at(src_d_ptr);
980 ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
987 mm.GetAliasDevicePtr(dst_h_ptr, bytes,
false) :
988 mm.GetDevicePtr(dst_h_ptr, bytes,
false);
991 const bool known =
mm.
IsKnown(dst_h_ptr);
993 MFEM_VERIFY(alias||known,
"");
995 maps->memories.at(dst_h_ptr).d_mt :
996 maps->aliases.at(dst_h_ptr).mem->d_mt;
997 ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1001 if (dest_d_ptr != src_d_ptr && bytes != 0)
1003 const bool known =
mm.
IsKnown(dst_h_ptr);
1005 MFEM_VERIFY(alias||known,
"");
1007 maps->memories.at(dst_h_ptr).d_mt :
1008 maps->aliases.at(dst_h_ptr).mem->d_mt;
1009 ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
1015 void MemoryManager::CopyToHost_(
void *dest_h_ptr,
const void *src_h_ptr,
1016 size_t bytes,
unsigned src_flags)
1021 if (dest_h_ptr != src_h_ptr && bytes != 0)
1023 MFEM_ASSERT((
char*)dest_h_ptr + bytes <= src_h_ptr ||
1024 (
const char*)src_h_ptr + bytes <= dest_h_ptr,
1026 std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1031 MFEM_ASSERT(IsKnown_(src_h_ptr),
"internal error");
1032 const void *src_d_ptr = (src_flags &
Mem::ALIAS) ?
1033 mm.GetAliasDevicePtr(src_h_ptr, bytes,
false) :
1034 mm.GetDevicePtr(src_h_ptr, bytes,
false);
1035 const internal::Memory &base = maps->memories.at(dest_h_ptr);
1037 ctrl->Device(d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
1041 void MemoryManager::CopyFromHost_(
void *dest_h_ptr,
const void *src_h_ptr,
1042 size_t bytes,
unsigned &dest_flags)
1047 if (dest_h_ptr != src_h_ptr && bytes != 0)
1049 MFEM_ASSERT((
char*)dest_h_ptr + bytes <= src_h_ptr ||
1050 (
const char*)src_h_ptr + bytes <= dest_h_ptr,
1052 std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1057 void *dest_d_ptr = (dest_flags &
Mem::ALIAS) ?
1058 mm.GetAliasDevicePtr(dest_h_ptr, bytes,
false) :
1059 mm.GetDevicePtr(dest_h_ptr, bytes,
false);
1060 const internal::Memory &base = maps->memories.at(dest_h_ptr);
1062 ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1064 dest_flags = dest_flags &
1068 bool MemoryManager::IsKnown_(
const void *h_ptr)
1070 return maps->memories.find(h_ptr) != maps->memories.end();
1073 bool MemoryManager::IsAlias_(
const void *h_ptr)
1075 return maps->aliases.find(h_ptr) != maps->aliases.end();
1078 void MemoryManager::Insert(
void *h_ptr,
size_t bytes,
1083 MFEM_VERIFY(bytes == 0,
"Trying to add NULL with size " << bytes);
1086 MFEM_VERIFY_TYPES(h_mt, d_mt);
1090 maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
1092 if (res.second ==
false)
1094 auto &m = res.first->second;
1095 MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt && m.d_mt == d_mt,
1096 "Address already present with different attributes!");
1101 void MemoryManager::InsertDevice(
void *d_ptr,
void *h_ptr,
size_t bytes,
1104 MFEM_VERIFY_TYPES(h_mt, d_mt);
1105 MFEM_ASSERT(h_ptr != NULL,
"internal error");
1106 Insert(h_ptr, bytes, h_mt, d_mt);
1107 internal::Memory &mem = maps->memories.at(h_ptr);
1108 if (d_ptr == NULL) { ctrl->Device(d_mt)->Alloc(mem); }
1109 else { mem.d_ptr = d_ptr; }
1112 void MemoryManager::InsertAlias(
const void *base_ptr,
void *alias_ptr,
1113 const size_t bytes,
const bool base_is_alias)
1115 size_t offset =
static_cast<size_t>(
static_cast<const char*
>(alias_ptr) -
1116 static_cast<const char*>(base_ptr));
1119 MFEM_VERIFY(offset == 0,
1120 "Trying to add alias to NULL at offset " << offset);
1125 const internal::Alias &alias = maps->aliases.at(base_ptr);
1126 MFEM_ASSERT(alias.mem,
"");
1127 base_ptr = alias.mem->h_ptr;
1128 offset += alias.offset;
1130 internal::Memory &mem = maps->memories.at(base_ptr);
1132 maps->aliases.emplace(alias_ptr,
1133 internal::Alias{&mem, offset, bytes, 1, mem.h_mt});
1134 if (res.second ==
false)
1136 if (res.first->second.mem != &mem || res.first->second.offset != offset)
1138 mfem_error(
"alias already exists with different base/offset!");
1142 res.first->second.counter++;
1147 void MemoryManager::Erase(
void *h_ptr,
bool free_dev_ptr)
1149 if (!h_ptr) {
return; }
1150 auto mem_map_iter = maps->memories.find(h_ptr);
1151 if (mem_map_iter == maps->memories.end()) {
mfem_error(
"Unknown pointer!"); }
1152 internal::Memory &mem = mem_map_iter->second;
1153 if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1154 maps->memories.erase(mem_map_iter);
1157 void MemoryManager::EraseAlias(
void *alias_ptr)
1159 if (!alias_ptr) {
return; }
1160 auto alias_map_iter = maps->aliases.find(alias_ptr);
1161 if (alias_map_iter == maps->aliases.end()) {
mfem_error(
"Unknown alias!"); }
1162 internal::Alias &alias = alias_map_iter->second;
1163 if (--alias.counter) {
return; }
1164 maps->aliases.erase(alias_map_iter);
1167 void *MemoryManager::GetDevicePtr(
const void *h_ptr,
size_t bytes,
1172 MFEM_VERIFY(bytes == 0,
"Trying to access NULL with size " << bytes);
1175 internal::Memory &mem = maps->memories.at(h_ptr);
1178 MFEM_VERIFY_TYPES(h_mt, d_mt);
1179 if (!mem.d_ptr) { ctrl->Device(d_mt)->Alloc(mem); }
1181 ctrl->Device(d_mt)->Unprotect(mem);
1184 MFEM_ASSERT(bytes <= mem.bytes,
"invalid copy size");
1185 ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes);
1187 ctrl->Host(h_mt)->Protect(mem, bytes);
1191 void *MemoryManager::GetAliasDevicePtr(
const void *alias_ptr,
size_t bytes,
1196 MFEM_VERIFY(bytes == 0,
"Trying to access NULL with size " << bytes);
1199 auto &alias_map = maps->aliases;
1200 auto alias_map_iter = alias_map.find(alias_ptr);
1201 if (alias_map_iter == alias_map.end()) {
mfem_error(
"alias not found"); }
1202 const internal::Alias &alias = alias_map_iter->second;
1203 const size_t offset = alias.offset;
1204 internal::Memory &mem = *alias.mem;
1207 MFEM_VERIFY_TYPES(h_mt, d_mt);
1208 if (!mem.d_ptr) { ctrl->Device(d_mt)->Alloc(mem); }
1209 void *alias_h_ptr =
static_cast<char*
>(mem.h_ptr) + offset;
1210 void *alias_d_ptr =
static_cast<char*
>(mem.d_ptr) + offset;
1211 MFEM_ASSERT(alias_h_ptr == alias_ptr,
"internal error");
1212 MFEM_ASSERT(bytes <= alias.bytes,
"internal error");
1214 ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes);
1215 ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
1216 if (copy) { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
1217 ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
1221 void *MemoryManager::GetHostPtr(
const void *ptr,
size_t bytes,
bool copy)
1223 const internal::Memory &mem = maps->memories.at(ptr);
1224 MFEM_ASSERT(mem.h_ptr == ptr,
"internal error");
1225 MFEM_ASSERT(bytes <= mem.bytes,
"internal error")
1228 MFEM_VERIFY_TYPES(h_mt, d_mt);
1230 ctrl->Host(h_mt)->Unprotect(mem, bytes);
1231 if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1232 if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
1233 if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
1237 void *MemoryManager::GetAliasHostPtr(
const void *ptr,
size_t bytes,
1240 const internal::Alias &alias = maps->aliases.at(ptr);
1241 const internal::Memory *
const mem = alias.mem;
1244 MFEM_VERIFY_TYPES(h_mt, d_mt);
1245 void *alias_h_ptr =
static_cast<char*
>(mem->h_ptr) + alias.offset;
1246 void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
1247 MFEM_ASSERT(alias_h_ptr == ptr,
"internal error");
1249 ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
1250 if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1251 if (copy_data && mem->d_ptr)
1252 { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
1253 if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
1259 if (exists) {
return; }
1260 maps =
new internal::Maps();
1261 ctrl =
new internal::Ctrl();
1274 host_mem_type = host_mt;
1275 device_mem_type = device_mt;
1278 #ifdef MFEM_USE_UMPIRE
1282 h_umpire_name = h_name;
1283 d_umpire_name = d_name;
1289 MFEM_VERIFY(exists,
"MemoryManager has already been destroyed!");
1290 for (
auto& n : maps->memories)
1292 internal::Memory &mem = n.second;
1294 if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1295 if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1297 delete maps; maps =
nullptr;
1298 delete ctrl; ctrl =
nullptr;
1318 for (
const auto& n : maps->memories)
1320 const internal::Memory &mem = n.second;
1321 out <<
"\nkey " << n.first <<
", "
1322 <<
"h_ptr " << mem.h_ptr <<
", "
1323 <<
"d_ptr " << mem.d_ptr;
1326 if (maps->memories.size() > 0) { out << std::endl; }
1333 for (
const auto& n : maps->aliases)
1335 const internal::Alias &alias = n.second;
1336 out <<
"\nalias: key " << n.first <<
", "
1337 <<
"h_ptr " << alias.mem->h_ptr <<
", "
1338 <<
"offset " << alias.offset <<
", "
1339 <<
"bytes " << alias.bytes <<
", "
1340 <<
"counter " << alias.counter;
1343 if (maps->aliases.size() > 0) { out << std::endl; }
1347 int MemoryManager::CompareHostAndDevice_(
void *h_ptr,
size_t size,
1351 mm.GetAliasDevicePtr(h_ptr, size,
false) :
1352 mm.GetDevicePtr(h_ptr, size,
false);
1353 char *h_buf =
new char[size];
1355 int res = std::memcmp(h_ptr, h_buf, size);
1365 <<
"\n registered = " << bool(flags & Mem::REGISTERED)
1366 <<
"\n owns host = " << bool(flags & Mem::OWNS_HOST)
1367 <<
"\n owns device = " << bool(flags & Mem::OWNS_DEVICE)
1368 <<
"\n owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
1369 <<
"\n valid host = " << bool(flags & Mem::VALID_HOST)
1370 <<
"\n valid device = " << bool(flags & Mem::VALID_DEVICE)
1371 <<
"\n device flag = " << bool(flags & Mem::USE_DEVICE)
1372 <<
"\n alias = " << bool(flags & Mem::ALIAS)
1376 void MemoryManager::CheckHostMemoryType_(
MemoryType h_mt,
void *h_ptr)
1378 if (!
mm.exists) {
return;}
1381 if (known) { MFEM_VERIFY(h_mt == maps->memories.at(h_ptr).h_mt,
""); }
1382 if (alias) { MFEM_VERIFY(h_mt == maps->aliases.at(h_ptr).mem->h_mt,
""); }
1387 bool MemoryManager::exists =
false;
1389 #ifdef MFEM_USE_UMPIRE
1390 const char* MemoryManager::h_umpire_name =
"HOST";
1391 const char* MemoryManager::d_umpire_name =
"DEVICE";
1399 "host-std",
"host-32",
"host-64",
"host-debug",
"host-umpire",
1400 #if defined(MFEM_USE_CUDA)
1403 #elif defined(MFEM_USE_HIP)
1411 #if defined(MFEM_USE_CUDA)
1413 #elif defined(MFEM_USE_HIP)
void * CuMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device and returns destination ptr.
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Host memory; aligned at 64 bytes.
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
Device memory; using CUDA or HIP *Malloc and *Free.
void PrintFlags() const
Print the internal flags.
Device memory; using Umpire.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
static MemoryType GetHostMemoryType()
Host memory; allocated from a "host-debug" pool.
void Configure(const MemoryType h_mt, const MemoryType d_mt)
int PrintAliases(std::ostream &out=mfem::out)
int CompareHostAndDevice(int size) const
If both the host and the device data are valid, compare their contents.
bool IsAlias(const void *h_ptr)
Return true if the pointer is known by the memory manager as an alias.
void * CuMallocManaged(void **dptr, size_t bytes)
Allocates managed device memory.
Host memory; aligned at 32 bytes.
void SetUmpireAllocatorNames(const char *h_name, const char *d_name)
Set the host and device Umpire allocator names.
constexpr int DeviceMemoryType
constexpr int HostMemoryType
void mfem_error(const char *msg)
Function called when an error is encountered. Used by the macros MFEM_ABORT, MFEM_ASSERT, MFEM_VERIFY.
static MemoryType GetDeviceMemoryType()
void * HipMemFree(void *dptr)
Frees device memory.
Ownership flag for internal Memory data.
void Destroy()
Free all the device memories.
int PrintPtrs(std::ostream &out=mfem::out)
The host pointer will be deleted by Delete()
void * CuMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
void RegisterCheck(void *h_ptr)
Check if the host pointer has been registered in the memory manager.
constexpr int MemoryTypeSize
Static casts to 'int' and sizes of some useful memory types.
void * HipMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
double p(const Vector &x, double t)
void Init()
Initialize the memory manager.
MemoryType
Memory types supported by MFEM.
bool IsKnown(const void *h_ptr)
Return true if the pointer is known by the memory manager.
constexpr int HostMemoryTypeSize
bool IsDeviceMemory(MemoryType mt)
MemoryManager mm
The (single) global memory manager object.
Host memory; using new[] and delete[].
void * HipMemcpyHtoD(void *dst, const void *src, size_t bytes)
Copies memory from Host to Device.
MemoryType GetMemoryType(MemoryClass mc)
Return a suitable MemoryType for a given MemoryClass.
void * HipMemcpyDtoD(void *dst, const void *src, size_t bytes)
Copies memory from Device to Device.
constexpr int DeviceMemoryTypeSize
Host memory; using Umpire.
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as std::cout.
MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
Return a suitable MemoryClass from a pair of MemoryClasses.
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
MemoryClass
Memory classes identify sets of memory types.
void * CuMemcpyDtoH(void *dst, const void *src, size_t bytes)
Copies memory from Device to Host.
void MemoryPrintFlags(unsigned flags)
Print the state of a Memory object based on its internal flags. Useful in a debugger. See also Memory<T>::PrintFlags().