MFEM v4.8.0
Finite element discretization library
mem_manager.cpp
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "forall.hpp"
13#include "mem_manager.hpp"
14
15#include <list>
16#include <cstring> // std::memcpy, std::memcmp
17#include <unordered_map>
18#include <algorithm> // std::max
19#include <cstdint>
20
21// Uncomment to try _WIN32 platform
22//#define _WIN32
23//#define _aligned_malloc(s,a) malloc(s)
24
25#ifndef _WIN32
26#include <unistd.h>
27#include <signal.h>
28#include <sys/mman.h>
29#define mfem_memalign(p,a,s) posix_memalign(p,a,s)
30#define mfem_aligned_free free
31#else
32#define mfem_memalign(p,a,s) (((*(p))=_aligned_malloc((s),(a))),*(p)?0:errno)
33#define mfem_aligned_free _aligned_free
34#endif
35
36#ifdef MFEM_USE_UMPIRE
37#include <umpire/Umpire.hpp>
38#include <umpire/strategy/QuickPool.hpp>
39
40// Make sure Umpire is built with CUDA support if MFEM is built with it.
41#if defined(MFEM_USE_CUDA) && !defined(UMPIRE_ENABLE_CUDA)
42#error "CUDA is not enabled in Umpire!"
43#endif
44// Make sure Umpire is built with HIP support if MFEM is built with it.
45#if defined(MFEM_USE_HIP) && !defined(UMPIRE_ENABLE_HIP)
46#error "HIP is not enabled in Umpire!"
47#endif
48#endif // MFEM_USE_UMPIRE
49
50#ifndef MAP_ANONYMOUS
51#define MAP_ANONYMOUS MAP_ANON
52#endif
53
54// Internal debug option, useful for tracking some memory manager operations.
55// #define MFEM_TRACK_MEM_MANAGER
56
57namespace mfem
58{
59
61{
62 switch (mc)
63 {
69 }
70 MFEM_VERIFY(false,"");
71 return MemoryType::HOST;
72}
73
74
75bool MemoryClassContainsType(MemoryClass mc, MemoryType mt)
76{
77 switch (mc)
78 {
79 case MemoryClass::HOST: return IsHostMemory(mt);
80 case MemoryClass::HOST_32:
81 return (mt == MemoryType::HOST_32 ||
82 mt == MemoryType::HOST_64 ||
83 mt == MemoryType::HOST_DEBUG);
84 case MemoryClass::HOST_64:
85 return (mt == MemoryType::HOST_64 ||
86 mt == MemoryType::HOST_DEBUG);
87 case MemoryClass::DEVICE: return IsDeviceMemory(mt);
88 case MemoryClass::MANAGED:
89 return (mt == MemoryType::MANAGED);
90 }
91 MFEM_ABORT("invalid MemoryClass");
92 return false;
93}
94
95
96static void MFEM_VERIFY_TYPES(const MemoryType h_mt, const MemoryType d_mt)
97{
98 MFEM_VERIFY(IsHostMemory(h_mt), "h_mt = " << (int)h_mt);
99 MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == MemoryType::DEFAULT,
100 "d_mt = " << (int)d_mt);
101 // If h_mt == MemoryType::HOST_DEBUG, then d_mt == MemoryType::DEVICE_DEBUG
102 // or d_mt == MemoryType::DEFAULT
103 MFEM_VERIFY(h_mt != MemoryType::HOST_DEBUG ||
104 d_mt == MemoryType::DEVICE_DEBUG ||
105 d_mt == MemoryType::DEFAULT,
106 "d_mt = " << MemoryTypeName[(int)d_mt]);
107 // If d_mt == MemoryType::DEVICE_DEBUG, then h_mt != MemoryType::MANAGED
108 MFEM_VERIFY(d_mt != MemoryType::DEVICE_DEBUG ||
109 h_mt != MemoryType::MANAGED,
110 "h_mt = " << MemoryTypeName[(int)h_mt]);
111#if 0
112 const bool sync =
113 (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE) ||
114 (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE) ||
115 (h_mt == MemoryType::HOST_PINNED && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
116 (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE) ||
117 (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE) ||
118 (h_mt == MemoryType::HOST_UMPIRE && d_mt == MemoryType::DEVICE_UMPIRE_2) ||
119 (h_mt == MemoryType::HOST_DEBUG && d_mt == MemoryType::DEVICE_DEBUG) ||
120 (h_mt == MemoryType::MANAGED && d_mt == MemoryType::MANAGED) ||
121 (h_mt == MemoryType::HOST_64 && d_mt == MemoryType::DEVICE) ||
122 (h_mt == MemoryType::HOST_32 && d_mt == MemoryType::DEVICE) ||
123 (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE) ||
124 (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE) ||
125 (h_mt == MemoryType::HOST && d_mt == MemoryType::DEVICE_UMPIRE_2);
126 MFEM_VERIFY(sync, "");
127#endif
128}
129
130MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
131{
132 // | HOST HOST_32 HOST_64 DEVICE MANAGED
133 // ---------+---------------------------------------------
134 // HOST | HOST HOST_32 HOST_64 DEVICE MANAGED
135 // HOST_32 | HOST_32 HOST_32 HOST_64 DEVICE MANAGED
136 // HOST_64 | HOST_64 HOST_64 HOST_64 DEVICE MANAGED
137 // DEVICE | DEVICE DEVICE DEVICE DEVICE MANAGED
138 // MANAGED | MANAGED MANAGED MANAGED MANAGED MANAGED
139
140 // Using the enumeration ordering:
141 // HOST < HOST_32 < HOST_64 < DEVICE < MANAGED,
142 // the above table is simply: a*b = max(a,b).
143
144 return std::max(mc1, mc2);
145}
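// Example: since the enum values satisfy HOST < HOST_32 < HOST_64 < DEVICE
// < MANAGED, the product above can be checked directly. A minimal sketch
// (not part of the library source), assuming only mem_manager.hpp:
#if 0
#include "mem_manager.hpp"
#include <cassert>
void MemoryClassProductExample()
{
   using mfem::MemoryClass;
   assert((MemoryClass::HOST_32 * MemoryClass::HOST_64) == MemoryClass::HOST_64);
   assert((MemoryClass::HOST * MemoryClass::DEVICE) == MemoryClass::DEVICE);
   assert((MemoryClass::DEVICE * MemoryClass::MANAGED) == MemoryClass::MANAGED);
}
#endif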
146
147
148// Instantiate Memory<T>::PrintFlags for T = int and T = real_t.
149template void Memory<int>::PrintFlags() const;
150template void Memory<real_t>::PrintFlags() const;
151
152// Instantiate Memory<T>::CompareHostAndDevice for T = int and T = real_t.
153template int Memory<int>::CompareHostAndDevice(int size) const;
154template int Memory<real_t>::CompareHostAndDevice(int size) const;
155
156
157namespace internal
158{
159
160/// Memory class that holds:
161/// - the host and the device pointer
162/// - the size in bytes of this memory region
163/// - the host and device type of this memory region
164struct Memory
165{
166 void *const h_ptr;
167 void *d_ptr;
168 const size_t bytes;
169 const MemoryType h_mt;
170 MemoryType d_mt;
171 mutable bool h_rw, d_rw;
172 Memory(void *p, size_t b, MemoryType h, MemoryType d):
173 h_ptr(p), d_ptr(nullptr), bytes(b), h_mt(h), d_mt(d),
174 h_rw(true), d_rw(true) { }
175};
176
177/// Alias class that holds the base memory region and the offset
178struct Alias
179{
180 Memory *mem;
181 size_t offset;
182 size_t counter;
183 // 'h_mt' is already stored in 'mem', however, we use this field for type
184 // checking since the alias may be dangling, i.e. 'mem' may be invalid.
185 MemoryType h_mt;
186};
187
188/// Maps for the Memory and the Alias classes
189typedef std::unordered_map<const void*, Memory> MemoryMap;
190typedef std::unordered_map<const void*, Alias> AliasMap;
191
192struct Maps
193{
194 MemoryMap memories;
195 AliasMap aliases;
196};
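// Example (an illustrative sketch, not compiled): how these maps get
// populated through the public Memory<T> API from mem_manager.hpp -- the
// names and sizes below are arbitrary:
#if 0
mfem::Memory<double> base;
base.New(100);                  // one entry in Maps::memories, keyed by h_ptr
mfem::Memory<double> window;
window.MakeAlias(base, 25, 50); // one entry in Maps::aliases, with
                                // offset = 25*sizeof(double), counter = 1
window.Delete();                // decrements the alias counter only
base.Delete();                  // erases the Memory entry
#endif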
197
198} // namespace mfem::internal
199
200static internal::Maps *maps;
201
202namespace internal
203{
204
205/// The host memory space base abstract class
206class HostMemorySpace
207{
208public:
209 virtual ~HostMemorySpace() { }
210 virtual void Alloc(void **ptr, size_t bytes) { *ptr = std::malloc(bytes); }
211 virtual void Dealloc(void *ptr) { std::free(ptr); }
212 virtual void Protect(const Memory&, size_t) { }
213 virtual void Unprotect(const Memory&, size_t) { }
214 virtual void AliasProtect(const void*, size_t) { }
215 virtual void AliasUnprotect(const void*, size_t) { }
216};
217
218/// The device memory space base abstract class
219class DeviceMemorySpace
220{
221public:
222 virtual ~DeviceMemorySpace() { }
223 virtual void Alloc(Memory &base) { base.d_ptr = std::malloc(base.bytes); }
224 virtual void Dealloc(Memory &base) { std::free(base.d_ptr); }
225 virtual void Protect(const Memory&) { }
226 virtual void Unprotect(const Memory&) { }
227 virtual void AliasProtect(const void*, size_t) { }
228 virtual void AliasUnprotect(const void*, size_t) { }
229 virtual void *HtoD(void *dst, const void *src, size_t bytes)
230 { return std::memcpy(dst, src, bytes); }
231 virtual void *DtoD(void *dst, const void *src, size_t bytes)
232 { return std::memcpy(dst, src, bytes); }
233 virtual void *DtoH(void *dst, const void *src, size_t bytes)
234 { return std::memcpy(dst, src, bytes); }
235};
236
237/// The default std:: host memory space
238class StdHostMemorySpace : public HostMemorySpace { };
239
240/// The No host memory space
241struct NoHostMemorySpace : public HostMemorySpace
242{
243 void Alloc(void**, const size_t) override { mfem_error("! Host Alloc error"); }
244};
245
246/// The aligned 32 host memory space
247class Aligned32HostMemorySpace : public HostMemorySpace
248{
249public:
250 Aligned32HostMemorySpace(): HostMemorySpace() { }
251 void Alloc(void **ptr, size_t bytes) override
252 { if (mfem_memalign(ptr, 32, bytes) != 0) { throw ::std::bad_alloc(); } }
253 void Dealloc(void *ptr) override { mfem_aligned_free(ptr); }
254};
255
256/// The aligned 64 host memory space
257class Aligned64HostMemorySpace : public HostMemorySpace
258{
259public:
260 Aligned64HostMemorySpace(): HostMemorySpace() { }
261 void Alloc(void **ptr, size_t bytes) override
262 { if (mfem_memalign(ptr, 64, bytes) != 0) { throw ::std::bad_alloc(); } }
263 void Dealloc(void *ptr) override { mfem_aligned_free(ptr); }
264};
265
266#ifndef _WIN32
267static uintptr_t pagesize = 0;
268static uintptr_t pagemask = 0;
269
270/// Returns the restricted base address of the MMU segment
271inline const void *MmuAddrR(const void *ptr)
272{
273 const uintptr_t addr = (uintptr_t) ptr;
274 return (addr & pagemask) ? (void*) ((addr + pagesize) & ~pagemask) : ptr;
275}
276
277/// Returns the prolongated base address of the MMU segment
278inline const void *MmuAddrP(const void *ptr)
279{
280 const uintptr_t addr = (uintptr_t) ptr;
281 return (void*) (addr & ~pagemask);
282}
283
284/// Compute the restricted length for the MMU segment
285inline uintptr_t MmuLengthR(const void *ptr, const size_t bytes)
286{
287 // a ---->A:| |:B<---- b
288 const uintptr_t a = (uintptr_t) ptr;
289 const uintptr_t A = (uintptr_t) MmuAddrR(ptr);
290 MFEM_ASSERT(a <= A, "");
291 const uintptr_t b = a + bytes;
292 const uintptr_t B = b & ~pagemask;
293 MFEM_ASSERT(B <= b, "");
294 const uintptr_t length = B > A ? B - A : 0;
295 MFEM_ASSERT(length % pagesize == 0,"");
296 return length;
297}
298
299/// Compute the prolongated length for the MMU segment
300inline uintptr_t MmuLengthP(const void *ptr, const size_t bytes)
301{
302 // |:A<----a | | b---->B:|
303 const uintptr_t a = (uintptr_t) ptr;
304 const uintptr_t A = (uintptr_t) MmuAddrP(ptr);
305 MFEM_ASSERT(A <= a, "");
306 const uintptr_t b = a + bytes;
307 const uintptr_t B = b & pagemask ? (b + pagesize) & ~pagemask : b;
308 MFEM_ASSERT(b <= B, "");
309 MFEM_ASSERT(B >= A,"");
310 const uintptr_t length = B - A;
311 MFEM_ASSERT(length % pagesize == 0,"");
312 return length;
313}
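// Worked example, assuming pagesize = 0x1000 (so pagemask = 0xfff): for
// ptr = 0x1100 and bytes = 0x2000, the restricted segment is
// [0x2000, 0x3000) -- only the pages lying entirely inside
// [ptr, ptr + bytes) -- while the prolongated segment is [0x1000, 0x4000),
// i.e. every page overlapping that range. MmuLengthR() can thus
// legitimately return 0 for a small alias that covers no full page, making
// the corresponding AliasProtect() a no-op.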
314
315/// The protected access error, used for the host
316static void MmuError(int, siginfo_t *si, void*)
317{
318 constexpr size_t buf_size = 64;
319 fflush(0);
320 char str[buf_size];
321 const void *ptr = si->si_addr;
322 snprintf(str, buf_size, "Error while accessing address %p!", ptr);
323 mfem::out << std::endl << "An illegal memory access was made!";
324 MFEM_ABORT(str);
325}
326
327/// MMU initialization, setting SIGBUS & SIGSEGV signals to MmuError
328static void MmuInit()
329{
330 if (pagesize > 0) { return; }
331 struct sigaction sa;
332 sa.sa_flags = SA_SIGINFO;
333 sigemptyset(&sa.sa_mask);
334 sa.sa_sigaction = MmuError;
335 if (sigaction(SIGBUS, &sa, NULL) == -1) { mfem_error("SIGBUS"); }
336 if (sigaction(SIGSEGV, &sa, NULL) == -1) { mfem_error("SIGSEGV"); }
337 pagesize = (uintptr_t) sysconf(_SC_PAGE_SIZE);
338 MFEM_ASSERT(pagesize > 0, "pagesize must not be less than 1");
339 pagemask = pagesize - 1;
340}
341
342/// MMU allocation, through ::mmap
343inline void MmuAlloc(void **ptr, const size_t bytes)
344{
345 const size_t length = bytes == 0 ? 8 : bytes;
346 const int prot = PROT_READ | PROT_WRITE;
347 const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
348 *ptr = ::mmap(NULL, length, prot, flags, -1, 0);
349 if (*ptr == MAP_FAILED) { throw ::std::bad_alloc(); }
350}
351
352/// MMU deallocation, through ::munmap
353inline void MmuDealloc(void *ptr, const size_t bytes)
354{
355 const size_t length = bytes == 0 ? 8 : bytes;
356 if (::munmap(ptr, length) == -1) { mfem_error("Dealloc error!"); }
357}
358
359/// MMU protection, through ::mprotect with no read/write accesses
360inline void MmuProtect(const void *ptr, const size_t bytes)
361{
362 static const bool mmu_protect_error = GetEnv("MFEM_MMU_PROTECT_ERROR");
363 if (!::mprotect(const_cast<void*>(ptr), bytes, PROT_NONE)) { return; }
364 if (mmu_protect_error) { mfem_error("MMU protection (NONE) error"); }
365}
366
367/// MMU un-protection, through ::mprotect with read/write accesses
368inline void MmuAllow(const void *ptr, const size_t bytes)
369{
370 const int RW = PROT_READ | PROT_WRITE;
371 static const bool mmu_protect_error = GetEnv("MFEM_MMU_PROTECT_ERROR");
372 if (!::mprotect(const_cast<void*>(ptr), bytes, RW)) { return; }
373 if (mmu_protect_error) { mfem_error("MMU protection (R/W) error"); }
374}
375#else
376inline void MmuInit() { }
377inline void MmuAlloc(void **ptr, const size_t bytes) { *ptr = std::malloc(bytes); }
378inline void MmuDealloc(void *ptr, const size_t) { std::free(ptr); }
379inline void MmuProtect(const void*, const size_t) { }
380inline void MmuAllow(const void*, const size_t) { }
381inline const void *MmuAddrR(const void *a) { return a; }
382inline const void *MmuAddrP(const void *a) { return a; }
383inline uintptr_t MmuLengthR(const void*, const size_t) { return 0; }
384inline uintptr_t MmuLengthP(const void*, const size_t) { return 0; }
385#endif
386
387/// The MMU host memory space
388class MmuHostMemorySpace : public HostMemorySpace
389{
390public:
391 MmuHostMemorySpace(): HostMemorySpace() { MmuInit(); }
392 void Alloc(void **ptr, size_t bytes) override { MmuAlloc(ptr, bytes); }
393 void Dealloc(void *ptr) override { MmuDealloc(ptr, maps->memories.at(ptr).bytes); }
394 void Protect(const Memory& mem, size_t bytes) override
395 { if (mem.h_rw) { mem.h_rw = false; MmuProtect(mem.h_ptr, bytes); } }
396 void Unprotect(const Memory &mem, size_t bytes) override
397 { if (!mem.h_rw) { mem.h_rw = true; MmuAllow(mem.h_ptr, bytes); } }
398 /// Aliases need to be restricted during protection
399 void AliasProtect(const void *ptr, size_t bytes) override
400 { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
401 /// Aliases need to be prolongated for un-protection
402 void AliasUnprotect(const void *ptr, size_t bytes) override
403 { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
404};
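// Example (an illustrative sketch, not compiled): with the 'debug' device,
// i.e. the HOST_DEBUG/DEVICE_DEBUG pair, stale host accesses are trapped by
// the mprotect() machinery above and end up in MmuError():
#if 0
mfem::Device device("debug");
mfem::Vector v(16);
v.UseDevice(true);
double *d_v = v.Write();  // host pages are mprotect'ed to PROT_NONE here
// dereferencing v's host data now would raise SIGSEGV -> MmuError();
v.HostRead();             // correct: unprotects the pages and syncs the data
#endif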
405
406/// The UVM host memory space
407class UvmHostMemorySpace : public HostMemorySpace
408{
409public:
410 UvmHostMemorySpace(): HostMemorySpace() { }
411
412 void Alloc(void **ptr, size_t bytes) override
413 {
414#ifdef MFEM_USE_CUDA
415 CuMallocManaged(ptr, bytes == 0 ? 8 : bytes);
416#endif
417#ifdef MFEM_USE_HIP
418 HipMallocManaged(ptr, bytes == 0 ? 8 : bytes);
419#endif
420 }
421
422 void Dealloc(void *ptr) override
423 {
424#ifdef MFEM_USE_CUDA
425 CuMemFree(ptr);
426#endif
427#ifdef MFEM_USE_HIP
428 HipMemFree(ptr);
429#endif
430 }
431};
432
433/// The 'No' device memory space
434class NoDeviceMemorySpace: public DeviceMemorySpace
435{
436public:
437 void Alloc(internal::Memory&) override { mfem_error("! Device Alloc"); }
438 void Dealloc(Memory&) override { mfem_error("! Device Dealloc"); }
439 void *HtoD(void*, const void*, size_t) override { mfem_error("!HtoD"); return nullptr; }
440 void *DtoD(void*, const void*, size_t) override { mfem_error("!DtoD"); return nullptr; }
441 void *DtoH(void*, const void*, size_t) override { mfem_error("!DtoH"); return nullptr; }
442};
443
444/// The std:: device memory space, used with the 'debug' device
445class StdDeviceMemorySpace : public DeviceMemorySpace { };
446
447/// The CUDA device memory space
448class CudaDeviceMemorySpace: public DeviceMemorySpace
449{
450public:
451 CudaDeviceMemorySpace(): DeviceMemorySpace() { }
452 void Alloc(Memory &base) override { CuMemAlloc(&base.d_ptr, base.bytes); }
453 void Dealloc(Memory &base) override { CuMemFree(base.d_ptr); }
454 void *HtoD(void *dst, const void *src, size_t bytes) override
455 { return CuMemcpyHtoD(dst, src, bytes); }
456 void *DtoD(void* dst, const void* src, size_t bytes) override
457 { return CuMemcpyDtoD(dst, src, bytes); }
458 void *DtoH(void *dst, const void *src, size_t bytes) override
459 { return CuMemcpyDtoH(dst, src, bytes); }
460};
461
462/// The CUDA/HIP page-locked host memory space
463class HostPinnedMemorySpace: public HostMemorySpace
464{
465public:
466 HostPinnedMemorySpace(): HostMemorySpace() { }
467 void Alloc(void ** ptr, size_t bytes) override
468 {
469#ifdef MFEM_USE_CUDA
470 CuMemAllocHostPinned(ptr, bytes);
471#endif
472#ifdef MFEM_USE_HIP
473 HipMemAllocHostPinned(ptr, bytes);
474#endif
475 }
476 void Dealloc(void *ptr) override
477 {
478#ifdef MFEM_USE_CUDA
479 CuMemFreeHostPinned(ptr);
480#endif
481#ifdef MFEM_USE_HIP
482 HipMemFreeHostPinned(ptr);
483#endif
484 }
485};
486
487/// The HIP device memory space
488class HipDeviceMemorySpace: public DeviceMemorySpace
489{
490public:
491 HipDeviceMemorySpace(): DeviceMemorySpace() { }
492 void Alloc(Memory &base) override { HipMemAlloc(&base.d_ptr, base.bytes); }
493 void Dealloc(Memory &base) override { HipMemFree(base.d_ptr); }
494 void *HtoD(void *dst, const void *src, size_t bytes) override
495 { return HipMemcpyHtoD(dst, src, bytes); }
496 void *DtoD(void* dst, const void* src, size_t bytes) override
497 // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side
498 // synchronization, so we use hipMemcpyAsync instead to get similar
499 // behavior. For more info, see: https://github.com/mfem/mfem/pull/2780
500 { return HipMemcpyDtoDAsync(dst, src, bytes); }
501 void *DtoH(void *dst, const void *src, size_t bytes) override
502 { return HipMemcpyDtoH(dst, src, bytes); }
503};
504
505/// The UVM device memory space.
506class UvmCudaMemorySpace : public DeviceMemorySpace
507{
508public:
509 void Alloc(Memory &base) override { base.d_ptr = base.h_ptr; }
510 void Dealloc(Memory&) override { }
511 void *HtoD(void *dst, const void *src, size_t bytes) override
512 {
513 if (dst == src) { MFEM_STREAM_SYNC; return dst; }
514 return CuMemcpyHtoD(dst, src, bytes);
515 }
516 void *DtoD(void* dst, const void* src, size_t bytes) override
517 { return CuMemcpyDtoD(dst, src, bytes); }
518 void *DtoH(void *dst, const void *src, size_t bytes) override
519 {
520 if (dst == src) { MFEM_STREAM_SYNC; return dst; }
521 return CuMemcpyDtoH(dst, src, bytes);
522 }
523};
524
525class UvmHipMemorySpace : public DeviceMemorySpace
526{
527public:
528 void Alloc(Memory &base) { base.d_ptr = base.h_ptr; }
529 void Dealloc(Memory&) { }
530 void *HtoD(void *dst, const void *src, size_t bytes)
531 {
532 if (dst == src) { MFEM_STREAM_SYNC; return dst; }
533 return HipMemcpyHtoD(dst, src, bytes);
534 }
535 void *DtoD(void* dst, const void* src, size_t bytes)
536 { return HipMemcpyDtoD(dst, src, bytes); }
537 void *DtoH(void *dst, const void *src, size_t bytes)
538 {
539 if (dst == src) { MFEM_STREAM_SYNC; return dst; }
540 return HipMemcpyDtoH(dst, src, bytes);
541 }
542};
543
544/// The MMU device memory space
545class MmuDeviceMemorySpace : public DeviceMemorySpace
546{
547public:
548 MmuDeviceMemorySpace(): DeviceMemorySpace() { }
549 void Alloc(Memory &m) override { MmuAlloc(&m.d_ptr, m.bytes); }
550 void Dealloc(Memory &m) override { MmuDealloc(m.d_ptr, m.bytes); }
551 void Protect(const Memory &m) override
552 { if (m.d_rw) { m.d_rw = false; MmuProtect(m.d_ptr, m.bytes); } }
553 void Unprotect(const Memory &m) override
554 { if (!m.d_rw) { m.d_rw = true; MmuAllow(m.d_ptr, m.bytes); } }
555 /// Aliases need to be restricted during protection
556 void AliasProtect(const void *ptr, size_t bytes) override
557 { MmuProtect(MmuAddrR(ptr), MmuLengthR(ptr, bytes)); }
558 /// Aliases need to be prolongated for un-protection
559 void AliasUnprotect(const void *ptr, size_t bytes) override
560 { MmuAllow(MmuAddrP(ptr), MmuLengthP(ptr, bytes)); }
561 void *HtoD(void *dst, const void *src, size_t bytes) override
562 { return std::memcpy(dst, src, bytes); }
563 void *DtoD(void *dst, const void *src, size_t bytes) override
564 { return std::memcpy(dst, src, bytes); }
565 void *DtoH(void *dst, const void *src, size_t bytes) override
566 { return std::memcpy(dst, src, bytes); }
567};
568
569#ifdef MFEM_USE_UMPIRE
570class UmpireMemorySpace
571{
572protected:
573 umpire::ResourceManager &rm;
574 umpire::Allocator allocator;
575 bool owns_allocator{false};
576
577public:
578 // TODO: this only releases unused memory
579 virtual ~UmpireMemorySpace() { if (owns_allocator) { allocator.release(); } }
580 UmpireMemorySpace(const char * name, const char * space)
581 : rm(umpire::ResourceManager::getInstance())
582 {
583 if (!rm.isAllocator(name))
584 {
585 allocator = rm.makeAllocator<umpire::strategy::QuickPool>(
586 name, rm.getAllocator(space));
587 owns_allocator = true;
588 }
589 else
590 {
591 allocator = rm.getAllocator(name);
592 owns_allocator = false;
593 }
594 }
595};
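// Example (an illustrative sketch, not compiled): because the constructor
// above adopts an existing allocator with the expected name, an application
// can pre-build the pool with custom parameters before MFEM asks for it --
// "MFEM_DEVICE" is the default name returned by
// MemoryManager::GetUmpireDeviceAllocatorName():
#if 0
auto &rm = umpire::ResourceManager::getInstance();
rm.makeAllocator<umpire::strategy::QuickPool>(
   "MFEM_DEVICE", rm.getAllocator("DEVICE"),
   /*first_minimum_pool_allocation_size=*/512*1024*1024);
// ...then select MemoryType::DEVICE_UMPIRE as the device MemoryType when
// configuring the mfem::Device.
#endif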
596
597/// The Umpire host memory space
598class UmpireHostMemorySpace : public HostMemorySpace, public UmpireMemorySpace
599{
600private:
601 umpire::strategy::AllocationStrategy *strat;
602public:
603 UmpireHostMemorySpace(const char * name)
604 : HostMemorySpace(),
605 UmpireMemorySpace(name, "HOST"),
606 strat(allocator.getAllocationStrategy()) {}
607 void Alloc(void **ptr, size_t bytes) override
608 { *ptr = allocator.allocate(bytes); }
609 void Dealloc(void *ptr) override { allocator.deallocate(ptr); }
610 void Insert(void *ptr, size_t bytes)
611 { rm.registerAllocation(ptr, {ptr, bytes, strat}); }
612};
613
614/// The Umpire device memory space
615#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
616class UmpireDeviceMemorySpace : public DeviceMemorySpace,
617 public UmpireMemorySpace
618{
619public:
620 UmpireDeviceMemorySpace(const char * name)
621 : DeviceMemorySpace(),
622 UmpireMemorySpace(name, "DEVICE") {}
623 void Alloc(Memory &base) override
624 { base.d_ptr = allocator.allocate(base.bytes); }
625 void Dealloc(Memory &base) override { rm.deallocate(base.d_ptr); }
626 void *HtoD(void *dst, const void *src, size_t bytes) override
627 {
628#ifdef MFEM_USE_CUDA
629 return CuMemcpyHtoD(dst, src, bytes);
630#endif
631#ifdef MFEM_USE_HIP
632 return HipMemcpyHtoD(dst, src, bytes);
633#endif
634 // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
635 }
636 void *DtoD(void* dst, const void* src, size_t bytes) override
637 {
638#ifdef MFEM_USE_CUDA
639 return CuMemcpyDtoD(dst, src, bytes);
640#endif
641#ifdef MFEM_USE_HIP
642 // Unlike cudaMemcpy(DtoD), hipMemcpy(DtoD) causes a host-side
643 // synchronization, so we use hipMemcpyAsync instead to get similar
644 // behavior. For more info, see: https://github.com/mfem/mfem/pull/2780
645 return HipMemcpyDtoDAsync(dst, src, bytes);
646#endif
647 // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
648 }
649 void *DtoH(void *dst, const void *src, size_t bytes) override
650 {
651#ifdef MFEM_USE_CUDA
652 return CuMemcpyDtoH(dst, src, bytes);
653#endif
654#ifdef MFEM_USE_HIP
655 return HipMemcpyDtoH(dst, src, bytes);
656#endif
657 // rm.copy(dst, const_cast<void*>(src), bytes); return dst;
658 }
659};
660#else
661class UmpireDeviceMemorySpace : public NoDeviceMemorySpace
662{
663public:
664 UmpireDeviceMemorySpace(const char * /*unused*/) {}
665};
666#endif // MFEM_USE_CUDA || MFEM_USE_HIP
667#endif // MFEM_USE_UMPIRE
668
669/// Memory space controller class
670class Ctrl
671{
672 typedef MemoryType MT;
673
674public:
675 HostMemorySpace *host[HostMemoryTypeSize];
676 DeviceMemorySpace *device[DeviceMemoryTypeSize];
677
678public:
679 Ctrl(): host{nullptr}, device{nullptr} { }
680
681 void Configure()
682 {
683 if (host[HostMemoryType])
684 {
685 mfem_error("Memory backends have already been configured!");
686 }
687
688 // Filling the host memory backends
689 // HOST, HOST_32 & HOST_64 are always ready
690 // MFEM_USE_UMPIRE will set either [No/Umpire] HostMemorySpace
691 host[static_cast<int>(MT::HOST)] = new StdHostMemorySpace();
692 host[static_cast<int>(MT::HOST_32)] = new Aligned32HostMemorySpace();
693 host[static_cast<int>(MT::HOST_64)] = new Aligned64HostMemorySpace();
694 // HOST_DEBUG is delayed, as it reroutes signals
695 host[static_cast<int>(MT::HOST_DEBUG)] = nullptr;
696 host[static_cast<int>(MT::HOST_UMPIRE)] = nullptr;
697 host[static_cast<int>(MT::MANAGED)] = new UvmHostMemorySpace();
698
699 // Filling the device memory backends; indices are shifted by the first device type
700 constexpr int shift = DeviceMemoryType;
701#if defined(MFEM_USE_CUDA)
702 device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
703#elif defined(MFEM_USE_HIP)
704 device[static_cast<int>(MT::MANAGED)-shift] = new UvmHipMemorySpace();
705#else
706 // this re-creates the original behavior, but should this be nullptr instead?
707 device[static_cast<int>(MT::MANAGED)-shift] = new UvmCudaMemorySpace();
708#endif
709
710 // All other device controllers are delayed
711 device[static_cast<int>(MemoryType::DEVICE)-shift] = nullptr;
712 device[static_cast<int>(MT::DEVICE_DEBUG)-shift] = nullptr;
713 device[static_cast<int>(MT::DEVICE_UMPIRE)-shift] = nullptr;
714 device[static_cast<int>(MT::DEVICE_UMPIRE_2)-shift] = nullptr;
715 }
716
717 HostMemorySpace* Host(const MemoryType mt)
718 {
719 const int mt_i = static_cast<int>(mt);
720 // Delayed host controller initialization
721 if (!host[mt_i]) { host[mt_i] = NewHostCtrl(mt); }
722 MFEM_ASSERT(host[mt_i], "Host memory controller is not configured!");
723 return host[mt_i];
724 }
725
726 DeviceMemorySpace* Device(const MemoryType mt)
727 {
728 const int mt_i = static_cast<int>(mt) - DeviceMemoryType;
729 MFEM_ASSERT(mt_i >= 0,"");
730 // Lazy device controller initialization
731 if (!device[mt_i]) { device[mt_i] = NewDeviceCtrl(mt); }
732 MFEM_ASSERT(device[mt_i], "Memory manager has not been configured!");
733 return device[mt_i];
734 }
735
736 ~Ctrl()
737 {
738 constexpr int mt_h = HostMemoryType;
739 constexpr int mt_d = DeviceMemoryType;
740 for (int mt = mt_h; mt < HostMemoryTypeSize; mt++) { delete host[mt]; }
741 for (int mt = mt_d; mt < MemoryTypeSize; mt++) { delete device[mt-mt_d]; }
742 }
743
744private:
745 HostMemorySpace* NewHostCtrl(const MemoryType mt)
746 {
747 switch (mt)
748 {
749 case MT::HOST_DEBUG: return new MmuHostMemorySpace();
750#ifdef MFEM_USE_UMPIRE
751 case MT::HOST_UMPIRE:
752 return new UmpireHostMemorySpace(
753 MemoryManager::GetUmpireHostAllocatorName());
754#else
755 case MT::HOST_UMPIRE: return new NoHostMemorySpace();
756#endif
757 case MT::HOST_PINNED: return new HostPinnedMemorySpace();
758 default: MFEM_ABORT("Unknown host memory controller!");
759 }
760 return nullptr;
761 }
762
763 DeviceMemorySpace* NewDeviceCtrl(const MemoryType mt)
764 {
765 switch (mt)
766 {
767#ifdef MFEM_USE_UMPIRE
768 case MT::DEVICE_UMPIRE:
769 return new UmpireDeviceMemorySpace(
770 MemoryManager::GetUmpireDeviceAllocatorName());
771 case MT::DEVICE_UMPIRE_2:
772 return new UmpireDeviceMemorySpace(
773 MemoryManager::GetUmpireDevice2AllocatorName());
774#else
775 case MT::DEVICE_UMPIRE: return new NoDeviceMemorySpace();
776 case MT::DEVICE_UMPIRE_2: return new NoDeviceMemorySpace();
777#endif
778 case MT::DEVICE_DEBUG: return new MmuDeviceMemorySpace();
779 case MT::DEVICE:
780 {
781#if defined(MFEM_USE_CUDA)
782 return new CudaDeviceMemorySpace();
783#elif defined(MFEM_USE_HIP)
784 return new HipDeviceMemorySpace();
785#else
786 MFEM_ABORT("No device memory controller!");
787 break;
788#endif
789 }
790 default: MFEM_ABORT("Unknown device memory controller!");
791 }
792 return nullptr;
793 }
794};
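// Example (an illustrative sketch, not compiled): Host() and Device() above
// create controllers on first use, so e.g. the SIGBUS/SIGSEGV handlers of
// MmuHostMemorySpace are only installed once a HOST_DEBUG allocation is
// actually requested:
#if 0
mfem::Memory<double> m;
m.New(8, mfem::MemoryType::HOST_DEBUG); // first use: ctrl->Host(HOST_DEBUG)
                                        // builds MmuHostMemorySpace -> MmuInit()
m.Delete();
#endif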
795
796} // namespace mfem::internal
797
798static internal::Ctrl *ctrl;
799
800void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType mt,
801 unsigned &flags)
802{
803 MFEM_ASSERT(exists, "Internal error!");
804 if (IsHostMemory(mt))
805 {
806 MFEM_ASSERT(mt != MemoryType::HOST && h_tmp == nullptr,
807 "Internal error!");
808 // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
809 // evaluated at the time when the device pointer is allocated, see
810 // GetDevicePtr() and GetAliasDevicePtr()
811 const MemoryType d_mt = MemoryType::DEFAULT;
812 // We rely on the next call using lazy dev alloc
813 return New_(h_tmp, bytes, mt, d_mt, Mem::VALID_HOST, flags);
814 }
815 else
816 {
817 const MemoryType h_mt = GetDualMemoryType(mt);
818 return New_(h_tmp, bytes, h_mt, mt, Mem::VALID_DEVICE, flags);
819 }
820}
821
822void *MemoryManager::New_(void *h_tmp, size_t bytes, MemoryType h_mt,
823 MemoryType d_mt, unsigned valid_flags,
824 unsigned &flags)
825{
826 MFEM_ASSERT(exists, "Internal error!");
827 MFEM_ASSERT(IsHostMemory(h_mt), "h_mt must be host type");
828 MFEM_ASSERT(IsDeviceMemory(d_mt) || d_mt == h_mt ||
829 d_mt == MemoryType::DEFAULT,
830 "d_mt must be device type, the same is h_mt, or DEFAULT");
831 MFEM_ASSERT((h_mt != MemoryType::HOST || h_tmp != nullptr) &&
832 (h_mt == MemoryType::HOST || h_tmp == nullptr),
833 "Internal error");
834 MFEM_ASSERT((valid_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) == 0,
835 "Internal error");
836 void *h_ptr;
837 if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
838 else { h_ptr = h_tmp; }
839 flags = Mem::Registered | Mem::OWNS_INTERNAL | Mem::OWNS_HOST |
840 Mem::OWNS_DEVICE | valid_flags;
841 // The other New_() method relies on this lazy allocation behavior.
842 mm.Insert(h_ptr, bytes, h_mt, d_mt); // lazy dev alloc
843 // mm.InsertDevice(nullptr, h_ptr, bytes, h_mt, d_mt); // non-lazy dev alloc
844
845 // MFEM_VERIFY_TYPES(h_mt, mt); // done by mm.Insert() above
846 CheckHostMemoryType_(h_mt, h_ptr, false);
847
848 return h_ptr;
849}
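// Example (an illustrative sketch, not compiled): the lazy device allocation
// that New_() relies on, driven through the public Memory<T> API; the size
// and types below are arbitrary:
#if 0
mfem::Memory<double> m;
m.New(256, mfem::MemoryType::HOST_64);  // host buffer only; d_mt stays DEFAULT
double *d_m = m.Write(mfem::MemoryClass::DEVICE, 256);
                                        // device buffer allocated here, inside
                                        // GetDevicePtr(), using the dual type
m.Delete();                             // frees both buffers
#endif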
850
851void *MemoryManager::Register_(void *ptr, void *h_tmp, size_t bytes,
852 MemoryType mt,
853 bool own, bool alias, unsigned &flags)
854{
855 MFEM_ASSERT(exists, "Internal error!");
856 const bool is_host_mem = IsHostMemory(mt);
857 const MemType h_mt = is_host_mem ? mt : GetDualMemoryType(mt);
858 const MemType d_mt = is_host_mem ? MemoryType::DEFAULT : mt;
859 // d_mt = MemoryType::DEFAULT means d_mt = GetDualMemoryType(h_mt),
860 // evaluated at the time when the device pointer is allocated, see
861 // GetDevicePtr() and GetAliasDevicePtr()
862
863 MFEM_VERIFY_TYPES(h_mt, d_mt);
864
865 if (ptr == nullptr && h_tmp == nullptr)
866 {
867 MFEM_VERIFY(bytes == 0, "internal error");
868 return nullptr;
869 }
870
871 MFEM_VERIFY(!alias, "Cannot register an alias!");
872
873 flags |= Mem::Registered | Mem::OWNS_INTERNAL;
874 void *h_ptr;
875
876 if (is_host_mem) // HOST TYPES + MANAGED
877 {
878 h_ptr = ptr;
879 mm.Insert(h_ptr, bytes, h_mt, d_mt);
880 flags = (own ? flags | Mem::OWNS_HOST : flags & ~Mem::OWNS_HOST) |
881 Mem::OWNS_DEVICE | Mem::VALID_HOST;
882 }
883 else // DEVICE TYPES
884 {
885 MFEM_VERIFY(ptr || bytes == 0,
886 "cannot register NULL device pointer with bytes = " << bytes);
887 if (h_tmp == nullptr) { ctrl->Host(h_mt)->Alloc(&h_ptr, bytes); }
888 else { h_ptr = h_tmp; }
889 mm.InsertDevice(ptr, h_ptr, bytes, h_mt, d_mt);
890 flags = own ? flags | Mem::OWNS_DEVICE : flags & ~Mem::OWNS_DEVICE;
891 flags |= Mem::OWNS_HOST | Mem::VALID_DEVICE;
892 }
893 CheckHostMemoryType_(h_mt, h_ptr, alias);
894 return h_ptr;
895}
896
897void MemoryManager::Register2_(void *h_ptr, void *d_ptr, size_t bytes,
898 MemoryType h_mt, MemoryType d_mt,
899 bool own, bool alias, unsigned &flags,
900 unsigned valid_flags)
901{
902 MFEM_CONTRACT_VAR(alias);
903 MFEM_ASSERT(exists, "Internal error!");
904 MFEM_ASSERT(!alias, "Cannot register an alias!");
905 MFEM_VERIFY_TYPES(h_mt, d_mt);
906
907 if (h_ptr == nullptr && d_ptr == nullptr)
908 {
909 MFEM_VERIFY(bytes == 0, "internal error");
910 return;
911 }
912
913 flags |= Mem::Registered | Mem::OWNS_INTERNAL;
914
915 MFEM_VERIFY(d_ptr || bytes == 0,
916 "cannot register NULL device pointer with bytes = " << bytes);
917 mm.InsertDevice(d_ptr, h_ptr, bytes, h_mt, d_mt);
918 flags = (own ? flags | (Mem::OWNS_HOST | Mem::OWNS_DEVICE) :
919 flags & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE)) |
920 valid_flags;
921
922 CheckHostMemoryType_(h_mt, h_ptr, alias);
923}
924
925void MemoryManager::Alias_(void *base_h_ptr, size_t offset, size_t bytes,
926 unsigned base_flags, unsigned &flags)
927{
928 mm.InsertAlias(base_h_ptr, (char*)base_h_ptr + offset, bytes,
929 base_flags & Mem::ALIAS);
930 flags = (base_flags | Mem::ALIAS) & ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
931 if (base_h_ptr) { flags |= Mem::OWNS_INTERNAL; }
932}
933
934void MemoryManager::SetDeviceMemoryType_(void *h_ptr, unsigned flags,
935 MemoryType d_mt)
936{
937 MFEM_VERIFY(h_ptr, "cannot set the device memory type: Memory is empty!");
938 if (!(flags & Mem::ALIAS))
939 {
940 auto mem_iter = maps->memories.find(h_ptr);
941 MFEM_VERIFY(mem_iter != maps->memories.end(), "internal error");
942 internal::Memory &mem = mem_iter->second;
943 if (mem.d_mt == d_mt) { return; }
944 MFEM_VERIFY(mem.d_ptr == nullptr, "cannot set the device memory type:"
945 " device memory is allocated!");
946 mem.d_mt = d_mt;
947 }
948 else
949 {
950 auto alias_iter = maps->aliases.find(h_ptr);
951 MFEM_VERIFY(alias_iter != maps->aliases.end(), "internal error");
952 internal::Alias &alias = alias_iter->second;
953 internal::Memory &base_mem = *alias.mem;
954 if (base_mem.d_mt == d_mt) { return; }
955 MFEM_VERIFY(base_mem.d_ptr == nullptr,
956 "cannot set the device memory type:"
957 " alias' base device memory is allocated!");
958 base_mem.d_mt = d_mt;
959 }
960}
961
962void MemoryManager::Delete_(void *h_ptr, MemoryType h_mt, unsigned flags)
963{
964 const bool alias = flags & Mem::ALIAS;
965 const bool registered = flags & Mem::Registered;
966 const bool owns_host = flags & Mem::OWNS_HOST;
967 const bool owns_device = flags & Mem::OWNS_DEVICE;
968 const bool owns_internal = flags & Mem::OWNS_INTERNAL;
969 MFEM_ASSERT(IsHostMemory(h_mt), "invalid h_mt = " << (int)h_mt);
970 // MFEM_ASSERT(registered || IsHostMemory(h_mt),"");
971 MFEM_ASSERT(!owns_device || owns_internal, "invalid Memory state");
972 // If at least one of the 'own_*' flags is true then 'registered' must be
973 // true too. An acceptable exception is the special case when 'h_ptr' is
974 // NULL, and both 'own_device' and 'own_internal' are false -- this case is
975 // an exception only when 'own_host' is true and 'registered' is false.
976 MFEM_ASSERT(registered || !(owns_host || owns_device || owns_internal) ||
977 (!(owns_device || owns_internal) && h_ptr == nullptr),
978 "invalid Memory state");
979 if (!mm.exists || !registered) { return; }
980 if (alias)
981 {
982 if (owns_internal)
983 {
984 MFEM_ASSERT(mm.IsAlias(h_ptr), "");
985 MFEM_ASSERT(h_mt == maps->aliases.at(h_ptr).h_mt, "");
986 mm.EraseAlias(h_ptr);
987 }
988 }
989 else // Known
990 {
991 if (owns_host && (h_mt != MemoryType::HOST))
992 { ctrl->Host(h_mt)->Dealloc(h_ptr); }
993 if (owns_internal)
994 {
995 MFEM_ASSERT(mm.IsKnown(h_ptr), "");
996 MFEM_ASSERT(h_mt == maps->memories.at(h_ptr).h_mt, "");
997 mm.Erase(h_ptr, owns_device);
998 }
999 }
1000}
1001
1002void MemoryManager::DeleteDevice_(void *h_ptr, unsigned & flags)
1003{
1004 const bool owns_device = flags & Mem::OWNS_DEVICE;
1005 if (owns_device)
1006 {
1007 mm.EraseDevice(h_ptr);
1008 flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1009 }
1010}
1011
1012bool MemoryManager::MemoryClassCheck_(MemoryClass mc, void *h_ptr,
1013 MemoryType h_mt, size_t bytes,
1014 unsigned flags)
1015{
1016 if (!h_ptr)
1017 {
1018 MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1019 return true;
1020 }
1021 MemoryType d_mt;
1022 if (!(flags & Mem::ALIAS))
1023 {
1024 auto iter = maps->memories.find(h_ptr);
1025 MFEM_VERIFY(iter != maps->memories.end(), "internal error");
1026 d_mt = iter->second.d_mt;
1027 }
1028 else
1029 {
1030 auto iter = maps->aliases.find(h_ptr);
1031 MFEM_VERIFY(iter != maps->aliases.end(), "internal error");
1032 d_mt = iter->second.mem->d_mt;
1033 }
1034 if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1035 switch (mc)
1036 {
1037 case MemoryClass::HOST_32:
1038 {
1039 MFEM_VERIFY(h_mt == MemoryType::HOST_32 ||
1040 h_mt == MemoryType::HOST_64,"");
1041 return true;
1042 }
1043 case MemoryClass::HOST_64:
1044 {
1045 MFEM_VERIFY(h_mt == MemoryType::HOST_64,"");
1046 return true;
1047 }
1048 case MemoryClass::DEVICE:
1049 {
1050 MFEM_VERIFY(d_mt == MemoryType::DEVICE ||
1051 d_mt == MemoryType::DEVICE_DEBUG ||
1052 d_mt == MemoryType::DEVICE_UMPIRE ||
1053 d_mt == MemoryType::DEVICE_UMPIRE_2 ||
1054 d_mt == MemoryType::MANAGED,"");
1055 return true;
1056 }
1057 case MemoryClass::MANAGED:
1058 {
1059 MFEM_VERIFY((h_mt == MemoryType::MANAGED &&
1060 d_mt == MemoryType::MANAGED),"");
1061 return true;
1062 }
1063 default: break;
1064 }
1065 return true;
1066}
1067
1068void *MemoryManager::ReadWrite_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1069 size_t bytes, unsigned &flags)
1070{
1071 if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1072 if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1073 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1074 if (mc < MemoryClass::DEVICE)
1075 {
1076 const bool copy = !(flags & Mem::VALID_HOST);
1077 flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1078 if (flags & Mem::ALIAS)
1079 { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1080 else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1081 }
1082 else
1083 {
1084 const bool copy = !(flags & Mem::VALID_DEVICE);
1085 flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1086 if (flags & Mem::ALIAS)
1087 { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1088 else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1089 }
1090}
1091
1092const void *MemoryManager::Read_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1093 size_t bytes, unsigned &flags)
1094{
1095 if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1096 if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1097 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1098 if (mc < MemoryClass::DEVICE)
1099 {
1100 const bool copy = !(flags & Mem::VALID_HOST);
1101 flags |= Mem::VALID_HOST;
1102 if (flags & Mem::ALIAS)
1103 { return mm.GetAliasHostPtr(h_ptr, bytes, copy); }
1104 else { return mm.GetHostPtr(h_ptr, bytes, copy); }
1105 }
1106 else
1107 {
1108 const bool copy = !(flags & Mem::VALID_DEVICE);
1109 flags |= Mem::VALID_DEVICE;
1110 if (flags & Mem::ALIAS)
1111 { return mm.GetAliasDevicePtr(h_ptr, bytes, copy); }
1112 else { return mm.GetDevicePtr(h_ptr, bytes, copy); }
1113 }
1114}
1115
1116void *MemoryManager::Write_(void *h_ptr, MemoryType h_mt, MemoryClass mc,
1117 size_t bytes, unsigned &flags)
1118{
1119 if (h_ptr) { CheckHostMemoryType_(h_mt, h_ptr, flags & Mem::ALIAS); }
1120 if (bytes > 0) { MFEM_VERIFY(flags & Mem::Registered,""); }
1121 MFEM_ASSERT(MemoryClassCheck_(mc, h_ptr, h_mt, bytes, flags),"");
1122 if (mc < MemoryClass::DEVICE)
1123 {
1124 flags = (flags | Mem::VALID_HOST) & ~Mem::VALID_DEVICE;
1125 if (flags & Mem::ALIAS)
1126 { return mm.GetAliasHostPtr(h_ptr, bytes, false); }
1127 else { return mm.GetHostPtr(h_ptr, bytes, false); }
1128 }
1129 else
1130 {
1131 flags = (flags | Mem::VALID_DEVICE) & ~Mem::VALID_HOST;
1132 if (flags & Mem::ALIAS)
1133 { return mm.GetAliasDevicePtr(h_ptr, bytes, false); }
1134 else { return mm.GetDevicePtr(h_ptr, bytes, false); }
1135 }
1136}
1137
1138void MemoryManager::SyncAlias_(const void *base_h_ptr, void *alias_h_ptr,
1139 size_t alias_bytes, unsigned base_flags,
1140 unsigned &alias_flags)
1141{
1142 // This is called only when (base_flags & Mem::Registered) is true.
1143 // Note that (alias_flags & Registered) may not be true.
1144 MFEM_ASSERT(alias_flags & Mem::ALIAS, "not an alias");
1145 if ((base_flags & Mem::VALID_HOST) && !(alias_flags & Mem::VALID_HOST))
1146 {
1147 mm.GetAliasHostPtr(alias_h_ptr, alias_bytes, true);
1148 }
1149 if ((base_flags & Mem::VALID_DEVICE) && !(alias_flags & Mem::VALID_DEVICE))
1150 {
1151 if (!(alias_flags & Mem::Registered))
1152 {
1153 mm.InsertAlias(base_h_ptr, alias_h_ptr, alias_bytes, base_flags & Mem::ALIAS);
1154 alias_flags = (alias_flags | Mem::Registered | Mem::OWNS_INTERNAL) &
1155 ~(Mem::OWNS_HOST | Mem::OWNS_DEVICE);
1156 }
1157 mm.GetAliasDevicePtr(alias_h_ptr, alias_bytes, true);
1158 }
1159 alias_flags = (alias_flags & ~(Mem::VALID_HOST | Mem::VALID_DEVICE)) |
1160 (base_flags & (Mem::VALID_HOST | Mem::VALID_DEVICE));
1161}
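// Example (an illustrative sketch, not compiled): SyncAlias_() backs
// Memory<T>::SyncAlias(), which an alias uses to catch up with the validity
// flags of its base after the base has moved:
#if 0
mfem::Memory<double> base; base.New(100);
mfem::Memory<double> win;  win.MakeAlias(base, 0, 50);
base.Write(mfem::MemoryClass::DEVICE, 100); // base now valid on device only
win.SyncAlias(base, 50);                    // win's flags now match base's
#endif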
1162
1163MemoryType MemoryManager::GetDeviceMemoryType_(void *h_ptr, bool alias)
1164{
1165 if (mm.exists)
1166 {
1167 if (!alias)
1168 {
1169 auto iter = maps->memories.find(h_ptr);
1170 MFEM_ASSERT(iter != maps->memories.end(), "internal error");
1171 return iter->second.d_mt;
1172 }
1173 // alias == true
1174 auto iter = maps->aliases.find(h_ptr);
1175 MFEM_ASSERT(iter != maps->aliases.end(), "internal error");
1176 return iter->second.mem->d_mt;
1177 }
1178 MFEM_ABORT("internal error");
1179 return MemoryManager::host_mem_type;
1180}
1181
1182MemoryType MemoryManager::GetHostMemoryType_(void *h_ptr)
1183{
1184 if (!mm.exists) { return MemoryManager::host_mem_type; }
1185 if (mm.IsKnown(h_ptr)) { return maps->memories.at(h_ptr).h_mt; }
1186 if (mm.IsAlias(h_ptr)) { return maps->aliases.at(h_ptr).h_mt; }
1187 return MemoryManager::host_mem_type;
1188}
1189
1190void MemoryManager::Copy_(void *dst_h_ptr, const void *src_h_ptr,
1191 size_t bytes, unsigned src_flags,
1192 unsigned &dst_flags)
1193{
1194 // Type of copy to use based on the src and dest validity flags:
1195 // | src
1196 // | h | d | hd
1197 // -----------+-----+-----+------
1198 // h | h2h d2h h2h
1199 // dest d | h2d d2d d2d
1200 // hd | h2h d2d d2d
1201
1202 MFEM_ASSERT(bytes != 0, "this method should not be called with bytes = 0");
1203 MFEM_ASSERT(dst_h_ptr != nullptr, "invalid dst_h_ptr = nullptr");
1204 MFEM_ASSERT(src_h_ptr != nullptr, "invalid src_h_ptr = nullptr");
1205
1206 const bool dst_on_host =
1207 (dst_flags & Mem::VALID_HOST) &&
1208 (!(dst_flags & Mem::VALID_DEVICE) ||
1209 ((src_flags & Mem::VALID_HOST) && !(src_flags & Mem::VALID_DEVICE)));
1210
1211 dst_flags = dst_flags &
1212 ~(dst_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1213
1214 const bool src_on_host =
1215 (src_flags & Mem::VALID_HOST) &&
1216 (!(src_flags & Mem::VALID_DEVICE) ||
1217 ((dst_flags & Mem::VALID_HOST) && !(dst_flags & Mem::VALID_DEVICE)));
1218
1219 const void *src_d_ptr =
1220 src_on_host ? NULL :
1221 ((src_flags & Mem::ALIAS) ?
1222 mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1223 mm.GetDevicePtr(src_h_ptr, bytes, false));
1224
1225 if (dst_on_host)
1226 {
1227 if (src_on_host)
1228 {
1229 if (dst_h_ptr != src_h_ptr && bytes != 0)
1230 {
1231 MFEM_ASSERT((const char*)dst_h_ptr + bytes <= src_h_ptr ||
1232 (const char*)src_h_ptr + bytes <= dst_h_ptr,
1233 "data overlaps!");
1234 std::memcpy(dst_h_ptr, src_h_ptr, bytes);
1235 }
1236 }
1237 else
1238 {
1239 if (dst_h_ptr != src_d_ptr && bytes != 0)
1240 {
1241 MemoryType src_d_mt = (src_flags & Mem::ALIAS) ?
1242 maps->aliases.at(src_h_ptr).mem->d_mt :
1243 maps->memories.at(src_h_ptr).d_mt;
1244 ctrl->Device(src_d_mt)->DtoH(dst_h_ptr, src_d_ptr, bytes);
1245 }
1246 }
1247 }
1248 else
1249 {
1250 void *dest_d_ptr = (dst_flags & Mem::ALIAS) ?
1251 mm.GetAliasDevicePtr(dst_h_ptr, bytes, false) :
1252 mm.GetDevicePtr(dst_h_ptr, bytes, false);
1253 if (src_on_host)
1254 {
1255 const bool known = mm.IsKnown(dst_h_ptr);
1256 const bool alias = dst_flags & Mem::ALIAS;
1257 MFEM_VERIFY(alias||known,"");
1258 const MemoryType d_mt = known ?
1259 maps->memories.at(dst_h_ptr).d_mt :
1260 maps->aliases.at(dst_h_ptr).mem->d_mt;
1261 ctrl->Device(d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1262 }
1263 else
1264 {
1265 if (dest_d_ptr != src_d_ptr && bytes != 0)
1266 {
1267 const bool known = mm.IsKnown(dst_h_ptr);
1268 const bool alias = dst_flags & Mem::ALIAS;
1269 MFEM_VERIFY(alias||known,"");
1270 const MemoryType d_mt = known ?
1271 maps->memories.at(dst_h_ptr).d_mt :
1272 maps->aliases.at(dst_h_ptr).mem->d_mt;
1273 ctrl->Device(d_mt)->DtoD(dest_d_ptr, src_d_ptr, bytes);
1274 }
1275 }
1276 }
1277}
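// Example (an illustrative sketch, not compiled): Copy_() is what backs
// Memory<T>::CopyFrom(). Per the table above, copying a device-valid source
// into a host-valid destination takes the d2h branch:
#if 0
mfem::Memory<double> src; src.New(64);
src.Write(mfem::MemoryClass::DEVICE, 64); // src: valid on device only
mfem::Memory<double> dst; dst.New(64);    // dst: valid on host
dst.CopyFrom(src, 64);                    // DtoH copy; dst stays host-valid
src.Delete(); dst.Delete();
#endif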
1278
1279void MemoryManager::CopyToHost_(void *dest_h_ptr, const void *src_h_ptr,
1280 size_t bytes, unsigned src_flags)
1281{
1282 MFEM_ASSERT(bytes != 0, "this method should not be called with bytes = 0");
1283 MFEM_ASSERT(dest_h_ptr != nullptr, "invalid dest_h_ptr = nullptr");
1284 MFEM_ASSERT(src_h_ptr != nullptr, "invalid src_h_ptr = nullptr");
1285
1286 const bool src_on_host = src_flags & Mem::VALID_HOST;
1287 if (src_on_host)
1288 {
1289 if (dest_h_ptr != src_h_ptr && bytes != 0)
1290 {
1291 MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1292 (const char*)src_h_ptr + bytes <= dest_h_ptr,
1293 "data overlaps!");
1294 std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1295 }
1296 }
1297 else
1298 {
1299 MFEM_ASSERT(IsKnown_(src_h_ptr), "internal error");
1300 const void *src_d_ptr = (src_flags & Mem::ALIAS) ?
1301 mm.GetAliasDevicePtr(src_h_ptr, bytes, false) :
1302 mm.GetDevicePtr(src_h_ptr, bytes, false);
1303 MemoryType src_d_mt = (src_flags & Mem::ALIAS) ?
1304 maps->aliases.at(src_h_ptr).mem->d_mt :
1305 maps->memories.at(src_h_ptr).d_mt;
1306 ctrl->Device(src_d_mt)->DtoH(dest_h_ptr, src_d_ptr, bytes);
1307 }
1308}
1309
1310void MemoryManager::CopyFromHost_(void *dest_h_ptr, const void *src_h_ptr,
1311 size_t bytes, unsigned &dest_flags)
1312{
1313 MFEM_ASSERT(bytes != 0, "this method should not be called with bytes = 0");
1314 MFEM_ASSERT(dest_h_ptr != nullptr, "invalid dest_h_ptr = nullptr");
1315 MFEM_ASSERT(src_h_ptr != nullptr, "invalid src_h_ptr = nullptr");
1316
1317 const bool dest_on_host = dest_flags & Mem::VALID_HOST;
1318 if (dest_on_host)
1319 {
1320 if (dest_h_ptr != src_h_ptr && bytes != 0)
1321 {
1322 MFEM_ASSERT((char*)dest_h_ptr + bytes <= src_h_ptr ||
1323 (const char*)src_h_ptr + bytes <= dest_h_ptr,
1324 "data overlaps!");
1325 std::memcpy(dest_h_ptr, src_h_ptr, bytes);
1326 }
1327 }
1328 else
1329 {
1330 void *dest_d_ptr = (dest_flags & Mem::ALIAS) ?
1331 mm.GetAliasDevicePtr(dest_h_ptr, bytes, false) :
1332 mm.GetDevicePtr(dest_h_ptr, bytes, false);
1333 MemoryType dest_d_mt = (dest_flags & Mem::ALIAS) ?
1334 maps->aliases.at(dest_h_ptr).mem->d_mt :
1335 maps->memories.at(dest_h_ptr).d_mt;
1336 ctrl->Device(dest_d_mt)->HtoD(dest_d_ptr, src_h_ptr, bytes);
1337 }
1338 dest_flags = dest_flags &
1339 ~(dest_on_host ? Mem::VALID_DEVICE : Mem::VALID_HOST);
1340}
1341
1342bool MemoryManager::IsKnown_(const void *h_ptr)
1343{
1344 return maps->memories.find(h_ptr) != maps->memories.end();
1345}
1346
1347bool MemoryManager::IsAlias_(const void *h_ptr)
1348{
1349 return maps->aliases.find(h_ptr) != maps->aliases.end();
1350}
1351
1352void MemoryManager::Insert(void *h_ptr, size_t bytes,
1353 MemoryType h_mt, MemoryType d_mt)
1354{
1355#ifdef MFEM_TRACK_MEM_MANAGER
1356 mfem::out << "[mfem memory manager]: registering h_ptr: " << h_ptr
1357 << ", bytes: " << bytes << std::endl;
1358#endif
1359 if (h_ptr == NULL)
1360 {
1361 MFEM_VERIFY(bytes == 0, "Trying to add NULL with size " << bytes);
1362 return;
1363 }
1364 MFEM_VERIFY_TYPES(h_mt, d_mt);
1365#ifdef MFEM_DEBUG
1366 auto res =
1367#endif
1368 maps->memories.emplace(h_ptr, internal::Memory(h_ptr, bytes, h_mt, d_mt));
1369#ifdef MFEM_DEBUG
1370 if (res.second == false)
1371 {
1372 auto &m = res.first->second;
1373 MFEM_VERIFY(m.bytes >= bytes && m.h_mt == h_mt &&
1374 (m.d_mt == d_mt || (d_mt == MemoryType::DEFAULT &&
1375 m.d_mt == GetDualMemoryType(h_mt))),
1376 "Address already present with different attributes!");
1377#ifdef MFEM_TRACK_MEM_MANAGER
1378 mfem::out << "[mfem memory manager]: repeated registration of h_ptr: "
1379 << h_ptr << std::endl;
1380#endif
1381 }
1382#endif
1383}
1384
1385void MemoryManager::InsertDevice(void *d_ptr, void *h_ptr, size_t bytes,
1386 MemoryType h_mt, MemoryType d_mt)
1387{
1388 // MFEM_VERIFY_TYPES(h_mt, d_mt); // done by Insert() below
1389 MFEM_ASSERT(h_ptr != NULL, "internal error");
1390 Insert(h_ptr, bytes, h_mt, d_mt);
1391 internal::Memory &mem = maps->memories.at(h_ptr);
1392 if (d_ptr == NULL && bytes != 0) { ctrl->Device(d_mt)->Alloc(mem); }
1393 else { mem.d_ptr = d_ptr; }
1394}
1395
1396void MemoryManager::InsertAlias(const void *base_ptr, void *alias_ptr,
1397 const size_t bytes, const bool base_is_alias)
1398{
1399 size_t offset = static_cast<size_t>(static_cast<const char*>(alias_ptr) -
1400 static_cast<const char*>(base_ptr));
1401#ifdef MFEM_TRACK_MEM_MANAGER
1402 mfem::out << "[mfem memory manager]: registering alias of base_ptr: "
1403 << base_ptr << ", offset: " << offset << ", bytes: " << bytes
1404 << ", base is alias: " << base_is_alias << std::endl;
1405#endif
1406 if (!base_ptr)
1407 {
1408 MFEM_VERIFY(offset == 0,
1409 "Trying to add alias to NULL at offset " << offset);
1410 return;
1411 }
1412 if (base_is_alias)
1413 {
1414 const internal::Alias &alias = maps->aliases.at(base_ptr);
1415 MFEM_ASSERT(alias.mem,"");
1416 base_ptr = alias.mem->h_ptr;
1417 offset += alias.offset;
1418#ifdef MFEM_TRACK_MEM_MANAGER
1419 mfem::out << "[mfem memory manager]: real base_ptr: " << base_ptr
1420 << std::endl;
1421#endif
1422 }
1423 internal::Memory &mem = maps->memories.at(base_ptr);
1424 MFEM_VERIFY(offset + bytes <= mem.bytes, "invalid alias");
1425 auto res =
1426 maps->aliases.emplace(alias_ptr,
1427 internal::Alias{&mem, offset, 1, mem.h_mt});
1428 if (res.second == false) // alias_ptr was already in the map
1429 {
1430 internal::Alias &alias = res.first->second;
1431 // Update the alias data in case the existing alias is dangling
1432 alias.mem = &mem;
1433 alias.offset = offset;
1434 alias.h_mt = mem.h_mt;
1435 alias.counter++;
1436 }
1437}
1438
1439void MemoryManager::Erase(void *h_ptr, bool free_dev_ptr)
1440{
1441#ifdef MFEM_TRACK_MEM_MANAGER
1442 mfem::out << "[mfem memory manager]: un-registering h_ptr: " << h_ptr
1443 << std::endl;
1444#endif
1445 if (!h_ptr) { return; }
1446 auto mem_map_iter = maps->memories.find(h_ptr);
1447 if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1448 internal::Memory &mem = mem_map_iter->second;
1449 if (mem.d_ptr && free_dev_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1450 maps->memories.erase(mem_map_iter);
1451}
1452
1453void MemoryManager::EraseDevice(void *h_ptr)
1454{
1455 if (!h_ptr) { return; }
1456 auto mem_map_iter = maps->memories.find(h_ptr);
1457 if (mem_map_iter == maps->memories.end()) { mfem_error("Unknown pointer!"); }
1458 internal::Memory &mem = mem_map_iter->second;
1459 if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem);}
1460 mem.d_ptr = nullptr;
1461}
1462
1463void MemoryManager::EraseAlias(void *alias_ptr)
1464{
1465#ifdef MFEM_TRACK_MEM_MANAGER
1466 mfem::out << "[mfem memory manager]: un-registering alias_ptr: " << alias_ptr
1467 << std::endl;
1468#endif
1469 if (!alias_ptr) { return; }
1470 auto alias_map_iter = maps->aliases.find(alias_ptr);
1471 if (alias_map_iter == maps->aliases.end()) { mfem_error("Unknown alias!"); }
1472 internal::Alias &alias = alias_map_iter->second;
1473 if (--alias.counter) { return; }
1474 maps->aliases.erase(alias_map_iter);
1475}
1476
1477void *MemoryManager::GetDevicePtr(const void *h_ptr, size_t bytes,
1478 bool copy_data)
1479{
1480 if (!h_ptr)
1481 {
1482 MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1483 return NULL;
1484 }
1485 internal::Memory &mem = maps->memories.at(h_ptr);
1486 const MemoryType &h_mt = mem.h_mt;
1487 MemoryType &d_mt = mem.d_mt;
1488 MFEM_VERIFY_TYPES(h_mt, d_mt);
1489 if (!mem.d_ptr)
1490 {
1491 if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1492 if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1493 }
1494 // Aliases might have done some protections
1495 if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1496 if (copy_data)
1497 {
1498 MFEM_ASSERT(bytes <= mem.bytes, "invalid copy size");
1499 if (bytes) { ctrl->Device(d_mt)->HtoD(mem.d_ptr, h_ptr, bytes); }
1500 }
1501 ctrl->Host(h_mt)->Protect(mem, bytes);
1502 return mem.d_ptr;
1503}
1504
1505void *MemoryManager::GetAliasDevicePtr(const void *alias_ptr, size_t bytes,
1506 bool copy)
1507{
1508 if (!alias_ptr)
1509 {
1510 MFEM_VERIFY(bytes == 0, "Trying to access NULL with size " << bytes);
1511 return NULL;
1512 }
1513 auto &alias_map = maps->aliases;
1514 auto alias_map_iter = alias_map.find(alias_ptr);
1515 if (alias_map_iter == alias_map.end()) { mfem_error("alias not found"); }
1516 const internal::Alias &alias = alias_map_iter->second;
1517 const size_t offset = alias.offset;
1518 internal::Memory &mem = *alias.mem;
1519 const MemoryType &h_mt = mem.h_mt;
1520 MemoryType &d_mt = mem.d_mt;
1521 MFEM_VERIFY_TYPES(h_mt, d_mt);
1522 if (!mem.d_ptr)
1523 {
1524 if (d_mt == MemoryType::DEFAULT) { d_mt = GetDualMemoryType(h_mt); }
1525 if (mem.bytes) { ctrl->Device(d_mt)->Alloc(mem); }
1526 }
1527 void *alias_h_ptr = static_cast<char*>(mem.h_ptr) + offset;
1528 void *alias_d_ptr = static_cast<char*>(mem.d_ptr) + offset;
1529 MFEM_ASSERT(alias_h_ptr == alias_ptr, "internal error");
1530 MFEM_ASSERT(offset + bytes <= mem.bytes, "internal error");
1531 mem.d_rw = mem.h_rw = false;
1532 if (mem.d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1533 ctrl->Host(h_mt)->AliasUnprotect(alias_ptr, bytes);
1534 if (copy && mem.d_ptr)
1535 { ctrl->Device(d_mt)->HtoD(alias_d_ptr, alias_h_ptr, bytes); }
1536 ctrl->Host(h_mt)->AliasProtect(alias_ptr, bytes);
1537 return alias_d_ptr;
1538}
1539
1540void *MemoryManager::GetHostPtr(const void *ptr, size_t bytes, bool copy)
1541{
1542 const internal::Memory &mem = maps->memories.at(ptr);
1543 MFEM_ASSERT(mem.h_ptr == ptr, "internal error");
1544 MFEM_ASSERT(bytes <= mem.bytes, "internal error")
1545 const MemoryType &h_mt = mem.h_mt;
1546 const MemoryType &d_mt = mem.d_mt;
1547 MFEM_VERIFY_TYPES(h_mt, d_mt);
1548 // Aliases might have done some protections
1549 ctrl->Host(h_mt)->Unprotect(mem, bytes);
1550 if (mem.d_ptr) { ctrl->Device(d_mt)->Unprotect(mem); }
1551 if (copy && mem.d_ptr) { ctrl->Device(d_mt)->DtoH(mem.h_ptr, mem.d_ptr, bytes); }
1552 if (mem.d_ptr) { ctrl->Device(d_mt)->Protect(mem); }
1553 return mem.h_ptr;
1554}
1555
1556void *MemoryManager::GetAliasHostPtr(const void *ptr, size_t bytes,
1557 bool copy_data)
1558{
1559 const internal::Alias &alias = maps->aliases.at(ptr);
1560 const internal::Memory *const mem = alias.mem;
1561 const MemoryType &h_mt = mem->h_mt;
1562 const MemoryType &d_mt = mem->d_mt;
1563 MFEM_VERIFY_TYPES(h_mt, d_mt);
1564 void *alias_h_ptr = static_cast<char*>(mem->h_ptr) + alias.offset;
1565 void *alias_d_ptr = static_cast<char*>(mem->d_ptr) + alias.offset;
1566 MFEM_ASSERT(alias_h_ptr == ptr, "internal error");
1567 mem->h_rw = false;
1568 ctrl->Host(h_mt)->AliasUnprotect(alias_h_ptr, bytes);
1569 if (mem->d_ptr) { ctrl->Device(d_mt)->AliasUnprotect(alias_d_ptr, bytes); }
1570 if (copy_data && mem->d_ptr)
1571 { ctrl->Device(d_mt)->DtoH(const_cast<void*>(ptr), alias_d_ptr, bytes); }
1572 if (mem->d_ptr) { ctrl->Device(d_mt)->AliasProtect(alias_d_ptr, bytes); }
1573 return alias_h_ptr;
1574}
1575
1576void MemoryManager::Init()
1577{
1578 if (exists) { return; }
1579 maps = new internal::Maps();
1580 ctrl = new internal::Ctrl();
1581 ctrl->Configure();
1582 exists = true;
1583}
1584
1585MemoryManager::MemoryManager() { Init(); }
1586
1587MemoryManager::~MemoryManager() { if (exists) { Destroy(); } }
1588
1589void MemoryManager::SetDualMemoryType(MemoryType mt, MemoryType dual_mt)
1590{
1591 MFEM_VERIFY(!configured, "changing the dual MemoryTypes is not allowed after"
1592 " MemoryManager configuration!");
1593 UpdateDualMemoryType(mt, dual_mt);
1594}
1595
1596void MemoryManager::UpdateDualMemoryType(MemoryType mt, MemoryType dual_mt)
1597{
1598 MFEM_VERIFY((int)mt < MemoryTypeSize,
1599 "invalid MemoryType, mt = " << (int)mt);
1600 MFEM_VERIFY((int)dual_mt < MemoryTypeSize,
1601 "invalid dual MemoryType, dual_mt = " << (int)dual_mt);
1602
1603 if ((IsHostMemory(mt) && IsDeviceMemory(dual_mt)) ||
1604 (IsDeviceMemory(mt) && IsHostMemory(dual_mt)))
1605 {
1606 dual_map[(int)mt] = dual_mt;
1607 }
1608 else
1609 {
1610 // mt + dual_mt is not a pair of host + device types: this is only allowed
1611 // when mt == dual_mt and mt is a host type; in this case we do not
1612 // actually update the dual
1613 MFEM_VERIFY(mt == dual_mt && IsHostMemory(mt),
1614 "invalid (mt, dual_mt) pair: ("
1615 << MemoryTypeName[(int)mt] << ", "
1616 << MemoryTypeName[(int)dual_mt] << ')');
1617 }
1618}
1619
1620void MemoryManager::Configure(const MemoryType host_mt,
1621 const MemoryType device_mt)
1622{
1623 MemoryManager::UpdateDualMemoryType(host_mt, device_mt);
1624 MemoryManager::UpdateDualMemoryType(device_mt, host_mt);
1625 if (device_mt == MemoryType::DEVICE_DEBUG)
1626 {
1627 for (int mt = (int)MemoryType::HOST; mt < (int)MemoryType::MANAGED; mt++)
1628 {
1629 MemoryManager::UpdateDualMemoryType(
1630 (MemoryType)mt, MemoryType::DEVICE_DEBUG);
1631 }
1632 }
1633 Init();
1634 host_mem_type = host_mt;
1635 device_mem_type = device_mt;
1636 configured = true;
1637}
1638
1639void MemoryManager::Destroy()
1640{
1641 MFEM_VERIFY(exists, "MemoryManager has already been destroyed!");
1642#ifdef MFEM_TRACK_MEM_MANAGER
1643 size_t num_memories = maps->memories.size();
1644 size_t num_aliases = maps->aliases.size();
1645 if (num_memories != 0 || num_aliases != 0)
1646 {
1647 MFEM_WARNING("...\n\t number of registered pointers: " << num_memories
1648 << "\n\t number of registered aliases : " << num_aliases);
1649 }
1650#endif
1651 // Keep for debugging purposes:
1652#if 0
1653 mfem::out << "Destroying the MemoryManager ...\n"
1654 << "remaining registered pointers : "
1655 << maps->memories.size() << '\n'
1656 << "remaining registered aliases : "
1657 << maps->aliases.size() << '\n';
1658#endif
1659 for (auto& n : maps->memories)
1660 {
1661 internal::Memory &mem = n.second;
1662 bool mem_h_ptr = mem.h_mt != MemoryType::HOST && mem.h_ptr;
1663 if (mem_h_ptr) { ctrl->Host(mem.h_mt)->Dealloc(mem.h_ptr); }
1664 if (mem.d_ptr) { ctrl->Device(mem.d_mt)->Dealloc(mem); }
1665 }
1666 delete maps; maps = nullptr;
1667 delete ctrl; ctrl = nullptr;
1668 host_mem_type = MemoryType::HOST;
1669 device_mem_type = MemoryType::HOST;
1670 exists = false;
1671 configured = false;
1672}
1673
1674void MemoryManager::RegisterCheck(void *ptr)
1675{
1676 if (ptr != NULL)
1677 {
1678 if (!IsKnown(ptr))
1679 {
1680 mfem_error("Pointer is not registered!");
1681 }
1682 }
1683}
1684
1685int MemoryManager::PrintPtrs(std::ostream &os)
1686{
1687 int n_out = 0;
1688 for (const auto& n : maps->memories)
1689 {
1690 const internal::Memory &mem = n.second;
1691 os << "\nkey " << n.first << ", "
1692 << "h_ptr " << mem.h_ptr << ", "
1693 << "d_ptr " << mem.d_ptr;
1694 n_out++;
1695 }
1696 if (maps->memories.size() > 0) { os << std::endl; }
1697 return n_out;
1698}
1699
1700int MemoryManager::PrintAliases(std::ostream &os)
1701{
1702 int n_out = 0;
1703 for (const auto& n : maps->aliases)
1704 {
1705 const internal::Alias &alias = n.second;
1706 os << "\nalias: key " << n.first << ", "
1707 << "h_ptr " << alias.mem->h_ptr << ", "
1708 << "offset " << alias.offset << ", "
1709 << "counter " << alias.counter;
1710 n_out++;
1711 }
1712 if (maps->aliases.size() > 0) { os << std::endl; }
1713 return n_out;
1714}
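// Debugging sketch (illustrative): both print helpers can be invoked on the
// global manager, e.g. from a test or a debugger session:
//
//    int np = mfem::mm.PrintPtrs(mfem::out);
//    int na = mfem::mm.PrintAliases(mfem::out);
//    mfem::out << np << " pointers, " << na << " aliases registered\n";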
1715
1716int MemoryManager::CompareHostAndDevice_(void *h_ptr, size_t size,
1717 unsigned flags)
1718{
1719 void *d_ptr = (flags & Mem::ALIAS) ?
1720 mm.GetAliasDevicePtr(h_ptr, size, false) :
1721 mm.GetDevicePtr(h_ptr, size, false);
1722 char *h_buf = new char[size];
1723#if defined(MFEM_USE_CUDA)
1724 CuMemcpyDtoH(h_buf, d_ptr, size);
1725#elif defined(MFEM_USE_HIP)
1726 HipMemcpyDtoH(h_buf, d_ptr, size);
1727#else
1728 std::memcpy(h_buf, d_ptr, size);
1729#endif
1730 int res = std::memcmp(h_ptr, h_buf, size);
1731 delete [] h_buf;
1732 return res;
1733}
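// Checking sketch (illustrative; assumes the public wrapper
// Memory<T>::CompareHostAndDevice scales its element count to bytes before
// forwarding here): a zero result means the two images match byte-for-byte,
// following std::memcmp semantics.
//
//    mfem::Vector v(32);
//    v.UseDevice(true);
//    v = 0.0;       // written on the device
//    v.HostRead();  // bring the host image up to date
//    MFEM_VERIFY(v.GetMemory().CompareHostAndDevice(v.Size()) == 0,
//                "host/device mismatch");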
1734
1735
1736void MemoryPrintFlags(unsigned flags)
1737{
1738 typedef Memory<int> Mem;
1739 mfem::out
1740 << "\n registered = " << bool(flags & Mem::Registered)
1741 << "\n owns host = " << bool(flags & Mem::OWNS_HOST)
1742 << "\n owns device = " << bool(flags & Mem::OWNS_DEVICE)
1743 << "\n owns internal = " << bool(flags & Mem::OWNS_INTERNAL)
1744 << "\n valid host = " << bool(flags & Mem::VALID_HOST)
1745 << "\n valid device = " << bool(flags & Mem::VALID_DEVICE)
1746 << "\n device flag = " << bool(flags & Mem::USE_DEVICE)
1747 << "\n alias = " << bool(flags & Mem::ALIAS)
1748 << std::endl;
1749}
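// Debugger sketch (illustrative): this helper is meant for interactive use;
// the Memory class exposes the same output through PrintFlags():
//
//    mfem::Vector v(8);
//    v.GetMemory().PrintFlags(); // prints the flag bits listed above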
1750
1751void MemoryManager::CheckHostMemoryType_(MemoryType h_mt, void *h_ptr,
1752 bool alias)
1753{
1754 if (!mm.exists) {return;}
1755 if (!alias)
1756 {
1757 auto it = maps->memories.find(h_ptr);
1758 MFEM_VERIFY(it != maps->memories.end(),
1759 "host pointer is not registered: h_ptr = " << h_ptr);
1760 MFEM_VERIFY(h_mt == it->second.h_mt, "host pointer MemoryType mismatch");
1761 }
1762 else
1763 {
1764 auto it = maps->aliases.find(h_ptr);
1765 MFEM_VERIFY(it != maps->aliases.end(),
1766 "alias pointer is not registered: h_ptr = " << h_ptr);
1767 MFEM_VERIFY(h_mt == it->second.h_mt, "alias pointer MemoryType mismatch");
1768 }
1769}
1770
1771MemoryManager mm;
1772
1773bool MemoryManager::exists = false;
1774bool MemoryManager::configured = false;
1775
1776MemoryType MemoryManager::host_mem_type = MemoryType::HOST;
1777MemoryType MemoryManager::device_mem_type = MemoryType::HOST;
1778
1779MemoryType MemoryManager::dual_map[MemoryTypeSize] =
1780{
1781 /* HOST */ MemoryType::DEVICE,
1782 /* HOST_32 */ MemoryType::DEVICE,
1783 /* HOST_64 */ MemoryType::DEVICE,
1784 /* HOST_DEBUG */ MemoryType::DEVICE_DEBUG,
1785 /* HOST_UMPIRE */ MemoryType::DEVICE_UMPIRE,
1786 /* HOST_PINNED */ MemoryType::DEVICE,
1787 /* MANAGED */ MemoryType::MANAGED,
1788 /* DEVICE */ MemoryType::HOST,
1789 /* DEVICE_DEBUG */ MemoryType::HOST_DEBUG,
1790 /* DEVICE_UMPIRE */ MemoryType::HOST_UMPIRE,
1791 /* DEVICE_UMPIRE_2 */ MemoryType::HOST_UMPIRE
1792};
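// Lookup sketch (illustrative): the defaults pair host and device types in
// both directions, except that DEVICE_UMPIRE_2 maps back to HOST_UMPIRE:
//
//    using mfem::MemoryType;
//    MFEM_VERIFY(mfem::MemoryManager::GetDualMemoryType(
//                   MemoryType::HOST_UMPIRE) == MemoryType::DEVICE_UMPIRE,
//                "");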
1793
1794#ifdef MFEM_USE_UMPIRE
1795const char * MemoryManager::h_umpire_name = "MFEM_HOST";
1796const char * MemoryManager::d_umpire_name = "MFEM_DEVICE";
1797const char * MemoryManager::d_umpire_2_name = "MFEM_DEVICE_2";
1798#endif
1799
1800
1801const char *MemoryTypeName[MemoryTypeSize] =
1802{
1803 "host-std", "host-32", "host-64", "host-debug", "host-umpire", "host-pinned",
1804#if defined(MFEM_USE_CUDA)
1805 "cuda-uvm",
1806 "cuda",
1807#elif defined(MFEM_USE_HIP)
1808 "hip-uvm",
1809 "hip",
1810#else
1811 "managed",
1812 "device",
1813#endif
1814 "device-debug",
1815#if defined(MFEM_USE_CUDA)
1816 "cuda-umpire",
1817 "cuda-umpire-2",
1818#elif defined(MFEM_USE_HIP)
1819 "hip-umpire",
1820 "hip-umpire-2",
1821#else
1822 "device-umpire",
1823 "device-umpire-2",
1824#endif
1825};
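// Indexing sketch (illustrative): the entries above must stay in MemoryType
// enumeration order, so a type can be printed directly by casting:
//
//    mfem::out << mfem::MemoryTypeName[(int)mfem::MemoryType::HOST_64]
//              << '\n'; // prints "host-64"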
1826
1827} // namespace mfem