MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
device.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
#include "device.hpp"
#include "forall.hpp"
#include "occa.hpp"
#ifdef MFEM_USE_CEED
// NOTE(review): include restored -- the scraped file lost the line inside this
// guard; ceed::BasisMap/ceed::RestrMap below are declared in this header.
#include "../fem/ceed/interface/util.hpp"
#endif
#ifdef MFEM_USE_MPI
#include "communication.hpp"
#include "../linalg/hypre.hpp"
#endif

#include <unordered_map>
#include <map>
25
26namespace mfem
27{
28
29// Place the following variables in the mfem::internal namespace, so that they
30// will not be included in the doxygen documentation.
31namespace internal
32{
33
34#ifdef MFEM_USE_OCCA
35// Default occa::device used by MFEM.
36occa::device occaDevice;
37#endif
38
39#ifdef MFEM_USE_CEED
40Ceed ceed = NULL;
41
42ceed::BasisMap ceed_basis_map;
43ceed::RestrMap ceed_restr_map;
44#endif
45
46// Backends listed by priority, high to low:
47static const Backend::Id backend_list[Backend::NUM_BACKENDS] =
48{
53};
54
55// Backend names listed by priority, high to low:
56static const char *backend_name[Backend::NUM_BACKENDS] =
57{
58 "ceed-cuda", "occa-cuda", "raja-cuda", "cuda",
59 "ceed-hip", "raja-hip", "hip", "debug",
60 "occa-omp", "raja-omp", "omp",
61 "ceed-cpu", "occa-cpu", "raja-cpu", "cpu"
62};
63
64} // namespace mfem::internal
65
66
67// Initialize the unique global Device variable.
// The unique global Device instance returned by Device::Get().
Device Device::device_singleton;
// True when the device was configured via the MFEM_DEVICE environment variable.
bool Device::device_env = false;
// True when MFEM_MEMORY fixed the host MemoryType (see Device::Device()).
bool Device::mem_host_env = false;
// True when MFEM_MEMORY also fixed the device MemoryType (cuda/hip/uvm).
bool Device::mem_device_env = false;
// True when Device::SetMemoryTypes() was called explicitly.
bool Device::mem_types_set = false;
73
75{
76 if (GetEnv("MFEM_MEMORY") && !mem_host_env && !mem_device_env)
77 {
78 std::string mem_backend(GetEnv("MFEM_MEMORY"));
79 if (mem_backend == "host")
80 {
81 mem_host_env = true;
82 host_mem_type = MemoryType::HOST;
83 device_mem_type = MemoryType::HOST;
84 }
85 else if (mem_backend == "host32")
86 {
87 mem_host_env = true;
88 host_mem_type = MemoryType::HOST_32;
89 device_mem_type = MemoryType::HOST_32;
90 }
91 else if (mem_backend == "host64")
92 {
93 mem_host_env = true;
94 host_mem_type = MemoryType::HOST_64;
95 device_mem_type = MemoryType::HOST_64;
96 }
97 else if (mem_backend == "umpire")
98 {
99 mem_host_env = true;
100 host_mem_type = MemoryType::HOST_UMPIRE;
101 // Note: device_mem_type will be set to MemoryType::DEVICE_UMPIRE only
102 // when an actual device is configured -- this is done later in
103 // Device::UpdateMemoryTypeAndClass().
104 device_mem_type = MemoryType::HOST_UMPIRE;
105 }
106 else if (mem_backend == "debug")
107 {
108 mem_host_env = true;
109 host_mem_type = MemoryType::HOST_DEBUG;
110 // Note: device_mem_type will be set to MemoryType::DEVICE_DEBUG only
111 // when an actual device is configured -- this is done later in
112 // Device::UpdateMemoryTypeAndClass().
113 device_mem_type = MemoryType::HOST_DEBUG;
114 }
115 else if (false
116#ifdef MFEM_USE_CUDA
117 || mem_backend == "cuda"
118#endif
119#ifdef MFEM_USE_HIP
120 || mem_backend == "hip"
121#endif
122 )
123 {
124 mem_host_env = true;
125 host_mem_type = MemoryType::HOST;
126 mem_device_env = true;
127 device_mem_type = MemoryType::DEVICE;
128 }
129 else if (mem_backend == "uvm")
130 {
131 mem_host_env = true;
132 mem_device_env = true;
133 host_mem_type = MemoryType::MANAGED;
134 device_mem_type = MemoryType::MANAGED;
135 }
136 else
137 {
138 MFEM_ABORT("Unknown memory backend!");
139 }
140 mm.Configure(host_mem_type, device_mem_type);
141 }
142
143 if (GetEnv("MFEM_DEVICE"))
144 {
145 std::string device(GetEnv("MFEM_DEVICE"));
146 Configure(device);
147 device_env = true;
148 }
149
150 if (GetEnv("MFEM_GPU_AWARE_MPI"))
151 {
152 SetGPUAwareMPI(true);
153 }
154}
155
157{
158#ifdef MFEM_USE_MPI
160#endif
161 if ( device_env && !destroy_mm) { return; }
162 if (!device_env && destroy_mm && !mem_host_env)
163 {
164#ifdef MFEM_USE_CEED
165 // Destroy FES -> CeedBasis, CeedElemRestriction hash table contents
166 for (auto entry : internal::ceed_basis_map)
167 {
168 CeedBasisDestroy(&entry.second);
169 }
170 internal::ceed_basis_map.clear();
171 for (auto entry : internal::ceed_restr_map)
172 {
173 CeedElemRestrictionDestroy(&entry.second);
174 }
175 internal::ceed_restr_map.clear();
176 // Destroy Ceed context
177 CeedDestroy(&internal::ceed);
178#endif
179 mm.Destroy();
180 }
181 Get().ngpu = -1;
182 Get().backends = Backend::CPU;
183 Get().host_mem_type = MemoryType::HOST;
184 Get().host_mem_class = MemoryClass::HOST;
185 Get().device_mem_type = MemoryType::HOST;
186 Get().device_mem_class = MemoryClass::HOST;
187}
188
189void Device::Configure(const std::string &device, const int device_id)
190{
191 // If a device was configured via the environment, skip the configuration,
192 // and avoid the 'singleton_device' to destroy the mm.
193 if (device_env)
194 {
195 std::memcpy((void*)this, &Get(), sizeof(Device));
196 Get().destroy_mm = false;
197 return;
198 }
199
200 std::map<std::string, Backend::Id> bmap;
201 for (int i = 0; i < Backend::NUM_BACKENDS; i++)
202 {
203 bmap[internal::backend_name[i]] = internal::backend_list[i];
204 }
205 // auto-detect GPU configurations
206 // assumes only one of HIP or CUDA are available
207#ifdef MFEM_USE_HIP
208 bmap["gpu"] = Backend::HIP;
209#ifdef MFEM_USE_RAJA
210 bmap["raja-gpu"] = Backend::RAJA_HIP;
211#endif
212#ifdef MFEM_USE_CEED
213 bmap["ceed-gpu"] = Backend::CEED_HIP;
214#endif
215 // no OCCA+HIP?
216#elif defined(MFEM_USE_CUDA)
217 bmap["gpu"] = Backend::CUDA;
218#ifdef MFEM_USE_RAJA
219 bmap["raja-gpu"] = Backend::RAJA_CUDA;
220#endif
221#ifdef MFEM_USE_CEED
222 bmap["ceed-gpu"] = Backend::CEED_CUDA;
223#endif
224#ifdef MFEM_USE_OCCA
225 bmap["occa-gpu"] = Backend::OCCA_CUDA;
226#endif
227#endif
228 std::string device_option;
229 std::string::size_type beg = 0, end;
230 while (1)
231 {
232 end = device.find(',', beg);
233 end = (end != std::string::npos) ? end : device.size();
234 const std::string bname = device.substr(beg, end - beg);
235 const auto option = bname.find(':');
236 const std::string backend = (option != std::string::npos) ?
237 bname.substr(0, option) : bname;
238 const auto it = bmap.find(backend);
239 MFEM_VERIFY(it != bmap.end(), "Invalid backend name: '" << backend << '\'');
240 Get().MarkBackend(it->second);
241 if (option != std::string::npos)
242 {
243 device_option += bname.substr(option);
244 }
245 if (end == device.size()) { break; }
246 beg = end + 1;
247 }
248
249 // OCCA_CUDA and CEED_CUDA need CUDA or RAJA_CUDA:
252 {
253 Get().MarkBackend(Backend::CUDA);
254 }
255 // CEED_HIP needs HIP:
257 {
258 Get().MarkBackend(Backend::HIP);
259 }
260 // OCCA_OMP will use OMP or RAJA_OMP unless MFEM_USE_OPENMP=NO:
261#ifdef MFEM_USE_OPENMP
263 {
264 Get().MarkBackend(Backend::OMP);
265 }
266#endif
267
268 // Perform setup.
269 Get().Setup(device_option, device_id);
270
271 // Configure the host/device MemoryType/MemoryClass.
272 Get().UpdateMemoryTypeAndClass(device_option);
273
274 // Copy all data members from the global 'singleton_device' into '*this'.
275 if (this != &Get()) { std::memcpy((void*)this, &Get(), sizeof(Device)); }
276
277 // Only '*this' will call the MemoryManager::Destroy() method.
278 destroy_mm = true;
279
280#ifdef MFEM_USE_MPI
281#if defined(HYPRE_USING_GPU) && (MFEM_HYPRE_VERSION >= 23100)
282 // Skip the call to Hypre::InitDevice() if HYPRE is not initialized, e.g.
283 // * if running a serial code
284 // * if running with the environment variable MFEM_DEVICE set.
285 if (HYPRE_Initialized())
286 {
288 }
289#endif
290#endif
291}
292
293// static method
295{
296 // If the device and/or the MemoryTypes are configured through the
297 // environment (variables 'MFEM_DEVICE', 'MFEM_MEMORY'), ignore calls to this
298 // method.
299 if (mem_host_env || mem_device_env || device_env) { return; }
300
301 MFEM_VERIFY(!IsConfigured(), "the default MemoryTypes can only be set before"
302 " Device construction and configuration");
303 MFEM_VERIFY(IsHostMemory(h_mt),
304 "invalid host MemoryType, h_mt = " << (int)h_mt);
305 MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == h_mt,
306 "invalid device MemoryType, d_mt = " << (int)d_mt
307 << " (h_mt = " << (int)h_mt << ')');
308
309 Get().host_mem_type = h_mt;
310 Get().device_mem_type = d_mt;
311 mem_types_set = true;
312
313 // h_mt and d_mt will be set as dual to each other during configuration by
314 // the call mm.Configure(...) in UpdateMemoryTypeAndClass()
315}
316
317void Device::Print(std::ostream &os)
318{
319 os << "Device configuration: ";
320 bool add_comma = false;
321 for (int i = 0; i < Backend::NUM_BACKENDS; i++)
322 {
323 if (backends & internal::backend_list[i])
324 {
325 if (add_comma) { os << ','; }
326 add_comma = true;
327 os << internal::backend_name[i];
328 }
329 }
330 os << '\n';
331#ifdef MFEM_USE_CEED
333 {
334 const char *ceed_backend;
335 CeedGetResource(internal::ceed, &ceed_backend);
336 os << "libCEED backend: " << ceed_backend << '\n';
337 }
338#endif
339 os << "Memory configuration: "
340 << MemoryTypeName[static_cast<int>(host_mem_type)];
342 {
343 os << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
344 }
345#ifdef MFEM_USE_MPI
348 {
349 os << "\nUse GPU-aware MPI: " << (GetGPUAwareMPI() ? "yes" : "no");
350 }
351#endif
352 os << std::endl;
353}
354
355void Device::UpdateMemoryTypeAndClass(const std::string &device_option)
356{
357 const bool debug = Device::Allows(Backend::DEBUG_DEVICE);
358 const bool device = Device::Allows(Backend::DEVICE_MASK);
359
360#ifdef MFEM_USE_UMPIRE
361 // If MFEM has been compiled with Umpire support, use it as the default
362 if (!mem_host_env && !mem_types_set)
363 {
364 host_mem_type = MemoryType::HOST_UMPIRE;
365 if (!mem_device_env)
366 {
367 device_mem_type = MemoryType::HOST_UMPIRE;
368 }
369 }
370#endif
371
372 // Enable the device memory type
373 if (device)
374 {
375 if (!mem_device_env)
376 {
377 if (mem_host_env)
378 {
379 switch (host_mem_type)
380 {
382 device_mem_type = MemoryType::DEVICE_UMPIRE;
383 break;
385 device_mem_type = MemoryType::DEVICE_DEBUG;
386 break;
387 default:
388 device_mem_type = MemoryType::DEVICE;
389 }
390 }
391 else if (!mem_types_set)
392 {
393#ifndef MFEM_USE_UMPIRE
394 device_mem_type = MemoryType::DEVICE;
395#else
396 device_mem_type = MemoryType::DEVICE_UMPIRE;
397#endif
398 }
399 }
400 device_mem_class = MemoryClass::DEVICE;
401 }
402
403 // Enable the UVM shortcut when requested
404 if (device && device_option.find(":uvm") != std::string::npos)
405 {
406 host_mem_type = MemoryType::MANAGED;
407 device_mem_type = MemoryType::MANAGED;
408 }
409
410 // Enable the DEBUG mode when requested
411 if (debug)
412 {
413 host_mem_type = MemoryType::HOST_DEBUG;
414 device_mem_type = MemoryType::DEVICE_DEBUG;
415 }
416
417 MFEM_VERIFY(!device || IsDeviceMemory(device_mem_type),
418 "invalid device memory configuration!");
419
420 // Update the memory manager with the new settings
421 mm.Configure(host_mem_type, device_mem_type);
422}
423
424// static method
426{
427 if (Get().ngpu >= 0) { return Get().ngpu; }
428#if defined(MFEM_USE_CUDA)
429 return CuGetDeviceCount();
430#elif defined(MFEM_USE_HIP)
431 int ngpu;
432 MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
433 return ngpu;
434#else
435 MFEM_ABORT("Unable to query number of available devices without"
436 " MFEM_USE_CUDA or MFEM_USE_HIP!");
437 return -1;
438#endif
439}
440
// Select CUDA device 'dev' for subsequent CUDA runtime calls and report the
// number of visible CUDA devices in 'ngpu'. Aborts when no CUDA device is
// found. No-op when MFEM is built without CUDA.
static void CudaDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_CUDA
   ngpu = CuGetDeviceCount();
   MFEM_VERIFY(ngpu > 0, "No CUDA device found!");
   MFEM_GPU_CHECK(cudaSetDevice(dev));
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}
452
// Select HIP device 'dev' for subsequent HIP runtime calls and report the
// number of visible HIP devices in 'ngpu'. Aborts when no HIP device is
// found. No-op when MFEM is built without HIP.
static void HipDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_HIP
   MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
   MFEM_VERIFY(ngpu > 0, "No HIP device found!");
   MFEM_GPU_CHECK(hipSetDevice(dev));
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}
464
// Device setup for the RAJA GPU backends: delegates to the CUDA or HIP setup,
// whichever GPU runtime MFEM was built with. No-op when built with neither.
static void RajaDeviceSetup(const int dev, int &ngpu)
{
#ifdef MFEM_USE_CUDA
   CudaDeviceSetup(dev, ngpu);
#elif defined(MFEM_USE_HIP)
   HipDeviceSetup(dev, ngpu);
#else
   MFEM_CONTRACT_VAR(dev);
   MFEM_CONTRACT_VAR(ngpu);
#endif
}
476
477static void OccaDeviceSetup(const int dev)
478{
479#ifdef MFEM_USE_OCCA
480 const int cpu = Device::Allows(Backend::OCCA_CPU);
481 const int omp = Device::Allows(Backend::OCCA_OMP);
482 const int cuda = Device::Allows(Backend::OCCA_CUDA);
483 if (cpu + omp + cuda > 1)
484 {
485 MFEM_ABORT("Only one OCCA backend can be configured at a time!");
486 }
487 if (cuda)
488 {
489#if OCCA_CUDA_ENABLED
490 std::string mode("mode: 'CUDA', device_id : ");
491 internal::occaDevice.setup(mode.append(1,'0'+dev));
492#else
493 MFEM_ABORT("the OCCA CUDA backend requires OCCA built with CUDA!");
494#endif
495 }
496 else if (omp)
497 {
498#if OCCA_OPENMP_ENABLED
499 internal::occaDevice.setup("mode: 'OpenMP'");
500#else
501 MFEM_ABORT("the OCCA OpenMP backend requires OCCA built with OpenMP!");
502#endif
503 }
504 else
505 {
506 internal::occaDevice.setup("mode: 'Serial'");
507 }
508
509 std::string mfemDir;
510 if (occa::io::exists(MFEM_INSTALL_DIR "/include/mfem/"))
511 {
512 mfemDir = MFEM_INSTALL_DIR "/include/mfem/";
513 }
514 else if (occa::io::exists(MFEM_SOURCE_DIR))
515 {
516 mfemDir = MFEM_SOURCE_DIR;
517 }
518 else
519 {
520 MFEM_ABORT("Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
521 }
522
523 occa::io::addLibraryPath("mfem", mfemDir);
524 occa::loadKernels("mfem");
525#else
526 MFEM_CONTRACT_VAR(dev);
527 MFEM_ABORT("the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
528#endif
529}
530
531static void CeedDeviceSetup(const char* ceed_spec)
532{
533#ifdef MFEM_USE_CEED
534 CeedInit(ceed_spec, &internal::ceed);
535 const char *ceed_backend;
536 CeedGetResource(internal::ceed, &ceed_backend);
537 if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec, "/cpu/self") &&
538 strcmp(ceed_spec, "/gpu/hip"))
539 {
540 mfem::out << std::endl << "WARNING!!!\n"
541 "libCEED is not using the requested backend!!!\n"
542 "WARNING!!!\n" << std::endl;
543 }
544#ifdef MFEM_DEBUG
545 CeedSetErrorHandler(internal::ceed, CeedErrorStore);
546#endif
547#else
548 MFEM_CONTRACT_VAR(ceed_spec);
549#endif
550}
551
552void Device::Setup(const std::string &device_option, const int device_id)
553{
554 MFEM_VERIFY(ngpu == -1, "the mfem::Device is already configured!");
555
556 ngpu = 0;
557 dev = device_id;
558#ifndef MFEM_USE_CUDA
559 MFEM_VERIFY(!Allows(Backend::CUDA_MASK),
560 "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
561#endif
562#ifndef MFEM_USE_HIP
563 MFEM_VERIFY(!Allows(Backend::HIP_MASK),
564 "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
565#endif
566#ifndef MFEM_USE_RAJA
567 MFEM_VERIFY(!Allows(Backend::RAJA_MASK),
568 "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
569#endif
570#ifndef MFEM_USE_OPENMP
572 "the OpenMP and RAJA OpenMP backends require MFEM built with"
573 " MFEM_USE_OPENMP=YES");
574#endif
575#ifndef MFEM_USE_CEED
576 MFEM_VERIFY(!Allows(Backend::CEED_MASK),
577 "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
578#endif
579 if (Allows(Backend::CUDA)) { CudaDeviceSetup(dev, ngpu); }
580 if (Allows(Backend::HIP)) { HipDeviceSetup(dev, ngpu); }
582 { RajaDeviceSetup(dev, ngpu); }
583 // The check for MFEM_USE_OCCA is in the function OccaDeviceSetup().
584 if (Allows(Backend::OCCA_MASK)) { OccaDeviceSetup(dev); }
586 {
587 int ceed_cpu = Allows(Backend::CEED_CPU);
588 int ceed_cuda = Allows(Backend::CEED_CUDA);
589 int ceed_hip = Allows(Backend::CEED_HIP);
590 MFEM_VERIFY(ceed_cpu + ceed_cuda + ceed_hip == 1,
591 "Only one CEED backend can be enabled at a time!");
592
593 // NOTE: libCEED's /gpu/cuda/gen and /gpu/hip/gen backends are non-
594 // deterministic!
595 const char *ceed_spec_search =
596 Allows(Backend::CEED_CPU) ? ":/cpu/self" :
597 (Allows(Backend::CEED_CUDA) ? ":/gpu/cuda" :
598 (Allows(Backend::CEED_HIP) ? ":/gpu/hip" : ""));
599 const char *ceed_spec_default =
600 Allows(Backend::CEED_CPU) ? "/cpu/self" :
601 (Allows(Backend::CEED_CUDA) ? "/gpu/cuda/gen" :
602 (Allows(Backend::CEED_HIP) ? "/gpu/hip/gen" : ""));
603 std::string::size_type beg = device_option.find(ceed_spec_search), end;
604 if (beg == std::string::npos)
605 {
606 CeedDeviceSetup(ceed_spec_default);
607 }
608 else
609 {
610 end = device_option.find(':', beg + 1);
611 end = (end != std::string::npos) ? end : device_option.size();
612 CeedDeviceSetup(device_option.substr(beg + 1, end - beg - 1).c_str());
613 }
614 }
615 if (Allows(Backend::DEBUG_DEVICE)) { ngpu = 1; }
616}
617
619{
620 // from HYPRE's hypre_GetPointerLocation
622#if defined(MFEM_USE_CUDA)
623 struct cudaPointerAttributes attr;
624
625#if (CUDART_VERSION >= 11000)
626 MFEM_GPU_CHECK(cudaPointerGetAttributes(&attr, ptr));
627#else
628 cudaPointerGetAttributes(&attr, ptr);
629 if (err != cudaSuccess)
630 {
631 /* clear the error */
632 cudaGetLastError();
633 }
634#endif
635 switch (attr.type)
636 {
637 case cudaMemoryTypeUnregistered:
638 // host
639 break;
640 case cudaMemoryTypeHost:
642 break;
643 case cudaMemoryTypeDevice:
644 res = MemoryType::DEVICE;
645 break;
646 case cudaMemoryTypeManaged:
648 break;
649 }
650
651#elif defined(MFEM_USE_HIP)
652 struct hipPointerAttribute_t attr;
653
654 hipError_t err = hipPointerGetAttributes(&attr, ptr);
655 if (err != hipSuccess)
656 {
657 if (err == hipErrorInvalidValue)
658 {
659 // host memory
660 /* clear the error */
661 hipGetLastError();
662 }
663 else
664 {
665 MFEM_GPU_CHECK(err);
666 }
667 }
668 else if (attr.isManaged)
669 {
671 }
672#if (HIP_VERSION_MAJOR >= 6)
673 else if (attr.type == hipMemoryTypeDevice)
674#else // (HIP_VERSION_MAJOR < 6)
675 else if (attr.memoryType == hipMemoryTypeDevice)
676#endif // (HIP_VERSION_MAJOR >= 6)
677 {
678 res = MemoryType::DEVICE;
679 }
680#if (HIP_VERSION_MAJOR >= 6)
681 else if (attr.type == hipMemoryTypeHost)
682#else // (HIP_VERSION_MAJOR < 6)
683 else if (attr.memoryType == hipMemoryTypeHost)
684#endif // (HIP_VERSION_MAJOR >= 6)
685 {
687 }
688#if (HIP_VERSION_MAJOR >= 6)
689 else if (attr.type == hipMemoryTypeUnregistered)
690 {
691 // host memory
692 }
693#endif
694#else
695 MFEM_CONTRACT_VAR(ptr);
696#endif
697 return res;
698}
699
700void Device::DeviceMem(size_t *free, size_t *total)
701{
702#if defined(MFEM_USE_CUDA)
703 cudaMemGetInfo(free, total);
704#elif defined(MFEM_USE_HIP)
705 hipMemGetInfo(free, total);
706#else
707 // not compiled with GPU support
708 if (free)
709 {
710 *free = 0;
711 }
712 if (*total)
713 {
714 *total = 0;
715 }
716#endif
717}
718
720{
721#if defined(MFEM_USE_CUDA)
722 int res;
723 cudaDeviceGetAttribute(&res, cudaDevAttrMultiProcessorCount, dev);
724 return res;
725#elif defined(MFEM_USE_HIP)
726 int res;
727 hipDeviceGetAttribute(&res, hipDeviceAttributeMultiprocessorCount, dev);
728 return res;
729#else
730 // not compiled with GPU support
731 MFEM_CONTRACT_VAR(dev);
732 return 0;
733#endif
734}
735
737{
738 int dev = 0;
739#if defined(MFEM_USE_CUDA)
740 cudaGetDevice(&dev);
741#elif defined(MFEM_USE_HIP)
742 hipGetDevice(&dev);
743#endif
744 return NumMultiprocessors(dev);
745}
746
748{
749#if defined(MFEM_USE_CUDA)
750 int res;
751 cudaDeviceGetAttribute(&res, cudaDevAttrWarpSize, dev);
752 return res;
753#elif defined(MFEM_USE_HIP)
754 int res;
755 hipDeviceGetAttribute(&res, hipDeviceAttributeWarpSize, dev);
756 return res;
757#else
758 // not compiled with GPU support
759 MFEM_CONTRACT_VAR(dev);
760 return 0;
761#endif
762}
763
765{
766 int dev = 0;
767#if defined(MFEM_USE_CUDA)
768 cudaGetDevice(&dev);
769#elif defined(MFEM_USE_HIP)
770 hipGetDevice(&dev);
771#endif
772 return WarpSize(dev);
773}
774
775} // namespace mfem
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
Definition device.hpp:124
~Device()
Destructor.
Definition device.cpp:156
static void DeviceMem(size_t *free, size_t *total)
Gets the free and total memory on the device.
Definition device.cpp:700
void Configure(const std::string &device, const int device_id=0)
Configure the Device backends.
Definition device.cpp:189
static void SetGPUAwareMPI(const bool force=true)
Manually set the status of GPU-aware MPI flag for use in MPI communication routines which have optimi...
Definition device.hpp:294
static int NumMultiprocessors()
Same as NumMultiprocessors(int), for the currently active device.
Definition device.cpp:736
static bool IsConfigured()
Return true if Configure() has been called previously.
Definition device.hpp:241
static MemoryType QueryMemoryType(const void *ptr)
Definition device.cpp:618
void Print(std::ostream &os=mfem::out)
Print the configuration of the MFEM virtual device object.
Definition device.cpp:317
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:262
static void SetMemoryTypes(MemoryType h_mt, MemoryType d_mt)
Set the default host and device MemoryTypes, h_mt and d_mt.
Definition device.cpp:294
static bool GetGPUAwareMPI()
Get the status of GPU-aware MPI flag.
Definition device.hpp:298
static int WarpSize()
Same as WarpSize(int), for the currently active device.
Definition device.cpp:764
static int GetDeviceCount()
Get the number of available devices (may be called before configuration).
Definition device.cpp:425
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used.
Definition device.cpp:74
static void InitDevice()
Configure HYPRE's compute and memory policy.
Definition hypre.cpp:50
static void Finalize()
Finalize hypre (called automatically at program exit if Hypre::Init() has been called).
Definition hypre.cpp:75
void Configure(const MemoryType h_mt, const MemoryType d_mt)
Configure the Memory manager with given default host and device types. This method will be called whe...
void Destroy()
Free all the device memories.
static bool IsFinalized()
Return true if MPI has been finalized.
static bool IsInitialized()
Return true if MPI has been initialized.
std::unordered_map< const BasisKey, CeedBasis, BasisHash > BasisMap
Definition util.hpp:144
std::unordered_map< const RestrKey, CeedElemRestriction, RestrHash > RestrMap
Definition util.hpp:165
MFEM_HOST_DEVICE tensor< T, n, n > dev(const tensor< T, n, n > &A)
Calculates the deviator of a matrix (rank-2 tensor)
Definition tensor.hpp:1354
bool IsDeviceMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::DEVICE.
const char * GetEnv(const char *name)
Wrapper for std::getenv.
Definition globals.cpp:79
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition globals.hpp:66
MemoryManager mm
The (single) global memory manager object.
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition cuda.cpp:185
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
Definition globals.hpp:71
MemoryType
Memory types supported by MFEM.
@ HOST_32
Host memory; aligned at 32 bytes.
@ HOST_64
Host memory; aligned at 64 bytes.
@ HOST
Host memory; using new[] and delete[].
@ HOST_PINNED
Host memory: pinned (page-locked)
@ HOST_DEBUG
Host memory; allocated from a "host-debug" pool.
@ DEVICE
Device memory; using CUDA or HIP *Malloc and *Free.
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
Definition device.hpp:34
@ RAJA_OMP
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES.
Definition device.hpp:48
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition device.hpp:51
@ DEBUG_DEVICE
[device] Debug backend: host memory is READ/WRITE protected while a device is in use....
Definition device.hpp:78
@ RAJA_CPU
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES.
Definition device.hpp:45
@ OMP
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
Definition device.hpp:38
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition device.hpp:42
@ OCCA_OMP
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
Definition device.hpp:59
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
Definition device.hpp:54
@ OCCA_CUDA
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
Definition device.hpp:62
@ CEED_CPU
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers....
Definition device.hpp:65
@ OCCA_CPU
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES.
Definition device.hpp:57
@ CEED_CUDA
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
Definition device.hpp:69
@ CPU
[host] Default CPU backend: sequential execution on each MPI rank.
Definition device.hpp:36
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition device.hpp:40
@ CEED_HIP
[device] CEED HIP backend working together with the HIP backend. Enabled when MFEM_USE_CEED = YES and...
Definition device.hpp:72
@ RAJA_MASK
Bitwise-OR of all RAJA backends.
Definition device.hpp:101
@ DEVICE_MASK
Bitwise-OR of all device backends.
Definition device.hpp:99
@ CEED_MASK
Bitwise-OR of all CEED backends.
Definition device.hpp:97
@ OCCA_MASK
Bitwise-OR of all OCCA backends.
Definition device.hpp:103
@ HIP_MASK
Bitwise-OR of all HIP backends.
Definition device.hpp:93
@ NUM_BACKENDS
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
Definition device.hpp:86
@ CUDA_MASK
Bitwise-OR of all CUDA backends.
Definition device.hpp:91