MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
device.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
#include "forall.hpp"
#include "occa.hpp"
#ifdef MFEM_USE_CEED
#include "ceed/interface/util.hpp"
#endif
#ifdef MFEM_USE_MPI
#include "../linalg/hypre.hpp"
#endif

#include <cstring>
#include <cstdlib>
#include <unordered_map>
#include <string>
#include <map>
24
25namespace mfem
26{
27
28// Place the following variables in the mfem::internal namespace, so that they
29// will not be included in the doxygen documentation.
30namespace internal
31{
32
33#ifdef MFEM_USE_OCCA
34// Default occa::device used by MFEM.
35occa::device occaDevice;
36#endif
37
38#ifdef MFEM_USE_CEED
39Ceed ceed = NULL;
40
41ceed::BasisMap ceed_basis_map;
42ceed::RestrMap ceed_restr_map;
43#endif
44
45// Backends listed by priority, high to low:
46static const Backend::Id backend_list[Backend::NUM_BACKENDS] =
47{
52};
53
54// Backend names listed by priority, high to low:
55static const char *backend_name[Backend::NUM_BACKENDS] =
56{
57 "ceed-cuda", "occa-cuda", "raja-cuda", "cuda",
58 "ceed-hip", "raja-hip", "hip", "debug",
59 "occa-omp", "raja-omp", "omp",
60 "ceed-cpu", "occa-cpu", "raja-cpu", "cpu"
61};
62
63} // namespace mfem::internal
64
65
66// Initialize the unique global Device variable.
67Device Device::device_singleton;
68bool Device::device_env = false;
69bool Device::mem_host_env = false;
70bool Device::mem_device_env = false;
71bool Device::mem_types_set = false;
72
74{
75 if (GetEnv("MFEM_MEMORY") && !mem_host_env && !mem_device_env)
76 {
77 std::string mem_backend(GetEnv("MFEM_MEMORY"));
78 if (mem_backend == "host")
79 {
80 mem_host_env = true;
81 host_mem_type = MemoryType::HOST;
82 device_mem_type = MemoryType::HOST;
83 }
84 else if (mem_backend == "host32")
85 {
86 mem_host_env = true;
87 host_mem_type = MemoryType::HOST_32;
88 device_mem_type = MemoryType::HOST_32;
89 }
90 else if (mem_backend == "host64")
91 {
92 mem_host_env = true;
93 host_mem_type = MemoryType::HOST_64;
94 device_mem_type = MemoryType::HOST_64;
95 }
96 else if (mem_backend == "umpire")
97 {
98 mem_host_env = true;
99 host_mem_type = MemoryType::HOST_UMPIRE;
100 // Note: device_mem_type will be set to MemoryType::DEVICE_UMPIRE only
101 // when an actual device is configured -- this is done later in
102 // Device::UpdateMemoryTypeAndClass().
103 device_mem_type = MemoryType::HOST_UMPIRE;
104 }
105 else if (mem_backend == "debug")
106 {
107 mem_host_env = true;
108 host_mem_type = MemoryType::HOST_DEBUG;
109 // Note: device_mem_type will be set to MemoryType::DEVICE_DEBUG only
110 // when an actual device is configured -- this is done later in
111 // Device::UpdateMemoryTypeAndClass().
112 device_mem_type = MemoryType::HOST_DEBUG;
113 }
114 else if (false
115#ifdef MFEM_USE_CUDA
116 || mem_backend == "cuda"
117#endif
118#ifdef MFEM_USE_HIP
119 || mem_backend == "hip"
120#endif
121 )
122 {
123 mem_host_env = true;
124 host_mem_type = MemoryType::HOST;
125 mem_device_env = true;
126 device_mem_type = MemoryType::DEVICE;
127 }
128 else if (mem_backend == "uvm")
129 {
130 mem_host_env = true;
131 mem_device_env = true;
132 host_mem_type = MemoryType::MANAGED;
133 device_mem_type = MemoryType::MANAGED;
134 }
135 else
136 {
137 MFEM_ABORT("Unknown memory backend!");
138 }
139 mm.Configure(host_mem_type, device_mem_type);
140 }
141
142 if (GetEnv("MFEM_DEVICE"))
143 {
144 std::string device(GetEnv("MFEM_DEVICE"));
145 Configure(device);
146 device_env = true;
147 }
148}
149
150
152{
153#ifdef MFEM_USE_MPI
155#endif
156 if ( device_env && !destroy_mm) { return; }
157 if (!device_env && destroy_mm && !mem_host_env)
158 {
159 free(device_option);
160#ifdef MFEM_USE_CEED
161 // Destroy FES -> CeedBasis, CeedElemRestriction hash table contents
162 for (auto entry : internal::ceed_basis_map)
163 {
164 CeedBasisDestroy(&entry.second);
165 }
166 internal::ceed_basis_map.clear();
167 for (auto entry : internal::ceed_restr_map)
168 {
169 CeedElemRestrictionDestroy(&entry.second);
170 }
171 internal::ceed_restr_map.clear();
172 // Destroy Ceed context
173 CeedDestroy(&internal::ceed);
174#endif
175 mm.Destroy();
176 }
177 Get().ngpu = -1;
178 Get().mode = SEQUENTIAL;
179 Get().backends = Backend::CPU;
180 Get().host_mem_type = MemoryType::HOST;
181 Get().host_mem_class = MemoryClass::HOST;
182 Get().device_mem_type = MemoryType::HOST;
183 Get().device_mem_class = MemoryClass::HOST;
184}
185
186void Device::Configure(const std::string &device, const int device_id)
187{
188 // If a device was configured via the environment, skip the configuration,
189 // and avoid the 'singleton_device' to destroy the mm.
190 if (device_env)
191 {
192 std::memcpy(this, &Get(), sizeof(Device));
193 Get().destroy_mm = false;
194 return;
195 }
196
197 std::map<std::string, Backend::Id> bmap;
198 for (int i = 0; i < Backend::NUM_BACKENDS; i++)
199 {
200 bmap[internal::backend_name[i]] = internal::backend_list[i];
201 }
202 std::string::size_type beg = 0, end, option;
203 while (1)
204 {
205 end = device.find(',', beg);
206 end = (end != std::string::npos) ? end : device.size();
207 const std::string bname = device.substr(beg, end - beg);
208 option = bname.find(':');
209 if (option==std::string::npos) // No option
210 {
211 const std::string backend = bname;
212 std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
213 MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
214 Get().MarkBackend(it->second);
215 }
216 else
217 {
218 const std::string backend = bname.substr(0, option);
219 const std::string boption = bname.substr(option+1);
220 Get().device_option = strdup(boption.c_str());
221 std::map<std::string, Backend::Id>::iterator it = bmap.find(backend);
222 MFEM_VERIFY(it != bmap.end(), "invalid backend name: '" << backend << '\'');
223 Get().MarkBackend(it->second);
224 }
225 if (end == device.size()) { break; }
226 beg = end + 1;
227 }
228
229 // OCCA_CUDA and CEED_CUDA need CUDA or RAJA_CUDA:
232 {
233 Get().MarkBackend(Backend::CUDA);
234 }
235 // CEED_HIP needs HIP:
237 {
238 Get().MarkBackend(Backend::HIP);
239 }
240 // OCCA_OMP will use OMP or RAJA_OMP unless MFEM_USE_OPENMP=NO:
241#ifdef MFEM_USE_OPENMP
243 {
244 Get().MarkBackend(Backend::OMP);
245 }
246#endif
247
248 // Perform setup.
249 Get().Setup(device_id);
250
251 // Enable the device
252 Enable();
253
254 // Copy all data members from the global 'singleton_device' into '*this'.
255 if (this != &Get()) { std::memcpy(this, &Get(), sizeof(Device)); }
256
257 // Only '*this' will call the MemoryManager::Destroy() method.
258 destroy_mm = true;
259
260#ifdef MFEM_USE_MPI
261#if defined(HYPRE_USING_GPU) && (MFEM_HYPRE_VERSION >= 23100)
262 // Skip the call to Hypre::InitDevice() if HYPRE is not initialized, e.g.
263 // * if running a serial code
264 // * if running with the environment variable MFEM_DEVICE set.
265 if (HYPRE_Initialized())
266 {
268 }
269#endif
270#endif
271}
272
273// static method
275{
276 // If the device and/or the MemoryTypes are configured through the
277 // environment (variables 'MFEM_DEVICE', 'MFEM_MEMORY'), ignore calls to this
278 // method.
279 if (mem_host_env || mem_device_env || device_env) { return; }
280
281 MFEM_VERIFY(!IsConfigured(), "the default MemoryTypes can only be set before"
282 " Device construction and configuration");
283 MFEM_VERIFY(IsHostMemory(h_mt),
284 "invalid host MemoryType, h_mt = " << (int)h_mt);
285 MFEM_VERIFY(IsDeviceMemory(d_mt) || d_mt == h_mt,
286 "invalid device MemoryType, d_mt = " << (int)d_mt
287 << " (h_mt = " << (int)h_mt << ')');
288
289 Get().host_mem_type = h_mt;
290 Get().device_mem_type = d_mt;
291 mem_types_set = true;
292
293 // h_mt and d_mt will be set as dual to each other during configuration by
294 // the call mm.Configure(...) in UpdateMemoryTypeAndClass()
295}
296
297void Device::Print(std::ostream &os)
298{
299 os << "Device configuration: ";
300 bool add_comma = false;
301 for (int i = 0; i < Backend::NUM_BACKENDS; i++)
302 {
303 if (backends & internal::backend_list[i])
304 {
305 if (add_comma) { os << ','; }
306 add_comma = true;
307 os << internal::backend_name[i];
308 }
309 }
310 os << '\n';
311#ifdef MFEM_USE_CEED
313 {
314 const char *ceed_backend;
315 CeedGetResource(internal::ceed, &ceed_backend);
316 os << "libCEED backend: " << ceed_backend << '\n';
317 }
318#endif
319 os << "Memory configuration: "
320 << MemoryTypeName[static_cast<int>(host_mem_type)];
322 {
323 os << ',' << MemoryTypeName[static_cast<int>(device_mem_type)];
324 }
325 os << std::endl;
326}
327
328void Device::UpdateMemoryTypeAndClass()
329{
330 const bool debug = Device::Allows(Backend::DEBUG_DEVICE);
331
332 const bool device = Device::Allows(Backend::DEVICE_MASK);
333
334#ifdef MFEM_USE_UMPIRE
335 // If MFEM has been compiled with Umpire support, use it as the default
336 if (!mem_host_env && !mem_types_set)
337 {
338 host_mem_type = MemoryType::HOST_UMPIRE;
339 if (!mem_device_env)
340 {
341 device_mem_type = MemoryType::HOST_UMPIRE;
342 }
343 }
344#endif
345
346 // Enable the device memory type
347 if (device)
348 {
349 if (!mem_device_env)
350 {
351 if (mem_host_env)
352 {
353 switch (host_mem_type)
354 {
356 device_mem_type = MemoryType::DEVICE_UMPIRE;
357 break;
359 device_mem_type = MemoryType::DEVICE_DEBUG;
360 break;
361 default:
362 device_mem_type = MemoryType::DEVICE;
363 }
364 }
365 else if (!mem_types_set)
366 {
367#ifndef MFEM_USE_UMPIRE
368 device_mem_type = MemoryType::DEVICE;
369#else
370 device_mem_type = MemoryType::DEVICE_UMPIRE;
371#endif
372 }
373 }
374 device_mem_class = MemoryClass::DEVICE;
375 }
376
377 // Enable the UVM shortcut when requested
378 if (device && device_option && !strcmp(device_option, "uvm"))
379 {
380 host_mem_type = MemoryType::MANAGED;
381 device_mem_type = MemoryType::MANAGED;
382 }
383
384 // Enable the DEBUG mode when requested
385 if (debug)
386 {
387 host_mem_type = MemoryType::HOST_DEBUG;
388 device_mem_type = MemoryType::DEVICE_DEBUG;
389 }
390
391 MFEM_VERIFY(!device || IsDeviceMemory(device_mem_type),
392 "invalid device memory configuration!");
393
394 // Update the memory manager with the new settings
395 mm.Configure(host_mem_type, device_mem_type);
396}
397
398void Device::Enable()
399{
400 const bool accelerated = Get().backends & ~(Backend::CPU);
401 if (accelerated) { Get().mode = Device::ACCELERATED;}
402 Get().UpdateMemoryTypeAndClass();
403}
404
405#ifdef MFEM_USE_CUDA
406static void DeviceSetup(const int dev, int &ngpu)
407{
408 ngpu = CuGetDeviceCount();
409 MFEM_VERIFY(ngpu > 0, "No CUDA device found!");
410 MFEM_GPU_CHECK(cudaSetDevice(dev));
411}
412#endif
413
414static void CudaDeviceSetup(const int dev, int &ngpu)
415{
416#ifdef MFEM_USE_CUDA
417 DeviceSetup(dev, ngpu);
418#else
419 MFEM_CONTRACT_VAR(dev);
420 MFEM_CONTRACT_VAR(ngpu);
421#endif
422}
423
424static void HipDeviceSetup(const int dev, int &ngpu)
425{
426#ifdef MFEM_USE_HIP
427 MFEM_GPU_CHECK(hipGetDeviceCount(&ngpu));
428 MFEM_VERIFY(ngpu > 0, "No HIP device found!");
429 MFEM_GPU_CHECK(hipSetDevice(dev));
430#else
431 MFEM_CONTRACT_VAR(dev);
432 MFEM_CONTRACT_VAR(ngpu);
433#endif
434}
435
436static void RajaDeviceSetup(const int dev, int &ngpu)
437{
438#ifdef MFEM_USE_CUDA
439 if (ngpu <= 0) { DeviceSetup(dev, ngpu); }
440#elif defined(MFEM_USE_HIP)
441 HipDeviceSetup(dev, ngpu);
442#else
443 MFEM_CONTRACT_VAR(dev);
444 MFEM_CONTRACT_VAR(ngpu);
445#endif
446}
447
448static void OccaDeviceSetup(const int dev)
449{
450#ifdef MFEM_USE_OCCA
451 const int cpu = Device::Allows(Backend::OCCA_CPU);
452 const int omp = Device::Allows(Backend::OCCA_OMP);
453 const int cuda = Device::Allows(Backend::OCCA_CUDA);
454 if (cpu + omp + cuda > 1)
455 {
456 MFEM_ABORT("Only one OCCA backend can be configured at a time!");
457 }
458 if (cuda)
459 {
460#if OCCA_CUDA_ENABLED
461 std::string mode("mode: 'CUDA', device_id : ");
462 internal::occaDevice.setup(mode.append(1,'0'+dev));
463#else
464 MFEM_ABORT("the OCCA CUDA backend requires OCCA built with CUDA!");
465#endif
466 }
467 else if (omp)
468 {
469#if OCCA_OPENMP_ENABLED
470 internal::occaDevice.setup("mode: 'OpenMP'");
471#else
472 MFEM_ABORT("the OCCA OpenMP backend requires OCCA built with OpenMP!");
473#endif
474 }
475 else
476 {
477 internal::occaDevice.setup("mode: 'Serial'");
478 }
479
480 std::string mfemDir;
481 if (occa::io::exists(MFEM_INSTALL_DIR "/include/mfem/"))
482 {
483 mfemDir = MFEM_INSTALL_DIR "/include/mfem/";
484 }
485 else if (occa::io::exists(MFEM_SOURCE_DIR))
486 {
487 mfemDir = MFEM_SOURCE_DIR;
488 }
489 else
490 {
491 MFEM_ABORT("Cannot find OCCA kernels in MFEM_INSTALL_DIR or MFEM_SOURCE_DIR");
492 }
493
494 occa::io::addLibraryPath("mfem", mfemDir);
495 occa::loadKernels("mfem");
496#else
497 MFEM_CONTRACT_VAR(dev);
498 MFEM_ABORT("the OCCA backends require MFEM built with MFEM_USE_OCCA=YES");
499#endif
500}
501
502static void CeedDeviceSetup(const char* ceed_spec)
503{
504#ifdef MFEM_USE_CEED
505 CeedInit(ceed_spec, &internal::ceed);
506 const char *ceed_backend;
507 CeedGetResource(internal::ceed, &ceed_backend);
508 if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec, "/cpu/self") &&
509 strcmp(ceed_spec, "/gpu/hip"))
510 {
511 mfem::out << std::endl << "WARNING!!!\n"
512 "libCEED is not using the requested backend!!!\n"
513 "WARNING!!!\n" << std::endl;
514 }
515#ifdef MFEM_DEBUG
516 CeedSetErrorHandler(internal::ceed, CeedErrorStore);
517#endif
518#else
519 MFEM_CONTRACT_VAR(ceed_spec);
520#endif
521}
522
523void Device::Setup(const int device_id)
524{
525 MFEM_VERIFY(ngpu == -1, "the mfem::Device is already configured!");
526
527 ngpu = 0;
528 dev = device_id;
529#ifndef MFEM_USE_CUDA
530 MFEM_VERIFY(!Allows(Backend::CUDA_MASK),
531 "the CUDA backends require MFEM built with MFEM_USE_CUDA=YES");
532#endif
533#ifndef MFEM_USE_HIP
534 MFEM_VERIFY(!Allows(Backend::HIP_MASK),
535 "the HIP backends require MFEM built with MFEM_USE_HIP=YES");
536#endif
537#ifndef MFEM_USE_RAJA
538 MFEM_VERIFY(!Allows(Backend::RAJA_MASK),
539 "the RAJA backends require MFEM built with MFEM_USE_RAJA=YES");
540#endif
541#ifndef MFEM_USE_OPENMP
543 "the OpenMP and RAJA OpenMP backends require MFEM built with"
544 " MFEM_USE_OPENMP=YES");
545#endif
546#ifndef MFEM_USE_CEED
547 MFEM_VERIFY(!Allows(Backend::CEED_MASK),
548 "the CEED backends require MFEM built with MFEM_USE_CEED=YES");
549#else
550 int ceed_cpu = Allows(Backend::CEED_CPU);
551 int ceed_cuda = Allows(Backend::CEED_CUDA);
552 int ceed_hip = Allows(Backend::CEED_HIP);
553 MFEM_VERIFY(ceed_cpu + ceed_cuda + ceed_hip <= 1,
554 "Only one CEED backend can be enabled at a time!");
555#endif
556 if (Allows(Backend::CUDA)) { CudaDeviceSetup(dev, ngpu); }
557 if (Allows(Backend::HIP)) { HipDeviceSetup(dev, ngpu); }
559 { RajaDeviceSetup(dev, ngpu); }
560 // The check for MFEM_USE_OCCA is in the function OccaDeviceSetup().
561 if (Allows(Backend::OCCA_MASK)) { OccaDeviceSetup(dev); }
563 {
564 if (!device_option)
565 {
566 CeedDeviceSetup("/cpu/self");
567 }
568 else
569 {
570 CeedDeviceSetup(device_option);
571 }
572 }
574 {
575 if (!device_option)
576 {
577 // NOTE: libCEED's /gpu/cuda/gen backend is non-deterministic!
578 CeedDeviceSetup("/gpu/cuda/gen");
579 }
580 else
581 {
582 CeedDeviceSetup(device_option);
583 }
584 }
586 {
587 if (!device_option)
588 {
589 CeedDeviceSetup("/gpu/hip");
590 }
591 else
592 {
593 CeedDeviceSetup(device_option);
594 }
595 }
596 if (Allows(Backend::DEBUG_DEVICE)) { ngpu = 1; }
597}
598
600{
601 // from HYPRE's hypre_GetPointerLocation
603#if defined(MFEM_USE_CUDA)
604 struct cudaPointerAttributes attr;
605
606#if (CUDART_VERSION >= 11000)
607 MFEM_GPU_CHECK(cudaPointerGetAttributes(&attr, ptr));
608#else
609 cudaPointerGetAttributes(&attr, ptr);
610 if (err != cudaSuccess)
611 {
612 /* clear the error */
613 cudaGetLastError();
614 }
615#endif
616 switch (attr.type)
617 {
618 case cudaMemoryTypeUnregistered:
619 // host
620 break;
621 case cudaMemoryTypeHost:
623 break;
624 case cudaMemoryTypeDevice:
625 res = MemoryType::DEVICE;
626 break;
627 case cudaMemoryTypeManaged:
629 break;
630 }
631
632#elif defined(MFEM_USE_HIP)
633 struct hipPointerAttribute_t attr;
634
635 hipError_t err = hipPointerGetAttributes(&attr, ptr);
636 if (err != hipSuccess)
637 {
638 if (err == hipErrorInvalidValue)
639 {
640 // host memory
641 /* clear the error */
642 hipGetLastError();
643 }
644 else
645 {
646 MFEM_GPU_CHECK(err);
647 }
648 }
649 else if (attr.isManaged)
650 {
652 }
653#if (HIP_VERSION_MAJOR >= 6)
654 else if (attr.type == hipMemoryTypeDevice)
655#else // (HIP_VERSION_MAJOR < 6)
656 else if (attr.memoryType == hipMemoryTypeDevice)
657#endif // (HIP_VERSION_MAJOR >= 6)
658 {
659 res = MemoryType::DEVICE;
660 }
661#if (HIP_VERSION_MAJOR >= 6)
662 else if (attr.type == hipMemoryTypeHost)
663#else // (HIP_VERSION_MAJOR < 6)
664 else if (attr.memoryType == hipMemoryTypeHost)
665#endif // (HIP_VERSION_MAJOR >= 6)
666 {
668 }
669#if (HIP_VERSION_MAJOR >= 6)
670 else if (attr.type == hipMemoryTypeUnregistered)
671 {
672 // host memory
673 }
674#endif
675#endif
676 return res;
677}
678
679void Device::DeviceMem(size_t *free, size_t *total)
680{
681#if defined(MFEM_USE_CUDA)
682 cudaMemGetInfo(free, total);
683#elif defined(MFEM_USE_HIP)
684 hipMemGetInfo(free, total);
685#else
686 // not compiled with GPU support
687 if (free)
688 {
689 *free = 0;
690 }
691 if (*total)
692 {
693 *total = 0;
694 }
695#endif
696}
697
699{
700#if defined(MFEM_USE_CUDA)
701 int res;
702 cudaDeviceGetAttribute(&res, cudaDevAttrMultiProcessorCount, dev);
703 return res;
704#elif defined(MFEM_USE_HIP)
705 int res;
706 hipDeviceGetAttribute(&res, hipDeviceAttributeMultiprocessorCount, dev);
707 return res;
708#else
709 // not compiled with GPU support
710 return 0;
711#endif
712}
713
715{
716 int dev = 0;
717#if defined(MFEM_USE_CUDA)
718 cudaGetDevice(&dev);
719#elif defined(MFEM_USE_HIP)
720 hipGetDevice(&dev);
721#endif
722 return NumMultiprocessors(dev);
723}
724
726{
727#if defined(MFEM_USE_CUDA)
728 int res;
729 cudaDeviceGetAttribute(&res, cudaDevAttrWarpSize, dev);
730 return res;
731#elif defined(MFEM_USE_HIP)
732 int res;
733 hipDeviceGetAttribute(&res, hipDeviceAttributeWarpSize, dev);
734 return res;
735#else
736 // not compiled with GPU support
737 return 0;
738#endif
739}
740
742{
743 int dev = 0;
744#if defined(MFEM_USE_CUDA)
745 cudaGetDevice(&dev);
746#elif defined(MFEM_USE_HIP)
747 hipGetDevice(&dev);
748#endif
749 return WarpSize(dev);
750}
751
752} // mfem
The MFEM Device class abstracts hardware devices such as GPUs, as well as programming models such as ...
Definition device.hpp:123
~Device()
Destructor.
Definition device.cpp:151
static void DeviceMem(size_t *free, size_t *total)
Gets the free and total memory on the device.
Definition device.cpp:679
static MemoryType QueryMemoryType(void *ptr)
Definition device.cpp:599
static int NumMultiprocessors()
Same as NumMultiprocessors(int), for the currently active device.
Definition device.cpp:714
static bool IsConfigured()
Return true if Configure() has been called previously.
Definition device.hpp:241
void Configure(const std::string &device, const int dev=0)
Configure the Device backends.
Definition device.cpp:186
void Print(std::ostream &out=mfem::out)
Print the configuration of the MFEM virtual device object.
Definition device.cpp:297
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259
static void SetMemoryTypes(MemoryType h_mt, MemoryType d_mt)
Set the default host and device MemoryTypes, h_mt and d_mt.
Definition device.cpp:274
static int WarpSize()
Same as WarpSize(int), for the currently active device.
Definition device.cpp:741
Device()
Default constructor. Unless Configure() is called later, the default Backend::CPU will be used.
Definition device.cpp:73
static void InitDevice()
Configure HYPRE's compute and memory policy.
Definition hypre.cpp:50
static void Finalize()
Finalize hypre (called automatically at program exit if Hypre::Init() has been called).
Definition hypre.cpp:75
void Configure(const MemoryType h_mt, const MemoryType d_mt)
Configure the Memory manager with given default host and device types. This method will be called whe...
void Destroy()
Free all the device memories.
std::unordered_map< const BasisKey, CeedBasis, BasisHash > BasisMap
Definition util.hpp:144
std::unordered_map< const RestrKey, CeedElemRestriction, RestrHash > RestrMap
Definition util.hpp:165
bool IsDeviceMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::DEVICE.
const char * GetEnv(const char *name)
Wrapper for std::getenv.
Definition globals.cpp:79
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition globals.hpp:66
MemoryManager mm
The (single) global memory manager object.
int CuGetDeviceCount()
Get the number of CUDA devices.
Definition cuda.cpp:185
bool IsHostMemory(MemoryType mt)
Return true if the given memory type is in MemoryClass::HOST.
const char * MemoryTypeName[MemoryTypeSize]
Memory type names, used during Device:: configuration.
OutStream err(std::cerr)
Global stream used by the library for standard error output. Initially it uses the same std::streambu...
Definition globals.hpp:71
MemoryType
Memory types supported by MFEM.
@ HOST_32
Host memory; aligned at 32 bytes.
@ HOST_64
Host memory; aligned at 64 bytes.
@ HOST
Host memory; using new[] and delete[].
@ HOST_PINNED
Host memory: pinned (page-locked)
@ HOST_DEBUG
Host memory; allocated from a "host-debug" pool.
@ DEVICE
Device memory; using CUDA or HIP *Malloc and *Free.
Id
In the documentation below, we use square brackets to indicate the type of the backend: host or devic...
Definition device.hpp:32
@ RAJA_OMP
[host] RAJA OpenMP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_OPENMP = YES.
Definition device.hpp:46
@ RAJA_CUDA
[device] RAJA CUDA backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_CUDA = YES.
Definition device.hpp:49
@ DEBUG_DEVICE
[device] Debug backend: host memory is READ/WRITE protected while a device is in use....
Definition device.hpp:76
@ RAJA_CPU
[host] RAJA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_RAJA = YES.
Definition device.hpp:43
@ OMP
[host] OpenMP backend. Enabled when MFEM_USE_OPENMP = YES.
Definition device.hpp:36
@ HIP
[device] HIP backend. Enabled when MFEM_USE_HIP = YES.
Definition device.hpp:40
@ OCCA_OMP
[host] OCCA OpenMP backend. Enabled when MFEM_USE_OCCA = YES.
Definition device.hpp:57
@ RAJA_HIP
[device] RAJA HIP backend. Enabled when MFEM_USE_RAJA = YES and MFEM_USE_HIP = YES.
Definition device.hpp:52
@ OCCA_CUDA
[device] OCCA CUDA backend. Enabled when MFEM_USE_OCCA = YES and MFEM_USE_CUDA = YES.
Definition device.hpp:60
@ CEED_CPU
[host] CEED CPU backend. GPU backends can still be used, but with expensive memory transfers....
Definition device.hpp:63
@ OCCA_CPU
[host] OCCA CPU backend: sequential execution on each MPI rank. Enabled when MFEM_USE_OCCA = YES.
Definition device.hpp:55
@ CEED_CUDA
[device] CEED CUDA backend working together with the CUDA backend. Enabled when MFEM_USE_CEED = YES a...
Definition device.hpp:67
@ CPU
[host] Default CPU backend: sequential execution on each MPI rank.
Definition device.hpp:34
@ CUDA
[device] CUDA backend. Enabled when MFEM_USE_CUDA = YES.
Definition device.hpp:38
@ CEED_HIP
[device] CEED HIP backend working together with the HIP backend. Enabled when MFEM_USE_CEED = YES and...
Definition device.hpp:70
@ RAJA_MASK
Bitwise-OR of all RAJA backends.
Definition device.hpp:100
@ DEVICE_MASK
Bitwise-OR of all device backends.
Definition device.hpp:97
@ CEED_MASK
Bitwise-OR of all CEED backends.
Definition device.hpp:95
@ OCCA_MASK
Bitwise-OR of all OCCA backends.
Definition device.hpp:102
@ HIP_MASK
Bitwise-OR of all HIP backends.
Definition device.hpp:91
@ NUM_BACKENDS
Number of backends: from (1 << 0) to (1 << (NUM_BACKENDS-1)).
Definition device.hpp:84
@ CUDA_MASK
Bitwise-OR of all CUDA backends.
Definition device.hpp:89