MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
vector.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12// Implementation of data type vector
13
#include "kernels.hpp"
#include "vector.hpp"
#include "../general/forall.hpp"

#ifdef MFEM_USE_OPENMP
#include <omp.h>
#endif

#include <cerrno>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
28namespace mfem
29{
30
32{
33 const int s = v.Size();
34 size = s;
35 if (s > 0)
36 {
37 MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
39 data.CopyFrom(v.data, s);
40 }
42}
43
45{
46 *this = std::move(v);
47}
48
49void Vector::Load(std::istream **in, int np, int *dim)
50{
51 int i, j, s;
52
53 s = 0;
54 for (i = 0; i < np; i++)
55 {
56 s += dim[i];
57 }
58
59 SetSize(s);
60 HostWrite();
61
62 int p = 0;
63 for (i = 0; i < np; i++)
64 {
65 for (j = 0; j < dim[i]; j++)
66 {
67 *in[i] >> data[p++];
68 // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
69 // so we reset the failbit here.
70 if (!*in[i] && errno == ERANGE)
71 {
72 in[i]->clear();
73 }
74 }
75 }
76}
77
78void Vector::Load(std::istream &in, int Size)
79{
81 HostWrite();
82
83 for (int i = 0; i < size; i++)
84 {
85 in >> data[i];
86 // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
87 // so we reset the failbit here.
88 if (!in && errno == ERANGE)
89 {
90 in.clear();
91 }
92 }
93}
94
96{
97 return operator()(i);
98}
99
100const real_t &Vector::Elem(int i) const
101{
102 return operator()(i);
103}
104
106{
107 real_t dot = 0.0;
108#ifdef MFEM_USE_LEGACY_OPENMP
109 #pragma omp parallel for reduction(+:dot)
110#endif
111 for (int i = 0; i < size; i++)
112 {
113 dot += data[i] * v[i];
114 }
115 return dot;
116}
117
119{
121 return *this;
122}
123
125{
126#if 0
127 SetSize(v.Size(), v.data.GetMemoryType());
128 data.CopyFrom(v.data, v.Size());
129 UseDevice(v.UseDevice());
130#else
131 SetSize(v.Size());
132 bool vuse = v.UseDevice();
133 const bool use_dev = UseDevice() || vuse;
134 v.UseDevice(use_dev);
135 // keep 'data' where it is, unless 'use_dev' is true
136 if (use_dev) { Write(); }
137 data.CopyFrom(v.data, v.Size());
138 v.UseDevice(vuse);
139#endif
140 return *this;
141}
142
144{
145 data = std::move(v.data);
146 // Self-assignment-safe way to move v.size to size:
147 const auto size_tmp = v.size;
148 v.size = 0;
149 size = size_tmp;
150 return *this;
151}
152
154{
155 const bool use_dev = UseDevice();
156 const int N = size;
157 auto y = Write(use_dev);
158 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = value; });
159 return *this;
160}
161
163{
164 const bool use_dev = UseDevice();
165 const int N = size;
166 auto y = ReadWrite(use_dev);
167 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] *= c; });
168 return *this;
169}
170
172{
173 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
174
175 const bool use_dev = UseDevice() || v.UseDevice();
176 const int N = size;
177 auto y = ReadWrite(use_dev);
178 auto x = v.Read(use_dev);
179 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] *= x[i]; });
180 return *this;
181}
182
184{
185 const bool use_dev = UseDevice();
186 const int N = size;
187 const real_t m = 1.0/c;
188 auto y = ReadWrite(use_dev);
189 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] *= m; });
190 return *this;
191}
192
194{
195 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
196
197 const bool use_dev = UseDevice() || v.UseDevice();
198 const int N = size;
199 auto y = ReadWrite(use_dev);
200 auto x = v.Read(use_dev);
201 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] /= x[i]; });
202 return *this;
203}
204
206{
207 const bool use_dev = UseDevice();
208 const int N = size;
209 auto y = ReadWrite(use_dev);
210 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] -= c; });
211 return *this;
212}
213
215{
216 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
217
218 const bool use_dev = UseDevice() || v.UseDevice();
219 const int N = size;
220 auto y = ReadWrite(use_dev);
221 auto x = v.Read(use_dev);
222 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] -= x[i]; });
223 return *this;
224}
225
227{
228 const bool use_dev = UseDevice();
229 const int N = size;
230 auto y = ReadWrite(use_dev);
231 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] += c; });
232 return *this;
233}
234
236{
237 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
238
239 const bool use_dev = UseDevice() || v.UseDevice();
240 const int N = size;
241 auto y = ReadWrite(use_dev);
242 auto x = v.Read(use_dev);
243 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] += x[i]; });
244 return *this;
245}
246
247Vector &Vector::Add(const real_t a, const Vector &Va)
248{
249 MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
250
251 if (a != 0.0)
252 {
253 const int N = size;
254 const bool use_dev = UseDevice() || Va.UseDevice();
255 auto y = ReadWrite(use_dev);
256 auto x = Va.Read(use_dev);
257 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] += a * x[i]; });
258 }
259 return *this;
260}
261
262Vector &Vector::Set(const real_t a, const Vector &Va)
263{
264 MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
265
266 const bool use_dev = UseDevice() || Va.UseDevice();
267 const int N = size;
268 auto x = Va.Read(use_dev);
269 auto y = Write(use_dev);
270 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = a * x[i]; });
271 return *this;
272}
273
274void Vector::SetVector(const Vector &v, int offset)
275{
276 MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
277
278 const int vs = v.Size();
279 const real_t *vp = v.data;
280 real_t *p = data + offset;
281 for (int i = 0; i < vs; i++)
282 {
283 p[i] = vp[i];
284 }
285}
286
287void Vector::AddSubVector(const Vector &v, int offset)
288{
289 MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
290
291 const int vs = v.Size();
292 const real_t *vp = v.data;
293 real_t *p = data + offset;
294 for (int i = 0; i < vs; i++)
295 {
296 p[i] += vp[i];
297 }
298}
299
302 const bool use_dev = UseDevice();
303 const int N = size;
304 auto y = ReadWrite(use_dev);
305 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = -y[i]; });
307
310 const bool use_dev = UseDevice();
311 const int N = size;
312 auto y = ReadWrite(use_dev);
313 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = 1.0/y[i]; });
314}
315
316void add(const Vector &v1, const Vector &v2, Vector &v)
317{
318 MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
319 "incompatible Vectors!");
320
321#if !defined(MFEM_USE_LEGACY_OPENMP)
322 const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
323 const int N = v.size;
324 // Note: get read access first, in case v is the same as v1/v2.
325 auto x1 = v1.Read(use_dev);
326 auto x2 = v2.Read(use_dev);
327 auto y = v.Write(use_dev);
328 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = x1[i] + x2[i]; });
329#else
330 #pragma omp parallel for
331 for (int i = 0; i < v.size; i++)
332 {
333 v.data[i] = v1.data[i] + v2.data[i];
334 }
335#endif
336}
337
338void add(const Vector &v1, real_t alpha, const Vector &v2, Vector &v)
339{
340 MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
341 "incompatible Vectors!");
342
343 if (alpha == 0.0)
344 {
345 v = v1;
346 }
347 else if (alpha == 1.0)
348 {
349 add(v1, v2, v);
350 }
351 else
352 {
353#if !defined(MFEM_USE_LEGACY_OPENMP)
354 const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
355 const int N = v.size;
356 // Note: get read access first, in case v is the same as v1/v2.
357 auto d_x = v1.Read(use_dev);
358 auto d_y = v2.Read(use_dev);
359 auto d_z = v.Write(use_dev);
360 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
361 {
362 d_z[i] = d_x[i] + alpha * d_y[i];
363 });
364#else
365 const real_t *v1p = v1.data, *v2p = v2.data;
366 real_t *vp = v.data;
367 const int s = v.size;
368 #pragma omp parallel for
369 for (int i = 0; i < s; i++)
370 {
371 vp[i] = v1p[i] + alpha*v2p[i];
372 }
373#endif
374 }
375}
376
377void add(const real_t a, const Vector &x, const Vector &y, Vector &z)
378{
379 MFEM_ASSERT(x.size == y.size && x.size == z.size,
380 "incompatible Vectors!");
381
382 if (a == 0.0)
383 {
384 z = 0.0;
385 }
386 else if (a == 1.0)
387 {
388 add(x, y, z);
389 }
390 else
391 {
392#if !defined(MFEM_USE_LEGACY_OPENMP)
393 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
394 const int N = x.size;
395 // Note: get read access first, in case z is the same as x/y.
396 auto xd = x.Read(use_dev);
397 auto yd = y.Read(use_dev);
398 auto zd = z.Write(use_dev);
399 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
400 {
401 zd[i] = a * (xd[i] + yd[i]);
402 });
403#else
404 const real_t *xp = x.data;
405 const real_t *yp = y.data;
406 real_t *zp = z.data;
407 const int s = x.size;
408 #pragma omp parallel for
409 for (int i = 0; i < s; i++)
410 {
411 zp[i] = a * (xp[i] + yp[i]);
412 }
413#endif
414 }
415}
416
417void add(const real_t a, const Vector &x,
418 const real_t b, const Vector &y, Vector &z)
419{
420 MFEM_ASSERT(x.size == y.size && x.size == z.size,
421 "incompatible Vectors!");
422
423 if (a == 0.0)
424 {
425 z.Set(b, y);
426 }
427 else if (b == 0.0)
428 {
429 z.Set(a, x);
430 }
431#if 0
432 else if (a == 1.0)
433 {
434 add(x, b, y, z);
435 }
436 else if (b == 1.0)
437 {
438 add(y, a, x, z);
439 }
440 else if (a == b)
441 {
442 add(a, x, y, z);
443 }
444#endif
445 else
446 {
447#if !defined(MFEM_USE_LEGACY_OPENMP)
448 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
449 const int N = x.size;
450 // Note: get read access first, in case z is the same as x/y.
451 auto xd = x.Read(use_dev);
452 auto yd = y.Read(use_dev);
453 auto zd = z.Write(use_dev);
454 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
455 {
456 zd[i] = a * xd[i] + b * yd[i];
457 });
458#else
459 const real_t *xp = x.data;
460 const real_t *yp = y.data;
461 real_t *zp = z.data;
462 const int s = x.size;
463 #pragma omp parallel for
464 for (int i = 0; i < s; i++)
465 {
466 zp[i] = a * xp[i] + b * yp[i];
467 }
468#endif
469 }
470}
471
472void subtract(const Vector &x, const Vector &y, Vector &z)
473{
474 MFEM_ASSERT(x.size == y.size && x.size == z.size,
475 "incompatible Vectors!");
476
477#if !defined(MFEM_USE_LEGACY_OPENMP)
478 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
479 const int N = x.size;
480 // Note: get read access first, in case z is the same as x/y.
481 auto xd = x.Read(use_dev);
482 auto yd = y.Read(use_dev);
483 auto zd = z.Write(use_dev);
484 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
485 {
486 zd[i] = xd[i] - yd[i];
487 });
488#else
489 const real_t *xp = x.data;
490 const real_t *yp = y.data;
491 real_t *zp = z.data;
492 const int s = x.size;
493 #pragma omp parallel for
494 for (int i = 0; i < s; i++)
495 {
496 zp[i] = xp[i] - yp[i];
497 }
498#endif
499}
500
501void subtract(const real_t a, const Vector &x, const Vector &y, Vector &z)
502{
503 MFEM_ASSERT(x.size == y.size && x.size == z.size,
504 "incompatible Vectors!");
505
506 if (a == 0.)
507 {
508 z = 0.;
509 }
510 else if (a == 1.)
511 {
512 subtract(x, y, z);
513 }
514 else
515 {
516#if !defined(MFEM_USE_LEGACY_OPENMP)
517 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
518 const int N = x.size;
519 // Note: get read access first, in case z is the same as x/y.
520 auto xd = x.Read(use_dev);
521 auto yd = y.Read(use_dev);
522 auto zd = z.Write(use_dev);
523 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
524 {
525 zd[i] = a * (xd[i] - yd[i]);
526 });
527#else
528 const real_t *xp = x.data;
529 const real_t *yp = y.data;
530 real_t *zp = z.data;
531 const int s = x.size;
532 #pragma omp parallel for
533 for (int i = 0; i < s; i++)
534 {
535 zp[i] = a * (xp[i] - yp[i]);
536 }
537#endif
538 }
539}
540
541void Vector::cross3D(const Vector &vin, Vector &vout) const
542{
543 HostRead();
544 vin.HostRead();
545 vout.HostWrite();
546 MFEM_VERIFY(size == 3, "Only 3D vectors supported in cross.");
547 MFEM_VERIFY(vin.Size() == 3, "Only 3D vectors supported in cross.");
548 vout.SetSize(3);
549 vout(0) = data[1]*vin(2)-data[2]*vin(1);
550 vout(1) = data[2]*vin(0)-data[0]*vin(2);
551 vout(2) = data[0]*vin(1)-data[1]*vin(0);
552}
553
554void Vector::median(const Vector &lo, const Vector &hi)
555{
556 MFEM_ASSERT(size == lo.size && size == hi.size,
557 "incompatible Vectors!");
558
559 const bool use_dev = UseDevice() || lo.UseDevice() || hi.UseDevice();
560 const int N = size;
561 // Note: get read access first, in case *this is the same as lo/hi.
562 auto l = lo.Read(use_dev);
563 auto h = hi.Read(use_dev);
564 auto m = Write(use_dev);
565 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
566 {
567 if (m[i] < l[i])
568 {
569 m[i] = l[i];
570 }
571 else if (m[i] > h[i])
572 {
573 m[i] = h[i];
574 }
575 });
576}
577
578void Vector::GetSubVector(const Array<int> &dofs, Vector &elemvect) const
579{
580 const int n = dofs.Size();
581 elemvect.SetSize(n);
582 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
583 auto d_y = elemvect.Write(use_dev);
584 auto d_X = Read(use_dev);
585 auto d_dofs = dofs.Read(use_dev);
586 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
587 {
588 const int dof_i = d_dofs[i];
589 d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
590 });
591}
592
593void Vector::GetSubVector(const Array<int> &dofs, real_t *elem_data) const
594{
596 const int n = dofs.Size();
597 for (int i = 0; i < n; i++)
598 {
599 const int j = dofs[i];
600 elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
601 }
602}
603
604void Vector::SetSubVector(const Array<int> &dofs, const real_t value)
605{
606 const bool use_dev = dofs.UseDevice();
607 const int n = dofs.Size();
608 // Use read+write access for *this - we only modify some of its entries
609 auto d_X = ReadWrite(use_dev);
610 auto d_dofs = dofs.Read(use_dev);
611 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
612 {
613 const int j = d_dofs[i];
614 if (j >= 0)
615 {
616 d_X[j] = value;
617 }
618 else
619 {
620 d_X[-1-j] = -value;
621 }
622 });
623}
624
625void Vector::SetSubVector(const Array<int> &dofs, const Vector &elemvect)
626{
627 MFEM_ASSERT(dofs.Size() <= elemvect.Size(),
628 "Size mismatch: length of dofs is " << dofs.Size()
629 << ", length of elemvect is " << elemvect.Size());
630
631 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
632 const int n = dofs.Size();
633 // Use read+write access for X - we only modify some of its entries
634 auto d_X = ReadWrite(use_dev);
635 auto d_y = elemvect.Read(use_dev);
636 auto d_dofs = dofs.Read(use_dev);
637 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
638 {
639 const int dof_i = d_dofs[i];
640 if (dof_i >= 0)
641 {
642 d_X[dof_i] = d_y[i];
643 }
644 else
645 {
646 d_X[-1-dof_i] = -d_y[i];
647 }
648 });
649}
650
651void Vector::SetSubVector(const Array<int> &dofs, real_t *elem_data)
652{
653 // Use read+write access because we overwrite only part of the data.
655 const int n = dofs.Size();
656 for (int i = 0; i < n; i++)
657 {
658 const int j= dofs[i];
659 if (j >= 0)
660 {
661 operator()(j) = elem_data[i];
662 }
663 else
664 {
665 operator()(-1-j) = -elem_data[i];
666 }
667 }
668}
669
670void Vector::AddElementVector(const Array<int> &dofs, const Vector &elemvect)
671{
672 MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
673 "length of dofs is " << dofs.Size() <<
674 ", length of elemvect is " << elemvect.Size());
675
676 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
677 const int n = dofs.Size();
678 auto d_y = elemvect.Read(use_dev);
679 auto d_X = ReadWrite(use_dev);
680 auto d_dofs = dofs.Read(use_dev);
681 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
682 {
683 const int j = d_dofs[i];
684 if (j >= 0)
685 {
686 d_X[j] += d_y[i];
687 }
688 else
689 {
690 d_X[-1-j] -= d_y[i];
691 }
692 });
693}
694
695void Vector::AddElementVector(const Array<int> &dofs, real_t *elem_data)
696{
698 const int n = dofs.Size();
699 for (int i = 0; i < n; i++)
700 {
701 const int j = dofs[i];
702 if (j >= 0)
703 {
704 operator()(j) += elem_data[i];
705 }
706 else
707 {
708 operator()(-1-j) -= elem_data[i];
709 }
710 }
711}
712
714 const Vector &elemvect)
715{
716 MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
717 "length of dofs is " << dofs.Size() <<
718 ", length of elemvect is " << elemvect.Size());
719
720 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
721 const int n = dofs.Size();
722 auto d_y = ReadWrite(use_dev);
723 auto d_x = elemvect.Read(use_dev);
724 auto d_dofs = dofs.Read(use_dev);
725 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
726 {
727 const int j = d_dofs[i];
728 if (j >= 0)
729 {
730 d_y[j] += a * d_x[i];
731 }
732 else
733 {
734 d_y[-1-j] -= a * d_x[i];
735 }
736 });
737}
738
740{
741 const bool use_dev = UseDevice() || dofs.UseDevice();
742 const int n = dofs.Size();
743 const int N = size;
744 Vector dofs_vals(n, use_dev ?
747 auto d_data = ReadWrite(use_dev);
748 auto d_dofs_vals = dofs_vals.Write(use_dev);
749 auto d_dofs = dofs.Read(use_dev);
750 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i) { d_dofs_vals[i] = d_data[d_dofs[i]]; });
751 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { d_data[i] = val; });
752 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i) { d_data[d_dofs[i]] = d_dofs_vals[i]; });
753}
754
755void Vector::Print(std::ostream &os, int width) const
756{
757 if (!size) { return; }
759 for (int i = 0; 1; )
760 {
761 os << ZeroSubnormal(data[i]);
762 i++;
763 if (i == size)
764 {
765 break;
766 }
767 if ( i % width == 0 )
768 {
769 os << '\n';
770 }
771 else
772 {
773 os << ' ';
774 }
775 }
776 os << '\n';
777}
778
779#ifdef MFEM_USE_ADIOS2
781 const std::string& variable_name) const
782{
783 if (!size) { return; }
785 os.engine.Put(variable_name, &data[0] );
786}
787#endif
788
789void Vector::Print_HYPRE(std::ostream &os) const
790{
791 int i;
792 std::ios::fmtflags old_fmt = os.flags();
793 os.setf(std::ios::scientific);
794 std::streamsize old_prec = os.precision(14);
795
796 os << size << '\n'; // number of rows
797
799 for (i = 0; i < size; i++)
800 {
801 os << ZeroSubnormal(data[i]) << '\n';
802 }
803
804 os.precision(old_prec);
805 os.flags(old_fmt);
806}
807
808void Vector::PrintHash(std::ostream &os) const
809{
810 os << "size: " << size << '\n';
811 HashFunction hf;
813 os << "hash: " << hf.GetHash() << '\n';
814}
815
816void Vector::Randomize(int seed)
817{
818 if (seed == 0)
819 {
820 seed = (int)time(0);
821 }
822
823 srand((unsigned)seed);
824
825 HostWrite();
826 for (int i = 0; i < size; i++)
827 {
828 data[i] = rand_real();
829 }
830}
831
833{
834 // Scale entries of Vector on the fly, using algorithms from
835 // std::hypot() and LAPACK's drm2. This scaling ensures that the
836 // argument of each call to std::pow is <= 1 to avoid overflow.
837 if (0 == size)
838 {
839 return 0.0;
840 } // end if 0 == size
841
843 if (1 == size)
844 {
845 return std::abs(data[0]);
846 } // end if 1 == size
847 return kernels::Norml2(size, (const real_t*) data);
848}
849
851{
852 HostRead();
853 real_t max = 0.0;
854 for (int i = 0; i < size; i++)
855 {
856 max = std::max(std::abs(data[i]), max);
857 }
858 return max;
859}
860
862{
863 HostRead();
864 real_t sum = 0.0;
865 for (int i = 0; i < size; i++)
866 {
867 sum += std::abs(data[i]);
868 }
869 return sum;
870}
871
873{
874 MFEM_ASSERT(p > 0.0, "Vector::Normlp");
875
876 if (p == 1.0)
877 {
878 return Norml1();
879 }
880 if (p == 2.0)
881 {
882 return Norml2();
883 }
884 if (p < infinity())
885 {
886 // Scale entries of Vector on the fly, using algorithms from
887 // std::hypot() and LAPACK's drm2. This scaling ensures that the
888 // argument of each call to std::pow is <= 1 to avoid overflow.
889 if (0 == size)
890 {
891 return 0.0;
892 } // end if 0 == size
893
894 if (1 == size)
895 {
896 return std::abs(data[0]);
897 } // end if 1 == size
898
899 real_t scale = 0.0;
900 real_t sum = 0.0;
901
902 for (int i = 0; i < size; i++)
903 {
904 if (data[i] != 0.0)
905 {
906 const real_t absdata = std::abs(data[i]);
907 if (scale <= absdata)
908 {
909 sum = 1.0 + sum * std::pow(scale / absdata, p);
910 scale = absdata;
911 continue;
912 } // end if scale <= absdata
913 sum += std::pow(absdata / scale, p); // else scale > absdata
914 } // end if data[i] != 0
915 }
916 return scale * std::pow(sum, 1.0/p);
917 } // end if p < infinity()
918
919 return Normlinf(); // else p >= infinity()
920}
921
923{
924 if (size == 0) { return -infinity(); }
925
926 HostRead();
927 real_t max = data[0];
928
929 for (int i = 1; i < size; i++)
930 {
931 if (data[i] > max)
932 {
933 max = data[i];
934 }
935 }
936
937 return max;
938}
939
940#ifdef MFEM_USE_CUDA
// CUDA kernel: per-block min-reduction into shared memory; each block writes
// its partial minimum to gdsr[blockIdx.x]. The host wrapper finishes the
// reduction.
static __global__ void cuKernelMin(const int N, real_t *gdsr, const real_t *x)
{
   __shared__ real_t s_min[MFEM_CUDA_BLOCKS];
   const int n = blockDim.x*blockIdx.x + threadIdx.x;
   if (n>=N) { return; } // out-of-range threads do not participate
   const int bid = blockIdx.x;
   const int tid = threadIdx.x;
   const int bbd = bid*blockDim.x;
   const int rid = bbd+tid;
   s_min[tid] = x[n];
   // Tree reduction: halve the number of active workers each pass.
   for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   {
      // NOTE(review): threads that returned early above skip this barrier;
      // the rid/rdd guards below bound which partners are read — confirm
      // this is safe on the targeted architectures.
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= blockDim.x) { continue; }
      s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
   }
   if (tid==0) { gdsr[bid] = s_min[0]; } // thread 0 publishes the block result
}
965
// Scratch buffer shared by the CUDA reduction wrappers (grows as needed).
static Array<real_t> cuda_reduce_buf;

// Host wrapper: launch cuKernelMin, then finish the reduction over the
// per-block partial minima on the host.
static real_t cuVectorMin(const int N, const real_t *X)
{
   const int tpb = MFEM_CUDA_BLOCKS;
   const int blockSize = MFEM_CUDA_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   // One partial result per block (ceil(N/tpb)).
   const int min_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
   cuda_reduce_buf.SetSize(min_sz);
   Memory<real_t> &buf = cuda_reduce_buf.GetMemory();
   real_t *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
   cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
   MFEM_GPU_CHECK(cudaGetLastError());
   // Bring partial minima to the host and reduce serially.
   const real_t *h_min = buf.Read(MemoryClass::HOST, min_sz);
   real_t min = std::numeric_limits<real_t>::infinity();
   for (int i = 0; i < min_sz; i++) { min = std::min(min, h_min[i]); }
   return min;
}
984
// CUDA kernel: per-block sum-reduction of x[n]*y[n] (or of x[n] alone when
// y is null, which implements Sum()); each block writes its partial sum to
// gdsr[blockIdx.x].
static __global__ void cuKernelDot(const int N, real_t *gdsr,
                                   const real_t *x, const real_t *y)
{
   __shared__ real_t s_dot[MFEM_CUDA_BLOCKS];
   const int n = blockDim.x*blockIdx.x + threadIdx.x;
   if (n>=N) { return; } // out-of-range threads do not participate
   const int bid = blockIdx.x;
   const int tid = threadIdx.x;
   const int bbd = bid*blockDim.x;
   const int rid = bbd+tid;
   s_dot[tid] = y ? (x[n] * y[n]) : x[n]; // null y => plain sum of x
   // Tree reduction: halve the number of active workers each pass.
   for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= blockDim.x) { continue; }
      s_dot[tid] += s_dot[dualTid];
   }
   if (tid==0) { gdsr[bid] = s_dot[0]; } // thread 0 publishes the block result
}
1010
// Host wrapper: launch cuKernelDot, then sum the per-block partial results
// on the host. Y may be null (see cuKernelDot) to compute a plain sum.
static real_t cuVectorDot(const int N, const real_t *X, const real_t *Y)
{
   const int tpb = MFEM_CUDA_BLOCKS;
   const int blockSize = MFEM_CUDA_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   // One partial result per block (ceil(N/tpb)).
   const int dot_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
   cuda_reduce_buf.SetSize(dot_sz, Device::GetDeviceMemoryType());
   Memory<real_t> &buf = cuda_reduce_buf.GetMemory();
   real_t *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
   cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
   MFEM_GPU_CHECK(cudaGetLastError());
   // Bring partial sums to the host and accumulate serially.
   const real_t *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
   real_t dot = 0.0;
   for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
   return dot;
}
1027#endif // MFEM_USE_CUDA
1028
1029#ifdef MFEM_USE_HIP
// HIP kernel: per-block min-reduction (mirror of cuKernelMin); each block
// writes its partial minimum to gdsr[hipBlockIdx_x].
static __global__ void hipKernelMin(const int N, real_t *gdsr, const real_t *x)
{
   __shared__ real_t s_min[MFEM_HIP_BLOCKS];
   const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
   if (n>=N) { return; } // out-of-range threads do not participate
   const int bid = hipBlockIdx_x;
   const int tid = hipThreadIdx_x;
   const int bbd = bid*hipBlockDim_x;
   const int rid = bbd+tid;
   s_min[tid] = x[n];
   // Tree reduction: halve the number of active workers each pass.
   for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= hipBlockDim_x) { continue; }
      s_min[tid] = std::min(s_min[tid], s_min[dualTid]);
   }
   if (tid==0) { gdsr[bid] = s_min[0]; } // thread 0 publishes the block result
}
1054
// Scratch buffer shared by the HIP reduction wrappers (grows as needed).
static Array<real_t> hip_reduce_buf;

// Host wrapper: launch hipKernelMin, then finish the reduction over the
// per-block partial minima on the host.
static real_t hipVectorMin(const int N, const real_t *X)
{
   const int tpb = MFEM_HIP_BLOCKS;
   const int blockSize = MFEM_HIP_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   // One partial result per block (ceil(N/tpb)).
   const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
   hip_reduce_buf.SetSize(min_sz);
   Memory<real_t> &buf = hip_reduce_buf.GetMemory();
   real_t *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
   hipLaunchKernelGGL(hipKernelMin,gridSize,blockSize,0,0,N,d_min,X);
   MFEM_GPU_CHECK(hipGetLastError());
   // Bring partial minima to the host and reduce serially.
   const real_t *h_min = buf.Read(MemoryClass::HOST, min_sz);
   real_t min = std::numeric_limits<real_t>::infinity();
   for (int i = 0; i < min_sz; i++) { min = std::min(min, h_min[i]); }
   return min;
}
1073
// HIP kernel: per-block sum-reduction of x[n]*y[n] (or of x[n] alone when y
// is null — used for Sum()); mirror of cuKernelDot.
static __global__ void hipKernelDot(const int N, real_t *gdsr,
                                    const real_t *x, const real_t *y)
{
   __shared__ real_t s_dot[MFEM_HIP_BLOCKS];
   const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
   if (n>=N) { return; } // out-of-range threads do not participate
   const int bid = hipBlockIdx_x;
   const int tid = hipThreadIdx_x;
   const int bbd = bid*hipBlockDim_x;
   const int rid = bbd+tid;
   s_dot[tid] = y ? (x[n] * y[n]) : x[n]; // null y => plain sum of x
   // Tree reduction: halve the number of active workers each pass.
   for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= hipBlockDim_x) { continue; }
      s_dot[tid] += s_dot[dualTid];
   }
   if (tid==0) { gdsr[bid] = s_dot[0]; } // thread 0 publishes the block result
}
1099
// Host wrapper: launch hipKernelDot, then sum the per-block partial results
// on the host. Y may be null (see hipKernelDot) to compute a plain sum.
static real_t hipVectorDot(const int N, const real_t *X, const real_t *Y)
{
   const int tpb = MFEM_HIP_BLOCKS;
   const int blockSize = MFEM_HIP_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   // One partial result per block (ceil(N/tpb)).
   const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
   hip_reduce_buf.SetSize(dot_sz);
   Memory<real_t> &buf = hip_reduce_buf.GetMemory();
   real_t *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
   hipLaunchKernelGGL(hipKernelDot,gridSize,blockSize,0,0,N,d_dot,X,Y);
   MFEM_GPU_CHECK(hipGetLastError());
   // Bring partial sums to the host and accumulate serially.
   const real_t *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
   real_t dot = 0.0;
   for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
   return dot;
}
1116#endif // MFEM_USE_HIP
1117
1119{
1120 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
1121 if (size == 0) { return 0.0; }
1122
1123 const bool use_dev = UseDevice() || v.UseDevice();
1124#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP) || defined(MFEM_USE_OPENMP)
1125 auto m_data = Read(use_dev);
1126#else
1127 Read(use_dev);
1128#endif
1129 auto v_data = v.Read(use_dev);
1130
1131 if (!use_dev) { goto vector_dot_cpu; }
1132
1133#ifdef MFEM_USE_OCCA
1134 if (DeviceCanUseOcca())
1135 {
1136 return occa::linalg::dot<real_t,real_t,real_t>(
1138 }
1139#endif
1140
1141#ifdef MFEM_USE_CUDA
1143 {
1144 return cuVectorDot(size, m_data, v_data);
1145 }
1146#endif
1147
1148#ifdef MFEM_USE_HIP
1150 {
1151 return hipVectorDot(size, m_data, v_data);
1152 }
1153#endif
1154
1155#ifdef MFEM_USE_OPENMP
1157 {
1158#define MFEM_USE_OPENMP_DETERMINISTIC_DOT
1159#ifdef MFEM_USE_OPENMP_DETERMINISTIC_DOT
1160 // By default, use a deterministic way of computing the dot product
1161 static Vector th_dot;
1162 #pragma omp parallel
1163 {
1164 const int nt = omp_get_num_threads();
1165 #pragma omp master
1166 th_dot.SetSize(nt);
1167 const int tid = omp_get_thread_num();
1168 const int stride = (size + nt - 1)/nt;
1169 const int start = tid*stride;
1170 const int stop = std::min(start + stride, size);
1171 real_t my_dot = 0.0;
1172 for (int i = start; i < stop; i++)
1173 {
1174 my_dot += m_data[i] * v_data[i];
1175 }
1176 #pragma omp barrier
1177 th_dot(tid) = my_dot;
1178 }
1179 return th_dot.Sum();
1180#else
1181 // The standard way of computing the dot product is non-deterministic
1182 real_t prod = 0.0;
1183 #pragma omp parallel for reduction(+:prod)
1184 for (int i = 0; i < size; i++)
1185 {
1186 prod += m_data[i] * v_data[i];
1187 }
1188 return prod;
1189#endif // MFEM_USE_OPENMP_DETERMINISTIC_DOT
1190 }
1191#endif // MFEM_USE_OPENMP
1193 {
1194 const int N = size;
1195 auto v_data_ = v.Read();
1196 auto m_data_ = Read();
1197 Vector dot(1);
1198 dot.UseDevice(true);
1199 auto d_dot = dot.Write();
1200 dot = 0.0;
1201 mfem::forall(N, [=] MFEM_HOST_DEVICE (int i)
1202 {
1203 d_dot[0] += m_data_[i] * v_data_[i];
1204 });
1205 dot.HostReadWrite();
1206 return dot[0];
1207 }
1208vector_dot_cpu:
1209 return operator*(v_data);
1210}
1211
1213{
1214 if (size == 0) { return infinity(); }
1215
1216 const bool use_dev = UseDevice();
1217 auto m_data = Read(use_dev);
1218
1219 if (!use_dev) { goto vector_min_cpu; }
1220
1221#ifdef MFEM_USE_OCCA
1222 if (DeviceCanUseOcca())
1223 {
1224 return occa::linalg::min<real_t,real_t>(OccaMemoryRead(data, size));
1225 }
1226#endif
1227
1228#ifdef MFEM_USE_CUDA
1230 {
1231 return cuVectorMin(size, m_data);
1232 }
1233#endif
1234
1235#ifdef MFEM_USE_HIP
1237 {
1238 return hipVectorMin(size, m_data);
1239 }
1240#endif
1241
1242#ifdef MFEM_USE_OPENMP
1244 {
1245 real_t minimum = m_data[0];
1246 #pragma omp parallel for reduction(min:minimum)
1247 for (int i = 0; i < size; i++)
1248 {
1249 minimum = std::min(minimum, m_data[i]);
1250 }
1251 return minimum;
1252 }
1253#endif
1254
1256 {
1257 const int N = size;
1258 auto m_data_ = Read();
1259 Vector min(1);
1260 min = infinity();
1261 min.UseDevice(true);
1262 auto d_min = min.ReadWrite();
1263 mfem::forall(N, [=] MFEM_HOST_DEVICE (int i)
1264 {
1265 d_min[0] = (d_min[0]<m_data_[i])?d_min[0]:m_data_[i];
1266 });
1267 min.HostReadWrite();
1268 return min[0];
1269 }
1270
1271vector_min_cpu:
1272 real_t minimum = data[0];
1273 for (int i = 1; i < size; i++)
1274 {
1275 if (m_data[i] < minimum)
1276 {
1277 minimum = m_data[i];
1278 }
1279 }
1280 return minimum;
1281}
1282
1284{
1285 if (size == 0) { return 0.0; }
1286
1287 if (UseDevice())
1288 {
1289#ifdef MFEM_USE_CUDA
1291 {
1292 return cuVectorDot(size, Read(), nullptr);
1293 }
1294#endif
1295#ifdef MFEM_USE_HIP
1297 {
1298 return hipVectorDot(size, Read(), nullptr);
1299 }
1300#endif
1302 {
1303 const int N = size;
1304 auto d_data = Read();
1305 Vector sum(1);
1306 sum.UseDevice(true);
1307 auto d_sum = sum.Write();
1308 d_sum[0] = 0.0;
1309 mfem::forall(N, [=] MFEM_HOST_DEVICE (int i)
1310 {
1311 d_sum[0] += d_data[i];
1312 });
1313 sum.HostReadWrite();
1314 return sum[0];
1315 }
1316 }
1317
1318 // CPU fallback
1319 const real_t *h_data = HostRead();
1320 real_t sum = 0.0;
1321 for (int i = 0; i < size; i++)
1322 {
1323 sum += h_data[i];
1324 }
1325 return sum;
1326}
1327
1328}
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition array.hpp:123
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:697
int Size() const
Return the logical size of the array.
Definition array.hpp:144
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
Definition array.hpp:129
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:317
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating memory.
Definition device.hpp:265
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
Hash function for data sequences.
Definition hash.hpp:459
std::string GetHash() const
Return the hash string for the current sequence and reset (clear) the sequence.
Definition hash.cpp:60
HashFunction & AppendDoubles(const real_t *doubles, size_t num_doubles)
Add a sequence of doubles for hashing, given as a c-array.
Definition hash.hpp:511
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently valid, then the device memory type is returned.
bool Empty() const
Return true if the Memory object is empty, see Reset().
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::GetHostMemoryType().
Vector data type.
Definition vector.hpp:80
void Randomize(int seed=0)
Set random values in the vector.
Definition vector.cpp:816
void PrintHash(std::ostream &out) const
Print the Vector size and hash of its data.
Definition vector.cpp:808
real_t & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
Definition vector.cpp:95
void SetVector(const Vector &v, int offset)
Definition vector.cpp:274
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:478
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:474
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Definition vector.cpp:554
void Neg()
(*this) = -(*this)
Definition vector.cpp:300
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
Definition vector.cpp:755
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:490
real_t Normlinf() const
Returns the l_infinity norm of the vector.
Definition vector.cpp:850
real_t Norml1() const
Returns the l_1 norm of the vector.
Definition vector.cpp:861
void SetSubVector(const Array< int > &dofs, const real_t value)
Set the entries listed in dofs to the given value.
Definition vector.cpp:604
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Definition vector.cpp:670
real_t operator*(const real_t *) const
Dot product with a double * array.
Definition vector.cpp:105
Memory< real_t > data
Definition vector.hpp:83
void AddSubVector(const Vector &v, int offset)
Definition vector.cpp:287
Vector & operator*=(real_t c)
Definition vector.cpp:162
real_t Norml2() const
Returns the l2 norm of the vector.
Definition vector.cpp:832
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
Definition vector.cpp:49
Vector & Set(const real_t a, const Vector &x)
(*this) = a * x
Definition vector.cpp:262
real_t Max() const
Returns the maximal element of the vector.
Definition vector.cpp:922
virtual bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
Definition vector.hpp:139
int Size() const
Returns the size of the vector.
Definition vector.hpp:218
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition vector.hpp:136
real_t Sum() const
Return the sum of the vector entries.
Definition vector.cpp:1283
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
Definition vector.cpp:789
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:538
void Reciprocal()
(*this)(i) = 1.0 / (*this)(i)
Definition vector.cpp:308
virtual real_t * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:486
real_t Normlp(real_t p) const
Returns the l_p norm of the vector.
Definition vector.cpp:872
Vector & operator-=(real_t c)
Definition vector.cpp:205
Vector & operator=(const real_t *v)
Copy Size() entries from v.
Definition vector.cpp:118
void SetSubVectorComplement(const Array< int > &dofs, const real_t val)
Set all vector entries NOT in the dofs Array to the given val.
Definition vector.cpp:739
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:494
real_t Min() const
Returns the minimal element of the vector.
Definition vector.cpp:1212
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
Definition vector.cpp:578
Vector & operator+=(real_t c)
Definition vector.cpp:226
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:482
Vector & Add(const real_t a, const Vector &Va)
(*this) += a * Va
Definition vector.cpp:247
real_t & operator()(int i)
Access Vector entries using () for 0-based indexing.
Definition vector.hpp:624
void cross3D(const Vector &vin, Vector &vout) const
Definition vector.cpp:541
Vector & operator/=(real_t c)
Definition vector.cpp:183
const real_t alpha
Definition ex15.cpp:369
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
MFEM_HOST_DEVICE real_t Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition kernels.hpp:133
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition vector.hpp:45
real_t rand_real()
Generate a random real_t number in the interval [0,1) using rand().
Definition vector.hpp:59
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:316
T ZeroSubnormal(T val)
Definition vector.hpp:502
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configuration.
Definition occa.hpp:69
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition vector.cpp:472
float real_t
Definition config.hpp:43
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-only access with the mfem::Device MemoryClass.
Definition occa.hpp:37
void forall(int N, lambda &&body)
Definition forall.hpp:754
void forall_switch(bool use_dev, int N, lambda &&body)
Definition forall.hpp:757
real_t p(const Vector &x, real_t t)
RefCoord s[3]
@ DEBUG_DEVICE
[device] Debug backend: host memory is READ/WRITE protected while a device is in use....
Definition device.hpp:76
@ HIP_MASK
Bitwise-OR of all HIP backends.
Definition device.hpp:91
@ CUDA_MASK
Bitwise-OR of all CUDA backends.
Definition device.hpp:89
@ OMP_MASK
Bitwise-OR of all OpenMP backends.
Definition device.hpp:93