MFEM v4.8.0
Finite element discretization library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
vector.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12// Implementation of data type vector
13
14#include "kernels.hpp"
15#include "vector.hpp"
16#include "../general/forall.hpp"
17
18#ifdef MFEM_USE_OPENMP
19#include <omp.h>
20#endif
21
22#include <iostream>
23#include <iomanip>
24#include <cmath>
25#include <ctime>
26#include <limits>
27
28namespace mfem
29{
30
31/**
32 * Reducer for helping to compute L2-norms. Given two partial results:
33 * a0 = sum_i (|v_i|/a1)^2
34 * b0 = sum_j (|v_j|/b1)^2 (j disjoint from i for vector v)
35 * computes:
36 * a1 = max(a1, b1)
37 * a0 = (a1 == 0 ? 0 : sum_{k in union(i,j)} (|v_k|/a1)^2)
38 *
39 * This form is resiliant against overflow/underflow, similar to std::hypot
40 */
41struct L2Reducer
42{
43 using value_type = DevicePair<real_t, real_t>;
44 static MFEM_HOST_DEVICE void Join(value_type& a, const value_type &b)
45 {
46 real_t scale = fmax(a.second, b.second);
47 if (scale > 0)
48 {
49 real_t s = a.second / scale;
50 a.first *= s * s;
51 s = b.second / scale;
52 a.first += b.first * s * s;
53 a.second = scale;
54 }
55 }
56
57 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
58 {
59 a.first = 0;
60 a.second = 0;
61 }
62};
63
64/**
65 * Reducer for helping to compute Lp-norms. Given two partial results:
66 * a0 = sum_i (|v_i|/a1)^p
67 * b0 = sum_j (|v_j|/b1)^p (j disjoint from i for vector v)
68 * computes:
69 * a1 = max(a1, b1)
70 * a0 = (a1 == 0 ? 0 : sum_{k in union(i,j)} (|v_k|/a1)^p)
71 *
72 * This form is resiliant against overflow/underflow, similar to std::hypot
73 */
74struct LpReducer
75{
76 real_t p;
77 using value_type = DevicePair<real_t, real_t>;
78 MFEM_HOST_DEVICE void Join(value_type& a, const value_type &b) const
79 {
80 real_t scale = fmax(a.second, b.second);
81 if (scale > 0)
82 {
83 a.first = a.first * pow(a.second / scale, p) +
84 b.first * pow(b.second / scale, p);
85 a.second = scale;
86 }
87 }
88
89 static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
90 {
91 a.first = 0;
92 a.second = 0;
93 }
94};
95
96static Array<real_t>& vector_workspace()
97{
98 static Array<real_t> instance;
99 return instance;
100}
101
102static Array<DevicePair<real_t, real_t>> &Lpvector_workspace()
103{
104 static Array<DevicePair<real_t, real_t>> instance;
105 return instance;
106}
107
109{
110 const int s = v.Size();
111 size = s;
112 if (s > 0)
113 {
114 MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
115 data.New(s, v.data.GetMemoryType());
116 data.CopyFrom(v.data, s);
117 }
118 UseDevice(v.UseDevice());
119}
120
122{
123 *this = std::move(v);
124}
125
126void Vector::Load(std::istream **in, int np, int *dim)
127{
128 int i, j, s;
129
130 s = 0;
131 for (i = 0; i < np; i++)
132 {
133 s += dim[i];
134 }
135
136 SetSize(s);
137 HostWrite();
138
139 int p = 0;
140 for (i = 0; i < np; i++)
141 {
142 for (j = 0; j < dim[i]; j++)
143 {
144 *in[i] >> data[p++];
145 // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
146 // so we reset the failbit here.
147 if (!*in[i] && errno == ERANGE)
148 {
149 in[i]->clear();
150 }
151 }
152 }
153}
154
155void Vector::Load(std::istream &in, int Size)
156{
157 SetSize(Size);
158 HostWrite();
159
160 for (int i = 0; i < size; i++)
161 {
162 in >> data[i];
163 // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
164 // so we reset the failbit here.
165 if (!in && errno == ERANGE)
166 {
167 in.clear();
168 }
169 }
170}
171
173{
174 return operator()(i);
175}
176
177const real_t &Vector::Elem(int i) const
178{
179 return operator()(i);
180}
181
183{
184 HostRead();
185 real_t dot = 0.0;
186#ifdef MFEM_USE_LEGACY_OPENMP
187 #pragma omp parallel for reduction(+:dot)
188#endif
189 for (int i = 0; i < size; i++)
190 {
191 dot += data[i] * v[i];
192 }
193 return dot;
194}
195
197{
199 return *this;
200}
201
203{
204#if 0
205 SetSize(v.Size(), v.data.GetMemoryType());
206 data.CopyFrom(v.data, v.Size());
207 UseDevice(v.UseDevice());
208#else
209 SetSize(v.Size());
210 bool vuse = v.UseDevice();
211 const bool use_dev = UseDevice() || vuse;
212 v.UseDevice(use_dev);
213 // keep 'data' where it is, unless 'use_dev' is true
214 if (use_dev) { Write(); }
215 data.CopyFrom(v.data, v.Size());
216 v.UseDevice(vuse);
217#endif
218 return *this;
219}
220
222{
223 v.Swap(*this);
224 if (this != &v) { v.Destroy(); }
225 return *this;
226}
227
229{
230 const bool use_dev = UseDevice();
231 const int N = size;
232 auto y = Write(use_dev);
233 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = value; });
234 return *this;
235}
236
238{
239 const bool use_dev = UseDevice();
240 const int N = size;
241 auto y = ReadWrite(use_dev);
242 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] *= c; });
243 return *this;
244}
245
247{
248 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
249
250 const bool use_dev = UseDevice() || v.UseDevice();
251 const int N = size;
252 auto y = ReadWrite(use_dev);
253 auto x = v.Read(use_dev);
254 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] *= x[i]; });
255 return *this;
256}
257
259{
260 const bool use_dev = UseDevice();
261 const int N = size;
262 const real_t m = 1.0/c;
263 auto y = ReadWrite(use_dev);
264 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] *= m; });
265 return *this;
266}
267
269{
270 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
271
272 const bool use_dev = UseDevice() || v.UseDevice();
273 const int N = size;
274 auto y = ReadWrite(use_dev);
275 auto x = v.Read(use_dev);
276 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] /= x[i]; });
277 return *this;
278}
279
281{
282 const bool use_dev = UseDevice();
283 const int N = size;
284 auto y = ReadWrite(use_dev);
285 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] -= c; });
286 return *this;
287}
288
290{
291 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
292
293 const bool use_dev = UseDevice() || v.UseDevice();
294 const int N = size;
295 auto y = ReadWrite(use_dev);
296 auto x = v.Read(use_dev);
297 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] -= x[i]; });
298 return *this;
299}
300
302{
303 const bool use_dev = UseDevice();
304 const int N = size;
305 auto y = ReadWrite(use_dev);
306 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] += c; });
307 return *this;
308}
309
311{
312 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
313
314 const bool use_dev = UseDevice() || v.UseDevice();
315 const int N = size;
316 auto y = ReadWrite(use_dev);
317 auto x = v.Read(use_dev);
318 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] += x[i]; });
319 return *this;
320}
321
322Vector &Vector::Add(const real_t a, const Vector &Va)
323{
324 MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
326 if (a != 0.0)
327 {
328 const int N = size;
329 const bool use_dev = UseDevice() || Va.UseDevice();
330 auto y = ReadWrite(use_dev);
331 auto x = Va.Read(use_dev);
332 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] += a * x[i]; });
333 }
334 return *this;
335}
336
337Vector &Vector::Set(const real_t a, const Vector &Va)
338{
339 MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
340
341 const bool use_dev = UseDevice() || Va.UseDevice();
342 const int N = size;
343 auto x = Va.Read(use_dev);
344 auto y = Write(use_dev);
345 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = a * x[i]; });
346 return *this;
347}
348
349void Vector::SetVector(const Vector &v, int offset)
350{
351 MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
352
353 const int vs = v.Size();
354 const real_t *vp = v.data;
355 real_t *p = data + offset;
356 for (int i = 0; i < vs; i++)
357 {
358 p[i] = vp[i];
359 }
360}
361
362void Vector::AddSubVector(const Vector &v, int offset)
363{
364 MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
365
366 const int vs = v.Size();
367 const real_t *vp = v.data;
368 real_t *p = data + offset;
369 for (int i = 0; i < vs; i++)
370 {
371 p[i] += vp[i];
372 }
373}
374
376{
377 const bool use_dev = UseDevice();
378 const int N = size;
379 auto y = ReadWrite(use_dev);
380 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = -y[i]; });
381}
382
384{
385 const bool use_dev = UseDevice();
386 const int N = size;
387 auto y = ReadWrite(use_dev);
388 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = 1.0/y[i]; });
389}
390
391void add(const Vector &v1, const Vector &v2, Vector &v)
392{
393 MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
394 "incompatible Vectors!");
395
396#if !defined(MFEM_USE_LEGACY_OPENMP)
397 const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
398 const int N = v.size;
399 // Note: get read access first, in case v is the same as v1/v2.
400 auto x1 = v1.Read(use_dev);
401 auto x2 = v2.Read(use_dev);
402 auto y = v.Write(use_dev);
403 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = x1[i] + x2[i]; });
404#else
405 #pragma omp parallel for
406 for (int i = 0; i < v.size; i++)
407 {
408 v.data[i] = v1.data[i] + v2.data[i];
409 }
410#endif
411}
412
413void add(const Vector &v1, real_t alpha, const Vector &v2, Vector &v)
414{
415 MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
416 "incompatible Vectors!");
417
418 if (alpha == 0.0)
419 {
420 v = v1;
421 }
422 else if (alpha == 1.0)
423 {
424 add(v1, v2, v);
425 }
426 else
427 {
428#if !defined(MFEM_USE_LEGACY_OPENMP)
429 const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
430 const int N = v.size;
431 // Note: get read access first, in case v is the same as v1/v2.
432 auto d_x = v1.Read(use_dev);
433 auto d_y = v2.Read(use_dev);
434 auto d_z = v.Write(use_dev);
435 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
436 {
437 d_z[i] = d_x[i] + alpha * d_y[i];
438 });
439#else
440 const real_t *v1p = v1.data, *v2p = v2.data;
441 real_t *vp = v.data;
442 const int s = v.size;
443 #pragma omp parallel for
444 for (int i = 0; i < s; i++)
445 {
446 vp[i] = v1p[i] + alpha*v2p[i];
447 }
448#endif
449 }
450}
451
452void add(const real_t a, const Vector &x, const Vector &y, Vector &z)
453{
454 MFEM_ASSERT(x.size == y.size && x.size == z.size,
455 "incompatible Vectors!");
456
457 if (a == 0.0)
458 {
459 z = 0.0;
460 }
461 else if (a == 1.0)
462 {
463 add(x, y, z);
464 }
465 else
466 {
467#if !defined(MFEM_USE_LEGACY_OPENMP)
468 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
469 const int N = x.size;
470 // Note: get read access first, in case z is the same as x/y.
471 auto xd = x.Read(use_dev);
472 auto yd = y.Read(use_dev);
473 auto zd = z.Write(use_dev);
474 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
475 {
476 zd[i] = a * (xd[i] + yd[i]);
477 });
478#else
479 const real_t *xp = x.data;
480 const real_t *yp = y.data;
481 real_t *zp = z.data;
482 const int s = x.size;
483 #pragma omp parallel for
484 for (int i = 0; i < s; i++)
485 {
486 zp[i] = a * (xp[i] + yp[i]);
487 }
488#endif
489 }
490}
491
492void add(const real_t a, const Vector &x,
493 const real_t b, const Vector &y, Vector &z)
494{
495 MFEM_ASSERT(x.size == y.size && x.size == z.size,
496 "incompatible Vectors!");
497
498 if (a == 0.0)
499 {
500 z.Set(b, y);
501 }
502 else if (b == 0.0)
503 {
504 z.Set(a, x);
505 }
506#if 0
507 else if (a == 1.0)
508 {
509 add(x, b, y, z);
510 }
511 else if (b == 1.0)
512 {
513 add(y, a, x, z);
514 }
515 else if (a == b)
516 {
517 add(a, x, y, z);
518 }
519#endif
520 else
521 {
522#if !defined(MFEM_USE_LEGACY_OPENMP)
523 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
524 const int N = x.size;
525 // Note: get read access first, in case z is the same as x/y.
526 auto xd = x.Read(use_dev);
527 auto yd = y.Read(use_dev);
528 auto zd = z.Write(use_dev);
529 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
530 {
531 zd[i] = a * xd[i] + b * yd[i];
532 });
533#else
534 const real_t *xp = x.data;
535 const real_t *yp = y.data;
536 real_t *zp = z.data;
537 const int s = x.size;
538 #pragma omp parallel for
539 for (int i = 0; i < s; i++)
540 {
541 zp[i] = a * xp[i] + b * yp[i];
542 }
543#endif
544 }
545}
546
547void subtract(const Vector &x, const Vector &y, Vector &z)
548{
549 MFEM_ASSERT(x.size == y.size && x.size == z.size,
550 "incompatible Vectors!");
551
552#if !defined(MFEM_USE_LEGACY_OPENMP)
553 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
554 const int N = x.size;
555 // Note: get read access first, in case z is the same as x/y.
556 auto xd = x.Read(use_dev);
557 auto yd = y.Read(use_dev);
558 auto zd = z.Write(use_dev);
559 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
560 {
561 zd[i] = xd[i] - yd[i];
562 });
563#else
564 const real_t *xp = x.data;
565 const real_t *yp = y.data;
566 real_t *zp = z.data;
567 const int s = x.size;
568 #pragma omp parallel for
569 for (int i = 0; i < s; i++)
570 {
571 zp[i] = xp[i] - yp[i];
572 }
573#endif
574}
575
576void subtract(const real_t a, const Vector &x, const Vector &y, Vector &z)
577{
578 MFEM_ASSERT(x.size == y.size && x.size == z.size,
579 "incompatible Vectors!");
580
581 if (a == 0.)
582 {
583 z = 0.;
584 }
585 else if (a == 1.)
586 {
587 subtract(x, y, z);
588 }
589 else
590 {
591#if !defined(MFEM_USE_LEGACY_OPENMP)
592 const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
593 const int N = x.size;
594 // Note: get read access first, in case z is the same as x/y.
595 auto xd = x.Read(use_dev);
596 auto yd = y.Read(use_dev);
597 auto zd = z.Write(use_dev);
598 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
599 {
600 zd[i] = a * (xd[i] - yd[i]);
601 });
602#else
603 const real_t *xp = x.data;
604 const real_t *yp = y.data;
605 real_t *zp = z.data;
606 const int s = x.size;
607 #pragma omp parallel for
608 for (int i = 0; i < s; i++)
609 {
610 zp[i] = a * (xp[i] - yp[i]);
611 }
612#endif
613 }
614}
615
616void Vector::cross3D(const Vector &vin, Vector &vout) const
617{
618 HostRead();
619 vin.HostRead();
620 vout.HostWrite();
621 MFEM_VERIFY(size == 3, "Only 3D vectors supported in cross.");
622 MFEM_VERIFY(vin.Size() == 3, "Only 3D vectors supported in cross.");
623 vout.SetSize(3);
624 vout(0) = data[1]*vin(2)-data[2]*vin(1);
625 vout(1) = data[2]*vin(0)-data[0]*vin(2);
626 vout(2) = data[0]*vin(1)-data[1]*vin(0);
627}
628
629void Vector::median(const Vector &lo, const Vector &hi)
630{
631 MFEM_ASSERT(size == lo.size && size == hi.size,
632 "incompatible Vectors!");
633
634 const bool use_dev = UseDevice() || lo.UseDevice() || hi.UseDevice();
635 const int N = size;
636 // Note: get read access first, in case *this is the same as lo/hi.
637 auto l = lo.Read(use_dev);
638 auto h = hi.Read(use_dev);
639 auto m = Write(use_dev);
640 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)
641 {
642 if (m[i] < l[i])
643 {
644 m[i] = l[i];
645 }
646 else if (m[i] > h[i])
647 {
648 m[i] = h[i];
649 }
650 });
651}
652
653void Vector::GetSubVector(const Array<int> &dofs, Vector &elemvect) const
654{
655 const int n = dofs.Size();
656 elemvect.SetSize(n);
657 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
658 auto d_y = elemvect.Write(use_dev);
659 auto d_X = Read(use_dev);
660 auto d_dofs = dofs.Read(use_dev);
661 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
662 {
663 const int dof_i = d_dofs[i];
664 d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
665 });
666}
667
668void Vector::GetSubVector(const Array<int> &dofs, real_t *elem_data) const
669{
671 const int n = dofs.Size();
672 for (int i = 0; i < n; i++)
673 {
674 const int j = dofs[i];
675 elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
676 }
677}
678
679void Vector::SetSubVector(const Array<int> &dofs, const real_t value)
680{
681 const bool use_dev = dofs.UseDevice();
682 const int n = dofs.Size();
683 // Use read+write access for *this - we only modify some of its entries
684 auto d_X = ReadWrite(use_dev);
685 auto d_dofs = dofs.Read(use_dev);
686 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
687 {
688 const int j = d_dofs[i];
689 if (j >= 0)
690 {
691 d_X[j] = value;
692 }
693 else
694 {
695 d_X[-1-j] = -value;
696 }
697 });
698}
699
700void Vector::SetSubVector(const Array<int> &dofs, const Vector &elemvect)
701{
702 MFEM_ASSERT(dofs.Size() <= elemvect.Size(),
703 "Size mismatch: length of dofs is " << dofs.Size()
704 << ", length of elemvect is " << elemvect.Size());
705
706 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
707 const int n = dofs.Size();
708 // Use read+write access for X - we only modify some of its entries
709 auto d_X = ReadWrite(use_dev);
710 auto d_y = elemvect.Read(use_dev);
711 auto d_dofs = dofs.Read(use_dev);
712 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
713 {
714 const int dof_i = d_dofs[i];
715 if (dof_i >= 0)
716 {
717 d_X[dof_i] = d_y[i];
718 }
719 else
720 {
721 d_X[-1-dof_i] = -d_y[i];
722 }
723 });
724}
725
726void Vector::SetSubVector(const Array<int> &dofs, real_t *elem_data)
727{
728 // Use read+write access because we overwrite only part of the data.
730 const int n = dofs.Size();
731 for (int i = 0; i < n; i++)
732 {
733 const int j= dofs[i];
734 if (j >= 0)
735 {
736 operator()(j) = elem_data[i];
737 }
738 else
739 {
740 operator()(-1-j) = -elem_data[i];
741 }
742 }
743}
744
745void Vector::AddElementVector(const Array<int> &dofs, const Vector &elemvect)
746{
747 MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
748 "length of dofs is " << dofs.Size() <<
749 ", length of elemvect is " << elemvect.Size());
750
751 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
752 const int n = dofs.Size();
753 auto d_y = elemvect.Read(use_dev);
754 auto d_X = ReadWrite(use_dev);
755 auto d_dofs = dofs.Read(use_dev);
756 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
757 {
758 const int j = d_dofs[i];
759 if (j >= 0)
760 {
761 d_X[j] += d_y[i];
762 }
763 else
764 {
765 d_X[-1-j] -= d_y[i];
766 }
767 });
768}
769
770void Vector::AddElementVector(const Array<int> &dofs, real_t *elem_data)
771{
773 const int n = dofs.Size();
774 for (int i = 0; i < n; i++)
775 {
776 const int j = dofs[i];
777 if (j >= 0)
778 {
779 operator()(j) += elem_data[i];
780 }
781 else
782 {
783 operator()(-1-j) -= elem_data[i];
784 }
785 }
786}
787
789 const Vector &elemvect)
790{
791 MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
792 "length of dofs is " << dofs.Size() <<
793 ", length of elemvect is " << elemvect.Size());
794
795 const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
796 const int n = dofs.Size();
797 auto d_y = ReadWrite(use_dev);
798 auto d_x = elemvect.Read(use_dev);
799 auto d_dofs = dofs.Read(use_dev);
800 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i)
801 {
802 const int j = d_dofs[i];
803 if (j >= 0)
804 {
805 d_y[j] += a * d_x[i];
806 }
807 else
808 {
809 d_y[-1-j] -= a * d_x[i];
810 }
811 });
812}
813
815{
816 const bool use_dev = UseDevice() || dofs.UseDevice();
817 const int n = dofs.Size();
818 const int N = size;
819 Vector dofs_vals(n, use_dev ?
822 auto d_data = ReadWrite(use_dev);
823 auto d_dofs_vals = dofs_vals.Write(use_dev);
824 auto d_dofs = dofs.Read(use_dev);
825 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i) { d_dofs_vals[i] = d_data[d_dofs[i]]; });
826 mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { d_data[i] = val; });
827 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i) { d_data[d_dofs[i]] = d_dofs_vals[i]; });
828}
829
830void Vector::Print(std::ostream &os, int width) const
831{
832 if (!size) { return; }
834 for (int i = 0; 1; )
835 {
836 os << ZeroSubnormal(data[i]);
837 i++;
838 if (i == size)
839 {
840 break;
841 }
842 if ( i % width == 0 )
843 {
844 os << '\n';
845 }
846 else
847 {
848 os << ' ';
849 }
850 }
851 os << '\n';
852}
853
854#ifdef MFEM_USE_ADIOS2
856 const std::string& variable_name) const
857{
858 if (!size) { return; }
860 os.engine.Put(variable_name, &data[0] );
861}
862#endif
863
864void Vector::Print_HYPRE(std::ostream &os) const
865{
866 int i;
867 std::ios::fmtflags old_fmt = os.flags();
868 os.setf(std::ios::scientific);
869 std::streamsize old_prec = os.precision(14);
870
871 os << size << '\n'; // number of rows
872
874 for (i = 0; i < size; i++)
875 {
876 os << ZeroSubnormal(data[i]) << '\n';
877 }
878
879 os.precision(old_prec);
880 os.flags(old_fmt);
881}
882
883void Vector::PrintMathematica(std::ostream & os) const
884{
885 std::ios::fmtflags old_fmt = os.flags();
886 os.setf(std::ios::scientific);
887 std::streamsize old_prec = os.precision(14);
888
889 os << "(* Read file into Mathematica using: "
890 << "myVec = Get[\"this_file_name\"] *)\n";
891 os << "{\n";
892
894 for (int i = 0; i < size; i++)
895 {
896 os << "Internal`StringToMReal[\"" << ZeroSubnormal(data[i]) << "\"]";
897 if (i < size - 1) { os << ','; }
898 os << '\n';
899 }
900
901 os << "}\n";
902
903 os.precision(old_prec);
904 os.flags(old_fmt);
905}
906
907void Vector::PrintHash(std::ostream &os) const
908{
909 os << "size: " << size << '\n';
910 HashFunction hf;
912 os << "hash: " << hf.GetHash() << '\n';
913}
914
915void Vector::Randomize(int seed)
916{
917 if (seed == 0)
918 {
919 seed = (int)time(0);
920 }
921
922 srand((unsigned)seed);
923
924 HostWrite();
925 for (int i = 0; i < size; i++)
926 {
927 data[i] = rand_real();
928 }
929}
930
932{
933 // Scale entries of Vector on the fly, using algorithms from
934 // std::hypot() and LAPACK's drm2. This scaling ensures that the
935 // argument of each call to std::pow is <= 1 to avoid overflow.
936 if (size == 0)
937 {
938 return 0.0;
939 }
940
941 auto m_data = Read(UseDevice());
942 using value_type = DevicePair<real_t, real_t>;
943 value_type res;
944 res.first = 0;
945 res.second = 0;
946 // first compute sum (|m_data|/scale)^2
947 reduce(
948 size, res,
949 [=] MFEM_HOST_DEVICE(int i, value_type &r)
950 {
951 real_t n = fabs(m_data[i]);
952 if (n > 0)
953 {
954 if (r.second <= n)
955 {
956 real_t arg = r.second / n;
957 r.first = r.first * (arg * arg) + 1;
958 r.second = n;
959 }
960 else
961 {
962 real_t arg = n / r.second;
963 r.first += arg * arg;
964 }
965 }
966 },
967 L2Reducer{}, UseDevice(), Lpvector_workspace());
968 // final answer
969 return res.second * sqrt(res.first);
970}
971
973{
974 if (size == 0) { return 0; }
975
976 auto m_data = Read(UseDevice());
977 real_t res = 0;
978 reduce(
979 size, res,
980 [=] MFEM_HOST_DEVICE(int i, real_t &r) { r = fmax(r, fabs(m_data[i])); },
981 MaxReducer<real_t> {}, UseDevice(), vector_workspace());
982 return res;
983}
984
986{
987 if (size == 0) { return 0.0; }
988
989 auto m_data = Read(UseDevice());
990 real_t res = 0;
991 reduce(
992 size, res,
993 [=] MFEM_HOST_DEVICE(int i, real_t &r) { r += fabs(m_data[i]); },
994 SumReducer<real_t> {}, UseDevice(), vector_workspace());
995 return res;
996}
997
999{
1000 MFEM_ASSERT(p > 0.0, "Vector::Normlp");
1001
1002 if (p == 1.0)
1003 {
1004 return Norml1();
1005 }
1006 if (p == 2.0)
1007 {
1008 return Norml2();
1009 }
1010 if (p < infinity())
1011 {
1012 // Scale entries of Vector on the fly, using algorithms from
1013 // std::hypot() and LAPACK's drm2. This scaling ensures that the
1014 // argument of each call to std::pow is <= 1 to avoid overflow.
1015 if (size == 0)
1016 {
1017 return 0.0;
1018 }
1019
1020 auto m_data = Read(UseDevice());
1021 using value_type = DevicePair<real_t, real_t>;
1022 value_type res;
1023 res.first = 0;
1024 res.second = 0;
1025 // first compute sum (|m_data|/scale)^p
1026 reduce(
1027 size, res,
1028 [=] MFEM_HOST_DEVICE(int i, value_type &r)
1029 {
1030 real_t n = fabs(m_data[i]);
1031 if (n > 0)
1032 {
1033 if (r.second <= n)
1034 {
1035 real_t arg = r.second / n;
1036 r.first = r.first * pow(arg, p) + 1;
1037 r.second = n;
1038 }
1039 else
1040 {
1041 real_t arg = n / r.second;
1042 r.first += pow(arg, p);
1043 }
1044 }
1045 },
1046 LpReducer{p}, UseDevice(), Lpvector_workspace());
1047 // final answer
1048 return res.second * pow(res.first, 1.0 / p);
1049 } // end if p < infinity()
1050
1051 return Normlinf(); // else p >= infinity()
1052}
1053
1055{
1056 MFEM_ASSERT(size == v.size, "incompatible Vectors!");
1057 if (size == 0) { return 0.0; }
1058
1059 const bool use_dev = UseDevice() || v.UseDevice();
1060
1061 auto m_data = Read(use_dev);
1062 auto v_data = v.Read(use_dev);
1063
1064 if (use_dev)
1065 {
1066 // special path for OCCA and OpenMP
1067#ifdef MFEM_USE_OCCA
1068 if (DeviceCanUseOcca())
1069 {
1070 return occa::linalg::dot<real_t, real_t, real_t>(
1072 }
1073#endif
1074
1075#ifdef MFEM_USE_OPENMP
1077 {
1078#define MFEM_USE_OPENMP_DETERMINISTIC_DOT
1079#ifdef MFEM_USE_OPENMP_DETERMINISTIC_DOT
1080 // By default, use a deterministic way of computing the dot product
1081 static Vector th_dot;
1082 #pragma omp parallel
1083 {
1084 const int nt = omp_get_num_threads();
1085 #pragma omp master
1086 th_dot.SetSize(nt);
1087 const int tid = omp_get_thread_num();
1088 const int stride = (size + nt - 1) / nt;
1089 const int start = tid * stride;
1090 const int stop = std::min(start + stride, size);
1091 real_t my_dot = 0.0;
1092 for (int i = start; i < stop; i++)
1093 {
1094 my_dot += m_data[i] * v_data[i];
1095 }
1096 #pragma omp barrier
1097 th_dot(tid) = my_dot;
1098 }
1099 return th_dot.Sum();
1100#else
1101 // The standard way of computing the dot product is non-deterministic
1102 real_t prod = 0.0;
1103 #pragma omp parallel for reduction(+ : prod)
1104 for (int i = 0; i < size; i++)
1105 {
1106 prod += m_data[i] * v_data[i];
1107 }
1108 return prod;
1109#endif // MFEM_USE_OPENMP_DETERMINISTIC_DOT
1110 }
1111#endif // MFEM_USE_OPENMP
1112 }
1113
1114 // normal path for everything else (cuda, hip, debug, cpu)
1115 real_t res = 0;
1116 reduce(
1117 size, res,
1118 [=] MFEM_HOST_DEVICE(int i, real_t &r) { r += m_data[i] * v_data[i]; },
1119 SumReducer<real_t> {}, use_dev, vector_workspace());
1120 return res;
1121}
1122
1124{
1125 if (size == 0) { return infinity(); }
1126
1127 const bool use_dev = UseDevice();
1128 auto m_data = Read(use_dev);
1129
1130 if (use_dev)
1131 {
1132 // special case for OCCA and OpenMP
1133
1134#ifdef MFEM_USE_OCCA
1135 if (DeviceCanUseOcca())
1136 {
1137 return occa::linalg::min<real_t,real_t>(OccaMemoryRead(data, size));
1138 }
1139#endif
1140
1141#ifdef MFEM_USE_OPENMP
1143 {
1144 real_t minimum = m_data[0];
1145 #pragma omp parallel for reduction(min:minimum)
1146 for (int i = 0; i < size; i++)
1147 {
1148 minimum = std::min(minimum, m_data[i]);
1149 }
1150 return minimum;
1151 }
1152#endif
1153 }
1154
1155 // normal path for everything else (cuda, hip, debug, cpu)
1156 real_t res = infinity();
1157 reduce(
1158 size, res,
1159 [=] MFEM_HOST_DEVICE(int i, real_t &r) { r = fmin(r, m_data[i]); },
1160 MinReducer<real_t> {}, use_dev, vector_workspace());
1161 return res;
1162}
1163
1165{
1166 if (size == 0) { return -infinity(); }
1167
1168 const bool use_dev = UseDevice();
1169 auto m_data = Read(use_dev);
1170
1171 if (use_dev)
1172 {
1173 // special cases where OCCA or OenMP are used
1174#ifdef MFEM_USE_OCCA
1175 if (DeviceCanUseOcca())
1176 {
1177 return occa::linalg::max<real_t, real_t>(OccaMemoryRead(data, size));
1178 }
1179#endif
1180
1181#ifdef MFEM_USE_OPENMP
1183 {
1184 real_t maximum = m_data[0];
1185 #pragma omp parallel for reduction(max : maximum)
1186 for (int i = 0; i < size; i++)
1187 {
1188 maximum = fmax(maximum, m_data[i]);
1189 }
1190 return maximum;
1191 }
1192#endif
1193 }
1194
1195 // normal path for everything else (cuda, hip, debug, cpu)
1196 real_t res = -infinity();
1197 reduce(
1198 size, res,
1199 [=] MFEM_HOST_DEVICE(int i, real_t &r) { r = fmax(r, m_data[i]); },
1200 MaxReducer<real_t> {}, use_dev, vector_workspace());
1201 return res;
1202}
1203
1205{
1206 if (size == 0) { return 0.0; }
1207
1208 auto m_data = Read(UseDevice());
1209 real_t res = 0;
1210 reduce(
1211 size, res, [=] MFEM_HOST_DEVICE(int i, real_t &r) { r += m_data[i]; },
1212 SumReducer<real_t> {}, UseDevice(), vector_workspace());
1213 return res;
1214}
1215
1216}
int Size() const
Return the logical size of the array.
Definition array.hpp:147
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
Definition array.hpp:132
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition device.hpp:265
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
Hash function for data sequences.
Definition hash.hpp:530
std::string GetHash() const
Return the hash string for the current sequence and reset (clear) the sequence.
Definition hash.cpp:60
HashFunction & AppendDoubles(const real_t *doubles, size_t num_doubles)
Add a sequence of doubles for hashing, given as a c-array.
Definition hash.hpp:582
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
bool Empty() const
Return true if the Memory object is empty, see Reset().
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
Vector data type.
Definition vector.hpp:82
void Randomize(int seed=0)
Set random values in the vector.
Definition vector.cpp:915
void PrintHash(std::ostream &out) const
Print the Vector size and hash of its data.
Definition vector.cpp:907
real_t & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
Definition vector.cpp:172
void SetVector(const Vector &v, int offset)
Definition vector.cpp:349
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:498
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Definition vector.cpp:629
void Neg()
(*this) = -(*this)
Definition vector.cpp:375
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
Definition vector.cpp:830
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
real_t operator*(const real_t *v) const
Definition vector.cpp:182
real_t Normlinf() const
Returns the l_infinity norm of the vector.
Definition vector.cpp:972
real_t Norml1() const
Returns the l_1 norm of the vector.
Definition vector.cpp:985
void SetSubVector(const Array< int > &dofs, const real_t value)
Set the entries listed in dofs to the given value.
Definition vector.cpp:679
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Definition vector.cpp:745
Memory< real_t > data
Definition vector.hpp:85
void AddSubVector(const Vector &v, int offset)
Definition vector.cpp:362
void Swap(Vector &other)
Swap the contents of two Vectors.
Definition vector.hpp:660
Vector & operator*=(real_t c)
Definition vector.cpp:237
real_t Norml2() const
Returns the l2 norm of the vector.
Definition vector.cpp:931
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
Definition vector.cpp:126
Vector & Set(const real_t a, const Vector &x)
(*this) = a * x
Definition vector.cpp:337
real_t Max() const
Returns the maximal element of the vector.
Definition vector.cpp:1164
virtual bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
Definition vector.hpp:147
int Size() const
Returns the size of the vector.
Definition vector.hpp:226
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition vector.hpp:144
real_t Sum() const
Return the sum of the vector entries.
Definition vector.cpp:1204
void PrintMathematica(std::ostream &out=mfem::out) const
Prints vector as a List for importing into Mathematica.
Definition vector.cpp:883
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
Definition vector.cpp:864
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
void Reciprocal()
(*this)(i) = 1.0 / (*this)(i)
Definition vector.cpp:383
virtual real_t * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:506
real_t Normlp(real_t p) const
Returns the l_p norm of the vector.
Definition vector.cpp:998
Vector & operator-=(real_t c)
Definition vector.cpp:280
Vector & operator=(const real_t *v)
Copy Size() entries from v.
Definition vector.cpp:196
void SetSubVectorComplement(const Array< int > &dofs, const real_t val)
Set all vector entries NOT in the dofs Array to the given val.
Definition vector.cpp:814
real_t Min() const
Returns the minimal element of the vector.
Definition vector.cpp:1123
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
Definition vector.cpp:653
Vector & operator+=(real_t c)
Definition vector.cpp:301
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:502
Vector & Add(const real_t a, const Vector &Va)
(*this) += a * Va
Definition vector.cpp:322
real_t & operator()(int i)
Access Vector entries using () for 0-based indexing.
Definition vector.hpp:644
void cross3D(const Vector &vin, Vector &vout) const
Definition vector.cpp:616
Vector & operator/=(real_t c)
Definition vector.cpp:258
const real_t alpha
Definition ex15.cpp:369
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition vector.hpp:47
void reduce(int N, T &res, B &&body, const R &reducer, bool use_dev, Array< T > &workspace)
Performs a 1D reduction on the range [0,N). res initial value and where the result will be written....
Definition forall.hpp:942
real_t rand_real()
Generate a random real_t number in the interval [0,1) using rand().
Definition vector.hpp:61
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:391
T ZeroSubnormal(T val)
Definition vector.hpp:522
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition occa.hpp:69
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition vector.cpp:547
float real_t
Definition config.hpp:43
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass....
Definition occa.hpp:37
void forall_switch(bool use_dev, int N, lambda &&body)
Definition forall.hpp:756
real_t p(const Vector &x, real_t t)
@ OMP_MASK
Biwise-OR of all OpenMP backends.
Definition device.hpp:93
Pair of values which can be used in device code.
Definition reducers.hpp:28
a = max(a,b)
Definition reducers.hpp:146
a = min(a,b)
Definition reducers.hpp:99