MFEM  v4.5.2
Finite element discretization library
vector.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 // Implementation of data type vector
13 
14 #include "kernels.hpp"
15 #include "vector.hpp"
16 #include "../general/forall.hpp"
17 
18 #if defined(MFEM_USE_SUNDIALS)
19 #include "sundials.hpp"
20 #if defined(MFEM_USE_MPI)
21 #include <nvector/nvector_parallel.h>
22 #endif
23 #endif
24 
25 #ifdef MFEM_USE_OPENMP
26 #include <omp.h>
27 #endif
28 
29 #include <iostream>
30 #include <iomanip>
31 #include <cmath>
32 #include <cstdlib>
33 #include <ctime>
34 #include <limits>
#include <cerrno> // errno / ERANGE, used by Vector::Load
35 
36 namespace mfem
37 {
38 
// Copy constructor. NOTE(review): the doc extraction dropped original line 39
// here — the signature, presumably Vector::Vector(const Vector &v).
40 {
41  const int s = v.Size();
42  size = s;
43  if (s > 0)
44  {
45  MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
// Allocate in the same memory type as the source, then deep-copy s entries.
46  data.New(s, v.data.GetMemoryType());
47  data.CopyFrom(v.data, s);
48  }
// Inherit the source's device-execution preference.
49  UseDevice(v.UseDevice());
50 }
51 
// Move constructor (signature on dropped line 52): delegates to the
// move-assignment operator below.
53 {
54  *this = std::move(v);
55 }
56 
57 void Vector::Load(std::istream **in, int np, int *dim)
58 {
59  int i, j, s;
60 
61  s = 0;
62  for (i = 0; i < np; i++)
63  {
64  s += dim[i];
65  }
66 
67  SetSize(s);
68 
69  int p = 0;
70  for (i = 0; i < np; i++)
71  {
72  for (j = 0; j < dim[i]; j++)
73  {
74  *in[i] >> data[p++];
75  // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
76  // so we reset the failbit here.
77  if (!*in[i] && errno == ERANGE)
78  {
79  in[i]->clear();
80  }
81  }
82  }
83 }
84 
85 void Vector::Load(std::istream &in, int Size)
86 {
87  SetSize(Size);
88 
89  for (int i = 0; i < size; i++)
90  {
91  in >> data[i];
92  // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
93  // so we reset the failbit here.
94  if (!in && errno == ERANGE)
95  {
96  in.clear();
97  }
98  }
99 }
100 
101 double &Vector::Elem(int i)
102 {
103  return operator()(i);
104 }
105 
106 const double &Vector::Elem(int i) const
107 {
108  return operator()(i);
109 }
110 
111 double Vector::operator*(const double *v) const
112 {
113  double dot = 0.0;
114 #ifdef MFEM_USE_LEGACY_OPENMP
115  #pragma omp parallel for reduction(+:dot)
116 #endif
117  for (int i = 0; i < size; i++)
118  {
119  dot += data[i] * v[i];
120  }
121  return dot;
122 }
123 
// Copy 'size' entries from a raw host pointer; no resize is performed.
124 Vector &Vector::operator=(const double *v)
125 {
126  data.CopyFromHost(v, size);
127  return *this;
128 }
129 
// Copy assignment (signature on dropped line 130, Vector::operator=(const
// Vector &)). The destination keeps its current memory; device write access
// is used when either side prefers device execution.
131 {
132 #if 0
133  SetSize(v.Size(), v.data.GetMemoryType());
134  data.CopyFrom(v.data, v.Size());
135  UseDevice(v.UseDevice());
136 #else
137  SetSize(v.Size());
138  bool vuse = v.UseDevice();
139  const bool use_dev = UseDevice() || vuse;
140  v.UseDevice(use_dev);
141  // keep 'data' where it is, unless 'use_dev' is true
142  if (use_dev) { Write(); }
143  data.CopyFrom(v.data, v.Size());
// Restore the source's original device-use flag (temporarily widened above).
144  v.UseDevice(vuse);
145 #endif
146  return *this;
147 }
148 
// Move assignment (signature on dropped line 149): steals the source's
// memory and zeroes the source's size.
150 {
151  data = std::move(v.data);
152  // Self-assignment-safe way to move v.size to size:
153  const auto size_tmp = v.size;
154  v.size = 0;
155  size = size_tmp;
156  return *this;
157 }
158 
159 Vector &Vector::operator=(double value)
160 {
161  const bool use_dev = UseDevice();
162  const int N = size;
163  auto y = Write(use_dev);
164  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = value;);
165  return *this;
166 }
167 
// operator*=(double c) — signature on dropped line 168: scale every entry.
169 {
170  const bool use_dev = UseDevice();
171  const int N = size;
172  auto y = ReadWrite(use_dev);
173  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= c;);
174  return *this;
175 }
176 
// operator*=(const Vector &v) — dropped line 177: entrywise multiplication.
178 {
179  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
180 
181  const bool use_dev = UseDevice() || v.UseDevice();
182  const int N = size;
183  auto y = ReadWrite(use_dev);
184  auto x = v.Read(use_dev);
185  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= x[i];);
186  return *this;
187 }
188 
// operator/=(double c) — dropped line 189: divide every entry by c.
190 {
191  const bool use_dev = UseDevice();
192  const int N = size;
// One division, then N multiplies; may differ from per-entry division in the
// last bit of rounding.
193  const double m = 1.0/c;
194  auto y = ReadWrite(use_dev);
195  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= m;);
196  return *this;
197 }
198 
// operator/=(const Vector &v) — dropped line 199: entrywise division.
200 {
201  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
202 
203  const bool use_dev = UseDevice() || v.UseDevice();
204  const int N = size;
205  auto y = ReadWrite(use_dev);
206  auto x = v.Read(use_dev);
207  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] /= x[i];);
208  return *this;
209 }
210 
// operator-=(double c) — dropped line 211: subtract c from every entry.
212 {
213  const bool use_dev = UseDevice();
214  const int N = size;
215  auto y = ReadWrite(use_dev);
216  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= c;);
217  return *this;
218 }
219 
// operator-=(const Vector &v) — dropped line 220: entrywise subtraction.
221 {
222  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
223 
224  const bool use_dev = UseDevice() || v.UseDevice();
225  const int N = size;
226  auto y = ReadWrite(use_dev);
227  auto x = v.Read(use_dev);
228  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= x[i];);
229  return *this;
230 }
231 
// operator+=(double c) — dropped line 232: add c to every entry.
233 {
234  const bool use_dev = UseDevice();
235  const int N = size;
236  auto y = ReadWrite(use_dev);
237  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += c;);
238  return *this;
239 }
240 
// operator+=(const Vector &v) — dropped line 241: entrywise addition.
242 {
243  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
244 
245  const bool use_dev = UseDevice() || v.UseDevice();
246  const int N = size;
247  auto y = ReadWrite(use_dev);
248  auto x = v.Read(use_dev);
249  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += x[i];);
250  return *this;
251 }
252 
253 Vector &Vector::Add(const double a, const Vector &Va)
254 {
255  MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
256 
257  if (a != 0.0)
258  {
259  const int N = size;
260  const bool use_dev = UseDevice() || Va.UseDevice();
261  auto y = ReadWrite(use_dev);
262  auto x = Va.Read(use_dev);
263  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += a * x[i];);
264  }
265  return *this;
266 }
267 
268 Vector &Vector::Set(const double a, const Vector &Va)
269 {
270  MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
271 
272  const bool use_dev = UseDevice() || Va.UseDevice();
273  const int N = size;
274  auto x = Va.Read(use_dev);
275  auto y = Write(use_dev);
276  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = a * x[i];);
277  return *this;
278 }
279 
280 void Vector::SetVector(const Vector &v, int offset)
281 {
282  MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
283 
284  const int vs = v.Size();
285  const double *vp = v.data;
286  double *p = data + offset;
287  for (int i = 0; i < vs; i++)
288  {
289  p[i] = vp[i];
290  }
291 }
292 
293 void Vector::AddSubVector(const Vector &v, int offset)
294 {
295  MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
296 
297  const int vs = v.Size();
298  const double *vp = v.data;
299  double *p = data + offset;
300  for (int i = 0; i < vs; i++)
301  {
302  p[i] += vp[i];
303  }
304 }
305 
// Vector::Neg() — signature on dropped line 306: negate every entry in place.
307 {
308  const bool use_dev = UseDevice();
309  const int N = size;
310  auto y = ReadWrite(use_dev);
311  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = -y[i];);
312 }
313 
314 void add(const Vector &v1, const Vector &v2, Vector &v)
315 {
316  MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
317  "incompatible Vectors!");
318 
319 #if !defined(MFEM_USE_LEGACY_OPENMP)
320  const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
321  const int N = v.size;
322  // Note: get read access first, in case v is the same as v1/v2.
323  auto x1 = v1.Read(use_dev);
324  auto x2 = v2.Read(use_dev);
325  auto y = v.Write(use_dev);
326  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = x1[i] + x2[i];);
327 #else
328  #pragma omp parallel for
329  for (int i = 0; i < v.size; i++)
330  {
331  v.data[i] = v1.data[i] + v2.data[i];
332  }
333 #endif
334 }
335 
// v = v1 + alpha * v2; alpha == 0 and alpha == 1 dispatch to the cheaper
// copy / plain-add paths.
336 void add(const Vector &v1, double alpha, const Vector &v2, Vector &v)
337 {
338  MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
339  "incompatible Vectors!");
340 
341  if (alpha == 0.0)
342  {
343  v = v1;
344  }
345  else if (alpha == 1.0)
346  {
347  add(v1, v2, v);
348  }
349  else
350  {
351 #if !defined(MFEM_USE_LEGACY_OPENMP)
352  const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
353  const int N = v.size;
354  // Note: get read access first, in case v is the same as v1/v2.
355  auto d_x = v1.Read(use_dev);
356  auto d_y = v2.Read(use_dev);
357  auto d_z = v.Write(use_dev);
358  MFEM_FORALL_SWITCH(use_dev, i, N, d_z[i] = d_x[i] + alpha * d_y[i];);
359 #else
360  const double *v1p = v1.data, *v2p = v2.data;
361  double *vp = v.data;
362  const int s = v.size;
363  #pragma omp parallel for
364  for (int i = 0; i < s; i++)
365  {
366  vp[i] = v1p[i] + alpha*v2p[i];
367  }
368 #endif
369  }
370 }
371 
// z = a * (x + y); a == 0 and a == 1 dispatch to cheaper paths.
372 void add(const double a, const Vector &x, const Vector &y, Vector &z)
373 {
374  MFEM_ASSERT(x.size == y.size && x.size == z.size,
375  "incompatible Vectors!");
376 
377  if (a == 0.0)
378  {
379  z = 0.0;
380  }
381  else if (a == 1.0)
382  {
383  add(x, y, z);
384  }
385  else
386  {
387 #if !defined(MFEM_USE_LEGACY_OPENMP)
388  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
389  const int N = x.size;
390  // Note: get read access first, in case z is the same as x/y.
391  auto xd = x.Read(use_dev);
392  auto yd = y.Read(use_dev);
393  auto zd = z.Write(use_dev);
394  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] + yd[i]););
395 #else
396  const double *xp = x.data;
397  const double *yp = y.data;
398  double *zp = z.data;
399  const int s = x.size;
400  #pragma omp parallel for
401  for (int i = 0; i < s; i++)
402  {
403  zp[i] = a * (xp[i] + yp[i]);
404  }
405 #endif
406  }
407 }
408 
// z = a * x + b * y; a vanishing coefficient falls back to Vector::Set.
// The extra special cases below are compiled out (#if 0) in this version.
409 void add(const double a, const Vector &x,
410  const double b, const Vector &y, Vector &z)
411 {
412  MFEM_ASSERT(x.size == y.size && x.size == z.size,
413  "incompatible Vectors!");
414 
415  if (a == 0.0)
416  {
417  z.Set(b, y);
418  }
419  else if (b == 0.0)
420  {
421  z.Set(a, x);
422  }
423 #if 0
424  else if (a == 1.0)
425  {
426  add(x, b, y, z);
427  }
428  else if (b == 1.0)
429  {
430  add(y, a, x, z);
431  }
432  else if (a == b)
433  {
434  add(a, x, y, z);
435  }
436 #endif
437  else
438  {
439 #if !defined(MFEM_USE_LEGACY_OPENMP)
440  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
441  const int N = x.size;
442  // Note: get read access first, in case z is the same as x/y.
443  auto xd = x.Read(use_dev);
444  auto yd = y.Read(use_dev);
445  auto zd = z.Write(use_dev);
446  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * xd[i] + b * yd[i];);
447 #else
448  const double *xp = x.data;
449  const double *yp = y.data;
450  double *zp = z.data;
451  const int s = x.size;
452  #pragma omp parallel for
453  for (int i = 0; i < s; i++)
454  {
455  zp[i] = a * xp[i] + b * yp[i];
456  }
457 #endif
458  }
459 }
460 
461 void subtract(const Vector &x, const Vector &y, Vector &z)
462 {
463  MFEM_ASSERT(x.size == y.size && x.size == z.size,
464  "incompatible Vectors!");
465 
466 #if !defined(MFEM_USE_LEGACY_OPENMP)
467  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
468  const int N = x.size;
469  // Note: get read access first, in case z is the same as x/y.
470  auto xd = x.Read(use_dev);
471  auto yd = y.Read(use_dev);
472  auto zd = z.Write(use_dev);
473  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = xd[i] - yd[i];);
474 #else
475  const double *xp = x.data;
476  const double *yp = y.data;
477  double *zp = z.data;
478  const int s = x.size;
479  #pragma omp parallel for
480  for (int i = 0; i < s; i++)
481  {
482  zp[i] = xp[i] - yp[i];
483  }
484 #endif
485 }
486 
// z = a * (x - y); a == 0 and a == 1 dispatch to cheaper paths.
487 void subtract(const double a, const Vector &x, const Vector &y, Vector &z)
488 {
489  MFEM_ASSERT(x.size == y.size && x.size == z.size,
490  "incompatible Vectors!");
491 
492  if (a == 0.)
493  {
494  z = 0.;
495  }
496  else if (a == 1.)
497  {
498  subtract(x, y, z);
499  }
500  else
501  {
502 #if !defined(MFEM_USE_LEGACY_OPENMP)
503  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
504  const int N = x.size;
505  // Note: get read access first, in case z is the same as x/y.
506  auto xd = x.Read(use_dev);
507  auto yd = y.Read(use_dev);
508  auto zd = z.Write(use_dev);
509  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] - yd[i]););
510 #else
511  const double *xp = x.data;
512  const double *yp = y.data;
513  double *zp = z.data;
514  const int s = x.size;
515  #pragma omp parallel for
516  for (int i = 0; i < s; i++)
517  {
518  zp[i] = a * (xp[i] - yp[i]);
519  }
520 #endif
521  }
522 }
523 
524 void Vector::median(const Vector &lo, const Vector &hi)
525 {
526  MFEM_ASSERT(size == lo.size && size == hi.size,
527  "incompatible Vectors!");
528 
529  const bool use_dev = UseDevice() || lo.UseDevice() || hi.UseDevice();
530  const int N = size;
531  // Note: get read access first, in case *this is the same as lo/hi.
532  auto l = lo.Read(use_dev);
533  auto h = hi.Read(use_dev);
534  auto m = Write(use_dev);
535  MFEM_FORALL_SWITCH(use_dev, i, N,
536  {
537  if (m[i] < l[i])
538  {
539  m[i] = l[i];
540  }
541  else if (m[i] > h[i])
542  {
543  m[i] = h[i];
544  }
545  });
546 }
547 
// Gather the dofs-indexed entries of *this into elemvect; a negative dof j
// refers to entry -1-j with its sign flipped.
548 void Vector::GetSubVector(const Array<int> &dofs, Vector &elemvect) const
549 {
550  const int n = dofs.Size();
551  elemvect.SetSize(n);
552  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
553  auto d_y = elemvect.Write(use_dev);
554  auto d_X = Read(use_dev);
555  auto d_dofs = dofs.Read(use_dev);
556  MFEM_FORALL_SWITCH(use_dev, i, n,
557  {
558  const int dof_i = d_dofs[i];
559  d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
560  });
561 }
562 
// Host-only gather into a raw array. NOTE(review): the doc extraction
// dropped original line 565 here — likely a HostRead() access call.
563 void Vector::GetSubVector(const Array<int> &dofs, double *elem_data) const
564 {
566  const int n = dofs.Size();
567  for (int i = 0; i < n; i++)
568  {
569  const int j = dofs[i];
570  elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
571  }
572 }
573 
574 void Vector::SetSubVector(const Array<int> &dofs, const double value)
575 {
576  const bool use_dev = dofs.UseDevice();
577  const int n = dofs.Size();
578  // Use read+write access for *this - we only modify some of its entries
579  auto d_X = ReadWrite(use_dev);
580  auto d_dofs = dofs.Read(use_dev);
581  MFEM_FORALL_SWITCH(use_dev, i, n,
582  {
583  const int j = d_dofs[i];
584  if (j >= 0)
585  {
586  d_X[j] = value;
587  }
588  else
589  {
590  d_X[-1-j] = -value;
591  }
592  });
593 }
594 
// Scatter elemvect into the entries listed in dofs; a negative dof j
// addresses entry -1-j with flipped sign.
595 void Vector::SetSubVector(const Array<int> &dofs, const Vector &elemvect)
596 {
597  MFEM_ASSERT(dofs.Size() <= elemvect.Size(),
598  "Size mismatch: length of dofs is " << dofs.Size()
599  << ", length of elemvect is " << elemvect.Size());
600 
601  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
602  const int n = dofs.Size();
603  // Use read+write access for X - we only modify some of its entries
604  auto d_X = ReadWrite(use_dev);
605  auto d_y = elemvect.Read(use_dev);
606  auto d_dofs = dofs.Read(use_dev);
607  MFEM_FORALL_SWITCH(use_dev, i, n,
608  {
609  const int dof_i = d_dofs[i];
610  if (dof_i >= 0)
611  {
612  d_X[dof_i] = d_y[i];
613  }
614  else
615  {
616  d_X[-1-dof_i] = -d_y[i];
617  }
618  });
619 }
620 
// Host-only scatter from a raw array. NOTE(review): the doc extraction
// dropped original line 624 here — likely a HostReadWrite() access call.
621 void Vector::SetSubVector(const Array<int> &dofs, double *elem_data)
622 {
623  // Use read+write access because we overwrite only part of the data.
625  const int n = dofs.Size();
626  for (int i = 0; i < n; i++)
627  {
628  const int j= dofs[i];
629  if (j >= 0)
630  {
631  operator()(j) = elem_data[i];
632  }
633  else
634  {
635  operator()(-1-j) = -elem_data[i];
636  }
637  }
638 }
639 
// Accumulate elemvect into the entries listed in dofs; a negative dof j
// subtracts from entry -1-j (sign convention for constrained dofs).
640 void Vector::AddElementVector(const Array<int> &dofs, const Vector &elemvect)
641 {
642  MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
643  "length of dofs is " << dofs.Size() <<
644  ", length of elemvect is " << elemvect.Size());
645 
646  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
647  const int n = dofs.Size();
648  auto d_y = elemvect.Read(use_dev);
649  auto d_X = ReadWrite(use_dev);
650  auto d_dofs = dofs.Read(use_dev);
651  MFEM_FORALL_SWITCH(use_dev, i, n,
652  {
653  const int j = d_dofs[i];
654  if (j >= 0)
655  {
656  d_X[j] += d_y[i];
657  }
658  else
659  {
660  d_X[-1-j] -= d_y[i];
661  }
662  });
663 }
664 
// Host-only accumulate from a raw array. NOTE(review): the doc extraction
// dropped original line 667 here — likely a HostReadWrite() access call.
665 void Vector::AddElementVector(const Array<int> &dofs, double *elem_data)
666 {
668  const int n = dofs.Size();
669  for (int i = 0; i < n; i++)
670  {
671  const int j = dofs[i];
672  if (j >= 0)
673  {
674  operator()(j) += elem_data[i];
675  }
676  else
677  {
678  operator()(-1-j) -= elem_data[i];
679  }
680  }
681 }
682 
// Scaled accumulate: entries of elemvect are multiplied by 'a' before being
// added (or subtracted, for negative dofs).
683 void Vector::AddElementVector(const Array<int> &dofs, const double a,
684  const Vector &elemvect)
685 {
686  MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
687  "length of dofs is " << dofs.Size() <<
688  ", length of elemvect is " << elemvect.Size());
689 
690  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
691  const int n = dofs.Size();
692  auto d_y = ReadWrite(use_dev);
693  auto d_x = elemvect.Read(use_dev);
694  auto d_dofs = dofs.Read(use_dev);
695  MFEM_FORALL_SWITCH(use_dev, i, n,
696  {
697  const int j = d_dofs[i];
698  if (j >= 0)
699  {
700  d_y[j] += a * d_x[i];
701  }
702  else
703  {
704  d_y[-1-j] -= a * d_x[i];
705  }
706  });
707 }
708 
// Set every entry NOT listed in dofs to 'val': the listed entries are saved,
// the whole vector is filled with 'val', then the saved entries are restored.
709 void Vector::SetSubVectorComplement(const Array<int> &dofs, const double val)
710 {
711  const bool use_dev = UseDevice() || dofs.UseDevice();
712  const int n = dofs.Size();
713  const int N = size;
// NOTE(review): the doc extraction truncated this statement — original lines
// 715-716 (the memory-type arguments of the dofs_vals constructor) are lost.
714  Vector dofs_vals(n, use_dev ?
717  auto d_data = ReadWrite(use_dev);
718  auto d_dofs_vals = dofs_vals.Write(use_dev);
719  auto d_dofs = dofs.Read(use_dev);
720  MFEM_FORALL_SWITCH(use_dev, i, n, d_dofs_vals[i] = d_data[d_dofs[i]];);
721  MFEM_FORALL_SWITCH(use_dev, i, N, d_data[i] = val;);
722  MFEM_FORALL_SWITCH(use_dev, i, n, d_data[d_dofs[i]] = d_dofs_vals[i];);
723 }
724 
// Print entries to 'os', 'width' values per line. NOTE(review): assumes
// width > 0 (the declared default is 8) — width == 0 would divide by zero
// below. The doc extraction dropped original line 728 (likely HostRead()).
725 void Vector::Print(std::ostream &os, int width) const
726 {
727  if (!size) { return; }
729  for (int i = 0; 1; )
730  {
731  os << ZeroSubnormal(data[i]);
732  i++;
733  if (i == size)
734  {
735  break;
736  }
737  if ( i % width == 0 )
738  {
739  os << '\n';
740  }
741  else
742  {
743  os << ' ';
744  }
745  }
746  os << '\n';
747 }
748 
// ADIOS2 output. NOTE(review): the doc extraction dropped original line 750
// — the first line of the signature (stream type and function name) — and
// line 754 (likely HostRead()).
749 #ifdef MFEM_USE_ADIOS2
751  const std::string& variable_name) const
752 {
753  if (!size) { return; }
755  os.engine.Put(variable_name, &data[0] );
756 }
757 #endif
758 
// Print in HYPRE_Vector format: the size, then one entry per line, in
// scientific notation with 14-digit precision; stream state is restored.
// NOTE(review): dropped original line 768 — likely HostRead().
759 void Vector::Print_HYPRE(std::ostream &os) const
760 {
761  int i;
762  std::ios::fmtflags old_fmt = os.flags();
763  os.setf(std::ios::scientific);
764  std::streamsize old_prec = os.precision(14);
765 
766  os << size << '\n'; // number of rows
767 
769  for (i = 0; i < size; i++)
770  {
771  os << ZeroSubnormal(data[i]) << '\n';
772  }
773 
774  os.precision(old_prec);
775  os.flags(old_fmt);
776 }
777 
778 void Vector::PrintHash(std::ostream &os) const
779 {
780  os << "size: " << size << '\n';
781  HashFunction hf;
782  hf.AppendDoubles(HostRead(), size);
783  os << "hash: " << hf.GetHash() << '\n';
784 }
785 
786 void Vector::Randomize(int seed)
787 {
788  const double max = (double)(RAND_MAX) + 1.;
789 
790  if (seed == 0)
791  {
792  seed = (int)time(0);
793  }
794 
795  srand((unsigned)seed);
796 
797  HostWrite();
798  for (int i = 0; i < size; i++)
799  {
800  data[i] = std::abs(rand()/max);
801  }
802 }
803 
// Euclidean (l2) norm; sizes 0 and 1 are handled directly, larger vectors
// delegate to the overflow-safe kernels::Norml2. NOTE(review): the doc
// extraction dropped original line 814 here — likely HostRead().
804 double Vector::Norml2() const
805 {
806  // Scale entries of Vector on the fly, using algorithms from
807  // std::hypot() and LAPACK's drm2. This scaling ensures that the
808  // argument of each call to std::pow is <= 1 to avoid overflow.
809  if (0 == size)
810  {
811  return 0.0;
812  } // end if 0 == size
813 
815  if (1 == size)
816  {
817  return std::abs(data[0]);
818  } // end if 1 == size
819  return kernels::Norml2(size, (const double*) data);
820 }
821 
822 double Vector::Normlinf() const
823 {
824  HostRead();
825  double max = 0.0;
826  for (int i = 0; i < size; i++)
827  {
828  max = std::max(std::abs(data[i]), max);
829  }
830  return max;
831 }
832 
833 double Vector::Norml1() const
834 {
835  HostRead();
836  double sum = 0.0;
837  for (int i = 0; i < size; i++)
838  {
839  sum += std::abs(data[i]);
840  }
841  return sum;
842 }
843 
// General lp norm for p > 0; p == 1, p == 2 and p == infinity dispatch to
// the specialized norms. The running-rescale loop is numerically delicate
// (kept byte-identical): 'scale' tracks the largest |entry| seen so far so
// every std::pow argument stays <= 1.
844 double Vector::Normlp(double p) const
845 {
846  MFEM_ASSERT(p > 0.0, "Vector::Normlp");
847 
848  if (p == 1.0)
849  {
850  return Norml1();
851  }
852  if (p == 2.0)
853  {
854  return Norml2();
855  }
856  if (p < infinity())
857  {
858  // Scale entries of Vector on the fly, using algorithms from
859  // std::hypot() and LAPACK's drm2. This scaling ensures that the
860  // argument of each call to std::pow is <= 1 to avoid overflow.
861  if (0 == size)
862  {
863  return 0.0;
864  } // end if 0 == size
865 
866  if (1 == size)
867  {
868  return std::abs(data[0]);
869  } // end if 1 == size
870 
871  double scale = 0.0;
872  double sum = 0.0;
873 
874  for (int i = 0; i < size; i++)
875  {
876  if (data[i] != 0.0)
877  {
878  const double absdata = std::abs(data[i]);
879  if (scale <= absdata)
880  {
// A new maximum: rescale the accumulated sum to the new base.
881  sum = 1.0 + sum * std::pow(scale / absdata, p);
882  scale = absdata;
883  continue;
884  } // end if scale <= absdata
885  sum += std::pow(absdata / scale, p); // else scale > absdata
886  } // end if data[i] != 0
887  }
888  return scale * std::pow(sum, 1.0/p);
889  } // end if p < infinity()
890 
891  return Normlinf(); // else p >= infinity()
892 }
893 
894 double Vector::Max() const
895 {
896  if (size == 0) { return -infinity(); }
897 
898  HostRead();
899  double max = data[0];
900 
901  for (int i = 1; i < size; i++)
902  {
903  if (data[i] > max)
904  {
905  max = data[i];
906  }
907  }
908 
909  return max;
910 }
911 
912 double Vector::Sum() const
913 {
914  double sum = 0.0;
915 
916  const double *h_data = this->HostRead();
917  for (int i = 0; i < size; i++)
918  {
919  sum += h_data[i];
920  }
921 
922  return sum;
923 }
924 
925 #ifdef MFEM_USE_CUDA
// Per-block tree reduction: each block writes the minimum of its slice of x
// into gdsr[blockIdx.x]; the host finishes the reduction over blocks.
926 static __global__ void cuKernelMin(const int N, double *gdsr, const double *x)
927 {
928  __shared__ double s_min[MFEM_CUDA_BLOCKS];
929  const int n = blockDim.x*blockIdx.x + threadIdx.x;
930  if (n>=N) { return; }
931  const int bid = blockIdx.x;
932  const int tid = threadIdx.x;
933  const int bbd = bid*blockDim.x;
934  const int rid = bbd+tid;
935  s_min[tid] = x[n];
// Halve the number of active workers each pass, pairing tid with tid+workers.
936  for (int workers=blockDim.x>>1; workers>0; workers>>=1)
937  {
938  __syncthreads();
939  if (tid >= workers) { continue; }
940  if (rid >= N) { continue; }
941  const int dualTid = tid + workers;
942  if (dualTid >= N) { continue; }
943  const int rdd = bbd+dualTid;
944  if (rdd >= N) { continue; }
945  if (dualTid >= blockDim.x) { continue; }
946  s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
947  }
948  if (tid==0) { gdsr[bid] = s_min[0]; }
949 }
950 
// Reusable buffer for the per-block partial results.
951 static Array<double> cuda_reduce_buf;
952 
// Host driver: launch cuKernelMin, then reduce the per-block minima on the
// host. NOTE(review): the doc extraction dropped original line 965 — likely
// the declaration/initialization of the local 'min' accumulator.
953 static double cuVectorMin(const int N, const double *X)
954 {
955  const int tpb = MFEM_CUDA_BLOCKS;
956  const int blockSize = MFEM_CUDA_BLOCKS;
957  const int gridSize = (N+blockSize-1)/blockSize;
958  const int min_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
959  cuda_reduce_buf.SetSize(min_sz);
960  Memory<double> &buf = cuda_reduce_buf.GetMemory();
961  double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
962  cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
963  MFEM_GPU_CHECK(cudaGetLastError());
964  const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
966  for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
967  return min;
968 }
969 
// Per-block tree reduction of the dot product x.y (same structure as
// cuKernelMin, with addition instead of fmin).
970 static __global__ void cuKernelDot(const int N, double *gdsr,
971  const double *x, const double *y)
972 {
973  __shared__ double s_dot[MFEM_CUDA_BLOCKS];
974  const int n = blockDim.x*blockIdx.x + threadIdx.x;
975  if (n>=N) { return; }
976  const int bid = blockIdx.x;
977  const int tid = threadIdx.x;
978  const int bbd = bid*blockDim.x;
979  const int rid = bbd+tid;
980  s_dot[tid] = x[n] * y[n];
981  for (int workers=blockDim.x>>1; workers>0; workers>>=1)
982  {
983  __syncthreads();
984  if (tid >= workers) { continue; }
985  if (rid >= N) { continue; }
986  const int dualTid = tid + workers;
987  if (dualTid >= N) { continue; }
988  const int rdd = bbd+dualTid;
989  if (rdd >= N) { continue; }
990  if (dualTid >= blockDim.x) { continue; }
991  s_dot[tid] += s_dot[dualTid];
992  }
993  if (tid==0) { gdsr[bid] = s_dot[0]; }
994 }
995 
// Host driver for the CUDA dot product: launch the kernel, then sum the
// per-block partial dots on the host.
996 static double cuVectorDot(const int N, const double *X, const double *Y)
997 {
998  const int tpb = MFEM_CUDA_BLOCKS;
999  const int blockSize = MFEM_CUDA_BLOCKS;
1000  const int gridSize = (N+blockSize-1)/blockSize;
1001  const int dot_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
1002  cuda_reduce_buf.SetSize(dot_sz, Device::GetDeviceMemoryType());
1003  Memory<double> &buf = cuda_reduce_buf.GetMemory();
1004  double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
1005  cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
1006  MFEM_GPU_CHECK(cudaGetLastError());
1007  const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
1008  double dot = 0.0;
1009  for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
1010  return dot;
1011 }
1012 #endif // MFEM_USE_CUDA
1013 
1014 #ifdef MFEM_USE_HIP
// HIP twin of cuKernelMin: per-block tree reduction of the minimum.
1015 static __global__ void hipKernelMin(const int N, double *gdsr, const double *x)
1016 {
1017  __shared__ double s_min[MFEM_HIP_BLOCKS];
1018  const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
1019  if (n>=N) { return; }
1020  const int bid = hipBlockIdx_x;
1021  const int tid = hipThreadIdx_x;
1022  const int bbd = bid*hipBlockDim_x;
1023  const int rid = bbd+tid;
1024  s_min[tid] = x[n];
1025  for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
1026  {
1027  __syncthreads();
1028  if (tid >= workers) { continue; }
1029  if (rid >= N) { continue; }
1030  const int dualTid = tid + workers;
1031  if (dualTid >= N) { continue; }
1032  const int rdd = bbd+dualTid;
1033  if (rdd >= N) { continue; }
1034  if (dualTid >= hipBlockDim_x) { continue; }
1035  s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
1036  }
1037  if (tid==0) { gdsr[bid] = s_min[0]; }
1038 }
1039 
// Reuses the name from the CUDA section; presumably safe because the CUDA
// and HIP backends are not compiled into the same build — TODO confirm.
1040 static Array<double> cuda_reduce_buf;
1041 
// Host driver for the HIP minimum. NOTE(review): the doc extraction dropped
// original line 1054 — likely the declaration/initialization of 'min'.
1042 static double hipVectorMin(const int N, const double *X)
1043 {
1044  const int tpb = MFEM_HIP_BLOCKS;
1045  const int blockSize = MFEM_HIP_BLOCKS;
1046  const int gridSize = (N+blockSize-1)/blockSize;
1047  const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
1048  cuda_reduce_buf.SetSize(min_sz);
1049  Memory<double> &buf = cuda_reduce_buf.GetMemory();
1050  double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
1051  hipLaunchKernelGGL(hipKernelMin,gridSize,blockSize,0,0,N,d_min,X);
1052  MFEM_GPU_CHECK(hipGetLastError());
1053  const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
1055  for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
1056  return min;
1057 }
1058 
// HIP twin of cuKernelDot: per-block tree reduction of the dot product.
1059 static __global__ void hipKernelDot(const int N, double *gdsr,
1060  const double *x, const double *y)
1061 {
1062  __shared__ double s_dot[MFEM_HIP_BLOCKS];
1063  const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
1064  if (n>=N) { return; }
1065  const int bid = hipBlockIdx_x;
1066  const int tid = hipThreadIdx_x;
1067  const int bbd = bid*hipBlockDim_x;
1068  const int rid = bbd+tid;
1069  s_dot[tid] = x[n] * y[n];
1070  for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
1071  {
1072  __syncthreads();
1073  if (tid >= workers) { continue; }
1074  if (rid >= N) { continue; }
1075  const int dualTid = tid + workers;
1076  if (dualTid >= N) { continue; }
1077  const int rdd = bbd+dualTid;
1078  if (rdd >= N) { continue; }
1079  if (dualTid >= hipBlockDim_x) { continue; }
1080  s_dot[tid] += s_dot[dualTid];
1081  }
1082  if (tid==0) { gdsr[bid] = s_dot[0]; }
1083 }
1084 
// Host driver for the HIP dot product: launch, then sum partials on host.
1085 static double hipVectorDot(const int N, const double *X, const double *Y)
1086 {
1087  const int tpb = MFEM_HIP_BLOCKS;
1088  const int blockSize = MFEM_HIP_BLOCKS;
1089  const int gridSize = (N+blockSize-1)/blockSize;
1090  const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
1091  cuda_reduce_buf.SetSize(dot_sz);
1092  Memory<double> &buf = cuda_reduce_buf.GetMemory();
1093  double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
1094  hipLaunchKernelGGL(hipKernelDot,gridSize,blockSize,0,0,N,d_dot,X,Y);
1095  MFEM_GPU_CHECK(hipGetLastError());
1096  const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
1097  double dot = 0.0;
1098  for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
1099  return dot;
1100 }
1101 #endif // MFEM_USE_HIP
1102 
// Dot product of two Vectors, dispatched to the first available backend
// (OCCA, CUDA, HIP, OpenMP, debug-device FORALL) and falling back to the
// plain CPU loop. NOTE(review): the doc extraction dropped several one-line
// backend guards below (original lines 1122, 1127, 1134, 1141, 1177) —
// presumably occa arguments and Device::Allows(...) conditions; verify
// against the MFEM sources.
1103 double Vector::operator*(const Vector &v) const
1104 {
1105  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
1106  if (size == 0) { return 0.0; }
1107 
1108  const bool use_dev = UseDevice() || v.UseDevice();
1109 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP) || defined(MFEM_USE_OPENMP)
1110  auto m_data = Read(use_dev);
1111 #else
1112  Read(use_dev);
1113 #endif
1114  auto v_data = v.Read(use_dev);
1115 
1116  if (!use_dev) { goto vector_dot_cpu; }
1117 
1118 #ifdef MFEM_USE_OCCA
1119  if (DeviceCanUseOcca())
1120  {
1121  return occa::linalg::dot<double,double,double>(
1123  }
1124 #endif
1125 
1126 #ifdef MFEM_USE_CUDA
1128  {
1129  return cuVectorDot(size, m_data, v_data);
1130  }
1131 #endif
1132 
1133 #ifdef MFEM_USE_HIP
1135  {
1136  return hipVectorDot(size, m_data, v_data);
1137  }
1138 #endif
1139 
1140 #ifdef MFEM_USE_OPENMP
1142  {
1143 #define MFEM_USE_OPENMP_DETERMINISTIC_DOT
1144 #ifdef MFEM_USE_OPENMP_DETERMINISTIC_DOT
1145  // By default, use a deterministic way of computing the dot product
// Each thread reduces its own contiguous chunk; per-thread results are then
// summed in thread order, so the result is run-to-run reproducible.
1146  static Vector th_dot;
1147  #pragma omp parallel
1148  {
1149  const int nt = omp_get_num_threads();
1150  #pragma omp master
1151  th_dot.SetSize(nt);
1152  const int tid = omp_get_thread_num();
1153  const int stride = (size + nt - 1)/nt;
1154  const int start = tid*stride;
1155  const int stop = std::min(start + stride, size);
1156  double my_dot = 0.0;
1157  for (int i = start; i < stop; i++)
1158  {
1159  my_dot += m_data[i] * v_data[i];
1160  }
1161  #pragma omp barrier
1162  th_dot(tid) = my_dot;
1163  }
1164  return th_dot.Sum();
1165 #else
1166  // The standard way of computing the dot product is non-deterministic
1167  double prod = 0.0;
1168  #pragma omp parallel for reduction(+:prod)
1169  for (int i = 0; i < size; i++)
1170  {
1171  prod += m_data[i] * v_data[i];
1172  }
1173  return prod;
1174 #endif // MFEM_USE_OPENMP_DETERMINISTIC_DOT
1175  }
1176 #endif // MFEM_USE_OPENMP
// Fallback device path: a single-entry device Vector accumulates the dot.
1178  {
1179  const int N = size;
1180  auto v_data_ = v.Read();
1181  auto m_data_ = Read();
1182  Vector dot(1);
1183  dot.UseDevice(true);
1184  auto d_dot = dot.Write();
1185  dot = 0.0;
1186  MFEM_FORALL(i, N, d_dot[0] += m_data_[i] * v_data_[i];);
1187  dot.HostReadWrite();
1188  return dot[0];
1189  }
1190 vector_dot_cpu:
// Host path: reuse the raw-pointer dot product overload.
1191  return operator*(v_data);
1192 }
1193 
// Smallest entry (+infinity for an empty vector), dispatched to the first
// available backend like operator*(const Vector &). NOTE(review): the doc
// extraction dropped several one-line guards below (original lines 1211,
// 1218, 1225, 1237) — presumably Device::Allows(...) conditions.
1194 double Vector::Min() const
1195 {
1196  if (size == 0) { return infinity(); }
1197 
1198  const bool use_dev = UseDevice();
1199  auto m_data = Read(use_dev);
1200 
1201  if (!use_dev) { goto vector_min_cpu; }
1202 
1203 #ifdef MFEM_USE_OCCA
1204  if (DeviceCanUseOcca())
1205  {
1206  return occa::linalg::min<double,double>(OccaMemoryRead(data, size));
1207  }
1208 #endif
1209 
1210 #ifdef MFEM_USE_CUDA
1212  {
1213  return cuVectorMin(size, m_data);
1214  }
1215 #endif
1216 
1217 #ifdef MFEM_USE_HIP
1219  {
1220  return hipVectorMin(size, m_data);
1221  }
1222 #endif
1223 
1224 #ifdef MFEM_USE_OPENMP
1226  {
1227  double minimum = m_data[0];
1228  #pragma omp parallel for reduction(min:minimum)
1229  for (int i = 0; i < size; i++)
1230  {
1231  minimum = std::min(minimum, m_data[i]);
1232  }
1233  return minimum;
1234  }
1235 #endif
1236 
// Fallback device path: a single-entry device Vector holds the running min.
1238  {
1239  const int N = size;
1240  auto m_data_ = Read();
1241  Vector min(1);
1242  min = infinity();
1243  min.UseDevice(true);
1244  auto d_min = min.ReadWrite();
1245  MFEM_FORALL(i, N, d_min[0] = (d_min[0]<m_data_[i])?d_min[0]:m_data_[i];);
1246  min.HostReadWrite();
1247  return min[0];
1248  }
1249 
1250 vector_min_cpu:
// Host path: simple sequential scan.
1251  double minimum = data[0];
1252  for (int i = 1; i < size; i++)
1253  {
1254  if (m_data[i] < minimum)
1255  {
1256  minimum = m_data[i];
1257  }
1258  }
1259  return minimum;
1260 }
1261 
1262 }
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:307
Hash function for data sequences.
Definition: hash.hpp:455
void SetSubVector(const Array< int > &dofs, const double value)
Set the entries listed in dofs to the given value.
Definition: vector.cpp:574
void SetVector(const Vector &v, int offset)
Definition: vector.cpp:280
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:120
Memory< double > data
Definition: vector.hpp:64
double & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
Definition: vector.cpp:101
Device memory; using CUDA or HIP *Malloc and *Free.
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
Definition: vector.cpp:759
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:512
virtual const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:452
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition: vector.hpp:117
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
Definition: vector.cpp:725
double & operator()(int i)
Access Vector entries using () for 0-based indexing.
Definition: vector.hpp:598
Bitwise-OR of all HIP backends.
Definition: device.hpp:90
int Size() const
Returns the size of the vector.
Definition: vector.hpp:199
std::string GetHash() const
Return the hash string for the current sequence and reset (clear) the sequence.
Definition: hash.cpp:60
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:460
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:448
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
Definition: vector.cpp:548
void Randomize(int seed=0)
Set random values in the vector.
Definition: vector.cpp:786
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition: vector.cpp:314
HashFunction & AppendDoubles(const double *doubles, size_t num_doubles)
Add a sequence of doubles for hashing, given as a c-array.
Definition: hash.hpp:508
T ZeroSubnormal(T val)
Definition: vector.hpp:476
void AddSubVector(const Vector &v, int offset)
Definition: vector.cpp:293
Vector & operator=(const double *v)
Copy Size() entries from v.
Definition: vector.cpp:124
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
Definition: vector.cpp:57
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
bool Empty() const
Return true if the Memory object is empty, see Reset().
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:456
double b
Definition: lissajous.cpp:42
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:273
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
double Sum() const
Return the sum of the vector entries.
Definition: vector.cpp:912
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Definition: vector.cpp:524
Bitwise-OR of all OpenMP backends.
Definition: device.hpp:92
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
Definition: array.hpp:126
MFEM_HOST_DEVICE double Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition: kernels.hpp:133
double Normlp(double p) const
Returns the l_p norm of the vector.
Definition: vector.cpp:844
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Definition: vector.cpp:640
Vector & operator/=(double c)
Definition: vector.cpp:189
Bitwise-OR of all CUDA backends.
Definition: device.hpp:88
void SetSubVectorComplement(const Array< int > &dofs, const double val)
Set all vector entries NOT in the dofs Array to the given val.
Definition: vector.cpp:709
Vector & operator*=(double c)
Definition: vector.cpp:168
Vector & operator+=(double c)
Definition: vector.cpp:232
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition: device.hpp:264
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition: vector.cpp:461
double Min() const
Returns the minimal element of the vector.
Definition: vector.cpp:1194
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:684
void PrintHash(std::ostream &out) const
Print the Vector size and hash of its data.
Definition: vector.cpp:778
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:258
double Max() const
Returns the maximal element of the vector.
Definition: vector.cpp:894
double Norml1() const
Returns the l_1 norm of the vector.
Definition: vector.cpp:833
Vector & Set(const double a, const Vector &x)
(*this) = a * x
Definition: vector.cpp:268
double a
Definition: lissajous.cpp:41
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:464
Vector & Add(const double a, const Vector &Va)
(*this) += a * Va
Definition: vector.cpp:253
Host memory; using new[] and delete[].
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
int dim
Definition: ex24.cpp:53
Vector & operator-=(double c)
Definition: vector.cpp:211
double infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition: vector.hpp:46
double Norml2() const
Returns the l2 norm of the vector.
Definition: vector.cpp:804
int Size() const
Return the logical size of the array.
Definition: array.hpp:141
const double alpha
Definition: ex15.cpp:369
virtual bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
Definition: vector.hpp:120
Vector data type.
Definition: vector.hpp:60
RefCoord s[3]
double Normlinf() const
Returns the l_infinity norm of the vector.
Definition: vector.cpp:822
virtual double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:468
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
double operator*(const double *) const
Dot product with a double * array.
Definition: vector.cpp:111
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. It allows testing the "device" code-path (using separate host/device memory pools and host <-> device transfers) without any GPU hardware. As 'DEBUG' is sometimes used as a macro, _DEVICE has been added to avoid conflicts.
Definition: device.hpp:75
void Neg()
(*this) = -(*this)
Definition: vector.cpp:306