MFEM  v4.2.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
vector.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 // Implementation of data type vector
13 
14 #include "kernels.hpp"
15 #include "vector.hpp"
16 #include "../general/forall.hpp"
17 
18 #if defined(MFEM_USE_SUNDIALS)
19 #include "sundials.hpp"
20 #if defined(MFEM_USE_MPI)
21 #include <nvector/nvector_parallel.h>
22 #endif
23 #endif
24 
#include <cerrno>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
31 
32 namespace mfem
33 {
34 
36 {
37  const int s = v.Size();
38  if (s > 0)
39  {
40  MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
41  size = s;
42  data.New(s, v.data.GetMemoryType());
43  data.CopyFrom(v.data, s);
44  }
45  else
46  {
47  size = 0;
48  data.Reset();
49  }
50  UseDevice(v.UseDevice());
51 }
52 
53 void Vector::Load(std::istream **in, int np, int *dim)
54 {
55  int i, j, s;
56 
57  s = 0;
58  for (i = 0; i < np; i++)
59  {
60  s += dim[i];
61  }
62 
63  SetSize(s);
64 
65  int p = 0;
66  for (i = 0; i < np; i++)
67  {
68  for (j = 0; j < dim[i]; j++)
69  {
70  *in[i] >> data[p++];
71  // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
72  // so we reset the failbit here.
73  if (!*in[i] && errno == ERANGE)
74  {
75  in[i]->clear();
76  }
77  }
78  }
79 }
80 
81 void Vector::Load(std::istream &in, int Size)
82 {
83  SetSize(Size);
84 
85  for (int i = 0; i < size; i++)
86  {
87  in >> data[i];
88  // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
89  // so we reset the failbit here.
90  if (!in && errno == ERANGE)
91  {
92  in.clear();
93  }
94  }
95 }
96 
97 double &Vector::Elem(int i)
98 {
99  return operator()(i);
100 }
101 
102 const double &Vector::Elem(int i) const
103 {
104  return operator()(i);
105 }
106 
107 double Vector::operator*(const double *v) const
108 {
109  double dot = 0.0;
110 #ifdef MFEM_USE_LEGACY_OPENMP
111  #pragma omp parallel for reduction(+:dot)
112 #endif
113  for (int i = 0; i < size; i++)
114  {
115  dot += data[i] * v[i];
116  }
117  return dot;
118 }
119 
120 Vector &Vector::operator=(const double *v)
121 {
122  data.CopyFromHost(v, size);
123  return *this;
124 }
125 
127 {
128 #if 0
129  SetSize(v.Size(), v.data.GetMemoryType());
130  data.CopyFrom(v.data, v.Size());
131  UseDevice(v.UseDevice());
132 #else
133  SetSize(v.Size());
134  const bool use_dev = UseDevice() || v.UseDevice();
135  v.UseDevice(use_dev);
136  // keep 'data' where it is, unless 'use_dev' is true
137  if (use_dev) { Write(); }
138  data.CopyFrom(v.data, v.Size());
139 #endif
140  return *this;
141 }
142 
143 Vector &Vector::operator=(double value)
144 {
145  const bool use_dev = UseDevice();
146  const int N = size;
147  auto y = Write(use_dev);
148  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = value;);
149  return *this;
150 }
151 
153 {
154  const bool use_dev = UseDevice();
155  const int N = size;
156  auto y = ReadWrite(use_dev);
157  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= c;);
158  return *this;
159 }
160 
162 {
163  const bool use_dev = UseDevice();
164  const int N = size;
165  const double m = 1.0/c;
166  auto y = ReadWrite(use_dev);
167  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= m;);
168  return *this;
169 }
170 
172 {
173  const bool use_dev = UseDevice();
174  const int N = size;
175  auto y = ReadWrite(use_dev);
176  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= c;);
177  return *this;
178 }
179 
181 {
182  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
183 
184  const bool use_dev = UseDevice() || v.UseDevice();
185  const int N = size;
186  auto y = ReadWrite(use_dev);
187  auto x = v.Read(use_dev);
188  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= x[i];);
189  return *this;
190 }
191 
193 {
194  const bool use_dev = UseDevice();
195  const int N = size;
196  auto y = ReadWrite(use_dev);
197  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += c;);
198  return *this;
199 }
200 
202 {
203  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
204 
205  const bool use_dev = UseDevice() || v.UseDevice();
206  const int N = size;
207  auto y = ReadWrite(use_dev);
208  auto x = v.Read(use_dev);
209  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += x[i];);
210  return *this;
211 }
212 
213 Vector &Vector::Add(const double a, const Vector &Va)
214 {
215  MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
216 
217  if (a != 0.0)
218  {
219  const int N = size;
220  const bool use_dev = UseDevice() || Va.UseDevice();
221  auto y = ReadWrite(use_dev);
222  auto x = Va.Read(use_dev);
223  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += a * x[i];);
224  }
225  return *this;
226 }
227 
228 Vector &Vector::Set(const double a, const Vector &Va)
229 {
230  MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
231 
232  const bool use_dev = UseDevice() || Va.UseDevice();
233  const int N = size;
234  auto x = Va.Read(use_dev);
235  auto y = Write(use_dev);
236  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = a * x[i];);
237  return *this;
238 }
239 
240 void Vector::SetVector(const Vector &v, int offset)
241 {
242  MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
243 
244  const int vs = v.Size();
245  const double *vp = v.data;
246  double *p = data + offset;
247  for (int i = 0; i < vs; i++)
248  {
249  p[i] = vp[i];
250  }
251 }
252 
254 {
255  const bool use_dev = UseDevice();
256  const int N = size;
257  auto y = ReadWrite(use_dev);
258  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = -y[i];);
259 }
260 
261 void add(const Vector &v1, const Vector &v2, Vector &v)
262 {
263  MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
264  "incompatible Vectors!");
265 
266 #if !defined(MFEM_USE_LEGACY_OPENMP)
267  const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
268  const int N = v.size;
269  // Note: get read access first, in case v is the same as v1/v2.
270  auto x1 = v1.Read(use_dev);
271  auto x2 = v2.Read(use_dev);
272  auto y = v.Write(use_dev);
273  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = x1[i] + x2[i];);
274 #else
275  #pragma omp parallel for
276  for (int i = 0; i < v.size; i++)
277  {
278  v.data[i] = v1.data[i] + v2.data[i];
279  }
280 #endif
281 }
282 
283 void add(const Vector &v1, double alpha, const Vector &v2, Vector &v)
284 {
285  MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
286  "incompatible Vectors!");
287 
288  if (alpha == 0.0)
289  {
290  v = v1;
291  }
292  else if (alpha == 1.0)
293  {
294  add(v1, v2, v);
295  }
296  else
297  {
298 #if !defined(MFEM_USE_LEGACY_OPENMP)
299  const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
300  const int N = v.size;
301  // Note: get read access first, in case v is the same as v1/v2.
302  auto d_x = v1.Read(use_dev);
303  auto d_y = v2.Read(use_dev);
304  auto d_z = v.Write(use_dev);
305  MFEM_FORALL_SWITCH(use_dev, i, N, d_z[i] = d_x[i] + alpha * d_y[i];);
306 #else
307  const double *v1p = v1.data, *v2p = v2.data;
308  double *vp = v.data;
309  const int s = v.size;
310  #pragma omp parallel for
311  for (int i = 0; i < s; i++)
312  {
313  vp[i] = v1p[i] + alpha*v2p[i];
314  }
315 #endif
316  }
317 }
318 
319 void add(const double a, const Vector &x, const Vector &y, Vector &z)
320 {
321  MFEM_ASSERT(x.size == y.size && x.size == z.size,
322  "incompatible Vectors!");
323 
324  if (a == 0.0)
325  {
326  z = 0.0;
327  }
328  else if (a == 1.0)
329  {
330  add(x, y, z);
331  }
332  else
333  {
334 #if !defined(MFEM_USE_LEGACY_OPENMP)
335  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
336  const int N = x.size;
337  // Note: get read access first, in case z is the same as x/y.
338  auto xd = x.Read(use_dev);
339  auto yd = y.Read(use_dev);
340  auto zd = z.Write(use_dev);
341  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] + yd[i]););
342 #else
343  const double *xp = x.data;
344  const double *yp = y.data;
345  double *zp = z.data;
346  const int s = x.size;
347  #pragma omp parallel for
348  for (int i = 0; i < s; i++)
349  {
350  zp[i] = a * (xp[i] + yp[i]);
351  }
352 #endif
353  }
354 }
355 
356 void add(const double a, const Vector &x,
357  const double b, const Vector &y, Vector &z)
358 {
359  MFEM_ASSERT(x.size == y.size && x.size == z.size,
360  "incompatible Vectors!");
361 
362  if (a == 0.0)
363  {
364  z.Set(b, y);
365  }
366  else if (b == 0.0)
367  {
368  z.Set(a, x);
369  }
370 #if 0
371  else if (a == 1.0)
372  {
373  add(x, b, y, z);
374  }
375  else if (b == 1.0)
376  {
377  add(y, a, x, z);
378  }
379  else if (a == b)
380  {
381  add(a, x, y, z);
382  }
383 #endif
384  else
385  {
386 #if !defined(MFEM_USE_LEGACY_OPENMP)
387  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
388  const int N = x.size;
389  // Note: get read access first, in case z is the same as x/y.
390  auto xd = x.Read(use_dev);
391  auto yd = y.Read(use_dev);
392  auto zd = z.Write(use_dev);
393  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * xd[i] + b * yd[i];);
394 #else
395  const double *xp = x.data;
396  const double *yp = y.data;
397  double *zp = z.data;
398  const int s = x.size;
399  #pragma omp parallel for
400  for (int i = 0; i < s; i++)
401  {
402  zp[i] = a * xp[i] + b * yp[i];
403  }
404 #endif
405  }
406 }
407 
408 void subtract(const Vector &x, const Vector &y, Vector &z)
409 {
410  MFEM_ASSERT(x.size == y.size && x.size == z.size,
411  "incompatible Vectors!");
412 
413 #if !defined(MFEM_USE_LEGACY_OPENMP)
414  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
415  const int N = x.size;
416  // Note: get read access first, in case z is the same as x/y.
417  auto xd = x.Read(use_dev);
418  auto yd = y.Read(use_dev);
419  auto zd = z.Write(use_dev);
420  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = xd[i] - yd[i];);
421 #else
422  const double *xp = x.data;
423  const double *yp = y.data;
424  double *zp = z.data;
425  const int s = x.size;
426  #pragma omp parallel for
427  for (int i = 0; i < s; i++)
428  {
429  zp[i] = xp[i] - yp[i];
430  }
431 #endif
432 }
433 
434 void subtract(const double a, const Vector &x, const Vector &y, Vector &z)
435 {
436  MFEM_ASSERT(x.size == y.size && x.size == z.size,
437  "incompatible Vectors!");
438 
439  if (a == 0.)
440  {
441  z = 0.;
442  }
443  else if (a == 1.)
444  {
445  subtract(x, y, z);
446  }
447  else
448  {
449 #if !defined(MFEM_USE_LEGACY_OPENMP)
450  const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
451  const int N = x.size;
452  // Note: get read access first, in case z is the same as x/y.
453  auto xd = x.Read(use_dev);
454  auto yd = y.Read(use_dev);
455  auto zd = z.Write(use_dev);
456  MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] - yd[i]););
457 #else
458  const double *xp = x.data;
459  const double *yp = y.data;
460  double *zp = z.data;
461  const int s = x.size;
462  #pragma omp parallel for
463  for (int i = 0; i < s; i++)
464  {
465  zp[i] = a * (xp[i] - yp[i]);
466  }
467 #endif
468  }
469 }
470 
471 void Vector::median(const Vector &lo, const Vector &hi)
472 {
473  MFEM_ASSERT(size == lo.size && size == hi.size,
474  "incompatible Vectors!");
475 
476  const bool use_dev = UseDevice() || lo.UseDevice() || hi.UseDevice();
477  const int N = size;
478  // Note: get read access first, in case *this is the same as lo/hi.
479  auto l = lo.Read(use_dev);
480  auto h = hi.Read(use_dev);
481  auto m = Write(use_dev);
482  MFEM_FORALL_SWITCH(use_dev, i, N,
483  {
484  if (m[i] < l[i])
485  {
486  m[i] = l[i];
487  }
488  else if (m[i] > h[i])
489  {
490  m[i] = h[i];
491  }
492  });
493 }
494 
495 void Vector::GetSubVector(const Array<int> &dofs, Vector &elemvect) const
496 {
497  const int n = dofs.Size();
498  elemvect.SetSize(n);
499  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
500  auto d_y = elemvect.Write(use_dev);
501  auto d_X = Read(use_dev);
502  auto d_dofs = dofs.Read(use_dev);
503  MFEM_FORALL_SWITCH(use_dev, i, n,
504  {
505  const int dof_i = d_dofs[i];
506  d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
507  });
508 }
509 
510 void Vector::GetSubVector(const Array<int> &dofs, double *elem_data) const
511 {
513  const int n = dofs.Size();
514  for (int i = 0; i < n; i++)
515  {
516  const int j = dofs[i];
517  elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
518  }
519 }
520 
521 void Vector::SetSubVector(const Array<int> &dofs, const double value)
522 {
523  const bool use_dev = dofs.UseDevice();
524  const int n = dofs.Size();
525  // Use read+write access for *this - we only modify some of its entries
526  auto d_X = ReadWrite(use_dev);
527  auto d_dofs = dofs.Read(use_dev);
528  MFEM_FORALL_SWITCH(use_dev, i, n,
529  {
530  const int j = d_dofs[i];
531  if (j >= 0)
532  {
533  d_X[j] = value;
534  }
535  else
536  {
537  d_X[-1-j] = -value;
538  }
539  });
540 }
541 
542 void Vector::SetSubVector(const Array<int> &dofs, const Vector &elemvect)
543 {
544  MFEM_ASSERT(dofs.Size() == elemvect.Size(),
545  "Size mismatch: length of dofs is " << dofs.Size()
546  << ", length of elemvect is " << elemvect.Size());
547 
548  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
549  const int n = dofs.Size();
550  // Use read+write access for X - we only modify some of its entries
551  auto d_X = ReadWrite(use_dev);
552  auto d_y = elemvect.Read(use_dev);
553  auto d_dofs = dofs.Read(use_dev);
554  MFEM_FORALL_SWITCH(use_dev, i, n,
555  {
556  const int dof_i = d_dofs[i];
557  if (dof_i >= 0)
558  {
559  d_X[dof_i] = d_y[i];
560  }
561  else
562  {
563  d_X[-1-dof_i] = -d_y[i];
564  }
565  });
566 }
567 
568 void Vector::SetSubVector(const Array<int> &dofs, double *elem_data)
569 {
570  // Use read+write access because we overwrite only part of the data.
572  const int n = dofs.Size();
573  for (int i = 0; i < n; i++)
574  {
575  const int j= dofs[i];
576  if (j >= 0)
577  {
578  operator()(j) = elem_data[i];
579  }
580  else
581  {
582  operator()(-1-j) = -elem_data[i];
583  }
584  }
585 }
586 
587 void Vector::AddElementVector(const Array<int> &dofs, const Vector &elemvect)
588 {
589  MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
590  "length of dofs is " << dofs.Size() <<
591  ", length of elemvect is " << elemvect.Size());
592 
593  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
594  const int n = dofs.Size();
595  auto d_y = elemvect.Read(use_dev);
596  auto d_X = ReadWrite(use_dev);
597  auto d_dofs = dofs.Read(use_dev);
598  MFEM_FORALL_SWITCH(use_dev, i, n,
599  {
600  const int j = d_dofs[i];
601  if (j >= 0)
602  {
603  d_X[j] += d_y[i];
604  }
605  else
606  {
607  d_X[-1-j] -= d_y[i];
608  }
609  });
610 }
611 
612 void Vector::AddElementVector(const Array<int> &dofs, double *elem_data)
613 {
615  const int n = dofs.Size();
616  for (int i = 0; i < n; i++)
617  {
618  const int j = dofs[i];
619  if (j >= 0)
620  {
621  operator()(j) += elem_data[i];
622  }
623  else
624  {
625  operator()(-1-j) -= elem_data[i];
626  }
627  }
628 }
629 
630 void Vector::AddElementVector(const Array<int> &dofs, const double a,
631  const Vector &elemvect)
632 {
633  MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
634  "length of dofs is " << dofs.Size() <<
635  ", length of elemvect is " << elemvect.Size());
636 
637  const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
638  const int n = dofs.Size();
639  auto d_y = ReadWrite(use_dev);
640  auto d_x = elemvect.Read(use_dev);
641  auto d_dofs = dofs.Read(use_dev);
642  MFEM_FORALL_SWITCH(use_dev, i, n,
643  {
644  const int j = d_dofs[i];
645  if (j >= 0)
646  {
647  d_y[j] += a * d_x[i];
648  }
649  else
650  {
651  d_y[-1-j] -= a * d_x[i];
652  }
653  });
654 }
655 
656 void Vector::SetSubVectorComplement(const Array<int> &dofs, const double val)
657 {
658  const bool use_dev = UseDevice() || dofs.UseDevice();
659  const int n = dofs.Size();
660  const int N = size;
661  Vector dofs_vals(n, use_dev ?
664  auto d_data = ReadWrite(use_dev);
665  auto d_dofs_vals = dofs_vals.Write(use_dev);
666  auto d_dofs = dofs.Read(use_dev);
667  MFEM_FORALL_SWITCH(use_dev, i, n, d_dofs_vals[i] = d_data[d_dofs[i]];);
668  MFEM_FORALL_SWITCH(use_dev, i, N, d_data[i] = val;);
669  MFEM_FORALL_SWITCH(use_dev, i, n, d_data[d_dofs[i]] = d_dofs_vals[i];);
670 }
671 
672 void Vector::Print(std::ostream &out, int width) const
673 {
674  if (!size) { return; }
676  for (int i = 0; 1; )
677  {
678  out << ZeroSubnormal(data[i]);
679  i++;
680  if (i == size)
681  {
682  break;
683  }
684  if ( i % width == 0 )
685  {
686  out << '\n';
687  }
688  else
689  {
690  out << ' ';
691  }
692  }
693  out << '\n';
694 }
695 
696 #ifdef MFEM_USE_ADIOS2
698  const std::string& variable_name) const
699 {
700  if (!size) { return; }
702  out.engine.Put(variable_name, &data[0] );
703 }
704 #endif
705 
706 void Vector::Print_HYPRE(std::ostream &out) const
707 {
708  int i;
709  std::ios::fmtflags old_fmt = out.flags();
710  out.setf(std::ios::scientific);
711  std::streamsize old_prec = out.precision(14);
712 
713  out << size << '\n'; // number of rows
714 
715  data.Read(MemoryClass::HOST, size);
716  for (i = 0; i < size; i++)
717  {
718  out << ZeroSubnormal(data[i]) << '\n';
719  }
720 
721  out.precision(old_prec);
722  out.flags(old_fmt);
723 }
724 
725 void Vector::Randomize(int seed)
726 {
727  // static unsigned int seed = time(0);
728  const double max = (double)(RAND_MAX) + 1.;
729 
730  if (seed == 0)
731  {
732  seed = (int)time(0);
733  }
734 
735  // srand(seed++);
736  srand((unsigned)seed);
737 
738  for (int i = 0; i < size; i++)
739  {
740  data[i] = std::abs(rand()/max);
741  }
742 }
743 
744 double Vector::Norml2() const
745 {
746  // Scale entries of Vector on the fly, using algorithms from
747  // std::hypot() and LAPACK's drm2. This scaling ensures that the
748  // argument of each call to std::pow is <= 1 to avoid overflow.
749  if (0 == size)
750  {
751  return 0.0;
752  } // end if 0 == size
753 
755  if (1 == size)
756  {
757  return std::abs(data[0]);
758  } // end if 1 == size
759  return kernels::Norml2(size, (const double*) data);
760 }
761 
762 double Vector::Normlinf() const
763 {
764  double max = 0.0;
765  for (int i = 0; i < size; i++)
766  {
767  max = std::max(std::abs(data[i]), max);
768  }
769  return max;
770 }
771 
772 double Vector::Norml1() const
773 {
774  double sum = 0.0;
775  for (int i = 0; i < size; i++)
776  {
777  sum += std::abs(data[i]);
778  }
779  return sum;
780 }
781 
782 double Vector::Normlp(double p) const
783 {
784  MFEM_ASSERT(p > 0.0, "Vector::Normlp");
785 
786  if (p == 1.0)
787  {
788  return Norml1();
789  }
790  if (p == 2.0)
791  {
792  return Norml2();
793  }
794  if (p < infinity())
795  {
796  // Scale entries of Vector on the fly, using algorithms from
797  // std::hypot() and LAPACK's drm2. This scaling ensures that the
798  // argument of each call to std::pow is <= 1 to avoid overflow.
799  if (0 == size)
800  {
801  return 0.0;
802  } // end if 0 == size
803 
804  if (1 == size)
805  {
806  return std::abs(data[0]);
807  } // end if 1 == size
808 
809  double scale = 0.0;
810  double sum = 0.0;
811 
812  for (int i = 0; i < size; i++)
813  {
814  if (data[i] != 0.0)
815  {
816  const double absdata = std::abs(data[i]);
817  if (scale <= absdata)
818  {
819  sum = 1.0 + sum * std::pow(scale / absdata, p);
820  scale = absdata;
821  continue;
822  } // end if scale <= absdata
823  sum += std::pow(absdata / scale, p); // else scale > absdata
824  } // end if data[i] != 0
825  }
826  return scale * std::pow(sum, 1.0/p);
827  } // end if p < infinity()
828 
829  return Normlinf(); // else p >= infinity()
830 }
831 
832 double Vector::Max() const
833 {
834  if (size == 0) { return -infinity(); }
835 
836  double max = data[0];
837 
838  for (int i = 1; i < size; i++)
839  {
840  if (data[i] > max)
841  {
842  max = data[i];
843  }
844  }
845 
846  return max;
847 }
848 
849 double Vector::Sum() const
850 {
851  double sum = 0.0;
852 
853  const double *h_data = this->HostRead();
854  for (int i = 0; i < size; i++)
855  {
856  sum += h_data[i];
857  }
858 
859  return sum;
860 }
861 
862 #ifdef MFEM_USE_CUDA
863 static __global__ void cuKernelMin(const int N, double *gdsr, const double *x)
864 {
865  __shared__ double s_min[MFEM_CUDA_BLOCKS];
866  const int n = blockDim.x*blockIdx.x + threadIdx.x;
867  if (n>=N) { return; }
868  const int bid = blockIdx.x;
869  const int tid = threadIdx.x;
870  const int bbd = bid*blockDim.x;
871  const int rid = bbd+tid;
872  s_min[tid] = x[n];
873  for (int workers=blockDim.x>>1; workers>0; workers>>=1)
874  {
875  __syncthreads();
876  if (tid >= workers) { continue; }
877  if (rid >= N) { continue; }
878  const int dualTid = tid + workers;
879  if (dualTid >= N) { continue; }
880  const int rdd = bbd+dualTid;
881  if (rdd >= N) { continue; }
882  if (dualTid >= blockDim.x) { continue; }
883  s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
884  }
885  if (tid==0) { gdsr[bid] = s_min[0]; }
886 }
887 
888 static Array<double> cuda_reduce_buf;
889 
890 static double cuVectorMin(const int N, const double *X)
891 {
892  const int tpb = MFEM_CUDA_BLOCKS;
893  const int blockSize = MFEM_CUDA_BLOCKS;
894  const int gridSize = (N+blockSize-1)/blockSize;
895  const int min_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
896  cuda_reduce_buf.SetSize(min_sz);
897  Memory<double> &buf = cuda_reduce_buf.GetMemory();
898  double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
899  cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
900  MFEM_GPU_CHECK(cudaGetLastError());
901  const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
903  for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
904  return min;
905 }
906 
907 static __global__ void cuKernelDot(const int N, double *gdsr,
908  const double *x, const double *y)
909 {
910  __shared__ double s_dot[MFEM_CUDA_BLOCKS];
911  const int n = blockDim.x*blockIdx.x + threadIdx.x;
912  if (n>=N) { return; }
913  const int bid = blockIdx.x;
914  const int tid = threadIdx.x;
915  const int bbd = bid*blockDim.x;
916  const int rid = bbd+tid;
917  s_dot[tid] = x[n] * y[n];
918  for (int workers=blockDim.x>>1; workers>0; workers>>=1)
919  {
920  __syncthreads();
921  if (tid >= workers) { continue; }
922  if (rid >= N) { continue; }
923  const int dualTid = tid + workers;
924  if (dualTid >= N) { continue; }
925  const int rdd = bbd+dualTid;
926  if (rdd >= N) { continue; }
927  if (dualTid >= blockDim.x) { continue; }
928  s_dot[tid] += s_dot[dualTid];
929  }
930  if (tid==0) { gdsr[bid] = s_dot[0]; }
931 }
932 
933 static double cuVectorDot(const int N, const double *X, const double *Y)
934 {
935  const int tpb = MFEM_CUDA_BLOCKS;
936  const int blockSize = MFEM_CUDA_BLOCKS;
937  const int gridSize = (N+blockSize-1)/blockSize;
938  const int dot_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
939  cuda_reduce_buf.SetSize(dot_sz, MemoryType::DEVICE);
940  Memory<double> &buf = cuda_reduce_buf.GetMemory();
941  double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
942  cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
943  MFEM_GPU_CHECK(cudaGetLastError());
944  const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
945  double dot = 0.0;
946  for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
947  return dot;
948 }
949 #endif // MFEM_USE_CUDA
950 
951 #ifdef MFEM_USE_HIP
952 static __global__ void hipKernelMin(const int N, double *gdsr, const double *x)
953 {
954  __shared__ double s_min[MFEM_HIP_BLOCKS];
955  const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
956  if (n>=N) { return; }
957  const int bid = hipBlockIdx_x;
958  const int tid = hipThreadIdx_x;
959  const int bbd = bid*hipBlockDim_x;
960  const int rid = bbd+tid;
961  s_min[tid] = x[n];
962  for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
963  {
964  __syncthreads();
965  if (tid >= workers) { continue; }
966  if (rid >= N) { continue; }
967  const int dualTid = tid + workers;
968  if (dualTid >= N) { continue; }
969  const int rdd = bbd+dualTid;
970  if (rdd >= N) { continue; }
971  if (dualTid >= hipBlockDim_x) { continue; }
972  s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
973  }
974  if (tid==0) { gdsr[bid] = s_min[0]; }
975 }
976 
977 static Array<double> cuda_reduce_buf;
978 
979 static double hipVectorMin(const int N, const double *X)
980 {
981  const int tpb = MFEM_HIP_BLOCKS;
982  const int blockSize = MFEM_HIP_BLOCKS;
983  const int gridSize = (N+blockSize-1)/blockSize;
984  const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
985  cuda_reduce_buf.SetSize(min_sz);
986  Memory<double> &buf = cuda_reduce_buf.GetMemory();
987  double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
988  hipLaunchKernelGGL(hipKernelMin,gridSize,blockSize,0,0,N,d_min,X);
989  MFEM_GPU_CHECK(hipGetLastError());
990  const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
992  for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
993  return min;
994 }
995 
996 static __global__ void hipKernelDot(const int N, double *gdsr,
997  const double *x, const double *y)
998 {
999  __shared__ double s_dot[MFEM_HIP_BLOCKS];
1000  const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
1001  if (n>=N) { return; }
1002  const int bid = hipBlockIdx_x;
1003  const int tid = hipThreadIdx_x;
1004  const int bbd = bid*hipBlockDim_x;
1005  const int rid = bbd+tid;
1006  s_dot[tid] = x[n] * y[n];
1007  for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
1008  {
1009  __syncthreads();
1010  if (tid >= workers) { continue; }
1011  if (rid >= N) { continue; }
1012  const int dualTid = tid + workers;
1013  if (dualTid >= N) { continue; }
1014  const int rdd = bbd+dualTid;
1015  if (rdd >= N) { continue; }
1016  if (dualTid >= hipBlockDim_x) { continue; }
1017  s_dot[tid] += s_dot[dualTid];
1018  }
1019  if (tid==0) { gdsr[bid] = s_dot[0]; }
1020 }
1021 
1022 static double hipVectorDot(const int N, const double *X, const double *Y)
1023 {
1024  const int tpb = MFEM_HIP_BLOCKS;
1025  const int blockSize = MFEM_HIP_BLOCKS;
1026  const int gridSize = (N+blockSize-1)/blockSize;
1027  const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
1028  cuda_reduce_buf.SetSize(dot_sz);
1029  Memory<double> &buf = cuda_reduce_buf.GetMemory();
1030  double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
1031  hipLaunchKernelGGL(hipKernelDot,gridSize,blockSize,0,0,N,d_dot,X,Y);
1032  MFEM_GPU_CHECK(hipGetLastError());
1033  const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
1034  double dot = 0.0;
1035  for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
1036  return dot;
1037 }
1038 #endif // MFEM_USE_HIP
1039 
1040 double Vector::operator*(const Vector &v) const
1041 {
1042  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
1043 
1044  const bool use_dev = UseDevice() || v.UseDevice();
1045 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP) || defined(MFEM_USE_OPENMP)
1046  auto m_data = Read(use_dev);
1047 #else
1048  Read(use_dev);
1049 #endif
1050  auto v_data = v.Read(use_dev);
1051 
1052  if (!use_dev) { goto vector_dot_cpu; }
1053 
1054 #ifdef MFEM_USE_OCCA
1055  if (DeviceCanUseOcca())
1056  {
1057  return occa::linalg::dot<double,double,double>(
1059  }
1060 #endif
1061 
1062 #ifdef MFEM_USE_CUDA
1064  {
1065  return cuVectorDot(size, m_data, v_data);
1066  }
1067 #endif
1068 
1069 #ifdef MFEM_USE_HIP
1071  {
1072  return hipVectorDot(size, m_data, v_data);
1073  }
1074 #endif
1075 
1076 #ifdef MFEM_USE_OPENMP
1078  {
1079  double prod = 0.0;
1080  #pragma omp parallel for reduction(+:prod)
1081  for (int i = 0; i < size; i++)
1082  {
1083  prod += m_data[i] * v_data[i];
1084  }
1085  return prod;
1086  }
1087 #endif
1089  {
1090  const int N = size;
1091  auto v_data = v.Read();
1092  auto m_data = Read();
1093  Vector dot(1);
1094  dot.UseDevice(true);
1095  auto d_dot = dot.Write();
1096  dot = 0.0;
1097  MFEM_FORALL(i, N, d_dot[0] += m_data[i] * v_data[i];);
1098  dot.HostReadWrite();
1099  return dot[0];
1100  }
1101 vector_dot_cpu:
1102  return operator*(v_data);
1103 }
1104 
1105 double Vector::Min() const
1106 {
1107  if (size == 0) { return infinity(); }
1108 
1109  const bool use_dev = UseDevice();
1110  auto m_data = Read(use_dev);
1111 
1112  if (!use_dev) { goto vector_min_cpu; }
1113 
1114 #ifdef MFEM_USE_OCCA
1115  if (DeviceCanUseOcca())
1116  {
1117  return occa::linalg::min<double,double>(OccaMemoryRead(data, size));
1118  }
1119 #endif
1120 
1121 #ifdef MFEM_USE_CUDA
1123  {
1124  return cuVectorMin(size, m_data);
1125  }
1126 #endif
1127 
1128 #ifdef MFEM_USE_HIP
1130  {
1131  return hipVectorMin(size, m_data);
1132  }
1133 #endif
1134 
1135 #ifdef MFEM_USE_OPENMP
1137  {
1138  double minimum = m_data[0];
1139  #pragma omp parallel for reduction(min:minimum)
1140  for (int i = 0; i < size; i++)
1141  {
1142  minimum = std::min(minimum, m_data[i]);
1143  }
1144  return minimum;
1145  }
1146 #endif
1147 
1149  {
1150  const int N = size;
1151  auto m_data = Read();
1152  Vector min(1);
1153  min = infinity();
1154  min.UseDevice(true);
1155  auto d_min = min.ReadWrite();
1156  MFEM_FORALL(i, N, d_min[0] = (d_min[0]<m_data[i])?d_min[0]:m_data[i];);
1157  min.HostReadWrite();
1158  return min[0];
1159  }
1160 
1161 vector_min_cpu:
1162  double minimum = data[0];
1163  for (int i = 1; i < size; i++)
1164  {
1165  if (m_data[i] < minimum)
1166  {
1167  minimum = m_data[i];
1168  }
1169  }
1170  return minimum;
1171 }
1172 
1173 
1174 #ifdef MFEM_USE_SUNDIALS
1175 
1176 Vector::Vector(N_Vector nv)
1177 {
1178  N_Vector_ID nvid = N_VGetVectorID(nv);
1179 
1180  switch (nvid)
1181  {
1182  case SUNDIALS_NVEC_SERIAL:
1183  SetDataAndSize(NV_DATA_S(nv), NV_LENGTH_S(nv));
1184  break;
1185 #ifdef MFEM_USE_MPI
1186  case SUNDIALS_NVEC_PARALLEL:
1187  SetDataAndSize(NV_DATA_P(nv), NV_LOCLENGTH_P(nv));
1188  break;
1189 #endif
1190  default:
1191  MFEM_ABORT("N_Vector type " << nvid << " is not supported");
1192  }
1193 }
1194 
1195 void Vector::ToNVector(N_Vector &nv, long global_length)
1196 {
1197  MFEM_ASSERT(nv, "N_Vector handle is NULL");
1198  N_Vector_ID nvid = N_VGetVectorID(nv);
1199 
1200  switch (nvid)
1201  {
1202  case SUNDIALS_NVEC_SERIAL:
1203  MFEM_ASSERT(NV_OWN_DATA_S(nv) == SUNFALSE, "invalid serial N_Vector");
1204  NV_DATA_S(nv) = data;
1205  NV_LENGTH_S(nv) = size;
1206  break;
1207 #ifdef MFEM_USE_MPI
1208  case SUNDIALS_NVEC_PARALLEL:
1209  MFEM_ASSERT(NV_OWN_DATA_P(nv) == SUNFALSE, "invalid parallel N_Vector");
1210  NV_DATA_P(nv) = data;
1211  NV_LOCLENGTH_P(nv) = size;
1212  if (global_length == 0)
1213  {
1214  global_length = NV_GLOBLENGTH_P(nv);
1215 
1216  if (global_length == 0 && global_length != size)
1217  {
1218  MPI_Comm sundials_comm = NV_COMM_P(nv);
1219  long local_size = size;
1220  MPI_Allreduce(&local_size, &global_length, 1, MPI_LONG,
1221  MPI_SUM,sundials_comm);
1222  }
1223  }
1224  NV_GLOBLENGTH_P(nv) = global_length;
1225  break;
1226 #endif
1227  default:
1228  MFEM_ABORT("N_Vector type " << nvid << " is not supported");
1229  }
1230 }
1231 
1232 #endif // MFEM_USE_SUNDIALS
1233 
1234 }
int Size() const
Return the logical size of the array.
Definition: array.hpp:124
void SetSubVector(const Array< int > &dofs, const double value)
Set the entries listed in dofs to the given value.
Definition: vector.cpp:521
void SetVector(const Vector &v, int offset)
Definition: vector.cpp:240
double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:392
Vector()
Default constructor for Vector. Sets size = 0 and data = NULL.
Definition: vector.hpp:61
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:103
Memory< double > data
Definition: vector.hpp:55
double & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
Definition: vector.cpp:97
Device memory; using CUDA or HIP *Malloc and *Free.
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:459
double Norml2() const
Returns the l2 norm of the vector.
Definition: vector.cpp:744
double & operator()(int i)
Access Vector entries using () for 0-based indexing.
Definition: vector.hpp:539
Bitwise-OR of all HIP backends.
Definition: device.hpp:87
void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition: vector.hpp:89
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
Definition: vector.cpp:495
int Size() const
Returns the size of the vector.
Definition: vector.hpp:160
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
Definition: array.hpp:109
double Normlinf() const
Returns the l_infinity norm of the vector.
Definition: vector.cpp:762
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Randomize(int seed=0)
Set random values in the vector.
Definition: vector.cpp:725
double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:380
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition: vector.cpp:261
double operator*(const double *) const
Dot product with a double * array.
Definition: vector.cpp:107
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
T ZeroSubnormal(T val)
Definition: vector.hpp:419
Vector & operator=(const double *v)
Copy Size() entries from v.
Definition: vector.cpp:120
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
Definition: vector.cpp:53
const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:376
double Normlp(double p) const
Returns the l_p norm of the vector.
Definition: vector.cpp:782
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:388
bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
Definition: vector.hpp:92
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:261
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Definition: vector.cpp:471
Bitwise-OR of all OpenMP backends.
Definition: device.hpp:89
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:290
MFEM_HOST_DEVICE double Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition: kernels.hpp:46
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Definition: vector.cpp:587
Vector & operator/=(double c)
Definition: vector.cpp:161
Bitwise-OR of all CUDA backends.
Definition: device.hpp:85
void SetSubVectorComplement(const Array< int > &dofs, const double val)
Set all vector entries NOT in the dofs Array to the given val.
Definition: vector.cpp:656
Vector & operator*=(double c)
Definition: vector.cpp:152
double Min() const
Returns the minimal element of the vector.
Definition: vector.cpp:1105
Vector & operator+=(double c)
Definition: vector.cpp:192
double Norml1() const
Returns the l_1 norm of the vector.
Definition: vector.cpp:772
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition: device.hpp:252
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
Definition: vector.cpp:672
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition: vector.cpp:408
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:654
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
Definition: vector.cpp:706
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:246
void SetDataAndSize(double *d, int s)
Set the Vector data and size.
Definition: vector.hpp:128
Vector & Set(const double a, const Vector &x)
(*this) = a * x
Definition: vector.cpp:228
double a
Definition: lissajous.cpp:41
Vector & Add(const double a, const Vector &Va)
(*this) += a * Va
Definition: vector.cpp:213
Host memory; using new[] and delete[].
virtual MFEM_DEPRECATED N_Vector ToNVector()
(DEPRECATED) Return a new wrapper SUNDIALS N_Vector of type SUNDIALS_NVEC_SERIAL. ...
Definition: vector.hpp:401
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:372
double Max() const
Returns the maximal element of the vector.
Definition: vector.cpp:832
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
bool Empty() const
Return true if the Memory object is empty, see Reset().
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
int dim
Definition: ex24.cpp:53
Vector & operator-=(double c)
Definition: vector.cpp:171
double infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition: vector.hpp:45
const double alpha
Definition: ex15.cpp:336
Vector data type.
Definition: vector.hpp:51
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
double Sum() const
Return the sum of the vector entries.
Definition: vector.cpp:849
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. It allows to test the "device" code-path (using separate host/device memory pools and host <-> device transfers) without any GPU hardware. As 'DEBUG' is sometimes used as a macro, _DEVICE has been added to avoid conflicts.
Definition: device.hpp:72
void Neg()
(*this) = -(*this)
Definition: vector.cpp:253