MFEM  v4.1.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
vector.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 // Implementation of data type vector
13 
14 #include "kernels.hpp"
15 #include "vector.hpp"
16 #include "../general/forall.hpp"
17 
18 #if defined(MFEM_USE_SUNDIALS) && defined(MFEM_USE_MPI)
19 #include <nvector/nvector_parallel.h>
20 #include <nvector/nvector_parhyp.h>
21 #endif
22 
23 #include <iostream>
24 #include <iomanip>
25 #include <cmath>
26 #include <cstdlib>
27 #include <ctime>
28 #include <limits>
29 
30 namespace mfem
31 {
32 
34 {
35  const int s = v.Size();
36  if (s > 0)
37  {
38  MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
39  size = s;
40  data.New(s, v.data.GetMemoryType());
41  data.CopyFrom(v.data, s);
42  }
43  else
44  {
45  size = 0;
46  data.Reset();
47  }
48  UseDevice(v.UseDevice());
49 }
50 
51 void Vector::Load(std::istream **in, int np, int *dim)
52 {
53  int i, j, s;
54 
55  s = 0;
56  for (i = 0; i < np; i++)
57  {
58  s += dim[i];
59  }
60 
61  SetSize(s);
62 
63  int p = 0;
64  for (i = 0; i < np; i++)
65  {
66  for (j = 0; j < dim[i]; j++)
67  {
68  *in[i] >> data[p++];
69  }
70  }
71 }
72 
73 void Vector::Load(std::istream &in, int Size)
74 {
75  SetSize(Size);
76 
77  for (int i = 0; i < size; i++)
78  {
79  in >> data[i];
80  }
81 }
82 
83 double &Vector::Elem(int i)
84 {
85  return operator()(i);
86 }
87 
88 const double &Vector::Elem(int i) const
89 {
90  return operator()(i);
91 }
92 
93 double Vector::operator*(const double *v) const
94 {
95  double dot = 0.0;
96 #ifdef MFEM_USE_LEGACY_OPENMP
97  #pragma omp parallel for reduction(+:dot)
98 #endif
99  for (int i = 0; i < size; i++)
100  {
101  dot += data[i] * v[i];
102  }
103  return dot;
104 }
105 
Vector &Vector::operator=(const double *v)
{
   // Copy exactly 'size' entries from the host pointer 'v'; the caller must
   // guarantee 'v' points to at least 'size' doubles.
   data.CopyFromHost(v, size);
   return *this;
}
111 
113 {
114 #if 0
115  SetSize(v.Size(), v.data.GetMemoryType());
116  data.CopyFrom(v.data, v.Size());
117  UseDevice(v.UseDevice());
118 #else
119  SetSize(v.Size());
120  const bool use_dev = UseDevice() || v.UseDevice();
121  v.UseDevice(use_dev);
122  // keep 'data' where it is, unless 'use_dev' is true
123  if (use_dev) { Write(); }
124  data.CopyFrom(v.data, v.Size());
125 #endif
126  return *this;
127 }
128 
Vector &Vector::operator=(double value)
{
   // Set every entry to 'value', on device when the device flag is set.
   const bool use_dev = UseDevice();
   const int N = size;
   // Write-only access: the previous contents are irrelevant here.
   auto y = Write(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = value;);
   return *this;
}
137 
139 {
140  const bool use_dev = UseDevice();
141  const int N = size;
142  auto y = ReadWrite(use_dev);
143  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= c;);
144  return *this;
145 }
146 
148 {
149  const bool use_dev = UseDevice();
150  const int N = size;
151  const double m = 1.0/c;
152  auto y = ReadWrite(use_dev);
153  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= m;);
154  return *this;
155 }
156 
158 {
159  const bool use_dev = UseDevice();
160  const int N = size;
161  auto y = ReadWrite(use_dev);
162  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= c;);
163  return *this;
164 }
165 
167 {
168  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
169 
170  const bool use_dev = UseDevice() || v.UseDevice();
171  const int N = size;
172  auto y = ReadWrite(use_dev);
173  auto x = v.Read(use_dev);
174  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= x[i];);
175  return *this;
176 }
177 
179 {
180  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
181 
182  const bool use_dev = UseDevice() || v.UseDevice();
183  const int N = size;
184  auto y = ReadWrite(use_dev);
185  auto x = v.Read(use_dev);
186  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += x[i];);
187  return *this;
188 }
189 
190 Vector &Vector::Add(const double a, const Vector &Va)
191 {
192  MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
193 
194  if (a != 0.0)
195  {
196  const int N = size;
197  const bool use_dev = UseDevice() || Va.UseDevice();
198  auto y = ReadWrite(use_dev);
199  auto x = Va.Read(use_dev);
200  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += a * x[i];);
201  }
202  return *this;
203 }
204 
205 Vector &Vector::Set(const double a, const Vector &Va)
206 {
207  MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
208 
209  const bool use_dev = UseDevice() || Va.UseDevice();
210  const int N = size;
211  auto x = Va.Read(use_dev);
212  auto y = Write(use_dev);
213  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = a * x[i];);
214  return *this;
215 }
216 
217 void Vector::SetVector(const Vector &v, int offset)
218 {
219  MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
220 
221  const int vs = v.Size();
222  const double *vp = v.data;
223  double *p = data + offset;
224  for (int i = 0; i < vs; i++)
225  {
226  p[i] = vp[i];
227  }
228 }
229 
231 {
232  const bool use_dev = UseDevice();
233  const int N = size;
234  auto y = ReadWrite(use_dev);
235  MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = -y[i];);
236 }
237 
// Set v = v1 + v2 (entry-wise). All three vectors must have equal size;
// v may alias v1 and/or v2.
void add(const Vector &v1, const Vector &v2, Vector &v)
{
   MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
               "incompatible Vectors!");

#if !defined(MFEM_USE_LEGACY_OPENMP)
   // Run on device if any operand prefers the device.
   const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
   const int N = v.size;
   // Note: get read access first, in case v is the same as v1/v2.
   auto x1 = v1.Read(use_dev);
   auto x2 = v2.Read(use_dev);
   auto y = v.Write(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = x1[i] + x2[i];);
#else
   #pragma omp parallel for
   for (int i = 0; i < v.size; i++)
   {
      v.data[i] = v1.data[i] + v2.data[i];
   }
#endif
}
259 
// Set v = v1 + alpha * v2, with fast paths for alpha == 0 and alpha == 1.
void add(const Vector &v1, double alpha, const Vector &v2, Vector &v)
{
   MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
               "incompatible Vectors!");

   if (alpha == 0.0)
   {
      v = v1;          // v2 does not contribute
   }
   else if (alpha == 1.0)
   {
      add(v1, v2, v);  // plain sum, no multiply
   }
   else
   {
#if !defined(MFEM_USE_LEGACY_OPENMP)
      const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
      const int N = v.size;
      // Note: get read access first, in case v is the same as v1/v2.
      auto d_x = v1.Read(use_dev);
      auto d_y = v2.Read(use_dev);
      auto d_z = v.Write(use_dev);
      MFEM_FORALL_SWITCH(use_dev, i, N, d_z[i] = d_x[i] + alpha * d_y[i];);
#else
      const double *v1p = v1.data, *v2p = v2.data;
      double *vp = v.data;
      const int s = v.size;
      #pragma omp parallel for
      for (int i = 0; i < s; i++)
      {
         vp[i] = v1p[i] + alpha*v2p[i];
      }
#endif
   }
}
295 
// Set z = a * (x + y), with fast paths for a == 0 and a == 1.
void add(const double a, const Vector &x, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

   if (a == 0.0)
   {
      z = 0.0;      // result is identically zero
   }
   else if (a == 1.0)
   {
      add(x, y, z); // plain sum, no multiply
   }
   else
   {
#if !defined(MFEM_USE_LEGACY_OPENMP)
      const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
      const int N = x.size;
      // Note: get read access first, in case z is the same as x/y.
      auto xd = x.Read(use_dev);
      auto yd = y.Read(use_dev);
      auto zd = z.Write(use_dev);
      MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] + yd[i]););
#else
      const double *xp = x.data;
      const double *yp = y.data;
      double *zp = z.data;
      const int s = x.size;
      #pragma omp parallel for
      for (int i = 0; i < s; i++)
      {
         zp[i] = a * (xp[i] + yp[i]);
      }
#endif
   }
}
332 
// Set z = a * x + b * y, with fast paths when either coefficient is zero.
void add(const double a, const Vector &x,
         const double b, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

   if (a == 0.0)
   {
      z.Set(b, y);   // only the y term remains
   }
   else if (b == 0.0)
   {
      z.Set(a, x);   // only the x term remains
   }
#if 0
   // Additional special cases, currently disabled (dispatch to other
   // overloads for a==1, b==1, or a==b).
   else if (a == 1.0)
   {
      add(x, b, y, z);
   }
   else if (b == 1.0)
   {
      add(y, a, x, z);
   }
   else if (a == b)
   {
      add(a, x, y, z);
   }
#endif
   else
   {
#if !defined(MFEM_USE_LEGACY_OPENMP)
      const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
      const int N = x.size;
      // Note: get read access first, in case z is the same as x/y.
      auto xd = x.Read(use_dev);
      auto yd = y.Read(use_dev);
      auto zd = z.Write(use_dev);
      MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * xd[i] + b * yd[i];);
#else
      const double *xp = x.data;
      const double *yp = y.data;
      double *zp = z.data;
      const int s = x.size;
      #pragma omp parallel for
      for (int i = 0; i < s; i++)
      {
         zp[i] = a * xp[i] + b * yp[i];
      }
#endif
   }
}
384 
// Set z = x - y (entry-wise). All three vectors must have equal size;
// z may alias x and/or y.
void subtract(const Vector &x, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

#if !defined(MFEM_USE_LEGACY_OPENMP)
   const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
   const int N = x.size;
   // Note: get read access first, in case z is the same as x/y.
   auto xd = x.Read(use_dev);
   auto yd = y.Read(use_dev);
   auto zd = z.Write(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = xd[i] - yd[i];);
#else
   const double *xp = x.data;
   const double *yp = y.data;
   double *zp = z.data;
   const int s = x.size;
   #pragma omp parallel for
   for (int i = 0; i < s; i++)
   {
      zp[i] = xp[i] - yp[i];
   }
#endif
}
410 
// Set z = a * (x - y), with fast paths for a == 0 and a == 1.
void subtract(const double a, const Vector &x, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

   if (a == 0.)
   {
      z = 0.;            // result is identically zero
   }
   else if (a == 1.)
   {
      subtract(x, y, z); // plain difference, no multiply
   }
   else
   {
#if !defined(MFEM_USE_LEGACY_OPENMP)
      const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
      const int N = x.size;
      // Note: get read access first, in case z is the same as x/y.
      auto xd = x.Read(use_dev);
      auto yd = y.Read(use_dev);
      auto zd = z.Write(use_dev);
      MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] - yd[i]););
#else
      const double *xp = x.data;
      const double *yp = y.data;
      double *zp = z.data;
      const int s = x.size;
      #pragma omp parallel for
      for (int i = 0; i < s; i++)
      {
         zp[i] = a * (xp[i] - yp[i]);
      }
#endif
   }
}
447 
448 void Vector::median(const Vector &lo, const Vector &hi)
449 {
450  MFEM_ASSERT(size == lo.size && size == hi.size,
451  "incompatible Vectors!");
452 
453  const bool use_dev = UseDevice() || lo.UseDevice() || hi.UseDevice();
454  const int N = size;
455  // Note: get read access first, in case *this is the same as lo/hi.
456  auto l = lo.Read(use_dev);
457  auto h = hi.Read(use_dev);
458  auto m = Write(use_dev);
459  MFEM_FORALL_SWITCH(use_dev, i, N,
460  {
461  if (m[i] < l[i])
462  {
463  m[i] = l[i];
464  }
465  else if (m[i] > h[i])
466  {
467  m[i] = h[i];
468  }
469  });
470 }
471 
void Vector::GetSubVector(const Array<int> &dofs, Vector &elemvect) const
{
   // Gather the entries listed in 'dofs' into 'elemvect'. A negative index j
   // addresses entry -1-j with the sign flipped (MFEM's signed-dof
   // convention).
   const int n = dofs.Size();
   elemvect.SetSize(n);
   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   auto d_y = elemvect.Write(use_dev);
   auto d_X = Read(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int dof_i = d_dofs[i];
      d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
   });
}
486 
487 void Vector::GetSubVector(const Array<int> &dofs, double *elem_data) const
488 {
490  const int n = dofs.Size();
491  for (int i = 0; i < n; i++)
492  {
493  const int j = dofs[i];
494  elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
495  }
496 }
497 
void Vector::SetSubVector(const Array<int> &dofs, const double value)
{
   // Assign 'value' to the entries listed in 'dofs'; a negative index j
   // addresses entry -1-j and stores -value (signed-dof convention).
   const bool use_dev = dofs.UseDevice();
   const int n = dofs.Size();
   // Use read+write access for *this - we only modify some of its entries
   auto d_X = ReadWrite(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int j = d_dofs[i];
      if (j >= 0)
      {
         d_X[j] = value;
      }
      else
      {
         d_X[-1-j] = -value;
      }
   });
}
518 
void Vector::SetSubVector(const Array<int> &dofs, const Vector &elemvect)
{
   // Scatter 'elemvect' into the entries listed in 'dofs'; a negative index
   // j addresses entry -1-j and stores the negated value.
   MFEM_ASSERT(dofs.Size() == elemvect.Size(),
               "Size mismatch: length of dofs is " << dofs.Size()
               << ", length of elemvect is " << elemvect.Size());

   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   const int n = dofs.Size();
   // Use read+write access for X - we only modify some of its entries
   auto d_X = ReadWrite(use_dev);
   auto d_y = elemvect.Read(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int dof_i = d_dofs[i];
      if (dof_i >= 0)
      {
         d_X[dof_i] = d_y[i];
      }
      else
      {
         d_X[-1-dof_i] = -d_y[i];
      }
   });
}
544 
545 void Vector::SetSubVector(const Array<int> &dofs, double *elem_data)
546 {
547  // Use read+write access because we overwrite only part of the data.
549  const int n = dofs.Size();
550  for (int i = 0; i < n; i++)
551  {
552  const int j= dofs[i];
553  if (j >= 0)
554  {
555  operator()(j) = elem_data[i];
556  }
557  else
558  {
559  operator()(-1-j) = -elem_data[i];
560  }
561  }
562 }
563 
void Vector::AddElementVector(const Array<int> &dofs, const Vector &elemvect)
{
   // Accumulate 'elemvect' into the entries listed in 'dofs'; a negative
   // index j addresses entry -1-j and subtracts the value instead.
   MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
               "length of dofs is " << dofs.Size() <<
               ", length of elemvect is " << elemvect.Size());

   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   const int n = dofs.Size();
   auto d_y = elemvect.Read(use_dev);
   auto d_X = ReadWrite(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int j = d_dofs[i];
      if (j >= 0)
      {
         d_X[j] += d_y[i];
      }
      else
      {
         d_X[-1-j] -= d_y[i];
      }
   });
}
588 
589 void Vector::AddElementVector(const Array<int> &dofs, double *elem_data)
590 {
592  const int n = dofs.Size();
593  for (int i = 0; i < n; i++)
594  {
595  const int j = dofs[i];
596  if (j >= 0)
597  {
598  operator()(j) += elem_data[i];
599  }
600  else
601  {
602  operator()(-1-j) -= elem_data[i];
603  }
604  }
605 }
606 
void Vector::AddElementVector(const Array<int> &dofs, const double a,
                              const Vector &elemvect)
{
   // Accumulate a * elemvect into the entries listed in 'dofs'; a negative
   // index j addresses entry -1-j and subtracts instead.
   MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
               "length of dofs is " << dofs.Size() <<
               ", length of elemvect is " << elemvect.Size());

   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   const int n = dofs.Size();
   auto d_y = ReadWrite(use_dev);
   auto d_x = elemvect.Read(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int j = d_dofs[i];
      if (j >= 0)
      {
         d_y[j] += a * d_x[i];
      }
      else
      {
         d_y[-1-j] -= a * d_x[i];
      }
   });
}
632 
633 void Vector::SetSubVectorComplement(const Array<int> &dofs, const double val)
634 {
635  const bool use_dev = UseDevice() || dofs.UseDevice();
636  const int n = dofs.Size();
637  const int N = size;
638  Vector dofs_vals(n, use_dev ?
641  auto d_data = ReadWrite(use_dev);
642  auto d_dofs_vals = dofs_vals.Write(use_dev);
643  auto d_dofs = dofs.Read(use_dev);
644  MFEM_FORALL_SWITCH(use_dev, i, n, d_dofs_vals[i] = d_data[d_dofs[i]];);
645  MFEM_FORALL_SWITCH(use_dev, i, N, d_data[i] = val;);
646  MFEM_FORALL_SWITCH(use_dev, i, n, d_data[d_dofs[i]] = d_dofs_vals[i];);
647 }
648 
649 void Vector::Print(std::ostream &out, int width) const
650 {
651  if (!size) { return; }
653  for (int i = 0; 1; )
654  {
655  out << data[i];
656  i++;
657  if (i == size)
658  {
659  break;
660  }
661  if ( i % width == 0 )
662  {
663  out << '\n';
664  }
665  else
666  {
667  out << ' ';
668  }
669  }
670  out << '\n';
671 }
672 
void Vector::Print_HYPRE(std::ostream &out) const
{
   // Print in the HYPRE text format: the size, then one entry per line in
   // scientific notation with 14-digit precision.
   int i;
   std::ios::fmtflags old_fmt = out.flags();
   out.setf(std::ios::scientific);
   std::streamsize old_prec = out.precision(14);

   out << size << '\n';  // number of rows

   // Ensure the host copy of the data is up to date before printing.
   data.Read(MemoryClass::HOST, size);
   for (i = 0; i < size; i++)
   {
      out << data[i] << '\n';
   }

   // Restore the stream's original precision and format flags.
   out.precision(old_prec);
   out.flags(old_fmt);
}
691 
void Vector::Randomize(int seed)
{
   // Fill with pseudo-random values in [0, 1) using the C library rand().
   // A zero 'seed' selects a time-based seed.
   // static unsigned int seed = time(0);
   const double max = (double)(RAND_MAX) + 1.;

   if (seed == 0)
   {
      seed = (int)time(0);
   }

   // srand(seed++);
   srand((unsigned)seed);

   for (int i = 0; i < size; i++)
   {
      data[i] = std::abs(rand()/max);   // uniform in [0, 1)
   }
}
710 
711 double Vector::Norml2() const
712 {
713  // Scale entries of Vector on the fly, using algorithms from
714  // std::hypot() and LAPACK's drm2. This scaling ensures that the
715  // argument of each call to std::pow is <= 1 to avoid overflow.
716  if (0 == size)
717  {
718  return 0.0;
719  } // end if 0 == size
720 
722  if (1 == size)
723  {
724  return std::abs(data[0]);
725  } // end if 1 == size
726  return kernels::Norml2(size, (const double*) data);
727 }
728 
729 double Vector::Normlinf() const
730 {
731  double max = 0.0;
732  for (int i = 0; i < size; i++)
733  {
734  max = std::max(std::abs(data[i]), max);
735  }
736  return max;
737 }
738 
739 double Vector::Norml1() const
740 {
741  double sum = 0.0;
742  for (int i = 0; i < size; i++)
743  {
744  sum += std::abs(data[i]);
745  }
746  return sum;
747 }
748 
double Vector::Normlp(double p) const
{
   // Returns the l_p norm; p must be positive. Delegates to the specialized
   // routines for p == 1, p == 2, and p == infinity.
   MFEM_ASSERT(p > 0.0, "Vector::Normlp");

   if (p == 1.0)
   {
      return Norml1();
   }
   if (p == 2.0)
   {
      return Norml2();
   }
   if (p < infinity())
   {
      // Scale entries of Vector on the fly, using algorithms from
      // std::hypot() and LAPACK's drm2. This scaling ensures that the
      // argument of each call to std::pow is <= 1 to avoid overflow.
      if (0 == size)
      {
         return 0.0;
      } // end if 0 == size

      if (1 == size)
      {
         return std::abs(data[0]);
      } // end if 1 == size

      // 'scale' tracks the running maximum |entry|; 'sum' accumulates
      // sum(|x_i|/scale)^p so the final norm is scale * sum^(1/p).
      double scale = 0.0;
      double sum = 0.0;

      for (int i = 0; i < size; i++)
      {
         if (data[i] != 0.0)
         {
            const double absdata = std::abs(data[i]);
            if (scale <= absdata)
            {
               // New maximum: rescale the accumulated sum to the new scale.
               sum = 1.0 + sum * std::pow(scale / absdata, p);
               scale = absdata;
               continue;
            } // end if scale <= absdata
            sum += std::pow(absdata / scale, p); // else scale > absdata
         } // end if data[i] != 0
      }
      return scale * std::pow(sum, 1.0/p);
   } // end if p < infinity()

   return Normlinf(); // else p >= infinity()
}
798 
799 double Vector::Max() const
800 {
801  if (size == 0) { return -infinity(); }
802 
803  double max = data[0];
804 
805  for (int i = 1; i < size; i++)
806  {
807  if (data[i] > max)
808  {
809  max = data[i];
810  }
811  }
812 
813  return max;
814 }
815 
double Vector::Sum() const
{
   // Sum of all entries, computed on the host; HostRead() syncs the host
   // copy from the device if necessary.
   double sum = 0.0;

   const double *h_data = this->HostRead();
   for (int i = 0; i < size; i++)
   {
      sum += h_data[i];
   }

   return sum;
}
828 
829 #ifdef MFEM_USE_CUDA
// Per-block partial minimum: each block reduces its slice of x in shared
// memory and writes the block minimum to gdsr[blockIdx.x]; the final
// reduction over blocks happens on the host.
static __global__ void cuKernelMin(const int N, double *gdsr, const double *x)
{
   __shared__ double s_min[MFEM_CUDA_BLOCKS];
   const int n = blockDim.x*blockIdx.x + threadIdx.x;
   // NOTE(review): threads with n >= N return before the __syncthreads()
   // below — presumably safe only when such threads span whole warps;
   // confirm against the launch configuration.
   if (n>=N) { return; }
   const int bid = blockIdx.x;
   const int tid = threadIdx.x;
   const int bbd = bid*blockDim.x;
   const int rid = bbd+tid;
   s_min[tid] = x[n];
   // Tree reduction: halve the number of active workers each round, pairing
   // thread 'tid' with 'tid + workers'.
   for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }   // only the first half reduces
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }         // partner element out of range
      if (dualTid >= blockDim.x) { continue; }
      s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
   }
   // Thread 0 publishes the block's result.
   if (tid==0) { gdsr[bid] = s_min[0]; }
}
854 
855 static Array<double> cuda_reduce_buf;
856 
857 static double cuVectorMin(const int N, const double *X)
858 {
859  const int tpb = MFEM_CUDA_BLOCKS;
860  const int blockSize = MFEM_CUDA_BLOCKS;
861  const int gridSize = (N+blockSize-1)/blockSize;
862  const int min_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
863  cuda_reduce_buf.SetSize(min_sz);
864  Memory<double> &buf = cuda_reduce_buf.GetMemory();
865  double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
866  cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
867  MFEM_GPU_CHECK(cudaGetLastError());
868  const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
870  for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
871  return min;
872 }
873 
// Per-block partial dot product: each block reduces x[i]*y[i] over its slice
// in shared memory and writes the partial sum to gdsr[blockIdx.x]; the final
// sum over blocks happens on the host.
static __global__ void cuKernelDot(const int N, double *gdsr,
                                   const double *x, const double *y)
{
   __shared__ double s_dot[MFEM_CUDA_BLOCKS];
   const int n = blockDim.x*blockIdx.x + threadIdx.x;
   // NOTE(review): threads with n >= N return before the __syncthreads()
   // below — presumably safe only when such threads span whole warps;
   // confirm against the launch configuration.
   if (n>=N) { return; }
   const int bid = blockIdx.x;
   const int tid = threadIdx.x;
   const int bbd = bid*blockDim.x;
   const int rid = bbd+tid;
   s_dot[tid] = x[n] * y[n];
   // Tree reduction: halve the number of active workers each round, pairing
   // thread 'tid' with 'tid + workers'.
   for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }   // only the first half reduces
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }         // partner element out of range
      if (dualTid >= blockDim.x) { continue; }
      s_dot[tid] += s_dot[dualTid];
   }
   // Thread 0 publishes the block's partial sum.
   if (tid==0) { gdsr[bid] = s_dot[0]; }
}
899 
// Device dot product: launch one partial sum per block via cuKernelDot, then
// accumulate the per-block results on the host.
static double cuVectorDot(const int N, const double *X, const double *Y)
{
   const int tpb = MFEM_CUDA_BLOCKS;
   const int blockSize = MFEM_CUDA_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   const int dot_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);   // = ceil(N/tpb)
   cuda_reduce_buf.SetSize(dot_sz, MemoryType::DEVICE);
   Memory<double> &buf = cuda_reduce_buf.GetMemory();
   double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
   cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
   MFEM_GPU_CHECK(cudaGetLastError());
   // Read(HOST) brings the partial sums back to the host for the final
   // accumulation.
   const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
   double dot = 0.0;
   for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
   return dot;
}
916 #endif // MFEM_USE_CUDA
917 
918 #ifdef MFEM_USE_HIP
// HIP counterpart of cuKernelMin: per-block partial minimum written to
// gdsr[hipBlockIdx_x]; the final reduction over blocks happens on the host.
static __global__ void hipKernelMin(const int N, double *gdsr, const double *x)
{
   __shared__ double s_min[MFEM_CUDA_BLOCKS];
   const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
   // NOTE(review): threads with n >= N return before the __syncthreads()
   // below — presumably safe only when such threads span whole wavefronts;
   // confirm against the launch configuration.
   if (n>=N) { return; }
   const int bid = hipBlockIdx_x;
   const int tid = hipThreadIdx_x;
   const int bbd = bid*hipBlockDim_x;
   const int rid = bbd+tid;
   s_min[tid] = x[n];
   // Tree reduction: halve the number of active workers each round, pairing
   // thread 'tid' with 'tid + workers'.
   for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }   // only the first half reduces
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }         // partner element out of range
      if (dualTid >= hipBlockDim_x) { continue; }
      s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
   }
   // Thread 0 publishes the block's result.
   if (tid==0) { gdsr[bid] = s_min[0]; }
}
943 
944 static Array<double> cuda_reduce_buf;
945 
946 static double hipVectorMin(const int N, const double *X)
947 {
948  const int tpb = MFEM_CUDA_BLOCKS;
949  const int blockSize = MFEM_CUDA_BLOCKS;
950  const int gridSize = (N+blockSize-1)/blockSize;
951  const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
952  cuda_reduce_buf.SetSize(min_sz);
953  Memory<double> &buf = cuda_reduce_buf.GetMemory();
954  double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
955  hipLaunchKernelGGL(hipKernelMin,gridSize,blockSize,0,0,N,d_min,X);
956  MFEM_GPU_CHECK(hipGetLastError());
957  const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
959  for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
960  return min;
961 }
962 
// HIP counterpart of cuKernelDot: per-block partial dot product written to
// gdsr[hipBlockIdx_x]; the final sum over blocks happens on the host.
static __global__ void hipKernelDot(const int N, double *gdsr,
                                    const double *x, const double *y)
{
   __shared__ double s_dot[MFEM_CUDA_BLOCKS];
   const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
   // NOTE(review): threads with n >= N return before the __syncthreads()
   // below — presumably safe only when such threads span whole wavefronts;
   // confirm against the launch configuration.
   if (n>=N) { return; }
   const int bid = hipBlockIdx_x;
   const int tid = hipThreadIdx_x;
   const int bbd = bid*hipBlockDim_x;
   const int rid = bbd+tid;
   s_dot[tid] = x[n] * y[n];
   // Tree reduction: halve the number of active workers each round, pairing
   // thread 'tid' with 'tid + workers'.
   for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }   // only the first half reduces
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }         // partner element out of range
      if (dualTid >= hipBlockDim_x) { continue; }
      s_dot[tid] += s_dot[dualTid];
   }
   // Thread 0 publishes the block's partial sum.
   if (tid==0) { gdsr[bid] = s_dot[0]; }
}
988 
// Device dot product (HIP): launch one partial sum per block via
// hipKernelDot, then accumulate the per-block results on the host.
static double hipVectorDot(const int N, const double *X, const double *Y)
{
   const int tpb = MFEM_CUDA_BLOCKS;
   const int blockSize = MFEM_CUDA_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);   // = ceil(N/tpb)
   cuda_reduce_buf.SetSize(dot_sz);
   Memory<double> &buf = cuda_reduce_buf.GetMemory();
   double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
   hipLaunchKernelGGL(hipKernelDot,gridSize,blockSize,0,0,N,d_dot,X,Y);
   MFEM_GPU_CHECK(hipGetLastError());
   // Read(HOST) brings the partial sums back to the host for the final
   // accumulation.
   const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
   double dot = 0.0;
   for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
   return dot;
}
1005 #endif // MFEM_USE_HIP
1006 
1007 double Vector::operator*(const Vector &v) const
1008 {
1009  MFEM_ASSERT(size == v.size, "incompatible Vectors!");
1010 
1011  const bool use_dev = UseDevice() || v.UseDevice();
1012 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP) || defined(MFEM_USE_OPENMP)
1013  auto m_data = Read(use_dev);
1014 #else
1015  Read(use_dev);
1016 #endif
1017  auto v_data = v.Read(use_dev);
1018 
1019  if (!use_dev) { goto vector_dot_cpu; }
1020 
1021 #ifdef MFEM_USE_OCCA
1022  if (DeviceCanUseOcca())
1023  {
1024  return occa::linalg::dot<double,double,double>(
1026  }
1027 #endif
1028 
1029 #ifdef MFEM_USE_CUDA
1031  {
1032  return cuVectorDot(size, m_data, v_data);
1033  }
1034 #endif
1035 
1036 #ifdef MFEM_USE_HIP
1038  {
1039  return hipVectorDot(size, m_data, v_data);
1040  }
1041 #endif
1042 
1043 #ifdef MFEM_USE_OPENMP
1045  {
1046  double prod = 0.0;
1047  #pragma omp parallel for reduction(+:prod)
1048  for (int i = 0; i < size; i++)
1049  {
1050  prod += m_data[i] * v_data[i];
1051  }
1052  return prod;
1053  }
1054 #endif
1056  {
1057  const int N = size;
1058  auto v_data = v.Read();
1059  auto m_data = Read();
1060  Vector dot(1);
1061  dot.UseDevice(true);
1062  auto d_dot = dot.Write();
1063  dot = 0.0;
1064  MFEM_FORALL(i, N, d_dot[0] += m_data[i] * v_data[i];);
1065  dot.HostReadWrite();
1066  return dot[0];
1067  }
1068 vector_dot_cpu:
1069  return operator*(v_data);
1070 }
1071 
1072 double Vector::Min() const
1073 {
1074  if (size == 0) { return infinity(); }
1075 
1076  const bool use_dev = UseDevice();
1077  auto m_data = Read(use_dev);
1078 
1079  if (!use_dev) { goto vector_min_cpu; }
1080 
1081 #ifdef MFEM_USE_OCCA
1082  if (DeviceCanUseOcca())
1083  {
1084  return occa::linalg::min<double,double>(OccaMemoryRead(data, size));
1085  }
1086 #endif
1087 
1088 #ifdef MFEM_USE_CUDA
1090  {
1091  return cuVectorMin(size, m_data);
1092  }
1093 #endif
1094 
1095 #ifdef MFEM_USE_HIP
1097  {
1098  return hipVectorMin(size, m_data);
1099  }
1100 #endif
1101 
1102 #ifdef MFEM_USE_OPENMP
1104  {
1105  double minimum = m_data[0];
1106  #pragma omp parallel for reduction(min:minimum)
1107  for (int i = 0; i < size; i++)
1108  {
1109  minimum = std::min(minimum, m_data[i]);
1110  }
1111  return minimum;
1112  }
1113 #endif
1114 
1116  {
1117  const int N = size;
1118  auto m_data = Read();
1119  Vector min(1);
1120  min = infinity();
1121  min.UseDevice(true);
1122  auto d_min = min.ReadWrite();
1123  MFEM_FORALL(i, N, d_min[0] = (d_min[0]<m_data[i])?d_min[0]:m_data[i];);
1124  min.HostReadWrite();
1125  return min[0];
1126  }
1127 
1128 vector_min_cpu:
1129  double minimum = data[0];
1130  for (int i = 1; i < size; i++)
1131  {
1132  if (m_data[i] < minimum)
1133  {
1134  minimum = m_data[i];
1135  }
1136  }
1137  return minimum;
1138 }
1139 
1140 
1141 #ifdef MFEM_USE_SUNDIALS
1142 
// Construct a Vector that aliases the data of an existing SUNDIALS N_Vector
// (no copy; ownership stays with the N_Vector).
Vector::Vector(N_Vector nv)
{
   N_Vector_ID nvid = N_VGetVectorID(nv);
   switch (nvid)
   {
      case SUNDIALS_NVEC_SERIAL:
         SetDataAndSize(NV_DATA_S(nv), NV_LENGTH_S(nv));
         break;
#ifdef MFEM_USE_MPI
      case SUNDIALS_NVEC_PARALLEL:
         // Wrap only the MPI-local block of the parallel vector.
         SetDataAndSize(NV_DATA_P(nv), NV_LOCLENGTH_P(nv));
         break;
      case SUNDIALS_NVEC_PARHYP:
      {
         // Wrap the local part of the underlying hypre vector.
         hypre_Vector *hpv_local = N_VGetVector_ParHyp(nv)->local_vector;
         SetDataAndSize(hpv_local->data, hpv_local->size);
         break;
      }
#endif
      default:
         MFEM_ABORT("N_Vector type " << nvid << " is not supported");
   }
}
1166 
// Point an existing, non-owning SUNDIALS N_Vector at this Vector's data
// (no copy; the N_Vector must not own its data).
void Vector::ToNVector(N_Vector &nv)
{
   MFEM_ASSERT(nv, "N_Vector handle is NULL");
   N_Vector_ID nvid = N_VGetVectorID(nv);
   switch (nvid)
   {
      case SUNDIALS_NVEC_SERIAL:
         MFEM_ASSERT(NV_OWN_DATA_S(nv) == SUNFALSE, "invalid serial N_Vector");
         NV_DATA_S(nv) = data;
         NV_LENGTH_S(nv) = size;
         break;
#ifdef MFEM_USE_MPI
      case SUNDIALS_NVEC_PARALLEL:
         MFEM_ASSERT(NV_OWN_DATA_P(nv) == SUNFALSE, "invalid parallel N_Vector");
         // Only the MPI-local block is redirected.
         NV_DATA_P(nv) = data;
         NV_LOCLENGTH_P(nv) = size;
         break;
      case SUNDIALS_NVEC_PARHYP:
      {
         // Redirect the local part of the underlying hypre vector.
         hypre_Vector *hpv_local = N_VGetVector_ParHyp(nv)->local_vector;
         MFEM_ASSERT(hpv_local->owns_data == false, "invalid hypre N_Vector");
         hpv_local->data = data;
         hpv_local->size = size;
         break;
      }
#endif
      default:
         MFEM_ABORT("N_Vector type " << nvid << " is not supported");
   }
}
1197 
1198 #endif // MFEM_USE_SUNDIALS
1199 
1200 }
int Size() const
Logical size of the array.
Definition: array.hpp:124
void SetSubVector(const Array< int > &dofs, const double value)
Set the entries listed in dofs to the given value.
Definition: vector.cpp:498
void SetVector(const Vector &v, int offset)
Definition: vector.cpp:217
double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:353
Vector()
Default constructor for Vector. Sets size = 0 and data = NULL.
Definition: vector.hpp:58
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:103
Memory< double > data
Definition: vector.hpp:52
double & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
Definition: vector.cpp:83
Device memory; using CUDA or HIP *Malloc and *Free.
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:407
double Norml2() const
Returns the l2 norm of the vector.
Definition: vector.cpp:711
double & operator()(int i)
Access Vector entries using () for 0-based indexing.
Definition: vector.hpp:487
Biwise-OR of all HIP backends.
Definition: device.hpp:83
void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition: vector.hpp:86
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Definition: vector.cpp:472
int Size() const
Returns the size of the vector.
Definition: vector.hpp:157
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
Definition: array.hpp:109
double Normlinf() const
Returns the l_infinity norm of the vector.
Definition: vector.cpp:729
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Randomize(int seed=0)
Set random values in the vector.
Definition: vector.cpp:692
double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:341
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition: vector.cpp:238
double operator*(const double *) const
Dot product with a double * array.
Definition: vector.cpp:93
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
unsigned flags
Bit flags defined from the FlagMask enum.
Vector & operator=(const double *v)
Copy Size() entries from v.
Definition: vector.cpp:106
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
Definition: vector.cpp:51
const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:337
double Normlp(double p) const
Returns the l_p norm of the vector.
Definition: vector.cpp:749
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:349
bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
Definition: vector.hpp:89
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition: occa.hpp:69
double b
Definition: lissajous.cpp:42
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:249
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read-only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
Definition: occa.hpp:37
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Definition: vector.cpp:448
Bitwise-OR of all OpenMP backends.
Definition: device.hpp:85
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:273
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. It allows testing the "device" code-path (using separate host/device memory pools and host <-> device transfers) without any GPU hardware.
Definition: device.hpp:68
MFEM_HOST_DEVICE double Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
Definition: kernels.hpp:46
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add (element) subvector to the vector.
Definition: vector.cpp:564
Vector & operator/=(double c)
Definition: vector.cpp:147
Bitwise-OR of all CUDA backends.
Definition: device.hpp:81
void SetSubVectorComplement(const Array< int > &dofs, const double val)
Set all vector entries NOT in the 'dofs' array to the given 'val'.
Definition: vector.cpp:633
Vector & operator*=(double c)
Definition: vector.cpp:138
double Min() const
Returns the minimal element of the vector.
Definition: vector.cpp:1072
double Norml1() const
Returns the l_1 norm of the vector.
Definition: vector.cpp:739
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition: device.hpp:240
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
Definition: vector.cpp:649
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition: vector.cpp:385
void SetSize(int nsize)
Change logical size of the array, keep existing entries.
Definition: array.hpp:635
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
Definition: vector.cpp:673
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:234
void SetDataAndSize(double *d, int s)
Set the Vector data and size.
Definition: vector.hpp:125
Vector & Set(const double a, const Vector &x)
(*this) = a * x
Definition: vector.cpp:205
double a
Definition: lissajous.cpp:41
Vector & Add(const double a, const Vector &Va)
(*this) += a * Va
Definition: vector.cpp:190
Host memory; using new[] and delete[].
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:333
double Max() const
Returns the maximal element of the vector.
Definition: vector.cpp:799
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
bool Empty() const
Return true if the Memory object is empty, see Reset().
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
int dim
Definition: ex24.cpp:43
Vector & operator-=(double c)
Definition: vector.cpp:157
double infinity()
Define a shortcut for std::numeric_limits&lt;double&gt;::infinity()
Definition: vector.hpp:42
const double alpha
Definition: ex15.cpp:336
Vector data type.
Definition: vector.hpp:48
Vector & operator+=(const Vector &v)
Definition: vector.cpp:178
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
Definition: globals.hpp:66
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
double Sum() const
Return the sum of the vector entries.
Definition: vector.cpp:816
virtual N_Vector ToNVector()
Return a new wrapper SUNDIALS N_Vector of type SUNDIALS_NVEC_SERIAL.
Definition: vector.hpp:362
void Neg()
(*this) = -(*this)
Definition: vector.cpp:230