html/vector_8cpp_source.html

// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced

// at the Lawrence Livermore National Laboratory. All Rights reserved. See files

// LICENSE and NOTICE for details. LLNL-CODE-806117.

//

// This file is part of the MFEM library. For more information and source code

// availability visit https://mfem.org.

//

// MFEM is free software; you can redistribute it and/or modify it under the

// terms of the BSD-3 license. We welcome feedback and contributions, see file

// CONTRIBUTING.md for details.


// Implementation of data type vector


#include "kernels.hpp"

#include "vector.hpp"

#include "../general/forall.hpp"


#ifdef MFEM_USE_OPENMP

#include <omp.h>

#endif


#include <iostream>

#include <iomanip>

#include <cmath>

#include <ctime>

#include <limits>


namespace mfem

{


/**
 * Join operation used when combining partial results of an L2-norm reduction.
 *
 * A partial result is a pair (first, second) = (scaled sum, scale) with
 *   a.first = sum_i (|v_i|/a.second)^2
 *   b.first = sum_j (|v_j|/b.second)^2   (i and j index disjoint entries of v)
 * Join(a, b) overwrites a with the merged result:
 *   a.second = max(a.second, b.second)
 *   a.first  = (a.second == 0 ? 0 : sum_{k in union(i,j)} (|v_k|/a.second)^2)
 *
 * Keeping the sum relative to the largest magnitude makes this form resilient
 * against overflow/underflow, similar to std::hypot.
 */
struct L2Reducer
{
   using value_type = DevicePair<real_t, real_t>;

   /// Merge the partial result @a b into @a a, rescaling both sums to the
   /// larger of the two scales.
   static MFEM_HOST_DEVICE void Join(value_type& a, const value_type &b)
   {
      const real_t new_scale = fmax(a.second, b.second);
      // No positive scale available: leave 'a' unchanged.
      if (!(new_scale > 0)) { return; }

      const real_t ratio_a = a.second / new_scale;
      a.first *= ratio_a * ratio_a;
      const real_t ratio_b = b.second / new_scale;
      a.first += b.first * ratio_b * ratio_b;
      a.second = new_scale;
   }

   /// Set @a a to the starting value of the reduction: zero sum, zero scale.
   static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
   {
      a.first = 0;
      a.second = 0;
   }
};


/**
 * Join operation used when combining partial results of an Lp-norm reduction.
 *
 * A partial result is a pair (first, second) = (scaled sum, scale) with
 *   a.first = sum_i (|v_i|/a.second)^p
 *   b.first = sum_j (|v_j|/b.second)^p   (i and j index disjoint entries of v)
 * Join(a, b) overwrites a with the merged result:
 *   a.second = max(a.second, b.second)
 *   a.first  = (a.second == 0 ? 0 : sum_{k in union(i,j)} (|v_k|/a.second)^p)
 *
 * Keeping the sum relative to the largest magnitude makes this form resilient
 * against overflow/underflow, similar to std::hypot.
 */
struct LpReducer
{
   /// Exponent of the norm.
   real_t p;

   using value_type = DevicePair<real_t, real_t>;

   /// Merge the partial result @a b into @a a, rescaling both sums to the
   /// larger of the two scales.
   MFEM_HOST_DEVICE void Join(value_type& a, const value_type &b) const
   {
      const real_t new_scale = fmax(a.second, b.second);
      // No positive scale available: leave 'a' unchanged.
      if (!(new_scale > 0)) { return; }

      a.first = a.first * pow(a.second / new_scale, p) +
                b.first * pow(b.second / new_scale, p);
      a.second = new_scale;
   }

   /// Set @a a to the starting value of the reduction: zero sum, zero scale.
   static MFEM_HOST_DEVICE void SetInitialValue(value_type &a)
   {
      a.first = 0;
      a.second = 0;
   }
};


/// Return the function-local static Array<real_t> that the scalar reductions
/// in this file pass to reduce() as their workspace argument.
static Array<real_t>& vector_workspace()
{
   static Array<real_t> workspace;
   return workspace;
}


/// Return the function-local static Array of (sum, scale) pairs that the
/// L2/Lp norm reductions in this file pass to reduce() as their workspace.
static Array<DevicePair<real_t, real_t>> &Lpvector_workspace()
{
   static Array<DevicePair<real_t, real_t>> workspace;
   return workspace;
}


/// Copy constructor.
/** For a non-empty source, new storage is allocated with the memory type
    reported by v.data.GetMemoryType() and the entries are copied. The device
    flag of @a v is copied in all cases. */
Vector::Vector(const Vector &v)
{
   size = v.Size();
   if (size > 0)
   {
      MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
      data.New(size, v.data.GetMemoryType());
      data.CopyFrom(v.data, size);
   }
   UseDevice(v.UseDevice());
}


/// Move constructor: delegates to the move assignment operator.
Vector::Vector(Vector &&v)
{
   operator=(std::move(v));
}


/// Read the Vector from @a np input streams.
/** Stream @a in[i] supplies @a dim[i] consecutive entries. The Vector is
    resized to the sum of all @a dim[i] and filled through host access. */
void Vector::Load(std::istream **in, int np, int *dim)
{
   int total = 0;
   for (int k = 0; k < np; k++) { total += dim[k]; }

   SetSize(total);
   HostWrite();

   int pos = 0;
   for (int k = 0; k < np; k++)
   {
      for (int j = 0; j < dim[k]; j++)
      {
         std::istream *stream = in[k];
         *stream >> data[pos++];
         // Clang's libc++ sets the failbit when (correctly) parsing
         // subnormals, so the failbit is reset in that case.
         if (!*stream && errno == ERANGE) { stream->clear(); }
      }
   }
}


/// Resize the Vector to @a Size and read that many entries from @a in through
/// host access.
void Vector::Load(std::istream &in, int Size)
{
   SetSize(Size);
   HostWrite();

   int k = 0;
   while (k < size)
   {
      in >> data[k];
      // Clang's libc++ sets the failbit when (correctly) parsing subnormals,
      // so the failbit is reset in that case.
      if (!in && errno == ERANGE) { in.clear(); }
      k++;
   }
}


/// Named equivalent of operator()(int): reference to the i-th entry.
real_t &Vector::Elem(int i)
{
   return (*this)(i);
}


/// Named equivalent of operator()(int) const: const reference to the i-th
/// entry.
const real_t &Vector::Elem(int i) const
{
   return (*this)(i);
}


/// Dot product of the Vector with the raw array @a v, computed through host
/// access. The loop reads v[0..size-1].
real_t Vector::operator*(const real_t *v) const
{
   HostRead();
   real_t sum = 0.0;
#ifdef MFEM_USE_LEGACY_OPENMP
   #pragma omp parallel for reduction(+:sum)
#endif
   for (int k = 0; k < size; k++)
   {
      sum += data[k] * v[k];
   }
   return sum;
}


/// Copy Size() entries from the host array @a v into the Vector. The size of
/// the Vector is not changed.
Vector &Vector::operator=(const real_t *v)
{
   data.CopyFromHost(v, Size());
   return *this;
}


/// Copy assignment: resize to v.Size() and copy the entries of @a v.
/** The device flag of @a v is temporarily set to the flag used for the copy
    and restored to its previous value afterwards. */
Vector &Vector::operator=(const Vector &v)
{
#if 0
   SetSize(v.Size(), v.data.GetMemoryType());
   data.CopyFrom(v.data, v.Size());
   UseDevice(v.UseDevice());
#else
   SetSize(v.Size());
   // Remember the source's device flag so it can be restored after the copy.
   const bool src_dev_flag = v.UseDevice();
   const bool copy_on_dev = UseDevice() || src_dev_flag;
   v.UseDevice(copy_on_dev);
   // 'data' is kept where it is, unless the copy is flagged for the device
   if (copy_on_dev) { Write(); }
   data.CopyFrom(v.data, v.Size());
   v.UseDevice(src_dev_flag);
#endif
   return *this;
}


/// Move assignment: swap contents with @a v, then destroy @a v unless this is
/// a self-assignment.
Vector &Vector::operator=(Vector &&v)
{
   const bool self_assignment = (this == &v);
   v.Swap(*this);
   if (!self_assignment) { v.Destroy(); }
   return *this;
}


/// Set every entry of the Vector to @a value.
Vector &Vector::operator=(real_t value)
{
   const int count = size;
   const bool on_dev = UseDevice();
   auto dst = Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] = value;
   });
   return *this;
}


/// Multiply every entry of the Vector by the scalar @a c.
Vector &Vector::operator*=(real_t c)
{
   const int count = size;
   const bool on_dev = UseDevice();
   auto dst = ReadWrite(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] *= c;
   });
   return *this;
}


/// Entry-wise multiplication: (*this)[i] *= v[i]. Sizes must match.
Vector &Vector::operator*=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const int count = size;
   const bool on_dev = UseDevice() || v.UseDevice();
   auto dst = ReadWrite(on_dev);
   auto src = v.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] *= src[k];
   });
   return *this;
}


/// Divide every entry by the scalar @a c.
/** Implemented as a multiplication by the precomputed reciprocal 1.0/c. */
Vector &Vector::operator/=(real_t c)
{
   const real_t inv_c = 1.0/c;
   const int count = size;
   const bool on_dev = UseDevice();
   auto dst = ReadWrite(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] *= inv_c;
   });
   return *this;
}


/// Entry-wise division: (*this)[i] /= v[i]. Sizes must match.
Vector &Vector::operator/=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const int count = size;
   const bool on_dev = UseDevice() || v.UseDevice();
   auto dst = ReadWrite(on_dev);
   auto src = v.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] /= src[k];
   });
   return *this;
}


/// Subtract the scalar @a c from every entry of the Vector.
Vector &Vector::operator-=(real_t c)
{
   const int count = size;
   const bool on_dev = UseDevice();
   auto dst = ReadWrite(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] -= c;
   });
   return *this;
}


/// Entry-wise subtraction: (*this)[i] -= v[i]. Sizes must match.
Vector &Vector::operator-=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const int count = size;
   const bool on_dev = UseDevice() || v.UseDevice();
   auto dst = ReadWrite(on_dev);
   auto src = v.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] -= src[k];
   });
   return *this;
}


/// Add the scalar @a c to every entry of the Vector.
Vector &Vector::operator+=(real_t c)
{
   const int count = size;
   const bool on_dev = UseDevice();
   auto dst = ReadWrite(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] += c;
   });
   return *this;
}


/// Entry-wise addition: (*this)[i] += v[i]. Sizes must match.
Vector &Vector::operator+=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const int count = size;
   const bool on_dev = UseDevice() || v.UseDevice();
   auto dst = ReadWrite(on_dev);
   auto src = v.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] += src[k];
   });

   return *this;
}


/// (*this) += a * Va. Sizes must match.
/** Nothing is done (no memory access is requested) when @a a compares equal to
    zero. */
Vector &Vector::Add(const real_t a, const Vector &Va)
{
   MFEM_ASSERT(size == Va.size, "incompatible Vectors!");

   if (a == 0.0) { return *this; }

   const int count = size;
   const bool on_dev = UseDevice() || Va.UseDevice();
   auto dst = ReadWrite(on_dev);
   auto src = Va.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] += a * src[k];
   });
   return *this;
}


/// (*this) = a * Va. Sizes must match.
Vector &Vector::Set(const real_t a, const Vector &Va)
{
   MFEM_ASSERT(size == Va.size, "incompatible Vectors!");

   const int count = size;
   const bool on_dev = UseDevice() || Va.UseDevice();
   // Read access to the source is requested before write access to *this.
   auto src = Va.Read(on_dev);
   auto dst = Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] = a * src[k];
   });
   return *this;
}


void Vector::SetVector(const Vector &v, int offset)

{

   MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");


   const int vs = v.Size();

   const real_t *vp = v.data;

   real_t *p = data + offset;

   for (int i = 0; i < vs; i++)

   {

      p[i] = vp[i];

   }

}


void Vector::AddSubVector(const Vector &v, int offset)

{

   MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");


   const int vs = v.Size();

   const real_t *vp = v.data;

   real_t *p = data + offset;

   for (int i = 0; i < vs; i++)

   {

      p[i] += vp[i];

   }

}


/// Negate every entry of the Vector in place.
void Vector::Neg()
{
   const int count = size;
   const bool on_dev = UseDevice();
   auto dst = ReadWrite(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] = -dst[k];
   });
}


/// Replace every entry x of the Vector by 1.0/x, in place.
void Vector::Reciprocal()
{
   const int count = size;
   const bool on_dev = UseDevice();
   auto dst = ReadWrite(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      dst[k] = 1.0/dst[k];
   });
}


/// v = v1 + v2. All three Vectors must have the same size.
void add(const Vector &v1, const Vector &v2, Vector &v)
{
   MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
               "incompatible Vectors!");

#if !defined(MFEM_USE_LEGACY_OPENMP)
   const int count = v.size;
   const bool on_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
   // Request the read accesses before the write access: v may be the same
   // object as v1 or v2.
   auto lhs = v1.Read(on_dev);
   auto rhs = v2.Read(on_dev);
   auto out = v.Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      out[k] = lhs[k] + rhs[k];
   });
#else
   #pragma omp parallel for
   for (int k = 0; k < v.size; k++)
   {
      v.data[k] = v1.data[k] + v2.data[k];
   }
#endif
}


/// v = v1 + alpha * v2. All three Vectors must have the same size.
/** alpha == 0 reduces to the assignment v = v1 and alpha == 1 to
    add(v1, v2, v). */
void add(const Vector &v1, real_t alpha, const Vector &v2, Vector &v)
{
   MFEM_ASSERT(v.size == v1.size && v.size == v2.size,
               "incompatible Vectors!");

   if (alpha == 0.0)
   {
      v = v1;
      return;
   }
   if (alpha == 1.0)
   {
      add(v1, v2, v);
      return;
   }

#if !defined(MFEM_USE_LEGACY_OPENMP)
   const int count = v.size;
   const bool on_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice();
   // Request the read accesses before the write access: v may be the same
   // object as v1 or v2.
   auto lhs = v1.Read(on_dev);
   auto rhs = v2.Read(on_dev);
   auto out = v.Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      out[k] = lhs[k] + alpha * rhs[k];
   });
#else
   const real_t *lhs = v1.data, *rhs = v2.data;
   real_t *out = v.data;
   const int count = v.size;
   #pragma omp parallel for
   for (int k = 0; k < count; k++)
   {
      out[k] = lhs[k] + alpha*rhs[k];
   }
#endif
}


/// z = a * (x + y). All three Vectors must have the same size.
/** a == 0 reduces to z = 0 and a == 1 to add(x, y, z). */
void add(const real_t a, const Vector &x, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

   if (a == 0.0)
   {
      z = 0.0;
      return;
   }
   if (a == 1.0)
   {
      add(x, y, z);
      return;
   }

#if !defined(MFEM_USE_LEGACY_OPENMP)
   const int count = x.size;
   const bool on_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
   // Request the read accesses before the write access: z may be the same
   // object as x or y.
   auto lhs = x.Read(on_dev);
   auto rhs = y.Read(on_dev);
   auto out = z.Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      out[k] = a * (lhs[k] + rhs[k]);
   });
#else
   const real_t *lhs = x.data;
   const real_t *rhs = y.data;
   real_t *out = z.data;
   const int count = x.size;
   #pragma omp parallel for
   for (int k = 0; k < count; k++)
   {
      out[k] = a * (lhs[k] + rhs[k]);
   }
#endif
}


/// z = a * x + b * y. All three Vectors must have the same size.
/** a == 0 reduces to z.Set(b, y) and b == 0 to z.Set(a, x). */
void add(const real_t a, const Vector &x,
         const real_t b, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

   if (a == 0.0)
   {
      z.Set(b, y);
      return;
   }
   if (b == 0.0)
   {
      z.Set(a, x);
      return;
   }
#if 0
   // Disabled shortcuts for a == 1, b == 1 and a == b.
   if (a == 1.0)
   {
      add(x, b, y, z);
      return;
   }
   if (b == 1.0)
   {
      add(y, a, x, z);
      return;
   }
   if (a == b)
   {
      add(a, x, y, z);
      return;
   }
#endif

#if !defined(MFEM_USE_LEGACY_OPENMP)
   const int count = x.size;
   const bool on_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
   // Request the read accesses before the write access: z may be the same
   // object as x or y.
   auto lhs = x.Read(on_dev);
   auto rhs = y.Read(on_dev);
   auto out = z.Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      out[k] = a * lhs[k] + b * rhs[k];
   });
#else
   const real_t *lhs = x.data;
   const real_t *rhs = y.data;
   real_t *out = z.data;
   const int count = x.size;
   #pragma omp parallel for
   for (int k = 0; k < count; k++)
   {
      out[k] = a * lhs[k] + b * rhs[k];
   }
#endif
}


/// z = x - y. All three Vectors must have the same size.
void subtract(const Vector &x, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

#if !defined(MFEM_USE_LEGACY_OPENMP)
   const int count = x.size;
   const bool on_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
   // Request the read accesses before the write access: z may be the same
   // object as x or y.
   auto lhs = x.Read(on_dev);
   auto rhs = y.Read(on_dev);
   auto out = z.Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      out[k] = lhs[k] - rhs[k];
   });
#else
   const real_t *lhs = x.data;
   const real_t *rhs = y.data;
   real_t *out = z.data;
   const int count = x.size;
   #pragma omp parallel for
   for (int k = 0; k < count; k++)
   {
      out[k] = lhs[k] - rhs[k];
   }
#endif
}


/// z = a * (x - y). All three Vectors must have the same size.
/** a == 0 reduces to z = 0 and a == 1 to subtract(x, y, z). */
void subtract(const real_t a, const Vector &x, const Vector &y, Vector &z)
{
   MFEM_ASSERT(x.size == y.size && x.size == z.size,
               "incompatible Vectors!");

   if (a == 0.)
   {
      z = 0.;
      return;
   }
   if (a == 1.)
   {
      subtract(x, y, z);
      return;
   }

#if !defined(MFEM_USE_LEGACY_OPENMP)
   const int count = x.size;
   const bool on_dev = x.UseDevice() || y.UseDevice() || z.UseDevice();
   // Request the read accesses before the write access: z may be the same
   // object as x or y.
   auto lhs = x.Read(on_dev);
   auto rhs = y.Read(on_dev);
   auto out = z.Write(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      out[k] = a * (lhs[k] - rhs[k]);
   });
#else
   const real_t *lhs = x.data;
   const real_t *rhs = y.data;
   real_t *out = z.data;
   const int count = x.size;
   #pragma omp parallel for
   for (int k = 0; k < count; k++)
   {
      out[k] = a * (lhs[k] - rhs[k]);
   }
#endif
}


void Vector::cross3D(const Vector &vin, Vector &vout) const

{

   HostRead();

   vin.HostRead();

   vout.HostWrite();

   MFEM_VERIFY(size == 3, "Only 3D vectors supported in cross.");

   MFEM_VERIFY(vin.Size() == 3, "Only 3D vectors supported in cross.");

   vout.SetSize(3);

   vout(0) = data[1]*vin(2)-data[2]*vin(1);

   vout(1) = data[2]*vin(0)-data[0]*vin(2);

   vout(2) = data[0]*vin(1)-data[1]*vin(0);

}


void Vector::median(const Vector &lo, const Vector &hi)

{

   MFEM_ASSERT(size == lo.size && size == hi.size,

               "incompatible Vectors!");


   const bool use_dev = UseDevice() || lo.UseDevice() || hi.UseDevice();

   const int N = size;

   // Note: get read access first, in case *this is the same as lo/hi.

   auto l = lo.Read(use_dev);

   auto h = hi.Read(use_dev);

   auto m = Write(use_dev);

   mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i)

   {

      if (m[i] < l[i])

      {

         m[i] = l[i];

      }

      else if (m[i] > h[i])

      {

         m[i] = h[i];

      }

   });

}


/// Extract the entries listed in @a dofs into @a elemvect.
/** @a elemvect is resized to dofs.Size(). A non-negative index j selects
    entry j; a negative index j selects entry -j-1 with its sign flipped. */
void Vector::GetSubVector(const Array<int> &dofs, Vector &elemvect) const
{
   const int count = dofs.Size();
   elemvect.SetSize(count);
   const bool on_dev = dofs.UseDevice() || elemvect.UseDevice();
   auto out = elemvect.Write(on_dev);
   auto src = Read(on_dev);
   auto idx = dofs.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      const int j = idx[k];
      if (j >= 0)
      {
         out[k] = src[j];
      }
      else
      {
         out[k] = -src[-j-1];
      }
   });
}


/// Extract the entries listed in @a dofs into the host array @a elem_data.
/** A non-negative index j selects entry j; a negative index j selects entry
    -1-j with its sign flipped. The loop writes elem_data[0..dofs.Size()-1]. */
void Vector::GetSubVector(const Array<int> &dofs, real_t *elem_data) const
{
   data.Read(MemoryClass::HOST, size);
   const int count = dofs.Size();
   for (int k = 0; k < count; k++)
   {
      const int j = dofs[k];
      if (j >= 0)
      {
         elem_data[k] = data[j];
      }
      else
      {
         elem_data[k] = -data[-1-j];
      }
   }
}


/// Set the entries listed in @a dofs to @a value.
/** A non-negative index j sets entry j to @a value; a negative index j sets
    entry -1-j to -value. */
void Vector::SetSubVector(const Array<int> &dofs, const real_t value)
{
   const int count = dofs.Size();
   const bool on_dev = dofs.UseDevice();
   // Read+write access for *this: only some of its entries are modified.
   auto dst = ReadWrite(on_dev);
   auto idx = dofs.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      const int j = idx[k];
      if (j < 0)
      {
         dst[-1-j] = -value;
      }
      else
      {
         dst[j] = value;
      }
   });
}


/// Set the entries listed in @a dofs to the corresponding entries of
/// @a elemvect.
/** A non-negative index j sets entry j to elemvect[i]; a negative index j sets
    entry -1-j to -elemvect[i]. Requires dofs.Size() <= elemvect.Size(). */
void Vector::SetSubVector(const Array<int> &dofs, const Vector &elemvect)
{
   MFEM_ASSERT(dofs.Size() <= elemvect.Size(),
               "Size mismatch: length of dofs is " << dofs.Size()
               << ", length of elemvect is " << elemvect.Size());

   const int count = dofs.Size();
   const bool on_dev = dofs.UseDevice() || elemvect.UseDevice();
   // Read+write access for *this: only some of its entries are modified.
   auto dst = ReadWrite(on_dev);
   auto src = elemvect.Read(on_dev);
   auto idx = dofs.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      const int j = idx[k];
      if (j < 0)
      {
         dst[-1-j] = -src[k];
      }
      else
      {
         dst[j] = src[k];
      }
   });
}


void Vector::SetSubVector(const Array<int> &dofs, real_t *elem_data)

{

   // Use read+write access because we overwrite only part of the data.

   data.ReadWrite(MemoryClass::HOST, size);

   const int n = dofs.Size();

   for (int i = 0; i < n; i++)

   {

      const int j= dofs[i];

      if (j >= 0)

      {

         operator()(j) = elem_data[i];

      }

      else

      {

         operator()(-1-j) = -elem_data[i];

      }

   }

}


/// Add the entries of @a elemvect to the entries of (*this) listed in
/// @a dofs.
/** A non-negative index j adds elemvect[i] to entry j; a negative index j
    subtracts elemvect[i] from entry -1-j. Requires
    dofs.Size() <= elemvect.Size(). */
void Vector::AddElementVector(const Array<int> &dofs, const Vector &elemvect)
{
   MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
               "length of dofs is " << dofs.Size() <<
               ", length of elemvect is " << elemvect.Size());

   const int count = dofs.Size();
   const bool on_dev = dofs.UseDevice() || elemvect.UseDevice();
   auto src = elemvect.Read(on_dev);
   auto dst = ReadWrite(on_dev);
   auto idx = dofs.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      const int j = idx[k];
      if (j < 0)
      {
         dst[-1-j] -= src[k];
      }
      else
      {
         dst[j] += src[k];
      }
   });
}


void Vector::AddElementVector(const Array<int> &dofs, real_t *elem_data)

{

   data.ReadWrite(MemoryClass::HOST, size);

   const int n = dofs.Size();

   for (int i = 0; i < n; i++)

   {

      const int j = dofs[i];

      if (j >= 0)

      {

         operator()(j) += elem_data[i];

      }

      else

      {

         operator()(-1-j) -= elem_data[i];

      }

   }

}


/// Add @a a times the entries of @a elemvect to the entries of (*this) listed
/// in @a dofs.
/** A non-negative index j adds a*elemvect[i] to entry j; a negative index j
    subtracts a*elemvect[i] from entry -1-j. Requires
    dofs.Size() <= elemvect.Size(). */
void Vector::AddElementVector(const Array<int> &dofs, const real_t a,
                              const Vector &elemvect)
{
   MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
               "length of dofs is " << dofs.Size() <<
               ", length of elemvect is " << elemvect.Size());

   const int count = dofs.Size();
   const bool on_dev = dofs.UseDevice() || elemvect.UseDevice();
   auto dst = ReadWrite(on_dev);
   auto src = elemvect.Read(on_dev);
   auto idx = dofs.Read(on_dev);
   mfem::forall_switch(on_dev, count, [=] MFEM_HOST_DEVICE (int k)
   {
      const int j = idx[k];
      if (j < 0)
      {
         dst[-1-j] -= a * src[k];
      }
      else
      {
         dst[j] += a * src[k];
      }
   });
}


void Vector::SetSubVectorComplement(const Array<int> &dofs, const real_t val)

{

   const bool use_dev = UseDevice() || dofs.UseDevice();

   const int n = dofs.Size();

   const int N = size;

   Vector dofs_vals(n, use_dev ?

                    Device::GetDeviceMemoryType() :

                    Device::GetHostMemoryType());

   auto d_data = ReadWrite(use_dev);

   auto d_dofs_vals = dofs_vals.Write(use_dev);

   auto d_dofs = dofs.Read(use_dev);

   mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i) { d_dofs_vals[i] = d_data[d_dofs[i]]; });

   mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { d_data[i] = val; });

   mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (int i) { d_data[d_dofs[i]] = d_dofs_vals[i]; });

}


/// Print the entries of the Vector to @a os, @a width entries per line.
/** Entries on the same line are separated by a single space and the output
    ends with a newline. Each entry is passed through ZeroSubnormal() before
    being printed. Nothing is printed for an empty Vector.

    A @a width of 0 disables line wrapping (all entries go on one line); this
    avoids the modulo-by-zero the row test would otherwise perform. */
void Vector::Print(std::ostream &os, int width) const
{
   if (!size) { return; }
   data.Read(MemoryClass::HOST, size);
   for (int i = 0; i < size; i++)
   {
      os << ZeroSubnormal(data[i]);
      const int printed = i + 1;
      // The final newline is written after the loop.
      if (printed == size) { break; }
      // Guard the modulo: 'printed % 0' is undefined behavior.
      const bool end_of_row = (width != 0) && (printed % width == 0);
      os << (end_of_row ? '\n' : ' ');
   }
   os << '\n';
}


#ifdef MFEM_USE_ADIOS2


/// Pass the host data of the Vector to os.engine.Put() under the name
/// @a variable_name. Nothing is done for an empty Vector.
void Vector::Print(adios2stream &os,
                   const std::string& variable_name) const
{
   if (size == 0) { return; }
   data.Read(MemoryClass::HOST, size);
   os.engine.Put(variable_name, &data[0]);
}


#endif


/// Print the Vector as its size on the first line followed by one entry per
/// line.
/** Entries are written in scientific notation with precision 14 and passed
    through ZeroSubnormal(). The format flags and precision of @a os are
    restored before returning. */
void Vector::Print_HYPRE(std::ostream &os) const
{
   // Save the stream state that is modified below.
   const std::ios::fmtflags saved_flags = os.flags();
   os.setf(std::ios::scientific);
   const std::streamsize saved_precision = os.precision(14);

   os << size << '\n';  // number of rows

   data.Read(MemoryClass::HOST, size);
   for (int k = 0; k < size; k++)
   {
      os << ZeroSubnormal(data[k]) << '\n';
   }

   os.precision(saved_precision);
   os.flags(saved_flags);
}


/// Print the Vector as a Mathematica list, one entry per line.
/** Each entry is written as an Internal`StringToMReal["..."] expression in
    scientific notation with precision 14, after being passed through
    ZeroSubnormal(). The format flags and precision of @a os are restored
    before returning. */
void Vector::PrintMathematica(std::ostream & os) const
{
   // Save the stream state that is modified below.
   const std::ios::fmtflags saved_flags = os.flags();
   os.setf(std::ios::scientific);
   const std::streamsize saved_precision = os.precision(14);

   os << "(* Read file into Mathematica using: "
      << "myVec = Get[\"this_file_name\"] *)\n";
   os << "{\n";

   data.Read(MemoryClass::HOST, size);
   const int last = size - 1;
   for (int k = 0; k < size; k++)
   {
      os << "Internal`StringToMReal[\"" << ZeroSubnormal(data[k]) << "\"]";
      // Every entry except the last is followed by a comma.
      if (k < last) { os << ','; }
      os << '\n';
   }

   os << "}\n";

   os.precision(saved_precision);
   os.flags(saved_flags);
}


void Vector::PrintHash(std::ostream &os) const

{

   os << "size: " << size << '\n';

   HashFunction hf;

   hf.AppendDoubles(HostRead(), size);

   os << "hash: " << hf.GetHash() << '\n';

}


/// Fill the Vector, through host access, with values returned by rand_real().
/** The C random number generator is seeded with @a seed; a @a seed of 0 is
    replaced by the current time. */
void Vector::Randomize(int seed)
{
   const int used_seed = (seed == 0) ? static_cast<int>(time(0)) : seed;
   srand(static_cast<unsigned>(used_seed));

   HostWrite();
   for (int k = 0; k < size; k++) { data[k] = rand_real(); }
}


/// Return the l2 norm of the Vector (0 for an empty Vector).
/** The entries are scaled on the fly, in the spirit of std::hypot() and
    BLAS's dnrm2: the reduction carries a pair (sum, scale) with
    sum = sum_i (|v_i|/scale)^2 and scale = largest |v_i| seen so far, so each
    squared ratio is <= 1, which avoids overflow. */
real_t Vector::Norml2() const
{
   if (size == 0) { return 0.0; }

   using value_type = DevicePair<real_t, real_t>;

   auto entries = Read(UseDevice());
   value_type acc;
   acc.first = 0;
   acc.second = 0;
   // Accumulate sum (|entries|/scale)^2 together with the running scale.
   reduce(
      size, acc,
      [=] MFEM_HOST_DEVICE(int k, value_type &r)
   {
      const real_t mag = fabs(entries[k]);
      // Zero entries do not contribute.
      if (!(mag > 0)) { return; }

      if (r.second <= mag)
      {
         // New largest magnitude: rescale the sum accumulated so far.
         const real_t ratio = r.second / mag;
         r.first = r.first * (ratio * ratio) + 1;
         r.second = mag;
      }
      else
      {
         const real_t ratio = mag / r.second;
         r.first += ratio * ratio;
      }
   },
   L2Reducer{}, UseDevice(), Lpvector_workspace());
   // Undo the scaling.
   return acc.second * sqrt(acc.first);
}


/// Return the l_infinity norm of the Vector: the largest absolute value of
/// its entries (0 for an empty Vector).
real_t Vector::Normlinf() const
{
   if (size == 0) { return 0; }

   auto entries = Read(UseDevice());
   real_t largest = 0;
   reduce(
      size, largest,
      [=] MFEM_HOST_DEVICE(int k, real_t &r)
   {
      r = fmax(r, fabs(entries[k]));
   },
   MaxReducer<real_t> {}, UseDevice(), vector_workspace());
   return largest;
}


/// Return the l1 norm of the Vector: the sum of the absolute values of its
/// entries (0 for an empty Vector).
real_t Vector::Norml1() const
{
   if (size == 0) { return 0.0; }

   auto entries = Read(UseDevice());
   real_t total = 0;
   reduce(
      size, total,
      [=] MFEM_HOST_DEVICE(int k, real_t &r)
   {
      r += fabs(entries[k]);
   },
   SumReducer<real_t> {}, UseDevice(), vector_workspace());
   return total;
}


/// Return the l_p norm of the Vector for p > 0.
/** p == 1 and p == 2 are forwarded to Norml1() and Norml2(); any p that is
    not less than infinity() is forwarded to Normlinf(). For all other p the
    entries are scaled on the fly, in the spirit of std::hypot() and BLAS's
    dnrm2: the reduction carries a pair (sum, scale) with
    sum = sum_i (|v_i|/scale)^p and scale = largest |v_i| seen so far, so the
    base of every pow() call is <= 1, which avoids overflow. */
real_t Vector::Normlp(real_t p) const
{
   MFEM_ASSERT(p > 0.0, "Vector::Normlp");

   if (p == 1.0) { return Norml1(); }
   if (p == 2.0) { return Norml2(); }
   if (!(p < infinity())) { return Normlinf(); }

   if (size == 0) { return 0.0; }

   using value_type = DevicePair<real_t, real_t>;

   auto entries = Read(UseDevice());
   value_type acc;
   acc.first = 0;
   acc.second = 0;
   // Accumulate sum (|entries|/scale)^p together with the running scale.
   reduce(
      size, acc,
      [=] MFEM_HOST_DEVICE(int k, value_type &r)
   {
      const real_t mag = fabs(entries[k]);
      // Zero entries do not contribute.
      if (!(mag > 0)) { return; }

      if (r.second <= mag)
      {
         // New largest magnitude: rescale the sum accumulated so far.
         const real_t ratio = r.second / mag;
         r.first = r.first * pow(ratio, p) + 1;
         r.second = mag;
      }
      else
      {
         const real_t ratio = mag / r.second;
         r.first += pow(ratio, p);
      }
   },
   LpReducer{p}, UseDevice(), Lpvector_workspace());
   // Undo the scaling.
   return acc.second * pow(acc.first, 1.0 / p);
}


// Dot product of (*this) and v. Returns 0.0 for empty Vectors.
//
// When either Vector has its device flag set, OCCA and OpenMP builds may take
// a backend-specific path (see below); every other case goes through the
// generic reduce() call at the end of the function.
real_t Vector::operator*(const Vector &v) const

{

   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   if (size == 0) { return 0.0; }


   const bool use_dev = UseDevice() || v.UseDevice();


   auto m_data = Read(use_dev);

   auto v_data = v.Read(use_dev);


   if (use_dev)

   {

      // special path for OCCA and OpenMP

#ifdef MFEM_USE_OCCA

      if (DeviceCanUseOcca())

      {

         return occa::linalg::dot<real_t, real_t, real_t>(

                   OccaMemoryRead(data, size), OccaMemoryRead(v.data, size));

      }

#endif


#ifdef MFEM_USE_OPENMP

      if (Device::Allows(Backend::OMP_MASK))

      {

         // The macro is defined unconditionally right here, so the
         // deterministic branch below is the one that gets compiled.
#define MFEM_USE_OPENMP_DETERMINISTIC_DOT

#ifdef MFEM_USE_OPENMP_DETERMINISTIC_DOT

         // By default, use a deterministic way of computing the dot product

         // Each thread sums one contiguous chunk of the index range into a
         // private variable and stores it in its own slot of th_dot; the
         // slots are then added up by th_dot.Sum().
         static Vector th_dot;

         #pragma omp parallel

         {

            const int nt = omp_get_num_threads();

            // Only the master thread resizes the shared buffer ...
            #pragma omp master

            th_dot.SetSize(nt);

            const int tid = omp_get_thread_num();

            const int stride = (size + nt - 1) / nt;

            const int start = tid * stride;

            const int stop = std::min(start + stride, size);

            real_t my_dot = 0.0;

            for (int i = start; i < stop; i++)

            {

               my_dot += m_data[i] * v_data[i];

            }

            // ... and the barrier orders that resize before any thread
            // writes its slot.
            #pragma omp barrier

            th_dot(tid) = my_dot;

         }

         return th_dot.Sum();

#else

         // The standard way of computing the dot product is non-deterministic

         real_t prod = 0.0;

         #pragma omp parallel for reduction(+ : prod)

         for (int i = 0; i < size; i++)

         {

            prod += m_data[i] * v_data[i];

         }

         return prod;

#endif // MFEM_USE_OPENMP_DETERMINISTIC_DOT

      }

#endif // MFEM_USE_OPENMP

   }


   // normal path for everything else (cuda, hip, debug, cpu)

   real_t res = 0;

   reduce(

      size, res,

   [=] MFEM_HOST_DEVICE(int i, real_t &r) { r += m_data[i] * v_data[i]; },

   SumReducer<real_t> {}, use_dev, vector_workspace());

   return res;

}


/// Return the smallest entry of the Vector; infinity() for an empty Vector.
/** When the device flag is set, OCCA and OpenMP builds may take a
    backend-specific path; every other case uses the generic reduce() call. */
real_t Vector::Min() const
{
   if (size == 0) { return infinity(); }

   const bool on_dev = UseDevice();
   auto entries = Read(on_dev);

   if (on_dev)
   {
      // backend-specific paths: OCCA and OpenMP

#ifdef MFEM_USE_OCCA
      if (DeviceCanUseOcca())
      {
         return occa::linalg::min<real_t,real_t>(OccaMemoryRead(data, size));
      }
#endif

#ifdef MFEM_USE_OPENMP
      if (Device::Allows(Backend::OMP_MASK))
      {
         real_t smallest = entries[0];
         #pragma omp parallel for reduction(min:smallest)
         for (int k = 0; k < size; k++)
         {
            smallest = std::min(smallest, entries[k]);
         }
         return smallest;
      }
#endif
   }

   // generic path for everything else (cuda, hip, debug, cpu)
   real_t result = infinity();
   reduce(
      size, result,
      [=] MFEM_HOST_DEVICE(int k, real_t &r)
   {
      r = fmin(r, entries[k]);
   },
   MinReducer<real_t> {}, on_dev, vector_workspace());
   return result;
}


/// Return the largest entry of the Vector; -infinity() for an empty Vector.
/** When the device flag is set, OCCA and OpenMP builds may take a
    backend-specific path; every other case uses the generic reduce() call. */
real_t Vector::Max() const
{
   if (size == 0) { return -infinity(); }

   const bool on_dev = UseDevice();
   auto entries = Read(on_dev);

   if (on_dev)
   {
      // backend-specific paths: OCCA and OpenMP
#ifdef MFEM_USE_OCCA
      if (DeviceCanUseOcca())
      {
         return occa::linalg::max<real_t, real_t>(OccaMemoryRead(data, size));
      }
#endif

#ifdef MFEM_USE_OPENMP
      if (Device::Allows(Backend::OMP_MASK))
      {
         real_t largest = entries[0];
         #pragma omp parallel for reduction(max : largest)
         for (int k = 0; k < size; k++)
         {
            largest = fmax(largest, entries[k]);
         }
         return largest;
      }
#endif
   }

   // generic path for everything else (cuda, hip, debug, cpu)
   real_t result = -infinity();
   reduce(
      size, result,
      [=] MFEM_HOST_DEVICE(int k, real_t &r)
   {
      r = fmax(r, entries[k]);
   },
   MaxReducer<real_t> {}, on_dev, vector_workspace());
   return result;
}


/// Return the sum of all entries of the Vector (0 for an empty Vector).
real_t Vector::Sum() const
{
   if (size == 0) { return 0.0; }

   auto entries = Read(UseDevice());
   real_t total = 0;
   reduce(
      size, total,
      [=] MFEM_HOST_DEVICE(int k, real_t &r)
   {
      r += entries[k];
   },
   SumReducer<real_t> {}, UseDevice(), vector_workspace());
   return total;
}


}


mfem::Array
Definition array.hpp:47

mfem::Array::Size
int Size() const
Return the logical size of the array.
Definition array.hpp:147

mfem::Array::UseDevice
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
Definition array.hpp:132

mfem::Array::Read
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337

mfem::Device::GetHostMemoryType
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
Definition device.hpp:265

mfem::Device::Allows
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259

mfem::Device::GetDeviceMemoryType
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274

mfem::HashFunction
Hash function for data sequences.
Definition hash.hpp:530

mfem::HashFunction::GetHash
std::string GetHash() const
Return the hash string for the current sequence and reset (clear) the sequence.
Definition hash.cpp:60

mfem::HashFunction::AppendDoubles
HashFunction & AppendDoubles(const real_t *doubles, size_t num_doubles)
Add a sequence of doubles for hashing, given as a c-array.
Definition hash.hpp:582

mfem::Memory::CopyFromHost
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
Definition mem_manager.hpp:1284

mfem::Memory::ReadWrite
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
Definition mem_manager.hpp:1173

mfem::Memory::GetMemoryType
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
Definition mem_manager.hpp:1237

mfem::Memory::Empty
bool Empty() const
Return true if the Memory object is empty, see Reset().
Definition mem_manager.hpp:329

mfem::Memory::Read
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
Definition mem_manager.hpp:1186

mfem::Memory::CopyFrom
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
Definition mem_manager.hpp:1263

mfem::Memory::New
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
Definition mem_manager.hpp:941

mfem::Vector
Vector data type.
Definition vector.hpp:82

mfem::Vector::Randomize
void Randomize(int seed=0)
Set random values in the vector.
Definition vector.cpp:915

mfem::Vector::PrintHash
void PrintHash(std::ostream &out) const
Print the Vector size and hash of its data.
Definition vector.cpp:907

mfem::Vector::Elem
real_t & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
Definition vector.cpp:172

mfem::Vector::SetVector
void SetVector(const Vector &v, int offset)
Definition vector.cpp:349

mfem::Vector::HostRead
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:498

mfem::Vector::Read
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494

mfem::Vector::median
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Definition vector.cpp:629

mfem::Vector::size
int size
Definition vector.hpp:86

mfem::Vector::Vector
Vector()
Definition vector.hpp:92

mfem::Vector::Neg
void Neg()
(*this) = -(*this)
Definition vector.cpp:375

mfem::Vector::Print
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
Definition vector.cpp:830

mfem::Vector::ReadWrite
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510

mfem::Vector::operator*
real_t operator*(const real_t *v) const
Definition vector.cpp:182

mfem::Vector::Normlinf
real_t Normlinf() const
Returns the l_infinity norm of the vector.
Definition vector.cpp:972

mfem::Vector::Norml1
real_t Norml1() const
Returns the l_1 norm of the vector.
Definition vector.cpp:985

mfem::Vector::SetSubVector
void SetSubVector(const Array< int > &dofs, const real_t value)
Set the entries listed in dofs to the given value.
Definition vector.cpp:679

mfem::Vector::AddElementVector
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Definition vector.cpp:745

mfem::Vector::data
Memory< real_t > data
Definition vector.hpp:85

mfem::Vector::AddSubVector
void AddSubVector(const Vector &v, int offset)
Definition vector.cpp:362

mfem::Vector::Swap
void Swap(Vector &other)
Swap the contents of two Vectors.
Definition vector.hpp:660

mfem::Vector::operator*=
Vector & operator*=(real_t c)
Definition vector.cpp:237

mfem::Vector::Norml2
real_t Norml2() const
Returns the l2 norm of the vector.
Definition vector.cpp:931

mfem::Vector::Load
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
Definition vector.cpp:126

mfem::Vector::Set
Vector & Set(const real_t a, const Vector &x)
(*this) = a * x
Definition vector.cpp:337

mfem::Vector::Max
real_t Max() const
Returns the maximal element of the vector.
Definition vector.cpp:1164

mfem::Vector::UseDevice
virtual bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
Definition vector.hpp:147

mfem::Vector::Size
int Size() const
Returns the size of the vector.
Definition vector.hpp:226

mfem::Vector::UseDevice
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition vector.hpp:144

mfem::Vector::Sum
real_t Sum() const
Return the sum of the vector entries.
Definition vector.cpp:1204

mfem::Vector::PrintMathematica
void PrintMathematica(std::ostream &out=mfem::out) const
Prints vector as a List for importing into Mathematica.
Definition vector.cpp:883

mfem::Vector::Print_HYPRE
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
Definition vector.cpp:864

mfem::Vector::SetSize
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558

mfem::Vector::Reciprocal
void Reciprocal()
(*this)(i) = 1.0 / (*this)(i)
Definition vector.cpp:383

mfem::Vector::HostWrite
virtual real_t * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:506

mfem::Vector::Normlp
real_t Normlp(real_t p) const
Returns the l_p norm of the vector.
Definition vector.cpp:998

mfem::Vector::operator-=
Vector & operator-=(real_t c)
Definition vector.cpp:280

mfem::Vector::operator=
Vector & operator=(const real_t *v)
Copy Size() entries from v.
Definition vector.cpp:196

mfem::Vector::SetSubVectorComplement
void SetSubVectorComplement(const Array< int > &dofs, const real_t val)
Set all vector entries NOT in the dofs Array to the given val.
Definition vector.cpp:814

mfem::Vector::Min
real_t Min() const
Returns the minimal element of the vector.
Definition vector.cpp:1123

mfem::Vector::GetSubVector
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
Definition vector.cpp:653

mfem::Vector::operator+=
Vector & operator+=(real_t c)
Definition vector.cpp:301

mfem::Vector::Write
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:502

mfem::Vector::Add
Vector & Add(const real_t a, const Vector &Va)
(*this) += a * Va
Definition vector.cpp:322

mfem::Vector::operator()
real_t & operator()(int i)
Access Vector entries using () for 0-based indexing.
Definition vector.hpp:644

mfem::Vector::cross3D
void cross3D(const Vector &vin, Vector &vout) const
Definition vector.cpp:616

mfem::Vector::operator/=
Vector & operator/=(real_t c)
Definition vector.cpp:258

mfem::adios2stream
Definition adios2stream.hpp:48

alpha
const real_t alpha
Definition ex15.cpp:369

dim
int dim
Definition ex24.cpp:53

forall.hpp

kernels.hpp

b
real_t b
Definition lissajous.cpp:42

a
real_t a
Definition lissajous.cpp:41

mfem
Definition CodeDocumentation.dox:1

mfem::infinity
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition vector.hpp:47

mfem::reduce
void reduce(int N, T &res, B &&body, const R &reducer, bool use_dev, Array< T > &workspace)
Performs a 1D reduction on the range [0,N). res holds the initial value and is where the result will be written....
Definition forall.hpp:942

mfem::rand_real
real_t rand_real()
Generate a random real_t number in the interval [0,1) using rand().
Definition vector.hpp:61

mfem::add
void add(const Vector &v1, const Vector &v2, Vector &v)
Definition vector.cpp:391

mfem::MemoryClass::HOST
@ HOST

mfem::ZeroSubnormal
T ZeroSubnormal(T val)
Definition vector.hpp:522

mfem::DeviceCanUseOcca
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
Definition occa.hpp:69

mfem::subtract
void subtract(const Vector &x, const Vector &y, Vector &z)
Definition vector.cpp:547

mfem::real_t
float real_t
Definition config.hpp:43

mfem::OccaMemoryRead
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass....
Definition occa.hpp:37

mfem::forall_switch
void forall_switch(bool use_dev, int N, lambda &&body)
Definition forall.hpp:756

p
real_t p(const Vector &x, real_t t)
Definition navier_mms.cpp:53

mfem::Backend::OMP_MASK
@ OMP_MASK
Bitwise-OR of all OpenMP backends.
Definition device.hpp:93

mfem::DevicePair
Pair of values which can be used in device code.
Definition reducers.hpp:28

mfem::DevicePair::first
A first
Definition reducers.hpp:29

mfem::MaxReducer
a = max(a,b)
Definition reducers.hpp:146

mfem::MinReducer
a = min(a,b)
Definition reducers.hpp:99

mfem::SumReducer
a += b
Definition reducers.hpp:44

vector.hpp