html/sve_8hpp_source.html

// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced

// at the Lawrence Livermore National Laboratory. All Rights reserved. See files

// LICENSE and NOTICE for details. LLNL-CODE-806117.

//

// This file is part of the MFEM library. For more information and source code

// availability visit https://mfem.org.

//

// MFEM is free software; you can redistribute it and/or modify it under the

// terms of the BSD-3 license. We welcome feedback and contributions, see file

// CONTRIBUTING.md for details.


#ifndef MFEM_SIMD_SVE_HPP

#define MFEM_SIMD_SVE_HPP


#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)


#include "../../config/tconfig.hpp"

#include <arm_sve.h>


namespace mfem

{


// Use this macro as a workaround for astyle formatting issue with 'alignas'
// It expands to a 64-byte alignment specifier and is applied to the
// AutoSIMD<double,8,64> specialization defined in this header.

#define MFEM_AUTOSIMD_ALIGN_SVE alignas(64)


// Forward declaration of the primary template; only declared here. The
// specialization in this header uses the parameters as
// <scalar type, number of entries, alignment in bytes>.
template <typename,int,int> struct AutoSIMD;


/// Specialization of AutoSIMD for 8 doubles with 64-byte alignment, written
/// with ARM SVE intrinsics.
/**  Every operation loads the operands from the @a vec arrays, applies one
     SVE intrinsic under an all-true 64-bit predicate (svptrue_b64()) and
     stores the result back into a @a vec array.

     NOTE(review): the loads/stores process one full hardware vector, so this
     code appears to assume a 512-bit SVE vector length (8 doubles) -- confirm
     that the build configuration guarantees this. */
template <> struct MFEM_AUTOSIMD_ALIGN_SVE AutoSIMD<double,8,64>
{
   typedef double scalar_type;
   static constexpr int size = 8;
   static constexpr int align_bytes = 64;

   /// Storage for the entries of the SIMD vector.
   double vec[size];

   AutoSIMD() = default;

   AutoSIMD(const AutoSIMD &) = default;

   /// Read/write access to entry @a i (no bounds checking).
   inline MFEM_ALWAYS_INLINE double &operator[](int i)
   {
      return vec[i];
   }

   /// Read-only access to entry @a i (no bounds checking).
   inline MFEM_ALWAYS_INLINE const double &operator[](int i) const
   {
      return vec[i];
   }

   /// Copy the entries of @a v into this vector.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator=(const AutoSIMD &v)
   {
      svst1_f64(svptrue_b64(), vec, svld1_f64(svptrue_b64(),v.vec));
      return *this;
   }

   /// Set the entries of this vector to the scalar @a e.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator=(const double &e)
   {
      svst1_f64(svptrue_b64(), vec, svdup_n_f64(e));
      return *this;
   }

   /// Entry-wise: this += v.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator+=(const AutoSIMD &v)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svadd_f64_z(svptrue_b64(),vd,vvd));
      return *this;
   }

   /// Entry-wise: this += e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator+=(const double &e)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), vec, svadd_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return *this;
   }

   /// Entry-wise: this -= v.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator-=(const AutoSIMD &v)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svsub_f64_z(svptrue_b64(),vd,vvd));
      return *this;
   }

   /// Entry-wise: this -= e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator-=(const double &e)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), vec, svsub_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return *this;
   }

   /// Entry-wise: this *= v.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator*=(const AutoSIMD &v)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vd,vvd));
      return *this;
   }

   /// Entry-wise: this *= e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator*=(const double &e)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return *this;
   }

   /// Entry-wise: this /= v.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator/=(const AutoSIMD &v)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svdiv_f64_z(svptrue_b64(),vd,vvd));
      return *this;
   }

   /// Entry-wise: this /= e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &operator/=(const double &e)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), vec, svdiv_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return *this;
   }

   /// Unary minus: return a new vector holding the negated entries.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator-() const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), r.vec, svneg_f64_z(svptrue_b64(),vd));
      return r;
   }

   /// Unary plus: return a copy of this vector.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator+() const
   {
      return *this;
   }

   /// Return the entry-wise sum this + v.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator+(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), r.vec, svadd_f64_z(svptrue_b64(),vd,vvd));
      return r;
   }

   /// Return this + e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator+(const double &e) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), r.vec, svadd_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return r;
   }

   /// Return the entry-wise difference this - v.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator-(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), r.vec, svsub_f64_z(svptrue_b64(),vd,vvd));
      return r;
   }

   /// Return this - e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator-(const double &e) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), r.vec, svsub_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return r;
   }

   /// Return the entry-wise product this * v.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator*(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), r.vec, svmul_f64_z(svptrue_b64(),vd,vvd));
      return r;
   }

   /// Return this * e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator*(const double &e) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), r.vec, svmul_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return r;
   }

   /// Return the entry-wise quotient this / v.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator/(const AutoSIMD &v) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), r.vec, svdiv_f64_z(svptrue_b64(),vd,vvd));
      return r;
   }

   /// Return this / e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD operator/(const double &e) const
   {
      AutoSIMD r;
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      svst1_f64(svptrue_b64(), r.vec, svdiv_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
      return r;
   }

   /// Fused multiply-add: this += v * w (entry-wise).
   inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const AutoSIMD &v, const AutoSIMD &w)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      const svfloat64_t wvd = svld1_f64(svptrue_b64(), w.vec);
      // svmad(pg, a, b, c) computes a*b + c, so the current value of this
      // vector must be the last operand (the addend), exactly as in the
      // scalar overloads of fma() below.
      svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),vvd,wvd,vd));
      return *this;
   }

   /// Fused multiply-add: this += v * e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const AutoSIMD &v, const double &e)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),vvd,svdup_n_f64(e),vd));
      return *this;
   }

   /// Fused multiply-add: this += e * v, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &fma(const double &e, const AutoSIMD &v)
   {
      const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),svdup_n_f64(e),vvd,vd));
      return *this;
   }

   /// Overwrite this vector with the entry-wise product v * w.
   inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const AutoSIMD &v, const AutoSIMD &w)
   {
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      const svfloat64_t wvd = svld1_f64(svptrue_b64(), w.vec);
      svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vvd,wvd));
      return *this;
   }

   /// Overwrite this vector with v * e, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const AutoSIMD &v,const double &e)
   {
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vvd,svdup_n_f64(e)));
      return *this;
   }

   /// Overwrite this vector with e * v, with the scalar @a e broadcast.
   inline MFEM_ALWAYS_INLINE AutoSIMD &mul(const double &e, const AutoSIMD &v)
   {
      const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
      svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
      return *this;
   }

};


/// Scalar-plus-vector: returns e + v, computed lane-wise with the scalar
/// @a e broadcast, under an all-true 64-bit SVE predicate.
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator+(const double &e, const AutoSIMD<double,8,64> &v)
{
   const auto all_lanes = svptrue_b64();
   const auto rhs = svld1_f64(all_lanes, v.vec);
   const auto lhs = svdup_n_f64(e);

   AutoSIMD<double,8,64> sum;
   svst1_f64(all_lanes, sum.vec, svadd_f64_z(all_lanes, lhs, rhs));
   return sum;
}


/// Scalar-minus-vector: returns e - v, computed lane-wise with the scalar
/// @a e broadcast, under an all-true 64-bit SVE predicate.
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator-(const double &e, const AutoSIMD<double,8,64> &v)
{
   const auto all_lanes = svptrue_b64();
   const auto rhs = svld1_f64(all_lanes, v.vec);
   const auto lhs = svdup_n_f64(e);

   AutoSIMD<double,8,64> diff;
   svst1_f64(all_lanes, diff.vec, svsub_f64_z(all_lanes, lhs, rhs));
   return diff;
}


/// Scalar-times-vector: returns e * v, computed lane-wise with the scalar
/// @a e broadcast, under an all-true 64-bit SVE predicate.
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator*(const double &e, const AutoSIMD<double,8,64> &v)
{
   const auto all_lanes = svptrue_b64();
   const auto rhs = svld1_f64(all_lanes, v.vec);
   const auto lhs = svdup_n_f64(e);

   AutoSIMD<double,8,64> prod;
   svst1_f64(all_lanes, prod.vec, svmul_f64_z(all_lanes, lhs, rhs));
   return prod;
}


/// Scalar-over-vector: returns e / v, computed lane-wise with the scalar
/// @a e broadcast, under an all-true 64-bit SVE predicate.
inline MFEM_ALWAYS_INLINE
AutoSIMD<double,8,64> operator/(const double &e, const AutoSIMD<double,8,64> &v)
{
   const auto all_lanes = svptrue_b64();
   const auto rhs = svld1_f64(all_lanes, v.vec);
   const auto lhs = svdup_n_f64(e);

   AutoSIMD<double,8,64> quot;
   svst1_f64(all_lanes, quot.vec, svdiv_f64_z(all_lanes, lhs, rhs));
   return quot;
}


} // namespace mfem


#endif // __aarch64__ && __ARM_FEATURE_SVE


#endif // MFEM_SIMD_SVE_HPP

mfem
Definition CodeDocumentation.dox:1

mfem::operator*
MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
Return a suitable MemoryClass from a pair of MemoryClasses.
Definition mem_manager.cpp:130

mfem::operator+
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator+(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
Definition auto.hpp:238

mfem::operator-
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator-(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
Definition auto.hpp:249

mfem::operator/
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator/(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
Definition auto.hpp:271

mfem::AutoSIMD< double, 8, 64 >::operator*=
MFEM_ALWAYS_INLINE AutoSIMD & operator*=(const double &e)
Definition sve.hpp:100

mfem::AutoSIMD< double, 8, 64 >::AutoSIMD
AutoSIMD(const AutoSIMD &)=default

mfem::AutoSIMD< double, 8, 64 >::operator[]
MFEM_ALWAYS_INLINE const double & operator[](int i) const
Definition sve.hpp:45

mfem::AutoSIMD< double, 8, 64 >::operator+
MFEM_ALWAYS_INLINE AutoSIMD operator+() const
Definition sve.hpp:130

mfem::AutoSIMD< double, 8, 64 >::fma
MFEM_ALWAYS_INLINE AutoSIMD & fma(const double &e, const AutoSIMD &v)
Definition sve.hpp:220

mfem::AutoSIMD< double, 8, 64 >::operator*
MFEM_ALWAYS_INLINE AutoSIMD operator*(const AutoSIMD &v) const
Definition sve.hpp:169

mfem::AutoSIMD< double, 8, 64 >::operator=
MFEM_ALWAYS_INLINE AutoSIMD & operator=(const AutoSIMD &v)
Definition sve.hpp:50

mfem::AutoSIMD< double, 8, 64 >::operator-=
MFEM_ALWAYS_INLINE AutoSIMD & operator-=(const AutoSIMD &v)
Definition sve.hpp:77

mfem::AutoSIMD< double, 8, 64 >::fma
MFEM_ALWAYS_INLINE AutoSIMD & fma(const AutoSIMD &v, const AutoSIMD &w)
Definition sve.hpp:203

mfem::AutoSIMD< double, 8, 64 >::operator-=
MFEM_ALWAYS_INLINE AutoSIMD & operator-=(const double &e)
Definition sve.hpp:85

mfem::AutoSIMD< double, 8, 64 >::operator+=
MFEM_ALWAYS_INLINE AutoSIMD & operator+=(const double &e)
Definition sve.hpp:70

mfem::AutoSIMD< double, 8, 64 >::operator+=
MFEM_ALWAYS_INLINE AutoSIMD & operator+=(const AutoSIMD &v)
Definition sve.hpp:62

mfem::AutoSIMD< double, 8, 64 >::operator+
MFEM_ALWAYS_INLINE AutoSIMD operator+(const AutoSIMD &v) const
Definition sve.hpp:135

mfem::AutoSIMD< double, 8, 64 >::operator/
MFEM_ALWAYS_INLINE AutoSIMD operator/(const double &e) const
Definition sve.hpp:195

mfem::AutoSIMD< double, 8, 64 >::scalar_type
double scalar_type
Definition sve.hpp:30

mfem::AutoSIMD< double, 8, 64 >::fma
MFEM_ALWAYS_INLINE AutoSIMD & fma(const AutoSIMD &v, const double &e)
Definition sve.hpp:212

mfem::AutoSIMD< double, 8, 64 >::operator=
MFEM_ALWAYS_INLINE AutoSIMD & operator=(const double &e)
Definition sve.hpp:56

mfem::AutoSIMD< double, 8, 64 >::operator[]
MFEM_ALWAYS_INLINE double & operator[](int i)
Definition sve.hpp:40

mfem::AutoSIMD< double, 8, 64 >::operator+
MFEM_ALWAYS_INLINE AutoSIMD operator+(const double &e) const
Definition sve.hpp:144

mfem::AutoSIMD< double, 8, 64 >::operator-
MFEM_ALWAYS_INLINE AutoSIMD operator-() const
Definition sve.hpp:122

mfem::AutoSIMD< double, 8, 64 >::AutoSIMD
AutoSIMD()=default

mfem::AutoSIMD< double, 8, 64 >::operator*
MFEM_ALWAYS_INLINE AutoSIMD operator*(const double &e) const
Definition sve.hpp:178

mfem::AutoSIMD< double, 8, 64 >::operator*=
MFEM_ALWAYS_INLINE AutoSIMD & operator*=(const AutoSIMD &v)
Definition sve.hpp:92

mfem::AutoSIMD< double, 8, 64 >::operator/=
MFEM_ALWAYS_INLINE AutoSIMD & operator/=(const double &e)
Definition sve.hpp:115

mfem::AutoSIMD< double, 8, 64 >::operator-
MFEM_ALWAYS_INLINE AutoSIMD operator-(const AutoSIMD &v) const
Definition sve.hpp:152

mfem::AutoSIMD< double, 8, 64 >::operator-
MFEM_ALWAYS_INLINE AutoSIMD operator-(const double &e) const
Definition sve.hpp:161

mfem::AutoSIMD< double, 8, 64 >::operator/=
MFEM_ALWAYS_INLINE AutoSIMD & operator/=(const AutoSIMD &v)
Definition sve.hpp:107

mfem::AutoSIMD< double, 8, 64 >::mul
MFEM_ALWAYS_INLINE AutoSIMD & mul(const AutoSIMD &v, const double &e)
Definition sve.hpp:236

mfem::AutoSIMD< double, 8, 64 >::mul
MFEM_ALWAYS_INLINE AutoSIMD & mul(const double &e, const AutoSIMD &v)
Definition sve.hpp:243

mfem::AutoSIMD< double, 8, 64 >::mul
MFEM_ALWAYS_INLINE AutoSIMD & mul(const AutoSIMD &v, const AutoSIMD &w)
Definition sve.hpp:228

mfem::AutoSIMD< double, 8, 64 >::operator/
MFEM_ALWAYS_INLINE AutoSIMD operator/(const AutoSIMD &v) const
Definition sve.hpp:186

mfem::AutoSIMD
Definition vsx128.hpp:27

mfem::AutoSIMD::vec
scalar_t vec[size]
Definition auto.hpp:30

tconfig.hpp