12 #ifndef MFEM_SIMD_SVE_HPP 13 #define MFEM_SIMD_SVE_HPP 15 #if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) 17 #include "../../config/tconfig.hpp" 24 #define MFEM_AUTOSIMD_ALIGN_SVE alignas(64) 26 template <
typename,
int,
int>
struct AutoSIMD;
28 template <>
struct MFEM_AUTOSIMD_ALIGN_SVE AutoSIMD<double,8,64>
31 static constexpr
int size = 8;
32 static constexpr
int align_bytes = 64;
45 inline MFEM_ALWAYS_INLINE
const double &
operator[](
int i)
const 52 svst1_f64(svptrue_b64(), vec, svld1_f64(svptrue_b64(),v.
vec));
58 svst1_f64(svptrue_b64(), vec, svdup_n_f64(e));
64 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
65 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
66 svst1_f64(svptrue_b64(), vec, svadd_f64_z(svptrue_b64(),vd,vvd));
72 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
73 svst1_f64(svptrue_b64(), vec, svadd_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
79 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
80 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
81 svst1_f64(svptrue_b64(), vec, svsub_f64_z(svptrue_b64(),vd,vvd));
87 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
88 svst1_f64(svptrue_b64(), vec, svsub_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
94 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
95 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
96 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vd,vvd));
102 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
103 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
109 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
110 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
111 svst1_f64(svptrue_b64(), vec, svdiv_f64_z(svptrue_b64(),vd,vvd));
117 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
118 svst1_f64(svptrue_b64(), vec, svdiv_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
125 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
126 svst1_f64(svptrue_b64(), r.
vec, svneg_f64_z(svptrue_b64(),vd));
138 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
139 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
140 svst1_f64(svptrue_b64(), r.
vec, svadd_f64_z(svptrue_b64(),vd,vvd));
147 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
148 svst1_f64(svptrue_b64(), r.
vec, svadd_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
155 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
156 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
157 svst1_f64(svptrue_b64(), r.
vec, svsub_f64_z(svptrue_b64(),vd,vvd));
164 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
165 svst1_f64(svptrue_b64(), r.
vec, svsub_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
172 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
173 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
174 svst1_f64(svptrue_b64(), r.
vec, svmul_f64_z(svptrue_b64(),vd,vvd));
181 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
182 svst1_f64(svptrue_b64(), r.
vec, svmul_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
189 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
190 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
191 svst1_f64(svptrue_b64(), r.
vec, svdiv_f64_z(svptrue_b64(),vd,vvd));
198 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
199 svst1_f64(svptrue_b64(), r.
vec, svdiv_f64_z(svptrue_b64(),vd,svdup_n_f64(e)));
205 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
206 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
207 const svfloat64_t wvd = svld1_f64(svptrue_b64(), w.
vec);
208 svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),wvd,vd,vvd));
214 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
215 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
216 svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),vvd,svdup_n_f64(e),vd));
222 const svfloat64_t vd = svld1_f64(svptrue_b64(), vec);
223 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
224 svst1_f64(svptrue_b64(), vec, svmad_f64_z(svptrue_b64(),svdup_n_f64(e),vvd,vd));
230 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
231 const svfloat64_t wvd = svld1_f64(svptrue_b64(), w.
vec);
232 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vvd,wvd));
238 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
239 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),vvd,svdup_n_f64(e)));
245 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.
vec);
246 svst1_f64(svptrue_b64(), vec, svmul_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
251 inline MFEM_ALWAYS_INLINE
252 AutoSIMD<double,8,64>
operator+(
const double &e,
const AutoSIMD<double,8,64> &v)
254 AutoSIMD<double,8,64> r;
255 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
256 svst1_f64(svptrue_b64(), r.vec, svadd_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
260 inline MFEM_ALWAYS_INLINE
261 AutoSIMD<double,8,64>
operator-(
const double &e,
const AutoSIMD<double,8,64> &v)
263 AutoSIMD<double,8,64> r;
264 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
265 svst1_f64(svptrue_b64(), r.vec, svsub_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
269 inline MFEM_ALWAYS_INLINE
270 AutoSIMD<double,8,64>
operator*(
const double &e,
const AutoSIMD<double,8,64> &v)
272 AutoSIMD<double,8,64> r;
273 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
274 svst1_f64(svptrue_b64(), r.vec, svmul_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
278 inline MFEM_ALWAYS_INLINE
279 AutoSIMD<double,8,64>
operator/(
const double &e,
const AutoSIMD<double,8,64> &v)
281 AutoSIMD<double,8,64> r;
282 const svfloat64_t vvd = svld1_f64(svptrue_b64(), v.vec);
283 svst1_f64(svptrue_b64(), r.vec, svdiv_f64_z(svptrue_b64(),svdup_n_f64(e),vvd));
289 #endif // __aarch64__ && __ARM_FEATURE_SVE 291 #endif // MFEM_SIMD_SVE_HPP MFEM_ALWAYS_INLINE AutoSIMD & fma(const double &e, const AutoSIMD &v)
MFEM_ALWAYS_INLINE AutoSIMD operator+() const
MFEM_ALWAYS_INLINE AutoSIMD operator*(const double &e) const
MFEM_ALWAYS_INLINE AutoSIMD & operator=(const double &e)
MFEM_ALWAYS_INLINE AutoSIMD & fma(const AutoSIMD &v, const double &e)
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator/(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
MFEM_ALWAYS_INLINE AutoSIMD operator-(const AutoSIMD &v) const
MFEM_ALWAYS_INLINE AutoSIMD & mul(const AutoSIMD &v, const AutoSIMD &w)
MFEM_ALWAYS_INLINE AutoSIMD & operator/=(const double &e)
MFEM_ALWAYS_INLINE AutoSIMD & operator+=(const AutoSIMD &v)
MFEM_ALWAYS_INLINE AutoSIMD & fma(const AutoSIMD &v, const AutoSIMD &w)
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator+(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)
MFEM_ALWAYS_INLINE const double & operator[](int i) const
MFEM_ALWAYS_INLINE AutoSIMD operator+(const AutoSIMD &v) const
MFEM_ALWAYS_INLINE AutoSIMD & mul(const AutoSIMD &v, const double &e)
MFEM_ALWAYS_INLINE AutoSIMD & operator+=(const double &e)
MFEM_ALWAYS_INLINE AutoSIMD operator+(const double &e) const
MFEM_ALWAYS_INLINE double & operator[](int i)
MFEM_ALWAYS_INLINE AutoSIMD & operator-=(const AutoSIMD &v)
MFEM_ALWAYS_INLINE AutoSIMD operator-(const double &e) const
MFEM_ALWAYS_INLINE AutoSIMD & operator/=(const AutoSIMD &v)
MFEM_ALWAYS_INLINE AutoSIMD & operator=(const AutoSIMD &v)
MFEM_ALWAYS_INLINE AutoSIMD operator-() const
MFEM_ALWAYS_INLINE AutoSIMD operator/(const AutoSIMD &v) const
MFEM_ALWAYS_INLINE AutoSIMD & operator*=(const AutoSIMD &v)
MFEM_ALWAYS_INLINE AutoSIMD & mul(const double &e, const AutoSIMD &v)
MFEM_ALWAYS_INLINE AutoSIMD operator*(const AutoSIMD &v) const
MFEM_ALWAYS_INLINE AutoSIMD & operator*=(const double &e)
MFEM_ALWAYS_INLINE AutoSIMD operator/(const double &e) const
MFEM_ALWAYS_INLINE AutoSIMD & operator-=(const double &e)
MemoryClass operator*(MemoryClass mc1, MemoryClass mc2)
Return a suitable MemoryClass from a pair of MemoryClasses.
MFEM_ALWAYS_INLINE AutoSIMD< scalar_t, S, A > operator-(const scalar_t &e, const AutoSIMD< scalar_t, S, A > &v)