From 7f9585ca725029bcc7bad389f677071dcbf6669b Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Fri, 7 May 2021 01:46:24 +0300 Subject: [PATCH] arm, neon: improve hadd performance --- include/cglm/simd/arm.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h index d0c4bc3..50cec46 100644 --- a/include/cglm/simd/arm.h +++ b/include/cglm/simd/arm.h @@ -47,8 +47,13 @@ glmm_abs(float32x4_t v) { static inline float32x4_t glmm_vhadd(float32x4_t v) { - v = vaddq_f32(v, vrev64q_f32(v)); - return vaddq_f32(v, vcombine_f32(vget_high_f32(v), vget_low_f32(v))); + return vaddq_f32(vaddq_f32(glmm_splat_x(v), glmm_splat_y(v)), + vaddq_f32(glmm_splat_z(v), glmm_splat_w(v))); + /* + this seems slower: + v = vaddq_f32(v, vrev64q_f32(v)); + return vaddq_f32(v, vcombine_f32(vget_high_f32(v), vget_low_f32(v))); + */ } static inline