diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h index eb999f1..3e3bb22 100644 --- a/include/cglm/simd/arm.h +++ b/include/cglm/simd/arm.h @@ -63,10 +63,14 @@ static inline float32x4_t glmm_max(float32x4_t a, float32x4_t b) { return vmaxq_ static inline float32x4_t glmm_vhadd(float32x4_t v) { +#if CGLM_ARM64 float32x4_t p; p = vpaddq_f32(v, v); /* [a+b, c+d, a+b, c+d] */ return vpaddq_f32(p, p); /* [t, t, t, t] */; - +#else + return vaddq_f32(vaddq_f32(glmm_splat_x(v), glmm_splat_y(v)), + vaddq_f32(glmm_splat_z(v), glmm_splat_w(v))); +#endif /* TODO: measure speed of this compare to above */ /* return vdupq_n_f32(vaddvq_f32(v)); */