From f65f1d491bec03b6de58fb12918e7083f7f5a1ce Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Tue, 22 Jan 2019 09:23:51 +0300 Subject: [PATCH] simd: optimize vec4_distance with sse and neon --- include/cglm/vec4.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index 5d0e466..03dc405 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -707,10 +707,25 @@ glm_vec4_normalize(vec4 v) { CGLM_INLINE float glm_vec4_distance(vec4 a, vec4 b) { +#if defined( __SSE__ ) || defined( __SSE2__ ) + __m128 x0; + x0 = _mm_sub_ps(glmm_load(b), glmm_load(a)); + x0 = _mm_mul_ps(x0, x0); + x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2)); + return _mm_cvtss_f32(_mm_sqrt_ss(_mm_add_ss(x0, + glmm_shuff1(x0, 0, 1, 0, 1)))); +#elif defined(CGLM_NEON_FP) + float32x4_t v0; + float32_t r; + v0 = vsubq_f32(vld1q_f32(a), vld1q_f32(b)); + r = vaddvq_f32(vmulq_f32(v0, v0)); + return sqrtf(r); +#else return sqrtf(glm_pow2(b[0] - a[0]) + glm_pow2(b[1] - a[1]) + glm_pow2(b[2] - a[2]) + glm_pow2(b[3] - a[3])); +#endif } /*!