From fc7f958167944c2a568a0748b0ab3c159ec9629d Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Fri, 25 Jan 2019 21:56:17 +0300 Subject: [PATCH] simd: remove re-load in SSE4 and SSE3 --- include/cglm/simd/x86.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h index 520a834..99d2b8a 100644 --- a/include/cglm/simd/x86.h +++ b/include/cglm/simd/x86.h @@ -72,10 +72,10 @@ static inline __m128 glmm_vdots(__m128 a, __m128 b) { #if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) - return _mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF); + return _mm_dp_ps(a, b, 0xFF); #elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) __m128 x0, x1; - x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); + x0 = _mm_mul_ps(a, b); x1 = _mm_hadd_ps(x0, x0); return _mm_hadd_ps(x1, x1); #else @@ -87,10 +87,10 @@ static inline __m128 glmm_vdot(__m128 a, __m128 b) { #if (defined(__SSE4_1__) || defined(__SSE4_2__)) && defined(CGLM_SSE4_DOT) - return _mm_dp_ps(glmm_load(a), glmm_load(b), 0xFF); + return _mm_dp_ps(a, b, 0xFF); #elif defined(__SSE3__) && defined(CGLM_SSE3_DOT) __m128 x0, x1; - x0 = _mm_mul_ps(glmm_load(a), glmm_load(b)); + x0 = _mm_mul_ps(a, b); x1 = _mm_hadd_ps(x0, x0); return _mm_hadd_ps(x1, x1); #else