From faf6186c29bdea611bb185e4788ea5ed61ee5543 Mon Sep 17 00:00:00 2001
From: Recep Aslantas
Date: Sat, 1 May 2021 03:44:04 +0300
Subject: [PATCH] sse: optimize glm_mat2_mul_sse2 with sse

---
 include/cglm/simd/sse2/mat2.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/cglm/simd/sse2/mat2.h b/include/cglm/simd/sse2/mat2.h
index 1f832b0..31b3a29 100644
--- a/include/cglm/simd/sse2/mat2.h
+++ b/include/cglm/simd/sse2/mat2.h
@@ -15,20 +15,23 @@
 CGLM_INLINE
 void
 glm_mat2_mul_sse2(mat2 m1, mat2 m2, mat2 dest) {
-  __m128 x0, x1, x2;
+  __m128 x0, x1, x2, x3, x4;
 
   x1 = glmm_load(m1[0]); /* d c b a */
   x2 = glmm_load(m2[0]); /* h g f e */
 
+  x3 = glmm_shuff1(x2, 2, 2, 0, 0);
+  x4 = glmm_shuff1(x2, 3, 3, 1, 1);
+  x0 = _mm_movelh_ps(x1, x1);
+  x2 = _mm_movehl_ps(x1, x1);
+
   /*
    dest[0][0] = a * e + c * f;
    dest[0][1] = b * e + d * f;
    dest[1][0] = a * g + c * h;
    dest[1][1] = b * g + d * h;
    */
-  x0 = glmm_fmadd(_mm_movelh_ps(x1, x1), glmm_shuff1(x2, 2, 2, 0, 0),
-                  _mm_mul_ps(_mm_movehl_ps(x1, x1),
-                             glmm_shuff1(x2, 3, 3, 1, 1)));
+  x0 = glmm_fmadd(x0, x3, _mm_mul_ps(x2, x4));
 
   glmm_store(dest[0], x0);
 }