From 184069361aac00a8819cf2ef6de13f6927f9cd6f Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Fri, 16 Sep 2016 00:56:00 +0300 Subject: [PATCH] use non-SIMD scale version for non-SIMD inverse * this way is faster; mixing manual SSE with compiler-generated code probably causes cache misses --- include/cglm-mat.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/cglm-mat.h b/include/cglm-mat.h index ac85561..0c62581 100644 --- a/include/cglm-mat.h +++ b/include/cglm-mat.h @@ -126,14 +126,20 @@ glm_mat_transpose_self(mat4 m) { CGLM_INLINE void -glm_mat4_scale(mat4 m, float s) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - CGLM_MAT_SCALE_SSE_4x4f(m, s); -#else +glm_mat4_scale_p(mat4 m, float s) { m[0][0] *= s; m[0][1] *= s; m[0][2] *= s; m[0][3] *= s; m[1][0] *= s; m[1][1] *= s; m[1][2] *= s; m[1][3] *= s; m[2][0] *= s; m[2][1] *= s; m[2][2] *= s; m[2][3] *= s; m[3][0] *= s; m[3][1] *= s; m[3][2] *= s; m[3][3] *= s; +} + +CGLM_INLINE +void +glm_mat4_scale(mat4 m, float s) { +#if defined( __SSE__ ) || defined( __SSE2__ ) + CGLM_MAT_SCALE_SSE_4x4f(m, s); +#else + glm_mat4_scale_p(m, s); #endif } @@ -216,7 +222,7 @@ glm_mat4_inv(mat4 mat, mat4 dest) { det = 1.0f / (a * dest[0][0] + b * dest[1][0] + c * dest[2][0] + d * dest[3][0]); - glm_mat4_scale(dest, det); + glm_mat4_scale_p(dest, det); } #endif /* cglm_mat_h */