From e90f6b505db93f734275f1cfff7c6da9560862c4 Mon Sep 17 00:00:00 2001
From: Recep Aslantas
Date: Tue, 13 Sep 2016 12:29:01 +0300
Subject: [PATCH] optimize individual translates

---
/*
 * Reviewer notes (this region is ignored when the patch is applied):
 *
 * - glm_translate_x / glm_translate_y / glm_translate_z each gain an SSE
 *   branch, selected when __SSE__ or __SSE2__ is defined.  The previous
 *   scalar body is kept unchanged under #else.
 * - The SSE branch loads column m[k] (k = 0, 1, 2 for x, y, z), multiplies
 *   all four lanes by `to`, adds column m[3], and stores the result back
 *   into m[3], i.e. m[3] = m[k] * to + m[3].
 * - Presumably this equals the fallback glm_vec4_scale + glm_vec4_add
 *   sequence; those helpers are not shown in this patch -- verify.
 * - NOTE(review): _mm_load_ps / _mm_store_ps are the aligned load/store
 *   intrinsics -- confirm that mat4 columns are 16-byte aligned, or switch
 *   to the unaligned variants.
 * - NOTE(review): this hunk does not add an #include for the SSE
 *   intrinsics header -- confirm it is already pulled in by this file.
 * - NOTE(review): the statement terminator sits alone on its own line
 *   after each _mm_store_ps call; harmless, but unusual.
 */
 include/cglm-affine.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/include/cglm-affine.h b/include/cglm-affine.h
index f0a3c3a..fc8216a 100644
--- a/include/cglm-affine.h
+++ b/include/cglm-affine.h
@@ -75,25 +75,49 @@ glm_translate(mat4 m, vec3 v) {
 CGLM_INLINE
 void
 glm_translate_x(mat4 m, float to) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  _mm_store_ps(m[3],
+               _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
+                                     _mm_set1_ps(to)),
+                          _mm_load_ps(m[3])))
+  ;
+#else
   vec4 v1;
   glm_vec4_scale(m[0], to, v1);
   glm_vec4_add(v1, m[3], m[3]);
+#endif
 }
 
 CGLM_INLINE
 void
 glm_translate_y(mat4 m, float to) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  _mm_store_ps(m[3],
+               _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[1]),
+                                     _mm_set1_ps(to)),
+                          _mm_load_ps(m[3])))
+  ;
+#else
   vec4 v1;
   glm_vec4_scale(m[1], to, v1);
   glm_vec4_add(v1, m[3], m[3]);
+#endif
 }
 
 CGLM_INLINE
 void
 glm_translate_z(mat4 m, float to) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  _mm_store_ps(m[3],
+               _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
+                                     _mm_set1_ps(to)),
+                          _mm_load_ps(m[3])))
+  ;
+#else
   vec4 v1;
   glm_vec4_scale(m[2], to, v1);
   glm_vec4_add(v1, m[3], m[3]);
+#endif
 }
 
 /* scale */