diff --git a/include/cglm-affine.h b/include/cglm-affine.h
index 2a16004..322aca2 100644
--- a/include/cglm-affine.h
+++ b/include/cglm-affine.h
@@ -18,13 +18,13 @@ glm_translate_to(mat4 m, vec3 v, mat4 dest) {
   vec4 v3;
   mat4 t = GLM_MAT_IDENTITY_4F;
 
-  glm_vec_scale4(t[0], v[0], v1);
-  glm_vec_scale4(t[1], v[1], v2);
-  glm_vec_scale4(t[2], v[2], v3);
+  glm_vec4_scale(t[0], v[0], v1);
+  glm_vec4_scale(t[1], v[1], v2);
+  glm_vec4_scale(t[2], v[2], v3);
 
-  glm_vec_add4(v1, t[3], t[3]);
-  glm_vec_add4(v2, t[3], t[3]);
-  glm_vec_add4(v3, t[3], t[3]);
+  glm_vec4_add(v1, t[3], t[3]);
+  glm_vec4_add(v2, t[3], t[3]);
+  glm_vec4_add(v3, t[3], t[3]);
 
   glm__memcpy(float, dest, t, sizeof(mat4));
 }
@@ -36,37 +36,37 @@ glm_translate(mat4 m, vec3 v) {
   vec4 v2;
   vec4 v3;
 
-  glm_vec_scale4(m[0], v[0], v1);
-  glm_vec_scale4(m[1], v[1], v2);
-  glm_vec_scale4(m[2], v[2], v3);
+  glm_vec4_scale(m[0], v[0], v1);
+  glm_vec4_scale(m[1], v[1], v2);
+  glm_vec4_scale(m[2], v[2], v3);
 
-  glm_vec_add4(v1, m[3], m[3]);
-  glm_vec_add4(v2, m[3], m[3]);
-  glm_vec_add4(v3, m[3], m[3]);
+  glm_vec4_add(v1, m[3], m[3]);
+  glm_vec4_add(v2, m[3], m[3]);
+  glm_vec4_add(v3, m[3], m[3]);
 }
 
 CGLM_INLINE
 void
 glm_translate_x(mat4 m, float to) {
   vec4 v1;
-  glm_vec_scale4(m[0], to, v1);
-  glm_vec_add4(v1, m[3], m[3]);
+  glm_vec4_scale(m[0], to, v1);
+  glm_vec4_add(v1, m[3], m[3]);
 }
 
 CGLM_INLINE
 void
 glm_translate_y(mat4 m, float to) {
   vec4 v1;
-  glm_vec_scale4(m[1], to, v1);
-  glm_vec_add4(v1, m[3], m[3]);
+  glm_vec4_scale(m[1], to, v1);
+  glm_vec4_add(v1, m[3], m[3]);
 }
 
 CGLM_INLINE
 void
 glm_translate_z(mat4 m, float to) {
   vec4 v1;
-  glm_vec_scale4(m[2], to, v1);
-  glm_vec_add4(v1, m[3], m[3]);
+  glm_vec4_scale(m[2], to, v1);
+  glm_vec4_add(v1, m[3], m[3]);
 }
 
 /* scale */
@@ -74,11 +74,11 @@ glm_translate_z(mat4 m, float to) {
 CGLM_INLINE
 void
 glm_scale_to(mat4 m, vec3 v, mat4 dest) {
-  glm_vec_scale4(m[0], v[0], dest[0]);
-  glm_vec_scale4(m[1], v[1], dest[1]);
-  glm_vec_scale4(m[2], v[2], dest[2]);
+  glm_vec4_scale(m[0], v[0], dest[0]);
+  glm_vec4_scale(m[1], v[1], dest[1]);
+  glm_vec4_scale(m[2], v[2], dest[2]);
 
-  glm_vec_dup4(m[3], dest[3]);
+  glm_vec4_dup(m[3], dest[3]);
 }
 
 CGLM_INLINE
diff --git a/include/cglm-vec.h b/include/cglm-vec.h
index 182ab2f..4f6baf9 100644
--- a/include/cglm-vec.h
+++ b/include/cglm-vec.h
@@ -9,6 +9,7 @@
 #define cglm_vec_h
 
 #include "cglm.h"
+#include "cglm-intrin.h"
 
 CGLM_INLINE
 void
@@ -20,11 +21,15 @@ glm_vec_dup(vec3 a, vec3 dest) {
 
 CGLM_INLINE
 void
-glm_vec_dup4(vec4 a, vec4 dest) {
-  dest[0] = a[0];
-  dest[1] = a[1];
-  dest[2] = a[2];
-  dest[3] = a[3];
+glm_vec4_dup(vec4 v, vec4 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  _mm_store_ps(dest, _mm_load_ps(v));
+#else
+  dest[0] = v[0];
+  dest[1] = v[1];
+  dest[2] = v[2];
+  dest[3] = v[3];
+#endif
 }
 
 CGLM_INLINE
@@ -98,11 +103,17 @@ glm_vec_add(vec3 v1, vec3 v2, vec3 dest) {
 
 CGLM_INLINE
 void
-glm_vec_add4(vec4 v1, vec4 v2, vec4 dest) {
+glm_vec4_add(vec4 v1, vec4 v2, vec4 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  _mm_store_ps(dest,
+               _mm_add_ps(_mm_load_ps(v1),
+                          _mm_load_ps(v2)));
+#else
   dest[0] = v1[0] + v2[0];
   dest[1] = v1[1] + v2[1];
   dest[2] = v1[2] + v2[2];
   dest[3] = v1[3] + v2[3];
+#endif
 }
 
 CGLM_INLINE
@@ -115,11 +126,17 @@ glm_vec_sub(vec3 v1, vec3 v2, vec3 dest) {
 
 CGLM_INLINE
 void
-glm_vec_sub4(vec4 v1, vec4 v2, vec4 dest) {
+glm_vec4_sub(vec4 v1, vec4 v2, vec4 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  _mm_store_ps(dest,
+               _mm_sub_ps(_mm_load_ps(v1),
+                          _mm_load_ps(v2)));
+#else
   dest[0] = v1[0] - v2[0];
   dest[1] = v1[1] - v2[1];
   dest[2] = v1[2] - v2[2];
   dest[3] = v1[3] - v2[3];
+#endif
 }
 
 CGLM_INLINE
@@ -132,11 +149,17 @@ glm_vec_scale(vec3 v, float s, vec3 dest) {
 
 CGLM_INLINE
 void
-glm_vec_scale4(vec3 v, float s, vec3 dest) {
+glm_vec4_scale(vec4 v, float s, vec4 dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  _mm_store_ps(dest,
+               _mm_mul_ps(_mm_load_ps(v),
+                          _mm_set1_ps(s)));
+#else
   dest[0] = v[0] * s;
   dest[1] = v[1] * s;
   dest[2] = v[2] * s;
   dest[3] = v[3] * s;
+#endif
 }
 
 CGLM_INLINE