use the non-SIMD scale function in the non-SIMD matrix inverse

* this is faster; mixing hand-written SSE with compiler-generated code probably causes cache misses
2026-07-08 09:48:25 +00:00 · 2016-09-16 00:56:00 +03:00
parent 8a083c4873
commit 184069361a
1 changed files with 11 additions and 5 deletions
--- a/include/cglm-mat.h
+++ b/include/cglm-mat.h
@@ -126,14 +126,20 @@ glm_mat_transpose_self(mat4 m) {

 CGLM_INLINE
 void
-glm_mat4_scale(mat4 m, float s) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
-  CGLM_MAT_SCALE_SSE_4x4f(m, s);
-#else
+glm_mat4_scale_p(mat4 m, float s) {
  m[0][0] *= s; m[0][1] *= s; m[0][2] *= s; m[0][3] *= s;
  m[1][0] *= s; m[1][1] *= s; m[1][2] *= s; m[1][3] *= s;
  m[2][0] *= s; m[2][1] *= s; m[2][2] *= s; m[2][3] *= s;
  m[3][0] *= s; m[3][1] *= s; m[3][2] *= s; m[3][3] *= s;
+}
+
+CGLM_INLINE
+void
+glm_mat4_scale(mat4 m, float s) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  CGLM_MAT_SCALE_SSE_4x4f(m, s);
+#else
+  glm_mat4_scale_p(m, s);
 #endif
 }

@@ -216,7 +222,7 @@ glm_mat4_inv(mat4 mat, mat4 dest) {
  det = 1.0f / (a * dest[0][0] + b * dest[1][0]
              + c * dest[2][0] + d * dest[3][0]);

-  glm_mat4_scale(dest, det);
+  glm_mat4_scale_p(dest, det);
 }

 #endif /* cglm_mat_h */