optimize normalize quaternion with SIMD

* provide _to version for storing into another quat
2025-12-24 20:34:58 +00:00 · 2018-04-11 00:17:41 +03:00
parent 5dec68823c
commit 010dcc9837
2 changed files with 48 additions and 10 deletions
--- a/include/cglm/quat.h
+++ b/include/cglm/quat.h
@@ -158,6 +158,43 @@ glm_quat_norm(versor q) {
  return glm_vec4_norm(q);
 }

+/*!
+ * @brief normalize quaternion and store result in dest; if the squared
+ *        norm of q is not positive, glm_quat_identity(dest) is used instead
+ *
+ * @param[in]   q     quaternion to normalize
+ * @param[out]  dest  destination quaternion (may be q itself)
+ */
+CGLM_INLINE
+void
+glm_quat_normalize_to(versor q, versor dest) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  __m128 xdot, x0;
+  float  dot;
+
+  x0   = _mm_load_ps(q);
+  xdot = glm_simd_dot(x0, x0);
+  dot  = _mm_cvtss_f32(xdot);
+
+  if (dot <= 0.0f) {
+    glm_quat_identity(dest);
+    return;
+  }
+
+  /* relies on glm_simd_dot leaving the dot product in every lane of xdot */
+  _mm_store_ps(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
+#else
+  float dot = glm_vec4_norm2(q);
+
+  if (dot <= 0.0f) {
+    glm_quat_identity(dest);
+    return;
+  }
+
+  glm_vec4_scale(q, 1.0f / sqrtf(dot), dest);
+#endif
+}
+
 /*!
 * @brief normalize quaternion
 *
@@ -166,16 +203,7 @@ glm_quat_norm(versor q) {
 CGLM_INLINE
 void
 glm_quat_normalize(versor q) {
-  float sum;
-
-  sum = glm_vec4_norm2(q);
-
-  if (sum <= 0.0f) {
-    glm_quat_identity(q);
-    return;
-  }
-
-  glm_vec4_scale(q, 1.0f / sqrtf(sum), q);
+  glm_quat_normalize_to(q, q); /* in place: q is both source and destination */
 }

 /*!
--- a/include/cglm/simd/intrin.h
+++ b/include/cglm/simd/intrin.h
@@ -30,6 +30,16 @@
 #  define _mm_shuffle2_ps(a, b, z0, y0, x0, w0, z1, y1, x1, w1)                \
     _mm_shuffle1_ps(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)),        \
                                    z1, y1, x1, w1)
+
+CGLM_INLINE
+__m128
+glm_simd_dot(__m128 a, __m128 b) {
+  /* lane-wise products, folded together by two shuffle+add steps */
+  __m128 prod = _mm_mul_ps(a, b);
+  __m128 pair = _mm_add_ps(prod, _mm_shuffle1_ps(prod, 1, 0, 3, 2));
+  return _mm_add_ps(pair, _mm_shuffle1_ps(pair, 0, 1, 0, 1));
+}
+
 #endif

 /* x86, x64 */