optimize slerp with SSE

This commit is contained in:
Recep Aslantas
2016-09-13 18:36:40 +03:00
parent 5825c24e8f
commit c26c1d7707

View File

@@ -9,6 +9,7 @@
#define cglm_quat_h
#include "cglm.h"
#include "cglm-intrin.h"
#include <math.h>
CGLM_INLINE
@@ -23,8 +24,52 @@ glm_quat_slerp(versor q,
versor r,
float t,
versor dest) {
float cosTheta, sinTheta, angle, a, b;
float cosTheta, sinTheta, angle, a, b, c;
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 xmm_q;
xmm_q = _mm_load_ps(q);
cosTheta = glm_quat_dot(q, r);
if (cosTheta < 0.0f) {
_mm_store_ps(q,
_mm_xor_ps(xmm_q,
_mm_set1_ps(-0.f))) ;
cosTheta = glm_quat_dot(q, r);
}
if (fabs(cosTheta) >= 1.0f) {
_mm_store_ps(dest, xmm_q);
return;
}
sinTheta = sqrt(1.0f - cosTheta * cosTheta);
c = 1.0f - t;
/* LERP */
if (fabs(sinTheta) < 0.001f) {
_mm_store_ps(dest, _mm_add_ps(_mm_mul_ps(_mm_set1_ps(c),
xmm_q),
_mm_mul_ps(_mm_set1_ps(t),
_mm_load_ps(r))));
return;
}
/* SLERP */
angle = acos(cosTheta);
a = sin(c * angle);
b = sin(t * angle);
_mm_store_ps(dest,
_mm_div_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(a),
xmm_q),
_mm_mul_ps(_mm_set1_ps(b),
_mm_load_ps(r))),
_mm_set1_ps(sinTheta)));
#else
cosTheta = glm_quat_dot(q, r);
if (cosTheta < 0.0f) {
q[0] *= -1.0f;
@@ -46,24 +91,28 @@ glm_quat_slerp(versor q,
sinTheta = sqrt(1.0f - cosTheta * cosTheta);
c = 1.0f - t;
/* LERP */
if (fabs(sinTheta) < 0.001f) {
dest[0] = (1.0f - t) * q[0] + t * r[0];
dest[1] = (1.0f - t) * q[1] + t * r[1];
dest[2] = (1.0f - t) * q[2] + t * r[2];
dest[3] = (1.0f - t) * q[3] + t * r[3];
dest[0] = c * q[0] + t * r[0];
dest[1] = c * q[1] + t * r[1];
dest[2] = c * q[2] + t * r[2];
dest[3] = c * q[3] + t * r[3];
return;
}
/* SLERP */
angle = acos(cosTheta);
a = sin((1.0f - t) * angle) / sinTheta;
b = sin(t * angle) / sinTheta;
a = sin(c * angle);
b = sin(t * angle);
dest[0] = q[0] * a + r[0] * b;
dest[1] = q[1] * a + r[1] * b;
dest[2] = q[2] * a + r[2] * b;
dest[3] = q[3] * a + r[3] * b;
dest[0] = (q[0] * a + r[0] * b) / sinTheta;
dest[1] = (q[1] * a + r[1] * b) / sinTheta;
dest[2] = (q[2] * a + r[2] * b) / sinTheta;
dest[3] = (q[3] * a + r[3] * b) / sinTheta;
#endif
}
#endif /* cglm_quat_h */