Files
cglm/include/cglm/simd/sse2/quat.h
Recep Aslantas 6f69da361b quaternion multiplication
* convert quaternion multiplication to xyzw
* previous implementation may be wrong, wikipedia version implemented
* implement SSE version
2018-04-09 23:56:09 +03:00

99 lines
2.5 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c), Recep Aslantas.
*
* MIT License (MIT), http://opensource.org/licenses/MIT
* Full license can be found in the LICENSE file
*/
#ifndef cglm_quat_simd_h
#define cglm_quat_simd_h
#if defined( __SSE__ ) || defined( __SSE2__ )
#include "../../common.h"
#include "../intrin.h"
CGLM_INLINE
void
glm_quat_mul_sse2(versor p, versor q, versor dest) {
/*
+ (a1 b2 + b1 a2 + c1 d2 d1 c2)i
+ (a1 c2 b1 d2 + c1 a2 + d1 b2)j
+ (a1 d2 + b1 c2 c1 b2 + d1 a2)k
a1 a2 b1 b2 c1 c2 d1 d2
*/
__m128 xp, xq, x0, r;
xp = _mm_load_ps(p); /* 3 2 1 0 */
xq = _mm_load_ps(q);
r = _mm_mul_ps(_mm_shuffle1_ps1(xp, 3), xq);
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 0, 1, 2, 3)));
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 1, 0, 3, 2)));
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 2, 3, 0, 1)));
_mm_store_ps(dest, r);
}
CGLM_INLINE
void
glm_quat_slerp_sse2(versor q,
versor r,
float t,
versor dest) {
/* https://en.wikipedia.org/wiki/Slerp */
float cosTheta, sinTheta, angle, a, b, c;
__m128 xmm_q;
xmm_q = _mm_load_ps(q);
cosTheta = glm_vec4_dot(q, r);
if (cosTheta < 0.0f) {
_mm_store_ps(q,
_mm_xor_ps(xmm_q,
_mm_set1_ps(-0.f))) ;
cosTheta = -cosTheta;
}
if (cosTheta >= 1.0f) {
_mm_store_ps(dest, xmm_q);
return;
}
sinTheta = sqrtf(1.0f - cosTheta * cosTheta);
c = 1.0f - t;
/* LERP */
if (sinTheta < 0.001f) {
_mm_store_ps(dest, _mm_add_ps(_mm_mul_ps(_mm_set1_ps(c),
xmm_q),
_mm_mul_ps(_mm_set1_ps(t),
_mm_load_ps(r))));
return;
}
/* SLERP */
angle = acosf(cosTheta);
a = sinf(c * angle);
b = sinf(t * angle);
_mm_store_ps(dest,
_mm_div_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(a),
xmm_q),
_mm_mul_ps(_mm_set1_ps(b),
_mm_load_ps(r))),
_mm_set1_ps(sinTheta)));
}
#endif
#endif /* cglm_quat_simd_h */