simd128: enable in quat

This commit is contained in:
myfreeer
2023-03-06 16:46:48 +08:00
parent 84c521c203
commit a0dd85f3d1
2 changed files with 22 additions and 2 deletions

View File

@@ -70,6 +70,10 @@
# include "simd/neon/quat.h" # include "simd/neon/quat.h"
#endif #endif
#ifdef CGLM_SIMD_WASM
# include "simd/wasm/quat.h"
#endif
CGLM_INLINE void glm_quat_normalize(versor q); CGLM_INLINE void glm_quat_normalize(versor q);
/* /*
@@ -252,6 +256,20 @@ glm_quat_normalize_to(versor q, versor dest) {
} }
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot))); glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
#elif defined(__wasm__) && defined(__wasm_simd128__)
glmm_128 xdot, x0;
float dot;
x0 = glmm_load(q);
xdot = glmm_vdot(x0, x0);
dot = _mm_cvtss_f32(xdot);
if (dot <= 0.0f) {
glm_quat_identity(dest);
return;
}
glmm_store(dest, wasm_f32x4_div(x0, wasm_f32x4_sqrt(xdot)));
#else #else
float dot; float dot;
@@ -440,6 +458,8 @@ glm_quat_mul(versor p, versor q, versor dest) {
*/ */
#if defined( __SSE__ ) || defined( __SSE2__ ) #if defined( __SSE__ ) || defined( __SSE2__ )
glm_quat_mul_sse2(p, q, dest); glm_quat_mul_sse2(p, q, dest);
#elif defined(__wasm__) && defined(__wasm_simd128__)
glm_quat_mul_wasm(p, q, dest);
#elif defined(CGLM_NEON_FP) #elif defined(CGLM_NEON_FP)
glm_quat_mul_neon(p, q, dest); glm_quat_mul_neon(p, q, dest);
#else #else

View File

@@ -14,7 +14,7 @@
CGLM_INLINE CGLM_INLINE
void void
glm_quat_mul_sse2(versor p, versor q, versor dest) { glm_quat_mul_wasm(versor p, versor q, versor dest) {
/* /*
+ (a1 b2 + b1 a2 + c1 d2 - d1 c2)i + (a1 b2 + b1 a2 + c1 d2 - d1 c2)i
+ (a1 c2 - b1 d2 + c1 a2 + d1 b2)j + (a1 c2 - b1 d2 + c1 a2 + d1 b2)j
@@ -26,7 +26,7 @@ glm_quat_mul_sse2(versor p, versor q, versor dest) {
xp = glmm_load(p); /* 3 2 1 0 */ xp = glmm_load(p); /* 3 2 1 0 */
xq = glmm_load(q); xq = glmm_load(q);
x1 = wasm_f32x4_make(-0.f, 0.f, -0.f, 0.f); /* TODO: _mm_set1_ss() + shuff ? */ x1 = _mm_set_ps(-0.f, 0.f, -0.f, 0.f); /* TODO: _mm_set1_ss() + shuff ? */
r = wasm_f32x4_mul(glmm_splat_w(xp), xq); r = wasm_f32x4_mul(glmm_splat_w(xp), xq);
x2 = _mm_unpackhi_ps(x1, x1); x2 = _mm_unpackhi_ps(x1, x1);