simd: fix glmm_set1, glmm_splat

This commit is contained in:
Recep Aslantas
2024-04-12 21:53:20 +03:00
parent 14c567d9d9
commit 45c1beff51
4 changed files with 26 additions and 21 deletions

View File

@@ -299,7 +299,7 @@ glm_mat4_inv_neon(mat4 mat, mat4 dest) {
vget_low_f32(vzipq_f32(v2, v3).val[0]));
/*
-x0 = glmm_div(glmm_set1(1.0f), glmm_vhadd(vmulq_f32(x0, r0)));
+x0 = glmm_div(glmm_set1_rval(1.0f), glmm_vhadd(vmulq_f32(x0, r0)));
glmm_store(dest[0], vmulq_f32(v0, x0));
glmm_store(dest[1], vmulq_f32(v1, x0));

View File

@@ -471,15 +471,15 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
x8 = _mm_shuffle_ps(x0, x3, _MM_SHUFFLE(3, 1, 3, 1)); /* k c j b */
x9 = _mm_shuffle_ps(x0, x3, _MM_SHUFFLE(2, 0, 2, 0)); /* o g n f */
-x10 = glmm_shuff1(x2, 2, 0, 2, 0); /* p h p h */
-x11 = glmm_shuff1(x2, 3, 1, 3, 1); /* l d l d */
+x10 = glmm_shuff1(x2, 2, 0, 2, 0); /* p h p h */
+x11 = glmm_shuff1(x2, 3, 1, 3, 1); /* l d l d */
-#if 1 /* TODO measure both */
+#if 0 /* TODO measure both */
x12 = _mm_shuffle_ps(x4, x5, _MM_SHUFFLE(1, 0, 1, 0)); /* i a k c */
x13 = _mm_shuffle_ps(x6, x7, _MM_SHUFFLE(1, 0, 1, 0)); /* m e o g */
#else
-x12 = _mm_movelh_ps(x4, x5); /* i a k c */
-x13 = _mm_movelh_ps(x6, x7); /* m e o g */
+x12 = _mm_movelh_ps(x4, x5); /* i a k c */
+x13 = _mm_movelh_ps(x6, x7); /* m e o g */
#endif
t0 = _mm_mul_ps(x12, x10);
@@ -494,7 +494,7 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
/* v0: c3 * c10 + c4 * c9 + c1 * c8 + c2 * c7 */
/* v1: c5 * c12 + c6 * c11 */
-v5 = glmm_set1(1.0f);
+v5 = glmm_set1_rval(1.0f);
v0 = glmm_shuff1(t2, 2, 3, 0, 1);
v1 = glmm_shuff1(t1, 0, 1, 2, 3);
v0 = _mm_mul_ps(t0, v0);

View File

@@ -37,21 +37,26 @@
#define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)
#ifdef __AVX__
-# define glmm_set1(x) _mm_broadcast_ss(&x)
-# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
-# define glmm_splat_x(x) _mm_broadcastss_ps(x)
-# define glmm_splat_y(x) _mm_permute_ps(x, _MM_SHUFFLE(1, 1, 1, 1))
-# define glmm_splat_z(x) _mm_permute_ps(x, _MM_SHUFFLE(2, 2, 2, 2))
-# define glmm_splat_w(x) _mm_permute_ps(x, _MM_SHUFFLE(3, 3, 3, 3))
+# define glmm_set1(x) _mm_broadcast_ss(&x)
+# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
+# define glmm_set1_rval(x) _mm_set1_ps(x)
+# ifdef __AVX2__
+# define glmm_splat_x(x) _mm_broadcastss_ps(x)
+# else
+# define glmm_splat_x(x) _mm_permute_ps(x, _MM_SHUFFLE(0, 0, 0, 0))
+# endif
+# define glmm_splat_y(x) _mm_permute_ps(x, _MM_SHUFFLE(1, 1, 1, 1))
+# define glmm_splat_z(x) _mm_permute_ps(x, _MM_SHUFFLE(2, 2, 2, 2))
+# define glmm_splat_w(x) _mm_permute_ps(x, _MM_SHUFFLE(3, 3, 3, 3))
#else
-# define glmm_set1(x) _mm_set1_ps(x)
-# define glmm_set1_ptr(x) _mm_set1_ps(*x)
+# define glmm_set1(x) _mm_set1_ps(x)
+# define glmm_set1_ptr(x) _mm_set1_ps(*x)
+# define glmm_set1_rval(x) _mm_set1_ps(x)
-# define glmm_splat_x(x) glmm_splat(x, 0)
-# define glmm_splat_y(x) glmm_splat(x, 1)
-# define glmm_splat_z(x) glmm_splat(x, 2)
-# define glmm_splat_w(x) glmm_splat(x, 3)
+# define glmm_splat_x(x) glmm_splat(x, 0)
+# define glmm_splat_y(x) glmm_splat(x, 1)
+# define glmm_splat_z(x) glmm_splat(x, 2)
+# define glmm_splat_w(x) glmm_splat(x, 3)
#endif
#ifdef __AVX__

View File

@@ -215,7 +215,7 @@ glm_vec4_one(vec4 v) {
#if defined(__wasm__) && defined(__wasm_simd128__)
glmm_store(v, wasm_f32x4_const_splat(1.0f));
#elif defined( __SSE__ ) || defined( __SSE2__ )
-glmm_store(v, glmm_set1(1.0f));
+glmm_store(v, glmm_set1_rval(1.0f));
#elif defined(CGLM_NEON_FP)
vst1q_f32(v, vdupq_n_f32(1.0f));
#else