Mirror of https://github.com/recp/cglm.git (last synced 2025-10-03 16:51:35 +00:00)
simd: fix glmm_set1, glmm_splat
This commit is contained in:
@@ -299,7 +299,7 @@ glm_mat4_inv_neon(mat4 mat, mat4 dest) {
|
||||
vget_low_f32(vzipq_f32(v2, v3).val[0]));
|
||||
|
||||
/*
|
||||
x0 = glmm_div(glmm_set1(1.0f), glmm_vhadd(vmulq_f32(x0, r0)));
|
||||
x0 = glmm_div(glmm_set1_rval(1.0f), glmm_vhadd(vmulq_f32(x0, r0)));
|
||||
|
||||
glmm_store(dest[0], vmulq_f32(v0, x0));
|
||||
glmm_store(dest[1], vmulq_f32(v1, x0));
|
||||
|
@@ -471,15 +471,15 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
|
||||
x8 = _mm_shuffle_ps(x0, x3, _MM_SHUFFLE(3, 1, 3, 1)); /* k c j b */
|
||||
x9 = _mm_shuffle_ps(x0, x3, _MM_SHUFFLE(2, 0, 2, 0)); /* o g n f */
|
||||
|
||||
x10 = glmm_shuff1(x2, 2, 0, 2, 0); /* p h p h */
|
||||
x11 = glmm_shuff1(x2, 3, 1, 3, 1); /* l d l d */
|
||||
x10 = glmm_shuff1(x2, 2, 0, 2, 0); /* p h p h */
|
||||
x11 = glmm_shuff1(x2, 3, 1, 3, 1); /* l d l d */
|
||||
|
||||
#if 1 /* TODO measure both */
|
||||
#if 0 /* TODO measure both */
|
||||
x12 = _mm_shuffle_ps(x4, x5, _MM_SHUFFLE(1, 0, 1, 0)); /* i a k c */
|
||||
x13 = _mm_shuffle_ps(x6, x7, _MM_SHUFFLE(1, 0, 1, 0)); /* m e o g */
|
||||
#else
|
||||
x12 = _mm_movelh_ps(x4, x5); /* i a k c */
|
||||
x13 = _mm_movelh_ps(x6, x7); /* m e o g */
|
||||
x12 = _mm_movelh_ps(x4, x5); /* i a k c */
|
||||
x13 = _mm_movelh_ps(x6, x7); /* m e o g */
|
||||
#endif
|
||||
|
||||
t0 = _mm_mul_ps(x12, x10);
|
||||
@@ -494,7 +494,7 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
|
||||
/* v0: c3 * c10 + c4 * c9 + c1 * c8 + c2 * c7 */
|
||||
/* v1: c5 * c12 + c6 * c11 */
|
||||
|
||||
v5 = glmm_set1(1.0f);
|
||||
v5 = glmm_set1_rval(1.0f);
|
||||
v0 = glmm_shuff1(t2, 2, 3, 0, 1);
|
||||
v1 = glmm_shuff1(t1, 0, 1, 2, 3);
|
||||
v0 = _mm_mul_ps(t0, v0);
|
||||
|
@@ -37,21 +37,26 @@
|
||||
#define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)
|
||||
|
||||
#ifdef __AVX__
|
||||
# define glmm_set1(x) _mm_broadcast_ss(&x)
|
||||
# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
|
||||
|
||||
# define glmm_splat_x(x) _mm_broadcastss_ps(x)
|
||||
# define glmm_splat_y(x) _mm_permute_ps(x, _MM_SHUFFLE(1, 1, 1, 1))
|
||||
# define glmm_splat_z(x) _mm_permute_ps(x, _MM_SHUFFLE(2, 2, 2, 2))
|
||||
# define glmm_splat_w(x) _mm_permute_ps(x, _MM_SHUFFLE(3, 3, 3, 3))
|
||||
# define glmm_set1(x) _mm_broadcast_ss(&x)
|
||||
# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
|
||||
# define glmm_set1_rval(x) _mm_set1_ps(x)
|
||||
# ifdef __AVX2__
|
||||
# define glmm_splat_x(x) _mm_broadcastss_ps(x)
|
||||
# else
|
||||
# define glmm_splat_x(x) _mm_permute_ps(x, _MM_SHUFFLE(0, 0, 0, 0))
|
||||
# endif
|
||||
# define glmm_splat_y(x) _mm_permute_ps(x, _MM_SHUFFLE(1, 1, 1, 1))
|
||||
# define glmm_splat_z(x) _mm_permute_ps(x, _MM_SHUFFLE(2, 2, 2, 2))
|
||||
# define glmm_splat_w(x) _mm_permute_ps(x, _MM_SHUFFLE(3, 3, 3, 3))
|
||||
#else
|
||||
# define glmm_set1(x) _mm_set1_ps(x)
|
||||
# define glmm_set1_ptr(x) _mm_set1_ps(*x)
|
||||
# define glmm_set1(x) _mm_set1_ps(x)
|
||||
# define glmm_set1_ptr(x) _mm_set1_ps(*x)
|
||||
# define glmm_set1_rval(x) _mm_set1_ps(x)
|
||||
|
||||
# define glmm_splat_x(x) glmm_splat(x, 0)
|
||||
# define glmm_splat_y(x) glmm_splat(x, 1)
|
||||
# define glmm_splat_z(x) glmm_splat(x, 2)
|
||||
# define glmm_splat_w(x) glmm_splat(x, 3)
|
||||
# define glmm_splat_x(x) glmm_splat(x, 0)
|
||||
# define glmm_splat_y(x) glmm_splat(x, 1)
|
||||
# define glmm_splat_z(x) glmm_splat(x, 2)
|
||||
# define glmm_splat_w(x) glmm_splat(x, 3)
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__
|
||||
|
@@ -215,7 +215,7 @@ glm_vec4_one(vec4 v) {
|
||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||
glmm_store(v, wasm_f32x4_const_splat(1.0f));
|
||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glmm_store(v, glmm_set1(1.0f));
|
||||
glmm_store(v, glmm_set1_rval(1.0f));
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
vst1q_f32(v, vdupq_n_f32(1.0f));
|
||||
#else
|
||||
|
Reference in New Issue
Block a user