glmm, avx: optimize splat macros

This commit is contained in:
Recep Aslantas
2024-04-10 23:49:18 +03:00
parent 68bdec4510
commit de66f0a67f

View File

@@ -20,14 +20,6 @@
/* glmm_128: alias for the __m128 vector type used by the glmm_* helpers. */
#define glmm_128 __m128
/*
 * Broadcast one float to all four lanes of a __m128.
 *
 *   glmm_set1(x)     - x is a float; on the AVX path its address is taken,
 *                      so x must be an lvalue there.
 *   glmm_set1_ptr(x) - x is a pointer to float.
 *
 * Arguments are parenthesized so that expressions such as `p + 1` expand to
 * `*(p + 1)` instead of `*p + 1`.
 */
#ifdef __AVX__
# define glmm_set1(x)     _mm_broadcast_ss(&(x))
# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
#else
# define glmm_set1(x)     _mm_set1_ps(x)
# define glmm_set1_ptr(x) _mm_set1_ps(*(x))
#endif
#if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__) #if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__)
# define glmm_shuff1(xmm, z, y, x, w) \ # define glmm_shuff1(xmm, z, y, x, w) \
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
@@ -39,10 +31,23 @@
/* glmm_splat(x, lane): shuffle x so that every lane holds element `lane`. */
#define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)
/*
 * Scalar broadcast and single-lane splat helpers.
 *
 *   glmm_set1(x)     - broadcast float x; on the AVX path the address of x is
 *                      taken, so x must be an lvalue there.
 *   glmm_set1_ptr(x) - broadcast the float that pointer x refers to.
 *   glmm_splat_{x,y,z,w}(x) - replicate lane 0/1/2/3 of x into all lanes.
 *
 * _mm_broadcastss_ps() is an AVX2 intrinsic, so it is selected for lane 0
 * only when __AVX2__ is defined; AVX-only targets use _mm_permute_ps() like
 * the other lanes. Pointer/address arguments are parenthesized so that
 * expressions such as `p + 1` expand correctly.
 */
#ifdef __AVX__
# define glmm_set1(x)     _mm_broadcast_ss(&(x))
# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
# ifdef __AVX2__
#  define glmm_splat_x(x) _mm_broadcastss_ps(x)
# else
#  define glmm_splat_x(x) _mm_permute_ps(x, _MM_SHUFFLE(0, 0, 0, 0))
# endif
# define glmm_splat_y(x)  _mm_permute_ps(x, _MM_SHUFFLE(1, 1, 1, 1))
# define glmm_splat_z(x)  _mm_permute_ps(x, _MM_SHUFFLE(2, 2, 2, 2))
# define glmm_splat_w(x)  _mm_permute_ps(x, _MM_SHUFFLE(3, 3, 3, 3))
#else
# define glmm_set1(x)     _mm_set1_ps(x)
# define glmm_set1_ptr(x) _mm_set1_ps(*(x))
# define glmm_splat_x(x)  glmm_splat(x, 0)
# define glmm_splat_y(x)  glmm_splat(x, 1)
# define glmm_splat_z(x)  glmm_splat(x, 2)
# define glmm_splat_w(x)  glmm_splat(x, 3)
#endif
/* glmm_shuff1x() is DEPRECATED; use glmm_splat() instead. */
#define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x)