sse: use the int domain as the default behavior if possible (the compiler may ignore it)

also use AVX's `_mm_permute_ps` for shuffling a single vector
This commit is contained in:
Recep Aslantas
2024-04-11 21:57:16 +03:00
parent de66f0a67f
commit 480e1de048
2 changed files with 10 additions and 5 deletions

View File

@@ -76,7 +76,7 @@ SSE and SSE2 Shuffle Option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**_mm_shuffle_ps** generates the **shufps** instruction even if both registers are the same.
You can force it to generate **pshufd** instruction by defining
**CGLM_USE_INT_DOMAIN** macro. As default it is not defined.
**CGLM_NO_INT_DOMAIN** macro. As default it is not defined.
SSE3 and SSE4 Dot Product Options
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -20,13 +20,18 @@
#define glmm_128 __m128
#if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__)
#ifdef __AVX__
# define glmm_shuff1(xmm, z, y, x, w) \
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
_MM_SHUFFLE(z, y, x, w)))
_mm_permute_ps((xmm), _MM_SHUFFLE(z, y, x, w))
#else
# define glmm_shuff1(xmm, z, y, x, w) \
# if !defined(CGLM_NO_INT_DOMAIN) && defined(__SSE2__)
# define glmm_shuff1(xmm, z, y, x, w) \
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
_MM_SHUFFLE(z, y, x, w)))
# else
# define glmm_shuff1(xmm, z, y, x, w) \
_mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w))
# endif
#endif
/* Splat helper: calls glmm_shuff1 with `lane` as all four selectors, so the
 * element of x chosen by `lane` ends up in every position of the result. */
#define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)