mirror of
https://github.com/recp/cglm.git
synced 2025-10-03 16:51:35 +00:00
sse: use the integer domain by default when possible (the compiler may ignore it)
also use AVX's `_mm_permute_ps` for shuffling a single vector
This commit is contained in:
@@ -76,7 +76,7 @@ SSE and SSE2 Shuffle Option
|
|||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
**_mm_shuffle_ps** generates **shufps** instruction even if registers are same.
|
**_mm_shuffle_ps** generates **shufps** instruction even if registers are same.
|
||||||
You can force it to generate **pshufd** instruction by defining
|
Without AVX, on SSE2 targets the integer-domain **pshufd** shuffle is used by default; you can disable it by defining
|
||||||
**CGLM_USE_INT_DOMAIN** macro. As default it is not defined.
|
**CGLM_NO_INT_DOMAIN** macro. As default it is not defined.
|
||||||
|
|
||||||
SSE3 and SSE4 Dot Product Options
|
SSE3 and SSE4 Dot Product Options
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@@ -20,13 +20,18 @@
|
|||||||
|
|
||||||
#define glmm_128 __m128
|
#define glmm_128 __m128
|
||||||
|
|
||||||
#if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__)
|
#ifdef __AVX__
|
||||||
# define glmm_shuff1(xmm, z, y, x, w) \
|
# define glmm_shuff1(xmm, z, y, x, w) \
|
||||||
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
|
_mm_permute_ps((xmm), _MM_SHUFFLE(z, y, x, w))
|
||||||
_MM_SHUFFLE(z, y, x, w)))
|
|
||||||
#else
|
#else
|
||||||
# define glmm_shuff1(xmm, z, y, x, w) \
|
# if !defined(CGLM_NO_INT_DOMAIN) && defined(__SSE2__)
|
||||||
|
# define glmm_shuff1(xmm, z, y, x, w) \
|
||||||
|
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
|
||||||
|
_MM_SHUFFLE(z, y, x, w)))
|
||||||
|
# else
|
||||||
|
# define glmm_shuff1(xmm, z, y, x, w) \
|
||||||
_mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w))
|
_mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w))
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)
|
#define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)
|
||||||
|
Reference in New Issue
Block a user