From 480e1de0486b2ba921e452e12b15f5082986c542 Mon Sep 17 00:00:00 2001
From: Recep Aslantas
Date: Thu, 11 Apr 2024 21:57:16 +0300
Subject: [PATCH] sse: make use of int domain as default behavior if possible ( compiler may ignore it )

also use AVX's `_mm_permute_ps` for shuffling single vector
---
 docs/source/opt.rst     |  2 +-
 include/cglm/simd/x86.h | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/docs/source/opt.rst b/docs/source/opt.rst
index e3c4cd2..d41549e 100644
--- a/docs/source/opt.rst
+++ b/docs/source/opt.rst
@@ -76,7 +76,7 @@ SSE and SSE2 Shuffle Option
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 **_mm_shuffle_ps** generates **shufps** instruction even if registers are same.
 You can force it to generate **pshufd** instruction by defining
-**CGLM_USE_INT_DOMAIN** macro. As default it is not defined.
+**CGLM_NO_INT_DOMAIN** macro. As default it is not defined.
 
 SSE3 and SSE4 Dot Product Options
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h
index b80f335..fda4b1c 100644
--- a/include/cglm/simd/x86.h
+++ b/include/cglm/simd/x86.h
@@ -20,13 +20,18 @@
 
 #define glmm_128 __m128
 
-#if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__)
+#ifdef __AVX__
 # define glmm_shuff1(xmm, z, y, x, w) \
-    _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
-    _MM_SHUFFLE(z, y, x, w)))
+    _mm_permute_ps((xmm), _MM_SHUFFLE(z, y, x, w))
 #else
-# define glmm_shuff1(xmm, z, y, x, w) \
+# if !defined(CGLM_NO_INT_DOMAIN) && defined(__SSE2__)
+# define glmm_shuff1(xmm, z, y, x, w) \
+    _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
+    _MM_SHUFFLE(z, y, x, w)))
+# else
+# define glmm_shuff1(xmm, z, y, x, w) \
     _mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w))
+# endif
 #endif
 
 #define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)