From 9aebdc76b3ac72206274680886272a6aea85f497 Mon Sep 17 00:00:00 2001
From: Recep Aslantas
Date: Tue, 30 Oct 2018 09:58:11 +0300
Subject: [PATCH] avx: implement scale matrix using AVX

---
 include/cglm/mat4.h          |  4 +++-
 include/cglm/simd/avx/mat4.h | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h
index 8fbcddd..b3af335 100644
--- a/include/cglm/mat4.h
+++ b/include/cglm/mat4.h
@@ -478,7 +478,9 @@ glm_mat4_scale_p(mat4 m, float s) {
 CGLM_INLINE
 void
 glm_mat4_scale(mat4 m, float s) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
+#ifdef __AVX__
+  glm_mat4_scale_avx(m, s);
+#elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_scale_sse2(m, s);
 #else
   glm_mat4_scale_p(m, s);
diff --git a/include/cglm/simd/avx/mat4.h b/include/cglm/simd/avx/mat4.h
index 3c7c397..44ed318 100644
--- a/include/cglm/simd/avx/mat4.h
+++ b/include/cglm/simd/avx/mat4.h
@@ -14,6 +14,16 @@
 
 #include <immintrin.h>
 
+CGLM_INLINE
+void
+glm_mat4_scale_avx(mat4 m, float s) {
+  __m256 y0;
+  y0 = _mm256_set1_ps(s);
+
+  glmm_store256(m[0], _mm256_mul_ps(y0, glmm_load256(m[0])));
+  glmm_store256(m[2], _mm256_mul_ps(y0, glmm_load256(m[2])));
+}
+
 CGLM_INLINE
 void
 glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {