diff --git a/include/cglm/mat4.h b/include/cglm/mat4.h
index cda5285..c099574 100644
--- a/include/cglm/mat4.h
+++ b/include/cglm/mat4.h
@@ -539,7 +539,9 @@ glm_mat4_scale_p(mat4 m, float s) {
 CGLM_INLINE
 void
 glm_mat4_scale(mat4 m, float s) {
-#if defined( __SSE__ ) || defined( __SSE2__ )
+#ifdef __AVX__
+  glm_mat4_scale_avx(m, s);
+#elif defined( __SSE__ ) || defined( __SSE2__ )
   glm_mat4_scale_sse2(m, s);
 #elif defined(CGLM_NEON_FP)
   glm_mat4_scale_neon(m, s);
diff --git a/include/cglm/simd/avx/mat4.h b/include/cglm/simd/avx/mat4.h
index 944769b..e8c36c8 100644
--- a/include/cglm/simd/avx/mat4.h
+++ b/include/cglm/simd/avx/mat4.h
@@ -14,6 +14,16 @@
 
 #include <immintrin.h>
 
+CGLM_INLINE
+void
+glm_mat4_scale_avx(mat4 m, float s) {
+  __m256 y0;
+  y0 = _mm256_set1_ps(s);
+
+  glmm_store256(m[0], _mm256_mul_ps(y0, glmm_load256(m[0])));
+  glmm_store256(m[2], _mm256_mul_ps(y0, glmm_load256(m[2])));
+}
+
 CGLM_INLINE
 void
 glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {