From f3f29bd383f439d0dc1f949ac3839c5b594158f7 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sun, 18 Apr 2021 16:24:29 +0300 Subject: [PATCH] vec4: optimize muladd and muladds with fma --- include/cglm/vec4.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h index 7a4549c..54d487f 100644 --- a/include/cglm/vec4.h +++ b/include/cglm/vec4.h @@ -568,14 +568,8 @@ glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) { CGLM_INLINE void glm_vec4_muladd(vec4 a, vec4 b, vec4 dest) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - glmm_store(dest, _mm_add_ps(glmm_load(dest), - _mm_mul_ps(glmm_load(a), - glmm_load(b)))); -#elif defined(CGLM_NEON_FP) - vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), - vmulq_f32(vld1q_f32(a), - vld1q_f32(b)))); +#if defined(CGLM_SIMD) + glmm_store(dest, glmm_fmadd(glmm_load(a), glmm_load(b), glmm_load(dest))); #else dest[0] += a[0] * b[0]; dest[1] += a[1] * b[1]; @@ -596,14 +590,8 @@ glm_vec4_muladd(vec4 a, vec4 b, vec4 dest) { CGLM_INLINE void glm_vec4_muladds(vec4 a, float s, vec4 dest) { -#if defined( __SSE__ ) || defined( __SSE2__ ) - glmm_store(dest, _mm_add_ps(glmm_load(dest), - _mm_mul_ps(glmm_load(a), - _mm_set1_ps(s)))); -#elif defined(CGLM_NEON_FP) - vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), - vmulq_f32(vld1q_f32(a), - vdupq_n_f32(s)))); +#if defined(CGLM_SIMD) + glmm_store(dest, glmm_fmadd(glmm_load(a), _mm_set1_ps(s), glmm_load(dest))); #else dest[0] += a[0] * s; dest[1] += a[1] * s;