From 92151c63287d2e2893ad5428ddde61d4e460cd74 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sat, 24 Apr 2021 18:02:47 +0300 Subject: [PATCH] arm, neon: use div instead of mul by 1 / det for mat4_inv --- include/cglm/simd/neon/mat4.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/cglm/simd/neon/mat4.h b/include/cglm/simd/neon/mat4.h index b992acc..2fecebe 100644 --- a/include/cglm/simd/neon/mat4.h +++ b/include/cglm/simd/neon/mat4.h @@ -295,12 +295,21 @@ glm_mat4_inv_neon(mat4 mat, mat4 dest) { x0 = vcombine_f32(vget_low_f32(vzipq_f32(v0, v1).val[0]), vget_low_f32(vzipq_f32(v2, v3).val[0])); + /* x0 = glmm_div(glmm_set1(1.0f), glmm_vhadd(vmulq_f32(x0, r0))); glmm_store(dest[0], vmulq_f32(v0, x0)); glmm_store(dest[1], vmulq_f32(v1, x0)); glmm_store(dest[2], vmulq_f32(v2, x0)); glmm_store(dest[3], vmulq_f32(v3, x0)); + */ + + x0 = glmm_vhadd(vmulq_f32(x0, r0)); + + glmm_store(dest[0], glmm_div(v0, x0)); + glmm_store(dest[1], glmm_div(v1, x0)); + glmm_store(dest[2], glmm_div(v2, x0)); + glmm_store(dest[3], glmm_div(v3, x0)); } #endif