From 155eb109a8bb21e82992b628e02304448b387a7f Mon Sep 17 00:00:00 2001
From: Recep Aslantas
Date: Sun, 25 Apr 2021 03:49:35 +0300
Subject: [PATCH] arm, neon: neon/fma support for glm_mul_rot()

---
/*
 * Reviewer notes (free text after the three-dash line; not part of the
 * commit message and not part of either hunk).
 *
 * include/cglm/affine-mat.h
 *   glm_mul_rot() gains an `#elif defined(CGLM_NEON_FP)` branch that forwards
 *   (m1, m2, dest) to glm_mul_rot_neon(). It sits after the SSE branch and
 *   before the scalar fallback, so the SSE path still wins when both apply.
 *
 * include/cglm/simd/neon/affine.h
 *   Adds glm_mul_rot_neon(mat4 m1, mat4 m2, mat4 dest):
 *     - loads m1[0..2] into l0..l2 and m2[0..2] into r0..r2;
 *     - for each j in 0..2 builds
 *           vj = splat_x(rj) * l0, then accumulates splat_y(rj) with l1
 *           and splat_z(rj) with l2 through glmm_fmadd;
 *     - stores v0..v2 into dest[0..2];
 *     - stores an unmodified copy of m1[3] into dest[3].
 *   m2[3] is never read, and only the x/y/z splats of r0..r2 are used.
 *
 *   Every row that feeds v0..v2 is loaded before the first glmm_store, and
 *   m1[3] is read only after dest[0..2] have been written. Calling with
 *   dest == m1 or dest == m2 therefore looks safe -- TODO confirm against the
 *   glmm_load / glmm_store definitions, which are outside this patch.
 *
 *   Assumed, not shown here: glmm_fmadd(a, b, c) evaluates a * b + c and
 *   glmm_splat_x/y/z broadcast one lane to all four -- TODO confirm in the
 *   cglm NEON intrinsics header.
 *
 * Hunk bookkeeping: the first hunk has 6 context + 2 added lines, the second
 * has 5 context + 29 added lines, matching the diffstat below.
 */

 include/cglm/affine-mat.h       |  2 ++
 include/cglm/simd/neon/affine.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/include/cglm/affine-mat.h b/include/cglm/affine-mat.h
index 8383283..c555eae 100644
--- a/include/cglm/affine-mat.h
+++ b/include/cglm/affine-mat.h
@@ -109,6 +109,8 @@ void
 glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_mul_rot_sse2(m1, m2, dest);
+#elif defined(CGLM_NEON_FP)
+  glm_mul_rot_neon(m1, m2, dest);
 #else
   float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
         a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2], a13 = m1[1][3],
diff --git a/include/cglm/simd/neon/affine.h b/include/cglm/simd/neon/affine.h
index 90c9ab4..b511083 100644
--- a/include/cglm/simd/neon/affine.h
+++ b/include/cglm/simd/neon/affine.h
@@ -47,5 +47,34 @@ glm_mul_neon(mat4 m1, mat4 m2, mat4 dest) {
   glmm_store(dest[3], v3);
 }
 
+CGLM_INLINE
+void
+glm_mul_rot_neon(mat4 m1, mat4 m2, mat4 dest) {
+  /* D = R * L (Column-Major) */
+
+  glmm_128 l0, l1, l2, r0, r1, r2, v0, v1, v2;
+
+  l0 = glmm_load(m1[0]); r0 = glmm_load(m2[0]);
+  l1 = glmm_load(m1[1]); r1 = glmm_load(m2[1]);
+  l2 = glmm_load(m1[2]); r2 = glmm_load(m2[2]);
+
+  v0 = vmulq_f32(glmm_splat_x(r0), l0);
+  v1 = vmulq_f32(glmm_splat_x(r1), l0);
+  v2 = vmulq_f32(glmm_splat_x(r2), l0);
+
+  v0 = glmm_fmadd(glmm_splat_y(r0), l1, v0);
+  v1 = glmm_fmadd(glmm_splat_y(r1), l1, v1);
+  v2 = glmm_fmadd(glmm_splat_y(r2), l1, v2);
+
+  v0 = glmm_fmadd(glmm_splat_z(r0), l2, v0);
+  v1 = glmm_fmadd(glmm_splat_z(r1), l2, v1);
+  v2 = glmm_fmadd(glmm_splat_z(r2), l2, v2);
+
+  glmm_store(dest[0], v0);
+  glmm_store(dest[1], v1);
+  glmm_store(dest[2], v2);
+  glmm_store(dest[3], glmm_load(m1[3]));
+}
+
 #endif
 #endif /* cglm_affine_neon_h */