From 55ebbdbe40b903128a85dd54030b00fb2c08d4b6 Mon Sep 17 00:00:00 2001
From: Recep Aslantas
Date: Wed, 28 Apr 2021 14:46:14 +0300
Subject: [PATCH] arm, neon: neon/fma support for glm_inv_tr()

---
 include/cglm/affine-mat.h       |  2 ++
 include/cglm/simd/neon/affine.h | 36 +++++++++++++++++++++++++++++++++
 test/src/test_affine_mat.h      | 25 +++++++++++++++++++++++
 3 files changed, 63 insertions(+)

diff --git a/include/cglm/affine-mat.h b/include/cglm/affine-mat.h
index c555eae..75607e7 100644
--- a/include/cglm/affine-mat.h
+++ b/include/cglm/affine-mat.h
@@ -158,6 +158,8 @@ void
 glm_inv_tr(mat4 mat) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   glm_inv_tr_sse2(mat);
+#elif defined(CGLM_NEON_FP)
+  glm_inv_tr_neon(mat);
 #else
   CGLM_ALIGN_MAT mat3 r;
   CGLM_ALIGN(8) vec3 t;
diff --git a/include/cglm/simd/neon/affine.h b/include/cglm/simd/neon/affine.h
index b511083..3e0cccd 100644
--- a/include/cglm/simd/neon/affine.h
+++ b/include/cglm/simd/neon/affine.h
@@ -76,5 +76,41 @@ glm_mul_rot_neon(mat4 m1, mat4 m2, mat4 dest) {
   glmm_store(dest[3], glmm_load(m1[3]));
 }
 
+CGLM_INLINE
+void
+glm_inv_tr_neon(mat4 mat) {
+  float32x4x4_t vmat;
+  glmm_128 r0, r1, r2, r3, x0;
+
+  vmat = vld4q_f32(mat[0]);
+  r0 = vmat.val[0];
+  r1 = vmat.val[1];
+  r2 = vmat.val[2];
+  r3 = vmat.val[3];
+
+  x0 = glmm_fmadd(r0, glmm_splat_w(r0),
+                  glmm_fmadd(r1, glmm_splat_w(r1),
+                             vmulq_f32(r2, glmm_splat_w(r2))));
+  x0 = glmm_xor(x0, glmm_set1(-0.f));
+
+  glmm_store(mat[0], r0);
+  glmm_store(mat[1], r1);
+  glmm_store(mat[2], r2);
+  glmm_store(mat[3], x0);
+
+  mat[0][3] = 0.0f;
+  mat[1][3] = 0.0f;
+  mat[2][3] = 0.0f;
+  mat[3][3] = 1.0f;
+
+  /* TODO: ? write r3's high lanes (presumably 0, 1 for affine input) instead:
+  zo = vget_high_f32(r3);
+  vst1_lane_f32(&mat[0][3], zo, 0);
+  vst1_lane_f32(&mat[1][3], zo, 0);
+  vst1_lane_f32(&mat[2][3], zo, 0);
+  vst1_lane_f32(&mat[3][3], zo, 1);
+  */
+}
+
 #endif
 #endif /* cglm_affine_neon_h */
diff --git a/test/src/test_affine_mat.h b/test/src/test_affine_mat.h
index 1c1127f..e928f8e 100644
--- a/test/src/test_affine_mat.h
+++ b/test/src/test_affine_mat.h
@@ -7,6 +7,25 @@
 
 #include "test_common.h"
 
+#ifndef glm_affine_mat_test_guard
+#define glm_affine_mat_test_guard
+CGLM_INLINE
+void
+glm_inv_tr_raw(mat4 mat) {
+  CGLM_ALIGN_MAT mat3 r;
+  CGLM_ALIGN(8) vec3 t;
+
+  /* rotate: pick the 3x3 part into r via pick3t, then write r back into mat */
+  glm_mat4_pick3t(mat, r);
+  glm_mat4_ins3(r, mat);
+
+  /* translate: mat[3] = -(r * mat[3]) */
+  glm_mat3_mulv(r, mat[3], t);
+  glm_vec3_negate(t);
+  glm_vec3_copy(t, mat[3]);
+}
+#endif
+
 TEST_IMPL(GLM_PREFIX, mul) {
   mat4 m1 = GLM_MAT4_IDENTITY_INIT;
   mat4 m2 = GLM_MAT4_IDENTITY_INIT;
@@ -81,6 +100,12 @@ TEST_IMPL(GLM_PREFIX, inv_tr) {
   GLM(mat4_inv)(m1, m2);
   GLM(inv_tr)(m2);
   ASSERTIFY(test_assert_mat4_eq(m1, m2))
+
+  /* compare GLM(inv_tr) against the reference implementation glm_inv_tr_raw */
+  glm_mat4_copy(m1, m2);
+  glm_inv_tr_raw(m2);
+  GLM(inv_tr)(m1);
+  ASSERTIFY(test_assert_mat4_eq(m1, m2))
 }
 
 TEST_SUCCESS