Fix building ARM NEON code on Windows with MSVC

This commit is contained in:
Recep Aslantas
2023-03-07 13:11:08 +03:00
parent 799749fc6a
commit 8a117017ea
8 changed files with 501 additions and 14 deletions

View File

@@ -38,6 +38,12 @@
#define glmm_combine_lh(x, y) vcombine_f32(vget_low_f32(x), vget_high_f32(y))
#define glmm_combine_hh(x, y) vcombine_f32(vget_high_f32(x), vget_high_f32(y))
/*
 * glmm_float32x4_init(x, y, z, w): brace initializer for a float32x4_t
 * constant, for use as `float32x4_t v = glmm_float32x4_init(...);`.
 *
 * The MSVC branch initializes the vector through its `n128_f32` member by
 * name; every other compiler gets a plain four-element brace list.
 *
 * clang-cl also defines _MSC_VER, but it uses Clang's own <arm_neon.h>
 * (NOTE(review): float32x4_t is presumably a native vector type there, with
 * no n128_f32 member - confirm), so it is routed to the plain-list branch.
 */
#if defined(_WIN32) && defined(_MSC_VER) && !defined(__clang__)
# define glmm_float32x4_init(x, y, z, w) { .n128_f32 = { x, y, z, w } }
#else
# define glmm_float32x4_init(x, y, z, w) { x, y, z, w }
#endif
static inline
float32x4_t
glmm_abs(float32x4_t v) {

View File

@@ -17,7 +17,13 @@
# ifndef __SSE__
# define __SSE__
# endif
# elif defined(_M_ARM64)
# ifndef __ARM_NEON
/* TODO: confirm it is valid to define these NEON feature macros by hand for MSVC ARM64 */
# define __ARM_NEON
# define __ARM_NEON_FP
# endif
#endif
/* do not use alignment for older visual studio versions */
# if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
# define CGLM_ALL_UNALIGNED
@@ -63,7 +69,7 @@
#endif
/* ARM Neon */
#if defined(__ARM_NEON)
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
# include <arm_neon.h>
# if defined(__ARM_NEON_FP)
# define CGLM_NEON_FP 1

View File

@@ -108,7 +108,7 @@ glm_mat4_det_neon(mat4 mat) {
float32x4_t r0, r1, r2, r3, x0, x1, x2;
float32x2_t ij, op, mn, kl, nn, mm, jj, ii, gh, ef, t12, t34;
float32x4x2_t a1;
float32x4_t x3 = { 0.f, -0.f, 0.f, -0.f };
float32x4_t x3 = glmm_float32x4_init(0.f, -0.f, 0.f, -0.f);
/* 127 <- 0, [square] det(A) = det(At) */
r0 = glmm_load(mat[0]); /* d c b a */
@@ -181,7 +181,7 @@ glm_mat4_inv_neon(mat4 mat, mat4 dest) {
x0, x1, x2, x3, x4, x5, x6, x7, x8;
float32x4x2_t a1;
float32x2_t lp, ko, hg, jn, im, fe, ae, bf, cg, dh;
float32x4_t x9 = { -0.f, 0.f, -0.f, 0.f };
float32x4_t x9 = glmm_float32x4_init(-0.f, 0.f, -0.f, 0.f);
x8 = vrev64q_f32(x9);

View File

@@ -23,7 +23,7 @@ glm_quat_mul_neon(versor p, versor q, versor dest) {
*/
glmm_128 xp, xq, xqr, r, x, y, z, s2, s3;
glmm_128 s1 = {-0.f, 0.f, 0.f, -0.f};
glmm_128 s1 = glmm_float32x4_init(-0.f, 0.f, 0.f, -0.f);
float32x2_t qh, ql;
xp = glmm_load(p); /* 3 2 1 0 */