mirror of
https://github.com/recp/cglm.git
synced 2025-10-04 01:00:46 +00:00
arm, neon: neon/fma support for glm_quat_mul()
This commit is contained in:
@@ -111,7 +111,8 @@ cglm_simd_avx_HEADERS = include/cglm/simd/avx/mat4.h \
|
||||
cglm_simd_neondir=$(includedir)/cglm/simd/neon
|
||||
cglm_simd_neon_HEADERS = include/cglm/simd/neon/mat4.h \
|
||||
include/cglm/simd/neon/mat2.h \
|
||||
include/cglm/simd/neon/affine.h
|
||||
include/cglm/simd/neon/affine.h \
|
||||
include/cglm/simd/neon/quat.h
|
||||
|
||||
cglm_structdir=$(includedir)/cglm/struct
|
||||
cglm_struct_HEADERS = include/cglm/struct/mat4.h \
|
||||
|
@@ -63,6 +63,10 @@
|
||||
# include "simd/sse2/quat.h"
|
||||
#endif
|
||||
|
||||
#ifdef CGLM_NEON_FP
|
||||
# include "simd/neon/quat.h"
|
||||
#endif
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest);
|
||||
@@ -412,6 +416,8 @@ glm_quat_mul(versor p, versor q, versor dest) {
|
||||
*/
|
||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glm_quat_mul_sse2(p, q, dest);
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
glm_quat_mul_neon(p, q, dest);
|
||||
#else
|
||||
dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1];
|
||||
dest[1] = p[3] * q[1] - p[0] * q[2] + p[1] * q[3] + p[2] * q[0];
|
||||
|
56
include/cglm/simd/neon/quat.h
Normal file
56
include/cglm/simd/neon/quat.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c), Recep Aslantas.
|
||||
*
|
||||
* MIT License (MIT), http://opensource.org/licenses/MIT
|
||||
* Full license can be found in the LICENSE file
|
||||
*/
|
||||
|
||||
#ifndef cglm_quat_neon_h
|
||||
#define cglm_quat_neon_h
|
||||
#if defined(__ARM_NEON_FP)
|
||||
|
||||
#include "../../common.h"
|
||||
#include "../intrin.h"
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_quat_mul_neon(versor p, versor q, versor dest) {
|
||||
/*
|
||||
+ (a1 b2 + b1 a2 + c1 d2 − d1 c2)i
|
||||
+ (a1 c2 − b1 d2 + c1 a2 + d1 b2)j
|
||||
+ (a1 d2 + b1 c2 − c1 b2 + d1 a2)k
|
||||
a1 a2 − b1 b2 − c1 c2 − d1 d2
|
||||
*/
|
||||
|
||||
glmm_128 xp, xq, xqr, r, x, y, z, s2, s3;
|
||||
glmm_128 s1 = {-0.f, 0.f, 0.f, -0.f};
|
||||
float32x2_t qh, ql;
|
||||
|
||||
xp = glmm_load(p); /* 3 2 1 0 */
|
||||
xq = glmm_load(q);
|
||||
|
||||
r = vmulq_f32(glmm_splat_w(xp), xq);
|
||||
x = glmm_splat_x(xp);
|
||||
y = glmm_splat_y(xp);
|
||||
z = glmm_splat_z(xp);
|
||||
|
||||
ql = vget_high_f32(s1);
|
||||
s3 = vcombine_f32(ql, ql);
|
||||
s2 = vzipq_f32(s3, s3).val[0];
|
||||
|
||||
xqr = vrev64q_f32(xq);
|
||||
qh = vget_high_f32(xqr);
|
||||
ql = vget_low_f32(xqr);
|
||||
|
||||
r = glmm_fmadd(glmm_xor(x, s3), vcombine_f32(qh, ql), r);
|
||||
|
||||
r = glmm_fmadd(glmm_xor(y, s2), vcombine_f32(vget_high_f32(xq),
|
||||
vget_low_f32(xq)), r);
|
||||
|
||||
r = glmm_fmadd(glmm_xor(z, s1), vcombine_f32(ql, qh), r);
|
||||
|
||||
glmm_store(dest, r);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* cglm_quat_neon_h */
|
@@ -41,6 +41,5 @@ glm_quat_mul_sse2(versor p, versor q, versor dest) {
|
||||
glmm_store(dest, r);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
#endif /* cglm_quat_simd_h */
|
||||
|
@@ -93,6 +93,7 @@
|
||||
<ClInclude Include="..\include\cglm\simd\neon\affine.h" />
|
||||
<ClInclude Include="..\include\cglm\simd\neon\mat2.h" />
|
||||
<ClInclude Include="..\include\cglm\simd\neon\mat4.h" />
|
||||
<ClInclude Include="..\include\cglm\simd\neon\quat.h" />
|
||||
<ClInclude Include="..\include\cglm\simd\sse2\affine.h" />
|
||||
<ClInclude Include="..\include\cglm\simd\sse2\mat2.h" />
|
||||
<ClInclude Include="..\include\cglm\simd\sse2\mat3.h" />
|
||||
|
@@ -376,5 +376,8 @@
|
||||
<ClInclude Include="..\include\cglm\simd\neon\mat2.h">
|
||||
<Filter>include\cglm\simd\neon</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\include\cglm\simd\neon\quat.h">
|
||||
<Filter>include\cglm\simd\neon</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
Reference in New Issue
Block a user