vec4: SSE dot product, norm2 built on the vec4 dot product, and a vec4 test

Summary of what this patch does:
  * glm_vec4_dot gains an SSE path (multiply, then two shuffle/add steps);
    the scalar expression is kept as the fallback when SSE is unavailable.
  * glm_vec4_norm2 is expressed as glm_vec4_dot(v, v).  It must call the
    vec4 routine: glm_vec_dot is not the function defined in this patch and
    would not go through the four-component code path above.
  * test_rand_vec4 helper and a test_vec4 case are added; the case compares
    glm_vec4_dot and glm_vec4_norm2 against a plain scalar reference.

NOTE(review): _mm_load_ps requires 16-byte-aligned operands; confirm that
vec4 is declared with that alignment everywhere it can reach glm_vec4_dot.
NOTE(review): leading whitespace of context lines was reconstructed; verify
against the target tree (or apply with whitespace tolerance).

diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h
index 95bab09..97df6d4 100644
--- a/include/cglm/vec4.h
+++ b/include/cglm/vec4.h
@@ -122,7 +122,14 @@ glm_vec4_copy(vec4 v, vec4 dest) {
 CGLM_INLINE
 float
 glm_vec4_dot(vec4 a, vec4 b) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  __m128 x0;
+  x0 = _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b));
+  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
+  return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
+#else
   return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+#endif
 }
 
 /*!
@@ -139,7 +146,7 @@ glm_vec4_dot(vec4 a, vec4 b) {
 CGLM_INLINE
 float
 glm_vec4_norm2(vec4 v) {
-  return v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3];
+  return glm_vec4_dot(v, v);
 }
 
 /*!
diff --git a/makefile.am b/makefile.am
index 436973c..2922373 100644
--- a/makefile.am
+++ b/makefile.am
@@ -109,6 +109,8 @@ test_tests_SOURCES=\
     test/src/test_project.c \
     test/src/test_clamp.c \
     test/src/test_euler.c \
-    test/src/test_quat.c
+    test/src/test_quat.c \
+    test/src/test_vec4.c
+
 
 all-local:
 	sh ./post-build.sh
diff --git a/test/src/test_common.c b/test/src/test_common.c
index c38c474..c13ac0e 100644
--- a/test/src/test_common.c
+++ b/test/src/test_common.c
@@ -36,6 +36,16 @@ test_rand_vec3(vec3 dest) {
   dest[2] = drand48();
 }
 
+void
+test_rand_vec4(vec4 dest) {
+  srand((unsigned int)time(NULL));
+
+  dest[0] = drand48();
+  dest[1] = drand48();
+  dest[2] = drand48();
+  dest[3] = drand48();
+}
+
 float
 test_rand_angle(void) {
   srand((unsigned int)time(NULL));
diff --git a/test/src/test_common.h b/test/src/test_common.h
index f692483..477e59d 100644
--- a/test/src/test_common.h
+++ b/test/src/test_common.h
@@ -40,6 +40,9 @@ test_assert_quat_eq(versor v1, versor v2);
 void
 test_rand_vec3(vec3 dest);
 
+void
+test_rand_vec4(vec4 dest);
+
 float
 test_rand_angle(void);
 
diff --git a/test/src/test_main.c b/test/src/test_main.c
index 384250f..7995a5a 100644
--- a/test/src/test_main.c
+++ b/test/src/test_main.c
@@ -26,7 +26,10 @@ main(int argc, const char * argv[]) {
     cmocka_unit_test(test_euler),
 
     /* quaternion */
-    cmocka_unit_test(test_quat)
+    cmocka_unit_test(test_quat),
+
+    /* vec4 */
+    cmocka_unit_test(test_vec4)
   };
 
   return cmocka_run_group_tests(tests, NULL, NULL);
diff --git a/test/src/test_tests.h b/test/src/test_tests.h
index 398caa3..1dfbb5f 100644
--- a/test/src/test_tests.h
+++ b/test/src/test_tests.h
@@ -28,4 +28,7 @@ test_euler(void **state);
 void
 test_quat(void **state);
 
+void
+test_vec4(void **state);
+
 #endif /* test_tests_h */
diff --git a/test/src/test_vec4.c b/test/src/test_vec4.c
new file mode 100644
index 0000000..a45a700
--- /dev/null
+++ b/test/src/test_vec4.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c), Recep Aslantas.
+ *
+ * MIT License (MIT), http://opensource.org/licenses/MIT
+ * Full license can be found in the LICENSE file
+ */
+
+#include "test_common.h"
+
+CGLM_INLINE
+float
+test_vec4_dot(vec4 a, vec4 b) {
+  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+}
+
+void
+test_vec4(void **state) {
+  vec4 v;
+  int i;
+  float d1, d2;
+
+  /* test SSE/SIMD dot product */
+  for (i = 0; i < 100; i++) {
+    test_rand_vec4(v);
+    d1 = glm_vec4_dot(v, v);
+    d2 = test_vec4_dot(v, v);
+
+    assert_true(fabsf(d1 - d2) <= 0.000009);
+
+    /* norm2 must agree with the four-component scalar reference */
+    assert_true(fabsf(glm_vec4_norm2(v) - d2) <= 0.000009);
+  }
+}