From 20360f22965e80176dbb6931e727eb6f41fa9796 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Fri, 12 Oct 2018 09:05:42 +0300 Subject: [PATCH 1/2] avx: replace binary constants with hex --- include/cglm/cam.h | 4 ++-- include/cglm/simd/avx/affine.h | 7 ++++--- include/cglm/simd/avx/mat4.h | 5 +++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/cglm/cam.h b/include/cglm/cam.h index 1360c81..284ae82 100644 --- a/include/cglm/cam.h +++ b/include/cglm/cam.h @@ -201,7 +201,7 @@ void glm_ortho_default(float aspect, mat4 dest) { if (aspect >= 1.0f) { glm_ortho(-aspect, aspect, -1.0f, 1.0f, -100.0f, 100.0f, dest); - return; + return; } aspect = 1.0f / aspect; @@ -229,7 +229,7 @@ glm_ortho_default_s(float aspect, -size - 100.0f, size + 100.0f, dest); - return; + return; } glm_ortho(-size, diff --git a/include/cglm/simd/avx/affine.h b/include/cglm/simd/avx/affine.h index 5c7f71c..369bcf7 100644 --- a/include/cglm/simd/avx/affine.h +++ b/include/cglm/simd/avx/affine.h @@ -27,13 +27,14 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) { y2 = glmm_load256(m1[0]); /* h g f e d c b a */ y3 = glmm_load256(m1[2]); /* p o n m l k j i */ - y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */ - y5 = _mm256_permute2f128_ps(y3, y3, 0b00000000); /* l k j i l k j i */ + /* 0x03: 0b00000011 */ + y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */ + y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i l k j i */ /* f f f f a a a a */ /* g g g g c c c c */ /* e e e e b b b b */ - y7 = _mm256_permute_ps(y0, 0b10101010); + y7 = _mm256_permute_ps(y0, 0xAA); /* 0xAA: 0b10101010 */ y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0)); y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1)); diff --git a/include/cglm/simd/avx/mat4.h b/include/cglm/simd/avx/mat4.h index b5859a7..944769b 100644 --- a/include/cglm/simd/avx/mat4.h +++ b/include/cglm/simd/avx/mat4.h @@ -27,8 +27,9 @@ glm_mat4_mul_avx(mat4 m1, mat4 
m2, mat4 dest) { y2 = glmm_load256(m1[0]); /* h g f e d c b a */ y3 = glmm_load256(m1[2]); /* p o n m l k j i */ - y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */ - y5 = _mm256_permute2f128_ps(y3, y3, 0b00000011); /* l k j i p o n m */ + /* 0x03: 0b00000011 */ + y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */ + y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */ /* f f f f a a a a */ /* h h h h c c c c */ From dadae4b773b01e1713b01f0c16180e2302568d49 Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Fri, 19 Oct 2018 09:40:40 +0300 Subject: [PATCH 2/2] avx: fix glm_mul_avx * use the glm_mat4_mul_avx implementation here, because there seems to be no big difference for now. --- include/cglm/simd/avx/affine.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/cglm/simd/avx/affine.h b/include/cglm/simd/avx/affine.h index 369bcf7..b02ff0c 100644 --- a/include/cglm/simd/avx/affine.h +++ b/include/cglm/simd/avx/affine.h @@ -29,20 +29,22 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) { /* 0x03: 0b00000011 */ y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */ - y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i l k j i */ + y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */ /* f f f f a a a a */ - /* g g g g c c c c */ + /* h h h h c c c c */ /* e e e e b b b b */ - y7 = _mm256_permute_ps(y0, 0xAA); /* 0xAA: 0b10101010 */ + /* g g g g d d d d */ y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0)); + y7 = _mm256_permutevar_ps(y0, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2)); y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1)); + y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3)); glmm_store256(dest[0], _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6), - _mm256_mul_ps(y4, y8)), - _mm256_mul_ps(y5, y7))); - + _mm256_mul_ps(y3, y7)), + _mm256_add_ps(_mm256_mul_ps(y4, y8), + _mm256_mul_ps(y5, y9)))); /* n n n n i i i i */
/* p p p p k k k k */