Merge pull request #63 from recp/avx

avx: replace binary constants (resolve https://github.com/recp/cglm/issues/62) with hex and fix glm_mul_avx
This commit is contained in:
Recep Aslantas
2018-10-19 09:52:32 +03:00
committed by GitHub
3 changed files with 15 additions and 11 deletions

View File

@@ -201,7 +201,7 @@ void
glm_ortho_default(float aspect, mat4 dest) {
if (aspect >= 1.0f) {
glm_ortho(-aspect, aspect, -1.0f, 1.0f, -100.0f, 100.0f, dest);
return;
return;
}
aspect = 1.0f / aspect;
@@ -229,7 +229,7 @@ glm_ortho_default_s(float aspect,
-size - 100.0f,
size + 100.0f,
dest);
return;
return;
}
glm_ortho(-size,

View File

@@ -27,21 +27,24 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0b00000000); /* l k j i l k j i */
/* 0x03: 0b00000011 */
y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */
/* f f f f a a a a */
/* g g g g c c c c */
/* h h h h c c c c */
/* e e e e b b b b */
y7 = _mm256_permute_ps(y0, 0b10101010);
/* g g g g d d d d */
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
y7 = _mm256_permutevar_ps(y0, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
glmm_store256(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y4, y8)),
_mm256_mul_ps(y5, y7)));
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
/* n n n n i i i i */
/* p p p p k k k k */

View File

@@ -27,8 +27,9 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0b00000011); /* l k j i p o n m */
/* 0x03: 0b00000011 */
y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */
/* f f f f a a a a */
/* h h h h c c c c */