mirror of
https://github.com/recp/cglm.git
synced 2025-12-26 10:35:10 +00:00
avx: fix glm_mul_avx
* Use glm_mat4_mul_avx here, because there seems to be no big difference for now.
This commit is contained in:
@@ -29,20 +29,22 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
|
||||
/* 0x03: 0b00000011 */
|
||||
y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */
|
||||
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i l k j i */
|
||||
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */
|
||||
|
||||
/* f f f f a a a a */
|
||||
/* g g g g c c c c */
|
||||
/* h h h h c c c c */
|
||||
/* e e e e b b b b */
|
||||
y7 = _mm256_permute_ps(y0, 0xAA); /* 0xAA: 0b10101010 */
|
||||
/* g g g g d d d d */
|
||||
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
|
||||
y7 = _mm256_permutevar_ps(y0, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
|
||||
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
|
||||
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
|
||||
|
||||
glmm_store256(dest[0],
|
||||
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
|
||||
_mm256_mul_ps(y4, y8)),
|
||||
_mm256_mul_ps(y5, y7)));
|
||||
|
||||
_mm256_mul_ps(y3, y7)),
|
||||
_mm256_add_ps(_mm256_mul_ps(y4, y8),
|
||||
_mm256_mul_ps(y5, y9))));
|
||||
|
||||
/* n n n n i i i i */
|
||||
/* p p p p k k k k */
|
||||
|
||||
Reference in New Issue
Block a user