mirror of
https://github.com/recp/cglm.git
synced 2025-10-03 08:41:55 +00:00
avx: implement transpose with AVX
This commit is contained in:
@@ -520,6 +520,8 @@ void
|
||||
glm_mat4_transpose_to(mat4 m, mat4 dest) {
|
||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||
glm_mat4_transp_wasm(m, dest);
|
||||
#elif defined(__AVX__)
|
||||
glm_mat4_transp_avx(m, dest);
|
||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glm_mat4_transp_sse2(m, dest);
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
@@ -546,6 +548,8 @@ void
|
||||
glm_mat4_transpose(mat4 m) {
|
||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||
glm_mat4_transp_wasm(m, m);
|
||||
#elif defined(__AVX__)
|
||||
glm_mat4_transp_avx(m, m);
|
||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glm_mat4_transp_sse2(m, m);
|
||||
#elif defined(CGLM_NEON_FP)
|
||||
|
@@ -12,8 +12,6 @@
|
||||
#include "../../common.h"
|
||||
#include "../intrin.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_scale_avx(mat4 m, float s) {
|
||||
@@ -24,6 +22,31 @@ glm_mat4_scale_avx(mat4 m, float s) {
|
||||
glmm_store256(m[2], _mm256_mul_ps(y0, glmm_load256(m[2])));
|
||||
}
|
||||
|
||||
/* TODO: this must be tested and compared to SSE version, may be slower!!! */
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_transp_avx(mat4 m, mat4 dest) {
|
||||
__m256 y0, y1, y2, y3;
|
||||
|
||||
y0 = glmm_load256(m[0]); /* h g f e d c b a */
|
||||
y1 = glmm_load256(m[2]); /* p o n m l k j i */
|
||||
|
||||
y2 = _mm256_unpacklo_ps(y0, y1); /* n f m e j b i a */
|
||||
y3 = _mm256_unpackhi_ps(y0, y1); /* p h o g l d k c */
|
||||
|
||||
y0 = _mm256_permute2f128_ps(y2, y3, 0x20); /* l d k c j b i a */
|
||||
y1 = _mm256_permute2f128_ps(y2, y3, 0x31); /* p h o g n f m e */
|
||||
|
||||
y2 = _mm256_unpacklo_ps(y0, y1); /* o k g c m i e a */
|
||||
y3 = _mm256_unpackhi_ps(y0, y1); /* p l h d n j f b */
|
||||
|
||||
y0 = _mm256_permute2f128_ps(y2, y3, 0x20); /* n j f b m i e a */
|
||||
y1 = _mm256_permute2f128_ps(y2, y3, 0x31); /* p l h d o k g c */
|
||||
|
||||
glmm_store256(dest[0], y0);
|
||||
glmm_store256(dest[2], y1);
|
||||
}
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
|
Reference in New Issue
Block a user