Cast vec3 pointers for the 64-bit SSE load/store without asserting alignment.

include/cglm/simd/x86.h
  glmm_load3:  the 64-bit load of v[0..1] casts v through `const void *`
               before converting it to `const __m128i *`.
  glmm_store3: the 64-bit store to v[0..1] casts v through `void *`
               before converting it to `__m64 *`.

include/cglm/types.h
  CGLM_ASSUME_ALIGNED(expr, alignment): __builtin_assume_aligned() when
  __GNUC__ is defined, plain (expr) otherwise.
  CGLM_CASTPTR_ASSUME_ALIGNED(expr, type): casts expr to type* while
  asserting __alignof__(type) alignment through CGLM_ASSUME_ALIGNED.

Why the call sites do not use CGLM_CASTPTR_ASSUME_ALIGNED:
  Both helpers take `float v[3]`, and vec3 is declared as a plain
  `typedef float vec3[3]`, so only float alignment is guaranteed. Applying
  the macro with __m128i / __m64 would promise the compiler 16-byte / 8-byte
  alignment (GCC/Clang values), which is undefined behaviour when the promise
  is false. A cast through void * changes the pointer type without making
  any alignment claim.

Review notes:
  - NOTE(review): the macros are presumably meant to silence pointer-cast
    alignment warnings (e.g. -Wcast-align); confirm that the void * form
    also satisfies the project's warning set.
  - Leading indentation and blank context lines were reconstructed; if a
    hunk fails to apply, retry with whitespace-insensitive matching.
  - The post-image blob ids on the `index` lines are carried over and do not
    describe the modified '+' lines; regenerate the patch if they matter.

diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h
index 1360e20..d1d9cfd 100644
--- a/include/cglm/simd/x86.h
+++ b/include/cglm/simd/x86.h
@@ -175,7 +175,7 @@ glmm_load3(float v[3]) {
   __m128i xy;
   __m128 z;
 
-  xy = _mm_loadl_epi64((const __m128i *)v);
+  xy = _mm_loadl_epi64((const __m128i *)(const void *)v);
   z = _mm_load_ss(&v[2]);
 
   return _mm_movelh_ps(_mm_castsi128_ps(xy), z);
@@ -184,7 +184,7 @@ glmm_load3(float v[3]) {
 static inline
 void
 glmm_store3(float v[3], __m128 vx) {
-  _mm_storel_pi((__m64 *)&v[0], vx);
+  _mm_storel_pi((__m64 *)(void *)v, vx);
   _mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2));
 }
 
diff --git a/include/cglm/types.h b/include/cglm/types.h
index 60eb538..80463a2 100644
--- a/include/cglm/types.h
+++ b/include/cglm/types.h
@@ -32,6 +32,19 @@
 # define CGLM_ALIGN_MAT CGLM_ALIGN(16)
 #endif
 
+#ifdef __GNUC__
+# define CGLM_ASSUME_ALIGNED(expr, alignment) \
+    __builtin_assume_aligned((expr), (alignment))
+#else
+# define CGLM_ASSUME_ALIGNED(expr, alignment) (expr)
+#endif
+
+/* Casts expr to type* and, when __GNUC__ is defined, promises the compiler
+ * that the result is aligned to __alignof__(type). The promise is never
+ * checked: use this only on pointers that really have that alignment. */
+#define CGLM_CASTPTR_ASSUME_ALIGNED(expr, type) \
+  ((type*)CGLM_ASSUME_ALIGNED((expr), __alignof__(type)))
+
 typedef float vec2[2];
 typedef float vec3[3];
 typedef int ivec3[3];