Merge branch 'master' into optimize-inv

This commit is contained in:
Recep Aslantas
2025-02-09 15:13:28 +03:00
committed by GitHub
7 changed files with 274 additions and 26 deletions

View File

@@ -13,8 +13,38 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13, macos-14, ubuntu-22.04, ubuntu-24.04]
simd: [none, sse, sse2, sse3, sse4, avx, avx2, neon]
include:
# x86/x64 builds
- { os: macos-13, simd: none }
- { os: macos-13, simd: sse }
- { os: macos-13, simd: sse2 }
- { os: macos-13, simd: sse3 }
- { os: macos-13, simd: sse4 }
- { os: macos-13, simd: avx }
- { os: macos-13, simd: avx2 }
- { os: macos-14, simd: none }
- { os: macos-14, simd: sse }
- { os: macos-14, simd: sse2 }
- { os: macos-14, simd: sse3 }
- { os: macos-14, simd: sse4 }
- { os: macos-14, simd: avx }
- { os: macos-14, simd: avx2 }
- { os: ubuntu-22.04, simd: none }
- { os: ubuntu-22.04, simd: sse }
- { os: ubuntu-22.04, simd: sse2 }
- { os: ubuntu-22.04, simd: sse3 }
- { os: ubuntu-22.04, simd: sse4 }
- { os: ubuntu-22.04, simd: avx }
- { os: ubuntu-22.04, simd: avx2 }
- { os: ubuntu-24.04, simd: none }
- { os: ubuntu-24.04, simd: sse }
- { os: ubuntu-24.04, simd: sse2 }
- { os: ubuntu-24.04, simd: sse3 }
- { os: ubuntu-24.04, simd: sse4 }
- { os: ubuntu-24.04, simd: avx }
- { os: ubuntu-24.04, simd: avx2 }
# ARM64 builds
- { os: ubuntu-latest-arm64, simd: neon }
steps:
- uses: actions/checkout@v4
@@ -81,18 +111,58 @@ jobs:
run: cmake --build build
build_cmake_ubuntu:
name: CMake / ${{ matrix.target.os }} / ${{ matrix.target.cc }} / ${{ matrix.simd }}
runs-on: ${{ matrix.target.os }}
name: CMake / ${{ matrix.target.os }} / ${{ matrix.target.cc }} / ${{ matrix.target.arch }} / ${{ matrix.target.simd }}
runs-on: ${{ matrix.target.arch == 'arm64' && 'ubuntu-latest-arm64' || matrix.target.os }}
strategy:
fail-fast: false
matrix:
target:
- { os: ubuntu-20.04, cc: gcc-11 }
- { os: ubuntu-22.04, cc: gcc-12 }
- { os: ubuntu-24.04, cc: gcc-13 }
- { os: ubuntu-20.04, cc: clang-12 }
- { os: ubuntu-22.04, cc: clang-15 }
simd: [none, sse, sse2, sse3, sse4, avx, avx2, neon]
# GCC 11 builds
- { os: ubuntu-20.04, cc: gcc-11, arch: x64, simd: none }
- { os: ubuntu-20.04, cc: gcc-11, arch: x64, simd: sse }
- { os: ubuntu-20.04, cc: gcc-11, arch: x64, simd: sse2 }
- { os: ubuntu-20.04, cc: gcc-11, arch: x64, simd: sse3 }
- { os: ubuntu-20.04, cc: gcc-11, arch: x64, simd: sse4 }
- { os: ubuntu-20.04, cc: gcc-11, arch: x64, simd: avx }
- { os: ubuntu-20.04, cc: gcc-11, arch: x64, simd: avx2 }
# GCC 12 builds
- { os: ubuntu-22.04, cc: gcc-12, arch: x64, simd: none }
- { os: ubuntu-22.04, cc: gcc-12, arch: x64, simd: sse }
- { os: ubuntu-22.04, cc: gcc-12, arch: x64, simd: sse2 }
- { os: ubuntu-22.04, cc: gcc-12, arch: x64, simd: sse3 }
- { os: ubuntu-22.04, cc: gcc-12, arch: x64, simd: sse4 }
- { os: ubuntu-22.04, cc: gcc-12, arch: x64, simd: avx }
- { os: ubuntu-22.04, cc: gcc-12, arch: x64, simd: avx2 }
# GCC 13 builds
- { os: ubuntu-24.04, cc: gcc-13, arch: x64, simd: none }
- { os: ubuntu-24.04, cc: gcc-13, arch: x64, simd: sse }
- { os: ubuntu-24.04, cc: gcc-13, arch: x64, simd: sse2 }
- { os: ubuntu-24.04, cc: gcc-13, arch: x64, simd: sse3 }
- { os: ubuntu-24.04, cc: gcc-13, arch: x64, simd: sse4 }
- { os: ubuntu-24.04, cc: gcc-13, arch: x64, simd: avx }
- { os: ubuntu-24.04, cc: gcc-13, arch: x64, simd: avx2 }
# Clang 12 builds
- { os: ubuntu-20.04, cc: clang-12, arch: x64, simd: none }
- { os: ubuntu-20.04, cc: clang-12, arch: x64, simd: sse }
- { os: ubuntu-20.04, cc: clang-12, arch: x64, simd: sse2 }
- { os: ubuntu-20.04, cc: clang-12, arch: x64, simd: sse3 }
- { os: ubuntu-20.04, cc: clang-12, arch: x64, simd: sse4 }
- { os: ubuntu-20.04, cc: clang-12, arch: x64, simd: avx }
- { os: ubuntu-20.04, cc: clang-12, arch: x64, simd: avx2 }
# Clang 15 builds
- { os: ubuntu-22.04, cc: clang-15, arch: x64, simd: none }
- { os: ubuntu-22.04, cc: clang-15, arch: x64, simd: sse }
- { os: ubuntu-22.04, cc: clang-15, arch: x64, simd: sse2 }
- { os: ubuntu-22.04, cc: clang-15, arch: x64, simd: sse3 }
- { os: ubuntu-22.04, cc: clang-15, arch: x64, simd: sse4 }
- { os: ubuntu-22.04, cc: clang-15, arch: x64, simd: avx }
- { os: ubuntu-22.04, cc: clang-15, arch: x64, simd: avx2 }
# ARM64 builds
- { os: ubuntu-latest, cc: gcc-12, arch: arm64, simd: neon }
- { os: ubuntu-latest, cc: gcc-13, arch: arm64, simd: neon }
# ARMv7 builds
- { os: ubuntu-latest-arm64, cc: gcc-12, arch: armv7, simd: neon }
- { os: ubuntu-latest-arm64, cc: gcc-12, arch: armv7, simd: none }
steps:
- uses: actions/checkout@v4
@@ -130,7 +200,36 @@ jobs:
fi
- name: Configure CMake
run: cmake -B build -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=${{ matrix.target.cc }} -DCMAKE_C_FLAGS="$CFLAGS" -DCGLM_STATIC=ON -DCGLM_USE_TEST=ON
run: |
  # Configure the build, choosing compiler flags from the matrix entry.
  # The matrix of this job only defines `target` objects, so the SIMD
  # selection must be read from matrix.target.simd; a bare matrix.simd
  # is undefined here and expands to an empty string.
  if [ "${{ matrix.target.arch }}" == "armv7" ]; then
    # Build for ARMv7
    # NOTE(review): -m32 is not among the options GCC documents for ARM
    # targets; confirm the selected compiler accepts it.
    neon_flags=""
    if [ "${{ matrix.target.simd }}" == "neon" ]; then
      neon_flags="-mfpu=neon -mfloat-abi=hard"
    fi
    cmake -B build -GNinja -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_C_COMPILER=${{ matrix.target.cc }} \
      -DCMAKE_C_FLAGS="$CFLAGS -m32 -march=armv7-a ${neon_flags}" \
      -DCGLM_STATIC=ON -DCGLM_USE_TEST=ON
  elif [ "${{ matrix.target.arch }}" == "arm64" ]; then
    # Build for ARM64 (AArch64)
    neon_flags=""
    if [ "${{ matrix.target.simd }}" == "neon" ]; then
      neon_flags="+simd"    # Enable SIMD/NEON features on ARM64
    else
      neon_flags="+nosimd"  # Explicitly disable SIMD/NEON
    fi
    # The feature modifier is appended directly to the -march value.
    cmake -B build -GNinja -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_C_COMPILER=${{ matrix.target.cc }} \
      -DCMAKE_C_FLAGS="$CFLAGS -march=armv8-a${neon_flags}" \
      -DCGLM_STATIC=ON -DCGLM_USE_TEST=ON
  else
    # Normal build (x86/x64): $CFLAGS is passed through unchanged.
    cmake -B build -GNinja -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_C_COMPILER=${{ matrix.target.cc }} \
      -DCMAKE_C_FLAGS="$CFLAGS" \
      -DCGLM_STATIC=ON -DCGLM_USE_TEST=ON
  fi
- name: Build
run: cmake --build build
@@ -176,8 +275,31 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13, macos-14, windows-2022]
simd: [none, sse, sse2, sse3, sse4, avx, avx2, neon]
include:
# x86/x64 builds
- { os: macos-13, simd: none }
- { os: macos-13, simd: sse }
- { os: macos-13, simd: sse2 }
- { os: macos-13, simd: sse3 }
- { os: macos-13, simd: sse4 }
- { os: macos-13, simd: avx }
- { os: macos-13, simd: avx2 }
- { os: macos-14, simd: none }
- { os: macos-14, simd: sse }
- { os: macos-14, simd: sse2 }
- { os: macos-14, simd: sse3 }
- { os: macos-14, simd: sse4 }
- { os: macos-14, simd: avx }
- { os: macos-14, simd: avx2 }
- { os: windows-2022, simd: none }
- { os: windows-2022, simd: sse }
- { os: windows-2022, simd: sse2 }
- { os: windows-2022, simd: sse3 }
- { os: windows-2022, simd: sse4 }
- { os: windows-2022, simd: avx }
- { os: windows-2022, simd: avx2 }
# ARM64 builds
- { os: macos-14-arm64, simd: neon }
steps:
- uses: actions/checkout@v4
@@ -258,12 +380,42 @@ jobs:
build_meson:
name: Meson / ${{ matrix.os }} / ${{ matrix.simd }}
runs-on: ${{ matrix.os }}
runs-on: ${{ contains(matrix.os, 'arm64') && 'ubuntu-latest-arm64' || matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-14, ubuntu-22.04, ubuntu-24.04, windows-2022]
simd: [none, sse, sse2, sse3, sse4, avx, avx2, neon]
include:
# x86/x64 builds
- { os: macos-14, simd: none }
- { os: macos-14, simd: sse }
- { os: macos-14, simd: sse2 }
- { os: macos-14, simd: sse3 }
- { os: macos-14, simd: sse4 }
- { os: macos-14, simd: avx }
- { os: macos-14, simd: avx2 }
- { os: ubuntu-22.04, simd: none }
- { os: ubuntu-22.04, simd: sse }
- { os: ubuntu-22.04, simd: sse2 }
- { os: ubuntu-22.04, simd: sse3 }
- { os: ubuntu-22.04, simd: sse4 }
- { os: ubuntu-22.04, simd: avx }
- { os: ubuntu-22.04, simd: avx2 }
- { os: ubuntu-24.04, simd: none }
- { os: ubuntu-24.04, simd: sse }
- { os: ubuntu-24.04, simd: sse2 }
- { os: ubuntu-24.04, simd: sse3 }
- { os: ubuntu-24.04, simd: sse4 }
- { os: ubuntu-24.04, simd: avx }
- { os: ubuntu-24.04, simd: avx2 }
- { os: windows-2022, simd: none }
- { os: windows-2022, simd: sse }
- { os: windows-2022, simd: sse2 }
- { os: windows-2022, simd: sse3 }
- { os: windows-2022, simd: sse4 }
- { os: windows-2022, simd: avx }
- { os: windows-2022, simd: avx2 }
# ARM64 builds
- { os: ubuntu-latest-arm64, simd: neon }
steps:
- uses: actions/checkout@v4
@@ -415,3 +567,79 @@ jobs:
- name: Build
run: swift build
# Builds and tests with CMake on ARM runners (Linux and Windows), once per
# architecture / SIMD combination listed in the matrix below.
build_cmake_arm:
  name: CMake / ARM / ${{ matrix.os }} / ${{ matrix.arch }} / ${{ matrix.simd }}
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      include:
        # Linux ARM builds
        - os: ubuntu-latest-arm64
          arch: arm64
          simd: neon
        - os: ubuntu-latest-arm64
          arch: armv7
          simd: neon
        - os: ubuntu-latest-arm64
          arch: armv7
          simd: none
        # Windows ARM builds
        - os: windows-latest-arm64
          arch: arm64
          simd: neon
        - os: windows-latest-arm64
          arch: arm
          simd: neon
        - os: windows-latest-arm64
          arch: arm
          simd: none
  steps:
    - uses: actions/checkout@v4

    - name: Configure CMake (Windows)
      if: runner.os == 'Windows'
      shell: pwsh
      run: |
        # NOTE(review): these are GCC/Clang-style flags, but the Visual
        # Studio generator is used below; confirm the selected toolset
        # understands them.
        $flags = ""
        if ("${{ matrix.arch }}" -eq "arm") {
          $flags = "-m32 -march=armv7-a"
          if ("${{ matrix.simd }}" -eq "neon") {
            $flags += " -mfpu=neon"
          }
        }
        elseif ("${{ matrix.simd }}" -eq "neon") {
          $flags = "-march=armv8-a+simd"
        }
        # -A selects the Visual Studio target platform from the matrix arch.
        cmake -B build -G "Visual Studio 17 2022" -A ${{ matrix.arch == 'arm64' && 'ARM64' || 'ARM' }} `
          -DCMAKE_BUILD_TYPE=Release `
          -DCMAKE_C_FLAGS="$flags" `
          -DCGLM_STATIC=ON -DCGLM_USE_TEST=ON

    - name: Configure CMake (Unix)
      if: runner.os != 'Windows'
      shell: bash
      run: |
        # NOTE(review): no C compiler is selected here, so the runner's
        # default is used; confirm it accepts the 32-bit ARMv7 flags.
        flags=""
        if [ "${{ matrix.arch }}" = "armv7" ]; then
          flags="-m32 -march=armv7-a"
          if [ "${{ matrix.simd }}" = "neon" ]; then
            flags="$flags -mfpu=neon -mfloat-abi=hard"
          fi
        elif [ "${{ matrix.simd }}" = "neon" ]; then
          flags="-march=armv8-a+simd"
        fi
        cmake -B build -GNinja -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_C_FLAGS="$flags" \
          -DCGLM_STATIC=ON -DCGLM_USE_TEST=ON

    # Visual Studio is a multi-config generator: CMAKE_BUILD_TYPE is ignored
    # there and the configuration must be chosen at build time. Single-config
    # generators such as Ninja ignore --config.
    - name: Build
      run: cmake --build build --config Release

    # Multi-config generators place binaries in a per-configuration
    # subdirectory, so the Windows runner path differs from the Unix one.
    # NOTE(review): assumes the runner is emitted as Release/tests.exe
    # directly under build/ - confirm against the test CMake setup.
    - name: Test (Windows)
      if: runner.os == 'Windows'
      working-directory: build
      run: ./Release/tests.exe

    - name: Test (Unix)
      if: runner.os != 'Windows'
      working-directory: build
      run: ./tests

View File

@@ -34,7 +34,7 @@ array of matrices:
/* ... */
glUniformMatrix4fv(location, count, GL_FALSE, matrix[0][0]);
1. Cast matrix to pointer
2. Cast matrix to pointer
--------------------------
.. code-block:: c

View File

@@ -526,12 +526,8 @@ glm_vec4_div(vec4 a, vec4 b, vec4 dest) {
CGLM_INLINE
void
glm_vec4_divs(vec4 v, float s, vec4 dest) {
#if defined(__wasm__) && defined(__wasm_simd128__)
glmm_store(dest, wasm_f32x4_div(glmm_load(v), wasm_f32x4_splat(s)));
#elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_div_ps(glmm_load(v), glmm_set1(s)));
#elif defined(CGLM_NEON_FP)
vst1q_f32(dest, vdivq_f32(vld1q_f32(v), vdupq_n_f32(s)));
#if defined(CGLM_SIMD)
glmm_store(dest, glmm_div(glmm_load(v), glmm_set1(s)));
#else
glm_vec4_scale(v, 1.0f / s, dest);
#endif
@@ -926,7 +922,7 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) {
return;
}
glmm_store(dest, wasm_f32x4_div(x0, wasm_f32x4_sqrt(xdot)));
glmm_store(dest, glmm_div(x0, wasm_f32x4_sqrt(xdot)));
#elif defined( __SSE__ ) || defined( __SSE2__ )
__m128 xdot, x0;
float dot;
@@ -940,7 +936,7 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) {
return;
}
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
glmm_store(dest, glmm_div(x0, _mm_sqrt_ps(xdot)));
#else
float norm;

View File

@@ -53,6 +53,7 @@
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\test\include\common.h" />
<ClInclude Include="..\test\src\test_aabb2d.h" />
<ClInclude Include="..\test\src\test_affine.h" />
<ClInclude Include="..\test\src\test_affine2d.h" />
<ClInclude Include="..\test\src\test_affine_mat.h" />
@@ -76,6 +77,7 @@
<ClInclude Include="..\test\src\test_mat4.h" />
<ClInclude Include="..\test\src\test_mat4x2.h" />
<ClInclude Include="..\test\src\test_mat4x3.h" />
<ClInclude Include="..\test\src\test_noise.h" />
<ClInclude Include="..\test\src\test_plane.h" />
<ClInclude Include="..\test\src\test_project.h" />
<ClInclude Include="..\test\src\test_quat.h" />

View File

@@ -130,5 +130,11 @@
<ClInclude Include="..\test\src\test_euler_to_quat_rh.h">
<Filter>src</Filter>
</ClInclude>
<ClInclude Include="..\test\src\test_aabb2d.h">
<Filter>src</Filter>
</ClInclude>
<ClInclude Include="..\test\src\test_noise.h">
<Filter>src</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@@ -80,6 +80,7 @@
<ClCompile Include="..\src\mat4.c" />
<ClCompile Include="..\src\mat4x2.c" />
<ClCompile Include="..\src\mat4x3.c" />
<ClCompile Include="..\src\noise.c" />
<ClCompile Include="..\src\plane.c" />
<ClCompile Include="..\src\project.c" />
<ClCompile Include="..\src\quat.c" />
@@ -137,6 +138,7 @@
<ClInclude Include="..\include\cglm\call\mat4.h" />
<ClInclude Include="..\include\cglm\call\mat4x2.h" />
<ClInclude Include="..\include\cglm\call\mat4x3.h" />
<ClInclude Include="..\include\cglm\call\noise.h" />
<ClInclude Include="..\include\cglm\call\plane.h" />
<ClInclude Include="..\include\cglm\call\project.h" />
<ClInclude Include="..\include\cglm\call\quat.h" />
@@ -183,6 +185,7 @@
<ClInclude Include="..\include\cglm\mat4.h" />
<ClInclude Include="..\include\cglm\mat4x2.h" />
<ClInclude Include="..\include\cglm\mat4x3.h" />
<ClInclude Include="..\include\cglm\noise.h" />
<ClInclude Include="..\include\cglm\plane.h" />
<ClInclude Include="..\include\cglm\project.h" />
<ClInclude Include="..\include\cglm\quat.h" />
@@ -250,6 +253,7 @@
<ClInclude Include="..\include\cglm\struct\mat4.h" />
<ClInclude Include="..\include\cglm\struct\mat4x2.h" />
<ClInclude Include="..\include\cglm\struct\mat4x3.h" />
<ClInclude Include="..\include\cglm\struct\noise.h" />
<ClInclude Include="..\include\cglm\struct\plane.h" />
<ClInclude Include="..\include\cglm\struct\project.h" />
<ClInclude Include="..\include\cglm\struct\quat.h" />
@@ -278,7 +282,7 @@
<ProjectGuid>{CA8BCAF9-CD25-4133-8F62-3D1449B5D2FC}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>cglm</RootNamespace>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">

View File

@@ -184,6 +184,9 @@
<ClCompile Include="..\src\aabb2d.c">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\src\noise.c">
<Filter>src</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\config.h">
@@ -732,5 +735,14 @@
<ClInclude Include="..\include\cglm\struct\ray.h">
<Filter>include\cglm\struct</Filter>
</ClInclude>
<ClInclude Include="..\include\cglm\noise.h">
<Filter>include\cglm</Filter>
</ClInclude>
<ClInclude Include="..\include\cglm\call\noise.h">
<Filter>include\cglm\call</Filter>
</ClInclude>
<ClInclude Include="..\include\cglm\struct\noise.h">
<Filter>include\cglm\struct</Filter>
</ClInclude>
</ItemGroup>
</Project>