Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8df7fb0
WIP: AVX10.1
stefanatwork Jun 17, 2025
4c01b7a
Added AVX10.1 flags for gcc and clang
stefanatwork Feb 18, 2026
75d34b1
Added CPU ID feature detection of AVX 10.x and APX
stefanatwork Mar 18, 2026
5ce6720
More changes to ISA detection and selection to include new instructio…
stefanatwork Mar 18, 2026
3d2278f
More updates for AVX10/APX support
stefanatwork Mar 18, 2026
d117ca7
Added display names for new CPUs
stefanatwork Mar 19, 2026
6f646dc
Corrected CPU feature detection
stefanatwork Mar 19, 2026
a760f20
Fixed clang APX compile flags
stefanatwork Mar 19, 2026
badd856
Removed duplicate macros in isa.h
stefanatwork Mar 19, 2026
aea9b43
Updated ISA macros and BVH build selections to AVX10.x and APX isas
stefanatwork Mar 20, 2026
dc4421b
Merge branch 'master' into sw/avx10
stefanatwork Mar 23, 2026
8638fda
Consolidating AVX10 and APX into one ISA
stefanatwork Apr 20, 2026
d2123f0
simd: add AVX10.2 guarded intrinsics in AVX-512 wrappers
stefanatwork Apr 20, 2026
e132ff4
simd: fix vuint16 multiply to use mullo_epi32
stefanatwork Apr 20, 2026
76ab1d6
Merge branch 'master' into sw/avx10
stefanatwork Jun 18, 2026
06bd0e1
Fix auto detection of compiler ISA
stefanatwork Jun 18, 2026
e2e97cd
Merge branch 'master' into sw/avx10
stefanatwork Jun 24, 2026
dad07cc
Revert "simd: add AVX10.2 guarded intrinsics in AVX-512 wrappers"
stefanatwork Jun 26, 2026
515afbd
Revert "simd: fix vuint16 multiply to use mullo_epi32"
stefanatwork Jun 26, 2026
1049cd2
Requiring AVX10.2 and prior to enable APX
stefanatwork Jun 26, 2026
d4d29c7
Removed AVX10.1/10.2 from command line parser
stefanatwork Jun 26, 2026
bcf9792
Gate AVX10 detection on 512-bit vector support
stefanatwork Jun 26, 2026
3a35c97
Removed reference to unreleased product
stefanatwork Jun 26, 2026
2a50980
Fixed a comment
stefanatwork Jun 26, 2026
6721a6a
Removed superfluous guard macro
stefanatwork Jun 26, 2026
d5b7801
Workaround for gcc code creation bug in apx
stefanatwork Jun 29, 2026
15f5a63
Support for APX in MSVC
stefanatwork Jun 29, 2026
fc06c0f
Fixed preprocessor macros for AVX10 to work across clang, gcc and MSVC
stefanatwork Jun 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 33 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ ENDIF()
IF (EMBREE_ARM)
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE NEON NEON2X)
ELSE()
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 AVX AVX2 AVX512 DEFAULT)
SET_PROPERTY(CACHE EMBREE_MAX_ISA PROPERTY STRINGS NONE SSE2 SSE4.2 AVX AVX2 AVX512 APX DEFAULT)
ENDIF()

IF (EMBREE_MAX_ISA STREQUAL "NONE")
Expand All @@ -380,18 +380,21 @@ IF (EMBREE_MAX_ISA STREQUAL "NONE")
OPTION(EMBREE_ISA_NEON2X "Enables NEON ISA double pumped." OFF)
ENDIF()
ELSE()
TRY_COMPILE(COMPILER_SUPPORTS_AVX "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX})
TRY_COMPILE(COMPILER_SUPPORTS_AVX2 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX2})
TRY_COMPILE(COMPILER_SUPPORTS_AVX512 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX512})
TRY_COMPILE(COMPILER_SUPPORTS_AVX "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX})
TRY_COMPILE(COMPILER_SUPPORTS_AVX2 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX2})
TRY_COMPILE(COMPILER_SUPPORTS_AVX512 "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_AVX512})
TRY_COMPILE(COMPILER_SUPPORTS_APX "${CMAKE_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/common/cmake/check_isa.cpp" COMPILE_DEFINITIONS ${FLAGS_APX})

OPTION(EMBREE_ISA_SSE2 "Enables SSE2 ISA." ON)
OPTION(EMBREE_ISA_SSE42 "Enables SSE4.2 ISA." ON)
OPTION(EMBREE_ISA_AVX "Enables AVX ISA." ${COMPILER_SUPPORTS_AVX})
OPTION(EMBREE_ISA_AVX2 "Enables AVX2 ISA." ${COMPILER_SUPPORTS_AVX2})
IF (APPLE)
OPTION(EMBREE_ISA_AVX512 "Enables AVX512 ISA." OFF)
OPTION(EMBREE_ISA_APX "Enables APX ISA." OFF)
ELSE()
OPTION(EMBREE_ISA_AVX512 "Enables AVX512 ISA." ${COMPILER_SUPPORTS_AVX512})
OPTION(EMBREE_ISA_APX "Enables APX ISA." ${COMPILER_SUPPORTS_APX})
ENDIF()
# Don't use OPTION, but still set them to OFF, so that embree-config.cmake is consistent with its definitions
SET(EMBREE_ISA_NEON OFF)
Expand All @@ -406,13 +409,15 @@ ELSEIF (EMBREE_MAX_ISA STREQUAL "DEFAULT")
UNSET(EMBREE_ISA_AVX CACHE)
UNSET(EMBREE_ISA_AVX2 CACHE)
UNSET(EMBREE_ISA_AVX512 CACHE)
UNSET(EMBREE_ISA_APX CACHE)
SET(EMBREE_ISA_NEON OFF)
SET(EMBREE_ISA_NEON2X OFF)
SET(EMBREE_ISA_SSE2 OFF)
SET(EMBREE_ISA_SSE42 OFF)
SET(EMBREE_ISA_AVX OFF)
SET(EMBREE_ISA_AVX2 OFF)
SET(EMBREE_ISA_AVX512 OFF)
SET(EMBREE_ISA_APX OFF)
MESSAGE(STATUS "Detecting default ISA...")
INCLUDE(check_isa_default)
CHECK_ISA_DEFAULT(EMBREE_ISA_DEFAULT)
Expand All @@ -427,6 +432,7 @@ ELSE()
UNSET(EMBREE_ISA_AVX CACHE)
UNSET(EMBREE_ISA_AVX2 CACHE)
UNSET(EMBREE_ISA_AVX512 CACHE)
UNSET(EMBREE_ISA_APX CACHE)

IF(EMBREE_MAX_ISA STREQUAL "NEON")
SET(ISA 1)
Expand All @@ -444,6 +450,8 @@ ELSE()
SET(ISA 5)
ELSEIF(EMBREE_MAX_ISA STREQUAL "AVX512SKX") # just for compatibility
SET(ISA 5)
ELSEIF(EMBREE_MAX_ISA STREQUAL "APX")
SET(ISA 6)
ELSE()
MESSAGE(FATAL_ERROR "Unsupported ISA specified: " ${EMBREE_MAX_ISA})
ENDIF()
Expand All @@ -455,6 +463,7 @@ ELSE()
SET(EMBREE_ISA_AVX OFF)
SET(EMBREE_ISA_AVX2 OFF)
SET(EMBREE_ISA_AVX512 OFF)
SET(EMBREE_ISA_APX OFF)

IF (EMBREE_ARM)
IF (ISA GREATER 0)
Expand All @@ -479,6 +488,9 @@ ELSE()
IF (ISA GREATER 4)
SET(EMBREE_ISA_AVX512 ON)
ENDIF ()
IF (ISA GREATER 5)
SET(EMBREE_ISA_APX ON)
ENDIF ()
ENDIF()
ENDIF()

Expand Down Expand Up @@ -511,6 +523,9 @@ IF (APPLE AND EMBREE_STATIC_LIB)
IF (EMBREE_ISA_AVX512)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()
IF (EMBREE_ISA_APX)
MATH(EXPR NUMISA "${NUMISA}+1")
ENDIF()

IF (NUMISA GREATER 1)
IF (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
Expand All @@ -530,6 +545,7 @@ SET(SSE42 1)
SET(AVX 2)
SET(AVX2 3)
SET(AVX512 4)
SET(APX 5)

UNSET(FLAGS_LOWEST)
SET(ISA_LOWEST -1)
Expand Down Expand Up @@ -612,6 +628,19 @@ IF (EMBREE_ISA_AVX512)
ENDIF()
ENDIF ()

IF (EMBREE_ISA_APX)
ADD_DEFINITIONS(-DEMBREE_TARGET_APX)
IF (NOT EMBREE_ARM)
# APX support in ISPC?
# LIST(APPEND ISPC_TARGETS "")
ENDIF()
IF(NOT FLAGS_LOWEST)
SET(ISA_LOWEST ${APX})
SET(ISA_LOWEST_AVX ${APX})
SET(FLAGS_LOWEST ${FLAGS_APX})
ENDIF()
ENDIF ()

IF (ISA_LOWEST EQUAL -1)
MESSAGE(FATAL_ERROR "You have to enable at least one ISA!")
ENDIF()
Expand Down
5 changes: 4 additions & 1 deletion common/cmake/check_isa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
// limitations under the License. //
// ======================================================================== //

#if \
// Require APX and 10.2 if both are available, otherwise require the highest available ISA
#if defined(__APX_F__) && ((defined(__AVX10_VER__) && (__AVX10_VER__ >= 2)) || defined(__AVX10_2__))
char const *info_isa = "ISA" ":" "APX";
#elif \
defined(__AVX512F__) && defined(__AVX512CD__) && \
defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
char const *info_isa = "ISA" ":" "AVX512";
Expand Down
1 change: 1 addition & 0 deletions common/cmake/clang.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ ELSE ()
_SET_IF_EMPTY(FLAGS_AVX "-mavx")
_SET_IF_EMPTY(FLAGS_AVX2 "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
_SET_IF_EMPTY(FLAGS_AVX512 "-march=skx")
_SET_IF_EMPTY(FLAGS_APX "-march=novalake")
ENDIF ()

IF (WIN32)
Expand Down
1 change: 1 addition & 0 deletions common/cmake/dpcpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ _SET_IF_EMPTY(FLAGS_SSE42 "-msse4.2")
_SET_IF_EMPTY(FLAGS_AVX "-mavx")
_SET_IF_EMPTY(FLAGS_AVX2 "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
_SET_IF_EMPTY(FLAGS_AVX512 "-march=skx")
_SET_IF_EMPTY(FLAGS_APX "-march=novalake")

IF (NOT WIN32)
OPTION(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
Expand Down
5 changes: 5 additions & 0 deletions common/cmake/embree-config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ SET(EMBREE_ISA_SSE42 @EMBREE_ISA_SSE42@)
SET(EMBREE_ISA_AVX @EMBREE_ISA_AVX@)
SET(EMBREE_ISA_AVX2 @EMBREE_ISA_AVX2@)
SET(EMBREE_ISA_AVX512 @EMBREE_ISA_AVX512@)
SET(EMBREE_ISA_APX @EMBREE_ISA_APX@)
SET(EMBREE_ISA_AVX512SKX @EMBREE_ISA_AVX512@) # just for compatibility
SET(EMBREE_ISA_NEON @EMBREE_ISA_NEON@)
SET(EMBREE_ISA_NEON2X @EMBREE_ISA_NEON2X@)
Expand Down Expand Up @@ -85,6 +86,10 @@ IF (EMBREE_STATIC_LIB)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_avx512-targets.cmake")
ENDIF()

IF (EMBREE_ISA_APX)
INCLUDE("${EMBREE_ROOT_DIR}/@EMBREE_CMAKEEXPORT_DIR@/embree_apx-targets.cmake")
ENDIF()

ENDIF()

IF (EMBREE_SYCL_SUPPORT)
Expand Down
10 changes: 10 additions & 0 deletions common/cmake/gnu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ ELSE ()
_SET_IF_EMPTY(FLAGS_AVX "-mavx")
_SET_IF_EMPTY(FLAGS_AVX2 "-mf16c -mavx2 -mfma -mlzcnt -mbmi -mbmi2")
_SET_IF_EMPTY(FLAGS_AVX512 "-march=skylake-avx512")
_SET_IF_EMPTY(FLAGS_APX "${FLAGS_AVX512} -mavx10.1 -mavx10.2 -mapxf")

# GCC 15.0-15.2 can emit illegal vbroadcasti128 with -mapxf under high
# register pressure. Disable APX for affected compiler versions entirely.
IF (CMAKE_CXX_COMPILER_ID MATCHES "GNU"
AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "15.0"
AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "15.3")
SET(EMBREE_ISA_APX OFF CACHE BOOL "" FORCE)
MESSAGE(WARNING "Disabling APX ISA for GCC ${CMAKE_CXX_COMPILER_VERSION} due to known APX codegen bug; upgrade to GCC 15.3+.")
ENDIF()
ENDIF ()

OPTION(EMBREE_IGNORE_CMAKE_CXX_FLAGS "When enabled Embree ignores default CMAKE_CXX_FLAGS." ON)
Expand Down
1 change: 1 addition & 0 deletions common/cmake/msvc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ SET(FLAGS_SSE42 "${FLAGS_SSE2} /D__SSE3__ /D__SSSE3__ /D__SSE4_1__ /D__SSE4_2__"
SET(FLAGS_AVX "${FLAGS_SSE42} /arch:AVX")
SET(FLAGS_AVX2 "${FLAGS_SSE42} /arch:AVX2")
SET(FLAGS_AVX512 "${FLAGS_AVX2} /arch:AVX512")
SET(FLAGS_APX "${FLAGS_AVX512} /arch:AVX10.2 /vlen=512 /feature:APX /D__AVX10_VER__=102")

SET(COMMON_CXX_FLAGS "")
SET(COMMON_CXX_FLAGS "${COMMON_CXX_FLAGS} /EHsc") # catch C++ exceptions only and extern "C" functions never throw a C++ exception
Expand Down
90 changes: 82 additions & 8 deletions common/sys/sysinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,25 @@ namespace embree
uint32_t DisplayFamily_DisplayModel = (DisplayFamily << 8) + (DisplayModel << 0);

// Data from Intel® 64 and IA-32 Architectures, Volume 4, Chapter 2, Table 2-1 (CPUID Signature Values of DisplayFamily_DisplayModel)
if (DisplayFamily_DisplayModel == 0x06AF) return CPU::SIERRA_FOREST;
if (DisplayFamily_DisplayModel == 0x06AD) return CPU::GRANITE_RAPIDS;
if (DisplayFamily_DisplayModel == 0x06AE) return CPU::GRANITE_RAPIDS;
if (DisplayFamily_DisplayModel == 0x06CF) return CPU::EMERALD_RAPIDS;
if (DisplayFamily_DisplayModel == 0x068F) return CPU::SAPPHIRE_RAPIDS;
if (DisplayFamily_DisplayModel == 0x06D7) return CPU::BARTLETT_LAKE;
if (DisplayFamily_DisplayModel == 0x06CC) return CPU::PANTHER_LAKE;
if (DisplayFamily_DisplayModel == 0x06BD) return CPU::LUNAR_LAKE;
if (DisplayFamily_DisplayModel == 0x06B5) return CPU::ARROW_LAKE;
if (DisplayFamily_DisplayModel == 0x06C5) return CPU::ARROW_LAKE;
if (DisplayFamily_DisplayModel == 0x06C6) return CPU::ARROW_LAKE;
if (DisplayFamily_DisplayModel == 0x06AA) return CPU::METEOR_LAKE;
if (DisplayFamily_DisplayModel == 0x06AC) return CPU::METEOR_LAKE;
if (DisplayFamily_DisplayModel == 0x06B7) return CPU::RAPTOR_LAKE;
if (DisplayFamily_DisplayModel == 0x06BA) return CPU::RAPTOR_LAKE;
if (DisplayFamily_DisplayModel == 0x06BF) return CPU::RAPTOR_LAKE;
if (DisplayFamily_DisplayModel == 0x0697) return CPU::ALDER_LAKE;
if (DisplayFamily_DisplayModel == 0x069A) return CPU::ALDER_LAKE;
if (DisplayFamily_DisplayModel == 0x06A7) return CPU::ROCKET_LAKE;
if (DisplayFamily_DisplayModel == 0x067D) return CPU::CORE_ICE_LAKE;
if (DisplayFamily_DisplayModel == 0x067E) return CPU::CORE_ICE_LAKE;
if (DisplayFamily_DisplayModel == 0x068C) return CPU::CORE_TIGER_LAKE;
Expand Down Expand Up @@ -184,6 +203,19 @@ namespace embree
std::string stringOfCPUModel(CPU model)
{
switch (model) {
case CPU::DIAMOND_RAPIDS : return "Diamond Rapids";
case CPU::SIERRA_FOREST : return "Sierra Forest";
case CPU::GRANITE_RAPIDS : return "Granite Rapids";
case CPU::EMERALD_RAPIDS : return "Emerald Rapids";
case CPU::SAPPHIRE_RAPIDS : return "Sapphire Rapids";
case CPU::BARTLETT_LAKE : return "Bartlett Lake";
case CPU::PANTHER_LAKE : return "Panther Lake";
case CPU::LUNAR_LAKE : return "Lunar Lake";
case CPU::ARROW_LAKE : return "Arrow Lake";
case CPU::METEOR_LAKE : return "Meteor Lake";
case CPU::RAPTOR_LAKE : return "Raptor Lake";
case CPU::ALDER_LAKE : return "Alder Lake";
case CPU::ROCKET_LAKE : return "Rocket Lake";
case CPU::XEON_ICE_LAKE : return "Xeon Ice Lake";
case CPU::CORE_ICE_LAKE : return "Core Ice Lake";
case CPU::CORE_TIGER_LAKE : return "Core Tiger Lake";
Expand Down Expand Up @@ -253,6 +285,15 @@ namespace embree

/* cpuid[eax=7,ecx=0].ecx */
static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions)

/* cpuid[eax=7,ecx=1].edx */
static const int CPU_FEATURE_BIT_APX = 1 << 21; // APX (Advanced Performance Extensions)

/* cpuid[eax=7,ecx=1].edx */
static const int CPU_FEATURE_BIT_AVX10 = 1 << 19; // AVX-10 (256-bit and 512-bit vector instructions)

/* cpuid[eax=0x24,ecx=0].ebx */
static const int CPU_FEATURE_BIT_AVX10_512VL = 1 << 18; // AVX-10 supports 512-bit vectors
#endif

#if defined(__X86_ASM__)
Expand All @@ -270,11 +311,11 @@ namespace embree
}
#endif

int getCPUFeatures()
int64_t getCPUFeatures()
{
#if defined(__X86_ASM__)
/* cache CPU features access */
static int cpu_features = 0;
static int64_t cpu_features = 0;
if (cpu_features)
return cpu_features;

Expand All @@ -291,28 +332,34 @@ namespace embree
/* get CPUID leaves for EAX = 1,7, and 0x80000001 */
int cpuid_leaf_1[4] = { 0,0,0,0 };
int cpuid_leaf_7[4] = { 0,0,0,0 };
int cpuid_leaf_7_1[4] = { 0,0,0,0 };
int cpuid_leaf_e1[4] = { 0,0,0,0 };
if (nIds >= 1) __cpuid (cpuid_leaf_1,0x00000001);
#if _WIN32
#if _MSC_VER && (_MSC_FULL_VER < 160040219)
#else
if (nIds >= 7) __cpuidex(cpuid_leaf_7,0x00000007,0);
if (nIds >= 7) __cpuidex(cpuid_leaf_7_1,0x00000007,1);
#endif
#else
if (nIds >= 7) __cpuid_count(cpuid_leaf_7,0x00000007,0);
if (nIds >= 7) __cpuid_count(cpuid_leaf_7_1,0x00000007,1);
#endif
if (nExIds >= 0x80000001) __cpuid(cpuid_leaf_e1,0x80000001);

/* detect if OS saves XMM, YMM, and ZMM states */
/* detect if OS saves XMM, YMM, ZMM, and APX states */
bool xmm_enabled = true;
bool ymm_enabled = false;
bool zmm_enabled = false;
bool apx_enabled = false;
if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_OXSAVE) {
int64_t xcr0 = get_xcr0();
xmm_enabled = ((xcr0 & 0x02) == 0x02); /* checks if xmm are enabled in XCR0 */
ymm_enabled = xmm_enabled && ((xcr0 & 0x04) == 0x04); /* checks if ymm state are enabled in XCR0 */
zmm_enabled = ymm_enabled && ((xcr0 & 0xE0) == 0xE0); /* checks if OPMASK state, upper 256-bit of ZMM0-ZMM15 and ZMM16-ZMM31 state are enabled in XCR0 */
apx_enabled = ((xcr0 & (0x80000)) == 0x80000); /* checks if APX state (bit 19) is enabled in XCR0 */
}

if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED;
if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED;
if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED;
Expand Down Expand Up @@ -343,6 +390,29 @@ namespace embree
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA;
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL;
if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;

if ((cpuid_leaf_7_1[EDX] & CPU_FEATURE_BIT_APX) && apx_enabled) cpu_features |= CPU_FEATURE_APX;

/* detect AVX-10 version */
if ((cpuid_leaf_7_1[EDX] & CPU_FEATURE_BIT_AVX10) && (nIds >= 0x24)) {
int cpuid_leaf_24_0[4] = { 0, 0, 0, 0 };

#if _WIN32
#if _MSC_VER && (_MSC_FULL_VER < 160040219)
#else
__cpuidex(cpuid_leaf_24_0, 0x00000024, 0);
#endif
#else
__cpuid_count(cpuid_leaf_24_0, 0x00000024, 0);
#endif
// enable AVX-10 features only if AVX-10 reports 512-bit vector support and ZMM registers are enabled
const int avx10_version = cpuid_leaf_24_0[EBX] & 0xff;
const bool avx10_512vl = (cpuid_leaf_24_0[EBX] & CPU_FEATURE_BIT_AVX10_512VL) != 0;
if (avx10_512vl && zmm_enabled) {
if (avx10_version >= 1) cpu_features |= CPU_FEATURE_AVX10_1;
if (avx10_version >= 2) cpu_features |= CPU_FEATURE_AVX10_2;
}
}
Comment thread
stefanatwork marked this conversation as resolved.

#if defined(__MACOSX__)
if ( (cpu_features & CPU_FEATURE_AVX512F)
Expand Down Expand Up @@ -382,7 +452,7 @@ namespace embree
#endif
}

std::string stringOfCPUFeatures(int features)
std::string stringOfCPUFeatures(int64_t features)
{
std::string str;
if (features & CPU_FEATURE_XMM_ENABLED) str += "XMM ";
Expand Down Expand Up @@ -412,12 +482,15 @@ namespace embree
if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL ";
if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA ";
if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI ";
if (features & CPU_FEATURE_APX) str += "APX ";
if (features & CPU_FEATURE_AVX10_1) str += "AVX10.1 ";
if (features & CPU_FEATURE_AVX10_2) str += "AVX10.2 ";
if (features & CPU_FEATURE_NEON) str += "NEON ";
if (features & CPU_FEATURE_NEON_2X) str += "2xNEON ";
return str;
}

std::string stringOfISA (int isa)
std::string stringOfISA (int64_t isa)
{
if (isa == SSE) return "SSE";
if (isa == SSE2) return "SSE2";
Expand All @@ -428,17 +501,18 @@ namespace embree
if (isa == AVX) return "AVX";
if (isa == AVX2) return "AVX2";
if (isa == AVX512) return "AVX512";
if (isa == APX) return "APX";

if (isa == NEON) return "NEON";
if (isa == NEON_2X) return "2xNEON";
return "UNKNOWN";
}

bool hasISA(int features, int isa) {
bool hasISA(int64_t features, int64_t isa) {
return (features & isa) == isa;
}

std::string supportedTargetList (int features)
std::string supportedTargetList (int64_t features)
{
std::string v;
if (hasISA(features,SSE)) v += "SSE ";
Expand All @@ -451,7 +525,7 @@ namespace embree
if (hasISA(features,AVXI)) v += "AVXI ";
if (hasISA(features,AVX2)) v += "AVX2 ";
if (hasISA(features,AVX512)) v += "AVX512 ";

if (hasISA(features,APX)) v += "APX ";
if (hasISA(features,NEON)) v += "NEON ";
if (hasISA(features,NEON_2X)) v += "2xNEON ";
return v;
Expand Down
Loading
Loading