From 7fac1f92fb8630ac7ff041d607428702d99c611d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 27 Feb 2026 09:42:48 +0000 Subject: [PATCH 01/58] Arm64 SVE: Support scalable constant vectors and masks Adds support to GenTreeVecCon and GenTreeMskCon for constants with unknown sizes. Instead of having a blob of data, the constant is represented as being one of: a repeated value, a sequence with start and step values, or a value in the first lane with the rest zeroed. To handle this the base type is also required. As this new structure is slightly bigger than a simd16, the simd_t typedef is pushed up to be simd32 sized. For vector constants, a vector is scalable if it is of type TYP_SIMD. For mask constants, the type is always TYP_MASK. However on Arm64, masks are only used by SVE. Therefore, to tell if a mask is scalable, JitUseScalableVectorT is checked. The IsAllBitsSet() on mask constants is updated to include a base type. A mask that is all set for TYP_LONG will not be all set for TYP_BYTE, and instead will be 1000000010000000... Given two scalable constants it may not be possible to add them together to produce a third scalable constant. Instead they will remain as two vectors in the IR. 
To show this implementation is workable, scalable support is added for: Sve.CreateTrueMask*() Sve.CreateFalseMask*() Vector.Create() Vector.CreateScalar() Vector.CreateScalarUnsafe() Vector.CreateSequence() Fixes #125057 --- src/coreclr/jit/codegenarm64.cpp | 117 +++- src/coreclr/jit/compiler.h | 15 +- src/coreclr/jit/compiler.hpp | 26 +- src/coreclr/jit/emitarm64.h | 32 +- src/coreclr/jit/gentree.cpp | 683 +++++++++++++++----- src/coreclr/jit/gentree.h | 127 +++- src/coreclr/jit/hwintrinsic.h | 3 + src/coreclr/jit/hwintrinsicarm64.cpp | 71 +- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 17 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 8 +- src/coreclr/jit/lowerarmarch.cpp | 15 +- src/coreclr/jit/lsraarm64.cpp | 38 +- src/coreclr/jit/simd.cpp | 71 +- src/coreclr/jit/simd.h | 129 +++- src/coreclr/jit/valuenum.cpp | 180 ++++++ src/coreclr/jit/valuenum.h | 94 +++ 16 files changed, 1368 insertions(+), 258 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 47ecfbea7dc7de..485dec0d383008 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2325,7 +2325,6 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre GenTreeVecCon* vecCon = tree->AsVecCon(); emitter* emit = GetEmitter(); - emitAttr attr = emitTypeSize(targetType); switch (tree->TypeGet()) { @@ -2333,6 +2332,8 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre case TYP_SIMD12: case TYP_SIMD16: { + emitAttr attr = emitTypeSize(targetType); + // We ignore any differences between SIMD12 and SIMD16 here if we can broadcast the value // via mvni/movi. 
const bool is8 = tree->TypeIs(TYP_SIMD8); @@ -2385,6 +2386,104 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre break; } + case TYP_SIMD: + { + simdscalable_t simdVal = vecCon->gtSimdScalableVal; + insOpts opt = emitter::optGetSveInsOpt(emitTypeSize(simdVal.gtSimdScalableBaseType)); + emitAttr emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; + + auto loadConstantHelper = [&](uint64_t constValue) -> regNumber { + // Get a temp integer register to compute long address. + regNumber addrReg = internalRegisters.GetSingle(tree); + + // Store the index to memory + UNATIVE_OFFSET cnum = + emit->emitDataConst(&constValue, sizeof(constValue), sizeof(constValue), TYP_LONG); + CORINFO_FIELD_HANDLE hnd = m_compiler->eeFindJitDataOffs(cnum); + + // Load the constant + emit->emitIns_R_C(INS_ldr, emitSize, addrReg, addrReg, hnd, 0); + + return addrReg; + }; + + switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) + { + case SimdScalableRepeated: + if (emitter::isValidSimm<8>(simdVal.gtSimdScalableIndex) || + emitter::isValidSimm_MultipleOf<8, 256>(simdVal.gtSimdScalableIndex)) + { + emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, simdVal.gtSimdScalableIndex, + opt); + } + else + { + regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); + emit->emitInsSve_R_R(INS_sve_dup, emitSize, targetReg, indexReg, opt); + } + break; + + case SimdScalableSequence: + if (emitter::isValidSimm<5>(simdVal.gtSimdScalableIndex) && + emitter::isValidSimm<5>(simdVal.gtSimdScalableStep)) + { + emit->emitInsSve_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, + simdVal.gtSimdScalableIndex, simdVal.gtSimdScalableStep, opt); + } + else if (emitter::isValidSimm<5>(simdVal.gtSimdScalableIndex)) + { + regNumber stepReg = loadConstantHelper(simdVal.gtSimdScalableStep); + emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, stepReg, + simdVal.gtSimdScalableIndex, opt, INS_SCALABLE_OPTS_IMM_FIRST); + } + else if 
(emitter::isValidSimm<5>(simdVal.gtSimdScalableStep)) + { + regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); + emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, indexReg, + simdVal.gtSimdScalableStep, opt); + } + else + { + regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); + regNumber stepReg = loadConstantHelper(simdVal.gtSimdScalableStep); + emit->emitInsSve_R_R_R(INS_sve_index, emitSize, targetReg, indexReg, stepReg, opt); + } + break; + + case SimdScalableScalar: + { + // Clear the entire target register + emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, 0, opt); + + regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); + + // Use NEON instructions to load the constant (to avoid using predicates) + + if (varTypeIsIntegral(simdVal.gtSimdScalableBaseType) && + emitter::emitIns_valid_imm_for_mov(simdVal.gtSimdScalableIndex, emitSize)) + { + emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndex); + } + else if (varTypeIsFloating(simdVal.gtSimdScalableBaseType) && + emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) + { + emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]); + } + else + { + regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); + emit->emitIns_R_R(INS_ins, emitSize, targetReg, indexReg, INS_OPTS_16B); + } + break; + } + + default: + unreached(); + break; + } + break; + } + default: { unreached(); @@ -2399,14 +2498,26 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre GenTreeMskCon* mask = tree->AsMskCon(); emitter* emit = GetEmitter(); - // Try every type until a match is found - if (mask->IsZero()) { emit->emitInsSve_R(INS_sve_pfalse, EA_SCALABLE, targetReg, INS_OPTS_SCALABLE_B); break; } +#if defined(DEBUG) + if (JitConfig.JitUseScalableVectorT() == 1) + { + assert(mask->gtSimdScalableMaskVal.gtSimdMaskScalableIndex == 1); + + insOpts opt = + 
emitter::optGetSveInsOpt(emitTypeSize(mask->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType)); + emit->emitIns_R_PATTERN(INS_sve_ptrue, EA_SCALABLE, targetReg, opt, SVE_PATTERN_ALL); + break; + } +#endif // DEBUG + + // Fixed length vectors. Try every type until a match is found + insOpts opt = INS_OPTS_SCALABLE_B; SveMaskPattern pat = EvaluateSimdMaskToPattern(TYP_BYTE, mask->gtSimdMaskVal); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f35b36e63a6253..25fd8a71591e34 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3236,10 +3236,21 @@ class Compiler #if defined(FEATURE_SIMD) GenTreeVecCon* gtNewVconNode(var_types type); GenTreeVecCon* gtNewVconNode(var_types type, void* data); +#if defined(TARGET_ARM64) + GenTreeVecCon* gtNewSimdVconNode(var_types type, var_types baseType, SimdScalableKind kind, uint64_t index, uint64_t step = 0); + + inline GenTreeVecCon* gtNewSimdVconNode(var_types type, simdscalable_t* con) + { + return gtNewSimdVconNode(type, con->gtSimdScalableBaseType, con->gtSimdScalableKind, con->gtSimdScalableIndex, con->gtSimdScalableStep); + } +#endif // TARGET_ARM64 #endif // FEATURE_SIMD #if defined(FEATURE_MASKED_HW_INTRINSICS) GenTreeMskCon* gtNewMskConNode(var_types type); +#if defined(TARGET_ARM64) + GenTreeMskCon* gtNewMskConNode(var_types type, var_types baseType, bool index); +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS GenTree* gtNewAllBitsSetConNode(var_types type); @@ -3348,7 +3359,7 @@ class Compiler var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize); #if defined(TARGET_ARM64) - GenTree* gtNewSimdAllTrueMaskNode(var_types simdBaseType); + GenTree* gtNewSimdTrueMaskNode(var_types simdBaseType); GenTree* gtNewSimdFalseMaskByteNode(); #endif @@ -3916,7 +3927,7 @@ class Compiler #if defined(FEATURE_HW_INTRINSICS) GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree); - GenTreeMskCon* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, 
GenTreeVecCon* vecCon); + GenTree* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon); #endif // FEATURE_HW_INTRINSICS // Options to control behavior of gtTryRemoveBoxUpstreamEffects diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index e99fac8caa6115..a4704b69dd355b 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -102,11 +102,14 @@ inline bool genExactlyOneBit(T value) inline regMaskTP genFindLowestBit(regMaskTP value) { #ifdef HAS_MORE_THAN_64_REGISTERS - // If we ever need to use this method for predicate - // registers, then handle it. - assert(value.getHigh() == RBM_NONE); -#endif + if (value.getLow() != RBM_NONE) + { + return regMaskTP(genFindLowestBit(value.getLow())); + } + return regMaskTP(RBM_NONE, genFindLowestBit(value.getHigh())); +#else return regMaskTP(genFindLowestBit(value.getLow())); +#endif } /***************************************************************************** @@ -117,11 +120,18 @@ inline regMaskTP genFindLowestBit(regMaskTP value) inline bool genMaxOneBit(regMaskTP value) { #ifdef HAS_MORE_THAN_64_REGISTERS - // If we ever need to use this method for predicate - // registers, then handle it. - assert(value.getHigh() == RBM_NONE); -#endif + if (value.getLow() == RBM_NONE) + { + return genMaxOneBit(value.getHigh()); + } + if (value.getHigh() == RBM_NONE) + { + return genMaxOneBit(value.getLow()); + } + return false; +#else return genMaxOneBit(value.getLow()); +#endif } /***************************************************************************** diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 6b54d0e643b641..e6fe646882157c 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -804,22 +804,6 @@ static bool isValidUimm_MultipleOf(ssize_t value) return isValidUimm(value / mod) && (value % mod == 0); } -// Returns true if 'value' is a legal signed immediate with 'bits' number of bits. 
-template -static bool isValidSimm(ssize_t value) -{ - constexpr ssize_t max = 1 << (bits - 1); - return (-max <= value) && (value < max); -} - -// Returns true if 'value' is a legal signed multiple of 'mod' immediate with 'bits' number of bits. -template -static bool isValidSimm_MultipleOf(ssize_t value) -{ - static_assert(mod != 0); - return isValidSimm(value / mod) && (value % mod == 0); -} - // Returns true if 'imm' is a valid broadcast immediate for some SVE DUP variants static bool isValidBroadcastImm(ssize_t imm, emitAttr laneSize) { @@ -1085,6 +1069,22 @@ static bool canEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL, em // true if 'immDbl' can be encoded using a 'float immediate', also returns the encoding if wbFPI is non-null static bool canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI = nullptr); +// Returns true if 'value' is a legal signed immediate with 'bits' number of bits. +template +static bool isValidSimm(ssize_t value) +{ + constexpr ssize_t max = 1 << (bits - 1); + return (-max <= value) && (value < max); +} + +// Returns true if 'value' is a legal signed multiple of 'mod' immediate with 'bits' number of bits. +template +static bool isValidSimm_MultipleOf(ssize_t value) +{ + static_assert(mod != 0); + return isValidSimm(value / mod) && (value % mod == 0); +} + // Returns the number of bits used by the given 'size'. 
inline static unsigned getBitWidth(emitAttr size) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a0a8e62c337a40..87fa4bd92015e9 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3336,6 +3336,19 @@ unsigned Compiler::gtHashValue(GenTree* tree) break; } +#if defined(TARGET_ARM64) + case TYP_SIMD: + { + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdScalableVal.gtSimdScalableKind); + add = genTreeHashAdd(ulo32(add), vecCon->gtSimdScalableVal.gtSimdScalableBaseType); + add = genTreeHashAdd(ulo32(add), ulo32(vecCon->gtSimdScalableVal.gtSimdScalableIndex)); + add = genTreeHashAdd(ulo32(add), uhi32(vecCon->gtSimdScalableVal.gtSimdScalableIndex)); + add = genTreeHashAdd(ulo32(add), ulo32(vecCon->gtSimdScalableVal.gtSimdScalableStep)); + add = genTreeHashAdd(ulo32(add), uhi32(vecCon->gtSimdScalableVal.gtSimdScalableStep)); + break; + } +#endif // TARGET_ARM64 + default: { unreached(); @@ -9328,16 +9341,41 @@ GenTree* Compiler::gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle) #if defined(FEATURE_SIMD) GenTreeVecCon* Compiler::gtNewVconNode(var_types type) { +#if defined(TARGET_ARM64) + assert(type != TYP_SIMD); +#endif // defined(TARGET_ARM64) + GenTreeVecCon* vecCon = new (this, GT_CNS_VEC) GenTreeVecCon(type); return vecCon; } GenTreeVecCon* Compiler::gtNewVconNode(var_types type, void* data) { +#if defined(TARGET_ARM64) + assert(type != TYP_SIMD); +#endif // defined(TARGET_ARM64) + GenTreeVecCon* vecCon = new (this, GT_CNS_VEC) GenTreeVecCon(type); memcpy(&vecCon->gtSimdVal, data, genTypeSize(type)); return vecCon; } + +#if defined(TARGET_ARM64) +GenTreeVecCon* Compiler::gtNewSimdVconNode( + var_types type, var_types baseType, SimdScalableKind kind, uint64_t index, uint64_t step) +{ + assert(type == TYP_SIMD); + assert(!varTypeIsSIMD(baseType)); + + GenTreeVecCon* vecCon = new (this, GT_CNS_VEC) GenTreeVecCon(type); + vecCon->gtSimdScalableVal.gtSimdScalableKind = kind; + 
vecCon->gtSimdScalableVal.gtSimdScalableBaseType = baseType; + vecCon->gtSimdScalableVal.gtSimdScalableIndex = index; + vecCon->gtSimdScalableVal.gtSimdScalableStep = step; + return vecCon; +} +#endif // TARGET_ARM64 + #endif // FEATURE_SIMD #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -9346,6 +9384,16 @@ GenTreeMskCon* Compiler::gtNewMskConNode(var_types type) GenTreeMskCon* mskCon = new (this, GT_CNS_MSK) GenTreeMskCon(type); return mskCon; } + +#if defined(TARGET_ARM64) +GenTreeMskCon* Compiler::gtNewMskConNode(var_types type, var_types baseType, bool index) +{ + GenTreeMskCon* mskCon = new (this, GT_CNS_MSK) GenTreeMskCon(type); + mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType = baseType; + mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableIndex = index; + return mskCon; +} +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS GenTree* Compiler::gtNewAllBitsSetConNode(var_types type) @@ -9395,6 +9443,12 @@ GenTree* Compiler::gtNewAllBitsSetConNode(var_types type) GenTree* Compiler::gtNewZeroConNode(var_types type) { #ifdef FEATURE_SIMD +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) + { + return gtNewSimdVconNode(type, TYP_BYTE, SimdScalableRepeated, 0); + } +#endif // TARGET_ARM64 if (varTypeIsSIMD(type)) { GenTreeVecCon* vecCon = gtNewVconNode(type); @@ -9681,6 +9735,14 @@ GenTree* Compiler::gtNewConWithPattern(var_types type, uint8_t pattern) memset(&node->gtSimdVal, pattern, sizeof(node->gtSimdVal)); return node; } + +#if defined(TARGET_ARM64) + case TYP_SIMD: + { + return gtNewSimdVconNode(type, TYP_BYTE, SimdScalableRepeated, pattern); + } +#endif // TARGET_ARM64 + #endif // FEATURE_SIMD default: @@ -13604,6 +13666,40 @@ void Compiler::gtDispConst(GenTree* tree) break; } +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + printf("%-6s ", varTypeName(vecCon->gtSimdScalableVal.gtSimdScalableBaseType)); + + switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) + { + case SimdScalableRepeated: + printf("<0x%016llx, 0x%016llx, 0x%016llx... 
>", + vecCon->gtSimdScalableVal.gtSimdScalableIndex, + vecCon->gtSimdScalableVal.gtSimdScalableIndex, + vecCon->gtSimdScalableVal.gtSimdScalableIndex); + break; + + case SimdScalableSequence: + { + uint64_t index = vecCon->gtSimdScalableVal.gtSimdScalableIndex; + printf("<0x%016llx, ", index); + index += vecCon->gtSimdScalableVal.gtSimdScalableStep; + printf("0x%016llx, ", index); + index += vecCon->gtSimdScalableVal.gtSimdScalableStep; + printf("0x%016llx...>", index); + break; + } + + case SimdScalableScalar: + printf("<0x%016llx, 0x0, 0x0... >", vecCon->gtSimdScalableVal.gtSimdScalableIndex); + break; + + default: + unreached(); + } + break; + } #endif // TARGET_XARCH default: @@ -13620,6 +13716,17 @@ void Compiler::gtDispConst(GenTree* tree) case GT_CNS_MSK: { GenTreeMskCon* mskCon = tree->AsMskCon(); +#if defined(TARGET_ARM64) && defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + printf("%-6s <0x%x, 0x%x, 0x%x...>", + varTypeName(mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType), + mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableIndex, + mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableIndex, + mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableIndex); + break; + } +#endif // TARGET_ARM64 && DEBUG printf("<0x%08x, 0x%08x>", mskCon->gtSimdMaskVal.u32[0], mskCon->gtSimdMaskVal.u32[1]); break; } @@ -14392,7 +14499,14 @@ void Compiler::gtDispTree(GenTree* tree, case GT_HWINTRINSIC: { GenTreeHWIntrinsic* node = tree->AsHWIntrinsic(); - printf(" %u", node->GetSimdSize()); + if (node->GetSimdSize() == SIZE_UNKNOWN) + { + printf(" SCALABLE"); + } + else + { + printf(" %u", node->GetSimdSize()); + } if (node->GetSimdBaseType() != TYP_UNKNOWN) { printf(" %s", varTypeName(node->GetSimdBaseType())); @@ -23979,93 +24093,123 @@ GenTree* Compiler::gtNewSimdCndSelNode( // GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic hwIntrinsicID = NI_Vector128_Create; - if 
(op1->IsIntegralConst() || op1->IsCnsFltOrDbl()) { - GenTreeVecCon* vecCon = gtNewVconNode(type); - - switch (simdBaseType) +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) { - case TYP_BYTE: - case TYP_UBYTE: - { - uint8_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + GenTreeVecCon* scalableVecCon = gtNewSimdVconNode(type, simdBaseType, SimdScalableRepeated, 0); - for (unsigned i = 0; i < simdSize; i++) - { - vecCon->gtSimdVal.u8[i] = cnsVal; - } - break; + if (varTypeIsIntegral(simdBaseType)) + { + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndex = + static_cast(op1->AsIntConCommon()->IntegralValue()); } - - case TYP_SHORT: - case TYP_USHORT: + else if (simdBaseType == TYP_FLOAT) { - uint16_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - - for (unsigned i = 0; i < (simdSize / 2); i++) - { - vecCon->gtSimdVal.u16[i] = cnsVal; - } - break; + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF32[0] = + static_cast(op1->AsDblCon()->DconValue()); } - - case TYP_INT: - case TYP_UINT: + else if (simdBaseType == TYP_DOUBLE) { - uint32_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF64[0] = + static_cast(op1->AsDblCon()->DconValue()); + } + else + { + unreached(); + } + return scalableVecCon; + } + else +#endif // TARGET_ARM64 + { + GenTreeVecCon* vecCon = gtNewVconNode(type); - for (unsigned i = 0; i < (simdSize / 4); i++) + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: { - vecCon->gtSimdVal.u32[i] = cnsVal; + uint8_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + + for (unsigned i = 0; i < simdSize; i++) + { + vecCon->gtSimdVal.u8[i] = cnsVal; + } + break; } - break; - } - case TYP_LONG: - case TYP_ULONG: - { - uint64_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + case TYP_SHORT: + case TYP_USHORT: + { + uint16_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - for (unsigned i = 0; i < 
(simdSize / 8); i++) + for (unsigned i = 0; i < (simdSize / 2); i++) + { + vecCon->gtSimdVal.u16[i] = cnsVal; + } + break; + } + + case TYP_INT: + case TYP_UINT: { - vecCon->gtSimdVal.u64[i] = cnsVal; + uint32_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + + for (unsigned i = 0; i < (simdSize / 4); i++) + { + vecCon->gtSimdVal.u32[i] = cnsVal; + } + break; } - break; - } - case TYP_FLOAT: - { - float cnsVal = static_cast(op1->AsDblCon()->DconValue()); + case TYP_LONG: + case TYP_ULONG: + { + uint64_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - for (unsigned i = 0; i < (simdSize / 4); i++) + for (unsigned i = 0; i < (simdSize / 8); i++) + { + vecCon->gtSimdVal.u64[i] = cnsVal; + } + break; + } + + case TYP_FLOAT: { - vecCon->gtSimdVal.f32[i] = cnsVal; + float cnsVal = static_cast(op1->AsDblCon()->DconValue()); + + for (unsigned i = 0; i < (simdSize / 4); i++) + { + vecCon->gtSimdVal.f32[i] = cnsVal; + } + break; } - break; - } - case TYP_DOUBLE: - { - double cnsVal = static_cast(op1->AsDblCon()->DconValue()); + case TYP_DOUBLE: + { + double cnsVal = static_cast(op1->AsDblCon()->DconValue()); - for (unsigned i = 0; i < (simdSize / 8); i++) + for (unsigned i = 0; i < (simdSize / 8); i++) + { + vecCon->gtSimdVal.f64[i] = cnsVal; + } + break; + } + + default: { - vecCon->gtSimdVal.f64[i] = cnsVal; + unreached(); } - break; } - default: - { - unreached(); - } + return vecCon; } - - return vecCon; } + NamedIntrinsic hwIntrinsicID = NI_Vector128_Create; + #if defined(TARGET_XARCH) if (simdSize == 64) { @@ -24080,6 +24224,10 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, GenTree* op1, va { hwIntrinsicID = NI_Vector64_Create; } + else if (simdSize == SIZE_UNKNOWN) + { + hwIntrinsicID = NI_VectorT_Create; + } #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -24101,70 +24249,100 @@ GenTree* Compiler::gtNewSimdCreateBroadcastNode(var_types type, GenTree* op1, va // GenTree* 
Compiler::gtNewSimdCreateScalarNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic hwIntrinsicID = NI_Vector128_CreateScalar; - if (op1->IsIntegralConst() || op1->IsCnsFltOrDbl()) { - GenTreeVecCon* vecCon = gtNewVconNode(type); - vecCon->gtSimdVal = {}; - - switch (simdBaseType) +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) { - case TYP_BYTE: - case TYP_UBYTE: - { - uint8_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimdVal.u8[0] = cnsVal; - break; - } + GenTreeVecCon* scalableVecCon = gtNewSimdVconNode(type, simdBaseType, SimdScalableScalar, 0); - case TYP_SHORT: - case TYP_USHORT: + if (varTypeIsIntegral(simdBaseType)) { - uint16_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimdVal.u16[0] = cnsVal; - break; + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndex = + static_cast(op1->AsIntConCommon()->IntegralValue()); } - - case TYP_INT: - case TYP_UINT: + else if (simdBaseType == TYP_FLOAT) { - uint32_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimdVal.u32[0] = cnsVal; - break; + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF32[0] = + static_cast(op1->AsDblCon()->DconValue()); } - - case TYP_LONG: - case TYP_ULONG: + else if (simdBaseType == TYP_DOUBLE) { - uint64_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - vecCon->gtSimdVal.u64[0] = cnsVal; - break; + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF64[0] = + static_cast(op1->AsDblCon()->DconValue()); } - - case TYP_FLOAT: + else { - float cnsVal = static_cast(op1->AsDblCon()->DconValue()); - vecCon->gtSimdVal.f32[0] = cnsVal; - break; + unreached(); } + return scalableVecCon; + } + else +#endif // TARGET_ARM64 + { + GenTreeVecCon* vecCon = gtNewVconNode(type); + vecCon->gtSimdVal = {}; - case TYP_DOUBLE: + switch (simdBaseType) { - double cnsVal = static_cast(op1->AsDblCon()->DconValue()); - vecCon->gtSimdVal.f64[0] = cnsVal; - 
break; - } + case TYP_BYTE: + case TYP_UBYTE: + { + uint8_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u8[0] = cnsVal; + break; + } - default: - { - unreached(); + case TYP_SHORT: + case TYP_USHORT: + { + uint16_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u16[0] = cnsVal; + break; + } + + case TYP_INT: + case TYP_UINT: + { + uint32_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u32[0] = cnsVal; + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + uint64_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + vecCon->gtSimdVal.u64[0] = cnsVal; + break; + } + + case TYP_FLOAT: + { + float cnsVal = static_cast(op1->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f32[0] = cnsVal; + break; + } + + case TYP_DOUBLE: + { + double cnsVal = static_cast(op1->AsDblCon()->DconValue()); + vecCon->gtSimdVal.f64[0] = cnsVal; + break; + } + + default: + { + unreached(); + } } - } - return vecCon; + return vecCon; + } } + NamedIntrinsic hwIntrinsicID = NI_Vector128_CreateScalar; + #if defined(TARGET_XARCH) if (simdSize == 32) { @@ -24179,6 +24357,10 @@ GenTree* Compiler::gtNewSimdCreateScalarNode(var_types type, GenTree* op1, var_t { hwIntrinsicID = (genTypeSize(simdBaseType) == 8) ? NI_Vector64_Create : NI_Vector64_CreateScalar; } + else if (simdSize == SIZE_UNKNOWN) + { + hwIntrinsicID = NI_VectorT_CreateScalar; + } #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -24206,99 +24388,129 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, var_types simdBaseType, unsigned simdSize) { - NamedIntrinsic hwIntrinsicID = NI_Vector128_CreateScalarUnsafe; - if (op1->IsIntegralConst() || op1->IsCnsFltOrDbl()) { - GenTreeVecCon* vecCon = gtNewVconNode(type); - // Since the upper bits are considered non-deterministic and we can therefore // set them to anything, we broadcast the value. 
// // We do this as it simplifies the logic and allows certain code paths to // have better codegen, such as for 0, AllBitsSet, or certain small constants - switch (simdBaseType) +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) { - case TYP_BYTE: - case TYP_UBYTE: - { - uint8_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + GenTreeVecCon* scalableVecCon = gtNewSimdVconNode(type, simdBaseType, SimdScalableRepeated, 0); - for (unsigned i = 0; i < simdSize; i++) - { - vecCon->gtSimdVal.u8[i] = cnsVal; - } - break; + if (varTypeIsIntegral(simdBaseType)) + { + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndex = + static_cast(op1->AsIntConCommon()->IntegralValue()); } - - case TYP_SHORT: - case TYP_USHORT: + else if (simdBaseType == TYP_FLOAT) { - uint16_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - - for (unsigned i = 0; i < (simdSize / 2); i++) - { - vecCon->gtSimdVal.u16[i] = cnsVal; - } - break; + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF32[0] = + static_cast(op1->AsDblCon()->DconValue()); } - - case TYP_INT: - case TYP_UINT: + else if (simdBaseType == TYP_DOUBLE) + { + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF64[0] = + static_cast(op1->AsDblCon()->DconValue()); + } + else { - uint32_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + unreached(); + } + return scalableVecCon; + } + else +#endif // TARGET_ARM64 + { + GenTreeVecCon* vecCon = gtNewVconNode(type); - for (unsigned i = 0; i < (simdSize / 4); i++) + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: { - vecCon->gtSimdVal.u32[i] = cnsVal; + uint8_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + + for (unsigned i = 0; i < simdSize; i++) + { + vecCon->gtSimdVal.u8[i] = cnsVal; + } + break; } - break; - } - case TYP_LONG: - case TYP_ULONG: - { - uint64_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + case TYP_SHORT: + case TYP_USHORT: + { + uint16_t cnsVal = 
static_cast(op1->AsIntConCommon()->IntegralValue()); - for (unsigned i = 0; i < (simdSize / 8); i++) + for (unsigned i = 0; i < (simdSize / 2); i++) + { + vecCon->gtSimdVal.u16[i] = cnsVal; + } + break; + } + + case TYP_INT: + case TYP_UINT: { - vecCon->gtSimdVal.u64[i] = cnsVal; + uint32_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); + + for (unsigned i = 0; i < (simdSize / 4); i++) + { + vecCon->gtSimdVal.u32[i] = cnsVal; + } + break; } - break; - } - case TYP_FLOAT: - { - float cnsVal = static_cast(op1->AsDblCon()->DconValue()); + case TYP_LONG: + case TYP_ULONG: + { + uint64_t cnsVal = static_cast(op1->AsIntConCommon()->IntegralValue()); - for (unsigned i = 0; i < (simdSize / 4); i++) + for (unsigned i = 0; i < (simdSize / 8); i++) + { + vecCon->gtSimdVal.u64[i] = cnsVal; + } + break; + } + + case TYP_FLOAT: { - vecCon->gtSimdVal.f32[i] = cnsVal; + float cnsVal = static_cast(op1->AsDblCon()->DconValue()); + + for (unsigned i = 0; i < (simdSize / 4); i++) + { + vecCon->gtSimdVal.f32[i] = cnsVal; + } + break; } - break; - } - case TYP_DOUBLE: - { - double cnsVal = static_cast(op1->AsDblCon()->DconValue()); + case TYP_DOUBLE: + { + double cnsVal = static_cast(op1->AsDblCon()->DconValue()); - for (unsigned i = 0; i < (simdSize / 8); i++) + for (unsigned i = 0; i < (simdSize / 8); i++) + { + vecCon->gtSimdVal.f64[i] = cnsVal; + } + break; + } + + default: { - vecCon->gtSimdVal.f64[i] = cnsVal; + unreached(); } - break; } - default: - { - unreached(); - } + return vecCon; } - - return vecCon; } + NamedIntrinsic hwIntrinsicID = NI_Vector128_CreateScalarUnsafe; + #if defined(TARGET_XARCH) if (simdSize == 32) { @@ -24313,6 +24525,10 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, { hwIntrinsicID = (genTypeSize(simdBaseType) == 8) ? 
NI_Vector64_Create : NI_Vector64_CreateScalarUnsafe; } + else if (simdSize == SIZE_UNKNOWN) + { + hwIntrinsicID = NI_VectorT_CreateScalarUnsafe; + } #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -24336,12 +24552,6 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, GenTree* Compiler::gtNewSimdCreateSequenceNode( var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) { - // This effectively does: (Indices * op2) + Create(op1) - // - // When both op2 and op1 are constant we can fully fold this to a constant. Additionally, - // if only op2 is a constant we can simplify the computation by a lot. However, if only op1 - // is constant than there isn't any real optimization we can do and we need the full computation. - assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24350,8 +24560,55 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode( GenTree* result = nullptr; bool isPartial = true; +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) + { + // Only optimize when both op1 and op2 are constant + if (op1->OperIsConst() && op2->OperIsConst()) + { + GenTreeVecCon* scalableVecCon = gtNewSimdVconNode(type, simdBaseType, SimdScalableSequence, 0); + + if (varTypeIsIntegral(simdBaseType)) + { + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndex = + static_cast(op1->AsIntConCommon()->IntegralValue()); + scalableVecCon->gtSimdScalableVal.gtSimdScalableStep = + static_cast(op2->AsIntConCommon()->IntegralValue()); + } + else if (simdBaseType == TYP_FLOAT) + { + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF32[0] = + static_cast(op1->AsDblCon()->DconValue()); + scalableVecCon->gtSimdScalableVal.gtSimdScalableStepF32[0] = + static_cast(op2->AsDblCon()->DconValue()); + } + else if (simdBaseType == TYP_DOUBLE) + { + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF64[0] = + static_cast(op1->AsDblCon()->DconValue()); + 
scalableVecCon->gtSimdScalableVal.gtSimdScalableStepF64[0] = + static_cast(op2->AsDblCon()->DconValue()); + } + else + { + unreached(); + } + return scalableVecCon; + } + + // SVE can do this in a single instruction + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_VectorT_CreateSequence, simdBaseType, simdSize); + } +#endif // TARGET_ARM64 + if (op2->OperIsConst()) { + // This effectively does: (Indices * op2) + Create(op1) + // + // When both op2 and op1 are constant we can fully fold this to a constant. Additionally, + // if only op2 is a constant we can simplify the computation by a lot. However, if only op1 + // is constant than there isn't any real optimization we can do and we need the full computation. + GenTreeVecCon* vcon = gtNewVconNode(type); uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); @@ -33429,10 +33686,23 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) { GenTreeMskCon* mskCon = cnsNode->AsMskCon(); - simd_t simdVal; - EvaluateSimdCvtMaskToVector(simdBaseType, &simdVal, mskCon->gtSimdMaskVal); +#if defined(TARGET_ARM64) + if (retType == TYP_SIMD) + { + simdscalable_t vecConSimd; + if (EvaluateSimdCvtScalableMaskToVector(simdBaseType, &vecConSimd, mskCon->gtSimdScalableMaskVal)) + { + resultNode = gtNewSimdVconNode(retType, &vecConSimd); + } + } + else +#endif // defined(TARGET_ARM64) + { + simd_t simdVal; + EvaluateSimdCvtMaskToVector(simdBaseType, &simdVal, mskCon->gtSimdMaskVal); - resultNode = gtNewVconNode(retType, &simdVal); + resultNode = gtNewVconNode(retType, &simdVal); + } } #if defined(TARGET_XARCH) else if (tree->OperIsConvertVectorToMask()) @@ -34184,7 +34454,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } // Handle `x & AllBitsSet == x` and `AllBitsSet & x == x` - if (cnsNode->IsMaskAllBitsSet()) + if (cnsNode->IsMaskAllBitsSet(simdBaseType)) { resultNode = otherNode; } @@ -34497,7 +34767,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } // 
Handle `x | AllBitsSet == AllBitsSet` and `AllBitsSet | x == AllBitsSet` - if (cnsNode->IsMaskAllBitsSet()) + if (cnsNode->IsMaskAllBitsSet(simdBaseType)) { resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT); } @@ -35074,17 +35344,19 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } //------------------------------------------------------------------------------ -// gtFoldExprConvertVecCnsToMask: Folds a constant vector plus conversion to -// mask into a constant mask. +// gtFoldExprConvertVecCnsToMask: Attempts to fold a constant vector plus +// conversion to mask into a constant mask. // // Arguments: // tree - The convert vector to mask node // vecCon - The vector constant converted by the convert // // Return Value: -// Returns a constant mask +// Returns a constant mask or the original tree +// +// This may only fail to convert for vectors of TYP_SIMD // -GenTreeMskCon* Compiler::gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon) +GenTree* Compiler::gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon) { assert(tree->OperIsConvertVectorToMask()); assert(vecCon == tree->Op(1) || vecCon == tree->Op(2)); @@ -35125,6 +35397,18 @@ GenTreeMskCon* Compiler::gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val); break; } + +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + if (!EvaluateSimdCvtScalableVectorToMask(simdBaseType, &mskCon->gtSimdScalableMaskVal, + vecCon->gtSimdScalableVal)) + { + // Could not be converted to a mask. 
+ return tree; + } + break; + } #endif // TARGET_XARCH default: @@ -35390,3 +35674,44 @@ GenTree* GenTree::gtFirstNodeInOperandOrder() return op; } + +//------------------------------------------------------------------------ +// IsTrueMask: Is the given node a true mask +// +// Arguments: +// simdBaseType - the base type of the mask +// +// Returns true if the node is a true mask for the given simdBaseType. +// +// Note that a byte true mask (1111...) is different to an int true mask +// (10001000...), therefore the simdBaseType of the mask needs to be +// taken into account. +// +bool GenTree::IsTrueMask(var_types simdBaseType) const +{ +#ifdef TARGET_ARM64 + // This should only be called when a mask is expected + assert(TypeGet() == TYP_MASK); + + if (IsCnsMsk()) + { +#if defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + if (AsMskCon()->gtSimdScalableMaskVal.gtSimdMaskScalableIndex != 1) + { + return false; + } + + // A true mask can be used with a larger type. Eg: A short true mask will be valid for a long vector. 
+ var_types maskBaseType = AsMskCon()->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType; + + return (genTypeSize(maskBaseType) >= genTypeSize(simdBaseType)); + } +#endif // DEBUG + return SveMaskPatternAll == EvaluateSimdMaskToPattern(simdBaseType, AsMskCon()->gtSimdMaskVal); + } +#endif + + return false; +} diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index e55073378cddd9..a44e1cf2ce4503 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1960,8 +1960,8 @@ struct GenTree inline bool IsVectorAllBitsSet() const; inline bool IsVectorBroadcast(var_types simdBaseType) const; inline bool IsMaskZero() const; - inline bool IsMaskAllBitsSet() const; - inline bool IsTrueMask(var_types simdBaseType) const; + inline bool IsMaskAllBitsSet(var_types simdBaseType = TYP_BYTE) const; + bool IsTrueMask(var_types simdBaseType) const; inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType); @@ -6852,6 +6852,8 @@ struct GenTreeVecCon : public GenTree #if defined(TARGET_XARCH) simd32_t gtSimd32Val; simd64_t gtSimd64Val; +#elif defined(TARGET_ARM64) + simdscalable_t gtSimdScalableVal; #endif // TARGET_XARCH simd_t gtSimdVal; @@ -7268,6 +7270,11 @@ struct GenTreeVecCon : public GenTree return gtSimd64Val.IsAllBitsSet(); } +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + return gtSimdScalableVal.IsAllBitsSet(); + } #endif // TARGET_XARCH default: @@ -7316,6 +7323,11 @@ struct GenTreeVecCon : public GenTree return left->gtSimd64Val == right->gtSimd64Val; } +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + return left->gtSimdScalableVal == right->gtSimdScalableVal; + } #endif // TARGET_XARCH default: @@ -7359,6 +7371,11 @@ struct GenTreeVecCon : public GenTree return gtSimd64Val.IsZero(); } +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + return gtSimdScalableVal.IsZero(); + } #endif // TARGET_XARCH default: @@ -7513,6 +7530,9 @@ struct GenTreeVecCon : public GenTree #if defined(TARGET_XARCH) 
assert(sizeof(simd_t) == sizeof(simd64_t)); +#elif defined(TARGET_ARM64) + assert(sizeof(simd_t) == sizeof(simd32_t)); + assert(sizeof(simd_t) >= sizeof(simdscalable_t)); #else assert(sizeof(simd_t) == sizeof(simd16_t)); #endif @@ -7531,24 +7551,56 @@ struct GenTreeVecCon : public GenTree // struct GenTreeMskCon : public GenTree { - simdmask_t gtSimdMaskVal; + union + { + simdmask_t gtSimdMaskVal; + +#if defined(TARGET_ARM64) + // Variable length masks can not be differentiated by type, as only TYP_MASK is used. + // Instead, we assume masks are always fixed length (when JitUseScalableVectorT is not set) + // or always unknown length (when JitUseScalableVectorT is set). + // TODO-SVE: Eventually all masks on Arm64 should be scalable + simdmaskscalable_t gtSimdScalableMaskVal; +#endif // TARGET_ARM64 + }; void EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType, unsigned simdSize); void EvaluateBinaryInPlace( genTreeOps oper, bool scalar, var_types baseType, unsigned simdSize, GenTreeMskCon* other); - bool IsAllBitsSet() const + bool IsAllBitsSet(var_types simdBaseType = TYP_BYTE) const { +#if defined(TARGET_ARM64) && defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + return gtSimdScalableMaskVal.IsAllBitsSet(simdBaseType); + } +#endif // TARGET_ARM64 && DEBUG + return gtSimdMaskVal.IsAllBitsSet(); } static bool Equals(const GenTreeMskCon* left, const GenTreeMskCon* right) { +#if defined(TARGET_ARM64) && defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + return left->gtSimdScalableMaskVal == right->gtSimdScalableMaskVal; + } +#endif // TARGET_ARM64 && DEBUG + return left->gtSimdMaskVal == right->gtSimdMaskVal; } bool IsZero() const { +#if defined(TARGET_ARM64) && defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + return gtSimdScalableMaskVal.IsZero(); + } +#endif // TARGET_ARM64 && DEBUG + return gtSimdMaskVal.IsZero(); } @@ -7560,6 +7612,10 @@ struct GenTreeMskCon : public GenTree // Some uses of GenTreeMskCon do 
not specify all bits in the mask they are using but failing to zero out the // buffer will cause determinism issues with the compiler. memset(&gtSimdMaskVal, 0, sizeof(gtSimdMaskVal)); + +#if defined(TARGET_ARM64) + assert(sizeof(simdmask_t) >= sizeof(simdmaskscalable_t)); +#endif } #if DEBUGGABLE_GENTREE @@ -9795,49 +9851,28 @@ inline bool GenTree::IsMaskZero() const } //------------------------------------------------------------------- -// IsMaskAllBitsSet: returns true if this node is a mask constant with all bits set. +// IsMaskAllBitsSet: returns true if this node is a mask constant +// with all bits set for the given type +// +// Arguments: +// simdBaseType - the base type to check against // // Returns: // True if this node is a mask constant with all bits set +// for the given type // -inline bool GenTree::IsMaskAllBitsSet() const +inline bool GenTree::IsMaskAllBitsSet(var_types simdBaseType) const { #if defined(FEATURE_MASKED_HW_INTRINSICS) if (IsCnsMsk()) { - return AsMskCon()->IsAllBitsSet(); + return AsMskCon()->IsAllBitsSet(simdBaseType); } #endif // FEATURE_MASKED_HW_INTRINSICS return false; } -//------------------------------------------------------------------------ -// IsTrueMask: Is the given node a true mask -// -// Arguments: -// simdBaseType - the base type of the mask -// -// Returns true if the node is a true mask for the given simdBaseType. -// -// Note that a byte true mask (1111...) is different to an int true mask -// (10001000...), therefore the simdBaseType of the mask needs to be -// taken into account. 
-// -inline bool GenTree::IsTrueMask(var_types simdBaseType) const -{ -#ifdef TARGET_ARM64 - // TODO-SVE: For agnostic VL, vector type may not be simd16_t - - if (IsCnsMsk()) - { - return SveMaskPatternAll == EvaluateSimdMaskToPattern(simdBaseType, AsMskCon()->gtSimdMaskVal); - } -#endif - - return false; -} - //------------------------------------------------------------------- // GetIntegralVectorConstElement: Gets the value of a given element in an integral vector constant // @@ -9851,6 +9886,32 @@ inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index, var_types s { const GenTreeVecCon* node = AsVecCon(); +#if defined(TARGET_ARM64) + if (TypeGet() == TYP_SIMD) + { + // TODO-SVE: For now only support matching types. + assert(simdBaseType == node->gtSimdScalableVal.gtSimdScalableBaseType); + + switch (node->gtSimdScalableVal.gtSimdScalableKind) + { + case SimdScalableRepeated: + return node->gtSimdScalableVal.gtSimdScalableIndex; + + case SimdScalableSequence: + return node->gtSimdScalableVal.gtSimdScalableIndex + + (node->gtSimdScalableVal.gtSimdScalableStep * index); + + case SimdScalableScalar: + return (index == 0) ? 
node->gtSimdScalableVal.gtSimdScalableIndex : 0; + break; + + default: + unreached(); + break; + } + } +#endif + switch (simdBaseType) { case TYP_BYTE: diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 6ed6541fb309f3..ad19f6393dc089 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -930,6 +930,7 @@ struct HWIntrinsicInfo switch (id) { #if defined(TARGET_ARM64) + case NI_VectorT_Create: case NI_Vector64_Create: #endif // TARGET_ARM64 case NI_Vector128_Create: @@ -948,6 +949,7 @@ struct HWIntrinsicInfo switch (id) { #if defined(TARGET_ARM64) + case NI_VectorT_CreateScalar: case NI_Vector64_CreateScalar: #endif // TARGET_ARM64 case NI_Vector128_CreateScalar: @@ -966,6 +968,7 @@ struct HWIntrinsicInfo switch (id) { #if defined(TARGET_ARM64) + case NI_VectorT_CreateScalarUnsafe: case NI_Vector64_CreateScalarUnsafe: #endif // TARGET_ARM64 case NI_Vector128_CreateScalarUnsafe: diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 7275c2ffe4f305..972103ed5942c0 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1303,8 +1303,17 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_VectorT_Create: + { + assert(sig->numArgs == 1); + op1 = impPopStack().val; + retNode = gtNewSimdCreateBroadcastNode(retType, op1, simdBaseType, simdSize); + break; + } + case NI_Vector64_CreateScalar: case NI_Vector128_CreateScalar: + case NI_VectorT_CreateScalar: { assert(sig->numArgs == 1); @@ -1315,6 +1324,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector64_CreateSequence: case NI_Vector128_CreateSequence: + case NI_VectorT_CreateSequence: { assert(sig->numArgs == 2); @@ -1336,6 +1346,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector64_CreateScalarUnsafe: case NI_Vector128_CreateScalarUnsafe: + case NI_VectorT_CreateScalarUnsafe: { 
assert(sig->numArgs == 1); @@ -2914,6 +2925,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Sve_CreateFalseMaskUInt64: { // Import as a constant vector 0 + +#if defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + retNode = gtNewSimdVconNode(retType, simdBaseType, SimdScalableRepeated, 0); + break; + } +#endif // DEBUG + GenTreeVecCon* vecCon = gtNewVconNode(retType); vecCon->gtSimdVal = simd_t::Zero(); retNode = vecCon; @@ -2934,20 +2954,36 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 1); op1 = impPopStack().val; - // Where possible, import a constant mask to allow for optimisations. + // Where possible, import a constant vector to allow for optimisations. + + // For "all true" masks, import as a constant simd vector to allow for optimisations if (op1->IsIntegralConst()) { int64_t pattern = op1->AsIntConCommon()->IntegralValue(); - simd_t simdVal; - if (EvaluateSimdPatternToVector(simdBaseType, &simdVal, (SveMaskPattern)pattern)) +#if defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) { - retNode = gtNewVconNode(retType, &simdVal); - break; + if ((pattern == SVE_PATTERN_ALL) || (pattern == SVE_PATTERN_POW2)) + { + retNode = gtNewSimdVconNode(retType, simdBaseType, SimdScalableRepeated, 1); + break; + } + } + else +#endif /// DEBUG + { + simd_t simdVal; + + if (EvaluateSimdPatternToVector(simdBaseType, &simdVal, (SveMaskPattern)pattern)) + { + retNode = gtNewVconNode(retType, &simdVal); + break; + } } } - // Was not able to generate a pattern, instead import a truemaskall + // Was not able to generate a pattern, instead import the intrinsic node. 
retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, intrinsic, simdBaseType, simdSize); break; } @@ -3567,7 +3603,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// gtNewSimdAllTrueMaskNode: Create a mask with all bits set to true +// gtNewSimdTrueMaskNode: Create a mask with all bits set to true for +// the base type. +// Eg: A u64int mask would be 10001000... // // Arguments: // simdBaseType -- the base type of the nodes being masked @@ -3575,13 +3613,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // Return Value: // The mask // -GenTree* Compiler::gtNewSimdAllTrueMaskNode(var_types simdBaseType) +GenTree* Compiler::gtNewSimdTrueMaskNode(var_types simdBaseType) { // Import as a constant mask - GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); +#if defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + return gtNewMskConNode(TYP_MASK, simdBaseType, 1); + } +#endif // DEBUG - // TODO-SVE: For agnostic VL, vector type may not be simd16_t + GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); bool found = EvaluateSimdPatternToMask(simdBaseType, &mskCon->gtSimdMaskVal, SveMaskPatternAll); assert(found); @@ -3598,6 +3641,14 @@ GenTree* Compiler::gtNewSimdAllTrueMaskNode(var_types simdBaseType) GenTree* Compiler::gtNewSimdFalseMaskByteNode() { // Import as a constant mask 0 + +#if defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + return gtNewMskConNode(TYP_MASK, TYP_BYTE, 0); + } +#endif // DEBUG + GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); mskCon->gtSimdMaskVal = simdmask_t::Zero(); return mskCon; diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index bba6a1630c015e..87494522767f18 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -3068,6 +3068,23 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; 
} + case NI_VectorT_Create: + case NI_VectorT_CreateScalarUnsafe: + { + emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; + GetEmitter()->emitInsSve_R_R(ins, emitSize, targetReg, op1Reg, opt); + break; + } + + case NI_VectorT_CreateSequence: + { + emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; + + // Predicated merge broadcast of the constant + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + } + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 84e490e6490188..0b95de31078280 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -530,8 +530,12 @@ HARDWARE_INTRINSIC(Sve, ReverseElement_Predicates, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SVE Implementation of VectorT Intrinsics -#define FIRST_NI_VectorT NI_Illegal -#define LAST_NI_VectorT NI_Illegal +#define FIRST_NI_VectorT NI_VectorT_Create +HARDWARE_INTRINSIC(VectorT, Create, -1, 1, {INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(VectorT, CreateScalar, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(VectorT, CreateScalarUnsafe, -1, 1, {INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(VectorT, CreateSequence, -1, 2, {INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index, INS_sve_index}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +#define LAST_NI_VectorT NI_VectorT_CreateSequence #endif // FEATURE_HW_INTRINSIC diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 6d9fe77b48a6cf..93ba27c4e8d443 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1121,6 +1121,16 @@ void Lowering::LowerModPow2(GenTree* node) // GenTree* Lowering::LowerCnsMask(GenTreeMskCon* mask) { + // For !JitUseScalableVectorT, we need to ensure the mask can be encoded as ptrue/pfalse. + // For JitUseScalableVectorT, constant masks use the gtSimdScalableMaskVal encoding, so are always valid. 
+ +#if defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + return mask->gtNext; + } +#endif // DEBUG + // Try every type until a match is found if (mask->IsZero()) @@ -1157,7 +1167,7 @@ GenTree* Lowering::LowerCnsMask(GenTreeMskCon* mask) // Create a vector constant GenTreeVecCon* vecCon = m_compiler->gtNewVconNode(TYP_SIMD16); - EvaluateSimdCvtMaskToVector(TYP_BYTE, &vecCon->gtSimdVal, mask->gtSimdMaskVal); + EvaluateSimdCvtMaskToVector(TYP_BYTE, &vecCon->gtSimd16Val, mask->gtSimdMaskVal); BlockRange().InsertBefore(mask, vecCon); // Convert the vector constant to a mask @@ -1669,6 +1679,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector128_Create: case NI_Vector64_CreateScalar: case NI_Vector128_CreateScalar: + case NI_VectorT_CreateScalar: { // We don't directly support the Vector64.Create or Vector128.Create methods in codegen // and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect @@ -2021,7 +2032,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) var_types simdType = Compiler::getSIMDTypeForSize(simdSize); bool foundUse = BlockRange().TryGetUse(node, &use); - GenTree* trueMask = m_compiler->gtNewSimdAllTrueMaskNode(node->GetSimdBaseType()); + GenTree* trueMask = m_compiler->gtNewSimdTrueMaskNode(node->GetSimdBaseType()); GenTree* falseVal = m_compiler->gtNewZeroConNode(simdType); var_types nodeType = simdType; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 3236db623472cd..8c6afd657057ea 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -749,6 +749,42 @@ int LinearScan::BuildNode(GenTree* tree) { // Directly encode constant to instructions. 
} + if (tree->TypeIs(TYP_SIMD)) + { + // If the constant doesn't fit into the instructions, then temps will be required + switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) + { + case SimdScalableRepeated: + if (!emitter::isValidSimm<8>(vecCon->gtSimdScalableVal.gtSimdScalableIndex) && + !emitter::isValidSimm_MultipleOf<8, 256>(vecCon->gtSimdScalableVal.gtSimdScalableIndex)) + { + buildInternalIntRegisterDefForNode(tree); + } + break; + + case SimdScalableSequence: + if (!emitter::isValidSimm<5>(vecCon->gtSimdScalableVal.gtSimdScalableIndex)) + { + buildInternalIntRegisterDefForNode(tree); + } + if (!emitter::isValidSimm<5>(vecCon->gtSimdScalableVal.gtSimdScalableStep)) + { + buildInternalIntRegisterDefForNode(tree); + } + break; + + case SimdScalableScalar: + if (!emitter::isValidSimm<8>(vecCon->gtSimdScalableVal.gtSimdScalableIndex)) + { + buildInternalIntRegisterDefForNode(tree); + } + break; + + default: + unreached(); + break; + } + } else { // Reserve int to load constant from memory (IF_LARGELDC) @@ -770,7 +806,7 @@ int LinearScan::BuildNode(GenTree* tree) { GenTreeMskCon* mskCon = tree->AsMskCon(); - if (mskCon->IsAllBitsSet() || mskCon->IsZero()) + if (mskCon->IsAllBitsSet(TYP_BYTE) || mskCon->IsZero()) { // Directly encode constant to instructions. 
} diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 7df04efcbc1d42..ca18275eaa520f 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -472,7 +472,11 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u if (simdBaseType != TYP_UNDEF) { - assert(size == info.compCompHnd->getClassSize(typeHnd)); +#if defined(TARGET_ARM64) + assert((size == info.compCompHnd->getClassSize(typeHnd)) || (size == SIZE_UNKNOWN)); +#else + assert((size == info.compCompHnd->getClassSize(typeHnd))); +#endif // TARGET_ARM64 setUsesSIMDTypes(true); } @@ -839,4 +843,69 @@ void Compiler::impMarkContiguousSIMDFieldStores(Statement* stmt) fgPreviousCandidateSIMDFieldStoreStmt = nullptr; } } + +#if defined(TARGET_ARM64) + +bool simdscalable_t::IsAllBitsSet() const +{ + return (gtSimdScalableKind == SimdScalableRepeated) && + (gtSimdScalableIndex == (uint64_t)((1 << genTypeSize(gtSimdScalableBaseType)) - 1)); +} + +bool simdmaskscalable_t::IsAllBitsSet(var_types simdBaseType) const +{ + return (gtSimdMaskScalableIndex == 1) && (genTypeSize(simdBaseType) == genTypeSize(gtSimdMaskScalableBaseType)); +} + +bool EvaluateSimdCvtScalableVectorToMask(var_types baseType, simdmaskscalable_t* maskCon, simdscalable_t vecCon) +{ + // All zero can always be converted to a mask, regardless of types + if (vecCon.IsZero()) + { + maskCon->gtSimdMaskScalableBaseType = baseType; + maskCon->gtSimdMaskScalableIndex = 0; + return true; + } + + if (vecCon.gtSimdScalableKind != SimdScalableRepeated) + { + return false; + } + + // size of the basetype must match + if (genTypeSize(baseType) != genTypeSize(vecCon.gtSimdScalableBaseType)) + { + return false; + } + + maskCon->gtSimdMaskScalableBaseType = baseType; + maskCon->gtSimdMaskScalableIndex = (vecCon.gtSimdScalableIndex == 1); + return true; +} + +bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vecCon, simdmaskscalable_t maskCon) +{ + // All zero can always be converted 
to a vector, regardless of types + if (maskCon.IsZero()) + { + vecCon->gtSimdScalableBaseType = baseType; + vecCon->gtSimdScalableKind = SimdScalableRepeated; + vecCon->gtSimdScalableIndex = 0; + return true; + } + + // size of the basetype must match + // TODO: We could work around this for masks? + if (genTypeSize(baseType) != genTypeSize(vecCon->gtSimdScalableBaseType)) + { + return false; + } + + vecCon->gtSimdScalableBaseType = baseType; + vecCon->gtSimdScalableKind = SimdScalableRepeated; + vecCon->gtSimdScalableIndex = 1; + return true; +} +#endif // TARGET_ARM64 + #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 43999e89edcebf..44cab43956c88b 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -192,7 +192,6 @@ struct simd16_t }; static_assert(sizeof(simd16_t) == 16); -#if defined(TARGET_XARCH) struct simd32_t { union @@ -248,6 +247,7 @@ struct simd32_t }; static_assert(sizeof(simd32_t) == 32); +#if defined(TARGET_XARCH) struct simd64_t { union @@ -378,8 +378,11 @@ struct simdmask_t static_assert(sizeof(simdmask_t) == 8); #endif // FEATURE_MASKED_HW_INTRINSICS +// Ensure simd_t is big enough to contain any simd type #if defined(TARGET_XARCH) typedef simd64_t simd_t; +#elif defined(TARGET_ARM64) +typedef simd32_t simd_t; #else typedef simd16_t simd_t; #endif @@ -1789,6 +1792,8 @@ void EvaluateSimdCvtVectorToMask(var_types baseType, simdmask_t* result, TSimd a #if defined(TARGET_ARM64) +// TODO-SVE: Once JitUseScalableVectorT is removed, the pattern evaluation functions can be removed too. + enum SveMaskPattern { SveMaskPatternLargestPowerOf2 = 0, // The largest power of 2. @@ -2115,6 +2120,128 @@ SveMaskPattern EvaluateSimdMaskToPattern(var_types baseType, simdmask_t arg0) } } +// Functionality for handling constant vectors of unknown size + +enum SimdScalableKind : uint8_t +{ + SimdScalableRepeated, // Each lane of the vector contains the same value. 
+ SimdScalableSequence, // Each lane of the vector increments by a step value. + SimdScalableScalar, // First lane is set. The rest of the vector is zero +}; + +struct simdscalable_t +{ + var_types gtSimdScalableBaseType; + SimdScalableKind gtSimdScalableKind; + union + { + uint8_t gtSimdScalableIndexU8[8]; + float gtSimdScalableIndexF32[2]; + double gtSimdScalableIndexF64[1]; + uint64_t gtSimdScalableIndex; + }; + union + { + uint8_t gtSimdScalableStepU8[8]; + float gtSimdScalableStepF32[2]; + double gtSimdScalableStepF64[1]; + uint64_t gtSimdScalableStep; + }; + + bool operator==(const simdscalable_t& other) const + { + if (IsZero() && other.IsZero()) + { + return true; + } + + return (gtSimdScalableBaseType == other.gtSimdScalableBaseType) && + (gtSimdScalableKind == other.gtSimdScalableKind) && (gtSimdScalableIndex == other.gtSimdScalableIndex) && + (gtSimdScalableStep == other.gtSimdScalableStep); + } + + bool operator!=(const simdscalable_t& other) const + { + return !(*this == other); + } + + static simdscalable_t AllBitsSet() + { + simdscalable_t result; + + result.gtSimdScalableBaseType = TYP_BYTE; + result.gtSimdScalableKind = SimdScalableRepeated; + result.gtSimdScalableIndex = 0xff; + + return result; + } + + bool IsZero() const + { + return (gtSimdScalableIndex == 0) && (gtSimdScalableKind != SimdScalableSequence || gtSimdScalableStep == 0); + } + + bool IsAllBitsSet() const; +}; + +static_assert(sizeof(simd_t) >= sizeof(simdscalable_t)); + +struct simdmaskscalable_t +{ + var_types gtSimdMaskScalableBaseType; + uint8_t gtSimdMaskScalableIndex; + + bool operator==(const simdmaskscalable_t& other) const + { + if (IsZero() && other.IsZero()) + { + return true; + } + + return (gtSimdMaskScalableBaseType == other.gtSimdMaskScalableBaseType) && + (gtSimdMaskScalableIndex == other.gtSimdMaskScalableIndex); + } + + bool operator!=(const simdmaskscalable_t& other) const + { + return !(*this == other); + } + + static simdmaskscalable_t AllBitsSet() + { + 
simdmaskscalable_t result; + + result.gtSimdMaskScalableBaseType = TYP_BYTE; + result.gtSimdMaskScalableIndex = 0xff; + + return result; + } + + bool IsZero() const + { + return gtSimdMaskScalableIndex == 0; + } + + // A type is required when checking for all bits set, as an all bits set mask + // for TYP_LONG would not be all true when used for TYP_BYTE, and instead would + // be 000100010001... + bool IsAllBitsSet(var_types simdBaseType) const; +}; + +static_assert(sizeof(simdmask_t) >= sizeof(simdmaskscalable_t)); + +bool EvaluateSimdCvtScalableVectorToMask(var_types baseType, simdmaskscalable_t* maskCon, simdscalable_t vecCon); + +bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vecCon, simdmaskscalable_t maskCon); + +template <typename TBase> +void BroadcastConstantToSimdScalable(simdscalable_t* result, var_types baseType, TBase arg0) +{ + result->gtSimdScalableBaseType = baseType; + result->gtSimdScalableKind = SimdScalableRepeated; + memcpy(&result->gtSimdScalableIndex, &arg0, sizeof(TBase)); +} + //------------------------------------------------------------------------ // NarrowAndDuplicateSimdLong: Narrow each ULONG element in arg0 to size // TSimd. 
Each element is then duplicated to the number of TSimd values diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index f5584a2d6f3735..15872d1cdf33c4 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -447,6 +447,9 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc) #if defined(TARGET_XARCH) , m_simd32CnsMap(nullptr) , m_simd64CnsMap(nullptr) +#elif defined(TARGET_ARM64) + , m_simdScalableCnsMap(nullptr) + , m_simdMaskScalableCnsMap(nullptr) #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) , m_simdMaskCnsMap(nullptr) @@ -1719,6 +1722,14 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type m_defs = new (alloc) Alloc::Type[ChunkSize]; break; } + +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + m_defs = new (alloc) Alloc::Type[ChunkSize]; + break; + } + #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -1899,6 +1910,18 @@ ValueNum ValueNumStore::VNForSimd64Con(const simd64_t& cnsVal) { return VnForConst(cnsVal, GetSimd64CnsMap(), TYP_SIMD64); } + +#elif defined(TARGET_ARM64) +ValueNum ValueNumStore::VNForSimdScalableCon(const simdscalable_t& cnsVal) +{ + return VnForConst(cnsVal, GetSimdScalableCnsMap(), TYP_SIMD); +} + +ValueNum ValueNumStore::VNForSimdMaskScalableCon(const simdmaskscalable_t& cnsVal) +{ + return VnForConst(cnsVal, GetSimdMaskScalableCnsMap(), TYP_MASK); +} + #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -2263,11 +2286,23 @@ ValueNum ValueNumStore::VNAllBitsForType(var_types typ, unsigned elementCount) { return VNForSimd64Con(simd64_t::AllBitsSet()); } + +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + return VNForSimdScalableCon(simdscalable_t::AllBitsSet()); + } #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: { +#if defined(TARGET_ARM64) && defined(DEBUG) + if (JitConfig.JitUseScalableVectorT()) + { + return 
VNForSimdMaskScalableCon(simdmaskscalable_t::AllBitsSet()); + } +#endif // TARGET_ARM64 && DEBUG return VNForSimdMaskCon(simdmask_t::AllBitsSet(elementCount)); } #endif // FEATURE_MASKED_HW_INTRINSICS @@ -2346,6 +2381,72 @@ TSimd BroadcastConstantToSimd(ValueNumStore* vns, var_types baseType, ValueNum a return result; } +#if defined(TARGET_ARM64) +simdscalable_t BroadcastConstantToSimdScalable(ValueNumStore* vns, var_types baseType, ValueNum argVN) +{ + assert(vns->IsVNConstant(argVN)); + assert(!varTypeIsSIMD(vns->TypeOfVN(argVN))); + + simdscalable_t result = {}; + + switch (baseType) + { + case TYP_FLOAT: + { + float arg = vns->GetConstantSingle(argVN); + BroadcastConstantToSimdScalable(&result, baseType, arg); + break; + } + + case TYP_DOUBLE: + { + double arg = vns->GetConstantDouble(argVN); + BroadcastConstantToSimdScalable(&result, baseType, arg); + break; + } + + case TYP_BYTE: + case TYP_UBYTE: + { + uint8_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimdScalable(&result, baseType, arg); + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + uint16_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimdScalable(&result, baseType, arg); + break; + } + + case TYP_INT: + case TYP_UINT: + { + uint32_t arg = static_cast(vns->GetConstantInt32(argVN)); + BroadcastConstantToSimdScalable(&result, baseType, arg); + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + uint64_t arg = static_cast(vns->GetConstantInt64(argVN)); + BroadcastConstantToSimdScalable(&result, baseType, arg); + break; + } + + default: + { + unreached(); + } + } + + return result; +} +#endif // defined TARGET_ARM64 + ValueNum ValueNumStore::VNBroadcastForSimdType(var_types simdType, var_types simdBaseType, ValueNum valVN) { assert(varTypeIsSIMD(simdType)); @@ -2383,6 +2484,13 @@ ValueNum ValueNumStore::VNBroadcastForSimdType(var_types simdType, var_types sim return VNForSimd64Con(result); } +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + 
simdscalable_t result = BroadcastConstantToSimdScalable(this, simdBaseType, valVN); + return VNForSimdScalableCon(result); + } + #endif // TARGET_XARCH default: @@ -4053,6 +4161,18 @@ simd64_t ValueNumStore::GetConstantSimd64(ValueNum argVN) return ConstantValue(argVN); } + +#elif defined(TARGET_ARM64) +// Given a simdscalable constant value number return its value as a simdscalable. +// +simdscalable_t ValueNumStore::GetConstantSimdScalable(ValueNum argVN) +{ + assert(IsVNConstant(argVN)); + assert(TypeOfVN(argVN) == TYP_SIMD); + + return ConstantValue(argVN); +} + #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -7760,6 +7880,20 @@ simd64_t GetConstantSimd64(ValueNumStore* vns, var_types baseType, ValueNum argV return BroadcastConstantToSimd(vns, baseType, argVN); } + +#elif defined(TARGET_ARM64) +simdscalable_t GetConstantSimdScalable(ValueNumStore* vns, var_types baseType, ValueNum argVN) +{ + assert(vns->IsVNConstant(argVN)); + + if (vns->TypeOfVN(argVN) == TYP_SIMD) + { + return vns->GetConstantSimdScalable(argVN); + } + + return BroadcastConstantToSimdScalable(vns, baseType, argVN); +} + #endif // TARGET_XARCH ValueNum EvaluateUnarySimd( @@ -10710,6 +10844,41 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) cnsVal.u64[6], cnsVal.u64[7]); break; } + +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + simdscalable_t cnsVal = GetConstantSimdScalable(vn); + printf("SimdScalableCns[%-6s ", varTypeName(cnsVal.gtSimdScalableBaseType)); + + switch (cnsVal.gtSimdScalableKind) + { + case SimdScalableRepeated: + printf("0x%016llx, 0x%016llx, 0x%016llx...]", cnsVal.gtSimdScalableIndex, + cnsVal.gtSimdScalableIndex, cnsVal.gtSimdScalableIndex); + break; + + case SimdScalableSequence: + { + uint64_t index = cnsVal.gtSimdScalableIndex; + printf("0x%016llx, ", index); + index += cnsVal.gtSimdScalableStep; + printf("0x%016llx, ", index); + index += cnsVal.gtSimdScalableStep; + printf("0x%016llx...]", index); + break; + } + + case 
SimdScalableScalar: + printf("0x%016llx, 0x0, 0x0...]", cnsVal.gtSimdScalableIndex); + break; + + default: + unreached(); + } + break; + } + #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -12299,6 +12468,17 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) tree->gtVNPair.SetBoth(vnStore->VNForSimd64Con(simd64Val)); break; } + +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + simdscalable_t simdVal; + memcpy(&simdVal, &tree->AsVecCon()->gtSimdScalableVal, sizeof(simdscalable_t)); + + tree->gtVNPair.SetBoth(vnStore->VNForSimdScalableCon(simdVal)); + break; + } + #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index b9d9ec6eff6b82..a18ffdddb90694 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -394,6 +394,8 @@ class ValueNumStore #if defined(TARGET_XARCH) simd32_t GetConstantSimd32(ValueNum argVN); simd64_t GetConstantSimd64(ValueNum argVN); +#elif defined(TARGET_ARM64) + simdscalable_t GetConstantSimdScalable(ValueNum argVN); #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) simdmask_t GetConstantSimdMask(ValueNum argVN); @@ -480,6 +482,9 @@ class ValueNumStore #if defined(TARGET_XARCH) ValueNum VNForSimd32Con(const simd32_t& cnsVal); ValueNum VNForSimd64Con(const simd64_t& cnsVal); +#elif defined(TARGET_ARM64) + ValueNum VNForSimdScalableCon(const simdscalable_t& cnsVal); + ValueNum VNForSimdMaskScalableCon(const simdmaskscalable_t& cnsVal); #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) ValueNum VNForSimdMaskCon(const simdmask_t& cnsVal); @@ -1920,6 +1925,67 @@ class ValueNumStore } return m_simd64CnsMap; } +#elif defined(TARGET_ARM64) + struct SimdScalablePrimitiveKeyFuncs : public JitKeyFuncsDefEquals + { + static bool Equals(const simdscalable_t& x, const simdscalable_t& y) + { + return x == y; + } + + static unsigned GetHashCode(const simdscalable_t& val) + { + unsigned hash = 0; + + hash 
= static_cast(hash ^ val.gtSimdScalableBaseType); + hash = static_cast(hash ^ val.gtSimdScalableKind); + hash = static_cast(hash ^ val.gtSimdScalableIndex); + hash = static_cast(hash ^ val.gtSimdScalableStep); + + return hash; + } + }; + + typedef VNMap SimdScalableToValueNumMap; + SimdScalableToValueNumMap* m_simdScalableCnsMap; + SimdScalableToValueNumMap* GetSimdScalableCnsMap() + { + if (m_simdScalableCnsMap == nullptr) + { + m_simdScalableCnsMap = new (m_alloc) SimdScalableToValueNumMap(m_alloc); + } + return m_simdScalableCnsMap; + } + + struct SimdMaskScalablePrimitiveKeyFuncs : public JitKeyFuncsDefEquals + { + static bool Equals(const simdmaskscalable_t& x, const simdmaskscalable_t& y) + { + return x == y; + } + + static unsigned GetHashCode(const simdmaskscalable_t& val) + { + unsigned hash = 0; + + hash = static_cast(hash ^ val.gtSimdMaskScalableBaseType); + hash = static_cast(hash ^ val.gtSimdMaskScalableIndex); + + return hash; + } + }; + + typedef VNMap SimdMaskScalableToValueNumMap; + SimdMaskScalableToValueNumMap* m_simdMaskScalableCnsMap; + SimdMaskScalableToValueNumMap* GetSimdMaskScalableCnsMap() + { + if (m_simdMaskScalableCnsMap == nullptr) + { + m_simdMaskScalableCnsMap = new (m_alloc) SimdMaskScalableToValueNumMap(m_alloc); + } + return m_simdMaskScalableCnsMap; + } + #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -2134,6 +2200,13 @@ struct ValueNumStore::VarTypConv typedef simd64_t Type; typedef simd64_t Lang; }; +#elif defined(TARGET_ARM64) +template <> +struct ValueNumStore::VarTypConv +{ + typedef simdscalable_t Type; + typedef simdscalable_t Lang; +}; #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -2219,6 +2292,13 @@ FORCEINLINE simd64_t ValueNumStore::SafeGetConstantValue(Chunk* c, uns assert(c->m_typ == TYP_SIMD64); return reinterpret_cast::Lang*>(c->m_defs)[offset]; } +#elif defined(TARGET_ARM64) +template <> +FORCEINLINE simdscalable_t ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned 
offset) +{ + assert(c->m_typ == TYP_SIMD); + return reinterpret_cast::Lang*>(c->m_defs)[offset]; +} #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) @@ -2300,6 +2380,20 @@ FORCEINLINE simd64_t ValueNumStore::ConstantValueInternal(ValueNum vn return SafeGetConstantValue(c, offset); } +#elif defined(TARGET_ARM64) +template <> +FORCEINLINE simdscalable_t ValueNumStore::ConstantValueInternal(ValueNum vn DEBUGARG(bool coerce)) +{ + Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn)); + assert(c->m_attribs == CEA_Const); + + unsigned offset = ChunkOffset(vn); + + assert(c->m_typ == TYP_SIMD); + assert(!coerce); + + return SafeGetConstantValue(c, offset); +} #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) From c3afbc8f587f3b161205fd4d0b8b5a5827106246 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 12:12:43 +0100 Subject: [PATCH 02/58] Fix AllBitsSet functionality --- src/coreclr/jit/simd.cpp | 14 ++++++++++---- src/coreclr/jit/simd.h | 6 +++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index ca18275eaa520f..530c007dd9a3df 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -848,8 +848,14 @@ void Compiler::impMarkContiguousSIMDFieldStores(Statement* stmt) bool simdscalable_t::IsAllBitsSet() const { - return (gtSimdScalableKind == SimdScalableRepeated) && - (gtSimdScalableIndex == (uint64_t)((1 << genTypeSize(gtSimdScalableBaseType)) - 1)); + if (gtSimdScalableKind != SimdScalableRepeated) + { + return false; + } + const unsigned elementBitSize = genTypeSize(gtSimdScalableBaseType) * 8; + const uint64_t allBitsSetMask = + (elementBitSize == 64) ? 
UINT64_MAX : (((uint64_t)1 << elementBitSize) - 1); + return gtSimdScalableIndex == allBitsSetMask; } bool simdmaskscalable_t::IsAllBitsSet(var_types simdBaseType) const @@ -885,7 +891,7 @@ bool EvaluateSimdCvtScalableVectorToMask(var_types baseType, simdmaskscalable_t* bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vecCon, simdmaskscalable_t maskCon) { - // All zero can always be converted to a mask, regardless of types + // All zero can always be converted to a vector, regardless of types if (maskCon.IsZero()) { vecCon->gtSimdScalableBaseType = baseType; @@ -896,7 +902,7 @@ bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vec // size of the basetype must match // TODO: We could work around this for masks? - if (genTypeSize(baseType) != genTypeSize(vecCon->gtSimdScalableBaseType)) + if (genTypeSize(baseType) != genTypeSize(maskCon.gtSimdMaskScalableBaseType)) { return false; } diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 44cab43956c88b..3e8a14e5b83e74 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2167,7 +2167,7 @@ struct simdscalable_t static simdscalable_t AllBitsSet() { - simdscalable_t result; + simdscalable_t result = {}; result.gtSimdScalableBaseType = TYP_BYTE; result.gtSimdScalableKind = SimdScalableRepeated; @@ -2209,10 +2209,10 @@ struct simdmaskscalable_t static simdmaskscalable_t AllBitsSet() { - simdmaskscalable_t result; + simdmaskscalable_t result = {}; result.gtSimdMaskScalableBaseType = TYP_BYTE; - result.gtSimdMaskScalableIndex = 0xff; + result.gtSimdMaskScalableIndex = 1; return result; } From fd84125cee991373eb83305786154a82a8ea2785 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 12:25:09 +0100 Subject: [PATCH 03/58] Remove duplicate loadConstantHelper --- src/coreclr/jit/codegenarm64.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 
485dec0d383008..d82313ea62e8c7 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2455,8 +2455,6 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Clear the entire target register emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, 0, opt); - regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); - // Use NEON instructions to load the constant (to avoid using predicates) if (varTypeIsIntegral(simdVal.gtSimdScalableBaseType) && From 43f648bd62915869f47f4f230d2488da972f5e29 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 12:25:43 +0100 Subject: [PATCH 04/58] Remove extra break --- src/coreclr/jit/gentree.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 46a1b764990d86..2025253e007ceb 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -9860,7 +9860,7 @@ inline bool GenTree::IsMaskZero() const // with all bits set for the given type // // Arguments: -// simdBaseType - the base type to check aginst +// simdBaseType - the base type to check against // // Returns: // True if this node is a mask constant with all bits set @@ -9908,7 +9908,6 @@ inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index, var_types s case SimdScalableScalar: return (index == 0) ? 
node->gtSimdScalableVal.gtSimdScalableIndex : 0; - break; default: unreached(); From d6a99e113991224b6c8098c83d9477adc94776b4 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 12:40:44 +0100 Subject: [PATCH 05/58] Ensure index and step have distinct temp registers --- src/coreclr/jit/codegenarm64.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d82313ea62e8c7..3130e029702c23 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2393,8 +2393,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre emitAttr emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; auto loadConstantHelper = [&](uint64_t constValue) -> regNumber { - // Get a temp integer register to compute long address. - regNumber addrReg = internalRegisters.GetSingle(tree); + // Get a temp integer register to compute long address. Use Extract so multiple calls + // (index + step) get distinct temps when LSRA reserved more than one. + regNumber addrReg = internalRegisters.Extract(tree, RBM_ALLINT); // Store the index to memory UNATIVE_OFFSET cnum = From e04c472fa9564286a592ce2905876d4c85741a3e Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 13:53:36 +0100 Subject: [PATCH 06/58] fix formatting --- src/coreclr/jit/simd.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 530c007dd9a3df..1da4f0b3c26e46 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -853,8 +853,7 @@ bool simdscalable_t::IsAllBitsSet() const return false; } const unsigned elementBitSize = genTypeSize(gtSimdScalableBaseType) * 8; - const uint64_t allBitsSetMask = - (elementBitSize == 64) ? UINT64_MAX : (((uint64_t)1 << elementBitSize) - 1); + const uint64_t allBitsSetMask = (elementBitSize == 64) ? 
UINT64_MAX : (((uint64_t)1 << elementBitSize) - 1); return gtSimdScalableIndex == allBitsSetMask; } From 91d235cca8ba55208336a06ff0214c6f494f06dd Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 14:10:50 +0100 Subject: [PATCH 07/58] Fix GetHashCode for scalables --- src/coreclr/jit/valuenum.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index a18ffdddb90694..2a86735763b16a 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -1939,8 +1939,10 @@ class ValueNumStore hash = static_cast(hash ^ val.gtSimdScalableBaseType); hash = static_cast(hash ^ val.gtSimdScalableKind); - hash = static_cast(hash ^ val.gtSimdScalableIndex); - hash = static_cast(hash ^ val.gtSimdScalableStep); + hash = static_cast(hash ^ ulo32(val.gtSimdScalableIndex)); + hash = static_cast(hash ^ uhi32(val.gtSimdScalableIndex)); + hash = static_cast(hash ^ ulo32(val.gtSimdScalableStep)); + hash = static_cast(hash ^ uhi32(val.gtSimdScalableStep)); return hash; } @@ -1969,7 +1971,8 @@ class ValueNumStore unsigned hash = 0; hash = static_cast(hash ^ val.gtSimdMaskScalableBaseType); - hash = static_cast(hash ^ val.gtSimdMaskScalableIndex); + hash = static_cast(hash ^ ulo32(val.gtSimdMaskScalableIndex)); + hash = static_cast(hash ^ uhi32(val.gtSimdMaskScalableIndex)); return hash; } From 7c7ebbdccdaab27de52e31e2ecbb223273d6c462 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 14:14:28 +0100 Subject: [PATCH 08/58] Fix IsTrueMask logic --- src/coreclr/jit/gentree.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7c4478fc29cc93..3d74314db4e723 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24637,7 +24637,7 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode( #if defined(TARGET_ARM64) if (type == TYP_SIMD) { - // Only optimizatize when both op1 and op2 
are constant + // Only optimize when both op1 and op2 are constant if (op1->OperIsConst() && op2->OperIsConst()) { GenTreeVecCon* scalableVecCon = gtNewSimdVconNode(type, simdBaseType, SimdScalableSequence, 0); @@ -35780,7 +35780,7 @@ bool GenTree::IsTrueMask(var_types simdBaseType) const // A true mask can be used with a larger type. Eg: A short true mask will be valid for a long vector. var_types maskBaseType = AsMskCon()->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType; - return (genTypeSize(maskBaseType) >= genTypeSize(simdBaseType)); + return (genTypeSize(maskBaseType) <= genTypeSize(simdBaseType)); } #endif // DEBUG return SveMaskPatternAll == EvaluateSimdMaskToPattern(simdBaseType, AsMskCon()->gtSimdMaskVal); From cec1eb00876df57153dfec85c5383bae31906279 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 14:33:39 +0100 Subject: [PATCH 09/58] Fix IsTrueMask logic --- src/coreclr/jit/simd.h | 2 ++ src/coreclr/jit/valuenum.h | 11 +++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 3e8a14e5b83e74..65340258c875b0 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2136,6 +2136,7 @@ struct simdscalable_t union { uint8_t gtSimdScalableIndexU8[8]; + uint32_t gtSimdScalableIndexU32[2]; float gtSimdScalableIndexF32[2]; double gtSimdScalableIndexF64[1]; uint64_t gtSimdScalableIndex; @@ -2143,6 +2144,7 @@ struct simdscalable_t union { uint8_t gtSimdScalableStepU8[8]; + uint32_t gtSimdScalableStepU32[2]; float gtSimdScalableStepF32[2]; double gtSimdScalableStepF64[1]; uint64_t gtSimdScalableStep; diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 2a86735763b16a..8c5606a9cb465b 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -1939,10 +1939,10 @@ class ValueNumStore hash = static_cast(hash ^ val.gtSimdScalableBaseType); hash = static_cast(hash ^ val.gtSimdScalableKind); - hash = static_cast(hash ^ 
ulo32(val.gtSimdScalableIndex)); - hash = static_cast(hash ^ uhi32(val.gtSimdScalableIndex)); - hash = static_cast(hash ^ ulo32(val.gtSimdScalableStep)); - hash = static_cast(hash ^ uhi32(val.gtSimdScalableStep)); + hash = static_cast(hash ^ val.gtSimdScalableIndexU32[0]); + hash = static_cast(hash ^ val.gtSimdScalableIndexU32[1]); + hash = static_cast(hash ^ val.gtSimdScalableStepU32[0]); + hash = static_cast(hash ^ val.gtSimdScalableStepU32[1]); return hash; } @@ -1971,8 +1971,7 @@ class ValueNumStore unsigned hash = 0; hash = static_cast(hash ^ val.gtSimdMaskScalableBaseType); - hash = static_cast(hash ^ ulo32(val.gtSimdMaskScalableIndex)); - hash = static_cast(hash ^ uhi32(val.gtSimdMaskScalableIndex)); + hash = static_cast(hash ^ val.gtSimdMaskScalableIndex); return hash; } From 183bc23b2085bdd993ad143cbaa9e81da14bd582 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 14:50:02 +0100 Subject: [PATCH 10/58] Reserve correct registers for constant vectors --- src/coreclr/jit/lsraarm64.cpp | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 5496eb56937a7b..e46f89c47ed9e6 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -749,18 +749,41 @@ int LinearScan::BuildNode(GenTree* tree) { // Directly encode constant to instructions. 
} - if (tree->TypeIs(TYP_SIMD)) + else if (vecCon->TypeIs(TYP_SIMD)) { // If the constant doesn't fit into the instructions, then temps will be required switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) { case SimdScalableRepeated: - if (!emitter::isValidSimm<8>(vecCon->gtSimdScalableVal.gtSimdScalableIndex) && - !emitter::isValidSimm_MultipleOf<8, 256>(vecCon->gtSimdScalableVal.gtSimdScalableIndex)) + { + bool canEncodeScalar = false; + var_types baseType = vecCon->gtSimdScalableVal.gtSimdScalableBaseType; + if (varTypeIsFloating(baseType)) + { + if (baseType == TYP_FLOAT) + { + canEncodeScalar = emitter::emitIns_valid_imm_for_fmov( + *reinterpret_cast(&vecCon->gtSimdScalableVal.gtSimdScalableIndex)); + } + else + { + assert(baseType == TYP_DOUBLE); + canEncodeScalar = emitter::emitIns_valid_imm_for_fmov( + *reinterpret_cast(&vecCon->gtSimdScalableVal.gtSimdScalableIndex)); + } + } + else + { + canEncodeScalar = emitter::emitIns_valid_imm_for_mov( + vecCon->gtSimdScalableVal.gtSimdScalableIndex, emitActualTypeSize(baseType)); + } + if (!canEncodeScalar) { buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); } break; + } case SimdScalableSequence: if (!emitter::isValidSimm<5>(vecCon->gtSimdScalableVal.gtSimdScalableIndex)) From 160810c0dc0c3bc7b9495fa612ad94d6233a1404 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 15:20:20 +0100 Subject: [PATCH 11/58] Canonicalize for simdscalable_t zero Change-Id: I627878463cd19d781d6fedaa4e7d3cc9257c4b1b --- src/coreclr/jit/gentree.cpp | 23 +++++++++++++++++------ src/coreclr/jit/lsraarm64.cpp | 5 +++-- src/coreclr/jit/valuenum.h | 8 ++++++++ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 3d74314db4e723..5062a64c31e646 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3339,12 +3339,23 @@ unsigned Compiler::gtHashValue(GenTree* tree) #if defined(TARGET_ARM64) case TYP_SIMD: { - 
add = genTreeHashAdd(ulo32(add), vecCon->gtSimdScalableVal.gtSimdScalableKind); - add = genTreeHashAdd(ulo32(add), vecCon->gtSimdScalableVal.gtSimdScalableBaseType); - add = genTreeHashAdd(ulo32(add), ulo32(vecCon->gtSimdScalableVal.gtSimdScalableIndex)); - add = genTreeHashAdd(ulo32(add), uhi32(vecCon->gtSimdScalableVal.gtSimdScalableIndex)); - add = genTreeHashAdd(ulo32(add), ulo32(vecCon->gtSimdScalableVal.gtSimdScalableStep)); - add = genTreeHashAdd(ulo32(add), uhi32(vecCon->gtSimdScalableVal.gtSimdScalableStep)); + simdscalable_t simdVal = vecCon->gtSimdScalableVal; + + // Canonicalize zeros so hash aligns with equality, which treats all-zero encodings as equal. + if (simdVal.IsZero()) + { + simdVal.gtSimdScalableBaseType = TYP_BYTE; + simdVal.gtSimdScalableKind = SimdScalableRepeated; + simdVal.gtSimdScalableIndex = 0; + simdVal.gtSimdScalableStep = 0; + } + + add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableKind); + add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableBaseType); + add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableIndexU32[0]); + add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableIndexU32[1]); + add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableStepU32[0]); + add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableStepU32[1]); break; } #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e46f89c47ed9e6..487e66fcbb2cd5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -774,8 +774,9 @@ int LinearScan::BuildNode(GenTree* tree) } else { - canEncodeScalar = emitter::emitIns_valid_imm_for_mov( - vecCon->gtSimdScalableVal.gtSimdScalableIndex, emitActualTypeSize(baseType)); + canEncodeScalar = + emitter::emitIns_valid_imm_for_mov(vecCon->gtSimdScalableVal.gtSimdScalableIndex, + emitActualTypeSize(baseType)); } if (!canEncodeScalar) { diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 8c5606a9cb465b..c56700a6c67ee6 100644 --- 
a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -1937,6 +1937,14 @@ class ValueNumStore { unsigned hash = 0; + if (val.IsZero()) + { + // Canonicalize zero so all encodings hash the same. + hash = static_cast(hash ^ TYP_BYTE); + hash = static_cast(hash ^ SimdScalableRepeated); + return hash; + } + hash = static_cast(hash ^ val.gtSimdScalableBaseType); hash = static_cast(hash ^ val.gtSimdScalableKind); hash = static_cast(hash ^ val.gtSimdScalableIndexU32[0]); From 168b84d8dadf7678108a64eb10ff4f4ffdbbdf42 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 30 Apr 2026 15:52:21 +0100 Subject: [PATCH 12/58] Check all bits when converting to/from vector/mask --- src/coreclr/jit/simd.cpp | 41 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 1da4f0b3c26e46..00150e9191906c 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -883,8 +883,27 @@ bool EvaluateSimdCvtScalableVectorToMask(var_types baseType, simdmaskscalable_t* return false; } + uint64_t allBitsSet = 0; + switch (genTypeSize(baseType)) + { + case 1: + allBitsSet = 0xFF; + break; + case 2: + allBitsSet = 0xFFFF; + break; + case 4: + allBitsSet = 0xFFFFFFFFull; + break; + case 8: + allBitsSet = 0xFFFFFFFFFFFFFFFFull; + break; + default: + unreached(); + } + maskCon->gtSimdMaskScalableBaseType = baseType; - maskCon->gtSimdMaskScalableIndex = (vecCon.gtSimdScalableIndex == 1); + maskCon->gtSimdMaskScalableIndex = (vecCon.gtSimdScalableIndex == allBitsSet); return true; } @@ -908,7 +927,25 @@ bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vec vecCon->gtSimdScalableBaseType = baseType; vecCon->gtSimdScalableKind = SimdScalableRepeated; - vecCon->gtSimdScalableIndex = 1; + + switch (genTypeSize(baseType)) + { + case 1: + vecCon->gtSimdScalableIndex = 0xFF; + break; + case 2: + vecCon->gtSimdScalableIndex = 0xFFFF; + break; + case 4: + 
vecCon->gtSimdScalableIndex = 0xFFFFFFFFull; + break; + case 8: + vecCon->gtSimdScalableIndex = 0xFFFFFFFFFFFFFFFFull; + break; + default: + unreached(); + } + return true; } #endif // TARGET_ARM64 From 6c46277fbc0a6cae744fa199b8b4b93d60953a31 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 14:07:25 +0100 Subject: [PATCH 13/58] fix call to AllBitsSet --- src/coreclr/jit/lsraarm64.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 487e66fcbb2cd5..495f3df7794e7e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -830,7 +830,13 @@ int LinearScan::BuildNode(GenTree* tree) { GenTreeMskCon* mskCon = tree->AsMskCon(); - if (mskCon->IsAllBitsSet(TYP_BYTE) || mskCon->IsZero()) + var_types maskBaseType = TYP_BYTE; + if (JitConfig.JitUseScalableVectorT()) + { + maskBaseType = mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType; + } + + if (mskCon->IsAllBitsSet(maskBaseType) || mskCon->IsZero()) { // Directly encode constant to instructions. 
} From 7c02f28fa47f2cb0c232570610d678ca4503a532 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 14:11:14 +0100 Subject: [PATCH 14/58] Fix codegen for SimdScalableScalar floats --- src/coreclr/jit/codegenarm64.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 3130e029702c23..8bb6509451d6eb 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2463,11 +2463,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndex); } - else if (varTypeIsFloating(simdVal.gtSimdScalableBaseType) && - emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) + else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && + emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) { emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]); } + else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) && + emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF32[0])) + { + emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, + static_cast(simdVal.gtSimdScalableIndexF32[0])); + } else { regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); From 1c5bf920b176d80839b0706c1769242936b8deb0 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 14:15:09 +0100 Subject: [PATCH 15/58] use memcpy for getting floats --- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/jit/lsraarm64.cpp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8bb6509451d6eb..a93bfab0bd2bc4 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2464,7 +2464,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre 
emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndex); } else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && - emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) + emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) { emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]); } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 495f3df7794e7e..8fc066b4d03728 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -762,14 +762,16 @@ int LinearScan::BuildNode(GenTree* tree) { if (baseType == TYP_FLOAT) { - canEncodeScalar = emitter::emitIns_valid_imm_for_fmov( - *reinterpret_cast(&vecCon->gtSimdScalableVal.gtSimdScalableIndex)); + float value; + memcpy(&value, &vecCon->gtSimdScalableVal.gtSimdScalableIndex, sizeof(value)); + canEncodeScalar = emitter::emitIns_valid_imm_for_fmov(value); } else { assert(baseType == TYP_DOUBLE); - canEncodeScalar = emitter::emitIns_valid_imm_for_fmov( - *reinterpret_cast(&vecCon->gtSimdScalableVal.gtSimdScalableIndex)); + double value; + memcpy(&value, &vecCon->gtSimdScalableVal.gtSimdScalableIndex, sizeof(value)); + canEncodeScalar = emitter::emitIns_valid_imm_for_fmov(value); } } else From 3a3e1d369e1e90978cac8846cd4d8b4b962c80de Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 15:24:27 +0100 Subject: [PATCH 16/58] Add debug check --- src/coreclr/jit/lsraarm64.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 8fc066b4d03728..2a760bb9da00f2 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -833,10 +833,12 @@ int LinearScan::BuildNode(GenTree* tree) GenTreeMskCon* mskCon = tree->AsMskCon(); var_types maskBaseType = TYP_BYTE; +#if defined(DEBUG) if (JitConfig.JitUseScalableVectorT()) { maskBaseType = mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType; } 
+#endif // DEBUG if (mskCon->IsAllBitsSet(maskBaseType) || mskCon->IsZero()) { From 490bec78b0577c3a9f67dc6ee9816c9ad33b6aea Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 16:17:00 +0100 Subject: [PATCH 17/58] Add VN support for scalable+fixed masks --- src/coreclr/jit/simd.cpp | 1 + src/coreclr/jit/simd.h | 38 +++++++++++ src/coreclr/jit/valuenum.cpp | 32 +++++++-- src/coreclr/jit/valuenum.h | 121 +++++++++++++++++++++++------------ 4 files changed, 143 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 00150e9191906c..4f6eafd436b391 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -948,6 +948,7 @@ bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vec return true; } + #endif // TARGET_ARM64 #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 65340258c875b0..d8dc7744bc91f5 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -306,6 +306,10 @@ static_assert(sizeof(simd64_t) == 64); #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) +// Forward declarations for mask types used by simdmask_t helpers. 
+struct simdmaskscalable_t; +struct simdmaskvalue_t; + struct simdmask_t { union @@ -374,6 +378,7 @@ struct simdmask_t { return {}; } + }; static_assert(sizeof(simdmask_t) == 8); #endif // FEATURE_MASKED_HW_INTRINSICS @@ -2230,6 +2235,39 @@ struct simdmaskscalable_t bool IsAllBitsSet(var_types simdBaseType) const; }; +struct simdmaskvalue_t +{ + uint8_t isScalable; + simdmaskscalable_t scalable; + simdmask_t fixed; + + static simdmaskvalue_t FromFixed(const simdmask_t& mask) + { + simdmaskvalue_t result = {}; + + result.isScalable = 0; + result.fixed = mask; + + return result; + } + + static simdmaskvalue_t FromScalable(const simdmaskscalable_t& mask) + { + simdmaskvalue_t result = {}; + + result.isScalable = 1; + result.scalable = mask; + result.fixed = simdmask_t::Zero(); + + return result; + } + + bool IsScalable() const + { + return isScalable != 0; + } +}; + static_assert(sizeof(simdmask_t) >= sizeof(simdmaskscalable_t)); bool EvaluateSimdCvtScalableVectorToMask(var_types baseType, simdmaskscalable_t* maskCon, simdscalable_t vecCon); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index da5ebd52bc1f07..4b13fbe1f632da 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -449,7 +449,6 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc) , m_simd64CnsMap(nullptr) #elif defined(TARGET_ARM64) , m_simdScalableCnsMap(nullptr) - , m_simdMaskScalableCnsMap(nullptr) #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) , m_simdMaskCnsMap(nullptr) @@ -1919,7 +1918,7 @@ ValueNum ValueNumStore::VNForSimdScalableCon(const simdscalable_t& cnsVal) ValueNum ValueNumStore::VNForSimdMaskScalableCon(const simdmaskscalable_t& cnsVal) { - return VnForConst(cnsVal, GetSimdMaskScalableCnsMap(), TYP_MASK); + return VnForConst(simdmaskvalue_t::FromScalable(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); } #endif // TARGET_XARCH @@ -1927,7 +1926,7 @@ ValueNum ValueNumStore::VNForSimdMaskScalableCon(const 
simdmaskscalable_t& cnsVa #if defined(FEATURE_MASKED_HW_INTRINSICS) ValueNum ValueNumStore::VNForSimdMaskCon(const simdmask_t& cnsVal) { - return VnForConst(cnsVal, GetSimdMaskCnsMap(), TYP_MASK); + return VnForConst(simdmaskvalue_t::FromFixed(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); } #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD @@ -4178,12 +4177,19 @@ simdscalable_t ValueNumStore::GetConstantSimdScalable(ValueNum argVN) #if defined(FEATURE_MASKED_HW_INTRINSICS) // Given a simdmask constant value number return its value as a simdmask. // -simdmask_t ValueNumStore::GetConstantSimdMask(ValueNum argVN) +simdmaskvalue_t ValueNumStore::GetConstantSimdMaskValue(ValueNum argVN) { assert(IsVNConstant(argVN)); assert(TypeOfVN(argVN) == TYP_MASK); - return ConstantValue(argVN); + return ConstantValue(argVN); +} + +simdmask_t ValueNumStore::GetConstantSimdMask(ValueNum argVN) +{ + simdmaskvalue_t storage = GetConstantSimdMaskValue(argVN); + + return storage.fixed; } #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD @@ -10884,8 +10890,20 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) #if defined(FEATURE_MASKED_HW_INTRINSICS) case TYP_MASK: { - simdmask_t cnsVal = GetConstantSimdMask(vn); - printf("SimdMaskCns[0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1]); + simdmaskvalue_t cnsVal = GetConstantSimdMaskValue(vn); + if (cnsVal.IsScalable() +#if defined(TARGET_ARM64) && defined(DEBUG) + && JitConfig.JitUseScalableVectorT() +#endif + ) + { + printf("SimdMaskScalableCns[base:%s idx:%u]", varTypeName(cnsVal.scalable.gtSimdMaskScalableBaseType), + cnsVal.scalable.gtSimdMaskScalableIndex); + } + else + { + printf("SimdMaskCns[0x%08x, 0x%08x]", cnsVal.fixed.u32[0], cnsVal.fixed.u32[1]); + } break; } #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index c56700a6c67ee6..e1cc7f55e15de6 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -398,7 +398,8 
@@ class ValueNumStore simdscalable_t GetConstantSimdScalable(ValueNum argVN); #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) - simdmask_t GetConstantSimdMask(ValueNum argVN); + simdmask_t GetConstantSimdMask(ValueNum argVN); + simdmaskvalue_t GetConstantSimdMaskValue(ValueNum argVN); #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD @@ -1967,57 +1968,40 @@ class ValueNumStore return m_simdScalableCnsMap; } - struct SimdMaskScalablePrimitiveKeyFuncs : public JitKeyFuncsDefEquals + struct SimdMaskPrimitiveKeyFuncs : public JitKeyFuncsDefEquals { - static bool Equals(const simdmaskscalable_t& x, const simdmaskscalable_t& y) + static bool Equals(const simdmaskvalue_t& x, const simdmaskvalue_t& y) { - return x == y; - } - - static unsigned GetHashCode(const simdmaskscalable_t& val) - { - unsigned hash = 0; - - hash = static_cast(hash ^ val.gtSimdMaskScalableBaseType); - hash = static_cast(hash ^ val.gtSimdMaskScalableIndex); - - return hash; - } - }; - - typedef VNMap SimdMaskScalableToValueNumMap; - SimdMaskScalableToValueNumMap* m_simdMaskScalableCnsMap; - SimdMaskScalableToValueNumMap* GetSimdMaskScalableCnsMap() - { - if (m_simdMaskScalableCnsMap == nullptr) - { - m_simdMaskScalableCnsMap = new (m_alloc) SimdMaskScalableToValueNumMap(m_alloc); - } - return m_simdMaskScalableCnsMap; - } - -#endif // TARGET_XARCH + if (x.IsScalable() != y.IsScalable()) + { + return false; + } -#if defined(FEATURE_MASKED_HW_INTRINSICS) - struct SimdMaskPrimitiveKeyFuncs : public JitKeyFuncsDefEquals - { - static bool Equals(const simdmask_t& x, const simdmask_t& y) - { - return x == y; + return x.IsScalable() ? 
(x.scalable == y.scalable) : (x.fixed == y.fixed); } - static unsigned GetHashCode(const simdmask_t& val) + static unsigned GetHashCode(const simdmaskvalue_t& val) { unsigned hash = 0; - hash = static_cast(hash ^ val.u32[0]); - hash = static_cast(hash ^ val.u32[1]); + if (val.IsScalable()) + { + hash = static_cast(hash ^ val.isScalable); + hash = static_cast(hash ^ val.scalable.gtSimdMaskScalableBaseType); + hash = static_cast(hash ^ val.scalable.gtSimdMaskScalableIndex); + } + else + { + hash = static_cast(hash ^ val.isScalable); + hash = static_cast(hash ^ val.fixed.u32[0]); + hash = static_cast(hash ^ val.fixed.u32[1]); + } return hash; } }; - typedef VNMap SimdMaskToValueNumMap; + typedef VNMap SimdMaskToValueNumMap; SimdMaskToValueNumMap* m_simdMaskCnsMap; SimdMaskToValueNumMap* GetSimdMaskCnsMap() { @@ -2223,8 +2207,8 @@ struct ValueNumStore::VarTypConv template <> struct ValueNumStore::VarTypConv { - typedef simdmask_t Type; - typedef simdmask_t Lang; + typedef simdmaskvalue_t Type; + typedef simdmaskvalue_t Lang; }; #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD @@ -2313,11 +2297,31 @@ FORCEINLINE simdscalable_t ValueNumStore::SafeGetConstantValue(C #if defined(FEATURE_MASKED_HW_INTRINSICS) template <> -FORCEINLINE simdmask_t ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset) +FORCEINLINE simdmaskvalue_t ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset) { assert(c->m_typ == TYP_MASK); return reinterpret_cast::Lang*>(c->m_defs)[offset]; } + +template <> +FORCEINLINE simdmask_t ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset) +{ + assert(c->m_typ == TYP_MASK); + simdmaskvalue_t storage = SafeGetConstantValue(c, offset); + assert(!storage.IsScalable()); + return storage.fixed; +} + +#if defined(TARGET_ARM64) +template <> +FORCEINLINE simdmaskscalable_t ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset) +{ + assert(c->m_typ == TYP_MASK); + simdmaskvalue_t storage = SafeGetConstantValue(c, 
offset); + assert(storage.IsScalable()); + return storage.scalable; +} +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS template <> @@ -2420,6 +2424,39 @@ FORCEINLINE simdmask_t ValueNumStore::ConstantValueInternal(ValueNum return SafeGetConstantValue(c, offset); } + +#if defined(FEATURE_MASKED_HW_INTRINSICS) +template <> +FORCEINLINE simdmaskvalue_t ValueNumStore::ConstantValueInternal(ValueNum vn DEBUGARG(bool coerce)) +{ + Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn)); + assert(c->m_attribs == CEA_Const); + + unsigned offset = ChunkOffset(vn); + + assert(c->m_typ == TYP_MASK); + assert(!coerce); + + return SafeGetConstantValue(c, offset); +} +#endif + +#if defined(TARGET_ARM64) +template <> +FORCEINLINE simdmaskscalable_t +ValueNumStore::ConstantValueInternal(ValueNum vn DEBUGARG(bool coerce)) +{ + Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn)); + assert(c->m_attribs == CEA_Const); + + unsigned offset = ChunkOffset(vn); + + assert(c->m_typ == TYP_MASK); + assert(!coerce); + + return SafeGetConstantValue(c, offset); +} +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD From b8d75a2114f9dee2966f30519322f2a3b5c66ddc Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 16:20:54 +0100 Subject: [PATCH 18/58] set step to 0 in EvaluateSimdCvtScalableMaskToVector --- src/coreclr/jit/simd.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 4f6eafd436b391..bb93889adf50e4 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -915,6 +915,7 @@ bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vec vecCon->gtSimdScalableBaseType = baseType; vecCon->gtSimdScalableKind = SimdScalableRepeated; vecCon->gtSimdScalableIndex = 0; + vecCon->gtSimdScalableStep = 0; return true; } @@ -927,6 +928,7 @@ bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vec vecCon->gtSimdScalableBaseType = 
baseType; vecCon->gtSimdScalableKind = SimdScalableRepeated; + vecCon->gtSimdScalableStep = 0; switch (genTypeSize(baseType)) { From 170665ebfc69ce98332ffe3cda11b7d397885281 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 16:35:35 +0100 Subject: [PATCH 19/58] formatting --- src/coreclr/jit/hwintrinsicarm64.cpp | 2 +- src/coreclr/jit/simd.h | 1 - src/coreclr/jit/valuenum.cpp | 5 +++-- src/coreclr/jit/valuenum.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 972103ed5942c0..28413cc52fca0a 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2971,7 +2971,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } else -#endif /// DEBUG +#endif // DEBUG { simd_t simdVal; diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index d8dc7744bc91f5..7796228349b196 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -378,7 +378,6 @@ struct simdmask_t { return {}; } - }; static_assert(sizeof(simdmask_t) == 8); #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 4b13fbe1f632da..3714e3bcfa5b47 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10895,9 +10895,10 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) #if defined(TARGET_ARM64) && defined(DEBUG) && JitConfig.JitUseScalableVectorT() #endif - ) + ) { - printf("SimdMaskScalableCns[base:%s idx:%u]", varTypeName(cnsVal.scalable.gtSimdMaskScalableBaseType), + printf("SimdMaskScalableCns[base:%s idx:%u]", + varTypeName(cnsVal.scalable.gtSimdMaskScalableBaseType), cnsVal.scalable.gtSimdMaskScalableIndex); } else diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index e1cc7f55e15de6..40c513af003d68 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -2002,8 +2002,8 @@ 
class ValueNumStore }; typedef VNMap SimdMaskToValueNumMap; - SimdMaskToValueNumMap* m_simdMaskCnsMap; - SimdMaskToValueNumMap* GetSimdMaskCnsMap() + SimdMaskToValueNumMap* m_simdMaskCnsMap; + SimdMaskToValueNumMap* GetSimdMaskCnsMap() { if (m_simdMaskCnsMap == nullptr) { From 319f7bd19f652bd8086145acac7053f13a2c50da Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 16:41:03 +0100 Subject: [PATCH 20/58] Add assert to GetConstantSimdMask --- src/coreclr/jit/valuenum.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 3714e3bcfa5b47..e63ce116e0074a 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -4188,6 +4188,7 @@ simdmaskvalue_t ValueNumStore::GetConstantSimdMaskValue(ValueNum argVN) simdmask_t ValueNumStore::GetConstantSimdMask(ValueNum argVN) { simdmaskvalue_t storage = GetConstantSimdMaskValue(argVN); + assert(!storage.IsScalable()); return storage.fixed; } From 064feaeb5ed53a673cb706ec98b27d5a1b2f1452 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 May 2026 16:44:03 +0100 Subject: [PATCH 21/58] fix FEATURE_MASKED_HW_INTRINSICS defines --- src/coreclr/jit/valuenum.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index e63ce116e0074a..63d8a20b5a6f8f 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -1915,15 +1915,14 @@ ValueNum ValueNumStore::VNForSimdScalableCon(const simdscalable_t& cnsVal) { return VnForConst(cnsVal, GetSimdScalableCnsMap(), TYP_SIMD); } +#endif // TARGET_XARCH +#if defined(FEATURE_MASKED_HW_INTRINSICS) ValueNum ValueNumStore::VNForSimdMaskScalableCon(const simdmaskscalable_t& cnsVal) { return VnForConst(simdmaskvalue_t::FromScalable(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); } -#endif // TARGET_XARCH - -#if defined(FEATURE_MASKED_HW_INTRINSICS) ValueNum ValueNumStore::VNForSimdMaskCon(const simdmask_t& 
cnsVal) { return VnForConst(simdmaskvalue_t::FromFixed(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); From 1be5041525b151247abaeebdff47b1d376d37b07 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 14 May 2026 15:14:08 +0100 Subject: [PATCH 22/58] Better float support for sequence nodes --- src/coreclr/jit/codegenarm64.cpp | 131 +++++++++++++++++++++++++------ src/coreclr/jit/gentree.cpp | 40 ++++------ src/coreclr/jit/simd.h | 20 ++++- 3 files changed, 139 insertions(+), 52 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index a93bfab0bd2bc4..26265784362ea8 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2392,12 +2392,12 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre insOpts opt = emitter::optGetSveInsOpt(emitTypeSize(simdVal.gtSimdScalableBaseType)); emitAttr emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; - auto loadConstantHelper = [&](uint64_t constValue) -> regNumber { + auto loadConstantHelper = [&](ssize_t constValue) -> regNumber { // Get a temp integer register to compute long address. Use Extract so multiple calls // (index + step) get distinct temps when LSRA reserved more than one. 
regNumber addrReg = internalRegisters.Extract(tree, RBM_ALLINT); - // Store the index to memory + // Store the constant to memory UNATIVE_OFFSET cnum = emit->emitDataConst(&constValue, sizeof(constValue), sizeof(constValue), TYP_LONG); CORINFO_FIELD_HANDLE hnd = m_compiler->eeFindJitDataOffs(cnum); @@ -2408,48 +2408,131 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre return addrReg; }; + ssize_t index = 0; + ssize_t step = 0; + switch (simdVal.gtSimdScalableBaseType) + { + case TYP_BYTE: + { + index = (size_t)simdVal.gtSimdScalableIndexU8[0]; + step = (size_t)simdVal.gtSimdScalableStepU8[0]; + break; + } + + case TYP_SHORT: + { + index = (size_t)simdVal.gtSimdScalableIndexU16[0]; + step = (size_t)simdVal.gtSimdScalableStepU16[0]; + break; + } + + case TYP_INT: + { + index = (size_t)simdVal.gtSimdScalableIndexU32[0]; + step = (size_t)simdVal.gtSimdScalableStepU32[0]; + break; + } + + case TYP_LONG: + { + index = (size_t)simdVal.gtSimdScalableIndexU64[0]; + step = (size_t)simdVal.gtSimdScalableStepU64[0]; + break; + } + + case TYP_UBYTE: + { + index = (size_t)simdVal.gtSimdScalableIndexI8[0]; + step = (size_t)simdVal.gtSimdScalableStepI8[0]; + break; + } + + case TYP_USHORT: + { + index = (size_t)simdVal.gtSimdScalableIndexI16[0]; + step = (size_t)simdVal.gtSimdScalableStepI16[0]; + break; + } + + case TYP_UINT: + { + index = (size_t)simdVal.gtSimdScalableIndexI32[0]; + step = (size_t)simdVal.gtSimdScalableStepI32[0]; + break; + } + + case TYP_ULONG: + { + index = (size_t)simdVal.gtSimdScalableIndexI64[0]; + step = (size_t)simdVal.gtSimdScalableStepI64[0]; + break; + } + + default: + { + assert(varTypeIsFloating(simdVal.gtSimdScalableBaseType)); + } + } + switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) { case SimdScalableRepeated: - if (emitter::isValidSimm<8>(simdVal.gtSimdScalableIndex) || - emitter::isValidSimm_MultipleOf<8, 256>(simdVal.gtSimdScalableIndex)) + { + if (varTypeIsIntegral(simdVal.gtSimdScalableBaseType) && + 
(emitter::isValidSimm<8>(index) || emitter::isValidSimm_MultipleOf<8, 256>(index))) + { + emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, index, opt); + } + else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && + emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF64[0])) + { + emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, + simdVal.gtSimdScalableIndexF64[0], INS_OPTS_SCALABLE_S); + } + else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) && + emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF32[0])) { - emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, simdVal.gtSimdScalableIndex, - opt); + emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, + static_cast(simdVal.gtSimdScalableIndexF32[0]), + INS_OPTS_SCALABLE_D); } else { - regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); + regNumber indexReg = loadConstantHelper(index); emit->emitInsSve_R_R(INS_sve_dup, emitSize, targetReg, indexReg, opt); } + break; + } case SimdScalableSequence: - if (emitter::isValidSimm<5>(simdVal.gtSimdScalableIndex) && - emitter::isValidSimm<5>(simdVal.gtSimdScalableStep)) + { + // FP sequences should have been imported into a set of nodes + assert(varTypeIsIntegral(simdVal.gtSimdScalableBaseType)); + + if (emitter::isValidSimm<5>(index) && emitter::isValidSimm<5>(step)) { - emit->emitInsSve_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, - simdVal.gtSimdScalableIndex, simdVal.gtSimdScalableStep, opt); + emit->emitInsSve_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, index, step, opt); } - else if (emitter::isValidSimm<5>(simdVal.gtSimdScalableIndex)) + else if (emitter::isValidSimm<5>(index)) { - regNumber stepReg = loadConstantHelper(simdVal.gtSimdScalableStep); - emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, stepReg, - simdVal.gtSimdScalableIndex, opt, INS_SCALABLE_OPTS_IMM_FIRST); + regNumber stepReg = loadConstantHelper(step); + emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, stepReg, index, opt, 
+ INS_SCALABLE_OPTS_IMM_FIRST); } - else if (emitter::isValidSimm<5>(simdVal.gtSimdScalableStep)) + else if (emitter::isValidSimm<5>(step)) { - regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); - emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, indexReg, - simdVal.gtSimdScalableStep, opt); + regNumber indexReg = loadConstantHelper(index); + emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, indexReg, step, opt); } else { - regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); - regNumber stepReg = loadConstantHelper(simdVal.gtSimdScalableStep); + regNumber indexReg = loadConstantHelper(index); + regNumber stepReg = loadConstantHelper(step); emit->emitInsSve_R_R_R(INS_sve_index, emitSize, targetReg, indexReg, stepReg, opt); } break; + } case SimdScalableScalar: { @@ -2459,9 +2542,9 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Use NEON instructions to load the constant (to avoid using predicates) if (varTypeIsIntegral(simdVal.gtSimdScalableBaseType) && - emitter::emitIns_valid_imm_for_mov(simdVal.gtSimdScalableIndex, emitSize)) + emitter::emitIns_valid_imm_for_mov(index, emitSize)) { - emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndex); + emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, index); } else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) @@ -2476,7 +2559,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } else { - regNumber indexReg = loadConstantHelper(simdVal.gtSimdScalableIndex); + regNumber indexReg = loadConstantHelper(index); emit->emitIns_R_R(INS_ins, emitSize, targetReg, indexReg, INS_OPTS_16B); } break; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5062a64c31e646..d1608ff44bda5a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24648,41 +24648,33 @@ 
GenTree* Compiler::gtNewSimdCreateSequenceNode( #if defined(TARGET_ARM64) if (type == TYP_SIMD) { - // Only optimize when both op1 and op2 are constant - if (op1->OperIsConst() && op2->OperIsConst()) + // SVE can do this in a single instruction + if (varTypeIsIntegral(simdBaseType)) { - GenTreeVecCon* scalableVecCon = gtNewSimdVconNode(type, simdBaseType, SimdScalableSequence, 0); - - if (varTypeIsIntegral(simdBaseType)) + // Optimize when both op1 and op2 are constant + if (op1->OperIsConst() && op2->OperIsConst()) { + GenTreeVecCon* scalableVecCon = gtNewSimdVconNode(type, simdBaseType, SimdScalableSequence, 0); + scalableVecCon->gtSimdScalableVal.gtSimdScalableIndex = static_cast(op1->AsIntConCommon()->IntegralValue()); scalableVecCon->gtSimdScalableVal.gtSimdScalableStep = static_cast(op2->AsIntConCommon()->IntegralValue()); - } - else if (simdBaseType == TYP_FLOAT) - { - scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF32[0] = - static_cast(op1->AsDblCon()->DconValue()); - scalableVecCon->gtSimdScalableVal.gtSimdScalableStepF32[0] = - static_cast(op2->AsDblCon()->DconValue()); - } - else if (simdBaseType == TYP_DOUBLE) - { - scalableVecCon->gtSimdScalableVal.gtSimdScalableIndexF64[0] = - static_cast(op1->AsDblCon()->DconValue()); - scalableVecCon->gtSimdScalableVal.gtSimdScalableStepF64[0] = - static_cast(op2->AsDblCon()->DconValue()); + return scalableVecCon; } else { - unreached(); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_VectorT_CreateSequence, simdBaseType, simdSize); } - return scalableVecCon; } - - // SVE can do this in a single instruction - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_VectorT_CreateSequence, simdBaseType, simdSize); + else + { + GenTree* indices = gtNewSimdGetIndicesNode(type, simdBaseType, simdSize); + result = gtNewSimdBinOpNode(GT_MUL, type, indices, op2, simdBaseType, simdSize); + GenTree* start = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + result = gtNewSimdBinOpNode(GT_ADD, type, 
result, start, simdBaseType, simdSize); + return result; + } } #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 7796228349b196..b1c89092d49074 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2139,18 +2139,30 @@ struct simdscalable_t SimdScalableKind gtSimdScalableKind; union { - uint8_t gtSimdScalableIndexU8[8]; - uint32_t gtSimdScalableIndexU32[2]; float gtSimdScalableIndexF32[2]; double gtSimdScalableIndexF64[1]; + int8_t gtSimdScalableIndexI8[8]; + int16_t gtSimdScalableIndexI16[4]; + int32_t gtSimdScalableIndexI32[2]; + int64_t gtSimdScalableIndexI64[1]; + uint8_t gtSimdScalableIndexU8[8]; + uint16_t gtSimdScalableIndexU16[4]; + uint32_t gtSimdScalableIndexU32[2]; + uint64_t gtSimdScalableIndexU64[1]; uint64_t gtSimdScalableIndex; }; union { - uint8_t gtSimdScalableStepU8[8]; - uint32_t gtSimdScalableStepU32[2]; float gtSimdScalableStepF32[2]; double gtSimdScalableStepF64[1]; + int8_t gtSimdScalableStepI8[8]; + int16_t gtSimdScalableStepI16[4]; + int32_t gtSimdScalableStepI32[2]; + int64_t gtSimdScalableStepI64[1]; + uint8_t gtSimdScalableStepU8[8]; + uint16_t gtSimdScalableStepU16[4]; + uint32_t gtSimdScalableStepU32[2]; + uint64_t gtSimdScalableStepU64[1]; uint64_t gtSimdScalableStep; }; From 9ba8e2ecd3f610984dad183fc517b2a4d61e3f7d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 12:25:14 +0100 Subject: [PATCH 23/58] Fix use of simdmaskvalue_t --- src/coreclr/jit/simd.h | 33 ---------------------- src/coreclr/jit/valuenum.cpp | 17 ++++++------ src/coreclr/jit/valuenum.h | 54 ++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index b1c89092d49074..79893821140b02 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2246,39 +2246,6 @@ struct simdmaskscalable_t bool IsAllBitsSet(var_types simdBaseType) const; }; -struct simdmaskvalue_t -{ - uint8_t isScalable; - 
simdmaskscalable_t scalable; - simdmask_t fixed; - - static simdmaskvalue_t FromFixed(const simdmask_t& mask) - { - simdmaskvalue_t result = {}; - - result.isScalable = 0; - result.fixed = mask; - - return result; - } - - static simdmaskvalue_t FromScalable(const simdmaskscalable_t& mask) - { - simdmaskvalue_t result = {}; - - result.isScalable = 1; - result.scalable = mask; - result.fixed = simdmask_t::Zero(); - - return result; - } - - bool IsScalable() const - { - return isScalable != 0; - } -}; - static_assert(sizeof(simdmask_t) >= sizeof(simdmaskscalable_t)); bool EvaluateSimdCvtScalableVectorToMask(var_types baseType, simdmaskscalable_t* maskCon, simdscalable_t vecCon); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index bb353b39e65a88..d8e1a3c8b06f0a 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -1918,15 +1918,17 @@ ValueNum ValueNumStore::VNForSimdScalableCon(const simdscalable_t& cnsVal) #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) -ValueNum ValueNumStore::VNForSimdMaskScalableCon(const simdmaskscalable_t& cnsVal) +ValueNum ValueNumStore::VNForSimdMaskCon(const simdmask_t& cnsVal) { - return VnForConst(simdmaskvalue_t::FromScalable(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); + return VnForConst(simdmaskvalue_t::FromFixed(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); } -ValueNum ValueNumStore::VNForSimdMaskCon(const simdmask_t& cnsVal) +#if defined(TARGET_ARM64) +ValueNum ValueNumStore::VNForSimdMaskScalableCon(const simdmaskscalable_t& cnsVal) { - return VnForConst(simdmaskvalue_t::FromFixed(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); + return VnForConst(simdmaskvalue_t::FromScalable(cnsVal), GetSimdMaskCnsMap(), TYP_MASK); } +#endif // TARGET_ARM64 #endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD @@ -10921,17 +10923,16 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) case TYP_MASK: { simdmaskvalue_t cnsVal = GetConstantSimdMaskValue(vn); - if 
(cnsVal.IsScalable() + #if defined(TARGET_ARM64) && defined(DEBUG) - && JitConfig.JitUseScalableVectorT() -#endif - ) + if (cnsVal.IsScalable() && JitConfig.JitUseScalableVectorT()) { printf("SimdMaskScalableCns[base:%s idx:%u]", varTypeName(cnsVal.scalable.gtSimdMaskScalableBaseType), cnsVal.scalable.gtSimdMaskScalableIndex); } else +#endif // TARGET_ARM64 && DEBUG { printf("SimdMaskCns[0x%08x, 0x%08x]", cnsVal.fixed.u32[0], cnsVal.fixed.u32[1]); } diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 40c513af003d68..720f71f2eba0fd 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -158,6 +158,7 @@ // Defines the type SmallHashTable. #include "compiler.h" #include "smallhash.h" +#include "simd.h" // A "ValueNumStore" represents the "universe" of value numbers used in a single // compilation. @@ -243,6 +244,50 @@ static const var_types TYP_MEM = TYP_UNDEF; // We will use this placeholder type for memory maps representing "the heap" (GcHeap/ByrefExposed). static const var_types TYP_HEAP = TYP_UNKNOWN; +#if defined(FEATURE_MASKED_HW_INTRINSICS) +// Wrapper to hold a mask. The VN can only store a single type for each TYP (in order for +// VarTypConv and others to work), but on ARM64 the mask can be scalable or fixed. 
+struct simdmaskvalue_t +{ + uint8_t isScalable; + simdmask_t fixed; +#if defined(TARGET_ARM64) + simdmaskscalable_t scalable; +#endif // TARGET_ARM64 + + static simdmaskvalue_t FromFixed(const simdmask_t& mask) + { + simdmaskvalue_t result = {}; + + result.isScalable = 0; + result.fixed = mask; + + return result; + } + +#if defined(TARGET_ARM64) + static simdmaskvalue_t FromScalable(const simdmaskscalable_t& mask) + { + simdmaskvalue_t result = {}; + + result.isScalable = 1; + result.scalable = mask; + result.fixed = simdmask_t::Zero(); + + return result; + } +#endif // TARGET_ARM64 + + bool IsScalable() const + { +#if !defined(TARGET_ARM64) + assert(isScalable == 0); +#endif // !TARGET_ARM64 + return isScalable != 0; + } +}; +#endif // FEATURE_MASKED_HW_INTRINSICS + class ValueNumStore { @@ -1967,23 +2012,30 @@ class ValueNumStore } return m_simdScalableCnsMap; } +#endif // TARGET_XARCH +#if defined(FEATURE_MASKED_HW_INTRINSICS) struct SimdMaskPrimitiveKeyFuncs : public JitKeyFuncsDefEquals { static bool Equals(const simdmaskvalue_t& x, const simdmaskvalue_t& y) { +#if defined(TARGET_ARM64) if (x.IsScalable() != y.IsScalable()) { return false; } return x.IsScalable() ? 
(x.scalable == y.scalable) : (x.fixed == y.fixed); +#else + return x.fixed == y.fixed; +#endif // TARGET_ARM64 } static unsigned GetHashCode(const simdmaskvalue_t& val) { unsigned hash = 0; +#if defined(TARGET_ARM64) if (val.IsScalable()) { hash = static_cast(hash ^ val.isScalable); @@ -1991,6 +2043,7 @@ class ValueNumStore hash = static_cast(hash ^ val.scalable.gtSimdMaskScalableIndex); } else +#endif // TARGET_ARM64 { hash = static_cast(hash ^ val.isScalable); hash = static_cast(hash ^ val.fixed.u32[0]); @@ -2012,6 +2065,7 @@ class ValueNumStore return m_simdMaskCnsMap; } #endif // FEATURE_MASKED_HW_INTRINSICS + #endif // FEATURE_SIMD template From bf0123376e483a4122a17f8b78b1ab4cab583c9c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 14:42:19 +0100 Subject: [PATCH 24/58] Fix constants codegen --- src/coreclr/jit/codegenarm64.cpp | 58 ++++++++++++++++---------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 26265784362ea8..a2b998b211b727 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2408,63 +2408,63 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre return addrReg; }; - ssize_t index = 0; - ssize_t step = 0; + ssize_t index = -1; + ssize_t step = -1; switch (simdVal.gtSimdScalableBaseType) { case TYP_BYTE: { - index = (size_t)simdVal.gtSimdScalableIndexU8[0]; - step = (size_t)simdVal.gtSimdScalableStepU8[0]; + index = (size_t)simdVal.gtSimdScalableIndexI8[0]; + step = (size_t)simdVal.gtSimdScalableStepI8[0]; break; } case TYP_SHORT: { - index = (size_t)simdVal.gtSimdScalableIndexU16[0]; - step = (size_t)simdVal.gtSimdScalableStepU16[0]; + index = (size_t)simdVal.gtSimdScalableIndexI16[0]; + step = (size_t)simdVal.gtSimdScalableStepI16[0]; break; } case TYP_INT: { - index = (size_t)simdVal.gtSimdScalableIndexU32[0]; - step = (size_t)simdVal.gtSimdScalableStepU32[0]; + index = 
(size_t)simdVal.gtSimdScalableIndexI32[0]; + step = (size_t)simdVal.gtSimdScalableStepI32[0]; break; } case TYP_LONG: { - index = (size_t)simdVal.gtSimdScalableIndexU64[0]; - step = (size_t)simdVal.gtSimdScalableStepU64[0]; + index = (size_t)simdVal.gtSimdScalableIndexI64[0]; + step = (size_t)simdVal.gtSimdScalableStepI64[0]; break; } case TYP_UBYTE: { - index = (size_t)simdVal.gtSimdScalableIndexI8[0]; - step = (size_t)simdVal.gtSimdScalableStepI8[0]; + index = (size_t)simdVal.gtSimdScalableIndexU8[0]; + step = (size_t)simdVal.gtSimdScalableStepU8[0]; break; } case TYP_USHORT: { - index = (size_t)simdVal.gtSimdScalableIndexI16[0]; - step = (size_t)simdVal.gtSimdScalableStepI16[0]; + index = (size_t)simdVal.gtSimdScalableIndexU16[0]; + step = (size_t)simdVal.gtSimdScalableStepU16[0]; break; } case TYP_UINT: { - index = (size_t)simdVal.gtSimdScalableIndexI32[0]; - step = (size_t)simdVal.gtSimdScalableStepI32[0]; + index = (size_t)simdVal.gtSimdScalableIndexU32[0]; + step = (size_t)simdVal.gtSimdScalableStepU32[0]; break; } case TYP_ULONG: { - index = (size_t)simdVal.gtSimdScalableIndexI64[0]; - step = (size_t)simdVal.gtSimdScalableStepI64[0]; + index = (size_t)simdVal.gtSimdScalableIndexU64[0]; + step = (size_t)simdVal.gtSimdScalableStepU64[0]; break; } @@ -2483,17 +2483,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, index, opt); } - else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && - emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF64[0])) + else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) && + emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF32[0])) { emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, - simdVal.gtSimdScalableIndexF64[0], INS_OPTS_SCALABLE_S); + simdVal.gtSimdScalableIndexF32[0], INS_OPTS_SCALABLE_S); } - else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) && - 
emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF32[0])) + else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && + emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF64[0])) { emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, - static_cast(simdVal.gtSimdScalableIndexF32[0]), + static_cast(simdVal.gtSimdScalableIndexF64[0]), INS_OPTS_SCALABLE_D); } else @@ -2546,17 +2546,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, index); } - else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && - emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) - { - emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]); - } else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) && emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF32[0])) { emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, static_cast(simdVal.gtSimdScalableIndexF32[0])); } + else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && + emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) + { + emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]); + } else { regNumber indexReg = loadConstantHelper(index); From d34c1e667f6d59883a50fbc5ac96671c18c8df48 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 14:50:16 +0100 Subject: [PATCH 25/58] generate true mask with FFs --- src/coreclr/jit/hwintrinsicarm64.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 28413cc52fca0a..abb6503fd0e312 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2966,7 +2966,26 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { if ((pattern == SVE_PATTERN_ALL) || (pattern == SVE_PATTERN_POW2)) { - retNode = 
gtNewSimdVconNode(retType, simdBaseType, SimdScalableRepeated, 1); + uint64_t allOnes = 0; + switch (genTypeSize(simdBaseType)) + { + case 1: + allOnes = 0xFF; + break; + case 2: + allOnes = 0xFFFF; + break; + case 4: + allOnes = 0xFFFFFFFFull; + break; + case 8: + allOnes = 0xFFFFFFFFFFFFFFFFull; + break; + default: + unreached(); + } + + retNode = gtNewSimdVconNode(retType, simdBaseType, SimdScalableRepeated, allOnes); break; } } From c3dfe81b9e27efabbe3bbdcfab083c4c321c2e6a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 14:54:40 +0100 Subject: [PATCH 26/58] Special case zero in valuenum --- src/coreclr/jit/valuenum.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 720f71f2eba0fd..8f05996f12e937 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -2039,8 +2039,13 @@ class ValueNumStore if (val.IsScalable()) { hash = static_cast(hash ^ val.isScalable); - hash = static_cast(hash ^ val.scalable.gtSimdMaskScalableBaseType); - hash = static_cast(hash ^ val.scalable.gtSimdMaskScalableIndex); + // simdmaskscalable_t::operator== treats all-zero scalable masks as equal + // regardless of base type, so canonicalize that case in the hash as well. 
+ if (!val.scalable.IsZero()) + { + hash = static_cast(hash ^ val.scalable.gtSimdMaskScalableBaseType); + hash = static_cast(hash ^ val.scalable.gtSimdMaskScalableIndex); + } } else #endif // TARGET_ARM64 From 9897c975ae44e3cb5030f0e88c086985e7762e1c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 15:19:42 +0100 Subject: [PATCH 27/58] Fix codegen casting --- src/coreclr/jit/codegenarm64.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index a2b998b211b727..c36f88b86e5f6f 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2414,57 +2414,57 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { case TYP_BYTE: { - index = (size_t)simdVal.gtSimdScalableIndexI8[0]; - step = (size_t)simdVal.gtSimdScalableStepI8[0]; + index = static_cast(simdVal.gtSimdScalableIndexI8[0]); + step = static_cast(simdVal.gtSimdScalableStepI8[0]); break; } case TYP_SHORT: { - index = (size_t)simdVal.gtSimdScalableIndexI16[0]; - step = (size_t)simdVal.gtSimdScalableStepI16[0]; + index = static_cast(simdVal.gtSimdScalableIndexI16[0]); + step = static_cast(simdVal.gtSimdScalableStepI16[0]); break; } case TYP_INT: { - index = (size_t)simdVal.gtSimdScalableIndexI32[0]; - step = (size_t)simdVal.gtSimdScalableStepI32[0]; + index = static_cast(simdVal.gtSimdScalableIndexI32[0]); + step = static_cast(simdVal.gtSimdScalableStepI32[0]); break; } case TYP_LONG: { - index = (size_t)simdVal.gtSimdScalableIndexI64[0]; - step = (size_t)simdVal.gtSimdScalableStepI64[0]; + index = static_cast(simdVal.gtSimdScalableIndexI64[0]); + step = static_cast(simdVal.gtSimdScalableStepI64[0]); break; } case TYP_UBYTE: { - index = (size_t)simdVal.gtSimdScalableIndexU8[0]; - step = (size_t)simdVal.gtSimdScalableStepU8[0]; + index = static_cast(simdVal.gtSimdScalableIndexU8[0]); + step = 
static_cast(simdVal.gtSimdScalableStepU8[0]); break; } case TYP_USHORT: { - index = (size_t)simdVal.gtSimdScalableIndexU16[0]; - step = (size_t)simdVal.gtSimdScalableStepU16[0]; + index = static_cast(simdVal.gtSimdScalableIndexU16[0]); + step = static_cast(simdVal.gtSimdScalableStepU16[0]); break; } case TYP_UINT: { - index = (size_t)simdVal.gtSimdScalableIndexU32[0]; - step = (size_t)simdVal.gtSimdScalableStepU32[0]; + index = static_cast(simdVal.gtSimdScalableIndexU32[0]); + step = static_cast(simdVal.gtSimdScalableStepU32[0]); break; } case TYP_ULONG: { - index = (size_t)simdVal.gtSimdScalableIndexU64[0]; - step = (size_t)simdVal.gtSimdScalableStepU64[0]; + index = static_cast(simdVal.gtSimdScalableIndexU64[0]); + step = static_cast(simdVal.gtSimdScalableStepU64[0]); break; } From 691aaef5dfaa6a2e11e2c2121afded94eb620d62 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 15:21:37 +0100 Subject: [PATCH 28/58] handle large bits in isValidSimm --- src/coreclr/jit/emitarm64.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index e6fe646882157c..3d9a9cc2c10ab9 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -1073,8 +1073,18 @@ static bool canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI = nullpt template static bool isValidSimm(ssize_t value) { - constexpr ssize_t max = 1 << (bits - 1); - return (-max <= value) && (value < max); + constexpr size_t ssize_t_bits = sizeof(ssize_t) * BITS_PER_BYTE; + static_assert(bits > 0); + static_assert(bits <= ssize_t_bits); + if constexpr (bits == ssize_t_bits) + { + return true; + } + else + { + constexpr size_t max = size_t{1} << (bits - 1); + return (-static_cast(max) <= value) && (value < static_cast(max)); + } } // Returns true if 'value' is a legal signed multiple of 'mod' immediate with 'bits' number of bits. 
From f24b9502a9c9a285989ed703f36e293f1ed4c4bf Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 15:22:42 +0100 Subject: [PATCH 29/58] Remove redundant ifdef --- src/coreclr/jit/valuenum.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 8f05996f12e937..6241d46fef2262 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -2484,7 +2484,6 @@ FORCEINLINE simdmask_t ValueNumStore::ConstantValueInternal(ValueNum return SafeGetConstantValue(c, offset); } -#if defined(FEATURE_MASKED_HW_INTRINSICS) template <> FORCEINLINE simdmaskvalue_t ValueNumStore::ConstantValueInternal(ValueNum vn DEBUGARG(bool coerce)) { @@ -2498,7 +2497,6 @@ FORCEINLINE simdmaskvalue_t ValueNumStore::ConstantValueInternal(c, offset); } -#endif #if defined(TARGET_ARM64) template <> From f1cefa5969f796e45cef204820303856cb305c6d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 16:17:29 +0100 Subject: [PATCH 30/58] Copy floats --- src/coreclr/jit/codegenarm64.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index c36f88b86e5f6f..9c6812c71614c4 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2468,9 +2468,30 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre break; } + case TYP_FLOAT: + { + uint32_t indexBits = 0; + uint32_t stepBits = 0; + memcpy(&indexBits, &simdVal.gtSimdScalableIndexF32[0], sizeof(indexBits)); + memcpy(&stepBits, &simdVal.gtSimdScalableStepF32[0], sizeof(stepBits)); + index = static_cast(indexBits); + step = static_cast(stepBits); + break; + } + case TYP_DOUBLE: + { + uint64_t indexBits = 0; + uint64_t stepBits = 0; + memcpy(&indexBits, &simdVal.gtSimdScalableIndexF64[0], sizeof(indexBits)); + memcpy(&stepBits, &simdVal.gtSimdScalableStepF64[0], sizeof(stepBits)); + index = 
static_cast(indexBits); + step = static_cast(stepBits); + break; + } + default: { - assert(varTypeIsFloating(simdVal.gtSimdScalableBaseType)); + unreached(); } } From d7f5ac0851b3bc4f053c7031ba2612b6ad7f1a5b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 16:30:11 +0100 Subject: [PATCH 31/58] document gtSimdMaskScalableIndex --- src/coreclr/jit/simd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 79893821140b02..55aa72a323416f 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2207,6 +2207,7 @@ static_assert(sizeof(simd_t) >= sizeof(simdscalable_t)); struct simdmaskscalable_t { var_types gtSimdMaskScalableBaseType; + // Only 0 and 1 are valid values uint8_t gtSimdMaskScalableIndex; bool operator==(const simdmaskscalable_t& other) const From f6ee722f87bff6ed5153d17c1b6a0c8c3ac4c307 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 15 May 2026 16:31:04 +0100 Subject: [PATCH 32/58] initialise index in BroadcastConstantToSimdScalable --- src/coreclr/jit/simd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 55aa72a323416f..454b91b4e303fb 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2258,6 +2258,7 @@ void BroadcastConstantToSimdScalable(simdscalable_t* result, var_types baseType, { result->gtSimdScalableBaseType = baseType; result->gtSimdScalableKind = SimdScalableRepeated; + result->gtSimdScalableIndex = 0; memcpy(&result->gtSimdScalableIndex, &arg0, sizeof(TBase)); } From bd26a42efb8c56ca4e32dc542dfd8385dadb50c5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 18 May 2026 09:47:02 +0100 Subject: [PATCH 33/58] match lsra to codegen --- src/coreclr/jit/codegenarm64.cpp | 20 ++--- src/coreclr/jit/lsraarm64.cpp | 132 +++++++++++++++++++++++++------ src/coreclr/jit/simd.h | 2 +- 3 files changed, 121 insertions(+), 33 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp 
b/src/coreclr/jit/codegenarm64.cpp index 9c6812c71614c4..6221a8e8c7497c 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2389,7 +2389,8 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre case TYP_SIMD: { simdscalable_t simdVal = vecCon->gtSimdScalableVal; - insOpts opt = emitter::optGetSveInsOpt(emitTypeSize(simdVal.gtSimdScalableBaseType)); + var_types baseType = simdVal.gtSimdScalableBaseType; + insOpts opt = emitter::optGetSveInsOpt(emitTypeSize(baseType)); emitAttr emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; auto loadConstantHelper = [&](ssize_t constValue) -> regNumber { @@ -2410,7 +2411,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre ssize_t index = -1; ssize_t step = -1; - switch (simdVal.gtSimdScalableBaseType) + switch (baseType) { case TYP_BYTE: { @@ -2499,18 +2500,18 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { case SimdScalableRepeated: { - if (varTypeIsIntegral(simdVal.gtSimdScalableBaseType) && + if (varTypeIsIntegral(baseType) && (emitter::isValidSimm<8>(index) || emitter::isValidSimm_MultipleOf<8, 256>(index))) { emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, index, opt); } - else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) && + else if ((baseType == TYP_FLOAT) && emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF32[0])) { emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, simdVal.gtSimdScalableIndexF32[0], INS_OPTS_SCALABLE_S); } - else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && + else if ((baseType == TYP_DOUBLE) && emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF64[0])) { emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, @@ -2529,7 +2530,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre case SimdScalableSequence: { // FP sequences should have been imported into a set of nodes - 
assert(varTypeIsIntegral(simdVal.gtSimdScalableBaseType)); + assert(varTypeIsIntegral(baseType)); if (emitter::isValidSimm<5>(index) && emitter::isValidSimm<5>(step)) { @@ -2562,18 +2563,17 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Use NEON instructions to load the constant (to avoid using predicates) - if (varTypeIsIntegral(simdVal.gtSimdScalableBaseType) && - emitter::emitIns_valid_imm_for_mov(index, emitSize)) + if (varTypeIsIntegral(baseType) && emitter::emitIns_valid_imm_for_mov(index, emitSize)) { emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, index); } - else if ((simdVal.gtSimdScalableBaseType == TYP_FLOAT) && + else if ((baseType == TYP_FLOAT) && emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF32[0])) { emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, static_cast(simdVal.gtSimdScalableIndexF32[0])); } - else if ((simdVal.gtSimdScalableBaseType == TYP_DOUBLE) && + else if ((baseType == TYP_DOUBLE) && emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) { emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 2a760bb9da00f2..b21c4ff3eb4f7e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -751,35 +751,96 @@ int LinearScan::BuildNode(GenTree* tree) } else if (vecCon->TypeIs(TYP_SIMD)) { + simdscalable_t simdVal = vecCon->gtSimdScalableVal; + var_types baseType = simdVal.gtSimdScalableBaseType; + bool canEncodeScalar = false; + + ssize_t index = -1; + ssize_t step = -1; + switch (simdVal.gtSimdScalableBaseType) + { + case TYP_BYTE: + { + index = static_cast(simdVal.gtSimdScalableIndexI8[0]); + step = static_cast(simdVal.gtSimdScalableStepI8[0]); + break; + } + + case TYP_SHORT: + { + index = static_cast(simdVal.gtSimdScalableIndexI16[0]); + step = static_cast(simdVal.gtSimdScalableStepI16[0]); + break; + } + + case TYP_INT: + { + index = 
static_cast(simdVal.gtSimdScalableIndexI32[0]); + step = static_cast(simdVal.gtSimdScalableStepI32[0]); + break; + } + + case TYP_LONG: + { + index = static_cast(simdVal.gtSimdScalableIndexI64[0]); + step = static_cast(simdVal.gtSimdScalableStepI64[0]); + break; + } + + case TYP_UBYTE: + { + index = static_cast(simdVal.gtSimdScalableIndexU8[0]); + step = static_cast(simdVal.gtSimdScalableStepU8[0]); + break; + } + + case TYP_USHORT: + { + index = static_cast(simdVal.gtSimdScalableIndexU16[0]); + step = static_cast(simdVal.gtSimdScalableStepU16[0]); + break; + } + + case TYP_UINT: + { + index = static_cast(simdVal.gtSimdScalableIndexU32[0]); + step = static_cast(simdVal.gtSimdScalableStepU32[0]); + break; + } + + case TYP_ULONG: + { + index = static_cast(simdVal.gtSimdScalableIndexU64[0]); + step = static_cast(simdVal.gtSimdScalableStepU64[0]); + break; + } + + default: + { + unreached(); + } + } + // If the constant doesn't fit into the instructions, then temps will be required switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) { case SimdScalableRepeated: { - bool canEncodeScalar = false; - var_types baseType = vecCon->gtSimdScalableVal.gtSimdScalableBaseType; - if (varTypeIsFloating(baseType)) + if (varTypeIsIntegral(baseType)) + { + canEncodeScalar = + emitter::isValidSimm<8>(index) || emitter::isValidSimm_MultipleOf<8, 256>(index); + } + else if (baseType == TYP_FLOAT) { - if (baseType == TYP_FLOAT) - { - float value; - memcpy(&value, &vecCon->gtSimdScalableVal.gtSimdScalableIndex, sizeof(value)); - canEncodeScalar = emitter::emitIns_valid_imm_for_fmov(value); - } - else - { - assert(baseType == TYP_DOUBLE); - double value; - memcpy(&value, &vecCon->gtSimdScalableVal.gtSimdScalableIndex, sizeof(value)); - canEncodeScalar = emitter::emitIns_valid_imm_for_fmov(value); - } + canEncodeScalar = emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF32[0]); } else { - canEncodeScalar = - 
emitter::emitIns_valid_imm_for_mov(vecCon->gtSimdScalableVal.gtSimdScalableIndex, - emitActualTypeSize(baseType)); + assert(baseType == TYP_DOUBLE); + canEncodeScalar = emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF64[0]); } + if (!canEncodeScalar) { buildInternalIntRegisterDefForNode(tree); @@ -789,22 +850,49 @@ int LinearScan::BuildNode(GenTree* tree) } case SimdScalableSequence: - if (!emitter::isValidSimm<5>(vecCon->gtSimdScalableVal.gtSimdScalableIndex)) + { + if (!emitter::isValidSimm<5>(index)) { + canEncodeScalar = false; buildInternalIntRegisterDefForNode(tree); } - if (!emitter::isValidSimm<5>(vecCon->gtSimdScalableVal.gtSimdScalableStep)) + + if (!emitter::isValidSimm<5>(step)) { + canEncodeScalar = false; buildInternalIntRegisterDefForNode(tree); } + + if (!canEncodeScalar) + { + buildInternalRegisterUses(); + } break; + } case SimdScalableScalar: - if (!emitter::isValidSimm<8>(vecCon->gtSimdScalableVal.gtSimdScalableIndex)) + { + if (varTypeIsIntegral(baseType)) + { + canEncodeScalar = emitter::emitIns_valid_imm_for_mov(index, emitActualTypeSize(baseType)); + } + else if (baseType == TYP_FLOAT) + { + canEncodeScalar = emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF32[0]); + } + else + { + assert(baseType == TYP_DOUBLE); + canEncodeScalar = emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0]); + } + + if (!canEncodeScalar) { buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); } break; + } default: unreached(); diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 454b91b4e303fb..ced94675eeb2b7 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2208,7 +2208,7 @@ struct simdmaskscalable_t { var_types gtSimdMaskScalableBaseType; // Only 0 and 1 are valid values - uint8_t gtSimdMaskScalableIndex; + uint8_t gtSimdMaskScalableIndex; bool operator==(const simdmaskscalable_t& other) const { From ff1728159b1395301f9a27468485a2d41ac373f0 Mon Sep 17 00:00:00 2001 From: 
Alan Hayward Date: Mon, 18 May 2026 11:27:23 +0100 Subject: [PATCH 34/58] initialise canEncodeScalar --- src/coreclr/jit/lsraarm64.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index b21c4ff3eb4f7e..ea1f219c5ce29a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -851,6 +851,8 @@ int LinearScan::BuildNode(GenTree* tree) case SimdScalableSequence: { + canEncodeScalar = true; + if (!emitter::isValidSimm<5>(index)) { canEncodeScalar = false; From 7af9cee281dc162a600ba35112311899d2aab630 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 18 May 2026 11:40:43 +0100 Subject: [PATCH 35/58] Check for invalid index in EvaluateSimdCvtScalableMaskToVector --- src/coreclr/jit/simd.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index bb93889adf50e4..ef03957c5dba22 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -926,6 +926,13 @@ bool EvaluateSimdCvtScalableMaskToVector(var_types baseType, simdscalable_t* vec return false; } + // Only zero and one are valid + if (maskCon.gtSimdMaskScalableIndex != 1) + { + assert(false); + return false; + } + vecCon->gtSimdScalableBaseType = baseType; vecCon->gtSimdScalableKind = SimdScalableRepeated; vecCon->gtSimdScalableStep = 0; From e5f98294e89667f427fadf36449502f44aee2c45 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 18 May 2026 12:06:10 +0100 Subject: [PATCH 36/58] Use data version of gtNewVconNode --- src/coreclr/jit/assertionprop.cpp | 4 +--- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/gentree.cpp | 6 +----- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 4d2f951481518b..e4c2fc08f42d6b 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -3307,9 +3307,7 @@ GenTree* 
Compiler::optConstantAssertionProp(const AssertionDsc& curAssertion, assert(genTypeSize(tree->TypeGet()) == curAssertion.GetOp2().GetSimdSize()); // We can't bash a LCL_VAR into a GenTreeVecCon (different node size), so allocate a fresh node. - GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - memcpy(&vecCon->gtSimdVal, curAssertion.GetOp2().GetSimdConstant(), genTypeSize(tree->TypeGet())); - newTree = vecCon; + newTree = gtNewVconNode(tree->TypeGet(), curAssertion.GetOp2().GetSimdConstant()); break; } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 332d155b858bf8..a7272106c09abf 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3272,7 +3272,7 @@ class Compiler #if defined(FEATURE_SIMD) GenTreeVecCon* gtNewVconNode(var_types type); - GenTreeVecCon* gtNewVconNode(var_types type, void* data); + GenTreeVecCon* gtNewVconNode(var_types type, const void* data); #if defined(TARGET_ARM64) GenTreeVecCon* gtNewSimdVconNode(var_types type, var_types baseType, SimdScalableKind kind, uint64_t index, uint64_t step = 0); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 3489fda8f54476..0db7ee86ae1139 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9378,12 +9378,8 @@ GenTreeVecCon* Compiler::gtNewVconNode(var_types type) return vecCon; } -GenTreeVecCon* Compiler::gtNewVconNode(var_types type, void* data) +GenTreeVecCon* Compiler::gtNewVconNode(var_types type, const void* data) { -#if defined(TARGET_ARM64) - assert(type != TYP_SIMD); -#endif // defined(TARGET_ARM64) - GenTreeVecCon* vecCon = new (this, GT_CNS_VEC) GenTreeVecCon(type); memcpy(&vecCon->gtSimdVal, data, genTypeSize(type)); return vecCon; From 878ea9dab05093e9285f835df0dfe62089e9d1b1 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 18 May 2026 12:08:58 +0100 Subject: [PATCH 37/58] allow default option in lsra switch --- src/coreclr/jit/lsraarm64.cpp | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ea1f219c5ce29a..f1856bc353d48a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -817,7 +817,7 @@ int LinearScan::BuildNode(GenTree* tree) default: { - unreached(); + break; } } From c17777269f2830c1c4e9044d74fffa78aed81420 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 18 May 2026 12:15:50 +0100 Subject: [PATCH 38/58] initialise step in BroadcastConstantToSimdScalable --- src/coreclr/jit/simd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index ced94675eeb2b7..4e3634e123bd82 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2258,6 +2258,7 @@ void BroadcastConstantToSimdScalable(simdscalable_t* result, var_types baseType, { result->gtSimdScalableBaseType = baseType; result->gtSimdScalableKind = SimdScalableRepeated; + result->gtSimdScalableStep = 0; result->gtSimdScalableIndex = 0; memcpy(&result->gtSimdScalableIndex, &arg0, sizeof(TBase)); } From 024a8e325918186ca1904794a70429b0f78d40e2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 18 May 2026 12:18:01 +0100 Subject: [PATCH 39/58] noway assert in GetConstantSimdMask --- src/coreclr/jit/valuenum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index d8e1a3c8b06f0a..d8b401289926e8 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -4191,7 +4191,7 @@ simdmaskvalue_t ValueNumStore::GetConstantSimdMaskValue(ValueNum argVN) simdmask_t ValueNumStore::GetConstantSimdMask(ValueNum argVN) { simdmaskvalue_t storage = GetConstantSimdMaskValue(argVN); - assert(!storage.IsScalable()); + noway_assert(!storage.IsScalable()); return storage.fixed; } From 4d639e4be59a67fe432fe9d1d23c583b66810b08 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 19 May 2026 11:58:28 +0100 Subject: 
[PATCH 40/58] Call overload gtNewMskConNode from gtNewMskConNode --- src/coreclr/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 0db7ee86ae1139..362ae2f279f9b4 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9413,7 +9413,7 @@ GenTreeMskCon* Compiler::gtNewMskConNode(var_types type) #if defined(TARGET_ARM64) GenTreeMskCon* Compiler::gtNewMskConNode(var_types type, var_types baseType, bool index) { - GenTreeMskCon* mskCon = new (this, GT_CNS_MSK) GenTreeMskCon(type); + GenTreeMskCon* mskCon = gtNewMskConNode(type); mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableBaseType = baseType; mskCon->gtSimdScalableMaskVal.gtSimdMaskScalableIndex = index; return mskCon; From adb6b9f2cfcc136e6a44c747487f5dc116747410 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 28 May 2026 10:55:48 +0100 Subject: [PATCH 41/58] Make VN isScalable Arm64 only --- src/coreclr/jit/valuenum.cpp | 2 ++ src/coreclr/jit/valuenum.h | 21 +++++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index d8b401289926e8..e4a7f4eae33760 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -4191,7 +4191,9 @@ simdmaskvalue_t ValueNumStore::GetConstantSimdMaskValue(ValueNum argVN) simdmask_t ValueNumStore::GetConstantSimdMask(ValueNum argVN) { simdmaskvalue_t storage = GetConstantSimdMaskValue(argVN); +#if defined(TARGET_ARM64) noway_assert(!storage.IsScalable()); +#endif // TARGET_ARM64 return storage.fixed; } diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 6241d46fef2262..e5fe77f3bfaa31 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -249,18 +249,20 @@ static const var_types TYP_HEAP = TYP_UNKNOWN; // VarTypConv and others to work), but on ARM64 the mask can be scalable or fixed. 
struct simdmaskvalue_t { - uint8_t isScalable; - simdmask_t fixed; #if defined(TARGET_ARM64) + uint8_t isScalable; simdmaskscalable_t scalable; #endif // TARGET_ARM64 + simdmask_t fixed; static simdmaskvalue_t FromFixed(const simdmask_t& mask) { simdmaskvalue_t result = {}; +#if defined(TARGET_ARM64) result.isScalable = 0; - result.fixed = mask; +#endif // TARGET_ARM64 + result.fixed = mask; return result; } @@ -276,15 +278,13 @@ struct simdmaskvalue_t return result; } -#endif // TARGET_ARM64 - bool IsScalable() const + inline bool IsScalable() const { -#if !defined(TARGET_ARM64) assert(isScalable == 0); -#endif // !TARGET_ARM64 return isScalable != 0; } +#endif // TARGET_ARM64 }; #endif // FEATURE_MASKED_HW_INTRINSICS @@ -2048,12 +2048,15 @@ class ValueNumStore } } else -#endif // TARGET_ARM64 { hash = static_cast(hash ^ val.isScalable); hash = static_cast(hash ^ val.fixed.u32[0]); hash = static_cast(hash ^ val.fixed.u32[1]); } +#else + hash = static_cast(hash ^ val.fixed.u32[0]); + hash = static_cast(hash ^ val.fixed.u32[1]); +#endif // TARGET_ARM64 return hash; } @@ -2367,7 +2370,9 @@ FORCEINLINE simdmask_t ValueNumStore::SafeGetConstantValue(Chunk* c, { assert(c->m_typ == TYP_MASK); simdmaskvalue_t storage = SafeGetConstantValue(c, offset); +#if defined(TARGET_ARM64) assert(!storage.IsScalable()); +#endif // TARGET_ARM64 return storage.fixed; } From 3a2a6f5bbad2ec23bd5c548800cae76659108c75 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 28 May 2026 11:12:43 +0100 Subject: [PATCH 42/58] Do not canonicalize zeros for gtHashValue --- src/coreclr/jit/gentree.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 362ae2f279f9b4..08b02a9d7d899d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3350,15 +3350,6 @@ unsigned Compiler::gtHashValue(GenTree* tree) { simdscalable_t simdVal = vecCon->gtSimdScalableVal; - // Canonicalize zeros so hash aligns with 
equality, which treats all-zero encodings as equal. - if (simdVal.IsZero()) - { - simdVal.gtSimdScalableBaseType = TYP_BYTE; - simdVal.gtSimdScalableKind = SimdScalableRepeated; - simdVal.gtSimdScalableIndex = 0; - simdVal.gtSimdScalableStep = 0; - } - add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableKind); add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableBaseType); add = genTreeHashAdd(ulo32(add), simdVal.gtSimdScalableIndexU32[0]); From bc7b44fb2baeffdb8111558e85c2c5bc16ae3f4d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 28 May 2026 11:15:23 +0100 Subject: [PATCH 43/58] bool type for isScalable --- src/coreclr/jit/valuenum.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index e5fe77f3bfaa31..8f131e0d906591 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -250,7 +250,7 @@ static const var_types TYP_HEAP = TYP_UNKNOWN; struct simdmaskvalue_t { #if defined(TARGET_ARM64) - uint8_t isScalable; + bool isScalable; simdmaskscalable_t scalable; #endif // TARGET_ARM64 simdmask_t fixed; @@ -260,7 +260,7 @@ struct simdmaskvalue_t simdmaskvalue_t result = {}; #if defined(TARGET_ARM64) - result.isScalable = 0; + result.isScalable = false; #endif // TARGET_ARM64 result.fixed = mask; @@ -272,7 +272,7 @@ struct simdmaskvalue_t { simdmaskvalue_t result = {}; - result.isScalable = 1; + result.isScalable = true; result.scalable = mask; result.fixed = simdmask_t::Zero(); @@ -281,8 +281,7 @@ struct simdmaskvalue_t inline bool IsScalable() const { - assert(isScalable == 0); - return isScalable != 0; + return isScalable; } #endif // TARGET_ARM64 }; From 8d69cc9bbf7981e6584ca77d198dccab24a30c32 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 28 May 2026 11:21:18 +0100 Subject: [PATCH 44/58] Remove config check when printing valuenum constants --- src/coreclr/jit/valuenum.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index e4a7f4eae33760..67f9e9c0d0d881 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10927,7 +10927,7 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) simdmaskvalue_t cnsVal = GetConstantSimdMaskValue(vn); #if defined(TARGET_ARM64) && defined(DEBUG) - if (cnsVal.IsScalable() && JitConfig.JitUseScalableVectorT()) + if (cnsVal.IsScalable()) { printf("SimdMaskScalableCns[base:%s idx:%u]", varTypeName(cnsVal.scalable.gtSimdMaskScalableBaseType), From b856d3100e7124ae12c1954f7f1bba9d8a848df5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 29 May 2026 15:08:00 +0100 Subject: [PATCH 45/58] copy simdval directly in valuenu --- src/coreclr/jit/valuenum.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 67f9e9c0d0d881..e60c11c54fc0c0 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12525,8 +12525,7 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) #elif defined(TARGET_ARM64) case TYP_SIMD: { - simdscalable_t simdVal; - memcpy(&simdVal, &tree->AsVecCon()->gtSimdScalableVal, sizeof(simdscalable_t)); + simdscalable_t simdVal = tree->AsVecCon()->gtSimdScalableVal; tree->gtVNPair.SetBoth(vnStore->VNForSimdScalableCon(simdVal)); break; From 0e96535bb9c25b4cd0daeb8611a714aeea6a4588 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 29 May 2026 15:42:22 +0100 Subject: [PATCH 46/58] split out scalable printing --- src/coreclr/jit/gentree.cpp | 135 +++++++++++++++++++++++++++++------- 1 file changed, 111 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 08b02a9d7d899d..df4d745d40b3b4 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13665,35 +13665,122 @@ void Compiler::gtDispConst(GenTree* tree) #elif defined(TARGET_ARM64) case TYP_SIMD: { - 
printf("%-6s ", varTypeName(vecCon->gtSimdScalableVal.gtSimdScalableBaseType)); + const simdscalable_t& simdVal = vecCon->gtSimdScalableVal; + const var_types simdBaseType = simdVal.gtSimdScalableBaseType; - switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) - { - case SimdScalableRepeated: - printf("<0x%016llx, 0x%016llx, 0x%016llx... >", - vecCon->gtSimdScalableVal.gtSimdScalableIndex, - vecCon->gtSimdScalableVal.gtSimdScalableIndex, - vecCon->gtSimdScalableVal.gtSimdScalableIndex); - break; - - case SimdScalableSequence: + auto printElement = [&](unsigned index) { + switch (simdBaseType) { - uint64_t index = vecCon->gtSimdScalableVal.gtSimdScalableIndex; - printf("<0x%016llx, ", index); - index += vecCon->gtSimdScalableVal.gtSimdScalableStep; - printf("0x%016llx, ", index); - index += vecCon->gtSimdScalableVal.gtSimdScalableStep; - printf("0x%016llx...>", index); - break; + case TYP_BYTE: + case TYP_UBYTE: + { + uint8_t element = simdVal.gtSimdScalableIndexU8[0]; + if (simdVal.gtSimdScalableKind == SimdScalableSequence) + { + element = + static_cast(element + (index * simdVal.gtSimdScalableStepU8[0])); + } + else if ((simdVal.gtSimdScalableKind == SimdScalableScalar) && (index != 0)) + { + element = 0; + } + printf("0x%02x", static_cast(element)); + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + uint16_t element = simdVal.gtSimdScalableIndexU16[0]; + if (simdVal.gtSimdScalableKind == SimdScalableSequence) + { + element = + static_cast(element + (index * simdVal.gtSimdScalableStepU16[0])); + } + else if ((simdVal.gtSimdScalableKind == SimdScalableScalar) && (index != 0)) + { + element = 0; + } + printf("0x%04x", static_cast(element)); + break; + } + + case TYP_INT: + case TYP_UINT: + { + uint32_t element = simdVal.gtSimdScalableIndexU32[0]; + if (simdVal.gtSimdScalableKind == SimdScalableSequence) + { + element = + static_cast(element + (index * simdVal.gtSimdScalableStepU32[0])); + } + else if ((simdVal.gtSimdScalableKind == SimdScalableScalar) && 
(index != 0)) + { + element = 0; + } + printf("0x%08x", element); + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + uint64_t element = simdVal.gtSimdScalableIndexU64[0]; + if (simdVal.gtSimdScalableKind == SimdScalableSequence) + { + element += index * simdVal.gtSimdScalableStepU64[0]; + } + else if ((simdVal.gtSimdScalableKind == SimdScalableScalar) && (index != 0)) + { + element = 0; + } + printf("0x%016llx", static_cast(element)); + break; + } + + case TYP_FLOAT: + { + float element = simdVal.gtSimdScalableIndexF32[0]; + if (simdVal.gtSimdScalableKind == SimdScalableSequence) + { + element += index * simdVal.gtSimdScalableStepF32[0]; + } + else if ((simdVal.gtSimdScalableKind == SimdScalableScalar) && (index != 0)) + { + element = 0.0; + } + printf("%#.9g", element); + break; + } + + case TYP_DOUBLE: + { + double element = simdVal.gtSimdScalableIndexF64[0]; + if (simdVal.gtSimdScalableKind == SimdScalableSequence) + { + element += index * simdVal.gtSimdScalableStepF64[0]; + } + else if ((simdVal.gtSimdScalableKind == SimdScalableScalar) && (index != 0)) + { + element = 0.0; + } + printf("%#.17g", element); + break; + } + + default: + unreached(); } + }; - case SimdScalableScalar: - printf("<0x%016llx, 0x0, 0x0... 
>", vecCon->gtSimdScalableVal.gtSimdScalableIndex); - break; + printf("%-6s <", varTypeName(simdBaseType)); + printElement(0); + printf(", "); + printElement(1); + printf(", "); + printElement(2); + printf("...>"); - default: - unreached(); - } break; } #endif // TARGET_XARCH From 2a3984996a1b6600c74e6d1d46a2cd9848002db5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 29 May 2026 15:53:55 +0100 Subject: [PATCH 47/58] fix immediate extraction in codegen --- src/coreclr/jit/codegenarm64.cpp | 109 +++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 34 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 6221a8e8c7497c..35cd0e469f4203 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2393,7 +2393,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre insOpts opt = emitter::optGetSveInsOpt(emitTypeSize(baseType)); emitAttr emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; - auto loadConstantHelper = [&](ssize_t constValue) -> regNumber { + auto loadConstantHelper = [&](uint64_t constValue) -> regNumber { // Get a temp integer register to compute long address. Use Extract so multiple calls // (index + step) get distinct temps when LSRA reserved more than one. 
regNumber addrReg = internalRegisters.Extract(tree, RBM_ALLINT); @@ -2409,63 +2409,98 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre return addrReg; }; - ssize_t index = -1; - ssize_t step = -1; + ssize_t index = -1; + ssize_t step = -1; + bool indexHasImm = true; + bool stepHasImm = true; + uint64_t indexVal = 0; + uint64_t stepVal = 0; switch (baseType) { case TYP_BYTE: { - index = static_cast(simdVal.gtSimdScalableIndexI8[0]); - step = static_cast(simdVal.gtSimdScalableStepI8[0]); + index = static_cast(simdVal.gtSimdScalableIndexI8[0]); + step = static_cast(simdVal.gtSimdScalableStepI8[0]); + indexVal = static_cast(static_cast(index)); + stepVal = static_cast(static_cast(step)); break; } case TYP_SHORT: { - index = static_cast(simdVal.gtSimdScalableIndexI16[0]); - step = static_cast(simdVal.gtSimdScalableStepI16[0]); + index = static_cast(simdVal.gtSimdScalableIndexI16[0]); + step = static_cast(simdVal.gtSimdScalableStepI16[0]); + indexVal = static_cast(static_cast(index)); + stepVal = static_cast(static_cast(step)); break; } case TYP_INT: { - index = static_cast(simdVal.gtSimdScalableIndexI32[0]); - step = static_cast(simdVal.gtSimdScalableStepI32[0]); + index = static_cast(simdVal.gtSimdScalableIndexI32[0]); + step = static_cast(simdVal.gtSimdScalableStepI32[0]); + indexVal = static_cast(static_cast(index)); + stepVal = static_cast(static_cast(step)); break; } case TYP_LONG: { - index = static_cast(simdVal.gtSimdScalableIndexI64[0]); - step = static_cast(simdVal.gtSimdScalableStepI64[0]); + index = static_cast(simdVal.gtSimdScalableIndexI64[0]); + step = static_cast(simdVal.gtSimdScalableStepI64[0]); + indexVal = static_cast(simdVal.gtSimdScalableIndexI64[0]); + stepVal = static_cast(simdVal.gtSimdScalableStepI64[0]); break; } case TYP_UBYTE: { - index = static_cast(simdVal.gtSimdScalableIndexU8[0]); - step = static_cast(simdVal.gtSimdScalableStepU8[0]); + index = static_cast(simdVal.gtSimdScalableIndexU8[0]); + step = 
static_cast(simdVal.gtSimdScalableStepU8[0]); + indexVal = simdVal.gtSimdScalableIndexU8[0]; + stepVal = simdVal.gtSimdScalableStepU8[0]; break; } case TYP_USHORT: { - index = static_cast(simdVal.gtSimdScalableIndexU16[0]); - step = static_cast(simdVal.gtSimdScalableStepU16[0]); + index = static_cast(simdVal.gtSimdScalableIndexU16[0]); + step = static_cast(simdVal.gtSimdScalableStepU16[0]); + indexVal = simdVal.gtSimdScalableIndexU16[0]; + stepVal = simdVal.gtSimdScalableStepU16[0]; break; } case TYP_UINT: { - index = static_cast(simdVal.gtSimdScalableIndexU32[0]); - step = static_cast(simdVal.gtSimdScalableStepU32[0]); + index = static_cast(simdVal.gtSimdScalableIndexU32[0]); + step = static_cast(simdVal.gtSimdScalableStepU32[0]); + indexVal = simdVal.gtSimdScalableIndexU32[0]; + stepVal = simdVal.gtSimdScalableStepU32[0]; break; } case TYP_ULONG: { - index = static_cast(simdVal.gtSimdScalableIndexU64[0]); - step = static_cast(simdVal.gtSimdScalableStepU64[0]); + indexVal = simdVal.gtSimdScalableIndexU64[0]; + stepVal = simdVal.gtSimdScalableStepU64[0]; + if (indexVal <= static_cast(INT64_MAX)) + { + index = static_cast(indexVal); + } + else + { + indexHasImm = false; + } + + if (stepVal <= static_cast(INT64_MAX)) + { + step = static_cast(stepVal); + } + else + { + stepHasImm = false; + } break; } @@ -2475,8 +2510,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre uint32_t stepBits = 0; memcpy(&indexBits, &simdVal.gtSimdScalableIndexF32[0], sizeof(indexBits)); memcpy(&stepBits, &simdVal.gtSimdScalableStepF32[0], sizeof(stepBits)); - index = static_cast(indexBits); - step = static_cast(stepBits); + indexVal = indexBits; + stepVal = stepBits; + indexHasImm = false; + stepHasImm = false; break; } case TYP_DOUBLE: @@ -2485,8 +2522,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre uint64_t stepBits = 0; memcpy(&indexBits, &simdVal.gtSimdScalableIndexF64[0], sizeof(indexBits)); memcpy(&stepBits, 
&simdVal.gtSimdScalableStepF64[0], sizeof(stepBits)); - index = static_cast(indexBits); - step = static_cast(stepBits); + indexVal = indexBits; + stepVal = stepBits; + indexHasImm = false; + stepHasImm = false; break; } @@ -2500,7 +2539,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { case SimdScalableRepeated: { - if (varTypeIsIntegral(baseType) && + if (varTypeIsIntegral(baseType) && indexHasImm && (emitter::isValidSimm<8>(index) || emitter::isValidSimm_MultipleOf<8, 256>(index))) { emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, index, opt); @@ -2520,7 +2559,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } else { - regNumber indexReg = loadConstantHelper(index); + regNumber indexReg = loadConstantHelper(indexVal); emit->emitInsSve_R_R(INS_sve_dup, emitSize, targetReg, indexReg, opt); } @@ -2532,25 +2571,26 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // FP sequences should have been imported into a set of nodes assert(varTypeIsIntegral(baseType)); - if (emitter::isValidSimm<5>(index) && emitter::isValidSimm<5>(step)) + if (indexHasImm && stepHasImm && emitter::isValidSimm<5>(index) && + emitter::isValidSimm<5>(step)) { emit->emitInsSve_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, index, step, opt); } - else if (emitter::isValidSimm<5>(index)) + else if (indexHasImm && emitter::isValidSimm<5>(index)) { - regNumber stepReg = loadConstantHelper(step); + regNumber stepReg = loadConstantHelper(stepVal); emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, stepReg, index, opt, INS_SCALABLE_OPTS_IMM_FIRST); } - else if (emitter::isValidSimm<5>(step)) + else if (stepHasImm && emitter::isValidSimm<5>(step)) { - regNumber indexReg = loadConstantHelper(index); + regNumber indexReg = loadConstantHelper(indexVal); emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, indexReg, step, opt); } else { - regNumber indexReg = 
loadConstantHelper(index); - regNumber stepReg = loadConstantHelper(step); + regNumber indexReg = loadConstantHelper(indexVal); + regNumber stepReg = loadConstantHelper(stepVal); emit->emitInsSve_R_R_R(INS_sve_index, emitSize, targetReg, indexReg, stepReg, opt); } break; @@ -2563,7 +2603,8 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Use NEON instructions to load the constant (to avoid using predicates) - if (varTypeIsIntegral(baseType) && emitter::emitIns_valid_imm_for_mov(index, emitSize)) + if (varTypeIsIntegral(baseType) && indexHasImm && + emitter::emitIns_valid_imm_for_mov(index, emitSize)) { emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, index); } @@ -2580,7 +2621,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } else { - regNumber indexReg = loadConstantHelper(index); + regNumber indexReg = loadConstantHelper(indexVal); emit->emitIns_R_R(INS_ins, emitSize, targetReg, indexReg, INS_OPTS_16B); } break; From 09f2165aa41d1918ec5799072aa5d110e60379b7 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 1 Jun 2026 09:27:25 +0100 Subject: [PATCH 48/58] formatting --- src/coreclr/jit/gentree.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 32f121524826a9..5260375558764d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13703,8 +13703,7 @@ void Compiler::gtDispConst(GenTree* tree) uint8_t element = simdVal.gtSimdScalableIndexU8[0]; if (simdVal.gtSimdScalableKind == SimdScalableSequence) { - element = - static_cast(element + (index * simdVal.gtSimdScalableStepU8[0])); + element = static_cast(element + (index * simdVal.gtSimdScalableStepU8[0])); } else if ((simdVal.gtSimdScalableKind == SimdScalableScalar) && (index != 0)) { From 11c506dfe5a156d98000838064fa63b9cc68ea93 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 1 Jun 2026 17:02:34 +0100 Subject: [PATCH 49/58] 
duplicate uint64_t checks in lsra --- src/coreclr/jit/lsraarm64.cpp | 38 ++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index f1856bc353d48a..e95cdacccd0fc5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -755,8 +755,10 @@ int LinearScan::BuildNode(GenTree* tree) var_types baseType = simdVal.gtSimdScalableBaseType; bool canEncodeScalar = false; - ssize_t index = -1; - ssize_t step = -1; + ssize_t index = -1; + ssize_t step = -1; + bool indexHasImm = true; + bool stepHasImm = true; switch (simdVal.gtSimdScalableBaseType) { case TYP_BYTE: @@ -810,8 +812,25 @@ int LinearScan::BuildNode(GenTree* tree) case TYP_ULONG: { - index = static_cast(simdVal.gtSimdScalableIndexU64[0]); - step = static_cast(simdVal.gtSimdScalableStepU64[0]); + const uint64_t indexVal = simdVal.gtSimdScalableIndexU64[0]; + const uint64_t stepVal = simdVal.gtSimdScalableStepU64[0]; + if (indexVal <= static_cast(INT64_MAX)) + { + index = static_cast(indexVal); + } + else + { + indexHasImm = false; + } + + if (stepVal <= static_cast(INT64_MAX)) + { + step = static_cast(stepVal); + } + else + { + stepHasImm = false; + } break; } @@ -828,8 +847,8 @@ int LinearScan::BuildNode(GenTree* tree) { if (varTypeIsIntegral(baseType)) { - canEncodeScalar = - emitter::isValidSimm<8>(index) || emitter::isValidSimm_MultipleOf<8, 256>(index); + canEncodeScalar = indexHasImm && (emitter::isValidSimm<8>(index) || + emitter::isValidSimm_MultipleOf<8, 256>(index)); } else if (baseType == TYP_FLOAT) { @@ -853,13 +872,13 @@ int LinearScan::BuildNode(GenTree* tree) { canEncodeScalar = true; - if (!emitter::isValidSimm<5>(index)) + if (!indexHasImm || !emitter::isValidSimm<5>(index)) { canEncodeScalar = false; buildInternalIntRegisterDefForNode(tree); } - if (!emitter::isValidSimm<5>(step)) + if (!stepHasImm || !emitter::isValidSimm<5>(step)) { canEncodeScalar = false; 
buildInternalIntRegisterDefForNode(tree); @@ -876,7 +895,8 @@ int LinearScan::BuildNode(GenTree* tree) { if (varTypeIsIntegral(baseType)) { - canEncodeScalar = emitter::emitIns_valid_imm_for_mov(index, emitActualTypeSize(baseType)); + canEncodeScalar = + indexHasImm && emitter::emitIns_valid_imm_for_mov(index, emitActualTypeSize(baseType)); } else if (baseType == TYP_FLOAT) { From 0e728dce0d60f96a6005252098b1079eef9722b9 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 1 Jun 2026 17:05:48 +0100 Subject: [PATCH 50/58] const param for gtNewSimdVconNode --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8c4508e707b3d4..313c7605f4e78a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3294,7 +3294,7 @@ class Compiler #if defined(TARGET_ARM64) GenTreeVecCon* gtNewSimdVconNode(var_types type, var_types baseType, SimdScalableKind kind, uint64_t index, uint64_t step = 0); - inline GenTreeVecCon* gtNewSimdVconNode(var_types type, simdscalable_t* con) + inline GenTreeVecCon* gtNewSimdVconNode(var_types type, const simdscalable_t* con) { return gtNewSimdVconNode(type, con->gtSimdScalableBaseType, con->gtSimdScalableKind, con->gtSimdScalableIndex, con->gtSimdScalableStep); } From c97f62f2fe0af18d9dbb06783f820ff0ea8fdc1c Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 2 Jun 2026 13:39:35 +0100 Subject: [PATCH 51/58] Add simdscalable_t Zero() --- src/coreclr/jit/simd.h | 7 +++++++ src/coreclr/jit/valuenum.cpp | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 4e3634e123bd82..1318624a6a08f8 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2200,6 +2200,13 @@ struct simdscalable_t } bool IsAllBitsSet() const; + + static simdscalable_t Zero() + { + return {.gtSimdScalableBaseType = TYP_BYTE, + .gtSimdScalableKind = SimdScalableRepeated, + .gtSimdScalableIndex 
= 0}; + } }; static_assert(sizeof(simd_t) >= sizeof(simdscalable_t)); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 8c3d811c403e9b..8b78081e9bdd2d 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -2178,6 +2178,11 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) { return VNForSimd64Con(simd64_t::Zero()); } +#elif defined(TARGET_ARM64) + case TYP_SIMD: + { + return VNForSimdScalableCon(simdscalable_t::Zero()); + } #endif // TARGET_XARCH #if defined(FEATURE_MASKED_HW_INTRINSICS) From 8b03d4b2d5993022359c92fb1072b4bdd3fcf622 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 3 Jun 2026 16:12:35 +0100 Subject: [PATCH 52/58] Add TYP_SIMD for EvaluateUnaryInPlace --- src/coreclr/jit/gentree.cpp | 30 +++++-- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/simd.h | 161 ++++++++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5260375558764d..f9c7787a8c4de2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20358,7 +20358,7 @@ bool Compiler::IsValidForShuffle( // scalar - true if this is a scalar operation; otherwise, false // baseType - the base type of the constant being checked // -void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType) +bool GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType) { switch (gtType) { @@ -20367,7 +20367,7 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types simd8_t result = {}; EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd8Val); gtSimd8Val = result; - break; + return true; } case TYP_SIMD12: @@ -20375,7 +20375,7 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types simd12_t result = {}; EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd12Val); gtSimd12Val = result; - break; + return true; } 
case TYP_SIMD16: @@ -20383,7 +20383,7 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types simd16_t result = {}; EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd16Val); gtSimd16Val = result; - break; + return true; } #if defined(TARGET_XARCH) @@ -20392,7 +20392,7 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types simd32_t result = {}; EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd32Val); gtSimd32Val = result; - break; + return true; } case TYP_SIMD64: @@ -20400,10 +20400,23 @@ void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types simd64_t result = {}; EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd64Val); gtSimd64Val = result; - break; + return true; } #endif // TARGET_XARCH +#if defined(TARGET_ARM64) + case TYP_SIMD: + { + simdscalable_t result = {}; + if (!TryEvaluateUnarySimdScalable(oper, scalar, baseType, &result, gtSimdScalableVal)) + { + return false; + } + gtSimdScalableVal = result; + return true; + } +#endif // TARGET_ARM64 + default: { unreached(); @@ -34206,7 +34219,10 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } else { - cnsNode->AsVecCon()->EvaluateUnaryInPlace(oper, isScalar, simdBaseType); + if (!cnsNode->AsVecCon()->EvaluateUnaryInPlace(oper, isScalar, simdBaseType)) + { + return tree; + } } resultNode = cnsNode; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 8659c6858ce7d3..b05c39f7b49a36 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7085,7 +7085,7 @@ struct GenTreeVecCon : public GenTree #endif // FEATURE_HW_INTRINSICS - void EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType); + bool EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType); void EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_types baseType, GenTreeVecCon* other); template diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h 
index 1318624a6a08f8..f30d871f03c26d 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2270,6 +2270,167 @@ void BroadcastConstantToSimdScalable(simdscalable_t* result, var_types baseType, memcpy(&result->gtSimdScalableIndex, &arg0, sizeof(TBase)); } +template +bool SimdBitwiseEqual(TBase left, TBase right) +{ + return memcmp(&left, &right, sizeof(TBase)) == 0; +} + +template +bool TryEvaluateUnaryScalarForSimdScalable(genTreeOps oper, TBase arg0, TBase* result) +{ + if ((oper == GT_LZCNT) && (sizeof(TBase) < sizeof(uint32_t))) + { + return false; + } + + *result = EvaluateUnaryScalar(oper, arg0); + return true; +} + +template +bool TryEvaluateUnarySimdScalable( + genTreeOps oper, bool scalar, var_types baseType, simdscalable_t* result, const simdscalable_t& arg0) +{ + TBase index; + TBase step; + memcpy(&index, &arg0.gtSimdScalableIndex, sizeof(TBase)); + memcpy(&step, &arg0.gtSimdScalableStep, sizeof(TBase)); + + auto setResult = [=](SimdScalableKind kind, TBase resultIndex, TBase resultStep, simdscalable_t* result) { + result->gtSimdScalableBaseType = baseType; + result->gtSimdScalableKind = kind; + result->gtSimdScalableIndex = 0; + result->gtSimdScalableStep = 0; + memcpy(&result->gtSimdScalableIndex, &resultIndex, sizeof(TBase)); + memcpy(&result->gtSimdScalableStep, &resultStep, sizeof(TBase)); + }; + + TBase resultIndex; + if (!TryEvaluateUnaryScalarForSimdScalable(oper, index, &resultIndex)) + { + return false; + } + + TBase zero = {}; + + if (scalar) + { + setResult(SimdScalableScalar, resultIndex, zero, result); + return true; + } + + switch (arg0.gtSimdScalableKind) + { + case SimdScalableRepeated: + { + setResult(SimdScalableRepeated, resultIndex, zero, result); + return true; + } + + case SimdScalableSequence: + { + if (SimdBitwiseEqual(step, zero)) + { + setResult(SimdScalableRepeated, resultIndex, zero, result); + return true; + } + + switch (oper) + { + case GT_NEG: + { + setResult(SimdScalableSequence, resultIndex, 
static_cast(zero - step), result); + return true; + } + + case GT_NOT: + { + if (varTypeIsFloating(baseType)) + { + return false; + } + setResult(SimdScalableSequence, resultIndex, static_cast(zero - step), result); + return true; + } + + default: + return false; + } + } + + case SimdScalableScalar: + { + TBase upperValue; + if (!TryEvaluateUnaryScalarForSimdScalable(oper, zero, &upperValue)) + { + return false; + } + + if (SimdBitwiseEqual(upperValue, zero)) + { + setResult(SimdScalableScalar, resultIndex, zero, result); + return true; + } + + if (SimdBitwiseEqual(upperValue, resultIndex)) + { + setResult(SimdScalableRepeated, resultIndex, zero, result); + return true; + } + + return false; + } + + default: + unreached(); + } +} + +inline bool TryEvaluateUnarySimdScalable( + genTreeOps oper, bool scalar, var_types baseType, simdscalable_t* result, const simdscalable_t& arg0) +{ + switch (baseType) + { + case TYP_FLOAT: + { + if (IsUnaryBitwiseOperation(oper)) + { + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + } + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + } + + case TYP_DOUBLE: + { + if (IsUnaryBitwiseOperation(oper)) + { + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + } + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + } + + case TYP_BYTE: + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + case TYP_SHORT: + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + case TYP_INT: + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + case TYP_LONG: + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + case TYP_UBYTE: + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + case TYP_USHORT: + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + case TYP_UINT: + return TryEvaluateUnarySimdScalable(oper, 
scalar, baseType, result, arg0); + case TYP_ULONG: + return TryEvaluateUnarySimdScalable(oper, scalar, baseType, result, arg0); + default: + unreached(); + } +} + //------------------------------------------------------------------------ // NarrowAndDuplicateSimdLong: Narrow each ULONG element in arg0 to size // TSimd. Each element is then duplicated to the number of TSimd values From 4b19e658f0589c8dce813a5158e5ed873100297e Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 3 Jun 2026 16:22:35 +0100 Subject: [PATCH 53/58] Add codegen for zero and allbits in constant codegen --- src/coreclr/jit/codegenarm64.cpp | 178 +++++++++++++++++-------------- 1 file changed, 95 insertions(+), 83 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 5085c7077cf7d4..1ba9267c58520b 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2563,101 +2563,113 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre } } - switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) + if (vecCon->IsZero()) { - case SimdScalableRepeated: + emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, 0, opt); + } + else if (vecCon->IsAllBitsSet()) + { + emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, -1, opt); + } + else + { + switch (vecCon->gtSimdScalableVal.gtSimdScalableKind) { - if (varTypeIsIntegral(baseType) && indexHasImm && - (emitter::isValidSimm<8>(index) || emitter::isValidSimm_MultipleOf<8, 256>(index))) + case SimdScalableRepeated: { - emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, index, opt); - } - else if ((baseType == TYP_FLOAT) && - emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF32[0])) - { - emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, - simdVal.gtSimdScalableIndexF32[0], INS_OPTS_SCALABLE_S); - } - else if ((baseType == TYP_DOUBLE) && - emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF64[0])) - { - 
emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, - static_cast(simdVal.gtSimdScalableIndexF64[0]), - INS_OPTS_SCALABLE_D); - } - else - { - regNumber indexReg = loadConstantHelper(indexVal); - emit->emitInsSve_R_R(INS_sve_dup, emitSize, targetReg, indexReg, opt); - } - - break; - } - - case SimdScalableSequence: - { - // FP sequences should have been imported into a set of nodes - assert(varTypeIsIntegral(baseType)); + if (varTypeIsIntegral(baseType) && indexHasImm && + (emitter::isValidSimm<8>(index) || emitter::isValidSimm_MultipleOf<8, 256>(index))) + { + emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, index, opt); + } + else if ((baseType == TYP_FLOAT) && + emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF32[0])) + { + emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, + simdVal.gtSimdScalableIndexF32[0], INS_OPTS_SCALABLE_S); + } + else if ((baseType == TYP_DOUBLE) && + emitter::canEncodeFloatImm8(simdVal.gtSimdScalableIndexF64[0])) + { + emit->emitIns_R_F(INS_sve_fdup, EA_SCALABLE, targetReg, + static_cast(simdVal.gtSimdScalableIndexF64[0]), + INS_OPTS_SCALABLE_D); + } + else + { + regNumber indexReg = loadConstantHelper(indexVal); + emit->emitInsSve_R_R(INS_sve_dup, emitSize, targetReg, indexReg, opt); + } - if (indexHasImm && stepHasImm && emitter::isValidSimm<5>(index) && - emitter::isValidSimm<5>(step)) - { - emit->emitInsSve_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, index, step, opt); - } - else if (indexHasImm && emitter::isValidSimm<5>(index)) - { - regNumber stepReg = loadConstantHelper(stepVal); - emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, stepReg, index, opt, - INS_SCALABLE_OPTS_IMM_FIRST); - } - else if (stepHasImm && emitter::isValidSimm<5>(step)) - { - regNumber indexReg = loadConstantHelper(indexVal); - emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, indexReg, step, opt); + break; } - else + + case SimdScalableSequence: { - regNumber indexReg = loadConstantHelper(indexVal); - regNumber 
stepReg = loadConstantHelper(stepVal); - emit->emitInsSve_R_R_R(INS_sve_index, emitSize, targetReg, indexReg, stepReg, opt); + // FP sequences should have been imported into a set of nodes + assert(varTypeIsIntegral(baseType)); + + if (indexHasImm && stepHasImm && emitter::isValidSimm<5>(index) && + emitter::isValidSimm<5>(step)) + { + emit->emitInsSve_R_I_I(INS_sve_index, EA_SCALABLE, targetReg, index, step, opt); + } + else if (indexHasImm && emitter::isValidSimm<5>(index)) + { + regNumber stepReg = loadConstantHelper(stepVal); + emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, stepReg, index, opt, + INS_SCALABLE_OPTS_IMM_FIRST); + } + else if (stepHasImm && emitter::isValidSimm<5>(step)) + { + regNumber indexReg = loadConstantHelper(indexVal); + emit->emitInsSve_R_R_I(INS_sve_index, emitSize, targetReg, indexReg, step, opt); + } + else + { + regNumber indexReg = loadConstantHelper(indexVal); + regNumber stepReg = loadConstantHelper(stepVal); + emit->emitInsSve_R_R_R(INS_sve_index, emitSize, targetReg, indexReg, stepReg, opt); + } + break; } - break; - } - case SimdScalableScalar: - { - // Clear the entire target register - emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, 0, opt); + case SimdScalableScalar: + { + // Clear the entire target register + emit->emitInsSve_R_I(INS_sve_dup, EA_SCALABLE, targetReg, 0, opt); - // Use NEON instructions to load the constant (to avoid using predicates) + // Use NEON instructions to load the constant (to avoid using predicates) - if (varTypeIsIntegral(baseType) && indexHasImm && - emitter::emitIns_valid_imm_for_mov(index, emitSize)) - { - emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, index); - } - else if ((baseType == TYP_FLOAT) && - emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF32[0])) - { - emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, - static_cast(simdVal.gtSimdScalableIndexF32[0])); - } - else if ((baseType == TYP_DOUBLE) && - 
emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) - { - emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, simdVal.gtSimdScalableIndexF64[0]); - } - else - { - regNumber indexReg = loadConstantHelper(indexVal); - emit->emitIns_R_R(INS_ins, emitSize, targetReg, indexReg, INS_OPTS_16B); + if (varTypeIsIntegral(baseType) && indexHasImm && + emitter::emitIns_valid_imm_for_mov(index, emitSize)) + { + emit->emitIns_R_I(INS_mov, EA_16BYTE, targetReg, index); + } + else if ((baseType == TYP_FLOAT) && + emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF32[0])) + { + emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, + static_cast(simdVal.gtSimdScalableIndexF32[0])); + } + else if ((baseType == TYP_DOUBLE) && + emitter::emitIns_valid_imm_for_fmov(simdVal.gtSimdScalableIndexF64[0])) + { + emit->emitIns_R_F(INS_fmov, EA_16BYTE, targetReg, + simdVal.gtSimdScalableIndexF64[0]); + } + else + { + regNumber indexReg = loadConstantHelper(indexVal); + emit->emitIns_R_R(INS_ins, emitSize, targetReg, indexReg, INS_OPTS_16B); + } + break; } - break; - } - default: - unreached(); - break; + default: + unreached(); + break; + } } break; } From dd6caa6e61dffc30fca76c9d8b16efe8c1cc080f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 4 Jun 2026 10:18:14 +0100 Subject: [PATCH 54/58] add additional TYP_SIMD checks --- src/coreclr/jit/gentree.cpp | 61 ++++++++++++++++++++++++---- src/coreclr/jit/hwintrinsicarm64.cpp | 8 +--- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index f9c7787a8c4de2..1d5a0b168fb6cf 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9437,6 +9437,12 @@ GenTreeMskCon* Compiler::gtNewMskConNode(var_types type, var_types baseType, boo GenTree* Compiler::gtNewAllBitsSetConNode(var_types type) { #ifdef FEATURE_SIMD +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) + { + return gtNewSimdVconNode(type, TYP_BYTE, SimdScalableRepeated, 
0xFF); + } +#endif // TARGET_ARM64 if (varTypeIsSIMD(type)) { GenTreeVecCon* allBitsSet = gtNewVconNode(type); @@ -9526,6 +9532,25 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) GenTree* Compiler::gtNewOneConNode(var_types type, var_types simdBaseType /* = TYP_UNDEF */) { #if defined(FEATURE_SIMD) +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) + { + assert(simdBaseType != TYP_UNDEF); + + GenTree* one = nullptr; + if (varTypeIsIntegral(simdBaseType)) + { + one = varTypeIsLong(simdBaseType) ? gtNewLconNode(1) : gtNewIconNode(1); + } + else + { + assert(varTypeIsFloating(simdBaseType)); + one = gtNewDconNode(1.0, simdBaseType); + } + + return gtNewSimdCreateBroadcastNode(type, one, simdBaseType, SIZE_UNKNOWN); + } +#endif // TARGET_ARM64 if (varTypeIsSIMD(type)) { GenTreeVecCon* one = gtNewVconNode(type); @@ -24227,36 +24252,36 @@ GenTree* Compiler::gtNewSimdCmpOpNode( // We need to treat op1 and op2 as signed for comparison purpose after // the transformation. - var_types opType = simdBaseType; - GenTreeVecCon* vecCon1 = gtNewVconNode(type); + var_types opType = simdBaseType; + GenTree* vecCon1 = nullptr; switch (simdBaseType) { case TYP_UBYTE: { simdBaseType = TYP_BYTE; - vecCon1->EvaluateBroadcastInPlace(INT8_MIN); + vecCon1 = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(INT8_MIN), opType, simdSize); break; } case TYP_USHORT: { simdBaseType = TYP_SHORT; - vecCon1->EvaluateBroadcastInPlace(INT16_MIN); + vecCon1 = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(INT16_MIN), opType, simdSize); break; } case TYP_UINT: { simdBaseType = TYP_INT; - vecCon1->EvaluateBroadcastInPlace(INT32_MIN); + vecCon1 = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(INT32_MIN), opType, simdSize); break; } case TYP_ULONG: { simdBaseType = TYP_LONG; - vecCon1->EvaluateBroadcastInPlace(INT64_MIN); + vecCon1 = gtNewSimdCreateBroadcastNode(type, gtNewLconNode(INT64_MIN), opType, simdSize); break; } @@ -25471,6 +25496,28 @@ GenTree* 
Compiler::gtNewSimdGetIndicesNode(var_types type, var_types simdBaseTyp assert(varTypeIsArithmetic(simdBaseType)); +#if defined(TARGET_ARM64) + if (type == TYP_SIMD) + { + GenTreeVecCon* indices = gtNewSimdVconNode(type, simdBaseType, SimdScalableSequence, 0); + + if (simdBaseType == TYP_FLOAT) + { + indices->gtSimdScalableVal.gtSimdScalableStepF32[0] = 1.0f; + } + else if (simdBaseType == TYP_DOUBLE) + { + indices->gtSimdScalableVal.gtSimdScalableStepF64[0] = 1.0; + } + else + { + indices->gtSimdScalableVal.gtSimdScalableStep = 1; + } + + return indices; + } +#endif // TARGET_ARM64 + GenTreeVecCon* indices = gtNewVconNode(type); uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); @@ -33859,7 +33906,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) simd_t simdVal = {}; - if (GenTreeVecCon::IsHWIntrinsicCreateConstant(tree, simdVal)) + if ((retType != TYP_SIMD) && GenTreeVecCon::IsHWIntrinsicCreateConstant(tree, simdVal)) { GenTreeVecCon* vecCon = gtNewVconNode(retType); diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index abb6503fd0e312..72e705ac0e5c43 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2926,13 +2926,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { // Import as a constant vector 0 -#if defined(DEBUG) - if (JitConfig.JitUseScalableVectorT()) + if (retType == TYP_SIMD) { retNode = gtNewSimdVconNode(retType, simdBaseType, SimdScalableRepeated, 0); break; } -#endif // DEBUG GenTreeVecCon* vecCon = gtNewVconNode(retType); vecCon->gtSimdVal = simd_t::Zero(); @@ -2961,8 +2959,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { int64_t pattern = op1->AsIntConCommon()->IntegralValue(); -#if defined(DEBUG) - if (JitConfig.JitUseScalableVectorT()) + if (retType == TYP_SIMD) { if ((pattern == SVE_PATTERN_ALL) || (pattern == SVE_PATTERN_POW2)) { @@ -2990,7 +2987,6 @@ GenTree* 
Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } else -#endif // DEBUG { simd_t simdVal; From f251de17336a001af25639d4680e0663e7ca17af Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 4 Jun 2026 10:45:55 +0100 Subject: [PATCH 55/58] fix VectorT create for floats --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 9 +++++++++ src/coreclr/jit/lsraarm64.cpp | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 1c42a6334b4e7a..2cf32904feb337 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -3114,6 +3114,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_VectorT_CreateScalarUnsafe: { emitSize = (opt == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; + + if (varTypeIsFloating(intrin.baseType)) + { + regNumber tmpReg = internalRegisters.Extract(node, RBM_ALLINT); + insOpts fmovOpt = (emitSize == EA_8BYTE) ? 
INS_OPTS_D_TO_8BYTE : INS_OPTS_S_TO_4BYTE; + GetEmitter()->emitIns_Mov(INS_fmov, emitSize, tmpReg, op1Reg, /* canSkip */ false, fmovOpt); + op1Reg = tmpReg; + } + GetEmitter()->emitInsSve_R_R(ins, emitSize, targetReg, op1Reg, opt); break; } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e95cdacccd0fc5..207e14ed269f9c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1585,6 +1585,14 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // Build any additional special cases switch (intrin.id) { + case NI_VectorT_Create: + case NI_VectorT_CreateScalarUnsafe: + if (varTypeIsFloating(intrin.baseType)) + { + buildInternalIntRegisterDefForNode(intrinsicTree); + } + break; + case NI_Sve2_GatherVectorInt16SignExtendNonTemporal: case NI_Sve2_GatherVectorInt32SignExtendNonTemporal: case NI_Sve2_GatherVectorNonTemporal: From 00c8558cbcf81c3e9046f87caa21ab7bdb68da23 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 4 Jun 2026 11:26:39 +0100 Subject: [PATCH 56/58] TYP_SIMD is Arm64 only --- src/coreclr/jit/gentree.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index b2be0869689f30..6d8c30704e8a43 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -33901,7 +33901,11 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) simd_t simdVal = {}; - if ((retType != TYP_SIMD) && GenTreeVecCon::IsHWIntrinsicCreateConstant(tree, simdVal)) + if ( +#if defined(TARGET_ARM64) + (retType != TYP_SIMD) && +#endif // TARGET_ARM64 + GenTreeVecCon::IsHWIntrinsicCreateConstant(tree, simdVal)) { GenTreeVecCon* vecCon = gtNewVconNode(retType); From 88265cb6b8153bcf165922d1c19b26be41e6618a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 4 Jun 2026 11:58:45 +0100 Subject: [PATCH 57/58] don't use C++20 --- src/coreclr/jit/simd.h | 10 +++++++--- 1 file changed, 7 
insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index f30d871f03c26d..a9229eb70ee12a 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -2203,9 +2203,13 @@ struct simdscalable_t static simdscalable_t Zero() { - return {.gtSimdScalableBaseType = TYP_BYTE, - .gtSimdScalableKind = SimdScalableRepeated, - .gtSimdScalableIndex = 0}; + simdscalable_t result = {}; + + result.gtSimdScalableBaseType = TYP_BYTE; + result.gtSimdScalableKind = SimdScalableRepeated; + result.gtSimdScalableIndex = 0; + + return result; } }; From 5d9259746370c9b86ce1f6bea98aa2b1c5251593 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 4 Jun 2026 13:16:45 +0100 Subject: [PATCH 58/58] Add valuenum casting --- src/coreclr/jit/valuenum.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index c87537c38519f9..32ef25a40b613b 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -2070,7 +2070,7 @@ class ValueNumStore #if defined(TARGET_ARM64) if (val.IsScalable()) { - hash = static_cast(hash ^ val.isScalable); + hash = static_cast(hash ^ static_cast(val.isScalable)); // simdmaskscalable_t::operator== treats all-zero scalable masks as equal // regardless of base type, so canonicalize that case in the hash as well. if (!val.scalable.IsZero()) @@ -2081,7 +2081,7 @@ class ValueNumStore } else { - hash = static_cast(hash ^ val.isScalable); + hash = static_cast(hash ^ static_cast(val.isScalable)); hash = static_cast(hash ^ val.fixed.u32[0]); hash = static_cast(hash ^ val.fixed.u32[1]); }