Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
7fac1f9
Arm64 SVE: Support scalable constant vectors and masks
a74nh Feb 27, 2026
71dd4f4
Merge branch 'main' into truemasknode_github
jakobbotsch Apr 30, 2026
c3afbc8
Fix AllBitsSet functionality
a74nh Apr 30, 2026
fd84125
Remove duplicate loadConstantHelper
a74nh Apr 30, 2026
43f648b
Remove extra break
a74nh Apr 30, 2026
d6a99e1
Ensure index and step have distinct temp registers
a74nh Apr 30, 2026
e04c472
fix formatting
a74nh Apr 30, 2026
91d235c
Fix GetHashCode for scalables
a74nh Apr 30, 2026
7c7ebbd
Fix IsTrueMask logic
a74nh Apr 30, 2026
cec1eb0
Fix IsTrueMask logic
a74nh Apr 30, 2026
183bc23
Reserve correct registers for constant vectors
a74nh Apr 30, 2026
160810c
Canonicalize for simdscalable_t zero
a74nh Apr 30, 2026
168b84d
Check all bits when converting to/from vector/mask
a74nh Apr 30, 2026
6c46277
fix call to AllBitsSet
a74nh May 1, 2026
7c02f28
Fix codegen for SimdScalableScalar floats
a74nh May 1, 2026
1c5bf92
use memcpy for getting floats
a74nh May 1, 2026
3a3e1d3
Add debug check
a74nh May 1, 2026
490bec7
Add VN support for scalable+fixed masks
a74nh May 1, 2026
b8d75a2
set step to 0 in EvaluateSimdCvtScalableMaskToVector
a74nh May 1, 2026
170665e
formatting
a74nh May 1, 2026
319f7bd
Add assert to GetConstantSimdMask
a74nh May 1, 2026
064feae
fix FEATURE_MASKED_HW_INTRINSICS defines
a74nh May 1, 2026
1be5041
Better float support for sequence nodes
a74nh May 14, 2026
ddfc802
merge main
a74nh May 15, 2026
9ba8e2e
Fix use of simdmaskvalue_t
a74nh May 15, 2026
bf01233
Fix constants codegen
a74nh May 15, 2026
d34c1e6
generate true mask with FFs
a74nh May 15, 2026
c3dfe81
Special case zero in valuenum
a74nh May 15, 2026
9897c97
Fix codegen casting
a74nh May 15, 2026
691aaef
handle large bits in isValidSimm
a74nh May 15, 2026
f24b950
Remove redundant ifdef
a74nh May 15, 2026
f1cefa5
Copy floats
a74nh May 15, 2026
d7f5ac0
document gtSimdMaskScalableIndex
a74nh May 15, 2026
f6ee722
initialise index in BroadcastConstantToSimdScalable
a74nh May 15, 2026
bd26a42
match lsra to codegen
a74nh May 18, 2026
ff17281
initialise canEncodeScalar
a74nh May 18, 2026
7af9cee
Check for invalid index in EvaluateSimdCvtScalableMaskToVector
a74nh May 18, 2026
e5f9829
Use data version of gtNewVconNode
a74nh May 18, 2026
878ea9d
allow default option in lsra switch
a74nh May 18, 2026
c177772
initialise step in BroadcastConstantToSimdScalable
a74nh May 18, 2026
024a8e3
noway assert in GetConstantSimdMask
a74nh May 18, 2026
4d639e4
Call overload gtNewMskConNode from gtNewMskConNode
a74nh May 19, 2026
adb6b9f
Make VN isScalable Arm64 only
a74nh May 28, 2026
3a2a6f5
Do not canonicalize zeros for gtHashValue
a74nh May 28, 2026
bc7b44f
bool type for isScalable
a74nh May 28, 2026
8d69cc9
Remove config check when printing valuenum constants
a74nh May 28, 2026
b856d31
copy simdval directly in valuenu
a74nh May 29, 2026
0e96535
split out scalable printing
a74nh May 29, 2026
2a39849
fix immediate extraction in codegen
a74nh May 29, 2026
8a345ee
merge main
a74nh May 29, 2026
09f2165
formatting
a74nh Jun 1, 2026
11c506d
duplicate uint64_t checks in lsra
a74nh Jun 1, 2026
0e728dc
const param for gtNewSimdVconNode
a74nh Jun 1, 2026
c97f62f
Add simdscalable_t Zero()
a74nh Jun 2, 2026
8b03d4b
Add TYP_SIMD for EvaluateUnaryInPlace
a74nh Jun 3, 2026
4b19e65
Add codegen for zero and allbits in constant codegen
a74nh Jun 3, 2026
dd6caa6
add additional TYP_SIMD checks
a74nh Jun 4, 2026
f251de1
fix VectorT create for floats
a74nh Jun 4, 2026
a728af2
merge main
a74nh Jun 4, 2026
00c8558
TYP_SIMD is Arm64 only
a74nh Jun 4, 2026
88265cb
don't use C++20
a74nh Jun 4, 2026
5d92597
Add valuenum casting
a74nh Jun 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3333,9 +3333,7 @@ GenTree* Compiler::optConstantAssertionProp(const AssertionDsc& curAssertion,
assert(genTypeSize(tree->TypeGet()) == curAssertion.GetOp2().GetSimdSize());

// We can't bash a LCL_VAR into a GenTreeVecCon (different node size), so allocate a fresh node.
GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet());
memcpy(&vecCon->gtSimdVal, curAssertion.GetOp2().GetSimdConstant(), genTypeSize(tree->TypeGet()));
newTree = vecCon;
newTree = gtNewVconNode(tree->TypeGet(), curAssertion.GetOp2().GetSimdConstant());
break;
}
#endif // FEATURE_HW_INTRINSICS
Expand Down
279 changes: 276 additions & 3 deletions src/coreclr/jit/codegenarm64.cpp

Large diffs are not rendered by default.

17 changes: 14 additions & 3 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3290,11 +3290,22 @@ class Compiler

#if defined(FEATURE_SIMD)
GenTreeVecCon* gtNewVconNode(var_types type);
GenTreeVecCon* gtNewVconNode(var_types type, void* data);
GenTreeVecCon* gtNewVconNode(var_types type, const void* data);
#if defined(TARGET_ARM64)
GenTreeVecCon* gtNewSimdVconNode(var_types type, var_types baseType, SimdScalableKind kind, uint64_t index, uint64_t step = 0);

// Convenience overload: builds a scalable vector constant node from an existing
// simdscalable_t description by unpacking its fields and forwarding them to the
// (type, baseType, kind, index, step) overload.
//
// Arguments:
//    type - the type passed through to the created constant node
//    con  - the scalable constant description to copy the fields from; dereferenced
//           unconditionally, so it must not be null
//
// Return Value:
//    Whatever the field-wise gtNewSimdVconNode overload returns.
inline GenTreeVecCon* gtNewSimdVconNode(var_types type, const simdscalable_t* con)
{
    const simdscalable_t& scalable = *con;

    return gtNewSimdVconNode(type,
                             scalable.gtSimdScalableBaseType,
                             scalable.gtSimdScalableKind,
                             scalable.gtSimdScalableIndex,
                             scalable.gtSimdScalableStep);
}
Comment thread
a74nh marked this conversation as resolved.
#endif // TARGET_ARM64
#endif // FEATURE_SIMD

#if defined(FEATURE_MASKED_HW_INTRINSICS)
GenTreeMskCon* gtNewMskConNode(var_types type);
#if defined(TARGET_ARM64)
GenTreeMskCon* gtNewMskConNode(var_types type, var_types baseType, bool index);
#endif // TARGET_ARM64
#endif // FEATURE_MASKED_HW_INTRINSICS

GenTree* gtNewAllBitsSetConNode(var_types type);
Expand Down Expand Up @@ -3403,7 +3414,7 @@ class Compiler
var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize);

#if defined(TARGET_ARM64)
GenTree* gtNewSimdAllTrueMaskNode(var_types simdBaseType);
GenTree* gtNewSimdTrueMaskNode(var_types simdBaseType);
GenTree* gtNewSimdFalseMaskByteNode();
#endif

Expand Down Expand Up @@ -3992,7 +4003,7 @@ class Compiler

#if defined(FEATURE_HW_INTRINSICS)
GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree);
GenTreeMskCon* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon);
GenTree* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon);
#endif // FEATURE_HW_INTRINSICS

// Options to control behavior of gtTryRemoveBoxUpstreamEffects
Expand Down
26 changes: 18 additions & 8 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,14 @@ inline bool genExactlyOneBit(T value)
inline regMaskTP genFindLowestBit(regMaskTP value)
{
#ifdef HAS_MORE_THAN_64_REGISTERS
// If we ever need to use this method for predicate
// registers, then handle it.
assert(value.getHigh() == RBM_NONE);
#endif
if (value.getLow() != RBM_NONE)
{
return regMaskTP(genFindLowestBit(value.getLow()));
}
return regMaskTP(RBM_NONE, genFindLowestBit(value.getHigh()));
#else
return regMaskTP(genFindLowestBit(value.getLow()));
#endif
}

/*****************************************************************************
Expand All @@ -117,11 +120,18 @@ inline regMaskTP genFindLowestBit(regMaskTP value)
inline bool genMaxOneBit(regMaskTP value)
{
#ifdef HAS_MORE_THAN_64_REGISTERS
// If we ever need to use this method for predicate
// registers, then handle it.
assert(value.getHigh() == RBM_NONE);
#endif
if (value.getLow() == RBM_NONE)
{
return genMaxOneBit(value.getHigh());
}
if (value.getHigh() == RBM_NONE)
{
return genMaxOneBit(value.getLow());
}
return false;
#else
return genMaxOneBit(value.getLow());
#endif
}

/*****************************************************************************
Expand Down
42 changes: 26 additions & 16 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -804,22 +804,6 @@ static bool isValidUimm_MultipleOf(ssize_t value)
return isValidUimm<bits>(value / mod) && (value % mod == 0);
}

// Returns true if 'value' is a legal signed immediate with 'bits' number of bits,
// i.e. if -2^(bits-1) <= value < 2^(bits-1).
//
// Template Arguments:
//    bits - width of the signed immediate field; must be between 1 and the width
//           of ssize_t (inclusive)
//
// Notes:
//    The bound is computed in size_t rather than int. Shifting the int literal 1
//    overflows once 'bits' reaches 32, even though 'value' is pointer-width.
//    When 'bits' equals the full width of ssize_t every value is representable,
//    so no bound is computed at all (2^(bits-1) would not fit in ssize_t).
template <const size_t bits>
static bool isValidSimm(ssize_t value)
{
    // The JIT only targets platforms with 8-bit bytes.
    constexpr size_t valueBits = sizeof(ssize_t) * 8;
    static_assert(bits > 0);
    static_assert(bits <= valueBits);

    if constexpr (bits == valueBits)
    {
        return true;
    }
    else
    {
        constexpr ssize_t limit = static_cast<ssize_t>(size_t{1} << (bits - 1));
        return (-limit <= value) && (value < limit);
    }
}

// Returns true if 'value' is an exact multiple of 'mod' and the quotient
// 'value / mod' is a legal signed immediate with 'bits' number of bits.
//
// Template Arguments:
//    bits - width of the signed immediate field that holds the quotient
//    mod  - the scale the immediate is a multiple of; must be non-zero
template <const size_t bits, const ssize_t mod>
static bool isValidSimm_MultipleOf(ssize_t value)
{
    static_assert(mod != 0);

    const ssize_t scaled    = value / mod;
    const ssize_t remainder = value % mod;

    if (!isValidSimm<bits>(scaled))
    {
        return false;
    }
    return remainder == 0;
}

// Returns true if 'imm' is a valid broadcast immediate for some SVE DUP variants
static bool isValidBroadcastImm(ssize_t imm, emitAttr laneSize)
{
Expand Down Expand Up @@ -1082,6 +1066,32 @@ static bool canEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL, em
// true if 'immDbl' can be encoded using a 'float immediate', also returns the encoding if wbFPI is non-null
static bool canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI = nullptr);

// Returns true if 'value' is a legal signed immediate with 'bits' number of bits,
// i.e. if -2^(bits-1) <= value < 2^(bits-1).
//
// Template Arguments:
//    bits - width of the signed immediate field; must be between 1 and the width
//           of ssize_t (inclusive)
//
// Notes:
//    When 'bits' is the full width of ssize_t every value fits, so the range
//    check is skipped entirely; 2^(bits-1) would not be representable there.
template <const size_t bits>
static bool isValidSimm(ssize_t value)
{
    constexpr size_t totalBits = sizeof(ssize_t) * BITS_PER_BYTE;
    static_assert(bits > 0);
    static_assert(bits <= totalBits);

    if constexpr (bits < totalBits)
    {
        // Build the bound in an unsigned type, then convert once.
        constexpr ssize_t upperBound = static_cast<ssize_t>(size_t{1} << (bits - 1));
        constexpr ssize_t lowerBound = -upperBound;
        return (value >= lowerBound) && (value < upperBound);
    }
    else
    {
        return true;
    }
}

// Returns true if 'value' divides evenly by 'mod' and the resulting quotient is a
// legal signed immediate with 'bits' number of bits.
//
// Template Arguments:
//    bits - width of the signed immediate field that holds 'value / mod'
//    mod  - the required multiple; must be non-zero
template <const size_t bits, const ssize_t mod>
static bool isValidSimm_MultipleOf(ssize_t value)
{
    static_assert(mod != 0);

    const bool quotientFits = isValidSimm<bits>(value / mod);
    const bool isMultiple   = (value % mod) == 0;
    return quotientFits && isMultiple;
}

// Returns the number of bits used by the given 'size'.
inline static unsigned getBitWidth(emitAttr size)
{
Expand Down
Loading
Loading