diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 2246bfcb29bcfe..a6ef13deff4062 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -4042,6 +4042,7 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isPredicateRegister(reg1)); // MMMM assert(isPredicateRegister(reg2)); // gggg assert(isPredicateRegister(reg3)); // NNNN + opt = INS_OPTS_SCALABLE_B; fmt = IF_SVE_DC_3A; break; @@ -6336,11 +6337,15 @@ void emitter::emitInsSve_R_R_R_R(instruction ins, } else { - assert(opt == INS_OPTS_SCALABLE_B); + assert(insOptsScalable(opt)); assert(isPredicateRegister(reg1)); // dddd assert(isPredicateRegister(reg2)); // gggg assert(isPredicateRegister(reg3)); // nnnn assert(isPredicateRegister(reg4)); // mmmm + // We support all lane arrangements, although we require byte arrangement for the + // encoding as there is only one encoding. This operation is bitwise, so it will + // preserve other lane arrangements anyway. + opt = INS_OPTS_SCALABLE_B; fmt = IF_SVE_CZ_4A; } break; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index c1ad9732b7514c..2db668d8e155ef 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9435,6 +9435,14 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) vecCon->gtSimdVal = simd_t::Zero(); return vecCon; } +#ifdef FEATURE_MASKED_HW_INTRINSICS + else if (varTypeIsMask(type)) + { + GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); + mskCon->gtSimdMaskVal = simdmask_t::Zero(); + return mskCon; + } +#endif // FEATURE_MASKED_HW_INTRINSICS #endif // FEATURE_SIMD type = genActualType(type); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index a71ed68351c9b4..5ba64e22cff02c 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1968,6 +1968,7 @@ struct GenTree inline bool IsMaskZero() const; inline bool IsMaskAllBitsSet() const; inline bool IsTrueMask(var_types simdBaseType) const; + inline bool 
IsSelectZero() const; inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType); @@ -9847,6 +9848,22 @@ inline bool GenTree::IsTrueMask(var_types simdBaseType) const return false; } +//------------------------------------------------------------------------ +// IsSelectZero: Is the given node a zero value for the purposes of +// conditional selection. ConditionalSelect can operate on all +// vectors or all masks. +// +// Returns true if the node is an all-false mask node or a zero vector node. +// +// If such a node is used in op3 of ConditionalSelect, it will result in a +// simple filtering operation on the vector or mask node in op2, using the mask +// provided in op1. +// +inline bool GenTree::IsSelectZero() const +{ + return IsVectorZero() || IsMaskZero(); +} + //------------------------------------------------------------------- // GetIntegralVectorConstElement: Gets the value of a given element in an integral vector constant // diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 7b7b211a8f9a75..f8485915b6f6e5 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -1197,6 +1197,20 @@ struct HWIntrinsicInfo } #endif // FEATURE_MASKED_HW_INTRINSICS + // IsSveConditionalSelect: Is this intrinsic a ConditionalSelect intrinsic? + // + // Arguments: + // id -- Intrinsic ID to test + // + // Return value: + // Returns true if the ID is either the vector or the mask variant of + // ConditionalSelect. 
+ // + static bool IsSveConditionalSelect(NamedIntrinsic id) + { + return (id == NI_Sve_ConditionalSelect) || (id == NI_Sve_ConditionalSelect_Predicates); + } + #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 7275c2ffe4f305..4972efe7f72938 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2948,7 +2948,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } // Was not able to generate a pattern, instead import a truemaskall - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, intrinsic, simdBaseType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize); break; } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5ab1fe078b43ed..555c69cf3b8a92 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -552,7 +552,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { // Handle case where op2 is operation that needs embedded mask GenTree* op2 = intrin.op2; - assert(intrin.id == NI_Sve_ConditionalSelect); + assert(HWIntrinsicInfo::IsSveConditionalSelect(intrin.id)); assert(op2->OperIsHWIntrinsic()); assert(op2->isContained()); @@ -597,7 +597,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // Shared code for setting up embedded mask arg for intrinsics with 3+ operands auto emitEmbeddedMaskSetupInstrs = [&] { - if (intrin.op3->IsVectorZero() || (targetReg != falseReg) || (targetReg != embMaskOp1Reg)) + if (intrin.op3->IsSelectZero() || (targetReg != falseReg) || (targetReg != embMaskOp1Reg)) { return 1; } @@ -605,7 +605,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) }; auto emitEmbeddedMaskSetup = [&] { - if (intrin.op3->IsVectorZero()) + if (intrin.op3->IsSelectZero()) { // If `falseReg` is zero, then move 
the first operand of `intrinEmbMask` in the // destination using /Z. @@ -712,7 +712,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (intrin.op3->isContained()) { - assert(intrin.op3->IsVectorZero()); + assert(intrin.op3->IsSelectZero()); if (intrin.op1->isContained() || intrin.op1->IsTrueMask(node->GetSimdBaseType())) { @@ -818,7 +818,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // Predicate functionality is currently not exposed for this API, // but the FADDA instruction only has a predicated variant. // Thus, we expect the JIT to wrap this with CndSel. - assert(intrin.op3->IsVectorZero()); + assert(intrin.op3->IsSelectZero()); break; case NI_Sve2_AddSaturate: @@ -881,7 +881,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } }; - if (intrin.op3->IsVectorZero()) + if (intrin.op3->IsSelectZero()) { // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the // destination using /Z. @@ -1228,7 +1228,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // This handles optimizations for instructions that have // an implicit 'zero' vector of what would be the second operand. 
if (HWIntrinsicInfo::SupportsContainment(intrin.id) && intrin.op2->isContained() && - intrin.op2->IsVectorZero()) + intrin.op2->IsSelectZero()) { GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); } @@ -2787,7 +2787,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_CreateBreakAfterPropagateMask: case NI_Sve_CreateBreakBeforePropagateMask: - case NI_Sve_ConditionalSelect_Predicates: { GetEmitter()->emitInsSve_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, INS_OPTS_SCALABLE_B); break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index a636c5901c804e..33318b568bfefa 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -63,28 +63,28 @@ HARDWARE_INTRINSIC(Sve, CreateBreakAfterPropagateMask, HARDWARE_INTRINSIC(Sve, CreateBreakBeforeMask, -1, 2, {INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, CreateBreakBeforePropagateMask, -1, 3, {INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, CreateBreakPropagateMask, -1, -1, {INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, {INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, {INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateMaskForFirstActiveElement, -1, 2, {INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, CreateMaskForNextActiveElement, -1, 2, {INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskByte, -1, 2, {INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, 
CreateWhileLessThanMaskDouble, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMaskInt16, -1, 2, {INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) @@ -546,7 +546,7 @@ HARDWARE_INTRINSIC(Sve, And_Predicates, HARDWARE_INTRINSIC(Sve, BitwiseClear_Predicates, -1, 2, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Or_Predicates, -1, 2, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, Xor_Predicates, -1, 2, {INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_sve_eor, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_EmbeddedMaskedOperation|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, ConditionalSelect_Predicates, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, ConditionalSelect_Predicates, -1, 3, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, 
INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Sve, ZipHigh_Predicates, -1, 2, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, ZipLow_Predicates, -1, 2, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, UnzipEven_Predicates, -1, 2, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 6d9fe77b48a6cf..edf48da5dab83b 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1577,6 +1577,17 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) node->gtType = TYP_SIMD16; } +#ifdef TARGET_ARM64 + // Enforce invariant HW_Flag_ReturnsPerElementMask <==> node->TypeIs(TYP_MASK) + // This should happen at all stages of the compiler, but it's especially important to check here, + // as some Lowering analyses (such as embedded masks) will depend on this consistency. 
+ if (node->TypeIs(TYP_MASK) || HWIntrinsicInfo::ReturnsPerElementMask(node->GetHWIntrinsicId())) + { + assert(HWIntrinsicInfo::ReturnsPerElementMask(node->GetHWIntrinsicId())); + assert(node->TypeIs(TYP_MASK)); + } +#endif + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); bool isScalar = false; @@ -1869,6 +1880,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Sve_ConditionalSelect: + case NI_Sve_ConditionalSelect_Predicates: return LowerHWIntrinsicCndSel(node); case NI_Sve_SetFfr: @@ -2007,47 +2019,42 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // Use lastOp to verify if it's a ConditionlSelectNode. size_t lastOpNum = node->GetOperandCount(); - if (node->Op(lastOpNum)->OperIsHWIntrinsic() && - node->Op(lastOpNum)->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConditionalSelect && - TryContainingCselOp(node, node->Op(lastOpNum)->AsHWIntrinsic())) + if (node->Op(lastOpNum)->OperIsHWIntrinsic() && TryContainingCselOp(node, node->Op(lastOpNum)->AsHWIntrinsic())) { LABELEDDISPTREERANGE("Contained conditional select", BlockRange(), node); return node->gtNext; } - // Wrap a conditional select around the embedded mask operation + // Get the existing use of the node before modifying the graph. + bool foundUse = BlockRange().TryGetUse(node, &use); - unsigned simdSize = node->GetSimdSize(); - var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + if (foundUse) + { + // For Vector operations: ConditionalSelect(CreateTrueMask(), Op(...), Vector.Zero) + // For Mask operations: ConditionalSelect(CreateTrueMask(), Op(...), CreateFalseMask()) + bool isMaskOp = HWIntrinsicInfo::ReturnsPerElementMask(node->GetHWIntrinsicId()); - bool foundUse = BlockRange().TryGetUse(node, &use); - GenTree* trueMask = m_compiler->gtNewSimdAllTrueMaskNode(node->GetSimdBaseType()); - GenTree* falseVal = m_compiler->gtNewZeroConNode(simdType); - var_types nodeType = simdType; + var_types selectType = isMaskOp ? 
TYP_MASK : Compiler::getSIMDTypeForSize(node->GetSimdSize()); + NamedIntrinsic selectIntrin = isMaskOp ? NI_Sve_ConditionalSelect_Predicates : NI_Sve_ConditionalSelect; - if (HWIntrinsicInfo::ReturnsPerElementMask(node->GetHWIntrinsicId())) - { - nodeType = TYP_MASK; - } + GenTree* trueMask = m_compiler->gtNewSimdAllTrueMaskNode(node->GetSimdBaseType()); + GenTree* falseVal = m_compiler->gtNewZeroConNode(selectType); + BlockRange().InsertBefore(node, trueMask); + BlockRange().InsertBefore(node, falseVal); - BlockRange().InsertBefore(node, trueMask); - BlockRange().InsertBefore(node, falseVal); + GenTreeHWIntrinsic* condSelNode = + m_compiler->gtNewSimdHWIntrinsicNode(selectType, trueMask, node, falseVal, selectIntrin, + node->GetSimdBaseType(), node->GetSimdSize()); + BlockRange().InsertAfter(node, condSelNode); - GenTreeHWIntrinsic* condSelNode = - m_compiler->gtNewSimdHWIntrinsicNode(nodeType, trueMask, node, falseVal, NI_Sve_ConditionalSelect, - node->GetSimdBaseType(), simdSize); - BlockRange().InsertAfter(node, condSelNode); - if (foundUse) - { use.ReplaceWith(condSelNode); + + LABELEDDISPTREERANGE("Wrapped embedded-mask intrinsic with ConditionalSelect", BlockRange(), condSelNode); } else { - node->ClearUnusedValue(); - condSelNode->SetUnusedValue(); + assert(node->IsUnusedValue()); } - - LABELEDDISPTREERANGE("Embedded HWIntrinisic inside conditional select", BlockRange(), condSelNode); } ContainCheckHWIntrinsic(node); @@ -3745,7 +3752,10 @@ bool Lowering::TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next) // bool Lowering::TryContainingCselOp(GenTreeHWIntrinsic* parentNode, GenTreeHWIntrinsic* childNode) { - assert(childNode->GetHWIntrinsicId() == NI_Sve_ConditionalSelect); + if (!HWIntrinsicInfo::IsSveConditionalSelect(childNode->GetHWIntrinsicId())) + { + return false; + } if (childNode->Op(2)->IsEmbMaskOp()) { @@ -4010,6 +4020,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Sve_ConditionalSelect: + case 
NI_Sve_ConditionalSelect_Predicates: { assert(intrin.numOperands == 3); GenTree* op1 = intrin.op1; @@ -4085,7 +4096,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } // Handle op3 - if (op3->IsVectorZero() && op1->IsTrueMask(node->GetSimdBaseType()) && op2->IsEmbMaskOp()) + if (op3->IsSelectZero() && op1->IsTrueMask(node->GetSimdBaseType()) && op2->IsEmbMaskOp()) { // When we are merging with zero, we can specialize // and avoid instantiating the vector constant. @@ -4206,14 +4217,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) { - assert(cndSelNode->OperIsHWIntrinsic(NI_Sve_ConditionalSelect)); - + NamedIntrinsic selectIntrin = cndSelNode->GetHWIntrinsicId(); + assert(HWIntrinsicInfo::IsSveConditionalSelect(selectIntrin)); GenTree* op1 = cndSelNode->Op(1); GenTree* op2 = cndSelNode->Op(2); GenTree* op3 = cndSelNode->Op(3); GenTree* lowerCndSel = cndSelNode; - if (op2->OperIsHWIntrinsic(NI_Sve_ConditionalSelect)) + if (op2->OperIsHWIntrinsic(selectIntrin)) { // Handle cases where there is a nested ConditionalSelect for `trueValue` GenTreeHWIntrinsic* nestedCndSel = op2->AsHWIntrinsic(); @@ -4231,7 +4242,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) if (nestedOp1->IsTrueMask(cndSelNode->GetSimdBaseType()) && !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && - (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) + (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsSelectZero())) { GenTree* nestedOp2 = nestedCndSel->Op(2); GenTree* nestedOp3 = nestedCndSel->Op(3); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0d74688745910d..1e023bd619302b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2535,7 +2535,7 @@ GenTree* LinearScan::getConsecutiveRegistersOperand(const HWIntrinsic intrin, bo // 
GenTreeHWIntrinsic* LinearScan::getEmbeddedMaskOperand(const HWIntrinsic intrin) { - if ((intrin.id == NI_Sve_ConditionalSelect) && (intrin.op2->IsEmbMaskOp())) + if (HWIntrinsicInfo::IsSveConditionalSelect(intrin.id) && (intrin.op2->IsEmbMaskOp())) { assert(intrin.op2->OperIsHWIntrinsic()); return intrin.op2->AsHWIntrinsic(); @@ -2559,7 +2559,8 @@ GenTreeHWIntrinsic* LinearScan::getContainedCselOperand(GenTreeHWIntrinsic* intr GenTree* currentOp = intrinsicTree->Op(opNum); if (currentOp->OperIs(GT_HWINTRINSIC) && - (currentOp->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConditionalSelect) && currentOp->isContained()) + HWIntrinsicInfo::IsSveConditionalSelect(currentOp->AsHWIntrinsic()->GetHWIntrinsicId()) && + currentOp->isContained()) { return currentOp->AsHWIntrinsic(); }