diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index cc2e46f8203e8c..428266d7b7c240 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5234,6 +5234,10 @@ class Compiler bool hasFullRangeImm, bool *useFallback); +#if defined(TARGET_XARCH) + static var_types getHWIntrinsicWidenType(var_types simdBaseType); +#endif // TARGET_XARCH + #if defined(TARGET_ARM64) void getHWIntrinsicImmTypes(NamedIntrinsic intrinsic, diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index aed5d2f236e6d3..f9808f6ff44063 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1661,46 +1661,6 @@ static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC return (category != HW_Category_Special) && !HWIntrinsicInfo::HasSpecialImport(intrinsicId); } -//------------------------------------------------------------------------ -// isSupportedBaseType -// -// Arguments: -// intrinsicId - HW intrinsic id -// baseJitType - Base JIT type of the intrinsic. -// -// Return Value: -// returns true if the baseType is supported for given intrinsic. -// -static bool isSupportedBaseType(NamedIntrinsic intrinsic, CorInfoType baseJitType) -{ - if (baseJitType == CORINFO_TYPE_UNDEF) - { - return false; - } - - var_types baseType = JitType2PreciseVarType(baseJitType); - - // We don't actually check the intrinsic outside of the false case as we expect - // the exposed managed signatures are either generic and support all types - // or they are explicit and support the type indicated. 
- - if (varTypeIsArithmetic(baseType)) - { - return true; - } - -#ifdef DEBUG - CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsic); -#ifdef TARGET_XARCH - assert((isa == InstructionSet_Vector512) || (isa == InstructionSet_Vector256) || (isa == InstructionSet_Vector128)); -#endif // TARGET_XARCH -#ifdef TARGET_ARM64 - assert((isa == InstructionSet_Vector64) || (isa == InstructionSet_Vector128)); -#endif // TARGET_ARM64 -#endif // DEBUG - return false; -} - static bool isSupportedBaseType(NamedIntrinsic intrinsic, var_types baseType) { if (baseType == TYP_UNDEF) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index de2322872c60fc..e952d8a30c11a1 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -113,7 +113,7 @@ HARDWARE_INTRINSIC(Vector128, MinNative, HARDWARE_INTRINSIC(Vector128, MinNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) @@ -252,7 +252,7 @@ HARDWARE_INTRINSIC(Vector256, MinNative, HARDWARE_INTRINSIC(Vector256, MinNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, NarrowWithSaturation, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, NarrowWithSaturation, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) @@ -393,7 +393,7 @@ HARDWARE_INTRINSIC(Vector512, MinNative, HARDWARE_INTRINSIC(Vector512, MinNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, NarrowWithSaturation, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, NarrowWithSaturation, 64, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index ef52964d430eac..cca43e5d28d91c 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -3342,211 +3342,164 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - if (simdBaseType == TYP_DOUBLE) + if (varTypeIsFloating(simdBaseType)) { - // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType - retNode = gtNewSimdNarrowNode(retType, op1, op2, TYP_FLOAT, simdSize); + retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseType, simdSize); } - else if ((simdSize == 16) && ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_INT))) + else if (((simdSize == 16) || (simdSize == 32)) && + ((simdBaseType == TYP_BYTE) || (simdBaseType == TYP_SHORT))) { - // PackSignedSaturate uses the base type of the return for the simdBaseType - simdBaseType = (simdBaseType == TYP_SHORT) ? TYP_BYTE : TYP_SHORT; - - intrinsic = NI_X86Base_PackSignedSaturate; + intrinsic = (simdSize == 32) ? 
NI_AVX2_PackSignedSaturate : NI_X86Base_PackSignedSaturate; retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + + if (simdSize == 32) + { + retNode = gtNewSimdHWIntrinsicNode(retType, retNode, gtNewIconNode(SHUFFLE_WYZX), + NI_AVX2_Permute4x64, TYP_LONG, simdSize); + } } else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - if ((simdSize == 32) || (simdSize == 64)) + switch (simdBaseType) { - if (simdSize == 32) + case TYP_BYTE: { - intrinsic = NI_Vector256_ToVector512Unsafe; - - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD64, op1, intrinsic, simdBaseType, simdSize); - op1 = gtNewSimdWithUpperNode(TYP_SIMD64, op1, op2, simdBaseType, simdSize * 2); + intrinsic = (simdSize == 16) ? NI_AVX512_ConvertToVector128SByteWithSaturation + : NI_AVX512_ConvertToVector256SByteWithSaturation; + break; } - switch (simdBaseType) + case TYP_UBYTE: { - case TYP_SHORT: - { - intrinsic = NI_AVX512_ConvertToVector256SByteWithSaturation; - break; - } - - case TYP_USHORT: - { - intrinsic = NI_AVX512_ConvertToVector256ByteWithSaturation; - break; - } - - case TYP_INT: - { - intrinsic = NI_AVX512_ConvertToVector256Int16WithSaturation; - break; - } - - case TYP_UINT: - { - intrinsic = NI_AVX512_ConvertToVector256UInt16WithSaturation; - break; - } - - case TYP_LONG: - { - intrinsic = NI_AVX512_ConvertToVector256Int32WithSaturation; - break; - } - - case TYP_ULONG: - { - intrinsic = NI_AVX512_ConvertToVector256UInt32WithSaturation; - break; - } + intrinsic = (simdSize == 16) ? NI_AVX512_ConvertToVector128ByteWithSaturation + : NI_AVX512_ConvertToVector256ByteWithSaturation; + break; + } - default: - { - unreached(); - } + case TYP_SHORT: + { + intrinsic = (simdSize == 16) ? 
NI_AVX512_ConvertToVector128Int16WithSaturation + : NI_AVX512_ConvertToVector256Int16WithSaturation; + break; } - } - else - { - assert(simdSize == 16); - intrinsic = NI_Vector128_ToVector256Unsafe; - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, intrinsic, simdBaseType, simdSize); - op1 = gtNewSimdWithUpperNode(TYP_SIMD32, op1, op2, simdBaseType, simdSize * 2); + case TYP_USHORT: + { + intrinsic = (simdSize == 16) ? NI_AVX512_ConvertToVector128UInt16WithSaturation + : NI_AVX512_ConvertToVector256UInt16WithSaturation; + break; + } - switch (simdBaseType) + case TYP_INT: { - case TYP_USHORT: - { - intrinsic = NI_AVX512_ConvertToVector128ByteWithSaturation; - break; - } + intrinsic = (simdSize == 16) ? NI_AVX512_ConvertToVector128Int32WithSaturation + : NI_AVX512_ConvertToVector256Int32WithSaturation; + break; + } - case TYP_UINT: - { - intrinsic = NI_AVX512_ConvertToVector128UInt16WithSaturation; - break; - } + case TYP_UINT: + { + intrinsic = (simdSize == 16) ? NI_AVX512_ConvertToVector128UInt32WithSaturation + : NI_AVX512_ConvertToVector256UInt32WithSaturation; + break; + } - case TYP_LONG: - { - intrinsic = NI_AVX512_ConvertToVector128Int32WithSaturation; - break; - } + default: + unreached(); + } - case TYP_ULONG: - { - intrinsic = NI_AVX512_ConvertToVector128UInt32WithSaturation; - break; - } + var_types opBaseType = getHWIntrinsicWidenType(simdBaseType); + unsigned tmpSimdSize = (simdSize == 64) ? (simdSize / 2) : (simdSize * 2); + var_types tmpSimdType = getSIMDTypeForSize(tmpSimdSize); - default: - { - unreached(); - } - } - } + NamedIntrinsic widenVector = + (simdSize == 16) ? 
NI_Vector128_ToVector256Unsafe : NI_Vector256_ToVector512Unsafe; if (simdSize == 64) { - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, intrinsic, simdBaseType, simdSize); - op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op2, intrinsic, simdBaseType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(tmpSimdType, op1, intrinsic, opBaseType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(tmpSimdType, op2, intrinsic, opBaseType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(retType, op1, widenVector, simdBaseType, tmpSimdSize); retNode = gtNewSimdWithUpperNode(retType, op1, op2, simdBaseType, simdSize); } else { - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseType, simdSize * 2); + op1 = gtNewSimdHWIntrinsicNode(tmpSimdType, op1, widenVector, opBaseType, simdSize); + op1 = gtNewSimdWithUpperNode(tmpSimdType, op1, op2, opBaseType, tmpSimdSize); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, opBaseType, tmpSimdSize); } } else { - // gtNewSimdNarrowNode uses the base type of the return for the simdBaseType - var_types narrowSimdBaseType; + assert(!varTypeIsSmall(simdBaseType) || varTypeIsUnsigned(simdBaseType)); - GenTreeVecCon* minCns = varTypeIsSigned(simdBaseType) ? gtNewVconNode(retType) : nullptr; - GenTreeVecCon* maxCns = gtNewVconNode(retType); + // This does a clamp which is defined as: Min(Max(value, min), max), which means + // that we do a Max computation for signed only. Unsigned already has a shared + // lower bound of 0. 
+ + GenTreeVecCon* maxCns = gtNewVconNode(retType); + var_types opBaseType = getHWIntrinsicWidenType(simdBaseType); switch (simdBaseType) { - case TYP_SHORT: - { - minCns->EvaluateBroadcastInPlace(INT8_MIN); - maxCns->EvaluateBroadcastInPlace(INT8_MAX); - - narrowSimdBaseType = TYP_BYTE; - break; - } - - case TYP_USHORT: + case TYP_UBYTE: { maxCns->EvaluateBroadcastInPlace(UINT8_MAX); - narrowSimdBaseType = TYP_UBYTE; break; } - case TYP_INT: + case TYP_USHORT: { - minCns->EvaluateBroadcastInPlace(INT16_MIN); - maxCns->EvaluateBroadcastInPlace(INT16_MAX); - - narrowSimdBaseType = TYP_SHORT; + maxCns->EvaluateBroadcastInPlace(UINT16_MAX); break; } case TYP_UINT: { - maxCns->EvaluateBroadcastInPlace(UINT16_MAX); - narrowSimdBaseType = TYP_USHORT; + maxCns->EvaluateBroadcastInPlace(UINT32_MAX); break; } - case TYP_LONG: + case TYP_INT: { + GenTreeVecCon* minCns = gtNewVconNode(retType); minCns->EvaluateBroadcastInPlace(INT32_MIN); - maxCns->EvaluateBroadcastInPlace(INT32_MAX); - narrowSimdBaseType = TYP_INT; - break; - } + op1 = gtNewSimdMinMaxNode(retType, op1, minCns, opBaseType, simdSize, /* isMax */ true, + /* isMagnitude */ false, /* isNumber */ false); + op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(minCns), opBaseType, simdSize, + /* isMax */ true, /* isMagnitude */ false, /* isNumber */ false); - case TYP_ULONG: - { - maxCns->EvaluateBroadcastInPlace(UINT32_MAX); - narrowSimdBaseType = TYP_UINT; + maxCns->EvaluateBroadcastInPlace(INT32_MAX); break; } default: - { unreached(); - } - } - - // This does a clamp which is defined as: Min(Max(value, min), max) - // which means that we do a max computation if a minimum constant is specified - // There will be none specified for unsigned to unsigned narrowing since - // they share a lower bound (0) and will already be correct. 
- - if (minCns != nullptr) - { - op1 = gtNewSimdMinMaxNode(retType, op1, minCns, simdBaseType, simdSize, /* isMax */ true, - /* isMagnitude */ false, /* isNumber */ false); - op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(minCns), simdBaseType, simdSize, - /* isMax */ true, /* isMagnitude */ false, /* isNumber */ false); } - op1 = gtNewSimdMinMaxNode(retType, op1, maxCns, simdBaseType, simdSize, /* isMax */ false, + op1 = gtNewSimdMinMaxNode(retType, op1, maxCns, opBaseType, simdSize, /* isMax */ false, /* isMagnitude */ false, /* isNumber */ false); - op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(maxCns), simdBaseType, simdSize, + op2 = gtNewSimdMinMaxNode(retType, op2, gtCloneExpr(maxCns), opBaseType, simdSize, /* isMax */ false, /* isMagnitude */ false, /* isNumber */ false); - retNode = gtNewSimdNarrowNode(retType, op1, op2, narrowSimdBaseType, simdSize); + if (varTypeIsSmall(simdBaseType)) + { + intrinsic = (simdSize == 32) ? NI_AVX2_PackUnsignedSaturate : NI_X86Base_PackUnsignedSaturate; + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseType, simdSize); + + if (simdSize == 32) + { + retNode = gtNewSimdHWIntrinsicNode(retType, retNode, gtNewIconNode(SHUFFLE_WYZX), + NI_AVX2_Permute4x64, TYP_ULONG, simdSize); + } + } + else + { + retNode = gtNewSimdNarrowNode(retType, op1, op2, simdBaseType, simdSize); + } } } break; @@ -5593,4 +5546,39 @@ void Compiler::getHWIntrinsicImmOps(NamedIntrinsic intrinsic, } } +//------------------------------------------------------------------------ +// getHWIntrinsicWidenType: Gets the simdBaseType to use for widening intrinsics +// +// Arguments: +// simdBaseType -- The source type to be widened +// +// Return Value: +// the widened type. 
+//
+var_types Compiler::getHWIntrinsicWidenType(var_types simdBaseType)
+{
+    // Only element types that have a wider counterpart are accepted here:
+    // 64-bit integers and double have nothing to widen to.
+    assert(varTypeIsArithmetic(simdBaseType));
+    assert((simdBaseType != TYP_DOUBLE) && !varTypeIsLong(simdBaseType));
+
+    var_types widenedType = TYP_UNDEF;
+
+    // Each type maps to the same-signedness (or floating) type of twice the width.
+    switch (simdBaseType)
+    {
+        case TYP_BYTE:
+        {
+            widenedType = TYP_SHORT;
+            break;
+        }
+
+        case TYP_UBYTE:
+        {
+            widenedType = TYP_USHORT;
+            break;
+        }
+
+        case TYP_SHORT:
+        {
+            widenedType = TYP_INT;
+            break;
+        }
+
+        case TYP_USHORT:
+        {
+            widenedType = TYP_UINT;
+            break;
+        }
+
+        case TYP_INT:
+        {
+            widenedType = TYP_LONG;
+            break;
+        }
+
+        case TYP_UINT:
+        {
+            widenedType = TYP_ULONG;
+            break;
+        }
+
+        case TYP_FLOAT:
+        {
+            widenedType = TYP_DOUBLE;
+            break;
+        }
+
+        default:
+        {
+            unreached();
+        }
+    }
+
+    return widenedType;
+}
+
 #endif // FEATURE_HW_INTRINSICS