diff --git a/src/libraries/Common/src/System/HexConverter.cs b/src/libraries/Common/src/System/HexConverter.cs index 10316e0c610325..f0a051393f6660 100644 --- a/src/libraries/Common/src/System/HexConverter.cs +++ b/src/libraries/Common/src/System/HexConverter.cs @@ -96,9 +96,10 @@ public static void ToCharsBuffer(byte value, Span buffer, int startingInde [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] internal static (Vector128, Vector128) AsciiToHexVector128(Vector128 src, Vector128 hexMap) { - Debug.Assert(Ssse3.IsSupported || AdvSimd.Arm64.IsSupported); + Debug.Assert(Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported); // The algorithm is simple: a single srcVec (contains the whole 16b Guid) is converted // into nibbles and then, via hexMap, converted into a HEX representation via @@ -115,6 +116,7 @@ internal static (Vector128, Vector128) AsciiToHexVector128(Vector128 [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static void EncodeTo_Vector128(ReadOnlySpan source, Span destination, Casing casing) { Debug.Assert(source.Length >= (Vector128.Count / 2)); @@ -187,7 +189,7 @@ public static void EncodeToUtf8(ReadOnlySpan source, Span utf8Destin Debug.Assert(utf8Destination.Length >= (source.Length * 2)); #if SYSTEM_PRIVATE_CORELIB - if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported) && (source.Length >= (Vector128.Count / 2))) + if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported || PackedSimd.IsSupported) && (source.Length >= (Vector128.Count / 2))) { EncodeTo_Vector128(source, utf8Destination, casing); return; @@ -204,7 +206,7 @@ public static void EncodeToUtf16(ReadOnlySpan source, Span destinati Debug.Assert(destination.Length >= (source.Length * 2)); #if SYSTEM_PRIVATE_CORELIB - if ((AdvSimd.Arm64.IsSupported || 
Ssse3.IsSupported) && (source.Length >= (Vector128.Count / 2))) + if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported || PackedSimd.IsSupported) && (source.Length >= (Vector128.Count / 2))) { EncodeTo_Vector128(source, Unsafe.BitCast, Span>(destination), casing); return; diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs index da54cdb3372b19..8b5d04065a544e 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs @@ -8,6 +8,7 @@ #if NET using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; #endif @@ -300,6 +301,23 @@ private static uint UpdateVector128(uint adler, ReadOnlySpan source) wprod2 = AdvSimd.MultiplyWideningUpperAndAdd(wprod2, bytes2, tap2.AsByte()); vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod2); } + else if (PackedSimd.IsSupported) + { + // Widening byte sum: each byte -> ushort pair sum -> uint pair sum, then accumulate into vs1. + // Because weights are all positive (1-32), unsigned byte * unsigned byte multiply is valid for vs2. + Vector128 sumPairs1 = PackedSimd.AddPairwiseWidening(bytes1); + Vector128 sumPairs2 = PackedSimd.AddPairwiseWidening(bytes2); + vs1 += PackedSimd.AddPairwiseWidening(sumPairs1) + PackedSimd.AddPairwiseWidening(sumPairs2); + + // bytes * weights -> 8 ushorts low + 8 ushorts high, sum pairwise to 4 uints + 4 uints. 
+ Vector128 wprod1Lo = PackedSimd.MultiplyWideningLower(bytes1, tap1.AsByte()); + Vector128 wprod1Hi = PackedSimd.MultiplyWideningUpper(bytes1, tap1.AsByte()); + vs2 += PackedSimd.AddPairwiseWidening(wprod1Lo) + PackedSimd.AddPairwiseWidening(wprod1Hi); + + Vector128 wprod2Lo = PackedSimd.MultiplyWideningLower(bytes2, tap2.AsByte()); + Vector128 wprod2Hi = PackedSimd.MultiplyWideningUpper(bytes2, tap2.AsByte()); + vs2 += PackedSimd.AddPairwiseWidening(wprod2Lo) + PackedSimd.AddPairwiseWidening(wprod2Hi); + } else { (Vector128 lo1, Vector128 hi1) = Vector128.Widen(bytes1); diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHashShared.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHashShared.cs index 00f1f919a93c70..0f8a6dfa2e7540 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHashShared.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHashShared.cs @@ -9,6 +9,7 @@ #if NET using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; #endif @@ -702,12 +703,25 @@ private static Vector128 MultiplyWideningLower(Vector128 source) Vector64 sourceHigh = Vector128.Shuffle(source, Vector128.Create(1u, 3, 0, 0)).GetLower(); return AdvSimd.MultiplyWideningLower(sourceLow, sourceHigh); } + else if (Sse2.IsSupported) + { + Vector128 sourceLow = Vector128.Shuffle(source, Vector128.Create(1u, 0, 3, 0)); + return Sse2.Multiply(source, sourceLow); + } + else if (PackedSimd.IsSupported) + { + // PackedSimd.MultiplyWideningLower (i64x2.extmul_low_i32x4_u) does + // result[i] = (ulong)a[i] * (ulong)b[i] for i in {0, 1}. + // We need { source[0]*source[1], source[2]*source[3] } to match the Sse2/AdvSimd paths, + // so first move the even lanes into one operand and the odd lanes into the other. 
+ Vector128 evens = Vector128.Shuffle(source, Vector128.Create(0u, 2, 0, 0)); + Vector128 odds = Vector128.Shuffle(source, Vector128.Create(1u, 3, 0, 0)); + return PackedSimd.MultiplyWideningLower(evens, odds); + } else { Vector128 sourceLow = Vector128.Shuffle(source, Vector128.Create(1u, 0, 3, 0)); - return Sse2.IsSupported ? - Sse2.Multiply(source, sourceLow) : - (source & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64() * (sourceLow & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64(); + return (source & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64() * (sourceLow & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64(); } } #endif diff --git a/src/libraries/System.Private.CoreLib/src/System/Guid.cs b/src/libraries/System.Private.CoreLib/src/System/Guid.cs index 60b6b289991ca3..1ac2e1ccf24451 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Guid.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Guid.cs @@ -10,6 +10,7 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; using System.Runtime.Versioning; using System.Text; @@ -1345,7 +1346,7 @@ internal unsafe bool TryFormatCore(Span destination, out int chars } flags >>= 8; - if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian) + if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && BitConverter.IsLittleEndian) { // Vectorized implementation for D, N, P and B formats: // [{|(]dddddddd[-]dddd[-]dddd[-]dddd[-]dddddddddddd[}|)] @@ -1513,9 +1514,10 @@ static void WriteHex(Span dest, int offset, int val, bool appendComma = t [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static (Vector128, Vector128, Vector128) FormatGuidVector128Utf8(Guid value, bool useDashes) { - 
Debug.Assert((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian); + Debug.Assert((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && BitConverter.IsLittleEndian); // Vectorized implementation for D, N, P and B formats: // [{|(]dddddddd[-]dddd[-]dddd[-]dddd[-]dddddddddddd[}|)] diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 16f46daddd7eab..98659953164aec 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -4388,33 +4388,59 @@ internal static void SetElementUnsafe(in this Vector128 vector, int index, [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(PackedSimd))] internal static Vector128 UnpackLow(Vector128 left, Vector128 right) { if (Sse2.IsSupported) { return Sse2.UnpackLow(left, right); } - else if (!AdvSimd.Arm64.IsSupported) + else if (AdvSimd.Arm64.IsSupported) { - ThrowHelper.ThrowNotSupportedException(); + return AdvSimd.Arm64.ZipLow(left, right); + } + else if (PackedSimd.IsSupported) + { + // Compose with two PackedSimd.Swizzle calls (clamp out-of-range to 0) plus OR. + // We call PackedSimd.Swizzle directly rather than Vector128.ShuffleNative because + // the latter goes through a Ssse3 -> AdvSimd.Arm64 -> PackedSimd dispatcher chain + // that the Mono SIMD intrinsic recognizer doesn't always lower cleanly. + // PackedSimd.Shuffle (two-vector i8x16.shuffle) requires constant lane indices + // and is impractical to call portably from generic code paths. 
+ Vector128 leftPart = PackedSimd.Swizzle(left, + Vector128.Create((byte)0, 0xFF, 1, 0xFF, 2, 0xFF, 3, 0xFF, 4, 0xFF, 5, 0xFF, 6, 0xFF, 7, 0xFF)); + Vector128 rightPart = PackedSimd.Swizzle(right, + Vector128.Create((byte)0xFF, 0, 0xFF, 1, 0xFF, 2, 0xFF, 3, 0xFF, 4, 0xFF, 5, 0xFF, 6, 0xFF, 7)); + return leftPart | rightPart; } - return AdvSimd.Arm64.ZipLow(left, right); + ThrowHelper.ThrowNotSupportedException(); + return default; } [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(PackedSimd))] internal static Vector128 UnpackHigh(Vector128 left, Vector128 right) { if (Sse2.IsSupported) { return Sse2.UnpackHigh(left, right); } - else if (!AdvSimd.Arm64.IsSupported) + else if (AdvSimd.Arm64.IsSupported) { - ThrowHelper.ThrowNotSupportedException(); + return AdvSimd.Arm64.ZipHigh(left, right); + } + else if (PackedSimd.IsSupported) + { + Vector128 leftPart = PackedSimd.Swizzle(left, + Vector128.Create((byte)8, 0xFF, 9, 0xFF, 10, 0xFF, 11, 0xFF, 12, 0xFF, 13, 0xFF, 14, 0xFF, 15, 0xFF)); + Vector128 rightPart = PackedSimd.Swizzle(right, + Vector128.Create((byte)0xFF, 8, 0xFF, 9, 0xFF, 10, 0xFF, 11, 0xFF, 12, 0xFF, 13, 0xFF, 14, 0xFF, 15)); + return leftPart | rightPart; } - return AdvSimd.Arm64.ZipHigh(left, right); + ThrowHelper.ThrowNotSupportedException(); + return default; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs index 0b09377b503e17..7a895c1a7d3b08 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs @@ -80,7 +80,7 @@ public ProbabilisticMap(ReadOnlySpan values) [BypassReadyToRun] private static void SetCharBit(ref uint charMap, byte value) { - if (Sse41.IsSupported || 
AdvSimd.Arm64.IsSupported) + if (Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) { Unsafe.Add(ref Unsafe.As(ref charMap), value & VectorizedIndexMask) |= (byte)(1u << (value >> VectorizedIndexShift)); } @@ -92,7 +92,7 @@ private static void SetCharBit(ref uint charMap, byte value) [MethodImpl(MethodImplOptions.AggressiveInlining)] [BypassReadyToRun] - private static bool IsCharBitSet(ref uint charMap, byte value) => Sse41.IsSupported || AdvSimd.Arm64.IsSupported + private static bool IsCharBitSet(ref uint charMap, byte value) => Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported ? (Unsafe.Add(ref Unsafe.As(ref charMap), value & VectorizedIndexMask) & (1u << (value >> VectorizedIndexShift))) != 0 : (Unsafe.Add(ref charMap, value & PortableIndexMask) & (1u << (value >> PortableIndexShift))) != 0; @@ -220,6 +220,7 @@ private static Vector256 IsCharBitNotSetAvx2(Vector256 charMapLower, [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static Vector128 ContainsMask16Chars(Vector128 charMapLower, Vector128 charMapUpper, ref char searchSpace) { Vector128 source0 = Vector128.LoadUnsafe(ref searchSpace); @@ -238,6 +239,11 @@ private static Vector128 ContainsMask16Chars(Vector128 charMapLower, sourceLower = AdvSimd.Arm64.UnzipEven(source0.AsByte(), source1.AsByte()); sourceUpper = AdvSimd.Arm64.UnzipOdd(source0.AsByte(), source1.AsByte()); } + else if (PackedSimd.IsSupported) + { + sourceLower = PackedSimd.ConvertNarrowingSaturateUnsigned((source0 & Vector128.Create((ushort)255)).AsInt16(), (source1 & Vector128.Create((ushort)255)).AsInt16()); + sourceUpper = PackedSimd.ConvertNarrowingSaturateUnsigned((source0 >>> 8).AsInt16(), (source1 >>> 8).AsInt16()); + } else { // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead @@ -392,7 +398,7 @@ 
private static unsafe int ProbabilisticLastIndexOfAny(ref char searchSpace, int internal static int IndexOfAny(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state) where TUseFastContains : struct, SearchValues.IRuntimeConst { - if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceLength >= 16) + if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && searchSpaceLength >= 16) { return Vector512.IsHardwareAccelerated && Avx512Vbmi.VL.IsSupported ? IndexOfAnyVectorizedAvx512(ref searchSpace, searchSpaceLength, ref state) @@ -406,7 +412,7 @@ internal static int IndexOfAny(ref char searchSpace, int searc internal static int LastIndexOfAny(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state) where TUseFastContains : struct, SearchValues.IRuntimeConst { - if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceLength >= 16) + if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && searchSpaceLength >= 16) { return Vector512.IsHardwareAccelerated && Avx512Vbmi.VL.IsSupported ? 
LastIndexOfAnyVectorizedAvx512(ref searchSpace, searchSpaceLength, ref state) @@ -501,10 +507,11 @@ private static int IndexOfAnyVectorizedAvx512(ref char searchS [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(Sse41))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static int IndexOfAnyVectorized(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state) where TUseFastContains : struct, SearchValues.IRuntimeConst { - Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported); + Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported); Debug.Assert(searchSpaceLength >= 16); ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength); @@ -679,10 +686,11 @@ private static int LastIndexOfAnyVectorizedAvx512(ref char sea [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(Sse41))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static int LastIndexOfAnyVectorized(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state) where TUseFastContains : struct, SearchValues.IRuntimeConst { - Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported); + Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported); Debug.Assert(searchSpaceLength >= 16); ref char cur = ref Unsafe.Add(ref searchSpace, searchSpaceLength); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs index 0869ad1553f895..2d96c3009f9fdc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs @@ -8,6 +8,7 @@ using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using 
System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; using static System.Buffers.StringSearchValuesHelper; using static System.Buffers.TeddyHelper; @@ -150,6 +151,7 @@ protected AsciiStringSearchValuesTeddyBase(string[][] buckets, ReadOnlySpan span) { // The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported @@ -170,6 +172,7 @@ protected int IndexOfAnyN2(ReadOnlySpan span) [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] protected int IndexOfAnyN3(ReadOnlySpan span) { // The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported @@ -190,6 +193,7 @@ protected int IndexOfAnyN3(ReadOnlySpan span) [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] private int IndexOfAnyN2Vector128(ReadOnlySpan span) { // See comments in 'IndexOfAnyN3Vector128' below. @@ -350,6 +354,7 @@ private int IndexOfAnyN2Avx512(ReadOnlySpan span) [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] private int IndexOfAnyN3Vector128(ReadOnlySpan span) { // We can't process inputs shorter than 18 characters in a vectorized manner here. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs index 064838c98cf361..8d6eb67922e4fb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs @@ -4,6 +4,7 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; namespace System.Buffers @@ -17,6 +18,7 @@ internal static class TeddyHelper [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] public static (Vector128 Result, Vector128 Prev0) ProcessInputN2( Vector128 input, Vector128 prev0, @@ -90,6 +92,7 @@ public static (Vector512 Result, Vector512 Prev0) ProcessInputN2( [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] public static (Vector128 Result, Vector128 Prev0, Vector128 Prev1) ProcessInputN3( Vector128 input, Vector128 prev0, Vector128 prev1, @@ -216,6 +219,7 @@ public static (Vector512 Result, Vector512 Prev0, Vector512 Pr [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Sse2))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] public static Vector128 LoadAndPack16AsciiChars(ref char source) { Vector128 source0 = Vector128.LoadUnsafe(ref source); @@ -229,6 +233,10 @@ public static Vector128 LoadAndPack16AsciiChars(ref char source) { return AdvSimd.Arm64.UnzipEven(source0.AsByte(), source1.AsByte()); } + else if (PackedSimd.IsSupported) + { + return 
PackedSimd.ConvertNarrowingSaturateUnsigned(source0.AsInt16(), source1.AsInt16()); + } else { // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead @@ -276,10 +284,13 @@ public static Vector512 LoadAndPack64AsciiChars(ref char source) [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static (Vector128 Low, Vector128 High) GetNibbles(Vector128 input) { // 'low' is not strictly correct here, but we take advantage of Ssse3.Shuffle's behavior - // of doing an implicit 'AND 0xF' in order to skip the redundant AND. + // of doing an implicit 'AND 0xF' in order to skip the redundant AND. PackedSimd.Swizzle + // and AdvSimd's table lookup return 0 for indices >= 16 (instead of masking the low 4 + // bits), so they need the explicit AND. Vector128 low = Ssse3.IsSupported ? input : input & Vector128.Create((byte)0xF); @@ -316,6 +327,7 @@ private static (Vector512 Low, Vector512 High) GetNibbles(Vector512< [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static Vector128 Shuffle(Vector128 maskLow, Vector128 maskHigh, Vector128 low, Vector128 high) { return SearchValues.ShuffleNativeModified(maskLow, low) & Vector128.ShuffleNative(maskHigh, high); @@ -338,6 +350,7 @@ private static Vector512 Shuffle(Vector512 maskLow, Vector512 [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static Vector128 RightShift1(Vector128 left, Vector128 right) { // Given input vectors like @@ -354,6 +367,18 @@ private static Vector128 RightShift1(Vector128 left, Vector128 { return AdvSimd.ExtractVector128(left, right, 15); } + else if 
(PackedSimd.IsSupported) + { + // Call PackedSimd.Swizzle directly (i8x16.swizzle) rather than through + // Vector128.ShuffleNative's dispatcher chain, which the Mono SIMD intrinsic + // recognizer doesn't always lower cleanly. Swizzle produces 0 for any lane whose + // index is out of range (>= 16), so we can compose the two halves with OR. + Vector128 leftPart = PackedSimd.Swizzle(left, + Vector128.Create((byte)15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF)); + Vector128 rightPart = PackedSimd.Swizzle(right, + Vector128.Create((byte)0xFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); + return leftPart | rightPart; + } else { // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead @@ -365,6 +390,7 @@ private static Vector128 RightShift1(Vector128 left, Vector128 [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static Vector128 RightShift2(Vector128 left, Vector128 right) { // Given input vectors like @@ -381,6 +407,14 @@ private static Vector128 RightShift2(Vector128 left, Vector128 { return AdvSimd.ExtractVector128(left, right, 14); } + else if (PackedSimd.IsSupported) + { + Vector128 leftPart = PackedSimd.Swizzle(left, + Vector128.Create((byte)14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF)); + Vector128 rightPart = PackedSimd.Swizzle(right, + Vector128.Create((byte)0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); + return leftPart | rightPart; + } else { // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs index a9ac9452c215cb..6c95f892366bd6 
100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/StringSearchValues.cs @@ -7,6 +7,7 @@ using System.Globalization; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; using System.Text; using System.Text.Unicode; @@ -128,7 +129,7 @@ private static SearchValues CreateFromNormalizedValues( return CreateForSingleValue(values[0], uniqueValues, ignoreCase, allAscii, asciiLettersOnly); } - if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && + if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && TryGetTeddyAcceleratedValues(values, uniqueValues, ignoreCase, allAscii, asciiLettersOnly, nonAsciiAffectedByCaseConversion, minLength) is { } searchValues) { return searchValues; @@ -198,7 +199,7 @@ static SearchValues PickAhoCorasickImplementation(AhoC int n = minLength == 2 ? 2 : 3; - if (Ssse3.IsSupported) + if (Ssse3.IsSupported || PackedSimd.IsSupported) { foreach (string value in values) { @@ -206,8 +207,9 @@ static SearchValues PickAhoCorasickImplementation(AhoC { // If we let null chars through here, Teddy would still work correctly, but it // would hit more false positives that the verification step would have to rule out. - // While we could flow a generic flag like Ssse3AndWasmHandleZeroInNeedle through, - // we expect such values to be rare enough that introducing more code is not worth it. + // Sse2.PackUnsignedSaturate and PackedSimd.ConvertNarrowingSaturateUnsigned both saturate + // negative signed 16-bit inputs (i.e. chars >= 0x8000) to 0, which would then match a null char + // in a needle, so we filter out null-containing needles for both to avoid those false positives. 
return null; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs index ff095110653805..581d231076678f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs @@ -10,6 +10,7 @@ #if NET using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; #endif @@ -884,7 +885,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt #if NET Vector128 nonAsciiUtf16DataMask; - if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)) + if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || PackedSimd.IsSupported) { nonAsciiUtf16DataMask = Vector128.Create(unchecked((short)0xFF80)); // mask of non-ASCII bits in a UTF-16 char } @@ -944,7 +945,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining); #if NET - if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)) + if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || PackedSimd.IsSupported) { // Try reading and writing 8 elements per iteration. 
uint maxIters = minElementsRemaining / 8; @@ -982,6 +983,17 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // narrow and write Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64()); } + else if (PackedSimd.IsSupported) + { + if ((utf16Data & nonAsciiUtf16DataMask) != Vector128.Zero) + { + goto LoopTerminatedDueToNonAsciiDataInVectorLocal; + } + + // narrow and write low 8 bytes + Vector128 narrowed = PackedSimd.ConvertNarrowingSaturateUnsigned(utf16Data, utf16Data); + Unsafe.WriteUnaligned(pOutputBuffer, narrowed.AsUInt64().ToScalar()); + } else { // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead @@ -1015,6 +1027,11 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt { Unsafe.WriteUnaligned(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32())); } + else if (PackedSimd.IsSupported) + { + Vector128 narrowed = PackedSimd.ConvertNarrowingSaturateUnsigned(utf16Data, utf16Data); + Unsafe.WriteUnaligned(pOutputBuffer, narrowed.AsUInt32().ToScalar()); + } else { // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead @@ -1058,6 +1075,11 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt { Unsafe.WriteUnaligned(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32())); } + else if (PackedSimd.IsSupported) + { + Vector128 narrowed = PackedSimd.ConvertNarrowingSaturateUnsigned(utf16Data, utf16Data); + Unsafe.WriteUnaligned(pOutputBuffer, narrowed.AsUInt32().ToScalar()); + } else { // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs 
b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index 0717ed348af4ed..dff9af0fdcac9d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -9,6 +9,7 @@ #if NET using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; #endif @@ -158,6 +159,15 @@ internal static unsafe partial class Utf8Utility goto LoopTerminatedEarlyDueToNonAsciiData; } } + else if (PackedSimd.IsSupported) + { + uint mask = Vector128.LoadUnsafe(ref *pInputBuffer).ExtractMostSignificantBits(); + if (mask != 0) + { + trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask); + goto LoopTerminatedEarlyDueToNonAsciiData; + } + } else #endif { @@ -180,9 +190,9 @@ internal static unsafe partial class Utf8Utility #if NET LoopTerminatedEarlyDueToNonAsciiData: - // x86 can only be little endian, while ARM can be big or little endian - // so if we reached this label we need to check both combinations are supported - Debug.Assert((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported); + // x86 and Wasm can only be little endian, while ARM can be big or little endian, + // so if we reached this label we need to check the LE-restricted combinations as well. + Debug.Assert((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported || PackedSimd.IsSupported); // The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit