Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/libraries/Common/src/System/HexConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ public static void ToCharsBuffer(byte value, Span<char> buffer, int startingInde
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Ssse3))]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(PackedSimd))]
internal static (Vector128<byte>, Vector128<byte>) AsciiToHexVector128(Vector128<byte> src, Vector128<byte> hexMap)
{
Debug.Assert(Ssse3.IsSupported || AdvSimd.Arm64.IsSupported);
Debug.Assert(Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported);
Comment thread
lewing marked this conversation as resolved.

// The algorithm is simple: a single srcVec (contains the whole 16b Guid) is converted
// into nibbles and then, via hexMap, converted into a HEX representation via
Expand All @@ -115,6 +116,7 @@ internal static (Vector128<byte>, Vector128<byte>) AsciiToHexVector128(Vector128

[CompExactlyDependsOn(typeof(Ssse3))]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(PackedSimd))]
private static void EncodeTo_Vector128<TChar>(ReadOnlySpan<byte> source, Span<TChar> destination, Casing casing)
{
Debug.Assert(source.Length >= (Vector128<TChar>.Count / 2));
Expand Down Expand Up @@ -187,7 +189,7 @@ public static void EncodeToUtf8(ReadOnlySpan<byte> source, Span<byte> utf8Destin
Debug.Assert(utf8Destination.Length >= (source.Length * 2));

#if SYSTEM_PRIVATE_CORELIB
if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported) && (source.Length >= (Vector128<byte>.Count / 2)))
if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported || PackedSimd.IsSupported) && (source.Length >= (Vector128<byte>.Count / 2)))
{
EncodeTo_Vector128(source, utf8Destination, casing);
return;
Expand All @@ -204,7 +206,7 @@ public static void EncodeToUtf16(ReadOnlySpan<byte> source, Span<char> destinati
Debug.Assert(destination.Length >= (source.Length * 2));

#if SYSTEM_PRIVATE_CORELIB
if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported) && (source.Length >= (Vector128<ushort>.Count / 2)))
if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported || PackedSimd.IsSupported) && (source.Length >= (Vector128<ushort>.Count / 2)))
{
EncodeTo_Vector128(source, Unsafe.BitCast<Span<char>, Span<ushort>>(destination), casing);
return;
Expand Down
18 changes: 18 additions & 0 deletions src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#if NET
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.Wasm;
using System.Runtime.Intrinsics.X86;
#endif

Expand Down Expand Up @@ -300,6 +301,23 @@ private static uint UpdateVector128(uint adler, ReadOnlySpan<byte> source)
wprod2 = AdvSimd.MultiplyWideningUpperAndAdd(wprod2, bytes2, tap2.AsByte());
vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod2);
}
else if (PackedSimd.IsSupported)
{
// Widening byte sum: each byte -> ushort pair sum -> uint pair sum, then accumulate into vs1.
// Because weights are all positive (1-32), unsigned byte * unsigned byte multiply is valid for vs2.
Vector128<ushort> sumPairs1 = PackedSimd.AddPairwiseWidening(bytes1);
Vector128<ushort> sumPairs2 = PackedSimd.AddPairwiseWidening(bytes2);
vs1 += PackedSimd.AddPairwiseWidening(sumPairs1) + PackedSimd.AddPairwiseWidening(sumPairs2);

// bytes * weights -> 8 ushorts low + 8 ushorts high, sum pairwise to 4 uints + 4 uints.
Vector128<ushort> wprod1Lo = PackedSimd.MultiplyWideningLower(bytes1, tap1.AsByte());
Vector128<ushort> wprod1Hi = PackedSimd.MultiplyWideningUpper(bytes1, tap1.AsByte());
vs2 += PackedSimd.AddPairwiseWidening(wprod1Lo) + PackedSimd.AddPairwiseWidening(wprod1Hi);

Vector128<ushort> wprod2Lo = PackedSimd.MultiplyWideningLower(bytes2, tap2.AsByte());
Vector128<ushort> wprod2Hi = PackedSimd.MultiplyWideningUpper(bytes2, tap2.AsByte());
vs2 += PackedSimd.AddPairwiseWidening(wprod2Lo) + PackedSimd.AddPairwiseWidening(wprod2Hi);
}
Comment thread
lewing marked this conversation as resolved.
else
{
(Vector128<ushort> lo1, Vector128<ushort> hi1) = Vector128.Widen(bytes1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#if NET
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.Wasm;
using System.Runtime.Intrinsics.X86;
#endif

Expand Down Expand Up @@ -702,12 +703,25 @@ private static Vector128<ulong> MultiplyWideningLower(Vector128<uint> source)
Vector64<uint> sourceHigh = Vector128.Shuffle(source, Vector128.Create(1u, 3, 0, 0)).GetLower();
return AdvSimd.MultiplyWideningLower(sourceLow, sourceHigh);
}
else if (Sse2.IsSupported)
{
Vector128<uint> sourceLow = Vector128.Shuffle(source, Vector128.Create(1u, 0, 3, 0));
return Sse2.Multiply(source, sourceLow);
}
else if (PackedSimd.IsSupported)
{
// PackedSimd.MultiplyWideningLower (i64x2.extmul_low_i32x4_u) does
// result[i] = (ulong)a[i] * (ulong)b[i] for i in {0, 1}.
// We need { source[0]*source[1], source[2]*source[3] } to match the Sse2/AdvSimd paths,
// so first move the even lanes into one operand and the odd lanes into the other.
Vector128<uint> evens = Vector128.Shuffle(source, Vector128.Create(0u, 2, 0, 0));
Vector128<uint> odds = Vector128.Shuffle(source, Vector128.Create(1u, 3, 0, 0));
return PackedSimd.MultiplyWideningLower(evens, odds);
}
else
{
Vector128<uint> sourceLow = Vector128.Shuffle(source, Vector128.Create(1u, 0, 3, 0));
return Sse2.IsSupported ?
Sse2.Multiply(source, sourceLow) :
(source & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64() * (sourceLow & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64();
return (source & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64() * (sourceLow & Vector128.Create(~0u, 0u, ~0u, 0u)).AsUInt64();
}
}
#endif
Expand Down
6 changes: 4 additions & 2 deletions src/libraries/System.Private.CoreLib/src/System/Guid.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.Wasm;
using System.Runtime.Intrinsics.X86;
using System.Runtime.Versioning;
using System.Text;
Expand Down Expand Up @@ -1345,7 +1346,7 @@ internal unsafe bool TryFormatCore<TChar>(Span<TChar> destination, out int chars
}
flags >>= 8;

if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian)
if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && BitConverter.IsLittleEndian)
{
// Vectorized implementation for D, N, P and B formats:
// [{|(]dddddddd[-]dddd[-]dddd[-]dddd[-]dddddddddddd[}|)]
Expand Down Expand Up @@ -1513,9 +1514,10 @@ static void WriteHex(Span<TChar> dest, int offset, int val, bool appendComma = t
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(Ssse3))]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(PackedSimd))]
private static (Vector128<byte>, Vector128<byte>, Vector128<byte>) FormatGuidVector128Utf8(Guid value, bool useDashes)
{
Debug.Assert((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian);
Debug.Assert((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && BitConverter.IsLittleEndian);
// Vectorized implementation for D, N, P and B formats:
// [{|(]dddddddd[-]dddd[-]dddd[-]dddd[-]dddddddddddd[}|)]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4388,33 +4388,59 @@ internal static void SetElementUnsafe<T>(in this Vector128<T> vector, int index,
/// <summary>
/// Interleaves the lower eight bytes of <paramref name="left"/> and <paramref name="right"/>:
/// { left[0], right[0], left[1], right[1], ..., left[7], right[7] }.
/// </summary>
/// <param name="left">Vector whose lower eight bytes land in the even result lanes.</param>
/// <param name="right">Vector whose lower eight bytes land in the odd result lanes.</param>
/// <returns>The interleaved vector.</returns>
/// <remarks>
/// Dispatch order is Sse2, then AdvSimd.Arm64, then PackedSimd. When none of them is supported,
/// <c>ThrowHelper.ThrowNotSupportedException()</c> is invoked.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(Sse2))]
[CompExactlyDependsOn(typeof(PackedSimd))]
internal static Vector128<byte> UnpackLow(Vector128<byte> left, Vector128<byte> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.UnpackLow(left, right);
    }
    else if (AdvSimd.Arm64.IsSupported)
    {
        return AdvSimd.Arm64.ZipLow(left, right);
    }
    else if (PackedSimd.IsSupported)
    {
        // Compose with two PackedSimd.Swizzle calls plus OR. The 0xFF indices are out of range,
        // so those lanes come back as 0 and the OR merges the two halves without interference.
        // We call PackedSimd.Swizzle directly rather than Vector128.ShuffleNative because
        // the latter goes through a Ssse3 -> AdvSimd.Arm64 -> PackedSimd dispatcher chain
        // that the Mono SIMD intrinsic recognizer doesn't always lower cleanly.
        // PackedSimd.Shuffle (two-vector i8x16.shuffle) requires constant lane indices
        // and is impractical to call portably from generic code paths.

        // left[0..7] spread into the even lanes, odd lanes zeroed.
        Vector128<byte> leftPart = PackedSimd.Swizzle(left,
            Vector128.Create((byte)0, 0xFF, 1, 0xFF, 2, 0xFF, 3, 0xFF, 4, 0xFF, 5, 0xFF, 6, 0xFF, 7, 0xFF));

        // right[0..7] spread into the odd lanes, even lanes zeroed.
        Vector128<byte> rightPart = PackedSimd.Swizzle(right,
            Vector128.Create((byte)0xFF, 0, 0xFF, 1, 0xFF, 2, 0xFF, 3, 0xFF, 4, 0xFF, 5, 0xFF, 6, 0xFF, 7));

        return leftPart | rightPart;
    }

    // No supported instruction set: the helper is expected not to return;
    // the trailing return only satisfies the compiler's definite-return analysis.
    ThrowHelper.ThrowNotSupportedException();
    return default;
}

/// <summary>
/// Interleaves the upper eight bytes of <paramref name="left"/> and <paramref name="right"/>:
/// { left[8], right[8], left[9], right[9], ..., left[15], right[15] }.
/// </summary>
/// <param name="left">Vector whose upper eight bytes land in the even result lanes.</param>
/// <param name="right">Vector whose upper eight bytes land in the odd result lanes.</param>
/// <returns>The interleaved vector.</returns>
/// <remarks>
/// Dispatch order is Sse2, then AdvSimd.Arm64, then PackedSimd. When none of them is supported,
/// <c>ThrowHelper.ThrowNotSupportedException()</c> is invoked.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(Sse2))]
[CompExactlyDependsOn(typeof(PackedSimd))]
internal static Vector128<byte> UnpackHigh(Vector128<byte> left, Vector128<byte> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.UnpackHigh(left, right);
    }
    else if (AdvSimd.Arm64.IsSupported)
    {
        return AdvSimd.Arm64.ZipHigh(left, right);
    }
    else if (PackedSimd.IsSupported)
    {
        // Two single-vector swizzles plus OR. The 0xFF indices are out of range, so those
        // lanes come back as 0 and the OR merges the two halves without interference.

        // left[8..15] spread into the even lanes, odd lanes zeroed.
        Vector128<byte> leftPart = PackedSimd.Swizzle(left,
            Vector128.Create((byte)8, 0xFF, 9, 0xFF, 10, 0xFF, 11, 0xFF, 12, 0xFF, 13, 0xFF, 14, 0xFF, 15, 0xFF));

        // right[8..15] spread into the odd lanes, even lanes zeroed.
        Vector128<byte> rightPart = PackedSimd.Swizzle(right,
            Vector128.Create((byte)0xFF, 8, 0xFF, 9, 0xFF, 10, 0xFF, 11, 0xFF, 12, 0xFF, 13, 0xFF, 14, 0xFF, 15));

        return leftPart | rightPart;
    }

    // No supported instruction set: the helper is expected not to return;
    // the trailing return only satisfies the compiler's definite-return analysis.
    ThrowHelper.ThrowNotSupportedException();
    return default;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public ProbabilisticMap(ReadOnlySpan<char> values)
[BypassReadyToRun]
private static void SetCharBit(ref uint charMap, byte value)
{
if (Sse41.IsSupported || AdvSimd.Arm64.IsSupported)
if (Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported)
{
Unsafe.Add(ref Unsafe.As<uint, byte>(ref charMap), value & VectorizedIndexMask) |= (byte)(1u << (value >> VectorizedIndexShift));
}
Comment thread
lewing marked this conversation as resolved.
Expand All @@ -92,7 +92,7 @@ private static void SetCharBit(ref uint charMap, byte value)

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[BypassReadyToRun]
private static bool IsCharBitSet(ref uint charMap, byte value) => Sse41.IsSupported || AdvSimd.Arm64.IsSupported
private static bool IsCharBitSet(ref uint charMap, byte value) => Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported
? (Unsafe.Add(ref Unsafe.As<uint, byte>(ref charMap), value & VectorizedIndexMask) & (1u << (value >> VectorizedIndexShift))) != 0
: (Unsafe.Add(ref charMap, value & PortableIndexMask) & (1u << (value >> PortableIndexShift))) != 0;

Expand Down Expand Up @@ -220,6 +220,7 @@ private static Vector256<byte> IsCharBitNotSetAvx2(Vector256<byte> charMapLower,
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(Sse2))]
[CompExactlyDependsOn(typeof(PackedSimd))]
private static Vector128<byte> ContainsMask16Chars(Vector128<byte> charMapLower, Vector128<byte> charMapUpper, ref char searchSpace)
{
Vector128<ushort> source0 = Vector128.LoadUnsafe(ref searchSpace);
Expand All @@ -238,6 +239,11 @@ private static Vector128<byte> ContainsMask16Chars(Vector128<byte> charMapLower,
sourceLower = AdvSimd.Arm64.UnzipEven(source0.AsByte(), source1.AsByte());
sourceUpper = AdvSimd.Arm64.UnzipOdd(source0.AsByte(), source1.AsByte());
}
else if (PackedSimd.IsSupported)
{
sourceLower = PackedSimd.ConvertNarrowingSaturateUnsigned((source0 & Vector128.Create((ushort)255)).AsInt16(), (source1 & Vector128.Create((ushort)255)).AsInt16());
sourceUpper = PackedSimd.ConvertNarrowingSaturateUnsigned((source0 >>> 8).AsInt16(), (source1 >>> 8).AsInt16());
}
else
{
// We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are live/dead
Expand Down Expand Up @@ -392,7 +398,7 @@ private static unsafe int ProbabilisticLastIndexOfAny(ref char searchSpace, int
internal static int IndexOfAny<TUseFastContains>(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state)
where TUseFastContains : struct, SearchValues.IRuntimeConst
{
if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceLength >= 16)
if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && searchSpaceLength >= 16)
{
return Vector512.IsHardwareAccelerated && Avx512Vbmi.VL.IsSupported
? IndexOfAnyVectorizedAvx512<TUseFastContains>(ref searchSpace, searchSpaceLength, ref state)
Expand All @@ -406,7 +412,7 @@ internal static int IndexOfAny<TUseFastContains>(ref char searchSpace, int searc
internal static int LastIndexOfAny<TUseFastContains>(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state)
where TUseFastContains : struct, SearchValues.IRuntimeConst
{
if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported) && searchSpaceLength >= 16)
if ((Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported) && searchSpaceLength >= 16)
{
return Vector512.IsHardwareAccelerated && Avx512Vbmi.VL.IsSupported
? LastIndexOfAnyVectorizedAvx512<TUseFastContains>(ref searchSpace, searchSpaceLength, ref state)
Expand Down Expand Up @@ -501,10 +507,11 @@ private static int IndexOfAnyVectorizedAvx512<TUseFastContains>(ref char searchS

[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(Sse41))]
[CompExactlyDependsOn(typeof(PackedSimd))]
private static int IndexOfAnyVectorized<TUseFastContains>(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state)
where TUseFastContains : struct, SearchValues.IRuntimeConst
{
Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported);
Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported);
Debug.Assert(searchSpaceLength >= 16);

ref char searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
Expand Down Expand Up @@ -679,10 +686,11 @@ private static int LastIndexOfAnyVectorizedAvx512<TUseFastContains>(ref char sea

[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(Sse41))]
[CompExactlyDependsOn(typeof(PackedSimd))]
private static int LastIndexOfAnyVectorized<TUseFastContains>(ref char searchSpace, int searchSpaceLength, ref ProbabilisticMapState state)
where TUseFastContains : struct, SearchValues.IRuntimeConst
{
Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported);
Debug.Assert(Sse41.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported);
Debug.Assert(searchSpaceLength >= 16);

ref char cur = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.Wasm;
using System.Runtime.Intrinsics.X86;
using static System.Buffers.StringSearchValuesHelper;
using static System.Buffers.TeddyHelper;
Expand Down Expand Up @@ -150,6 +151,7 @@ protected AsciiStringSearchValuesTeddyBase(string[][] buckets, ReadOnlySpan<stri

[CompExactlyDependsOn(typeof(Ssse3))]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(PackedSimd))]
protected int IndexOfAnyN2(ReadOnlySpan<char> span)
{
// The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported
Expand All @@ -170,6 +172,7 @@ protected int IndexOfAnyN2(ReadOnlySpan<char> span)

[CompExactlyDependsOn(typeof(Ssse3))]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(PackedSimd))]
protected int IndexOfAnyN3(ReadOnlySpan<char> span)
{
// The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported
Expand All @@ -190,6 +193,7 @@ protected int IndexOfAnyN3(ReadOnlySpan<char> span)

[CompExactlyDependsOn(typeof(Ssse3))]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(PackedSimd))]
private int IndexOfAnyN2Vector128(ReadOnlySpan<char> span)
{
// See comments in 'IndexOfAnyN3Vector128' below.
Expand Down Expand Up @@ -350,6 +354,7 @@ private int IndexOfAnyN2Avx512(ReadOnlySpan<char> span)

[CompExactlyDependsOn(typeof(Ssse3))]
[CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
[CompExactlyDependsOn(typeof(PackedSimd))]
private int IndexOfAnyN3Vector128(ReadOnlySpan<char> span)
{
// We can't process inputs shorter than 18 characters in a vectorized manner here.
Expand Down
Loading
Loading