Skip to content

Commit 5341027

Browse files
Delete more leftover UTF-16 code from compiler (#128887)
1 parent 2fa1dda commit 5341027

6 files changed

Lines changed: 62 additions & 94 deletions

File tree

src/coreclr/tools/Common/Compiler/NameMangler.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,6 @@ public NameMangler(NodeMangler nodeMangler)
3333

3434
public abstract Utf8String GetMangledFieldName(FieldDesc field);
3535

36-
public abstract string GetMangledStringName(string literal);
36+
public abstract Utf8String GetMangledStringName(string literal);
3737
}
3838
}

src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs

Lines changed: 20 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@
55
using System.Collections.Generic;
66
using System.Diagnostics;
77
using System.Runtime.CompilerServices;
8+
using System.Runtime.InteropServices;
89
using System.Security.Cryptography;
9-
using System.Text;
1010

1111
using Internal.Text;
1212
using Internal.TypeSystem;
@@ -37,46 +37,6 @@ public override Utf8String CompilationUnitPrefix
3737
//
3838
// Turn a name into a valid C/C++ identifier
3939
//
40-
private static string SanitizeName(string s)
41-
{
42-
StringBuilder sb = null;
43-
for (int i = 0; i < s.Length; i++)
44-
{
45-
char c = s[i];
46-
47-
if (char.IsAsciiLetter(c))
48-
{
49-
sb?.Append(c);
50-
continue;
51-
}
52-
53-
if (char.IsAsciiDigit(c))
54-
{
55-
// C identifiers cannot start with a digit. Prepend underscores.
56-
if (i == 0)
57-
{
58-
sb ??= new StringBuilder(s.Length + 2);
59-
sb.Append('_');
60-
}
61-
sb?.Append(c);
62-
continue;
63-
}
64-
65-
sb ??= new StringBuilder(s, 0, i, s.Length);
66-
67-
// Everything else is replaced by underscore.
68-
// TODO: We assume that there won't be collisions with our own or C++ built-in identifiers.
69-
sb.Append('_');
70-
}
71-
72-
string sanitizedName = (sb != null) ? sb.ToString() : s;
73-
74-
// The character sequences denoting generic instantiations, arrays, byrefs, or pointers must be
75-
// restricted to that use only. Replace them if they happened to be used in any identifiers in
76-
// the compilation input.
77-
return sanitizedName;
78-
}
79-
8040
public override Utf8String SanitizeName(Utf8String s)
8141
=> SanitizeName(s.AsSpan());
8242

@@ -132,62 +92,26 @@ private static Utf8String SanitizeName(ReadOnlySpan<byte> s)
13292
return sanitizedName;
13393
}
13494

135-
private static byte[] GetBytesFromString(string literal)
95+
private static bool ContainsUtf8ReplacementCharacter(ReadOnlySpan<byte> bytes)
13696
{
137-
byte[] bytes = new byte[checked(literal.Length * 2)];
138-
for (int i = 0; i < literal.Length; i++)
139-
{
140-
int iByteBase = i * 2;
141-
char c = literal[i];
142-
bytes[iByteBase] = (byte)c;
143-
bytes[iByteBase + 1] = (byte)(c >> 8);
144-
}
145-
return bytes;
97+
ReadOnlySpan<byte> replacementCharacter = [0xEF, 0xBF, 0xBD];
98+
return bytes.IndexOf(replacementCharacter) >= 0;
14699
}
147100

148-
private string SanitizeNameWithHash(string literal)
149-
{
150-
string mangledName = SanitizeName(literal);
151-
152-
if (mangledName.Length > 30)
153-
mangledName = mangledName.Substring(0, 30);
154-
155-
if (mangledName != literal)
156-
{
157-
byte[] hash;
158-
lock (this)
159-
{
160-
// Use SHA256 hash here to provide a high degree of uniqueness to symbol names without requiring them to be long
161-
// This hash function provides an exceedingly high likelihood that no two strings will be given equal symbol names
162-
// This is not considered used for security purpose; however collisions would be highly unfortunate as they will cause compilation
163-
// failure.
164-
hash = SHA256.HashData(GetBytesFromString(literal));
165-
}
166-
167-
mangledName += "_" + Convert.ToHexString(hash);
168-
}
169-
170-
return mangledName;
171-
}
172-
173-
private Utf8String SanitizeNameWithHash(Utf8String literal)
101+
private Utf8String SanitizeNameWithHash(Utf8String literal, byte[] hash = null)
174102
{
175103
Utf8String mangledName = SanitizeName(literal);
176104

177105
if (mangledName.Length > 30)
178106
mangledName = new Utf8String(mangledName.AsSpan().Slice(0, 30).ToArray());
179107

180-
if (!mangledName.AsSpan().SequenceEqual(literal.AsSpan()))
108+
if (hash is not null || !mangledName.AsSpan().SequenceEqual(literal.AsSpan()))
181109
{
182-
byte[] hash;
183-
lock (this)
184-
{
185-
// Use SHA256 hash here to provide a high degree of uniqueness to symbol names without requiring them to be long
186-
// This hash function provides an exceedingly high likelihood that no two strings will be given equal symbol names
187-
// This is not considered used for security purpose; however collisions would be highly unfortunate as they will cause compilation
188-
// failure.
189-
hash = SHA256.HashData(literal.AsSpan());
190-
}
110+
// Use SHA256 hash here to provide a high degree of uniqueness to symbol names without requiring them to be long
111+
// This hash function provides an exceedingly high likelihood that no two strings will be given equal symbol names
112+
// This is not considered used for security purpose; however collisions would be highly unfortunate as they will cause compilation
113+
// failure.
114+
hash ??= SHA256.HashData(literal.AsSpan());
191115

192116
mangledName = new Utf8StringBuilder()
193117
.Append(mangledName)
@@ -297,7 +221,7 @@ private Utf8String ComputeMangledTypeName(TypeDesc type)
297221
// This problem needs a better fix.
298222
if (isSystemPrivate)
299223
assemblyName = string.Concat("S.P.", assemblyName.AsSpan(15));
300-
Utf8String prependAssemblyName = new Utf8String(SanitizeName(assemblyName));
224+
Utf8String prependAssemblyName = SanitizeName(new Utf8String(assemblyName));
301225

302226
var deduplicator = new HashSet<Utf8String>();
303227

@@ -684,18 +608,22 @@ private Utf8String ComputeMangledFieldName(FieldDesc field)
684608
return utf8MangledName;
685609
}
686610

687-
private Dictionary<string, string> _mangledStringLiterals = new Dictionary<string, string>();
611+
private Dictionary<string, Utf8String> _mangledStringLiterals = new Dictionary<string, Utf8String>();
688612

689-
public override string GetMangledStringName(string literal)
613+
public override Utf8String GetMangledStringName(string literal)
690614
{
691-
string mangledName;
615+
Utf8String mangledName;
692616
lock (this)
693617
{
694618
if (_mangledStringLiterals.TryGetValue(literal, out mangledName))
695619
return mangledName;
696620
}
697621

698-
mangledName = SanitizeNameWithHash(literal);
622+
Utf8String utf8Literal = new Utf8String(literal);
623+
byte[] hash = ContainsUtf8ReplacementCharacter(utf8Literal.AsSpan())
624+
? SHA256.HashData(MemoryMarshal.AsBytes(literal.AsSpan()))
625+
: null;
626+
mangledName = SanitizeNameWithHash(utf8Literal, hash);
699627

700628
lock (this)
701629
{

src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/NodeFactory.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1562,7 +1562,7 @@ public ISymbolNode ConstantUtf8String(string str)
15621562
byte[] stringBytes = new byte[stringBytesCount + 1];
15631563
Encoding.UTF8.GetBytes(str, 0, str.Length, stringBytes, 0);
15641564

1565-
Utf8String symbolName = new Utf8String("__utf8str_" + NameMangler.GetMangledStringName(str));
1565+
Utf8String symbolName = Utf8String.Concat("__utf8str_"u8, NameMangler.GetMangledStringName(str).AsSpan());
15661566

15671567
return ReadOnlyDataBlob(symbolName, stringBytes, 1);
15681568
}

src/tests/nativeaot/SmokeTests/UnitTests/Main.cs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
success &= RunTest(Devirtualization.Run);
1313
success &= RunTest(StackTraces.Run);
1414
success &= RunTest(Ordering.Run);
15+
success &= RunTest(MiscTests.Run);
1516

1617
return success ? 100 : 1;
1718

src/tests/nativeaot/SmokeTests/UnitTests/MiscTests.cs

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Runtime.CompilerServices;
6+
7+
class MiscTests
8+
{
9+
internal static int Run()
10+
{
11+
TestSurrogateStringLiterals.Run();
12+
return 100;
13+
}
14+
15+
class TestSurrogateStringLiterals
16+
{
17+
public static void Run()
18+
{
19+
CheckSurrogateLiteral(GetFirstSurrogateLiteral(), '\uD800');
20+
CheckSurrogateLiteral(GetSecondSurrogateLiteral(), '\uD801');
21+
}
22+
23+
[MethodImpl(MethodImplOptions.NoInlining)]
24+
private static string GetFirstSurrogateLiteral() => "\uD800";
25+
26+
[MethodImpl(MethodImplOptions.NoInlining)]
27+
private static string GetSecondSurrogateLiteral() => "\uD801";
28+
29+
private static void CheckSurrogateLiteral(string value, char expected)
30+
{
31+
if (value.Length != 1)
32+
throw new Exception(value.Length.ToString());
33+
34+
if (value[0] != expected)
35+
throw new Exception(((int)value[0]).ToString("X4"));
36+
}
37+
}
38+
}

src/tests/nativeaot/SmokeTests/UnitTests/UnitTests.csproj

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
<Compile Include="Devirtualization.cs" />
2424
<Compile Include="Generics.cs" />
2525
<Compile Include="Interfaces.cs" />
26+
<Compile Include="MiscTests.cs" />
2627
<Compile Include="Ordering.cs" />
2728
<Compile Include="Threading.cs" />
2829
<Compile Include="StackTraces.cs" />

0 commit comments

Comments
 (0)