|
5 | 5 | using System.Collections.Generic; |
6 | 6 | using System.Diagnostics; |
7 | 7 | using System.Runtime.CompilerServices; |
| 8 | +using System.Runtime.InteropServices; |
8 | 9 | using System.Security.Cryptography; |
9 | | -using System.Text; |
10 | 10 |
|
11 | 11 | using Internal.Text; |
12 | 12 | using Internal.TypeSystem; |
@@ -37,46 +37,6 @@ public override Utf8String CompilationUnitPrefix |
37 | 37 | // |
38 | 38 | // Turn a name into a valid C/C++ identifier |
39 | 39 | // |
40 | | - private static string SanitizeName(string s) |
41 | | - { |
42 | | - StringBuilder sb = null; |
43 | | - for (int i = 0; i < s.Length; i++) |
44 | | - { |
45 | | - char c = s[i]; |
46 | | - |
47 | | - if (char.IsAsciiLetter(c)) |
48 | | - { |
49 | | - sb?.Append(c); |
50 | | - continue; |
51 | | - } |
52 | | - |
53 | | - if (char.IsAsciiDigit(c)) |
54 | | - { |
55 | | - // C identifiers cannot start with a digit. Prepend underscores. |
56 | | - if (i == 0) |
57 | | - { |
58 | | - sb ??= new StringBuilder(s.Length + 2); |
59 | | - sb.Append('_'); |
60 | | - } |
61 | | - sb?.Append(c); |
62 | | - continue; |
63 | | - } |
64 | | - |
65 | | - sb ??= new StringBuilder(s, 0, i, s.Length); |
66 | | - |
67 | | - // Everything else is replaced by underscore. |
68 | | - // TODO: We assume that there won't be collisions with our own or C++ built-in identifiers. |
69 | | - sb.Append('_'); |
70 | | - } |
71 | | - |
72 | | - string sanitizedName = (sb != null) ? sb.ToString() : s; |
73 | | - |
74 | | - // The character sequences denoting generic instantiations, arrays, byrefs, or pointers must be |
75 | | - // restricted to that use only. Replace them if they happened to be used in any identifiers in |
76 | | - // the compilation input. |
77 | | - return sanitizedName; |
78 | | - } |
79 | | - |
/// <summary>
/// Sanitizes a UTF-8 name by handing its bytes to the span-based
/// <c>SanitizeName(ReadOnlySpan&lt;byte&gt;)</c> overload.
/// </summary>
/// <param name="s">The UTF-8 name to sanitize.</param>
/// <returns>Whatever the span-based overload produces for the bytes of <paramref name="s"/>.</returns>
public override Utf8String SanitizeName(Utf8String s)
{
    ReadOnlySpan<byte> nameBytes = s.AsSpan();
    return SanitizeName(nameBytes);
}
82 | 42 |
|
@@ -132,62 +92,26 @@ private static Utf8String SanitizeName(ReadOnlySpan<byte> s) |
132 | 92 | return sanitizedName; |
133 | 93 | } |
134 | 94 |
|
/// <summary>
/// Reports whether <paramref name="bytes"/> contains the three-byte sequence
/// EF BF BD, i.e. the UTF-8 encoding of U+FFFD (REPLACEMENT CHARACTER).
/// </summary>
/// <param name="bytes">The bytes to scan.</param>
/// <returns>
/// <c>true</c> when the sequence occurs anywhere in <paramref name="bytes"/>;
/// otherwise <c>false</c> (including for an empty span).
/// </returns>
private static bool ContainsUtf8ReplacementCharacter(ReadOnlySpan<byte> bytes)
{
    // "\uFFFD"u8 is the byte sequence EF BF BD.
    return bytes.IndexOf("\uFFFD"u8) != -1;
}
147 | 100 |
|
148 | | - private string SanitizeNameWithHash(string literal) |
149 | | - { |
150 | | - string mangledName = SanitizeName(literal); |
151 | | - |
152 | | - if (mangledName.Length > 30) |
153 | | - mangledName = mangledName.Substring(0, 30); |
154 | | - |
155 | | - if (mangledName != literal) |
156 | | - { |
157 | | - byte[] hash; |
158 | | - lock (this) |
159 | | - { |
160 | | - // Use SHA256 hash here to provide a high degree of uniqueness to symbol names without requiring them to be long |
161 | | - // This hash function provides an exceedingly high likelihood that no two strings will be given equal symbol names |
162 | | - // This is not considered used for security purpose; however collisions would be highly unfortunate as they will cause compilation |
163 | | - // failure. |
164 | | - hash = SHA256.HashData(GetBytesFromString(literal)); |
165 | | - } |
166 | | - |
167 | | - mangledName += "_" + Convert.ToHexString(hash); |
168 | | - } |
169 | | - |
170 | | - return mangledName; |
171 | | - } |
172 | | - |
173 | | - private Utf8String SanitizeNameWithHash(Utf8String literal) |
| 101 | + private Utf8String SanitizeNameWithHash(Utf8String literal, byte[] hash = null) |
174 | 102 | { |
175 | 103 | Utf8String mangledName = SanitizeName(literal); |
176 | 104 |
|
177 | 105 | if (mangledName.Length > 30) |
178 | 106 | mangledName = new Utf8String(mangledName.AsSpan().Slice(0, 30).ToArray()); |
179 | 107 |
|
180 | | - if (!mangledName.AsSpan().SequenceEqual(literal.AsSpan())) |
| 108 | + if (hash is not null || !mangledName.AsSpan().SequenceEqual(literal.AsSpan())) |
181 | 109 | { |
182 | | - byte[] hash; |
183 | | - lock (this) |
184 | | - { |
185 | | - // Use SHA256 hash here to provide a high degree of uniqueness to symbol names without requiring them to be long |
186 | | - // This hash function provides an exceedingly high likelihood that no two strings will be given equal symbol names |
187 | | - // This is not considered used for security purpose; however collisions would be highly unfortunate as they will cause compilation |
188 | | - // failure. |
189 | | - hash = SHA256.HashData(literal.AsSpan()); |
190 | | - } |
| 110 | + // Use SHA256 hash here to provide a high degree of uniqueness to symbol names without requiring them to be long |
| 111 | + // This hash function provides an exceedingly high likelihood that no two strings will be given equal symbol names |
| 112 | + // This is not considered used for security purpose; however collisions would be highly unfortunate as they will cause compilation |
| 113 | + // failure. |
| 114 | + hash ??= SHA256.HashData(literal.AsSpan()); |
191 | 115 |
|
192 | 116 | mangledName = new Utf8StringBuilder() |
193 | 117 | .Append(mangledName) |
@@ -297,7 +221,7 @@ private Utf8String ComputeMangledTypeName(TypeDesc type) |
297 | 221 | // This problem needs a better fix. |
298 | 222 | if (isSystemPrivate) |
299 | 223 | assemblyName = string.Concat("S.P.", assemblyName.AsSpan(15)); |
300 | | - Utf8String prependAssemblyName = new Utf8String(SanitizeName(assemblyName)); |
| 224 | + Utf8String prependAssemblyName = SanitizeName(new Utf8String(assemblyName)); |
301 | 225 |
|
302 | 226 | var deduplicator = new HashSet<Utf8String>(); |
303 | 227 |
|
@@ -684,18 +608,22 @@ private Utf8String ComputeMangledFieldName(FieldDesc field) |
684 | 608 | return utf8MangledName; |
685 | 609 | } |
686 | 610 |
|
687 | | - private Dictionary<string, string> _mangledStringLiterals = new Dictionary<string, string>(); |
| 611 | + private Dictionary<string, Utf8String> _mangledStringLiterals = new Dictionary<string, Utf8String>(); |
688 | 612 |
|
689 | | - public override string GetMangledStringName(string literal) |
| 613 | + public override Utf8String GetMangledStringName(string literal) |
690 | 614 | { |
691 | | - string mangledName; |
| 615 | + Utf8String mangledName; |
692 | 616 | lock (this) |
693 | 617 | { |
694 | 618 | if (_mangledStringLiterals.TryGetValue(literal, out mangledName)) |
695 | 619 | return mangledName; |
696 | 620 | } |
697 | 621 |
|
698 | | - mangledName = SanitizeNameWithHash(literal); |
| 622 | + Utf8String utf8Literal = new Utf8String(literal); |
| 623 | + byte[] hash = ContainsUtf8ReplacementCharacter(utf8Literal.AsSpan()) |
| 624 | + ? SHA256.HashData(MemoryMarshal.AsBytes(literal.AsSpan())) |
| 625 | + : null; |
| 626 | + mangledName = SanitizeNameWithHash(utf8Literal, hash); |
699 | 627 |
|
700 | 628 | lock (this) |
701 | 629 | { |
|
0 commit comments