2727namespace Microsoft . ComponentDetection . Common ;
2828
2929using System ;
30+ using System . Buffers ;
31+ using System . Collections . Generic ;
3032using System . Diagnostics . CodeAnalysis ;
3133using System . Text . RegularExpressions ;
3234using Microsoft . ComponentDetection . Contracts ;
@@ -40,20 +42,39 @@ public static class DockerReferenceUtility
4042 private const string LEGACYDEFAULTDOMAIN = "index.docker.io" ;
4143 private const string OFFICIALREPOSITORYNAME = "library" ;
4244
43- // Characters that only appear in an image reference as part of an unresolved templating
44- // token. '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR},
45- // {{ .Values.tag }}); '#' covers Azure DevOps and other token-replacement placeholders
46- // (e.g. #imageTag#) and is never valid in a resolved docker reference.
47- private static readonly char [ ] TemplateDelimiters = [ '$' , '{' , '}' , '#' ] ;
45+ // Delimiters that only appear in an image reference as part of an unresolved templating
46+ // token: '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR},
47+ // {{ .Values.tag }}). These are recognized templating syntaxes that are expected in
48+ // un-rendered manifests, so they are skipped silently rather than reported. A token wrapped
49+ // in matching '#' or '!' (handled by DelimiterWrappedTokenRegex) is treated the same way. A
50+ // stray invalid character that is not part of such a token (e.g. a single '#' or '!') is still
51+ // reported via GetInvalidReferenceCharacters.
52+ private static readonly char [ ] TemplateDelimiters = [ '$' , '{' , '}' ] ;
4853
4954 // Matches token-replacement placeholders that wrap an identifier in double underscores,
5055 // e.g. __IMAGE_TAG__ or __MCR_ENDPOINT__. Without this they parse as an uppercase repository
5156 // name and surface as a noisy parse failure instead of being skipped as a templated value.
5257 private static readonly Regex DoubleUnderscoreTokenRegex = new ( @"__\w+__" ) ;
5358
59+ // Matches token-replacement placeholders wrapped in a matching '#' or '!', e.g. #imageTag#,
60+ // #cs_containerRegistryLoginServerUrl#, or !imageTag!. A string surrounded by the same '#' or
61+ // '!' delimiter is almost always an unsubstituted template variable (Azure DevOps token
62+ // replacement and similar), so it is skipped silently instead of surfacing as a misleading
63+ // docker-reference parse failure. The backreference requires the closing delimiter to match
64+ // the opening one, so a mismatched stray '#' or '!' is left to GetInvalidReferenceCharacters.
65+ private static readonly Regex DelimiterWrappedTokenRegex = new ( @"([#!])[^#!]+\1" ) ;
66+
67+ // Every character permitted anywhere in a docker reference per the grammar at the top of this
68+ // file: alphanumerics, the separators '.', '_' and '-', the path separator '/', the tag/port
69+ // and digest separators ':' and '@', and the digest-algorithm separator '+'. Anything else
70+ // (e.g. '#', '!') comes from unsubstituted template tokens and is reported as invalid.
71+ private static readonly SearchValues < char > ValidReferenceChars = SearchValues . Create (
72+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-/:@+" ) ;
73+
5474 /// <summary>
5575 /// Returns true if the reference contains unresolved variable or templating placeholders,
56- /// e.g. <c>${VAR}</c>, <c>{{ .Values.tag }}</c>, <c>#imageTag#</c>, or <c>__IMAGE_TAG__</c>.
76+ /// e.g. <c>${VAR}</c>, <c>{{ .Values.tag }}</c>, <c>__IMAGE_TAG__</c>, <c>#imageTag#</c>, or
77+ /// <c>!imageTag!</c>.
5778 /// Such references are not real, resolvable images, so they should be skipped before calling
5879 /// <see cref="ParseFamiliarName"/> or <see cref="ParseQualifiedName"/> and treated as
5980 /// unresolved values rather than reported as parse failures.
@@ -62,11 +83,14 @@ public static class DockerReferenceUtility
6283 /// <returns><c>true</c> if the reference contains variable placeholder characters; otherwise <c>false</c>.</returns>
6384 public static bool HasUnresolvedVariables ( string reference ) =>
6485 reference . IndexOfAny ( TemplateDelimiters ) >= 0 ||
65- DoubleUnderscoreTokenRegex . IsMatch ( reference ) ;
86+ DoubleUnderscoreTokenRegex . IsMatch ( reference ) ||
87+ DelimiterWrappedTokenRegex . IsMatch ( reference ) ;
6688
6789 /// <summary>
6890 /// Attempts to parse an image reference string into a <see cref="DockerReference"/>.
69- /// Returns <c>null</c> if the reference contains unresolved variables or cannot be parsed.
91+ /// Returns <c>null</c> if the reference contains unresolved variables, contains characters that
92+ /// are not valid in a docker reference, or otherwise cannot be parsed. A warning is logged in
93+ /// every skip/failure case so that references which are not scanned remain visible in logs.
7094 /// </summary>
7195 /// <param name="imageReference">The image reference string to parse.</param>
7296 /// <param name="logger">Optional logger for recording parse failures.</param>
@@ -75,6 +99,19 @@ public static bool HasUnresolvedVariables(string reference) =>
7599 {
76100 if ( HasUnresolvedVariables ( imageReference ) )
77101 {
102+ logger ? . LogWarning (
103+ "Skipping image reference '{ImageReference}' because it contains one or more unresolved template tokens or variable placeholders." ,
104+ imageReference ) ;
105+ return null ;
106+ }
107+
108+ var invalidCharacters = GetInvalidReferenceCharacters ( imageReference ) ;
109+ if ( invalidCharacters . Length > 0 )
110+ {
111+ logger ? . LogWarning (
112+ "Skipping image reference '{ImageReference}' because it contains character(s) that are not valid in a docker reference: {InvalidCharacters}" ,
113+ imageReference ,
114+ invalidCharacters ) ;
78115 return null ;
79116 }
80117
@@ -92,7 +129,7 @@ public static bool HasUnresolvedVariables(string reference) =>
92129 /// <summary>
93130 /// Parses an image reference and registers it with the recorder if valid.
94131 /// Skips references with unresolved variables or that cannot be parsed,
95- /// logging a warning for parse failures so that remaining entries continue to be processed.
132+ /// logging a warning in each skipped case so that remaining entries continue to be processed.
96133 /// </summary>
97134 /// <param name="imageReference">The image reference string to parse.</param>
98135 /// <param name="recorder">The component recorder to register the image with.</param>
@@ -244,6 +281,38 @@ public static DockerReference ParseAll(string name)
244281 return ParseFamiliarName ( name ) ;
245282 }
246283
284+ /// <summary>
285+ /// Returns the distinct characters in <paramref name="reference"/> that are not valid in any
286+ /// part of a docker reference (domain, repository, tag, or digest) as a comma-separated string,
287+ /// or an empty string when every character is valid. Characters such as <c>#</c> and <c>!</c>
288+ /// commonly appear in unsubstituted template tokens and otherwise surface as misleading
289+ /// "must be lowercase" or "invalid reference format" parse errors.
290+ /// </summary>
291+ /// <param name="reference">The image reference string to inspect.</param>
292+ /// <returns>A comma-separated list of invalid characters, or an empty string if there are none.</returns>
293+ private static string GetInvalidReferenceCharacters ( string reference )
294+ {
295+ // Vectorized happy-path check: the overwhelmingly common case is an all-valid reference,
296+ // for which this returns without allocating. Only gather the offending characters when
297+ // at least one is present.
298+ var span = reference . AsSpan ( ) ;
299+ if ( ! span . ContainsAnyExcept ( ValidReferenceChars ) )
300+ {
301+ return string . Empty ;
302+ }
303+
304+ SortedSet < char > invalid = [ ] ;
305+ foreach ( var c in span )
306+ {
307+ if ( ! ValidReferenceChars . Contains ( c ) )
308+ {
309+ invalid . Add ( c ) ;
310+ }
311+ }
312+
313+ return string . Join ( ", " , invalid ) ;
314+ }
315+
247316 private static DockerReference CreateDockerReference ( Reference options )
248317 {
249318 return DockerReference . CreateDockerReference ( options . Repository , options . Domain , options . Digest , options . Tag ) ;
0 commit comments