Skip to content

Commit bf47244

Browse files
committed
Enhance DockerReferenceUtility to handle invalid characters and update tests for unresolved variables
1 parent f8c809e commit bf47244

2 files changed

Lines changed: 179 additions & 12 deletions

File tree

src/Microsoft.ComponentDetection.Common/DockerReference/DockerReferenceUtility.cs

Lines changed: 78 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
namespace Microsoft.ComponentDetection.Common;
2828

2929
using System;
30+
using System.Buffers;
31+
using System.Collections.Generic;
3032
using System.Diagnostics.CodeAnalysis;
3133
using System.Text.RegularExpressions;
3234
using Microsoft.ComponentDetection.Contracts;
@@ -40,20 +42,39 @@ public static class DockerReferenceUtility
4042
private const string LEGACYDEFAULTDOMAIN = "index.docker.io";
4143
private const string OFFICIALREPOSITORYNAME = "library";
4244

43-
// Characters that only appear in an image reference as part of an unresolved templating
44-
// token. '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR},
45-
// {{ .Values.tag }}); '#' covers Azure DevOps and other token-replacement placeholders
46-
// (e.g. #imageTag#) and is never valid in a resolved docker reference.
47-
private static readonly char[] TemplateDelimiters = ['$', '{', '}', '#'];
45+
// Delimiters that only appear in an image reference as part of an unresolved templating
46+
// token: '$', '{' and '}' cover shell / Helm / Go-template placeholders (e.g. ${VAR},
47+
// {{ .Values.tag }}). These are recognized templating syntaxes that are expected in
48+
// un-rendered manifests, so they are skipped silently rather than reported. A token wrapped
49+
// in matching '#' or '!' (handled by DelimiterWrappedTokenRegex) is treated the same way. A
50+
// stray invalid character that is not part of such a token (e.g. a single '#' or '!') is still
51+
// reported via GetInvalidReferenceCharacters.
52+
private static readonly char[] TemplateDelimiters = ['$', '{', '}'];
4853

4954
// Matches token-replacement placeholders that wrap an identifier in double underscores,
5055
// e.g. __IMAGE_TAG__ or __MCR_ENDPOINT__. Without this they parse as an uppercase repository
5156
// name and surface as a noisy parse failure instead of being skipped as a templated value.
5257
private static readonly Regex DoubleUnderscoreTokenRegex = new(@"__\w+__");
5358

59+
// Matches token-replacement placeholders wrapped in a matching '#' or '!', e.g. #imageTag#,
60+
// #cs_containerRegistryLoginServerUrl#, or !imageTag!. A string surrounded by the same '#' or
61+
// '!' delimiter is almost always an unsubstituted template variable (Azure DevOps token
62+
// replacement and similar), so it is skipped silently instead of surfacing as a misleading
63+
// docker-reference parse failure. The backreference requires the closing delimiter to match
64+
// the opening one, so a mismatched stray '#' or '!' is left to GetInvalidReferenceCharacters.
65+
private static readonly Regex DelimiterWrappedTokenRegex = new(@"([#!])[^#!]+\1");
66+
67+
// Every character permitted anywhere in a docker reference per the grammar at the top of this
68+
// file: alphanumerics, the separators '.', '_' and '-', the path separator '/', the tag/port
69+
// and digest separators ':' and '@', and the digest-algorithm separator '+'. Anything else
70+
// (e.g. '#', '!') comes from unsubstituted template tokens and is reported as invalid.
71+
private static readonly SearchValues<char> ValidReferenceChars = SearchValues.Create(
72+
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-/:@+");
73+
5474
/// <summary>
5575
/// Returns true if the reference contains unresolved variable or templating placeholders,
56-
/// e.g. <c>${VAR}</c>, <c>{{ .Values.tag }}</c>, <c>#imageTag#</c>, or <c>__IMAGE_TAG__</c>.
76+
/// e.g. <c>${VAR}</c>, <c>{{ .Values.tag }}</c>, <c>__IMAGE_TAG__</c>, <c>#imageTag#</c>, or
77+
/// <c>!imageTag!</c>.
5778
/// Such references are not real, resolvable images, so they should be skipped before calling
5879
/// <see cref="ParseFamiliarName"/> or <see cref="ParseQualifiedName"/> and treated as
5980
/// unresolved values rather than reported as parse failures.
@@ -62,11 +83,14 @@ public static class DockerReferenceUtility
6283
/// <returns><c>true</c> if the reference contains variable placeholder characters; otherwise <c>false</c>.</returns>
6384
public static bool HasUnresolvedVariables(string reference) =>
6485
reference.IndexOfAny(TemplateDelimiters) >= 0 ||
65-
DoubleUnderscoreTokenRegex.IsMatch(reference);
86+
DoubleUnderscoreTokenRegex.IsMatch(reference) ||
87+
DelimiterWrappedTokenRegex.IsMatch(reference);
6688

6789
/// <summary>
6890
/// Attempts to parse an image reference string into a <see cref="DockerReference"/>.
69-
/// Returns <c>null</c> if the reference contains unresolved variables or cannot be parsed.
91+
/// Returns <c>null</c> if the reference contains unresolved variables, contains characters that
92+
/// are not valid in a docker reference, or otherwise cannot be parsed. A warning is logged in
93+
/// every skip/failure case so that references which are not scanned remain visible in logs.
7094
/// </summary>
7195
/// <param name="imageReference">The image reference string to parse.</param>
7296
/// <param name="logger">Optional logger for recording parse failures.</param>
@@ -75,6 +99,19 @@ public static bool HasUnresolvedVariables(string reference) =>
7599
{
76100
if (HasUnresolvedVariables(imageReference))
77101
{
102+
logger?.LogWarning(
103+
"Skipping image reference '{ImageReference}' because it contains one or more unresolved template tokens or variable placeholders.",
104+
imageReference);
105+
return null;
106+
}
107+
108+
var invalidCharacters = GetInvalidReferenceCharacters(imageReference);
109+
if (invalidCharacters.Length > 0)
110+
{
111+
logger?.LogWarning(
112+
"Skipping image reference '{ImageReference}' because it contains character(s) that are not valid in a docker reference: {InvalidCharacters}",
113+
imageReference,
114+
invalidCharacters);
78115
return null;
79116
}
80117

@@ -92,7 +129,7 @@ public static bool HasUnresolvedVariables(string reference) =>
92129
/// <summary>
93130
/// Parses an image reference and registers it with the recorder if valid.
94131
/// Skips references with unresolved variables or that cannot be parsed,
95-
/// logging a warning for parse failures so that remaining entries continue to be processed.
132+
/// logging a warning in each skipped case so that remaining entries continue to be processed.
96133
/// </summary>
97134
/// <param name="imageReference">The image reference string to parse.</param>
98135
/// <param name="recorder">The component recorder to register the image with.</param>
@@ -244,6 +281,38 @@ public static DockerReference ParseAll(string name)
244281
return ParseFamiliarName(name);
245282
}
246283

284+
/// <summary>
285+
/// Returns the distinct characters in <paramref name="reference"/> that are not valid in any
286+
/// part of a docker reference (domain, repository, tag, or digest) as a comma-separated string,
287+
/// or an empty string when every character is valid. Characters such as <c>#</c> and <c>!</c>
288+
/// commonly appear in unsubstituted template tokens and otherwise surface as misleading
289+
/// "must be lowercase" or "invalid reference format" parse errors.
290+
/// </summary>
291+
/// <param name="reference">The image reference string to inspect.</param>
292+
/// <returns>A comma-separated list of invalid characters, or an empty string if there are none.</returns>
293+
private static string GetInvalidReferenceCharacters(string reference)
294+
{
295+
// Vectorized happy-path check: the overwhelmingly common case is an all-valid reference,
296+
// for which this returns without allocating. Only gather the offending characters when
297+
// at least one is present.
298+
var span = reference.AsSpan();
299+
if (!span.ContainsAnyExcept(ValidReferenceChars))
300+
{
301+
return string.Empty;
302+
}
303+
304+
SortedSet<char> invalid = [];
305+
foreach (var c in span)
306+
{
307+
if (!ValidReferenceChars.Contains(c))
308+
{
309+
invalid.Add(c);
310+
}
311+
}
312+
313+
return string.Join(", ", invalid);
314+
}
315+
247316
private static DockerReference CreateDockerReference(Reference options)
248317
{
249318
return DockerReference.CreateDockerReference(options.Repository, options.Domain, options.Digest, options.Tag);

test/Microsoft.ComponentDetection.Common.Tests/DockerReferenceUtilityTests.cs

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,19 @@ public void HasUnresolvedVariables_ReturnsTrueForDoubleUnderscoreTokens()
293293
[TestMethod]
294294
public void HasUnresolvedVariables_ReturnsTrueForHashDelimitedTokens()
295295
{
296+
// A token wrapped in matching '#' (e.g. #imageTag#) is treated as an unresolved template
297+
// variable and skipped silently rather than reported as an invalid character.
296298
DockerReferenceUtility.HasUnresolvedVariables("#cs_containerRegistryLoginServerUrl#/coreservicesaksservice_#cs_aks_workloadName#_#cs_aks_serviceTrackIdentifier#/#serviceName#:#imageTag#").Should().BeTrue();
297299
}
298300

301+
[TestMethod]
302+
public void HasUnresolvedVariables_ReturnsTrueForExclamationDelimitedTokens()
303+
{
304+
// A token wrapped in matching '!' (e.g. !imageTag!) is treated as an unresolved template
305+
// variable and skipped silently rather than reported as an invalid character.
306+
DockerReferenceUtility.HasUnresolvedVariables("!cs_containerRegistryLoginServerUrl!/coreservicesaksservice_!cs_aks_workloadName!/!serviceName!:!imageTag!").Should().BeTrue();
307+
}
308+
299309
[TestMethod]
300310
public void HasUnresolvedVariables_ReturnsFalseForPlainReference()
301311
{
@@ -327,7 +337,13 @@ public void TryParseImageReference_ReturnsNullForHashDelimitedTokens()
327337
}
328338

329339
[TestMethod]
330-
public void TryParseImageReference_DoesNotLogWarningForTemplatedReference()
340+
public void TryParseImageReference_ReturnsNullForExclamationDelimitedTokens()
341+
{
342+
DockerReferenceUtility.TryParseImageReference("!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!").Should().BeNull();
343+
}
344+
345+
[TestMethod]
346+
public void TryParseImageReference_LogsWarningForTemplatedReference()
331347
{
332348
var logger = new Mock<ILogger>();
333349

@@ -336,12 +352,94 @@ public void TryParseImageReference_DoesNotLogWarningForTemplatedReference()
336352
result.Should().BeNull();
337353
logger.Verify(
338354
l => l.Log(
339-
It.IsAny<LogLevel>(),
355+
LogLevel.Warning,
356+
It.IsAny<EventId>(),
357+
It.IsAny<It.IsAnyType>(),
358+
It.IsAny<Exception>(),
359+
It.IsAny<Func<It.IsAnyType, Exception, string>>()),
360+
Times.Once);
361+
}
362+
363+
[TestMethod]
364+
public void TryParseImageReference_LogsWarningForHashDelimitedTokens()
365+
{
366+
var logger = new Mock<ILogger>();
367+
368+
var result = DockerReferenceUtility.TryParseImageReference(
369+
"#cs_containerRegistryLoginServerUrl#/svc/#serviceName#:#imageTag#",
370+
logger.Object);
371+
372+
result.Should().BeNull();
373+
logger.Verify(
374+
l => l.Log(
375+
LogLevel.Warning,
376+
It.IsAny<EventId>(),
377+
It.IsAny<It.IsAnyType>(),
378+
It.IsAny<Exception>(),
379+
It.IsAny<Func<It.IsAnyType, Exception, string>>()),
380+
Times.Once);
381+
}
382+
383+
[TestMethod]
384+
public void TryParseImageReference_LogsWarningForExclamationDelimitedTokens()
385+
{
386+
var logger = new Mock<ILogger>();
387+
388+
var result = DockerReferenceUtility.TryParseImageReference(
389+
"!cs_containerRegistryLoginServerUrl!/svc/!serviceName!:!imageTag!",
390+
logger.Object);
391+
392+
result.Should().BeNull();
393+
logger.Verify(
394+
l => l.Log(
395+
LogLevel.Warning,
340396
It.IsAny<EventId>(),
341397
It.IsAny<It.IsAnyType>(),
342398
It.IsAny<Exception>(),
343399
It.IsAny<Func<It.IsAnyType, Exception, string>>()),
344-
Times.Never);
400+
Times.Once);
401+
}
402+
403+
[TestMethod]
404+
public void TryParseImageReference_ReturnsNullForExclamationCharacter()
405+
{
406+
DockerReferenceUtility.TryParseImageReference("docker.io/library/nginx!:latest").Should().BeNull();
407+
}
408+
409+
[TestMethod]
410+
public void TryParseImageReference_LogsWarningForExclamationCharacter()
411+
{
412+
var logger = new Mock<ILogger>();
413+
414+
var result = DockerReferenceUtility.TryParseImageReference("docker.io/library/nginx!:latest", logger.Object);
415+
416+
result.Should().BeNull();
417+
logger.Verify(
418+
l => l.Log(
419+
LogLevel.Warning,
420+
It.IsAny<EventId>(),
421+
It.IsAny<It.IsAnyType>(),
422+
It.IsAny<Exception>(),
423+
It.IsAny<Func<It.IsAnyType, Exception, string>>()),
424+
Times.Once);
425+
}
426+
427+
[TestMethod]
428+
public void TryParseImageReference_LogsWarningForInvalidCharacterInTag()
429+
{
430+
var logger = new Mock<ILogger>();
431+
432+
var result = DockerReferenceUtility.TryParseImageReference("mcr.microsoft.com/dotnet/sdk:8.0#preview", logger.Object);
433+
434+
result.Should().BeNull();
435+
logger.Verify(
436+
l => l.Log(
437+
LogLevel.Warning,
438+
It.IsAny<EventId>(),
439+
It.IsAny<It.IsAnyType>(),
440+
It.IsAny<Exception>(),
441+
It.IsAny<Func<It.IsAnyType, Exception, string>>()),
442+
Times.Once);
345443
}
346444

347445
[TestMethod]

0 commit comments

Comments
 (0)