Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ namespace Microsoft.ComponentDetection.Common;

using System;
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.Extensions.Logging;

Expand All @@ -39,14 +40,29 @@ public static class DockerReferenceUtility
private const string LEGACYDEFAULTDOMAIN = "index.docker.io";
private const string OFFICIALREPOSITORYNAME = "library";

// Marker characters whose presence means the image reference still contains an unresolved
// templating token:
//   '$', '{', '}'  - shell / Helm / Go-template placeholders such as ${VAR} or {{ .Values.tag }}
//   '#'            - Azure DevOps style token replacement such as #imageTag#
// None of these is valid in a resolved docker reference.
private static readonly char[] TemplateDelimiters = new[] { '$', '{', '}', '#' };

// Matches token-replacement placeholders that wrap an identifier in double underscores,
// e.g. __IMAGE_TAG__ or __MCR_ENDPOINT__. Without this they parse as an uppercase repository
// name and surface as a noisy parse failure instead of being skipped as a templated value.
// Compiled because this single static instance is reused by every HasUnresolvedVariables call
// that finds no delimiter character.
// NOTE(review): the pattern is unanchored and \w also matches '_', so a name that uses the
// "__" separator twice (e.g. foo__bar__baz) is reported as templated too - confirm that is acceptable.
private static readonly Regex DoubleUnderscoreTokenRegex = new(@"__\w+__", RegexOptions.Compiled);
Comment thread
jpinz marked this conversation as resolved.

/// <summary>
/// Returns true if the reference contains unresolved variable or templating placeholders,
/// e.g. <c>${VAR}</c>, <c>{{ .Values.tag }}</c>, <c>#imageTag#</c>, or <c>__IMAGE_TAG__</c>.
/// Such references are not real, resolvable images, so they should be skipped before calling
/// <see cref="ParseFamiliarName"/> or <see cref="ParseQualifiedName"/> and treated as
/// unresolved values rather than reported as parse failures.
/// </summary>
/// <param name="reference">The image reference string to check. Must not be <c>null</c>.</param>
/// <returns>
/// <c>true</c> if the reference contains a template delimiter character or a
/// double-underscore token; otherwise <c>false</c>.
/// </returns>
public static bool HasUnresolvedVariables(string reference) =>
    // Cheap character scan first; the regex only runs when no delimiter character is present.
    reference.IndexOfAny(TemplateDelimiters) >= 0 ||
    DoubleUnderscoreTokenRegex.IsMatch(reference);

/// <summary>
/// Attempts to parse an image reference string into a <see cref="DockerReference"/>.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,15 @@ public DockerComposeComponentDetector(

public override IEnumerable<string> Categories => [nameof(DetectorClass.DockerCompose)];

protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
/// <summary>
/// Gets or sets a value indicating whether compose files are processed concurrently.
/// Each file is parsed independently into its own <see cref="ISingleFileComponentRecorder"/>
/// and <see cref="DockerReferenceUtility"/> is stateless, so parsing is thread-safe and
/// scales across cores for repositories containing many compose files.
/// </summary>
/// <value>Initialized to <c>true</c> for this detector.</value>
protected override bool EnableParallelism { get; set; } = true;

protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
var file = processRequest.ComponentStream;
Expand All @@ -49,18 +57,18 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
{
this.Logger.LogInformation("Discovered Docker Compose file: {Location}", file.Location);

string contents;
// Parse directly from the stream; the content is already buffered in memory by
// LazyComponentStream, so reading it into an intermediate string only adds an
// extra full-file allocation and GC pressure under parallel processing.
var yaml = new YamlStream();
using (var reader = new StreamReader(file.Stream))
{
contents = await reader.ReadToEndAsync(cancellationToken);
yaml.Load(reader);
}

var yaml = new YamlStream();
yaml.Load(new StringReader(contents));

if (yaml.Documents.Count == 0)
{
return;
return Task.CompletedTask;
}

foreach (var document in yaml.Documents)
Expand All @@ -75,6 +83,8 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
{
this.Logger.LogError(e, "Failed to parse Docker Compose file: {Location}", file.Location);
}

return Task.CompletedTask;
}

private static YamlMappingNode? GetMappingChild(YamlMappingNode parent, string key)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ namespace Microsoft.ComponentDetection.Detectors.Helm;

public class HelmComponentDetector : FileComponentDetector, IExperimentalDetector
{
/// <summary>
/// Upper bound, in bytes, on the size of a values file this detector is willing to parse.
/// The "*values*" globs can also match large YAML files that are not Helm values files, and
/// building a full DOM for such a file dominates worst-case runtime. Anything larger than
/// this limit is skipped so that one pathological file cannot use up the detector's time budget.
/// </summary>
private const long MaxValuesFileSizeBytes = 20L * 1024 * 1024; // 20 MiB

public HelmComponentDetector(
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
IObservableDirectoryWalkerFactory walkerFactory,
Expand All @@ -41,6 +49,14 @@ public HelmComponentDetector(

public override IEnumerable<string> Categories => [nameof(DetectorClass.Helm)];

/// <summary>
/// Gets or sets a value indicating whether values files are processed concurrently.
/// Each file is parsed independently into its own <see cref="ISingleFileComponentRecorder"/>
/// and <see cref="DockerReferenceUtility"/> is stateless, so parsing is thread-safe and
/// scales across cores for repositories containing many charts.
/// </summary>
/// <value>Initialized to <c>true</c> for this detector.</value>
protected override bool EnableParallelism { get; set; } = true;

/// <summary>
/// Pre-filters scan work to only values files co-located with a Chart.yaml/Chart.yml.
/// Materializes all matched files, identifies Helm chart directories, then filters.
Expand All @@ -65,7 +81,7 @@ protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsy
.ToObservable();
}

protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
var file = processRequest.ComponentStream;

Expand All @@ -74,20 +90,34 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
// filename/directory checks are needed.
try
{
// Check the size before touching ComponentStream so an oversized file is never
// buffered into memory. The "*values*" globs can match large, non-Helm YAML files
// whose full-DOM parse is the main driver of worst-case (timeout) runtime.
var fileInfo = new FileInfo(file.Location);
if (fileInfo.Exists && fileInfo.Length > MaxValuesFileSizeBytes)
{
this.Logger.LogWarning(
"Skipping Helm values file exceeding size limit ({Length} bytes > {Limit} bytes): {Location}",
fileInfo.Length,
MaxValuesFileSizeBytes,
file.Location);
return Task.CompletedTask;
}

this.Logger.LogInformation("Discovered Helm values file: {Location}", file.Location);

string contents;
// Parse directly from the stream; the content is already buffered in memory by
// LazyComponentStream, so reading it into an intermediate string only adds an
// extra full-file allocation and GC pressure under parallel processing.
var yaml = new YamlStream();
using (var reader = new StreamReader(file.Stream))
{
contents = await reader.ReadToEndAsync(cancellationToken);
yaml.Load(reader);
}

var yaml = new YamlStream();
yaml.Load(new StringReader(contents));

if (yaml.Documents.Count == 0)
{
return;
return Task.CompletedTask;
}

this.ExtractImageReferencesFromValues(yaml, processRequest.SingleFileComponentRecorder);
Expand All @@ -96,6 +126,8 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
{
this.Logger.LogError(e, "Failed to parse Helm file: {Location}", file.Location);
}

return Task.CompletedTask;
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,18 +284,66 @@ public void HasUnresolvedVariables_ReturnsTrueForBraces()
DockerReferenceUtility.HasUnresolvedVariables("{{ .Values.image }}").Should().BeTrue();
}

[TestMethod]
public void HasUnresolvedVariables_ReturnsTrueForDoubleUnderscoreTokens()
{
    // Registry and tag are both __TOKEN__ style placeholders.
    const string reference = "__MCR_ENDPOINT__/aks/devinfra/helm3sample:__IMAGE_TAG__";

    var hasPlaceholders = DockerReferenceUtility.HasUnresolvedVariables(reference);

    hasPlaceholders.Should().BeTrue();
}

[TestMethod]
public void HasUnresolvedVariables_ReturnsTrueForHashDelimitedTokens()
{
    // Every segment of the reference contains a #token# placeholder.
    const string reference = "#cs_containerRegistryLoginServerUrl#/coreservicesaksservice_#cs_aks_workloadName#_#cs_aks_serviceTrackIdentifier#/#serviceName#:#imageTag#";

    var hasPlaceholders = DockerReferenceUtility.HasUnresolvedVariables(reference);

    hasPlaceholders.Should().BeTrue();
}

[TestMethod]
public void HasUnresolvedVariables_ReturnsFalseForPlainReference()
{
    // A fully qualified reference with no placeholder syntax at all.
    const string reference = "docker.io/library/nginx:latest";

    var hasPlaceholders = DockerReferenceUtility.HasUnresolvedVariables(reference);

    hasPlaceholders.Should().BeFalse();
}

[TestMethod]
public void HasUnresolvedVariables_ReturnsFalseForReferenceWithUnderscores()
{
    // Single underscores inside path components must not be mistaken for __TOKEN__ placeholders.
    const string reference = "mcr.microsoft.com/some_repo/my_image:1.0";

    var hasPlaceholders = DockerReferenceUtility.HasUnresolvedVariables(reference);

    hasPlaceholders.Should().BeFalse();
}

[TestMethod]
public void TryParseImageReference_ReturnsNullForUnresolvedVariables()
{
    // The repository part is a shell-style ${VAR} placeholder.
    const string reference = "${IMAGE}:latest";

    var parsed = DockerReferenceUtility.TryParseImageReference(reference);

    parsed.Should().BeNull();
}

[TestMethod]
public void TryParseImageReference_ReturnsNullForDoubleUnderscoreTokens()
{
    // Registry and tag are both __TOKEN__ style placeholders.
    const string reference = "__MCR_ENDPOINT__/aks/devinfra/helm3sample:__IMAGE_TAG__";

    var parsed = DockerReferenceUtility.TryParseImageReference(reference);

    parsed.Should().BeNull();
}

[TestMethod]
public void TryParseImageReference_ReturnsNullForHashDelimitedTokens()
{
    // Registry, image name and tag are all #token# placeholders.
    const string reference = "#cs_containerRegistryLoginServerUrl#/svc/#serviceName#:#imageTag#";

    var parsed = DockerReferenceUtility.TryParseImageReference(reference);

    parsed.Should().BeNull();
}

[TestMethod]
public void TryParseImageReference_DoesNotLogWarningForTemplatedReference()
{
    const string templatedReference = "__MCR_ENDPOINT__/aks/devinfra/helm3sample:__IMAGE_TAG__";
    var loggerMock = new Mock<ILogger>();

    var parsed = DockerReferenceUtility.TryParseImageReference(templatedReference, loggerMock.Object);

    parsed.Should().BeNull();

    // The verification accepts any level, event id, state and formatter, so it fails on
    // any log call at all, not only on warnings.
    loggerMock.Verify(
        log => log.Log(
            It.IsAny<LogLevel>(),
            It.IsAny<EventId>(),
            It.IsAny<It.IsAnyType>(),
            It.IsAny<Exception>(),
            It.IsAny<Func<It.IsAnyType, Exception, string>>()),
        Times.Never());
}

[TestMethod]
public void TryParseImageReference_ReturnsNullForInvalidReference()
{
Expand Down
Loading