Skip to content

Commit 89ea7c6

Browse files
committed
Enhance Docker Compose and Helm detectors for improved parallel processing and file size handling
1 parent 248e744 commit 89ea7c6

2 files changed

Lines changed: 56 additions & 14 deletions

File tree

src/Microsoft.ComponentDetection.Detectors/dockercompose/DockerComposeComponentDetector.cs

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,15 @@ public DockerComposeComponentDetector(
4040

4141
public override IEnumerable<string> Categories => [nameof(DetectorClass.DockerCompose)];
4242

43-
protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
43+
/// <summary>
44+
/// Gets or sets a value indicating whether compose files are processed concurrently.
45+
/// Each file is parsed independently into its own <see cref="ISingleFileComponentRecorder"/>
46+
/// and <see cref="DockerReferenceUtility"/> is stateless, so parsing is thread-safe and
47+
/// scales across cores for repositories containing many compose files.
48+
/// </summary>
49+
protected override bool EnableParallelism { get; set; } = true;
50+
51+
protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
4452
{
4553
var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
4654
var file = processRequest.ComponentStream;
@@ -49,18 +57,18 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
4957
{
5058
this.Logger.LogInformation("Discovered Docker Compose file: {Location}", file.Location);
5159

52-
string contents;
60+
// Parse directly from the stream; the content is already buffered in memory by
61+
// LazyComponentStream, so reading it into an intermediate string only adds an
62+
// extra full-file allocation and GC pressure under parallel processing.
63+
var yaml = new YamlStream();
5364
using (var reader = new StreamReader(file.Stream))
5465
{
55-
contents = await reader.ReadToEndAsync(cancellationToken);
66+
yaml.Load(reader);
5667
}
5768

58-
var yaml = new YamlStream();
59-
yaml.Load(new StringReader(contents));
60-
6169
if (yaml.Documents.Count == 0)
6270
{
63-
return;
71+
return Task.CompletedTask;
6472
}
6573

6674
foreach (var document in yaml.Documents)
@@ -75,6 +83,8 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
7583
{
7684
this.Logger.LogError(e, "Failed to parse Docker Compose file: {Location}", file.Location);
7785
}
86+
87+
return Task.CompletedTask;
7888
}
7989

8090
private static YamlMappingNode? GetMappingChild(YamlMappingNode parent, string key)

src/Microsoft.ComponentDetection.Detectors/helm/HelmComponentDetector.cs

Lines changed: 39 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,14 @@ namespace Microsoft.ComponentDetection.Detectors.Helm;
1717

1818
public class HelmComponentDetector : FileComponentDetector, IExperimentalDetector
1919
{
20+
/// <summary>
21+
/// Maximum size (in bytes) of a values file the detector will parse. The "*values*" globs
22+
/// can match large, non-Helm YAML files whose full-DOM parse dominates worst-case runtime;
23+
/// files above this limit are skipped so a single pathological file cannot exhaust the
24+
/// detector's time budget.
25+
/// </summary>
26+
private const long MaxValuesFileSizeBytes = 20 * 1024 * 1024; // 20 MB
27+
2028
public HelmComponentDetector(
2129
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
2230
IObservableDirectoryWalkerFactory walkerFactory,
@@ -41,6 +49,14 @@ public HelmComponentDetector(
4149

4250
public override IEnumerable<string> Categories => [nameof(DetectorClass.Helm)];
4351

52+
/// <summary>
53+
/// Gets or sets a value indicating whether values files are processed concurrently.
54+
/// Each file is parsed independently into its own <see cref="ISingleFileComponentRecorder"/>
55+
/// and <see cref="DockerReferenceUtility"/> is stateless, so parsing is thread-safe and
56+
/// scales across cores for repositories containing many charts.
57+
/// </summary>
58+
protected override bool EnableParallelism { get; set; } = true;
59+
4460
/// <summary>
4561
/// Pre-filters scan work to only values files co-located with a Chart.yaml/Chart.yml.
4662
/// Materializes all matched files, identifies Helm chart directories, then filters.
@@ -65,7 +81,7 @@ protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsy
6581
.ToObservable();
6682
}
6783

68-
protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
84+
protected override Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
6985
{
7086
var file = processRequest.ComponentStream;
7187

@@ -74,20 +90,34 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
7490
// filename/directory checks are needed.
7591
try
7692
{
93+
// Check the size before reading file.Stream so an oversized file is never
94+
// buffered into memory. The "*values*" globs can match large, non-Helm YAML files
95+
// whose full-DOM parse is the main driver of worst-case (timeout) runtime.
96+
var fileInfo = new FileInfo(file.Location);
97+
if (fileInfo.Exists && fileInfo.Length > MaxValuesFileSizeBytes)
98+
{
99+
this.Logger.LogWarning(
100+
"Skipping Helm values file exceeding size limit ({Length} bytes > {Limit} bytes): {Location}",
101+
fileInfo.Length,
102+
MaxValuesFileSizeBytes,
103+
file.Location);
104+
return Task.CompletedTask;
105+
}
106+
77107
this.Logger.LogInformation("Discovered Helm values file: {Location}", file.Location);
78108

79-
string contents;
109+
// Parse directly from the stream; the content is already buffered in memory by
110+
// LazyComponentStream, so reading it into an intermediate string only adds an
111+
// extra full-file allocation and GC pressure under parallel processing.
112+
var yaml = new YamlStream();
80113
using (var reader = new StreamReader(file.Stream))
81114
{
82-
contents = await reader.ReadToEndAsync(cancellationToken);
115+
yaml.Load(reader);
83116
}
84117

85-
var yaml = new YamlStream();
86-
yaml.Load(new StringReader(contents));
87-
88118
if (yaml.Documents.Count == 0)
89119
{
90-
return;
120+
return Task.CompletedTask;
91121
}
92122

93123
this.ExtractImageReferencesFromValues(yaml, processRequest.SingleFileComponentRecorder);
@@ -96,6 +126,8 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
96126
{
97127
this.Logger.LogError(e, "Failed to parse Helm file: {Location}", file.Location);
98128
}
129+
130+
return Task.CompletedTask;
99131
}
100132

101133
/// <summary>

0 commit comments

Comments
 (0)