From af0cff9c2c5e5dc751d60453603cc541d77388ee Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Thu, 2 Apr 2026 14:47:11 -0700 Subject: [PATCH 1/2] Promote MavenWithFallback detector replacing MvnCli (#1756) * Promote MavenWithFallback detector replacing MvnCli * Fix and add test * Small nit * Update maven markdown * Update docs/detectors/maven.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Mask URL * Nit * Nit --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/detectors/maven.md | 179 +- .../maven/MavenCommandService.cs | 64 - .../maven/MavenConstants.cs | 5 - .../maven/MavenWithFallbackDetector.cs | 1334 ---------- .../maven/MvnCliComponentDetector.cs | 1340 +++++++++- .../Extensions/ServiceCollectionExtensions.cs | 1 - .../Services/DetectorProcessingService.cs | 132 - .../MavenCommandServiceTests.cs | 216 +- .../MavenWithFallbackDetectorTests.cs | 2234 ----------------- .../MvnCliDetectorTests.cs | 1612 +++++++++++- .../DetectorProcessingServiceTests.cs | 199 +- .../ComponentDetectionIntegrationTests.cs | 15 +- 12 files changed, 3000 insertions(+), 4331 deletions(-) delete mode 100644 src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs delete mode 100644 test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs diff --git a/docs/detectors/maven.md b/docs/detectors/maven.md index 55b02652d..9be29440f 100644 --- a/docs/detectors/maven.md +++ b/docs/detectors/maven.md @@ -4,22 +4,185 @@ Maven detection depends on the following to successfully run: -- Maven CLI as part of your PATH. mvn should be runnable from a given command line. -- Maven Dependency Plugin (installed with Maven). - One or more `pom.xml` files. +- Maven CLI (`mvn`) available in PATH **and** the Maven Dependency Plugin installed — required for full graph detection. 
If unavailable, the detector automatically falls back to static `pom.xml` parsing (see [Detection Strategy](#detection-strategy) below). ## Detection strategy -Maven detection is performed by running `mvn dependency:tree -f {pom.xml}` for each pom file and parsing down the results. +The detector (`MvnCliComponentDetector`, ID: `MvnCli`) uses a **two-path strategy**: Maven CLI for full dependency graph resolution, with automatic fallback to static `pom.xml` parsing when CLI is unavailable or fails. Both paths are handled in the same detector class. -Components tagged as a test dependency are marked as development dependencies. +### High-level lifecycle -Full dependency graph generation is supported. +``` +OnPrepareDetectionAsync (Phase 1 — runs once before any file is processed) + │ + ├─ [CLI disabled or unavailable] → return pom.xml stream as-is → OnFileFoundAsync (static path) + │ + └─ [CLI available] + │ + ├─ Collect all pom.xml ProcessRequests via observable + ├─ Sort by directory depth (shallowest first) → filter to root-level pom.xml only + ├─ For each root pom.xml (sequentially): + │ ├─ Run `mvn dependency:tree` → writes bcde.mvndeps next to pom.xml + │ ├─ [success] record directory as succeeded + │ └─ [failure] record directory as failed, capture error output + ├─ Scan entire source tree for all bcde.mvndeps files + │ └─ Read each into MemoryStream (file handle released immediately) + │ └─ Emit as ProcessRequests → OnFileFoundAsync (CLI path) + └─ For each failed directory → re-emit original pom.xml ProcessRequests + └─ → OnFileFoundAsync (static path) + +OnFileFoundAsync (Phase 2 — called once per file emitted from Phase 1) + │ + ├─ [bcde.mvndeps] → ParseDependenciesFile → register full graph with scopes + │ └─ [CleanupCreatedFiles=true] → delete bcde.mvndeps from disk + │ + └─ [pom.xml] → static XML parsing (3-pass approach, see below) + +OnDetectionFinishedAsync (Phase 3 — runs once after all files are processed) + ├─ Pass 2: resolve deferred Maven parent 
relationships + └─ Pass 3: resolve pending components with hierarchy-aware variable substitution +``` + +--- + +### Phase 1 — Prepare: CLI detection + +#### Step 1.1 — Skip check + +If the environment variable `CD_MAVEN_DISABLE_CLI=true` is set, Maven CLI is skipped entirely. All `pom.xml` files are passed through unchanged to Phase 2 for static parsing. `FallbackReason` is recorded as `MvnCliDisabledByUser`. + +#### Step 1.2 — CLI availability check + +`mvn --version` is executed (also tries `mvn.cmd` on Windows). If it fails to locate, all `pom.xml` files fall through to static parsing. `FallbackReason` is recorded as `MavenCliNotAvailable`. + +#### Step 1.3 — Root pom.xml identification + +All discovered `pom.xml` ProcessRequests are buffered and sorted by directory path length (shallowest first). The detector then walks each file's ancestors: if any ancestor directory already contains a `pom.xml`, the current file is **nested** and excluded from direct CLI invocation. This ensures Maven CLI is only run on the outermost project root in any given directory tree, which is how Maven itself works (parent POMs aggregate submodules). + +For each root pom.xml and all its nested children, a mapping is recorded in `parentPomDictionary` (keyed by root directory). This mapping is used for fallback: if CLI fails for a root, all its nested children are re-emitted for static parsing. + +> **Why `.ToList()` instead of streaming?** The nesting check requires knowledge of all discovered paths before any can be classified as root or nested. A streaming approach would risk emitting a file as a root before its true parent has been seen. Sorting by depth first guarantees correctness. The `ProcessRequest` objects at this stage hold a `LazyComponentStream` that does not open the file until `.Stream` is first accessed, so no file handles are held during the buffer. 
+ +#### Step 1.4 — Sequential Maven CLI invocation + +For each root `pom.xml`, Maven CLI is invoked **sequentially** (not in parallel) to avoid Maven local repository lock contention and reduce JVM memory pressure: + +``` +mvn dependency:tree -B -DoutputFile=bcde.mvndeps -DoutputType=text -f{pom.xml} +``` + +- **`-B`** — batch mode (no interactive prompts). +- **`-DoutputFile=bcde.mvndeps`** — writes the dependency tree next to the `pom.xml`. +- **`-DoutputType=text`** — text format parseable by `MavenStyleDependencyGraphParser`. + +If the `MvnCLIFileLevelTimeoutSeconds` environment variable is set, a per-file cancellation timeout is applied via a linked `CancellationTokenSource`. + +On success, the existence of `bcde.mvndeps` is verified (CLI can exit 0 but skip the file in edge cases). On failure, error output is captured for later authentication error analysis. + +#### Step 1.5 — Dependency file discovery + +After all CLI invocations complete, the entire source directory is re-scanned for `bcde.mvndeps` files (this catches submodule output files generated by the parent POM run). Each file is: +1. Read fully into a `MemoryStream` — releasing the underlying file handle immediately. +2. Wrapped in a new `ProcessRequest` with a `SingleFileComponentRecorder` keyed to the corresponding `pom.xml` path in the same directory. + +#### Step 1.6 — Failure analysis and fallback assembly + +If any CLI invocations failed, error output is scanned for authentication patterns (`401`, `403`, `Unauthorized`, `Access denied`). If found, `FallbackReason` is set to `AuthenticationFailure` and any matching repository URLs are extracted and logged as guidance. Otherwise, `FallbackReason` is set to `OtherMvnCliFailure`. + +For each failed root directory, all `pom.xml` ProcessRequests from `parentPomDictionary` (the root itself plus all nested children) are emitted in depth-first order (parent before child) for static parsing. 
+ +The final observable returned to the framework is the concatenation of: +- All `bcde.mvndeps` ProcessRequests (CLI successes) +- All `pom.xml` ProcessRequests from failed directories (static fallback) + +--- + +### Phase 2 — File processing: `OnFileFoundAsync` + +Each `ProcessRequest` emitted in Phase 1 is dispatched here. The file type is distinguished by its `Pattern` field. + +#### CLI path: `bcde.mvndeps` + +The file is passed to `MavenStyleDependencyGraphParser` via `MavenCommandService.ParseDependenciesFile`. The parser reads the text-format dependency tree line-by-line: + +1. **First non-blank line** — the root artifact (`groupId:artifactId:packaging:version`). Registered as a direct dependency. +2. **Subsequent lines** — each is a tree node prefixed with `+-` (direct child) or `\-` (last child) at an indented position. The indentation depth (character offset of the splitter) is used to maintain a parse stack, from which parent-child edges are derived and registered. + +Component string format: +``` +groupId:artifactId:packaging:version:scope +``` +Scope is mapped to `DependencyScope` (`MavenCompile`, `MavenTest`, `MavenProvided`, `MavenRuntime`, `MavenSystem`). `test`-scoped dependencies are also marked as `isDevelopmentDependency=true`. + +If `CleanupCreatedFiles` is set on the scan request, `bcde.mvndeps` is deleted from disk after parsing (wrapped in a try/catch so failures are non-fatal). + +#### Static fallback path: `pom.xml` + +Static parsing operates in **three passes** spread across Phase 2 and Phase 3, designed to handle Maven's property inheritance correctly. + +**Pass 1 (during `OnFileFoundAsync`):** + +The `pom.xml` XML document is parsed once. For each file, the detector: + +1. **Tracks project coordinates** — queries `groupId`, `artifactId`, and (from `<parent>` if own `groupId` is absent) stores the project in `processedMavenProjects` under both `artifactId` and `groupId:artifactId` keys. This enables coordinate-based parent lookup. + +2. 
**Parses Maven parent relationship** — reads `<parent><groupId>` and `<parent><artifactId>`. If the parent pom.xml has already been processed, the `child → parent` relationship is stored immediately in `mavenParentChildRelationships`. Otherwise, the relationship is queued in `unresolvedParentRelationships` for Pass 2. + +3. **Collects variables** — all `<properties>` sections are read (supports multiple `<properties>` blocks for malformed XML). `project.version`, `project.groupId`, `project.artifactId`, `version`, `groupId`, `artifactId` are also collected. Variables are stored in `collectedVariables` keyed as `filePath::variableName` to scope them to their source file for hierarchy-aware resolution. + +4. **Registers dependencies:** + - **Literal version** (e.g., `1.2.3`) → registered immediately. + - **Variable version resolved locally** (e.g., `${revision}` defined in this same file's `<properties>`) → resolved and registered immediately. + - **Variable version unresolvable locally** (e.g., `${revision}` from a parent POM) → added to `pendingComponents` queue with the raw template for Pass 3. + - **Range version** (contains `,`) or **missing version** → skipped with a debug log. + +--- + +### Phase 3 — Finish: `OnDetectionFinishedAsync` + +#### Pass 2 — Deferred parent relationship resolution + +The `unresolvedParentRelationships` queue is drained. For each entry, the cache entry is cleared and `processedMavenProjects` is queried again (now fully populated). Lookup tries `groupId:artifactId` first, then `artifactId` alone. Resolved relationships are written to `mavenParentChildRelationships`. + +#### Pass 3 — Hierarchy-aware variable resolution + +All entries in `pendingComponents` are drained. For each component with an unresolved version template (e.g., `${myVersion}`): + +1. Starting from the component's own `pom.xml`, the detector walks up `mavenParentChildRelationships` (child → parent → grandparent). +2. At each level, `collectedVariables[filePath::variableName]` is checked. +3. 
The **first match wins** — this implements Maven's child-overrides-parent property precedence. +4. Circular parent references are detected via a `visitedFiles` HashSet and broken safely. +5. If the variable is still unresolved after exhausting the hierarchy (e.g., defined in an external parent POM not on disk), the component is skipped and `UnresolvedVariableCount` is incremented in telemetry. + +--- + +### Detection method tracking + +At completion, the `DetectionMethod` telemetry field records one of: + +| Value | Meaning | +|---|---| +| `MvnCliOnly` | All root pom.xml files were processed by Maven CLI successfully | +| `StaticParserOnly` | CLI was disabled or unavailable; all components from static parsing | +| `Mixed` | Maven CLI was attempted; at least one root fell back to static parsing (possibly all) | +| `None` | No pom.xml files were found | + +`FallbackReason` records why static parsing was triggered: `None`, `MvnCliDisabledByUser`, `MavenCliNotAvailable`, `AuthenticationFailure`, or `OtherMvnCliFailure`. + +--- ## Known limitations -Maven detection will not run if `mvn` is unavailable. +- Static fallback parsing does **not** resolve variables defined in external parent POMs that are not present on disk (e.g., published to a remote Maven repository). Affected components are skipped. +- Static parsing does **not** produce a dependency graph (no parent-child edges between components) — it produces a flat component list only. Full graph with transitive dependencies requires Maven CLI. +- Version ranges (e.g., `[1.0,2.0)`) are not supported by static parsing and are skipped. +- Maven CLI invocations run sequentially. On repositories with many independent root `pom.xml` files, this can be slow. Set `MvnCLIFileLevelTimeoutSeconds` to bound per-file execution time. +- If Maven CLI exits successfully but the `bcde.mvndeps` file is not created (edge case with certain POM configurations), the file falls back to static parsing. 
-## Environment Variables +## Environment variables -The environment variable `MvnCLIFileLevelTimeoutSeconds` is used to control the max execution time Mvn CLI is allowed to take per each `pom.xml` file. Default value, unbounded. This will restrict any spikes in scanning time caused by Mvn CLI during package restore. We suggest to restore the Maven packages beforehand, so that no network calls happen when executing "mvn dependency:tree" and the graph is captured quickly. +| Variable | Default | Description | +|---|---|---| +| `MvnCLIFileLevelTimeoutSeconds` | Unbounded | Maximum seconds Maven CLI may spend on a single `pom.xml`. Pre-restoring packages eliminates network calls and makes this limit more predictable. | +| `CD_MAVEN_DISABLE_CLI` | `false` | Set to `true` to skip Maven CLI entirely and use only static `pom.xml` parsing. | diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs index 98593a4b0..48ab997b2 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenCommandService.cs @@ -2,7 +2,6 @@ namespace Microsoft.ComponentDetection.Detectors.Maven; using System; -using System.Collections.Concurrent; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -20,19 +19,6 @@ internal class MavenCommandService : IMavenCommandService internal static readonly string[] AdditionalValidCommands = ["mvn.cmd"]; - /// - /// Per-location semaphores to prevent concurrent Maven CLI executions for the same pom.xml. - /// This allows multiple detectors (e.g., MvnCliComponentDetector and MavenWithFallbackDetector) - /// to safely share the same output file without race conditions. - /// - private readonly ConcurrentDictionary locationLocks = new(); - - /// - /// Tracks locations where dependency generation has completed successfully. 
- /// Used to skip duplicate executions when multiple detectors process the same pom.xml. - /// - private readonly ConcurrentDictionary completedLocations = new(); - private readonly ICommandLineInvocationService commandLineInvocationService; private readonly IMavenStyleDependencyGraphParserService parserService; private readonly IEnvironmentVariableService envVarService; @@ -58,56 +44,6 @@ public async Task MavenCLIExistsAsync() } public async Task GenerateDependenciesFileAsync(ProcessRequest processRequest, CancellationToken cancellationToken = default) - { - var pomFile = processRequest.ComponentStream; - var pomDir = Path.GetDirectoryName(pomFile.Location); - var depsFilePath = Path.Combine(pomDir, this.BcdeMvnDependencyFileName); - - // Check the cache before acquiring the semaphore to allow fast-path returns - // even when cancellation has been requested. - if (this.completedLocations.TryGetValue(pomFile.Location, out var cachedResult) - && cachedResult.Success - && File.Exists(depsFilePath)) - { - this.logger.LogDebug("{DetectorPrefix}: Skipping duplicate \"dependency:tree\" for {PomFileLocation}, already generated", DetectorLogPrefix, pomFile.Location); - return cachedResult; - } - - // Use semaphore to prevent concurrent Maven CLI executions for the same pom.xml. - // This allows MvnCliComponentDetector and MavenWithFallbackDetector to safely share the output file. - var semaphore = this.locationLocks.GetOrAdd(pomFile.Location, _ => new SemaphoreSlim(1, 1)); - - await semaphore.WaitAsync(cancellationToken); - try - { - // Re-check the cache after acquiring the semaphore in case another caller - // completed while we were waiting. 
- if (this.completedLocations.TryGetValue(pomFile.Location, out cachedResult) - && cachedResult.Success - && File.Exists(depsFilePath)) - { - this.logger.LogDebug("{DetectorPrefix}: Skipping duplicate \"dependency:tree\" for {PomFileLocation}, already generated", DetectorLogPrefix, pomFile.Location); - return cachedResult; - } - - var result = await this.GenerateDependenciesFileCoreAsync(processRequest, cancellationToken); - - // Only cache successful results. Failed results should allow retries for transient failures, - // and caching them would waste memory since the cache check requires Success == true anyway. - if (result.Success) - { - this.completedLocations[pomFile.Location] = result; - } - - return result; - } - finally - { - semaphore.Release(); - } - } - - private async Task GenerateDependenciesFileCoreAsync(ProcessRequest processRequest, CancellationToken cancellationToken) { var cliFileTimeout = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); var timeoutSeconds = -1; diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs index ceb1e8bee..f04fa936e 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MavenConstants.cs @@ -15,9 +15,4 @@ public static class MavenConstants /// Detector ID for MvnCliComponentDetector. /// public const string MvnCliDetectorId = "MvnCli"; - - /// - /// Detector ID for MavenWithFallbackDetector. 
- /// - public const string MavenWithFallbackDetectorId = "MavenWithFallback"; } diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs deleted file mode 100644 index b57fc33b1..000000000 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MavenWithFallbackDetector.cs +++ /dev/null @@ -1,1334 +0,0 @@ -#nullable disable -namespace Microsoft.ComponentDetection.Detectors.Maven; - -using System; -using System.Collections.Concurrent; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Reactive.Linq; -using System.Text; -using System.Text.RegularExpressions; -using System.Threading; -using System.Threading.Tasks; -using System.Threading.Tasks.Dataflow; -using System.Xml; -using Microsoft.ComponentDetection.Common; -using Microsoft.ComponentDetection.Contracts; -using Microsoft.ComponentDetection.Contracts.Internal; -using Microsoft.ComponentDetection.Contracts.TypedComponent; -using Microsoft.Extensions.Logging; - -/// -/// Enum representing which detection method was used. -/// -internal enum MavenDetectionMethod -{ - /// No detection performed. - None, - - /// MvnCli was used successfully for all files. - MvnCliOnly, - - /// Static parser was used for all files (MvnCli not available or failed completely). - StaticParserOnly, - - /// MvnCli succeeded for some files, static parser used for failed files. - Mixed, -} - -/// -/// Enum representing why fallback occurred. -/// -internal enum MavenFallbackReason -{ - /// No fallback was needed. - None, - - /// Maven CLI was explicitly disabled via the CD_MAVEN_DISABLE_CLI environment variable. - MvnCliDisabledByUser, - - /// Maven CLI was not available in PATH. - MavenCliNotAvailable, - - /// MvnCli failed due to authentication error (401/403). - AuthenticationFailure, - - /// MvnCli failed due to other reasons. 
- OtherMvnCliFailure, -} - -/// -/// Experimental Maven detector that combines MvnCli detection with static pom.xml parsing fallback. -/// Runs MvnCli detection first (like standard MvnCliComponentDetector), then checks if detection -/// produced any results. If MvnCli fails for any pom.xml, falls back to static parsing for failed files. -/// -public class MavenWithFallbackDetector : FileComponentDetector, IExperimentalDetector -{ - /// - /// Environment variable to disable MvnCli and use only static pom.xml parsing. - /// Set to "true" to disable MvnCli detection. - /// Usage: Set CD_MAVEN_DISABLE_CLI=true as a pipeline/environment variable. - /// - internal const string DisableMvnCliEnvVar = "CD_MAVEN_DISABLE_CLI"; - - private const string MavenManifest = "pom.xml"; - private const string MavenXmlNamespace = "http://maven.apache.org/POM/4.0.0"; - private const string ProjNamespace = "proj"; - private const string DependencyNode = "//proj:dependency"; - - private const string GroupIdSelector = "groupId"; - private const string ArtifactIdSelector = "artifactId"; - private const string VersionSelector = "version"; - - private static readonly Regex VersionRegex = new( - @"^\$\{(.*)\}$", - RegexOptions.Compiled | RegexOptions.IgnoreCase); - - // Auth error patterns to detect in Maven error output - private static readonly string[] AuthErrorPatterns = - [ - "401", - "403", - "Unauthorized", - "Access denied", - ]; - - // Pattern to extract failed endpoint URL from Maven error messages - private static readonly Regex EndpointRegex = new( - @"https?://[^\s\]\)>]+", - RegexOptions.Compiled | RegexOptions.IgnoreCase); - - /// - /// Maximum time allowed for the OnPrepareDetectionAsync phase. - /// This is a safety guardrail to prevent hangs in the experimental detector. - /// Most repos should complete the full Maven CLI scan within this window. 
- /// - private static readonly TimeSpan PrepareDetectionTimeout = TimeSpan.FromMinutes(5); - - private readonly IMavenCommandService mavenCommandService; - private readonly IEnvironmentVariableService envVarService; - private readonly IFileUtilityService fileUtilityService; - - // Two-pass static parsing: collect variables first, then resolve components - private readonly ConcurrentDictionary collectedVariables = new(); - private readonly ConcurrentQueue pendingComponents = new(); - - // Track Maven parent-child relationships for proper variable resolution - private readonly ConcurrentDictionary mavenParentChildRelationships = new(); - - // Track processed Maven projects by coordinates (groupId:artifactId -> file path) - private readonly ConcurrentDictionary processedMavenProjects = new(); - - // Track files that couldn't establish parent relationships during first pass (for second pass re-evaluation) - private readonly ConcurrentQueue<(string FilePath, string ParentGroupId, string ParentArtifactId)> unresolvedParentRelationships = new(); - - // Track original pom.xml files for potential fallback - private readonly ConcurrentQueue originalPomFiles = []; - - // Track Maven CLI errors for analysis - private readonly ConcurrentQueue mavenCliErrors = []; - private readonly ConcurrentQueue failedEndpoints = []; - - /// - /// Cache for parent POM lookups to avoid repeated file system operations. - /// Key: current file path, Value: parent POM path or empty string if not found. 
- /// - private readonly ConcurrentDictionary parentPomCache = new(); - - // Telemetry tracking - private MavenDetectionMethod usedDetectionMethod = MavenDetectionMethod.None; - private MavenFallbackReason fallbackReason = MavenFallbackReason.None; - private int mvnCliComponentCount; - private int staticParserComponentCount; - private int unresolvedVariableCount; - private int pendingComponentCountBeforeResolution; - private bool mavenCliAvailable; - - public MavenWithFallbackDetector( - IComponentStreamEnumerableFactory componentStreamEnumerableFactory, - IObservableDirectoryWalkerFactory walkerFactory, - IMavenCommandService mavenCommandService, - IEnvironmentVariableService envVarService, - IFileUtilityService fileUtilityService, - ILogger logger) - { - this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory; - this.Scanner = walkerFactory; - this.mavenCommandService = mavenCommandService; - this.envVarService = envVarService; - this.fileUtilityService = fileUtilityService; - this.Logger = logger; - } - - public override string Id => MavenConstants.MavenWithFallbackDetectorId; - - public override IList SearchPatterns => [MavenManifest]; - - public override IEnumerable SupportedComponentTypes => [ComponentType.Maven]; - - public override int Version => 2; - - public override IEnumerable Categories => [Enum.GetName(typeof(DetectorClass), DetectorClass.Maven)]; - - // Normalizes a directory path by ensuring it ends with a directory separator. - // This prevents false matches like "C:\foo" matching "C:\foobar". - private static string NormalizeDirectoryPath(string path) - { - if (string.IsNullOrEmpty(path)) - { - return path; - } - - var lastChar = path[^1]; - return lastChar == Path.DirectorySeparatorChar || lastChar == Path.AltDirectorySeparatorChar - ? 
path - : path + Path.DirectorySeparatorChar; - } - - private static bool IsAuthenticationError(string errorMessage) - { - if (string.IsNullOrWhiteSpace(errorMessage)) - { - return false; - } - - // Use ReadOnlySpan for more efficient string searching - var messageSpan = errorMessage.AsSpan(); - foreach (var pattern in AuthErrorPatterns) - { - if (messageSpan.Contains(pattern, StringComparison.OrdinalIgnoreCase)) - { - return true; - } - } - - return false; - } - - private void LogDebugWithId(string message) => - this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); - - private void LogWarning(string message) => - this.Logger.LogWarning("{DetectorId}: {Message}", this.Id, message); - - /// - /// Resets all per-scan state to prevent stale data from leaking between scans. - /// This is critical because detectors are registered as singletons. - /// - private void ResetScanState() - { - // Clear all concurrent collections - this.collectedVariables.Clear(); - this.mavenParentChildRelationships.Clear(); - this.processedMavenProjects.Clear(); - this.parentPomCache.Clear(); - - // Drain all concurrent queues - while (this.pendingComponents.TryDequeue(out _)) - { - // Intentionally empty - just draining the queue - } - - while (this.unresolvedParentRelationships.TryDequeue(out _)) - { - // Intentionally empty - just draining the queue - } - - while (this.originalPomFiles.TryDequeue(out _)) - { - // Intentionally empty - just draining the queue - } - - while (this.mavenCliErrors.TryDequeue(out _)) - { - // Intentionally empty - just draining the queue - } - - while (this.failedEndpoints.TryDequeue(out _)) - { - // Intentionally empty - just draining the queue - } - - // Reset telemetry counters and flags - this.usedDetectionMethod = MavenDetectionMethod.None; - this.fallbackReason = MavenFallbackReason.None; - this.mvnCliComponentCount = 0; - this.staticParserComponentCount = 0; - this.unresolvedVariableCount = 0; - this.pendingComponentCountBeforeResolution = 0; 
- this.mavenCliAvailable = false; - } - - protected override async Task> OnPrepareDetectionAsync( - IObservable processRequests, - IDictionary detectorArgs, - CancellationToken cancellationToken = default) - { - // Reset all per-scan state to prevent stale data from previous scans - // This is critical because detectors are registered as singletons - this.ResetScanState(); - - // Wrap the entire method in a try-catch with timeout to protect against hangs. - // OnPrepareDetectionAsync doesn't have the same guardrails as OnFileFoundAsync, - // so we need to be extra careful in this experimental detector. - try - { - using var timeoutCts = new CancellationTokenSource(PrepareDetectionTimeout); - using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token); - - return await this.OnPrepareDetectionCoreAsync(processRequests, linkedCts.Token); - } - catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested) - { - // Timeout occurred (not user cancellation) - this.LogWarning($"OnPrepareDetectionAsync timed out after {PrepareDetectionTimeout.TotalMinutes} minutes. Falling back to static pom.xml parsing."); - this.Telemetry["TimedOut"] = "true"; - this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure; - this.usedDetectionMethod = MavenDetectionMethod.Mixed; - return processRequests; - } - catch (Exception ex) - { - // Unexpected error - log and fall back to static parsing - this.LogWarning($"OnPrepareDetectionAsync failed with unexpected error: {ex.Message}. Falling back to static pom.xml parsing."); - this.Telemetry["PrepareDetectionError"] = ex.GetType().Name; - this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure; - this.usedDetectionMethod = MavenDetectionMethod.Mixed; - return processRequests; - } - } - - /// - /// Core implementation of OnPrepareDetectionAsync, called within the timeout wrapper. 
- /// - private async Task> OnPrepareDetectionCoreAsync( - IObservable processRequests, - CancellationToken cancellationToken) - { - // Check if we should skip Maven CLI and use static parsing only - if (this.ShouldSkipMavenCli()) - { - return processRequests; - } - - // Check if Maven CLI is available - if (!await this.TryInitializeMavenCliAsync()) - { - return processRequests; - } - - // Create per-scan dictionary to track nested pom.xml mappings - // This prevents state accumulation across scans since detectors are singletons - var parentPomDictionary = new ConcurrentDictionary>(StringComparer.OrdinalIgnoreCase); - - // Run Maven CLI detection on all pom.xml files - // Returns deps files for CLI successes, pom.xml files for CLI failures - return await this.RunMavenCliDetectionAsync(processRequests, parentPomDictionary, cancellationToken); - } - - /// - /// Checks if Maven CLI should be skipped due to environment variable configuration. - /// - /// True if Maven CLI should be skipped; otherwise, false. - private bool ShouldSkipMavenCli() - { - if (this.envVarService.IsEnvironmentVariableValueTrue(DisableMvnCliEnvVar)) - { - this.LogDebugWithId($"MvnCli detection disabled via {DisableMvnCliEnvVar} environment variable. Using static pom.xml parsing only."); - this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; - this.fallbackReason = MavenFallbackReason.MvnCliDisabledByUser; - this.mavenCliAvailable = false; - return true; - } - - return false; - } - - /// - /// Checks if Maven CLI is available. - /// - /// True if Maven CLI is available; otherwise, false. - private async Task TryInitializeMavenCliAsync() - { - this.mavenCliAvailable = await this.mavenCommandService.MavenCLIExistsAsync(); - - if (!this.mavenCliAvailable) - { - this.LogDebugWithId("Maven CLI not found in PATH. 
Will use static pom.xml parsing only."); - this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; - this.fallbackReason = MavenFallbackReason.MavenCliNotAvailable; - return false; - } - - this.LogDebugWithId("Maven CLI is available. Running MvnCli detection."); - return true; - } - - /// - /// Runs Maven CLI detection on all root pom.xml files. - /// For each pom.xml, if CLI succeeds, the deps file is added to results. - /// If CLI fails, all pom.xml files under that directory are added for static parsing fallback. - /// - /// The incoming process requests. - /// Dictionary to track nested pom.xml mappings for fallback scenarios. - /// Cancellation token for the operation. - /// An observable of process requests (deps files for CLI success, pom.xml for CLI failure). - private async Task> RunMavenCliDetectionAsync( - IObservable processRequests, - ConcurrentDictionary> parentPomDictionary, - CancellationToken cancellationToken) - { - var results = new ConcurrentQueue(); - var failedDirectories = new ConcurrentQueue(); - var cliSuccessCount = 0; - var cliFailureCount = 0; - - // Process pom.xml files sequentially to match MvnCliComponentDetector behavior. - // Sequential execution avoids Maven local repository lock contention and - // reduces memory pressure from concurrent Maven JVM processes. - var processPomFile = new ActionBlock( - async processRequest => - { - // Check for cancellation before processing each pom.xml - cancellationToken.ThrowIfCancellationRequested(); - - // Store original pom.xml for telemetry - this.originalPomFiles.Enqueue(processRequest); - - var pomFile = processRequest.ComponentStream; - var pomDir = Path.GetDirectoryName(pomFile.Location); - var depsFileName = this.mavenCommandService.BcdeMvnDependencyFileName; - var depsFilePath = Path.Combine(pomDir, depsFileName); - - // Generate dependency file using Maven CLI. 
- // Note: If both MvnCliComponentDetector and this detector are enabled, - // they may run Maven CLI on the same pom.xml independently. - var result = await this.mavenCommandService.GenerateDependenciesFileAsync( - processRequest, - cancellationToken); - - if (result.Success) - { - // CLI succeeded - verify deps file was generated - // Use existence check to avoid redundant I/O (file will be read during directory scan) - if (this.fileUtilityService.Exists(depsFilePath)) - { - // File reader registration is now handled in GenerateDependenciesFileAsync - Interlocked.Increment(ref cliSuccessCount); - } - else - { - // CLI reported success but deps file is missing - treat as failure - Interlocked.Increment(ref cliFailureCount); - failedDirectories.Enqueue(pomDir); - this.LogWarning($"Maven CLI succeeded but deps file not found: {depsFilePath}"); - } - } - else - { - // CLI failed - track directory for nested pom.xml scanning - Interlocked.Increment(ref cliFailureCount); - failedDirectories.Enqueue(pomDir); - - // Capture error output for later analysis - if (!string.IsNullOrWhiteSpace(result.ErrorOutput)) - { - this.mavenCliErrors.Enqueue(result.ErrorOutput); - } - } - }, - new ExecutionDataflowBlockOptions - { - CancellationToken = cancellationToken, - }); - - await this.RemoveNestedPomXmls(processRequests, parentPomDictionary, cancellationToken).ForEachAsync( - processRequest => - { - processPomFile.Post(processRequest); - }, - cancellationToken); - - processPomFile.Complete(); - await processPomFile.Completion; - - // For failed directories, scan and add all pom.xml files for static parsing - if (!failedDirectories.IsEmpty) - { - foreach (var failedDir in failedDirectories) - { - cancellationToken.ThrowIfCancellationRequested(); - var normalizedFailedDir = NormalizeDirectoryPath(failedDir); - if (parentPomDictionary.TryGetValue(normalizedFailedDir, out var staticParsingRequests)) - { - // Note: staticParsingRequests is already in parent-first order due to the 
sorted processing - // during dictionary building in RemoveNestedPomXmls - foreach (var request in staticParsingRequests) - { - cancellationToken.ThrowIfCancellationRequested(); - results.Enqueue(request); - } - } - } - } - - // Determine detection method based on results - this.DetermineDetectionMethod(cliSuccessCount, cliFailureCount); - - this.LogDebugWithId($"Maven CLI processing complete: {cliSuccessCount} succeeded, {cliFailureCount} failed out of {this.originalPomFiles.Count} root pom.xml files. Retrieving generated dependency graphs."); - - // Use comprehensive directory scanning after Maven CLI execution to find all generated dependency files - // This ensures we find dependency files from submodules even if Maven CLI was only run on parent pom.xml - var allGeneratedDependencyFiles = this.ComponentStreamEnumerableFactory - .GetComponentStreams( - this.CurrentScanRequest.SourceDirectory, - [this.mavenCommandService.BcdeMvnDependencyFileName], - this.CurrentScanRequest.DirectoryExclusionPredicate) - .Select(componentStream => - { - // Read and store content to avoid stream disposal issues - // Note: Cleanup coordination is handled in OnFileFoundAsync to avoid duplicate work - using var reader = new StreamReader(componentStream.Stream); - var content = reader.ReadToEnd(); - return new ProcessRequest - { - ComponentStream = new ComponentStream - { - Stream = new MemoryStream(Encoding.UTF8.GetBytes(content)), - Location = componentStream.Location, - Pattern = componentStream.Pattern, - }, - SingleFileComponentRecorder = this.ComponentRecorder.CreateSingleFileComponentRecorder( - Path.Combine(Path.GetDirectoryName(componentStream.Location), MavenManifest)), - }; - }); - - // Combine dependency files from CLI success with pom.xml files from CLI failures - return results.Concat(allGeneratedDependencyFiles).ToObservable(); - } - - /// - /// Determines the detection method based on CLI success/failure counts and analyzes any failures. 
- /// - /// Number of successful CLI executions. - /// Number of failed CLI executions. - private void DetermineDetectionMethod(int cliSuccessCount, int cliFailureCount) - { - if (cliFailureCount == 0 && cliSuccessCount > 0) - { - this.usedDetectionMethod = MavenDetectionMethod.MvnCliOnly; - this.LogDebugWithId("All pom.xml files processed successfully with Maven CLI."); - } - else if (cliFailureCount > 0) - { - this.usedDetectionMethod = MavenDetectionMethod.Mixed; - this.LogWarning($"Maven CLI failed for {cliFailureCount} pom.xml files. Using mixed detection."); - this.AnalyzeMvnCliFailure(); - } - } - - protected override Task OnFileFoundAsync( - ProcessRequest processRequest, - IDictionary detectorArgs, - CancellationToken cancellationToken = default) - { - var pattern = processRequest.ComponentStream.Pattern; - - if (pattern == this.mavenCommandService.BcdeMvnDependencyFileName) - { - // Process MvnCli result - this.ProcessMvnCliResult(processRequest); - } - else - { - // Process via static XML parsing - this.ProcessPomFileStatically(processRequest); - } - - return Task.CompletedTask; - } - - protected override Task OnDetectionFinishedAsync() - { - // Second pass: resolve any parent relationships that couldn't be resolved during first pass - // This handles cases where parent POM was processed after child POM - this.ResolveUnresolvedParentRelationships(); - - // Third pass: resolve all pending components with collected variables and complete hierarchy - this.ResolvePendingComponents(); - - // Record telemetry - cache string conversions - var detectionMethodStr = this.usedDetectionMethod.ToString(); - var fallbackReasonStr = this.fallbackReason.ToString(); - var mvnCliCountStr = this.mvnCliComponentCount.ToString(); - var staticCountStr = this.staticParserComponentCount.ToString(); - - this.Telemetry["DetectionMethod"] = detectionMethodStr; - this.Telemetry["FallbackReason"] = fallbackReasonStr; - this.Telemetry["MvnCliComponentCount"] = mvnCliCountStr; - 
this.Telemetry["StaticParserComponentCount"] = staticCountStr; - this.Telemetry["TotalComponentCount"] = (this.mvnCliComponentCount + this.staticParserComponentCount).ToString(); - this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString(); - this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString(); - this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString(); - this.Telemetry["PendingComponentCount"] = this.pendingComponentCountBeforeResolution.ToString(); - this.Telemetry["UnresolvedVariableCount"] = this.unresolvedVariableCount.ToString(); - - if (!this.failedEndpoints.IsEmpty) - { - this.Telemetry["FailedEndpoints"] = string.Join(";", this.failedEndpoints.Distinct().Take(10)); - } - - this.LogDebugWithId($"Detection completed. Method: {detectionMethodStr}, " + - $"FallbackReason: {fallbackReasonStr}, " + - $"MvnCli components: {mvnCliCountStr}, " + - $"Static parser components: {staticCountStr}"); - - return Task.CompletedTask; - } - - /// - /// Analyzes Maven CLI failure by checking logged errors for authentication issues. - /// - private void AnalyzeMvnCliFailure() - { - // Check if any recorded errors indicate authentication failure - var hasAuthError = this.mavenCliErrors.Any(IsAuthenticationError); - - if (hasAuthError) - { - this.fallbackReason = MavenFallbackReason.AuthenticationFailure; - - // Extract failed endpoints from error messages - foreach (var endpoint in this.mavenCliErrors.SelectMany(this.ExtractFailedEndpoints)) - { - this.failedEndpoints.Enqueue(endpoint); - } - - this.LogAuthErrorGuidance(); - } - else - { - this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure; - this.LogWarning("Maven CLI failed. 
Check Maven logs for details."); - } - } - - private void ProcessMvnCliResult(ProcessRequest processRequest) - { - this.mavenCommandService.ParseDependenciesFile(processRequest); - - // Count components registered to this specific file's recorder to avoid race conditions - // when OnFileFoundAsync runs concurrently for multiple files. - var componentsInFile = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; - Interlocked.Add(ref this.mvnCliComponentCount, componentsInFile); - } - - private void ProcessPomFileStatically(ProcessRequest processRequest) - { - var file = processRequest.ComponentStream; - var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder; - var filePath = file.Location; - - try - { - var document = new XmlDocument(); - document.Load(file.Stream); - - // Single XML parsing pass: create namespace manager once - var namespaceManager = new XmlNamespaceManager(document.NameTable); - namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace); - - // Collect variables from this document into a local dictionary first - var localVariables = new Dictionary(); - this.CollectVariablesFromDocument(document, namespaceManager, filePath, localVariables); - - // Batch add local variables to global collection for better performance - // Key format: "filePath::variableName" enables Maven hierarchy-aware lookup - if (localVariables.Count > 0) - { - var keyBuilder = new StringBuilder(filePath.Length + 64); // Pre-allocate capacity - var filePathWithSeparator = filePath + "::"; - - foreach (var (variableName, variableValue) in localVariables) - { - keyBuilder.Clear(); - keyBuilder.Append(filePathWithSeparator).Append(variableName); - var key = keyBuilder.ToString(); - - this.collectedVariables.AddOrUpdate(key, variableValue, (_, _) => variableValue); - } - - this.Logger.LogDebug("MavenWithFallback: Collected {Count} variables from {File}", localVariables.Count, Path.GetFileName(filePath)); - } - - // First pass: collect 
dependencies (may have unresolved variables) - var dependencyList = document.SelectNodes(DependencyNode, namespaceManager); - - foreach (XmlNode dependency in dependencyList) - { - var groupId = dependency[GroupIdSelector]?.InnerText; - var artifactId = dependency[ArtifactIdSelector]?.InnerText; - - if (groupId == null || artifactId == null) - { - continue; - } - - var version = dependency[VersionSelector]; - if (version != null && !version.InnerText.Contains(',')) - { - var versionRef = version.InnerText.Trim('[', ']'); - - if (versionRef.StartsWith("${")) - { - // Only resolve immediately if local variable exists (highest priority) - // Otherwise, defer to second pass to ensure proper hierarchy-aware resolution - var resolvedVersion = this.ResolveVersionFromLocalOnly(versionRef, localVariables); - if (!resolvedVersion.StartsWith("${")) - { - // Local variable found - resolve immediately (highest priority) - var component = new MavenComponent(groupId, artifactId, resolvedVersion); - var detectedComponent = new DetectedComponent(component); - singleFileComponentRecorder.RegisterUsage(detectedComponent); - Interlocked.Increment(ref this.staticParserComponentCount); - } - else - { - // No local variable - defer to second pass for hierarchy-aware resolution - // This ensures we consider all variable definitions before resolving - this.pendingComponents.Enqueue(new PendingComponent( - groupId, - artifactId, - versionRef, - singleFileComponentRecorder, - filePath)); - } - } - else - { - // Direct version - register immediately - var component = new MavenComponent(groupId, artifactId, versionRef); - var detectedComponent = new DetectedComponent(component); - singleFileComponentRecorder.RegisterUsage(detectedComponent); - Interlocked.Increment(ref this.staticParserComponentCount); - } - } - else - { - this.Logger.LogDebug( - "Version string for component {Group}/{Artifact} is invalid or unsupported and a component will not be recorded.", - groupId, - artifactId); - } - } 
- } - catch (Exception e) - { - this.Logger.LogError(e, "Failed to read file {Path}", filePath); - } - } - - /// - /// Collects all variable definitions from a POM document into the provided local dictionary. - /// Optimized to reuse XmlNamespaceManager and minimize XPath queries. - /// - /// The XML document to scan for variables. - /// Pre-configured namespace manager to reuse. - /// The file path for logging purposes. - /// Local dictionary to collect variables into. - private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, Dictionary localVariables) - { - try - { - // Query project coordinates once - used for both variable collection and project tracking - var projectGroupIdNode = document.SelectSingleNode("/proj:project/proj:groupId", namespaceManager); - var projectArtifactIdNode = document.SelectSingleNode("/proj:project/proj:artifactId", namespaceManager); - var projectVersionNode = document.SelectSingleNode("/proj:project/proj:version", namespaceManager); - - // Track this project by Maven coordinates for parent resolution (reuses queried nodes) - this.TrackMavenProjectCoordinates(document, namespaceManager, filePath, projectGroupIdNode, projectArtifactIdNode); - - // Parse Maven parent relationship to build proper hierarchy - this.ParseMavenParentRelationship(document, namespaceManager, filePath); - - // Collect properties variables from ALL properties sections (handles malformed XML with multiple ) - var propertiesNodes = document.SelectNodes("//proj:properties", namespaceManager); - if (propertiesNodes?.Count > 0) - { - if (propertiesNodes.Count > 1) - { - this.Logger.LogDebug("MavenWithFallback: Found {Count} properties sections in {File}", propertiesNodes.Count, Path.GetFileName(filePath)); - } - - foreach (XmlNode propertiesNode in propertiesNodes) - { - foreach (XmlNode propertyNode in propertiesNode.ChildNodes) - { - if (propertyNode.NodeType == XmlNodeType.Element && 
!string.IsNullOrWhiteSpace(propertyNode.InnerText)) - { - // Later properties sections override earlier ones (last wins - Maven behavior) - localVariables[propertyNode.Name] = propertyNode.InnerText; - } - } - } - } - - // Collect project-level variables from already-queried nodes - if (projectVersionNode != null && !string.IsNullOrWhiteSpace(projectVersionNode.InnerText)) - { - localVariables["version"] = projectVersionNode.InnerText; - localVariables["project.version"] = projectVersionNode.InnerText; - } - - if (projectGroupIdNode != null && !string.IsNullOrWhiteSpace(projectGroupIdNode.InnerText)) - { - localVariables["groupId"] = projectGroupIdNode.InnerText; - localVariables["project.groupId"] = projectGroupIdNode.InnerText; - } - - if (projectArtifactIdNode != null && !string.IsNullOrWhiteSpace(projectArtifactIdNode.InnerText)) - { - localVariables["artifactId"] = projectArtifactIdNode.InnerText; - localVariables["project.artifactId"] = projectArtifactIdNode.InnerText; - } - } - catch (Exception e) - { - this.Logger.LogError(e, "Failed to collect variables from file {Path}", filePath); - } - } - - /// - /// Parses Maven parent relationship from pom.xml to build proper inheritance hierarchy. - /// This is needed for Maven-compliant variable resolution that respects parent-child relationships. - /// - /// The XML document to parse. - /// XML namespace manager for Maven POM. - /// Current pom.xml file path. 
- private void ParseMavenParentRelationship(XmlDocument document, XmlNamespaceManager namespaceManager, string currentFilePath) - { - try - { - // Query parent element once and access children directly (more efficient than union XPath) - var parentNode = document.SelectSingleNode("/proj:project/proj:parent", namespaceManager); - - if (parentNode != null) - { - var parentGroupId = parentNode["groupId"]?.InnerText; - var parentArtifactId = parentNode["artifactId"]?.InnerText; - - if (!string.IsNullOrWhiteSpace(parentArtifactId)) - { - // Try to find parent pom.xml file by searching processed files for matching artifactId - // This works if parent was processed before child - var parentPath = this.FindParentPomByArtifactId(parentGroupId, parentArtifactId, currentFilePath); - if (!string.IsNullOrEmpty(parentPath)) - { - this.mavenParentChildRelationships[currentFilePath] = parentPath; - this.Logger.LogDebug( - "MavenWithFallback: Parsed parent relationship: {Child} → {Parent}", - Path.GetFileName(currentFilePath), - Path.GetFileName(parentPath)); - } - else - { - // Parent not found yet - queue for second pass resolution after all files are processed - this.unresolvedParentRelationships.Enqueue((currentFilePath, parentGroupId, parentArtifactId)); - this.Logger.LogDebug( - "MavenWithFallback: Queued unresolved parent relationship for {Child} → {ParentArtifactId}", - Path.GetFileName(currentFilePath), - parentArtifactId); - } - } - } - } - catch (Exception e) - { - this.Logger.LogError(e, "Failed to parse parent relationship from {FilePath}", currentFilePath); - } - } - - /// - /// Finds parent pom.xml file path by Maven coordinates (groupId:artifactId). - /// First searches by coordinates among processed projects, then falls back to directory traversal. - /// - /// Parent groupId to match. - /// Parent artifactId to match. - /// Current file path to start searching from. - /// Parent pom.xml file path, or empty string if not found. 
- private string FindParentPomByArtifactId(string parentGroupId, string parentArtifactId, string currentFilePath) - { - // Use cache to avoid repeated operations for the same file - return this.parentPomCache.GetOrAdd(currentFilePath, filePath => - { - try - { - // First, try to find by Maven coordinates (handles sibling projects) - if (!string.IsNullOrWhiteSpace(parentArtifactId)) - { - var coordinateKey = string.IsNullOrWhiteSpace(parentGroupId) - ? parentArtifactId - : $"{parentGroupId}:{parentArtifactId}"; - - if (this.processedMavenProjects.TryGetValue(coordinateKey, out var coordinateBasedPath)) - { - this.Logger.LogDebug( - "MavenWithFallback: Found parent {ParentCoordinate} at {Path} for {Child}", - coordinateKey, - Path.GetFileName(coordinateBasedPath), - Path.GetFileName(filePath)); - return coordinateBasedPath; - } - } - - // Fallback: Maven convention parent directory search - var currentDir = Path.GetDirectoryName(filePath); - var parentDir = Path.GetDirectoryName(currentDir); - - // Track visited directories to prevent infinite loops from circular directory structures - var visitedDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); - - while (!string.IsNullOrEmpty(parentDir)) - { - // Prevent infinite loops from circular directory references or file system anomalies - if (!visitedDirectories.Add(parentDir)) - { - this.Logger.LogDebug( - "MavenWithFallback: Circular directory reference detected while searching for parent POM, breaking at {Directory}", - parentDir); - break; - } - - var parentPomPath = Path.Combine(parentDir, "pom.xml"); - if (this.fileUtilityService.Exists(parentPomPath) && - !string.Equals(parentPomPath, filePath, StringComparison.OrdinalIgnoreCase)) - { - return parentPomPath; - } - - var nextParentDir = Path.GetDirectoryName(parentDir); - if (string.Equals(nextParentDir, parentDir, StringComparison.OrdinalIgnoreCase)) - { - break; // Reached file system root - } - - parentDir = nextParentDir; - } - - return string.Empty; // 
Not found - } - catch (Exception ex) - { - this.Logger.LogDebug(ex, "Error finding parent POM for {FilePath}", Path.GetFileName(filePath)); - return string.Empty; - } - }); - } - - /// - /// Tracks a Maven project by its coordinates to enable coordinate-based parent resolution. - /// - /// The XML document to parse. - /// XML namespace manager for Maven POM. - /// Current pom.xml file path. - /// Pre-queried groupId node (can be null). - /// Pre-queried artifactId node (can be null). - private void TrackMavenProjectCoordinates(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, XmlNode groupIdNode, XmlNode artifactIdNode) - { - try - { - // If project doesn't have its own groupId, try to get it from parent - groupIdNode ??= document.SelectSingleNode("/proj:project/proj:parent/proj:groupId", namespaceManager); - - if (artifactIdNode != null && !string.IsNullOrWhiteSpace(artifactIdNode.InnerText)) - { - var groupId = groupIdNode?.InnerText; - var artifactId = artifactIdNode.InnerText; - - // Store with both artifactId-only and groupId:artifactId keys for flexible lookup - this.processedMavenProjects.TryAdd(artifactId, filePath); - if (!string.IsNullOrWhiteSpace(groupId)) - { - this.processedMavenProjects.TryAdd($"{groupId}:{artifactId}", filePath); - } - - this.Logger.LogDebug( - "MavenWithFallback: Tracked project {GroupId}:{ArtifactId} at {Path}", - groupId ?? "(inherited)", - artifactId, - Path.GetFileName(filePath)); - } - } - catch (Exception e) - { - this.Logger.LogDebug(e, "Failed to track Maven project coordinates from {Path}", filePath); - } - } - - /// - /// Resolves a version template using only local variables from the current file. - /// This ensures immediate resolution only when the variable is defined in the same file (highest priority). - /// - /// The version template with variables (e.g., "${revision}"). - /// Local variables from the current file. 
- /// The resolved version string, or the original template if local variable not found. - private string ResolveVersionFromLocalOnly(string versionTemplate, Dictionary localVariables) - { - var resolvedVersion = versionTemplate; - var match = VersionRegex.Match(versionTemplate); - - if (match.Success) - { - var variable = match.Groups[1].Captures[0].ToString(); - - // Only check local variables (same file priority) - if (localVariables.TryGetValue(variable, out var localReplacement)) - { - resolvedVersion = versionTemplate.Replace("${" + variable + "}", localReplacement); - } - } - - return resolvedVersion; - } - - private IEnumerable ExtractFailedEndpoints(string errorMessage) - { - if (string.IsNullOrWhiteSpace(errorMessage)) - { - return []; - } - - return EndpointRegex.Matches(errorMessage) - .Select(m => m.Value) - .Distinct(); - } - - private void LogAuthErrorGuidance() - { - var guidance = new StringBuilder(); - guidance.AppendLine("Maven CLI failed with authentication errors."); - - if (!this.failedEndpoints.IsEmpty) - { - guidance.AppendLine("The following Maven repository endpoints had authentication failures:"); - foreach (var endpoint in this.failedEndpoints.Distinct().Take(5)) - { - guidance.AppendLine($" - {endpoint}"); - } - - guidance.AppendLine(" Ensure your pipeline has access to these Maven repositories."); - } - - guidance.AppendLine("Note: Falling back to static pom.xml parsing."); - - this.LogWarning(guidance.ToString()); - } - - /// - /// Resolves parent relationships that couldn't be established during first pass. - /// This handles cases where the parent POM was processed after the child POM. 
- /// - private void ResolveUnresolvedParentRelationships() - { - var resolvedCount = 0; - var unresolvedCount = 0; - - while (this.unresolvedParentRelationships.TryDequeue(out var unresolvedRelationship)) - { - var (filePath, parentGroupId, parentArtifactId) = unresolvedRelationship; - - // Skip if already resolved (could happen if resolved via directory traversal during first pass) - if (this.mavenParentChildRelationships.ContainsKey(filePath)) - { - continue; - } - - // Clear the cache entry so we can try again with the now-complete processedMavenProjects - this.parentPomCache.TryRemove(filePath, out _); - - // Try to find parent by coordinates now that all files have been processed - var parentPath = this.FindParentPomByCoordinatesOnly(parentGroupId, parentArtifactId, filePath); - if (!string.IsNullOrEmpty(parentPath)) - { - this.mavenParentChildRelationships[filePath] = parentPath; - resolvedCount++; - this.Logger.LogDebug( - "MavenWithFallback: Resolved deferred parent relationship: {Child} → {Parent}", - Path.GetFileName(filePath), - Path.GetFileName(parentPath)); - } - else - { - unresolvedCount++; - this.Logger.LogDebug( - "MavenWithFallback: Could not resolve parent {ParentGroupId}:{ParentArtifactId} for {Child}", - parentGroupId ?? "(null)", - parentArtifactId, - Path.GetFileName(filePath)); - } - } - - if (resolvedCount > 0 || unresolvedCount > 0) - { - this.LogDebugWithId($"Second pass (parent resolution) completed: {resolvedCount} deferred parent relationships resolved, {unresolvedCount} remain unresolved"); - } - } - - /// - /// Finds parent POM by Maven coordinates only (no directory traversal). - /// Used for deferred parent resolution after all files have been processed. 
- /// - private string FindParentPomByCoordinatesOnly(string parentGroupId, string parentArtifactId, string currentFilePath) - { - if (string.IsNullOrWhiteSpace(parentArtifactId)) - { - return string.Empty; - } - - // Try with full coordinates first - if (!string.IsNullOrWhiteSpace(parentGroupId)) - { - var fullCoordinateKey = $"{parentGroupId}:{parentArtifactId}"; - if (this.processedMavenProjects.TryGetValue(fullCoordinateKey, out var fullCoordinatePath) && - !string.Equals(fullCoordinatePath, currentFilePath, StringComparison.OrdinalIgnoreCase)) - { - return fullCoordinatePath; - } - } - - // Try with artifactId only - if (this.processedMavenProjects.TryGetValue(parentArtifactId, out var artifactIdPath) && - !string.Equals(artifactIdPath, currentFilePath, StringComparison.OrdinalIgnoreCase)) - { - return artifactIdPath; - } - - return string.Empty; - } - - /// - /// Third pass: resolve all pending components using hierarchy-aware variable resolution. - /// For components with unresolved variables, this picks the closest ancestor definition - /// based on Maven's property inheritance rules (child > parent precedence). 
- /// - private void ResolvePendingComponents() - { - // Capture count before draining for accurate telemetry - this.pendingComponentCountBeforeResolution = this.pendingComponents.Count; - - var resolvedCount = 0; - var skippedCount = 0; - - while (this.pendingComponents.TryDequeue(out var pendingComponent)) - { - try - { - var resolvedVersion = this.ResolveVersionWithHierarchyAwareness(pendingComponent.VersionTemplate, pendingComponent.FilePath); - if (!resolvedVersion.StartsWith("${")) - { - var component = new MavenComponent(pendingComponent.GroupId, pendingComponent.ArtifactId, resolvedVersion); - var detectedComponent = new DetectedComponent(component); - pendingComponent.Recorder.RegisterUsage(detectedComponent); - Interlocked.Increment(ref this.staticParserComponentCount); - resolvedCount++; - } - else - { - skippedCount++; - this.Logger.LogDebug( - "Version string {Version} for component {Group}/{Artifact} could not be resolved and a component will not be recorded. File: {File}", - resolvedVersion, - pendingComponent.GroupId, - pendingComponent.ArtifactId, - pendingComponent.FilePath); - } - } - catch (Exception e) - { - skippedCount++; - this.Logger.LogError( - e, - "Failed to resolve pending component {Group}/{Artifact} from {File}", - pendingComponent.GroupId, - pendingComponent.ArtifactId, - pendingComponent.FilePath); - } - } - - this.LogDebugWithId($"Third pass (variable resolution) completed: {resolvedCount} components resolved, {skippedCount} skipped due to unresolved variables"); - } - - /// - /// Resolves a version template with hierarchy-aware precedence. - /// When multiple variable definitions exist, picks the closest ancestor to the requesting file. - /// This implements Maven's property inheritance rule: child properties take precedence over parent properties. - /// - /// The version template with variables (e.g., "${revision}"). - /// The file path of the POM requesting the variable resolution. 
- /// The resolved version string, or the original template if variables cannot be resolved. - private string ResolveVersionWithHierarchyAwareness(string versionTemplate, string requestingFilePath) - { - var resolvedVersion = versionTemplate; - var match = VersionRegex.Match(versionTemplate); - - if (match.Success) - { - var variable = match.Groups[1].Captures[0].ToString(); - - // Use Maven-compliant hierarchy search: current → parent → grandparent - var foundValue = this.FindVariableInMavenHierarchy(variable, requestingFilePath); - if (foundValue != null) - { - resolvedVersion = versionTemplate.Replace("${" + variable + "}", foundValue.Value.Value); - } - else - { - // Variable not found in Maven hierarchy - log at debug level since unresolved - // properties are common (profiles, external parents, etc.) and aggregate count in telemetry - Interlocked.Increment(ref this.unresolvedVariableCount); - this.Logger.LogDebug( - "{DetectorId}: Variable {Variable} not found in Maven hierarchy for {File}", - this.Id, - variable, - Path.GetFileName(requestingFilePath)); - } - } - - return resolvedVersion; - } - - /// - /// Finds a variable value using Maven-compliant hierarchy search. - /// Searches in order: current file → parent → grandparent (stops at first match). - /// - /// Variable name to find. - /// The pom.xml file requesting the variable. - /// Variable value and source file, or null if not found in hierarchy. - private (string Value, string SourceFile)? 
FindVariableInMavenHierarchy(string variable, string requestingFilePath) - { - var currentFile = requestingFilePath; - var visitedFiles = new HashSet(StringComparer.OrdinalIgnoreCase); - var keyBuilder = new StringBuilder(256); // Pre-allocate for typical path lengths - - // Walk up Maven parent hierarchy until variable found or no more parents - while (!string.IsNullOrEmpty(currentFile)) - { - // Prevent infinite loops from circular parent references - if (!visitedFiles.Add(currentFile)) - { - this.Logger.LogDebug( - "{DetectorId}: Circular parent reference detected while resolving variable {Variable}, breaking at {File}", - this.Id, - variable, - Path.GetFileName(currentFile)); - break; - } - - // Check if this file has the variable definition using StringBuilder for efficiency - keyBuilder.Clear(); - keyBuilder.Append(currentFile).Append("::").Append(variable); - var variableKey = keyBuilder.ToString(); - - if (this.collectedVariables.TryGetValue(variableKey, out var value)) - { - return (value, currentFile); - } - - // Move to Maven parent (not directory parent) - this.mavenParentChildRelationships.TryGetValue(currentFile, out currentFile); - } - - return null; // Variable not found in Maven hierarchy - } - - /// - /// Filters out nested pom.xml files, keeping only root-level ones. - /// A pom.xml is considered nested if there's another pom.xml in a parent directory. - /// - /// The incoming process requests for pom.xml files. - /// Dictionary to populate with nested pom.xml mappings for fallback scenarios. - /// Cancellation token for the operation. - /// Process requests for only root-level pom.xml files. 
- private IObservable RemoveNestedPomXmls( - IObservable componentStreams, - ConcurrentDictionary> parentPomDictionary, - CancellationToken cancellationToken) - { - return componentStreams - .ToList() - .SelectMany(allRequests => - { - cancellationToken.ThrowIfCancellationRequested(); - - // Sort all requests by path depth (parent-first) to ensure deterministic processing order. - // This is critical for fallback static parsing where parent POMs must be processed before children - // to ensure proper property resolution and inheritance. - var sortedRequests = allRequests - .OrderBy(r => NormalizeDirectoryPath(Path.GetDirectoryName(r.ComponentStream.Location)).Length) - .ThenBy(r => r.ComponentStream.Location, StringComparer.OrdinalIgnoreCase) - .ToList(); - - // Use a HashSet of root directories for O(1) lookup instead of O(n) list iteration - var rootPomDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); - var filteredRequests = new List(); - - foreach (var request in sortedRequests) - { - cancellationToken.ThrowIfCancellationRequested(); - - var location = NormalizeDirectoryPath(Path.GetDirectoryName(request.ComponentStream.Location)); - - // Check if any ancestor directory is already a root POM directory - // Walk up the directory tree (O(depth) instead of O(n)) - var isNested = false; - var parentDir = Path.GetDirectoryName(location.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)); - - while (!string.IsNullOrEmpty(parentDir)) - { - var normalizedParent = NormalizeDirectoryPath(parentDir); - if (rootPomDirectories.Contains(normalizedParent)) - { - this.LogDebugWithId($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {normalizedParent}."); - isNested = true; - parentPomDictionary.AddOrUpdate( - normalizedParent, - [request], - (key, existingList) => - { - existingList.Add(request); - return existingList; - }); - break; - } - - var nextParent = Path.GetDirectoryName(parentDir); - if 
(string.Equals(nextParent, parentDir, StringComparison.OrdinalIgnoreCase)) - { - break; // Reached root - } - - parentDir = nextParent; - } - - if (!isNested) - { - this.LogDebugWithId($"Discovered {request.ComponentStream.Location}."); - rootPomDirectories.Add(location); - parentPomDictionary.AddOrUpdate( - location, - [request], - (key, existingList) => - { - existingList.Add(request); - return existingList; - }); - filteredRequests.Add(request); - } - } - - return filteredRequests; - }); - } -} diff --git a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs index 10d3d112d..264b740ea 100644 --- a/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/maven/MvnCliComponentDetector.cs @@ -8,30 +8,158 @@ namespace Microsoft.ComponentDetection.Detectors.Maven; using System.Linq; using System.Reactive.Linq; using System.Text; +using System.Text.RegularExpressions; using System.Threading; using System.Threading.Tasks; using System.Threading.Tasks.Dataflow; +using System.Xml; using Microsoft.ComponentDetection.Common; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Contracts.Internal; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.Extensions.Logging; +/// +/// Enum representing which detection method was used. +/// +internal enum MavenDetectionMethod +{ + /// No detection performed. + None, + + /// MvnCli was used successfully for all files. + MvnCliOnly, + + /// Static parser was used for all files (MvnCli not available or failed completely). + StaticParserOnly, + + /// MvnCli succeeded for some files, static parser used for failed files. + Mixed, +} + +/// +/// Enum representing why fallback occurred. +/// +internal enum MavenFallbackReason +{ + /// No fallback was needed. 
+ None, + + /// Maven CLI was explicitly disabled via the CD_MAVEN_DISABLE_CLI environment variable. + MvnCliDisabledByUser, + + /// Maven CLI was not available in PATH. + MavenCliNotAvailable, + + /// MvnCli failed due to authentication error (401/403). + AuthenticationFailure, + + /// MvnCli failed due to other reasons. + OtherMvnCliFailure, +} + +/// +/// Maven detector that combines MvnCli detection with static pom.xml parsing fallback. +/// Runs MvnCli detection first, then checks if detection produced any results. +/// If MvnCli fails for any pom.xml, falls back to static parsing for failed files. +/// public class MvnCliComponentDetector : FileComponentDetector { + /// + /// Environment variable to disable MvnCli and use only static pom.xml parsing. + /// Set to "true" to disable MvnCli detection. + /// Usage: Set CD_MAVEN_DISABLE_CLI=true as a pipeline/environment variable. + /// + internal const string DisableMvnCliEnvVar = "CD_MAVEN_DISABLE_CLI"; + private const string MavenManifest = "pom.xml"; + private const string MavenXmlNamespace = "http://maven.apache.org/POM/4.0.0"; + private const string ProjNamespace = "proj"; + private const string DependencyNode = "//proj:dependency"; + + private const string GroupIdSelector = "groupId"; + private const string ArtifactIdSelector = "artifactId"; + private const string VersionSelector = "version"; + + private static readonly Regex VersionRegex = new( + @"^\$\{(.*)\}$", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + // Auth error patterns to detect in Maven error output + private static readonly string[] AuthErrorPatterns = + [ + "401", + "403", + "Unauthorized", + "Access denied", + ]; + + // Pattern to initially extract URLs from Maven error messages. + // Matched values are subsequently normalized (scheme+host+port only) before + // being stored in logs or telemetry to avoid leaking credentials or tokens. 
+ private static readonly Regex EndpointRegex = new( + @"https?://[^\s\]\)>]+", + RegexOptions.Compiled | RegexOptions.IgnoreCase); + + /// + /// Maximum time allowed for the OnPrepareDetectionAsync phase. + /// This is a safety guardrail to prevent hangs. + /// Most repos should complete the full Maven CLI scan within this window. + /// + private static readonly TimeSpan PrepareDetectionTimeout = TimeSpan.FromMinutes(5); private readonly IMavenCommandService mavenCommandService; + private readonly IEnvironmentVariableService envVarService; + private readonly IFileUtilityService fileUtilityService; + + // Two-pass static parsing: collect variables first, then resolve components + private readonly ConcurrentDictionary collectedVariables = new(); + private readonly ConcurrentQueue pendingComponents = new(); + + // Track Maven parent-child relationships for proper variable resolution + private readonly ConcurrentDictionary mavenParentChildRelationships = new(); + + // Track processed Maven projects by coordinates (groupId:artifactId -> file path) + private readonly ConcurrentDictionary processedMavenProjects = new(); + + // Track files that couldn't establish parent relationships during first pass (for second pass re-evaluation) + private readonly ConcurrentQueue<(string FilePath, string ParentGroupId, string ParentArtifactId)> unresolvedParentRelationships = new(); + + // Track original pom.xml files for potential fallback + private readonly ConcurrentQueue originalPomFiles = []; + + // Track Maven CLI errors for analysis + private readonly ConcurrentQueue mavenCliErrors = []; + private readonly ConcurrentQueue failedEndpoints = []; + + /// + /// Cache for parent POM lookups to avoid repeated file system operations. + /// Key: current file path, Value: parent POM path or empty string if not found. 
+ /// + private readonly ConcurrentDictionary parentPomCache = new(); + + // Telemetry tracking + private MavenDetectionMethod usedDetectionMethod = MavenDetectionMethod.None; + private MavenFallbackReason fallbackReason = MavenFallbackReason.None; + private int mvnCliComponentCount; + private int staticParserComponentCount; + private int unresolvedVariableCount; + private int pendingComponentCountBeforeResolution; + private bool mavenCliAvailable; public MvnCliComponentDetector( IComponentStreamEnumerableFactory componentStreamEnumerableFactory, IObservableDirectoryWalkerFactory walkerFactory, IMavenCommandService mavenCommandService, + IEnvironmentVariableService envVarService, + IFileUtilityService fileUtilityService, ILogger logger) { this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory; this.Scanner = walkerFactory; this.mavenCommandService = mavenCommandService; + this.envVarService = envVarService; + this.fileUtilityService = fileUtilityService; this.Logger = logger; } @@ -41,42 +169,352 @@ public MvnCliComponentDetector( public override IEnumerable SupportedComponentTypes => [ComponentType.Maven]; - public override int Version => 4; + public override int Version => 5; public override IEnumerable Categories => [Enum.GetName(typeof(DetectorClass), DetectorClass.Maven)]; + // Normalizes a directory path by ensuring it ends with a directory separator. + // This prevents false matches like "C:\foo" matching "C:\foobar". + private static string NormalizeDirectoryPath(string path) + { + if (string.IsNullOrEmpty(path)) + { + return path; + } + + var lastChar = path[^1]; + return lastChar == Path.DirectorySeparatorChar || lastChar == Path.AltDirectorySeparatorChar + ? 
path + : path + Path.DirectorySeparatorChar; + } + + private static bool IsAuthenticationError(string errorMessage) + { + if (string.IsNullOrWhiteSpace(errorMessage)) + { + return false; + } + + // Use ReadOnlySpan for more efficient string searching + var messageSpan = errorMessage.AsSpan(); + foreach (var pattern in AuthErrorPatterns) + { + if (messageSpan.Contains(pattern, StringComparison.OrdinalIgnoreCase)) + { + return true; + } + } + + return false; + } + + /// + /// Normalizes a raw URL string to scheme+host+port only, stripping any + /// userinfo (credentials), path, query string, and fragment that may + /// appear in Maven error messages and could contain sensitive tokens. + /// Returns when the input is not a well-formed + /// absolute URI with an http/https scheme. + /// + private static string NormalizeEndpointUrl(string rawUrl) + { + if (!Uri.TryCreate(rawUrl, UriKind.Absolute, out var uri)) + { + return null; + } + + // Only accept http/https — the regex already enforces this but be explicit. + if (uri.Scheme is not "http" and not "https") + { + return null; + } + + // Reconstruct scheme://host[:port] explicitly, omitting UserInfo (credentials), + // path, query, and fragment. Uri.GetLeftPart(UriPartial.Authority) preserves + // UserInfo, so we cannot use it here. + var port = uri.IsDefaultPort ? string.Empty : $":{uri.Port}"; + return $"{uri.Scheme}://{uri.Host}{port}"; + } + private void LogDebugWithId(string message) => this.Logger.LogDebug("{DetectorId}: {Message}", this.Id, message); - protected override async Task> OnPrepareDetectionAsync(IObservable processRequests, IDictionary detectorArgs, CancellationToken cancellationToken = default) + private void LogWarning(string message) => + this.Logger.LogWarning("{DetectorId}: {Message}", this.Id, message); + + /// + /// Resets all per-scan state to prevent stale data from leaking between scans. + /// This is critical because detectors are registered as singletons. 
+    ///
+    private void ResetScanState()
     {
-        if (!await this.mavenCommandService.MavenCLIExistsAsync())
+        // Clear all concurrent collections
+        this.collectedVariables.Clear();
+        this.mavenParentChildRelationships.Clear();
+        this.processedMavenProjects.Clear();
+        this.parentPomCache.Clear();
+
+        // Drain all concurrent queues
+        while (this.pendingComponents.TryDequeue(out _))
         {
-            this.LogDebugWithId("Skipping maven detection as maven is not available in the local PATH.");
-            return Enumerable.Empty<ProcessRequest>().ToObservable();
+            // Intentionally empty - just draining the queue
         }
 
-        var processPomFile = new ActionBlock<ProcessRequest>(x => this.mavenCommandService.GenerateDependenciesFileAsync(x, cancellationToken));
+        while (this.unresolvedParentRelationships.TryDequeue(out _))
+        {
+            // Intentionally empty - just draining the queue
+        }
 
-        await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest =>
+        while (this.originalPomFiles.TryDequeue(out _))
         {
-            processPomFile.Post(processRequest);
-        });
+            // Intentionally empty - just draining the queue
+        }
 
-        processPomFile.Complete();
+        while (this.mavenCliErrors.TryDequeue(out _))
+        {
+            // Intentionally empty - just draining the queue
+        }
+
+        while (this.failedEndpoints.TryDequeue(out _))
+        {
+            // Intentionally empty - just draining the queue
+        }
+
+        // Reset telemetry counters and flags
+        this.usedDetectionMethod = MavenDetectionMethod.None;
+        this.fallbackReason = MavenFallbackReason.None;
+        this.mvnCliComponentCount = 0;
+        this.staticParserComponentCount = 0;
+        this.unresolvedVariableCount = 0;
+        this.pendingComponentCountBeforeResolution = 0;
+        this.mavenCliAvailable = false;
+    }
+
+    /// <summary>
+    /// Resets per-scan state, then runs <c>OnPrepareDetectionCoreAsync</c> under a token that is
+    /// cancelled either by the caller or once <c>PrepareDetectionTimeout</c> elapses.
+    /// </summary>
+    /// <param name="processRequests">Incoming process requests; returned unchanged whenever this method falls back.</param>
+    /// <param name="detectorArgs">Detector arguments (not read by this method).</param>
+    /// <param name="cancellationToken">Caller's token. Caller-requested cancellation propagates as <see cref="OperationCanceledException"/>.</param>
+    /// <returns>
+    /// The requests produced by the core implementation, or <paramref name="processRequests"/>
+    /// when the prepare phase times out or fails with a non-cancellation exception.
+    /// </returns>
+    protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(
+        IObservable<ProcessRequest> processRequests,
+        IDictionary<string, string> detectorArgs,
+        CancellationToken cancellationToken = default)
+    {
+        // Reset all per-scan state to prevent stale data from previous scans
+        // This is critical because detectors are registered as singletons
+        this.ResetScanState();
+
+        // Wrap the entire method in a try-catch with timeout to protect against hangs.
+        // OnPrepareDetectionAsync doesn't have the same guardrails as OnFileFoundAsync,
+        // so we need to be extra careful here.
+        try
+        {
+            using var timeoutCts = new CancellationTokenSource(PrepareDetectionTimeout);
+            using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token);
+
+            return await this.OnPrepareDetectionCoreAsync(processRequests, linkedCts.Token);
+        }
+        catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
+        {
+            // Timeout occurred (not user cancellation)
+            this.LogWarning($"OnPrepareDetectionAsync timed out after {PrepareDetectionTimeout.TotalMinutes} minutes. Falling back to static pom.xml parsing.");
+            this.Telemetry["TimedOut"] = "true";
+            this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure;
+            this.usedDetectionMethod = MavenDetectionMethod.Mixed;
+            return processRequests;
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            // Unexpected error - log and fall back to static parsing.
+            // The filter excludes OperationCanceledException: timeouts are handled by the clause
+            // above, so any cancellation that gets this far was requested by the caller and must
+            // propagate instead of being reported as a failure and followed by a static-parsing scan.
+            this.LogWarning($"OnPrepareDetectionAsync failed with unexpected error: {ex.Message}. Falling back to static pom.xml parsing.");
+            this.Telemetry["PrepareDetectionError"] = ex.GetType().Name;
+            this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure;
+            this.usedDetectionMethod = MavenDetectionMethod.Mixed;
+            return processRequests;
+        }
+    }
+
+    ///
+    /// Core implementation of OnPrepareDetectionAsync, called within the timeout wrapper.
+ /// + private async Task> OnPrepareDetectionCoreAsync( + IObservable processRequests, + CancellationToken cancellationToken) + { + // Check if we should skip Maven CLI and use static parsing only + if (this.ShouldSkipMavenCli()) + { + return processRequests; + } + + // Check if Maven CLI is available + if (!await this.TryInitializeMavenCliAsync()) + { + return processRequests; + } + + // Create per-scan dictionary to track nested pom.xml mappings + // This prevents state accumulation across scans since detectors are singletons + var parentPomDictionary = new ConcurrentDictionary>(StringComparer.OrdinalIgnoreCase); + + // Run Maven CLI detection on all pom.xml files + // Returns deps files for CLI successes, pom.xml files for CLI failures + return await this.RunMavenCliDetectionAsync(processRequests, parentPomDictionary, cancellationToken); + } + + /// + /// Checks if Maven CLI should be skipped due to environment variable configuration. + /// + /// True if Maven CLI should be skipped; otherwise, false. + private bool ShouldSkipMavenCli() + { + if (this.envVarService.IsEnvironmentVariableValueTrue(DisableMvnCliEnvVar)) + { + this.LogDebugWithId($"MvnCli detection disabled via {DisableMvnCliEnvVar} environment variable. Using static pom.xml parsing only."); + this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; + this.fallbackReason = MavenFallbackReason.MvnCliDisabledByUser; + this.mavenCliAvailable = false; + return true; + } + + return false; + } + + /// + /// Checks if Maven CLI is available. + /// + /// True if Maven CLI is available; otherwise, false. + private async Task TryInitializeMavenCliAsync() + { + this.mavenCliAvailable = await this.mavenCommandService.MavenCLIExistsAsync(); + + if (!this.mavenCliAvailable) + { + this.LogDebugWithId("Maven CLI not found in PATH. 
Will use static pom.xml parsing only."); + this.usedDetectionMethod = MavenDetectionMethod.StaticParserOnly; + this.fallbackReason = MavenFallbackReason.MavenCliNotAvailable; + return false; + } + + this.LogDebugWithId("Maven CLI is available. Running MvnCli detection."); + return true; + } + + /// + /// Runs Maven CLI detection on all root pom.xml files. + /// For each pom.xml, if CLI succeeds, the deps file is added to results. + /// If CLI fails, all pom.xml files under that directory are added for static parsing fallback. + /// + /// The incoming process requests. + /// Dictionary to track nested pom.xml mappings for fallback scenarios. + /// Cancellation token for the operation. + /// An observable of process requests (deps files for CLI success, pom.xml for CLI failure). + private async Task> RunMavenCliDetectionAsync( + IObservable processRequests, + ConcurrentDictionary> parentPomDictionary, + CancellationToken cancellationToken) + { + var results = new ConcurrentQueue(); + var failedDirectories = new ConcurrentQueue(); + var cliSuccessCount = 0; + var cliFailureCount = 0; + + // Process pom.xml files sequentially to avoid Maven local repository lock contention and + // reduces memory pressure from concurrent Maven JVM processes. + var processPomFile = new ActionBlock( + async processRequest => + { + // Check for cancellation before processing each pom.xml + cancellationToken.ThrowIfCancellationRequested(); + + // Store original pom.xml for telemetry + this.originalPomFiles.Enqueue(processRequest); + + var pomFile = processRequest.ComponentStream; + var pomDir = Path.GetDirectoryName(pomFile.Location); + var depsFileName = this.mavenCommandService.BcdeMvnDependencyFileName; + var depsFilePath = Path.Combine(pomDir, depsFileName); + // Generate dependency file using Maven CLI. 
+ var result = await this.mavenCommandService.GenerateDependenciesFileAsync( + processRequest, + cancellationToken); + + if (result.Success) + { + // CLI succeeded - verify deps file was generated + // Use existence check to avoid redundant I/O (file will be read during directory scan) + if (this.fileUtilityService.Exists(depsFilePath)) + { + Interlocked.Increment(ref cliSuccessCount); + } + else + { + // CLI reported success but deps file is missing - treat as failure + Interlocked.Increment(ref cliFailureCount); + failedDirectories.Enqueue(pomDir); + this.LogWarning($"Maven CLI succeeded but deps file not found: {depsFilePath}"); + } + } + else + { + // CLI failed - track directory for nested pom.xml scanning + Interlocked.Increment(ref cliFailureCount); + failedDirectories.Enqueue(pomDir); + + // Capture error output for later analysis + if (!string.IsNullOrWhiteSpace(result.ErrorOutput)) + { + this.mavenCliErrors.Enqueue(result.ErrorOutput); + } + } + }, + new ExecutionDataflowBlockOptions + { + CancellationToken = cancellationToken, + }); + + await this.RemoveNestedPomXmls(processRequests, parentPomDictionary, cancellationToken).ForEachAsync( + processRequest => + { + processPomFile.Post(processRequest); + }, + cancellationToken); + + processPomFile.Complete(); await processPomFile.Completion; - this.LogDebugWithId($"Nested {MavenManifest} files processed successfully, retrieving generated dependency graphs."); + // For failed directories, scan and add all pom.xml files for static parsing + if (!failedDirectories.IsEmpty) + { + foreach (var failedDir in failedDirectories) + { + cancellationToken.ThrowIfCancellationRequested(); + var normalizedFailedDir = NormalizeDirectoryPath(failedDir); + if (parentPomDictionary.TryGetValue(normalizedFailedDir, out var staticParsingRequests)) + { + // Note: staticParsingRequests is already in parent-first order due to the sorted processing + // during dictionary building in RemoveNestedPomXmls + foreach (var request in 
staticParsingRequests) + { + cancellationToken.ThrowIfCancellationRequested(); + results.Enqueue(request); + } + } + } + } - return this.ComponentStreamEnumerableFactory.GetComponentStreams(this.CurrentScanRequest.SourceDirectory, [this.mavenCommandService.BcdeMvnDependencyFileName], this.CurrentScanRequest.DirectoryExclusionPredicate) + // Determine detection method based on results + this.DetermineDetectionMethod(cliSuccessCount, cliFailureCount); + + this.LogDebugWithId($"Maven CLI processing complete: {cliSuccessCount} succeeded, {cliFailureCount} failed out of {this.originalPomFiles.Count} root pom.xml files. Retrieving generated dependency graphs."); + + // Use comprehensive directory scanning after Maven CLI execution to find all generated dependency files + // This ensures we find dependency files from submodules even if Maven CLI was only run on parent pom.xml + var allGeneratedDependencyFiles = this.ComponentStreamEnumerableFactory + .GetComponentStreams( + this.CurrentScanRequest.SourceDirectory, + [this.mavenCommandService.BcdeMvnDependencyFileName], + this.CurrentScanRequest.DirectoryExclusionPredicate) .Select(componentStream => { - // The file stream is going to be disposed after the iteration is finished - // so is necessary to read the content and keep it in memory, for further processing. 
+ // Read and store content to avoid stream disposal issues using var reader = new StreamReader(componentStream.Stream); var content = reader.ReadToEnd(); - return new ProcessRequest { ComponentStream = new ComponentStream @@ -88,78 +526,856 @@ await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest => SingleFileComponentRecorder = this.ComponentRecorder.CreateSingleFileComponentRecorder( Path.Combine(Path.GetDirectoryName(componentStream.Location), MavenManifest)), }; - }) - .ToObservable(); + }); + + // Combine dependency files from CLI success with pom.xml files from CLI failures + return results.Concat(allGeneratedDependencyFiles).ToObservable(); + } + + /// + /// Determines the detection method based on CLI success/failure counts and analyzes any failures. + /// + /// Number of successful CLI executions. + /// Number of failed CLI executions. + private void DetermineDetectionMethod(int cliSuccessCount, int cliFailureCount) + { + if (cliFailureCount == 0 && cliSuccessCount > 0) + { + this.usedDetectionMethod = MavenDetectionMethod.MvnCliOnly; + this.LogDebugWithId("All pom.xml files processed successfully with Maven CLI."); + } + else if (cliFailureCount > 0) + { + this.usedDetectionMethod = MavenDetectionMethod.Mixed; + this.LogWarning($"Maven CLI failed for {cliFailureCount} pom.xml files. 
Using mixed detection."); + this.AnalyzeMvnCliFailure(); + } + } + + protected override Task OnFileFoundAsync( + ProcessRequest processRequest, + IDictionary detectorArgs, + CancellationToken cancellationToken = default) + { + var pattern = processRequest.ComponentStream.Pattern; + + if (pattern == this.mavenCommandService.BcdeMvnDependencyFileName) + { + // Process MvnCli result + this.ProcessMvnCliResult(processRequest); + + // Delete the deps file now that its content has been consumed (was read into MemoryStream during prepare phase) + if (this.CurrentScanRequest?.CleanupCreatedFiles == true) + { + var filePath = processRequest.ComponentStream.Location; + try + { + this.fileUtilityService.Delete(filePath); + this.Logger.LogDebug("Cleaned up Maven deps file {File}", filePath); + } + catch (Exception e) + { + this.Logger.LogDebug(e, "Failed to delete Maven deps file {File}", filePath); + } + } + } + else + { + // Process via static XML parsing + this.ProcessPomFileStatically(processRequest); + } + + return Task.CompletedTask; + } + + protected override Task OnDetectionFinishedAsync() + { + // Second pass: resolve any parent relationships that couldn't be resolved during first pass + // This handles cases where parent POM was processed after child POM + this.ResolveUnresolvedParentRelationships(); + + // Third pass: resolve all pending components with collected variables and complete hierarchy + this.ResolvePendingComponents(); + + // Record telemetry - cache string conversions + var detectionMethodStr = this.usedDetectionMethod.ToString(); + var fallbackReasonStr = this.fallbackReason.ToString(); + var mvnCliCountStr = this.mvnCliComponentCount.ToString(); + var staticCountStr = this.staticParserComponentCount.ToString(); + + this.Telemetry["DetectionMethod"] = detectionMethodStr; + this.Telemetry["FallbackReason"] = fallbackReasonStr; + this.Telemetry["MvnCliComponentCount"] = mvnCliCountStr; + this.Telemetry["StaticParserComponentCount"] = staticCountStr; + 
this.Telemetry["TotalComponentCount"] = (this.mvnCliComponentCount + this.staticParserComponentCount).ToString(); + this.Telemetry["MavenCliAvailable"] = this.mavenCliAvailable.ToString(); + this.Telemetry["OriginalPomFileCount"] = this.originalPomFiles.Count.ToString(); + this.Telemetry["CollectedVariableCount"] = this.collectedVariables.Count.ToString(); + this.Telemetry["PendingComponentCount"] = this.pendingComponentCountBeforeResolution.ToString(); + this.Telemetry["UnresolvedVariableCount"] = this.unresolvedVariableCount.ToString(); + + if (!this.failedEndpoints.IsEmpty) + { + this.Telemetry["FailedEndpoints"] = string.Join(";", this.failedEndpoints.Distinct().Take(10)); + } + + this.LogDebugWithId($"Detection completed. Method: {detectionMethodStr}, " + + $"FallbackReason: {fallbackReasonStr}, " + + $"MvnCli components: {mvnCliCountStr}, " + + $"Static parser components: {staticCountStr}"); + + return Task.CompletedTask; } - protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default) + /// + /// Analyzes Maven CLI failure by checking logged errors for authentication issues. 
+ /// + private void AnalyzeMvnCliFailure() { - var depsFilePath = processRequest.ComponentStream.Location; - this.LogDebugWithId($"OnFileFoundAsync: Processing {depsFilePath}"); + // Check if any recorded errors indicate authentication failure + var hasAuthError = this.mavenCliErrors.Any(IsAuthenticationError); - var componentsBefore = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; + if (hasAuthError) + { + this.fallbackReason = MavenFallbackReason.AuthenticationFailure; + + // Extract failed endpoints from error messages + foreach (var endpoint in this.mavenCliErrors.SelectMany(this.ExtractFailedEndpoints)) + { + this.failedEndpoints.Enqueue(endpoint); + } + + this.LogAuthErrorGuidance(); + } + else + { + this.fallbackReason = MavenFallbackReason.OtherMvnCliFailure; + this.LogWarning("Maven CLI failed. Check Maven logs for details."); + } + } + + private void ProcessMvnCliResult(ProcessRequest processRequest) + { this.mavenCommandService.ParseDependenciesFile(processRequest); - var componentsAfter = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; - this.LogDebugWithId($"OnFileFoundAsync: {depsFilePath} contributed {componentsAfter - componentsBefore} components"); - await Task.CompletedTask; + // Count components registered to this specific file's recorder to avoid race conditions + // when OnFileFoundAsync runs concurrently for multiple files. 
+ var componentsInFile = processRequest.SingleFileComponentRecorder.GetDetectedComponents().Count; + Interlocked.Add(ref this.mvnCliComponentCount, componentsInFile); } - private IObservable RemoveNestedPomXmls(IObservable componentStreams) + private void ProcessPomFileStatically(ProcessRequest processRequest) { - var directoryItemFacades = new ConcurrentDictionary(StringComparer.OrdinalIgnoreCase); - var topLevelDirectories = new ConcurrentDictionary(StringComparer.OrdinalIgnoreCase); + var file = processRequest.ComponentStream; + var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder; + var filePath = file.Location; - return Observable.Create(s => + try { - return componentStreams.Subscribe( - processRequest => + var document = new XmlDocument(); + document.Load(file.Stream); + + // Single XML parsing pass: create namespace manager once + var namespaceManager = new XmlNamespaceManager(document.NameTable); + namespaceManager.AddNamespace(ProjNamespace, MavenXmlNamespace); + + // Collect variables from this document into a local dictionary first + var localVariables = new Dictionary(); + this.CollectVariablesFromDocument(document, namespaceManager, filePath, localVariables); + + // Batch add local variables to global collection for better performance + // Key format: "filePath::variableName" enables Maven hierarchy-aware lookup + if (localVariables.Count > 0) + { + var keyBuilder = new StringBuilder(filePath.Length + 64); // Pre-allocate capacity + var filePathWithSeparator = filePath + "::"; + + foreach (var (variableName, variableValue) in localVariables) { - var item = processRequest.ComponentStream; - var currentDir = item.Location; - DirectoryItemFacadeOptimized last = null; - while (!string.IsNullOrWhiteSpace(currentDir)) - { - currentDir = Path.GetDirectoryName(currentDir); + keyBuilder.Clear(); + keyBuilder.Append(filePathWithSeparator).Append(variableName); + var key = keyBuilder.ToString(); - // We've reached the top / root - if 
(string.IsNullOrWhiteSpace(currentDir)) - { - // If our last directory isn't in our list of top level nodes, it should be added. This happens for the first processed item and then subsequent times we have a new root (edge cases with multiple hard drives, for example) - if (last != null && !topLevelDirectories.ContainsKey(last.Name)) - { - topLevelDirectories.TryAdd(last.Name, last); - } + this.collectedVariables.AddOrUpdate(key, variableValue, (_, _) => variableValue); + } - this.LogDebugWithId($"Discovered {item.Location}."); + this.Logger.LogDebug("{DetectorId}: Collected {Count} variables from {File}", this.Id, localVariables.Count, Path.GetFileName(filePath)); + } - // If we got to the top without finding a directory that had a pom.xml on the way, we yield. - s.OnNext(processRequest); - break; - } + // First pass: collect dependencies (may have unresolved variables) + var dependencyList = document.SelectNodes(DependencyNode, namespaceManager); + + foreach (XmlNode dependency in dependencyList) + { + var groupId = dependency[GroupIdSelector]?.InnerText; + var artifactId = dependency[ArtifactIdSelector]?.InnerText; + + if (groupId == null || artifactId == null) + { + continue; + } - var current = directoryItemFacades.GetOrAdd(currentDir, _ => new DirectoryItemFacadeOptimized + var version = dependency[VersionSelector]; + if (version != null && !version.InnerText.Contains(',')) + { + var versionRef = version.InnerText.Trim('[', ']'); + + if (versionRef.StartsWith("${")) + { + // Only resolve immediately if local variable exists (highest priority) + // Otherwise, defer to second pass to ensure proper hierarchy-aware resolution + var resolvedVersion = this.ResolveVersionFromLocalOnly(versionRef, localVariables); + if (!resolvedVersion.StartsWith("${")) + { + // Local variable found - resolve immediately (highest priority) + var component = new MavenComponent(groupId, artifactId, resolvedVersion); + var detectedComponent = new DetectedComponent(component); + 
singleFileComponentRecorder.RegisterUsage(detectedComponent); + Interlocked.Increment(ref this.staticParserComponentCount); + } + else { - Name = currentDir, - FileNames = [], - }); + // No local variable - defer to second pass for hierarchy-aware resolution + // This ensures we consider all variable definitions before resolving + this.pendingComponents.Enqueue(new PendingComponent( + groupId, + artifactId, + versionRef, + singleFileComponentRecorder, + filePath)); + } + } + else + { + // Direct version - register immediately + var component = new MavenComponent(groupId, artifactId, versionRef); + var detectedComponent = new DetectedComponent(component); + singleFileComponentRecorder.RegisterUsage(detectedComponent); + Interlocked.Increment(ref this.staticParserComponentCount); + } + } + else + { + this.Logger.LogDebug( + "Version string for component {Group}/{Artifact} is invalid or unsupported and a component will not be recorded.", + groupId, + artifactId); + } + } + } + catch (Exception e) + { + this.Logger.LogError(e, "Failed to read file {Path}", filePath); + } + } + + /// + /// Collects all variable definitions from a POM document into the provided local dictionary. + /// Optimized to reuse XmlNamespaceManager and minimize XPath queries. + /// + /// The XML document to scan for variables. + /// Pre-configured namespace manager to reuse. + /// The file path for logging purposes. + /// Local dictionary to collect variables into. 
+    private void CollectVariablesFromDocument(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, Dictionary<string, string> localVariables)
+    {
+        try
+        {
+            // Query project coordinates once - used for both variable collection and project tracking
+            var projectGroupIdNode = document.SelectSingleNode("/proj:project/proj:groupId", namespaceManager);
+            var projectArtifactIdNode = document.SelectSingleNode("/proj:project/proj:artifactId", namespaceManager);
+            var projectVersionNode = document.SelectSingleNode("/proj:project/proj:version", namespaceManager);
 
-                        // If we didn't come from a directory, it's because we're just getting started. Our current directory should include the file that led to it showing up in the graph.
-                        if (last == null)
+            // Track this project by Maven coordinates for parent resolution (reuses queried nodes)
+            this.TrackMavenProjectCoordinates(document, namespaceManager, filePath, projectGroupIdNode, projectArtifactIdNode);
+
+            // Parse Maven parent relationship to build proper hierarchy
+            this.ParseMavenParentRelationship(document, namespaceManager, filePath);
+
+            // Collect properties variables from ALL properties sections (handles malformed XML with multiple <properties> elements).
+            // The "//" axis matches proj:properties at any depth of the document, not only directly under the project element.
+            var propertiesNodes = document.SelectNodes("//proj:properties", namespaceManager);
+            if (propertiesNodes?.Count > 0)
+            {
+                if (propertiesNodes.Count > 1)
+                {
+                    this.Logger.LogDebug("{DetectorId}: Found {Count} properties sections in {File}", this.Id, propertiesNodes.Count, Path.GetFileName(filePath));
+                }
+
+                foreach (XmlNode propertiesNode in propertiesNodes)
+                {
+                    foreach (XmlNode propertyNode in propertiesNode.ChildNodes)
+                    {
+                        // Only element children with non-blank text become variables; other node types are skipped.
+                        if (propertyNode.NodeType == XmlNodeType.Element && !string.IsNullOrWhiteSpace(propertyNode.InnerText))
                         {
-                            current.FileNames.Add(Path.GetFileName(item.Location));
+                            // Later properties sections override earlier ones (last wins - Maven behavior)
+                            localVariables[propertyNode.Name] = propertyNode.InnerText;
                         }
+                    }
+                }
+            }
+
+            // Collect project-level variables from already-queried nodes.
+            // These are written after the properties loop, so they overwrite a property that uses the same key.
+            if (projectVersionNode != null && !string.IsNullOrWhiteSpace(projectVersionNode.InnerText))
+            {
+                localVariables["version"] = projectVersionNode.InnerText;
+                localVariables["project.version"] = projectVersionNode.InnerText;
+            }
+
+            if (projectGroupIdNode != null && !string.IsNullOrWhiteSpace(projectGroupIdNode.InnerText))
+            {
+                localVariables["groupId"] = projectGroupIdNode.InnerText;
+                localVariables["project.groupId"] = projectGroupIdNode.InnerText;
+            }
+
+            if (projectArtifactIdNode != null && !string.IsNullOrWhiteSpace(projectArtifactIdNode.InnerText))
+            {
+                localVariables["artifactId"] = projectArtifactIdNode.InnerText;
+                localVariables["project.artifactId"] = projectArtifactIdNode.InnerText;
+            }
+        }
+        catch (Exception e)
+        {
+            // Best-effort: the failure is logged and not rethrown; entries already written to localVariables are kept.
+            this.Logger.LogError(e, "Failed to collect variables from file {Path}", filePath);
+        }
+    }
+
+    /// <summary>
+    /// Parses Maven parent relationship from pom.xml to build proper inheritance hierarchy.
+    /// This is needed for Maven-compliant variable resolution that respects parent-child relationships.
+    /// </summary>
+    /// <param name="document">The XML document to parse.</param>
+    /// <param name="namespaceManager">XML namespace manager for Maven POM.</param>
+    /// <param name="currentFilePath">Current pom.xml file path.</param>
+ private void ParseMavenParentRelationship(XmlDocument document, XmlNamespaceManager namespaceManager, string currentFilePath) + { + try + { + // Query parent element once and access children directly (more efficient than union XPath) + var parentNode = document.SelectSingleNode("/proj:project/proj:parent", namespaceManager); + + if (parentNode != null) + { + var parentGroupId = parentNode["groupId"]?.InnerText; + var parentArtifactId = parentNode["artifactId"]?.InnerText; + + if (!string.IsNullOrWhiteSpace(parentArtifactId)) + { + // Try to find parent pom.xml file by searching processed files for matching artifactId + // This works if parent was processed before child + var parentPath = this.FindParentPomByArtifactId(parentGroupId, parentArtifactId, currentFilePath); + if (!string.IsNullOrEmpty(parentPath)) + { + this.mavenParentChildRelationships[currentFilePath] = parentPath; + this.Logger.LogDebug( + "{DetectorId}: Parsed parent relationship: {Child} → {Parent}", + this.Id, + Path.GetFileName(currentFilePath), + Path.GetFileName(parentPath)); + } + else + { + // Parent not found yet - queue for second pass resolution after all files are processed + this.unresolvedParentRelationships.Enqueue((currentFilePath, parentGroupId, parentArtifactId)); + this.Logger.LogDebug( + "{DetectorId}: Queued unresolved parent relationship for {Child} → {ParentArtifactId}", + this.Id, + Path.GetFileName(currentFilePath), + parentArtifactId); + } + } + } + } + catch (Exception e) + { + this.Logger.LogError(e, "Failed to parse parent relationship from {FilePath}", currentFilePath); + } + } + + /// + /// Finds parent pom.xml file path by Maven coordinates (groupId:artifactId). + /// First searches by coordinates among processed projects, then falls back to directory traversal. + /// + /// Parent groupId to match. + /// Parent artifactId to match. + /// Current file path to start searching from. + /// Parent pom.xml file path, or empty string if not found. 
+ private string FindParentPomByArtifactId(string parentGroupId, string parentArtifactId, string currentFilePath) + { + // Use cache to avoid repeated operations for the same file + return this.parentPomCache.GetOrAdd(currentFilePath, filePath => + { + try + { + // First, try to find by Maven coordinates (handles sibling projects) + if (!string.IsNullOrWhiteSpace(parentArtifactId)) + { + var coordinateKey = string.IsNullOrWhiteSpace(parentGroupId) + ? parentArtifactId + : $"{parentGroupId}:{parentArtifactId}"; + + if (this.processedMavenProjects.TryGetValue(coordinateKey, out var coordinateBasedPath)) + { + this.Logger.LogDebug( + "{DetectorId}: Found parent {ParentCoordinate} at {Path} for {Child}", + this.Id, + coordinateKey, + Path.GetFileName(coordinateBasedPath), + Path.GetFileName(filePath)); + return coordinateBasedPath; + } + } + + // Fallback: Maven convention parent directory search + var currentDir = Path.GetDirectoryName(filePath); + var parentDir = Path.GetDirectoryName(currentDir); + + // Track visited directories to prevent infinite loops from circular directory structures + var visitedDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); + + while (!string.IsNullOrEmpty(parentDir)) + { + // Prevent infinite loops from circular directory references or file system anomalies + if (!visitedDirectories.Add(parentDir)) + { + this.Logger.LogDebug( + "{DetectorId}: Circular directory reference detected while searching for parent POM, breaking at {Directory}", + this.Id, + parentDir); + break; + } + + var parentPomPath = Path.Combine(parentDir, "pom.xml"); + if (this.fileUtilityService.Exists(parentPomPath) && + !string.Equals(parentPomPath, filePath, StringComparison.OrdinalIgnoreCase)) + { + return parentPomPath; + } + + var nextParentDir = Path.GetDirectoryName(parentDir); + if (string.Equals(nextParentDir, parentDir, StringComparison.OrdinalIgnoreCase)) + { + break; // Reached file system root + } + + parentDir = nextParentDir; + } + + return 
string.Empty; // Not found + } + catch (Exception ex) + { + this.Logger.LogDebug(ex, "Error finding parent POM for {FilePath}", Path.GetFileName(filePath)); + return string.Empty; + } + }); + } + + /// + /// Tracks a Maven project by its coordinates to enable coordinate-based parent resolution. + /// + /// The XML document to parse. + /// XML namespace manager for Maven POM. + /// Current pom.xml file path. + /// Pre-queried groupId node (can be null). + /// Pre-queried artifactId node (can be null). + private void TrackMavenProjectCoordinates(XmlDocument document, XmlNamespaceManager namespaceManager, string filePath, XmlNode groupIdNode, XmlNode artifactIdNode) + { + try + { + // If project doesn't have its own groupId, try to get it from parent + groupIdNode ??= document.SelectSingleNode("/proj:project/proj:parent/proj:groupId", namespaceManager); + + if (artifactIdNode != null && !string.IsNullOrWhiteSpace(artifactIdNode.InnerText)) + { + var groupId = groupIdNode?.InnerText; + var artifactId = artifactIdNode.InnerText; + + // Store with both artifactId-only and groupId:artifactId keys for flexible lookup + this.processedMavenProjects.TryAdd(artifactId, filePath); + if (!string.IsNullOrWhiteSpace(groupId)) + { + this.processedMavenProjects.TryAdd($"{groupId}:{artifactId}", filePath); + } + + this.Logger.LogDebug( + "{DetectorId}: Tracked project {GroupId}:{ArtifactId} at {Path}", + this.Id, + groupId ?? "(inherited)", + artifactId, + Path.GetFileName(filePath)); + } + } + catch (Exception e) + { + this.Logger.LogDebug(e, "Failed to track Maven project coordinates from {Path}", filePath); + } + } + + /// + /// Resolves a version template using only local variables from the current file. + /// This ensures immediate resolution only when the variable is defined in the same file (highest priority). + /// + /// The version template with variables (e.g., "${revision}"). + /// Local variables from the current file. 
+ /// The resolved version string, or the original template if local variable not found. + private string ResolveVersionFromLocalOnly(string versionTemplate, Dictionary localVariables) + { + var resolvedVersion = versionTemplate; + var match = VersionRegex.Match(versionTemplate); + + if (match.Success) + { + var variable = match.Groups[1].Captures[0].ToString(); + + // Only check local variables (same file priority) + if (localVariables.TryGetValue(variable, out var localReplacement)) + { + resolvedVersion = versionTemplate.Replace("${" + variable + "}", localReplacement); + } + } + + return resolvedVersion; + } + + private IEnumerable ExtractFailedEndpoints(string errorMessage) + { + if (string.IsNullOrWhiteSpace(errorMessage)) + { + return []; + } + + return EndpointRegex.Matches(errorMessage) + .Select(m => NormalizeEndpointUrl(m.Value)) + .Where(u => u is not null) + .Distinct(); + } + + private void LogAuthErrorGuidance() + { + var guidance = new StringBuilder(); + guidance.AppendLine("Maven CLI failed with authentication errors."); + + if (!this.failedEndpoints.IsEmpty) + { + guidance.AppendLine("The following Maven repository endpoints had authentication failures:"); + foreach (var endpoint in this.failedEndpoints.Distinct().Take(5)) + { + guidance.AppendLine($" - {endpoint}"); + } + + guidance.AppendLine(" Ensure your pipeline has access to these Maven repositories."); + } + + guidance.AppendLine("Note: Falling back to static pom.xml parsing."); + + this.LogWarning(guidance.ToString()); + } + + /// + /// Resolves parent relationships that couldn't be established during first pass. + /// This handles cases where the parent POM was processed after the child POM. 
+ /// + private void ResolveUnresolvedParentRelationships() + { + var resolvedCount = 0; + var unresolvedCount = 0; + + while (this.unresolvedParentRelationships.TryDequeue(out var unresolvedRelationship)) + { + var (filePath, parentGroupId, parentArtifactId) = unresolvedRelationship; + + // Skip if already resolved (could happen if resolved via directory traversal during first pass) + if (this.mavenParentChildRelationships.ContainsKey(filePath)) + { + continue; + } + + // Clear the cache entry so we can try again with the now-complete processedMavenProjects + this.parentPomCache.TryRemove(filePath, out _); + + // Try to find parent by coordinates now that all files have been processed + var parentPath = this.FindParentPomByCoordinatesOnly(parentGroupId, parentArtifactId, filePath); + if (!string.IsNullOrEmpty(parentPath)) + { + this.mavenParentChildRelationships[filePath] = parentPath; + resolvedCount++; + this.Logger.LogDebug( + "{DetectorId}: Resolved deferred parent relationship: {Child} → {Parent}", + this.Id, + Path.GetFileName(filePath), + Path.GetFileName(parentPath)); + } + else + { + unresolvedCount++; + this.Logger.LogDebug( + "{DetectorId}: Could not resolve parent {ParentGroupId}:{ParentArtifactId} for {Child}", + this.Id, + parentGroupId ?? "(null)", + parentArtifactId, + Path.GetFileName(filePath)); + } + } + + if (resolvedCount > 0 || unresolvedCount > 0) + { + this.LogDebugWithId($"Second pass (parent resolution) completed: {resolvedCount} deferred parent relationships resolved, {unresolvedCount} remain unresolved"); + } + } + + /// + /// Finds parent POM by Maven coordinates only (no directory traversal). + /// Used for deferred parent resolution after all files have been processed. 
+ /// + private string FindParentPomByCoordinatesOnly(string parentGroupId, string parentArtifactId, string currentFilePath) + { + if (string.IsNullOrWhiteSpace(parentArtifactId)) + { + return string.Empty; + } + + // Try with full coordinates first + if (!string.IsNullOrWhiteSpace(parentGroupId)) + { + var fullCoordinateKey = $"{parentGroupId}:{parentArtifactId}"; + if (this.processedMavenProjects.TryGetValue(fullCoordinateKey, out var fullCoordinatePath) && + !string.Equals(fullCoordinatePath, currentFilePath, StringComparison.OrdinalIgnoreCase)) + { + return fullCoordinatePath; + } + } + + // Try with artifactId only + if (this.processedMavenProjects.TryGetValue(parentArtifactId, out var artifactIdPath) && + !string.Equals(artifactIdPath, currentFilePath, StringComparison.OrdinalIgnoreCase)) + { + return artifactIdPath; + } + + return string.Empty; + } + + /// + /// Third pass: resolve all pending components using hierarchy-aware variable resolution. + /// For components with unresolved variables, this picks the closest ancestor definition + /// based on Maven's property inheritance rules (child > parent precedence). 
+ /// + private void ResolvePendingComponents() + { + // Capture count before draining for accurate telemetry + this.pendingComponentCountBeforeResolution = this.pendingComponents.Count; + + var resolvedCount = 0; + var skippedCount = 0; + + while (this.pendingComponents.TryDequeue(out var pendingComponent)) + { + try + { + var resolvedVersion = this.ResolveVersionWithHierarchyAwareness(pendingComponent.VersionTemplate, pendingComponent.FilePath); + if (!resolvedVersion.StartsWith("${")) + { + var component = new MavenComponent(pendingComponent.GroupId, pendingComponent.ArtifactId, resolvedVersion); + var detectedComponent = new DetectedComponent(component); + pendingComponent.Recorder.RegisterUsage(detectedComponent); + Interlocked.Increment(ref this.staticParserComponentCount); + resolvedCount++; + } + else + { + skippedCount++; + this.Logger.LogDebug( + "Version string {Version} for component {Group}/{Artifact} could not be resolved and a component will not be recorded. File: {File}", + resolvedVersion, + pendingComponent.GroupId, + pendingComponent.ArtifactId, + pendingComponent.FilePath); + } + } + catch (Exception e) + { + skippedCount++; + this.Logger.LogError( + e, + "Failed to resolve pending component {Group}/{Artifact} from {File}", + pendingComponent.GroupId, + pendingComponent.ArtifactId, + pendingComponent.FilePath); + } + } + + this.LogDebugWithId($"Third pass (variable resolution) completed: {resolvedCount} components resolved, {skippedCount} skipped due to unresolved variables"); + } + + /// + /// Resolves a version template with hierarchy-aware precedence. + /// When multiple variable definitions exist, picks the closest ancestor to the requesting file. + /// This implements Maven's property inheritance rule: child properties take precedence over parent properties. + /// + /// The version template with variables (e.g., "${revision}"). + /// The file path of the POM requesting the variable resolution. 
+ /// The resolved version string, or the original template if variables cannot be resolved. + private string ResolveVersionWithHierarchyAwareness(string versionTemplate, string requestingFilePath) + { + var resolvedVersion = versionTemplate; + var match = VersionRegex.Match(versionTemplate); + + if (match.Success) + { + var variable = match.Groups[1].Captures[0].ToString(); + + // Use Maven-compliant hierarchy search: current → parent → grandparent + var foundValue = this.FindVariableInMavenHierarchy(variable, requestingFilePath); + if (foundValue != null) + { + resolvedVersion = versionTemplate.Replace("${" + variable + "}", foundValue.Value.Value); + } + else + { + // Variable not found in Maven hierarchy - log at debug level since unresolved + // properties are common (profiles, external parents, etc.) and aggregate count in telemetry + Interlocked.Increment(ref this.unresolvedVariableCount); + this.Logger.LogDebug( + "{DetectorId}: Variable {Variable} not found in Maven hierarchy for {File}", + this.Id, + variable, + Path.GetFileName(requestingFilePath)); + } + } + + return resolvedVersion; + } + + /// + /// Finds a variable value using Maven-compliant hierarchy search. + /// Searches in order: current file → parent → grandparent (stops at first match). + /// + /// Variable name to find. + /// The pom.xml file requesting the variable. + /// Variable value and source file, or null if not found in hierarchy. + private (string Value, string SourceFile)? 
FindVariableInMavenHierarchy(string variable, string requestingFilePath) + { + var currentFile = requestingFilePath; + var visitedFiles = new HashSet(StringComparer.OrdinalIgnoreCase); + var keyBuilder = new StringBuilder(256); // Pre-allocate for typical path lengths + + // Walk up Maven parent hierarchy until variable found or no more parents + while (!string.IsNullOrEmpty(currentFile)) + { + // Prevent infinite loops from circular parent references + if (!visitedFiles.Add(currentFile)) + { + this.Logger.LogDebug( + "{DetectorId}: Circular parent reference detected while resolving variable {Variable}, breaking at {File}", + this.Id, + variable, + Path.GetFileName(currentFile)); + break; + } + + // Check if this file has the variable definition using StringBuilder for efficiency + keyBuilder.Clear(); + keyBuilder.Append(currentFile).Append("::").Append(variable); + var variableKey = keyBuilder.ToString(); + + if (this.collectedVariables.TryGetValue(variableKey, out var value)) + { + return (value, currentFile); + } - if (last != null && current.FileNames.Contains(MavenManifest)) + // Move to Maven parent (not directory parent) + this.mavenParentChildRelationships.TryGetValue(currentFile, out currentFile); + } + + return null; // Variable not found in Maven hierarchy + } + + /// + /// Filters out nested pom.xml files, keeping only root-level ones. + /// A pom.xml is considered nested if there's another pom.xml in a parent directory. + /// + /// The incoming process requests for pom.xml files. + /// Dictionary to populate with nested pom.xml mappings for fallback scenarios. + /// Cancellation token for the operation. + /// Process requests for only root-level pom.xml files. 
+ private IObservable RemoveNestedPomXmls( + IObservable componentStreams, + ConcurrentDictionary> parentPomDictionary, + CancellationToken cancellationToken) + { + return componentStreams + .ToList() + .SelectMany(allRequests => + { + cancellationToken.ThrowIfCancellationRequested(); + + // Sort all requests by path depth (parent-first) to ensure deterministic processing order. + // This is critical for fallback static parsing where parent POMs must be processed before children + // to ensure proper property resolution and inheritance. + var sortedRequests = allRequests + .OrderBy(r => NormalizeDirectoryPath(Path.GetDirectoryName(r.ComponentStream.Location)).Length) + .ThenBy(r => r.ComponentStream.Location, StringComparer.OrdinalIgnoreCase) + .ToList(); + + // Use a HashSet of root directories for O(1) lookup instead of O(n) list iteration + var rootPomDirectories = new HashSet(StringComparer.OrdinalIgnoreCase); + var filteredRequests = new List(); + + foreach (var request in sortedRequests) + { + cancellationToken.ThrowIfCancellationRequested(); + + var location = NormalizeDirectoryPath(Path.GetDirectoryName(request.ComponentStream.Location)); + + // Check if any ancestor directory is already a root POM directory + // Walk up the directory tree (O(depth) instead of O(n)) + var isNested = false; + var parentDir = Path.GetDirectoryName(location.TrimEnd(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)); + + while (!string.IsNullOrEmpty(parentDir)) + { + var normalizedParent = NormalizeDirectoryPath(parentDir); + if (rootPomDirectories.Contains(normalizedParent)) { - this.LogDebugWithId($"Ignoring {MavenManifest} at {item.Location}, as it has a parent {MavenManifest} that will be processed at {current.Name}\\{MavenManifest}."); + this.LogDebugWithId($"Ignoring {MavenManifest} at {location}, as it has a parent {MavenManifest} at {normalizedParent}."); + isNested = true; + parentPomDictionary.AddOrUpdate( + normalizedParent, + [request], + (key, 
existingList) => + { + existingList.Add(request); + return existingList; + }); break; } - last = current; + var nextParent = Path.GetDirectoryName(parentDir); + if (string.Equals(nextParent, parentDir, StringComparison.OrdinalIgnoreCase)) + { + break; // Reached root + } + + parentDir = nextParent; } - }, - s.OnCompleted); - }); + + if (!isNested) + { + this.LogDebugWithId($"Discovered {request.ComponentStream.Location}."); + rootPomDirectories.Add(location); + parentPomDictionary.AddOrUpdate( + location, + [request], + (key, existingList) => + { + existingList.Add(request); + return existingList; + }); + filteredRequests.Add(request); + } + } + + return filteredRequests; + }); } } diff --git a/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs b/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs index ab86692c6..9a9a2b2c3 100644 --- a/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs +++ b/src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs @@ -120,7 +120,6 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); - services.AddSingleton(); // npm services.AddSingleton(); diff --git a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs index 550b94f5e..d3a7d9fe4 100644 --- a/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs +++ b/src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs @@ -6,7 +6,6 @@ namespace Microsoft.ComponentDetection.Orchestrator.Services; using System.Collections.Generic; using System.Diagnostics; using System.IO; -using System.IO.Enumeration; using System.Linq; using System.Text.Json; using System.Threading; @@ -17,7 
+16,6 @@ namespace Microsoft.ComponentDetection.Orchestrator.Services; using Microsoft.ComponentDetection.Common.Telemetry.Records; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Contracts.BcdeModels; -using Microsoft.ComponentDetection.Detectors.Maven; using Microsoft.ComponentDetection.Orchestrator.Commands; using Microsoft.ComponentDetection.Orchestrator.Experiments; using Microsoft.Extensions.Logging; @@ -31,20 +29,17 @@ internal class DetectorProcessingService : IDetectorProcessingService private const int ProcessTimeoutBufferSeconds = 5; private readonly IObservableDirectoryWalkerFactory scanner; - private readonly IPathUtilityService pathUtilityService; private readonly ILogger logger; private readonly IExperimentService experimentService; private readonly IAnsiConsole console; public DetectorProcessingService( IObservableDirectoryWalkerFactory scanner, - IPathUtilityService pathUtilityService, IExperimentService experimentService, ILogger logger, IAnsiConsole console = null) { this.scanner = scanner; - this.pathUtilityService = pathUtilityService; this.experimentService = experimentService; this.logger = logger; this.console = console ?? AnsiConsole.Console; @@ -165,13 +160,6 @@ public async Task ProcessDetectorsAsync( var results = await Task.WhenAll(scanTasks); await this.experimentService.FinishAsync(); - // Clean up Maven CLI temporary files after all detectors have finished - // Only cleanup if CleanupCreatedFiles is true (default) to respect user settings - if (settings.CleanupCreatedFiles ?? 
true) - { - this.CleanupMavenFiles(settings.SourceDirectory, detectors); - } - var detectorProcessingResult = this.ConvertDetectorResultsIntoResult(results, exitCode); var totalElapsedTime = stopwatch.Elapsed.TotalSeconds; @@ -434,124 +422,4 @@ private void LogTabularOutput(ConcurrentDictionary pr this.logger.LogInformation("{DetectionTimeLine}", line); } } - - /// - /// Cleans up Maven CLI temporary files after all detectors have finished. - /// This prevents race conditions between MvnCliComponentDetector and MavenWithFallbackDetector. - /// Uses the same symlink-aware traversal pattern as FastDirectoryWalkerFactory to handle - /// circular symlinks in large repositories. - /// - private void CleanupMavenFiles(DirectoryInfo sourceDirectory, IEnumerable detectors) - { - // Only clean up if Maven detectors are running - use shared constants to stay in sync - var hasMavenDetectors = detectors.Any(d => - d.Id == MavenConstants.MvnCliDetectorId || - d.Id == MavenConstants.MavenWithFallbackDetectorId); - - if (!hasMavenDetectors) - { - return; - } - - using var telemetryRecord = new MavenCliCleanupTelemetryRecord - { - SourceDirectory = sourceDirectory.FullName, - }; - - try - { - this.logger.LogDebug("Starting Maven CLI cleanup in directory: {SourceDirectory}", sourceDirectory.FullName); - - var cleanedCount = 0; - var failedCount = 0; - - // Track visited directories by their real (resolved) paths to handle circular symlinks - // This follows the same pattern used by FastDirectoryWalkerFactory - var visitedDirectories = new ConcurrentDictionary(StringComparer.OrdinalIgnoreCase); - - // Use FileSystemEnumerable with symlink-aware recursion predicate - // This matches the pattern in FastDirectoryWalkerFactory for safe directory traversal - var fileEnumerable = new FileSystemEnumerable( - sourceDirectory.FullName, - (ref FileSystemEntry entry) => entry.ToFullPath(), - new EnumerationOptions - { - RecurseSubdirectories = true, - IgnoreInaccessible = true, - }) - { - 
ShouldIncludePredicate = (ref FileSystemEntry entry) => - !entry.IsDirectory && - entry.FileName.Equals(MavenConstants.BcdeMvnDependencyFileName, StringComparison.OrdinalIgnoreCase), - - // Handle symlinks to prevent infinite loops - same pattern as FastDirectoryWalkerFactory - ShouldRecursePredicate = (ref FileSystemEntry entry) => - { - if (!entry.IsDirectory) - { - return false; - } - - try - { - var directoryPath = entry.ToFullPath(); - var realPath = directoryPath; - - // Check if this is a symlink (reparse point) and resolve to real path - if (entry.Attributes.HasFlag(FileAttributes.ReparsePoint)) - { - realPath = this.pathUtilityService.ResolvePhysicalPath(directoryPath); - - // If we can't resolve the path, skip this directory - if (string.IsNullOrEmpty(realPath)) - { - return false; - } - } - - // Only recurse if we haven't visited this real path before - return visitedDirectories.TryAdd(realPath, true); - } - catch (Exception ex) - { - // If symlink resolution fails (broken/inaccessible symlink), skip this directory - // and continue cleanup to avoid aborting the entire enumeration - this.logger.LogDebug(ex, "Skipping directory due to symlink resolution failure: {Directory}", entry.ToFullPath()); - return false; - } - }, - }; - - // Use Parallel.ForEach for concurrent deletion with better throughput - Parallel.ForEach( - fileEnumerable, - new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, - filePath => - { - try - { - File.Delete(filePath); - Interlocked.Increment(ref cleanedCount); - this.logger.LogDebug("Cleaned up Maven CLI file: {File}", filePath); - } - catch (Exception ex) - { - Interlocked.Increment(ref failedCount); - this.logger.LogDebug(ex, "Failed to clean up Maven CLI file: {File}", filePath); - } - }); - - telemetryRecord.FilesCleanedCount = cleanedCount; - telemetryRecord.FilesFailedCount = failedCount; - - if (cleanedCount > 0 || failedCount > 0) - { - this.logger.LogDebug("Maven CLI cleanup completed. 
Removed {CleanedCount} files, failed {FailedCount} files.", cleanedCount, failedCount); - } - } - catch (Exception ex) - { - this.logger.LogWarning(ex, "Maven CLI cleanup failed for directory: {SourceDirectory}", sourceDirectory.FullName); - } - } } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenCommandServiceTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenCommandServiceTests.cs index ecf05c59a..d99480e2e 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenCommandServiceTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenCommandServiceTests.cs @@ -111,21 +111,23 @@ public async Task GenerateDependenciesFile_WhenCancellationRequested_ThrowsOpera }, }; + // Set up the CLI mock to throw OperationCanceledException when called with a cancelled token + this.commandLineMock.Setup(x => x.ExecuteCommandAsync( + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny())) + .Returns((_, _, ct, _) => + { + ct.ThrowIfCancellationRequested(); + return Task.FromResult(new CommandLineExecutionResult { ExitCode = 0 }); + }); + await cts.CancelAsync(); - // When cancellation is already requested, the method should throw OperationCanceledException - // instead of proceeding with the CLI execution + // When cancellation is already requested, the method should propagate OperationCanceledException var action = async () => await this.mavenCommandService.GenerateDependenciesFileAsync(processRequest, cts.Token); await action.Should().ThrowAsync(); - - // Verify that the CLI was never invoked - this.commandLineMock.Verify( - x => x.ExecuteCommandAsync( - It.IsAny(), - It.IsAny(), - It.IsAny(), - It.IsAny()), - Times.Never()); } [TestMethod] @@ -256,142 +258,6 @@ public void ParseDependenciesFile_Success() Mock.Verify(this.parserServiceMock); } - [TestMethod] - public async Task GenerateDependenciesFile_ConcurrentCalls_OnlyOneCliInvocationAsync() - { - // Arrange: Create a temp directory with a real deps file - var tempDir = 
Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); - Directory.CreateDirectory(tempDir); - var pomLocation = Path.Combine(tempDir, "pom.xml"); - var depsFilePath = Path.Combine(tempDir, "bcde.mvndeps"); - - using var cliStartedEvent = new ManualResetEventSlim(false); - using var allowCliToCompleteEvent = new ManualResetEventSlim(false); - - try - { - var cliInvocationCount = 0; - - var bcdeMvnFileName = "bcde.mvndeps"; - var cliParameters = new[] { "dependency:tree", "-B", $"-DoutputFile={bcdeMvnFileName}", "-DoutputType=text", $"-f{pomLocation}" }; - - this.commandLineMock.Setup(x => x.ExecuteCommandAsync( - MavenCommandService.PrimaryCommand, - MavenCommandService.AdditionalValidCommands, - It.IsAny(), - It.Is(y => this.ShouldBeEquivalentTo(y, cliParameters)))) - .Returns(async () => - { - Interlocked.Increment(ref cliInvocationCount); - cliStartedEvent.Set(); - - // Simulate CLI execution time - wait until test allows completion - allowCliToCompleteEvent.Wait(TimeSpan.FromSeconds(5)).Should().BeTrue("CLI should receive completion signal within timeout"); - - // Create the deps file (simulating what mvn does) - await File.WriteAllTextAsync(depsFilePath, "com.test:artifact:jar:1.0.0"); - - return new CommandLineExecutionResult { ExitCode = 0 }; - }); - - // Act: Start two concurrent calls for the same pom.xml - var processRequest1 = new ProcessRequest - { - ComponentStream = new ComponentStream { Location = pomLocation }, - }; - var processRequest2 = new ProcessRequest - { - ComponentStream = new ComponentStream { Location = pomLocation }, - }; - - var task1 = Task.Run(() => this.mavenCommandService.GenerateDependenciesFileAsync(processRequest1)); - var task2 = Task.Run(() => this.mavenCommandService.GenerateDependenciesFileAsync(processRequest2)); - - // Wait for the first CLI call to start - cliStartedEvent.Wait(TimeSpan.FromSeconds(5)).Should().BeTrue("CLI should start within timeout"); - - // Allow the CLI to complete - allowCliToCompleteEvent.Set(); 
- - // Wait for both tasks to complete - var results = await Task.WhenAll(task1, task2); - - // Assert: Only one CLI invocation should have occurred - cliInvocationCount.Should().Be(1, "only one CLI invocation should occur for concurrent calls to the same pom.xml"); - - // Both results should indicate success - results[0].Success.Should().BeTrue(); - results[1].Success.Should().BeTrue(); - } - finally - { - // Cleanup - if (Directory.Exists(tempDir)) - { - Directory.Delete(tempDir, recursive: true); - } - } - } - - [TestMethod] - public async Task GenerateDependenciesFile_FileDeletedBetweenCallers_SecondCallerRerunsCliAsync() - { - // Arrange: Create a temp directory - var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); - Directory.CreateDirectory(tempDir); - var pomLocation = Path.Combine(tempDir, "pom.xml"); - var depsFilePath = Path.Combine(tempDir, "bcde.mvndeps"); - - try - { - var cliInvocationCount = 0; - - var bcdeMvnFileName = "bcde.mvndeps"; - var cliParameters = new[] { "dependency:tree", "-B", $"-DoutputFile={bcdeMvnFileName}", "-DoutputType=text", $"-f{pomLocation}" }; - - this.commandLineMock.Setup(x => x.ExecuteCommandAsync( - MavenCommandService.PrimaryCommand, - MavenCommandService.AdditionalValidCommands, - It.IsAny(), - It.Is(y => this.ShouldBeEquivalentTo(y, cliParameters)))) - .ReturnsAsync(() => - { - Interlocked.Increment(ref cliInvocationCount); - - // Create the deps file (simulating what mvn does) - File.WriteAllText(depsFilePath, "com.test:artifact:jar:1.0.0"); - - return new CommandLineExecutionResult { ExitCode = 0 }; - }); - - var processRequest = new ProcessRequest - { - ComponentStream = new ComponentStream { Location = pomLocation }, - }; - - // Act: First call - should invoke CLI - var result1 = await this.mavenCommandService.GenerateDependenciesFileAsync(processRequest); - result1.Success.Should().BeTrue(); - cliInvocationCount.Should().Be(1); - - // Delete the deps file (simulating what the detector does 
after reading it) - File.Delete(depsFilePath); - - // Second call - should re-invoke CLI because file was deleted - var result2 = await this.mavenCommandService.GenerateDependenciesFileAsync(processRequest); - result2.Success.Should().BeTrue(); - cliInvocationCount.Should().Be(2, "CLI should be re-invoked when the deps file was deleted"); - } - finally - { - // Cleanup - if (Directory.Exists(tempDir)) - { - Directory.Delete(tempDir, recursive: true); - } - } - } - [TestMethod] public async Task GenerateDependenciesFile_FailedResult_NotCachedAsync() { @@ -430,62 +296,6 @@ public async Task GenerateDependenciesFile_FailedResult_NotCachedAsync() cliInvocationCount.Should().Be(2, "failed results should not be cached, allowing retries"); } - [TestMethod] - public async Task GenerateDependenciesFile_SuccessfulResult_IsCachedAsync() - { - // Arrange: Create a temp directory with a real deps file - var tempDir = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); - Directory.CreateDirectory(tempDir); - var pomLocation = Path.Combine(tempDir, "pom.xml"); - var depsFilePath = Path.Combine(tempDir, "bcde.mvndeps"); - - try - { - var cliInvocationCount = 0; - - var bcdeMvnFileName = "bcde.mvndeps"; - var cliParameters = new[] { "dependency:tree", "-B", $"-DoutputFile={bcdeMvnFileName}", "-DoutputType=text", $"-f{pomLocation}" }; - - this.commandLineMock.Setup(x => x.ExecuteCommandAsync( - MavenCommandService.PrimaryCommand, - MavenCommandService.AdditionalValidCommands, - It.IsAny(), - It.Is(y => this.ShouldBeEquivalentTo(y, cliParameters)))) - .ReturnsAsync(() => - { - Interlocked.Increment(ref cliInvocationCount); - - // Create the deps file (simulating what mvn does) - File.WriteAllText(depsFilePath, "com.test:artifact:jar:1.0.0"); - - return new CommandLineExecutionResult { ExitCode = 0 }; - }); - - var processRequest = new ProcessRequest - { - ComponentStream = new ComponentStream { Location = pomLocation }, - }; - - // Act: First call - should invoke CLI - var 
result1 = await this.mavenCommandService.GenerateDependenciesFileAsync(processRequest); - result1.Success.Should().BeTrue(); - cliInvocationCount.Should().Be(1); - - // Second call - should use cached result (file still exists) - var result2 = await this.mavenCommandService.GenerateDependenciesFileAsync(processRequest); - result2.Success.Should().BeTrue(); - cliInvocationCount.Should().Be(1, "successful results should be cached when file still exists"); - } - finally - { - // Cleanup - if (Directory.Exists(tempDir)) - { - Directory.Delete(tempDir, recursive: true); - } - } - } - protected bool ShouldBeEquivalentTo(IEnumerable result, IEnumerable expected) { result.Should().BeEquivalentTo(expected); diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs deleted file mode 100644 index 2ef00073b..000000000 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/MavenWithFallbackDetectorTests.cs +++ /dev/null @@ -1,2234 +0,0 @@ -#nullable disable -namespace Microsoft.ComponentDetection.Detectors.Tests; - -using System.Linq; -using System.Threading; -using System.Threading.Tasks; -using AwesomeAssertions; -using Microsoft.ComponentDetection.Contracts; -using Microsoft.ComponentDetection.Contracts.Internal; -using Microsoft.ComponentDetection.Contracts.TypedComponent; -using Microsoft.ComponentDetection.Detectors.Maven; -using Microsoft.ComponentDetection.TestsUtilities; -using Microsoft.VisualStudio.TestTools.UnitTesting; -using Moq; - -[TestClass] -[TestCategory("Governance/All")] -[TestCategory("Governance/ComponentDetection")] -public class MavenWithFallbackDetectorTests : BaseDetectorTest -{ - /// - /// The shared deps filename used by MavenCommandService. - /// Must match BcdeMvnDependencyFileName from MavenCommandService. 
- /// - private const string BcdeMvnFileName = "bcde.mvndeps"; - - private readonly Mock mavenCommandServiceMock; - private readonly Mock envVarServiceMock; - private readonly Mock fileUtilityServiceMock; - - public MavenWithFallbackDetectorTests() - { - this.mavenCommandServiceMock = new Mock(); - this.mavenCommandServiceMock.Setup(x => x.BcdeMvnDependencyFileName).Returns(BcdeMvnFileName); - - // Default setup for GenerateDependenciesFileAsync - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - this.DetectorTestUtility.AddServiceMock(this.mavenCommandServiceMock); - - this.envVarServiceMock = new Mock(); - this.DetectorTestUtility.AddServiceMock(this.envVarServiceMock); - - this.fileUtilityServiceMock = new Mock(); - this.DetectorTestUtility.AddServiceMock(this.fileUtilityServiceMock); - } - - [TestMethod] - public async Task WhenMavenCliNotAvailable_FallsBackToStaticParsing_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - 3.12.0 - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.commons"); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - mavenComponent.Version.Should().Be("3.12.0"); - } - - [TestMethod] - public async Task WhenMavenCliNotAvailable_DetectsMultipleDependencies_Async() - { - // Arrange - 
this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - 3.12.0 - - - com.google.guava - guava - 31.1-jre - - - junit - junit - 4.13.2 - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(3); - - var groupIds = detectedComponents - .Select(dc => (dc.Component as MavenComponent)?.GroupId) - .ToList(); - - groupIds.Should().Contain("org.apache.commons"); - groupIds.Should().Contain("com.google.guava"); - groupIds.Should().Contain("junit"); - } - - [TestMethod] - public async Task WhenMavenCliSucceeds_UsesMvnCliResults_Async() - { - // Arrange - const string componentString = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; - - this.SetupMvnCliSuccess(componentString); - - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => - { - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT"))); - }); - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility.ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.maven"); - mavenComponent.ArtifactId.Should().Be("maven-compat"); - mavenComponent.Version.Should().Be("3.6.1-SNAPSHOT"); 
- } - - [TestMethod] - public async Task WhenMavenCliSucceeds_PreservesTransitiveDependencies_Async() - { - // Arrange - const string rootComponent = "com.test:my-app:jar:1.0.0"; - const string directDependency = "org.apache.commons:commons-lang3:jar:3.12.0"; - const string transitiveDependency = "org.apache.commons:commons-text:jar:1.9"; - - var content = $@"{rootComponent} -\- {directDependency} - \- {transitiveDependency}"; - - this.SetupMvnCliSuccess(content); - - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => - { - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("com.test", "my-app", "1.0.0")), - isExplicitReferencedDependency: true); - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.apache.commons", "commons-lang3", "3.12.0")), - isExplicitReferencedDependency: true); - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.apache.commons", "commons-text", "1.9")), - isExplicitReferencedDependency: false, - parentComponentId: "org.apache.commons commons-lang3 3.12.0 - Maven"); - }); - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility.ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(3); - - // Verify dependency graph has the transitive relationship - var dependencyGraph = componentRecorder.GetDependencyGraphsByLocation().Values.First(); - dependencyGraph.Should().NotBeNull(); - - // Verify the transitive component is reachable from the direct dependency - var directComponentId = "org.apache.commons commons-lang3 3.12.0 - Maven"; - var transitiveComponentId = "org.apache.commons commons-text 1.9 - Maven"; - - var directDependencies = 
dependencyGraph.GetDependenciesForComponent(directComponentId); - directDependencies.Should().Contain( - transitiveComponentId, - "the transitive dependency should be a child of the direct dependency"); - } - - [TestMethod] - public async Task WhenMavenCliProducesNoOutput_FallsBackToStaticParsing_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // MvnCli runs but produces no bcde.mvndeps files (simulating failure) - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - 3.12.0 - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.commons"); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - mavenComponent.Version.Should().Be("3.12.0"); - } - - [TestMethod] - public async Task StaticParser_IgnoresDependenciesWithoutVersion_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - - - - com.google.guava - guava - 31.1-jre - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - 
- var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.ArtifactId.Should().Be("guava"); - } - - [TestMethod] - public async Task StaticParser_IgnoresDependenciesWithVersionRanges_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - [3.0,4.0) - - - com.google.guava - guava - 31.1-jre - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Version ranges with commas are ignored - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.ArtifactId.Should().Be("guava"); - } - - [TestMethod] - public async Task StaticParser_ResolvesPropertyVersions_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - 3.12.0 - - - - org.apache.commons - commons-lang3 - ${commons.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - 
mavenComponent.Version.Should().Be("3.12.0"); - } - - [TestMethod] - public async Task StaticParser_IgnoresDependenciesWithUnresolvablePropertyVersions_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - ${undefined.property} - - - com.google.guava - guava - 31.1-jre - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Unresolvable property versions are ignored - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.ArtifactId.Should().Be("guava"); - } - - [TestMethod] - public async Task StaticParser_ResolvesVariableFromPreviousFile_Async() - { - // Arrange - Test case 1: Variable defined in parent POM, referenced in child POM - // Uses Maven's standard parent inheritance mechanism - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Setup fileUtilityService to allow parent POM resolution - this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) - .Returns(true); - - var parentPomContent = @" - - 4.0.0 - com.test - parent - 1.0.0 - pom - - 3.12.0 - -"; - - var childPomContent = @" - - 4.0.0 - - com.test - parent - 1.0.0 - - child - - - org.apache.commons - commons-lang3 - ${commons.version} - - -"; - - // Act - Parent processed first, then child - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", parentPomContent) - .WithFile("module/pom.xml", childPomContent) - .ExecuteDetectorAsync(); - - // Assert - 
detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.commons"); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - mavenComponent.Version.Should().Be("3.12.0"); - } - - [TestMethod] - public async Task StaticParser_BackfillsVariableFromLaterFile_Async() - { - // Arrange - Test case 2: Child processed first, parent processed second (deferred resolution) - // Tests that variables can be resolved even when parent is processed after child - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Setup fileUtilityService to allow parent POM resolution - this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) - .Returns(true); - - var childPomContent = @" - - 4.0.0 - - com.test - parent - 1.0.0 - - child - - - org.apache.commons - commons-lang3 - ${commons.version} - - -"; - - var parentPomContent = @" - - 4.0.0 - com.test - parent - 1.0.0 - pom - - 3.13.0 - -"; - - // Act - Child processed first (has unresolved variable), then parent - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("module/pom.xml", childPomContent) - .WithFile("pom.xml", parentPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.commons"); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - 
mavenComponent.Version.Should().Be("3.13.0"); - } - - [TestMethod] - public async Task StaticParser_LocalVariableDefinitionTakesPriority_Async() - { - // Arrange - Test case 3: Variable defined in both files, local definition has priority - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var firstPomContent = @" - - 4.0.0 - com.test - parent - 1.0.0 - - 3.11.0 - -"; - - var secondPomContent = @" - - 4.0.0 - com.test - child - 1.0.0 - - 3.14.0 - - - - org.apache.commons - commons-lang3 - ${commons.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", firstPomContent) - .WithFile("module/pom.xml", secondPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.commons"); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - - // Should use the local definition (3.14.0) instead of parent definition (3.11.0) - mavenComponent.Version.Should().Be("3.14.0"); - } - - [TestMethod] - public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Async() - { - // Arrange - Test processing order independence for ancestor priority - // This test simulates the scenario where files are processed out of natural hierarchy order: - // grandparent → child → parent (instead of grandparent → parent → child) - // The child should still get the parent's variable value, not the grandparent's - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Setup fileUtilityService to return false for directory traversal - // This forces the detector to use coordinate-based 
parent resolution - // which correctly handles out-of-order processing through deferred resolution - this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) - .Returns(false); - - var grandparentPomContent = @" - - 4.0.0 - com.test - grandparent - 1.0.0 - pom - - 3.10.0 - -"; - - var parentPomContent = @" - - 4.0.0 - - com.test - grandparent - 1.0.0 - - parent - pom - - 3.11.0 - -"; - - var childPomContent = @" - - 4.0.0 - - com.test - parent - 1.0.0 - - child - - - org.apache.commons - commons-lang3 - ${commons.version} - - -"; - - // Act - Process files in out-of-order sequence: grandparent → child → parent - // This tests that deferred variable resolution correctly handles ancestor priority - // regardless of processing order - // Use file structure matching the working tests: root pom.xml and nested paths - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", grandparentPomContent) // Processed 1st: defines commons.version = 3.10.0 - .WithFile("parent/child/pom.xml", childPomContent) // Processed 2nd: references ${commons.version} - .WithFile("parent/pom.xml", parentPomContent) // Processed 3rd: defines commons.version = 3.11.0 - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.commons"); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - - // Should resolve to parent's version (3.11.0) since parent is the closest ancestor to child, - // NOT grandparent's version (3.10.0), even though grandparent was processed first. - // This validates that deferred resolution correctly implements Maven's ancestor priority rules. 
- mavenComponent.Version.Should().Be("3.11.0"); - } - - [TestMethod] - public async Task WhenNoPomXmlFiles_ReturnsSuccessWithNoComponents_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - componentRecorder.GetDetectedComponents().Should().BeEmpty(); - } - - [TestMethod] - public async Task WhenPomXmlHasNoDependencies_ReturnsSuccessWithNoComponents_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - componentRecorder.GetDetectedComponents().Should().BeEmpty(); - } - - [TestMethod] - public async Task WhenDisableMvnCliTrue_UsesStaticParsing_Async() - { - // Arrange - DisableMvnCliEnvVar is true (explicitly disable Maven CLI) - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Disable MvnCli explicitly - this.envVarServiceMock.Setup(x => x.IsEnvironmentVariableValueTrue(MavenWithFallbackDetector.DisableMvnCliEnvVar)) - .Returns(true); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - 3.12.0 - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should detect component via static parsing even though Maven CLI is available - var detectedComponents = 
componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.GroupId.Should().Be("org.apache.commons"); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - mavenComponent.Version.Should().Be("3.12.0"); - - // Verify MavenCLIExistsAsync was never called since we disabled MvnCli - this.mavenCommandServiceMock.Verify(x => x.MavenCLIExistsAsync(), Times.Never); - } - - [TestMethod] - public async Task WhenDisableMvnCliEnvVarIsFalse_UsesMvnCliNormally_Async() - { - // Arrange - Maven CLI is available and CD_MAVEN_DISABLE_CLI is false - const string componentString = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; - const string validPomXml = @" - - 4.0.0 - com.test - test-app - 1.0.0 -"; - - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Setup GenerateDependenciesFileAsync to return success - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - // Setup file utility to return the deps file content - this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(true); - this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(componentString); - - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => - { - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT"))); - }); - - // Set up the environment variable to NOT disable MvnCli (false) - this.envVarServiceMock.Setup(x => x.IsEnvironmentVariableValueTrue(MavenWithFallbackDetector.DisableMvnCliEnvVar)) - .Returns(false); - - // Act - var (detectorResult, _) = await 
this.DetectorTestUtility - .WithFile("pom.xml", validPomXml) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should use MvnCli since CD_MAVEN_DISABLE_CLI is false - this.mavenCommandServiceMock.Verify(x => x.MavenCLIExistsAsync(), Times.Once); - - // Verify telemetry shows MvnCliOnly detection method - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("DetectionMethod"); - detectorResult.AdditionalTelemetryDetails["DetectionMethod"].Should().Be("MvnCliOnly"); - } - - [TestMethod] - public async Task WhenDisableMvnCliEnvVarNotSet_UsesMvnCliNormally_Async() - { - // Arrange - Maven CLI is available and CD_MAVEN_DISABLE_CLI is NOT set (doesn't exist) - const string componentString = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; - - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Setup GenerateDependenciesFileAsync to return success - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - // Setup file utility to return the deps file content - this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(true); - this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(componentString); - - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => - { - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT"))); - }); - - // Explicitly set up the environment variable to NOT exist (returns false) - this.envVarServiceMock.Setup(x => x.IsEnvironmentVariableValueTrue(MavenWithFallbackDetector.DisableMvnCliEnvVar)) - .Returns(false); - - // Act - var (detectorResult, _) = await this.DetectorTestUtility - 
.WithFile("pom.xml", componentString) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should use MvnCli since CD_MAVEN_DISABLE_CLI doesn't exist - this.mavenCommandServiceMock.Verify(x => x.MavenCLIExistsAsync(), Times.Once); - - // Verify telemetry shows MvnCliOnly detection method - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("DetectionMethod"); - detectorResult.AdditionalTelemetryDetails["DetectionMethod"].Should().Be("MvnCliOnly"); - } - - [TestMethod] - public async Task WhenDisableMvnCliEnvVarSetToInvalidValue_UsesMvnCliNormally_Async() - { - // Arrange - Maven CLI is available and CD_MAVEN_DISABLE_CLI is set to an invalid (non-boolean) value - const string componentString = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; - const string validPomXml = @" - - 4.0.0 - com.test - test-app - 1.0.0 -"; - - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Setup GenerateDependenciesFileAsync to return success - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - // Setup file utility to return the deps file content - this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(true); - this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(componentString); - - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => - { - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT"))); - }); - - // Set up the environment variable with an invalid value (IsEnvironmentVariableValueTrue returns false for non-"true" values) - this.envVarServiceMock.Setup(x => 
x.IsEnvironmentVariableValueTrue(MavenWithFallbackDetector.DisableMvnCliEnvVar)) - .Returns(false); - - // Act - var (detectorResult, _) = await this.DetectorTestUtility - .WithFile("pom.xml", validPomXml) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should use MvnCli since the env var value is invalid (bool.TryParse fails) - this.mavenCommandServiceMock.Verify(x => x.MavenCLIExistsAsync(), Times.Once); - - // Verify telemetry shows MvnCliOnly detection method - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("DetectionMethod"); - detectorResult.AdditionalTelemetryDetails["DetectionMethod"].Should().Be("MvnCliOnly"); - } - - [TestMethod] - public async Task WhenMvnCliSucceeds_NestedPomXmlsAreFilteredOut_Async() - { - // Arrange - Maven CLI is available and succeeds. - // In a multi-module project, only the root pom.xml should be processed by MvnCli. - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Setup GenerateDependenciesFileAsync to return success - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - // Setup file utility to return the deps file content - this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(true); - this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns("com.test:parent-app:jar:1.0.0"); - - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => - { - // MvnCli processes root and generates deps for all modules - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.projecta", "dep-from-root-a", "1.0.0"))); - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.projecta", 
"dep-from-nested-a", "1.0.0"))); - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("org.projecta", "dep-from-submodule-a", "1.0.0"))); - }); - - // Root pom.xml content (doesn't matter for this test, just needs to exist) - var rootPomContent = @" - - com.test - parent-app - 1.0.0 - pom -"; - - // Nested module pom.xml content - var moduleAPomContent = @" - - - com.test - parent-app - 1.0.0 - - module-a - - - org.apache.commons - commons-lang3 - 3.12.0 - - -"; - - // Act - Add root pom.xml first, then nested module pom.xml - // The root should get MvnCli bcde.mvndeps, nested should be filtered - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", rootPomContent) - .WithFile("module-a/pom.xml", moduleAPomContent) - .WithFile(BcdeMvnFileName, "com.test:parent-app:jar:1.0.0", [BcdeMvnFileName]) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should have components from MvnCli (parent + modules), not from static parsing - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(3); - - // MvnCli should only be called once for root pom.xml (nested filtered out) - this.mavenCommandServiceMock.Verify( - x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny()), - Times.Once); - } - - [TestMethod] - public async Task WhenMvnCliFailsCompletely_AllNestedPomXmlsAreRestoredForStaticParsing_Async() - { - // Arrange - Maven CLI is available but fails for all pom.xml files (e.g., auth error) - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // MvnCli runs but produces no bcde.mvndeps files (simulating complete failure) - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - // Root pom.xml content - var rootPomContent = @" - - com.test - 
parent-app - 1.0.0 - pom - - - org.springframework - spring-core - 5.3.0 - - -"; - - // Nested module pom.xml content - should be restored for static parsing - var moduleAPomContent = @" - - - com.test - parent-app - 1.0.0 - - module-a - - - org.apache.commons - commons-lang3 - 3.12.0 - - -"; - - // Another nested module - var moduleBPomContent = @" - - - com.test - parent-app - 1.0.0 - - module-b - - - com.google.guava - guava - 31.1-jre - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", rootPomContent) - .WithFile("module-a/pom.xml", moduleAPomContent) - .WithFile("module-b/pom.xml", moduleBPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // All pom.xml files should be processed via static parsing (nested poms restored) - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(3); // spring-core, commons-lang3, guava - - var artifactIds = detectedComponents - .Select(dc => (dc.Component as MavenComponent)?.ArtifactId) - .ToList(); - - artifactIds.Should().Contain("spring-core"); // From root pom.xml - artifactIds.Should().Contain("commons-lang3"); // From module-a/pom.xml (nested - restored) - artifactIds.Should().Contain("guava"); // From module-b/pom.xml (nested - restored) - } - - [TestMethod] - public async Task WhenMvnCliPartiallyFails_NestedPomXmlsRestoredOnlyForFailedDirectories_Async() - { - // Arrange - Maven CLI succeeds for projectA but fails for projectB. 
- this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // MvnCli runs: projectA succeeds, projectB fails - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.Is(pr => pr.ComponentStream.Location.Contains("projectA")), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.Is(pr => pr.ComponentStream.Location.Contains("projectB")), It.IsAny())) - .ReturnsAsync(new MavenCliResult(false, "Maven CLI failed for projectB")); - - // Setup file utility: projectA has deps file, projectB does not - this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) - .Returns((string path) => path.Contains("projectA") && path.EndsWith(BcdeMvnFileName)); - this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.IsAny())) - .Returns((string path) => path.Contains("projectA") && path.EndsWith(BcdeMvnFileName) ? "com.projecta:app-a:jar:1.0.0\ncom.projecta:module-a1:jar:1.0.0" : string.Empty); - - // Setup parsing: projectA deps file parsing - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => - { - // Only register components when processing the projectA dependency file - if (pr.ComponentStream.Location.Contains("projectA") && pr.ComponentStream.Location.EndsWith(BcdeMvnFileName)) - { - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("com.projecta", "app-a", "1.0.0"))); - pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent(new MavenComponent("com.projecta", "module-a1", "1.0.0"))); - } - }); - - // ProjectA - MvnCli will succeed - var projectAPomContent = @" - - com.projecta - app-a - 1.0.0 -"; - - var projectAModulePomContent = @" - - - com.projecta - app-a - 1.0.0 - - module-a1 - - - org.projecta - dep-from-nested-a - 1.0.0 - - -"; - - // ProjectB - MvnCli will fail (no bcde.mvndeps generated) - var projectBPomContent = 
@" - - com.projectb - app-b - 2.0.0 - - - org.projectb - dep-from-root-b - 2.0.0 - - -"; - - var projectBModulePomContent = @" - - - com.projectb - app-b - 2.0.0 - - module-b1 - - - org.projectb - dep-from-nested-b - 2.0.0 - - -"; - - // Act - projectA gets bcde.mvndeps (via mock), projectB does not - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("projectA/pom.xml", projectAPomContent) - .WithFile("projectA/module-a1/pom.xml", projectAModulePomContent) - .WithFile("projectB/pom.xml", projectBPomContent) - .WithFile("projectB/module-b1/pom.xml", projectBModulePomContent) - .WithFile(BcdeMvnFileName, "com.projecta:app-a:jar:1.0.0", [BcdeMvnFileName], $"projectA/{BcdeMvnFileName}") - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - - // ProjectA: 2 components from MvnCli (app-a, module-a1) - // ProjectB: 2 components from static parsing (dep-from-root-b, dep-from-nested-b) - // Note: nested pom in projectA should NOT be statically parsed (MvnCli handled it) - // Note: nested pom in projectB SHOULD be statically parsed (MvnCli failed) - detectedComponents.Should().HaveCount(4); - - var artifactIds = detectedComponents - .Select(dc => (dc.Component as MavenComponent)?.ArtifactId) - .ToList(); - - // From MvnCli for projectA - artifactIds.Should().Contain("app-a"); - artifactIds.Should().Contain("module-a1"); - - // From static parsing for projectB (both root and nested restored) - artifactIds.Should().Contain("dep-from-root-b"); - artifactIds.Should().Contain("dep-from-nested-b"); - - // Should NOT contain dep-from-nested-a (that nested pom was handled by MvnCli, not static) - artifactIds.Should().NotContain("dep-from-nested-a"); - } - - [TestMethod] - public async Task WhenMvnCliFailsWithAuthError_LogsFailedEndpointAndSetsTelemetry_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => 
x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Simulate Maven CLI failure with authentication error message containing endpoint URL - var authErrorMessage = "[ERROR] Failed to execute goal on project my-app: Could not resolve dependencies for project com.test:my-app:jar:1.0.0: " + - "Failed to collect dependencies at com.private:private-lib:jar:2.0.0: " + - "Failed to read artifact descriptor for com.private:private-lib:jar:2.0.0: " + - "Could not transfer artifact com.private:private-lib:pom:2.0.0 from/to private-repo (https://private-maven-repo.example.com/repository/maven-releases/): " + - "status code: 401, reason phrase: Unauthorized"; - - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(false, authErrorMessage)); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - 3.12.0 - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should fall back to static parsing and detect the component - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - var mavenComponent = detectedComponents.First().Component as MavenComponent; - mavenComponent.Should().NotBeNull(); - mavenComponent.ArtifactId.Should().Be("commons-lang3"); - - // Verify telemetry contains auth failure info - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FallbackReason"); - detectorResult.AdditionalTelemetryDetails["FallbackReason"].Should().Be("AuthenticationFailure"); - - // Verify telemetry contains the failed endpoint - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FailedEndpoints"); - 
detectorResult.AdditionalTelemetryDetails["FailedEndpoints"].Should().Contain("https://private-maven-repo.example.com"); - } - - [TestMethod] - public async Task WhenMvnCliFailsWithNonAuthError_SetsFallbackReasonToOther_Async() - { - // Arrange - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - // Simulate Maven CLI failure with a non-auth error (e.g., build error) - var nonAuthErrorMessage = "[ERROR] Failed to execute goal on project my-app: Compilation failure: " + - "src/main/java/com/test/App.java:[10,5] cannot find symbol"; - - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(false, nonAuthErrorMessage)); - - var pomXmlContent = @" - - 4.0.0 - com.test - my-app - 1.0.0 - - - org.apache.commons - commons-lang3 - 3.12.0 - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomXmlContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should fall back to static parsing - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); - - // Verify telemetry shows non-auth failure - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FallbackReason"); - detectorResult.AdditionalTelemetryDetails["FallbackReason"].Should().Be("OtherMvnCliFailure"); - - // Should NOT have FailedEndpoints since this wasn't an auth error - detectorResult.AdditionalTelemetryDetails.Should().NotContainKey("FailedEndpoints"); - } - - [TestMethod] - public async Task WhenAuthenticationFailsAndParentChildPropertiesUsed_MaintainsCorrectOrderingDuringFallback_Async() - { - // Arrange - const string parentPomContent = @" - - 4.0.0 - com.yammer.veritas - veritas-parent - 1.0-SNAPSHOT - pom - - - 3.18.0 - 4.11.0 - 2.21.1 - - - - veritas-api - - - - - yammer-artifacts - 
yammer-artifacts - https://pkgs.dev.azure.com/yammer/_packaging/yammer-artifacts/maven/v1 - - -"; - - const string childPomContent = @" - - - veritas-parent - com.yammer.veritas - 1.0-SNAPSHOT - - 4.0.0 - veritas-api - - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - org.mockito - mockito-core - ${mockito.version} - test - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - -"; - - // Setup Maven CLI to fail with authentication error (401 Unauthorized) - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(false, "status code: 401, reason phrase: Unauthorized")); - - // Act - Test with parent and child POM structure - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", parentPomContent) - .WithFile("veritas-api/pom.xml", childPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should fall back to static parsing after authentication failure - var detectedComponents = componentRecorder.GetDetectedComponents(); - - // Should detect all 3 property-based dependencies from child POM - // This verifies that parent properties were correctly resolved during fallback - detectedComponents.Should().HaveCount(3); - - var mavenComponents = detectedComponents.Where(x => x.Component is MavenComponent).ToList(); - mavenComponents.Should().HaveCount(3); - - // Verify each property-based dependency was resolved with correct version from parent - var commonsLang3 = mavenComponents.FirstOrDefault(x => - ((MavenComponent)x.Component).ArtifactId == "commons-lang3"); - commonsLang3.Should().NotBeNull(); - ((MavenComponent)commonsLang3.Component).Version.Should().Be("3.18.0"); - - var mockitoCore = mavenComponents.FirstOrDefault(x => - 
((MavenComponent)x.Component).ArtifactId == "mockito-core"); - mockitoCore.Should().NotBeNull(); - ((MavenComponent)mockitoCore.Component).Version.Should().Be("4.11.0"); - - var jacksonCore = mavenComponents.FirstOrDefault(x => - ((MavenComponent)x.Component).ArtifactId == "jackson-core"); - jacksonCore.Should().NotBeNull(); - ((MavenComponent)jacksonCore.Component).Version.Should().Be("2.21.1"); - - // Verify telemetry shows authentication failure - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FallbackReason"); - detectorResult.AdditionalTelemetryDetails["FallbackReason"].Should().Be("AuthenticationFailure"); - - // Should have method showing mixed detection was used (CLI failed but fallback succeeded) - detectorResult.AdditionalTelemetryDetails.Should().ContainKey("DetectionMethod"); - detectorResult.AdditionalTelemetryDetails["DetectionMethod"].Should().Be("Mixed"); - } - - private void SetupMvnCliSuccess(string depsFileContent) - { - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); - - this.mavenCommandServiceMock.Setup(x => x.BcdeMvnDependencyFileName) - .Returns(BcdeMvnFileName); - - // Setup for 3-parameter version (used by MavenWithFallbackDetector) - this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new MavenCliResult(true, null)); - - // Setup file utility service to return the deps file content - // The detector reads the file from disk after CLI succeeds - this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(true); - this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) - .Returns(depsFileContent); - - // Use a valid minimal pom.xml - the actual content doesn't matter for MvnCli success path - // since components come from the generated dependency file - const string validPomXml = @" - - 4.0.0 - com.test - test-app - 1.0.0 -"; - - 
this.DetectorTestUtility.WithFile("pom.xml", validPomXml); - - // Add the dependency file that Maven CLI would have generated - this.DetectorTestUtility.WithFile(BcdeMvnFileName, depsFileContent, [BcdeMvnFileName]); - } - - [TestMethod] - public async Task VariableResolution_SiblingPomVariablesShouldNotBeUsed_Async() - { - // Arrange - Maven-compliant behavior: sibling POM variables should NOT be resolved - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Parent POM with a property - var parentPomContent = @" - - 4.0.0 - com.test - parent - 1.0.0 - pom - - 3.12.0 - -"; - - // Sibling POM with different variable - should NOT be used for resolution - var siblingPomContent = @" - - 4.0.0 - - com.test - parent - 1.0.0 - - com.test - sibling - - 31.1-jre - -"; - - // Target POM trying to use sibling's variable - should fail to resolve - var targetPomContent = @" - - 4.0.0 - - com.test - parent - 1.0.0 - - com.test - target - - - com.google.guava - guava - ${guava.version} - - - org.apache.commons - commons-lang3 - ${commons.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("parent/pom.xml", parentPomContent) - .WithFile("sibling/pom.xml", siblingPomContent) - .WithFile("target/pom.xml", targetPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - - // Should only resolve commons-lang3 (from parent), not guava (sibling variable) - detectedComponents.Should().HaveCount(1); - var component = detectedComponents.First().Component as MavenComponent; - component.Should().NotBeNull(); - component.GroupId.Should().Be("org.apache.commons"); - component.ArtifactId.Should().Be("commons-lang3"); - component.Version.Should().Be("3.12.0"); // Resolved from parent - } - - [TestMethod] - public async Task 
VariableResolution_ParentHierarchyVariablesShouldBeUsed_Async() - { - // Arrange - Maven-compliant behavior: parent/grandparent variables should be resolved - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Setup fileUtilityService to allow parent POM resolution - this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) - .Returns((string path) => path.EndsWith("pom.xml")); - - // Grandparent POM - var grandparentPomContent = @" - - 4.0.0 - com.test - grandparent - 1.0.0 - pom - - 4.13.2 - -"; - - // Parent POM - var parentPomContent = @" - - 4.0.0 - - com.test - grandparent - 1.0.0 - - com.test - parent - - 3.12.0 - -"; - - // Child POM using variables from parent hierarchy - var childPomContent = @" - - 4.0.0 - - com.test - parent - 1.0.0 - - com.test - child - - 31.1-jre - - - - org.apache.commons - commons-lang3 - ${commons.version} - - - junit - junit - ${junit.version} - - - com.google.guava - guava - ${guava.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("grandparent/pom.xml", grandparentPomContent) - .WithFile("parent/pom.xml", parentPomContent) - .WithFile("child/pom.xml", childPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(3); - - var components = detectedComponents.Select(x => x.Component as MavenComponent).ToList(); - - // Should resolve commons-lang3 from parent - var commonsComponent = components.FirstOrDefault(c => c.ArtifactId == "commons-lang3"); - commonsComponent.Should().NotBeNull(); - commonsComponent.Version.Should().Be("3.12.0"); - - // Should resolve junit from grandparent - var junitComponent = components.FirstOrDefault(c => c.ArtifactId == "junit"); - junitComponent.Should().NotBeNull(); - junitComponent.Version.Should().Be("4.13.2"); - 
- // Should resolve guava from current POM - var guavaComponent = components.FirstOrDefault(c => c.ArtifactId == "guava"); - guavaComponent.Should().NotBeNull(); - guavaComponent.Version.Should().Be("31.1-jre"); - } - - [TestMethod] - public async Task VariableResolution_CircularReferencesShouldBeHandled_Async() - { - // Arrange - Test circular reference detection and prevention - // NOTE: Maven CLI fails completely when circular references exist (even if unused) - // Our detector should fall back to static parsing and handle circular refs gracefully - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // POM with circular references (Maven CLI would fail entirely) - var pomContent = @" - - 4.0.0 - com.test - circular-test - 1.0.0 - - - ${another.version} - ${circular.version} - - - 1.0.0 - - - - - org.apache.commons - commons-lang3 - ${circular.version} - - - - com.google.guava - guava - ${valid.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - - // Should only resolve guava (valid version), skip commons-lang3 (circular reference) - // This matches our detector's behavior: graceful handling vs Maven CLI's complete failure - detectedComponents.Should().HaveCount(1); - var component = detectedComponents.First().Component as MavenComponent; - component.Should().NotBeNull(); - component.GroupId.Should().Be("com.google.guava"); - component.ArtifactId.Should().Be("guava"); - component.Version.Should().Be("1.0.0"); - } - - [TestMethod] - public async Task VariableResolution_DeferredResolutionSecondPass_Async() - { - // Arrange - Test second pass resolution for components with initially unresolved variables - this.mavenCommandServiceMock.Setup(x => 
x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Setup fileUtilityService to ONLY return true for the specific parent pom.xml file - // This prevents directory-based parent resolution from finding a "fake" parent - this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) - .Returns((string path) => path.Contains("parent") && path.EndsWith("pom.xml")); - - // Parent POM loaded after child (simulation) - var parentPomContent = @" - - 4.0.0 - com.test - parent - 1.0.0 - pom - - 2.0.0 - -"; - - // Child POM with dependency on parent variable - var childPomContent = @" - - 4.0.0 - - com.test - parent - 1.0.0 - - com.test - child - - - org.apache.commons - commons-lang3 - ${deferred.version} - - -"; - - // Act - Files are processed in order that might cause deferred resolution - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("child/pom.xml", childPomContent) - .WithFile("parent/pom.xml", parentPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(1); - - var component = detectedComponents.First().Component as MavenComponent; - component.Should().NotBeNull(); - component.GroupId.Should().Be("org.apache.commons"); - component.ArtifactId.Should().Be("commons-lang3"); - component.Version.Should().Be("2.0.0"); // Should be resolved in second pass - } - - [TestMethod] - public async Task VariableResolution_MavenBuiltInVariablesShouldWork_Async() - { - // Arrange - Test Maven built-in variables like ${project.version} - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Setup fileUtilityService (not needed for this test but required for consistency) - this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) - .Returns(false); - - var pomContent = @" - - 4.0.0 - com.test - maven-builtin-test - 1.5.0 - - 
- com.test - internal-dependency - ${project.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(1); - - var component = detectedComponents.First().Component as MavenComponent; - component.Should().NotBeNull(); - component.GroupId.Should().Be("com.test"); - component.ArtifactId.Should().Be("internal-dependency"); - component.Version.Should().Be("1.5.0"); // Should resolve ${project.version} - } - - [TestMethod] - public async Task VariableResolution_SiblingPomAsParentShouldBeUsed_Async() - { - // Arrange - Test case where a "sibling" POM is referenced as parent - // In this scenario, the sibling POM variables SHOULD be used for resolution - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // "Sibling" POM that will be referenced as parent by another module - var parentModulePomContent = @" - - 4.0.0 - com.test - shared-parent - 1.0.0 - pom - - 2.13.4 - 4.13.2 - -"; - - // Another "sibling" POM that references the first as its parent - var childModulePomContent = @" - - 4.0.0 - - com.test - shared-parent - 1.0.0 - ../shared-parent/pom.xml - - com.test - consumer-module - - 3.0.0 - - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - junit - junit - ${junit.version} - - - org.apache.commons - commons-lang3 - ${local.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("shared-parent/pom.xml", parentModulePomContent) - .WithFile("consumer-module/pom.xml", childModulePomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = 
componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(3); - - var components = detectedComponents.Select(x => x.Component as MavenComponent).ToList(); - - // Should resolve jackson-core from parent (shared-parent) - var jacksonComponent = components.FirstOrDefault(c => c.ArtifactId == "jackson-core"); - jacksonComponent.Should().NotBeNull(); - jacksonComponent.Version.Should().Be("2.13.4"); // From parent POM - - // Should resolve junit from parent (shared-parent) - var junitComponent = components.FirstOrDefault(c => c.ArtifactId == "junit"); - junitComponent.Should().NotBeNull(); - junitComponent.Version.Should().Be("4.13.2"); // From parent POM - - // Should resolve commons-lang3 from local properties - var commonsComponent = components.FirstOrDefault(c => c.ArtifactId == "commons-lang3"); - commonsComponent.Should().NotBeNull(); - commonsComponent.Version.Should().Be("3.0.0"); // From local POM - } - - [TestMethod] - public async Task VariableResolution_UnreferencedSiblingPomShouldNotBeUsed_Async() - { - // Arrange - Contrast test: sibling POM NOT referenced as parent should be ignored - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // True parent POM - var actualParentPomContent = @" - - 4.0.0 - com.test - actual-parent - 1.0.0 - pom - - 3.12.0 - -"; - - // Sibling POM with different variables - should NOT be used - var siblingPomContent = @" - - 4.0.0 - - com.test - actual-parent - 1.0.0 - - com.test - sibling-module - - 2.13.4 - -"; - - // Child POM that references actual-parent, not sibling - var childPomContent = @" - - 4.0.0 - - com.test - actual-parent - 1.0.0 - - com.test - child-module - - - org.apache.commons - commons-lang3 - ${commons.version} - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - -"; - - // Act - var (detectorResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("actual-parent/pom.xml", actualParentPomContent) - 
.WithFile("sibling-module/pom.xml", siblingPomContent) - .WithFile("child-module/pom.xml", childPomContent) - .ExecuteDetectorAsync(); - - // Assert - detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - - // Should only resolve commons-lang3 (from actual parent), not jackson-core (sibling variable) - detectedComponents.Should().HaveCount(1); - - var component = detectedComponents.First().Component as MavenComponent; - component.Should().NotBeNull(); - component.GroupId.Should().Be("org.apache.commons"); - component.ArtifactId.Should().Be("commons-lang3"); - component.Version.Should().Be("3.12.0"); // Resolved from actual parent - } - - [TestMethod] - public async Task TestSmartLoopPreventionInDirectoryTraversal() - { - // Arrange - Setup Maven CLI to fail so we use static parser - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Create a child POM that references a parent that won't be found in directory traversal - var childPomContent = """ - - 4.0.0 - - com.example - parent-project - 1.0.0 - - com.example - child-project - ${parent.version} - - - junit - junit - 4.13.2 - - - - """; - - // Act & Assert - This should not hang or throw due to infinite directory traversal - var (scanResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", childPomContent) - .ExecuteDetectorAsync(); - - // Should complete successfully without infinite loops - scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should register the dependency with direct version (junit) - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(1); - - var component = detectedComponents.First().Component as MavenComponent; - component.Should().NotBeNull(); - component.GroupId.Should().Be("junit"); - component.ArtifactId.Should().Be("junit"); - 
component.Version.Should().Be("4.13.2"); - } - - [TestMethod] - public async Task TestDeepDirectoryTraversalWithoutInfiniteLoop() - { - // Arrange - Setup to use static parser only - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Create a child POM in a deep directory structure - var childPomContent = """ - - 4.0.0 - - com.example - deep-parent - 1.0.0 - - deep-child - - - org.junit.jupiter - junit-jupiter - 5.8.2 - - - - """; - - // Act - Should handle deep traversal without issues - var (scanResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("very/deep/nested/directory/structure/project/pom.xml", childPomContent) - .ExecuteDetectorAsync(); - - // Assert - Should complete successfully - scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(1); - - var component = detectedComponents.First().Component as MavenComponent; - component.GroupId.Should().Be("org.junit.jupiter"); - component.ArtifactId.Should().Be("junit-jupiter"); - component.Version.Should().Be("5.8.2"); - } - - [TestMethod] - public async Task TestCircularDirectoryReferenceDetection() - { - // Arrange - Setup static parsing mode - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Create a simple POM that tests directory traversal robustness - var pomContent = """ - - 4.0.0 - com.example - circular-test - 1.0.0 - - - com.fasterxml.jackson.core - jackson-core - 2.13.3 - - - - """; - - // Act - Should not hang on edge cases - var (scanResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("subdir/pom.xml", pomContent) - .ExecuteDetectorAsync(); - - // Assert - Should complete without infinite loops - scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); - var detectedComponents = componentRecorder.GetDetectedComponents(); - 
detectedComponents.Should().HaveCount(1); - - var component = detectedComponents.First().Component as MavenComponent; - component.GroupId.Should().Be("com.fasterxml.jackson.core"); - component.ArtifactId.Should().Be("jackson-core"); - component.Version.Should().Be("2.13.3"); - } - - [TestMethod] - public async Task TestFileSystemRootReachedScenario() - { - // Arrange - Setup static parsing mode - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Create a simple POM - var pomContent = """ - - 4.0.0 - com.example - root-test - 1.0.0 - - - org.springframework - spring-core - 5.3.21 - - - - """; - - // Act - Should handle file system root gracefully - var (scanResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("pom.xml", pomContent) - .ExecuteDetectorAsync(); - - // Assert - Should complete without issues - scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(1); - - var component = detectedComponents.First().Component as MavenComponent; - component.GroupId.Should().Be("org.springframework"); - component.ArtifactId.Should().Be("spring-core"); - component.Version.Should().Be("5.3.21"); - } - - [TestMethod] - public async Task TestPerformanceOfSmartLoopPrevention() - { - // Arrange - Setup static parsing mode - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(false); - - // Create multiple POMs that will trigger directory traversal - // This validates that smart loop prevention completes without hanging - // (algorithmic validation rather than timing-based) - var pom1Content = CreatePomWithParentReference("project1", "parent-project", "1.0.0"); - var pom2Content = CreatePomWithParentReference("project2", "parent-project", "1.0.0"); - var pom3Content = CreatePomWithParentReference("project3", "parent-project", "1.0.0"); - - // Act - Process multiple POMs 
(test validates completion, not timing) - var (scanResult, componentRecorder) = await this.DetectorTestUtility - .WithFile("project1/pom.xml", pom1Content) - .WithFile("project2/pom.xml", pom2Content) - .WithFile("project3/pom.xml", pom3Content) - .ExecuteDetectorAsync(); - - // Assert - Should complete without hanging and produce correct results - // The test passing validates that smart loop prevention works algorithmically - // (if it had infinite loops, the test would timeout/hang rather than fail assertions) - scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Should have detected direct dependencies from all 3 POMs - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().HaveCount(3); - } - - private static string CreatePomWithParentReference(string artifactId, string parentArtifactId, string parentVersion) - { - return $""" - - 4.0.0 - - com.example - {parentArtifactId} - {parentVersion} - - {artifactId} - - - com.example - {artifactId}-dependency - 2.0.0 - - - - """; - } -} diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/MvnCliDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/MvnCliDetectorTests.cs index bb25c5cf3..3847b9ba0 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/MvnCliDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/MvnCliDetectorTests.cs @@ -1,16 +1,14 @@ #nullable disable namespace Microsoft.ComponentDetection.Detectors.Tests; -using System; -using System.Collections.Generic; using System.Linq; +using System.Threading; using System.Threading.Tasks; using AwesomeAssertions; using Microsoft.ComponentDetection.Contracts; using Microsoft.ComponentDetection.Contracts.Internal; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.ComponentDetection.Detectors.Maven; -using Microsoft.ComponentDetection.Detectors.Tests.Utilities; using Microsoft.ComponentDetection.TestsUtilities; using 
Microsoft.VisualStudio.TestTools.UnitTesting; using Moq; @@ -20,181 +18,1617 @@ namespace Microsoft.ComponentDetection.Detectors.Tests; [TestCategory("Governance/ComponentDetection")] public class MvnCliDetectorTests : BaseDetectorTest { + /// + /// The shared deps filename used by MavenCommandService. + /// Must match BcdeMvnDependencyFileName from MavenCommandService. + /// + private const string BcdeMvnFileName = "bcde.mvndeps"; + private readonly Mock mavenCommandServiceMock; + private readonly Mock envVarServiceMock; + private readonly Mock fileUtilityServiceMock; public MvnCliDetectorTests() { this.mavenCommandServiceMock = new Mock(); + this.mavenCommandServiceMock.Setup(x => x.BcdeMvnDependencyFileName).Returns(BcdeMvnFileName); + + // Default setup for GenerateDependenciesFileAsync + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(true, null)); + this.DetectorTestUtility.AddServiceMock(this.mavenCommandServiceMock); + + this.envVarServiceMock = new Mock(); + this.DetectorTestUtility.AddServiceMock(this.envVarServiceMock); + + this.fileUtilityServiceMock = new Mock(); + this.DetectorTestUtility.AddServiceMock(this.fileUtilityServiceMock); } [TestMethod] - public async Task IfMavenIsNotAvailableThenExitDetectorGracefullyAsync() + public async Task WhenMavenCliNotAvailable_FallsBackToStaticParsing_Async() { + // Arrange this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) .ReturnsAsync(false); + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + 3.12.0 + + +"; + + // Act var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) .ExecuteDetectorAsync(); - componentRecorder.GetDetectedComponents().Should().BeEmpty(); + // Assert detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = 
componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.12.0"); } [TestMethod] - public async Task MavenAvailableHappyPathAsync() + public async Task WhenMavenCliNotAvailable_DetectsMultipleDependencies_Async() { - const string componentString = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); - this.MvnCliHappyPath(content: componentString); - this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) - .Callback((ProcessRequest pr) => pr.SingleFileComponentRecorder.RegisterUsage(new DetectedComponent(new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT")))); - var (detectorResult, componentRecorder) = await this.DetectorTestUtility.ExecuteDetectorAsync(); + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + 3.12.0 + + + com.google.guava + guava + 31.1-jre + + + junit + junit + 4.13.2 + + +"; - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().ContainSingle(); + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - var mavenComponent = detectedComponents.First().Component as MavenComponent; - var splitComponent = componentString.Split(':'); - splitComponent.Should().HaveElementAt(0, mavenComponent.GroupId); - splitComponent.Should().HaveElementAt(1, mavenComponent.ArtifactId); - splitComponent.Should().HaveElementAt(3, mavenComponent.Version); 
- mavenComponent.Type.Should().Be(ComponentType.Maven); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + + var groupIds = detectedComponents + .Select(dc => (dc.Component as MavenComponent)?.GroupId) + .ToList(); + + groupIds.Should().Contain("org.apache.commons"); + groupIds.Should().Contain("com.google.guava"); + groupIds.Should().Contain("junit"); } [TestMethod] - public async Task MavenCli_FileObservableIsNotPresent_DetectionShouldNotFailAsync() + public async Task WhenMavenCliSucceeds_UsesMvnCliResults_Async() { - this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) - .ReturnsAsync(true); + // Arrange + const string componentString = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; - Func action = async () => await this.DetectorTestUtility.ExecuteDetectorAsync(); + this.SetupMvnCliSuccess(componentString); - await action.Should().NotThrowAsync(); + this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) + .Callback((ProcessRequest pr) => + { + pr.SingleFileComponentRecorder.RegisterUsage( + new DetectedComponent(new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT"))); + }); + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility.ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.maven"); + mavenComponent.ArtifactId.Should().Be("maven-compat"); + mavenComponent.Version.Should().Be("3.6.1-SNAPSHOT"); } [TestMethod] - public async Task MavenRootsAsync() + public async Task WhenMavenCliSucceeds_PreservesTransitiveDependencies_Async() { - const string componentString = 
"org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; - const string childComponentString = "org.apache.maven:maven-compat-child:jar:3.6.1-SNAPSHOT"; + // Arrange + const string rootComponent = "com.test:my-app:jar:1.0.0"; + const string directDependency = "org.apache.commons:commons-lang3:jar:3.12.0"; + const string transitiveDependency = "org.apache.commons:commons-text:jar:1.9"; - var content = $@"com.bcde.test:top-level:jar:1.0.0{Environment.NewLine}\- {componentString}{Environment.NewLine} \- {childComponentString}"; + var content = $@"{rootComponent} +\- {directDependency} + \- {transitiveDependency}"; + + this.SetupMvnCliSuccess(content); - this.MvnCliHappyPath(content); this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) .Callback((ProcessRequest pr) => { pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent( - new MavenComponent("com.bcde.test", "top-levelt", "1.0.0")), + new DetectedComponent(new MavenComponent("com.test", "my-app", "1.0.0")), isExplicitReferencedDependency: true); pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent( - new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT")), + new DetectedComponent(new MavenComponent("org.apache.commons", "commons-lang3", "3.12.0")), isExplicitReferencedDependency: true); pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent( - new MavenComponent("org.apache.maven", "maven-compat-child", "3.6.1-SNAPSHOT")), + new DetectedComponent(new MavenComponent("org.apache.commons", "commons-text", "1.9")), isExplicitReferencedDependency: false, - parentComponentId: "org.apache.maven maven-compat 3.6.1-SNAPSHOT - Maven"); + parentComponentId: "org.apache.commons commons-lang3 3.12.0 - Maven"); }); + // Act var (detectorResult, componentRecorder) = await this.DetectorTestUtility.ExecuteDetectorAsync(); + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + var detectedComponents = 
componentRecorder.GetDetectedComponents(); detectedComponents.Should().HaveCount(3); + + // Verify dependency graph has the transitive relationship + var dependencyGraph = componentRecorder.GetDependencyGraphsByLocation().Values.First(); + dependencyGraph.Should().NotBeNull(); + + // Verify the transitive component is reachable from the direct dependency + var directComponentId = "org.apache.commons commons-lang3 3.12.0 - Maven"; + var transitiveComponentId = "org.apache.commons commons-text 1.9 - Maven"; + + var directDependencies = dependencyGraph.GetDependenciesForComponent(directComponentId); + directDependencies.Should().Contain( + transitiveComponentId, + "the transitive dependency should be a child of the direct dependency"); + } + + [TestMethod] + public async Task WhenMavenCliProducesNoOutput_FallsBackToStaticParsing_Async() + { + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + // MvnCli runs but produces no bcde.mvndeps files (simulating failure) + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(true, null)); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + 3.12.0 + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - var splitComponent = componentString.Split(':'); - var splitChildComponent = childComponentString.Split(':'); + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); - var mavenComponent = detectedComponents.FirstOrDefault(x => (x.Component as MavenComponent).ArtifactId == splitChildComponent[1]); + var mavenComponent = detectedComponents.First().Component as MavenComponent; 
mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.12.0"); + } - componentRecorder.AssertAllExplicitlyReferencedComponents( - mavenComponent.Component.Id, - parentComponent => parentComponent.ArtifactId == splitComponent[1]); + [TestMethod] + public async Task StaticParser_IgnoresDependenciesWithoutVersion_Async() + { + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + + + + com.google.guava + guava + 31.1-jre + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.ArtifactId.Should().Be("guava"); } [TestMethod] - public async Task MavenDependencyGraphAsync() + public async Task StaticParser_IgnoresDependenciesWithVersionRanges_Async() { - const string explicitReferencedComponent = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); - const string intermediateParentComponent = "org.apache.maven:maven-compat-parent:jar:3.6.1-SNAPSHOT"; + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + [3.0,4.0) + + + com.google.guava + guava + 31.1-jre + + +"; - const string leafComponentString = "org.apache.maven:maven-compat-child:jar:3.6.1-SNAPSHOT"; + // Act + var (detectorResult, componentRecorder) = await 
this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); - var content = $@"com.bcde.test:top-level:jar:1.0.0 -\- {explicitReferencedComponent} - \- {intermediateParentComponent} - \-{leafComponentString}"; + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - const string explicitReferencedComponentId = "org.apache.maven maven-compat 3.6.1-SNAPSHOT - Maven"; - const string intermediateParentComponentId = "org.apache.maven maven-compat-parent 3.6.1-SNAPSHOT - Maven"; - const string leafComponentId = "org.apache.maven maven-compat-child 3.6.1-SNAPSHOT - Maven"; + // Version ranges with commas are ignored + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.ArtifactId.Should().Be("guava"); + } + + [TestMethod] + public async Task StaticParser_ResolvesPropertyVersions_Async() + { + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + 3.12.0 + + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.Version.Should().Be("3.12.0"); + } + + [TestMethod] + public async Task StaticParser_IgnoresDependenciesWithUnresolvablePropertyVersions_Async() + { + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + 
.ReturnsAsync(false); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + ${undefined.property} + + + com.google.guava + guava + 31.1-jre + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Unresolvable property versions are ignored + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.ArtifactId.Should().Be("guava"); + } + + [TestMethod] + public async Task StaticParser_ResolvesVariableFromPreviousFile_Async() + { + // Arrange - Test case 1: Variable defined in parent POM, referenced in child POM + // Uses Maven's standard parent inheritance mechanism + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) + .Returns(true); + + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 3.12.0 + +"; + + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + child + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act - Parent processed first, then child + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", parentPomContent) + .WithFile("module/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = 
detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.12.0"); + } + + [TestMethod] + public async Task StaticParser_BackfillsVariableFromLaterFile_Async() + { + // Arrange - Test case 2: Child processed first, parent processed second (deferred resolution) + // Tests that variables can be resolved even when parent is processed after child + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith("pom.xml") && !s.Contains("module")))) + .Returns(true); + + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + child + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 3.13.0 + +"; + + // Act - Child processed first (has unresolved variable), then parent + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("module/pom.xml", childPomContent) + .WithFile("pom.xml", parentPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.13.0"); + } + + [TestMethod] + public async Task StaticParser_LocalVariableDefinitionTakesPriority_Async() + { + // Arrange - Test case 3: Variable defined in both files, local definition has 
priority + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var firstPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + + 3.11.0 + +"; + + var secondPomContent = @" + + 4.0.0 + com.test + child + 1.0.0 + + 3.14.0 + + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", firstPomContent) + .WithFile("module/pom.xml", secondPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + + // Should use the local definition (3.14.0) instead of parent definition (3.11.0) + mavenComponent.Version.Should().Be("3.14.0"); + } + + [TestMethod] + public async Task StaticParser_OutOfOrderProcessing_RespectsAncestorPriority_Async() + { + // Arrange - Test processing order independence for ancestor priority + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to return false for directory traversal + // This forces the detector to use coordinate-based parent resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns(false); + + var grandparentPomContent = @" + + 4.0.0 + com.test + grandparent + 1.0.0 + pom + + 3.10.0 + +"; + + var parentPomContent = @" + + 4.0.0 + + com.test + grandparent + 1.0.0 + + parent + pom + + 3.11.0 + +"; + + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + child + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act - Process files 
in out-of-order sequence: grandparent → child → parent + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", grandparentPomContent) + .WithFile("parent/child/pom.xml", childPomContent) + .WithFile("parent/pom.xml", parentPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + + // Should resolve to parent's version (3.11.0) since parent is the closest ancestor to child, + // NOT grandparent's version (3.10.0), even though grandparent was processed first. + mavenComponent.Version.Should().Be("3.11.0"); + } + + [TestMethod] + public async Task WhenNoPomXmlFiles_ReturnsSuccessWithNoComponents_Async() + { + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + componentRecorder.GetDetectedComponents().Should().BeEmpty(); + } + + [TestMethod] + public async Task WhenPomXmlHasNoDependencies_ReturnsSuccessWithNoComponents_Async() + { + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + 
componentRecorder.GetDetectedComponents().Should().BeEmpty(); + } + + [TestMethod] + public async Task WhenDisableMvnCliTrue_UsesStaticParsing_Async() + { + // Arrange - DisableMvnCliEnvVar is true (explicitly disable Maven CLI) + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + // Disable MvnCli explicitly + this.envVarServiceMock.Setup(x => x.IsEnvironmentVariableValueTrue(MvnCliComponentDetector.DisableMvnCliEnvVar)) + .Returns(true); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + 3.12.0 + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should detect component via static parsing even though Maven CLI is available + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.GroupId.Should().Be("org.apache.commons"); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + mavenComponent.Version.Should().Be("3.12.0"); + + // Verify MavenCLIExistsAsync was never called since we disabled MvnCli + this.mavenCommandServiceMock.Verify(x => x.MavenCLIExistsAsync(), Times.Never); + } + + [TestMethod] + public async Task WhenDisableMvnCliEnvVarIsFalse_UsesMvnCliNormally_Async() + { + // Arrange - Maven CLI is available and CD_MAVEN_DISABLE_CLI is false + const string componentString = "org.apache.maven:maven-compat:jar:3.6.1-SNAPSHOT"; + const string validPomXml = @" + + 4.0.0 + com.test + test-app + 1.0.0 +"; + + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + // Setup GenerateDependenciesFileAsync to return success + 
this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(true, null)); + + // Setup file utility to return the deps file content + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) + .Returns(true); + this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) + .Returns(componentString); - this.MvnCliHappyPath(content); this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) .Callback((ProcessRequest pr) => { pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent( - new MavenComponent("com.bcde.test", "top-levelt", "1.0.0")), - isExplicitReferencedDependency: true); + new DetectedComponent(new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT"))); + }); + + // Set up the environment variable to NOT disable MvnCli (false) + this.envVarServiceMock.Setup(x => x.IsEnvironmentVariableValueTrue(MvnCliComponentDetector.DisableMvnCliEnvVar)) + .Returns(false); + + // Act + var (detectorResult, _) = await this.DetectorTestUtility + .WithFile("pom.xml", validPomXml) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should use MvnCli since CD_MAVEN_DISABLE_CLI is false + this.mavenCommandServiceMock.Verify(x => x.MavenCLIExistsAsync(), Times.Once); + + // Verify telemetry shows MvnCliOnly detection method + detectorResult.AdditionalTelemetryDetails.Should().ContainKey("DetectionMethod"); + detectorResult.AdditionalTelemetryDetails["DetectionMethod"].Should().Be("MvnCliOnly"); + } + + [TestMethod] + public async Task WhenMvnCliSucceeds_NestedPomXmlsAreFilteredOut_Async() + { + // Arrange - Maven CLI is available and succeeds. + // In a multi-module project, only the root pom.xml should be processed by MvnCli. 
+ this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + // Setup GenerateDependenciesFileAsync to return success + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(true, null)); + + // Setup file utility to return the deps file content + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) + .Returns(true); + this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) + .Returns("com.test:parent-app:jar:1.0.0"); + + this.mavenCommandServiceMock.Setup(x => x.ParseDependenciesFile(It.IsAny())) + .Callback((ProcessRequest pr) => + { + // MvnCli processes root and generates deps for all modules pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent( - new MavenComponent("org.apache.maven", "maven-compat", "3.6.1-SNAPSHOT")), - isExplicitReferencedDependency: true); + new DetectedComponent(new MavenComponent("org.projecta", "dep-from-root-a", "1.0.0"))); pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent( - new MavenComponent("org.apache.maven", "maven-compat-parent", "3.6.1-SNAPSHOT")), - isExplicitReferencedDependency: false, - parentComponentId: "org.apache.maven maven-compat 3.6.1-SNAPSHOT - Maven"); + new DetectedComponent(new MavenComponent("org.projecta", "dep-from-nested-a", "1.0.0"))); pr.SingleFileComponentRecorder.RegisterUsage( - new DetectedComponent( - new MavenComponent("org.apache.maven", "maven-compat-child", "3.6.1-SNAPSHOT")), - isExplicitReferencedDependency: false, - parentComponentId: "org.apache.maven maven-compat-parent 3.6.1-SNAPSHOT - Maven"); + new DetectedComponent(new MavenComponent("org.projecta", "dep-from-submodule-a", "1.0.0"))); }); - var (detectorResult, componentRecorder) = await this.DetectorTestUtility.ExecuteDetectorAsync(); + // Root pom.xml content (doesn't matter for this test, just needs to exist) + var 
rootPomContent = @" + + com.test + parent-app + 1.0.0 + pom +"; - componentRecorder.GetDetectedComponents().Should().HaveCount(4); + // Nested module pom.xml content + var moduleAPomContent = @" + + + com.test + parent-app + 1.0.0 + + module-a + + + org.apache.commons + commons-lang3 + 3.12.0 + + +"; + + // Act - Add root pom.xml first, then nested module pom.xml + // The root should get MvnCli bcde.mvndeps, nested should be filtered + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", rootPomContent) + .WithFile("module-a/pom.xml", moduleAPomContent) + .WithFile(BcdeMvnFileName, "com.test:parent-app:jar:1.0.0", [BcdeMvnFileName]) + .ExecuteDetectorAsync(); + + // Assert detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); - // There is only one graph - var dependencyGraph = componentRecorder.GetDependencyGraphsByLocation().Values.First(); + // Should have components from MvnCli (parent + modules), not from static parsing + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + + // MvnCli should only be called once for root pom.xml (nested filtered out) + this.mavenCommandServiceMock.Verify( + x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny()), + Times.Once); + } + + [TestMethod] + public async Task WhenMvnCliFailsCompletely_AllNestedPomXmlsAreRestoredForStaticParsing_Async() + { + // Arrange - Maven CLI is available but fails for all pom.xml files (e.g., auth error) + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + // MvnCli runs but produces no bcde.mvndeps files (simulating complete failure) + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(true, null)); - dependencyGraph.GetDependenciesForComponent(explicitReferencedComponentId).Should().ContainSingle(); - 
dependencyGraph.GetDependenciesForComponent(explicitReferencedComponentId).Should().Contain(intermediateParentComponentId); - dependencyGraph.IsComponentExplicitlyReferenced(explicitReferencedComponentId).Should().BeTrue(); + // Root pom.xml content + var rootPomContent = @" + + com.test + parent-app + 1.0.0 + pom + + + org.springframework + spring-core + 5.3.0 + + +"; - dependencyGraph.GetDependenciesForComponent(intermediateParentComponentId).Should().ContainSingle(); - dependencyGraph.GetDependenciesForComponent(intermediateParentComponentId).Should().Contain(leafComponentId); - dependencyGraph.IsComponentExplicitlyReferenced(intermediateParentComponentId).Should().BeFalse(); + // Nested module pom.xml content - should be restored for static parsing + var moduleAPomContent = @" + + + com.test + parent-app + 1.0.0 + + module-a + + + org.apache.commons + commons-lang3 + 3.12.0 + + +"; - dependencyGraph.GetDependenciesForComponent(leafComponentId).Should().BeEmpty(); - dependencyGraph.IsComponentExplicitlyReferenced(leafComponentId).Should().BeFalse(); + // Another nested module + var moduleBPomContent = @" + + + com.test + parent-app + 1.0.0 + + module-b + + + com.google.guava + guava + 31.1-jre + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", rootPomContent) + .WithFile("module-a/pom.xml", moduleAPomContent) + .WithFile("module-b/pom.xml", moduleBPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // All pom.xml files should be processed via static parsing (nested poms restored) + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); // spring-core, commons-lang3, guava + + var artifactIds = detectedComponents + .Select(dc => (dc.Component as MavenComponent)?.ArtifactId) + .ToList(); + + artifactIds.Should().Contain("spring-core"); // From root pom.xml + 
artifactIds.Should().Contain("commons-lang3"); // From module-a/pom.xml (nested - restored) + artifactIds.Should().Contain("guava"); // From module-b/pom.xml (nested - restored) } - protected bool ShouldBeEquivalentTo(IEnumerable result, IEnumerable expected) + [TestMethod] + public async Task WhenMvnCliFailsWithAuthError_LogsFailedEndpointAndSetsTelemetry_Async() { - result.Should().BeEquivalentTo(expected); - return true; + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + // Simulate Maven CLI failure with authentication error message containing endpoint URL + // The URL intentionally contains userinfo (credentials) and a path so we can verify they are stripped. + var authErrorMessage = "[ERROR] Failed to execute goal on project my-app: Could not resolve dependencies for project com.test:my-app:jar:1.0.0: " + + "Failed to collect dependencies at com.private:private-lib:jar:2.0.0: " + + "Failed to read artifact descriptor for com.private:private-lib:jar:2.0.0: " + + "Could not transfer artifact com.private:private-lib:pom:2.0.0 from/to private-repo (https://user:s3cr3t@private-maven-repo.example.com/repository/maven-releases/?token=abc): " + + "status code: 401, reason phrase: Unauthorized"; + + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(false, authErrorMessage)); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + 3.12.0 + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should fall back to static parsing and detect the component + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + var mavenComponent = 
detectedComponents.First().Component as MavenComponent; + mavenComponent.Should().NotBeNull(); + mavenComponent.ArtifactId.Should().Be("commons-lang3"); + + // Verify telemetry contains auth failure info + detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FallbackReason"); + detectorResult.AdditionalTelemetryDetails["FallbackReason"].Should().Be("AuthenticationFailure"); + + // Verify telemetry contains the failed endpoint normalized to scheme+host only. + // Credentials (userinfo), path, and query string must NOT appear in telemetry. + detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FailedEndpoints"); + var failedEndpoints = detectorResult.AdditionalTelemetryDetails["FailedEndpoints"]; + failedEndpoints.Should().Be( + "https://private-maven-repo.example.com", + "credentials, path, and query string must be stripped before reaching telemetry"); + failedEndpoints.Should().NotContain("user", "userinfo must be stripped"); + failedEndpoints.Should().NotContain("s3cr3t", "credentials must be stripped"); + failedEndpoints.Should().NotContain("token", "query string must be stripped"); + failedEndpoints.Should().NotContain("/repository", "path must be stripped"); } - private void MvnCliHappyPath(string content) + [TestMethod] + public async Task WhenMvnCliFailsWithNonAuthError_SetsFallbackReasonToOther_Async() { - const string bcdeMvnFileName = "bcde.mvndeps"; + // Arrange + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); - this.mavenCommandServiceMock.Setup(x => x.BcdeMvnDependencyFileName) - .Returns(bcdeMvnFileName); + // Simulate Maven CLI failure with a non-auth error (e.g., build error) + var nonAuthErrorMessage = "[ERROR] Failed to execute goal on project my-app: Compilation failure: " + + "src/main/java/com/test/App.java:[10,5] cannot find symbol"; + + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(false, 
nonAuthErrorMessage)); + + var pomXmlContent = @" + + 4.0.0 + com.test + my-app + 1.0.0 + + + org.apache.commons + commons-lang3 + 3.12.0 + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomXmlContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should fall back to static parsing + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().ContainSingle(); + + // Verify telemetry shows non-auth failure + detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FallbackReason"); + detectorResult.AdditionalTelemetryDetails["FallbackReason"].Should().Be("OtherMvnCliFailure"); + + // Should NOT have FailedEndpoints since this wasn't an auth error + detectorResult.AdditionalTelemetryDetails.Should().NotContainKey("FailedEndpoints"); + } + + [TestMethod] + public async Task WhenAuthenticationFailsAndParentChildPropertiesUsed_MaintainsCorrectOrderingDuringFallback_Async() + { + // Arrange + const string parentPomContent = @" + + 4.0.0 + com.yammer.veritas + veritas-parent + 1.0-SNAPSHOT + pom + + + 3.18.0 + 4.11.0 + 2.21.1 + + + + veritas-api + + + + + yammer-artifacts + yammer-artifacts + https://pkgs.dev.azure.com/yammer/_packaging/yammer-artifacts/maven/v1 + + +"; + + const string childPomContent = @" + + + veritas-parent + com.yammer.veritas + 1.0-SNAPSHOT + + 4.0.0 + veritas-api + + + + org.apache.commons + commons-lang3 + ${commons-lang3.version} + + + org.mockito + mockito-core + ${mockito.version} + test + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + + +"; + + // Setup Maven CLI to fail with authentication error (401 Unauthorized) + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(true); + + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new 
MavenCliResult(false, "status code: 401, reason phrase: Unauthorized")); + + // Act - Test with parent and child POM structure + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", parentPomContent) + .WithFile("veritas-api/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should fall back to static parsing after authentication failure + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should detect all 3 property-based dependencies from child POM + detectedComponents.Should().HaveCount(3); + + var mavenComponents = detectedComponents.Where(x => x.Component is MavenComponent).ToList(); + mavenComponents.Should().HaveCount(3); + + // Verify each property-based dependency was resolved with correct version from parent + var commonsLang3 = mavenComponents.FirstOrDefault(x => + ((MavenComponent)x.Component).ArtifactId == "commons-lang3"); + commonsLang3.Should().NotBeNull(); + ((MavenComponent)commonsLang3.Component).Version.Should().Be("3.18.0"); + + var mockitoCore = mavenComponents.FirstOrDefault(x => + ((MavenComponent)x.Component).ArtifactId == "mockito-core"); + mockitoCore.Should().NotBeNull(); + ((MavenComponent)mockitoCore.Component).Version.Should().Be("4.11.0"); + + var jacksonCore = mavenComponents.FirstOrDefault(x => + ((MavenComponent)x.Component).ArtifactId == "jackson-core"); + jacksonCore.Should().NotBeNull(); + ((MavenComponent)jacksonCore.Component).Version.Should().Be("2.21.1"); + + // Verify telemetry shows authentication failure + detectorResult.AdditionalTelemetryDetails.Should().ContainKey("FallbackReason"); + detectorResult.AdditionalTelemetryDetails["FallbackReason"].Should().Be("AuthenticationFailure"); + + // Should have method showing mixed detection was used (CLI failed but fallback succeeded) + detectorResult.AdditionalTelemetryDetails.Should().ContainKey("DetectionMethod"); + 
detectorResult.AdditionalTelemetryDetails["DetectionMethod"].Should().Be("Mixed"); + } + + [TestMethod] + public async Task VariableResolution_SiblingPomVariablesShouldNotBeUsed_Async() + { + // Arrange - Maven-compliant behavior: sibling POM variables should NOT be resolved + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Parent POM with a property + var parentPomContent = @" + + 4.0.0 + com.test + parent + 1.0.0 + pom + + 3.12.0 + +"; + + // Sibling POM with different variable - should NOT be used for resolution + var siblingPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + sibling + + 31.1-jre + +"; + + // Target POM trying to use sibling's variable - should fail to resolve + var targetPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + target + + + com.google.guava + guava + ${guava.version} + + + org.apache.commons + commons-lang3 + ${commons.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("parent/pom.xml", parentPomContent) + .WithFile("sibling/pom.xml", siblingPomContent) + .WithFile("target/pom.xml", targetPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + + // Should only resolve commons-lang3 (from parent), not guava (sibling variable) + detectedComponents.Should().HaveCount(1); + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("org.apache.commons"); + component.ArtifactId.Should().Be("commons-lang3"); + component.Version.Should().Be("3.12.0"); // Resolved from parent + } + + [TestMethod] + public async Task VariableResolution_ParentHierarchyVariablesShouldBeUsed_Async() + { + // Arrange - Maven-compliant behavior: parent/grandparent variables should be resolved + 
this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Setup fileUtilityService to allow parent POM resolution + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns((string path) => path.EndsWith("pom.xml")); + + // Grandparent POM + var grandparentPomContent = @" + + 4.0.0 + com.test + grandparent + 1.0.0 + pom + + 4.13.2 + +"; + + // Parent POM + var parentPomContent = @" + + 4.0.0 + + com.test + grandparent + 1.0.0 + + com.test + parent + + 3.12.0 + +"; + + // Child POM using variables from parent hierarchy + var childPomContent = @" + + 4.0.0 + + com.test + parent + 1.0.0 + + com.test + child + + 31.1-jre + + + + org.apache.commons + commons-lang3 + ${commons.version} + + + junit + junit + ${junit.version} + + + com.google.guava + guava + ${guava.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("grandparent/pom.xml", grandparentPomContent) + .WithFile("parent/pom.xml", parentPomContent) + .WithFile("child/pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + + var components = detectedComponents.Select(x => x.Component as MavenComponent).ToList(); + + // Should resolve commons-lang3 from parent + var commonsComponent = components.FirstOrDefault(c => c.ArtifactId == "commons-lang3"); + commonsComponent.Should().NotBeNull(); + commonsComponent.Version.Should().Be("3.12.0"); + + // Should resolve junit from grandparent + var junitComponent = components.FirstOrDefault(c => c.ArtifactId == "junit"); + junitComponent.Should().NotBeNull(); + junitComponent.Version.Should().Be("4.13.2"); + + // Should resolve guava from current POM + var guavaComponent = components.FirstOrDefault(c => c.ArtifactId == "guava"); + 
guavaComponent.Should().NotBeNull(); + guavaComponent.Version.Should().Be("31.1-jre"); + } + + [TestMethod] + public async Task VariableResolution_MavenBuiltInVariablesShouldWork_Async() + { + // Arrange - Test Maven built-in variables like ${project.version} + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + this.fileUtilityServiceMock.Setup(x => x.Exists(It.IsAny())) + .Returns(false); + + var pomContent = @" + + 4.0.0 + com.test + maven-builtin-test + 1.5.0 + + + com.test + internal-dependency + ${project.version} + + +"; + + // Act + var (detectorResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", pomContent) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("com.test"); + component.ArtifactId.Should().Be("internal-dependency"); + component.Version.Should().Be("1.5.0"); // Should resolve ${project.version} + } + + [TestMethod] + public async Task WhenCleanupCreatedFilesIsTrue_DeletesDepsFileAfterProcessing_Async() + { + // Arrange + const string componentString = "org.apache.commons:commons-lang3:jar:3.12.0"; + this.SetupMvnCliSuccess(componentString); + + var deletedFiles = new System.Collections.Generic.List(); + this.fileUtilityServiceMock + .Setup(x => x.Delete(It.IsAny())) + .Callback(path => deletedFiles.Add(path)); + + // Act: default ScanRequest has cleanupCreatedFiles=true + var (detectorResult, _) = await this.DetectorTestUtility.ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + deletedFiles.Should().ContainSingle( + f => f.Contains(BcdeMvnFileName), + "the deps file should be deleted after its content 
is consumed"); + } + + [TestMethod] + public async Task WhenCleanupCreatedFilesIsFalse_DoesNotDeleteDepsFile_Async() + { + // Arrange + const string componentString = "org.apache.commons:commons-lang3:jar:3.12.0"; + this.SetupMvnCliSuccess(componentString); + + var scanRequest = new ScanRequest( + new System.IO.DirectoryInfo(System.IO.Path.GetTempPath()), + null, + null, + new System.Collections.Generic.Dictionary(), + null, + new Microsoft.ComponentDetection.Common.DependencyGraph.ComponentRecorder(), + cleanupCreatedFiles: false); + + // Act + var (detectorResult, _) = await this.DetectorTestUtility + .WithScanRequest(scanRequest) + .ExecuteDetectorAsync(); + + // Assert + detectorResult.ResultCode.Should().Be(ProcessingResultCode.Success); + this.fileUtilityServiceMock.Verify( + x => x.Delete(It.IsAny()), + Times.Never, + "the deps file should not be deleted when CleanupCreatedFiles is false"); + } + + [TestMethod] + public async Task TestSmartLoopPreventionInDirectoryTraversal() + { + // Arrange - Setup Maven CLI to fail so we use static parser + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + // Create a child POM that references a parent that won't be found in directory traversal + var childPomContent = """ + + 4.0.0 + + com.example + parent-project + 1.0.0 + + com.example + child-project + ${parent.version} + + + junit + junit + 4.13.2 + + + + """; + + // Act & Assert - This should not hang or throw due to infinite directory traversal + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("pom.xml", childPomContent) + .ExecuteDetectorAsync(); + + // Should complete successfully without infinite loops + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should register the dependency with direct version (junit) + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(1); + + var component = 
detectedComponents.First().Component as MavenComponent; + component.Should().NotBeNull(); + component.GroupId.Should().Be("junit"); + component.ArtifactId.Should().Be("junit"); + component.Version.Should().Be("4.13.2"); + } + + [TestMethod] + public async Task TestPerformanceOfSmartLoopPrevention() + { + // Arrange - Setup static parsing mode + this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) + .ReturnsAsync(false); + + var pom1Content = CreatePomWithParentReference("project1", "parent-project", "1.0.0"); + var pom2Content = CreatePomWithParentReference("project2", "parent-project", "1.0.0"); + var pom3Content = CreatePomWithParentReference("project3", "parent-project", "1.0.0"); + + // Act - Process multiple POMs (test validates completion, not timing) + var (scanResult, componentRecorder) = await this.DetectorTestUtility + .WithFile("project1/pom.xml", pom1Content) + .WithFile("project2/pom.xml", pom2Content) + .WithFile("project3/pom.xml", pom3Content) + .ExecuteDetectorAsync(); + + // Assert - Should complete without hanging and produce correct results + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Should have detected direct dependencies from all 3 POMs + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(3); + } + + private static string CreatePomWithParentReference(string artifactId, string parentArtifactId, string parentVersion) + { + return $""" + + 4.0.0 + + com.example + {parentArtifactId} + {parentVersion} + + {artifactId} + + + com.example + {artifactId}-dependency + 2.0.0 + + + + """; + } + + private void SetupMvnCliSuccess(string depsFileContent) + { this.mavenCommandServiceMock.Setup(x => x.MavenCLIExistsAsync()) .ReturnsAsync(true); - this.DetectorTestUtility.WithFile("pom.xml", content) - .WithFile("pom.xml", content, searchPatterns: [bcdeMvnFileName]); + + this.mavenCommandServiceMock.Setup(x => x.BcdeMvnDependencyFileName) + 
.Returns(BcdeMvnFileName); + + this.mavenCommandServiceMock.Setup(x => x.GenerateDependenciesFileAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync(new MavenCliResult(true, null)); + + // Setup file utility service to return the deps file content + this.fileUtilityServiceMock.Setup(x => x.Exists(It.Is(s => s.EndsWith(BcdeMvnFileName)))) + .Returns(true); + this.fileUtilityServiceMock.Setup(x => x.ReadAllText(It.Is(s => s.EndsWith(BcdeMvnFileName)))) + .Returns(depsFileContent); + + const string validPomXml = @" + + 4.0.0 + com.test + test-app + 1.0.0 +"; + + this.DetectorTestUtility.WithFile("pom.xml", validPomXml); + + // Add the dependency file that Maven CLI would have generated + this.DetectorTestUtility.WithFile(BcdeMvnFileName, depsFileContent, [BcdeMvnFileName]); } } diff --git a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs index 2eec47db6..4ded253ea 100644 --- a/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs +++ b/test/Microsoft.ComponentDetection.Orchestrator.Tests/Services/DetectorProcessingServiceTests.cs @@ -42,7 +42,6 @@ public class DetectorProcessingServiceTests private readonly Mock> loggerMock; private readonly DetectorProcessingService serviceUnderTest; private readonly Mock directoryWalkerFactory; - private readonly Mock pathUtilityServiceMock; private readonly Mock experimentServiceMock; private readonly Mock consoleMock; @@ -61,15 +60,10 @@ public DetectorProcessingServiceTests() this.experimentServiceMock = new Mock(); this.loggerMock = new Mock>(); this.directoryWalkerFactory = new Mock(); - this.pathUtilityServiceMock = new Mock(); this.consoleMock = new Mock(); - // Setup path utility to return the same path by default (no symlinks) - this.pathUtilityServiceMock.Setup(x => x.ResolvePhysicalPath(It.IsAny())) - .Returns(path => path); - 
this.serviceUnderTest = - new DetectorProcessingService(this.directoryWalkerFactory.Object, this.pathUtilityServiceMock.Object, this.experimentServiceMock.Object, this.loggerMock.Object, this.consoleMock.Object); + new DetectorProcessingService(this.directoryWalkerFactory.Object, this.experimentServiceMock.Object, this.loggerMock.Object, this.consoleMock.Object); this.firstFileComponentDetectorMock = this.SetupFileDetectorMock("firstFileDetectorId"); this.secondFileComponentDetectorMock = this.SetupFileDetectorMock("secondFileDetectorId"); @@ -665,195 +659,4 @@ private Mock SetupCommandDetectorMock(string id) return mockCommandDetector; } - - [TestMethod] - public async Task ProcessDetectorsAsync_WithMavenDetectors_ShouldCleanupMavenFilesAfterAllDetectorsFinish() - { - // Arrange - Create a temporary directory with mock Maven dependency files - var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); - Directory.CreateDirectory(tempDir); - - // Use bcde.mvndeps - the specific filename generated by MavenCommandService - var testFile1 = Path.Combine(tempDir, "bcde.mvndeps"); - var testFile2 = Path.Combine(tempDir, "subdir", "bcde.mvndeps"); - var nonMavenFile = Path.Combine(tempDir, "regular.txt"); - - Directory.CreateDirectory(Path.GetDirectoryName(testFile2)); - await File.WriteAllTextAsync(testFile1, "maven dependency content 1"); - await File.WriteAllTextAsync(testFile2, "maven dependency content 2"); - await File.WriteAllTextAsync(nonMavenFile, "regular file content"); - - try - { - var tempDirInfo = new DirectoryInfo(tempDir); - var scanSettings = new ScanSettings - { - SourceDirectory = tempDirInfo, - DetectorArgs = new Dictionary(), - CleanupCreatedFiles = true, - }; - - // Create mock Maven detectors - var mvnCliDetectorMock = new Mock(); - mvnCliDetectorMock.SetupAllProperties(); - mvnCliDetectorMock.SetupGet(x => x.Id).Returns("MvnCli"); - mvnCliDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); - 
mvnCliDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); - - var mavenWithFallbackDetectorMock = new Mock(); - mavenWithFallbackDetectorMock.SetupAllProperties(); - mavenWithFallbackDetectorMock.SetupGet(x => x.Id).Returns("MavenWithFallback"); - mavenWithFallbackDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); - mavenWithFallbackDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); - - var detectors = new List { mvnCliDetectorMock.Object, mavenWithFallbackDetectorMock.Object, }; - - // Verify files exist before running detectors - File.Exists(testFile1).Should().BeTrue("Maven dependency file 1 should exist before cleanup"); - File.Exists(testFile2).Should().BeTrue("Maven dependency file 2 should exist before cleanup"); - File.Exists(nonMavenFile).Should().BeTrue("Non-Maven file should exist before cleanup"); - - // Act - var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); - - // Assert - result.Should().NotBeNull(); - result.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Maven dependency files should be cleaned up - File.Exists(testFile1).Should().BeFalse("Maven dependency file 1 should be cleaned up"); - File.Exists(testFile2).Should().BeFalse("Maven dependency file 2 should be cleaned up"); - - // Non-Maven files should remain - File.Exists(nonMavenFile).Should().BeTrue("Non-Maven files should not be cleaned up"); - - // Verify both detectors were executed - mvnCliDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); - mavenWithFallbackDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); - } - finally - { - // Cleanup test directory - if (Directory.Exists(tempDir)) - { - 
Directory.Delete(tempDir, true); - } - } - } - - [TestMethod] - public async Task ProcessDetectorsAsync_WithoutMavenDetectors_ShouldNotCleanupMavenFiles() - { - // Arrange - Create a temporary directory with mock Maven dependency files - var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); - Directory.CreateDirectory(tempDir); - - // Use bcde.mvndeps - the specific filename generated by MavenCommandService - var testFile = Path.Combine(tempDir, "bcde.mvndeps"); - await File.WriteAllTextAsync(testFile, "maven dependency content"); - - try - { - var tempDirInfo = new DirectoryInfo(tempDir); - var scanSettings = new ScanSettings - { - SourceDirectory = tempDirInfo, - DetectorArgs = new Dictionary(), - CleanupCreatedFiles = true, - }; - - // Create mock non-Maven detector - var npmDetectorMock = new Mock(); - npmDetectorMock.SetupAllProperties(); - npmDetectorMock.SetupGet(x => x.Id).Returns("Npm"); - npmDetectorMock.SetupGet(x => x.Categories).Returns(["Npm"]); - npmDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); - - var detectors = new List { npmDetectorMock.Object, }; - - // Verify file exists before running detectors - File.Exists(testFile).Should().BeTrue("Maven dependency file should exist before processing"); - - // Act - var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); - - // Assert - result.Should().NotBeNull(); - result.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Maven dependency file should NOT be cleaned up when no Maven detectors are present - File.Exists(testFile).Should().BeTrue("Maven dependency file should remain when no Maven detectors are running"); - - // Verify detector was executed - npmDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), Times.Once); - } - finally - { - // Cleanup test 
directory - if (Directory.Exists(tempDir)) - { - Directory.Delete(tempDir, true); - } - } - } - - [TestMethod] - public async Task ProcessDetectorsAsync_OnlyOneMavenDetector_ShouldStillCleanupMavenFiles() - { - // Arrange - Create a temporary directory with mock Maven dependency files - var tempDir = Path.Combine(Path.GetTempPath(), $"maven_cleanup_test_{Guid.NewGuid()}"); - Directory.CreateDirectory(tempDir); - - // Use bcde.mvndeps - the specific filename generated by MavenCommandService - var testFile = Path.Combine(tempDir, "bcde.mvndeps"); - await File.WriteAllTextAsync(testFile, "maven dependency content"); - - try - { - var tempDirInfo = new DirectoryInfo(tempDir); - var scanSettings = new ScanSettings - { - SourceDirectory = tempDirInfo, - DetectorArgs = new Dictionary(), - CleanupCreatedFiles = true, - }; - - // Create mock Maven detector (only one) - var mvnCliDetectorMock = new Mock(); - mvnCliDetectorMock.SetupAllProperties(); - mvnCliDetectorMock.SetupGet(x => x.Id).Returns("MvnCli"); - mvnCliDetectorMock.SetupGet(x => x.Categories).Returns(["Maven"]); - mvnCliDetectorMock.Setup(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny())) - .ReturnsAsync(new IndividualDetectorScanResult { ResultCode = ProcessingResultCode.Success }); - - var detectors = new List { mvnCliDetectorMock.Object, }; - - // Verify file exists before running detectors - File.Exists(testFile).Should().BeTrue("Maven dependency file should exist before cleanup"); - - // Act - var result = await this.serviceUnderTest.ProcessDetectorsAsync(scanSettings, detectors, new DetectorRestrictions()); - - // Assert - result.Should().NotBeNull(); - result.ResultCode.Should().Be(ProcessingResultCode.Success); - - // Maven dependency file should be cleaned up even with only one Maven detector - File.Exists(testFile).Should().BeFalse("Maven dependency file should be cleaned up"); - - // Verify detector was executed - mvnCliDetectorMock.Verify(x => x.ExecuteDetectorAsync(It.IsAny(), It.IsAny()), 
Times.Once); - } - finally - { - // Cleanup test directory - if (Directory.Exists(tempDir)) - { - Directory.Delete(tempDir, true); - } - } - } } diff --git a/test/Microsoft.ComponentDetection.VerificationTests/ComponentDetectionIntegrationTests.cs b/test/Microsoft.ComponentDetection.VerificationTests/ComponentDetectionIntegrationTests.cs index a3e3b1fe3..abea9e4e6 100644 --- a/test/Microsoft.ComponentDetection.VerificationTests/ComponentDetectionIntegrationTests.cs +++ b/test/Microsoft.ComponentDetection.VerificationTests/ComponentDetectionIntegrationTests.cs @@ -15,6 +15,9 @@ namespace Microsoft.ComponentDetection.VerificationTests; [TestClass] public class ComponentDetectionIntegrationTests { + // Detectors intentionally removed (e.g., promoted/merged into another detector). MavenWithFallback was promoted into MvnCli. + private static readonly HashSet IntentionallyRemovedDetectors = ["MavenWithFallback"]; + private string oldLogFileContents; private string newLogFileContents; private DefaultGraphScanResult oldScanResult; @@ -183,7 +186,12 @@ public void CheckDetectorsRunTimesAndCounts() var oldMatches = Regex.Matches(this.oldLogFileContents, regexPattern); var newMatches = Regex.Matches(this.newLogFileContents, regexPattern); - newMatches.Should().HaveCountGreaterThanOrEqualTo(oldMatches.Count, "A detector was lost, make sure this was intentional."); + var removedDetectorsPresentInOldLog = oldMatches.Cast() + .Where(m => m.Groups[2].Success && IntentionallyRemovedDetectors.Contains(m.Groups[2].Value)) + .Select(m => m.Groups[2].Value) + .Distinct() + .Count(); + newMatches.Should().HaveCountGreaterThanOrEqualTo(oldMatches.Count - removedDetectorsPresentInOldLog, "A detector was lost, make sure this was intentional."); var detectorTimes = new Dictionary(); var detectorCounts = new Dictionary(); @@ -236,6 +244,11 @@ private void ProcessDetectorVersions() this.bumpedDetectorVersions = []; foreach (var cd in oldDetectors) { + if 
(IntentionallyRemovedDetectors.Contains(cd.DetectorId)) + { + continue; + } + var newDetector = newDetectors.FirstOrDefault(det => det.DetectorId == cd.DetectorId); if (newDetector == null) From 97f3d73ad73374b1b2e7c73e65e06ac672700ce6 Mon Sep 17 00:00:00 2001 From: Jason Paulos Date: Fri, 3 Apr 2026 14:34:17 -0400 Subject: [PATCH 2/2] Reapply "Add OCI image support to Linux scanner (#1708)" (#1716) (#1717) * Reapply "Add OCI image support to Linux scanner (#1708)" (#1716) This reverts commit db58407d4d3900af74be74ea95fbf4757d660584. * Reapply "Add Docker archive support to Linux scanner (#1711)" (#1715) (#1718) This reverts commit 69a20576841aab5dbdd235c9066daeb7b5a7695e. * Handle parse failures gracefully * Fix merge error --- docs/detectors/linux.md | 26 + .../DockerService.cs | 31 +- .../IDockerService.cs | 19 +- .../linux/Contracts/SourceClassExtensions.cs | 26 + .../linux/Contracts/SyftSourceLayer.cs | 18 + .../linux/Contracts/SyftSourceMetadata.cs | 46 + .../linux/ILinuxScanner.cs | 31 + .../linux/ImageReference.cs | 120 +++ .../linux/LinuxContainerDetector.cs | 438 +++++++-- .../linux/LinuxScanner.cs | 260 +++-- .../ImageReferenceTests.cs | 138 +++ .../LinuxContainerDetectorTests.cs | 916 ++++++++++++++++++ .../LinuxScannerTests.cs | 235 +++++ 13 files changed, 2120 insertions(+), 184 deletions(-) create mode 100644 src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SourceClassExtensions.cs create mode 100644 src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceLayer.cs create mode 100644 src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceMetadata.cs create mode 100644 src/Microsoft.ComponentDetection.Detectors/linux/ImageReference.cs create mode 100644 test/Microsoft.ComponentDetection.Detectors.Tests/ImageReferenceTests.cs diff --git a/docs/detectors/linux.md b/docs/detectors/linux.md index 87789f8a9..b4e1c613f 100644 --- a/docs/detectors/linux.md +++ b/docs/detectors/linux.md @@ -11,6 +11,31 @@ Linux detection 
depends on the following: Linux package detection is performed by running [Syft](https://github.com/anchore/syft) and parsing the output. The output contains the package name, version, and the layer of the container in which it was found. +### Supported Input Types + +The Linux detector runs on container images passed under the `--DockerImagesToScan` flag. + +Supported image reference formats are: + +#### Name and Tag/Digest + +Images in the local Docker daemon or a remote registry can be referenced by name and tag or digest. For example, `ubuntu:16.04`. Remote images will be pulled if they are not present locally. + +#### Digest Only + +Images already present in the local Docker daemon can be referenced by just a digest. For example, `sha256:56bab49eef2ef07505f6a1b0d5bd3a601dfc3c76ad4460f24c91d6fa298369ab`. + +#### OCI Images + +Images present on the filesystem as either an [OCI layout directory](https://specs.opencontainers.org/image-spec/image-layout/) or an OCI image archive (tarball) can be referenced by file path. + +- For OCI image layout directories, use the prefix `oci-dir:` followed by the path to the directory, e.g. `oci-dir:/path/to/image` +- For OCI image archives (tarballs), use the prefix `oci-archive:` followed by the path to the archive file, e.g. `oci-archive:/path/to/image.tar` + +#### Docker Archives + +Images saved to disk via `docker save` can be referenced using the `docker-archive:` prefix followed by the path to the tarball, e.g. `docker-archive:/path/to/image.tar`. + ### Scanner Scope By default, this detector invokes Syft with the `all-layers` scanning scope (i.e. the Syft argument `--scope all-layers`). 
@@ -28,3 +53,4 @@ For example: ## Known limitations - Windows container scanning is not supported +- Multiplatform images are not supported diff --git a/src/Microsoft.ComponentDetection.Common/DockerService.cs b/src/Microsoft.ComponentDetection.Common/DockerService.cs index f789d979b..d55540bd8 100644 --- a/src/Microsoft.ComponentDetection.Common/DockerService.cs +++ b/src/Microsoft.ComponentDetection.Common/DockerService.cs @@ -183,6 +183,11 @@ public async Task InspectImageAsync(string image, Cancellation } public async Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList command, CancellationToken cancellationToken = default) + { + return await this.CreateAndRunContainerAsync(image, command, additionalBinds: null, cancellationToken); + } + + public async Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList command, IList additionalBinds, CancellationToken cancellationToken = default) { var commandJson = JsonSerializer.Serialize(command); @@ -194,7 +199,7 @@ public async Task InspectImageAsync(string image, Cancellation }; await this.TryPullImageAsync(image, cancellationToken); - var container = await CreateContainerAsync(image, command, cancellationToken); + var container = await CreateContainerAsync(image, command, additionalBinds, cancellationToken); record.Container = JsonSerializer.Serialize(container); try @@ -272,6 +277,7 @@ public async Task InspectImageAsync(string image, Cancellation private static async Task CreateContainerAsync( string image, IList command, + IList additionalBinds, CancellationToken cancellationToken = default) { using var record = new DockerServiceStepTelemetryRecord @@ -283,6 +289,17 @@ private static async Task CreateContainerAsync( try { + var binds = new List + { + $"{Path.GetTempPath()}:/tmp", + "/var/run/docker.sock:/var/run/docker.sock", + }; + + if (additionalBinds != null) + { + binds.AddRange(additionalBinds); + } + var parameters = new CreateContainerParameters 
{ Image = image, @@ -298,11 +315,7 @@ private static async Task CreateContainerAsync( [ "no-new-privileges", ], - Binds = - [ - $"{Path.GetTempPath()}:/tmp", - "/var/run/docker.sock:/var/run/docker.sock", - ], + Binds = binds, }, }; @@ -394,4 +407,10 @@ private static int GetContainerId() { return Interlocked.Increment(ref incrementingContainerId); } + + /// + public ContainerDetails GetEmptyContainerDetails() + { + return new ContainerDetails { Id = GetContainerId() }; + } } diff --git a/src/Microsoft.ComponentDetection.Contracts/IDockerService.cs b/src/Microsoft.ComponentDetection.Contracts/IDockerService.cs index 4f9a35313..462aff989 100644 --- a/src/Microsoft.ComponentDetection.Contracts/IDockerService.cs +++ b/src/Microsoft.ComponentDetection.Contracts/IDockerService.cs @@ -52,9 +52,26 @@ public interface IDockerService /// /// Creates and runs a container with the given image and command. /// - /// The image to inspect. + /// The image to run. /// The command to run in the container. /// The cancellation token. /// A tuple of stdout and stderr from the container. Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList command, CancellationToken cancellationToken = default); + + /// + /// Creates and runs a container with the given image, command, and additional volume binds. + /// + /// The image to run. + /// The command to run in the container. + /// Additional volume bind mounts to add to the container (e.g., "/host/path:/container/path:ro"). + /// The cancellation token. + /// A tuple of stdout and stderr from the container. + Task<(string Stdout, string Stderr)> CreateAndRunContainerAsync(string image, IList command, IList additionalBinds, CancellationToken cancellationToken = default); + + /// + /// Creates an empty with a unique ID assigned. + /// Used for image types where details are not obtained from Docker inspect (e.g., OCI layout images). + /// + /// A with only the populated. 
+ ContainerDetails GetEmptyContainerDetails(); } diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SourceClassExtensions.cs b/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SourceClassExtensions.cs new file mode 100644 index 000000000..366aef450 --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SourceClassExtensions.cs @@ -0,0 +1,26 @@ +namespace Microsoft.ComponentDetection.Detectors.Linux.Contracts; + +using System.Text.Json; + +/// +/// Extends the auto-generated with a method to +/// deserialize its untyped into a +/// strongly-typed . +/// +public partial class SourceClass +{ + /// + /// Deserializes the property into a . + /// Returns null if is null or not a . + /// + /// A deserialized instance, or null. + internal SyftSourceMetadata? GetSyftSourceMetadata() + { + if (this.Metadata is JsonElement element) + { + return JsonSerializer.Deserialize(element.GetRawText()); + } + + return null; + } +} diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceLayer.cs b/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceLayer.cs new file mode 100644 index 000000000..2f575bbab --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceLayer.cs @@ -0,0 +1,18 @@ +namespace Microsoft.ComponentDetection.Detectors.Linux.Contracts; + +using System.Text.Json.Serialization; + +/// +/// Represents a single layer in the image source metadata from Syft output. +/// +internal class SyftSourceLayer +{ + [JsonPropertyName("mediaType")] + public string? MediaType { get; set; } + + [JsonPropertyName("digest")] + public string? Digest { get; set; } + + [JsonPropertyName("size")] + public long? 
Size { get; set; } +} diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceMetadata.cs b/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceMetadata.cs new file mode 100644 index 000000000..069c2adee --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/linux/Contracts/SyftSourceMetadata.cs @@ -0,0 +1,46 @@ +namespace Microsoft.ComponentDetection.Detectors.Linux.Contracts; + +using System.Collections.Generic; +using System.Text.Json.Serialization; + +/// +/// Represents the metadata from a Syft scan source of type "image". +/// Contains image details such as layers, labels, tags, and image ID. +/// Deserialized from the source.metadata field in Syft JSON output, +/// which is typed as object in the auto-generated . +/// +internal class SyftSourceMetadata +{ + [JsonPropertyName("userInput")] + public string? UserInput { get; set; } + + [JsonPropertyName("imageID")] + public string? ImageId { get; set; } + + [JsonPropertyName("manifestDigest")] + public string? ManifestDigest { get; set; } + + [JsonPropertyName("mediaType")] + public string? MediaType { get; set; } + + [JsonPropertyName("tags")] + public string[]? Tags { get; set; } + + [JsonPropertyName("imageSize")] + public long? ImageSize { get; set; } + + [JsonPropertyName("layers")] + public SyftSourceLayer[]? Layers { get; set; } + + [JsonPropertyName("repoDigests")] + public string[]? RepoDigests { get; set; } + + [JsonPropertyName("architecture")] + public string? Architecture { get; set; } + + [JsonPropertyName("os")] + public string? Os { get; set; } + + [JsonPropertyName("labels")] + public Dictionary? 
Labels { get; set; } +} diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/ILinuxScanner.cs b/src/Microsoft.ComponentDetection.Detectors/linux/ILinuxScanner.cs index aa3cf6b3e..3064a6013 100644 --- a/src/Microsoft.ComponentDetection.Detectors/linux/ILinuxScanner.cs +++ b/src/Microsoft.ComponentDetection.Detectors/linux/ILinuxScanner.cs @@ -5,6 +5,7 @@ namespace Microsoft.ComponentDetection.Detectors.Linux; using System.Threading.Tasks; using Microsoft.ComponentDetection.Contracts.BcdeModels; using Microsoft.ComponentDetection.Contracts.TypedComponent; +using Microsoft.ComponentDetection.Detectors.Linux.Contracts; /// /// Interface for scanning Linux container layers to identify components. @@ -13,6 +14,7 @@ public interface ILinuxScanner { /// /// Scans a Linux container image for components and maps them to their respective layers. + /// Runs Syft and processes the output in a single step. /// /// The hash identifier of the container image to scan. /// The collection of Docker layers that make up the container image. @@ -29,4 +31,33 @@ public Task> ScanLinuxAsync( LinuxScannerScope scope, CancellationToken cancellationToken = default ); + + /// + /// Runs the Syft scanner and returns the raw parsed output without processing components. + /// Use this when the caller needs access to the full Syft output (e.g., to extract source metadata for OCI images). + /// + /// The source argument passed to Syft (e.g., an image hash or "oci-dir:/oci-image"). + /// Additional volume bind mounts for the Syft container (e.g., for mounting OCI directories). + /// The scope for scanning the image. + /// A token to monitor for cancellation requests. + /// A task that represents the asynchronous operation. The task result contains the parsed . + public Task GetSyftOutputAsync( + string syftSource, + IList additionalBinds, + LinuxScannerScope scope, + CancellationToken cancellationToken = default + ); + + /// + /// Processes parsed Syft output into layer-mapped components. 
+ /// + /// The parsed Syft output. + /// The layers to map components to. + /// The set of component types to include in the results. + /// A collection of representing the components found and their associated layers. + public IEnumerable ProcessSyftOutput( + SyftOutput syftOutput, + IEnumerable containerLayers, + ISet enabledComponentTypes + ); } diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/ImageReference.cs b/src/Microsoft.ComponentDetection.Detectors/linux/ImageReference.cs new file mode 100644 index 000000000..fcb8c1c34 --- /dev/null +++ b/src/Microsoft.ComponentDetection.Detectors/linux/ImageReference.cs @@ -0,0 +1,120 @@ +namespace Microsoft.ComponentDetection.Detectors.Linux; + +using System; + +/// +/// Specifies the type of image reference. +/// +internal enum ImageReferenceKind +{ + /// + /// A Docker image reference (e.g., "node:latest", "sha256:abc123"). + /// + DockerImage, + + /// + /// An OCI Image Layout directory on disk (e.g., "oci-dir:/path/to/image"). + /// + OciLayout, + + /// + /// An OCI archive (tarball) file on disk (e.g., "oci-archive:/path/to/image.tar"). + /// + OciArchive, + + /// + /// A Docker archive (tarball) file on disk created by "docker save" (e.g., "docker-archive:/path/to/image.tar"). + /// + DockerArchive, +} + +/// +/// Represents a parsed image reference from the scan input, with its type and cleaned reference string. +/// +internal class ImageReference +{ + private const string OciDirPrefix = "oci-dir:"; + private const string OciArchivePrefix = "oci-archive:"; + private const string DockerArchivePrefix = "docker-archive:"; + + /// + /// Gets the original input string as provided by the user. + /// + public required string OriginalInput { get; init; } + + /// + /// Gets the cleaned reference string with any scheme prefix removed. + /// For Docker images, this is lowercased. For file paths, case is preserved. 
+ /// + public required string Reference { get; init; } + + /// + /// Gets the kind of image reference. + /// + public required ImageReferenceKind Kind { get; init; } + + /// + /// Parses an input image string into an . + /// + /// The raw image input string. + /// A parsed . + public static ImageReference Parse(string input) + { + if (input.StartsWith(OciDirPrefix, StringComparison.OrdinalIgnoreCase)) + { + var path = input[OciDirPrefix.Length..]; + if (string.IsNullOrWhiteSpace(path)) + { + throw new ArgumentException($"Input with '{OciDirPrefix}' prefix must include a path.", nameof(input)); + } + + return new ImageReference + { + OriginalInput = input, + Reference = path, + Kind = ImageReferenceKind.OciLayout, + }; + } + + if (input.StartsWith(OciArchivePrefix, StringComparison.OrdinalIgnoreCase)) + { + var path = input[OciArchivePrefix.Length..]; + if (string.IsNullOrWhiteSpace(path)) + { + throw new ArgumentException($"Input with '{OciArchivePrefix}' prefix must include a path.", nameof(input)); + } + + return new ImageReference + { + OriginalInput = input, + Reference = path, + Kind = ImageReferenceKind.OciArchive, + }; + } + + if (input.StartsWith(DockerArchivePrefix, StringComparison.OrdinalIgnoreCase)) + { + var path = input[DockerArchivePrefix.Length..]; + if (string.IsNullOrWhiteSpace(path)) + { + throw new ArgumentException($"Input with '{DockerArchivePrefix}' prefix must include a path.", nameof(input)); + } + + return new ImageReference + { + OriginalInput = input, + Reference = path, + Kind = ImageReferenceKind.DockerArchive, + }; + } + +#pragma warning disable CA1308 + return new ImageReference + { + OriginalInput = input, + Reference = input.ToLowerInvariant(), + Kind = ImageReferenceKind.DockerImage, + }; +#pragma warning restore CA1308 + } +} diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs index 3a097db36..8b3fff13e 100644 --- 
a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs @@ -3,6 +3,7 @@ namespace Microsoft.ComponentDetection.Detectors.Linux; using System; using System.Collections.Concurrent; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Runtime.InteropServices; using System.Threading; @@ -30,6 +31,12 @@ ILogger logger private const string ScanScopeConfigKey = "Linux.ImageScanScope"; private const LinuxScannerScope DefaultScanScope = LinuxScannerScope.AllLayers; + private const string LocalImageMountPoint = "/image"; + + // Base image annotations from ADO dockerTask + private const string BaseImageRefAnnotation = "image.base.ref.name"; + private const string BaseImageDigestAnnotation = "image.base.digest"; + private readonly ILinuxScanner linuxScanner = linuxScanner; private readonly IDockerService dockerService = dockerService; private readonly ILogger logger = logger; @@ -65,14 +72,23 @@ public async Task ExecuteDetectorAsync( CancellationToken cancellationToken = default ) { -#pragma warning disable CA1308 - var imagesToProcess = request - .ImagesToScan?.Where(image => !string.IsNullOrWhiteSpace(image)) - .Select(image => image.ToLowerInvariant()) + var imagesToParse = (request.ImagesToScan ?? 
[]).Where(image => !string.IsNullOrWhiteSpace(image)) .ToList(); -#pragma warning restore CA1308 - if (imagesToProcess == null || imagesToProcess.Count == 0) + var allImages = new List(); + foreach (var image in imagesToParse) + { + try + { + allImages.Add(ImageReference.Parse(image)); + } + catch (Exception e) + { + this.logger.LogWarning(e, "Failed to parse image reference '{Image}', skipping", image); + } + } + + if (allImages.Count == 0) { this.logger.LogInformation("No instructions received to scan container images."); return EmptySuccessfulScan(); @@ -97,7 +113,7 @@ public async Task ExecuteDetectorAsync( try { results = await this.ProcessImagesAsync( - imagesToProcess, + allImages, request.ComponentRecorder, scannerScope, timeoutCts.Token @@ -204,118 +220,358 @@ private static void RecordImageDetectionFailure(Exception exception, string imag } private async Task> ProcessImagesAsync( - IEnumerable imagesToProcess, + IEnumerable imageReferences, IComponentRecorder componentRecorder, LinuxScannerScope scannerScope, CancellationToken cancellationToken = default ) { - var processedImages = new ConcurrentDictionary(); + // Phase 1: Resolve images. + + // Docker images will resolve to ContainerDetails via inspect. Deduplicate by ImageId since multiple refs can resolve to the same image. + var processedDockerImages = new ConcurrentDictionary(); + + // Local images will be validated for existence and tracked by their file path. + var localImages = new ConcurrentDictionary(); + + var resolveTasks = imageReferences.Select(imageRef => + this.ResolveImageAsync(imageRef, processedDockerImages, localImages, componentRecorder, cancellationToken)); + + await Task.WhenAll(resolveTasks); + + // Phase 2: Scan and record components for all resolved images concurrently. 
+ var scanTasks = new List>(); - var inspectTasks = imagesToProcess.Select(async image => + scanTasks.AddRange(processedDockerImages.Select(kvp => + this.ScanDockerImageAsync(kvp.Key, kvp.Value, scannerScope, componentRecorder, cancellationToken))); + + scanTasks.AddRange(localImages + .Select(kvp => + this.ScanLocalImageAsync(kvp.Key, kvp.Value, scannerScope, componentRecorder, cancellationToken))); + + return await Task.WhenAll(scanTasks); + } + + /// + /// Resolves an image by doing one of the following: + /// * For Docker images, resolve the reference by pulling (if needed) and inspecting it. + /// Adds the result to the processedImages dictionary for deduplication. + /// * For local images, verify the path exists and adds the reference to a concurrent + /// set for tracking which images to scan in phase 2. + /// + private async Task ResolveImageAsync( + ImageReference imageRef, + ConcurrentDictionary resolvedDockerImages, + ConcurrentDictionary localImages, + IComponentRecorder componentRecorder, + CancellationToken cancellationToken) + { + try { - try + switch (imageRef.Kind) { - // Check image exists locally. Try pulling if not - if ( - !( - await this.dockerService.ImageExistsLocallyAsync(image, cancellationToken) - || await this.dockerService.TryPullImageAsync(image, cancellationToken) - ) - ) - { + case ImageReferenceKind.DockerImage: + await this.ResolveDockerImageAsync(imageRef.Reference, resolvedDockerImages, cancellationToken); + break; + case ImageReferenceKind.OciLayout: + case ImageReferenceKind.OciArchive: + case ImageReferenceKind.DockerArchive: + var fullPath = this.ValidateLocalImagePath(imageRef); + localImages.TryAdd(fullPath, imageRef.Kind); + break; + default: throw new InvalidUserInputException( - $"Container image {image} could not be found locally and could not be pulled. Verify the image is either available locally or can be pulled from a registry." 
+ $"Unsupported image reference kind '{imageRef.Kind}' for image '{imageRef.OriginalInput}'." ); + } + } + catch (Exception e) + { + this.logger.LogWarning(e, "Processing of image {ContainerImage} (kind {ImageType}) failed", imageRef.OriginalInput, imageRef.Kind); + RecordImageDetectionFailure(e, imageRef.OriginalInput); + + var singleFileComponentRecorder = + componentRecorder.CreateSingleFileComponentRecorder(imageRef.OriginalInput); + singleFileComponentRecorder.RegisterPackageParseFailure(imageRef.OriginalInput); + } + } + + private async Task ResolveDockerImageAsync( + string image, + ConcurrentDictionary resolvedDockerImages, + CancellationToken cancellationToken) + { + if ( + !( + await this.dockerService.ImageExistsLocallyAsync(image, cancellationToken) + || await this.dockerService.TryPullImageAsync(image, cancellationToken) + ) + ) + { + throw new InvalidUserInputException( + $"Container image {image} could not be found locally and could not be pulled. Verify the image is either available locally or can be pulled from a registry." + ); + } + + var imageDetails = + await this.dockerService.InspectImageAsync(image, cancellationToken) + ?? throw new MissingContainerDetailException(image); + + resolvedDockerImages.TryAdd(imageDetails.ImageId, imageDetails); + } + + /// + /// Validates that a local image path exists on disk. Throws a if it does not. + /// For OCI layouts, checks for a directory. For OCI archives and Docker archives, checks for a file. + /// Returns the full path to the local image if validation succeeds. 
+ /// + private string ValidateLocalImagePath(ImageReference imageRef) + { + var path = Path.GetFullPath(imageRef.Reference); + var exists = imageRef.Kind switch + { + ImageReferenceKind.OciLayout => Directory.Exists(path), + ImageReferenceKind.OciArchive => System.IO.File.Exists(path), + ImageReferenceKind.DockerArchive => System.IO.File.Exists(path), + ImageReferenceKind.DockerImage or _ => throw new InvalidOperationException( + $"ValidateLocalImagePath does not support image kind '{imageRef.Kind}'."), + }; + + if (!exists) + { + throw new FileNotFoundException( + $"Local image at path {imageRef.Reference} does not exist.", + imageRef.Reference + ); + } + + return path; + } + + /// + /// Scans a Docker image (already inspected) and records its components. + /// + private async Task ScanDockerImageAsync( + string imageId, + ContainerDetails containerDetails, + LinuxScannerScope scannerScope, + IComponentRecorder componentRecorder, + CancellationToken cancellationToken) + { + try + { + var baseImageLayerCount = await this.GetBaseImageLayerCountAsync( + containerDetails, + imageId, + cancellationToken + ); + + // Update layers with base image attribution + containerDetails.Layers = containerDetails.Layers.Select( + layer => new DockerLayer + { + DiffId = layer.DiffId, + LayerIndex = layer.LayerIndex, + IsBaseImage = layer.LayerIndex < baseImageLayerCount, } + ).ToList(); + + var enabledComponentTypes = this.GetEnabledComponentTypes(); + var layers = await this.linuxScanner.ScanLinuxAsync( + containerDetails.ImageId, + containerDetails.Layers, + baseImageLayerCount, + enabledComponentTypes, + scannerScope, + cancellationToken + ) ?? throw new InvalidOperationException($"Failed to scan image layers for image {containerDetails.ImageId}"); - var imageDetails = - await this.dockerService.InspectImageAsync(image, cancellationToken) - ?? 
throw new MissingContainerDetailException(image); + return this.RecordComponents(containerDetails, layers, componentRecorder); + } + catch (Exception e) + { + this.logger.LogWarning(e, "Scanning of image {ImageId} failed", containerDetails.ImageId); + RecordImageDetectionFailure(e, containerDetails.ImageId); - processedImages.TryAdd(imageDetails.ImageId, imageDetails); - } - catch (Exception e) - { - this.logger.LogWarning(e, "Processing of image {ContainerImage} failed", image); - RecordImageDetectionFailure(e, image); + var singleFileComponentRecorder = + componentRecorder.CreateSingleFileComponentRecorder(containerDetails.ImageId); + singleFileComponentRecorder.RegisterPackageParseFailure(imageId); + } - var singleFileComponentRecorder = - componentRecorder.CreateSingleFileComponentRecorder(image); - singleFileComponentRecorder.RegisterPackageParseFailure(image); - } - }); + return EmptyImageScanningResult(); + } - await Task.WhenAll(inspectTasks); + /// + /// Scans a local image (OCI layout directory or archive file) by invoking Syft with a volume + /// mount, extracting metadata from the Syft output to build ContainerDetails, and processing + /// detected components. + /// + private async Task ScanLocalImageAsync( + string localImagePath, + ImageReferenceKind imageRefKind, + LinuxScannerScope scannerScope, + IComponentRecorder componentRecorder, + CancellationToken cancellationToken) + { + string hostPathToBind; + string syftContainerPath; + switch (imageRefKind) + { + case ImageReferenceKind.OciLayout: + hostPathToBind = localImagePath; + syftContainerPath = $"oci-dir:{LocalImageMountPoint}"; + break; + case ImageReferenceKind.OciArchive: + hostPathToBind = Path.GetDirectoryName(localImagePath) + ?? 
throw new InvalidOperationException($"Could not determine parent directory for OCI archive path '{localImagePath}'."); + syftContainerPath = $"oci-archive:{LocalImageMountPoint}/{Path.GetFileName(localImagePath)}"; + break; + case ImageReferenceKind.DockerArchive: + hostPathToBind = Path.GetDirectoryName(localImagePath) + ?? throw new InvalidOperationException($"Could not determine parent directory for Docker archive path '{localImagePath}'."); + syftContainerPath = $"docker-archive:{LocalImageMountPoint}/{Path.GetFileName(localImagePath)}"; + break; + case ImageReferenceKind.DockerImage: + default: + throw new InvalidUserInputException( + $"Unsupported image reference kind '{imageRefKind}' for local image at path '{localImagePath}'." + ); + } - var scanTasks = processedImages.Select(async kvp => + try { + var additionalBinds = new List + { + // Bind the local image path into the Syft container as read-only + $"{hostPathToBind}:{LocalImageMountPoint}:ro", + }; + + var syftOutput = await this.linuxScanner.GetSyftOutputAsync( + syftContainerPath, + additionalBinds, + scannerScope, + cancellationToken + ); + + SyftSourceMetadata? sourceMetadata = null; try { - var internalContainerDetails = kvp.Value; - var image = kvp.Key; - var baseImageLayerCount = await this.GetBaseImageLayerCountAsync( - internalContainerDetails, - image, - cancellationToken + sourceMetadata = syftOutput.Source?.GetSyftSourceMetadata(); + } + catch (Exception e) + { + this.logger.LogWarning( + e, + "Failed to deserialize Syft source metadata for local image at {LocalImagePath}. 
Proceeding without metadata", + localImagePath ); + } - // Update the layer information to specify if a layer was found in the specified baseImage - internalContainerDetails.Layers = internalContainerDetails.Layers.Select( - layer => new DockerLayer - { - DiffId = layer.DiffId, - LayerIndex = layer.LayerIndex, - IsBaseImage = layer.LayerIndex < baseImageLayerCount, - } + if (sourceMetadata?.Layers == null || sourceMetadata.Layers.Length == 0) + { + this.logger.LogWarning( + "No layer information found in Syft output for local image at {LocalImagePath}", + localImagePath ); + } - var enabledComponentTypes = this.GetEnabledComponentTypes(); - var layers = await this.linuxScanner.ScanLinuxAsync( - kvp.Value.ImageId, - internalContainerDetails.Layers, - baseImageLayerCount, - enabledComponentTypes, - scannerScope, - cancellationToken - ); + // Build ContainerDetails from Syft source metadata + var containerDetails = this.dockerService.GetEmptyContainerDetails(); + containerDetails.ImageId = !string.IsNullOrWhiteSpace(sourceMetadata?.ImageId) + ? sourceMetadata.ImageId + : localImagePath; + containerDetails.Digests = sourceMetadata?.RepoDigests ?? []; + containerDetails.Tags = sourceMetadata?.Tags ?? []; + containerDetails.Layers = sourceMetadata?.Layers? + .Select((layer, index) => new DockerLayer + { + DiffId = layer.Digest ?? string.Empty, + LayerIndex = index, + }) + .ToList() ?? 
[]; + + // Extract base image annotations from the Syft source metadata labels + var baseImageRef = string.Empty; + var baseImageDigest = string.Empty; + sourceMetadata?.Labels?.TryGetValue(BaseImageRefAnnotation, out baseImageRef); + sourceMetadata?.Labels?.TryGetValue(BaseImageDigestAnnotation, out baseImageDigest); + containerDetails.BaseImageRef = baseImageRef; + containerDetails.BaseImageDigest = baseImageDigest; + + // Determine base image layer count using existing logic + var baseImageLayerCount = await this.GetBaseImageLayerCountAsync( + containerDetails, + localImagePath, + cancellationToken + ); - var components = layers.SelectMany(layer => - layer.Components.Select(component => new DetectedComponent( - component, - null, - internalContainerDetails.Id, - layer.DockerLayer.LayerIndex - )) - ); - internalContainerDetails.Layers = layers.Select(layer => layer.DockerLayer); - var singleFileComponentRecorder = - componentRecorder.CreateSingleFileComponentRecorder(kvp.Value.ImageId); - components - .ToList() - .ForEach(detectedComponent => - singleFileComponentRecorder.RegisterUsage(detectedComponent, true) - ); - return new ImageScanningResult + // Update layers with base image attribution + containerDetails.Layers = containerDetails.Layers.Select( + layer => new DockerLayer { - ContainerDetails = kvp.Value, - Components = components, - }; - } - catch (Exception e) - { - this.logger.LogWarning(e, "Scanning of image {ImageId} failed", kvp.Value.ImageId); - RecordImageDetectionFailure(e, kvp.Value.ImageId); + DiffId = layer.DiffId, + LayerIndex = layer.LayerIndex, + IsBaseImage = layer.LayerIndex < baseImageLayerCount, + } + ).ToList(); + + // Process components from the same Syft output + var enabledComponentTypes = this.GetEnabledComponentTypes(); + var layers = this.linuxScanner.ProcessSyftOutput( + syftOutput, + containerDetails.Layers, + enabledComponentTypes + ); - var singleFileComponentRecorder = - 
componentRecorder.CreateSingleFileComponentRecorder(kvp.Value.ImageId); - singleFileComponentRecorder.RegisterPackageParseFailure(kvp.Key); - } + return this.RecordComponents(containerDetails, layers, componentRecorder); + } + catch (Exception e) + { + this.logger.LogWarning( + e, + "Processing of local image at {LocalImagePath} failed", + localImagePath + ); + RecordImageDetectionFailure(e, localImagePath); - return EmptyImageScanningResult(); - }); + var singleFileComponentRecorder = + componentRecorder.CreateSingleFileComponentRecorder(localImagePath); + singleFileComponentRecorder.RegisterPackageParseFailure(localImagePath); + } - return await Task.WhenAll(scanTasks); + return EmptyImageScanningResult(); + } + + /// + /// Records detected components from layer-mapped scan results into the component recorder. + /// + private ImageScanningResult RecordComponents( + ContainerDetails containerDetails, + IEnumerable layers, + IComponentRecorder componentRecorder) + { + var materializedLayers = layers.ToList(); + var components = materializedLayers.SelectMany(layer => + layer.Components.Select(component => new DetectedComponent( + component, + null, + containerDetails.Id, + layer.DockerLayer.LayerIndex + )) + ).ToList(); + containerDetails.Layers = materializedLayers.Select(layer => layer.DockerLayer); + + var singleFileComponentRecorder = + componentRecorder.CreateSingleFileComponentRecorder(containerDetails.ImageId); + components.ForEach(detectedComponent => + singleFileComponentRecorder.RegisterUsage(detectedComponent, true) + ); + + return new ImageScanningResult + { + ContainerDetails = containerDetails, + Components = components, + }; } private async Task GetBaseImageLayerCountAsync( diff --git a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs index c895904e3..6482958b9 100644 --- a/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs +++ 
b/src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs @@ -92,7 +92,176 @@ public async Task> ScanLinuxAsync( ImageToScan = imageHash, ScannerVersion = ScannerImage, }; + using var syftTelemetryRecord = new LinuxScannerSyftTelemetryRecord(); + var stdout = await this.RunSyftAsync(imageHash, scope, additionalBinds: [], record, syftTelemetryRecord, cancellationToken); + + try + { + var syftOutput = SyftOutput.FromJson(stdout); + return this.ProcessSyftOutputWithTelemetry(syftOutput, containerLayers, enabledComponentTypes, syftTelemetryRecord); + } + catch (Exception e) + { + record.FailedDeserializingScannerOutput = e.ToString(); + this.logger.LogError(e, "Failed to deserialize Syft output for image {ImageHash}", imageHash); + return []; + } + } + + /// + public async Task GetSyftOutputAsync( + string syftSource, + IList additionalBinds, + LinuxScannerScope scope, + CancellationToken cancellationToken = default + ) + { + using var record = new LinuxScannerTelemetryRecord + { + ImageToScan = syftSource, + ScannerVersion = ScannerImage, + }; + using var syftTelemetryRecord = new LinuxScannerSyftTelemetryRecord(); + var stdout = await this.RunSyftAsync(syftSource, scope, additionalBinds, record, syftTelemetryRecord, cancellationToken); + try + { + return SyftOutput.FromJson(stdout); + } + catch (Exception e) + { + record.FailedDeserializingScannerOutput = e.ToString(); + this.logger.LogError(e, "Failed to deserialize Syft output for source {SyftSource}", syftSource); + throw; + } + } + + /// + public IEnumerable ProcessSyftOutput( + SyftOutput syftOutput, + IEnumerable containerLayers, + ISet enabledComponentTypes) + { + using var syftTelemetryRecord = new LinuxScannerSyftTelemetryRecord(); + return this.ProcessSyftOutputWithTelemetry(syftOutput, containerLayers, enabledComponentTypes, syftTelemetryRecord); + } + + private IEnumerable ProcessSyftOutputWithTelemetry( + SyftOutput syftOutput, + IEnumerable containerLayers, + ISet enabledComponentTypes, + 
LinuxScannerSyftTelemetryRecord syftTelemetryRecord) + { + // Apply artifact filters (e.g., Mariner 2.0 workaround) + var validArtifacts = syftOutput.Artifacts.AsEnumerable(); + foreach (var filter in this.artifactFilters) + { + validArtifacts = filter.Filter(validArtifacts, syftOutput.Distro); + } + + // Build a set of enabled factories based on requested component types + var enabledFactories = new HashSet(); + foreach (var componentType in enabledComponentTypes) + { + if ( + this.componentTypeToFactoryLookup.TryGetValue(componentType, out var factory) + && factory != null + ) + { + enabledFactories.Add(factory); + } + } + + // Create components using only enabled factories + var componentsWithLayers = validArtifacts + .DistinctBy(artifact => (artifact.Name, artifact.Version, artifact.Type)) + .Select(artifact => + this.CreateComponentWithLayers(artifact, syftOutput.Distro, enabledFactories) + ) + .Where(result => result.Component != null) + .Select(result => (Component: result.Component!, result.LayerIds)) + .ToList(); + + // Track unsupported artifact types for telemetry + var unsupportedTypes = validArtifacts + .Where(a => !this.artifactTypeToFactoryLookup.ContainsKey(a.Type)) + .Select(a => a.Type) + .Distinct() + .ToList(); + + if (unsupportedTypes.Count > 0) + { + this.logger.LogDebug( + "Encountered unsupported artifact types: {UnsupportedTypes}", + string.Join(", ", unsupportedTypes) + ); + } + + // Track detected components in telemetry + syftTelemetryRecord.Components = JsonSerializer.Serialize( + componentsWithLayers.Select(c => c.Component.Id) + ); + + // Build a layer dictionary from the provided container layers and map components. 
+ var knownLayers = containerLayers.ToList(); + + if (knownLayers.Count > 0) + { + var layerDictionary = knownLayers + .DistinctBy(layer => layer.DiffId) + .ToDictionary(layer => layer.DiffId, _ => new List()); + + foreach (var (component, layers) in componentsWithLayers) + { + foreach (var layer in layers) + { + if (layerDictionary.TryGetValue(layer, out var componentList)) + { + componentList.Add(component); + } + } + } + + return layerDictionary.Select(kvp => new LayerMappedLinuxComponents + { + Components = kvp.Value, + DockerLayer = knownLayers.First(layer => layer.DiffId == kvp.Key), + }); + } + + // No container layers provided — return all components under a single + // entry with no layer information rather than silently dropping them. + var allComponents = componentsWithLayers.Select(c => c.Component).ToList(); + if (allComponents.Count == 0) + { + return []; + } + return + [ + new LayerMappedLinuxComponents + { + Components = allComponents, + DockerLayer = new DockerLayer() + { + DiffId = string.Empty, + LayerIndex = 0, + IsBaseImage = false, + }, + }, + ]; + } + + /// + /// Runs the Syft scanner container and returns the stdout output. 
+ /// + private async Task RunSyftAsync( + string syftSource, + LinuxScannerScope scope, + IList additionalBinds, + LinuxScannerTelemetryRecord record, + LinuxScannerSyftTelemetryRecord syftTelemetryRecord, + CancellationToken cancellationToken) + { var acquired = false; var stdout = string.Empty; var stderr = string.Empty; @@ -107,8 +276,6 @@ public async Task> ScanLinuxAsync( ), }; - using var syftTelemetryRecord = new LinuxScannerSyftTelemetryRecord(); - try { acquired = await ContainerSemaphore.WaitAsync(SemaphoreTimeout, cancellationToken); @@ -116,13 +283,14 @@ public async Task> ScanLinuxAsync( { try { - var command = new List { imageHash } + var command = new List { syftSource } .Concat(CmdParameters) .Concat(scopeParameters) .ToList(); (stdout, stderr) = await this.dockerService.CreateAndRunContainerAsync( ScannerImage, command, + additionalBinds, cancellationToken ); } @@ -137,8 +305,8 @@ public async Task> ScanLinuxAsync( { record.SemaphoreFailure = true; this.logger.LogWarning( - "Failed to enter the container semaphore for image {ImageHash}", - imageHash + "Failed to enter the container semaphore for image {SyftSource}", + syftSource ); } } @@ -160,87 +328,7 @@ public async Task> ScanLinuxAsync( ); } - var layerDictionary = containerLayers - .DistinctBy(layer => layer.DiffId) - .ToDictionary(layer => layer.DiffId, _ => new List()); - - try - { - var syftOutput = SyftOutput.FromJson(stdout); - - // Apply artifact filters (e.g., Mariner 2.0 workaround) - var validArtifacts = syftOutput.Artifacts.AsEnumerable(); - foreach (var filter in this.artifactFilters) - { - validArtifacts = filter.Filter(validArtifacts, syftOutput.Distro); - } - - // Build a set of enabled factories based on requested component types - var enabledFactories = new HashSet(); - foreach (var componentType in enabledComponentTypes) - { - if ( - this.componentTypeToFactoryLookup.TryGetValue(componentType, out var factory) - && factory != null - ) - { - enabledFactories.Add(factory); - } 
- } - - // Create components using only enabled factories - var componentsWithLayers = validArtifacts - .DistinctBy(artifact => (artifact.Name, artifact.Version, artifact.Type)) - .Select(artifact => - this.CreateComponentWithLayers(artifact, syftOutput.Distro, enabledFactories) - ) - .Where(result => result.Component != null) - .Select(result => (Component: result.Component!, result.LayerIds)) - .ToList(); - - // Track unsupported artifact types for telemetry - var unsupportedTypes = validArtifacts - .Where(a => !this.artifactTypeToFactoryLookup.ContainsKey(a.Type)) - .Select(a => a.Type) - .Distinct() - .ToList(); - - if (unsupportedTypes.Count > 0) - { - this.logger.LogDebug( - "Encountered unsupported artifact types: {UnsupportedTypes}", - string.Join(", ", unsupportedTypes) - ); - } - - // Map components to layers - foreach (var (component, layers) in componentsWithLayers) - { - layers.ToList().ForEach(layer => layerDictionary[layer].Add(component)); - } - - var layerMappedLinuxComponents = layerDictionary.Select(kvp => - { - (var layerId, var components) = kvp; - return new LayerMappedLinuxComponents - { - Components = components, - DockerLayer = containerLayers.First(layer => layer.DiffId == layerId), - }; - }); - - // Track detected components in telemetry - syftTelemetryRecord.Components = JsonSerializer.Serialize( - componentsWithLayers.Select(c => c.Component.Id) - ); - - return layerMappedLinuxComponents; - } - catch (Exception e) - { - record.FailedDeserializingScannerOutput = e.ToString(); - return []; - } + return stdout; } private (TypedComponent? 
Component, IEnumerable LayerIds) CreateComponentWithLayers( diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/ImageReferenceTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/ImageReferenceTests.cs new file mode 100644 index 000000000..d851677e5 --- /dev/null +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/ImageReferenceTests.cs @@ -0,0 +1,138 @@ +namespace Microsoft.ComponentDetection.Detectors.Tests; + +using System; +using AwesomeAssertions; +using Microsoft.ComponentDetection.Detectors.Linux; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +[TestClass] +[TestCategory("Governance/All")] +[TestCategory("Governance/ComponentDetection")] +public class ImageReferenceTests +{ + [TestMethod] + public void Parse_DockerImage_ReturnsDockerImageKind() + { + var result = ImageReference.Parse("node:latest"); + + result.Kind.Should().Be(ImageReferenceKind.DockerImage); + result.OriginalInput.Should().Be("node:latest"); + result.Reference.Should().Be("node:latest"); + } + + [TestMethod] + public void Parse_DockerImage_LowercasesReference() + { + var result = ImageReference.Parse("MyImage:Latest"); + + result.Kind.Should().Be(ImageReferenceKind.DockerImage); + result.OriginalInput.Should().Be("MyImage:Latest"); + result.Reference.Should().Be("myimage:latest"); + } + + [TestMethod] + public void Parse_DockerImageSha_ReturnsDockerImageKind() + { + var result = ImageReference.Parse("sha256:abc123def456"); + + result.Kind.Should().Be(ImageReferenceKind.DockerImage); + result.OriginalInput.Should().Be("sha256:abc123def456"); + result.Reference.Should().Be("sha256:abc123def456"); + } + + [TestMethod] + public void Parse_OciDir_ReturnsOciLayoutKind() + { + var result = ImageReference.Parse("oci-dir:/path/to/image"); + + result.Kind.Should().Be(ImageReferenceKind.OciLayout); + result.OriginalInput.Should().Be("oci-dir:/path/to/image"); + result.Reference.Should().Be("/path/to/image"); + } + + [TestMethod] + public void 
Parse_OciDir_PreservesPathCase() + { + var result = ImageReference.Parse("oci-dir:/Path/To/Image"); + + result.Kind.Should().Be(ImageReferenceKind.OciLayout); + result.OriginalInput.Should().Be("oci-dir:/Path/To/Image"); + result.Reference.Should().Be("/Path/To/Image"); + } + + [TestMethod] + public void Parse_OciDirCaseInsensitivePrefix_ReturnsOciLayoutKind() + { + var result = ImageReference.Parse("OCI-DIR:/path/to/image"); + + result.Kind.Should().Be(ImageReferenceKind.OciLayout); + result.OriginalInput.Should().Be("OCI-DIR:/path/to/image"); + result.Reference.Should().Be("/path/to/image"); + } + + [TestMethod] + public void Parse_OciDir_ErrorsOnEmptyPath() + { + var act = () => ImageReference.Parse("oci-dir:"); + act.Should().Throw() + .WithMessage("Input with 'oci-dir:' prefix must include a path.*") + .WithParameterName("input"); + } + + [TestMethod] + public void Parse_OciDir_ErrorsOnWhitespaceOnlyPath() + { + var act = () => ImageReference.Parse("oci-dir: "); + act.Should().Throw() + .WithMessage("Input with 'oci-dir:' prefix must include a path.*") + .WithParameterName("input"); + } + + [TestMethod] + public void Parse_OciArchive_ReturnsOciArchiveKind() + { + var result = ImageReference.Parse("oci-archive:/path/to/image.tar"); + + result.Kind.Should().Be(ImageReferenceKind.OciArchive); + result.OriginalInput.Should().Be("oci-archive:/path/to/image.tar"); + result.Reference.Should().Be("/path/to/image.tar"); + } + + [TestMethod] + public void Parse_OciArchive_PreservesPathCase() + { + var result = ImageReference.Parse("oci-archive:/Path/To/Image.tar"); + + result.Kind.Should().Be(ImageReferenceKind.OciArchive); + result.OriginalInput.Should().Be("oci-archive:/Path/To/Image.tar"); + result.Reference.Should().Be("/Path/To/Image.tar"); + } + + [TestMethod] + public void Parse_OciArchiveCaseInsensitivePrefix_ReturnsOciArchiveKind() + { + var result = ImageReference.Parse("OCI-ARCHIVE:/path/to/image.tar"); + + 
result.Kind.Should().Be(ImageReferenceKind.OciArchive); + result.OriginalInput.Should().Be("OCI-ARCHIVE:/path/to/image.tar"); + result.Reference.Should().Be("/path/to/image.tar"); + } + + [TestMethod] + public void Parse_OciArchive_ErrorsOnEmptyPath() + { + var act = () => ImageReference.Parse("oci-archive:"); + act.Should().Throw() + .WithMessage("Input with 'oci-archive:' prefix must include a path.*") + .WithParameterName("input"); + } + + [TestMethod] + public void Parse_OciArchive_ErrorsOnWhitespaceOnlyPath() + { + var act = () => ImageReference.Parse("oci-archive: "); + act.Should().Throw() + .WithMessage("Input with 'oci-archive:' prefix must include a path.*") + .WithParameterName("input"); + } +} diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs index b21b8a114..12308a383 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxContainerDetectorTests.cs @@ -12,6 +12,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests; using Microsoft.ComponentDetection.Contracts.BcdeModels; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.ComponentDetection.Detectors.Linux; +using Microsoft.ComponentDetection.Detectors.Linux.Contracts; using Microsoft.Extensions.Logging; using Microsoft.VisualStudio.TestTools.UnitTesting; using Moq; @@ -61,6 +62,8 @@ public LinuxContainerDetectorTests() Layers = [], } ); + this.mockDockerService.Setup(service => service.GetEmptyContainerDetails()) + .Returns(() => new ContainerDetails { Id = 100 }); this.mockLogger = new Mock(); this.mockLinuxContainerDetectorLogger = new Mock>(); @@ -374,4 +377,917 @@ public async Task TestLinuxContainerDetector_HandlesScratchBaseAsync() ); await this.TestLinuxContainerDetectorAsync(); } + + [TestMethod] + public async Task 
TestLinuxContainerDetector_OciLayoutImage_DetectsComponentsAsync() + { + var componentRecorder = new ComponentRecorder(); + + // Create a temp directory to act as the OCI layout path + var ociDir = Path.Combine(Path.GetTempPath(), "test-oci-layout-" + Guid.NewGuid().ToString("N")).TrimEnd(Path.DirectorySeparatorChar); + Directory.CreateDirectory(ociDir); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [$"oci-dir:{ociDir}"], + componentRecorder + ); + + // Build a SyftOutput with source metadata containing layers, labels, tags + var syftOutputJson = """ + { + "distro": { "id": "azurelinux", "versionID": "3.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "userInput": "/oci-image", + "imageID": "sha256:ociimage123", + "tags": ["myregistry.io/myimage:latest"], + "repoDigests": [], + "layers": [ + { "digest": "sha256:layer1", "size": 40000 }, + { "digest": "sha256:layer2", "size": 50000 } + ], + "labels": { + "image.base.ref.name": "mcr.microsoft.com/azurelinux/base/core:3.0", + "image.base.digest": "sha256:basedigest" + } + } + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + var layerMappedComponents = new[] + { + new LayerMappedLinuxComponents + { + DockerLayer = new DockerLayer { DiffId = "sha256:layer1", LayerIndex = 0 }, + Components = [new LinuxComponent("azurelinux", "3.0", "bash", "5.2.15")], + }, + }; + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns(layerMappedComponents); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + 
this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + scanResult.ContainerDetails.Should().ContainSingle(); + + var containerDetails = scanResult.ContainerDetails.First(); + containerDetails.ImageId.Should().Be("sha256:ociimage123"); + containerDetails.BaseImageRef.Should().Be("mcr.microsoft.com/azurelinux/base/core:3.0"); + containerDetails.BaseImageDigest.Should().Be("sha256:basedigest"); + containerDetails.Tags.Should().ContainSingle().Which.Should().Be("myregistry.io/myimage:latest"); + + var detectedComponents = componentRecorder.GetDetectedComponents().ToList(); + detectedComponents.Should().ContainSingle(); + var detectedComponent = detectedComponents.First(); + detectedComponent.Component.Id.Should().Contain("bash"); + detectedComponent.ContainerLayerIds.Keys.Should().ContainSingle(); + var containerId = detectedComponent.ContainerLayerIds.Keys.First(); + detectedComponent.ContainerLayerIds[containerId].Should().BeEquivalentTo([0]); // Layer index from SyftOutput + + // Verify GetSyftOutputAsync was called (not ScanLinuxAsync) + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.GetSyftOutputAsync( + It.Is(s => s.StartsWith("oci-dir:")), + It.Is>(binds => + binds.Count == 1 && binds[0].Contains(ociDir)), + It.IsAny(), + It.IsAny() + ), + Times.Once + ); + + // Verify Docker inspect was NOT called for OCI images + this.mockDockerService.Verify( + service => + service.InspectImageAsync(ociDir, It.IsAny()), + Times.Never + ); + + // Verify ProcessSyftOutput was called with the correct layers + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.Is>(layers => + layers.Count() == 2 && + layers.First().DiffId == "sha256:layer1" && + layers.Last().DiffId == "sha256:layer2" + ), + It.IsAny>() + ), + Times.Once + ); + } + finally + { + 
Directory.Delete(ociDir, true); + } + } + + [TestMethod] + public async Task TestLinuxContainerDetector_OciLayoutImage_DoesNotLowercasePathAsync() + { + var componentRecorder = new ComponentRecorder(); + + // Create a temp directory with mixed case + var ociDir = Path.Combine(Path.GetTempPath(), "TestOciLayout-" + Guid.NewGuid().ToString("N")).TrimEnd(Path.DirectorySeparatorChar); + Directory.CreateDirectory(ociDir); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [$"oci-dir:{ociDir}"], + componentRecorder + ); + + var syftOutputJson = """ + { + "distro": { "id": "test", "versionID": "1.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "userInput": "/oci-image", + "imageID": "sha256:img", + "layers": [], + "labels": {} + } + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns([]); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + // Verify the bind mount path was passed as-is (not lowercased) + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.GetSyftOutputAsync( + It.Is(s => s.StartsWith("oci-dir:")), + It.Is>(binds => + binds.Count == 1 && binds[0].Contains(ociDir)), + It.IsAny(), + It.IsAny() + ), + Times.Once + ); + } + finally + { + Directory.Delete(ociDir, true); + } + } + + [TestMethod] + public async Task 
TestLinuxContainerDetector_OciLayoutImage_NormalizesPathAsync() + { + var componentRecorder = new ComponentRecorder(); + + // Create a temp directory with mixed case + var ociDir = Path.Combine(Path.GetTempPath(), "test-oci-layout-" + Guid.NewGuid().ToString("N")).TrimEnd(Path.DirectorySeparatorChar); + Directory.CreateDirectory(ociDir); + + var ociDirWithExtraComponents = Path.Combine(Path.GetDirectoryName(ociDir)!, ".", "random", "..", Path.GetFileName(ociDir)); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [$"oci-dir:{ociDirWithExtraComponents}"], + componentRecorder + ); + + var syftOutputJson = """ + { + "distro": { "id": "test", "versionID": "1.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "userInput": "/oci-image", + "imageID": "sha256:img", + "layers": [], + "labels": {} + } + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns([]); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.GetSyftOutputAsync( + It.Is(s => s.StartsWith("oci-dir:")), + It.Is>(binds => + binds.Count == 1 && binds[0].Contains(ociDir) && !binds[0].Contains(ociDirWithExtraComponents)), + It.IsAny(), + It.IsAny() + ), + Times.Once + ); + } + finally + { + Directory.Delete(ociDir, true); + } + } + + [TestMethod] + 
public async Task TestLinuxContainerDetector_MixedDockerAndOciImages_BothProcessedAsync() + { + var componentRecorder = new ComponentRecorder(); + + var ociDir = Path.Combine(Path.GetTempPath(), "test-oci-mixed-" + Guid.NewGuid().ToString("N")); + Directory.CreateDirectory(ociDir); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [NodeLatestImage, $"oci-dir:{ociDir}"], + componentRecorder + ); + + var syftOutputJson = """ + { + "distro": { "id": "azurelinux", "versionID": "3.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "userInput": "/oci-image", + "imageID": "sha256:ociimg", + "tags": [], + "repoDigests": [], + "layers": [ + { "digest": "sha256:ocilayer1", "size": 10000 } + ], + "labels": {} + } + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + var ociLayerMappedComponents = new[] + { + new LayerMappedLinuxComponents + { + DockerLayer = new DockerLayer { DiffId = "sha256:ocilayer1", LayerIndex = 0 }, + Components = [new LinuxComponent("azurelinux", "3.0", "curl", "8.0")], + }, + }; + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns(ociLayerMappedComponents); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + + // Both Docker and OCI images should have results + 
scanResult.ContainerDetails.Should().HaveCount(2); + + var detectedComponents = componentRecorder.GetDetectedComponents().ToList(); + detectedComponents.Should().HaveCount(2); + } + finally + { + Directory.Delete(ociDir, true); + } + } + + [TestMethod] + public async Task TestLinuxContainerDetector_OciLayoutImage_NoMetadata_DetectsComponentsAsync() + { + // Ensure that if Syft output for an OCI image is missing metadata, we can still detect components and associate them with the correct container and layers. + var componentRecorder = new ComponentRecorder(); + + var ociDir = Path.Combine(Path.GetTempPath(), "test-oci-no-meta-" + Guid.NewGuid().ToString("N")).TrimEnd(Path.DirectorySeparatorChar); + Directory.CreateDirectory(ociDir); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [$"oci-dir:{ociDir}"], + componentRecorder + ); + + // Syft output with no source metadata at all + var syftOutputJson = """ + { + "distro": { "id": "azurelinux", "versionID": "3.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc" + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + var layerMappedComponents = new[] + { + new LayerMappedLinuxComponents + { + DockerLayer = new DockerLayer { DiffId = "unknown", LayerIndex = 0 }, + Components = [new LinuxComponent("azurelinux", "3.0", "curl", "8.0.0")], + }, + }; + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns(layerMappedComponents); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + 
this.mockLinuxContainerDetectorLogger.Object + ); + + var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + scanResult.ContainerDetails.Should().ContainSingle(); + + var containerDetails = scanResult.ContainerDetails.First(); + + // When metadata is missing, ImageId falls back to the OCI path + containerDetails.ImageId.Should().Be(Path.GetFullPath(ociDir)); + containerDetails.Tags.Should().BeEmpty(); + containerDetails.BaseImageRef.Should().BeEmpty(); + containerDetails.BaseImageDigest.Should().BeEmpty(); + + var detectedComponents = componentRecorder.GetDetectedComponents().ToList(); + detectedComponents.Should().ContainSingle(); + var detectedComponent = detectedComponents.First(); + detectedComponent.Component.Id.Should().Contain("curl"); + detectedComponent.ContainerLayerIds.Keys.Should().ContainSingle(); + var containerId = detectedComponent.ContainerLayerIds.Keys.First(); + detectedComponent.ContainerLayerIds[containerId].Should().BeEquivalentTo([0]); // Layer index from SyftOutput + + // Verify ProcessSyftOutput was called with empty layers + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.Is>(layers => !layers.Any()), + It.IsAny>() + ), + Times.Once + ); + } + finally + { + Directory.Delete(ociDir, true); + } + } + + [TestMethod] + public async Task TestLinuxContainerDetector_OciLayoutImage_IncompatibleMetadata_DetectsComponentsAsync() + { + // Ensure that if Syft output contains metadata with an incompatible schema, + // scanning still works as if no metadata were provided. 
+ var componentRecorder = new ComponentRecorder(); + + var ociDir = Path.Combine(Path.GetTempPath(), "test-oci-bad-meta-" + Guid.NewGuid().ToString("N")).TrimEnd(Path.DirectorySeparatorChar); + Directory.CreateDirectory(ociDir); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [$"oci-dir:{ociDir}"], + componentRecorder + ); + + // Syft output with incompatible metadata (layers is a string, not an array) + var syftOutputJson = """ + { + "distro": { "id": "azurelinux", "versionID": "3.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "imageID": 12345, + "layers": "not-an-array", + "tags": "also-not-an-array" + } + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + var layerMappedComponents = new[] + { + new LayerMappedLinuxComponents + { + DockerLayer = new DockerLayer { DiffId = "unknown", LayerIndex = 0 }, + Components = [new LinuxComponent("azurelinux", "3.0", "zlib", "1.2.13")], + }, + }; + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns(layerMappedComponents); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + scanResult.ContainerDetails.Should().ContainSingle(); + + var containerDetails = scanResult.ContainerDetails.First(); + + // Incompatible metadata is treated like missing metadata — ImageId falls back to 
path + containerDetails.ImageId.Should().Be(Path.GetFullPath(ociDir)); + containerDetails.Tags.Should().BeEmpty(); + containerDetails.BaseImageRef.Should().BeEmpty(); + containerDetails.BaseImageDigest.Should().BeEmpty(); + + var detectedComponents = componentRecorder.GetDetectedComponents().ToList(); + detectedComponents.Should().ContainSingle(); + var detectedComponent = detectedComponents.First(); + detectedComponent.Component.Id.Should().Contain("zlib"); + detectedComponent.ContainerLayerIds.Keys.Should().ContainSingle(); + var containerId = detectedComponent.ContainerLayerIds.Keys.First(); + detectedComponent.ContainerLayerIds[containerId].Should().BeEquivalentTo([0]); + } + finally + { + Directory.Delete(ociDir, true); + } + } + + [TestMethod] + public async Task TestLinuxContainerDetector_OciArchiveImage_DetectsComponentsAsync() + { + var componentRecorder = new ComponentRecorder(); + + // Create a temp file to act as the OCI archive + var ociArchiveDir = Path.GetTempPath().TrimEnd(Path.DirectorySeparatorChar); + var ociArchiveName = "test-oci-archive-" + Guid.NewGuid().ToString("N") + ".tar"; + var ociArchive = Path.Combine(ociArchiveDir, ociArchiveName); + await System.IO.File.WriteAllBytesAsync(ociArchive, []); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [$"oci-archive:{ociArchive}"], + componentRecorder + ); + + var syftOutputJson = """ + { + "distro": { "id": "azurelinux", "versionID": "3.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "userInput": "/oci-image", + "imageID": "sha256:archiveimg", + "tags": ["myregistry.io/archived:v1"], + "repoDigests": [], + "layers": [ + { "digest": "sha256:archivelayer1", "size": 30000 }, + { "digest": "sha256:archivelayer2", "size": 40000 } + ], + "labels": {} + } + } + } + """; + var syftOutput = 
SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + var layerMappedComponents = new[] + { + new LayerMappedLinuxComponents + { + DockerLayer = new DockerLayer { DiffId = "sha256:archivelayer2", LayerIndex = 1 }, + Components = [new LinuxComponent("azurelinux", "3.0", "openssl", "3.1.0")], + }, + }; + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns(layerMappedComponents); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + scanResult.ContainerDetails.Should().ContainSingle(); + + var containerDetails = scanResult.ContainerDetails.First(); + containerDetails.ImageId.Should().Be("sha256:archiveimg"); + containerDetails.Tags.Should().ContainSingle().Which.Should().Be("myregistry.io/archived:v1"); + + var detectedComponents = componentRecorder.GetDetectedComponents().ToList(); + detectedComponents.Should().ContainSingle(); + var detectedComponent = detectedComponents.First(); + detectedComponent.Component.Id.Should().Contain("openssl"); + detectedComponent.ContainerLayerIds.Keys.Should().ContainSingle(); + var containerId = detectedComponent.ContainerLayerIds.Keys.First(); + detectedComponent.ContainerLayerIds[containerId].Should().BeEquivalentTo([1]); // Layer index from SyftOutput + + // Verify GetSyftOutputAsync was called with oci-archive: prefix + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.GetSyftOutputAsync( + It.Is(s => s.StartsWith("oci-archive:") && s.Contains(ociArchiveName)), + It.Is>(binds => + binds.Count == 1 && 
binds[0].Contains(ociArchiveDir)), + It.IsAny(), + It.IsAny() + ), + Times.Once + ); + + // Verify ProcessSyftOutput was called with the correct layers + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.Is>(layers => + layers.Count() == 2 && + layers.First().DiffId == "sha256:archivelayer1" && + layers.Last().DiffId == "sha256:archivelayer2" + ), + It.IsAny>() + ), + Times.Once + ); + } + finally + { + System.IO.File.Delete(ociArchive); + } + } + + [TestMethod] + public async Task TestLinuxContainerDetector_DockerArchiveImage_DetectsComponentsAsync() + { + var componentRecorder = new ComponentRecorder(); + + // Create a temp file to act as the Docker archive + var dockerArchiveDir = Path.GetTempPath().TrimEnd(Path.DirectorySeparatorChar); + var dockerArchiveName = "test-docker-archive-" + Guid.NewGuid().ToString("N") + ".tar"; + var dockerArchive = Path.Combine(dockerArchiveDir, dockerArchiveName); + await System.IO.File.WriteAllBytesAsync(dockerArchive, []); + + try + { + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + [$"docker-archive:{dockerArchive}"], + componentRecorder + ); + + var syftOutputJson = """ + { + "distro": { "id": "ubuntu", "versionID": "22.04" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/local-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "userInput": "/local-image", + "imageID": "sha256:dockerarchiveimg", + "tags": ["myapp:v2"], + "repoDigests": [], + "layers": [ + { "digest": "sha256:dockerlayer1", "size": 50000 }, + { "digest": "sha256:dockerlayer2", "size": 60000 } + ], + "labels": {} + } + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.GetSyftOutputAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny(), + It.IsAny() + ) + ) + .ReturnsAsync(syftOutput); + + var layerMappedComponents = new[] + 
{ + new LayerMappedLinuxComponents + { + DockerLayer = new DockerLayer { DiffId = "sha256:dockerlayer1", LayerIndex = 0 }, + Components = [new LinuxComponent("ubuntu", "22.04", "libc6", "2.35-0ubuntu3")], + }, + }; + + this.mockSyftLinuxScanner.Setup(scanner => + scanner.ProcessSyftOutput( + It.IsAny(), + It.IsAny>(), + It.IsAny>() + ) + ) + .Returns(layerMappedComponents); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + scanResult.ContainerDetails.Should().ContainSingle(); + + var containerDetails = scanResult.ContainerDetails.First(); + containerDetails.ImageId.Should().Be("sha256:dockerarchiveimg"); + containerDetails.Tags.Should().ContainSingle().Which.Should().Be("myapp:v2"); + + var detectedComponents = componentRecorder.GetDetectedComponents().ToList(); + detectedComponents.Should().ContainSingle(); + var detectedComponent = detectedComponents.First(); + detectedComponent.Component.Id.Should().Contain("libc6"); + detectedComponent.ContainerLayerIds.Keys.Should().ContainSingle(); + var containerId = detectedComponent.ContainerLayerIds.Keys.First(); + detectedComponent.ContainerLayerIds[containerId].Should().BeEquivalentTo([0]); + + // Verify GetSyftOutputAsync was called with docker-archive: prefix + this.mockSyftLinuxScanner.Verify( + scanner => + scanner.GetSyftOutputAsync( + It.Is(s => s.StartsWith("docker-archive:") && s.Contains(dockerArchiveName)), + It.Is>(binds => + binds.Count == 1 && binds[0].Contains(dockerArchiveDir)), + It.IsAny(), + It.IsAny() + ), + Times.Once + ); + } + finally + { + System.IO.File.Delete(dockerArchive); + } + } + + [TestMethod] + public async Task TestLinuxContainerDetector_ImageParseFailure_ContinuesScanningOtherImagesAsync() + { + var 
componentRecorder = new ComponentRecorder(); + + // "oci-dir:" with no path will cause ImageReference.Parse to throw + var scanRequest = new ScanRequest( + new DirectoryInfo(Path.GetTempPath()), + (_, __) => false, + this.mockLogger.Object, + null, + ["oci-dir:", NodeLatestImage], + componentRecorder + ); + + var linuxContainerDetector = new LinuxContainerDetector( + this.mockSyftLinuxScanner.Object, + this.mockDockerService.Object, + this.mockLinuxContainerDetectorLogger.Object + ); + + var scanResult = await linuxContainerDetector.ExecuteDetectorAsync(scanRequest); + + scanResult.ResultCode.Should().Be(ProcessingResultCode.Success); + scanResult.ContainerDetails.Should().ContainSingle(); + + var detectedComponents = componentRecorder.GetDetectedComponents().ToList(); + detectedComponents.Should().ContainSingle(); + detectedComponents.First().Component.Id.Should().Be(BashPackageId); + + // Verify the warning was logged for the failed parse with the correct message + this.mockLinuxContainerDetectorLogger.Verify( + logger => + logger.Log( + LogLevel.Warning, + It.IsAny(), + It.Is( + (v, t) => v.ToString()!.Contains("Failed to parse image reference 'oci-dir:'") + ), + It.IsAny(), + (Func)It.IsAny() + ), + Times.Once + ); + } } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxScannerTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxScannerTests.cs index ddc76a28b..5178eec0b 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxScannerTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/LinuxScannerTests.cs @@ -10,6 +10,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests; using Microsoft.ComponentDetection.Contracts.BcdeModels; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.ComponentDetection.Detectors.Linux; +using Microsoft.ComponentDetection.Detectors.Linux.Contracts; using Microsoft.ComponentDetection.Detectors.Linux.Factories; using 
Microsoft.ComponentDetection.Detectors.Linux.Filters; using Microsoft.Extensions.Logging; @@ -265,6 +266,7 @@ public async Task TestLinuxScannerAsync(string syftOutput) service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -315,6 +317,7 @@ public async Task TestLinuxScanner_ReturnsNullAuthorAndLicense_Async(string syft service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -367,6 +370,7 @@ string syftOutput service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -419,6 +423,7 @@ string syftOutput service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -514,6 +519,7 @@ public async Task TestLinuxScanner_SupportsMultipleComponentTypes_Async() service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -619,6 +625,7 @@ public async Task TestLinuxScanner_FiltersComponentsByEnabledTypes_OnlyLinux_Asy service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -705,6 +712,7 @@ public async Task TestLinuxScanner_FiltersComponentsByEnabledTypes_OnlyNpmAndPip service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -748,6 +756,7 @@ string expectedFlag service.CreateAndRunContainerAsync( It.IsAny(), It.IsAny>(), + It.IsAny>(), It.IsAny() ) ) @@ -769,6 +778,7 @@ await this.linuxScanner.ScanLinuxAsync( It.Is>(cmd => cmd.Contains("--scope") && cmd.Contains(expectedFlag) ), + It.IsAny>(), It.IsAny() ), Times.Once @@ -792,4 +802,229 @@ await this.linuxScanner.ScanLinuxAsync( await action.Should().ThrowAsync(); } + + [TestMethod] + public async Task TestLinuxScanner_ScanLinuxSyftOutputAsync_ReturnsParsedSyftOutputAsync() + { + const string syftOutputWithSource = """ + { + "distro": { + "id": "azurelinux", + "versionID": "3.0" + }, + "artifacts": [ + { + "name": "bash", + "version": "5.2.15-3.azl3", + "type": "rpm", + 
"locations": [ + { + "path": "/var/lib/rpm/Packages", + "layerID": "sha256:aaa111" + } + ], + "metadata": {}, + "licenses": [ + { "value": "GPL-3.0-or-later" } + ] + } + ], + "source": { + "id": "sha256:abc123", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc123", + "metadata": { + "userInput": "/oci-image", + "imageID": "sha256:image123", + "manifestDigest": "sha256:abc123", + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "tags": ["myregistry.io/myimage:latest"], + "imageSize": 100000, + "layers": [ + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "digest": "sha256:aaa111", + "size": 50000 + }, + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "digest": "sha256:bbb222", + "size": 50000 + } + ], + "repoDigests": [], + "architecture": "amd64", + "os": "linux", + "labels": { + "image.base.ref.name": "mcr.microsoft.com/azurelinux/base/core:3.0", + "image.base.digest": "sha256:basedigest123" + } + } + } + } + """; + + this.mockDockerService.Setup(service => + service.CreateAndRunContainerAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny>(), + It.IsAny() + ) + ) + .ReturnsAsync((syftOutputWithSource, string.Empty)); + + var additionalBinds = new List { "/some/oci/path:/oci-image:ro" }; + var syftOutput = await this.linuxScanner.GetSyftOutputAsync( + "oci-dir:/oci-image", + additionalBinds, + LinuxScannerScope.AllLayers + ); + + syftOutput.Should().NotBeNull(); + syftOutput.Artifacts.Should().ContainSingle(); + syftOutput.Artifacts[0].Name.Should().Be("bash"); + + // Verify source metadata can be extracted + var sourceMetadata = syftOutput.Source?.GetSyftSourceMetadata(); + sourceMetadata.Should().NotBeNull(); + sourceMetadata.ImageId.Should().Be("sha256:image123"); + sourceMetadata.Tags.Should().ContainSingle().Which.Should().Be("myregistry.io/myimage:latest"); + sourceMetadata.Layers.Should().HaveCount(2); + sourceMetadata.Labels.Should().ContainKey("image.base.ref.name"); + + // 
Verify ProcessSyftOutput works with the returned output + var containerLayers = sourceMetadata.Layers + .Select((layer, index) => new DockerLayer { DiffId = layer.Digest, LayerIndex = index }) + .ToList(); + var enabledTypes = new HashSet { ComponentType.Linux }; + var layerMappedComponents = this.linuxScanner.ProcessSyftOutput( + syftOutput, containerLayers, enabledTypes); + + layerMappedComponents.Should().HaveCount(2); + var layerWithComponents = layerMappedComponents + .First(l => l.DockerLayer.DiffId == "sha256:aaa111"); + layerWithComponents.Components.Should().ContainSingle(); + layerWithComponents.Components.First().Should().BeOfType(); + var bashComponent = layerWithComponents.Components.First() as LinuxComponent; + bashComponent.Should().NotBeNull(); + bashComponent.Name.Should().Be("bash"); + bashComponent.Version.Should().Be("5.2.15-3.azl3"); + bashComponent.Distribution.Should().Be("azurelinux"); + } + + [TestMethod] + public async Task TestLinuxScanner_ScanLinuxSyftOutputAsync_PassesAdditionalBindsAndCommandAsync() + { + const string syftOutput = """ + { + "distro": { "id": "test", "versionID": "1.0" }, + "artifacts": [], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc", + "metadata": { + "userInput": "/oci-image", + "imageID": "sha256:img", + "layers": [], + "labels": {} + } + } + } + """; + + this.mockDockerService.Setup(service => + service.CreateAndRunContainerAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny>(), + It.IsAny() + ) + ) + .ReturnsAsync((syftOutput, string.Empty)); + + var additionalBinds = new List { "/host/path/to/oci:/oci-image:ro" }; + await this.linuxScanner.GetSyftOutputAsync( + "oci-dir:/oci-image", + additionalBinds, + LinuxScannerScope.AllLayers + ); + + // Verify the Syft command uses oci-dir: scheme and passes binds + this.mockDockerService.Verify( + service => + service.CreateAndRunContainerAsync( + It.IsAny(), + It.Is>(cmd => cmd[0] == "oci-dir:/oci-image"), + 
It.Is>(binds => + binds.Count == 1 && binds[0] == "/host/path/to/oci:/oci-image:ro" + ), + It.IsAny() + ), + Times.Once + ); + } + + [TestMethod] + public void TestLinuxScanner_ProcessSyftOutput_ReturnsComponentsWithoutLayerInfoWhenNoContainerLayers() + { + var syftOutputJson = """ + { + "distro": { "id": "azurelinux", "versionID": "3.0" }, + "artifacts": [ + { + "name": "bash", + "version": "5.2.15", + "type": "rpm", + "locations": [ + { + "path": "/var/lib/rpm/rpmdb.sqlite", + "layerID": "sha256:layer1" + } + ] + }, + { + "name": "openssl", + "version": "3.1.0", + "type": "rpm", + "locations": [ + { + "path": "/var/lib/rpm/rpmdb.sqlite", + "layerID": "sha256:layer2" + } + ] + } + ], + "source": { + "id": "sha256:abc", + "name": "/oci-image", + "type": "image", + "version": "sha256:abc" + } + } + """; + var syftOutput = SyftOutput.FromJson(syftOutputJson); + var enabledTypes = new HashSet { ComponentType.Linux }; + + // Pass empty container layers — components should still be returned + var result = this.linuxScanner.ProcessSyftOutput( + syftOutput, [], enabledTypes).ToList(); + + // All components should be grouped under a single entry with no layer info + result.Should().ContainSingle(); + + var entry = result.First(); + entry.DockerLayer.Should().NotBeNull(); + entry.DockerLayer.DiffId.Should().Be(string.Empty); + entry.DockerLayer.LayerIndex.Should().Be(0); + entry.DockerLayer.IsBaseImage.Should().BeFalse(); + + entry.Components.Should().HaveCount(2); + entry.Components.Should().AllBeOfType(); + entry.Components.Select(c => (c as LinuxComponent)!.Name) + .Should().Contain("bash").And.Contain("openssl"); + } }