Skip to content

Commit 7ce401b

Browse files
AMaini503Aayush MainiCopilot
authored
Reconcile bare/rich component Ids in DependencyGraphs (#1784)
* Reconcile bare/rich component Ids in DependencyGraphs

  After PR #1760 dropped bare entries from ComponentsFound when rich counterparts exist, DependencyGraphs still referenced bare Ids as graph nodes — breaking the contract between the two outputs.

  This adds a post-processing step in DefaultGraphTranslationService that merges bare graph nodes into their rich counterparts:
  - Merges outbound edges (bare→rich targets)
  - Rewrites inbound edges from other nodes
  - Migrates metadata set membership (Explicit/DevDep/Dep)
  - Filters self-edges introduced by rewriting
  - Handles multiple rich variants (cross-product merge)
  - Leaves bare-only nodes (no rich counterpart) unchanged

  Includes 10 unit tests covering Paul's scenario, multi-rich variants, edge rewriting, self-edge prevention, leaf preservation, null/empty collections, and multi-location independence.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address PR feedback: reduce allocations and normalize empty edges

  - Change RewriteId to return IEnumerable<string> using Enumerable.Repeat instead of allocating a new HashSet for every non-bare id
  - Normalize empty edge sets to null after self-edge filtering to match the existing serialization convention in GraphTranslationUtility

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Aayush Maini <aamaini@microsoft.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4eb4071 commit 7ce401b

2 files changed

Lines changed: 612 additions & 4 deletions

File tree

src/Microsoft.ComponentDetection.Orchestrator/Services/GraphTranslation/DefaultGraphTranslationService.cs

Lines changed: 178 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,18 @@ public ScanResult GenerateScanResultFromProcessingResult(
3434

3535
this.LogComponentScopeTelemetry(mergedComponents);
3636

37+
var dependencyGraphs = GraphTranslationUtility.AccumulateAndConvertToContract(recorderDetectorPairs
38+
.Select(tuple => tuple.Recorder)
39+
.Where(x => x != null)
40+
.Select(x => x.GetDependencyGraphsByLocation()));
41+
42+
ReconcileDependencyGraphIds(dependencyGraphs, mergedComponents);
43+
3744
return new DefaultGraphScanResult
3845
{
3946
ComponentsFound = mergedComponents.Select(x => this.ConvertToContract(x)).ToList(),
4047
ContainerDetailsMap = detectorProcessingResult.ContainersDetailsMap,
41-
DependencyGraphs = GraphTranslationUtility.AccumulateAndConvertToContract(recorderDetectorPairs
42-
.Select(tuple => tuple.Recorder)
43-
.Where(x => x != null)
44-
.Select(x => x.GetDependencyGraphsByLocation())),
48+
DependencyGraphs = dependencyGraphs,
4549
SourceDirectory = settings.SourceDirectory.ToString(),
4650
};
4751
}
@@ -81,6 +85,176 @@ private static bool GraphContainsComponent(IDependencyGraph graph, TypedComponen
8185
(component.Id != component.BaseId && graph.Contains(component.BaseId));
8286
}
8387

88+
/// <summary>
/// Reconciles bare component Ids in <see cref="DependencyGraphCollection"/> to match the merged
/// identities in ComponentsFound. When a bare node (Id == BaseId) has rich counterparts
/// (Id != BaseId, same BaseId) in the same location graph, the bare node is merged into
/// all rich counterparts and removed. This ensures every Id referenced in the graph output
/// also exists in ComponentsFound.
/// </summary>
/// <param name="graphs">
/// The dependency graphs to reconcile. Each contained graph is updated in place.
/// A null or empty collection is a no-op.
/// </param>
/// <param name="mergedComponents">
/// The merged components whose Id/BaseId pairs define which bare Ids have rich counterparts.
/// A null or empty list is a no-op, because there is nothing to reconcile against.
/// </param>
internal static void ReconcileDependencyGraphIds(
    DependencyGraphCollection graphs,
    IReadOnlyList<DetectedComponent> mergedComponents)
{
    if (graphs is null || graphs.Count == 0)
    {
        return;
    }

    // Without merged components no bare Id can have a rich counterpart, so every graph
    // would be left untouched anyway. Guarding here also avoids a NullReferenceException.
    if (mergedComponents is null || mergedComponents.Count == 0)
    {
        return;
    }

    // Build BaseId → set of rich Ids from merged components.
    var baseIdToRichIds = new Dictionary<string, HashSet<string>>();
    foreach (var component in mergedComponents)
    {
        var id = component.Component.Id;
        var baseId = component.Component.BaseId;
        if (id == baseId)
        {
            // Bare component: it is a merge source, never a merge target.
            continue;
        }

        if (!baseIdToRichIds.TryGetValue(baseId, out var richIds))
        {
            baseIdToRichIds[baseId] = richIds = [];
        }

        richIds.Add(id);
    }

    if (baseIdToRichIds.Count == 0)
    {
        // No rich components at all: no bare node can be merged anywhere.
        return;
    }

    // Each graph is reconciled independently of the others.
    foreach (var graphWithMetadata in graphs.Values)
    {
        ReconcileGraph(graphWithMetadata, baseIdToRichIds);
    }
}
131+
132+
/// <summary>
/// Reconciles a single graph: every bare node that has at least one rich counterpart present
/// as a node in the same graph is removed, its outbound edges are merged into each of those
/// rich counterparts, and every reference to it (edges and metadata sets) is rewritten to the
/// rich Ids. Bare nodes without a rich counterpart in this graph are left unchanged.
/// </summary>
/// <param name="graphWithMetadata">The graph and its metadata sets; updated in place.</param>
/// <param name="baseIdToRichIds">Map from a BaseId to all rich Ids that share that BaseId.</param>
private static void ReconcileGraph(
    DependencyGraphWithMetadata graphWithMetadata,
    Dictionary<string, HashSet<string>> baseIdToRichIds)
{
    var graph = graphWithMetadata?.Graph;
    if (graph is null || graph.Count == 0)
    {
        // Nothing to reconcile; also protects against a missing graph entry.
        return;
    }

    // Identify bare nodes that have at least one rich counterpart in THIS graph.
    var bareToRich = new Dictionary<string, HashSet<string>>();
    foreach (var nodeId in graph.Keys)
    {
        if (!baseIdToRichIds.TryGetValue(nodeId, out var allRichIds))
        {
            continue;
        }

        var richInGraph = new HashSet<string>(allRichIds.Where(graph.ContainsKey));
        if (richInGraph.Count > 0)
        {
            bareToRich[nodeId] = richInGraph;
        }
    }

    if (bareToRich.Count == 0)
    {
        return;
    }

    // Rewrite a single Id: if it's a bare Id being merged, expand to its rich counterparts.
    // Returns the existing set for bare ids; yields a single element for non-bare ids to avoid allocation.
    IEnumerable<string> RewriteId(string id) =>
        bareToRich.TryGetValue(id, out var richIds) ? richIds : Enumerable.Repeat(id, 1);

    // Rebuild graph: skip bare nodes being merged, rewrite edge targets.
    var newGraph = new Contracts.BcdeModels.DependencyGraph();
    foreach (var (nodeId, edges) in graph)
    {
        if (bareToRich.ContainsKey(nodeId))
        {
            continue; // bare node will be merged into its rich counterparts below
        }

        if (edges == null)
        {
            newGraph[nodeId] = null;
            continue;
        }

        var newEdges = new HashSet<string>();
        foreach (var edge in edges)
        {
            foreach (var rewritten in RewriteId(edge))
            {
                // Avoid self-edges that rewriting could introduce.
                if (rewritten != nodeId)
                {
                    newEdges.Add(rewritten);
                }
            }
        }

        // Empty edge sets are stored as null for consistent serialization.
        newGraph[nodeId] = newEdges.Count > 0 ? newEdges : null;
    }

    // Merge bare nodes' outbound edges into their rich counterparts.
    foreach (var (bareId, richIds) in bareToRich)
    {
        var bareEdges = graph[bareId];
        if (bareEdges == null)
        {
            // Leaf bare node: it contributes no edges, so rich counterparts stay as rebuilt above.
            continue;
        }

        // Expand the bare node's targets once; the result is the same for every rich counterpart
        // apart from the per-counterpart self-edge filter applied below.
        var expandedTargets = new HashSet<string>();
        foreach (var edge in bareEdges)
        {
            foreach (var rewritten in RewriteId(edge))
            {
                expandedTargets.Add(rewritten);
            }
        }

        foreach (var richId in richIds)
        {
            // Single lookup; a missing or null entry starts from an empty edge set.
            newGraph.TryGetValue(richId, out var richEdges);
            richEdges ??= [];

            foreach (var target in expandedTargets)
            {
                if (target != richId)
                {
                    richEdges.Add(target);
                }
            }

            // Normalize empty edge sets to null for consistent serialization.
            newGraph[richId] = richEdges.Count > 0 ? richEdges : null;
        }
    }

    // Rebuild metadata sets, rewriting bare Ids to their rich counterparts.
    graphWithMetadata.Graph = newGraph;
    graphWithMetadata.ExplicitlyReferencedComponentIds = RewriteIdSet(graphWithMetadata.ExplicitlyReferencedComponentIds, bareToRich);
    graphWithMetadata.DevelopmentDependencies = RewriteIdSet(graphWithMetadata.DevelopmentDependencies, bareToRich);
    graphWithMetadata.Dependencies = RewriteIdSet(graphWithMetadata.Dependencies, bareToRich);
}
229+
230+
/// <summary>
/// Produces a copy of <paramref name="original"/> in which every Id that is a key of
/// <paramref name="bareToRich"/> is replaced by all of its mapped rich Ids; every other Id
/// is carried over as-is.
/// </summary>
/// <param name="original">The Id set to rewrite. May be null or empty.</param>
/// <param name="bareToRich">Map from a bare Id to the rich Ids that replace it.</param>
/// <returns>
/// <paramref name="original"/> itself when it is null or empty; otherwise a new set holding
/// the rewritten Ids.
/// </returns>
private static HashSet<string> RewriteIdSet(
    HashSet<string> original,
    Dictionary<string, HashSet<string>> bareToRich)
{
    // Null and empty inputs are handed back untouched (same instance, no allocation).
    if (original is not { Count: > 0 })
    {
        return original;
    }

    // Each Id maps either to its rich replacements or to itself; the set removes duplicates.
    var expandedIds = original.SelectMany(
        id => bareToRich.TryGetValue(id, out var replacements)
            ? replacements
            : Enumerable.Repeat(id, 1));

    return new HashSet<string>(expandedIds);
}
257+
84258
private void LogComponentScopeTelemetry(List<DetectedComponent> components)
85259
{
86260
using var record = new DetectedComponentScopeRecord();

0 commit comments

Comments
 (0)