From 28b68d9b049ba54b9a54674bbd339a06c43121c9 Mon Sep 17 00:00:00 2001 From: Hleb Shauchenka Date: Sat, 20 Jun 2026 19:23:23 +0200 Subject: [PATCH] fix(graph): resolve relationship endpoints case-insensitively, accept body-form tags Two fixes inside parseGraphXml: 1. Normalize endpoint names (trim + lowercase) via a name->node map so edges whose source/target differ only in case still resolve. 2. Parse both self-closing <relationship .../> AND body-form <relationship ...>...</relationship> elements. The original regex only matched self-closing. Adds a referential-integrity rule plus a few-shot to the extraction prompt so models emit endpoints that reference declared entities. Relations scored ~1/5 for every locally-hosted model in benchmarks even when entity extraction worked. Root cause was this parser plus prompt combination, not model capability: a perfect model could not satisfy both the case-sensitive name match and the self-closing-only XML expectation. Tests: test/graph-prompt.test.ts (+13 lines), test/graph.test.ts (+79 lines). Signed-off-by: Hleb Shauchenka --- src/functions/graph.ts | 29 ++++++++---- src/prompts/graph-extraction.ts | 14 +++++- test/graph-prompt.test.ts | 13 ++++++ test/graph.test.ts | 83 ++++++++++++++++++++++++++++++++- 4 files changed, 128 insertions(+), 11 deletions(-) create mode 100644 test/graph-prompt.test.ts diff --git a/src/functions/graph.ts b/src/functions/graph.ts index bb0d72722..394c85832 100644 --- a/src/functions/graph.ts +++ b/src/functions/graph.ts @@ -423,19 +423,23 @@ function parseGraphXml( addEntity(match[1], match[2]); } - const relRegex = /]*?)\/>/g; - while ((match = relRegex.exec(xml)) !== null) { - const attrs = parseAttrs(match[1]); + const nodeByNormName = new Map(); + for (const n of nodes) { + const key = n.name.trim().toLowerCase(); + if (!nodeByNormName.has(key)) nodeByNormName.set(key, n); + } + + const addRelationship = (rawAttrs: string): void => { + const attrs = parseAttrs(rawAttrs); const type = attrs["type"] as GraphEdge["type"] | undefined; const 
sourceName = attrs["source"]; const targetName = attrs["target"]; - if (!type || !sourceName || !targetName) continue; + if (!type || !sourceName || !targetName) return; + const sourceNode = nodeByNormName.get(sourceName.trim().toLowerCase()); + const targetNode = nodeByNormName.get(targetName.trim().toLowerCase()); + if (!sourceNode || !targetNode) return; const parsedWeight = parseFloat(attrs["weight"] ?? ""); const weight = Number.isFinite(parsedWeight) ? parsedWeight : 0.5; - - const sourceNode = nodes.find((n) => n.name === sourceName); - const targetNode = nodes.find((n) => n.name === targetName); - if (!sourceNode || !targetNode) continue; edges.push({ id: generateId("ge"), type, @@ -445,6 +449,15 @@ function parseGraphXml( sourceObservationIds: observationIds, createdAt: now, }); + }; + + const relSelfClose = /]*?)\/>/g; + while ((match = relSelfClose.exec(xml)) !== null) { + addRelationship(match[1]); + } + const relWithBody = /]*[^/])>[\s\S]*?<\/relationship>/g; + while ((match = relWithBody.exec(xml)) !== null) { + addRelationship(match[1]); } return { nodes, edges }; diff --git a/src/prompts/graph-extraction.ts b/src/prompts/graph-extraction.ts index 4f1049c1a..ca4f6e02f 100644 --- a/src/prompts/graph-extraction.ts +++ b/src/prompts/graph-extraction.ts @@ -14,7 +14,19 @@ Rules: - Extract concrete entities only (real file paths, function names, library names) - Use the most specific type available - Weight relationships by how strong/direct the connection is -- If no entities found, output empty tags`; +- If no entities found, output empty tags +- Every relationship source and target MUST be the exact name of an entity declared in ; never reference an entity you did not declare + +Example: + + + + + + + + +`; export function buildGraphExtractionPrompt( observations: Array<{ diff --git a/test/graph-prompt.test.ts b/test/graph-prompt.test.ts new file mode 100644 index 000000000..6e8601d42 --- /dev/null +++ b/test/graph-prompt.test.ts @@ -0,0 +1,13 @@ 
+import { describe, it, expect } from "vitest"; + +import { GRAPH_EXTRACTION_SYSTEM } from "../src/prompts/graph-extraction.js"; + +describe("GRAPH_EXTRACTION_SYSTEM prompt", () => { + it("instructs that relationship endpoints must reference a declared entity", () => { + expect(GRAPH_EXTRACTION_SYSTEM).toMatch(/declared/i); + }); + + it("includes a concrete few-shot example", () => { + expect(GRAPH_EXTRACTION_SYSTEM).toMatch(/Example/i); + }); +}); diff --git a/test/graph.test.ts b/test/graph.test.ts index da8b26651..cc1fb2ba4 100644 --- a/test/graph.test.ts +++ b/test/graph.test.ts @@ -1,10 +1,10 @@ -import { describe, it, expect, beforeEach, vi } from "vitest"; +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; vi.mock("../src/logger.js", () => ({ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }, })); -import { registerGraphFunction } from "../src/functions/graph.js"; +import { registerGraphFunction, getGraphExtractTimeoutMs } from "../src/functions/graph.js"; import type { CompressedObservation, GraphNode, @@ -730,3 +730,82 @@ describe("Graph Functions", () => { }); }); }); + + +describe("parseGraphXml relation resolution", () => { + let sdk: ReturnType; + let kv: ReturnType; + + beforeEach(() => { + sdk = mockSdk(); + kv = mockKV(); + vi.clearAllMocks(); + mockProvider.compress.mockResolvedValue( + ``, + ); + registerGraphFunction(sdk as never, kv as never, mockProvider as never); + }); + + it("resolves relationship endpoints case-insensitively and trimmed", async () => { + mockProvider.compress.mockResolvedValueOnce(` + + + + + +`); + + const result = (await sdk.trigger("mem::graph-extract", { + observations: [testObs], + })) as { success: boolean; nodesAdded: number; edgesAdded: number }; + + expect(result.success).toBe(true); + expect(result.nodesAdded).toBe(2); + expect(result.edgesAdded).toBe(1); + + const edges = await kv.list("mem:graph:edges"); + expect(edges).toHaveLength(1); + expect(edges[0].type).toBe("uses"); + }); + 
+ it("accepts body-form ... tags", async () => { + mockProvider.compress.mockResolvedValueOnce(` + + + + + +`); + + const result = (await sdk.trigger("mem::graph-extract", { + observations: [testObs], + })) as { success: boolean; edgesAdded: number }; + + expect(result.success).toBe(true); + expect(result.edgesAdded).toBe(1); + + const edges = await kv.list("mem:graph:edges"); + expect(edges).toHaveLength(1); + expect(edges[0].type).toBe("uses"); + }); + + it("drops a relationship whose endpoint is not a declared entity", async () => { + mockProvider.compress.mockResolvedValueOnce(` + + + + +`); + + const result = (await sdk.trigger("mem::graph-extract", { + observations: [testObs], + })) as { success: boolean; nodesAdded: number; edgesAdded: number }; + + expect(result.success).toBe(true); + expect(result.nodesAdded).toBe(1); + expect(result.edgesAdded).toBe(0); + + const edges = await kv.list("mem:graph:edges"); + expect(edges).toHaveLength(0); + }); +}); \ No newline at end of file