From d9e8be8584cd789d2828960757c9dfd490182d0c Mon Sep 17 00:00:00 2001 From: Hleb Shauchenka Date: Sat, 20 Jun 2026 19:22:15 +0200 Subject: [PATCH 1/2] feat(api): add POST /agentmemory/reindex-vectors to re-embed after a model swap Switching the embedding model or dimensions invalidates the persisted vectors. Today the only paths are full-restart rebuild or AGENTMEMORY_DROP_STALE_INDEX, neither of which re-embeds history on demand without a restart. This adds a new REST endpoint (and the backing mem::reindex-vectors iii function) that re-embeds every observation and memory. The live index keeps serving during the rebuild; the fresh index is built off to the side, validated (failed === 0), then swapped into the live index via VectorIndex.restoreFrom (so IndexPersistence keeps its reference to the live index) and persisted with flushIndexSave() before the call returns. Builds on the existing migrateVectorIndex helper, which already constructed a fresh index but discarded it. The new reindexVectors() keeps the fresh index and swaps it in. Tests: - test/reindex-vectors.test.ts (covers reindexVectors: successful swap, missing embedding provider, uninitialized vector index; the REST handler is not exercised). - test/vector-index-dimensions.test.ts extended. REST endpoint count bumps 128 -> 129. No MCP tool added. Signed-off-by: Hleb Shauchenka --- README.md | 2 +- src/functions/migrate-vector-index.ts | 4 +- src/functions/search.ts | 65 ++++++++++++++ src/index.ts | 2 +- src/triggers/api.ts | 17 ++++ test/reindex-vectors.test.ts | 123 ++++++++++++++++++++++++++ test/vector-index-dimensions.test.ts | 22 +++++ 7 files changed, 232 insertions(+), 3 deletions(-) create mode 100644 test/reindex-vectors.test.ts diff --git a/README.md b/README.md index c4ec2c1e0..2689fa831 100644 --- a/README.md +++ b/README.md @@ -1500,7 +1500,7 @@ Create `~/.agentmemory/.env`:

API

-128 endpoints on port `3111`. The REST API binds to `127.0.0.1` by default. Protected endpoints require `Authorization: Bearer <secret>` when `AGENTMEMORY_SECRET` is set, and mesh sync endpoints require `AGENTMEMORY_SECRET` on both peers. +129 endpoints on port `3111`. The REST API binds to `127.0.0.1` by default. Protected endpoints require `Authorization: Bearer <secret>` when `AGENTMEMORY_SECRET` is set, and mesh sync endpoints require `AGENTMEMORY_SECRET` on both peers.
Key endpoints diff --git a/src/functions/migrate-vector-index.ts b/src/functions/migrate-vector-index.ts index 058eab84a..208112d65 100644 --- a/src/functions/migrate-vector-index.ts +++ b/src/functions/migrate-vector-index.ts @@ -10,6 +10,7 @@ export interface MigrateVectorIndexResult { failed: number; vectorSize: number; failedSessions: string[]; + index: VectorIndex; } // Validate one embedding's shape against the provider's declared dimensions @@ -111,7 +112,7 @@ export async function migrateVectorIndex( // failedSessions list and can't tell apart "0 sessions, all OK" // from "kv.list itself blew up". failedSessions.push(""); - return { success: false, totalProcessed: processed, failed, vectorSize: newIndex.size, failedSessions }; + return { success: false, totalProcessed: processed, failed, vectorSize: newIndex.size, failedSessions, index: newIndex }; } for (const session of sessions) { @@ -148,5 +149,6 @@ export async function migrateVectorIndex( failed, vectorSize: newIndex.size, failedSessions, + index: newIndex, }; } diff --git a/src/functions/search.ts b/src/functions/search.ts index df699a1a3..14660fcdf 100644 --- a/src/functions/search.ts +++ b/src/functions/search.ts @@ -6,6 +6,7 @@ import { SearchIndex } from '../state/search-index.js' import { VectorIndex } from '../state/vector-index.js' import type { EmbeddingProvider } from '../types.js' import { memoryToObservation } from '../state/memory-utils.js' +import { migrateVectorIndex } from './migrate-vector-index.js' import { recordAccessBatch } from './access-tracker.js' import { logger } from "../logger.js"; import { getAgentId, isAgentScopeIsolated } from "../config.js"; @@ -319,7 +320,71 @@ export async function rebuildIndex(kv: StateKV): Promise { return count } +// Re-embed the whole corpus against the active embedding provider and swap +// the result into the live vector index. 
Used after switching embedding +// model/dimensions: the new model produces a different vector space, so old +// vectors must be recomputed (see the dimension restore guard in index.ts). +// The new index is built off to the side via migrateVectorIndex so the live +// index keeps serving during the (possibly long) rebuild, then swapped in +// place with restoreFrom, since IndexPersistence holds a reference to the live +// VectorIndex, so replacing the reference would desync persistence. The swap +// only happens on a fully clean rebuild (failed === 0); on any failure the +// live index is left untouched and the caller gets failedSessions to retry. +export async function reindexVectors(kv: StateKV): Promise<{ + success: boolean + swapped: boolean + totalProcessed: number + failed: number + vectorSize: number + failedSessions: string[] + provider: string | null + dimensions: number | null + error?: string +}> { + const ep = currentEmbeddingProvider + if (!ep) { + return { + success: false, + swapped: false, + totalProcessed: 0, + failed: 0, + vectorSize: 0, + failedSessions: [], + provider: null, + dimensions: null, + error: + 'no embedding provider configured; set EMBEDDING_PROVIDER or a provider API key and restart', + } + } + const vi = vectorIndex + if (!vi) { + return { + success: false, + swapped: false, + totalProcessed: 0, + failed: 0, + vectorSize: 0, + failedSessions: [], + provider: ep.name, + dimensions: ep.dimensions, + error: 'vector index not initialized', + } + } + const { index, ...stats } = await migrateVectorIndex(kv, ep) + let swapped = false + if (stats.success) { + vi.restoreFrom(index) + await flushIndexSave() + swapped = true + } + return { ...stats, swapped, provider: ep.name, dimensions: ep.dimensions } +} + export function registerSearchFunction(sdk: ISdk, kv: StateKV): void { + sdk.registerFunction('mem::reindex-vectors', async () => { + return await reindexVectors(kv) + }) + sdk.registerFunction( 'mem::search', async (data: { diff --git 
a/src/index.ts b/src/index.ts index 4233e8a67..354477ad2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -518,7 +518,7 @@ async function main() { `Ready. ${embeddingProvider ? "Triple-stream (BM25+Vector+Graph)" : "BM25+Graph"} search active.`, ); bootLog( - `REST API: 128 endpoints at http://localhost:${config.restPort}/agentmemory/*`, + `REST API: 129 endpoints at http://localhost:${config.restPort}/agentmemory/*`, ); bootLog( `MCP surface (opt-in via \`npx @agentmemory/mcp\`): ${getAllTools().length} tools · 6 resources · 3 prompts`, diff --git a/src/triggers/api.ts b/src/triggers/api.ts index 7b6c2bf2b..2291f3f96 100644 --- a/src/triggers/api.ts +++ b/src/triggers/api.ts @@ -1104,6 +1104,23 @@ export function registerApiTriggers( config: { api_path: "/agentmemory/migrate", http_method: "POST" }, }); + sdk.registerFunction("api::reindex-vectors", + async (req: ApiRequest>): Promise => { + const authErr = checkAuth(req, secret); + if (authErr) return authErr; + const result = await sdk.trigger({ + function_id: "mem::reindex-vectors", + payload: {}, + }); + return { status_code: 200, body: result }; + }, + ); + sdk.registerTrigger({ + type: "http", + function_id: "api::reindex-vectors", + config: { api_path: "/agentmemory/reindex-vectors", http_method: "POST" }, + }); + sdk.registerFunction("api::evict", async (req: ApiRequest<{ dryRun?: boolean }>): Promise => { const authErr = checkAuth(req, secret); diff --git a/test/reindex-vectors.test.ts b/test/reindex-vectors.test.ts new file mode 100644 index 000000000..16c759e35 --- /dev/null +++ b/test/reindex-vectors.test.ts @@ -0,0 +1,123 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; + +vi.mock("../src/logger.js", () => ({ + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() }, +})); + +import { VectorIndex } from "../src/state/vector-index.js"; +import { + reindexVectors, + setVectorIndex, + setEmbeddingProvider, + getVectorIndex, +} from "../src/functions/search.js"; +import type { 
EmbeddingProvider } from "../src/types.js"; + +const fourDimProvider: EmbeddingProvider = { + name: "test-4d", + dimensions: 4, + embed: async (_text: string) => new Float32Array([0.1, 0.2, 0.3, 0.4]), + embedBatch: async (texts: string[]) => + texts.map(() => new Float32Array([0.1, 0.2, 0.3, 0.4])), +}; + +function mockKV() { + const store = new Map>(); + return { + get: async (scope: string, key: string): Promise => + (store.get(scope)?.get(key) as T) ?? null, + set: async (scope: string, key: string, data: T): Promise => { + if (!store.has(scope)) store.set(scope, new Map()); + store.get(scope)!.set(key, data); + return data; + }, + delete: async (_scope: string, _key: string): Promise => {}, + list: async (scope: string): Promise => { + const entries = store.get(scope); + return entries ? (Array.from(entries.values()) as T[]) : []; + }, + }; +} + +async function seedCorpus(kv: ReturnType) { + await kv.set("mem:sessions", "ses_1", { id: "ses_1" }); + await kv.set("mem:obs:ses_1", "obs_1", { + id: "obs_1", + sessionId: "ses_1", + timestamp: new Date().toISOString(), + type: "decision", + title: "reindex observation", + facts: ["x"], + narrative: "to be re-embedded", + concepts: [], + files: [], + importance: 5, + }); + await kv.set("mem:memories", "mem_1", { + id: "mem_1", + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + type: "fact", + title: "reindex memory", + content: "this memory will be re-embedded", + concepts: [], + files: [], + sessionIds: ["ses_1"], + strength: 7, + version: 1, + isLatest: true, + }); +} + +describe("reindexVectors", () => { + beforeEach(() => { + setVectorIndex(null); + setEmbeddingProvider(null); + }); + + it("re-embeds the corpus and swaps it into the live vector index", async () => { + const kv = mockKV(); + await seedCorpus(kv); + const live = new VectorIndex(); + setVectorIndex(live); + setEmbeddingProvider(fourDimProvider); + + const result = await reindexVectors(kv as never); + + 
expect(result.success).toBe(true); + expect(result.swapped).toBe(true); + expect(result.failed).toBe(0); + expect(result.totalProcessed).toBe(2); + expect(result.vectorSize).toBe(2); + expect(result.provider).toBe("test-4d"); + expect(result.dimensions).toBe(4); + expect(getVectorIndex()!.size).toBe(2); + }); + + it("returns success:false without swapping when no embedding provider is configured", async () => { + const kv = mockKV(); + await seedCorpus(kv); + const live = new VectorIndex(); + setVectorIndex(live); + setEmbeddingProvider(null); + + const result = await reindexVectors(kv as never); + + expect(result.success).toBe(false); + expect(result.swapped).toBe(false); + expect(result.error).toBeTruthy(); + expect(getVectorIndex()!.size).toBe(0); + }); + + it("returns success:false without throwing when the vector index is not initialized", async () => { + const kv = mockKV(); + await seedCorpus(kv); + setVectorIndex(null); + setEmbeddingProvider(fourDimProvider); + + const result = await reindexVectors(kv as never); + + expect(result.success).toBe(false); + expect(result.swapped).toBe(false); + }); +}); diff --git a/test/vector-index-dimensions.test.ts b/test/vector-index-dimensions.test.ts index 140d7f8ae..eadfb433a 100644 --- a/test/vector-index-dimensions.test.ts +++ b/test/vector-index-dimensions.test.ts @@ -129,4 +129,26 @@ describe("migrateVectorIndex", () => { expect(result.vectorSize).toBe(0); expect(result.failed).toBe(0); }); + + it("returns the rebuilt index so callers can swap it into the live index", async () => { + const kv = mockKV(); + await kv.set("mem:sessions", "ses_1", { id: "ses_1" }); + await kv.set("mem:obs:ses_1", "obs_1", { + id: "obs_1", + sessionId: "ses_1", + timestamp: new Date().toISOString(), + type: "decision", + title: "swap test", + facts: ["x"], + narrative: "to be re-embedded", + concepts: [], + files: [], + importance: 5, + }); + + const result = await migrateVectorIndex(kv as never, newProvider); + 
expect(result.index).toBeInstanceOf(VectorIndex); + expect(result.index.size).toBe(result.vectorSize); + expect(result.index.size).toBe(1); + }); }); From d22ef0e803cbc6825e352af31b826d43ef1010c9 Mon Sep 17 00:00:00 2001 From: Hleb Shauchenka Date: Sat, 20 Jun 2026 22:24:01 +0200 Subject: [PATCH 2/2] fix(reindex): handle persistence failure and audit vector index swap The previous flow mutated the live vector index via vi.restoreFrom() and then awaited flushIndexSave() with no guard. If the persistence call threw, callers saw the failure but the in-memory swap had already happened, so retries would have operated on a half-applied state. Move flushIndexSave() into a try/catch after the live swap. On failure, return a structured response with success:false, swapped:true, and the error message so operators can distinguish 'swap never happened' from 'swap applied but persistence failed'. Also record a vector_index_swap audit entry on the successful path, per the project rule that state-changing operations carry an audit trail. 
--- src/functions/search.ts | 19 ++++++++++++++++++- src/types.ts | 3 ++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/functions/search.ts b/src/functions/search.ts index 14660fcdf..a133215c2 100644 --- a/src/functions/search.ts +++ b/src/functions/search.ts @@ -8,6 +8,7 @@ import type { EmbeddingProvider } from '../types.js' import { memoryToObservation } from '../state/memory-utils.js' import { migrateVectorIndex } from './migrate-vector-index.js' import { recordAccessBatch } from './access-tracker.js' +import { recordAudit } from './audit.js' import { logger } from "../logger.js"; import { getAgentId, isAgentScopeIsolated } from "../config.js"; @@ -374,8 +375,24 @@ export async function reindexVectors(kv: StateKV): Promise<{ let swapped = false if (stats.success) { vi.restoreFrom(index) - await flushIndexSave() swapped = true + try { + await flushIndexSave() + } catch (err) { + return { + ...stats, + success: false, + swapped, + provider: ep.name, + dimensions: ep.dimensions, + error: `index swapped but persistence failed: ${err instanceof Error ? err.message : String(err)}`, + } + } + await recordAudit(kv, 'vector_index_swap', 'mem::reindex-vectors', [ep.name], { + provider: ep.name, + dimensions: ep.dimensions, + totalProcessed: stats.totalProcessed, + }) } return { ...stats, swapped, provider: ep.name, dimensions: ep.dimensions } } diff --git a/src/types.ts b/src/types.ts index 6797dfaf9..361046a25 100644 --- a/src/types.ts +++ b/src/types.ts @@ -608,7 +608,8 @@ export interface AuditEntry { | "slot_replace" | "slot_create" | "slot_delete" - | "slot_reflect"; + | "slot_reflect" + | "vector_index_swap"; userId?: string; functionId: string; targetIds: string[];