diff --git a/README.md b/README.md
index c4ec2c1e0..2689fa831 100644
--- a/README.md
+++ b/README.md
@@ -1500,7 +1500,7 @@ Create `~/.agentmemory/.env`:

API

-128 endpoints on port `3111`. The REST API binds to `127.0.0.1` by default. Protected endpoints require `Authorization: Bearer <secret>` when `AGENTMEMORY_SECRET` is set, and mesh sync endpoints require `AGENTMEMORY_SECRET` on both peers.
+129 endpoints on port `3111`. The REST API binds to `127.0.0.1` by default. Protected endpoints require `Authorization: Bearer <secret>` when `AGENTMEMORY_SECRET` is set, and mesh sync endpoints require `AGENTMEMORY_SECRET` on both peers.
Key endpoints
diff --git a/src/functions/migrate-vector-index.ts b/src/functions/migrate-vector-index.ts
index 058eab84a..208112d65 100644
--- a/src/functions/migrate-vector-index.ts
+++ b/src/functions/migrate-vector-index.ts
@@ -10,6 +10,7 @@ export interface MigrateVectorIndexResult {
   failed: number;
   vectorSize: number;
   failedSessions: string[];
+  index: VectorIndex;
 }
 
 // Validate one embedding's shape against the provider's declared dimensions
@@ -111,7 +112,7 @@ export async function migrateVectorIndex(
     // failedSessions list and can't tell apart "0 sessions, all OK"
     // from "kv.list itself blew up".
     failedSessions.push("");
-    return { success: false, totalProcessed: processed, failed, vectorSize: newIndex.size, failedSessions };
+    return { success: false, totalProcessed: processed, failed, vectorSize: newIndex.size, failedSessions, index: newIndex };
   }
 
   for (const session of sessions) {
@@ -148,5 +149,6 @@ export async function migrateVectorIndex(
     failed,
     vectorSize: newIndex.size,
     failedSessions,
+    index: newIndex,
   };
 }
diff --git a/src/functions/search.ts b/src/functions/search.ts
index df699a1a3..a133215c2 100644
--- a/src/functions/search.ts
+++ b/src/functions/search.ts
@@ -6,7 +6,9 @@ import { SearchIndex } from '../state/search-index.js'
 import { VectorIndex } from '../state/vector-index.js'
 import type { EmbeddingProvider } from '../types.js'
 import { memoryToObservation } from '../state/memory-utils.js'
+import { migrateVectorIndex } from './migrate-vector-index.js'
 import { recordAccessBatch } from './access-tracker.js'
+import { recordAudit } from './audit.js'
 import { logger } from "../logger.js";
 import { getAgentId, isAgentScopeIsolated } from "../config.js";
 
@@ -319,7 +321,108 @@ export async function rebuildIndex(kv: StateKV): Promise<number> {
   return count
 }
 
+// Re-embed the whole corpus against the active embedding provider and swap
+// the result into the live vector index. Used after switching embedding
+// model/dimensions: the new model produces a different vector space, so old
+// vectors must be recomputed (see the dimension restore guard in index.ts).
+// The new index is built off to the side via migrateVectorIndex so the live
+// index keeps serving during the (possibly long) rebuild, then swapped in
+// place with restoreFrom, since IndexPersistence holds a reference to the live
+// VectorIndex, so replacing the reference would desync persistence. The swap
+// only happens when migrateVectorIndex reports success; otherwise the live
+// index is left untouched and the caller gets failedSessions to retry.
+//
+// Once the swap has happened it is always audited, even when persisting the
+// swapped index fails afterwards, because the live index did change. The
+// audit write is best-effort: a failure there is logged and does not turn a
+// completed swap into a rejected call.
+export async function reindexVectors(kv: StateKV): Promise<{
+  success: boolean
+  swapped: boolean
+  totalProcessed: number
+  failed: number
+  vectorSize: number
+  failedSessions: string[]
+  provider: string | null
+  dimensions: number | null
+  error?: string
+}> {
+  const ep = currentEmbeddingProvider
+  if (!ep) {
+    return {
+      success: false,
+      swapped: false,
+      totalProcessed: 0,
+      failed: 0,
+      vectorSize: 0,
+      failedSessions: [],
+      provider: null,
+      dimensions: null,
+      error:
+        'no embedding provider configured; set EMBEDDING_PROVIDER or a provider API key and restart',
+    }
+  }
+  const vi = vectorIndex
+  if (!vi) {
+    return {
+      success: false,
+      swapped: false,
+      totalProcessed: 0,
+      failed: 0,
+      vectorSize: 0,
+      failedSessions: [],
+      provider: ep.name,
+      dimensions: ep.dimensions,
+      error: 'vector index not initialized',
+    }
+  }
+  const { index, ...stats } = await migrateVectorIndex(kv, ep)
+  const base = { ...stats, provider: ep.name, dimensions: ep.dimensions }
+  if (!stats.success) {
+    // Rebuild was not clean: keep serving from the untouched live index.
+    return { ...base, swapped: false }
+  }
+
+  vi.restoreFrom(index)
+
+  // Remember a persistence failure instead of returning right away, so the
+  // swap that already happened still reaches the audit log below.
+  let persistError: string | undefined
+  try {
+    await flushIndexSave()
+  } catch (err) {
+    persistError = err instanceof Error ? err.message : String(err)
+  }
+
+  try {
+    await recordAudit(kv, 'vector_index_swap', 'mem::reindex-vectors', [ep.name], {
+      provider: ep.name,
+      dimensions: ep.dimensions,
+      totalProcessed: stats.totalProcessed,
+      persisted: persistError === undefined,
+    })
+  } catch (err) {
+    logger.warn('reindexVectors: failed to record vector_index_swap audit entry', {
+      error: err instanceof Error ? err.message : String(err),
+    })
+  }
+
+  if (persistError !== undefined) {
+    return {
+      ...base,
+      success: false,
+      swapped: true,
+      error: `index swapped but persistence failed: ${persistError}`,
+    }
+  }
+  return { ...base, swapped: true }
+}
+
 export function registerSearchFunction(sdk: ISdk, kv: StateKV): void {
+  sdk.registerFunction('mem::reindex-vectors', async () => {
+    return await reindexVectors(kv)
+  })
+
   sdk.registerFunction(
     'mem::search',
     async (data: {
diff --git a/src/index.ts b/src/index.ts
index 4233e8a67..354477ad2 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -518,7 +518,7 @@ async function main() {
     `Ready. ${embeddingProvider ? "Triple-stream (BM25+Vector+Graph)" : "BM25+Graph"} search active.`,
   );
   bootLog(
-    `REST API: 128 endpoints at http://localhost:${config.restPort}/agentmemory/*`,
+    `REST API: 129 endpoints at http://localhost:${config.restPort}/agentmemory/*`,
   );
   bootLog(
     `MCP surface (opt-in via \`npx @agentmemory/mcp\`): ${getAllTools().length} tools · 6 resources · 3 prompts`,
diff --git a/src/triggers/api.ts b/src/triggers/api.ts
index 7b6c2bf2b..2291f3f96 100644
--- a/src/triggers/api.ts
+++ b/src/triggers/api.ts
@@ -1104,6 +1104,23 @@ export function registerApiTriggers(
     config: { api_path: "/agentmemory/migrate", http_method: "POST" },
   });
 
+  sdk.registerFunction("api::reindex-vectors",
+    async (req: ApiRequest<Record<string, never>>): Promise<Response> => {
+      const authErr = checkAuth(req, secret);
+      if (authErr) return authErr;
+      const result = await sdk.trigger({
+        function_id: "mem::reindex-vectors",
+        payload: {},
+      });
+      return { status_code: 200, body: result };
+    },
+  );
+  sdk.registerTrigger({
+    type: "http",
+    function_id: "api::reindex-vectors",
+    config: { api_path: "/agentmemory/reindex-vectors", http_method: "POST" },
+  });
+
   sdk.registerFunction("api::evict",
     async (req: ApiRequest<{ dryRun?: boolean }>): Promise<Response> => {
       const authErr = checkAuth(req, secret);
diff --git a/src/types.ts b/src/types.ts
index 6797dfaf9..361046a25 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -608,7 +608,8 @@ export interface AuditEntry {
     | "slot_replace"
     | "slot_create"
     | "slot_delete"
-    | "slot_reflect";
+    | "slot_reflect"
+    | "vector_index_swap";
   userId?: string;
   functionId: string;
   targetIds: string[];
diff --git a/test/reindex-vectors.test.ts b/test/reindex-vectors.test.ts
new file mode 100644
index 000000000..16c759e35
--- /dev/null
+++ b/test/reindex-vectors.test.ts
@@ -0,0 +1,123 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+
+vi.mock("../src/logger.js", () => ({
+  logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn() },
+}));
+
+import { VectorIndex } from "../src/state/vector-index.js";
+import {
+  reindexVectors,
+  setVectorIndex,
+  setEmbeddingProvider,
+  getVectorIndex,
+} from "../src/functions/search.js";
+import type { EmbeddingProvider } from "../src/types.js";
+
+const fourDimProvider: EmbeddingProvider = {
+  name: "test-4d",
+  dimensions: 4,
+  embed: async (_text: string) => new Float32Array([0.1, 0.2, 0.3, 0.4]),
+  embedBatch: async (texts: string[]) =>
+    texts.map(() => new Float32Array([0.1, 0.2, 0.3, 0.4])),
+};
+
+function mockKV() {
+  const store = new Map<string, Map<string, unknown>>();
+  return {
+    get: async <T>(scope: string, key: string): Promise<T | null> =>
+      (store.get(scope)?.get(key) as T) ?? null,
+    set: async <T>(scope: string, key: string, data: T): Promise<T> => {
+      if (!store.has(scope)) store.set(scope, new Map());
+      store.get(scope)!.set(key, data);
+      return data;
+    },
+    delete: async (_scope: string, _key: string): Promise<void> => {},
+    list: async <T>(scope: string): Promise<T[]> => {
+      const entries = store.get(scope);
+      return entries ? (Array.from(entries.values()) as T[]) : [];
+    },
+  };
+}
+
+async function seedCorpus(kv: ReturnType<typeof mockKV>) {
+  await kv.set("mem:sessions", "ses_1", { id: "ses_1" });
+  await kv.set("mem:obs:ses_1", "obs_1", {
+    id: "obs_1",
+    sessionId: "ses_1",
+    timestamp: new Date().toISOString(),
+    type: "decision",
+    title: "reindex observation",
+    facts: ["x"],
+    narrative: "to be re-embedded",
+    concepts: [],
+    files: [],
+    importance: 5,
+  });
+  await kv.set("mem:memories", "mem_1", {
+    id: "mem_1",
+    createdAt: new Date().toISOString(),
+    updatedAt: new Date().toISOString(),
+    type: "fact",
+    title: "reindex memory",
+    content: "this memory will be re-embedded",
+    concepts: [],
+    files: [],
+    sessionIds: ["ses_1"],
+    strength: 7,
+    version: 1,
+    isLatest: true,
+  });
+}
+
+describe("reindexVectors", () => {
+  beforeEach(() => {
+    setVectorIndex(null);
+    setEmbeddingProvider(null);
+  });
+
+  it("re-embeds the corpus and swaps it into the live vector index", async () => {
+    const kv = mockKV();
+    await seedCorpus(kv);
+    const live = new VectorIndex();
+    setVectorIndex(live);
+    setEmbeddingProvider(fourDimProvider);
+
+    const result = await reindexVectors(kv as never);
+
+    expect(result.success).toBe(true);
+    expect(result.swapped).toBe(true);
+    expect(result.failed).toBe(0);
+    expect(result.totalProcessed).toBe(2);
+    expect(result.vectorSize).toBe(2);
+    expect(result.provider).toBe("test-4d");
+    expect(result.dimensions).toBe(4);
+    expect(getVectorIndex()!.size).toBe(2);
+  });
+
+  it("returns success:false without swapping when no embedding provider is configured", async () => {
+    const kv = mockKV();
+    await seedCorpus(kv);
+    const live = new VectorIndex();
+    setVectorIndex(live);
+    setEmbeddingProvider(null);
+
+    const result = await reindexVectors(kv as never);
+
+    expect(result.success).toBe(false);
+    expect(result.swapped).toBe(false);
+    expect(result.error).toBeTruthy();
+    expect(getVectorIndex()!.size).toBe(0);
+  });
+
+  it("returns success:false without throwing when the vector index is not initialized", async () => {
+    const kv = mockKV();
+    await seedCorpus(kv);
+    setVectorIndex(null);
+    setEmbeddingProvider(fourDimProvider);
+
+    const result = await reindexVectors(kv as never);
+
+    expect(result.success).toBe(false);
+    expect(result.swapped).toBe(false);
+  });
+});
diff --git a/test/vector-index-dimensions.test.ts b/test/vector-index-dimensions.test.ts
index 140d7f8ae..eadfb433a 100644
--- a/test/vector-index-dimensions.test.ts
+++ b/test/vector-index-dimensions.test.ts
@@ -129,4 +129,26 @@ describe("migrateVectorIndex", () => {
     expect(result.vectorSize).toBe(0);
     expect(result.failed).toBe(0);
   });
+
+  it("returns the rebuilt index so callers can swap it into the live index", async () => {
+    const kv = mockKV();
+    await kv.set("mem:sessions", "ses_1", { id: "ses_1" });
+    await kv.set("mem:obs:ses_1", "obs_1", {
+      id: "obs_1",
+      sessionId: "ses_1",
+      timestamp: new Date().toISOString(),
+      type: "decision",
+      title: "swap test",
+      facts: ["x"],
+      narrative: "to be re-embedded",
+      concepts: [],
+      files: [],
+      importance: 5,
+    });
+
+    const result = await migrateVectorIndex(kv as never, newProvider);
+    expect(result.index).toBeInstanceOf(VectorIndex);
+    expect(result.index.size).toBe(result.vectorSize);
+    expect(result.index.size).toBe(1);
+  });
 });