diff --git a/docs/architecture.md b/docs/architecture.md index 3e1dbf1c..dedad3cd 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -181,6 +181,44 @@ description is the shared trunk, and three sibling *bypasses* project it into: The three bypasses are siblings — none is upstream of another; each is a different aggregation of the same descriptions. +### Synthesis mode (optional) + +By default `MEMORY.md` and the `skill/` tree are rendered deterministically from +already-extracted records. When `memory_files_config.synthesize=True`, they are +instead synthesized directly from the per-source descriptions by an LLM +(`memu.memory_fs.MemorySynthesizer`, prompts in `memu.prompts.memory_fs`): + +- `MEMORY.md`: one LLM pass turns all descriptions into a consolidated memory doc. +- `skill//SKILL.md`: one LLM pass extracts skills as a JSON array of + `{name, body}` objects, each written as its own skill doc. + +`INDEX.md` stays deterministic in both modes. Synthesis uses the +`synthesis_llm_profile` profile and leaves the existing memorize/extract pipeline +untouched. + +### Initialize vs. incremental update + +Synthesis is stateful and mirrors the "submit the changed part of the file system" +model. `MemoryService._build_memory_files(where, changed=...)` decides between two +paths: + +- **Initialization** (no prior tree on disk, or `changed is None`): scan all + in-scope sources, turn each into its multimodal description, and synthesize + `MEMORY.md` + the `skill/` tree from scratch (`MemorySynthesizer.synthesize`). +- **Incremental update** (a tree already exists and a changed set is supplied): + read the existing `MEMORY.md` body and existing skill bodies back off disk and + merge only the changed sources' descriptions into them + (`MemorySynthesizer.update`, prompts `MEMORY_UPDATE_PROMPT` / `SKILL_UPDATE_PROMPT`). + Skills are upserted by slug, so untouched skills survive. + +`INDEX.md` is always recomputed from the current source set, so it needs no LLM +merge. `export_memory_files(user=...)` always takes the initialization path (full +rebuild). When `memory_files_config.update_on_memorize=True`, each `memorize()` +call drives this builder with its just-created resources as the changed set, so the +tree initializes on first run and incrementally updates afterwards. The hook is +best-effort: an export failure is logged and never fails memorize, since the +structured memory is already persisted. + The exporter is read-only against the database and disabled by default (`memory_files_config.enabled`). Diff detection is handled by a sidecar manifest (`.memufs_manifest.json`) that stores per-file content hashes, so each export diff --git a/src/memu/app/memorize.py b/src/memu/app/memorize.py index 11f6148c..085af8aa 100644 --- a/src/memu/app/memorize.py +++ b/src/memu/app/memorize.py @@ -61,6 +61,8 @@ class MemorizeMixin: _extract_json_blob: Callable[[str], str] _escape_prompt_value: Callable[[str], str] user_model: type[BaseModel] + memory_files_config: Any + _build_memory_files: Callable[..., Awaitable[dict[str, Any]]] async def memorize( self, @@ -92,8 +94,33 @@ async def memorize( if response is None: msg = "Memorize workflow failed to produce a response" raise RuntimeError(msg) + + await self._maybe_update_memory_files(result, user_scope) return response + async def _maybe_update_memory_files( + self, result: WorkflowState, user_scope: dict[str, Any] | None + ) -> None: + """Drive the memory file tree from a memorize call (init or incremental update). + + Gated behind ``memory_files_config.enabled`` and ``update_on_memorize`` so the + default memorize behavior is unchanged. The just-created resources are the + "changed part of the file system" that an existing tree is updated against; + if no tree exists yet, ``_build_memory_files`` initializes it from the full + scoped store instead. Failures are best-effort: the memory is already + persisted, so an export error must not fail memorize. + """ + cfg = self.memory_files_config + if not (getattr(cfg, "enabled", False) and getattr(cfg, "update_on_memorize", False)): + return + changed = cast("list[Resource]", result.get("resources") or []) + if not changed: + return + try: + await self._build_memory_files(user_scope, changed=changed) + except Exception: + logger.exception("Memory file export failed after memorize") + def _build_memorize_workflow(self) -> list[WorkflowStep]: steps = [ WorkflowStep( diff --git a/src/memu/app/service.py b/src/memu/app/service.py index f7fd74dd..f1c7759d 100644 --- a/src/memu/app/service.py +++ b/src/memu/app/service.py @@ -31,7 +31,7 @@ LLMInterceptorHandle, LLMInterceptorRegistry, ) -from memu.memory_fs import ExportResult, MemoryFileExporter +from memu.memory_fs import ExportResult, MemoryFileExporter, MemorySynthesizer from memu.workflow.interceptor import WorkflowInterceptorHandle, WorkflowInterceptorRegistry from memu.workflow.pipeline import PipelineManager from memu.workflow.runner import WorkflowRunner, resolve_workflow_runner @@ -96,6 +96,7 @@ def __init__( # Writes are serialized through a single lock so concurrent exports never # interleave on the shared output directory. self._memory_file_exporter = MemoryFileExporter(self.memory_files_config.output_dir) + self._memory_synthesizer = MemorySynthesizer() self._memory_files_lock = asyncio.Lock() self._pipelines = PipelineManager( @@ -382,11 +383,52 @@ async def export_memory_files(self, *, user: dict[str, Any] | None = None) -> di msg = "Memory files are disabled; set memory_files_config.enabled=True to use export_memory_files()." raise RuntimeError(msg) where = self.user_model(**user).model_dump() if user is not None else None + # No changed set => full (re)initialization of the tree. + return await self._build_memory_files(where, changed=None) + + async def _build_memory_files( + self, + where: dict[str, Any] | None, + *, + changed: list[Any] | None, + ) -> dict[str, Any]: + """Initialize or incrementally update the memory file tree. + + ``changed`` is the list of just-memorized ``Resource`` objects driving an + incremental update. When it is ``None`` (or no prior tree exists), the tree + is (re)initialized from the full scoped store. + """ + memory_body: str | None = None + skills: dict[str, str] | None = None + + if self.memory_files_config.synthesize: + client = self._get_llm_client(self.memory_files_config.synthesis_llm_profile) + if changed is not None and self._memory_file_exporter.artifacts_exist(): + # UPDATE: merge the changed descriptions into existing artifacts. + existing_memory = await asyncio.to_thread(self._memory_file_exporter.read_memory_body) + existing_skills = await asyncio.to_thread(self._memory_file_exporter.read_skills) + synthesized = await self._memory_synthesizer.update( + MemoryFileExporter._build_descriptions(changed), + existing_memory=existing_memory, + existing_skills=existing_skills, + chat=client.chat, + ) + else: + # INIT: build from scratch over all in-scope descriptions. + resources = list(self.database.resource_repo.list_resources(where=where or None).values()) + synthesized = await self._memory_synthesizer.synthesize( + MemoryFileExporter._build_descriptions(resources), + chat=client.chat, + ) + memory_body, skills = synthesized.memory_body, synthesized.skills + async with self._memory_files_lock: result: ExportResult = await asyncio.to_thread( self._memory_file_exporter.export, self.database, where=where, + memory_body=memory_body, + skills=skills, ) return result.to_dict() diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py index 6fea0d70..946b2a1c 100644 --- a/src/memu/app/settings.py +++ b/src/memu/app/settings.py @@ -158,6 +158,24 @@ class MemoryFilesConfig(BaseModel): default="./data/memory", description="Directory where the memory markdown tree (INDEX.md/MEMORY.md/skill/) is written.", ) + synthesize: bool = Field( + default=False, + description=( + "Synthesize MEMORY.md and skill docs from per-source descriptions via the LLM " + "instead of rendering already-extracted records. INDEX.md stays deterministic." + ), + ) + synthesis_llm_profile: str = Field( + default="default", + description="LLM profile used when synthesize=True.", + ) + update_on_memorize: bool = Field( + default=False, + description=( + "Automatically initialize or incrementally update the memory file tree after each " + "memorize() call, using the just-created resources as the changed file set." + ), + ) class RetrieveCategoryConfig(BaseModel): diff --git a/src/memu/memory_fs/__init__.py b/src/memu/memory_fs/__init__.py index afaa1aa9..57e26293 100644 --- a/src/memu/memory_fs/__init__.py +++ b/src/memu/memory_fs/__init__.py @@ -5,5 +5,13 @@ """ from memu.memory_fs.exporter import ExportResult, FileDescription, MemoryFileExporter, slugify +from memu.memory_fs.synthesizer import MemorySynthesizer, SynthesisResult -__all__ = ["ExportResult", "FileDescription", "MemoryFileExporter", "slugify"] +__all__ = [ + "ExportResult", + "FileDescription", + "MemoryFileExporter", + "MemorySynthesizer", + "SynthesisResult", + "slugify", +] diff --git a/src/memu/memory_fs/exporter.py b/src/memu/memory_fs/exporter.py index c05d16f9..a63b2d21 100644 --- a/src/memu/memory_fs/exporter.py +++ b/src/memu/memory_fs/exporter.py @@ -107,8 +107,21 @@ class MemoryFileExporter: def __init__(self, output_dir: str) -> None: self.output_dir = pathlib.Path(output_dir) - def export(self, database: Database, *, where: Mapping[str, Any] | None = None) -> ExportResult: - """Render the (optionally scoped) store and write only changed artifacts.""" + def export( + self, + database: Database, + *, + where: Mapping[str, Any] | None = None, + memory_body: str | None = None, + skills: dict[str, str] | None = None, + ) -> ExportResult: + """Render the (optionally scoped) store and write only changed artifacts. + + ``memory_body`` and ``skills`` are optional synthesized overrides (e.g. + produced by :class:`~memu.memory_fs.synthesizer.MemorySynthesizer`). When + provided they replace the deterministic, database-derived rendering of + ``MEMORY.md`` / the ``skill/`` tree; ``INDEX.md`` is always deterministic. + """ self.output_dir.mkdir(parents=True, exist_ok=True) scope = dict(where) if where else None @@ -119,12 +132,20 @@ def export(self, database: Database, *, where: Mapping[str, Any] | None = None) # The shared trunk: one multimodal description per source file. descriptions = self._build_descriptions(resources) - skill_artifacts = self._skill_bypass(items) + if skills is not None: + skill_artifacts = { + f"{SKILL_DIRNAME}/{slug}/{SKILL_FILENAME}": self._skill_document(body) + for slug, body in skills.items() + } + else: + skill_artifacts = self._skill_bypass(items) skill_slugs = sorted(rel.split("/")[1] for rel in skill_artifacts) + body = memory_body if memory_body is not None else self._memory_body(categories) + artifacts: dict[str, str] = {} artifacts.update(skill_artifacts) - artifacts[MEMORY_FILENAME] = self._memory_bypass(categories) + artifacts[MEMORY_FILENAME] = self._memory_document(body) artifacts[INDEX_FILENAME] = self._index_bypass(categories, descriptions, skill_slugs, items, database=database) return self._sync(artifacts) @@ -162,9 +183,13 @@ def _skill_bypass(self, items: list[MemoryItem]) -> dict[str, str]: slug = base if count == 0 else f"{base}-{count + 1}" used[base] = count + 1 rel_path = f"{SKILL_DIRNAME}/{slug}/{SKILL_FILENAME}" - artifacts[rel_path] = f"{_GENERATED_NOTICE}\n\n{body}\n" + artifacts[rel_path] = self._skill_document(body) return artifacts + @staticmethod + def _skill_document(body: str) -> str: + return f"{_GENERATED_NOTICE}\n\n{body.strip()}\n" + @staticmethod def _skill_name(body: str, *, fallback: str) -> str: """Derive a skill folder slug from frontmatter ``name:`` or first heading.""" @@ -181,22 +206,26 @@ def _skill_name(body: str, *, fallback: str) -> str: # -- bypass: MEMORY ---------------------------------------------------- - def _memory_bypass(self, categories: list[MemoryCategory]) -> str: - """The living memory: folder (category) summaries aggregated into one file.""" - lines = ["# Memory", "", _GENERATED_NOTICE, ""] + @staticmethod + def _memory_document(body: str) -> str: + body = body.strip() or "_No memory yet._" + return f"# Memory\n\n{_GENERATED_NOTICE}\n\n{body}\n" + + def _memory_body(self, categories: list[MemoryCategory]) -> str: + """The living memory body: folder (category) summaries aggregated.""" if not categories: - lines += ["_No memory yet._", ""] - return "\n".join(lines) + return "" + sections: list[str] = [] for category in sorted(categories, key=lambda c: (c.name.lower(), c.id)): description = self._inline((category.description or "").strip()) summary = (category.summary or "").strip() - lines.append(f"## {category.name}") + block = [f"## {category.name}"] if description: - lines.append(f"_{description}_") - lines.append("") - lines.append(summary or "_No summary yet._") - lines.append("") - return "\n".join(lines) + block.append(f"_{description}_") + block.append("") + block.append(summary or "_No summary yet._") + sections.append("\n".join(block)) + return "\n\n".join(sections) # -- bypass: INDEX ----------------------------------------------------- @@ -305,6 +334,41 @@ def _prune_empty_dirs(self, directory: pathlib.Path) -> None: else: break + # -- reading existing artifacts (for incremental update) --------------- + + def artifacts_exist(self) -> bool: + """Whether a prior memory tree is already present (init vs update).""" + return (self.output_dir / MEMORY_FILENAME).exists() + + def read_memory_body(self) -> str: + """Read MEMORY.md and strip the heading/notice, returning just the body.""" + path = self.output_dir / MEMORY_FILENAME + if not path.exists(): + return "" + return self._strip_chrome(path.read_text(encoding="utf-8"), drop_heading="# Memory") + + def read_skills(self) -> dict[str, str]: + """Read existing ``skill//SKILL.md`` bodies keyed by slug.""" + skills: dict[str, str] = {} + skill_root = self.output_dir / SKILL_DIRNAME + if not skill_root.is_dir(): + return skills + for child in sorted(skill_root.iterdir()): + doc = child / SKILL_FILENAME + if child.is_dir() and doc.exists(): + skills[child.name] = self._strip_chrome(doc.read_text(encoding="utf-8")) + return skills + + @staticmethod + def _strip_chrome(text: str, *, drop_heading: str | None = None) -> str: + lines = text.splitlines() + kept = [ + line + for line in lines + if line.strip() != _GENERATED_NOTICE and (drop_heading is None or line.strip() != drop_heading) + ] + return "\n".join(kept).strip() + def _load_manifest(self) -> dict[str, str]: manifest_path = self.output_dir / MANIFEST_NAME if not manifest_path.exists(): diff --git a/src/memu/memory_fs/synthesizer.py b/src/memu/memory_fs/synthesizer.py new file mode 100644 index 00000000..26a259d2 --- /dev/null +++ b/src/memu/memory_fs/synthesizer.py @@ -0,0 +1,161 @@ +"""LLM synthesis of MEMORY/SKILL artifacts from the shared description trunk. + +This is the optional, opt-in counterpart to the deterministic exporter: instead of +rendering already-extracted database items/summaries, it feeds the per-source +multimodal descriptions to an LLM and synthesizes the memory document and skill +docs directly. ``INDEX.md`` stays deterministic and is handled by the exporter. +""" + +from __future__ import annotations + +import json +import re +from collections.abc import Awaitable, Callable +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +from memu.memory_fs.exporter import slugify +from memu.prompts.memory_fs import ( + DESCRIPTIONS_PLACEHOLDER, + EXISTING_PLACEHOLDER, + MEMORY_SYNTHESIS_PROMPT, + MEMORY_UPDATE_PROMPT, + SKILL_SYNTHESIS_PROMPT, + SKILL_UPDATE_PROMPT, +) + +if TYPE_CHECKING: + from memu.memory_fs.exporter import FileDescription + +ChatFn = Callable[[str], Awaitable[str]] + + +@dataclass +class SynthesisResult: + """Synthesized artifact payloads, ready to hand to the exporter.""" + + memory_body: str = "" + skills: dict[str, str] = field(default_factory=dict) + + +class MemorySynthesizer: + """Synthesize MEMORY/SKILL content from multimodal descriptions via an LLM.""" + + def __init__( + self, + *, + memory_prompt: str = MEMORY_SYNTHESIS_PROMPT, + skill_prompt: str = SKILL_SYNTHESIS_PROMPT, + memory_update_prompt: str = MEMORY_UPDATE_PROMPT, + skill_update_prompt: str = SKILL_UPDATE_PROMPT, + ) -> None: + self._memory_prompt = memory_prompt + self._skill_prompt = skill_prompt + self._memory_update_prompt = memory_update_prompt + self._skill_update_prompt = skill_update_prompt + + async def synthesize(self, descriptions: list[FileDescription], *, chat: ChatFn) -> SynthesisResult: + """Initialization: build MEMORY/SKILL from scratch over all descriptions.""" + formatted = self._format(descriptions) + if not formatted: + return SynthesisResult() + + memory_raw = await chat(self._memory_prompt.replace(DESCRIPTIONS_PLACEHOLDER, formatted)) + skills_raw = await chat(self._skill_prompt.replace(DESCRIPTIONS_PLACEHOLDER, formatted)) + + return SynthesisResult( + memory_body=self._clean_markdown(memory_raw), + skills=self._parse_skills(skills_raw), + ) + + async def update( + self, + descriptions: list[FileDescription], + *, + existing_memory: str, + existing_skills: dict[str, str], + chat: ChatFn, + ) -> SynthesisResult: + """Incremental: merge the changed descriptions into existing artifacts.""" + formatted = self._format(descriptions) + if not formatted: + return SynthesisResult(memory_body=existing_memory, skills=dict(existing_skills)) + + memory_prompt = self._memory_update_prompt.replace( + EXISTING_PLACEHOLDER, existing_memory.strip() or "(empty)" + ).replace(DESCRIPTIONS_PLACEHOLDER, formatted) + memory_raw = await chat(memory_prompt) + + skill_prompt = self._skill_update_prompt.replace( + EXISTING_PLACEHOLDER, self._format_existing_skills(existing_skills) or "(none)" + ).replace(DESCRIPTIONS_PLACEHOLDER, formatted) + upserts = self._parse_skills(await chat(skill_prompt)) + + return SynthesisResult( + memory_body=self._clean_markdown(memory_raw), + skills={**existing_skills, **upserts}, + ) + + @staticmethod + def _format_existing_skills(skills: dict[str, str]) -> str: + return "\n\n".join(f"## {slug}\n{body}".strip() for slug, body in sorted(skills.items())) + + @staticmethod + def _format(descriptions: list[FileDescription]) -> str: + lines = [ + f"- [{desc.modality}] {desc.url}: {desc.description}" + for desc in descriptions + if desc.description.strip() + ] + return "\n".join(lines) + + @staticmethod + def _clean_markdown(raw: str) -> str: + text = (raw or "").strip() + if text.startswith("```"): + text = re.sub(r"^```[a-zA-Z]*\n", "", text) + text = re.sub(r"\n```$", "", text).strip() + return text + + def _parse_skills(self, raw: str) -> dict[str, str]: + payload = self._extract_json_array(raw) + if payload is None: + return {} + try: + parsed = json.loads(payload) + except (json.JSONDecodeError, TypeError): + return {} + if not isinstance(parsed, list): + return {} + + skills: dict[str, str] = {} + used: dict[str, int] = {} + for entry in parsed: + if not isinstance(entry, dict): + continue + name = entry.get("name") + body = entry.get("body") + if not isinstance(name, str) or not isinstance(body, str): + continue + body = body.strip() + if not body: + continue + base = slugify(name) + count = used.get(base, 0) + slug = base if count == 0 else f"{base}-{count + 1}" + used[base] = count + 1 + skills[slug] = body + return skills + + @staticmethod + def _extract_json_array(raw: str) -> str | None: + if not raw: + return None + start = raw.find("[") + end = raw.rfind("]") + if start == -1 or end == -1 or end <= start: + return None + return raw[start : end + 1] + + +__all__ = ["ChatFn", "MemorySynthesizer", "SynthesisResult"] diff --git a/src/memu/prompts/memory_fs/__init__.py b/src/memu/prompts/memory_fs/__init__.py new file mode 100644 index 00000000..0ece4018 --- /dev/null +++ b/src/memu/prompts/memory_fs/__init__.py @@ -0,0 +1,93 @@ +"""Prompts for the optional memory_fs synthesis bypass. + +Both prompts consume the shared trunk — the per-source multimodal descriptions — +and synthesize one of the sibling artifacts. The literal token ``__DESCRIPTIONS__`` +is replaced (not ``str.format``) so description text containing braces is safe. +""" + +from __future__ import annotations + +DESCRIPTIONS_PLACEHOLDER = "__DESCRIPTIONS__" +EXISTING_PLACEHOLDER = "__EXISTING__" + +MEMORY_SYNTHESIS_PROMPT = """You are maintaining an AI agent's long-term memory about a user. + +Below is a list of source descriptions — one per source file the agent has seen. +Synthesize them into a single, well-organized Markdown memory document. + +Requirements: +- Output Markdown only. Do not wrap it in code fences. +- Use second-level headings (##) for sections such as Profile, Preferences, + Goals, and Key Events. Include a section only if there is real content for it. +- Be concise and factual. Do not invent details that are not supported by the + descriptions. +- Write in the same language as the descriptions. + +Source descriptions: +__DESCRIPTIONS__ +""" + +SKILL_SYNTHESIS_PROMPT = """You are extracting reusable skills and tool patterns for an AI agent. + +From the source descriptions below, identify concrete, repeatable skills or tool +usage patterns (what worked, how to repeat it, what to avoid). Ignore one-off +facts, preferences, or trivia — those belong in the memory document, not here. + +Return ONLY a JSON array. Each element is an object: + {"name": "kebab-case-skill-name", "body": "Markdown body for this skill"} +The "body" should be a self-contained Markdown skill document. +If there are no genuine skills, return an empty array: [] + +Source descriptions: +__DESCRIPTIONS__ +""" + +MEMORY_UPDATE_PROMPT = """You are maintaining an AI agent's long-term memory document. + +Below is the CURRENT memory document, followed by NEW source descriptions that +were just added. Update the document to incorporate the new information. + +Requirements: +- Merge new facts, revise statements the new descriptions make outdated, and keep + existing content that is still valid. +- Output the FULL updated Markdown document only. Do not wrap it in code fences. +- Keep the same heading structure (## Profile, ## Preferences, ## Goals, ## Key + Events, ...). Add or drop sections as the content warrants. +- Be concise and factual; do not invent unsupported details. Use the same language + as the descriptions. + +CURRENT memory document: +__EXISTING__ + +NEW source descriptions: +__DESCRIPTIONS__ +""" + +SKILL_UPDATE_PROMPT = """You are maintaining an AI agent's skill library. + +Below are the EXISTING skills (name + body), followed by NEW source descriptions +that were just added. + +Return ONLY a JSON array of skills to add or replace based on the new +descriptions. Each element is an object: + {"name": "kebab-case-skill-name", "body": "Markdown body for this skill"} +- To revise an existing skill, reuse its exact name and return the full new body. +- To add a new skill, use a new name. +- Only include skills actually affected by the new descriptions. +- If the new descriptions contain nothing skill-worthy, return an empty array: [] + +EXISTING skills: +__EXISTING__ + +NEW source descriptions: +__DESCRIPTIONS__ +""" + +__all__ = [ + "DESCRIPTIONS_PLACEHOLDER", + "EXISTING_PLACEHOLDER", + "MEMORY_SYNTHESIS_PROMPT", + "MEMORY_UPDATE_PROMPT", + "SKILL_SYNTHESIS_PROMPT", + "SKILL_UPDATE_PROMPT", +] diff --git a/tests/test_memory_fs_synthesis.py b/tests/test_memory_fs_synthesis.py new file mode 100644 index 00000000..538ed4be --- /dev/null +++ b/tests/test_memory_fs_synthesis.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +from pathlib import Path + +from memu.app import MemoryService +from memu.memory_fs import FileDescription, MemoryFileExporter, MemorySynthesizer + +_MEMORY_MD = "## Profile\nThe user is a coffee enthusiast.\n\n## Preferences\nPrefers pour-over." +_SKILLS_JSON = '[{"name": "Pour Over", "body": "# Pour-over\\nUse a 1:16 ratio."}]' + + +class _FakeChatClient: + """Stand-in LLM client: returns canned memory/skill responses by prompt shape.""" + + async def chat(self, prompt: str, system_prompt: str | None = None) -> str: + if "JSON array" in prompt: + return _SKILLS_JSON + return _MEMORY_MD + + +def _descriptions() -> list[FileDescription]: + return [ + FileDescription( + url="docs/coffee.txt", + modality="document", + description="The user likes pour-over coffee with a 1:16 ratio.", + resource_id="r1", + ) + ] + + +async def test_synthesizer_parses_memory_and_skills() -> None: + synth = MemorySynthesizer() + result = await synth.synthesize(_descriptions(), chat=_FakeChatClient().chat) + + assert "## Profile" in result.memory_body + assert "pour-over" in result.memory_body.lower() + assert result.skills == {"pour-over": "# Pour-over\nUse a 1:16 ratio."} + + +async def test_synthesizer_empty_when_no_descriptions() -> None: + synth = MemorySynthesizer() + result = await synth.synthesize([], chat=_FakeChatClient().chat) + assert result.memory_body == "" + assert result.skills == {} + + +def test_synthesizer_helpers() -> None: + synth = MemorySynthesizer() + assert synth._clean_markdown("```markdown\n# Hi\n```") == "# Hi" + assert synth._parse_skills("garbage, no array") == {} + assert synth._parse_skills("[]") == {} + assert synth._parse_skills('[{"name": "A", "body": ""}]') == {} + duplicate = '[{"name": "A", "body": "x"}, {"name": "A", "body": "y"}]' + assert synth._parse_skills(duplicate) == {"a": "x", "a-2": "y"} + + +def test_exporter_override_path(tmp_path: Path) -> None: + service = MemoryService( + llm_profiles={"default": {"api_key": "test-key"}}, + database_config={"metadata_store": {"provider": "inmemory"}}, + ) + exporter = MemoryFileExporter(str(tmp_path)) + + result = exporter.export( + service.database, + memory_body="## Profile\nSynthesized.", + skills={"brewing": "# Brewing\nbody"}, + ) + + assert "MEMORY.md" in result.written + assert "skill/brewing/SKILL.md" in result.written + assert "Synthesized." in (tmp_path / "MEMORY.md").read_text(encoding="utf-8") + assert "# Brewing" in (tmp_path / "skill" / "brewing" / "SKILL.md").read_text(encoding="utf-8") + assert "[brewing](./skill/brewing/SKILL.md)" in (tmp_path / "INDEX.md").read_text(encoding="utf-8") + + +async def test_service_synthesis_wiring(tmp_path: Path, monkeypatch) -> None: + service = MemoryService( + llm_profiles={"default": {"api_key": "test-key"}}, + database_config={"metadata_store": {"provider": "inmemory"}}, + memory_files_config={"enabled": True, "output_dir": str(tmp_path), "synthesize": True}, + ) + service.database.resource_repo.create_resource( + url="docs/coffee.txt", + modality="document", + local_path="coffee.txt", + caption="The user likes pour-over coffee.", + embedding=None, + user_data={"user_id": "u1"}, + ) + monkeypatch.setattr(service, "_get_llm_client", lambda *a, **k: _FakeChatClient()) + + result = await service.export_memory_files(user={"user_id": "u1"}) + + assert "MEMORY.md" in result["written"] + assert "skill/pour-over/SKILL.md" in result["written"] + memory_text = (tmp_path / "MEMORY.md").read_text(encoding="utf-8") + assert "The user is a coffee enthusiast." in memory_text + + +# -- incremental update path ------------------------------------------------- + +_UPDATE_MEMORY_MD = "## Profile\nThe user is a coffee enthusiast.\n\n## Preferences\nLikes oat milk." +_UPDATE_SKILLS_JSON = '[{"name": "Latte Art", "body": "# Latte art\\nPour slowly."}]' + + +class _InitUpdateChatClient: + """Returns init vs update payloads based on which prompt template fired.""" + + async def chat(self, prompt: str, system_prompt: str | None = None) -> str: + is_update = "CURRENT memory document" in prompt or "EXISTING skills" in prompt + if "JSON array" in prompt: + return _UPDATE_SKILLS_JSON if is_update else _SKILLS_JSON + return _UPDATE_MEMORY_MD if is_update else _MEMORY_MD + + +async def test_synthesizer_update_merges_into_existing() -> None: + synth = MemorySynthesizer() + result = await synth.update( + _descriptions(), + existing_memory="## Profile\nOld profile.", + existing_skills={"pour-over": "# Pour-over\nUse a 1:16 ratio."}, + chat=_InitUpdateChatClient().chat, + ) + + assert "Likes oat milk." in result.memory_body + # Existing skill is preserved, the new one is upserted alongside it. + assert result.skills["pour-over"] == "# Pour-over\nUse a 1:16 ratio." + assert result.skills["latte-art"] == "# Latte art\nPour slowly." + + +async def test_synthesizer_update_noop_without_descriptions() -> None: + synth = MemorySynthesizer() + existing_skills = {"pour-over": "# Pour-over"} + result = await synth.update( + [], + existing_memory="## Profile\nKeep me.", + existing_skills=existing_skills, + chat=_InitUpdateChatClient().chat, + ) + assert result.memory_body == "## Profile\nKeep me." + assert result.skills == existing_skills + + +def test_exporter_read_helpers_roundtrip(tmp_path: Path) -> None: + service = MemoryService( + llm_profiles={"default": {"api_key": "test-key"}}, + database_config={"metadata_store": {"provider": "inmemory"}}, + ) + exporter = MemoryFileExporter(str(tmp_path)) + + assert exporter.artifacts_exist() is False + exporter.export( + service.database, + memory_body="## Profile\nSynthesized body.", + skills={"brewing": "# Brewing\nbody"}, + ) + + assert exporter.artifacts_exist() is True + assert exporter.read_memory_body() == "## Profile\nSynthesized body." + assert exporter.read_skills() == {"brewing": "# Brewing\nbody"} + + +async def test_service_init_then_update(tmp_path: Path, monkeypatch) -> None: + service = MemoryService( + llm_profiles={"default": {"api_key": "test-key"}}, + database_config={"metadata_store": {"provider": "inmemory"}}, + memory_files_config={ + "enabled": True, + "output_dir": str(tmp_path), + "synthesize": True, + "update_on_memorize": True, + }, + ) + monkeypatch.setattr(service, "_get_llm_client", lambda *a, **k: _InitUpdateChatClient()) + + repo = service.database.resource_repo + repo.create_resource( + url="docs/coffee.txt", + modality="document", + local_path="coffee.txt", + caption="The user likes pour-over coffee.", + embedding=None, + user_data={"user_id": "u1"}, + ) + + # First pass: no tree yet -> initialization from the full store. + init = await service.export_memory_files(user={"user_id": "u1"}) + assert "skill/pour-over/SKILL.md" in init["written"] + assert "coffee enthusiast" in (tmp_path / "MEMORY.md").read_text(encoding="utf-8") + + # Second pass: tree exists -> incremental update from the changed resource only. + changed = repo.create_resource( + url="docs/latte.txt", + modality="document", + local_path="latte.txt", + caption="The user enjoys latte art and oat milk.", + embedding=None, + user_data={"user_id": "u1"}, + ) + updated = await service._build_memory_files({"user_id": "u1"}, changed=[changed]) + + memory_text = (tmp_path / "MEMORY.md").read_text(encoding="utf-8") + assert "Likes oat milk." in memory_text + assert "skill/latte-art/SKILL.md" in (updated["written"] + updated["unchanged"]) + # The originally-initialized skill survives the incremental update. + assert (tmp_path / "skill" / "pour-over" / "SKILL.md").exists()