diff --git a/Cargo.lock b/Cargo.lock
deleted file mode 100644
index a841b5e0..00000000
--- a/Cargo.lock
+++ /dev/null
@@ -1,172 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "autocfg"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
-
-[[package]]
-name = "heck"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
-
-[[package]]
-name = "indoc"
-version = "2.0.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
-dependencies = [
- "rustversion",
-]
-
-[[package]]
-name = "libc"
-version = "0.2.177"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
-
-[[package]]
-name = "memoffset"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
-dependencies = [
- "autocfg",
-]
-
-[[package]]
-name = "memu"
-version = "0.1.0"
-dependencies = [
- "pyo3",
-]
-
-[[package]]
-name = "once_cell"
-version = "1.21.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
-
-[[package]]
-name = "portable-atomic"
-version = "1.11.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.103"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "pyo3"
-version = "0.27.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf"
-dependencies = [
- "indoc",
- "libc",
- "memoffset",
- "once_cell",
- "portable-atomic",
- "pyo3-build-config",
- "pyo3-ffi",
- "pyo3-macros",
- "unindent",
-]
-
-[[package]]
-name = "pyo3-build-config"
-version = "0.27.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb"
-dependencies = [
- "target-lexicon",
-]
-
-[[package]]
-name = "pyo3-ffi"
-version = "0.27.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be"
-dependencies = [
- "libc",
- "pyo3-build-config",
-]
-
-[[package]]
-name = "pyo3-macros"
-version = "0.27.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71"
-dependencies = [
- "proc-macro2",
- "pyo3-macros-backend",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "pyo3-macros-backend"
-version = "0.27.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b"
-dependencies = [
- "heck",
- "proc-macro2",
- "pyo3-build-config",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.42"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "rustversion"
-version = "1.0.22"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
-
-[[package]]
-name = "syn"
-version = "2.0.109"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f17c7e013e88258aa9543dcbe81aca68a667a9ac37cd69c9fbc07858bfe0e2f"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "target-lexicon"
-version = "0.13.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c"
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.22"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
-
-[[package]]
-name = "unindent"
-version = "0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
diff --git a/Cargo.toml b/Cargo.toml
deleted file mode 100644
index 49494051..00000000
--- a/Cargo.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-[package]
-name = "memu"
-version = "0.1.0"
-edition = "2024"
-
-[lib]
-name = "_core"
-# "cdylib" is necessary to produce a shared library for Python to import from.
-crate-type = ["cdylib"]
-
-[dependencies]
-# "extension-module" tells pyo3 we want to build an extension module (skips linking against libpython.so)
-# "abi3-py313" tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.13
-pyo3 = { version = "0.27.1", features = ["extension-module", "abi3-py313"] }
diff --git a/README.md b/README.md
index 10b10f9c..4c95325c 100644
--- a/README.md
+++ b/README.md
@@ -31,8 +31,13 @@ Instead of flattening everything an agent learns into one giant prompt or an opa
 
 ```txt
 memory/
-├── INDEX.md              ← map of everything: categories, files, and summaries
-├── MEMORY.md             ← profile, preferences, goals, and key events
+├── INDEX.md              ← index of the raw files under resource/
+├── MEMORY.md             ← overview + index of memory/
+├── SKILL.md              ← index of the skills under skill/
+├── resource/
+│   └── {file_name}       ← a copied raw source file
+├── memory/
+│   └── {slug}.md         ← profile, preferences, goals, and key events (one per category)
 └── skill/
     ├── {skill_name}/
     │   └── SKILL.md       ← a learned skill or tool pattern
@@ -192,9 +197,10 @@ If you find memU useful or interesting, a GitHub Star ⭐️ would be greatly ap
 *Turn chat logs into user preferences, goals, events, and relationship context.*
 
 ```python
+# memorize() ingests a folder: it scans the directory, infers each file's
+# modality from its extension, and incrementally syncs (add/modify/delete).
 await service.memorize(
-    resource_url="examples/resources/conversations/conv1.json",
-    modality="conversation",
+    folder="examples/resources/conversations",
     user={"user_id": "123"},
 )
 
@@ -208,8 +214,8 @@ context = await service.retrieve(
 *Convert docs, PR notes, logs, and design decisions into reusable project memory.*
 
 ```python
-await service.memorize(resource_url="docs/architecture.md", modality="document")
-await service.memorize(resource_url="examples/resources/logs/log1.txt", modality="document")
+# Point memorize() at a folder of docs/notes/logs; modality is inferred per file.
+await service.memorize(folder="docs")
 
 context = await service.retrieve(
     queries=[{"role": "user", "content": {"text": "How should I structure this module?"}}],
@@ -220,10 +226,9 @@ context = await service.retrieve(
 *Extract searchable facts from documents, screenshots, images, videos, and audio notes.*
 
 ```python
-await service.memorize(resource_url="examples/resources/docs/doc1.txt", modality="document")
-await service.memorize(resource_url="examples/resources/images/image1.png", modality="image")
-# Audio is supported for your own .mp3/.wav/.m4a files.
-await service.memorize(resource_url="meeting-audio.mp3", modality="audio")
+# A single folder can mix modalities (documents, images, video, audio); each
+# file's modality is inferred from its extension during the scan.
+await service.memorize(folder="examples/resources")
 
 context = await service.retrieve(
     queries=[{"role": "user", "content": {"text": "What matters for the next research plan?"}}],
@@ -234,7 +239,7 @@ context = await service.retrieve(
 *Turn execution traces into tool memories that tell future agents when to use a tool and what mistakes to avoid.*
 
 ```python
-await service.memorize(resource_url="examples/resources/logs/log1.txt", modality="document")
+await service.memorize(folder="examples/resources/logs")
 
 context = await service.retrieve(
     queries=[{"role": "user", "content": {"text": "Which tools worked for config editing?"}}],
@@ -383,17 +388,21 @@ service = MemoryService(
 
 ```python
 result = await service.memorize(
-    resource_url="path/to/file.json",    # local file path or HTTP URL
-    modality="conversation",            # conversation | document | image | video | audio
-    user={"user_id": "123"},            # optional: scope to a user or agent
+    folder="path/to/folder",   # a directory; modality is inferred per file by extension
+    user={"user_id": "123"},   # optional: scope to a user or agent
 )
-# Returns after processing completes:
-# { "resource": {...}, "items": [...], "categories": [...], "relations": [...] }
+# Scans the folder, diffs against <folder>/.memu_manifest.json, and returns a
+# sync summary after processing completes:
+# {
+#   "folder": "...", "added": [...], "modified": [...], "deleted": [...],
+#   "resources": [...], "removed_resources": [...], "items": [...]
+# }
 ```
 
-- Converts raw input into typed memory items
+- Ingests a folder and incrementally syncs it (add / modify / delete)
+- Converts raw input into typed memory items; one input file maps to one resource
+- Cascade-deletes memory for modified/removed files and recomputes summaries
 - Categorizes and embeds items without manual tagging
-- Preserves source resources and item-category relations
 
 ---
 
diff --git a/docs/architecture.md b/docs/architecture.md
index dedad3cd..9dc250cd 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -72,18 +72,40 @@ LLM wrappers also extract best-effort usage metadata from raw provider responses
 
 ## Ingestion architecture (`memorize`)
 
-`memorize(...)` executes the `memorize` pipeline:
+`memorize(folder=..., user=...)` ingests a **folder** and incrementally syncs it
+into memory using an input-side manifest. Each call:
+
+1. Recursively scans `folder`, inferring each file's modality from its extension
+   (`.json` → conversation, `.txt`/`.md` → document, image/video/audio by
+   extension). Unsupported extensions are skipped (and logged); hidden files and
+   the manifest itself are ignored.
+2. Loads the sidecar `<folder>/.memu_manifest.json` (`relative path -> content hash`)
+   and diffs it against the scan to compute **added / modified / deleted**.
+3. For modified + deleted files: cascade-deletes the prior `Resource` together
+   with its `MemoryItem`s and item-category relations, then recomputes the
+   affected category summaries (discarded content fed in as `(before, None)`).
+4. For added + modified files: runs the per-file `memorize` pipeline below to
+   (re)extract memory. One input file maps to exactly one `Resource`.
+5. Refreshes the memory file tree (`memory_fs`): a full rebuild when anything was
+   modified/deleted, an incremental update for pure additions.
+6. Rewrites the manifest, and returns a sync summary (`added`, `modified`,
+   `deleted`, `resources`, `removed_resources`, `items`).
+
+The per-file pipeline (`_memorize_one`) executes the `memorize` workflow:
 
 1. `ingest_resource`: fetch local/remote resource into `blob_config.resources_dir` via `LocalFS`
 2. `preprocess_multimodal`: modality-specific preprocessing for conversation/document/audio (text-oriented path) and image/video (vision-oriented path)
-3. `extract_items`: per-memory-type LLM extraction into structured entries
+3. `extract_items`: per-memory-type LLM extraction into structured entries. Conversation segments are an internal extraction detail only — all segment entries/captions are aggregated into a single resource plan.
 4. `dedupe_merge`: placeholder stage (currently pass-through)
 5. `categorize_items`: persist resource + memory items + item-category relations and embeddings
 6. `persist_index`: update category summaries; optionally persist item references
-7. `build_response`: return resource(s), items, categories, relations
+7. `build_response`: return resource, items, categories, relations
 
 Category bootstrap is lazy and scoped: categories are initialized when needed with embeddings, and mapped by normalized category name.
 
+> The input manifest is keyed by folder only and is user-agnostic, so a given
+> folder is expected to be synced under a single user scope.
+
 ## Retrieval architecture (`retrieve`)
 
 `retrieve(...)` chooses one of two pipelines from config:
@@ -119,6 +141,10 @@ Storage is abstracted through a `Database` protocol with four repositories:
 - `MemoryCategoryRepo`
 - `CategoryItemRepo`
 
+All record access goes through these repositories, which enforce scope (`where`)
+filtering. The `Database` protocol intentionally does **not** expose the raw
+in-process record stores, so business logic cannot bypass scope rules.
+
 ### Backends
 
 `build_database(...)` selects backend by `database_config.metadata_store.provider`:
@@ -144,11 +170,20 @@ LLM access is profile-based (`llm_profiles`):
 
 Per-step profile routing happens through step config (`chat_llm_profile`, `embed_llm_profile`, or `llm_profile`).
 
-Client backends:
+Chat and embedding are decoupled concerns with separate client implementations
+and separate per-profile caches in `MemoryService`:
+
+- chat-like clients (`chat`/`summarize`/`vision`/`transcribe`) live in `memu.llm`
+- embedding clients live in `memu.embedding` and are used for all vectorization
+  (the embedding profile is resolved to an embedding client, then wrapped by the
+  same `LLMClientWrapper` for observability)
+
+Client backends (apply to both chat and embedding clients):
 
 - `sdk`: official OpenAI SDK wrapper
 - `httpx`: provider-adapted HTTP backend (OpenAI, Doubao, Grok, OpenRouter)
-- `lazyllm_backend`: LazyLLM adapter
+- `lazyllm_backend`: LazyLLM adapter (a single unified client that also serves
+  embedding, since LazyLLM has no standalone embedding client)
 
 ## Integration surfaces
 
@@ -158,69 +193,88 @@ Client backends:
 ## Memory file system export (`memu.memory_fs`)
 
 `MemoryService.export_memory_files(...)` renders the structured store into the
-markdown tree described in the README. Every source first becomes a **multimodal
-description** (the modality-agnostic caption/text from preprocessing); that
-description is the shared trunk, and three sibling *bypasses* project it into:
+markdown tree described in the README:
 
 ```txt
 <output_dir>/
-├── INDEX.md                     ← map of everything: folders, skills, sources
-├── MEMORY.md                    ← living memory: folder (category) summaries
+├── INDEX.md                     ← index of the raw files under resource/
+├── MEMORY.md                    ← overview + index of memory/
+├── SKILL.md                     ← index of the skills under skill/
+├── resource/
+│   └── <file_name>              ← one copied raw source file
+├── memory/
+│   └── <slug>.md                ← one memory category (description + summary)
 └── skill/
-    └── <skill_name>/SKILL.md    ← one learned skill / tool pattern per folder
+    └── <skill_name>/SKILL.md    ← one extracted skill profile per folder
 ```
 
-- `INDEX.md` is a table of contents (where to look before reading); it lists
-  folders, skills, and the per-source descriptions, linking out rather than
-  duplicating summaries.
-- `MEMORY.md` aggregates `MemoryCategory` summaries (profile, preferences, goals,
-  events).
-- `skill/<name>/SKILL.md` breaks each skill-type `MemoryItem` out as a standalone
-  document; the folder name comes from the skill's frontmatter `name:`.
-
-The three bypasses are siblings — none is upstream of another; each is a
-different aggregation of the same descriptions.
-
-### Synthesis mode (optional)
-
-By default `MEMORY.md` and the `skill/` tree are rendered deterministically from
-already-extracted records. When `memory_files_config.synthesize=True`, they are
-instead synthesized directly from the per-source descriptions by an LLM
-(`memu.memory_fs.MemorySynthesizer`, prompts in `memu.prompts.memory_fs`):
-
-- `MEMORY.md`: one LLM pass turns all descriptions into a consolidated memory doc.
-- `skill/<name>/SKILL.md`: one LLM pass extracts skills as a JSON array of
-  `{name, body}` objects, each written as its own skill doc.
-
-`INDEX.md` stays deterministic in both modes. Synthesis uses the
-`synthesis_llm_profile` profile and leaves the existing memorize/extract pipeline
-untouched.
+- `resource/` holds the raw source files copied verbatim out of the blob store
+  (`Resource.local_path`), so the ingested bytes live next to the memory.
+- `INDEX.md` is an index of those raw files: for each in-scope `Resource` it lists
+  the file name, modality, multimodal description, and a link into `resource/`
+  (resources without a readable `local_path` are listed without a link). It does
+  not list folders or skills.
+- `memory/` splits the living memory one file per `MemoryCategory`
+  (`memory/<slug>.md`), each holding the category's description and summary
+  (profile, preferences, goals, events).
+- `MEMORY.md` opens with a deterministic `## Overview` of the `MemoryCategory`
+  structure, where each entry links to its `memory/<slug>.md` file.
+- `SKILL.md` (root) is a generated index/table of contents over the `skill/`
+  tree: one line per skill with its slug, one-line description, and a link to
+  `skill/<name>/SKILL.md`.
+- `skill/<name>/SKILL.md` is one `skill`-type `MemoryItem` extracted during
+  memorize. Each item's summary is a comprehensive skill profile (Markdown with
+  `name`/`description` frontmatter, produced by `memu.prompts.memory_type.skill`
+  from logs / workflow traces / technical content); the exporter renders it
+  verbatim and parses the frontmatter for the folder slug and index description.
+
+### How skills are produced
+
+The `skill/` tree is derived from the memorize/extract pipeline, **not** from a
+separate description-synthesis bypass. `skill` (along with `behavior` and `tool`)
+is a default memory type (`memu.prompts.memory_type.DEFAULT_MEMORY_TYPES`), so the
+extract step turns demonstrated skills in the source content into `skill`-type
+`MemoryItem`s. The exporter (`MemoryFileExporter._skills_from_items`) reads those
+items from the (scoped) store on every export and renders them into `skill/` plus
+the root `SKILL.md` index. This is fully deterministic and needs no extra LLM call.
+
+### MEMORY.md synthesis mode (optional)
+
+`MEMORY.md` defaults to a deterministic rendering of `MemoryCategory` summaries
+(`## Overview` plus per-category sections). When
+`memory_files_config.synthesize=True`, the `MEMORY.md` body is instead synthesized
+from the per-source multimodal descriptions by an LLM
+(`memu.memory_fs.MemorySynthesizer`, prompts in `memu.prompts.memory_fs`), using
+the `synthesis_llm_profile` profile. This affects only the `MEMORY.md` body;
+`resource/`, the per-category `memory/` files, `INDEX.md`, the `skill/` tree, and
+the root `SKILL.md` index stay deterministic in both modes.
 
 ### Initialize vs. incremental update
 
-Synthesis is stateful and mirrors the "submit the changed part of the file system"
-model. `MemoryService._build_memory_files(where, changed=...)` decides between two
-paths:
+`MEMORY.md` synthesis is stateful and mirrors the "submit the changed part of the
+file system" model. `MemoryService._build_memory_files(where, changed=...)` decides
+between two paths (only relevant when `synthesize=True`):
 
-- **Initialization** (no prior tree on disk, or `changed is None`): scan all
-  in-scope sources, turn each into its multimodal description, and synthesize
-  `MEMORY.md` + the `skill/` tree from scratch (`MemorySynthesizer.synthesize`).
+- **Initialization** (no prior tree on disk, or `changed is None`): synthesize the
+  `MEMORY.md` body from all in-scope source descriptions
+  (`MemorySynthesizer.synthesize`).
 - **Incremental update** (a tree already exists and a changed set is supplied):
-  read the existing `MEMORY.md` body and existing skill bodies back off disk and
-  merge only the changed sources' descriptions into them
-  (`MemorySynthesizer.update`, prompts `MEMORY_UPDATE_PROMPT` / `SKILL_UPDATE_PROMPT`).
-  Skills are upserted by slug, so untouched skills survive.
-
-`INDEX.md` is always recomputed from the current source set, so it needs no LLM
-merge. `export_memory_files(user=...)` always takes the initialization path (full
-rebuild). When `memory_files_config.update_on_memorize=True`, each `memorize()`
-call drives this builder with its just-created resources as the changed set, so the
-tree initializes on first run and incrementally updates afterwards. The hook is
+  read the existing `MEMORY.md` body back off disk and merge only the changed
+  sources' descriptions into it (`MemorySynthesizer.update`, prompt
+  `MEMORY_UPDATE_PROMPT`).
+
+The `resource/` copies, the per-category `memory/` files, `INDEX.md`, the `skill/`
+tree, and the root `SKILL.md` index are always recomputed from the current store,
+so they need no LLM merge.
+`export_memory_files(user=...)` always takes the initialization path (full
+rebuild). Each `memorize(folder=...)` call drives this builder after the folder
+sync: when any file was modified or deleted it forces the full-rebuild path (so
+stale skills/entries do not linger and cascade deletions are reflected), and for
+pure additions it incrementally merges the just-created resources. The hook is
 best-effort: an export failure is logged and never fails memorize, since the
 structured memory is already persisted.
 
-The exporter is read-only against the database and disabled by default
-(`memory_files_config.enabled`). Diff detection is handled by a sidecar manifest
+The exporter is read-only against the database. Diff detection is handled by a sidecar manifest
 (`.memufs_manifest.json`) that stores per-file content hashes, so each export
 only rewrites artifacts whose rendered content changed (and prunes stale skill
 files/dirs) — no database schema change is required. Rendered content avoids
diff --git a/examples/example_1_conversation_memory.py b/examples/example_1_conversation_memory.py
index e023fae3..aa835a94 100644
--- a/examples/example_1_conversation_memory.py
+++ b/examples/example_1_conversation_memory.py
@@ -78,28 +78,20 @@ async def main():
         },
     )
 
-    # Conversation files to process
-    conversation_files = [
-        "examples/resources/conversations/conv1.json",
-        "examples/resources/conversations/conv2.json",
-        "examples/resources/conversations/conv3.json",
-    ]
-
-    # Process each conversation
+    # Folder of conversation files to process. memorize() scans the folder and
+    # infers each file's modality from its extension (.json -> conversation).
+    conversation_folder = "examples/resources/conversations"
+
+    # Process the whole folder in one incremental sync.
     print("\nProcessing conversations...")
     total_items = 0
     categories = []
-    for conv_file in conversation_files:
-        if not os.path.exists(conv_file):
-            continue
-
-        try:
-            result = await service.memorize(resource_url=conv_file, modality="conversation")
-            total_items += len(result.get("items", []))
-            # Categories are returned in the result and updated after each memorize call
-            categories = result.get("categories", [])
-        except Exception as e:
-            print(f"Error: {e}")
+    try:
+        result = await service.memorize(folder=conversation_folder)
+        total_items = len(result.get("items", []))
+        categories = result.get("categories", [])
+    except Exception as e:
+        print(f"Error: {e}")
 
     # Write to output files
     output_dir = "examples/output/conversation_example"
@@ -108,7 +100,7 @@ async def main():
     # 1. Generate individual Markdown files for each category
     await generate_memory_md(categories, output_dir)
 
-    print(f"\n✓ Processed {len(conversation_files)} files, extracted {total_items} items")
+    print(f"\n✓ Processed folder {conversation_folder}, extracted {total_items} items")
     print(f"✓ Generated {len(categories)} categories")
     print(f"✓ Output: {output_dir}/")
 
diff --git a/examples/example_2_skill_extraction.py b/examples/example_2_skill_extraction.py
index 3ca75804..177dc76f 100644
--- a/examples/example_2_skill_extraction.py
+++ b/examples/example_2_skill_extraction.py
@@ -214,58 +214,38 @@ async def main():
         memorize_config=memorize_config,
     )
 
-    # Resources to process
-    resources = [
-        ("examples/resources/logs/log1.txt", "document"),
-        ("examples/resources/logs/log2.txt", "document"),
-        ("examples/resources/logs/log3.txt", "document"),
-    ]
+    # Folder of logs to process; memorize() scans it and infers modality per file.
+    logs_folder = "examples/resources/logs"
 
-    # Process each resource sequentially
+    # Process the whole folder in one incremental sync.
     print("\nProcessing files...")
     all_skills = []
     categories = []
 
-    for idx, (resource_file, modality) in enumerate(resources, 1):
-        if not os.path.exists(resource_file):
-            continue
-
-        try:
-            result = await service.memorize(resource_url=resource_file, modality=modality)
-
-            # Extract skill items
-            for item in result.get("items", []):
-                if item.get("memory_type") == "skill":
-                    all_skills.append({"skill": item.get("summary", ""), "source": os.path.basename(resource_file)})
-
-            # Categories are returned in the result and updated after each memorize call
-            categories = result.get("categories", [])
+    try:
+        result = await service.memorize(folder=logs_folder)
 
-            # Generate intermediate skill.md
-            await generate_skill_md(
-                all_skills=all_skills,
-                service=service,
-                output_file=f"examples/output/skill_example/log_{idx}.md",
-                attempt_number=idx,
-                total_attempts=len(resources),
-                categories=categories,
-            )
+        # Extract skill items
+        for item in result.get("items", []):
+            if item.get("memory_type") == "skill":
+                all_skills.append({"skill": item.get("summary", ""), "source": logs_folder})
 
-        except Exception as e:
-            print(f"Error: {e}")
+        categories = result.get("categories", [])
+    except Exception as e:
+        print(f"Error: {e}")
 
     # Generate final comprehensive skill.md
     await generate_skill_md(
         all_skills=all_skills,
         service=service,
         output_file="examples/output/skill_example/skill.md",
-        attempt_number=len(resources),
-        total_attempts=len(resources),
+        attempt_number=1,
+        total_attempts=1,
         categories=categories,
         is_final=True,
     )
 
-    print(f"\n✓ Processed {len(resources)} files, extracted {len(all_skills)} skills")
+    print(f"\n✓ Processed folder {logs_folder}, extracted {len(all_skills)} skills")
     print(f"✓ Generated {len(categories)} categories")
     print("✓ Output: examples/output/skill_example/")
 
diff --git a/examples/example_3_multimodal_memory.py b/examples/example_3_multimodal_memory.py
index 83aba74a..e07062ff 100644
--- a/examples/example_3_multimodal_memory.py
+++ b/examples/example_3_multimodal_memory.py
@@ -11,6 +11,7 @@
 
 import asyncio
 import os
+import shutil
 import sys
 
 from memu.app import MemoryService
@@ -98,28 +99,29 @@ async def main():
         memorize_config={"memory_categories": multimodal_categories},
     )
 
-    # Resources to process (file_path, modality)
-    resources = [
-        ("examples/resources/docs/doc1.txt", "document"),
-        ("examples/resources/docs/doc2.txt", "document"),
-        ("examples/resources/images/image1.png", "image"),
+    # Stage a mixed-modality folder; memorize() scans it and infers each file's
+    # modality from its extension (.txt -> document, .png -> image, ...).
+    source_files = [
+        "examples/resources/docs/doc1.txt",
+        "examples/resources/docs/doc2.txt",
+        "examples/resources/images/image1.png",
     ]
+    input_folder = "examples/output/multimodal_example_input"
+    os.makedirs(input_folder, exist_ok=True)
+    for src in source_files:
+        if os.path.exists(src):
+            shutil.copy(src, os.path.join(input_folder, os.path.basename(src)))
 
-    # Process each resource
+    # Process the whole folder in one incremental sync.
     print("\nProcessing resources...")
     total_items = 0
     categories = []
-    for resource_file, modality in resources:
-        if not os.path.exists(resource_file):
-            continue
-
-        try:
-            result = await service.memorize(resource_url=resource_file, modality=modality)
-            total_items += len(result.get("items", []))
-            # Categories are returned in the result and updated after each memorize call
-            categories = result.get("categories", [])
-        except Exception as e:
-            print(f"Error: {e}")
+    try:
+        result = await service.memorize(folder=input_folder)
+        total_items = len(result.get("items", []))
+        categories = result.get("categories", [])
+    except Exception as e:
+        print(f"Error: {e}")
 
     # Write to output files
     output_dir = "examples/output/multimodal_example"
@@ -128,7 +130,7 @@ async def main():
     # 1. Generate individual Markdown files for each category
     await generate_memory_md(categories, output_dir)
 
-    print(f"\n✓ Processed {len(resources)} files, extracted {total_items} items")
+    print(f"\n✓ Processed folder {input_folder}, extracted {total_items} items")
     print(f"✓ Generated {len(categories)} categories")
     print(f"✓ Output: {output_dir}/")
 
diff --git a/examples/example_4_openrouter_memory.py b/examples/example_4_openrouter_memory.py
index f5a8daa2..58306b14 100644
--- a/examples/example_4_openrouter_memory.py
+++ b/examples/example_4_openrouter_memory.py
@@ -75,35 +75,27 @@ async def main():
         },
     )
 
-    conversation_files = [
-        "examples/resources/conversations/conv1.json",
-        "examples/resources/conversations/conv2.json",
-        "examples/resources/conversations/conv3.json",
-    ]
+    # memorize() scans this folder and infers modality per file (.json -> conversation).
+    conversation_folder = "examples/resources/conversations"
 
     print("\nProcessing conversations...")
     total_items = 0
     categories = []
 
-    for conv_file in conversation_files:
-        if not os.path.exists(conv_file):
-            print(f"Skipped: {conv_file} not found")
-            continue
-
-        try:
-            print(f"Processing: {conv_file}")
-            result = await service.memorize(resource_url=conv_file, modality="conversation")
-            total_items += len(result.get("items", []))
-            categories = result.get("categories", [])
-        except Exception as e:
-            print(f"Error processing {conv_file}: {e}")
+    try:
+        print(f"Processing folder: {conversation_folder}")
+        result = await service.memorize(folder=conversation_folder)
+        total_items = len(result.get("items", []))
+        categories = result.get("categories", [])
+    except Exception as e:
+        print(f"Error processing {conversation_folder}: {e}")
 
     output_dir = "examples/output/openrouter_example"
     os.makedirs(output_dir, exist_ok=True)
 
     await generate_memory_md(categories, output_dir)
 
-    print(f"\nProcessed {len(conversation_files)} files, extracted {total_items} items")
+    print(f"\nProcessed folder {conversation_folder}, extracted {total_items} items")
     print(f"Generated {len(categories)} categories")
     print(f"Output: {output_dir}/")
 
diff --git a/examples/example_5_with_lazyllm_client.py b/examples/example_5_with_lazyllm_client.py
index 3b300298..8e07c9a5 100644
--- a/examples/example_5_with_lazyllm_client.py
+++ b/examples/example_5_with_lazyllm_client.py
@@ -20,6 +20,7 @@
 
 import asyncio
 import os
+import shutil
 import sys
 from pathlib import Path
 
@@ -41,28 +42,19 @@ async def run_conversation_memory_demo(service):
     print("PART 1: Conversation Memory Processing")
     print("=" * 60)
 
-    conversation_files = [
-        "examples/resources/conversations/conv1.json",
-        "examples/resources/conversations/conv2.json",
-        "examples/resources/conversations/conv3.json",
-    ]
+    conversation_folder = "examples/resources/conversations"
 
     total_items = 0
     categories = []
 
-    for conv_file in conversation_files:
-        if not os.path.exists(conv_file):
-            print(f"⚠ File not found: {conv_file}")
-            continue
-
-        try:
-            print(f"  Processing: {conv_file}")
-            result = await service.memorize(resource_url=conv_file, modality="conversation")
-            total_items += len(result.get("items", []))
-            categories = result.get("categories", [])
-            print(f"    ✓ Extracted {len(result.get('items', []))} items")
-        except Exception as e:
-            print(f"  ✗ Error processing {conv_file}: {e}")
+    try:
+        print(f"  Processing folder: {conversation_folder}")
+        result = await service.memorize(folder=conversation_folder)
+        total_items = len(result.get("items", []))
+        categories = result.get("categories", [])
+        print(f"    ✓ Extracted {total_items} items")
+    except Exception as e:
+        print(f"  ✗ Error processing {conversation_folder}: {e}")
 
     # Output generation
     output_dir = "examples/output/lazyllm_example/conversation"
@@ -106,22 +98,18 @@ async def run_skill_extraction_demo(service):
     service.memorize_config.memory_types = ["skill"]
     service.memorize_config.memory_type_prompts = {"skill": skill_prompt}
 
-    logs = ["examples/resources/logs/log1.txt", "examples/resources/logs/log2.txt", "examples/resources/logs/log3.txt"]
+    logs_folder = "examples/resources/logs"
 
     all_skills = []
-    for log_file in logs:
-        if not os.path.exists(log_file):
-            continue
-
-        print(f"  Processing log: {log_file}")
-        try:
-            result = await service.memorize(resource_url=log_file, modality="document")
-            for item in result.get("items", []):
-                if item.get("memory_type") == "skill":
-                    all_skills.append(item.get("summary", ""))
-            print(f"    ✓ Extracted {len(result.get('items', []))} skills")
-        except Exception as e:
-            print(f"  ✗ Error: {e}")
+    print(f"  Processing logs folder: {logs_folder}")
+    try:
+        result = await service.memorize(folder=logs_folder)
+        for item in result.get("items", []):
+            if item.get("memory_type") == "skill":
+                all_skills.append(item.get("summary", ""))
+        print(f"    ✓ Extracted {len(result.get('items', []))} skills")
+    except Exception as e:
+        print(f"  ✗ Error: {e}")
 
     # Generate summary guide
     if all_skills:
@@ -158,23 +146,24 @@ async def run_multimodal_demo(service):
     service.memorize_config.memory_types = ["knowledge"]
     service.memorize_config.memory_type_prompts = {"knowledge": xml_prompt}
 
-    resources = [
-        ("examples/resources/docs/doc1.txt", "document"),
-        ("examples/resources/images/image1.png", "image"),
+    source_files = [
+        "examples/resources/docs/doc1.txt",
+        "examples/resources/images/image1.png",
     ]
+    input_folder = "examples/output/lazyllm_example/multimodal_input"
+    os.makedirs(input_folder, exist_ok=True)
+    for src in source_files:
+        if os.path.exists(src):
+            shutil.copy(src, os.path.join(input_folder, os.path.basename(src)))
 
     categories = []
-    for res_file, modality in resources:
-        if not os.path.exists(res_file):
-            continue
-
-        print(f"  Processing {modality}: {res_file}")
-        try:
-            result = await service.memorize(resource_url=res_file, modality=modality)
-            categories = result.get("categories", [])
-            print(f"    ✓ Extracted {len(result.get('items', []))} items")
-        except Exception as e:
-            print(f"  ✗ Error: {e}")
+    print(f"  Processing folder: {input_folder}")
+    try:
+        result = await service.memorize(folder=input_folder)
+        categories = result.get("categories", [])
+        print(f"    ✓ Extracted {len(result.get('items', []))} items")
+    except Exception as e:
+        print(f"  ✗ Error: {e}")
 
     output_dir = "examples/output/lazyllm_example/multimodal"
     os.makedirs(output_dir, exist_ok=True)
diff --git a/examples/example_memory_files.py b/examples/example_memory_files.py
new file mode 100644
index 00000000..07c9a075
--- /dev/null
+++ b/examples/example_memory_files.py
@@ -0,0 +1,98 @@
+"""
+Example: Memory File System (INDEX.md / MEMORY.md / skill/)
+
+This example demonstrates the `memu.memory_fs` export layer that is documented in
+docs/architecture.md. `memorize()` ingests a *folder*: it scans the directory,
+infers each file's modality by extension, diffs against a sidecar
+`.memu_manifest.json`, and incrementally syncs memory. The browsable markdown
+tree is refreshed on every call that changes memory, so it stays current:
+
+    <output_dir>/
+    ├── INDEX.md                  ← index of the raw files under resource/
+    ├── MEMORY.md                 ← overall overview + index of memory/
+    ├── SKILL.md                  ← index/description of the skills under skill/
+    ├── resource/<file_name>      ← one copied raw source file
+    ├── memory/<slug>.md          ← one memory category (description + summary)
+    └── skill/<slug>/SKILL.md     ← one skill profile extracted during memorize
+
+Usage:
+    export OPENAI_API_KEY=your_api_key
+    python examples/example_memory_files.py
+"""
+
+import asyncio
+import os
+import pathlib
+import shutil
+
+from memu.app import MemoryService
+
+OUTPUT_DIR = "examples/output/memory_files_example"
+
+# Repo-bundled sample folder so the example runs without extra setup.
+SOURCE_FOLDER = "examples/resources/conversations"
+# Working copy memorize() syncs (so the input-side .memu_manifest.json is not
+# written into the tracked resources folder). memorize() scans this directory and
+# infers each file's modality from its extension.
+INPUT_FOLDER = "examples/output/memory_files_example_input"
+
+
+def print_tree(root: str) -> None:
+    """Print every generated artifact (relative path + its full content)."""
+    base = pathlib.Path(root)
+    if not base.exists():
+        print(f"(nothing written to {root})")
+        return
+    files = sorted(p for p in base.rglob("*") if p.is_file())
+    for path in files:
+        rel = path.relative_to(base)
+        print("\n" + "=" * 70)
+        print(f"# {rel}")
+        print("=" * 70)
+        # Skip dumping the sidecar manifest body; just note that it exists.
+        if path.name == ".memufs_manifest.json":
+            print("(diff-detection manifest)")
+            continue
+        print(path.read_text(encoding="utf-8").rstrip())
+
+
+async def main() -> None:
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        msg = "Please set OPENAI_API_KEY environment variable"
+        raise ValueError(msg)
+
+    # The memory file tree is built/updated by every memorize() that changes memory. Here we
+    # only override output_dir so the demo does not write to the default
+    # ./data/memory. MEMORY.md is rendered deterministically from category
+    # summaries by default (the skill/ tree is built from the skill-type memories
+    # extracted during memorize); set synthesize=True to synthesize MEMORY.md from
+    # descriptions instead.
+    service = MemoryService(
+        llm_profiles={
+            "default": {
+                "api_key": api_key,
+                "chat_model": "gpt-4o-mini",
+            },
+        },
+        memory_files_config={
+            "output_dir": OUTPUT_DIR,
+        },
+    )
+
+    if not os.path.isdir(SOURCE_FOLDER):
+        msg = f"Sample folder not found: {SOURCE_FOLDER}"
+        raise FileNotFoundError(msg)
+    shutil.copytree(SOURCE_FOLDER, INPUT_FOLDER, dirs_exist_ok=True)
+
+    print(f"Memorizing sample folder (tree initializes, then updates): {INPUT_FOLDER}")
+    result = await service.memorize(folder=INPUT_FOLDER)
+    print(f"  added={result['added']} modified={result['modified']} deleted={result['deleted']}")
+    print(f"  {len(result.get('items', []))} items extracted across {len(result['resources'])} files")
+
+    print("\nGenerated memory file tree:")
+    print_tree(OUTPUT_DIR)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/proactive/memory/local/memorize.py b/examples/proactive/memory/local/memorize.py
index 796a44bb..d63d1ec2 100644
--- a/examples/proactive/memory/local/memorize.py
+++ b/examples/proactive/memory/local/memorize.py
@@ -24,15 +24,18 @@ def dump_conversation_resource(
         ]
     }
     time_string = pendulum.now().format("YYYYMMDD_HHmmss")
-    resource_url = Path(__file__).parent / "data" / f"conv_{time_string}.json"
-    resource_url.parent.mkdir(parents=True, exist_ok=True)
+    data_dir = Path(__file__).parent / "data"
+    data_dir.mkdir(parents=True, exist_ok=True)
+    resource_url = data_dir / f"conv_{time_string}.json"
     with open(resource_url, "w") as f:
         json.dump(resource_data, f, indent=4, ensure_ascii=False)
-    return resource_url.as_posix()
+    return data_dir.as_posix()
 
 
 def memorize(conversation_messages: list[dict[str, Any]]) -> Awaitable[dict[str, Any]]:
     memory_service = get_memory_service()
 
-    resource_url = dump_conversation_resource(conversation_messages)
-    return memory_service.memorize(resource_url=resource_url, modality="conversation", user={"user_id": USER_ID})
+    # Append the new conversation to the data folder, then incrementally sync the
+    # whole folder; the input manifest ensures only the new file is processed.
+    data_folder = dump_conversation_resource(conversation_messages)
+    return memory_service.memorize(folder=data_folder, user={"user_id": USER_ID})
diff --git a/examples/sealos-assistant/main.py b/examples/sealos-assistant/main.py
index 64fde9b1..52852793 100644
--- a/examples/sealos-assistant/main.py
+++ b/examples/sealos-assistant/main.py
@@ -190,21 +190,19 @@ async def chat(request: ChatRequest):
         else:
             response_text = f"I received your message: '{request.message}'. I don't have any relevant memories yet."
 
-        # Store the conversation
+        # Store the conversation (memorize() ingests a folder).
+        import shutil
         import tempfile
 
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
+        temp_dir = tempfile.mkdtemp(prefix="memu_chat_")
+        with open(os.path.join(temp_dir, "message.txt"), "w", encoding="utf-8") as f:
             f.write(f"User ({request.user_id}) said: {request.message}")
-            temp_file = f.name
 
         try:
-            memorize_result = await memory_service.memorize(
-                resource_url=temp_file,
-                modality="text",
-            )
+            memorize_result = await memory_service.memorize(folder=temp_dir)
             memories_stored = len(memorize_result.get("items", []))
         finally:
-            os.unlink(temp_file)
+            shutil.rmtree(temp_dir, ignore_errors=True)
 
         return ChatResponse(
             response=response_text,
@@ -223,24 +221,22 @@ async def memorize(request: MemorizeRequest):
         raise HTTPException(status_code=503, detail="Memory service not initialized")
 
     try:
+        import shutil
         import tempfile
 
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
+        temp_dir = tempfile.mkdtemp(prefix="memu_memorize_")
+        with open(os.path.join(temp_dir, "content.txt"), "w", encoding="utf-8") as f:
             f.write(f"[User: {request.user_id}] {request.content}")
-            temp_file = f.name
 
         try:
-            result = await memory_service.memorize(
-                resource_url=temp_file,
-                modality="text",
-            )
+            result = await memory_service.memorize(folder=temp_dir)
             return MemorizeResponse(
                 status="stored",
                 items_created=len(result.get("items", [])),
                 categories=len(result.get("categories", [])),
             )
         finally:
-            os.unlink(temp_file)
+            shutil.rmtree(temp_dir, ignore_errors=True)
 
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Memorize failed: {e!s}")
diff --git a/examples/test_nebius_provider.py b/examples/test_nebius_provider.py
index 5df5e59a..58c30d67 100644
--- a/examples/test_nebius_provider.py
+++ b/examples/test_nebius_provider.py
@@ -138,19 +138,17 @@ async def test_memu_with_nebius():
         service = MemoryService(llm_profiles=llm_profiles)
         print("  ✓ MemoryService initialized with Nebius!")
 
-        # Test memorize with a file (create temp file)
+        # Test memorize with a folder (memorize() ingests a directory).
         print("\n  Testing memorize...")
+        import shutil
         import tempfile
 
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
+        temp_dir = tempfile.mkdtemp(prefix="memu_nebius_")
+        with open(os.path.join(temp_dir, "note.txt"), "w", encoding="utf-8") as f:
             f.write("User likes Python programming and AI development. They prefer dark mode in their IDE.")
-            temp_file = f.name
 
         try:
-            result = await service.memorize(
-                resource_url=temp_file,
-                modality="text",
-            )
+            result = await service.memorize(folder=temp_dir)
             items_count = len(result.get("items", []))
             categories_count = len(result.get("categories", []))
             print(f"  ✓ Memorized! Items: {items_count}, Categories: {categories_count}")
@@ -160,7 +158,7 @@ async def test_memu_with_nebius():
                 summary = item.get("summary", "")[:80]
                 print(f"    - {summary}...")
         finally:
-            os.unlink(temp_file)
+            shutil.rmtree(temp_dir, ignore_errors=True)
 
         # Test retrieve
         print("\n  Testing retrieve...")
diff --git a/pyproject.toml b/pyproject.toml
index c7858ec9..3c9e1896 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,13 +30,11 @@ dependencies = [
 ]
 
 [build-system]
-requires = ["maturin>=1.0,<2.0"]
-build-backend = "maturin"
+requires = ["hatchling"]
+build-backend = "hatchling.build"
 
-[tool.maturin]
-module-name = "memu._core"
-python-packages = ["memu"]
-python-source = "src"
+[tool.hatch.build.targets.wheel]
+packages = ["src/memu"]
 
 [dependency-groups]
 dev = [
diff --git a/src/lib.rs b/src/lib.rs
deleted file mode 100644
index c45f563d..00000000
--- a/src/lib.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-use pyo3::prelude::*;
-
-#[pyfunction]
-fn hello_from_bin() -> String {
-    "Hello from memu!".to_string()
-}
-
-/// A Python module implemented in Rust. The name of this function must match
-/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
-/// import the module.
-#[pymodule]
-fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_function(wrap_pyfunction!(hello_from_bin, m)?)?;
-    Ok(())
-}
diff --git a/src/memu/__init__.py b/src/memu/__init__.py
index 38772b8b..a30ada2b 100644
--- a/src/memu/__init__.py
+++ b/src/memu/__init__.py
@@ -1,9 +1,6 @@
-from memu._core import hello_from_bin
 from memu.app.service import MemoryService
 
 # Public alias used in documentation examples
 MemUService = MemoryService
 
-
-def _rust_entry() -> str:
-    return hello_from_bin()
+__all__ = ["MemUService", "MemoryService"]
diff --git a/src/memu/app/crud.py b/src/memu/app/crud.py
index 50d63d4c..40af9f4d 100644
--- a/src/memu/app/crud.py
+++ b/src/memu/app/crud.py
@@ -650,7 +650,7 @@ def _map_category_names_to_ids(self, names: list[str], ctx: Context) -> list[str
 
     async def _patch_category_summaries(
         self,
-        updates: dict[str, list[str]],
+        updates: dict[str, tuple[str | None, str | None]],
         ctx: Context,
         store: Database,
         llm_client: Any | None = None,
diff --git a/src/memu/app/memorize.py b/src/memu/app/memorize.py
index 085af8aa..d31a2aeb 100644
--- a/src/memu/app/memorize.py
+++ b/src/memu/app/memorize.py
@@ -1,18 +1,16 @@
 from __future__ import annotations
 
 import asyncio
-import json
 import logging
 import pathlib
 import re
 from collections.abc import Awaitable, Callable, Mapping, Sequence
 from typing import TYPE_CHECKING, Any, cast
-from xml.etree.ElementTree import Element
 
-import defusedxml.ElementTree as ET
 from pydantic import BaseModel
 
 from memu.app.settings import CategoryConfig, CustomPrompt
+from memu.blob.folder import diff_folder, load_manifest, manifest_from_scan, save_manifest, scan_folder
 from memu.database.models import CategoryItem, MemoryCategory, MemoryItem, MemoryType, Resource
 from memu.prompts.category_summary import (
     CUSTOM_PROMPT as CATEGORY_SUMMARY_CUSTOM_PROMPT,
@@ -57,6 +55,7 @@ class MemorizeMixin:
         _get_step_llm_client: Callable[[Mapping[str, Any] | None], Any]
         _get_step_embedding_client: Callable[[Mapping[str, Any] | None], Any]
         _get_llm_client: Callable[..., Any]
+        _get_embedding_client: Callable[..., Any]
         _model_dump_without_embeddings: Callable[[BaseModel], dict[str, Any]]
         _extract_json_blob: Callable[[str], str]
         _escape_prompt_value: Callable[[str], str]
@@ -64,20 +63,98 @@ class MemorizeMixin:
         memory_files_config: Any
         _build_memory_files: Callable[..., Awaitable[dict[str, Any]]]
 
+        # Provided by CRUDMixin (composed onto MemoryService).
+        async def _patch_category_summaries(
+            self,
+            updates: dict[str, tuple[str | None, str | None]],
+            ctx: Context,
+            store: Database,
+            llm_client: Any | None = None,
+        ) -> None: ...
+        # Provided by MemorizeParseMixin (composed onto MemoryService).
+        _parse_memory_type_response_xml: Callable[[str], list[dict[str, Any]]]
+        _parse_conversation_preprocess_with_segments: Callable[
+            [str, str], tuple[str | None, list[dict[str, int | str]] | None]
+        ]
+        _parse_multimodal_response: Callable[[str, str, str], tuple[str | None, str | None]]
+
     async def memorize(
         self,
         *,
-        resource_url: str,
-        modality: str,
+        folder: str,
         user: dict[str, Any] | None = None,
     ) -> dict[str, Any]:
+        """Sync a folder into memory by diffing against an input-side manifest.
+
+        Scans ``folder`` recursively, infers each file's modality by extension
+        (unsupported extensions are skipped), and diffs against the sidecar
+        ``.memu_manifest.json`` to find added/modified/deleted files. Modified and
+        deleted files have their previously extracted memory cascade-deleted (with
+        affected category summaries recomputed); added and modified files are
+        (re)memorized. The memory file tree is refreshed and the manifest rewritten.
+        """
         ctx = self._get_context()
         store = self._get_database()
         user_scope = self.user_model(**user).model_dump() if user is not None else None
         await self._ensure_categories_ready(ctx, store, user_scope)
 
-        memory_types = self._resolve_memory_types()
+        root = pathlib.Path(folder).resolve()
+        scanned = scan_folder(root)
+        manifest = load_manifest(root)
+        diff = diff_folder(scanned, manifest)
+
+        # 1. Cascade-delete memory for files that were modified or removed.
+        stale_urls = {sf.abs_path for sf in diff.modified}
+        stale_urls.update(str(root / rel) for rel in diff.deleted)
+        removed_resources = await self._cascade_delete_by_urls(stale_urls, ctx=ctx, store=store, user_scope=user_scope)
+
+        # 2. (Re)memorize added and modified files; each file maps to one Resource.
+        changed_resources: list[Resource] = []
+        items: list[dict[str, Any]] = []
+        categories: list[dict[str, Any]] = []
+        for scanned_file in [*diff.added, *diff.modified]:
+            result = await self._memorize_one(
+                resource_url=scanned_file.abs_path,
+                modality=scanned_file.modality,
+                user_scope=user_scope,
+                ctx=ctx,
+                store=store,
+            )
+            changed_resources.extend(cast("list[Resource]", result.get("resources") or []))
+            response = cast("dict[str, Any]", result.get("response") or {})
+            items.extend(response.get("items", []))
+            # Categories reflect the cumulative scoped state, so the latest wins.
+            if response.get("categories"):
+                categories = response["categories"]
+
+        # 3. Refresh the memory file tree (full rebuild when any file was modified or deleted).
+        await self._update_memory_files(changed_resources, user_scope, force_full=diff.has_removals)
 
+        # 4. Persist the updated input manifest.
+        save_manifest(root, manifest_from_scan(scanned))
+
+        return {
+            "folder": str(root),
+            "added": [sf.rel_path for sf in diff.added],
+            "modified": [sf.rel_path for sf in diff.modified],
+            "deleted": list(diff.deleted),
+            "resources": [self._model_dump_without_embeddings(r) for r in changed_resources],
+            "removed_resources": [self._model_dump_without_embeddings(r) for r in removed_resources],
+            "items": items,
+            "categories": categories,
+        }
+
+    async def _memorize_one(
+        self,
+        *,
+        resource_url: str,
+        modality: str,
+        user_scope: dict[str, Any] | None,
+        ctx: Context,
+        store: Database,
+    ) -> WorkflowState:
+        """Run the memorize workflow for a single file (one file -> one Resource)."""
+        memory_types = self._resolve_memory_types()
         state: WorkflowState = {
             "resource_url": resource_url,
             "modality": modality,
@@ -88,36 +165,74 @@ async def memorize(
             "category_ids": list(ctx.category_ids),
             "user": user_scope,
         }
-
         result = await self._run_workflow("memorize", state)
-        response = cast(dict[str, Any] | None, result.get("response"))
-        if response is None:
+        if result.get("response") is None:
             msg = "Memorize workflow failed to produce a response"
             raise RuntimeError(msg)
+        return result
 
-        await self._maybe_update_memory_files(result, user_scope)
-        return response
+    async def _cascade_delete_by_urls(
+        self,
+        urls: set[str],
+        *,
+        ctx: Context,
+        store: Database,
+        user_scope: dict[str, Any] | None,
+    ) -> list[Resource]:
+        """Delete resources (and their items/relations) whose url is in ``urls``.
+
+        Affected category summaries are recomputed so the structured memory and the
+        rendered file tree stay consistent after a source file is changed/removed.
+        """
+        if not urls:
+            return []
+        where = user_scope or None
+        targets = [res for res in store.resource_repo.list_resources(where=where).values() if res.url in urls]
+        if not targets:
+            return []
+        target_ids = {res.id for res in targets}
 
-    async def _maybe_update_memory_files(
-        self, result: WorkflowState, user_scope: dict[str, Any] | None
+        # Discarded item summaries per category, used to recompute summaries.
+        category_discards: dict[str, list[str]] = {}
+        for item in store.memory_item_repo.list_items(where=where).values():
+            if item.resource_id not in target_ids:
+                continue
+            for relation in store.category_item_repo.get_item_categories(item.id):
+                store.category_item_repo.unlink_item_category(item.id, relation.category_id)
+                category_discards.setdefault(relation.category_id, []).append(item.summary)
+            store.memory_item_repo.delete_item(item.id)
+
+        for res in targets:
+            store.resource_repo.delete_resource(res.id)
+
+        updates: dict[str, tuple[str | None, str | None]] = {
+            cid: ("\n".join(s for s in summaries if s and s.strip()), None)
+            for cid, summaries in category_discards.items()
+            if any(s and s.strip() for s in summaries)
+        }
+        if updates:
+            await self._patch_category_summaries(updates, ctx=ctx, store=store, llm_client=self._get_llm_client())
+        return targets
+
+    async def _update_memory_files(
+        self,
+        changed_resources: list[Resource],
+        user_scope: dict[str, Any] | None,
+        *,
+        force_full: bool = False,
     ) -> None:
-        """Drive the memory file tree from a memorize call (init or incremental update).
-
-        Gated behind ``memory_files_config.enabled`` and ``update_on_memorize`` so the
-        default memorize behavior is unchanged. The just-created resources are the
-        "changed part of the file system" that an existing tree is updated against;
-        if no tree exists yet, ``_build_memory_files`` initializes it from the full
-        scoped store instead. Failures are best-effort: the memory is already
-        persisted, so an export error must not fail memorize.
+        """Drive the memory file tree after a folder sync (init or incremental).
+
+        When any file was modified or deleted (``force_full``), the tree is rebuilt
+        from the full scoped store so stale skills/entries do not linger. Otherwise
+        an incremental update merges the just-created resources. Failures are
+        best-effort: the structured memory is already persisted, so an export error
+        must not fail memorize.
         """
-        cfg = self.memory_files_config
-        if not (getattr(cfg, "enabled", False) and getattr(cfg, "update_on_memorize", False)):
-            return
-        changed = cast("list[Resource]", result.get("resources") or [])
-        if not changed:
+        if not changed_resources and not force_full:
             return
         try:
-            await self._build_memory_files(user_scope, changed=changed)
+            await self._build_memory_files(user_scope, changed=None if force_full else changed_resources)
         except Exception:
             logger.exception("Memory file export failed after memorize")
 
@@ -226,31 +341,37 @@ async def _memorize_preprocess_multimodal(self, state: WorkflowState, step_conte
     async def _memorize_extract_items(self, state: WorkflowState, step_context: Any) -> WorkflowState:
         llm_client = self._get_step_llm_client(step_context)
         preprocessed_resources = state.get("preprocessed_resources", [])
-        resource_plans: list[dict[str, Any]] = []
-        total_segments = len(preprocessed_resources) or 1
 
-        for idx, prep in enumerate(preprocessed_resources):
-            res_url = self._segment_resource_url(state["resource_url"], idx, total_segments)
+        # A single input file maps to a single Resource: segmentation (e.g. for
+        # conversations) is only an internal preprocessing/extraction detail, so
+        # all segment entries and captions are aggregated into one resource plan.
+        all_entries: list[tuple[MemoryType, str, list[str]]] = []
+        captions: list[str] = []
+        for prep in preprocessed_resources:
             text = prep.get("text")
             caption = prep.get("caption")
+            if caption and caption.strip():
+                captions.append(caption.strip())
 
             structured_entries = await self._generate_structured_entries(
-                resource_url=res_url,
+                resource_url=state["resource_url"],
                 modality=state["modality"],
                 memory_types=state["memory_types"],
                 text=text,
                 categories_prompt_str=state["categories_prompt_str"],
                 llm_client=llm_client,
             )
-
-            resource_plans.append({
-                "resource_url": res_url,
-                "text": text,
-                "caption": caption,
-                "entries": structured_entries,
-            })
-
-        state["resource_plans"] = resource_plans
+            all_entries.extend(structured_entries)
+
+        combined_caption = " ".join(captions) if captions else None
+        state["resource_plans"] = [
+            {
+                "resource_url": state["resource_url"],
+                "text": None,
+                "caption": combined_caption,
+                "entries": all_entries,
+            }
+        ]
         return state
 
     def _memorize_dedupe_merge(self, state: WorkflowState, step_context: Any) -> WorkflowState:
@@ -351,12 +472,6 @@ def _memorize_build_response(self, state: WorkflowState, step_context: Any) -> W
         state["response"] = response
         return state
 
-    def _segment_resource_url(self, base_url: str, idx: int, total_segments: int) -> str:
-        if total_segments <= 1:
-            return base_url
-        path = pathlib.Path(base_url)
-        return f"{path.stem}_#segment_{idx}{path.suffix}"
-
     async def _fetch_and_preprocess_resource(
         self, resource_url: str, modality: str, llm_client: Any | None = None
     ) -> tuple[str, list[dict[str, str | None]]]:
@@ -389,7 +504,7 @@ async def _create_resource_with_caption(
     ) -> Resource:
         caption_text = caption.strip() if caption else None
         if caption_text:
-            client = embed_client or self._get_llm_client()
+            client = embed_client or self._get_embedding_client()
             caption_embedding = (await client.embed([caption_text]))[0]
         else:
             caption_embedding = None
@@ -620,7 +735,7 @@ async def _persist_memory_items(
             where category_updates maps category_id -> list of (item_id, summary) tuples
         """
         summary_payloads = [content for _, content, _ in structured_entries]
-        client = embed_client or self._get_llm_client()
+        client = embed_client or self._get_embedding_client()
         item_embeddings = await client.embed(summary_payloads) if summary_payloads else []
         items: list[MemoryItem] = []
         rels: list[CategoryItem] = []
@@ -718,7 +833,7 @@ async def _initialize_categories(
         embed_map: dict[int, list[float]] = {}
         if needs_embed:
             texts = [self._category_embedding_text(cfg) for _, cfg in needs_embed]
-            vecs = await self._get_llm_client("embedding").embed(texts)
+            vecs = await self._get_embedding_client("embedding").embed(texts)
             for (i, _), vec in zip(needs_embed, vecs, strict=True):
                 embed_map[i] = vec
 
@@ -1218,202 +1333,3 @@ async def _update_category_summaries(
             )
             updated_summaries[cid] = cleaned_summary
         return updated_summaries
-
-    def _parse_conversation_preprocess(self, raw: str) -> tuple[str | None, str | None]:
-        conversation = self._extract_tag_content(raw, "conversation")
-        summary = self._extract_tag_content(raw, "summary")
-        return conversation, summary
-
-    def _parse_multimodal_response(self, raw: str, content_tag: str, caption_tag: str) -> tuple[str | None, str | None]:
-        """
-        Parse multimodal preprocessing response (video, image, document, audio).
-        Extracts content and caption from XML-like tags.
-
-        Args:
-            raw: Raw LLM response
-            content_tag: Tag name for main content (e.g., "detailed_description", "processed_content")
-            caption_tag: Tag name for caption (typically "caption")
-
-        Returns:
-            Tuple of (content, caption)
-        """
-        content = self._extract_tag_content(raw, content_tag)
-        caption = self._extract_tag_content(raw, caption_tag)
-
-        # Fallback: if no tags found, try to use raw response as content
-        if not content:
-            content = raw.strip()
-
-        # Fallback for caption: use first sentence of content if no caption found
-        if not caption and content:
-            first_sentence = content.split(".")[0]
-            caption = first_sentence if len(first_sentence) <= 200 else first_sentence[:200]
-
-        return content, caption
-
-    def _parse_conversation_preprocess_with_segments(
-        self, raw: str, original_text: str
-    ) -> tuple[str | None, list[dict[str, int | str]] | None]:
-        """
-        Parse conversation preprocess response and extract segments.
-        Returns: (conversation_text, segments)
-        """
-        conversation = self._extract_tag_content(raw, "conversation")
-        segments = self._extract_segments_with_fallback(raw)
-        return conversation, segments
-
-    def _extract_segments_with_fallback(self, raw: str) -> list[dict[str, int | str]] | None:
-        segments = self._segments_from_json_payload(raw)
-        if segments is not None:
-            return segments
-        try:
-            blob = self._extract_json_blob(raw)
-        except Exception:
-            logging.exception("Failed to extract segments from conversation preprocess response")
-            return None
-        return self._segments_from_json_payload(blob)
-
-    def _segments_from_json_payload(self, payload: str) -> list[dict[str, int | str]] | None:
-        try:
-            parsed = json.loads(payload)
-        except (json.JSONDecodeError, TypeError):
-            return None
-        return self._segments_from_parsed_data(parsed)
-
-    @staticmethod
-    def _segments_from_parsed_data(parsed: Any) -> list[dict[str, int | str]] | None:
-        if not isinstance(parsed, dict):
-            return None
-        segments_data = parsed.get("segments")
-        if not isinstance(segments_data, list):
-            return None
-        segments: list[dict[str, int | str]] = []
-        for seg in segments_data:
-            if isinstance(seg, dict) and "start" in seg and "end" in seg:
-                try:
-                    segment: dict[str, int | str] = {
-                        "start": int(seg["start"]),
-                        "end": int(seg["end"]),
-                    }
-                    if "caption" in seg and isinstance(seg["caption"], str):
-                        segment["caption"] = seg["caption"]
-                    segments.append(segment)
-                except (TypeError, ValueError):
-                    continue
-        return segments or None
-
-    @staticmethod
-    def _extract_tag_content(raw: str, tag: str) -> str | None:
-        pattern = re.compile(rf"<{tag}>(.*?)</{tag}>", re.IGNORECASE | re.DOTALL)
-        match = pattern.search(raw)
-        if not match:
-            return None
-        content = match.group(1).strip()
-        return content or None
-
-    def _parse_memory_type_response(self, raw: str) -> list[dict[str, Any]]:
-        if not raw:
-            return []
-        raw = raw.strip()
-        if not raw:
-            return []
-        payload = None
-        try:
-            payload = json.loads(raw)
-        except json.JSONDecodeError:
-            try:
-                blob = self._extract_json_blob(raw)
-                payload = json.loads(blob)
-            except Exception:
-                return []
-        if not isinstance(payload, dict):
-            return []
-        items = payload.get("memories_items")
-        if not isinstance(items, list):
-            return []
-        normalized: list[dict[str, Any]] = []
-        for entry in items:
-            if not isinstance(entry, dict):
-                continue
-            normalized.append(entry)
-        return normalized
-
-    def _find_xml_boundaries(self, raw: str) -> tuple[int, int, str] | None:
-        """Find the start index, end index, and closing tag for XML root element."""
-        root_tags = ["item", "profile", "behaviors", "events", "knowledge", "skills"]
-        for tag in root_tags:
-            opening = f"<{tag}>"
-            closing = f"</{tag}>"
-            start_idx = raw.find(opening)
-            if start_idx != -1:
-                end_idx = raw.rfind(closing)
-                if end_idx != -1:
-                    return (start_idx, end_idx, closing)
-        return None
-
-    def _parse_memory_element(self, memory_elem: Element) -> dict[str, Any] | None:
-        """Parse a single memory XML element into a dict."""
-        memory_dict: dict[str, Any] = {}
-
-        content_elem = memory_elem.find("content")
-        if content_elem is not None and content_elem.text:
-            memory_dict["content"] = content_elem.text.strip()
-
-        categories_elem = memory_elem.find("categories")
-        if categories_elem is not None:
-            categories = [cat_elem.text.strip() for cat_elem in categories_elem.findall("category") if cat_elem.text]
-            memory_dict["categories"] = categories
-
-        if memory_dict.get("content") and memory_dict.get("categories"):
-            return memory_dict
-        return None
-
-    def _parse_memory_type_response_xml(self, raw: str) -> list[dict[str, Any]]:
-        """
-        Parse XML memory extraction output into a list of memory items.
-
-        Expected XML format (root tag varies by memory type):
-        <profile|behaviors|events|knowledge|skills>
-            <memory>
-                <content>...</content>
-                <categories>
-                    <category>...</category>
-                </categories>
-            </memory>
-        </...>
-        """
-        if not raw or not raw.strip():
-            return []
-        raw = raw.strip()
-
-        try:
-            boundaries = self._find_xml_boundaries(raw)
-            if boundaries is None:
-                logger.warning("Could not find valid root tag in XML response")
-                return []
-
-            start_idx, end_idx, end_tag = boundaries
-            xml_content = raw[start_idx : end_idx + len(end_tag)]
-            xml_content = xml_content.replace("&", "&amp;")
-
-            try:
-                root = ET.fromstring(xml_content)
-            except ET.ParseError:
-                # Some LLMs emit one <item> per memory rather than a single root
-                # element wrapping all memories, resulting in "junk after document
-                # element" when the slice contains multiple top-level tags.  Wrap
-                # the content in a synthetic root element and retry.
-                root = ET.fromstring(f"<_root_>{xml_content}</_root_>")
-
-            result: list[dict[str, Any]] = []
-
-            for memory_elem in root.iter("memory"):
-                parsed = self._parse_memory_element(memory_elem)
-                if parsed:
-                    result.append(parsed)
-
-        except ET.ParseError:
-            logger.exception("Failed to parse XML")
-            return []
-        else:
-            return result
diff --git a/src/memu/app/memorize_parse.py b/src/memu/app/memorize_parse.py
new file mode 100644
index 00000000..cca46452
--- /dev/null
+++ b/src/memu/app/memorize_parse.py
@@ -0,0 +1,225 @@
+from __future__ import annotations
+
+import json
+import logging
+import re
+from typing import TYPE_CHECKING, Any
+from xml.etree.ElementTree import Element
+
+import defusedxml.ElementTree as ET
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+
+class MemorizeParseMixin:
+    """Stateless parsers for memorize LLM outputs (XML/JSON/tagged text).
+
+    Split out of :class:`MemorizeMixin` to keep the memorize flow focused on
+    orchestration. Composed onto ``MemoryService``; the only external dependency
+    (``_extract_json_blob``) is resolved at runtime via the service instance.
+    """
+
+    if TYPE_CHECKING:
+        _extract_json_blob: Callable[[str], str]
+
+    def _parse_conversation_preprocess(self, raw: str) -> tuple[str | None, str | None]:
+        """Return the ``<conversation>`` and ``<summary>`` tag contents of ``raw`` (``None`` where absent or empty)."""
+        extract = self._extract_tag_content
+        return extract(raw, "conversation"), extract(raw, "summary")
+
+    def _parse_multimodal_response(self, raw: str, content_tag: str, caption_tag: str) -> tuple[str | None, str | None]:
+        """
+        Split a multimodal preprocessing reply (video, image, document, audio) into content and caption.
+
+        Both values are read from XML-like tags in the reply, with fallbacks when a tag is missing:
+        the content falls back to the whole stripped reply, and the caption falls back to the
+        text before the first "." of the content, capped at 200 characters.
+
+        Args:
+            raw: Raw LLM response
+            content_tag: Tag name for main content (e.g., "detailed_description", "processed_content")
+            caption_tag: Tag name for caption (typically "caption")
+
+        Returns:
+            Tuple of (content, caption)
+        """
+        extract = self._extract_tag_content
+        # Tagged content wins; otherwise treat the entire stripped reply as the content.
+        body = extract(raw, content_tag) or raw.strip()
+        label = extract(raw, caption_tag)
+
+        if body and not label:
+            # Slicing never raises, so a short first sentence passes through untouched.
+            label = body.split(".")[0][:200]
+
+        return body, label
+
+    def _parse_conversation_preprocess_with_segments(
+        self, raw: str, original_text: str
+    ) -> tuple[str | None, list[dict[str, int | str]] | None]:
+        """
+        Pull the ``<conversation>`` text and the segment list out of a conversation preprocess reply.
+        ``original_text`` is accepted but not consulted by this method.
+        Returns: (conversation_text, segments)
+        """
+        conversation_text = self._extract_tag_content(raw, "conversation")
+        return conversation_text, self._extract_segments_with_fallback(raw)
+
+    def _extract_segments_with_fallback(self, raw: str) -> list[dict[str, int | str]] | None:
+        """Parse segments from ``raw`` as JSON; if none are found, retry on the blob from ``_extract_json_blob``."""
+        if (segments := self._segments_from_json_payload(raw)) is not None:
+            return segments
+        try:
+            blob = self._extract_json_blob(raw)
+        except Exception:  # best-effort: record the failure on the module logger and report "no segments"
+            logger.exception("Failed to extract segments from conversation preprocess response")
+            return None
+        return self._segments_from_json_payload(blob)
+
+    def _segments_from_json_payload(self, payload: str) -> list[dict[str, int | str]] | None:
+        try:  # a payload that cannot be decoded means "no segments" (None) rather than an exception
+            parsed = json.loads(payload)
+        except (json.JSONDecodeError, TypeError):  # malformed JSON, or a payload json.loads cannot accept
+            return None
+        return self._segments_from_parsed_data(parsed)  # shape validation is delegated to that helper
+
+    @staticmethod
+    def _segments_from_parsed_data(parsed: Any) -> list[dict[str, int | str]] | None:
+        """Build validated segment dicts from decoded JSON shaped like ``{"segments": [...]}``.
+
+        Entries lacking int-convertible "start"/"end" are skipped; ``None`` if the shape is wrong or none survive.
+        """
+        candidates = parsed.get("segments") if isinstance(parsed, dict) else None
+        if not isinstance(candidates, list):
+            return None
+        segments: list[dict[str, int | str]] = []
+        for seg in candidates:
+            if not (isinstance(seg, dict) and "start" in seg and "end" in seg):
+                continue
+            try:
+                segment: dict[str, int | str] = {"start": int(seg["start"]), "end": int(seg["end"])}
+            except (TypeError, ValueError, OverflowError):  # OverflowError: int(inf), e.g. JSON ``Infinity``
+                continue
+            if isinstance(seg.get("caption"), str):
+                segment["caption"] = seg["caption"]
+            segments.append(segment)
+        return segments or None
+
+    @staticmethod
+    def _extract_tag_content(raw: str, tag: str) -> str | None:
+        """Return the stripped text inside the first ``<tag>...</tag>`` of ``raw``, or ``None`` if absent or empty."""
+        name = re.escape(tag)  # match the tag name literally even if it contains regex metacharacters
+        match = re.search(rf"<{name}>(.*?)</{name}>", raw, re.IGNORECASE | re.DOTALL)
+        if match is None:
+            return None
+        return match.group(1).strip() or None
+
+    def _parse_memory_type_response(self, raw: str) -> list[dict[str, Any]]:
+        """Decode a JSON memory-extraction reply into its list of memory item dicts.
+
+        The reply is parsed as JSON directly, falling back to the blob returned by
+        ``_extract_json_blob`` when that fails. Only dict entries of the top-level
+        "memories_items" list are returned; any unusable input yields ``[]``.
+        """
+        text = raw.strip() if raw else ""
+        if not text:
+            return []
+
+        try:
+            payload = json.loads(text)
+        except json.JSONDecodeError:
+            # Direct decoding failed: retry on whatever JSON blob can be carved out of the reply.
+            try:
+                payload = json.loads(self._extract_json_blob(text))
+            except Exception:
+                return []
+
+        # Anything other than {"memories_items": [...]} counts as "no memories".
+        items = payload.get("memories_items") if isinstance(payload, dict) else None
+        if not isinstance(items, list):
+            return []
+        # Drop non-dict entries but keep the remaining ones in their original order.
+        return [entry for entry in items if isinstance(entry, dict)]
+
+    def _find_xml_boundaries(self, raw: str) -> tuple[int, int, str] | None:
+        """Find the start index, end index, and closing tag for the XML root element (``None`` if absent).
+
+        Known root tags are tried in a fixed order; a tag whose last closing tag precedes its opening tag is skipped.
+        """
+        for tag in ("item", "profile", "behaviors", "events", "knowledge", "skills"):
+            closing = f"</{tag}>"
+            start_idx = raw.find(f"<{tag}>")
+            end_idx = raw.rfind(closing)
+            if start_idx != -1 and end_idx > start_idx:  # the closing tag must come after the opening tag
+                return (start_idx, end_idx, closing)
+        return None
+
+    def _parse_memory_element(self, memory_elem: Element) -> dict[str, Any] | None:
+        """Parse a single memory XML element into a dict.
+
+        Returns ``{"content": ..., "categories": [...]}`` with stripped text, or ``None`` unless the
+        element has non-blank content and at least one non-blank category.
+        """
+        content = (memory_elem.findtext("content") or "").strip()
+        categories_elem = memory_elem.find("categories")
+        if not content or categories_elem is None:
+            return None
+        # Strip first, then filter: whitespace-only <category> text must not yield "" entries.
+        stripped = ((cat.text or "").strip() for cat in categories_elem.findall("category"))
+        categories = [name for name in stripped if name]
+        if not categories:
+            return None
+        return {"content": content, "categories": categories}
+
+    def _parse_memory_type_response_xml(self, raw: str) -> list[dict[str, Any]]:
+        """
+        Parse XML memory extraction output into a list of memory items.
+
+        Expected XML format (root tag varies by memory type):
+        <item|profile|behaviors|events|knowledge|skills>
+            <memory>
+                <content>...</content>
+                <categories>
+                    <category>...</category>
+                </categories>
+            </memory>
+        </...>
+
+        Returns one dict per usable <memory>; [] when the input is blank, has no known root tag,
+        or cannot be parsed as XML.
+        """
+        if not raw or not raw.strip():
+            return []
+        raw = raw.strip()
+
+        try:
+            boundaries = self._find_xml_boundaries(raw)
+            if boundaries is None:
+                logger.warning("Could not find valid root tag in XML response")
+                return []
+
+            start_idx, end_idx, end_tag = boundaries
+            xml_content = raw[start_idx : end_idx + len(end_tag)]
+            # Escape bare "&" (LLM text is rarely XML-escaped) but leave the predefined XML entities
+            # alone, so already-escaped output such as "&amp;" is not double-escaped into "&amp;amp;".
+            xml_content = re.sub(r"&(?!(?:amp|lt|gt|quot|apos);)", "&amp;", xml_content)
+
+            try:
+                root = ET.fromstring(xml_content)
+            except ET.ParseError:
+                # Some LLMs emit one <item> per memory rather than a single root
+                # element wrapping all memories, resulting in "junk after document
+                # element" when the slice contains multiple top-level tags.  Wrap
+                # the content in a synthetic root element and retry.
+                root = ET.fromstring(f"<_root_>{xml_content}</_root_>")
+
+            result = [parsed for elem in root.iter("memory") if (parsed := self._parse_memory_element(elem))]
+
+        except ET.ParseError:
+            logger.exception("Failed to parse XML")
+            return []
+        else:
+            return result
diff --git a/src/memu/app/patch.py b/src/memu/app/patch.py
index c1796478..0b76063f 100644
--- a/src/memu/app/patch.py
+++ b/src/memu/app/patch.py
@@ -28,6 +28,7 @@ class PatchMixin:
         _get_database: Callable[[], Database]
         _get_step_llm_client: Callable[[Mapping[str, Any] | None], Any]
         _get_llm_client: Callable[..., Any]
+        _get_embedding_client: Callable[..., Any]
         _model_dump_without_embeddings: Callable[[BaseModel], dict[str, Any]]
         _extract_json_blob: Callable[[str], str]
         _escape_prompt_value: Callable[[str], str]
@@ -267,7 +268,7 @@ async def _patch_create_memory_item(self, state: WorkflowState, step_context: An
         category_memory_updates: dict[str, tuple[Any, Any]] = {}
 
         embed_payload = [memory_payload["content"]]
-        content_embedding = (await self._get_llm_client().embed(embed_payload))[0]
+        content_embedding = (await self._get_embedding_client().embed(embed_payload))[0]
 
         item = store.memory_item_repo.create_item(
             memory_type=memory_payload["type"],
@@ -307,7 +308,7 @@ async def _patch_update_memory_item(self, state: WorkflowState, step_context: An
 
         if memory_payload["content"]:
             embed_payload = [memory_payload["content"]]
-            content_embedding = (await self._get_llm_client().embed(embed_payload))[0]
+            content_embedding = (await self._get_embedding_client().embed(embed_payload))[0]
         else:
             content_embedding = None
 
diff --git a/src/memu/app/retrieve.py b/src/memu/app/retrieve.py
index a7cbff5c..b5af1bc8 100644
--- a/src/memu/app/retrieve.py
+++ b/src/memu/app/retrieve.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import json
 import logging
 import re
 from collections.abc import Awaitable, Callable, Mapping, Sequence
@@ -9,9 +8,6 @@
 from pydantic import BaseModel
 
 from memu.database.inmemory.vector import cosine_topk
-from memu.prompts.retrieve.llm_category_ranker import PROMPT as LLM_CATEGORY_RANKER_PROMPT
-from memu.prompts.retrieve.llm_item_ranker import PROMPT as LLM_ITEM_RANKER_PROMPT
-from memu.prompts.retrieve.llm_resource_ranker import PROMPT as LLM_RESOURCE_RANKER_PROMPT
 from memu.prompts.retrieve.pre_retrieval_decision import SYSTEM_PROMPT as PRE_RETRIEVAL_SYSTEM_PROMPT
 from memu.prompts.retrieve.pre_retrieval_decision import USER_PROMPT as PRE_RETRIEVAL_USER_PROMPT
 from memu.workflow.step import WorkflowState, WorkflowStep
@@ -34,6 +30,7 @@ class RetrieveMixin:
         _get_step_llm_client: Callable[[Mapping[str, Any] | None], Any]
         _get_step_embedding_client: Callable[[Mapping[str, Any] | None], Any]
         _get_llm_client: Callable[..., Any]
+        _get_embedding_client: Callable[..., Any]
         _model_dump_without_embeddings: Callable[[BaseModel], dict[str, Any]]
         _extract_json_blob: Callable[[str], str]
         _escape_prompt_value: Callable[[str], str]
@@ -451,277 +448,6 @@ def _rag_build_context(self, state: WorkflowState, _: Any) -> WorkflowState:
         state["response"] = response
         return state
 
-    def _build_llm_retrieve_workflow(self) -> list[WorkflowStep]:
-        steps = [
-            WorkflowStep(
-                step_id="route_intention",
-                role="route_intention",
-                handler=self._llm_route_intention,
-                requires={"original_query", "context_queries", "skip_rewrite"},
-                produces={"needs_retrieval", "rewritten_query", "active_query", "next_step_query"},
-                capabilities={"llm"},
-                config={"llm_profile": self.retrieve_config.sufficiency_check_llm_profile},
-            ),
-            WorkflowStep(
-                step_id="route_category",
-                role="route_category",
-                handler=self._llm_route_category,
-                requires={"needs_retrieval", "active_query", "ctx", "store", "where"},
-                produces={"category_hits"},
-                capabilities={"llm"},
-                config={"llm_profile": self.retrieve_config.llm_ranking_llm_profile},
-            ),
-            WorkflowStep(
-                step_id="sufficiency_after_category",
-                role="sufficiency_check",
-                handler=self._llm_category_sufficiency,
-                requires={"needs_retrieval", "active_query", "context_queries", "category_hits"},
-                produces={"next_step_query", "proceed_to_items"},
-                capabilities={"llm"},
-                config={"llm_profile": self.retrieve_config.sufficiency_check_llm_profile},
-            ),
-            WorkflowStep(
-                step_id="recall_items",
-                role="recall_items",
-                handler=self._llm_recall_items,
-                requires={
-                    "needs_retrieval",
-                    "proceed_to_items",
-                    "ctx",
-                    "store",
-                    "where",
-                    "active_query",
-                    "category_hits",
-                },
-                produces={"item_hits"},
-                capabilities={"llm"},
-                config={"llm_profile": self.retrieve_config.llm_ranking_llm_profile},
-            ),
-            WorkflowStep(
-                step_id="sufficiency_after_items",
-                role="sufficiency_check",
-                handler=self._llm_item_sufficiency,
-                requires={"needs_retrieval", "active_query", "context_queries", "item_hits"},
-                produces={"next_step_query", "proceed_to_resources"},
-                capabilities={"llm"},
-                config={"llm_profile": self.retrieve_config.sufficiency_check_llm_profile},
-            ),
-            WorkflowStep(
-                step_id="recall_resources",
-                role="recall_resources",
-                handler=self._llm_recall_resources,
-                requires={
-                    "needs_retrieval",
-                    "proceed_to_resources",
-                    "active_query",
-                    "ctx",
-                    "store",
-                    "where",
-                    "item_hits",
-                    "category_hits",
-                },
-                produces={"resource_hits"},
-                capabilities={"llm"},
-                config={"llm_profile": self.retrieve_config.llm_ranking_llm_profile},
-            ),
-            WorkflowStep(
-                step_id="build_context",
-                role="build_context",
-                handler=self._llm_build_context,
-                requires={"needs_retrieval", "original_query", "rewritten_query"},
-                produces={"response"},
-                capabilities=set(),
-            ),
-        ]
-        return steps
-
-    async def _llm_route_intention(self, state: WorkflowState, step_context: Any) -> WorkflowState:
-        if not state.get("route_intention"):
-            state.update({
-                "needs_retrieval": True,
-                "rewritten_query": state["original_query"],
-                "active_query": state["original_query"],
-                "next_step_query": None,
-                "proceed_to_items": False,
-                "proceed_to_resources": False,
-            })
-            return state
-
-        llm_client = self._get_step_llm_client(step_context)
-        needs_retrieval, rewritten_query = await self._decide_if_retrieval_needed(
-            state["original_query"],
-            state["context_queries"],
-            retrieved_content=None,
-            llm_client=llm_client,
-        )
-        if state.get("skip_rewrite"):
-            rewritten_query = state["original_query"]
-
-        state.update({
-            "needs_retrieval": needs_retrieval,
-            "rewritten_query": rewritten_query,
-            "active_query": rewritten_query,
-            "next_step_query": None,
-            "proceed_to_items": False,
-            "proceed_to_resources": False,
-        })
-        return state
-
-    async def _llm_route_category(self, state: WorkflowState, step_context: Any) -> WorkflowState:
-        if not state.get("needs_retrieval"):
-            state["category_hits"] = []
-            return state
-        llm_client = self._get_step_llm_client(step_context)
-        store = state["store"]
-        where_filters = state.get("where") or {}
-        category_pool = store.memory_category_repo.list_categories(where_filters)
-        hits = await self._llm_rank_categories(
-            state["active_query"],
-            self.retrieve_config.category.top_k,
-            state["ctx"],
-            store,
-            llm_client=llm_client,
-            categories=category_pool,
-        )
-        state["category_hits"] = hits
-        state["category_pool"] = category_pool
-        return state
-
-    async def _llm_category_sufficiency(self, state: WorkflowState, step_context: Any) -> WorkflowState:
-        if not state.get("needs_retrieval"):
-            state["proceed_to_items"] = False
-            return state
-        if not state.get("retrieve_category") or not state.get("sufficiency_check"):
-            state["proceed_to_items"] = True
-            return state
-
-        retrieved_content = ""
-        hits = state.get("category_hits") or []
-        if hits:
-            retrieved_content = self._format_llm_category_content(hits)
-
-        llm_client = self._get_step_llm_client(step_context)
-        needs_more, rewritten_query = await self._decide_if_retrieval_needed(
-            state["active_query"],
-            state["context_queries"],
-            retrieved_content=retrieved_content or "No content retrieved yet.",
-            llm_client=llm_client,
-        )
-        state["next_step_query"] = rewritten_query
-        state["active_query"] = rewritten_query
-        state["proceed_to_items"] = needs_more
-        return state
-
-    async def _llm_recall_items(self, state: WorkflowState, step_context: Any) -> WorkflowState:
-        if not state.get("needs_retrieval") or not state.get("proceed_to_items"):
-            state["item_hits"] = []
-            return state
-
-        where_filters = state.get("where") or {}
-        category_hits = state.get("category_hits", [])
-        category_ids = [cat["id"] for cat in category_hits]
-        llm_client = self._get_step_llm_client(step_context)
-        store = state["store"]
-
-        use_refs = getattr(self.retrieve_config.item, "use_category_references", False)
-        ref_ids: list[str] = []
-        if use_refs and category_hits:
-            # Extract all ref_ids from category summaries
-            from memu.utils.references import extract_references
-
-            for cat in category_hits:
-                summary = cat.get("summary") or ""
-                ref_ids.extend(extract_references(summary))
-        if ref_ids:
-            # Query items by ref_ids
-            items_pool = store.memory_item_repo.list_items_by_ref_ids(ref_ids, where_filters)
-        else:
-            items_pool = store.memory_item_repo.list_items(where_filters)
-
-        relations = store.category_item_repo.list_relations(where_filters)
-        category_pool = state.get("category_pool") or store.memory_category_repo.list_categories(where_filters)
-        state["item_hits"] = await self._llm_rank_items(
-            state["active_query"],
-            self.retrieve_config.item.top_k,
-            category_ids,
-            state.get("category_hits", []),
-            state["ctx"],
-            store,
-            llm_client=llm_client,
-            categories=category_pool,
-            items=items_pool,
-            relations=relations,
-        )
-        state["item_pool"] = items_pool
-        state["relation_pool"] = relations
-        return state
-
-    async def _llm_item_sufficiency(self, state: WorkflowState, step_context: Any) -> WorkflowState:
-        if not state.get("needs_retrieval"):
-            state["proceed_to_resources"] = False
-            return state
-        if not state.get("retrieve_item") or not state.get("sufficiency_check"):
-            state["proceed_to_resources"] = True
-            return state
-
-        retrieved_content = ""
-        hits = state.get("item_hits") or []
-        if hits:
-            retrieved_content = self._format_llm_item_content(hits)
-
-        llm_client = self._get_step_llm_client(step_context)
-        needs_more, rewritten_query = await self._decide_if_retrieval_needed(
-            state["active_query"],
-            state["context_queries"],
-            retrieved_content=retrieved_content or "No content retrieved yet.",
-            llm_client=llm_client,
-        )
-        state["next_step_query"] = rewritten_query
-        state["active_query"] = rewritten_query
-        state["proceed_to_resources"] = needs_more
-        return state
-
-    async def _llm_recall_resources(self, state: WorkflowState, step_context: Any) -> WorkflowState:
-        if not state.get("needs_retrieval") or not state.get("proceed_to_resources"):
-            state["resource_hits"] = []
-            return state
-
-        llm_client = self._get_step_llm_client(step_context)
-        store = state["store"]
-        where_filters = state.get("where") or {}
-        resource_pool = store.resource_repo.list_resources(where_filters)
-        items_pool = state.get("item_pool") or store.memory_item_repo.list_items(where_filters)
-        state["resource_hits"] = await self._llm_rank_resources(
-            state["active_query"],
-            self.retrieve_config.resource.top_k,
-            state.get("category_hits", []),
-            state.get("item_hits", []),
-            state["ctx"],
-            store,
-            llm_client=llm_client,
-            items=items_pool,
-            resources=resource_pool,
-        )
-        state["resource_pool"] = resource_pool
-        return state
-
-    def _llm_build_context(self, state: WorkflowState, _: Any) -> WorkflowState:
-        response = {
-            "needs_retrieval": bool(state.get("needs_retrieval")),
-            "original_query": state["original_query"],
-            "rewritten_query": state.get("rewritten_query", state["original_query"]),
-            "next_step_query": state.get("next_step_query"),
-            "categories": [],
-            "items": [],
-            "resources": [],
-        }
-        if state.get("needs_retrieval"):
-            response["categories"] = list(state.get("category_hits") or [])
-            response["items"] = list(state.get("item_hits") or [])
-            response["resources"] = list(state.get("resource_hits") or [])
-        state["response"] = response
-        return state
-
     async def _rank_categories_by_summary(
         self,
         query_vec: list[float],
@@ -736,7 +462,7 @@ async def _rank_categories_by_summary(
         if not entries:
             return [], {}
         summary_texts = [summary for _, summary in entries]
-        client = embed_client or self._get_llm_client()
+        client = embed_client or self._get_embedding_client()
         summary_embeddings = await client.embed(summary_texts)
         corpus = [(cid, emb) for (cid, _), emb in zip(entries, summary_embeddings, strict=True)]
         hits = cosine_topk(query_vec, corpus, k=top_k)
@@ -864,82 +590,6 @@ def _extract_rewritten_query(self, raw: str) -> str | None:
             return match.group(1).strip()
         return None
 
-    async def _embedding_based_retrieve(
-        self,
-        query: str,
-        top_k: int,
-        context_queries: list[dict[str, Any]] | None,
-        ctx: Context,
-        store: Database,
-        llm_client: Any | None = None,
-        where: Mapping[str, Any] | None = None,
-    ) -> dict[str, Any]:
-        """Embedding-based retrieval with query rewriting and judging at each tier"""
-        where_filters = self._normalize_where(where)
-        category_pool = store.memory_category_repo.list_categories(where_filters)
-        items_pool = store.memory_item_repo.list_items(where_filters)
-        resource_pool = store.resource_repo.list_resources(where_filters)
-        client = llm_client or self._get_llm_client()
-        current_query = query
-        qvec = (await client.embed([current_query]))[0]
-        response: dict[str, Any] = {"resources": [], "items": [], "categories": [], "next_step_query": None}
-        content_sections: list[str] = []
-
-        # Tier 1: Categories
-        cat_hits, summary_lookup = await self._rank_categories_by_summary(
-            qvec,
-            top_k,
-            ctx,
-            store,
-            embed_client=client,
-            categories=category_pool,
-        )
-        if cat_hits:
-            response["categories"] = self._materialize_hits(cat_hits, category_pool)
-            content_sections.append(
-                self._format_category_content(cat_hits, summary_lookup, store, categories=category_pool)
-            )
-
-            needs_more, current_query = await self._decide_if_retrieval_needed(
-                current_query,
-                context_queries,
-                retrieved_content="\n\n".join(content_sections),
-                llm_client=client,
-            )
-            response["next_step_query"] = current_query
-            if not needs_more:
-                return response
-            # Re-embed with rewritten query
-            qvec = (await client.embed([current_query]))[0]
-
-        # Tier 2: Items
-        item_hits = store.memory_item_repo.vector_search_items(qvec, top_k, where=where_filters)
-        if item_hits:
-            response["items"] = self._materialize_hits(item_hits, items_pool)
-            content_sections.append(self._format_item_content(item_hits, store, items=items_pool))
-
-            needs_more, current_query = await self._decide_if_retrieval_needed(
-                current_query,
-                context_queries,
-                retrieved_content="\n\n".join(content_sections),
-                llm_client=client,
-            )
-            response["next_step_query"] = current_query
-            if not needs_more:
-                return response
-            # Re-embed with rewritten query
-            qvec = (await client.embed([current_query]))[0]
-
-        # Tier 3: Resources
-        resource_corpus = self._resource_caption_corpus(store, resources=resource_pool)
-        if resource_corpus:
-            res_hits = cosine_topk(qvec, resource_corpus, k=top_k)
-            if res_hits:
-                response["resources"] = self._materialize_hits(res_hits, resource_pool)
-                content_sections.append(self._format_resource_content(res_hits, store, resources=resource_pool))
-
-        return response
-
     def _materialize_hits(self, hits: Sequence[tuple[str, float]], pool: dict[str, Any]) -> list[dict[str, Any]]:
         out = []
         for _id, score in hits:
@@ -980,19 +630,6 @@ def _format_item_content(
             lines.append(f"Memory Item ({item.memory_type}): {item.summary}\nScore: {score:.3f}")
         return "\n\n".join(lines).strip()
 
-    def _format_resource_content(
-        self, hits: list[tuple[str, float]], store: Database, resources: Mapping[str, Any] | None = None
-    ) -> str:
-        resource_pool = resources if resources is not None else store.resource_repo.resources
-        lines = []
-        for rid, score in hits:
-            res = resource_pool.get(rid)
-            if not res:
-                continue
-            caption = res.caption or f"Resource {res.url}"
-            lines.append(f"Resource: {caption}\nScore: {score:.3f}")
-        return "\n\n".join(lines).strip()
-
     def _resource_caption_corpus(
         self, store: Database, resources: Mapping[str, Any] | None = None
     ) -> list[tuple[str, list[float]]]:
@@ -1002,417 +639,3 @@ def _resource_caption_corpus(
             if res.embedding:
                 corpus.append((rid, res.embedding))
         return corpus
-
-    def _extract_judgement(self, raw: str) -> str:
-        if not raw:
-            return "MORE"
-        match = re.search(r"<judgement>(.*?)</judgement>", raw, re.IGNORECASE | re.DOTALL)
-        if match:
-            token = match.group(1).strip().upper()
-            if "ENOUGH" in token:
-                return "ENOUGH"
-            if "MORE" in token:
-                return "MORE"
-        upper = raw.strip().upper()
-        if "ENOUGH" in upper:
-            return "ENOUGH"
-        return "MORE"
-
-    async def _llm_based_retrieve(
-        self,
-        query: str,
-        top_k: int,
-        context_queries: list[dict[str, Any]] | None,
-        ctx: Context,
-        store: Database,
-        llm_client: Any | None = None,
-        where: Mapping[str, Any] | None = None,
-    ) -> dict[str, Any]:
-        """
-        LLM-based retrieval that uses language model to search and rank results
-        in a hierarchical manner, with query rewriting and judging at each tier.
-
-        Flow:
-        1. Search categories with LLM, judge + rewrite query
-        2. If needs more, search items from relevant categories, judge + rewrite
-        3. If needs more, search resources related to context
-        """
-        where_filters = self._normalize_where(where)
-        category_pool = store.memory_category_repo.list_categories(where_filters)
-        items_pool = store.memory_item_repo.list_items(where_filters)
-        relations = store.category_item_repo.list_relations(where_filters)
-        resource_pool = store.resource_repo.list_resources(where_filters)
-        current_query = query
-        client = llm_client or self._get_llm_client()
-        response: dict[str, Any] = {"resources": [], "items": [], "categories": [], "next_step_query": None}
-        content_sections: list[str] = []
-
-        # Tier 1: Search and rank categories
-        category_hits = await self._llm_rank_categories(
-            current_query,
-            top_k,
-            ctx,
-            store,
-            llm_client=client,
-            categories=category_pool,
-        )
-        if category_hits:
-            response["categories"] = category_hits
-            content_sections.append(self._format_llm_category_content(category_hits))
-
-            needs_more, current_query = await self._decide_if_retrieval_needed(
-                current_query,
-                context_queries,
-                retrieved_content="\n\n".join(content_sections),
-                llm_client=client,
-            )
-            response["next_step_query"] = current_query
-            if not needs_more:
-                return response
-
-        # Tier 2: Search memory items from relevant categories
-        relevant_category_ids = [cat["id"] for cat in category_hits]
-        item_hits = await self._llm_rank_items(
-            current_query,
-            top_k,
-            relevant_category_ids,
-            category_hits,
-            ctx,
-            store,
-            llm_client=client,
-            categories=category_pool,
-            items=items_pool,
-            relations=relations,
-        )
-        if item_hits:
-            response["items"] = item_hits
-            content_sections.append(self._format_llm_item_content(item_hits))
-
-            needs_more, current_query = await self._decide_if_retrieval_needed(
-                current_query,
-                context_queries,
-                retrieved_content="\n\n".join(content_sections),
-                llm_client=client,
-            )
-            response["next_step_query"] = current_query
-            if not needs_more:
-                return response
-
-        # Tier 3: Search resources related to the context
-        resource_hits = await self._llm_rank_resources(
-            current_query,
-            top_k,
-            category_hits,
-            item_hits,
-            ctx,
-            store,
-            llm_client=client,
-            items=items_pool,
-            resources=resource_pool,
-        )
-        if resource_hits:
-            response["resources"] = resource_hits
-            content_sections.append(self._format_llm_resource_content(resource_hits))
-
-        return response
-
-    def _format_categories_for_llm(
-        self,
-        store: Database,
-        category_ids: list[str] | None = None,
-        categories: Mapping[str, Any] | None = None,
-    ) -> str:
-        """Format categories for LLM consumption"""
-        categories_to_format = categories if categories is not None else store.memory_category_repo.categories
-        if category_ids:
-            categories_to_format = {cid: cat for cid, cat in categories_to_format.items() if cid in category_ids}
-
-        if not categories_to_format:
-            return "No categories available."
-
-        lines = []
-        for cid, cat in categories_to_format.items():
-            lines.append(f"ID: {cid}")
-            lines.append(f"Name: {cat.name}")
-            if cat.description:
-                lines.append(f"Description: {cat.description}")
-            if cat.summary:
-                lines.append(f"Summary: {cat.summary}")
-            lines.append("---")
-
-        return "\n".join(lines)
-
-    def _format_items_for_llm(
-        self,
-        store: Database,
-        category_ids: list[str] | None = None,
-        items: Mapping[str, Any] | None = None,
-        relations: Sequence[Any] | None = None,
-    ) -> str:
-        """Format memory items for LLM consumption, optionally filtered by category"""
-        item_pool = items if items is not None else store.memory_item_repo.items
-        relation_pool = relations if relations is not None else store.category_item_repo.relations
-        items_to_format = []
-        seen_item_ids = set()
-
-        if category_ids:
-            # Get items that belong to the specified categories
-            for rel in relation_pool:
-                if rel.category_id in category_ids:
-                    item = item_pool.get(rel.item_id)
-                    if item and item.id not in seen_item_ids:
-                        items_to_format.append(item)
-                        seen_item_ids.add(item.id)
-        else:
-            items_to_format = list(item_pool.values())
-
-        if not items_to_format:
-            return "No memory items available."
-
-        lines = []
-        for item in items_to_format:
-            lines.append(f"ID: {item.id}")
-            lines.append(f"Type: {item.memory_type}")
-            lines.append(f"Summary: {item.summary}")
-            lines.append("---")
-
-        return "\n".join(lines)
-
-    def _format_resources_for_llm(
-        self,
-        store: Database,
-        item_ids: list[str] | None = None,
-        items: Mapping[str, Any] | None = None,
-        resources: Mapping[str, Any] | None = None,
-    ) -> str:
-        """Format resources for LLM consumption, optionally filtered by related items"""
-        resource_pool = resources if resources is not None else store.resource_repo.resources
-        item_pool = items if items is not None else store.memory_item_repo.items
-        resources_to_format = []
-
-        if item_ids:
-            # Get resources that are related to the specified items
-            resource_ids = {item_pool[iid].resource_id for iid in item_ids if iid in item_pool and iid is not None}
-            resources_to_format = [
-                resource_pool[rid] for rid in resource_ids if rid in resource_pool and rid is not None
-            ]
-        else:
-            resources_to_format = list(resource_pool.values())
-
-        if not resources_to_format:
-            return "No resources available."
-
-        lines = []
-        for res in resources_to_format:
-            lines.append(f"ID: {res.id}")
-            lines.append(f"URL: {res.url}")
-            lines.append(f"Modality: {res.modality}")
-            if res.caption:
-                lines.append(f"Caption: {res.caption}")
-            lines.append("---")
-
-        return "\n".join(lines)
-
-    async def _llm_rank_categories(
-        self,
-        query: str,
-        top_k: int,
-        ctx: Context,
-        store: Database,
-        llm_client: Any | None = None,
-        categories: Mapping[str, Any] | None = None,
-    ) -> list[dict[str, Any]]:
-        """Use LLM to rank categories based on query relevance"""
-        category_pool = categories if categories is not None else store.memory_category_repo.categories
-        if not category_pool:
-            return []
-
-        categories_data = self._format_categories_for_llm(store, categories=category_pool)
-        prompt = LLM_CATEGORY_RANKER_PROMPT.format(
-            query=self._escape_prompt_value(query),
-            top_k=top_k,
-            categories_data=self._escape_prompt_value(categories_data),
-        )
-
-        client = llm_client or self._get_llm_client()
-        llm_response = await client.chat(prompt)
-        return self._parse_llm_category_response(llm_response, store, categories=category_pool)
-
-    async def _llm_rank_items(
-        self,
-        query: str,
-        top_k: int,
-        category_ids: list[str],
-        category_hits: list[dict[str, Any]],
-        ctx: Context,
-        store: Database,
-        llm_client: Any | None = None,
-        categories: Mapping[str, Any] | None = None,
-        items: Mapping[str, Any] | None = None,
-        relations: Sequence[Any] | None = None,
-    ) -> list[dict[str, Any]]:
-        """Use LLM to rank memory items from relevant categories"""
-        if not category_ids:
-            print("[LLM Rank Items] No category_ids provided")
-            return []
-
-        item_pool = items if items is not None else store.memory_item_repo.items
-        items_data = self._format_items_for_llm(store, category_ids, items=item_pool, relations=relations)
-        if items_data == "No memory items available.":
-            return []
-
-        # Format relevant categories for context
-        relevant_categories_info = "\n".join([
-            f"- {cat['name']}: {cat.get('summary', cat.get('description', ''))}" for cat in category_hits
-        ])
-
-        prompt = LLM_ITEM_RANKER_PROMPT.format(
-            query=self._escape_prompt_value(query),
-            top_k=top_k,
-            relevant_categories=self._escape_prompt_value(relevant_categories_info),
-            items_data=self._escape_prompt_value(items_data),
-        )
-
-        client = llm_client or self._get_llm_client()
-        llm_response = await client.chat(prompt)
-        return self._parse_llm_item_response(llm_response, store, items=item_pool)
-
-    async def _llm_rank_resources(
-        self,
-        query: str,
-        top_k: int,
-        category_hits: list[dict[str, Any]],
-        item_hits: list[dict[str, Any]],
-        ctx: Context,
-        store: Database,
-        llm_client: Any | None = None,
-        items: Mapping[str, Any] | None = None,
-        resources: Mapping[str, Any] | None = None,
-    ) -> list[dict[str, Any]]:
-        """Use LLM to rank resources related to the context"""
-        # Get item IDs to filter resources
-        item_ids = [item["id"] for item in item_hits]
-        if not item_ids:
-            return []
-
-        item_pool = items if items is not None else store.memory_item_repo.items
-        resource_pool = resources if resources is not None else store.resource_repo.resources
-        resources_data = self._format_resources_for_llm(store, item_ids, items=item_pool, resources=resource_pool)
-        if resources_data == "No resources available.":
-            return []
-
-        # Build context info
-        context_parts = []
-        if category_hits:
-            context_parts.append("Relevant Categories:")
-            context_parts.extend([f"- {cat['name']}" for cat in category_hits])
-        if item_hits:
-            context_parts.append("\nRelevant Memory Items:")
-            context_parts.extend([f"- {item.get('summary', '')[:100]}..." for item in item_hits[:3]])
-
-        context_info = "\n".join(context_parts)
-        prompt = LLM_RESOURCE_RANKER_PROMPT.format(
-            query=self._escape_prompt_value(query),
-            top_k=top_k,
-            context_info=self._escape_prompt_value(context_info),
-            resources_data=self._escape_prompt_value(resources_data),
-        )
-
-        client = llm_client or self._get_llm_client()
-        llm_response = await client.chat(prompt)
-        return self._parse_llm_resource_response(llm_response, store, resources=resource_pool)
-
-    def _parse_llm_category_response(
-        self, raw_response: str, store: Database, categories: Mapping[str, Any] | None = None
-    ) -> list[dict[str, Any]]:
-        """Parse LLM category ranking response"""
-        category_pool = categories if categories is not None else store.memory_category_repo.categories
-        results = []
-        try:
-            json_blob = self._extract_json_blob(raw_response)
-            parsed = json.loads(json_blob)
-
-            if "categories" in parsed and isinstance(parsed["categories"], list):
-                category_ids = parsed["categories"]
-                # Return categories in the order provided by LLM (already sorted by relevance)
-                for cat_id in category_ids:
-                    if isinstance(cat_id, str):
-                        cat = category_pool.get(cat_id)
-                        if cat:
-                            cat_data = self._model_dump_without_embeddings(cat)
-                            results.append(cat_data)
-        except Exception as e:
-            logger.warning(f"Failed to parse LLM category ranking response: {e}")
-
-        return results
-
-    def _parse_llm_item_response(
-        self, raw_response: str, store: Database, items: Mapping[str, Any] | None = None
-    ) -> list[dict[str, Any]]:
-        """Parse LLM item ranking response"""
-        item_pool = items if items is not None else store.memory_item_repo.items
-        results = []
-        try:
-            json_blob = self._extract_json_blob(raw_response)
-            parsed = json.loads(json_blob)
-
-            if "items" in parsed and isinstance(parsed["items"], list):
-                item_ids = parsed["items"]
-                # Return items in the order provided by LLM (already sorted by relevance)
-                for item_id in item_ids:
-                    if isinstance(item_id, str):
-                        mem_item = item_pool.get(item_id)
-                        if mem_item:
-                            item_data = self._model_dump_without_embeddings(mem_item)
-                            results.append(item_data)
-        except Exception as e:
-            logger.warning(f"Failed to parse LLM item ranking response: {e}")
-
-        return results
-
-    def _parse_llm_resource_response(
-        self, raw_response: str, store: Database, resources: Mapping[str, Any] | None = None
-    ) -> list[dict[str, Any]]:
-        """Parse LLM resource ranking response"""
-        resource_pool = resources if resources is not None else store.resource_repo.resources
-        results = []
-        try:
-            json_blob = self._extract_json_blob(raw_response)
-            parsed = json.loads(json_blob)
-
-            if "resources" in parsed and isinstance(parsed["resources"], list):
-                resource_ids = parsed["resources"]
-                # Return resources in the order provided by LLM (already sorted by relevance)
-                for res_id in resource_ids:
-                    if isinstance(res_id, str):
-                        res = resource_pool.get(res_id)
-                        if res:
-                            res_data = self._model_dump_without_embeddings(res)
-                            results.append(res_data)
-        except Exception as e:
-            logger.warning(f"Failed to parse LLM resource ranking response: {e}")
-
-        return results
-
-    def _format_llm_category_content(self, hits: list[dict[str, Any]]) -> str:
-        """Format LLM-ranked category content for judger"""
-        lines = []
-        for cat in hits:
-            summary = cat.get("summary", "") or cat.get("description", "")
-            lines.append(f"Category: {cat['name']}\nSummary: {summary}")
-        return "\n\n".join(lines).strip()
-
-    def _format_llm_item_content(self, hits: list[dict[str, Any]]) -> str:
-        """Format LLM-ranked item content for judger"""
-        lines = []
-        for item in hits:
-            lines.append(f"Memory Item ({item['memory_type']}): {item['summary']}")
-        return "\n\n".join(lines).strip()
-
-    def _format_llm_resource_content(self, hits: list[dict[str, Any]]) -> str:
-        """Format LLM-ranked resource content for judger"""
-        lines = []
-        for res in hits:
-            caption = res.get("caption", "") or f"Resource {res['url']}"
-            lines.append(f"Resource: {caption}")
-        return "\n\n".join(lines).strip()
diff --git a/src/memu/app/retrieve_llm.py b/src/memu/app/retrieve_llm.py
new file mode 100644
index 00000000..e7eef016
--- /dev/null
+++ b/src/memu/app/retrieve_llm.py
@@ -0,0 +1,603 @@
+from __future__ import annotations
+
+import json
+import logging
+from collections.abc import Awaitable, Callable, Mapping, Sequence
+from typing import TYPE_CHECKING, Any
+
+from pydantic import BaseModel
+
+from memu.prompts.retrieve.llm_category_ranker import PROMPT as LLM_CATEGORY_RANKER_PROMPT
+from memu.prompts.retrieve.llm_item_ranker import PROMPT as LLM_ITEM_RANKER_PROMPT
+from memu.prompts.retrieve.llm_resource_ranker import PROMPT as LLM_RESOURCE_RANKER_PROMPT
+from memu.workflow.step import WorkflowState, WorkflowStep
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    from memu.app.service import Context
+    from memu.app.settings import RetrieveConfig
+    from memu.database.interfaces import Database
+
+
+class RetrieveLlmMixin:
+    """LLM-driven retrieve pipeline (``retrieve_llm``).
+
+    This is a sibling of :class:`RetrieveMixin` (which owns the RAG pipeline and
+    the shared helpers). Both are composed onto ``MemoryService``; the handlers
+    here resolve shared dependencies (e.g. ``_decide_if_retrieval_needed``) at
+    runtime through the final service instance.
+    """
+
+    if TYPE_CHECKING:
+        retrieve_config: RetrieveConfig
+        _get_step_llm_client: Callable[[Mapping[str, Any] | None], Any]
+        _get_llm_client: Callable[..., Any]
+        _decide_if_retrieval_needed: Callable[..., Awaitable[tuple[bool, str]]]
+        _model_dump_without_embeddings: Callable[[BaseModel], dict[str, Any]]
+        _extract_json_blob: Callable[[str], str]
+        _escape_prompt_value: Callable[[str], str]
+
+    def _build_llm_retrieve_workflow(self) -> list[WorkflowStep]:
+        steps = [
+            WorkflowStep(
+                step_id="route_intention",
+                role="route_intention",
+                handler=self._llm_route_intention,
+                requires={"original_query", "context_queries", "skip_rewrite"},
+                produces={"needs_retrieval", "rewritten_query", "active_query", "next_step_query"},
+                capabilities={"llm"},
+                config={"llm_profile": self.retrieve_config.sufficiency_check_llm_profile},
+            ),
+            WorkflowStep(
+                step_id="route_category",
+                role="route_category",
+                handler=self._llm_route_category,
+                requires={"needs_retrieval", "active_query", "ctx", "store", "where"},
+                produces={"category_hits"},
+                capabilities={"llm"},
+                config={"llm_profile": self.retrieve_config.llm_ranking_llm_profile},
+            ),
+            WorkflowStep(
+                step_id="sufficiency_after_category",
+                role="sufficiency_check",
+                handler=self._llm_category_sufficiency,
+                requires={"needs_retrieval", "active_query", "context_queries", "category_hits"},
+                produces={"next_step_query", "proceed_to_items"},
+                capabilities={"llm"},
+                config={"llm_profile": self.retrieve_config.sufficiency_check_llm_profile},
+            ),
+            WorkflowStep(
+                step_id="recall_items",
+                role="recall_items",
+                handler=self._llm_recall_items,
+                requires={
+                    "needs_retrieval",
+                    "proceed_to_items",
+                    "ctx",
+                    "store",
+                    "where",
+                    "active_query",
+                    "category_hits",
+                },
+                produces={"item_hits"},
+                capabilities={"llm"},
+                config={"llm_profile": self.retrieve_config.llm_ranking_llm_profile},
+            ),
+            WorkflowStep(
+                step_id="sufficiency_after_items",
+                role="sufficiency_check",
+                handler=self._llm_item_sufficiency,
+                requires={"needs_retrieval", "active_query", "context_queries", "item_hits"},
+                produces={"next_step_query", "proceed_to_resources"},
+                capabilities={"llm"},
+                config={"llm_profile": self.retrieve_config.sufficiency_check_llm_profile},
+            ),
+            WorkflowStep(
+                step_id="recall_resources",
+                role="recall_resources",
+                handler=self._llm_recall_resources,
+                requires={
+                    "needs_retrieval",
+                    "proceed_to_resources",
+                    "active_query",
+                    "ctx",
+                    "store",
+                    "where",
+                    "item_hits",
+                    "category_hits",
+                },
+                produces={"resource_hits"},
+                capabilities={"llm"},
+                config={"llm_profile": self.retrieve_config.llm_ranking_llm_profile},
+            ),
+            WorkflowStep(
+                step_id="build_context",
+                role="build_context",
+                handler=self._llm_build_context,
+                requires={"needs_retrieval", "original_query", "rewritten_query"},
+                produces={"response"},
+                capabilities=set(),
+            ),
+        ]
+        return steps
+
+    async def _llm_route_intention(self, state: WorkflowState, step_context: Any) -> WorkflowState:
+        if not state.get("route_intention"):
+            state.update({
+                "needs_retrieval": True,
+                "rewritten_query": state["original_query"],
+                "active_query": state["original_query"],
+                "next_step_query": None,
+                "proceed_to_items": False,
+                "proceed_to_resources": False,
+            })
+            return state
+
+        llm_client = self._get_step_llm_client(step_context)
+        needs_retrieval, rewritten_query = await self._decide_if_retrieval_needed(
+            state["original_query"],
+            state["context_queries"],
+            retrieved_content=None,
+            llm_client=llm_client,
+        )
+        if state.get("skip_rewrite"):
+            rewritten_query = state["original_query"]
+
+        state.update({
+            "needs_retrieval": needs_retrieval,
+            "rewritten_query": rewritten_query,
+            "active_query": rewritten_query,
+            "next_step_query": None,
+            "proceed_to_items": False,
+            "proceed_to_resources": False,
+        })
+        return state
+
+    async def _llm_route_category(self, state: WorkflowState, step_context: Any) -> WorkflowState:
+        if not state.get("needs_retrieval"):
+            state["category_hits"] = []
+            return state
+        llm_client = self._get_step_llm_client(step_context)
+        store = state["store"]
+        where_filters = state.get("where") or {}
+        category_pool = store.memory_category_repo.list_categories(where_filters)
+        hits = await self._llm_rank_categories(
+            state["active_query"],
+            self.retrieve_config.category.top_k,
+            state["ctx"],
+            store,
+            llm_client=llm_client,
+            categories=category_pool,
+        )
+        state["category_hits"] = hits
+        state["category_pool"] = category_pool
+        return state
+
+    async def _llm_category_sufficiency(self, state: WorkflowState, step_context: Any) -> WorkflowState:
+        if not state.get("needs_retrieval"):
+            state["proceed_to_items"] = False
+            return state
+        if not state.get("retrieve_category") or not state.get("sufficiency_check"):
+            state["proceed_to_items"] = True
+            return state
+
+        retrieved_content = ""
+        hits = state.get("category_hits") or []
+        if hits:
+            retrieved_content = self._format_llm_category_content(hits)
+
+        llm_client = self._get_step_llm_client(step_context)
+        needs_more, rewritten_query = await self._decide_if_retrieval_needed(
+            state["active_query"],
+            state["context_queries"],
+            retrieved_content=retrieved_content or "No content retrieved yet.",
+            llm_client=llm_client,
+        )
+        state["next_step_query"] = rewritten_query
+        state["active_query"] = rewritten_query
+        state["proceed_to_items"] = needs_more
+        return state
+
+    async def _llm_recall_items(self, state: WorkflowState, step_context: Any) -> WorkflowState:
+        """Rank memory items with the LLM and store the result in ``state["item_hits"]``."""
+        if not state.get("needs_retrieval") or not state.get("proceed_to_items"):
+            state["item_hits"] = []
+            return state
+
+        where_filters = state.get("where") or {}
+        # ``or []`` (rather than a .get default) also covers a key that is present but None.
+        category_hits = state.get("category_hits") or []
+        category_ids = [cat["id"] for cat in category_hits]
+        llm_client = self._get_step_llm_client(step_context)
+        store = state["store"]
+
+        use_refs = getattr(self.retrieve_config.item, "use_category_references", False)
+        ref_ids: list[str] = []
+        if use_refs and category_hits:
+            from memu.utils.references import extract_references
+
+            for cat in category_hits:  # gather every reference id cited by the category summaries
+                ref_ids.extend(extract_references(cat.get("summary") or ""))
+        if ref_ids:  # narrow the candidate pool to referenced items; otherwise list the whole scope
+            items_pool = store.memory_item_repo.list_items_by_ref_ids(ref_ids, where_filters)
+        else:
+            items_pool = store.memory_item_repo.list_items(where_filters)
+
+        relations = store.category_item_repo.list_relations(where_filters)
+        category_pool = state.get("category_pool") or store.memory_category_repo.list_categories(where_filters)
+        state["item_hits"] = await self._llm_rank_items(
+            state["active_query"],
+            self.retrieve_config.item.top_k,
+            category_ids,
+            category_hits,
+            state["ctx"],
+            store,
+            llm_client=llm_client,
+            categories=category_pool,
+            items=items_pool,
+            relations=relations,
+        )
+        # Keep the pools that were ranked on the state; resource recall reads "item_pool".
+        state["item_pool"] = items_pool
+        state["relation_pool"] = relations
+        return state
+
+    async def _llm_item_sufficiency(self, state: WorkflowState, step_context: Any) -> WorkflowState:
+        """Ask the LLM whether the ranked items suffice; sets ``state["proceed_to_resources"]``."""
+        if not state.get("needs_retrieval"):
+            state["proceed_to_resources"] = False
+            return state
+        judge_enabled = state.get("retrieve_item") and state.get("sufficiency_check")
+        if not judge_enabled:
+            # Nothing to judge: the resource step is allowed to run.
+            state["proceed_to_resources"] = True
+            return state
+
+        item_hits = state.get("item_hits") or []
+        evidence = self._format_llm_item_content(item_hits) if item_hits else ""
+        judge_client = self._get_step_llm_client(step_context)
+        needs_more, rewritten_query = await self._decide_if_retrieval_needed(
+            state["active_query"],
+            state["context_queries"],
+            retrieved_content=evidence or "No content retrieved yet.",
+            llm_client=judge_client,
+        )
+        # The rewritten query also becomes the active query read by later steps.
+        state["next_step_query"] = state["active_query"] = rewritten_query
+        state["proceed_to_resources"] = needs_more
+        return state
+
+    async def _llm_recall_resources(self, state: WorkflowState, step_context: Any) -> WorkflowState:
+        """Rank resources linked to the recalled items and store them in ``state["resource_hits"]``."""
+        if not (state.get("needs_retrieval") and state.get("proceed_to_resources")):
+            state["resource_hits"] = []
+            return state
+
+        llm_client, store = self._get_step_llm_client(step_context), state["store"]
+        scope = state.get("where") or {}
+        resource_pool = store.resource_repo.list_resources(scope)
+        items_pool = state.get("item_pool") or store.memory_item_repo.list_items(scope)
+        prior_hits = (state.get("category_hits", []), state.get("item_hits", []))
+        state["resource_hits"] = await self._llm_rank_resources(
+            state["active_query"],
+            self.retrieve_config.resource.top_k,
+            *prior_hits,
+            state["ctx"],
+            store,
+            llm_client=llm_client,
+            items=items_pool,
+            resources=resource_pool,
+        )
+        state["resource_pool"] = resource_pool
+        return state
+
+    def _llm_build_context(self, state: WorkflowState, _: Any) -> WorkflowState:
+        """Assemble the final retrieval payload and store it under ``state["response"]``."""
+        retrieved = bool(state.get("needs_retrieval"))
+        original_query = state["original_query"]
+        # Hit lists are shallow copies, and stay empty unless retrieval was needed.
+        tiers = {"categories": "category_hits", "items": "item_hits", "resources": "resource_hits"}
+        state["response"] = {
+            "needs_retrieval": retrieved,
+            "original_query": original_query,
+            "rewritten_query": state.get("rewritten_query", original_query),
+            "next_step_query": state.get("next_step_query"),
+            **{
+                name: list(state.get(key) or []) if retrieved else [] for name, key in tiers.items()
+            },
+        }
+        return state
+
+    def _format_categories_for_llm(
+        self,
+        store: Database,
+        category_ids: list[str] | None = None,
+        categories: Mapping[str, Any] | None = None,
+    ) -> str:
+        """Render categories (all, or only ``category_ids``) as ``---``-separated prompt text."""
+        pool = store.memory_category_repo.categories if categories is None else categories
+        if category_ids:
+            pool = {cid: cat for cid, cat in pool.items() if cid in category_ids}
+        if not pool:
+            return "No categories available."
+
+        def render(cid: str, cat: Any) -> list[str]:
+            # Description and summary are optional and left out when empty.
+            block = [f"ID: {cid}", f"Name: {cat.name}"]
+            if cat.description:
+                block.append(f"Description: {cat.description}")
+            if cat.summary:
+                block.append(f"Summary: {cat.summary}")
+            block.append("---")
+            return block
+
+        rendered = [line for cid, cat in pool.items() for line in render(cid, cat)]
+        return "\n".join(rendered)
+
+    def _format_items_for_llm(
+        self,
+        store: Database,
+        category_ids: list[str] | None = None,
+        items: Mapping[str, Any] | None = None,
+        relations: Sequence[Any] | None = None,
+    ) -> str:
+        """Render memory items as prompt text, restricted to ``category_ids`` when given."""
+        pool = store.memory_item_repo.items if items is None else items
+        links = store.category_item_repo.relations if relations is None else relations
+
+        if category_ids:
+            # Walk the category->item relations in order, keeping the first sighting of each item.
+            selected: list[Any] = []
+            emitted: set[Any] = set()
+            for link in links:
+                if link.category_id not in category_ids:
+                    continue
+                candidate = pool.get(link.item_id)
+                if not candidate or candidate.id in emitted:
+                    continue
+                emitted.add(candidate.id)
+                selected.append(candidate)
+        else:
+            selected = list(pool.values())
+
+        if not selected:
+            return "No memory items available."
+
+        chunks = [
+            f"ID: {entry.id}\nType: {entry.memory_type}\nSummary: {entry.summary}\n---"
+            for entry in selected
+        ]
+        # Joining per-item chunks with "\n" gives the same text as joining the individual lines.
+        return "\n".join(chunks)
+
+    def _format_resources_for_llm(
+        self,
+        store: Database,
+        item_ids: list[str] | None = None,
+        items: Mapping[str, Any] | None = None,
+        resources: Mapping[str, Any] | None = None,
+    ) -> str:
+        """Format resources for LLM consumption, optionally filtered by related items.
+
+        When ``item_ids`` is given, only resources referenced by those items are listed, once
+        each, in the order the items first reference them (a set would make the prompt order
+        vary between runs). Otherwise every resource in the pool is listed.
+        """
+        resource_pool = resources if resources is not None else store.resource_repo.resources
+        item_pool = items if items is not None else store.memory_item_repo.items
+
+        if item_ids:
+            # dict.fromkeys de-duplicates while keeping first-seen order; None resource ids are dropped.
+            linked = (item_pool[iid].resource_id for iid in item_ids if iid is not None and iid in item_pool)
+            resource_ids = dict.fromkeys(rid for rid in linked if rid is not None)
+            resources_to_format = [resource_pool[rid] for rid in resource_ids if rid in resource_pool]
+        else:
+            resources_to_format = list(resource_pool.values())
+        if not resources_to_format:
+            return "No resources available."
+
+        lines = []
+        for res in resources_to_format:
+            lines += [f"ID: {res.id}", f"URL: {res.url}", f"Modality: {res.modality}"]
+            if res.caption:
+                lines.append(f"Caption: {res.caption}")
+            lines.append("---")
+
+        return "\n".join(lines)
+
+    async def _llm_rank_categories(
+        self,
+        query: str,
+        top_k: int,
+        ctx: Context,  # not read in this method
+        store: Database,
+        llm_client: Any | None = None,
+        categories: Mapping[str, Any] | None = None,
+    ) -> list[dict[str, Any]]:
+        """Ask the LLM to rank the category pool against ``query``; returns parsed hits, or [] for an empty pool."""
+        category_pool = categories if categories is not None else store.memory_category_repo.categories
+        if not category_pool:
+            return []  # nothing to rank, so no LLM call is made
+
+        categories_data = self._format_categories_for_llm(store, categories=category_pool)
+        prompt = LLM_CATEGORY_RANKER_PROMPT.format(
+            query=self._escape_prompt_value(query),
+            top_k=top_k,
+            categories_data=self._escape_prompt_value(categories_data),
+        )
+
+        client = llm_client or self._get_llm_client()  # use self._get_llm_client() when none is supplied
+        llm_response = await client.chat(prompt)
+        return self._parse_llm_category_response(llm_response, store, categories=category_pool)
+
+    async def _llm_rank_items(
+        self,
+        query: str,
+        top_k: int,
+        category_ids: list[str],
+        category_hits: list[dict[str, Any]],
+        ctx: Context,
+        store: Database,
+        llm_client: Any | None = None,
+        categories: Mapping[str, Any] | None = None,
+        items: Mapping[str, Any] | None = None,
+        relations: Sequence[Any] | None = None,
+    ) -> list[dict[str, Any]]:
+        """Use LLM to rank memory items from relevant categories; ``[]`` when there is nothing to rank."""
+        if not category_ids:
+            # Reported through the module logger (as the parse helpers do) rather than stdout.
+            logger.debug("[LLM Rank Items] No category_ids provided")
+            return []
+
+        item_pool = items if items is not None else store.memory_item_repo.items
+        items_data = self._format_items_for_llm(store, category_ids, items=item_pool, relations=relations)
+        if items_data == "No memory items available.":
+            return []
+
+        # A summary that is missing, None, or empty falls back to the description.
+        relevant_categories_info = "\n".join(
+            f"- {cat['name']}: {cat.get('summary') or cat.get('description') or ''}" for cat in category_hits
+        )
+        prompt = LLM_ITEM_RANKER_PROMPT.format(
+            query=self._escape_prompt_value(query),
+            top_k=top_k,
+            relevant_categories=self._escape_prompt_value(relevant_categories_info),
+            items_data=self._escape_prompt_value(items_data),
+        )
+
+        client = llm_client or self._get_llm_client()
+        llm_response = await client.chat(prompt)
+        return self._parse_llm_item_response(llm_response, store, items=item_pool)
+
+    async def _llm_rank_resources(
+        self,
+        query: str,
+        top_k: int,
+        category_hits: list[dict[str, Any]],
+        item_hits: list[dict[str, Any]],
+        ctx: Context,  # not read in this method
+        store: Database,
+        llm_client: Any | None = None,
+        items: Mapping[str, Any] | None = None,
+        resources: Mapping[str, Any] | None = None,
+    ) -> list[dict[str, Any]]:
+        """Ask the LLM to rank the resources referenced by ``item_hits``; ``[]`` when there are none to rank."""
+        # Only resources attached to the already-selected items are candidates.
+        item_ids = [item["id"] for item in item_hits]
+        if not item_ids:
+            return []
+
+        item_pool = items if items is not None else store.memory_item_repo.items
+        resource_pool = resources if resources is not None else store.resource_repo.resources
+        resources_data = self._format_resources_for_llm(store, item_ids, items=item_pool, resources=resource_pool)
+        if resources_data == "No resources available.":  # sentinel from the formatter for an empty set
+            return []
+
+        # Context block: category names plus the first three item summaries (each cut to 100 chars + "...").
+        context_parts = []
+        if category_hits:
+            context_parts.append("Relevant Categories:")
+            context_parts.extend([f"- {cat['name']}" for cat in category_hits])
+        if item_hits:
+            context_parts.append("\nRelevant Memory Items:")
+            context_parts.extend([f"- {item.get('summary', '')[:100]}..." for item in item_hits[:3]])
+
+        context_info = "\n".join(context_parts)
+        prompt = LLM_RESOURCE_RANKER_PROMPT.format(
+            query=self._escape_prompt_value(query),
+            top_k=top_k,
+            context_info=self._escape_prompt_value(context_info),
+            resources_data=self._escape_prompt_value(resources_data),
+        )
+
+        client = llm_client or self._get_llm_client()  # use self._get_llm_client() when none is supplied
+        llm_response = await client.chat(prompt)
+        return self._parse_llm_resource_response(llm_response, store, resources=resource_pool)
+
+    def _parse_llm_category_response(
+        self, raw_response: str, store: Database, categories: Mapping[str, Any] | None = None
+    ) -> list[dict[str, Any]]:
+        """Turn the ranker's JSON reply into category dicts, keeping the LLM's ordering."""
+        pool = store.memory_category_repo.categories if categories is None else categories
+        hits: list[dict[str, Any]] = []
+        try:
+            payload = json.loads(self._extract_json_blob(raw_response))
+            ranked_ids = payload["categories"] if "categories" in payload else None
+            if not isinstance(ranked_ids, list):
+                ranked_ids = []
+            for ranked_id in ranked_ids:
+                # Skip non-string ids and ids without a truthy entry in the pool.
+                if not isinstance(ranked_id, str):
+                    continue
+                category = pool.get(ranked_id)
+                if category:
+                    hits.append(self._model_dump_without_embeddings(category))
+        except Exception as e:
+            # Best effort: hits collected before the failure are still returned.
+            logger.warning(f"Failed to parse LLM category ranking response: {e}")
+
+        return hits
+
+    def _parse_llm_item_response(
+        self, raw_response: str, store: Database, items: Mapping[str, Any] | None = None
+    ) -> list[dict[str, Any]]:
+        """Turn the ranker's JSON reply into memory-item dicts, keeping the LLM's ordering."""
+        pool = store.memory_item_repo.items if items is None else items
+        hits: list[dict[str, Any]] = []
+        try:
+            payload = json.loads(self._extract_json_blob(raw_response))
+            ranked_ids = payload["items"] if "items" in payload else None
+            if not isinstance(ranked_ids, list):
+                ranked_ids = []
+            for ranked_id in ranked_ids:
+                # Skip non-string ids and ids without a truthy entry in the pool.
+                if not isinstance(ranked_id, str):
+                    continue
+                memory_item = pool.get(ranked_id)
+                if memory_item:
+                    hits.append(self._model_dump_without_embeddings(memory_item))
+        except Exception as e:
+            # Best effort: hits collected before the failure are still returned.
+            logger.warning(f"Failed to parse LLM item ranking response: {e}")
+
+        return hits
+
+    def _parse_llm_resource_response(
+        self, raw_response: str, store: Database, resources: Mapping[str, Any] | None = None
+    ) -> list[dict[str, Any]]:
+        """Turn the ranker's JSON reply into resource dicts, keeping the LLM's ordering."""
+        pool = store.resource_repo.resources if resources is None else resources
+        hits: list[dict[str, Any]] = []
+        try:
+            payload = json.loads(self._extract_json_blob(raw_response))
+            ranked_ids = payload["resources"] if "resources" in payload else None
+            if not isinstance(ranked_ids, list):
+                ranked_ids = []
+            for ranked_id in ranked_ids:
+                # Skip non-string ids and ids without a truthy entry in the pool.
+                if not isinstance(ranked_id, str):
+                    continue
+                resource = pool.get(ranked_id)
+                if resource:
+                    hits.append(self._model_dump_without_embeddings(resource))
+        except Exception as e:
+            # Best effort: hits collected before the failure are still returned.
+            logger.warning(f"Failed to parse LLM resource ranking response: {e}")
+
+        return hits
+
+    def _format_llm_category_content(self, hits: list[dict[str, Any]]) -> str:
+        """Build the sufficiency judge's text: one name/summary paragraph per category hit."""
+        paragraphs = (
+            f"Category: {hit['name']}\nSummary: {hit.get('summary', '') or hit.get('description', '')}"
+            for hit in hits
+        )
+        return "\n\n".join(paragraphs).strip()
+
+    def _format_llm_item_content(self, hits: list[dict[str, Any]]) -> str:
+        """Build the sufficiency judge's text: one typed summary paragraph per item hit."""
+        paragraphs = [
+            f"Memory Item ({hit['memory_type']}): {hit['summary']}" for hit in hits
+        ]
+        return "\n\n".join(paragraphs).strip()
diff --git a/src/memu/app/service.py b/src/memu/app/service.py
index f1c7759d..aeaae1b0 100644
--- a/src/memu/app/service.py
+++ b/src/memu/app/service.py
@@ -9,7 +9,9 @@
 
 from memu.app.crud import CRUDMixin
 from memu.app.memorize import MemorizeMixin
+from memu.app.memorize_parse import MemorizeParseMixin
 from memu.app.retrieve import RetrieveMixin
+from memu.app.retrieve_llm import RetrieveLlmMixin
 from memu.app.settings import (
     BlobConfig,
     CategoryConfig,
@@ -48,7 +50,7 @@ class Context:
     category_init_task: asyncio.Task | None = None
 
 
-class MemoryService(MemorizeMixin, RetrieveMixin, CRUDMixin):
+class MemoryService(MemorizeMixin, MemorizeParseMixin, RetrieveMixin, RetrieveLlmMixin, CRUDMixin):
     def __init__(
         self,
         *,
@@ -85,8 +87,11 @@ def __init__(
         # We need the concrete user scope (user_id: xxx) to initialize the categories
         # self._start_category_initialization(self._context, self.database)
 
-        # Initialize client caches (lazy creation on first use)
+        # Initialize client caches (lazy creation on first use). Chat-like and
+        # embedding clients are cached separately so embedding is fully decoupled
+        # from the chat LLM (different implementation, possibly different profile).
         self._llm_clients: dict[str, Any] = {}
+        self._embedding_clients: dict[str, Any] = {}
         self._llm_interceptors = LLMInterceptorRegistry()
         self._workflow_interceptors = WorkflowInterceptorRegistry()
 
@@ -116,8 +121,6 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any:
                 base_url=cfg.base_url,
                 api_key=cfg.api_key,
                 chat_model=cfg.chat_model,
-                embed_model=cfg.embed_model,
-                embed_batch_size=cfg.embed_batch_size,
             )
         elif backend == "httpx":
             return HTTPLLMClient(
@@ -126,7 +129,6 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any:
                 chat_model=cfg.chat_model,
                 provider=cfg.provider,
                 endpoint_overrides=cfg.endpoint_overrides,
-                embed_model=cfg.embed_model,
             )
         elif backend == "lazyllm_backend":
             from memu.llm.lazyllm_client import LazyLLMClient
@@ -145,6 +147,41 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any:
             msg = f"Unknown llm_client_backend '{cfg.client_backend}'"
             raise ValueError(msg)
 
+    def _init_embedding_client(self, config: LLMConfig | None = None) -> Any:
+        """Initialize a dedicated embedding client, decoupled from the chat LLM.
+
+        ``config`` defaults to the "embedding" profile, then to ``self.llm_config``. ``sdk``
+        and ``httpx`` build a client from ``memu.embedding``; ``lazyllm_backend`` is the sole
+        exception: its unified client has no standalone embedding counterpart, so
+        ``_init_llm_client`` is reused. Any other backend name raises ``ValueError``.
+        """
+        cfg = config or self.llm_profiles.profiles.get("embedding") or self.llm_config
+        backend = cfg.client_backend
+        if backend == "sdk":
+            from memu.embedding import OpenAIEmbeddingSDKClient
+
+            return OpenAIEmbeddingSDKClient(
+                base_url=cfg.base_url,
+                api_key=cfg.api_key,
+                embed_model=cfg.embed_model,
+                batch_size=cfg.embed_batch_size,
+            )
+        elif backend == "httpx":
+            from memu.embedding import HTTPEmbeddingClient
+
+            return HTTPEmbeddingClient(
+                base_url=cfg.base_url,
+                api_key=cfg.api_key,
+                embed_model=cfg.embed_model,
+                provider=cfg.provider,
+                endpoint_overrides=cfg.endpoint_overrides,
+            )
+        elif backend == "lazyllm_backend":
+            return self._init_llm_client(cfg)  # same client type as chat for this backend
+        else:
+            msg = f"Unknown llm_client_backend '{cfg.client_backend}'"
+            raise ValueError(msg)
+
     def _get_llm_base_client(self, profile: str | None = None) -> Any:
         """
         Lazily initialize and cache LLM clients per profile to avoid eager network setup.
@@ -161,6 +198,20 @@ def _get_llm_base_client(self, profile: str | None = None) -> Any:
         self._llm_clients[name] = client
         return client
 
+    def _get_embedding_base_client(self, profile: str | None = None) -> Any:
+        """Return the cached embedding client for ``profile``, creating it on first use."""
+        name = profile or "embedding"
+        cached = self._embedding_clients.get(name)
+        if cached is None:
+            # First request for this profile: resolve its config and build the client.
+            cfg: LLMConfig | None = self.llm_profiles.profiles.get(name)
+            if cfg is None:
+                msg = f"Unknown llm profile '{name}'"
+                raise KeyError(msg)
+            cached = self._init_embedding_client(cfg)
+            self._embedding_clients[name] = cached
+        return cached
+
     @staticmethod
     def _llm_call_metadata(profile: str, step_context: Mapping[str, Any] | None) -> LLMCallMetadata:
         if not isinstance(step_context, Mapping):
@@ -199,6 +250,10 @@ def _get_llm_client(self, profile: str | None = None, step_context: Mapping[str,
         base_client = self._get_llm_base_client(profile)
         return self._wrap_llm_client(base_client, profile=profile, step_context=step_context)
 
+    def _get_embedding_client(self, profile: str | None = None, step_context: Mapping[str, Any] | None = None) -> Any:
+        base_client = self._get_embedding_base_client(profile)  # cached per profile; None means "embedding"
+        return self._wrap_llm_client(base_client, profile=profile or "embedding", step_context=step_context)
+
     @property
     def llm_client(self) -> Any:
         """Default LLM client (lazy)."""
@@ -234,7 +289,7 @@ def _get_step_llm_client(self, step_context: Mapping[str, Any] | None) -> Any:
 
     def _get_step_embedding_client(self, step_context: Mapping[str, Any] | None) -> Any:
         profile = self._llm_profile_from_context(step_context, task="embedding") or "embedding"
-        return self._get_llm_client(profile, step_context=step_context)
+        return self._get_embedding_client(profile, step_context=step_context)
 
     def intercept_before_llm_call(
         self,
@@ -376,12 +431,7 @@ async def export_memory_files(self, *, user: dict[str, Any] | None = None) -> di
         Read-only against the database; only artifacts whose rendered content
         changed since the last export are rewritten (diff detection via a sidecar
         manifest). Returns a summary of written/unchanged/removed relative paths.
-
-        Requires ``memory_files_config.enabled=True``.
         """
-        if not self.memory_files_config.enabled:
-            msg = "Memory files are disabled; set memory_files_config.enabled=True to use export_memory_files()."
-            raise RuntimeError(msg)
         where = self.user_model(**user).model_dump() if user is not None else None
         # No changed set => full (re)initialization of the tree.
         return await self._build_memory_files(where, changed=None)
@@ -399,28 +449,29 @@ async def _build_memory_files(
         is (re)initialized from the full scoped store.
         """
         memory_body: str | None = None
-        skills: dict[str, str] | None = None
 
+        is_update = changed is not None and self._memory_file_exporter.artifacts_exist()
+
+        # MEMORY.md synthesis (optional) operates on the per-source descriptions:
+        # the just-changed sources for an incremental update, otherwise the full
+        # in-scope store for (re)initialization. The skill/ tree is built by the
+        # exporter from the skill-type memory items extracted during memorize, so it
+        # needs no work here.
         if self.memory_files_config.synthesize:
+            if is_update:
+                descriptions = MemoryFileExporter._build_descriptions(changed)  # type: ignore[arg-type]
+            else:
+                resources = list(self.database.resource_repo.list_resources(where=where or None).values())
+                descriptions = MemoryFileExporter._build_descriptions(resources)
+
             client = self._get_llm_client(self.memory_files_config.synthesis_llm_profile)
-            if changed is not None and self._memory_file_exporter.artifacts_exist():
-                # UPDATE: merge the changed descriptions into existing artifacts.
+            if is_update:
                 existing_memory = await asyncio.to_thread(self._memory_file_exporter.read_memory_body)
-                existing_skills = await asyncio.to_thread(self._memory_file_exporter.read_skills)
-                synthesized = await self._memory_synthesizer.update(
-                    MemoryFileExporter._build_descriptions(changed),
-                    existing_memory=existing_memory,
-                    existing_skills=existing_skills,
-                    chat=client.chat,
+                memory_body = await self._memory_synthesizer.update(
+                    descriptions, existing_memory=existing_memory, chat=client.chat
                 )
             else:
-                # INIT: build from scratch over all in-scope descriptions.
-                resources = list(self.database.resource_repo.list_resources(where=where or None).values())
-                synthesized = await self._memory_synthesizer.synthesize(
-                    MemoryFileExporter._build_descriptions(resources),
-                    chat=client.chat,
-                )
-            memory_body, skills = synthesized.memory_body, synthesized.skills
+                memory_body = await self._memory_synthesizer.synthesize(descriptions, chat=client.chat)
 
         async with self._memory_files_lock:
             result: ExportResult = await asyncio.to_thread(
@@ -428,7 +479,6 @@ async def _build_memory_files(
                 self.database,
                 where=where,
                 memory_body=memory_body,
-                skills=skills,
             )
         return result.to_dict()
 
diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py
index 946b2a1c..65491970 100644
--- a/src/memu/app/settings.py
+++ b/src/memu/app/settings.py
@@ -146,17 +146,19 @@ class BlobConfig(BaseModel):
 class MemoryFilesConfig(BaseModel):
     """Render structured memory into a browsable markdown "memory file system".
 
-    Purely additive and read-only against the store; disabled by default so it
-    never changes existing memorize/retrieve behavior.
+    Read-only against the store. The INDEX.md/MEMORY.md/SKILL.md tree (with the
+    sibling resource/, memory/, and skill/ directories) is always
+    initialized/updated on every ``memorize()`` call, so it stays current without
+    an explicit ``export_memory_files()`` call. Updates are best-effort: a render
+    failure is logged and never fails ``memorize()``.
     """
 
-    enabled: bool = Field(
-        default=False,
-        description="Enable rendering structured memory into browsable markdown files.",
-    )
     output_dir: str = Field(
         default="./data/memory",
-        description="Directory where the memory markdown tree (INDEX.md/MEMORY.md/skill/) is written.",
+        description=(
+            "Directory where the memory markdown tree (INDEX.md/MEMORY.md/SKILL.md plus the "
+            "resource/, memory/, and skill/ directories) is written."
+        ),
     )
     synthesize: bool = Field(
         default=False,
@@ -169,13 +171,6 @@ class MemoryFilesConfig(BaseModel):
         default="default",
         description="LLM profile used when synthesize=True.",
     )
-    update_on_memorize: bool = Field(
-        default=False,
-        description=(
-            "Automatically initialize or incrementally update the memory file tree after each "
-            "memorize() call, using the just-created resources as the changed file set."
-        ),
-    )
 
 
 class RetrieveCategoryConfig(BaseModel):
diff --git a/src/memu/blob/folder.py b/src/memu/blob/folder.py
new file mode 100644
index 00000000..c4993e0d
--- /dev/null
+++ b/src/memu/blob/folder.py
@@ -0,0 +1,174 @@
+"""Folder scanning, modality inference, and input-side manifest diffing.
+
+The folder-based ``memorize`` entry point treats a directory as the unit of
+ingestion. This module is responsible for the *input* side of the sync:
+
+- recursively scan a folder for ingestible files,
+- infer each file's modality from its extension (skipping unknown types),
+- maintain a sidecar ``.memu_manifest.json`` (``relative path -> content hash``)
+  so each call can tell which files were added, modified, or deleted.
+
+This is independent of the *output* side manifest used by ``memu.memory_fs``
+(which hashes the rendered markdown artifacts).
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import pathlib
+from dataclasses import dataclass, field
+
+logger = logging.getLogger(__name__)
+
+MANIFEST_FILENAME = ".memu_manifest.json"
+
+# Extension -> modality. Each ambiguous extension (e.g. .json) maps to one sensible default,
+# configurable later if needed. NOTE(review): .webm was cited here but has no entry below.
+EXT_MODALITY: dict[str, str] = {
+    ".json": "conversation",
+    ".txt": "document",
+    ".md": "document",
+    ".text": "document",
+    ".jpg": "image",
+    ".jpeg": "image",
+    ".png": "image",
+    ".gif": "image",
+    ".webp": "image",
+    ".mp4": "video",
+    ".mov": "video",
+    ".mkv": "video",
+    ".avi": "video",
+    ".mp3": "audio",
+    ".wav": "audio",
+    ".m4a": "audio",
+    ".mpeg": "audio",  # NOTE(review): .mpeg is often a video container — confirm audio is intended
+    ".mpga": "audio",
+}
+
+
+@dataclass(frozen=True)
+class ScannedFile:
+    """A single ingestible file discovered in the scanned folder."""
+
+    rel_path: str  # POSIX-style path relative to the scanned root
+    abs_path: str  # path of the file under the resolved root
+    modality: str  # EXT_MODALITY value for the file's extension
+    content_hash: str  # sha256 hex digest of the file contents
+
+
+@dataclass
+class FolderDiff:
+    """The added/modified/deleted sets between a scan and the prior manifest."""
+
+    added: list[ScannedFile] = field(default_factory=list)  # scanned paths absent from the prior manifest
+    modified: list[ScannedFile] = field(default_factory=list)  # scanned paths whose content hash changed
+    deleted: list[str] = field(default_factory=list)  # manifest paths missing from the scan
+
+    @property
+    def has_changes(self) -> bool:
+        return bool(self.added or self.modified or self.deleted)  # true when any list is non-empty
+
+    @property
+    def has_removals(self) -> bool:
+        """Whether any file was modified or deleted (i.e. stale memory exists)."""
+        return bool(self.modified or self.deleted)
+
+
+def infer_modality(path: str | pathlib.Path) -> str | None:
+    """Look up the modality for ``path``'s lower-cased suffix; ``None`` when it is not mapped."""
+    return EXT_MODALITY.get(pathlib.PurePath(path).suffix.lower())
+
+
+def compute_file_hash(path: str | pathlib.Path) -> str:
+    """Return the sha256 hex digest of ``path``, read in 64 KiB chunks to bound memory use."""
+    hasher = hashlib.sha256()
+    with pathlib.Path(path).open("rb") as stream:
+        while block := stream.read(65536):
+            hasher.update(block)
+    return hasher.hexdigest()
+
+
+def scan_folder(folder: str | pathlib.Path) -> dict[str, ScannedFile]:
+    """Walk ``folder`` recursively and hash every ingestible file, keyed by POSIX relative path.
+
+    Skips the manifest, dot-prefixed files/dirs, and (with a warning) unmapped extensions.
+    Raises ``NotADirectoryError`` when ``folder`` is not an existing directory.
+    """
+    root = pathlib.Path(folder).resolve()
+    if not root.is_dir():
+        msg = f"memorize() expects an existing folder, got: {folder}"
+        raise NotADirectoryError(msg)
+
+    found: dict[str, ScannedFile] = {}
+    for entry in sorted(root.rglob("*")):
+        if not entry.is_file() or entry.name == MANIFEST_FILENAME:
+            continue
+        relative = entry.relative_to(root)
+        if any(part.startswith(".") for part in relative.parts):
+            continue
+        modality = infer_modality(entry)
+        if modality is None:
+            logger.warning("Skipping file with unsupported extension: %s", relative)
+            continue
+        key = relative.as_posix()
+        found[key] = ScannedFile(
+            rel_path=key,
+            abs_path=str(entry),
+            modality=modality,
+            content_hash=compute_file_hash(entry),
+        )
+    return found
+
+
+def load_manifest(folder: str | pathlib.Path) -> dict[str, str]:
+    """Load the sidecar input manifest (``relative path -> content hash``).
+
+    Best effort: a missing, unreadable, non-UTF-8, malformed or non-object manifest yields ``{}``.
+    """
+    manifest_path = pathlib.Path(folder).resolve() / MANIFEST_FILENAME
+    try:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; a missing file is an OSError.
+        data = json.loads(manifest_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        return {}
+    return {str(key): str(value) for key, value in data.items()} if isinstance(data, dict) else {}
+
+
+def save_manifest(folder: str | pathlib.Path, manifest: dict[str, str]) -> None:
+    """Persist the input manifest into the scanned folder as key-sorted, indented UTF-8 JSON."""
+    manifest_path = pathlib.Path(folder).resolve() / MANIFEST_FILENAME
+    manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8")
+
+
+def diff_folder(scanned: dict[str, ScannedFile], manifest: dict[str, str]) -> FolderDiff:
+    """Classify scanned files as added or modified, and manifest-only paths as deleted."""
+    added: list[ScannedFile] = []
+    modified: list[ScannedFile] = []
+    for rel_path, current in scanned.items():
+        recorded = manifest.get(rel_path)
+        if recorded is None:  # path absent from the prior manifest
+            added.append(current)
+        elif recorded != current.content_hash:
+            modified.append(current)
+    return FolderDiff(added=added, modified=modified, deleted=[p for p in manifest if p not in scanned])
+
+
+def manifest_from_scan(scanned: dict[str, ScannedFile]) -> dict[str, str]:
+    """Reduce a scan result to the ``relative path -> content hash`` mapping that gets persisted."""
+    return {rel_path: entry.content_hash for rel_path, entry in scanned.items()}
+
+
+__all__ = [  # NOTE(review): scan_folder is defined in this module but not listed here — confirm intent
+    "EXT_MODALITY",
+    "MANIFEST_FILENAME",
+    "FolderDiff",
+    "ScannedFile",
+    "compute_file_hash",
+    "diff_folder",
+    "infer_modality",
+    "load_manifest",
+    "manifest_from_scan",
+    "save_manifest",
+]
diff --git a/src/memu/database/inmemory/repo.py b/src/memu/database/inmemory/repo.py
index 44275f9f..d9781bc5 100644
--- a/src/memu/database/inmemory/repo.py
+++ b/src/memu/database/inmemory/repo.py
@@ -37,10 +37,6 @@ def __init__(
         ) = build_inmemory_models(self.scope_model)
 
         self.state = state or InMemoryState()
-        self.resources: dict[str, Resource] = self.state.resources
-        self.items: dict[str, MemoryItem] = self.state.items
-        self.categories: dict[str, MemoryCategory] = self.state.categories
-        self.relations: list[CategoryItem] = self.state.relations
 
         resource_model = resource_model or default_resource_model or Resource
         memory_item_model = memory_item_model or default_memory_item_model or MemoryItem
diff --git a/src/memu/database/inmemory/repositories/resource_repo.py b/src/memu/database/inmemory/repositories/resource_repo.py
index ba60e52b..5e9dbece 100644
--- a/src/memu/database/inmemory/repositories/resource_repo.py
+++ b/src/memu/database/inmemory/repositories/resource_repo.py
@@ -30,6 +30,9 @@ def clear_resources(self, where: Mapping[str, Any] | None = None) -> dict[str, R
         self.resources = {rid: res for rid, res in self.resources.items() if rid not in matches}
         return matches
 
+    def delete_resource(self, resource_id: str) -> None:
+        self.resources.pop(resource_id, None)
+
     def create_resource(
         self,
         *,
diff --git a/src/memu/database/interfaces.py b/src/memu/database/interfaces.py
index 4acd0312..2053add4 100644
--- a/src/memu/database/interfaces.py
+++ b/src/memu/database/interfaces.py
@@ -11,18 +11,18 @@
 
 @runtime_checkable
 class Database(Protocol):
-    """Backend-agnostic database contract."""
+    """Backend-agnostic database contract.
+
+    Access to records goes exclusively through the repositories, which enforce
+    scope (``where``) filtering. The contract intentionally does not expose the
+    raw in-process record stores, so business logic cannot bypass scope rules.
+    """
 
     resource_repo: ResourceRepo
     memory_category_repo: MemoryCategoryRepo
     memory_item_repo: MemoryItemRepo
     category_item_repo: CategoryItemRepo
 
-    resources: dict[str, ResourceRecord]
-    items: dict[str, MemoryItemRecord]
-    categories: dict[str, MemoryCategoryRecord]
-    relations: list[CategoryItemRecord]
-
     def close(self) -> None: ...
 
 
diff --git a/src/memu/database/postgres/postgres.py b/src/memu/database/postgres/postgres.py
index d1ff7b05..de63469b 100644
--- a/src/memu/database/postgres/postgres.py
+++ b/src/memu/database/postgres/postgres.py
@@ -91,11 +91,6 @@ def __init__(
             scope_fields=self._scope_fields,
         )
 
-        self.resources = self._state.resources
-        self.items = self._state.items
-        self.categories = self._state.categories
-        self.relations = self._state.relations
-
         # self._load_existing()
 
     def close(self) -> None:
diff --git a/src/memu/database/postgres/repositories/resource_repo.py b/src/memu/database/postgres/repositories/resource_repo.py
index d358febc..0a9e78b4 100644
--- a/src/memu/database/postgres/repositories/resource_repo.py
+++ b/src/memu/database/postgres/repositories/resource_repo.py
@@ -62,6 +62,15 @@ def clear_resources(self, where: Mapping[str, Any] | None = None) -> dict[str, R
 
         return deleted
 
+    def delete_resource(self, resource_id: str) -> None:
+        """Delete a single resource by id (used for cascade sync)."""
+        from sqlmodel import delete
+
+        with self._sessions.session() as session:
+            session.exec(delete(self._sqla_models.Resource).where(self._sqla_models.Resource.id == resource_id))
+            session.commit()
+        self.resources.pop(resource_id, None)
+
     def create_resource(
         self,
         *,
diff --git a/src/memu/database/repositories/resource.py b/src/memu/database/repositories/resource.py
index d2c33b14..6eac6543 100644
--- a/src/memu/database/repositories/resource.py
+++ b/src/memu/database/repositories/resource.py
@@ -16,6 +16,8 @@ def list_resources(self, where: Mapping[str, Any] | None = None) -> dict[str, Re
 
     def clear_resources(self, where: Mapping[str, Any] | None = None) -> dict[str, Resource]: ...
 
+    def delete_resource(self, resource_id: str) -> None: ...
+
     def create_resource(
         self,
         *,
diff --git a/src/memu/database/sqlite/repositories/resource_repo.py b/src/memu/database/sqlite/repositories/resource_repo.py
index 7777eefd..e6bed474 100644
--- a/src/memu/database/sqlite/repositories/resource_repo.py
+++ b/src/memu/database/sqlite/repositories/resource_repo.py
@@ -134,6 +134,13 @@ def clear_resources(self, where: Mapping[str, Any] | None = None) -> dict[str, R
 
         return deleted
 
+    def delete_resource(self, resource_id: str) -> None:
+        """Delete a single resource by id (used for cascade sync)."""
+        with self._sessions.session() as session:
+            session.exec(delete(self._resource_model).where(self._resource_model.id == resource_id))
+            session.commit()
+        self.resources.pop(resource_id, None)
+
     def create_resource(
         self,
         *,
diff --git a/src/memu/database/sqlite/sqlite.py b/src/memu/database/sqlite/sqlite.py
index 2083dd99..e1df846c 100644
--- a/src/memu/database/sqlite/sqlite.py
+++ b/src/memu/database/sqlite/sqlite.py
@@ -117,12 +117,6 @@ def __init__(
             scope_fields=self._scope_fields,
         )
 
-        # Set up cache references
-        self.resources = self._state.resources
-        self.items = self._state.items
-        self.categories = self._state.categories
-        self.relations = self._state.relations
-
     def _create_tables(self) -> None:
         """Create SQLite tables if they don't exist."""
         SQLModel.metadata.create_all(self._sessions.engine)
diff --git a/src/memu/embedding/backends/__init__.py b/src/memu/embedding/backends/__init__.py
index c4e916a2..6c205d6c 100644
--- a/src/memu/embedding/backends/__init__.py
+++ b/src/memu/embedding/backends/__init__.py
@@ -1,10 +1,12 @@
 from memu.embedding.backends.base import EmbeddingBackend
 from memu.embedding.backends.doubao import DoubaoEmbeddingBackend, DoubaoMultimodalEmbeddingInput
 from memu.embedding.backends.openai import OpenAIEmbeddingBackend
+from memu.embedding.backends.openrouter import OpenRouterEmbeddingBackend
 
 __all__ = [
     "DoubaoEmbeddingBackend",
     "DoubaoMultimodalEmbeddingInput",
     "EmbeddingBackend",
     "OpenAIEmbeddingBackend",
+    "OpenRouterEmbeddingBackend",
 ]
diff --git a/src/memu/embedding/backends/openrouter.py b/src/memu/embedding/backends/openrouter.py
new file mode 100644
index 00000000..51bfb868
--- /dev/null
+++ b/src/memu/embedding/backends/openrouter.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+from memu.embedding.backends.openai import OpenAIEmbeddingBackend
+
+
+class OpenRouterEmbeddingBackend(OpenAIEmbeddingBackend):
+    """OpenRouter uses an OpenAI-compatible embedding API."""
+
+    name = "openrouter"
+    embedding_endpoint = "/api/v1/embeddings"
diff --git a/src/memu/embedding/http_client.py b/src/memu/embedding/http_client.py
index 0c3066a7..8864fb99 100644
--- a/src/memu/embedding/http_client.py
+++ b/src/memu/embedding/http_client.py
@@ -10,6 +10,7 @@
 from memu.embedding.backends.base import EmbeddingBackend
 from memu.embedding.backends.doubao import DoubaoEmbeddingBackend, DoubaoMultimodalEmbeddingInput
 from memu.embedding.backends.openai import OpenAIEmbeddingBackend
+from memu.embedding.backends.openrouter import OpenRouterEmbeddingBackend
 
 
 def _load_proxy() -> str | None:
@@ -21,6 +22,9 @@ def _load_proxy() -> str | None:
 EMBEDDING_BACKENDS: dict[str, Callable[[], EmbeddingBackend]] = {
     OpenAIEmbeddingBackend.name: OpenAIEmbeddingBackend,
     DoubaoEmbeddingBackend.name: DoubaoEmbeddingBackend,
+    OpenRouterEmbeddingBackend.name: OpenRouterEmbeddingBackend,
+    # Grok exposes an OpenAI-compatible embedding API.
+    "grok": OpenAIEmbeddingBackend,
 }
 
 
diff --git a/src/memu/integrations/langgraph.py b/src/memu/integrations/langgraph.py
index 2e24ddc6..2f83dc98 100644
--- a/src/memu/integrations/langgraph.py
+++ b/src/memu/integrations/langgraph.py
@@ -5,6 +5,7 @@
 import contextlib
 import logging
 import os
+import shutil
 import tempfile
 import uuid
 from typing import Any
@@ -75,18 +76,18 @@ def save_memory_tool(self) -> StructuredTool:
 
         async def _save(content: str, user_id: str, metadata: dict | None = None) -> str:
             logger.info("Entering save_memory_tool for user_id: %s", user_id)
-            filename = f"memu_input_{uuid.uuid4()}.txt"
-            temp_dir = tempfile.gettempdir()
-            file_path = os.path.join(temp_dir, filename)
+            # memorize() ingests a folder, so stage the content as a single file
+            # inside a throwaway directory and sync that directory.
+            temp_dir = tempfile.mkdtemp(prefix="memu_input_")
+            file_path = os.path.join(temp_dir, f"{uuid.uuid4()}.txt")
 
             try:
                 with open(file_path, "w", encoding="utf-8") as f:
                     f.write(content)
 
-                logger.debug("Calling memory_service.memorize with temporary file: %s", file_path)
+                logger.debug("Calling memory_service.memorize with temporary folder: %s", temp_dir)
                 await self.memory_service.memorize(
-                    resource_url=file_path,
-                    modality="conversation",
+                    folder=temp_dir,
                     user={"user_id": user_id, **(metadata or {})},
                 )
                 logger.info("Successfully saved memory for user_id: %s", user_id)
@@ -95,10 +96,9 @@ async def _save(content: str, user_id: str, metadata: dict | None = None) -> str
                 logger.exception(error_msg)
                 return str(MemUIntegrationError(error_msg))
             finally:
-                if os.path.exists(file_path):
-                    with contextlib.suppress(OSError):
-                        os.remove(file_path)
-                        logger.debug("Cleaned up temporary file: %s", file_path)
+                with contextlib.suppress(OSError):
+                    shutil.rmtree(temp_dir)
+                    logger.debug("Cleaned up temporary folder: %s", temp_dir)
 
             return "Memory saved successfully."
 
diff --git a/src/memu/llm/http_client.py b/src/memu/llm/http_client.py
index ba84b05b..eab5b323 100644
--- a/src/memu/llm/http_client.py
+++ b/src/memu/llm/http_client.py
@@ -5,7 +5,7 @@
 import os
 from collections.abc import Callable
 from pathlib import Path
-from typing import Any, cast
+from typing import Any
 
 import httpx
 
@@ -20,53 +20,6 @@ def _load_proxy() -> str | None:
     return os.getenv("MEMU_HTTP_PROXY") or os.getenv("HTTP_PROXY") or os.getenv("HTTPS_PROXY") or None
 
 
-# Minimal embedding backend support (moved from embedding module)
-class _EmbeddingBackend:
-    name: str
-    embedding_endpoint: str
-
-    def build_embedding_payload(self, *, inputs: list[str], embed_model: str) -> dict[str, Any]:
-        raise NotImplementedError
-
-    def parse_embedding_response(self, data: dict[str, Any]) -> list[list[float]]:
-        raise NotImplementedError
-
-
-class _OpenAIEmbeddingBackend(_EmbeddingBackend):
-    name = "openai"
-    embedding_endpoint = "/embeddings"
-
-    def build_embedding_payload(self, *, inputs: list[str], embed_model: str) -> dict[str, Any]:
-        return {"model": embed_model, "input": inputs}
-
-    def parse_embedding_response(self, data: dict[str, Any]) -> list[list[float]]:
-        return [cast(list[float], d["embedding"]) for d in data["data"]]
-
-
-class _DoubaoEmbeddingBackend(_EmbeddingBackend):
-    name = "doubao"
-    embedding_endpoint = "/api/v3/embeddings"
-
-    def build_embedding_payload(self, *, inputs: list[str], embed_model: str) -> dict[str, Any]:
-        return {"model": embed_model, "input": inputs, "encoding_format": "float"}
-
-    def parse_embedding_response(self, data: dict[str, Any]) -> list[list[float]]:
-        return [cast(list[float], d["embedding"]) for d in data["data"]]
-
-
-class _OpenRouterEmbeddingBackend(_EmbeddingBackend):
-    """OpenRouter uses OpenAI-compatible embedding API."""
-
-    name = "openrouter"
-    embedding_endpoint = "/api/v1/embeddings"
-
-    def build_embedding_payload(self, *, inputs: list[str], embed_model: str) -> dict[str, Any]:
-        return {"model": embed_model, "input": inputs}
-
-    def parse_embedding_response(self, data: dict[str, Any]) -> list[list[float]]:
-        return [cast(list[float], d["embedding"]) for d in data["data"]]
-
-
 logger = logging.getLogger(__name__)
 
 LLM_BACKENDS: dict[str, Callable[[], LLMBackend]] = {
@@ -78,7 +31,11 @@ def parse_embedding_response(self, data: dict[str, Any]) -> list[list[float]]:
 
 
 class HTTPLLMClient:
-    """HTTP client for LLM APIs (chat, vision, transcription) and embeddings."""
+    """HTTP client for LLM APIs (chat, vision, transcription).
+
+    Embedding is intentionally not handled here; it lives in ``memu.embedding`` so
+    chat and embedding are decoupled concerns.
+    """
 
     def __init__(
         self,
@@ -89,7 +46,6 @@ def __init__(
         provider: str = "openai",
         endpoint_overrides: dict[str, str] | None = None,
         timeout: int = 60,
-        embed_model: str | None = None,
     ):
         # Ensure base_url ends with "/" so httpx doesn't discard the path
         # component when joining with endpoint paths.
@@ -99,21 +55,12 @@ def __init__(
         self.chat_model = chat_model
         self.provider = provider.lower()
         self.backend = self._load_backend(self.provider)
-        self.embedding_backend = self._load_embedding_backend(self.provider)
         overrides = endpoint_overrides or {}
         raw_summary_ep = overrides.get("chat") or overrides.get("summary") or self.backend.summary_endpoint
-        raw_embedding_ep = (
-            overrides.get("embeddings")
-            or overrides.get("embedding")
-            or overrides.get("embed")
-            or self.embedding_backend.embedding_endpoint
-        )
         # Strip leading "/" from endpoints so httpx resolves them relative to
         # base_url instead of treating them as absolute paths.
         self.summary_endpoint = raw_summary_ep.lstrip("/")
-        self.embedding_endpoint = raw_embedding_ep.lstrip("/")
         self.timeout = timeout
-        self.embed_model = embed_model or chat_model
         self.proxy = _load_proxy()
 
     async def chat(
@@ -208,16 +155,6 @@ async def vision(
         logger.debug("HTTP LLM vision response: %s", data)
         return self.backend.parse_summary_response(data), data
 
-    async def embed(self, inputs: list[str]) -> tuple[list[list[float]], dict[str, Any]]:
-        """Create text embeddings using the provider-specific embedding API."""
-        payload = self.embedding_backend.build_embedding_payload(inputs=inputs, embed_model=self.embed_model)
-        async with httpx.AsyncClient(base_url=self.base_url, timeout=self.timeout, proxy=self.proxy) as client:
-            resp = await client.post(self.embedding_endpoint, json=payload, headers=self._headers())
-            resp.raise_for_status()
-            data = resp.json()
-        logger.debug("HTTP embedding response: %s", data)
-        return self.embedding_backend.parse_embedding_response(data), data
-
     async def transcribe(
         self,
         audio_path: str,
@@ -285,16 +222,3 @@ def _load_backend(self, provider: str) -> LLMBackend:
             msg = f"Unsupported LLM provider '{provider}'. Available: {', '.join(LLM_BACKENDS.keys())}"
             raise ValueError(msg)
         return factory()
-
-    def _load_embedding_backend(self, provider: str) -> _EmbeddingBackend:
-        backends: dict[str, type[_EmbeddingBackend]] = {
-            _OpenAIEmbeddingBackend.name: _OpenAIEmbeddingBackend,
-            _DoubaoEmbeddingBackend.name: _DoubaoEmbeddingBackend,
-            "grok": _OpenAIEmbeddingBackend,
-            _OpenRouterEmbeddingBackend.name: _OpenRouterEmbeddingBackend,
-        }
-        factory = backends.get(provider)
-        if not factory:
-            msg = f"Unsupported embedding provider '{provider}'. Available: {', '.join(backends.keys())}"
-            raise ValueError(msg)
-        return factory()
diff --git a/src/memu/llm/openai_sdk.py b/src/memu/llm/openai_sdk.py
index 38c6c8bb..97c1ebc4 100644
--- a/src/memu/llm/openai_sdk.py
+++ b/src/memu/llm/openai_sdk.py
@@ -1,10 +1,9 @@
 import base64
 import logging
 from pathlib import Path
-from typing import Any, Literal, cast
+from typing import Any, Literal
 
 from openai import AsyncOpenAI
-from openai.types import CreateEmbeddingResponse
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionContentPartImageParam,
@@ -26,14 +25,10 @@ def __init__(
         base_url: str,
         api_key: str,
         chat_model: str,
-        embed_model: str,
-        embed_batch_size: int = 1,
     ):
         self.base_url = base_url.rstrip("/")
         self.api_key = api_key or ""
         self.chat_model = chat_model
-        self.embed_model = embed_model
-        self.embed_batch_size = embed_batch_size
         self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)
 
     async def chat(
@@ -152,23 +147,6 @@ async def vision(
         logger.debug("OpenAI vision response: %s", response)
         return content or "", response
 
-    async def embed(self, inputs: list[str]) -> tuple[list[list[float]], CreateEmbeddingResponse | None]:
-        """Create text embeddings via the official SDK."""
-        if len(inputs) <= self.embed_batch_size:
-            response = await self.client.embeddings.create(model=self.embed_model, input=inputs)
-            return [cast(list[float], d.embedding) for d in response.data], response
-
-        # For batched requests, we aggregate embeddings but only return the last response for usage
-        all_embeddings: list[list[float]] = []
-        last_response: CreateEmbeddingResponse | None = None
-        for idx in range(0, len(inputs), self.embed_batch_size):
-            batch = inputs[idx : idx + self.embed_batch_size]
-            response = await self.client.embeddings.create(model=self.embed_model, input=batch)
-            all_embeddings.extend([cast(list[float], d.embedding) for d in response.data])
-            last_response = response
-
-        return all_embeddings, last_response
-
     async def transcribe(
         self,
         audio_path: str,
diff --git a/src/memu/memory_fs/__init__.py b/src/memu/memory_fs/__init__.py
index 57e26293..0a379c54 100644
--- a/src/memu/memory_fs/__init__.py
+++ b/src/memu/memory_fs/__init__.py
@@ -4,14 +4,14 @@
 markdown artifacts on disk. Read-only against the database and fully optional.
 """
 
-from memu.memory_fs.exporter import ExportResult, FileDescription, MemoryFileExporter, slugify
-from memu.memory_fs.synthesizer import MemorySynthesizer, SynthesisResult
+from memu.memory_fs.exporter import ExportResult, FileDescription, MemoryFileExporter, SkillDoc, slugify
+from memu.memory_fs.synthesizer import MemorySynthesizer
 
 __all__ = [
     "ExportResult",
     "FileDescription",
     "MemoryFileExporter",
     "MemorySynthesizer",
-    "SynthesisResult",
+    "SkillDoc",
     "slugify",
 ]
diff --git a/src/memu/memory_fs/exporter.py b/src/memu/memory_fs/exporter.py
index a63b2d21..55764df8 100644
--- a/src/memu/memory_fs/exporter.py
+++ b/src/memu/memory_fs/exporter.py
@@ -1,22 +1,38 @@
 """Render the structured memory store into memU's markdown "memory file system".
 
-Everything an agent learns first becomes a **multimodal description** (the
-modality-agnostic text/caption produced by preprocessing every source). That
-description is the single shared trunk; three independent *bypasses* then project
-it into the navigable file tree described in the README:
-
-    memory/
-    ├── INDEX.md                     ← map of everything: folders, files, sources
-    ├── MEMORY.md                    ← profile, preferences, goals, key events
+The exporter projects the (optionally scoped) store into the navigable file tree
+described in the README:
+
+    <output_dir>/
+    ├── INDEX.md                     ← index of the raw files under resource/
+    ├── MEMORY.md                    ← overall overview + index of memory/
+    ├── SKILL.md                     ← index of the skills under skill/
+    ├── resource/
+    │   └── <file_name>              ← one copied raw source file
+    ├── memory/
+    │   └── <slug>.md                ← one memory category (description + summary)
     └── skill/
-        └── <skill_name>/SKILL.md    ← one learned skill / tool pattern per folder
-
-The three bypasses are siblings — none is upstream of another; each is a
-different aggregation of the same descriptions:
-
-- ``INDEX.md``  : a table of contents (where to look before reading).
-- ``MEMORY.md`` : the living memory, aggregated from folder (category) summaries.
-- ``skill/**``  : skill memories broken out as standalone documents.
+        └── <skill_name>/SKILL.md    ← one extracted skill profile per folder
+
+Each artifact is a different aggregation of what the agent has stored. The three
+root indexes (``INDEX.md`` / ``MEMORY.md`` / ``SKILL.md``) each point at a sibling
+directory of payloads (``resource/`` / ``memory/`` / ``skill/``):
+
+- ``resource/`` : the raw source files copied verbatim out of the blob store
+  (``Resource.local_path``), so the actual ingested bytes live next to the memory.
+- ``INDEX.md``  : an index of those raw files (name, modality, description, link
+  into ``resource/``), so an agent knows which raw resources exist.
+- ``memory/``   : the living memory split one file per
+  :class:`~memu.database.models.MemoryCategory` (its description + summary).
+- ``MEMORY.md`` : an overall overview that links to each ``memory/<slug>.md`` file.
+- ``skill/**``  : reusable skills extracted **during memorize** — one folder per
+  ``skill``-type :class:`~memu.database.models.MemoryItem`. Each item's summary is
+  a comprehensive skill profile (Markdown with ``name``/``description``
+  frontmatter, see :mod:`memu.prompts.memory_type.skill`); the exporter renders it
+  verbatim and parses the frontmatter for the folder slug and one-line
+  description.
+- ``SKILL.md`` (root): a generated index/table of contents over the ``skill/``
+  tree.
 
 This layer is read-only against the database and never mutates memory records.
 A sidecar manifest (``.memufs_manifest.json``) records the content hash of every
@@ -42,7 +58,10 @@
 
 MANIFEST_NAME = ".memufs_manifest.json"
 SKILL_DIRNAME = "skill"
+RESOURCE_DIRNAME = "resource"
+MEMORY_DIRNAME = "memory"
 SKILL_FILENAME = "SKILL.md"
+SKILL_INDEX_FILENAME = "SKILL.md"
 INDEX_FILENAME = "INDEX.md"
 MEMORY_FILENAME = "MEMORY.md"
 SKILL_MEMORY_TYPE = "skill"
@@ -55,12 +74,34 @@ class FileDescription:
 
     Derived from a :class:`~memu.database.models.Resource`: ``caption`` is the
     text/caption produced by preprocessing, regardless of original modality.
+    ``local_path`` points at the ingested file inside the resources dir.
     """
 
     url: str
     modality: str
     description: str
     resource_id: str
+    local_path: str = ""
+
+    @property
+    def file_name(self) -> str:
+        """The ingested file's name in the resources dir (fallback: the url)."""
+        name = pathlib.Path(self.local_path).name if self.local_path else ""
+        return name or self.url
+
+
+@dataclass
+class SkillDoc:
+    """A single extracted skill: its Markdown profile body plus a one-line description.
+
+    Built from a ``skill``-type :class:`~memu.database.models.MemoryItem`. ``body``
+    is the item's full skill profile (rendered verbatim into
+    ``skill/<slug>/SKILL.md``); ``description`` is the one-line summary parsed from
+    the profile frontmatter and surfaced in the root ``SKILL.md`` index.
+    """
+
+    body: str
+    description: str = ""
 
 
 @dataclass
@@ -92,8 +133,9 @@ def slugify(name: str) -> str:
     return slug or "untitled"
 
 
-def _content_hash(text: str) -> str:
-    return hashlib.sha256(text.encode("utf-8")).hexdigest()
+def _content_hash(content: str | bytes) -> str:
+    data = content if isinstance(content, bytes) else content.encode("utf-8")
+    return hashlib.sha256(data).hexdigest()
 
 
 class MemoryFileExporter:
@@ -113,14 +155,16 @@ def export(
         *,
         where: Mapping[str, Any] | None = None,
         memory_body: str | None = None,
-        skills: dict[str, str] | None = None,
     ) -> ExportResult:
         """Render the (optionally scoped) store and write only changed artifacts.
 
-        ``memory_body`` and ``skills`` are optional synthesized overrides (e.g.
-        produced by :class:`~memu.memory_fs.synthesizer.MemorySynthesizer`). When
-        provided they replace the deterministic, database-derived rendering of
-        ``MEMORY.md`` / the ``skill/`` tree; ``INDEX.md`` is always deterministic.
+        The ``skill/`` tree is built from the ``skill``-type memory items in the
+        (scoped) store — the skills extracted during memorize. ``memory_body`` is
+        an optional synthesized override for the ``MEMORY.md`` body; when omitted,
+        ``MEMORY.md`` is rendered deterministically as an overview that links to the
+        per-category ``memory/<slug>.md`` files. The ``resource/`` raw copies,
+        ``memory/`` category files, ``INDEX.md``, and the root ``SKILL.md`` index
+        are always deterministic.
         """
         self.output_dir.mkdir(parents=True, exist_ok=True)
         scope = dict(where) if where else None
@@ -132,21 +176,35 @@ def export(
         # The shared trunk: one multimodal description per source file.
         descriptions = self._build_descriptions(resources)
 
-        if skills is not None:
-            skill_artifacts = {
-                f"{SKILL_DIRNAME}/{slug}/{SKILL_FILENAME}": self._skill_document(body)
-                for slug, body in skills.items()
-            }
-        else:
-            skill_artifacts = self._skill_bypass(items)
-        skill_slugs = sorted(rel.split("/")[1] for rel in skill_artifacts)
+        # resource/: copy the raw source bytes verbatim; ``links`` maps each
+        # resource id to its relative path under resource/ for the INDEX.md links.
+        raw_artifacts, links = self._resource_artifacts(descriptions)
+
+        # memory/: one file per category, indexed by MEMORY.md.
+        ordered_categories, category_slugs = self._category_slugs(categories)
+        category_artifacts = {
+            f"{MEMORY_DIRNAME}/{slug}.md": self._category_document(category)
+            for category, slug in zip(ordered_categories, category_slugs, strict=True)
+        }
 
-        body = memory_body if memory_body is not None else self._memory_body(categories)
+        # The skill/ tree is derived from the skill-type memory items extracted
+        # during memorize; each item's summary is a full skill profile.
+        skill_map = self._skills_from_items(items)
+        skill_artifacts = {
+            f"{SKILL_DIRNAME}/{slug}/{SKILL_FILENAME}": self._skill_document(doc)
+            for slug, doc in skill_map.items()
+        }
 
-        artifacts: dict[str, str] = {}
+        artifacts: dict[str, str | bytes] = {}
+        artifacts.update(raw_artifacts)
+        artifacts.update(category_artifacts)
         artifacts.update(skill_artifacts)
-        artifacts[MEMORY_FILENAME] = self._memory_document(body)
-        artifacts[INDEX_FILENAME] = self._index_bypass(categories, descriptions, skill_slugs, items, database=database)
+        if memory_body is not None:
+            artifacts[MEMORY_FILENAME] = self._memory_document(memory_body)
+        else:
+            artifacts[MEMORY_FILENAME] = self._memory_index(ordered_categories, category_slugs)
+        artifacts[INDEX_FILENAME] = self._index_bypass(descriptions, links)
+        artifacts[SKILL_INDEX_FILENAME] = self._skill_index(skill_map)
 
         return self._sync(artifacts)
 
@@ -162,47 +220,87 @@ def _build_descriptions(resources: list[Resource]) -> list[FileDescription]:
                     modality=resource.modality,
                     description=" ".join((resource.caption or "").split()),
                     resource_id=resource.id,
+                    local_path=resource.local_path,
                 )
             )
         return descriptions
 
     # -- bypass: SKILL -----------------------------------------------------
 
-    def _skill_bypass(self, items: list[MemoryItem]) -> dict[str, str]:
-        """Break out skill memories as standalone ``skill/<name>/SKILL.md`` docs."""
-        skills = sorted(
-            (item for item in items if item.memory_type == SKILL_MEMORY_TYPE and (item.summary or "").strip()),
-            key=lambda i: (i.created_at, i.id),
-        )
-        artifacts: dict[str, str] = {}
+    def _skills_from_items(self, items: list[MemoryItem]) -> dict[str, SkillDoc]:
+        """Build the slug -> :class:`SkillDoc` map from ``skill``-type memory items.
+
+        Each item's ``summary`` is a full skill profile whose frontmatter supplies
+        the folder slug (``name``) and the index ``description``. Items are ordered
+        deterministically and slugs deduplicated so the output is stable.
+        """
+        skill_items = [item for item in items if item.memory_type == SKILL_MEMORY_TYPE]
+        skills: dict[str, SkillDoc] = {}
         used: dict[str, int] = {}
-        for item in skills:
+        for item in sorted(skill_items, key=lambda i: (i.created_at, i.id)):
             body = (item.summary or "").strip()
-            base = self._skill_name(body, fallback=f"skill-{item.id[:6]}")
+            if not body:
+                continue
+            name, description = self._parse_skill_frontmatter(body)
+            base = slugify(name or self._fallback_skill_name(body))
             count = used.get(base, 0)
             slug = base if count == 0 else f"{base}-{count + 1}"
             used[base] = count + 1
-            rel_path = f"{SKILL_DIRNAME}/{slug}/{SKILL_FILENAME}"
-            artifacts[rel_path] = self._skill_document(body)
-        return artifacts
+            skills[slug] = SkillDoc(body=body, description=description)
+        return skills
 
-    @staticmethod
-    def _skill_document(body: str) -> str:
-        return f"{_GENERATED_NOTICE}\n\n{body.strip()}\n"
+    @classmethod
+    def _parse_skill_frontmatter(cls, body: str) -> tuple[str, str]:
+        """Extract ``name`` and ``description`` from a skill profile's frontmatter.
+
+        Returns ``("", "")`` when the body has no ``---`` delimited frontmatter.
+        """
+        lines = body.splitlines()
+        if not lines or lines[0].strip() != "---":
+            return "", ""
+        name = ""
+        description = ""
+        for line in lines[1:]:
+            if line.strip() == "---":
+                break
+            key, sep, value = line.partition(":")
+            if not sep:
+                continue
+            key = key.strip().lower()
+            value = cls._inline(value.strip())
+            if key == "name" and not name:
+                name = value
+            elif key == "description" and not description:
+                description = value
+        return name, description
+
+    @classmethod
+    def _fallback_skill_name(cls, body: str) -> str:
+        """Best-effort skill name when the profile has no usable frontmatter name."""
+        for raw_line in body.splitlines():
+            line = raw_line.strip()
+            if not line or line == "---":
+                continue
+            return cls._inline(line.lstrip("#").strip())
+        return "untitled"
 
     @staticmethod
-    def _skill_name(body: str, *, fallback: str) -> str:
-        """Derive a skill folder slug from frontmatter ``name:`` or first heading."""
-        if body.startswith("---"):
-            end = body.find("\n---", 3)
-            front = body[3:end] if end != -1 else ""
-            match = re.search(r"^\s*name:\s*(.+)$", front, re.MULTILINE)
-            if match:
-                return slugify(match.group(1))
-        heading = re.search(r"^#+\s*(.+)$", body, re.MULTILINE)
-        if heading:
-            return slugify(heading.group(1))
-        return slugify(fallback)
+    def _skill_document(doc: SkillDoc) -> str:
+        """Render a single skill profile verbatim under the generated notice."""
+        return f"{_GENERATED_NOTICE}\n\n{doc.body.strip()}\n"
+
+    def _skill_index(self, skills: dict[str, SkillDoc]) -> str:
+        """A table of contents over the ``skill/`` tree (slug, description, link)."""
+        lines = ["# Skills", "", _GENERATED_NOTICE, "", "## Skills", ""]
+        if skills:
+            for slug, doc in sorted(skills.items()):
+                description = self._inline((doc.description or "").strip()) or "_No description._"
+                link = f"{SKILL_DIRNAME}/{slug}/{SKILL_FILENAME}"
+                lines.append(f"- [`{slug}`]({link}) — {description}")
+        else:
+            lines.append("_No skills yet._")
+        lines.append("")
+        return "\n".join(lines)
 
     # -- bypass: MEMORY ----------------------------------------------------
 
@@ -211,62 +309,105 @@ def _memory_document(body: str) -> str:
         body = body.strip() or "_No memory yet._"
         return f"# Memory\n\n{_GENERATED_NOTICE}\n\n{body}\n"
 
-    def _memory_body(self, categories: list[MemoryCategory]) -> str:
-        """The living memory body: folder (category) summaries aggregated."""
-        if not categories:
-            return ""
-        sections: list[str] = []
-        for category in sorted(categories, key=lambda c: (c.name.lower(), c.id)):
-            description = self._inline((category.description or "").strip())
-            summary = (category.summary or "").strip()
-            block = [f"## {category.name}"]
-            if description:
-                block.append(f"_{description}_")
-            block.append("")
-            block.append(summary or "_No summary yet._")
-            sections.append("\n".join(block))
-        return "\n\n".join(sections)
-
-    # -- bypass: INDEX -----------------------------------------------------
-
-    def _index_bypass(
-        self,
-        categories: list[MemoryCategory],
-        descriptions: list[FileDescription],
-        skill_slugs: list[str],
-        items: list[MemoryItem],
-        *,
-        database: Database,
-    ) -> str:
-        """Table of contents: where folders, skills, and sources live."""
-        counts = self._item_counts_per_category(items, database=database)
-        lines = ["# Index", "", _GENERATED_NOTICE, ""]
-
-        lines += ["## Folders", ""]
-        if categories:
-            for category in sorted(categories, key=lambda c: (c.name.lower(), c.id)):
-                description = self._inline((category.description or "").strip())
-                count = counts.get(category.id, 0)
-                suffix = f" — {description}" if description else ""
-                lines.append(f"- **{category.name}**{suffix} ({count} items) → [{MEMORY_FILENAME}](./{MEMORY_FILENAME})")
-        else:
-            lines.append("_No folders yet._")
+    @staticmethod
+    def _category_slugs(categories: list[MemoryCategory]) -> tuple[list[MemoryCategory], list[str]]:
+        """Order categories deterministically and assign a unique slug to each.
+
+        Returns the ordered categories alongside a parallel list of slugs (used for
+        the ``memory/<slug>.md`` file names and the MEMORY.md links). Clashes probe
+        ``-2``, ``-3``, ... until a slug is free, so ``a``/``a``/``a-2`` cannot collide.
+        """
+        ordered = sorted(categories, key=lambda c: (c.name.lower(), c.id))
+        slugs: list[str] = []
+        for category in ordered:
+            base = slug = slugify(category.name)
+            n = 1
+            while slug in slugs:
+                n, slug = n + 1, f"{base}-{n + 1}"
+            slugs.append(slug)
+        return ordered, slugs
+
+    def _category_document(self, category: MemoryCategory) -> str:
+        """A single ``memory/<slug>.md`` file: the category's description + summary."""
+        description = self._inline((category.description or "").strip())
+        summary = (category.summary or "").strip()
+        lines = [f"# {category.name}", "", _GENERATED_NOTICE, ""]
+        if description:
+            lines.append(f"_{description}_")
+            lines.append("")
+        lines.append(summary or "_No summary yet._")
         lines.append("")
+        return "\n".join(lines)
 
-        lines += ["## Skills", ""]
-        if skill_slugs:
-            for slug in skill_slugs:
-                rel = f"{SKILL_DIRNAME}/{slug}/{SKILL_FILENAME}"
-                lines.append(f"- [{slug}](./{rel})")
+    def _memory_index(self, ordered: list[MemoryCategory], slugs: list[str]) -> str:
+        """The deterministic ``MEMORY.md``: an overview linking to each category file."""
+        lines = ["# Memory", "", _GENERATED_NOTICE, "", "## Overview", ""]
+        if ordered:
+            for category, slug in zip(ordered, slugs, strict=True):
+                description = self._inline((category.description or "").strip())
+                link = f"{MEMORY_DIRNAME}/{slug}.md"
+                line = f"- [**{category.name}**]({link})"
+                if description:
+                    line = f"{line} — {description}"
+                lines.append(line)
         else:
-            lines.append("_No skills yet._")
+            lines.append("_No memory yet._")
         lines.append("")
+        return "\n".join(lines)
 
-        lines += ["## Sources", ""]
+    # -- bypass: RESOURCE + INDEX ------------------------------------------
+
+    @staticmethod
+    def _resource_artifacts(
+        descriptions: list[FileDescription],
+    ) -> tuple[dict[str, bytes], dict[str, str]]:
+        """Copy each readable source file into ``resource/`` (verbatim bytes).
+
+        Returns the ``resource/<name>`` -> raw bytes artifact map plus a
+        ``resource_id`` -> relative path map used to link ``INDEX.md`` entries to
+        their copies. Resources without a readable ``local_path`` are skipped (they
+        are still listed in INDEX.md, just without a link). File-name collisions probe
+        ``-2``, ``-3``, ... until the path is unused, so copies never clobber each other.
+        """
+        raw: dict[str, bytes] = {}
+        links: dict[str, str] = {}
+        for desc in descriptions:
+            if not desc.local_path:
+                continue
+            path = pathlib.Path(desc.local_path)
+            if not path.is_file():
+                continue
+            try:
+                content = path.read_bytes()
+            except OSError:
+                continue
+            # Check against paths already emitted so a suffixed copy never overwrites a literal "a-2.txt".
+            rel = f"{RESOURCE_DIRNAME}/{desc.file_name}"
+            n = 1
+            while rel in raw:
+                n += 1
+                rel = f"{RESOURCE_DIRNAME}/{MemoryFileExporter._suffix_name(desc.file_name, n)}"
+            raw[rel] = content
+            links[desc.resource_id] = rel
+        return raw, links
+
+    @staticmethod
+    def _suffix_name(name: str, n: int) -> str:
+        """Insert ``-n`` before the extension (``a.txt`` -> ``a-2.txt``, ``.env`` -> ``.env-2``)."""
+        stem, dot, ext = name.rpartition(".")
+        if dot and stem:
+            return f"{stem}-{n}.{ext}"
+        return f"{name}-{n}"
+
+    def _index_bypass(self, descriptions: list[FileDescription], links: dict[str, str]) -> str:
+        """An index of the raw source files copied into ``resource/``."""
+        lines = ["# Index", "", _GENERATED_NOTICE, "", "## Files", ""]
         if descriptions:
             for desc in descriptions:
                 text = desc.description or "_No description._"
-                lines.append(f"- `{desc.url}` ({desc.modality}) — {text}")
+                rel = links.get(desc.resource_id)
+                label = f"[`{desc.file_name}`]({rel})" if rel else f"`{desc.file_name}`"
+                lines.append(f"- {label} ({desc.modality}) — {text}")
         else:
             lines.append("_No source files yet._")
         lines.append("")
@@ -274,16 +415,6 @@ def _index_bypass(
 
     # -- shared helpers ----------------------------------------------------
 
-    @staticmethod
-    def _item_counts_per_category(items: list[MemoryItem], *, database: Database) -> dict[str, int]:
-        in_scope_ids = {item.id for item in items}
-        counts: dict[str, int] = {}
-        relations = getattr(database, "relations", None) or []
-        for relation in relations:
-            if relation.item_id in in_scope_ids:
-                counts[relation.category_id] = counts.get(relation.category_id, 0) + 1
-        return counts
-
     @staticmethod
     def _inline(text: str) -> str:
         """Collapse a value to a single safe line."""
@@ -291,7 +422,7 @@ def _inline(text: str) -> str:
 
     # -- diff / write ------------------------------------------------------
 
-    def _sync(self, artifacts: dict[str, str]) -> ExportResult:
+    def _sync(self, artifacts: dict[str, str | bytes]) -> ExportResult:
         manifest = self._load_manifest()
         result = ExportResult(output_dir=str(self.output_dir))
         new_manifest: dict[str, str] = {}
@@ -304,7 +435,10 @@ def _sync(self, artifacts: dict[str, str]) -> ExportResult:
                 result.unchanged.append(rel_path)
                 continue
             target.parent.mkdir(parents=True, exist_ok=True)
-            target.write_text(content, encoding="utf-8")
+            if isinstance(content, bytes):
+                target.write_bytes(content)
+            else:
+                target.write_text(content, encoding="utf-8")
             result.written.append(rel_path)
 
         for rel_path in manifest:
@@ -347,18 +481,6 @@ def read_memory_body(self) -> str:
             return ""
         return self._strip_chrome(path.read_text(encoding="utf-8"), drop_heading="# Memory")
 
-    def read_skills(self) -> dict[str, str]:
-        """Read existing ``skill/<slug>/SKILL.md`` bodies keyed by slug."""
-        skills: dict[str, str] = {}
-        skill_root = self.output_dir / SKILL_DIRNAME
-        if not skill_root.is_dir():
-            return skills
-        for child in sorted(skill_root.iterdir()):
-            doc = child / SKILL_FILENAME
-            if child.is_dir() and doc.exists():
-                skills[child.name] = self._strip_chrome(doc.read_text(encoding="utf-8"))
-        return skills
-
     @staticmethod
     def _strip_chrome(text: str, *, drop_heading: str | None = None) -> str:
         lines = text.splitlines()
@@ -386,4 +508,4 @@ def _save_manifest(self, manifest: dict[str, str]) -> None:
         manifest_path.write_text(json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8")
 
 
-__all__ = ["ExportResult", "FileDescription", "MemoryFileExporter", "slugify"]
+__all__ = ["ExportResult", "FileDescription", "MemoryFileExporter", "SkillDoc", "slugify"]
diff --git a/src/memu/memory_fs/synthesizer.py b/src/memu/memory_fs/synthesizer.py
index 26a259d2..9d806483 100644
--- a/src/memu/memory_fs/synthesizer.py
+++ b/src/memu/memory_fs/synthesizer.py
@@ -1,27 +1,24 @@
-"""LLM synthesis of MEMORY/SKILL artifacts from the shared description trunk.
+"""LLM synthesis of the MEMORY.md document from the shared description trunk.
 
 This is the optional, opt-in counterpart to the deterministic exporter: instead of
-rendering already-extracted database items/summaries, it feeds the per-source
-multimodal descriptions to an LLM and synthesizes the memory document and skill
-docs directly. ``INDEX.md`` stays deterministic and is handled by the exporter.
+rendering already-extracted category summaries, it feeds the per-source multimodal
+descriptions to an LLM and synthesizes the memory document directly. ``INDEX.md``,
+the ``skill/`` tree, and the root ``SKILL.md`` index stay deterministic and are
+handled by the exporter (the ``skill/`` tree is built from the ``skill``-type
+memory items extracted during memorize, not synthesized here).
 """
 
 from __future__ import annotations
 
-import json
 import re
 from collections.abc import Awaitable, Callable
-from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 
-from memu.memory_fs.exporter import slugify
 from memu.prompts.memory_fs import (
     DESCRIPTIONS_PLACEHOLDER,
     EXISTING_PLACEHOLDER,
     MEMORY_SYNTHESIS_PROMPT,
     MEMORY_UPDATE_PROMPT,
-    SKILL_SYNTHESIS_PROMPT,
-    SKILL_UPDATE_PROMPT,
 )
 
 if TYPE_CHECKING:
@@ -30,82 +27,47 @@
 ChatFn = Callable[[str], Awaitable[str]]
 
 
-@dataclass
-class SynthesisResult:
-    """Synthesized artifact payloads, ready to hand to the exporter."""
-
-    memory_body: str = ""
-    skills: dict[str, str] = field(default_factory=dict)
-
-
 class MemorySynthesizer:
-    """Synthesize MEMORY/SKILL content from multimodal descriptions via an LLM."""
+    """Synthesize the ``MEMORY.md`` body from multimodal descriptions via an LLM."""
 
     def __init__(
         self,
         *,
         memory_prompt: str = MEMORY_SYNTHESIS_PROMPT,
-        skill_prompt: str = SKILL_SYNTHESIS_PROMPT,
         memory_update_prompt: str = MEMORY_UPDATE_PROMPT,
-        skill_update_prompt: str = SKILL_UPDATE_PROMPT,
     ) -> None:
         self._memory_prompt = memory_prompt
-        self._skill_prompt = skill_prompt
         self._memory_update_prompt = memory_update_prompt
-        self._skill_update_prompt = skill_update_prompt
 
-    async def synthesize(self, descriptions: list[FileDescription], *, chat: ChatFn) -> SynthesisResult:
-        """Initialization: build MEMORY/SKILL from scratch over all descriptions."""
+    async def synthesize(self, descriptions: list[FileDescription], *, chat: ChatFn) -> str:
+        """Initialization: build the MEMORY.md body from scratch over all descriptions."""
         formatted = self._format(descriptions)
         if not formatted:
-            return SynthesisResult()
-
+            return ""
         memory_raw = await chat(self._memory_prompt.replace(DESCRIPTIONS_PLACEHOLDER, formatted))
-        skills_raw = await chat(self._skill_prompt.replace(DESCRIPTIONS_PLACEHOLDER, formatted))
-
-        return SynthesisResult(
-            memory_body=self._clean_markdown(memory_raw),
-            skills=self._parse_skills(skills_raw),
-        )
+        return self._clean_markdown(memory_raw)
 
     async def update(
         self,
         descriptions: list[FileDescription],
         *,
         existing_memory: str,
-        existing_skills: dict[str, str],
         chat: ChatFn,
-    ) -> SynthesisResult:
-        """Incremental: merge the changed descriptions into existing artifacts."""
+    ) -> str:
+        """Incremental: merge the changed descriptions into the existing MEMORY.md body."""
         formatted = self._format(descriptions)
         if not formatted:
-            return SynthesisResult(memory_body=existing_memory, skills=dict(existing_skills))
-
+            return existing_memory
         memory_prompt = self._memory_update_prompt.replace(
             EXISTING_PLACEHOLDER, existing_memory.strip() or "(empty)"
         ).replace(DESCRIPTIONS_PLACEHOLDER, formatted)
         memory_raw = await chat(memory_prompt)
-
-        skill_prompt = self._skill_update_prompt.replace(
-            EXISTING_PLACEHOLDER, self._format_existing_skills(existing_skills) or "(none)"
-        ).replace(DESCRIPTIONS_PLACEHOLDER, formatted)
-        upserts = self._parse_skills(await chat(skill_prompt))
-
-        return SynthesisResult(
-            memory_body=self._clean_markdown(memory_raw),
-            skills={**existing_skills, **upserts},
-        )
-
-    @staticmethod
-    def _format_existing_skills(skills: dict[str, str]) -> str:
-        return "\n\n".join(f"## {slug}\n{body}".strip() for slug, body in sorted(skills.items()))
+        return self._clean_markdown(memory_raw)
 
     @staticmethod
     def _format(descriptions: list[FileDescription]) -> str:
         lines = [
-            f"- [{desc.modality}] {desc.url}: {desc.description}"
-            for desc in descriptions
-            if desc.description.strip()
+            f"- [{desc.modality}] {desc.url}: {desc.description}" for desc in descriptions if desc.description.strip()
         ]
         return "\n".join(lines)
 
@@ -117,45 +79,5 @@ def _clean_markdown(raw: str) -> str:
             text = re.sub(r"\n```$", "", text).strip()
         return text
 
-    def _parse_skills(self, raw: str) -> dict[str, str]:
-        payload = self._extract_json_array(raw)
-        if payload is None:
-            return {}
-        try:
-            parsed = json.loads(payload)
-        except (json.JSONDecodeError, TypeError):
-            return {}
-        if not isinstance(parsed, list):
-            return {}
-
-        skills: dict[str, str] = {}
-        used: dict[str, int] = {}
-        for entry in parsed:
-            if not isinstance(entry, dict):
-                continue
-            name = entry.get("name")
-            body = entry.get("body")
-            if not isinstance(name, str) or not isinstance(body, str):
-                continue
-            body = body.strip()
-            if not body:
-                continue
-            base = slugify(name)
-            count = used.get(base, 0)
-            slug = base if count == 0 else f"{base}-{count + 1}"
-            used[base] = count + 1
-            skills[slug] = body
-        return skills
-
-    @staticmethod
-    def _extract_json_array(raw: str) -> str | None:
-        if not raw:
-            return None
-        start = raw.find("[")
-        end = raw.rfind("]")
-        if start == -1 or end == -1 or end <= start:
-            return None
-        return raw[start : end + 1]
-
 
-__all__ = ["ChatFn", "MemorySynthesizer", "SynthesisResult"]
+__all__ = ["ChatFn", "MemorySynthesizer"]
diff --git a/src/memu/prompts/memory_fs/__init__.py b/src/memu/prompts/memory_fs/__init__.py
index 0ece4018..92a04bb1 100644
--- a/src/memu/prompts/memory_fs/__init__.py
+++ b/src/memu/prompts/memory_fs/__init__.py
@@ -1,8 +1,10 @@
-"""Prompts for the optional memory_fs synthesis bypass.
+"""Prompts for the optional MEMORY.md synthesis mode.
 
-Both prompts consume the shared trunk — the per-source multimodal descriptions —
-and synthesize one of the sibling artifacts. The literal token ``__DESCRIPTIONS__``
-is replaced (not ``str.format``) so description text containing braces is safe.
+These prompts consume the shared trunk — the per-source multimodal descriptions —
+and synthesize the ``MEMORY.md`` body. The literal token ``__DESCRIPTIONS__`` is
+replaced (not ``str.format``) so description text containing braces is safe. The
+``skill/`` tree is not synthesized here; it is built by the exporter from the
+``skill``-type memory items extracted during memorize.
 """
 
 from __future__ import annotations
@@ -27,21 +29,6 @@
 __DESCRIPTIONS__
 """
 
-SKILL_SYNTHESIS_PROMPT = """You are extracting reusable skills and tool patterns for an AI agent.
-
-From the source descriptions below, identify concrete, repeatable skills or tool
-usage patterns (what worked, how to repeat it, what to avoid). Ignore one-off
-facts, preferences, or trivia — those belong in the memory document, not here.
-
-Return ONLY a JSON array. Each element is an object:
-  {"name": "kebab-case-skill-name", "body": "Markdown body for this skill"}
-The "body" should be a self-contained Markdown skill document.
-If there are no genuine skills, return an empty array: []
-
-Source descriptions:
-__DESCRIPTIONS__
-"""
-
 MEMORY_UPDATE_PROMPT = """You are maintaining an AI agent's long-term memory document.
 
 Below is the CURRENT memory document, followed by NEW source descriptions that
@@ -63,31 +50,9 @@
 __DESCRIPTIONS__
 """
 
-SKILL_UPDATE_PROMPT = """You are maintaining an AI agent's skill library.
-
-Below are the EXISTING skills (name + body), followed by NEW source descriptions
-that were just added.
-
-Return ONLY a JSON array of skills to add or replace based on the new
-descriptions. Each element is an object:
-  {"name": "kebab-case-skill-name", "body": "Markdown body for this skill"}
-- To revise an existing skill, reuse its exact name and return the full new body.
-- To add a new skill, use a new name.
-- Only include skills actually affected by the new descriptions.
-- If the new descriptions contain nothing skill-worthy, return an empty array: []
-
-EXISTING skills:
-__EXISTING__
-
-NEW source descriptions:
-__DESCRIPTIONS__
-"""
-
 __all__ = [
     "DESCRIPTIONS_PLACEHOLDER",
     "EXISTING_PLACEHOLDER",
     "MEMORY_SYNTHESIS_PROMPT",
     "MEMORY_UPDATE_PROMPT",
-    "SKILL_SYNTHESIS_PROMPT",
-    "SKILL_UPDATE_PROMPT",
 ]
diff --git a/src/memu/prompts/memory_type/__init__.py b/src/memu/prompts/memory_type/__init__.py
index ea584bd4..a8a17ddc 100644
--- a/src/memu/prompts/memory_type/__init__.py
+++ b/src/memu/prompts/memory_type/__init__.py
@@ -1,7 +1,6 @@
 from memu.prompts.memory_type import behavior, event, knowledge, profile, skill, tool
 
-# DEFAULT_MEMORY_TYPES: list[str] = ["profile", "event", "knowledge", "behavior"]
-DEFAULT_MEMORY_TYPES: list[str] = ["profile", "event"]
+DEFAULT_MEMORY_TYPES: list[str] = ["profile", "event", "behavior", "skill", "tool"]
 
 PROMPTS: dict[str, str] = {
     "profile": profile.PROMPT.strip(),
diff --git a/tests/llm/test_grok_provider.py b/tests/llm/test_grok_provider.py
index e5d37933..ecedba67 100644
--- a/tests/llm/test_grok_provider.py
+++ b/tests/llm/test_grok_provider.py
@@ -26,7 +26,6 @@ async def test_client_initialization_with_grok_config(self, mock_async_openai):
             base_url=config.base_url,
             api_key="fake-key",  # In real app, this would be os.getenv(config.api_key)
             chat_model=config.chat_model,
-            embed_model=config.embed_model,
         )
 
         # Assert AsyncOpenAI was called with the correct base_url
diff --git a/tests/rust_entry_test.py b/tests/rust_entry_test.py
deleted file mode 100644
index 76f23a31..00000000
--- a/tests/rust_entry_test.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from memu import _rust_entry
-
-
-def test_rust_entry():
-    assert _rust_entry() == "Hello from memu!"
diff --git a/tests/test_folder_memorize.py b/tests/test_folder_memorize.py
new file mode 100644
index 00000000..3c298776
--- /dev/null
+++ b/tests/test_folder_memorize.py
@@ -0,0 +1,217 @@
+"""Tests for folder-based incremental memorize: scanning, diffing, cascade sync."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from memu.app import MemoryService
+from memu.blob.folder import (
+    MANIFEST_FILENAME,
+    diff_folder,
+    infer_modality,
+    load_manifest,
+    manifest_from_scan,
+    save_manifest,
+    scan_folder,
+)
+
+# -- folder module: scan / modality / manifest / diff ------------------------
+
+
+def test_infer_modality_by_extension() -> None:
+    assert infer_modality("a.json") == "conversation"
+    assert infer_modality("a.txt") == "document"
+    assert infer_modality("a.MD") == "document"
+    assert infer_modality("a.png") == "image"
+    assert infer_modality("a.mp4") == "video"
+    assert infer_modality("a.mp3") == "audio"
+    assert infer_modality("a.unknownext") is None
+
+
+def test_scan_folder_recurses_skips_unknown_and_hidden(tmp_path: Path) -> None:
+    (tmp_path / "a.txt").write_text("alpha", encoding="utf-8")
+    (tmp_path / "nested").mkdir()
+    (tmp_path / "nested" / "b.md").write_text("beta", encoding="utf-8")
+    (tmp_path / "skip.bin").write_text("binary", encoding="utf-8")  # unknown ext
+    (tmp_path / ".hidden.txt").write_text("hidden", encoding="utf-8")  # hidden file
+    (tmp_path / MANIFEST_FILENAME).write_text("{}", encoding="utf-8")  # manifest
+
+    scanned = scan_folder(tmp_path)
+
+    assert set(scanned) == {"a.txt", "nested/b.md"}
+    assert scanned["a.txt"].modality == "document"
+    assert scanned["nested/b.md"].modality == "document"
+    assert scanned["a.txt"].content_hash != scanned["nested/b.md"].content_hash
+
+
+def test_manifest_roundtrip_and_diff(tmp_path: Path) -> None:
+    (tmp_path / "keep.txt").write_text("keep", encoding="utf-8")
+    (tmp_path / "change.txt").write_text("v1", encoding="utf-8")
+    (tmp_path / "gone.txt").write_text("bye", encoding="utf-8")
+
+    first = scan_folder(tmp_path)
+    save_manifest(tmp_path, manifest_from_scan(first))
+    assert load_manifest(tmp_path) == manifest_from_scan(first)
+
+    # Mutate the folder: modify one file, delete another, add a new one.
+    (tmp_path / "change.txt").write_text("v2", encoding="utf-8")
+    (tmp_path / "gone.txt").unlink()
+    (tmp_path / "new.md").write_text("fresh", encoding="utf-8")
+
+    second = scan_folder(tmp_path)
+    diff = diff_folder(second, load_manifest(tmp_path))
+
+    assert [f.rel_path for f in diff.added] == ["new.md"]
+    assert [f.rel_path for f in diff.modified] == ["change.txt"]
+    assert diff.deleted == ["gone.txt"]
+    assert diff.has_changes and diff.has_removals
+
+
+def test_diff_added_only_has_no_removals(tmp_path: Path) -> None:
+    (tmp_path / "a.txt").write_text("a", encoding="utf-8")
+    diff = diff_folder(scan_folder(tmp_path), {})
+    assert [f.rel_path for f in diff.added] == ["a.txt"]
+    assert not diff.has_removals
+
+
+# -- service-level: cascade delete & orchestration ---------------------------
+
+
+def _service() -> MemoryService:
+    return MemoryService(
+        llm_profiles={"default": {"api_key": "test-key"}},
+        database_config={"metadata_store": {"provider": "inmemory"}},
+    )
+
+
+def _seed_resource_with_item(service: MemoryService, *, url: str, category_id: str, user: dict[str, Any]) -> str:
+    """Create a resource + one item + one relation, returning the resource id."""
+    store = service.database
+    res = store.resource_repo.create_resource(
+        url=url,
+        modality="document",
+        local_path=url,
+        caption="cap",
+        embedding=None,
+        user_data=dict(user),
+    )
+    item = store.memory_item_repo.create_item(
+        resource_id=res.id,
+        memory_type="profile",
+        summary=f"summary for {url}",
+        embedding=[0.0],
+        user_data=dict(user),
+    )
+    store.category_item_repo.link_item_category(item.id, category_id, dict(user))
+    return res.id
+
+
+async def test_cascade_delete_removes_resource_items_relations(monkeypatch) -> None:
+    service = _service()
+    store = service.database
+    user = {"user_id": "u1"}
+    ctx = service._get_context()
+    category_id = "cat-1"
+
+    keep_id = _seed_resource_with_item(service, url="/folder/keep.txt", category_id=category_id, user=user)
+    drop_id = _seed_resource_with_item(service, url="/folder/drop.txt", category_id=category_id, user=user)
+
+    patched: list[dict[str, Any]] = []
+
+    async def _fake_patch(updates, *, ctx, store, llm_client=None) -> None:
+        patched.append(updates)
+
+    monkeypatch.setattr(service, "_patch_category_summaries", _fake_patch)
+
+    removed = await service._cascade_delete_by_urls(
+        {"/folder/drop.txt"}, ctx=ctx, store=store, user_scope=user
+    )
+
+    assert [r.id for r in removed] == [drop_id]
+    remaining = store.resource_repo.list_resources(where=user)
+    assert keep_id in remaining
+    assert drop_id not in remaining
+    # The dropped resource's item and relation are gone; the kept one's survive.
+    items = store.memory_item_repo.list_items(where=user)
+    assert all(it.resource_id == keep_id for it in items.values())
+    relations = store.category_item_repo.list_relations(where=user)
+    assert len(relations) == 1
+    # Discarded content was fed to the summary recompute as (before, None).
+    assert patched and category_id in patched[0]
+    assert patched[0][category_id][1] is None
+
+
+async def test_memorize_folder_sync_add_modify_delete(tmp_path: Path, monkeypatch) -> None:
+    service = _service()
+    store = service.database
+    user = {"user_id": "u1"}
+
+    # Avoid LLM-dependent paths; exercise scan -> diff -> cascade -> manifest.
+    async def _noop_categories(*a, **k) -> None:
+        return None
+
+    async def _noop_patch(updates, *, ctx, store, llm_client=None) -> None:
+        return None
+
+    builds: list[Any] = []
+
+    async def _fake_build(user_scope, *, changed=None) -> dict[str, Any]:
+        builds.append(changed)
+        return {"written": [], "unchanged": [], "removed": []}
+
+    async def _fake_memorize_one(*, resource_url, modality, user_scope, ctx, store) -> dict[str, Any]:
+        res = store.resource_repo.create_resource(
+            url=resource_url,
+            modality=modality,
+            local_path=resource_url,
+            caption="cap",
+            embedding=None,
+            user_data=dict(user_scope or {}),
+        )
+        store.memory_item_repo.create_item(
+            resource_id=res.id,
+            memory_type="profile",
+            summary=f"summary {resource_url}",
+            embedding=[0.0],
+            user_data=dict(user_scope or {}),
+        )
+        return {"resources": [res], "response": {"items": [{"summary": "x"}]}}
+
+    monkeypatch.setattr(service, "_ensure_categories_ready", _noop_categories)
+    monkeypatch.setattr(service, "_patch_category_summaries", _noop_patch)
+    monkeypatch.setattr(service, "_build_memory_files", _fake_build)
+    monkeypatch.setattr(service, "_memorize_one", _fake_memorize_one)
+
+    (tmp_path / "a.txt").write_text("a-v1", encoding="utf-8")
+    (tmp_path / "b.md").write_text("b-v1", encoding="utf-8")
+
+    # First sync: both files added.
+    first = await service.memorize(folder=str(tmp_path), user=user)
+    assert sorted(first["added"]) == ["a.txt", "b.md"]
+    assert first["modified"] == [] and first["deleted"] == []
+    assert len(store.resource_repo.list_resources(where=user)) == 2
+    # Pure additions => incremental build (changed list passed, not None).
+    assert builds[-1] is not None
+    # Manifest persisted in the folder.
+    manifest_path = tmp_path / MANIFEST_FILENAME
+    assert manifest_path.exists()
+    assert set(json.loads(manifest_path.read_text(encoding="utf-8"))) == {"a.txt", "b.md"}
+
+    # Second sync: modify a.txt, delete b.md, add c.txt.
+    (tmp_path / "a.txt").write_text("a-v2", encoding="utf-8")
+    (tmp_path / "b.md").unlink()
+    (tmp_path / "c.txt").write_text("c-v1", encoding="utf-8")
+
+    second = await service.memorize(folder=str(tmp_path), user=user)
+    assert second["added"] == ["c.txt"]
+    assert second["modified"] == ["a.txt"]
+    assert second["deleted"] == ["b.md"]
+
+    root = tmp_path.resolve()
+    urls = {r.url for r in store.resource_repo.list_resources(where=user).values()}
+    assert urls == {str(root / "a.txt"), str(root / "c.txt")}
+    # Removals present => full rebuild (changed=None).
+    assert builds[-1] is None
+    assert set(json.loads(manifest_path.read_text(encoding="utf-8"))) == {"a.txt", "c.txt"}
diff --git a/tests/test_inmemory.py b/tests/test_inmemory.py
index 250f15a2..67cd9d64 100644
--- a/tests/test_inmemory.py
+++ b/tests/test_inmemory.py
@@ -8,7 +8,7 @@ async def main():
     api_key = os.environ.get("OPENAI_API_KEY")
     # dashscope_api_key = os.environ.get("DASHSCOPE_API_KEY")
     # voyage_api_key = os.environ.get("VOYAGE_API_KEY")
-    file_path = os.path.abspath("example/example_conversation.json")
+    folder = os.path.abspath("examples/resources/conversations")
 
     print("\n" + "=" * 60)
     print("[INMEMORY] Starting test...")
@@ -37,7 +37,7 @@ async def main():
 
     # Memorize
     print("\n[INMEMORY] Memorizing...")
-    memory = await service.memorize(resource_url=file_path, modality="conversation", user={"user_id": "123"})
+    memory = await service.memorize(folder=folder, user={"user_id": "123"})
     for cat in memory.get("categories", []):
         print(f"  - {cat.get('name')}: {(cat.get('summary') or '')[:80]}...")
 
diff --git a/tests/test_memory_files.py b/tests/test_memory_files.py
index cf7a6e55..185bd14f 100644
--- a/tests/test_memory_files.py
+++ b/tests/test_memory_files.py
@@ -3,29 +3,43 @@
 import json
 from pathlib import Path
 
-import pytest
-
 from memu.app import MemoryService
 from memu.memory_fs import MemoryFileExporter
 from memu.memory_fs.exporter import MANIFEST_NAME
 
-_SKILL_BODY = "---\nname: pour-over\n---\n# Pour-over brewing\nUse a 1:16 ratio."
+# A skill-type memory item's summary is a full skill profile with frontmatter.
+# The exporter parses the frontmatter for the folder slug (name) and the index
+# description, and renders the body verbatim into skill/<slug>/SKILL.md.
+_SKILL_PROFILE = """---
+name: pour-over-brewing
+description: Brew pour-over coffee at a 1:16 ratio.
+category: preferences
+---
+
+# Pour-over brewing
+
+Use a 1:16 coffee-to-water ratio for a clean cup.
+"""
 
 
 def _build_service(output_dir: Path) -> MemoryService:
     return MemoryService(
         llm_profiles={"default": {"api_key": "test-key"}},
         database_config={"metadata_store": {"provider": "inmemory"}},
-        memory_files_config={"enabled": True, "output_dir": str(output_dir)},
+        memory_files_config={"output_dir": str(output_dir)},
     )
 
 
-def _seed(service: MemoryService, *, user: dict[str, str]) -> dict[str, str]:
+def _seed(service: MemoryService, *, user: dict[str, str], source: Path | None = None) -> dict[str, str]:
     store = service.database
+    # A readable local_path makes the exporter copy the raw bytes into resource/.
+    if source is not None:
+        source.write_text("Raw notes: the user loves pour-over coffee.\n", encoding="utf-8")
+    local_path = str(source) if source is not None else "coffee.txt"
     resource = store.resource_repo.create_resource(
         url="docs/coffee.txt",
         modality="document",
-        local_path="coffee.txt",
+        local_path=local_path,
         caption="Notes about the user's coffee preferences.",
         embedding=None,
         user_data=dict(user),
@@ -37,40 +51,65 @@ def _seed(service: MemoryService, *, user: dict[str, str]) -> dict[str, str]:
         user_data=dict(user),
     )
     store.memory_category_repo.update_category(category_id=category.id, summary="The user likes pour-over coffee.")
+    # A skill extracted during memorize drives the skill/ tree.
     skill = store.memory_item_repo.create_item(
         resource_id=resource.id,
         memory_type="skill",
-        summary=_SKILL_BODY,
-        embedding=[0.1, 0.2],
+        summary=_SKILL_PROFILE,
+        embedding=[0.3, 0.4],
         user_data=dict(user),
     )
-    store.category_item_repo.link_item_category(skill.id, category.id, user_data=dict(user))
     return {"category_id": category.id, "resource_id": resource.id, "skill_id": skill.id}
 
 
 async def test_export_writes_readme_layout(tmp_path: Path) -> None:
-    service = _build_service(tmp_path)
-    _seed(service, user={"user_id": "u1"})
+    service = _build_service(tmp_path / "out")
+    _seed(service, user={"user_id": "u1"}, source=tmp_path / "coffee.txt")
+    out = tmp_path / "out"
 
     result = await service.export_memory_files(user={"user_id": "u1"})
 
     assert result["changed"] is True
     assert "INDEX.md" in result["written"]
     assert "MEMORY.md" in result["written"]
-    assert "skill/pour-over/SKILL.md" in result["written"]
-
-    memory_text = (tmp_path / "MEMORY.md").read_text(encoding="utf-8")
-    assert "## Preferences" in memory_text
-    assert "The user likes pour-over coffee." in memory_text
-
-    index_text = (tmp_path / "INDEX.md").read_text(encoding="utf-8")
-    assert "docs/coffee.txt" in index_text
+    assert "SKILL.md" in result["written"]
+    assert "skill/pour-over-brewing/SKILL.md" in result["written"]
+    # The raw source file is copied verbatim and the category gets its own file.
+    assert "resource/coffee.txt" in result["written"]
+    assert "memory/preferences.md" in result["written"]
+
+    memory_text = (out / "MEMORY.md").read_text(encoding="utf-8")
+    # MEMORY.md is a deterministic overview that links to each category file.
+    assert "## Overview" in memory_text
+    assert "**Preferences**" in memory_text
+    assert "memory/preferences.md" in memory_text
+
+    # The per-category file holds the actual summary content.
+    category_text = (out / "memory" / "preferences.md").read_text(encoding="utf-8")
+    assert "# Preferences" in category_text
+    assert "The user likes pour-over coffee." in category_text
+
+    # The raw bytes are copied into resource/.
+    raw_text = (out / "resource" / "coffee.txt").read_text(encoding="utf-8")
+    assert "loves pour-over coffee" in raw_text
+
+    index_text = (out / "INDEX.md").read_text(encoding="utf-8")
+    # INDEX.md indexes the raw source files under resource/, not folders/skills.
+    assert "## Files" in index_text
+    assert "[`coffee.txt`](resource/coffee.txt)" in index_text
     assert "coffee preferences" in index_text
-    assert "[pour-over](./skill/pour-over/SKILL.md)" in index_text
-    assert "**Preferences**" in index_text
+    assert "skill/pour-over" not in index_text
 
-    skill_text = (tmp_path / "skill" / "pour-over" / "SKILL.md").read_text(encoding="utf-8")
-    assert "Pour-over brewing" in skill_text
+    # The root SKILL.md is a table of contents over the skill/ tree, driven by the
+    # frontmatter parsed from each skill-type memory item.
+    skill_index = (out / "SKILL.md").read_text(encoding="utf-8")
+    assert "skill/pour-over-brewing/SKILL.md" in skill_index
+    assert "Brew pour-over coffee at a 1:16 ratio." in skill_index
+
+    # The skill profile is rendered verbatim into its own folder.
+    skill_text = (out / "skill" / "pour-over-brewing" / "SKILL.md").read_text(encoding="utf-8")
+    assert "# Pour-over brewing" in skill_text
+    assert "1:16 coffee-to-water ratio" in skill_text
 
 
 async def test_export_is_idempotent_until_data_changes(tmp_path: Path) -> None:
@@ -85,14 +124,16 @@ async def test_export_is_idempotent_until_data_changes(tmp_path: Path) -> None:
     assert second["written"] == []
     assert "MEMORY.md" in second["unchanged"]
 
-    # Changing only a folder summary touches MEMORY.md but not INDEX.md (a TOC).
+    # Changing only a category summary touches its memory/<slug>.md file, but not
+    # MEMORY.md (an overview of names/descriptions) or INDEX.md (a TOC).
     service.database.memory_category_repo.update_category(
         category_id=ids["category_id"],
         summary="The user now prefers espresso.",
     )
     third = await service.export_memory_files(user={"user_id": "u1"})
     assert third["changed"] is True
-    assert "MEMORY.md" in third["written"]
+    assert "memory/preferences.md" in third["written"]
+    assert "MEMORY.md" in third["unchanged"]
     assert "INDEX.md" in third["unchanged"]
 
 
@@ -101,13 +142,14 @@ async def test_export_removes_stale_skill_and_prunes_dirs(tmp_path: Path) -> Non
     _seed(service, user={"user_id": "u1"})
 
     await service.export_memory_files(user={"user_id": "u1"})
-    assert (tmp_path / "skill" / "pour-over" / "SKILL.md").exists()
+    assert (tmp_path / "skill" / "pour-over-brewing" / "SKILL.md").exists()
 
+    # Removing the skill-type memory item drops its skill folder.
     service.database.memory_item_repo.clear_items(where={"user_id": "u1"})
     result = await service.export_memory_files(user={"user_id": "u1"})
 
-    assert "skill/pour-over/SKILL.md" in result["removed"]
-    assert not (tmp_path / "skill" / "pour-over").exists()
+    assert "skill/pour-over-brewing/SKILL.md" in result["removed"]
+    assert not (tmp_path / "skill" / "pour-over-brewing").exists()
 
 
 async def test_export_respects_user_scope(tmp_path: Path) -> None:
@@ -127,22 +169,6 @@ async def test_export_respects_user_scope(tmp_path: Path) -> None:
     assert "Secret" not in memory_text
 
 
-async def test_export_disabled_raises(tmp_path: Path) -> None:
-    service = MemoryService(
-        llm_profiles={"default": {"api_key": "test-key"}},
-        database_config={"metadata_store": {"provider": "inmemory"}},
-    )
-    with pytest.raises(RuntimeError, match="disabled"):
-        await service.export_memory_files(user={"user_id": "u1"})
-
-
-def test_skill_name_from_frontmatter_and_fallbacks(tmp_path: Path) -> None:
-    exporter = MemoryFileExporter(str(tmp_path))
-    assert exporter._skill_name("---\nname: My Skill\n---\nbody", fallback="x") == "my-skill"
-    assert exporter._skill_name("# Heading Title\nbody", fallback="x") == "heading-title"
-    assert exporter._skill_name("plain text only", fallback="skill-abc123") == "skill-abc123"
-
-
 def test_exporter_manifest_roundtrip(tmp_path: Path) -> None:
     exporter = MemoryFileExporter(str(tmp_path))
     exporter._save_manifest({"MEMORY.md": "abc"})
diff --git a/tests/test_memory_fs_synthesis.py b/tests/test_memory_fs_synthesis.py
index 538ed4be..78701401 100644
--- a/tests/test_memory_fs_synthesis.py
+++ b/tests/test_memory_fs_synthesis.py
@@ -6,15 +6,19 @@
 from memu.memory_fs import FileDescription, MemoryFileExporter, MemorySynthesizer
 
 _MEMORY_MD = "## Profile\nThe user is a coffee enthusiast.\n\n## Preferences\nPrefers pour-over."
-_SKILLS_JSON = '[{"name": "Pour Over", "body": "# Pour-over\\nUse a 1:16 ratio."}]'
+_UPDATE_MEMORY_MD = "## Profile\nThe user is a coffee enthusiast.\n\n## Preferences\nLikes oat milk."
+
+
+def _skill_profile(name: str, description: str, body: str) -> str:
+    return f"---\nname: {name}\ndescription: {description}\ncategory: technical_skills\n---\n\n{body}\n"
 
 
 class _FakeChatClient:
-    """Stand-in LLM client: returns canned memory/skill responses by prompt shape."""
+    """Stand-in LLM client: returns init vs update MEMORY.md bodies by prompt shape."""
 
     async def chat(self, prompt: str, system_prompt: str | None = None) -> str:
-        if "JSON array" in prompt:
-            return _SKILLS_JSON
+        if "CURRENT memory document" in prompt:
+            return _UPDATE_MEMORY_MD
         return _MEMORY_MD
 
 
@@ -29,121 +33,147 @@ def _descriptions() -> list[FileDescription]:
     ]
 
 
-async def test_synthesizer_parses_memory_and_skills() -> None:
-    synth = MemorySynthesizer()
-    result = await synth.synthesize(_descriptions(), chat=_FakeChatClient().chat)
+# -- MemorySynthesizer (MEMORY.md only) --------------------------------------
+
 
-    assert "## Profile" in result.memory_body
-    assert "pour-over" in result.memory_body.lower()
-    assert result.skills == {"pour-over": "# Pour-over\nUse a 1:16 ratio."}
+async def test_synthesizer_synthesizes_memory_body() -> None:
+    synth = MemorySynthesizer()
+    body = await synth.synthesize(_descriptions(), chat=_FakeChatClient().chat)
+    assert "## Profile" in body
+    assert "coffee enthusiast" in body.lower()
 
 
 async def test_synthesizer_empty_when_no_descriptions() -> None:
     synth = MemorySynthesizer()
-    result = await synth.synthesize([], chat=_FakeChatClient().chat)
-    assert result.memory_body == ""
-    assert result.skills == {}
+    assert await synth.synthesize([], chat=_FakeChatClient().chat) == ""
 
 
-def test_synthesizer_helpers() -> None:
+async def test_synthesizer_update_merges_into_existing() -> None:
+    synth = MemorySynthesizer()
+    body = await synth.update(
+        _descriptions(),
+        existing_memory="## Profile\nOld profile.",
+        chat=_FakeChatClient().chat,
+    )
+    assert "Likes oat milk." in body
+
+
+async def test_synthesizer_update_noop_without_descriptions() -> None:
+    synth = MemorySynthesizer()
+    body = await synth.update([], existing_memory="## Profile\nKeep me.", chat=_FakeChatClient().chat)
+    assert body == "## Profile\nKeep me."
+
+
+def test_synthesizer_cleans_code_fences() -> None:
     synth = MemorySynthesizer()
     assert synth._clean_markdown("```markdown\n# Hi\n```") == "# Hi"
-    assert synth._parse_skills("garbage, no array") == {}
-    assert synth._parse_skills("[]") == {}
-    assert synth._parse_skills('[{"name": "A", "body": ""}]') == {}
-    duplicate = '[{"name": "A", "body": "x"}, {"name": "A", "body": "y"}]'
-    assert synth._parse_skills(duplicate) == {"a": "x", "a-2": "y"}
 
 
-def test_exporter_override_path(tmp_path: Path) -> None:
-    service = MemoryService(
-        llm_profiles={"default": {"api_key": "test-key"}},
-        database_config={"metadata_store": {"provider": "inmemory"}},
-    )
-    exporter = MemoryFileExporter(str(tmp_path))
+# -- skill/ rendered from skill-type memory items ----------------------------
 
-    result = exporter.export(
-        service.database,
-        memory_body="## Profile\nSynthesized.",
-        skills={"brewing": "# Brewing\nbody"},
-    )
 
-    assert "MEMORY.md" in result.written
-    assert "skill/brewing/SKILL.md" in result.written
-    assert "Synthesized." in (tmp_path / "MEMORY.md").read_text(encoding="utf-8")
-    assert "# Brewing" in (tmp_path / "skill" / "brewing" / "SKILL.md").read_text(encoding="utf-8")
-    assert "[brewing](./skill/brewing/SKILL.md)" in (tmp_path / "INDEX.md").read_text(encoding="utf-8")
+def _seed_skill(service: MemoryService, *, name: str, description: str, body: str, user: dict[str, str]) -> None:
+    resource = service.database.resource_repo.create_resource(
+        url=f"docs/{name}.txt",
+        modality="document",
+        local_path=f"{name}.txt",
+        caption=f"Source for {name}.",
+        embedding=None,
+        user_data=dict(user),
+    )
+    service.database.memory_item_repo.create_item(
+        resource_id=resource.id,
+        memory_type="skill",
+        summary=_skill_profile(name, description, body),
+        embedding=[0.1, 0.2],
+        user_data=dict(user),
+    )
 
 
-async def test_service_synthesis_wiring(tmp_path: Path, monkeypatch) -> None:
-    service = MemoryService(
+def _service(tmp_path: Path, **memory_files: object) -> MemoryService:
+    return MemoryService(
         llm_profiles={"default": {"api_key": "test-key"}},
         database_config={"metadata_store": {"provider": "inmemory"}},
-        memory_files_config={"enabled": True, "output_dir": str(tmp_path), "synthesize": True},
-    )
-    service.database.resource_repo.create_resource(
-        url="docs/coffee.txt",
-        modality="document",
-        local_path="coffee.txt",
-        caption="The user likes pour-over coffee.",
-        embedding=None,
-        user_data={"user_id": "u1"},
+        memory_files_config={"output_dir": str(tmp_path), **memory_files},
     )
-    monkeypatch.setattr(service, "_get_llm_client", lambda *a, **k: _FakeChatClient())
 
-    result = await service.export_memory_files(user={"user_id": "u1"})
 
-    assert "MEMORY.md" in result["written"]
-    assert "skill/pour-over/SKILL.md" in result["written"]
-    memory_text = (tmp_path / "MEMORY.md").read_text(encoding="utf-8")
-    assert "The user is a coffee enthusiast." in memory_text
+async def test_export_renders_skill_tree_from_items(tmp_path: Path) -> None:
+    service = _service(tmp_path)
+    _seed_skill(
+        service,
+        name="canary-deployment",
+        description="Gradually shift traffic with monitoring.",
+        body="# Canary deployment\nShift traffic slowly.",
+        user={"user_id": "u1"},
+    )
 
+    result = await service.export_memory_files(user={"user_id": "u1"})
 
-# -- incremental update path -------------------------------------------------
+    assert "skill/canary-deployment/SKILL.md" in result["written"]
+    skill_text = (tmp_path / "skill" / "canary-deployment" / "SKILL.md").read_text(encoding="utf-8")
+    assert "# Canary deployment" in skill_text
+    index_text = (tmp_path / "SKILL.md").read_text(encoding="utf-8")
+    assert "Gradually shift traffic with monitoring." in index_text
 
-_UPDATE_MEMORY_MD = "## Profile\nThe user is a coffee enthusiast.\n\n## Preferences\nLikes oat milk."
-_UPDATE_SKILLS_JSON = '[{"name": "Latte Art", "body": "# Latte art\\nPour slowly."}]'
 
+def test_exporter_parses_frontmatter_and_dedupes_slugs(tmp_path: Path) -> None:
+    service = MemoryService(
+        llm_profiles={"default": {"api_key": "test-key"}},
+        database_config={"metadata_store": {"provider": "inmemory"}},
+    )
+    exporter = MemoryFileExporter(str(tmp_path))
 
-class _InitUpdateChatClient:
-    """Returns init vs update payloads based on which prompt template fired."""
+    # No frontmatter -> name/description fall back to derived values.
+    assert exporter._parse_skill_frontmatter("# Just a heading\nbody") == ("", "")
+    name, description = exporter._parse_skill_frontmatter(_skill_profile("brew", "Brew coffee.", "body"))
+    assert name == "brew"
+    assert description == "Brew coffee."
 
-    async def chat(self, prompt: str, system_prompt: str | None = None) -> str:
-        is_update = "CURRENT memory document" in prompt or "EXISTING skills" in prompt
-        if "JSON array" in prompt:
-            return _UPDATE_SKILLS_JSON if is_update else _SKILLS_JSON
-        return _UPDATE_MEMORY_MD if is_update else _MEMORY_MD
+    items = [
+        service.database.memory_item_repo.create_item(
+            resource_id="r1",
+            memory_type="skill",
+            summary=_skill_profile("brew", "Brew coffee.", "first"),
+            embedding=[0.1],
+            user_data={},
+        ),
+        service.database.memory_item_repo.create_item(
+            resource_id="r2",
+            memory_type="skill",
+            summary=_skill_profile("brew", "Brew tea too.", "second"),
+            embedding=[0.1],
+            user_data={},
+        ),
+        service.database.memory_item_repo.create_item(
+            resource_id="r3",
+            memory_type="event",
+            summary="not a skill",
+            embedding=[0.1],
+            user_data={},
+        ),
+    ]
+    skills = exporter._skills_from_items(items)
+    assert set(skills) == {"brew", "brew-2"}
 
 
-async def test_synthesizer_update_merges_into_existing() -> None:
-    synth = MemorySynthesizer()
-    result = await synth.update(
-        _descriptions(),
-        existing_memory="## Profile\nOld profile.",
-        existing_skills={"pour-over": "# Pour-over\nUse a 1:16 ratio."},
-        chat=_InitUpdateChatClient().chat,
+def test_exporter_memory_body_override(tmp_path: Path) -> None:
+    service = MemoryService(
+        llm_profiles={"default": {"api_key": "test-key"}},
+        database_config={"metadata_store": {"provider": "inmemory"}},
     )
+    exporter = MemoryFileExporter(str(tmp_path))
 
-    assert "Likes oat milk." in result.memory_body
-    # Existing skill is preserved, the new one is upserted alongside it.
-    assert result.skills["pour-over"] == "# Pour-over\nUse a 1:16 ratio."
-    assert result.skills["latte-art"] == "# Latte art\nPour slowly."
-
+    result = exporter.export(service.database, memory_body="## Profile\nSynthesized.")
 
-async def test_synthesizer_update_noop_without_descriptions() -> None:
-    synth = MemorySynthesizer()
-    existing_skills = {"pour-over": "# Pour-over"}
-    result = await synth.update(
-        [],
-        existing_memory="## Profile\nKeep me.",
-        existing_skills=existing_skills,
-        chat=_InitUpdateChatClient().chat,
-    )
-    assert result.memory_body == "## Profile\nKeep me."
-    assert result.skills == existing_skills
+    assert "MEMORY.md" in result.written
+    assert "Synthesized." in (tmp_path / "MEMORY.md").read_text(encoding="utf-8")
+    # No skill items -> the root SKILL.md is still written as an empty index.
+    assert "SKILL.md" in result.written
+    assert "_No skills yet._" in (tmp_path / "SKILL.md").read_text(encoding="utf-8")
 
 
-def test_exporter_read_helpers_roundtrip(tmp_path: Path) -> None:
+def test_exporter_read_memory_body_roundtrip(tmp_path: Path) -> None:
     service = MemoryService(
         llm_profiles={"default": {"api_key": "test-key"}},
         database_config={"metadata_store": {"provider": "inmemory"}},
@@ -151,47 +181,53 @@ def test_exporter_read_helpers_roundtrip(tmp_path: Path) -> None:
     exporter = MemoryFileExporter(str(tmp_path))
 
     assert exporter.artifacts_exist() is False
-    exporter.export(
-        service.database,
-        memory_body="## Profile\nSynthesized body.",
-        skills={"brewing": "# Brewing\nbody"},
-    )
+    exporter.export(service.database, memory_body="## Profile\nSynthesized body.")
 
     assert exporter.artifacts_exist() is True
     assert exporter.read_memory_body() == "## Profile\nSynthesized body."
-    assert exporter.read_skills() == {"brewing": "# Brewing\nbody"}
 
 
-async def test_service_init_then_update(tmp_path: Path, monkeypatch) -> None:
-    service = MemoryService(
-        llm_profiles={"default": {"api_key": "test-key"}},
-        database_config={"metadata_store": {"provider": "inmemory"}},
-        memory_files_config={
-            "enabled": True,
-            "output_dir": str(tmp_path),
-            "synthesize": True,
-            "update_on_memorize": True,
-        },
+# -- service synthesis wiring ------------------------------------------------
+
+
+async def test_service_synthesis_wiring(tmp_path: Path, monkeypatch) -> None:
+    service = _service(tmp_path, synthesize=True)
+    _seed_skill(
+        service,
+        name="pour-over",
+        description="Brew pour-over coffee.",
+        body="# Pour-over\nUse a 1:16 ratio.",
+        user={"user_id": "u1"},
     )
-    monkeypatch.setattr(service, "_get_llm_client", lambda *a, **k: _InitUpdateChatClient())
+    monkeypatch.setattr(service, "_get_llm_client", lambda *a, **k: _FakeChatClient())
 
-    repo = service.database.resource_repo
-    repo.create_resource(
-        url="docs/coffee.txt",
-        modality="document",
-        local_path="coffee.txt",
-        caption="The user likes pour-over coffee.",
-        embedding=None,
-        user_data={"user_id": "u1"},
+    result = await service.export_memory_files(user={"user_id": "u1"})
+
+    assert "MEMORY.md" in result["written"]
+    assert "skill/pour-over/SKILL.md" in result["written"]
+    memory_text = (tmp_path / "MEMORY.md").read_text(encoding="utf-8")
+    assert "The user is a coffee enthusiast." in memory_text
+
+
+async def test_service_init_then_update(tmp_path: Path, monkeypatch) -> None:
+    service = _service(tmp_path, synthesize=True)
+    monkeypatch.setattr(service, "_get_llm_client", lambda *a, **k: _FakeChatClient())
+
+    _seed_skill(
+        service,
+        name="pour-over",
+        description="Brew pour-over coffee.",
+        body="# Pour-over\nUse a 1:16 ratio.",
+        user={"user_id": "u1"},
     )
 
-    # First pass: no tree yet -> initialization from the full store.
+    # First pass: no tree yet -> initialization synthesizes the MEMORY.md body.
     init = await service.export_memory_files(user={"user_id": "u1"})
     assert "skill/pour-over/SKILL.md" in init["written"]
     assert "coffee enthusiast" in (tmp_path / "MEMORY.md").read_text(encoding="utf-8")
 
-    # Second pass: tree exists -> incremental update from the changed resource only.
-    changed = repo.create_resource(
+    # Second pass: tree exists -> incremental MEMORY.md update from the changed resource.
+    changed = service.database.resource_repo.create_resource(
         url="docs/latte.txt",
         modality="document",
         local_path="latte.txt",
@@ -203,6 +239,6 @@ async def test_service_init_then_update(tmp_path: Path, monkeypatch) -> None:
 
     memory_text = (tmp_path / "MEMORY.md").read_text(encoding="utf-8")
     assert "Likes oat milk." in memory_text
-    assert "skill/latte-art/SKILL.md" in (updated["written"] + updated["unchanged"])
-    # The originally-initialized skill survives the incremental update.
+    # The skill folder, rebuilt from the persisted skill item, survives.
     assert (tmp_path / "skill" / "pour-over" / "SKILL.md").exists()
+    assert "MEMORY.md" in (updated["written"] + updated["unchanged"])
diff --git a/tests/test_openrouter.py b/tests/test_openrouter.py
index ba4b47c4..34ae446a 100644
--- a/tests/test_openrouter.py
+++ b/tests/test_openrouter.py
@@ -43,12 +43,10 @@ def _print_items(items, max_items=3):
             print(f"    - [{memory_type}] {summary}...")
 
 
-async def _test_memorize(service, file_path, output_data):
+async def _test_memorize(service, folder, output_data):
     """Test conversation memorization."""
     print("\n[OPENROUTER] Test 1: Memorizing conversation...")
-    memory = await service.memorize(
-        resource_url=file_path, modality="conversation", user={"user_id": "openrouter_test_user"}
-    )
+    memory = await service.memorize(folder=folder, user={"user_id": "openrouter_test_user"})
     items_count = len(memory.get("items", []))
     categories_count = len(memory.get("categories", []))
 
@@ -89,9 +87,9 @@ async def test_openrouter_full_workflow():
     if not api_key:
         pytest.skip("OPENROUTER_API_KEY environment variable not set")
 
-    file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "example", "example_conversation.json"))
-    if not os.path.exists(file_path):
-        pytest.skip(f"Test file not found: {file_path}")
+    folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "examples", "resources", "conversations"))
+    if not os.path.isdir(folder):
+        pytest.skip(f"Test folder not found: {folder}")
 
     output_data: dict[str, Any] = {}
 
@@ -123,7 +121,7 @@ async def test_openrouter_full_workflow():
         {"role": "user", "content": {"text": "What foods does the user like to eat?"}},
     ]
 
-    await _test_memorize(service, file_path, output_data)
+    await _test_memorize(service, folder, output_data)
     await _test_retrieve(service, queries, "rag", 2, output_data)
     await _test_retrieve(service, queries, "llm", 3, output_data)
 
diff --git a/tests/test_postgres.py b/tests/test_postgres.py
index 8b375a30..ab11917f 100644
--- a/tests/test_postgres.py
+++ b/tests/test_postgres.py
@@ -8,7 +8,7 @@ async def main():
     api_key = os.environ.get("OPENAI_API_KEY")
     # Default port 5432; use 5433 if 5432 is occupied
     postgres_dsn = os.environ.get("POSTGRES_DSN", "postgresql+psycopg://postgres:postgres@127.0.0.1:5432/memu")
-    file_path = os.path.abspath("tests/example/example_conversation.json")
+    folder = os.path.abspath("examples/resources/conversations")
 
     print("\n" + "=" * 60)
     print("[POSTGRES] Starting test...")
@@ -30,7 +30,7 @@ async def main():
 
     # Memorize
     print("\n[POSTGRES] Memorizing...")
-    memory = await service.memorize(resource_url=file_path, modality="conversation", user={"user_id": "123"})
+    memory = await service.memorize(folder=folder, user={"user_id": "123"})
     for cat in memory.get("categories", []):
         print(f"  - {cat.get('name')}: {(cat.get('summary') or '')[:80]}...")
 
diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py
index 3031c56b..50df9e68 100644
--- a/tests/test_sqlite.py
+++ b/tests/test_sqlite.py
@@ -23,7 +23,7 @@ def _print_results(title: str, result: dict) -> None:
 async def main():
     """Test with SQLite storage."""
     api_key = os.environ.get("OPENAI_API_KEY")
-    file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "example", "example_conversation.json"))
+    folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "examples", "resources", "conversations"))
 
     # Create a temporary SQLite database file
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
@@ -52,7 +52,7 @@ async def main():
 
         # Memorize
         print("\n[SQLITE] Memorizing...")
-        memory = await service.memorize(resource_url=file_path, modality="conversation", user={"user_id": "123"})
+        memory = await service.memorize(folder=folder, user={"user_id": "123"})
         for cat in memory.get("categories", []):
             print(f"  - {cat.get('name')}: {(cat.get('summary') or '')[:80]}...")
 
diff --git a/uv.lock b/uv.lock
index 76e7b0c6..01864e3f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.13"
 
 [[package]]
@@ -458,6 +458,7 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" },
     { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" },
     { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" },
+    { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" },
     { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" },
     { url = "https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" },
     { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" },
@@ -465,6 +466,7 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d7/7c/f0a6d0ede2c7bf092d00bc83ad5bafb7e6ec9b4aab2fbdfa6f134dc73327/greenlet-3.3.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:60c2ef0f578afb3c8d92ea07ad327f9a062547137afe91f38408f08aacab667f", size = 275671, upload-time = "2025-12-04T14:23:05.267Z" },
     { url = "https://files.pythonhosted.org/packages/44/06/dac639ae1a50f5969d82d2e3dd9767d30d6dbdbab0e1a54010c8fe90263c/greenlet-3.3.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5d554d0712ba1de0a6c94c640f7aeba3f85b3a6e1f2899c11c2c0428da9365", size = 646360, upload-time = "2025-12-04T14:50:10.026Z" },
     { url = "https://files.pythonhosted.org/packages/e0/94/0fb76fe6c5369fba9bf98529ada6f4c3a1adf19e406a47332245ef0eb357/greenlet-3.3.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3a898b1e9c5f7307ebbde4102908e6cbfcb9ea16284a3abe15cab996bee8b9b3", size = 658160, upload-time = "2025-12-04T14:57:45.41Z" },
+    { url = "https://files.pythonhosted.org/packages/93/79/d2c70cae6e823fac36c3bbc9077962105052b7ef81db2f01ec3b9bf17e2b/greenlet-3.3.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dcd2bdbd444ff340e8d6bdf54d2f206ccddbb3ccfdcd3c25bf4afaa7b8f0cf45", size = 671388, upload-time = "2025-12-04T15:07:15.789Z" },
     { url = "https://files.pythonhosted.org/packages/b8/14/bab308fc2c1b5228c3224ec2bf928ce2e4d21d8046c161e44a2012b5203e/greenlet-3.3.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5773edda4dc00e173820722711d043799d3adb4f01731f40619e07ea2750b955", size = 660166, upload-time = "2025-12-04T14:26:05.099Z" },
     { url = "https://files.pythonhosted.org/packages/4b/d2/91465d39164eaa0085177f61983d80ffe746c5a1860f009811d498e7259c/greenlet-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ac0549373982b36d5fd5d30beb8a7a33ee541ff98d2b502714a09f1169f31b55", size = 1615193, upload-time = "2025-12-04T15:04:27.041Z" },
     { url = "https://files.pythonhosted.org/packages/42/1b/83d110a37044b92423084d52d5d5a3b3a73cafb51b547e6d7366ff62eff1/greenlet-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d198d2d977460358c3b3a4dc844f875d1adb33817f0613f663a656f463764ccc", size = 1683653, upload-time = "2025-12-04T14:27:32.366Z" },
@@ -472,6 +474,7 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a0/66/bd6317bc5932accf351fc19f177ffba53712a202f9df10587da8df257c7e/greenlet-3.3.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d6ed6f85fae6cdfdb9ce04c9bf7a08d666cfcfb914e7d006f44f840b46741931", size = 282638, upload-time = "2025-12-04T14:25:20.941Z" },
     { url = "https://files.pythonhosted.org/packages/30/cf/cc81cb030b40e738d6e69502ccbd0dd1bced0588e958f9e757945de24404/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9125050fcf24554e69c4cacb086b87b3b55dc395a8b3ebe6487b045b2614388", size = 651145, upload-time = "2025-12-04T14:50:11.039Z" },
     { url = "https://files.pythonhosted.org/packages/9c/ea/1020037b5ecfe95ca7df8d8549959baceb8186031da83d5ecceff8b08cd2/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:87e63ccfa13c0a0f6234ed0add552af24cc67dd886731f2261e46e241608bee3", size = 654236, upload-time = "2025-12-04T14:57:47.007Z" },
+    { url = "https://files.pythonhosted.org/packages/69/cc/1e4bae2e45ca2fa55299f4e85854606a78ecc37fead20d69322f96000504/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2662433acbca297c9153a4023fe2161c8dcfdcc91f10433171cf7e7d94ba2221", size = 662506, upload-time = "2025-12-04T15:07:16.906Z" },
     { url = "https://files.pythonhosted.org/packages/57/b9/f8025d71a6085c441a7eaff0fd928bbb275a6633773667023d19179fe815/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3c6e9b9c1527a78520357de498b0e709fb9e2f49c3a513afd5a249007261911b", size = 653783, upload-time = "2025-12-04T14:26:06.225Z" },
     { url = "https://files.pythonhosted.org/packages/f6/c7/876a8c7a7485d5d6b5c6821201d542ef28be645aa024cfe1145b35c120c1/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:286d093f95ec98fdd92fcb955003b8a3d054b4e2cab3e2707a5039e7b50520fd", size = 1614857, upload-time = "2025-12-04T15:04:28.484Z" },
     { url = "https://files.pythonhosted.org/packages/4f/dc/041be1dff9f23dac5f48a43323cd0789cb798342011c19a248d9c9335536/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c10513330af5b8ae16f023e8ddbfb486ab355d04467c4679c5cfe4659975dd9", size = 1676034, upload-time = "2025-12-04T14:27:33.531Z" },
@@ -929,14 +932,13 @@ wheels = [
 
 [[package]]
 name = "memu-py"
-version = "1.5.0"
+version = "1.5.1"
 source = { editable = "." }
 dependencies = [
     { name = "alembic" },
     { name = "defusedxml" },
     { name = "httpx" },
     { name = "langchain-core" },
-    { name = "lazyllm" },
     { name = "numpy" },
     { name = "openai" },
     { name = "pendulum" },
@@ -952,6 +954,9 @@ langgraph = [
     { name = "langchain-core" },
     { name = "langgraph" },
 ]
+lazyllm = [
+    { name = "lazyllm" },
+]
 postgres = [
     { name = "pgvector" },
     { name = "sqlalchemy", extra = ["postgresql-psycopgbinary"] },
@@ -1002,7 +1007,7 @@ requires-dist = [
     { name = "langchain-core", specifier = ">=1.2.7" },
     { name = "langchain-core", marker = "extra == 'langgraph'", specifier = ">=0.1.0" },
     { name = "langgraph", marker = "extra == 'langgraph'", specifier = ">=0.0.10" },
-    { name = "lazyllm", specifier = ">=0.7.3" },
+    { name = "lazyllm", marker = "extra == 'lazyllm'", specifier = ">=0.7.3" },
     { name = "numpy", specifier = ">=2.3.4" },
     { name = "openai", specifier = ">=2.8.0" },
     { name = "pendulum", specifier = ">=3.1.0" },
@@ -1011,7 +1016,7 @@ requires-dist = [
     { name = "sqlalchemy", extras = ["postgresql-psycopgbinary"], marker = "extra == 'postgres'", specifier = ">=2.0.36" },
     { name = "sqlmodel", specifier = ">=0.0.27" },
 ]
-provides-extras = ["postgres", "langgraph", "claude"]
+provides-extras = ["postgres", "langgraph", "claude", "lazyllm"]
 
 [package.metadata.requires-dev]
 dev = [