Harden messaging channel handoff

2026-05-25 11:10:18 +00:00 · 2026-05-03 12:59:07 +08:00
parent 20ef643bb8
commit c7e52084ba
10 changed files with 2145 additions and 150 deletions
@@ -255,6 +255,14 @@ def read_importable_agent_session_rows(
        parent_expr = _optional_col('parent_session_id', session_cols)
        ended_expr = _optional_col('ended_at', session_cols)
        end_reason_expr = _optional_col('end_reason', session_cols)
+        user_id_expr = _optional_col('user_id', session_cols)
+        chat_id_expr = _optional_col('chat_id', session_cols)
+        chat_type_expr = _optional_col('chat_type', session_cols)
+        thread_id_expr = _optional_col('thread_id', session_cols)
+        session_key_expr = _optional_col('session_key', session_cols)
+        origin_chat_id_expr = _optional_col('origin_chat_id', session_cols)
+        origin_user_id_expr = _optional_col('origin_user_id', session_cols)
+        platform_expr = _optional_col('platform', session_cols)

        where_clauses = ["s.source IS NOT NULL"]
        params: list[str] = []
@@ -269,6 +277,14 @@ def read_importable_agent_session_rows(
            f"""
            SELECT s.id, s.title, s.model, s.message_count,
                   s.started_at, s.source,
+                   {user_id_expr},
+                   {chat_id_expr},
+                   {chat_type_expr},
+                   {thread_id_expr},
+                   {session_key_expr},
+                   {origin_chat_id_expr},
+                   {origin_user_id_expr},
+                   {platform_expr},
                   {parent_expr},
                   {ended_expr},
                   {end_reason_expr},
@@ -1068,6 +1068,12 @@ def get_cli_sessions() -> list:
                'profile': profile,
                'source_tag': _source,
                'raw_source': row.get('raw_source'),
+                'user_id': row.get('user_id'),
+                'chat_id': row.get('chat_id') or row.get('origin_chat_id'),
+                'chat_type': row.get('chat_type'),
+                'thread_id': row.get('thread_id'),
+                'session_key': row.get('session_key'),
+                'platform': row.get('platform'),
                'session_source': row.get('session_source'),
                'source_label': row.get('source_label'),
                'parent_session_id': row.get('parent_session_id'),
@@ -1075,6 +1081,8 @@ def get_cli_sessions() -> list:
                'parent_source': row.get('parent_source'),
                'relationship_type': row.get('relationship_type'),
                '_parent_lineage_root_id': row.get('_parent_lineage_root_id'),
+                'end_reason': row.get('end_reason'),
+                'actual_message_count': row.get('actual_message_count'),
                '_lineage_root_id': row.get('_lineage_root_id'),
                '_lineage_tip_id': row.get('_lineage_tip_id'),
                '_compression_segment_count': row.get('_compression_segment_count'),
@@ -15,6 +15,7 @@ import sys
 import threading
 import time
 import uuid
+import re
 from pathlib import Path
 from urllib.parse import parse_qs
 from api.agent_sessions import MESSAGING_SOURCES
@@ -748,7 +749,6 @@ def _resolve_effective_session_model_for_display(session) -> str:
    )
    return effective_model or original_model

-
 def _resolve_effective_session_model_provider_for_display(session) -> str | None:
    original_model = getattr(session, "model", None) or ""
    _model, provider, _changed = _resolve_compatible_session_model_state(
@@ -1691,6 +1691,11 @@ def handle_get(handler, parsed) -> bool:
            _t1 = _time.monotonic()
            s = get_session(sid, metadata_only=(not load_messages))
            _clear_stale_stream_state(s)
+            cli_meta = _lookup_cli_session_metadata(sid)
+            is_messaging_session = _is_messaging_session_record(s) or _is_messaging_session_record(cli_meta)
+            cli_messages = []
+            if is_messaging_session:
+                cli_messages = get_cli_session_messages(sid)
            _t2 = _time.monotonic()
            effective_model = (
                _resolve_effective_session_model_for_display(s)
@@ -1703,7 +1708,13 @@ def handle_get(handler, parsed) -> bool:
                else None
            )
            _t3 = _time.monotonic()
-            _all_msgs = s.messages if load_messages else []
+            if load_messages:
+                if is_messaging_session and cli_messages:
+                    _all_msgs = cli_messages
+                else:
+                    _all_msgs = s.messages
+            else:
+                _all_msgs = []
            if load_messages:
                if msg_before is not None:
                    # Scroll-to-top paging: msg_before is a 0-based index into
@@ -1748,6 +1759,8 @@ def handle_get(handler, parsed) -> bool:
                "threshold_tokens": getattr(s, "threshold_tokens", 0) or 0,
                "last_prompt_tokens": getattr(s, "last_prompt_tokens", 0) or 0,
            }
+            if cli_meta and _is_messaging_session_record(cli_meta):
+                raw = _merge_cli_sidebar_metadata(raw, cli_meta)
            # Signal to the frontend that older messages were omitted.
            # For msg_before paging, compare against the filtered set,
            # not the full list — otherwise we signal truncation even when
@@ -1783,13 +1796,9 @@ def handle_get(handler, parsed) -> bool:
            return resp
        except KeyError:
            # Not a WebUI session -- try CLI store
+            cli_meta = _lookup_cli_session_metadata(sid)
            msgs = get_cli_session_messages(sid)
            if msgs:
-                cli_meta = None
-                for cs in get_cli_sessions():
-                    if cs["session_id"] == sid:
-                        cli_meta = cs
-                        break
                sess = {
                    "session_id": sid,
                    "title": (cli_meta or {}).get("title", "CLI Session"),
@@ -1799,15 +1808,21 @@ def handle_get(handler, parsed) -> bool:
                    "created_at": (cli_meta or {}).get("created_at", 0),
                    "updated_at": (cli_meta or {}).get("updated_at", 0),
                    "last_message_at": (cli_meta or {}).get("last_message_at")
-                    or (cli_meta or {}).get("updated_at", 0),
+                    or (cli_meta or {}).get("updated_at", 0)
+                    or (msgs[-1] if msgs else {"timestamp": 0}).get("timestamp", 0),
                    "pinned": False,
                    "archived": False,
                    "project_id": None,
                    "profile": (cli_meta or {}).get("profile"),
                    "is_cli_session": True,
+                    "source_tag": (cli_meta or {}).get("source_tag"),
+                    "raw_source": (cli_meta or {}).get("raw_source"),
+                    "session_source": (cli_meta or {}).get("session_source"),
+                    "source_label": (cli_meta or {}).get("source_label"),
                    "messages": msgs,
                    "tool_calls": [],
                }
+                sess = _merge_cli_sidebar_metadata(sess, cli_meta)
                return j(handler, {"session": redact_session_data(sess)})
            return bad(handler, "Session not found", 404)

@@ -1852,14 +1867,17 @@ def handle_get(handler, parsed) -> bool:
            cli = get_cli_sessions()
            cli_by_id = {s["session_id"]: s for s in cli}
            for s in webui_sessions:
-                if not s.get("is_cli_session"):
-                    continue
                meta = cli_by_id.get(s.get("session_id"))
                if not meta:
                    continue
-                for key in ("source_tag", "raw_source", "session_source", "source_label"):
-                    if not s.get(key) and meta.get(key):
-                        s[key] = meta[key]
+                if _is_messaging_session_record(meta):
+                    s.update(_merge_cli_sidebar_metadata(s, meta))
+                    if s.get("session_id") != meta.get("session_id"):
+                        s["session_id"] = meta.get("session_id")
+                else:
+                    for key in ("source_tag", "raw_source", "session_source", "source_label"):
+                        if not s.get(key) and meta.get(key):
+                            s[key] = meta[key]
            webui_ids = {s["session_id"] for s in webui_sessions}
            from api.models import _hide_from_default_sidebar as _cron_hide
            deduped_cli = [s for s in cli
@@ -3036,26 +3054,55 @@ def handle_post(handler, parsed) -> bool:
        try:
            s = get_session(sid)
        except KeyError:
-            if not _is_messaging_session_id(sid):
+            cli_meta = _lookup_cli_session_metadata(sid)
+            if not cli_meta:
                return bad(handler, "Session not found", 404)
-            msgs = get_cli_session_messages(sid)
-            if not msgs:
-                return bad(handler, "Session not found", 404)
-            cli_meta = next((cs for cs in get_cli_sessions() if cs["session_id"] == sid), {})
-            s = import_cli_session(
-                sid,
-                cli_meta.get("title") or title_from(msgs, "CLI Session"),
-                msgs,
-                cli_meta.get("model") or "unknown",
-                profile=cli_meta.get("profile"),
-                created_at=cli_meta.get("created_at"),
-                updated_at=cli_meta.get("updated_at"),
-            )
-            s.is_cli_session = True
-            s.source_tag = cli_meta.get("source_tag")
-            s.raw_source = cli_meta.get("raw_source") or cli_meta.get("source_tag")
-            s.session_source = cli_meta.get("session_source")
-            s.source_label = cli_meta.get("source_label")
+            if _is_messaging_session_record(cli_meta):
+                s = Session(
+                    session_id=sid,
+                    title=cli_meta.get("title") or title_from(get_cli_session_messages(sid), "CLI Session"),
+                    workspace=get_last_workspace(),
+                    messages=[],
+                    model=cli_meta.get("model") or "unknown",
+                    created_at=cli_meta.get("created_at"),
+                    updated_at=cli_meta.get("updated_at"),
+                )
+                s.is_cli_session = True
+                s.source_tag = cli_meta.get("source_tag")
+                s.raw_source = cli_meta.get("raw_source") or cli_meta.get("source_tag")
+                s.session_source = cli_meta.get("session_source")
+                s.source_label = cli_meta.get("source_label")
+                s.user_id = cli_meta.get("user_id")
+                s.chat_id = cli_meta.get("chat_id")
+                s.chat_type = cli_meta.get("chat_type")
+                s.thread_id = cli_meta.get("thread_id")
+                s.session_key = cli_meta.get("session_key")
+                s.platform = cli_meta.get("platform")
+                s.save(touch_updated_at=False)
+            else:
+                msgs = get_cli_session_messages(sid)
+                if not msgs:
+                    return bad(handler, "Session not found", 404)
+                s = import_cli_session(
+                    sid,
+                    cli_meta.get("title") or title_from(msgs, "CLI Session"),
+                    msgs,
+                    cli_meta.get("model") or "unknown",
+                    profile=cli_meta.get("profile"),
+                    created_at=cli_meta.get("created_at"),
+                    updated_at=cli_meta.get("updated_at"),
+                )
+                s.is_cli_session = True
+                s.source_tag = cli_meta.get("source_tag")
+                s.raw_source = cli_meta.get("raw_source") or cli_meta.get("source_tag")
+                s.session_source = cli_meta.get("session_source")
+                s.source_label = cli_meta.get("source_label")
+                s.user_id = cli_meta.get("user_id")
+                s.chat_id = cli_meta.get("chat_id")
+                s.chat_type = cli_meta.get("chat_type")
+                s.thread_id = cli_meta.get("thread_id")
+                s.session_key = cli_meta.get("session_key")
+                s.platform = cli_meta.get("platform")
        with _get_session_agent_lock(sid):
            s.archived = bool(body.get("archived", True))
            s.save(touch_updated_at=False)
@@ -5579,6 +5626,203 @@ def _handle_conversation_rounds(handler, body):
    })


+def _build_handoff_summary_tool_message(
+    sid: str,
+    summary: str,
+    channel: str | None,
+    rounds: int | None = None,
+    fallback: bool = False,
+) -> dict:
+    """Assemble the tool-role transcript entry that records a handoff summary.
+
+    The returned message carries a JSON-encoded card in ``content`` holding
+    the session id, the stripped summary text, the stripped channel label
+    (``None`` when blank), the round count, the fallback flag and the
+    generation time. ``timestamp``, ``_ts`` and the card's ``generated_at``
+    all share one ``time.time()`` reading.
+    """
+    generated_at = time.time()
+    channel_label = str(channel or "").strip()
+    card = {
+        "_handoff_summary_card": True,
+        "session_id": sid,
+        "summary": str(summary or "").strip(),
+        "channel": channel_label or None,
+        "rounds": rounds,
+        "fallback": bool(fallback),
+        "generated_at": generated_at,
+    }
+    marker = {
+        "role": "tool",
+        # Left empty on purpose: API-history sanitization is expected to drop
+        # the entry from model context because it is display-only data.
+        "tool_call_id": "",
+        "name": "handoff_summary",
+        "timestamp": generated_at,
+        "_ts": generated_at,
+        "content": json.dumps(card, ensure_ascii=False),
+    }
+    return marker
+
+
+def _extract_handoff_summary_payload(message: dict) -> dict | None:
+    """Pull the normalized handoff-summary card out of a tool-role message.
+
+    Returns ``None`` unless *message* is a dict with role ``"tool"`` and name
+    ``"handoff_summary"`` whose content (a dict, or a JSON string decoding to
+    one) has a truthy ``_handoff_summary_card`` flag and a non-``None``
+    ``session_id``. On success the result holds ``session_id`` and ``summary``
+    coerced to ``str``, ``fallback`` coerced to ``bool``, and ``channel`` /
+    ``rounds`` passed through unchanged.
+    """
+    if not isinstance(message, dict):
+        return None
+    is_marker = message.get("role") == "tool" and message.get("name") == "handoff_summary"
+    if not is_marker:
+        return None
+
+    card = message.get("content")
+    if not isinstance(card, dict):
+        # Content is normally stored as a JSON string; anything undecodable
+        # simply means this is not a handoff card.
+        try:
+            card = json.loads(card or "")
+        except Exception:
+            return None
+
+    if not isinstance(card, dict):
+        return None
+    if not card.get("_handoff_summary_card"):
+        return None
+    if card.get("session_id") is None:
+        return None
+
+    normalized = {
+        "session_id": str(card.get("session_id")),
+        "summary": str(card.get("summary", "")),
+        "channel": card.get("channel"),
+        "rounds": card.get("rounds"),
+        "fallback": bool(card.get("fallback")),
+        "_handoff_summary_card": True,
+    }
+    return normalized
+
+
+def _is_matching_handoff_summary_message(existing: dict, target: dict) -> bool:
+    """Tell whether *existing* and *target* carry the same handoff summary card.
+
+    Both messages are normalized with ``_extract_handoff_summary_payload``;
+    if either one is not a handoff card the answer is ``False``. Otherwise
+    every normalized field must be equal.
+    """
+    left = _extract_handoff_summary_payload(existing)
+    right = _extract_handoff_summary_payload(target)
+    if not left or not right:
+        return False
+    compared_fields = (
+        "session_id",
+        "summary",
+        "channel",
+        "rounds",
+        "fallback",
+        "_handoff_summary_card",
+    )
+    return all(left.get(field) == right.get(field) for field in compared_fields)
+
+
+def _is_matching_handoff_summary_content(content: object, target_payload: dict | None) -> bool:
+    """Check a stored JSON ``content`` value against an expected summary card.
+
+    Returns ``False`` when *target_payload* is ``None``, when *content* does
+    not decode to a JSON object, or when the decoded object has no
+    ``session_id``. Otherwise the card flag must be exactly ``True`` and the
+    session id and summary (compared as strings), channel, rounds and
+    fallback (compared as booleans) must all agree.
+    """
+    if target_payload is None:
+        return False
+    try:
+        stored = json.loads(content or "")
+    except Exception:
+        return False
+    if not isinstance(stored, dict) or stored.get("session_id") is None:
+        return False
+
+    if stored.get("_handoff_summary_card") is not True:
+        return False
+    if str(stored.get("session_id")) != str(target_payload.get("session_id")):
+        return False
+    if str(stored.get("summary", "")) != str(target_payload.get("summary", "")):
+        return False
+    if not stored.get("channel") == target_payload.get("channel"):
+        return False
+    if not stored.get("rounds") == target_payload.get("rounds"):
+        return False
+    return bool(stored.get("fallback")) == bool(target_payload.get("fallback"))
+
+
+def _persist_handoff_summary_locally(sid: str, message: dict) -> bool:
+    """Append a handoff summary marker to the local WebUI session for *sid*.
+
+    Returns ``False`` when ``get_session`` raises ``KeyError`` for *sid* or
+    when appending/saving fails (the failure is logged as a warning).
+    Returns ``True`` when the marker was saved, or when the session's last
+    message already is the same summary card, in which case nothing is
+    written.
+    """
+    try:
+        from api.models import get_session
+
+        session = get_session(sid)
+    except KeyError:
+        return False
+
+    try:
+        # Skip the write when the transcript already ends with this card so
+        # repeated requests do not stack identical markers.
+        already_recorded = bool(session.messages) and _is_matching_handoff_summary_message(
+            session.messages[-1], message
+        )
+        if not already_recorded:
+            session.messages.append(message)
+            session.save()
+    except Exception as exc:
+        logger.warning("Failed to persist handoff summary marker in local session %s: %s", sid, exc)
+        return False
+    return True
+
+
+def _persist_handoff_summary_to_state_db(sid: str, message: dict) -> bool:
+    """Persist a handoff summary marker into CLI sessions state.db.
+
+    This keeps summary cards available after hard-refresh for imported gateway
+    sessions that are not in local session JSON yet.
+
+    The database is looked up as ``state.db`` under the active Hermes home
+    (``api.profiles.get_active_hermes_home``), falling back to the
+    ``HERMES_HOME`` environment variable or ``~/.hermes`` when that lookup
+    fails.
+
+    Returns:
+        ``True`` when the marker was inserted, or when the most recent
+        tool-role row for *sid* already holds the same summary card.
+        ``False`` when ``sqlite3`` is unavailable, ``state.db`` does not
+        exist, or the write fails (the failure is logged as a warning).
+    """
+    import os
+    from contextlib import closing
+
+    try:
+        import sqlite3
+    except ImportError:
+        return False
+
+    try:
+        from api.profiles import get_active_hermes_home
+
+        hermes_home = Path(get_active_hermes_home()).expanduser().resolve()
+    except Exception:
+        hermes_home = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))).expanduser().resolve()
+
+    db_path = hermes_home / "state.db"
+    if not db_path.exists():
+        return False
+
+    ts = message.get("timestamp", time.time())
+    content = message.get("content", "")
+    if not isinstance(content, str):
+        content = json.dumps(content, ensure_ascii=False)
+
+    marker_payload = _extract_handoff_summary_payload(message)
+    try:
+        # ``closing`` guarantees the connection is closed on every exit path;
+        # sqlite3's own context manager only commits or rolls back the
+        # transaction and leaves the connection open.
+        with closing(sqlite3.connect(str(db_path))) as conn, conn:
+            if marker_payload is not None:
+                try:
+                    cur = conn.execute(
+                        "SELECT content FROM messages WHERE session_id = ? AND role = 'tool' "
+                        "ORDER BY rowid DESC LIMIT 1",
+                        (sid,),
+                    )
+                    row = cur.fetchone()
+                    if row is not None and _is_matching_handoff_summary_content(row[0], marker_payload):
+                        return True
+                except Exception:
+                    # If tail-read fails, continue with a best-effort write.
+                    logger.debug("Unable to read tail handoff marker from state.db for %s", sid)
+
+            conn.execute(
+                "INSERT INTO messages (session_id, role, content, timestamp) "
+                "VALUES (?, 'tool', ?, ?)",
+                (sid, content, ts),
+            )
+            # Keep session row message_count/last-activity aligned with displayed
+            # transcript length. session rows are optional in some test DBs, so
+            # this update is best-effort.
+            conn.execute(
+                "UPDATE sessions SET message_count = COALESCE(message_count, 0) + 1 "
+                "WHERE id = ?",
+                (sid,),
+            )
+            conn.commit()
+        return True
+    except Exception as e:
+        logger.warning("Failed to persist handoff summary marker in state.db for %s: %s", sid, e)
+        return False
+
+
+def _persist_handoff_summary(sid: str, summary: str, channel: str | None, rounds: int | None, fallback: bool = False) -> dict:
+    """Persist a handoff summary marker across local/session backends.
+
+    Builds the marker message and stores it best-effort:
+
+    * when ``_is_messaging_session_id(sid)`` is true, it is written to
+      state.db and then to the local session;
+    * otherwise it is written to the local session, and state.db is tried
+      only if the local write did not succeed.
+
+    Returns:
+        The marker message. It is returned whether or not any backend
+        accepted the write; persistence failures are not reported here.
+    """
+    marker = _build_handoff_summary_tool_message(sid, summary, channel, rounds, fallback)
+    if _is_messaging_session_id(sid):
+        _persist_handoff_summary_to_state_db(sid, marker)
+        _persist_handoff_summary_locally(sid, marker)
+    elif not _persist_handoff_summary_locally(sid, marker):
+        # No local session took the marker; fall back to state.db.
+        _persist_handoff_summary_to_state_db(sid, marker)
+    return marker
+
+
 def _handle_handoff_summary(handler, body):
    """Generate an on-demand handoff summary for a gateway session.

@@ -5642,42 +5886,138 @@ def _handle_handoff_summary(handler, body):
    if len(msgs) < 2:
        return bad(handler, "Not enough messages to summarize.", 400)

+    def _extract_handoff_text(raw_content):
+        if isinstance(raw_content, list):
+            return " ".join(
+                str(p.get("text") or p.get("content") or "")
+                for p in raw_content
+                if isinstance(p, dict)
+            ).strip()
+        return str(raw_content or "").strip()
+
+    def _contains_chinese(text):
+        return any("\u4e00" <= ch <= "\u9fff" for ch in str(text))
+
+    transcript_is_chinese = any(
+        _contains_chinese(_extract_handoff_text(m.get("content")))
+        for m in msgs
+    )
    # Build a lightweight conversation transcript for the LLM.
    lines = []
    for m in msgs:
        role = m.get("role", "")
-        content = m.get("content", "")
-        if isinstance(content, list):
-            content = " ".join(
-                str(p.get("text") or p.get("content") or "")
-                for p in content
-                if isinstance(p, dict)
-            )
+        content = _extract_handoff_text(m.get("content"))
        content = str(content or "").strip()[:1000]
        if role in ("user", "assistant") and content:
-            label = "User" if role == "user" else "Agent"
-            lines.append(f"{label}: {content}")
+            lines.append(content)
    transcript = "\n".join(lines)

    def _fallback_handoff_summary(items):
        """Return a deterministic summary when LLM summary generation is unavailable."""
-        recent = []
+        user_points = []
+        assistant_points = []
+
+        def _summarize_snippet(raw_text, max_len=78):
+            text = " ".join(str(raw_text or "").split()).strip()
+            if not text:
+                return ""
+            if len(text) <= max_len:
+                return text
+            return text[: max_len - 1].rstrip() + "…"
+
        for m in items:
            role = m.get("role", "")
-            content = m.get("content", "")
-            if isinstance(content, list):
-                content = " ".join(
-                    str(p.get("text") or p.get("content") or "")
-                    for p in content
-                    if isinstance(p, dict)
-                )
-            content = " ".join(str(content or "").split()).strip()
+            content = _summarize_snippet(_extract_handoff_text(m.get("content")), 82)
            if role in ("user", "assistant") and content:
-                label = "User" if role == "user" else "Agent"
-                recent.append(f"- {label}: {content[:180]}")
-        if not recent:
-            return "Recent external-channel messages were found, but no readable text was available."
-        return "Recent external-channel activity:\n" + "\n".join(recent[-6:])
+                if role == "user":
+                    user_points.append(content)
+                else:
+                    assistant_points.append(content)
+        if not user_points and not assistant_points:
+            return (
+                "近期可读文本不足，无法生成更完整的交接摘要，请补充一条消息后重试。"
+                if transcript_is_chinese
+                else "Not enough readable text to create a useful handoff summary; please send one more message and retry."
+            )
+
+        if transcript_is_chinese:
+            bullets = []
+            if user_points:
+                bullets.append(f"- 你刚讨论了：{user_points[-1]}。")
+            if assistant_points:
+                bullets.append(f"- 助手已回复：{assistant_points[-1]}。")
+            if len(user_points) + len(assistant_points) >= 2:
+                bullets.append("- 当前对话存在尚未确认的后续动作。")
+            else:
+                bullets.append("- 当前信息偏少，建议补充关键点后再切换。")
+            return "\n".join(bullets)
+
+        bullets = []
+        if user_points:
+            bullets.append(f"- You asked: {user_points[-1]}.")
+        if assistant_points:
+            bullets.append(f"- The assistant responded: {assistant_points[-1]}.")
+        if len(user_points) + len(assistant_points) >= 2:
+            bullets.append("- There is pending context to continue next.")
+        else:
+            bullets.append("- The conversation is still short; add one more turn before summarizing.")
+        return "\n".join(bullets)
+
+    def _summary_output_incomplete(text):
+        """Best-effort guard for truncated summaries when LLM signals are unavailable.
+
+        Returns ``True`` (looks truncated) when the text is empty, ends with an
+        ellipsis, or its last non-empty line lacks closing punctuation and
+        either ends on a dangling conjunction or is at least 56 characters
+        long. Returns ``False`` otherwise, including for short unpunctuated
+        last lines such as terse bullets.
+        """
+        if not isinstance(text, str):
+            text = str(text or "")
+        text = text.strip()
+        if not text:
+            return True
+        if text.endswith(("...", "…")):
+            return True
+        non_empty_lines = [line.strip() for line in text.splitlines() if line.strip()]
+        if not non_empty_lines:
+            return True
+        last_line = non_empty_lines[-1]
+        # Sentence-final punctuation (CJK or ASCII) marks a finished line.
+        if re.search(r"[。！？；!?.]$", last_line):
+            return False
+        # A dangling conjunction means the sentence was cut off mid-thought,
+        # whatever the line length.
+        if re.search(r"\b(and|or|but|so|because|if|when)$", last_line, re.IGNORECASE):
+            return True
+        # Otherwise only long unpunctuated lines count as truncated. A single
+        # conjunction list is used so long lines ending in other words
+        # (e.g. "then", "as") are no longer misreported as complete.
+        return len(last_line) >= 56
+
+    def _agent_summary_incomplete(summary_result):
+        if not isinstance(summary_result, dict):
+            return True
+        reason = (summary_result.get("finish_reason") or "").strip().lower()
+        if reason == "length":
+            return True
+        stop_reason = (summary_result.get("stop_reason") or "").strip().lower()
+        if stop_reason in {"max_tokens", "length"}:
+            return True
+        return _summary_output_incomplete(summary_result.get("text", ""))
+
+    def _resolve_handoff_channel_label():
+        channel_label = None
+        try:
+            from api.models import get_session as _get_session, get_cli_sessions
+
+            session_meta = _get_session(sid)
+            channel_label = (
+                session_meta.source_label
+                or session_meta.raw_source
+                or session_meta.source_tag
+                or session_meta.session_source
+            )
+            if not channel_label:
+                for candidate in get_cli_sessions():
+                    if candidate.get("session_id") == sid:
+                        channel_label = (
+                            candidate.get("source_label")
+                            or candidate.get("raw_source")
+                            or candidate.get("source_tag")
+                            or candidate.get("source")
+                        )
+                        break
+        except Exception:
+            pass
+        return channel_label

    def _agent_text_completion(agent, system_prompt, user_text, max_tokens=700):
        """Use the current Hermes Agent transport without mutating conversation history."""
@@ -5685,6 +6025,12 @@ def _handle_handoff_summary(handler, body):
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_text},
        ]
+        result = {
+            "text": "",
+            "finish_reason": None,
+            "stop_reason": None,
+            "incomplete": True,
+        }
        disabled_reasoning = {"enabled": False}
        previous_reasoning = getattr(agent, "reasoning_config", None)
        try:
@@ -5695,7 +6041,9 @@ def _handle_handoff_summary(handler, body):
                codex_kwargs["max_output_tokens"] = max_tokens
                resp = agent._run_codex_stream(codex_kwargs)
                assistant_message, _ = agent._normalize_codex_response(resp)
-                return str((assistant_message.content or "") if assistant_message else "").strip()
+                result["text"] = str((assistant_message.content or "") if assistant_message else "").strip()
+                result["incomplete"] = _summary_output_incomplete(result["text"])
+                return result

            if getattr(agent, "api_mode", "") == "anthropic_messages":
                from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
@@ -5715,7 +6063,9 @@ def _handle_handoff_summary(handler, body):
                    resp,
                    strip_tool_prefix=getattr(agent, "_is_anthropic_oauth", False),
                )
-                return str((assistant_message.content or "") if assistant_message else "").strip()
+                result["text"] = str((assistant_message.content or "") if assistant_message else "").strip()
+                result["incomplete"] = _summary_output_incomplete(result["text"])
+                return result

            api_kwargs = agent._build_api_kwargs(api_messages)
            api_kwargs.pop("tools", None)
@@ -5730,11 +6080,15 @@ def _handle_handoff_summary(handler, body):
            )
            choice = (getattr(resp, "choices", None) or [None])[0]
            msg = getattr(choice, "message", None) if choice is not None else None
-            return str(getattr(msg, "content", "") or "").strip()
+            result["text"] = str(getattr(msg, "content", "") or "").strip()
+            result["finish_reason"] = getattr(choice, "finish_reason", None)
+            result["stop_reason"] = getattr(choice, "stop_reason", None)
+            result["incomplete"] = _agent_summary_incomplete(result)
+            return result
        finally:
            agent.reasoning_config = previous_reasoning

-    # Call LLM for summary.
+        # Call LLM for summary.
    try:
        import api.config as _cfg
        import hermes_cli.runtime_provider as _runtime_provider
@@ -5765,9 +6119,20 @@ def _handle_handoff_summary(handler, body):
            logger.warning("resolve_runtime_provider failed for handoff summary: %s", _e)

        if not resolved_api_key:
+            summary_text = _fallback_handoff_summary(msgs)
+            try:
+                _persist_handoff_summary(
+                    sid,
+                    summary_text,
+                    _resolve_handoff_channel_label(),
+                    rounds,
+                    fallback=True,
+                )
+            except Exception:
+                pass
            return j(handler, {
                "ok": True,
-                "summary": _fallback_handoff_summary(msgs),
+                "summary": summary_text,
                "message_count": len(msgs),
                "rounds": rounds,
                "fallback": True,
@@ -5785,21 +6150,46 @@ def _handle_handoff_summary(handler, body):
        )

        summary_system_prompt = (
-            "You are summarizing a conversation that happened on an external channel "
-            "(WeChat/Telegram) so the user can quickly catch up when switching to Web UI.\n\n"
+            "You are summarizing an external-channel conversation so a Web UI reader "
+            "can quickly catch up after switching contexts.\n\n"
+            "Only use the latest messages, and never copy raw transcript lines.\n"
+            "Do not output role labels (no “你:” / “assistant:” / “user:” / “assistant”).\n"
+            "Use direct 2–5 bullet points in the conversation language.\n"
+            "English: speak using “you”.\n"
+            "中文: 使用“你”。\n\n"
            "Focus on:\n"
            "- Unfinished tasks or action items\n"
            "- Pending questions that need replies\n"
            "- Key decisions made\n"
            "- Open disagreements or TBD items\n\n"
-            "Keep it concise — 2-5 bullet points max. "
            "If the conversation is purely casual with no actionable items, "
            "say so in one sentence."
        )
        summary_user_text = f"Conversation transcript:\n{transcript}"

        try:
-            summary_text = _agent_text_completion(agent, summary_system_prompt, summary_user_text)
+            first_pass = _agent_text_completion(
+                agent,
+                summary_system_prompt,
+                summary_user_text,
+                max_tokens=700,
+            )
+            summary_text = first_pass.get("text") if isinstance(first_pass, dict) else ""
+            if _agent_summary_incomplete(first_pass):
+                second_pass = _agent_text_completion(
+                    agent,
+                    summary_system_prompt,
+                    summary_user_text,
+                    max_tokens=1400,
+                )
+                summary_text = second_pass.get("text") if isinstance(second_pass, dict) else ""
+                if _agent_summary_incomplete(second_pass):
+                    summary_text = _fallback_handoff_summary(msgs)
+                    fallback = True
+                else:
+                    fallback = False
+            else:
+                fallback = False
        finally:
            try:
                agent.release_clients()
@@ -5807,19 +6197,43 @@ def _handle_handoff_summary(handler, body):
                pass
        if not summary_text:
            summary_text = _fallback_handoff_summary(msgs)
+            fallback = True
+        elif _summary_output_incomplete(summary_text):
+            if not fallback:
+                fallback = True
+
+        channel_label = _resolve_handoff_channel_label()
+        _persist_handoff_summary(
+            sid,
+            summary_text,
+            channel_label,
+            rounds,
+            fallback=fallback,
+        )

        return j(handler, {
            "ok": True,
            "summary": summary_text,
            "message_count": len(msgs),
            "rounds": rounds,
-            "fallback": summary_text.startswith("Recent external-channel activity:"),
+            "fallback": fallback,
        })
    except Exception as e:
        logger.warning("Handoff summary generation failed: %s", e)
+        summary_text = _fallback_handoff_summary(msgs)
+        try:
+            _persist_handoff_summary(
+                sid,
+                summary_text,
+                _resolve_handoff_channel_label(),
+                rounds,
+                fallback=True,
+            )
+        except Exception:
+            pass
        return j(handler, {
            "ok": True,
-            "summary": _fallback_handoff_summary(msgs),
+            "summary": summary_text,
            "message_count": len(msgs),
            "rounds": rounds,
            "fallback": True,
@@ -5894,6 +6308,40 @@ def _handle_memory_write(handler, body):
    return j(handler, {"ok": True, "section": section, "path": str(target)})


+def _normalize_message_for_import_refresh(message: object) -> object:
+    """Return a comparison-friendly copy of *message* without timing keys.
+
+    Import refresh compares stored messages against freshly read ones, and the
+    same message may carry an integer timestamp on one side and a float on the
+    other, which defeats a plain dict comparison. Dict inputs are copied with
+    the ``timestamp`` and ``_ts`` keys left out; any non-dict value is
+    returned unchanged. The input is never mutated.
+    """
+    if isinstance(message, dict):
+        return {
+            key: value
+            for key, value in message.items()
+            if key not in ("timestamp", "_ts")
+        }
+    return message
+
+
+def _is_messages_refresh_prefix_match(existing_messages: list, fresh_messages: list) -> bool:
+    """Tell whether existing_messages is a leading slice of fresh_messages.
+
+    Messages are compared pairwise after being passed through
+    ``_normalize_message_for_import_refresh``, so timing fields do not take
+    part in the comparison. Returns False when either argument is not a list
+    or when existing_messages is longer than fresh_messages; an empty
+    existing_messages list matches any list.
+    """
+    if not (isinstance(existing_messages, list) and isinstance(fresh_messages, list)):
+        return False
+    if len(fresh_messages) < len(existing_messages):
+        return False
+    # zip() stops at the shorter list, which the length guard above guarantees
+    # is existing_messages, so exactly the stored prefix is inspected.
+    return not any(
+        _normalize_message_for_import_refresh(stored)
+        != _normalize_message_for_import_refresh(incoming)
+        for stored, incoming in zip(existing_messages, fresh_messages)
+    )
+
+
 def _handle_session_import_cli(handler, body):
    """Import a single CLI session into the WebUI store."""
    try:
@@ -5917,7 +6365,7 @@ def _handle_session_import_cli(handler, body):
            # Prefix-equality guard: only extend if existing messages are a prefix of
            # the fresh CLI messages. Prevents silently dropping WebUI-added messages
            # on hybrid sessions (user sent messages via WebUI while CLI continued).
-            if existing.messages == fresh_msgs[:len(existing.messages)]:
+            if _is_messages_refresh_prefix_match(existing.messages, fresh_msgs):
                existing.messages = fresh_msgs
                changed = True
        if cli_meta:
@@ -5961,6 +6409,12 @@ def _handle_session_import_cli(handler, body):
    cli_raw_source = None
    cli_session_source = None
    cli_source_label = None
+    cli_user_id = None
+    cli_chat_id = None
+    cli_chat_type = None
+    cli_thread_id = None
+    cli_session_key = None
+    cli_platform = None
    for cs in get_cli_sessions():
        if cs["session_id"] == sid:
            profile = cs.get("profile")
@@ -5972,6 +6426,12 @@ def _handle_session_import_cli(handler, body):
            cli_raw_source = cs.get("raw_source")
            cli_session_source = cs.get("session_source")
            cli_source_label = cs.get("source_label")
+            cli_user_id = cs.get("user_id")
+            cli_chat_id = cs.get("chat_id")
+            cli_chat_type = cs.get("chat_type")
+            cli_thread_id = cs.get("thread_id")
+            cli_session_key = cs.get("session_key")
+            cli_platform = cs.get("platform")
            break

    # Use the CLI session title if available (e.g., cron job name), otherwise derive from messages
@@ -5998,6 +6458,12 @@ def _handle_session_import_cli(handler, body):
    s.raw_source = cli_raw_source or cli_source_tag
    s.session_source = cli_session_source
    s.source_label = cli_source_label
+    s.user_id = cli_user_id
+    s.chat_id = cli_chat_id
+    s.chat_type = cli_chat_type
+    s.thread_id = cli_thread_id
+    s.session_key = cli_session_key
+    s.platform = cli_platform
    s._cli_origin = sid
    s.save(touch_updated_at=False)
    return j(
@@ -552,6 +552,15 @@ async function loadSession(sid){

 const _HANDOFF_THRESHOLD = 10;  // conversation rounds
 const _HANDOFF_STORAGE_PREFIX = 'handoff:';
+const _HANDOFF_SUFFIX_DISMISSED_AT = 'dismissed_at';
+const _HANDOFF_SUFFIX_SUMMARY_HANDLED_AT = 'summary_handled_at';
+const _MESSAGING_RAW_SOURCES = new Set(['weixin', 'telegram', 'discord', 'slack']);
+const _MESSAGING_SOURCE_LABELS = {
+  weixin: 'WeChat',
+  telegram: 'Telegram',
+  discord: 'Discord',
+  slack: 'Slack',
+};

 function _isMessagingSession(session) {
  if (!session) return false;
@@ -559,26 +568,83 @@ function _isMessagingSession(session) {
  if (session.session_source === 'messaging') return true;
  // Fallback: check raw_source directly
  const raw = (session.raw_source || session.source_tag || session.source || '').toLowerCase();
-  return ['weixin', 'telegram', 'discord', 'slack'].includes(raw);
+  return _MESSAGING_RAW_SOURCES.has(raw);
+}
+
+function _normalizeMessageForCliImportComparison(message) {
+  // Returns a shallow copy of an object message without its `timestamp` and
+  // `_ts` fields; null and non-object values are handed back unchanged.
+  if (message === null || typeof message !== 'object') return message;
+  // The two timing fields are peeled off and discarded; only the rest is kept.
+  const { timestamp, _ts, ...comparable } = message;
+  return comparable;
+}
+
+function _isCliImportRefreshPrefixMatch(localMessages, freshMessages) {
+  // Returns true when every message in localMessages equals the message at the
+  // same index of freshMessages, ignoring the timing fields removed by
+  // _normalizeMessageForCliImportComparison. Returns false when either
+  // argument is not an array or when localMessages is the longer one.
+  if (!Array.isArray(localMessages) || !Array.isArray(freshMessages)) return false;
+  if (localMessages.length > freshMessages.length) return false;
+  // JSON.stringify emits keys in insertion order, so two messages holding the
+  // same fields in a different order would otherwise serialize differently.
+  // The replacer rebuilds every plain object with sorted keys so the
+  // comparison is by value.
+  const sortKeys = (_key, value) => {
+    if (!value || typeof value !== 'object' || Array.isArray(value)) return value;
+    const sorted = {};
+    for (const name of Object.keys(value).sort()) sorted[name] = value[name];
+    return sorted;
+  };
+  const fingerprint = (message) => JSON.stringify(_normalizeMessageForCliImportComparison(message), sortKeys);
+  for (let i = 0; i < localMessages.length; i += 1) {
+    if (fingerprint(localMessages[i]) !== fingerprint(freshMessages[i])) {
+      return false;
+    }
+  }
+  return true;
 }

 function _handoffStorageKey(sid) {
-  return _HANDOFF_STORAGE_PREFIX + sid + ':dismissed_at';
+  // Returns the per-session key PREFIX (ending in ':'), not a complete key:
+  // callers append a suffix such as _HANDOFF_SUFFIX_DISMISSED_AT to form the
+  // full localStorage key.
+  return `${_HANDOFF_STORAGE_PREFIX}${sid}:`;
 }

-function _getHandoffDismissedAt(sid) {
+function _getHandoffStorageValue(sid, suffix) {
+  // Reads the numeric value stored under the session's handoff key + suffix.
+  // Returns a finite number, or null when nothing is stored, the stored text
+  // does not parse to a finite number, or localStorage access throws.
   try {
-    const val = localStorage.getItem(_handoffStorageKey(sid));
-    return val ? parseFloat(val) : null;
+    const raw = localStorage.getItem(_handoffStorageKey(sid) + suffix);
+    if (!raw) return null;
+    // The setter only ever stores finite numbers, so anything else here is a
+    // corrupted entry; report it as missing instead of leaking NaN/Infinity.
+    const parsed = parseFloat(raw);
+    return Number.isFinite(parsed) ? parsed : null;
   } catch { return null; }
 }

-function _setHandoffDismissedAt(sid, ts) {
+function _setHandoffStorageValue(sid, suffix, ts) {
+  // Stores `ts` as a string under the session's handoff key + suffix.
+  // A non-finite `ts` (null, undefined, NaN, ...) removes the entry instead.
+  // Errors raised by localStorage are swallowed.
+  const key = _handoffStorageKey(sid) + suffix;
   try {
-    localStorage.setItem(_handoffStorageKey(sid), String(ts));
+    if (!Number.isFinite(ts)) {
+      // Clearing path: callers pass null to delete a stored marker.
+      localStorage.removeItem(key);
+      return;
+    }
+    localStorage.setItem(key, String(ts));
   } catch {}
 }

+function _clearHandoffStorageForSession(sid) {
+  // Removes both handoff markers (dismissed-at and summary-handled-at) that
+  // were persisted for the given session; does nothing for a falsy sid.
+  if (sid) {
+    try {
+      for (const suffix of [_HANDOFF_SUFFIX_DISMISSED_AT, _HANDOFF_SUFFIX_SUMMARY_HANDLED_AT]) {
+        // A null timestamp makes the setter delete the stored entry.
+        _setHandoffStorageValue(sid, suffix, null);
+      }
+    } catch {}
+  }
+}
+
+// Reads the stored "dismissed at" marker for a session via
+// _getHandoffStorageValue.
+function _getHandoffDismissedAt(sid) {
+  return _getHandoffStorageValue(sid, _HANDOFF_SUFFIX_DISMISSED_AT);
+}
+
+// Stores the "dismissed at" marker; a non-finite ts (e.g. null) clears it.
+function _setHandoffDismissedAt(sid, ts) {
+  _setHandoffStorageValue(sid, _HANDOFF_SUFFIX_DISMISSED_AT, ts);
+}
+
+// Reads the stored "summary handled at" marker for a session via
+// _getHandoffStorageValue.
+function _getHandoffSummaryHandledAt(sid) {
+  return _getHandoffStorageValue(sid, _HANDOFF_SUFFIX_SUMMARY_HANDLED_AT);
+}
+
+// Stores the "summary handled at" marker; a non-finite ts clears it.
+function _setHandoffSummaryHandledAt(sid, ts) {
+  _setHandoffStorageValue(sid, _HANDOFF_SUFFIX_SUMMARY_HANDLED_AT, ts);
+}
+
+function _getHandoffSince(sid) {
+  // Returns the later of the session's "dismissed at" and "summary handled
+  // at" markers, whichever of them are finite numbers, or null when neither
+  // is set.
+  const marks = [
+    _getHandoffDismissedAt(sid),
+    _getHandoffSummaryHandledAt(sid),
+  ].filter(Number.isFinite);
+  return marks.length ? Math.max(...marks) : null;
+}
+
 function _handoffMessagesEl() {
  return document.getElementById('messages');
 }
@@ -614,13 +680,12 @@ function _getChannelLabel(session) {
  // Use source_label from PR #1294 if available
  if (session.source_label) return session.source_label;
  const raw = (session.raw_source || session.source_tag || session.source || '').toLowerCase();
-  const labels = { weixin: 'WeChat', telegram: 'Telegram', discord: 'Discord', slack: 'Slack' };
-  return labels[raw] || raw || '';
+  return _MESSAGING_SOURCE_LABELS[raw] || raw || '';
 }

 async function _checkAndShowHandoffHint(sid) {
  try {
-    const since = _getHandoffDismissedAt(sid);
+    const since = _getHandoffSince(sid);
    const body = { session_id: sid };
    if (since != null) body.since = since;

@@ -628,14 +693,19 @@ async function _checkAndShowHandoffHint(sid) {
      method: 'POST',
      body: JSON.stringify(body),
    });
-
    // Stale? Session switched while we were fetching.
    if (!S.session || S.session.session_id !== sid) return;

    if (result && result.ok && result.should_show) {
      _showHandoffHint(sid, result.rounds);
    } else {
-      _hideHandoffHint();
+      const container = $('handoffHintContainer');
+      const isSameVisibleSession = !!(
+        container &&
+        container.classList.contains('is-visible') &&
+        container.dataset.sessionId === String(sid)
+      );
+      if (!isSameVisibleSession) _hideHandoffHint();
    }
  } catch (e) {
    console.warn('Handoff hint check failed:', e);
@@ -651,26 +721,32 @@ function _showHandoffHint(sid, rounds) {
  container.innerHTML = '';
  container.style.display = '';
  container.classList.add('is-visible');
+  container.dataset.sessionId = String(sid);

  const channel = _getChannelLabel(S.session);
  const hintText = channel
-    ? `${channel} has ${rounds} new conversation rounds — click to view summary`
-    : `${rounds} new conversation rounds — click to view summary`;
+    ? `${channel} handoff`
+    : `Conversation handoff`;
+  const hintMeta = `${rounds} new conversation rounds`;

  const bar = document.createElement('div');
  bar.className = 'handoff-hint-bar';
  bar.id = 'handoffHintBar';
  bar.innerHTML = `
    <div class="handoff-hint-text">
-      <span class="handoff-hint-icon">${li('arrow-left', 18)}</span>
-      <span>${esc(hintText)}</span>
+      <span class="handoff-hint-dot" aria-hidden="true"></span>
+      <span class="handoff-hint-label">${esc(hintText)}</span>
+      <span class="handoff-hint-meta">${esc(hintMeta)}</span>
+    </div>
+    <div class="handoff-hint-actions">
+      <button class="handoff-hint-action" type="button">View summary</button>
+      <button class="handoff-hint-dismiss" type="button" onclick="event.stopPropagation(); _dismissHandoffHint('${esc(sid)}')" title="Dismiss">
+        Close
+      </button>
    </div>
-    <button class="handoff-hint-dismiss" onclick="event.stopPropagation(); _dismissHandoffHint('${esc(sid)}')" title="Dismiss">
-      ${li('x', 14)}
-    </button>
  `;

-  // Click on the bar (not the dismiss button) triggers summary generation.
+  // Click on the bar (not the explicit close button) triggers summary generation.
  bar.addEventListener('click', (e) => {
    if (e.target.closest('.handoff-hint-dismiss')) return;
    _generateHandoffSummary(sid, rounds);
@@ -686,6 +762,7 @@ function _hideHandoffHint() {
    container.innerHTML = '';
    container.style.display = 'none';
    container.classList.remove('is-visible');
+    delete container.dataset.sessionId;
  }
  _syncHandoffDockSpace(false);
 }
@@ -695,6 +772,41 @@ function _dismissHandoffHint(sid) {
  _hideHandoffHint();
 }

+function _buildHandoffSummaryToolMessage(summary, channel, rounds, fallback) {
+  // Builds a synthetic `tool` message whose JSON content is flagged with
+  // `_handoff_summary_card: true`. The payload's session_id comes from
+  // sidValue(), i.e. the session loaded at the time of the call.
+  const generatedAt = Date.now() / 1000;
+  const channelLabel = typeof channel === 'string' ? channel.trim() : '';
+  const payload = {
+    _handoff_summary_card: true,
+    session_id: sidValue(),
+    summary: String(summary || '').trim(),
+    // Blank or non-string channels are recorded as null.
+    channel: channelLabel || null,
+    rounds: Number.isFinite(rounds) ? rounds : null,
+    fallback: !!fallback,
+    generated_at: generatedAt,
+  };
+  return {
+    role: 'tool',
+    tool_call_id: '',
+    name: 'handoff_summary',
+    timestamp: generatedAt,
+    _ts: generatedAt,
+    content: JSON.stringify(payload),
+  };
+}
+
+function sidValue() {
+  // Id of the session currently held in S, or null when no session (or no
+  // truthy session_id) is available.
+  if (!S || !S.session) return null;
+  return S.session.session_id || null;
+}
+
+function _extractHandoffSummaryPayload(content){
+  // Parses a JSON string and returns the parsed object only when it carries
+  // `_handoff_summary_card === true`; every other input yields null.
+  if(typeof content!=='string'||!content) return null;
+  let parsed;
+  try {
+    parsed=JSON.parse(content);
+  } catch (e) {
+    // Not JSON at all, so it cannot be a summary card payload.
+    return null;
+  }
+  if(!parsed||typeof parsed!=='object') return null;
+  return parsed._handoff_summary_card===true?parsed:null;
+}
+
 async function _generateHandoffSummary(sid, rounds) {
  // Treat handoff like a slash-command result: the composer dock entry
  // disappears and the transient summary card renders in the transcript.
@@ -710,7 +822,7 @@ async function _generateHandoffSummary(sid, rounds) {
  }

  try {
-    const since = _getHandoffDismissedAt(sid);
+    const since = _getHandoffSince(sid);
    const body = { session_id: sid };
    if (since != null) body.since = since;

@@ -718,32 +830,29 @@ async function _generateHandoffSummary(sid, rounds) {
      method: 'POST',
      body: JSON.stringify(body),
    });
-
-    // Stale?
-    if (!S.session || S.session.session_id !== sid) return;
-
-    if (result && result.ok && result.summary) {
-      const summaryText = result.summary;
-      if (typeof setHandoffUi === 'function') {
-        setHandoffUi({
-          sessionId: sid,
-          phase: 'done',
-          channel,
-          rounds: result.rounds || rounds,
-          summary: summaryText,
-          fallback: !!result.fallback,
-        });
+    const isSuccess = result && result.ok && result.summary;
+    if (isSuccess) {
+      _setHandoffSummaryHandledAt(sid, Date.now() / 1000);
+      _setHandoffDismissedAt(sid, null);
+      const marker=_buildHandoffSummaryToolMessage(result.summary, channel, result.rounds || rounds, !!result.fallback);
+      if (S.session && S.session.session_id === sid) {
+        S.messages = [...S.messages, marker];
+        if (typeof renderMessages === 'function') renderMessages();
      }
+      if (typeof setHandoffUi === 'function') {
+        setHandoffUi(null);
+      }
+    } else if (S.session && S.session.session_id === sid && typeof setHandoffUi === 'function') {
+      // Keep transient card while the user can retry the action.
+      setHandoffUi({
+        sessionId: sid,
+        phase: 'error',
+        channel,
+        rounds,
+        errorText: 'Could not generate summary. Please try again.',
+      });
    } else {
-      if (typeof setHandoffUi === 'function') {
-        setHandoffUi({
-          sessionId: sid,
-          phase: 'error',
-          channel,
-          rounds,
-          errorText: 'Could not generate summary. Please try again.',
-        });
-      }
+      // Stale session response path: only record success baseline.
    }
  } catch (e) {
    console.warn('Handoff summary failed:', e);
@@ -758,9 +867,9 @@ async function _generateHandoffSummary(sid, rounds) {
    }
  }

-  // Generating a summary should not dismiss the handoff entry point. Only the
-  // explicit X button suppresses it until enough newer external-channel rounds
-  // arrive.
+  // If generation succeeds, set a baseline so only new activity after that time
+  // can re-trigger handoff prompts. Failures keep the hint active so users can
+  // retry.
 }

 function _resolveSessionModelForDisplaySoon(sid){
@@ -1029,7 +1138,9 @@ function _renderBatchActionBar(){
    const ids=[..._selectedSessions];
    const ok=await showConfirmDialog({message:t('session_batch_delete_confirm',ids.length),confirmLabel:t('delete_title'),danger:true});
    if(!ok)return;
-    try{await Promise.all(ids.map(sid=>api('/api/session/delete',{method:'POST',body:JSON.stringify({session_id:sid})})));
+    try{
+      await Promise.all(ids.map(sid=>api('/api/session/delete',{method:'POST',body:JSON.stringify({session_id:sid})})));
+      ids.forEach(_clearHandoffStorageForSession);
      if(S.session&&ids.includes(S.session.session_id)){
        S.session=null;S.messages=[];S.entries=[];localStorage.removeItem('hermes-webui-session');
        const remaining=await api('/api/sessions');
@@ -1119,6 +1230,25 @@ function _buildSessionAction(label, meta, icon, onSelect, extraClass=''){
  return opt;
 }

+function _appendSessionDuplicateAction(menu, session){
+  // Appends the "duplicate session" entry to a session action menu.
+  // Selecting it closes the menu, asks the server to duplicate the session,
+  // then loads the copy and refreshes the session list. Failures surface as a
+  // toast.
+  const duplicateSession=async()=>{
+    closeSessionActionMenu();
+    try{
+      const payload=JSON.stringify({session_id:session.session_id});
+      const res=await api('/api/session/duplicate',{method:'POST',body:payload});
+      // Nothing to open when the response carries no session.
+      if(!res.session) return;
+      await loadSession(res.session.session_id);
+      await renderSessionList();
+      showToast(t('session_duplicated'));
+    }catch(err){showToast(t('session_duplicate_failed')+err.message);}
+  };
+  const action=_buildSessionAction(
+    t('session_duplicate'),
+    t('session_duplicate_desc'),
+    ICONS.dup,
+    duplicateSession
+  );
+  menu.appendChild(action);
+}
+
 function _openSessionActionMenu(session, anchorEl){
  if(_sessionActionMenu && _sessionActionSessionId===session.session_id && _sessionActionAnchor===anchorEl){
    closeSessionActionMenu();
@@ -1169,22 +1299,7 @@ function _openSessionActionMenu(session, anchorEl){
    }
  ));
  if(!isMessagingSession){
-    menu.appendChild(_buildSessionAction(
-      t('session_duplicate'),
-      t('session_duplicate_desc'),
-      ICONS.dup,
-      async()=>{
-        closeSessionActionMenu();
-        try{
-          const res=await api('/api/session/duplicate',{method:'POST',body:JSON.stringify({session_id:session.session_id})});
-          if(res.session){
-            await loadSession(res.session.session_id);
-            await renderSessionList();
-            showToast(t('session_duplicated'));
-          }
-        }catch(err){showToast(t('session_duplicate_failed')+err.message);}
-      }
-    ));
+    _appendSessionDuplicateAction(menu, session);
  }
  if(session.active_stream_id){
    menu.appendChild(_buildSessionAction(
@@ -1369,7 +1484,10 @@ function startGatewaySSE(){
                  if(!S.session || S.session.session_id !== activeSid) return;
                  if(res && res.session && Array.isArray(res.session.messages)){
                    const prev = S.messages.length;
-                    S.messages = res.session.messages.filter(m=>m&&m.role);
+                    const next = res.session.messages.filter(m => m && m.role);
+                    if (next.length < prev) return;
+                    if (prev > 0 && !_isCliImportRefreshPrefixMatch(S.messages, next)) return;
+                    S.messages = next;
                    if(S.messages.length !== prev){
                      renderMessages();
                      if(typeof highlightCode==='function') highlightCode();
@@ -2208,6 +2326,7 @@ async function deleteSession(sid){
  if(!ok)return;
  try{
    await api('/api/session/delete',{method:'POST',body:JSON.stringify({session_id:sid})});
+    _clearHandoffStorageForSession(sid);
  }catch(e){setStatus(`Delete failed: ${e.message}`);return;}
  if(S.session&&S.session.session_id===sid){
    S.session=null;S.messages=[];S.entries=[];
@@ -1042,15 +1042,17 @@
  /* ── Handoff hint bar ── */
  .handoff-hint-container{position:absolute;left:0;right:0;bottom:-2px;width:min(calc(100% - 112px),560px);margin:0 auto;box-sizing:border-box;overflow:visible;pointer-events:none;z-index:3;}
  .handoff-hint-container.is-visible{pointer-events:auto;}
-  .handoff-hint-bar{display:flex;align-items:center;justify-content:space-between;gap:12px;min-height:42px;border:1px solid var(--border);border-radius:13px;background:color-mix(in srgb,var(--surface) 86%,transparent);box-shadow:0 8px 22px rgba(0,0,0,.16);backdrop-filter:blur(10px);padding:7px 9px 7px 12px;cursor:pointer;transform:translateY(100%);opacity:0;transition:transform .32s cubic-bezier(.32,.72,.16,1),opacity .2s ease,background .15s ease,border-color .15s ease;}
+  .handoff-hint-bar{display:flex;align-items:center;justify-content:space-between;gap:12px;min-height:42px;border:1px solid var(--border);border-radius:13px;background:color-mix(in srgb,var(--surface) 86%,transparent);box-shadow:0 8px 22px rgba(0,0,0,.16);backdrop-filter:blur(10px);padding:7px 12px;cursor:pointer;transform:translateY(100%);opacity:0;transition:transform .32s cubic-bezier(.32,.72,.16,1),opacity .2s ease,background .15s ease,border-color .15s ease;}
  .handoff-hint-container.is-visible .handoff-hint-bar{transform:translateY(0);opacity:.94;}
  .handoff-hint-bar:hover{background:color-mix(in srgb,var(--surface) 92%,transparent);border-color:color-mix(in srgb,var(--border) 70%,var(--accent));}
  .handoff-hint-bar[hidden]{display:none!important;}
-  .handoff-hint-text{display:flex;align-items:center;gap:8px;min-width:0;font-size:13px;font-weight:500;color:var(--text);}
-  .handoff-hint-text span:last-child{min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;}
-  .handoff-hint-icon{width:18px;height:18px;flex:0 0 auto;color:var(--accent);}
-  .handoff-hint-dismiss{width:28px;height:28px;display:flex;align-items:center;justify-content:center;border:none;background:transparent;color:var(--muted);border-radius:8px;cursor:pointer;flex:0 0 auto;transition:background .15s ease,color .15s ease;}
-  .handoff-hint-dismiss:hover{background:color-mix(in srgb,var(--muted) 12%,transparent);color:var(--text);}
+  .handoff-hint-text{min-width:0;display:flex;align-items:center;gap:10px;color:var(--muted);font-size:12px;font-weight:700;letter-spacing:.02em;text-transform:uppercase;}
+  .handoff-hint-dot{width:7px;height:7px;border-radius:999px;background:var(--success);box-shadow:0 0 0 3px color-mix(in srgb,var(--success) 16%,transparent);flex:0 0 auto;}
+  .handoff-hint-label{min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;color:var(--text);text-transform:none;letter-spacing:0;font-weight:700;font-size:12px;}
+  .handoff-hint-meta{min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;color:var(--muted);text-transform:none;letter-spacing:0;font-weight:600;font-size:12px;}
+  .handoff-hint-actions{display:flex;align-items:center;gap:8px;flex:0 0 auto;}
+  .handoff-hint-action,.handoff-hint-dismiss{border:none;background:transparent;color:var(--muted);font:inherit;font-size:12px;font-weight:700;padding:4px 6px;border-radius:8px;cursor:pointer;transition:background .15s ease,color .15s ease;}
+  .handoff-hint-action:hover,.handoff-hint-dismiss:hover{background:color-mix(in srgb,var(--muted) 12%,transparent);color:var(--text);}
  #terminalDockWorkspaceLabel{min-width:0;max-width:220px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;color:var(--muted);text-transform:none;letter-spacing:0;font-weight:600;}
  .composer-terminal-resize-handle{height:12px;display:flex;align-items:center;justify-content:center;flex:0 0 auto;cursor:ns-resize;touch-action:none;background:linear-gradient(to bottom,rgba(255,255,255,.04),transparent);}
  .composer-terminal-resize-handle::before{content:"";width:52px;height:4px;border-radius:999px;background:var(--border2);opacity:.72;transition:opacity .15s,background .15s;}
@@ -1809,6 +1811,13 @@ body.resizing{user-select:none;cursor:col-resize;}
 .tool-card-handoff-summary .tool-card-preview{
  margin-left:10px;
 }
+.handoff-summary-fallback-note{
+  margin:10px 0 0;
+  color:var(--muted);
+  font-size:11px;
+  line-height:1.5;
+  font-style:normal;
+}
 .handoff-summary-body{
  color:var(--text);
  font-size:var(--font-size-sm);
@@ -3291,6 +3291,48 @@ function _compressionCardsNode(state){
  wrap.innerHTML=`<div class="compression-turn-blocks">${_compressionCardsHtml(state)}</div>`;
  return wrap;
 }
+function _isHandoffSummaryToolPayload(value){
+  // True only for a non-array object whose `_handoff_summary_card` is
+  // strictly `true`.
+  const isPlainObject=!!value&&typeof value==='object'&&!Array.isArray(value);
+  return isPlainObject&&value._handoff_summary_card===true;
+}
+function _parseHandoffSummaryPayload(content){
+  // Accepts either an already-parsed object or a JSON string and returns the
+  // payload when _isHandoffSummaryToolPayload accepts it; otherwise null.
+  if(!content) return null;
+  let candidate=content;
+  if(typeof content==='string'){
+    try {
+      candidate=JSON.parse(content);
+    } catch (e) {
+      return null;
+    }
+  } else if(typeof content!=='object'||Array.isArray(content)){
+    // Arrays and non-object primitives can never be a summary payload.
+    return null;
+  }
+  return _isHandoffSummaryToolPayload(candidate)?candidate:null;
+}
+function _handoffSummaryStateFromMessage(m){
+  // Converts a `tool` message carrying a handoff summary payload into the
+  // state object consumed by the handoff card renderer (phase 'done').
+  // Returns null for non-tool messages, non-summary payloads, empty
+  // summaries, or payloads rejected by the session check below.
+  if(!m||m.role!=='tool') return null;
+  const payload=_parseHandoffSummaryPayload(m.content);
+  if(!payload) return null;
+  const payloadSid=String(payload.session_id||'');
+  const messageSid=String(m.session_id||'');
+  // NOTE(review): the mismatch check only applies when the message itself has
+  // a session_id, yet that value is never compared. Confirm this gate is
+  // intended.
+  if(payloadSid&&S.session&&messageSid&&payloadSid!==String(S.session.session_id||'')) {
+    return null;
+  }
+  const summary=String(payload.summary||'').trim();
+  if(!summary) return null;
+  const rounds=Number.isFinite(payload.rounds)?payload.rounds:null;
+  const generatedAt=Number(payload.generated_at)||null;
+  return {
+    phase: 'done',
+    channel: payload.channel || null,
+    rounds,
+    summary,
+    fallback: !!payload.fallback,
+    generatedAt,
+  };
+}
+function _collectHandoffSummaryStates(messages){
+  // Scans a message list and returns one `{state, rawIdx}` entry per message
+  // that _handoffSummaryStateFromMessage recognises, in list order. Returns
+  // an empty array for non-array input.
+  if(!Array.isArray(messages)) return [];
+  const found=[];
+  messages.forEach((message,rawIdx)=>{
+    const state=_handoffSummaryStateFromMessage(message);
+    if(state) found.push({state, rawIdx});
+  });
+  return found;
+}
 function _isContextCompactionMessage(m){
  if(!m||!m.role||m.role==='tool') return false;
  const text=msgContent(m)||String(m.content||'');
@@ -3448,6 +3490,7 @@ function _handoffCardsHtml(state){
  const label=channel?`${channel} handoff summary`:'Handoff summary';
  const isError=state.phase==='error';
  const isDone=state.phase==='done';
+  const isFallback=!!state.fallback;
  const detail=isError
    ? String(state.errorText||'Could not generate summary. Please try again.')
    : isDone
@@ -3462,7 +3505,13 @@ function _handoffCardsHtml(state){
      ? li('check',13)
      : '<span class="tool-card-running-dot"></span>';
  const bodyHtml=isDone&&!isError
-    ? renderMd(detail)
+    ? (
+      `${renderMd(detail)}${
+        isFallback
+          ? '<p class="handoff-summary-fallback-note">Fallback summary generated from recent turns; no model-based rewrite was used.</p>'
+          : ''
+      }`
+    )
    : `<p>${esc(detail)}</p>`;
  return `
    <div class="tool-card-row compression-card-row handoff-card-row" data-compression-card="1" data-handoff-card="1">
@@ -3768,17 +3817,61 @@ function renderMessages(){
    }
    inner.appendChild(node);
  }
+  function _insertCompressionLikeNodeByRawIdx(node, rawIdx){
+    // Places `node` relative to the first visible message whose raw index is
+    // greater than rawIdx. With no such message (or no visible messages at
+    // all) the node is appended to `inner`.
+    if(!node) return;
+    const anchorEntry=visWithIdx.find(entry=>entry.rawIdx > rawIdx);
+    if(!anchorEntry){
+      inner.appendChild(node);
+      return;
+    }
+    const anchorRawIdx=anchorEntry.rawIdx;
+    const anchorSeg=assistantSegments.get(anchorRawIdx);
+    if(anchorSeg){
+      // Anchor is an assistant segment: prefer its turn's block list, then
+      // fall back to inserting before the turn element itself.
+      const turn=anchorSeg.closest('.assistant-turn');
+      const blocks=_assistantTurnBlocks(turn);
+      if(blocks){
+        blocks.appendChild(node);
+        return;
+      }
+      const turnParent=turn && turn.parentElement;
+      if(turnParent){
+        turnParent.insertBefore(node, turn);
+        return;
+      }
+    }
+    // Anchor is a user row (or the assistant lookup found no usable parent).
+    const userRow=userRows.get(anchorRawIdx);
+    const userParent=userRow && userRow.parentElement;
+    if(userParent){
+      userParent.insertBefore(node, userRow);
+      return;
+    }
+    inner.appendChild(node);
+  }
  const preservedOnlyNode=(!preservedCompressionTaskCardsAttached&&(!referenceMessage||compressionState)&&preservedCompressionTaskMessages.length)
    ? (()=>{const row=document.createElement('div');row.innerHTML=`<div class="compression-turn"><div class="compression-turn-blocks">${_preservedCompressionTaskListCardsHtml(preservedCompressionTaskMessages)}</div></div>`;return row.firstElementChild;})()
    : null;
  const preservedOnlyAnchor=preservedCompressionRawIdxs.length
    ? (()=>{let idx=null;for(let i=0;i<visWithIdx.length;i++){if(visWithIdx[i].rawIdx<preservedCompressionRawIdxs[0]) idx=i;}return idx;})()
    : null;
+  const handoffSummaryStates=_collectHandoffSummaryStates(S.messages);

  _insertCompressionLikeNode(compressionNode);
  _insertCompressionLikeNode(referenceNode);
  _insertCompressionLikeNode(preservedOnlyNode, preservedOnlyAnchor);
  _insertCompressionLikeNode(handoffState?_handoffCardsNode(handoffState):null, visWithIdx.length?visWithIdx.length-1:null);
+  for(const entry of handoffSummaryStates){
+    if(!entry||!entry.state) continue;
+    _insertCompressionLikeNodeByRawIdx(_handoffCardsNode(entry.state), entry.rawIdx);
+  }
  renderCompressionUi();
  // Insert settled tool call cards (history view only).
  // During live streaming, tool cards are rendered in #liveToolCards by the
@@ -87,6 +87,14 @@ def _ensure_state_db():
        );
    """)
    for column, ddl in (
+        ('user_id', 'ALTER TABLE sessions ADD COLUMN user_id TEXT'),
+        ('chat_id', 'ALTER TABLE sessions ADD COLUMN chat_id TEXT'),
+        ('chat_type', 'ALTER TABLE sessions ADD COLUMN chat_type TEXT'),
+        ('thread_id', 'ALTER TABLE sessions ADD COLUMN thread_id TEXT'),
+        ('session_key', 'ALTER TABLE sessions ADD COLUMN session_key TEXT'),
+        ('origin_chat_id', 'ALTER TABLE sessions ADD COLUMN origin_chat_id TEXT'),
+        ('origin_user_id', 'ALTER TABLE sessions ADD COLUMN origin_user_id TEXT'),
+        ('platform', 'ALTER TABLE sessions ADD COLUMN platform TEXT'),
        ('parent_session_id', 'ALTER TABLE sessions ADD COLUMN parent_session_id TEXT'),
        ('ended_at', 'ALTER TABLE sessions ADD COLUMN ended_at REAL'),
        ('end_reason', 'ALTER TABLE sessions ADD COLUMN end_reason TEXT'),
@@ -100,13 +108,34 @@ def _ensure_state_db():

 def _insert_gateway_session(conn, session_id='20260401_120000_abcdefgh', source='telegram',
                             title='Telegram Chat', model='anthropic/claude-sonnet-4-5',
-                             started_at=None, message_count=2):
+                             started_at=None, message_count=2, user_id=None, chat_id=None,
+                             chat_type=None, thread_id=None, session_key=None, origin_chat_id=None,
+                             origin_user_id=None, platform=None):
    """Insert a gateway session into state.db."""
    conn.execute(
-        "INSERT OR REPLACE INTO sessions (id, source, title, model, started_at, message_count) "
-        "VALUES (?, ?, ?, ?, ?, ?)",
-        (session_id, source, title, model, started_at or time.time(), message_count)
+        "INSERT OR REPLACE INTO sessions (id, source, user_id, title, model, started_at, message_count) "
+        "VALUES (?, ?, ?, ?, ?, ?, ?)",
+        (session_id, source, user_id, title, model, started_at or time.time(), message_count)
    )
+    updates = []
+    params = []
+    for key, value in (
+        ("chat_id", chat_id),
+        ("chat_type", chat_type),
+        ("thread_id", thread_id),
+        ("session_key", session_key),
+        ("origin_chat_id", origin_chat_id),
+        ("origin_user_id", origin_user_id),
+        ("platform", platform),
+    ):
+        if value is not None:
+            updates.append(f"{key} = ?")
+            params.append(value)
+    if updates:
+        conn.execute(
+            f"UPDATE sessions SET {', '.join(updates)} WHERE id = ?",
+            [*params, session_id]
+        )
    # Delete any existing messages for this session (idempotent re-insert)
    conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
    # Insert some messages
@@ -183,6 +212,13 @@ def _cleanup_state_db():
            pass


+def _insert_message(conn, sid, role, content, timestamp):
+    """Insert one ``messages`` row for session ``sid``; committing is left to the caller."""
+    statement = "INSERT INTO messages (session_id, role, content, timestamp) VALUES (?, ?, ?, ?)"
+    row = (sid, role, content, timestamp)
+    conn.execute(statement, row)
+
+
 # ── Tests ──────────────────────────────────────────────────────────────────

 def test_gateway_sessions_appear_when_enabled():
@@ -849,7 +885,7 @@ def test_sessions_response_backfills_imported_messaging_source_metadata(cleanup_


 def test_sessions_response_keeps_only_latest_messaging_session_per_source(cleanup_test_sessions):
-    """Sidebar should expose only the newest session for each messaging source."""
+    """Sidebar should keep messaging sessions by stable identity, not source-wide."""
    from api.models import Session

    conn = _ensure_state_db()
@@ -884,6 +920,328 @@ def test_sessions_response_keeps_only_latest_messaging_session_per_source(cleanu
            pass


+def test_sessions_response_keeps_distinct_messaging_sessions_for_distinct_users(cleanup_test_sessions):
+    """Sessions from different users on the same platform must not be collapsed into one."""
+    conn = _ensure_state_db()
+    sid_a = 'gw_tg_distinct_user_a'
+    sid_b = 'gw_tg_distinct_user_b'
+    cleanup_test_sessions.extend([sid_a, sid_b])
+    try:
+        _insert_gateway_session(
+            conn,
+            session_id=sid_a,
+            source='telegram',
+            title='TG User A',
+            user_id='1143399746',
+            started_at=time.time() - 20,  # older of the two rows
+        )
+        _insert_gateway_session(
+            conn,
+            session_id=sid_b,
+            source='telegram',  # same source as sid_a; only user_id differs
+            title='TG User B',
+            user_id='9988776655',
+            started_at=time.time(),
+        )
+
+        post('/api/settings', {'show_cli_sessions': True})
+        data, status = get('/api/sessions')
+        assert status == 200
+        ids = {s['session_id'] for s in data.get('sessions', []) if s.get('session_id') in {sid_a, sid_b}}
+        assert ids == {sid_a, sid_b}, f"Expected both Telegram sessions to remain, got {ids}"
+    finally:
+        try:
+            post('/api/settings', {'show_cli_sessions': False})
+            _remove_test_sessions(conn, sid_a, sid_b)
+            conn.close()
+        except Exception:  # best-effort teardown: a cleanup error must not replace the test outcome
+            pass
+
+
+def test_sessions_response_distinguishes_same_user_different_chat_identity_from_gateway_metadata(cleanup_test_sessions):
+    """Same user_id sessions should stay separate when sessions.json gateway metadata gives them distinct chat identity."""
+    conn = _ensure_state_db()
+    sid_dm = 'gw_tg_same_user_dm'
+    sid_group = 'gw_tg_same_user_group'
+    cleanup_test_sessions.extend([sid_dm, sid_group])
+    sessions_file = _get_test_state_dir() / 'sessions' / 'sessions.json'
+    original_sessions_json = None  # None records that sessions.json did not exist before the test
+    if sessions_file.exists():
+        original_sessions_json = sessions_file.read_text(encoding='utf-8')  # same codec the restore writes with
+    sessions_file.parent.mkdir(parents=True, exist_ok=True)
+    sessions_payload = {
+        "agent:main:telegram:dm:1143399746": {
+            "session_key": "agent:main:telegram:dm:1143399746",
+            "session_id": sid_dm,
+            "origin": {
+                "platform": "telegram",
+                "chat_type": "dm",
+                "chat_id": "1143399746",
+                "user_id": "1143399746",
+            },
+        },
+        "agent:main:telegram:group:chat_42:1143399746": {
+            "session_key": "agent:main:telegram:group:chat_42:1143399746",
+            "session_id": sid_group,
+            "origin": {
+                "platform": "telegram",
+                "chat_type": "group",
+                "chat_id": "chat_42",
+                "user_id": "1143399746",
+            },
+        },
+    }
+    try:
+        sessions_file.write_text(json.dumps(sessions_payload), encoding='utf-8')
+        _insert_gateway_session(conn, session_id=sid_dm, source='telegram', title='DM Same User', user_id='1143399746', started_at=time.time() - 40)
+        _insert_gateway_session(conn, session_id=sid_group, source='telegram', title='Group Same User', user_id='1143399746', started_at=time.time())
+
+        post('/api/settings', {'show_cli_sessions': True})
+        data, status = get('/api/sessions')
+        assert status == 200
+        ids = {s['session_id'] for s in data.get('sessions', []) if s.get('session_id') in {sid_dm, sid_group}}
+        assert ids == {sid_dm, sid_group}, f"Expected both DM/group Telegram sessions, got {ids}"
+    finally:
+        try:
+            if original_sessions_json is None:  # restore the file first so a failing HTTP call cannot leave it overwritten
+                sessions_file.unlink(missing_ok=True)
+            else:
+                sessions_file.write_text(original_sessions_json, encoding='utf-8')
+            post('/api/settings', {'show_cli_sessions': False})
+            _remove_test_sessions(conn, sid_dm, sid_group)
+            conn.close()
+        except Exception:  # best-effort teardown: a cleanup error must not replace the test outcome
+            pass
+
+
+def test_messaging_projection_hides_stale_gateway_internal_segments(monkeypatch):
+    """Active Gateway identity should hide old reset rows and internal child segments."""
+    from api import routes
+
+    monkeypatch.setattr(
+        routes,
+        "_load_gateway_session_identity_map",
+        lambda: {  # only weixin_current_sid has a gateway identity entry
+            "weixin_current_sid": {
+                "session_key": "agent:main:weixin:dm:user_1",
+                "raw_source": "weixin",
+                "platform": "weixin",
+                "chat_type": "dm",
+                "chat_id": "user_1",
+                "user_id": "user_1",
+            },
+        },
+    )
+    sessions = [
+        {
+            "session_id": "weixin_current_sid",
+            "raw_source": "weixin",
+            "title": "Current Weixin",
+            "updated_at": 100,
+            "message_count": 8,
+        },
+        {
+            "session_id": "weixin_internal_child_sid",
+            "raw_source": "weixin",
+            "title": "Internal Weixin Segment",
+            "parent_session_id": "weixin_current_sid",  # child of the active session
+            "updated_at": 120,  # newer than the active row, yet expected to be hidden
+            "message_count": 4,
+        },
+        {
+            "session_id": "weixin_reset_sid",
+            "raw_source": "weixin",
+            "title": "Old Weixin Reset",
+            "end_reason": "session_reset",
+            "updated_at": 90,
+            "message_count": 6,
+        },
+        {
+            "session_id": "weixin_legacy_fallback_sid",
+            "raw_source": "weixin",
+            "title": "Legacy Weixin Fallback",
+            "updated_at": 95,
+            "message_count": 3,
+            "user_id": "user_1",  # same user as the active identity, but absent from the identity map
+        },
+        {
+            "session_id": "webui_sid",  # no raw_source: a non-messaging row that must pass through
+            "title": "Regular WebUI",
+            "updated_at": 80,
+            "message_count": 2,
+        },
+    ]
+
+    kept = routes._keep_latest_messaging_session_per_source(sessions)
+    ids = {session.get("session_id") for session in kept}
+
+    assert "weixin_current_sid" in ids
+    assert "webui_sid" in ids
+    assert "weixin_internal_child_sid" not in ids
+    assert "weixin_reset_sid" not in ids
+    assert "weixin_legacy_fallback_sid" not in ids
+
+
+def test_messaging_projection_keeps_distinct_active_gateway_conversations(monkeypatch):
+    """Telegram DM and group chats must not collapse just because source matches."""
+    from api import routes
+
+    monkeypatch.setattr(
+        routes,
+        "_load_gateway_session_identity_map",
+        lambda: {  # both sessions have their own gateway identity entry
+            "telegram_dm_sid": {
+                "session_key": "agent:main:telegram:dm:user_1",
+                "raw_source": "telegram",
+                "platform": "telegram",
+                "chat_type": "dm",
+                "chat_id": "user_1",
+                "user_id": "user_1",
+            },
+            "telegram_group_sid": {
+                "session_key": "agent:main:telegram:group:group_1:user_1",
+                "raw_source": "telegram",
+                "platform": "telegram",
+                "chat_type": "group",
+                "chat_id": "group_1",  # differs from the DM entry; user_id is identical
+                "user_id": "user_1",
+            },
+        },
+    )
+    sessions = [
+        {
+            "session_id": "telegram_dm_sid",
+            "raw_source": "telegram",
+            "title": "Telegram DM",
+            "updated_at": 100,
+            "message_count": 4,
+        },
+        {
+            "session_id": "telegram_group_sid",
+            "raw_source": "telegram",
+            "title": "Telegram Group",
+            "updated_at": 110,
+            "message_count": 4,
+        },
+    ]
+
+    kept = routes._keep_latest_messaging_session_per_source(sessions)
+    ids = {session.get("session_id") for session in kept}
+
+    assert ids == {"telegram_dm_sid", "telegram_group_sid"}  # neither row is dropped
+
+
+def test_messaging_projection_does_not_aggressively_hide_without_gateway_metadata(monkeypatch):
+    """With an empty gateway identity map, the fallback path must keep an ended (session_reset) row."""
+    from api import routes
+
+    monkeypatch.setattr(routes, "_load_gateway_session_identity_map", lambda: {})  # no gateway metadata at all
+    sessions = [
+        {
+            "session_id": "weixin_reset_sid",
+            "raw_source": "weixin",
+            "title": "Old Weixin Reset",
+            "end_reason": "session_reset",
+            "updated_at": 90,
+            "message_count": 6,
+        },
+    ]
+
+    kept = routes._keep_latest_messaging_session_per_source(sessions)
+
+    assert [session.get("session_id") for session in kept] == ["weixin_reset_sid"]
+
+
+def test_sessions_response_distinguishes_same_platform_same_group_chat_different_users_without_session_key(cleanup_test_sessions):
+    """Group sessions with same chat_id but different users should not collapse without session_key."""
+    conn = _ensure_state_db()
+    sid_u1 = 'gw_tg_group_chat_001'
+    sid_u2 = 'gw_tg_group_chat_002'
+    cleanup_test_sessions.extend([sid_u1, sid_u2])
+    try:
+        _insert_gateway_session(  # session_key is deliberately not passed for either row
+            conn,
+            session_id=sid_u1,
+            source='telegram',
+            title='TG Group Same Chat User1',
+            user_id='2001001',
+            chat_id='tg_group_42',
+            chat_type='group',
+            started_at=time.time() - 20,
+        )
+        _insert_gateway_session(
+            conn,
+            session_id=sid_u2,
+            source='telegram',
+            title='TG Group Same Chat User2',
+            user_id='2001002',  # only user_id differs from sid_u1
+            chat_id='tg_group_42',
+            chat_type='group',
+            started_at=time.time(),
+        )
+
+        post('/api/settings', {'show_cli_sessions': True})
+        data, status = get('/api/sessions')
+        assert status == 200
+        ids = {s['session_id'] for s in data.get('sessions', []) if s.get('session_id') in {sid_u1, sid_u2}}
+        assert ids == {sid_u1, sid_u2}, (
+            f"Expected both group sessions in same chat to stay visible without session_key, got {ids}"
+        )
+    finally:
+        try:
+            post('/api/settings', {'show_cli_sessions': False})
+            _remove_test_sessions(conn, sid_u1, sid_u2)
+            conn.close()
+        except Exception:  # best-effort teardown: a cleanup error must not replace the test outcome
+            pass
+
+
+def test_sessions_response_distinguishes_same_user_different_thread_without_session_key(cleanup_test_sessions):
+    """Same user_id but different thread context should remain separate without session_key."""
+    conn = _ensure_state_db()
+    sid_t1 = 'gw_tg_thread_001'
+    sid_t2 = 'gw_tg_thread_002'
+    cleanup_test_sessions.extend([sid_t1, sid_t2])
+    try:
+        _insert_gateway_session(  # session_key is deliberately not passed for either row
+            conn,
+            session_id=sid_t1,
+            source='telegram',
+            title='TG Thread A',
+            user_id='5550007',
+            chat_id='tg_group_42',
+            chat_type='thread',
+            thread_id='thread_a',
+            started_at=time.time() - 20,
+        )
+        _insert_gateway_session(
+            conn,
+            session_id=sid_t2,
+            source='telegram',
+            title='TG Thread B',
+            user_id='5550007',
+            chat_id='tg_group_42',
+            chat_type='thread',
+            thread_id='thread_b',  # only thread_id differs from sid_t1
+            started_at=time.time(),
+        )
+
+        post('/api/settings', {'show_cli_sessions': True})
+        data, status = get('/api/sessions')
+        assert status == 200
+        ids = {s['session_id'] for s in data.get('sessions', []) if s.get('session_id') in {sid_t1, sid_t2}}
+        assert ids == {sid_t1, sid_t2}, (
+            f"Expected both thread-scoped Telegram sessions to stay visible without session_key, got {ids}"
+        )
+    finally:
+        try:
+            post('/api/settings', {'show_cli_sessions': False})
+            _remove_test_sessions(conn, sid_t1, sid_t2)
+            conn.close()
+        except Exception:  # best-effort teardown: a cleanup error must not replace the test outcome
+            pass
+
+
 def test_archiving_raw_messaging_session_imports_without_erasing_agent_memory(cleanup_test_sessions):
    """Archive should be the safe hide path for raw messaging sessions."""
    conn = _ensure_state_db()
@@ -1076,6 +1434,176 @@ def test_gateway_session_messages_readable():
        post('/api/settings', {'show_cli_sessions': False})


+def test_session_prefers_state_db_messages_over_stale_local_snapshot(cleanup_test_sessions):
+    """Stale local JSON for messaging sessions should not mask newer state.db messages."""
+    from api.models import Session
+
+    conn = _ensure_state_db()
+    sid = 'gw_masking_regression_001'
+    cleanup_test_sessions.append(sid)
+    base_ts = time.time() - 120
+    stale_messages = [  # written to both state.db and the local snapshot
+        ("user", "Old local user", base_ts + 1),
+        ("assistant", "Old local assistant", base_ts + 2),
+    ]
+    fresh_messages = [  # written to state.db only
+        ("user", "Fresh user 1", base_ts + 10),
+        ("assistant", "Fresh assistant 1", base_ts + 11),
+        ("user", "Fresh user 2", base_ts + 12),
+        ("assistant", "Fresh assistant 2", base_ts + 13),
+    ]
+    expected_tail = fresh_messages[-1][1]
+    expected_total = len(stale_messages) + len(fresh_messages)
+    try:
+        _insert_gateway_session(
+            conn,
+            session_id=sid,
+            source='telegram',
+            title='Regression Telegram Chat',
+            message_count=expected_total,
+            started_at=base_ts + 1,
+        )
+        # Replace the two auto-inserted starter messages with a controlled sequence
+        # so we can assert ordering across local+state updates.
+        conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
+        for role, content, ts in stale_messages + fresh_messages:
+            _insert_message(conn, sid, role, content, ts)
+        conn.execute(
+            "UPDATE sessions SET message_count = ? WHERE id = ?",
+            (expected_total, sid),
+        )
+        conn.commit()
+
+        s = Session(  # local snapshot that contains only the stale prefix
+            session_id=sid,
+            title='Legacy Local Telegram Snapshot',
+            workspace=str(pathlib.Path.home() / '.hermes'),
+            model='openai/gpt-5',
+            messages=[{"role": r, "content": c, "timestamp": t} for r, c, t in stale_messages],
+        )
+        s.is_cli_session = True
+        s.session_source = 'messaging'
+        s.source_tag = 'telegram'
+        s.raw_source = 'telegram'
+        s.source_label = 'Telegram'
+        s.save(touch_updated_at=False)
+
+        post('/api/settings', {'show_cli_sessions': True})
+        data, status = get(f'/api/session?session_id={sid}')
+        assert status == 200, data
+        session = data.get('session', {})
+        msgs = session.get('messages', [])
+        assert len(msgs) == expected_total, f"Expected {expected_total} messages, got {len(msgs)}"
+        assert msgs[-1].get('content') == expected_tail  # newest state.db message must come last
+        assert session.get('message_count') == expected_total
+    finally:
+        try:
+            _remove_test_sessions(conn, sid)
+            conn.close()
+        except Exception:
+            pass
+        try:  # separate try so the settings reset still runs if DB cleanup fails
+            post('/api/settings', {'show_cli_sessions': False})
+        except Exception:
+            pass
+
+
+def test_sessions_prefers_state_db_metadata_for_messaging_overlap(cleanup_test_sessions):
+    """Sidebar metadata for messaging sessions should come from state.db, not local JSON snapshots."""
+    conn = _ensure_state_db()
+    sid = 'gw_sidebar_metadata_regression_001'
+    cleanup_test_sessions.append(sid)
+    now = time.time()
+    rows = [  # state.db messages; the newest timestamp is now - 5
+        ("user", "Hello", now - 30),
+        ("assistant", "Welcome", now - 29),
+        ("user", "Need details", now - 5),
+    ]
+    try:
+        _insert_gateway_session(conn, session_id=sid, source='weixin', title='Live metadata chat', message_count=len(rows), started_at=now - 30)
+        conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
+        for role, content, ts in rows:
+            _insert_message(conn, sid, role, content, ts)
+        conn.commit()
+
+        stale = [  # local snapshot: fewer and older messages than state.db
+            {"role": "user", "content": "stale one", "timestamp": now - 100},
+            {"role": "assistant", "content": "stale two", "timestamp": now - 99},
+        ]
+        from api.models import Session
+        local = Session(
+            session_id=sid,
+            title='Stale Sidebar',
+            messages=stale,
+            model='openai/gpt-4',
+        )
+        local.is_cli_session = True
+        local.session_source = 'messaging'
+        local.source_tag = 'weixin'
+        local.raw_source = 'weixin'
+        local.source_label = 'Weixin'
+        local.save(touch_updated_at=False)
+
+        post('/api/settings', {'show_cli_sessions': True})
+        data, status = get('/api/sessions')
+        assert status == 200, data
+        session = next(item for item in data.get('sessions', []) if item.get('session_id') == sid)  # StopIteration if the row is missing
+        assert session.get('message_count') == len(rows)
+        expected_updated = max(ts for _, _, ts in rows)
+        assert abs(float(session.get('updated_at') or 0) - expected_updated) < 1.0  # within one second of the newest state.db message
+    finally:
+        try:
+            post('/api/settings', {'show_cli_sessions': False})
+            _remove_test_sessions(conn, sid)
+            conn.close()
+        except Exception:  # best-effort teardown: a cleanup error must not replace the test outcome
+            pass
+
+
+def test_archiving_messaging_session_keeps_state_db_history(cleanup_test_sessions):
+    """Archiving a messaging session should persist metadata without importing full transcript."""
+    from api.models import Session
+
+    conn = _ensure_state_db()
+    sid = 'gw_archive_metadata_only_001'
+    cleanup_test_sessions.append(sid)
+    try:
+        _insert_gateway_session(
+            conn,
+            session_id=sid,
+            source='discord',
+            title='Archive Safe',
+            message_count=2,
+            started_at=time.time() - 20,
+        )
+        # Do not create a local session first; archive should create minimal metadata only.
+        data, status = post('/api/session/archive', {'session_id': sid, 'archived': True})
+        assert status == 200, data
+        archived = data.get('session', {})
+        assert archived.get('archived') is True
+        remaining = conn.execute(
+            "SELECT COUNT(*) FROM messages WHERE session_id = ?",
+            (sid,),
+        ).fetchone()[0]
+        assert remaining >= 2  # state.db message rows must survive the archive call
+
+        local = Session.load(sid)
+        assert local is not None
+        assert local.messages == [], "Archive should not import historical messages into local JSON"
+        assert local.archived is True
+
+        session_data, session_status = get(f'/api/session?session_id={sid}')
+        assert session_status == 200, session_data
+        assert session_data.get('session', {}).get('archived') is True
+        assert session_data.get('session', {}).get('message_count') == 2  # matches message_count inserted above
+    finally:
+        try:
+            _remove_test_sessions(conn, sid)
+            conn.close()
+        except Exception:  # best-effort teardown: a cleanup error must not replace the test outcome
+            pass
+
+
 def test_importing_older_gateway_session_preserves_original_timestamps_and_order():
    """Importing an older gateway session should not bump it above newer WebUI sessions."""
    conn = _ensure_state_db()
@@ -1,6 +1,11 @@
 """Regression guards for cross-channel handoff UI and summary generation."""

+import json
+import time
+import sqlite3
 from pathlib import Path
+import sys
+import types


 ROOT = Path(__file__).resolve().parents[1]
@@ -8,6 +13,49 @@ INDEX = (ROOT / "static" / "index.html").read_text(encoding="utf-8")
 SESSIONS_JS = (ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
 STYLE_CSS = (ROOT / "static" / "style.css").read_text(encoding="utf-8")
 ROUTES = (ROOT / "api" / "routes.py").read_text(encoding="utf-8")
+UI_JS = (ROOT / "static" / "ui.js").read_text(encoding="utf-8")
+
+
+def _new_state_db(path: Path) -> sqlite3.Connection:
+    """Open ``path`` as a SQLite database and ensure minimal ``sessions``/``messages`` tables exist."""
+    schema = """
+        CREATE TABLE IF NOT EXISTS sessions (
+            id TEXT PRIMARY KEY,
+            source TEXT NOT NULL,
+            title TEXT,
+            model TEXT,
+            started_at REAL NOT NULL,
+            message_count INTEGER DEFAULT 0,
+            parent_session_id TEXT,
+            ended_at REAL,
+            end_reason TEXT
+        );
+        CREATE TABLE IF NOT EXISTS messages (
+            session_id TEXT NOT NULL,
+            role TEXT NOT NULL,
+            content TEXT,
+            timestamp REAL
+        );
+        """
+    db_path = str(path)
+    connection = sqlite3.connect(db_path)
+    connection.executescript(schema)
+    return connection
+
+
+def _extract_handoff_marker_payload(message):
+    """Return the decoded ``_handoff_summary_card`` marker dict carried by ``message``, else None."""
+    if not isinstance(message, dict):
+        return None
+    raw = message.get("content")
+    if not isinstance(raw, str):
+        return None
+    try:
+        payload = json.loads(raw)
+    except json.JSONDecodeError:
+        return None
+    is_marker = isinstance(payload, dict) and bool(payload.get("_handoff_summary_card"))
+    return payload if is_marker else None


 def test_handoff_hint_is_docked_in_composer_flyout_not_transcript():
@@ -28,6 +76,32 @@ def test_handoff_dock_reserves_transcript_space_like_terminal_dock():
    assert "_syncHandoffDockSpace(false)" in SESSIONS_JS


+def test_handoff_dock_width_aligns_with_existing_slide_up_panels():  # static check of style.css text
+    assert ".handoff-hint-container{position:absolute;left:0;right:0;bottom:-2px;width:min(calc(100% - 112px),560px);" in STYLE_CSS
+    assert ".handoff-hint-container{bottom:-1px;width:calc(100% - 28px);}" in STYLE_CSS
+    start = STYLE_CSS.find(".handoff-hint-container")  # first occurrence of the selector in the stylesheet
+    assert start != -1
+    end = STYLE_CSS.find("}", start)  # first closing brace after that occurrence
+    assert end != -1
+    handoff_hint_rule = STYLE_CSS[start:end+1]
+    assert "width:min(calc(100% - 112px),560px)" in handoff_hint_rule  # the width must sit in that first rule
+    assert ".handoff-hint-dot{width:7px;height:7px;border-radius:999px;background:var(--success);" in STYLE_CSS
+
+
+def test_handoff_summary_fallback_displays_clear_user_note():  # static check of ui.js source text
+    assert "const isFallback=!!state.fallback;" in UI_JS
+    assert "class=\"handoff-summary-fallback-note\"" in UI_JS
+    assert "Fallback summary generated from recent turns; no model-based rewrite was used." in UI_JS  # exact user-facing note
+
+
+def test_handoff_delete_clears_local_storage_markers():  # static check of sessions.js source text
+    assert "function _clearHandoffStorageForSession(sid) {" in SESSIONS_JS
+    assert "_setHandoffStorageValue(sid, _HANDOFF_SUFFIX_DISMISSED_AT, null);" in SESSIONS_JS  # dismissed marker set to null
+    assert "_setHandoffStorageValue(sid, _HANDOFF_SUFFIX_SUMMARY_HANDLED_AT, null);" in SESSIONS_JS  # handled marker set to null
+    assert "_clearHandoffStorageForSession(sid);" in SESSIONS_JS  # call with a single session id
+    assert "ids.forEach(_clearHandoffStorageForSession);" in SESSIONS_JS  # call over a list of ids
+
+
 def test_handoff_summary_renders_as_transcript_card_not_dock_card():
    assert "function setHandoffUi" in SESSIONS_JS or "function setHandoffUi" in (ROOT / "static" / "ui.js").read_text(encoding="utf-8")
    ui_js = (ROOT / "static" / "ui.js").read_text(encoding="utf-8")
@@ -43,6 +117,16 @@ def test_handoff_summary_renders_as_transcript_card_not_dock_card():
    assert "handoff-summary-card" not in STYLE_CSS


+def test_handoff_summary_card_rendering_uses_persisted_messages():
+    """Persistent summary markers are parsed from message history and rendered via compression-like cards."""
+    assert "_collectHandoffSummaryStates" in UI_JS
+    assert "_handoffSummaryStateFromMessage" in UI_JS
+    assert "_handoffSummaryPayload" in UI_JS or "_parseHandoffSummaryPayload" in UI_JS  # either helper name is accepted
+    assert "_insertCompressionLikeNodeByRawIdx" in UI_JS
+    assert "_isHandoffSummaryToolPayload" in UI_JS
+    assert "_buildHandoffSummaryToolMessage" in SESSIONS_JS  # this name is checked in sessions.js, not ui.js
+
+
 def test_handoff_summary_does_not_call_removed_agent_get_response():
    """Current Hermes Agent exposes run_conversation/private transports, not get_response."""
    handoff_start = ROUTES.index("def _handle_handoff_summary")
@@ -53,8 +137,23 @@ def test_handoff_summary_does_not_call_removed_agent_get_response():
    assert "_fallback_handoff_summary" in handoff_body


-def test_generating_handoff_summary_does_not_dismiss_future_hints():
-    """Summary generation is a read action; only explicit dismiss should suppress the dock."""
+def test_handoff_summary_prompt_uses_you_and_你():
+    """Summary prompt should address the reader in second person (“you”/“你”), never as “the user”/“用户”."""
+    handoff_start = ROUTES.index("def _handle_handoff_summary")
+    next_handler = ROUTES.index("\ndef _handle_skill_save", handoff_start)  # assumes _handle_skill_save follows the handoff handler in routes.py
+    handoff_body = ROUTES[handoff_start:next_handler]
+    prompt_start = handoff_body.index("summary_system_prompt = (")
+    prompt_end = handoff_body.index("summary_user_text =", prompt_start)
+    prompt_body = handoff_body[prompt_start:prompt_end]  # source text between the two assignments
+
+    assert "speak using “you”" in prompt_body
+    assert "用“你”" in prompt_body
+    assert "the user" not in prompt_body.lower()
+    assert "用户" not in prompt_body
+
+
+def test_generating_handoff_summary_marks_session_as_handled():
+    """Summary success uses a max(dismissed/handled) baseline for future checks."""
    generate_start = SESSIONS_JS.index("async function _generateHandoffSummary")
    resolve_start = SESSIONS_JS.index("function _resolveSessionModelForDisplaySoon", generate_start)
    generate_body = SESSIONS_JS[generate_start:resolve_start]
@@ -63,8 +162,534 @@ def test_generating_handoff_summary_does_not_dismiss_future_hints():
    generate_start_after_dismiss = SESSIONS_JS.index("async function _generateHandoffSummary", dismiss_start)
    dismiss_body = SESSIONS_JS[dismiss_start:generate_start_after_dismiss]

-    assert "_setHandoffDismissedAt(" not in generate_body
+    assert "_getHandoffSince(sid)" in generate_body
+    assert "_setHandoffSummaryHandledAt(sid, Date.now() / 1000)" in generate_body
+    assert "_hasMatchingHandoffSummary" not in generate_body
    assert "_setHandoffDismissedAt(" in dismiss_body
+    assert "_setHandoffSummaryHandledAt(" not in dismiss_body
+    assert "_HANDOFF_SUFFIX_SUMMARY_HANDLED_AT" in SESSIONS_JS
    assert "setHandoffUi({" in generate_body
-    assert ":dismissed_at'" in SESSIONS_JS
-    assert ":seen_at'" not in SESSIONS_JS
+    assert "phase: 'done'" not in generate_body
+    assert "_getHandoffSince(sid)" in SESSIONS_JS
+    assert "_HANDOFF_SUFFIX_SUMMARY_HANDLED_AT" in SESSIONS_JS
+    assert "_HANDOFF_SUFFIX_DISMISSED_AT" in SESSIONS_JS
+
+
+def test_handoff_hints_use_max_baseline_since():
+    """The hint check reads one baseline built from max(dismissed, handled).
+
+    ``_checkAndShowHandoffHint`` (sliced up to ``_showHandoffHint``) must call
+    ``_getHandoffSince(sid)``; the getters and the ``Math.max`` coalescing only
+    need to exist somewhere in sessions.js, and the old boolean helper must be gone.
+    """
+    hint_check_at = SESSIONS_JS.index("async function _checkAndShowHandoffHint")
+    hint_check_end = SESSIONS_JS.index("function _showHandoffHint", hint_check_at)
+    assert "_getHandoffSince(sid)" in SESSIONS_JS[hint_check_at:hint_check_end]
+
+    for snippet in (
+        "_getHandoffSummaryHandledAt(sid)",
+        "_getHandoffDismissedAt(sid)",
+        "Math.max(dismissedAt, summaryHandledAt)",
+    ):
+        assert snippet in SESSIONS_JS
+
+    assert "_isHandoffSummaryHandled" not in SESSIONS_JS
+
+
+def test_no_api_key_handoff_summary_persists_fallback_summary(monkeypatch):
+    """An empty runtime API key should yield a persisted fallback summary.
+
+    Request helpers, the persistence hook, the transcript source and provider
+    resolution are all stubbed, so the only behaviour exercised is how
+    ``_handle_handoff_summary`` reacts when the runtime provider has no key.
+    """
+    import api.routes as routes
+    import api.config as cfg
+    import api.models as models
+
+    session_id = "session-without-api-key"
+    persist_calls = []
+
+    def _record_persist(sid, summary, channel, rounds, fallback=False):
+        # Capture the arguments instead of touching any storage backend.
+        persist_calls.append(
+            {"sid": sid, "summary": summary, "channel": channel, "rounds": rounds, "fallback": fallback}
+        )
+        return {"ok": True}
+
+    def _fake_messages(sid):
+        return [
+            {"role": "user", "content": "Need help with setup", "timestamp": 1.0},
+            {"role": "assistant", "content": "I'll help you", "timestamp": 2.0},
+        ]
+
+    # Make the HTTP helpers transparent: validation passes, payloads come back as-is.
+    monkeypatch.setattr(routes, "require", lambda body, *keys: None)
+    monkeypatch.setattr(routes, "bad", lambda _handler, msg, status=400: {"ok": False, "error": msg, "status": status})
+    monkeypatch.setattr(routes, "j", lambda _handler, payload, status=200, extra_headers=None: payload)
+    monkeypatch.setattr(routes, "_persist_handoff_summary", _record_persist)
+
+    monkeypatch.setattr(models, "count_conversation_rounds", lambda sid, since=None: models.CONVERSATION_ROUND_THRESHOLD)
+    monkeypatch.setattr(models, "get_cli_session_messages", _fake_messages)
+    monkeypatch.setattr(cfg, "resolve_model_provider", lambda resolved_model=None: ("gpt-test", "openrouter", None))
+
+    # Stand-in hermes_cli package whose runtime provider reports an empty API key.
+    runtime_stub = types.ModuleType("hermes_cli.runtime_provider")
+    runtime_stub.resolve_runtime_provider = lambda requested=None: {"api_key": "", "provider": "openrouter", "base_url": None}
+    package_stub = types.ModuleType("hermes_cli")
+    package_stub.__path__ = []
+    package_stub.runtime_provider = runtime_stub
+    monkeypatch.setitem(sys.modules, "hermes_cli", package_stub)
+    monkeypatch.setitem(sys.modules, "hermes_cli.runtime_provider", runtime_stub)
+
+    response = routes._handle_handoff_summary(object(), {"session_id": session_id})
+
+    assert response["ok"] is True
+    assert response["fallback"] is True
+    summary = response["summary"]
+    assert summary.startswith("-")
+    assert "You asked:" in summary
+    assert "Recent external-channel activity:" not in summary
+
+    assert len(persist_calls) == 1
+    only_call = persist_calls[0]
+    assert only_call["sid"] == session_id
+    assert only_call["fallback"] is True
+    assert only_call["rounds"] == models.CONVERSATION_ROUND_THRESHOLD
+
+
+def test_exception_handoff_summary_persists_fallback_summary(monkeypatch):
+    """A completion call that raises must still end in a persisted fallback.
+
+    The fake agent hands out a client whose ``chat.completions.create`` always
+    raises ``RuntimeError``; the handler is expected to answer with a fallback
+    summary plus a ``warning`` field and to persist exactly one fallback marker.
+    """
+    import api.routes as routes
+    import api.config as cfg
+    import api.models as models
+
+    session_id = "session-with-exception"
+    persist_calls = []
+
+    def _record_persist(sid, summary, channel, rounds, fallback=False):
+        # Capture the arguments instead of touching any storage backend.
+        persist_calls.append(
+            {"sid": sid, "summary": summary, "channel": channel, "rounds": rounds, "fallback": fallback}
+        )
+        return {"ok": True}
+
+    def _fake_messages(sid):
+        return [
+            {"role": "user", "content": "Could you check this?", "timestamp": 1.0},
+            {"role": "assistant", "content": "Sure, I can help", "timestamp": 2.0},
+        ]
+
+    # Make the HTTP helpers transparent: validation passes, payloads come back as-is.
+    monkeypatch.setattr(routes, "require", lambda body, *keys: None)
+    monkeypatch.setattr(routes, "bad", lambda _handler, msg, status=400: {"ok": False, "error": msg, "status": status})
+    monkeypatch.setattr(routes, "j", lambda _handler, payload, status=200, extra_headers=None: payload)
+    monkeypatch.setattr(routes, "_persist_handoff_summary", _record_persist)
+
+    monkeypatch.setattr(models, "count_conversation_rounds", lambda sid, since=None: models.CONVERSATION_ROUND_THRESHOLD)
+    monkeypatch.setattr(models, "get_cli_session_messages", _fake_messages)
+    monkeypatch.setattr(cfg, "resolve_model_provider", lambda resolved_model=None: ("gpt-test", "openrouter", None))
+
+    # Stand-in hermes_cli package whose runtime provider reports a usable API key.
+    runtime_stub = types.ModuleType("hermes_cli.runtime_provider")
+    runtime_stub.resolve_runtime_provider = lambda requested=None: {
+        "api_key": "x",
+        "provider": "openrouter",
+        "base_url": None,
+    }
+    package_stub = types.ModuleType("hermes_cli")
+    package_stub.__path__ = []
+    package_stub.runtime_provider = runtime_stub
+    monkeypatch.setitem(sys.modules, "hermes_cli", package_stub)
+    monkeypatch.setitem(sys.modules, "hermes_cli.runtime_provider", runtime_stub)
+
+    class _Completions:
+        @staticmethod
+        def create(*args, **kwargs):
+            raise RuntimeError("intentional handoff-summary failure")
+
+    class _Chat:
+        completions = _Completions
+
+    class _OpenAIClient:
+        chat = _Chat
+
+    class _FailingAgent:
+        # Minimal agent surface: every completion made through its client raises.
+        api_mode = ""
+
+        def __init__(self, *args, **kwargs):
+            self.model = kwargs.get("model")
+            self.reasoning_config = None
+
+        def _build_api_kwargs(self, *args, **kwargs):
+            return {}
+
+        def _ensure_primary_openai_client(self, reason=None):
+            return _OpenAIClient()
+
+        def release_clients(self):
+            return None
+
+    agent_stub = types.ModuleType("run_agent")
+    agent_stub.AIAgent = _FailingAgent
+    monkeypatch.setitem(sys.modules, "run_agent", agent_stub)
+
+    response = routes._handle_handoff_summary(object(), {"session_id": session_id})
+
+    assert response["ok"] is True
+    assert response["fallback"] is True
+    summary = response["summary"]
+    assert summary.startswith("-")
+    assert "You asked:" in summary
+    assert "Recent external-channel activity:" not in summary
+    assert "warning" in response
+
+    assert len(persist_calls) == 1
+    only_call = persist_calls[0]
+    assert only_call["sid"] == session_id
+    assert only_call["fallback"] is True
+    assert only_call["rounds"] == models.CONVERSATION_ROUND_THRESHOLD
+
+
+def test_handoff_summary_retries_once_when_length_limit_reached(monkeypatch):
+    """A ``finish_reason='length'`` reply triggers exactly one larger retry.
+
+    The fake completion endpoint records the token budget of every call; the
+    first reply is cut off, the second finishes normally. The handler must
+    return the second reply, not flag a fallback, and have asked for budgets
+    of 700 and then 1400.
+    """
+    import api.routes as routes
+    import api.config as cfg
+    import api.models as models
+
+    session_id = "session-length-retry"
+    persist_calls = []
+    budgets_seen = []
+
+    def _record_persist(sid, summary, channel, rounds, fallback=False):
+        # Capture the arguments instead of touching any storage backend.
+        persist_calls.append(
+            {"sid": sid, "summary": summary, "channel": channel, "rounds": rounds, "fallback": fallback}
+        )
+        return {"ok": True}
+
+    def _fake_messages(sid):
+        return [
+            {"role": "user", "content": "Can we switch to a different method?", "timestamp": 1.0},
+            {"role": "assistant", "content": "Sure, here is the outline.", "timestamp": 2.0},
+            {"role": "user", "content": "Keep going.", "timestamp": 3.0},
+            {"role": "assistant", "content": "Step 1 is done, step 2 is pending.", "timestamp": 4.0},
+        ]
+
+    def _reply(text, finish_reason):
+        # Shape of a chat-completion response: one choice with message + finish_reason.
+        choice = types.SimpleNamespace(
+            message=types.SimpleNamespace(content=text),
+            finish_reason=finish_reason,
+        )
+        return types.SimpleNamespace(choices=[choice])
+
+    # Make the HTTP helpers transparent: validation passes, payloads come back as-is.
+    monkeypatch.setattr(routes, "require", lambda body, *keys: None)
+    monkeypatch.setattr(routes, "bad", lambda _handler, msg, status=400: {"ok": False, "error": msg, "status": status})
+    monkeypatch.setattr(routes, "j", lambda _handler, payload, status=200, extra_headers=None: payload)
+    monkeypatch.setattr(routes, "_persist_handoff_summary", _record_persist)
+
+    monkeypatch.setattr(models, "count_conversation_rounds", lambda sid, since=None: models.CONVERSATION_ROUND_THRESHOLD)
+    monkeypatch.setattr(models, "get_cli_session_messages", _fake_messages)
+    monkeypatch.setattr(cfg, "resolve_model_provider", lambda resolved_model=None: ("gpt-test", "openrouter", None))
+
+    class _Completions:
+        @staticmethod
+        def create(*args, **kwargs):
+            budget = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens")
+            budgets_seen.append(budget)
+            first_attempt = len(budgets_seen) == 1
+            if first_attempt:
+                return _reply("- You can do step A, B, and C", "length")
+            return _reply("- You should continue with step D.\n- You can then review results.", "stop")
+
+    class _Chat:
+        completions = _Completions
+
+    class _OpenAIClient:
+        chat = _Chat
+
+    class _LengthAwareAgent:
+        # Minimal agent surface that hands the fake OpenAI-style client to the handler.
+        api_mode = ""
+
+        def __init__(self, *args, **kwargs):
+            self.model = kwargs.get("model")
+            self.reasoning_config = None
+
+        def _build_api_kwargs(self, *args, **kwargs):
+            return {}
+
+        def _ensure_primary_openai_client(self, reason=None):
+            return _OpenAIClient()
+
+        def release_clients(self):
+            return None
+
+    agent_stub = types.ModuleType("run_agent")
+    agent_stub.AIAgent = _LengthAwareAgent
+    monkeypatch.setitem(sys.modules, "run_agent", agent_stub)
+
+    # Stand-in hermes_cli package whose runtime provider reports a usable API key.
+    runtime_stub = types.ModuleType("hermes_cli.runtime_provider")
+    runtime_stub.resolve_runtime_provider = lambda requested=None: {
+        "api_key": "x",
+        "provider": "openrouter",
+        "base_url": None,
+    }
+    package_stub = types.ModuleType("hermes_cli")
+    package_stub.__path__ = []
+    package_stub.runtime_provider = runtime_stub
+    monkeypatch.setitem(sys.modules, "hermes_cli", package_stub)
+    monkeypatch.setitem(sys.modules, "hermes_cli.runtime_provider", runtime_stub)
+
+    response = routes._handle_handoff_summary(object(), {"session_id": session_id})
+
+    assert response["ok"] is True
+    assert response["fallback"] is False
+    assert response["summary"].startswith("- You should continue with step D.")
+    assert budgets_seen == [700, 1400]
+
+    assert len(persist_calls) == 1
+    only_call = persist_calls[0]
+    assert only_call["fallback"] is False
+    assert only_call["sid"] == session_id
+
+
+def test_handoff_summary_falls_back_when_retry_still_incomplete(monkeypatch):
+    """A retry that is still truncated must end in the deterministic fallback.
+
+    Every fake completion returns ``finish_reason="length"`` with an unfinished
+    sentence, so no attempt can produce a usable summary. The handler is
+    expected to return the bullet-style fallback and persist it as a fallback.
+
+    The fake client exposes ``chat.completions.create`` like the sibling tests'
+    clients do. Returning the ``_Chat`` object directly would make
+    ``client.chat`` raise ``AttributeError``, and the test would then pass via
+    the generic exception fallback without exercising the length path at all.
+    """
+    import api.routes as routes
+    import api.config as cfg
+    import api.models as models
+
+    monkeypatch.setattr(routes, "require", lambda body, *keys: None)
+    monkeypatch.setattr(routes, "bad", lambda _handler, msg, status=400: {"ok": False, "error": msg, "status": status})
+    monkeypatch.setattr(routes, "j", lambda _handler, payload, status=200, extra_headers=None: payload)
+
+    persisted = []
+    monkeypatch.setattr(
+        routes,
+        "_persist_handoff_summary",
+        lambda sid, summary, channel, rounds, fallback=False: persisted.append({
+            "sid": sid,
+            "summary": summary,
+            "channel": channel,
+            "rounds": rounds,
+            "fallback": fallback,
+        }) or {"ok": True},
+    )
+
+    monkeypatch.setattr(models, "count_conversation_rounds", lambda sid, since=None: models.CONVERSATION_ROUND_THRESHOLD)
+    monkeypatch.setattr(
+        models,
+        "get_cli_session_messages",
+        lambda sid: [
+            {"role": "user", "content": "Could you plan next moves?", "timestamp": 1.0},
+            {"role": "assistant", "content": "Let's draft a schedule.", "timestamp": 2.0},
+            {"role": "user", "content": "Anything else?", "timestamp": 3.0},
+            {"role": "assistant", "content": "Yes, one more check is needed.", "timestamp": 4.0},
+        ],
+    )
+    monkeypatch.setattr(cfg, "resolve_model_provider", lambda resolved_model=None: ("gpt-test", "openrouter", None))
+
+    # Records every call that reaches the fake completion endpoint.
+    completion_calls = []
+
+    class _Client:
+        class completions:
+            @staticmethod
+            def create(*args, **kwargs):
+                completion_calls.append(kwargs)
+                return types.SimpleNamespace(choices=[
+                    types.SimpleNamespace(
+                        message=types.SimpleNamespace(
+                            content="I can help summarize this but",
+                        ),
+                        finish_reason="length",
+                    )
+                ])
+
+    class _Chat:
+        completions = _Client.completions
+
+    class _OpenAIClient:
+        chat = _Chat
+
+    class _LengthAwareAgent:
+        api_mode = ""
+
+        def __init__(self, *args, **kwargs):
+            self.model = kwargs.get("model")
+            self.reasoning_config = None
+
+        def _build_api_kwargs(self, *args, **kwargs):
+            return {}
+
+        def _ensure_primary_openai_client(self, reason=None):
+            # Must expose `.chat.completions`, matching the other handoff tests.
+            return _OpenAIClient()
+
+        def release_clients(self):
+            return None
+
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = _LengthAwareAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    fake_runtime_module = types.ModuleType("hermes_cli.runtime_provider")
+    fake_runtime_module.resolve_runtime_provider = lambda requested=None: {
+        "api_key": "x",
+        "provider": "openrouter",
+        "base_url": None,
+    }
+    fake_hermes_cli = types.ModuleType("hermes_cli")
+    fake_hermes_cli.__path__ = []
+    fake_hermes_cli.runtime_provider = fake_runtime_module
+    monkeypatch.setitem(sys.modules, "hermes_cli", fake_hermes_cli)
+    monkeypatch.setitem(sys.modules, "hermes_cli.runtime_provider", fake_runtime_module)
+
+    response = routes._handle_handoff_summary(object(), {"session_id": "session-length-fallback"})
+
+    # Guard against passing through an unrelated error path: the truncated
+    # replies must actually have been requested from the fake client.
+    assert completion_calls, "fake chat.completions.create was never reached"
+    assert response["ok"] is True
+    assert response["fallback"] is True
+    assert response["summary"].startswith("- You asked:")
+    assert "Recent external-channel activity:" not in response["summary"]
+    assert len(persisted) == 1
+    assert persisted[0]["fallback"] is True
+    assert persisted[0]["sid"] == "session-length-fallback"
+
+
+def test_handoff_summary_persistence_targets_both_backends_for_messaging_session(tmp_path, monkeypatch):
+    """Persisting a summary for a messaging session writes to both stores.
+
+    A ``telegram`` row is seeded in a temporary state.db and a matching local
+    session is saved under a temporary session directory. After
+    ``_persist_handoff_summary`` runs, the local transcript must end with a
+    ``handoff_summary`` marker and state.db must hold one ``tool`` message
+    carrying the same payload.
+    """
+    import api.routes as routes
+    import api.models as models
+    import api.profiles as profiles
+
+    sid = "messaging_1013_both_backends_01"
+    summary_text = "Please handoff after context"
+
+    home_dir = tmp_path / "hermes_home"
+    home_dir.mkdir()
+    sessions_dir = tmp_path / "sessions"
+    sessions_dir.mkdir()
+
+    # Redirect both storage roots into the pytest tmp directory.
+    monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: home_dir)
+    monkeypatch.setattr(models, "SESSION_DIR", sessions_dir)
+
+    conn = _new_state_db(home_dir / "state.db")
+    try:
+        started_at = time.time() - 10
+        conn.execute(
+            "INSERT INTO sessions (id, source, title, model, started_at, message_count, parent_session_id, ended_at, end_reason) "
+            "VALUES (?, 'telegram', 'Messaging Session', 'openai/gpt-5', ?, 0, NULL, NULL, NULL)",
+            (sid, started_at),
+        )
+        conn.commit()
+
+        local_session = models.Session(
+            session_id=sid,
+            title="Imported Messaging Session",
+            workspace=str(tmp_path),
+            messages=[{"role": "user", "content": "Need help", "timestamp": 1.0}],
+        )
+        # Tag the local copy as an imported Telegram messaging session.
+        local_session.is_cli_session = True
+        local_session.session_source = "messaging"
+        local_session.source_tag = local_session.raw_source = "telegram"
+        local_session.source_label = "Telegram"
+        local_session.save(touch_updated_at=False)
+
+        routes._persist_handoff_summary(sid, summary_text, "telegram", 2, False)
+
+        # Local JSON backend: the original message plus the appended marker.
+        reloaded = models.Session.load(sid)
+        assert len(reloaded.messages) == 2
+        local_marker = reloaded.messages[-1]
+        assert local_marker.get("name") == "handoff_summary"
+        local_payload = _extract_handoff_marker_payload(local_marker)
+        assert local_payload is not None
+        assert local_payload.get("session_id") == sid
+        assert local_payload.get("summary") == summary_text
+        assert local_payload.get("channel") == "telegram"
+        assert local_payload.get("rounds") == 2
+
+        # state.db backend: exactly one tool row with the same payload.
+        db_rows = conn.execute(
+            "SELECT role, content FROM messages WHERE session_id = ? ORDER BY rowid ASC",
+            (sid,),
+        ).fetchall()
+        assert len(db_rows) == 1
+        row_role, row_content = db_rows[0][0], db_rows[0][1]
+        assert row_role == "tool"
+        db_payload = _extract_handoff_marker_payload({"content": row_content})
+        assert db_payload is not None
+        assert db_payload.get("session_id") == sid
+        assert db_payload.get("summary") == summary_text
+    finally:
+        conn.close()
+
+
+def test_persisted_handoff_summary_deduplicates_identical_tail_markers(tmp_path, monkeypatch):
+    """Re-persisting an identical summary must not append a second marker.
+
+    Both the local session and state.db are pre-seeded with the marker built by
+    ``_build_handoff_summary_tool_message``; a second persist call with the same
+    arguments should leave the message counts in both stores unchanged.
+    """
+    import api.routes as routes
+    import api.models as models
+    import api.profiles as profiles
+
+    sid = "messaging_1013_dedupe_tail"
+    persist_args = (sid, "Repeat me", "telegram", 3, False)
+
+    home_dir = tmp_path / "hermes_home"
+    home_dir.mkdir()
+    sessions_dir = tmp_path / "sessions"
+    sessions_dir.mkdir()
+    # Redirect both storage roots into the pytest tmp directory.
+    monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: home_dir)
+    monkeypatch.setattr(models, "SESSION_DIR", sessions_dir)
+
+    conn = _new_state_db(home_dir / "state.db")
+    try:
+        now = time.time()
+        conn.execute(
+            "INSERT INTO sessions (id, source, title, model, started_at, message_count, parent_session_id, ended_at, end_reason) "
+            "VALUES (?, 'telegram', 'Messaging Session', 'openai/gpt-5', ?, 1, NULL, NULL, NULL)",
+            (sid, now),
+        )
+        conn.commit()
+
+        existing_marker = routes._build_handoff_summary_tool_message(*persist_args)
+        local_session = models.Session(
+            session_id=sid,
+            title="Imported Messaging Session",
+            workspace=str(tmp_path),
+            messages=[
+                {"role": "user", "content": "Need help", "timestamp": now - 1},
+                existing_marker,
+            ],
+        )
+        # Tag the local copy as an imported Telegram messaging session.
+        local_session.is_cli_session = True
+        local_session.session_source = "messaging"
+        local_session.source_tag = local_session.raw_source = "telegram"
+        local_session.source_label = "Telegram"
+        local_session.save(touch_updated_at=False)
+
+        # Mirror the same marker into state.db so both tails already match.
+        conn.execute(
+            "INSERT INTO messages (session_id, role, content, timestamp) VALUES (?, 'tool', ?, ?)",
+            (sid, existing_marker["content"], existing_marker["timestamp"]),
+        )
+        conn.commit()
+
+        routes._persist_handoff_summary(*persist_args)
+
+        assert len(models.Session.load(sid).messages) == 2
+
+        db_rows = conn.execute(
+            "SELECT content FROM messages WHERE session_id = ? ORDER BY rowid ASC",
+            (sid,),
+        ).fetchall()
+        assert len(db_rows) == 1
+        assert _extract_handoff_marker_payload({"content": db_rows[0][0]}) is not None
+    finally:
+        conn.close()
+
+
+def test_persist_handoff_summary_falls_back_when_local_session_file_missing(tmp_path, monkeypatch):
+    """Messaging session IDs should still persist to state.db when no local WebUI session exists.
+
+    The local session directory is redirected to an empty temporary folder, as
+    the sibling persistence tests do, so the "no local file" precondition is
+    guaranteed and does not depend on the session directory active on the
+    machine running the test.
+    """
+    import api.routes as routes
+    import api.models as models
+    import api.profiles as profiles
+
+    sid = "messaging_1013_no_local_file"
+    mock_home = tmp_path / "hermes_home"
+    mock_home.mkdir()
+    # Deliberately left empty: no local session file may exist for `sid`.
+    mock_sessions = tmp_path / "sessions"
+    mock_sessions.mkdir()
+
+    monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: mock_home)
+    monkeypatch.setattr(models, "SESSION_DIR", mock_sessions)
+    conn = _new_state_db(mock_home / "state.db")
+
+    # Force messaging classification while keeping the local shell absent.
+    monkeypatch.setattr(routes, "_is_messaging_session_id", lambda _sid: True)
+    try:
+        routes._persist_handoff_summary(sid, "Persist without local shell", "telegram", 1, True)
+        rows = conn.execute(
+            "SELECT role, content FROM messages WHERE session_id = ? ORDER BY rowid ASC",
+            (sid,),
+        ).fetchall()
+        assert len(rows) == 1
+        assert rows[0][0] == "tool"
+        payload = _extract_handoff_marker_payload({"content": rows[0][1]})
+        assert payload is not None
+        assert payload.get("session_id") == sid
+        assert payload.get("fallback") is True
+    finally:
+        conn.close()
@@ -67,3 +67,105 @@ def test_import_cli_passes_model_to_import_helper():
    assert "model" in call_block, (
        "import_cli_session() call should still receive the `model` argument."
    )
+
+
+def test_session_import_cli_refresh_matches_messages_despite_timestamp_type_differences(monkeypatch):
+    """An int-vs-float timestamp difference must not block a refresh.
+
+    The stored transcript uses integer timestamps while the CLI refresh returns
+    the same turns with float timestamps plus one extra message. The handler
+    should treat the stored messages as a prefix of the fresh ones, adopt the
+    longer transcript, and save once without touching ``updated_at``.
+    """
+    import api.routes as routes
+
+    session_id = "ts_type_diff_001"
+    save_calls = []
+
+    class FakeSession:
+        """Stand-in for a stored session that records how ``save`` is called."""
+
+        def __init__(self):
+            self.messages = [
+                {"role": "user", "content": "hello", "timestamp": 1710000000},
+                {"role": "assistant", "content": "working", "timestamp": 1710000001},
+            ]
+            self.source_tag = self.raw_source = "weixin"
+            self.session_source = "messaging"
+            self.source_label = "WeChat"
+
+        def compact(self):
+            return {"session_id": session_id, "title": "Imported"}
+
+        def save(self, touch_updated_at=False):
+            save_calls.append(touch_updated_at)
+
+    existing = FakeSession()
+    fresh = [
+        {"role": role, "content": text, "timestamp": stamp}
+        for role, text, stamp in (
+            ("user", "hello", 1710000000.0),
+            ("assistant", "working", 1710000001.0),
+            ("assistant", "next", 1710000002.0),
+        )
+    ]
+
+    def _load(_cls, sid):
+        return existing if sid == session_id else None
+
+    def _cli_rows():
+        return [{"session_id": session_id, "source_tag": "weixin", "raw_source": "weixin", "session_source": "messaging", "source_label": "WeChat"}]
+
+    monkeypatch.setattr(routes.Session, "load", classmethod(_load))
+    # Make the HTTP helpers transparent: validation passes, payloads come back as-is.
+    monkeypatch.setattr(routes, "require", lambda body, *keys: None)
+    monkeypatch.setattr(routes, "bad", lambda _handler, msg, status=400: {"ok": False, "error": msg, "status": status})
+    monkeypatch.setattr(routes, "j", lambda _handler, payload, status=200, extra_headers=None: payload)
+    monkeypatch.setattr(routes, "get_cli_session_messages", lambda sid: fresh if sid == session_id else [])
+    monkeypatch.setattr(routes, "get_cli_sessions", _cli_rows)
+
+    response = routes._handle_session_import_cli(object(), {"session_id": session_id})
+
+    assert response["imported"] is False
+    assert response["session"]["messages"] == fresh
+    assert existing.messages == fresh
+    assert save_calls == [False]
+
+
+def test_session_import_cli_refresh_rejects_prefix_if_non_timing_content_diverges(monkeypatch):
+    """A refresh whose message bodies differ must not replace the transcript.
+
+    Timestamps line up apart from int/float type, but the content of the shared
+    turns differs, so the stored messages are not a true prefix of the fresh
+    ones. The handler should return the existing transcript and never save.
+    """
+    import api.routes as routes
+
+    session_id = "ts_type_diverge_001"
+    save_calls = []
+
+    class FakeSession:
+        """Stand-in for a stored session that records how ``save`` is called."""
+
+        def __init__(self):
+            self.messages = [
+                {"role": "user", "content": "old-prefix", "timestamp": 1710000000},
+                {"role": "assistant", "content": "from local", "timestamp": 1710000001},
+            ]
+            self.source_tag = self.raw_source = "telegram"
+            self.session_source = "messaging"
+            self.source_label = "Telegram"
+            self.is_cli_session = True
+
+        def compact(self):
+            return {"session_id": session_id, "title": "Imported"}
+
+        def save(self, touch_updated_at=False):
+            save_calls.append(touch_updated_at)
+
+    existing = FakeSession()
+    fresh = [
+        {"role": role, "content": text, "timestamp": stamp}
+        for role, text, stamp in (
+            ("user", "different-prefix", 1710000000.0),
+            ("assistant", "from cli", 1710000001.0),
+            ("assistant", "next", 1710000002.0),
+        )
+    ]
+
+    def _load(_cls, sid):
+        return existing if sid == session_id else None
+
+    def _cli_rows():
+        return [{"session_id": session_id, "source_tag": "telegram", "raw_source": "telegram", "session_source": "messaging", "source_label": "Telegram"}]
+
+    monkeypatch.setattr(routes.Session, "load", classmethod(_load))
+    # Make the HTTP helpers transparent: validation passes, payloads come back as-is.
+    monkeypatch.setattr(routes, "require", lambda body, *keys: None)
+    monkeypatch.setattr(routes, "bad", lambda _handler, msg, status=400: {"ok": False, "error": msg, "status": status})
+    monkeypatch.setattr(routes, "j", lambda _handler, payload, status=200, extra_headers=None: payload)
+    monkeypatch.setattr(routes, "get_cli_session_messages", lambda sid: fresh if sid == session_id else [])
+    monkeypatch.setattr(routes, "get_cli_sessions", _cli_rows)
+
+    response = routes._handle_session_import_cli(object(), {"session_id": session_id})
+
+    assert response["imported"] is False
+    assert response["session"]["messages"] == existing.messages
+    assert existing.messages[0]["content"] == "old-prefix"
+    assert save_calls == []
@@ -0,0 +1,29 @@
+"""Regression guard for CLI import refresh overwriting active transcript."""
+
+from pathlib import Path
+
+# parents[1] is the directory above the one holding this test file
+# (presumably the project root; confirm against the repository layout).
+ROOT = Path(__file__).resolve().parents[1]
+# Full text of static/sessions.js, read once at import time so the tests can
+# search it for expected source snippets.
+SESSIONS_JS = (ROOT / "static" / "sessions.js").read_text(encoding="utf-8")
+
+
+def test_sse_import_cli_guard_skips_shorter_transcript_overwrite():
+    """The SSE refresh path must bail out on shorter or non-prefix transcripts.
+
+    Looks only at the source between ``startGatewaySSE`` and ``stopGatewaySSE``
+    and requires the length guard, the prefix-match guard, and the final
+    assignment to be present verbatim.
+    """
+    sse_at = SESSIONS_JS.index("function startGatewaySSE")
+    sse_src = SESSIONS_JS[sse_at:SESSIONS_JS.index("function stopGatewaySSE", sse_at)]
+
+    expected_statements = (
+        "const prev = S.messages.length;",
+        "const next = res.session.messages.filter(m => m && m.role);",
+        "if (next.length < prev) return;",
+        "if (prev > 0 && !_isCliImportRefreshPrefixMatch(S.messages, next)) return;",
+        "S.messages = next;",
+    )
+    for statement in expected_statements:
+        assert statement in sse_src
+
+
+def test_sse_import_cli_refresh_prefix_helper_ignores_timestamps():
+    """sessions.js must ship the timestamp-agnostic prefix-comparison helpers.
+
+    Checks for the two helper declarations, the deletion of ``timestamp`` and
+    ``_ts`` from the comparison clone, and the local-longer-than-fresh length
+    check, all as literal source text.
+    """
+    required_snippets = (
+        "function _normalizeMessageForCliImportComparison(message)",
+        "delete clone.timestamp;",
+        "delete clone._ts;",
+        "function _isCliImportRefreshPrefixMatch(localMessages, freshMessages)",
+        "_normalizeMessageForCliImportComparison",
+        "localMessages.length > freshMessages.length",
+    )
+    for snippet in required_snippets:
+        assert snippet in SESSIONS_JS