diff --git a/sdk/src/sdk/postcall_memory.py b/sdk/src/sdk/postcall_memory.py index 6bbbff6..92b29d8 100644 --- a/sdk/src/sdk/postcall_memory.py +++ b/sdk/src/sdk/postcall_memory.py @@ -40,7 +40,7 @@ import uuid from dataclasses import dataclass from pathlib import Path -from typing import Any +from typing import Any, Literal import google.genai as genai from google.genai import types as genai_types @@ -177,22 +177,32 @@ def _validate_memory(raw: Any) -> ExtractedMemory | None: """Coerce a raw dict from Gemini into a clean :class:`ExtractedMemory`, or return None if the shape is bad enough to drop. - We're forgiving: missing fields fill with sensible defaults. The - only hard reject is missing ``content`` (the row would be empty). + Forgiving: missing fields fill with sensible defaults. Non-string + values for the string fields (``content``, ``summary``, ``category``) + are rejected explicitly so a payload like ``{"content": 123}`` + drops the row cleanly instead of raising ``AttributeError`` on + ``.strip()`` — that crash used to propagate up through the + validation loop and abort the whole extraction, an unclassified + failure that bypassed the typed-status surface. """ if not isinstance(raw, dict): return None - content = (raw.get("content") or "").strip() + raw_content = raw.get("content") + if not isinstance(raw_content, str): + return None + content = raw_content.strip() if not content: return None - summary = (raw.get("summary") or content[:80]).strip() + raw_summary = raw.get("summary") + summary = (raw_summary if isinstance(raw_summary, str) else content[:80]).strip() raw_topics = raw.get("topics") or [] topics: list[str] = [] if isinstance(raw_topics, list): for t in raw_topics[:5]: if isinstance(t, str) and t.strip(): topics.append(t.strip().lower()) - category = (raw.get("category") or "general").strip().lower() + raw_category = raw.get("category") + category = (raw_category if isinstance(raw_category, str) else "general").strip().lower() if category not in CATEGORIES: category = "general" return ExtractedMemory( @@ -203,20 +213,87 @@ def _validate_memory(raw: Any) -> ExtractedMemory | None: ) -async def _extract_memories(transcript: str) -> list[ExtractedMemory]: +# Status hints emitted by `_extract_memories` so the caller's +# completion log distinguishes "Gemini auth broke" from "transcript was +# genuinely empty" — the conflation that hid the 2026-05-15 voice-path +# silent-loss for three days (see openclaw-livekit#29). +# +# `extracted` = memories list non-empty, capture step runs next. +# `empty_extraction` = valid transcript reached Gemini, Gemini returned +# no memory objects. Genuinely uneventful call. +# `no_transcript_text` = transcript was whitespace-only (handled here; +# `run_extraction` separately handles the "no transcript file" case +# as `no_transcript`). +# `no_api_key` = GEMINI_API_KEY / GOOGLE_API_KEY unset. +# `auth_failed` = Gemini returned 401 / 403 / UNAUTHENTICATED. The +# typical credential-rotation-not-propagated failure (see +# wiki/gotchas/openclaw-livekit-deploy-traps §1). +# `transport_failed` = catch-all non-auth Gemini call failure (network +# error, timeout, connection refused, unexpected SDK-side exception). +# The provider may or may not have been reached — distinguishing them +# requires more SDK-specific exception introspection than is worth +# the complexity for the operator-side signal. Treat as "something +# went wrong outside the auth check; look at the ERROR log line above +# the completion line for specifics." +# `parse_failed` = Gemini returned non-JSON or non-conformant JSON +# (no `memories` array, wrong shape). +ExtractionStatus = Literal[ + "extracted", + "empty_extraction", + "no_transcript_text", + "no_api_key", + "auth_failed", + "transport_failed", + "parse_failed", +] + + +@dataclass(frozen=True) +class ExtractionResult: + """Outcome of one Gemini extraction call. + + ``memories`` is empty for every status except ``extracted``. The + ``status`` field carries the cause when ``memories`` is empty so + the completion-log line can distinguish auth failure from + genuine no-extraction (the silent-loss class fixed in + openclaw-livekit#29). + """ + + memories: list[ExtractedMemory] + status: ExtractionStatus + + +def _classify_gemini_exception(exc: BaseException) -> ExtractionStatus: + """Map a Gemini SDK exception to an `ExtractionStatus`. + + The google-genai SDK doesn't expose typed auth/transport exceptions + we can isinstance-match cleanly. Falls back to substring matching + on the formatted message — the same shape the production 2026-05-15 + incident surfaced (``"401 UNAUTHENTICATED"`` substring). Order + matters: auth check before transport so a 401 reaching us via a + transport-shaped exception still classifies correctly. + """ + msg = str(exc) + if "401" in msg or "403" in msg or "UNAUTHENTICATED" in msg or "PERMISSION_DENIED" in msg: + return "auth_failed" + return "transport_failed" + + +async def _extract_memories(transcript: str) -> ExtractionResult: """Send the transcript to Gemini Flash and parse the JSON response. - Returns ``[]`` on any failure: missing API key, network error, - malformed JSON, schema-violating response. The caller treats an - empty list as "no extraction happened" — explicit saves from the - call are still in Musubi. + Returns an :class:`ExtractionResult` whose ``status`` field + distinguishes the failure modes that used to collapse to ``[]``: + auth failure, transport failure, parse failure, missing API key, + or genuinely empty extraction. The caller uses the status hint + on the completion log line. """ if not transcript.strip(): - return [] + return ExtractionResult(memories=[], status="no_transcript_text") api_key = _gemini_api_key() if not api_key: logger.warning("postcall_memory: no Gemini API key, skipping extraction") - return [] + return ExtractionResult(memories=[], status="no_api_key") client = genai.Client(api_key=api_key) @@ -238,24 +315,29 @@ def _call() -> str: raw_text = await asyncio.to_thread(_call) except Exception as exc: logger.error("postcall_memory: Gemini call failed: %s", exc) - return [] + return ExtractionResult(memories=[], status=_classify_gemini_exception(exc)) try: data = json.loads(raw_text) except json.JSONDecodeError as exc: logger.error("postcall_memory: malformed JSON from Gemini: %s", exc) - return [] + return ExtractionResult(memories=[], status="parse_failed") raw_memories = data.get("memories") if isinstance(data, dict) else None if not isinstance(raw_memories, list): - return [] + return ExtractionResult(memories=[], status="parse_failed") out: list[ExtractedMemory] = [] for r in raw_memories: m = _validate_memory(r) if m is not None: out.append(m) - return out + if not out: + # Gemini reached + parsed but produced 0 valid memories. This is + # the genuine "uneventful call" path, distinct from auth/parse + # failures above. + return ExtractionResult(memories=[], status="empty_extraction") + return ExtractionResult(memories=out, status="extracted") # --- capture ---------------------------------------------------------------- @@ -328,8 +410,26 @@ async def run_extraction( def _complete(status: str, *, extracted: int = 0, captured: int = 0) -> int: """Single completion log line so audit/Rin can grep one shape. - Status is one of: ``no_transcript``, ``empty_extraction``, - ``captured``, ``no_captures``.""" + + Status is one of: + - ``no_transcript`` (transcript file missing / unreadable) + - ``no_transcript_text`` (file present, body whitespace-only) + - ``no_api_key`` (GEMINI_API_KEY/GOOGLE_API_KEY unset) + - ``auth_failed`` (Gemini 401/403 — most often a stale key + per wiki/gotchas/openclaw-livekit-deploy-traps §1) + - ``transport_failed`` (Gemini network/timeout error) + - ``parse_failed`` (Gemini returned non-JSON or wrong shape) + - ``empty_extraction`` (Gemini reached + parsed but produced + zero memories — genuinely uneventful call) + - ``captured`` (memories extracted AND at least one capture + succeeded) + - ``no_captures`` (memories extracted but every capture failed) + + Pre-openclaw-livekit#29, ``auth_failed`` / ``transport_failed`` / + ``parse_failed`` / ``empty_extraction`` all logged as + ``empty_extraction`` — silently hid the 2026-05-15 voice path + breakage for three days. + """ total_ms = int((time.monotonic() - started) * 1000) logger.info( "postcall_memory: completed call_sid=%s status=%s extracted=%d captured=%d total_ms=%d", @@ -349,20 +449,18 @@ def _complete(status: str, *, extracted: int = 0, captured: int = 0) -> int: if transcript is None: return _complete("no_transcript") - memories = await _extract_memories(transcript) - if not memories: - # Could be: Gemini errored, malformed JSON, empty transcript, or - # genuinely nothing extractable. The error-level log lines from - # _extract_memories distinguish them in the upstream log; here we - # just record the outcome. - return _complete("empty_extraction") + result = await _extract_memories(transcript) + if result.status != "extracted": + # Propagate the typed status from _extract_memories — distinguishes + # auth/transport/parse failures from genuine empty extraction. + return _complete(result.status) if client is None: cfg = MusubiV2ClientConfig.from_env() client = MusubiV2Client(cfg) captured = 0 - for memory in memories: + for memory in result.memories: ok = await _capture_one( client=client, namespace=namespace, @@ -374,7 +472,7 @@ def _complete(status: str, *, extracted: int = 0, captured: int = 0) -> int: captured += 1 status = "captured" if captured > 0 else "no_captures" - return _complete(status, extracted=len(memories), captured=captured) + return _complete(status, extracted=len(result.memories), captured=captured) # --- wiring ----------------------------------------------------------------- diff --git a/sdk/tests/test_postcall_memory.py b/sdk/tests/test_postcall_memory.py index 8e4ef53..39ce852 100644 --- a/sdk/tests/test_postcall_memory.py +++ b/sdk/tests/test_postcall_memory.py @@ -109,35 +109,41 @@ async def test_extract_memories_happy_path(monkeypatch: pytest.MonkeyPatch) -> N mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript content here") + result = await postcall_memory._extract_memories("transcript content here") - assert len(memories) == 2 - assert memories[0].content == "Eric mentioned he wants to look at Vespa" - assert memories[0].category == "idea" - assert memories[1].category == "household" + assert result.status == "extracted" + assert len(result.memories) == 2 + assert result.memories[0].content == "Eric mentioned he wants to look at Vespa" + assert result.memories[0].category == "idea" + assert result.memories[1].category == "household" @pytest.mark.asyncio -async def test_extract_memories_no_api_key_returns_empty(monkeypatch: pytest.MonkeyPatch) -> None: +async def test_extract_memories_no_api_key_returns_typed_status( + monkeypatch: pytest.MonkeyPatch, +) -> None: monkeypatch.delenv("GEMINI_API_KEY", raising=False) monkeypatch.delenv("GOOGLE_API_KEY", raising=False) - memories = await postcall_memory._extract_memories("some transcript") - assert memories == [] + result = await postcall_memory._extract_memories("some transcript") + assert result.memories == [] + assert result.status == "no_api_key" @pytest.mark.asyncio -async def test_extract_memories_empty_transcript_returns_empty( +async def test_extract_memories_empty_transcript_returns_typed_status( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") - memories = await postcall_memory._extract_memories("") - assert memories == [] - memories = await postcall_memory._extract_memories(" \n \n") - assert memories == [] + result = await postcall_memory._extract_memories("") + assert result.memories == [] + assert result.status == "no_transcript_text" + result = await postcall_memory._extract_memories(" \n \n") + assert result.memories == [] + assert result.status == "no_transcript_text" @pytest.mark.asyncio -async def test_extract_memories_malformed_json_returns_empty( +async def test_extract_memories_malformed_json_returns_parse_failed( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") @@ -150,12 +156,13 @@ async def test_extract_memories_malformed_json_returns_empty( mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") - assert memories == [] + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "parse_failed" @pytest.mark.asyncio -async def test_extract_memories_gemini_raises_returns_empty( +async def test_extract_memories_gemini_raises_transport_returns_transport_failed( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") @@ -166,8 +173,50 @@ async def test_extract_memories_gemini_raises_returns_empty( mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") - assert memories == [] + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "transport_failed" + + +@pytest.mark.asyncio +async def test_extract_memories_gemini_401_returns_auth_failed( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """The 2026-05-15 voice-path silent-loss case: stale Gemini key returns 401. + + Pre-fix: this surfaced as `status=empty_extraction`, indistinguishable + from a genuinely uneventful call. Three days of voice memories were + lost before the operator noticed. + """ + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + + mock_models = MagicMock() + mock_models.generate_content.side_effect = RuntimeError( + "401 UNAUTHENTICATED. Request had invalid authentication credentials." + ) + mock_client = MagicMock() + mock_client.models = mock_models + + with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "auth_failed" + + +@pytest.mark.asyncio +async def test_extract_memories_gemini_403_returns_auth_failed( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + mock_models = MagicMock() + mock_models.generate_content.side_effect = RuntimeError( + "403 PERMISSION_DENIED. Quota exceeded." + ) + mock_client = MagicMock() + mock_client.models = mock_models + with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): + result = await postcall_memory._extract_memories("transcript") + assert result.status == "auth_failed" @pytest.mark.asyncio @@ -194,14 +243,15 @@ async def test_extract_memories_drops_invalid_entries(monkeypatch: pytest.Monkey mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") + result = await postcall_memory._extract_memories("transcript") - assert len(memories) == 1 - assert memories[0].content == "Real memory content" + assert result.status == "extracted" + assert len(result.memories) == 1 + assert result.memories[0].content == "Real memory content" @pytest.mark.asyncio -async def test_extract_memories_no_memories_key_returns_empty( +async def test_extract_memories_no_memories_key_returns_parse_failed( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("GEMINI_API_KEY", "fake-key") @@ -214,8 +264,45 @@ async def test_extract_memories_no_memories_key_returns_empty( mock_client.models = mock_models with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): - memories = await postcall_memory._extract_memories("transcript") - assert memories == [] + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + # `parse_failed` because the response is JSON but lacks the expected + # `memories` array — same status as malformed JSON, both are + # "Gemini didn't give us the shape we expected". + assert result.status == "parse_failed" + + +@pytest.mark.asyncio +async def test_extract_memories_zero_valid_memories_returns_empty_extraction( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Distinct from `parse_failed`: Gemini gave a valid `memories` array + but every entry was rejected by `_validate_memory`. Treated as + `empty_extraction` because the conversation was reached, parsed, and + Gemini's response was structurally fine — just nothing extractable. + """ + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + + fake_response_text = json.dumps( + { + "memories": [ + {"content": ""}, + {"summary": "no content key here"}, + "not a dict", + ] + } + ) + mock_response = MagicMock() + mock_response.text = fake_response_text + mock_models = MagicMock() + mock_models.generate_content.return_value = mock_response + mock_client = MagicMock() + mock_client.models = mock_models + + with patch("sdk.postcall_memory.genai.Client", return_value=mock_client): + result = await postcall_memory._extract_memories("transcript") + assert result.memories == [] + assert result.status == "empty_extraction" # --- _capture_one ---------------------------------------------------------- @@ -353,6 +440,64 @@ async def test_run_extraction_full_loop(monkeypatch: pytest.MonkeyPatch, tmp_pat assert "source:transcript" in call_kwargs["tags"] +@pytest.mark.asyncio +async def test_run_extraction_auth_failure_logs_distinct_status( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + caplog: pytest.LogCaptureFixture, +) -> None: + """The 2026-05-15 silent-loss case, locked in as a regression test. + + Stale Gemini key → 401 from Gemini → `_extract_memories` returns + `auth_failed` status → `run_extraction` propagates it to the + completion log line as `status=auth_failed`, NOT `status=empty_extraction`. + + Pre-openclaw-livekit#29, this same scenario logged + `status=empty_extraction`, indistinguishable from a genuinely + uneventful call. If this assertion ever breaks, the silent-loss + class is back. + """ + import logging + + monkeypatch.setenv("LIVEKIT_VOICE_LOGS", str(tmp_path)) + monkeypatch.setenv("GEMINI_API_KEY", "fake-key") + + transcripts = tmp_path / "phone-transcripts" + transcripts.mkdir(parents=True, exist_ok=True) + (transcripts / "test-auth.txt").write_text( + "[10:00:00] [USER] anything that should extract\n[10:00:02] [ASSISTANT] reply\n" + ) + + mock_models = MagicMock() + mock_models.generate_content.side_effect = RuntimeError( + "401 UNAUTHENTICATED. Request had invalid authentication credentials." + ) + mock_genai_client = MagicMock() + mock_genai_client.models = mock_models + + with ( + caplog.at_level(logging.INFO, logger="openclaw-livekit.agent"), + patch("sdk.postcall_memory.genai.Client", return_value=mock_genai_client), + ): + captured = await run_extraction( + call_sid="test-auth", + namespace="nyla/voice/episodic", + speaker_tag="nyla-voice", + client=MagicMock(), # never reached on auth failure + ) + + assert captured == 0 + # The decisive assertion: completion log says auth_failed, not + # empty_extraction. This is the exact contract that hid the + # 2026-05-15 voice silent-loss for three days before the fix. + completion_logs = [ + r.message for r in caplog.records if "postcall_memory: completed" in r.message + ] + assert any("status=auth_failed" in m for m in completion_logs), ( + f"expected completion log with status=auth_failed; got: {completion_logs}" + ) + + # --- _spawn_extraction_subprocess ------------------------------------------