diff --git a/api/profiles.py b/api/profiles.py index 22c73312..e731dfb8 100644 --- a/api/profiles.py +++ b/api/profiles.py @@ -712,24 +712,21 @@ def profile_env_for_background_worker( yield return - env_keys = set(runtime_env.keys()) | {"HERMES_HOME"} - # Stage-360 maintainer fix: narrow the _ENV_LOCK critical section to just - # the env mutation (and the env restoration). Pre-fix, this held _ENV_LOCK - # for the entire `yield` duration — i.e. the whole background worker's - # runtime (title generation, compression, update summary). That caused - # _ENV_LOCK to be held for many seconds, blocking ALL other sessions and - # surfacing as the QA `test_third_message_completes` timeout. The fix - # mirrors the narrow-lock pattern in _run_agent_streaming: acquire briefly - # to set env, run worker without holding the lock, reacquire to restore. - # See also QA `test_finally_restores_env_with_lock`. + # Route profile-specific env through the same thread-local channel used by + # foreground streaming runs. Mutating os.environ for detached background + # workers creates a cross-profile race: worker B can snapshot and later + # restore worker A's temporary env while A is still running. Keep + # os.environ unchanged here and use the short global lock only for the + # remaining process-global skill-home module cache patch. + from api.config import _set_thread_env, _clear_thread_env + + thread_env = dict(runtime_env) + thread_env["HERMES_HOME"] = str(profile_home_path) skill_home_snapshot = None - old_env = {} try: + _set_thread_env(**thread_env) with _ENV_LOCK: - old_env = {key: os.environ.get(key) for key in env_keys} skill_home_snapshot = snapshot_skill_home_modules() - os.environ.update(runtime_env) - os.environ["HERMES_HOME"] = str(profile_home_path) try: patch_skill_home_modules(profile_home_path) except Exception: @@ -741,12 +738,8 @@ def profile_env_for_background_worker( ) yield finally: + _clear_thread_env() with _ENV_LOCK: - for key, old_value in old_env.items(): - if old_value is None: - os.environ.pop(key, None) - else: - os.environ[key] = old_value if skill_home_snapshot is not None: restore_skill_home_modules(skill_home_snapshot) diff --git a/tests/test_profile_terminal_env.py b/tests/test_profile_terminal_env.py index 1614d6b6..3d8c8fe8 100644 --- a/tests/test_profile_terminal_env.py +++ b/tests/test_profile_terminal_env.py @@ -1,5 +1,8 @@ import os import re +import sys +import threading +import types from pathlib import Path import yaml @@ -95,3 +98,62 @@ def test_streaming_thread_env_allows_profile_terminal_cwd_override(): assert env["HERMES_SESSION_PLATFORM"] == "webui" assert env["HERMES_HOME"] == "/active/profile/home" assert env["TERMINAL_ENV"] == "ssh" + + +def test_background_worker_profile_env_uses_thread_local_without_process_env(monkeypatch, tmp_path): + from api import config as config_api + from api import profiles + + home = tmp_path / "profiles" / "ops" + home.mkdir(parents=True) + (home / "config.yaml").write_text( + yaml.safe_dump({"terminal": {"backend": "ssh", "cwd": "/profile/cwd"}}), + encoding="utf-8", + ) + (home / ".env").write_text("HERMES_MAX_ITERATIONS=42\n", encoding="utf-8") + + monkeypatch.setattr(profiles, "get_hermes_home_for_profile", lambda profile: home) + monkeypatch.setattr(profiles, "snapshot_skill_home_modules", lambda: {"snapshot": True}) + patched_homes = [] + restored = [] + monkeypatch.setattr(profiles, "patch_skill_home_modules", lambda path: patched_homes.append(path)) + monkeypatch.setattr(profiles, "restore_skill_home_modules", lambda snapshot: restored.append(snapshot)) + monkeypatch.setitem(sys.modules, "api.streaming", types.SimpleNamespace(_ENV_LOCK=threading.Lock())) + + monkeypatch.setenv("HERMES_HOME", "/root/default") + monkeypatch.delenv("TERMINAL_ENV", raising=False) + monkeypatch.delenv("TERMINAL_CWD", raising=False) + monkeypatch.delenv("HERMES_MAX_ITERATIONS", raising=False) + config_api._clear_thread_env() + + with profiles.profile_env_for_background_worker("ops"): + assert os.environ["HERMES_HOME"] == "/root/default" + assert "TERMINAL_ENV" not in os.environ + assert "TERMINAL_CWD" not in os.environ + assert "HERMES_MAX_ITERATIONS" not in os.environ + assert config_api._thread_ctx.env["HERMES_HOME"] == str(home) + assert config_api._thread_ctx.env["TERMINAL_ENV"] == "ssh" + assert config_api._thread_ctx.env["TERMINAL_CWD"] == "/profile/cwd" + assert config_api._thread_ctx.env["HERMES_MAX_ITERATIONS"] == "42" + assert patched_homes == [home] + + assert os.environ["HERMES_HOME"] == "/root/default" + assert getattr(config_api._thread_ctx, "env", {}) == {} + assert restored == [{"snapshot": True}] + + +def test_background_worker_profile_env_source_avoids_os_environ_mutation(): + src = Path("api/profiles.py").read_text(encoding="utf-8") + match = re.search( + r"(@contextmanager\ndef profile_env_for_background_worker\(.*?\n)(?=\ndef |\nclass )", + src, + re.DOTALL, + ) + assert match, "profile_env_for_background_worker not found" + body = match.group(1) + + assert "from api.config import _set_thread_env, _clear_thread_env" in body + assert "_set_thread_env(**thread_env)" in body + assert "_clear_thread_env()" in body + assert "os.environ.update(runtime_env)" not in body + assert 'os.environ["HERMES_HOME"] = str(profile_home_path)' not in body diff --git a/tests/test_sprint46.py b/tests/test_sprint46.py index 0db0ffa2..f98897f5 100644 --- a/tests/test_sprint46.py +++ b/tests/test_sprint46.py @@ -408,6 +408,7 @@ def test_session_compress_async_reports_stream_state_guard(monkeypatch, cleanup_ def test_manual_compress_worker_uses_session_profile_env(monkeypatch, tmp_path, cleanup_test_sessions): + import api.config as config_api import api.profiles as profiles import api.routes as routes @@ -416,9 +417,12 @@ def test_manual_compress_worker_uses_session_profile_env(monkeypatch, tmp_path, def __init__(self, **kwargs): skill_module = sys.modules.get("tools.skills_tool") + thread_env = getattr(config_api._thread_ctx, "env", {}) EnvAssertingAgent.seen_env = { - "HERMES_HOME": os.environ.get("HERMES_HOME"), - "HERMES_TEST_PROFILE_ENV": os.environ.get("HERMES_TEST_PROFILE_ENV"), + "HERMES_HOME": thread_env.get("HERMES_HOME"), + "HERMES_TEST_PROFILE_ENV": thread_env.get("HERMES_TEST_PROFILE_ENV"), + "PROCESS_HERMES_HOME": os.environ.get("HERMES_HOME"), + "PROCESS_HERMES_TEST_PROFILE_ENV": os.environ.get("HERMES_TEST_PROFILE_ENV"), "SKILL_MODULE_HOME": getattr(skill_module, "HERMES_HOME", None), "SKILL_MODULE_DIR": getattr(skill_module, "SKILLS_DIR", None), } @@ -461,6 +465,8 @@ def test_manual_compress_worker_uses_session_profile_env(monkeypatch, tmp_path, assert EnvAssertingAgent.seen_env == { "HERMES_HOME": str(profile_home), "HERMES_TEST_PROFILE_ENV": "work-runtime", + "PROCESS_HERMES_HOME": "default-home", + "PROCESS_HERMES_TEST_PROFILE_ENV": None, "SKILL_MODULE_HOME": profile_home, "SKILL_MODULE_DIR": profile_home / "skills", } diff --git a/tests/test_title_aux_routing.py b/tests/test_title_aux_routing.py index a125d361..2d18e4a8 100644 --- a/tests/test_title_aux_routing.py +++ b/tests/test_title_aux_routing.py @@ -448,6 +448,7 @@ class TestBackgroundTitleProfileRouting(unittest.TestCase): self, mock_get_session, mock_configured, ): """A background title worker for a non-default profile must resolve aux config from that profile.""" + from api import config as config_api from api.streaming import _run_background_title_update mock_session = MagicMock() @@ -469,7 +470,9 @@ class TestBackgroundTitleProfileRouting(unittest.TestCase): sys.modules['tools.skills_tool'] = fake_skill_module def fake_aux_title(*args, **kwargs): - captured['hermes_home'] = os.environ.get('HERMES_HOME') + thread_env = getattr(config_api._thread_ctx, 'env', {}) + captured['hermes_home'] = thread_env.get('HERMES_HOME') + captured['process_hermes_home'] = os.environ.get('HERMES_HOME') captured['skill_module_home'] = getattr(fake_skill_module, 'HERMES_HOME') captured['skill_module_dir'] = getattr(fake_skill_module, 'SKILLS_DIR') return ('Profile Routed Title', 'llm_aux', '') @@ -495,6 +498,7 @@ class TestBackgroundTitleProfileRouting(unittest.TestCase): sys.modules['tools.skills_tool'] = original_skill_module self.assertEqual(captured.get('hermes_home'), 'profile-home') + self.assertEqual(captured.get('process_hermes_home'), 'default-home') self.assertEqual(str(captured.get('skill_module_home')), 'profile-home') self.assertEqual(str(captured.get('skill_module_dir')), 'profile-home/skills') self.assertEqual(captured.get('restored_hermes_home'), 'default-home') diff --git a/tests/test_update_banner_fixes.py b/tests/test_update_banner_fixes.py index bc3a2cba..fadb5235 100644 --- a/tests/test_update_banner_fixes.py +++ b/tests/test_update_banner_fixes.py @@ -476,9 +476,12 @@ class TestUpdateSummaryRouteModelSelection: monkeypatch.setattr(cfg, 'get_effective_default_model', lambda: 'openai/test-main') def fake_resolve_model_provider(model): + thread_env = getattr(cfg._thread_ctx, 'env', {}) captured['model_resolution_env'] = { - 'HERMES_HOME': os.environ.get('HERMES_HOME'), - 'HERMES_TEST_PROFILE_ENV': os.environ.get('HERMES_TEST_PROFILE_ENV'), + 'HERMES_HOME': thread_env.get('HERMES_HOME'), + 'HERMES_TEST_PROFILE_ENV': thread_env.get('HERMES_TEST_PROFILE_ENV'), + 'PROCESS_HERMES_HOME': os.environ.get('HERMES_HOME'), + 'PROCESS_HERMES_TEST_PROFILE_ENV': os.environ.get('HERMES_TEST_PROFILE_ENV'), } return model, 'openai', 'https://example.test/v1' @@ -514,9 +517,12 @@ class TestUpdateSummaryRouteModelSelection: ) def fake_get_text_auxiliary_client(task, main_runtime=None): + thread_env = getattr(cfg._thread_ctx, 'env', {}) captured['aux_env'] = { - 'HERMES_HOME': os.environ.get('HERMES_HOME'), - 'HERMES_TEST_PROFILE_ENV': os.environ.get('HERMES_TEST_PROFILE_ENV'), + 'HERMES_HOME': thread_env.get('HERMES_HOME'), + 'HERMES_TEST_PROFILE_ENV': thread_env.get('HERMES_TEST_PROFILE_ENV'), + 'PROCESS_HERMES_HOME': os.environ.get('HERMES_HOME'), + 'PROCESS_HERMES_TEST_PROFILE_ENV': os.environ.get('HERMES_TEST_PROFILE_ENV'), 'SKILL_MODULE_HOME': getattr(fake_skill_module, 'HERMES_HOME'), 'SKILL_MODULE_DIR': getattr(fake_skill_module, 'SKILLS_DIR'), } @@ -564,10 +570,14 @@ class TestUpdateSummaryRouteModelSelection: assert captured['model_resolution_env'] == { 'HERMES_HOME': str(profile_home), 'HERMES_TEST_PROFILE_ENV': 'work-runtime', + 'PROCESS_HERMES_HOME': 'default-home', + 'PROCESS_HERMES_TEST_PROFILE_ENV': 'default-runtime', } assert captured['aux_env'] == { 'HERMES_HOME': str(profile_home), 'HERMES_TEST_PROFILE_ENV': 'work-runtime', + 'PROCESS_HERMES_HOME': 'default-home', + 'PROCESS_HERMES_TEST_PROFILE_ENV': 'default-runtime', 'SKILL_MODULE_HOME': profile_home, 'SKILL_MODULE_DIR': profile_home / 'skills', }