diff --git a/WINDOWS_PORT_HANDOFF.md b/WINDOWS_PORT_HANDOFF.md new file mode 100644 index 0000000..7641c33 --- /dev/null +++ b/WINDOWS_PORT_HANDOFF.md @@ -0,0 +1,454 @@ +# Windows Port Handoff + +## What this project is + +`iai-personal-memory-engine` (repo at `C:\Users\Daniel Hertz\Documents\GitHub\iai-personal-memory-engine`) +is a local MCP server that gives Claude Code persistent long-term memory across sessions. +It captures every conversation, builds a personal model of the user, and injects relevant +context at session start — automatically. It is Python + Rust (PyO3), with a Node.js MCP wrapper. + +It was macOS-only. We are porting it to Windows. + +## What has already been done (Step 1 — committed) + +**Commit:** `1dc1d64` — "Add platform-agnostic IPC transport layer for Windows porting" + +Created `src/iai_mcp/_ipc.py` — a platform-agnostic IPC abstraction module. + +- On POSIX: delegates to the existing Unix-domain socket at `~/.iai-mcp/.daemon.sock` +- On Windows: uses TCP loopback `127.0.0.1:<port>`, port stored in `~/.iai-mcp/.daemon.port` + +Updated all 9 callsites that previously used raw `asyncio.open_unix_connection` / +`asyncio.start_unix_server` / `socket.AF_UNIX`: +- `src/iai_mcp/concurrency.py` +- `src/iai_mcp/socket_server.py` +- `src/iai_mcp/cli/__init__.py` +- `src/iai_mcp/core/__init__.py` +- `src/iai_mcp/direct_write.py` +- `src/iai_mcp/daemon/_watchdog.py` +- `src/iai_mcp/doctor/_lifecycle_checks.py` +- `src/iai_mcp/doctor/__init__.py` +- `src/iai_mcp/semantic_recall.py` + +## Completion Status + +**Steps 1-7, 9, and 10: COMPLETED** ✅ + +- **Step 1** (`1dc1d64`): Platform-agnostic IPC (Unix sockets → TCP loopback on Windows) +- **Step 2** (`8154b9b`): fcntl file locking → `_filelock.py` shim +- **Steps 3+4+9** (`c009736`): POSIX signals, resource module, CLI daemon logging +- **Steps 7+10** (`8ecd257`): uid/geteuid guards, os.fchmod guards, icacls file security +- **Step 5** (`0e8321c`): Windows Task Scheduler daemon installer (schtasks.exe) +- **Step 
6** (`f4865bf`): PowerShell hook equivalents (.ps1 scripts + hook installer updates) +- **Step 7 — bench files** (`59839a3`): `resource.getrusage()` → psutil `peak_wset` on Windows; + POSIX path unchanged. All four bench files (`memory_footprint.py`, + `memorygraph_memory.py`, `consolidation_rss_peak.py`, `embed_warm_cost.py`) + now import cleanly on Windows. +- **Fix** (`13808e1`): `lifecycle_event_log.py` was importing `timedelta` / + `timezone` from `iai_mcp._filelock` (regression from the Step 2 rewrite). + Moved them back to the `datetime` import. Was broken on ALL platforms, not + just Windows — surfaced only when we exercised the full import chain. +- **Help text** (`019e52f`): `daemon install` / `uninstall` / `start` / `stop` / + `logs` argparse help now lists the Windows backend (Task Scheduler / schtasks / + `%APPDATA%\\iai-mcp\\logs`) alongside launchd and systemd. + +## Verified on Windows in-situ (this session) + +Running from `C:\\Users\\Daniel Hertz\\Documents\\GitHub\\iai-personal-memory-engine` +with system Python 3.14 (no venv, no full project install): + +- All 23 files touched by the port: **AST parse clean**. +- 10/10 ported runtime modules (excluding ones that need numpy/hnswlib at + import-time): **import clean** on Windows. +- `python -m iai_mcp.cli --help`: **lists all subcommands**, no crash. +- `python -m iai_mcp.cli daemon install --dry-run`: **emits a valid Task + Scheduler XML** with the right user, pythonw path, log dir, and LogonTrigger. + XML file write uses `encoding="utf-16"` — schtasks-compatible. +- `python -m iai_mcp.cli capture-hooks status`: **detects all three `.ps1` + hook templates** in the source tree, reports the expected + `~/.claude/hooks/*.ps1` install paths and "NOT WIRED" status. + +Cosmetic only (not blocking): the em-dash in the schtasks XML description +renders as `�` when printed to a cp1252 console, but the file written to +disk for `schtasks /Create /XML` is UTF-16 and round-trips fine. 
+ +## What remains + +**Nothing — the port is COMPLETE and verified end-to-end (see below).** + +The full venv E2E run (`pip install -e ".[dev]"` → Rust extension + numpy + +hnswlib, then `daemon install` / `capture-hooks install` / live daemon start) +was carried out. It passed, but only after fixing three real runtime bugs that +the earlier AST-parse and import-only checks could not catch — the port was not +actually working before that run. + +## End-to-end verification (COMPLETE) + +Run from the venv (Python 3.12.10) on Windows 11: + +- Rust `iai_mcp_native.*.pyd`, numpy, hnswlib: **import clean**. +- Daemon starts via **all three** paths — direct `python -m iai_mcp.daemon`, + detached `pythonw`, and the production **Task Scheduler** task — reporting + `ok: True`, state WAKE, with a valid `~/.iai-mcp/.daemon.port` written. +- State-save survived **20 concurrent `daemon status` readers** with zero tick + failures (previously failed within seconds). +- Hooks wired in `~/.claude/settings.json` (Stop / UserPromptSubmit / + SessionStart), all pointing at `.ps1` scripts. +- `tests/test_cli_daemon.py`: **32 passed, 2 skipped** (was 27 passed, + 7 failed); 78 passed across all touched modules. + +### Bugs found and fixed during the E2E run + +1. **`lifecycle_lock._is_pid_alive` used `os.kill(pid, 0)`** — the POSIX + liveness idiom. Windows `os.kill` rejects signal 0 with `WinError 87`, so + the daemon crashed on startup whenever a stale `.locked` was present (i.e. + after every reboot/relaunch). Guarded the probe to POSIX; Windows relies on + the psutil refinement that already followed. +2. **schtasks XML `<WorkingDirectory>` with spaces** — set to `%APPDATA%\iai-mcp\logs` + under `C:\Users\<user name>\...`. The Task Scheduler engine rejects an + XML-set working directory containing spaces with `0x8007010B` + ("directory name is invalid"), so the task never launched even though the + path exists. Removed the element (the daemon never depends on cwd). +3. 
**`daemon_state.save_state` `os.replace` reader contention** — on Windows + `os.replace` (MoveFileEx) fails with `WinError 5`/`32` (→ `PermissionError`) + when a concurrent reader holds the destination open without + `FILE_SHARE_DELETE`, which Python's `open()` does not request. Every + scheduler tick was failing. Added a short Windows-only retry loop. + +Plus: `tests/test_cli_daemon.py` fixtures were POSIX-only (fake daemon used +`asyncio.start_unix_server`; two stop tests referenced `signal.SIGKILL`). +Ported the fake daemon to the `_ipc` transport (TCP loopback + port file on +Windows) and skipped the SIGKILL-escalation tests on Windows. + +Commits: `7b13793` (lifecycle_lock), `9ced147` (schtasks WorkingDirectory), +`1edccb3` (daemon_state replace retry), `5d26920` (test fixtures). + +### Bench Files — resource.getrusage() (OPTIONAL — not required for daemon) + +Lower priority, affects only benchmarking tools (not runtime code). + +**Step 2 — `fcntl` file locking (completed in `8154b9b`).** `fcntl` is POSIX-only. On Windows, importing any of these files raises `ModuleNotFoundError`. 
+ +Files to fix: +- `src/iai_mcp/capture_queue.py` — uses `fcntl.flock()` +- `src/iai_mcp/hippo/_db.py` — uses `fcntl.flock()` +- `src/iai_mcp/lifecycle_event_log.py` — uses `fcntl.flock()` +- `src/iai_mcp/lifecycle.py` — uses `fcntl.flock()` +- `src/iai_mcp/lock_protocol.py` — uses `fcntl.flock()` +- `src/iai_mcp/doctor/_lifecycle_checks.py` — uses `fcntl.flock()` + +**Fix:** Create `src/iai_mcp/_filelock.py` that provides a `flock(fd, operation)` shim: +- On POSIX: delegates to `fcntl.flock(fd, operation)` +- On Windows: uses `msvcrt.locking()` with appropriate size (use `os.path.getsize` or a large constant like `2**31 - 1`) + +Example shim: +```python +import platform, os +if platform.system() == "Windows": + import msvcrt + LOCK_EX = 1; LOCK_SH = 2; LOCK_UN = 4; LOCK_NB = 8 + def flock(fd, operation): + if isinstance(fd, int): + raw = fd + else: + raw = fd.fileno() + if operation & LOCK_UN: + try: msvcrt.locking(raw, msvcrt.LK_UNLCK, 2**30) + except OSError: pass + elif operation & LOCK_EX: + mode = msvcrt.LK_NBLCK if (operation & LOCK_NB) else msvcrt.LK_LOCK + msvcrt.locking(raw, mode, 2**30) + elif operation & LOCK_SH: + mode = msvcrt.LK_NBLCK if (operation & LOCK_NB) else msvcrt.LK_LOCK + msvcrt.locking(raw, mode, 2**30) +else: + import fcntl as _fcntl + LOCK_EX = _fcntl.LOCK_EX; LOCK_SH = _fcntl.LOCK_SH + LOCK_UN = _fcntl.LOCK_UN; LOCK_NB = _fcntl.LOCK_NB + def flock(fd, operation): + _fcntl.flock(fd, operation) +``` + +Then in each affected file, replace: +```python +import fcntl +... +fcntl.flock(fd, fcntl.LOCK_EX) +``` +with: +```python +from iai_mcp._filelock import flock, LOCK_EX, LOCK_SH, LOCK_UN, LOCK_NB +... +flock(fd, LOCK_EX) +``` + +--- + +### Step 3 — resource module (CRITICAL — daemon crashes on import) + +`resource` is POSIX-only. `src/iai_mcp/daemon/__init__.py` imports it at the top level. 
+ +Files to fix: +- `src/iai_mcp/daemon/__init__.py` — `resource.getrlimit()`, `resource.setrlimit()` + +**Fix:** Wrap in a platform guard: +```python +import platform as _platform +if _platform.system() != "Windows": + import resource as _resource + def _raise_fd_limit(): + soft, hard = _resource.getrlimit(_resource.RLIMIT_NOFILE) + if soft < 4096: + _resource.setrlimit(_resource.RLIMIT_NOFILE, (min(4096, hard), hard)) +else: + def _raise_fd_limit(): + pass # Windows manages FD limits via OS handles +``` + +Also fix in bench files (lower priority, bench-only): +- `bench/memory_footprint.py`, `bench/embed_warm_cost.py`, `bench/consolidation_rss_peak.py`, + `bench/memorygraph_memory.py` — use `psutil.Process(os.getpid()).memory_info().rss` instead + of `resource.getrusage(resource.RUSAGE_SELF).ru_maxrss` + +--- + +### Step 4 — POSIX signals (CRITICAL — daemon crashes on Windows) + +`signal.SIGHUP`, `signal.SIGKILL` do not exist on Windows. + +Files to fix: +- `src/iai_mcp/daemon/__init__.py` — registers SIGHUP handler; calls SIGTERM/SIGKILL +- `src/iai_mcp/daemon/_watchdog.py` — `os.kill(os.getpid(), signal.SIGKILL)` +- `src/iai_mcp/cli/_daemon.py` — `os.kill(pid, signal.SIGTERM)` / `SIGKILL` +- `src/iai_mcp/doctor/__init__.py` — `os.kill(pid, signal.SIGTERM)` + +**Fix:** +```python +import platform, signal, os + +def _terminate_process(pid: int, graceful: bool = True) -> None: + if platform.system() == "Windows": + os.kill(pid, signal.CTRL_C_EVENT) + else: + sig = signal.SIGTERM if graceful else signal.SIGKILL + os.kill(pid, sig) + +# For SIGHUP registration, guard it: +if hasattr(signal, "SIGHUP"): + signal.signal(signal.SIGHUP, _reload_handler) +``` + +For `os.kill(os.getpid(), signal.SIGKILL)` (self-termination in watchdog), replace with +`sys.exit(1)` on Windows. + +--- + +### Step 5 — Daemon installer: Windows Task Scheduler (MAJOR) + +`iai-mcp daemon install` only supports launchd (macOS) and systemd (Linux). +It needs a Windows backend. 
+ +File: `src/iai_mcp/cli/_daemon.py` + +Add `_is_windows()` guard and implement `cmd_daemon_install_windows()` that: +1. Uses Python's `subprocess` to call `schtasks.exe` — the built-in Windows Task Scheduler CLI. +2. Creates a task that runs `pythonw.exe -m iai_mcp.daemon` at login, hidden. +3. Writes a `WINDOWS_SERVICE_TARGET` path constant analogous to `LAUNCHD_TARGET`. + +Example schtasks command: +``` +schtasks /Create /SC ONLOGON /TN "iai-mcp-daemon" /TR "pythonw.exe -m iai_mcp.daemon" /RL HIGHEST /F +``` + +Also implement `cmd_daemon_uninstall_windows()`: +``` +schtasks /Delete /TN "iai-mcp-daemon" /F +``` + +And `cmd_daemon_start_windows()` / `cmd_daemon_stop_windows()`: +``` +schtasks /Run /TN "iai-mcp-daemon" +taskkill /F /IM pythonw.exe /FI "WINDOWTITLE eq iai-mcp-daemon" +``` + +Wire these into the existing `cmd_daemon_install()` dispatch block alongside the +`_is_macos()` and `_is_linux()` branches. + +--- + +### Step 6 — Shell hooks: PowerShell equivalents (MAJOR) + +Claude Code on Windows does not run `.sh` hook scripts. The three hooks need `.ps1` equivalents. + +Hooks are in `src/iai_mcp/_deploy/hooks/`: +- `iai-mcp-turn-capture.sh` — appends each prompt+response turn to per-session buffer +- `iai-mcp-session-capture.sh` — at session end, rolls the buffer for the daemon +- `iai-mcp-session-recall.sh` — at session start, pipes cached memory prefix to stdout + +**Fix:** Create `.ps1` versions of each that call the Python CLI equivalents: +```powershell +# iai-mcp-turn-capture.ps1 +$python = (Get-Command python).Source +& $python -m iai_mcp capture-turn @args +``` +The Python CLI already has `capture-transcript`, `session-start` subcommands — +the PowerShell hooks just need to call them. + +Also update `src/iai_mcp/cli/_capture.py`'s `cmd_capture_hooks_install()` to: +1. Detect Windows and copy `.ps1` files instead of `.sh` files +2. 
Patch `~/.claude/settings.json` hooks to reference `.ps1` paths on Windows + +--- + +### Step 7 — os.getuid / pwd module guards (MODERATE) + +`os.getuid()` and the `pwd` module are POSIX-only. + +Files to fix: +- `src/iai_mcp/crypto.py` — `os.geteuid()` at line ~121 +- `src/iai_mcp/cli/_crypto.py` — `st.st_uid == os.geteuid()` at line ~39 +- `src/iai_mcp/hippo/__init__.py` — `pwd.getpwuid(os.getuid()).pw_dir` at line ~54 + +**Fix:** +```python +# For ownership checks: +if hasattr(os, "geteuid") and st.st_uid != os.geteuid(): + raise PermissionError(...) + +# For home directory (hippo/__init__.py): +# Replace pwd.getpwuid(os.getuid()).pw_dir with: +home = str(Path.home()) +``` + +--- + +### Step 8 — Rust build: disable macOS-only features (MODERATE) + +`rust/iai_mcp_embed_core/Cargo.toml` has `accelerate` and `metal` features +(Apple Accelerate framework and Apple Metal GPU). These fail to compile on Windows. + +**Fix:** In `pyproject.toml` (the setuptools-rust build config), add platform-conditional +feature flags. Find the `[[tool.setuptools-rust.ext-modules]]` section and add: + +```toml +[[tool.setuptools-rust.ext-modules]] +target = "iai_mcp_native" +path = "rust/iai_mcp_native/Cargo.toml" +binding = "PyO3" +features = ["extension-module"] +args = ["--no-default-features"] +``` + +This already disables default features. Verify `accelerate` and `metal` are not in the +default feature set of `Cargo.toml`. If they are, add a `[target.'cfg(target_os = "macos")'.dependencies]` +section in `Cargo.toml` to gate them. + +--- + +### Step 9 — Log paths and temp dirs (MINOR) + +`src/iai_mcp/cli/_daemon.py` uses `~/Library/Logs/` for daemon logs (macOS-specific). 
+ +**Fix:** Add `_get_daemon_log_path()`: +```python +import platform +def _get_daemon_log_path() -> Path: + if platform.system() == "Darwin": + return Path.home() / "Library" / "Logs" / "iai-mcp-daemon.stderr.log" + elif platform.system() == "Windows": + return Path(os.environ.get("APPDATA", Path.home())) / "iai-mcp" / "logs" / "daemon.log" + else: + return Path.home() / ".local" / "share" / "iai-mcp" / "logs" / "daemon.log" +``` + +--- + +### Step 10 — chmod security for crypto key (MINOR) + +`src/iai_mcp/crypto.py` calls `os.chmod(key_file, 0o600)` to restrict the encryption key. +On Windows, `chmod` is a no-op for access control. Use `icacls.exe` instead: + +```python +import platform, subprocess +def _secure_key_file(path: Path) -> None: + if platform.system() == "Windows": + user = os.environ.get("USERNAME", "") + subprocess.run( + ["icacls", str(path), "/inheritance:d", "/grant:r", f"{user}:F"], + check=False, capture_output=True, + ) + else: + path.chmod(0o600) +``` + +--- + +## Next Steps (for the next session) + +The core daemon + hook infrastructure is now Windows-ready, and bench files +no longer crash on Windows import. Remaining work (item 1 has since been carried out — see "End-to-end verification (COMPLETE)" above): + +1. **Manual testing on Windows:** Verify the port works by: + ```powershell + cd "C:\Users\Daniel Hertz\Documents\GitHub\iai-personal-memory-engine" + python -m venv .venv + .venv\Scripts\activate + pip install -e ".[dev]" + python -m iai_mcp daemon install --dry-run # Check schtasks XML renders + python -m iai_mcp capture-hooks install --dry-run # Check hook paths + ``` + +2. 
**Update CLAUDE.md:** Add Windows-specific setup notes to the project's CLAUDE.md (if it exists) or create one with: + - Running `iai-mcp daemon install` on Windows (uses Task Scheduler) + - Running `iai-mcp capture-hooks install` on Windows (uses PowerShell hooks) + - Expected log locations (`%APPDATA%\iai-mcp\logs\`) + +## Verification Checklist + +All verified on Windows 11 / Python 3.12.10: +- [x] Daemon imports without crashing on Windows +- [x] Daemon actually starts and serves (direct, `pythonw`, and Task Scheduler) +- [x] `iai-mcp daemon install` creates a Task Scheduler entry that launches +- [x] `iai-mcp capture-hooks install` creates PowerShell hooks and registers in settings.json +- [x] Hook commands reference `.ps1` files (not `.sh`) on Windows in settings.json +- [x] State persists across scheduler ticks under concurrent reader load +- [ ] Logs go to `%APPDATA%\iai-mcp\logs\` (Windows) not `~/.local/share` (Linux) + — log dir created; `pythonw` discards stdio so the daemon writes no file + there in normal operation (not blocking) +- [ ] Crypto key file created with appropriate icacls permissions + — code path ported (Step 10), not exercised in this E2E run + +## Key Design Decisions + +1. **Platform detection:** Uses `platform.system()` checks (`== "Windows"`, `== "Darwin"`, `== "Linux"`) throughout +2. **File locking:** `_filelock.py` shim normalizes `msvcrt.locking()` (Windows) to `fcntl.flock()` interface (POSIX) +3. **Daemon management:** Task Scheduler on Windows, launchd on macOS, systemd on Linux +4. **Hooks:** Python calls wrapped in shell scripts (.sh on POSIX) or PowerShell scripts (.ps1 on Windows) +5. 
**No cross-platform abstractions:** Branching logic is explicit per-platform to avoid accidental breakage + +After Step 5 (daemon installer): +```powershell +iai-mcp daemon install +iai-mcp daemon status +``` + +After Step 6 (hooks): +```powershell +iai-mcp capture-hooks install +iai-mcp capture-hooks status +``` + +Full E2E after all steps: +```powershell +iai-mcp doctor +``` + +## Notes + +- The user is on Windows 11 Pro, Python 3.12, Node 18+, has Rust toolchain +- GitHub user: `danielhertz1999-bit`, repo fork is under their account +- The upstream repo is `CodeAbra/iai-personal-memory-engine` +- All changes should be committed to the local `main` branch; a PR to upstream can be opened later +- Keep each step as a separate commit for clean history +- The `setproctitle` module (used in `daemon/__init__.py`) may need a try/except fallback + on Windows if it fails to compile — wrap: `try: from setproctitle import setproctitle\nexcept ImportError: setproctitle = lambda x: None` diff --git a/bench/consolidation_rss_peak.py b/bench/consolidation_rss_peak.py index 5984da7..151d6db 100644 --- a/bench/consolidation_rss_peak.py +++ b/bench/consolidation_rss_peak.py @@ -4,7 +4,6 @@ import gc import json import os -import resource import shutil import sys import tempfile @@ -38,6 +37,14 @@ def _cur_rss_bytes() -> int: def _ru_maxrss_bytes() -> int: + if sys.platform == "win32": + try: + import psutil + mi = psutil.Process().memory_info() + return int(getattr(mi, "peak_wset", mi.rss)) + except Exception: + return 0 + import resource r = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss if sys.platform == "darwin": return int(r) diff --git a/bench/embed_warm_cost.py b/bench/embed_warm_cost.py index ce2f086..f5fef0b 100644 --- a/bench/embed_warm_cost.py +++ b/bench/embed_warm_cost.py @@ -58,23 +58,42 @@ """ _PAYLOAD_RSS = r""" -import sys, resource +import sys sys.path.insert(0, {src_path!r}) +import platform as _plat +_system = _plat.system() +if _system == "Windows": + 
import psutil as _psutil + def _peak_raw(): + mi = _psutil.Process().memory_info() + return int(getattr(mi, "peak_wset", mi.rss)) + def _to_mb(raw): + return raw / 1048576 + _unit_is_bytes = True +else: + import resource as _resource + def _peak_raw(): + return _resource.getrusage(_resource.RUSAGE_SELF).ru_maxrss + if _system == "Darwin": + def _to_mb(raw): + return raw / 1048576 + _unit_is_bytes = True + else: + def _to_mb(raw): + return raw / 1024 + _unit_is_bytes = False from iai_mcp.embed import Embedder e = Embedder() -rss_post_construct_raw = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss +rss_post_construct_raw = _peak_raw() text = {text!r} _ = e.embed(text) -rss_post_encode_raw = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss -import platform as _plat -is_mac = (_plat.system() == "Darwin") -def to_mb(raw): - return raw / 1048576 if is_mac else raw / 1024 -print(f"rss_post_construct_mb={{to_mb(rss_post_construct_raw):.1f}}") -print(f"rss_post_encode_mb={{to_mb(rss_post_encode_raw):.1f}}") +rss_post_encode_raw = _peak_raw() +print(f"rss_post_construct_mb={{_to_mb(rss_post_construct_raw):.1f}}") +print(f"rss_post_encode_mb={{_to_mb(rss_post_encode_raw):.1f}}") print(f"rss_post_construct_raw={{rss_post_construct_raw}}") print(f"rss_post_encode_raw={{rss_post_encode_raw}}") -print(f"unit_is_bytes={{is_mac}}") +print(f"unit_is_bytes={{_unit_is_bytes}}") +print(f"rss_platform={{_system}}") """ @@ -210,10 +229,17 @@ def measure_rss(src_path: str, text: str) -> dict: rss_post_construct_mb = float(kv["rss_post_construct_mb"]) rss_post_encode_mb = float(kv["rss_post_encode_mb"]) unit_is_bytes = kv["unit_is_bytes"] == "True" + rss_platform = kv.get("rss_platform", "") + if rss_platform == "Windows": + unit_label = "bytes (Windows peak_wset)" + elif rss_platform == "Darwin" or (unit_is_bytes and not rss_platform): + unit_label = "bytes (macOS)" + else: + unit_label = "KB (Linux)" print( f" RSS post-construct={rss_post_construct_mb:.1f}MB " 
f"post-first-encode={rss_post_encode_mb:.1f}MB " - f"unit={'bytes (macOS)' if unit_is_bytes else 'KB (Linux)'}" + f"unit={unit_label}" ) return { "rss_post_construct_mb": rss_post_construct_mb, @@ -221,6 +247,7 @@ def measure_rss(src_path: str, text: str) -> dict: "rss_post_construct_raw": int(kv["rss_post_construct_raw"]), "rss_post_encode_raw": int(kv["rss_post_encode_raw"]), "unit_is_bytes_macos": unit_is_bytes, + "rss_platform": rss_platform, } diff --git a/bench/memory_footprint.py b/bench/memory_footprint.py index ea879df..504ff3a 100644 --- a/bench/memory_footprint.py +++ b/bench/memory_footprint.py @@ -4,7 +4,6 @@ import gc import json import os -import resource import sys import tempfile import time @@ -42,6 +41,11 @@ def _threshold_mb_for_n(n: int) -> float: def _rss_mb() -> float: + if sys.platform == "win32": + import psutil + mi = psutil.Process().memory_info() + return float(getattr(mi, "peak_wset", mi.rss)) / 1024.0 / 1024.0 + import resource r = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss if sys.platform == "darwin": return float(r) / 1024.0 / 1024.0 diff --git a/bench/memorygraph_memory.py b/bench/memorygraph_memory.py index 06291be..e8f9c46 100644 --- a/bench/memorygraph_memory.py +++ b/bench/memorygraph_memory.py @@ -2,7 +2,6 @@ import argparse import gc -import resource import sys from pathlib import Path from uuid import uuid4 @@ -14,6 +13,11 @@ def rss_mb() -> float: + if sys.platform == "win32": + import psutil + mi = psutil.Process().memory_info() + return float(getattr(mi, "peak_wset", mi.rss)) / (1024 * 1024) + import resource ru = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss if sys.platform == "darwin": return ru / (1024 * 1024) diff --git a/mcp-wrapper/package-lock.json b/mcp-wrapper/package-lock.json index 776a840..c45beb4 100644 --- a/mcp-wrapper/package-lock.json +++ b/mcp-wrapper/package-lock.json @@ -7,6 +7,7 @@ "": { "name": "iai-mcp-wrapper", "version": "1.0.0", + "license": "MIT", "dependencies": { 
"@modelcontextprotocol/sdk": "^1.0.0", "zod": "^3.23.0" diff --git a/mcp-wrapper/src/bridge.ts b/mcp-wrapper/src/bridge.ts index 8d6f907..07db282 100644 --- a/mcp-wrapper/src/bridge.ts +++ b/mcp-wrapper/src/bridge.ts @@ -1,13 +1,13 @@ import * as crypto from "node:crypto"; import * as net from "node:net"; -import * as os from "node:os"; -import * as path from "node:path"; +import { + type ConnectTarget, + createDaemonConnection, + daemonUnreachableHint, + getDaemonConnectTarget, +} from "./ipc.js"; -function getDaemonSocketPath(): string { - return process.env.IAI_DAEMON_SOCKET_PATH - ?? path.join(os.homedir(), ".iai-mcp", ".daemon.sock"); -} const SOCKET_CONNECT_TIMEOUT_MS = 5000; const ERR_DAEMON_UNREACHABLE = -32002; @@ -69,29 +69,30 @@ export class PythonCoreBridge { private async _doStart(): Promise { this.reconnectAttempted = false; + const target = getDaemonConnectTarget(); + if (target === null) { + throw new DaemonUnreachableError(daemonUnreachableHint()); + } + let sock: net.Socket; try { sock = await this.connectWithTimeout( - getDaemonSocketPath(), + target, SOCKET_CONNECT_TIMEOUT_MS, ); } catch (e) { - throw new DaemonUnreachableError( - "iai-mcp daemon not running. " - + "Run: launchctl load -w ~/Library/LaunchAgents/com.iai-mcp.daemon.plist " - + "or run scripts/install.sh" - ); + throw new DaemonUnreachableError(daemonUnreachableHint()); } this.sock = sock; this.attachSocketHandlers(); } private connectWithTimeout( - socketPath: string, + target: ConnectTarget, timeoutMs: number, ): Promise { return new Promise((resolve, reject) => { - const sock = net.createConnection(socketPath); + const sock = createDaemonConnection(target); // Keep a pending/abandoned connect attempt from pinning the event loop // (e.g. an in-flight reconnect after socket death). A live connected // socket re-refs below so real RPC still holds the process open. 
@@ -193,8 +194,12 @@ export class PythonCoreBridge { if (testDelayMs > 0) { await new Promise((r) => setTimeout(r, testDelayMs)); } + const target = getDaemonConnectTarget(); + if (target === null) { + return; + } this.sock = await this.connectWithTimeout( - getDaemonSocketPath(), + target, SOCKET_CONNECT_TIMEOUT_MS, ); this.attachSocketHandlers(); @@ -254,13 +259,6 @@ export class PythonCoreBridge { } -export function sessionOpenSocketPath(): string { - const env = process.env.IAI_DAEMON_SOCKET_PATH; - if (env) return env; - return path.join(os.homedir(), ".iai-mcp", ".daemon.sock"); -} - - export function newSessionId(): string { return crypto.randomUUID(); } @@ -275,8 +273,12 @@ export function emitSessionOpen(sessionId: string): Promise { resolve(); }; try { - const socketPath = sessionOpenSocketPath(); - const sock = net.createConnection(socketPath, () => { + const target = getDaemonConnectTarget(); + if (target === null) { + finish(); + return; + } + const sock = createDaemonConnection(target, () => { const msg = JSON.stringify({ type: "session_open", diff --git a/mcp-wrapper/src/ipc.ts b/mcp-wrapper/src/ipc.ts new file mode 100644 index 0000000..b361983 --- /dev/null +++ b/mcp-wrapper/src/ipc.ts @@ -0,0 +1,97 @@ + +/** + * Platform-agnostic IPC transport, mirroring the Python `iai_mcp._ipc` module. + * + * POSIX: Unix-domain socket -> ~/.iai-mcp/.daemon.sock + * Windows: TCP loopback -> 127.0.0.1:, port read from + * ~/.iai-mcp/.daemon.port + * + * The base dir is ~/.iai-mcp (os.homedir()) to match `_ipc._BASE_DIR`, which + * uses Path.home() regardless of IAI_MCP_STORE. 
+ */ +import * as fs from "node:fs"; +import * as net from "node:net"; +import * as os from "node:os"; +import * as path from "node:path"; + +export const IS_WINDOWS = process.platform === "win32"; + +export type ConnectTarget = string | { host: string; port: number }; + +function daemonBaseDir(): string { + return path.join(os.homedir(), ".iai-mcp"); +} + +export function daemonSocketPath(): string { + return path.join(daemonBaseDir(), ".daemon.sock"); +} + +export function daemonPortFile(): string { + return path.join(daemonBaseDir(), ".daemon.port"); +} + +export function readDaemonPort(): number | null { + try { + const txt = fs.readFileSync(daemonPortFile(), "utf-8").trim(); + const port = Number.parseInt(txt, 10); + return Number.isFinite(port) && port > 0 ? port : null; + } catch { + return null; + } +} + +/** + * Resolve the daemon IPC endpoint. + * POSIX -> Unix-domain socket path (string) + * Windows -> { host: "127.0.0.1", port } from the port file + * Returns null when the endpoint cannot be determined (on Windows: port file + * absent => daemon not running). IAI_DAEMON_SOCKET_PATH overrides on POSIX. + */ +export function getDaemonConnectTarget(): ConnectTarget | null { + const env = process.env.IAI_DAEMON_SOCKET_PATH; + if (env) return env; + if (IS_WINDOWS) { + const port = readDaemonPort(); + return port === null ? null : { host: "127.0.0.1", port }; + } + return daemonSocketPath(); +} + +export function daemonUnreachableHint(): string { + if (IS_WINDOWS) { + return ( + "iai-mcp daemon not running. " + + 'Start it with: schtasks /Run /TN "iai-mcp-daemon" ' + + "(or: iai-mcp daemon install)." + ); + } + if (process.platform === "darwin") { + return ( + "iai-mcp daemon not running. " + + "Run: launchctl load -w ~/Library/LaunchAgents/com.iai-mcp.daemon.plist " + + "or run scripts/install.sh" + ); + } + return ( + "iai-mcp daemon not running. 
" + + "Run: systemctl --user start iai-mcp-daemon or run scripts/install.sh" + ); +} + +/** + * Open a net.Socket to the daemon for either transport. Accepts the union + * target returned by getDaemonConnectTarget so callers stay platform-agnostic. + */ +export function createDaemonConnection( + target: ConnectTarget, + connectListener?: () => void, +): net.Socket { + if (typeof target === "string") { + return connectListener + ? net.createConnection(target, connectListener) + : net.createConnection(target); + } + return connectListener + ? net.createConnection(target.port, target.host, connectListener) + : net.createConnection(target.port, target.host); +} diff --git a/mcp-wrapper/src/lifecycle.ts b/mcp-wrapper/src/lifecycle.ts index 2dff96f..9b07444 100644 --- a/mcp-wrapper/src/lifecycle.ts +++ b/mcp-wrapper/src/lifecycle.ts @@ -5,9 +5,17 @@ import { mkdir, rename, unlink, writeFile } from "node:fs/promises"; import { homedir } from "node:os"; import { dirname, join } from "node:path"; import { promisify } from "node:util"; +import { + type ConnectTarget, + createDaemonConnection, + getDaemonConnectTarget, + IS_WINDOWS, +} from "./ipc.js"; const execFileAsync = promisify(execFile); +const SCHTASKS_TASK_NAME = "iai-mcp-daemon"; + export const HEARTBEAT_REFRESH_INTERVAL_MS = 30_000; @@ -95,7 +103,10 @@ export class WrapperLifecycle { if (alive) { return; } - if (this.platform === "darwin") { + // macOS: launchctl kickstart. Windows: schtasks /Run the daemon task. + // Both are best-effort; fall through to the wake-signal sentinel on + // failure or on Linux (where systemd/scripts own daemon startup). + if (this.platform === "darwin" || this.platform === "win32") { try { await this.spawnKickstart(); return; @@ -170,7 +181,12 @@ function isoNow(): string { function defaultSocketReachable(socketPath: string): () => Promise { return async () => { - const { createConnection } = await import("node:net"); + // POSIX: probe the (possibly injected) Unix socket path. 
Windows: probe + // the TCP loopback endpoint from the daemon port file. + const target: ConnectTarget | null = IS_WINDOWS + ? getDaemonConnectTarget() + : socketPath; + if (target === null) return false; return await new Promise((resolve) => { let settled = false; const settle = (v: boolean): void => { @@ -182,7 +198,7 @@ function defaultSocketReachable(socketPath: string): () => Promise { } resolve(v); }; - const socket = createConnection({ path: socketPath }); + const socket = createDaemonConnection(target); socket.setTimeout(1_000); socket.once("connect", () => settle(true)); socket.once("error", () => settle(false)); @@ -192,6 +208,13 @@ function defaultSocketReachable(socketPath: string): () => Promise { } function defaultSpawnKickstart(): () => Promise { + if (IS_WINDOWS) { + return async () => { + await execFileAsync("schtasks", ["/Run", "/TN", SCHTASKS_TASK_NAME], { + timeout: KICKSTART_TIMEOUT_MS, + }); + }; + } return async () => { const uid = typeof process.getuid === "function" ? process.getuid() : 0; const args = ["kickstart", "-k", `gui/${uid}/${LAUNCHD_LABEL}`]; diff --git a/pyproject.toml b/pyproject.toml index 684c975..579ba91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ iai_mcp = [ "_deploy/launchd/*.plist", "_deploy/systemd/*.service", "_deploy/hooks/*.sh", + "_deploy/hooks/*.ps1", "_wrapper/*.js", ] diff --git a/src/iai_mcp/__main__.py b/src/iai_mcp/__main__.py new file mode 100644 index 0000000..a6d7fbc --- /dev/null +++ b/src/iai_mcp/__main__.py @@ -0,0 +1,18 @@ +"""Package entry point so ``python -m iai_mcp`` works. + +The Windows PowerShell hooks (``_deploy/hooks/*.ps1``) invoke the CLI as +``python -m iai_mcp `` rather than via the ``iai-mcp`` console +script (which may not be on PATH inside a hook subprocess). That form requires +this module; without it Python raises "No module named iai_mcp.__main__" and +every hook silently no-ops. 
Delegates to the same entry point as the +``iai-mcp`` console script and ``python -m iai_mcp.cli``. +""" + +from __future__ import annotations + +import sys + +from iai_mcp.cli import main + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/iai_mcp/_deploy/hooks/iai-mcp-session-capture.ps1 b/src/iai_mcp/_deploy/hooks/iai-mcp-session-capture.ps1 new file mode 100644 index 0000000..9910b52 --- /dev/null +++ b/src/iai_mcp/_deploy/hooks/iai-mcp-session-capture.ps1 @@ -0,0 +1,146 @@ +# IAI-MCP Stop hook — ambient WRITE-side capture (Windows). +# +# PowerShell equivalent of iai-mcp-session-capture.sh. +# Fires when a Claude Code session ends. Calls `iai-mcp capture-transcript +# --no-spawn` to batch-capture the session transcript. +# Fail-safe: always exits 0. + +$ErrorActionPreference = 'SilentlyContinue' + +try { + $inputText = [Console]::In.ReadToEnd() +} catch { + $inputText = '' +} + +$session_id = '' +$transcript_path = '' +$cwd = '' +try { + $obj = $inputText | ConvertFrom-Json + $session_id = if ($obj.session_id) { $obj.session_id } else { '' } + $transcript_path = if ($obj.transcript_path) { $obj.transcript_path } else { '' } + $cwd = if ($obj.cwd) { $obj.cwd } else { '' } +} catch {} + +# Fallback: locate transcript if the hook payload didn't include its path. 
+if (-not $transcript_path -and $session_id) { + $projectsDir = Join-Path $env:USERPROFILE '.claude\projects' + if (Test-Path $projectsDir) { + Get-ChildItem -Path $projectsDir -Directory | ForEach-Object { + $candidate = Join-Path $_.FullName "$session_id.jsonl" + if ((Test-Path $candidate) -and -not $transcript_path) { + $transcript_path = $candidate + } + } + } +} + +$logDir = Join-Path $env:USERPROFILE '.iai-mcp\logs' +if (-not (Test-Path $logDir)) { New-Item -ItemType Directory -Path $logDir -Force | Out-Null } +$logDate = (Get-Date).ToUniversalTime().ToString('yyyy-MM-dd') +$logFile = Join-Path $logDir "capture-$logDate.log" +$ts = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') + +Add-Content -Path $logFile -Value "---" -ErrorAction SilentlyContinue +Add-Content -Path $logFile -Value "$ts session=$session_id cwd=$cwd transcript=$transcript_path" -ErrorAction SilentlyContinue + +if (-not $transcript_path -or -not (Test-Path $transcript_path)) { + Add-Content -Path $logFile -Value "$ts skipped: no transcript found" -ErrorAction SilentlyContinue + exit 0 +} + +# Rename the active-writer marker so the drain can see it. +if ($session_id) { + $liveFile = Join-Path $env:USERPROFILE ".iai-mcp\.deferred-captures\$session_id.live.jsonl" + if (Test-Path $liveFile) { + $epoch = [int][double]::Parse((Get-Date -UFormat '%s')) + $newName = "$session_id.live-$epoch.jsonl" + $destDir = Split-Path $liveFile -Parent + Move-Item -Path $liveFile -Destination (Join-Path $destDir $newName) -Force -ErrorAction SilentlyContinue + } + $offsetState = Join-Path $env:USERPROFILE ".iai-mcp\.capture-state\$session_id.offset" + if (Test-Path $offsetState) { Remove-Item -Path $offsetState -Force -ErrorAction SilentlyContinue } +} + +# Find the iai-mcp CLI +$iai_cli = $null + +# 1. Environment variable override +if ($env:IAI_MCP_SESSION_CAPTURE_CLI -and (Test-Path $env:IAI_MCP_SESSION_CAPTURE_CLI)) { + $iai_cli = $env:IAI_MCP_SESSION_CAPTURE_CLI +} + +# 2. 
Cached CLI path +if (-not $iai_cli) { + $cliCache = Join-Path $env:USERPROFILE '.iai-mcp\.cli-path' + if (Test-Path $cliCache) { + $cached = (Get-Content $cliCache -ErrorAction SilentlyContinue).Trim() + if ($cached -and (Test-Path $cached)) { $iai_cli = $cached } + } +} + +# 3. PATH lookup +if (-not $iai_cli) { + try { + $resolved = (Get-Command iai-mcp -ErrorAction Stop).Source + if ($resolved) { + $iai_cli = $resolved + Set-Content -Path (Join-Path $env:USERPROFILE '.iai-mcp\.cli-path') -Value $iai_cli -ErrorAction SilentlyContinue + } + } catch {} +} + +# 4. Common Windows install locations +if (-not $iai_cli) { + $candidates = @( + (Join-Path $env:USERPROFILE '.local\bin\iai-mcp.exe'), + (Join-Path $env:USERPROFILE 'IAI-MCP\.venv\Scripts\iai-mcp.exe'), + (Join-Path $env:LOCALAPPDATA 'Programs\Python\Scripts\iai-mcp.exe') + ) + foreach ($c in $candidates) { + if (Test-Path $c) { + $iai_cli = $c + Set-Content -Path (Join-Path $env:USERPROFILE '.iai-mcp\.cli-path') -Value $iai_cli -ErrorAction SilentlyContinue + break + } + } +} + +# 5. 
Fall back to python -m iai_mcp +if (-not $iai_cli) { + $pyExe = $null + try { $pyExe = (Get-Command python -ErrorAction Stop).Source } catch {} + if ($pyExe) { + $iai_cli = "__python__" + } +} + +if (-not $iai_cli) { + Add-Content -Path $logFile -Value "$ts skipped: iai-mcp CLI not found" -ErrorAction SilentlyContinue + exit 0 +} + +# Run capture with a 30s timeout +try { + if ($iai_cli -eq "__python__") { + $pyExe = (Get-Command python -ErrorAction Stop).Source + $proc = Start-Process -FilePath $pyExe ` + -ArgumentList '-m', 'iai_mcp', 'capture-transcript', '--no-spawn', '--session-id', $session_id, '--max-turns', '100000', $transcript_path ` + -NoNewWindow -PassThru -RedirectStandardOutput (Join-Path $logDir 'capture-stdout.tmp') -RedirectStandardError (Join-Path $logDir 'capture-stderr.tmp') + } else { + $proc = Start-Process -FilePath $iai_cli ` + -ArgumentList 'capture-transcript', '--no-spawn', '--session-id', $session_id, '--max-turns', '100000', $transcript_path ` + -NoNewWindow -PassThru -RedirectStandardOutput (Join-Path $logDir 'capture-stdout.tmp') -RedirectStandardError (Join-Path $logDir 'capture-stderr.tmp') + } + $exited = $proc.WaitForExit(30000) + if (-not $exited) { + try { $proc.Kill() } catch {} + } + $rc = if ($exited) { $proc.ExitCode } else { 124 } +} catch { + $rc = 1 +} + +Add-Content -Path $logFile -Value "$ts rc=$rc" -ErrorAction SilentlyContinue +exit 0 diff --git a/src/iai_mcp/_deploy/hooks/iai-mcp-session-recall.ps1 b/src/iai_mcp/_deploy/hooks/iai-mcp-session-recall.ps1 new file mode 100644 index 0000000..8de2379 --- /dev/null +++ b/src/iai_mcp/_deploy/hooks/iai-mcp-session-recall.ps1 @@ -0,0 +1,143 @@ +# IAI-MCP SessionStart hook — recall injection (Windows). +# +# PowerShell equivalent of iai-mcp-session-recall.sh. +# Fires on Claude Code session start. Prints the cached session prefix +# to stdout for Claude Code to inject as additionalContext. +# Fail-safe: always exits 0 with empty stdout on any error. 
+ +$ErrorActionPreference = 'SilentlyContinue' + +try { + $inputText = [Console]::In.ReadToEnd() +} catch { + $inputText = '' +} + +$session_id = '' +$source_evt = '' +try { + $obj = $inputText | ConvertFrom-Json + $session_id = if ($obj.session_id) { $obj.session_id } else { '' } + $source_evt = if ($obj.source) { $obj.source } else { '' } +} catch {} + +$logDir = Join-Path $env:USERPROFILE '.iai-mcp\logs' +if (-not (Test-Path $logDir)) { New-Item -ItemType Directory -Path $logDir -Force | Out-Null } +$logDate = (Get-Date).ToUniversalTime().ToString('yyyy-MM-dd') +$logFile = Join-Path $logDir "recall-$logDate.log" +$ts = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') + +Add-Content -Path $logFile -Value "---" -ErrorAction SilentlyContinue +Add-Content -Path $logFile -Value "$ts session=$session_id source=$source_evt" -ErrorAction SilentlyContinue + +# Try the precache file first +$cachePath = Join-Path $env:USERPROFILE '.iai-mcp\.session-start-payload.cached.md' +if ((Test-Path $cachePath) -and (Get-Item $cachePath).Length -gt 0) { + try { + $cacheOut = Get-Content $cachePath -Raw -ErrorAction Stop + if ($cacheOut.Length -gt 10000) { $cacheOut = $cacheOut.Substring(0, 10000) } + if ($cacheOut) { + [Console]::Out.Write($cacheOut) + $cacheAge = [int]((Get-Date) - (Get-Item $cachePath).LastWriteTime).TotalSeconds + Add-Content -Path $logFile -Value "$ts cache-hit age=${cacheAge}s bytes=$($cacheOut.Length)" -ErrorAction SilentlyContinue + exit 0 + } + } catch {} + Add-Content -Path $logFile -Value "$ts cache-miss empty" -ErrorAction SilentlyContinue +} else { + Add-Content -Path $logFile -Value "$ts cache-miss absent" -ErrorAction SilentlyContinue +} + +# Find the iai-mcp CLI +$iai_cli = $null + +if ($env:IAI_MCP_SESSION_RECALL_CLI -and (Test-Path $env:IAI_MCP_SESSION_RECALL_CLI)) { + $iai_cli = $env:IAI_MCP_SESSION_RECALL_CLI +} + +if (-not $iai_cli) { + $cliCache = Join-Path $env:USERPROFILE '.iai-mcp\.cli-path' + if (Test-Path $cliCache) { + $cached 
= (Get-Content $cliCache -ErrorAction SilentlyContinue).Trim() + if ($cached -and (Test-Path $cached)) { $iai_cli = $cached } + } +} + +if (-not $iai_cli) { + try { + $resolved = (Get-Command iai-mcp -ErrorAction Stop).Source + if ($resolved) { + $iai_cli = $resolved + Set-Content -Path (Join-Path $env:USERPROFILE '.iai-mcp\.cli-path') -Value $iai_cli -ErrorAction SilentlyContinue + } + } catch {} +} + +if (-not $iai_cli) { + $candidates = @( + (Join-Path $env:USERPROFILE '.local\bin\iai-mcp.exe'), + (Join-Path $env:USERPROFILE 'IAI-MCP\.venv\Scripts\iai-mcp.exe'), + (Join-Path $env:LOCALAPPDATA 'Programs\Python\Scripts\iai-mcp.exe') + ) + foreach ($c in $candidates) { + if (Test-Path $c) { + $iai_cli = $c + Set-Content -Path (Join-Path $env:USERPROFILE '.iai-mcp\.cli-path') -Value $iai_cli -ErrorAction SilentlyContinue + break + } + } +} + +$usePythonModule = $false +if (-not $iai_cli) { + try { + $pyExe = (Get-Command python -ErrorAction Stop).Source + $usePythonModule = $true + } catch {} +} + +if (-not $iai_cli -and -not $usePythonModule) { + Add-Content -Path $logFile -Value "$ts skipped: iai-mcp CLI not found" -ErrorAction SilentlyContinue + exit 0 +} + +# Run session-start with a 10s timeout +$hookTimeout = if ($env:IAI_MCP_RECALL_HOOK_TIMEOUT) { [int]$env:IAI_MCP_RECALL_HOOK_TIMEOUT } else { 10 } +$outTmp = Join-Path $logDir 'recall-stdout.tmp' + +try { + if ($usePythonModule) { + $pyExe = (Get-Command python -ErrorAction Stop).Source + $proc = Start-Process -FilePath $pyExe ` + -ArgumentList '-m', 'iai_mcp', 'session-start', '--session-id', $session_id ` + -NoNewWindow -PassThru -RedirectStandardOutput $outTmp -RedirectStandardError (Join-Path $logDir 'recall-stderr.tmp') + } else { + $proc = Start-Process -FilePath $iai_cli ` + -ArgumentList 'session-start', '--session-id', $session_id ` + -NoNewWindow -PassThru -RedirectStandardOutput $outTmp -RedirectStandardError (Join-Path $logDir 'recall-stderr.tmp') + } + $exited = $proc.WaitForExit($hookTimeout * 
1000) + if (-not $exited) { + try { $proc.Kill() } catch {} + $rc = 124 + } else { + $rc = $proc.ExitCode + } +} catch { + $rc = 1 +} + +if ($rc -eq 0 -and (Test-Path $outTmp)) { + $out = Get-Content $outTmp -Raw -ErrorAction SilentlyContinue + if ($out) { + [Console]::Out.Write($out) + } + $outLen = if ($out) { $out.Length } else { 0 } +} else { + $outLen = 0 +} + +Remove-Item -Path $outTmp -Force -ErrorAction SilentlyContinue + +Add-Content -Path $logFile -Value "$ts rc=$rc bytes=$outLen" -ErrorAction SilentlyContinue +exit 0 diff --git a/src/iai_mcp/_deploy/hooks/iai-mcp-turn-capture.ps1 b/src/iai_mcp/_deploy/hooks/iai-mcp-turn-capture.ps1 new file mode 100644 index 0000000..a830aa5 --- /dev/null +++ b/src/iai_mcp/_deploy/hooks/iai-mcp-turn-capture.ps1 @@ -0,0 +1,65 @@ +# IAI-MCP UserPromptSubmit hook — per-turn ambient capture (Windows). +# +# PowerShell equivalent of iai-mcp-turn-capture.sh. +# Reads stdin JSON, extracts session_id + transcript_path, runs inline +# Python for low-latency capture. Fail-safe: always exits 0. 
+ +$ErrorActionPreference = 'SilentlyContinue' + +try { + $inputText = [Console]::In.ReadToEnd() +} catch { + $inputText = '' +} + +$session_id = '' +$transcript_path = '' +try { + $obj = $inputText | ConvertFrom-Json + $session_id = if ($obj.session_id) { $obj.session_id } else { '' } + $transcript_path = if ($obj.transcript_path) { $obj.transcript_path } else { '' } +} catch {} + +$logDir = Join-Path $env:USERPROFILE '.iai-mcp\logs' +if (-not (Test-Path $logDir)) { New-Item -ItemType Directory -Path $logDir -Force | Out-Null } +$logDate = (Get-Date).ToUniversalTime().ToString('yyyy-MM-dd') +$logFile = Join-Path $logDir "turn-capture-$logDate.log" +$ts = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ') + +if (-not $session_id -or -not $transcript_path) { + Add-Content -Path $logFile -Value "$ts skipped: missing session_id or transcript_path" -ErrorAction SilentlyContinue + exit 0 +} + +# Find python +$pyExe = $null +try { $pyExe = (Get-Command python -ErrorAction Stop).Source } catch {} +if (-not $pyExe) { + try { $pyExe = (Get-Command python3 -ErrorAction Stop).Source } catch {} +} +if (-not $pyExe) { + # Check common venv location + $venvPy = Join-Path $env:USERPROFILE '.iai-mcp\.venv\Scripts\python.exe' + if (Test-Path $venvPy) { $pyExe = $venvPy } +} +if (-not $pyExe) { + Add-Content -Path $logFile -Value "$ts skipped: python not found" -ErrorAction SilentlyContinue + exit 0 +} + +# Run the Python CLI for turn capture with a 5s timeout +try { + $proc = Start-Process -FilePath $pyExe ` + -ArgumentList '-m', 'iai_mcp', 'capture-turn-deferred', '--session-id', $session_id, '--transcript-path', $transcript_path ` + -NoNewWindow -PassThru -RedirectStandardError (Join-Path $logDir 'turn-capture-stderr.tmp') + $exited = $proc.WaitForExit(5000) + if (-not $exited) { + try { $proc.Kill() } catch {} + } + $rc = if ($exited) { $proc.ExitCode } else { 124 } +} catch { + $rc = 1 +} + +Add-Content -Path $logFile -Value "$ts session=$session_id rc=$rc" 
-ErrorAction SilentlyContinue +exit 0 diff --git a/src/iai_mcp/_filelock.py b/src/iai_mcp/_filelock.py new file mode 100644 index 0000000..db1d11a --- /dev/null +++ b/src/iai_mcp/_filelock.py @@ -0,0 +1,100 @@ +"""Platform-agnostic file locking shim. + +On POSIX: thin wrapper around fcntl.flock. +On Windows: msvcrt.locking with errno normalisation so callers checking +errno.EWOULDBLOCK / errno.EAGAIN on non-blocking failures work unchanged. +The file offset is saved/restored around each call (msvcrt locks relative to +the file position; fcntl.flock does not move it) and the blocking path polls +so it waits indefinitely like POSIX rather than giving up after msvcrt's ~10 s. + +Known divergence — shared locks are not truly shared on Windows. +``msvcrt.locking`` only offers exclusive byte-range locks, so LOCK_SH is +serviced as an exclusive lock: a second concurrent reader blocks where POSIX +would let both in. This is a throughput limitation, not a correctness one, and +it is deliberately NOT fixed with Win32 ``LockFileEx`` (which does support +shared locks) because callers in ``hippo/_db.py`` rely on fcntl.flock's atomic +lock *conversion* — downgrading EXCLUSIVE->SHARED and escalating SHARED-> +EXCLUSIVE in place on the same fd. ``LockFileEx`` has no atomic conversion +(you must Unlock then re-Lock, racing other waiters), so swapping it in would +trade a throughput limit for a correctness hazard on the conversion paths. +A faithful port would need those call sites reworked to a conversion-free +protocol first. +""" +from __future__ import annotations + +import os +import platform + +if platform.system() == "Windows": + import errno as _errno + import msvcrt as _msvcrt + import time as _time + + LOCK_SH = 1 + LOCK_EX = 2 + LOCK_NB = 4 + LOCK_UN = 8 + + _LOCK_BYTES = 2**30 + # Poll interval when emulating POSIX's block-until-acquired behaviour. 
+ _BLOCK_POLL_SECONDS = 0.05 + + def flock(fd: int, operation: int) -> None: + if not isinstance(fd, int): + fd = fd.fileno() + # msvcrt.locking locks bytes starting from the current file position, so + # we must seek to 0 to lock a consistent byte range across callers. + # fcntl.flock leaves the file offset untouched, however, so save the + # caller's offset and restore it afterwards to match POSIX semantics. + try: + saved_offset: int | None = os.lseek(fd, 0, os.SEEK_CUR) + except OSError: + saved_offset = None + os.lseek(fd, 0, os.SEEK_SET) + try: + if operation & LOCK_UN: + try: + _msvcrt.locking(fd, _msvcrt.LK_UNLCK, _LOCK_BYTES) + except OSError: + pass + elif operation & (LOCK_EX | LOCK_SH): + if operation & LOCK_NB: + try: + _msvcrt.locking(fd, _msvcrt.LK_NBLCK, _LOCK_BYTES) + except OSError: + raise OSError( + _errno.EWOULDBLOCK, "resource temporarily unavailable" + ) + else: + # POSIX flock blocks until the lock is acquired, but msvcrt + # has no infinite-block mode (LK_LOCK gives up after ~10 s + # and raises). Poll LK_NBLCK so a blocking acquire matches + # POSIX semantics instead of spuriously failing under long + # contention (e.g. while the consolidator holds the lock). + while True: + try: + _msvcrt.locking(fd, _msvcrt.LK_NBLCK, _LOCK_BYTES) + break + except OSError: + os.lseek(fd, 0, os.SEEK_SET) + _time.sleep(_BLOCK_POLL_SECONDS) + finally: + if saved_offset is not None: + try: + os.lseek(fd, saved_offset, os.SEEK_SET) + except OSError: + pass + +else: + import fcntl as _fcntl + + LOCK_SH = _fcntl.LOCK_SH + LOCK_EX = _fcntl.LOCK_EX + LOCK_NB = _fcntl.LOCK_NB + LOCK_UN = _fcntl.LOCK_UN + + def flock(fd: int, operation: int) -> None: + _fcntl.flock(fd, operation) + + +__all__ = ["flock", "LOCK_EX", "LOCK_NB", "LOCK_SH", "LOCK_UN"] diff --git a/src/iai_mcp/_ipc.py b/src/iai_mcp/_ipc.py new file mode 100644 index 0000000..48e7cd2 --- /dev/null +++ b/src/iai_mcp/_ipc.py @@ -0,0 +1,352 @@ +""" +Platform-agnostic IPC transport layer. 
+ +POSIX: Unix-domain socket → ~/.iai-mcp/.daemon.sock + Access control is provided by the socket file's filesystem permissions. + +Windows: TCP loopback → 127.0.0.1: + Port is persisted in ~/.iai-mcp/.daemon.port. + Because loopback TCP is reachable by any local process, an + auth-token handshake is layered on top: the daemon generates a + 32-byte random hex token on start, writes it to + ~/.iai-mcp/.daemon.token (ACL-restricted to the current user via + icacls), and requires every client to send that token as the + first line of each connection. Connections that send the wrong + token are closed immediately without processing any requests. +""" +from __future__ import annotations + +import asyncio +import inspect +import os +import platform +import secrets +import socket +import subprocess +from pathlib import Path +from typing import Any + +IS_WINDOWS: bool = platform.system() == "Windows" + +_BASE_DIR: Path = Path.home() / ".iai-mcp" +SOCKET_PATH: Path = _BASE_DIR / ".daemon.sock" # POSIX only — kept for compatibility +PORT_FILE: Path = _BASE_DIR / ".daemon.port" # Windows only +TOKEN_FILE: Path = _BASE_DIR / ".daemon.token" # Windows only — auth secret + +_TOKEN_BYTES = 32 # 256-bit random token → 64 hex chars on the wire + + +# --------------------------------------------------------------------------- +# Port file helpers (Windows only) +# --------------------------------------------------------------------------- + +def _port_file_path() -> Path: + """Resolve the Windows port-file location at call time. + + Mirrors the POSIX ``IAI_DAEMON_SOCKET_PATH`` override (see ``ipc_address``) + so a daemon bound to a non-default endpoint — a custom ``IAI_MCP_STORE``, + or an isolated test harness — persists its port *alongside* that socket + path (``.port``) instead of always clobbering the shared + ``~/.iai-mcp/.daemon.port``. Without this, every Windows daemon (and every + test) raced for one global port file. 
Resolved dynamically, not as a module + constant, because tests set the env var after import. + """ + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + if env: + return Path(f"{env}.port") + return PORT_FILE + + +def _read_port() -> int | None: + try: + return int(_port_file_path().read_text(encoding="utf-8").strip()) + except (FileNotFoundError, ValueError, OSError): + return None + + +def _write_port(port: int) -> None: + path = _port_file_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(str(port), encoding="utf-8") + + +def _remove_port_file() -> None: + try: + _port_file_path().unlink() + except (FileNotFoundError, OSError): + pass + + +# --------------------------------------------------------------------------- +# Token file helpers (Windows only) +# --------------------------------------------------------------------------- + +def _restrict_token_file(path: Path) -> None: + """Restrict token file to current user only via icacls (Windows equivalent of chmod 0o600).""" + username = os.environ.get("USERNAME", "") + if username: + subprocess.run( + ["icacls", str(path), "/inheritance:d", "/grant:r", f"{username}:F"], + check=False, + capture_output=True, + ) + + +def _token_file_path() -> Path: + """Resolve the Windows auth-token file at call time, mirroring + ``_port_file_path`` so the token is per-endpoint (an isolated test harness + or a custom ``IAI_MCP_STORE``) rather than a single shared + ``~/.iai-mcp/.daemon.token`` that every daemon and test would clobber.""" + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + if env: + return Path(f"{env}.token") + return TOKEN_FILE + + +def _generate_token() -> str: + """Generate a fresh 32-byte random token and persist it to the token file.""" + token = secrets.token_hex(_TOKEN_BYTES) + path = _token_file_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(token, encoding="utf-8") + _restrict_token_file(path) + return token + + +def _read_token() -> str | None: + try: + 
return _token_file_path().read_text(encoding="utf-8").strip() + except (FileNotFoundError, OSError): + return None + + +def _remove_token_file() -> None: + try: + _token_file_path().unlink() + except (FileNotFoundError, OSError): + pass + + +# --------------------------------------------------------------------------- +# Auth-wrapping helpers (Windows only) +# --------------------------------------------------------------------------- + +def _make_authenticated_handler(handler: Any, token: str) -> Any: + """ + Wrap *handler* so that the first line received on each connection must be + the auth token. If it matches, the connection proceeds normally. + If it doesn't, the connection is closed immediately. + """ + async def _auth_handler( + reader: asyncio.StreamReader, + writer: asyncio.StreamWriter, + ) -> None: + try: + line = await asyncio.wait_for(reader.readline(), timeout=5.0) + except (asyncio.TimeoutError, OSError): + writer.close() + return + received = line.decode("utf-8", errors="replace").strip() + if not secrets.compare_digest(received, token): + writer.close() + return + await handler(reader, writer) + + return _auth_handler + + +async def _send_token_async(writer: asyncio.StreamWriter) -> None: + """Send the auth token as the first line on a Windows client connection.""" + token = _read_token() + if token is None: + raise FileNotFoundError( + f"Daemon auth token not found: {_token_file_path()} missing." + ) + writer.write((token + "\n").encode("utf-8")) + await writer.drain() + + +def _send_token_sync(sock: socket.socket) -> None: + """Send the auth token as the first line on a synchronous Windows client socket.""" + token = _read_token() + if token is None: + raise FileNotFoundError( + f"Daemon auth token not found: {_token_file_path()} missing." 
+ ) + sock.sendall((token + "\n").encode("utf-8")) + + +# --------------------------------------------------------------------------- +# Public helpers +# --------------------------------------------------------------------------- + +def ipc_address() -> str | tuple[str, int]: + """ + Return the current IPC endpoint. + POSIX: Unix socket path string. + Windows: ("127.0.0.1", port) tuple. + """ + if not IS_WINDOWS: + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + return env if env else str(SOCKET_PATH) + port = _read_port() + if port is None: + raise FileNotFoundError( + "Daemon not running: ~/.iai-mcp/.daemon.port not found." + ) + return ("127.0.0.1", port) + + +async def open_ipc_connection( + addr: str | tuple[str, int] | None = None, + *, + timeout: float | None = None, +) -> tuple[asyncio.StreamReader, asyncio.StreamWriter]: + """ + Open a client connection to the daemon. + + On POSIX wraps asyncio.open_unix_connection; on Windows wraps + asyncio.open_connection over TCP loopback and performs the auth-token + handshake before returning. + + The *addr* parameter is ignored on Windows (always uses port file). + """ + coro: Any + if IS_WINDOWS: + port = _read_port() + if port is None: + raise FileNotFoundError( + f"Daemon not running: {_port_file_path()} not found." + ) + coro = asyncio.open_connection("127.0.0.1", port) + else: + if addr is None: + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + addr = env if env else str(SOCKET_PATH) + coro = asyncio.open_unix_connection(str(addr)) + + if timeout is not None: + reader, writer = await asyncio.wait_for(coro, timeout=timeout) + else: + reader, writer = await coro + + if IS_WINDOWS: + await _send_token_async(writer) + + return reader, writer + + +async def start_ipc_server( + handler: Any, + addr: str | Path | None = None, +) -> tuple[asyncio.AbstractServer, str | tuple[str, int], bool]: + """ + Start the daemon server. 
+ + Returns ``(server, actual_addr, needs_manual_cleanup)`` where: + - *actual_addr* is the socket path (POSIX) or ("127.0.0.1", port) (Windows). + - *needs_manual_cleanup* is True if the caller must call ``shutdown_ipc`` + in its finally block (i.e. asyncio will NOT clean up automatically). + + On Windows a fresh auth token is generated and written to TOKEN_FILE, and + the port is written to PORT_FILE immediately after bind. + """ + if IS_WINDOWS: + token = _generate_token() + authenticated_handler = _make_authenticated_handler(handler, token) + server = await asyncio.start_server(authenticated_handler, "127.0.0.1", 0) + port: int = server.sockets[0].getsockname()[1] + _write_port(port) + return server, ("127.0.0.1", port), True + + # POSIX: try to use asyncio's built-in cleanup_socket (Python 3.12+) + if addr is None: + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + path_str = env if env else str(SOCKET_PATH) + else: + path_str = str(addr) + + sig = inspect.signature(asyncio.start_unix_server) + supports_cleanup = "cleanup_socket" in sig.parameters + kwargs: dict[str, Any] = {"cleanup_socket": True} if supports_cleanup else {} + + server = await asyncio.start_unix_server(handler, path=path_str, **kwargs) + return server, path_str, not supports_cleanup + + +def cleanup_ipc_address(addr: str | Path | None = None) -> None: + """ + Remove a stale socket file before binding (POSIX only). No-op on Windows. + """ + if IS_WINDOWS: + return + if addr is None: + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + path = Path(env) if env else SOCKET_PATH + else: + path = Path(addr) + try: + path.unlink() + except FileNotFoundError: + pass + except OSError: + try: + path.unlink() + except OSError: + pass + + +def shutdown_ipc(addr: str | tuple[str, int] | None = None) -> None: + """ + Clean up after daemon shutdown. + POSIX: unlink the socket file (idempotent). + Windows: remove the port file and the token file. 
+ """ + if IS_WINDOWS: + _remove_port_file() + _remove_token_file() + return + if addr is None or isinstance(addr, tuple): + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + path = Path(env) if env else SOCKET_PATH + else: + path = Path(addr) + try: + path.unlink() + except (FileNotFoundError, OSError): + pass + + +def make_sync_ipc_socket() -> tuple[socket.socket, str | tuple[str, int]]: + """ + Create a synchronous (blocking) client socket and the address to connect to. + + Returns ``(sock, addr)`` where *addr* is a string path (POSIX) or + ``("127.0.0.1", port)`` tuple (Windows). Caller is responsible for + ``settimeout``, ``connect``, and ``close``. + + On Windows the caller must also call ``send_sync_auth_token(sock)`` after + ``connect()`` and before sending any application messages. + """ + if IS_WINDOWS: + port = _read_port() + if port is None: + raise FileNotFoundError( + "Daemon not running: ~/.iai-mcp/.daemon.port not found." + ) + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + return s, ("127.0.0.1", port) + + env = os.environ.get("IAI_DAEMON_SOCKET_PATH") + path = env if env else str(SOCKET_PATH) + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + return s, path + + +def send_sync_auth_token(sock: socket.socket) -> None: + """ + Send the Windows auth token on a synchronous socket immediately after connect(). + No-op on POSIX. 
+ """ + if IS_WINDOWS: + _send_token_sync(sock) diff --git a/src/iai_mcp/backup.py b/src/iai_mcp/backup.py index 40f2301..d425a67 100644 --- a/src/iai_mcp/backup.py +++ b/src/iai_mcp/backup.py @@ -29,7 +29,7 @@ def export_jsonl(output: Path | None = None) -> Path: records = store.all_records() count = 0 - with open(output, "w") as f: + with open(output, "w", encoding="utf-8") as f: for rec in records: entry = { "id": str(rec.id), diff --git a/src/iai_mcp/capture.py b/src/iai_mcp/capture.py index 9c54ae6..e2eeae4 100644 --- a/src/iai_mcp/capture.py +++ b/src/iai_mcp/capture.py @@ -94,6 +94,16 @@ def is_drain_in_progress() -> bool: def _pid_is_alive(pid: int) -> bool: + # NOT os.kill(pid, 0): on Windows signal 0 is CTRL_C_EVENT (it would try to + # signal the process group), not a liveness probe — so the stale-PID + # crash-recovery rescan never reclaims abandoned .processing- files. + # psutil.pid_exists is correct and cross-platform (psutil is a hard dep). + try: + import psutil + + return psutil.pid_exists(pid) + except Exception: + pass try: os.kill(pid, 0) except ProcessLookupError: @@ -113,11 +123,13 @@ def _strip_processing_marker( return path, True new_path = path.with_name(new_name) try: - path.rename(new_path) + # replace (not rename): rename raises on Windows if the dest exists; + # POSIX rename already replaces, so behaviour is unchanged there. 
+ path.replace(new_path) except OSError as e: if log_path is not None: try: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} " f"strip-marker-failed {path.name}: {type(e).__name__}\n" @@ -163,7 +175,7 @@ def _quarantine_file( except Exception as exc: # noqa: BLE001 -- fail-safe boundary log.debug("quarantine_event_write_failed: %s", exc) try: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} " f"quarantined-event-skipped {target.name}\n" @@ -172,7 +184,7 @@ def _quarantine_file( log.debug("quarantine_event_log_fallback_failed: %s", exc2) try: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} " f"quarantined {target.name}: crash_loop attempts={attempts}\n" @@ -211,7 +223,7 @@ def _advance_failed_path( if next_attempt > FAILED_MAX_ATTEMPTS: new_name = f"{base}.permanent-failed-{ts_str}.jsonl" failed_path = fpath.with_name(new_name) - fpath.rename(failed_path) + fpath.replace(failed_path) try: from iai_mcp.events import write_event @@ -229,7 +241,7 @@ def _advance_failed_path( except Exception as exc: # noqa: BLE001 -- fail-safe boundary log.debug("permanent_capture_failure_event_failed: %s", exc) try: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} " f"permanent_capture_failure-event-skipped {new_name}\n" @@ -239,7 +251,7 @@ def _advance_failed_path( return failed_path new_name = f"{base}.failed-{ts_str}-attempt-{next_attempt}.jsonl" failed_path = fpath.with_name(new_name) - fpath.rename(failed_path) + fpath.replace(failed_path) return failed_path @@ -481,7 +493,7 @@ def capture_transcript( counts = {"inserted": 0, "reinforced": 0, "skipped": 0, "errors": 0} seen = 0 - with path.open() as fh: + with 
path.open(encoding="utf-8") as fh: for line in fh: if seen >= max_turns: break @@ -587,7 +599,7 @@ def write_deferred_event( deferred_dir.mkdir(parents=True, exist_ok=True) path = deferred_dir / f"{session_id}.live.jsonl" need_header = (not path.exists()) or path.stat().st_size == 0 - with path.open("a") as fh: + with path.open("a", encoding="utf-8") as fh: if need_header: header = { "version": 1, @@ -716,7 +728,7 @@ def write_deferred_captures( final_name = f"{session_id}-{int(time.time())}-{os.getpid()}.jsonl" out_path = deferred_dir / final_name tmp_path = deferred_dir / f"{final_name}.tmp" - with tmp_path.open("w") as fh: + with tmp_path.open("w", encoding="utf-8") as fh: header = { "version": 1, "deferred_at": datetime.now(timezone.utc).isoformat(), @@ -727,7 +739,7 @@ def write_deferred_captures( path = Path(transcript_path).expanduser() if path.exists(): seen = 0 - with path.open() as src: + with path.open(encoding="utf-8") as src: for line in src: if seen >= max_turns: break @@ -833,7 +845,7 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: ".jsonl", f".crash-{next_n}.jsonl" ) try: - fpath.rename(fpath.with_name(new_name)) + fpath.replace(fpath.with_name(new_name)) except Exception as exc: # noqa: BLE001 log.debug("crash_rename_failed %s: %s", fpath.name, exc) @@ -867,12 +879,12 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: fpath.stem + f".processing-{os.getpid()}.jsonl" ) try: - fpath.rename(claim_path) + fpath.replace(claim_path) except FileNotFoundError: continue except OSError as e: try: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} " f"claim-failed {fpath.name}: {type(e).__name__}\n" @@ -885,14 +897,14 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: file_had_insert_failure = False file_first_error: str | None = None try: - with work_path.open() as fh: + with 
work_path.open(encoding="utf-8") as fh: lines = [ln.rstrip("\n") for ln in fh if ln.strip()] if not lines: work_path.unlink() continue header = json.loads(lines[0]) if header.get("version", 0) > 1: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} skip " f"{work_path.name}: version={header.get('version')}\n" @@ -913,7 +925,7 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: break partial_path = work_path.with_suffix(".partial.jsonl") tmp_path = work_path.with_suffix(".partial.tmp") - with tmp_path.open("w") as ph: + with tmp_path.open("w", encoding="utf-8") as ph: ph.write(lines[0] + "\n") for r in remainder: ph.write(r + "\n") @@ -972,7 +984,7 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: ) if not _strip_ok: try: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} " f"insert-failed-skip {work_path.name}: " @@ -988,7 +1000,7 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: first_error=file_first_error or "unknown", log_path=log_path, ) - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} insert-failed " f"{work_path.name}: first_error={file_first_error}\n" @@ -1004,7 +1016,7 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: ) if not _strip_ok: try: - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( f"{datetime.now(timezone.utc).isoformat()} " f"exception-skip {work_path.name}: " @@ -1020,7 +1032,7 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: first_error=file_first_error or repr(e), log_path=log_path, ) - with log_path.open("a") as logf: + with log_path.open("a", encoding="utf-8") as logf: logf.write( 
f"{datetime.now(timezone.utc).isoformat()} failed " f"{work_path.name}: {type(e).__name__}: {e}\n" @@ -1043,7 +1055,7 @@ def _drain_deferred_captures_impl(store: MemoryStore) -> dict[str, int]: def _count_lines(fpath: Path) -> int: try: - with fpath.open() as fh: + with fpath.open(encoding="utf-8") as fh: return sum(1 for ln in fh if ln.strip()) except OSError: return 0 @@ -1108,7 +1120,7 @@ def drain_permanent_failed_files( file_dropped = 0 try: - with fpath.open() as fh: + with fpath.open(encoding="utf-8") as fh: lines = [ln.rstrip("\n") for ln in fh if ln.strip()] if not lines: @@ -1247,7 +1259,7 @@ def _drain_active_live_captures_impl( if not _LIVE_ACTIVE_RE.search(fpath.name): continue try: - with fpath.open() as fh: + with fpath.open(encoding="utf-8") as fh: raw_lines = fh.readlines() except OSError: continue @@ -1273,7 +1285,7 @@ def _drain_active_live_captures_impl( prev_offset: int = 0 try: if offset_path.exists(): - prev_offset = int(offset_path.read_text().strip() or "0") + prev_offset = int(offset_path.read_text(encoding="utf-8").strip() or "0") except (ValueError, OSError): prev_offset = 0 @@ -1321,7 +1333,7 @@ def _drain_active_live_captures_impl( state_dir.mkdir(parents=True, exist_ok=True) tmp_offset = offset_path.with_suffix(".drain-offset.tmp") try: - tmp_offset.write_text(str(new_offset)) + tmp_offset.write_text(str(new_offset), encoding="utf-8") os.replace(tmp_offset, offset_path) except OSError as exc: log.warning("drain_active_offset_write_failed: %s", exc) diff --git a/src/iai_mcp/capture_queue.py b/src/iai_mcp/capture_queue.py index 3c601af..b157f7f 100644 --- a/src/iai_mcp/capture_queue.py +++ b/src/iai_mcp/capture_queue.py @@ -1,11 +1,12 @@ from __future__ import annotations import errno -import fcntl import json import os import secrets import threading + +from iai_mcp._filelock import LOCK_EX, LOCK_NB, LOCK_UN, flock import time from collections.abc import Callable from datetime import datetime, timezone @@ -184,7 +185,7 @@ def 
ingest_pending(self, handler: Callable[[dict], None]) -> int: try: try: - fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + flock(lock_fd, LOCK_EX | LOCK_NB) except OSError as exc: if exc.errno in (errno.EWOULDBLOCK, errno.EAGAIN): continue @@ -211,7 +212,7 @@ def ingest_pending(self, handler: Callable[[dict], None]) -> int: ingested += 1 finally: try: - fcntl.flock(lock_fd, fcntl.LOCK_UN) + flock(lock_fd, LOCK_UN) except OSError: pass os.close(lock_fd) @@ -300,12 +301,12 @@ def _audit_drop( return try: try: - fcntl.flock(fd, fcntl.LOCK_EX) + flock(fd, LOCK_EX) os.write(fd, line.encode("utf-8")) os.fsync(fd) finally: try: - fcntl.flock(fd, fcntl.LOCK_UN) + flock(fd, LOCK_UN) except OSError: pass finally: diff --git a/src/iai_mcp/claude_cli.py b/src/iai_mcp/claude_cli.py index db8b63f..81c0b75 100644 --- a/src/iai_mcp/claude_cli.py +++ b/src/iai_mcp/claude_cli.py @@ -70,7 +70,7 @@ def _read_keychain_credentials() -> dict | None: def verify_credentials_subscription() -> dict: if CREDENTIALS_PATH.exists(): try: - data = json.loads(CREDENTIALS_PATH.read_text()) + data = json.loads(CREDENTIALS_PATH.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError) as exc: return {"ok": False, "reason": "credentials_unreadable", "error": str(exc)} else: diff --git a/src/iai_mcp/cli/__init__.py b/src/iai_mcp/cli/__init__.py index da6501d..e8d0e53 100644 --- a/src/iai_mcp/cli/__init__.py +++ b/src/iai_mcp/cli/__init__.py @@ -23,6 +23,7 @@ DAEMON_LABEL: str = "com.iai-mcp.daemon" SERVICE_NAME: str = "iai-mcp-daemon.service" +SCHTASKS_TASK_NAME: str = "iai-mcp-daemon" CONSENT_BANNER: str = """\ ============================================================================== @@ -56,6 +57,10 @@ def _is_linux() -> bool: return platform.system() == "Linux" +def _is_windows() -> bool: + return platform.system() == "Windows" + + def _ensure_crypto_key_present(): if os.environ.get("IAI_MCP_CRYPTO_PASSPHRASE"): return None @@ -72,15 +77,17 @@ def _ensure_crypto_key_present(): def 
_try_short_timeout_connect(timeout_ms: int = 250) -> bool: - import socket as _socket - - sock_path = os.environ.get("IAI_DAEMON_SOCKET_PATH") or str(SOCKET_PATH) - s = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) + from iai_mcp._ipc import make_sync_ipc_socket, send_sync_auth_token + try: + s, addr = make_sync_ipc_socket() + except (FileNotFoundError, OSError): + return False s.settimeout(timeout_ms / 1000.0) try: - s.connect(sock_path) + s.connect(addr) + send_sync_auth_token(s) return True - except (FileNotFoundError, ConnectionRefusedError, OSError, _socket.timeout): + except (FileNotFoundError, ConnectionRefusedError, OSError): return False finally: try: @@ -99,18 +106,14 @@ def _send_jsonrpc_request( read_timeout: float = 30.0, ) -> dict | None: import asyncio + from iai_mcp._ipc import open_ipc_connection from iai_mcp.cli._capture import _is_custom_store as _isc if not os.environ.get("IAI_DAEMON_SOCKET_PATH") and _isc(): return None - sock_path = os.environ.get("IAI_DAEMON_SOCKET_PATH") or str(SOCKET_PATH) - async def _runner() -> dict | None: try: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(sock_path), - timeout=connect_timeout, - ) + reader, writer = await open_ipc_connection(timeout=connect_timeout) except (FileNotFoundError, ConnectionRefusedError, OSError, asyncio.TimeoutError): return None try: @@ -142,17 +145,12 @@ async def _runner() -> dict | None: def _send_socket_request(req: dict, *, timeout: float = 30.0) -> dict | None: import asyncio + from iai_mcp._ipc import open_ipc_connection async def _runner() -> dict | None: - _sock = os.environ.get("IAI_DAEMON_SOCKET_PATH") or str(SOCKET_PATH) try: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(_sock), - timeout=5.0, - ) - except (FileNotFoundError, ConnectionRefusedError): - return None - except OSError: + reader, writer = await open_ipc_connection(timeout=5.0) + except (FileNotFoundError, ConnectionRefusedError, OSError): return None 
try: writer.write((json.dumps(req) + "\n").encode("utf-8")) @@ -324,6 +322,7 @@ def _maintenance_compact_metrics( _launchd_template, _render_launchd_plist, _render_systemd_unit, + _render_schtasks_xml, _prompt_consent, _record_consent_receipt, _remove_state_files, @@ -658,14 +657,17 @@ def _build_parser() -> argparse.ArgumentParser: di = daemon_sub.add_parser( "install", help=( - "install launchd plist (macOS) / systemd user unit (Linux); " - "first-run consent banner unless --yes" + "install launchd plist (macOS) / systemd user unit (Linux) / " + "Task Scheduler job (Windows); first-run consent banner unless --yes" ), ) di.add_argument( "--dry-run", action="store_true", - help="print plist/unit contents without writing or invoking launchctl/systemctl", + help=( + "print service definition (plist / unit / schtasks XML) without " + "writing or invoking launchctl/systemctl/schtasks" + ), ) di.add_argument( "--yes", "-y", @@ -676,17 +678,19 @@ def _build_parser() -> argparse.ArgumentParser: du = daemon_sub.add_parser( "uninstall", - help="C4 clean uninstall: remove plist/unit + 3 state files", + help="C4 clean uninstall: remove plist/unit/scheduled task + 3 state files", ) du.add_argument("--yes", "-y", action="store_true") du.set_defaults(func=cmd_daemon_uninstall) daemon_sub.add_parser( - "start", help="launchctl kickstart / systemctl --user start", + "start", + help="launchctl kickstart / systemctl --user start / schtasks /Run", ).set_defaults(func=cmd_daemon_start) daemon_sub.add_parser( - "stop", help="launchctl kill SIGTERM / systemctl --user stop", + "stop", + help="launchctl kill SIGTERM / systemctl --user stop / schtasks /End", ).set_defaults(func=cmd_daemon_stop) daemon_sub.add_parser( @@ -699,7 +703,10 @@ def _build_parser() -> argparse.ArgumentParser: dlogs = daemon_sub.add_parser( "logs", - help="tail daemon log file (macOS Library/Logs) or journalctl (Linux)", + help=( + "tail daemon log file (macOS Library/Logs, " + "Linux journalctl, Windows 
%%APPDATA%%\\iai-mcp\\logs)" + ), ) dlogs.add_argument("-f", "--follow", action="store_true") dlogs.add_argument("-n", "--lines", type=int, default=50) @@ -1046,7 +1053,23 @@ def _cmd_doctor_lazy(args: argparse.Namespace) -> int: return parser +def _force_utf8_streams() -> None: + """Recalled memory is arbitrary UTF-8 (emoji, CJK, smart quotes, em-dashes). + The session-recall hook reads this CLI's stdout, but on Windows — and under + a POSIX C/POSIX locale — stdout/stderr default to a non-UTF-8 codepage, so + writing recalled memory would raise UnicodeEncodeError and the hook would + silently produce no context. Force UTF-8 on the std streams.""" + for _stream in (sys.stdout, sys.stderr): + _reconfigure = getattr(_stream, "reconfigure", None) + if _reconfigure is not None: + try: + _reconfigure(encoding="utf-8") + except (ValueError, OSError): + pass + + def main(argv: list[str] | None = None) -> int: + _force_utf8_streams() parser = _build_parser() args = parser.parse_args(argv) return args.func(args) diff --git a/src/iai_mcp/cli/_capture.py b/src/iai_mcp/cli/_capture.py index 37f23d7..f65f3cb 100644 --- a/src/iai_mcp/cli/_capture.py +++ b/src/iai_mcp/cli/_capture.py @@ -90,7 +90,7 @@ def read_live_fingerprint(session_id: str) -> int | None: try: if not p.exists(): return None - raw = p.read_text().strip() + raw = p.read_text(encoding="utf-8").strip() if not raw: return None return int(raw) @@ -102,7 +102,7 @@ def write_live_fingerprint(session_id: str, total_size: int) -> None: d = Path.home() / ".iai-mcp" / ".capture-state" d.mkdir(parents=True, exist_ok=True) tmp = d / f"{session_id}.live-fingerprint.tmp" - tmp.write_text(str(total_size)) + tmp.write_text(str(total_size), encoding="utf-8") os.replace(tmp, d / f"{session_id}.live-fingerprint") @@ -142,7 +142,7 @@ def read_watermark(session_id: str) -> str | None: try: if not p.exists(): return None - return p.read_text().strip() or None + return p.read_text(encoding="utf-8").strip() or None except OSError: 
return None @@ -151,7 +151,7 @@ def write_watermark(session_id: str, ts: str) -> None: d = Path.home() / ".iai-mcp" / ".capture-state" d.mkdir(parents=True, exist_ok=True) tmp = d / f"{session_id}.watermark.tmp" - tmp.write_text(_utc_iso(ts)) + tmp.write_text(_utc_iso(ts), encoding="utf-8") os.replace(tmp, d / f"{session_id}.watermark") @@ -276,11 +276,11 @@ def cmd_capture_turn_deferred(args: argparse.Namespace) -> int: prev_offset = 0 if offset_path.exists(): try: - prev_offset = int(offset_path.read_text().strip() or "0") + prev_offset = int(offset_path.read_text(encoding="utf-8").strip() or "0") except ValueError: prev_offset = 0 - with transcript.open() as fh: + with transcript.open(encoding="utf-8") as fh: all_lines = fh.readlines() total = len(all_lines) @@ -310,7 +310,7 @@ def cmd_capture_turn_deferred(args: argparse.Namespace) -> int: new_offset = prev_offset + consumed tmp_path = offset_path.parent / (offset_path.name + ".tmp") - tmp_path.write_text(str(new_offset)) + tmp_path.write_text(str(new_offset), encoding="utf-8") os.replace(tmp_path, offset_path) return 0 except Exception as e: @@ -323,15 +323,17 @@ def cmd_capture_turn_deferred(args: argparse.Namespace) -> int: def _capture_hook_paths() -> tuple: - src = _res.files("iai_mcp") / "_deploy" / "hooks" / "iai-mcp-session-capture.sh" - dst = Path.home() / ".claude" / "hooks" / "iai-mcp-session-capture.sh" + ext = _hook_ext() + src = _res.files("iai_mcp") / "_deploy" / "hooks" / f"iai-mcp-session-capture{ext}" + dst = Path.home() / ".claude" / "hooks" / f"iai-mcp-session-capture{ext}" settings = Path.home() / ".claude" / "settings.json" return src, dst, settings def _turn_hook_paths() -> tuple: - src = _res.files("iai_mcp") / "_deploy" / "hooks" / "iai-mcp-turn-capture.sh" - dst = Path.home() / ".claude" / "hooks" / "iai-mcp-turn-capture.sh" + ext = _hook_ext() + src = _res.files("iai_mcp") / "_deploy" / "hooks" / f"iai-mcp-turn-capture{ext}" + dst = Path.home() / ".claude" / "hooks" / 
f"iai-mcp-turn-capture{ext}" return src, dst @@ -387,6 +389,38 @@ def _build_iai_mcp_server_entry() -> dict: } +def _iai_entry_or_placeholder(config_label: str, *, include_type: bool) -> dict: + """Build the MCP server entry, or a placeholder (with a stderr warning) + when the wrapper isn't built yet, so ``capture-hooks install`` doesn't + crash mid-run after it has already written the hooks. ``include_type`` + controls the stdio ``type`` field that Claude Code expects but Claude + Desktop omits.""" + from iai_mcp import cli as _cli + + try: + entry = _build_iai_mcp_server_entry() + except FileNotFoundError as exc: + print( + f"WARN: MCP wrapper not found — {config_label} entry written with " + f"placeholder args. Build it first: cd mcp-wrapper && npm run build. " + f"({exc})", + file=_cli.sys.stderr, + ) + entry = { + "command": "node", + "args": [""], + "env": { + "IAI_MCP_PYTHON": _cli.sys.executable, + "IAI_MCP_STORE": str(Path.home() / ".iai-mcp"), + "TRANSFORMERS_VERBOSITY": "error", + "TOKENIZERS_PARALLELISM": "false", + }, + } + if include_type: + entry.setdefault("type", "stdio") + return entry + + def _patch_claude_desktop_config(action: str) -> str: from iai_mcp import cli as _cli import json as _json @@ -399,12 +433,13 @@ def _patch_claude_desktop_config(action: str) -> str: if action == "uninstall": return f"Claude Desktop: {cfg_path} absent — skipped" cfg_path.parent.mkdir(parents=True, exist_ok=True) - data = {"mcpServers": {"iai-mcp": _build_iai_mcp_server_entry()}} - cfg_path.write_text(_json.dumps(data, indent=2)) + entry = _iai_entry_or_placeholder("Claude Desktop", include_type=False) + data = {"mcpServers": {"iai-mcp": entry}} + cfg_path.write_text(_json.dumps(data, indent=2), encoding="utf-8") return f"Claude Desktop: created {cfg_path} with iai-mcp registered" try: - data = _json.loads(cfg_path.read_text()) + data = _json.loads(cfg_path.read_text(encoding="utf-8")) except (OSError, ValueError) as e: return f"Claude Desktop: {cfg_path} 
unreadable ({type(e).__name__}) — skipped" @@ -413,20 +448,19 @@ def _patch_claude_desktop_config(action: str) -> str: if action == "uninstall": if "iai-mcp" in servers: servers.pop("iai-mcp", None) - cfg_path.write_text(_json.dumps(data, indent=2)) + cfg_path.write_text(_json.dumps(data, indent=2), encoding="utf-8") return f"Claude Desktop: removed iai-mcp from {cfg_path}" return f"Claude Desktop: iai-mcp not in config — no change" - new_entry = _build_iai_mcp_server_entry() + new_entry = _iai_entry_or_placeholder("Claude Desktop", include_type=False) if servers.get("iai-mcp") == new_entry: return f"Claude Desktop: {cfg_path} already has iai-mcp — no change" servers["iai-mcp"] = new_entry - cfg_path.write_text(_json.dumps(data, indent=2)) + cfg_path.write_text(_json.dumps(data, indent=2), encoding="utf-8") return f"Claude Desktop: patched {cfg_path} (iai-mcp registered)" def _patch_claude_code_config(action: str) -> str: - from iai_mcp import cli as _cli import json as _json cfg_path = Path.home() / ".claude.json" @@ -435,46 +469,25 @@ def _patch_claude_code_config(action: str) -> str: if not cfg_path.exists(): return "Claude Code: ~/.claude.json absent — skipped" try: - data = _json.loads(cfg_path.read_text()) + data = _json.loads(cfg_path.read_text(encoding="utf-8")) except (OSError, ValueError) as e: return f"Claude Code: ~/.claude.json unreadable ({type(e).__name__}) — skipped" servers = data.get("mcpServers", {}) if "iai-mcp" in servers: servers.pop("iai-mcp") data["mcpServers"] = servers - cfg_path.write_text(_json.dumps(data, indent=2)) + cfg_path.write_text(_json.dumps(data, indent=2), encoding="utf-8") return "Claude Code: removed iai-mcp from ~/.claude.json" return "Claude Code: iai-mcp not in ~/.claude.json — no change" - try: - entry = _build_iai_mcp_server_entry() - except FileNotFoundError as exc: - entry = { - "type": "stdio", - "command": "node", - "args": [""], - "env": { - "IAI_MCP_PYTHON": _cli.sys.executable, - "IAI_MCP_STORE": str(Path.home() 
/ ".iai-mcp"), - "TRANSFORMERS_VERBOSITY": "error", - "TOKENIZERS_PARALLELISM": "false", - }, - } - print( - f"WARN: MCP wrapper not found — ~/.claude.json entry written with " - f"placeholder args. Build it first: cd mcp-wrapper && npm run build. " - f"({exc})", - file=_cli.sys.stderr, - ) - else: - entry.setdefault("type", "stdio") + entry = _iai_entry_or_placeholder("~/.claude.json", include_type=True) if not cfg_path.exists(): - cfg_path.write_text(_json.dumps({"mcpServers": {"iai-mcp": entry}}, indent=2)) + cfg_path.write_text(_json.dumps({"mcpServers": {"iai-mcp": entry}}, indent=2), encoding="utf-8") return "Claude Code: created ~/.claude.json with iai-mcp registered" try: - data = _json.loads(cfg_path.read_text()) + data = _json.loads(cfg_path.read_text(encoding="utf-8")) except (OSError, ValueError) as e: return f"Claude Code: ~/.claude.json unreadable ({type(e).__name__}) — skipped" @@ -482,18 +495,25 @@ def _patch_claude_code_config(action: str) -> str: if servers.get("iai-mcp") == entry: return "Claude Code: ~/.claude.json already has iai-mcp — no change" servers["iai-mcp"] = entry - cfg_path.write_text(_json.dumps(data, indent=2)) + cfg_path.write_text(_json.dumps(data, indent=2), encoding="utf-8") return "Claude Code: patched ~/.claude.json (iai-mcp registered)" -_CAPTURE_HOOK_MARKER = "iai-mcp-session-capture.sh" -_TURN_HOOK_MARKER = "iai-mcp-turn-capture.sh" -_SESSION_RECALL_HOOK_MARKER = "iai-mcp-session-recall.sh" +import platform as _platform + +_CAPTURE_HOOK_MARKER = "iai-mcp-session-capture" +_TURN_HOOK_MARKER = "iai-mcp-turn-capture" +_SESSION_RECALL_HOOK_MARKER = "iai-mcp-session-recall" + + +def _hook_ext() -> str: + return ".ps1" if _platform.system() == "Windows" else ".sh" def _session_recall_hook_paths() -> tuple: - src = _res.files("iai_mcp") / "_deploy" / "hooks" / "iai-mcp-session-recall.sh" - dst = Path.home() / ".claude" / "hooks" / "iai-mcp-session-recall.sh" + ext = _hook_ext() + src = _res.files("iai_mcp") / "_deploy" / "hooks" / 
f"iai-mcp-session-recall{ext}" + dst = Path.home() / ".claude" / "hooks" / f"iai-mcp-session-recall{ext}" settings = Path.home() / ".claude" / "settings.json" return src, dst, settings @@ -503,7 +523,7 @@ def _load_settings(path): if not path.exists(): return {} try: - return _json.loads(path.read_text()) + return _json.loads(path.read_text(encoding="utf-8")) except (OSError, ValueError): return {} @@ -525,12 +545,14 @@ def cmd_capture_hooks_install(args: argparse.Namespace) -> int: dst.parent.mkdir(parents=True, exist_ok=True) dst.write_bytes(src.read_bytes()) - dst.chmod(dst.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP) + if hasattr(os, "chmod") and _platform.system() != "Windows": + dst.chmod(dst.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP) print(f"installed: {dst}") turn_dst.parent.mkdir(parents=True, exist_ok=True) turn_dst.write_bytes(turn_src.read_bytes()) - turn_dst.chmod(turn_dst.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP) + if hasattr(os, "chmod") and _platform.system() != "Windows": + turn_dst.chmod(turn_dst.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP) print(f"installed: {turn_dst}") settings.parent.mkdir(parents=True, exist_ok=True) @@ -539,8 +561,12 @@ def cmd_capture_hooks_install(args: argparse.Namespace) -> int: stop_list = data["hooks"].setdefault("Stop", []) submit_list = data["hooks"].setdefault("UserPromptSubmit", []) - hook_cmd = f"bash {dst}" - turn_cmd = f"bash {turn_dst}" + if _platform.system() == "Windows": + hook_cmd = f"powershell -ExecutionPolicy Bypass -File \"{dst}\"" + turn_cmd = f"powershell -ExecutionPolicy Bypass -File \"{turn_dst}\"" + else: + hook_cmd = f"bash {dst}" + turn_cmd = f"bash {turn_dst}" already_stop = any( any(_CAPTURE_HOOK_MARKER in (h.get("command") or "") @@ -568,11 +594,15 @@ def cmd_capture_hooks_install(args: argparse.Namespace) -> int: if src_recall.exists(): dst_recall.parent.mkdir(parents=True, exist_ok=True) dst_recall.write_bytes(src_recall.read_bytes()) - dst_recall.chmod(dst_recall.stat().st_mode | 
stat.S_IXUSR | stat.S_IXGRP) + if hasattr(os, "chmod") and _platform.system() != "Windows": + dst_recall.chmod(dst_recall.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP) print(f"installed: {dst_recall}") ss_list = data["hooks"].setdefault("SessionStart", []) - recall_cmd = f"bash {dst_recall}" + if _platform.system() == "Windows": + recall_cmd = f"powershell -ExecutionPolicy Bypass -File \"{dst_recall}\"" + else: + recall_cmd = f"bash {dst_recall}" already_recall = any( any(_SESSION_RECALL_HOOK_MARKER in (h.get("command") or "") for h in (entry.get("hooks") or [])) @@ -589,7 +619,7 @@ def cmd_capture_hooks_install(args: argparse.Namespace) -> int: else: print(f"WARN: recall hook template missing in package data: {src_recall}") - settings.write_text(_json.dumps(data, indent=2)) + settings.write_text(_json.dumps(data, indent=2), encoding="utf-8") code_msg = _patch_claude_code_config("install") print(code_msg) @@ -648,7 +678,7 @@ def cmd_capture_hooks_uninstall(args: argparse.Namespace) -> int: changed = True print(f"patched: {settings} ({key} entry removed)") if changed: - settings.write_text(_json.dumps(data, indent=2)) + settings.write_text(_json.dumps(data, indent=2), encoding="utf-8") else: print(f"(no hook entry to remove) {settings}") @@ -664,7 +694,7 @@ def cmd_capture_hooks_uninstall(args: argparse.Namespace) -> int: data["hooks"]["SessionStart"] = kept_ss else: data["hooks"].pop("SessionStart", None) - settings.write_text(_json.dumps(data, indent=2)) + settings.write_text(_json.dumps(data, indent=2), encoding="utf-8") print(f"patched: {settings} (SessionStart entry removed)") else: print(f"(no SessionStart entry to remove) {settings}") @@ -724,7 +754,7 @@ def cmd_capture_hooks_status(args: argparse.Namespace) -> int: desktop_wired = False else: try: - d = _json.loads(desktop_cfg.read_text()) + d = _json.loads(desktop_cfg.read_text(encoding="utf-8")) desktop_wired = "iai-mcp" in d.get("mcpServers", {}) desktop_line = f"Claude Desktop: {desktop_cfg} {'WIRED' if 
desktop_wired else 'NOT WIRED'}" except (OSError, ValueError): diff --git a/src/iai_mcp/cli/_crypto.py b/src/iai_mcp/cli/_crypto.py index 3a41997..a6a32bb 100644 --- a/src/iai_mcp/cli/_crypto.py +++ b/src/iai_mcp/cli/_crypto.py @@ -35,8 +35,10 @@ def cmd_crypto_status(args: argparse.Namespace) -> int: length = st.st_size status["mode"] = mode_octal status["mode_secure"] = (st.st_mode & 0o077 == 0) - status["uid"] = st.st_uid - status["uid_matches_process"] = (st.st_uid == _os.geteuid()) + status["uid"] = getattr(st, "st_uid", -1) + status["uid_matches_process"] = ( + hasattr(_os, "geteuid") and st.st_uid == _os.geteuid() + ) status["length_bytes"] = length status["length_valid"] = (length == KEY_BYTES) status["passphrase_fallback_set"] = bool( diff --git a/src/iai_mcp/cli/_daemon.py b/src/iai_mcp/cli/_daemon.py index 609eca5..98b8b58 100644 --- a/src/iai_mcp/cli/_daemon.py +++ b/src/iai_mcp/cli/_daemon.py @@ -63,7 +63,7 @@ def _launchd_template(): def _render_launchd_plist() -> str: from iai_mcp import cli as _cli - text = _launchd_template().read_text() + text = _launchd_template().read_text(encoding="utf-8") username = os.environ.get("USER") or Path.home().name text = text.replace("/usr/local/bin/python3", _cli.sys.executable) text = text.replace("{USERNAME}", username) @@ -73,11 +73,64 @@ def _render_launchd_plist() -> str: def _render_systemd_unit() -> str: from iai_mcp import cli as _cli tmpl = _res.files("iai_mcp") / "_deploy" / "systemd" / "iai-mcp-daemon.service" - text = tmpl.read_text() + text = tmpl.read_text(encoding="utf-8") text = text.replace("/usr/bin/python3", _cli.sys.executable) return text +def _find_pythonw() -> str: + exe = Path(sys.executable) + pythonw = exe.parent / "pythonw.exe" + if pythonw.exists(): + return str(pythonw) + return sys.executable + + +def _render_schtasks_xml() -> str: + pythonw = _find_pythonw() + username = os.environ.get("USERNAME", "") + # No : the Task Scheduler engine rejects a working + # directory set via XML when 
the path contains spaces (e.g. the default + # %APPDATA% under "C:\\Users\\First Last\\..."), failing the launch with + # 0x8007010B "The directory name is invalid" — even though the path exists + # and CreateProcess accepts it fine outside the scheduler. The daemon never + # depends on cwd (all state lives under ~/.iai-mcp via absolute paths), so + # we omit it and let the task default to %windir%\\system32. + return f"""\ + + + + iai-mcp sleep daemon — background memory consolidation for Claude Code + + + + true + {username} + + + + + {username} + InteractiveToken + LeastPrivilege + + + + IgnoreNew + false + false + PT0S + true + + + + {pythonw} + -m iai_mcp.daemon + + +""" + + def _prompt_consent(stream_out=None) -> bool: from iai_mcp import cli as _cli if stream_out is None: @@ -106,7 +159,7 @@ def _record_consent_receipt() -> None: safe_ts = ts.replace(":", "").replace("-", "").replace(".", "") receipt = state_dir / f".consent-{safe_ts}.json" try: - receipt.write_text(json.dumps(payload, indent=2)) + receipt.write_text(json.dumps(payload, indent=2), encoding="utf-8") os.chmod(receipt, 0o600) except OSError as exc: print(f"warning: could not write consent receipt: {exc}", file=sys.stderr) @@ -140,25 +193,71 @@ def cmd_daemon_install(args: argparse.Namespace) -> int: elif _cli._is_linux(): content = _render_systemd_unit() target = _cli.SYSTEMD_TARGET + elif _cli._is_windows(): + content = _render_schtasks_xml() + target = None else: print(f"Unsupported OS: {platform.system()}", file=sys.stderr) return 1 if dry_run: - print(f"# Would install to: {target}") + if target is not None: + print(f"# Would install to: {target}") + else: + print(f"# Would create scheduled task: {_cli.SCHTASKS_TASK_NAME}") print(content) return 0 + _cli._ensure_crypto_key_present() + + if _cli._is_windows(): + import subprocess as _sp + import tempfile as _tmpmod + + log_dir = Path(os.environ.get("APPDATA", str(Path.home()))) / "iai-mcp" / "logs" + log_dir.mkdir(parents=True, 
exist_ok=True) + + fd, xml_path = _tmpmod.mkstemp(suffix=".xml", prefix="iai-mcp-task-") + try: + with os.fdopen(fd, "w", encoding="utf-16") as f: + f.write(content) + result = _sp.run( + [ + "schtasks", "/Create", + "/TN", _cli.SCHTASKS_TASK_NAME, + "/XML", xml_path, + "/F", + ], + check=False, capture_output=True, text=True, + ) + if result.returncode != 0: + print( + f"schtasks /Create failed ({result.returncode}): " + f"{result.stderr.strip()}", + file=sys.stderr, + ) + return 1 + finally: + try: + os.unlink(xml_path) + except OSError: + pass + + _sp.run( + ["schtasks", "/Run", "/TN", _cli.SCHTASKS_TASK_NAME], + check=False, capture_output=True, + ) + print(f"Installed scheduled task: {_cli.SCHTASKS_TASK_NAME}") + return 0 + target.parent.mkdir(parents=True, exist_ok=True) - target.write_text(content) + target.write_text(content, encoding="utf-8") try: os.chmod(target, 0o644) except OSError: pass - _cli._ensure_crypto_key_present() - - uid = os.getuid() + uid = os.getuid() if hasattr(os, "getuid") else 0 if _cli._is_macos(): _cli.subprocess.run( ["launchctl", "bootout", f"gui/{uid}", str(target)], @@ -228,7 +327,7 @@ def cmd_daemon_uninstall(args: argparse.Namespace) -> int: print("Uninstall cancelled.", file=sys.stderr) return 1 - uid = os.getuid() + uid = os.getuid() if hasattr(os, "getuid") else 0 if _cli._is_macos(): if _cli.LAUNCHD_TARGET.exists(): _cli.subprocess.run( @@ -253,6 +352,16 @@ def cmd_daemon_uninstall(args: argparse.Namespace) -> int: ["systemctl", "--user", "daemon-reload"], check=False, capture_output=True, ) + elif _cli._is_windows(): + import subprocess as _sp + _sp.run( + ["schtasks", "/End", "/TN", _cli.SCHTASKS_TASK_NAME], + check=False, capture_output=True, + ) + _sp.run( + ["schtasks", "/Delete", "/TN", _cli.SCHTASKS_TASK_NAME, "/F"], + check=False, capture_output=True, + ) _remove_state_files() print("Daemon uninstalled. 
State files removed.") @@ -261,7 +370,7 @@ def cmd_daemon_uninstall(args: argparse.Namespace) -> int: def cmd_daemon_start(args: argparse.Namespace) -> int: from iai_mcp import cli as _cli - uid = os.getuid() + uid = os.getuid() if hasattr(os, "getuid") else 0 if _cli._is_macos(): target = _cli.LAUNCHD_TARGET _cli.subprocess.run( @@ -282,6 +391,12 @@ def cmd_daemon_start(args: argparse.Namespace) -> int: ["systemctl", "--user", "start", _cli.SERVICE_NAME], check=False, ) + elif _cli._is_windows(): + import subprocess as _sp + _sp.run( + ["schtasks", "/Run", "/TN", _cli.SCHTASKS_TASK_NAME], + check=False, capture_output=True, + ) else: print(f"Unsupported OS: {platform.system()}", file=sys.stderr) return 1 @@ -302,7 +417,7 @@ def cmd_daemon_stop(args: argparse.Namespace) -> int: except (OSError, ValueError, RuntimeError) as exc: logger.debug("sentinel write failed (non-blocking): %s", exc) - uid = os.getuid() + uid = os.getuid() if hasattr(os, "getuid") else 0 if _cli._is_macos(): from iai_mcp.lifecycle_lock import LifecycleLock, _is_pid_alive @@ -318,8 +433,9 @@ def cmd_daemon_stop(args: argparse.Namespace) -> int: return 0 if _is_pid_alive(pid): + _term_sig = getattr(_signal, "SIGTERM", _signal.SIGINT) try: - os.kill(pid, _signal.SIGTERM) + os.kill(pid, _term_sig) except (ProcessLookupError, PermissionError) as exc: logger.debug("SIGTERM to daemon pid=%d failed: %s", pid, exc) return 0 @@ -332,8 +448,9 @@ def cmd_daemon_stop(args: argparse.Namespace) -> int: _time.sleep(interval) if _is_pid_alive(pid): + _kill_sig = getattr(_signal, "SIGKILL", _term_sig) try: - os.kill(pid, _signal.SIGKILL) + os.kill(pid, _kill_sig) except (ProcessLookupError, PermissionError) as exc: logger.debug("SIGKILL to daemon pid=%d failed: %s", pid, exc) return 0 @@ -342,6 +459,36 @@ def cmd_daemon_stop(args: argparse.Namespace) -> int: ["systemctl", "--user", "stop", _cli.SERVICE_NAME], check=False, ) + elif _cli._is_windows(): + import subprocess as _sp + from iai_mcp.lifecycle_lock 
import LifecycleLock, _is_pid_alive + + _sp.run( + ["schtasks", "/End", "/TN", _cli.SCHTASKS_TASK_NAME], + check=False, capture_output=True, + ) + + payload = LifecycleLock().read() + pid = payload["pid"] if payload else None + if pid is not None and _is_pid_alive(pid): + try: + os.kill(pid, _signal.SIGINT) + except (ProcessLookupError, PermissionError) as exc: + logger.debug("SIGINT to daemon pid=%d failed: %s", pid, exc) + return 0 + + deadline = _time.monotonic() + _stop_escalation_bound() + interval = _stop_poll_interval() + while _time.monotonic() < deadline: + if not _is_pid_alive(pid): + return 0 + _time.sleep(interval) + + if _is_pid_alive(pid): + _sp.run( + ["taskkill", "/F", "/PID", str(pid)], + check=False, capture_output=True, + ) else: print(f"Unsupported OS: {platform.system()}", file=sys.stderr) return 1 @@ -437,12 +584,20 @@ def cmd_daemon_status(args: argparse.Namespace) -> int: return 0 +def _get_daemon_log_path() -> Path: + if platform.system() == "Darwin": + return Path.home() / "Library" / "Logs" / "iai-mcp-daemon.stderr.log" + if platform.system() == "Windows": + return Path(os.environ.get("APPDATA", str(Path.home()))) / "iai-mcp" / "logs" / "daemon.log" + return Path.home() / ".local" / "share" / "iai-mcp" / "logs" / "daemon.log" + + def cmd_daemon_logs(args: argparse.Namespace) -> int: from iai_mcp import cli as _cli follow = bool(getattr(args, "follow", False)) lines = int(getattr(args, "lines", 50)) if _cli._is_macos(): - path = Path.home() / "Library" / "Logs" / "iai-mcp-daemon.stderr.log" + path = _get_daemon_log_path() argv = ["tail"] if follow: argv.append("-f") @@ -453,6 +608,15 @@ def cmd_daemon_logs(args: argparse.Namespace) -> int: if follow: argv.append("-f") _cli.subprocess.run(argv, check=False) + elif platform.system() == "Windows": + path = _get_daemon_log_path() + if not path.exists(): + print(f"No log file at {path}", file=sys.stderr) + return 1 + with open(path, "r", encoding="utf-8", errors="replace") as f: + all_lines = 
f.readlines() + for line in all_lines[-lines:]: + print(line, end="") else: print(f"Unsupported OS: {platform.system()}", file=sys.stderr) return 1 diff --git a/src/iai_mcp/cli/_maintenance.py b/src/iai_mcp/cli/_maintenance.py index 228956f..d177c66 100644 --- a/src/iai_mcp/cli/_maintenance.py +++ b/src/iai_mcp/cli/_maintenance.py @@ -63,7 +63,7 @@ def _maintenance_compact_preflight_daemon_alive() -> str | None: if not _cli.STATE_PATH.exists(): return None try: - state = _json.loads(_cli.STATE_PATH.read_text()) + state = _json.loads(_cli.STATE_PATH.read_text(encoding="utf-8")) except (OSError, ValueError): return None pid = state.get("daemon_pid") @@ -181,7 +181,7 @@ def _maintenance_compact_apply( } try: failed_path.parent.mkdir(parents=True, exist_ok=True) - failed_path.write_text(_json.dumps(failed_payload, indent=2)) + failed_path.write_text(_json.dumps(failed_payload, indent=2), encoding="utf-8") except OSError: pass print( @@ -206,7 +206,7 @@ def _maintenance_compact_apply( } try: audit_path.parent.mkdir(parents=True, exist_ok=True) - audit_path.write_text(_json.dumps(payload, indent=2)) + audit_path.write_text(_json.dumps(payload, indent=2), encoding="utf-8") except OSError as exc: print( f"warning: could not write audit file {audit_path}: {exc}", diff --git a/src/iai_mcp/concurrency.py b/src/iai_mcp/concurrency.py index cf74987..f8a80e4 100644 --- a/src/iai_mcp/concurrency.py +++ b/src/iai_mcp/concurrency.py @@ -13,15 +13,8 @@ def cleanup_stale_socket(path: Path = SOCKET_PATH) -> None: - try: - path.unlink() - except FileNotFoundError: - pass - except OSError: - try: - path.unlink() - except OSError: - pass + from iai_mcp._ipc import cleanup_ipc_address + cleanup_ipc_address(path) def _validate_socket_message(req: dict) -> tuple[bool, str | None]: @@ -238,19 +231,11 @@ async def serve_control_socket( dispatcher: Callable[[dict], Awaitable[dict]] | None = None, socket_path: Path = SOCKET_PATH, ) -> None: - cleanup_stale_socket(socket_path) - 
socket_path.parent.mkdir(parents=True, exist_ok=True) + from iai_mcp._ipc import IS_WINDOWS, cleanup_ipc_address, start_ipc_server, shutdown_ipc - _supports_cleanup_socket = False - try: - import inspect as _inspect - import asyncio as _asyncio_mod - _loop_sig = _inspect.signature( - _asyncio_mod.get_event_loop_policy().new_event_loop().create_unix_server - ) - _supports_cleanup_socket = "cleanup_socket" in _loop_sig.parameters - except (TypeError, ValueError, AttributeError): - _supports_cleanup_socket = False + cleanup_ipc_address(socket_path) + if not IS_WINDOWS: + socket_path.parent.mkdir(parents=True, exist_ok=True) async def handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None: try: @@ -280,23 +265,16 @@ async def handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> except (OSError, ConnectionError): # noqa: BLE001 -- cleanup is best-effort pass - _server_kwargs = {"cleanup_socket": True} if _supports_cleanup_socket else {} - server = await asyncio.start_unix_server( - handle, path=str(socket_path), **_server_kwargs, - ) - try: - os.chmod(str(socket_path), 0o600) - except OSError: - pass + server, actual_addr, needs_cleanup = await start_ipc_server(handle, socket_path) + if not IS_WINDOWS: + try: + os.chmod(str(socket_path), 0o600) + except OSError: + pass try: async with server: await shutdown.wait() finally: - if not _supports_cleanup_socket: - try: - socket_path.unlink() - except FileNotFoundError: - pass - except OSError: - pass + if needs_cleanup: + shutdown_ipc(actual_addr) diff --git a/src/iai_mcp/core/__init__.py b/src/iai_mcp/core/__init__.py index efeb543..60c1998 100644 --- a/src/iai_mcp/core/__init__.py +++ b/src/iai_mcp/core/__init__.py @@ -958,9 +958,10 @@ async def _send_to_daemon( timeout: float = 30.0, socket_path=None, ) -> dict: + from iai_mcp._ipc import open_ipc_connection path_used = socket_path if socket_path is not None else SOCKET_PATH try: - reader, writer = await 
asyncio.open_unix_connection(str(path_used)) + reader, writer = await open_ipc_connection(str(path_used)) except (FileNotFoundError, ConnectionRefusedError) as exc: return {"ok": False, "reason": "daemon_not_running", "error": str(exc)} diff --git a/src/iai_mcp/core/_identity.py b/src/iai_mcp/core/_identity.py index c71d51f..6ae1515 100644 --- a/src/iai_mcp/core/_identity.py +++ b/src/iai_mcp/core/_identity.py @@ -27,7 +27,7 @@ def _load_l0_identity_seed() -> str: ) if os.path.isfile(config_path): try: - with open(config_path) as f: + with open(config_path, encoding="utf-8") as f: cfg = json.load(f) identity = cfg.get("identity", {}) parts = [] diff --git a/src/iai_mcp/crypto.py b/src/iai_mcp/crypto.py index b82f76b..00dc5a4 100644 --- a/src/iai_mcp/crypto.py +++ b/src/iai_mcp/crypto.py @@ -12,12 +12,28 @@ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC +import platform as _platform +import subprocess as _subprocess + CIPHERTEXT_PREFIX: str = "iai:enc:v1:" NONCE_BYTES: int = 12 KEY_BYTES: int = 32 PBKDF2_ITERATIONS: int = 600_000 SERVICE_NAME_DEFAULT: str = "iai-mcp" + +def _secure_key_file(path: Path) -> None: + """Restrict file permissions to owner-only. 
On POSIX uses chmod; on Windows uses icacls.""" + if _platform.system() == "Windows": + user = os.environ.get("USERNAME", "") + if user: + _subprocess.run( + ["icacls", str(path), "/inheritance:d", "/grant:r", f"{user}:F"], + check=False, capture_output=True, + ) + else: + path.chmod(0o600) + _DEFAULT_STORE_ROOT: Path = Path.home() / ".iai-mcp" _KEY_FILE_NAME: str = ".crypto.key" @@ -112,13 +128,13 @@ def _try_file_get(self) -> Optional[bytes]: if not path.exists(): return None st = os.stat(path) - if st.st_mode & 0o077 != 0: + if hasattr(os, "geteuid") and st.st_mode & 0o077 != 0: raise CryptoKeyError( f"crypto key file at {path} has insecure mode " f"0o{st.st_mode & 0o777:03o}; expected 0o600 " f"(run: chmod 0o600 {path})" ) - if st.st_uid != os.geteuid(): + if hasattr(os, "geteuid") and st.st_uid != os.geteuid(): raise CryptoKeyError( f"crypto key file at {path} is owned by uid={st.st_uid}; " f"current process runs as uid={os.geteuid()} (refusing to read)" @@ -144,12 +160,18 @@ def _try_file_set(self, key: bytes) -> None: tmp = final.parent / f"{final.name}.tmp.{os.getpid()}" fd = os.open(str(tmp), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600) try: - os.fchmod(fd, 0o600) + if hasattr(os, "fchmod"): + os.fchmod(fd, 0o600) os.write(fd, key) os.fsync(fd) finally: os.close(fd) - os.rename(str(tmp), str(final)) + if not hasattr(os, "fchmod"): + _secure_key_file(tmp) + # os.replace (not os.rename): on Windows rename raises if the + # destination exists, which it always does during key rotation. POSIX + # rename already replaces, so this is behaviour-preserving there. 
+ os.replace(str(tmp), str(final)) def get_or_create(self) -> bytes: diff --git a/src/iai_mcp/daemon/__init__.py b/src/iai_mcp/daemon/__init__.py index 027c2b1..bd27805 100644 --- a/src/iai_mcp/daemon/__init__.py +++ b/src/iai_mcp/daemon/__init__.py @@ -6,7 +6,7 @@ import json import logging import os -import resource +import platform as _platform import signal import sys import threading @@ -116,6 +116,10 @@ def _hippo_health_check_on_boot(store) -> dict[str, int | str]: def _raise_fd_limit() -> None: + if _platform.system() == "Windows": + return + import resource as _resource + try: floor = int( os.environ.get("IAI_MCP_DAEMON_NOFILE_FLOOR", _DAEMON_NOFILE_FLOOR_DEFAULT) @@ -124,18 +128,18 @@ def _raise_fd_limit() -> None: floor = _DAEMON_NOFILE_FLOOR_DEFAULT try: - soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + soft, hard = _resource.getrlimit(_resource.RLIMIT_NOFILE) except (OSError, ValueError): return - effective_hard = hard if hard != resource.RLIM_INFINITY else floor + effective_hard = hard if hard != _resource.RLIM_INFINITY else floor target = min(max(soft, floor), effective_hard) if target <= soft: return try: - resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard)) + _resource.setrlimit(_resource.RLIMIT_NOFILE, (target, hard)) log.debug("daemon_fd_limit_raised soft=%d->%d hard=%d", soft, target, hard) except (OSError, ValueError) as exc: log.debug("daemon_fd_limit_raise failed (non-fatal): %s", exc) @@ -750,10 +754,35 @@ def _set_process_title(title: str = "iai lilli (iai_mcp.daemon)") -> None: pass +def _auto_set_embed_offline() -> None: + """Set IAI_MCP_EMBED_OFFLINE if the bge-small-en-v1.5 model is already cached locally. + + The Rust hf-hub client uses a different TLS stack than Python and may fail to reach + huggingface.co in restricted network environments (e.g., containers with custom CA + certificates). 
When the model files are already present in the HF cache, setting this + env var tells the Rust embedder to skip the network entirely. + """ + if os.environ.get("IAI_MCP_EMBED_OFFLINE"): + return + import pathlib + + revision = "5c38ec7c405ec4b44b94cc5a9bb96e735b38267a" + hf_home = os.environ.get("HF_HOME") or os.environ.get("HUGGINGFACE_HUB_CACHE") + if hf_home: + cache_base = pathlib.Path(hf_home) + else: + cache_base = pathlib.Path.home() / ".cache" / "huggingface" / "hub" + snap = cache_base / "models--BAAI--bge-small-en-v1.5" / "snapshots" / revision + if (snap / "model.safetensors").exists() and (snap / "tokenizer.json").exists(): + os.environ["IAI_MCP_EMBED_OFFLINE"] = "1" + log.debug("bge-small-en-v1.5 found in HF cache — setting IAI_MCP_EMBED_OFFLINE=1") + + async def main() -> int: _set_process_title() _require_native() _raise_fd_limit() + _auto_set_embed_offline() store = await _open_exclusive_store_with_backoff( lambda: MemoryStore( @@ -1000,10 +1029,15 @@ def _capture_handler(record: dict) -> None: shutdown = asyncio.Event() loop = asyncio.get_running_loop() - for sig in (signal.SIGTERM, signal.SIGINT, signal.SIGHUP): + _shutdown_sigs = [signal.SIGINT] + if hasattr(signal, "SIGTERM"): + _shutdown_sigs.append(signal.SIGTERM) + if hasattr(signal, "SIGHUP"): + _shutdown_sigs.append(signal.SIGHUP) + for sig in _shutdown_sigs: try: loop.add_signal_handler(sig, shutdown.set) - except (NotImplementedError, RuntimeError): + except (NotImplementedError, RuntimeError, ValueError): pass try: diff --git a/src/iai_mcp/daemon/_watchdog.py b/src/iai_mcp/daemon/_watchdog.py index b40409f..b96e3f9 100644 --- a/src/iai_mcp/daemon/_watchdog.py +++ b/src/iai_mcp/daemon/_watchdog.py @@ -416,7 +416,10 @@ def _self_kill(reason: str, kind: str) -> None: _write_breadcrumb(line) except Exception: # noqa: BLE001 -- breadcrumb is best-effort ONLY pass - os.kill(os.getpid(), signal.SIGKILL) + if hasattr(signal, "SIGKILL"): + os.kill(os.getpid(), signal.SIGKILL) + else: + 
sys.exit(1) def _capture_blackbox( @@ -630,10 +633,9 @@ def _check_crisis_mode_expiry( async def _probe_status_roundtrip(sock_path: str, read_timeout: float) -> bool: + from iai_mcp._ipc import open_ipc_connection try: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(sock_path), timeout=5.0 - ) + reader, writer = await open_ipc_connection(sock_path, timeout=5.0) except (FileNotFoundError, ConnectionRefusedError, OSError): return False except asyncio.TimeoutError: diff --git a/src/iai_mcp/daemon_state.py b/src/iai_mcp/daemon_state.py index ac6e2eb..019254b 100644 --- a/src/iai_mcp/daemon_state.py +++ b/src/iai_mcp/daemon_state.py @@ -2,12 +2,42 @@ import json import os +import platform import tempfile +import time from datetime import datetime, timedelta, timezone from pathlib import Path STATE_PATH: Path = Path.home() / ".iai-mcp" / ".daemon-state.json" +_IS_WINDOWS: bool = platform.system() == "Windows" + + +def _atomic_replace(src: str, dst: Path) -> None: + """os.replace, with a brief retry loop on Windows. + + On Windows os.replace maps to MoveFileEx, which fails with + PermissionError (WinError 5/ACCESS_DENIED or 32/SHARING_VIOLATION) when + another process momentarily holds the destination open. Python's open() + on Windows does not request FILE_SHARE_DELETE, so any concurrent reader + (`daemon status`, the MCP server, a hook reading first-turn state) can + transiently block the replace. The handle is held only briefly, so a few + short retries resolve it. POSIX rename is atomic and never sees this, so + the path there is unchanged (single attempt, errors propagate). 
+ """ + if not _IS_WINDOWS: + os.replace(src, dst) + return + attempts = 10 + for i in range(attempts): + try: + os.replace(src, dst) + return + except PermissionError: + if i == attempts - 1: + raise + time.sleep(0.05) + DIGEST_SHOW_THRESHOLD_HOURS: int = 18 FIRST_TURN_TTL_HOURS: int = 24 @@ -18,7 +48,7 @@ def load_state() -> dict: if not STATE_PATH.exists(): return {} try: - return json.loads(STATE_PATH.read_text()) + return json.loads(STATE_PATH.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return {} @@ -36,7 +66,7 @@ def save_state(state: dict) -> None: f.flush() os.fsync(f.fileno()) os.chmod(tmp, 0o600) - os.replace(tmp, STATE_PATH) + _atomic_replace(tmp, STATE_PATH) except (OSError, TypeError, ValueError): try: os.unlink(tmp) diff --git a/src/iai_mcp/direct_write.py b/src/iai_mcp/direct_write.py index 4a67d9a..b3dfc35 100644 --- a/src/iai_mcp/direct_write.py +++ b/src/iai_mcp/direct_write.py @@ -116,19 +116,18 @@ def _find_record_by_tag_direct(db: Any, tag: str) -> str | None: def _try_get_embedding_fast(text: str, cue: str) -> list[float] | None: - socket_path = os.environ.get("IAI_DAEMON_SOCKET_PATH") - if socket_path: - try: - import socket as _socket - s = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) - s.settimeout(0.1) - s.connect(socket_path) - s.close() - except (OSError, ConnectionRefusedError, FileNotFoundError): - return None - else: + from iai_mcp._ipc import IS_WINDOWS, make_sync_ipc_socket, send_sync_auth_token + # On POSIX only proceed when IAI_DAEMON_SOCKET_PATH is explicitly set + if not IS_WINDOWS and not os.environ.get("IAI_DAEMON_SOCKET_PATH"): + return None + try: + s, addr = make_sync_ipc_socket() + s.settimeout(0.1) + s.connect(addr) + send_sync_auth_token(s) + s.close() + except (OSError, ConnectionRefusedError, FileNotFoundError): return None - return None @@ -194,7 +193,7 @@ def _write_sidecar(root: Path, record_id: str, embedding: list[float], db: Any) try: npy_tmp.write_bytes(blob) - 
json_tmp.write_text(json.dumps({"uuid": record_id, "vec_label": vec_label})) + json_tmp.write_text(json.dumps({"uuid": record_id, "vec_label": vec_label}), encoding="utf-8") os.replace(npy_tmp, npy_final) os.replace(json_tmp, json_final) except OSError as exc: diff --git a/src/iai_mcp/doctor/__init__.py b/src/iai_mcp/doctor/__init__.py index a33196f..97e7bf3 100644 --- a/src/iai_mcp/doctor/__init__.py +++ b/src/iai_mcp/doctor/__init__.py @@ -56,11 +56,9 @@ def _resolve_socket_path() -> Path: async def _socket_status_probe(socket_path: Path, timeout: float) -> dict | None: + from iai_mcp._ipc import open_ipc_connection try: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(path=str(socket_path)), - timeout=timeout, - ) + reader, writer = await open_ipc_connection(str(socket_path), timeout=timeout) except (FileNotFoundError, ConnectionRefusedError, asyncio.TimeoutError, OSError): return None try: @@ -307,7 +305,8 @@ def _kill_orphan_cores() -> tuple[bool, str, int]: if "iai_mcp.core" not in cl: continue pid = p.info["pid"] - os.kill(pid, signal.SIGTERM) + _term = getattr(signal, "SIGTERM", signal.SIGINT) + os.kill(pid, _term) killed.append(pid) except (psutil.NoSuchProcess, psutil.AccessDenied): continue diff --git a/src/iai_mcp/doctor/_lifecycle_checks.py b/src/iai_mcp/doctor/_lifecycle_checks.py index 0a589f4..7a1c46e 100644 --- a/src/iai_mcp/doctor/_lifecycle_checks.py +++ b/src/iai_mcp/doctor/_lifecycle_checks.py @@ -113,11 +113,9 @@ def check_a_daemon_alive() -> CheckResult: async def _socket_connect_probe(socket_path: Path, timeout: float) -> str | None: + from iai_mcp._ipc import open_ipc_connection try: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(path=str(socket_path)), - timeout=timeout, - ) + reader, writer = await open_ipc_connection(str(socket_path), timeout=timeout) except FileNotFoundError: return "FileNotFoundError" except ConnectionRefusedError: @@ -136,11 +134,16 @@ async def 
_socket_connect_probe(socket_path: Path, timeout: float) -> str | None def check_b_socket_fresh() -> CheckResult: socket_path = _resolve_socket_path() - if not socket_path.exists(): + # Windows binds TCP loopback and records the port in a sidecar file — there + # is no AF_UNIX socket file — so check whichever endpoint actually exists + # for this platform. (The connect probe below is already cross-platform.) + from iai_mcp._ipc import IS_WINDOWS, _port_file_path + endpoint = _port_file_path() if IS_WINDOWS else socket_path + if not endpoint.exists(): return CheckResult( "(b) socket file fresh", False, - f"{socket_path} does not exist", + f"{endpoint} does not exist", ) t0 = time.monotonic() @@ -169,7 +172,8 @@ def check_b_socket_fresh() -> CheckResult: def check_c_lock_healthy() -> CheckResult: import errno as _errno - import fcntl as _fcntl + from iai_mcp._filelock import LOCK_NB, LOCK_SH, LOCK_UN + from iai_mcp._filelock import flock as _flock lock_path = _resolve_hippo_db_path().parent / ".lock" if not lock_path.exists(): @@ -180,10 +184,12 @@ def check_c_lock_healthy() -> CheckResult: ) fd = None try: - fd = os.open(str(lock_path), os.O_RDONLY) + # O_RDWR required on Windows (msvcrt.locking needs write access); + # harmless on POSIX since flock ignores open mode. 
+ fd = os.open(str(lock_path), os.O_RDWR) try: - _fcntl.flock(fd, _fcntl.LOCK_SH | _fcntl.LOCK_NB) - _fcntl.flock(fd, _fcntl.LOCK_UN) + _flock(fd, LOCK_SH | LOCK_NB) + _flock(fd, LOCK_UN) return CheckResult( "(c) lock file healthy", True, @@ -197,7 +203,7 @@ def check_c_lock_healthy() -> CheckResult: f"{lock_path} held (consolidating or recall active — normal)", ) raise - except Exception as e: # noqa: BLE001 — fcntl/OSError/permission all FAIL + except Exception as e: # noqa: BLE001 — flock/OSError/permission all FAIL logger.debug("check_c: store-lock probe failed: %s", e) return CheckResult( "(c) lock file healthy", diff --git a/src/iai_mcp/fsm_reconcile.py b/src/iai_mcp/fsm_reconcile.py index c148d1a..802a8e5 100644 --- a/src/iai_mcp/fsm_reconcile.py +++ b/src/iai_mcp/fsm_reconcile.py @@ -17,7 +17,7 @@ def _read_canonical(path: Path) -> str | None: if not path.exists(): return None try: - raw = json.loads(path.read_text()) + raw = json.loads(path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return None if not isinstance(raw, dict): @@ -30,7 +30,7 @@ def _read_legacy(path: Path) -> str | None: if not path.exists(): return None try: - raw = json.loads(path.read_text()) + raw = json.loads(path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return None if not isinstance(raw, dict): @@ -55,7 +55,7 @@ def _auto_correct_legacy(legacy_path: Path, canonical_state: str) -> bool: try: raw: dict = {} if legacy_path.exists(): - raw = json.loads(legacy_path.read_text()) + raw = json.loads(legacy_path.read_text(encoding="utf-8")) if not isinstance(raw, dict): raw = {} except (OSError, json.JSONDecodeError): diff --git a/src/iai_mcp/hippo/__init__.py b/src/iai_mcp/hippo/__init__.py index 280cab4..482a5f3 100644 --- a/src/iai_mcp/hippo/__init__.py +++ b/src/iai_mcp/hippo/__init__.py @@ -3,7 +3,6 @@ import contextlib import enum import errno -import fcntl import logging import os import re diff --git a/src/iai_mcp/hippo/_db.py 
b/src/iai_mcp/hippo/_db.py index 70b51a7..a47b3db 100644 --- a/src/iai_mcp/hippo/_db.py +++ b/src/iai_mcp/hippo/_db.py @@ -4,7 +4,6 @@ import contextlib import errno -import fcntl import logging import os import re @@ -20,6 +19,7 @@ import numpy as np import pyarrow as pa +from iai_mcp._filelock import LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN, flock from iai_mcp.crypto import ( decrypt_field, encrypt_field, @@ -190,7 +190,7 @@ def _acquire_exclusive_lock(self) -> None: ) os.chmod(str(self._lock_path), 0o600) try: - fcntl.flock(base_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + flock(base_fd, LOCK_EX | LOCK_NB) except OSError as exc: os.close(base_fd) if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK): @@ -250,7 +250,7 @@ def _acquire_shared_lock( continue try: - fcntl.flock(base_fd, fcntl.LOCK_SH | fcntl.LOCK_NB) + flock(base_fd, LOCK_SH | LOCK_NB) except OSError as exc: if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK): if time.monotonic() >= deadline: @@ -261,7 +261,7 @@ def _acquire_shared_lock( raise if _intent_path.exists(): - fcntl.flock(base_fd, fcntl.LOCK_UN) + flock(base_fd, LOCK_UN) if time.monotonic() >= deadline: break time.sleep(_SHARED_RETRY_SLEEP_S) @@ -277,7 +277,7 @@ def _acquire_shared_lock( with _PROCESS_LOCKS_GUARD: held_sh = _PROCESS_LOCKS_SHARED.get(self._lock_key) if held_sh is not None: - fcntl.flock(base_fd, fcntl.LOCK_UN) + flock(base_fd, LOCK_UN) os.close(base_fd) base_fd2, refcount2 = held_sh self._lock_fd = os.dup(base_fd2) @@ -303,7 +303,7 @@ def downgrade_to_shared(self) -> None: return base_fd, refcount = held try: - fcntl.flock(base_fd, fcntl.LOCK_SH) + flock(base_fd, LOCK_SH) except OSError: return del _PROCESS_LOCKS[self._lock_key] @@ -344,7 +344,7 @@ def escalate_to_exclusive(self, intent_budget_ms: int = 4000) -> None: acquired = False while time.monotonic() < deadline: try: - fcntl.flock(base_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + flock(base_fd, LOCK_EX | LOCK_NB) acquired = True break except OSError as exc: @@ -672,7 +672,7 @@ def 
ingest_pending_embeddings(self) -> int: _log.warning("ingest_pending_embeddings: malformed .npy %s, skipping", npy_path) continue vec = list(_struct.unpack(f"<{n_floats}f", vec_bytes)) - meta = _json.loads(json_path.read_text()) + meta = _json.loads(json_path.read_text(encoding="utf-8")) vec_label = int(meta["vec_label"]) except Exception as exc: # noqa: BLE001 _log.warning("ingest_pending_embeddings: failed to load %s: %s", npy_path, exc) @@ -1031,7 +1031,7 @@ def close(self) -> None: base_fd, refcount = held if refcount <= 1: try: - fcntl.flock(base_fd, fcntl.LOCK_UN) + flock(base_fd, LOCK_UN) except Exception: # noqa: BLE001 pass try: diff --git a/src/iai_mcp/lifecycle.py b/src/iai_mcp/lifecycle.py index a55d68b..29a5e47 100644 --- a/src/iai_mcp/lifecycle.py +++ b/src/iai_mcp/lifecycle.py @@ -2,7 +2,6 @@ import asyncio import errno -import fcntl import os from contextlib import contextmanager from datetime import datetime, timezone @@ -10,6 +9,7 @@ from pathlib import Path from typing import Any, Iterator +from iai_mcp._filelock import LOCK_EX, LOCK_NB, LOCK_UN, flock from iai_mcp.lifecycle_event_log import LifecycleEventLog from iai_mcp.lifecycle_state import ( LIFECYCLE_STATE_PATH, @@ -92,7 +92,7 @@ def _lifecycle_lock(lock_path: Path) -> Iterator[int]: fd = os.open(str(lock_path), os.O_RDWR | os.O_CREAT, 0o600) try: try: - fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + flock(fd, LOCK_EX | LOCK_NB) except OSError as exc: if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK): raise LifecycleStateLocked( @@ -103,7 +103,7 @@ def _lifecycle_lock(lock_path: Path) -> Iterator[int]: yield fd finally: try: - fcntl.flock(fd, fcntl.LOCK_UN) + flock(fd, LOCK_UN) except OSError: pass finally: diff --git a/src/iai_mcp/lifecycle_event_log.py b/src/iai_mcp/lifecycle_event_log.py index 1ee02fd..2c9b1ee 100644 --- a/src/iai_mcp/lifecycle_event_log.py +++ b/src/iai_mcp/lifecycle_event_log.py @@ -1,12 +1,13 @@ from __future__ import annotations import errno -import fcntl import 
gzip import json import os import shutil from datetime import datetime, timedelta, timezone + +from iai_mcp._filelock import LOCK_EX, LOCK_UN, flock from pathlib import Path from typing import Any @@ -73,12 +74,12 @@ def append(self, event: dict[str, Any], now: datetime | None = None) -> None: 0o600, ) try: - fcntl.flock(fd, fcntl.LOCK_EX) + flock(fd, LOCK_EX) try: os.write(fd, line.encode("utf-8")) os.fsync(fd) finally: - fcntl.flock(fd, fcntl.LOCK_UN) + flock(fd, LOCK_UN) finally: os.close(fd) @@ -127,7 +128,7 @@ def read_all(self, date_str: str | None = None) -> list[dict[str, Any]]: if not target.exists(): return [] out: list[dict[str, Any]] = [] - with target.open("r") as f: + with target.open("r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: diff --git a/src/iai_mcp/lifecycle_lock.py b/src/iai_mcp/lifecycle_lock.py index 699d2ba..03d5fbe 100644 --- a/src/iai_mcp/lifecycle_lock.py +++ b/src/iai_mcp/lifecycle_lock.py @@ -3,6 +3,7 @@ import json import logging import os +import platform import socket import tempfile from datetime import datetime, timezone @@ -50,12 +51,17 @@ def _is_pid_alive(pid: int) -> bool: if pid <= 0: return False - try: - os.kill(pid, 0) - except ProcessLookupError: - return False - except PermissionError: - return True + # os.kill(pid, 0) is the POSIX liveness idiom, but on Windows os.kill + # rejects signal 0 with OSError [WinError 87] (invalid parameter). Skip + # the probe there and rely on the psutil refinement below, which both + # confirms the pid exists and that it is actually an iai_mcp.daemon. 
+ if platform.system() != "Windows": + try: + os.kill(pid, 0) + except ProcessLookupError: + return False + except PermissionError: + return True try: import psutil diff --git a/src/iai_mcp/lifecycle_state.py b/src/iai_mcp/lifecycle_state.py index 1abecd3..a47a61e 100644 --- a/src/iai_mcp/lifecycle_state.py +++ b/src/iai_mcp/lifecycle_state.py @@ -137,7 +137,7 @@ def load_state(path: Path | None = None) -> LifecycleStateRecord: if not target.exists(): return default_state() try: - raw = json.loads(target.read_text()) + raw = json.loads(target.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return default_state() try: diff --git a/src/iai_mcp/lock_protocol.py b/src/iai_mcp/lock_protocol.py index b105406..535eab4 100644 --- a/src/iai_mcp/lock_protocol.py +++ b/src/iai_mcp/lock_protocol.py @@ -1,11 +1,12 @@ from __future__ import annotations import errno -import fcntl import logging import os from pathlib import Path +from iai_mcp._filelock import LOCK_NB, LOCK_SH, flock + logger = logging.getLogger(__name__) @@ -55,7 +56,7 @@ def acquire_client_shared_nb(fd: int, lock_path: Path) -> bool: return False try: - fcntl.flock(fd, fcntl.LOCK_SH | fcntl.LOCK_NB) + flock(fd, LOCK_SH | LOCK_NB) return True except OSError as exc: if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK): diff --git a/src/iai_mcp/memory_bank.py b/src/iai_mcp/memory_bank.py index e1889c2..b703560 100644 --- a/src/iai_mcp/memory_bank.py +++ b/src/iai_mcp/memory_bank.py @@ -225,7 +225,8 @@ def append_recent_record( 0o600, ) try: - os.fchmod(fd, 0o600) + if hasattr(os, "fchmod"): + os.fchmod(fd, 0o600) os.write(fd, line) os.fsync(fd) finally: diff --git a/src/iai_mcp/migrate/_reembed.py b/src/iai_mcp/migrate/_reembed.py index c25fb17..66ff9bf 100644 --- a/src/iai_mcp/migrate/_reembed.py +++ b/src/iai_mcp/migrate/_reembed.py @@ -74,7 +74,7 @@ def _progress_read(store: MemoryStore) -> dict: if not path.exists(): return {} try: - return json.loads(path.read_text()) + return 
json.loads(path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError, ValueError): return {} diff --git a/src/iai_mcp/provenance_buffer.py b/src/iai_mcp/provenance_buffer.py index 1d5bc3c..7d0d4ff 100644 --- a/src/iai_mcp/provenance_buffer.py +++ b/src/iai_mcp/provenance_buffer.py @@ -33,7 +33,7 @@ def defer_provenance( "cue": cue, "session_id": session_id, })) - with open(path, "a") as f: + with open(path, "a", encoding="utf-8") as f: f.write("\n".join(lines) + "\n") @@ -42,7 +42,7 @@ def flush_deferred_provenance(store: MemoryStore) -> int: if not path.exists(): return 0 try: - with open(path) as f: + with open(path, encoding="utf-8") as f: raw_lines = f.read().strip().splitlines() except OSError: return 0 @@ -72,7 +72,7 @@ def flush_deferred_provenance(store: MemoryStore) -> int: return 0 try: - path.write_text("") + path.write_text("", encoding="utf-8") except OSError: pass return len(pairs) diff --git a/src/iai_mcp/provenance_queue.py b/src/iai_mcp/provenance_queue.py index a8c076c..d59fadb 100644 --- a/src/iai_mcp/provenance_queue.py +++ b/src/iai_mcp/provenance_queue.py @@ -124,7 +124,7 @@ def _spill_to_disk(self, pairs: list) -> None: with tmp_path.open("w", encoding="utf-8") as fh: for rid, entry in pairs: fh.write(json.dumps({"id": str(rid), "entry": entry}) + "\n") - tmp_path.rename(fpath) + tmp_path.replace(fpath) except (OSError, TypeError, ValueError) as exc: logger.warning("provenance_queue_spill_failed", extra={"err": str(exc)[:200], "n_pairs": len(pairs)}) try: @@ -164,7 +164,7 @@ def _drain_overflow_dir(self) -> int: logger.warning("provenance_queue_spill_drain_failed", extra={"err": str(exc)[:200]}) try: failed = fpath.with_suffix(f".failed-{int(time.time())}.jsonl") - fpath.rename(failed) + fpath.replace(failed) sys.stderr.write( '{"event":"provenance_queue_spill_drain_failed","error":' + _json_str(str(exc)) + '}\n' diff --git a/src/iai_mcp/runtime_graph_cache.py b/src/iai_mcp/runtime_graph_cache.py index add150a..b5029e0 100644 
--- a/src/iai_mcp/runtime_graph_cache.py +++ b/src/iai_mcp/runtime_graph_cache.py @@ -3,6 +3,7 @@ import json import logging import os +import platform import sys import threading from datetime import datetime, timezone @@ -113,6 +114,16 @@ def _get_persistent_graph(): _WORKER_TIMEOUT_MAX_S: float = 3600.0 _first_spawn_seen: bool = False +# Windows `multiprocessing` spawn is broken for the RGC worker: the spawn child +# launches under the venv's *base* interpreter (`sys._base_executable`, e.g. +# `...\Python312\pythonw.exe`) rather than the venv interpreter, re-imports the +# heavy `iai_mcp.daemon` module, and hangs well past the watchdog timeout — +# killing it took the parent daemon down with it. `ctx.set_executable(...)` does +# not override the base-interpreter selection (verified empirically). On Windows +# we therefore run the worker in an in-process daemon thread (`_ThreadWorkerHandle`) +# instead of spawning; POSIX keeps the spawned subprocess unchanged. +_IS_WINDOWS: bool = platform.system() == "Windows" + class WorkerCrashedError(RuntimeError): """Child worker exited with a non-zero exit code.""" @@ -157,6 +168,69 @@ def _terminate_worker(process) -> None: pass +class _ThreadWorkerHandle: + """In-process stand-in for a spawned worker `Process`, used on Windows. + + Runs `_worker_entry` in a daemon thread of the current process and exposes + the subset of the `multiprocessing.Process` API the rebuild path touches + (`start`, `is_alive`, `join`, `exitcode`, `terminate`, `kill`), so the + surrounding spawn/drain logic is reused verbatim. See the `_IS_WINDOWS` + note above for why spawn cannot be used here. + + Trade-off: the worker no longer runs in a separate address space, so the + fat per-rebuild allocations live in the daemon heap until they fall out of + scope and are GC'd, rather than being reclaimed by process exit. The + rebuild is a periodic sleep-time operation, not a hot path, so this is an + acceptable cost for correctness. 
The AES-key isolation the subprocess gave + is moot in-process anyway; the worker module still never imports the + storage/crypto surface, so no key is reachable through it. + """ + + def __init__(self, target, conn) -> None: + self._target = target + self._conn = conn + self._exitcode: int | None = None + self._thread = threading.Thread(target=self._run, daemon=True) + + def _run(self) -> None: + try: + self._target(self._conn) + self._exitcode = 0 + except SystemExit as exc: + # The worker calls sys.exit(1) on its error path; map that to a + # non-zero exitcode so the parent's crash check fires as it would + # for a subprocess. + self._exitcode = exc.code if isinstance(exc.code, int) else 1 + except BaseException: # noqa: BLE001 -- mirror a non-zero subprocess exit + self._exitcode = 1 + + def start(self) -> None: + self._thread.start() + + def is_alive(self) -> bool: + return self._thread.is_alive() + + def join(self, timeout: float | None = None) -> None: + self._thread.join(timeout) + + @property + def exitcode(self) -> int | None: + # Mirror Process.exitcode: None while the worker is still running. + if self._thread.is_alive(): + return None + return self._exitcode + + def terminate(self) -> None: + # A Python thread cannot be force-killed. On the normal path the worker + # unwinds when the parent closes its pipe end (EOFError); on a genuine + # compute hang the daemon thread is left to finish in the background + # (daemon=True, so it never blocks interpreter shutdown). + pass + + def kill(self) -> None: + pass + + def _drain_worker_result(parent_conn, timeout: float) -> dict: """Drain the chunked compact result envelope into a parent-side dict. @@ -953,20 +1027,27 @@ def _rebuild_and_save_rgc(store: Any, *, force: bool = False) -> dict: except Exception: # noqa: BLE001 est_node_count = 0 - # Spawn the worker. 
Spawn-context (not fork) so the child re-imports - # cleanly on macOS and Linux; the child closes its end after start so - # the parent does not hold a half-of-pipe alive on crash detection. + # Start the worker. On POSIX we spawn a subprocess (spawn-context, not + # fork, so the child re-imports cleanly on macOS and Linux) and close + # the parent's copy of the child end so we don't hold half a pipe alive + # on crash detection. On Windows spawn is broken for this worker (see + # `_IS_WINDOWS`), so we run it in an in-process daemon thread and keep + # child_conn open — the in-process worker owns that same Connection. first_spawn_flag = not _first_spawn_seen timeout_s = _resolve_timeout(est_node_count) ctx = multiprocessing.get_context("spawn") parent_conn, child_conn = ctx.Pipe(duplex=True) - process = ctx.Process( - target=_worker_entry_indirection, - args=(child_conn,), - daemon=True, - ) - process.start() - child_conn.close() + if _IS_WINDOWS: + process = _ThreadWorkerHandle(_worker_entry_indirection, child_conn) + process.start() + else: + process = ctx.Process( + target=_worker_entry_indirection, + args=(child_conn,), + daemon=True, + ) + process.start() + child_conn.close() db_path = store.db._hippo_dir / "brain.sqlite3" ro_conn = None diff --git a/src/iai_mcp/semantic_recall.py b/src/iai_mcp/semantic_recall.py index b6bda69..8278e2b 100644 --- a/src/iai_mcp/semantic_recall.py +++ b/src/iai_mcp/semantic_recall.py @@ -66,17 +66,12 @@ def _send_embed_cue_rpc(cue: str, timeout_ms: int) -> "list[float] | None": import asyncio import json - from iai_mcp.concurrency import SOCKET_PATH - - sock_path = os.environ.get("IAI_DAEMON_SOCKET_PATH") or str(SOCKET_PATH) + from iai_mcp._ipc import open_ipc_connection connect_timeout = timeout_ms / 1000.0 async def _runner() -> "list[float] | None": try: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(sock_path), - timeout=connect_timeout, - ) + reader, writer = await 
open_ipc_connection(timeout=connect_timeout) except (FileNotFoundError, ConnectionRefusedError, OSError, asyncio.TimeoutError): return None try: diff --git a/src/iai_mcp/sleep_wal.py b/src/iai_mcp/sleep_wal.py index 3b51619..0394bac 100644 --- a/src/iai_mcp/sleep_wal.py +++ b/src/iai_mcp/sleep_wal.py @@ -97,7 +97,7 @@ def pending_entries(self) -> list[WALEntry]: return [] entries: dict[str, WALEntry] = {} try: - with open(self.path) as f: + with open(self.path, encoding="utf-8") as f: for line in f: line = line.strip() if not line: @@ -119,7 +119,7 @@ def cleanup(self, max_age_hours: int = 168) -> int: kept: list[str] = [] removed = 0 try: - with open(self.path) as f: + with open(self.path, encoding="utf-8") as f: for line in f: line = line.strip() if not line: @@ -134,7 +134,7 @@ def cleanup(self, max_age_hours: int = 168) -> int: continue kept.append(line) if removed > 0: - self.path.write_text("\n".join(kept) + "\n" if kept else "") + self.path.write_text("\n".join(kept) + "\n" if kept else "", encoding="utf-8") except OSError: pass return removed @@ -142,7 +142,7 @@ def cleanup(self, max_age_hours: int = 168) -> int: def _append(self, entry: WALEntry) -> None: try: self.path.parent.mkdir(parents=True, exist_ok=True) - with open(self.path, "a") as f: + with open(self.path, "a", encoding="utf-8") as f: f.write(json.dumps(entry.to_dict()) + "\n") except OSError as exc: logger.warning("WAL write failed: %s", exc) diff --git a/src/iai_mcp/socket_server.py b/src/iai_mcp/socket_server.py index 2d21108..22dace7 100644 --- a/src/iai_mcp/socket_server.py +++ b/src/iai_mcp/socket_server.py @@ -10,6 +10,7 @@ from pathlib import Path from typing import Any +from iai_mcp._ipc import IS_WINDOWS, cleanup_ipc_address, open_ipc_connection, shutdown_ipc, start_ipc_server from iai_mcp.concurrency import SOCKET_PATH, cleanup_stale_socket from iai_mcp.core import UnknownMethodError @@ -205,30 +206,32 @@ async def handle( async def serve(self, socket_path: Path | None = None) -> 
None: + if IS_WINDOWS: + # Windows: TCP server on loopback; socket_path is unused + server, actual_addr, needs_cleanup = await start_ipc_server(self.handle) + try: + async with server: + await self.shutdown_event.wait() + server.close() + await server.wait_closed() + finally: + if needs_cleanup: + shutdown_ipc(actual_addr) + return + if socket_path is None: env_path = os.environ.get("IAI_DAEMON_SOCKET_PATH") socket_path = Path(env_path) if env_path else SOCKET_PATH - sig = inspect.signature(asyncio.start_unix_server) - supports_cleanup_socket = "cleanup_socket" in sig.parameters - inherited = _inherit_activated_socket() if inherited is not None: - server = await asyncio.start_unix_server( - self.handle, - sock=inherited, - ) + server = await asyncio.start_unix_server(self.handle, sock=inherited) + needs_cleanup = False + actual_addr: Any = str(socket_path) else: - cleanup_stale_socket(socket_path) + cleanup_ipc_address(socket_path) socket_path.parent.mkdir(parents=True, exist_ok=True) - server_kwargs: dict[str, Any] = ( - {"cleanup_socket": True} if supports_cleanup_socket else {} - ) - server = await asyncio.start_unix_server( - self.handle, - path=str(socket_path), - **server_kwargs, - ) + server, actual_addr, needs_cleanup = await start_ipc_server(self.handle, socket_path) try: os.chmod(str(socket_path), 0o600) except OSError: @@ -240,8 +243,5 @@ async def serve(self, socket_path: Path | None = None) -> None: server.close() await server.wait_closed() finally: - if inherited is None and not supports_cleanup_socket: - try: - socket_path.unlink() - except (FileNotFoundError, OSError): - pass + if inherited is None and needs_cleanup: + shutdown_ipc(actual_addr) diff --git a/src/iai_mcp/tz.py b/src/iai_mcp/tz.py index 027199b..6106921 100644 --- a/src/iai_mcp/tz.py +++ b/src/iai_mcp/tz.py @@ -33,7 +33,7 @@ def _seed_config(cfg_path: Path, tz_key: str) -> None: existing: dict = {} if cfg_path.exists(): try: - with open(cfg_path) as f: + with open(cfg_path, 
encoding="utf-8") as f: existing = json.load(f) if not isinstance(existing, dict): existing = {} @@ -44,7 +44,7 @@ def _seed_config(cfg_path: Path, tz_key: str) -> None: existing["user"] = {} existing["user"]["timezone"] = tz_key tmp = cfg_path.with_suffix(".tmp") - with open(tmp, "w") as f: + with open(tmp, "w", encoding="utf-8") as f: json.dump(existing, f, indent=2) os.replace(tmp, cfg_path) @@ -53,7 +53,7 @@ def load_user_tz() -> ZoneInfo: cfg_path = _config_path() if cfg_path.exists(): try: - with open(cfg_path) as f: + with open(cfg_path, encoding="utf-8") as f: cfg = json.load(f) except (json.JSONDecodeError, OSError): cfg = None diff --git a/src/iai_mcp/user_model.py b/src/iai_mcp/user_model.py index 4480f4f..743e157 100644 --- a/src/iai_mcp/user_model.py +++ b/src/iai_mcp/user_model.py @@ -41,7 +41,7 @@ def load() -> UserModel: if not path.exists(): return default() try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return default() diff --git a/tests/_socket_test_helpers.py b/tests/_socket_test_helpers.py new file mode 100644 index 0000000..5b166f1 --- /dev/null +++ b/tests/_socket_test_helpers.py @@ -0,0 +1,87 @@ +"""Cross-platform fake-daemon socket binding for tests. + +Production code reaches the daemon via ``iai_mcp._ipc``: on POSIX a unix-domain +socket at ``IAI_DAEMON_SOCKET_PATH``; on Windows TCP loopback with the port +persisted to ``"{IAI_DAEMON_SOCKET_PATH}.port"``. Tests that stand up a *raw* +fake daemon socket (to simulate stalls, fast replies, dead endpoints, etc.) +must bind the matching transport so the production client actually connects to +them. This helper hides the per-platform binding; callers keep their own +accept/recv/reply logic unchanged. 
+""" +from __future__ import annotations + +import os +import secrets +import socket +from pathlib import Path + +from iai_mcp._ipc import IS_WINDOWS + + +def write_fake_daemon_token(sock_path) -> None: + """Write an auth token alongside a fake daemon socket so the production + client's mandatory Windows handshake (see ``_ipc._send_token_async``) finds + one. The raw fake servers don't validate it, so any value works. No-op on + POSIX, where access control is the unix-socket file permissions.""" + if IS_WINDOWS: + Path(f"{sock_path}.token").write_text(secrets.token_hex(16), encoding="utf-8") + + +def send_daemon_token(sock: socket.socket, sock_path) -> None: + """Send the auth token as the first line on a *raw* client socket, matching + the daemon's Windows handshake. Reads ``{sock_path}.token`` (written by the + daemon or by ``write_fake_daemon_token``). No-op on POSIX.""" + if IS_WINDOWS: + token = Path(f"{sock_path}.token").read_text(encoding="utf-8").strip() + sock.sendall((token + "\n").encode("utf-8")) + + +def bind_fake_daemon_socket(sock_path) -> socket.socket: + """Return a bound, listening socket that an ``_ipc`` client configured with + ``IAI_DAEMON_SOCKET_PATH=sock_path`` will connect to. + + POSIX: ``AF_UNIX`` bound at ``sock_path``. Windows: ``AF_INET`` on + ``127.0.0.1:{port}`` with the chosen port written to + ``"{sock_path}.port"`` (matching ``_ipc._port_file_path``). Caller owns the + returned socket (``accept``/``recv``/``close``). 
+ """ + if IS_WINDOWS: + srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + srv.bind(("127.0.0.1", 0)) + port = srv.getsockname()[1] + Path(f"{sock_path}.port").write_text(str(port), encoding="utf-8") + write_fake_daemon_token(sock_path) + else: + try: + os.unlink(sock_path) + except FileNotFoundError: + pass + srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + srv.bind(str(sock_path)) + srv.listen(5) + return srv + + +def daemon_endpoint_ready_path(sock_path) -> Path: + """Path that exists once a daemon bound at ``sock_path`` is reachable: the + unix socket file on POSIX, the ``{sock_path}.port`` file on Windows.""" + return Path(f"{sock_path}.port") if IS_WINDOWS else Path(sock_path) + + +def daemon_endpoint(sock_path): + """Connect target for a daemon bound at ``sock_path``: the unix socket path + (POSIX) or ``("127.0.0.1", port)`` read from ``{sock_path}.port`` (Windows). + Raises ``FileNotFoundError`` if the Windows port file is absent.""" + if IS_WINDOWS: + port = int(Path(f"{sock_path}.port").read_text(encoding="utf-8").strip()) + return ("127.0.0.1", port) + return str(sock_path) + + +def new_daemon_client_socket() -> socket.socket: + """A raw client socket of the right family for the current platform + (``AF_INET`` on Windows, ``AF_UNIX`` on POSIX).""" + family = socket.AF_INET if IS_WINDOWS else socket.AF_UNIX + return socket.socket(family, socket.SOCK_STREAM) diff --git a/tests/test_bridge_socket_first.py b/tests/test_bridge_socket_first.py index 66113dc..7a563b8 100644 --- a/tests/test_bridge_socket_first.py +++ b/tests/test_bridge_socket_first.py @@ -176,7 +176,7 @@ def _wait_for_daemon_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool: def test_start_throws_DaemonUnreachableError_when_socket_missing( built_wrapper, tmp_path ): - sock_dir = Path(f"/tmp/iai-7.1-noconn-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" 
sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" store_dir = sock_dir / "store" @@ -294,7 +294,7 @@ def test_start_throws_DaemonUnreachableError_when_socket_missing( def test_start_succeeds_with_warm_daemon_no_extra_spawn(built_wrapper, tmp_path): - sock_dir = Path(f"/tmp/iai-7.1-warm-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" store_dir = sock_dir / "store" diff --git a/tests/test_capture.py b/tests/test_capture.py index 933110b..223b1c6 100644 --- a/tests/test_capture.py +++ b/tests/test_capture.py @@ -5,6 +5,8 @@ """ from __future__ import annotations +import tempfile + import json import platform import uuid @@ -124,7 +126,7 @@ def test_deferred_capture_beyond_200(iai_home, tmp_path): out_path = write_deferred_captures( session_id=SESSION_ID, transcript_path=transcript, - cwd="/tmp/test", + cwd=str(Path(tempfile.gettempdir()) / "test"), ) assert out_path.exists(), f"Deferred capture file not created at {out_path}" diff --git a/tests/test_capture_queue.py b/tests/test_capture_queue.py index 94875e3..134f913 100644 --- a/tests/test_capture_queue.py +++ b/tests/test_capture_queue.py @@ -1,8 +1,9 @@ from __future__ import annotations import errno -import fcntl import json +from iai_mcp._filelock import LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN +from iai_mcp._filelock import flock as _flock import os import threading import time @@ -171,7 +172,7 @@ def test_idempotent_ingest_lock_skipped(tmp_path): lock_a = tmp_path / f"pending-{ulid_a}.lock" fd = os.open(str(lock_a), os.O_WRONLY | os.O_CREAT, 0o600) try: - fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + _flock(fd, LOCK_EX | LOCK_NB) seen: list[str] = [] @@ -186,7 +187,7 @@ def handler(record: dict) -> None: assert not (tmp_path / f"pending-{ulid_c}.json").exists() finally: try: - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) except OSError: pass os.close(fd) diff --git 
a/tests/test_capture_source_uuid_idem.py b/tests/test_capture_source_uuid_idem.py index 328559e..f7bd137 100644 --- a/tests/test_capture_source_uuid_idem.py +++ b/tests/test_capture_source_uuid_idem.py @@ -1,5 +1,7 @@ from __future__ import annotations +import tempfile + import json import platform from datetime import datetime, timezone @@ -240,7 +242,7 @@ def test_drain_deferred_deduplicates_already_inserted_uuid(iai_home): "version": 1, "deferred_at": "2026-05-31T18:00:00.000Z", "session_id": SESSION, - "cwd": "/tmp/test", + "cwd": str(Path(tempfile.gettempdir()) / "test"), } event = { "text": TEXT, diff --git a/tests/test_capture_transcript_no_spawn.py b/tests/test_capture_transcript_no_spawn.py index 85e822a..9c9541a 100644 --- a/tests/test_capture_transcript_no_spawn.py +++ b/tests/test_capture_transcript_no_spawn.py @@ -37,7 +37,7 @@ def _count_iai_mcp_processes() -> dict[str, int]: def _isolated_env(tmp_path: Path) -> tuple[dict[str, str], Path]: - sock_dir = Path(f"/tmp/iai-no-spawn-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" diff --git a/tests/test_capture_transcript_no_spawn_defer.py b/tests/test_capture_transcript_no_spawn_defer.py index 302d954..4d0e520 100644 --- a/tests/test_capture_transcript_no_spawn_defer.py +++ b/tests/test_capture_transcript_no_spawn_defer.py @@ -20,7 +20,7 @@ def _isolated_env(tmp_path: Path) -> tuple[dict[str, str], Path, Path]: - sock_dir = Path(f"/tmp/iai-no-spawn-defer-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" diff --git a/tests/test_cli_crypto.py b/tests/test_cli_crypto.py index e79cfa5..9dc0a37 100644 --- a/tests/test_cli_crypto.py +++ b/tests/test_cli_crypto.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import json import os import secrets @@ -30,7 +32,8 @@ def test_cli_crypto_status_shows_file_backend(tmp_path, 
monkeypatch, capsys): assert "default" in out assert "file" in out_lower, f"status must report backend=file; got:\n{out}" assert ".crypto.key" in out, f"status must include the file path; got:\n{out}" - assert "600" in out, f"status must expose mode 0o600; got:\n{out}" + if sys.platform != "win32": + assert "600" in out, f"status must expose mode 0o600; got:\n{out}" assert "keyring" not in out_lower, ( f"status must NOT mention keyring (backend retired in 07.10); got:\n{out}" ) @@ -89,7 +92,8 @@ def test_cli_crypto_rotate_regenerates_key(tmp_path, monkeypatch, capsys): assert len(new_key_bytes) == 32 assert new_key_bytes != key_a, "rotate must write a fresh key to the file" mode = stat.S_IMODE(os.stat(key_path).st_mode) - assert mode == 0o600, f"rotated key file must be 0o600, got 0o{mode:03o}" + if sys.platform != "win32": + assert mode == 0o600, f"rotated key file must be 0o600, got 0o{mode:03o}" store2 = MemoryStore() post_ct = store2.db.open_table(RECORDS_TABLE).to_pandas()[ @@ -260,7 +264,8 @@ def test_cli_crypto_init_creates_fresh_file(tmp_path, monkeypatch, capsys): assert key_path.exists() assert key_path.stat().st_size == 32 mode = stat.S_IMODE(os.stat(key_path).st_mode) - assert mode == 0o600, f"init key file must be 0o600, got 0o{mode:03o}" + if sys.platform != "win32": + assert mode == 0o600, f"init key file must be 0o600, got 0o{mode:03o}" assert ".crypto.key" in out raw = key_path.read_bytes() for i in range(0, 32, 4): diff --git a/tests/test_cli_daemon.py b/tests/test_cli_daemon.py index 8140bb1..3342b9b 100644 --- a/tests/test_cli_daemon.py +++ b/tests/test_cli_daemon.py @@ -5,6 +5,7 @@ import json import os import platform +import signal import sys import tempfile import threading @@ -14,6 +15,7 @@ import pytest +from iai_mcp import _ipc from iai_mcp import cli as cli_mod @@ -60,10 +62,20 @@ async def _handle(reader, writer): pass async def _serve(): - self.path.parent.mkdir(parents=True, exist_ok=True) - self._server = await 
asyncio.start_unix_server( - _handle, path=str(self.path), - ) + # Mirror the production IPC transport (_ipc): Unix-domain + # socket on POSIX, TCP loopback + port file on Windows + # (asyncio.start_unix_server does not exist on Windows). + if _ipc.IS_WINDOWS: + self._server = await asyncio.start_server( + _handle, "127.0.0.1", 0, + ) + port = self._server.sockets[0].getsockname()[1] + _ipc._write_port(port) + else: + self.path.parent.mkdir(parents=True, exist_ok=True) + self._server = await asyncio.start_unix_server( + _handle, path=str(self.path), + ) self._ready.set() async with self._server: await self._server.serve_forever() @@ -96,13 +108,20 @@ async def _shutdown(): loop.call_soon_threadsafe(loop.stop) if self._thread is not None: self._thread.join(timeout=5.0) + if _ipc.IS_WINDOWS: + _ipc._remove_port_file() @pytest.fixture -def short_socket(tmp_path: Path) -> Path: +def short_socket(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: candidate = tmp_path / "d.sock" if len(str(candidate)) > 100: candidate = Path(tempfile.mkdtemp(prefix="iai-clitest-")) / "d.sock" + # On Windows the IPC layer rendezvous via a TCP port file, not the socket + # path. Redirect _ipc.PORT_FILE into the temp dir so the fake daemon and + # the CLI client (both reference this module global) find each other, + # without reading or clobbering a real daemon's ~/.iai-mcp/.daemon.port. 
+ monkeypatch.setattr(_ipc, "PORT_FILE", candidate.parent / ".daemon.port") return candidate @@ -715,6 +734,10 @@ def test_stop_bootout_precedes_sigterm(monkeypatch: pytest.MonkeyPatch) -> None: assert ("kill", 4242, sig.SIGTERM) in calls +@pytest.mark.skipif( + not hasattr(signal, "SIGKILL"), + reason="SIGKILL escalation is POSIX-only; Windows os.kill has no SIGKILL", +) def test_stop_escalates_to_sigkill_when_pid_survives( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -740,6 +763,10 @@ def test_stop_escalates_to_sigkill_when_pid_survives( assert ("kill", 5151, sig.SIGTERM) in calls +@pytest.mark.skipif( + not hasattr(signal, "SIGKILL"), + reason="SIGKILL escalation is POSIX-only; Windows os.kill has no SIGKILL", +) def test_stop_no_sigkill_when_pid_dies_during_wait( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/test_cli_ensure_crypto_key_present.py b/tests/test_cli_ensure_crypto_key_present.py index 73ad5c5..fd6c686 100644 --- a/tests/test_cli_ensure_crypto_key_present.py +++ b/tests/test_cli_ensure_crypto_key_present.py @@ -2,6 +2,7 @@ import os import stat +import sys import pytest @@ -21,7 +22,8 @@ def test_ensure_crypto_key_generates_on_fresh_install(tmp_path, monkeypatch): assert path is not None assert path.exists() assert path.stat().st_size == 32 - assert stat.S_IMODE(path.stat().st_mode) == 0o600 + if sys.platform != "win32": + assert stat.S_IMODE(path.stat().st_mode) == 0o600 def test_ensure_crypto_key_idempotent_when_file_exists(tmp_path, monkeypatch): diff --git a/tests/test_concurrency.py b/tests/test_concurrency.py index 885b877..4a3ce08 100644 --- a/tests/test_concurrency.py +++ b/tests/test_concurrency.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import asyncio import json import os @@ -7,14 +9,24 @@ import pytest +from iai_mcp._ipc import IS_WINDOWS, open_ipc_connection + + +def _endpoint_ready_path(sock_path: Path) -> Path: + """Path that exists once the control socket has bound: the unix socket on + 
POSIX, the TCP port file (``.port``) on Windows.""" + return Path(f"{sock_path}.port") if IS_WINDOWS else sock_path + @pytest.fixture def socket_path(tmp_path, monkeypatch): from iai_mcp import concurrency - sock_dir = Path(f"/tmp/iai-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path) + # Per-test endpoint isolation honored by start_ipc_server/open_ipc_connection. + monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path)) try: yield sock_path finally: @@ -40,13 +52,14 @@ async def runner(): serve_control_socket(store=None, state=state, shutdown=shutdown, socket_path=socket_path) ) + ready_path = _endpoint_ready_path(socket_path) for _ in range(100): - if socket_path.exists(): + if ready_path.exists(): break await asyncio.sleep(0.02) - assert socket_path.exists(), "socket never bound" + assert ready_path.exists(), "socket never bound" - reader, writer = await asyncio.open_unix_connection(path=str(socket_path)) + reader, writer = await open_ipc_connection() writer.write(b'{"type":"status"}\n') await writer.drain() line = await reader.readline() @@ -90,15 +103,16 @@ async def runner(): dispatcher=custom_dispatcher, socket_path=socket_path, ) ) + ready_path = _endpoint_ready_path(socket_path) for _ in range(100): - if socket_path.exists(): + if ready_path.exists(): break await asyncio.sleep(0.02) - assert socket_path.exists() + assert ready_path.exists() responses = [] for req in requests: - r, w = await asyncio.open_unix_connection(path=str(socket_path)) + r, w = await open_ipc_connection() w.write((json.dumps(req) + "\n").encode()) await w.drain() line = await r.readline() @@ -120,6 +134,9 @@ async def runner(): assert resp == {"ok": True, "seen": req["type"]} +@pytest.mark.skipif( + IS_WINDOWS, reason="stale unix-socket-file cleanup is POSIX-only (Windows uses a TCP port file)" +) def 
test_stale_socket_cleanup(socket_path): from iai_mcp.concurrency import serve_control_socket @@ -155,6 +172,9 @@ async def runner(): assert resp.get("ok") is True +@pytest.mark.skipif( + IS_WINDOWS, reason="0o600 unix-socket-file mode is POSIX-only (Windows uses a TCP port file)" +) def test_socket_permissions_user_only(socket_path): from iai_mcp.concurrency import serve_control_socket @@ -174,4 +194,5 @@ async def runner(): return sock_mode sock_mode = asyncio.run(runner()) - assert sock_mode == 0o600, f"socket mode is {oct(sock_mode)}, expected 0o600" + if sys.platform != "win32": + assert sock_mode == 0o600, f"socket mode is {oct(sock_mode)}, expected 0o600" diff --git a/tests/test_concurrency_session_open.py b/tests/test_concurrency_session_open.py index 9a7cfab..3cf23b1 100644 --- a/tests/test_concurrency_session_open.py +++ b/tests/test_concurrency_session_open.py @@ -19,10 +19,13 @@ @pytest.fixture -def tmp_socket(tmp_path: Path) -> Path: +def tmp_socket(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: candidate = tmp_path / "d.sock" if len(str(candidate)) > 100: candidate = Path(tempfile.mkdtemp(prefix="iai-sock-")) / "d.sock" + # Per-test endpoint isolation: serve_control_socket + open_ipc_connection + # resolve through this (unix socket on POSIX, TCP ".port" on Windows). 
+ monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(candidate)) return candidate @@ -256,7 +259,9 @@ def stop(self) -> None: async def _send(path: Path, msg: dict, *, timeout: float = 5.0) -> dict: - reader, writer = await asyncio.open_unix_connection(str(path)) + from iai_mcp._ipc import open_ipc_connection + + reader, writer = await open_ipc_connection(timeout=timeout) try: writer.write((json.dumps(msg) + "\n").encode("utf-8")) await writer.drain() diff --git a/tests/test_concurrent_wrapper_spawn.py b/tests/test_concurrent_wrapper_spawn.py index 215d08e..5ef09e8 100644 --- a/tests/test_concurrent_wrapper_spawn.py +++ b/tests/test_concurrent_wrapper_spawn.py @@ -37,7 +37,7 @@ def test_launchagent(tmp_path): if os.environ.get("IAI_MCP_SKIP_LAUNCHCTL_TESTS") == "1": pytest.skip("IAI_MCP_SKIP_LAUNCHCTL_TESTS=1") - sock_dir = Path(f"/tmp/iai-cspawn-{os.getpid()}-{id(tmp_path) & 0xFFFFFF:x}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" if sock_path.exists(): diff --git a/tests/test_core_bedtime_inject.py b/tests/test_core_bedtime_inject.py index 956e6f7..92b3516 100644 --- a/tests/test_core_bedtime_inject.py +++ b/tests/test_core_bedtime_inject.py @@ -12,6 +12,7 @@ import pytest from iai_mcp import core +from iai_mcp._ipc import start_ipc_server class _ThreadedFakeDaemon: @@ -46,7 +47,7 @@ async def _handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> async def _serve() -> None: self.path.parent.mkdir(parents=True, exist_ok=True) - self._server = await asyncio.start_unix_server(_handle, path=str(self.path)) + self._server, _addr, _cleanup = await start_ipc_server(_handle) self._ready.set() async with self._server: await self._server.serve_forever() @@ -83,10 +84,13 @@ async def _shutdown() -> None: @pytest.fixture -def tmp_socket(tmp_path: Path) -> Path: +def tmp_socket(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: candidate = tmp_path / "d.sock" if len(str(candidate)) > 100: 
candidate = Path(tempfile.mkdtemp(prefix="iai-sock-")) / "d.sock" + # Per-test endpoint isolation: start_ipc_server + open_ipc_connection resolve + # through this (unix socket on POSIX, TCP ".port" on Windows). + monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(candidate)) return candidate @@ -115,7 +119,8 @@ async def _handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> pass sock.parent.mkdir(parents=True, exist_ok=True) - return await asyncio.start_unix_server(_handle, path=str(sock)) + server, _addr, _cleanup = await start_ipc_server(_handle) + return server def test_consent_false_short_circuits_no_socket_touch( @@ -124,10 +129,12 @@ def test_consent_false_short_circuits_no_socket_touch( async def _explode(*args, **kwargs): raise AssertionError( - "C2 violation: asyncio.open_unix_connection reached with consent=False" + "C2 violation: daemon connection reached with consent=False" ) - monkeypatch.setattr(asyncio, "open_unix_connection", _explode) + # Patch the actual connection entry point core uses (cross-platform), not + # the POSIX-only asyncio.open_unix_connection. 
+ monkeypatch.setattr("iai_mcp._ipc.open_ipc_connection", _explode) result = asyncio.run( core.handle_initiate_sleep_mode({"consent": False, "reason": "not ready"}) diff --git a/tests/test_crypto_file_backend.py b/tests/test_crypto_file_backend.py index 4272f53..91f2b97 100644 --- a/tests/test_crypto_file_backend.py +++ b/tests/test_crypto_file_backend.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import os import secrets import stat @@ -103,7 +105,8 @@ def test_try_file_set_writes_atomic_with_0o600(tmp_path: Path) -> None: assert key_path.exists() assert key_path.read_bytes() == payload mode = stat.S_IMODE(os.stat(key_path).st_mode) - assert mode == 0o600 + if sys.platform != "win32": + assert mode == 0o600 leftover_tmps = list(tmp_path.glob(".crypto.key.tmp.*")) assert leftover_tmps == [], f"leaked tmp files: {leftover_tmps}" @@ -174,7 +177,8 @@ def fake_delete(service: str, username: str) -> None: key_path = tmp_path / ".crypto.key" assert key_path.exists() mode = stat.S_IMODE(os.stat(key_path).st_mode) - assert mode == 0o600 + if sys.platform != "win32": + assert mode == 0o600 assert key_path.read_bytes() == keyring_key, ( "file contents must equal the round-tripped keyring key bytes" ) diff --git a/tests/test_daemon.py b/tests/test_daemon.py index 05a628c..268375a 100644 --- a/tests/test_daemon.py +++ b/tests/test_daemon.py @@ -23,7 +23,7 @@ def _short_socket_paths(tmp_path, monkeypatch): import os from iai_mcp import concurrency lock_path = tmp_path / ".lock" - sock_dir = Path(f"/tmp/iai-daemon-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path) diff --git a/tests/test_daemon_crash_loop_immunity.py b/tests/test_daemon_crash_loop_immunity.py index 85b7b0d..96154f1 100644 --- a/tests/test_daemon_crash_loop_immunity.py +++ b/tests/test_daemon_crash_loop_immunity.py @@ -12,10 +12,17 @@ import pytest 
+from _socket_test_helpers import ( + daemon_endpoint, + daemon_endpoint_ready_path, + new_daemon_client_socket, +) + @pytest.fixture def iai_home(tmp_path, monkeypatch): monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) # Path.home() reads USERPROFILE on Windows monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring") monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-crash-loop-passphrase") monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / ".iai-mcp" / "hippo")) @@ -217,6 +224,7 @@ def _stub(*_args: Any, **_kwargs: Any) -> dict: def test_socket_binds_before_drain_completes(tmp_path, monkeypatch, request): monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) # Path.home() reads USERPROFILE on Windows monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring") monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-bind-first-passphrase") monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / ".iai-mcp" / "hippo")) @@ -225,7 +233,7 @@ def test_socket_binds_before_drain_completes(tmp_path, monkeypatch, request): keyring.core._keyring_backend = None - tmp_socket = Path(f"/tmp/iai-test-{os.getpid()}-{int(time.time()*1000)}.sock") + tmp_socket = tmp_path / f"iai-test-{os.getpid()}.sock" monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(tmp_socket)) def _cleanup_socket(): @@ -270,11 +278,11 @@ async def _scenario() -> bool: if exc is not None: raise exc return False - if tmp_socket.exists(): + if daemon_endpoint_ready_path(tmp_socket).exists(): try: - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s = new_daemon_client_socket() s.settimeout(1.0) - await asyncio.to_thread(s.connect, str(tmp_socket)) + await asyncio.to_thread(s.connect, daemon_endpoint(tmp_socket)) s.close() snapshot["bound_at"] = time.monotonic() snapshot["drain_started"] = drain_state["started"] @@ -313,6 +321,7 @@ async def _scenario() -> bool: def 
test_atomic_claim_logs_generic_oserror(tmp_path, monkeypatch): monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("USERPROFILE", str(tmp_path)) # Path.home() reads USERPROFILE on Windows monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring") monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "p6-1-fix-a-test-passphrase") monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / ".iai-mcp" / "hippo")) @@ -330,14 +339,16 @@ def test_atomic_claim_logs_generic_oserror(tmp_path, monkeypatch): import pathlib as _pathlib - real_rename = _pathlib.Path.rename + real_replace = _pathlib.Path.replace def boom(self, target): if ".processing-" in str(target) and self == fpath: raise PermissionError("simulated EACCES on atomic claim") - return real_rename(self, target) + return real_replace(self, target) - monkeypatch.setattr(_pathlib.Path, "rename", boom) + # The atomic claim uses Path.replace (os.replace) — not rename — so the + # claim survives a pre-existing dest on Windows. Patch what the code calls. + monkeypatch.setattr(_pathlib.Path, "replace", boom) from iai_mcp.capture import drain_deferred_captures from iai_mcp.store import MemoryStore @@ -387,7 +398,8 @@ def test_strip_processing_marker_returns_false_on_rename_failure( def boom(self, target): raise PermissionError("simulated") - monkeypatch.setattr(_pathlib.Path, "rename", boom) + # _strip_processing_marker uses Path.replace (os.replace), not rename. 
+ monkeypatch.setattr(_pathlib.Path, "replace", boom) new_path, ok = _strip_processing_marker(src, log_path=log_path) assert ok is False, "strip MUST report failure" diff --git a/tests/test_daemon_dispatcher.py b/tests/test_daemon_dispatcher.py index e1c3dd3..d30c7ba 100644 --- a/tests/test_daemon_dispatcher.py +++ b/tests/test_daemon_dispatcher.py @@ -8,18 +8,29 @@ import pytest +from iai_mcp._ipc import IS_WINDOWS, open_ipc_connection + + +def _endpoint_ready_path(sock_path: Path) -> Path: + """Path that exists once the control socket has bound: the unix socket on + POSIX, the TCP port file (``{sock_path}.port``) on Windows.""" + return Path(f"{sock_path}.port") if IS_WINDOWS else sock_path + @pytest.fixture def short_socket_paths(tmp_path, monkeypatch): from iai_mcp import concurrency, daemon_state - sock_dir = Path(f"/tmp/iai-disp-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" state_path = tmp_path / ".daemon-state.json" monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path) monkeypatch.setattr(daemon_state, "STATE_PATH", state_path) + # Per-test endpoint isolation (unix socket on POSIX; TCP port file on + # Windows) via the env var start_ipc_server/open_ipc_connection honor.
+ monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path)) try: yield None, sock_path, state_path @@ -36,10 +47,7 @@ def short_socket_paths(tmp_path, monkeypatch): async def _send_ndjson(sock_path: Path, message: dict, *, timeout: float = 5.0) -> dict: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(path=str(sock_path)), - timeout=timeout, - ) + reader, writer = await open_ipc_connection(timeout=timeout) try: writer.write((json.dumps(message) + "\n").encode("utf-8")) await writer.drain() @@ -67,11 +75,12 @@ async def _with_real_dispatcher(sock_path: Path, state: dict, coro_fn): socket_path=sock_path, ), ) + ready_path = _endpoint_ready_path(sock_path) for _ in range(250): - if sock_path.exists(): + if ready_path.exists(): break await asyncio.sleep(0.01) - if not sock_path.exists(): + if not ready_path.exists(): shutdown.set() await asyncio.wait_for(server_task, timeout=5) raise AssertionError("socket never bound") diff --git a/tests/test_daemon_fdlimit_and_fsm.py b/tests/test_daemon_fdlimit_and_fsm.py index 14106d9..9da88d9 100644 --- a/tests/test_daemon_fdlimit_and_fsm.py +++ b/tests/test_daemon_fdlimit_and_fsm.py @@ -1,8 +1,10 @@ from __future__ import annotations import json -import resource import sys + +if sys.platform != "win32": + import resource from pathlib import Path from unittest.mock import MagicMock, patch @@ -13,6 +15,7 @@ from iai_mcp.s2_coordinator import S2Coordinator +@pytest.mark.skipif(sys.platform == "win32", reason="resource module not available on Windows") class TestRaiseFdLimitClampsToHard: def test_raises_low_soft_to_floor(self): diff --git a/tests/test_daemon_state.py b/tests/test_daemon_state.py index f2f6cf3..d052b16 100644 --- a/tests/test_daemon_state.py +++ b/tests/test_daemon_state.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import json import os from datetime import datetime, timedelta, timezone @@ -30,7 +32,8 @@ def 
test_save_and_load_roundtrip_with_0600_mode(isolated_state_path): assert isolated_state_path.exists() mode = isolated_state_path.stat().st_mode & 0o777 - assert mode == 0o600, f"expected 0o600, got {oct(mode)}" + if sys.platform != "win32": + assert mode == 0o600, f"expected 0o600, got {oct(mode)}" loaded = load_state() assert loaded == state diff --git a/tests/test_daemon_watchdog.py b/tests/test_daemon_watchdog.py index 74bed32..ef973a9 100644 --- a/tests/test_daemon_watchdog.py +++ b/tests/test_daemon_watchdog.py @@ -10,6 +10,15 @@ import pytest from iai_mcp import daemon +from _socket_test_helpers import bind_fake_daemon_socket + +# The watchdog's self-kill path uses signal.SIGKILL, which doesn't exist on +# Windows (the production code guards it with hasattr). These tests assert that +# POSIX self-kill behavior, so they only apply where SIGKILL exists. +_REQUIRES_SIGKILL = pytest.mark.skipif( + not hasattr(signal, "SIGKILL"), + reason="watchdog SIGKILL self-kill is POSIX-only (guarded off on Windows)", +) HARD_CAP = 2_684_354_560 FLOOR = 1_610_612_736 @@ -234,6 +243,7 @@ def _read_breadcrumb(log_path): return log_path.read_text(encoding="utf-8") +@_REQUIRES_SIGKILL def test_thread_wedge_after_n_consecutive_kills(watchdog_env): store = object() consec = 0 @@ -280,6 +290,7 @@ def test_thread_healthy_busy_not_killed(watchdog_env): assert consec == 0 +@_REQUIRES_SIGKILL def test_thread_warn_plus_big_memory_kill(watchdog_env): store = object() consec = 0 @@ -408,6 +419,7 @@ def _fake_write_event(store, kind, data, **kw): pass +@_REQUIRES_SIGKILL def test_self_kill_is_unconditional_when_breadcrumb_fails_wedge( tmp_path, monkeypatch ): @@ -439,6 +451,7 @@ def test_self_kill_is_unconditional_when_breadcrumb_fails_wedge( assert kill_calls == [(os.getpid(), signal.SIGKILL)] +@_REQUIRES_SIGKILL def test_self_kill_is_unconditional_when_breadcrumb_fails_memory( tmp_path, monkeypatch ): @@ -470,6 +483,7 @@ def test_self_kill_is_unconditional_when_breadcrumb_fails_memory( 
assert kill_calls == [(os.getpid(), signal.SIGKILL)] +@_REQUIRES_SIGKILL def test_self_kill_direct_breadcrumb_failure_still_kills(tmp_path, monkeypatch): def _raise(_line): @@ -484,16 +498,17 @@ def _raise(_line): assert kill_calls == [(os.getpid(), signal.SIGKILL)] -def test_probe_returns_false_when_no_socket(tmp_path): +def test_probe_returns_false_when_no_socket(tmp_path, monkeypatch): sock_path = str(tmp_path / "absent.sock") + # Isolate the endpoint so the probe can't reach a real daemon on this box. + monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", sock_path) assert asyncio.run(daemon._probe_status_roundtrip(sock_path, 0.2)) is False -def test_probe_returns_false_on_connect_but_no_reply(tmp_path, short_socket): +def test_probe_returns_false_on_connect_but_no_reply(tmp_path, short_socket, monkeypatch): sock_path = str(short_socket) - srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - srv.bind(sock_path) - srv.listen(1) + monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", sock_path) + srv = bind_fake_daemon_socket(sock_path) accepted: list = [] def _accept_and_hang(): @@ -517,11 +532,10 @@ def _accept_and_hang(): srv.close() -def test_probe_returns_true_on_full_roundtrip(tmp_path, short_socket): +def test_probe_returns_true_on_full_roundtrip(tmp_path, short_socket, monkeypatch): sock_path = str(short_socket) - srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - srv.bind(sock_path) - srv.listen(1) + monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", sock_path) + srv = bind_fake_daemon_socket(sock_path) held: list = [] def _accept_and_reply(): diff --git a/tests/test_doctor_apply_recovery.py b/tests/test_doctor_apply_recovery.py index 7d39319..7a04d56 100644 --- a/tests/test_doctor_apply_recovery.py +++ b/tests/test_doctor_apply_recovery.py @@ -23,7 +23,7 @@ def isolated_daemon_paths(tmp_path, monkeypatch): store_dir = iai_dir / "store" store_dir.mkdir(parents=True, exist_ok=True) - sock_dir = Path(f"/tmp/iai-rec-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path 
/ "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" diff --git a/tests/test_doctor_checklist.py b/tests/test_doctor_checklist.py index 870b3db..8e2641a 100644 --- a/tests/test_doctor_checklist.py +++ b/tests/test_doctor_checklist.py @@ -10,11 +10,14 @@ import pytest +from iai_mcp._ipc import IS_WINDOWS +from _socket_test_helpers import bind_fake_daemon_socket + @pytest.fixture def short_socket_paths(tmp_path, monkeypatch): lock_path = tmp_path / ".lock" - sock_dir = Path(f"/tmp/iai-doc-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" state_path = tmp_path / ".daemon-state.json" @@ -222,9 +225,7 @@ def test_check_b_passes_against_silent_listening_socket(short_socket_paths): if sock_path.exists(): sock_path.unlink() - server = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) - server.bind(str(sock_path)) - server.listen(8) + server = bind_fake_daemon_socket(sock_path) stop = threading.Event() accepted: list = [] @@ -269,6 +270,10 @@ def _accept_loop(): th.join(timeout=1.0) +@pytest.mark.skipif( + IS_WINDOWS, + reason="regular-file-where-a-socket-should-be is an AF_UNIX concept; Windows uses a TCP port file", +) def test_check_b_fails_when_socket_is_regular_file(short_socket_paths): _, sock_path, _ = short_socket_paths if sock_path.exists(): diff --git a/tests/test_doctor_lock_probe.py b/tests/test_doctor_lock_probe.py index 18452d7..46dab73 100644 --- a/tests/test_doctor_lock_probe.py +++ b/tests/test_doctor_lock_probe.py @@ -1,8 +1,8 @@ from __future__ import annotations -import fcntl - import pytest +from iai_mcp._filelock import LOCK_EX, LOCK_NB, LOCK_UN +from iai_mcp._filelock import flock as _flock from iai_mcp.doctor import check_c_lock_healthy @@ -46,14 +46,14 @@ def test_held_lock_is_healthy(tmp_store): try: with open(lock_path, "r") as held: held_fd = held.fileno() - fcntl.flock(held_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + 
_flock(held_fd, LOCK_EX | LOCK_NB) result = check_c_lock_healthy() assert result.passed is True assert result.name == "(c) lock file healthy" assert "held" in result.detail - fcntl.flock(held_fd, fcntl.LOCK_UN) + _flock(held_fd, LOCK_UN) finally: pass diff --git a/tests/test_drain_active_live_e2e.py b/tests/test_drain_active_live_e2e.py index ebfba36..6e28232 100644 --- a/tests/test_drain_active_live_e2e.py +++ b/tests/test_drain_active_live_e2e.py @@ -1,5 +1,7 @@ from __future__ import annotations +import tempfile + import json import platform from pathlib import Path @@ -41,7 +43,7 @@ def _write_live_file( "version": 1, "deferred_at": "2026-05-31T04:45:00.000000+00:00", "session_id": session_id, - "cwd": "/tmp/test", + "cwd": str(Path(tempfile.gettempdir()) / "test"), } lines = [json.dumps(header, ensure_ascii=False)] for ev in events: diff --git a/tests/test_drain_deferred_captures.py b/tests/test_drain_deferred_captures.py index 4a0d401..037376f 100644 --- a/tests/test_drain_deferred_captures.py +++ b/tests/test_drain_deferred_captures.py @@ -373,7 +373,7 @@ def test_daemon_main_drain_does_not_crash_on_bad_file(tmp_path, monkeypatch): ) assert bad.exists() - sock_dir = Path(f"/tmp/iai-drn-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" diff --git a/tests/test_episodic_verbatim_dedup.py b/tests/test_episodic_verbatim_dedup.py index d0e7d06..ba34417 100644 --- a/tests/test_episodic_verbatim_dedup.py +++ b/tests/test_episodic_verbatim_dedup.py @@ -1,5 +1,7 @@ from __future__ import annotations +import tempfile + import json import platform from datetime import datetime, timezone @@ -55,7 +57,7 @@ def _write_live_file( "version": 1, "deferred_at": "2026-05-30T10:00:00.000000+00:00", "session_id": session_id, - "cwd": "/tmp/test", + "cwd": str(Path(tempfile.gettempdir()) / "test"), } lines = [json.dumps(header, ensure_ascii=False)] for ev in events: diff --git 
a/tests/test_hippo_skeleton.py b/tests/test_hippo_skeleton.py index 224eb08..454c1bc 100644 --- a/tests/test_hippo_skeleton.py +++ b/tests/test_hippo_skeleton.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import stat from datetime import datetime, timezone from pathlib import Path @@ -220,7 +222,8 @@ def test_lock_file_created_on_open(tmp_path: Path) -> None: with HippoDB(tmp_path): assert lock_path.exists() mode = stat.S_IMODE(lock_path.stat().st_mode) - assert mode == 0o600, f"Expected 0o600, got {oct(mode)}" + if sys.platform != "win32": + assert mode == 0o600, f"Expected 0o600, got {oct(mode)}" def test_second_open_same_process_succeeds(tmp_path: Path) -> None: diff --git a/tests/test_iai_recall_fail_fast.py b/tests/test_iai_recall_fail_fast.py index b1bd3bf..ae94645 100644 --- a/tests/test_iai_recall_fail_fast.py +++ b/tests/test_iai_recall_fail_fast.py @@ -12,6 +12,7 @@ sys.path.insert(0, str(Path(__file__).parent)) from test_store import _make +from _socket_test_helpers import bind_fake_daemon_socket FAIL_FAST_CEILING_S = 3.5 @@ -44,15 +45,7 @@ def _unix_socket_server_stall(sock_path: str, stall_seconds: float = 60.0) -> th ready = threading.Event() def _server(): - try: - os.unlink(sock_path) - except FileNotFoundError: - pass - - srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - srv.bind(sock_path) - srv.listen(5) + srv = bind_fake_daemon_socket(sock_path) ready.set() srv.settimeout(120.0) try: @@ -78,15 +71,7 @@ def _unix_socket_server_fast(sock_path: str, hits: list[dict]) -> threading.Even ready = threading.Event() def _server(): - try: - os.unlink(sock_path) - except FileNotFoundError: - pass - - srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - srv.bind(sock_path) - srv.listen(5) + srv = bind_fake_daemon_socket(sock_path) ready.set() srv.settimeout(10.0) try: diff --git 
a/tests/test_immediate_recall_live.py b/tests/test_immediate_recall_live.py index a16d077..850cbb0 100644 --- a/tests/test_immediate_recall_live.py +++ b/tests/test_immediate_recall_live.py @@ -1,5 +1,7 @@ from __future__ import annotations +import tempfile + import hashlib import json import os @@ -29,7 +31,7 @@ def _write_live_file( "version": version, "deferred_at": datetime.now(timezone.utc).isoformat(), "session_id": session_id, - "cwd": "/tmp/test", + "cwd": str(Path(tempfile.gettempdir()) / "test"), } lines = [json.dumps(header, ensure_ascii=False)] for ev in events: diff --git a/tests/test_lat05_asleep_skip.py b/tests/test_lat05_asleep_skip.py index aa9813d..ea2f885 100644 --- a/tests/test_lat05_asleep_skip.py +++ b/tests/test_lat05_asleep_skip.py @@ -15,6 +15,7 @@ sys.path.insert(0, str(Path(__file__).parent)) from test_store import _make +from _socket_test_helpers import bind_fake_daemon_socket SLEEP_SKIP_CEILING_S = 1.5 @@ -56,15 +57,7 @@ def _start_stall_server(sock_path: str, stall_seconds: float = 60.0) -> threadin ready = threading.Event() def _server(): - try: - os.unlink(sock_path) - except FileNotFoundError: - pass - - srv = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - srv.bind(sock_path) - srv.listen(5) + srv = bind_fake_daemon_socket(sock_path) ready.set() srv.settimeout(120.0) try: diff --git a/tests/test_lifecycle_event_log.py b/tests/test_lifecycle_event_log.py index 63c37b8..e64e3e9 100644 --- a/tests/test_lifecycle_event_log.py +++ b/tests/test_lifecycle_event_log.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import gzip import json import multiprocessing as mp @@ -84,7 +86,8 @@ def test_log_file_chmod_user_only(tmp_path): log = LifecycleEventLog(log_dir=tmp_path) log.append({"event": "wrapper_event", "kind": "heartbeat_refresh"}) mode = os.stat(log.current_file()).st_mode & 0o777 - assert mode == 0o600 + if sys.platform != "win32": + assert mode == 
0o600 def test_rotation_writes_to_per_date_file(tmp_path): diff --git a/tests/test_lifecycle_lock.py b/tests/test_lifecycle_lock.py index 010f38d..f3b6d31 100644 --- a/tests/test_lifecycle_lock.py +++ b/tests/test_lifecycle_lock.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import json import os from pathlib import Path @@ -223,4 +225,5 @@ def test_acquire_writes_mode_0600(tmp_path: Path) -> None: lock.acquire() mode = lock_path.stat().st_mode & 0o777 - assert mode == 0o600, f"expected mode 0o600, got 0o{mode:o}" + if sys.platform != "win32": + assert mode == 0o600, f"expected mode 0o600, got 0o{mode:o}" diff --git a/tests/test_lifecycle_state.py b/tests/test_lifecycle_state.py index 69fdd04..2410d51 100644 --- a/tests/test_lifecycle_state.py +++ b/tests/test_lifecycle_state.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import json import os from datetime import datetime, timezone @@ -123,7 +125,8 @@ def test_save_state_chmod_user_only(tmp_path): target = tmp_path / "lifecycle_state.json" save_state(default_state(), target) mode = os.stat(target).st_mode & 0o777 - assert mode == 0o600 + if sys.platform != "win32": + assert mode == 0o600 def test_save_state_rejects_invalid_record(tmp_path): diff --git a/tests/test_live_e2e_gate.py b/tests/test_live_e2e_gate.py index 98aef7a..ad8b2b4 100644 --- a/tests/test_live_e2e_gate.py +++ b/tests/test_live_e2e_gate.py @@ -1,8 +1,9 @@ from __future__ import annotations import errno -import fcntl import json +from iai_mcp._filelock import LOCK_NB, LOCK_SH, LOCK_UN +from iai_mcp._filelock import flock as _flock import os import shutil import subprocess @@ -245,8 +246,8 @@ def _ex_held(store_dir: Path) -> bool: probe_fd = -1 try: probe_fd = os.open(str(lock_path), os.O_RDWR) - fcntl.flock(probe_fd, fcntl.LOCK_SH | fcntl.LOCK_NB) - fcntl.flock(probe_fd, fcntl.LOCK_UN) + _flock(probe_fd, LOCK_SH | LOCK_NB) + _flock(probe_fd, LOCK_UN) return False except OSError as exc: if exc.errno in 
(errno.EAGAIN, errno.EWOULDBLOCK): diff --git a/tests/test_lock_starvation.py b/tests/test_lock_starvation.py index 2eb8cc9..e39670e 100644 --- a/tests/test_lock_starvation.py +++ b/tests/test_lock_starvation.py @@ -1,7 +1,8 @@ from __future__ import annotations -import fcntl import os +from iai_mcp._filelock import LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN +from iai_mcp._filelock import flock as _flock import tempfile import threading import time @@ -33,7 +34,7 @@ def _reader_loop() -> None: acquired = acquire_client_shared_nb(fd, lock_path) if acquired: time.sleep(0.001) - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) else: time.sleep(0.001) except Exception as exc: @@ -54,13 +55,13 @@ def _reader_loop() -> None: deadline = time.monotonic() + 4.0 while time.monotonic() < deadline: try: - fcntl.flock(fd_ex, fcntl.LOCK_EX | fcntl.LOCK_NB) + _flock(fd_ex, LOCK_EX | LOCK_NB) acquired = True break except OSError: time.sleep(0.01) if acquired: - fcntl.flock(fd_ex, fcntl.LOCK_UN) + _flock(fd_ex, LOCK_UN) os.close(fd_ex) finally: clear_consolidation_intent(lock_path) @@ -121,10 +122,10 @@ def test_recency_read_during_busy_meets_slo(hermetic_store: Path) -> None: def _hold_exclusive() -> None: fd = os.open(str(lock_path), os.O_RDWR) try: - fcntl.flock(fd, fcntl.LOCK_EX) + _flock(fd, LOCK_EX) ready.set() done.wait(timeout=3.0) - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) finally: os.close(fd) @@ -178,11 +179,11 @@ def _churn_client() -> None: acquired = acquire_client_shared_nb(fd, lock_path) if acquired: if check_consolidation_intent(lock_path): - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) post_acquire_recheck_count += 1 else: time.sleep(0.0005) - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) else: time.sleep(0.001) except Exception as exc: @@ -202,15 +203,15 @@ def _prober_client() -> None: _intent_set.wait(timeout=2.0) try: - fcntl.flock(fd, fcntl.LOCK_SH | fcntl.LOCK_NB) + _flock(fd, LOCK_SH | LOCK_NB) except OSError: return if 
check_consolidation_intent(lock_path): - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) post_acquire_recheck_count += 1 else: - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) finally: os.close(fd) @@ -233,13 +234,13 @@ def _prober_client() -> None: deadline = time.monotonic() + 4.0 while time.monotonic() < deadline: try: - fcntl.flock(fd_ex, fcntl.LOCK_EX | fcntl.LOCK_NB) + _flock(fd_ex, LOCK_EX | LOCK_NB) acquired = True break except OSError: time.sleep(0.005) if acquired: - fcntl.flock(fd_ex, fcntl.LOCK_UN) + _flock(fd_ex, LOCK_UN) os.close(fd_ex) finally: clear_consolidation_intent(lock_path) @@ -272,10 +273,10 @@ def test_client_lock_wait_bounded_below_slo(hermetic_store: Path) -> None: def _hold_ex() -> None: fd = os.open(str(lock_path), os.O_RDWR) try: - fcntl.flock(fd, fcntl.LOCK_EX) + _flock(fd, LOCK_EX) ready.set() done.wait(timeout=0.6) - fcntl.flock(fd, fcntl.LOCK_UN) + _flock(fd, LOCK_UN) finally: os.close(fd) @@ -297,7 +298,7 @@ def _hold_ex() -> None: time.sleep(0.01) elapsed = time.monotonic() - t0 if acquired: - fcntl.flock(fd_sh, fcntl.LOCK_UN) + _flock(fd_sh, LOCK_UN) finally: os.close(fd_sh) t.join(timeout=2.0) diff --git a/tests/test_mcp_tools.py b/tests/test_mcp_tools.py index 1e411c9..9d3a694 100644 --- a/tests/test_mcp_tools.py +++ b/tests/test_mcp_tools.py @@ -33,7 +33,8 @@ def built_wrapper() -> Path: @pytest.fixture(scope="module") -def daemon_sock() -> "Path": - sock_dir = Path(f"/tmp/iai-mcp-tools-{os.getpid()}") +def daemon_sock(tmp_path_factory) -> "Path": + # Module-scoped fixture: the function-scoped tmp_path is unavailable here, so use tmp_path_factory. + sock_dir = tmp_path_factory.mktemp("sock") sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" store_dir = sock_dir / "store" diff --git a/tests/test_memory_bank_processed.py b/tests/test_memory_bank_processed.py index a26fb18..6d53f7c 100644 --- a/tests/test_memory_bank_processed.py +++ b/tests/test_memory_bank_processed.py @@ -5,6 +5,7 @@ import logging import os import stat +import sys from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path @@ -125,7 +126,8 @@
def test_processed_salience_top_n_written_at_rem_completion( assert target.exists(), f"expected file at {target}" file_mode = oct(stat.S_IMODE(os.stat(target).st_mode)) - assert file_mode == "0o600", f"file mode {file_mode} != 0o600" + if sys.platform != "win32": + assert file_mode == "0o600", f"file mode {file_mode} != 0o600" lines = _read_jsonl(target) expected_count = min(m_records, 4) diff --git a/tests/test_memory_bank_recent.py b/tests/test_memory_bank_recent.py index aed08b4..deeb119 100644 --- a/tests/test_memory_bank_recent.py +++ b/tests/test_memory_bank_recent.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import base64 import json import os @@ -110,7 +112,8 @@ def test_recent_append_creates_dated_window_file(iai_home): file_mode = stat.S_IMODE(os.stat(target).st_mode) parent_mode = stat.S_IMODE(os.stat(target.parent).st_mode) - assert file_mode == 0o600, f"file mode = 0o{file_mode:o}, expected 0o600" - assert parent_mode == 0o700, f"parent mode = 0o{parent_mode:o}, expected 0o700" + if sys.platform != "win32": + assert file_mode == 0o600, f"file mode = 0o{file_mode:o}, expected 0o600" + assert parent_mode == 0o700, f"parent mode = 0o{parent_mode:o}, expected 0o700" body = target.read_text(encoding="utf-8") diff --git a/tests/test_session_payload_latency.py b/tests/test_session_payload_latency.py index b1e281c..38bbca9 100644 --- a/tests/test_session_payload_latency.py +++ b/tests/test_session_payload_latency.py @@ -1,5 +1,7 @@ from __future__ import annotations +import tempfile + import json import time from datetime import datetime, timezone, timedelta @@ -20,7 +22,7 @@ def _make_large_live_file(deferred_dir: Path, session_id: str, n_events: int = 5 "version": 1, "deferred_at": datetime.now(timezone.utc).isoformat(), "session_id": session_id, - "cwd": "/tmp/latency-test", + "cwd": str(Path(tempfile.gettempdir()) / "latency-test"), } lines = [json.dumps(header, ensure_ascii=False)] base = datetime(2026, 5, 31, 8, 0, 0, tzinfo=timezone.utc) diff --git a/tests/test_session_recall_precache.py
b/tests/test_session_recall_precache.py index 0d5d3b1..e0e9aad 100644 --- a/tests/test_session_recall_precache.py +++ b/tests/test_session_recall_precache.py @@ -157,10 +157,11 @@ def test_cache_file_mode_is_owner_only(tmp_path, monkeypatch): daemon_mod._write_session_start_cache(store, cache_path=cache_path) assert cache_path.exists(), "cache file was not created" - assert oct(stat.S_IMODE(cache_path.stat().st_mode)) == "0o600", ( - f"cache file mode is not 0o600; got " - f"{oct(stat.S_IMODE(cache_path.stat().st_mode))}" - ) + if sys.platform != "win32": + assert oct(stat.S_IMODE(cache_path.stat().st_mode)) == "0o600", ( + f"cache file mode is not 0o600; got " + f"{oct(stat.S_IMODE(cache_path.stat().st_mode))}" + ) def test_precache_does_not_compress_payload(tmp_path, monkeypatch): from iai_mcp import daemon as daemon_mod diff --git a/tests/test_socket_activity_tracking.py b/tests/test_socket_activity_tracking.py index 9bde5d3..f6d22f5 100644 --- a/tests/test_socket_activity_tracking.py +++ b/tests/test_socket_activity_tracking.py @@ -20,18 +20,29 @@ import pytest +from iai_mcp._ipc import IS_WINDOWS, open_ipc_connection + + +def _endpoint_ready_path(sock_path: Path) -> Path: + """Path that exists once SocketServer has bound: the unix socket on POSIX, + the TCP port file (``{sock_path}.port``) on Windows.""" + return Path(f"{sock_path}.port") if IS_WINDOWS else sock_path + @pytest.fixture def short_socket_paths(tmp_path, monkeypatch): from iai_mcp import concurrency, daemon_state - sock_dir = Path(f"/tmp/iai-srvact-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" state_path = tmp_path / ".daemon-state.json" monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path) monkeypatch.setattr(daemon_state, "STATE_PATH", state_path) + # Per-test endpoint isolation (unix socket on POSIX; TCP port file on + # Windows) via the env var both serve() and open_ipc_connection() honor.
+ monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path)) store_root = tmp_path / "store_root" store_root.mkdir(parents=True, exist_ok=True) monkeypatch.setenv("IAI_MCP_STORE", str(store_root)) @@ -51,9 +62,7 @@ def short_socket_paths(tmp_path, monkeypatch): async def _send_line(sock_path: Path, payload: dict, *, timeout: float = 10.0) -> dict: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(path=str(sock_path)), timeout=timeout, - ) + reader, writer = await open_ipc_connection(timeout=timeout) try: writer.write((json.dumps(payload) + "\n").encode("utf-8")) await writer.drain() @@ -73,12 +82,13 @@ async def _serve(sock_path: Path, store, coro_fn): from iai_mcp.socket_server import SocketServer srv = SocketServer(store, idle_secs=99999) - server_task = asyncio.create_task(srv.serve(socket_path=sock_path)) + server_task = asyncio.create_task(srv.serve()) + ready_path = _endpoint_ready_path(sock_path) for _ in range(250): - if sock_path.exists(): + if ready_path.exists(): break await asyncio.sleep(0.01) - if not sock_path.exists(): + if not ready_path.exists(): srv.shutdown_event.set() raise AssertionError("socket never bound") try: diff --git a/tests/test_socket_disconnect_reconnect.py b/tests/test_socket_disconnect_reconnect.py index d1555f6..48d13c5 100644 --- a/tests/test_socket_disconnect_reconnect.py +++ b/tests/test_socket_disconnect_reconnect.py @@ -11,6 +11,19 @@ import pytest +from iai_mcp._ipc import IS_WINDOWS + +# Heavy end-to-end integration test: builds the Node mcp-wrapper via npm and +# drives it against an embedded AF_UNIX fake daemon, exercising the full +# stdio<->unix-socket bridge and reconnect path. Both the npm subprocess +# invocation and the AF_UNIX bridge are POSIX-stack-specific; a Windows port +# needs the Node wrapper to speak TCP loopback (separate effort). The Windows +# socket dispatch/reconnect behavior is covered by the ported _ipc unit tests. 
+pytestmark = pytest.mark.skipif( + IS_WINDOWS, + reason="AF_UNIX + npm + Node-wrapper bridge integration; Windows path covered by _ipc unit tests", +) + REPO = Path(__file__).resolve().parent.parent WRAPPER = REPO / "mcp-wrapper" @@ -160,7 +173,7 @@ def _drop_fake_daemon_conn(proc: subprocess.Popen) -> None: @pytest.fixture -def fake_daemon(): - sock_dir = Path(f"/tmp/iai-mcp-disconnect-{os.getpid()}") +def fake_daemon(tmp_path): + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" diff --git a/tests/test_socket_fail_loud.py b/tests/test_socket_fail_loud.py index d3aad26..860f07b 100644 --- a/tests/test_socket_fail_loud.py +++ b/tests/test_socket_fail_loud.py @@ -13,10 +13,17 @@ import psutil import pytest +from _socket_test_helpers import ( + daemon_endpoint, + daemon_endpoint_ready_path, + new_daemon_client_socket, + send_daemon_token, +) + @pytest.fixture def short_socket_paths(tmp_path): lock_path = tmp_path / ".lock" - sock_dir = Path(f"/tmp/iai-fl-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" state_path = tmp_path / ".daemon-state.json" @@ -63,9 +70,10 @@ def _spawn_daemon_for_test(sock_path: Path, store_root: Path) -> subprocess.Pope ) def _wait_for_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool: + ready = daemon_endpoint_ready_path(sock_path) deadline = time.monotonic() + timeout_sec while time.monotonic() < deadline: - if sock_path.exists(): + if ready.exists(): return True time.sleep(0.1) return False @@ -93,7 +101,7 @@ def test_kill_daemon_midcall_no_orphan_core_spawn(short_socket_paths, tmp_path): f"(baseline={baseline}, before={before}) — singleton invariant violated" ) - proc.send_signal(signal.SIGKILL) + proc.kill() proc.wait(timeout=5) time.sleep(0.5) @@ -106,11 +114,11 @@ def test_kill_daemon_midcall_no_orphan_core_spawn(short_socket_paths, tmp_path): "— invariant: the daemon must never spawn a second core."
) - s = sk.socket(sk.AF_UNIX, sk.SOCK_STREAM) + s = new_daemon_client_socket() s.settimeout(0.5) err_kind = None try: - s.connect(str(sock_path)) + s.connect(daemon_endpoint(sock_path)) err_kind = "no_error" except (ConnectionRefusedError, FileNotFoundError, OSError) as e: err_kind = type(e).__name__ @@ -120,11 +128,11 @@ def test_kill_daemon_midcall_no_orphan_core_spawn(short_socket_paths, tmp_path): except OSError: pass assert err_kind in ( - "ConnectionRefusedError", "FileNotFoundError", "OSError", + "ConnectionRefusedError", "FileNotFoundError", "OSError", "TimeoutError", ), f"unexpected post-kill connect outcome: {err_kind}" finally: if proc.poll() is None: - proc.send_signal(signal.SIGKILL) + proc.kill() try: proc.wait(timeout=5) except subprocess.TimeoutExpired: @@ -146,9 +154,10 @@ def test_kill_daemon_during_active_connection(short_socket_paths, tmp_path): "daemon never bound socket within 30s" ) - s = sk.socket(sk.AF_UNIX, sk.SOCK_STREAM) + s = new_daemon_client_socket() s.settimeout(15) - s.connect(str(sock_path)) + s.connect(daemon_endpoint(sock_path)) + send_daemon_token(s, sock_path) # Windows handshake; no-op on POSIX msg = (json.dumps({"type": "status"}) + "\n").encode("utf-8") s.sendall(msg) @@ -162,7 +171,7 @@ def test_kill_daemon_during_active_connection(short_socket_paths, tmp_path): decoded = json.loads(first_response.decode("utf-8")) assert decoded.get("ok") is True, decoded - proc.send_signal(signal.SIGKILL) + proc.kill() proc.wait(timeout=5) s.settimeout(2.0) @@ -183,11 +192,11 @@ def test_kill_daemon_during_active_connection(short_socket_paths, tmp_path): "wrapper-side daemon_unreachable translation would silently hang" ) - s2 = sk.socket(sk.AF_UNIX, sk.SOCK_STREAM) + s2 = new_daemon_client_socket() s2.settimeout(0.5) err_kind = None try: - s2.connect(str(sock_path)) + s2.connect(daemon_endpoint(sock_path)) err_kind = "no_error" except (ConnectionRefusedError, FileNotFoundError, OSError) as e: err_kind = type(e).__name__ @@ -197,11 +206,11 
@@ def test_kill_daemon_during_active_connection(short_socket_paths, tmp_path): except OSError: pass assert err_kind in ( - "ConnectionRefusedError", "FileNotFoundError", "OSError", + "ConnectionRefusedError", "FileNotFoundError", "OSError", "TimeoutError", ), f"unexpected post-kill connect outcome: {err_kind}" finally: if proc.poll() is None: - proc.send_signal(signal.SIGKILL) + proc.kill() try: proc.wait(timeout=5) except subprocess.TimeoutExpired: diff --git a/tests/test_socket_first_store_hermeticity.py b/tests/test_socket_first_store_hermeticity.py index 8e4d79c..655fe5f 100644 --- a/tests/test_socket_first_store_hermeticity.py +++ b/tests/test_socket_first_store_hermeticity.py @@ -9,8 +9,19 @@ import pytest +from iai_mcp._ipc import IS_WINDOWS from iai_mcp.cli import _send_jsonrpc_request +# This module asserts *which unix-socket path* the client routes to by spying on +# asyncio.open_unix_connection — a POSIX-only mechanism (Windows routes over TCP +# loopback via a port file, and open_unix_connection doesn't exist there). The +# equivalent Windows endpoint routing/isolation is covered by the _ipc +# port-file tests and the IAI_DAEMON_SOCKET_PATH isolation in PR #6. 
+pytestmark = pytest.mark.skipif( + IS_WINDOWS, + reason="POSIX unix-socket-path routing hermeticity; Windows routes via TCP port file (covered elsewhere)", +) + def _capture_stdout(fn) -> tuple[str, int]: buf = io.StringIO() with redirect_stdout(buf): diff --git a/tests/test_socket_inherit_launchd_fd.py b/tests/test_socket_inherit_launchd_fd.py index 98096f3..cbe3ac1 100644 --- a/tests/test_socket_inherit_launchd_fd.py +++ b/tests/test_socket_inherit_launchd_fd.py @@ -48,7 +48,7 @@ def _bind_to_fd_3(sock_path: Path) -> Iterator[socket.socket]: pass def _short_sock_path(suffix: str) -> Path: - sock_dir = Path(f"/tmp/iai-launchd-{os.getpid()}-{suffix}") + sock_dir = Path(os.environ.get("TEMP", "/tmp")) / f"iai-launchd-{os.getpid()}-{suffix}"  # plain helper: the tmp_path fixture is not in scope here sock_dir.mkdir(parents=True, exist_ok=True) return sock_dir / "d.sock" diff --git a/tests/test_socket_server_dispatch.py b/tests/test_socket_server_dispatch.py index ac84fd6..1af226d 100644 --- a/tests/test_socket_server_dispatch.py +++ b/tests/test_socket_server_dispatch.py @@ -8,17 +8,32 @@ import pytest +from iai_mcp._ipc import IS_WINDOWS, open_ipc_connection + + +def _endpoint_ready_path(sock_path: Path) -> Path: + """Path that exists once the SocketServer has bound its endpoint. + POSIX: the unix socket file. Windows: the TCP port file written alongside + it (``{sock_path}.port``, see iai_mcp._ipc._port_file_path).""" + return Path(f"{sock_path}.port") if IS_WINDOWS else sock_path + + @pytest.fixture def short_socket_paths(tmp_path, monkeypatch): from iai_mcp import concurrency, daemon_state - sock_dir = Path(f"/tmp/iai-srvdisp-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" state_path = tmp_path / ".daemon-state.json" monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path) monkeypatch.setattr(daemon_state, "STATE_PATH", state_path) + # Isolate the IPC endpoint per-test. POSIX uses this as the unix socket + # path; Windows persists the ephemeral TCP port to "{sock_path}.port". 
+ # Both SocketServer.serve() and open_ipc_connection() resolve through it, + # so concurrent tests never collide on the shared default endpoint. + monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path)) store_root = tmp_path / "store_root" store_root.mkdir(parents=True, exist_ok=True) monkeypatch.setenv("IAI_MCP_STORE", str(store_root)) @@ -44,10 +59,7 @@ async def _send_jsonrpc( *, timeout: float = 10.0, ) -> dict: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(path=str(sock_path)), - timeout=timeout, - ) + reader, writer = await open_ipc_connection(timeout=timeout) try: envelope: dict = {"jsonrpc": "2.0", "id": req_id, "method": method} if params is not None: @@ -66,10 +78,7 @@ async def _send_jsonrpc( return json.loads(line.decode("utf-8")) async def _send_raw(sock_path: Path, raw_bytes: bytes, *, timeout: float = 5.0) -> dict: - reader, writer = await asyncio.wait_for( - asyncio.open_unix_connection(path=str(sock_path)), - timeout=timeout, - ) + reader, writer = await open_ipc_connection(timeout=timeout) try: writer.write(raw_bytes) await writer.drain() @@ -88,13 +97,16 @@ async def _with_socket_server(sock_path: Path, store, coro_fn): from iai_mcp.socket_server import SocketServer srv = SocketServer(store, idle_secs=99999) - server_task = asyncio.create_task(srv.serve(socket_path=sock_path)) + # No socket_path: serve() resolves the endpoint from IAI_DAEMON_SOCKET_PATH + # (set by the fixture) — a unix socket on POSIX, TCP loopback on Windows. 
+ server_task = asyncio.create_task(srv.serve()) + ready_path = _endpoint_ready_path(sock_path) for _ in range(250): - if sock_path.exists(): + if ready_path.exists(): break await asyncio.sleep(0.01) - if not sock_path.exists(): + if not ready_path.exists(): srv.shutdown_event.set() try: await asyncio.wait_for(server_task, timeout=5) diff --git a/tests/test_socket_subagent_reuse.py b/tests/test_socket_subagent_reuse.py index 19009cf..8a0cb88 100644 --- a/tests/test_socket_subagent_reuse.py +++ b/tests/test_socket_subagent_reuse.py @@ -155,7 +155,7 @@ def _spawn_daemon_in_background( ) def test_subagent_spawns_zero_new_processes(built_wrapper, tmp_path): - sock_dir = Path(f"/tmp/iai-subagent-{os.getpid()}-{id(tmp_path)}") + sock_dir = tmp_path / "sock" sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" store_dir = sock_dir / "store" diff --git a/tests/test_user_model.py b/tests/test_user_model.py index 56cc5b0..c5c45b7 100644 --- a/tests/test_user_model.py +++ b/tests/test_user_model.py @@ -1,5 +1,7 @@ from __future__ import annotations +import sys + import os import stat import uuid @@ -139,7 +141,8 @@ def test_R1_persistence_roundtrip_chmod_default( assert target.exists(), "save() must materialise the file at tmp path" mode = stat.S_IMODE(os.stat(target).st_mode) - assert mode == 0o600, f"file mode must be 0o600, got {oct(mode)}" + if sys.platform != "win32": + assert mode == 0o600, f"file mode must be 0o600, got {oct(mode)}" loaded = load() assert loaded.top_recent_topics == [