diff --git a/.codebook.toml b/.codebook.toml index 755e60fac..cbb779505 100644 --- a/.codebook.toml +++ b/.codebook.toml @@ -45,6 +45,7 @@ ignore_patterns = [ words = [ "accessors", + "acp", "acked", "ACMR", "admin's", @@ -473,6 +474,7 @@ words = [ "powerline", "powerset", "preallocated", + "precmd", "preflight", "prerenders", "prereq", @@ -587,6 +589,7 @@ words = [ "shellouts", "signalled", "signalling", + "sidechain", "sigset", "Sigstore", "sigstore", diff --git a/Cargo.lock b/Cargo.lock index cb0a43c32..cbb018b33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,6 +38,18 @@ dependencies = [ "cpufeatures 0.2.17", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -133,6 +145,15 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "arrayvec" version = "0.7.6" @@ -270,7 +291,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -307,9 +328,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" [[package]] name = "block-buffer" @@ -412,6 +433,12 @@ version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" +[[package]] +name = "by_address" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64fa3c856b712db6612c019f14756e64e4bcea13337a6b33b696333a9eaa2d06" + [[package]] name = "bytecount" version = "0.6.9" @@ -805,6 +832,12 @@ dependencies = [ "itertools 0.13.0", ] +[[package]] +name = "critical-section" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -849,7 +882,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "crossterm_winapi", "derive_more", "document-features", @@ -1362,6 +1395,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fancy-regex" version = "0.11.0" @@ -1405,6 +1450,12 @@ dependencies = [ "tokio", ] +[[package]] +name = "fast-srgb8" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dd2e7510819d6fbf51a5545c8f922716ecfb14df168a3242f7d33e0239efe6a1" + [[package]] name = "fast-steal" version = "6.5.4" @@ -1742,6 +1793,15 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -1767,6 +1827,20 @@ name = "hashbrown" version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + +[[package]] +name = "hashlink" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" +dependencies = [ + "hashbrown 0.14.5", +] [[package]] name = "heck" @@ -1830,9 +1904,9 @@ dependencies = [ [[package]] name = "http" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" +checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425" dependencies = [ "bytes", "itoa", @@ -2308,13 +2382,14 @@ dependencies = [ "owo-colors", "predicates", "ratatui", + "rustls", "serde", "serde_json", "tabled", "tempfile", "thiserror 2.0.18", "tokio", - "toml", + "toml 1.1.2+spec-1.1.0", ] [[package]] @@ -2339,13 +2414,19 @@ dependencies = [ "jackin-tui", "nix 0.31.3", "portable-pty", + "procfs", "ratatui", + "regex", + "rusqlite", + "rustls", "serde", "serde_json", "tempfile", "tokio", - "toml", + "toml 1.1.2+spec-1.1.0", + "toml_edit", "unicode-width", + "ureq", "url", ] @@ -2360,7 
+2441,7 @@ dependencies = [ "serde", "tempfile", "thiserror 2.0.18", - "toml", + "toml 1.1.2+spec-1.1.0", "toml_edit", ] @@ -2452,7 +2533,7 @@ dependencies = [ "serde_json", "sha2 0.11.0", "tempfile", - "toml", + "toml 1.1.2+spec-1.1.0", ] [[package]] @@ -2469,6 +2550,7 @@ dependencies = [ "jackin-docker", "jackin-manifest", "reqwest", + "semver", "serde", "serde_json", "sha2 0.11.0", @@ -2476,6 +2558,7 @@ dependencies = [ "tar", "tempfile", "tokio", + "toml 0.9.12+spec-1.1.0", "x509-cert", ] @@ -2504,7 +2587,7 @@ dependencies = [ "jackin-diagnostics", "tempfile", "thiserror 2.0.18", - "toml", + "toml 1.1.2+spec-1.1.0", "toml_edit", ] @@ -2548,7 +2631,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tokio", - "toml", + "toml 1.1.2+spec-1.1.0", "toml_edit", "tracing", ] @@ -2662,13 +2745,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.99" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162" dependencies = [ "cfg-if", "futures-util", - "once_cell", "wasm-bindgen", ] @@ -2765,13 +2847,24 @@ dependencies = [ "libc", ] +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "line-clipping" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f50e8f47623268b5407192d26876c4d7f89d686ca130fdc53bced4814cd29f8" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", ] [[package]] @@ -2809,11 +2902,11 @@ checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "lru" -version = "0.16.4" +version = "0.18.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f66e8d5d03f609abc3a39e6f08e4164ebf1447a732906d39eb9b99b7919ef39" +checksum = "8a860605968fce16869fd239cf4237a82f3ac470723415db603b0e8b6c8d4fb9" dependencies = [ - "hashbrown 0.16.1", + "hashbrown 0.17.1", ] [[package]] @@ -2939,7 +3032,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "cfg-if", "cfg_aliases 0.1.1", "libc", @@ -2951,7 +3044,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "cfg-if", "cfg_aliases 0.2.1", "libc", @@ -2964,7 +3057,7 @@ version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "cfg-if", "cfg_aliases 0.2.1", "libc", @@ -3107,7 +3200,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "objc2", ] @@ -3159,8 +3252,8 @@ dependencies = [ "regex", "serde", "serde_json", - "strum", - "strum_macros", + "strum 0.27.2", + "strum_macros 0.27.2", "thiserror 2.0.18", ] @@ -3387,6 +3480,30 @@ dependencies = [ "winapi", ] +[[package]] +name = "palette" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cbf71184cc5ecc2e4e1baccdb21026c20e5fc3dcf63028a086131b3ab00b6e6" +dependencies = [ + "approx", + "fast-srgb8", + "libm", + "palette_derive", +] + +[[package]] +name = "palette_derive" +version = "0.7.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5030daf005bface118c096f510ffb781fc28f9ab6a32ab224d8631be6851d30" +dependencies = [ + "by_address", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "papergrid" version = "0.18.0" @@ -3641,6 +3758,12 @@ dependencies = [ "spki", ] +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + [[package]] name = "plotters" version = "0.3.7" @@ -3811,11 +3934,35 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "procfs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7" +dependencies = [ + "bitflags 2.13.0", + "chrono", + "flate2", + "procfs-core", + "rustix", +] + +[[package]] +name = "procfs-core" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405" +dependencies = [ + "bitflags 2.13.0", + "chrono", + "hex", +] + [[package]] name = "prost" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +checksum = "528ac67416ff8646872a3c02cad9cc4ee5dc9f9540c9b10771855c95cb2e5ae1" dependencies = [ "bytes", "prost-derive", @@ -3823,9 +3970,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" +checksum = "03da047801ff44bb6a4d407d4860c05fd70bb81714e6b2f3812603d5b145b042" dependencies = [ "heck", "itertools 0.14.0", @@ -3842,9 +3989,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.3" +version 
= "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +checksum = "b570b25f7617e43d59005d0990ccb79e950a423952cea19671b7a876da390adf" dependencies = [ "anyhow", "itertools 0.14.0", @@ -3890,9 +4037,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" +checksum = "f94967dc7688f3054c7fac87473ffae4cc4c3904800e2d9f5b857246d8963b0a" dependencies = [ "prost", ] @@ -4052,9 +4199,9 @@ checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" [[package]] name = "ratatui" -version = "0.30.0" +version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1ce67fb8ba4446454d1c8dbaeda0557ff5e94d39d5e5ed7f10a65eb4c8266bc" +checksum = "1695748e3a735b34968c887ceea5a380b43545903868ae8f5b666593100f6b68" dependencies = [ "instability", "ratatui-core", @@ -4062,22 +4209,26 @@ dependencies = [ "ratatui-macros", "ratatui-termwiz", "ratatui-widgets", + "serde", ] [[package]] name = "ratatui-core" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef8dea09a92caaf73bff7adb70b76162e5937524058a7e5bff37869cbbec293" +checksum = "42d3603f354bba8c595fa47860e60142d7372b7210c27044c6a7d0e1a4336b44" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "compact_str", - "hashbrown 0.16.1", + "critical-section", + "hashbrown 0.17.1", "indoc", "itertools 0.14.0", "kasuari", "lru", - "strum", + "palette", + "serde", + "strum 0.28.0", "thiserror 2.0.18", "unicode-segmentation", "unicode-truncate", @@ -4086,9 +4237,9 @@ dependencies = [ [[package]] name = "ratatui-crossterm" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" +checksum = "2b2867bedcbd6a690ca4f8672a687b730ec07660c79844517b084311b529980c" dependencies = [ "cfg-if", "crossterm", @@ -4098,9 +4249,9 @@ dependencies = [ [[package]] name = "ratatui-macros" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f1342a13e83e4bb9d0b793d0ea762be633f9582048c892ae9041ef39c936f4" +checksum = "80fac59720679490d89d200df411faa249be728681adcabed3d047ae72c48f1d" dependencies = [ "ratatui-core", "ratatui-widgets", @@ -4108,9 +4259,9 @@ dependencies = [ [[package]] name = "ratatui-termwiz" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f76fe0bd0ed4295f0321b1676732e2454024c15a35d01904ddb315afd3d545c" +checksum = "386b8ff8f74ed749509391c56d549761a2fcdb408e1f42e467286bcb7dac8967" dependencies = [ "ratatui-core", "termwiz", @@ -4118,18 +4269,19 @@ dependencies = [ [[package]] name = "ratatui-widgets" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7dbfa023cd4e604c2553483820c5fe8aa9d71a42eea5aa77c6e7f35756612db" +checksum = "7ef4f17dd7ac3abf5adc2b920a03c61eee4bfe6a88fa5191936895525371d79c" dependencies = [ - "bitflags 2.12.1", - "hashbrown 0.16.1", + "bitflags 2.13.0", + "hashbrown 0.17.1", "indoc", "instability", "itertools 0.14.0", "line-clipping", "ratatui-core", - "strum", + "serde", + "strum 0.28.0", "time", "unicode-segmentation", "unicode-width", @@ -4161,7 +4313,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", ] [[package]] @@ -4317,6 +4469,20 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rusqlite" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" +dependencies = [ + "bitflags 2.13.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustc-demangle" version = "0.1.27" @@ -4350,7 +4516,7 @@ version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "errno", "libc", "linux-raw-sys", @@ -4364,7 +4530,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ "aws-lc-rs", + "log", "once_cell", + "ring", "rustls-pki-types", "rustls-webpki", "subtle", @@ -4570,7 +4738,7 @@ version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "core-foundation", "core-foundation-sys", "libc", @@ -5043,8 +5211,14 @@ name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" + +[[package]] +name = "strum" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9628de9b8791db39ceda2b119bbe13134770b56c138ec1d3af810d045c04f9bd" dependencies = [ - "strum_macros", + "strum_macros 0.28.0", ] [[package]] @@ -5059,6 +5233,18 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "strum_macros" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "subtle" version = "2.6.1" @@ -5209,7 +5395,7 @@ 
checksum = "4676b37242ccbd1aabf56edb093a4827dc49086c0ffd764a5705899e0f35f8f7" dependencies = [ "anyhow", "base64 0.22.1", - "bitflags 2.12.1", + "bitflags 2.13.0", "fancy-regex", "filedescriptor", "finl_unicode", @@ -5457,6 +5643,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.9.12+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" +dependencies = [ + "indexmap 2.14.0", + "serde_core", + "serde_spanned", + "toml_datetime 0.7.5+spec-1.1.0", + "toml_parser", + "toml_writer", + "winnow 0.7.15", +] + [[package]] name = "toml" version = "1.1.2+spec-1.1.0" @@ -5466,10 +5667,19 @@ dependencies = [ "indexmap 2.14.0", "serde_core", "serde_spanned", - "toml_datetime", + "toml_datetime 1.1.1+spec-1.1.0", "toml_parser", "toml_writer", - "winnow", + "winnow 1.0.3", +] + +[[package]] +name = "toml_datetime" +version = "0.7.5+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +dependencies = [ + "serde_core", ] [[package]] @@ -5488,10 +5698,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" dependencies = [ "indexmap 2.14.0", - "toml_datetime", + "toml_datetime 1.1.1+spec-1.1.0", "toml_parser", "toml_writer", - "winnow", + "winnow 1.0.3", ] [[package]] @@ -5500,7 +5710,7 @@ version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "winnow", + "winnow 1.0.3", ] [[package]] @@ -5530,7 +5740,7 @@ version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", 
"bytes", "futures-util", "http", @@ -5748,6 +5958,22 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.8" @@ -5781,9 +6007,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.23.2" +version = "1.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" dependencies = [ "atomic", "getrandom 0.4.2", @@ -5797,6 +6023,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -5876,9 +6108,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563" dependencies = [ "cfg-if", "once_cell", @@ -5889,9 +6121,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.72" +version = "0.4.73" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" +checksum = "54568702fabf5d4849ce2b90fadfa64168a097eaf4b351ce9df8b687a0086aaf" dependencies = [ "js-sys", "wasm-bindgen", @@ -5899,9 +6131,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5909,9 +6141,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b" dependencies = [ "bumpalo", "proc-macro2", @@ -5922,9 +6154,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.122" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92" dependencies = [ "unicode-ident", ] @@ -5970,7 +6202,7 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags 2.12.1", + "bitflags 2.13.0", "hashbrown 0.15.5", "indexmap 2.14.0", "semver", @@ -5978,9 +6210,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.99" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" +checksum = 
"6e0871acf327f283dc6da28a1696cdc64fb355ba9f935d052021fa77f35cce69" dependencies = [ "js-sys", "wasm-bindgen", @@ -6021,6 +6253,24 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "wezterm-bidi" version = "0.2.3" @@ -6095,9 +6345,9 @@ dependencies = [ [[package]] name = "which" -version = "8.0.2" +version = "8.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81995fafaaaf6ae47a7d0cc83c67caf92aeb7e5331650ae6ff856f7c0c60c459" +checksum = "c789537cf2f7f55be8e6192f92e464174ee55f91af622777f7f1ceb0dbccd03e" dependencies = [ "libc", ] @@ -6348,6 +6598,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" + [[package]] name = "winnow" version = "1.0.3" @@ -6430,7 +6686,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags 2.12.1", + "bitflags 2.13.0", "indexmap 2.14.0", "log", "serde", diff --git a/crates/jackin-capsule/Cargo.toml b/crates/jackin-capsule/Cargo.toml index d13f39f85..7bf74984d 100644 --- a/crates/jackin-capsule/Cargo.toml +++ b/crates/jackin-capsule/Cargo.toml @@ -32,6 +32,7 @@ jackin-protocol = { 
version = "0.6.0-dev", path = "../jackin-protocol" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" toml = "1.1" +toml_edit = "0.25" tokio = { version = "1", features = ["rt", "io-util", "io-std", "sync", "macros", "time", "net", "signal"] } crossterm = "0.29" portable-pty = "0.9" @@ -42,6 +43,11 @@ jackin-term = { version = "0.6.0-dev", path = "../jackin-term" } url = "2" unicode-width = "0.2" chrono = { version = "0.4", default-features = false, features = ["clock"] } +procfs = "0.18" +regex = "1" +rusqlite = { version = "0.32", features = ["bundled"] } +ureq = { version = "2.12", features = ["tls"] } +rustls = { version = "0.23", default-features = false, features = ["ring"] } jackin-tui = { version = "0.6.0-dev", path = "../jackin-tui" } ratatui = "0.30" dhat = { version = "0.3.3", optional = true } diff --git a/crates/jackin-capsule/src/agent_status.rs b/crates/jackin-capsule/src/agent_status.rs new file mode 100644 index 000000000..ba2f81357 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status.rs @@ -0,0 +1,625 @@ +//! Agent runtime status authority. +//! +//! This module owns all state-machine logic for determining what an agent is +//! doing at any given moment. It replaces the old timer-based +//! `BLOCKED_AFTER` heuristic with a layered model that is conservative by +//! default and precise when the runtime exposes semantic events. +//! +//! # Architecture +//! +//! ```text +//! Signal sources (multiple, concurrent): +//! • Screen rule packs (`rules`) — structural terminal matching +//! • OSC 133 markers (`scan_osc133`) — shell integration sequences +//! • Hook/API reports (`gating`) — in-container reporter events +//! • /proc process (`process`) — foreground process identity +//! +//! evidence snapshot ───► arbitrate ───► debounce ───► SessionStatus +//! ``` +//! +//! # Adding a new agent runtime +//! +//! 1. Add or extend `docker/runtime/agent-status/packs/.toml`. +//! 2. Add fixtures under `agent_status/screen/fixtures//`. 
+//! 3. Add semantic event mapping in `gating.rs` only when the runtime ships +//! hooks or a plugin surface. + +pub mod arbitrate; +pub mod evidence; +pub mod gating; +pub mod hook_installer; +pub mod policy; +pub mod process; +pub mod rules; +pub mod seen; +pub mod sequence; + +use evidence::{EvidenceSummary, RawAgentState}; +use jackin_protocol::agent_status::{AgentStatusConfidence, AgentStatusReport, AgentStatusSource}; + +use crate::protocol::AgentState; + +/// Shell integration markers from OSC 133 sequences. +/// +/// Emitted by shell `precmd`/`preexec` hooks installed in `/home/agent/.zshrc`. +/// Parsed from raw PTY bytes by `scan_osc133`; model-independent (works with +/// both vt100 and `DamageGrid` renderers). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OscShellMark { + /// `OSC 133 ; A` — prompt start. + PromptStart, + /// `OSC 133 ; B` — prompt end / ready for input. + PromptEnd, + /// `OSC 133 ; C` — pre-execution (user pressed Enter). + PreExec, + /// `OSC 133 ; D` — command finished with optional exit code. + CommandFinished { exit_code: Option }, +} + +/// Scan raw PTY bytes for the FIRST `OSC 133 ; ` sequence. +/// +/// Finds `\x1b]133;A`, `B`, `C`, or `D[;]` followed by BEL +/// (`\x07`) or ST (`\x1b\\`). Model-independent: works with both the +/// current vt100-based session and the future DamageGrid-based session. 
+pub fn scan_osc133(bytes: &[u8]) -> Option { + // Minimum sequence: \x1b]133;A\x07 = 8 bytes + let len = bytes.len(); + if len < 8 { + return None; + } + + let mut i = 0; + while i + 7 < len { + // Look for ESC ] 1 3 3 ; + if bytes[i] == b'\x1b' + && bytes[i + 1] == b']' + && bytes[i + 2] == b'1' + && bytes[i + 3] == b'3' + && bytes[i + 4] == b'3' + && bytes[i + 5] == b';' + { + let letter = bytes[i + 6]; + match letter { + b'A' => return Some(OscShellMark::PromptStart), + b'B' => return Some(OscShellMark::PromptEnd), + b'C' => return Some(OscShellMark::PreExec), + b'D' => { + // Optional exit code after another ';' + let exit_code = if i + 7 < len && bytes[i + 7] == b';' { + let start = i + 8; + let end = bytes[start..] + .iter() + .position(|&b| !b.is_ascii_digit()) + .map_or(len, |p| start + p); + if end > start { + std::str::from_utf8(&bytes[start..end]) + .ok() + .and_then(|s| s.parse::().ok()) + } else { + None + } + } else { + None + }; + return Some(OscShellMark::CommandFinished { exit_code }); + } + _ => {} + } + } + i += 1; + } + None +} + +#[cfg(test)] +mod osc133_tests { + use super::*; + + #[test] + fn scan_osc133_detects_prompt_end() { + let bytes = b"\x1b]133;B\x07"; + assert_eq!(scan_osc133(bytes), Some(OscShellMark::PromptEnd)); + } + + #[test] + fn scan_osc133_detects_pre_exec() { + let bytes = b"\x1b]133;C\x07"; + assert_eq!(scan_osc133(bytes), Some(OscShellMark::PreExec)); + } + + #[test] + fn scan_osc133_detects_command_finished_with_code() { + let bytes = b"\x1b]133;D;0\x07"; + assert_eq!( + scan_osc133(bytes), + Some(OscShellMark::CommandFinished { exit_code: Some(0) }) + ); + } + + #[test] + fn scan_osc133_returns_none_for_plain_output() { + assert_eq!(scan_osc133(b"hello world"), None); + } + + #[test] + fn scan_osc133_finds_marker_in_larger_buffer() { + let bytes = b"some output\r\n\x1b]133;B\x07more output"; + assert_eq!(scan_osc133(bytes), Some(OscShellMark::PromptEnd)); + } +} + +/// Stored state report from an in-container source. 
+/// +/// Runtime hook/plugin events are daemon-mapped semantic authority. Direct +/// `ReportAgentState` senders are lower-trust cooperative reporters: accepted +/// with freshness and process validation, but not full authority. +#[derive(Debug, Clone)] +pub struct HookAuthority { + pub source_id: String, + pub agent_label: String, + pub raw_state: String, + pub origin: AuthorityOrigin, + pub seq: u64, + pub ts_ns: u64, + pub message: Option, + /// Timestamp when this authority was last updated or heartbeated. + pub last_seen: std::time::Instant, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AuthorityOrigin { + RuntimeEvent, + DirectStateReport, +} + +impl AuthorityOrigin { + pub const fn label(self) -> &'static str { + match self { + Self::RuntimeEvent => "runtime_event", + Self::DirectStateReport => "direct_state_report", + } + } +} + +/// Per-session accumulated status. Holds the current effective state and +/// the `seen` flag used to derive `Done`. +#[derive(Debug, Clone)] +pub struct SessionStatus { + /// Wire-format effective state consumed by the UI and protocol. + pub effective: AgentState, + /// Four-state raw status before `done` is derived from raw idle + unseen. + pub raw: RawAgentState, + /// Confidence of the evidence that produced `raw`. + pub confidence: AgentStatusConfidence, + /// Last evidence summary used to publish the current state. + pub last_snapshot_summary: EvidenceSummary, + /// `true` once the operator has focused or acknowledged this pane after + /// its last `Done` transition. Used to derive `Done` from raw `Idle`. + pub seen: bool, + /// Monotonically-increasing revision counter. Incremented on every + /// state change. UI consumers compare revision to detect stale snapshots. 
+ pub revision: u64, +} + +impl Default for SessionStatus { + fn default() -> Self { + Self::new() + } +} + +impl SessionStatus { + pub fn new() -> Self { + Self { + effective: AgentState::Unknown, + raw: RawAgentState::Unknown, + confidence: AgentStatusConfidence::Unknown, + last_snapshot_summary: EvidenceSummary::default(), + seen: true, + revision: 0, + } + } + + pub fn publish_raw( + &mut self, + raw: RawAgentState, + confidence: AgentStatusConfidence, + mut summary: EvidenceSummary, + ) -> Option { + let previous = self.effective; + let previous_raw = self.raw; + let entering_work_cycle = matches!(raw, RawAgentState::Working | RawAgentState::Blocked) + && !matches!( + previous_raw, + RawAgentState::Working | RawAgentState::Blocked + ); + if entering_work_cycle { + self.seen = false; + } + let next = self.effective_from_raw(raw, previous_raw); + self.raw = raw; + self.confidence = confidence; + summary.raw_state = raw; + summary.confidence = confidence; + self.last_snapshot_summary = summary; + if next == previous { + None + } else { + self.effective = next; + self.revision += 1; + Some(next) + } + } + + /// Mark this session as seen by the operator (pane focused / acknowledged). + /// Transitions Done → Idle. Returns `Some(Idle)` when the state changed. 
+ pub fn acknowledge(&mut self) -> Option { + self.seen = true; + if self.effective == AgentState::Done { + self.effective = AgentState::Idle; + self.revision += 1; + Some(AgentState::Idle) + } else { + None + } + } + + pub fn report( + &self, + detected_agent: Option, + last_seen_revision: u64, + ) -> AgentStatusReport { + let summary = &self.last_snapshot_summary; + AgentStatusReport { + raw_state: self.raw, + source: summary.authority_source.as_ref().map_or_else( + || match summary.winner { + evidence::EvidenceWinner::Authority => AgentStatusSource::None, + evidence::EvidenceWinner::Blocked | evidence::EvidenceWinner::Freeze => { + AgentStatusSource::VisibleScreen + } + evidence::EvidenceWinner::StrongVisualOrOsc => { + if summary.shell_integration { + AgentStatusSource::ShellIntegration + } else { + AgentStatusSource::VisibleScreen + } + } + evidence::EvidenceWinner::Physics => AgentStatusSource::ForegroundProcess, + evidence::EvidenceWinner::ProcessExit | evidence::EvidenceWinner::Unknown => { + AgentStatusSource::None + } + }, + |source_id| AgentStatusSource::Reported { + source_id: source_id.clone(), + }, + ), + confidence: self.confidence, + detected_agent, + foreground_pgid: summary.foreground_pgid, + visible_blocker: summary.visible_blocker, + visible_idle: summary.visible_idle, + visible_working: summary.visible_working, + process_exited: summary.process_exited, + foreground_returned_to_shell: summary.foreground_returned_to_shell, + stale_report: summary.stale_report, + subagents_active: summary.subagents_active, + revision: self.revision, + last_seen_revision, + } + } + + fn effective_from_raw(&self, raw: RawAgentState, _previous_raw: RawAgentState) -> AgentState { + match raw { + RawAgentState::Unknown => AgentState::Unknown, + RawAgentState::Working => AgentState::Working, + RawAgentState::Blocked => AgentState::Blocked, + RawAgentState::Idle => { + if self.seen { + AgentState::Idle + } else { + AgentState::Done + } + } + } + } +} + +#[cfg(test)] +mod 
tests { + use super::*; + + #[test] + fn new_session_starts_unknown() { + let s = SessionStatus::new(); + assert_eq!(s.effective, AgentState::Unknown); + assert_eq!(s.raw, RawAgentState::Unknown); + assert_eq!(s.revision, 0); + } + + #[test] + fn publish_working_transitions_unknown_to_working() { + let mut s = SessionStatus::new(); + let changed = s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(changed, Some(AgentState::Working)); + assert_eq!(s.effective, AgentState::Working); + assert_eq!(s.raw, RawAgentState::Working); + assert!(!s.seen); + assert_eq!(s.revision, 1); + } + + #[test] + fn idle_after_working_produces_done_when_unseen() { + let mut s = SessionStatus::new(); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + let changed = s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(changed, Some(AgentState::Done)); + assert_eq!(s.effective, AgentState::Done); + } + + #[test] + fn repeated_idle_keeps_done_until_acknowledged() { + let mut s = SessionStatus::new(); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(s.effective, AgentState::Done); + + let changed = s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + + assert_eq!(changed, None); + assert_eq!(s.effective, AgentState::Done); + assert!(!s.seen); + } + + #[test] + fn idle_after_working_produces_idle_when_seen() { + let mut s = SessionStatus::new(); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + s.seen = true; + let changed = s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + 
EvidenceSummary::default(), + ); + assert_eq!(changed, Some(AgentState::Idle)); + } + + #[test] + fn acknowledge_transitions_done_to_idle() { + let mut s = SessionStatus::new(); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(s.effective, AgentState::Done); + let changed = s.acknowledge(); + assert_eq!(changed, Some(AgentState::Idle)); + assert_eq!(s.effective, AgentState::Idle); + assert!(s.seen); + } + + #[test] + fn revision_increments_only_on_public_state_change() { + let mut s = SessionStatus::new(); + assert_eq!(s.revision, 0); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(s.revision, 1); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(s.revision, 1); + s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(s.revision, 2); + } + + #[test] + fn blocked_enters_work_cycle_and_done_on_idle() { + let mut s = SessionStatus::new(); + s.publish_raw( + RawAgentState::Blocked, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(s.effective, AgentState::Blocked); + assert!(!s.seen); + let changed = s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(changed, Some(AgentState::Done)); + } + + #[test] + fn re_work_after_ack_creates_new_done() { + let mut s = SessionStatus::new(); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(s.effective, AgentState::Done); + s.acknowledge(); + assert_eq!(s.effective, 
AgentState::Idle); + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + let changed = s.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + assert_eq!(changed, Some(AgentState::Done)); + } + + #[test] + fn publish_raw_keeps_latest_evidence_summary() { + let mut s = SessionStatus::new(); + let summary = EvidenceSummary { + rule_id: Some("claude.permission-dialog".to_owned()), + visible_blocker: true, + ..EvidenceSummary::default() + }; + s.publish_raw( + RawAgentState::Blocked, + AgentStatusConfidence::Strong, + summary, + ); + assert_eq!(s.last_snapshot_summary.raw_state, RawAgentState::Blocked); + assert_eq!( + s.last_snapshot_summary.confidence, + AgentStatusConfidence::Strong + ); + assert_eq!( + s.last_snapshot_summary.rule_id.as_deref(), + Some("claude.permission-dialog") + ); + assert!(s.last_snapshot_summary.visible_blocker); + } + + #[test] + fn report_uses_evidence_summary() { + let mut s = SessionStatus::new(); + let summary = EvidenceSummary { + authority_source: Some("hook-claude-1".to_owned()), + foreground_pgid: Some(42), + visible_working: true, + subagents_active: 2, + ..EvidenceSummary::default() + }; + s.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Authoritative, + summary, + ); + let report = s.report(Some("claude".to_owned()), 0); + assert_eq!(report.raw_state, RawAgentState::Working); + assert_eq!(report.confidence, AgentStatusConfidence::Authoritative); + assert_eq!(report.foreground_pgid, Some(42)); + assert!(report.visible_working); + assert_eq!(report.subagents_active, 2); + assert_eq!( + report.source, + AgentStatusSource::Reported { + source_id: "hook-claude-1".to_owned() + } + ); + } + + #[test] + fn report_preserves_shell_integration_source() { + let mut s = SessionStatus::new(); + let summary = EvidenceSummary { + winner: evidence::EvidenceWinner::StrongVisualOrOsc, + shell_integration: true, + 
..EvidenceSummary::default() + }; + s.publish_raw(RawAgentState::Idle, AgentStatusConfidence::Strong, summary); + + assert_eq!( + s.report(Some("codex".to_owned()), 0).source, + AgentStatusSource::ShellIntegration + ); + } + + #[test] + fn clear_authority_removes_only_matching_source() { + let mut seq = sequence::SequenceTracker::new(); + seq.accept("source-a", 100); + seq.accept("source-b", 200); + seq.clear_source("source-a"); + assert!(seq.has_source("source-b")); + assert!(!seq.has_source("source-a")); + } + + #[test] + fn roll_up_priority_blocked_gt_done_gt_working_gt_idle_gt_unknown() { + use crate::agent_status::arbitrate::attention_priority; + assert!(attention_priority(AgentState::Blocked) > attention_priority(AgentState::Done)); + assert!(attention_priority(AgentState::Done) > attention_priority(AgentState::Working)); + assert!(attention_priority(AgentState::Working) > attention_priority(AgentState::Idle)); + assert!(attention_priority(AgentState::Idle) > attention_priority(AgentState::Unknown)); + } + + #[test] + fn multiple_sessions_roll_up_reflects_most_urgent() { + use crate::agent_status::arbitrate::roll_up_states; + + let session_states = vec![ + AgentState::Working, + AgentState::Blocked, + AgentState::Working, + AgentState::Idle, + ]; + let rolled = roll_up_states(&session_states); + assert_eq!(rolled, AgentState::Blocked); + } + + #[test] + fn heartbeat_keeps_hook_authority_fresh() { + use std::time::Instant; + let mut auth = HookAuthority { + source_id: "hook-1".to_owned(), + agent_label: "claude".to_owned(), + raw_state: "blocked".to_owned(), + origin: AuthorityOrigin::RuntimeEvent, + seq: 100, + ts_ns: 0, + message: None, + last_seen: Instant::now(), + }; + let before = auth.last_seen; + auth.last_seen = Instant::now(); + assert!(auth.last_seen >= before); + } +} diff --git a/crates/jackin-capsule/src/agent_status/arbitrate.rs b/crates/jackin-capsule/src/agent_status/arbitrate.rs new file mode 100644 index 000000000..3674a11da --- /dev/null 
+++ b/crates/jackin-capsule/src/agent_status/arbitrate.rs @@ -0,0 +1,516 @@ +//! Pure arbitration from collected evidence to one raw agent state. + +use std::borrow::Borrow; +use std::time::Instant; + +use jackin_protocol::agent_status::AgentStatusConfidence; + +use crate::agent_status::evidence::{ + AuthorityEvidence, EvidenceNote, EvidenceSnapshot, EvidenceSummary, EvidenceWinner, + RawAgentState, +}; +use crate::agent_status::policy::AUTHORITY_TTL; +use crate::protocol::AgentState; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ArbitrationResult { + pub raw: RawAgentState, + pub confidence: AgentStatusConfidence, + pub winner: EvidenceWinner, + pub notes: Vec, + pub summary: EvidenceSummary, +} + +pub fn arbitrate( + snapshot: &EvidenceSnapshot, + previous_raw: RawAgentState, + now: Instant, +) -> ArbitrationResult { + let mut summary = EvidenceSummary { + authority_source: snapshot + .authority + .as_ref() + .map(|authority| authority.source_id.clone()), + visible_blocker: snapshot.screen.state == Some(RawAgentState::Blocked), + visible_idle: snapshot.screen.state == Some(RawAgentState::Idle), + visible_working: snapshot.screen.state == Some(RawAgentState::Working), + process_exited: snapshot.process.process_exited, + foreground_returned_to_shell: snapshot.process.foreground_returned_to_shell, + root_is_agent: snapshot.process.root_is_agent, + foreground_pgid: snapshot.process.foreground_pgid, + rule_id: snapshot.screen.rule_id.clone(), + last_output: snapshot.activity.last_output, + last_input: snapshot.activity.last_input, + child_process_count: snapshot.process.child_process_count, + cpu_jiffies_delta: snapshot.process.cpu_jiffies_delta, + subagents_active: snapshot.subagents_active, + osc_progress_active: snapshot.osc.progress_active, + shell_integration: snapshot.osc.shell_state.is_some(), + ..EvidenceSummary::default() + }; + if let Some(authority) = &snapshot.authority { + summary.notes.extend(authority.notes.clone()); + } + + if 
snapshot.process.process_exited || snapshot.process.foreground_returned_to_shell { + if snapshot.process.process_exited { + summary.notes.push(EvidenceNote::ProcessExited); + } + if snapshot.process.foreground_returned_to_shell { + summary.notes.push(EvidenceNote::ForegroundReturnedToShell); + } + return finish( + RawAgentState::Idle, + AgentStatusConfidence::Weak, + EvidenceWinner::ProcessExit, + summary, + ); + } + + if snapshot.screen.freeze { + return finish( + previous_raw, + AgentStatusConfidence::Strong, + EvidenceWinner::Freeze, + summary, + ); + } + + let fresh_authority = snapshot.authority.as_ref().filter(|authority| { + now.duration_since(authority.last_event) <= AUTHORITY_TTL + && snapshot.process.foreground_is_agent + }); + if let Some(authority) = &snapshot.authority + && fresh_authority.is_none() + { + summary.stale_report = true; + if now.duration_since(authority.last_event) > AUTHORITY_TTL { + summary.notes.push(EvidenceNote::AuthorityExpired); + } + if !snapshot.process.foreground_is_agent { + summary.notes.push(EvidenceNote::AuthorityIdentityMismatch); + } + } + + if let Some(authority) = fresh_authority + && authority.pending_permission + && authority.mapped_state == RawAgentState::Blocked + { + return finish( + RawAgentState::Blocked, + authority_confidence(authority), + EvidenceWinner::Blocked, + summary, + ); + } + + if snapshot.screen.strong && snapshot.screen.state == Some(RawAgentState::Blocked) { + let screen_fresh_enough = fresh_authority.is_none_or(|authority| { + snapshot.screen.observed_at >= authority.last_event + || authority.mapped_state == RawAgentState::Blocked + }); + if screen_fresh_enough { + return finish( + RawAgentState::Blocked, + AgentStatusConfidence::Strong, + EvidenceWinner::Blocked, + summary, + ); + } + } + + if let Some(authority) = fresh_authority { + return finish( + authority.mapped_state, + authority_confidence(authority), + EvidenceWinner::Authority, + summary, + ); + } + + if let Some(shell_state) = 
snapshot.osc.shell_state { + return finish( + shell_state, + AgentStatusConfidence::Strong, + EvidenceWinner::StrongVisualOrOsc, + summary, + ); + } + + if snapshot.screen.strong + && matches!( + snapshot.screen.state, + Some(RawAgentState::Working | RawAgentState::Idle) + ) + { + return finish( + snapshot.screen.state.unwrap_or(RawAgentState::Unknown), + AgentStatusConfidence::Strong, + EvidenceWinner::StrongVisualOrOsc, + summary, + ); + } + if snapshot.process.foreground_is_agent && snapshot.osc.progress_cleared_at.is_some() { + return finish( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceWinner::StrongVisualOrOsc, + summary, + ); + } + + if snapshot.process.child_process_count > 0 || snapshot.process.cpu_jiffies_delta > 0 { + return finish( + RawAgentState::Working, + AgentStatusConfidence::Weak, + EvidenceWinner::Physics, + summary, + ); + } + + finish( + RawAgentState::Unknown, + AgentStatusConfidence::Unknown, + EvidenceWinner::Unknown, + summary, + ) +} + +fn authority_confidence(authority: &AuthorityEvidence) -> AgentStatusConfidence { + if authority.direct_state_report { + AgentStatusConfidence::Strong + } else { + AgentStatusConfidence::Authoritative + } +} + +fn finish( + raw: RawAgentState, + confidence: AgentStatusConfidence, + winner: EvidenceWinner, + mut summary: EvidenceSummary, +) -> ArbitrationResult { + summary.raw_state = raw; + summary.confidence = confidence; + summary.winner = winner; + ArbitrationResult { + raw, + confidence, + winner, + notes: summary.notes.clone(), + summary, + } +} + +/// Attention priority used for tab/workspace roll-up. +pub fn attention_priority(state: AgentState) -> u8 { + match state { + AgentState::Blocked => 4, + AgentState::Done => 3, + AgentState::Working => 2, + AgentState::Idle => 1, + AgentState::Unknown => 0, + } +} + +/// Roll up a collection of session states to the most attention-worthy. 
+pub fn roll_up_states(states: I) -> AgentState +where + I: IntoIterator, + I::Item: Borrow, +{ + states + .into_iter() + .max_by_key(|s| attention_priority(*s.borrow())) + .map_or(AgentState::Unknown, |s| *s.borrow()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::agent_status::evidence::{ + ActivityEvidence, AuthorityEvidence, AuthorityGrade, OscEvidence, ProcessEvidence, + ScreenEvidence, + }; + use std::time::Duration; + + fn base_snapshot(now: Instant) -> EvidenceSnapshot { + EvidenceSnapshot { + authority: None, + osc: OscEvidence::default(), + screen: ScreenEvidence { + observed_at: now, + ..ScreenEvidence::default() + }, + process: ProcessEvidence { + child_alive: true, + foreground_is_agent: true, + ..ProcessEvidence::default() + }, + activity: ActivityEvidence::default(), + subagents_active: 0, + } + } + + fn authority( + state: RawAgentState, + pending_permission: bool, + last_event: Instant, + ) -> AuthorityEvidence { + AuthorityEvidence { + source_id: "hook-claude-1".to_owned(), + grade: AuthorityGrade::Partial, + direct_state_report: false, + mapped_state: state, + pending_permission, + last_event, + seq: 1, + notes: Vec::new(), + } + } + + #[test] + fn process_exit_wins() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.process.process_exited = true; + snapshot.authority = Some(authority(RawAgentState::Working, false, now)); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Idle); + assert_eq!(result.winner, EvidenceWinner::ProcessExit); + assert!(result.summary.has_note(EvidenceNote::ProcessExited)); + } + + #[test] + fn foreground_shell_handoff_wins_as_exit_like_idle() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.process.foreground_returned_to_shell = true; + snapshot.process.child_alive = true; + snapshot.process.root_is_agent = false; + snapshot.process.foreground_is_agent = false; + snapshot.authority = 
Some(authority(RawAgentState::Working, false, now)); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Idle); + assert_eq!(result.winner, EvidenceWinner::ProcessExit); + assert!(result.summary.foreground_returned_to_shell); + assert!( + result + .summary + .has_note(EvidenceNote::ForegroundReturnedToShell) + ); + assert!(!result.summary.stale_report); + } + + #[test] + fn freeze_keeps_previous_raw() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.screen.freeze = true; + snapshot.screen.state = Some(RawAgentState::Blocked); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Working); + assert_eq!(result.winner, EvidenceWinner::Freeze); + } + + #[test] + fn pending_permission_blocks_immediately() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.authority = Some(authority(RawAgentState::Blocked, true, now)); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Blocked); + assert_eq!(result.winner, EvidenceWinner::Blocked); + } + + #[test] + fn fresh_screen_blocker_overrides_non_blocked_authority() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.authority = Some(authority( + RawAgentState::Working, + false, + now.checked_sub(Duration::from_secs(1)).unwrap(), + )); + snapshot.screen.state = Some(RawAgentState::Blocked); + snapshot.screen.strong = true; + snapshot.screen.observed_at = now; + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Blocked); + assert_eq!(result.winner, EvidenceWinner::Blocked); + } + + #[test] + fn stale_screen_blocker_does_not_override_fresher_authority() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.authority = Some(authority(RawAgentState::Working, false, now)); + 
snapshot.screen.state = Some(RawAgentState::Blocked); + snapshot.screen.strong = true; + snapshot.screen.observed_at = now.checked_sub(Duration::from_secs(1)).unwrap(); + + let result = arbitrate(&snapshot, RawAgentState::Idle, now); + + assert_eq!(result.raw, RawAgentState::Working); + assert_eq!(result.winner, EvidenceWinner::Authority); + } + + #[test] + fn fresh_authority_wins_after_blocker_checks() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.authority = Some(authority(RawAgentState::Working, false, now)); + + let result = arbitrate(&snapshot, RawAgentState::Idle, now); + + assert_eq!(result.raw, RawAgentState::Working); + assert_eq!(result.winner, EvidenceWinner::Authority); + } + + #[test] + fn direct_state_report_is_lower_confidence_than_runtime_event_authority() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + let mut direct_report = authority(RawAgentState::Working, false, now); + direct_report.direct_state_report = true; + snapshot.authority = Some(direct_report); + + let result = arbitrate(&snapshot, RawAgentState::Idle, now); + + assert_eq!(result.raw, RawAgentState::Working); + assert_eq!(result.winner, EvidenceWinner::Authority); + assert_eq!(result.confidence, AgentStatusConfidence::Strong); + } + + #[test] + fn expired_authority_leaves_note_and_falls_back_unknown() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.authority = Some(authority( + RawAgentState::Working, + false, + now.checked_sub(AUTHORITY_TTL + Duration::from_secs(1)) + .unwrap(), + )); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Unknown); + assert!(result.summary.stale_report); + assert!(result.summary.has_note(EvidenceNote::AuthorityExpired)); + } + + #[test] + fn identity_mismatch_leaves_note_and_rejects_authority() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.authority = 
Some(authority(RawAgentState::Working, false, now)); + snapshot.process.foreground_is_agent = false; + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Unknown); + assert!(result.summary.stale_report); + assert!( + result + .summary + .has_note(EvidenceNote::AuthorityIdentityMismatch) + ); + } + + #[test] + fn strong_screen_idle_wins_without_authority() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.screen.state = Some(RawAgentState::Idle); + snapshot.screen.strong = true; + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Idle); + assert_eq!(result.winner, EvidenceWinner::StrongVisualOrOsc); + } + + #[test] + fn osc_progress_clear_is_idle_hint() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.osc.progress_cleared_at = Some(now); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Idle); + assert_eq!(result.winner, EvidenceWinner::StrongVisualOrOsc); + assert!( + !result.summary.shell_integration, + "agent-authored progress-clear must not be attributed to shell integration" + ); + } + + #[test] + fn osc_shell_marker_is_shell_integration_evidence() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.osc.shell_state = Some(RawAgentState::Idle); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + assert_eq!(result.raw, RawAgentState::Idle); + assert_eq!(result.winner, EvidenceWinner::StrongVisualOrOsc); + assert!(result.summary.shell_integration); + } + + #[test] + fn osc_progress_clear_is_ignored_when_foreground_is_not_agent() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.process.foreground_is_agent = false; + snapshot.osc.progress_cleared_at = Some(now); + + let result = arbitrate(&snapshot, RawAgentState::Working, now); + + 
assert_eq!(result.raw, RawAgentState::Unknown); + assert_eq!(result.winner, EvidenceWinner::Unknown); + } + + #[test] + fn physics_only_promotes_to_weak_working() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.process.child_process_count = 1; + + let result = arbitrate(&snapshot, RawAgentState::Unknown, now); + + assert_eq!(result.raw, RawAgentState::Working); + assert_eq!(result.confidence, AgentStatusConfidence::Weak); + assert_eq!(result.winner, EvidenceWinner::Physics); + } + + #[test] + fn no_evidence_is_unknown() { + let now = Instant::now(); + let mut snapshot = base_snapshot(now); + snapshot.process.foreground_is_agent = false; + + let result = arbitrate(&snapshot, RawAgentState::Unknown, now); + + assert_eq!(result.raw, RawAgentState::Unknown); + assert_eq!(result.winner, EvidenceWinner::Unknown); + } + + #[test] + fn rollup_priority_matches_contract() { + let states = [AgentState::Idle, AgentState::Working, AgentState::Done]; + assert_eq!(roll_up_states(states.iter()), AgentState::Done); + } +} diff --git a/crates/jackin-capsule/src/agent_status/evidence.rs b/crates/jackin-capsule/src/agent_status/evidence.rs new file mode 100644 index 000000000..9cf797a2e --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/evidence.rs @@ -0,0 +1,209 @@ +use std::time::Instant; + +pub use jackin_protocol::agent_status::AgentRawState as RawAgentState; +use jackin_protocol::agent_status::AgentStatusConfidence; + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct EvidenceSnapshot { + pub authority: Option, + pub osc: OscEvidence, + pub screen: ScreenEvidence, + pub process: ProcessEvidence, + pub activity: ActivityEvidence, + pub subagents_active: u32, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AuthorityEvidence { + pub source_id: String, + pub grade: AuthorityGrade, + pub direct_state_report: bool, + pub mapped_state: RawAgentState, + pub pending_permission: bool, + pub last_event: Instant, + pub seq: u64, + 
pub notes: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AuthorityGrade { + Complete, + Partial, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct OscEvidence { + pub title: Option, + pub title_changed_at: Option, + pub notify_edge_at: Option, + pub progress_active: bool, + pub progress_cleared_at: Option, + pub bel_at: Option, + pub bel_count: u64, + pub shell_state: Option, + pub shell_mark_at: Option, +} + +impl OscEvidence { + pub fn clear_agent_signals(&mut self) { + self.title = None; + self.title_changed_at = None; + self.notify_edge_at = None; + self.progress_active = false; + self.progress_cleared_at = None; + self.bel_at = None; + self.bel_count = 0; + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ScreenEvidence { + pub state: Option, + pub rule_id: Option, + pub strong: bool, + pub freeze: bool, + pub observed_at: Instant, +} + +impl Default for ScreenEvidence { + fn default() -> Self { + Self { + state: None, + rule_id: None, + strong: false, + freeze: false, + observed_at: Instant::now(), + } + } +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct ProcessEvidence { + pub process_exited: bool, + pub foreground_returned_to_shell: bool, + pub child_alive: bool, + pub root_is_agent: bool, + pub foreground_is_agent: bool, + pub foreground_pgid: Option, + pub child_process_count: u32, + pub cpu_jiffies_delta: u64, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct ActivityEvidence { + pub last_output: Option, + pub last_input: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EvidenceSummary { + pub raw_state: RawAgentState, + pub confidence: AgentStatusConfidence, + pub winner: EvidenceWinner, + pub rule_id: Option, + pub authority_source: Option, + pub foreground_pgid: Option, + pub last_output: Option, + pub last_input: Option, + pub child_process_count: u32, + pub cpu_jiffies_delta: u64, + pub subagents_active: u32, + pub osc_progress_active: bool, + pub 
shell_integration: bool, + pub visible_blocker: bool, + pub visible_idle: bool, + pub visible_working: bool, + pub process_exited: bool, + pub foreground_returned_to_shell: bool, + pub root_is_agent: bool, + pub stale_report: bool, + pub notes: Vec, +} + +impl Default for EvidenceSummary { + fn default() -> Self { + Self { + raw_state: RawAgentState::Unknown, + confidence: AgentStatusConfidence::Unknown, + winner: EvidenceWinner::Unknown, + rule_id: None, + authority_source: None, + foreground_pgid: None, + last_output: None, + last_input: None, + child_process_count: 0, + cpu_jiffies_delta: 0, + subagents_active: 0, + osc_progress_active: false, + shell_integration: false, + visible_blocker: false, + visible_idle: false, + visible_working: false, + process_exited: false, + foreground_returned_to_shell: false, + root_is_agent: false, + stale_report: false, + notes: Vec::new(), + } + } +} + +impl EvidenceSummary { + pub fn has_note(&self, target: EvidenceNote) -> bool { + self.notes.iter().any(|note| note == &target) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EvidenceWinner { + ProcessExit, + Freeze, + Blocked, + Authority, + StrongVisualOrOsc, + Physics, + Unknown, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum EvidenceNote { + WatchdogDemoted, + AuthorityExpired, + AuthorityIdentityMismatch, + StopSuppressed, + ProcessExited, + ForegroundReturnedToShell, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn clearing_agent_osc_signals_preserves_shell_markers() { + let now = Instant::now(); + let mut evidence = OscEvidence { + title: Some("Codex working".to_owned()), + title_changed_at: Some(now), + notify_edge_at: Some(now), + progress_active: true, + progress_cleared_at: Some(now), + bel_at: Some(now), + bel_count: 2, + shell_state: Some(RawAgentState::Idle), + shell_mark_at: Some(now), + }; + + evidence.clear_agent_signals(); + + assert_eq!(evidence.title, None); + assert_eq!(evidence.title_changed_at, None); + 
assert_eq!(evidence.notify_edge_at, None); + assert!(!evidence.progress_active); + assert_eq!(evidence.progress_cleared_at, None); + assert_eq!(evidence.bel_at, None); + assert_eq!(evidence.bel_count, 0); + assert_eq!(evidence.shell_state, Some(RawAgentState::Idle)); + assert_eq!(evidence.shell_mark_at, Some(now)); + } +} diff --git a/crates/jackin-capsule/src/agent_status/gating.rs b/crates/jackin-capsule/src/agent_status/gating.rs new file mode 100644 index 000000000..9c2babebf --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/gating.rs @@ -0,0 +1,314 @@ +use crate::agent_status::evidence::{EvidenceNote, RawAgentState}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RuntimeEvent { + pub runtime: String, + pub event: String, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct SourceGateState { + pub pending_permission: bool, + pub subagents_active: u32, + pub notes: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum GateEffect { + Authority { + state: RawAgentState, + pending_permission: bool, + subagents_active: u32, + notes: Vec, + }, + Heartbeat, + Clear, + CounterOnly { + subagents_active: u32, + }, + Ignore, +} + +pub fn map_event(event: &RuntimeEvent, state: &mut SourceGateState) -> GateEffect { + let Some(canonical) = canonical_event(event.runtime.as_str(), event.event.as_str()) else { + return GateEffect::Ignore; + }; + match canonical { + "prompt-submitted" | "tool-start" | "tool-end" | "compact-start" => { + authority(RawAgentState::Working, state, Vec::new()) + } + "permission-requested" | "question-asked" | "elicitation" => { + state.pending_permission = true; + authority(RawAgentState::Blocked, state, Vec::new()) + } + "permission-resolved" | "question-answered" => { + state.pending_permission = false; + authority(RawAgentState::Working, state, Vec::new()) + } + "turn-complete" | "stop" => { + if state.pending_permission { + authority( + RawAgentState::Blocked, + state, + vec![EvidenceNote::StopSuppressed], + ) + 
} else if state.subagents_active > 0 { + authority( + RawAgentState::Working, + state, + vec![EvidenceNote::StopSuppressed], + ) + } else { + authority(RawAgentState::Idle, state, Vec::new()) + } + } + "subagent-start" => { + state.subagents_active = state.subagents_active.saturating_add(1); + GateEffect::CounterOnly { + subagents_active: state.subagents_active, + } + } + "subagent-stop" => { + state.subagents_active = state.subagents_active.saturating_sub(1); + GateEffect::CounterOnly { + subagents_active: state.subagents_active, + } + } + "session-end" | "agent-exit" => GateEffect::Clear, + "heartbeat" => GateEffect::Heartbeat, + _ => GateEffect::Ignore, + } +} + +fn authority( + state: RawAgentState, + gate: &mut SourceGateState, + notes: Vec, +) -> GateEffect { + gate.notes = notes.clone(); + GateEffect::Authority { + state, + pending_permission: gate.pending_permission, + subagents_active: gate.subagents_active, + notes, + } +} + +fn canonical_event(runtime: &str, event: &str) -> Option<&'static str> { + let normalized = event.trim(); + match normalized { + "prompt-submitted" => Some("prompt-submitted"), + "tool-start" => Some("tool-start"), + "tool-end" => Some("tool-end"), + "compact-start" => Some("compact-start"), + "permission-requested" => Some("permission-requested"), + "question-asked" => Some("question-asked"), + "elicitation" => Some("elicitation"), + "permission-resolved" => Some("permission-resolved"), + "question-answered" => Some("question-answered"), + "turn-complete" => Some("turn-complete"), + "stop" => Some("stop"), + "subagent-start" => Some("subagent-start"), + "subagent-stop" => Some("subagent-stop"), + "session-end" => Some("session-end"), + "agent-exit" => Some("agent-exit"), + "heartbeat" => Some("heartbeat"), + _ => canonical_vendor_event(runtime, normalized), + } +} + +fn canonical_vendor_event(runtime: &str, event: &str) -> Option<&'static str> { + match (runtime, event) { + ("claude" | "codex", "UserPromptSubmit") => 
Some("prompt-submitted"), + ("claude" | "codex", "PreToolUse") => Some("tool-start"), + ("claude" | "codex", "PostToolUse") | ("claude", "PostToolUseFailure") => Some("tool-end"), + ("claude" | "codex", "PermissionRequest") => Some("permission-requested"), + ("claude", "PermissionDenied") => Some("permission-resolved"), + ("claude", "Notification:permission_prompt" | "Notification:elicitation_dialog") => { + Some("permission-requested") + } + ("claude", "Notification:idle_prompt" | "Notification:auth_success") => Some("heartbeat"), + ("claude" | "codex", "Stop") | ("claude", "StopFailure") => Some("turn-complete"), + ("claude" | "codex", "SubagentStart") => Some("subagent-start"), + ("claude" | "codex", "SubagentStop") => Some("subagent-stop"), + ("claude", "SessionEnd") => Some("agent-exit"), + ("opencode", "session.status" | "tool.execute.before") => Some("tool-start"), + ("opencode", "tool.execute.after") => Some("tool-end"), + ("opencode", "session.idle") => Some("turn-complete"), + ("opencode", "permission.asked") => Some("permission-requested"), + ("opencode", "permission.replied") => Some("permission-resolved"), + ("opencode", "session.error") => Some("agent-exit"), + ("amp", "agent.start" | "tool.call") => Some("tool-start"), + ("amp", "tool.result") => Some("tool-end"), + ("amp", "agent.end") => Some("turn-complete"), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn event(runtime: &str, event: &str) -> RuntimeEvent { + RuntimeEvent { + runtime: runtime.to_owned(), + event: event.to_owned(), + } + } + + fn authority_state(effect: GateEffect) -> RawAgentState { + match effect { + GateEffect::Authority { state, .. 
} => state, + other => panic!("expected authority effect, got {other:?}"), + } + } + + fn canonical_turn(runtime: &str) -> &'static [&'static str] { + match runtime { + "claude" => &[ + "UserPromptSubmit", + "PreToolUse", + "PermissionRequest", + "PermissionDenied", + "PostToolUse", + "Stop", + ], + "codex" => &[ + "UserPromptSubmit", + "PreToolUse", + "PermissionRequest", + "permission-resolved", + "PostToolUse", + "Stop", + ], + "opencode" => &[ + "session.status", + "tool.execute.before", + "permission.asked", + "permission.replied", + "tool.execute.after", + "session.idle", + ], + "amp" => &[ + "agent.start", + "tool.call", + "permission-requested", + "permission-resolved", + "tool.result", + "agent.end", + ], + other => panic!("missing recorded turn for {other}"), + } + } + + #[test] + fn recorded_runtime_turn_sequences_map_to_expected_states() { + for runtime in ["claude", "codex", "opencode", "amp"] { + let mut state = SourceGateState::default(); + let observed = canonical_turn(runtime) + .iter() + .map(|name| authority_state(map_event(&event(runtime, name), &mut state))) + .collect::>(); + + assert_eq!( + observed, + vec![ + RawAgentState::Working, + RawAgentState::Working, + RawAgentState::Blocked, + RawAgentState::Working, + RawAgentState::Working, + RawAgentState::Idle, + ], + "runtime={runtime}" + ); + } + } + + #[test] + fn permission_stop_stays_blocked_until_resolved() { + let mut state = SourceGateState::default(); + assert_eq!( + map_event(&event("claude", "PermissionRequest"), &mut state), + GateEffect::Authority { + state: RawAgentState::Blocked, + pending_permission: true, + subagents_active: 0, + notes: Vec::new(), + } + ); + assert_eq!( + map_event(&event("claude", "Stop"), &mut state), + GateEffect::Authority { + state: RawAgentState::Blocked, + pending_permission: true, + subagents_active: 0, + notes: vec![EvidenceNote::StopSuppressed], + } + ); + } + + #[test] + fn permission_resolved_unblocks_to_working() { + let mut state = SourceGateState 
{ + pending_permission: true, + subagents_active: 0, + notes: Vec::new(), + }; + assert_eq!( + map_event(&event("claude", "PermissionDenied"), &mut state), + GateEffect::Authority { + state: RawAgentState::Working, + pending_permission: false, + subagents_active: 0, + notes: Vec::new(), + } + ); + } + + #[test] + fn stop_with_live_subagent_stays_working() { + let mut state = SourceGateState::default(); + assert!(matches!( + map_event(&event("claude", "SubagentStart"), &mut state), + GateEffect::CounterOnly { + subagents_active: 1 + } + )); + assert_eq!( + map_event(&event("claude", "Stop"), &mut state), + GateEffect::Authority { + state: RawAgentState::Working, + pending_permission: false, + subagents_active: 1, + notes: vec![EvidenceNote::StopSuppressed], + } + ); + } + + #[test] + fn claude_idle_notification_is_not_blocked() { + let mut state = SourceGateState::default(); + assert_eq!( + map_event(&event("claude", "Notification:idle_prompt"), &mut state), + GateEffect::Heartbeat + ); + assert!(!state.pending_permission); + } + + #[test] + fn claude_permission_notification_blocks() { + let mut state = SourceGateState::default(); + assert!(matches!( + map_event( + &event("claude", "Notification:permission_prompt"), + &mut state + ), + GateEffect::Authority { + state: RawAgentState::Blocked, + pending_permission: true, + .. + } + )); + } +} diff --git a/crates/jackin-capsule/src/agent_status/hook_installer.rs b/crates/jackin-capsule/src/agent_status/hook_installer.rs new file mode 100644 index 000000000..79144dc0a --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/hook_installer.rs @@ -0,0 +1,526 @@ +//! Hook/plugin installer for runtime-specific status reporters. +//! +//! Each built-in agent runtime has a dedicated installer that writes the +//! hook/plugin configuration into the container-local agent home and verifies +//! it matches the expected content. Drift is repaired on every session launch. 
+ +use std::fs; +use std::io::Write as _; +use std::path::Path; + +/// Interface for a runtime-specific hook/plugin installer. +pub trait HookInstaller { + /// Install hook/plugin assets into `agent_home`. Creates any missing + /// directories and files; repairs stale configuration atomically via + /// tmp-file + rename. + fn install(&self, agent_home: &Path) -> anyhow::Result<()>; + + /// Verify that the current state of `agent_home` matches the expected + /// hook/plugin configuration. Returns `true` when no repair is needed. + fn verify(&self, agent_home: &Path) -> bool; +} + +/// Hook installer for Claude Code. +/// +/// Installs `/home/agent/.claude/settings.json` entries that register the +/// jackin status reporter for every relevant Claude hook event. +#[derive(Debug)] +pub struct ClaudeHookInstaller { + /// Path to the hook script inside the container. + pub hook_script_path: String, +} + +impl Default for ClaudeHookInstaller { + fn default() -> Self { + Self { + hook_script_path: "/jackin/runtime/agent-status/hooks/claude/report-hook.sh".to_owned(), + } + } +} + +impl HookInstaller for ClaudeHookInstaller { + fn install(&self, agent_home: &Path) -> anyhow::Result<()> { + let settings_path = agent_home.join(".claude").join("settings.json"); + if let Some(parent) = settings_path.parent() { + fs::create_dir_all(parent)?; + } + + // Read existing settings.json if present; start from empty object if not. + let existing: serde_json::Value = if settings_path.exists() { + let content = fs::read_to_string(&settings_path)?; + serde_json::from_str(&content).unwrap_or(serde_json::json!({})) + } else { + serde_json::json!({}) + }; + + let updated = self.merge_hook_entries(existing); + + // Atomic write via tmp-file + rename to avoid partial writes. 
+ let tmp = settings_path.with_extension("json.tmp"); + { + let mut f = fs::File::create(&tmp)?; + serde_json::to_writer_pretty(&mut f, &updated)?; + f.flush()?; + } + fs::rename(&tmp, &settings_path)?; + + Ok(()) + } + + fn verify(&self, agent_home: &Path) -> bool { + let settings_path = agent_home.join(".claude").join("settings.json"); + if !settings_path.exists() { + return false; + } + let Ok(content) = fs::read_to_string(&settings_path) else { + return false; + }; + let Ok(val) = serde_json::from_str::(&content) else { + return false; + }; + self.hooks_are_present(&val) + } +} + +impl ClaudeHookInstaller { + fn command_for_event(&self, event: &str) -> String { + format!("{} --event {event}", self.hook_script_path) + } + + fn hook_entry(&self, event: &str, async_flag: bool) -> serde_json::Value { + serde_json::json!({ + "matcher": "", + "hooks": [{ + "type": "command", + "command": self.command_for_event(event), + "async": async_flag + }] + }) + } + + /// Returns the expected hooks configuration as a mapping of event name + /// to `async_flag`. + fn expected_events(&self) -> Vec<(&'static str, bool)> { + vec![ + ("UserPromptSubmit", true), + ("PreToolUse", true), + ("PostToolUse", true), + ("PostToolUseFailure", true), + // PermissionRequest is synchronous so Claude reads the continue ack. + ("PermissionRequest", false), + ("PermissionDenied", true), + ("Notification", true), + ("Stop", true), + ("StopFailure", true), + ("SubagentStart", true), + ("SubagentStop", true), + ("SessionEnd", true), + ] + } + + fn merge_hook_entries(&self, mut settings: serde_json::Value) -> serde_json::Value { + let hooks = settings + .as_object_mut() + .map(|obj| { + obj.entry("hooks") + .or_insert_with(|| serde_json::json!({})) + .as_object_mut() + .cloned() + .unwrap_or_default() + }) + .unwrap_or_default(); + + let mut hooks_obj = serde_json::Map::new(); + // Preserve all existing entries first. 
+ for (k, v) in &hooks { + hooks_obj.insert(k.clone(), v.clone()); + } + + // Install or repair only our command entry inside each event array. + for (event, async_flag) in self.expected_events() { + let expected_command = self.command_for_event(event); + let mut entries = hooks_obj + .remove(event) + .and_then(|value| value.as_array().cloned()) + .unwrap_or_default(); + let mut repaired = false; + for entry in &mut entries { + let Some(hooks) = entry + .get_mut("hooks") + .and_then(|hooks| hooks.as_array_mut()) + else { + continue; + }; + for hook in hooks { + if hook + .get("command") + .and_then(serde_json::Value::as_str) + .is_some_and(|command| command.starts_with(&self.hook_script_path)) + { + if let Some(obj) = hook.as_object_mut() { + obj.insert("async".to_owned(), serde_json::Value::Bool(async_flag)); + obj.insert( + "type".to_owned(), + serde_json::Value::String("command".to_owned()), + ); + obj.insert( + "command".to_owned(), + serde_json::Value::String(expected_command.clone()), + ); + } + repaired = true; + } + } + } + if !repaired { + entries.push(self.hook_entry(event, async_flag)); + } + hooks_obj.insert(event.to_owned(), serde_json::Value::Array(entries)); + } + + if let Some(obj) = settings.as_object_mut() { + obj.insert("hooks".to_owned(), serde_json::Value::Object(hooks_obj)); + } + settings + } + + fn hooks_are_present(&self, settings: &serde_json::Value) -> bool { + let Some(hooks) = settings.get("hooks").and_then(|h| h.as_object()) else { + return false; + }; + for (event, async_flag) in self.expected_events() { + let expected_command = self.command_for_event(event); + let Some(arr) = hooks.get(event).and_then(|v| v.as_array()) else { + return false; + }; + // Check that at least one entry has our command with the correct async flag. 
+ let found = arr.iter().any(|entry| { + let inner = entry.get("hooks").and_then(|h| h.as_array()); + inner.is_some_and(|inner_hooks| { + inner_hooks.iter().any(|h| { + h.get("command") + .and_then(|c| c.as_str()) + .is_some_and(|c| c == expected_command) + && h.get("async") + .and_then(serde_json::Value::as_bool) + .is_some_and(|a| a == async_flag) + }) + }) + }); + if !found { + return false; + } + } + true + } +} + +/// Installer for Amp plugin reporter. +#[derive(Debug)] +pub struct AmpPluginInstaller { + pub plugin_path: String, +} + +impl Default for AmpPluginInstaller { + fn default() -> Self { + Self { + plugin_path: "/jackin/runtime/agent-status/hooks/amp/plugin.js".to_owned(), + } + } +} + +impl HookInstaller for AmpPluginInstaller { + fn install(&self, agent_home: &Path) -> anyhow::Result<()> { + let config_path = agent_home.join(".config").join("amp").join("plugins.json"); + write_json_file( + &config_path, + &serde_json::json!({ + "plugins": [self.plugin_path] + }), + )?; + Ok(()) + } + + fn verify(&self, agent_home: &Path) -> bool { + let config_path = agent_home.join(".config").join("amp").join("plugins.json"); + json_file_contains_string(&config_path, &self.plugin_path) + } +} + +/// Installer for Codex hook reporter. 
+#[derive(Debug)] +pub struct CodexHookInstaller { + pub hook_script_path: String, +} + +impl Default for CodexHookInstaller { + fn default() -> Self { + Self { + hook_script_path: "/jackin/runtime/agent-status/hooks/codex/report-hook.sh".to_owned(), + } + } +} + +impl HookInstaller for CodexHookInstaller { + fn install(&self, agent_home: &Path) -> anyhow::Result<()> { + let hooks_path = agent_home.join(".codex").join("hooks.json"); + write_json_file(&hooks_path, &self.hooks_json()) + } + + fn verify(&self, agent_home: &Path) -> bool { + let hooks_path = agent_home.join(".codex").join("hooks.json"); + json_file_contains_string(&hooks_path, &self.hook_script_path) + } +} + +impl CodexHookInstaller { + fn hooks_json(&self) -> serde_json::Value { + let command = |event: &str| format!("{} --event {event}", self.hook_script_path); + serde_json::json!({ + "hooks": { + "UserPromptSubmit": [{ "command": command("UserPromptSubmit") }], + "PreToolUse": [{ "command": command("PreToolUse") }], + "PermissionRequest": [{ "command": command("PermissionRequest") }], + "PostToolUse": [{ "command": command("PostToolUse") }], + "SubagentStart": [{ "command": command("SubagentStart") }], + "SubagentStop": [{ "command": command("SubagentStop") }], + "Stop": [{ "command": command("Stop") }] + }, + "notify": format!("{} --event turn-complete", self.hook_script_path) + }) + } +} + +/// Installer for `OpenCode` plugin reporter. 
+#[derive(Debug)] +pub struct OpenCodePluginInstaller { + pub plugin_path: String, +} + +impl Default for OpenCodePluginInstaller { + fn default() -> Self { + Self { + plugin_path: "/jackin/runtime/agent-status/hooks/opencode/plugin.js".to_owned(), + } + } +} + +impl HookInstaller for OpenCodePluginInstaller { + fn install(&self, agent_home: &Path) -> anyhow::Result<()> { + let config_path = agent_home + .join(".config") + .join("opencode") + .join("plugins.json"); + write_json_file( + &config_path, + &serde_json::json!({ + "plugins": [self.plugin_path] + }), + ) + } + + fn verify(&self, agent_home: &Path) -> bool { + let config_path = agent_home + .join(".config") + .join("opencode") + .join("plugins.json"); + json_file_contains_string(&config_path, &self.plugin_path) + } +} + +fn write_json_file(path: &Path, value: &serde_json::Value) -> anyhow::Result<()> { + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + let tmp = path.with_extension("json.tmp"); + { + let mut file = fs::File::create(&tmp)?; + serde_json::to_writer_pretty(&mut file, value)?; + file.flush()?; + } + fs::rename(tmp, path)?; + Ok(()) +} + +fn json_file_contains_string(path: &Path, needle: &str) -> bool { + fs::read_to_string(path) + .ok() + .is_some_and(|content| content.contains(needle)) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn installer() -> ClaudeHookInstaller { + ClaudeHookInstaller::default() + } + + #[test] + fn claude_hook_installer_writes_settings_json() { + let dir = TempDir::new().unwrap(); + let home = dir.path().to_path_buf(); + installer().install(&home).unwrap(); + let settings_path = home.join(".claude").join("settings.json"); + assert!(settings_path.exists()); + let content = fs::read_to_string(&settings_path).unwrap(); + let val: serde_json::Value = serde_json::from_str(&content).unwrap(); + assert!(val.get("hooks").is_some()); + assert!(installer().verify(&home)); + } + + #[test] + fn 
claude_hook_installer_repairs_stale_async_flag() { + let dir = TempDir::new().unwrap(); + let home = dir.path().to_path_buf(); + let claude_dir = home.join(".claude"); + fs::create_dir_all(&claude_dir).unwrap(); + // Write settings.json with wrong async flag on PermissionRequest. + let bad_settings = serde_json::json!({ + "hooks": { + "PermissionRequest": [{"matcher":"","hooks":[{"type":"command","command":"/jackin/runtime/agent-status/hooks/claude/report-hook.sh","async":true}]}] + } + }); + fs::write( + claude_dir.join("settings.json"), + serde_json::to_string_pretty(&bad_settings).unwrap(), + ) + .unwrap(); + // Verify fails (PermissionRequest has wrong async flag). + assert!(!installer().verify(&home)); + // Install repairs it. + installer().install(&home).unwrap(); + assert!(installer().verify(&home)); + } + + #[test] + fn claude_stop_hook_is_async_and_permission_request_is_sync() { + let dir = tempfile::tempdir().unwrap(); + let home = dir.path().to_path_buf(); + installer().install(&home).unwrap(); + let settings_path = home.join(".claude").join("settings.json"); + let content = fs::read_to_string(&settings_path).unwrap(); + let val: serde_json::Value = serde_json::from_str(&content).unwrap(); + let hooks = val.get("hooks").and_then(|h| h.as_object()).unwrap(); + + // Stop must be async: true; observability must not mutate agent flow. + let stop_entries = hooks.get("Stop").and_then(|v| v.as_array()).unwrap(); + let stop_hook = &stop_entries[0]["hooks"][0]; + assert_eq!( + stop_hook.get("async").and_then(serde_json::Value::as_bool), + Some(true), + "Stop hook must be async: true" + ); + + // PermissionRequest must also be async: false. 
+ let perm_entries = hooks + .get("PermissionRequest") + .and_then(|v| v.as_array()) + .unwrap(); + let perm_hook = &perm_entries[0]["hooks"][0]; + assert_eq!( + perm_hook.get("async").and_then(serde_json::Value::as_bool), + Some(false), + "PermissionRequest hook must be async: false" + ); + + // The hook script path matches our expected path. + assert_eq!( + stop_hook.get("command").and_then(|v| v.as_str()), + Some("/jackin/runtime/agent-status/hooks/claude/report-hook.sh --event Stop") + ); + assert!(hooks.get("Notification").is_some()); + assert!(hooks.get("SessionEnd").is_some()); + assert!(hooks.get("TaskCreated").is_none()); + assert!(hooks.get("TaskCompleted").is_none()); + } + + #[test] + fn codex_notify_reports_turn_complete() { + let dir = TempDir::new().unwrap(); + let home = dir.path().to_path_buf(); + CodexHookInstaller::default().install(&home).unwrap(); + let hooks_path = home.join(".codex").join("hooks.json"); + let content = fs::read_to_string(hooks_path).unwrap(); + let val: serde_json::Value = serde_json::from_str(&content).unwrap(); + + assert_eq!( + val.get("notify").and_then(serde_json::Value::as_str), + Some("/jackin/runtime/agent-status/hooks/codex/report-hook.sh --event turn-complete") + ); + assert_eq!( + val.pointer("/hooks/PermissionRequest/0/command") + .and_then(serde_json::Value::as_str), + Some( + "/jackin/runtime/agent-status/hooks/codex/report-hook.sh --event PermissionRequest" + ) + ); + } + + #[test] + fn claude_hook_installer_preserves_unrelated_settings() { + let dir = TempDir::new().unwrap(); + let home = dir.path().to_path_buf(); + let claude_dir = home.join(".claude"); + fs::create_dir_all(&claude_dir).unwrap(); + let existing = serde_json::json!({ + "model": "claude-sonnet-4-6", + "someOtherKey": 42 + }); + fs::write( + claude_dir.join("settings.json"), + serde_json::to_string_pretty(&existing).unwrap(), + ) + .unwrap(); + installer().install(&home).unwrap(); + let content = 
fs::read_to_string(claude_dir.join("settings.json")).unwrap(); + let val: serde_json::Value = serde_json::from_str(&content).unwrap(); + assert_eq!( + val.get("model").and_then(|v| v.as_str()), + Some("claude-sonnet-4-6") + ); + assert_eq!( + val.get("someOtherKey").and_then(serde_json::Value::as_i64), + Some(42) + ); + } + + #[test] + fn claude_hook_installer_preserves_unrelated_hook_entries() { + let dir = TempDir::new().unwrap(); + let home = dir.path().to_path_buf(); + let claude_dir = home.join(".claude"); + fs::create_dir_all(&claude_dir).unwrap(); + let existing = serde_json::json!({ + "hooks": { + "PreToolUse": [{ + "matcher": "Bash", + "hooks": [{ + "type": "command", + "command": "/role/hook.sh", + "async": true + }] + }] + } + }); + fs::write( + claude_dir.join("settings.json"), + serde_json::to_string_pretty(&existing).unwrap(), + ) + .unwrap(); + + installer().install(&home).unwrap(); + + let content = fs::read_to_string(claude_dir.join("settings.json")).unwrap(); + let val: serde_json::Value = serde_json::from_str(&content).unwrap(); + let entries = val["hooks"]["PreToolUse"].as_array().unwrap(); + assert!( + entries + .iter() + .any(|entry| entry["hooks"][0]["command"] == "/role/hook.sh") + ); + assert!(entries.iter().any(|entry| entry["hooks"][0]["command"] + == "/jackin/runtime/agent-status/hooks/claude/report-hook.sh --event PreToolUse")); + } +} diff --git a/crates/jackin-capsule/src/agent_status/policy.rs b/crates/jackin-capsule/src/agent_status/policy.rs new file mode 100644 index 000000000..50feb2989 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/policy.rs @@ -0,0 +1,267 @@ +use std::time::{Duration, Instant}; + +use jackin_protocol::agent_status::AgentStatusConfidence; + +use crate::agent_status::arbitrate::ArbitrationResult; +use crate::agent_status::evidence::{EvidenceNote, EvidenceWinner, RawAgentState}; +use crate::protocol::AgentState; + +pub const AUTHORITY_TTL: Duration = Duration::from_secs(30); +pub const WATCHDOG_QUIET: 
Duration = Duration::from_secs(10);
/// Number of consecutive weak-confidence idle candidates required before the
/// idle transition is accepted.
pub const IDLE_CONFIRMATIONS: u8 = 3;
pub const STARTUP_GRACE: Duration = Duration::from_secs(3);
pub const CPU_SAMPLE_WINDOW: Duration = Duration::from_secs(2);
pub const RENOTIFY_INTERVAL: Duration = Duration::from_mins(5);
pub const EVAL_COALESCE: Duration = Duration::from_millis(250);

/// Bookkeeping for a weak idle transition that is still awaiting confirmation.
#[derive(Debug, Clone, Default)]
pub struct PendingTransition {
    /// Public state currently being confirmed, if any.
    pub candidate: Option<AgentState>,
    /// How many times in a row `candidate` has been observed.
    pub confirmations: u8,
}

/// Forgets any idle transition that was being confirmed.
fn reset_pending(pending: &mut PendingTransition) {
    pending.candidate = None;
    pending.confirmations = 0;
}

/// Records one more sighting of `next` and reports whether it has now been
/// seen `IDLE_CONFIRMATIONS` times. A different candidate restarts the count.
fn confirm_pending(pending: &mut PendingTransition, next: AgentState) -> bool {
    if pending.candidate == Some(next) {
        pending.confirmations = pending.confirmations.saturating_add(1);
    } else {
        pending.candidate = Some(next);
        pending.confirmations = 1;
    }
    pending.confirmations >= IDLE_CONFIRMATIONS
}

/// Decides which public state, if any, to publish for `candidate`.
///
/// Working, Blocked and Unknown candidates, and Idle/Done candidates with at
/// least `Strong` confidence, publish immediately when they differ from
/// `prev_public`. Weaker Idle/Done candidates publish only once they have been
/// seen `IDLE_CONFIRMATIONS` times in a row.
///
/// Every candidate that is not a weak idle one clears `pending`, so weak idle
/// sightings separated by other activity do not add up to a confirmation. This
/// matches the reset rule in `should_publish_candidate`.
///
/// Returns `None` when nothing should be published. `_now` is unused.
pub fn debounce(
    prev_public: AgentState,
    candidate: &ArbitrationResult,
    pending: &mut PendingTransition,
    _now: Instant,
) -> Option<AgentState> {
    let next = public_state_for_raw(prev_public, candidate.raw);
    match next {
        AgentState::Idle | AgentState::Done
            if candidate.confidence >= AgentStatusConfidence::Strong =>
        {
            reset_pending(pending);
            publish_if_changed(prev_public, next)
        }
        AgentState::Idle | AgentState::Done => {
            if confirm_pending(pending, next) {
                publish_if_changed(prev_public, next)
            } else {
                None
            }
        }
        AgentState::Blocked | AgentState::Working | AgentState::Unknown => {
            reset_pending(pending);
            publish_if_changed(prev_public, next)
        }
    }
}

/// Reports whether `candidate` may be published now.
///
/// A `ProcessExit` winner is always publishable. A weak-confidence Idle/Done
/// candidate is held back while OSC progress is active or CPU time advanced,
/// and otherwise until it has been seen `IDLE_CONFIRMATIONS` times in a row.
/// Everything else is publishable immediately and clears `pending`.
pub fn should_publish_candidate(
    prev_public: AgentState,
    candidate: &ArbitrationResult,
    pending: &mut PendingTransition,
) -> bool {
    if candidate.winner == EvidenceWinner::ProcessExit {
        reset_pending(pending);
        return true;
    }
    let next = public_state_for_raw(prev_public, candidate.raw);
    let weak_idle = matches!(next, AgentState::Idle | AgentState::Done)
        && candidate.confidence < AgentStatusConfidence::Strong;
    if !weak_idle {
        reset_pending(pending);
        return true;
    }
    // Signs of ongoing work contradict an inferred idle: start over.
    if candidate.summary.osc_progress_active || candidate.summary.cpu_jiffies_delta > 0 {
        reset_pending(pending);
        return false;
    }
    confirm_pending(pending, next)
}

/// Demotes a Working candidate to Unknown when nothing backs it up.
///
/// The candidate is returned unchanged unless all of the following hold: it is
/// Working, its summary does not already carry `WatchdogDemoted`, a last-output
/// time is recorded and lies at least `WATCHDOG_QUIET` before `now`, no CPU
/// time advanced, and there are no child processes. On demotion the raw state,
/// confidence and winner become Unknown on both the result and its summary,
/// and `WatchdogDemoted` is appended to both note lists.
pub fn apply_watchdog(mut candidate: ArbitrationResult, now: Instant) -> ArbitrationResult {
    if candidate.raw != RawAgentState::Working {
        return candidate;
    }
    let already_demoted = candidate
        .summary
        .notes
        .iter()
        .any(|note| matches!(note, EvidenceNote::WatchdogDemoted));
    if already_demoted {
        return candidate;
    }
    let Some(last_output) = candidate.summary.last_output else {
        return candidate;
    };
    if now.duration_since(last_output) < WATCHDOG_QUIET {
        return candidate;
    }
    if candidate.summary.cpu_jiffies_delta > 0 || candidate.summary.child_process_count > 0 {
        return candidate;
    }

    candidate.raw = RawAgentState::Unknown;
    candidate.confidence = AgentStatusConfidence::Unknown;
    candidate.winner = EvidenceWinner::Unknown;
    candidate.notes.push(EvidenceNote::WatchdogDemoted);

    candidate.summary.raw_state = RawAgentState::Unknown;
    candidate.summary.confidence = AgentStatusConfidence::Unknown;
    candidate.summary.winner = EvidenceWinner::Unknown;
    // The early return above guarantees the summary does not hold this note yet.
    candidate.summary.notes.push(EvidenceNote::WatchdogDemoted);
    candidate
}

/// Returns `Some(next)` only when it differs from `prev`.
fn publish_if_changed(prev: AgentState, next: AgentState) -> Option<AgentState> {
    (prev != next).then_some(next)
}

/// Maps a raw state to the public state. Raw Idle becomes `Done` when the
/// previous public state was Working or Blocked, and `Idle` otherwise.
fn public_state_for_raw(prev_public: AgentState, raw: RawAgentState) -> AgentState {
    match raw {
        RawAgentState::Unknown => AgentState::Unknown,
        RawAgentState::Working => AgentState::Working,
        RawAgentState::Blocked => AgentState::Blocked,
        RawAgentState::Idle => {
            if matches!(prev_public, AgentState::Working | AgentState::Blocked) {
                AgentState::Done
            } else {
                AgentState::Idle
            }
        }
    }
}
+#[cfg(test)] +mod tests { + use super::*; + use crate::agent_status::evidence::{EvidenceSummary, EvidenceWinner}; + + fn candidate(raw: RawAgentState, confidence: AgentStatusConfidence) -> ArbitrationResult { + ArbitrationResult { + raw, + confidence, + winner: EvidenceWinner::Unknown, + notes: Vec::new(), + summary: EvidenceSummary { + raw_state: raw, + confidence, + ..Default::default() + }, + } + } + + #[test] + fn watchdog_demotes_quiet_working() { + let now = Instant::now(); + let mut c = candidate(RawAgentState::Working, AgentStatusConfidence::Authoritative); + c.summary.last_output = now.checked_sub(WATCHDOG_QUIET + Duration::from_secs(1)); + + let result = apply_watchdog(c, now); + + assert_eq!(result.raw, RawAgentState::Unknown); + assert!(result.notes.contains(&EvidenceNote::WatchdogDemoted)); + assert!(result.summary.has_note(EvidenceNote::WatchdogDemoted)); + } + + #[test] + fn watchdog_does_not_fire_with_recent_output() { + let now = Instant::now(); + let mut c = candidate(RawAgentState::Working, AgentStatusConfidence::Authoritative); + c.summary.last_output = Some(now); + + let result = apply_watchdog(c, now); + + assert_eq!(result.raw, RawAgentState::Working); + } + + #[test] + fn watchdog_does_not_fire_with_live_child_process() { + let now = Instant::now(); + let mut c = candidate(RawAgentState::Working, AgentStatusConfidence::Authoritative); + c.summary.last_output = now.checked_sub(WATCHDOG_QUIET + Duration::from_secs(1)); + c.summary.child_process_count = 1; + + let result = apply_watchdog(c, now); + + assert_eq!(result.raw, RawAgentState::Working); + } + + #[test] + fn blocked_publishes_immediately() { + let mut pending = PendingTransition::default(); + let result = debounce( + AgentState::Working, + &candidate(RawAgentState::Blocked, AgentStatusConfidence::Strong), + &mut pending, + Instant::now(), + ); + assert_eq!(result, Some(AgentState::Blocked)); + } + + #[test] + fn inferred_idle_needs_three_confirmations() { + let mut pending = 
PendingTransition::default(); + let c = candidate(RawAgentState::Idle, AgentStatusConfidence::Weak); + assert_eq!( + debounce(AgentState::Working, &c, &mut pending, Instant::now()), + None + ); + assert_eq!( + debounce(AgentState::Working, &c, &mut pending, Instant::now()), + None + ); + assert_eq!( + debounce(AgentState::Working, &c, &mut pending, Instant::now()), + Some(AgentState::Done) + ); + } + + #[test] + fn visible_idle_publishes_immediately() { + let mut pending = PendingTransition::default(); + let result = debounce( + AgentState::Working, + &candidate(RawAgentState::Idle, AgentStatusConfidence::Strong), + &mut pending, + Instant::now(), + ); + assert_eq!(result, Some(AgentState::Done)); + } + + #[test] + fn inferred_idle_publication_is_held_until_confirmed() { + let mut pending = PendingTransition::default(); + let c = candidate(RawAgentState::Idle, AgentStatusConfidence::Weak); + assert!(!should_publish_candidate( + AgentState::Working, + &c, + &mut pending + )); + assert!(!should_publish_candidate( + AgentState::Working, + &c, + &mut pending + )); + assert!(should_publish_candidate( + AgentState::Working, + &c, + &mut pending + )); + } +} diff --git a/crates/jackin-capsule/src/agent_status/process.rs b/crates/jackin-capsule/src/agent_status/process.rs new file mode 100644 index 000000000..d71e2a6c6 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/process.rs @@ -0,0 +1,497 @@ +//! Linux /proc-based foreground process identity detection. +//! +//! Uses the `procfs` crate to read process metadata and determine which +//! agent binary owns the terminal's foreground process group. Called from +//! the 1Hz ticker in `daemon.rs` to validate hook authority and provide +//! a fallback detection signal. + +use std::collections::HashMap; +use std::path::PathBuf; +use std::time::Instant; + +use crate::agent_status::policy::CPU_SAMPLE_WINDOW; + +/// Information about a single process read from /proc. 
+#[derive(Debug, Clone)] +pub struct ProcessInfo { + pub pid: u32, + /// Process group ID. + pub pgid: u32, + /// Terminal foreground process group ID. + pub tpgid: i32, + /// Command line arguments, split on NUL bytes. + pub cmdline: Vec, + /// Resolved exe symlink path. + pub exe_path: Option, + /// comm field (capped at 15 chars by kernel). + pub comm: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ProcessCpuSample { + pub total_jiffies: u64, + pub sampled_at: Instant, +} + +/// Reads the `tpgid` (terminal foreground process group) for `pid`. +/// +/// Returns `None` when the process doesn't exist or the field is unparseable. +pub fn read_tpgid(pid: u32) -> Option { + let process = procfs::process::Process::new(pid as i32).ok()?; + let stat = process.stat().ok()?; + Some(stat.tpgid) +} + +/// Reads process info for `pid` from /proc. Returns `None` when the +/// process doesn't exist or required fields are unreadable. +pub fn read_process_info(pid: u32) -> Option { + let process = procfs::process::Process::new(pid as i32).ok()?; + let stat = process.stat().ok()?; + let pgid = stat.pgrp as u32; + let tpgid = stat.tpgid; + let comm = stat.comm.clone(); + let exe_path = process.exe().ok(); + let cmdline = process.cmdline().unwrap_or_default(); + Some(ProcessInfo { + pid, + pgid, + tpgid, + cmdline, + exe_path, + comm, + }) +} + +pub fn read_process_cpu_jiffies(pid: u32) -> Option { + let process = procfs::process::Process::new(pid as i32).ok()?; + let stat = process.stat().ok()?; + Some(stat.utime.saturating_add(stat.stime)) +} + +pub fn sample_cpu_jiffies_delta( + pid: u32, + previous: &mut Option, + now: Instant, +) -> u64 { + sample_cpu_jiffies_delta_from_total(read_process_cpu_jiffies(pid), previous, now) +} + +fn sample_cpu_jiffies_delta_from_total( + total_jiffies: Option, + previous: &mut Option, + now: Instant, +) -> u64 { + let Some(total_jiffies) = total_jiffies else { + *previous = None; + return 0; + }; + let Some(prior) = 
previous else { + *previous = Some(ProcessCpuSample { + total_jiffies, + sampled_at: now, + }); + return 0; + }; + if now.duration_since(prior.sampled_at) < CPU_SAMPLE_WINDOW { + return 0; + } + let delta = total_jiffies.saturating_sub(prior.total_jiffies); + *previous = Some(ProcessCpuSample { + total_jiffies, + sampled_at: now, + }); + delta +} + +pub fn descendant_process_count(root_pid: u32) -> u32 { + let Ok(iter) = procfs::process::all_processes() else { + return 0; + }; + let mut children_by_parent: HashMap> = HashMap::new(); + for proc_result in iter { + let Ok(process) = proc_result else { continue }; + let Ok(stat) = process.stat() else { continue }; + if stat.pid <= 0 || stat.ppid <= 0 { + continue; + } + children_by_parent + .entry(stat.ppid as u32) + .or_default() + .push(stat.pid as u32); + } + descendant_process_count_from_parents( + root_pid, + children_by_parent + .into_iter() + .flat_map(|(ppid, pids)| pids.into_iter().map(move |pid| (pid, ppid))), + ) +} + +fn descendant_process_count_from_parents( + root_pid: u32, + processes: impl IntoIterator, +) -> u32 { + let mut children_by_parent: HashMap> = HashMap::new(); + for (pid, ppid) in processes { + children_by_parent.entry(ppid).or_default().push(pid); + } + let mut count = 0u32; + let mut stack = children_by_parent.remove(&root_pid).unwrap_or_default(); + while let Some(pid) = stack.pop() { + count = count.saturating_add(1); + if let Some(children) = children_by_parent.remove(&pid) { + stack.extend(children); + } + } + count +} + +/// Scan all processes in `/proc` and return those with `pgrp == target_pgid`. 
+pub fn pids_in_pgrp(target_pgid: u32) -> Vec { + let Ok(iter) = procfs::process::all_processes() else { + return Vec::new(); + }; + let mut pids = Vec::new(); + for proc_result in iter { + let Ok(process) = proc_result else { continue }; + let Ok(stat) = process.stat() else { continue }; + if stat.pgrp == target_pgid as i32 { + pids.push(stat.pid as u32); + } + } + pids +} + +/// Agent kinds that jackin' recognises. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AgentKind { + ClaudeCode, + Codex, + Amp, + Kimi, + OpenCode, + Unknown, +} + +impl AgentKind { + pub fn as_str(&self) -> &'static str { + match self { + Self::ClaudeCode => "claude", + Self::Codex => "codex", + Self::Amp => "amp", + Self::Kimi => "kimi", + Self::OpenCode => "opencode", + Self::Unknown => "unknown", + } + } +} + +fn agent_kind_from_name(name: &str) -> Option { + match name { + "codex" => Some(AgentKind::Codex), + "amp" => Some(AgentKind::Amp), + "kimi" => Some(AgentKind::Kimi), + "opencode" => Some(AgentKind::OpenCode), + "claude" | "claude-code" => Some(AgentKind::ClaudeCode), + _ => None, + } +} + +/// Identify the agent running in `proc`. Returns `None` when no known agent +/// is found. 
+pub fn identify_agent(info: &ProcessInfo) -> Option { + // Primary: exe basename + if let Some(ref exe) = info.exe_path { + let exe_name = exe.file_name()?.to_string_lossy(); + if let Some(kind) = agent_kind_from_name(exe_name.as_ref()) { + return Some(kind); + } + // Node-wrapped agents: inspect argv[1] for the JS entry point + if matches!(exe_name.as_ref(), "node" | "bun" | "deno") { + if let Some(script) = info.cmdline.get(1) + && (script.contains("@anthropic-ai/claude-code") || script.contains("claude-code")) + { + return Some(AgentKind::ClaudeCode); + } + return Some(AgentKind::Unknown); + } + } + + // Fallback: comm field (capped at 15 chars) + agent_kind_from_name(info.comm.as_str()) +} + +/// Given the child PID of a session's root process, determine what agent +/// currently owns the terminal's foreground process group. +/// +/// Returns `(agent_kind, foreground_pgid)` or `None` when detection fails. +pub fn detect_foreground_agent(child_pid: u32) -> Option<(AgentKind, u32)> { + let info = read_process_info(child_pid)?; + if info.tpgid <= 0 { + return None; + } + let fg_pgid = u32::try_from(info.tpgid).ok()?; + let process_group: Vec<_> = pids_in_pgrp(fg_pgid) + .into_iter() + .filter_map(read_process_info) + .collect(); + detect_foreground_agent_from_process_infos(&info, &process_group) +} + +fn detect_foreground_agent_from_process_infos( + root_info: &ProcessInfo, + process_group: &[ProcessInfo], +) -> Option<(AgentKind, u32)> { + if root_info.tpgid <= 0 { + return None; + } + let fg_pgid = u32::try_from(root_info.tpgid).ok()?; + for proc_info in process_group { + if let Some(kind) = identify_agent(proc_info) + && kind != AgentKind::Unknown + { + return Some((kind, fg_pgid)); + } + } + // Process group exists but no recognized agent binary found. 
+ Some((AgentKind::Unknown, fg_pgid)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + fn proc_info( + pid: u32, + pgid: u32, + tpgid: i32, + exe_path: Option<&str>, + comm: &str, + cmdline: &[&str], + ) -> ProcessInfo { + ProcessInfo { + pid, + pgid, + tpgid, + cmdline: cmdline.iter().map(|part| (*part).to_owned()).collect(), + exe_path: exe_path.map(PathBuf::from), + comm: comm.to_owned(), + } + } + + #[test] + fn identify_agent_node_wrapped_claude_from_cmdline() { + let info = proc_info( + 100, + 100, + 100, + Some("/usr/bin/node"), + "node", + &[ + "node", + "/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js", + ], + ); + assert_eq!(identify_agent(&info), Some(AgentKind::ClaudeCode)); + } + + #[test] + fn identify_agent_native_codex_binary() { + let info = proc_info( + 200, + 200, + 200, + Some("/usr/local/bin/codex"), + "codex", + &["codex"], + ); + assert_eq!(identify_agent(&info), Some(AgentKind::Codex)); + } + + #[test] + fn identify_agent_native_amp_binary() { + let info = proc_info( + 300, + 300, + 300, + Some("/usr/local/bin/amp"), + "amp", + &["amp", "--dangerously-allow-all"], + ); + assert_eq!(identify_agent(&info), Some(AgentKind::Amp)); + } + + #[test] + fn identify_agent_stat_comm_truncation_falls_back_to_exe() { + let info = proc_info( + 400, + 400, + 400, + Some("/usr/bin/node"), + "node", + &["node", "/path/to/@anthropic-ai/claude-code/cli.js"], + ); + assert_eq!(identify_agent(&info), Some(AgentKind::ClaudeCode)); + } + + #[test] + fn cpu_sample_waits_for_window_then_reports_saturating_delta() { + let now = Instant::now(); + let mut previous = None; + + assert_eq!( + sample_cpu_jiffies_delta_from_total(Some(100), &mut previous, now), + 0 + ); + assert_eq!( + previous, + Some(ProcessCpuSample { + total_jiffies: 100, + sampled_at: now + }) + ); + + let before_window = (now + CPU_SAMPLE_WINDOW) + .checked_sub(Duration::from_millis(1)) + .unwrap(); + assert_eq!( + 
sample_cpu_jiffies_delta_from_total(Some(125), &mut previous, before_window), + 0 + ); + assert_eq!( + previous, + Some(ProcessCpuSample { + total_jiffies: 100, + sampled_at: now + }) + ); + + let after_window = now + CPU_SAMPLE_WINDOW + Duration::from_millis(1); + assert_eq!( + sample_cpu_jiffies_delta_from_total(Some(140), &mut previous, after_window), + 40 + ); + assert_eq!( + previous, + Some(ProcessCpuSample { + total_jiffies: 140, + sampled_at: after_window + }) + ); + + let after_reset = after_window + CPU_SAMPLE_WINDOW + Duration::from_millis(1); + assert_eq!( + sample_cpu_jiffies_delta_from_total(Some(10), &mut previous, after_reset), + 0 + ); + } + + #[test] + fn cpu_sample_missing_process_clears_prior_sample() { + let now = Instant::now(); + let mut previous = Some(ProcessCpuSample { + total_jiffies: 100, + sampled_at: now, + }); + + assert_eq!( + sample_cpu_jiffies_delta_from_total(None, &mut previous, now), + 0 + ); + assert_eq!(previous, None); + } + + #[test] + fn descendant_count_fixture_counts_full_tree_only_under_root() { + let processes = [(2, 1), (3, 1), (4, 2), (5, 4), (6, 99), (7, 6)]; + + assert_eq!(descendant_process_count_from_parents(1, processes), 4); + assert_eq!(descendant_process_count_from_parents(99, processes), 2); + assert_eq!(descendant_process_count_from_parents(42, processes), 0); + } + + #[test] + fn foreground_agent_fixture_detects_direct_binary() { + let root = proc_info(100, 100, 300, Some("/bin/zsh"), "zsh", &["zsh"]); + let foreground = [ + proc_info( + 300, + 300, + 300, + Some("/usr/local/bin/codex"), + "codex", + &["codex"], + ), + proc_info(301, 300, 300, Some("/usr/bin/node"), "node", &["node"]), + ]; + + assert_eq!( + detect_foreground_agent_from_process_infos(&root, &foreground), + Some((AgentKind::Codex, 300)) + ); + } + + #[test] + fn foreground_agent_fixture_detects_node_wrapped_claude() { + let root = proc_info(100, 100, 300, Some("/bin/zsh"), "zsh", &["zsh"]); + let foreground = [proc_info( + 300, + 300, + 300, 
+ Some("/usr/bin/node"), + "node", + &["node", "/app/node_modules/@anthropic-ai/claude-code/cli.js"], + )]; + + assert_eq!( + detect_foreground_agent_from_process_infos(&root, &foreground), + Some((AgentKind::ClaudeCode, 300)) + ); + } + + #[test] + fn foreground_agent_fixture_reports_unknown_shell_handoff() { + let root = proc_info(100, 100, 100, Some("/bin/bash"), "bash", &["bash"]); + let foreground = [ + proc_info(100, 100, 100, Some("/bin/bash"), "bash", &["bash"]), + proc_info( + 101, + 100, + 100, + Some("/usr/bin/starship"), + "starship", + &["starship"], + ), + ]; + + assert_eq!( + detect_foreground_agent_from_process_infos(&root, &foreground), + Some((AgentKind::Unknown, 100)) + ); + } + + #[test] + fn foreground_agent_fixture_rejects_missing_foreground_group() { + let root = proc_info(100, 100, 0, Some("/bin/bash"), "bash", &["bash"]); + let foreground = [proc_info( + 100, + 100, + 0, + Some("/usr/local/bin/codex"), + "codex", + &["codex"], + )]; + + assert_eq!( + detect_foreground_agent_from_process_infos(&root, &foreground), + None + ); + } + + #[test] + fn dead_process_returns_none() { + let info = read_process_info(99999999); + assert!(info.is_none()); + } +} diff --git a/crates/jackin-capsule/src/agent_status/rules.rs b/crates/jackin-capsule/src/agent_status/rules.rs new file mode 100644 index 000000000..d3a7b085d --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/rules.rs @@ -0,0 +1,785 @@ +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +use anyhow::Context as _; +use serde::{Deserialize, Deserializer, Serialize}; + +use crate::agent_status::evidence::RawAgentState; + +#[derive(Debug, Clone, Copy, Default)] +pub struct VirtualRegions<'a> { + pub osc_title: Option<&'a str>, + pub osc_progress: Option<&'a str>, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct RulePack { + pub schema_version: u32, + pub agent: String, + pub validated_versions: String, + #[serde(default)] + pub rule: Vec, +} + +#[derive(Debug, Clone, 
Deserialize)] +pub struct Rule { + pub id: String, + pub state: RuleState, + pub priority: i32, + pub region: Region, + #[serde(default)] + pub strength: RuleStrength, + #[serde(default)] + pub requires_all: Vec, + #[serde(default)] + pub requires_any: Vec, + #[serde(default)] + pub forbids: Vec, + #[serde(default)] + pub regex: Vec, +} + +#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum RuleState { + Working, + Blocked, + Idle, + Freeze, +} + +#[derive(Debug, Clone, Copy, Default, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum RuleStrength { + #[default] + Weak, + Strong, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Region { + Bottom(usize), + BottomNonEmpty(usize), + PromptBoxBody, + AbovePromptBox, + AfterLastRule, + LastNonEmptyLine, + OscTitle, + OscProgress, +} + +impl<'de> Deserialize<'de> for Region { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let raw = String::deserialize(deserializer)?; + parse_region(&raw).map_err(serde::de::Error::custom) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RuleMatch { + pub state: Option, + pub rule_id: String, + pub strong: bool, + pub freeze: bool, +} + +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] +pub struct RuleEvaluation { + pub id: String, + pub state: String, + pub priority: i32, + pub region: String, + pub strength: String, + pub matched: bool, + pub preview: String, +} + +#[derive(Debug, Clone)] +pub struct RulePackRegistry { + packs: HashMap, +} + +const RUNTIME_PACK_DIR: &str = "/jackin/runtime/agent-status/packs"; + +impl RulePackRegistry { + pub fn bundled() -> anyhow::Result { + let override_dir = override_pack_dir(); + Self::from_pack_dirs(Some(Path::new(RUNTIME_PACK_DIR)), override_dir.as_deref()) + } + + fn from_pack_dirs( + runtime_pack_dir: Option<&Path>, + override_pack_dir: Option<&Path>, + ) -> anyhow::Result { + let mut packs = HashMap::new(); + 
load_embedded_packs(&mut packs)?; + if let Some(dir) = runtime_pack_dir + && dir.is_dir() + { + load_packs_from_dir(&mut packs, dir).with_context(|| { + format!("load runtime agent-status packs from {}", dir.display()) + })?; + } + if let Some(dir) = override_pack_dir + && dir.is_dir() + { + load_packs_from_dir(&mut packs, dir).with_context(|| { + format!("load agent-status override packs from {}", dir.display()) + })?; + } + Ok(Self { packs }) + } + + pub fn evaluate(&self, agent: Option<&str>, screen_rows: &[String]) -> Option { + self.packs.get(agent?)?.evaluate(screen_rows) + } + + pub fn evaluate_with_virtuals( + &self, + agent: Option<&str>, + screen_rows: &[String], + virtuals: VirtualRegions<'_>, + ) -> Option { + self.packs + .get(agent?)? + .evaluate_with_virtuals(screen_rows, virtuals) + } + + pub fn explain(&self, agent: Option<&str>, screen_rows: &[String]) -> Vec { + self.packs + .get(agent.unwrap_or_default()) + .map_or_else(Vec::new, |pack| pack.explain(screen_rows)) + } + + pub fn explain_with_virtuals( + &self, + agent: Option<&str>, + screen_rows: &[String], + virtuals: VirtualRegions<'_>, + ) -> Vec { + self.packs + .get(agent.unwrap_or_default()) + .map_or_else(Vec::new, |pack| { + pack.explain_with_virtuals(screen_rows, virtuals) + }) + } +} + +fn load_embedded_packs(packs: &mut HashMap) -> anyhow::Result<()> { + for content in [ + include_str!("../../../../docker/runtime/agent-status/packs/claude.toml"), + include_str!("../../../../docker/runtime/agent-status/packs/codex.toml"), + include_str!("../../../../docker/runtime/agent-status/packs/amp.toml"), + include_str!("../../../../docker/runtime/agent-status/packs/kimi.toml"), + include_str!("../../../../docker/runtime/agent-status/packs/opencode.toml"), + ] { + let pack: RulePack = toml::from_str(content)?; + pack.validate()?; + packs.insert(pack.agent.clone(), pack); + } + Ok(()) +} + +impl RulePack { + pub fn load(path: &Path) -> anyhow::Result { + let content = fs::read_to_string(path)?; + 
let pack: Self = toml::from_str(&content)?; + pack.validate()?; + Ok(pack) + } + + pub fn validate(&self) -> anyhow::Result<()> { + anyhow::ensure!(self.schema_version == 1, "unsupported rule schema"); + anyhow::ensure!(!self.agent.trim().is_empty(), "agent is required"); + anyhow::ensure!( + !self.validated_versions.trim().is_empty(), + "validated_versions is required" + ); + anyhow::ensure!(self.rule.len() <= 128, "too many rules"); + for rule in &self.rule { + anyhow::ensure!(!rule.id.trim().is_empty(), "rule id is required"); + let matcher_count = rule.requires_all.len() + + rule.requires_any.len() + + rule.forbids.len() + + rule.regex.len(); + anyhow::ensure!(matcher_count <= 32, "too many matchers in {}", rule.id); + for matcher in rule + .requires_all + .iter() + .chain(rule.requires_any.iter()) + .chain(rule.forbids.iter()) + .chain(rule.regex.iter()) + { + anyhow::ensure!(matcher.len() <= 512, "matcher too long in rule {}", rule.id); + } + for matcher in &rule.regex { + regex::RegexBuilder::new(matcher) + .case_insensitive(true) + .build() + .with_context(|| format!("invalid regex in rule {}", rule.id))?; + } + } + Ok(()) + } + + pub fn evaluate(&self, screen_rows: &[String]) -> Option { + self.evaluate_with_virtuals(screen_rows, VirtualRegions::default()) + } + + pub fn evaluate_with_virtuals( + &self, + screen_rows: &[String], + virtuals: VirtualRegions<'_>, + ) -> Option { + let mut rules: Vec<&Rule> = self.rule.iter().collect(); + rules.sort_by_key(|rule| std::cmp::Reverse(rule.priority)); + rules + .into_iter() + .find(|rule| rule.matches(screen_rows, virtuals)) + .map(Rule::to_match) + } + + pub fn explain(&self, screen_rows: &[String]) -> Vec { + self.explain_with_virtuals(screen_rows, VirtualRegions::default()) + } + + pub fn explain_with_virtuals( + &self, + screen_rows: &[String], + virtuals: VirtualRegions<'_>, + ) -> Vec { + let mut rules: Vec<&Rule> = self.rule.iter().collect(); + rules.sort_by_key(|rule| std::cmp::Reverse(rule.priority)); + 
rules + .into_iter() + .map(|rule| rule.evaluation(screen_rows, virtuals)) + .collect() + } +} + +impl Rule { + fn matches(&self, screen_rows: &[String], virtuals: VirtualRegions<'_>) -> bool { + let region = self.region.extract(screen_rows, virtuals); + let text = region.join("\n").to_ascii_lowercase(); + self.forbids + .iter() + .all(|matcher| !text.contains(&matcher.to_ascii_lowercase())) + && self + .requires_all + .iter() + .all(|matcher| text.contains(&matcher.to_ascii_lowercase())) + && (self.requires_any.is_empty() + || self + .requires_any + .iter() + .any(|matcher| text.contains(&matcher.to_ascii_lowercase()))) + && self.regex.iter().all(|matcher| { + regex::RegexBuilder::new(matcher) + .case_insensitive(true) + .build() + .is_ok_and(|regex| regex.is_match(&text)) + }) + } + + fn to_match(&self) -> RuleMatch { + RuleMatch { + state: match self.state { + RuleState::Working => Some(RawAgentState::Working), + RuleState::Blocked => Some(RawAgentState::Blocked), + RuleState::Idle => Some(RawAgentState::Idle), + RuleState::Freeze => None, + }, + rule_id: self.id.clone(), + strong: self.strength == RuleStrength::Strong, + freeze: self.state == RuleState::Freeze, + } + } + + fn evaluation(&self, screen_rows: &[String], virtuals: VirtualRegions<'_>) -> RuleEvaluation { + let region = self.region.extract(screen_rows, virtuals); + RuleEvaluation { + id: self.id.clone(), + state: self.state.label().to_owned(), + priority: self.priority, + region: self.region.label(), + strength: self.strength.label().to_owned(), + matched: self.matches(screen_rows, virtuals), + preview: preview(®ion.join("\n"), 240), + } + } +} + +impl RuleState { + const fn label(self) -> &'static str { + match self { + Self::Working => "working", + Self::Blocked => "blocked", + Self::Idle => "idle", + Self::Freeze => "freeze", + } + } +} + +impl RuleStrength { + const fn label(self) -> &'static str { + match self { + Self::Weak => "weak", + Self::Strong => "strong", + } + } +} + +impl Region { + fn 
extract(self, screen_rows: &[String], virtuals: VirtualRegions<'_>) -> Vec { + match self { + Self::Bottom(n) => bottom(screen_rows, n), + Self::BottomNonEmpty(n) => bottom(screen_rows, n.saturating_mul(2).max(n)) + .into_iter() + .filter(|line| !line.trim().is_empty()) + .rev() + .take(n) + .collect::>() + .into_iter() + .rev() + .collect(), + Self::PromptBoxBody => prompt_box_body(screen_rows), + Self::AbovePromptBox => above_prompt_box(screen_rows), + Self::AfterLastRule => after_last_rule(screen_rows), + Self::LastNonEmptyLine => screen_rows + .iter() + .rev() + .find(|line| !line.trim().is_empty()) + .cloned() + .into_iter() + .collect(), + Self::OscTitle => virtuals.osc_title.map(str::to_owned).into_iter().collect(), + Self::OscProgress => virtuals + .osc_progress + .map(str::to_owned) + .into_iter() + .collect(), + } + } + + fn label(self) -> String { + match self { + Self::Bottom(n) => format!("bottom:{n}"), + Self::BottomNonEmpty(n) => format!("bottom_nonempty:{n}"), + Self::PromptBoxBody => "prompt_box_body".to_owned(), + Self::AbovePromptBox => "above_prompt_box".to_owned(), + Self::AfterLastRule => "after_last_rule".to_owned(), + Self::LastNonEmptyLine => "last_nonempty_line".to_owned(), + Self::OscTitle => "osc_title".to_owned(), + Self::OscProgress => "osc_progress".to_owned(), + } + } +} + +fn preview(value: &str, max_chars: usize) -> String { + let mut out: String = value.chars().take(max_chars).collect(); + if value.chars().count() > max_chars { + out.push('…'); + } + out +} + +fn parse_region(raw: &str) -> anyhow::Result { + if let Some(n) = raw.strip_prefix("bottom:") { + return Ok(Region::Bottom(n.parse()?)); + } + if let Some(n) = raw.strip_prefix("bottom_nonempty:") { + return Ok(Region::BottomNonEmpty(n.parse()?)); + } + match raw { + "prompt_box_body" => Ok(Region::PromptBoxBody), + "above_prompt_box" => Ok(Region::AbovePromptBox), + "after_last_rule" => Ok(Region::AfterLastRule), + "last_nonempty_line" => Ok(Region::LastNonEmptyLine), + 
"osc_title" => Ok(Region::OscTitle), + "osc_progress" => Ok(Region::OscProgress), + _ => anyhow::bail!("unknown region {raw:?}"), + } +} + +fn bottom(screen_rows: &[String], n: usize) -> Vec { + let start = screen_rows.len().saturating_sub(n); + screen_rows[start..].to_vec() +} + +fn prompt_box_bounds(screen_rows: &[String]) -> Option<(usize, usize)> { + let start = screen_rows.iter().rposition(|line| line.contains('╭'))?; + let end = screen_rows + .iter() + .enumerate() + .skip(start) + .find_map(|(index, line)| line.contains('╰').then_some(index)) + .unwrap_or(start); + Some((start, end)) +} + +fn prompt_box_body(screen_rows: &[String]) -> Vec { + let Some((start, end)) = prompt_box_bounds(screen_rows) else { + return Vec::new(); + }; + screen_rows + .get(start + 1..end) + .unwrap_or_default() + .iter() + .map(|line| line.trim_matches(['│', ' ']).to_owned()) + .collect() +} + +fn above_prompt_box(screen_rows: &[String]) -> Vec { + let Some((start, _)) = prompt_box_bounds(screen_rows) else { + return screen_rows.to_vec(); + }; + screen_rows[..start].to_vec() +} + +fn after_last_rule(screen_rows: &[String]) -> Vec { + let Some(rule_index) = screen_rows.iter().rposition(|line| { + !line.contains('╭') + && !line.contains('╰') + && !line.contains('┌') + && !line.contains('└') + && !line.contains('╔') + && !line.contains('╚') + && line + .chars() + .filter(|ch| matches!(ch, '─' | '-' | '═' | '=')) + .count() + >= 10 + }) else { + return screen_rows.to_vec(); + }; + screen_rows + .get(rule_index + 1..) + .unwrap_or_default() + .to_vec() +} + +fn load_packs_from_dir(packs: &mut HashMap, dir: &Path) -> anyhow::Result<()> { + for entry in fs::read_dir(dir).with_context(|| format!("read {}", dir.display()))? 
{ + let path = entry?.path(); + if path.extension().and_then(|ext| ext.to_str()) != Some("toml") { + continue; + } + let pack = RulePack::load(&path) + .with_context(|| format!("load agent-status pack {}", path.display()))?; + packs.insert(pack.agent.clone(), pack); + } + Ok(()) +} + +fn override_pack_dir() -> Option { + if let Some(path) = std::env::var_os("JACKIN_STATUS_PACK_DIR") { + return Some(path.into()); + } + std::env::var_os("HOME") + .map(std::path::PathBuf::from) + .map(|home| home.join(".jackin/agent-status/packs")) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn fixture(path: &str) -> Vec { + fs::read_to_string(path) + .unwrap() + .lines() + .map(str::to_owned) + .collect() + } + + fn fixture_for_detection(path: &Path) -> (Option, Vec) { + let mut rows = fixture(path.to_str().unwrap()); + let forbidden = rows + .first() + .and_then(|line| line.trim().strip_prefix("# not:")) + .map(str::trim) + .map(|state| match state { + "working" => RawAgentState::Working, + "blocked" => RawAgentState::Blocked, + "idle" => RawAgentState::Idle, + other => panic!("unknown forbidden state {other:?} in {path:?}"), + }); + if forbidden.is_some() { + rows.remove(0); + } + (forbidden, rows) + } + + fn write_test_pack(dir: &Path, agent: &str, id: &str, state: &str, needle: &str) { + fs::write( + dir.join(format!("{agent}.toml")), + format!( + r#" +schema_version = 1 +agent = "{agent}" +validated_versions = "*" + +[[rule]] +id = "{id}" +state = "{state}" +priority = 1 +region = "bottom:12" +strength = "strong" +requires_all = ["{needle}"] +"# + ), + ) + .unwrap(); + } + + #[test] + fn packs_load_and_match_fixtures() { + let root = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .parent() + .unwrap() + .to_path_buf(); + for agent in ["claude", "codex", "amp", "kimi", "opencode"] { + let pack = RulePack::load( + &root + .join("docker/runtime/agent-status/packs") + .join(format!("{agent}.toml")), + ) + .unwrap(); + let fixture_dir = root + 
.join("crates/jackin-capsule/src/agent_status/screen/fixtures") + .join(agent); + for entry in fs::read_dir(fixture_dir).unwrap() { + let path = entry.unwrap().path(); + let Some(name) = path.file_name().and_then(|name| name.to_str()) else { + continue; + }; + let (forbidden, rows) = fixture_for_detection(&path); + let matched = pack.evaluate(&rows).and_then(|matched| matched.state); + if name.starts_with("working") { + assert_eq!(matched, Some(RawAgentState::Working), "{path:?}"); + } else if name.starts_with("blocked") { + assert_eq!(matched, Some(RawAgentState::Blocked), "{path:?}"); + } else if name.starts_with("idle") { + assert_eq!(matched, Some(RawAgentState::Idle), "{path:?}"); + } else if name.starts_with("false_positive") { + assert_ne!( + matched, + Some(forbidden.unwrap_or(RawAgentState::Working)), + "{path:?}" + ); + } + } + } + } + + #[test] + fn regex_matchers_participate_in_rules() { + let pack: RulePack = toml::from_str( + r#" +schema_version = 1 +agent = "test" +validated_versions = "*" + +[[rule]] +id = "anchored-spinner" +state = "working" +priority = 1 +region = "bottom:12" +strength = "strong" +regex = ["^\\* thinking"] +"#, + ) + .unwrap(); + pack.validate().unwrap(); + let rows = vec!["* Thinking".to_owned()]; + assert_eq!( + pack.evaluate(&rows).and_then(|matched| matched.state), + Some(RawAgentState::Working) + ); + } + + #[test] + fn structural_regions_extract_prompt_and_rule_areas() { + let rows = vec![ + "before".to_owned(), + "────────────────────".to_owned(), + "after rule".to_owned(), + "╭────────────╮".to_owned(), + "│ > hello │".to_owned(), + "╰────────────╯".to_owned(), + ]; + + assert_eq!( + parse_region("prompt_box_body") + .unwrap() + .extract(&rows, VirtualRegions::default()), + vec!["> hello".to_owned()] + ); + assert_eq!( + parse_region("above_prompt_box") + .unwrap() + .extract(&rows, VirtualRegions::default()), + vec![ + "before".to_owned(), + "────────────────────".to_owned(), + "after rule".to_owned(), + ] + ); + 
assert_eq!( + parse_region("after_last_rule") + .unwrap() + .extract(&rows, VirtualRegions::default()), + vec![ + "after rule".to_owned(), + "╭────────────╮".to_owned(), + "│ > hello │".to_owned(), + "╰────────────╯".to_owned(), + ] + ); + } + + #[test] + fn virtual_osc_regions_participate_in_matching_and_explain() { + let pack: RulePack = toml::from_str( + r#" +schema_version = 1 +agent = "codex" +validated_versions = "*" + +[[rule]] +id = "title-spinner" +state = "working" +priority = 10 +region = "osc_title" +strength = "strong" +requires_all = ["codex", "working"] + +[[rule]] +id = "progress-cleared" +state = "idle" +priority = 9 +region = "osc_progress" +strength = "strong" +requires_all = ["cleared"] +"#, + ) + .unwrap(); + pack.validate().unwrap(); + + let title_virtuals = VirtualRegions { + osc_title: Some("Codex - working"), + osc_progress: Some("inactive"), + }; + let matched = pack + .evaluate_with_virtuals(&[], title_virtuals) + .expect("title rule should match"); + assert_eq!(matched.rule_id, "title-spinner"); + assert_eq!(matched.state, Some(RawAgentState::Working)); + + let progress_virtuals = VirtualRegions { + osc_title: None, + osc_progress: Some("cleared"), + }; + let explain = pack.explain_with_virtuals(&[], progress_virtuals); + assert!(explain.iter().any(|rule| { + rule.id == "progress-cleared" && rule.matched && rule.preview == "cleared" + })); + } + + #[test] + fn runtime_pack_directory_overrides_embedded_pack() { + let runtime = tempfile::tempdir().unwrap(); + write_test_pack( + runtime.path(), + "claude", + "runtime-pack", + "idle", + "runtime marker", + ); + + let registry = RulePackRegistry::from_pack_dirs(Some(runtime.path()), None).unwrap(); + + let matched = registry + .evaluate(Some("claude"), &["runtime marker".to_owned()]) + .unwrap(); + assert_eq!(matched.rule_id, "runtime-pack"); + assert_eq!(matched.state, Some(RawAgentState::Idle)); + } + + #[test] + fn override_pack_directory_overrides_runtime_pack() { + let runtime = 
tempfile::tempdir().unwrap(); + let override_dir = tempfile::tempdir().unwrap(); + write_test_pack( + runtime.path(), + "claude", + "runtime-pack", + "idle", + "runtime marker", + ); + write_test_pack( + override_dir.path(), + "claude", + "override-pack", + "blocked", + "override marker", + ); + + let registry = + RulePackRegistry::from_pack_dirs(Some(runtime.path()), Some(override_dir.path())) + .unwrap(); + + assert!( + registry + .evaluate(Some("claude"), &["runtime marker".to_owned()]) + .is_none(), + "override pack should replace the runtime pack for the same agent" + ); + let matched = registry + .evaluate(Some("claude"), &["override marker".to_owned()]) + .unwrap(); + assert_eq!(matched.rule_id, "override-pack"); + assert_eq!(matched.state, Some(RawAgentState::Blocked)); + } + + #[test] + fn loaded_pack_directory_replaces_existing_pack_for_same_agent() { + let mut packs = HashMap::new(); + let bundled: RulePack = toml::from_str( + r#" +schema_version = 1 +agent = "test" +validated_versions = "*" + +[[rule]] +id = "bundled" +state = "working" +priority = 1 +region = "bottom:12" +strength = "strong" +requires_all = ["bundled"] +"#, + ) + .unwrap(); + packs.insert(bundled.agent.clone(), bundled); + + let tmp = tempfile::tempdir().unwrap(); + write_test_pack(tmp.path(), "test", "override", "blocked", "override"); + + load_packs_from_dir(&mut packs, tmp.path()).unwrap(); + + let matched = packs + .get("test") + .unwrap() + .evaluate(&["override".to_owned()]) + .unwrap(); + assert_eq!(matched.rule_id, "override"); + assert_eq!(matched.state, Some(RawAgentState::Blocked)); + } +} diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/blocked.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/blocked.txt new file mode 100644 index 000000000..cbe3cbe9b --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/blocked.txt @@ -0,0 +1,5 @@ + + Amp wants to execute a shell command: + ls -la / + + Allow? 
esc to cancel diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/false_positive.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/false_positive.txt new file mode 100644 index 000000000..783dd08bc --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/false_positive.txt @@ -0,0 +1,5 @@ + + Previous task output visible here. + No active working indicator. + esc to cancel + > diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/idle.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/idle.txt new file mode 100644 index 000000000..e4fd05f44 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/idle.txt @@ -0,0 +1,4 @@ + + Ready for your next task. + + > diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/working.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/working.txt new file mode 100644 index 000000000..6784d0941 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/amp/working.txt @@ -0,0 +1,2 @@ + Running tool: bash + esc to cancel diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/blocked.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/blocked.txt new file mode 100644 index 000000000..93a3720ef --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/blocked.txt @@ -0,0 +1,5 @@ + +Claude wants to run: rm -rf /tmp/build + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + enter to select esc to cancel ↑/↓ to navigate diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive.txt new file mode 100644 index 000000000..4464bd541 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive.txt @@ -0,0 +1,6 @@ + + showing detailed transcript + ctrl+o to toggle + 
ctrl+e to show all/collapse + +✻ Simplifying… diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_horizontal_rule_prompt.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_horizontal_rule_prompt.txt new file mode 100644 index 000000000..59b9f7245 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_horizontal_rule_prompt.txt @@ -0,0 +1,5 @@ +# not: idle + +Tool output: +───────────────────────────────────────────────────────────── +This is a divider in normal transcript output, not a live prompt box. diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_old_spinner.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_old_spinner.txt new file mode 100644 index 000000000..d22d66703 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_old_spinner.txt @@ -0,0 +1,9 @@ + +Previous turn content +✻ Simplifying… + +───────────────────────────────────────────────────────────── + +╭──────────────────────────────────────────────────────────────╮ +│ > [cursor] │ +╰──────────────────────────────────────────────────────────────╯ diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_scrolled_blocker.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_scrolled_blocker.txt new file mode 100644 index 000000000..1030626af --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_scrolled_blocker.txt @@ -0,0 +1,8 @@ +# not: blocked + +Previous turn transcript: +Claude needs your permission to run a command. +Press enter to select, esc to cancel. 
+ +✻ Thinking… +esc to interrupt diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_timing.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_timing.txt new file mode 100644 index 000000000..b6decdb10 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/false_positive_timing.txt @@ -0,0 +1,6 @@ + +⎿ Churned for 1m 23s + +╭──────────────────────────────────────────────────────────────╮ +│ > [cursor] │ +╰──────────────────────────────────────────────────────────────╯ diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/idle.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/idle.txt new file mode 100644 index 000000000..2bce8e32b --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/idle.txt @@ -0,0 +1,6 @@ + +> some recent output here + +╭──────────────────────────────────────────────────────────────╮ +│ > [cursor] │ +╰──────────────────────────────────────────────────────────────╯ diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/working.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/working.txt new file mode 100644 index 000000000..ccfaffdf1 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/claude/working.txt @@ -0,0 +1,5 @@ + +✻ Simplifying… + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + esc to interrupt diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/blocked.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/blocked.txt new file mode 100644 index 000000000..7660972e0 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/blocked.txt @@ -0,0 +1,4 @@ + + Codex wants to run: git push --force + + press enter to confirm or esc to cancel diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/blocked_auth.txt 
b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/blocked_auth.txt new file mode 100644 index 000000000..3b4301b9d --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/blocked_auth.txt @@ -0,0 +1,17 @@ + +WelcometoCodex,OpenAI'scommand-linecodingagent + +SigninwithChatGPTtouseCodexaspartofyourpaidplan +orconnectanAPIkeyforusage-basedbilling + +> 1. Sign in with ChatGPT + Usage included with Plus, Pro, Business, and Enterprise plans + +2.SigninwithDeviceCode + Sign in from another device with a one-time code + +3.ProvideyourownAPIkey + Pay for what you use + + Press enter to continue + diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/false_positive.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/false_positive.txt new file mode 100644 index 000000000..41c6db38b --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/false_positive.txt @@ -0,0 +1,7 @@ + + Checking for updates... + Downloading codex v1.2.3... + Installing... + Update complete. + +› diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/idle.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/idle.txt new file mode 100644 index 000000000..176137044 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/idle.txt @@ -0,0 +1,4 @@ + + Some output here + +› diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/working.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/working.txt new file mode 100644 index 000000000..74858caf9 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/codex/working.txt @@ -0,0 +1,3 @@ + +• Working (analyzing your request...) + Processing files... 
diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/blocked.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/blocked.txt new file mode 100644 index 000000000..65544cf6a --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/blocked.txt @@ -0,0 +1,6 @@ + + Kimi wants permission: + + approve once + approve for session + reject diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/false_positive.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/false_positive.txt new file mode 100644 index 000000000..4c5cc2c64 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/false_positive.txt @@ -0,0 +1,4 @@ + Previous output that looks like a spinner in scrollback. + ⠋ Old thinking indicator from a past turn. + + > diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/idle.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/idle.txt new file mode 100644 index 000000000..7c2583359 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/idle.txt @@ -0,0 +1,4 @@ + + Kimi ready. + + > diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/working.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/working.txt new file mode 100644 index 000000000..c7ebd2937 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/kimi/working.txt @@ -0,0 +1,3 @@ + +⠋ Thinking... +⠙ Processing... 
diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/blocked.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/blocked.txt new file mode 100644 index 000000000..db5d148db --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/blocked.txt @@ -0,0 +1,4 @@ + + permission required + + dismiss enter select diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/false_positive.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/false_positive.txt new file mode 100644 index 000000000..933f341c3 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/false_positive.txt @@ -0,0 +1,5 @@ + Bubble Tea cosmetic redraw output + cursor movement sequences + status bar update — no affordances + Previous task output visible here + No active working or blocked indicators diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/idle.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/idle.txt new file mode 100644 index 000000000..2f39d8a87 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/idle.txt @@ -0,0 +1,4 @@ + + OpenCode ready. + + > [input box visible] diff --git a/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/working.txt b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/working.txt new file mode 100644 index 000000000..4d6c05299 --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/fixtures/opencode/working.txt @@ -0,0 +1,2 @@ +⠋ Processing... 
+ Ctrl+C to cancel diff --git a/crates/jackin-capsule/src/agent_status/screen/transcripts/claude/blocked.ansi b/crates/jackin-capsule/src/agent_status/screen/transcripts/claude/blocked.ansi new file mode 100644 index 000000000..93a3720ef --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/screen/transcripts/claude/blocked.ansi @@ -0,0 +1,5 @@ + +Claude wants to run: rm -rf /tmp/build + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + enter to select esc to cancel ↑/↓ to navigate diff --git a/crates/jackin-capsule/src/agent_status/seen.rs b/crates/jackin-capsule/src/agent_status/seen.rs new file mode 100644 index 000000000..742fd03bf --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/seen.rs @@ -0,0 +1,60 @@ +//! Seen-flag management for agent status acknowledgement. +//! +//! The `seen` flag is embedded in `SessionStatus` in the parent module; +//! this module provides the public functions for acknowledging sessions +//! and marking panes as focused. + +use crate::agent_status::SessionStatus; +use crate::protocol::AgentState; + +/// Mark a session as seen by the operator. +/// Transitions `Done` → `Idle`; returns `Some(Idle)` when it changed. +pub fn acknowledge_session(status: &mut SessionStatus) -> Option<AgentState> { + status.acknowledge() +} + +/// Mark a pane as focused — equivalent to `acknowledge_session` for the +/// focused pane. Called by `refresh_session_statuses` each tick for the +/// active pane.
+pub fn mark_pane_focused(status: &mut SessionStatus) -> Option<AgentState> { + acknowledge_session(status) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::agent_status::SessionStatus; + use crate::agent_status::evidence::{EvidenceSummary, RawAgentState}; + use jackin_protocol::agent_status::AgentStatusConfidence; + + fn publish_done(status: &mut SessionStatus) { + status.publish_raw( + RawAgentState::Working, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + status.publish_raw( + RawAgentState::Idle, + AgentStatusConfidence::Strong, + EvidenceSummary::default(), + ); + } + + #[test] + fn acknowledge_session_transitions_done_to_idle() { + let mut status = SessionStatus::new(); + publish_done(&mut status); + assert_eq!(status.effective, AgentState::Done); + let result = acknowledge_session(&mut status); + assert_eq!(result, Some(AgentState::Idle)); + assert_eq!(status.effective, AgentState::Idle); + } + + #[test] + fn mark_pane_focused_clears_done() { + let mut status = SessionStatus::new(); + publish_done(&mut status); + let result = mark_pane_focused(&mut status); + assert_eq!(result, Some(AgentState::Idle)); + } +} diff --git a/crates/jackin-capsule/src/agent_status/sequence.rs b/crates/jackin-capsule/src/agent_status/sequence.rs new file mode 100644 index 000000000..4a3f3e85b --- /dev/null +++ b/crates/jackin-capsule/src/agent_status/sequence.rs @@ -0,0 +1,138 @@ +//! Sequence number tracker for hook authority sources. +//! +//! Each source (identified by `source_id`) must send strictly increasing +//! sequence numbers. The tracker rejects reports whose sequence is ≤ the last +//! accepted value, preventing stale or replayed authority. + +use std::collections::HashMap; + +/// Tracks the last-accepted sequence number per source ID. +#[derive(Debug, Default)] +pub struct SequenceTracker { + last: HashMap<String, u64>, +} + +impl SequenceTracker { + pub fn new() -> Self { + Self::default() + } + + /// Attempt to accept a report from `source_id` with `seq`.
+ /// + /// Returns `true` when accepted (first report from this source, or + /// `seq` is strictly greater than the last accepted value). + /// Returns `false` when rejected (stale or replayed sequence). + pub fn accept(&mut self, source_id: &str, seq: u64) -> bool { + match self.last.get_mut(source_id) { + None => { + // First report from this source — always accepted. + self.last.insert(source_id.to_owned(), seq); + true + } + Some(last) if seq > *last => { + *last = seq; + true + } + Some(_) => false, + } + } + + /// Whether this tracker has seen any report from `source_id`. + pub fn has_source(&self, source_id: &str) -> bool { + self.last.contains_key(source_id) + } + + /// Remove all sequence state for `source_id`. Called when authority + /// is cleared so the source can re-register cleanly. + pub fn clear_source(&mut self, source_id: &str) { + self.last.remove(source_id); + } + + /// Remove all source sequence state for a session-wide authority reset. + pub fn clear_all(&mut self) { + self.last.clear(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sequence_tracker_accepts_first_report_from_new_source() { + let mut t = SequenceTracker::new(); + assert!(t.accept("hook-1", 42)); + } + + #[test] + fn sequence_tracker_accepts_increasing_sequence() { + let mut t = SequenceTracker::new(); + t.accept("hook-1", 100); + assert!(t.accept("hook-1", 101)); + } + + #[test] + fn sequence_tracker_rejects_equal_sequence() { + let mut t = SequenceTracker::new(); + t.accept("hook-1", 100); + assert!(!t.accept("hook-1", 100)); + } + + #[test] + fn sequence_tracker_rejects_decreasing_sequence() { + let mut t = SequenceTracker::new(); + t.accept("hook-1", 100); + assert!(!t.accept("hook-1", 99)); + } + + #[test] + fn sequence_tracker_independent_sources_dont_interfere() { + let mut t = SequenceTracker::new(); + t.accept("hook-a", 100); + t.accept("hook-b", 50); + assert!(!t.accept("hook-a", 99)); // stale for hook-a +
assert!(t.accept("hook-b", 51)); // fine for hook-b + } + + #[test] + fn clear_source_allows_reregistration() { + let mut t = SequenceTracker::new(); + t.accept("hook-1", 100); + t.clear_source("hook-1"); + // After clear, even seq=1 is accepted + assert!(t.accept("hook-1", 1)); + } + + #[test] + fn clear_all_allows_every_source_to_reregister() { + let mut t = SequenceTracker::new(); + t.accept("hook-1", 100); + t.accept("hook-2", 200); + t.clear_all(); + + assert!(t.accept("hook-1", 1)); + assert!(t.accept("hook-2", 1)); + } + + #[test] + fn reporter_accept_valid_sequence() { + let mut t = SequenceTracker::new(); + assert!(t.accept("reporter-1", 1000)); + } + + #[test] + fn reporter_reject_stale_sequence() { + let mut t = SequenceTracker::new(); + t.accept("reporter-1", 1000); + assert!(!t.accept("reporter-1", 999)); + } + + #[test] + fn reporter_reject_wrong_source_after_clear() { + let mut t = SequenceTracker::new(); + t.accept("source-a", 100); + t.clear_source("source-a"); + // After clear, source-a can re-register from any seq + assert!(t.accept("source-a", 1)); + } +} diff --git a/crates/jackin-capsule/src/attach_protocol.rs b/crates/jackin-capsule/src/attach_protocol.rs index 29ab58e77..5dd23c59b 100644 --- a/crates/jackin-capsule/src/attach_protocol.rs +++ b/crates/jackin-capsule/src/attach_protocol.rs @@ -1,5 +1,6 @@ //! Client attach/detach lifecycle for the capsule multiplexer. +use std::collections::BTreeMap; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::UnixStream; use tokio::sync::mpsc; @@ -35,14 +36,19 @@ pub(crate) struct AttachHandshake { /// socket), and forwards validated attach Hellos back to the main /// loop via `handshake_tx`. Owning the slow `read_exact` here keeps a /// silent or slow client from stalling the daemon's main `select!`. 
+#[allow(clippy::too_many_arguments)] pub(crate) async fn perform_handshake( mut stream: UnixStream, client_permit: tokio::sync::OwnedSemaphorePermit, handshake_tx: mpsc::UnboundedSender, sessions_snapshot: Vec, tabs_snapshot: Vec, + visible_text_snapshot: BTreeMap>, + status_explain_snapshot: BTreeMap, history_snapshot: Vec, active_tab: u32, + control_msg_tx: mpsc::UnboundedSender, + state_broadcast_tx: tokio::sync::broadcast::Sender, ) { // Bound the handshake reads. A client that opens the socket and // never sends a byte otherwise holds the `OwnedSemaphorePermit` @@ -76,8 +82,12 @@ pub(crate) async fn perform_handshake( first[0], sessions_snapshot, tabs_snapshot, + visible_text_snapshot, + status_explain_snapshot, history_snapshot, active_tab, + control_msg_tx, + state_broadcast_tx, ) .await; drop(client_permit); diff --git a/crates/jackin-capsule/src/client.rs b/crates/jackin-capsule/src/client.rs index 676ec7314..40d14d61b 100644 --- a/crates/jackin-capsule/src/client.rs +++ b/crates/jackin-capsule/src/client.rs @@ -5,6 +5,7 @@ //! in-container rendering. use anyhow::{Context, Result}; +use std::path::PathBuf; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::net::UnixStream; @@ -27,28 +28,7 @@ pub async fn run_client( /// Query the daemon for current session list and print it. pub async fn run_status() -> Result<()> { - let mut stream = UnixStream::connect(SOCKET_PATH) - .await - .context("cannot connect to jackin-capsule daemon")?; - - let msg = control_frame(&ClientMsg::Status); - stream.write_all(&msg).await?; - - let mut len_buf = [0u8; 4]; - stream.read_exact(&mut len_buf).await?; - let len = u32::from_be_bytes(len_buf) as usize; - // Mirror the daemon-side cap in `socket::read_control_msg`. A - // buggy or wedged daemon (or a peer that won the socket race - // inside the container) could otherwise send `0xFFFFFFFF` and - // force a 4 GiB allocation attempt in the client. 
- const MAX_CONTROL_REPLY: usize = 4 * 1024 * 1024; - if len > MAX_CONTROL_REPLY { - anyhow::bail!("daemon control reply length {len} exceeds limit {MAX_CONTROL_REPLY}"); - } - let mut body = vec![0u8; len]; - stream.read_exact(&mut body).await?; - - let msg: ServerMsg = serde_json::from_slice(&body)?; + let msg = request_control(ClientMsg::Status).await?; let sessions = match msg { ServerMsg::SessionList { sessions } => sessions, ServerMsg::Unknown => { @@ -62,6 +42,7 @@ pub async fn run_status() -> Result<()> { ServerMsg::AgentRegistry { .. } => { anyhow::bail!("daemon replied with AgentRegistry for Status request") } + _ => anyhow::bail!("daemon replied with unexpected message type for Status request"), }; crate::output::stdout_line(format_args!("Sessions: {}", sessions.len())); for s in &sessions { @@ -83,25 +64,7 @@ pub async fn run_status() -> Result<()> { /// console can deserialize the same struct it shares with the /// daemon — no second schema to keep in sync. pub async fn run_snapshot() -> Result<()> { - let mut stream = UnixStream::connect(SOCKET_PATH) - .await - .context("cannot connect to jackin-capsule daemon")?; - - stream - .write_all(&control_frame(&ClientMsg::Snapshot)) - .await?; - - let mut len_buf = [0u8; 4]; - stream.read_exact(&mut len_buf).await?; - let len = u32::from_be_bytes(len_buf) as usize; - const MAX_CONTROL_REPLY: usize = 4 * 1024 * 1024; - if len > MAX_CONTROL_REPLY { - anyhow::bail!("daemon control reply length {len} exceeds limit {MAX_CONTROL_REPLY}"); - } - let mut body = vec![0u8; len]; - stream.read_exact(&mut body).await?; - - let msg: ServerMsg = serde_json::from_slice(&body)?; + let msg = request_control(ClientMsg::Snapshot).await?; let (tabs, active_tab) = match msg { ServerMsg::Snapshot { tabs, active_tab } => (tabs, active_tab), ServerMsg::Unknown => { @@ -115,6 +78,7 @@ pub async fn run_snapshot() -> Result<()> { ServerMsg::AgentRegistry { .. 
} => { anyhow::bail!("daemon replied with AgentRegistry for Snapshot request") } + _ => anyhow::bail!("daemon replied with unexpected message type for Snapshot request"), }; let payload = serde_json::json!({ "tabs": tabs, @@ -124,6 +88,65 @@ pub async fn run_snapshot() -> Result<()> { Ok(()) } +async fn request_control(msg: ClientMsg) -> Result<ServerMsg> { + let mut stream = UnixStream::connect(SOCKET_PATH) + .await + .context("cannot connect to jackin-capsule daemon")?; + + stream.write_all(&control_frame(&msg)).await?; + + let mut len_buf = [0u8; 4]; + stream.read_exact(&mut len_buf).await?; + let len = u32::from_be_bytes(len_buf) as usize; + const MAX_CONTROL_REPLY: usize = 4 * 1024 * 1024; + if len > MAX_CONTROL_REPLY { + anyhow::bail!("daemon control reply length {len} exceeds limit {MAX_CONTROL_REPLY}"); + } + let mut body = vec![0u8; len]; + stream.read_exact(&mut body).await?; + + Ok(serde_json::from_slice(&body)?) +} + +pub async fn run_status_explain(session_id: u64) -> Result<()> { + let msg = request_control(ClientMsg::SessionStatusExplain { session_id }).await?; + let ServerMsg::SessionStatusExplain { report, .. } = msg else { + anyhow::bail!("daemon replied with unexpected message type for Status explain"); + }; + crate::output::stdout_line(format_args!("{}", serde_json::to_string_pretty(&report)?)); + Ok(()) +} + +pub async fn run_status_capture(session_id: u64) -> Result<()> { + let explain = request_control(ClientMsg::SessionStatusExplain { session_id }).await?; + let visible = request_control(ClientMsg::SessionReadVisible { + session_id, + rows: None, + }) + .await?; + let ServerMsg::SessionStatusExplain { report, .. } = explain else { + anyhow::bail!("daemon replied with unexpected message type for Status capture explain"); + }; + let lines = match visible { + ServerMsg::SessionVisibleText { lines, ..
} => lines, + _ => Vec::new(), + }; + let stamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let dir = PathBuf::from("/jackin/state/agent-status/captures") + .join(format!("session-{session_id}-{stamp}")); + std::fs::create_dir_all(&dir)?; + std::fs::write(dir.join("visible.txt"), lines.join("\n"))?; + std::fs::write( + dir.join("evidence.json"), + serde_json::to_vec_pretty(&report)?, + )?; + crate::output::stdout_line(format_args!("{}", dir.display())); + Ok(()) +} + /// Format for `jackin-capsule agents` output. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AgentsFormat { @@ -166,6 +189,7 @@ pub async fn run_agents(format: AgentsFormat) -> Result<()> { ServerMsg::Snapshot { .. } => { anyhow::bail!("daemon replied with Snapshot for Agents request") } + _ => anyhow::bail!("daemon replied with unexpected message type for Agents request"), }; // Determine caller's own codename and annotate matching records. diff --git a/crates/jackin-capsule/src/daemon.rs b/crates/jackin-capsule/src/daemon.rs index 44d0270f5..f4e673c0b 100644 --- a/crates/jackin-capsule/src/daemon.rs +++ b/crates/jackin-capsule/src/daemon.rs @@ -22,7 +22,7 @@ use chrono::{DateTime, Utc}; /// byte: `0x00` → control (length prefix), anything else → attach. /// - Lifecycle: the daemon exits when the last session ends so the /// container reaps cleanly. SIGTERM also triggers shutdown. 
-use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::io; #[cfg(test)] use std::path::Path; @@ -172,6 +172,8 @@ pub struct Multiplexer { env_passthrough: Vec<(String, String)>, event_tx: mpsc::UnboundedSender, event_rx: mpsc::UnboundedReceiver, + state_broadcast_tx: tokio::sync::broadcast::Sender, + status_cpu_samples: HashMap>, zoomed: Option, input_parser: InputParser, detach_requested: bool, @@ -288,6 +290,10 @@ pub struct Multiplexer { /// tool-availability race does not freeze PR discovery for the /// daemon lifetime. workdir_context: WorkdirContext, + /// TOML rule-pack screen detector registry. + rule_packs: crate::agent_status::rules::RulePackRegistry, + /// Per-session token usage monitor. + token_monitor: crate::token_monitor::TokenMonitor, /// Ratatui terminal backed by [`SocketBackend`]. /// /// Chrome widgets (status bar, pane boxes, dialogs) render through this @@ -311,11 +317,55 @@ pub struct Multiplexer { /// Debug-only process RSS/CPU sampler, emitted on the state ticker so live /// multi-pane smokes can attach resource data to the run id. resource_metrics: resource_metrics::ResourceMetricsSampler, + /// Per-source event gates for runtime hook/plugin reports. + runtime_gate_states: HashMap, + runtime_event_sequences: HashMap, + child_agent_states: HashMap<(u64, u64), crate::agent_status::evidence::RawAgentState>, + status_transition_times: HashMap>, + watchdog_demotions: HashMap, + blocked_renotify_at: HashMap, + status_last_eval: HashMap, + status_dirty_sessions: HashSet, + last_workspace_status: Option, /// Offset into the wordlist for the next codename pick, seeded once at /// daemon construction from the current time subsecond nanos. 
wordlist_offset: usize, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct WorkspaceStatusSnapshot { + effective: crate::protocol::AgentState, + session_count: u32, + blocked_count: u32, + done_count: u32, + working_count: u32, +} + +#[derive(Debug, Clone, Copy)] +struct ForegroundShellHandoffProbe { + agent_expected: bool, + agent_identity_observed: bool, + startup_grace_done: bool, + child_alive: bool, + root_is_agent: bool, + foreground_is_agent: bool, + root_pgid: Option, + foreground_pgid: Option, + child_process_count: u32, +} + +fn agent_foreground_returned_to_shell(probe: ForegroundShellHandoffProbe) -> bool { + probe.agent_expected + && probe.agent_identity_observed + && probe.startup_grace_done + && probe.child_alive + && !probe.root_is_agent + && !probe.foreground_is_agent + && probe.child_process_count == 0 + && probe.root_pgid.is_some() + && probe.root_pgid == probe.foreground_pgid +} + /// In-memory record of one tab ever opened in this container lifetime. /// The history is append-only and never pruned; it is the authoritative /// data source for `jackin-capsule agents` and the tab hover tooltip. 
@@ -407,6 +457,8 @@ impl Multiplexer { pub fn new(rows: u16, cols: u16, launch_config: CapsuleConfig) -> io::Result { let (rows, cols) = normalize_size(rows, cols); let (event_tx, event_rx) = mpsc::unbounded_channel(); + let (state_broadcast_tx, _) = + tokio::sync::broadcast::channel::(256); let content_rows = available_content_rows(rows); let agents = launch_config.supported_agents(); let provider_keys: BTreeMap = @@ -460,6 +512,8 @@ impl Multiplexer { env_passthrough, event_tx, event_rx, + state_broadcast_tx, + status_cpu_samples: HashMap::new(), zoomed: None, input_parser, detach_requested: false, @@ -490,6 +544,10 @@ impl Multiplexer { pull_request_context_cache: HashMap::new(), workdir, workdir_context, + rule_packs: crate::agent_status::rules::RulePackRegistry::bundled().map_err( + |error| io::Error::other(format!("invalid bundled rule packs: {error:#}")), + )?, + token_monitor: crate::token_monitor::TokenMonitor::new(), provider_keys, ratatui_terminal, terminal_row_arena: jackin_term::RowArena::default(), @@ -497,6 +555,15 @@ impl Multiplexer { codename_retired: HashSet::new(), agent_history: Vec::new(), resource_metrics: resource_metrics::ResourceMetricsSampler::default(), + runtime_gate_states: HashMap::new(), + runtime_event_sequences: HashMap::new(), + child_agent_states: HashMap::new(), + status_transition_times: HashMap::new(), + watchdog_demotions: HashMap::new(), + blocked_renotify_at: HashMap::new(), + status_last_eval: HashMap::new(), + status_dirty_sessions: HashSet::new(), + last_workspace_status: None, wordlist_offset: { use std::time::{SystemTime, UNIX_EPOCH}; SystemTime::now() @@ -542,6 +609,589 @@ impl Multiplexer { } self.send_to_client(ServerFrame::Output(bytes)); } + + fn broadcast_agent_state_changed( + &self, + session_id: u64, + session: &Session, + raw_state: Option, + reason: Option, + ) { + let summary = &session.status.last_snapshot_summary; + drop( + self.state_broadcast_tx + 
.send(crate::protocol::control::ServerMsg::AgentStateChanged { + session_id, + raw_state: raw_state.or_else(|| Some(session.status.raw.label().to_owned())), + effective: session.state().label().to_owned(), + seen: session.status.seen, + source: summary + .authority_source + .clone() + .unwrap_or_else(|| format!("{:?}", summary.winner).to_ascii_lowercase()), + confidence: Some( + format!("{:?}", session.status.confidence).to_ascii_lowercase(), + ), + detected_agent: session.agent.clone(), + foreground_pgid: summary.foreground_pgid, + visible_blocker: summary.visible_blocker, + visible_idle: summary.visible_idle, + visible_working: summary.visible_working, + process_exited: summary.process_exited, + foreground_returned_to_shell: summary.foreground_returned_to_shell, + stale_report: summary.stale_report, + subagents_active: summary.subagents_active, + seq: session + .hook_authority + .as_ref() + .map(|authority| authority.seq), + ts_ns: session + .hook_authority + .as_ref() + .map(|authority| authority.ts_ns), + revision: session.status.revision, + last_seen_revision: Some(if session.status.seen { + session.status.revision + } else { + 0 + }), + reason, + }), + ); + } + + pub(super) fn broadcast_session_spawned( + &self, + session_id: u64, + agent: Option, + label: String, + ) { + drop( + self.state_broadcast_tx + .send(crate::protocol::control::ServerMsg::SessionSpawned { + session_id, + agent, + label, + }), + ); + } + + pub(super) fn broadcast_session_exited(&self, session_id: u64) { + drop( + self.state_broadcast_tx + .send(crate::protocol::control::ServerMsg::SessionExited { session_id }), + ); + } + + fn workspace_status_snapshot(&self) -> WorkspaceStatusSnapshot { + let mut states = Vec::new(); + let mut snapshot = WorkspaceStatusSnapshot { + effective: crate::protocol::AgentState::Unknown, + session_count: self.sessions.len() as u32, + blocked_count: 0, + done_count: 0, + working_count: 0, + }; + for session in self.sessions.values() { + let state = 
session.state(); + states.push(state); + match state { + crate::protocol::AgentState::Blocked => { + snapshot.blocked_count = snapshot.blocked_count.saturating_add(1); + } + crate::protocol::AgentState::Done => { + snapshot.done_count = snapshot.done_count.saturating_add(1); + } + crate::protocol::AgentState::Working => { + snapshot.working_count = snapshot.working_count.saturating_add(1); + } + crate::protocol::AgentState::Idle | crate::protocol::AgentState::Unknown => {} + } + } + snapshot.effective = crate::agent_status::arbitrate::roll_up_states(states.iter()); + snapshot + } + + fn maybe_broadcast_workspace_status_changed(&mut self) { + let snapshot = self.workspace_status_snapshot(); + if self.last_workspace_status == Some(snapshot) { + return; + } + self.last_workspace_status = Some(snapshot); + drop(self.state_broadcast_tx.send( + crate::protocol::control::ServerMsg::WorkspaceStatusChanged { + effective: snapshot.effective.label().to_owned(), + session_count: snapshot.session_count, + blocked_count: snapshot.blocked_count, + done_count: snapshot.done_count, + working_count: snapshot.working_count, + ts_ns: unix_time_ns(), + }, + )); + } + + fn status_change_reason(result: &crate::agent_status::arbitrate::ArbitrationResult) -> String { + if result + .summary + .has_note(crate::agent_status::evidence::EvidenceNote::WatchdogDemoted) + { + "watchdog_demoted".to_owned() + } else if result + .summary + .has_note(crate::agent_status::evidence::EvidenceNote::ForegroundReturnedToShell) + { + "foreground_returned_to_shell".to_owned() + } else { + format!("{:?}", result.winner).to_ascii_lowercase() + } + } + + fn mark_agent_session_returned_to_shell(&mut self, session_id: u64, now: Instant) { + let key_prefix = format!("{session_id}:"); + self.runtime_gate_states + .retain(|key, _| !key.starts_with(&key_prefix)); + self.runtime_event_sequences + .retain(|key, _| !key.starts_with(&key_prefix)); + self.child_agent_states + .retain(|(parent_id, child_id), _| *parent_id 
!= session_id && *child_id != session_id); + + let Some(session) = self.sessions.get_mut(&session_id) else { + return; + }; + let previous_agent = session.agent.take(); + let previous_source = session + .hook_authority + .as_ref() + .map(|authority| authority.source_id.clone()); + session.hook_authority = None; + session.sequence_tracker.clear_all(); + session.osc_evidence.clear_agent_signals(); + session.osc_evidence.shell_state = Some(crate::agent_status::evidence::RawAgentState::Idle); + session.osc_evidence.shell_mark_at = Some(now); + session.agent_identity_observed = false; + session.status.last_snapshot_summary.authority_source = None; + session.status.last_snapshot_summary.subagents_active = 0; + session.status.last_snapshot_summary.osc_progress_active = false; + session.status.last_snapshot_summary.root_is_agent = false; + crate::clog!( + "status.agent.returned_to_shell: session={session_id} agent={:?} source={:?}", + previous_agent, + previous_source + ); + } + + fn invalidate_rejected_authority( + &mut self, + session_id: u64, + result: &crate::agent_status::arbitrate::ArbitrationResult, + ) { + let watchdog_demoted = result + .summary + .has_note(crate::agent_status::evidence::EvidenceNote::WatchdogDemoted); + if !watchdog_demoted && !result.summary.stale_report { + return; + } + let Some(source_id) = result.summary.authority_source.as_deref() else { + return; + }; + let key = format!("{session_id}:{source_id}"); + self.runtime_gate_states.remove(&key); + if let Some(session) = self.sessions.get_mut(&session_id) + && session + .hook_authority + .as_ref() + .is_some_and(|authority| authority.source_id == source_id) + { + let reason = if watchdog_demoted { + "watchdog_demoted" + } else { + "stale_report" + }; + crate::clog!( + "status.authority.cleared: session={session_id} source={source_id} reason={reason}" + ); + session.hook_authority = None; + } + } + + fn record_status_transition(&mut self, session_id: u64, now: Instant) { + let transitions = 
self.status_transition_times.entry(session_id).or_default(); + transitions.push_back(now); + let window = Duration::from_mins(1); + while transitions + .front() + .is_some_and(|at| now.duration_since(*at) > window) + { + transitions.pop_front(); + } + let per_minute = transitions.len(); + crate::cdebug!( + "status.telemetry: session={session_id} transitions_per_minute={per_minute}" + ); + if per_minute >= 10 { + crate::clog!( + "status.telemetry: session={session_id} high flap rate transitions_per_minute={per_minute}" + ); + } + } + + fn record_watchdog_demoted( + &mut self, + session_id: u64, + summary: &crate::agent_status::evidence::EvidenceSummary, + now: Instant, + ) { + let count = self.watchdog_demotions.entry(session_id).or_default(); + *count = count.saturating_add(1); + let count = *count; + crate::clog!( + "status.stuck: session={session_id} reason=watchdog_demoted count={count} \ + winner={:?} authority_source={:?} rule_id={:?} last_output_ms_ago={:?} \ + cpu_jiffies_delta={} child_process_count={} foreground_pgid={:?} \ + visible_blocker={} visible_idle={} visible_working={} notes={:?}", + summary.winner, + summary.authority_source, + summary.rule_id, + summary + .last_output + .map(|at| now.saturating_duration_since(at).as_millis()), + summary.cpu_jiffies_delta, + summary.child_process_count, + summary.foreground_pgid, + summary.visible_blocker, + summary.visible_idle, + summary.visible_working, + summary.notes, + ); + } + + fn maybe_renotify_blocked(&mut self, session_id: u64, now: Instant) { + let Some(session) = self.sessions.get(&session_id) else { + return; + }; + if session.status.effective != crate::protocol::AgentState::Blocked { + self.blocked_renotify_at.remove(&session_id); + return; + } + let due = self + .blocked_renotify_at + .get(&session_id) + .is_none_or(|last| { + now.duration_since(*last) >= crate::agent_status::policy::RENOTIFY_INTERVAL + }); + if !due { + return; + } + self.blocked_renotify_at.insert(session_id, now); + if let 
Some(session) = self.sessions.get(&session_id) { + crate::clog!("status.renotify: session={session_id} still blocked"); + self.broadcast_agent_state_changed( + session_id, + session, + Some(session.status.raw.label().to_owned()), + Some("renotify".to_owned()), + ); + } + } + + fn mark_status_dirty(&mut self, session_id: u64) { + self.status_dirty_sessions.insert(session_id); + } + + fn status_eval_due(&mut self, session_id: u64, now: Instant) -> bool { + let last = self.status_last_eval.get(&session_id).copied(); + let dirty_due = self.status_dirty_sessions.contains(&session_id) + && last.is_none_or(|last| { + now.duration_since(last) >= crate::agent_status::policy::EVAL_COALESCE + }); + let floor_due = last.is_none_or(|last| now.duration_since(last) >= STATE_TICK_INTERVAL); + if dirty_due || floor_due { + self.status_dirty_sessions.remove(&session_id); + self.status_last_eval.insert(session_id, now); + true + } else { + false + } + } + + pub(super) fn cleanup_status_bookkeeping(&mut self, session_id: u64) { + self.status_cpu_samples.remove(&session_id); + self.status_transition_times.remove(&session_id); + self.watchdog_demotions.remove(&session_id); + self.blocked_renotify_at.remove(&session_id); + self.status_last_eval.remove(&session_id); + self.status_dirty_sessions.remove(&session_id); + self.child_agent_states + .retain(|(parent_id, child_id), _| *parent_id != session_id && *child_id != session_id); + self.runtime_gate_states + .retain(|key, _| !key.starts_with(&format!("{session_id}:"))); + self.runtime_event_sequences + .retain(|key, _| !key.starts_with(&format!("{session_id}:"))); + self.maybe_broadcast_workspace_status_changed(); + } + + fn next_runtime_event_seq(&mut self, session_id: u64, source_id: &str) -> u64 { + let key = format!("{session_id}:{source_id}"); + let seq = self.runtime_event_sequences.entry(key).or_default(); + *seq = seq.saturating_add(1); + *seq + } + + fn status_rule_match( + &self, + session: &Session, + now: Instant, + 
visible_lines: &[String], + agent_osc_allowed: bool, + ) -> Option { + if now.duration_since(session.spawned_at) < crate::agent_status::policy::STARTUP_GRACE { + None + } else { + self.rule_packs.evaluate_with_virtuals( + session.agent.as_deref(), + visible_lines, + status_rule_virtual_regions(session, agent_osc_allowed), + ) + } + } + + fn handle_control_msg(&mut self, msg: crate::protocol::control::ClientMsg) { + use crate::agent_status::HookAuthority; + use crate::protocol::control::ClientMsg; + + match msg { + ClientMsg::ReportAgentState { + session_id, + source_id, + agent_label, + raw_state, + seq, + ts_ns, + message, + } => { + let Some(session) = self.sessions.get_mut(&session_id) else { + crate::cdebug!("control: report for unknown session {session_id}"); + return; + }; + if !session.sequence_tracker.accept(&source_id, seq) { + crate::cdebug!( + "control: stale report ignored session={session_id} source={source_id} seq={seq}" + ); + return; + } + if !matches!( + raw_state.as_str(), + "unknown" | "working" | "blocked" | "idle" + ) { + crate::cdebug!( + "control: unknown raw agent state session={session_id} raw={raw_state}" + ); + return; + } + session.hook_authority = Some(HookAuthority { + source_id: source_id.clone(), + agent_label, + raw_state, + origin: crate::agent_status::AuthorityOrigin::DirectStateReport, + seq, + ts_ns, + message, + last_seen: Instant::now(), + }); + self.mark_status_dirty(session_id); + } + ClientMsg::HeartbeatAgentAuthority { + session_id, + source_id, + seq, + } => { + if let Some(session) = self.sessions.get_mut(&session_id) + && session.sequence_tracker.accept(&source_id, seq) + && let Some(authority) = session.hook_authority.as_mut() + && authority.source_id == source_id + { + authority.seq = seq; + authority.ts_ns = unix_time_ns(); + authority.last_seen = Instant::now(); + self.mark_status_dirty(session_id); + } + } + ClientMsg::ClearAgentAuthority { + session_id, + source_id, + } => { + if let Some(session) = 
self.sessions.get_mut(&session_id) { + session.sequence_tracker.clear_source(&source_id); + if session + .hook_authority + .as_ref() + .is_some_and(|authority| authority.source_id == source_id) + { + session.hook_authority = None; + self.mark_status_dirty(session_id); + } + } + } + ClientMsg::ReportRuntimeEvent { + session_id, + source_id, + runtime, + event, + payload: _, + } => { + let seq = self.next_runtime_event_seq(session_id, &source_id); + let gate = self + .runtime_gate_states + .entry(format!("{session_id}:{source_id}")) + .or_default(); + let runtime_event = crate::agent_status::gating::RuntimeEvent { + runtime: runtime.clone(), + event, + }; + let effect = crate::agent_status::gating::map_event(&runtime_event, gate); + match effect { + crate::agent_status::gating::GateEffect::Authority { + state, + pending_permission: _, + subagents_active: _, + notes: _, + } => { + let ts_ns = unix_time_ns(); + if let Some(session) = self.sessions.get_mut(&session_id) { + session.hook_authority = Some(HookAuthority { + source_id: source_id.clone(), + agent_label: runtime, + raw_state: state.label().to_owned(), + origin: crate::agent_status::AuthorityOrigin::RuntimeEvent, + seq, + ts_ns, + message: None, + last_seen: Instant::now(), + }); + self.mark_status_dirty(session_id); + } + } + crate::agent_status::gating::GateEffect::Heartbeat => { + if let Some(session) = self.sessions.get_mut(&session_id) + && let Some(authority) = session.hook_authority.as_mut() + && authority.source_id == source_id + { + authority.seq = seq; + authority.ts_ns = unix_time_ns(); + authority.last_seen = Instant::now(); + self.mark_status_dirty(session_id); + } + } + crate::agent_status::gating::GateEffect::Clear => { + self.runtime_gate_states + .remove(&format!("{session_id}:{source_id}")); + self.runtime_event_sequences + .remove(&format!("{session_id}:{source_id}")); + if let Some(session) = self.sessions.get_mut(&session_id) + && session + .hook_authority + .as_ref() + 
.is_some_and(|authority| authority.source_id == source_id) + { + session.hook_authority = None; + self.mark_status_dirty(session_id); + } + } + crate::agent_status::gating::GateEffect::CounterOnly { .. } => { + if let Some(session) = self.sessions.get_mut(&session_id) + && let Some(authority) = session.hook_authority.as_mut() + && authority.source_id == source_id + { + authority.seq = seq; + authority.ts_ns = unix_time_ns(); + authority.last_seen = Instant::now(); + } + self.mark_status_dirty(session_id); + } + crate::agent_status::gating::GateEffect::Ignore => {} + } + } + ClientMsg::ReportChildAgentState { + parent_session_id, + child_session_id, + raw_state, + seq, + } => { + let Some(parent) = self.sessions.get_mut(&parent_session_id) else { + crate::cdebug!( + "control: child report for unknown parent session={parent_session_id}" + ); + return; + }; + let source_id = format!("child:{child_session_id}"); + if !parent.sequence_tracker.accept(&source_id, seq) { + crate::cdebug!( + "control: stale child report ignored parent={parent_session_id} child={child_session_id} seq={seq}" + ); + return; + } + let raw = match raw_state.as_str() { + "working" => crate::agent_status::evidence::RawAgentState::Working, + "blocked" => crate::agent_status::evidence::RawAgentState::Blocked, + "idle" | "done" => crate::agent_status::evidence::RawAgentState::Idle, + "unknown" => crate::agent_status::evidence::RawAgentState::Unknown, + other => { + crate::cdebug!( + "control: unknown child raw state parent={parent_session_id} child={child_session_id} raw={other}" + ); + return; + } + }; + if matches!( + raw, + crate::agent_status::evidence::RawAgentState::Idle + | crate::agent_status::evidence::RawAgentState::Unknown + ) { + self.child_agent_states + .remove(&(parent_session_id, child_session_id)); + } else { + self.child_agent_states + .insert((parent_session_id, child_session_id), raw); + } + self.mark_status_dirty(parent_session_id); + } + _ => {} + } + } +} + +fn unix_time_ns() 
-> u64 { + use std::time::{SystemTime, UNIX_EPOCH}; + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map_or(0, |duration| { + duration.as_nanos().min(u128::from(u64::MAX)) as u64 + }) +} + +fn authority_grade_for_runtime(runtime: &str) -> crate::agent_status::evidence::AuthorityGrade { + match runtime { + "opencode" => crate::agent_status::evidence::AuthorityGrade::Complete, + _ => crate::agent_status::evidence::AuthorityGrade::Partial, + } +} + +fn status_rule_virtual_regions( + session: &Session, + agent_osc_allowed: bool, +) -> crate::agent_status::rules::VirtualRegions<'_> { + crate::agent_status::rules::VirtualRegions { + osc_title: agent_osc_allowed + .then_some(session.osc_evidence.title.as_deref()) + .flatten(), + osc_progress: agent_osc_allowed.then_some(if session.osc_evidence.progress_active { + "active" + } else if session.osc_evidence.progress_cleared_at.is_some() { + "cleared" + } else { + "inactive" + }), + } } /// Scan an emitted frame for the diagnostic fingerprint a render bug leaves: @@ -652,7 +1302,8 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> let mut new_clients = socket::start_listener()?; let mut branch_context_ticker = interval(GIT_BRANCH_CONTEXT_POLL_INTERVAL); - let mut state_ticker = interval(STATE_TICK_INTERVAL); + let mut state_ticker = interval(crate::agent_status::policy::EVAL_COALESCE); + let mut token_ticker = interval(Duration::from_secs(30)); let mut render_ticker = interval(RENDER_TICK_INTERVAL); render_ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); let mut sigterm = signal(SignalKind::terminate())?; @@ -666,6 +1317,8 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> // handshakes ride this channel back to the main loop, which then // applies the take-over + spawns the persistent attach task. 
let (handshake_tx, mut handshake_rx) = mpsc::unbounded_channel::(); + let (control_msg_tx, mut control_msg_rx) = + mpsc::unbounded_channel::(); // Resolve the operator's escape-time once at startup; the value // cannot change after daemon launch, so per-iteration env reads @@ -728,16 +1381,24 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> let handshake_tx = handshake_tx.clone(); let sessions_snapshot = mux.session_infos(); let tabs_snapshot = mux.tab_snapshots(); + let visible_text_snapshot = mux.visible_text_snapshots(); + let status_explain_snapshot = mux.status_explain_snapshots(); let history_snapshot = mux.agent_registry_snapshot(); let active_tab = u32::try_from(mux.active_tab).unwrap_or(0); + let control_msg_tx = control_msg_tx.clone(); + let state_broadcast_tx = mux.state_broadcast_tx.clone(); tokio::spawn(perform_handshake( stream, client_permit, handshake_tx, sessions_snapshot, tabs_snapshot, + visible_text_snapshot, + status_explain_snapshot, history_snapshot, active_tab, + control_msg_tx, + state_broadcast_tx, )); } @@ -849,6 +1510,9 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> )); } } + if let Some(focused) = mux.active_focused_id() { + mux.acknowledge_focused_agent_status(focused); + } let mut initial = crate::tui::terminal::RESET_CLEAR_HOME.to_vec(); initial.extend(mux.compose_full_redraw(first_attach_redraw_reason())); initial_frames.push(( @@ -911,6 +1575,10 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> } } + Some(msg) = control_msg_rx.recv() => { + mux.handle_control_msg(msg); + } + // PTY output or exit event from a session. 
Some(event) = mux.event_rx.recv() => { match event { @@ -958,6 +1626,7 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> if reassert_outer_terminal_title { mux.last_outer_terminal_title = None; } + mux.mark_status_dirty(session_id); // Mark the pane body dirty; the render ticker coalesces // bursts of PTY output into one frame per // tick. Dialog-open still invalidates — the @@ -972,6 +1641,32 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> // leaving a stale `○ Done` placeholder behind. // Matches the operator's mental model: "agent // exited → its tab is gone." + if let Some(session) = mux.sessions.get_mut(&session_id) { + let summary = crate::agent_status::evidence::EvidenceSummary { + process_exited: true, + winner: crate::agent_status::evidence::EvidenceWinner::ProcessExit, + notes: vec![crate::agent_status::evidence::EvidenceNote::ProcessExited], + ..Default::default() + }; + session.status.seen = true; + let changed = session.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Idle, + jackin_protocol::agent_status::AgentStatusConfidence::Weak, + summary, + ); + session.state = session.status.effective; + if changed.is_some() { + mux.record_status_transition(session_id, Instant::now()); + if let Some(session) = mux.sessions.get(&session_id) { + mux.broadcast_agent_state_changed( + session_id, + session, + Some("idle".to_owned()), + Some("process_exit".to_owned()), + ); + } + } + } mux.remove_exited_session(session_id); mux.request_full_redraw(session_exit_redraw_reason()); // When the last live session exits — whether @@ -1082,12 +1777,262 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> // identical. A full redraw (clear + repaint) every tick reads as // a constant flicker, so skip it unless state actually changed. 
let states_before: Vec<_> = - mux.sessions.iter().map(|(id, s)| (*id, s.state)).collect(); - for session in mux.sessions.values_mut() { - session.refresh_state(); + mux.sessions.iter().map(|(id, s)| (*id, s.state())).collect(); + let eval_now = Instant::now(); + let session_ids: Vec = mux + .sessions + .keys() + .copied() + .collect::>() + .into_iter() + .filter(|session_id| mux.status_eval_due(*session_id, eval_now)) + .collect(); + for session_id in session_ids { + let now = Instant::now(); + let process_anchor = mux + .sessions + .get(&session_id) + .map(|session| (session.child_pid, session.agent.clone())); + let default_agent_expected = + process_anchor.as_ref().is_some_and(|(_, agent)| agent.is_some()); + let process_probe = process_anchor.and_then(|(child_pid, agent)| { + child_pid.map(|pid| { + let root_info = crate::agent_status::process::read_process_info(pid); + let child_alive = root_info.is_some(); + let root_pgid = root_info.as_ref().map(|info| info.pgid); + let root_is_agent = root_info + .as_ref() + .and_then(crate::agent_status::process::identify_agent) + .is_some_and(|kind| { + agent + .as_deref() + .is_some_and(|agent| kind.as_str() == agent) + }); + let detected = + crate::agent_status::process::detect_foreground_agent(pid); + let foreground_pgid = detected.as_ref().map(|(_, pgid)| *pgid); + let foreground_is_agent = + detected.as_ref().is_some_and(|(kind, _)| { + agent + .as_deref() + .is_some_and(|agent| kind.as_str() == agent) + }); + let child_process_count = + crate::agent_status::process::descendant_process_count(pid); + let sample = mux.status_cpu_samples.entry(session_id).or_default(); + let cpu_jiffies_delta = + crate::agent_status::process::sample_cpu_jiffies_delta( + pid, sample, now, + ); + ( + child_alive, + root_is_agent, + root_pgid, + foreground_is_agent, + foreground_pgid, + child_process_count, + cpu_jiffies_delta, + ) + }) + }); + let ( + child_alive, + root_is_agent, + root_pgid, + foreground_is_agent, + foreground_pgid, + 
child_process_count, + cpu_jiffies_delta, + ) = process_probe.unwrap_or(( + true, + default_agent_expected, + None, + default_agent_expected, + None, + 0, + 0, + )); + let mut foreground_returned_to_shell = false; + if let Some(session) = mux.sessions.get_mut(&session_id) { + if session.agent.is_some() && (root_is_agent || foreground_is_agent) { + session.agent_identity_observed = true; + } + foreground_returned_to_shell = agent_foreground_returned_to_shell( + ForegroundShellHandoffProbe { + agent_expected: session.agent.is_some(), + agent_identity_observed: session.agent_identity_observed, + startup_grace_done: now.duration_since(session.spawned_at) + >= crate::agent_status::policy::STARTUP_GRACE, + child_alive, + root_is_agent, + foreground_is_agent, + root_pgid, + foreground_pgid, + child_process_count, + }, + ); + if session.agent.is_some() + && (!child_alive + || foreground_returned_to_shell + || !foreground_is_agent) + { + session.osc_evidence.clear_agent_signals(); + } + } + let result = mux.sessions.get(&session_id).map(|session| { + let visible_lines = session.visible_lines(); + let rule_match = + mux.status_rule_match(session, now, &visible_lines, foreground_is_agent); + let screen_state = rule_match.as_ref().and_then(|matched| matched.state); + let authority = session.hook_authority.as_ref().map(|authority| { + let gate = mux + .runtime_gate_states + .get(&format!("{session_id}:{}", authority.source_id)); + crate::agent_status::evidence::AuthorityEvidence { + source_id: authority.source_id.clone(), + grade: authority_grade_for_runtime(&authority.agent_label), + direct_state_report: matches!( + authority.origin, + crate::agent_status::AuthorityOrigin::DirectStateReport + ), + mapped_state: match authority.raw_state.as_str() { + "working" => { + crate::agent_status::evidence::RawAgentState::Working + } + "blocked" => { + crate::agent_status::evidence::RawAgentState::Blocked + } + "idle" => crate::agent_status::evidence::RawAgentState::Idle, + _ => 
crate::agent_status::evidence::RawAgentState::Unknown, + }, + pending_permission: gate.is_some_and(|gate| gate.pending_permission) + || authority.raw_state == "blocked", + last_event: authority.last_seen, + seq: authority.seq, + notes: gate.map_or_else(Vec::new, |gate| gate.notes.clone()), + } + }); + let hook_subagents = session + .hook_authority + .as_ref() + .and_then(|authority| { + mux.runtime_gate_states + .get(&format!("{session_id}:{}", authority.source_id)) + }) + .map_or(0, |gate| gate.subagents_active); + let bridge_subagents = mux + .child_agent_states + .keys() + .filter(|(parent_id, _)| *parent_id == session_id) + .count() as u32; + let snapshot = crate::agent_status::evidence::EvidenceSnapshot { + authority, + osc: session.osc_evidence.clone(), + screen: crate::agent_status::evidence::ScreenEvidence { + state: screen_state, + rule_id: rule_match.as_ref().map(|matched| matched.rule_id.clone()), + strong: rule_match.as_ref().is_some_and(|matched| matched.strong), + freeze: rule_match.as_ref().is_some_and(|matched| matched.freeze), + observed_at: now, + }, + process: crate::agent_status::evidence::ProcessEvidence { + process_exited: !child_alive, + foreground_returned_to_shell, + child_alive, + root_is_agent, + foreground_is_agent, + foreground_pgid, + child_process_count, + cpu_jiffies_delta, + }, + activity: crate::agent_status::evidence::ActivityEvidence { + last_output: Some(session.last_output_at), + last_input: Some(session.last_input_at), + }, + subagents_active: hook_subagents.saturating_add(bridge_subagents), + }; + crate::agent_status::policy::apply_watchdog( + crate::agent_status::arbitrate::arbitrate( + &snapshot, + session.status.raw, + now, + ), + now, + ) + }); + let Some(result) = result else { + continue; + }; + crate::cdebug!( + "status.eval: session={session_id} raw={} confidence={:?} \ + winner={:?} authority_source={:?} rule_id={:?} \ + visible_blocker={} visible_idle={} visible_working={} \ + stale_report={} root_is_agent={} 
foreground_returned_to_shell={} \ + foreground_pgid={:?} child_process_count={} cpu_jiffies_delta={} \ + subagents_active={} notes={:?}", + result.raw.label(), + result.confidence, + result.winner, + result.summary.authority_source, + result.summary.rule_id, + result.summary.visible_blocker, + result.summary.visible_idle, + result.summary.visible_working, + result.summary.stale_report, + result.summary.root_is_agent, + result.summary.foreground_returned_to_shell, + result.summary.foreground_pgid, + result.summary.child_process_count, + result.summary.cpu_jiffies_delta, + result.summary.subagents_active, + result.summary.notes, + ); + mux.invalidate_rejected_authority(session_id, &result); + let foreground_returned_to_shell = + result.summary.foreground_returned_to_shell; + let changed = mux.sessions.get_mut(&session_id).and_then(|session| { + if !crate::agent_status::policy::should_publish_candidate( + session.status.effective, + &result, + &mut session.pending_status_transition, + ) { + return None; + } + let changed = session.status.publish_raw( + result.raw, + result.confidence, + result.summary.clone(), + ); + session.state = session.status.effective; + changed + }); + if changed.is_some() { + let now = Instant::now(); + mux.record_status_transition(session_id, now); + if result + .summary + .has_note(crate::agent_status::evidence::EvidenceNote::WatchdogDemoted) + { + mux.record_watchdog_demoted(session_id, &result.summary, now); + } + if let Some(session) = mux.sessions.get(&session_id) { + let reason = Multiplexer::status_change_reason(&result); + mux.broadcast_agent_state_changed( + session_id, + session, + Some(result.raw.label().to_owned()), + Some(reason), + ); + } + mux.maybe_broadcast_workspace_status_changed(); + } + if foreground_returned_to_shell { + mux.mark_agent_session_returned_to_shell(session_id, Instant::now()); + } + mux.maybe_renotify_blocked(session_id, Instant::now()); } let states_after: Vec<_> = - mux.sessions.iter().map(|(id, s)| (*id, 
s.state)).collect(); + mux.sessions.iter().map(|(id, s)| (*id, s.state())).collect(); if mux.expire_dialog_copy_feedback(Instant::now()) { let frame_data = mux.compose_dialog_overlay_frame(dialog_change_redraw_reason()); @@ -1113,6 +2058,33 @@ pub async fn run_daemon(initial_agent: String, launch_config: CapsuleConfig) -> let sbuf = mux.compose_diff_frame(status_change_redraw_reason()); mux.send_output(sbuf); } + + _ = token_ticker.tick() => { + let changed = mux.token_monitor.poll_due_sessions(); + for session_id in changed { + if let Some(totals) = mux.token_monitor.totals(session_id) { + let agent = mux + .sessions + .get(&session_id) + .and_then(|session| session.agent.clone()) + .unwrap_or_else(|| "unknown".to_owned()); + drop(mux.state_broadcast_tx.send( + crate::protocol::control::ServerMsg::TokenUsageChanged { + session_id, + agent, + model: totals.model.clone(), + input_tokens: totals.input_tokens, + output_tokens: totals.output_tokens, + cache_read_tokens: totals.cache_read_tokens, + cache_write_tokens: totals.cache_write_tokens, + cost_usd: totals.cost_usd, + ts_ns: Utc::now().timestamp_nanos_opt().unwrap_or_default() as u64, + }, + )); + } + } + mux.request_diff_redraw(status_change_redraw_reason()); + } } } } diff --git a/crates/jackin-capsule/src/daemon/compositor.rs b/crates/jackin-capsule/src/daemon/compositor.rs index e3b3f6499..a29e7e812 100644 --- a/crates/jackin-capsule/src/daemon/compositor.rs +++ b/crates/jackin-capsule/src/daemon/compositor.rs @@ -489,7 +489,7 @@ impl Multiplexer { pub(super) fn snapshot_session_states(&self) -> Vec<(u64, VisibleAgentState)> { self.sessions .iter() - .map(|(&id, s)| (id, visible_agent_state_from_protocol(s.state))) + .map(|(&id, s)| (id, visible_agent_state_from_protocol(s.state()))) .collect() } diff --git a/crates/jackin-capsule/src/daemon/multiplexer_utils.rs b/crates/jackin-capsule/src/daemon/multiplexer_utils.rs index a9679b4c6..b324d147e 100644 --- a/crates/jackin-capsule/src/daemon/multiplexer_utils.rs 
+++ b/crates/jackin-capsule/src/daemon/multiplexer_utils.rs @@ -1,5 +1,8 @@ //! Miscellaneous Multiplexer utility methods. +use std::collections::BTreeMap; +use std::time::Instant; + use super::{ Dialog, FullRedrawReason, MAX_SESSIONS, MAX_TABS, Multiplexer, PaletteCloseLabel, Result, SESSION_ENV_PASSTHROUGH, SessionInfo, @@ -108,8 +111,133 @@ impl Multiplexer { id, label: s.label.clone(), agent: s.agent.clone(), - state: s.state, + state: s.state(), active: Some(id) == focused, + token_usage: self + .token_monitor + .totals(id) + .map(super::super::token_monitor::TokenTotals::to_summary), + agent_status_report: Some(s.status.report( + s.agent.clone(), + if s.status.seen { s.status.revision } else { 0 }, + )), + }) + .collect() + } + + pub(super) fn visible_text_snapshots(&self) -> BTreeMap> { + self.sessions + .iter() + .map(|(&id, session)| (id, session.visible_lines())) + .collect() + } + + pub(super) fn status_explain_snapshots(&self) -> BTreeMap { + let now = Instant::now(); + self.sessions + .iter() + .map(|(&id, session)| { + let visible_lines = session.visible_lines(); + let summary = &session.status.last_snapshot_summary; + let gate = session.hook_authority.as_ref().and_then(|authority| { + self.runtime_gate_states + .get(&format!("{id}:{}", authority.source_id)) + }); + let report = session.status.report( + session.agent.clone(), + if session.status.seen { + session.status.revision + } else { + 0 + }, + ); + let watchdog_demoted = summary + .has_note(crate::agent_status::evidence::EvidenceNote::WatchdogDemoted); + let value = serde_json::json!({ + "session_id": id, + "label": session.label, + "agent": session.agent, + "effective": session.status.effective.label(), + "raw": session.status.raw.label(), + "seen": session.status.seen, + "revision": session.status.revision, + "status_report": report, + "evidence": { + "winner": format!("{:?}", summary.winner).to_ascii_lowercase(), + "confidence": format!("{:?}", summary.confidence).to_ascii_lowercase(), + 
"rule_id": summary.rule_id, + "authority_source": summary.authority_source, + "foreground_pgid": summary.foreground_pgid, + "activity": { + "last_output_ms_ago": summary.last_output.map(|at| now.saturating_duration_since(at).as_millis()), + "last_input_ms_ago": summary.last_input.map(|at| now.saturating_duration_since(at).as_millis()), + }, + "process": { + "child_process_count": summary.child_process_count, + "cpu_jiffies_delta": summary.cpu_jiffies_delta, + "process_exited": summary.process_exited, + "foreground_returned_to_shell": summary.foreground_returned_to_shell, + "root_is_agent": summary.root_is_agent, + }, + "screen": { + "visible_blocker": summary.visible_blocker, + "visible_idle": summary.visible_idle, + "visible_working": summary.visible_working, + }, + "osc": { + "progress_active": summary.osc_progress_active, + "title": session.osc_evidence.title, + "title_changed_ms_ago": session.osc_evidence.title_changed_at.map(|at| now.saturating_duration_since(at).as_millis()), + "notify_edge_ms_ago": session.osc_evidence.notify_edge_at.map(|at| now.saturating_duration_since(at).as_millis()), + "progress_cleared_ms_ago": session.osc_evidence.progress_cleared_at.map(|at| now.saturating_duration_since(at).as_millis()), + "bel_ms_ago": session.osc_evidence.bel_at.map(|at| now.saturating_duration_since(at).as_millis()), + "bel_count": session.osc_evidence.bel_count, + "shell_state": session.osc_evidence.shell_state.map(jackin_protocol::agent_status::AgentRawState::label), + "shell_mark_ms_ago": session.osc_evidence.shell_mark_at.map(|at| now.saturating_duration_since(at).as_millis()), + }, + "subagents_active": summary.subagents_active, + "stale_report": summary.stale_report, + "notes": summary.notes.iter().map(|note| format!("{note:?}").to_ascii_lowercase()).collect::>(), + }, + "stuck": { + "active": watchdog_demoted, + "reason": if watchdog_demoted { Some("watchdog_demoted") } else { None }, + "last_output_ms_ago": summary.last_output.map(|at| 
now.saturating_duration_since(at).as_millis()), + "cpu_jiffies_delta": summary.cpu_jiffies_delta, + "child_process_count": summary.child_process_count, + "authority_source": summary.authority_source, + "foreground_pgid": summary.foreground_pgid, + "evidence_winner": format!("{:?}", summary.winner).to_ascii_lowercase(), + "notes": summary.notes.iter().map(|note| format!("{note:?}").to_ascii_lowercase()).collect::>(), + }, + "authority": session.hook_authority.as_ref().map(|authority| serde_json::json!({ + "source_id": authority.source_id, + "agent_label": authority.agent_label, + "grade": format!("{:?}", super::authority_grade_for_runtime(&authority.agent_label)).to_ascii_lowercase(), + "origin": authority.origin.label(), + "raw_state": authority.raw_state, + "seq": authority.seq, + "last_seen_ms_ago": now.saturating_duration_since(authority.last_seen).as_millis(), + })), + "gate": gate.map(|gate| serde_json::json!({ + "pending_permission": gate.pending_permission, + "subagents_active": gate.subagents_active, + "notes": gate.notes.iter().map(|note| format!("{note:?}").to_ascii_lowercase()).collect::>(), + })), + "debounce": { + "candidate": session.pending_status_transition.candidate.map(jackin_protocol::control::AgentState::label), + "confirmations": session.pending_status_transition.confirmations, + }, + "rules": self.rule_packs.explain_with_virtuals( + session.agent.as_deref(), + &visible_lines, + super::status_rule_virtual_regions(session, true), + ), + "visible": { + "lines": visible_lines, + }, + }); + (id, value) }) .collect() } @@ -136,13 +264,22 @@ impl Multiplexer { session_id: id, label: session.label.clone(), agent: session.agent.clone(), - state: session.state, + state: session.state(), + agent_status_report: Some(session.status.report( + session.agent.clone(), + if session.status.seen { + session.status.revision + } else { + 0 + }, + )), }, None => PaneSnapshot { session_id: id, label: "(missing)".to_owned(), agent: None, state: 
crate::protocol::control::AgentState::Idle, + agent_status_report: None, }, }) .collect(); diff --git a/crates/jackin-capsule/src/daemon/pane_layout.rs b/crates/jackin-capsule/src/daemon/pane_layout.rs index 1732f4dc6..fae377f95 100644 --- a/crates/jackin-capsule/src/daemon/pane_layout.rs +++ b/crates/jackin-capsule/src/daemon/pane_layout.rs @@ -49,6 +49,7 @@ impl Multiplexer { &tab_codename, ); let agent_for_log = agent_slug.clone(); + let agent_for_monitor = agent_slug.clone(); let (session, new_id) = Session::spawn( &launch.label, agent_slug, @@ -65,6 +66,9 @@ impl Multiplexer { self.event_tx.clone(), )?; self.sessions.insert(new_id, session); + if let Some(agent_slug) = agent_for_monitor.as_deref() { + self.token_monitor.register_session(new_id, agent_slug); + } let tab = &mut self.tabs[self.active_tab]; let placed = match direction { SplitDirection::Left => tab.tree.split_h(from_id, new_id, SplitPosition::Before), @@ -80,12 +84,16 @@ impl Multiplexer { if let Some(orphan) = self.sessions.remove(&new_id) { orphan.terminate(); } + self.token_monitor.deregister_session(new_id); + self.cleanup_status_bookkeeping(new_id); crate::clog!( "action: split aborted — from_id={from_id} no longer in tab tree; reaped orphan id={new_id}", ); return Ok(()); } tab.focused_id = new_id; + self.broadcast_session_spawned(new_id, agent_for_log.clone(), launch.label.clone()); + self.maybe_broadcast_workspace_status_changed(); self.resize_panes(); self.synthesise_focus_swap(Some(from_id), Some(new_id)); crate::clog!( @@ -158,6 +166,9 @@ impl Multiplexer { self.active_tab = self.tabs.len().saturating_sub(1); } } + self.broadcast_session_exited(id); + self.token_monitor.deregister_session(id); + self.cleanup_status_bookkeeping(id); self.resize_panes(); self.synthesise_focus_swap(prev_focused, self.active_focused_id()); // Reset the ratatui double-buffer so the next compose_full_frame @@ -401,6 +412,24 @@ impl Multiplexer { } } } + if let Some(n) = new { + 
self.acknowledge_focused_agent_status(n); + } + } + + pub(super) fn acknowledge_focused_agent_status(&mut self, session_id: u64) -> bool { + let changed = self.sessions.get_mut(&session_id).and_then(|session| { + let changed = crate::agent_status::seen::mark_pane_focused(&mut session.status); + session.state = session.status.effective; + changed + }); + if changed.is_some() + && let Some(session) = self.sessions.get(&session_id) + { + self.broadcast_agent_state_changed(session_id, session, None, Some("seen".to_owned())); + return true; + } + false } /// Switch focus to the pane the operator clicked on, if it differs diff --git a/crates/jackin-capsule/src/daemon/session_lifecycle.rs b/crates/jackin-capsule/src/daemon/session_lifecycle.rs index 5773ee0b8..507af0172 100644 --- a/crates/jackin-capsule/src/daemon/session_lifecycle.rs +++ b/crates/jackin-capsule/src/daemon/session_lifecycle.rs @@ -85,6 +85,9 @@ impl Multiplexer { if let Some(session) = self.sessions.remove(&id) { session.terminate(); } + self.broadcast_session_exited(id); + self.token_monitor.deregister_session(id); + self.cleanup_status_bookkeeping(id); } self.tabs.remove(self.active_tab); self.retire_codename(&closed_codename); @@ -106,9 +109,15 @@ impl Multiplexer { self.sessions.len(), self.tabs.len() ); + let exited_ids = self.sessions.keys().copied().collect::>(); for (_, session) in self.sessions.drain() { session.terminate(); } + for id in exited_ids { + self.broadcast_session_exited(id); + self.cleanup_status_bookkeeping(id); + } + self.token_monitor.clear(); self.tabs.clear(); self.active_tab = 0; self.zoomed = None; @@ -187,6 +196,9 @@ impl Multiplexer { } } self.sessions.remove(&session_id); + self.broadcast_session_exited(session_id); + self.token_monitor.deregister_session(session_id); + self.cleanup_status_bookkeeping(session_id); self.zoomed = self.zoomed.filter(|&id| id != session_id); self.resize_panes(); self.synthesise_focus_swap(prev_focused, self.active_focused_id()); @@ -389,6 
+401,9 @@ impl Multiplexer { )?; let tab_label = launch.label.clone(); self.sessions.insert(id, session); + if let Some(agent_slug) = agent.as_deref() { + self.token_monitor.register_session(id, agent_slug); + } if self.tabs.is_empty() { self.tabs .push(Tab::new_single(tab_label, id, codename.clone())); @@ -415,6 +430,8 @@ impl Multiplexer { started_at: Utc::now(), exited_at: None, }); + self.broadcast_session_spawned(id, agent.clone(), launch.label.clone()); + self.maybe_broadcast_workspace_status_changed(); // Reflow so the new pane's PTY gets the correct interior // dimensions (outer rect minus border rows/cols). Without // this, the session keeps its initial `content_rows × diff --git a/crates/jackin-capsule/src/daemon/tests.rs b/crates/jackin-capsule/src/daemon/tests.rs index c01323c77..acca2c916 100644 --- a/crates/jackin-capsule/src/daemon/tests.rs +++ b/crates/jackin-capsule/src/daemon/tests.rs @@ -409,6 +409,849 @@ fn test_provider_session( (session, input_rx) } +#[test] +fn focused_status_acknowledgement_transitions_done_to_idle() { + let mut mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + session.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Working, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + session.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Idle, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + session.state = session.status.effective; + assert_eq!(session.state(), crate::protocol::AgentState::Done); + mux.sessions.insert(1, session); + mux.tabs.push(Tab::new_single("Claude", 1, "test")); + + assert!(mux.acknowledge_focused_agent_status(1)); + + let session = mux.sessions.get(&1).unwrap(); + assert_eq!(session.state(), crate::protocol::AgentState::Idle); + 
assert!(session.status.seen); + assert_eq!(session.status.revision, 3); +} + +#[test] +fn status_explain_marks_watchdog_demotion_as_stuck() { + let mut mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + session.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Unknown, + jackin_protocol::agent_status::AgentStatusConfidence::Unknown, + crate::agent_status::evidence::EvidenceSummary { + raw_state: crate::agent_status::evidence::RawAgentState::Unknown, + confidence: jackin_protocol::agent_status::AgentStatusConfidence::Unknown, + winner: crate::agent_status::evidence::EvidenceWinner::Unknown, + authority_source: Some("claude-hook".to_owned()), + child_process_count: 0, + cpu_jiffies_delta: 0, + root_is_agent: true, + foreground_returned_to_shell: true, + notes: vec![crate::agent_status::evidence::EvidenceNote::WatchdogDemoted], + ..Default::default() + }, + ); + session.hook_authority = Some(crate::agent_status::HookAuthority { + source_id: "claude-hook".to_owned(), + agent_label: "claude".to_owned(), + raw_state: "working".to_owned(), + origin: crate::agent_status::AuthorityOrigin::RuntimeEvent, + seq: 7, + ts_ns: 1, + message: None, + last_seen: Instant::now(), + }); + session.osc_evidence.bel_count = 2; + session.state = session.status.effective; + mux.sessions.insert(1, session); + + let snapshots = mux.status_explain_snapshots(); + let report = snapshots.get(&1).expect("status explain report"); + let stuck = report + .get("stuck") + .expect("status explain should include stuck diagnostics"); + + assert_eq!(stuck["active"], true); + assert_eq!(stuck["reason"], "watchdog_demoted"); + assert_eq!(stuck["authority_source"], "claude-hook"); + assert_eq!(stuck["evidence_winner"], "unknown"); + assert_eq!(report["evidence"]["osc"]["bel_count"], 2); + assert_eq!(report["evidence"]["process"]["root_is_agent"], true); + assert_eq!( + report["evidence"]["process"]["foreground_returned_to_shell"], 
+ true + ); + assert_eq!( + report["status_report"]["foreground_returned_to_shell"], + true + ); + assert_eq!(report["authority"]["grade"], "partial"); + assert_eq!(report["authority"]["origin"], "runtime_event"); +} + +#[test] +fn watchdog_demotion_state_change_reason_is_diagnostic() { + let result = crate::agent_status::arbitrate::ArbitrationResult { + raw: crate::agent_status::evidence::RawAgentState::Unknown, + confidence: jackin_protocol::agent_status::AgentStatusConfidence::Unknown, + winner: crate::agent_status::evidence::EvidenceWinner::Unknown, + notes: vec![crate::agent_status::evidence::EvidenceNote::WatchdogDemoted], + summary: crate::agent_status::evidence::EvidenceSummary { + notes: vec![crate::agent_status::evidence::EvidenceNote::WatchdogDemoted], + ..Default::default() + }, + }; + + assert_eq!( + Multiplexer::status_change_reason(&result), + "watchdog_demoted" + ); +} + +#[test] +fn foreground_shell_handoff_state_change_reason_is_diagnostic() { + let result = crate::agent_status::arbitrate::ArbitrationResult { + raw: crate::agent_status::evidence::RawAgentState::Idle, + confidence: jackin_protocol::agent_status::AgentStatusConfidence::Weak, + winner: crate::agent_status::evidence::EvidenceWinner::ProcessExit, + notes: vec![crate::agent_status::evidence::EvidenceNote::ForegroundReturnedToShell], + summary: crate::agent_status::evidence::EvidenceSummary { + foreground_returned_to_shell: true, + notes: vec![crate::agent_status::evidence::EvidenceNote::ForegroundReturnedToShell], + ..Default::default() + }, + }; + + assert_eq!( + Multiplexer::status_change_reason(&result), + "foreground_returned_to_shell" + ); +} + +#[test] +fn foreground_shell_handoff_requires_prior_agent_identity_and_root_foreground() { + let base = ForegroundShellHandoffProbe { + agent_expected: true, + agent_identity_observed: true, + startup_grace_done: true, + child_alive: true, + root_is_agent: false, + foreground_is_agent: false, + root_pgid: Some(123), + foreground_pgid: 
Some(123), + child_process_count: 0, + }; + + assert!(agent_foreground_returned_to_shell(base)); + assert!(!agent_foreground_returned_to_shell( + ForegroundShellHandoffProbe { + agent_identity_observed: false, + ..base + } + )); + assert!(!agent_foreground_returned_to_shell( + ForegroundShellHandoffProbe { + startup_grace_done: false, + ..base + } + )); + assert!(!agent_foreground_returned_to_shell( + ForegroundShellHandoffProbe { + root_is_agent: true, + ..base + } + )); + assert!(!agent_foreground_returned_to_shell( + ForegroundShellHandoffProbe { + foreground_pgid: Some(456), + ..base + } + )); + assert!(!agent_foreground_returned_to_shell( + ForegroundShellHandoffProbe { + child_process_count: 1, + ..base + } + )); +} + +#[test] +fn watchdog_demotion_invalidates_rejected_authority_source() { + let mut mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + session.hook_authority = Some(crate::agent_status::HookAuthority { + source_id: "hook-claude-1".to_owned(), + agent_label: "claude".to_owned(), + raw_state: "working".to_owned(), + origin: crate::agent_status::AuthorityOrigin::RuntimeEvent, + seq: 7, + ts_ns: 1, + message: None, + last_seen: Instant::now(), + }); + mux.sessions.insert(1, session); + mux.runtime_gate_states.insert( + "1:hook-claude-1".to_owned(), + crate::agent_status::gating::SourceGateState { + subagents_active: 1, + ..Default::default() + }, + ); + let result = crate::agent_status::arbitrate::ArbitrationResult { + raw: crate::agent_status::evidence::RawAgentState::Unknown, + confidence: jackin_protocol::agent_status::AgentStatusConfidence::Unknown, + winner: crate::agent_status::evidence::EvidenceWinner::Unknown, + notes: vec![crate::agent_status::evidence::EvidenceNote::WatchdogDemoted], + summary: crate::agent_status::evidence::EvidenceSummary { + authority_source: Some("hook-claude-1".to_owned()), + notes: vec![crate::agent_status::evidence::EvidenceNote::WatchdogDemoted], + 
..Default::default() + }, + }; + + mux.invalidate_rejected_authority(1, &result); + + assert!(mux.sessions.get(&1).unwrap().hook_authority.is_none()); + assert!(!mux.runtime_gate_states.contains_key("1:hook-claude-1")); +} + +#[test] +fn foreground_shell_handoff_cleanup_clears_agent_identity_and_seeds_shell_evidence() { + let mut mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + session.agent_identity_observed = true; + session.sequence_tracker.accept("hook-claude-1", 7); + session.hook_authority = Some(crate::agent_status::HookAuthority { + source_id: "hook-claude-1".to_owned(), + agent_label: "claude".to_owned(), + raw_state: "working".to_owned(), + origin: crate::agent_status::AuthorityOrigin::RuntimeEvent, + seq: 7, + ts_ns: 1, + message: None, + last_seen: Instant::now(), + }); + session.osc_evidence.title = Some("Claude working".to_owned()); + session.osc_evidence.progress_active = true; + session.status.last_snapshot_summary = crate::agent_status::evidence::EvidenceSummary { + authority_source: Some("hook-claude-1".to_owned()), + subagents_active: 2, + osc_progress_active: true, + root_is_agent: true, + ..Default::default() + }; + mux.sessions.insert(1, session); + mux.runtime_gate_states.insert( + "1:hook-claude-1".to_owned(), + crate::agent_status::gating::SourceGateState { + subagents_active: 2, + ..Default::default() + }, + ); + mux.runtime_event_sequences + .insert("1:hook-claude-1".to_owned(), 7); + mux.child_agent_states.insert( + (1, 99), + crate::agent_status::evidence::RawAgentState::Working, + ); + + mux.mark_agent_session_returned_to_shell(1, Instant::now()); + + let session = mux.sessions.get(&1).expect("session should remain open"); + assert_eq!(session.agent, None); + assert!(session.hook_authority.is_none()); + assert!(!session.sequence_tracker.has_source("hook-claude-1")); + assert_eq!( + session.osc_evidence.shell_state, + Some(crate::agent_status::evidence::RawAgentState::Idle) 
+ ); + assert_eq!(session.osc_evidence.title, None); + assert!(!session.osc_evidence.progress_active); + assert!(!session.agent_identity_observed); + assert_eq!(session.status.last_snapshot_summary.authority_source, None); + assert_eq!(session.status.last_snapshot_summary.subagents_active, 0); + assert!(!session.status.last_snapshot_summary.root_is_agent); + assert!(!mux.runtime_gate_states.contains_key("1:hook-claude-1")); + assert!(!mux.runtime_event_sequences.contains_key("1:hook-claude-1")); + assert!(mux.child_agent_states.is_empty()); +} + +#[test] +fn expired_report_invalidates_rejected_authority_source() { + let mut mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + session.hook_authority = Some(crate::agent_status::HookAuthority { + source_id: "hook-claude-1".to_owned(), + agent_label: "claude".to_owned(), + raw_state: "working".to_owned(), + origin: crate::agent_status::AuthorityOrigin::RuntimeEvent, + seq: 7, + ts_ns: 1, + message: None, + last_seen: Instant::now() + .checked_sub(crate::agent_status::policy::AUTHORITY_TTL + Duration::from_secs(1)) + .unwrap(), + }); + mux.sessions.insert(1, session); + mux.runtime_gate_states.insert( + "1:hook-claude-1".to_owned(), + crate::agent_status::gating::SourceGateState::default(), + ); + let result = crate::agent_status::arbitrate::ArbitrationResult { + raw: crate::agent_status::evidence::RawAgentState::Unknown, + confidence: jackin_protocol::agent_status::AgentStatusConfidence::Unknown, + winner: crate::agent_status::evidence::EvidenceWinner::Unknown, + notes: vec![crate::agent_status::evidence::EvidenceNote::AuthorityExpired], + summary: crate::agent_status::evidence::EvidenceSummary { + authority_source: Some("hook-claude-1".to_owned()), + stale_report: true, + notes: vec![crate::agent_status::evidence::EvidenceNote::AuthorityExpired], + ..Default::default() + }, + }; + + mux.invalidate_rejected_authority(1, &result); + + 
assert!(mux.sessions.get(&1).unwrap().hook_authority.is_none()); + assert!(!mux.runtime_gate_states.contains_key("1:hook-claude-1")); +} + +#[test] +fn runtime_event_sequences_are_daemon_assigned_per_source() { + let mut mux = test_mux(24, 80); + let (session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + mux.sessions.insert(1, session); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: "UserPromptSubmit".to_owned(), + payload: None, + }); + assert_eq!( + mux.sessions + .get(&1) + .unwrap() + .hook_authority + .as_ref() + .unwrap() + .seq, + 1 + ); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-side".to_owned(), + runtime: "claude".to_owned(), + event: "UserPromptSubmit".to_owned(), + payload: None, + }); + assert_eq!( + mux.sessions + .get(&1) + .unwrap() + .hook_authority + .as_ref() + .unwrap() + .seq, + 1 + ); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: "PreToolUse".to_owned(), + payload: None, + }); + assert_eq!( + mux.sessions + .get(&1) + .unwrap() + .hook_authority + .as_ref() + .unwrap() + .seq, + 2 + ); +} + +#[test] +fn runtime_event_sequence_resets_after_clear() { + let mut mux = test_mux(24, 80); + let (session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + mux.sessions.insert(1, session); + + for event in ["UserPromptSubmit", "SessionEnd", "UserPromptSubmit"] { + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: event.to_owned(), + payload: None, + }); + } + + assert_eq!( + mux.sessions + .get(&1) + .unwrap() + .hook_authority + 
.as_ref() + .unwrap() + .seq, + 1 + ); +} + +#[test] +fn runtime_clear_event_does_not_clear_other_source_authority() { + let mut mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("codex".to_owned())); + session.hook_authority = Some(crate::agent_status::HookAuthority { + source_id: "hook-codex-1".to_owned(), + agent_label: "codex".to_owned(), + raw_state: "working".to_owned(), + origin: crate::agent_status::AuthorityOrigin::RuntimeEvent, + seq: 7, + ts_ns: 1, + message: None, + last_seen: Instant::now(), + }); + mux.sessions.insert(1, session); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: "SessionEnd".to_owned(), + payload: None, + }); + + let authority = mux + .sessions + .get(&1) + .and_then(|session| session.hook_authority.as_ref()) + .expect("other source must stay authoritative"); + assert_eq!(authority.source_id, "hook-codex-1"); + assert_eq!(authority.seq, 7); +} + +#[test] +fn counter_only_runtime_event_refreshes_existing_authority() { + let mut mux = test_mux(24, 80); + let (session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + mux.sessions.insert(1, session); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: "UserPromptSubmit".to_owned(), + payload: None, + }); + let old_seen = Instant::now() + .checked_sub(crate::agent_status::policy::AUTHORITY_TTL + Duration::from_secs(1)) + .unwrap(); + { + let authority = mux + .sessions + .get_mut(&1) + .and_then(|session| session.hook_authority.as_mut()) + .expect("initial event should establish authority"); + authority.last_seen = old_seen; + authority.ts_ns = 1; + } + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: 
"hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: "SubagentStart".to_owned(), + payload: None, + }); + + let authority = mux + .sessions + .get(&1) + .and_then(|session| session.hook_authority.as_ref()) + .expect("counter-only event should not clear authority"); + assert_eq!(authority.seq, 2); + assert!(authority.last_seen > old_seen); + assert!(authority.ts_ns > 1); + assert_eq!( + mux.runtime_gate_states + .get("1:hook-claude-1") + .map(|gate| gate.subagents_active), + Some(1) + ); +} + +#[test] +fn explicit_heartbeat_refreshes_authority_timestamp() { + let mut mux = test_mux(24, 80); + let (session, _rx) = test_session_with_agent(24, 80, Some("custom".to_owned())); + mux.sessions.insert(1, session); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportAgentState { + session_id: 1, + source_id: "role-reporter-1".to_owned(), + agent_label: "custom".to_owned(), + raw_state: "working".to_owned(), + seq: 1, + ts_ns: 1, + message: None, + }); + let old_seen = Instant::now() + .checked_sub(crate::agent_status::policy::AUTHORITY_TTL + Duration::from_secs(1)) + .unwrap(); + { + let authority = mux + .sessions + .get_mut(&1) + .and_then(|session| session.hook_authority.as_mut()) + .expect("initial report should establish authority"); + authority.last_seen = old_seen; + authority.ts_ns = 1; + } + + mux.handle_control_msg( + crate::protocol::control::ClientMsg::HeartbeatAgentAuthority { + session_id: 1, + source_id: "role-reporter-1".to_owned(), + seq: 2, + }, + ); + + let authority = mux + .sessions + .get(&1) + .and_then(|session| session.hook_authority.as_ref()) + .expect("heartbeat should keep authority"); + assert_eq!(authority.seq, 2); + assert!(authority.last_seen > old_seen); + assert!(authority.ts_ns > 1); +} + +#[test] +fn runtime_heartbeat_refreshes_authority_timestamp() { + let mut mux = test_mux(24, 80); + let (session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + mux.sessions.insert(1, session); + + 
mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: "UserPromptSubmit".to_owned(), + payload: None, + }); + let old_seen = Instant::now() + .checked_sub(crate::agent_status::policy::AUTHORITY_TTL + Duration::from_secs(1)) + .unwrap(); + { + let authority = mux + .sessions + .get_mut(&1) + .and_then(|session| session.hook_authority.as_mut()) + .expect("initial event should establish authority"); + authority.last_seen = old_seen; + authority.ts_ns = 1; + } + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id: 1, + source_id: "hook-claude-1".to_owned(), + runtime: "claude".to_owned(), + event: "heartbeat".to_owned(), + payload: None, + }); + + let authority = mux + .sessions + .get(&1) + .and_then(|session| session.hook_authority.as_ref()) + .expect("heartbeat should keep authority"); + assert_eq!(authority.seq, 2); + assert!(authority.last_seen > old_seen); + assert!(authority.ts_ns > 1); +} + +#[test] +fn report_agent_state_is_tracked_as_lower_trust_direct_report() { + let mut mux = test_mux(24, 80); + let (session, _rx) = test_session_with_agent(24, 80, Some("custom".to_owned())); + mux.sessions.insert(1, session); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportAgentState { + session_id: 1, + source_id: "role-reporter-1".to_owned(), + agent_label: "custom".to_owned(), + raw_state: "working".to_owned(), + seq: 1, + ts_ns: 1, + message: None, + }); + + let authority = mux + .sessions + .get(&1) + .and_then(|session| session.hook_authority.as_ref()) + .expect("direct report should be stored"); + assert_eq!( + authority.origin, + crate::agent_status::AuthorityOrigin::DirectStateReport + ); +} + +#[test] +fn report_agent_state_accepts_unknown_raw_state() { + let mut mux = test_mux(24, 80); + let (session, _rx) = test_session_with_agent(24, 80, Some("custom".to_owned())); + 
mux.sessions.insert(1, session); + + mux.handle_control_msg(crate::protocol::control::ClientMsg::ReportAgentState { + session_id: 1, + source_id: "role-reporter-1".to_owned(), + agent_label: "custom".to_owned(), + raw_state: "unknown".to_owned(), + seq: 1, + ts_ns: 1, + message: None, + }); + + assert_eq!( + mux.sessions + .get(&1) + .and_then(|session| session.hook_authority.as_ref()) + .map(|authority| authority.raw_state.as_str()), + Some("unknown") + ); +} + +#[test] +fn startup_grace_mutes_screen_rules_until_elapsed() { + let mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + let dialog = include_str!("../agent_status/screen/fixtures/claude/blocked.txt"); + session.feed_pty(dialog.as_bytes()); + let visible_lines = session.visible_lines(); + + assert!( + mux.status_rule_match(&session, Instant::now(), &visible_lines, true) + .is_none(), + "freshly spawned sessions should ignore screen rules during startup grace" + ); + + session.spawned_at = Instant::now() + .checked_sub(crate::agent_status::policy::STARTUP_GRACE + Duration::from_secs(1)) + .unwrap(); + let matched = mux + .status_rule_match(&session, Instant::now(), &visible_lines, true) + .expect("screen rules should run after startup grace"); + + assert_eq!(matched.rule_id, "permission-dialog"); + assert_eq!( + matched.state, + Some(crate::agent_status::evidence::RawAgentState::Blocked) + ); +} + +#[test] +fn osc_virtual_rule_regions_are_hidden_when_foreground_is_not_agent() { + let (mut session, _rx) = test_session_with_agent(24, 80, Some("codex".to_owned())); + session.osc_evidence.title = Some("Codex working".to_owned()); + session.osc_evidence.progress_cleared_at = Some(Instant::now()); + + let hidden = status_rule_virtual_regions(&session, false); + assert_eq!(hidden.osc_title, None); + assert_eq!(hidden.osc_progress, None); + + let visible = status_rule_virtual_regions(&session, true); + assert!( + visible + .osc_title + 
.is_some_and(|title| title == "Codex working"), + "foreground-agent OSC title should remain available to rule matching" + ); + assert_eq!( + visible.osc_progress, + Some("cleared"), + "foreground-agent OSC progress should remain available to rule matching" + ); +} + +#[test] +fn agent_state_changed_includes_subagent_count_from_evidence() { + let mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + session.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Working, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary { + subagents_active: 3, + foreground_returned_to_shell: true, + ..Default::default() + }, + ); + session.state = session.status.effective; + let mut state_rx = mux.state_broadcast_tx.subscribe(); + + mux.broadcast_agent_state_changed(1, &session, None, Some("test".to_owned())); + + let msg = state_rx + .try_recv() + .expect("state event should be broadcast"); + let crate::protocol::control::ServerMsg::AgentStateChanged { + subagents_active, + foreground_returned_to_shell, + .. 
+ } = msg + else { + panic!("unexpected state event: {msg:?}"); + }; + assert_eq!(subagents_active, 3); + assert!(foreground_returned_to_shell); +} + +#[test] +fn workspace_status_changed_rolls_up_session_counts() { + let mut mux = test_mux(24, 80); + let (mut blocked, _rx1) = test_session_with_agent(24, 80, Some("claude".to_owned())); + blocked.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Blocked, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + blocked.state = blocked.status.effective; + let (mut working, _rx2) = test_session_with_agent(24, 80, Some("codex".to_owned())); + working.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Working, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + working.state = working.status.effective; + mux.sessions.insert(1, blocked); + mux.sessions.insert(2, working); + let mut state_rx = mux.state_broadcast_tx.subscribe(); + + mux.maybe_broadcast_workspace_status_changed(); + + let msg = state_rx + .try_recv() + .expect("workspace event should be broadcast"); + let crate::protocol::control::ServerMsg::WorkspaceStatusChanged { + effective, + session_count, + blocked_count, + done_count, + working_count, + .. 
+ } = msg + else { + panic!("unexpected workspace event: {msg:?}"); + }; + assert_eq!(effective, "blocked"); + assert_eq!(session_count, 2); + assert_eq!(blocked_count, 1); + assert_eq!(done_count, 0); + assert_eq!(working_count, 1); +} + +#[test] +fn workspace_status_changed_suppresses_duplicate_snapshots() { + let mut mux = test_mux(24, 80); + let (mut session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + session.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Working, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + session.state = session.status.effective; + mux.sessions.insert(1, session); + let mut state_rx = mux.state_broadcast_tx.subscribe(); + + mux.maybe_broadcast_workspace_status_changed(); + state_rx.try_recv().expect("first snapshot should publish"); + mux.maybe_broadcast_workspace_status_changed(); + + assert!(matches!( + state_rx.try_recv(), + Err(tokio::sync::broadcast::error::TryRecvError::Empty) + )); +} + +#[test] +fn session_spawned_event_carries_inventory_fields() { + let mux = test_mux(24, 80); + let mut state_rx = mux.state_broadcast_tx.subscribe(); + + mux.broadcast_session_spawned(7, Some("codex".to_owned()), "Codex".to_owned()); + + let msg = state_rx + .try_recv() + .expect("session spawned event should be broadcast"); + let crate::protocol::control::ServerMsg::SessionSpawned { + session_id, + agent, + label, + } = msg + else { + panic!("unexpected event: {msg:?}"); + }; + assert_eq!(session_id, 7); + assert_eq!(agent.as_deref(), Some("codex")); + assert_eq!(label, "Codex"); +} + +#[test] +fn remove_exited_session_broadcasts_session_exited() { + let mut mux = single_pane_tab_mux(); + let (session, _rx) = test_session_with_agent(24, 80, Some("claude".to_owned())); + mux.sessions.insert(1, session); + let mut state_rx = mux.state_broadcast_tx.subscribe(); + + mux.remove_exited_session(1); + + let msg = state_rx + 
.try_recv() + .expect("session exited event should be broadcast"); + let crate::protocol::control::ServerMsg::SessionExited { session_id } = msg else { + panic!("unexpected event: {msg:?}"); + }; + assert_eq!(session_id, 1); +} + +#[test] +fn close_focused_pane_broadcasts_exit_and_deregisters_token_monitor() { + let mut mux = split_tab_mux(); + let (session_one, _rx1) = test_session_with_agent(24, 80, Some("claude".to_owned())); + let (session_two, _rx2) = test_session_with_agent(24, 80, Some("codex".to_owned())); + mux.sessions.insert(1, session_one); + mux.sessions.insert(2, session_two); + mux.token_monitor.register_session(1, "claude"); + assert!(mux.token_monitor.contains_session(1)); + let mut state_rx = mux.state_broadcast_tx.subscribe(); + + mux.close_focused_pane(); + + let msg = state_rx + .try_recv() + .expect("session exited event should be broadcast"); + let crate::protocol::control::ServerMsg::SessionExited { session_id } = msg else { + panic!("unexpected event: {msg:?}"); + }; + assert_eq!(session_id, 1); + assert!(!mux.token_monitor.contains_session(1)); + assert!(mux.sessions.contains_key(&2)); +} + #[test] fn refresh_tab_labels_preserves_provider_suffix() { let mut mux = test_mux(24, 80); diff --git a/crates/jackin-capsule/src/lib.rs b/crates/jackin-capsule/src/lib.rs index 05a2ca946..d3faab2bd 100644 --- a/crates/jackin-capsule/src/lib.rs +++ b/crates/jackin-capsule/src/lib.rs @@ -4,6 +4,7 @@ //! Not responsible for: protocol encoding (see `jackin-protocol`), host-side //! launch orchestration, or config schema migration. +pub mod agent_status; pub(crate) mod alloc_telemetry; pub mod attach_context; /// Library target so integration tests under `tests/` can exercise @@ -26,6 +27,7 @@ pub mod runtime_setup; pub mod services; pub mod session; pub mod socket; +pub mod token_monitor; pub mod util; /// Terminal-rendering code — all UI paint/layout lives here. 
diff --git a/crates/jackin-capsule/src/main.rs b/crates/jackin-capsule/src/main.rs index 807ace825..910ec542f 100644 --- a/crates/jackin-capsule/src/main.rs +++ b/crates/jackin-capsule/src/main.rs @@ -1,9 +1,11 @@ use anyhow::{Result, bail}; use jackin_capsule::{ client, config, daemon, output, protocol::attach::SpawnRequest, runtime_setup, - session::validate_agent_slug, + session::validate_agent_slug, socket, }; use std::path::Path; +use tokio::io::AsyncWriteExt as _; +use tokio::net::UnixStream; #[cfg(feature = "dhat-heap")] #[global_allocator] @@ -18,6 +20,10 @@ const DEFAULT_AGENT: &str = "claude"; /// - PID != 1 → client mode (connect to daemon, run interactive UI) #[tokio::main(flavor = "current_thread")] async fn main() -> Result<()> { + match rustls::crypto::ring::default_provider().install_default() { + Ok(()) | Err(_) => {} + } + let args: Vec = std::env::args().collect(); if invoked_as_prepare_commit_msg_hook(&args) { return runtime_setup::run_prepare_commit_msg_hook(&args[1..]); @@ -53,6 +59,8 @@ SUBCOMMANDS: (no subcommand) Connect to the running multiplexer (client mode) new [] Spawn a new agent session (default: shell) status Print daemon status to stdout + status explain Print agent-status evidence as JSON + status capture Capture status evidence under /jackin/state/ snapshot Write a screen snapshot to stdout --focus Connect and focus the given session runtime-setup First-boot environment setup (run by entrypoint) @@ -68,7 +76,20 @@ connecting as a client.", )); Ok(()) } - Some("status") => client::run_status().await, + Some("status") => match args.get(2).map(String::as_str) { + Some("explain") => { + let session_id = parse_session_id_arg(&args, 3, "status explain")?; + client::run_status_explain(session_id).await + } + Some("capture") => { + let session_id = parse_session_id_arg(&args, 3, "status capture")?; + client::run_status_capture(session_id).await + } + Some(other) => bail!( + "unknown status subcommand {other:?} — known: explain , capture 
" + ), + None => client::run_status().await, + }, Some("snapshot") => client::run_snapshot().await, Some("agents") => { let json_format = args.iter().any(|a| a == "--format=json") @@ -83,6 +104,7 @@ connecting as a client.", client::run_agents(format).await } Some("runtime-setup") => runtime_setup::run(), + Some("report-event") => run_report_event(&args[2..]).await, Some("prepare-commit-msg") => runtime_setup::run_prepare_commit_msg_hook(&args[2..]), Some("new") => { let supported_agents = config::load_optional() @@ -126,13 +148,90 @@ connecting as a client.", } Some(other) => { bail!( - "unknown jackin-capsule subcommand {other:?} — known: status, snapshot, agents [--format json], runtime-setup, prepare-commit-msg, new , --focus , --version, --help" + "unknown jackin-capsule subcommand {other:?} — known: status [explain|capture], snapshot, agents [--format json], report-event, runtime-setup, prepare-commit-msg, new , --focus , --version, --help" ) } } } } +fn parse_session_id_arg(args: &[String], index: usize, command: &str) -> Result { + let Some(raw) = args.get(index) else { + bail!("{command} requires a session_id"); + }; + raw.parse::() + .map_err(|_| anyhow::anyhow!("{command} session_id must be a u64, got {raw:?}")) +} + +async fn run_report_event(args: &[String]) -> Result<()> { + let payload = if args.iter().any(|arg| arg == "--payload-stdin") { + let mut input = String::new(); + if std::io::Read::read_to_string(&mut std::io::stdin(), &mut input).is_ok() + && !input.trim().is_empty() + { + serde_json::from_str::(&input).ok() + } else { + None + } + } else { + None + }; + let event = report_event_name(args, payload.as_ref()); + let (Ok(session_id), Ok(source_id), Ok(runtime)) = ( + std::env::var("JACKIN_SESSION_ID").and_then(|value| { + value + .parse::() + .map_err(|_| std::env::VarError::NotPresent) + }), + std::env::var("JACKIN_STATUS_SOURCE"), + std::env::var("JACKIN_AGENT_RUNTIME"), + ) else { + return Ok(()); + }; + let socket_path = + 
std::env::var("JACKIN_STATUS_SOCKET").unwrap_or_else(|_| socket::SOCKET_PATH.to_owned()); + let msg = jackin_capsule::protocol::control::ClientMsg::ReportRuntimeEvent { + session_id, + source_id, + runtime, + event, + payload, + }; + if let Ok(mut stream) = UnixStream::connect(socket_path).await { + let _write_result = stream + .write_all(&jackin_capsule::protocol::control::frame(&msg)) + .await; + } + Ok(()) +} + +fn report_event_name(args: &[String], payload: Option<&serde_json::Value>) -> String { + let event = parse_named_arg(args, "--event") + .or_else(|| { + payload + .and_then(|payload| payload.get("hook_event_name")) + .and_then(serde_json::Value::as_str) + .map(str::to_owned) + }) + .unwrap_or_else(|| "heartbeat".to_owned()); + if event == "Notification" { + payload + .and_then(|payload| payload.get("notification_type")) + .and_then(serde_json::Value::as_str) + .map(|kind| format!("Notification:{kind}")) + .unwrap_or(event) + } else { + event + } +} + +fn parse_named_arg(args: &[String], name: &str) -> Option { + args.iter() + .position(|arg| arg == name) + .and_then(|index| args.get(index + 1)) + .cloned() +} + fn invoked_as_prepare_commit_msg_hook(args: &[String]) -> bool { args.first() .and_then(|arg0| Path::new(arg0).file_name()) @@ -230,6 +329,39 @@ mod tests { parts.iter().map(|s| (*s).to_owned()).collect() } + #[test] + fn report_event_name_uses_explicit_event() { + assert_eq!( + report_event_name(&args(&["--event", "PreToolUse"]), None), + "PreToolUse" + ); + } + + #[test] + fn report_event_name_extracts_claude_notification_type() { + let payload = serde_json::json!({ + "hook_event_name": "Notification", + "notification_type": "permission_prompt", + }); + assert_eq!( + report_event_name(&args(&["--event", "Notification"]), Some(&payload)), + "Notification:permission_prompt" + ); + } + + #[test] + fn report_event_name_extracts_payload_hook_event() { + let payload = serde_json::json!({ + "hook_event_name": "SessionEnd", + }); + 
assert_eq!(report_event_name(&[], Some(&payload)), "SessionEnd"); + } + + #[test] + fn report_event_name_falls_back_to_heartbeat() { + assert_eq!(report_event_name(&[], None), "heartbeat"); + } + #[test] fn parse_focus_flag_no_subcommand_finds_global_flag() { // Bare client mode: `jackin-capsule --focus 5` must resolve to diff --git a/crates/jackin-capsule/src/runtime_setup.rs b/crates/jackin-capsule/src/runtime_setup.rs index 5763c9cca..d8b27eb1e 100644 --- a/crates/jackin-capsule/src/runtime_setup.rs +++ b/crates/jackin-capsule/src/runtime_setup.rs @@ -10,6 +10,7 @@ use std::os::unix::fs::symlink; use std::path::{Path, PathBuf}; use std::process::{Command, Output, Stdio}; +use crate::agent_status::hook_installer::HookInstaller; use anyhow::{Context, Result, bail}; use serde_json::json; @@ -312,11 +313,14 @@ fn setup_claude(copy_auth: bool) -> Result<()> { &["mcp", "add", "shellfirm", "--", "shellfirm", "mcp"], ); } + crate::agent_status::hook_installer::ClaudeHookInstaller::default() + .install(Path::new("/home/agent"))?; Ok(()) } fn setup_codex(copy_auth: bool) -> Result<()> { seed_home_dir("/jackin/default-home/.codex", "/home/agent/.codex")?; + write_codex_tui_notification_config(Path::new("/home/agent/.codex"))?; // Provider config (idempotent, runs every tab) before the credential copy so // the copy is the last fallible step: the auth marker then gates strictly on // copy success, not on a post-copy write that could fail and force a re-copy @@ -332,9 +336,112 @@ fn setup_codex(copy_auth: bool) -> Result<()> { )); } } + crate::agent_status::hook_installer::CodexHookInstaller::default() + .install(Path::new("/home/agent"))?; Ok(()) } +fn write_codex_tui_notification_config(codex_dir: &Path) -> Result<()> { + let config_path = codex_dir.join("config.toml"); + fs::create_dir_all(codex_dir) + .with_context(|| format!("failed to create {}", codex_dir.display()))?; + let raw = match fs::read_to_string(&config_path) { + Ok(raw) => raw, + Err(err) if err.kind() == 
io::ErrorKind::NotFound => String::new(), + Err(err) => { + return Err(err).with_context(|| { + format!( + "failed to read {} for Codex TUI notification config", + config_path.display() + ) + }); + } + }; + let mut doc: toml_edit::DocumentMut = if raw.trim().is_empty() { + toml_edit::DocumentMut::new() + } else { + raw.parse().with_context(|| { + format!( + "failed to parse {} for Codex TUI notification config", + config_path.display() + ) + })? + }; + let tui = ensure_toml_table(doc.as_table_mut(), "tui")?; + ensure_codex_status_notifications(tui)?; + tui.insert("notification_method", toml_edit::value("osc9")); + let rendered = doc.to_string(); + if rendered == raw { + return Ok(()); + } + fs::write(&config_path, rendered).with_context(|| { + format!( + "failed to write Codex TUI notification config to {}", + config_path.display() + ) + })?; + crate::output::stdout_line(format_args!( + "[entrypoint] codex: enabled TUI OSC notifications in {}", + config_path.display() + )); + Ok(()) +} + +fn ensure_codex_status_notifications(tui: &mut toml_edit::Table) -> Result<()> { + let notifications = ensure_toml_array(tui, "notifications")?; + for required in ["agent-turn-complete", "approval-requested"] { + if !notifications + .iter() + .any(|notification| notification.as_str() == Some(required)) + { + notifications.push(required); + } + } + Ok(()) +} + +fn ensure_toml_table<'a>( + table: &'a mut toml_edit::Table, + key: &str, +) -> Result<&'a mut toml_edit::Table> { + let item = table + .entry(key) + .or_insert_with(|| toml_edit::Item::Table(toml_edit::Table::new())); + if item.is_inline_table() { + *item = match std::mem::take(item).into_table() { + Ok(table) => toml_edit::Item::Table(table), + Err(item) => item, + }; + } + if !item.is_table() { + *item = toml_edit::Item::Table(toml_edit::Table::new()); + } + let Some(table) = item.as_table_mut() else { + bail!("failed to normalize TOML key {key} to a table"); + }; + Ok(table) +} + +fn ensure_toml_array<'a>( + table: &'a 
mut toml_edit::Table, + key: &str, +) -> Result<&'a mut toml_edit::Array> { + let item = table.entry(key).or_insert_with(|| { + toml_edit::Item::Value(toml_edit::Value::Array(toml_edit::Array::new())) + }); + if item + .as_value() + .and_then(toml_edit::Value::as_array) + .is_none() + { + *item = toml_edit::Item::Value(toml_edit::Value::Array(toml_edit::Array::new())); + } + let Some(array) = item.as_value_mut().and_then(toml_edit::Value::as_array_mut) else { + bail!("failed to normalize TOML key {key} to an array"); + }; + Ok(array) +} + /// Appends `[model_providers]` + `[profiles]` blocks for available alt /// providers to `config.toml` under `codex_dir`. `MiniMax` is the only /// deliverable Codex cell (Responses-API compatible); GLM and Kimi are @@ -464,6 +571,8 @@ fn setup_amp(copy_auth: bool) -> Result<()> { )); } } + crate::agent_status::hook_installer::AmpPluginInstaller::default() + .install(Path::new("/home/agent"))?; Ok(()) } @@ -510,7 +619,8 @@ fn setup_opencode(copy_auth: bool) -> Result<()> { .mode(0o700) .create("/home/agent/.config/opencode") .context("failed to create /home/agent/.config/opencode")?; - write_opencode_config(Path::new("/home/agent/.config/opencode/opencode.json"))?; + let config = Path::new("/home/agent/.config/opencode/opencode.json"); + write_opencode_config(config)?; if copy_auth { if Path::new("/jackin/opencode/auth.json").is_file() { crate::output::stderr_line(format_args!( @@ -528,6 +638,8 @@ fn setup_opencode(copy_auth: bool) -> Result<()> { )); } } + crate::agent_status::hook_installer::OpenCodePluginInstaller::default() + .install(Path::new("/home/agent"))?; Ok(()) } diff --git a/crates/jackin-capsule/src/runtime_setup/tests.rs b/crates/jackin-capsule/src/runtime_setup/tests.rs index 68861fd5f..97ad228f1 100644 --- a/crates/jackin-capsule/src/runtime_setup/tests.rs +++ b/crates/jackin-capsule/src/runtime_setup/tests.rs @@ -241,6 +241,138 @@ fn codex_provider_config_preserves_operator_content() { 
assert!(body.contains("[model_providers.minimax]")); } +#[test] +fn codex_tui_notification_config_enables_status_osc_events() { + let dir = tempfile::tempdir().expect("tempdir"); + let codex_dir = dir.path(); + write_codex_tui_notification_config(codex_dir).expect("write notification config"); + let body = fs::read_to_string(codex_dir.join("config.toml")).expect("read config.toml"); + let parsed: toml::Value = toml::from_str(&body).expect("parse config.toml"); + let tui = parsed + .get("tui") + .and_then(toml::Value::as_table) + .expect("tui table"); + let notifications = tui + .get("notifications") + .and_then(toml::Value::as_array) + .expect("notifications list") + .iter() + .map(|value| value.as_str().expect("notification string")) + .collect::>(); + assert_eq!(notifications, ["agent-turn-complete", "approval-requested"]); + assert_eq!( + tui.get("notification_method").and_then(toml::Value::as_str), + Some("osc9") + ); +} + +#[test] +fn codex_tui_notification_config_is_idempotent_and_preserves_operator_content() { + let dir = tempfile::tempdir().expect("tempdir"); + let codex_dir = dir.path(); + let config_path = codex_dir.join("config.toml"); + fs::write( + &config_path, + b"# existing operator config\n[settings]\nsome_key = true\n\n[tui]\nnotifications = false\nnotification_method = \"bel\"\n", + ) + .expect("write existing config"); + write_codex_tui_notification_config(codex_dir).expect("first write"); + write_codex_tui_notification_config(codex_dir).expect("second write"); + let body = fs::read_to_string(&config_path).expect("read config.toml"); + assert!(body.contains("# existing operator config")); + assert!(body.contains("some_key = true")); + assert_eq!( + body.matches("[tui]").count(), + 1, + "TUI table must not be duplicated" + ); + assert_eq!( + body.matches("notification_method = \"osc9\"").count(), + 1, + "notification method must be rewritten exactly once" + ); + let parsed: toml::Value = toml::from_str(&body).expect("parse config.toml"); + let 
notifications = parsed + .get("tui") + .and_then(|tui| tui.get("notifications")) + .and_then(toml::Value::as_array) + .expect("notifications list") + .iter() + .map(|value| value.as_str().expect("notification string")) + .collect::>(); + assert_eq!(notifications, ["agent-turn-complete", "approval-requested"]); +} + +#[test] +fn codex_tui_notification_config_merges_existing_notification_list() { + let dir = tempfile::tempdir().expect("tempdir"); + let codex_dir = dir.path(); + let config_path = codex_dir.join("config.toml"); + fs::write( + &config_path, + b"[tui]\nnotifications = [\"agent-turn-complete\", \"custom-event\"]\n", + ) + .expect("write existing config"); + write_codex_tui_notification_config(codex_dir).expect("write notification config"); + let body = fs::read_to_string(&config_path).expect("read config.toml"); + let parsed: toml::Value = toml::from_str(&body).expect("parse config.toml"); + let notifications = parsed + .get("tui") + .and_then(|tui| tui.get("notifications")) + .and_then(toml::Value::as_array) + .expect("notifications list") + .iter() + .map(|value| value.as_str().expect("notification string")) + .collect::>(); + assert_eq!( + notifications, + ["agent-turn-complete", "custom-event", "approval-requested"] + ); + assert_eq!( + notifications + .iter() + .filter(|notification| **notification == "agent-turn-complete") + .count(), + 1, + "existing required notification must not be duplicated" + ); +} + +#[test] +fn codex_tui_notification_config_preserves_inline_table_values() { + let dir = tempfile::tempdir().expect("tempdir"); + let codex_dir = dir.path(); + let config_path = codex_dir.join("config.toml"); + fs::write( + &config_path, + b"tui = { notifications = [\"custom-event\"], badge = true, notification_method = \"bel\" }\n", + ) + .expect("write existing config"); + write_codex_tui_notification_config(codex_dir).expect("write notification config"); + let body = fs::read_to_string(&config_path).expect("read config.toml"); + let parsed: 
toml::Value = toml::from_str(&body).expect("parse config.toml"); + let tui = parsed + .get("tui") + .and_then(toml::Value::as_table) + .expect("tui table"); + let notifications = tui + .get("notifications") + .and_then(toml::Value::as_array) + .expect("notifications list") + .iter() + .map(|value| value.as_str().expect("notification string")) + .collect::>(); + assert_eq!( + notifications, + ["custom-event", "agent-turn-complete", "approval-requested"] + ); + assert_eq!(tui.get("badge").and_then(toml::Value::as_bool), Some(true)); + assert_eq!( + tui.get("notification_method").and_then(toml::Value::as_str), + Some("osc9") + ); +} + #[test] fn codex_provider_config_noop_without_minimax_key() { let dir = tempfile::tempdir().expect("tempdir"); diff --git a/crates/jackin-capsule/src/session.rs b/crates/jackin-capsule/src/session.rs index f53f7df7e..8ba7ec967 100644 --- a/crates/jackin-capsule/src/session.rs +++ b/crates/jackin-capsule/src/session.rs @@ -39,7 +39,6 @@ use crate::pull_request::PullRequestInfo; use crate::tui::render::RowSnapshot; static NEXT_ID: AtomicU64 = AtomicU64::new(1); -const BLOCKED_AFTER: std::time::Duration = std::time::Duration::from_secs(3); /// Lines of scrollback every PTY session retains. ~1.5 MB worst-case /// per session at 200 cols. Empty cells cost less. Operators need @@ -103,6 +102,19 @@ fn parse_osc7(payload: &str) -> Option { .map(|p| p.to_string_lossy().into_owned()) } +const STATUS_OSC_PAYLOAD_CAP: usize = 256; + +fn capped_status_osc_payload(value: &str) -> String { + if value.len() <= STATUS_OSC_PAYLOAD_CAP { + return value.to_owned(); + } + let mut end = STATUS_OSC_PAYLOAD_CAP; + while !value.is_char_boundary(end) { + end -= 1; + } + value[..end].to_owned() +} + /// Per-OSC operator opt-out switches. All default to `allow`; the /// values `deny`, `off`, `no` (case-sensitive) turn the matching /// passthrough off when the operator runs an untrusted role. 
tmux @@ -205,10 +217,23 @@ pub struct Session { pub agent: Option, pub provider: Option, pub state: AgentState, + pub status: crate::agent_status::SessionStatus, + pub hook_authority: Option, + pub sequence_tracker: crate::agent_status::sequence::SequenceTracker, + pub osc_evidence: crate::agent_status::evidence::OscEvidence, + /// True after `/proc` has observed the expected runtime owning either the + /// root process or foreground process group. Shell handoff detection only + /// runs after this bit is set so slow startup wrappers are not misread as + /// agent exit. + pub agent_identity_observed: bool, + pub pending_status_transition: crate::agent_status::policy::PendingTransition, pub input_tx: mpsc::UnboundedSender>, pub pty_master: Arc>>, child_killer: Arc>>, + pub child_pid: Option, + pub spawned_at: std::time::Instant, pub last_output_at: std::time::Instant, + pub last_input_at: std::time::Instant, /// Current scrollback view offset in lines from the live tail. /// `0` = following live output; `> 0` = paused, looking back. 
pub scrollback_offset: usize, @@ -445,7 +470,7 @@ impl Session { label: impl Into, agent: Option, provider: Option, - cmd: CommandBuilder, + mut cmd: CommandBuilder, terminal: SessionTerminal, event_tx: mpsc::UnboundedSender, ) -> Result<(Self, u64)> { @@ -464,6 +489,13 @@ impl Session { let master = pair.master; let slave = pair.slave; + let sid = next_id(); + cmd.env("JACKIN_STATUS_SOCKET", "/jackin/run/jackin.sock"); + if let Some(agent_slug) = agent.as_deref() { + cmd.env("JACKIN_SESSION_ID", sid.to_string()); + cmd.env("JACKIN_STATUS_SOURCE", format!("hook-{agent_slug}-{sid}")); + cmd.env("JACKIN_AGENT_RUNTIME", agent_slug); + } let mut child = slave .spawn_command(cmd) .context("failed to spawn session process")?; @@ -480,7 +512,6 @@ impl Session { let (input_tx, mut input_rx) = mpsc::unbounded_channel::>(); - let sid = next_id(); let event_tx_output = event_tx.clone(); let event_tx_exit = event_tx.clone(); let event_tx_writer_err = event_tx.clone(); @@ -640,11 +671,21 @@ impl Session { label: label.into(), agent, provider, - state: AgentState::Working, + state: AgentState::Unknown, + status: crate::agent_status::SessionStatus::new(), + hook_authority: None, + sequence_tracker: crate::agent_status::sequence::SequenceTracker::new(), + osc_evidence: crate::agent_status::evidence::OscEvidence::default(), + agent_identity_observed: false, + pending_status_transition: crate::agent_status::policy::PendingTransition::default( + ), input_tx, pty_master: master, child_killer, + child_pid, + spawned_at: std::time::Instant::now(), last_output_at: std::time::Instant::now(), + last_input_at: std::time::Instant::now(), scrollback_offset: 0, received_output: false, shadow_grid: Box::new(jackin_term::DamageGrid::with_row_arena( @@ -768,12 +809,30 @@ impl Session { /// Mark that the operator sent an explicit keyboard payload to this pane. /// Returns true when this clears a previously latched blocked state. 
pub fn mark_operator_input(&mut self) -> bool { - let was_blocked = self.state == AgentState::Blocked; - self.last_output_at = std::time::Instant::now(); - self.state = AgentState::Working; + let was_blocked = self.status.effective == AgentState::Blocked; + self.last_input_at = std::time::Instant::now(); was_blocked } + pub fn state(&self) -> AgentState { + self.status.effective + } + + pub fn visible_lines(&self) -> Vec { + self.shadow_grid + .dump() + .cells + .iter() + .map(|row| { + row.iter() + .map(|cell| cell.text.as_str()) + .collect::() + .trim_end() + .to_owned() + }) + .collect() + } + /// True when the session's program has enabled any mouse protocol /// mode. Used by the daemon to decide whether selection gestures /// belong to jackin or to the pane. Actual PTY mouse forwarding @@ -827,6 +886,24 @@ impl Session { bytes.len(), bytes ); + if self.agent.is_none() + && let Some(mark) = crate::agent_status::scan_osc133(bytes) + { + let raw = match mark { + crate::agent_status::OscShellMark::PreExec => { + Some(crate::agent_status::evidence::RawAgentState::Working) + } + crate::agent_status::OscShellMark::PromptEnd + | crate::agent_status::OscShellMark::CommandFinished { .. } => { + Some(crate::agent_status::evidence::RawAgentState::Idle) + } + crate::agent_status::OscShellMark::PromptStart => None, + }; + if let Some(raw) = raw { + self.osc_evidence.shell_state = Some(raw); + self.osc_evidence.shell_mark_at = Some(std::time::Instant::now()); + } + } // Single batch feed — the grid's persistent vte parser handles // sequences split across PTY read boundaries internally. 
@@ -871,7 +948,6 @@ impl Session { } self.last_output_at = std::time::Instant::now(); - self.state = state_after_pty_output(self.state); } /// Drain the grid's typed `PassthroughEvent`s, apply the session's @@ -890,11 +966,22 @@ impl Session { let events = self.shadow_grid.drain_passthrough(); for event in events { match event { + PassthroughEvent::Bell => { + self.osc_evidence.bel_at = Some(std::time::Instant::now()); + self.osc_evidence.bel_count = self.osc_evidence.bel_count.saturating_add(1); + if self.osc_policy.allow_notify() + && let Some(bytes) = event.encode() + { + self.pending_passthrough.push(bytes); + } + } PassthroughEvent::TitleChanged(ref title) => { if self.title.as_deref() != Some(title.as_str()) { self.pane_chrome_dirty = true; } self.title = Some(title.clone()); + self.osc_evidence.title = Some(capped_status_osc_payload(title)); + self.osc_evidence.title_changed_at = Some(std::time::Instant::now()); if self.osc_policy.allow_title() && let Some(bytes) = event.encode() { @@ -925,6 +1012,23 @@ impl Session { } } PassthroughEvent::Notification(_) => { + self.osc_evidence.notify_edge_at = Some(std::time::Instant::now()); + if self.osc_policy.allow_notify() + && let Some(bytes) = event.encode() + { + self.pending_passthrough.push(bytes); + } + } + PassthroughEvent::Progress(ref payload) => { + let state = payload + .split(';') + .next() + .and_then(|part| part.parse::().ok()); + self.osc_evidence.progress_active = + state.is_some_and(|state| matches!(state, 1..=4)); + if state == Some(0) { + self.osc_evidence.progress_cleared_at = Some(std::time::Instant::now()); + } if self.osc_policy.allow_notify() && let Some(bytes) = event.encode() { @@ -1164,14 +1268,7 @@ impl Session { } pub fn refresh_state(&mut self) { - // `AgentState::Done` is part of the protocol surface but never - // produced: `remove_exited_session` removes the Session entry - // the moment the PTY's child reaper fires (see daemon.rs - // SessionEvent::Exited handler), so there is no 
live `Session` - // instance to refresh past that point. Operators experience - // tab removal directly; no transient `○ Done` glyph. - let elapsed = self.last_output_at.elapsed(); - self.state = state_after_refresh(self.state, elapsed); + self.state = self.status.effective; } } @@ -1192,11 +1289,20 @@ impl Session { label, agent, provider, - state: AgentState::Working, + state: AgentState::Unknown, + status: crate::agent_status::SessionStatus::new(), + hook_authority: None, + sequence_tracker: crate::agent_status::sequence::SequenceTracker::new(), + osc_evidence: crate::agent_status::evidence::OscEvidence::default(), + agent_identity_observed: false, + pending_status_transition: crate::agent_status::policy::PendingTransition::default(), input_tx, pty_master, child_killer, + child_pid: None, + spawned_at: std::time::Instant::now(), last_output_at: std::time::Instant::now(), + last_input_at: std::time::Instant::now(), scrollback_offset: 0, received_output: true, shadow_grid: Box::new(jackin_term::DamageGrid::new(size.0, size.1, scrollback_len)), @@ -1228,21 +1334,6 @@ fn parse_modify_other_keys(raw: &[u8]) -> Option { std::str::from_utf8(level).ok()?.parse::().ok() } -fn state_after_pty_output(current: AgentState) -> AgentState { - match current { - AgentState::Blocked | AgentState::Done => current, - AgentState::Working | AgentState::Idle => AgentState::Working, - } -} - -fn state_after_refresh(current: AgentState, elapsed: std::time::Duration) -> AgentState { - match current { - AgentState::Blocked | AgentState::Done => current, - AgentState::Working | AgentState::Idle if elapsed < BLOCKED_AFTER => AgentState::Working, - AgentState::Working | AgentState::Idle => AgentState::Blocked, - } -} - /// Reject agent-slug strings that are flags (start with `-`), empty, /// contain whitespace / control characters, or — when the launch /// config lists supported agents — do not appear in that allowlist. 
diff --git a/crates/jackin-capsule/src/session/tests.rs b/crates/jackin-capsule/src/session/tests.rs index a1c40c04d..0a5a0af38 100644 --- a/crates/jackin-capsule/src/session/tests.rs +++ b/crates/jackin-capsule/src/session/tests.rs @@ -83,6 +83,40 @@ fn drained_with_policy(bytes: &[u8], policy: OscPolicy) -> Vec> { session.drain_passthrough() } +fn arbitrate_visible_session_for_test( + session: &Session, + registry: &crate::agent_status::rules::RulePackRegistry, +) -> crate::agent_status::arbitrate::ArbitrationResult { + let visible_lines = session.visible_lines(); + let rule_match = registry.evaluate(session.agent.as_deref(), &visible_lines); + let now = std::time::Instant::now(); + crate::agent_status::arbitrate::arbitrate( + &crate::agent_status::evidence::EvidenceSnapshot { + authority: None, + osc: session.osc_evidence.clone(), + screen: crate::agent_status::evidence::ScreenEvidence { + state: rule_match.as_ref().and_then(|matched| matched.state), + rule_id: rule_match.as_ref().map(|matched| matched.rule_id.clone()), + strong: rule_match.as_ref().is_some_and(|matched| matched.strong), + freeze: rule_match.as_ref().is_some_and(|matched| matched.freeze), + observed_at: now, + }, + process: crate::agent_status::evidence::ProcessEvidence { + child_alive: true, + foreground_is_agent: true, + ..Default::default() + }, + activity: crate::agent_status::evidence::ActivityEvidence { + last_output: Some(session.last_output_at), + last_input: Some(session.last_input_at), + }, + subagents_active: 0, + }, + session.status.raw, + now, + ) +} + // ── OSC and unhandled-CSI passthrough contracts ─────────────────────────── // Every OSC the agent emits must reach the attached client when (and only // when) the focused-pane policy allows it. 
The grid emits typed events; the @@ -111,6 +145,24 @@ fn osc_2_window_title_is_re_emitted_and_captured() { assert!(drained[0].starts_with(b"\x1b]0;") || drained[0].starts_with(b"\x1b]2;")); } +#[test] +fn retained_status_osc_title_is_capped_without_truncating_pane_title() { + let mut session = test_session_with_policy(OscPolicy::default()); + let long_title = "x".repeat(300); + session.feed_pty(format!("\x1b]2;{long_title}\x07").as_bytes()); + + assert_eq!(session.title(), Some(long_title.as_str())); + assert_eq!( + session + .osc_evidence + .title + .as_ref() + .expect("status title evidence should be retained") + .len(), + 256 + ); +} + #[test] fn osc_8_hyperlink_is_re_emitted() { let drained = drained(b"\x1b]8;;https://example/\x07text\x1b]8;;\x07"); @@ -130,6 +182,51 @@ fn osc_9_notification_is_re_emitted() { assert!(s.contains("9;build finished")); } +#[test] +fn bel_is_re_emitted_and_captured_as_status_evidence() { + let mut session = test_session_with_policy(OscPolicy::default()); + session.feed_pty(b"\x07\x07"); + + assert!(session.osc_evidence.bel_at.is_some()); + assert_eq!(session.osc_evidence.bel_count, 2); + assert_eq!( + session.drain_passthrough(), + vec![b"\x07".to_vec(), b"\x07".to_vec()] + ); +} + +#[test] +fn osc_9_4_progress_is_re_emitted_and_captured() { + let mut session = test_session_with_policy(OscPolicy::default()); + + session.feed_pty(b"\x1b]9;4;3\x07"); + assert!(session.osc_evidence.progress_active); + let drained = session.drain_passthrough(); + assert_eq!(drained, vec![b"\x1b]9;4;3\x07".to_vec()]); + + session.feed_pty(b"\x1b]9;4;0\x07"); + assert!(!session.osc_evidence.progress_active); + assert!(session.osc_evidence.progress_cleared_at.is_some()); +} + +#[test] +fn shell_osc133_marks_update_status_evidence() { + let mut session = test_session_with_policy(OscPolicy::default()); + session.agent = None; + + session.feed_pty(b"\x1b]133;C\x07"); + assert_eq!( + session.osc_evidence.shell_state, + 
Some(crate::agent_status::evidence::RawAgentState::Working) + ); + + session.feed_pty(b"\x1b]133;B\x07"); + assert_eq!( + session.osc_evidence.shell_state, + Some(crate::agent_status::evidence::RawAgentState::Idle) + ); +} + #[test] fn osc_7_cwd_is_captured_and_percent_decoded() { let mut session = test_session_with_policy(OscPolicy::default()); @@ -244,6 +341,116 @@ fn drain_returns_empty_when_no_passthrough_emitted() { assert!(drained.is_empty()); } +#[test] +fn pty_output_does_not_change_agent_state() { + let mut session = test_session_with_policy(OscPolicy::default()); + session.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Blocked, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + session.state = session.status.effective; + assert_eq!(session.state(), AgentState::Blocked); + + let before_output = session.last_output_at; + session.feed_pty(b"redraw from a blocked approval dialog\r\n"); + + assert_eq!(session.state(), AgentState::Blocked); + assert!(session.last_output_at >= before_output); +} + +#[test] +fn blocked_dialog_redraw_soak_publishes_one_transition() { + let mut session = test_session_with_policy(OscPolicy::default()); + session.agent = Some("claude".to_owned()); + let registry = crate::agent_status::rules::RulePackRegistry::bundled().unwrap(); + let dialog = include_str!("../agent_status/screen/fixtures/claude/blocked.txt"); + let mut transition_count = 0; + + for _ in 0..150 { + let frame = format!("\x1b[2J\x1b[H{dialog}"); + session.feed_pty(frame.as_bytes()); + let result = arbitrate_visible_session_for_test(&session, ®istry); + + if crate::agent_status::policy::should_publish_candidate( + session.status.effective, + &result, + &mut session.pending_status_transition, + ) { + if session + .status + .publish_raw(result.raw, result.confidence, result.summary) + .is_some() + { + transition_count += 1; + } + session.state = session.status.effective; 
+ } + } + + assert_eq!(transition_count, 1); + assert_eq!(session.state(), AgentState::Blocked); +} + +#[test] +fn recorded_pty_transcripts_replay_through_parser_and_engine() { + let cases = [( + "claude", + include_bytes!("../agent_status/screen/transcripts/claude/blocked.ansi").as_slice(), + crate::agent_status::evidence::RawAgentState::Blocked, + "permission-dialog", + )]; + let registry = crate::agent_status::rules::RulePackRegistry::bundled().unwrap(); + + for (agent, transcript, expected_state, expected_rule) in cases { + let mut session = test_session_with_policy(OscPolicy::default()); + session.agent = Some(agent.to_owned()); + session.feed_pty(transcript); + + let result = arbitrate_visible_session_for_test(&session, ®istry); + + assert_eq!(result.raw, expected_state, "transcript for {agent}"); + assert_eq!(result.summary.rule_id.as_deref(), Some(expected_rule)); + assert!(result.summary.visible_blocker); + } +} + +#[test] +fn operator_input_does_not_change_agent_state() { + let mut blocked = test_session_with_policy(OscPolicy::default()); + blocked.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Blocked, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + blocked.state = blocked.status.effective; + assert_eq!(blocked.state(), AgentState::Blocked); + + let before_input = blocked.last_input_at; + assert!(blocked.mark_operator_input()); + + assert_eq!(blocked.state(), AgentState::Blocked); + assert!(blocked.last_input_at >= before_input); + + let mut done = test_session_with_policy(OscPolicy::default()); + done.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Working, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + crate::agent_status::evidence::EvidenceSummary::default(), + ); + done.status.publish_raw( + crate::agent_status::evidence::RawAgentState::Idle, + jackin_protocol::agent_status::AgentStatusConfidence::Strong, + 
crate::agent_status::evidence::EvidenceSummary::default(), + ); + done.state = done.status.effective; + assert_eq!(done.state(), AgentState::Done); + + assert!(!done.mark_operator_input()); + + assert_eq!(done.state(), AgentState::Done); +} + #[test] fn osc_52_clipboard_dropped_when_policy_denies() { let drained = drained_with_policy(b"\x1b]52;c;SGVsbG8=\x07", OscPolicy::for_test_deny_all()); @@ -262,6 +469,16 @@ fn osc_9_notification_dropped_when_policy_denies() { ); } +#[test] +fn bel_evidence_is_retained_when_notify_policy_denies_forwarding() { + let mut session = test_session_with_policy(OscPolicy::for_test_deny_all()); + session.feed_pty(b"\x07"); + + assert!(session.osc_evidence.bel_at.is_some()); + assert_eq!(session.osc_evidence.bel_count, 1); + assert!(session.drain_passthrough().is_empty()); +} + #[test] fn osc_2_title_dropped_when_policy_denies() { let drained = drained_with_policy(b"\x1b]2;rogue title\x07", OscPolicy::for_test_deny_all()); @@ -436,38 +653,6 @@ fn build_shell_command_removes_stale_agent_env() { assert!(cmd.get_env("JACKIN_AGENT").is_none()); } -#[test] -fn pty_output_does_not_clear_latched_blocked_state() { - assert_eq!( - state_after_pty_output(AgentState::Blocked), - AgentState::Blocked - ); - assert_eq!( - state_after_pty_output(AgentState::Working), - AgentState::Working - ); - assert_eq!( - state_after_pty_output(AgentState::Idle), - AgentState::Working - ); -} - -#[test] -fn refresh_latches_blocked_until_operator_input() { - assert_eq!( - state_after_refresh(AgentState::Working, BLOCKED_AFTER), - AgentState::Blocked - ); - assert_eq!( - state_after_refresh(AgentState::Blocked, std::time::Duration::ZERO), - AgentState::Blocked - ); - assert_eq!( - state_after_refresh(AgentState::Idle, BLOCKED_AFTER / 2), - AgentState::Working - ); -} - #[test] fn osc8_uri_empty_is_safe() { // Empty URI = link terminator; must always pass. 
diff --git a/crates/jackin-capsule/src/socket.rs b/crates/jackin-capsule/src/socket.rs index 82c44cd75..34f986257 100644 --- a/crates/jackin-capsule/src/socket.rs +++ b/crates/jackin-capsule/src/socket.rs @@ -20,6 +20,7 @@ /// state in one place. pub const SOCKET_PATH: &str = "/jackin/run/jackin.sock"; +use std::collections::BTreeMap; use std::os::unix::fs::PermissionsExt as _; use std::path::Path; use std::sync::Arc; @@ -265,13 +266,23 @@ async fn read_payload_lazy( } /// Handle a one-shot control request and close the connection. +/// +/// State-mutating messages (`ReportAgentState`, `HeartbeatAgentAuthority`, +/// `ClearAgentAuthority`, runtime events) are forwarded through +/// `control_msg_tx` to the daemon's main event loop for processing; no reply +/// is written for those. +#[allow(clippy::too_many_arguments)] pub async fn handle_control_request( mut stream: UnixStream, first_byte: u8, sessions: Vec, tabs: Vec, + visible_text: BTreeMap>, + status_explain: BTreeMap, history: Vec, active_tab: u32, + control_msg_tx: mpsc::UnboundedSender, + state_broadcast_tx: tokio::sync::broadcast::Sender, ) { let msg = match read_control_msg(&mut stream, first_byte).await { Ok(msg) => msg, @@ -280,6 +291,19 @@ pub async fn handle_control_request( return; } }; + // State-mutating messages are forwarded to the daemon's main event loop + // rather than handled inline; they need no reply. + if matches!( + msg, + ClientMsg::ReportAgentState { .. } + | ClientMsg::HeartbeatAgentAuthority { .. } + | ClientMsg::ClearAgentAuthority { .. } + | ClientMsg::ReportChildAgentState { .. } + | ClientMsg::ReportRuntimeEvent { .. 
} + ) { + drop(control_msg_tx.send(msg)); + return; + } let reply = match msg { ClientMsg::Status => ServerMsg::SessionList { sessions }, ClientMsg::Snapshot => ServerMsg::Snapshot { tabs, active_tab }, @@ -290,6 +314,154 @@ pub async fn handle_control_request( crate::clog!("control: ignoring unknown ClientMsg variant from peer"); ServerMsg::Unknown } + ClientMsg::WaitSessionStatus { + session_id, + ref target_statuses, + timeout_ms, + } => { + let timeout_dur = Duration::from_millis(timeout_ms.unwrap_or(30_000)); + let current = sessions.iter().find(|s| s.id == session_id).map(|s| { + ( + s.state.label().to_owned(), + s.agent_status_report + .as_ref() + .map_or(0, |report| report.revision), + ) + }); + let make_result = + |effective: String, revision: u64, outcome: &str| ServerMsg::SessionStatusResult { + session_id, + effective, + revision, + outcome: outcome.to_owned(), + }; + match current { + None => { + crate::cdebug!("session {session_id}: WaitSessionStatus outcome=not_found"); + make_result("unknown".to_owned(), 0, "not_found") + } + Some((ref cur, revision)) if target_statuses.contains(cur) => { + crate::cdebug!( + "session {session_id}: WaitSessionStatus outcome=satisfied effective={cur}" + ); + make_result(cur.clone(), revision, "satisfied") + } + Some((ref cur, _revision)) => { + // Not yet satisfied — subscribe to broadcast and wait. + let mut rx = state_broadcast_tx.subscribe(); + let deadline = tokio::time::Instant::now() + timeout_dur; + let cur = cur.clone(); + let targets = target_statuses.clone(); + loop { + let rem = deadline.saturating_duration_since(tokio::time::Instant::now()); + if rem.is_zero() { + crate::cdebug!( + "session {session_id}: WaitSessionStatus outcome=timeout effective={cur}" + ); + break make_result(cur, 0, "timeout"); + } + match tokio::time::timeout(rem, rx.recv()).await { + Ok(Ok(ServerMsg::AgentStateChanged { + session_id: esid, + ref effective, + revision, + .. 
+ })) if esid == session_id && targets.contains(effective) => { + crate::cdebug!( + "session {session_id}: WaitSessionStatus outcome=satisfied effective={effective}" + ); + break make_result(effective.clone(), revision, "satisfied"); + } + Ok(Ok(_)) => {} + Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(n))) => { + // Events were dropped; the satisfying transition may have been among them. + // Break with timeout so the caller can retry with fresh state rather than + // silently waiting for an event that already happened. + crate::cdebug!( + "session {session_id}: WaitSessionStatus lagged {n} events; returning timeout" + ); + break make_result(cur.clone(), 0, "timeout"); + } + _ => { + crate::cdebug!( + "session {session_id}: WaitSessionStatus outcome=timeout (channel closed)" + ); + break make_result(cur.clone(), 0, "timeout"); + } + } + } + } + } + } + ClientMsg::SessionReadVisible { session_id, rows } => { + let mut lines = visible_text.get(&session_id).cloned().unwrap_or_default(); + if let Some(rows) = rows { + let keep = usize::from(rows); + if lines.len() > keep { + lines = lines.split_off(lines.len() - keep); + } + } + ServerMsg::SessionVisibleText { session_id, lines } + } + ClientMsg::SessionStatusExplain { session_id } => { + if let Some(report) = status_explain.get(&session_id).cloned() { + ServerMsg::SessionStatusExplain { session_id, report } + } else { + ServerMsg::Error { + code: "not_found".to_owned(), + message: format!("session {session_id} not found"), + } + } + } + ClientMsg::TokenGetSession { session_id } => { + let token_usage = sessions + .iter() + .find(|s| s.id == session_id) + .and_then(|s| s.token_usage.clone()); + ServerMsg::TokenSessionResult { + session_id, + token_usage, + } + } + ClientMsg::TokenGetModels { .. 
} => ServerMsg::TokenModelsResult { + provider: "claude".to_owned(), + models: vec![ + "claude-opus-4-8-20251101".to_owned(), + "claude-sonnet-4-6-20251101".to_owned(), + "claude-haiku-4-5-20251001".to_owned(), + ], + }, + ClientMsg::EventsSubscribe { subscriber_id } => { + crate::clog!( + "events.subscribe: new subscriber {:?}", + subscriber_id.as_deref().unwrap_or("anon") + ); + let welcome = ServerMsg::Welcome { + jackin_protocol_version: "1".to_owned(), + }; + if stream.write_all(&frame(&welcome)).await.is_err() { + return; + } + let mut rx = state_broadcast_tx.subscribe(); + loop { + match rx.recv().await { + Ok(event) => { + if stream.write_all(&frame(&event)).await.is_err() { + break; + } + } + Err(tokio::sync::broadcast::error::RecvError::Closed) => break, + Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => { + crate::clog!("events.subscribe: subscriber lagged {n} events; continuing"); + } + } + } + return; + } + _ => { + crate::clog!("control: unhandled ClientMsg variant in one-shot handler"); + ServerMsg::Unknown + } }; // Bound the reply write so a peer that disappeared between request // decode and reply write cannot wedge this task forever holding the @@ -297,8 +469,8 @@ pub async fn handle_control_request( // socket write; anything slower is the peer being unresponsive. 
match tokio::time::timeout(Duration::from_secs(2), stream.write_all(&frame(&reply))).await { Ok(Ok(())) => {} - Ok(Err(e)) => crate::clog!("control reply write failed (msg={msg:?}): {e}"), - Err(_) => crate::clog!("control reply write timed out after 2 s (msg={msg:?})"), + Ok(Err(e)) => crate::clog!("control reply write failed: {e}"), + Err(_) => crate::clog!("control reply write timed out after 2 s"), } } diff --git a/crates/jackin-capsule/src/socket/tests.rs b/crates/jackin-capsule/src/socket/tests.rs index 9a662907a..2e248dda4 100644 --- a/crates/jackin-capsule/src/socket/tests.rs +++ b/crates/jackin-capsule/src/socket/tests.rs @@ -51,6 +51,61 @@ async fn read_control_msg_decodes_unknown_variant_for_forward_compat() { assert!(matches!(msg, ClientMsg::Unknown)); } +#[tokio::test] +async fn wait_session_status_already_satisfied_returns_current_revision() { + let (mut client, server) = UnixStream::pair().unwrap(); + let request = ClientMsg::WaitSessionStatus { + session_id: 7, + target_statuses: vec!["blocked".to_owned()], + timeout_ms: Some(1), + }; + let framed = frame(&request); + client.write_all(&framed[1..]).await.unwrap(); + + let (control_tx, _control_rx) = mpsc::unbounded_channel(); + let (state_tx, _state_rx) = tokio::sync::broadcast::channel(8); + handle_control_request( + server, + framed[0], + vec![SessionInfo { + id: 7, + label: "codex".to_owned(), + agent: Some("codex".to_owned()), + state: crate::protocol::AgentState::Blocked, + active: true, + token_usage: None, + agent_status_report: Some(jackin_protocol::agent_status::AgentStatusReport { + revision: 42, + ..Default::default() + }), + }], + Vec::new(), + BTreeMap::new(), + BTreeMap::new(), + Vec::new(), + 0, + control_tx, + state_tx, + ) + .await; + + let mut len = [0_u8; 4]; + client.read_exact(&mut len).await.unwrap(); + let mut body = vec![0_u8; u32::from_be_bytes(len) as usize]; + client.read_exact(&mut body).await.unwrap(); + let response: ServerMsg = serde_json::from_slice(&body).unwrap(); + 
+ assert!(matches!( + response, + ServerMsg::SessionStatusResult { + session_id: 7, + revision: 42, + ref effective, + ref outcome, + } if effective == "blocked" && outcome == "satisfied" + )); +} + #[tokio::test] async fn start_listener_caps_concurrent_clients_at_max() { // Hard regression guard for `MAX_CONCURRENT_CLIENTS`. Without diff --git a/crates/jackin-capsule/src/token_monitor.rs b/crates/jackin-capsule/src/token_monitor.rs new file mode 100644 index 000000000..d1168aebd --- /dev/null +++ b/crates/jackin-capsule/src/token_monitor.rs @@ -0,0 +1,315 @@ +//! Token consumption monitor. +//! +//! Reads provider-specific local JSONL/SQLite/JSON files inside the container +//! and reports per-session token totals through the event stream. +//! +//! Architecture: +//! - One `TokenSession` per live agent session. +//! - Polled from the daemon's 30-second ticker. +//! - Emits `token_usage_changed` events when totals change. +//! - Caches last-known totals in `/jackin/state/token-monitor/.json`. + +pub mod amp; +pub mod claude; +pub mod codex; +pub mod kimi; +pub mod models; +pub mod opencode; +pub mod pricing; + +use std::collections::HashMap; +use std::path::Path; +use std::time::{Instant, SystemTime}; + +use jackin_protocol::control::TokenUsageSummary; + +/// Aggregated token totals for one session. +#[derive(Debug, Clone, Default)] +pub struct TokenTotals { + pub input_tokens: u64, + pub output_tokens: u64, + pub cache_read_tokens: u64, + pub cache_write_tokens: u64, + /// Pre-calculated cost when the JSONL provides it directly. + pub cost_usd: Option, + /// Most recently used model in this session. + pub model: Option, + /// Start of the current 5-hour billing window (Claude-specific). + pub window_start: Option, +} + +/// One time-window rate/quota card (e.g. 5-hour session, weekly). +#[derive(Debug, Clone)] +pub struct RateWindow { + /// Display label: "Session", "Weekly", "Claude Sonnet (weekly)", etc. + pub label: String, + /// Usage percentage 0–100. 
+ pub used_percent: f64, + /// Window duration in minutes (300 = 5h, 10080 = 1 week). + pub window_minutes: Option, + /// Next reset timestamp. + pub resets_at: Option, + /// Custom reset description e.g. "Resets Monday 3:00 AM". + pub reset_description: Option, +} + +/// Complete token/quota snapshot for one provider in one session. +#[derive(Debug, Clone)] +pub struct ProviderUsageSnapshot { + pub provider: String, + pub model: Option, + /// Session-level quota (5h for Claude, hourly for `OpenAI`). + pub primary: Option, + /// Weekly quota. + pub secondary: Option, + /// Monthly or model-specific quota. + pub tertiary: Option, + /// Additional per-model breakdowns. + pub extra_windows: Vec, + pub fetched_at: SystemTime, +} + +impl ProviderUsageSnapshot { + /// Build a minimal snapshot from raw token totals (no OAuth quota data). + pub fn from_totals(provider: &str, totals: &TokenTotals) -> Self { + Self { + provider: provider.to_owned(), + model: totals.model.clone(), + primary: None, + secondary: None, + tertiary: None, + extra_windows: Vec::new(), + fetched_at: SystemTime::now(), + } + } +} + +impl TokenTotals { + pub fn to_summary(&self) -> TokenUsageSummary { + TokenUsageSummary { + input_tokens: self.input_tokens, + output_tokens: self.output_tokens, + cache_read_tokens: self.cache_read_tokens, + cache_write_tokens: self.cache_write_tokens, + cost_usd: self.cost_usd, + model: self.model.clone(), + } + } +} + +/// Per-session token monitor state. +#[derive(Debug)] +pub struct TokenSession { + pub session_id: u64, + pub agent: String, + pub totals: TokenTotals, + /// Last byte offset read in the JSONL file (for incremental reads). + pub file_offset: u64, + /// Last rowid seen in `SQLite` (for `OpenCode` incremental reads). + pub last_rowid: i64, + /// Time of last poll. + pub last_polled: Instant, + /// Consecutive polls with no new data (for back-off). 
+ pub silent_polls: u32, +} + +impl TokenSession { + pub fn new(session_id: u64, agent: &str) -> Self { + Self { + session_id, + agent: agent.to_owned(), + totals: TokenTotals::default(), + file_offset: 0, + last_rowid: 0, + last_polled: Instant::now(), + silent_polls: 0, + } + } + + /// Poll interval considering back-off. + /// Base: 30s; after 5 consecutive silent polls: 60s. + pub fn poll_interval_secs(&self) -> u64 { + if self.silent_polls >= 5 { 60 } else { 30 } + } + + /// Returns true if a poll is due. + pub fn poll_due(&self) -> bool { + self.last_polled.elapsed().as_secs() >= self.poll_interval_secs() + } + + /// Poll for new token data. Returns `true` when totals changed. + pub fn poll(&mut self) -> bool { + self.last_polled = Instant::now(); + let changed = match self.agent.as_str() { + "claude" => claude::poll_session(self), + "codex" => codex::poll_session(self), + "kimi" => kimi::poll_session(self), + "opencode" => opencode::poll_session(self), + "amp" => amp::poll_session(self), + _ => false, + }; + if changed { + self.silent_polls = 0; + } else { + self.silent_polls = self.silent_polls.saturating_add(1); + } + changed + } +} + +/// Walk `base_dirs` and return all files with extension `ext` found either as +/// direct children of each base directory or one level deeper (for providers +/// that nest files inside a per-session subdirectory). 
+pub(crate) fn find_provider_files(base_dirs: &[&str], ext: &str) -> Vec { + let mut paths = Vec::new(); + for &base in base_dirs { + let Ok(dir) = std::fs::read_dir(base) else { + continue; + }; + for session in dir.flatten() { + let sp = session.path(); + if sp.extension().and_then(|e| e.to_str()) == Some(ext) { + paths.push(sp); + continue; + } + let Ok(entries) = std::fs::read_dir(&sp) else { + continue; + }; + for entry in entries.flatten() { + let p = entry.path(); + if p.extension().and_then(|e| e.to_str()) == Some(ext) { + paths.push(p); + } + } + } + } + paths +} + +pub(crate) fn read_new_text(path: &Path, offset: &mut u64) -> Option<(String, u64)> { + let content = std::fs::read_to_string(path).ok()?; + let len = content.len() as u64; + let start = if *offset <= len { + *offset as usize + } else { + crate::cdebug!( + "token monitor: offset {} beyond len {} for {:?}, resetting", + *offset, + len, + path + ); + *offset = 0; + 0 + }; + Some((content[start..].to_owned(), len)) +} + +/// The token monitor manages per-session polling. +#[derive(Debug, Default)] +pub struct TokenMonitor { + sessions: HashMap, + pub token_snapshots: HashMap, + pub model_catalog: models::ModelCatalog, +} + +impl TokenMonitor { + pub fn new() -> Self { + Self { + sessions: HashMap::new(), + token_snapshots: HashMap::new(), + model_catalog: models::ModelCatalog::new(), + } + } + + /// Register a new session for monitoring. + pub fn register_session(&mut self, session_id: u64, agent: &str) { + self.sessions + .insert(session_id, TokenSession::new(session_id, agent)); + if self.model_catalog.needs_refresh() { + self.model_catalog.populate(agent); + } + } + + /// Deregister a session when it exits. + pub fn deregister_session(&mut self, session_id: u64) { + self.sessions.remove(&session_id); + self.token_snapshots.remove(&session_id); + } + + pub fn clear(&mut self) { + self.sessions.clear(); + self.token_snapshots.clear(); + } + + /// Poll all sessions that are due. 
Returns session IDs whose totals changed. + pub fn poll_due_sessions(&mut self) -> Vec { + let mut changed = Vec::new(); + for (id, session) in &mut self.sessions { + if session.poll_due() && session.poll() { + let snapshot = ProviderUsageSnapshot::from_totals(&session.agent, &session.totals); + self.token_snapshots.insert(*id, snapshot); + changed.push(*id); + } + } + changed + } + + /// Get current totals for a session. + pub fn totals(&self, session_id: u64) -> Option<&TokenTotals> { + self.sessions.get(&session_id).map(|s| &s.totals) + } + + #[cfg(test)] + pub(crate) fn contains_session(&self, session_id: u64) -> bool { + self.sessions.contains_key(&session_id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Instant; + + #[test] + fn token_monitor_backs_off_after_silence() { + let session = TokenSession::new(1, "claude"); + assert_eq!(session.poll_interval_secs(), 30); + let mut session2 = TokenSession::new(1, "claude"); + session2.silent_polls = 5; + assert_eq!(session2.poll_interval_secs(), 60); + } + + #[test] + fn token_monitor_resets_backoff_after_change() { + let mut session = TokenSession::new(1, "claude"); + session.silent_polls = 5; + assert_eq!(session.poll_interval_secs(), 60); + session.silent_polls = 0; + assert_eq!(session.poll_interval_secs(), 30); + } + + #[test] + fn token_monitor_poll_due_respects_interval() { + let mut session = TokenSession::new(1, "claude"); + session.last_polled = Instant::now(); + assert!(!session.poll_due()); + } + + #[test] + fn session_info_includes_token_usage_when_available() { + let totals = TokenTotals { + input_tokens: 1000, + output_tokens: 500, + cache_read_tokens: 100, + cache_write_tokens: 50, + cost_usd: Some(0.42), + model: Some("claude-sonnet-4-6".to_owned()), + window_start: None, + }; + let summary = totals.to_summary(); + assert_eq!(summary.input_tokens, 1000); + assert_eq!(summary.output_tokens, 500); + assert_eq!(summary.cost_usd, Some(0.42)); + assert_eq!(summary.model.as_deref(), 
Some("claude-sonnet-4-6")); + } +} diff --git a/crates/jackin-capsule/src/token_monitor/amp.rs b/crates/jackin-capsule/src/token_monitor/amp.rs new file mode 100644 index 000000000..2581cd2ae --- /dev/null +++ b/crates/jackin-capsule/src/token_monitor/amp.rs @@ -0,0 +1,179 @@ +//! JSON reader for Amp thread files. +//! +//! Reads `~/.local/share/amp/threads/*.json`. + +use std::fs; +use std::path::PathBuf; + +use super::TokenSession; + +fn find_thread_files() -> Vec { + let base = "/home/agent/.local/share/amp/threads"; + let Ok(dir) = fs::read_dir(base) else { + return Vec::new(); + }; + dir.flatten() + .map(|e| e.path()) + .filter(|p| p.extension().and_then(|e| e.to_str()) == Some("json")) + .collect() +} + +pub fn poll_session(session: &mut TokenSession) -> bool { + let files = find_thread_files(); + if files.is_empty() { + return false; + } + + // Compute totals from scratch (Amp has no per-file byte offset). + let mut scratch_input: u64 = 0; + let mut scratch_output: u64 = 0; + let mut scratch_cache_read: u64 = 0; + let mut scratch_cache_write: u64 = 0; + let mut last_model: Option = None; + + for path in &files { + let Ok(content) = fs::read_to_string(path) else { + continue; + }; + let Ok(val) = serde_json::from_str::(&content) else { + continue; + }; + + // Thread JSON: array of messages, each may have usage metadata + let messages: &[serde_json::Value] = match val.as_array() { + Some(arr) => arr, + None => match val.get("messages").and_then(|m| m.as_array()) { + Some(arr) => arr, + None => continue, + }, + }; + + for msg in messages { + if let Some(usage) = msg.get("usage") { + let input = usage + .get("input_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let output = usage + .get("output_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cache_read = usage + .get("cache_read_input_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cache_write = usage + .get("cache_creation_input_tokens") + 
.and_then(serde_json::Value::as_u64) + .unwrap_or(0); + scratch_input = scratch_input.saturating_add(input); + scratch_output = scratch_output.saturating_add(output); + scratch_cache_read = scratch_cache_read.saturating_add(cache_read); + scratch_cache_write = scratch_cache_write.saturating_add(cache_write); + } + if let Some(model) = msg.get("model").and_then(|v| v.as_str()) { + last_model = Some(model.to_owned()); + } + } + } + + // Only report changed if the totals actually moved. + let changed = scratch_input != session.totals.input_tokens + || scratch_output != session.totals.output_tokens + || scratch_cache_read != session.totals.cache_read_tokens + || scratch_cache_write != session.totals.cache_write_tokens; + + if changed { + session.totals.input_tokens = scratch_input; + session.totals.output_tokens = scratch_output; + session.totals.cache_read_tokens = scratch_cache_read; + session.totals.cache_write_tokens = scratch_cache_write; + session.totals.model = last_model; + } + changed +} + +#[cfg(test)] +mod tests { + use crate::token_monitor::TokenSession; + + #[test] + fn amp_token_reader_parses_thread_messages() { + let json = r#"[ + {"usage":{"input_tokens":100,"output_tokens":50,"cache_read_input_tokens":10,"cache_creation_input_tokens":5},"model":"claude-3-5-sonnet"}, + {"usage":{"input_tokens":200,"output_tokens":80}} + ]"#; + let val: serde_json::Value = serde_json::from_str(json).unwrap(); + let arr = val.as_array().unwrap(); + assert_eq!(arr.len(), 2); + let usage0 = arr[0].get("usage").unwrap(); + assert_eq!( + usage0 + .get("input_tokens") + .and_then(serde_json::Value::as_u64), + Some(100) + ); + assert_eq!( + arr[0].get("model").and_then(|v| v.as_str()), + Some("claude-3-5-sonnet") + ); + } + + #[test] + fn amp_token_reader_handles_messages_wrapper() { + let json = r#"{"messages":[{"usage":{"input_tokens":300,"output_tokens":150}}]}"#; + let val: serde_json::Value = serde_json::from_str(json).unwrap(); + let messages = 
val.get("messages").and_then(|m| m.as_array()).unwrap(); + assert_eq!(messages.len(), 1); + let usage = messages[0].get("usage").unwrap(); + assert_eq!( + usage + .get("input_tokens") + .and_then(serde_json::Value::as_u64), + Some(300) + ); + } + + #[test] + fn amp_changed_flag_includes_cache_tokens() { + let mut session = TokenSession::new(1, "amp"); + session.totals.input_tokens = 100; + session.totals.output_tokens = 50; + session.totals.cache_read_tokens = 0; + + let scratch_input: u64 = 100; + let scratch_output: u64 = 50; + let scratch_cache_read: u64 = 25; + let scratch_cache_write: u64 = 0; + + let changed = scratch_input != session.totals.input_tokens + || scratch_output != session.totals.output_tokens + || scratch_cache_read != session.totals.cache_read_tokens + || scratch_cache_write != session.totals.cache_write_tokens; + + assert!(changed, "cache-read change alone must flip changed flag"); + + let old_changed = scratch_input != session.totals.input_tokens + || scratch_output != session.totals.output_tokens; + assert!(!old_changed, "confirms old logic would miss this change"); + } + + #[test] + fn amp_token_reader_skips_zero_usage() { + let session = TokenSession::new(1, "amp"); + // Zero usage should not flip changed flag — verify via parse_raw_usage logic + let zero = serde_json::json!({"usage":{"input_tokens":0,"output_tokens":0}}); + let usage = zero.get("usage").unwrap(); + let input = usage + .get("input_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let output = usage + .get("output_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + assert_eq!(input, 0); + assert_eq!(output, 0); + assert_eq!(session.totals.input_tokens, 0); + } +} diff --git a/crates/jackin-capsule/src/token_monitor/claude.rs b/crates/jackin-capsule/src/token_monitor/claude.rs new file mode 100644 index 000000000..641a72984 --- /dev/null +++ b/crates/jackin-capsule/src/token_monitor/claude.rs @@ -0,0 +1,179 @@ +//! 
JSONL reader for Claude Code token usage. +//! +//! Reads `/home/agent/.config/claude/projects/**/*.jsonl` (v1.0.30+) +//! or `/home/agent/.claude/projects/**/*.jsonl` (legacy). + +use std::path::PathBuf; +use std::time::SystemTime; + +use super::TokenSession; + +/// Per-line token fields from Claude JSONL. +#[derive(Debug, Default)] +struct ClaudeUsageLine { + input_tokens: u64, + output_tokens: u64, + cache_creation_input_tokens: u64, + cache_read_input_tokens: u64, + cost_usd: Option, + model: Option, + is_error: bool, + is_sidechain: bool, +} + +fn parse_line(line: &str) -> Option { + let val: serde_json::Value = serde_json::from_str(line).ok()?; + let msg = val.get("message")?; + let usage = msg.get("usage")?; + + let input_tokens = usage + .get("input_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let output_tokens = usage + .get("output_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cache_creation = usage + .get("cache_creation_input_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cache_read = usage + .get("cache_read_input_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cost_usd = val.get("costUSD").and_then(serde_json::Value::as_f64); + let model = msg.get("model").and_then(|v| v.as_str()).map(str::to_owned); + let is_error = val + .get("isApiErrorMessage") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + let is_sidechain = val + .get("isSidechain") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + + Some(ClaudeUsageLine { + input_tokens, + output_tokens, + cache_creation_input_tokens: cache_creation, + cache_read_input_tokens: cache_read, + cost_usd, + model, + is_error, + is_sidechain, + }) +} + +/// Find the JSONL file(s) for the current session. 
+fn find_jsonl_files() -> Vec { + super::find_provider_files( + &[ + "/home/agent/.config/claude/projects", + "/home/agent/.claude/projects", + ], + "jsonl", + ) +} + +/// Poll Claude JSONL files for new token data. +/// Returns true when totals changed. +pub fn poll_session(session: &mut TokenSession) -> bool { + let files = find_jsonl_files(); + if files.is_empty() { + return false; + } + + // Incremental polling: track a single shared byte offset across all files. + // A production implementation should track per-file offsets via a + // HashMap, but a single offset is sufficient for the first + // implementation where one JSONL file dominates. + let mut changed = false; + let mut new_cost: f64 = 0.0; + let mut has_cost = false; + let mut new_input: u64 = 0; + let mut new_output: u64 = 0; + let mut new_cache_read: u64 = 0; + let mut new_cache_write: u64 = 0; + let mut last_model: Option = session.totals.model.clone(); + + for path in &files { + let Some((text, new_offset)) = super::read_new_text(path, &mut session.file_offset) else { + continue; + }; + + for line in text.lines() { + if line.trim().is_empty() { + continue; + } + if let Some(parsed) = parse_line(line) { + if parsed.is_sidechain { + continue; // Skip sidechain replays. 
+ } + if parsed.is_error && parsed.input_tokens == 0 && parsed.output_tokens == 0 { + continue; + } + if let Some(ref m) = parsed.model { + last_model = Some(m.clone()); + } + if let Some(cost) = parsed.cost_usd { + new_cost += cost; + has_cost = true; + } else { + new_input += parsed.input_tokens; + new_output += parsed.output_tokens; + new_cache_read += parsed.cache_read_input_tokens; + new_cache_write += parsed.cache_creation_input_tokens; + } + changed = true; + } + } + session.file_offset = new_offset; + } + + if changed { + if has_cost { + session.totals.cost_usd = Some(session.totals.cost_usd.unwrap_or(0.0) + new_cost); + } + session.totals.input_tokens += new_input; + session.totals.output_tokens += new_output; + session.totals.cache_read_tokens += new_cache_read; + session.totals.cache_write_tokens += new_cache_write; + session.totals.model = last_model; + if session.totals.window_start.is_none() { + session.totals.window_start = Some(SystemTime::now()); + } + } + changed +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn claude_token_reader_parses_jsonl_fields() { + let line = r#"{"message":{"id":"msg_01","model":"claude-sonnet-4-6","usage":{"input_tokens":100,"output_tokens":50,"cache_creation_input_tokens":10,"cache_read_input_tokens":5}},"requestId":"req_01","costUSD":0.42}"#; + let parsed = parse_line(line).unwrap(); + assert_eq!(parsed.input_tokens, 100); + assert_eq!(parsed.output_tokens, 50); + assert_eq!(parsed.cache_creation_input_tokens, 10); + assert_eq!(parsed.cache_read_input_tokens, 5); + assert_eq!(parsed.cost_usd, Some(0.42)); + assert_eq!(parsed.model.as_deref(), Some("claude-sonnet-4-6")); + } + + #[test] + fn claude_token_reader_uses_costusd_when_present() { + let line = r#"{"message":{"id":"msg_02","usage":{"input_tokens":1000,"output_tokens":500}},"costUSD":1.23}"#; + let parsed = parse_line(line).unwrap(); + assert_eq!(parsed.cost_usd, Some(1.23)); + } + + #[test] + fn claude_token_reader_skips_sidechain() { + let line 
= r#"{"isSidechain":true,"message":{"id":"msg_03","usage":{"input_tokens":100,"output_tokens":50}}}"#; + let parsed = parse_line(line).unwrap(); + assert!(parsed.is_sidechain); + } +} diff --git a/crates/jackin-capsule/src/token_monitor/codex.rs b/crates/jackin-capsule/src/token_monitor/codex.rs new file mode 100644 index 000000000..6b4f76964 --- /dev/null +++ b/crates/jackin-capsule/src/token_monitor/codex.rs @@ -0,0 +1,164 @@ +//! JSONL reader for Codex token usage. + +use std::path::PathBuf; + +use super::TokenSession; + +fn find_jsonl_files() -> Vec { + super::find_provider_files(&["/home/agent/.codex/sessions"], "jsonl") +} + +fn parse_raw_usage(obj: &serde_json::Value) -> (u64, u64, u64, u64) { + let input = obj + .get("input_tokens") + .or_else(|| obj.get("prompt_tokens")) + .or_else(|| obj.get("input")) + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let output = obj + .get("output_tokens") + .or_else(|| obj.get("completion_tokens")) + .or_else(|| obj.get("output")) + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cached = obj + .get("cached_input_tokens") + .or_else(|| obj.get("cache_read_input_tokens")) + .or_else(|| obj.get("cached_tokens")) + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let reasoning = obj + .get("reasoning_output_tokens") + .or_else(|| obj.get("reasoning_tokens")) + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + (input, output, cached, reasoning) +} + +pub fn poll_session(session: &mut TokenSession) -> bool { + let files = find_jsonl_files(); + if files.is_empty() { + return false; + } + + let mut changed = false; + + for path in &files { + let mut prev_cumulative = (0u64, 0u64, 0u64, 0u64); + let Some((text, new_offset)) = super::read_new_text(path, &mut session.file_offset) else { + continue; + }; + + for line in text.lines() { + if line.trim().is_empty() { + continue; + } + let Ok(val) = serde_json::from_str::(line) else { + continue; + }; + + // Session format: type = "event_msg" with 
token_count payload + if val.get("type").and_then(|v| v.as_str()) == Some("event_msg") { + let Some(payload) = val.get("payload") else { + continue; + }; + if payload.get("type").and_then(|v| v.as_str()) == Some("token_count") + && let Some(info) = payload.get("info") + && let Some(total) = info.get("total_token_usage") + { + let current = parse_raw_usage(total); + // If the cumulative counter is lower than prev (counter + // regression or file re-read after seek reset), clamp to 0. + let delta = |cur: u64, prev: u64, label: &str| -> u64 { + if cur < prev { + crate::cdebug!( + "token monitor: codex counter regression {} {}<{} in {:?}, clamping to 0", + label, + cur, + prev, + path + ); + 0 + } else { + cur - prev + } + }; + session.totals.input_tokens += delta(current.0, prev_cumulative.0, "input"); + session.totals.output_tokens += delta(current.1, prev_cumulative.1, "output"); + session.totals.cache_read_tokens += + delta(current.2, prev_cumulative.2, "cached"); + prev_cumulative = current; + changed = true; + } + if let Some(model) = payload.get("model_name").and_then(|v| v.as_str()) { + session.totals.model = Some(model.to_owned()); + } + continue; + } + + // Headless format: direct usage at top level + if val.get("usage").is_some() { + if let Some(usage) = val.get("usage") { + let (inp, out, cached, _) = parse_raw_usage(usage); + session.totals.input_tokens += inp; + session.totals.output_tokens += out; + session.totals.cache_read_tokens += cached; + changed = true; + } + if let Some(cost) = val.get("costUSD").and_then(serde_json::Value::as_f64) { + session.totals.cost_usd = Some(session.totals.cost_usd.unwrap_or(0.0) + cost); + } + } + } + session.file_offset = new_offset; + } + changed +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn codex_token_reader_computes_per_turn_delta() { + let line1 = r#"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":100,"output_tokens":50}}}}"#; + let line2 = 
r#"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":200,"output_tokens":90}}}}"#; + let v1: serde_json::Value = serde_json::from_str(line1).unwrap(); + let v2: serde_json::Value = serde_json::from_str(line2).unwrap(); + assert_eq!(v1.get("type").and_then(|v| v.as_str()), Some("event_msg")); + assert_eq!(v2.get("type").and_then(|v| v.as_str()), Some("event_msg")); + let info2 = &v2["payload"]["info"]["total_token_usage"]; + let (inp, out, _, _) = parse_raw_usage(info2); + assert_eq!(inp, 200); + assert_eq!(out, 90); + } + + #[test] + fn codex_token_reader_handles_headless_format() { + let line = r#"{"usage":{"input_tokens":300,"output_tokens":100},"costUSD":0.15}"#; + let val: serde_json::Value = serde_json::from_str(line).unwrap(); + assert!(val.get("usage").is_some()); + assert_eq!( + val.get("costUSD").and_then(serde_json::Value::as_f64), + Some(0.15) + ); + let (inp, out, _, _) = parse_raw_usage(val.get("usage").unwrap()); + assert_eq!(inp, 300); + assert_eq!(out, 100); + } + + #[test] + fn parse_raw_usage_handles_alternate_field_names() { + let obj = serde_json::json!({ + "prompt_tokens": 50, + "completion_tokens": 20, + "cache_read_input_tokens": 10, + "reasoning_output_tokens": 5, + }); + let (inp, out, cached, reasoning) = parse_raw_usage(&obj); + assert_eq!(inp, 50); + assert_eq!(out, 20); + assert_eq!(cached, 10); + assert_eq!(reasoning, 5); + } +} diff --git a/crates/jackin-capsule/src/token_monitor/kimi.rs b/crates/jackin-capsule/src/token_monitor/kimi.rs new file mode 100644 index 000000000..e24b8ca88 --- /dev/null +++ b/crates/jackin-capsule/src/token_monitor/kimi.rs @@ -0,0 +1,108 @@ +//! JSONL reader for Kimi token usage. +//! +//! Reads `~/.kimi/sessions/{GROUP_ID}/{SESSION_UUID}/wire.jsonl`. 
+ +use std::fs; +use std::path::PathBuf; + +use super::TokenSession; + +fn find_wire_files() -> Vec { + let mut paths = Vec::new(); + let base = "/home/agent/.kimi/sessions"; + let Ok(groups) = fs::read_dir(base) else { + return paths; + }; + for group in groups.flatten() { + let Ok(sessions) = fs::read_dir(group.path()) else { + continue; + }; + for session in sessions.flatten() { + let wire = session.path().join("wire.jsonl"); + if wire.exists() { + paths.push(wire); + } + } + } + paths +} + +pub fn poll_session(session: &mut TokenSession) -> bool { + let files = find_wire_files(); + if files.is_empty() { + return false; + } + let mut changed = false; + + for path in &files { + let Some((text, new_offset)) = super::read_new_text(path, &mut session.file_offset) else { + continue; + }; + + for line in text.lines() { + if line.trim().is_empty() { + continue; + } + let Ok(val) = serde_json::from_str::(line) else { + continue; + }; + + // StatusUpdate messages carry token_usage + if let Some(usage) = val.get("token_usage") { + let input = usage + .get("input_other") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let output = usage + .get("output") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cache_read = usage + .get("input_cache_read") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let cache_write = usage + .get("input_cache_creation") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + session.totals.input_tokens += input; + session.totals.output_tokens += output; + session.totals.cache_read_tokens += cache_read; + session.totals.cache_write_tokens += cache_write; + changed = true; + } + } + session.file_offset = new_offset; + } + changed +} + +#[cfg(test)] +mod tests { + #[test] + fn kimi_token_reader_parses_wire_jsonl() { + let line = r#"{"token_usage":{"input_other":500,"output":200,"input_cache_read":100,"input_cache_creation":50}}"#; + let val: serde_json::Value = serde_json::from_str(line).unwrap(); + let usage = 
val.get("token_usage").unwrap(); + assert_eq!( + usage.get("input_other").and_then(serde_json::Value::as_u64), + Some(500) + ); + assert_eq!( + usage.get("output").and_then(serde_json::Value::as_u64), + Some(200) + ); + assert_eq!( + usage + .get("input_cache_read") + .and_then(serde_json::Value::as_u64), + Some(100) + ); + assert_eq!( + usage + .get("input_cache_creation") + .and_then(serde_json::Value::as_u64), + Some(50) + ); + } +} diff --git a/crates/jackin-capsule/src/token_monitor/models.rs b/crates/jackin-capsule/src/token_monitor/models.rs new file mode 100644 index 000000000..9d7071de9 --- /dev/null +++ b/crates/jackin-capsule/src/token_monitor/models.rs @@ -0,0 +1,307 @@ +//! Model catalog: available models per provider. +//! +//! Queries each provider's model listing API and caches the result. +//! Exposes available models for the console agent picker. + +use std::time::{Duration, Instant}; + +/// A single model entry in the catalog. +#[derive(Debug, Clone)] +pub struct ModelEntry { + pub provider: String, + pub model_id: String, + pub display_name: String, +} + +#[derive(Debug)] +pub struct ModelCatalog { + entries: Vec, + fetched_at: Option, + ttl: Duration, +} + +impl Default for ModelCatalog { + fn default() -> Self { + Self::new() + } +} + +impl ModelCatalog { + pub fn new() -> Self { + Self { + entries: Vec::new(), + fetched_at: None, + ttl: Duration::from_hours(24), + } + } + + /// Return cached entries for `provider`, or the embedded fallback when none are cached. + pub fn available_models(&self, provider: &str) -> Vec { + let live: Vec<_> = self + .entries + .iter() + .filter(|e| e.provider == provider) + .cloned() + .collect(); + if live.is_empty() { + embedded_models(provider) + } else { + live + } + } + + /// Whether the catalog needs a refresh. + pub fn needs_refresh(&self) -> bool { + self.fetched_at.is_none_or(|t| t.elapsed() > self.ttl) + } + + /// Fetch fresh model list from a provider's API. 
+ /// Stamps `fetched_at` only when the HTTP round-trip succeeded AND at least + /// one model passed the provider's filter. On failure or empty result, leaves + /// `fetched_at` unchanged so `needs_refresh()` stays true for the next retry. + pub fn populate(&mut self, provider: &str) { + let fetched = match provider { + "claude" => self.fetch_anthropic(), + "codex" => self.fetch_openai(), + "kimi" => self.fetch_moonshot(), + _ => false, + }; + // Only stamp fetched_at when the HTTP round-trip actually succeeded. + // On failure, leave fetched_at unchanged so needs_refresh() stays true + // and a retry happens on the next register_session call. + if fetched { + self.fetched_at = Some(Instant::now()); + } + } + + fn fetch_from_api( + &mut self, + provider: &str, + env_key: &str, + url: &str, + build_req: impl FnOnce(ureq::Request, &str) -> ureq::Request, + filter: impl Fn(&str) -> bool, + ) -> bool { + let api_key = std::env::var(env_key).unwrap_or_default(); + if api_key.is_empty() { + return false; + } + let req = ureq::get(url); + let Ok(resp) = build_req(req, &api_key).call() else { + crate::cdebug!("model catalog: HTTP request failed for provider={provider} url={url}"); + return false; + }; + let Ok(body) = resp.into_string() else { + crate::cdebug!("model catalog: response body read failed for provider={provider}"); + return false; + }; + let Ok(val) = serde_json::from_str::(&body) else { + crate::cdebug!("model catalog: JSON parse failed for provider={provider}"); + return false; + }; + if let Some(arr) = val.get("data").and_then(|d| d.as_array()) { + let new: Vec = arr + .iter() + .filter_map(|m| { + let id = m.get("id")?.as_str()?.to_owned(); + if !filter(&id) { + return None; + } + let display = m + .get("display_name") + .and_then(|v| v.as_str()) + .unwrap_or(&id) + .to_owned(); + Some(ModelEntry { + provider: provider.into(), + model_id: id, + display_name: display, + }) + }) + .collect(); + if new.is_empty() { + // API returned a valid response but no 
models matched the filter. + // Return false so the caller does not stamp fetched_at — this way + // needs_refresh() stays true and a retry fires on the next session. + return false; + } + self.entries.retain(|e| e.provider != provider); + self.entries.extend(new); + true + } else { + false + } + } + + fn fetch_anthropic(&mut self) -> bool { + self.fetch_from_api( + "claude", + "ANTHROPIC_API_KEY", + "https://api.anthropic.com/v1/models", + |req, key| { + req.set("x-api-key", key) + .set("anthropic-version", "2023-06-01") + }, + |_| true, + ) + } + + fn fetch_openai(&mut self) -> bool { + self.fetch_from_api( + "codex", + "OPENAI_API_KEY", + "https://api.openai.com/v1/models", + |req, key| req.set("Authorization", &format!("Bearer {key}")), + // Only coding-relevant models. + |id| { + id.starts_with("gpt-4") + || id.starts_with("o1") + || id.starts_with("o3") + || id.starts_with("o4") + }, + ) + } + + fn fetch_moonshot(&mut self) -> bool { + self.fetch_from_api( + "kimi", + "KIMI_CODE_API_KEY", + "https://api.moonshot.ai/v1/models", + |req, key| req.set("Authorization", &format!("Bearer {key}")), + |_| true, + ) + } +} + +/// Embedded minimal fallback list when the API is unreachable. 
+pub fn embedded_models(provider: &str) -> Vec { + match provider { + "claude" => vec![ + ModelEntry { + provider: "claude".into(), + model_id: "claude-opus-4-8-20251101".into(), + display_name: "Claude Opus 4.8".into(), + }, + ModelEntry { + provider: "claude".into(), + model_id: "claude-sonnet-4-6-20251101".into(), + display_name: "Claude Sonnet 4.6".into(), + }, + ModelEntry { + provider: "claude".into(), + model_id: "claude-haiku-4-5-20251001".into(), + display_name: "Claude Haiku 4.5".into(), + }, + ], + "codex" => vec![ModelEntry { + provider: "codex".into(), + model_id: "codex-mini-latest".into(), + display_name: "Codex Mini".into(), + }], + "kimi" => vec![ + ModelEntry { + provider: "kimi".into(), + model_id: "kimi-latest".into(), + display_name: "Kimi Latest".into(), + }, + ModelEntry { + provider: "kimi".into(), + model_id: "kimi-k2-0711-preview".into(), + display_name: "Kimi K2".into(), + }, + ], + _ => vec![], + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn model_catalog_falls_back_to_embedded_list_on_error() { + let catalog = ModelCatalog::new(); + let models = catalog.available_models("claude"); + assert!( + !models.is_empty(), + "should have embedded fallback for claude" + ); + assert!(models.iter().any(|m| m.model_id.contains("sonnet"))); + } + + #[test] + fn model_catalog_uses_cached_result_within_ttl() { + let mut catalog = ModelCatalog::new(); + catalog.entries.push(ModelEntry { + provider: "claude".to_owned(), + model_id: "claude-test-model".to_owned(), + display_name: "Test Model".to_owned(), + }); + catalog.fetched_at = Some(Instant::now()); + assert!(!catalog.needs_refresh()); + let models = catalog.available_models("claude"); + assert!(models.iter().any(|m| m.model_id == "claude-test-model")); + } + + #[test] + fn populate_on_empty_api_key_leaves_needs_refresh_true() { + // When no API key is set, populate() should not stamp fetched_at. 
+ // needs_refresh() must remain true so a retry fires on the next + // register_session call once a key becomes available. + let catalog = ModelCatalog::new(); + assert!(catalog.needs_refresh(), "fresh catalog must need refresh"); + let mut catalog = catalog; + // Call populate with no env key set — fetch_from_api returns false early. + // Save/restore env to avoid affecting other tests. + let key_was_set = std::env::var("ANTHROPIC_API_KEY").is_ok(); + if key_was_set { + // Can't guarantee a clean test environment; skip. + return; + } + catalog.populate("claude"); + assert!( + catalog.needs_refresh(), + "populate with no API key must leave needs_refresh=true" + ); + } + + #[test] + fn populate_stamps_fetched_at_only_on_success() { + let mut catalog = ModelCatalog::new(); + assert!(catalog.needs_refresh()); + + let key_name = "ANTHROPIC_API_KEY"; + if std::env::var(key_name).is_ok() { + return; + } + catalog.populate("claude"); + assert!( + catalog.needs_refresh(), + "populate returning false must leave needs_refresh=true" + ); + + catalog.fetched_at = Some(Instant::now()); + assert!( + !catalog.needs_refresh(), + "after stamping fetched_at, needs_refresh must be false" + ); + } + + #[test] + fn model_catalog_parses_model_entries_correctly() { + let mut catalog = ModelCatalog::new(); + catalog.entries.push(ModelEntry { + provider: "claude".to_owned(), + model_id: "claude-opus-4-8-20251101".to_owned(), + display_name: "Claude Opus 4.8".to_owned(), + }); + catalog.entries.push(ModelEntry { + provider: "claude".to_owned(), + model_id: "claude-sonnet-4-6-20251101".to_owned(), + display_name: "Claude Sonnet 4.6".to_owned(), + }); + let models = catalog.available_models("claude"); + assert_eq!(models.len(), 2); + assert!(models.iter().any(|m| m.display_name == "Claude Opus 4.8")); + } +} diff --git a/crates/jackin-capsule/src/token_monitor/opencode.rs b/crates/jackin-capsule/src/token_monitor/opencode.rs new file mode 100644 index 000000000..c489da5a5 --- /dev/null +++ 
b/crates/jackin-capsule/src/token_monitor/opencode.rs
@@ -0,0 +1,82 @@
+//! `SQLite` reader for `OpenCode` token usage.
+//!
+//! Reads `~/.local/share/opencode/opencode.db`.
+
+use super::TokenSession;
+
+/// Fold `message` rows newer than `session.last_rowid` into `session.totals`.
+///
+/// Reads at most 1000 rows per call, in ascending `rowid` order, and moves
+/// `session.last_rowid` forward to each row it consumes.
+///
+/// Returns `true` when at least one row was consumed. Returns `false` when the
+/// database file is missing, cannot be opened read-only, or yields no new rows.
+/// When the query cannot be prepared, the result of `poll_session_legacy` is
+/// returned instead.
+pub fn poll_session(session: &mut TokenSession) -> bool {
+    let db_path = "/home/agent/.local/share/opencode/opencode.db";
+    if !std::path::Path::new(db_path).exists() {
+        return false;
+    }
+
+    let Ok(conn) =
+        rusqlite::Connection::open_with_flags(db_path, rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY)
+    else {
+        crate::cdebug!("token monitor: opencode db open failed: {:?}", db_path);
+        return false;
+    };
+
+    let query = "SELECT rowid, input, output, cost FROM message WHERE rowid > ? ORDER BY rowid ASC LIMIT 1000";
+    let Ok(mut stmt) = conn.prepare(query) else {
+        crate::cdebug!("token monitor: opencode db schema mismatch, prepare failed");
+        return poll_session_legacy(session, &conn);
+    };
+
+    let mut changed = false;
+    let last_rowid = session.last_rowid;
+
+    // Rows that fail to decode are dropped by `flatten()`; a failed query
+    // yields an empty batch.
+    let rows: Vec<(i64, i64, i64, Option<f64>)> = stmt
+        .query_map([last_rowid], |row| {
+            Ok((
+                row.get::<_, i64>(0)?,
+                row.get::<_, i64>(1)?,
+                row.get::<_, i64>(2)?,
+                row.get::<_, Option<f64>>(3)?,
+            ))
+        })
+        .ok()
+        .map(|rows| rows.flatten().collect())
+        .unwrap_or_default();
+
+    for (rowid, input, output, cost) in rows {
+        // SQLite integers are signed. A bare `as u64` cast would turn a
+        // negative count into a value near `u64::MAX` and wreck the running
+        // totals, so negative counts are treated as zero.
+        session.totals.input_tokens += u64::try_from(input).unwrap_or(0);
+        session.totals.output_tokens += u64::try_from(output).unwrap_or(0);
+        if let Some(c) = cost {
+            session.totals.cost_usd = Some(session.totals.cost_usd.unwrap_or(0.0) + c);
+        }
+        session.last_rowid = rowid;
+        changed = true;
+    }
+    changed
+}
+
+// Pre-v1.2 OpenCode stored messages as JSON files, not SQLite.
+// Reading that format is not yet implemented.
+fn poll_session_legacy(session: &mut TokenSession, conn: &rusqlite::Connection) -> bool { + let _ = (session, conn); + false +} + +#[cfg(test)] +mod tests { + #[test] + fn opencode_token_reader_db_path_is_correct() { + let expected = "/home/agent/.local/share/opencode/opencode.db"; + assert!(expected.contains("opencode.db")); + } +} diff --git a/crates/jackin-capsule/src/token_monitor/pricing.rs b/crates/jackin-capsule/src/token_monitor/pricing.rs new file mode 100644 index 000000000..55d1e2657 --- /dev/null +++ b/crates/jackin-capsule/src/token_monitor/pricing.rs @@ -0,0 +1,107 @@ +//! Static pricing table for token cost estimation. +//! +//! Used when a JSONL/SQLite record does not include a pre-calculated costUSD. +//! APPROXIMATE — last updated 2026-06-04. Prices in USD per 1M tokens. + +/// Pricing for a specific model. +#[derive(Debug, Clone)] +pub struct ModelPrice { + pub input_per_1m: f64, + pub output_per_1m: f64, + pub cache_read_per_1m: f64, + pub cache_write_per_1m: f64, +} + +/// Estimate cost in USD from token counts using the static pricing table. +/// Returns `None` when the model is not in the table. +pub fn estimate_cost_usd( + _agent: &str, + model: &str, + input_tokens: u64, + output_tokens: u64, + cache_read_tokens: u64, + cache_write_tokens: u64, +) -> Option { + let price = model_price(model)?; + let cost = (input_tokens as f64 / 1_000_000.0) * price.input_per_1m + + (output_tokens as f64 / 1_000_000.0) * price.output_per_1m + + (cache_read_tokens as f64 / 1_000_000.0) * price.cache_read_per_1m + + (cache_write_tokens as f64 / 1_000_000.0) * price.cache_write_per_1m; + Some(cost) +} + +fn model_price(model: &str) -> Option { + // Prices as of 2026-06-04 (APPROXIMATE). 
+ let price = match model { + m if m.contains("claude-opus-4") => ModelPrice { + input_per_1m: 15.0, + output_per_1m: 75.0, + cache_read_per_1m: 1.50, + cache_write_per_1m: 18.75, + }, + m if m.contains("claude-sonnet-4") => ModelPrice { + input_per_1m: 3.0, + output_per_1m: 15.0, + cache_read_per_1m: 0.30, + cache_write_per_1m: 3.75, + }, + m if m.contains("claude-haiku-4") || m.contains("claude-3-5-haiku") => ModelPrice { + input_per_1m: 0.80, + output_per_1m: 4.0, + cache_read_per_1m: 0.08, + cache_write_per_1m: 1.0, + }, + m if m.contains("claude-3-5-sonnet") => ModelPrice { + input_per_1m: 3.0, + output_per_1m: 15.0, + cache_read_per_1m: 0.30, + cache_write_per_1m: 3.75, + }, + m if m.contains("gpt-4o") => ModelPrice { + input_per_1m: 2.50, + output_per_1m: 10.0, + cache_read_per_1m: 1.25, + cache_write_per_1m: 0.0, + }, + m if m.contains("o3") => ModelPrice { + input_per_1m: 10.0, + output_per_1m: 40.0, + cache_read_per_1m: 2.50, + cache_write_per_1m: 0.0, + }, + m if m.contains("kimi") || m.contains("moonshot") => ModelPrice { + input_per_1m: 0.50, + output_per_1m: 1.50, + cache_read_per_1m: 0.05, + cache_write_per_1m: 0.0, + }, + _ => return None, + }; + Some(price) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn pricing_table_returns_estimate_for_known_model() { + let cost = estimate_cost_usd("claude", "claude-sonnet-4-6", 1_000_000, 100_000, 0, 0); + assert!(cost.is_some()); + assert!(cost.unwrap() > 0.0); + } + + #[test] + fn pricing_table_returns_none_for_unknown_model() { + let cost = estimate_cost_usd("claude", "future-unknown-model-xyz", 1000, 100, 0, 0); + assert!(cost.is_none()); + } + + #[test] + fn pricing_table_applies_tiered_calculation() { + // 200k input tokens at sonnet pricing: 200k * $3/1M = $0.60 + let cost = estimate_cost_usd("claude", "claude-sonnet-4-6-20251101", 200_000, 0, 0, 0); + assert!(cost.is_some()); + assert!((cost.unwrap() - 0.60).abs() < 0.01); + } +} diff --git a/crates/jackin-capsule/src/tui/app.rs 
b/crates/jackin-capsule/src/tui/app.rs index 6371895f0..e72b03261 100644 --- a/crates/jackin-capsule/src/tui/app.rs +++ b/crates/jackin-capsule/src/tui/app.rs @@ -186,6 +186,7 @@ pub enum VisibleAgentState { Working, Done, Blocked, + Unknown, } pub fn visible_agent_state_from_protocol(state: AgentState) -> VisibleAgentState { @@ -194,6 +195,7 @@ pub fn visible_agent_state_from_protocol(state: AgentState) -> VisibleAgentState AgentState::Working => VisibleAgentState::Working, AgentState::Done => VisibleAgentState::Done, AgentState::Blocked => VisibleAgentState::Blocked, + AgentState::Unknown => VisibleAgentState::Unknown, } } diff --git a/crates/jackin-capsule/src/tui/components/chrome.rs b/crates/jackin-capsule/src/tui/components/chrome.rs index f187af850..9fbc185d2 100644 --- a/crates/jackin-capsule/src/tui/components/chrome.rs +++ b/crates/jackin-capsule/src/tui/components/chrome.rs @@ -48,25 +48,30 @@ impl StatusBarWidget<'_> { } let glyph_char = match cell.glyph { TabGlyph::None => ' ', + TabGlyph::Working => '◌', TabGlyph::Done => '○', TabGlyph::Blocked => '●', + TabGlyph::Unknown => '·', }; // Cell layout: ` ` — matches emit_tab_row0. let content = format!(" {} {} ", cell.name, glyph_char); let x = area.x.saturating_add(cell.start_col0); buf.set_string(x, area.y, &content, style); // Blocked glyph is bright red; overpaint just that cell, same bg. 
- if matches!(cell.glyph, TabGlyph::Blocked) { + if !matches!(cell.glyph, TabGlyph::None | TabGlyph::Done) { let name_cols = u16::try_from(jackin_tui::display_cols(&cell.name)).unwrap_or(u16::MAX); let glyph_x = x.saturating_add(name_cols).saturating_add(2); + let (glyph, fg, modifier) = match cell.glyph { + TabGlyph::Blocked => ("●", jackin_tui::theme::STATUS_BLOCKED_RED, Modifier::BOLD), + TabGlyph::Working => ("◌", jackin_tui::theme::PHOSPHOR_DARK, Modifier::empty()), + TabGlyph::Unknown => ("·", Color::Rgb(96, 96, 96), Modifier::empty()), + TabGlyph::None | TabGlyph::Done => unreachable!(), + }; buf.set_string( glyph_x, area.y, - "●", - Style::default() - .bg(bg) - .fg(jackin_tui::theme::STATUS_BLOCKED_RED) - .add_modifier(Modifier::BOLD), + glyph, + Style::default().bg(bg).fg(fg).add_modifier(modifier), ); } } diff --git a/crates/jackin-capsule/src/tui/components/status_bar.rs b/crates/jackin-capsule/src/tui/components/status_bar.rs index afc69d746..ea0e16ef9 100644 --- a/crates/jackin-capsule/src/tui/components/status_bar.rs +++ b/crates/jackin-capsule/src/tui/components/status_bar.rs @@ -291,11 +291,13 @@ pub(crate) enum TabGlyph { /// always reserved so cell width stays stable across state /// transitions. None, + Working, /// `Done` — `○`, default tab foreground colour. Done, /// `Blocked` — `●`, rendered in bright red as the high-visibility /// "agent waiting" indicator. Blocked, + Unknown, } /// Resolve the base name + state glyph for a tab. 
The caller builds @@ -314,11 +316,25 @@ fn tab_label(tab: &Tab, states: &[(u64, VisibleAgentState)]) -> (String, TabGlyp .iter() .any(|(sid, st)| sid == id && *st == VisibleAgentState::Done) }); + let has_working = ids.iter().any(|id| { + states + .iter() + .any(|(sid, st)| sid == id && *st == VisibleAgentState::Working) + }); + let has_unknown = ids.iter().any(|id| { + states + .iter() + .any(|(sid, st)| sid == id && *st == VisibleAgentState::Unknown) + }); let glyph = if has_blocked { TabGlyph::Blocked } else if has_done { TabGlyph::Done + } else if has_working { + TabGlyph::Working + } else if has_unknown { + TabGlyph::Unknown } else { TabGlyph::None }; diff --git a/crates/jackin-core/src/agent.rs b/crates/jackin-core/src/agent.rs index daf322f11..189bd61a0 100644 --- a/crates/jackin-core/src/agent.rs +++ b/crates/jackin-core/src/agent.rs @@ -68,12 +68,12 @@ impl Agent { "\ USER agent ARG JACKIN_CACHE_BUST=0 -RUN mkdir -p /tmp/jackin-agent-binaries -COPY --chown=agent:agent {source} /tmp/jackin-agent-binaries/claude +RUN mkdir -p /jackin/runtime/agent-binaries +COPY --chown=agent:agent {source} /jackin/runtime/agent-binaries/claude RUN set -euxo pipefail && \\ : \"${{JACKIN_CACHE_BUST}}\" && \\ - chmod 0755 /tmp/jackin-agent-binaries/claude && \\ - /tmp/jackin-agent-binaries/claude install && \\ + chmod 0755 /jackin/runtime/agent-binaries/claude && \\ + /jackin/runtime/agent-binaries/claude install && \\ claude --version " ), diff --git a/crates/jackin-core/src/agent/adapters/claude.rs b/crates/jackin-core/src/agent/adapters/claude.rs index 6983c5e0b..e373084d6 100644 --- a/crates/jackin-core/src/agent/adapters/claude.rs +++ b/crates/jackin-core/src/agent/adapters/claude.rs @@ -28,12 +28,12 @@ impl AgentRuntime for ClaudeRuntime { "\ USER agent ARG JACKIN_CACHE_BUST=0 -RUN mkdir -p /tmp/jackin-agent-binaries -COPY --chown=agent:agent {source} /tmp/jackin-agent-binaries/claude +RUN mkdir -p /jackin/runtime/agent-binaries +COPY --chown=agent:agent {source} 
/jackin/runtime/agent-binaries/claude RUN set -euxo pipefail && \\ : \"${{JACKIN_CACHE_BUST}}\" && \\ - chmod 0755 /tmp/jackin-agent-binaries/claude && \\ - /tmp/jackin-agent-binaries/claude install && \\ + chmod 0755 /jackin/runtime/agent-binaries/claude && \\ + /jackin/runtime/agent-binaries/claude install && \\ claude --version " ) diff --git a/crates/jackin-core/src/agent/tests.rs b/crates/jackin-core/src/agent/tests.rs index 4aab61d16..745eef971 100644 --- a/crates/jackin-core/src/agent/tests.rs +++ b/crates/jackin-core/src/agent/tests.rs @@ -63,12 +63,12 @@ fn claude_install_block_installs_cached_cli() { "\ USER agent ARG JACKIN_CACHE_BUST=0 -RUN mkdir -p /tmp/jackin-agent-binaries -COPY --chown=agent:agent .jackin-runtime/agent-binaries/claude /tmp/jackin-agent-binaries/claude +RUN mkdir -p /jackin/runtime/agent-binaries +COPY --chown=agent:agent .jackin-runtime/agent-binaries/claude /jackin/runtime/agent-binaries/claude RUN set -euxo pipefail && \\ : \"${JACKIN_CACHE_BUST}\" && \\ - chmod 0755 /tmp/jackin-agent-binaries/claude && \\ - /tmp/jackin-agent-binaries/claude install && \\ + chmod 0755 /jackin/runtime/agent-binaries/claude && \\ + /jackin/runtime/agent-binaries/claude install && \\ claude --version " ); diff --git a/crates/jackin-core/src/manifest.rs b/crates/jackin-core/src/manifest.rs index c74bb0603..96959cb17 100644 --- a/crates/jackin-core/src/manifest.rs +++ b/crates/jackin-core/src/manifest.rs @@ -223,7 +223,7 @@ pub struct ClaudeMarketplaceConfig { pub sparse: Vec, } -#[derive(Debug, Clone, Deserialize, Serialize)] +#[derive(Debug, Clone, Default, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct ClaudeConfig { /// Optional model override; passed to Claude Code with `--model` diff --git a/crates/jackin-image/Cargo.toml b/crates/jackin-image/Cargo.toml index 4d3aa8f36..f4ce46541 100644 --- a/crates/jackin-image/Cargo.toml +++ b/crates/jackin-image/Cargo.toml @@ -25,7 +25,9 @@ flate2 = "1.1" anyhow = "1.0" serde = { version = 
"1.0", features = ["derive"] } serde_json = "1.0" +semver = "1" tempfile = "3.20" +toml = "0.9" tokio = { version = "1", features = ["rt", "process", "io-util", "sync", "macros", "time", "fs"] } reqwest = { version = "0.13", default-features = false } # Sigstore Rust client for cosign bundle verification of capsule-manifest.json. diff --git a/crates/jackin-image/src/agent_binary.rs b/crates/jackin-image/src/agent_binary.rs index 1a5d15506..511ebe56d 100644 --- a/crates/jackin-image/src/agent_binary.rs +++ b/crates/jackin-image/src/agent_binary.rs @@ -26,6 +26,7 @@ const GROK_BASE_FALLBACK: &str = "https://storage.googleapis.com/grok-build-publ pub struct AgentBinary { pub agent: Agent, pub path: PathBuf, + pub version: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -75,6 +76,7 @@ pub async fn ensure_available(paths: &JackinPaths, agent: Agent) -> Result/dev/null \\ #[derive(Debug, Clone, PartialEq, Eq)] pub enum AgentInstall

{ /// Copy the prefetched binary at this location and install from it. - Prefetched(P), + Prefetched { source: P, version: Option }, /// Host prefetch failed; install from the agent's upstream installer at /// build time. ScriptFallback, } +#[derive(Debug, Deserialize)] +struct AgentStatusPackVersion { + agent: String, + validated_versions: String, +} + pub fn render_derived_dockerfile( base_dockerfile: &str, hooks: Option<&HooksConfig>, @@ -152,7 +159,7 @@ pub fn render_derived_dockerfile( // Prefetched entry, or no entry for this supported agent (the // conventional binary path under the build context). let source = match install { - Some(AgentInstall::Prefetched(path)) => path.clone(), + Some(AgentInstall::Prefetched { source, .. }) => source.clone(), _ => format!(".jackin-runtime/agent-binaries/{}", h.slug()), }; install_blocks.push_str(&h.runtime().install_block(&source)); @@ -226,7 +233,9 @@ RUN mkdir -p /jackin/default-home/.claude /jackin/default-home/.codex /jackin/de && ( cp -a /home/agent/.local/share/opencode/. /jackin/default-home/.local/share/opencode/ 2>/dev/null || true ) \ && chown -R agent:agent /jackin/default-home COPY .jackin-runtime/entrypoint.sh /jackin/runtime/entrypoint.sh -RUN chmod +x /jackin/runtime/entrypoint.sh +COPY .jackin-runtime/agent-status/ /jackin/runtime/agent-status/ +RUN chmod +x /jackin/runtime/entrypoint.sh \ + && find /jackin/runtime/agent-status/hooks -type f -name '*.sh' -exec chmod +x {{}} \\; {shell_title_hook_section}{jackin_capsule_section}RUN mkdir -p /jackin/run /jackin/state && chown agent:agent /jackin/run /jackin/state # Make jackin-capsule available as a plain shell command from any session. 
ENV PATH=\"/jackin/runtime:${{PATH}}\" @@ -361,6 +370,10 @@ pub fn create_derived_build_context( let runtime_dir = context_dir.join(".jackin-runtime"); std::fs::create_dir_all(&runtime_dir)?; std::fs::write(runtime_dir.join("entrypoint.sh"), ENTRYPOINT_SH)?; + copy_dir_all( + &workspace_root().join("docker/runtime/agent-status"), + &runtime_dir.join("agent-status"), + )?; // Copy jackin-capsule binary into the build context so the Dockerfile // can COPY it into the image without a network fetch at build time. @@ -420,6 +433,7 @@ pub fn create_derived_build_context( }; let supported = validated.manifest.supported_agents(); + validate_agent_status_pack_versions(&runtime_dir, &supported, agent_installs)?; let dockerfile_path = context_dir.join(".jackin-runtime/DerivedDockerfile"); std::fs::write( &dockerfile_path, @@ -441,6 +455,106 @@ pub fn create_derived_build_context( }) } +fn validate_agent_status_pack_versions( + runtime_dir: &Path, + supported: &[Agent], + installs: &BTreeMap>, +) -> anyhow::Result<()> { + let pack_dir = runtime_dir.join("agent-status/packs"); + for agent in supported { + let pack_path = pack_dir.join(format!("{}.toml", agent.slug())); + if !pack_path.exists() { + continue; + } + let pack: AgentStatusPackVersion = toml::from_str(&std::fs::read_to_string(&pack_path)?) 
+ .map_err(|error| { + anyhow::anyhow!( + "failed to parse agent-status rule pack {}: {error}", + pack_path.display() + ) + })?; + anyhow::ensure!( + pack.agent == agent.slug(), + "agent-status rule pack {} declares agent {:?}, expected {:?}", + pack_path.display(), + pack.agent, + agent.slug() + ); + let range = pack.validated_versions.trim(); + anyhow::ensure!( + !range.is_empty(), + "agent-status rule pack {} has empty validated_versions", + pack_path.display() + ); + let Some(version) = installs.get(agent).and_then(AgentInstall::version) else { + anyhow::bail!( + "agent-status rule pack {} requires {range}, but {} will be installed by the fallback installer with no pinned version", + pack_path.display(), + agent.slug() + ); + }; + let parsed_version = parse_agent_semver(version).map_err(|error| { + anyhow::anyhow!( + "agent-status rule pack {} requires {range}, but {} version {version:?} is not a semver version: {error}", + pack_path.display(), + agent.slug() + ) + })?; + let req = semver::VersionReq::parse(range).map_err(|error| { + anyhow::anyhow!( + "agent-status rule pack {} has invalid validated_versions {range:?}: {error}", + pack_path.display() + ) + })?; + anyhow::ensure!( + is_bounded_agent_status_range(&req), + "agent-status rule pack {} has unbounded validated_versions {range:?}; use an exact, caret, tilde, or lower+upper bounded range", + pack_path.display() + ); + anyhow::ensure!( + req.matches(&parsed_version), + "agent-status rule pack {} is validated for {range}, but pinned {} version is {}", + pack_path.display(), + agent.slug(), + parsed_version + ); + } + Ok(()) +} + +fn is_bounded_agent_status_range(req: &semver::VersionReq) -> bool { + let mut has_lower = false; + let mut has_upper = false; + + for comparator in &req.comparators { + match comparator.op { + semver::Op::Exact | semver::Op::Tilde | semver::Op::Caret => return true, + semver::Op::Greater | semver::Op::GreaterEq => has_lower = true, + semver::Op::Less | semver::Op::LessEq => 
has_upper = true, + semver::Op::Wildcard => {} + _ => {} + } + } + + has_lower && has_upper +} + +fn parse_agent_semver(version: &str) -> Result { + semver::Version::parse(version.trim().trim_start_matches('v')) +} + +impl

AgentInstall

{ + fn version(&self) -> Option<&str> { + match self { + Self::Prefetched { + version: Some(version), + .. + } => Some(version.as_str()), + Self::Prefetched { version: None, .. } | Self::ScriptFallback => None, + } + } +} + fn copy_agent_binaries( runtime_dir: &Path, installs: &BTreeMap>, @@ -450,7 +564,10 @@ fn copy_agent_binaries( let mut staged = BTreeMap::new(); for (agent, install) in installs { let ctx_install = match install { - AgentInstall::Prefetched(host_path) => { + AgentInstall::Prefetched { + source: host_path, + version, + } => { let dst = dst_dir.join(agent.slug()); std::fs::copy(host_path, &dst).map_err(|e| { anyhow::anyhow!( @@ -459,7 +576,10 @@ fn copy_agent_binaries( host_path.display() ) })?; - AgentInstall::Prefetched(format!(".jackin-runtime/agent-binaries/{}", agent.slug())) + AgentInstall::Prefetched { + source: format!(".jackin-runtime/agent-binaries/{}", agent.slug()), + version: version.clone(), + } } AgentInstall::ScriptFallback => AgentInstall::ScriptFallback, }; @@ -482,6 +602,8 @@ fn ensure_runtime_assets_are_included( let mut rules = vec![ "!.jackin-runtime/".to_owned(), "!.jackin-runtime/entrypoint.sh".to_owned(), + "!.jackin-runtime/agent-status/".to_owned(), + "!.jackin-runtime/agent-status/**".to_owned(), "!.jackin-runtime/jackin-capsule".to_owned(), "!.jackin-runtime/agent-binaries/".to_owned(), "!.jackin-runtime/agent-binaries/*".to_owned(), @@ -505,6 +627,10 @@ fn ensure_runtime_assets_are_included( Ok(()) } +fn workspace_root() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("../..") +} + fn copy_dir_all(from: &Path, to: &Path) -> anyhow::Result<()> { std::fs::create_dir_all(to)?; for entry in std::fs::read_dir(from)? 
{ diff --git a/crates/jackin-image/src/derived_image/tests.rs b/crates/jackin-image/src/derived_image/tests.rs index 814742dc7..5ebf7888c 100644 --- a/crates/jackin-image/src/derived_image/tests.rs +++ b/crates/jackin-image/src/derived_image/tests.rs @@ -10,6 +10,15 @@ fn default_agent_binary_path(agent: Agent) -> String { format!(".jackin-runtime/agent-binaries/{}", agent.slug()) } +fn fake_pinned_install(dir: &Path, agent: Agent, version: &str) -> AgentInstall { + let binary = dir.join(agent.slug()); + std::fs::write(&binary, "#!/bin/sh\nexit 0\n").unwrap(); + AgentInstall::Prefetched { + source: binary, + version: Some(version.to_owned()), + } +} + fn extract_agent_install_block(dockerfile: &str, agent: Agent) -> &str { let source = default_agent_binary_path(agent); let copy = format!("COPY --chown=agent:agent {source}"); @@ -55,6 +64,14 @@ fn renders_derived_dockerfile_with_workspace_and_entrypoint() { assert!( dockerfile.contains("COPY .jackin-runtime/entrypoint.sh /jackin/runtime/entrypoint.sh") ); + assert!( + dockerfile.contains("COPY .jackin-runtime/agent-status/ /jackin/runtime/agent-status/") + ); + assert!( + dockerfile.contains( + "find /jackin/runtime/agent-status/hooks -type f -name '*.sh' -exec chmod +x" + ) + ); assert!(!dockerfile.contains("ENV JACKIN_SUPPORTED_AGENTS=")); assert!(dockerfile.contains("ENTRYPOINT [\"/jackin/runtime/jackin-capsule\"]")); } @@ -263,7 +280,10 @@ fn renders_mixed_prefetched_and_script_fallback_installs() { &BTreeMap::from([ ( Agent::Claude, - AgentInstall::Prefetched(claude_source.clone()), + AgentInstall::Prefetched { + source: claude_source.clone(), + version: Some("1.2.3".to_owned()), + }, ), (Agent::Kimi, AgentInstall::ScriptFallback), ]), @@ -286,7 +306,13 @@ fn copy_agent_binaries_stages_prefetched_and_preserves_fallback() { std::fs::write(&host_bin, b"binary").unwrap(); let installs = BTreeMap::from([ - (Agent::Claude, AgentInstall::Prefetched(host_bin.clone())), + ( + Agent::Claude, + AgentInstall::Prefetched { 
+ source: host_bin.clone(), + version: Some("1.2.3".to_owned()), + }, + ), (Agent::Kimi, AgentInstall::ScriptFallback), ]); let staged = copy_agent_binaries(&runtime_dir, &installs).unwrap(); @@ -295,9 +321,10 @@ fn copy_agent_binaries_stages_prefetched_and_preserves_fallback() { // binary is actually copied in; ScriptFallback passes through untouched. assert_eq!( staged.get(&Agent::Claude), - Some(&AgentInstall::Prefetched( - ".jackin-runtime/agent-binaries/claude".to_owned() - )) + Some(&AgentInstall::Prefetched { + source: ".jackin-runtime/agent-binaries/claude".to_owned(), + version: Some("1.2.3".to_owned()), + }) ); assert_eq!( staged.get(&Agent::Kimi), @@ -751,8 +778,13 @@ source = "hooks/source.sh" .unwrap(); let validated = jackin_manifest::validate_role_repo(repo.path()).unwrap(); - let build = create_derived_build_context(repo.path(), &validated, None, None, &BTreeMap::new()) - .unwrap(); + let binaries = tempdir().unwrap(); + let installs = BTreeMap::from([( + Agent::Claude, + fake_pinned_install(binaries.path(), Agent::Claude, "2.1.173"), + )]); + let build = + create_derived_build_context(repo.path(), &validated, None, None, &installs).unwrap(); let dockerignore = std::fs::read_to_string(build.context_dir.join(".dockerignore")).unwrap(); assert!(dockerignore.contains("!hooks/source.sh")); @@ -780,8 +812,13 @@ plugins = [] .unwrap(); let validated = jackin_manifest::validate_role_repo(repo.path()).unwrap(); - let build = create_derived_build_context(repo.path(), &validated, None, None, &BTreeMap::new()) - .unwrap(); + let binaries = tempdir().unwrap(); + let installs = BTreeMap::from([( + Agent::Claude, + fake_pinned_install(binaries.path(), Agent::Claude, "2.1.173"), + )]); + let build = + create_derived_build_context(repo.path(), &validated, None, None, &installs).unwrap(); assert!(build.context_dir.join("Dockerfile").is_file()); assert!( @@ -790,9 +827,36 @@ plugins = [] .join(".jackin-runtime/entrypoint.sh") .is_file() ); + assert!( + build + 
.context_dir + .join(".jackin-runtime/agent-status/hooks/claude/report-hook.sh") + .is_file() + ); + assert!( + build + .context_dir + .join(".jackin-runtime/agent-status/packs/claude.toml") + .is_file() + ); assert!(build.dockerfile_path.is_file()); } +#[test] +fn runtime_shell_reporters_forward_explicit_event_args() { + for relative in [ + "docker/runtime/agent-status/hooks/claude/report-hook.sh", + "docker/runtime/agent-status/hooks/codex/report-hook.sh", + "docker/runtime/agent-status/hooks/opencode/report-hook.sh", + ] { + let content = std::fs::read_to_string(workspace_root().join(relative)).unwrap(); + assert!( + content.contains("report-event \"$@\" --payload-stdin"), + "{relative} must forward explicit --event args to jackin-capsule" + ); + } +} + #[test] fn preserves_runtime_assets_when_repo_dockerignore_excludes_hidden_paths() { let repo = tempdir().unwrap(); @@ -820,12 +884,19 @@ plugins = [] .unwrap(); let validated = jackin_manifest::validate_role_repo(repo.path()).unwrap(); - let build = create_derived_build_context(repo.path(), &validated, None, None, &BTreeMap::new()) - .unwrap(); + let binaries = tempdir().unwrap(); + let installs = BTreeMap::from([( + Agent::Claude, + fake_pinned_install(binaries.path(), Agent::Claude, "2.1.173"), + )]); + let build = + create_derived_build_context(repo.path(), &validated, None, None, &installs).unwrap(); let dockerignore = std::fs::read_to_string(build.context_dir.join(".dockerignore")).unwrap(); assert!(dockerignore.contains("!.jackin-runtime/")); assert!(dockerignore.contains("!.jackin-runtime/entrypoint.sh")); + assert!(dockerignore.contains("!.jackin-runtime/agent-status/")); + assert!(dockerignore.contains("!.jackin-runtime/agent-status/**")); assert!(dockerignore.contains("!.jackin-runtime/DerivedDockerfile")); } @@ -849,12 +920,17 @@ plugins = [] .unwrap(); let validated = jackin_manifest::validate_role_repo(repo.path()).unwrap(); + let binaries = tempdir().unwrap(); + let installs = BTreeMap::from([( + 
Agent::Claude, + fake_pinned_install(binaries.path(), Agent::Claude, "2.1.173"), + )]); let build = create_derived_build_context( repo.path(), &validated, Some("docker.io/myorg/my-role:latest"), None, - &BTreeMap::new(), + &installs, ) .unwrap(); @@ -893,6 +969,147 @@ fn jackin_construct_image_override_handles_digest_pinned_from() { ); } +fn write_status_pack(runtime_dir: &Path, agent: Agent, range: &str) { + let pack_dir = runtime_dir.join("agent-status/packs"); + std::fs::create_dir_all(&pack_dir).unwrap(); + std::fs::write( + pack_dir.join(format!("{}.toml", agent.slug())), + format!( + r#" +schema_version = 1 +agent = "{}" +validated_versions = "{range}" +"#, + agent.slug() + ), + ) + .unwrap(); +} + +#[test] +fn agent_status_pack_version_validation_accepts_matching_pinned_version() { + let tmp = tempdir().unwrap(); + let runtime_dir = tmp.path().join(".jackin-runtime"); + write_status_pack(&runtime_dir, Agent::Claude, ">=1.2.0, <1.3.0"); + let installs = BTreeMap::from([( + Agent::Claude, + AgentInstall::Prefetched { + source: PathBuf::from("/tmp/claude"), + version: Some("1.2.3".to_owned()), + }, + )]); + + validate_agent_status_pack_versions(&runtime_dir, &[Agent::Claude], &installs).unwrap(); +} + +#[test] +fn agent_status_pack_version_validation_rejects_mismatched_pinned_version() { + let tmp = tempdir().unwrap(); + let runtime_dir = tmp.path().join(".jackin-runtime"); + write_status_pack(&runtime_dir, Agent::Claude, ">=1.2.0, <1.3.0"); + let installs = BTreeMap::from([( + Agent::Claude, + AgentInstall::Prefetched { + source: PathBuf::from("/tmp/claude"), + version: Some("1.4.0".to_owned()), + }, + )]); + + let error = validate_agent_status_pack_versions(&runtime_dir, &[Agent::Claude], &installs) + .expect_err("mismatched rule-pack version should fail"); + + assert!(error.to_string().contains("validated for >=1.2.0, <1.3.0")); + assert!(error.to_string().contains("1.4.0")); +} + +#[test] +fn agent_status_pack_version_validation_rejects_unpinned_fallback() 
{ + let tmp = tempdir().unwrap(); + let runtime_dir = tmp.path().join(".jackin-runtime"); + write_status_pack(&runtime_dir, Agent::Claude, ">=1.2.0, <1.3.0"); + let installs = BTreeMap::from([(Agent::Claude, AgentInstall::ScriptFallback)]); + + let error = validate_agent_status_pack_versions(&runtime_dir, &[Agent::Claude], &installs) + .expect_err("rule-pack range requires a pinned binary version"); + + assert!(error.to_string().contains("fallback installer")); + assert!(error.to_string().contains("no pinned version")); +} + +#[test] +fn agent_status_pack_version_validation_rejects_wildcard_range() { + let tmp = tempdir().unwrap(); + let runtime_dir = tmp.path().join(".jackin-runtime"); + write_status_pack(&runtime_dir, Agent::Claude, "*"); + let installs = BTreeMap::from([( + Agent::Claude, + AgentInstall::Prefetched { + source: PathBuf::from("/tmp/claude"), + version: Some("1.2.3".to_owned()), + }, + )]); + + let error = validate_agent_status_pack_versions(&runtime_dir, &[Agent::Claude], &installs) + .expect_err("wildcard rule-pack range should fail image validation"); + + assert!(error.to_string().contains("unbounded validated_versions")); + assert!(error.to_string().contains("\"*\"")); +} + +#[test] +fn agent_status_pack_version_validation_rejects_lower_only_range() { + let tmp = tempdir().unwrap(); + let runtime_dir = tmp.path().join(".jackin-runtime"); + write_status_pack(&runtime_dir, Agent::Claude, ">=0.0.0"); + let installs = BTreeMap::from([( + Agent::Claude, + AgentInstall::Prefetched { + source: PathBuf::from("/tmp/claude"), + version: Some("1.2.3".to_owned()), + }, + )]); + + let error = validate_agent_status_pack_versions(&runtime_dir, &[Agent::Claude], &installs) + .expect_err("lower-only rule-pack range should fail image validation"); + + assert!(error.to_string().contains("unbounded validated_versions")); + assert!(error.to_string().contains("\">=0.0.0\"")); +} + +#[test] +fn bundled_agent_status_packs_require_bounded_versions() { + let root = 
Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .parent() + .unwrap(); + let pack_dir = root.join("docker/runtime/agent-status/packs"); + for agent in [ + Agent::Claude, + Agent::Codex, + Agent::Amp, + Agent::Kimi, + Agent::Opencode, + ] { + let pack_path = pack_dir.join(format!("{}.toml", agent.slug())); + let pack: AgentStatusPackVersion = + toml::from_str(&std::fs::read_to_string(&pack_path).unwrap()).unwrap(); + + assert_eq!(pack.agent, agent.slug(), "{pack_path:?}"); + assert_ne!( + pack.validated_versions.trim(), + "*", + "bundled pack must force image-build validation against a bounded {} version", + agent.slug() + ); + let req = semver::VersionReq::parse(pack.validated_versions.trim()).unwrap(); + assert!( + is_bounded_agent_status_range(&req), + "bundled pack {pack_path:?} must use an exact, caret, tilde, or lower+upper bounded range" + ); + } +} + #[cfg(unix)] #[test] fn rejects_symlinks_in_repo_build_context() { diff --git a/crates/jackin-launch/src/tui/run.rs b/crates/jackin-launch/src/tui/run.rs index 6d7b88647..7f6068caa 100644 --- a/crates/jackin-launch/src/tui/run.rs +++ b/crates/jackin-launch/src/tui/run.rs @@ -4,8 +4,9 @@ use std::io::Write; use anyhow::Context; use crossterm::ExecutableCommand; +use crossterm::cursor::MoveTo; use crossterm::event::{Event, KeyCode, KeyEvent, KeyEventKind, KeyModifiers}; -use crossterm::terminal::{EnterAlternateScreen, LeaveAlternateScreen}; +use crossterm::terminal::{Clear, ClearType, EnterAlternateScreen, LeaveAlternateScreen}; use jackin_tui::ModalOutcome; use jackin_tui::components::{ConfirmState, ErrorPopupState, SelectListState, TextInputState}; use ratatui::layout::Rect; @@ -267,13 +268,10 @@ impl RichRenderer { .context("enabling mouse capture for launch TUI")?; } stdout.execute(crossterm::cursor::Hide)?; + stdout.execute(Clear(ClearType::All))?; + stdout.execute(MoveTo(0, 0))?; let backend = ratatui::backend::CrosstermBackend::new(stdout); - let mut terminal = 
ratatui::Terminal::new(backend)?; - // Wipe whatever the previous surface left on the screen and force a full - // first redraw. Under the host guard we skipped EnterAlternateScreen - // (which would have cleared), so the console's last frame is still on - // the inherited screen — clear it or the cockpit renders over it. - terminal.clear().context("clearing launch screen")?; + let terminal = ratatui::Terminal::new(backend)?; // Ancillary status printers (spinners) go silent while this surface // owns the alternate screen. host.set_rich_surface_active(true); diff --git a/crates/jackin-protocol/src/agent_status.rs b/crates/jackin-protocol/src/agent_status.rs new file mode 100644 index 000000000..a3f8a0234 --- /dev/null +++ b/crates/jackin-protocol/src/agent_status.rs @@ -0,0 +1,187 @@ +//! Protocol-level agent status types. +//! +//! These are the wire types exposed over the control socket. The capsule- +//! internal types (`SessionStatus`, `HookAuthority`, screen detection structs) +//! live in `crates/jackin-capsule/src/agent_status/`; only the summary types +//! that must cross the socket boundary are defined here. + +use serde::{Deserialize, Serialize}; + +/// Raw evidence state before the capsule folds it into an effective status. +/// +/// The capsule uses this same four-state vocabulary internally for arbitration +/// results and wire payloads. Source-specific observations stay in the capsule's +/// evidence structs instead of crossing the protocol boundary. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AgentRawState { + Unknown, + Working, + Blocked, + Idle, +} + +impl AgentRawState { + pub const fn label(self) -> &'static str { + match self { + Self::Unknown => "unknown", + Self::Working => "working", + Self::Blocked => "blocked", + Self::Idle => "idle", + } + } +} + +/// Source of the current status authority. 
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum AgentStatusSource { + /// Reported by a trusted in-container hook/plugin/API bridge. + Reported { source_id: String }, + /// Derived from visible screen pattern matching. + VisibleScreen, + /// Derived from foreground process group identity. + ForegroundProcess, + /// Derived from OSC 133/7 shell integration markers. + ShellIntegration, + /// No authority source — state is unknown. + #[default] + None, +} + +/// Confidence tier for the current status authority. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AgentStatusConfidence { + /// No signal — state is unknown. + #[default] + Unknown, + /// Derived from process corroboration only. + Weak, + /// Screen detection matched a clear visible pattern. + Strong, + /// Hook authority: sequence-valid, process-consistent, fresh. + Authoritative, +} + +/// Summary status report included in `SessionInfo` and `PaneSnapshot` +/// responses. Carries the raw state, source, confidence, and evidence +/// booleans so host consumers can reason about authority without re-parsing +/// terminal text. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentStatusReport { + /// Protocol-level raw state from the authority source. + pub raw_state: AgentRawState, + /// Source that produced the current authority. + pub source: AgentStatusSource, + /// Confidence tier of the current authority. + pub confidence: AgentStatusConfidence, + /// Detected agent slug (e.g. `"claude"`, `"codex"`). + #[serde(skip_serializing_if = "Option::is_none")] + pub detected_agent: Option, + /// Foreground process group ID, if known. + #[serde(skip_serializing_if = "Option::is_none")] + pub foreground_pgid: Option, + /// Screen detector saw an explicit approval/input prompt. 
+ #[serde(default)] + pub visible_blocker: bool, + /// Screen detector saw an idle prompt box. + #[serde(default)] + pub visible_idle: bool, + /// Screen detector saw active working chrome. + #[serde(default)] + pub visible_working: bool, + /// Child process has exited. + #[serde(default)] + pub process_exited: bool, + /// Agent root handed the foreground process group back to a shell-like process. + #[serde(default)] + pub foreground_returned_to_shell: bool, + /// Hook report was found stale and cleared. + #[serde(default)] + pub stale_report: bool, + /// Active descendant/subagent count reported by runtime hooks or bridge reporters. + #[serde(default)] + pub subagents_active: u32, + /// Monotonic revision counter; incremented on every state change. + pub revision: u64, + /// Last revision acknowledged by the operator (seen). + pub last_seen_revision: u64, +} + +impl Default for AgentStatusReport { + fn default() -> Self { + Self { + raw_state: AgentRawState::Unknown, + source: AgentStatusSource::None, + confidence: AgentStatusConfidence::Unknown, + detected_agent: None, + foreground_pgid: None, + visible_blocker: false, + visible_idle: false, + visible_working: false, + process_exited: false, + foreground_returned_to_shell: false, + stale_report: false, + subagents_active: 0, + revision: 0, + last_seen_revision: 0, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn agent_raw_state_labels() { + assert_eq!(AgentRawState::Unknown.label(), "unknown"); + assert_eq!(AgentRawState::Working.label(), "working"); + assert_eq!(AgentRawState::Blocked.label(), "blocked"); + assert_eq!(AgentRawState::Idle.label(), "idle"); + } + + #[test] + fn agent_status_confidence_ordering() { + assert!(AgentStatusConfidence::Authoritative > AgentStatusConfidence::Strong); + assert!(AgentStatusConfidence::Strong > AgentStatusConfidence::Weak); + assert!(AgentStatusConfidence::Weak > AgentStatusConfidence::Unknown); + } + + #[test] + fn 
agent_status_report_default_is_unknown() { + let r = AgentStatusReport::default(); + assert_eq!(r.raw_state, AgentRawState::Unknown); + assert_eq!(r.confidence, AgentStatusConfidence::Unknown); + assert!(!r.visible_blocker); + assert!(!r.visible_working); + assert_eq!(r.subagents_active, 0); + } + + #[test] + fn agent_status_report_roundtrips_json() { + let report = AgentStatusReport { + raw_state: AgentRawState::Working, + source: AgentStatusSource::Reported { + source_id: "claude-hook".to_owned(), + }, + confidence: AgentStatusConfidence::Authoritative, + detected_agent: Some("claude".to_owned()), + foreground_pgid: Some(1234), + visible_working: true, + foreground_returned_to_shell: true, + subagents_active: 2, + revision: 42, + last_seen_revision: 40, + ..Default::default() + }; + let json = serde_json::to_string(&report).unwrap(); + let decoded: AgentStatusReport = serde_json::from_str(&json).unwrap(); + assert_eq!(decoded.raw_state, AgentRawState::Working); + assert_eq!(decoded.confidence, AgentStatusConfidence::Authoritative); + assert!(decoded.foreground_returned_to_shell); + assert_eq!(decoded.subagents_active, 2); + assert_eq!(decoded.revision, 42); + } +} diff --git a/crates/jackin-protocol/src/control.rs b/crates/jackin-protocol/src/control.rs index 468a86d59..dc86c7ba0 100644 --- a/crates/jackin-protocol/src/control.rs +++ b/crates/jackin-protocol/src/control.rs @@ -1,10 +1,9 @@ //! Control channel: length-prefixed JSON request / response messages. //! -//! Used by the host CLI for one-shot queries — `status`, `snapshot`, -//! and future `session.create` / `session.kill` / `session.title` / -//! `events`. The host opens a Unix socket connection, writes one -//! framed JSON request, reads one framed JSON response, and -//! disconnects. +//! Used by the host CLI for one-shot queries such as `status`, +//! `snapshot`, `wait_session_status`, and `session_status_explain`. +//! `events_subscribe` upgrades the same framing to a persistent event +//! 
stream after the first request. use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -14,6 +13,74 @@ pub enum ClientMsg { Status, /// Request the tab/pane tree snapshot. Snapshot, + /// Role-authored cooperative reporter sends raw agent state for one session. + /// Built-in runtime hooks/plugins should use `ReportRuntimeEvent` so + /// daemon-side mapping and gating remain the authority. + ReportAgentState { + session_id: u64, + source_id: String, + agent_label: String, + /// Raw state: "working", "blocked", "idle", "unknown" + raw_state: String, + /// Monotonic sequence number supplied by role-authored reporters. + seq: u64, + /// Nanoseconds since UNIX epoch. + ts_ns: u64, + #[serde(skip_serializing_if = "Option::is_none")] + message: Option, + }, + /// Reporter heartbeat — confirms source is still alive. + HeartbeatAgentAuthority { + session_id: u64, + source_id: String, + seq: u64, + }, + /// Runtime reporter releases its authority (exits or goes stale). + ClearAgentAuthority { session_id: u64, source_id: String }, + /// Runtime bridge reports descendant/subagent lifecycle. + ReportChildAgentState { + parent_session_id: u64, + child_session_id: u64, + raw_state: String, + seq: u64, + }, + /// Runtime hook/plugin forwards a vendor event for daemon-side mapping. + ReportRuntimeEvent { + session_id: u64, + source_id: String, + runtime: String, + event: String, + #[serde(skip_serializing_if = "Option::is_none")] + payload: Option, + }, + /// Subscribe to agent state change events. After this message the + /// connection becomes a persistent streaming channel. + EventsSubscribe { + #[serde(skip_serializing_if = "Option::is_none")] + subscriber_id: Option, + }, + /// Block until a session reaches one of the target statuses. + WaitSessionStatus { + session_id: u64, + target_statuses: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + timeout_ms: Option, + }, + /// Read visible pane text for debugging. 
+ SessionReadVisible { + session_id: u64, + #[serde(skip_serializing_if = "Option::is_none")] + rows: Option, + }, + /// Read the daemon's current status diagnostic bundle for one session. + SessionStatusExplain { session_id: u64 }, + /// One-shot query for current token totals for a session. + TokenGetSession { session_id: u64 }, + /// Query the model catalog for available models. + TokenGetModels { + #[serde(skip_serializing_if = "Option::is_none")] + provider: Option, + }, /// Request the agent registry (codenames, agent types, providers, timestamps). Agents, /// Forward-compat sink for variants added by a newer peer. @@ -35,6 +102,121 @@ pub enum ServerMsg { tabs: Vec, active_tab: u32, }, + /// Pushed to subscribed clients on every effective-status change. + AgentStateChanged { + session_id: u64, + /// Raw detector state: "working", "blocked", "idle", "unknown" + #[serde(skip_serializing_if = "Option::is_none")] + raw_state: Option, + /// Effective status: "working", "blocked", "done", "idle", "unknown" + effective: String, + seen: bool, + /// Authority source description + source: String, + /// Confidence tier: "authoritative", "strong", "weak", "unknown" + #[serde(skip_serializing_if = "Option::is_none")] + confidence: Option, + /// Detected agent slug, if identified + #[serde(skip_serializing_if = "Option::is_none")] + detected_agent: Option, + /// Foreground process group ID + #[serde(skip_serializing_if = "Option::is_none")] + foreground_pgid: Option, + /// Screen detector saw an explicit approval/input prompt + #[serde(default)] + visible_blocker: bool, + /// Screen detector saw an idle prompt + #[serde(default)] + visible_idle: bool, + /// Screen detector saw active working chrome + #[serde(default)] + visible_working: bool, + /// Child process has exited + #[serde(default)] + process_exited: bool, + /// Agent root handed the foreground process group back to a shell-like process + #[serde(default)] + foreground_returned_to_shell: bool, + /// Hook report 
was found stale and cleared + #[serde(default)] + stale_report: bool, + /// Active descendant/subagent count reported by runtime hooks or bridge reporters. + #[serde(default)] + subagents_active: u32, + /// Monotonic sequence number assigned by the authority source. + #[serde(skip_serializing_if = "Option::is_none")] + seq: Option, + /// Nanoseconds since UNIX epoch when the event was emitted + #[serde(skip_serializing_if = "Option::is_none")] + ts_ns: Option, + revision: u64, + /// Last revision seen by the operator + #[serde(skip_serializing_if = "Option::is_none")] + last_seen_revision: Option, + #[serde(skip_serializing_if = "Option::is_none")] + reason: Option, + }, + /// A new session has been created. + SessionSpawned { + session_id: u64, + agent: Option, + label: String, + }, + /// A session has exited. + SessionExited { session_id: u64 }, + /// Token totals for a session have been updated. + TokenUsageChanged { + session_id: u64, + agent: String, + #[serde(skip_serializing_if = "Option::is_none")] + model: Option, + input_tokens: u64, + output_tokens: u64, + cache_read_tokens: u64, + cache_write_tokens: u64, + #[serde(skip_serializing_if = "Option::is_none")] + cost_usd: Option, + ts_ns: u64, + }, + /// Workspace-level roll-up status changed. + WorkspaceStatusChanged { + effective: String, + session_count: u32, + blocked_count: u32, + done_count: u32, + working_count: u32, + ts_ns: u64, + }, + /// Response to `TokenGetSession`. + TokenSessionResult { + session_id: u64, + #[serde(skip_serializing_if = "Option::is_none")] + token_usage: Option, + }, + /// Response to `TokenGetModels`. + TokenModelsResult { + provider: String, + models: Vec, + }, + /// Response to `WaitSessionStatus` — the current state at the time the wait resolved. + SessionStatusResult { + session_id: u64, + effective: String, + revision: u64, + /// `"satisfied"`, `"timeout"`, `"not_found"`. + outcome: String, + }, + /// Response to `SessionReadVisible`. 
+ SessionVisibleText { session_id: u64, lines: Vec }, + /// Response to `SessionStatusExplain`. + SessionStatusExplain { + session_id: u64, + report: serde_json::Value, + }, + /// Welcome frame sent to every connecting client. + Welcome { jackin_protocol_version: String }, + /// Error response. + Error { code: String, message: String }, /// Agent registry: every tab ever opened in this container lifetime. AgentRegistry { records: Vec }, /// Forward-compat sink for variants added by a newer peer. @@ -42,6 +224,18 @@ pub enum ServerMsg { Unknown, } +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct TokenUsageSummary { + pub input_tokens: u64, + pub output_tokens: u64, + pub cache_read_tokens: u64, + pub cache_write_tokens: u64, + #[serde(skip_serializing_if = "Option::is_none")] + pub cost_usd: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + /// One entry in the agent registry, representing a tab that was (or is) open. /// /// Active agents have `exited_at == None`. Exited agents retain their record @@ -75,6 +269,10 @@ pub struct SessionInfo { pub agent: Option, pub state: AgentState, pub active: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub token_usage: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub agent_status_report: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -94,6 +292,8 @@ pub struct PaneSnapshot { /// `None` for shell sessions; the agent slug otherwise. pub agent: Option, pub state: AgentState, + #[serde(skip_serializing_if = "Option::is_none")] + pub agent_status_report: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -103,6 +303,10 @@ pub enum AgentState { Blocked, Done, Idle, + /// State not yet determined. Safer default than `Blocked` when no + /// reliable signal is available. Phase 1 arbitration will replace this + /// with a real detection result. 
+ Unknown, } impl AgentState { @@ -112,6 +316,7 @@ impl AgentState { Self::Blocked => "blocked", Self::Done => "done", Self::Idle => "idle", + Self::Unknown => "unknown", } } } diff --git a/crates/jackin-protocol/src/lib.rs b/crates/jackin-protocol/src/lib.rs index b732971d9..d997e8d95 100644 --- a/crates/jackin-protocol/src/lib.rs +++ b/crates/jackin-protocol/src/lib.rs @@ -7,6 +7,7 @@ //! small constants that name the host↔Capsule runtime contract live //! here too so the two binaries cannot drift. +pub mod agent_status; pub mod control; pub mod provider_adapter; diff --git a/crates/jackin-runtime/src/runtime/image.rs b/crates/jackin-runtime/src/runtime/image.rs index 4e0e7cdc4..53959695d 100644 --- a/crates/jackin-runtime/src/runtime/image.rs +++ b/crates/jackin-runtime/src/runtime/image.rs @@ -54,7 +54,13 @@ pub(super) async fn prepare_runtime_binaries( let agent_futures = agents.into_iter().map(|agent| async move { match jackin_image::agent_binary::ensure_available(paths, agent).await { Ok(binary) => { - Ok::<_, anyhow::Error>((binary.agent, AgentInstall::Prefetched(binary.path))) + Ok::<_, anyhow::Error>(( + binary.agent, + AgentInstall::Prefetched { + source: binary.path, + version: Some(binary.version), + }, + )) } Err(error) => { jackin_diagnostics::emit_compact_line( diff --git a/crates/jackin-runtime/src/runtime/snapshot.rs b/crates/jackin-runtime/src/runtime/snapshot.rs index 3090d9a7c..4bf332192 100644 --- a/crates/jackin-runtime/src/runtime/snapshot.rs +++ b/crates/jackin-runtime/src/runtime/snapshot.rs @@ -135,6 +135,7 @@ fn fetch_snapshot_inner(path: &Path) -> Result { bail!("daemon replied with AgentRegistry; expected Snapshot") } ServerMsg::Unknown => bail!("daemon replied with an unknown ServerMsg variant"), + _ => bail!("daemon replied with unexpected message type; expected Snapshot"), } } diff --git a/crates/jackin-term/src/grid.rs b/crates/jackin-term/src/grid.rs index a8091666e..92fb48545 100644 --- a/crates/jackin-term/src/grid.rs +++ 
b/crates/jackin-term/src/grid.rs @@ -1070,6 +1070,16 @@ impl DamageGrid { self.passthrough .push(PassthroughEvent::CwdChanged(uri.to_owned())); } + (Some(9), Some("4")) => { + let payload: String = params + .get(2..) + .unwrap_or_default() + .iter() + .filter_map(|b| std::str::from_utf8(b).ok()) + .collect::>() + .join(";"); + self.passthrough.push(PassthroughEvent::Progress(payload)); + } (Some(9), Some(msg)) => { self.passthrough .push(PassthroughEvent::Notification(msg.to_owned())); diff --git a/crates/jackin-term/src/grid/perform.rs b/crates/jackin-term/src/grid/perform.rs index 55b7cc620..a2f5db168 100644 --- a/crates/jackin-term/src/grid/perform.rs +++ b/crates/jackin-term/src/grid/perform.rs @@ -37,8 +37,8 @@ impl vte::Perform for DamageGrid { let next_tab = ((self.cursor_col / 8) + 1) * 8; self.cursor_col = next_tab.min(self.cols.saturating_sub(1)); } - // BEL — ignore. - 0x07 => {} + // BEL — attention edge for consumers that retain terminal signals. + 0x07 => self.passthrough.push(PassthroughEvent::Bell), _ => {} } } diff --git a/crates/jackin-term/src/passthrough.rs b/crates/jackin-term/src/passthrough.rs index ee58fd4de..7aca722a8 100644 --- a/crates/jackin-term/src/passthrough.rs +++ b/crates/jackin-term/src/passthrough.rs @@ -10,6 +10,8 @@ /// These events are the typed output side of the PTY byte parser. #[derive(Debug, Clone, PartialEq, Eq)] pub enum PassthroughEvent { + /// BEL / terminal bell. + Bell, /// OSC 0 / OSC 2: window title change. TitleChanged(String), /// OSC 1: window icon name change. @@ -22,6 +24,8 @@ pub enum PassthroughEvent { CwdChanged(String), /// OSC 9 / OSC 99: desktop notification. Notification(String), + /// OSC 9;4 progress state. Carries the payload after `9;4;`. + Progress(String), /// CSI `?2026h` / `?2026l`: synchronized output enable/disable. SynchronizedOutput(bool), /// CSI `?1h` / `?1l`: application cursor keys mode. 
@@ -64,12 +68,14 @@ impl PassthroughEvent { #[must_use] pub fn encode(&self) -> Option> { match self { + Self::Bell => Some(vec![0x07]), // OSC sequences — use BEL terminator (ST `\x07` is widely supported). Self::TitleChanged(title) => Some(format!("\x1b]0;{title}\x07").into_bytes()), Self::IconNameChanged(name) => Some(format!("\x1b]1;{name}\x07").into_bytes()), Self::ClipboardWrite(payload) => Some(format!("\x1b]52;{payload}\x07").into_bytes()), Self::CwdChanged(uri) => Some(format!("\x1b]7;{uri}\x07").into_bytes()), Self::Notification(msg) => Some(format!("\x1b]9;{msg}\x07").into_bytes()), + Self::Progress(payload) => Some(format!("\x1b]9;4;{payload}\x07").into_bytes()), // DEC private mode toggles. Self::SynchronizedOutput(on) => Some(if *on { b"\x1b[?2026h".to_vec() diff --git a/crates/jackin-tui/src/lib.rs b/crates/jackin-tui/src/lib.rs index 87bf94f51..36de3cb63 100644 --- a/crates/jackin-tui/src/lib.rs +++ b/crates/jackin-tui/src/lib.rs @@ -159,6 +159,10 @@ pub const LINK_FG_HOVER: Rgb = Rgb::new(130, 240, 240); /// a `--debug` run. Readable on the white status-bar band. pub const DEBUG_AMBER: Rgb = Rgb::new(204, 92, 0); +/// Amber — used for the Stuck tab glyph and token rate bar below 20% threshold. 
+/// `#ffaa00` +pub const AMBER: Rgb = Rgb::new(255, 170, 0); + /// Neutral gray for unfocused chrome borders — the in-container multiplexer's /// inactive pane border and the host's full-screen non-interactive frames /// (the launch cockpit box, the exit summary box) so chrome reads identically @@ -250,6 +254,7 @@ pub mod ansi { (80, 80, 80) => "\x1b[38;2;80;80;80m", (255, 255, 255) => "\x1b[38;2;255;255;255m", (0, 0, 0) => "\x1b[38;2;0;0;0m", + (255, 170, 0) => "\x1b[38;2;255;170;0m", (180, 255, 180) => "\x1b[38;2;180;255;180m", // ACTION_ACCENT _ => "", } diff --git a/crates/jackin/Cargo.toml b/crates/jackin/Cargo.toml index 1f89cc205..c16aa1baf 100644 --- a/crates/jackin/Cargo.toml +++ b/crates/jackin/Cargo.toml @@ -54,6 +54,7 @@ clap_mangen = "0.3" tokio = { version = "1", features = ["rt", "process", "io-util", "sync", "macros", "time", "fs"] } futures-util = "0.3" fs2 = "0.4" +rustls = { version = "0.23", default-features = false, features = ["aws_lc_rs"] } [build-dependencies] jackin-build-meta = { version = "0.6.0-dev", path = "../jackin-build-meta" } diff --git a/crates/jackin/src/bin/build_jackin_capsule.rs b/crates/jackin/src/bin/build_jackin_capsule.rs index fa73996e9..61e0f15ba 100644 --- a/crates/jackin/src/bin/build_jackin_capsule.rs +++ b/crates/jackin/src/bin/build_jackin_capsule.rs @@ -52,6 +52,8 @@ use jackin::paths::JackinPaths; const MANIFEST_DIR: &str = env!("CARGO_MANIFEST_DIR"); fn main() -> Result<()> { + jackin::install_default_tls_provider(); + let Args { arch, export, diff --git a/crates/jackin/src/console/tui/input/list/tests.rs b/crates/jackin/src/console/tui/input/list/tests.rs index 0fbbecc94..1c604f92c 100644 --- a/crates/jackin/src/console/tui/input/list/tests.rs +++ b/crates/jackin/src/console/tui/input/list/tests.rs @@ -191,6 +191,7 @@ fn live_snapshot() -> crate::runtime::snapshot::InstanceSnapshot { label: "Codex".into(), agent: Some("codex".into()), state: jackin_protocol::control::AgentState::Idle, + agent_status_report: 
None, }], }], active_tab: 0, diff --git a/crates/jackin/src/lib.rs b/crates/jackin/src/lib.rs index f23190904..2ed606579 100644 --- a/crates/jackin/src/lib.rs +++ b/crates/jackin/src/lib.rs @@ -47,3 +47,10 @@ pub mod tui; pub mod workspace; pub use app::run; + +#[doc(hidden)] +pub fn install_default_tls_provider() { + match rustls::crypto::aws_lc_rs::default_provider().install_default() { + Ok(()) | Err(_) => {} + } +} diff --git a/crates/jackin/src/main.rs b/crates/jackin/src/main.rs index 3010753d6..7fa9b15ae 100644 --- a/crates/jackin/src/main.rs +++ b/crates/jackin/src/main.rs @@ -11,6 +11,8 @@ use jackin::cli::role::ConsoleArgs; #[tokio::main(flavor = "current_thread")] async fn main() { + jackin::install_default_tls_provider(); + let cli = Cli::parse(); let debug = cli.debug; diff --git a/deny.toml b/deny.toml index 645d19667..a997894e3 100644 --- a/deny.toml +++ b/deny.toml @@ -49,6 +49,7 @@ exceptions = [ { crate = "potential_utf@0.1.5", allow = ["Unicode-3.0"] }, { crate = "r-efi@5.3.0", allow = ["MIT", "Apache-2.0"] }, { crate = "r-efi@6.0.0", allow = ["MIT", "Apache-2.0"] }, + { crate = "ring@0.17.14", allow = ["Apache-2.0", "ISC"] }, { crate = "rustix@1.1.4", allow = ["Apache-2.0 WITH LLVM-exception", "Apache-2.0", "MIT"] }, { crate = "rustls@0.23.40", allow = ["Apache-2.0", "ISC", "MIT"] }, { crate = "rustls-native-certs@0.8.4", allow = ["Apache-2.0", "ISC", "MIT"] }, @@ -69,6 +70,8 @@ exceptions = [ { crate = "wasip2@1.0.3+wasi-0.2.9", allow = ["Apache-2.0 WITH LLVM-exception", "Apache-2.0", "MIT"] }, { crate = "wasip3@0.4.0+wasi-0.3.0-rc-2026-01-06", allow = ["Apache-2.0 WITH LLVM-exception", "Apache-2.0", "MIT"] }, { crate = "webpki-root-certs@1.0.7", allow = ["CDLA-Permissive-2.0"] }, + { crate = "webpki-roots@0.26.11", allow = ["CDLA-Permissive-2.0"] }, + { crate = "webpki-roots@1.0.7", allow = ["CDLA-Permissive-2.0"] }, { crate = "winapi-util@0.1.11", allow = ["Unlicense", "MIT"] }, { crate = "wit-bindgen@0.51.0", allow = ["Apache-2.0 WITH 
LLVM-exception", "Apache-2.0", "MIT"] }, { crate = "wit-bindgen@0.57.1", allow = ["Apache-2.0 WITH LLVM-exception", "Apache-2.0", "MIT"] }, diff --git a/docker/runtime/agent-status/hooks/amp/plugin.js b/docker/runtime/agent-status/hooks/amp/plugin.js new file mode 100644 index 000000000..de4b672e2 --- /dev/null +++ b/docker/runtime/agent-status/hooks/amp/plugin.js @@ -0,0 +1,16 @@ +const { spawnSync } = require("child_process"); + +function report(event, payload = {}) { + if (!process.env.JACKIN_SESSION_ID) return; + spawnSync("/jackin/runtime/jackin-capsule", ["report-event", "--event", event, "--payload-stdin"], { + input: JSON.stringify(payload), + stdio: ["pipe", "ignore", "ignore"], + }); +} + +module.exports = { + "agent.start": (payload) => report("agent.start", payload), + "tool.call": (payload) => report("tool.call", payload), + "tool.result": (payload) => report("tool.result", payload), + "agent.end": (payload) => report("agent.end", payload), +}; diff --git a/docker/runtime/agent-status/hooks/claude/report-hook.sh b/docker/runtime/agent-status/hooks/claude/report-hook.sh new file mode 100755 index 000000000..dfe39ff1e --- /dev/null +++ b/docker/runtime/agent-status/hooks/claude/report-hook.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# Claude Code hook reporter for jackin' agent runtime status. +# +# Keep this as a dumb pipe: Claude sends the hook payload on stdin, and the +# capsule daemon owns event mapping/gating. + +set -eu + +CAPSULE="/jackin/runtime/jackin-capsule" + +if [ -z "${JACKIN_SESSION_ID:-}" ] || [ ! -x "$CAPSULE" ]; then + exit 0 +fi + +"$CAPSULE" report-event "$@" --payload-stdin 2>/dev/null || true + +# PermissionRequest is synchronous in Claude Code. Observability must not block +# the agent; it only acknowledges that the hook completed. 
+printf '{"continue":true}' diff --git a/docker/runtime/agent-status/hooks/codex/report-hook.sh b/docker/runtime/agent-status/hooks/codex/report-hook.sh new file mode 100755 index 000000000..83582e3aa --- /dev/null +++ b/docker/runtime/agent-status/hooks/codex/report-hook.sh @@ -0,0 +1,11 @@ +#!/bin/sh +set -eu + +CAPSULE="/jackin/runtime/jackin-capsule" + +if [ -z "${JACKIN_SESSION_ID:-}" ] || [ ! -x "$CAPSULE" ]; then + exit 0 +fi + +"$CAPSULE" report-event "$@" --payload-stdin 2>/dev/null || true +exit 0 diff --git a/docker/runtime/agent-status/hooks/opencode/plugin.js b/docker/runtime/agent-status/hooks/opencode/plugin.js new file mode 100644 index 000000000..f1352fcec --- /dev/null +++ b/docker/runtime/agent-status/hooks/opencode/plugin.js @@ -0,0 +1,19 @@ +const { spawnSync } = require("child_process"); + +function report(event, payload = {}) { + if (!process.env.JACKIN_SESSION_ID) return; + spawnSync("/jackin/runtime/jackin-capsule", ["report-event", "--event", event, "--payload-stdin"], { + input: JSON.stringify(payload), + stdio: ["pipe", "ignore", "ignore"], + }); +} + +module.exports = { + "session.status": (payload) => report("session.status", payload), + "session.idle": (payload) => report("session.idle", payload), + "session.error": (payload) => report("session.error", payload), + "permission.asked": (payload) => report("permission.asked", payload), + "permission.replied": (payload) => report("permission.replied", payload), + "tool.execute.before": (payload) => report("tool.execute.before", payload), + "tool.execute.after": (payload) => report("tool.execute.after", payload), +}; diff --git a/docker/runtime/agent-status/hooks/opencode/report-hook.sh b/docker/runtime/agent-status/hooks/opencode/report-hook.sh new file mode 100755 index 000000000..83582e3aa --- /dev/null +++ b/docker/runtime/agent-status/hooks/opencode/report-hook.sh @@ -0,0 +1,11 @@ +#!/bin/sh +set -eu + +CAPSULE="/jackin/runtime/jackin-capsule" + +if [ -z "${JACKIN_SESSION_ID:-}" ] || 
[ ! -x "$CAPSULE" ]; then + exit 0 +fi + +"$CAPSULE" report-event "$@" --payload-stdin 2>/dev/null || true +exit 0 diff --git a/docker/runtime/agent-status/packs/amp.toml b/docker/runtime/agent-status/packs/amp.toml new file mode 100644 index 000000000..aac43d2b3 --- /dev/null +++ b/docker/runtime/agent-status/packs/amp.toml @@ -0,0 +1,35 @@ +schema_version = 1 +agent = "amp" +validated_versions = "=0.0.1781166336-g02cc1e" + +[[rule]] +id = "approval-prompt" +state = "blocked" +priority = 900 +region = "bottom_nonempty:12" +strength = "strong" +requires_all = ["amp wants to execute", "allow"] + +[[rule]] +id = "running-tool" +state = "working" +priority = 700 +region = "bottom:24" +strength = "strong" +requires_all = ["running tool:"] + +[[rule]] +id = "ready-text" +state = "idle" +priority = 500 +region = "bottom:24" +strength = "strong" +requires_all = ["ready for your next task"] + +[[rule]] +id = "arrow-prompt" +state = "idle" +priority = 490 +region = "last_nonempty_line" +strength = "strong" +requires_all = [">"] diff --git a/docker/runtime/agent-status/packs/claude.toml b/docker/runtime/agent-status/packs/claude.toml new file mode 100644 index 000000000..94a33f02d --- /dev/null +++ b/docker/runtime/agent-status/packs/claude.toml @@ -0,0 +1,44 @@ +schema_version = 1 +agent = "claude" +validated_versions = ">=2.1.173, <2.2.0" + +[[rule]] +id = "transcript-overlay" +state = "freeze" +priority = 1000 +region = "bottom:24" +strength = "strong" +requires_all = ["showing detailed transcript", "ctrl+o to toggle"] + +[[rule]] +id = "permission-dialog" +state = "blocked" +priority = 900 +region = "bottom_nonempty:12" +strength = "strong" +requires_all = ["enter to select", "esc to cancel"] +requires_any = ["to navigate", "↑/↓"] + +[[rule]] +id = "working-interrupt" +state = "working" +priority = 700 +region = "bottom:24" +strength = "strong" +requires_all = ["esc to interrupt"] + +[[rule]] +id = "working-token-stats" +state = "working" +priority = 690 +region = 
"bottom:24" +strength = "strong" +requires_all = ["tokens)"] + +[[rule]] +id = "prompt-box" +state = "idle" +priority = 500 +region = "bottom_nonempty:12" +strength = "strong" +requires_all = ["╭", "│ >"] diff --git a/docker/runtime/agent-status/packs/codex.toml b/docker/runtime/agent-status/packs/codex.toml new file mode 100644 index 000000000..3b667a9d5 --- /dev/null +++ b/docker/runtime/agent-status/packs/codex.toml @@ -0,0 +1,41 @@ +schema_version = 1 +agent = "codex" +validated_versions = ">=0.139.0, <0.140.0" + +[[rule]] +id = "confirm-prompt" +state = "blocked" +priority = 900 +region = "bottom:24" +strength = "strong" +requires_all = ["press enter to confirm or esc to cancel"] + +[[rule]] +id = "auth-prompt" +state = "blocked" +priority = 850 +region = "bottom:24" +strength = "strong" +requires_all = ["press enter to continue"] +requires_any = [ + "sign in with chatgpt", + "signinwithchatgpt", + "provide your own api key", + "provideyourownapikey", +] + +[[rule]] +id = "working-block" +state = "working" +priority = 700 +region = "bottom:24" +strength = "strong" +requires_all = ["• working ("] + +[[rule]] +id = "idle-prompt" +state = "idle" +priority = 500 +region = "last_nonempty_line" +strength = "strong" +requires_all = ["›"] diff --git a/docker/runtime/agent-status/packs/kimi.toml b/docker/runtime/agent-status/packs/kimi.toml new file mode 100644 index 000000000..70b85be34 --- /dev/null +++ b/docker/runtime/agent-status/packs/kimi.toml @@ -0,0 +1,35 @@ +schema_version = 1 +agent = "kimi" +validated_versions = ">=0.14.0, <0.15.0" + +[[rule]] +id = "permission-prompt" +state = "blocked" +priority = 900 +region = "bottom:24" +strength = "strong" +requires_all = ["kimi wants permission", "approve"] + +[[rule]] +id = "thinking" +state = "working" +priority = 700 +region = "bottom:24" +strength = "strong" +requires_any = ["thinking...", "processing..."] + +[[rule]] +id = "ready-text" +state = "idle" +priority = 500 +region = "bottom:24" +strength = "strong" 
+requires_all = ["kimi ready"] + +[[rule]] +id = "arrow-prompt" +state = "idle" +priority = 490 +region = "last_nonempty_line" +strength = "strong" +requires_all = [">"] diff --git a/docker/runtime/agent-status/packs/opencode.toml b/docker/runtime/agent-status/packs/opencode.toml new file mode 100644 index 000000000..4fe929d2a --- /dev/null +++ b/docker/runtime/agent-status/packs/opencode.toml @@ -0,0 +1,27 @@ +schema_version = 1 +agent = "opencode" +validated_versions = ">=1.17.3, <1.18.0" + +[[rule]] +id = "permission-ui" +state = "blocked" +priority = 900 +region = "bottom_nonempty:12" +strength = "strong" +requires_all = ["permission required", "enter", "select"] + +[[rule]] +id = "processing" +state = "working" +priority = 700 +region = "bottom:24" +strength = "strong" +requires_all = ["processing...", "ctrl+c to cancel"] + +[[rule]] +id = "ready-text" +state = "idle" +priority = 500 +region = "bottom:24" +strength = "strong" +requires_all = ["opencode ready"] diff --git a/docker/runtime/entrypoint.sh b/docker/runtime/entrypoint.sh index d7e373ddf..1abf9bd98 100644 --- a/docker/runtime/entrypoint.sh +++ b/docker/runtime/entrypoint.sh @@ -31,6 +31,13 @@ run_hook() { # values. /jackin/runtime/jackin-capsule runtime-setup +# ── agent runtime status env ─────────────────────────────────────────── +# JACKIN_SESSION_ID is set by the daemon before spawning. Export remaining +# status vars so hook scripts and subprocesses inherit them. +export JACKIN_STATUS_SOCKET="${JACKIN_STATUS_SOCKET:-/jackin/run/jackin.sock}" +export JACKIN_STATUS_SOURCE="${JACKIN_STATUS_SOURCE:-wrapper-${JACKIN_SESSION_ID:-0}}" +export JACKIN_AGENT_RUNTIME="${JACKIN_AGENT:-unknown}" + # ── agent-specific setup ─────────────────────────────────────────── # # Per-session file setup already ran in `jackin-capsule runtime-setup`. 
diff --git a/docs/content/docs/commands/console.mdx b/docs/content/docs/commands/console.mdx index b855e0277..add5a3526 100644 --- a/docs/content/docs/commands/console.mdx +++ b/docs/content/docs/commands/console.mdx @@ -31,7 +31,7 @@ The console opens directly to a workspace manager list — no separate workspace - **Launch an agent into a workspace.** Each workspace row opens the role picker in the sidebar, even when there is only one eligible role. Confirm the highlighted role to launch. If the role supports multiple agents and the workspace has no `default_agent`, the console asks for the runtime next. - **Use your current directory as a workspace.** A synthetic row at the top of the manager list mounts your `cwd` at the same path inside the container. Useful for quick one-off launches without saving a workspace definition. -- **Navigate running instances in the workspace tree.** Workspace rows show a `*` indicator when containers are active. Press `→` to expand the tree and reveal instance child rows — each shows the instance ID, role, agent runtime, and status. Navigate to an instance row and the right panel switches to a session pane showing the live tab/pane tree when the Capsule daemon is reachable, with recorded sessions as the fallback. From an instance row, `Enter` reconnects, `N` opens an agent picker to start a new session, `X` opens an ephemeral zsh shell, `T` stops the running container (preserving local recovery state), and `P` ejects then purges everything (containers, network, certs volume, AND local recovery state) after a confirmation prompt. Press `←` from an instance row to collapse the tree and return to the workspace row. The workspace detail pane shows a compact running-instances badge when the tree is collapsed. From any workspace row, `r` reconnects or recovers, `a` starts another agent session, `x` opens a shell, `i` prints a read-only state report, and `p` purges. 
+- **Navigate running instances in the workspace tree.** Workspace rows show a `*` indicator when containers are active. Press `→` to expand the tree and reveal instance child rows — each shows the instance ID, role, agent runtime, and status. Navigate to an instance row and the right panel switches to a session pane showing the live tab/pane tree when the Capsule daemon is reachable, with recorded sessions as the fallback. Session rows and tab badges use the same public states as the in-container multiplexer: `working`, `blocked`, `done`, `idle`, and `unknown`; `done` clears to `idle` once you view the pane. From an instance row, `Enter` reconnects, `N` opens an agent picker to start a new session, `X` opens an ephemeral zsh shell, `T` stops the running container (preserving local recovery state), and `P` ejects then purges everything (containers, network, certs volume, AND local recovery state) after a confirmation prompt. Press `←` from an instance row to collapse the tree and return to the workspace row. The workspace detail pane shows a compact running-instances badge when the tree is collapsed. From any workspace row, `r` reconnects or recovers, `a` starts another agent session, `x` opens a shell, `i` prints a read-only state report, and `p` purges. - **Open the workspace editor.** Edit the workspace's roles, environment variables, and authentication settings. Each editor surface lives on its own tab inside the editor. - **Open Settings.** Manage operator-level `jackin config` surfaces from the `Mounts`, `Environments`, `Auth`, and `Trust` tabs. diff --git a/docs/content/docs/reference/capsule/index.mdx b/docs/content/docs/reference/capsule/index.mdx index 68d1ffb51..ebaddd63d 100644 --- a/docs/content/docs/reference/capsule/index.mdx +++ b/docs/content/docs/reference/capsule/index.mdx @@ -51,7 +51,7 @@ does not imply that every pane is the same agent. 
**Attach client mode (PID ≠ 1).** When the operator runs `docker exec -it jackin-capsule`, the binary connects to the daemon socket, bridges the operator's terminal to the active session, and forwards attach frames between the terminal and daemon. -**Runtime setup subcommand (PID ≠ 1).** `/jackin/runtime/entrypoint.sh` calls `jackin-capsule runtime-setup` before each agent exec. The subcommand performs idempotent container setup (git identity, GitHub HTTPS rewrites, and the `gh` credential helper), installs the shared git trailer hook on demand, and refreshes per-agent home/auth handoff state. The hook at `/jackin/state/git-hooks/prepare-commit-msg` is a symlink back to `/jackin/runtime/jackin-capsule`, so Git invokes Rust code directly for `Co-authored-by` and `Signed-off-by` normalization instead of a shell script. The container init marker lives under `/jackin/state`, so global git/GitHub setup runs once per running container; per-session auth handoff still refreshes before every agent launch. +**Runtime setup subcommand (PID ≠ 1).** `/jackin/runtime/entrypoint.sh` calls `jackin-capsule runtime-setup` before each agent exec. The subcommand performs idempotent container setup (git identity, GitHub HTTPS rewrites, and the `gh` credential helper), installs the shared git trailer hook on demand, refreshes per-agent home/auth handoff state, and repairs container-local agent status reporter config. For Codex, that includes `~/.codex/hooks.json` plus `[tui]` notification settings in `~/.codex/config.toml` so `agent-turn-complete` and `approval-requested` reach the capsule as OSC 9 notifications. The hook at `/jackin/state/git-hooks/prepare-commit-msg` is a symlink back to `/jackin/runtime/jackin-capsule`, so Git invokes Rust code directly for `Co-authored-by` and `Signed-off-by` normalization instead of a shell script. 
The container init marker lives under `/jackin/state`, so global git/GitHub setup runs once per running container; per-session auth handoff still refreshes before every agent launch. Those modes deliberately keep ownership narrow: @@ -92,8 +92,16 @@ Current control messages: |---|---|---| | `{"type":"status"}` | `{"type":"session_list","sessions":[...]}` | `jackin-capsule status`, host session inventory, `hardline --inspect` | | `{"type":"snapshot"}` | `{"type":"snapshot","tabs":[...],"active_tab":N}` | `jackin console` live preview and pane picker | +| `{"type":"wait_session_status",...}` | `{"type":"session_status_result",...}` | Scripts that block until a session reaches one of the requested public states | +| `{"type":"session_read_visible",...}` | `{"type":"session_visible",...}` | Diagnostic tooling that needs the current visible grid for one pane | +| `{"type":"session_status_explain",...}` | `{"type":"session_status_explain",...}` | `jackin-capsule status explain` and `status capture` | +| `{"type":"report_runtime_event",...}` | one-way forwarded to the daemon loop | Agent hook and plugin reporters | +| `{"type":"report_child_agent_state",...}` | one-way forwarded to the daemon loop | Bridge reporters that roll descendant agent work into a parent pane | +| `{"type":"events_subscribe",...}` | persistent stream of `ServerMsg` events | Host daemons and tools that need live session, token, and workspace status changes | -`SessionInfo` carries `id`, `label`, optional `agent`, public `state`, and `active`. `TabSnapshot` carries render-order tabs, each tab's focused pane id, and one `PaneSnapshot` per pane leaf. `PaneSnapshot` carries `session_id`, label, optional agent slug, and state. The public states are `working`, `blocked`, `done`, and `idle`; today they are derived inside `crates/jackin-capsule/src/session.rs`, with the fuller state authority tracked in [Agent runtime status authority](/reference/roadmap/agent-runtime-status/). 
+`SessionInfo` carries `id`, `label`, optional `agent`, public `state`, and `active`. `TabSnapshot` carries render-order tabs, each tab's focused pane id, and one `PaneSnapshot` per pane leaf. `PaneSnapshot` carries `session_id`, label, optional agent slug, public state, and an `AgentStatusReport` when the daemon has evidence. The report exposes the raw detector state, source, confidence, detected agent, foreground pgid, visible-screen booleans, `process_exited`, `foreground_returned_to_shell`, `stale_report`, `subagents_active`, and revision fields. The public states are `working`, `blocked`, `done`, `idle`, and `unknown`; `stuck` is a diagnostic overlay exposed in `status explain`, not a replacement state. Raw detector states are `unknown`, `working`, `blocked`, and `idle`; `done` is derived when raw idle returns after work and the pane has not yet been acknowledged by focus or attach. The authority is implemented by , with the evidence snapshot in , arbitration in , debounce/watchdog policy in , and rule-pack engine in . Rule packs load from embedded fallbacks, then `/jackin/runtime/agent-status/packs/`, then the operator override directory selected by `JACKIN_STATUS_PACK_DIR` or `$HOME/.jackin/agent-status/packs/`. + +`EventsSubscribe` keeps the control connection open after the initial request and sends length-prefixed `ServerMsg` frames. The live event stream includes `AgentStateChanged` for per-session effective-state transitions, `SessionSpawned`, `SessionExited`, `TokenUsageChanged`, and `WorkspaceStatusChanged`. `WorkspaceStatusChanged` is emitted when the daemon's aggregate status snapshot changes; it carries the rolled-up `effective` status plus `session_count`, `blocked_count`, `done_count`, and `working_count`. The roll-up precedence is `blocked > done > working > idle > unknown`, matching the status authority contract used by console badges and future host-daemon consumers. 
Current attach frames: @@ -390,7 +398,7 @@ First build takes 2–3 minutes; subsequent builds are incremental via cargo's d | | Adds the per-instance socket directory mount to `docker run`; connects via `docker exec jackin-capsule` | | | Queries socket for session list; spawns new sessions via `jackin-capsule new` | | | Queries socket to detect live sessions before cleanup | -| [Agent runtime status authority roadmap item](/reference/roadmap/agent-runtime-status/) | Defines the in-container state authority and consumes agent-state events from the socket once the event stream is implemented | +| [Agent runtime status authority roadmap item](/reference/roadmap/agent-runtime-status/) | Design record and regression checklist for the shipped in-container state authority | | [jackin' daemon roadmap item](/reference/roadmap/jackin-daemon/) | The daemon will subscribe to each container's socket and maintain a cross-instance session index | | [Multiplexer Design Rules](/reference/capsule/multiplexer-design-rules/) | Passthrough contract, input routing rules, verification checklist | diff --git a/docs/content/docs/reference/roadmap/agent-attention-prompts.mdx b/docs/content/docs/reference/roadmap/agent-attention-prompts.mdx index 721674e0d..3b905a794 100644 --- a/docs/content/docs/reference/roadmap/agent-attention-prompts.mdx +++ b/docs/content/docs/reference/roadmap/agent-attention-prompts.mdx @@ -25,7 +25,7 @@ The end state, from the operator's seat: ## How jackin' knows "I'm waiting" -Attention prompts do not own state detection. They consume the [agent runtime status authority](/reference/roadmap/agent-runtime-status/), which runs inside `jackin-capsule` and arbitrates semantic runtime reports, foreground-process ownership, visible-screen signals, shell markers, cursor/readiness probes, and weak output activity. A notification fires only after that authority produces an effective `blocked` or derived `done` transition. +Attention prompts do not own state detection. 
They consume the [agent runtime status authority](/reference/roadmap/agent-runtime-status/), which runs inside `jackin-capsule` and arbitrates semantic runtime reports, foreground-process ownership, visible-screen signals, shell markers, OSC evidence, process evidence, and output recency as watchdog input. A notification fires only after that authority produces an effective `blocked` or derived `done` transition. This avoids the failure mode where a notification feature invents its own "no output for N seconds means waiting" heuristic. Silence is evidence, not truth. The state authority is the single place that decides whether a quiet agent is blocked, still working, idle, done, unknown, or stuck. diff --git a/docs/content/docs/reference/roadmap/agent-orchestrator-research.mdx b/docs/content/docs/reference/roadmap/agent-orchestrator-research.mdx index 48e0008b3..ec3e628ce 100644 --- a/docs/content/docs/reference/roadmap/agent-orchestrator-research.mdx +++ b/docs/content/docs/reference/roadmap/agent-orchestrator-research.mdx @@ -213,7 +213,7 @@ present in jackin' today; **planned** means a roadmap item already exists; | Mount kinds beyond `shared`/`readonly` | `writable`, `readable`, `isolated`, `tmpfs` | Partial — `shared` + `readonly`; *gap on `tmpfs` and ephemeral, see [Ephemeral mount modes](/reference/roadmap/ephemeral-mount-modes/)* | | Multi-provider agent runtime | OpenCode + Codex (one per session) | Partial — basic built-in runtime launch shipped; parity work remains in [multi-runtime](/reference/roadmap/multi-runtime-support/) | | Resource limits per agent | `memory-high`, `memory-max`, `cpu` (cgroups) | *Gap — see [Declarative resource limits](/reference/roadmap/declarative-resource-limits/)* | -| Live agent status (idle/busy/question) | Yes — derived from opencode SSE events | *Gap — see [Agent runtime status](/reference/roadmap/agent-runtime-status/)* | +| Live agent status (idle/busy/question) | Yes — derived from opencode SSE events | Have — shipped 
via the [Agent runtime status](/reference/roadmap/agent-runtime-status/) authority | | Live machine resource panel (CPU/RAM/disk) | Yes — `/proc/stat`, `/proc/meminfo`, sampled per 2s | *Gap — see [Console resource panel](/reference/roadmap/console-resource-panel/)* | | Per-agent token / cost / OOM tracking | Yes — usage aggregation service | *Gap — see [Token & Cost Telemetry](/reference/roadmap/token-cost-telemetry/)* | | Agent to operator tag protocol | Yes — `` / `` / `` | *Gap — see [Agent tag protocol](/reference/roadmap/agent-tag-protocol/)* | diff --git a/docs/content/docs/reference/roadmap/agent-runtime-status.mdx b/docs/content/docs/reference/roadmap/agent-runtime-status.mdx index 5d88f9fba..a84dbeefc 100644 --- a/docs/content/docs/reference/roadmap/agent-runtime-status.mdx +++ b/docs/content/docs/reference/roadmap/agent-runtime-status.mdx @@ -2,387 +2,438 @@ title: "Agent Runtime Status Authority" --- -**Status**: Open — design proposal (Phase 2, [Agent Orchestrator Research Program](/reference/roadmap/agent-orchestrator-research/)) +**Status**: Implemented in V1 ([Agent Orchestrator Research Program](/reference/roadmap/agent-orchestrator-research/)). This page is retained as the **design record and regression checklist** because multiple open roadmap items still link to the status authority as a load-bearing contract. The protocol surface, state vocabulary, event stream, control-channel methods, token readers, capsule badge rendering, evidence model, arbitration path, semantic reporter channel, process/OSC evidence, rule-pack engine, runtime hook/plugin assets, focus acknowledgment path, and diagnostics commands have landed. The **Current Implementation State** section is the current verification record; the **Implementation Blueprint** remains the design record and regression checklist. 
## Problem -jackin' needs to know what each in-container agent is actually doing: actively working, waiting on the operator, finished but unseen, idle and reviewed, crashed, or suspiciously stuck. The current multiplexer code has the protocol labels but not the authority behind them. In , `Session::refresh_state` still reduces state to "output in the last few seconds means working; otherwise blocked." That is not good enough for a first-class agent orchestrator. A quiet agent can be thinking, waiting for network, running a long tool, sitting at an input box, or dead. Treating all quiet periods as `blocked` creates false notifications, noisy dashboards, bad queue dispatch, and weak operator trust. - -The roadmap item is the state authority jackin' needs before agent attention prompts, autonomous task queues, Desktop Agent Hub, or cost/resource correlation can be reliable. It replaces timer-only inference with a layered model that is conservative by default and precise when the runtime exposes semantic events. +jackin' needs to know what each in-container agent is actually doing: actively working, waiting on the operator, finished but unseen, idle and reviewed, crashed, or suspiciously stuck. A quiet agent can be thinking, waiting for network, running a long tool, sitting at an input box, or dead — and a noisy pane can be a blocked dialog repainting itself. Treating activity as `working` and silence as `blocked` creates false notifications, noisy dashboards, bad queue dispatch, and weak operator trust. This authority must exist before agent attention prompts, autonomous task queues, Desktop Agent Hub, and cost/resource correlation can be reliable. ## Target -jackin' should become the native, agentic terminal orchestrator: agents remain visible in real PTYs, but jackin' understands enough of their lifecycle to route attention and automation without babysitting. 
The operator should see one consistent status vocabulary across `jackin console`, `jackin hardline`, the daemon, Desktop Agent Hub, and scripts: +One consistent status vocabulary across `jackin console`, `jackin hardline`, the daemon, Desktop Agent Hub, and scripts: | Public status | Meaning | |---|---| -| `working` | The agent is actively generating, running tools, processing a task, or otherwise not ready for operator input. | -| `blocked` | The agent is waiting on an operator decision, approval prompt, question, auth prompt, or other interactive input. | -| `done` | The agent has returned to idle after work and the operator has not yet reviewed that pane/session. | -| `idle` | The agent is idle and already seen, or the pane is a shell/no active agent. | -| `unknown` | jackin' cannot identify a reliable state yet. Unknown is safer than pretending silence means blocked. | -| `stuck` | Diagnostic overlay for "we expected progress, but no reliable state transition happened before a timeout." This should not replace `blocked`; it is a health signal used for escalation and debugging. | - -Internally, the raw runtime detector should track `unknown | working | blocked | idle`, plus per-pane `seen` metadata. `done` is derived from raw `idle + !seen`, not reported as a raw runtime state. This matches the useful part of Herdr's model and avoids conflating "the process is idle" with "the operator has reviewed the result." - -## Research Snapshot - -This design is based on a May 28, 2026 source review of [Herdr](https://github.com/ogulcancelik/herdr) and [multicode](https://github.com/graemerocher/multicode), plus the existing jackin' Capsule runtime surface. Re-check those repositories before implementation because both projects move quickly. - -| Project | Snapshot reviewed | License | What mattered | -|---|---|---|---| -| Herdr | `ogulcancelik/herdr` commit `2d8b4e2db4642dede084801c03516046f205626b`; latest release observed as `v0.6.4` on May 27, 2026. 
| AGPL-3.0-or-later | Process ownership, visible-screen fallback, socket reports, central arbitration, and `done` until viewed. | -| multicode | `graemerocher/multicode` commit `4a9852b4a5ca62134e59bcc78a2dc0fb704d47b4`; pushed May 5, 2026. | Apache-2.0 | Codex app-server status, OpenCode SSE/session APIs, descendant session roll-up, and cooperative state files. | -| jackin' | Current branch state in , , and . | Apache-2.0 | Built-in agents are Claude Code, Codex, Amp, Kimi, and OpenCode; Capsule already owns the PTY, parser, socket, and runtime-local setup path. | - -Herdr is AGPL-3.0 and cannot be embedded in jackin' Apache-2.0 code. The implementation must be written from scratch. The value is the product shape and signal hierarchy, not source reuse. - -Additional referenced projects reviewed on May 28, 2026: - -| Project | Snapshot reviewed | What mattered | -|---|---|---| -| CCManager | `kbwo/ccmanager` commit `1e123a9`; MIT; latest release observed as `v4.1.18` on May 17, 2026. | `xterm/headless` screen capture, 100ms polling, Claude idle debounce, prompt-box-region parsing, false-positive handling for cursor-addressed redraws, and transition hooks. | -| Agent Session Manager | `izll/agent-session-manager` commit `7432789`; MIT; latest release observed as `v0.7.8` on January 24, 2026. | `tmux capture-pane` classifiers, waiting-over-busy priority, followed-window aggregation, and Claude prompt-box parsing. | -| WezTerm Agent Deck | `Eric162/wezterm-agent-deck` commit `0bdd442`; MIT; no release observed. | Foreground process info, child-process fallback, pane-title fallback, short detection caches, recent-line windows, and tests for stale working false positives. | -| ccmux | `skzv/ccmux` commit `d15277c`; FSL-1.1-MIT; latest release observed as `v0.1.15` on May 27, 2026. | Daemon poll loop over real tmux panes, `needs_input` transition notifications, sleep inhibition while sessions are active, and hermetic tmux e2e tests. 
| -| tmux-agent-status | `samleeney/tmux-agent-status` commit `efefbed`; no license metadata observed. | Hook-owned status files, per-pane roll-up, wait/park states, and regression coverage that hook-tracked done state is not reactivated by process polling. | -| agent-deck | `asheshgoplani/agent-deck` commit `519e9c5`; MIT; latest release observed as `v1.9.42` on May 27, 2026. | Claude/Gemini hook injection, hook freshness windows, fsnotify recovery, status event files, done sentinels, control-pipe revival, and scratch config ownership. | -| Codeman | `Ark0N/Codeman` commit `2cfccc7`; MIT; latest release observed as `codeman@0.7.0` on May 25, 2026. | Multi-layer idle detection, AI idle checker fallback, circuit breaker/stuck recovery, subagent visualization, and OpenCode plugin/API bridge risk analysis. | -| Codemux, claudeye, TUICommander, Zylos, VS Code, and Windows Terminal | Official docs or package docs reviewed on May 28, 2026. | Hook-driven status indicators, tmux pane overlays, session-binding environment variables, cursor-position readiness probes, and OSC shell-integration markers. | - -## Prior Art - -### Herdr - -[Herdr](https://herdr.dev/) is the strongest current public reference for this problem. Its docs describe the operator-facing feature directly: the sidebar shows blocked, working, done, and idle agents; workspaces roll up to their most urgent state; detection works through foreground process and terminal output with zero config, with a socket API for runtimes that expose hooks. Its source confirms that the important behavior is not a single regex or timer; it is the arbitration layer that decides which signal is authoritative when signals disagree. - -Concepts to borrow, not code: - -- **Process ownership first.** Herdr identifies which agent owns a pane by reading the foreground process group, then matching known agent binaries. It handles wrappers such as `node`, `bun`, `python`, and shells before deciding the real agent label. 
jackin' can do this more cleanly inside the container because it sees the real agent process instead of a host-side terminal wrapper. -- **Visible terminal evidence is current-screen evidence.** Herdr reads the bottom/recent terminal screen, not arbitrary scrollback. Its default detection window is the last 24 rows, which is a good starting bound for jackin' screen fixtures. -- **Per-agent visible signals are deliberately narrow.** Herdr separates `visible_blocker`, `visible_idle`, and `visible_working`. Only strong current UI states should override semantic reports. -- **Semantic reports are primary when they are fresh and consistent.** Herdr's socket reports carry source, agent label, state, message/custom status, sequence, and session references. jackin' should expose the same concept through its own Capsule control protocol, not Herdr's wire format. -- **Central arbitration is the product.** Hook authority, visible blockers, visible idle, visible working, process exit, stale reports, and sequence numbers flow through one state machine. The UI consumes the result; it does not re-detect state. -- **Done is an acknowledgement overlay.** Herdr maps raw idle plus unseen to public `done`; once the pane is viewed, it becomes `idle`. -- **Attention roll-up prioritizes attention, not activity.** Herdr's effective priority is `blocked > done > working > idle > unknown`. A finished unseen pane deserves attention before a pane that is merely busy in the background. - -Herdr's implementation also exposes concrete edge cases jackin' should design for from the start: stale hook authority after process exit, stale environment/session references, Codex install-process false positives, blocked transitions during plan/approval flows, wait commands that already start in the target state, release drift for OpenCode-like runtime UIs, and per-agent display variants such as Hermes labels. 
These are evidence that runtime status needs source validation, source-specific sequence numbers, process identity checks, and fixture-driven detector tests. - -### Herdr Detector Notes - -The source review found the following runtime-specific patterns worth validating against jackin' fixtures: +| `working` | Actively generating, running tools, processing — not ready for operator input. | +| `blocked` | Waiting on an operator decision: approval prompt, question, auth prompt, interactive input. | +| `done` | Returned to idle after work; the operator has not yet reviewed that pane. | +| `idle` | Idle and already seen, or a shell pane with no active agent. | +| `unknown` | No reliable evidence. Unknown is safer than pretending. | +| `stuck` | Diagnostic overlay: progress was expected, no reliable transition arrived in time. Never replaces `blocked`. | -| Agent family | Useful Herdr signal | jackin' implementation note | -|---|---|---| -| Claude Code | Permission/proceed/waiting prompts, tab/keyboard amendment prompts, chat/review/interview/selection prompts, spinner/interrupt chrome, visible prompt box idle state. | Use semantic hooks first. Screen fallback should match only visible prompt/approval regions and should not treat old output as a blocker. | -| Codex | Confirmation prompts such as "press enter to confirm or esc to cancel", yes/no prompts, "allow command?", `• Working (` status text, background terminal wait status, and `›` idle prompt. | Structured Codex app-server or hook state should win when available. Visible detection should explicitly avoid matching Codex installer/update output as an active agent. | -| OpenCode | Permission-required UI, question prompts with dismiss/enter/select affordances, and interrupt/working chrome. | OpenCode's own API/event surface is a better primary source; screen detection remains the fallback for direct PTY sessions. 
| -| Kimi | Approval/request prompts, approve once/session/reject choices, and moon-phase/braille thinking indicators. | Start heuristic-first unless a stable Kimi hook/plugin surface is present in the installed runtime. The existing `--yolo` launch lowers tool-approval frequency but does not remove questions/auth prompts. | -| Amp | Approval/waiting headers and `esc to cancel` working chrome. | The current `--dangerously-allow-all` launch lowers tool-approval frequency; blocked still matters for auth, questions, and runtime-level prompts. | +Raw detector states are `unknown | working | blocked | idle` plus per-pane `seen`; `done` is derived as raw `idle + !seen` and is never reported by any detector or reporter. -### multicode +**Goals**: lowest achievable false-positive rate; every wrong state debuggable from one diagnostics dump; detection keeps working when an agent vendor ships no API, breaks their API, or changes their TUI; cheap to extend to new agents. **Non-goals**: probabilistic state estimation (no field evidence it beats precedence+freshness+debounce), Windows ConPTY, LLM-provider traffic interception, autonomous recovery actions after stuck detection (this authority reports and routes attention only), per-tool/per-command status granularity, and built-in support for agents beyond the five runtimes (custom roles use the reporter contract and the `unknown` fallback). -[multicode](https://github.com/graemerocher/multicode) is the best comparison for structured runtime state. It does not solve status as a terminal-observability problem first; it asks the runtime when possible. +## Why No Single Signal Works — Skeptical Review of Every Approach -Useful concepts: +Every approach in the field fails somewhere. The design below is a deliberate combination chosen so that each approach's failure mode is covered by another's strength. This table is the rationale; keep it current when adding signal sources. 
-- **Codex app-server status.** multicode talks JSON-RPC to `codex app-server` and reads `thread/status/changed` notifications. Codex statuses include `idle`, `systemError`, `notLoaded`, and `active` with flags such as `waitingOnApproval` and `waitingOnUserInput`. multicode maps those waiting flags to a human-input/question state and treats an active turn as busy even if the thread status races back to idle. -- **OpenCode HTTP/SSE state.** multicode probes OpenCode's HTTP API, subscribes to its global SSE stream, refreshes session status on session/question events, and maps pending questions to `Question` before relying on terminal output. -- **Descendant session roll-up.** multicode keeps a root session busy while a descendant/subagent session is busy. This is the same class of bug jackin' must avoid when Claude Code, Codex, or OpenCode runs subagents: a parent pane returning to a prompt must not hide child work that is still active. -- **Cooperative state files.** multicode's autonomous-state skill writes a one-line file such as `working`, `question`, `review`, or `idle`, optionally with a session id, and polls it every two seconds. The idea is useful as a heartbeat/evidence channel for custom roles, but it cannot be proof because the agent can crash, forget, or race stale state. +| Approach | Field users | What it gets right | How it fails (be skeptical) | Role in jackin' | +|---|---|---|---|---| +| Runtime hooks / plugin events | workmux, tmux-agent-status, agent-deck, zellaude, Herdr (full-lifecycle agents only) | True semantic transitions from inside the runtime: prompt submitted, permission requested, turn ended. The only source that knows *why* the agent is waiting. | No event fires on operator interrupt (Esc/Ctrl-C) in Claude Code or Codex → stuck `working`. Agent crash mid-turn → no Stop event. Hook config can drift or be wiped. Vendors rename/retire events (contract churn). Async events can arrive late or out of order. 
A sync hook can stall or alter the agent. Some surfaces miss permission *results*. | **Primary authority where the surface exists**, graded by completeness, TTL-bound, identity-checked, and always cross-checked by the physical watchdog. Reporters forward events, never states; mapping/gating lives in daemon Rust. | +| Visible-screen pattern matching | Herdr (manifest engine), CCManager, Agent Session Manager, claude-squad, unitmux | Always available — jackin' owns the grid, so the live UI is readable for any agent, including ones that ship no API (the explicit fallback for runtimes like Kimi where waiting for upstream is not acceptable). Catches states hooks cannot see (interrupt returns to prompt box, crash screens). | TUI wording churns weekly; strings localize; single mis-sampled frames mid-redraw flip state; scrollback text contaminates window-wide matches; overlays (transcript viewers, pickers) hide the live prompt; per-version drift is permanent maintenance. | **Universal fallback and validator**, made survivable by: structural regions (not joined text), rules as data co-versioned with the image-pinned agent CLI, conservative `unknown` fallback, executed false-positive fixtures, and debounce. | +| Agent-authored OSC emissions (title, OSC 9 notify, OSC 9;4 progress, BEL) | Herdr (title/progress regions), cmux (terminal-side interception), VibeTunnel (BEL + attribution) | Authored by the agent itself, immune to screen-redraw races, already flowing through the parser jackin' runs — zero new I/O. Codex's title carries a spinner/status by default; Claude emits OSC 9-family notifications and OSC 9;4 progress. | Not all agents emit anything. Semantics are ambiguous: a notification means "done OR needs attention" without saying which; Claude's progress bar keeps animating during approval prompts, so progress-active is not working-proof. Any child process the agent shells out to can emit OSC bytes (spoofable). Vendors change emission formats silently. 
| **Evidence edges, never states**: notification edge feeds attention arbitration; progress-clear is a done-ish hint; title spinner is a busy/idle channel for agents that document it (Codex). Attributed to the foreground agent; wiped on identity change. | +| Process physics from `/proc` (CPU-jiffy deltas, child-process tree, foreground pgid) | WezTerm Agent Deck (fg info), VibeTunnel (process-tree attribution), Herdr (identity only) | Unfakeable by redraws: tool execution implies children under the agent PID; generation implies recurring CPU increments; `tcgetpgrp` answers "agent or shell owns the pane". Free inside the container. | CPU is not semantics: spinner timers burn CPU while idle-ish, network waits burn none while genuinely working. Children are absent during pure generation. Nothing here distinguishes blocked from idle. A wchan-style "blocked in tty read = waiting" check is invalid for every built-in runtime (event-loop TUIs sit in `epoll` in both states) — also invalidating cursor-probe-timeout as a blocked detector. | **Corroborator and watchdog fuel only.** Demotes lying authorities ("working" with zero physics for too long → `unknown` + stuck). Never promotes to `blocked`. | +| PTY output activity / pane-hash recency | claude-squad (hash), VibeTunnel (5 s recency), tmux `monitor-*`, Herdr's deleted June 2026 experiment | Trivial to implement; output genuinely correlates with work *sometimes*. | The decisive negative result: Herdr shipped PTY-activity-first detection (output authored `working`, with keystroke-taint causality, leases, and confirmation debounces stacked on top) and **deleted all of it within ~48 hours** — keystroke echo, resizes, redraw nudges, and agent-spawned background processes are indistinguishable from thinking at the byte level, and every compensating mechanism added latency to correct transitions. jackin's pre-V1 code wired exactly this shape, with fewer defenses, until the V1 implementation removed it.
| **Never authors state.** Output/input recency exists only as timestamps feeding the watchdog and re-evaluation scheduling. | +| Cooperative state files / agent-written sentinels | multicode autonomous-state skill, claude_code_agent_farm heartbeats, Codeman done sentinels | Lets custom/role agents participate with zero integration. | The agent can crash, forget, or race stale state; a file proves the writer wrote it, not that the runtime is healthy. | Accepted as reporter evidence with freshness + process validation; never proof. | +| LLM-as-classifier | coder/mux sidebar, Claude agent-view row summaries | Handles novel UI states no rule anticipated. | Cost, latency, nondeterminism — wrong tool for a badge that must be instant and reproducible. | Future display-layer descriptions only; never the badge. | +| Native supervisor state | `claude agents --json` (working/blocked/done/failed/stopped, research preview) | Exact in-process truth — own the process, get the truth. | Only for supervisor-hosted background sessions; not available for arbitrary PTY panes, which is what jackin' runs. | Tracked as a future higher-grade authority if supervisor-hosted sessions become launchable inside role containers. | -The main design lesson is ordering: structured runtime APIs and hook reports should outrank terminal text; cooperative files should be evidence with stale handling; descendant/subagent state needs explicit aggregation so root panes do not look done too early. +**Design thesis** that follows from the table: accuracy comes from *redundant disagreeing witnesses plus explicit trust rules*, not from any one channel. Use the semantic channel when a runtime offers one (most accurate when fresh); always run the screen/OSC engine (because jackin' is the multiplexer and can always see the live UI — no waiting for vendors); always run the physics watchdog (because both of the above can lie); and when witnesses cannot be reconciled, say `unknown` rather than guessing. 
-### Broader Comparison Lessons +## Research Base And License Discipline -Visual `busy / waiting / idle` indicators are becoming table stakes for multi-agent CLIs, but the implementations split into several quality tiers. The broader project list now lives in the [Research Watchlist](/reference/research-watchlist/). This roadmap item should keep only the implementation lessons that matter for jackin': +Re-validated June 10, 2026: Herdr head `1fc5e84` (source review), vendor hook/plugin docs for all five built-in runtimes, and an orchestrator field survey (claude-squad, VibeTunnel, workmux, cmux, tmux-agent-status, zellaude, claude-code-zellij-status, Crystal, claude_code_agent_farm, coder/mux, CCManager, agent-deck, Codeman, ccmux, multicode, pi-mono). Herdr's June 8–10 sequence — hand-coded matchers → PTY-activity-first (deleted in ~48 h) → manifest rules over structural regions + OSC evidence with one asymmetric idle debounce — is the strongest external validation of this design. multicode (Apache-2.0) contributes descendant-session roll-up and runtime-API-first ordering. -- Screen classifiers need fixture tests, debounce, and false-positive fixtures. They should parse visible/current terminal UI, not historical scrollback. -- Agent identity detection should combine child PID, foreground process group, argv/cmdline, wrapper process handling, and short cache TTLs. -- Attention notifications should fire on transitions and be rate-limited. Polling or redraw frequency should never become notification frequency. -- Waiting beats busy, and unseen finished work beats merely working background panes. The roll-up priority is an operator-attention rule, not a CPU-activity rule. -- Parent panes must remain working while descendant/subagent work is active. A parent prompt returning too early should not hide child work. -- Hook/status-file ideas are useful only when installed inside the container. Host-side hook mutation remains out of bounds by default. 
-- Health scores, circuit breakers, and keep-awake behavior are adjacent follow-ups. They should consume the status authority; they should not become new detectors. +Copyright protects expression, not ideas or methods (17 U.S.C. §102(b); EU Software Directive 2009/24/EC art. 1(2)): every concept here is free to reimplement as original Apache-2.0 Rust. Per-project code constraints, verified June 10, 2026 against repository license files: Herdr (dual AGPL-3.0-or-later/commercial), claude-squad (AGPL-3.0), coder/mux (AGPL-3.0), cmux (dual proprietary) — **concepts only; never copy, never translate line-by-line, never write jackin' code with their source open**; ccmux (FSL-1.1-MIT) — concepts; tmux-agent-status and claude-code-zellij-status (no license; code all-rights-reserved, ideas free); VibeTunnel, workmux, zellaude, Crystal, pi-mono, CCManager, agent-deck, Codeman, claude_code_agent_farm (MIT) and multicode (Apache-2.0) — concepts suffice. Vendor docs are published for integration. Reference these projects in PRs as concept sources only. -The comparison also shows what to avoid. Regex-only and silence-only detectors drift quickly as agent TUIs change. Cooperative state files are useful as heartbeat/evidence channels, but they cannot prove the agent is not stuck. Host-side hook mutation is common in the ecosystem, but jackin' default path must stay container-local so the operator's host config is never silently changed. +### Where Herdr falls short — the advantages jackin' must actually cash in -### Comparative Validation +1. **Co-versioned rule packs.** Herdr chases whatever agent version a host has installed, hence its 30-minute over-the-air manifest updates (a remote trust surface it must cap and validate). jackin' pins agent CLI versions in role images, so a rule pack and the TUI it targets are validated together at image build and cannot drift. No OTA machinery needed: repo-shipped packs + operator override. +2. 
**Identity is known, not discovered.** Herdr scans `/proc` on a few-hundred-millisecond tick with wrapper unwrapping and miss hysteresis to learn what owns a pane. jackin' spawned the process: child PID and agent slug are facts; probing covers only the exceptions (manual launches inside a pane, agent exits to shell). +3. **No physical cross-check.** Herdr disables screen detection while a full-lifecycle hook authority is live; a hung plugin reports `working` forever. jackin' keeps the watchdog always on and lets it demote any authority. +4. **Optimistic fallback.** Herdr's unmatched-screen fallback is `idle`, so unknown UI states become false `done` downstream. jackin' falls back to `unknown`. +5. **No physics signals at all** (no CPU deltas, no child-tree). 6. **No descendant/subagent roll-up.** 7. **Hand-curated fixtures** (jackin' adds capture-from-live and PTY-transcript replay). 8. **No accuracy self-measurement** (jackin' adds flap-rate telemetry). -The researched projects validate the concept, but they also change the recommended implementation order. Most tools are external observers: they cannot assume control over the agent installation, cannot safely rewrite agent config, and often only see a tmux/WezTerm pane. That forces them toward pane scraping, prompt regexes, host hook setup, or host-side status files. jackin' has a better boundary because it owns the role image, the container filesystem, `/jackin/runtime`, `/jackin/state`, `/jackin/run`, and the container-local agent homes. Therefore jackin' should not copy an observer-first architecture. It should install deterministic in-container reporters and treat screen/process detection as validation and fallback. 
+## Current Implementation State -| Implementation family | Projects that use it | Borrow | Do not borrow | -|---|---|---|---| -| External screen/process observation | Herdr, CCManager, Agent Session Manager, WezTerm Agent Deck, ccmux, claudeye, TUICommander | Current-screen windows, foreground process identity, idle debounce, prompt-box-region parsing, recent-line priority, false-positive fixtures, and hermetic PTY/tmux tests. | Observer-first truth model, broad historical regexes, silence-as-input-needed, and independent UI-level terminal parsers. | -| Host hook/status-file reporting | Codemux, tmux-agent-status, agent-deck | Hook events as semantic transitions, atomic status writes, freshness windows, done/unseen acknowledgement, source-specific stale handling, fsnotify overflow recovery, and hook schema drift repair. | Silent host `~/.claude`, `~/.gemini`, shell rc, tmux, or project-file mutation. jackin' can install the same idea inside the container instead. | -| Structured runtime APIs | multicode, OpenCode/Codex integrations in other tools | Runtime-native waiting flags, SSE/JSON-RPC subscriptions, turn lifecycle events, pending-question priority, and descendant/subagent roll-up. | Polling a structured API from the host as a separate truth path. The bridge should live in the container and report through Capsule. | -| Agent-written completion/health signals | Codeman, agent-deck | Explicit done sentinels, stuck/circuit-breaker evidence, health consumers, and conservative "when unsure, still working" policy. | Treating an agent-authored line as proof of runtime state without process identity, sequence, and freshness validation. | -| Terminal protocol readiness | Zylos, VS Code shell integration, Windows Terminal shell integration, Warp-style shell markers | Cursor-position probes, output-stability windows, OSC prompt/command markers, cwd/command metadata, and graceful fallback when probes fail. 
| Using cursor readiness as a blocker detector for full-screen agent TUIs, or assuming shell markers explain an embedded agent runtime. | +Validated June 11, 2026. -The best implementation is therefore reporter-first and detector-verified: install a versioned jackin' status reporter into the container, wire every supported runtime to it when a stable hook/API exists, validate reports against process and visible-screen evidence, and keep `unknown` as the conservative state when neither semantic events nor strong current-screen evidence are available. +**Landed and live**: `AgentState::Unknown` in ; control methods `ReportAgentState` / `HeartbeatAgentAuthority` / `ClearAgentAuthority` / `ReportRuntimeEvent` / `ReportChildAgentState` / `EventsSubscribe` / `WaitSessionStatus` / `SessionReadVisible` / `SessionStatusExplain`; per-source `SequenceTracker`; daemon-assigned arrival-order sequences for runtime hook/plugin events; lower-trust direct-state `ReportAgentState` handling for custom role reporters; roll-up `blocked > done > working > idle > unknown` with tests and `WorkspaceStatusChanged` aggregate event publication; evidence snapshots in ; live arbitration in ; debounce/watchdog constants and policy in ; runtime-event mapping in ; `/proc` CPU/descendant/foreground evidence plus root-agent identity and foreground-returned-to-shell handoff evidence; OSC 133/title/notification/progress/BEL evidence with retained status title payloads capped at 256 bytes, BEL count diagnostics, and `ShellIntegration` source attribution only when OSC 133 shell markers win; TOML rule packs plus fixtures for all five built-in runtimes, including live foreground-gated `osc_title` / `osc_progress` virtual regions for rule matching and explain previews; raw PTY transcript replay through the parser and status engine; `status explain` / `status capture` diagnostic bundles with visible text, rule previews, evidence summaries, authority/gate state, debounce holds, and explicit `stuck` 
diagnostics for watchdog demotions; watchdog-demoted, expired, identity-mismatched, and shell-handoff authorities are invalidated so later evaluations recover from stale semantic reports through shell or screen evidence; bundled rule packs declare semver `validated_versions` ranges and image-build validation rejects unpinned fallback installs or mismatched pinned agent versions; focus/attach acknowledgment; `done` persistence until acknowledgment; persisting-blocker re-notification; active descendant/subagent counts in evidence summaries, reports, explain output, and `AgentStateChanged`; dirty-session 250 ms coalesced early evaluation plus the 1 Hz clean-session floor; per-evaluation `cdebug!` evidence snapshots, authority-cleared breadcrumbs, flap-rate telemetry, and `status.stuck` watchdog telemetry; runtime status hook/plugin assets copied into derived images under `/jackin/runtime/agent-status/`; container-local Claude/Codex/Amp/OpenCode reporter assets with explicit per-event hook commands; container-local Codex `[tui]` config that enables `agent-turn-complete` / `approval-requested` OSC 9 notifications through `notification_method = "osc9"`; Kimi as rule-pack-only; and capsule badge glyphs. -### Terminal Shell Integration +**Old flap class removed**: PTY output bytes and operator keystrokes update only recency timestamps. They no longer author `working`, `blocked`, or `idle`. `SessionStatus::advance`, the old raw-signal enum, and the five hand-coded Rust detector modules have been removed. -Terminal ecosystems already solved a narrower version of this problem for shell commands. VS Code's terminal shell integration uses custom OSC sequences such as prompt start, prompt end, pre-execution, and command-finished markers; it also supports Final Term `OSC 133` prompt markers. Windows Terminal documents the same family of prompt/command markers. 
These signals are excellent when the foreground program is an ordinary shell, but they do not fully solve embedded agent TUIs that draw their own prompt, approval UI, or full-screen interface. +**Regression coverage added during the final audit**: the blocked-dialog redraw soak test replays 150 approval-dialog redraws through the terminal grid, rule pack, arbitration, debounce, and publish path and asserts exactly one public transition; the PTY transcript replay test feeds a raw Claude approval transcript through the parser and status engine and asserts the `permission-dialog` rule wins; watchdog demotions now emit a `watchdog_demoted` state-change reason, invalidate the stale authority source, emit `status.stuck` telemetry, and expose a `stuck` object in `status explain`; expired and identity-mismatched authorities are rejected and dropped while preserving evidence notes in the published summary; runtime-event sequencing is tested as daemon-assigned and per-source, and runtime clear events only clear their matching authority source; explicit heartbeat and counter-only subagent events refresh existing authority freshness and exposed timestamps without authoring state; runtime gating has recorded-turn sequence tests for Claude, Codex, OpenCode, and Amp; Claude `SessionEnd`, Claude `Notification:` extraction, Codex notify-turn-complete, Codex `[tui]` OSC notification config, explicit hook-event commands, and shell-reporter argument-forwarding coverage lock the shipped adapters to the table; direct `ReportAgentState` reports are tested as lower-trust cooperative evidence rather than runtime-event authority; startup grace is tested against the daemon's screen-rule helper; process fixtures cover direct binaries, node-wrapped Claude, unknown shell foreground ownership, dead pid handling, descendant counting, and CPU-jiffy sampling-window math; foreground shell handoff is tested as exit-like idle only after prior agent identity, post-startup grace, root foreground 
ownership, no descendants, and non-agent root identity; the cleanup path is tested to clear agent identity, authority, gate state, sequences, subagent counts, and stale agent OSC while seeding shell idle evidence; source attribution is tested for OSC 133 shell-integration winners and for agent-authored OSC progress-clear not being mislabeled as shell integration; `osc_title` / `osc_progress` virtual rule regions are tested for matching, explain previews, and foreground-identity hiding; raw OSC progress-clear is tested to be ignored when the foreground is not the agent; BEL capture, BEL counting, and notification-policy forwarding are tested at the session boundary and in `status explain`; retained OSC title evidence is tested for the 256-byte cap; rule fixture coverage now includes state-specific false-positive assertions for Claude stale-spinner, transcript-overlay, scrolled-blocker, horizontal-rule prompt, and timing classes; runtime pack loading is tested as embedded fallback < image pack < operator override; `WaitSessionStatus` is tested to return the current revision when the state is already satisfied; `WorkspaceStatusChanged` is tested for aggregate counts, precedence, and duplicate suppression; authority TTL expiry, identity mismatch, process exit, foreground-returned-to-shell, and watchdog demotion each have explicit evidence-note assertions; bundled pack validation rejects wildcard and lower-only unbounded drift; and `AgentStateChanged` is tested to carry `subagents_active` and `foreground_returned_to_shell` from the evidence summary. -jackin' should still listen for shell-integration markers when they pass through the PTY. They are useful for shells, subprocess command boundaries, cwd tracking, and "agent returned to shell" transitions. They are not a substitute for agent runtime hooks or visible-agent detection. 
+**Live smoke verification completed June 11, 2026**: -### Cursor-Position And Screen-Stability Probes +- Derived image build copied `/jackin/runtime/agent-status/` into the container image, including Claude/Codex/Amp/OpenCode hook/plugin assets and all five bundled TOML packs; hook shell assets were executable. +- A normal `jackin load ... --debug --force --rebuild` reached image build and role launch, but this CI-like runner's Docker daemon could not see host bind-mount contents under the workspace path: `/jackin/run/agent.toml` was missing inside the launched container even though Docker reported the bind mount. A direct bind-mount repro showed the same empty mount, so the failure was environmental rather than an agent-status implementation failure. +- A Docker named-volume capsule smoke exercised the real in-container contract without host-path sharing: `/jackin/run/agent.toml`, `/jackin/run/jackin.sock`, and `/jackin/state/` under `/jackin/`; `jackin-capsule status`; interactive attach/session creation; Codex launch; `jackin-capsule report-event --event permission-requested`; authoritative `blocked` arbitration with `foreground_pgid`, authority/gate state, rule previews, and visible text in `status explain`; and `jackin-capsule status capture` writing `visible.txt` plus `evidence.json` under `/jackin/state/agent-status/captures/`. +- The smoke exposed one missing screen fallback: Codex's unauthenticated sign-in/API-key screen was visibly waiting on the operator but did not match a blocked rule. The Codex rule pack now has an `auth-prompt` blocked rule plus the `blocked_auth.txt` fixture. -The strongest non-Herdr conceptual work is cursor-position readiness detection: query the PTY cursor position with `CSI 6n`, combine that with a short output-stability window, and infer whether input would land at a prompt. Zylos' research argues this is more robust than prompt regexes because it asks "where will the next character appear?" 
instead of "does the screen text match this prompt string?" +Root cause, stated once for the historical bug: state used to be mutated by whichever signal arrived last, with no notion of evidence, confidence, freshness, or provenance. The architecture below removes that structure so the whole flap class cannot exist. -This belongs in jackin' as a fallback/probe, not as the primary blocker detector. It is useful for "is this pane input-ready?" and "is a submitted task suspiciously stuck?", but full-screen TUIs, nested multiplexers, and terminals that intercept cursor reports can produce false positives. The state authority should treat cursor probes as one signal with confidence metadata, not as truth. +## Architecture Specification -## Current jackin' Surface +### The one structural rule -The built-in runtimes are exactly `claude`, `codex`, `amp`, `kimi`, and `opencode` in . Runtime setup already centralizes agent home seeding and credential copying in . Launch currently passes high-autonomy flags in : Claude Code uses dangerous-skip-permissions, Codex uses dangerous bypass approvals/sandbox, Amp uses dangerous allow-all, Kimi uses yolo mode, and OpenCode gets permission allow config. These flags lower the number of tool-approval prompts but do not remove auth prompts, user questions, plan confirmation, model/account issues, runtime updates, or terminal-level failures. +Signals never mutate state. Signals update **evidence**; a pure function maps the evidence snapshot to `(raw_state, confidence, evidence_summary)`; a debounce policy turns arbitration results into public transitions; events fire only on debounced transitions. PTY output updates recency timestamps and a damage counter — nothing else. Operator input updates an input timestamp and the seen path — nothing else. `SessionStatus::advance` and its raw-signal enum are deleted. 
-Capsule already owns the right implementation point: the PTY reader, jackin-term `DamageGrid` state, session lifecycle, 1Hz ticker, control socket at `/jackin/run/jackin.sock`, and in-container setup hooks. Host-side code should consume structured state events; it should not scrape terminal output through `docker exec`, `docker attach`, or a rendered preview. +### Data model (target shapes, original to jackin') -## jackin' Design +```rust +/// Everything arbitration may look at for one session, assembled per evaluation. +pub struct EvidenceSnapshot { + pub authority: Option<AuthorityEvidence>, // semantic channel + pub osc: OscEvidence, // agent-authored terminal protocol + pub screen: ScreenEvidence, // rule-pack result over the grid + pub process: ProcessEvidence, // /proc physics + pub activity: ActivityEvidence, // recency timestamps only + pub subagents_active: u32, +} -This section is the implementation contract. Future agents working on this feature should treat the requirements here as the source of truth unless a later PR updates this roadmap item first. +pub struct AuthorityEvidence { + pub source_id: String, + pub grade: AuthorityGrade, // Complete (OpenCode-class) | Partial (Claude/Codex-class) + pub mapped_state: RawAgentState, // produced by daemon-side event mapping, never by the reporter + pub pending_permission: bool, // permission event seen without its result event + pub last_event: Instant, + pub seq: u64, // daemon-assigned arrival order per source +} -### Container Injection Advantage +pub struct OscEvidence { + pub title: Option<String>, pub title_changed_at: Option<Instant>, + pub notify_edge_at: Option<Instant>, // OSC 9-family notification arrival + pub progress_active: bool, pub progress_cleared_at: Option<Instant>, // OSC 9;4 + pub bel_at: Option<Instant>, pub bel_count: u64, +} -jackin' should use the control it has and other projects usually lack.
Host-side terminal tools must be zero-config observers because they cannot assume they can install files into Claude Code, Codex, OpenCode, Kimi, Amp, tmux, WezTerm, or a user's shell without surprising the operator. jackin' builds and launches the role container, so it can install status assets owned by jackin' on every launch, repair drift, and keep all mutations inside `/jackin/` plus container-local agent homes. +pub struct ScreenEvidence { + pub state: Option<RawAgentState>, pub rule_id: Option<String>, + pub strong: bool, // rule carried a strong-evidence flag (live chrome) + pub freeze: bool, // overlay rule matched: keep last state + pub observed_at: Instant, +} -The default status architecture should be: +pub struct ProcessEvidence { + pub child_alive: bool, + pub root_is_agent: bool, + pub foreground_is_agent: bool, + pub foreground_returned_to_shell: bool, + pub child_process_count: u32, // processes under the agent PID + pub cpu_jiffies_delta: u64, // over the last sampling window +} -1. Install a small reporter command under `/jackin/runtime/agent-status/` that talks to the existing Capsule socket at `/jackin/run/jackin.sock`. -2. Launch every built-in agent with stable environment such as `JACKIN_SESSION_ID`, `JACKIN_AGENT_RUNTIME`, `JACKIN_STATUS_SOCKET=/jackin/run/jackin.sock`, and `JACKIN_STATUS_SOURCE=`. -3. Merge runtime hook/plugin/API bridge configuration into the container-local agent home during `runtime_setup`; repair drift on every launch the same way agent-deck repairs stale hook configuration, but never against the host home. -4. Let hooks/API bridges report `working`, `blocked`, `idle`, `heartbeat`, `child-start`, `child-stop`, and `clear` events with source id, sequence, timestamp, runtime label, and session id. -5. Use foreground process, visible screen, shell markers, cursor probes, and weak activity to validate, stale, or repair those reports. They are guardrails and fallbacks, not the preferred truth source.
+pub struct ActivityEvidence { pub last_output: Option<Instant>, pub last_input: Option<Instant> } +``` -This is the main architectural difference from Herdr, CCManager, ccmux, WezTerm Agent Deck, and tmux-agent-status. Their zero-config observation constraint is a product strength for host tools, but it is not the best default for jackin'. jackin' can still keep zero host setup: the injected reporter lives in the role container and is removed with the container/image state. +`SessionStatus` keeps `effective`, `seen`, `revision`, and gains `raw`, `confidence`, `authority_source`, and the latest `EvidenceSnapshot` summary for events/explain. -### Implementation Surface +### Evaluation scheduling -The first implementation should add a dedicated status authority module inside `crates/jackin-capsule`, conceptually named `session_status` or `agent_status`. `Session` should store child PID, detected foreground agent, raw detector state, effective public status, `seen` acknowledgement state, current authority source, evidence metadata, revision, and last-seen revision. The protocol type in should grow `Unknown`; if wire compatibility is not important before release, replace the current enum shape directly rather than adding compatibility shims. +The existing 1 Hz capsule tick is the floor: every session re-evaluates each tick. PTY damage marks a session dirty and may trigger one early coalesced re-evaluation (minimum 250 ms between evaluations per session), so blocked latency is sub-second without per-byte work. Grid reads dump only the detection window (bottom rows), not the whole grid, and are skipped when the damage counter is unchanged and the state is idle/unknown. -The raw detector state should stay separate from the public status. Raw states are `unknown`, `working`, `blocked`, and `idle`. Effective statuses are `unknown`, `working`, `blocked`, `done`, `idle`, and optional diagnostic overlays such as `stuck`.
`done` is computed from raw idle plus the `seen` flag and should never be accepted from a hook as runtime truth. +### Semantic channel (reporter protocol) -The concrete model should be small and explicit: +- Spawn env (daemon, per session): `JACKIN_SESSION_ID`, `JACKIN_STATUS_SOURCE` (unique per session+runtime, e.g. `hook--`), `JACKIN_AGENT_RUNTIME`, `JACKIN_STATUS_SOCKET=/jackin/run/jackin.sock`. +- Reporter is a `jackin-capsule` subcommand (`jackin-capsule report-event --event --payload-stdin`), single fork, no shell/python/netcat chain. Hook and plugin scripts are dumb pipes that pass the vendor event name and payload through. +- **Events, not states.** The daemon owns one mapping+gating table (tested Rust): -| Concept | Required shape | +| Incoming event (any runtime) | Gate | Mapped effect | +|---|---|---| +| prompt-submitted, tool-start, tool-end, compact-start | — | authority state `working`; entering work cycle resets `seen` | +| permission-requested, question-asked, elicitation | — | authority state `blocked`, `pending_permission = true` | +| permission-resolved (allow/deny), question-answered | — | `pending_permission = false`; state `working` | +| turn-complete / stop | only when `pending_permission == false` AND `subagents_active == 0` | authority state `idle` | +| subagent-start / subagent-stop | — | increment / saturating-decrement `subagents_active`; no state change | +| session-end / agent-exit | — | clear this source's authority; evidence note | +| heartbeat | — | refresh `last_event` only | + +- Sequences are assigned by the daemon in arrival order per source; reporter timestamps are used only for staleness diagnostics. Authority expires when `last_event` exceeds `AUTHORITY_TTL`; it is dropped immediately on process exit or when the source's runtime label contradicts the known/detected foreground agent. Reporters never emit decisions that alter agent control flow (no stop-blocking). 
+ +### OSC evidence channel + +jackin-term already parses every escape sequence; the daemon retains per session: last title + change time, OSC 9-family notification arrivals, OSC 9;4 progress state and set/clear edges, BEL count. Mapping discipline: notification arrival = attention edge (the agent is done **or** needs input — arbitration decides which using the other witnesses); progress-clear = done-ish hint; progress-active = nothing (Claude animates it during approval prompts); title spinner presence/absence = busy/idle channel only for runtimes that document it (Codex defaults to a spinner title item). OSC evidence is accepted only while the foreground process is the detected agent and is wiped on identity change or agent restart. + +### Screen rules as data + +One engine replaces the five per-agent Rust detector modules. Rule packs are TOML files in-repo, baked into role images, with an operator override directory under `~/.jackin/` (override > image pack). Schema (jackin-original; concept credit Herdr's manifest engine): + +```toml +schema_version = 1 +agent = "claude" +validated_versions = ">=2.1.0, <2.3" # image build fails if the pinned CLI is outside every pack's range + +[[rule]] +id = "permission-dialog" +state = "blocked" +priority = 900 +region = "bottom_nonempty:12" +strength = "strong" +requires_all = ["enter to select", "esc to cancel"] +requires_any = ["to navigate", "↑/↓"] +forbids = [] + +[[rule]] +id = "transcript-overlay" +state = "freeze" # keep last state while an overlay hides the live prompt +priority = 1000 +region = "bottom:24" +requires_all = ["showing detailed transcript", "ctrl+o to toggle"] +``` + +- Regions are structural: `bottom:N`, `bottom_nonempty:N`, `prompt_box_body`, `above_prompt_box`, `after_last_rule` (below the last long horizontal rule), `last_nonempty_line`, and virtual regions `osc_title`, `osc_progress`. 
Blocked rules must anchor to live dialog chrome (affordance co-presence within the dialog region), so an approval question remembered in transcript text is structurally unmatchable. +- Matching: case-insensitive substrings (`requires_all`/`requires_any`/`forbids`) plus optional anchored regexes; highest priority wins; **no match → `unknown`** (deliberately stricter than Herdr's idle fallback). +- Claude-specific corrections the pack must encode (current detector bugs): transcript bullets (`⏺`, `·`, `•`, `●`, `▸`) are not spinner glyphs; a working rule requires the spinner at line start in the live status region together with the interrupt hint; long horizontal rules are not prompt-box evidence; the prompt box is the rounded-corner box containing the input affordance in the bottom region. +- Engine safety caps (rules per pack, gate count, matcher length) guard against pathological packs even though packs are repo-owned. + +### Arbitration (pure function, evaluated per session per evaluation) + +1. Process exited or foreground returned to shell → publish exit transition (raw `idle`, evidence `process_exited` or `foreground_returned_to_shell`) **before** clearing identity and authority. +2. Screen `freeze` rule matched → keep previous raw state, refresh evidence. +3. Blocked wins: fresh authority `blocked` (pending permission unresolved), or strong screen blocker for the detected agent. A live screen blocker overrides a non-blocked authority only when the screen observation is at least as fresh as the authority's last event. +4. Fresh, grade- and TTL-valid, identity-consistent authority state. +5. Strong screen/OSC evidence: visible working chrome or documented title spinner → `working`; positively matched idle UI → `idle`. +6. Physics-only corroboration (children executing or CPU active, nothing contradicting) → `working` at `Weak` confidence. +7. `unknown`. 
+ +The **watchdog** runs orthogonally to precedence: any witness claiming `working` while output, CPU delta, and child-process count are all quiet beyond `WATCHDOG_QUIET` demotes the session to `unknown` and raises a rate-limited `stuck` diagnostic with the full evidence trail. This is what catches the interrupt hole (no hook fires on Esc/Ctrl-C in Claude Code or Codex) and hung plugins. + +### Debounce policy (between arbitration and public state) + +| Transition | Policy | |---|---| -| `AgentRawState` | `Unknown`, `Working`, `Blocked`, `Idle`. This is what detectors and reporters produce. | -| `AgentEffectiveStatus` | `Unknown`, `Working`, `Blocked`, `Done`, `Idle`. This is what UI, CLI, and host consumers read. | -| `AgentStatusSource` | `Reported`, `VisibleScreen`, `ForegroundProcess`, `ShellIntegration`, `CursorProbe`, `OutputActivity`, or `None`, with a stable source id for reporter sequence validation. | -| `AgentStatusConfidence` | `Authoritative`, `Strong`, `Weak`, or `Unknown`. Reported runtime events are authoritative only while they match the foreground agent and sequence rules. Visible blocker/idle/working matches are strong. Process/output-only signals are weak. | -| `AgentStatusEvidence` | Detected agent, foreground process group, source id, source sequence, source timestamp, visible blocker/idle/working booleans, process-exited flag, stale-report flag, and optional human-readable message. | -| `SessionStatus` | Raw state, effective status, `seen`, authority source, confidence, evidence, monotonic revision, and last-focused/last-acknowledged revision. | - -The protocol should expose this model as structured JSON fields, not pre-rendered strings. UI labels, colors, icons, and workspace roll-ups are consumers of the model. 
- -### Signal Sources +| → `blocked` (gated authority event or strong visible blocker) | immediate | +| → `working` | one consistent evaluation | +| `working` → `idle` via positively matched idle UI or gated turn-complete | immediate | +| `working` → `idle` inferred only from absence of working chrome | `IDLE_CONFIRMATIONS` consecutive evaluations AND no OSC progress active AND CPU-quiet | +| any → exit | immediate, ordered before identity clearing | +| agent just spawned | `STARTUP_GRACE`: identity known, screen rules muted, OSC state wiped | +| notifications | fire on debounced public transitions only; a persisting blocker re-notifies at most once per `RENOTIFY_INTERVAL` | -1. **Semantic reports.** Add a control-channel method for trusted in-container runtime integrations to report state: source, agent/runtime, raw state, optional message, optional custom status, sequence number, timestamp, and session reference. Reporters include Claude Code hooks, Codex structured status when available, OpenCode plugins/API bridges, Amp/Kimi equivalents when available, and wrapper hooks created by jackin' inside the container at launch. This is the preferred source when fresh and process-consistent. -2. **Process ownership.** Track the child PID for each session, read Linux `/proc//stat` to get the foreground terminal process group, scan `/proc` for that process group, and identify known agent binaries. Because jackin' runs inside Linux containers, this can be Linux-first and simpler than Herdr's cross-platform path. Process exit should clear hook authority for that agent and transition toward `idle` or `exited`, not leave a stale working/blocking state behind. -3. **Visible screen detection.** Read the current `jackin-term` `GridSnapshot` for the pane and match only strong, current-screen signals: approval prompts, explicit input-required prompts, visible idle input UI, and visible working chrome. Avoid treating arbitrary scrollback text as authority. -4. 
**Shell-integration markers.** Capture `OSC 133`, `OSC 633`, `OSC 7`, and related markers when shells emit them. Use them for shell prompt/execution boundaries and cwd/title enrichment. -5. **Cursor-position probes.** Optionally probe `CSI 6n`/cursor position through the PTY for readiness/stuck diagnostics, with timeout and fallback. Never block the PTY reader, attach channel, or daemon event loop while waiting for a probe response. -6. **Output activity.** Keep output timestamps, but demote them to a weak signal: recent output supports `working`; silence alone does not prove `blocked`. +Initial constants (one module, all referenced from tests): `AUTHORITY_TTL = 30 s` (heartbeat or any event refreshes), `WATCHDOG_QUIET = 10 s`, `IDLE_CONFIRMATIONS = 3` evaluations, `STARTUP_GRACE = 3 s`, `CPU_SAMPLE_WINDOW = 2 s`, `RENOTIFY_INTERVAL = 5 min`, early-evaluation coalescing `250 ms`. Future work may adapt `WATCHDOG_QUIET` per runtime as a bounded multiple of observed median cycle time. -### Runtime Flow +### Done, seen, roll-up -The 1Hz Capsule ticker should be the normal recomputation point. PTY output can mark the screen/process evidence dirty, but it should not directly decide `blocked`. Each tick should: +Unchanged contract, with wiring named: pane focus and attach call the acknowledgment path (`done` → `idle`, records seen revision); entering a work cycle resets `seen`; roll-up is `blocked > done > working > idle > unknown`. -1. Refresh foreground process evidence from `/proc` for live sessions. -2. Read the current parser screen snapshot for each pane and run the built-in visible detectors for the detected agent. -3. Drop or stale any reported authority whose process has exited, sequence moved backward, source conflicts with the foreground agent, or age exceeds the source policy. -4. Run the arbitration function once per session to compute raw state, confidence, and evidence. -5. Derive effective status from raw state plus `seen`. -6. 
Increment the session revision and emit an event only when raw state, effective status, seen status, authority, confidence, or evidence summary changes. -7. Recompute tab/workspace/fleet roll-up from effective statuses using `blocked > done > working > idle > unknown`. +### Telemetry, explain, capture, replay -Runtime reports should take a separate path into the same state machine. A `report_agent_state` message validates the session reference, source id, sequence, agent label, and timestamp, stores the candidate authority, and requests an immediate recomputation. A `clear_agent_authority` message removes only the matching source authority and also requests recomputation. Report handlers must not update UI state directly. +- `clog!`: lifecycle transitions, authority grants/expiries/demotions, watchdog degradations, notification emissions. `cdebug!`: per-evaluation evidence snapshots, rule decisions with region previews, sequence/freshness rejections, gating outcomes. +- `jackin-capsule status explain `: dump the current `EvidenceSnapshot`, every rule evaluated with its region text preview, authority state/grade/freshness, and why the winning state won. Compact and debug logs carry transition, telemetry, and stuck breadcrumbs; `status explain` / `status capture` are the full evidence bundle. +- `jackin-capsule status capture `: snapshot the full evidence bundle into a new fixture directory — live mis-detections become regression fixtures in one command. +- Recorded PTY transcripts replay through the parser+engine in CI (trace-driven regression, stronger than curated screens). +- Telemetry counters: state transitions per session-minute (flap rate) and watchdog demotions. A flap-rate regression is the canary that an agent update broke a pack. +- `AgentStateChanged` evidence fields are populated from the actual snapshot (rule id or authority source, real detector booleans, real foreground pgid, confidence, freshness) — never derived from the output state. 
-Seen handling is also centralized. Focusing a pane or acknowledging a session records the current status revision as seen. If the raw state later transitions back through `working` or `blocked` and then to `idle`, the effective state becomes `done` again until the new revision is seen. +## Per-Runtime Adapters (verified June 10, 2026) -### Arbitration +The strategy the table implements: **use the semantic surface wherever the vendor provides one** (most accurate while fresh); **rely on the screen/OSC engine wherever they do not** — jackin' is the multiplexer and can always see the live UI, so no runtime blocks on upstream API maturity (Kimi is the worked example); **physics watchdog everywhere**. -All signals flow through one state machine. The initial precedence should be: - -1. Hook/API report says `blocked` for the current detected agent and has a valid sequence. -2. Strong visible blocker for the current detected agent. -3. Strong visible working can override stale reported `idle`. -4. Strong visible idle can stale a reported `working` after a short grace window. -5. Fresh hook/API reported state. -6. Process/screen fallback state. -7. Unknown. - -The state machine must validate sequence numbers per source, reject reports from a source that conflicts with the detected foreground agent, clear stale hook authority on process exit, debounce short-lived transitions, and attach confidence metadata so downstream consumers can decide whether to notify, queue work, or only display a diagnostic. 
+| Runtime | Tier-1 semantic source (container-local install, drift-repaired each launch) | Tier-2/3 cross-checks | Adapter specifics | +|---|---|---|---| +| Claude Code | Hooks in the container Claude home: `UserPromptSubmit`→prompt-submitted; `PreToolUse` (matcher `AskUserQuestion`)→question-asked; `PermissionRequest`→permission-requested; `PermissionDenied`→permission-resolved; `Notification` (permission-prompt / elicitation types)→permission-requested; `PostToolUse`/`PostToolUseFailure`→tool-end; `Stop`/`StopFailure`→turn-complete/failed; `SubagentStart`/`SubagentStop`→subagent events; `SessionEnd`→agent-exit. All `async: true` except where an acknowledgment shape is required. Grade: Partial. | OSC 9-family notification edge, OSC 9;4 clear edge, rule pack, watchdog. | Never map the idle-timer notification type to blocked (documented false-positive history upstream). The Stop payload's background-task field is community-reported, not documented — verify against a live payload before gating on it. Installer registers `Notification`, merges additively (repairs its own entries without replacing role-authored hooks), and never emits stop-blocking decisions. | +| Codex | Hooks in the container `~/.codex/hooks.json` (shipped, on by default): `UserPromptSubmit`, `PreToolUse`, `PermissionRequest`, `PostToolUse`, `SubagentStart`/`SubagentStop`, `Stop`; plus the `notify` program (turn-complete only) as a redundant done edge. Grade: Partial. | Terminal-title spinner item (default on — busy/idle channel), OSC 9 approval edge via the TUI notification setting with the OSC method enabled in container config, rule pack, watchdog. | The old "structured status pending verification" stance is resolved — the hook surface is real, including `PermissionRequest`. | +| OpenCode | Container-local plugin (config home, never the project tree) forwarding `session.status`, `session.idle`, `permission.asked`/`permission.replied`, `tool.execute.*`, `session.error`. Grade: Complete. 
| Rule pack, watchdog. | Replaces the unworkable ACP-bridge approach (separate-instance truth source). Handle known upstream quirks defensively: status can stick after async prompts; idle-event plugin handlers are fire-and-forget. | +| Amp | Container-local plugin on `agent.start`/`tool.call`/`tool.result`/`agent.end`; built-in notifications fire on completion and blocked. Grade: Partial. | BEL edge, rule pack, watchdog. | Promote from screen-only. | +| Kimi | None in the TUI today (wire mode exists but would replace the operator-visible TUI). | **Rule pack is primary** (thinking indicators, approval prompts, idle prompt box), process evidence, watchdog. | The worked example of vendor-independence: do not wait for upstream hooks; the screen engine carries Kimi at full quality, and the co-versioned pack pins it to the image's CLI version. Delete the placeholder hook script that handles events that never fire. | +| Custom role agents | Role-authored reporter over the capsule socket (events-not-states contract). | Process identity, weak activity. | `unknown` remains the default; a future role manifest contract may declare named visible states and ship rule packs. | -### Done, Seen, And Roll-Up +Runtime setup constraints (unchanged hard rules): all assets under `/jackin/runtime/agent-status/`, state under `/jackin/state/agent-status/`, socket under `/jackin/run/`; hook/plugin config merged only into container-local agent homes; nothing written to the host or to bind-mounted project trees; drift repaired every launch. -`done` is derived outside the raw detector: +## Implementation Blueprint -- When a pane transitions from `working` or `blocked` to raw `idle` while not focused/visible, mark `seen = false`; public status becomes `done`. -- When the operator focuses or explicitly acknowledges the pane, mark `seen = true`; public status becomes `idle`. -- Tab, workspace, and fleet roll-up should use attention priority: `blocked > done > working > idle > unknown`. 
+Each slice below is independently shippable, lands in its own PR, and is specified so an implementing agent needs no further design decisions — when something is ambiguous, the Architecture Specification above is the tiebreaker, and this page must be updated in the same PR as any deviation. Every slice ends with `cargo fmt --check`, `cargo clippy --all-targets --all-features -- -D warnings`, `cargo nextest run -p jackin-protocol -p jackin-capsule`, and the capsule smoke-test mandate from . New files use the crate's self-named module layout (no `mod.rs`). -This lets the console and Desktop Agent Hub route the operator to the pane that needs action now, not merely the pane doing background work. +### Slice 1 — Kill the flap engine -### Runtime Setup +*Why first*: removes the dominant false-positive source with a two-site deletion; everything else builds on a quiet baseline. -All status assets that jackin' installs inside a role container should live under `/jackin/`: reporter binaries/scripts under `/jackin/runtime/agent-status/`, ephemeral sockets/pidfiles under `/jackin/run/`, and per-session status cache or sequence state under `/jackin/state/agent-status/`. The existing Capsule socket at `/jackin/run/jackin.sock` should remain the control-plane endpoint unless a measured throughput issue proves a separate socket is needed. +- In , the PTY output handler currently sets `last_output_at` and then calls `self.apply_raw_state(crate::agent_status::AgentRawState::WorkingVisible)` — delete the `apply_raw_state` call, keep the timestamp. In `mark_operator_input`, delete the `apply_raw_state(WorkingVisible)` call, keep `last_output_at` (rename to a dedicated `last_input_at` field in this slice) and the `was_blocked` return. +- Add `last_input_at: Instant` next to `last_output_at`; both are evidence inputs for later slices. 
+- Interim behavior (until slice 2 lands): state changes come only from the 1 Hz detector tick and reporter messages; sessions start `Unknown` and may stay `Unknown` longer — that is correct per the vocabulary table. +- Tests (in `session/tests.rs`): `pty_output_does_not_change_state` (feed bytes while `Blocked`, assert still `Blocked`); `operator_input_does_not_change_state` (same while `Blocked` and while `Done`). Flap soak test (integration): drive a fake PTY that redraws a Claude-style permission dialog every 200 ms for 30 s; assert exactly one transition total (the initial entry into `Blocked`). +- Do not: add any replacement signal, timer, or lease — silence/activity must author nothing. -Runtime setup belongs in and the launch scaffolding in . The setup path should copy or generate: +### Slice 2 — Evidence model and arbitration as the only path -- `/jackin/runtime/agent-status/report` — small CLI or script that sends `report_agent_state`, `heartbeat`, and `clear_agent_authority` messages to the Capsule socket. -- `/jackin/runtime/agent-status/wrap` — optional launch wrapper that stamps `JACKIN_SESSION_ID`, `JACKIN_AGENT_RUNTIME`, `JACKIN_STATUS_SOURCE`, and the child PID/process start/exit events before `exec`ing the real runtime. -- `/jackin/runtime/agent-status/hooks//` — runtime-specific hook/plugin/API bridge assets written by jackin' and safe to reinstall every launch. -- `/jackin/state/agent-status//` — sequence, heartbeat, and diagnostic state owned by Capsule, not by a host cache directory. +- New file `crates/jackin-capsule/src/agent_status/evidence.rs`: the `EvidenceSnapshot`, `AuthorityEvidence`, `OscEvidence`, `ScreenEvidence`, `ProcessEvidence`, `ActivityEvidence`, `AuthorityGrade { Complete, Partial }` types exactly as in the Data model section (fields may grow, never shrink). All fields plain data; no methods with I/O. 
+- Rewrite the arbitration module: signature `pub fn arbitrate(snapshot: &EvidenceSnapshot, previous_raw: RawAgentState, now: Instant) -> ArbitrationResult` where `ArbitrationResult { raw: RawAgentState, confidence: StatusConfidence, winner: EvidenceWinner, notes: Vec<EvidenceNote> }` and `EvidenceWinner` names which step (1–7) decided. Implement the seven-step precedence verbatim from the Arbitration section, including: step 2 freeze returns `previous_raw`; the step-3 freshness rule compares `screen.observed_at` against `authority.last_event`; and the old inverted idle-grace bug must not return — staleness of a *screen observation* lowers its rank, it never raises it. +- Delete `SessionStatus::advance`, the `AgentRawState` signal enum in `agent_status.rs` (the protocol crate's state enum stays), and the `transition` function; `SessionStatus` keeps `effective`, `seen`, `revision` and gains `raw`, `confidence`, `last_snapshot_summary`. +- In , the state tick assembles an `EvidenceSnapshot` per session (screen result from the existing detectors until slice 7; authority from the stored `HookAuthority` until slice 4 re-shapes it; process/OSC fields default until slices 5–6), calls `arbitrate`, then applies the debounce policy (slice 3; until then, apply transitions directly), then derives effective from raw + seen, then broadcasts. +- `broadcast_agent_state_changed` is rewritten to serialize the real `ArbitrationResult` + snapshot summary: real detector booleans, real winner, real confidence, real foreground pgid (None until slice 5), seen revision from the acknowledgment bookkeeping (slice 8 wires the callers). +- Tests: one test per precedence step (7); freshness-override both directions; `freeze` keeps previous; property tests — arbitration never panics for any combination of present/absent evidence (use a small hand-rolled combinatorial loop, not a new dependency), same snapshot twice → same result, revision increments only on public transitions.
-The setup path may merge container-local agent configuration, create container-local hooks, and copy reporter scripts owned by jackin' into the container home. It must not edit host `~/.claude`, host `~/.codex`, host `~/.config/opencode`, host shell rc files, host terminal settings, host git config, or any other host-side state unless the operator explicitly opts into that action and sees it in the launch summary. It also must not write status plugins into a bind-mounted project directory, such as `.opencode/plugins/`, unless that write is an explicit operator-visible workspace action. If a runtime only loads plugins from the project tree, that integration is opt-in; the default remains container-local reports plus process/screen fallback. +### Slice 3 — Debounce policy and physical watchdog -### Per-Runtime Plan +- New file `crates/jackin-capsule/src/agent_status/policy.rs`: all constants (`AUTHORITY_TTL = 30 s`, `WATCHDOG_QUIET = 10 s`, `IDLE_CONFIRMATIONS = 3`, `STARTUP_GRACE = 3 s`, `CPU_SAMPLE_WINDOW = 2 s`, `RENOTIFY_INTERVAL = 5 min`, `EVAL_COALESCE = 250 ms`) and `pub fn debounce(prev_public: AgentState, candidate: &ArbitrationResult, pending: &mut PendingTransition, now: Instant) -> Option` implementing the policy table row by row: blocked/working/positively-matched-idle/exit immediate; inferred idle requires `IDLE_CONFIRMATIONS` consecutive candidates with no OSC progress active and CPU-quiet; startup grace mutes screen-derived candidates entirely. +- Watchdog inside the tick, after arbitration: if `raw == Working` and `now - last_output > WATCHDOG_QUIET` and `cpu_jiffies_delta == 0` and `child_process_count == 0`, override the candidate to `Unknown` with an `EvidenceNote::WatchdogDemoted`, and emit a rate-limited stuck diagnostic event. 
+- Tests: every table row; `watchdog_demotes_quiet_working_authority`; `watchdog_does_not_fire_during_network_wait_with_recent_output`; `inferred_idle_needs_three_confirmations`; `visible_idle_publishes_immediately`; `startup_grace_mutes_screen`. -| Built-in runtime | Primary source | Fallback source | Implementation notes | -|---|---|---|---| -| Claude Code | Container-local Claude Code hooks installed into the container-local Claude home when the installed runtime supports the needed events. | Foreground process plus visible prompt/approval/working chrome. | Map permission/auth/model/user-question prompts to `blocked`; map stop/idle prompt to raw `idle`; keep subagent events scoped so a child stop does not mark the parent done early. Borrow agent-deck's drift-repair idea: hook config should be validated and repaired on every launch, but only inside the container. Current launch permissions bypass many tool approvals, so blocked detection must still cover questions and account/runtime prompts. | -| Codex | Structured Codex status surfaces such as app-server thread status or runtime hooks when present in the installed CLI and tied to the same interactive session. | Foreground process plus visible confirmation/plan/question/working chrome. | Use waiting-on-approval and waiting-on-user-input flags as `blocked`; keep active turn as `working` even if idle status races during turn completion; avoid false positives from installer/update output. If app-server cannot observe the visible TUI session, do not run it as a separate truth source; use the jackin' reporter wrapper plus visible/process evidence until a same-session bridge is verified. Current launch bypasses approvals/sandbox, so blocked mostly means plan/user/auth/runtime prompts. | -| Amp | Runtime hook/plugin API if a stable one exists in the installed runtime. | Foreground process plus approval/waiting/interrupt chrome. | Current launch uses dangerous allow-all, so most tool approval prompts are bypassed. 
Treat auth prompts, operator questions, and runtime modal prompts as blockers; otherwise prefer `unknown` over overfitting Amp text. | -| Kimi | Runtime hook/plugin API if a stable one exists in the installed runtime. | Foreground process plus approval/request prompts, thinking indicators, and visible idle prompt. | Current launch uses yolo mode; start with narrow visible heuristics from Herdr-style patterns and add structured integration only after confirming the installed Kimi CLI exposes a stable surface. | -| OpenCode | OpenCode HTTP/SSE/session/question APIs or plugin integration when available inside the same container and scoped to the same session. | Foreground process plus permission/question/working chrome. | Pending OpenCode questions map to `blocked`; active session or descendant session maps to `working`; idle only becomes public `done` or `idle` through the seen overlay. Prefer an in-container bridge that reports to Capsule. Do not write a generated plugin into a bind-mounted project `.opencode/plugins/` directory by default; Codeman's OpenCode research correctly flags plugin/API drift and shared-state risks that must be verified first. Existing permission-allow config reduces tool approvals but not user questions or auth/runtime prompts. | -| Custom role agents | Role-authored reporter over the Capsule socket, or a future role manifest status contract. | Foreground process identity and weak activity detection. | Do not grow unrestricted regex sprawl in core. A future role-author contract can declare process labels and a small number of named visible states, but unknown should remain the default for unsupported runtimes. 
| +### Slice 4 — Semantic channel on, safely -### Stuck Detection +- Spawn env: in the `session.rs` spawn path (the function that builds the per-pane `CommandBuilder`), add `cmd.env("JACKIN_SESSION_ID", id.to_string())`, `JACKIN_STATUS_SOURCE = format!("hook-{agent}-{id}")`, `JACKIN_AGENT_RUNTIME = agent slug`, `JACKIN_STATUS_SOCKET = "/jackin/run/jackin.sock"`. Shell panes get only the socket var. +- New subcommand in : `jackin-capsule report-event --event [--payload-stdin]` — reads env for session/source/runtime, reads optional JSON payload from stdin, sends one framed control message `ReportRuntimeEvent { session_id, source_id, runtime, event, payload }` over the socket, exits 0 even on failure (reporters must never break agent hooks). Extend `ClientMsg` in accordingly; keep `ReportAgentState` for role-authored reporters but mark it lower-trust in gating. +- New file `crates/jackin-capsule/src/agent_status/gating.rs`: `pub fn map_event(event: &RuntimeEvent, state: &mut SourceGateState) -> GateEffect` implementing the gating table verbatim (prompt-submitted/tool-start/tool-end → working; permission-requested/question-asked → blocked + `pending_permission = true`; permission-resolved → clears it + working; turn-complete → idle **only if** `!pending_permission && subagents_active == 0`, else working with a `GateNote::StopSuppressed`; subagent-start/stop → counter only; session-end → clear authority; heartbeat → freshness only). The daemon assigns `seq` by arrival order per source; reporter timestamps go into evidence notes only. +- Rewrite the Claude hook script at as a dumb pipe: read stdin, extract `hook_event_name` (and notification type for `Notification`), exec `jackin-capsule report-event --event "$EVENT" --payload-stdin`. Delete the Stop-blocking branch, the python framing, the `nc` dependency. Map vendor event names to canonical ones in `gating.rs` (one match arm per runtime), not in shell. 
+- Installer (): add `Notification` to the registered events; change `merge_hook_entries` to append-or-repair its own command entry inside each event's array without touching other entries; delete registrations for unverified events (`TaskCreated`, `TaskCompleted`) until a live payload confirms them. +- Delete the old OpenCode ACP bridge and the Kimi placeholder hook; Slice 9 names the runtime-specific replacements. Launch nothing at entrypoint scope that needs a session id. +- Tests: every gate row including `stop_with_pending_permission_stays_blocked`, `stop_with_live_subagents_stays_working`, `permission_resolved_unblocks`; arrival-order sequencing under interleaved sources; an integration test that spawns a session and asserts the four env vars are present in `/proc/<pid>/environ`. -### Stuck Detection +### Slice 5 — Process physics -`stuck` should be a diagnostic overlay driven by expectations, not a normal lifecycle state. Examples: -- A prompt/task was submitted, the foreground agent process is still alive, and no reliable `working`, `blocked`, or `idle` transition arrived within the runtime's timeout. -- A hook source reports `working` for too long while the visible screen and process state disagree. -- A process remains foreground but stops responding to cursor/screen probes. +- Wire into the tick: known child PID from spawn is the anchor; sample foreground pgid (`tcgetpgrp` equivalent via `/proc/<pid>/stat` tpgid), child-process count (scan `/proc` for ppid chains under the agent PID, cached per tick), and CPU-jiffy delta (utime+stime difference across `CPU_SAMPLE_WINDOW`). Populate `ProcessEvidence`.
+- Exit ordering: the existing reaper path publishes the exit transition (raw idle, evidence `process_exited`) **before** session teardown clears identity and authority; foreground-returned-to-shell (root process group owns the foreground again while the child is alive, after prior agent identity observation and with no descendant work) publishes idle with a `foreground_returned_to_shell` note, then clears agent identity/authority and seeds shell-pane evidence. +- Tests: pure process fixtures at the `/proc` boundary (direct binary, `node`-wrapped, dead pid, shell foreground ownership, descendant tree counting); `exit_transition_precedes_identity_clear`; CPU-delta sampling-window math. -Stuck events should be rate-limited, include the evidence trail, and surface as "inspect this agent" rather than "agent needs approval." This prevents false positives from training operators to ignore attention prompts. +### Slice 6 — OSC evidence channel -### Telemetry +- At the parser boundary where the capsule already processes per-session escape sequences, retain `OscEvidence` per session: title changes (OSC 0/2), OSC 9-family notification arrivals, OSC 9;4 set/clear with payload state, BEL. Cap stored payload length (256 bytes) — OSC content is untrusted model output. Accept only while `foreground_is_agent`; wipe on identity change and on agent restart. +- Wire the existing `scan_osc133` into the shell-pane path: `PromptEnd → idle`, `PreExec → working` (these enter arbitration as screen-strength evidence for shell panes only). +- Tests: synthetic byte streams set/clear each field; payload cap; `osc_ignored_when_foreground_not_agent`; shell pane goes working on pre-exec and idle on prompt-end. -Status authority needs durable debug evidence. Compact `clog!` lines should record lifecycle transitions, authority changes, process-exit clearing, and externally visible notifications. 
Verbose `cdebug!` lines should record per-tick evidence, foreground process-group reads, visible detector booleans, hook/API report acceptance/rejection, sequence validation, raw/effective state calculations, and roll-up results. When a rich TUI owns the terminal, debug output must go only to the diagnostics run file, following the existing debug-output rule. +### Slice 7 — Rule-pack engine -## Control Protocol +- New file `crates/jackin-capsule/src/agent_status/rules.rs`: TOML pack loader (schema from the Screen-rules section: `schema_version`, `agent`, `validated_versions`, `[[rule]]` with `id`, `state` (`working|blocked|idle|freeze`), `priority`, `region`, `strength`, `requires_all`, `requires_any`, `forbids`, optional `regex`), region extraction (`bottom:N`, `bottom_nonempty:N`, `prompt_box_body`, `above_prompt_box`, `after_last_rule`, `last_nonempty_line`, `osc_title`, `osc_progress`), highest-priority-wins evaluation, `unknown` on no match, and safety caps (≤128 rules, ≤32 matchers/rule, ≤512 chars/matcher). +- Packs live at `docker/runtime/agent-status/packs/<agent>.toml`, installed under `/jackin/runtime/agent-status/packs/`, and loaded at daemon start with operator overrides taking highest precedence. Encode the corrected Claude rules (no transcript-bullet spinners, spinner at line start + interrupt hint, no horizontal-rule prompt fallback, blocked anchored to dialog-affordance co-presence), and translate the existing Codex/Amp/Kimi/OpenCode detector patterns into packs. +- Delete the five `detectors/*.rs` modules and the `Detector` trait once packs reproduce their true positives. +- Fixture harness (new test file): walk `agent_status/screen/fixtures/<agent>/*.txt`; expectation encoded by filename prefix (`working*`, `blocked*`, `idle*` → that state; `false_positive*` → first line of the fixture names the wrong state it must NOT produce, asserted as "anything but"); the three Claude false-positive fixtures must pass.
`jackin-capsule status capture ` writes a new fixture from the live grid + evidence bundle. +- Co-versioning: pack `validated_versions` checked against the pinned agent CLI version during image build (build-jackin-capsule / construct pipeline emits an error listing pack vs pinned version on mismatch). +- Tests: per-region extraction; priority ordering; unknown fallback; caps enforced; the harness itself. -The control plane should grow from one-shot `status`/`snapshot` toward state events: +### Slice 8 — Contract completion -| Surface | Purpose | -|---|---| -| `session.report_agent_state` | Runtime hook/plugin/API bridge reports raw state and optional metadata for one session. | -| `session.heartbeat_agent_authority` | Runtime reporter confirms that its source is still alive without changing raw state. | -| `session.clear_agent_authority` | Runtime hook/plugin/API bridge releases its authority when it exits or knows it is stale. | -| `session.report_child_agent_state` | Runtime bridge reports descendant/subagent lifecycle so parent panes do not become done while children are still working. | -| `events.subscribe` | Host/daemon/console subscribe to session lifecycle and effective state transitions. | -| `wait session-status` | CLI scripts block until a session reaches `blocked`, `done`, `idle`, or `exited` without polling. | -| `session.read_visible` | Debugging path to read visible/recent pane text when state looks wrong. | +- Seen/ack: call the acknowledgment path when a pane gains focus and when an attach client views it (daemon focus-change and attach handlers); `done → idle` on focus, re-work resets seen (already in the work-cycle rule). End-to-end test through the daemon, not only the unit machine. +- Subagent counter: consumed from gating (slice 4) and from `ReportChildAgentState` for bridge reporters; expose `subagents_active` in evidence and events. 
+- `jackin-capsule status explain `: print the current snapshot, every rule evaluated with a region preview (240 chars), authority state/grade/freshness, winner step, and active debounce holds; compact/debug logs carry transition and stuck breadcrumbs, while `status capture` writes the full evidence bundle. +- Telemetry counters: transitions per session-minute and watchdog demotions, emitted through the existing `clog!`/diagnostics path. -The event stream should emit both raw/effective state and evidence metadata: detected agent, source, confidence, visible blocker/idle/working booleans, process-exited flag, foreground process group, sequence, revision, and seen revision. Consumers should subscribe; they should not poll rendered terminal previews. +### Slice 9 — Runtime adapters -## Phases +- Codex: installer writes container-local `~/.codex/hooks.json` (events from the adapter table) and enables the TUI notification OSC method in container config; title-spinner mapping in `OscEvidence`; dumb-pipe hook script. +- OpenCode: container-local plugin (config home, never project tree) forwarding the five event kinds to `jackin-capsule report-event`; defensive handling for the known stuck-status/dropped-promise upstream quirks (treat missing idle events as TTL expiry, never as proof of work). +- Amp: container-local plugin on the four lifecycle events. +- Kimi: rule pack is primary; ensure no hook assets remain; pack fixtures for moon-phase/braille thinking, approval prompt, idle box. +- Per-runtime gating tests with recorded event sequences (prompt → tools → permission → resolve → stop) asserting the public transition sequence, including the interrupt hole: working authority + Esc (no event) + quiet physics → watchdog → unknown, then prompt box → idle. +- UI polish landed with `stuck` as a diagnostics overlay, token usage on session snapshots/events, capsule status-bar glyphs, and console session rows that display the shared public state vocabulary. 
-### Phase 0 — Stop Lying With Silence +## Acceptance Criteria -Remove or demote the current "silent for N seconds means blocked" behavior. Until the real authority exists, a quiet live session should become `unknown` or weak `working`, not `blocked`. This is a product-quality fix because false blocked indicators are worse than no blocked indicator. +- No code path translates PTY output bytes or operator keystrokes into a state value (reviewable invariant; soak test enforces it). +- Reporters forward runtime events; all mapping and gating lives in daemon Rust with a unit test per gate row. +- The fixture harness executes: every built-in runtime has passing working / blocked / idle / false-positive fixtures, including the three named Claude false-positive classes. +- Every `AgentStateChanged` carries evidence computed from inputs, never from the output state. +- Authority demonstrably expires: TTL lapse, process exit, identity contradiction, and watchdog demotion are each tested and leave evidence notes. +- Focus/attach acknowledgment works end-to-end through the daemon: raw idle + unseen → `done`, focus → `idle`, re-work → `done` again. +- Rule packs declare validated agent-version ranges and the image build enforces the pairing. +- A quiet live session never becomes `blocked` from silence, and an unmatched screen never becomes `idle` — both fall to `unknown`. +- Wait semantics hold when the session is already in the requested state; roll-up priority is covered by tests. +- Flap-rate telemetry exists; the blocked-dialog soak test shows one initial transition into `blocked` and zero redraw-driven flaps after that. -### Phase 1 — State Model And Event Stream +## Do Not Do -Add the internal raw state, derived public status, `seen` flag, attention-priority roll-up, `Unknown` protocol state, and `agent-state-changed` event stream to `jackin-capsule` and `jackin-protocol`. Console and host status calls should read the same effective state. 
+- Do not copy Herdr, claude-squad, cmux, or coder/mux source or wire formats; concepts only, implemented from scratch, source never open side-by-side. +- Do not let PTY output activity or operator keystrokes author state — not even temporarily. +- Do not let reporters change agent behavior (no stop-blocking, no decision injection). +- Do not map Claude Code's idle-timer notification to blocked; do not treat OSC 9;4 progress-active as working-proof; do not use cursor-probe timeouts as a blocked detector (event-loop TUIs). +- Do not fall back to `idle` when nothing matches — fall back to `unknown`. Do not treat silence as `blocked`. +- Do not silently edit host agent homes, shell rc files, terminal settings, git config, or user repositories; do not create jackin-owned container paths outside `/jackin/`; do not write status hooks/plugins into bind-mounted project directories. +- Do not let every UI surface parse terminal text independently; do not add broad regexes over historical output — rules are narrow, region-anchored, fixture-backed data. +- Do not let cooperative state files or hooks prove health without process/session validation, freshness, and the physical watchdog. -### Phase 2 — Container Reporter And Process Identity +## Future Decisions -Install the status reporter under `/jackin/runtime/agent-status/`, add the `report_agent_state`, `heartbeat_agent_authority`, and `clear_agent_authority` control methods, launch built-in runtimes with stable `JACKIN_*` environment, and report wrapper-level process start/exit. Implement foreground process-group detection inside the container so every report can be validated against the live process tree. Add tests with fake reporter messages and fake `/proc` fixtures. +- **Role-authored rule packs and named states** once the role manifest contract exists (nearly free after slice 7). +- **Adaptive watchdog timeouts** (bounded multiple of per-runtime median cycle time) once telemetry exists. 
+- **Structured headless bridges** — `codex exec` JSONL, OpenCode SSE, Claude supervisor state — for jackin-managed queue/headless modes; higher-grade authorities where they observe the same session. +- **LLM-tier status descriptions** as a rate-limited display layer over the badge; never the badge. +- **Amp/Kimi deeper integrations** as upstream surfaces stabilize. -### Phase 3 — Semantic Runtime Reports +## Appendix A — Historical Validation Audit (June 10, 2026) -Install container-local hook/plugin/API reporters for Claude Code, Codex, and OpenCode, then extend to Amp and Kimi as each runtime's hook/plugin surface allows. Reports use sequence numbers and source IDs, and the state machine arbitrates against process/screen evidence. Claude hook drift repair and Codex/OpenCode API bridge compatibility checks belong here. +This audit is the pre-implementation defect record that drove the branch. Keep it as regression context, not as the current gap list: the June 11, 2026 implementation replaced the byte-driven state machine with evidence arbitration, live reporter events, `/proc`/OSC corroboration, rule packs, fixture coverage, and `status explain` diagnostics. -### Phase 4 — Screen Fallback And Readiness Diagnostics +1. **PTY byte → working.** The old `Session` PTY output handler and `mark_operator_input` both called `apply_raw_state(WorkingVisible)`. Consequences observed: a Blocked dialog repaint flipped Blocked→Working and the next 1 Hz detector tick flipped it back (one event + redraw per flip, every second); an idle Claude pane repaint flipped Idle→Working, the next tick's prompt-box match produced raw idle, and because entering a work cycle reset `seen`, the public state landed on a false `Done` — repeating forever; arrow keys inside a blocked dialog flipped Blocked→Working and re-notified on re-entry; shell panes (no detector registered) became permanently `Working` after any output. +2. 
**Arbitration dead.** The old arbitration module's own header said it was "currently used in tests only". The live mutation path was `SessionStatus::advance`, last-signal-wins. The unused function also carried an inverted grace bug: its visible-idle staleness check measured observation age, so an *older* (less trustworthy) screen observation was what overrode a working authority. +3. **Hook channel dead end-to-end.** No Rust code set `JACKIN_SESSION_ID` (zero occurrences in the workspace); every shipped reporter script began with an exit-if-unset guard. The entrypoint exported `JACKIN_STATUS_SOURCE` with a session-id default of `0` at container scope. `heartbeat.sh` shipped and was never launched. Authority `last_seen` was refreshed but no staleness sweep existed, and because arbitration was unwired, stored authority affected only the broadcast's `source` string. +4. **ACP bridge triple-fault.** Launched at entrypoint scope where the session id is unset (guard exits); even if running, it spawns a second `opencode acp` instance — a different session than the pane's TUI, which this page forbids as a truth source; and its `opencode acp | python3 -u - <<'PYEOF'` pipeline lets the heredoc replace python's stdin, so the JSON-RPC stream is never read. +5. **Stop-hook behavior mutation.** The Claude hook returns `{"decision":"block"}` when background tasks are reported running, forcing Claude to continue — agent control-flow mutation inside an observability feature. Also, all report invocations in one hook run share one timestamp-derived sequence value, and timestamp-as-sequence across concurrent hook processes gives no causal order (an async PostToolUse stamped after a sync Stop overwrites idle → stuck Working at turn end). +6. 
**Installer gaps.** Registered events omitted `Notification` (the channel carrying permission-prompt and elicitation types, which the hook script handles); `TaskCreated`/`TaskCompleted` registrations were unverified against vendor docs; `merge_hook_entries` replaced each event's whole array, destroying role-authored entries; the reporter transport was sh + cat + two python spawns + `nc -U` per event — a fork-heavy latency tax on Claude's synchronous hook path with a BusyBox-vs-OpenBSD `nc` portability hazard. +7. **Detector false-positive generators (Claude).** The spinner glyph list includes transcript bullets (`⏺`, `·`, `•`, `●`, `▸`, `∙`, `○`), and the rule is glyph + later "ing" + ellipsis anywhere in the line — a historical transcript line such as a tool-call bullet with a gerund matches `WorkingVisible` while the agent idles at its prompt. `is_blocked` matches the joined bottom-24-row window, so an approval question that scrolled into transcript keeps firing `BlockedVisible` while the agent works. The prompt-box fallback accepts any row with more than 20 horizontal-rule characters — Claude prints such rules in normal output, producing false `PromptVisible` whenever working chrome is momentarily absent at the sample instant. No debounce exists anywhere, so each single-frame mis-sample publishes. +8. **Fixtures unloaded.** No harness loads `agent_status/screen/fixtures/`; the Claude `false_positive_old_spinner` fixture (stale spinner line above a live prompt box) classifies as Working under the current detector — proof the fixtures were never executed. +9. **Evidence fabricated.** `broadcast_agent_state_changed` sets `visible_blocker := state == Blocked`, `visible_idle := state == Idle`, `visible_working := state == Working`, `process_exited := state == Idle`, `confidence: None`, `foreground_pgid: None`, `last_seen_revision := current revision` — every diagnostic field derived from the output, none from inputs. +10. 
**Dead modules.** `/proc` identity (`read_tpgid`, `identify_agent`, `detect_foreground_agent` — implemented and unit-tested), `scan_osc133`, cursor-probe raw states, and `seen::acknowledge_session`/`mark_pane_focused` had zero live callers; `ReportChildAgentState` was an empty match arm; one unit test asserted the subagent guard "by simulating the daemon guard by not calling advance" — testing a guard that did not exist. +11. **Misc.** The legacy `Session.state` field initializes to `Working` while `SessionStatus` starts `Unknown`; the capsule and protocol crates each define a different `AgentRawState` (name collision, wrong-import hazard); `visible_lines()` dumps the entire grid per session per tick where the detection window suffices. -Implement a small set of runtime-specific visible-screen detectors for the built-in runtimes, starting with Claude Code, Codex, and OpenCode because their approval/question/idle surfaces drive the highest operator wait cost and have the strongest prior art. Add cursor-position and shell-integration probes for input readiness and stuck diagnostics. Use hard timeouts and separate tasks so a broken probe cannot block the multiplexer. +## Appendix B — Herdr Reference Notes (concepts and observed facts; AGPL — no code) -### Phase 5 — Host Consumers +Source review of head `1fc5e84`, June 10, 2026. Facts recorded for design comparison; all jackin' implementation must be original. -Update `jackin console`, the daemon, Desktop Agent Hub, attention prompts, autonomous queue, resource panel, and token/cost telemetry to consume the event stream. These consumers should not reimplement state detection. 
+- **Signal classes**: foreground-process identity (tpgid from `/proc/<pid>/stat`, process-group scan, wrapper unwrapping for node/bun/python/`sh -c`, symlink canonicalization); bottom-of-buffer screen text (last terminal-height rows — operator scrollback never affects detection); passively captured OSC 0/2 title and OSC 9-family progress (payloads capped at 256 chars, wiped on agent change); Unix-socket lifecycle reports with per-source monotonic sequence. Deliberately absent: CPU sampling, `/proc` state chars, syscall tracing, timers-on-output. +- **Authority model**: agents with complete lifecycle-hook coverage get exclusive authority — screen detection is short-circuited entirely while live. Claude Code and Codex hooks are deliberately *not* state authorities there (they miss permission results and Esc interrupts); for those, screen/OSC manifests decide. A live visible blocker overrides a non-blocked hook only when the screen observation is not older than the hook report. +- **Observed runtime policy facts**: detection tick ~300 ms with an identified agent, ~500 ms without; working→plain-idle requires 3 consecutive idle reads on a ~100 ms recheck capped at ~700 ms, while positively matched visible idle bypasses the hold and blocked is never held; a persisting visible blocker is re-published roughly every 800 ms to keep downstream freshness; startup grace ~3 s with OSC state wiped so a new process cannot inherit the previous title; identity cleared only after 6 consecutive probe misses, except foreground-return-to-shell which publishes an exit transition first; released sources are suppressed from re-acquisition for ~1 s; remote manifest updates poll every 30 min with size caps, dotted-version ordering, engine-version gates, and full validation before acceptance; local override beats remote beats bundled.
+- **Manifest engine shape**: priority-ordered rules, each naming a target state, a structural region (whole-recent, bottom-N, prompt-box body, above-prompt-box, after-last-prompt-marker, after-last-horizontal-rule, plus osc-title/osc-progress virtual regions), strong-evidence flags, substring/regex matchers with nested all/any/not gates, and overlay rules that freeze state; unmatched screens fall back to idle (jackin' deliberately diverges: `unknown`). Complexity caps protect the engine from pathological rule data. +- **Era timeline**: hand-coded per-agent matchers (every TUI wording change = code change + release) → PTY-activity-first with keystroke-taint causality, working leases (~1.8 s), and idle/working confirmation debounces (June 8–9) → all deleted June 10 with changelog rationale that output activity no longer publishes working, vetoes blockers, or decides idle, because input echo, resizes, redraw nudges, and agent-spawned background processes defeat byte-level causality → manifest engine + OSC evidence, with a final fix dropping a Codex background-terminal rule because long-running helper terminals kept panes falsely working. +- **Diagnostics**: an explain command dumps final state, manifest provenance, every evaluated rule with matcher/gate counts and a region preview, and the fallback reason; their contributor loop is reproduce → explain → edit rule → hot reload → verify. -## Acceptance Criteria +## Appendix C — Per-Runtime Signal Surface Reference (verified June 10, 2026) -This roadmap item is implementation-ready when an agent can build from these requirements without needing to re-research the product shape. The first PR that claims a meaningful implementation slice should satisfy these criteria: - -- A quiet live session no longer becomes `blocked` solely because no bytes arrived for a few seconds. -- `AgentState` or its replacement has an explicit `Unknown` state in the protocol, and every current status/snapshot path can carry it. 
-- `done` is derived from raw idle plus seen/unseen revision state; no reporter or detector can directly report public `done`. -- `jackin-capsule` has one status authority path that owns raw state, effective status, evidence, revision, and roll-up. Console, daemon, host commands, and future desktop surfaces consume that path instead of parsing terminal text. -- The container image/setup path installs the status reporter under `/jackin/runtime/agent-status/`, launches built-in runtimes with stable `JACKIN_*` status environment, and has tests proving reporter events flow through the Capsule socket. -- Reported status authority is accepted only with valid session id, source id, monotonic sequence, freshness, and process/runtime consistency; stale, conflicting, or exited reports are rejected or cleared with diagnostic evidence. -- Built-in runtime detection starts with Claude Code, Codex, and OpenCode, with Amp and Kimi represented as supported runtimes that produce `unknown` or weak fallback state until their stable integration surfaces are verified. -- Every built-in visible detector has sanitized fixture coverage for at least one working, blocked, idle, and false-positive case before it can drive notifications. -- Runtime hooks/API reporters are installed only inside the container or derived image state under `/jackin/` and container-local agent homes. The implementation does not write host agent config, host shell files, host terminal config, host git config, or user repositories. -- Runtime hook/plugin setup does not write into bind-mounted project directories by default; any integration that requires project-tree files is explicit and surfaced to the operator. -- The status event stream includes enough evidence to debug a wrong state from diagnostics: detected agent, authority source, confidence, foreground process group, visible detector booleans, sequence/timestamp, and reason for stale/rejected reports. 
-- Workspace/tab roll-up uses `blocked > done > working > idle > unknown`, and this priority is covered by tests. -- Wait semantics work when the session is already in the requested state; callers must not miss an already-satisfied status. - -## Test Plan - -- Pure arbitration unit tests: sequence rejection, stale source clearing, process identity mismatch, visible blocker override, visible idle staling, hook blocked precedence, and `unknown` fallback. -- Derived public-status tests: raw idle plus unseen becomes `done`, focus/ack becomes `idle`, and roll-up priority is `blocked > done > working > idle > unknown`. -- Runtime setup tests: reporter assets land under `/jackin/runtime/agent-status/`, runtime launch env includes the `JACKIN_*` status variables, and hook/API bridge config is written only to container-local homes or `/jackin/` paths. -- Reporter protocol tests: accept valid reported state, reject stale sequence, reject wrong session/source/runtime, expire missing heartbeats, clear authority on process exit, and preserve evidence for diagnostics. -- Screen detector fixtures for Claude Code, Codex, Amp, Kimi, and OpenCode using captured sanitized visible screens, including false-positive fixtures such as installer/update output and completed historical prompts. -- Fake `/proc` process tree tests for direct agent processes, wrapped `node`/shell launches, child processes, dead process groups, and foreground shell handoff. -- Control protocol round-trip tests for `report_agent_state`, `clear_agent_authority`, event subscription, and wait semantics where the session is already in the target state. -- PTY/e2e tests that spawn fake agents, draw prompt/approval/working frames, exit/restart, and verify transition notifications without relying on real third-party CLIs. -- Debug telemetry tests or golden assertions that important rejection paths include enough evidence in diagnostics without printing into a rich TUI. 
- -## Scope - -In scope: - -- One canonical state authority inside `jackin-capsule`. -- Runtime-neutral public status vocabulary. -- Per-session raw state, public derived status, `seen`, evidence metadata, and revision. -- Container-local reporter assets under `/jackin/runtime/agent-status/` and reporter state under `/jackin/state/agent-status/`. -- Process detection through Linux `/proc`. -- Screen heuristics for built-in runtimes. -- Container-local hook/plugin/API reporters for supported runtimes where each runtime exposes a stable surface. -- Event subscription and wait semantics over the control socket. -- Tests for arbitration, sequence rejection, stale hook clearing, process exit, `done`/`idle` acknowledgement, roll-up priority, and false-positive suppression. - -Out of scope for the first implementation: - -- Host-side hook installation without an explicit operator action. -- API/network interception of LLM provider traffic. -- Runtime-specific tool-level status for every command/tool. -- Windows ConPTY support. -- Autonomous recovery actions after stuck detection. Phase 1 only reports and routes attention. -- Support for future agents such as Gemini beyond the custom-agent fallback contract. +- **Claude Code** — hooks: code.claude.com/docs/en/hooks; terminal config: terminal-config; agent view: agent-view; monitoring: monitoring-usage. Events incl. `SessionStart`, `UserPromptSubmit`, `PreToolUse`, `PermissionRequest`, `PermissionDenied`, `PostToolUse`, `PostToolUseFailure`, `Notification` (types `permission_prompt`, `idle_prompt` (fires ~60 s after idle), `elicitation_dialog`, `auth_success`), `SubagentStart`/`SubagentStop`, `Stop`, `StopFailure`, `SessionEnd`; hooks accept `async: true` and a per-hook timeout; all receive `session_id` on stdin. `AskUserQuestion` is a tool, so a `PreToolUse` matcher on it is the question-dialog signal.
Known reliability issues: false/missing idle notifications and payload gaps (anthropics/claude-code issues 12048, 13024, 11964, 8320). Emits OSC 9-family notifications per `preferredNotifChannel` and ConEmu OSC 9;4 indeterminate progress; the progress bar animates during approval prompts (issue 12620), so active ≠ working. Does not emit OSC 133 (issue 32635 closed unimplemented). OTEL traces include a blocked-on-user span — audit-grade, too slow for badges. `claude agents --json` exposes native `working`/`blocked`/`done`/`failed`/`stopped` plus a waiting-for reason, supervisor-hosted sessions only. +- **Codex CLI** — hooks: developers.openai.com/codex/hooks; config: config-reference; non-interactive: noninteractive. Hooks shipped and on by default (`~/.codex/hooks.json` or config): `SessionStart`, `UserPromptSubmit`, `PreToolUse`, `PermissionRequest`, `PostToolUse`, `SubagentStart`/`SubagentStop`, `Stop`; only command handlers run; async entries are skipped. The separate `notify` program receives `agent-turn-complete` only (approval gap tracked in openai/codex issue 13478). `tui.notifications` filters `agent-turn-complete` and `approval-requested` with `notification_method = osc9` available; the terminal title defaults to spinner + project items, making it a busy/idle channel. `codex exec --json` emits thread/turn/item lifecycle JSONL. +- **OpenCode** — plugins: opencode.ai/docs/plugins. Events: `session.status` (busy/idle), `session.idle`, `session.error`, `permission.asked`/`permission.replied`, `message.updated`, `tool.execute.before/after`; the same stream is available over the server's SSE endpoint. Known quirks: status stuck after async prompts (anomalyco/opencode issue 12860); `session.idle` plugin handlers fire-and-forget (issue 16879). +- **Amp** — manual: ampcode.com/manual; plugin API: plugin-api. 
Plugin events `session.start`, `agent.start`, `tool.call`, `tool.result`, `agent.end`; built-in notifications on completion and on blocked-waiting; `AMP_FORCE_BEL` forces a BEL. +- **Kimi CLI** — wire mode: wire-mode docs (JSON-RPC stdio with turn/step/status/approval/subagent events — a server mode, not the TUI). No TUI hook surface; no OSC 9 yet (MoonshotAI/kimi-cli issue 1342). +- **Non-built-in runtimes** (reference for the future custom-role reporter contract): Gemini CLI hooks (geminicli.com/docs/hooks) include `BeforeAgent`/`AfterAgent` (turn boundaries), `BeforeTool`/`AfterTool`, and `Notification` with the documented `ToolPermission` type — hooks run synchronously in the agent loop; only `ToolPermission` is concretely documented, so treat broader marketing claims as unverified. Aider has no hook system, but `--notifications-command` fires when the LLM has finished and is waiting for input (aider docs); as a plain readline CLI it genuinely blocks in tty read while waiting, so the wchan-class check in Appendix F is valid for it alone. Pi exposes extension lifecycle events (`agent_start`/`agent_end`, `turn_start`/`turn_end`, `tool_call`) via pi-mono's extension API. +- **Community corroboration** (secondary sources): hook-notification write-ups at alexop.dev, kane.mx, and martin.hjartmyr.se; Hacker News threads on cmux, Agent Notify, Ccnotifs, and Sculptor document the same false-positive complaints this design eliminates. -## Do Not Do +## Appendix D — Orchestrator Survey (what to borrow, what to avoid) -- Do not copy Herdr source or wire format. -- Do not silently edit host agent homes, shell rc files, terminal settings, git config, or user repositories. -- Do not create container paths owned by jackin' outside `/jackin/`. -- Do not silently write status hooks/plugins into bind-mounted project directories. -- Do not treat silence as `blocked`. -- Do not let every UI surface parse terminal text independently.
-- Do not add broad regex libraries of historical output; visible-screen heuristics must be narrow and fixture-backed. -- Do not let cooperative state files or hooks prove health without process/session validation and stale handling. +| Tool | Mechanism | Lesson for jackin' | +|---|---|---| +| workmux (MIT) | Per-agent hooks assert Working/Waiting/Done; watchdog marks "working + pane output unchanged 10 s" as interrupted, auto-clears on resume | The hook+watchdog cross-check — adopted as the universal demotion rule | +| tmux-agent-status (no license) | Pure hooks; Stop→done gated on a pending-background-tasks claim | Gate turn-complete on pending permission + live subagents; verify the background-tasks payload field before trusting it | +| claude-squad (AGPL) | 500 ms tick, SHA-256 pane hash changed ⇒ Running; literal prompt strings ⇒ Ready | Anti-pattern: animation = work; UI copy change breaks Ready | +| VibeTunnel (MIT) | Output within 5 s ⇒ active; BEL + process-tree attribution; title filter that owns OSC 2 | Recency ≠ semantics; BEL attribution and title ownership are worth borrowing | +| cmux (dual proprietary) | Terminal itself interprets OSC 9/99/777 from panes; installs hooks for 13 agents | Closest cousin; its tmux-passthrough failure cannot happen when the capsule is the terminal | +| unitmux | Title markers + recent-output prompt parsing, EN+JA pattern packs | The maintenance burden of pattern packs without co-versioning | +| Claude agent view (native) | Supervisor owns processes; exact states; Haiku row summaries | Own the process ⇒ get the truth; LLM only for descriptions | +| Crystal / Conductor / omnara / happy | Drive `claude` via stream-JSON / SDK relays instead of PTY scraping | Right answer for headless modes; not applicable to operator-visible PTY panes | +| claude_code_agent_farm (MIT) | Heartbeat files (stale >2 min ⇒ stuck); adaptive idle timeout = 3× median cycle bounded 30 s–600 s | Adaptive watchdog bounds as future work | +| zellaude / 
claude-code-zellij-status | Hook → pipe → plugin state; ~16 states from four hook kinds | Hook-fed state files work; host-side install is their constraint, not ours | +| coder/mux (AGPL) | Sidebar status from a small model at intervals | LLM-as-classifier: descriptions only | +| tmux native | `monitor-activity` / `monitor-silence` / `monitor-bell` | Documented insufficiency (tmux issues 172, 2047): cannot distinguish waiting from finished from nothing | + +## Appendix E — Terminal-Protocol Signal Reliability + +| Signal | Agent emission | Notes | +|---|---|---| +| OSC 133 A/B/C/D prompt marks | No agent TUI emits them; shells do (fish 4+, nushell natively; bash/zsh via integration) | Excellent for shell panes (`C` running, `D;exit` done). jackin' synthesizes them in container shell rc. Spec: iTerm2 escape codes | +| OSC 9 / 99 / 777 notifications | Claude (per `preferredNotifChannel`), Codex (`osc9` method incl. approval-requested), Amp (BEL/SSH), Pi (extension) | Out-of-band, works from alt-screen; semantics = "done OR needs attention" — arbitration decides which. tmux swallows without passthrough — irrelevant when the capsule is the terminal | +| OSC 9;4 progress (ConEmu) | Claude emits indeterminate while working | States: 0 clear, 1 set, 2 error, 3 indeterminate, 4 paused. Clear edge = done-ish hint; active ≠ working (animates during approvals). Reference: rockorager.dev OSC 9;4, Ghostty ConEmu OSC | +| Title OSC 0/2 | Codex: documented spinner/status items, default on. 
Claude: title-setting mechanism unclear in docs — log OSC traffic from a live container pane before relying on it | Title is a shared mutable resource; attribute to foreground agent, wipe on change | +| BEL | Claude (`terminal_bell`), Amp (SSH), others | Coarse attention edge; attribute via process tree | +| DECSET toggles (1049 alt-screen, 2004 paste, 2026 sync, 1004 focus) | All TUIs at startup | Not state-correlated; alt-screen edges ≈ app start/stop only | -## Future Decisions +## Appendix F — `/proc` Signal Validity (Linux containers) -- **Role-authored visible detectors.** A future role manifest contract may let role authors declare small named visible states for custom agents. That should come after the built-in status authority exists. -- **Runtime-specific stuck policy.** Initial stuck timeouts can be conservative constants; later work can make them role-specific or task-class-specific. -- **Amp/Kimi semantic integrations.** Add structured reporters only after validating stable hook/plugin/API surfaces in the installed runtimes. -- **Health and keep-awake consumers.** Sleep inhibition, health scores, circuit breakers, and escalation policies should consume the authority after it ships. +- CPU-jiffy deltas (`utime`+`stime` from `/proc/<pid>/stat` over a window): the robust "is it computing" primitive; threshold over a window, never a zero-test (idle TUIs run timers). +- Child-process tree under the agent PID: strong, cheap "tool executing" corroborator; absent during pure generation, so it never gates working on its own. +- Foreground pgid (tpgid): precise "agent or shell owns the pane"; says nothing about state within the agent. +- wchan-style "blocked in tty read = waiting for input": valid only for readline CLIs; every built-in jackin' runtime is an event-loop TUI sitting in `epoll`/`poll` in both states — this also rules out cursor-probe-timeout (CSI 6n) as a blocked detector. Background: josnyder.com on wchan.
+- Provider TLS sockets: keep-alive persists across idle turns; presence is meaningless — skip. ## Related Files -- — current weak state timer; future PTY/process/screen signal source. -- — event loop, 1Hz state ticker, and session lifecycle. +- — state model; v2 replaces its signal machine with evidence arbitration. +- — becomes the only state path. +- — `/proc` identity plus CPU/children sampling. +- — container-local hook/plugin installers. +- — PTY/process source; loses its state-authoring call sites. +- — event loop, ticker, control handling, evidence recomputation. - — status/event protocol surface. -- — existing Capsule socket at `/jackin/run/jackin.sock`. -- — runtime-local setup path for container-owned hooks/plugins/API bridges. -- — built-in agent launch commands and autonomy flags. +- — control socket, event subscription, wait semantics. +- — container-local installation path. +- — launch scaffolding and status env. - — built-in runtime enum and slugs. -- — host launch path that must pass only explicit/container-local state setup. diff --git a/docs/content/docs/reference/roadmap/agent-workflow-orchestration.mdx b/docs/content/docs/reference/roadmap/agent-workflow-orchestration.mdx index 2ba90e74e..6e5242bee 100644 --- a/docs/content/docs/reference/roadmap/agent-workflow-orchestration.mdx +++ b/docs/content/docs/reference/roadmap/agent-workflow-orchestration.mdx @@ -149,7 +149,7 @@ The recommendation survives the second-pass research, but it should be narrowed: | Adopt a library runner | Avoids reimplementing branch/sandbox/hooks/logging. | Sandcastle is the clearest version of this approach. | Adds cross-language/runtime coupling and may import host hook semantics that conflict with jackin' rules. | | MCP-first control plane | Lets external orchestrators drive jackin' sessions without jackin' owning workflow semantics. | Good interoperability story and aligns with tool ecosystem direction. 
| MCP tools are not a durable workflow model; a raw tool surface can become unsafe remote control without policy and run state. | | GitHub-native workflow first | Puts state where review already happens. | GitHub checks/comments/issues are durable and familiar. | Hosted GitHub execution cannot replace local jackin' hardware, roles, credentials, or visible terminal hijack. | -| Optio-style daemon/queue first | Complete issue-to-merged-PR automation and feedback loops. | Best fit for overnight/task-queue ambition. | Too much blast radius before status authority, policy, cost controls, and operator gates exist. | +| Optio-style daemon/queue first | Complete issue-to-merged-PR automation and feedback loops. | Best fit for overnight/task-queue ambition. | Too much blast radius before durable policy, cost controls, event storage, and operator gates exist. | The best next step is therefore not "build the orchestrator" but "build the smallest state spine that every option needs." That means run records, event types, GitHub reporter, CLI/manual phase controls, and explicit intervention events. Capsule automation can follow after the state spine proves useful during manual runs. @@ -197,7 +197,7 @@ Other pieces should stay intentionally dumb until real runs prove the contract: ### Do not build yet -- Do not build an autonomous overnight queue before agent runtime status authority and durable event storage. +- Do not build an autonomous overnight queue before durable event storage, cost/resource controls, and explicit operator gates. - Do not build a declarative workflow DSL before hardcoded runs reveal stable step kinds. - Do not build a dashboard until GitHub/CLI state semantics are proven. - Do not build auto-merge or automatic branch cleanup for workflow runs. 
@@ -469,7 +469,7 @@ Expose jackin' as an execution substrate for external orchestrators through MCP - Should operator intervention automatically pause the workflow, or should it mark the phase dirty and continue after a verification step? - How should the workflow represent review quality: Claude-only, MCO/Hive-style consensus, GitHub Code Review, or a configurable reviewer chain? - How much of `/goal` should jackin' rely on for Codex, and how much run state should be owned by jackin' itself? -- What is the minimum reliable completion signal before [Agent runtime status authority](/reference/roadmap/agent-runtime-status/) ships? +- Now that [Agent runtime status authority](/reference/roadmap/agent-runtime-status/) has shipped, what additional verification or review signal should a workflow require before it advances from "agent is done" to "task is ready"? - What memory should be injected automatically for each profile, and how should the operator inspect or override the brief before launch? - Should workflow profiles be a distinct config surface, or should the first profile definitions live entirely in Rust until memory/run semantics settle? - Should declarative workflow files live in the jackin' repo, role repos, workspace config, or all three with precedence rules? 
diff --git a/docs/content/docs/reference/roadmap/console-agent-session-control.mdx b/docs/content/docs/reference/roadmap/console-agent-session-control.mdx index be0e07f80..a1b6f1a80 100644 --- a/docs/content/docs/reference/roadmap/console-agent-session-control.mdx +++ b/docs/content/docs/reference/roadmap/console-agent-session-control.mdx @@ -2,7 +2,7 @@ title: "Console Agent Session Control" --- -**Status**: Partially implemented — instance discovery, console workspace tree-view (expandable instance rows, session pane, `N`/`X`/`T`/`P` instance-row actions with the `P` purge gated behind a confirmation modal), `hardline --shell`, in-container multiplexer primary session, secondary agent sessions via `hardline --new`, and console `a`/`x` keybindings are shipped. Phase 4 (live session reconciliation, agent runtime status, resource panel integration) remains open. +**Status**: Partially implemented — instance discovery, console workspace tree-view (expandable instance rows, session pane, `N`/`X`/`T`/`P` instance-row actions with the `P` purge gated behind a confirmation modal), `hardline --shell`, in-container multiplexer primary session, secondary agent sessions via `hardline --new`, console `a`/`x` keybindings, and agent runtime status authority integration are shipped. Phase 4 still tracks live session reconciliation and resource panel integration. ## Problem @@ -34,6 +34,6 @@ The session substrate and CLI escape hatches are documented in [Container Superv ## Phase 4 — Rich operator coordination *(open)* -- Per-session idle/busy indicator in the console Instances panel (requires integration with the [Agent Runtime Status](/reference/roadmap/agent-runtime-status/) roadmap item). +- Per-session state indicators now consume the [Agent Runtime Status](/reference/roadmap/agent-runtime-status/) authority; remaining polish is tied to live reconciliation below. 
- Per-instance CPU/RAM usage in the console resource panel (requires integration with the [Console Resource Panel](/reference/roadmap/console-resource-panel/) roadmap item). - Session reconciliation: query the Capsule daemon on console refresh and update the `sessions` field in the instance manifest so the Instances panel reflects live session state. diff --git a/docs/content/docs/reference/roadmap/index.mdx b/docs/content/docs/reference/roadmap/index.mdx index 9bff5e392..e23c5666e 100644 --- a/docs/content/docs/reference/roadmap/index.mdx +++ b/docs/content/docs/reference/roadmap/index.mdx @@ -56,6 +56,8 @@ jackin' is a functional proof of concept, not a stable product line. **`Claude C - TUI architecture — Ratatui application patterns — all three TUI surfaces (`jackin-console`, `jackin-launch`, `jackin-capsule`) follow one Elm Architecture layout with typed effects, pure views, and shared components in `jackin-tui`. See [TUI Design Decisions](/reference/tui//) for the binding design rules and [Codebase Map](/reference/getting-oriented/codebase-map/) for the module layout. - **Codebase readability & restructuring foundation** — tiered Cargo workspace, dependency DAG with no upward edges, zero `mod.rs` files in extracted crates, and `clippy -D warnings` green across all targets. The remaining root `jackin` integration layer and console extraction ledger are documented in [Codebase map](/reference/getting-oriented/codebase-map/). - **Post-restructure correctness & polish fixes** — completed the branch-wide cleanup after the workspace split: shared TUI primitives, the owned `jackin-term` terminal model, the provider-catalog port onto the workspace crate structure, the `AgentRuntime` / `Provider` registry work, per-scope auth-sync source folders, structured diagnostics/metrics, zero-dead-code hygiene, and latest-stable Rust CI policy. 
Durable docs now live in [Capsule Terminal Model](/reference/capsule/terminal-model/), [Architecture](/reference/getting-oriented/architecture/#agentruntime-and-provider-registry), [Agent Authentication](/guides/authentication/agents/#choosing-a-sync-source-folder), [Run Diagnostics](/reference/runtime/diagnostics/), [TUI Design Decisions](/reference/tui/), and [Codebase Map](/reference/getting-oriented/codebase-map/). +- [Agent runtime status authority](/reference/roadmap/agent-runtime-status/) — PTY output no longer authors state; `unknown`/`done`/`seen`/revision derivation, semantic reporter events, evidence arbitration, `/proc` and OSC corroboration, TOML rule packs, runtime hook/plugin assets, Codex OSC notification config, heartbeat freshness, `events.subscribe`, `wait-session-status`, `SessionReadVisible`, `SessionStatusExplain`, `status explain`, `status capture`, explicit stuck diagnostics with watchdog authority invalidation, raw PTY transcript replay, blocked-dialog soak coverage, per-runtime event-sequence coverage, Capsule badge glyphs, dirty-session coalesced evaluation, blocker re-notification, flap/watchdog telemetry, image-build rule-pack version checks, and live in-container smoke coverage have shipped. The roadmap page is retained as a design record because open roadmap items still consume the status authority contract. + ## Partially implemented - [1Password integration](/reference/roadmap/onepassword-integration/) — env references and picker flows are documented in standard docs; roadmap tracks future read-only secret file mounts @@ -65,11 +67,10 @@ jackin' is a functional proof of concept, not a stable product line. 
**`Claude C - [Auth overwrite on new tab](/reference/roadmap/auth-overwrite-on-new-tab/) — root-cause fix shipped for the bug where opening a new agent tab overwrites in-container credentials with the stale launch-time snapshot; live cross-session sync remains deferred to the daemon program - [Workspace Claude token setup](/reference/roadmap/workspace-claude-token-setup/) — shipped token commands, the shared `op_picker` Create mode, and the plain-prompt `--interactive` storage-location picker are documented in standard docs; roadmap tracks the canonical auth slot, the console Auth-tab generate-token action, Apple Keychain backend, validity probe, and bulk migration - [Config versioning and migration framework](/reference/roadmap/config-versioning-migration/) — shipped per-file schema gates for config, workspace files, and role manifests, plus automatic config/workspace migration, desktop role manifest migration through `jackin role migrate`, and CI migration through `jackin-role migrate`; roadmap tracks deferred `--pr` automation and the Renovate-style auto-migration GitHub Action -- [Console agent session control](/reference/roadmap/console-agent-session-control/) — instance discovery, console workspace tree-view with expandable instance rows and session pane, `hardline --shell`, in-container multiplexer primary session, secondary agent sessions via `hardline --new`, and console `a`/`x`/`N`/`X`/`T`/`P` instance keybindings shipped; Phase 4 (live session reconciliation, agent runtime status, resource panel integration) remains open -- [jackin' Capsule control plane](/reference/roadmap/jackin-capsule/) — Phases 1–3 shipped: `jackin-capsule` Rust binary as PID 1 with zombie reaping; structured session inventory over Unix socket; in-container PTY multiplexer built on jackin-term `DamageGrid` with a Zellij-style dirty-row renderer, tmux-style prefix-key model (`Ctrl+B` opt-in, including prefix `Ctrl+L` clear-pane), persistent server that exits cleanly on 
last-session-end, binary tag+length attach framing, single-client takeover, mode-state restore on focus swap, OSC 52 / 9 / 2 / 8 passthrough, and top chrome with a brand pill, tab strip, and identity row. Phase 4 (host daemon integration, the [agent runtime status authority](/reference/roadmap/agent-runtime-status/), and Desktop Agent Hub bridge) remains open +- [Console agent session control](/reference/roadmap/console-agent-session-control/) — instance discovery, console workspace tree-view with expandable instance rows and session pane, `hardline --shell`, in-container multiplexer primary session, secondary agent sessions via `hardline --new`, agent runtime status authority integration, and console `a`/`x`/`N`/`X`/`T`/`P` instance keybindings shipped; Phase 4 still tracks live session reconciliation and resource panel integration +- [jackin' Capsule control plane](/reference/roadmap/jackin-capsule/) — Phases 1–3 shipped: `jackin-capsule` Rust binary as PID 1 with zombie reaping; structured session inventory over Unix socket; in-container PTY multiplexer built on jackin-term `DamageGrid` with a Zellij-style dirty-row renderer, tmux-style prefix-key model (`Ctrl+B` opt-in, including prefix `Ctrl+L` clear-pane), persistent server that exits cleanly on last-session-end, binary tag+length attach framing, single-client takeover, mode-state restore on focus swap, OSC 52 / 9 / 2 / 8 passthrough, top chrome with a brand pill, tab strip, identity row, and the [agent runtime status authority](/reference/roadmap/agent-runtime-status/). 
Remaining Phase 4 work is host daemon integration and the Desktop Agent Hub bridge - [Launch progress TUI](/reference/roadmap/launch-progress-tui/) — run IDs, JSONL diagnostics, instance-lifecycle boundary rain gating, structured launch stages, compact/rich/test renderers, console-triggered launch reuse, and reduced-motion support shipped; roadmap tracks diagnostics viewer, broader rich-render snapshots, and deeper startup parallelization - [Alternative LLM providers (MiniMax, Kimi, GLM everywhere)](/reference/roadmap/alternative-llm-providers/) — Phases 1–4 shipped: provider catalog (MiniMax, Kimi, Z.AI variants), Claude Code env-override routing for all three, Codex `config.toml` `[model_providers]` block for MiniMax, OpenCode provider-config JSON for all three, Auth-tab rows for MiniMax and Kimi Code, N-item provider picker; Codex GLM and Kimi cells remain deferred pending upstream Responses-API endpoints - - [Agent codenames for parallel session coordination](/reference/roadmap/agent-codenames/) — Phase 1 shipped: `JACKIN_AGENT_CODENAME` env var injected into every process in a tab (never reused within a container lifetime), `jackin-capsule agents` registry command with `--format json`, and tab hover tooltip; Phase 2 (operator docs) and Phase 3 (deferred integration) remain open - [Operator CLI hygiene](/reference/roadmap/operator-cli-hygiene/) — Phases 1–4 shipped: `jackin doctor` with 12 pre-flight checks, `jackin status`/`ps` three-level fleet overview (workspace → instance list → full detail with PR/CI/agents), `--format json` for workspace list/show and status/doctor, `jackin load --dry-run` plan preview, `JackinError` E001–E015 friendly error layer; Phase 5 (first-run wizard) deferred pending UX design - [Security threat model & signed releases](/reference/roadmap/security-threat-model-and-signed-releases/) — security implementation doc, CI signing (cosign + SLSA + SBOM) for all release/preview workflows, operator verification guide, and `sigstore-rs` 
in-process capsule manifest verification with embedded Rekor key shipped; CI signing activates on the first preview/release published after merge, and the in-process verification path awaits its first end-to-end run against a real signed bundle @@ -103,7 +104,6 @@ jackin' is a functional proof of concept, not a stable product line. **`Claude C - **[jackin' daemon — umbrella](/reference/roadmap/jackin-daemon/)** — introduce the long-running per-operator-user host process jackin' will use for reactive features. One umbrella item that decides lifecycle, install method, control socket, security posture, and log redaction once so each reactive feature plugs into one daemon shape. The full list of phase-2/phase-3 reactive adapters lives in the program doc and the sidebar under **Reference → Roadmap → Reactive daemon program** (status: open — design proposal) - **[jackin' Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/)** — native macOS menu bar and desktop companion for active jackin' workspaces, isolated agent sessions, PR jump links, and built-in-runtime account state (Claude, Codex, Amp, Kimi, OpenCode), with CodexBar and OpenUsage as scoped references for account quota surfaces. Keeps the agent TUIs as the primary agent UI while using the daemon as the shared state/event backend (status: open — design proposal) - **[Live bidirectional auth sync](/reference/roadmap/live-auth-sync/)** — Phase 2 adapter. Keep host and every running container in lock-step on each auth axis (`gh`, Claude, Codex, Amp, …). Subsumes the launch-time `sync` mode's bidirectional follow-up; reconsiders the `sync` name in the process (status: open — design proposal) -- **[Agent runtime status authority](/reference/roadmap/agent-runtime-status/)** — Phase 2 state source. 
Herdr-class in-container authority for `working`, `blocked`, derived `done`, `idle`, `unknown`, and stuck diagnostics, informed by multicode, CCManager, Agent Session Manager, WezTerm Agent Deck, ccmux, TUICommander, Codemux, and tmux-agent-status. Uses semantic runtime hooks/APIs, foreground-process ownership, visible-screen signals, shell markers, and cursor/readiness probes instead of treating PTY silence as attention-needed (status: open — design proposal) - **[Agent attention prompts](/reference/roadmap/agent-attention-prompts/)** — Phase 2 adapter. Host-side OS notifications when an agent inside a Capsule-managed role container is waiting on operator input or has finished unseen work. Consumes the agent runtime status authority, escalates from silent toast to sound after a configurable timeout, and focuses the right terminal tab where supported. Targets the biggest operator throughput drag: idle wall-clock waiting on agents that don't surface their waiting state (status: open — design proposal) - **[Host bridge — secrets and approved host actions](/reference/roadmap/host-bridge/)** — Phase 3 adapter. Operator-mediated channel for agents to request a single secret value or invoke a single host command without tearing down the container. Auto-registered MCP server, TouchID / polkit / password approval per request, per-workspace allowlist / blocklist / disabled policies, audit log. Closes the *"agent needs one thing from the host, mid-session"* gap that today forces full-restart workarounds (status: open — design proposal) - **[Container credential exposure — beyond env injection](/reference/roadmap/container-credential-exposure/)** — Phase 3 follow-on. Auth tokens and operator env values resolved from `op://` references become agent-readable when exported into the container today. 
Captures the trajectory from documented exposure to file-mount compatibility, command-scoped secret handles, Docker Sandboxes-style credential proxying, and daemon-mediated `secret.request` / `secret.run` grants where the agent gets a handle rather than the raw value (status: open — design proposal) diff --git a/docs/content/docs/reference/roadmap/jackin-capsule.mdx b/docs/content/docs/reference/roadmap/jackin-capsule.mdx index 1a95f5648..067fcc080 100644 --- a/docs/content/docs/reference/roadmap/jackin-capsule.mdx +++ b/docs/content/docs/reference/roadmap/jackin-capsule.mdx @@ -88,18 +88,19 @@ The original target API was: | `session.attach` | 3 | Return a PTY attachment handle so the client can connect | | `events` | 3 | Upgrade connection to a streaming event channel | -**Deferred event stream:** `session-started`, `session-ended`, `all-sessions-ended`, `agent-state-changed {session_id, state}`. The shipped Phase 3 control channel exposes one-shot `status` and `snapshot`; streaming state belongs to Phase 4 daemon integration. +**Event stream:** Phase 3 now exposes `EventsSubscribe` with `AgentStateChanged`, `SessionSpawned`, `SessionExited`, token usage, and workspace roll-up events. Phase 4 daemon integration consumes that stream across containers instead of inventing a second status channel. ### Agent state model -Each session tracks one of four states. These states are inferred from PTY output activity and foreground process state — no agent hooks or configuration required. +Each session tracks the public status vocabulary owned by the [agent runtime status authority](/reference/roadmap/agent-runtime-status/). Status is derived from semantic runtime reports, visible-screen rule packs, OSC evidence, process ownership, and watchdog checks; PTY output activity only schedules reevaluation and feeds stuck diagnostics. 
| State | Meaning | |---|---| -| `working` | Output flowing or foreground process actively running | -| `blocked` | Silent for N seconds with a foreground process present — waiting for operator input | +| `working` | Reliable evidence says the agent is actively generating, running tools, or processing | +| `blocked` | Reliable evidence says the agent is waiting on an operator decision or interactive input | | `done` | Work finished; the operator has not yet reviewed the output | -| `idle` | Reviewed or no work in progress | +| `idle` | Reviewed, no work in progress, or a shell pane | +| `unknown` | No reliable state evidence | The two-stage `done` / `idle` split is important: a `done` slot should not be automatically refilled by the autonomous task queue or cleaned up from the console until the operator has acknowledged the output. This distinction drives the "ready for review" indicator in the desktop app and the dispatch logic in future autonomous queue work. @@ -151,7 +152,7 @@ The [Herdr](https://github.com/ogulcancelik/herdr) study under "Prior art: Herdr - **Top-of-screen chrome.** The top chrome renders ` jackin' ` on the left followed by one tab per active jackin session. Active tab gets a distinct graphite background plus white underline/bold treatment; inactive tabs are dimmed; tab labels include the rolled-up state glyph. Operators sharing a screen recognise the brand pill at a glance without confusing it for the selected tab. - **Empty initial state when no agent is preselected.** When no initial agent argv is provided at daemon launch, the multiplexer can come up with the brand header, zero tabs, and a centred hint listing the agents from `/jackin/run/agent.toml` plus `Shell`. The operator picks one with the prefix key and the first tab spawns into that selection. When the host passes an initial agent argv, the daemon spawns the first tab with that agent automatically — matching the historical direct-into-agent UX. 
-- **Per-tab "most urgent" state roll-up.** A tab containing any `blocked` pane is `blocked`; otherwise any `done` pane makes it `done`; otherwise `working`; otherwise `idle`. Same urgency order herdr uses. This drives the tab-strip glyph and feeds the future `agent-state-changed` event stream. Once a pane reaches `blocked`, the attention glyph stays visible until explicit operator keyboard input reaches that pane; incidental PTY output is not enough to clear it. +- **Per-tab "most urgent" state roll-up.** A tab containing any `blocked` pane is `blocked`; otherwise any `done` pane makes it `done`; otherwise any `working` pane makes it `working`; otherwise any `idle` pane makes it `idle`; otherwise the tab is `unknown`. Same urgency order Herdr uses. This drives the tab-strip glyph and feeds the status event stream. Once a pane reaches `blocked`, the attention glyph stays visible until explicit operator keyboard input reaches that pane; incidental PTY output is not enough to clear it. These are *interface* borrows. Herdr's internal layout (workspaces, sidebar, pane-focus suppression rules, ghostty embedding, kitty graphics passthrough, plugin hooks) is not part of jackin' scope and is not copied. @@ -452,7 +453,7 @@ The deeper architectural difference: Herdr wraps **bare host processes**. jackin **Blocking `wait` semantics on the socket.** Herdr's socket API lets callers block until a status transition (`herdr wait agent-status 1-1 --status done`). This is a better interface than polling for automation scripts and for the daemon's event subscription. The `events` stream in Phase 3 should support the same pattern: a subscriber blocks on the stream and receives the event when the transition occurs. -**Layered state authority.** Herdr detects agent state by combining foreground process state, visible terminal-screen signals, and semantic integration reports. 
`jackin-capsule` will use the same concept — with the advantage that it runs inside the container and reads the agent's real PTY and process tree directly rather than through a `docker attach` wrapper. Output activity remains a weak `working` signal, but silence alone must not mean `blocked`; the dedicated [agent runtime status authority](/reference/roadmap/agent-runtime-status/) owns the full arbitration model. +**Layered state authority.** Herdr detects agent state by combining foreground process state, visible terminal-screen signals, and semantic integration reports. `jackin-capsule` uses the same concept — with the advantage that it runs inside the container and reads the agent's real PTY and process tree directly rather than through a `docker attach` wrapper. PTY output recency feeds scheduling and stuck diagnostics but never authors `working`, `blocked`, or `idle`; the dedicated [agent runtime status authority](/reference/roadmap/agent-runtime-status/) owns the full arbitration model. ### What not to borrow from Herdr @@ -496,14 +497,14 @@ The first Phase 3 attempt landed PID 1 ownership, the PTY layer, a binary pane t Phase 3 also keeps the control-channel work the first attempt completed: -- Implement the [agent runtime status authority](/reference/roadmap/agent-runtime-status/): raw `working` / `blocked` / `idle` / `unknown`, derived `done`, foreground-process detection, semantic runtime reports, visible-screen signals, stale-report arbitration, and stuck diagnostics. Silence alone is not a blocker signal. +- Shipped the [agent runtime status authority](/reference/roadmap/agent-runtime-status/): raw `working` / `blocked` / `idle` / `unknown`, derived `done`, foreground-process detection, semantic runtime reports, visible-screen signals, stale-report arbitration, and stuck diagnostics. Silence alone is not a blocker signal. - Implement the two-stage `done` / `idle` split and the operator-acknowledgement mechanism. 
- Expand control-channel API: `session.create`, `session.kill`, `session.title`, `events`. -- Implement event stream: `session-started`, `session-ended`, `agent-state-changed`. +- Shipped the event stream: `AgentStateChanged`, `SessionSpawned`, `SessionExited`, `TokenUsageChanged`, and `WorkspaceStatusChanged`. - Implement per-tab status roll-up (`blocked > done > working > idle > unknown`) on the host side, consuming events from the control channel. - Remove `tmux` from the derived image (). - Replace all `docker exec tmux …` call sites in the host CLI with control-channel calls. -- Update console session panel to consume `agent-state-changed` events rather than polling. +- Update the console session indicators to consume the status authority's output rather than inventing state from host-side polling. - Update attention prompts to subscribe to `blocked` state events rather than doing PTY polling from the host. `session.attach` is intentionally *not* on the control channel — attach is the persistent binary attach channel defined in "Wire protocol". The first attempt accidentally collapsed the two into one socket; the rewrite separates them. @@ -519,7 +520,7 @@ Phase 3 also keeps the control-channel work the first attempt completed: - **[Agent Orchestrator Research Program](/reference/roadmap/agent-orchestrator-research/)** — Herdr is evaluated there as the strongest prior art for the multiplexer vision. The full comparative table (Herdr vs. jackin' values) lives in the research overview. - **[Console agent session control](/reference/roadmap/console-agent-session-control/)** — Phase 4 of that item unblocks once Phase 2 of this item ships: the binary exposes live session state, eliminating manifest-snapshot reconciliation. -- **[Agent runtime status authority](/reference/roadmap/agent-runtime-status/)** — the `agent-state-changed` event stream from Phase 3 is the delivery mechanism for blocked/working/done/idle/stuck indicators in the console and hardline. 
+- **[Agent runtime status authority](/reference/roadmap/agent-runtime-status/)** — the Phase 3 event stream is the delivery mechanism for blocked/working/done/idle/stuck indicators in the console and hardline. - **[Agent attention prompts](/reference/roadmap/agent-attention-prompts/)** — `blocked` events replace PTY polling from the host; `done` events trigger the "ready for review" notification path. - **[jackin' daemon](/reference/roadmap/jackin-daemon/)** — `jackin-capsule` is the per-container endpoint the daemon subscribes to. Phase 4 of this item and the daemon's container-watch phase are designed together. - **[jackin' Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/)** — Phase 3 of this item is the prerequisite: the desktop app's live session view is driven by `jackin-capsule` socket events aggregated by the daemon. diff --git a/docs/content/docs/reference/roadmap/orca-research.mdx b/docs/content/docs/reference/roadmap/orca-research.mdx index 8b053bf93..f479a99fd 100644 --- a/docs/content/docs/reference/roadmap/orca-research.mdx +++ b/docs/content/docs/reference/roadmap/orca-research.mdx @@ -98,7 +98,7 @@ The workflow of marking up diff lines with comments and sending the batch to the ### Worktree-session visual table -Orca's main view is a table of worktrees with live agent status. The jackin' console workspace tree is the analog. The gap is status richness: Orca shows active/interrupted/blocked; jackin' shows limited instance state. This is exactly the [agent runtime status](/reference/roadmap/agent-runtime-status/) gap. +Orca's main view is a table of worktrees with live agent status. The jackin' console workspace tree is the analog. The status-richness gap is now addressed by the shipped [agent runtime status](/reference/roadmap/agent-runtime-status/) authority; remaining Orca-inspired work is how the console and Desktop Agent Hub present those signals across a fleet. 
### Named port panel per session diff --git a/docs/content/docs/reference/roadmap/terminal-observation-automation.mdx b/docs/content/docs/reference/roadmap/terminal-observation-automation.mdx index 6a2c5e5b7..a3f9bfd13 100644 --- a/docs/content/docs/reference/roadmap/terminal-observation-automation.mdx +++ b/docs/content/docs/reference/roadmap/terminal-observation-automation.mdx @@ -118,7 +118,7 @@ session.wait(session.id, status: "blocked|done", timeout: 30m) session.capture(session.id, out: "after-agent-response") ``` -`blocked` and waiting details should not be invented by this API. They come from the [agent runtime status authority](/reference/roadmap/agent-runtime-status/), which may use runtime hooks/APIs, foreground process evidence, visible-screen signals, and cursor/readiness probes. Terminal observation consumes that status and combines it with the visible screen and trace evidence so callers can both know **that** an agent is waiting and inspect **what** it is waiting on. +`blocked` and waiting details should not be invented by this API. They come from the [agent runtime status authority](/reference/roadmap/agent-runtime-status/), which uses runtime hooks/plugins, foreground process evidence, visible-screen rules, OSC evidence, shell markers, and process watchdog checks. Terminal observation consumes that status and combines it with the visible screen and trace evidence so callers can both know **that** an agent is waiting and inspect **what** it is waiting on. ### Screen text and frame snapshots @@ -218,7 +218,7 @@ This item does not replace the [agent runtime status authority](/reference/roadm | Runtime status authority | Terminal observation and automation | |---|---| | Decides whether a session is `working`, `blocked`, `done`, `idle`, `unknown`, or stuck. | Reads the visible screen, waits for visible/status conditions, injects input, captures artifacts, and records traces. 
| -| Uses runtime hooks/APIs, process ownership, screen evidence, shell markers, and cursor probes. | Exposes the live terminal and trace data to scripts, tests, workflow runners, and humans. | +| Uses runtime hooks/plugins, process ownership, screen evidence, OSC evidence, shell markers, and watchdog checks. | Exposes the live terminal and trace data to scripts, tests, workflow runners, and humans. | | Should be conservative and avoid scraping as truth when semantic signals exist. | May use visible text as an explicit caller-requested condition, with evidence and timeout semantics. | The workflow runner should prefer status/marker waits when it needs semantic lifecycle truth and visible-text waits when the operator or test explicitly cares that text is on screen.